Parser

The script below reads business-page URLs from firms-url.txt, fetches each page with requests, extracts the company details with selectolax, and appends them as rows to firms.csv:

import csv

import requests
from selectolax.parser import HTMLParser


def get_html(url):
    """Download the page and return its HTML, or False on a request error."""
    try:
        result = requests.get(url)
        result.raise_for_status()
        return result.text
    except (requests.RequestException, ValueError):
        print('Server error')
        return False


def write_csv(data):
    """Append one row with the company details to firms.csv."""
    order = ['url', 'name', 'city', 'category', 'site', 'social', 'phone']
    with open('firms.csv', 'a', encoding='utf-8', newline='') as file:
        writer = csv.DictWriter(file, fieldnames=order)
        writer.writerow(data)


def get_data(html, url):
    """Extract the company card fields from the page and write them to the CSV."""
    dom = HTMLParser(html)
    business_card_view = dom.css_first('.business-card-view')
    breadcrumbs = business_card_view.css('.breadcrumbs-view__breadcrumb')
    try:
        city = breadcrumbs[1].text()
        category = breadcrumbs[3].text()
    except IndexError:
        city = ''
        category = ''
    name = business_card_view.css_first('h1').text()
    try:
        site = business_card_view.css_first('.business-urls-view__url > a').attrs['href']
    except AttributeError:
        site = ''
    try:
        social = business_card_view.css_first('.business-contacts-view__social-button > a').attrs['href']
    except AttributeError:
        social = ''
    try:
        phone = business_card_view.css_first('.card-phones-view__number > span').text()
    except AttributeError:
        phone = ''
    data = {
        'url': url,
        'name': name,
        'city': city,
        'category': category,
        'site': site,
        'social': social,
        'phone': phone,
    }
    write_csv(data)
    print(f'{city} | {category} | {name}')


def main():
    # firms-url.txt holds one page URL per line.
    with open('firms-url.txt', 'r') as file:
        urls = [line.strip() for line in file]
    for n, url in enumerate(urls, start=1):
        print(n)
        html = get_html(url)
        if html:  # skip pages that failed to download
            get_data(html, url)


if __name__ == '__main__':
    main()
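One gap worth noting: write_csv() opens firms.csv in append mode and never writes a header row, so the columns end up unlabeled. Below is a minimal sketch of a one-time setup step that creates the file with a header first; the file name and column order come from the script above, but running this as a separate step is an assumption, not part of the original.

import csv

# Assumed one-time setup step: create firms.csv with a header row
# so the rows appended later by write_csv() have labeled columns.
order = ['url', 'name', 'city', 'category', 'site', 'social', 'phone']
with open('firms.csv', 'w', encoding='utf-8', newline='') as file:
    writer = csv.DictWriter(file, fieldnames=order)
    writer.writeheader()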