diff --git a/dealwatch/cli.py b/dealwatch/cli.py
index c82d583..8b99c93 100644
--- a/dealwatch/cli.py
+++ b/dealwatch/cli.py
@@ -76,9 +76,9 @@ def parse_config(config_filename):
             # Create a ScrapeTarget for each targets to scrape
             result.append(ScrapeTarget(
                 product_name=product_name,
-                target_name=get_field_or_die(target, 'name'),
                 url=get_field_or_die(target, 'url'),
                 selector=get_field_or_die(target, 'selector'),
+                target_name=target.get('name'),
                 regex=target.get('regex'),
                 parser=target.get('parser'),
             ))
diff --git a/dealwatch/scrape_target.py b/dealwatch/scrape_target.py
index b81d135..5907a47 100644
--- a/dealwatch/scrape_target.py
+++ b/dealwatch/scrape_target.py
@@ -1,15 +1,16 @@
-from email import parser
 import json
 import re
+from urllib.parse import urlparse
+
 import httpx
 import parsel
 import pyjq
 
 class ScrapeTarget:
-    def __init__(self, product_name, target_name, url, selector, regex=None, parser=None):
+    def __init__(self, product_name, url, selector, target_name=None, regex=None, parser=None):
         self.product_name = product_name
-        self.target_name = target_name
+        self.target_name = target_name if target_name else urlparse(url).hostname
         self.url = url
         self.selector = selector
         self.regex = re.compile(regex if regex else r'[0-9]+(\.[0-9]{2})?')
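
For context, a minimal sketch of the fallback this diff introduces: the config's `name` field becomes optional, and when it is omitted `target_name` is derived from the URL's hostname via `urlparse`. The URL and variable names below are placeholders for illustration, not values from a real dealwatch config.

```python
from urllib.parse import urlparse

# Same fallback logic as the new ScrapeTarget.__init__: prefer the config's
# optional `name`, otherwise use the hostname of the target URL.
url = 'https://shop.example.com/products/example-gpu'  # placeholder URL
name_from_config = None                                # `name` omitted in the config
target_name = name_from_config if name_from_config else urlparse(url).hostname
print(target_name)  # shop.example.com
```

This keeps explicit names working exactly as before while giving unnamed targets a reasonable, human-readable default.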