1
0
Fork 0

make target name optional

This commit is contained in:
Massaki Archambault 2022-10-30 14:24:57 -04:00
parent bda9b43318
commit 51968e443a
2 changed files with 5 additions and 4 deletions

View File

@@ -76,9 +76,9 @@ def parse_config(config_filename):
# Create a ScrapeTarget for each target to scrape
result.append(ScrapeTarget(
product_name=product_name,
target_name=get_field_or_die(target, 'name'),
url=get_field_or_die(target, 'url'),
selector=get_field_or_die(target, 'selector'),
target_name=target.get('name'),
regex=target.get('regex'),
parser=target.get('parser'),
))

View File

@@ -1,15 +1,16 @@
from email import parser
import json
import re
from urllib.parse import urlparse
import httpx
import parsel
import pyjq
class ScrapeTarget:
def __init__(self, product_name, target_name, url, selector, regex=None, parser=None):
def __init__(self, product_name, url, selector, target_name=None, regex=None, parser=None):
self.product_name = product_name
self.target_name = target_name
self.target_name = target_name if target_name else urlparse(url).hostname
self.url = url
self.selector = selector
self.regex = re.compile(regex if regex else r'[0-9]+(\.[0-9]{2})?')