make target name optional
This commit is contained in:
parent
bda9b43318
commit
51968e443a
|
@ -76,9 +76,9 @@ def parse_config(config_filename):
|
|||
# Create a ScrapeTarget for each target to scrape
|
||||
result.append(ScrapeTarget(
|
||||
product_name=product_name,
|
||||
target_name=get_field_or_die(target, 'name'),
|
||||
url=get_field_or_die(target, 'url'),
|
||||
selector=get_field_or_die(target, 'selector'),
|
||||
target_name=target.get('name'),
|
||||
regex=target.get('regex'),
|
||||
parser=target.get('parser'),
|
||||
))
|
||||
|
|
|
@ -1,15 +1,16 @@
|
|||
from email import parser
|
||||
import json
|
||||
import re
|
||||
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import httpx
|
||||
import parsel
|
||||
import pyjq
|
||||
|
||||
class ScrapeTarget:
|
||||
def __init__(self, product_name, target_name, url, selector, regex=None, parser=None):
|
||||
def __init__(self, product_name, url, selector, target_name=None, regex=None, parser=None):
|
||||
self.product_name = product_name
|
||||
self.target_name = target_name
|
||||
self.target_name = target_name if target_name else urlparse(url).hostname
|
||||
self.url = url
|
||||
self.selector = selector
|
||||
self.regex = re.compile(regex if regex else r'[0-9]+(\.[0-9]{2})?')
|
||||
|
|
Loading…
Reference in New Issue