make target name optional
This commit is contained in:
parent
bda9b43318
commit
51968e443a
|
@ -76,9 +76,9 @@ def parse_config(config_filename):
|
||||||
# Create a ScrapeTarget for each target to scrape
|
# Create a ScrapeTarget for each target to scrape
|
||||||
result.append(ScrapeTarget(
|
result.append(ScrapeTarget(
|
||||||
product_name=product_name,
|
product_name=product_name,
|
||||||
target_name=get_field_or_die(target, 'name'),
|
|
||||||
url=get_field_or_die(target, 'url'),
|
url=get_field_or_die(target, 'url'),
|
||||||
selector=get_field_or_die(target, 'selector'),
|
selector=get_field_or_die(target, 'selector'),
|
||||||
|
target_name=target.get('name'),
|
||||||
regex=target.get('regex'),
|
regex=target.get('regex'),
|
||||||
parser=target.get('parser'),
|
parser=target.get('parser'),
|
||||||
))
|
))
|
||||||
|
|
|
@ -1,15 +1,16 @@
|
||||||
from email import parser
|
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
import parsel
|
import parsel
|
||||||
import pyjq
|
import pyjq
|
||||||
|
|
||||||
class ScrapeTarget:
|
class ScrapeTarget:
|
||||||
def __init__(self, product_name, target_name, url, selector, regex=None, parser=None):
|
def __init__(self, product_name, url, selector, target_name=None, regex=None, parser=None):
|
||||||
self.product_name = product_name
|
self.product_name = product_name
|
||||||
self.target_name = target_name
|
self.target_name = target_name if target_name else urlparse(url).hostname
|
||||||
self.url = url
|
self.url = url
|
||||||
self.selector = selector
|
self.selector = selector
|
||||||
self.regex = re.compile(regex if regex else r'[0-9]+(\.[0-9]{2})?')
|
self.regex = re.compile(regex if regex else r'[0-9]+(\.[0-9]{2})?')
|
||||||
|
|
Loading…
Reference in New Issue