1
0
Fork 0

configuration file parsing

This commit is contained in:
Massaki Archambault 2022-10-29 23:35:36 -04:00
parent 858ce77d82
commit 687ac4317d
6 changed files with 72 additions and 6 deletions

2
.gitignore vendored
View File

@ -201,3 +201,5 @@ tags
# Persistent undo
[._]*.un~
### Project-specific
dealwatch.yml

5
dealwatch.example.yml Normal file
View File

@ -0,0 +1,5 @@
targets:
amazon.ca:
url: https://www.amazon.ca/Intel-i7-12700K-Desktop-Processor-Unlocked/dp/B09FXNVDBJ/?_encoding=UTF8&pd_rd_w=BXQyU&content-id=amzn1.sym.b09e9731-f0de-43db-b62a-8954bcec282c&pf_rd_p=b09e9731-f0de-43db-b62a-8954bcec282c&pf_rd_r=Z2HRQ8TYGA943PQFTW1Q&pd_rd_wg=AG2TD&pd_rd_r=e4766451-3584-4c4f-8235-bcd4a316909a&ref_=pd_gw_ci_mcx_mr_hp_atf_m
selector: .a-offscreen
regex: '[0-9]+(\.[0-9]{2})?'

52
dealwatch/cli.py Normal file
View File

@ -0,0 +1,52 @@
import argparse
import yaml
from dealwatch.scrape_target import ScrapeTarget
def main():
    """Command-line entry point.

    Parses the command-line arguments, loads the scrape targets from the
    configuration file given by ``-c/--config`` (default ``dealwatch.yml``)
    and prints the resulting list.
    """
    parser = argparse.ArgumentParser(
        # Must be passed by keyword: the first positional parameter of
        # ArgumentParser is ``prog`` (the program name shown in the usage
        # line), not the description.
        description="An utility to scrape e-commerce target price fluctuations",
    )
    parser.add_argument(
        '-c', '--config',
        help='The configuration file. (default: %(default)s)',
        type=str,
        default='dealwatch.yml',
    )
    args = parser.parse_args()

    products = parse_config(args.config)
    print(products)
def parse_config(config_filename):
    """Load the YAML configuration and build the list of scrape targets.

    The document is expected to have a top-level ``products`` sequence. Each
    product needs a ``name`` and a ``targets`` sequence; each target needs
    ``name``, ``url`` and ``selector`` and may provide an optional ``regex``.

    Args:
        config_filename: Path of the YAML configuration file to read.

    Returns:
        A list with one ``ScrapeTarget`` per target of every product, in the
        order they appear in the file.

    Raises:
        Exception: Raised by ``get_field_or_die`` when a required field is
            missing (including when the file is empty).
        OSError: If the file cannot be opened.
    """
    result = []
    print('Loading configurations from %s' % config_filename)

    # Read as UTF-8 explicitly so parsing does not depend on the platform's
    # locale encoding.
    with open(config_filename, 'r', encoding='utf-8') as f:
        config = yaml.safe_load(f)

    # safe_load returns None for an empty document; treat it as an empty
    # mapping so the user gets the regular "missing required field" error
    # instead of an AttributeError on None.
    if config is None:
        config = {}

    # iterate through products listed in the configuration
    products = get_field_or_die(config, 'products')
    for product in products:
        product_name = get_field_or_die(product, 'name')

        # iterate through the targets listed for each product in the configuration
        targets = get_field_or_die(product, 'targets')
        for target in targets:
            # Create a ScrapeTarget for each target to scrape
            result.append(ScrapeTarget(
                product_name=product_name,
                target_name=get_field_or_die(target, 'name'),
                url=get_field_or_die(target, 'url'),
                selector=get_field_or_die(target, 'selector'),
                regex=target.get('regex'),
            ))
    return result
def get_field_or_die(mapping, field_name):
    """Return the value of a required field, raising if it is unavailable.

    Args:
        mapping: The mapping (typically a dict parsed from the configuration)
            to read from. Any object exposing a ``get`` method is accepted.
        field_name: Key of the required field.

    Returns:
        The value stored under ``field_name``.

    Raises:
        Exception: If ``mapping`` is not a mapping-like object, or if the
            field is absent or explicitly set to ``None``.
    """
    # A malformed configuration can put None, a scalar or a list where a
    # mapping is expected; report that as a configuration error instead of
    # letting an unrelated AttributeError escape.
    if not hasattr(mapping, 'get'):
        raise Exception(
            'Missing required field: %s (expected a mapping, got %s)'
            % (field_name, type(mapping).__name__)
        )

    value = mapping.get(field_name)
    if value is None:
        raise Exception('Missing required field: %s' % field_name)
    return value
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()

View File

@ -1,5 +0,0 @@
def main():
    """Print a placeholder greeting to standard output."""
    greeting = "Hello world"
    print(greeting)


# Only run when executed directly as a script.
if __name__ == '__main__':
    main()

View File

@ -0,0 +1,9 @@
import re
class ScrapeTarget:
    """Description of one page to scrape for a product.

    Attributes are stored as given, except ``regex`` which is compiled.
    How ``url`` and ``selector`` are consumed is not visible here;
    presumably ``selector`` is a CSS selector applied to the fetched page
    (TODO confirm against the scraping code).
    """

    # Pattern used when no regex is configured: one or more digits,
    # optionally followed by a dot and exactly two digits.
    DEFAULT_REGEX = r'[0-9]+(\.[0-9]{2})?'

    def __init__(self, product_name, target_name, url, selector, regex=None):
        """Store the target description and compile its regex.

        Args:
            product_name: Name of the product this target belongs to.
            target_name: Name of this target.
            url: Address of the page for this target.
            selector: Selector string for this target.
            regex: Optional pattern string. ``None`` or an empty string
                selects ``DEFAULT_REGEX``.

        Raises:
            re.error: If ``regex`` is not a valid regular expression.
        """
        self.product_name = product_name
        self.target_name = target_name
        self.url = url
        self.selector = selector
        self.regex = re.compile(regex if regex else self.DEFAULT_REGEX)

    def __repr__(self):
        """Return a readable representation listing every attribute."""
        # Instances are printed in lists by the CLI, so a descriptive repr
        # is far more useful than the default object address.
        return (
            f"{type(self).__name__}("
            f"product_name={self.product_name!r}, "
            f"target_name={self.target_name!r}, "
            f"url={self.url!r}, "
            f"selector={self.selector!r}, "
            f"regex={self.regex.pattern!r})"
        )

View File

@ -10,10 +10,13 @@ setup_requires =
setuptools
setuptools_scm
install_requires=
PyYAML~=6.0
requests~=2.28.1
parsel~=1.6.0
prometheus-client~=0.15.0
[options.entry_points]
console_scripts =
dealwatch = dealwatch.main:main
dealwatch = dealwatch.cli:main
[tool.setuptools_scm]