1
0
Fork 0
ecommerce-exporter/dealwatch/cli.py

52 lines
1.7 KiB
Python

import argparse
import yaml
from dealwatch.scrape_target import ScrapeTarget
def main():
    """Command-line entry point: load the configuration and print its targets.

    Parses the ``-c``/``--config`` option (default ``dealwatch.yml``), passes
    the chosen path to ``parse_config`` and prints the list it returns.
    """
    # The sentence has to be given as ``description``: the first positional
    # parameter of ArgumentParser is ``prog``, so passing it positionally made
    # the whole sentence appear as the program name in the usage line.
    parser = argparse.ArgumentParser(
        description="An utility to scrape e-commerce target price fluctuations",
    )
    parser.add_argument(
        '-c', '--config',
        help='The configuration file. (default: %(default)s)',
        type=str,
        default='dealwatch.yml',
    )
    args = parser.parse_args()
    products = parse_config(args.config)
    print(products)
def parse_config(config_filename):
    """Load the YAML configuration and build one ScrapeTarget per listed target.

    Args:
        config_filename: Path of the YAML configuration file to read.

    Returns:
        A list of ScrapeTarget objects, in the order the products and their
        targets appear in the configuration.

    Raises:
        Exception: If a required field is missing (``products``; a product's
            ``name`` or ``targets``; a target's ``name``, ``url`` or
            ``selector``). Raised by ``get_field_or_die``.
    """
    print('Loading configurations from %s' % config_filename)

    # Open in binary mode so PyYAML decodes the stream itself (UTF-8 unless a
    # BOM says otherwise) instead of relying on the platform's locale encoding.
    with open(config_filename, 'rb') as f:
        # An empty document loads as None; fall back to an empty mapping so
        # the caller gets the regular "missing required field" error rather
        # than an AttributeError on None.
        config = yaml.safe_load(f) or {}

    result = []
    # Iterate through the products listed in the configuration.
    for product in get_field_or_die(config, 'products'):
        product_name = get_field_or_die(product, 'name')
        # Each product lists one or more targets to scrape.
        for target in get_field_or_die(product, 'targets'):
            result.append(ScrapeTarget(
                product_name=product_name,
                target_name=get_field_or_die(target, 'name'),
                url=get_field_or_die(target, 'url'),
                selector=get_field_or_die(target, 'selector'),
                # 'regex' is optional; None is passed when it is absent.
                regex=target.get('regex'),
            ))
    return result
def get_field_or_die(mapping, field_name):
    """Return ``mapping``'s value for ``field_name``, failing if it is absent.

    Args:
        mapping: Object exposing a ``get`` method (e.g. a dict).
        field_name: Key to look up.

    Returns:
        The value stored under ``field_name``.

    Raises:
        Exception: If the lookup yields ``None``, i.e. the key is missing or
            explicitly set to ``None``.
    """
    found = mapping.get(field_name)
    # Only None counts as missing; falsy values such as 0 or '' are returned.
    if found is not None:
        return found
    raise Exception('Missing required field: %s' % field_name)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()