# ecommerce-exporter/ecommerce_exporter/cli.py
import argparse
import logging
import os
import time

import yaml
from prometheus_client import start_http_server, Gauge, Counter

from ecommerce_exporter.scrape_target import ScrapeTarget

# Logging format/level are overridable through the environment so deployments
# can tune verbosity without a code change.
logging.basicConfig(
    format=os.environ.get('LOG_FORMAT', '[%(asctime)s] [%(levelname)-8s] %(message)s'),
    level=os.environ.get('LOG_LEVEL', 'INFO')
)
logger = logging.getLogger(__name__)
# Prometheus metrics exposed by this exporter, labelled per product/target.
ECOMMERCE_SCRAPE_TARGET_VALUE = Gauge(
    'ecommerce_scrape_target_value',
    'The value scraped from a scrape target',
    ['product_name', 'target_name'],
)
ECOMMERCE_SCRAPE_TARGET_SUCCESS = Counter(
    'ecommerce_scrape_target_success_total',
    'The number of successful scrape and parse of a scrape target',
    ['product_name', 'target_name'],
)
ECOMMERCE_SCRAPE_TARGET_FAILURE = Counter(
    'ecommerce_scrape_target_failure_total',
    'The number of failed scrape and parse of a scrape target',
    # The 'exception' label records the exception class name for diagnostics.
    ['product_name', 'target_name', 'exception'],
)
def main():
    """CLI entry point: parse arguments, load the scrape-target configuration,
    start the metrics HTTP server, then scrape every target in an endless loop.
    """
    # BUG FIX: the first positional argument of ArgumentParser is `prog`, not
    # the description -- pass it as description= so --help renders correctly.
    parser = argparse.ArgumentParser(
        description="An utility to scrape e-commerce product price and expose them as prometheus metrics",
    )
    parser.add_argument(
        '-c', '--config',
        help='The configuration file. (default: %(default)s)',
        type=str,
        default='ecommerce-exporter.yml',
    )
    parser.add_argument(
        '-i', '--interval',
        help='The target scrape interval, in minutes. (default: %(default)s)',
        type=float,
        default=15,
    )
    parser.add_argument(
        '--user-agent',
        help='The user-agent to spoof. (default: %(default)s)',
        type=str,
        default='Mozilla/5.0 (X11; Linux x86_64; rv:131.0) Gecko/20100101 Firefox/131.0',
    )
    parser.add_argument(
        '-p', '--listen-port',
        help='The listen port for the http server. (default: %(default)s)',
        type=int,
        default=8000,
    )
    parser.add_argument(
        '-a', '--listen-address',
        help='The listen address for the http server. (default: %(default)s)',
        type=str,
        default='0.0.0.0',
    )
    args = parser.parse_args()

    scrape_targets = parse_config(os.path.abspath(args.config), user_agent=args.user_agent)

    # start the http server to serve the prometheus metrics
    logger.info("serving metrics on http://%s:%s/metrics", args.listen_address, args.listen_port)
    start_http_server(args.listen_port, args.listen_address)

    # main loop: scrape every target once, then sleep for the interval
    while True:
        for scrape_target in scrape_targets:
            try:
                logger.info("Starting scrape. product: '%s', target '%s'", scrape_target.product_name, scrape_target.target_name)
                value = scrape_target.query_target()
                ECOMMERCE_SCRAPE_TARGET_VALUE.labels(
                    product_name=scrape_target.product_name,
                    target_name=scrape_target.target_name
                ).set(value)
                ECOMMERCE_SCRAPE_TARGET_SUCCESS.labels(
                    product_name=scrape_target.product_name,
                    target_name=scrape_target.target_name,
                ).inc()
            except KeyboardInterrupt:
                # let ctrl-c terminate the process instead of being swallowed
                # by the broad handler below
                return
            except Exception as e:
                # a single failing target must not kill the scrape loop;
                # record the failure (with the exception class) and move on
                logger.error("Failed to scrape! product: '%s', target: '%s', message: '%s'" , scrape_target.product_name, scrape_target.target_name, e)
                ECOMMERCE_SCRAPE_TARGET_FAILURE.labels(
                    product_name=scrape_target.product_name,
                    target_name=scrape_target.target_name,
                    exception=e.__class__.__name__,
                ).inc()
        time.sleep(args.interval * 60)
def parse_config(config_filename, user_agent):
    """Load the YAML configuration file and build one ScrapeTarget per
    configured (product, target) pair.

    :param config_filename: path to the YAML configuration file
    :param user_agent: User-Agent header value sent with every scrape request
    :return: list of ScrapeTarget instances
    :raises Exception: when a required field (products, name, targets, url,
        selector) is missing from the configuration
    """
    result = []
    logger.info('Loading configurations from %s', config_filename)
    with open(config_filename, 'r') as f:
        config = yaml.safe_load(f)

    # iterate through products listed in the configuration
    products = get_field_or_die(config, 'products')
    for product in products:
        product_name = get_field_or_die(product, 'name')
        # iterate through the targets listed for each product in the configuration
        targets = get_field_or_die(product, 'targets')
        for target in targets:
            # Create a ScrapeTarget for each target to scrape
            result.append(ScrapeTarget(
                product_name=product_name,
                url=get_field_or_die(target, 'url'),
                selector=get_field_or_die(target, 'selector'),
                # name/regex/parser are optional; ScrapeTarget receives None
                # when they are absent
                target_name=target.get('name'),
                regex=target.get('regex'),
                parser=target.get('parser'),
                headers={
                    'User-Agent': user_agent,
                },
            ))
    return result
def get_field_or_die(mapping, field_name):
    """Return the value of a required field, raising when it is absent or None."""
    value = mapping.get(field_name)
    if value is not None:
        return value
    raise Exception('Missing required field: %s' % field_name)
# Support running this module directly as a script.
if __name__ == '__main__':
    main()