Archived
1
0
Fork 0
This repository has been archived on 2024-12-18. You can view files and clone it, but cannot push or open issues or pull requests.
restockbot/main.py

82 lines
3.2 KiB
Python

#!/usr/bin/env python3
import argparse
import logging
from concurrent import futures
from config import extract_shops, read_config
from crawlers import (AlternateCrawler, LDLCCrawler, MaterielNetCrawler,
TopAchatCrawler)
from db import create_tables, list_shops, upsert_products, upsert_shops
from notifiers import TwitterNotifier
logger = logging.getLogger(__name__)
def parse_arguments():
parser = argparse.ArgumentParser()
parser.add_argument('-v', '--verbose', dest='loglevel', action='store_const', const=logging.INFO,
help='print more output')
parser.add_argument('-d', '--debug', dest='loglevel', action='store_const', const=logging.DEBUG,
default=logging.WARNING, help='print even more output')
parser.add_argument('-o', '--logfile', help='logging file location')
parser.add_argument('-c', '--config', default='config.json', help='configuration file location')
parser.add_argument('-N', '--disable-notifications', dest='disable_notifications', action='store_true',
help='Do not send notifications')
parser.add_argument('-t', '--workers', type=int, help='number of workers for crawling')
args = parser.parse_args()
return args
def setup_logging(args):
    """Configure the root logger from parsed CLI arguments.

    Timestamps are included in the format only when logging to a file;
    console output stays terse.
    """
    if args.logfile:
        fmt = '%(asctime)s %(levelname)s: %(message)s'
    else:
        fmt = '%(levelname)s: %(message)s'
    logging.basicConfig(format=fmt, level=args.loglevel, filename=args.logfile)
def crawl_shop(shop, urls):
    """Crawl one shop's URLs and return the list of products found.

    Dispatches on shop.name to the matching crawler class; shops without a
    dedicated crawler are skipped with a warning and yield an empty list.
    """
    logger.debug(f'processing {shop}')
    name = shop.name
    if name == 'topachat.com':
        return TopAchatCrawler(shop=shop, urls=urls).products
    if name == 'ldlc.com':
        return LDLCCrawler(shop=shop, urls=urls).products
    if name == 'materiel.net':
        return MaterielNetCrawler(shop=shop, urls=urls).products
    if name == 'alternate.be':
        return AlternateCrawler(shop=shop, urls=urls).products
    logger.warning(f'shop {shop} not supported')
    return []
def main():
    """Entry point: read config, crawl every configured shop concurrently,
    and upsert the results, optionally tweeting about restocks.

    Flow: parse CLI args -> configure logging -> load config -> ensure DB
    tables exist -> sync shops -> fan out one crawl task per shop to a
    thread pool -> persist products as each crawl completes.
    """
    args = parse_arguments()
    setup_logging(args)
    config = read_config(args.config)
    create_tables()
    shops = extract_shops(config['urls'])
    upsert_shops(shops.keys())
    notifier = None
    if not args.disable_notifications:
        twitter = config['twitter']
        notifier = TwitterNotifier(consumer_key=twitter['consumer_key'],
                                   consumer_secret=twitter['consumer_secret'],
                                   access_token=twitter['access_token'],
                                   access_token_secret=twitter['access_token_secret'])
    # max_workers=None lets ThreadPoolExecutor pick its default pool size.
    with futures.ThreadPoolExecutor(max_workers=args.workers) as executor:
        pending = []
        for shop in list_shops():
            urls = shops.get(shop.name)
            if not urls:
                logger.warning(f'cannot find urls for shop {shop} in the configuration file')
                continue
            pending.append(executor.submit(crawl_shop, shop, urls))
        # Persist results in completion order rather than submission order.
        for future in futures.as_completed(pending):
            upsert_products(products=future.result(), notifier=notifier)
# Run only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()