72 lines
2.8 KiB
Python
72 lines
2.8 KiB
Python
|
#!/usr/bin/env python3
|
||
|
import argparse
|
||
|
import logging
|
||
|
|
||
|
from config import extract_shops, read_config
|
||
|
from crawlers import (AlternateCrawler, LDLCCrawler, MaterielNetCrawler,
|
||
|
TopAchatCrawler)
|
||
|
from db import create_tables, list_shops, upsert_products, upsert_shops
|
||
|
from notifiers import TwitterNotifier
|
||
|
|
||
|
logger = logging.getLogger(__name__)
|
||
|
|
||
|
|
||
|
def parse_arguments():
|
||
|
parser = argparse.ArgumentParser()
|
||
|
parser.add_argument('-v', '--verbose', dest='loglevel', action='store_const', const=logging.INFO,
|
||
|
help='print more output')
|
||
|
parser.add_argument('-d', '--debug', dest='loglevel', action='store_const', const=logging.DEBUG,
|
||
|
default=logging.WARNING, help='print even more output')
|
||
|
parser.add_argument('-o', '--logfile', help='logging file location')
|
||
|
parser.add_argument('-c', '--config', default='config.json', help='configuration file location')
|
||
|
parser.add_argument('-N', '--disable-notifications', dest='disable_notifications', action='store_true',
|
||
|
help='Do not send notifications')
|
||
|
args = parser.parse_args()
|
||
|
return args
|
||
|
|
||
|
|
||
|
def setup_logging(args):
|
||
|
log_format = '%(asctime)s %(levelname)s: %(message)s' if args.logfile else '%(levelname)s: %(message)s'
|
||
|
logging.basicConfig(format=log_format, level=args.loglevel, filename=args.logfile)
|
||
|
|
||
|
|
||
|
def main():
|
||
|
args = parse_arguments()
|
||
|
setup_logging(args)
|
||
|
config = read_config(args.config)
|
||
|
create_tables()
|
||
|
|
||
|
shops = extract_shops(config['urls'])
|
||
|
upsert_shops(shops.keys())
|
||
|
|
||
|
if args.disable_notifications:
|
||
|
notifier = None
|
||
|
else:
|
||
|
notifier = TwitterNotifier(consumer_key=config['twitter']['consumer_key'],
|
||
|
consumer_secret=config['twitter']['consumer_secret'],
|
||
|
access_token=config['twitter']['access_token'],
|
||
|
access_token_secret=config['twitter']['access_token_secret'])
|
||
|
|
||
|
for shop in list_shops():
|
||
|
logger.debug(f'processing {shop}')
|
||
|
urls = shops.get(shop.name)
|
||
|
if not urls:
|
||
|
logger.warning(f'cannot find urls for shop {shop} in the configuration file')
|
||
|
continue
|
||
|
if shop.name == 'topachat.com':
|
||
|
crawler = TopAchatCrawler(shop=shop, urls=urls)
|
||
|
elif shop.name == 'ldlc.com':
|
||
|
crawler = LDLCCrawler(shop=shop, urls=urls)
|
||
|
elif shop.name == 'materiel.net':
|
||
|
crawler = MaterielNetCrawler(shop=shop, urls=urls)
|
||
|
elif shop.name == 'alternate.be':
|
||
|
crawler = AlternateCrawler(shop=shop, urls=urls)
|
||
|
else:
|
||
|
logger.warning(f'shop {shop} not supported')
|
||
|
continue
|
||
|
upsert_products(products=crawler.products, notifier=notifier)
|
||
|
|
||
|
|
||
|
if __name__ == '__main__':
|
||
|
main()
|