Archived
1
0
Fork 0
This repository has been archived on 2024-12-18. You can view files and clone it, but cannot push or open issues or pull requests.
restockbot/main.py

72 lines
2.7 KiB
Python
Raw Normal View History

2020-12-27 18:27:07 +01:00
#!/usr/bin/env python3
import argparse
import logging
2020-12-30 15:05:28 +01:00
from concurrent import futures
2020-12-27 18:27:07 +01:00
from config import extract_shops, read_config
2020-12-30 15:25:36 +01:00
from crawlers import CRAWLERS
2020-12-27 18:27:07 +01:00
from db import create_tables, list_shops, upsert_products, upsert_shops
from notifiers import TwitterNotifier
# Module-level logger named after this module; used by crawl_shop() and main().
logger = logging.getLogger(__name__)
def parse_arguments(argv=None):
    """Parse the command-line options of the script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, so
            existing callers that pass nothing keep their behaviour.

    Returns:
        argparse.Namespace exposing ``loglevel``, ``logfile``, ``config``,
        ``disable_notifications`` and ``workers``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-v', '--verbose', dest='loglevel', action='store_const', const=logging.INFO,
                        help='print more output')
    parser.add_argument('-d', '--debug', dest='loglevel', action='store_const', const=logging.DEBUG,
                        help='print even more output')
    # Both flags above write to ``loglevel``. argparse only honours the
    # default of the first action registered for a destination, so a
    # ``default=`` placed on the second flag is silently ignored and the
    # attribute ends up as None. A parser-level default applies regardless
    # of registration order.
    parser.set_defaults(loglevel=logging.WARNING)
    parser.add_argument('-o', '--logfile', help='logging file location')
    parser.add_argument('-c', '--config', default='config.json', help='configuration file location')
    parser.add_argument('-N', '--disable-notifications', dest='disable_notifications', action='store_true',
                        help='do not send notifications')
    parser.add_argument('-t', '--workers', type=int, help='number of workers for crawling')
    return parser.parse_args(argv)
def setup_logging(args):
    """Configure logging via ``logging.basicConfig`` from the parsed options.

    Reads ``args.logfile`` and ``args.loglevel``. Records carry a timestamp
    only when a log file is given; otherwise just level and message are
    emitted.
    """
    if args.logfile:
        record_format = '%(asctime)s %(levelname)s: %(message)s'
    else:
        record_format = '%(levelname)s: %(message)s'
    logging.basicConfig(
        format=record_format,
        level=args.loglevel,
        filename=args.logfile,
    )
2020-12-30 15:05:28 +01:00
def crawl_shop(shop, urls):
    """Build the crawler registered for ``shop`` and return its products.

    The crawler class is looked up in ``CRAWLERS`` by ``shop.name`` and
    instantiated with the shop and its URLs; the ``products`` attribute of
    that instance is returned.
    """
    logger.debug(f'processing {shop}')
    crawler_class = CRAWLERS[shop.name]
    shop_crawler = crawler_class(shop=shop, urls=urls)
    return shop_crawler.products
2020-12-27 18:27:07 +01:00
def main():
    """Run one crawl: read the config, crawl every known shop, store products.

    Steps, in order: parse arguments, set up logging, read the configuration
    file, create the tables, register the shops found under the ``urls``
    configuration key, optionally build the Twitter notifier, then crawl the
    shops through a thread pool and upsert each result as it completes.
    """
    args = parse_arguments()
    setup_logging(args)
    config = read_config(args.config)
    create_tables()

    shops = extract_shops(config['urls'])
    upsert_shops(shops.keys())

    # Notifications are on by default; -N leaves the notifier unset.
    notifier = None
    if not args.disable_notifications:
        twitter = config['twitter']
        notifier = TwitterNotifier(
            consumer_key=twitter['consumer_key'],
            consumer_secret=twitter['consumer_secret'],
            access_token=twitter['access_token'],
            access_token_secret=twitter['access_token_secret'],
        )

    with futures.ThreadPoolExecutor(max_workers=args.workers) as pool:
        pending = []
        for shop in list_shops():
            shop_urls = shops.get(shop.name)
            if shop_urls:
                pending.append(pool.submit(crawl_shop, shop, shop_urls))
            else:
                logger.warning(f'cannot find urls for shop {shop} in the configuration file')

        # Store results in completion order rather than submission order.
        for done in futures.as_completed(pending):
            upsert_products(products=done.result(), notifier=notifier)
2020-12-27 18:27:07 +01:00
# Run the bot only when this file is executed as a script, not on import.
if '__main__' == __name__:
    main()