Archived
1
0
Fork 0

Improve crawlers mapping

This commit is contained in:
Julien Riou 2020-12-30 15:25:36 +01:00
parent 39eaf21c0a
commit 7e351a79f6
No known key found for this signature in database
GPG key ID: FF42D23B580C89F7
2 changed files with 10 additions and 13 deletions

View file

@ -92,3 +92,11 @@ class AlternateCrawler(ProductCrawler):
webpage = self.fetch(url=url)
parser.feed(webpage)
self.products += self.add_shop(parser.products)
# Registry of supported shops: maps a shop's domain name to the crawler
# class used for it, so callers can pick a crawler by shop name.
CRAWLERS = {
    'topachat.com': TopAchatCrawler,
    'ldlc.com': LDLCCrawler,
    'materiel.net': MaterielNetCrawler,
    'alternate.be': AlternateCrawler,
}

15
main.py
View file

@ -4,8 +4,7 @@ import logging
from concurrent import futures
from config import extract_shops, read_config
from crawlers import (AlternateCrawler, LDLCCrawler, MaterielNetCrawler,
TopAchatCrawler)
from crawlers import CRAWLERS
from db import create_tables, list_shops, upsert_products, upsert_shops
from notifiers import TwitterNotifier
@ -34,17 +33,7 @@ def setup_logging(args):
def crawl_shop(shop, urls):
    """Build the crawler registered for *shop* and return its products.

    The crawler class is looked up in ``CRAWLERS`` by ``shop.name`` and
    instantiated with the shop and its URLs.

    Args:
        shop: Shop object; its ``name`` attribute selects the crawler.
        urls: URLs handed to the crawler for this shop.

    Returns:
        The ``products`` attribute of the crawler, or an empty list when
        no crawler is registered for the shop.
    """
    logger.debug(f'processing {shop}')
    # Use .get() rather than indexing: an unknown shop name must not raise
    # KeyError, it is reported and skipped like before the registry existed.
    crawler_class = CRAWLERS.get(shop.name)
    if crawler_class is None:
        logger.warning(f'shop {shop} not supported')
        return []
    crawler = crawler_class(shop=shop, urls=urls)
    return crawler.products