From 7e351a79f6246fc35a9700db3f827542b5f908ef Mon Sep 17 00:00:00 2001 From: Julien Riou Date: Wed, 30 Dec 2020 15:25:36 +0100 Subject: [PATCH] Improve crawlers mapping --- crawlers.py | 8 ++++++++ main.py | 15 ++------------- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/crawlers.py b/crawlers.py index a04021c..72f1c74 100644 --- a/crawlers.py +++ b/crawlers.py @@ -92,3 +92,11 @@ class AlternateCrawler(ProductCrawler): webpage = self.fetch(url=url) parser.feed(webpage) self.products += self.add_shop(parser.products) + + +CRAWLERS = { + 'topachat.com': TopAchatCrawler, + 'ldlc.com': LDLCCrawler, + 'materiel.net': MaterielNetCrawler, + 'alternate.be': AlternateCrawler +} diff --git a/main.py b/main.py index d15a0d9..cc2ed1a 100644 --- a/main.py +++ b/main.py @@ -4,8 +4,7 @@ import logging from concurrent import futures from config import extract_shops, read_config -from crawlers import (AlternateCrawler, LDLCCrawler, MaterielNetCrawler, - TopAchatCrawler) +from crawlers import CRAWLERS from db import create_tables, list_shops, upsert_products, upsert_shops from notifiers import TwitterNotifier @@ -34,17 +33,7 @@ def setup_logging(args): def crawl_shop(shop, urls): logger.debug(f'processing {shop}') - if shop.name == 'topachat.com': - crawler = TopAchatCrawler(shop=shop, urls=urls) - elif shop.name == 'ldlc.com': - crawler = LDLCCrawler(shop=shop, urls=urls) - elif shop.name == 'materiel.net': - crawler = MaterielNetCrawler(shop=shop, urls=urls) - elif shop.name == 'alternate.be': - crawler = AlternateCrawler(shop=shop, urls=urls) - else: - logger.warning(f'shop {shop} not supported') - return [] + crawler = CRAWLERS[shop.name](shop=shop, urls=urls) return crawler.products