diff --git a/.gitignore b/.gitignore
index 4cb714b..c660b27 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,7 @@
-__pycache__
-config.json
-*.html
-TODO.txt
-restock.db
-geckodriver.log
+bin/
+restockbot.db
+restockbot.json
+restockbot.log
+restockbot.pid
+ferret.log
+shop.fql
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 8e71337..252873b 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,29 +1,8 @@
+---
 repos:
-  - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: master
-    hooks:
-      - id: check-executables-have-shebangs
-      - id: check-merge-conflict
-      - id: double-quote-string-fixer
-      - id: end-of-file-fixer
-      - id: fix-encoding-pragma
-        args: ['--remove']
-      - id: requirements-txt-fixer
-      - id: trailing-whitespace
-      - id: check-json
-  - repo: https://gitlab.com/pycqa/flake8
-    rev: master
-    hooks:
-      - id: flake8
-        args: ['--max-line-length=120']
-  - repo: https://github.com/FalconSocial/pre-commit-python-sorter
-    rev: master
-    hooks:
-      - id: python-import-sorter
-        args: ['--silent-overwrite']
-  - repo: https://github.com/chewse/pre-commit-mirrors-pydocstyle
-    rev: master
-    hooks:
-      - id: pydocstyle
-        args: ['--config=.pydocstyle', '--match="(?!test_).*\.py"']
+- repo: https://github.com/dnephin/pre-commit-golang
+  rev: v0.3.5
+  hooks:
+  - id: go-fmt
+  - id: go-lint
\ No newline at end of file
diff --git a/.pydocstyle b/.pydocstyle
deleted file mode 100644
index aef2483..0000000
--- a/.pydocstyle
+++ /dev/null
@@ -1,2 +0,0 @@
-[pydocstyle]
-ignore = D100,D104,D400,D203,D204,D101,D213,D202
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..1abd79e
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,17 @@
+APPVERSION := $(shell cat ./VERSION)
+GOVERSION := $(shell go version | awk '{print $$3}')
+GITCOMMIT := $(shell git log -1 --oneline | awk '{print $$1}')
+LDFLAGS = -X main.AppVersion=${APPVERSION} -X main.GoVersion=${GOVERSION} -X main.GitCommit=${GITCOMMIT}
+PLATFORM := $(shell uname -s)
+ARCH := $(shell uname -m)
+
+.PHONY: build release clean
+
+build:
+	go build -ldflags "${LDFLAGS}" -o bin/restockbot *.go
+
+release:
+	go build -ldflags "${LDFLAGS}" -o bin/restockbot-${APPVERSION}-${PLATFORM}-${ARCH} *.go
+
+clean:
+	rm -rf bin
\ No newline at end of file
diff --git a/README.md b/README.md
index f6fac2a..dca53a4 100644
--- a/README.md
+++ b/README.md
@@ -1,88 +1,168 @@
-Year 2020 has been quite hard for hardware supply. Graphics Cards are out of stock everywhere. Nobody can grab the
-new generation (AMD RX 6000 series, NVIDIA GeForce RTX 3000 series). Even older generations are hard to find.
-**GraphicRestock** is a bot that crawl retailers websites and notify when a product is available.
+# RestockBot
 
-# Setup
+Year 2020 has been quite hard for hardware supply. Graphics cards are out of stock everywhere. Nobody can grab the new generation (AMD RX 6000 series, NVIDIA GeForce RTX 3000 series). Even older generations are hard to find. `RestockBot` is a bot that crawls retailers' websites and notifies when a product is available.
-Based on Debian 10:
+## Requirements
+
+### Headless browser
+
+Use Docker:
 
 ```
-apt install python3-selenium python3-sqlalchemy python3-tweepy python3-bs4 firefox-esr
-curl -L -s https://github.com/mozilla/geckodriver/releases/download/v0.28.0/geckodriver-v0.28.0-linux64.tar.gz | tar xvpzf - -C /usr/local/bin/
-chown root:root /usr/local/bin/geckodriver
-chmod +x /usr/local/bin/geckodriver
+docker run --name chromium --rm -d -p 9222:9222 montferret/chromium
 ```
 
-# Configure
+Or take inspiration from the [source code](https://github.com/MontFerret/chromium) to run it on your own.
 
-Configuration file example can be found [here](config.json.example).
+### Twitter (optional)
+
+Follow [this procedure](https://github.com/jouir/twitter-login) to generate all the required settings:
+* `consumer_key`
+* `consumer_secret`
+* `access_token`
+* `access_token_secret`
+
+## Installation
+
+Download the latest [release](https://github.com/jouir/restockbot/releases).
+
+Ensure the checksums are identical.
+
+Then execute the binary:
+
+```
+./restockbot -version
+./restockbot -help
+```
+
+## Compilation
+
+Clone the repository:
+```
+git clone https://github.com/jouir/restockbot.git
+```
+
+Build the `restockbot` binary:
+```
+make build
+ls -l bin/restockbot
+```
+
+Build with the architecture in the binary name:
+
+```
+make release
+```
+
+Optionally, remove the produced binaries with:
+
+```
+make clean
+```
+
+## Configuration
+
+The default file is `restockbot.json` in the current directory. Another file name can be passed with the `-config` argument.
 
 Options:
 
-* **twitter.consumer_key**: key of your Twitter application
-* **twitter.consumer_secret**: secret of your Twitter application
-* **twitter.access_token**: authentication token generated by [twitter_auth.py](twitter_auth.py)
-* **twitter.access_token_secret**: authentication token secret generated by [twitter_auth.py](twitter_auth.py)
-* **urls**: list of retailers web pages (they need to respect crawlers' format)
-* **executable_path** (optional): path to selenium driver (firefox/gecko browser)
+* `urls`: list of retailer web pages
+* `twitter` (optional):
+  * `consumer_key`: API key of your Twitter application
+  * `consumer_secret`: API secret of your Twitter application
+  * `access_token`: authentication token generated for your Twitter account
+  * `access_token_secret`: authentication token secret generated for your Twitter account
+  * `hashtags`: map of key/values used to append hashtags to each tweet. The key is the pattern to match in the product name, the value is the string to append to the tweet. For example, `{"twitter": {"hashtags": {"rtx 3090": "#nvidia #rtx3090"}}}` will detect `rtx 3090` in the product name and append `#nvidia #rtx3090` at the end of the tweet.
+* `include_regex` (optional): include only products with a name matching this regexp
+* `exclude_regex` (optional): exclude products with a name matching this regexp
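+
+For example, a minimal `restockbot.json` could look like this (a sketch only: the Twitter secrets and both regexes are placeholder values to adapt):
+
+```json
+{
+    "urls": [
+        "https://www.topachat.com/pages/produits_cat_est_micro_puis_rubrique_est_wgfx_pcie_puis_f_est_58-11447,11445,11446,11559,11558.html"
+    ],
+    "twitter": {
+        "consumer_key": "***",
+        "consumer_secret": "***",
+        "access_token": "***",
+        "access_token_secret": "***",
+        "hashtags": {
+            "rtx 3090": "#nvidia #rtx3090"
+        }
+    },
+    "include_regex": "rtx",
+    "exclude_regex": "occasion"
+}
+```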
 
-# Twitter authentication
+## How to contribute
 
-Create a configuration file with **twitter.consumer_key** and **twitter.consumer_secret** parameters.
-
-Then authenticate:
+Lint the code with pre-commit:
 
 ```
-python3 twitter_auth.py
-```
-
-You will have to open the URL and authenticate:
-
-```
-Please go to https://api.twitter.com/oauth/authorize?oauth_token=****
-```
-Click on **Authorize app**. A verifier code will be shown. Go back to your console and enter the code.
-
-```
-Verifier:*******
-```
-
-Tokens will be created:
-
-```
-access_token = *****
-access_token_secret = ****
-```
-
-Finally, write them to configuration file in **twitter.access_token** and **twitter.access_token_secret** parameters.
-
-
-# Usage
-
-```
-python3 main.py --help
-```
-
-# How to contribute
-
-First things first, check issues to ensure the feature or bug you are facing is not already declared.
-
-Pull requests are highly appreciated.
-
-Please lint your code:
-
-```
-docker run -it -v $(pwd):/mnt/ --rm debian:10 bash
-apt-get update && apt-get upgrade -y && apt-get install -y python3-pip git
+docker run -it -v $(pwd):/mnt/ --rm golang:latest bash
+go get -u golang.org/x/lint/golint
+apt-get update && apt-get upgrade -y && apt-get install -y git python3-pip
 pip3 install pre-commit
 cd /mnt
 pre-commit run --all-files
 ```
 
-Happy coding!
+## How to parse a shop
 
+### Create the Ferret query
 
-# Disclaimer
+`RestockBot` uses [Ferret](https://github.com/MontFerret/ferret) and its FQL (Ferret Query Language) to parse websites. The full documentation is available [here](https://www.montferret.dev/docs/introduction/). Once installed, this library can be used as a CLI command or embedded in the application. To create the query, we can use the CLI for fast iterations, then integrate the query into `RestockBot` later.
 
-Crawling a website should be used with caution. Please check with retailers if the bot respects the terms of use for
-their websites. Authors of the bot are not responsible of the bot usage.
+```
+vim shop.fql
+ferret --cdp http://127.0.0.1:9222 -time shop.fql
+```
+
+The query must return a list of products in JSON format with the following elements:
+* `name`: string
+* `url`: string
+* `price`: float
+* `price_currency`: string
+* `available`: boolean
+
+Example:
+
+```json
+[
+    {
+        "available": false,
+        "name": "Zotac GeForce RTX 3070 AMP Holo",
+        "price": 799.99,
+        "price_currency": "EUR",
+        "url": "https://www.topachat.com/pages/detail2_cat_est_micro_puis_rubrique_est_wgfx_pcie_puis_ref_est_in20007322.html"
+    },
+    {
+        "available": false,
+        "name": "Asus GeForce RTX 3070 DUAL 8G",
+        "price": 739.99,
+        "price_currency": "EUR",
+        "url": "https://www.topachat.com/pages/detail2_cat_est_micro_puis_rubrique_est_wgfx_pcie_puis_ref_est_in20005540.html"
+    },
+    {
+        "available": false,
+        "name": "Palit GeForce RTX 3070 GamingPro OC",
+        "price": 819.99,
+        "price_currency": "EUR",
+        "url": "https://www.topachat.com/pages/detail2_cat_est_micro_puis_rubrique_est_wgfx_pcie_puis_ref_est_in20005819.html"
+    }
+]
+```
+
+`RestockBot` will convert this JSON to a list of `Product`.
+
+### Embed the query
+
+Shops are configured as a list of URLs:
+
+```json
+{
+    "urls": [
+        "https://www.topachat.com/pages/produits_cat_est_micro_puis_rubrique_est_wgfx_pcie_puis_f_est_58-11447,11445,11446,11559,11558.html",
+        "https://www.ldlc.com/informatique/pieces-informatique/carte-graphique-interne/c4684/+fv121-19183,19184,19185,19339,19340.html",
+        "https://www.materiel.net/carte-graphique/l426/+fv121-19183,19184,19185,19339,19340/"
+    ]
+}
+```
+
+The `Parse` function ([parser.go](parser.go)) will be called. In this example, the following **shop names** will be deduced: `topachat.com`, `ldlc.com` and `materiel.net`.
+
+Each shop should implement a function to create a Ferret query based on a URL, as sketched after this list:
+* `func createQueryForLDLC(url string) string`
+* `func createQueryForMaterielNet(url string) string`
+* `func createQueryForTopachat(url string) string`
+* ...
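+
+As an illustration, here is a minimal sketch of such a builder for a hypothetical shop `example.com`, following the pattern of the existing builders in [parser.go](parser.go); the CSS selectors are made up and would have to be adapted to the real page structure:
+
+```go
+// createQueryForExample builds the FQL query for a hypothetical example.com shop.
+// Selectors such as .product-card or .product-stock are placeholders.
+func createQueryForExample(url string) string {
+	q := `
+LET page = '` + url + `'
+LET doc = DOCUMENT(page, {driver: "cdp"})
+
+FOR el IN ELEMENTS(doc, ".product-card")
+    LET link = ELEMENT(el, "a")
+    LET name = INNER_TEXT(ELEMENT(el, ".product-name"))
+    LET price = TO_FLOAT(INNER_TEXT(ELEMENT(el, ".product-price")))
+    LET available = !CONTAINS(INNER_TEXT(ELEMENT(el, ".product-stock")), "Out of stock")
+    RETURN {
+        name: name,
+        url: "https://www.example.com" + link.attributes.href,
+        price: price,
+        price_currency: "EUR",
+        available: available,
+    }
+`
+	return q
+}
+```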
+Each of these functions must then be registered in the switch of the `createQuery` function ([parser.go](parser.go)).
+
+Products will then be parsed.
+
+## Disclaimer
+
+Crawling a website should be done with caution. Please check with retailers whether the bot respects the terms of use of their websites. The authors of the bot are not responsible for how it is used.
\ No newline at end of file
diff --git a/VERSION b/VERSION
new file mode 100644
index 0000000..341cf11
--- /dev/null
+++ b/VERSION
@@ -0,0 +1 @@
+0.2.0
\ No newline at end of file
diff --git a/config.go b/config.go
new file mode 100644
index 0000000..0fa79a8
--- /dev/null
+++ b/config.go
@@ -0,0 +1,53 @@
+package main
+
+import (
+	"encoding/json"
+	"io/ioutil"
+	"path/filepath"
+)
+
+// Config to store JSON configuration
+type Config struct {
+	TwitterConfig `json:"twitter"`
+	URLs          []string `json:"urls"`
+	IncludeRegex  string   `json:"include_regex"`
+	ExcludeRegex  string   `json:"exclude_regex"`
+}
+
+// TwitterConfig to store Twitter API secrets
+type TwitterConfig struct {
+	ConsumerKey       string            `json:"consumer_key"`
+	ConsumerSecret    string            `json:"consumer_secret"`
+	AccessToken       string            `json:"access_token"`
+	AccessTokenSecret string            `json:"access_token_secret"`
+	Hashtags          map[string]string `json:"hashtags"`
+}
+
+// NewConfig creates a Config struct
+func NewConfig() *Config {
+	return &Config{}
+}
+
+// Read Config from configuration file
+func (c *Config) Read(file string) error {
+	file, err := filepath.Abs(file)
+	if err != nil {
+		return err
+	}
+
+	jsonFile, err := ioutil.ReadFile(file)
+	if err != nil {
+		return err
+	}
+
+	err = json.Unmarshal(jsonFile, &c)
+	if err != nil {
+		return err
+	}
+	return nil
+}
+
+// HasTwitter returns true when Twitter has been configured
+func (c *Config) HasTwitter() bool {
+	return (c.TwitterConfig.AccessToken != "" && c.TwitterConfig.AccessTokenSecret != "" && c.TwitterConfig.ConsumerKey != "" && c.TwitterConfig.ConsumerSecret != "")
+}
diff --git a/config.json.example b/config.json.example
deleted file mode 100644
index 2085547..0000000
--- a/config.json.example
+++ /dev/null
@@ -1,18 +0,0 @@
-{
-    "twitter": {
-        "consumer_key": "***",
-        "consumer_secret": "***",
-        "access_token": "***",
-        "access_token_secret": "***"
-    },
-    "urls": [
-        "https://www.topachat.com/pages/produits_cat_est_micro_puis_rubrique_est_wgfx_pcie_puis_f_est_58-11447,11445,11446,11559,11558.html",
-        "https://www.ldlc.com/informatique/pieces-informatique/carte-graphique-interne/c4684/+fv121-19183,19184,19185,19339,19340.html",
-        "https://www.materiel.net/carte-graphique/l426/+fv121-19183,19184,19185,19339,19340/",
-        "https://www.alternate.be/Hardware/Grafische-kaarten/NVIDIA/RTX-3060-Ti",
-        "https://www.alternate.be/Hardware/Grafische-kaarten/NVIDIA/RTX-3070",
-        "https://www.alternate.be/Hardware/Grafische-kaarten/NVIDIA/RTX-3080",
-        "https://www.alternate.be/Hardware/Grafische-kaarten/NVIDIA/RTX-3090"
-    ],
-    "executable_path": "/usr/bin/geckodriver"
-}
diff --git a/config.py b/config.py
deleted file mode 100644
index d354f41..0000000
--- a/config.py
+++ /dev/null
@@ -1,24 +0,0 @@
-import json
-
-from utils import parse_base_url
-
-
-def read_config(filename):
-    with open(filename, 'r') as fd:
-        return json.load(fd)
-
-
-def extract_shops(urls):
-    """
-    Parse shop name and return list of addresses for each shop
-    Example: {"toto.com/first", "toto.com/second", "tata.com/first"}
-          -> {"toto.com": ["toto.com/first", "toto.com/second"], "tata.com": ["tata.com/first"]}
-    """
-    result = {}
-    for url in urls:
-        base_url = parse_base_url(url, 
include_scheme=False) - if base_url not in result: - result[base_url] = [url] - else: - result[base_url].append(url) - return result diff --git a/crawlers.py b/crawlers.py deleted file mode 100644 index 84cd0be..0000000 --- a/crawlers.py +++ /dev/null @@ -1,113 +0,0 @@ -import logging - -from parsers import (AlternateParser, LDLCParser, MaterielNetParser, - MineShopParser, TopAchatParser) -from selenium import webdriver -from selenium.common.exceptions import TimeoutException -from selenium.webdriver.common.by import By -from selenium.webdriver.firefox.options import Options -from selenium.webdriver.support import expected_conditions -from selenium.webdriver.support.ui import WebDriverWait - -logger = logging.getLogger(__name__) - - -class ProductCrawler(object): - - TIMEOUT = 3 - - def __init__(self, shop): - options = Options() - options.headless = True - self._driver = webdriver.Firefox(executable_path='/usr/local/bin/geckodriver', options=options) - self._shop = shop - self.products = [] - - def __del__(self): - self._driver.quit() - - def fetch(self, url, wait_for=None): - self._driver.get(url) - if wait_for: - try: - condition = expected_conditions.presence_of_element_located((By.CLASS_NAME, wait_for)) - WebDriverWait(self._driver, self.TIMEOUT).until(condition) - except TimeoutException: - logger.warning(f'timeout waiting for element "{wait_for}" at {url}') - logger.info(f'url {url} fetched') - webpage = self._driver.execute_script("return document.getElementsByTagName('html')[0].innerHTML") - return webpage - - def add_shop(self, products): - for product in products: - product.shop = self._shop - return products - - -class TopAchatCrawler(ProductCrawler): - def __init__(self, shop, urls): - super().__init__(shop) - parser = TopAchatParser() - for url in urls: - webpage = self.fetch(url=url) - parser.feed(webpage) - self.products += self.add_shop(parser.products) - - -class LDLCCrawler(ProductCrawler): - def __init__(self, shop, urls): - super().__init__(shop) - parser = LDLCParser() - for url in urls: - next_page = url - previous_page = None - while next_page != previous_page: - webpage = self.fetch(url=next_page) - parser.feed(webpage) - previous_page = next_page - next_page = parser.next_page - self.products += self.add_shop(parser.products) - - -class MaterielNetCrawler(ProductCrawler): - def __init__(self, shop, urls): - super().__init__(shop) - parser = MaterielNetParser() - for url in urls: - next_page = url - previous_page = None - while next_page != previous_page: - webpage = self.fetch(url=next_page, wait_for='o-product__price') - parser.feed(webpage) - previous_page = next_page - next_page = parser.next_page - self.products += self.add_shop(parser.products) - - -class AlternateCrawler(ProductCrawler): - def __init__(self, shop, urls): - super().__init__(shop) - parser = AlternateParser() - for url in urls: - webpage = self.fetch(url=url) - parser.feed(webpage) - self.products += self.add_shop(parser.products) - - -class MineShopCrawler(ProductCrawler): - def __init__(self, shop, urls): - super().__init__(shop) - parser = MineShopParser() - for url in urls: - webpage = self.fetch(url=url) - parser.feed(webpage) - self.products += self.add_shop(parser.products) - - -CRAWLERS = { - 'topachat.com': TopAchatCrawler, - 'ldlc.com': LDLCCrawler, - 'materiel.net': MaterielNetCrawler, - 'alternate.be': AlternateCrawler, - 'mineshop.eu': MineShopCrawler -} diff --git a/db.py b/db.py deleted file mode 100644 index 4610c65..0000000 --- a/db.py +++ /dev/null @@ -1,119 +0,0 @@ -import 
logging -from datetime import datetime - -from sqlalchemy import (Boolean, Column, DateTime, Float, ForeignKey, Integer, - String, create_engine, exc) -from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import relationship, sessionmaker - -logger = logging.getLogger(__name__) - - -Base = declarative_base() -engine = create_engine('sqlite:///restock.db') -Session = sessionmaker(bind=engine, autoflush=False) - - -class Shop(Base): - __tablename__ = 'shop' - id = Column(Integer, primary_key=True) - name = Column(String, unique=True, nullable=False) - - def __repr__(self): - return f'Shop<{self.name}>' - - def __ne__(self, shop): - return self.name != shop.name - - -class Product(Base): - __tablename__ = 'product' - id = Column(Integer, primary_key=True) - name = Column(String, nullable=False) - url = Column(String, nullable=False, unique=True) - price = Column(Float, nullable=False) - price_currency = Column(String, nullable=False) - available = Column(Boolean, nullable=False) - updated_at = Column(DateTime) - tweet_id = Column(Integer, unique=True) - shop_id = Column(Integer, ForeignKey('shop.id'), nullable=False) - shop = relationship('Shop', foreign_keys=[shop_id]) - - def __repr__(self): - return f'Product<{self.name}@{self.shop.name}>' - - def __ne__(self, product): - return self.name != product.name or self.price != product.price or self.available != product.available \ - or self.url != product.url or self.shop != product.shop - - def ok(self): - return self.name and self.url and self.price and self.price_currency and self.available is not None - - -def create_tables(): - Base.metadata.create_all(engine) - logger.debug('tables created') - - -def list_shops(): - session = Session() - shops = session.query(Shop).all() - session.close() - return shops - - -def upsert_shops(names): - session = Session() - try: - for name in names: - shop = Shop(name=name) - query = session.query(Shop).filter(Shop.name == shop.name) - shop_database = query.first() - if not shop_database: - logger.info(f'{shop} added') - session.add(shop) - session.commit() - logger.debug('transaction committed') - except exc.SQLAlchemyError: - logger.exception('cannot commit transaction') - finally: - session.close() - - -def upsert_products(products, notifier=None): - session = Session() - try: - for product in products: - query = session.query(Product).filter(Product.name == product.name, Product.shop == product.shop) - product_database = query.first() - now = datetime.utcnow() - tweet_id = None - if not product_database: - # product is new and available so we need to create an initial thread - if notifier and product.available: - product.tweet_id = notifier.create_thread(product).id - product.updated_at = now - session.add(product) - logger.info(f'{product} added') - elif product != product_database: - # notifications - if notifier and product.available != product_database.available: - if product.available and not product_database.tweet_id: - # product is now available so we need to create an initial tweet (or thread) - tweet = notifier.create_thread(product) - if tweet: - tweet_id = tweet.id - elif not product.available and product_database.available and product_database.tweet_id: - # product is out of stock so we need to reply to previous tweet to close the thread - notifier.close_thread(tweet_id=product_database.tweet_id, - duration=now-product_database.updated_at) - query.update({Product.price: product.price, Product.price_currency: product.price_currency, - Product.available: 
product.available, Product.url: product.url, - Product.tweet_id: tweet_id, Product.updated_at: now}) - logger.info(f'{product} updated') - session.commit() - logger.debug('transaction committed') - except exc.SQLAlchemyError: - logger.exception('cannot commit transaction') - finally: - session.close() diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..f99a0da --- /dev/null +++ b/go.mod @@ -0,0 +1,12 @@ +module github.com/jouir/restockbot + +go 1.16 + +require ( + github.com/MontFerret/ferret v0.13.0 + github.com/dghubble/go-twitter v0.0.0-20201011215211-4b180d0cc78d + github.com/dghubble/oauth1 v0.7.0 + github.com/sirupsen/logrus v1.8.0 + gorm.io/driver/sqlite v1.1.4 + gorm.io/gorm v1.20.12 +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..1ad0899 --- /dev/null +++ b/go.sum @@ -0,0 +1,148 @@ +github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/Masterminds/glide v0.13.2/go.mod h1:STyF5vcenH/rUqTEv+/hBXlSTo7KYwg2oc2f4tzPWic= +github.com/Masterminds/semver v1.4.2/go.mod h1:MB6lktGJrhw8PrUyiEoblNEGEQ+RzHPF078ddwwvV3Y= +github.com/Masterminds/vcs v1.13.0/go.mod h1:N09YCmOQr6RLxC6UNHzuVwAdodYbbnycGHSmwVJjcKA= +github.com/MontFerret/ferret v0.13.0 h1:Le/8K3Qr+YO2ZVwgGUtbEzAUm5iE7tIjQSX1QdHV8d8= +github.com/MontFerret/ferret v0.13.0/go.mod h1:vk1PI8xyeudPPIXu6bkgUtTaYTt/7oJe8XWK4YBDOz0= +github.com/PuerkitoBio/goquery v1.6.0 h1:j7taAbelrdcsOlGeMenZxc2AWXD5fieT1/znArdnx94= +github.com/PuerkitoBio/goquery v1.6.0/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc= +github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo= +github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= +github.com/antchfx/htmlquery v1.2.3 h1:sP3NFDneHx2stfNXCKbhHFo8XgNjCACnU/4AO5gWz6M= +github.com/antchfx/htmlquery v1.2.3/go.mod h1:B0ABL+F5irhhMWg54ymEZinzMSi0Kt3I2if0BLYa3V0= +github.com/antchfx/xpath v1.1.6/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk= +github.com/antchfx/xpath v1.1.11 h1:WOFtK8TVAjLm3lbgqeP0arlHpvCEeTANeWZ/csPpJkQ= +github.com/antchfx/xpath v1.1.11/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs= +github.com/antlr/antlr4 v0.0.0-20200417160354-8c50731894e0 h1:j7MyDjg6pb7A2ziow17FDZ2Oj5vGnJsLyDmjpN4Jkcg= +github.com/antlr/antlr4 v0.0.0-20200417160354-8c50731894e0/go.mod h1:T7PbCXFs94rrTttyxjbyT5+/1V8T2TYDejxUfHJjw1Y= +github.com/cenkalti/backoff v2.1.1+incompatible h1:tKJnvO2kl0zmb/jA5UKAt4VoEVw1qxKWjE/Bpp46npY= +github.com/cenkalti/backoff v2.1.1+incompatible/go.mod h1:90ReRw6GdpyfrHakVjL/QHaoyV4aDUVVkXQJJJ3NXXM= +github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= +github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= +github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= +github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= +github.com/codegangsta/cli v1.20.0/go.mod h1:/qJNoX69yVSKu5o4jLyXAENLRyk1uhi7zkbQ3slBdOA= +github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= +github.com/corpix/uarand v0.1.1 h1:RMr1TWc9F4n5jiPDzFHtmaUXLKLNUFK0SgCLo4BhX/U= +github.com/corpix/uarand v0.1.1/go.mod h1:SFKZvkcRoLqVRFZ4u25xPmp6m9ktANfbpXZ7SJ0/FNU= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/davecgh/go-spew v1.1.0/go.mod 
h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/derekparker/trie v0.0.0-20200317170641-1fdf38b7b0e9/go.mod h1:D6ICZm05D9VN1n/8iOtBxLpXtoGp6HDFUJ1RNVieOSE= +github.com/dghubble/go-twitter v0.0.0-20201011215211-4b180d0cc78d h1:sBKr0A8iQ1qAOozedZ8Aox+Jpv+TeP1Qv7dcQyW8V+M= +github.com/dghubble/go-twitter v0.0.0-20201011215211-4b180d0cc78d/go.mod h1:xfg4uS5LEzOj8PgZV7SQYRHbG7jPUnelEiaAVJxmhJE= +github.com/dghubble/oauth1 v0.7.0 h1:AlpZdbRiJM4XGHIlQ8BuJ/wlpGwFEJNnB4Mc+78tA/w= +github.com/dghubble/oauth1 v0.7.0/go.mod h1:8pFdfPkv/jr8mkChVbNVuJ0suiHe278BtWI4Tk1ujxk= +github.com/dghubble/sling v1.3.0 h1:pZHjCJq4zJvc6qVQ5wN1jo5oNZlNE0+8T/h0XeXBUKU= +github.com/dghubble/sling v1.3.0/go.mod h1:XXShWaBWKzNLhu2OxikSNFrlsvowtz4kyRuXUG7oQKY= +github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e h1:1r7pUrabqp18hOBcwBwiTsbnFeTZHV9eER/QT5JVZxY= +github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/google/go-cmp v0.4.1 h1:/exdXoGamhu5ONeUJH0deniYLWYvQwW66yvlfiiKTu0= +github.com/google/go-cmp v0.4.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-querystring v1.0.0 h1:Xkwi/a1rcvNg1PPYe5vI8GbeBY/jrVuDX5ASuANWTrk= +github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1 h1:EGx4pi6eqNxGaHF6qqu48+N2wcFQ5qg5FXgOdqsJ5d8= +github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= +github.com/gorilla/css v1.0.0 h1:BQqNyPTi50JCFMTw/b67hByjMVXZRwGha6wxVGkeihY= +github.com/gorilla/css v1.0.0/go.mod h1:Dn721qIggHpt4+EFCcTLTU/vk5ySda2ReITrtgBl60c= +github.com/gorilla/websocket v1.4.2 h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0Ufc= +github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= +github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E= +github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc= +github.com/jinzhu/now v1.1.1 h1:g39TucaRWyV3dwDO++eEc6qf8TVIQ/Da48WmqjZ3i7E= +github.com/jinzhu/now v1.1.1/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= +github.com/json-iterator/go v1.1.9 h1:9yzud/Ht36ygwatGx56VwCZtlI/2AD15T1X2sjSuGns= +github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= +github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo= +github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/mafredri/cdp v0.30.0 h1:Lvcwjajq6wB6Uk8dYeCLrF26LG85rUdpMxgrwdEvU0o= +github.com/mafredri/cdp v0.30.0/go.mod h1:71D84qPmWUvBWYj24Zp+U69mrUof4o8qL2X1fQJ/lHc= +github.com/mafredri/go-lint v0.0.0-20180911205320-920981dfc79e/go.mod h1:k/zdyxI3q6dup24o8xpYjJKTCf2F7rfxLp6w/efTiWs= +github.com/magefile/mage v1.10.0 h1:3HiXzCUY12kh9bIuyXShaVe529fJfyqoVM42o/uom2g= +github.com/magefile/mage v1.10.0/go.mod 
h1:z5UZb/iS3GoOSn0JgWuiw7dxlurVYTu+/jHXqQg881A= +github.com/mattn/go-sqlite3 v1.14.5 h1:1IdxlwTNazvbKJQSxoJ5/9ECbEeaTTyeU7sEAZ5KKTQ= +github.com/mattn/go-sqlite3 v1.14.5/go.mod h1:WVKg1VTActs4Qso6iwGbiFih2UIHo0ENGwNd0Lj+XmI= +github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/modern-go/reflect2 v1.0.1 h1:9f412s+6RmYXLWZSEzVVgPGK7C2PphHj5RJrvfx9AWI= +github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/natefinch/lumberjack v2.0.0+incompatible/go.mod h1:Wi9p2TTF5DG5oU+6YfsmYQpsTIOm0B1VNzQg9Mw6nPk= +github.com/ngdinhtoan/glide-cleanup v0.2.0/go.mod h1:UQzsmiDOb8YV3nOsCxK/c9zPpCZVNoHScRE3EO9pVMM= +github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= +github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rs/xid v1.2.1/go.mod h1:+uKXf+4Djp6Md1KODXJxgGQPKngRmWyn10oCKFzNHOQ= +github.com/rs/zerolog v1.19.0 h1:hYz4ZVdUgjXTBUmrkrw55j1nHx68LfOKIQk5IYtyScg= +github.com/rs/zerolog v1.19.0/go.mod h1:IzD0RJ65iWH0w97OQQebJEvTZYvsCUm9WVLWBQrJRjo= +github.com/segmentio/encoding v0.1.10 h1:0b8dva47cSuNQR5ZcU3d0pfi9EnPpSK6q7y5ZGEW36Q= +github.com/segmentio/encoding v0.1.10/go.mod h1:RWhr02uzMB9gQC1x+MfYxedtmBibb9cZ6Vv9VxRSSbw= +github.com/sethgrid/pester v1.1.0 h1:IyEAVvwSUPjs2ACFZkBe5N59BBUpSIkQ71Hr6cM5A+w= +github.com/sethgrid/pester v1.1.0/go.mod h1:Ad7IjTpvzZO8Fl0vh9AzQ+j/jYZfyp2diGwI8m5q+ns= +github.com/sirupsen/logrus v1.8.0 h1:nfhvjKcUMhBMVqbKHJlk5RPrrfYr/NMo3692g0dwfWU= +github.com/sirupsen/logrus v1.8.0/go.mod h1:4GuYW9TZmE769R5STWrRakJc4UqQ3+QQ95fyz7ENv1A= +github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d h1:zE9ykElWQ6/NYmHa3jpm/yHnI4xSofP+UP6SpjHcSeM= +github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= +github.com/smartystreets/goconvey v1.6.4 h1:fv0U8FUIMPNf1L9lnHLvLhgicrIVChEkdzIKYqbNC9s= +github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.5.1 h1:nOGnQDM7FYENwehXlg/kFVnos3rEvtKTjRvOWSzb6H4= +github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= +github.com/wI2L/jettison v0.7.1 h1:XNq/WvSOAiJhFww9F5JZZcBZtKFL2Y/9WHHEHLDq9TE= +github.com/wI2L/jettison v0.7.1/go.mod h1:dj49nOP41M7x6Jql62BqqF/+nW+XJgBaWzJR0hd6M84= 
+github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200421231249-e086a090c8fd h1:QPwSajcTUrFriMF1nJ3XzgoqakqQEsnZf9LdXdi2nkI= +golang.org/x/net v0.0.0-20200421231249-e086a090c8fd/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a h1:WXEvlFVvvGxCJLG6REjsT03iWnKLEWinaScsxF2Vm2o= +golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200420163511-1957bb5e6d1f h1:gWF768j/LaZugp8dyS4UwsslYCYz9XgFxvlgsn0n9H8= +golang.org/x/sys v0.0.0-20200420163511-1957bb5e6d1f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs= +golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190828213141-aed303cbaa74/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200601175630-2caf76543d99/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 
+gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/natefinch/lumberjack.v2 v2.0.0/go.mod h1:l0ndWWf7gzL7RNwBG7wST/UCcT4T24xpD6X8LsfU/+k= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10= +gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gorm.io/driver/sqlite v1.1.4 h1:PDzwYE+sI6De2+mxAneV9Xs11+ZyKV6oxD3wDGkaNvM= +gorm.io/driver/sqlite v1.1.4/go.mod h1:mJCeTFr7+crvS+TRnWc5Z3UvwxUN1BGBLMrf5LA9DYw= +gorm.io/gorm v1.20.7/go.mod h1:0HFTzE/SqkGTzK6TlDPPQbAYCluiVvhzoA1+aVyzenw= +gorm.io/gorm v1.20.12 h1:ebZ5KrSHzet+sqOCVdH9mTjW91L298nX3v5lVxAzSUY= +gorm.io/gorm v1.20.12/go.mod h1:0HFTzE/SqkGTzK6TlDPPQbAYCluiVvhzoA1+aVyzenw= diff --git a/main.go b/main.go new file mode 100644 index 0000000..24cdd42 --- /dev/null +++ b/main.go @@ -0,0 +1,277 @@ +package main + +import ( + "flag" + "fmt" + "math/rand" + "sync" + "time" + + "os" + + log "github.com/sirupsen/logrus" + "gorm.io/driver/sqlite" + "gorm.io/gorm" +) + +// initialize logging +func init() { + log.SetFormatter(&log.TextFormatter{ + DisableColors: true, + }) + log.SetOutput(os.Stdout) +} + +// AppName to store application name +var AppName string = "restockbot" + +// AppVersion to set version at compilation time +var AppVersion string = "9999" + +// GitCommit to set git commit at compilation time (can be empty) +var GitCommit string + +// GoVersion to set Go version at compilation time +var GoVersion string + +func main() { + + rand.Seed(time.Now().UnixNano()) + + var err error + config := NewConfig() + + version := flag.Bool("version", false, "Print version and exit") + quiet := flag.Bool("quiet", false, "Log errors only") + verbose := flag.Bool("verbose", false, "Print more logs") + debug := flag.Bool("debug", false, "Print even more logs") + databaseFileName := flag.String("database", AppName+".db", "Database file name") + configFileName := flag.String("config", AppName+".json", "Configuration file name") + logFileName := flag.String("log-file", "", "Log file name") + disableNotifications := flag.Bool("disable-notifications", false, "Do not send notifications") + workers := flag.Int("workers", 1, "number of workers for parsing shops") + pidFile := flag.String("pid-file", "", "write process ID to this file to disable concurrent executions") + pidWaitTimeout := flag.Int("pid-wait-timeout", 0, "seconds to wait before giving up when another instance is running") + + flag.Parse() + + if *version { + showVersion() + return + } + + log.SetLevel(log.WarnLevel) + if *debug { + log.SetLevel(log.DebugLevel) + } + if *verbose { + log.SetLevel(log.InfoLevel) + } + if *quiet { + log.SetLevel(log.ErrorLevel) + } + + if *logFileName != "" { + fd, err := os.OpenFile(*logFileName, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644) + if err != nil { + fmt.Printf("cannot open file for logging: %s\n", err) + } + log.SetOutput(fd) + } + + if *configFileName != "" { + err = config.Read(*configFileName) + if err != nil { + log.Fatalf("cannot parse configuration file: %s", err) + } + } + log.Debugf("configuration file %s parsed", *configFileName) + + // handle PID file + if *pidFile != "" { + if err := waitPid(*pidFile, *pidWaitTimeout); err != nil { + log.Warnf("%s", err) + return + } + if err := writePid(*pidFile); err != nil { + log.Fatalf("cannot write PID file: %s", err) + } + defer removePid(*pidFile) + } + + // create parser + parser, err := 
NewParser(config.IncludeRegex, config.ExcludeRegex)
+	if err != nil {
+		log.Fatalf("could not create parser: %s", err)
+	}
+
+	// connect to the database
+	db, err := gorm.Open(sqlite.Open(*databaseFileName), &gorm.Config{})
+	if err != nil {
+		log.Fatalf("cannot connect to database: %s", err)
+	}
+	log.Debugf("connected to database %s", *databaseFileName)
+
+	// create tables
+	if err := db.AutoMigrate(&Product{}); err != nil {
+		log.Fatalf("cannot create products table")
+	}
+	if err := db.AutoMigrate(&Shop{}); err != nil {
+		log.Fatalf("cannot create shops table")
+	}
+
+	// register notifiers
+	notifiers := []Notifier{}
+
+	if !*disableNotifications {
+		if config.HasTwitter() {
+			twitterNotifier, err := NewTwitterNotifier(&config.TwitterConfig, db)
+			if err != nil {
+				log.Fatalf("cannot create twitter client: %s", err)
+			}
+			notifiers = append(notifiers, twitterNotifier)
+		}
+	}
+
+	// group links by shop
+	shopsMap := make(map[string][]string)
+
+	for _, link := range config.URLs {
+		name, err := ExtractShopName(link)
+		if err != nil {
+			log.Warnf("cannot extract shop name from %s: %s", link, err)
+		} else {
+			shopsMap[name] = append(shopsMap[name], link)
+		}
+	}
+
+	// crawl shops asynchronously
+	var wg sync.WaitGroup
+	jobsCount := 0
+	for shopName, shopLinks := range shopsMap {
+		if jobsCount < *workers {
+			wg.Add(1)
+			jobsCount++
+			go crawlShop(parser, shopName, shopLinks, notifiers, db, &wg)
+		} else {
+			// the pool is full: wait for the running jobs to end,
+			// then start the current shop instead of silently dropping it
+			log.Debugf("waiting for intermediate jobs to end")
+			wg.Wait()
+			jobsCount = 0
+			wg.Add(1)
+			jobsCount++
+			go crawlShop(parser, shopName, shopLinks, notifiers, db, &wg)
+		}
+	}
+	log.Debugf("waiting for all jobs to end")
+	wg.Wait()
+}
+
+// For a given shop, fetch and parse all the dependent URLs, then send notifications when availability changes
+func crawlShop(parser *Parser, shopName string, shopLinks []string, notifiers []Notifier, db *gorm.DB, wg *sync.WaitGroup) {
+	defer wg.Done()
+	log.Debugf("parsing shop %s", shopName)
+
+	// read shop from database or create it
+	var shop Shop
+	trx := db.Where(Shop{Name: shopName}).FirstOrCreate(&shop)
+	if trx.Error != nil {
+		log.Errorf("cannot create or select shop %s to/from database: %s", shopName, trx.Error)
+		return
+	}
+
+	for _, link := range shopLinks {
+
+		log.Debugf("parsing url %s", link)
+		products, err := parser.Parse(link)
+		if err != nil {
+			log.Warnf("cannot parse %s: %s", link, err)
+			continue
+		}
+		log.Debugf("url %s parsed", link)
+
+		// upsert products to database
+		for _, product := range products {
+
+			log.Debugf("detected product %+v", product)
+
+			if !product.IsValid() {
+				log.Warnf("parsed malformed product: %+v", product)
+				continue
+			}
+
+			// check if product is already in the database
+			// sometimes new products are detected on the website, directly available, without reference in the database
+			// the bot has to send a notification instead of blindly creating it in the database and checking availability afterwards
+			var count int64
+			trx = db.Model(&Product{}).Where(Product{URL: product.URL}).Count(&count)
+			if trx.Error != nil {
+				log.Warnf("cannot see if product %s already exists in the database: %s", product.Name, trx.Error)
+				continue
+			}
+
+			// fetch product from database or create it if it doesn't exist
+			var dbProduct Product
+			trx = db.Where(Product{URL: product.URL}).Attrs(Product{Name: product.Name, Shop: shop, Price: product.Price, PriceCurrency: product.PriceCurrency, Available: product.Available}).FirstOrCreate(&dbProduct)
+			if trx.Error != nil {
+				log.Warnf("cannot fetch product %s from database: %s", product.Name, trx.Error)
+				continue
+			}
+			log.Debugf("product %s found in database", 
dbProduct.Name) + + // detect availability change + duration := time.Now().Sub(dbProduct.UpdatedAt).Truncate(time.Second) + createThread := false + closeThread := false + + // non-existing product directly available + if count == 0 && product.Available { + log.Infof("product %s on %s is now available", product.Name, shopName) + createThread = true + } + + // existing product with availability change + if count > 0 && (dbProduct.Available != product.Available) { + if product.Available { + log.Infof("product %s on %s is now available", product.Name, shopName) + createThread = true + } else { + log.Infof("product %s on %s is not available anymore", product.Name, shopName) + closeThread = true + } + } + + // update product in database before sending notification + // if there is a database failure, we don't want the bot to send a notification at each run + if dbProduct.ToMerge(product) { + dbProduct.Merge(product) + trx = db.Save(&dbProduct) + if trx.Error != nil { + log.Warnf("cannot save product %s to database: %s", dbProduct.Name, trx.Error) + continue + } + log.Debugf("product %s updated in database", dbProduct.Name) + } + + // send notifications + if createThread { + for _, notifier := range notifiers { + if err := notifier.NotifyWhenAvailable(shop.Name, dbProduct.Name, dbProduct.Price, dbProduct.PriceCurrency, dbProduct.URL); err != nil { + log.Errorf("%s", err) + } + } + } else if closeThread { + for _, notifier := range notifiers { + if err := notifier.NotifyWhenNotAvailable(dbProduct.URL, duration); err != nil { + log.Errorf("%s", err) + } + } + } + } + } + + log.Debugf("shop %s parsed", shopName) +} + +func showVersion() { + if GitCommit != "" { + AppVersion = fmt.Sprintf("%s-%s", AppVersion, GitCommit) + } + fmt.Printf("%s version %s (compiled with %s)\n", AppName, AppVersion, GoVersion) +} diff --git a/main.py b/main.py deleted file mode 100644 index c9b040b..0000000 --- a/main.py +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/env python3 -import argparse -import logging -from concurrent import futures - -from config import extract_shops, read_config -from crawlers import CRAWLERS -from db import create_tables, list_shops, upsert_products, upsert_shops -from notifiers import TwitterNotifier - -logger = logging.getLogger(__name__) - - -def parse_arguments(): - parser = argparse.ArgumentParser() - parser.add_argument('-v', '--verbose', dest='loglevel', action='store_const', const=logging.INFO, - help='print more output') - parser.add_argument('-d', '--debug', dest='loglevel', action='store_const', const=logging.DEBUG, - default=logging.WARNING, help='print even more output') - parser.add_argument('-o', '--logfile', help='logging file location') - parser.add_argument('-c', '--config', default='config.json', help='configuration file location') - parser.add_argument('-N', '--disable-notifications', dest='disable_notifications', action='store_true', - help='do not send notifications') - parser.add_argument('-t', '--workers', type=int, help='number of workers for crawling') - args = parser.parse_args() - return args - - -def setup_logging(args): - log_format = '%(asctime)s %(levelname)s: %(message)s' if args.logfile else '%(levelname)s: %(message)s' - logging.basicConfig(format=log_format, level=args.loglevel, filename=args.logfile) - - -def crawl_shop(shop, urls): - logger.debug(f'processing {shop}') - crawler = CRAWLERS[shop.name](shop=shop, urls=urls) - return crawler.products - - -def main(): - args = parse_arguments() - setup_logging(args) - config = read_config(args.config) - 
create_tables()
-
-    shops = extract_shops(config['urls'])
-    upsert_shops(shops.keys())
-
-    if args.disable_notifications:
-        notifier = None
-    else:
-        notifier = TwitterNotifier(consumer_key=config['twitter']['consumer_key'],
-                                   consumer_secret=config['twitter']['consumer_secret'],
-                                   access_token=config['twitter']['access_token'],
-                                   access_token_secret=config['twitter']['access_token_secret'])
-
-    with futures.ThreadPoolExecutor(max_workers=args.workers) as executor:
-        all_futures = []
-        for shop in list_shops():
-            urls = shops.get(shop.name)
-            if not urls:
-                continue
-            all_futures.append(executor.submit(crawl_shop, shop, urls))
-        for future in futures.as_completed(all_futures):
-            products = future.result()
-            upsert_products(products=products, notifier=notifier)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/models.go b/models.go
new file mode 100644
index 0000000..f5dece8
--- /dev/null
+++ b/models.go
@@ -0,0 +1,45 @@
+package main
+
+import (
+	"gorm.io/gorm"
+)
+
+// Product is self-explanatory
+type Product struct {
+	gorm.Model
+	Name          string  `gorm:"not null" json:"name"`
+	URL           string  `gorm:"unique" json:"url"`
+	Price         float64 `gorm:"not null" json:"price"`
+	PriceCurrency string  `gorm:"not null" json:"price_currency"`
+	Available     bool    `gorm:"not null;default:false" json:"available"`
+	ShopID        uint
+	Shop          Shop
+}
+
+// Equal compares a database product to another product
+func (p *Product) Equal(other *Product) bool {
+	return p.URL == other.URL && p.Available == other.Available
+}
+
+// IsValid returns true when a Product has all required values
+func (p *Product) IsValid() bool {
+	return p.Name != "" && p.URL != "" && p.Price != 0 && p.PriceCurrency != ""
+}
+
+// Merge one product with another
+func (p *Product) Merge(o *Product) {
+	p.Price = o.Price
+	p.PriceCurrency = o.PriceCurrency
+	p.Available = o.Available
+}
+
+// ToMerge detects if a product needs to be merged with another one
+func (p *Product) ToMerge(o *Product) bool {
+	return p.Price != o.Price || p.PriceCurrency != o.PriceCurrency || p.Available != o.Available
+}
+
+// Shop represents a retailer website
+type Shop struct {
+	ID   uint   `gorm:"primaryKey"`
+	Name string `gorm:"unique"`
+}
diff --git a/notifier.go b/notifier.go
new file mode 100644
index 0000000..6dddedf
--- /dev/null
+++ b/notifier.go
@@ -0,0 +1,9 @@
+package main
+
+import "time"
+
+// Notifier interface to notify when a product becomes available or is sold out again
+type Notifier interface {
+	NotifyWhenAvailable(string, string, float64, string, string) error
+	NotifyWhenNotAvailable(string, time.Duration) error
+}
diff --git a/notifiers.py b/notifiers.py
deleted file mode 100644
index f6246bd..0000000
--- a/notifiers.py
+++ /dev/null
@@ -1,58 +0,0 @@
-import logging
-
-import tweepy
-from utils import format_timedelta
-
-logger = logging.getLogger(__name__)
-
-
-class TwitterNotifier(object):
-
-    _hashtags_map = {
-        'rtx 3060 ti': ['#nvidia', '#rtx3060ti'],
-        'rtx 3070': ['#nvidia', '#rtx3070'],
-        'rtx 3080': ['#nvidia', '#rtx3080'],
-        'rtx 3090': ['#nvidia', '#rtx3090'],
-        'rx 6800 xt': ['#amd', '#rx6800xt'],
-        'rx 6800': ['#amd', '#rx6800'],
-        'rx 5700 xt': ['#amd', '#rx5700xt'],
-    }
-
-    _currency_map = {
-        'EUR': '€'
-    }
-
-    def __init__(self, consumer_key, consumer_secret, access_token, access_token_secret):
-        auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
-        auth.set_access_token(access_token, access_token_secret)
-        self._api = tweepy.API(auth)
-
-    def create_thread(self, product):
-        currency_sign = 
self._currency_map[product.price_currency] - shop_name = product.shop.name - price = f'{product.price}{currency_sign}' - message = f'{shop_name}: {product.name} for {price} is available at {product.url}' - hashtags = self._parse_hashtags(product) - if hashtags: - message += f' {hashtags}' - return self._create_tweet(message=message) - - def close_thread(self, tweet_id, duration): - thread = self._api.get_status(id=tweet_id) - duration = format_timedelta(duration, '{hours_total}h{minutes2}m') - message = f'''@{thread.user.screen_name} And it's over ({duration})''' - return self._create_tweet(message=message, tweet_id=tweet_id) - - def _create_tweet(self, message, tweet_id=None): - try: - tweet = self._api.update_status(status=message, in_reply_to_status_id=tweet_id) - logger.info(f'tweet {tweet.id} sent with message "{message}"') - return tweet - except tweepy.error.TweepError as err: - logger.warning(f'cannot send tweet with message "{message}"') - logger.warning(str(err)) - - def _parse_hashtags(self, product): - for patterns in self._hashtags_map: - if all(elem in product.name.lower().split(' ') for elem in patterns.split(' ')): - return ' '.join(self._hashtags_map[patterns]) diff --git a/parser.go b/parser.go new file mode 100644 index 0000000..9e8716a --- /dev/null +++ b/parser.go @@ -0,0 +1,335 @@ +package main + +import ( + "context" + "encoding/json" + "fmt" + "regexp" + + log "github.com/sirupsen/logrus" + + "github.com/MontFerret/ferret/pkg/compiler" + "github.com/MontFerret/ferret/pkg/drivers" + "github.com/MontFerret/ferret/pkg/drivers/cdp" + "github.com/MontFerret/ferret/pkg/drivers/http" +) + +// Parser structure to handle websites parsing logic +type Parser struct { + includeRegex *regexp.Regexp + excludeRegex *regexp.Regexp + ctx context.Context +} + +// NewParser to create a new Parser instance +func NewParser(includeRegex string, excludeRegex string) (*Parser, error) { + + log.Debugf("compiling include name regex") + includeRegexCompiled, err := compileRegex(includeRegex) + if err != nil { + return nil, err + } + + log.Debugf("compiling exclude name regex") + excludeRegexCompiled, err := compileRegex(excludeRegex) + if err != nil { + return nil, err + } + + log.Debugf("creating context with headless browser drivers") + ctx := context.Background() + ctx = drivers.WithContext(ctx, cdp.NewDriver()) + ctx = drivers.WithContext(ctx, http.NewDriver(), drivers.AsDefault()) + + return &Parser{ + includeRegex: includeRegexCompiled, + excludeRegex: excludeRegexCompiled, + ctx: ctx, + }, nil +} + +// Parse a website to return list of products +// TODO: redirect output to logger +func (p *Parser) Parse(url string) ([]*Product, error) { + shopName, err := ExtractShopName(url) + if err != nil { + return nil, err + } + + query, err := createQuery(shopName, url) + if err != nil { + return nil, err + } + comp := compiler.New() + program, err := comp.Compile(string(query)) + if err != nil { + return nil, err + } + + out, err := program.Run(p.ctx) + if err != nil { + return nil, err + } + var products []*Product + err = json.Unmarshal(out, &products) + if err != nil { + return nil, err + } + + // apply filters + products = p.filterInclusive(products) + products = p.filterExclusive(products) + + return products, nil +} + +// filterInclusive returns a list of products matching the include regex +func (p *Parser) filterInclusive(products []*Product) []*Product { + var filtered []*Product + if p.includeRegex != nil { + for _, product := range products { + if 
p.includeRegex.MatchString(product.Name) {
+				log.Debugf("product %s included because it matches the include regex", product.Name)
+				filtered = append(filtered, product)
+			} else {
+				log.Debugf("product %s excluded because it does not match the include regex", product.Name)
+			}
+		}
+		return filtered
+	}
+	return products
+}
+
+// filterExclusive returns a list of products that don't match the exclude regex
+func (p *Parser) filterExclusive(products []*Product) []*Product {
+	var filtered []*Product
+	if p.excludeRegex != nil {
+		for _, product := range products {
+			if !p.excludeRegex.MatchString(product.Name) {
+				log.Debugf("product %s included because it does not match the exclude regex", product.Name)
+				filtered = append(filtered, product)
+			} else {
+				log.Debugf("product %s excluded because it matches the exclude regex", product.Name)
+			}
+		}
+		// return the filtered list, otherwise the exclude regex would never apply
+		return filtered
+	}
+	return products
+}
+
+func createQuery(shopName string, url string) (string, error) {
+	switch shopName {
+	case "cybertek.fr":
+		return createQueryForCybertek(url), nil
+	case "ldlc.com":
+		return createQueryForLDLC(url), nil
+	case "materiel.net":
+		return createQueryForMaterielNet(url), nil
+	case "mediamarkt.ch":
+		return createQueryForMediamarktCh(url), nil
+	case "topachat.com":
+		return createQueryForTopachat(url), nil
+	default:
+		return "", fmt.Errorf("shop %s not supported", shopName)
+	}
+}
+
+func createQueryForLDLC(url string) string {
+	q := `
+// gather first page
+LET first_page = '` + url + `'
+LET doc = DOCUMENT(first_page, {driver: "cdp"})
+
+// discover next pages
+LET pagination = ELEMENT(doc, ".pagination")
+LET next_pages = (
+    FOR url in ELEMENTS(pagination, "a")
+        RETURN "https://www.ldlc.com" + url.attributes.href
+)
+
+// append first page to pagination and remove duplicates
+LET pages = SORTED_UNIQUE(APPEND(next_pages, first_page))
+
+// create a result array containing one array of products per page
+LET results = (
+    FOR page IN pages
+        NAVIGATE(doc, page)
+        LET products = (
+            FOR el IN ELEMENTS(doc, ".pdt-item")
+                LET url = ELEMENT(el, "a")
+                LET name = INNER_TEXT(ELEMENT(el, "h3"))
+                LET price = TO_FLOAT(SUBSTITUTE(SUBSTITUTE(INNER_TEXT(ELEMENT(el, ".price")), "€", "."), " ", ""))
+                LET available = !CONTAINS(INNER_TEXT(ELEMENT(el, ".stock-web"), 'span'), "RUPTURE")
+                RETURN {
+                    name: name,
+                    url: "https://www.ldlc.com" + url.attributes.href,
+                    price: price,
+                    price_currency: "EUR",
+                    available: available,
+                }
+        )
+        RETURN products
+)
+
+// combine all arrays to a single one
+RETURN FLATTEN(results)
+	`
+	return q
+}
+
+func createQueryForMaterielNet(url string) string {
+	q := `
+// gather first page
+LET first_page = '` + url + `'
+LET doc = DOCUMENT(first_page, {driver: "cdp"})
+
+// discover next pages
+LET pagination = ELEMENT(doc, ".pagination")
+LET next_pages = (
+    FOR url in ELEMENTS(pagination, "a")
+        RETURN "https://www.materiel.net" + url.attributes.href
+)
+
+// append first page to pagination and remove duplicates
+LET pages = SORTED_UNIQUE(APPEND(next_pages, first_page))
+
+// create a result array containing one array of products per page
+LET results = (
+    FOR page IN pages
+        NAVIGATE(doc, page)
+        WAIT_ELEMENT(doc, "div .o-product__price")
+        LET products = (
+            FOR el IN ELEMENTS(doc, "div .ajax-product-item")
+                LET image = ELEMENT(el, "img")
+                LET url = ELEMENT(el, "a")
+                LET price = TO_FLOAT(SUBSTITUTE(SUBSTITUTE(INNER_TEXT(ELEMENT(el, "div .o-product__price")), "€", "."), " ", ""))
+                LET available = !CONTAINS(ELEMENT(el, "div .o-availability__value"), "Rupture")
+                RETURN {
+                    name: 
image.attributes.alt,
+                    url: "https://www.materiel.net" + url.attributes.href,
+                    price: price,
+                    price_currency: "EUR",
+                    available: available,
+                }
+        )
+        RETURN products
+)
+
+// combine all arrays to a single one
+RETURN FLATTEN(results)
+	`
+	return q
+}
+
+func createQueryForTopachat(url string) string {
+	q := `
+LET page = '` + url + `'
+LET doc = DOCUMENT(page, {driver: "cdp"})
+
+FOR el IN ELEMENTS(doc, "article .grille-produit")
+    LET url = ELEMENT(el, "a")
+    LET name = INNER_TEXT(ELEMENT(el, "h3"))
+    LET price = TO_FLOAT(ELEMENT(el, "div .prod_px_euro").attributes.content)
+    LET available = !CONTAINS(ELEMENT(el, "link").attributes.href, "http://schema.org/OutOfStock")
+    RETURN {
+        url: "https://www.topachat.com" + url.attributes.href,
+        name: name,
+        price: price,
+        price_currency: "EUR",
+        available: available,
+    }
+	`
+	return q
+}
+
+func createQueryForCybertek(url string) string {
+	q := `
+// gather first page
+LET first_page = '` + url + `'
+LET doc = DOCUMENT(first_page, {driver: "cdp"})
+
+// discover next pages
+LET pagination = ELEMENT(doc, "div .pagination-div")
+LET next_pages = (
+    FOR url in ELEMENTS(pagination, "a")
+        RETURN url.attributes.href
+)
+
+// append first page to pagination, remove "#" link and remove duplicates
+LET pages = SORTED_UNIQUE(APPEND(MINUS(next_pages, ["#"]), first_page))
+
+// create a result array containing one array of products per page
+LET results = (
+    FOR page in pages
+        NAVIGATE(doc, page)
+        LET products_available = (
+            FOR el IN ELEMENTS(doc, "div .listing_dispo")
+                LET url = ELEMENT(el, "a")
+                LET name = TRIM(FIRST(SPLIT(INNER_TEXT(ELEMENT(el, "div .height-txt-cat")), "-")))
+                LET price = TO_FLOAT(SUBSTITUTE(INNER_TEXT(ELEMENT(el, "div .price_prod_resp")), "€", "."))
+                RETURN {
+                    name: name,
+                    url: url.attributes.href,
+                    available: true,
+                    price: price,
+                    price_currency: "EUR",
+                }
+        )
+        LET products_not_available = (
+            FOR el IN ELEMENTS(doc, "div .listing_nodispo")
+                LET url = ELEMENT(el, "a")
+                LET name = TRIM(FIRST(SPLIT(INNER_TEXT(ELEMENT(el, "div .height-txt-cat")), "-")))
+                LET price = TO_FLOAT(SUBSTITUTE(INNER_TEXT(ELEMENT(el, "div .price_prod_resp")), "€", "."))
+                RETURN {
+                    name: name,
+                    url: url.attributes.href,
+                    available: false,
+                    price: price,
+                    price_currency: "EUR",
+                }
+        )
+        // combine available and not available list of products into a single array of products
+        RETURN FLATTEN([products_available, products_not_available])
+)
+
+// combine all arrays to a single one
+RETURN FLATTEN(results)
+	`
+	return q
+}
+
+func createQueryForMediamarktCh(url string) string {
+	q := `
+LET page = '` + url + `'
+LET doc = DOCUMENT(page, {driver: "cdp"})
+
+LET pagination = (
+    FOR el IN ELEMENTS(doc, "div .pagination-wrapper a")
+        RETURN "https://www.mediamarkt.ch" + el.attributes.href
+)
+
+LET pages = SORTED_UNIQUE(pagination)
+
+LET results = (
+    FOR page IN pages
+        NAVIGATE(doc, page)
+        LET products = (
+            FOR el IN ELEMENTS(doc, "div .product-wrapper")
+                LET name = TRIM(FIRST(SPLIT(INNER_TEXT(ELEMENT(el, "h2")), "-")))
+                LET url = ELEMENT(el, "a").attributes.href
+                LET price = TO_FLOAT(CONCAT(POP(ELEMENTS(el, "div .price span"))))
+                LET available = !REGEX_TEST(INNER_TEXT(ELEMENT(el, "div .availability li")), "^Non disponible(.*)")
+                RETURN {
+                    name: name,
+                    url: "https://www.mediamarkt.ch" + url,
+                    price: price,
+                    price_currency: "CHF",
+                    available: available,
+                }
+        )
+        RETURN products
+)
+
+RETURN FLATTEN(results)
+	`
+	return q
+}
diff --git a/parsers.py b/parsers.py
deleted file mode 100644
index 0713005..0000000
--- 
a/parsers.py +++ /dev/null @@ -1,474 +0,0 @@ -import logging -from html.parser import HTMLParser - -from bs4 import BeautifulSoup -from bs4.element import Tag -from db import Product -from utils import parse_base_url - -logger = logging.getLogger(__name__) - - -# Parsers definitively need to be replaced by beautifulsoup because the code is not maintainable - - -class ProductParser(HTMLParser): - def __init__(self): - super().__init__() - self.products = [] - self.next_page = None - - -class TopAchatParser(ProductParser): - def __init__(self, url=None): - super().__init__() - self._parsing_article = False - self._parsing_availability = False - self._parsing_price = False - self._parsing_price_currency = False - self._parsing_name = False - self._parsing_url = False - self._product = Product() - if url: - self._base_url = parse_base_url(url) - else: - self._base_url = 'https://www.topachat.com' - - @staticmethod - def parse_name(data): - return data.split(' + ')[0].strip() - - def handle_starttag(self, tag, attrs): - if tag == 'article': - for name, value in attrs: - if 'grille-produit' in value.split(' '): - self._parsing_article = True - elif self._parsing_article: - if tag == 'link': - for name, value in attrs: - if name == 'itemprop' and value == 'availability': - self._parsing_availability = True - elif self._parsing_availability and name == 'href': - self._product.available = value != 'http://schema.org/OutOfStock' - elif tag == 'div': - for name, value in attrs: - if name == 'itemprop' and value == 'price': - self._parsing_price = True - elif self._parsing_price and name == 'content': - self._product.price = float(value) - elif name == 'class' and value == 'libelle': - self._parsing_url = True - self._parsing_name = True - elif tag == 'meta': - for name, value in attrs: - if name == 'itemprop' and value == 'priceCurrency': - self._parsing_price_currency = True - elif self._parsing_price_currency and name == 'content': - self._product.price_currency = value - elif tag == 'a': - for name, value in attrs: - if self._parsing_url and name == 'href': - self._product.url = f'{self._base_url}{value}' - - def handle_data(self, data): - if self._parsing_name and self.get_starttag_text().startswith('