From ab5abcd171ccf1a7566ef5bc48069340e50e626d Mon Sep 17 00:00:00 2001 From: Julien Riou Date: Thu, 1 Apr 2021 17:50:50 +0200 Subject: [PATCH] Select or create shop before parsing A shop map was created to group URLs by shop and process them in order. Now that we have Amazon and each URL can be parsed independently, there is no need to group them anymore. Moreover, shops were passed as an argument to the handleProducts function. The shop name can be deduced by the parser itself. The parser has a reference to the database. The parser now selects or creates the shop before parsing products. Signed-off-by: Julien Riou --- main.go | 66 +++++++++++++++++++----------------------------- parser.go | 1 + parser_amazon.go | 4 +-- parser_url.go | 5 ++++ 4 files changed, 34 insertions(+), 42 deletions(-) diff --git a/main.go b/main.go index 4151ea9..b23c421 100644 --- a/main.go +++ b/main.go @@ -140,41 +140,19 @@ func main() { } } - // create shops and parsers - var shop Shop + // create parsers parsers := []Parser{} if config.HasURLs() { - // group links by shop - ShopsMap := make(map[string][]string) - - for _, link := range config.URLs { - name, err := ExtractShopName(link) + for _, url := range config.URLs { + // create parser + parser, err := NewURLParser(url, config.BrowserAddress, config.IncludeRegex, config.ExcludeRegex) if err != nil { - log.Warnf("cannot extract shop name from %s: %s", link, err) - } else { - ShopsMap[name] = append(ShopsMap[name], link) - } - } - - for shopName, shopLinks := range ShopsMap { - // read shop from database or create it - trx := db.Where(Shop{Name: shopName}).FirstOrCreate(&shop) - if trx.Error != nil { - log.Errorf("cannot create or select shop %s to/from database: %s", shopName, trx.Error) + log.Warnf("could not create URL parser for '%s'", url) continue } - - for _, link := range shopLinks { - // create parser - parser, err := NewURLParser(link, config.BrowserAddress, config.IncludeRegex, config.ExcludeRegex) - if err != nil { -
log.Warnf("could not create URL parser for %s", link) - continue - } - parsers = append(parsers, parser) - log.Debugf("parser %s registered", parser) - } + parsers = append(parsers, parser) + log.Debugf("parser %s registered", parser) } } @@ -187,13 +165,6 @@ func main() { continue } - // read shop from database or create it - trx := db.Where(Shop{Name: parser.ShopName()}).FirstOrCreate(&shop) - if trx.Error != nil { - log.Errorf("cannot create or select shop %s to/from database: %s", parser.ShopName(), trx.Error) - continue - } - parsers = append(parsers, parser) log.Debugf("parser %s registered", parser) } @@ -207,7 +178,7 @@ func main() { if jobsCount < *workers { wg.Add(1) jobsCount++ - go handleProducts(shop, parser, notifiers, db, &wg) + go handleProducts(parser, notifiers, db, &wg) } else { log.Debugf("waiting for intermediate jobs to end") wg.Wait() @@ -219,11 +190,26 @@ func main() { wg.Wait() } -// For a given shop, use the parser to return a list of products, then eventually send notifications -func handleProducts(shop Shop, parser Parser, notifiers []Notifier, db *gorm.DB, wg *sync.WaitGroup) { +// Use the parser to return a list of products, then eventually send notifications +func handleProducts(parser Parser, notifiers []Notifier, db *gorm.DB, wg *sync.WaitGroup) { defer wg.Done() log.Debugf("parsing with %s", parser) + + // read shop from database or create it + var shop Shop + shopName, err := parser.ShopName() + if err != nil { + log.Warnf("cannot extract shop name from parser: %s", err) + return + } + trx := db.Where(Shop{Name: shopName}).FirstOrCreate(&shop) + if trx.Error != nil { + log.Warnf("cannot create or select shop %s to/from database: %s", shopName, trx.Error) + return + } + + // parse products products, err := parser.Parse() if err != nil { log.Warnf("cannot parse: %s", err) @@ -231,7 +217,7 @@ func handleProducts(shop Shop, parser Parser, notifiers []Notifier, db *gorm.DB, } log.Debugf("parsed") - // upsert products to database + //
insert or update products to database for _, product := range products { log.Debugf("detected product %+v", product) diff --git a/parser.go b/parser.go index 9fe2f75..ee301b3 100644 --- a/parser.go +++ b/parser.go @@ -10,6 +10,7 @@ import ( type Parser interface { Parse() ([]*Product, error) String() string + ShopName() (string, error) } // filterInclusive returns a list of products matching the include regex diff --git a/parser_amazon.go b/parser_amazon.go index b109c63..acedd25 100644 --- a/parser_amazon.go +++ b/parser_amazon.go @@ -153,6 +153,6 @@ func (p *AmazonParser) String() string { } // ShopName returns shop name from Amazon Marketplace -func (p *AmazonParser) ShopName() string { - return strings.ReplaceAll(p.client.Marketplace(), "www.", "") +func (p *AmazonParser) ShopName() (string, error) { + return strings.ReplaceAll(p.client.Marketplace(), "www.", ""), nil } diff --git a/parser_url.go b/parser_url.go index 7244d5c..64d9234 100644 --- a/parser_url.go +++ b/parser_url.go @@ -28,6 +28,11 @@ func (p *URLParser) String() string { return fmt.Sprintf("URLParser<%s>", p.url) } +// ShopName returns shop name from URL +func (p *URLParser) ShopName() (string, error) { + return ExtractShopName(p.url) +} + // NewURLParser to create a new URLParser instance func NewURLParser(url string, browserAddress string, includeRegex string, excludeRegex string) (*URLParser, error) { var err error