Select or create shop before parsing
A shop map was created to group URLs by shop and process them in order. Now that we have Amazon and each URL can be parsed independently, there is no need to group them anymore. Moreover, shops were passed as an argument to the handleProducts function. The shop name can be deduced by the parser itself. The parser has a reference to the database. The parser now selects or creates the shop before parsing products. Signed-off-by: Julien Riou <julien@riou.xyz>
This commit is contained in:
parent
5f69b8435b
commit
ab5abcd171
4 changed files with 34 additions and 42 deletions
66
main.go
66
main.go
|
@ -140,41 +140,19 @@ func main() {
|
|||
}
|
||||
}
|
||||
|
||||
// create shops and parsers
|
||||
var shop Shop
|
||||
// create parsers
|
||||
parsers := []Parser{}
|
||||
|
||||
if config.HasURLs() {
|
||||
// group links by shop
|
||||
ShopsMap := make(map[string][]string)
|
||||
|
||||
for _, link := range config.URLs {
|
||||
name, err := ExtractShopName(link)
|
||||
for _, url := range config.URLs {
|
||||
// create parser
|
||||
parser, err := NewURLParser(url, config.BrowserAddress, config.IncludeRegex, config.ExcludeRegex)
|
||||
if err != nil {
|
||||
log.Warnf("cannot extract shop name from %s: %s", link, err)
|
||||
} else {
|
||||
ShopsMap[name] = append(ShopsMap[name], link)
|
||||
}
|
||||
}
|
||||
|
||||
for shopName, shopLinks := range ShopsMap {
|
||||
// read shop from database or create it
|
||||
trx := db.Where(Shop{Name: shopName}).FirstOrCreate(&shop)
|
||||
if trx.Error != nil {
|
||||
log.Errorf("cannot create or select shop %s to/from database: %s", shopName, trx.Error)
|
||||
log.Warnf("could not create URL parser for '%s'", url)
|
||||
continue
|
||||
}
|
||||
|
||||
for _, link := range shopLinks {
|
||||
// create parser
|
||||
parser, err := NewURLParser(link, config.BrowserAddress, config.IncludeRegex, config.ExcludeRegex)
|
||||
if err != nil {
|
||||
log.Warnf("could not create URL parser for %s", link)
|
||||
continue
|
||||
}
|
||||
parsers = append(parsers, parser)
|
||||
log.Debugf("parser %s registered", parser)
|
||||
}
|
||||
parsers = append(parsers, parser)
|
||||
log.Debugf("parser %s registered", parser)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -187,13 +165,6 @@ func main() {
|
|||
continue
|
||||
}
|
||||
|
||||
// read shop from database or create it
|
||||
trx := db.Where(Shop{Name: parser.ShopName()}).FirstOrCreate(&shop)
|
||||
if trx.Error != nil {
|
||||
log.Errorf("cannot create or select shop %s to/from database: %s", parser.ShopName(), trx.Error)
|
||||
continue
|
||||
}
|
||||
|
||||
parsers = append(parsers, parser)
|
||||
log.Debugf("parser %s registered", parser)
|
||||
}
|
||||
|
@ -207,7 +178,7 @@ func main() {
|
|||
if jobsCount < *workers {
|
||||
wg.Add(1)
|
||||
jobsCount++
|
||||
go handleProducts(shop, parser, notifiers, db, &wg)
|
||||
go handleProducts(parser, notifiers, db, &wg)
|
||||
} else {
|
||||
log.Debugf("waiting for intermediate jobs to end")
|
||||
wg.Wait()
|
||||
|
@ -219,11 +190,26 @@ func main() {
|
|||
wg.Wait()
|
||||
}
|
||||
|
||||
// For a given shop, use the parser to return a list of products, then eventually send notifications
|
||||
func handleProducts(shop Shop, parser Parser, notifiers []Notifier, db *gorm.DB, wg *sync.WaitGroup) {
|
||||
// For parser to return a list of products, then eventually send notifications
|
||||
func handleProducts(parser Parser, notifiers []Notifier, db *gorm.DB, wg *sync.WaitGroup) {
|
||||
defer wg.Done()
|
||||
|
||||
log.Debugf("parsing with %s", parser)
|
||||
|
||||
// read shop from database or create it
|
||||
var shop Shop
|
||||
shopName, err := parser.ShopName()
|
||||
if err != nil {
|
||||
log.Warnf("cannot extract shop name from parser: %s", err)
|
||||
return
|
||||
}
|
||||
trx := db.Where(Shop{Name: shopName}).FirstOrCreate(&shop)
|
||||
if trx.Error != nil {
|
||||
log.Warnf("cannot create or select shop %s to/from database: %s", shopName, trx.Error)
|
||||
return
|
||||
}
|
||||
|
||||
// parse products
|
||||
products, err := parser.Parse()
|
||||
if err != nil {
|
||||
log.Warnf("cannot parse: %s", err)
|
||||
|
@ -231,7 +217,7 @@ func handleProducts(shop Shop, parser Parser, notifiers []Notifier, db *gorm.DB,
|
|||
}
|
||||
log.Debugf("parsed")
|
||||
|
||||
// upsert products to database
|
||||
// insert or update products to database
|
||||
for _, product := range products {
|
||||
|
||||
log.Debugf("detected product %+v", product)
|
||||
|
|
|
@ -10,6 +10,7 @@ import (
|
|||
type Parser interface {
|
||||
Parse() ([]*Product, error)
|
||||
String() string
|
||||
ShopName() (string, error)
|
||||
}
|
||||
|
||||
// filterInclusive returns a list of products matching the include regex
|
||||
|
|
|
@ -153,6 +153,6 @@ func (p *AmazonParser) String() string {
|
|||
}
|
||||
|
||||
// ShopName returns shop name from Amazon Marketplace
|
||||
func (p *AmazonParser) ShopName() string {
|
||||
return strings.ReplaceAll(p.client.Marketplace(), "www.", "")
|
||||
func (p *AmazonParser) ShopName() (string, error) {
|
||||
return strings.ReplaceAll(p.client.Marketplace(), "www.", ""), nil
|
||||
}
|
||||
|
|
|
@ -28,6 +28,11 @@ func (p *URLParser) String() string {
|
|||
return fmt.Sprintf("URLParser<%s>", p.url)
|
||||
}
|
||||
|
||||
// ShopName returns shop name from URL
|
||||
func (p *URLParser) ShopName() (string, error) {
|
||||
return ExtractShopName(p.url)
|
||||
}
|
||||
|
||||
// NewURLParser to create a new URLParser instance
|
||||
func NewURLParser(url string, browserAddress string, includeRegex string, excludeRegex string) (*URLParser, error) {
|
||||
var err error
|
||||
|
|
Reference in a new issue