Select or create shop before parsing
A shop map was created to group URLs by shop and process them in order. Now that we have Amazon and each URL can be parsed independently, there is no need to group them anymore. Moreover, shops were passed as an argument to the handleProducts function. The shop name can be deduced by the parser itself. The parser has a reference to the database. The parser now selects or creates the shop before parsing products. Signed-off-by: Julien Riou <julien@riou.xyz>
This commit is contained in:
parent
5f69b8435b
commit
ab5abcd171
4 changed files with 34 additions and 42 deletions
66
main.go
66
main.go
|
@ -140,41 +140,19 @@ func main() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// create shops and parsers
|
// create parsers
|
||||||
var shop Shop
|
|
||||||
parsers := []Parser{}
|
parsers := []Parser{}
|
||||||
|
|
||||||
if config.HasURLs() {
|
if config.HasURLs() {
|
||||||
// group links by shop
|
for _, url := range config.URLs {
|
||||||
ShopsMap := make(map[string][]string)
|
// create parser
|
||||||
|
parser, err := NewURLParser(url, config.BrowserAddress, config.IncludeRegex, config.ExcludeRegex)
|
||||||
for _, link := range config.URLs {
|
|
||||||
name, err := ExtractShopName(link)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warnf("cannot extract shop name from %s: %s", link, err)
|
log.Warnf("could not create URL parser for '%s'", url)
|
||||||
} else {
|
|
||||||
ShopsMap[name] = append(ShopsMap[name], link)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for shopName, shopLinks := range ShopsMap {
|
|
||||||
// read shop from database or create it
|
|
||||||
trx := db.Where(Shop{Name: shopName}).FirstOrCreate(&shop)
|
|
||||||
if trx.Error != nil {
|
|
||||||
log.Errorf("cannot create or select shop %s to/from database: %s", shopName, trx.Error)
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
parsers = append(parsers, parser)
|
||||||
for _, link := range shopLinks {
|
log.Debugf("parser %s registered", parser)
|
||||||
// create parser
|
|
||||||
parser, err := NewURLParser(link, config.BrowserAddress, config.IncludeRegex, config.ExcludeRegex)
|
|
||||||
if err != nil {
|
|
||||||
log.Warnf("could not create URL parser for %s", link)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
parsers = append(parsers, parser)
|
|
||||||
log.Debugf("parser %s registered", parser)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -187,13 +165,6 @@ func main() {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// read shop from database or create it
|
|
||||||
trx := db.Where(Shop{Name: parser.ShopName()}).FirstOrCreate(&shop)
|
|
||||||
if trx.Error != nil {
|
|
||||||
log.Errorf("cannot create or select shop %s to/from database: %s", parser.ShopName(), trx.Error)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
parsers = append(parsers, parser)
|
parsers = append(parsers, parser)
|
||||||
log.Debugf("parser %s registered", parser)
|
log.Debugf("parser %s registered", parser)
|
||||||
}
|
}
|
||||||
|
@ -207,7 +178,7 @@ func main() {
|
||||||
if jobsCount < *workers {
|
if jobsCount < *workers {
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
jobsCount++
|
jobsCount++
|
||||||
go handleProducts(shop, parser, notifiers, db, &wg)
|
go handleProducts(parser, notifiers, db, &wg)
|
||||||
} else {
|
} else {
|
||||||
log.Debugf("waiting for intermediate jobs to end")
|
log.Debugf("waiting for intermediate jobs to end")
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
|
@ -219,11 +190,26 @@ func main() {
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
}
|
}
|
||||||
|
|
||||||
// For a given shop, use the parser to return a list of products, then eventually send notifications
|
// Use the parser to return a list of products, then eventually send notifications
|
||||||
func handleProducts(shop Shop, parser Parser, notifiers []Notifier, db *gorm.DB, wg *sync.WaitGroup) {
|
func handleProducts(parser Parser, notifiers []Notifier, db *gorm.DB, wg *sync.WaitGroup) {
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
|
|
||||||
log.Debugf("parsing with %s", parser)
|
log.Debugf("parsing with %s", parser)
|
||||||
|
|
||||||
|
// read shop from database or create it
|
||||||
|
var shop Shop
|
||||||
|
shopName, err := parser.ShopName()
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf("cannot extract shop name from parser: %s", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
trx := db.Where(Shop{Name: shopName}).FirstOrCreate(&shop)
|
||||||
|
if trx.Error != nil {
|
||||||
|
log.Warnf("cannot create or select shop %s to/from database: %s", shopName, trx.Error)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// parse products
|
||||||
products, err := parser.Parse()
|
products, err := parser.Parse()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warnf("cannot parse: %s", err)
|
log.Warnf("cannot parse: %s", err)
|
||||||
|
@ -231,7 +217,7 @@ func handleProducts(shop Shop, parser Parser, notifiers []Notifier, db *gorm.DB,
|
||||||
}
|
}
|
||||||
log.Debugf("parsed")
|
log.Debugf("parsed")
|
||||||
|
|
||||||
// upsert products to database
|
// insert or update products to database
|
||||||
for _, product := range products {
|
for _, product := range products {
|
||||||
|
|
||||||
log.Debugf("detected product %+v", product)
|
log.Debugf("detected product %+v", product)
|
||||||
|
|
|
@ -10,6 +10,7 @@ import (
|
||||||
type Parser interface {
|
type Parser interface {
|
||||||
Parse() ([]*Product, error)
|
Parse() ([]*Product, error)
|
||||||
String() string
|
String() string
|
||||||
|
ShopName() (string, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
// filterInclusive returns a list of products matching the include regex
|
// filterInclusive returns a list of products matching the include regex
|
||||||
|
|
|
@ -153,6 +153,6 @@ func (p *AmazonParser) String() string {
|
||||||
}
|
}
|
||||||
|
|
||||||
// ShopName returns shop name from Amazon Marketplace
|
// ShopName returns shop name from Amazon Marketplace
|
||||||
func (p *AmazonParser) ShopName() string {
|
func (p *AmazonParser) ShopName() (string, error) {
|
||||||
return strings.ReplaceAll(p.client.Marketplace(), "www.", "")
|
return strings.ReplaceAll(p.client.Marketplace(), "www.", ""), nil
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,6 +28,11 @@ func (p *URLParser) String() string {
|
||||||
return fmt.Sprintf("URLParser<%s>", p.url)
|
return fmt.Sprintf("URLParser<%s>", p.url)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ShopName returns shop name from URL
|
||||||
|
func (p *URLParser) ShopName() (string, error) {
|
||||||
|
return ExtractShopName(p.url)
|
||||||
|
}
|
||||||
|
|
||||||
// NewURLParser to create a new URLParser instance
|
// NewURLParser to create a new URLParser instance
|
||||||
func NewURLParser(url string, browserAddress string, includeRegex string, excludeRegex string) (*URLParser, error) {
|
func NewURLParser(url string, browserAddress string, includeRegex string, excludeRegex string) (*URLParser, error) {
|
||||||
var err error
|
var err error
|
||||||
|
|
Reference in a new issue