Add Amazon support (#3)
This commit introduces Amazon support through calls to the Product Advertising API (PA API). For now, I was only able to use the "www.amazon.fr" marketplace. I will add more marketplaces once my Amazon Associates accounts have been validated. Signed-off-by: Julien Riou <julien@riou.xyz>
This commit is contained in:
parent
f994093baf
commit
5ac5f78ae2
11 changed files with 399 additions and 116 deletions
18
README.md
18
README.md
|
@ -14,6 +14,14 @@ docker run --name chromium --rm -d -p 9222:9222 montferret/chromium
|
|||
|
||||
Or get inspired by the [source code](https://github.com/MontFerret/chromium) to run it on your own.
|
||||
|
||||
### Amazon (optional)
|
||||
|
||||
To access the [Product Advertising API](https://webservices.amazon.com/paapi5/documentation/) and start notifying for Amazon products, you will need a valid [Amazon Associates](https://affiliate-program.amazon.com) account in the [Marketplace](https://github.com/spiegel-im-spiegel/pa-api/blob/v0.9.0/marketplace.go#L36) of your choice. You will then be able to retrieve your **partner tag** and, of course, the **Marketplace name**.
|
||||
|
||||
Once your account has been validated, you can request access to the Product Advertising API (PA API) to retrieve your **access key** and your **secret key**.
|
||||
|
||||
Ensure you follow the **terms of service** before subscribing to the Amazon Associates program and using the PA API.
|
||||
|
||||
### Twitter (optional)
|
||||
|
||||
Follow [this procedure](https://github.com/jouir/twitter-login) to generate all the required settings:
|
||||
|
@ -108,7 +116,15 @@ Default file is `restockbot.json` in the current directory. The file name can be
|
|||
|
||||
Options:
|
||||
|
||||
* `urls`: list of retailers web pages
|
||||
* `urls` (optional): list of retailers web pages
|
||||
* `amazon` (optional):
|
||||
* `searches`: list of keywords to search for (ex: `["nvidia rtx", "amd rx"]`)
|
||||
* `access_key`: access key to access the [Product Advertising API](https://webservices.amazon.com/paapi5/documentation/)
|
||||
* `secret_key`: secret key to access the [Product Advertising API](https://webservices.amazon.com/paapi5/documentation/)
|
||||
* `marketplaces`: list of documents containing a Marketplace `name` and a `partner_tag` (ex: `{"marketplaces":[{"name": "www.amazon.com", "partner_tag": "mytag-01"}]}`)
|
||||
* `amazon_fulfilled`: include only products packaged by Amazon
|
||||
* `amazon_merchant`: include only products sold by Amazon
|
||||
* `affiliate_links`: generate affiliate links with the partner tag
|
||||
* `twitter` (optional):
|
||||
* `consumer_key`: API key of your Twitter application
|
||||
* `consumer_secret`: API secret of your Twitter application
|
||||
|
|
34
config.go
34
config.go
|
@ -11,6 +11,7 @@ type Config struct {
|
|||
TwitterConfig `json:"twitter"`
|
||||
TelegramConfig `json:"telegram"`
|
||||
ApiConfig `json:"api"`
|
||||
AmazonConfig `json:"amazon"`
|
||||
URLs []string `json:"urls"`
|
||||
IncludeRegex string `json:"include_regex"`
|
||||
ExcludeRegex string `json:"exclude_regex"`
|
||||
|
@ -40,6 +41,20 @@ type ApiConfig struct {
|
|||
Keyfile string `json:"key_file"`
|
||||
}
|
||||
|
||||
// AmazonConfig holds the "amazon" section of the configuration file: the
// Product Advertising API credentials, the keywords to search for, the
// Marketplaces to query and the offer filtering switches.
type AmazonConfig struct {
	// Searches is the list of keywords to search for.
	Searches []string `json:"searches"`

	// AccessKey and SecretKey are the Product Advertising API credentials.
	AccessKey string `json:"access_key"`
	SecretKey string `json:"secret_key"`

	// Marketplaces associates each Marketplace name with its partner tag.
	Marketplaces []struct {
		Name       string `json:"name"`
		PartnerTag string `json:"partner_tag"`
	} `json:"marketplaces"`

	// AmazonFulfilled keeps only offers fulfilled by Amazon.
	AmazonFulfilled bool `json:"amazon_fulfilled"`

	// AmazonMerchant keeps only offers whose merchant is Amazon.
	AmazonMerchant bool `json:"amazon_merchant"`

	// AffiliateLinks makes product URLs carry the partner tag.
	AffiliateLinks bool `json:"affiliate_links"`
}
|
||||
|
||||
// NewConfig creates a Config struct
|
||||
func NewConfig() *Config {
|
||||
return &Config{}
|
||||
|
@ -73,3 +88,22 @@ func (c *Config) HasTwitter() bool {
|
|||
func (c *Config) HasTelegram() bool {
|
||||
return c.TelegramConfig.Token != "" && (c.TelegramConfig.ChatID != 0 || c.TelegramConfig.ChannelName != "")
|
||||
}
|
||||
|
||||
// HasURL returns true when list of URLS has been configured
|
||||
func (c *Config) HasURLs() bool {
|
||||
return len(c.URLs) > 0
|
||||
}
|
||||
|
||||
// HasAmazon returns true when Amazon has been configured
|
||||
func (c *Config) HasAmazon() bool {
|
||||
var hasKeys, hasSearches, hasMarketplaces bool
|
||||
hasKeys = c.AmazonConfig.AccessKey != "" && c.AmazonConfig.SecretKey != ""
|
||||
hasSearches = len(c.AmazonConfig.Searches) > 0
|
||||
for _, marketplace := range c.AmazonConfig.Marketplaces {
|
||||
if marketplace.PartnerTag != "" && marketplace.Name != "" {
|
||||
hasMarketplaces = true
|
||||
break
|
||||
}
|
||||
}
|
||||
return hasKeys && hasSearches && hasMarketplaces
|
||||
}
|
||||
|
|
1
go.mod
1
go.mod
|
@ -9,6 +9,7 @@ require (
|
|||
github.com/go-telegram-bot-api/telegram-bot-api/v5 v5.0.0-rc1
|
||||
github.com/gorilla/mux v1.8.0
|
||||
github.com/sirupsen/logrus v1.8.0
|
||||
github.com/spiegel-im-spiegel/pa-api v0.9.0 // indirect
|
||||
gorm.io/driver/sqlite v1.1.4
|
||||
gorm.io/gorm v1.20.12
|
||||
)
|
||||
|
|
6
go.sum
6
go.sum
|
@ -101,6 +101,12 @@ github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d h1:zE9ykE
|
|||
github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc=
|
||||
github.com/smartystreets/goconvey v1.6.4 h1:fv0U8FUIMPNf1L9lnHLvLhgicrIVChEkdzIKYqbNC9s=
|
||||
github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
|
||||
github.com/spiegel-im-spiegel/errs v1.0.2 h1:v4amEwRDqRWjKHOILQnJSovYhZ4ZttEnBBXNXEzS6Sc=
|
||||
github.com/spiegel-im-spiegel/errs v1.0.2/go.mod h1:UoasJYYujMcdkbT9USv8dfZWoMyaY3btqQxoLJImw0A=
|
||||
github.com/spiegel-im-spiegel/fetch v0.2.3 h1:Zh5rHvOjfC81rxKvtUD21JT609smds+BRh+H84s8qEw=
|
||||
github.com/spiegel-im-spiegel/fetch v0.2.3/go.mod h1:ePIXxdC9OvSarXEO6HW1MgQwtBaKQo0qgDLOhKFXkQ0=
|
||||
github.com/spiegel-im-spiegel/pa-api v0.9.0 h1:xbrPJDAbDf0dzYu7BRfIr0pdHQDYpvsQmcjitpaOxP8=
|
||||
github.com/spiegel-im-spiegel/pa-api v0.9.0/go.mod h1:DYAuXUPAi1xrNroBybPo/JIzPXo3VCAF/33mEJgf9hU=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
|
|
103
main.go
103
main.go
|
@ -140,15 +140,62 @@ func main() {
|
|||
}
|
||||
}
|
||||
|
||||
// Group links by shop
|
||||
ShopsMap := make(map[string][]string)
|
||||
// create shops and parsers
|
||||
var shop Shop
|
||||
parsers := []Parser{}
|
||||
|
||||
for _, link := range config.URLs {
|
||||
name, err := ExtractShopName(link)
|
||||
if err != nil {
|
||||
log.Warnf("cannot extract shop name from %s: %s", link, err)
|
||||
} else {
|
||||
ShopsMap[name] = append(ShopsMap[name], link)
|
||||
if config.HasURLs() {
|
||||
// group links by shop
|
||||
ShopsMap := make(map[string][]string)
|
||||
|
||||
for _, link := range config.URLs {
|
||||
name, err := ExtractShopName(link)
|
||||
if err != nil {
|
||||
log.Warnf("cannot extract shop name from %s: %s", link, err)
|
||||
} else {
|
||||
ShopsMap[name] = append(ShopsMap[name], link)
|
||||
}
|
||||
}
|
||||
|
||||
for shopName, shopLinks := range ShopsMap {
|
||||
// read shop from database or create it
|
||||
trx := db.Where(Shop{Name: shopName}).FirstOrCreate(&shop)
|
||||
if trx.Error != nil {
|
||||
log.Errorf("cannot create or select shop %s to/from database: %s", shopName, trx.Error)
|
||||
continue
|
||||
}
|
||||
|
||||
for _, link := range shopLinks {
|
||||
// create parser
|
||||
parser, err := NewURLParser(link, config.BrowserAddress, config.IncludeRegex, config.ExcludeRegex)
|
||||
if err != nil {
|
||||
log.Warnf("could not create URL parser for %s", link)
|
||||
continue
|
||||
}
|
||||
parsers = append(parsers, parser)
|
||||
log.Debugf("parser %s registered", parser)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if config.HasAmazon() {
|
||||
for _, marketplace := range config.AmazonConfig.Marketplaces {
|
||||
// create parser
|
||||
parser, err := NewAmazonParser(marketplace.Name, marketplace.PartnerTag, config.AmazonConfig.AccessKey, config.AmazonConfig.SecretKey, config.AmazonConfig.Searches, config.IncludeRegex, config.ExcludeRegex, config.AmazonConfig.AmazonFulfilled, config.AmazonConfig.AmazonMerchant, config.AmazonConfig.AffiliateLinks)
|
||||
if err != nil {
|
||||
log.Warnf("could not create Amazon parser: %s", err)
|
||||
continue
|
||||
}
|
||||
|
||||
// read shop from database or create it
|
||||
trx := db.Where(Shop{Name: parser.ShopName()}).FirstOrCreate(&shop)
|
||||
if trx.Error != nil {
|
||||
log.Errorf("cannot create or select shop %s to/from database: %s", parser.ShopName(), trx.Error)
|
||||
continue
|
||||
}
|
||||
|
||||
parsers = append(parsers, parser)
|
||||
log.Debugf("parser %s registered", parser)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -156,33 +203,15 @@ func main() {
|
|||
var wg sync.WaitGroup
|
||||
jobsCount := 0
|
||||
|
||||
// start with URLs
|
||||
for shopName, shopLinks := range ShopsMap {
|
||||
|
||||
// read shop from database or create it
|
||||
var shop Shop
|
||||
trx := db.Where(Shop{Name: shopName}).FirstOrCreate(&shop)
|
||||
if trx.Error != nil {
|
||||
log.Errorf("cannot create or select shop %s to/from database: %s", shopName, trx.Error)
|
||||
continue
|
||||
}
|
||||
|
||||
for _, link := range shopLinks {
|
||||
if jobsCount < *workers {
|
||||
// create parser
|
||||
parser, err := NewURLParser(link, config.BrowserAddress, config.IncludeRegex, config.ExcludeRegex)
|
||||
if err != nil {
|
||||
log.Warnf("could not create URL parser for %s", link)
|
||||
continue
|
||||
}
|
||||
wg.Add(1)
|
||||
jobsCount++
|
||||
go handleProducts(shop, parser, notifiers, db, &wg)
|
||||
} else {
|
||||
log.Debugf("waiting for intermediate jobs to end")
|
||||
wg.Wait()
|
||||
jobsCount = 0
|
||||
}
|
||||
for _, parser := range parsers {
|
||||
if jobsCount < *workers {
|
||||
wg.Add(1)
|
||||
jobsCount++
|
||||
go handleProducts(shop, parser, notifiers, db, &wg)
|
||||
} else {
|
||||
log.Debugf("waiting for intermediate jobs to end")
|
||||
wg.Wait()
|
||||
jobsCount = 0
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -190,8 +219,8 @@ func main() {
|
|||
wg.Wait()
|
||||
}
|
||||
|
||||
// For a given shop, fetch and parse its URL, then eventually send notifications
|
||||
func handleProducts(shop Shop, parser *URLParser, notifiers []Notifier, db *gorm.DB, wg *sync.WaitGroup) {
|
||||
// For a given shop, use the parser to return a list of products, then eventually send notifications
|
||||
func handleProducts(shop Shop, parser Parser, notifiers []Notifier, db *gorm.DB, wg *sync.WaitGroup) {
|
||||
defer wg.Done()
|
||||
|
||||
log.Debugf("parsing with %s", parser)
|
||||
|
|
10
models.go
10
models.go
|
@ -23,7 +23,13 @@ func (p *Product) Equal(other *Product) bool {
|
|||
|
||||
// IsValid returns true when a Product has all required values
|
||||
func (p *Product) IsValid() bool {
|
||||
return p.Name != "" && p.URL != "" && p.Price != 0 && p.PriceCurrency != ""
|
||||
if p.Name == "" || p.URL == "" {
|
||||
return false
|
||||
}
|
||||
if p.Available && p.PriceCurrency == "" {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// Merge one product with another
|
||||
|
@ -40,6 +46,6 @@ func (p *Product) ToMerge(o *Product) bool {
|
|||
|
||||
// Shop represents a retailer website
|
||||
type Shop struct {
|
||||
ID uint `gorm:"primaryKey"`
|
||||
ID uint `gorm:"primaryKey;autoIncrement"`
|
||||
Name string `gorm:"unique" json:"name"`
|
||||
}
|
||||
|
|
47
parser.go
Normal file
47
parser.go
Normal file
|
@ -0,0 +1,47 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// Parser interface to parse an external service and return a list of products
|
||||
type Parser interface {
|
||||
Parse() ([]*Product, error)
|
||||
String() string
|
||||
}
|
||||
|
||||
// filterInclusive returns a list of products matching the include regex
|
||||
func filterInclusive(includeRegex *regexp.Regexp, products []*Product) []*Product {
|
||||
var filtered []*Product
|
||||
if includeRegex != nil {
|
||||
for _, product := range products {
|
||||
if includeRegex.MatchString(product.Name) {
|
||||
log.Debugf("product %s included because it matches the include regex", product.Name)
|
||||
filtered = append(filtered, product)
|
||||
} else {
|
||||
log.Debugf("product %s excluded because it does not match the include regex", product.Name)
|
||||
}
|
||||
}
|
||||
return filtered
|
||||
}
|
||||
return products
|
||||
}
|
||||
|
||||
// filterExclusive returns a list of products that don't match the exclude regex
|
||||
func filterExclusive(excludeRegex *regexp.Regexp, products []*Product) []*Product {
|
||||
var filtered []*Product
|
||||
if excludeRegex != nil {
|
||||
for _, product := range products {
|
||||
if excludeRegex.MatchString(product.Name) {
|
||||
log.Debugf("product %s excluded because it matches the exclude regex", product.Name)
|
||||
} else {
|
||||
log.Debugf("product %s included because it does not match the exclude regex", product.Name)
|
||||
filtered = append(filtered, product)
|
||||
}
|
||||
}
|
||||
return filtered
|
||||
}
|
||||
return products
|
||||
}
|
158
parser_amazon.go
Normal file
158
parser_amazon.go
Normal file
|
@ -0,0 +1,158 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
paapi5 "github.com/spiegel-im-spiegel/pa-api"
|
||||
"github.com/spiegel-im-spiegel/pa-api/entity"
|
||||
"github.com/spiegel-im-spiegel/pa-api/query"
|
||||
)
|
||||
|
||||
// NewAmazonServer creates an Amazon Server function based on the Marketplace.
|
||||
// The paapi5 marketplaceEnum is not exported, so this type cannot be used in simple map.
|
||||
func NewAmazonServer(marketplace string) *paapi5.Server {
|
||||
switch marketplace {
|
||||
case "www.amazon.fr":
|
||||
return paapi5.New(paapi5.WithMarketplace(paapi5.LocaleFrance))
|
||||
case "www.amazon.com":
|
||||
return paapi5.New(paapi5.WithMarketplace(paapi5.LocaleUnitedStates))
|
||||
default:
|
||||
return paapi5.New() // default Marketplace
|
||||
}
|
||||
}
|
||||
|
||||
// List of availability messages indicating that an offer is in stock
|
||||
var availabilityMessages = []string{"En stock."}
|
||||
|
||||
// AmazonParser structure to handle Amazon parsing logic
|
||||
type AmazonParser struct {
|
||||
client paapi5.Client
|
||||
searches []string
|
||||
includeRegex *regexp.Regexp
|
||||
excludeRegex *regexp.Regexp
|
||||
amazonFulfilled bool
|
||||
amazonMerchant bool
|
||||
affiliateLinks bool
|
||||
}
|
||||
|
||||
// NewAmazonParser to create a new AmazonParser instance
|
||||
func NewAmazonParser(marketplace string, partnerTag string, accessKey string, secretKey string, searches []string, includeRegex string, excludeRegex string, amazonFulfilled bool, amazonMerchant bool, affiliateLinks bool) (*AmazonParser, error) {
|
||||
var err error
|
||||
var includeRegexCompiled, excludeRegexCompiled *regexp.Regexp
|
||||
|
||||
log.Debugf("compiling include name regex")
|
||||
if includeRegex != "" {
|
||||
includeRegexCompiled, err = regexp.Compile(includeRegex)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
log.Debugf("compiling exclude name regex")
|
||||
if excludeRegex != "" {
|
||||
excludeRegexCompiled, err = regexp.Compile(excludeRegex)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return &AmazonParser{
|
||||
client: NewAmazonServer(marketplace).CreateClient(partnerTag, accessKey, secretKey),
|
||||
searches: searches,
|
||||
includeRegex: includeRegexCompiled,
|
||||
excludeRegex: excludeRegexCompiled,
|
||||
amazonFulfilled: amazonFulfilled,
|
||||
amazonMerchant: amazonMerchant,
|
||||
affiliateLinks: affiliateLinks,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Parse Amazon API to return list of products
|
||||
// Implements Parser interface
|
||||
func (p *AmazonParser) Parse() ([]*Product, error) {
|
||||
|
||||
var products []*Product
|
||||
|
||||
for _, search := range p.searches {
|
||||
|
||||
log.Debugf("searching for '%s' on %s", search, p.client.Marketplace())
|
||||
|
||||
// create search request on API
|
||||
q := query.NewSearchItems(
|
||||
p.client.Marketplace(),
|
||||
p.client.PartnerTag(),
|
||||
p.client.PartnerType(),
|
||||
).Search(query.Keywords, search).EnableItemInfo().EnableOffers()
|
||||
body, err := p.client.Request(q)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// decode response
|
||||
res, err := entity.DecodeResponse(body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// decode products
|
||||
for _, item := range res.SearchResult.Items {
|
||||
|
||||
product := &Product{}
|
||||
if !p.affiliateLinks {
|
||||
product.URL = fmt.Sprintf("https://%s/dp/%s", p.client.Marketplace(), item.ASIN)
|
||||
|
||||
} else {
|
||||
product.URL = item.DetailPageURL // includes partner tag
|
||||
}
|
||||
product.Name = item.ItemInfo.Title.DisplayValue
|
||||
|
||||
if item.Offers != nil && *item.Offers.Listings != nil {
|
||||
for _, offer := range *item.Offers.Listings {
|
||||
// detect if product is packaged by Amazon
|
||||
if p.amazonFulfilled && !offer.DeliveryInfo.IsAmazonFulfilled {
|
||||
log.Debugf("excluding offer by '%s' for product '%s' because not fulfilled by Amazon", offer.MerchantInfo.Name, product.Name)
|
||||
continue
|
||||
}
|
||||
|
||||
// detect if product is sold by Amazon
|
||||
if p.amazonMerchant && !strings.HasPrefix(offer.MerchantInfo.Name, "Amazon") {
|
||||
log.Debugf("excluding offer by '%s' for product '%s' because not sold by Amazon", offer.MerchantInfo.Name, product.Name)
|
||||
continue
|
||||
}
|
||||
|
||||
// detect price
|
||||
product.Price = offer.Price.Amount
|
||||
product.PriceCurrency = offer.Price.Currency
|
||||
|
||||
// detect availability
|
||||
if ContainsString(availabilityMessages, offer.Availability.Message) {
|
||||
product.Available = true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
products = append(products, product)
|
||||
}
|
||||
}
|
||||
|
||||
// apply filters
|
||||
products = filterInclusive(p.includeRegex, products)
|
||||
products = filterExclusive(p.excludeRegex, products)
|
||||
|
||||
return products, nil
|
||||
}
|
||||
|
||||
// String to print AmazonParser
|
||||
// Implements the Parser interface
|
||||
func (p *AmazonParser) String() string {
|
||||
return fmt.Sprintf("AmazonParser<%s@%s>", p.client.PartnerTag(), p.client.Marketplace())
|
||||
}
|
||||
|
||||
// ShopName returns shop name from Amazon Marketplace
|
||||
func (p *AmazonParser) ShopName() string {
|
||||
return strings.ReplaceAll(p.client.Marketplace(), "www.", "")
|
||||
}
|
|
@ -2,6 +2,7 @@ package main
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
"testing"
|
||||
)
|
||||
|
||||
|
@ -18,29 +19,32 @@ func TestFilterInclusive(t *testing.T) {
|
|||
|
||||
for i, tc := range tests {
|
||||
t.Run(fmt.Sprintf("TestFilterInclusive#%d", i), func(t *testing.T) {
|
||||
p, err := NewURLParser("", "", tc.regex, "")
|
||||
if err != nil {
|
||||
t.Errorf("failed to initialize parser: %s", err)
|
||||
} else {
|
||||
products := []*Product{{Name: tc.name}}
|
||||
filtered := p.filterInclusive(products)
|
||||
included := false
|
||||
for _, product := range filtered {
|
||||
if product.Name == tc.name && !included {
|
||||
included = true
|
||||
}
|
||||
}
|
||||
if tc.included != included {
|
||||
t.Errorf("regex '%s' for product '%s': got included=%t, want included=%t", tc.regex, tc.name, included, tc.included)
|
||||
} else {
|
||||
if included {
|
||||
t.Logf("regex '%s' includes product '%s'", tc.regex, tc.name)
|
||||
} else {
|
||||
t.Logf("regex '%s' excludes product '%s'", tc.regex, tc.name)
|
||||
}
|
||||
|
||||
var regexCompiled *regexp.Regexp
|
||||
var err error
|
||||
if tc.regex != "" {
|
||||
regexCompiled, err = regexp.Compile(tc.regex)
|
||||
if err != nil {
|
||||
t.Errorf("cannot parse regex '%s'", tc.regex)
|
||||
}
|
||||
}
|
||||
products := []*Product{{Name: tc.name}}
|
||||
filtered := filterInclusive(regexCompiled, products)
|
||||
included := false
|
||||
for _, product := range filtered {
|
||||
if product.Name == tc.name && !included {
|
||||
included = true
|
||||
}
|
||||
}
|
||||
if tc.included != included {
|
||||
t.Errorf("regex '%s' for product '%s': got included=%t, want included=%t", tc.regex, tc.name, included, tc.included)
|
||||
} else {
|
||||
if included {
|
||||
t.Logf("regex '%s' includes product '%s'", tc.regex, tc.name)
|
||||
} else {
|
||||
t.Logf("regex '%s' excludes product '%s'", tc.regex, tc.name)
|
||||
}
|
||||
}
|
||||
|
||||
})
|
||||
}
|
||||
}
|
||||
|
@ -58,27 +62,30 @@ func TestFilterExclusive(t *testing.T) {
|
|||
|
||||
for i, tc := range tests {
|
||||
t.Run(fmt.Sprintf("TestFilterExclusive#%d", i), func(t *testing.T) {
|
||||
p, err := NewURLParser("", "", "", tc.regex)
|
||||
if err != nil {
|
||||
t.Errorf("failed to initialize parser: %s", err)
|
||||
} else {
|
||||
products := []*Product{{Name: tc.name}}
|
||||
filtered := p.filterExclusive(products)
|
||||
included := false
|
||||
for _, product := range filtered {
|
||||
if product.Name == tc.name && !included {
|
||||
included = true
|
||||
}
|
||||
var regexCompiled *regexp.Regexp
|
||||
var err error
|
||||
if tc.regex != "" {
|
||||
regexCompiled, err = regexp.Compile(tc.regex)
|
||||
if err != nil {
|
||||
t.Errorf("cannot parse regex '%s'", tc.regex)
|
||||
}
|
||||
if tc.included != included {
|
||||
t.Errorf("regex '%s' for product '%s': got included=%t, want included=%t", tc.regex, tc.name, included, tc.included)
|
||||
} else {
|
||||
if included {
|
||||
t.Logf("regex '%s' includes product '%s'", tc.regex, tc.name)
|
||||
} else {
|
||||
t.Logf("regex '%s' excludes product '%s'", tc.regex, tc.name)
|
||||
}
|
||||
}
|
||||
|
||||
products := []*Product{{Name: tc.name}}
|
||||
filtered := filterExclusive(regexCompiled, products)
|
||||
included := false
|
||||
for _, product := range filtered {
|
||||
if product.Name == tc.name && !included {
|
||||
included = true
|
||||
}
|
||||
}
|
||||
if tc.included != included {
|
||||
t.Errorf("regex '%s' for product '%s': got included=%t, want included=%t", tc.regex, tc.name, included, tc.included)
|
||||
} else {
|
||||
if included {
|
||||
t.Logf("regex '%s' includes product '%s'", tc.regex, tc.name)
|
||||
} else {
|
||||
t.Logf("regex '%s' excludes product '%s'", tc.regex, tc.name)
|
||||
}
|
||||
}
|
||||
})
|
|
@ -22,6 +22,8 @@ type URLParser struct {
|
|||
ctx context.Context
|
||||
}
|
||||
|
||||
// String to print URLParser
|
||||
// Implements the Parser interface
|
||||
func (p *URLParser) String() string {
|
||||
return fmt.Sprintf("URLParser<%s>", p.url)
|
||||
}
|
||||
|
@ -61,6 +63,7 @@ func NewURLParser(url string, browserAddress string, includeRegex string, exclud
|
|||
}
|
||||
|
||||
// Parse a website to return list of products
|
||||
// Implements Parser interface
|
||||
// TODO: redirect output to logger
|
||||
func (p *URLParser) Parse() ([]*Product, error) {
|
||||
shopName, err := ExtractShopName(p.url)
|
||||
|
@ -89,46 +92,12 @@ func (p *URLParser) Parse() ([]*Product, error) {
|
|||
}
|
||||
|
||||
// apply filters
|
||||
products = p.filterInclusive(products)
|
||||
products = p.filterExclusive(products)
|
||||
products = filterInclusive(p.includeRegex, products)
|
||||
products = filterExclusive(p.excludeRegex, products)
|
||||
|
||||
return products, nil
|
||||
}
|
||||
|
||||
// filterInclusive returns a list of products matching the include regex
|
||||
func (p *URLParser) filterInclusive(products []*Product) []*Product {
|
||||
var filtered []*Product
|
||||
if p.includeRegex != nil {
|
||||
for _, product := range products {
|
||||
if p.includeRegex.MatchString(product.Name) {
|
||||
log.Debugf("product %s included because it matches the include regex", product.Name)
|
||||
filtered = append(filtered, product)
|
||||
} else {
|
||||
log.Debugf("product %s excluded because it does not match the include regex", product.Name)
|
||||
}
|
||||
}
|
||||
return filtered
|
||||
}
|
||||
return products
|
||||
}
|
||||
|
||||
// filterExclusive returns a list of products that don't match the exclude regex
|
||||
func (p *URLParser) filterExclusive(products []*Product) []*Product {
|
||||
var filtered []*Product
|
||||
if p.excludeRegex != nil {
|
||||
for _, product := range products {
|
||||
if p.excludeRegex.MatchString(product.Name) {
|
||||
log.Debugf("product %s excluded because it matches the exclude regex", product.Name)
|
||||
} else {
|
||||
log.Debugf("product %s included because it does not match the exclude regex", product.Name)
|
||||
filtered = append(filtered, product)
|
||||
}
|
||||
}
|
||||
return filtered
|
||||
}
|
||||
return products
|
||||
}
|
||||
|
||||
func createQuery(shopName string, url string) (string, error) {
|
||||
switch shopName {
|
||||
case "cybertek.fr":
|
||||
|
|
10
utils.go
10
utils.go
|
@ -16,3 +16,13 @@ func ExtractShopName(link string) (name string, err error) {
|
|||
re := regexp.MustCompile(`^www\.`)
|
||||
return strings.ToLower(re.ReplaceAllString(u.Hostname(), "")), nil
|
||||
}
|
||||
|
||||
// ContainsString reports whether str is equal to one of the elements of arr.
// A nil or empty arr never contains anything.
func ContainsString(arr []string, str string) bool {
	for i := 0; i < len(arr); i++ {
		if arr[i] == str {
			return true
		}
	}
	return false
}
|
||||
|
|
Reference in a new issue