Archived
1
0
Fork 0

Add Amazon support (#3)

This commit introduces the Amazon support with calls to the Product Advertising
API (PA API). For now, I was only able to use the "www.amazon.fr" marketplace.
I will add more marketplaces when my Amazon Associate accounts will be
validated.

Signed-off-by: Julien Riou <julien@riou.xyz>
This commit is contained in:
Julien Riou 2021-03-31 17:48:47 +02:00
parent f994093baf
commit 5ac5f78ae2
No known key found for this signature in database
GPG key ID: FF42D23B580C89F7
11 changed files with 399 additions and 116 deletions

View file

@ -14,6 +14,14 @@ docker run --name chromium --rm -d -p 9222:9222 montferret/chromium
Or get inspired by the [source code](https://github.com/MontFerret/chromium) to run it on your own.
### Amazon (optional)
To access the [Product Advertising API](https://webservices.amazon.com/paapi5/documentation/) and start to notify for Amazon products, you will need to have a valid [Amazon Associates](https://affiliate-program.amazon.com) account in the [Marketplace](https://github.com/spiegel-im-spiegel/pa-api/blob/v0.9.0/marketplace.go#L36) of your choice. You will then be able to retrieve your **partner tag**, and the **Marketplace name** obviously.
Once your account has been validated, you can request access to the Product Advertising API (PA API) to retrieve your **access key** and your **secret key**.
Ensure you follow the **terms of service** before subscribing to the Amazon Associates program and using the PA API.
### Twitter (optional)
Follow [this procedure](https://github.com/jouir/twitter-login) to generate all the required settings:
@ -108,7 +116,15 @@ Default file is `restockbot.json` in the current directory. The file name can be
Options:
* `urls`: list of retailers web pages
* `urls` (optional): list of retailers web pages
* `amazon` (optional)
* `searches`: list of keywords to search for (ex: `["nvidia rtx", "amd rx"]`)
* `access_key`: access key to access the [Product Advertising API](https://webservices.amazon.com/paapi5/documentation/)
* `secret_key`: secret key to access the [Product Advertising API](https://webservices.amazon.com/paapi5/documentation/)
* `marketplaces`: list of documents containing a Marketplace `name` and a `partner_tag` (ex: `{"marketplaces":[{"name": "www.amazon.com", "partner_tag": "mytag-01"}]}`)
* `amazon_fulfilled`: include only products packaged by Amazon
* `amazon_merchant`: include only products sold by Amazon
* `affiliate_links`: generate affiliate links with the partner tag
* `twitter` (optional):
* `consumer_key`: API key of your Twitter application
* `consumer_secret`: API secret of your Twitter application

View file

@ -11,6 +11,7 @@ type Config struct {
TwitterConfig `json:"twitter"`
TelegramConfig `json:"telegram"`
ApiConfig `json:"api"`
AmazonConfig `json:"amazon"`
URLs []string `json:"urls"`
IncludeRegex string `json:"include_regex"`
ExcludeRegex string `json:"exclude_regex"`
@ -40,6 +41,20 @@ type ApiConfig struct {
Keyfile string `json:"key_file"`
}
// AmazonConfig stores the Product Advertising API (PA API) secrets and the
// search settings of the Amazon support
type AmazonConfig struct {
// Searches is the list of keywords to search for (ex: "nvidia rtx")
Searches []string `json:"searches"`
// AccessKey is the access key to access the Product Advertising API
AccessKey string `json:"access_key"`
// SecretKey is the secret key to access the Product Advertising API
SecretKey string `json:"secret_key"`
// Marketplaces is the list of Marketplace names (ex: "www.amazon.com"),
// each one with its partner tag
Marketplaces []struct {
Name string `json:"name"`
PartnerTag string `json:"partner_tag"`
} `json:"marketplaces"`
// AmazonFulfilled includes only products packaged by Amazon
AmazonFulfilled bool `json:"amazon_fulfilled"`
// AmazonMerchant includes only products sold by Amazon
AmazonMerchant bool `json:"amazon_merchant"`
// AffiliateLinks generates affiliate links with the partner tag
AffiliateLinks bool `json:"affiliate_links"`
}
// NewConfig creates a Config struct
func NewConfig() *Config {
return &Config{}
@ -73,3 +88,22 @@ func (c *Config) HasTwitter() bool {
func (c *Config) HasTelegram() bool {
return c.TelegramConfig.Token != "" && (c.TelegramConfig.ChatID != 0 || c.TelegramConfig.ChannelName != "")
}
// HasURLs reports whether at least one retailer URL has been configured
func (c *Config) HasURLs() bool {
	return len(c.URLs) != 0
}
// HasAmazon reports whether Amazon is usable: both API keys are set, at least
// one search keyword is defined and at least one marketplace has both a name
// and a partner tag
func (c *Config) HasAmazon() bool {
	amazon := &c.AmazonConfig

	// both API keys are mandatory
	if amazon.AccessKey == "" || amazon.SecretKey == "" {
		return false
	}

	// nothing to look for without keywords
	if len(amazon.Searches) == 0 {
		return false
	}

	// a single complete marketplace is enough
	for i := range amazon.Marketplaces {
		if amazon.Marketplaces[i].Name != "" && amazon.Marketplaces[i].PartnerTag != "" {
			return true
		}
	}
	return false
}

1
go.mod
View file

@ -9,6 +9,7 @@ require (
github.com/go-telegram-bot-api/telegram-bot-api/v5 v5.0.0-rc1
github.com/gorilla/mux v1.8.0
github.com/sirupsen/logrus v1.8.0
github.com/spiegel-im-spiegel/pa-api v0.9.0 // indirect
gorm.io/driver/sqlite v1.1.4
gorm.io/gorm v1.20.12
)

6
go.sum
View file

@ -101,6 +101,12 @@ github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d h1:zE9ykE
github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc=
github.com/smartystreets/goconvey v1.6.4 h1:fv0U8FUIMPNf1L9lnHLvLhgicrIVChEkdzIKYqbNC9s=
github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
github.com/spiegel-im-spiegel/errs v1.0.2 h1:v4amEwRDqRWjKHOILQnJSovYhZ4ZttEnBBXNXEzS6Sc=
github.com/spiegel-im-spiegel/errs v1.0.2/go.mod h1:UoasJYYujMcdkbT9USv8dfZWoMyaY3btqQxoLJImw0A=
github.com/spiegel-im-spiegel/fetch v0.2.3 h1:Zh5rHvOjfC81rxKvtUD21JT609smds+BRh+H84s8qEw=
github.com/spiegel-im-spiegel/fetch v0.2.3/go.mod h1:ePIXxdC9OvSarXEO6HW1MgQwtBaKQo0qgDLOhKFXkQ0=
github.com/spiegel-im-spiegel/pa-api v0.9.0 h1:xbrPJDAbDf0dzYu7BRfIr0pdHQDYpvsQmcjitpaOxP8=
github.com/spiegel-im-spiegel/pa-api v0.9.0/go.mod h1:DYAuXUPAi1xrNroBybPo/JIzPXo3VCAF/33mEJgf9hU=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=

53
main.go
View file

@ -140,7 +140,12 @@ func main() {
}
}
// Group links by shop
// create shops and parsers
var shop Shop
parsers := []Parser{}
if config.HasURLs() {
// group links by shop
ShopsMap := make(map[string][]string)
for _, link := range config.URLs {
@ -152,15 +157,8 @@ func main() {
}
}
// parse asynchronously
var wg sync.WaitGroup
jobsCount := 0
// start with URLs
for shopName, shopLinks := range ShopsMap {
// read shop from database or create it
var shop Shop
trx := db.Where(Shop{Name: shopName}).FirstOrCreate(&shop)
if trx.Error != nil {
log.Errorf("cannot create or select shop %s to/from database: %s", shopName, trx.Error)
@ -168,13 +166,45 @@ func main() {
}
for _, link := range shopLinks {
if jobsCount < *workers {
// create parser
parser, err := NewURLParser(link, config.BrowserAddress, config.IncludeRegex, config.ExcludeRegex)
if err != nil {
log.Warnf("could not create URL parser for %s", link)
continue
}
parsers = append(parsers, parser)
log.Debugf("parser %s registered", parser)
}
}
}
if config.HasAmazon() {
for _, marketplace := range config.AmazonConfig.Marketplaces {
// create parser
parser, err := NewAmazonParser(marketplace.Name, marketplace.PartnerTag, config.AmazonConfig.AccessKey, config.AmazonConfig.SecretKey, config.AmazonConfig.Searches, config.IncludeRegex, config.ExcludeRegex, config.AmazonConfig.AmazonFulfilled, config.AmazonConfig.AmazonMerchant, config.AmazonConfig.AffiliateLinks)
if err != nil {
log.Warnf("could not create Amazon parser: %s", err)
continue
}
// read shop from database or create it
trx := db.Where(Shop{Name: parser.ShopName()}).FirstOrCreate(&shop)
if trx.Error != nil {
log.Errorf("cannot create or select shop %s to/from database: %s", parser.ShopName(), trx.Error)
continue
}
parsers = append(parsers, parser)
log.Debugf("parser %s registered", parser)
}
}
// parse asynchronously
var wg sync.WaitGroup
jobsCount := 0
for _, parser := range parsers {
if jobsCount < *workers {
wg.Add(1)
jobsCount++
go handleProducts(shop, parser, notifiers, db, &wg)
@ -184,14 +214,13 @@ func main() {
jobsCount = 0
}
}
}
log.Debugf("waiting for all jobs to end")
wg.Wait()
}
// For a given shop, fetch and parse its URL, then eventually send notifications
func handleProducts(shop Shop, parser *URLParser, notifiers []Notifier, db *gorm.DB, wg *sync.WaitGroup) {
// For a given shop, use the parser to return a list of products, then eventually send notifications
func handleProducts(shop Shop, parser Parser, notifiers []Notifier, db *gorm.DB, wg *sync.WaitGroup) {
defer wg.Done()
log.Debugf("parsing with %s", parser)

View file

@ -23,7 +23,13 @@ func (p *Product) Equal(other *Product) bool {
// IsValid returns true when a Product has all required values
func (p *Product) IsValid() bool {
return p.Name != "" && p.URL != "" && p.Price != 0 && p.PriceCurrency != ""
if p.Name == "" || p.URL == "" {
return false
}
if p.Available && p.PriceCurrency == "" {
return false
}
return true
}
// Merge one product with another
@ -40,6 +46,6 @@ func (p *Product) ToMerge(o *Product) bool {
// Shop represents a retailer website
type Shop struct {
ID uint `gorm:"primaryKey"`
ID uint `gorm:"primaryKey;autoIncrement"`
Name string `gorm:"unique" json:"name"`
}

47
parser.go Normal file
View file

@ -0,0 +1,47 @@
package main
import (
"regexp"
log "github.com/sirupsen/logrus"
)
// Parser interface to parse an external service and return a list of products
type Parser interface {
// Parse returns the list of products found, or an error
Parse() ([]*Product, error)
// String returns a printable name of the parser
String() string
}
// filterInclusive keeps only the products whose name matches the include regex.
// Without regex (nil), the list of products is returned untouched.
func filterInclusive(includeRegex *regexp.Regexp, products []*Product) []*Product {
	if includeRegex == nil {
		return products
	}

	var kept []*Product
	for _, candidate := range products {
		if !includeRegex.MatchString(candidate.Name) {
			log.Debugf("product %s excluded because it does not match the include regex", candidate.Name)
			continue
		}
		log.Debugf("product %s included because it matches the include regex", candidate.Name)
		kept = append(kept, candidate)
	}
	return kept
}
// filterExclusive drops the products whose name matches the exclude regex.
// Without regex (nil), the list of products is returned untouched.
func filterExclusive(excludeRegex *regexp.Regexp, products []*Product) []*Product {
	if excludeRegex == nil {
		return products
	}

	var kept []*Product
	for _, candidate := range products {
		if !excludeRegex.MatchString(candidate.Name) {
			log.Debugf("product %s included because it does not match the exclude regex", candidate.Name)
			kept = append(kept, candidate)
			continue
		}
		log.Debugf("product %s excluded because it matches the exclude regex", candidate.Name)
	}
	return kept
}

158
parser_amazon.go Normal file
View file

@ -0,0 +1,158 @@
package main
import (
"fmt"
"regexp"
"strings"
log "github.com/sirupsen/logrus"
paapi5 "github.com/spiegel-im-spiegel/pa-api"
"github.com/spiegel-im-spiegel/pa-api/entity"
"github.com/spiegel-im-spiegel/pa-api/query"
)
// NewAmazonServer creates the PA API server matching the Marketplace name.
// The paapi5 marketplaceEnum type is not exported, so a simple map from name
// to locale cannot be used here.
// Any unknown name falls back to the default Marketplace of the library.
func NewAmazonServer(marketplace string) *paapi5.Server {
	if marketplace == "www.amazon.fr" {
		return paapi5.New(paapi5.WithMarketplace(paapi5.LocaleFrance))
	}
	if marketplace == "www.amazon.com" {
		return paapi5.New(paapi5.WithMarketplace(paapi5.LocaleUnitedStates))
	}
	return paapi5.New() // default Marketplace
}
// availabilityMessages is the list of offer availability messages meaning
// that a product is in stock (only the French message for now)
var availabilityMessages = []string{"En stock."}
// AmazonParser structure to handle Amazon parsing logic
type AmazonParser struct {
// client of the PA API, created for a Marketplace with the partner tag
// and the API keys
client paapi5.Client
// list of keywords, each one is searched separately
searches []string
// when set, only products with a matching name are kept
includeRegex *regexp.Regexp
// when set, products with a matching name are dropped
excludeRegex *regexp.Regexp
// ignore offers that are not fulfilled by Amazon
amazonFulfilled bool
// ignore offers with a merchant name not starting with "Amazon"
amazonMerchant bool
// use the detail page URL (includes partner tag) instead of the plain
// product URL built from the ASIN
affiliateLinks bool
}
// NewAmazonParser builds an AmazonParser for the given Marketplace.
// Include and exclude regexes are optional (empty string to disable). An
// error is returned when one of them cannot be compiled.
func NewAmazonParser(marketplace string, partnerTag string, accessKey string, secretKey string, searches []string, includeRegex string, excludeRegex string, amazonFulfilled bool, amazonMerchant bool, affiliateLinks bool) (*AmazonParser, error) {
	parser := &AmazonParser{
		searches:        searches,
		amazonFulfilled: amazonFulfilled,
		amazonMerchant:  amazonMerchant,
		affiliateLinks:  affiliateLinks,
	}

	log.Debugf("compiling include name regex")
	if includeRegex != "" {
		compiled, err := regexp.Compile(includeRegex)
		if err != nil {
			return nil, err
		}
		parser.includeRegex = compiled
	}

	log.Debugf("compiling exclude name regex")
	if excludeRegex != "" {
		compiled, err := regexp.Compile(excludeRegex)
		if err != nil {
			return nil, err
		}
		parser.excludeRegex = compiled
	}

	// the client is created last, once both regexes are known to be valid
	parser.client = NewAmazonServer(marketplace).CreateClient(partnerTag, accessKey, secretKey)
	return parser, nil
}
// Parse Amazon API to return list of products
// Implements Parser interface
//
// Each configured keyword is searched on the API. Every returned item becomes
// a product, even when it has no usable offer (it is then left unavailable
// and without price). Offers are read in order: the price of each accepted
// offer replaces the previous one and the loop stops at the first offer with
// a known availability message. Include and exclude filters are applied on
// the whole list at the end.
// The first request or decoding error aborts the parsing.
func (p *AmazonParser) Parse() ([]*Product, error) {
	var products []*Product

	for _, search := range p.searches {
		log.Debugf("searching for '%s' on %s", search, p.client.Marketplace())

		// create search request on API
		q := query.NewSearchItems(
			p.client.Marketplace(),
			p.client.PartnerTag(),
			p.client.PartnerType(),
		).Search(query.Keywords, search).EnableItemInfo().EnableOffers()
		body, err := p.client.Request(q)
		if err != nil {
			return nil, err
		}

		// decode response
		res, err := entity.DecodeResponse(body)
		if err != nil {
			return nil, err
		}

		// decode products
		// NOTE(review): SearchResult, ItemInfo and Title are used without any
		// check, confirm the API always returns them for a search
		for _, item := range res.SearchResult.Items {
			product := &Product{}
			if !p.affiliateLinks {
				product.URL = fmt.Sprintf("https://%s/dp/%s", p.client.Marketplace(), item.ASIN)
			} else {
				product.URL = item.DetailPageURL // includes partner tag
			}
			product.Name = item.ItemInfo.Title.DisplayValue

			// Listings is a pointer to a list: check the pointer itself,
			// dereferencing it first would panic when it is nil
			if item.Offers != nil && item.Offers.Listings != nil {
				for _, offer := range *item.Offers.Listings {
					// detect if product is packaged by Amazon
					if p.amazonFulfilled && !offer.DeliveryInfo.IsAmazonFulfilled {
						log.Debugf("excluding offer by '%s' for product '%s' because not fulfilled by Amazon", offer.MerchantInfo.Name, product.Name)
						continue
					}

					// detect if product is sold by Amazon
					if p.amazonMerchant && !strings.HasPrefix(offer.MerchantInfo.Name, "Amazon") {
						log.Debugf("excluding offer by '%s' for product '%s' because not sold by Amazon", offer.MerchantInfo.Name, product.Name)
						continue
					}

					// detect price
					product.Price = offer.Price.Amount
					product.PriceCurrency = offer.Price.Currency

					// detect availability
					if ContainsString(availabilityMessages, offer.Availability.Message) {
						product.Available = true
						break
					}
				}
			}
			products = append(products, product)
		}
	}

	// apply filters
	products = filterInclusive(p.includeRegex, products)
	products = filterExclusive(p.excludeRegex, products)
	return products, nil
}
// String returns a printable name made of the partner tag and the Marketplace
// Implements the Parser interface
func (p *AmazonParser) String() string {
	tag, marketplace := p.client.PartnerTag(), p.client.Marketplace()
	return fmt.Sprintf("AmazonParser<%s@%s>", tag, marketplace)
}
// ShopName returns the Marketplace name without any "www." in it, to be used
// as the shop name
func (p *AmazonParser) ShopName() string {
	marketplace := p.client.Marketplace()
	return strings.ReplaceAll(marketplace, "www.", "")
}

View file

@ -2,6 +2,7 @@ package main
import (
"fmt"
"regexp"
"testing"
)
@ -18,12 +19,16 @@ func TestFilterInclusive(t *testing.T) {
for i, tc := range tests {
t.Run(fmt.Sprintf("TestFilterInclusive#%d", i), func(t *testing.T) {
p, err := NewURLParser("", "", tc.regex, "")
var regexCompiled *regexp.Regexp
var err error
if tc.regex != "" {
regexCompiled, err = regexp.Compile(tc.regex)
if err != nil {
t.Errorf("failed to initialize parser: %s", err)
} else {
t.Errorf("cannot parse regex '%s'", tc.regex)
}
}
products := []*Product{{Name: tc.name}}
filtered := p.filterInclusive(products)
filtered := filterInclusive(regexCompiled, products)
included := false
for _, product := range filtered {
if product.Name == tc.name && !included {
@ -38,9 +43,8 @@ func TestFilterInclusive(t *testing.T) {
} else {
t.Logf("regex '%s' excludes product '%s'", tc.regex, tc.name)
}
}
}
}
})
}
}
@ -58,12 +62,17 @@ func TestFilterExclusive(t *testing.T) {
for i, tc := range tests {
t.Run(fmt.Sprintf("TestFilterExclusive#%d", i), func(t *testing.T) {
p, err := NewURLParser("", "", "", tc.regex)
var regexCompiled *regexp.Regexp
var err error
if tc.regex != "" {
regexCompiled, err = regexp.Compile(tc.regex)
if err != nil {
t.Errorf("failed to initialize parser: %s", err)
} else {
t.Errorf("cannot parse regex '%s'", tc.regex)
}
}
products := []*Product{{Name: tc.name}}
filtered := p.filterExclusive(products)
filtered := filterExclusive(regexCompiled, products)
included := false
for _, product := range filtered {
if product.Name == tc.name && !included {
@ -78,8 +87,6 @@ func TestFilterExclusive(t *testing.T) {
} else {
t.Logf("regex '%s' excludes product '%s'", tc.regex, tc.name)
}
}
}
})
}

View file

@ -22,6 +22,8 @@ type URLParser struct {
ctx context.Context
}
// String to print URLParser
// Implements the Parser interface
func (p *URLParser) String() string {
return fmt.Sprintf("URLParser<%s>", p.url)
}
@ -61,6 +63,7 @@ func NewURLParser(url string, browserAddress string, includeRegex string, exclud
}
// Parse a website to return list of products
// Implements Parser interface
// TODO: redirect output to logger
func (p *URLParser) Parse() ([]*Product, error) {
shopName, err := ExtractShopName(p.url)
@ -89,46 +92,12 @@ func (p *URLParser) Parse() ([]*Product, error) {
}
// apply filters
products = p.filterInclusive(products)
products = p.filterExclusive(products)
products = filterInclusive(p.includeRegex, products)
products = filterExclusive(p.excludeRegex, products)
return products, nil
}
// filterInclusive returns a list of products matching the include regex
func (p *URLParser) filterInclusive(products []*Product) []*Product {
var filtered []*Product
if p.includeRegex != nil {
for _, product := range products {
if p.includeRegex.MatchString(product.Name) {
log.Debugf("product %s included because it matches the include regex", product.Name)
filtered = append(filtered, product)
} else {
log.Debugf("product %s excluded because it does not match the include regex", product.Name)
}
}
return filtered
}
return products
}
// filterExclusive returns a list of products that don't match the exclude regex
func (p *URLParser) filterExclusive(products []*Product) []*Product {
var filtered []*Product
if p.excludeRegex != nil {
for _, product := range products {
if p.excludeRegex.MatchString(product.Name) {
log.Debugf("product %s excluded because it matches the exclude regex", product.Name)
} else {
log.Debugf("product %s included because it does not match the exclude regex", product.Name)
filtered = append(filtered, product)
}
}
return filtered
}
return products
}
func createQuery(shopName string, url string) (string, error) {
switch shopName {
case "cybertek.fr":

View file

@ -16,3 +16,13 @@ func ExtractShopName(link string) (name string, err error) {
re := regexp.MustCompile(`^www\.`)
return strings.ToLower(re.ReplaceAllString(u.Hostname(), "")), nil
}
// ContainsString reports whether str is exactly equal to one of the elements
// of arr. A nil or empty arr never contains anything.
func ContainsString(arr []string, str string) bool {
	for i := 0; i < len(arr); i++ {
		if arr[i] == str {
			return true
		}
	}
	return false
}