diff --git a/filter.go b/filter.go
new file mode 100644
index 0000000..32b0fc3
--- /dev/null
+++ b/filter.go
@@ -0,0 +1,7 @@
+package main
+
+// Filter decides whether a product is kept for further processing.
+type Filter interface {
+	// Include reports whether the product passes the filter.
+	Include(*Product) bool
+}
diff --git a/filter_exclude.go b/filter_exclude.go
new file mode 100644
index 0000000..3aa1ae3
--- /dev/null
+++ b/filter_exclude.go
@@ -0,0 +1,42 @@
+package main
+
+import (
+	"regexp"
+
+	log "github.com/sirupsen/logrus"
+)
+
+// ExcludeFilter rejects products whose name matches a regular expression.
+type ExcludeFilter struct {
+	regex *regexp.Regexp // nil when no exclude regex is configured
+}
+
+// NewExcludeFilter compiles regex and returns an ExcludeFilter using it.
+// An empty regex yields a filter that includes every product; an invalid
+// regex returns the compilation error.
+func NewExcludeFilter(regex string) (*ExcludeFilter, error) {
+	log.Debugf("compiling exclude filter regex")
+	if regex == "" {
+		return &ExcludeFilter{}, nil
+	}
+
+	compiledRegex, err := regexp.Compile(regex)
+	if err != nil {
+		return nil, err
+	}
+	return &ExcludeFilter{regex: compiledRegex}, nil
+}
+
+// Include reports whether the product is kept, i.e. whether its name does
+// not match the exclude regex. It implements the Filter interface.
+func (f *ExcludeFilter) Include(product *Product) bool {
+	if f.regex == nil {
+		return true
+	}
+	if f.regex.MatchString(product.Name) {
+		log.Debugf("product %s excluded because it matches the exclude regex", product.Name)
+		return false
+	}
+	log.Debugf("product %s included because it doesn't match the exclude regex", product.Name)
+	return true
+}
diff --git a/filter_exclude_test.go b/filter_exclude_test.go
new file mode 100644
index 0000000..6fbf4cc
--- /dev/null
+++ b/filter_exclude_test.go
@@ -0,0 +1,47 @@
+package main
+
+import (
+	"fmt"
+	"testing"
+)
+
+func TestExcludeFilter(t *testing.T) {
+	tests := []struct {
+		regex    string // exclusive regex
+		name     string // product name
+		included bool   // should be included or not
+	}{
+		{"(?i)(rtx|rx)(.*)(3060|3070|3080|3090|5700|6800|6900)( )?(xt|ti)?", "MSI GeForce RTX 3060 GAMING X", false},           // 3060 in the exclude regex
+		{"(?i)(rtx|rx)(.*)(3060|3070|3080|3090|5700|6800|6900)( )?(xt|ti)?", "ASUS AMD Radeon RX 5600 XT TUF Gaming X3", true}, // 5600 not in the exclude regex
+		{"", "MSI GeForce RTX 3060 GAMING X", true},                                                                            // do nothing when the exclude regex is empty
+	}
+
+	for i, tc := range tests {
+		t.Run(fmt.Sprintf("TestExcludeFilter#%d", i), func(t *testing.T) {
+			product := &Product{Name: tc.name}
+			filter, err := NewExcludeFilter(tc.regex)
+			if err != nil {
+				// stop here: filter is nil and calling Include on it would panic
+				t.Fatalf("cannot create filter with regex '%s': %s", tc.regex, err)
+			}
+
+			included := filter.Include(product)
+
+			switch {
+			case included != tc.included:
+				t.Errorf("regex '%s' for product '%s': got included=%t, want included=%t", tc.regex, tc.name, included, tc.included)
+			case included:
+				t.Logf("regex '%s' includes product '%s'", tc.regex, tc.name)
+			default:
+				t.Logf("regex '%s' excludes product '%s'", tc.regex, tc.name)
+			}
+		})
+	}
+}
+
+// TestExcludeFilterInvalidRegex checks that a malformed regex is reported.
+func TestExcludeFilterInvalidRegex(t *testing.T) {
+	if _, err := NewExcludeFilter("("); err == nil {
+		t.Error("expected an error for an invalid regex, got nil")
+	}
+}
diff --git a/filter_include.go b/filter_include.go
new file mode 100644
index 0000000..1d45aa1
--- /dev/null
+++ b/filter_include.go
@@ -0,0 +1,42 @@
+package main
+
+import (
+	"regexp"
+
+	log "github.com/sirupsen/logrus"
+)
+
+// IncludeFilter keeps only products whose name matches a regular expression.
+type IncludeFilter struct {
+	regex *regexp.Regexp // nil when no include regex is configured
+}
+
+// NewIncludeFilter compiles regex and returns an IncludeFilter using it.
+// An empty regex yields a filter that includes every product; an invalid
+// regex returns the compilation error.
+func NewIncludeFilter(regex string) (*IncludeFilter, error) {
+	log.Debugf("compiling include filter regex")
+	if regex == "" {
+		return &IncludeFilter{}, nil
+	}
+
+	compiledRegex, err := regexp.Compile(regex)
+	if err != nil {
+		return nil, err
+	}
+	return &IncludeFilter{regex: compiledRegex}, nil
+}
+
+// Include reports whether the product is kept, i.e. whether its name
+// matches the include regex. It implements the Filter interface.
+func (f *IncludeFilter) Include(product *Product) bool {
+	if f.regex == nil {
+		return true
+	}
+	if f.regex.MatchString(product.Name) {
+		log.Debugf("product %s included because it matches the include regex", product.Name)
+		return true
+	}
+	log.Debugf("product %s excluded because it doesn't match the include regex", product.Name)
+	return false
+}
diff --git a/filter_include_test.go b/filter_include_test.go
new file mode 100644
index 0000000..71e5004
--- /dev/null
+++ b/filter_include_test.go
@@ -0,0 +1,47 @@
+package main
+
+import (
+	"fmt"
+	"testing"
+)
+
+func TestIncludeFilter(t *testing.T) {
+	tests := []struct {
+		regex    string // inclusive regex
+		name     string // product name
+		included bool   // should be included or not
+	}{
+		{"(?i)(rtx|rx)(.*)(3060|3070|3080|3090|5700|6800|6900)( )?(xt|ti)?", "MSI GeForce RTX 3060 GAMING X", true},             // 3060 in the include regex
+		{"(?i)(rtx|rx)(.*)(3060|3070|3080|3090|5700|6800|6900)( )?(xt|ti)?", "ASUS AMD Radeon RX 5600 XT TUF Gaming X3", false}, // 5600 not in the include regex
+		{"", "MSI GeForce RTX 3060 GAMING X", true},                                                                             // do nothing when the include regex is empty
+	}
+
+	for i, tc := range tests {
+		t.Run(fmt.Sprintf("TestIncludeFilter#%d", i), func(t *testing.T) {
+			product := &Product{Name: tc.name}
+			filter, err := NewIncludeFilter(tc.regex)
+			if err != nil {
+				// stop here: filter is nil and calling Include on it would panic
+				t.Fatalf("cannot create filter with regex '%s': %s", tc.regex, err)
+			}
+
+			included := filter.Include(product)
+
+			switch {
+			case included != tc.included:
+				t.Errorf("regex '%s' for product '%s': got included=%t, want included=%t", tc.regex, tc.name, included, tc.included)
+			case included:
+				t.Logf("regex '%s' includes product '%s'", tc.regex, tc.name)
+			default:
+				t.Logf("regex '%s' excludes product '%s'", tc.regex, tc.name)
+			}
+		})
+	}
+}
+
+// TestIncludeFilterInvalidRegex checks that a malformed regex is reported.
+func TestIncludeFilterInvalidRegex(t *testing.T) {
+	if _, err := NewIncludeFilter("("); err == nil {
+		t.Error("expected an error for an invalid regex, got nil")
+	}
+}
diff --git a/main.go b/main.go
index 6a88942..32b22f2 100644
--- a/main.go
+++ b/main.go
@@ -163,28 +163,37 @@ func main() {
 		}
 	}
 
+	// register filters
+	filters := []Filter{}
+	if config.IncludeRegex != "" {
+		includeFilter, err := NewIncludeFilter(config.IncludeRegex)
+		if err != nil {
+			log.Fatalf("cannot create include filter: %s", err)
+		}
+		filters = append(filters, includeFilter)
+	}
+	if config.ExcludeRegex != "" {
+		excludeFilter, err := NewExcludeFilter(config.ExcludeRegex)
+		if err != nil {
+			log.Fatalf("cannot create exclude filter: %s", err)
+		}
+		filters = append(filters, excludeFilter)
+	}
+
 	// create parsers
 	parsers := []Parser{}
 
 	if config.HasURLs() {
+		// create a parser for all web pages
 		for _, url := range config.URLs {
-			// create parser
-			parser, err := NewURLParser(url, config.BrowserAddress, config.IncludeRegex, config.ExcludeRegex)
-			if err != nil {
-				log.Warnf("could not create URL parser for '%s'", url)
-				continue
-			}
+			parser := NewURLParser(url, config.BrowserAddress)
 			parsers = append(parsers, parser)
 			log.Debugf("parser %s registered", parser)
 		}
 	}
 
 	if config.HasAmazon() {
+		// create a parser for all marketplaces
 		for _, marketplace := range config.AmazonConfig.Marketplaces {
-			// create parser
-			parser, err := NewAmazonParser(marketplace.Name, marketplace.PartnerTag, config.AmazonConfig.AccessKey, config.AmazonConfig.SecretKey, config.AmazonConfig.Searches, config.IncludeRegex, config.ExcludeRegex, config.AmazonConfig.AmazonFulfilled, config.AmazonConfig.AmazonMerchant, config.AmazonConfig.AffiliateLinks)
-			if err != nil {
-				log.Warnf("could not create Amazon parser: %s", err)
-				continue
-			}
+			parser := NewAmazonParser(marketplace.Name, marketplace.PartnerTag, config.AmazonConfig.AccessKey, config.AmazonConfig.SecretKey, config.AmazonConfig.Searches, config.AmazonConfig.AmazonFulfilled, config.AmazonConfig.AmazonMerchant, config.AmazonConfig.AffiliateLinks)
 
@@ -202,7 +211,7 @@ func main() {
 			if jobsCount < *workers {
 				wg.Add(1)
 				jobsCount++
-				go handleProducts(parser, notifiers, db, &wg)
+				go handleProducts(parser, notifiers, filters, db, &wg)
 				break
 			} else {
 				log.Debugf("waiting for intermediate jobs to end")
@@ -217,7 +226,7 @@ func main() {
 }
 
 // For parser to return a list of products, then eventually send notifications
-func handleProducts(parser Parser, notifiers []Notifier, db *gorm.DB, wg *sync.WaitGroup) {
+func handleProducts(parser Parser, notifiers []Notifier, filters []Filter, db *gorm.DB, wg *sync.WaitGroup) {
 	defer wg.Done()
 
 	log.Debugf("parsing with %s", parser)
@@ -241,11 +250,21 @@ func handleProducts(parser Parser, notifiers []Notifier, db *gorm.DB, wg *sync.W
 		log.Warnf("cannot parse: %s", err)
 		return
 	}
-	log.Debugf("parsed")
-
 	// insert or update products to database
 	for _, product := range products {
 
+		// skip products rejected by at least one filter
+		included := true
+		for _, filter := range filters {
+			if !filter.Include(product) {
+				included = false
+				break
+			}
+		}
+		if !included {
+			continue
+		}
+
 		log.Debugf("detected product %+v", product)
 
 		if !product.IsValid() {
diff --git a/parser.go b/parser.go
index ee301b3..20a7bd2 100644
--- a/parser.go
+++ b/parser.go
@@ -1,48 +1,8 @@
 package main
 
-import (
-	"regexp"
-
-	log "github.com/sirupsen/logrus"
-)
-
 // Parser interface to parse an external service and return a list of products
 type Parser interface {
 	Parse() ([]*Product, error)
 	String() string
 	ShopName() (string, error)
 }
-
-// filterInclusive returns a list of products matching the include regex
-func filterInclusive(includeRegex *regexp.Regexp, products []*Product) []*Product {
-	var filtered []*Product
-	if includeRegex != nil {
-		for _, product := range products {
-			if includeRegex.MatchString(product.Name) {
-				log.Debugf("product %s included because it matches the include regex", product.Name)
-				filtered = append(filtered, product)
-			} else {
-				log.Debugf("product %s excluded because it does not match the include regex", product.Name)
-			}
-		}
-		return filtered
-	}
-	return products
-}
-
-// filterExclusive returns a list of products that don't match the exclude regex
-func filterExclusive(excludeRegex *regexp.Regexp, products []*Product) []*Product {
-	var filtered []*Product
-	if excludeRegex != nil {
-		for _, product := range products {
-			if excludeRegex.MatchString(product.Name) {
-				log.Debugf("product %s excluded because it matches the exclude regex", product.Name)
-			} else {
-				log.Debugf("product %s included because it does not match the exclude regex", product.Name)
-				filtered = append(filtered, product)
-			}
-		}
-		return filtered
-	}
-	return products
-}
diff --git a/parser_amazon.go b/parser_amazon.go
index acedd25..69b86b4 100644
--- a/parser_amazon.go
+++ b/parser_amazon.go
@@ -39,35 +39,14 @@ type AmazonParser struct {
 }
 
 // NewAmazonParser to create a new AmazonParser instance
-func NewAmazonParser(marketplace string, partnerTag string, accessKey string, secretKey string, searches []string, includeRegex string, excludeRegex string, amazonFulfilled bool, amazonMerchant bool, affiliateLinks bool) (*AmazonParser, error) {
-	var err error
-	var includeRegexCompiled, excludeRegexCompiled *regexp.Regexp
-
-	log.Debugf("compiling include name regex")
-	if includeRegex != "" {
-		includeRegexCompiled, err = regexp.Compile(includeRegex)
-		if err != nil {
-			return nil, err
-		}
-	}
-
-	log.Debugf("compiling exclude name regex")
-	if excludeRegex != "" {
-		excludeRegexCompiled, err = regexp.Compile(excludeRegex)
-		if err != nil {
-			return nil, err
-		}
-	}
-
+func NewAmazonParser(marketplace string, partnerTag string, accessKey string, secretKey string, searches []string, amazonFulfilled bool, amazonMerchant bool, affiliateLinks bool) *AmazonParser {
 	return &AmazonParser{
 		client:          NewAmazonServer(marketplace).CreateClient(partnerTag, accessKey, secretKey),
 		searches:        searches,
-		includeRegex:    includeRegexCompiled,
-		excludeRegex:    excludeRegexCompiled,
 		amazonFulfilled: amazonFulfilled,
 		amazonMerchant:  amazonMerchant,
 		affiliateLinks:  affiliateLinks,
-	}, nil
+	}
 }
 
 // Parse Amazon API to return list of products
@@ -139,10 +118,6 @@ func (p *AmazonParser) Parse() ([]*Product, error) {
 		}
 	}
 
-	// apply filters
-	products = filterInclusive(p.includeRegex, products)
-	products = filterExclusive(p.excludeRegex, products)
-
 	return products, nil
 }
 
diff --git a/parser_test.go b/parser_test.go
deleted file mode 100644
index cb01350..0000000
--- a/parser_test.go
+++ /dev/null
@@ -1,93 +0,0 @@
-package main
-
-import (
-	"fmt"
-	"regexp"
-	"testing"
-)
-
-func TestFilterInclusive(t *testing.T) {
-	tests := []struct {
-		regex    string // inclusive regex
-		name     string // product name
-		included bool   // should be included or not
-	}{
-		{"(?i)(rtx|rx)(.*)(3060|3070|3080|3090|5700|6800|6900)( )?(xt|ti)?", "MSI GeForce RTX 3060 GAMING X", true},             // 3060 in the include regex
-		{"(?i)(rtx|rx)(.*)(3060|3070|3080|3090|5700|6800|6900)( )?(xt|ti)?", "ASUS AMD Radeon RX 5600 XT TUF Gaming X3", false}, // 5600 not in the include regex
-		{"", "MSI GeForce RTX 3060 GAMING X", true},                                                                             // do nothing when the include regex is empty
-	}
-
-	for i, tc := range tests {
-		t.Run(fmt.Sprintf("TestFilterInclusive#%d", i), func(t *testing.T) {
-			var regexCompiled *regexp.Regexp
-			var err error
-			if tc.regex != "" {
-				regexCompiled, err = regexp.Compile(tc.regex)
-				if err != nil {
-					t.Errorf("cannot parse regex '%s'", tc.regex)
-				}
-			}
-			products := []*Product{{Name: tc.name}}
-			filtered := filterInclusive(regexCompiled, products)
-			included := false
-			for _, product := range filtered {
-				if product.Name == tc.name && !included {
-					included = true
-				}
-			}
-			if tc.included != included {
-				t.Errorf("regex '%s' for product '%s': got included=%t, want included=%t", tc.regex, tc.name, included, tc.included)
-			} else {
-				if included {
-					t.Logf("regex '%s' includes product '%s'", tc.regex, tc.name)
-				} else {
-					t.Logf("regex '%s' excludes product '%s'", tc.regex, tc.name)
-				}
-			}
-
-		})
-	}
-}
-
-func TestFilterExclusive(t *testing.T) {
-	tests := []struct {
-		regex    string // exclusive regex
-		name     string // product name
-		included bool   // should be included or not
-	}{
-		{"(?i)(rtx|rx)(.*)(3060|3070|3080|3090|5700|6800|6900)( )?(xt|ti)?", "MSI GeForce RTX 3060 GAMING X", false},           // 3060 in the exclude regex
-		{"(?i)(rtx|rx)(.*)(3060|3070|3080|3090|5700|6800|6900)( )?(xt|ti)?", "ASUS AMD Radeon RX 5600 XT TUF Gaming X3", true}, // 5600 not in the exclude regex
-		{"", "MSI GeForce RTX 3060 GAMING X", true},                                                                            // do nothing when the exclude regex is empty
-	}
-
-	for i, tc := range tests {
-		t.Run(fmt.Sprintf("TestFilterExclusive#%d", i), func(t *testing.T) {
-			var regexCompiled *regexp.Regexp
-			var err error
-			if tc.regex != "" {
-				regexCompiled, err = regexp.Compile(tc.regex)
-				if err != nil {
-					t.Errorf("cannot parse regex '%s'", tc.regex)
-				}
-			}
-
-			products := []*Product{{Name: tc.name}}
-			filtered := filterExclusive(regexCompiled, products)
-			included := false
-			for _, product := range filtered {
-				if product.Name == tc.name && !included {
-					included = true
-				}
-			}
-			if tc.included != included {
-				t.Errorf("regex '%s' for product '%s': got included=%t, want included=%t", tc.regex, tc.name, included, tc.included)
-			} else {
-				if included {
-					t.Logf("regex '%s' includes product '%s'", tc.regex, tc.name)
-				} else {
-					t.Logf("regex '%s' excludes product '%s'", tc.regex, tc.name)
-				}
-			}
-		})
-	}
-}
diff --git a/parser_url.go b/parser_url.go
index 64d9234..e22d51d 100644
--- a/parser_url.go
+++ b/parser_url.go
@@ -4,7 +4,6 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
-	"regexp"
 
 	log "github.com/sirupsen/logrus"
 
@@ -16,10 +15,8 @@ import (
 
 // URLParser structure to handle websites parsing logic
 type URLParser struct {
-	url          string
-	includeRegex *regexp.Regexp
-	excludeRegex *regexp.Regexp
-	ctx          context.Context
+	url string
+	ctx context.Context
 }
 
 // String to print URLParser
@@ -34,25 +31,7 @@ func (p *URLParser) ShopName() (string, error) {
 }
 
 // NewURLParser to create a new URLParser instance
-func NewURLParser(url string, browserAddress string, includeRegex string, excludeRegex string) (*URLParser, error) {
-	var err error
-	var includeRegexCompiled, excludeRegexCompiled *regexp.Regexp
-
-	log.Debugf("compiling include name regex")
-	if includeRegex != "" {
-		includeRegexCompiled, err = regexp.Compile(includeRegex)
-		if err != nil {
-			return nil, err
-		}
-	}
-
-	log.Debugf("compiling exclude name regex")
-	if excludeRegex != "" {
-		excludeRegexCompiled, err = regexp.Compile(excludeRegex)
-		if err != nil {
-			return nil, err
-		}
-	}
+func NewURLParser(url string, browserAddress string) *URLParser {
 
 	log.Debugf("creating context with headless browser drivers")
 	ctx := context.Background()
@@ -60,11 +39,9 @@ func NewURLParser(url string, browserAddress string, includeRegex string, exclud
 	ctx = drivers.WithContext(ctx, http.NewDriver(), drivers.AsDefault())
 
 	return &URLParser{
-		url:          url,
-		includeRegex: includeRegexCompiled,
-		excludeRegex: excludeRegexCompiled,
-		ctx:          ctx,
-	}, nil
+		url: url,
+		ctx: ctx,
+	}
 }
 
 // Parse a website to return list of products
@@ -96,10 +73,6 @@ func (p *URLParser) Parse() ([]*Product, error) {
 		return nil, err
 	}
 
-	// apply filters
-	products = filterInclusive(p.includeRegex, products)
-	products = filterExclusive(p.excludeRegex, products)
-
 	return products, nil
 }
 