Archived
1
0
Fork 0

refactor: move filters out of parser

Filters are now separate structures that decide whether to include a
product based on their own set of properties. For now, include and exclude
filters are supported. They take a regex as an argument and include a product
if the regex matches (or doesn't match) the product name. This commit
will allow us to create new product filters, such as a price range filter.

Signed-off-by: Julien Riou <julien@riou.xyz>
This commit is contained in:
Julien Riou 2021-05-19 17:43:31 +02:00
parent b6feb2d656
commit 244c9f68e7
No known key found for this signature in database
GPG key ID: FF42D23B580C89F7
10 changed files with 212 additions and 206 deletions

6
filter.go Normal file
View file

@ -0,0 +1,6 @@
package main
// Filter reports whether a product should be included.
// Include returns true to keep the product and false to drop it.
type Filter interface {
	Include(product *Product) bool
}

40
filter_exclude.go Normal file
View file

@ -0,0 +1,40 @@
package main
import (
"regexp"
log "github.com/sirupsen/logrus"
)
// ExcludeFilter is a filter that rejects products whose name matches a
// regular expression.
type ExcludeFilter struct {
// regex is the compiled exclude pattern; it stays nil when the filter is
// created from an empty pattern.
regex *regexp.Regexp
}
// NewExcludeFilter creates an ExcludeFilter from the given regular expression.
// An empty regex produces a filter without a pattern. An error is returned
// when the expression cannot be compiled.
func NewExcludeFilter(regex string) (*ExcludeFilter, error) {
	log.Debugf("compiling exclude filter regex")

	// Nothing to compile: leave the pattern nil.
	if regex == "" {
		return &ExcludeFilter{}, nil
	}

	pattern, err := regexp.Compile(regex)
	if err != nil {
		return nil, err
	}
	return &ExcludeFilter{regex: pattern}, nil
}
// Include returns false for a product whose name matches the exclude regex
// and true otherwise. A filter without a regex includes every product.
// Implements the Filter interface.
func (f *ExcludeFilter) Include(product *Product) bool {
	// No pattern configured: nothing can be excluded.
	if f.regex == nil {
		return true
	}

	matched := f.regex.MatchString(product.Name)
	if !matched {
		log.Debugf("product %s included because it doesn't match the exclude regex", product.Name)
		return true
	}

	log.Debugf("product %s excluded because it matches the exclude regex", product.Name)
	return false
}

41
filter_exclude_test.go Normal file
View file

@ -0,0 +1,41 @@
package main
import (
"fmt"
"testing"
)
// TestExcludeFilter checks that ExcludeFilter rejects products whose name
// matches the exclude regex, keeps the others, and keeps everything when the
// regex is empty.
func TestExcludeFilter(t *testing.T) {
	tests := []struct {
		regex    string // exclusive regex
		name     string // product name
		included bool   // should be included or not
	}{
		{"(?i)(rtx|rx)(.*)(3060|3070|3080|3090|5700|6800|6900)( )?(xt|ti)?", "MSI GeForce RTX 3060 GAMING X", false}, // 3060 in the exclude regex
		{"(?i)(rtx|rx)(.*)(3060|3070|3080|3090|5700|6800|6900)( )?(xt|ti)?", "ASUS AMD Radeon RX 5600 XT TUF Gaming X3", true}, // 5600 not in the exclude regex
		{"", "MSI GeForce RTX 3060 GAMING X", true}, // do nothing when the exclude regex is empty
	}

	for i, tc := range tests {
		t.Run(fmt.Sprintf("TestExcludeFilter#%d", i), func(t *testing.T) {
			product := &Product{Name: tc.name}
			filter, err := NewExcludeFilter(tc.regex)
			if err != nil {
				// Abort the subtest: the filter is nil on error and calling
				// Include on it would panic with a nil pointer dereference.
				t.Fatalf("cannot create filter with regex '%s': %s", tc.regex, err)
			}

			included := filter.Include(product)
			if included != tc.included {
				t.Errorf("regex '%s' for product '%s': got included=%t, want included=%t", tc.regex, tc.name, included, tc.included)
				return
			}

			if included {
				t.Logf("regex '%s' includes product '%s'", tc.regex, tc.name)
			} else {
				t.Logf("regex '%s' excludes product '%s'", tc.regex, tc.name)
			}
		})
	}
}

40
filter_include.go Normal file
View file

@ -0,0 +1,40 @@
package main
import (
"regexp"
log "github.com/sirupsen/logrus"
)
// IncludeFilter is a filter that keeps only products whose name matches a
// regular expression.
type IncludeFilter struct {
// regex is the compiled include pattern; it stays nil when the filter is
// created from an empty pattern.
regex *regexp.Regexp
}
// NewIncludeFilter creates an IncludeFilter from the given regular expression.
// An empty regex produces a filter without a pattern. An error is returned
// when the expression cannot be compiled.
func NewIncludeFilter(regex string) (*IncludeFilter, error) {
	log.Debugf("compiling include filter regex")

	// Nothing to compile: leave the pattern nil.
	if regex == "" {
		return &IncludeFilter{}, nil
	}

	pattern, err := regexp.Compile(regex)
	if err != nil {
		return nil, err
	}
	return &IncludeFilter{regex: pattern}, nil
}
// Include returns true for a product whose name matches the include regex
// and false otherwise. A filter without a regex includes every product.
// Implements the Filter interface.
func (f *IncludeFilter) Include(product *Product) bool {
	// No pattern configured: everything is included.
	if f.regex == nil {
		return true
	}

	matched := f.regex.MatchString(product.Name)
	if !matched {
		log.Debugf("product %s excluded because it doesn't match the include regex", product.Name)
		return false
	}

	log.Debugf("product %s included because it matches the include regex", product.Name)
	return true
}

41
filter_include_test.go Normal file
View file

@ -0,0 +1,41 @@
package main
import (
"fmt"
"testing"
)
// TestIncludeFilter checks that IncludeFilter keeps products whose name
// matches the include regex, rejects the others, and keeps everything when
// the regex is empty.
func TestIncludeFilter(t *testing.T) {
	tests := []struct {
		regex    string // inclusive regex
		name     string // product name
		included bool   // should be included or not
	}{
		{"(?i)(rtx|rx)(.*)(3060|3070|3080|3090|5700|6800|6900)( )?(xt|ti)?", "MSI GeForce RTX 3060 GAMING X", true}, // 3060 in the include regex
		{"(?i)(rtx|rx)(.*)(3060|3070|3080|3090|5700|6800|6900)( )?(xt|ti)?", "ASUS AMD Radeon RX 5600 XT TUF Gaming X3", false}, // 5600 not in the include regex
		{"", "MSI GeForce RTX 3060 GAMING X", true}, // do nothing when the include regex is empty
	}

	for i, tc := range tests {
		t.Run(fmt.Sprintf("TestIncludeFilter#%d", i), func(t *testing.T) {
			product := &Product{Name: tc.name}
			filter, err := NewIncludeFilter(tc.regex)
			if err != nil {
				// Abort the subtest: the filter is nil on error and calling
				// Include on it would panic with a nil pointer dereference.
				t.Fatalf("cannot create filter with regex '%s': %s", tc.regex, err)
			}

			included := filter.Include(product)
			if included != tc.included {
				t.Errorf("regex '%s' for product '%s': got included=%t, want included=%t", tc.regex, tc.name, included, tc.included)
				return
			}

			if included {
				t.Logf("regex '%s' includes product '%s'", tc.regex, tc.name)
			} else {
				t.Logf("regex '%s' excludes product '%s'", tc.regex, tc.name)
			}
		})
	}
}

49
main.go
View file

@ -163,28 +163,41 @@ func main() {
}
}
// register filters
filters := []Filter{}
if config.IncludeRegex != "" {
includeFilter, err := NewIncludeFilter(config.IncludeRegex)
if err != nil {
log.Fatalf("cannot create include filter: %s", err)
}
filters = append(filters, includeFilter)
}
if config.ExcludeRegex != "" {
excludeFilter, err := NewExcludeFilter(config.ExcludeRegex)
if err != nil {
log.Fatalf("cannot create exclude filter: %s", err)
}
filters = append(filters, excludeFilter)
}
// create parsers
parsers := []Parser{}
if config.HasURLs() {
// create a parser for all web pages
for _, url := range config.URLs {
// create parser
parser, err := NewURLParser(url, config.BrowserAddress, config.IncludeRegex, config.ExcludeRegex)
if err != nil {
log.Warnf("could not create URL parser for '%s'", url)
continue
}
parser := NewURLParser(url, config.BrowserAddress)
parsers = append(parsers, parser)
log.Debugf("parser %s registered", parser)
}
}
if config.HasAmazon() {
// create a parser for all marketplaces
for _, marketplace := range config.AmazonConfig.Marketplaces {
// create parser
parser, err := NewAmazonParser(marketplace.Name, marketplace.PartnerTag, config.AmazonConfig.AccessKey, config.AmazonConfig.SecretKey, config.AmazonConfig.Searches, config.IncludeRegex, config.ExcludeRegex, config.AmazonConfig.AmazonFulfilled, config.AmazonConfig.AmazonMerchant, config.AmazonConfig.AffiliateLinks)
parser := NewAmazonParser(marketplace.Name, marketplace.PartnerTag, config.AmazonConfig.AccessKey, config.AmazonConfig.SecretKey, config.AmazonConfig.Searches, config.AmazonConfig.AmazonFulfilled, config.AmazonConfig.AmazonMerchant, config.AmazonConfig.AffiliateLinks)
if err != nil {
log.Warnf("could not create Amazon parser: %s", err)
log.Warnf("could not create Amazon parser for marketplace %s: %s", marketplace, err)
continue
}
@ -202,7 +215,7 @@ func main() {
if jobsCount < *workers {
wg.Add(1)
jobsCount++
go handleProducts(parser, notifiers, db, &wg)
go handleProducts(parser, notifiers, filters, db, &wg)
break
} else {
log.Debugf("waiting for intermediate jobs to end")
@ -217,7 +230,7 @@ func main() {
}
// For parser to return a list of products, then eventually send notifications
func handleProducts(parser Parser, notifiers []Notifier, db *gorm.DB, wg *sync.WaitGroup) {
func handleProducts(parser Parser, notifiers []Notifier, filters []Filter, db *gorm.DB, wg *sync.WaitGroup) {
defer wg.Done()
log.Debugf("parsing with %s", parser)
@ -241,11 +254,21 @@ func handleProducts(parser Parser, notifiers []Notifier, db *gorm.DB, wg *sync.W
log.Warnf("cannot parse: %s", err)
return
}
log.Debugf("parsed")
// insert or update products to database
for _, product := range products {
// skip products not matching all filters
included := true
for _, filter := range filters {
if included && !filter.Include(product) {
included = false
continue
}
}
if !included {
continue
}
log.Debugf("detected product %+v", product)
if !product.IsValid() {

View file

@ -1,48 +1,8 @@
package main
import (
"regexp"
log "github.com/sirupsen/logrus"
)
// Parser interface to parse an external service and return a list of products
type Parser interface {
// Parse returns the list of products found, or an error.
Parse() ([]*Product, error)
// String returns a printable form of the parser (it is formatted with %s
// in log messages).
String() string
// ShopName returns a shop name or an error.
// NOTE(review): presumably the shop this parser targets — confirm against
// the implementations.
ShopName() (string, error)
}
// filterInclusive keeps the products whose name matches includeRegex.
// When includeRegex is nil the input slice is returned untouched.
func filterInclusive(includeRegex *regexp.Regexp, products []*Product) []*Product {
	// No pattern: nothing to filter.
	if includeRegex == nil {
		return products
	}

	var kept []*Product
	for _, p := range products {
		if !includeRegex.MatchString(p.Name) {
			log.Debugf("product %s excluded because it does not match the include regex", p.Name)
			continue
		}
		log.Debugf("product %s included because it matches the include regex", p.Name)
		kept = append(kept, p)
	}
	return kept
}
// filterExclusive keeps the products whose name does not match excludeRegex.
// When excludeRegex is nil the input slice is returned untouched.
func filterExclusive(excludeRegex *regexp.Regexp, products []*Product) []*Product {
	// No pattern: nothing to filter.
	if excludeRegex == nil {
		return products
	}

	var kept []*Product
	for _, p := range products {
		if excludeRegex.MatchString(p.Name) {
			log.Debugf("product %s excluded because it matches the exclude regex", p.Name)
			continue
		}
		log.Debugf("product %s included because it does not match the exclude regex", p.Name)
		kept = append(kept, p)
	}
	return kept
}

View file

@ -39,35 +39,14 @@ type AmazonParser struct {
}
// NewAmazonParser to create a new AmazonParser instance
func NewAmazonParser(marketplace string, partnerTag string, accessKey string, secretKey string, searches []string, includeRegex string, excludeRegex string, amazonFulfilled bool, amazonMerchant bool, affiliateLinks bool) (*AmazonParser, error) {
var err error
var includeRegexCompiled, excludeRegexCompiled *regexp.Regexp
log.Debugf("compiling include name regex")
if includeRegex != "" {
includeRegexCompiled, err = regexp.Compile(includeRegex)
if err != nil {
return nil, err
}
}
log.Debugf("compiling exclude name regex")
if excludeRegex != "" {
excludeRegexCompiled, err = regexp.Compile(excludeRegex)
if err != nil {
return nil, err
}
}
func NewAmazonParser(marketplace string, partnerTag string, accessKey string, secretKey string, searches []string, amazonFulfilled bool, amazonMerchant bool, affiliateLinks bool) *AmazonParser {
return &AmazonParser{
client: NewAmazonServer(marketplace).CreateClient(partnerTag, accessKey, secretKey),
searches: searches,
includeRegex: includeRegexCompiled,
excludeRegex: excludeRegexCompiled,
amazonFulfilled: amazonFulfilled,
amazonMerchant: amazonMerchant,
affiliateLinks: affiliateLinks,
}, nil
}
}
// Parse Amazon API to return list of products
@ -139,10 +118,6 @@ func (p *AmazonParser) Parse() ([]*Product, error) {
}
}
// apply filters
products = filterInclusive(p.includeRegex, products)
products = filterExclusive(p.excludeRegex, products)
return products, nil
}

View file

@ -1,93 +0,0 @@
package main
import (
"fmt"
"regexp"
"testing"
)
// TestFilterInclusive checks that filterInclusive keeps only products whose
// name matches the include regex, and keeps everything when no regex is set.
func TestFilterInclusive(t *testing.T) {
	tests := []struct {
		regex    string // inclusive regex
		name     string // product name
		included bool   // should be included or not
	}{
		{"(?i)(rtx|rx)(.*)(3060|3070|3080|3090|5700|6800|6900)( )?(xt|ti)?", "MSI GeForce RTX 3060 GAMING X", true}, // 3060 in the include regex
		{"(?i)(rtx|rx)(.*)(3060|3070|3080|3090|5700|6800|6900)( )?(xt|ti)?", "ASUS AMD Radeon RX 5600 XT TUF Gaming X3", false}, // 5600 not in the include regex
		{"", "MSI GeForce RTX 3060 GAMING X", true}, // do nothing when the include regex is empty
	}

	for i, tc := range tests {
		t.Run(fmt.Sprintf("TestFilterInclusive#%d", i), func(t *testing.T) {
			var regexCompiled *regexp.Regexp
			if tc.regex != "" {
				var err error
				regexCompiled, err = regexp.Compile(tc.regex)
				if err != nil {
					// Abort the subtest: a nil regex disables filtering, so
					// carrying on would exercise the wrong code path.
					t.Fatalf("cannot parse regex '%s': %s", tc.regex, err)
				}
			}

			products := []*Product{{Name: tc.name}}
			filtered := filterInclusive(regexCompiled, products)

			// The product counts as included if it survived the filter.
			included := false
			for _, product := range filtered {
				if product.Name == tc.name {
					included = true
					break
				}
			}

			if tc.included != included {
				t.Errorf("regex '%s' for product '%s': got included=%t, want included=%t", tc.regex, tc.name, included, tc.included)
				return
			}

			if included {
				t.Logf("regex '%s' includes product '%s'", tc.regex, tc.name)
			} else {
				t.Logf("regex '%s' excludes product '%s'", tc.regex, tc.name)
			}
		})
	}
}
// TestFilterExclusive checks that filterExclusive drops products whose name
// matches the exclude regex, and keeps everything when no regex is set.
func TestFilterExclusive(t *testing.T) {
	tests := []struct {
		regex    string // exclusive regex
		name     string // product name
		included bool   // should be included or not
	}{
		{"(?i)(rtx|rx)(.*)(3060|3070|3080|3090|5700|6800|6900)( )?(xt|ti)?", "MSI GeForce RTX 3060 GAMING X", false}, // 3060 in the exclude regex
		{"(?i)(rtx|rx)(.*)(3060|3070|3080|3090|5700|6800|6900)( )?(xt|ti)?", "ASUS AMD Radeon RX 5600 XT TUF Gaming X3", true}, // 5600 not in the exclude regex
		{"", "MSI GeForce RTX 3060 GAMING X", true}, // do nothing when the exclude regex is empty
	}

	for i, tc := range tests {
		t.Run(fmt.Sprintf("TestFilterExclusive#%d", i), func(t *testing.T) {
			var regexCompiled *regexp.Regexp
			if tc.regex != "" {
				var err error
				regexCompiled, err = regexp.Compile(tc.regex)
				if err != nil {
					// Abort the subtest: a nil regex disables filtering, so
					// carrying on would exercise the wrong code path.
					t.Fatalf("cannot parse regex '%s': %s", tc.regex, err)
				}
			}

			products := []*Product{{Name: tc.name}}
			filtered := filterExclusive(regexCompiled, products)

			// The product counts as included if it survived the filter.
			included := false
			for _, product := range filtered {
				if product.Name == tc.name {
					included = true
					break
				}
			}

			if tc.included != included {
				t.Errorf("regex '%s' for product '%s': got included=%t, want included=%t", tc.regex, tc.name, included, tc.included)
				return
			}

			if included {
				t.Logf("regex '%s' includes product '%s'", tc.regex, tc.name)
			} else {
				t.Logf("regex '%s' excludes product '%s'", tc.regex, tc.name)
			}
		})
	}
}

View file

@ -4,7 +4,6 @@ import (
"context"
"encoding/json"
"fmt"
"regexp"
log "github.com/sirupsen/logrus"
@ -16,10 +15,8 @@ import (
// URLParser structure to handle websites parsing logic
type URLParser struct {
url string
includeRegex *regexp.Regexp
excludeRegex *regexp.Regexp
ctx context.Context
url string
ctx context.Context
}
// String to print URLParser
@ -34,25 +31,7 @@ func (p *URLParser) ShopName() (string, error) {
}
// NewURLParser to create a new URLParser instance
func NewURLParser(url string, browserAddress string, includeRegex string, excludeRegex string) (*URLParser, error) {
var err error
var includeRegexCompiled, excludeRegexCompiled *regexp.Regexp
log.Debugf("compiling include name regex")
if includeRegex != "" {
includeRegexCompiled, err = regexp.Compile(includeRegex)
if err != nil {
return nil, err
}
}
log.Debugf("compiling exclude name regex")
if excludeRegex != "" {
excludeRegexCompiled, err = regexp.Compile(excludeRegex)
if err != nil {
return nil, err
}
}
func NewURLParser(url string, browserAddress string) *URLParser {
log.Debugf("creating context with headless browser drivers")
ctx := context.Background()
@ -60,11 +39,9 @@ func NewURLParser(url string, browserAddress string, includeRegex string, exclud
ctx = drivers.WithContext(ctx, http.NewDriver(), drivers.AsDefault())
return &URLParser{
url: url,
includeRegex: includeRegexCompiled,
excludeRegex: excludeRegexCompiled,
ctx: ctx,
}, nil
url: url,
ctx: ctx,
}
}
// Parse a website to return list of products
@ -96,10 +73,6 @@ func (p *URLParser) Parse() ([]*Product, error) {
return nil, err
}
// apply filters
products = filterInclusive(p.includeRegex, products)
products = filterExclusive(p.excludeRegex, products)
return products, nil
}