refactor: move filters out of parser
Filters are now separate structures that decide whether to include a product based on their own set of properties. For now, include and exclude filters are supported. They take a regex as an argument and include a product if the regex matches (or, for the exclude filter, doesn't match) the product name. This commit will allow us to create new product filters, such as a price-range filter. Signed-off-by: Julien Riou <julien@riou.xyz>
This commit is contained in:
parent
b6feb2d656
commit
244c9f68e7
10 changed files with 212 additions and 206 deletions
6
filter.go
Normal file
6
filter.go
Normal file
|
@ -0,0 +1,6 @@
|
|||
package main
|
||||
|
||||
// Filter interface to include a product based on filters
|
||||
type Filter interface {
|
||||
Include(*Product) bool
|
||||
}
|
40
filter_exclude.go
Normal file
40
filter_exclude.go
Normal file
|
@ -0,0 +1,40 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// ExcludeFilter rejects products whose name matches a regular expression.
type ExcludeFilter struct {
	// regex is the compiled exclude expression; nil means "exclude nothing".
	regex *regexp.Regexp
}
|
||||
|
||||
func NewExcludeFilter(regex string) (*ExcludeFilter, error) {
|
||||
var err error
|
||||
var compiledRegex *regexp.Regexp
|
||||
|
||||
log.Debugf("compiling exclude filter regex")
|
||||
if regex != "" {
|
||||
compiledRegex, err = regexp.Compile(regex)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return &ExcludeFilter{regex: compiledRegex}, nil
|
||||
}
|
||||
|
||||
// Filter excludes product with name matching the regex
|
||||
// implements the Filter interface
|
||||
func (f *ExcludeFilter) Include(product *Product) bool {
|
||||
if f.regex == nil {
|
||||
return true
|
||||
}
|
||||
if f.regex.MatchString(product.Name) {
|
||||
log.Debugf("product %s excluded because it matches the exclude regex", product.Name)
|
||||
return false
|
||||
}
|
||||
log.Debugf("product %s included because it doesn't match the exclude regex", product.Name)
|
||||
return true
|
||||
}
|
41
filter_exclude_test.go
Normal file
41
filter_exclude_test.go
Normal file
|
@ -0,0 +1,41 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestExcludeFilter(t *testing.T) {
|
||||
tests := []struct {
|
||||
regex string // exclusive regex
|
||||
name string // product name
|
||||
included bool // should be included or not
|
||||
}{
|
||||
{"(?i)(rtx|rx)(.*)(3060|3070|3080|3090|5700|6800|6900)( )?(xt|ti)?", "MSI GeForce RTX 3060 GAMING X", false}, // 3060 in the exclude regex
|
||||
{"(?i)(rtx|rx)(.*)(3060|3070|3080|3090|5700|6800|6900)( )?(xt|ti)?", "ASUS AMD Radeon RX 5600 XT TUF Gaming X3", true}, // 5600 not in the exclude regex
|
||||
{"", "MSI GeForce RTX 3060 GAMING X", true}, // do nothing when the exclude regex is empty
|
||||
}
|
||||
|
||||
for i, tc := range tests {
|
||||
t.Run(fmt.Sprintf("TestExcludeFilter#%d", i), func(t *testing.T) {
|
||||
product := &Product{Name: tc.name}
|
||||
filter, err := NewExcludeFilter(tc.regex)
|
||||
if err != nil {
|
||||
t.Errorf("cannot create filter with regex '%s': %s", tc.regex, err)
|
||||
}
|
||||
|
||||
included := filter.Include(product)
|
||||
|
||||
if included != tc.included {
|
||||
t.Errorf("regex '%s' for product '%s': got included=%t, want included=%t", tc.regex, tc.name, included, tc.included)
|
||||
} else {
|
||||
if included {
|
||||
t.Logf("regex '%s' includes product '%s'", tc.regex, tc.name)
|
||||
} else {
|
||||
t.Logf("regex '%s' excludes product '%s'", tc.regex, tc.name)
|
||||
}
|
||||
}
|
||||
|
||||
})
|
||||
}
|
||||
}
|
40
filter_include.go
Normal file
40
filter_include.go
Normal file
|
@ -0,0 +1,40 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// IncludeFilter keeps only products whose name matches a regular expression.
type IncludeFilter struct {
	// regex is the compiled include expression; nil means "include everything".
	regex *regexp.Regexp
}
|
||||
|
||||
func NewIncludeFilter(regex string) (*IncludeFilter, error) {
|
||||
var err error
|
||||
var compiledRegex *regexp.Regexp
|
||||
|
||||
log.Debugf("compiling include filter regex")
|
||||
if regex != "" {
|
||||
compiledRegex, err = regexp.Compile(regex)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return &IncludeFilter{regex: compiledRegex}, nil
|
||||
}
|
||||
|
||||
// Filter includes product with name matching the regex
|
||||
// implements the Filter interface
|
||||
func (f *IncludeFilter) Include(product *Product) bool {
|
||||
if f.regex == nil {
|
||||
return true
|
||||
}
|
||||
if f.regex.MatchString(product.Name) {
|
||||
log.Debugf("product %s included because it matches the include regex", product.Name)
|
||||
return true
|
||||
}
|
||||
log.Debugf("product %s excluded because it doesn't match the include regex", product.Name)
|
||||
return false
|
||||
}
|
41
filter_include_test.go
Normal file
41
filter_include_test.go
Normal file
|
@ -0,0 +1,41 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestIncludeFilter(t *testing.T) {
|
||||
tests := []struct {
|
||||
regex string // inclusive regex
|
||||
name string // product name
|
||||
included bool // should be included or not
|
||||
}{
|
||||
{"(?i)(rtx|rx)(.*)(3060|3070|3080|3090|5700|6800|6900)( )?(xt|ti)?", "MSI GeForce RTX 3060 GAMING X", true}, // 3060 in the include regex
|
||||
{"(?i)(rtx|rx)(.*)(3060|3070|3080|3090|5700|6800|6900)( )?(xt|ti)?", "ASUS AMD Radeon RX 5600 XT TUF Gaming X3", false}, // 5600 not in the include regex
|
||||
{"", "MSI GeForce RTX 3060 GAMING X", true}, // do nothing when the include regex is empty
|
||||
}
|
||||
|
||||
for i, tc := range tests {
|
||||
t.Run(fmt.Sprintf("TestIncludeFilter#%d", i), func(t *testing.T) {
|
||||
product := &Product{Name: tc.name}
|
||||
filter, err := NewIncludeFilter(tc.regex)
|
||||
if err != nil {
|
||||
t.Errorf("cannot create filter with regex '%s': %s", tc.regex, err)
|
||||
}
|
||||
|
||||
included := filter.Include(product)
|
||||
|
||||
if included != tc.included {
|
||||
t.Errorf("regex '%s' for product '%s': got included=%t, want included=%t", tc.regex, tc.name, included, tc.included)
|
||||
} else {
|
||||
if included {
|
||||
t.Logf("regex '%s' includes product '%s'", tc.regex, tc.name)
|
||||
} else {
|
||||
t.Logf("regex '%s' excludes product '%s'", tc.regex, tc.name)
|
||||
}
|
||||
}
|
||||
|
||||
})
|
||||
}
|
||||
}
|
49
main.go
49
main.go
|
@ -163,28 +163,41 @@ func main() {
|
|||
}
|
||||
}
|
||||
|
||||
// register filters
|
||||
filters := []Filter{}
|
||||
if config.IncludeRegex != "" {
|
||||
includeFilter, err := NewIncludeFilter(config.IncludeRegex)
|
||||
if err != nil {
|
||||
log.Fatalf("cannot create include filter: %s", err)
|
||||
}
|
||||
filters = append(filters, includeFilter)
|
||||
}
|
||||
if config.ExcludeRegex != "" {
|
||||
excludeFilter, err := NewExcludeFilter(config.ExcludeRegex)
|
||||
if err != nil {
|
||||
log.Fatalf("cannot create exclude filter: %s", err)
|
||||
}
|
||||
filters = append(filters, excludeFilter)
|
||||
}
|
||||
|
||||
// create parsers
|
||||
parsers := []Parser{}
|
||||
|
||||
if config.HasURLs() {
|
||||
// create a parser for all web pages
|
||||
for _, url := range config.URLs {
|
||||
// create parser
|
||||
parser, err := NewURLParser(url, config.BrowserAddress, config.IncludeRegex, config.ExcludeRegex)
|
||||
if err != nil {
|
||||
log.Warnf("could not create URL parser for '%s'", url)
|
||||
continue
|
||||
}
|
||||
parser := NewURLParser(url, config.BrowserAddress)
|
||||
parsers = append(parsers, parser)
|
||||
log.Debugf("parser %s registered", parser)
|
||||
}
|
||||
}
|
||||
|
||||
if config.HasAmazon() {
|
||||
// create a parser for all marketplaces
|
||||
for _, marketplace := range config.AmazonConfig.Marketplaces {
|
||||
// create parser
|
||||
parser, err := NewAmazonParser(marketplace.Name, marketplace.PartnerTag, config.AmazonConfig.AccessKey, config.AmazonConfig.SecretKey, config.AmazonConfig.Searches, config.IncludeRegex, config.ExcludeRegex, config.AmazonConfig.AmazonFulfilled, config.AmazonConfig.AmazonMerchant, config.AmazonConfig.AffiliateLinks)
|
||||
parser := NewAmazonParser(marketplace.Name, marketplace.PartnerTag, config.AmazonConfig.AccessKey, config.AmazonConfig.SecretKey, config.AmazonConfig.Searches, config.AmazonConfig.AmazonFulfilled, config.AmazonConfig.AmazonMerchant, config.AmazonConfig.AffiliateLinks)
|
||||
if err != nil {
|
||||
log.Warnf("could not create Amazon parser: %s", err)
|
||||
log.Warnf("could not create Amazon parser for marketplace %s: %s", marketplace, err)
|
||||
continue
|
||||
}
|
||||
|
||||
|
@ -202,7 +215,7 @@ func main() {
|
|||
if jobsCount < *workers {
|
||||
wg.Add(1)
|
||||
jobsCount++
|
||||
go handleProducts(parser, notifiers, db, &wg)
|
||||
go handleProducts(parser, notifiers, filters, db, &wg)
|
||||
break
|
||||
} else {
|
||||
log.Debugf("waiting for intermediate jobs to end")
|
||||
|
@ -217,7 +230,7 @@ func main() {
|
|||
}
|
||||
|
||||
// For parser to return a list of products, then eventually send notifications
|
||||
func handleProducts(parser Parser, notifiers []Notifier, db *gorm.DB, wg *sync.WaitGroup) {
|
||||
func handleProducts(parser Parser, notifiers []Notifier, filters []Filter, db *gorm.DB, wg *sync.WaitGroup) {
|
||||
defer wg.Done()
|
||||
|
||||
log.Debugf("parsing with %s", parser)
|
||||
|
@ -241,11 +254,21 @@ func handleProducts(parser Parser, notifiers []Notifier, db *gorm.DB, wg *sync.W
|
|||
log.Warnf("cannot parse: %s", err)
|
||||
return
|
||||
}
|
||||
log.Debugf("parsed")
|
||||
|
||||
// insert or update products to database
|
||||
for _, product := range products {
|
||||
|
||||
// skip products not matching all filters
|
||||
included := true
|
||||
for _, filter := range filters {
|
||||
if included && !filter.Include(product) {
|
||||
included = false
|
||||
continue
|
||||
}
|
||||
}
|
||||
if !included {
|
||||
continue
|
||||
}
|
||||
|
||||
log.Debugf("detected product %+v", product)
|
||||
|
||||
if !product.IsValid() {
|
||||
|
|
40
parser.go
40
parser.go
|
@ -1,48 +1,8 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// Parser interface to parse an external service and return a list of products
|
||||
type Parser interface {
|
||||
Parse() ([]*Product, error)
|
||||
String() string
|
||||
ShopName() (string, error)
|
||||
}
|
||||
|
||||
// filterInclusive returns a list of products matching the include regex
|
||||
func filterInclusive(includeRegex *regexp.Regexp, products []*Product) []*Product {
|
||||
var filtered []*Product
|
||||
if includeRegex != nil {
|
||||
for _, product := range products {
|
||||
if includeRegex.MatchString(product.Name) {
|
||||
log.Debugf("product %s included because it matches the include regex", product.Name)
|
||||
filtered = append(filtered, product)
|
||||
} else {
|
||||
log.Debugf("product %s excluded because it does not match the include regex", product.Name)
|
||||
}
|
||||
}
|
||||
return filtered
|
||||
}
|
||||
return products
|
||||
}
|
||||
|
||||
// filterExclusive returns a list of products that don't match the exclude regex
|
||||
func filterExclusive(excludeRegex *regexp.Regexp, products []*Product) []*Product {
|
||||
var filtered []*Product
|
||||
if excludeRegex != nil {
|
||||
for _, product := range products {
|
||||
if excludeRegex.MatchString(product.Name) {
|
||||
log.Debugf("product %s excluded because it matches the exclude regex", product.Name)
|
||||
} else {
|
||||
log.Debugf("product %s included because it does not match the exclude regex", product.Name)
|
||||
filtered = append(filtered, product)
|
||||
}
|
||||
}
|
||||
return filtered
|
||||
}
|
||||
return products
|
||||
}
|
||||
|
|
|
@ -39,35 +39,14 @@ type AmazonParser struct {
|
|||
}
|
||||
|
||||
// NewAmazonParser to create a new AmazonParser instance
|
||||
func NewAmazonParser(marketplace string, partnerTag string, accessKey string, secretKey string, searches []string, includeRegex string, excludeRegex string, amazonFulfilled bool, amazonMerchant bool, affiliateLinks bool) (*AmazonParser, error) {
|
||||
var err error
|
||||
var includeRegexCompiled, excludeRegexCompiled *regexp.Regexp
|
||||
|
||||
log.Debugf("compiling include name regex")
|
||||
if includeRegex != "" {
|
||||
includeRegexCompiled, err = regexp.Compile(includeRegex)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
log.Debugf("compiling exclude name regex")
|
||||
if excludeRegex != "" {
|
||||
excludeRegexCompiled, err = regexp.Compile(excludeRegex)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
func NewAmazonParser(marketplace string, partnerTag string, accessKey string, secretKey string, searches []string, amazonFulfilled bool, amazonMerchant bool, affiliateLinks bool) *AmazonParser {
|
||||
return &AmazonParser{
|
||||
client: NewAmazonServer(marketplace).CreateClient(partnerTag, accessKey, secretKey),
|
||||
searches: searches,
|
||||
includeRegex: includeRegexCompiled,
|
||||
excludeRegex: excludeRegexCompiled,
|
||||
amazonFulfilled: amazonFulfilled,
|
||||
amazonMerchant: amazonMerchant,
|
||||
affiliateLinks: affiliateLinks,
|
||||
}, nil
|
||||
}
|
||||
}
|
||||
|
||||
// Parse Amazon API to return list of products
|
||||
|
@ -139,10 +118,6 @@ func (p *AmazonParser) Parse() ([]*Product, error) {
|
|||
}
|
||||
}
|
||||
|
||||
// apply filters
|
||||
products = filterInclusive(p.includeRegex, products)
|
||||
products = filterExclusive(p.excludeRegex, products)
|
||||
|
||||
return products, nil
|
||||
}
|
||||
|
||||
|
|
|
@ -1,93 +0,0 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestFilterInclusive(t *testing.T) {
|
||||
tests := []struct {
|
||||
regex string // inclusive regex
|
||||
name string // product name
|
||||
included bool // should be included or not
|
||||
}{
|
||||
{"(?i)(rtx|rx)(.*)(3060|3070|3080|3090|5700|6800|6900)( )?(xt|ti)?", "MSI GeForce RTX 3060 GAMING X", true}, // 3060 in the include regex
|
||||
{"(?i)(rtx|rx)(.*)(3060|3070|3080|3090|5700|6800|6900)( )?(xt|ti)?", "ASUS AMD Radeon RX 5600 XT TUF Gaming X3", false}, // 5600 not in the include regex
|
||||
{"", "MSI GeForce RTX 3060 GAMING X", true}, // do nothing when the include regex is empty
|
||||
}
|
||||
|
||||
for i, tc := range tests {
|
||||
t.Run(fmt.Sprintf("TestFilterInclusive#%d", i), func(t *testing.T) {
|
||||
var regexCompiled *regexp.Regexp
|
||||
var err error
|
||||
if tc.regex != "" {
|
||||
regexCompiled, err = regexp.Compile(tc.regex)
|
||||
if err != nil {
|
||||
t.Errorf("cannot parse regex '%s'", tc.regex)
|
||||
}
|
||||
}
|
||||
products := []*Product{{Name: tc.name}}
|
||||
filtered := filterInclusive(regexCompiled, products)
|
||||
included := false
|
||||
for _, product := range filtered {
|
||||
if product.Name == tc.name && !included {
|
||||
included = true
|
||||
}
|
||||
}
|
||||
if tc.included != included {
|
||||
t.Errorf("regex '%s' for product '%s': got included=%t, want included=%t", tc.regex, tc.name, included, tc.included)
|
||||
} else {
|
||||
if included {
|
||||
t.Logf("regex '%s' includes product '%s'", tc.regex, tc.name)
|
||||
} else {
|
||||
t.Logf("regex '%s' excludes product '%s'", tc.regex, tc.name)
|
||||
}
|
||||
}
|
||||
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestFilterExclusive(t *testing.T) {
|
||||
tests := []struct {
|
||||
regex string // exclusive regex
|
||||
name string // product name
|
||||
included bool // should be included or not
|
||||
}{
|
||||
{"(?i)(rtx|rx)(.*)(3060|3070|3080|3090|5700|6800|6900)( )?(xt|ti)?", "MSI GeForce RTX 3060 GAMING X", false}, // 3060 in the exclude regex
|
||||
{"(?i)(rtx|rx)(.*)(3060|3070|3080|3090|5700|6800|6900)( )?(xt|ti)?", "ASUS AMD Radeon RX 5600 XT TUF Gaming X3", true}, // 5600 not in the exclude regex
|
||||
{"", "MSI GeForce RTX 3060 GAMING X", true}, // do nothing when the exclude regex is empty
|
||||
}
|
||||
|
||||
for i, tc := range tests {
|
||||
t.Run(fmt.Sprintf("TestFilterExclusive#%d", i), func(t *testing.T) {
|
||||
var regexCompiled *regexp.Regexp
|
||||
var err error
|
||||
if tc.regex != "" {
|
||||
regexCompiled, err = regexp.Compile(tc.regex)
|
||||
if err != nil {
|
||||
t.Errorf("cannot parse regex '%s'", tc.regex)
|
||||
}
|
||||
}
|
||||
|
||||
products := []*Product{{Name: tc.name}}
|
||||
filtered := filterExclusive(regexCompiled, products)
|
||||
included := false
|
||||
for _, product := range filtered {
|
||||
if product.Name == tc.name && !included {
|
||||
included = true
|
||||
}
|
||||
}
|
||||
if tc.included != included {
|
||||
t.Errorf("regex '%s' for product '%s': got included=%t, want included=%t", tc.regex, tc.name, included, tc.included)
|
||||
} else {
|
||||
if included {
|
||||
t.Logf("regex '%s' includes product '%s'", tc.regex, tc.name)
|
||||
} else {
|
||||
t.Logf("regex '%s' excludes product '%s'", tc.regex, tc.name)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
|
@ -4,7 +4,6 @@ import (
|
|||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"regexp"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
|
@ -16,10 +15,8 @@ import (
|
|||
|
||||
// URLParser structure to handle websites parsing logic
|
||||
type URLParser struct {
|
||||
url string
|
||||
includeRegex *regexp.Regexp
|
||||
excludeRegex *regexp.Regexp
|
||||
ctx context.Context
|
||||
url string
|
||||
ctx context.Context
|
||||
}
|
||||
|
||||
// String to print URLParser
|
||||
|
@ -34,25 +31,7 @@ func (p *URLParser) ShopName() (string, error) {
|
|||
}
|
||||
|
||||
// NewURLParser to create a new URLParser instance
|
||||
func NewURLParser(url string, browserAddress string, includeRegex string, excludeRegex string) (*URLParser, error) {
|
||||
var err error
|
||||
var includeRegexCompiled, excludeRegexCompiled *regexp.Regexp
|
||||
|
||||
log.Debugf("compiling include name regex")
|
||||
if includeRegex != "" {
|
||||
includeRegexCompiled, err = regexp.Compile(includeRegex)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
log.Debugf("compiling exclude name regex")
|
||||
if excludeRegex != "" {
|
||||
excludeRegexCompiled, err = regexp.Compile(excludeRegex)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
func NewURLParser(url string, browserAddress string) *URLParser {
|
||||
|
||||
log.Debugf("creating context with headless browser drivers")
|
||||
ctx := context.Background()
|
||||
|
@ -60,11 +39,9 @@ func NewURLParser(url string, browserAddress string, includeRegex string, exclud
|
|||
ctx = drivers.WithContext(ctx, http.NewDriver(), drivers.AsDefault())
|
||||
|
||||
return &URLParser{
|
||||
url: url,
|
||||
includeRegex: includeRegexCompiled,
|
||||
excludeRegex: excludeRegexCompiled,
|
||||
ctx: ctx,
|
||||
}, nil
|
||||
url: url,
|
||||
ctx: ctx,
|
||||
}
|
||||
}
|
||||
|
||||
// Parse a website to return list of products
|
||||
|
@ -96,10 +73,6 @@ func (p *URLParser) Parse() ([]*Product, error) {
|
|||
return nil, err
|
||||
}
|
||||
|
||||
// apply filters
|
||||
products = filterInclusive(p.includeRegex, products)
|
||||
products = filterExclusive(p.excludeRegex, products)
|
||||
|
||||
return products, nil
|
||||
}
|
||||
|
||||
|
|
Reference in a new issue