refactor: move filters out of parser
Filters are now separate structures that decide whether to include a product based on their own set of properties. For now, include and exclude filters are supported. They take a regex as an argument and include a product if the regex matches (or doesn't match) the product name. This commit will allow us to create new filters on products, such as a price range filter. Signed-off-by: Julien Riou <julien@riou.xyz>
This commit is contained in:
parent
b6feb2d656
commit
244c9f68e7
10 changed files with 212 additions and 206 deletions
6
filter.go
Normal file
6
filter.go
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
// Filter interface to include a product based on filters
|
||||||
|
type Filter interface {
|
||||||
|
Include(*Product) bool
|
||||||
|
}
|
40
filter_exclude.go
Normal file
40
filter_exclude.go
Normal file
|
@ -0,0 +1,40 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"regexp"
|
||||||
|
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ExcludeFilter holds the compiled regular expression used to exclude
// products by name.
type ExcludeFilter struct {
	// regex is the compiled exclusion pattern; it is nil when the filter was
	// built without a pattern.
	regex *regexp.Regexp
}
|
||||||
|
|
||||||
|
func NewExcludeFilter(regex string) (*ExcludeFilter, error) {
|
||||||
|
var err error
|
||||||
|
var compiledRegex *regexp.Regexp
|
||||||
|
|
||||||
|
log.Debugf("compiling exclude filter regex")
|
||||||
|
if regex != "" {
|
||||||
|
compiledRegex, err = regexp.Compile(regex)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return &ExcludeFilter{regex: compiledRegex}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Filter excludes product with name matching the regex
|
||||||
|
// implements the Filter interface
|
||||||
|
func (f *ExcludeFilter) Include(product *Product) bool {
|
||||||
|
if f.regex == nil {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if f.regex.MatchString(product.Name) {
|
||||||
|
log.Debugf("product %s excluded because it matches the exclude regex", product.Name)
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
log.Debugf("product %s included because it doesn't match the exclude regex", product.Name)
|
||||||
|
return true
|
||||||
|
}
|
41
filter_exclude_test.go
Normal file
41
filter_exclude_test.go
Normal file
|
@ -0,0 +1,41 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestExcludeFilter(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
regex string // exclusive regex
|
||||||
|
name string // product name
|
||||||
|
included bool // should be included or not
|
||||||
|
}{
|
||||||
|
{"(?i)(rtx|rx)(.*)(3060|3070|3080|3090|5700|6800|6900)( )?(xt|ti)?", "MSI GeForce RTX 3060 GAMING X", false}, // 3060 in the exclude regex
|
||||||
|
{"(?i)(rtx|rx)(.*)(3060|3070|3080|3090|5700|6800|6900)( )?(xt|ti)?", "ASUS AMD Radeon RX 5600 XT TUF Gaming X3", true}, // 5600 not in the exclude regex
|
||||||
|
{"", "MSI GeForce RTX 3060 GAMING X", true}, // do nothing when the exclude regex is empty
|
||||||
|
}
|
||||||
|
|
||||||
|
for i, tc := range tests {
|
||||||
|
t.Run(fmt.Sprintf("TestExcludeFilter#%d", i), func(t *testing.T) {
|
||||||
|
product := &Product{Name: tc.name}
|
||||||
|
filter, err := NewExcludeFilter(tc.regex)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("cannot create filter with regex '%s': %s", tc.regex, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
included := filter.Include(product)
|
||||||
|
|
||||||
|
if included != tc.included {
|
||||||
|
t.Errorf("regex '%s' for product '%s': got included=%t, want included=%t", tc.regex, tc.name, included, tc.included)
|
||||||
|
} else {
|
||||||
|
if included {
|
||||||
|
t.Logf("regex '%s' includes product '%s'", tc.regex, tc.name)
|
||||||
|
} else {
|
||||||
|
t.Logf("regex '%s' excludes product '%s'", tc.regex, tc.name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
40
filter_include.go
Normal file
40
filter_include.go
Normal file
|
@ -0,0 +1,40 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"regexp"
|
||||||
|
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
)
|
||||||
|
|
||||||
|
// IncludeFilter holds the compiled regular expression used to include
// products by name.
type IncludeFilter struct {
	// regex is the compiled inclusion pattern; it is nil when the filter was
	// built without a pattern.
	regex *regexp.Regexp
}
|
||||||
|
|
||||||
|
func NewIncludeFilter(regex string) (*IncludeFilter, error) {
|
||||||
|
var err error
|
||||||
|
var compiledRegex *regexp.Regexp
|
||||||
|
|
||||||
|
log.Debugf("compiling include filter regex")
|
||||||
|
if regex != "" {
|
||||||
|
compiledRegex, err = regexp.Compile(regex)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return &IncludeFilter{regex: compiledRegex}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Filter includes product with name matching the regex
|
||||||
|
// implements the Filter interface
|
||||||
|
func (f *IncludeFilter) Include(product *Product) bool {
|
||||||
|
if f.regex == nil {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if f.regex.MatchString(product.Name) {
|
||||||
|
log.Debugf("product %s included because it matches the include regex", product.Name)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
log.Debugf("product %s excluded because it doesn't match the include regex", product.Name)
|
||||||
|
return false
|
||||||
|
}
|
41
filter_include_test.go
Normal file
41
filter_include_test.go
Normal file
|
@ -0,0 +1,41 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestIncludeFilter(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
regex string // inclusive regex
|
||||||
|
name string // product name
|
||||||
|
included bool // should be included or not
|
||||||
|
}{
|
||||||
|
{"(?i)(rtx|rx)(.*)(3060|3070|3080|3090|5700|6800|6900)( )?(xt|ti)?", "MSI GeForce RTX 3060 GAMING X", true}, // 3060 in the include regex
|
||||||
|
{"(?i)(rtx|rx)(.*)(3060|3070|3080|3090|5700|6800|6900)( )?(xt|ti)?", "ASUS AMD Radeon RX 5600 XT TUF Gaming X3", false}, // 5600 not in the include regex
|
||||||
|
{"", "MSI GeForce RTX 3060 GAMING X", true}, // do nothing when the include regex is empty
|
||||||
|
}
|
||||||
|
|
||||||
|
for i, tc := range tests {
|
||||||
|
t.Run(fmt.Sprintf("TestIncludeFilter#%d", i), func(t *testing.T) {
|
||||||
|
product := &Product{Name: tc.name}
|
||||||
|
filter, err := NewIncludeFilter(tc.regex)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("cannot create filter with regex '%s': %s", tc.regex, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
included := filter.Include(product)
|
||||||
|
|
||||||
|
if included != tc.included {
|
||||||
|
t.Errorf("regex '%s' for product '%s': got included=%t, want included=%t", tc.regex, tc.name, included, tc.included)
|
||||||
|
} else {
|
||||||
|
if included {
|
||||||
|
t.Logf("regex '%s' includes product '%s'", tc.regex, tc.name)
|
||||||
|
} else {
|
||||||
|
t.Logf("regex '%s' excludes product '%s'", tc.regex, tc.name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
49
main.go
49
main.go
|
@ -163,28 +163,41 @@ func main() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// register filters
|
||||||
|
filters := []Filter{}
|
||||||
|
if config.IncludeRegex != "" {
|
||||||
|
includeFilter, err := NewIncludeFilter(config.IncludeRegex)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("cannot create include filter: %s", err)
|
||||||
|
}
|
||||||
|
filters = append(filters, includeFilter)
|
||||||
|
}
|
||||||
|
if config.ExcludeRegex != "" {
|
||||||
|
excludeFilter, err := NewExcludeFilter(config.ExcludeRegex)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("cannot create exclude filter: %s", err)
|
||||||
|
}
|
||||||
|
filters = append(filters, excludeFilter)
|
||||||
|
}
|
||||||
|
|
||||||
// create parsers
|
// create parsers
|
||||||
parsers := []Parser{}
|
parsers := []Parser{}
|
||||||
|
|
||||||
if config.HasURLs() {
|
if config.HasURLs() {
|
||||||
|
// create a parser for all web pages
|
||||||
for _, url := range config.URLs {
|
for _, url := range config.URLs {
|
||||||
// create parser
|
parser := NewURLParser(url, config.BrowserAddress)
|
||||||
parser, err := NewURLParser(url, config.BrowserAddress, config.IncludeRegex, config.ExcludeRegex)
|
|
||||||
if err != nil {
|
|
||||||
log.Warnf("could not create URL parser for '%s'", url)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
parsers = append(parsers, parser)
|
parsers = append(parsers, parser)
|
||||||
log.Debugf("parser %s registered", parser)
|
log.Debugf("parser %s registered", parser)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if config.HasAmazon() {
|
if config.HasAmazon() {
|
||||||
|
// create a parser for all marketplaces
|
||||||
for _, marketplace := range config.AmazonConfig.Marketplaces {
|
for _, marketplace := range config.AmazonConfig.Marketplaces {
|
||||||
// create parser
|
parser := NewAmazonParser(marketplace.Name, marketplace.PartnerTag, config.AmazonConfig.AccessKey, config.AmazonConfig.SecretKey, config.AmazonConfig.Searches, config.AmazonConfig.AmazonFulfilled, config.AmazonConfig.AmazonMerchant, config.AmazonConfig.AffiliateLinks)
|
||||||
parser, err := NewAmazonParser(marketplace.Name, marketplace.PartnerTag, config.AmazonConfig.AccessKey, config.AmazonConfig.SecretKey, config.AmazonConfig.Searches, config.IncludeRegex, config.ExcludeRegex, config.AmazonConfig.AmazonFulfilled, config.AmazonConfig.AmazonMerchant, config.AmazonConfig.AffiliateLinks)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warnf("could not create Amazon parser: %s", err)
|
log.Warnf("could not create Amazon parser for marketplace %s: %s", marketplace, err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -202,7 +215,7 @@ func main() {
|
||||||
if jobsCount < *workers {
|
if jobsCount < *workers {
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
jobsCount++
|
jobsCount++
|
||||||
go handleProducts(parser, notifiers, db, &wg)
|
go handleProducts(parser, notifiers, filters, db, &wg)
|
||||||
break
|
break
|
||||||
} else {
|
} else {
|
||||||
log.Debugf("waiting for intermediate jobs to end")
|
log.Debugf("waiting for intermediate jobs to end")
|
||||||
|
@ -217,7 +230,7 @@ func main() {
|
||||||
}
|
}
|
||||||
|
|
||||||
// For parser to return a list of products, then eventually send notifications
|
// For parser to return a list of products, then eventually send notifications
|
||||||
func handleProducts(parser Parser, notifiers []Notifier, db *gorm.DB, wg *sync.WaitGroup) {
|
func handleProducts(parser Parser, notifiers []Notifier, filters []Filter, db *gorm.DB, wg *sync.WaitGroup) {
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
|
|
||||||
log.Debugf("parsing with %s", parser)
|
log.Debugf("parsing with %s", parser)
|
||||||
|
@ -241,11 +254,21 @@ func handleProducts(parser Parser, notifiers []Notifier, db *gorm.DB, wg *sync.W
|
||||||
log.Warnf("cannot parse: %s", err)
|
log.Warnf("cannot parse: %s", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
log.Debugf("parsed")
|
|
||||||
|
|
||||||
// insert or update products to database
|
|
||||||
for _, product := range products {
|
for _, product := range products {
|
||||||
|
|
||||||
|
// skip products not matching all filters
|
||||||
|
included := true
|
||||||
|
for _, filter := range filters {
|
||||||
|
if included && !filter.Include(product) {
|
||||||
|
included = false
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !included {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
log.Debugf("detected product %+v", product)
|
log.Debugf("detected product %+v", product)
|
||||||
|
|
||||||
if !product.IsValid() {
|
if !product.IsValid() {
|
||||||
|
|
40
parser.go
40
parser.go
|
@ -1,48 +1,8 @@
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
|
||||||
"regexp"
|
|
||||||
|
|
||||||
log "github.com/sirupsen/logrus"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Parser interface to parse an external service and return a list of products
|
// Parser interface to parse an external service and return a list of products
|
||||||
type Parser interface {
|
type Parser interface {
|
||||||
Parse() ([]*Product, error)
|
Parse() ([]*Product, error)
|
||||||
String() string
|
String() string
|
||||||
ShopName() (string, error)
|
ShopName() (string, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
// filterInclusive returns a list of products matching the include regex
|
|
||||||
func filterInclusive(includeRegex *regexp.Regexp, products []*Product) []*Product {
|
|
||||||
var filtered []*Product
|
|
||||||
if includeRegex != nil {
|
|
||||||
for _, product := range products {
|
|
||||||
if includeRegex.MatchString(product.Name) {
|
|
||||||
log.Debugf("product %s included because it matches the include regex", product.Name)
|
|
||||||
filtered = append(filtered, product)
|
|
||||||
} else {
|
|
||||||
log.Debugf("product %s excluded because it does not match the include regex", product.Name)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return filtered
|
|
||||||
}
|
|
||||||
return products
|
|
||||||
}
|
|
||||||
|
|
||||||
// filterExclusive returns a list of products that don't match the exclude regex
|
|
||||||
func filterExclusive(excludeRegex *regexp.Regexp, products []*Product) []*Product {
|
|
||||||
var filtered []*Product
|
|
||||||
if excludeRegex != nil {
|
|
||||||
for _, product := range products {
|
|
||||||
if excludeRegex.MatchString(product.Name) {
|
|
||||||
log.Debugf("product %s excluded because it matches the exclude regex", product.Name)
|
|
||||||
} else {
|
|
||||||
log.Debugf("product %s included because it does not match the exclude regex", product.Name)
|
|
||||||
filtered = append(filtered, product)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return filtered
|
|
||||||
}
|
|
||||||
return products
|
|
||||||
}
|
|
||||||
|
|
|
@ -39,35 +39,14 @@ type AmazonParser struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewAmazonParser to create a new AmazonParser instance
|
// NewAmazonParser to create a new AmazonParser instance
|
||||||
func NewAmazonParser(marketplace string, partnerTag string, accessKey string, secretKey string, searches []string, includeRegex string, excludeRegex string, amazonFulfilled bool, amazonMerchant bool, affiliateLinks bool) (*AmazonParser, error) {
|
func NewAmazonParser(marketplace string, partnerTag string, accessKey string, secretKey string, searches []string, amazonFulfilled bool, amazonMerchant bool, affiliateLinks bool) *AmazonParser {
|
||||||
var err error
|
|
||||||
var includeRegexCompiled, excludeRegexCompiled *regexp.Regexp
|
|
||||||
|
|
||||||
log.Debugf("compiling include name regex")
|
|
||||||
if includeRegex != "" {
|
|
||||||
includeRegexCompiled, err = regexp.Compile(includeRegex)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
log.Debugf("compiling exclude name regex")
|
|
||||||
if excludeRegex != "" {
|
|
||||||
excludeRegexCompiled, err = regexp.Compile(excludeRegex)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return &AmazonParser{
|
return &AmazonParser{
|
||||||
client: NewAmazonServer(marketplace).CreateClient(partnerTag, accessKey, secretKey),
|
client: NewAmazonServer(marketplace).CreateClient(partnerTag, accessKey, secretKey),
|
||||||
searches: searches,
|
searches: searches,
|
||||||
includeRegex: includeRegexCompiled,
|
|
||||||
excludeRegex: excludeRegexCompiled,
|
|
||||||
amazonFulfilled: amazonFulfilled,
|
amazonFulfilled: amazonFulfilled,
|
||||||
amazonMerchant: amazonMerchant,
|
amazonMerchant: amazonMerchant,
|
||||||
affiliateLinks: affiliateLinks,
|
affiliateLinks: affiliateLinks,
|
||||||
}, nil
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse Amazon API to return list of products
|
// Parse Amazon API to return list of products
|
||||||
|
@ -139,10 +118,6 @@ func (p *AmazonParser) Parse() ([]*Product, error) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// apply filters
|
|
||||||
products = filterInclusive(p.includeRegex, products)
|
|
||||||
products = filterExclusive(p.excludeRegex, products)
|
|
||||||
|
|
||||||
return products, nil
|
return products, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,93 +0,0 @@
|
||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"regexp"
|
|
||||||
"testing"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestFilterInclusive(t *testing.T) {
|
|
||||||
tests := []struct {
|
|
||||||
regex string // inclusive regex
|
|
||||||
name string // product name
|
|
||||||
included bool // should be included or not
|
|
||||||
}{
|
|
||||||
{"(?i)(rtx|rx)(.*)(3060|3070|3080|3090|5700|6800|6900)( )?(xt|ti)?", "MSI GeForce RTX 3060 GAMING X", true}, // 3060 in the include regex
|
|
||||||
{"(?i)(rtx|rx)(.*)(3060|3070|3080|3090|5700|6800|6900)( )?(xt|ti)?", "ASUS AMD Radeon RX 5600 XT TUF Gaming X3", false}, // 5600 not in the include regex
|
|
||||||
{"", "MSI GeForce RTX 3060 GAMING X", true}, // do nothing when the include regex is empty
|
|
||||||
}
|
|
||||||
|
|
||||||
for i, tc := range tests {
|
|
||||||
t.Run(fmt.Sprintf("TestFilterInclusive#%d", i), func(t *testing.T) {
|
|
||||||
var regexCompiled *regexp.Regexp
|
|
||||||
var err error
|
|
||||||
if tc.regex != "" {
|
|
||||||
regexCompiled, err = regexp.Compile(tc.regex)
|
|
||||||
if err != nil {
|
|
||||||
t.Errorf("cannot parse regex '%s'", tc.regex)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
products := []*Product{{Name: tc.name}}
|
|
||||||
filtered := filterInclusive(regexCompiled, products)
|
|
||||||
included := false
|
|
||||||
for _, product := range filtered {
|
|
||||||
if product.Name == tc.name && !included {
|
|
||||||
included = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if tc.included != included {
|
|
||||||
t.Errorf("regex '%s' for product '%s': got included=%t, want included=%t", tc.regex, tc.name, included, tc.included)
|
|
||||||
} else {
|
|
||||||
if included {
|
|
||||||
t.Logf("regex '%s' includes product '%s'", tc.regex, tc.name)
|
|
||||||
} else {
|
|
||||||
t.Logf("regex '%s' excludes product '%s'", tc.regex, tc.name)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestFilterExclusive(t *testing.T) {
|
|
||||||
tests := []struct {
|
|
||||||
regex string // exclusive regex
|
|
||||||
name string // product name
|
|
||||||
included bool // should be included or not
|
|
||||||
}{
|
|
||||||
{"(?i)(rtx|rx)(.*)(3060|3070|3080|3090|5700|6800|6900)( )?(xt|ti)?", "MSI GeForce RTX 3060 GAMING X", false}, // 3060 in the exclude regex
|
|
||||||
{"(?i)(rtx|rx)(.*)(3060|3070|3080|3090|5700|6800|6900)( )?(xt|ti)?", "ASUS AMD Radeon RX 5600 XT TUF Gaming X3", true}, // 5600 not in the exclude regex
|
|
||||||
{"", "MSI GeForce RTX 3060 GAMING X", true}, // do nothing when the exclude regex is empty
|
|
||||||
}
|
|
||||||
|
|
||||||
for i, tc := range tests {
|
|
||||||
t.Run(fmt.Sprintf("TestFilterExclusive#%d", i), func(t *testing.T) {
|
|
||||||
var regexCompiled *regexp.Regexp
|
|
||||||
var err error
|
|
||||||
if tc.regex != "" {
|
|
||||||
regexCompiled, err = regexp.Compile(tc.regex)
|
|
||||||
if err != nil {
|
|
||||||
t.Errorf("cannot parse regex '%s'", tc.regex)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
products := []*Product{{Name: tc.name}}
|
|
||||||
filtered := filterExclusive(regexCompiled, products)
|
|
||||||
included := false
|
|
||||||
for _, product := range filtered {
|
|
||||||
if product.Name == tc.name && !included {
|
|
||||||
included = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if tc.included != included {
|
|
||||||
t.Errorf("regex '%s' for product '%s': got included=%t, want included=%t", tc.regex, tc.name, included, tc.included)
|
|
||||||
} else {
|
|
||||||
if included {
|
|
||||||
t.Logf("regex '%s' includes product '%s'", tc.regex, tc.name)
|
|
||||||
} else {
|
|
||||||
t.Logf("regex '%s' excludes product '%s'", tc.regex, tc.name)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -4,7 +4,6 @@ import (
|
||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"regexp"
|
|
||||||
|
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
|
@ -16,10 +15,8 @@ import (
|
||||||
|
|
||||||
// URLParser structure to handle websites parsing logic
|
// URLParser structure to handle websites parsing logic
|
||||||
type URLParser struct {
|
type URLParser struct {
|
||||||
url string
|
url string
|
||||||
includeRegex *regexp.Regexp
|
ctx context.Context
|
||||||
excludeRegex *regexp.Regexp
|
|
||||||
ctx context.Context
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// String to print URLParser
|
// String to print URLParser
|
||||||
|
@ -34,25 +31,7 @@ func (p *URLParser) ShopName() (string, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewURLParser to create a new URLParser instance
|
// NewURLParser to create a new URLParser instance
|
||||||
func NewURLParser(url string, browserAddress string, includeRegex string, excludeRegex string) (*URLParser, error) {
|
func NewURLParser(url string, browserAddress string) *URLParser {
|
||||||
var err error
|
|
||||||
var includeRegexCompiled, excludeRegexCompiled *regexp.Regexp
|
|
||||||
|
|
||||||
log.Debugf("compiling include name regex")
|
|
||||||
if includeRegex != "" {
|
|
||||||
includeRegexCompiled, err = regexp.Compile(includeRegex)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
log.Debugf("compiling exclude name regex")
|
|
||||||
if excludeRegex != "" {
|
|
||||||
excludeRegexCompiled, err = regexp.Compile(excludeRegex)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
log.Debugf("creating context with headless browser drivers")
|
log.Debugf("creating context with headless browser drivers")
|
||||||
ctx := context.Background()
|
ctx := context.Background()
|
||||||
|
@ -60,11 +39,9 @@ func NewURLParser(url string, browserAddress string, includeRegex string, exclud
|
||||||
ctx = drivers.WithContext(ctx, http.NewDriver(), drivers.AsDefault())
|
ctx = drivers.WithContext(ctx, http.NewDriver(), drivers.AsDefault())
|
||||||
|
|
||||||
return &URLParser{
|
return &URLParser{
|
||||||
url: url,
|
url: url,
|
||||||
includeRegex: includeRegexCompiled,
|
ctx: ctx,
|
||||||
excludeRegex: excludeRegexCompiled,
|
}
|
||||||
ctx: ctx,
|
|
||||||
}, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse a website to return list of products
|
// Parse a website to return list of products
|
||||||
|
@ -96,10 +73,6 @@ func (p *URLParser) Parse() ([]*Product, error) {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// apply filters
|
|
||||||
products = filterInclusive(p.includeRegex, products)
|
|
||||||
products = filterExclusive(p.excludeRegex, products)
|
|
||||||
|
|
||||||
return products, nil
|
return products, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Reference in a new issue