Archived
1
0
Fork 0

refactor: move filters out of parser

Filters are now separate structures that decide whether to include a
product based on their own set of properties. For now, include and
exclude filters are supported. Each takes a regex as an argument and
keeps a product if the regex matches (include filter) or does not match
(exclude filter) the product name. This commit will allow us to create
new product filters, such as one based on a price range.

Signed-off-by: Julien Riou <julien@riou.xyz>
This commit is contained in:
Julien Riou 2021-05-19 17:43:31 +02:00
commit 244c9f68e7
No known key found for this signature in database
GPG key ID: FF42D23B580C89F7
10 changed files with 212 additions and 206 deletions

View file

@ -4,7 +4,6 @@ import (
"context"
"encoding/json"
"fmt"
"regexp"
log "github.com/sirupsen/logrus"
@ -16,10 +15,8 @@ import (
// URLParser structure to handle websites parsing logic
type URLParser struct {
url string
includeRegex *regexp.Regexp
excludeRegex *regexp.Regexp
ctx context.Context
url string
ctx context.Context
}
// String to print URLParser
@ -34,25 +31,7 @@ func (p *URLParser) ShopName() (string, error) {
}
// NewURLParser to create a new URLParser instance
func NewURLParser(url string, browserAddress string, includeRegex string, excludeRegex string) (*URLParser, error) {
var err error
var includeRegexCompiled, excludeRegexCompiled *regexp.Regexp
log.Debugf("compiling include name regex")
if includeRegex != "" {
includeRegexCompiled, err = regexp.Compile(includeRegex)
if err != nil {
return nil, err
}
}
log.Debugf("compiling exclude name regex")
if excludeRegex != "" {
excludeRegexCompiled, err = regexp.Compile(excludeRegex)
if err != nil {
return nil, err
}
}
func NewURLParser(url string, browserAddress string) *URLParser {
log.Debugf("creating context with headless browser drivers")
ctx := context.Background()
@ -60,11 +39,9 @@ func NewURLParser(url string, browserAddress string, includeRegex string, exclud
ctx = drivers.WithContext(ctx, http.NewDriver(), drivers.AsDefault())
return &URLParser{
url: url,
includeRegex: includeRegexCompiled,
excludeRegex: excludeRegexCompiled,
ctx: ctx,
}, nil
url: url,
ctx: ctx,
}
}
// Parse a website to return list of products
@ -96,10 +73,6 @@ func (p *URLParser) Parse() ([]*Product, error) {
return nil, err
}
// apply filters
products = filterInclusive(p.includeRegex, products)
products = filterExclusive(p.excludeRegex, products)
return products, nil
}