From 2c0b97284989dcab141b91d5352c252af67e4fb7 Mon Sep 17 00:00:00 2001 From: Julien Riou Date: Mon, 12 Jul 2021 08:50:36 +0200 Subject: [PATCH] fix: exclude cybertek home page (#33) Signed-off-by: Julien Riou --- parser_url.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/parser_url.go b/parser_url.go index e22d51d..16957b7 100644 --- a/parser_url.go +++ b/parser_url.go @@ -214,6 +214,7 @@ func createQueryForCybertek(url string) string { // gather first page LET first_page = '` + url + `' LET doc = DOCUMENT(first_page, {driver: "cdp"}) +LET home_page = 'https://www.cybertek.fr/boutique/index.aspx' // discover next pages LET pagination = ELEMENT(doc, "div .pagination-div") @@ -232,6 +233,7 @@ LET results = ( LET products_available = ( FOR el IN ELEMENTS(doc, "div .listing_dispo") LET url = ELEMENT(el, "a") + FILTER url.attributes.href != home_page // exclude home page LET name = TRIM(FIRST(SPLIT(INNER_TEXT(ELEMENT(el, "div .height-txt-cat")), "-"))) LET price = TO_FLOAT(SUBSTITUTE(INNER_TEXT(ELEMENT(el, "div .price_prod_resp")), "€", ".")) RETURN { @@ -245,6 +247,7 @@ LET results = ( LET products_not_available = ( FOR el IN ELEMENTS(doc, "div .listing_nodispo") LET url = ELEMENT(el, "a") + FILTER url.attributes.href != home_page // exclude home page LET name = TRIM(FIRST(SPLIT(INNER_TEXT(ELEMENT(el, "div .height-txt-cat")), "-"))) LET price = TO_FLOAT(SUBSTITUTE(INNER_TEXT(ELEMENT(el, "div .price_prod_resp")), "€", ".")) RETURN { @@ -261,7 +264,7 @@ LET results = ( // combine all arrays to a single one RETURN FLATTEN(results) - ` + ` return q }