Handle Twitter duplicates (#20)
This commit adds a Hash attribute to help identify duplicate messages. Each Tweet has a TweetID attribute holding the identifier of the initial tweet of the thread, and a LastTweetID attribute tracking the most recent reply, so that the thread can be continued when a duplicate is detected.
Signed-off-by: Julien Riou <julien@riou.xyz>
This commit is contained in:
parent
9629483953
commit
1f5cef17d2
6 changed files with 166 additions and 34 deletions
|
@ -1,6 +1,7 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"crypto/md5"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
@ -19,9 +20,11 @@ const tweetMaxSize = 280
|
|||
// Tweet to store relationship between a Product and a Twitter notification
|
||||
type Tweet struct {
|
||||
gorm.Model
|
||||
TweetID int64
|
||||
ProductURL string
|
||||
Product Product `gorm:"not null;references:URL"`
|
||||
TweetID int64 `gorm:"not null;unique"`
|
||||
Hash string `gorm:"unique"`
|
||||
LastTweetID int64 `gorm:"index"`
|
||||
ProductURL string `gorm:"index"`
|
||||
Product Product `gorm:"not null;references:URL"`
|
||||
}
|
||||
|
||||
// TwitterNotifier to manage notifications to Twitter
|
||||
|
@ -31,6 +34,7 @@ type TwitterNotifier struct {
|
|||
user *twitter.User
|
||||
hashtagsMap []map[string]string
|
||||
enableReplies bool
|
||||
retentionDays int
|
||||
}
|
||||
|
||||
// NewTwitterNotifier creates a TwitterNotifier
|
||||
|
@ -58,7 +62,46 @@ func NewTwitterNotifier(c *TwitterConfig, db *gorm.DB) (*TwitterNotifier, error)
|
|||
}
|
||||
log.Debugf("connected to twitter as @%s", user.ScreenName)
|
||||
|
||||
return &TwitterNotifier{client: client, user: user, hashtagsMap: c.Hashtags, db: db, enableReplies: c.EnableReplies}, nil
|
||||
notifier := &TwitterNotifier{
|
||||
client: client,
|
||||
user: user,
|
||||
hashtagsMap: c.Hashtags,
|
||||
db: db,
|
||||
enableReplies: c.EnableReplies,
|
||||
retentionDays: c.Retention,
|
||||
}
|
||||
|
||||
// delete old tweets
|
||||
if err = notifier.ensureRetention(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return notifier, nil
|
||||
|
||||
}
|
||||
|
||||
// ensureRetention deletes tweets according to the defined retention
|
||||
func (c *TwitterNotifier) ensureRetention() error {
|
||||
if c.retentionDays == 0 {
|
||||
log.Debugf("tweet retention not found, skipping database cleanup")
|
||||
return nil
|
||||
}
|
||||
|
||||
var oldTweets []Tweet
|
||||
retentionDate := time.Now().Local().Add(-time.Hour * 24 * time.Duration(c.retentionDays))
|
||||
trx := c.db.Where("updated_at < ?", retentionDate).Find(&oldTweets)
|
||||
if trx.Error != nil {
|
||||
return fmt.Errorf("cannot find twitter old statuses: %s", trx.Error)
|
||||
}
|
||||
for _, t := range oldTweets {
|
||||
log.Debugf("twitter old status found with id %d", t.TweetID)
|
||||
if trx = c.db.Unscoped().Delete(&t); trx.Error != nil {
|
||||
log.Warnf("cannot remove old tweet %d: %s", t.TweetID, trx.Error)
|
||||
} else {
|
||||
log.Infof("twitter old status %d removed from database", t.TweetID)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// create a brand new tweet
|
||||
|
@ -98,26 +141,69 @@ func (c *TwitterNotifier) buildHashtags(productName string) string {
|
|||
// NotifyWhenAvailable create a Twitter status for announcing that a product is available
|
||||
// implements the Notifier interface
|
||||
func (c *TwitterNotifier) NotifyWhenAvailable(shopName string, productName string, productPrice float64, productCurrency string, productURL string) error {
|
||||
// TODO: check if message exists in the database to avoid flood
|
||||
// format message
|
||||
hashtags := c.buildHashtags(productName)
|
||||
message := formatAvailableTweet(shopName, productName, productPrice, productCurrency, productURL, hashtags)
|
||||
// create thread
|
||||
tweetID, err := c.createTweet(message)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create new twitter thread: %s", err)
|
||||
}
|
||||
log.Infof("tweet %d sent", tweetID)
|
||||
|
||||
// save thread to database
|
||||
t := Tweet{TweetID: tweetID, ProductURL: productURL}
|
||||
trx := c.db.Create(&t)
|
||||
if trx.Error != nil {
|
||||
return fmt.Errorf("failed to save tweet %d to database: %s", t.TweetID, trx.Error)
|
||||
// compute message checksum to avoid duplicates
|
||||
var tweet Tweet
|
||||
hash := fmt.Sprintf("%x", md5.Sum([]byte(message)))
|
||||
trx := c.db.Where(Tweet{Hash: hash}).First(&tweet)
|
||||
if trx.Error != nil && trx.Error != gorm.ErrRecordNotFound {
|
||||
return fmt.Errorf("could not search for tweet with hash %s for product '%s': %s", hash, productURL, trx.Error)
|
||||
}
|
||||
log.Debugf("tweet %d saved to database", t.TweetID)
|
||||
|
||||
if trx.Error == gorm.ErrRecordNotFound {
|
||||
|
||||
// tweet has not been sent in the past
|
||||
// create thread
|
||||
tweetID, err := c.createTweet(message)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not create new twitter thread for product '%s': %s", productURL, err)
|
||||
}
|
||||
log.Infof("tweet %d sent for product '%s'", tweetID, productURL)
|
||||
|
||||
// save thread to database
|
||||
tweet = Tweet{TweetID: tweetID, ProductURL: productURL, Hash: hash}
|
||||
trx = c.db.Create(&tweet)
|
||||
if trx.Error != nil {
|
||||
return fmt.Errorf("could not save tweet %d to database for product '%s': %s", tweet.TweetID, productURL, trx.Error)
|
||||
}
|
||||
log.Debugf("tweet %d saved to database", tweet.TweetID)
|
||||
|
||||
} else {
|
||||
|
||||
if !c.enableReplies {
|
||||
log.Debugf("twitter replies are disabled, skipping available notification for product '%s'", productURL)
|
||||
return nil
|
||||
}
|
||||
|
||||
// select tweet to reply
|
||||
lastTweetID := CoalesceInt64(tweet.LastTweetID, tweet.TweetID)
|
||||
if lastTweetID == 0 {
|
||||
return fmt.Errorf("could not find original tweet ID to create reply for product '%s'", productURL)
|
||||
}
|
||||
|
||||
// tweet already has been sent in the past and replies are enabled
|
||||
// continuing thread
|
||||
tweetID, err := c.replyToTweet(lastTweetID, "Good news, it's available again!")
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not reply to tweet %d for product '%s': %s", lastTweetID, productURL, err)
|
||||
}
|
||||
log.Infof("reply to tweet %d sent with id %d for product '%s'", lastTweetID, tweetID, productURL)
|
||||
|
||||
// save thread to database
|
||||
tweet.LastTweetID = tweetID
|
||||
if trx = c.db.Save(&tweet); trx.Error != nil {
|
||||
return fmt.Errorf("could not save tweet %d to database for product '%s': %s", tweet.TweetID, productURL, trx.Error)
|
||||
}
|
||||
log.Debugf("tweet %d saved in database", tweet.TweetID)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// formatAvailableTweet creates a message based on product characteristics
|
||||
func formatAvailableTweet(shopName string, productName string, productPrice float64, productCurrency string, productURL string, hashtags string) string {
|
||||
// format message
|
||||
formattedPrice := formatPrice(productPrice, productCurrency)
|
||||
|
@ -140,32 +226,37 @@ func (c *TwitterNotifier) NotifyWhenNotAvailable(productURL string, duration tim
|
|||
// find Tweet in the database
|
||||
var tweet Tweet
|
||||
trx := c.db.Where(Tweet{ProductURL: productURL}).First(&tweet)
|
||||
|
||||
if trx.Error != nil {
|
||||
return fmt.Errorf("failed to find tweet in database for product with url %s: %s", productURL, trx.Error)
|
||||
}
|
||||
if tweet.TweetID == 0 {
|
||||
log.Warnf("tweet for product with url %s not found, skipping close notification", productURL)
|
||||
return nil
|
||||
return fmt.Errorf("could not find tweet for product '%s' in the database: %s", productURL, trx.Error)
|
||||
}
|
||||
|
||||
if c.enableReplies {
|
||||
// format message
|
||||
message := fmt.Sprintf("And it's gone (%s)", duration)
|
||||
|
||||
// close thread on twitter
|
||||
_, err := c.replyToTweet(tweet.TweetID, message)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create reply tweet: %s", err)
|
||||
// select tweet to reply
|
||||
lastTweetID := CoalesceInt64(tweet.LastTweetID, tweet.TweetID)
|
||||
if lastTweetID == 0 {
|
||||
return fmt.Errorf("could not find original tweet ID to create reply for product '%s'", productURL)
|
||||
}
|
||||
log.Infof("reply to tweet %d sent", tweet.TweetID)
|
||||
}
|
||||
|
||||
// remove tweet from database
|
||||
trx = c.db.Unscoped().Delete(&tweet)
|
||||
if trx.Error != nil {
|
||||
return fmt.Errorf("failed to remove tweet %d from database: %s", tweet.TweetID, trx.Error)
|
||||
// close thread on twitter
|
||||
tweetID, err := c.replyToTweet(lastTweetID, message)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not close thread on twitter for product '%s': %s", productURL, err)
|
||||
}
|
||||
log.Infof("reply to tweet %d sent with id %d for product '%s'", lastTweetID, tweetID, productURL)
|
||||
|
||||
// save tweet id on database
|
||||
tweet.LastTweetID = tweetID
|
||||
if trx = c.db.Save(&tweet); trx.Error != nil {
|
||||
return fmt.Errorf("could not save tweet %d to database for product '%s': %s", tweet.TweetID, productURL, trx.Error)
|
||||
}
|
||||
log.Debugf("tweet %d saved in database", tweet.TweetID)
|
||||
} else {
|
||||
log.Debugf("twitter replies are disabled, skipping not available notification for '%s'", productURL)
|
||||
}
|
||||
log.Debugf("tweet removed from database")
|
||||
|
||||
return nil
|
||||
}
|
||||
|
|
Reference in a new issue