From 0d04b7333876dc613c816e41d64c2636135bd499 Mon Sep 17 00:00:00 2001
From: Philipp Wolfer
Date: Sun, 14 Jan 2024 18:53:52 +0100
Subject: [PATCH] listenbrainz: implement duplicate listen check on import

---
Review notes (this section is ignored when the patch is applied):
- ImportListens: record the timestamp of the last listen of each batch
  instead of listens[count-1]; count is decremented for every skipped
  duplicate, so that index no longer pointed at the last listen.
- ImportListens: replaced "else if" after a returning branch with a plain if.
- defaultDuration is now a constant, grouped with trackSimilarityThreshold,
  and checkDuplicateListen is documented.
- Hunk offsets and the diffstat were recomputed; the blob ids on the
  "index" line still describe the unrevised patch.

 .../backends/listenbrainz/listenbrainz.go     | 89 +++++++++++++++++--
 1 file changed, 81 insertions(+), 8 deletions(-)

diff --git a/internal/backends/listenbrainz/listenbrainz.go b/internal/backends/listenbrainz/listenbrainz.go
index 02b6fb4..4fcfd3a 100644
--- a/internal/backends/listenbrainz/listenbrainz.go
+++ b/internal/backends/listenbrainz/listenbrainz.go
@@ -24,13 +24,15 @@ import (
 	"go.uploadedlobster.com/scotty/internal/config"
 	"go.uploadedlobster.com/scotty/internal/i18n"
 	"go.uploadedlobster.com/scotty/internal/models"
+	"go.uploadedlobster.com/scotty/internal/similarity"
 	"go.uploadedlobster.com/scotty/internal/version"
 )
 
 type ListenBrainzApiBackend struct {
-	client        Client
-	username      string
-	existingMbids map[string]bool
+	client          Client
+	username        string
+	checkDuplicates bool
+	existingMbids   map[string]bool
 }
 
 func (b *ListenBrainzApiBackend) Name() string { return "listenbrainz" }
@@ -44,6 +46,10 @@ func (b *ListenBrainzApiBackend) Options() []models.BackendOption {
 		Name:  "token",
 		Label: i18n.Tr("Access token"),
 		Type:  models.Secret,
+	}, {
+		Name:  "check-duplicate-listens",
+		Label: i18n.Tr("Check for duplicate listens on import (slower)"),
+		Type:  models.Bool,
 	}}
 }
 
@@ -51,6 +57,7 @@ func (b *ListenBrainzApiBackend) FromConfig(config *config.ServiceConfig) models
 	b.client = NewClient(config.GetString("token"))
 	b.client.MaxResults = MaxItemsPerGet
 	b.username = config.GetString("username")
+	b.checkDuplicates = config.GetBool("check-duplicate-listens", false)
 	return b
 }
 
@@ -117,6 +124,7 @@ func (b *ListenBrainzApiBackend) ExportListens(oldestTimestamp time.Time, result
 
 func (b *ListenBrainzApiBackend) ImportListens(export models.ListensResult, importResult models.ImportResult, progress chan models.Progress) (models.ImportResult, error) {
 	total := len(export.Items)
+	p := models.Progress{}.FromImportResult(importResult)
 	for i := 0; i < total; i += MaxListensPerRequest {
 		listens := export.Items[i:min(i+MaxListensPerRequest, total)]
 		count := len(listens)
@@ -130,6 +138,24 @@ func (b *ListenBrainzApiBackend) ImportListens(export models.ListensResult, impo
 		}
 
 		for _, l := range listens {
+			if b.checkDuplicates {
+				isDupe, err := b.checkDuplicateListen(l)
+				p.Elapsed += 1
+				progress <- p
+				if err != nil {
+					return importResult, err
+				}
+				if isDupe {
+					// Skipped listens are not submitted, so they must not
+					// be counted as imported.
+					count -= 1
+					msg := fmt.Sprintf("Ignored duplicate listen %v: \"%v\" by %v (%v)",
+						l.ListenedAt, l.TrackName, l.ArtistName(), l.RecordingMbid)
+					importResult.ImportErrors = append(importResult.ImportErrors, msg)
+					continue
+				}
+			}
+
 			l.FillAdditionalInfo()
 			listen := Listen{
 				ListenedAt: l.ListenedAt.Unix(),
@@ -142,17 +168,23 @@ func (b *ListenBrainzApiBackend) ImportListens(export models.ListensResult, impo
 			}
 			listen.TrackMetadata.AdditionalInfo["submission_client"] = version.AppName
 			listen.TrackMetadata.AdditionalInfo["submission_client_version"] = version.AppVersion
+
 			submission.Payload = append(submission.Payload, listen)
 		}
 
-		_, err := b.client.SubmitListens(submission)
-		if err != nil {
-			return importResult, err
+		if len(submission.Payload) > 0 {
+			_, err := b.client.SubmitListens(submission)
+			if err != nil {
+				return importResult, err
+			}
 		}
 
-		importResult.UpdateTimestamp(listens[count-1].ListenedAt)
+		// Duplicates are skipped because a matching listen was already found
+		// for the user, so the whole batch counts as processed. Indexing with
+		// the reduced count would pick an arbitrary earlier listen instead.
+		importResult.UpdateTimestamp(listens[len(listens)-1].ListenedAt)
 		importResult.ImportCount += count
-		progress <- models.Progress{}.FromImportResult(importResult)
+		progress <- p.FromImportResult(importResult)
 	}
 
 	return importResult, nil
@@ -258,6 +290,47 @@ func (b *ListenBrainzApiBackend) ImportLoves(export models.LovesResult, importRe
 	return importResult, nil
 }
 
+const (
+	// defaultDuration is the half-width of the duplicate search window used
+	// when a listen has no duration of its own.
+	defaultDuration = 3 * time.Minute
+
+	// trackSimilarityThreshold is the minimum similarity.CompareTracks score
+	// at which an already stored listen is treated as a duplicate.
+	trackSimilarityThreshold = 0.9
+)
+
+// checkDuplicateListen reports whether the listens already stored for
+// b.username contain one that matches the given listen.
+//
+// It requests the user's listens with ListenedAt plus and minus the listen's
+// duration (defaultDuration if the duration is zero) as maxTime and minTime,
+// and returns true as soon as one of the returned listens scores at least
+// trackSimilarityThreshold in similarity.CompareTracks. An error from the
+// request is returned unchanged together with false.
+func (b *ListenBrainzApiBackend) checkDuplicateListen(listen models.Listen) (bool, error) {
+	duration := listen.Duration
+	if duration == 0 {
+		duration = defaultDuration
+	}
+	// Look one duration before and after the time the listen was recorded.
+	minTime := listen.ListenedAt.Add(-duration)
+	maxTime := listen.ListenedAt.Add(duration)
+	candidates, err := b.client.GetListens(b.username, maxTime, minTime)
+	if err != nil {
+		return false, err
+	}
+
+	for _, c := range candidates.Payload.Listens {
+		sim := similarity.CompareTracks(listen.Track, c.TrackMetadata.AsTrack())
+		if sim >= trackSimilarityThreshold {
+			return true, nil
+		}
+	}
+
+	return false, nil
+}
+
 func (lbListen Listen) AsListen() models.Listen {
 	listen := models.Listen{
 		ListenedAt: time.Unix(lbListen.ListenedAt, 0),