listenbrainz: implement duplicate listen check on import

This commit is contained in:
Philipp Wolfer 2024-01-14 18:53:52 +01:00
parent b2b5c69278
commit 0d04b73338
No known key found for this signature in database
GPG key ID: 8FDF744D4919943B

View file

@ -24,13 +24,15 @@ import (
"go.uploadedlobster.com/scotty/internal/config" "go.uploadedlobster.com/scotty/internal/config"
"go.uploadedlobster.com/scotty/internal/i18n" "go.uploadedlobster.com/scotty/internal/i18n"
"go.uploadedlobster.com/scotty/internal/models" "go.uploadedlobster.com/scotty/internal/models"
"go.uploadedlobster.com/scotty/internal/similarity"
"go.uploadedlobster.com/scotty/internal/version" "go.uploadedlobster.com/scotty/internal/version"
) )
type ListenBrainzApiBackend struct { type ListenBrainzApiBackend struct {
client Client client Client
username string username string
existingMbids map[string]bool checkDuplicates bool
existingMbids map[string]bool
} }
func (b *ListenBrainzApiBackend) Name() string { return "listenbrainz" } func (b *ListenBrainzApiBackend) Name() string { return "listenbrainz" }
@ -44,6 +46,10 @@ func (b *ListenBrainzApiBackend) Options() []models.BackendOption {
Name: "token", Name: "token",
Label: i18n.Tr("Access token"), Label: i18n.Tr("Access token"),
Type: models.Secret, Type: models.Secret,
}, {
Name: "check-duplicate-listens",
Label: i18n.Tr("Check for duplicate listens on import (slower)"),
Type: models.Bool,
}} }}
} }
@ -51,6 +57,7 @@ func (b *ListenBrainzApiBackend) FromConfig(config *config.ServiceConfig) models
b.client = NewClient(config.GetString("token")) b.client = NewClient(config.GetString("token"))
b.client.MaxResults = MaxItemsPerGet b.client.MaxResults = MaxItemsPerGet
b.username = config.GetString("username") b.username = config.GetString("username")
b.checkDuplicates = config.GetBool("check-duplicate-listens", false)
return b return b
} }
@ -117,6 +124,7 @@ func (b *ListenBrainzApiBackend) ExportListens(oldestTimestamp time.Time, result
func (b *ListenBrainzApiBackend) ImportListens(export models.ListensResult, importResult models.ImportResult, progress chan models.Progress) (models.ImportResult, error) { func (b *ListenBrainzApiBackend) ImportListens(export models.ListensResult, importResult models.ImportResult, progress chan models.Progress) (models.ImportResult, error) {
total := len(export.Items) total := len(export.Items)
p := models.Progress{}.FromImportResult(importResult)
for i := 0; i < total; i += MaxListensPerRequest { for i := 0; i < total; i += MaxListensPerRequest {
listens := export.Items[i:min(i+MaxListensPerRequest, total)] listens := export.Items[i:min(i+MaxListensPerRequest, total)]
count := len(listens) count := len(listens)
@ -130,6 +138,21 @@ func (b *ListenBrainzApiBackend) ImportListens(export models.ListensResult, impo
} }
for _, l := range listens { for _, l := range listens {
if b.checkDuplicates {
isDupe, err := b.checkDuplicateListen(l)
p.Elapsed += 1
progress <- p
if err != nil {
return importResult, err
} else if isDupe {
count -= 1
msg := fmt.Sprintf("Ignored duplicate listen %v: \"%v\" by %v (%v)",
l.ListenedAt, l.TrackName, l.ArtistName(), l.RecordingMbid)
importResult.ImportErrors = append(importResult.ImportErrors, msg)
continue
}
}
l.FillAdditionalInfo() l.FillAdditionalInfo()
listen := Listen{ listen := Listen{
ListenedAt: l.ListenedAt.Unix(), ListenedAt: l.ListenedAt.Unix(),
@ -142,17 +165,22 @@ func (b *ListenBrainzApiBackend) ImportListens(export models.ListensResult, impo
} }
listen.TrackMetadata.AdditionalInfo["submission_client"] = version.AppName listen.TrackMetadata.AdditionalInfo["submission_client"] = version.AppName
listen.TrackMetadata.AdditionalInfo["submission_client_version"] = version.AppVersion listen.TrackMetadata.AdditionalInfo["submission_client_version"] = version.AppVersion
submission.Payload = append(submission.Payload, listen) submission.Payload = append(submission.Payload, listen)
} }
_, err := b.client.SubmitListens(submission) if len(submission.Payload) > 0 {
if err != nil { _, err := b.client.SubmitListens(submission)
return importResult, err if err != nil {
return importResult, err
}
} }
importResult.UpdateTimestamp(listens[count-1].ListenedAt) if count > 0 {
importResult.UpdateTimestamp(listens[count-1].ListenedAt)
}
importResult.ImportCount += count importResult.ImportCount += count
progress <- models.Progress{}.FromImportResult(importResult) progress <- p.FromImportResult(importResult)
} }
return importResult, nil return importResult, nil
@ -258,6 +286,33 @@ func (b *ListenBrainzApiBackend) ImportLoves(export models.LovesResult, importRe
return importResult, nil return importResult, nil
} }
// defaultDuration is the track length assumed by checkDuplicateListen when a
// listen carries no duration of its own.
var defaultDuration = 3 * time.Minute

// trackSimilarityThreshold is the minimum score from similarity.CompareTracks
// at which checkDuplicateListen treats an existing listen as the same track.
const trackSimilarityThreshold = 0.9
// checkDuplicateListen reports whether a listen similar to the given one
// already exists for the configured user.
//
// It fetches the user's existing listens in a window of one track duration
// before and after listen.ListenedAt and compares each candidate's track
// metadata against listen.Track. The first candidate whose similarity reaches
// trackSimilarityThreshold marks the listen as a duplicate.
//
// If fetching the candidates fails, it returns false together with that error.
func (b *ListenBrainzApiBackend) checkDuplicateListen(listen models.Listen) (bool, error) {
	// Fall back to defaultDuration when the duration is unknown. A negative
	// duration gets the same treatment, because it would otherwise swap the
	// lower and upper bound of the search window.
	duration := listen.Duration
	if duration <= 0 {
		duration = defaultDuration
	}

	minTime := listen.ListenedAt.Add(-duration)
	maxTime := listen.ListenedAt.Add(duration)

	// The upper bound is passed first, then the lower bound.
	candidates, err := b.client.GetListens(b.username, maxTime, minTime)
	if err != nil {
		return false, err
	}

	for _, c := range candidates.Payload.Listens {
		sim := similarity.CompareTracks(listen.Track, c.TrackMetadata.AsTrack())
		if sim >= trackSimilarityThreshold {
			return true, nil
		}
	}

	return false, nil
}
func (lbListen Listen) AsListen() models.Listen { func (lbListen Listen) AsListen() models.Listen {
listen := models.Listen{ listen := models.Listen{
ListenedAt: time.Unix(lbListen.ListenedAt, 0), ListenedAt: time.Unix(lbListen.ListenedAt, 0),