From 5c56e480f1a4a0b8d6e82beb747e9852fde738a3 Mon Sep 17 00:00:00 2001 From: Philipp Wolfer Date: Fri, 23 May 2025 16:33:28 +0200 Subject: [PATCH 01/22] Moved general LB related code to separate package --- .../backends/listenbrainz/listenbrainz.go | 54 ++++++++++--------- .../listenbrainz/listenbrainz_test.go | 11 ++-- .../backends => pkg}/listenbrainz/client.go | 5 +- .../listenbrainz/client_test.go | 14 ++--- .../backends => pkg}/listenbrainz/models.go | 15 +++--- .../listenbrainz/models_test.go | 4 +- .../listenbrainz/testdata/feedback.json | 0 .../listenbrainz/testdata/listen.json | 0 .../listenbrainz/testdata/listens.json | 0 .../listenbrainz/testdata/lookup.json | 0 10 files changed, 54 insertions(+), 49 deletions(-) rename {internal/backends => pkg}/listenbrainz/client.go (96%) rename {internal/backends => pkg}/listenbrainz/client_test.go (93%) rename {internal/backends => pkg}/listenbrainz/models.go (91%) rename {internal/backends => pkg}/listenbrainz/models_test.go (97%) rename {internal/backends => pkg}/listenbrainz/testdata/feedback.json (100%) rename {internal/backends => pkg}/listenbrainz/testdata/listen.json (100%) rename {internal/backends => pkg}/listenbrainz/testdata/listens.json (100%) rename {internal/backends => pkg}/listenbrainz/testdata/lookup.json (100%) diff --git a/internal/backends/listenbrainz/listenbrainz.go b/internal/backends/listenbrainz/listenbrainz.go index bf46c22..5e80a10 100644 --- a/internal/backends/listenbrainz/listenbrainz.go +++ b/internal/backends/listenbrainz/listenbrainz.go @@ -29,10 +29,11 @@ import ( "go.uploadedlobster.com/scotty/internal/models" "go.uploadedlobster.com/scotty/internal/similarity" "go.uploadedlobster.com/scotty/internal/version" + "go.uploadedlobster.com/scotty/pkg/listenbrainz" ) type ListenBrainzApiBackend struct { - client Client + client listenbrainz.Client mbClient musicbrainzws2.Client username string checkDuplicates bool @@ -58,13 +59,13 @@ func (b *ListenBrainzApiBackend) Options() 
[]models.BackendOption { } func (b *ListenBrainzApiBackend) InitConfig(config *config.ServiceConfig) error { - b.client = NewClient(config.GetString("token")) + b.client = listenbrainz.NewClient(config.GetString("token"), version.UserAgent()) b.mbClient = *musicbrainzws2.NewClient(musicbrainzws2.AppInfo{ Name: version.AppName, Version: version.AppVersion, URL: version.AppURL, }) - b.client.MaxResults = MaxItemsPerGet + b.client.MaxResults = listenbrainz.MaxItemsPerGet b.username = config.GetString("username") b.checkDuplicates = config.GetBool("check-duplicate-listens", false) return nil @@ -116,7 +117,7 @@ func (b *ListenBrainzApiBackend) ExportListens(ctx context.Context, oldestTimest for _, listen := range result.Payload.Listens { if listen.ListenedAt > oldestTimestamp.Unix() { - listens = append(listens, listen.AsListen()) + listens = append(listens, AsListen(listen)) } else { // result contains listens older then oldestTimestamp break @@ -138,16 +139,16 @@ func (b *ListenBrainzApiBackend) ExportListens(ctx context.Context, oldestTimest func (b *ListenBrainzApiBackend) ImportListens(ctx context.Context, export models.ListensResult, importResult models.ImportResult, progress chan models.TransferProgress) (models.ImportResult, error) { total := len(export.Items) p := models.TransferProgress{}.FromImportResult(importResult, false) - for i := 0; i < total; i += MaxListensPerRequest { - listens := export.Items[i:min(i+MaxListensPerRequest, total)] + for i := 0; i < total; i += listenbrainz.MaxListensPerRequest { + listens := export.Items[i:min(i+listenbrainz.MaxListensPerRequest, total)] count := len(listens) if count == 0 { break } - submission := ListenSubmission{ - ListenType: Import, - Payload: make([]Listen, 0, count), + submission := listenbrainz.ListenSubmission{ + ListenType: listenbrainz.Import, + Payload: make([]listenbrainz.Listen, 0, count), } for _, l := range listens { @@ -167,9 +168,9 @@ func (b *ListenBrainzApiBackend) ImportListens(ctx 
context.Context, export model } l.FillAdditionalInfo() - listen := Listen{ + listen := listenbrainz.Listen{ ListenedAt: l.ListenedAt.Unix(), - TrackMetadata: Track{ + TrackMetadata: listenbrainz.Track{ TrackName: l.TrackName, ReleaseName: l.ReleaseName, ArtistName: l.ArtistName(), @@ -228,7 +229,7 @@ func (b *ListenBrainzApiBackend) ExportLoves(ctx context.Context, oldestTimestam func (b *ListenBrainzApiBackend) exportLoves(ctx context.Context, oldestTimestamp time.Time, results chan models.LovesResult) { offset := 0 defer close(results) - loves := make(models.LovesList, 0, 2*MaxItemsPerGet) + loves := make(models.LovesList, 0, 2*listenbrainz.MaxItemsPerGet) out: for { @@ -254,7 +255,7 @@ out: } } - love := feedback.AsLove() + love := AsLove(feedback) if love.Created.After(oldestTimestamp) { loves = append(loves, love) } else { @@ -262,7 +263,7 @@ out: } } - offset += MaxItemsPerGet + offset += listenbrainz.MaxItemsPerGet } sort.Sort(loves) @@ -278,7 +279,7 @@ func (b *ListenBrainzApiBackend) ImportLoves(ctx context.Context, export models. go b.exportLoves(ctx, time.Unix(0, 0), existingLovesChan) // TODO: Store MBIDs directly - b.existingMBIDs = make(map[mbtypes.MBID]bool, MaxItemsPerGet) + b.existingMBIDs = make(map[mbtypes.MBID]bool, listenbrainz.MaxItemsPerGet) for existingLoves := range existingLovesChan { if existingLoves.Error != nil { @@ -316,7 +317,7 @@ func (b *ListenBrainzApiBackend) ImportLoves(ctx context.Context, export models. 
if b.existingMBIDs[recordingMBID] { ok = true } else { - resp, err := b.client.SendFeedback(ctx, Feedback{ + resp, err := b.client.SendFeedback(ctx, listenbrainz.Feedback{ RecordingMBID: recordingMBID, Score: 1, }) @@ -366,7 +367,7 @@ func (b *ListenBrainzApiBackend) checkDuplicateListen(ctx context.Context, liste } for _, c := range candidates.Payload.Listens { - sim := similarity.CompareTracks(listen.Track, c.TrackMetadata.AsTrack()) + sim := similarity.CompareTracks(listen.Track, AsTrack(c.TrackMetadata)) if sim >= trackSimilarityThreshold { return true, nil } @@ -375,7 +376,8 @@ func (b *ListenBrainzApiBackend) checkDuplicateListen(ctx context.Context, liste return false, nil } -func (b *ListenBrainzApiBackend) lookupRecording(ctx context.Context, mbid mbtypes.MBID) (*Track, error) { +func (b *ListenBrainzApiBackend) lookupRecording( + ctx context.Context, mbid mbtypes.MBID) (*listenbrainz.Track, error) { filter := musicbrainzws2.IncludesFilter{ Includes: []string{"artist-credits"}, } @@ -388,10 +390,10 @@ func (b *ListenBrainzApiBackend) lookupRecording(ctx context.Context, mbid mbtyp for _, artist := range recording.ArtistCredit { artistMBIDs = append(artistMBIDs, artist.Artist.ID) } - track := Track{ + track := listenbrainz.Track{ TrackName: recording.Title, ArtistName: recording.ArtistCredit.String(), - MBIDMapping: &MBIDMapping{ + MBIDMapping: &listenbrainz.MBIDMapping{ // In case of redirects this MBID differs from the looked up MBID RecordingMBID: recording.ID, ArtistMBIDs: artistMBIDs, @@ -400,26 +402,26 @@ func (b *ListenBrainzApiBackend) lookupRecording(ctx context.Context, mbid mbtyp return &track, nil } -func (lbListen Listen) AsListen() models.Listen { +func AsListen(lbListen listenbrainz.Listen) models.Listen { listen := models.Listen{ ListenedAt: time.Unix(lbListen.ListenedAt, 0), UserName: lbListen.UserName, - Track: lbListen.TrackMetadata.AsTrack(), + Track: AsTrack(lbListen.TrackMetadata), } return listen } -func (f Feedback) AsLove() 
models.Love { +func AsLove(f listenbrainz.Feedback) models.Love { recordingMBID := f.RecordingMBID track := f.TrackMetadata if track == nil { - track = &Track{} + track = &listenbrainz.Track{} } love := models.Love{ UserName: f.UserName, RecordingMBID: recordingMBID, Created: time.Unix(f.Created, 0), - Track: track.AsTrack(), + Track: AsTrack(*track), } if love.Track.RecordingMBID == "" { @@ -429,7 +431,7 @@ func (f Feedback) AsLove() models.Love { return love } -func (t Track) AsTrack() models.Track { +func AsTrack(t listenbrainz.Track) models.Track { track := models.Track{ TrackName: t.TrackName, ReleaseName: t.ReleaseName, diff --git a/internal/backends/listenbrainz/listenbrainz_test.go b/internal/backends/listenbrainz/listenbrainz_test.go index bf2e4d3..dd3e1d3 100644 --- a/internal/backends/listenbrainz/listenbrainz_test.go +++ b/internal/backends/listenbrainz/listenbrainz_test.go @@ -24,15 +24,16 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "go.uploadedlobster.com/mbtypes" - "go.uploadedlobster.com/scotty/internal/backends/listenbrainz" + lbapi "go.uploadedlobster.com/scotty/internal/backends/listenbrainz" "go.uploadedlobster.com/scotty/internal/config" + "go.uploadedlobster.com/scotty/pkg/listenbrainz" ) func TestInitConfig(t *testing.T) { c := viper.New() c.Set("token", "thetoken") service := config.NewServiceConfig("test", c) - backend := listenbrainz.ListenBrainzApiBackend{} + backend := lbapi.ListenBrainzApiBackend{} err := backend.InitConfig(&service) assert.NoError(t, err) } @@ -57,7 +58,7 @@ func TestListenBrainzListenAsListen(t *testing.T) { }, }, } - listen := lbListen.AsListen() + listen := lbapi.AsListen(lbListen) assert.Equal(t, time.Unix(1699289873, 0), listen.ListenedAt) assert.Equal(t, lbListen.UserName, listen.UserName) assert.Equal(t, time.Duration(413787*time.Millisecond), listen.Duration) @@ -93,7 +94,7 @@ func TestListenBrainzFeedbackAsLove(t *testing.T) { }, }, } - love := feedback.AsLove() + 
love := lbapi.AsLove(feedback) assert := assert.New(t) assert.Equal(time.Unix(1699859066, 0).Unix(), love.Created.Unix()) assert.Equal(feedback.UserName, love.UserName) @@ -114,7 +115,7 @@ func TestListenBrainzPartialFeedbackAsLove(t *testing.T) { RecordingMBID: recordingMBID, Score: 1, } - love := feedback.AsLove() + love := lbapi.AsLove(feedback) assert := assert.New(t) assert.Equal(time.Unix(1699859066, 0).Unix(), love.Created.Unix()) assert.Equal(recordingMBID, love.RecordingMBID) diff --git a/internal/backends/listenbrainz/client.go b/pkg/listenbrainz/client.go similarity index 96% rename from internal/backends/listenbrainz/client.go rename to pkg/listenbrainz/client.go index d1a1fa6..957a946 100644 --- a/internal/backends/listenbrainz/client.go +++ b/pkg/listenbrainz/client.go @@ -28,7 +28,6 @@ import ( "time" "github.com/go-resty/resty/v2" - "go.uploadedlobster.com/scotty/internal/version" "go.uploadedlobster.com/scotty/pkg/ratelimit" ) @@ -44,13 +43,13 @@ type Client struct { MaxResults int } -func NewClient(token string) Client { +func NewClient(token string, userAgent string) Client { client := resty.New() client.SetBaseURL(listenBrainzBaseURL) client.SetAuthScheme("Token") client.SetAuthToken(token) client.SetHeader("Accept", "application/json") - client.SetHeader("User-Agent", version.UserAgent()) + client.SetHeader("User-Agent", userAgent) // Handle rate limiting (see https://listenbrainz.readthedocs.io/en/latest/users/api/index.html#rate-limiting) ratelimit.EnableHTTPHeaderRateLimit(client, "X-RateLimit-Reset-In") diff --git a/internal/backends/listenbrainz/client_test.go b/pkg/listenbrainz/client_test.go similarity index 93% rename from internal/backends/listenbrainz/client_test.go rename to pkg/listenbrainz/client_test.go index 45bb0de..3742ca9 100644 --- a/internal/backends/listenbrainz/client_test.go +++ b/pkg/listenbrainz/client_test.go @@ -31,12 +31,12 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" 
"go.uploadedlobster.com/mbtypes" - "go.uploadedlobster.com/scotty/internal/backends/listenbrainz" + "go.uploadedlobster.com/scotty/pkg/listenbrainz" ) func TestNewClient(t *testing.T) { token := "foobar123" - client := listenbrainz.NewClient(token) + client := listenbrainz.NewClient(token, "test/1.0") assert.Equal(t, token, client.HTTPClient.Token) assert.Equal(t, listenbrainz.DefaultItemsPerGet, client.MaxResults) } @@ -44,7 +44,7 @@ func TestNewClient(t *testing.T) { func TestGetListens(t *testing.T) { defer httpmock.DeactivateAndReset() - client := listenbrainz.NewClient("thetoken") + client := listenbrainz.NewClient("thetoken", "test/1.0") client.MaxResults = 2 setupHTTPMock(t, client.HTTPClient.GetClient(), "https://api.listenbrainz.org/1/user/outsidecontext/listens", @@ -64,7 +64,7 @@ func TestGetListens(t *testing.T) { } func TestSubmitListens(t *testing.T) { - client := listenbrainz.NewClient("thetoken") + client := listenbrainz.NewClient("thetoken", "test/1.0") httpmock.ActivateNonDefault(client.HTTPClient.GetClient()) responder, err := httpmock.NewJsonResponder(200, listenbrainz.StatusResult{ @@ -104,7 +104,7 @@ func TestSubmitListens(t *testing.T) { func TestGetFeedback(t *testing.T) { defer httpmock.DeactivateAndReset() - client := listenbrainz.NewClient("thetoken") + client := listenbrainz.NewClient("thetoken", "test/1.0") client.MaxResults = 2 setupHTTPMock(t, client.HTTPClient.GetClient(), "https://api.listenbrainz.org/1/feedback/user/outsidecontext/get-feedback", @@ -123,7 +123,7 @@ func TestGetFeedback(t *testing.T) { } func TestSendFeedback(t *testing.T) { - client := listenbrainz.NewClient("thetoken") + client := listenbrainz.NewClient("thetoken", "test/1.0") httpmock.ActivateNonDefault(client.HTTPClient.GetClient()) responder, err := httpmock.NewJsonResponder(200, listenbrainz.StatusResult{ @@ -149,7 +149,7 @@ func TestSendFeedback(t *testing.T) { func TestLookup(t *testing.T) { defer httpmock.DeactivateAndReset() - client := 
listenbrainz.NewClient("thetoken") + client := listenbrainz.NewClient("thetoken", "test/1.0") setupHTTPMock(t, client.HTTPClient.GetClient(), "https://api.listenbrainz.org/1/metadata/lookup", "testdata/lookup.json") diff --git a/internal/backends/listenbrainz/models.go b/pkg/listenbrainz/models.go similarity index 91% rename from internal/backends/listenbrainz/models.go rename to pkg/listenbrainz/models.go index ada75d3..2dac432 100644 --- a/internal/backends/listenbrainz/models.go +++ b/pkg/listenbrainz/models.go @@ -1,5 +1,5 @@ /* -Copyright © 2023 Philipp Wolfer +Copyright © 2023-2025 Philipp Wolfer Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -66,16 +66,19 @@ type Track struct { TrackName string `json:"track_name,omitempty"` ArtistName string `json:"artist_name,omitempty"` ReleaseName string `json:"release_name,omitempty"` + RecordingMSID string `json:"recording_msid,omitempty"` AdditionalInfo map[string]any `json:"additional_info,omitempty"` MBIDMapping *MBIDMapping `json:"mbid_mapping,omitempty"` } type MBIDMapping struct { - RecordingName string `json:"recording_name,omitempty"` - RecordingMBID mbtypes.MBID `json:"recording_mbid,omitempty"` - ReleaseMBID mbtypes.MBID `json:"release_mbid,omitempty"` - ArtistMBIDs []mbtypes.MBID `json:"artist_mbids,omitempty"` - Artists []Artist `json:"artists,omitempty"` + ArtistMBIDs []mbtypes.MBID `json:"artist_mbids,omitempty"` + Artists []Artist `json:"artists,omitempty"` + RecordingMBID mbtypes.MBID `json:"recording_mbid,omitempty"` + RecordingName string `json:"recording_name,omitempty"` + ReleaseMBID mbtypes.MBID `json:"release_mbid,omitempty"` + CAAID int `json:"caa_id,omitempty"` + CAAReleaseMBID mbtypes.MBID `json:"caa_release_mbid,omitempty"` } type Artist struct { diff --git a/internal/backends/listenbrainz/models_test.go b/pkg/listenbrainz/models_test.go similarity index 97% rename from 
internal/backends/listenbrainz/models_test.go rename to pkg/listenbrainz/models_test.go index 02cbe98..8fb4994 100644 --- a/internal/backends/listenbrainz/models_test.go +++ b/pkg/listenbrainz/models_test.go @@ -1,5 +1,5 @@ /* -Copyright © 2023 Philipp Wolfer +Copyright © 2023-2025 Philipp Wolfer Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -29,7 +29,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "go.uploadedlobster.com/mbtypes" - "go.uploadedlobster.com/scotty/internal/backends/listenbrainz" + "go.uploadedlobster.com/scotty/pkg/listenbrainz" ) func TestTrackDurationMillisecondsInt(t *testing.T) { diff --git a/internal/backends/listenbrainz/testdata/feedback.json b/pkg/listenbrainz/testdata/feedback.json similarity index 100% rename from internal/backends/listenbrainz/testdata/feedback.json rename to pkg/listenbrainz/testdata/feedback.json diff --git a/internal/backends/listenbrainz/testdata/listen.json b/pkg/listenbrainz/testdata/listen.json similarity index 100% rename from internal/backends/listenbrainz/testdata/listen.json rename to pkg/listenbrainz/testdata/listen.json diff --git a/internal/backends/listenbrainz/testdata/listens.json b/pkg/listenbrainz/testdata/listens.json similarity index 100% rename from internal/backends/listenbrainz/testdata/listens.json rename to pkg/listenbrainz/testdata/listens.json diff --git a/internal/backends/listenbrainz/testdata/lookup.json b/pkg/listenbrainz/testdata/lookup.json similarity index 100% rename from internal/backends/listenbrainz/testdata/lookup.json rename to pkg/listenbrainz/testdata/lookup.json From 92e7216fac128ef0607a46adef6d8df456e12814 Mon Sep 17 00:00:00 2001 From: Philipp Wolfer Date: Fri, 23 May 2025 19:03:06 +0200 Subject: [PATCH 02/22] Implemented listenbrainz-archive backend with listen export support --- README.md | 25 +- config.example.toml | 7 + 
go.mod | 1 + go.sum | 2 + internal/backends/backends.go | 24 +- internal/backends/backends_test.go | 6 + internal/backends/lbarchive/lbarchive.go | 121 ++++++++ internal/backends/lbarchive/lbarchive_test.go | 40 +++ pkg/listenbrainz/archive.go | 267 ++++++++++++++++++ pkg/listenbrainz/models.go | 10 +- 10 files changed, 475 insertions(+), 28 deletions(-) create mode 100644 internal/backends/lbarchive/lbarchive.go create mode 100644 internal/backends/lbarchive/lbarchive_test.go create mode 100644 pkg/listenbrainz/archive.go diff --git a/README.md b/README.md index c764730..6f997ed 100644 --- a/README.md +++ b/README.md @@ -117,18 +117,19 @@ scotty beam listens deezer listenbrainz --timestamp "2023-12-06 14:26:24" ### Supported backends The following table lists the available backends and the currently supported features. -Backend | Listens Export | Listens Import | Loves Export | Loves Import -----------------|----------------|----------------|--------------|------------- -deezer | ✓ | ⨯ | ✓ | - -funkwhale | ✓ | ⨯ | ✓ | - -jspf | ✓ | ✓ | ✓ | ✓ -lastfm | ✓ | ✓ | ✓ | ✓ -listenbrainz | ✓ | ✓ | ✓ | ✓ -maloja | ✓ | ✓ | ⨯ | ⨯ -scrobbler-log | ✓ | ✓ | ⨯ | ⨯ -spotify | ✓ | ⨯ | ✓ | - -spotify-history | ✓ | ⨯ | ⨯ | ⨯ -subsonic | ⨯ | ⨯ | ✓ | - +Backend | Listens Export | Listens Import | Loves Export | Loves Import +---------------------|----------------|----------------|--------------|------------- +deezer | ✓ | ⨯ | ✓ | - +funkwhale | ✓ | ⨯ | ✓ | - +jspf | ✓ | ✓ | ✓ | ✓ +lastfm | ✓ | ✓ | ✓ | ✓ +listenbrainz | ✓ | ✓ | ✓ | ✓ +listenbrainz-archive | ✓ | - | - | - +maloja | ✓ | ✓ | ⨯ | ⨯ +scrobbler-log | ✓ | ✓ | ⨯ | ⨯ +spotify | ✓ | ⨯ | ✓ | - +spotify-history | ✓ | ⨯ | ⨯ | ⨯ +subsonic | ⨯ | ⨯ | ✓ | - **✓** implemented **-** not yet implemented **⨯** unavailable / not planned diff --git a/config.example.toml b/config.example.toml index 6b81bac..40ffd18 100644 --- a/config.example.toml +++ b/config.example.toml @@ -19,6 +19,13 @@ token = "" # not already exists in your ListenBrainz 
profile. check-duplicate-listens = false +[service.listenbrainz-archive] +# This backend supports listens from a ListenBrainz export archive +# (https://listenbrainz.org/settings/export/). +backend = "listenbrainz-archive" +# The file path to the ListenBrainz export archive. +file-path = "./listenbrainz_outsidecontext.zip" + [service.maloja] # Maloja is a self hosted listening service (https://github.com/krateng/maloja) backend = "maloja" diff --git a/go.mod b/go.mod index a00b416..ccdb6cc 100644 --- a/go.mod +++ b/go.mod @@ -53,6 +53,7 @@ require ( github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect github.com/rivo/uniseg v0.4.7 // indirect github.com/sagikazarmark/locafero v0.9.0 // indirect + github.com/simonfrey/jsonl v0.0.0-20240904112901-935399b9a740 // indirect github.com/sourcegraph/conc v0.3.0 // indirect github.com/spf13/afero v1.14.0 // indirect github.com/spf13/pflag v1.0.6 // indirect diff --git a/go.sum b/go.sum index 3cd01a6..028515c 100644 --- a/go.sum +++ b/go.sum @@ -107,6 +107,8 @@ github.com/sagikazarmark/locafero v0.9.0 h1:GbgQGNtTrEmddYDSAH9QLRyfAHY12md+8YFT github.com/sagikazarmark/locafero v0.9.0/go.mod h1:UBUyz37V+EdMS3hDF3QWIiVr/2dPrx49OMO0Bn0hJqk= github.com/shkh/lastfm-go v0.0.0-20191215035245-89a801c244e0 h1:cgqwZtnR+IQfUYDLJ3Kiy4aE+O/wExTzEIg8xwC4Qfs= github.com/shkh/lastfm-go v0.0.0-20191215035245-89a801c244e0/go.mod h1:n3nudMl178cEvD44PaopxH9jhJaQzthSxUzLO5iKMy4= +github.com/simonfrey/jsonl v0.0.0-20240904112901-935399b9a740 h1:CXJI+lliMiiEwzfgE8yt/38K0heYDgQ0L3f/3fxRnQU= +github.com/simonfrey/jsonl v0.0.0-20240904112901-935399b9a740/go.mod h1:G4w16caPmc6at7u4fmkj/8OAoOnM9mkmJr2fvL0vhaw= github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo= github.com/sourcegraph/conc v0.3.0/go.mod h1:Sdozi7LEKbFPqYX2/J+iBAM6HpqSLTASQIKqDmF7Mt0= github.com/spf13/afero v1.14.0 h1:9tH6MapGnn/j0eb0yIXiLjERO8RB6xIVZRDCX7PtqWA= diff --git a/internal/backends/backends.go 
b/internal/backends/backends.go index a9c3292..a1cd407 100644 --- a/internal/backends/backends.go +++ b/internal/backends/backends.go @@ -27,6 +27,7 @@ import ( "go.uploadedlobster.com/scotty/internal/backends/funkwhale" "go.uploadedlobster.com/scotty/internal/backends/jspf" "go.uploadedlobster.com/scotty/internal/backends/lastfm" + "go.uploadedlobster.com/scotty/internal/backends/lbarchive" "go.uploadedlobster.com/scotty/internal/backends/listenbrainz" "go.uploadedlobster.com/scotty/internal/backends/maloja" "go.uploadedlobster.com/scotty/internal/backends/scrobblerlog" @@ -105,17 +106,18 @@ func GetBackends() BackendList { } var knownBackends = map[string]func() models.Backend{ - "deezer": func() models.Backend { return &deezer.DeezerApiBackend{} }, - "dump": func() models.Backend { return &dump.DumpBackend{} }, - "funkwhale": func() models.Backend { return &funkwhale.FunkwhaleApiBackend{} }, - "jspf": func() models.Backend { return &jspf.JSPFBackend{} }, - "lastfm": func() models.Backend { return &lastfm.LastfmApiBackend{} }, - "listenbrainz": func() models.Backend { return &listenbrainz.ListenBrainzApiBackend{} }, - "maloja": func() models.Backend { return &maloja.MalojaApiBackend{} }, - "scrobbler-log": func() models.Backend { return &scrobblerlog.ScrobblerLogBackend{} }, - "spotify": func() models.Backend { return &spotify.SpotifyApiBackend{} }, - "spotify-history": func() models.Backend { return &spotifyhistory.SpotifyHistoryBackend{} }, - "subsonic": func() models.Backend { return &subsonic.SubsonicApiBackend{} }, + "deezer": func() models.Backend { return &deezer.DeezerApiBackend{} }, + "dump": func() models.Backend { return &dump.DumpBackend{} }, + "funkwhale": func() models.Backend { return &funkwhale.FunkwhaleApiBackend{} }, + "jspf": func() models.Backend { return &jspf.JSPFBackend{} }, + "lastfm": func() models.Backend { return &lastfm.LastfmApiBackend{} }, + "listenbrainz": func() models.Backend { return &listenbrainz.ListenBrainzApiBackend{} }, + 
"listenbrainz-archive": func() models.Backend { return &lbarchive.ListenBrainzArchiveBackend{} }, + "maloja": func() models.Backend { return &maloja.MalojaApiBackend{} }, + "scrobbler-log": func() models.Backend { return &scrobblerlog.ScrobblerLogBackend{} }, + "spotify": func() models.Backend { return &spotify.SpotifyApiBackend{} }, + "spotify-history": func() models.Backend { return &spotifyhistory.SpotifyHistoryBackend{} }, + "subsonic": func() models.Backend { return &subsonic.SubsonicApiBackend{} }, } func backendWithConfig(config config.ServiceConfig) (models.Backend, error) { diff --git a/internal/backends/backends_test.go b/internal/backends/backends_test.go index e115636..737c7e3 100644 --- a/internal/backends/backends_test.go +++ b/internal/backends/backends_test.go @@ -28,6 +28,7 @@ import ( "go.uploadedlobster.com/scotty/internal/backends/funkwhale" "go.uploadedlobster.com/scotty/internal/backends/jspf" "go.uploadedlobster.com/scotty/internal/backends/lastfm" + "go.uploadedlobster.com/scotty/internal/backends/lbarchive" "go.uploadedlobster.com/scotty/internal/backends/listenbrainz" "go.uploadedlobster.com/scotty/internal/backends/maloja" "go.uploadedlobster.com/scotty/internal/backends/scrobblerlog" @@ -103,6 +104,11 @@ func TestImplementsInterfaces(t *testing.T) { expectInterface[models.LovesExport](t, &lastfm.LastfmApiBackend{}) expectInterface[models.LovesImport](t, &lastfm.LastfmApiBackend{}) + expectInterface[models.ListensExport](t, &lbarchive.ListenBrainzArchiveBackend{}) + // expectInterface[models.ListensImport](t, &lbarchive.ListenBrainzArchiveBackend{}) + // expectInterface[models.LovesExport](t, &lbarchive.ListenBrainzArchiveBackend{}) + // expectInterface[models.LovesImport](t, &lbarchive.ListenBrainzArchiveBackend{}) + expectInterface[models.ListensExport](t, &listenbrainz.ListenBrainzApiBackend{}) expectInterface[models.ListensImport](t, &listenbrainz.ListenBrainzApiBackend{}) expectInterface[models.LovesExport](t, 
&listenbrainz.ListenBrainzApiBackend{}) diff --git a/internal/backends/lbarchive/lbarchive.go b/internal/backends/lbarchive/lbarchive.go new file mode 100644 index 0000000..143a674 --- /dev/null +++ b/internal/backends/lbarchive/lbarchive.go @@ -0,0 +1,121 @@ +/* +Copyright © 2025 Philipp Wolfer + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ +package lbarchive + +import ( + "context" + "time" + + lbapi "go.uploadedlobster.com/scotty/internal/backends/listenbrainz" + "go.uploadedlobster.com/scotty/internal/config" + "go.uploadedlobster.com/scotty/internal/i18n" + "go.uploadedlobster.com/scotty/internal/models" + "go.uploadedlobster.com/scotty/pkg/listenbrainz" +) + +const batchSize = 2000 + +type ListenBrainzArchiveBackend struct { + filePath string +} + +func (b *ListenBrainzArchiveBackend) Name() string { return "listenbrainz-archive" } + +func (b *ListenBrainzArchiveBackend) Options() []models.BackendOption { + return []models.BackendOption{{ + Name: "file-path", + Label: i18n.Tr("Export ZIP file path"), + Type: models.String, + }} +} + +func (b *ListenBrainzArchiveBackend) InitConfig(config *config.ServiceConfig) error { + b.filePath = config.GetString("file-path") + return nil +} + +func (b *ListenBrainzArchiveBackend) ExportListens( + ctx context.Context, oldestTimestamp time.Time, + results chan models.ListensResult, progress chan models.TransferProgress) { + startTime := time.Now() + minTime := oldestTimestamp + if minTime.Unix() < 1 { + minTime = time.Unix(1, 0) + } + + totalDuration := startTime.Sub(oldestTimestamp) + p := models.TransferProgress{ + Export: &models.Progress{ + Total: int64(totalDuration.Seconds()), + }, + } + + archive, err := listenbrainz.OpenArchive(b.filePath) + if err != nil { + p.Export.Abort() + progress <- p + results <- models.ListensResult{Error: err} + return + } + defer archive.Close() + + userInfo, err := archive.UserInfo() + if err != nil { + p.Export.Abort() + progress <- p + results <- models.ListensResult{Error: err} + return + } + + listens := make(models.ListensList, 0, batchSize) + for rawListen, err := range archive.IterListens(oldestTimestamp) { + if err != nil { + p.Export.Abort() + progress <- p + results <- models.ListensResult{Error: err} + return + } + + listen := lbapi.AsListen(rawListen) + if listen.UserName == "" { + listen.UserName = 
userInfo.Name + } + listens = append(listens, listen) + + // Update the progress + p.Export.TotalItems += 1 + remainingTime := startTime.Sub(listen.ListenedAt) + p.Export.Elapsed = int64(totalDuration.Seconds() - remainingTime.Seconds()) + + // Allow the importer to start processing the listens by + // sending them in batches. + if len(listens) >= batchSize { + results <- models.ListensResult{Items: listens} + progress <- p + listens = listens[:0] + } + } + + results <- models.ListensResult{Items: listens} + p.Export.Complete() + progress <- p +} diff --git a/internal/backends/lbarchive/lbarchive_test.go b/internal/backends/lbarchive/lbarchive_test.go new file mode 100644 index 0000000..b7e164a --- /dev/null +++ b/internal/backends/lbarchive/lbarchive_test.go @@ -0,0 +1,40 @@ +/* +Copyright © 2025 Philipp Wolfer + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ +package lbarchive_test + +import ( + "testing" + + "github.com/spf13/viper" + "github.com/stretchr/testify/assert" + "go.uploadedlobster.com/scotty/internal/backends/lbarchive" + "go.uploadedlobster.com/scotty/internal/config" +) + +func TestInitConfig(t *testing.T) { + c := viper.New() + c.Set("file-path", "/foo/lbarchive.zip") + service := config.NewServiceConfig("test", c) + backend := lbarchive.ListenBrainzArchiveBackend{} + err := backend.InitConfig(&service) + assert.NoError(t, err) +} diff --git a/pkg/listenbrainz/archive.go b/pkg/listenbrainz/archive.go new file mode 100644 index 0000000..668b7e1 --- /dev/null +++ b/pkg/listenbrainz/archive.go @@ -0,0 +1,267 @@ +/* +Copyright © 2025 Philipp Wolfer + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +package listenbrainz + +import ( + "archive/zip" + "encoding/json" + "fmt" + "io" + "iter" + "os" + "regexp" + "sort" + "strconv" + "time" + + "github.com/simonfrey/jsonl" +) + +// Represents a ListenBrainz export archive. 
+// +// The export contains the user's listen history, favorite tracks and +// user information. +type Archive struct { + backend archiveBackend +} + +// Close the archive and release any resources. +func (a *Archive) Close() error { + return a.backend.Close() +} + +// Read the user information from the archive. +func (a *Archive) UserInfo() (UserInfo, error) { + f, err := a.backend.OpenUserInfoFile() + if err != nil { + return UserInfo{}, err + } + defer f.Close() + + userInfo := UserInfo{} + bytes, err := io.ReadAll(f) + if err != nil { + return userInfo, err + } + + json.Unmarshal(bytes, &userInfo) + return userInfo, nil +} + +// Yields all listens from the archive that are newer than the given timestamp. +// The listens are yielded in ascending order of their listened_at timestamp. +func (a *Archive) IterListens(minTimestamp time.Time) iter.Seq2[Listen, error] { + return func(yield func(Listen, error) bool) { + files, err := a.backend.ListListenExports() + if err != nil { + yield(Listen{}, err) + return + } + + sort.Slice(files, func(i, j int) bool { + return files[i].TimeRange.Start.Before(files[j].TimeRange.Start) + }) + + for _, file := range files { + if file.TimeRange.End.Before(minTimestamp) { + continue + } + + f := NewExportFile(file.f) + for l, err := range f.IterListens() { + if err != nil { + yield(Listen{}, err) + return + } + + if !time.Unix(l.ListenedAt, 0).After(minTimestamp) { + continue + } + if !yield(l, nil) { + break + } + } + } + } +} + +// Open a ListenBrainz archive from file path. 
+func OpenArchive(path string) (*Archive, error) { + fi, err := os.Stat(path) + if err != nil { + return nil, err + } + switch mode := fi.Mode(); { + case mode.IsRegular(): + backend := &zipArchive{} + err := backend.Open(path) + if err != nil { + return nil, err + } + return &Archive{backend: backend}, nil + case mode.IsDir(): + // TODO: Implement directory mode + return nil, fmt.Errorf("directory mode not implemented") + default: + return nil, fmt.Errorf("unsupported file mode: %s", mode) + } +} + +type UserInfo struct { + ID string `json:"user_id"` + Name string `json:"username"` +} + +type archiveBackend interface { + Close() error + OpenUserInfoFile() (io.ReadCloser, error) + ListListenExports() ([]ListenExportFileInfo, error) +} + +type timeRange struct { + Start time.Time + End time.Time +} + +type openableFile interface { + Open() (io.ReadCloser, error) +} + +type ListenExportFileInfo struct { + Name string + TimeRange timeRange + f openableFile +} + +type zipArchive struct { + zip *zip.ReadCloser +} + +func (a *zipArchive) Open(path string) error { + zip, err := zip.OpenReader(path) + if err != nil { + return err + } + a.zip = zip + return nil +} + +func (a *zipArchive) Close() error { + if a.zip == nil { + return nil + } + return a.zip.Close() +} + +func (a *zipArchive) OpenUserInfoFile() (io.ReadCloser, error) { + file, err := a.zip.Open("user.json") + if err != nil { + return nil, err + } + return file, nil +} + +func (a *zipArchive) ListListenExports() ([]ListenExportFileInfo, error) { + re := regexp.MustCompile(`^listens/(\d{4})/(\d{1,2})\.jsonl$`) + result := make([]ListenExportFileInfo, 0) + + for _, file := range a.zip.File { + match := re.FindStringSubmatch(file.Name) + if match == nil { + continue + } + + year := match[1] + month := match[2] + times, err := getMonthTimeRange(year, month) + if err != nil { + return nil, err + } + info := ListenExportFileInfo{ + Name: file.Name, + TimeRange: *times, + f: file, + } + result = append(result, info) + 
} + + return result, nil +} + +type ListenExportFile struct { + file openableFile +} + +func NewExportFile(f openableFile) ListenExportFile { + return ListenExportFile{file: f} +} + +func (f *ListenExportFile) openReader() (*jsonl.Reader, error) { + fio, err := f.file.Open() + if err != nil { + return nil, err + } + reader := jsonl.NewReader(fio) + return &reader, nil +} + +func (f *ListenExportFile) IterListens() iter.Seq2[Listen, error] { + return func(yield func(Listen, error) bool) { + reader, err := f.openReader() + if err != nil { + yield(Listen{}, err) + return + } + defer reader.Close() + + for { + listen := Listen{} + err := reader.ReadSingleLine(&listen) + if err != nil { + break + } + if !yield(listen, nil) { + break + } + } + } +} + +func getMonthTimeRange(year string, month string) (*timeRange, error) { + yearInt, err := strconv.Atoi(year) + if err != nil { + return nil, err + } + monthInt, err := strconv.Atoi(month) + if err != nil { + return nil, err + } + + r := &timeRange{} + r.Start = time.Date(yearInt, time.Month(monthInt), 1, 0, 0, 0, 0, time.UTC) + + // Get the end of the month + nextMonth := monthInt + 1 + r.End = time.Date( + yearInt, time.Month(nextMonth), 1, 0, 0, 0, 0, time.UTC).Add(-time.Second) + return r, nil +} diff --git a/pkg/listenbrainz/models.go b/pkg/listenbrainz/models.go index 2dac432..0b5f439 100644 --- a/pkg/listenbrainz/models.go +++ b/pkg/listenbrainz/models.go @@ -55,11 +55,11 @@ type ListenSubmission struct { } type Listen struct { - InsertedAt int64 `json:"inserted_at,omitempty"` - ListenedAt int64 `json:"listened_at"` - RecordingMSID string `json:"recording_msid,omitempty"` - UserName string `json:"user_name,omitempty"` - TrackMetadata Track `json:"track_metadata"` + InsertedAt float64 `json:"inserted_at,omitempty"` + ListenedAt int64 `json:"listened_at"` + RecordingMSID string `json:"recording_msid,omitempty"` + UserName string `json:"user_name,omitempty"` + TrackMetadata Track `json:"track_metadata"` } type Track 
struct { From 424305518b49d50107a57caf190a10b79af6dfc7 Mon Sep 17 00:00:00 2001 From: Philipp Wolfer Date: Sat, 24 May 2025 00:21:46 +0200 Subject: [PATCH 03/22] Implemented directory mode for listenbrainz-archive --- pkg/listenbrainz/archive.go | 139 +++++++++++++++++++++++++++++------- 1 file changed, 112 insertions(+), 27 deletions(-) diff --git a/pkg/listenbrainz/archive.go b/pkg/listenbrainz/archive.go index 668b7e1..de34ba8 100644 --- a/pkg/listenbrainz/archive.go +++ b/pkg/listenbrainz/archive.go @@ -28,6 +28,7 @@ import ( "io" "iter" "os" + "path/filepath" "regexp" "sort" "strconv" @@ -51,7 +52,7 @@ func (a *Archive) Close() error { // Read the user information from the archive. func (a *Archive) UserInfo() (UserInfo, error) { - f, err := a.backend.OpenUserInfoFile() + f, err := a.backend.OpenFile("user.json") if err != nil { return UserInfo{}, err } @@ -67,11 +68,43 @@ func (a *Archive) UserInfo() (UserInfo, error) { return userInfo, nil } +func (a *Archive) ListListenExports() ([]ListenExportFileInfo, error) { + re := regexp.MustCompile(`^listens/(\d{4})/(\d{1,2})\.jsonl$`) + result := make([]ListenExportFileInfo, 0) + + files, err := a.backend.Glob("listens/*/*.jsonl") + if err != nil { + return nil, err + } + + for _, file := range files { + match := re.FindStringSubmatch(file.Name) + if match == nil { + continue + } + + year := match[1] + month := match[2] + times, err := getMonthTimeRange(year, month) + if err != nil { + return nil, err + } + info := ListenExportFileInfo{ + Name: file.Name, + TimeRange: *times, + f: file.File, + } + result = append(result, info) + } + + return result, nil +} + // Yields all listens from the archive that are newer than the given timestamp. // The listens are yielded in ascending order of their listened_at timestamp. 
func (a *Archive) IterListens(minTimestamp time.Time) iter.Seq2[Listen, error] { return func(yield func(Listen, error) bool) { - files, err := a.backend.ListListenExports() + files, err := a.ListListenExports() if err != nil { yield(Listen{}, err) return @@ -119,8 +152,12 @@ func OpenArchive(path string) (*Archive, error) { } return &Archive{backend: backend}, nil case mode.IsDir(): - // TODO: Implement directory mode - return nil, fmt.Errorf("directory mode not implemented") + backend := &dirArchive{} + err := backend.Open(path) + if err != nil { + return nil, err + } + return &Archive{backend: backend}, nil default: return nil, fmt.Errorf("unsupported file mode: %s", mode) } @@ -133,8 +170,8 @@ type UserInfo struct { type archiveBackend interface { Close() error - OpenUserInfoFile() (io.ReadCloser, error) - ListListenExports() ([]ListenExportFileInfo, error) + OpenFile(path string) (io.ReadCloser, error) + Glob(pattern string) ([]FileInfo, error) } type timeRange struct { @@ -142,16 +179,30 @@ type timeRange struct { End time.Time } -type openableFile interface { +type OpenableFile interface { Open() (io.ReadCloser, error) } +type FileInfo struct { + Name string + File OpenableFile +} + +type FilesystemFile struct { + path string +} + +func (f *FilesystemFile) Open() (io.ReadCloser, error) { + return os.Open(f.path) +} + type ListenExportFileInfo struct { Name string TimeRange timeRange - f openableFile + f OpenableFile } +// An implementation of the archiveBackend interface for zip files. 
type zipArchive struct { zip *zip.ReadCloser } @@ -172,34 +223,68 @@ func (a *zipArchive) Close() error { return a.zip.Close() } -func (a *zipArchive) OpenUserInfoFile() (io.ReadCloser, error) { - file, err := a.zip.Open("user.json") +func (a *zipArchive) Glob(pattern string) ([]FileInfo, error) { + result := make([]FileInfo, 0) + for _, file := range a.zip.File { + if matched, err := filepath.Match(pattern, file.Name); matched { + if err != nil { + return nil, err + } + info := FileInfo{ + Name: file.Name, + File: file, + } + result = append(result, info) + } + } + + return result, nil +} + +func (a *zipArchive) OpenFile(path string) (io.ReadCloser, error) { + file, err := a.zip.Open(path) if err != nil { return nil, err } return file, nil } -func (a *zipArchive) ListListenExports() ([]ListenExportFileInfo, error) { - re := regexp.MustCompile(`^listens/(\d{4})/(\d{1,2})\.jsonl$`) - result := make([]ListenExportFileInfo, 0) +// An implementation of the archiveBackend interface for directories. 
+type dirArchive struct { + dir string +} - for _, file := range a.zip.File { - match := re.FindStringSubmatch(file.Name) - if match == nil { - continue - } +func (a *dirArchive) Open(path string) error { + a.dir = filepath.Clean(path) + return nil +} - year := match[1] - month := match[2] - times, err := getMonthTimeRange(year, month) +func (a *dirArchive) Close() error { + return nil +} + +func (a *dirArchive) OpenFile(path string) (io.ReadCloser, error) { + file, err := os.Open(filepath.Join(a.dir, path)) + if err != nil { + return nil, err + } + return file, nil +} + +func (a *dirArchive) Glob(pattern string) ([]FileInfo, error) { + files, err := filepath.Glob(filepath.Join(a.dir, pattern)) + if err != nil { + return nil, err + } + result := make([]FileInfo, 0) + for _, filename := range files { + name, err := filepath.Rel(a.dir, filename) if err != nil { return nil, err } - info := ListenExportFileInfo{ - Name: file.Name, - TimeRange: *times, - f: file, + info := FileInfo{ + Name: name, + File: &FilesystemFile{path: filename}, } result = append(result, info) } @@ -208,10 +293,10 @@ func (a *zipArchive) ListListenExports() ([]ListenExportFileInfo, error) { } type ListenExportFile struct { - file openableFile + file OpenableFile } -func NewExportFile(f openableFile) ListenExportFile { +func NewExportFile(f OpenableFile) ListenExportFile { return ListenExportFile{file: f} } From 1025277ba91c8e92ea8f4ec9935a723c4b19532b Mon Sep 17 00:00:00 2001 From: Philipp Wolfer Date: Sat, 24 May 2025 00:35:55 +0200 Subject: [PATCH 04/22] Moved generic archive abstraction into separate package --- internal/archive/archive.go | 181 ++++++++++++++++++++++++++++++++++++ pkg/listenbrainz/archive.go | 162 +++----------------------------- 2 files changed, 196 insertions(+), 147 deletions(-) create mode 100644 internal/archive/archive.go diff --git a/internal/archive/archive.go b/internal/archive/archive.go new file mode 100644 index 0000000..604efe2 --- /dev/null +++ 
b/internal/archive/archive.go @@ -0,0 +1,181 @@ +/* +Copyright © 2025 Philipp Wolfer + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +// Implements generic access to files inside an archive. +// +// An archive in this context can be any container that holds files. +// In this implementation the archive can be a ZIP file or a directory. +package archive + +import ( + "archive/zip" + "fmt" + "io" + "os" + "path/filepath" +) + +// Generic archive interface. +type Archive interface { + Close() error + OpenFile(path string) (io.ReadCloser, error) + Glob(pattern string) ([]FileInfo, error) +} + +// Open an archive in path. +// The archive can be a ZIP file or a directory. The implementation +// will detect the type of archive and return the appropriate +// implementation of the Archive interface. 
+func OpenArchive(path string) (Archive, error) { + fi, err := os.Stat(path) + if err != nil { + return nil, err + } + switch mode := fi.Mode(); { + case mode.IsRegular(): + archive := &zipArchive{} + err := archive.Open(path) + if err != nil { + return nil, err + } + return archive, nil + case mode.IsDir(): + archive := &dirArchive{} + err := archive.Open(path) + if err != nil { + return nil, err + } + return archive, nil + default: + return nil, fmt.Errorf("unsupported file mode: %s", mode) + } +} + +// Interface for a file that can be opened when needed. +type OpenableFile interface { + Open() (io.ReadCloser, error) +} + +// Generic information about a file inside an archive. +type FileInfo struct { + Name string + File OpenableFile +} + +// A openable file in the filesystem. +type filesystemFile struct { + path string +} + +func (f *filesystemFile) Open() (io.ReadCloser, error) { + return os.Open(f.path) +} + +// An implementation of the archiveBackend interface for zip files. +type zipArchive struct { + zip *zip.ReadCloser +} + +func (a *zipArchive) Open(path string) error { + zip, err := zip.OpenReader(path) + if err != nil { + return err + } + a.zip = zip + return nil +} + +func (a *zipArchive) Close() error { + if a.zip == nil { + return nil + } + return a.zip.Close() +} + +func (a *zipArchive) Glob(pattern string) ([]FileInfo, error) { + result := make([]FileInfo, 0) + for _, file := range a.zip.File { + if matched, err := filepath.Match(pattern, file.Name); matched { + if err != nil { + return nil, err + } + info := FileInfo{ + Name: file.Name, + File: file, + } + result = append(result, info) + } + } + + return result, nil +} + +func (a *zipArchive) OpenFile(path string) (io.ReadCloser, error) { + file, err := a.zip.Open(path) + if err != nil { + return nil, err + } + return file, nil +} + +// An implementation of the archiveBackend interface for directories. 
+type dirArchive struct { + dir string +} + +func (a *dirArchive) Open(path string) error { + a.dir = filepath.Clean(path) + return nil +} + +func (a *dirArchive) Close() error { + return nil +} + +func (a *dirArchive) OpenFile(path string) (io.ReadCloser, error) { + file, err := os.Open(filepath.Join(a.dir, path)) + if err != nil { + return nil, err + } + return file, nil +} + +func (a *dirArchive) Glob(pattern string) ([]FileInfo, error) { + files, err := filepath.Glob(filepath.Join(a.dir, pattern)) + if err != nil { + return nil, err + } + result := make([]FileInfo, 0) + for _, filename := range files { + name, err := filepath.Rel(a.dir, filename) + if err != nil { + return nil, err + } + info := FileInfo{ + Name: name, + File: &filesystemFile{path: filename}, + } + result = append(result, info) + } + + return result, nil +} diff --git a/pkg/listenbrainz/archive.go b/pkg/listenbrainz/archive.go index de34ba8..a455d03 100644 --- a/pkg/listenbrainz/archive.go +++ b/pkg/listenbrainz/archive.go @@ -22,19 +22,16 @@ THE SOFTWARE. package listenbrainz import ( - "archive/zip" "encoding/json" - "fmt" "io" "iter" - "os" - "path/filepath" "regexp" "sort" "strconv" "time" "github.com/simonfrey/jsonl" + "go.uploadedlobster.com/scotty/internal/archive" ) // Represents a ListenBrainz export archive. @@ -42,7 +39,17 @@ import ( // The export contains the user's listen history, favorite tracks and // user information. type Archive struct { - backend archiveBackend + backend archive.Archive +} + +// Open a ListenBrainz archive from file path. +func OpenArchive(path string) (*Archive, error) { + backend, err := archive.OpenArchive(path) + if err != nil { + return nil, err + } + + return &Archive{backend: backend}, nil } // Close the archive and release any resources. @@ -137,166 +144,27 @@ func (a *Archive) IterListens(minTimestamp time.Time) iter.Seq2[Listen, error] { } } -// Open a ListenBrainz archive from file path. 
-func OpenArchive(path string) (*Archive, error) { - fi, err := os.Stat(path) - if err != nil { - return nil, err - } - switch mode := fi.Mode(); { - case mode.IsRegular(): - backend := &zipArchive{} - err := backend.Open(path) - if err != nil { - return nil, err - } - return &Archive{backend: backend}, nil - case mode.IsDir(): - backend := &dirArchive{} - err := backend.Open(path) - if err != nil { - return nil, err - } - return &Archive{backend: backend}, nil - default: - return nil, fmt.Errorf("unsupported file mode: %s", mode) - } -} - type UserInfo struct { ID string `json:"user_id"` Name string `json:"username"` } -type archiveBackend interface { - Close() error - OpenFile(path string) (io.ReadCloser, error) - Glob(pattern string) ([]FileInfo, error) -} - type timeRange struct { Start time.Time End time.Time } -type OpenableFile interface { - Open() (io.ReadCloser, error) -} - -type FileInfo struct { - Name string - File OpenableFile -} - -type FilesystemFile struct { - path string -} - -func (f *FilesystemFile) Open() (io.ReadCloser, error) { - return os.Open(f.path) -} - type ListenExportFileInfo struct { Name string TimeRange timeRange - f OpenableFile -} - -// An implementation of the archiveBackend interface for zip files. 
-type zipArchive struct { - zip *zip.ReadCloser -} - -func (a *zipArchive) Open(path string) error { - zip, err := zip.OpenReader(path) - if err != nil { - return err - } - a.zip = zip - return nil -} - -func (a *zipArchive) Close() error { - if a.zip == nil { - return nil - } - return a.zip.Close() -} - -func (a *zipArchive) Glob(pattern string) ([]FileInfo, error) { - result := make([]FileInfo, 0) - for _, file := range a.zip.File { - if matched, err := filepath.Match(pattern, file.Name); matched { - if err != nil { - return nil, err - } - info := FileInfo{ - Name: file.Name, - File: file, - } - result = append(result, info) - } - } - - return result, nil -} - -func (a *zipArchive) OpenFile(path string) (io.ReadCloser, error) { - file, err := a.zip.Open(path) - if err != nil { - return nil, err - } - return file, nil -} - -// An implementation of the archiveBackend interface for directories. -type dirArchive struct { - dir string -} - -func (a *dirArchive) Open(path string) error { - a.dir = filepath.Clean(path) - return nil -} - -func (a *dirArchive) Close() error { - return nil -} - -func (a *dirArchive) OpenFile(path string) (io.ReadCloser, error) { - file, err := os.Open(filepath.Join(a.dir, path)) - if err != nil { - return nil, err - } - return file, nil -} - -func (a *dirArchive) Glob(pattern string) ([]FileInfo, error) { - files, err := filepath.Glob(filepath.Join(a.dir, pattern)) - if err != nil { - return nil, err - } - result := make([]FileInfo, 0) - for _, filename := range files { - name, err := filepath.Rel(a.dir, filename) - if err != nil { - return nil, err - } - info := FileInfo{ - Name: name, - File: &FilesystemFile{path: filename}, - } - result = append(result, info) - } - - return result, nil + f archive.OpenableFile } type ListenExportFile struct { - file OpenableFile + file archive.OpenableFile } -func NewExportFile(f OpenableFile) ListenExportFile { +func NewExportFile(f archive.OpenableFile) ListenExportFile { return ListenExportFile{file: 
f} } From 8462b9395e35ee51f7be317cb79a548ba22b0827 Mon Sep 17 00:00:00 2001 From: Philipp Wolfer Date: Sat, 24 May 2025 00:45:11 +0200 Subject: [PATCH 05/22] Keep listenbrainz package internal for now --- internal/backends/lbarchive/lbarchive.go | 2 +- internal/backends/listenbrainz/listenbrainz.go | 2 +- internal/backends/listenbrainz/listenbrainz_test.go | 2 +- {pkg => internal}/listenbrainz/archive.go | 0 {pkg => internal}/listenbrainz/client.go | 0 {pkg => internal}/listenbrainz/client_test.go | 2 +- {pkg => internal}/listenbrainz/models.go | 0 {pkg => internal}/listenbrainz/models_test.go | 2 +- {pkg => internal}/listenbrainz/testdata/feedback.json | 0 {pkg => internal}/listenbrainz/testdata/listen.json | 0 {pkg => internal}/listenbrainz/testdata/listens.json | 0 {pkg => internal}/listenbrainz/testdata/lookup.json | 0 12 files changed, 5 insertions(+), 5 deletions(-) rename {pkg => internal}/listenbrainz/archive.go (100%) rename {pkg => internal}/listenbrainz/client.go (100%) rename {pkg => internal}/listenbrainz/client_test.go (99%) rename {pkg => internal}/listenbrainz/models.go (100%) rename {pkg => internal}/listenbrainz/models_test.go (98%) rename {pkg => internal}/listenbrainz/testdata/feedback.json (100%) rename {pkg => internal}/listenbrainz/testdata/listen.json (100%) rename {pkg => internal}/listenbrainz/testdata/listens.json (100%) rename {pkg => internal}/listenbrainz/testdata/lookup.json (100%) diff --git a/internal/backends/lbarchive/lbarchive.go b/internal/backends/lbarchive/lbarchive.go index 143a674..88d8be7 100644 --- a/internal/backends/lbarchive/lbarchive.go +++ b/internal/backends/lbarchive/lbarchive.go @@ -28,8 +28,8 @@ import ( lbapi "go.uploadedlobster.com/scotty/internal/backends/listenbrainz" "go.uploadedlobster.com/scotty/internal/config" "go.uploadedlobster.com/scotty/internal/i18n" + "go.uploadedlobster.com/scotty/internal/listenbrainz" "go.uploadedlobster.com/scotty/internal/models" - 
"go.uploadedlobster.com/scotty/pkg/listenbrainz" ) const batchSize = 2000 diff --git a/internal/backends/listenbrainz/listenbrainz.go b/internal/backends/listenbrainz/listenbrainz.go index 5e80a10..4f0ce2f 100644 --- a/internal/backends/listenbrainz/listenbrainz.go +++ b/internal/backends/listenbrainz/listenbrainz.go @@ -26,10 +26,10 @@ import ( "go.uploadedlobster.com/musicbrainzws2" "go.uploadedlobster.com/scotty/internal/config" "go.uploadedlobster.com/scotty/internal/i18n" + "go.uploadedlobster.com/scotty/internal/listenbrainz" "go.uploadedlobster.com/scotty/internal/models" "go.uploadedlobster.com/scotty/internal/similarity" "go.uploadedlobster.com/scotty/internal/version" - "go.uploadedlobster.com/scotty/pkg/listenbrainz" ) type ListenBrainzApiBackend struct { diff --git a/internal/backends/listenbrainz/listenbrainz_test.go b/internal/backends/listenbrainz/listenbrainz_test.go index dd3e1d3..f7151e5 100644 --- a/internal/backends/listenbrainz/listenbrainz_test.go +++ b/internal/backends/listenbrainz/listenbrainz_test.go @@ -26,7 +26,7 @@ import ( "go.uploadedlobster.com/mbtypes" lbapi "go.uploadedlobster.com/scotty/internal/backends/listenbrainz" "go.uploadedlobster.com/scotty/internal/config" - "go.uploadedlobster.com/scotty/pkg/listenbrainz" + "go.uploadedlobster.com/scotty/internal/listenbrainz" ) func TestInitConfig(t *testing.T) { diff --git a/pkg/listenbrainz/archive.go b/internal/listenbrainz/archive.go similarity index 100% rename from pkg/listenbrainz/archive.go rename to internal/listenbrainz/archive.go diff --git a/pkg/listenbrainz/client.go b/internal/listenbrainz/client.go similarity index 100% rename from pkg/listenbrainz/client.go rename to internal/listenbrainz/client.go diff --git a/pkg/listenbrainz/client_test.go b/internal/listenbrainz/client_test.go similarity index 99% rename from pkg/listenbrainz/client_test.go rename to internal/listenbrainz/client_test.go index 3742ca9..9baf293 100644 --- a/pkg/listenbrainz/client_test.go +++ 
b/internal/listenbrainz/client_test.go @@ -31,7 +31,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "go.uploadedlobster.com/mbtypes" - "go.uploadedlobster.com/scotty/pkg/listenbrainz" + "go.uploadedlobster.com/scotty/internal/listenbrainz" ) func TestNewClient(t *testing.T) { diff --git a/pkg/listenbrainz/models.go b/internal/listenbrainz/models.go similarity index 100% rename from pkg/listenbrainz/models.go rename to internal/listenbrainz/models.go diff --git a/pkg/listenbrainz/models_test.go b/internal/listenbrainz/models_test.go similarity index 98% rename from pkg/listenbrainz/models_test.go rename to internal/listenbrainz/models_test.go index 8fb4994..404b87b 100644 --- a/pkg/listenbrainz/models_test.go +++ b/internal/listenbrainz/models_test.go @@ -29,7 +29,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "go.uploadedlobster.com/mbtypes" - "go.uploadedlobster.com/scotty/pkg/listenbrainz" + "go.uploadedlobster.com/scotty/internal/listenbrainz" ) func TestTrackDurationMillisecondsInt(t *testing.T) { diff --git a/pkg/listenbrainz/testdata/feedback.json b/internal/listenbrainz/testdata/feedback.json similarity index 100% rename from pkg/listenbrainz/testdata/feedback.json rename to internal/listenbrainz/testdata/feedback.json diff --git a/pkg/listenbrainz/testdata/listen.json b/internal/listenbrainz/testdata/listen.json similarity index 100% rename from pkg/listenbrainz/testdata/listen.json rename to internal/listenbrainz/testdata/listen.json diff --git a/pkg/listenbrainz/testdata/listens.json b/internal/listenbrainz/testdata/listens.json similarity index 100% rename from pkg/listenbrainz/testdata/listens.json rename to internal/listenbrainz/testdata/listens.json diff --git a/pkg/listenbrainz/testdata/lookup.json b/internal/listenbrainz/testdata/lookup.json similarity index 100% rename from pkg/listenbrainz/testdata/lookup.json rename to internal/listenbrainz/testdata/lookup.json 
From cf5319309a4c81e29df2b9eb8edfe26f363ab8c1 Mon Sep 17 00:00:00 2001 From: Philipp Wolfer Date: Sat, 24 May 2025 00:51:28 +0200 Subject: [PATCH 06/22] Renamed listenbrainz.Archive to listenbrainz.ExportArchive --- internal/backends/lbarchive/lbarchive.go | 2 +- internal/listenbrainz/archive.go | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/internal/backends/lbarchive/lbarchive.go b/internal/backends/lbarchive/lbarchive.go index 88d8be7..0848d38 100644 --- a/internal/backends/lbarchive/lbarchive.go +++ b/internal/backends/lbarchive/lbarchive.go @@ -69,7 +69,7 @@ func (b *ListenBrainzArchiveBackend) ExportListens( }, } - archive, err := listenbrainz.OpenArchive(b.filePath) + archive, err := listenbrainz.OpenExportArchive(b.filePath) if err != nil { p.Export.Abort() progress <- p diff --git a/internal/listenbrainz/archive.go b/internal/listenbrainz/archive.go index a455d03..1d3efa3 100644 --- a/internal/listenbrainz/archive.go +++ b/internal/listenbrainz/archive.go @@ -38,27 +38,27 @@ import ( // // The export contains the user's listen history, favorite tracks and // user information. -type Archive struct { +type ExportArchive struct { backend archive.Archive } // Open a ListenBrainz archive from file path. -func OpenArchive(path string) (*Archive, error) { +func OpenExportArchive(path string) (*ExportArchive, error) { backend, err := archive.OpenArchive(path) if err != nil { return nil, err } - return &Archive{backend: backend}, nil + return &ExportArchive{backend: backend}, nil } // Close the archive and release any resources. -func (a *Archive) Close() error { +func (a *ExportArchive) Close() error { return a.backend.Close() } // Read the user information from the archive. 
-func (a *Archive) UserInfo() (UserInfo, error) { +func (a *ExportArchive) UserInfo() (UserInfo, error) { f, err := a.backend.OpenFile("user.json") if err != nil { return UserInfo{}, err @@ -75,7 +75,7 @@ func (a *Archive) UserInfo() (UserInfo, error) { return userInfo, nil } -func (a *Archive) ListListenExports() ([]ListenExportFileInfo, error) { +func (a *ExportArchive) ListListenExports() ([]ListenExportFileInfo, error) { re := regexp.MustCompile(`^listens/(\d{4})/(\d{1,2})\.jsonl$`) result := make([]ListenExportFileInfo, 0) @@ -109,7 +109,7 @@ func (a *Archive) ListListenExports() ([]ListenExportFileInfo, error) { // Yields all listens from the archive that are newer than the given timestamp. // The listens are yielded in ascending order of their listened_at timestamp. -func (a *Archive) IterListens(minTimestamp time.Time) iter.Seq2[Listen, error] { +func (a *ExportArchive) IterListens(minTimestamp time.Time) iter.Seq2[Listen, error] { return func(yield func(Listen, error) bool) { files, err := a.ListListenExports() if err != nil { From 0231331209e6a79416ce87dce99a0d56f3503c48 Mon Sep 17 00:00:00 2001 From: Philipp Wolfer Date: Sat, 24 May 2025 01:23:12 +0200 Subject: [PATCH 07/22] Implemented listenbrainz.ExportArchive.IterFeedback --- internal/listenbrainz/archive.go | 55 +++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 12 deletions(-) diff --git a/internal/listenbrainz/archive.go b/internal/listenbrainz/archive.go index 1d3efa3..eb7677c 100644 --- a/internal/listenbrainz/archive.go +++ b/internal/listenbrainz/archive.go @@ -23,6 +23,7 @@ package listenbrainz import ( "encoding/json" + "errors" "io" "iter" "regexp" @@ -54,6 +55,9 @@ func OpenExportArchive(path string) (*ExportArchive, error) { // Close the archive and release any resources.
func (a *ExportArchive) Close() error { + if a.backend == nil { + return nil + } return a.backend.Close() } @@ -126,8 +130,8 @@ func (a *ExportArchive) IterListens(minTimestamp time.Time) iter.Seq2[Listen, er continue } - f := NewExportFile(file.f) - for l, err := range f.IterListens() { + f := JSONLFile[Listen]{file: file.f} + for l, err := range f.IterItems() { if err != nil { yield(Listen{}, err) return @@ -144,6 +148,36 @@ func (a *ExportArchive) IterListens(minTimestamp time.Time) iter.Seq2[Listen, er } } +// Yields all feedbacks from the archive that are newer than the given timestamp. +// The feedbacks are yielded in ascending order of their Created timestamp. +func (a *ExportArchive) IterFeedback(minTimestamp time.Time) iter.Seq2[Feedback, error] { + return func(yield func(Feedback, error) bool) { + files, err := a.backend.Glob("feedback.jsonl") + if err != nil { + yield(Feedback{}, err) + return + } else if len(files) == 0 { + yield(Feedback{}, errors.New("no feedback.jsonl file found in archive")) + return + } + + j := JSONLFile[Feedback]{file: files[0].File} + for l, err := range j.IterItems() { + if err != nil { + yield(Feedback{}, err) + return + } + + if !time.Unix(l.Created, 0).After(minTimestamp) { + continue + } + if !yield(l, nil) { + break + } + } + } +} + type UserInfo struct { ID string `json:"user_id"` Name string `json:"username"` @@ -160,15 +194,11 @@ type ListenExportFileInfo struct { f archive.OpenableFile } -type ListenExportFile struct { +type JSONLFile[T any] struct { file archive.OpenableFile } -func NewExportFile(f archive.OpenableFile) ListenExportFile { - return ListenExportFile{file: f} -} - -func (f *ListenExportFile) openReader() (*jsonl.Reader, error) { +func (f *JSONLFile[T]) openReader() (*jsonl.Reader, error) { fio, err := f.file.Open() if err != nil { return nil, err @@ -177,17 +207,18 @@ func (f *ListenExportFile) openReader() (*jsonl.Reader, error) { return &reader, nil } -func (f *ListenExportFile) IterListens() 
iter.Seq2[Listen, error] { - return func(yield func(Listen, error) bool) { +func (f *JSONLFile[T]) IterItems() iter.Seq2[T, error] { + return func(yield func(T, error) bool) { reader, err := f.openReader() if err != nil { - yield(Listen{}, err) + var listen T + yield(listen, err) return } defer reader.Close() for { - listen := Listen{} + var listen T err := reader.ReadSingleLine(&listen) if err != nil { break From 975e2082548060acc484f484886df2fee91e28e6 Mon Sep 17 00:00:00 2001 From: Philipp Wolfer Date: Sat, 24 May 2025 02:20:07 +0200 Subject: [PATCH 08/22] Simplify dirArchive by using os.dirFS and have Archive.Open return fs.File --- internal/archive/archive.go | 40 +++++++++++++++----------------- internal/listenbrainz/archive.go | 2 +- 2 files changed, 20 insertions(+), 22 deletions(-) diff --git a/internal/archive/archive.go b/internal/archive/archive.go index 604efe2..7714552 100644 --- a/internal/archive/archive.go +++ b/internal/archive/archive.go @@ -30,6 +30,7 @@ import ( "archive/zip" "fmt" "io" + "io/fs" "os" "path/filepath" ) @@ -37,7 +38,7 @@ import ( // Generic archive interface. 
type Archive interface { Close() error - OpenFile(path string) (io.ReadCloser, error) + Open(path string) (fs.File, error) Glob(pattern string) ([]FileInfo, error) } @@ -53,14 +54,14 @@ func OpenArchive(path string) (Archive, error) { switch mode := fi.Mode(); { case mode.IsRegular(): archive := &zipArchive{} - err := archive.Open(path) + err := archive.OpenArchive(path) if err != nil { return nil, err } return archive, nil case mode.IsDir(): archive := &dirArchive{} - err := archive.Open(path) + err := archive.OpenArchive(path) if err != nil { return nil, err } @@ -95,7 +96,7 @@ type zipArchive struct { zip *zip.ReadCloser } -func (a *zipArchive) Open(path string) error { +func (a *zipArchive) OpenArchive(path string) error { zip, err := zip.OpenReader(path) if err != nil { return err @@ -129,7 +130,7 @@ func (a *zipArchive) Glob(pattern string) ([]FileInfo, error) { return result, nil } -func (a *zipArchive) OpenFile(path string) (io.ReadCloser, error) { +func (a *zipArchive) Open(path string) (fs.File, error) { file, err := a.zip.Open(path) if err != nil { return nil, err @@ -139,11 +140,13 @@ func (a *zipArchive) OpenFile(path string) (io.ReadCloser, error) { // An implementation of the archiveBackend interface for directories. type dirArchive struct { - dir string + path string + dirFS fs.FS } -func (a *dirArchive) Open(path string) error { - a.dir = filepath.Clean(path) +func (a *dirArchive) OpenArchive(path string) error { + a.path = filepath.Clean(path) + a.dirFS = os.DirFS(path) return nil } @@ -151,28 +154,23 @@ func (a *dirArchive) Close() error { return nil } -func (a *dirArchive) OpenFile(path string) (io.ReadCloser, error) { - file, err := os.Open(filepath.Join(a.dir, path)) - if err != nil { - return nil, err - } - return file, nil +// Open opens the named file in the archive. +// [fs.File.Close] must be called to release any associated resources. 
+func (a *dirArchive) Open(path string) (fs.File, error) { + return a.dirFS.Open(path) } func (a *dirArchive) Glob(pattern string) ([]FileInfo, error) { - files, err := filepath.Glob(filepath.Join(a.dir, pattern)) + files, err := fs.Glob(a.dirFS, pattern) if err != nil { return nil, err } result := make([]FileInfo, 0) - for _, filename := range files { - name, err := filepath.Rel(a.dir, filename) - if err != nil { - return nil, err - } + for _, name := range files { + fullPath := filepath.Join(a.path, name) info := FileInfo{ Name: name, - File: &filesystemFile{path: filename}, + File: &filesystemFile{path: fullPath}, } result = append(result, info) } diff --git a/internal/listenbrainz/archive.go b/internal/listenbrainz/archive.go index eb7677c..b7b5909 100644 --- a/internal/listenbrainz/archive.go +++ b/internal/listenbrainz/archive.go @@ -63,7 +63,7 @@ func (a *ExportArchive) Close() error { // Read the user information from the archive. func (a *ExportArchive) UserInfo() (UserInfo, error) { - f, err := a.backend.OpenFile("user.json") + f, err := a.backend.Open("user.json") if err != nil { return UserInfo{}, err } From d25095267876e1a6fdb19d950a061047da8a2c1d Mon Sep 17 00:00:00 2001 From: Philipp Wolfer Date: Sat, 24 May 2025 11:33:10 +0200 Subject: [PATCH 09/22] Extend dump backend to be able to write to a file --- config.example.toml | 8 +++- internal/backends/dump/dump.go | 75 ++++++++++++++++++++++++++++++---- 2 files changed, 74 insertions(+), 9 deletions(-) diff --git a/config.example.toml b/config.example.toml index 40ffd18..ecbba9b 100644 --- a/config.example.toml +++ b/config.example.toml @@ -141,4 +141,10 @@ client-secret = "" [service.dump] # This backend allows writing listens and loves as console output. Useful for # debugging the export from other services. -backend = "dump" +backend = "dump" +# Path to a file where the listens and loves are written to. If not set, +# the output is written to stdout. 
+file-path = "" +# If true (default), new listens will be appended to the existing file. Set to +# false to overwrite the file on every run. +append = true diff --git a/internal/backends/dump/dump.go b/internal/backends/dump/dump.go index 1fcd864..4714bd6 100644 --- a/internal/backends/dump/dump.go +++ b/internal/backends/dump/dump.go @@ -17,25 +17,80 @@ Scotty. If not, see . package dump import ( + "bytes" "context" "fmt" + "io" + "os" + "strings" "go.uploadedlobster.com/scotty/internal/config" + "go.uploadedlobster.com/scotty/internal/i18n" "go.uploadedlobster.com/scotty/internal/models" ) -type DumpBackend struct{} +type DumpBackend struct { + buffer io.ReadWriter + print bool // Whether to print the output to stdout +} func (b *DumpBackend) Name() string { return "dump" } -func (b *DumpBackend) Options() []models.BackendOption { return nil } +func (b *DumpBackend) Options() []models.BackendOption { + return []models.BackendOption{{ + Name: "file-path", + Label: i18n.Tr("File path"), + Type: models.String, + }, { + Name: "append", + Label: i18n.Tr("Append to file"), + Type: models.Bool, + Default: "true", + }} +} func (b *DumpBackend) InitConfig(config *config.ServiceConfig) error { + filePath := config.GetString("file-path") + append := config.GetBool("append", true) + if strings.TrimSpace(filePath) != "" { + mode := os.O_WRONLY | os.O_CREATE + if !append { + mode |= os.O_TRUNC // Truncate the file if not appending + } + f, err := os.OpenFile(filePath, mode, 0644) + if err != nil { + return err + } + b.buffer = f + b.print = false // If a file path is specified, we don't print to stdout + } else { + // If no file path is specified, use a bytes.Buffer for in-memory dumping + b.buffer = new(bytes.Buffer) + b.print = true // Print to stdout + } return nil } -func (b *DumpBackend) StartImport() error { return nil } -func (b *DumpBackend) FinishImport() error { return nil } +func (b *DumpBackend) StartImport() error { return nil } + +func (b *DumpBackend) 
FinishImport() error { + if b.print { + out := new(strings.Builder) + _, err := io.Copy(out, b.buffer) + if err != nil { + return err + } + fmt.Println(out.String()) + } + + // Close the io writer if it is closable + if closer, ok := b.buffer.(io.Closer); ok { + if err := closer.Close(); err != nil { + return fmt.Errorf("failed to close output file: %w", err) + } + } + return nil +} func (b *DumpBackend) ImportListens(ctx context.Context, export models.ListensResult, importResult models.ImportResult, progress chan models.TransferProgress) (models.ImportResult, error) { for _, listen := range export.Items { @@ -45,9 +100,11 @@ func (b *DumpBackend) ImportListens(ctx context.Context, export models.ListensRe importResult.UpdateTimestamp(listen.ListenedAt) importResult.ImportCount += 1 - msg := fmt.Sprintf("🎶 %v: \"%v\" by %v (%v)", + _, err := fmt.Fprintf(b.buffer, "🎶 %v: \"%v\" by %v (%v)\n", listen.ListenedAt, listen.TrackName, listen.ArtistName(), listen.RecordingMBID) - importResult.Log(models.Info, msg) + if err != nil { + return importResult, err + } progress <- models.TransferProgress{}.FromImportResult(importResult, false) } @@ -62,9 +119,11 @@ func (b *DumpBackend) ImportLoves(ctx context.Context, export models.LovesResult importResult.UpdateTimestamp(love.Created) importResult.ImportCount += 1 - msg := fmt.Sprintf("❤️ %v: \"%v\" by %v (%v)", + _, err := fmt.Fprintf(b.buffer, "❤️ %v: \"%v\" by %v (%v)\n", love.Created, love.TrackName, love.ArtistName(), love.RecordingMBID) - importResult.Log(models.Info, msg) + if err != nil { + return importResult, err + } progress <- models.TransferProgress{}.FromImportResult(importResult, false) } From dddd2e4eec7d2f54d16ea59ace358bd2c5d85ebc Mon Sep 17 00:00:00 2001 From: Philipp Wolfer Date: Sat, 24 May 2025 11:59:35 +0200 Subject: [PATCH 10/22] Implemented lbarchive loves export --- go.mod | 2 +- internal/backends/backends_test.go | 2 +- internal/backends/lbarchive/lbarchive.go | 95 ++++++++++++++- 
internal/backends/listenbrainz/helper.go | 115 ++++++++++++++++++ .../backends/listenbrainz/listenbrainz.go | 81 +----------- 5 files changed, 210 insertions(+), 85 deletions(-) create mode 100644 internal/backends/listenbrainz/helper.go diff --git a/go.mod b/go.mod index ccdb6cc..c4c2a65 100644 --- a/go.mod +++ b/go.mod @@ -15,6 +15,7 @@ require ( github.com/manifoldco/promptui v0.9.0 github.com/pelletier/go-toml/v2 v2.2.4 github.com/shkh/lastfm-go v0.0.0-20191215035245-89a801c244e0 + github.com/simonfrey/jsonl v0.0.0-20240904112901-935399b9a740 github.com/spf13/cast v1.8.0 github.com/spf13/cobra v1.9.1 github.com/spf13/viper v1.20.1 @@ -53,7 +54,6 @@ require ( github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect github.com/rivo/uniseg v0.4.7 // indirect github.com/sagikazarmark/locafero v0.9.0 // indirect - github.com/simonfrey/jsonl v0.0.0-20240904112901-935399b9a740 // indirect github.com/sourcegraph/conc v0.3.0 // indirect github.com/spf13/afero v1.14.0 // indirect github.com/spf13/pflag v1.0.6 // indirect diff --git a/internal/backends/backends_test.go b/internal/backends/backends_test.go index 737c7e3..026e487 100644 --- a/internal/backends/backends_test.go +++ b/internal/backends/backends_test.go @@ -106,7 +106,7 @@ func TestImplementsInterfaces(t *testing.T) { expectInterface[models.ListensExport](t, &lbarchive.ListenBrainzArchiveBackend{}) // expectInterface[models.ListensImport](t, &lbarchive.ListenBrainzArchiveBackend{}) - // expectInterface[models.LovesExport](t, &lbarchive.ListenBrainzArchiveBackend{}) + expectInterface[models.LovesExport](t, &lbarchive.ListenBrainzArchiveBackend{}) // expectInterface[models.LovesImport](t, &lbarchive.ListenBrainzArchiveBackend{}) expectInterface[models.ListensExport](t, &listenbrainz.ListenBrainzApiBackend{}) diff --git a/internal/backends/lbarchive/lbarchive.go b/internal/backends/lbarchive/lbarchive.go index 0848d38..cff2a1f 100644 --- a/internal/backends/lbarchive/lbarchive.go +++ 
b/internal/backends/lbarchive/lbarchive.go @@ -25,17 +25,23 @@ import ( "context" "time" + "go.uploadedlobster.com/musicbrainzws2" lbapi "go.uploadedlobster.com/scotty/internal/backends/listenbrainz" "go.uploadedlobster.com/scotty/internal/config" "go.uploadedlobster.com/scotty/internal/i18n" "go.uploadedlobster.com/scotty/internal/listenbrainz" "go.uploadedlobster.com/scotty/internal/models" + "go.uploadedlobster.com/scotty/internal/version" ) -const batchSize = 2000 +const ( + listensBatchSize = 2000 + lovesBatchSize = 10 +) type ListenBrainzArchiveBackend struct { filePath string + mbClient musicbrainzws2.Client } func (b *ListenBrainzArchiveBackend) Name() string { return "listenbrainz-archive" } @@ -50,6 +56,11 @@ func (b *ListenBrainzArchiveBackend) Options() []models.BackendOption { func (b *ListenBrainzArchiveBackend) InitConfig(config *config.ServiceConfig) error { b.filePath = config.GetString("file-path") + b.mbClient = *musicbrainzws2.NewClient(musicbrainzws2.AppInfo{ + Name: version.AppName, + Version: version.AppVersion, + URL: version.AppURL, + }) return nil } @@ -86,7 +97,7 @@ func (b *ListenBrainzArchiveBackend) ExportListens( return } - listens := make(models.ListensList, 0, batchSize) + listens := make(models.ListensList, 0, listensBatchSize) for rawListen, err := range archive.IterListens(oldestTimestamp) { if err != nil { p.Export.Abort() @@ -108,7 +119,7 @@ func (b *ListenBrainzArchiveBackend) ExportListens( // Allow the importer to start processing the listens by // sending them in batches. 
- if len(listens) >= batchSize { + if len(listens) >= listensBatchSize { results <- models.ListensResult{Items: listens} progress <- p listens = listens[:0] @@ -119,3 +130,81 @@ func (b *ListenBrainzArchiveBackend) ExportListens( p.Export.Complete() progress <- p } + +func (b *ListenBrainzArchiveBackend) ExportLoves( + ctx context.Context, oldestTimestamp time.Time, + results chan models.LovesResult, progress chan models.TransferProgress) { + startTime := time.Now() + minTime := oldestTimestamp + if minTime.Unix() < 1 { + minTime = time.Unix(1, 0) + } + + totalDuration := startTime.Sub(oldestTimestamp) + p := models.TransferProgress{ + Export: &models.Progress{ + Total: int64(totalDuration.Seconds()), + }, + } + + archive, err := listenbrainz.OpenExportArchive(b.filePath) + if err != nil { + p.Export.Abort() + progress <- p + results <- models.LovesResult{Error: err} + return + } + defer archive.Close() + + userInfo, err := archive.UserInfo() + if err != nil { + p.Export.Abort() + progress <- p + results <- models.LovesResult{Error: err} + return + } + + loves := make(models.LovesList, 0, lovesBatchSize) + for feedback, err := range archive.IterFeedback(oldestTimestamp) { + if err != nil { + p.Export.Abort() + progress <- p + results <- models.LovesResult{Error: err} + return + } + + // The export file does not include track metadata. Try fetching details + // from MusicBrainz. + if feedback.TrackMetadata == nil { + track, err := lbapi.LookupRecording(ctx, &b.mbClient, feedback.RecordingMBID) + if err == nil { + feedback.TrackMetadata = track + } + } + + love := lbapi.AsLove(feedback) + if love.UserName == "" { + love.UserName = userInfo.Name + } + // TODO: The dump does not contain TrackMetadata for feedback. + // We need to look it up in the archive. 
+ loves = append(loves, love) + + // Update the progress + p.Export.TotalItems += 1 + remainingTime := startTime.Sub(love.Created) + p.Export.Elapsed = int64(totalDuration.Seconds() - remainingTime.Seconds()) + + // Allow the importer to start processing the listens by + // sending them in batches. + if len(loves) >= lovesBatchSize { + results <- models.LovesResult{Items: loves} + progress <- p + loves = loves[:0] + } + } + + results <- models.LovesResult{Items: loves} + p.Export.Complete() + progress <- p +} diff --git a/internal/backends/listenbrainz/helper.go b/internal/backends/listenbrainz/helper.go new file mode 100644 index 0000000..f39a2df --- /dev/null +++ b/internal/backends/listenbrainz/helper.go @@ -0,0 +1,115 @@ +/* +Copyright © 2025 Philipp Wolfer + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +package listenbrainz + +import ( + "context" + "time" + + "go.uploadedlobster.com/mbtypes" + "go.uploadedlobster.com/musicbrainzws2" + "go.uploadedlobster.com/scotty/internal/listenbrainz" + "go.uploadedlobster.com/scotty/internal/models" +) + +func LookupRecording( + ctx context.Context, + mb *musicbrainzws2.Client, + mbid mbtypes.MBID, +) (*listenbrainz.Track, error) { + filter := musicbrainzws2.IncludesFilter{ + Includes: []string{"artist-credits"}, + } + recording, err := mb.LookupRecording(ctx, mbid, filter) + if err != nil { + return nil, err + } + + artistMBIDs := make([]mbtypes.MBID, 0, len(recording.ArtistCredit)) + for _, artist := range recording.ArtistCredit { + artistMBIDs = append(artistMBIDs, artist.Artist.ID) + } + track := listenbrainz.Track{ + TrackName: recording.Title, + ArtistName: recording.ArtistCredit.String(), + MBIDMapping: &listenbrainz.MBIDMapping{ + // In case of redirects this MBID differs from the looked up MBID + RecordingMBID: recording.ID, + ArtistMBIDs: artistMBIDs, + }, + } + return &track, nil +} + +func AsListen(lbListen listenbrainz.Listen) models.Listen { + listen := models.Listen{ + ListenedAt: time.Unix(lbListen.ListenedAt, 0), + UserName: lbListen.UserName, + Track: AsTrack(lbListen.TrackMetadata), + } + return listen +} + +func AsLove(f listenbrainz.Feedback) models.Love { + recordingMBID := f.RecordingMBID + track := f.TrackMetadata + if track == nil { + track = &listenbrainz.Track{} + } + love := models.Love{ + UserName: f.UserName, + RecordingMBID: recordingMBID, + Created: time.Unix(f.Created, 0), + Track: AsTrack(*track), + } + + if love.Track.RecordingMBID == "" { + love.Track.RecordingMBID = love.RecordingMBID + } + + return love +} + +func AsTrack(t listenbrainz.Track) models.Track { + track := models.Track{ + TrackName: t.TrackName, + ReleaseName: t.ReleaseName, + ArtistNames: []string{t.ArtistName}, + Duration: t.Duration(), + TrackNumber: t.TrackNumber(), + DiscNumber: t.DiscNumber(), + RecordingMBID: 
t.RecordingMBID(), + ReleaseMBID: t.ReleaseMBID(), + ReleaseGroupMBID: t.ReleaseGroupMBID(), + ISRC: t.ISRC(), + AdditionalInfo: t.AdditionalInfo, + } + + if t.MBIDMapping != nil && len(track.ArtistMBIDs) == 0 { + for _, artistMBID := range t.MBIDMapping.ArtistMBIDs { + track.ArtistMBIDs = append(track.ArtistMBIDs, artistMBID) + } + } + + return track +} diff --git a/internal/backends/listenbrainz/listenbrainz.go b/internal/backends/listenbrainz/listenbrainz.go index 4f0ce2f..8035b22 100644 --- a/internal/backends/listenbrainz/listenbrainz.go +++ b/internal/backends/listenbrainz/listenbrainz.go @@ -249,7 +249,7 @@ out: // longer available and might have been merged. Try fetching details // from MusicBrainz. if feedback.TrackMetadata == nil { - track, err := b.lookupRecording(ctx, feedback.RecordingMBID) + track, err := LookupRecording(ctx, &b.mbClient, feedback.RecordingMBID) if err == nil { feedback.TrackMetadata = track } @@ -375,82 +375,3 @@ func (b *ListenBrainzApiBackend) checkDuplicateListen(ctx context.Context, liste return false, nil } - -func (b *ListenBrainzApiBackend) lookupRecording( - ctx context.Context, mbid mbtypes.MBID) (*listenbrainz.Track, error) { - filter := musicbrainzws2.IncludesFilter{ - Includes: []string{"artist-credits"}, - } - recording, err := b.mbClient.LookupRecording(ctx, mbid, filter) - if err != nil { - return nil, err - } - - artistMBIDs := make([]mbtypes.MBID, 0, len(recording.ArtistCredit)) - for _, artist := range recording.ArtistCredit { - artistMBIDs = append(artistMBIDs, artist.Artist.ID) - } - track := listenbrainz.Track{ - TrackName: recording.Title, - ArtistName: recording.ArtistCredit.String(), - MBIDMapping: &listenbrainz.MBIDMapping{ - // In case of redirects this MBID differs from the looked up MBID - RecordingMBID: recording.ID, - ArtistMBIDs: artistMBIDs, - }, - } - return &track, nil -} - -func AsListen(lbListen listenbrainz.Listen) models.Listen { - listen := models.Listen{ - ListenedAt: 
time.Unix(lbListen.ListenedAt, 0), - UserName: lbListen.UserName, - Track: AsTrack(lbListen.TrackMetadata), - } - return listen -} - -func AsLove(f listenbrainz.Feedback) models.Love { - recordingMBID := f.RecordingMBID - track := f.TrackMetadata - if track == nil { - track = &listenbrainz.Track{} - } - love := models.Love{ - UserName: f.UserName, - RecordingMBID: recordingMBID, - Created: time.Unix(f.Created, 0), - Track: AsTrack(*track), - } - - if love.Track.RecordingMBID == "" { - love.Track.RecordingMBID = love.RecordingMBID - } - - return love -} - -func AsTrack(t listenbrainz.Track) models.Track { - track := models.Track{ - TrackName: t.TrackName, - ReleaseName: t.ReleaseName, - ArtistNames: []string{t.ArtistName}, - Duration: t.Duration(), - TrackNumber: t.TrackNumber(), - DiscNumber: t.DiscNumber(), - RecordingMBID: t.RecordingMBID(), - ReleaseMBID: t.ReleaseMBID(), - ReleaseGroupMBID: t.ReleaseGroupMBID(), - ISRC: t.ISRC(), - AdditionalInfo: t.AdditionalInfo, - } - - if t.MBIDMapping != nil && len(track.ArtistMBIDs) == 0 { - for _, artistMBID := range t.MBIDMapping.ArtistMBIDs { - track.ArtistMBIDs = append(track.ArtistMBIDs, artistMBID) - } - } - - return track -} From 7542657925b1f6a253898d541d64c6408639af9d Mon Sep 17 00:00:00 2001 From: Philipp Wolfer Date: Sat, 24 May 2025 16:46:10 +0200 Subject: [PATCH 11/22] Use LB API to lookup missing metadata for loves This is faster than using the MBID API individually --- internal/backends/lbarchive/lbarchive.go | 46 ++++---- internal/backends/listenbrainz/helper.go | 133 ++++++++++++++++++----- internal/listenbrainz/client.go | 22 ++++ internal/listenbrainz/models.go | 44 +++++++- 4 files changed, 194 insertions(+), 51 deletions(-) diff --git a/internal/backends/lbarchive/lbarchive.go b/internal/backends/lbarchive/lbarchive.go index cff2a1f..6e2f349 100644 --- a/internal/backends/lbarchive/lbarchive.go +++ b/internal/backends/lbarchive/lbarchive.go @@ -36,11 +36,12 @@ import ( const ( listensBatchSize = 2000 
- lovesBatchSize = 10 + lovesBatchSize = listenbrainz.MaxItemsPerGet ) type ListenBrainzArchiveBackend struct { filePath string + lbClient listenbrainz.Client mbClient musicbrainzws2.Client } @@ -56,6 +57,7 @@ func (b *ListenBrainzArchiveBackend) Options() []models.BackendOption { func (b *ListenBrainzArchiveBackend) InitConfig(config *config.ServiceConfig) error { b.filePath = config.GetString("file-path") + b.lbClient = listenbrainz.NewClient("", version.UserAgent()) b.mbClient = *musicbrainzws2.NewClient(musicbrainzws2.AppInfo{ Name: version.AppName, Version: version.AppVersion, @@ -164,7 +166,7 @@ func (b *ListenBrainzArchiveBackend) ExportLoves( return } - loves := make(models.LovesList, 0, lovesBatchSize) + batch := make([]listenbrainz.Feedback, 0, lovesBatchSize) for feedback, err := range archive.IterFeedback(oldestTimestamp) { if err != nil { p.Export.Abort() @@ -173,37 +175,43 @@ func (b *ListenBrainzArchiveBackend) ExportLoves( return } - // The export file does not include track metadata. Try fetching details - // from MusicBrainz. - if feedback.TrackMetadata == nil { - track, err := lbapi.LookupRecording(ctx, &b.mbClient, feedback.RecordingMBID) - if err == nil { - feedback.TrackMetadata = track - } + if feedback.UserName == "" { + feedback.UserName = userInfo.Name } - love := lbapi.AsLove(feedback) - if love.UserName == "" { - love.UserName = userInfo.Name - } - // TODO: The dump does not contain TrackMetadata for feedback. - // We need to look it up in the archive. - loves = append(loves, love) + batch = append(batch, feedback) // Update the progress p.Export.TotalItems += 1 - remainingTime := startTime.Sub(love.Created) + remainingTime := startTime.Sub(time.Unix(feedback.Created, 0)) p.Export.Elapsed = int64(totalDuration.Seconds() - remainingTime.Seconds()) // Allow the importer to start processing the listens by // sending them in batches. 
- if len(loves) >= lovesBatchSize { + if len(batch) >= lovesBatchSize { + // The dump does not contain track metadata. Extend it with additional + // lookups + loves, err := lbapi.ExtendTrackMetadata(ctx, &b.lbClient, &b.mbClient, &batch) + if err != nil { + p.Export.Abort() + progress <- p + results <- models.LovesResult{Error: err} + return + } + results <- models.LovesResult{Items: loves} progress <- p - loves = loves[:0] + batch = batch[:0] } } + loves, err := lbapi.ExtendTrackMetadata(ctx, &b.lbClient, &b.mbClient, &batch) + if err != nil { + p.Export.Abort() + progress <- p + results <- models.LovesResult{Error: err} + return + } results <- models.LovesResult{Items: loves} p.Export.Complete() progress <- p diff --git a/internal/backends/listenbrainz/helper.go b/internal/backends/listenbrainz/helper.go index f39a2df..d6572d0 100644 --- a/internal/backends/listenbrainz/helper.go +++ b/internal/backends/listenbrainz/helper.go @@ -32,35 +32,6 @@ import ( "go.uploadedlobster.com/scotty/internal/models" ) -func LookupRecording( - ctx context.Context, - mb *musicbrainzws2.Client, - mbid mbtypes.MBID, -) (*listenbrainz.Track, error) { - filter := musicbrainzws2.IncludesFilter{ - Includes: []string{"artist-credits"}, - } - recording, err := mb.LookupRecording(ctx, mbid, filter) - if err != nil { - return nil, err - } - - artistMBIDs := make([]mbtypes.MBID, 0, len(recording.ArtistCredit)) - for _, artist := range recording.ArtistCredit { - artistMBIDs = append(artistMBIDs, artist.Artist.ID) - } - track := listenbrainz.Track{ - TrackName: recording.Title, - ArtistName: recording.ArtistCredit.String(), - MBIDMapping: &listenbrainz.MBIDMapping{ - // In case of redirects this MBID differs from the looked up MBID - RecordingMBID: recording.ID, - ArtistMBIDs: artistMBIDs, - }, - } - return &track, nil -} - func AsListen(lbListen listenbrainz.Listen) models.Listen { listen := models.Listen{ ListenedAt: time.Unix(lbListen.ListenedAt, 0), @@ -113,3 +84,107 @@ func AsTrack(t 
listenbrainz.Track) models.Track { return track } + +func LookupRecording( + ctx context.Context, + mb *musicbrainzws2.Client, + mbid mbtypes.MBID, +) (*listenbrainz.Track, error) { + filter := musicbrainzws2.IncludesFilter{ + Includes: []string{"artist-credits"}, + } + recording, err := mb.LookupRecording(ctx, mbid, filter) + if err != nil { + return nil, err + } + + artistMBIDs := make([]mbtypes.MBID, 0, len(recording.ArtistCredit)) + for _, artist := range recording.ArtistCredit { + artistMBIDs = append(artistMBIDs, artist.Artist.ID) + } + track := listenbrainz.Track{ + TrackName: recording.Title, + ArtistName: recording.ArtistCredit.String(), + MBIDMapping: &listenbrainz.MBIDMapping{ + // In case of redirects this MBID differs from the looked up MBID + RecordingMBID: recording.ID, + ArtistMBIDs: artistMBIDs, + }, + } + return &track, nil +} + +func ExtendTrackMetadata( + ctx context.Context, + lb *listenbrainz.Client, + mb *musicbrainzws2.Client, + feedbacks *[]listenbrainz.Feedback, +) ([]models.Love, error) { + mbids := make([]mbtypes.MBID, 0, len(*feedbacks)) + for _, feedback := range *feedbacks { + if feedback.TrackMetadata == nil && feedback.RecordingMBID != "" { + mbids = append(mbids, feedback.RecordingMBID) + } + } + result, err := lb.MetadataRecordings(ctx, mbids) + if err != nil { + return nil, err + } + + loves := make([]models.Love, 0, len(*feedbacks)) + for _, feedback := range *feedbacks { + if feedback.TrackMetadata == nil && feedback.RecordingMBID != "" { + metadata, ok := result[feedback.RecordingMBID] + if ok { + feedback.TrackMetadata = trackFromMetadataLookup( + feedback.RecordingMBID, metadata) + } else { + // MBID not in result. This is probably a MBID redirect, get + // data from MB instead (slower). + // If this also fails, just leave the metadata empty. 
+ track, err := LookupRecording(ctx, mb, feedback.RecordingMBID) + if err == nil { + feedback.TrackMetadata = track + } + } + } + + loves = append(loves, AsLove(feedback)) + } + + return loves, nil +} + +func trackFromMetadataLookup( + recordingMBID mbtypes.MBID, + metadata listenbrainz.RecordingMetadata, +) *listenbrainz.Track { + artistMBIDs := make([]mbtypes.MBID, 0, len(metadata.Artist.Artists)) + artists := make([]listenbrainz.Artist, 0, len(metadata.Artist.Artists)) + for _, artist := range metadata.Artist.Artists { + artistMBIDs = append(artistMBIDs, artist.ArtistMBID) + artists = append(artists, listenbrainz.Artist{ + ArtistCreditName: artist.Name, + ArtistMBID: artist.ArtistMBID, + JoinPhrase: artist.JoinPhrase, + }) + } + + return &listenbrainz.Track{ + TrackName: metadata.Recording.Name, + ArtistName: metadata.Artist.Name, + ReleaseName: metadata.Release.Name, + AdditionalInfo: map[string]any{ + "duration_ms": metadata.Recording.Length, + "release_group_mbid": metadata.Release.ReleaseGroupMBID, + }, + MBIDMapping: &listenbrainz.MBIDMapping{ + RecordingMBID: recordingMBID, + ReleaseMBID: metadata.Release.MBID, + ArtistMBIDs: artistMBIDs, + Artists: artists, + CAAID: metadata.Release.CAAID, + CAAReleaseMBID: metadata.Release.CAAReleaseMBID, + }, + } +} diff --git a/internal/listenbrainz/client.go b/internal/listenbrainz/client.go index 957a946..270bf4b 100644 --- a/internal/listenbrainz/client.go +++ b/internal/listenbrainz/client.go @@ -28,6 +28,7 @@ import ( "time" "github.com/go-resty/resty/v2" + "go.uploadedlobster.com/mbtypes" "go.uploadedlobster.com/scotty/pkg/ratelimit" ) @@ -158,3 +159,24 @@ func (c Client) Lookup(ctx context.Context, recordingName string, artistName str } return } + +func (c Client) MetadataRecordings(ctx context.Context, mbids []mbtypes.MBID) (result RecordingMetadataResult, err error) { + const path = "/metadata/recording/" + errorResult := ErrorResult{} + body := RecordingMetadataRequest{ + RecordingMBIDs: mbids, + Includes: 
"artist release", + } + response, err := c.HTTPClient.R(). + SetContext(ctx). + SetBody(body). + SetResult(&result). + SetError(&errorResult). + Post(path) + + if !response.IsSuccess() { + err = errors.New(errorResult.Error) + return + } + return +} diff --git a/internal/listenbrainz/models.go b/internal/listenbrainz/models.go index 0b5f439..5e0d0e1 100644 --- a/internal/listenbrainz/models.go +++ b/internal/listenbrainz/models.go @@ -82,9 +82,9 @@ type MBIDMapping struct { } type Artist struct { - ArtistCreditName string `json:"artist_credit_name,omitempty"` - ArtistMBID string `json:"artist_mbid,omitempty"` - JoinPhrase string `json:"join_phrase,omitempty"` + ArtistCreditName string `json:"artist_credit_name,omitempty"` + ArtistMBID mbtypes.MBID `json:"artist_mbid,omitempty"` + JoinPhrase string `json:"join_phrase,omitempty"` } type GetFeedbackResult struct { @@ -112,6 +112,44 @@ type LookupResult struct { ArtistMBIDs []mbtypes.MBID `json:"artist_mbids"` } +type RecordingMetadataRequest struct { + RecordingMBIDs []mbtypes.MBID `json:"recording_mbids"` + Includes string `json:"inc,omitempty"` +} + +// Result for a recording metadata lookup +type RecordingMetadataResult map[mbtypes.MBID]RecordingMetadata + +type RecordingMetadata struct { + Artist struct { + Name string `json:"name"` + ArtistCreditID int `json:"artist_credit_id"` + Artists []struct { + Name string `json:"name"` + Area string `json:"area"` + ArtistMBID mbtypes.MBID `json:"artist_mbid"` + JoinPhrase string `json:"join_phrase"` + BeginYear int `json:"begin_year"` + Type string `json:"type"` + // todo rels + } `json:"artists"` + } `json:"artist"` + Recording struct { + Name string `json:"name"` + Length int `json:"length"` + // TODO rels + } `json:"recording"` + Release struct { + Name string `json:"name"` + AlbumArtistName string `json:"album_artist_name"` + Year int `json:"year"` + MBID mbtypes.MBID `json:"mbid"` + ReleaseGroupMBID mbtypes.MBID `json:"release_group_mbid"` + CAAID int `json:"caa_id"` 
+ CAAReleaseMBID mbtypes.MBID `json:"caa_release_mbid"` + } `json:"release"` +} + type StatusResult struct { Status string `json:"status"` } From 4ad89d287d2f241641b6ef06bd281205afaaac50 Mon Sep 17 00:00:00 2001 From: Philipp Wolfer Date: Sat, 24 May 2025 16:47:13 +0200 Subject: [PATCH 12/22] Rework ratelimit code Simplify variables and avoid potential error if retry header reading fails --- pkg/ratelimit/httpheader.go | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/pkg/ratelimit/httpheader.go b/pkg/ratelimit/httpheader.go index dba5e30..617c3b8 100644 --- a/pkg/ratelimit/httpheader.go +++ b/pkg/ratelimit/httpheader.go @@ -1,5 +1,5 @@ /* -Copyright © 2023 Philipp Wolfer +Copyright © 2023-2025 Philipp Wolfer Scotty is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -25,9 +25,9 @@ import ( ) const ( - RetryCount = 5 - DefaultRateLimitWaitSeconds = 5 - MaxWaitTimeSeconds = 60 + RetryCount = 5 + DefaultRateLimitWait = 5 * time.Second + MaxWaitTime = 60 * time.Second ) // Implements rate HTTP header based limiting for resty. 
@@ -47,16 +47,15 @@ func EnableHTTPHeaderRateLimit(client *resty.Client, resetInHeader string) { return code == http.StatusTooManyRequests || code >= http.StatusInternalServerError }, ) - client.SetRetryMaxWaitTime(time.Duration(MaxWaitTimeSeconds * time.Second)) + client.SetRetryMaxWaitTime(MaxWaitTime) client.SetRetryAfter(func(client *resty.Client, resp *resty.Response) (time.Duration, error) { - var err error - var retryAfter int = DefaultRateLimitWaitSeconds + retryAfter := DefaultRateLimitWait if resp.StatusCode() == http.StatusTooManyRequests { - retryAfter, err = strconv.Atoi(resp.Header().Get(resetInHeader)) - if err != nil { - retryAfter = DefaultRateLimitWaitSeconds + retryAfterHeader, err := strconv.Atoi(resp.Header().Get(resetInHeader)) + if err == nil { + retryAfter = time.Duration(retryAfterHeader) * time.Second } } - return time.Duration(retryAfter * int(time.Second)), err + return retryAfter, nil }) } From f70b6248b6e733ecaef35b6fe89324ab774e33c9 Mon Sep 17 00:00:00 2001 From: Philipp Wolfer Date: Sat, 24 May 2025 16:48:38 +0200 Subject: [PATCH 13/22] Update musicbrainzws2 to fix rate limit issues --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index c4c2a65..c5c3511 100644 --- a/go.mod +++ b/go.mod @@ -23,7 +23,7 @@ require ( github.com/supersonic-app/go-subsonic v0.0.0-20241224013245-9b2841f3711d github.com/vbauerster/mpb/v8 v8.10.1 go.uploadedlobster.com/mbtypes v0.4.0 - go.uploadedlobster.com/musicbrainzws2 v0.15.0 + go.uploadedlobster.com/musicbrainzws2 v0.15.1-0.20250524094913-01f007ad1064 golang.org/x/exp v0.0.0-20250506013437-ce4c2cf36ca6 golang.org/x/oauth2 v0.30.0 golang.org/x/text v0.25.0 diff --git a/go.sum b/go.sum index 028515c..6d34a6d 100644 --- a/go.sum +++ b/go.sum @@ -136,6 +136,8 @@ go.uploadedlobster.com/mbtypes v0.4.0 h1:D5asCgHsRWufj4Yn5u0IuH2J9z1UuYImYkYIp1Z go.uploadedlobster.com/mbtypes v0.4.0/go.mod h1:Bu1K1Hl77QTAE2Z7QKiW/JAp9KqYWQebkRRfG02dlZM= 
go.uploadedlobster.com/musicbrainzws2 v0.15.0 h1:njJeyf1dDwfz2toEHaZSuockVsn1fg+967/tVfLHhwQ= go.uploadedlobster.com/musicbrainzws2 v0.15.0/go.mod h1:T6sYE7ZHRH3mJWT3g9jdSUPKJLZubnBjKyjMPNdkgao= +go.uploadedlobster.com/musicbrainzws2 v0.15.1-0.20250524094913-01f007ad1064 h1:bir8kas9u0A+T54sfzj3il7SUAV5KQtb5QzDtwvslxI= +go.uploadedlobster.com/musicbrainzws2 v0.15.1-0.20250524094913-01f007ad1064/go.mod h1:T6sYE7ZHRH3mJWT3g9jdSUPKJLZubnBjKyjMPNdkgao= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.38.0 h1:jt+WWG8IZlBnVbomuhg2Mdq0+BBQaHbtqHEFEigjUV8= From ef6780701ad506aad79cea397ed6d758ab1c033a Mon Sep 17 00:00:00 2001 From: Philipp Wolfer Date: Sat, 24 May 2025 17:08:15 +0200 Subject: [PATCH 14/22] Use ExtendTrackMetadata also for LB API loves export --- .../backends/listenbrainz/listenbrainz.go | 51 ++++++++++++------- 1 file changed, 34 insertions(+), 17 deletions(-) diff --git a/internal/backends/listenbrainz/listenbrainz.go b/internal/backends/listenbrainz/listenbrainz.go index 8035b22..9e1c9f3 100644 --- a/internal/backends/listenbrainz/listenbrainz.go +++ b/internal/backends/listenbrainz/listenbrainz.go @@ -32,6 +32,8 @@ import ( "go.uploadedlobster.com/scotty/internal/version" ) +const lovesBatchSize = listenbrainz.MaxItemsPerGet + type ListenBrainzApiBackend struct { client listenbrainz.Client mbClient musicbrainzws2.Client @@ -229,7 +231,8 @@ func (b *ListenBrainzApiBackend) ExportLoves(ctx context.Context, oldestTimestam func (b *ListenBrainzApiBackend) exportLoves(ctx context.Context, oldestTimestamp time.Time, results chan models.LovesResult) { offset := 0 defer close(results) - loves := make(models.LovesList, 0, 2*listenbrainz.MaxItemsPerGet) + allLoves := make(models.LovesList, 0, 2*listenbrainz.MaxItemsPerGet) + batch := make([]listenbrainz.Feedback, 0, 
lovesBatchSize) out: for { @@ -245,31 +248,45 @@ out: } for _, feedback := range result.Feedback { - // Missing track metadata indicates that the recording MBID is no - // longer available and might have been merged. Try fetching details - // from MusicBrainz. - if feedback.TrackMetadata == nil { - track, err := LookupRecording(ctx, &b.mbClient, feedback.RecordingMBID) - if err == nil { - feedback.TrackMetadata = track - } - } - - love := AsLove(feedback) - if love.Created.After(oldestTimestamp) { - loves = append(loves, love) + if time.Unix(feedback.Created, 0).After(oldestTimestamp) { + batch = append(batch, feedback) } else { break out } + + if len(batch) >= lovesBatchSize { + // Missing track metadata indicates that the recording MBID is no + // longer available and might have been merged. Try fetching details + // from MusicBrainz. + lovesBatch, err := ExtendTrackMetadata(ctx, &b.client, &b.mbClient, &batch) + if err != nil { + results <- models.LovesResult{Error: err} + return + } + + for _, l := range lovesBatch { + allLoves = append(allLoves, l) + } + } } offset += listenbrainz.MaxItemsPerGet } - sort.Sort(loves) + lovesBatch, err := ExtendTrackMetadata(ctx, &b.client, &b.mbClient, &batch) + if err != nil { + results <- models.LovesResult{Error: err} + return + } + + for _, l := range lovesBatch { + allLoves = append(allLoves, l) + } + + sort.Sort(allLoves) results <- models.LovesResult{ - Total: len(loves), - Items: loves, + Total: len(allLoves), + Items: allLoves, } } From 7fb77da135d74577f76c4e1045988197e6f55f52 Mon Sep 17 00:00:00 2001 From: Philipp Wolfer Date: Sat, 24 May 2025 17:35:19 +0200 Subject: [PATCH 15/22] Allow reading Spotify history directly from ZIP file --- config.example.toml | 8 +- internal/backends/spotifyhistory/archive.go | 82 +++++++++++++++++++ .../backends/spotifyhistory/spotifyhistory.go | 51 +++++------- 3 files changed, 108 insertions(+), 33 deletions(-) create mode 100644 internal/backends/spotifyhistory/archive.go diff --git 
a/config.example.toml b/config.example.toml index ecbba9b..28c37ad 100644 --- a/config.example.toml +++ b/config.example.toml @@ -105,9 +105,11 @@ client-secret = "" [service.spotify-history] # Read listens from a Spotify extended history export backend = "spotify-history" -# Directory where the extended history JSON files are located. The files must -# follow the naming scheme "Streaming_History_Audio_*.json". -dir-path = "./my_spotify_data_extended/Spotify Extended Streaming History" +# Path to the Spotify extended history archive. This can either point directly +# to the "my_spotify_data_extended.zip" ZIP file provided by Spotify or a +# directory where this file has been extracted to. The history files are +# expected to follow the naming pattern "Streaming_History_Audio_*.json". +archive-path = "./my_spotify_data_extended.zip" # If true (default), ignore listens from a Spotify "private session". ignore-incognito = true # If true, ignore listens marked as skipped. Default is false. diff --git a/internal/backends/spotifyhistory/archive.go b/internal/backends/spotifyhistory/archive.go new file mode 100644 index 0000000..1d596bd --- /dev/null +++ b/internal/backends/spotifyhistory/archive.go @@ -0,0 +1,82 @@ +/* +Copyright © 2025 Philipp Wolfer + +This file is part of Scotty. + +Scotty is free software: you can redistribute it and/or modify it under the +terms of the GNU General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any later version. + +Scotty is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +Scotty. If not, see . 
+*/ + +package spotifyhistory + +import ( + "errors" + "sort" + + "go.uploadedlobster.com/scotty/internal/archive" +) + +var historyFileGlobs = []string{ + "Spotify Extended Streaming History/Streaming_History_Audio_*.json", + "Streaming_History_Audio_*.json", +} + +// Access a Spotify history archive. +// This can be either the ZIP file as provided by Spotify +// or a directory where this was extracted to. +type HistoryArchive struct { + backend archive.Archive +} + +// Open a Spotify history archive from file path. +func OpenHistoryArchive(path string) (*HistoryArchive, error) { + backend, err := archive.OpenArchive(path) + if err != nil { + return nil, err + } + + return &HistoryArchive{backend: backend}, nil +} + +func (h *HistoryArchive) GetHistoryFiles() ([]archive.FileInfo, error) { + for _, glob := range historyFileGlobs { + files, err := h.backend.Glob(glob) + if err != nil { + return nil, err + } + + if len(files) > 0 { + sort.Slice(files, func(i, j int) bool { + return files[i].Name < files[j].Name + }) + return files, nil + } + } + + // Found no files, fail + return nil, errors.New("found no history files in archive") +} + +func readHistoryFile(f archive.OpenableFile) (StreamingHistory, error) { + file, err := f.Open() + if err != nil { + return nil, err + } + + defer file.Close() + history := StreamingHistory{} + err = history.Read(file) + if err != nil { + return nil, err + } + + return history, nil +} diff --git a/internal/backends/spotifyhistory/spotifyhistory.go b/internal/backends/spotifyhistory/spotifyhistory.go index ce470ff..90ee8ff 100644 --- a/internal/backends/spotifyhistory/spotifyhistory.go +++ b/internal/backends/spotifyhistory/spotifyhistory.go @@ -19,9 +19,6 @@ package spotifyhistory import ( "context" - "os" - "path/filepath" - "slices" "sort" "time" @@ -30,10 +27,8 @@ import ( "go.uploadedlobster.com/scotty/internal/models" ) -const historyFileGlob = "Streaming_History_Audio_*.json" - type SpotifyHistoryBackend struct { - dirPath 
string + archivePath string ignoreIncognito bool ignoreSkipped bool skippedMinSeconds int @@ -43,9 +38,10 @@ func (b *SpotifyHistoryBackend) Name() string { return "spotify-history" } func (b *SpotifyHistoryBackend) Options() []models.BackendOption { return []models.BackendOption{{ - Name: "dir-path", - Label: i18n.Tr("Directory path"), - Type: models.String, + Name: "archive-path", + Label: i18n.Tr("Archive path"), + Type: models.String, + Default: "./my_spotify_data_extended.zip", }, { Name: "ignore-incognito", Label: i18n.Tr("Ignore listens in incognito mode"), @@ -65,7 +61,11 @@ func (b *SpotifyHistoryBackend) Options() []models.BackendOption { } func (b *SpotifyHistoryBackend) InitConfig(config *config.ServiceConfig) error { - b.dirPath = config.GetString("dir-path") + b.archivePath = config.GetString("archive-path") + // Backward compatibility + if b.archivePath == "" { + b.archivePath = config.GetString("dir-path") + } b.ignoreIncognito = config.GetBool("ignore-incognito", true) b.ignoreSkipped = config.GetBool("ignore-skipped", false) b.skippedMinSeconds = config.GetInt("ignore-min-duration-seconds", 30) @@ -73,11 +73,19 @@ func (b *SpotifyHistoryBackend) InitConfig(config *config.ServiceConfig) error { } func (b *SpotifyHistoryBackend) ExportListens(ctx context.Context, oldestTimestamp time.Time, results chan models.ListensResult, progress chan models.TransferProgress) { - files, err := filepath.Glob(filepath.Join(b.dirPath, historyFileGlob)) p := models.TransferProgress{ Export: &models.Progress{}, } + archive, err := OpenHistoryArchive(b.archivePath) + if err != nil { + p.Export.Abort() + progress <- p + results <- models.ListensResult{Error: err} + return + } + + files, err := archive.GetHistoryFiles() if err != nil { p.Export.Abort() progress <- p @@ -85,10 +93,9 @@ func (b *SpotifyHistoryBackend) ExportListens(ctx context.Context, oldestTimesta return } - slices.Sort(files) fileCount := int64(len(files)) p.Export.Total = fileCount - for i, filePath := 
range files { if err := ctx.Err(); err != nil { results <- models.ListensResult{Error: err} p.Export.Abort() @@ -96,7 +103,7 @@ func (b *SpotifyHistoryBackend) ExportListens(ctx context.Context, oldestTimesta return } - history, err := readHistoryFile(filePath) + history, err := readHistoryFile(f.File) if err != nil { results <- models.ListensResult{Error: err} p.Export.Abort() @@ -118,19 +125,3 @@ func (b *SpotifyHistoryBackend) ExportListens(ctx context.Context, oldestTimesta p.Export.Complete() progress <- p } - -func readHistoryFile(filePath string) (StreamingHistory, error) { - file, err := os.Open(filePath) - if err != nil { - return nil, err - } - - defer file.Close() - history := StreamingHistory{} - err = history.Read(file) - if err != nil { - return nil, err - } - - return history, nil -} From 1ef498943b6a1db48e60ef250b4b567a6613bc33 Mon Sep 17 00:00:00 2001 From: Philipp Wolfer Date: Sat, 24 May 2025 17:38:19 +0200 Subject: [PATCH 16/22] Renamed parameter for lbarchive also to "archive-path" --- config.example.toml | 6 ++++-- internal/backends/lbarchive/lbarchive.go | 6 +++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/config.example.toml b/config.example.toml index 28c37ad..d01a51a 100644 --- a/config.example.toml +++ b/config.example.toml @@ -23,8 +23,10 @@ check-duplicate-listens = false # This backend supports listens from a ListenBrainz export archive # (https://listenbrainz.org/settings/export/). backend = "listenbrainz-archive" -# The file path to the ListenBrainz export archive. -file-path = "./listenbrainz_outsidecontext.zip" +# The file path to the ListenBrainz export archive. The path can either point +# to the ZIP file as downloaded from ListenBrainz or a directory where the +# ZIP was extracted to.
+archive-path = "./listenbrainz_outsidecontext.zip" [service.maloja] # Maloja is a self hosted listening service (https://github.com/krateng/maloja) diff --git a/internal/backends/lbarchive/lbarchive.go b/internal/backends/lbarchive/lbarchive.go index 6e2f349..a91c0a5 100644 --- a/internal/backends/lbarchive/lbarchive.go +++ b/internal/backends/lbarchive/lbarchive.go @@ -49,14 +49,14 @@ func (b *ListenBrainzArchiveBackend) Name() string { return "listenbrainz-archiv func (b *ListenBrainzArchiveBackend) Options() []models.BackendOption { return []models.BackendOption{{ - Name: "file-path", - Label: i18n.Tr("Export ZIP file path"), + Name: "archive-path", + Label: i18n.Tr("Archive path"), Type: models.String, }} } func (b *ListenBrainzArchiveBackend) InitConfig(config *config.ServiceConfig) error { - b.filePath = config.GetString("file-path") + b.filePath = config.GetString("archive-path") b.lbClient = listenbrainz.NewClient("", version.UserAgent()) b.mbClient = *musicbrainzws2.NewClient(musicbrainzws2.AppInfo{ Name: version.AppName, From 93767df5679f39063114616958381c3ed1760409 Mon Sep 17 00:00:00 2001 From: Philipp Wolfer Date: Sat, 24 May 2025 17:54:24 +0200 Subject: [PATCH 17/22] Allow editing config option after renaming --- internal/backends/spotifyhistory/spotifyhistory.go | 9 +++++---- internal/cli/services.go | 6 ++++++ internal/models/options.go | 11 ++++++----- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/internal/backends/spotifyhistory/spotifyhistory.go b/internal/backends/spotifyhistory/spotifyhistory.go index 90ee8ff..5f67604 100644 --- a/internal/backends/spotifyhistory/spotifyhistory.go +++ b/internal/backends/spotifyhistory/spotifyhistory.go @@ -38,10 +38,11 @@ func (b *SpotifyHistoryBackend) Name() string { return "spotify-history" } func (b *SpotifyHistoryBackend) Options() []models.BackendOption { return []models.BackendOption{{ - Name: "archive-path", - Label: i18n.Tr("Archive path"), - Type: models.String, - Default: 
"./my_spotify_data_extended.zip", + Name: "archive-path", + Label: i18n.Tr("Archive path"), + Type: models.String, + Default: "./my_spotify_data_extended.zip", + MigrateFrom: "dir-path", }, { Name: "ignore-incognito", Label: i18n.Tr("Ignore listens in incognito mode"), diff --git a/internal/cli/services.go b/internal/cli/services.go index df27833..65e4337 100644 --- a/internal/cli/services.go +++ b/internal/cli/services.go @@ -83,6 +83,12 @@ func PromptExtraOptions(config config.ServiceConfig) (config.ServiceConfig, erro current, exists := config.ConfigValues[opt.Name] if exists { opt.Default = fmt.Sprintf("%v", current) + } else if opt.MigrateFrom != "" { + // If there is an old value to migrate from, try that + fallback, exists := config.ConfigValues[opt.MigrateFrom] + if exists { + opt.Default = fmt.Sprintf("%v", fallback) + } } val, err := Prompt(opt) diff --git a/internal/models/options.go b/internal/models/options.go index ffa3ae6..0e09dd7 100644 --- a/internal/models/options.go +++ b/internal/models/options.go @@ -25,9 +25,10 @@ const ( ) type BackendOption struct { - Name string - Label string - Type OptionType - Default string - Validate func(string) error + Name string + Label string + Type OptionType + Default string + Validate func(string) error + MigrateFrom string } From c29b2e20cd0d30b361267db98c92db2fc3c71e7b Mon Sep 17 00:00:00 2001 From: Philipp Wolfer Date: Sat, 24 May 2025 18:22:42 +0200 Subject: [PATCH 18/22] deezer: fixed endless export loop if user's listen history is empty --- internal/backends/deezer/deezer.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/internal/backends/deezer/deezer.go b/internal/backends/deezer/deezer.go index c38f4e7..a6eaec2 100644 --- a/internal/backends/deezer/deezer.go +++ b/internal/backends/deezer/deezer.go @@ -105,6 +105,11 @@ out: return } + // No result, break immediately + if result.Total == 0 { + break out + } + // The offset was higher then the actual number of tracks. 
Adjust the offset // and continue. if offset >= result.Total { From b18a6c210427a97d93a0aab2bf3b8c4710e7398d Mon Sep 17 00:00:00 2001 From: Philipp Wolfer Date: Sat, 24 May 2025 18:30:26 +0200 Subject: [PATCH 19/22] Update changelog and README Clarify that some services are not suited for full listen history export --- CHANGES.md | 17 +++++++++++++++++ README.md | 8 +++++++- config.example.toml | 6 +++++- 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 486d0ff..5ccf6d0 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,22 @@ # Scotty Changelog +## 0.7.0 - WIP +- listenbrainz-archive: new backend to load listens and loves from a + ListenBrainz export. The data can be read from the downloaded ZIP archive + or a directory where the contents of the archive have been extracted to. +- listenbrainz: faster loading of missing loves metadata using the ListenBrainz + API instead of MusicBrainz. Fallback to slower MusicBrainz query, if + ListenBrainz does not provide the data. +- spotify-history: it is now possible to specify the path directly to the + `my_spotify_data_extended.zip` ZIP file as downloaded from Spotify. +- spotify-history: the parameter to the export archive path has been renamed to + `archive-path`. For backward compatibility the old `dir-path` parameter is + still read. +- deezer: fixed endless export loop if the user's listen history was empty. +- dump: it is now possible to specify a file to write the text output to. +- Fixed potential issues with MusicBrainz rate limiting. 
+ + ## 0.6.0 - 2025-05-23 - Fully reworked progress report - Cancel both export and import on error diff --git a/README.md b/README.md index 6f997ed..b10a030 100644 --- a/README.md +++ b/README.md @@ -124,7 +124,7 @@ funkwhale | ✓ | ⨯ | ✓ | - jspf | ✓ | ✓ | ✓ | ✓ lastfm | ✓ | ✓ | ✓ | ✓ listenbrainz | ✓ | ✓ | ✓ | ✓ -listenbrainz-archive | ✓ | - | - | - +listenbrainz-archive | ✓ | - | ✓ | - maloja | ✓ | ✓ | ⨯ | ⨯ scrobbler-log | ✓ | ✓ | ⨯ | ⨯ spotify | ✓ | ⨯ | ✓ | - @@ -135,6 +135,12 @@ subsonic | ⨯ | ⨯ | ✓ | - See the comments in [config.example.toml](./config.example.toml) for a description of each backend's available configuration options. +**NOTE:** Some services, e.g. the Spotify and Deezer API, do not provide access +to the user's full listening history. Hence the API integrations are not suited +to do a full history export. They can however be well used for continuously +transfer recent listens to other services when running scotty frequently, e.g. +as a cron job. + ## Contribute The source code for Scotty is available on [SourceHut](https://sr.ht/~phw/scotty/). To report issues or feature requests please [create a ticket](https://todo.sr.ht/~phw/scotty). diff --git a/config.example.toml b/config.example.toml index d01a51a..3acdf88 100644 --- a/config.example.toml +++ b/config.example.toml @@ -106,6 +106,8 @@ client-secret = "" [service.spotify-history] # Read listens from a Spotify extended history export +# NOTE: The Spotify API does not allow access to the full listen history, +# but only to recent listens. backend = "spotify-history" # Path to the Spotify extended history archive. This can either point directly # to the "my_spotify_data_extended.zip" ZIP file provided by Spotify or a @@ -122,7 +124,9 @@ ignore-skipped = false ignore-min-duration-seconds = 30 [service.deezer] -# Read listens and loves from a Deezer account +# Read listens and loves from a Deezer account. 
+# NOTE: The Deezer API does not allow access to the full listen history, +# but only to recent listens. backend = "deezer" # You need to register an application on https://developers.deezer.com/myapps # and set the client ID and client secret below. From b1b0df7763f00323c5740fe691084ec623edbcb5 Mon Sep 17 00:00:00 2001 From: Philipp Wolfer Date: Sat, 24 May 2025 18:52:15 +0200 Subject: [PATCH 20/22] listenbrainz: fixed timestamp update with duplicates --- CHANGES.md | 2 ++ internal/backends/listenbrainz/listenbrainz.go | 1 + 2 files changed, 3 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 5ccf6d0..40d2f73 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -7,6 +7,8 @@ - listenbrainz: faster loading of missing loves metadata using the ListenBrainz API instead of MusicBrainz. Fallback to slower MusicBrainz query, if ListenBrainz does not provide the data. +- listenbrainz: fixed issue where timestamp was not updated properly if + duplicate listens were detected during import. - spotify-history: it is now possible to specify the path directly to the `my_spotify_data_extended.zip` ZIP file as downloaded from Spotify.
- spotify-history: the parameter to the export archive path has been renamed to diff --git a/internal/backends/listenbrainz/listenbrainz.go b/internal/backends/listenbrainz/listenbrainz.go index 9e1c9f3..dcc28fa 100644 --- a/internal/backends/listenbrainz/listenbrainz.go +++ b/internal/backends/listenbrainz/listenbrainz.go @@ -165,6 +165,7 @@ func (b *ListenBrainzApiBackend) ImportListens(ctx context.Context, export model msg := i18n.Tr("Ignored duplicate listen %v: \"%v\" by %v (%v)", l.ListenedAt, l.TrackName, l.ArtistName(), l.RecordingMBID) importResult.Log(models.Info, msg) + importResult.UpdateTimestamp(l.ListenedAt) continue } } From 312d9860cf84f075f6bf5a6392fc2223a7fb3fd3 Mon Sep 17 00:00:00 2001 From: Philipp Wolfer Date: Sat, 24 May 2025 20:43:02 +0200 Subject: [PATCH 21/22] Fixed import log output duplicating --- CHANGES.md | 1 + internal/backends/import.go | 5 +++-- internal/models/models.go | 20 +++++++++++++++----- internal/models/models_test.go | 22 ++++++++++++++++++++-- 4 files changed, 39 insertions(+), 9 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 40d2f73..2257aaa 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -17,6 +17,7 @@ - deezer: fixed endless export loop if the user's listen history was empty. - dump: it is now possible to specify a file to write the text output to. - Fixed potential issues with MusicBrainz rate limiting. +- Fixed import log output duplicating. 
## 0.6.0 - 2025-05-23 diff --git a/internal/backends/import.go b/internal/backends/import.go index e7a6add..97912dd 100644 --- a/internal/backends/import.go +++ b/internal/backends/import.go @@ -112,8 +112,9 @@ func process[R models.LovesResult | models.ListensResult, P ImportProcessor[R]]( return } - importResult, err := processor.Import(ctx, exportResult, result, out, progress) - result.Update(importResult) + importResult, err := processor.Import( + ctx, exportResult, result.Copy(), out, progress) + result.Update(&importResult) if err != nil { processor.ImportBackend().FinishImport() out <- handleError(result, err, progress) diff --git a/internal/models/models.go b/internal/models/models.go index 78d9965..a93a043 100644 --- a/internal/models/models.go +++ b/internal/models/models.go @@ -196,11 +196,21 @@ func (i *ImportResult) UpdateTimestamp(newTime time.Time) { } } -func (i *ImportResult) Update(from ImportResult) { - i.TotalCount = from.TotalCount - i.ImportCount = from.ImportCount - i.UpdateTimestamp(from.LastTimestamp) - i.ImportLog = append(i.ImportLog, from.ImportLog...) +func (i *ImportResult) Update(from *ImportResult) { + if i != from { + i.TotalCount = from.TotalCount + i.ImportCount = from.ImportCount + i.UpdateTimestamp(from.LastTimestamp) + i.ImportLog = append(i.ImportLog, from.ImportLog...) 
+ } +} + +func (i *ImportResult) Copy() ImportResult { + return ImportResult{ + TotalCount: i.TotalCount, + ImportCount: i.ImportCount, + LastTimestamp: i.LastTimestamp, + } } func (i *ImportResult) Log(t LogEntryType, msg string) { diff --git a/internal/models/models_test.go b/internal/models/models_test.go index 5395610..47ef86f 100644 --- a/internal/models/models_test.go +++ b/internal/models/models_test.go @@ -1,5 +1,5 @@ /* -Copyright © 2023 Philipp Wolfer +Copyright © 2023-2025 Philipp Wolfer Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -138,13 +138,31 @@ func TestImportResultUpdate(t *testing.T) { LastTimestamp: time.Now().Add(1 * time.Hour), ImportLog: []models.LogEntry{logEntry2}, } - result.Update(newResult) + result.Update(&newResult) assert.Equal(t, 120, result.TotalCount) assert.Equal(t, 50, result.ImportCount) assert.Equal(t, newResult.LastTimestamp, result.LastTimestamp) assert.Equal(t, []models.LogEntry{logEntry1, logEntry2}, result.ImportLog) } +func TestImportResultCopy(t *testing.T) { + logEntry := models.LogEntry{ + Type: models.Warning, + Message: "foo", + } + result := models.ImportResult{ + TotalCount: 100, + ImportCount: 20, + LastTimestamp: time.Now(), + ImportLog: []models.LogEntry{logEntry}, + } + copy := result.Copy() + assert.Equal(t, result.TotalCount, copy.TotalCount) + assert.Equal(t, result.ImportCount, copy.ImportCount) + assert.Equal(t, result.LastTimestamp, copy.LastTimestamp) + assert.Empty(t, copy.ImportLog) +} + func TestImportResultLog(t *testing.T) { result := models.ImportResult{} result.Log(models.Warning, "foo") From 4da569743555301b18ffc993f5bcfffb4d625f98 Mon Sep 17 00:00:00 2001 From: Philipp Wolfer Date: Sat, 24 May 2025 20:54:20 +0200 Subject: [PATCH 22/22] If dump does not write to file, output the result as log --- internal/backends/dump/dump.go | 7 +++++-- internal/backends/import.go | 6 +++---
internal/backends/jspf/jspf.go | 2 +- internal/backends/lastfm/lastfm.go | 6 ++++-- internal/backends/listenbrainz/listenbrainz.go | 6 ++++-- internal/backends/maloja/maloja.go | 6 ++++-- internal/backends/scrobblerlog/scrobblerlog.go | 2 +- internal/cli/transfer.go | 6 +++++- internal/models/interfaces.go | 2 +- internal/models/models.go | 1 + 10 files changed, 29 insertions(+), 15 deletions(-) diff --git a/internal/backends/dump/dump.go b/internal/backends/dump/dump.go index 4714bd6..8d7c641 100644 --- a/internal/backends/dump/dump.go +++ b/internal/backends/dump/dump.go @@ -73,14 +73,17 @@ func (b *DumpBackend) InitConfig(config *config.ServiceConfig) error { func (b *DumpBackend) StartImport() error { return nil } -func (b *DumpBackend) FinishImport() error { +func (b *DumpBackend) FinishImport(result *models.ImportResult) error { if b.print { out := new(strings.Builder) _, err := io.Copy(out, b.buffer) if err != nil { return err } - fmt.Println(out.String()) + + if result != nil { + result.Log(models.Output, out.String()) + } } // Close the io writer if it is closable diff --git a/internal/backends/import.go b/internal/backends/import.go index 97912dd..ae6da92 100644 --- a/internal/backends/import.go +++ b/internal/backends/import.go @@ -107,7 +107,7 @@ func process[R models.LovesResult | models.ListensResult, P ImportProcessor[R]]( for exportResult := range results { if err := ctx.Err(); err != nil { - processor.ImportBackend().FinishImport() + processor.ImportBackend().FinishImport(&result) out <- handleError(result, err, progress) return } @@ -116,14 +116,14 @@ func process[R models.LovesResult | models.ListensResult, P ImportProcessor[R]]( ctx, exportResult, result.Copy(), out, progress) result.Update(&importResult) if err != nil { - processor.ImportBackend().FinishImport() + processor.ImportBackend().FinishImport(&result) out <- handleError(result, err, progress) return } progress <- p.FromImportResult(result, false) } - if err := 
processor.ImportBackend().FinishImport(); err != nil { + if err := processor.ImportBackend().FinishImport(&result); err != nil { out <- handleError(result, err, progress) return } diff --git a/internal/backends/jspf/jspf.go b/internal/backends/jspf/jspf.go index e2bcde1..887fd72 100644 --- a/internal/backends/jspf/jspf.go +++ b/internal/backends/jspf/jspf.go @@ -90,7 +90,7 @@ func (b *JSPFBackend) StartImport() error { return b.readJSPF() } -func (b *JSPFBackend) FinishImport() error { +func (b *JSPFBackend) FinishImport(result *models.ImportResult) error { return b.writeJSPF() } diff --git a/internal/backends/lastfm/lastfm.go b/internal/backends/lastfm/lastfm.go index b34452e..186a631 100644 --- a/internal/backends/lastfm/lastfm.go +++ b/internal/backends/lastfm/lastfm.go @@ -70,8 +70,10 @@ func (b *LastfmApiBackend) InitConfig(config *config.ServiceConfig) error { return nil } -func (b *LastfmApiBackend) StartImport() error { return nil } -func (b *LastfmApiBackend) FinishImport() error { return nil } +func (b *LastfmApiBackend) StartImport() error { return nil } +func (b *LastfmApiBackend) FinishImport(result *models.ImportResult) error { + return nil +} func (b *LastfmApiBackend) OAuth2Strategy(redirectURL *url.URL) auth.OAuth2Strategy { return lastfmStrategy{ diff --git a/internal/backends/listenbrainz/listenbrainz.go b/internal/backends/listenbrainz/listenbrainz.go index dcc28fa..98d1525 100644 --- a/internal/backends/listenbrainz/listenbrainz.go +++ b/internal/backends/listenbrainz/listenbrainz.go @@ -73,8 +73,10 @@ func (b *ListenBrainzApiBackend) InitConfig(config *config.ServiceConfig) error return nil } -func (b *ListenBrainzApiBackend) StartImport() error { return nil } -func (b *ListenBrainzApiBackend) FinishImport() error { return nil } +func (b *ListenBrainzApiBackend) StartImport() error { return nil } +func (b *ListenBrainzApiBackend) FinishImport(result *models.ImportResult) error { + return nil +} func (b *ListenBrainzApiBackend) 
ExportListens(ctx context.Context, oldestTimestamp time.Time, results chan models.ListensResult, progress chan models.TransferProgress) { startTime := time.Now() diff --git a/internal/backends/maloja/maloja.go b/internal/backends/maloja/maloja.go index f082d9b..d85309f 100644 --- a/internal/backends/maloja/maloja.go +++ b/internal/backends/maloja/maloja.go @@ -61,8 +61,10 @@ func (b *MalojaApiBackend) InitConfig(config *config.ServiceConfig) error { return nil } -func (b *MalojaApiBackend) StartImport() error { return nil } -func (b *MalojaApiBackend) FinishImport() error { return nil } +func (b *MalojaApiBackend) StartImport() error { return nil } +func (b *MalojaApiBackend) FinishImport(result *models.ImportResult) error { + return nil +} func (b *MalojaApiBackend) ExportListens(ctx context.Context, oldestTimestamp time.Time, results chan models.ListensResult, progress chan models.TransferProgress) { page := 0 diff --git a/internal/backends/scrobblerlog/scrobblerlog.go b/internal/backends/scrobblerlog/scrobblerlog.go index 6d42f3c..13aecba 100644 --- a/internal/backends/scrobblerlog/scrobblerlog.go +++ b/internal/backends/scrobblerlog/scrobblerlog.go @@ -126,7 +126,7 @@ func (b *ScrobblerLogBackend) StartImport() error { return nil } -func (b *ScrobblerLogBackend) FinishImport() error { +func (b *ScrobblerLogBackend) FinishImport(result *models.ImportResult) error { return b.file.Close() } diff --git a/internal/cli/transfer.go b/internal/cli/transfer.go index 3aabb4b..7c5ecc0 100644 --- a/internal/cli/transfer.go +++ b/internal/cli/transfer.go @@ -157,7 +157,11 @@ func (c *TransferCmd[E, I, R]) Transfer(exp backends.ExportProcessor[R], imp bac fmt.Println() fmt.Println(i18n.Tr("Import log:")) for _, entry := range result.ImportLog { - fmt.Println(i18n.Tr("%v: %v", entry.Type, entry.Message)) + if entry.Type != models.Output { + fmt.Println(i18n.Tr("%v: %v", entry.Type, entry.Message)) + } else { + fmt.Println(entry.Message) + } } } diff --git 
a/internal/models/interfaces.go b/internal/models/interfaces.go index 2f4beaf..79a4c6c 100644 --- a/internal/models/interfaces.go +++ b/internal/models/interfaces.go @@ -46,7 +46,7 @@ type ImportBackend interface { // The implementation can perform all steps here to finalize the // export/import and free used resources. - FinishImport() error + FinishImport(result *ImportResult) error } // Must be implemented by services supporting the export of listens. diff --git a/internal/models/models.go b/internal/models/models.go index a93a043..69280b3 100644 --- a/internal/models/models.go +++ b/internal/models/models.go @@ -169,6 +169,7 @@ type LovesResult ExportResult[LovesList] type LogEntryType string const ( + Output LogEntryType = "" Info LogEntryType = "Info" Warning LogEntryType = "Warning" Error LogEntryType = "Error"