Use the ListenBrainz (LB) API to look up missing metadata for loves

This is faster than looking up each recording MBID individually via the MusicBrainz API.
This commit is contained in:
Philipp Wolfer 2025-05-24 16:46:10 +02:00
parent dddd2e4eec
commit 7542657925
No known key found for this signature in database
GPG key ID: 8FDF744D4919943B
4 changed files with 194 additions and 51 deletions

View file

@ -36,11 +36,12 @@ import (
// Batch sizes used by the archive backend.
const (
// NOTE(review): presumably the number of listens sent per batch; its use is
// not visible in this hunk.
listensBatchSize = 2000
lovesBatchSize = 10
// ExportLoves flushes a batch once it holds this many feedback entries. Each
// flush triggers one batched metadata lookup, so the size follows the
// ListenBrainz client's MaxItemsPerGet constant.
lovesBatchSize = listenbrainz.MaxItemsPerGet
)
// ListenBrainzArchiveBackend is a backend working on a ListenBrainz archive
// file.
type ListenBrainzArchiveBackend struct {
// filePath is read from the "file-path" config option in InitConfig.
filePath string
// lbClient is the ListenBrainz API client; ExportLoves passes it on for
// batched track metadata lookups.
lbClient listenbrainz.Client
// mbClient is the MusicBrainz web service client; it serves as the slower
// per-recording fallback when looking up track metadata.
mbClient musicbrainzws2.Client
}
@ -56,6 +57,7 @@ func (b *ListenBrainzArchiveBackend) Options() []models.BackendOption {
func (b *ListenBrainzArchiveBackend) InitConfig(config *config.ServiceConfig) error {
b.filePath = config.GetString("file-path")
b.lbClient = listenbrainz.NewClient("", version.UserAgent())
b.mbClient = *musicbrainzws2.NewClient(musicbrainzws2.AppInfo{
Name: version.AppName,
Version: version.AppVersion,
@ -164,7 +166,7 @@ func (b *ListenBrainzArchiveBackend) ExportLoves(
return
}
loves := make(models.LovesList, 0, lovesBatchSize)
batch := make([]listenbrainz.Feedback, 0, lovesBatchSize)
for feedback, err := range archive.IterFeedback(oldestTimestamp) {
if err != nil {
p.Export.Abort()
@ -173,37 +175,43 @@ func (b *ListenBrainzArchiveBackend) ExportLoves(
return
}
// The export file does not include track metadata. Try fetching details
// from MusicBrainz.
if feedback.TrackMetadata == nil {
track, err := lbapi.LookupRecording(ctx, &b.mbClient, feedback.RecordingMBID)
if err == nil {
feedback.TrackMetadata = track
}
if feedback.UserName == "" {
feedback.UserName = userInfo.Name
}
love := lbapi.AsLove(feedback)
if love.UserName == "" {
love.UserName = userInfo.Name
}
// TODO: The dump does not contain TrackMetadata for feedback.
// We need to look it up in the archive.
loves = append(loves, love)
batch = append(batch, feedback)
// Update the progress
p.Export.TotalItems += 1
remainingTime := startTime.Sub(love.Created)
remainingTime := startTime.Sub(time.Unix(feedback.Created, 0))
p.Export.Elapsed = int64(totalDuration.Seconds() - remainingTime.Seconds())
// Allow the importer to start processing the listens by
// sending them in batches.
if len(loves) >= lovesBatchSize {
if len(batch) >= lovesBatchSize {
// The dump does not contain track metadata. Extend it with additional
// lookups
loves, err := lbapi.ExtendTrackMetadata(ctx, &b.lbClient, &b.mbClient, &batch)
if err != nil {
p.Export.Abort()
progress <- p
results <- models.LovesResult{Error: err}
return
}
results <- models.LovesResult{Items: loves}
progress <- p
loves = loves[:0]
batch = batch[:0]
}
}
loves, err := lbapi.ExtendTrackMetadata(ctx, &b.lbClient, &b.mbClient, &batch)
if err != nil {
p.Export.Abort()
progress <- p
results <- models.LovesResult{Error: err}
return
}
results <- models.LovesResult{Items: loves}
p.Export.Complete()
progress <- p

View file

@ -32,35 +32,6 @@ import (
"go.uploadedlobster.com/scotty/internal/models"
)
// LookupRecording fetches the recording with the given MBID from MusicBrainz,
// including its artist credits, and converts it into a ListenBrainz track.
//
// The returned track carries the recording title, the formatted artist credit
// and an MBID mapping with the recording MBID and the MBIDs of all credited
// artists. An error from the MusicBrainz lookup is returned unchanged.
func LookupRecording(
ctx context.Context,
mb *musicbrainzws2.Client,
mbid mbtypes.MBID,
) (*listenbrainz.Track, error) {
// Artist credits are needed for the artist name and the artist MBIDs.
filter := musicbrainzws2.IncludesFilter{
Includes: []string{"artist-credits"},
}
recording, err := mb.LookupRecording(ctx, mbid, filter)
if err != nil {
return nil, err
}
// Collect one MBID per artist credit entry, in credit order.
artistMBIDs := make([]mbtypes.MBID, 0, len(recording.ArtistCredit))
for _, artist := range recording.ArtistCredit {
artistMBIDs = append(artistMBIDs, artist.Artist.ID)
}
track := listenbrainz.Track{
TrackName: recording.Title,
ArtistName: recording.ArtistCredit.String(),
MBIDMapping: &listenbrainz.MBIDMapping{
// In case of redirects this MBID differs from the looked up MBID
RecordingMBID: recording.ID,
ArtistMBIDs: artistMBIDs,
},
}
return &track, nil
}
func AsListen(lbListen listenbrainz.Listen) models.Listen {
listen := models.Listen{
ListenedAt: time.Unix(lbListen.ListenedAt, 0),
@ -113,3 +84,107 @@ func AsTrack(t listenbrainz.Track) models.Track {
return track
}
// LookupRecording resolves a single recording MBID against MusicBrainz and
// returns it as a ListenBrainz track.
//
// The lookup requests the artist credits so that the artist name and the
// MBIDs of all credited artists can be filled in. Any error reported by the
// MusicBrainz client is passed through unchanged.
func LookupRecording(
	ctx context.Context,
	mb *musicbrainzws2.Client,
	mbid mbtypes.MBID,
) (*listenbrainz.Track, error) {
	recording, err := mb.LookupRecording(ctx, mbid, musicbrainzws2.IncludesFilter{
		Includes: []string{"artist-credits"},
	})
	if err != nil {
		return nil, err
	}

	credits := recording.ArtistCredit
	creditedMBIDs := make([]mbtypes.MBID, 0, len(credits))
	for _, credit := range credits {
		creditedMBIDs = append(creditedMBIDs, credit.Artist.ID)
	}

	mapping := &listenbrainz.MBIDMapping{
		// In case of redirects this MBID differs from the looked up MBID
		RecordingMBID: recording.ID,
		ArtistMBIDs:   creditedMBIDs,
	}
	return &listenbrainz.Track{
		TrackName:   recording.Title,
		ArtistName:  credits.String(),
		MBIDMapping: mapping,
	}, nil
}
// ExtendTrackMetadata converts feedbacks into loves and fills in missing
// track metadata on the way.
//
// Every feedback that has a recording MBID but no TrackMetadata is resolved
// with a single batched ListenBrainz metadata request. MBIDs missing from
// that response are looked up one by one via MusicBrainz (slower); if that
// fails as well, the metadata is left empty. When no feedback needs a lookup
// (including an empty input), no request is sent at all.
//
// The returned loves are in the same order as the input. The elements of
// *feedbacks are not modified; metadata is only set on per-iteration copies.
// An error is returned only if the batched ListenBrainz request fails.
func ExtendTrackMetadata(
	ctx context.Context,
	lb *listenbrainz.Client,
	mb *musicbrainzws2.Client,
	feedbacks *[]listenbrainz.Feedback,
) ([]models.Love, error) {
	mbids := make([]mbtypes.MBID, 0, len(*feedbacks))
	for _, feedback := range *feedbacks {
		if feedback.TrackMetadata == nil && feedback.RecordingMBID != "" {
			mbids = append(mbids, feedback.RecordingMBID)
		}
	}

	// Skip the API round trip when there is nothing to look up. Callers flush
	// a possibly empty final batch, and a request without MBIDs is at best
	// wasted and at worst rejected by the server. A nil result map is safe to
	// read from below.
	var result listenbrainz.RecordingMetadataResult
	if len(mbids) > 0 {
		var err error
		result, err = lb.MetadataRecordings(ctx, mbids)
		if err != nil {
			return nil, err
		}
	}

	loves := make([]models.Love, 0, len(*feedbacks))
	for _, feedback := range *feedbacks {
		if feedback.TrackMetadata == nil && feedback.RecordingMBID != "" {
			if metadata, ok := result[feedback.RecordingMBID]; ok {
				feedback.TrackMetadata = trackFromMetadataLookup(
					feedback.RecordingMBID, metadata)
			} else if track, err := LookupRecording(ctx, mb, feedback.RecordingMBID); err == nil {
				// MBID not in result. This is probably a MBID redirect, get
				// data from MB instead (slower).
				// If this also fails, just leave the metadata empty.
				feedback.TrackMetadata = track
			}
		}
		loves = append(loves, AsLove(feedback))
	}

	return loves, nil
}
// trackFromMetadataLookup builds a ListenBrainz track from the metadata the
// ListenBrainz recording metadata lookup returned for recordingMBID.
//
// Track, artist and release names are copied over. The recording length and
// the release group MBID go into AdditionalInfo under "duration_ms" and
// "release_group_mbid". The MBID mapping receives the given recording MBID,
// the release and cover art identifiers and one entry per credited artist.
func trackFromMetadataLookup(
	recordingMBID mbtypes.MBID,
	metadata listenbrainz.RecordingMetadata,
) *listenbrainz.Track {
	credited := metadata.Artist.Artists
	release := metadata.Release

	mbids := make([]mbtypes.MBID, 0, len(credited))
	mapped := make([]listenbrainz.Artist, 0, len(credited))
	for _, credit := range credited {
		mbids = append(mbids, credit.ArtistMBID)
		entry := listenbrainz.Artist{
			ArtistCreditName: credit.Name,
			ArtistMBID:       credit.ArtistMBID,
			JoinPhrase:       credit.JoinPhrase,
		}
		mapped = append(mapped, entry)
	}

	info := map[string]any{
		"duration_ms":        metadata.Recording.Length,
		"release_group_mbid": release.ReleaseGroupMBID,
	}
	mapping := &listenbrainz.MBIDMapping{
		RecordingMBID:  recordingMBID,
		ReleaseMBID:    release.MBID,
		ArtistMBIDs:    mbids,
		Artists:        mapped,
		CAAID:          release.CAAID,
		CAAReleaseMBID: release.CAAReleaseMBID,
	}

	return &listenbrainz.Track{
		TrackName:      metadata.Recording.Name,
		ArtistName:     metadata.Artist.Name,
		ReleaseName:    release.Name,
		AdditionalInfo: info,
		MBIDMapping:    mapping,
	}
}

View file

@ -28,6 +28,7 @@ import (
"time"
"github.com/go-resty/resty/v2"
"go.uploadedlobster.com/mbtypes"
"go.uploadedlobster.com/scotty/pkg/ratelimit"
)
@ -158,3 +159,24 @@ func (c Client) Lookup(ctx context.Context, recordingName string, artistName str
}
return
}
// MetadataRecordings looks up metadata for a batch of recording MBIDs with a
// single POST request to the "/metadata/recording/" endpoint, asking for the
// "artist" and "release" includes.
//
// On success the decoded response is returned. If the request itself fails
// (for example a network error or a cancelled context), that error is
// returned unchanged. If the server answers with a non-success status, the
// message from the decoded error body is returned as the error.
func (c Client) MetadataRecordings(ctx context.Context, mbids []mbtypes.MBID) (result RecordingMetadataResult, err error) {
	const path = "/metadata/recording/"
	errorResult := ErrorResult{}
	body := RecordingMetadataRequest{
		RecordingMBIDs: mbids,
		Includes:       "artist release",
	}
	response, err := c.HTTPClient.R().
		SetContext(ctx).
		SetBody(body).
		SetResult(&result).
		SetError(&errorResult).
		Post(path)
	if err != nil {
		// The request never produced a usable response: errorResult is empty
		// and response may be nil, so report the underlying error instead of
		// inspecting the response.
		return nil, err
	}

	if !response.IsSuccess() {
		return nil, errors.New(errorResult.Error)
	}
	return result, nil
}

View file

@ -82,9 +82,9 @@ type MBIDMapping struct {
}
// Artist is one credited artist as listed in MBIDMapping.Artists: the name as
// credited, the artist MBID and the join phrase.
type Artist struct {
ArtistCreditName string `json:"artist_credit_name,omitempty"`
ArtistMBID string `json:"artist_mbid,omitempty"`
JoinPhrase string `json:"join_phrase,omitempty"`
ArtistCreditName string `json:"artist_credit_name,omitempty"`
// The artist MBID uses the dedicated mbtypes.MBID type.
ArtistMBID mbtypes.MBID `json:"artist_mbid,omitempty"`
JoinPhrase string `json:"join_phrase,omitempty"`
}
type GetFeedbackResult struct {
@ -112,6 +112,44 @@ type LookupResult struct {
ArtistMBIDs []mbtypes.MBID `json:"artist_mbids"`
}
// RecordingMetadataRequest is the JSON body sent by Client.MetadataRecordings.
type RecordingMetadataRequest struct {
// MBIDs of the recordings to look up.
RecordingMBIDs []mbtypes.MBID `json:"recording_mbids"`
// Sent as "inc"; MetadataRecordings passes "artist release". Omitted from
// the JSON when empty.
Includes string `json:"inc,omitempty"`
}
// RecordingMetadataResult is the result of a recording metadata lookup. It
// maps a recording MBID to the metadata found for it; callers must check for
// missing keys, as not every requested MBID is necessarily present.
type RecordingMetadataResult map[mbtypes.MBID]RecordingMetadata
// RecordingMetadata is the metadata for a single recording as decoded from a
// recording metadata lookup. It is grouped into artist, recording and release
// sections matching the "artist", "recording" and "release" JSON keys.
type RecordingMetadata struct {
// Artist holds the overall artist credit and its individual artists.
Artist struct {
Name string `json:"name"`
ArtistCreditID int `json:"artist_credit_id"`
Artists []struct {
Name string `json:"name"`
Area string `json:"area"`
ArtistMBID mbtypes.MBID `json:"artist_mbid"`
JoinPhrase string `json:"join_phrase"`
BeginYear int `json:"begin_year"`
Type string `json:"type"`
// TODO: "rels" is not mapped yet.
} `json:"artists"`
} `json:"artist"`
// Recording holds the data of the recording itself.
Recording struct {
Name string `json:"name"`
// NOTE(review): presumably milliseconds, since it is stored as
// "duration_ms" when converted to a track; confirm against the API.
Length int `json:"length"`
// TODO: "rels" is not mapped yet.
} `json:"recording"`
// Release holds the release data, including the cover art ("caa")
// identifiers.
Release struct {
Name string `json:"name"`
AlbumArtistName string `json:"album_artist_name"`
Year int `json:"year"`
MBID mbtypes.MBID `json:"mbid"`
ReleaseGroupMBID mbtypes.MBID `json:"release_group_mbid"`
CAAID int `json:"caa_id"`
CAAReleaseMBID mbtypes.MBID `json:"caa_release_mbid"`
} `json:"release"`
}
// StatusResult holds the "status" field decoded from a JSON response.
type StatusResult struct {
Status string `json:"status"`
}