From 3b9d07e6b589247626d20581956fdcaa5eceb83e Mon Sep 17 00:00:00 2001 From: Philipp Wolfer Date: Fri, 23 May 2025 10:00:22 +0200 Subject: [PATCH] Implemented ScrobblerLog.ParseIter --- pkg/scrobblerlog/parser.go | 150 ++++++++++++++++++++------------ pkg/scrobblerlog/parser_test.go | 36 +++++++- 2 files changed, 129 insertions(+), 57 deletions(-) diff --git a/pkg/scrobblerlog/parser.go b/pkg/scrobblerlog/parser.go index 8bad56d..48fadcf 100644 --- a/pkg/scrobblerlog/parser.go +++ b/pkg/scrobblerlog/parser.go @@ -39,6 +39,7 @@ import ( "encoding/csv" "fmt" "io" + "iter" "strconv" "strings" "time" @@ -91,53 +92,36 @@ type ScrobblerLog struct { // The reader must provide a valid scrobbler log file with a valid header. // This function implicitly calls [ScrobblerLog.ReadHeader]. func (l *ScrobblerLog) Parse(data io.Reader, ignoreSkipped bool) error { - l.Records = make([]Record, 0) - - reader := bufio.NewReader(data) - err := l.readHeader(reader) + tsvReader, err := l.initReader(data) if err != nil { return err } - tsvReader := csv.NewReader(reader) - tsvReader.Comma = '\t' - // Row length is often flexible - tsvReader.FieldsPerRecord = -1 - - for { - // A row is: - // artistName releaseName trackName trackNumber duration rating timestamp recordingMBID - row, err := tsvReader.Read() - if err == io.EOF { - break - } else if err != nil { - return err - } - - // fmt.Printf("row: %v\n", row) - - // We consider only the last field (recording MBID) optional - // This was added in the 1.1 file format. - if len(row) < 7 { - line, _ := tsvReader.FieldPos(0) - return fmt.Errorf("invalid record in scrobblerlog line %v", line) - } - - record, err := l.rowToRecord(row) + for _, err := range l.iterRecords(tsvReader, ignoreSkipped) { if err != nil { return err } - - if ignoreSkipped && record.Rating == RatingSkipped { - continue - } - - l.Records = append(l.Records, record) } return nil } +// Parses a scrobbler log file from the given reader and returns an iterator over all records. +// +// The reader must provide a valid scrobbler log file with a valid header. +// This function implicitly calls [ScrobblerLog.ReadHeader]. +func (l *ScrobblerLog) ParseIter(data io.Reader, ignoreSkipped bool) iter.Seq2[Record, error] { + + tsvReader, err := l.initReader(data) + if err != nil { + return func(yield func(Record, error) bool) { + yield(Record{}, err) + } + } + + return l.iterRecords(tsvReader, ignoreSkipped) +} + // Append writes the given records to the writer. // // The writer should be for an existing scrobbler log file or @@ -177,6 +161,37 @@ func (l *ScrobblerLog) ReadHeader(reader io.Reader) error { return l.readHeader(bufio.NewReader(reader)) } +// Writes the header of a scrobbler log file to the given writer. +func (l *ScrobblerLog) WriteHeader(writer io.Writer) error { + headers := []string{ + "#AUDIOSCROBBLER/1.1\n", + "#TZ/" + string(l.TZ) + "\n", + "#CLIENT/" + l.Client + "\n", + } + for _, line := range headers { + _, err := writer.Write([]byte(line)) + if err != nil { + return err + } + } + return nil +} + +func (l *ScrobblerLog) initReader(data io.Reader) (*csv.Reader, error) { + reader := bufio.NewReader(data) + err := l.readHeader(reader) + if err != nil { + return nil, err + } + + tsvReader := csv.NewReader(reader) + tsvReader.Comma = '\t' + // Row length is often flexible + tsvReader.FieldsPerRecord = -1 + + return tsvReader, nil +} + func (l *ScrobblerLog) readHeader(reader *bufio.Reader) error { // Skip header for i := 0; i < 3; i++ { @@ -215,37 +230,64 @@ func (l *ScrobblerLog) readHeader(reader *bufio.Reader) error { return nil } -// Writes the header of a scrobbler log file to the given writer. -func (l *ScrobblerLog) WriteHeader(writer io.Writer) error { - headers := []string{ - "#AUDIOSCROBBLER/1.1\n", - "#TZ/" + string(l.TZ) + "\n", - "#CLIENT/" + l.Client + "\n", - } - for _, line := range headers { - _, err := writer.Write([]byte(line)) - if err != nil { - return err +func (l *ScrobblerLog) iterRecords(reader *csv.Reader, ignoreSkipped bool) iter.Seq2[Record, error] { + return func(yield func(Record, error) bool) { + l.Records = make([]Record, 0) + for { + record, err := l.parseRow(reader) + if err == io.EOF { + break + } else if err != nil { + yield(Record{}, err) + break + } + + if ignoreSkipped && record.Rating == RatingSkipped { + continue + } + + l.Records = append(l.Records, *record) + if !yield(*record, nil) { + break + } } } - return nil } -func (l ScrobblerLog) rowToRecord(row []string) (Record, error) { - var record Record +func (l *ScrobblerLog) parseRow(reader *csv.Reader) (*Record, error) { + // A row is: + // artistName releaseName trackName trackNumber duration rating timestamp recordingMBID + row, err := reader.Read() + if err != nil { + return nil, err + } + + // fmt.Printf("row: %v\n", row) + + // We consider only the last field (recording MBID) optional + // This was added in the 1.1 file format. + if len(row) < 7 { + line, _ := reader.FieldPos(0) + return nil, fmt.Errorf("invalid record in scrobblerlog line %v", line) + } + + return l.rowToRecord(row) +} + +func (l ScrobblerLog) rowToRecord(row []string) (*Record, error) { trackNumber, err := strconv.Atoi(row[3]) if err != nil { - return record, err + return nil, err } duration, err := strconv.Atoi(row[4]) if err != nil { - return record, err + return nil, err } timestamp, err := strconv.ParseInt(row[6], 10, 64) if err != nil { - return record, err + return nil, err } var timezone *time.Location = nil @@ -253,7 +295,7 @@ func (l ScrobblerLog) rowToRecord(row []string) (Record, error) { timezone = l.FallbackTimezone } - record = Record{ + record := Record{ ArtistName: row[0], AlbumName: row[1], TrackName: row[2], @@ -267,7 +309,7 @@ func (l ScrobblerLog) rowToRecord(row []string) (Record, error) { record.MusicBrainzRecordingID = mbtypes.MBID(row[7]) } - return record, nil + return &record, nil } // Convert a Unix timestamp to a [time.Time] object, but treat the timestamp diff --git a/pkg/scrobblerlog/parser_test.go b/pkg/scrobblerlog/parser_test.go index 8dc30e5..26990f9 100644 --- a/pkg/scrobblerlog/parser_test.go +++ b/pkg/scrobblerlog/parser_test.go @@ -44,7 +44,14 @@ Kraftwerk Trans-Europe Express The Hall of Mirrors 2 474 S 1260358000 385ba9e9-6 Teeth Agency You Don't Have To Live In Pain Wolfs Jam 2 107 L 1260359404 1262beaf-19f8-4534-b9ed-7eef9ca8e83f ` -func TestParser(t *testing.T) { +var testScrobblerLogInvalid = `#AUDIOSCROBBLER/1.1 +#TZ/UNKNOWN +#CLIENT/Rockbox sansaclipplus $Revision$ +Özcan Deniz Ses ve Ayrilik Sevdanin rengi (sipacik) byMrTurkey 5 306 L 1260342084 +Özcan Deniz Hediye 2@V@7 Bir Dudaktan 1 210 L +` + +func TestParse(t *testing.T) { assert := assert.New(t) data := bytes.NewBufferString(testScrobblerLog) result := scrobblerlog.ScrobblerLog{} @@ -68,7 +75,7 @@ func TestParser(t *testing.T) { record4.MusicBrainzRecordingID) } -func TestParserIgnoreSkipped(t *testing.T) { +func TestParseIgnoreSkipped(t *testing.T) { assert := assert.New(t) data := bytes.NewBufferString(testScrobblerLog) result := scrobblerlog.ScrobblerLog{} @@ -81,7 +88,7 @@ func TestParserIgnoreSkipped(t *testing.T) { record4.MusicBrainzRecordingID) } -func TestParserFallbackTimezone(t *testing.T) { +func TestParseFallbackTimezone(t *testing.T) { assert := assert.New(t) data := bytes.NewBufferString(testScrobblerLog) result := scrobblerlog.ScrobblerLog{ @@ -96,6 +103,29 @@ func TestParserFallbackTimezone(t *testing.T) { ) } +func TestParseInvalid(t *testing.T) { + assert := assert.New(t) + data := bytes.NewBufferString(testScrobblerLogInvalid) + result := scrobblerlog.ScrobblerLog{} + err := result.Parse(data, true) + assert.ErrorContains(err, "invalid record in scrobblerlog line 2") +} + +func TestParseIter(t *testing.T) { + assert := assert.New(t) + data := bytes.NewBufferString(testScrobblerLog) + result := scrobblerlog.ScrobblerLog{} + records := make([]scrobblerlog.Record, 0) + for record, err := range result.ParseIter(data, false) { + require.NoError(t, err) + records = append(records, record) + } + + assert.Len(records, 5) + record1 := result.Records[0] + assert.Equal("Ses ve Ayrilik", record1.AlbumName) +} + func TestAppend(t *testing.T) { assert := assert.New(t) data := make([]byte, 0, 10)