From b2b5c69278e0c6867084a230ec581bd3204087b3 Mon Sep 17 00:00:00 2001 From: Philipp Wolfer <ph.wolfer@gmail.com> Date: Sun, 14 Jan 2024 17:14:05 +0100 Subject: [PATCH] New similarity.CompareTracks function --- internal/similarity/similarity.go | 31 +++++++++++++++++++++-- internal/similarity/similarity_test.go | 35 ++++++++++++++++++++++++++ internal/util/util.go | 16 ++++++++++++ internal/util/util_test.go | 30 ++++++++++++++++++++++ 4 files changed, 110 insertions(+), 2 deletions(-) diff --git a/internal/similarity/similarity.go b/internal/similarity/similarity.go index 8e8536d..4c0b345 100644 --- a/internal/similarity/similarity.go +++ b/internal/similarity/similarity.go @@ -20,6 +20,7 @@ import ( "strings" "github.com/agnivade/levenshtein" + "go.uploadedlobster.com/scotty/internal/models" "go.uploadedlobster.com/scotty/internal/util" "golang.org/x/text/unicode/norm" ) @@ -42,14 +43,40 @@ func Similarity(s1 string, s2 string) float64 { return 1.0 - (float64(dist) / float64(maxLen)) } -var reExtraTitleInfo = regexp.MustCompile(`\([^)]+\)$`) var reMultiSpace = regexp.MustCompile(`\s+`) +var reIgnoredPatterns = []*regexp.Regexp{ + regexp.MustCompile(`\s+\([^)]+\)$`), + regexp.MustCompile(`\s+- (\d{4} )?remaster(ed)?$`), +} // Normalizes a track or release title. func NormalizeTitle(s string) string { s = strings.TrimSpace(s) s = strings.ToLower(s) - s = reExtraTitleInfo.ReplaceAllString(s, "") s = reMultiSpace.ReplaceAllString(s, " ") + for _, re := range reIgnoredPatterns { + s = re.ReplaceAllString(s, "") + } return s } + +// Compare two tracks for similarity. +func CompareTracks(t1 models.Track, t2 models.Track) float64 { + // Identical recording MBID always compares 100% + if t1.RecordingMbid == t2.RecordingMbid && t1.RecordingMbid != "" { + return 1.0 + } + + // Compare track name and artist + sims := []float64{ + Similarity(NormalizeTitle(t1.TrackName), NormalizeTitle(t2.TrackName)), + Similarity(NormalizeTitle(t1.ArtistName()), NormalizeTitle(t2.ArtistName())), + } + + // Compare release names only if they are set for both tracks + if t1.ReleaseName != "" && t2.ReleaseName != "" { + sims = append(sims, Similarity(NormalizeTitle(t1.ReleaseName), NormalizeTitle(t2.ReleaseName))) + } + + return util.Average(sims...) +} diff --git a/internal/similarity/similarity_test.go b/internal/similarity/similarity_test.go index 206c6f0..f1e92a5 100644 --- a/internal/similarity/similarity_test.go +++ b/internal/similarity/similarity_test.go @@ -20,6 +20,7 @@ import ( "testing" "github.com/stretchr/testify/assert" + "go.uploadedlobster.com/scotty/internal/models" "go.uploadedlobster.com/scotty/internal/similarity" ) @@ -36,6 +37,7 @@ func TestSimilarity(t *testing.T) { assert.Equal(0.0, similarity.Similarity("foo", "bar")) assert.Equal(0.5, similarity.Similarity("foobar", "bar")) assert.Equal(1.0, similarity.Similarity("foo", "foo")) + assert.Equal(0.6, similarity.Similarity("Forever After", "Forever Failure")) } func ExampleNormalizeTitle() { @@ -48,4 +50,37 @@ func TestNormalizeTitle(t *testing.T) { assert := assert.New(t) assert.Equal("forever failure", similarity.NormalizeTitle("Forever Failure")) assert.Equal("foo", similarity.NormalizeTitle(" \tfoo\t \t")) + assert.Equal("wasted years", similarity.NormalizeTitle("Wasted Years - 2015 Remaster")) + assert.Equal("london calling", similarity.NormalizeTitle("London Calling - Remastered")) + assert.Equal("london calling", similarity.NormalizeTitle("London Calling (Remastered)")) +} + +func ExampleCompareTracks() { + t1 := models.Track{ + ArtistNames: []string{"Paradise Lost"}, + TrackName: "Forever After", + } + t2 := models.Track{ + ArtistNames: []string{"Paradise Lost"}, + TrackName: "Forever Failure (radio edit)", + ReleaseName: "Draconian Times", + } + sim := similarity.CompareTracks(t1, t2) + fmt.Println(sim) + // Output: 0.8333333333333334 +} + +func TestCompareTracksSameMBID(t *testing.T) { + t1 := models.Track{ + ArtistNames: []string{"Paradise Lost"}, + TrackName: "Forever After", + RecordingMbid: models.MBID("2886d15c-09b0-43c6-af56-932f70dde164"), + } + t2 := models.Track{ + ArtistNames: []string{"Paradise Lost"}, + TrackName: "Forever Failure (radio edit)", + ReleaseName: "Draconian Times", + RecordingMbid: models.MBID("2886d15c-09b0-43c6-af56-932f70dde164"), + } + assert.Equal(t, 1.0, similarity.CompareTracks(t1, t2)) } diff --git a/internal/util/util.go b/internal/util/util.go index 99826a1..e8663a7 100644 --- a/internal/util/util.go +++ b/internal/util/util.go @@ -32,3 +32,19 @@ func Min[T constraints.Ordered](m, n T) T { return m } } + +func Sum[T constraints.Integer | constraints.Float](v ...T) T { + var sum T + for _, i := range v { + sum += i + } + return sum +} + +func Average[T constraints.Integer | constraints.Float](v ...T) float64 { + length := len(v) + if length == 0 { + return 0.0 + } + return float64(Sum(v...)) / float64(length) +} diff --git a/internal/util/util_test.go b/internal/util/util_test.go index 5aee726..73ec415 100644 --- a/internal/util/util_test.go +++ b/internal/util/util_test.go @@ -17,7 +17,9 @@ package util_test import ( "fmt" + "testing" + "github.com/stretchr/testify/assert" "go.uploadedlobster.com/scotty/internal/util" ) @@ -32,3 +34,31 @@ func ExampleMin() { fmt.Print(v) // Output: 2 } + +func ExampleSum() { + values := []float64{1.4, 2.2} + sum := util.Sum(values...) + fmt.Print(sum) + // Output: 3.6 +} + +func TestSumEmpty(t *testing.T) { + assert.Equal(t, 0, util.Sum([]int{}...)) +} + +func ExampleAverage() { + values := []float64{1.4, 2.2, 0.9} + sum := util.Average(values...) + fmt.Print(sum) + // Output: 1.5 +} + +func TestAverageEmpty(t *testing.T) { + assert.Equal(t, 0.0, util.Average([]int{}...)) +} + +func TestAverageInt(t *testing.T) { + assert := assert.New(t) + assert.Equal(3.0, util.Average([]int{2, 4, 3}...)) + assert.Equal(1.5, util.Average([]int{2, 1, 1, 2}...)) +}