From b2b5c69278e0c6867084a230ec581bd3204087b3 Mon Sep 17 00:00:00 2001
From: Philipp Wolfer <ph.wolfer@gmail.com>
Date: Sun, 14 Jan 2024 17:14:05 +0100
Subject: [PATCH] New similarity.CompareTracks function

---
 internal/similarity/similarity.go      | 31 +++++++++++++++++++++--
 internal/similarity/similarity_test.go | 35 ++++++++++++++++++++++++++
 internal/util/util.go                  | 16 ++++++++++++
 internal/util/util_test.go             | 30 ++++++++++++++++++++++
 4 files changed, 110 insertions(+), 2 deletions(-)

diff --git a/internal/similarity/similarity.go b/internal/similarity/similarity.go
index 8e8536d..4c0b345 100644
--- a/internal/similarity/similarity.go
+++ b/internal/similarity/similarity.go
@@ -20,6 +20,7 @@ import (
 	"strings"
 
 	"github.com/agnivade/levenshtein"
+	"go.uploadedlobster.com/scotty/internal/models"
 	"go.uploadedlobster.com/scotty/internal/util"
 	"golang.org/x/text/unicode/norm"
 )
@@ -42,14 +43,40 @@ func Similarity(s1 string, s2 string) float64 {
 	return 1.0 - (float64(dist) / float64(maxLen))
 }
 
-var reExtraTitleInfo = regexp.MustCompile(`\([^)]+\)$`)
 var reMultiSpace = regexp.MustCompile(`\s+`)
+var reIgnoredPatterns = []*regexp.Regexp{
+	regexp.MustCompile(`\s+\([^)]+\)$`),                // trailing parenthesized text, e.g. " (radio edit)"
+	regexp.MustCompile(`\s+- (\d{4} )?remaster(ed)?$`), // trailing remaster marker, e.g. " - 2015 remaster"
+}
 
 // Normalizes a track or release title.
 func NormalizeTitle(s string) string {
 	s = strings.TrimSpace(s)
 	s = strings.ToLower(s)
-	s = reExtraTitleInfo.ReplaceAllString(s, "")
 	s = reMultiSpace.ReplaceAllString(s, " ")
+	for _, re := range reIgnoredPatterns { // each pattern is applied once, in slice order
+		s = re.ReplaceAllString(s, "")
+	}
 	return s
 }
+
+// CompareTracks returns a similarity score for two tracks. Tracks sharing a
+// non-empty recording MBID score 1.0. Otherwise the score is the mean of the
+// normalized track name and artist name similarities, plus the release name
+// similarity when both tracks have a release name set.
+func CompareTracks(t1 models.Track, t2 models.Track) float64 {
+	if t1.RecordingMbid != "" && t1.RecordingMbid == t2.RecordingMbid {
+		return 1.0
+	}
+
+	compare := func(a, b string) float64 {
+		return Similarity(NormalizeTitle(a), NormalizeTitle(b))
+	}
+	scores := make([]float64, 0, 3)
+	scores = append(scores, compare(t1.TrackName, t2.TrackName))
+	scores = append(scores, compare(t1.ArtistName(), t2.ArtistName()))
+	if t1.ReleaseName != "" && t2.ReleaseName != "" {
+		scores = append(scores, compare(t1.ReleaseName, t2.ReleaseName))
+	}
+	return util.Average(scores...)
+}
diff --git a/internal/similarity/similarity_test.go b/internal/similarity/similarity_test.go
index 206c6f0..f1e92a5 100644
--- a/internal/similarity/similarity_test.go
+++ b/internal/similarity/similarity_test.go
@@ -20,6 +20,7 @@ import (
 	"testing"
 
 	"github.com/stretchr/testify/assert"
+	"go.uploadedlobster.com/scotty/internal/models"
 	"go.uploadedlobster.com/scotty/internal/similarity"
 )
 
@@ -36,6 +37,7 @@ func TestSimilarity(t *testing.T) {
 	assert.Equal(0.0, similarity.Similarity("foo", "bar"))
 	assert.Equal(0.5, similarity.Similarity("foobar", "bar"))
 	assert.Equal(1.0, similarity.Similarity("foo", "foo"))
+	assert.Equal(0.6, similarity.Similarity("Forever After", "Forever Failure"))
 }
 
 func ExampleNormalizeTitle() {
@@ -48,4 +50,37 @@ func TestNormalizeTitle(t *testing.T) {
 	assert := assert.New(t)
 	assert.Equal("forever failure", similarity.NormalizeTitle("Forever Failure"))
 	assert.Equal("foo", similarity.NormalizeTitle(" \tfoo\t \t"))
+	assert.Equal("wasted years", similarity.NormalizeTitle("Wasted Years - 2015 Remaster"))
+	assert.Equal("london calling", similarity.NormalizeTitle("London Calling - Remastered"))
+	assert.Equal("london calling", similarity.NormalizeTitle("London Calling (Remastered)"))
+}
+
+func ExampleCompareTracks() {
+	// Only one track has a release name, so only title and artist are compared.
+	first := models.Track{
+		TrackName:   "Forever After",
+		ArtistNames: []string{"Paradise Lost"},
+	}
+	second := models.Track{
+		TrackName:   "Forever Failure (radio edit)",
+		ArtistNames: []string{"Paradise Lost"},
+		ReleaseName: "Draconian Times",
+	}
+	fmt.Println(similarity.CompareTracks(first, second))
+	// Output: 0.8333333333333334
+}
+
+func TestCompareTracksSameMBID(t *testing.T) {
+	left := models.Track{
+		RecordingMbid: models.MBID("2886d15c-09b0-43c6-af56-932f70dde164"),
+		TrackName:     "Forever After",
+		ArtistNames:   []string{"Paradise Lost"},
+	}
+	right := models.Track{
+		RecordingMbid: models.MBID("2886d15c-09b0-43c6-af56-932f70dde164"),
+		TrackName:     "Forever Failure (radio edit)",
+		ReleaseName:   "Draconian Times",
+		ArtistNames:   []string{"Paradise Lost"},
+	}
+	assert.Equal(t, 1.0, similarity.CompareTracks(left, right))
 }
diff --git a/internal/util/util.go b/internal/util/util.go
index 99826a1..e8663a7 100644
--- a/internal/util/util.go
+++ b/internal/util/util.go
@@ -32,3 +32,19 @@ func Min[T constraints.Ordered](m, n T) T {
 		return m
 	}
 }
+
+// Sum returns the sum of all values in v, or the zero value of T if v is empty.
+func Sum[T constraints.Integer | constraints.Float](v ...T) (total T) {
+	for idx := range v {
+		total += v[idx]
+	}
+	return total
+}
+
+// Average returns the mean of v (0 if v is empty); the sum is accumulated in T and may overflow.
+func Average[T constraints.Integer | constraints.Float](v ...T) float64 {
+	if n := len(v); n > 0 {
+		return float64(Sum(v...)) / float64(n)
+	}
+	return 0.0
+}
diff --git a/internal/util/util_test.go b/internal/util/util_test.go
index 5aee726..73ec415 100644
--- a/internal/util/util_test.go
+++ b/internal/util/util_test.go
@@ -17,7 +17,9 @@ package util_test
 
 import (
 	"fmt"
+	"testing"
 
+	"github.com/stretchr/testify/assert"
 	"go.uploadedlobster.com/scotty/internal/util"
 )
 
@@ -32,3 +34,31 @@ func ExampleMin() {
 	fmt.Print(v)
 	// Output: 2
 }
+
+func ExampleSum() {
+	// Sum is variadic; an existing slice is passed with the ... suffix.
+	nums := []float64{1.4, 2.2}
+	fmt.Print(util.Sum(nums...))
+	// Output: 3.6
+}
+
+func TestSumEmpty(t *testing.T) {
+	assert.Equal(t, 0, util.Sum[int]())
+}
+
+func ExampleAverage() {
+	values := []float64{1.4, 2.2, 0.9}
+	avg := util.Average(values...)
+	fmt.Print(avg)
+	// Output: 1.5
+}
+
+func TestAverageEmpty(t *testing.T) {
+	assert.Equal(t, 0.0, util.Average[int]())
+}
+
+func TestAverageInt(t *testing.T) {
+	// Integer inputs still produce a fractional float64 mean.
+	assert.Equal(t, 3.0, util.Average(2, 4, 3))
+	assert.Equal(t, 1.5, util.Average(2, 1, 1, 2))
+}