New similarity.CompareTracks function

This commit is contained in:
Philipp Wolfer 2024-01-14 17:14:05 +01:00
parent bace31471e
commit b2b5c69278
No known key found for this signature in database
GPG key ID: 8FDF744D4919943B
4 changed files with 110 additions and 2 deletions

View file

@ -20,6 +20,7 @@ import (
"strings"
"github.com/agnivade/levenshtein"
"go.uploadedlobster.com/scotty/internal/models"
"go.uploadedlobster.com/scotty/internal/util"
"golang.org/x/text/unicode/norm"
)
@ -42,14 +43,40 @@ func Similarity(s1 string, s2 string) float64 {
return 1.0 - (float64(dist) / float64(maxLen))
}
// reMultiSpace matches any run of whitespace so it can be collapsed into a
// single space.
var reMultiSpace = regexp.MustCompile(`\s+`)

// reIgnoredPatterns lists title suffixes that NormalizeTitle removes. The
// patterns are applied in order against the lowercased title, and each one
// consumes the whitespace in front of the suffix so that no trailing space
// is left behind.
var reIgnoredPatterns = []*regexp.Regexp{
	// Trailing parenthesized addition, e.g. " (radio edit)".
	regexp.MustCompile(`\s+\([^)]+\)$`),
	// Trailing remaster marker, e.g. " - 2015 remaster" or " - remastered".
	regexp.MustCompile(`\s+- (\d{4} )?remaster(ed)?$`),
}

// NormalizeTitle normalizes a track or release title.
//
// The title is trimmed, lowercased, runs of whitespace are collapsed into a
// single space, and finally every suffix matched by reIgnoredPatterns is
// removed.
func NormalizeTitle(s string) string {
	s = strings.TrimSpace(s)
	s = strings.ToLower(s)
	s = reMultiSpace.ReplaceAllString(s, " ")
	// The suffix patterns are anchored with `$`, so they must run on a string
	// without trailing whitespace; nothing before this point may leave one.
	for _, re := range reIgnoredPatterns {
		s = re.ReplaceAllString(s, "")
	}
	return s
}
// CompareTracks computes a similarity score for two tracks, where 1.0 means
// the tracks are considered identical.
//
// Tracks sharing the same non-empty recording MBID always score 1.0.
// Otherwise the score is the average of the similarities of the normalized
// track names and artist names, plus the normalized release names when both
// tracks have one.
func CompareTracks(t1 models.Track, t2 models.Track) float64 {
	// A shared recording MBID settles the comparison on its own.
	if t1.RecordingMbid != "" && t1.RecordingMbid == t2.RecordingMbid {
		return 1.0
	}

	// Track name and artist always contribute to the score.
	titleSim := Similarity(NormalizeTitle(t1.TrackName), NormalizeTitle(t2.TrackName))
	artistSim := Similarity(NormalizeTitle(t1.ArtistName()), NormalizeTitle(t2.ArtistName()))

	// A release name missing on either side must not drag the score down,
	// so it is left out of the average entirely.
	if t1.ReleaseName == "" || t2.ReleaseName == "" {
		return util.Average(titleSim, artistSim)
	}

	releaseSim := Similarity(NormalizeTitle(t1.ReleaseName), NormalizeTitle(t2.ReleaseName))
	return util.Average(titleSim, artistSim, releaseSim)
}

View file

@ -20,6 +20,7 @@ import (
"testing"
"github.com/stretchr/testify/assert"
"go.uploadedlobster.com/scotty/internal/models"
"go.uploadedlobster.com/scotty/internal/similarity"
)
@ -36,6 +37,7 @@ func TestSimilarity(t *testing.T) {
assert.Equal(0.0, similarity.Similarity("foo", "bar"))
assert.Equal(0.5, similarity.Similarity("foobar", "bar"))
assert.Equal(1.0, similarity.Similarity("foo", "foo"))
assert.Equal(0.6, similarity.Similarity("Forever After", "Forever Failure"))
}
func ExampleNormalizeTitle() {
@ -48,4 +50,37 @@ func TestNormalizeTitle(t *testing.T) {
assert := assert.New(t)
assert.Equal("forever failure", similarity.NormalizeTitle("Forever Failure"))
assert.Equal("foo", similarity.NormalizeTitle(" \tfoo\t \t"))
assert.Equal("wasted years", similarity.NormalizeTitle("Wasted Years - 2015 Remaster"))
assert.Equal("london calling", similarity.NormalizeTitle("London Calling - Remastered"))
assert.Equal("london calling", similarity.NormalizeTitle("London Calling (Remastered)"))
}
// ExampleCompareTracks compares two tracks by the same artist with different
// titles, where only one of them carries a release name.
func ExampleCompareTracks() {
	first := models.Track{
		TrackName:   "Forever After",
		ArtistNames: []string{"Paradise Lost"},
	}
	second := models.Track{
		TrackName:   "Forever Failure (radio edit)",
		ArtistNames: []string{"Paradise Lost"},
		ReleaseName: "Draconian Times",
	}
	fmt.Println(similarity.CompareTracks(first, second))
	// Output: 0.8333333333333334
}
// TestCompareTracksSameMBID verifies that two tracks sharing a recording MBID
// compare as identical even though their titles and release names differ.
func TestCompareTracksSameMBID(t *testing.T) {
	mbid := models.MBID("2886d15c-09b0-43c6-af56-932f70dde164")
	first := models.Track{
		RecordingMbid: mbid,
		TrackName:     "Forever After",
		ArtistNames:   []string{"Paradise Lost"},
	}
	second := models.Track{
		RecordingMbid: mbid,
		TrackName:     "Forever Failure (radio edit)",
		ArtistNames:   []string{"Paradise Lost"},
		ReleaseName:   "Draconian Times",
	}

	got := similarity.CompareTracks(first, second)
	assert.Equal(t, 1.0, got)
}

View file

@ -32,3 +32,19 @@ func Min[T constraints.Ordered](m, n T) T {
return m
}
}
// Sum adds up all given values and returns the total in the same type T.
// Calling it without any values returns the zero value of T.
func Sum[T constraints.Integer | constraints.Float](v ...T) T {
	var total T
	for idx := range v {
		total += v[idx]
	}
	return total
}
// Average returns the arithmetic mean of the given values as a float64.
//
// Calling it without any values returns 0.0 instead of dividing by zero.
// The values are accumulated as float64 rather than in T, so the running
// total cannot wrap around for narrow integer types: the average of two
// int8 values of 100 is 100, not -28.
func Average[T constraints.Integer | constraints.Float](v ...T) float64 {
	if len(v) == 0 {
		return 0.0
	}
	var total float64
	for _, x := range v {
		total += float64(x)
	}
	return total / float64(len(v))
}

View file

@ -17,7 +17,9 @@ package util_test
import (
"fmt"
"testing"
"github.com/stretchr/testify/assert"
"go.uploadedlobster.com/scotty/internal/util"
)
@ -32,3 +34,31 @@ func ExampleMin() {
fmt.Print(v)
// Output: 2
}
// ExampleSum adds up two floating point values.
func ExampleSum() {
	total := util.Sum(1.4, 2.2)
	fmt.Print(total)
	// Output: 3.6
}
func TestSumEmpty(t *testing.T) {
assert.Equal(t, 0, util.Sum([]int{}...))
}
// ExampleAverage computes the mean of three floating point values.
func ExampleAverage() {
	avg := util.Average(1.4, 2.2, 0.9)
	fmt.Print(avg)
	// Output: 1.5
}
func TestAverageEmpty(t *testing.T) {
assert.Equal(t, 0.0, util.Average([]int{}...))
}
// TestAverageInt verifies that integer inputs produce a floating point mean,
// including a fractional one.
func TestAverageInt(t *testing.T) {
	cases := []struct {
		values []int
		want   float64
	}{
		{values: []int{2, 4, 3}, want: 3.0},
		{values: []int{2, 1, 1, 2}, want: 1.5},
	}
	for _, c := range cases {
		assert.Equal(t, c.want, util.Average(c.values...))
	}
}