mirror of
https://git.sr.ht/~phw/scotty
synced 2025-04-16 10:09:28 +02:00
New similarity.CompareTracks function
This commit is contained in:
parent
bace31471e
commit
b2b5c69278
4 changed files with 110 additions and 2 deletions
|
@ -20,6 +20,7 @@ import (
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/agnivade/levenshtein"
|
"github.com/agnivade/levenshtein"
|
||||||
|
"go.uploadedlobster.com/scotty/internal/models"
|
||||||
"go.uploadedlobster.com/scotty/internal/util"
|
"go.uploadedlobster.com/scotty/internal/util"
|
||||||
"golang.org/x/text/unicode/norm"
|
"golang.org/x/text/unicode/norm"
|
||||||
)
|
)
|
||||||
|
@ -42,14 +43,40 @@ func Similarity(s1 string, s2 string) float64 {
|
||||||
return 1.0 - (float64(dist) / float64(maxLen))
|
return 1.0 - (float64(dist) / float64(maxLen))
|
||||||
}
|
}
|
||||||
|
|
||||||
// reMultiSpace matches any run of whitespace characters.
var reMultiSpace = regexp.MustCompile(`\s+`)

// reIgnoredPatterns lists trailing title decorations that NormalizeTitle
// removes: a parenthesized suffix such as " (radio edit)" and a remaster
// marker such as " - 2015 remaster" or " - remastered". The patterns are
// written for input that is already lowercased and whitespace-collapsed.
var reIgnoredPatterns = []*regexp.Regexp{
	regexp.MustCompile(`\s+\([^)]+\)$`),
	regexp.MustCompile(`\s+- (\d{4} )?remaster(ed)?$`),
}

// NormalizeTitle normalizes a track or release title.
//
// The title is trimmed, lowercased and has every whitespace run collapsed
// to a single space. Trailing decorations matched by reIgnoredPatterns are
// then removed.
func NormalizeTitle(s string) string {
	s = strings.TrimSpace(s)
	s = strings.ToLower(s)
	s = reMultiSpace.ReplaceAllString(s, " ")

	// Strip until nothing changes: removing one suffix can expose another
	// (e.g. "title (live) - 2015 remaster"). Every successful replacement
	// shortens the string, so the loop always terminates.
	for {
		stripped := s
		for _, re := range reIgnoredPatterns {
			stripped = re.ReplaceAllString(stripped, "")
		}
		if stripped == s {
			return s
		}
		s = stripped
	}
}
|
||||||
|
|
||||||
|
// Compare two tracks for similarity.
|
||||||
|
func CompareTracks(t1 models.Track, t2 models.Track) float64 {
|
||||||
|
// Identical recording MBID always compares 100%
|
||||||
|
if t1.RecordingMbid == t2.RecordingMbid && t1.RecordingMbid != "" {
|
||||||
|
return 1.0
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compare track name and artist
|
||||||
|
sims := []float64{
|
||||||
|
Similarity(NormalizeTitle(t1.TrackName), NormalizeTitle(t2.TrackName)),
|
||||||
|
Similarity(NormalizeTitle(t1.ArtistName()), NormalizeTitle(t2.ArtistName())),
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compare release names only if they are set for both tracks
|
||||||
|
if t1.ReleaseName != "" && t2.ReleaseName != "" {
|
||||||
|
sims = append(sims, Similarity(NormalizeTitle(t1.ReleaseName), NormalizeTitle(t2.ReleaseName)))
|
||||||
|
}
|
||||||
|
|
||||||
|
return util.Average(sims...)
|
||||||
|
}
|
||||||
|
|
|
@ -20,6 +20,7 @@ import (
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
|
"go.uploadedlobster.com/scotty/internal/models"
|
||||||
"go.uploadedlobster.com/scotty/internal/similarity"
|
"go.uploadedlobster.com/scotty/internal/similarity"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -36,6 +37,7 @@ func TestSimilarity(t *testing.T) {
|
||||||
assert.Equal(0.0, similarity.Similarity("foo", "bar"))
|
assert.Equal(0.0, similarity.Similarity("foo", "bar"))
|
||||||
assert.Equal(0.5, similarity.Similarity("foobar", "bar"))
|
assert.Equal(0.5, similarity.Similarity("foobar", "bar"))
|
||||||
assert.Equal(1.0, similarity.Similarity("foo", "foo"))
|
assert.Equal(1.0, similarity.Similarity("foo", "foo"))
|
||||||
|
assert.Equal(0.6, similarity.Similarity("Forever After", "Forever Failure"))
|
||||||
}
|
}
|
||||||
|
|
||||||
func ExampleNormalizeTitle() {
|
func ExampleNormalizeTitle() {
|
||||||
|
@ -48,4 +50,37 @@ func TestNormalizeTitle(t *testing.T) {
|
||||||
assert := assert.New(t)
|
assert := assert.New(t)
|
||||||
assert.Equal("forever failure", similarity.NormalizeTitle("Forever Failure"))
|
assert.Equal("forever failure", similarity.NormalizeTitle("Forever Failure"))
|
||||||
assert.Equal("foo", similarity.NormalizeTitle(" \tfoo\t \t"))
|
assert.Equal("foo", similarity.NormalizeTitle(" \tfoo\t \t"))
|
||||||
|
assert.Equal("wasted years", similarity.NormalizeTitle("Wasted Years - 2015 Remaster"))
|
||||||
|
assert.Equal("london calling", similarity.NormalizeTitle("London Calling - Remastered"))
|
||||||
|
assert.Equal("london calling", similarity.NormalizeTitle("London Calling (Remastered)"))
|
||||||
|
}
|
||||||
|
|
||||||
|
func ExampleCompareTracks() {
|
||||||
|
t1 := models.Track{
|
||||||
|
ArtistNames: []string{"Paradise Lost"},
|
||||||
|
TrackName: "Forever After",
|
||||||
|
}
|
||||||
|
t2 := models.Track{
|
||||||
|
ArtistNames: []string{"Paradise Lost"},
|
||||||
|
TrackName: "Forever Failure (radio edit)",
|
||||||
|
ReleaseName: "Draconian Times",
|
||||||
|
}
|
||||||
|
sim := similarity.CompareTracks(t1, t2)
|
||||||
|
fmt.Println(sim)
|
||||||
|
// Output: 0.8333333333333334
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCompareTracksSameMBID(t *testing.T) {
|
||||||
|
t1 := models.Track{
|
||||||
|
ArtistNames: []string{"Paradise Lost"},
|
||||||
|
TrackName: "Forever After",
|
||||||
|
RecordingMbid: models.MBID("2886d15c-09b0-43c6-af56-932f70dde164"),
|
||||||
|
}
|
||||||
|
t2 := models.Track{
|
||||||
|
ArtistNames: []string{"Paradise Lost"},
|
||||||
|
TrackName: "Forever Failure (radio edit)",
|
||||||
|
ReleaseName: "Draconian Times",
|
||||||
|
RecordingMbid: models.MBID("2886d15c-09b0-43c6-af56-932f70dde164"),
|
||||||
|
}
|
||||||
|
assert.Equal(t, 1.0, similarity.CompareTracks(t1, t2))
|
||||||
}
|
}
|
||||||
|
|
|
@ -32,3 +32,19 @@ func Min[T constraints.Ordered](m, n T) T {
|
||||||
return m
|
return m
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func Sum[T constraints.Integer | constraints.Float](v ...T) T {
|
||||||
|
var sum T
|
||||||
|
for _, i := range v {
|
||||||
|
sum += i
|
||||||
|
}
|
||||||
|
return sum
|
||||||
|
}
|
||||||
|
|
||||||
|
func Average[T constraints.Integer | constraints.Float](v ...T) float64 {
|
||||||
|
length := len(v)
|
||||||
|
if length == 0 {
|
||||||
|
return 0.0
|
||||||
|
}
|
||||||
|
return float64(Sum(v...)) / float64(length)
|
||||||
|
}
|
||||||
|
|
|
@ -17,7 +17,9 @@ package util_test
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
"go.uploadedlobster.com/scotty/internal/util"
|
"go.uploadedlobster.com/scotty/internal/util"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -32,3 +34,31 @@ func ExampleMin() {
|
||||||
fmt.Print(v)
|
fmt.Print(v)
|
||||||
// Output: 2
|
// Output: 2
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func ExampleSum() {
|
||||||
|
values := []float64{1.4, 2.2}
|
||||||
|
sum := util.Sum(values...)
|
||||||
|
fmt.Print(sum)
|
||||||
|
// Output: 3.6
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSumEmpty(t *testing.T) {
|
||||||
|
assert.Equal(t, 0, util.Sum([]int{}...))
|
||||||
|
}
|
||||||
|
|
||||||
|
func ExampleAverage() {
|
||||||
|
values := []float64{1.4, 2.2, 0.9}
|
||||||
|
sum := util.Average(values...)
|
||||||
|
fmt.Print(sum)
|
||||||
|
// Output: 1.5
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestAverageEmpty(t *testing.T) {
|
||||||
|
assert.Equal(t, 0.0, util.Average([]int{}...))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestAverageInt(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
assert.Equal(3.0, util.Average([]int{2, 4, 3}...))
|
||||||
|
assert.Equal(1.5, util.Average([]int{2, 1, 1, 2}...))
|
||||||
|
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue