New similarity.CompareTracks function

This commit is contained in:
Philipp Wolfer 2024-01-14 17:14:05 +01:00
parent bace31471e
commit b2b5c69278
No known key found for this signature in database
GPG key ID: 8FDF744D4919943B
4 changed files with 110 additions and 2 deletions

View file

@ -20,6 +20,7 @@ import (
"strings"
"github.com/agnivade/levenshtein"
"go.uploadedlobster.com/scotty/internal/models"
"go.uploadedlobster.com/scotty/internal/util"
"golang.org/x/text/unicode/norm"
)
@ -42,14 +43,40 @@ func Similarity(s1 string, s2 string) float64 {
return 1.0 - (float64(dist) / float64(maxLen))
}
// reExtraTitleInfo matches a parenthesized group at the very end of a title,
// e.g. the "(live)" in "song (live)". It does not consume the whitespace in
// front of the group.
var reExtraTitleInfo = regexp.MustCompile(`\([^)]+\)$`)

// reMultiSpace matches one or more consecutive whitespace characters.
var reMultiSpace = regexp.MustCompile(`\s+`)

// reIgnoredPatterns lists title suffixes that are removed during
// normalization: a trailing parenthesized group, and a trailing
// " - remaster" / " - remastered" marker with an optional four digit year.
// All patterns are anchored at the end of the string and expect lower case
// input with single spaces.
var reIgnoredPatterns = []*regexp.Regexp{
	regexp.MustCompile(`\s+\([^)]+\)$`),
	regexp.MustCompile(`\s+- (\d{4} )?remaster(ed)?$`),
}

// NormalizeTitle normalizes a track or release title for comparison.
//
// The title is trimmed, lower cased and runs of whitespace are collapsed to a
// single space. A trailing parenthesized group and the suffixes listed in
// reIgnoredPatterns are then removed. The result never carries leading or
// trailing whitespace.
func NormalizeTitle(s string) string {
	s = strings.ToLower(strings.TrimSpace(s))
	s = reMultiSpace.ReplaceAllString(s, " ")

	// reExtraTitleInfo leaves the whitespace that preceded the removed group
	// behind. Trim it, otherwise the result ends with a space and the
	// end-anchored patterns below can no longer match.
	s = strings.TrimSpace(reExtraTitleInfo.ReplaceAllString(s, ""))

	for _, re := range reIgnoredPatterns {
		s = re.ReplaceAllString(s, "")
	}
	return s
}
// CompareTracks computes a similarity score for two tracks.
//
// Tracks sharing the same non-empty recording MBID score 1.0. Otherwise the
// score is the average of the similarities of the normalized track names and
// artist names, plus the normalized release names when both tracks have one.
func CompareTracks(t1 models.Track, t2 models.Track) float64 {
	// A matching recording MBID identifies the same recording, no further
	// comparison is needed.
	if t1.RecordingMbid != "" && t1.RecordingMbid == t2.RecordingMbid {
		return 1.0
	}

	titleScore := Similarity(
		NormalizeTitle(t1.TrackName),
		NormalizeTitle(t2.TrackName),
	)
	artistScore := Similarity(
		NormalizeTitle(t1.ArtistName()),
		NormalizeTitle(t2.ArtistName()),
	)

	// The release name only contributes if both tracks provide it.
	if t1.ReleaseName == "" || t2.ReleaseName == "" {
		return util.Average(titleScore, artistScore)
	}

	releaseScore := Similarity(
		NormalizeTitle(t1.ReleaseName),
		NormalizeTitle(t2.ReleaseName),
	)
	return util.Average(titleScore, artistScore, releaseScore)
}