mirror of
https://git.sr.ht/~phw/scotty
synced 2025-04-08 06:39:28 +02:00
82 lines
2.5 KiB
Go
82 lines
2.5 KiB
Go
/*
|
|
Copyright © 2024 Philipp Wolfer <phw@uploadedlobster.com>
|
|
|
|
Scotty is free software: you can redistribute it and/or modify it under the
|
|
terms of the GNU General Public License as published by the Free Software
|
|
Foundation, either version 3 of the License, or (at your option) any later version.
|
|
|
|
Scotty is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
|
|
A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License along with
|
|
Scotty. If not, see <https://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
package similarity
|
|
|
|
import (
|
|
"regexp"
|
|
"strings"
|
|
|
|
"github.com/agnivade/levenshtein"
|
|
"go.uploadedlobster.com/scotty/internal/models"
|
|
"go.uploadedlobster.com/scotty/internal/util"
|
|
"golang.org/x/text/unicode/norm"
|
|
)
|
|
|
|
// Returns the Levensthein distance between s1 and s2 relative to the length of
|
|
// the longer string.
|
|
// Unicode normalization on the strings is performed.
|
|
func Similarity(s1 string, s2 string) float64 {
|
|
s1 = norm.NFKC.String(s1)
|
|
s2 = norm.NFKC.String(s2)
|
|
l1 := len([]rune(s1))
|
|
l2 := len([]rune(s2))
|
|
maxLen := max(l1, l2)
|
|
// Empty strings always compare full equal
|
|
if maxLen == 0 {
|
|
return 1.0
|
|
}
|
|
dist := levenshtein.ComputeDistance(s1, s2)
|
|
// fmt.Printf("%v (%v) ~ %v (%v) = %v\n", s1, l1, s2, l2, dist)
|
|
return 1.0 - (float64(dist) / float64(maxLen))
|
|
}
|
|
|
|
// reMultiSpace matches any run of whitespace so it can be collapsed to a
// single space.
var reMultiSpace = regexp.MustCompile(`\s+`)

// reIgnoredPatterns lists trailing decorations that are stripped from titles
// before comparison. Titles are lowercased before these are applied.
var reIgnoredPatterns = []*regexp.Regexp{
	// A trailing parenthesized suffix, e.g. " (live)".
	regexp.MustCompile(`\s+\([^)]+\)$`),
	// A trailing remaster marker, e.g. " - remastered" or " - 2011 remaster".
	regexp.MustCompile(`\s+- (\d{4} )?remaster(ed)?$`),
}

// NormalizeTitle normalizes a track or release title for comparison.
//
// The title is trimmed, lowercased and has runs of whitespace collapsed to a
// single space. Trailing suffixes matching reIgnoredPatterns are then removed.
// Stripping is repeated until the title no longer changes, so stacked suffixes
// such as "Song (feat. X) - 2011 Remaster" are fully removed regardless of
// their order and the function is idempotent.
func NormalizeTitle(s string) string {
	s = strings.TrimSpace(s)
	s = strings.ToLower(s)
	s = reMultiSpace.ReplaceAllString(s, " ")

	// Every successful replacement shortens the string, so this terminates.
	for {
		stripped := s
		for _, re := range reIgnoredPatterns {
			stripped = re.ReplaceAllString(stripped, "")
		}
		if stripped == s {
			return s
		}
		s = stripped
	}
}
|
|
|
|
// Compare two tracks for similarity.
|
|
func CompareTracks(t1 models.Track, t2 models.Track) float64 {
|
|
// Identical recording MBID always compares 100%
|
|
if t1.RecordingMBID == t2.RecordingMBID && t1.RecordingMBID != "" {
|
|
return 1.0
|
|
}
|
|
|
|
// Compare track name and artist
|
|
sims := []float64{
|
|
Similarity(NormalizeTitle(t1.TrackName), NormalizeTitle(t2.TrackName)),
|
|
Similarity(NormalizeTitle(t1.ArtistName()), NormalizeTitle(t2.ArtistName())),
|
|
}
|
|
|
|
// Compare release names only if they are set for both tracks
|
|
if t1.ReleaseName != "" && t2.ReleaseName != "" {
|
|
sims = append(sims, Similarity(NormalizeTitle(t1.ReleaseName), NormalizeTitle(t2.ReleaseName)))
|
|
}
|
|
|
|
return util.Average(sims...)
|
|
}
|