scotty/internal/similarity/similarity.go

55 lines
1.7 KiB
Go

/*
Copyright © 2024 Philipp Wolfer <phw@uploadedlobster.com>
Scotty is free software: you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free Software
Foundation, either version 3 of the License, or (at your option) any later version.
Scotty is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
Scotty. If not, see <https://www.gnu.org/licenses/>.
*/
package similarity
import (
"regexp"
"strings"
"github.com/agnivade/levenshtein"
"go.uploadedlobster.com/scotty/internal/util"
"golang.org/x/text/unicode/norm"
)
// Similarity scores how alike s1 and s2 are, based on their Levenshtein
// distance: the result is 1 minus the distance divided by the rune count of
// the longer string.
// Both inputs are converted to Unicode NFKC form before they are compared.
// Two empty strings yield 1.0.
func Similarity(s1 string, s2 string) float64 {
	a := norm.NFKC.String(s1)
	b := norm.NFKC.String(s2)

	// Lengths are measured in runes rather than bytes.
	longest := util.Max(len([]rune(a)), len([]rune(b)))
	if longest > 0 {
		distance := levenshtein.ComputeDistance(a, b)
		return 1.0 - float64(distance)/float64(longest)
	}

	// Nothing to compare: empty strings are treated as fully equal.
	return 1.0
}
var (
	// reExtraTitleInfo matches one parenthesized group at the very end of a
	// string, such as the "(live)" in "song (live)".
	reExtraTitleInfo = regexp.MustCompile(`\([^)]+\)$`)

	// reMultiSpace matches a run of one or more whitespace characters.
	reMultiSpace = regexp.MustCompile(`\s+`)
)

// NormalizeTitle normalizes a track or release title so that titles can be
// compared with each other.
//
// The title is stripped of surrounding whitespace, lowercased, freed of a
// single trailing parenthesized addition such as "(Live)", and every run of
// whitespace is collapsed into one space. Only the last parenthesized group is
// removed; parentheses elsewhere in the title are kept.
func NormalizeTitle(s string) string {
	// Trim first so the end-anchored pattern still matches when the input has
	// whitespace after the closing parenthesis.
	s = strings.ToLower(strings.TrimSpace(s))
	s = reExtraTitleInfo.ReplaceAllString(s, "")
	s = reMultiSpace.ReplaceAllString(s, " ")
	// Removing the trailing group leaves behind the whitespace that separated
	// it from the title, so trim once more.
	return strings.TrimSpace(s)
}