/* Copyright © 2024 Philipp Wolfer Scotty is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Scotty is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with Scotty. If not, see . */ package similarity import ( "regexp" "strings" "github.com/agnivade/levenshtein" "go.uploadedlobster.com/scotty/internal/util" "golang.org/x/text/unicode/norm" ) // Returns the Levensthein distance between s1 and s2 relative to the length of // the longer string. // Unicode normalization on the strings is performed. func Similarity(s1 string, s2 string) float64 { s1 = norm.NFKC.String(s1) s2 = norm.NFKC.String(s2) l1 := len([]rune(s1)) l2 := len([]rune(s2)) maxLen := util.Max(l1, l2) // Empty strings always compare full equal if maxLen == 0 { return 1.0 } dist := levenshtein.ComputeDistance(s1, s2) // fmt.Printf("%v (%v) ~ %v (%v) = %v\n", s1, l1, s2, l2, dist) return 1.0 - (float64(dist) / float64(maxLen)) } var reExtraTitleInfo = regexp.MustCompile(`\([^)]+\)$`) var reMultiSpace = regexp.MustCompile(`\s+`) // Normalizes a track or release title. func NormalizeTitle(s string) string { s = strings.TrimSpace(s) s = strings.ToLower(s) s = reExtraTitleInfo.ReplaceAllString(s, "") s = reMultiSpace.ReplaceAllString(s, " ") return s }