mirror of
https://git.sr.ht/~phw/scotty
synced 2025-04-16 10:09:28 +02:00
New similarity module to help with comparing track titles
This commit is contained in:
parent
d9d83a4282
commit
bace31471e
4 changed files with 111 additions and 0 deletions
1
go.mod
1
go.mod
|
@ -28,6 +28,7 @@ require (
|
||||||
require (
|
require (
|
||||||
github.com/VividCortex/ewma v1.2.0 // indirect
|
github.com/VividCortex/ewma v1.2.0 // indirect
|
||||||
github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d // indirect
|
github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d // indirect
|
||||||
|
github.com/agnivade/levenshtein v1.1.1 // indirect
|
||||||
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e // indirect
|
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e // indirect
|
||||||
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
|
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
|
||||||
github.com/dustin/go-humanize v1.0.1 // indirect
|
github.com/dustin/go-humanize v1.0.1 // indirect
|
||||||
|
|
4
go.sum
4
go.sum
|
@ -44,6 +44,9 @@ github.com/Xuanwo/go-locale v1.1.0 h1:51gUxhxl66oXAjI9uPGb2O0qwPECpriKQb2hl35mQk
|
||||||
github.com/Xuanwo/go-locale v1.1.0/go.mod h1:UKrHoZB3FPIk9wIG2/tVSobnHgNnceGSH3Y8DY5cASs=
|
github.com/Xuanwo/go-locale v1.1.0/go.mod h1:UKrHoZB3FPIk9wIG2/tVSobnHgNnceGSH3Y8DY5cASs=
|
||||||
github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d h1:licZJFw2RwpHMqeKTCYkitsPqHNxTmd4SNR5r94FGM8=
|
github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d h1:licZJFw2RwpHMqeKTCYkitsPqHNxTmd4SNR5r94FGM8=
|
||||||
github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d/go.mod h1:asat636LX7Bqt5lYEZ27JNDcqxfjdBQuJ/MM4CN/Lzo=
|
github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d/go.mod h1:asat636LX7Bqt5lYEZ27JNDcqxfjdBQuJ/MM4CN/Lzo=
|
||||||
|
github.com/agnivade/levenshtein v1.1.1 h1:QY8M92nrzkmr798gCo3kmMyqXFzdQVpxLlGPRBij0P8=
|
||||||
|
github.com/agnivade/levenshtein v1.1.1/go.mod h1:veldBMzWxcCG2ZvUTKD2kJNRdCk5hVbJomOvKkmgYbo=
|
||||||
|
github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0/go.mod h1:t2tdKJDJF9BV14lnkjHmOQgcvEKgtqs5a1N3LNdJhGE=
|
||||||
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
|
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
|
||||||
github.com/chzyer/logex v1.1.10 h1:Swpa1K6QvQznwJRcfTfQJmTE72DqScAa40E+fbHEXEE=
|
github.com/chzyer/logex v1.1.10 h1:Swpa1K6QvQznwJRcfTfQJmTE72DqScAa40E+fbHEXEE=
|
||||||
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
|
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
|
||||||
|
@ -64,6 +67,7 @@ github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1
|
||||||
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
github.com/delucks/go-subsonic v0.0.0-20220915164742-2744002c4be5 h1:RuuxidatioSKGOiBzL1mTY4X22DQD8weEbS3iRLHnAg=
|
github.com/delucks/go-subsonic v0.0.0-20220915164742-2744002c4be5 h1:RuuxidatioSKGOiBzL1mTY4X22DQD8weEbS3iRLHnAg=
|
||||||
github.com/delucks/go-subsonic v0.0.0-20220915164742-2744002c4be5/go.mod h1:vnbEuj6Z20PLcHB4rrLQAOXGMjtULfMGhRVSFPcSdUo=
|
github.com/delucks/go-subsonic v0.0.0-20220915164742-2744002c4be5/go.mod h1:vnbEuj6Z20PLcHB4rrLQAOXGMjtULfMGhRVSFPcSdUo=
|
||||||
|
github.com/dgryski/trifles v0.0.0-20200323201526-dd97f9abfb48/go.mod h1:if7Fbed8SFyPtHLHbg49SI7NAdJiC5WIA09pe59rfAA=
|
||||||
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
|
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
|
||||||
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
|
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
|
||||||
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
|
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
|
||||||
|
|
55
internal/similarity/similarity.go
Normal file
55
internal/similarity/similarity.go
Normal file
|
@ -0,0 +1,55 @@
|
||||||
|
/*
|
||||||
|
Copyright © 2024 Philipp Wolfer <phw@uploadedlobster.com>
|
||||||
|
|
||||||
|
Scotty is free software: you can redistribute it and/or modify it under the
|
||||||
|
terms of the GNU General Public License as published by the Free Software
|
||||||
|
Foundation, either version 3 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
Scotty is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||||
|
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along with
|
||||||
|
Scotty. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package similarity
|
||||||
|
|
||||||
|
import (
|
||||||
|
"regexp"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/agnivade/levenshtein"
|
||||||
|
"go.uploadedlobster.com/scotty/internal/util"
|
||||||
|
"golang.org/x/text/unicode/norm"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Returns the Levensthein distance between s1 and s2 relative to the length of
|
||||||
|
// the longer string.
|
||||||
|
// Unicode normalization on the strings is performed.
|
||||||
|
func Similarity(s1 string, s2 string) float64 {
|
||||||
|
s1 = norm.NFKC.String(s1)
|
||||||
|
s2 = norm.NFKC.String(s2)
|
||||||
|
l1 := len([]rune(s1))
|
||||||
|
l2 := len([]rune(s2))
|
||||||
|
maxLen := util.Max(l1, l2)
|
||||||
|
// Empty strings always compare full equal
|
||||||
|
if maxLen == 0 {
|
||||||
|
return 1.0
|
||||||
|
}
|
||||||
|
dist := levenshtein.ComputeDistance(s1, s2)
|
||||||
|
// fmt.Printf("%v (%v) ~ %v (%v) = %v\n", s1, l1, s2, l2, dist)
|
||||||
|
return 1.0 - (float64(dist) / float64(maxLen))
|
||||||
|
}
|
||||||
|
|
||||||
|
var reExtraTitleInfo = regexp.MustCompile(`\([^)]+\)$`)
|
||||||
|
var reMultiSpace = regexp.MustCompile(`\s+`)
|
||||||
|
|
||||||
|
// Normalizes a track or release title.
|
||||||
|
func NormalizeTitle(s string) string {
|
||||||
|
s = strings.TrimSpace(s)
|
||||||
|
s = strings.ToLower(s)
|
||||||
|
s = reExtraTitleInfo.ReplaceAllString(s, "")
|
||||||
|
s = reMultiSpace.ReplaceAllString(s, " ")
|
||||||
|
return s
|
||||||
|
}
|
51
internal/similarity/similarity_test.go
Normal file
51
internal/similarity/similarity_test.go
Normal file
|
@ -0,0 +1,51 @@
|
||||||
|
/*
|
||||||
|
Copyright © 2024 Philipp Wolfer <phw@uploadedlobster.com>
|
||||||
|
|
||||||
|
Scotty is free software: you can redistribute it and/or modify it under the
|
||||||
|
terms of the GNU General Public License as published by the Free Software
|
||||||
|
Foundation, either version 3 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
Scotty is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||||
|
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along with
|
||||||
|
Scotty. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package similarity_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"go.uploadedlobster.com/scotty/internal/similarity"
|
||||||
|
)
|
||||||
|
|
||||||
|
func ExampleSimilarity() {
|
||||||
|
s := similarity.Similarity("bar1", "bär1")
|
||||||
|
fmt.Println(s)
|
||||||
|
// Output: 0.75
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSimilarity(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
assert.Equal(1.0, similarity.Similarity("", ""))
|
||||||
|
assert.Equal(0.0, similarity.Similarity("foo", ""))
|
||||||
|
assert.Equal(0.0, similarity.Similarity("foo", "bar"))
|
||||||
|
assert.Equal(0.5, similarity.Similarity("foobar", "bar"))
|
||||||
|
assert.Equal(1.0, similarity.Similarity("foo", "foo"))
|
||||||
|
}
|
||||||
|
|
||||||
|
func ExampleNormalizeTitle() {
|
||||||
|
s := similarity.NormalizeTitle(" Forever \tFailure (video edit) ")
|
||||||
|
fmt.Println(s)
|
||||||
|
// Output: forever failure
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNormalizeTitle(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
assert.Equal("forever failure", similarity.NormalizeTitle("Forever Failure"))
|
||||||
|
assert.Equal("foo", similarity.NormalizeTitle(" \tfoo\t \t"))
|
||||||
|
}
|
Loading…
Add table
Reference in a new issue