Implemented directory mode for listenbrainz-archive

This commit is contained in:
Philipp Wolfer 2025-05-24 00:21:46 +02:00
parent 92e7216fac
commit 424305518b
No known key found for this signature in database
GPG key ID: 8FDF744D4919943B

View file

@ -28,6 +28,7 @@ import (
"io"
"iter"
"os"
"path/filepath"
"regexp"
"sort"
"strconv"
@ -51,7 +52,7 @@ func (a *Archive) Close() error {
// Read the user information from the archive.
func (a *Archive) UserInfo() (UserInfo, error) {
f, err := a.backend.OpenUserInfoFile()
f, err := a.backend.OpenFile("user.json")
if err != nil {
return UserInfo{}, err
}
@ -67,11 +68,43 @@ func (a *Archive) UserInfo() (UserInfo, error) {
return userInfo, nil
}
// ListListenExports returns the listen export files contained in the archive.
//
// Candidates are collected from the backend with the glob
// "listens/*/*.jsonl". Only names of the exact form
// listens/<4-digit year>/<1-2 digit month>.jsonl are kept; each kept entry
// carries the time range that getMonthTimeRange computes for its year and
// month. The result is never nil on success, even when nothing matches.
func (a *Archive) ListListenExports() ([]ListenExportFileInfo, error) {
	candidates, err := a.backend.Glob("listens/*/*.jsonl")
	if err != nil {
		return nil, err
	}

	// The glob is only a coarse filter; this expression enforces the exact
	// layout and captures the year and month path components.
	namePattern := regexp.MustCompile(`^listens/(\d{4})/(\d{1,2})\.jsonl$`)

	exports := make([]ListenExportFileInfo, 0)
	for _, candidate := range candidates {
		groups := namePattern.FindStringSubmatch(candidate.Name)
		if groups == nil {
			// Not a monthly export file, ignore it.
			continue
		}

		monthRange, err := getMonthTimeRange(groups[1], groups[2])
		if err != nil {
			return nil, err
		}

		exports = append(exports, ListenExportFileInfo{
			Name:      candidate.Name,
			TimeRange: *monthRange,
			f:         candidate.File,
		})
	}

	return exports, nil
}
// Yields all listens from the archive that are newer than the given timestamp.
// The listens are yielded in ascending order of their listened_at timestamp.
func (a *Archive) IterListens(minTimestamp time.Time) iter.Seq2[Listen, error] {
return func(yield func(Listen, error) bool) {
files, err := a.backend.ListListenExports()
files, err := a.ListListenExports()
if err != nil {
yield(Listen{}, err)
return
@ -119,8 +152,12 @@ func OpenArchive(path string) (*Archive, error) {
}
return &Archive{backend: backend}, nil
case mode.IsDir():
// TODO: Implement directory mode
return nil, fmt.Errorf("directory mode not implemented")
backend := &dirArchive{}
err := backend.Open(path)
if err != nil {
return nil, err
}
return &Archive{backend: backend}, nil
default:
return nil, fmt.Errorf("unsupported file mode: %s", mode)
}
@ -133,8 +170,8 @@ type UserInfo struct {
type archiveBackend interface {
Close() error
OpenUserInfoFile() (io.ReadCloser, error)
ListListenExports() ([]ListenExportFileInfo, error)
OpenFile(path string) (io.ReadCloser, error)
Glob(pattern string) ([]FileInfo, error)
}
type timeRange struct {
@ -142,16 +179,30 @@ type timeRange struct {
End time.Time
}
type openableFile interface {
type OpenableFile interface {
Open() (io.ReadCloser, error)
}
// FileInfo describes one file reported by an archive backend's Glob method.
type FileInfo struct {
// Name is the name of the file as reported by the backend.
Name string
// File gives access to the file's content via its Open method.
File OpenableFile
}
// FilesystemFile refers to a file on the local file system by its path.
// Its Open method has the signature required by OpenableFile.
type FilesystemFile struct {
	// path is passed to os.Open unchanged.
	path string
}

// Open opens the file at f.path for reading. The caller is responsible for
// closing the returned reader.
//
// On failure the returned reader is a literal nil. Returning the results of
// os.Open directly would wrap a nil *os.File in a non-nil io.ReadCloser,
// so a caller comparing the reader against nil would be misled.
func (f *FilesystemFile) Open() (io.ReadCloser, error) {
	file, err := os.Open(f.path)
	if err != nil {
		return nil, err
	}
	return file, nil
}
type ListenExportFileInfo struct {
Name string
TimeRange timeRange
f openableFile
f OpenableFile
}
// zipArchive is an implementation of the archiveBackend interface for zip
// files.
type zipArchive struct {
// zip is the opened zip file; the backend's Close method closes it.
zip *zip.ReadCloser
}
@ -172,34 +223,68 @@ func (a *zipArchive) Close() error {
return a.zip.Close()
}
func (a *zipArchive) OpenUserInfoFile() (io.ReadCloser, error) {
file, err := a.zip.Open("user.json")
// Glob returns all entries of the zip file whose name matches pattern.
//
// The pattern syntax is that of filepath.Match and is applied to the full
// entry name. A malformed pattern is reported as an error as soon as it is
// detected instead of being treated as "no match". On success the result is
// never nil, even when no entry matches.
func (a *zipArchive) Glob(pattern string) ([]FileInfo, error) {
	result := make([]FileInfo, 0)
	for _, file := range a.zip.File {
		matched, err := filepath.Match(pattern, file.Name)
		// Match reports a bad pattern together with matched == false, so the
		// error has to be checked before looking at the match result.
		if err != nil {
			return nil, err
		}
		if !matched {
			continue
		}
		result = append(result, FileInfo{
			Name: file.Name,
			File: file,
		})
	}

	return result, nil
}
// OpenFile opens the entry stored under path inside the zip file for
// reading. The caller is responsible for closing the returned reader.
func (a *zipArchive) OpenFile(path string) (io.ReadCloser, error) {
	// The file value returned by Open is an interface that already provides
	// Read and Close, so both results can be handed back unchanged; a nil
	// file stays a nil io.ReadCloser.
	return a.zip.Open(path)
}
func (a *zipArchive) ListListenExports() ([]ListenExportFileInfo, error) {
re := regexp.MustCompile(`^listens/(\d{4})/(\d{1,2})\.jsonl$`)
result := make([]ListenExportFileInfo, 0)
// dirArchive is an implementation of the archiveBackend interface for
// directories.
type dirArchive struct {
// dir is the archive's root directory; Open stores the cleaned path here.
dir string
}
for _, file := range a.zip.File {
match := re.FindStringSubmatch(file.Name)
if match == nil {
continue
}
// Open points the backend at the directory given by path. The path is
// normalized with filepath.Clean and remembered as the archive root. The
// file system is not accessed here and the returned error is always nil.
func (a *dirArchive) Open(path string) error {
	root := filepath.Clean(path)
	a.dir = root
	return nil
}
year := match[1]
month := match[2]
times, err := getMonthTimeRange(year, month)
// Close does nothing and always returns nil: a dirArchive only stores the
// directory path, so there is no handle to release.
func (a *dirArchive) Close() error {
return nil
}
// OpenFile opens the file at path, taken relative to the archive directory,
// for reading. The caller is responsible for closing the returned reader.
func (a *dirArchive) OpenFile(path string) (io.ReadCloser, error) {
	fullPath := filepath.Join(a.dir, path)

	handle, err := os.Open(fullPath)
	if err != nil {
		// Return a literal nil so the interface value itself is nil.
		return nil, err
	}

	return handle, nil
}
func (a *dirArchive) Glob(pattern string) ([]FileInfo, error) {
files, err := filepath.Glob(filepath.Join(a.dir, pattern))
if err != nil {
return nil, err
}
result := make([]FileInfo, 0)
for _, filename := range files {
name, err := filepath.Rel(a.dir, filename)
if err != nil {
return nil, err
}
info := ListenExportFileInfo{
Name: file.Name,
TimeRange: *times,
f: file,
info := FileInfo{
Name: name,
File: &FilesystemFile{path: filename},
}
result = append(result, info)
}
@ -208,10 +293,10 @@ func (a *zipArchive) ListListenExports() ([]ListenExportFileInfo, error) {
}
type ListenExportFile struct {
file openableFile
file OpenableFile
}
func NewExportFile(f openableFile) ListenExportFile {
func NewExportFile(f OpenableFile) ListenExportFile {
return ListenExportFile{file: f}
}