METANOIA/handler/disc.go

630 lines
17 KiB
Go

package handler
import (
"encoding/binary"
"facette.io/natsort"
"fmt"
"git.gammaspectra.live/S.O.N.G/Hibiki/panako"
"git.gammaspectra.live/S.O.N.G/Hibiki/utilities/specializedstore"
"git.gammaspectra.live/S.O.N.G/Kirika/audio"
"git.gammaspectra.live/S.O.N.G/Kirika/audio/format"
"git.gammaspectra.live/S.O.N.G/Kirika/audio/format/guess"
"git.gammaspectra.live/S.O.N.G/Kirika/audio/replaygain"
"git.gammaspectra.live/S.O.N.G/Kirika/hasher"
"git.gammaspectra.live/S.O.N.G/METANOIA/metadata"
"git.gammaspectra.live/S.O.N.G/METANOIA/utilities"
"github.com/dhowden/tag"
"github.com/oriser/regroup"
"io"
"io/ioutil"
"log"
"os"
"path"
"sort"
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
"unicode"
)
// fileEntryList is an ordered list of audio file entries. analyze walks it in
// slice order and treats the first and last elements specially, so the order
// of the list is significant.
type fileEntryList []fileEntry
// separatorTrimSet lists the punctuation bytes that count as separators in a
// file name. It doubles as a cutset for strings.TrimLeft.
const separatorTrimSet = ",.-_()[]{}"

// isSeparator reports whether b is a plain space or one of the bytes listed in
// separatorTrimSet.
func isSeparator(b byte) bool {
	matched := b == ' '
	for _, candidate := range []byte(separatorTrimSet) {
		if matched {
			break
		}
		matched = candidate == b
	}
	return matched
}
// analyzeEntry carries everything fileEntryList.analyze gathers for a single
// audio file.
type analyzeEntry struct {
fileEntry
// fileHandle is the opened audio file. analyze leaves it open for entries it
// returns; the caller is expected to close it.
fileHandle *os.File
// audioMetadata is filled in by a goroutine that drains one copy of the
// decoded stream.
audioMetadata struct {
sampleRate int
channels int
// samples is the sum of len(block)/channels over all decoded blocks.
samples int
}
// replayGain is written once the album-wide ReplayGain computation succeeds;
// it keeps its zero values when that computation returns an error.
replayGain struct {
albumGain float64
albumPeak float64
trackGain float64
trackPeak float64
}
// fileMetadata is the value returned by tag.ReadFrom. analyze keeps going
// when tag reading fails, so this may be unset.
// NOTE(review): presumably nil in that case — consumers must check.
fileMetadata tag.Metadata
panakoFingerprints []*panako.Fingerprint
// The hashers below are only created when an analyzer channel could be
// opened for the file; otherwise they stay nil.
hasherCrc32 *hasher.Hasher
// hasherCueToolsCrc32 is only created for the first and the last track.
hasherCueToolsCrc32 *hasher.Hasher
hasherAccurateRipV1 *hasher.Hasher
hasherAccurateRipV2 *hasher.Hasher
}
// analyze opens every file of l (resolved against directory), decodes it and
// concurrently gathers per-track data: sample counts, Panako fingerprints,
// CRC32 / CueTools CRC32 / AccurateRip v1 and v2 hashers, and ReplayGain
// values.
//
// It returns one entry per file that could be opened and decoded, in list
// order; files that fail are logged and skipped. fullCRC32 is the CRC32 over
// the merged analyzer channels of all tracks, fullCTDBCRC32 the CRC32 over the
// merged channels with the start of the first track and the end of the last
// track skipped.
//
// The function blocks until all hashers and helper goroutines have finished.
// The *os.File of every returned entry is left open; closing it is the
// caller's responsibility.
func (l fileEntryList) analyze(directory string, panakoInstance *panako.Instance) (entries []*analyzeEntry, fullCRC32 uint32, fullCTDBCRC32 uint32) {
// waitGroups collects every wait group that must complete before returning.
var waitGroups []*sync.WaitGroup
printStrategy := panakoInstance.GetStrategy(specializedstore.NewMemoryStore(), audio.Linear)
// Per-track analyzer channels that get merged into the two whole-disc hashers.
var joinedCTDBChannels []format.AnalyzerChannel
var joinedChannels []format.AnalyzerChannel
// preLastTotalSamples accumulates the sample count of every track except the
// last one; both it and its wait group are handed to SkipEndSamplesMultiple
// for the last track.
var preLastTotalSamplesWaitGroup sync.WaitGroup
preLastTotalSamples := uint32(0)
var replayGainSources []audio.Source
for trackIndex, e := range l {
f, err := os.Open(path.Join(directory, e.Name))
if err != nil {
//TODO
log.Print(err)
continue
}
var source audio.Source
var analyzer format.AnalyzerChannel
// A tag reading failure is logged but not fatal: the file is still decoded.
// NOTE(review): meta is presumably nil after a failure — confirm consumers cope.
meta, err := tag.ReadFrom(f)
if err != nil {
log.Print(err)
err = nil
}
// Rewind after tag reading so the decoder starts at the beginning of the
// file. The Seek error is not checked.
f.Seek(0, io.SeekStart)
decoders, err := guess.GetDecoders(f, f.Name())
if err != nil { //cannot decode
//TODO
log.Print(err)
f.Close()
continue
}
// Prefer opening with an analyzer channel; fall back to a plain decode when
// that fails or yields no Blocks channel.
if source, analyzer, err = guess.OpenAnalyzer(f, decoders); err != nil || source.Blocks == nil {
analyzer = nil
source, err = guess.Open(f, decoders)
}
if err != nil { //cannot decode
//TODO
log.Print(err)
f.Close()
continue
}
if source.Blocks == nil { //no known decoder
//TODO
log.Print(fmt.Errorf("no known decoder for %s", f.Name()))
f.Close()
continue
}
entry := &analyzeEntry{
fileEntry: e,
fileHandle: f,
fileMetadata: meta,
}
// Three copies of the decoded stream: [0] sample counting,
// [1] fingerprinting, [2] ReplayGain.
sources := source.Split(3)
var sinkWaitGroup sync.WaitGroup
sinkWaitGroup.Add(1)
preLastTotalSamplesWaitGroup.Add(1)
// Counts samples by draining sources[0]; add is true for every track but
// the last, in which case the count is added to preLastTotalSamples.
go func(add bool) {
defer sinkWaitGroup.Done()
defer preLastTotalSamplesWaitGroup.Done()
entry.audioMetadata.sampleRate = sources[0].SampleRate
entry.audioMetadata.channels = sources[0].Channels
var samples int
for block := range sources[0].Blocks {
samples += len(block) / sources[0].Channels
}
entry.audioMetadata.samples = samples
if add {
atomic.AddUint32(&preLastTotalSamples, uint32(samples))
}
}(trackIndex < len(l)-1)
sinkWaitGroup.Add(1)
// Fingerprints sources[1]; the add parameter is not used in this goroutine.
go func(add bool) {
defer sinkWaitGroup.Done()
entry.panakoFingerprints = printStrategy.BlockChannelToFingerprints(sources[1].Blocks)
}(trackIndex < len(l)-1)
replayGainSources = append(replayGainSources, sources[2])
//TODO: handle extra appended/prepended silence
if analyzer != nil {
if trackIndex == 0 {
// First track: the CueTools and AccurateRip hashers skip samples at the
// start of the stream (multiples of metadata.Int16SamplesPerSector) and
// the AccurateRip hashers use the *Start hash types.
// NOTE(review): a single-track list also takes this branch, so nothing is
// skipped at the end in that case — confirm this is intended.
channels := analyzer.Split(4)
joinedChannels = append(joinedChannels, channels[0])
ctChannels := channels[1].SkipStartSamples(metadata.Int16SamplesPerSector * 10).Split(2)
joinedCTDBChannels = append(joinedCTDBChannels, ctChannels[0])
entry.hasherCueToolsCrc32 = hasher.NewHasher(ctChannels[1], hasher.HashtypeCrc32)
arChannels := channels[2].SkipStartSamples(metadata.Int16SamplesPerSector*5 - 1).Split(2)
entry.hasherAccurateRipV1 = hasher.NewHasher(arChannels[0], hasher.HashtypeAccurateRipV1Start)
entry.hasherAccurateRipV2 = hasher.NewHasher(arChannels[1], hasher.HashtypeAccurateRipV2Start)
entry.hasherCrc32 = hasher.NewHasher(channels[3], hasher.HashtypeCrc32)
} else if trackIndex == len(l)-1 {
// Last track: the CueTools and AccurateRip hashers skip samples at the end
// of the stream. The CueTools variant is given the running total of the
// preceding tracks and the wait group guarding it.
channels := analyzer.Split(4)
joinedChannels = append(joinedChannels, channels[0])
ctChannels := channels[1].SkipEndSamplesMultiple(&preLastTotalSamplesWaitGroup, &preLastTotalSamples, metadata.Int16SamplesPerSector*10).Split(2)
joinedCTDBChannels = append(joinedCTDBChannels, ctChannels[0])
entry.hasherCueToolsCrc32 = hasher.NewHasher(ctChannels[1], hasher.HashtypeCrc32)
arChannels := channels[2].SkipEndSamples(metadata.Int16SamplesPerSector * 5).Split(2)
entry.hasherAccurateRipV1 = hasher.NewHasher(arChannels[0], hasher.HashtypeAccurateRipV1)
entry.hasherAccurateRipV2 = hasher.NewHasher(arChannels[1], hasher.HashtypeAccurateRipV2)
entry.hasherCrc32 = hasher.NewHasher(channels[3], hasher.HashtypeCrc32)
} else {
// Middle tracks are hashed in full; no separate CueTools hasher is created.
channels := analyzer.Split(5)
joinedChannels = append(joinedChannels, channels[0])
joinedCTDBChannels = append(joinedCTDBChannels, channels[1])
entry.hasherCrc32 = hasher.NewHasher(channels[2], hasher.HashtypeCrc32)
entry.hasherAccurateRipV1 = hasher.NewHasher(channels[3], hasher.HashtypeAccurateRipV1)
entry.hasherAccurateRipV2 = hasher.NewHasher(channels[4], hasher.HashtypeAccurateRipV2)
}
waitGroups = append(waitGroups, entry.hasherCrc32.GetWaitGroup(), entry.hasherAccurateRipV1.GetWaitGroup(), entry.hasherAccurateRipV2.GetWaitGroup())
if entry.hasherCueToolsCrc32 != nil {
waitGroups = append(waitGroups, entry.hasherCueToolsCrc32.GetWaitGroup())
}
}
waitGroups = append(waitGroups, &sinkWaitGroup)
entries = append(entries, entry)
}
// Album ReplayGain runs over all tracks at once. entries and
// replayGainSources are appended in the same loop iteration, so index i
// refers to the same track in both. On error the entries keep zero values.
var rgwg sync.WaitGroup
rgwg.Add(1)
go func() {
defer rgwg.Done()
albumGain, albumPeak, trackGains, trackPeaks, err := replaygain.GetAlbumReplayGain(replayGainSources)
if err != nil {
return
}
for i, e := range entries {
e.replayGain.albumGain = albumGain
e.replayGain.albumPeak = albumPeak
e.replayGain.trackGain = trackGains[i]
e.replayGain.trackPeak = trackPeaks[i]
}
}()
waitGroups = append(waitGroups, &rgwg)
// Whole-disc hashes over the merged per-track analyzer channels.
fullHasher := hasher.NewHasher(format.MergeHasherChannels(joinedChannels...), hasher.HashtypeCrc32)
fullCTDBHasher := hasher.NewHasher(format.MergeHasherChannels(joinedCTDBChannels...), hasher.HashtypeCrc32)
fullHasher.Wait()
fullCTDBHasher.Wait()
fullCRC32 = binary.BigEndian.Uint32(fullHasher.GetResult())
fullCTDBCRC32 = binary.BigEndian.Uint32(fullCTDBHasher.GetResult())
//Wait for all tasks
for _, wg := range waitGroups {
wg.Wait()
}
return
}
// fileEntry describes one audio file together with the names derived from its
// file name by processAudioFiles.
type fileEntry struct {
// Name is the file name exactly as it was passed in.
Name string
// NormalizedSortName is the Unicode-normalized name without extension,
// starting at the detected track number when there is one; it is the key
// used for natural sorting.
NormalizedSortName string
// NormalizedName is the normalized name with the leading track number and
// separators stripped.
NormalizedName string
}
// processAudioFiles derives normalized names for the given audio file names
// and returns the resulting entries in natural sort order.
//
// For every file the name is Unicode-normalized and the extension (everything
// from the last '.') is discarded. The remaining stem is scanned for the first
// number that either starts the stem or directly follows a separator (see
// isSeparator); that number is taken to be the track number:
//
//   - NormalizedSortName is the stem from the track number onwards.
//   - NormalizedName is whatever follows the track number, with leading
//     separators removed (empty when the stem is nothing but the number).
//
// When no such number exists both fields are the cleaned-up stem.
//
// Every input file yields exactly one entry. Only the stem is searched, so
// digits in the extension ("song.mp3") or digits glued to a word
// ("track1.flac") no longer cause the file to be dropped, and an all-digit
// stem ("01.flac") no longer causes an out-of-range slice.
func processAudioFiles(files []string) (result fileEntryList) {
	// clean strips surrounding whitespace and any leading separator bytes.
	clean := func(s string) string {
		return strings.TrimSpace(strings.TrimLeft(strings.TrimSpace(s), separatorTrimSet))
	}
	isNotNumber := func(r rune) bool {
		return !unicode.IsNumber(r)
	}

	result = make(fileEntryList, 0, len(files))
	for _, f := range files {
		normalized := utilities.NormalizeUnicode(f)

		// A name without any dot is treated as having no extension.
		stem := normalized
		if ext := strings.LastIndex(normalized, "."); ext >= 0 {
			stem = normalized[:ext]
		}

		// Default for names without a usable track number.
		entry := fileEntry{
			Name:               f,
			NormalizedSortName: clean(stem),
			NormalizedName:     clean(stem),
		}

		for k := 0; k < len(stem); {
			offset := strings.IndexFunc(stem[k:], unicode.IsNumber)
			if offset == -1 {
				break
			}
			index := k + offset
			if index > 0 && !isSeparator(stem[index-1]) {
				// A digit glued to preceding text is not a track number; keep looking.
				k = index + 1
				continue
			}

			numbered := stem[index:]
			entry.NormalizedSortName = clean(numbered)
			entry.NormalizedName = ""
			if rest := strings.IndexFunc(numbered, isNotNumber); rest != -1 {
				entry.NormalizedName = clean(numbered[rest:])
			}
			break
		}

		result = append(result, entry)
	}

	// Sort files naturally, so that "2" orders before "10".
	sort.SliceStable(result, func(i, j int) bool {
		return natsort.Compare(result[i].NormalizedSortName, result[j].NormalizedSortName)
	})
	return result
}
// DiscHandlerResult is the outcome of HandleDiscEntry for one directory.
type DiscHandlerResult struct {
// TOC is derived from the per-track sample counts. HandleDiscEntry moves the
// last accumulated offset to the front before returning.
TOC metadata.TOC
// CRC32 and CueToolsCRC32 are the whole-disc checksums returned by analyze.
CRC32 uint32
CueToolsCRC32 uint32
// Directory is the path that was handed to HandleDiscEntry.
Directory string
// Tracks holds one element per successfully analyzed audio file.
Tracks []DiscHandlerTrack
// CommonMetadata maps raw tag keys to values that did not conflict between
// tracks; conflicting or missing values are removed.
CommonMetadata map[string]string
// Identifiers collects catalog numbers, album names, disc id and TOC found
// in CommonMetadata, plus a catalog number parsed from the directory name.
Identifiers []metadata.Name
// Album is the album name taken from CommonMetadata, when present.
Album string
}
// DiscHandlerTrack describes a single analyzed audio file of a disc.
type DiscHandlerTrack struct {
// FileName is the file name as found in the directory.
FileName string
// TrackName and SortName are copied from fileEntry.NormalizedName and
// fileEntry.NormalizedSortName.
TrackName string
SortName string
// Fingerprints holds the acoustic fingerprints and checksums of the track.
// The checksum fields stay zero when no hasher was available for the file.
Fingerprints struct {
Panako []*panako.Fingerprint
CRC32 uint32
// CueToolsCRC32 equals CRC32 unless a dedicated CueTools hasher was used.
CueToolsCRC32 uint32
AccurateRipV1 uint32
AccurateRipV2 uint32
}
// FileMetadata is populated from the tags embedded in the file.
FileMetadata struct {
DiscNumber int
// Artists gathers artist, album artist, composer and further credits, each
// tagged with its Kind.
Artists []metadata.Name
Album string
Year int
TrackNumber int
Title string
OriginalTitle string
Lyrics string
EmbeddedPicture []byte
}
// AudioMetadata is derived from the decoded audio stream.
AudioMetadata struct {
SampleRate int
Channels int
// NumberOfFullSamples is the sample count reported by analyze.
NumberOfFullSamples int
// Duration is NumberOfFullSamples divided by SampleRate.
Duration time.Duration
}
}
// HandleDiscEntry treats the directory pathEntry as one disc: it classifies
// the directory's files by MIME type, analyzes the audio files in natural sort
// order and assembles the per-track and disc-wide results.
//
// For every analyzed track it records audio properties, fingerprints and
// checksums, and the embedded tag metadata. Raw tag values that do not
// conflict between tracks are kept in CommonMetadata, from which disc
// identifiers (catalog numbers, album, disc id, TOC) are extracted. A catalog
// number enclosed in brackets in the directory name is added as well.
//
// It returns nil when the directory cannot be read or contains no audio files.
// Files without readable tags are still reported; their FileMetadata is left
// at its zero value.
func HandleDiscEntry(panakoInstance *panako.Instance, pathEntry string) *DiscHandlerResult {
	log.Printf("Handling %q", pathEntry)

	entries, err := ioutil.ReadDir(pathEntry)
	if err != nil {
		log.Printf("reading %q: %v", pathEntry, err)
		return nil
	}

	// Only audioFiles is consumed below; the other lists are collected but not
	// used by this function yet.
	var audioFiles []string
	var imageFiles []string
	var metadataFiles []string
	var folders []string
	for _, entry := range entries {
		if entry.IsDir() {
			folders = append(folders, entry.Name())
			continue
		}

		// HasPrefix rather than slicing: the MIME string may be shorter than the
		// prefix (for example empty for an unknown extension).
		mime := utilities.GetMimeTypeFromExtension(path.Ext(entry.Name()))
		switch {
		case strings.HasPrefix(mime, "audio/"):
			audioFiles = append(audioFiles, entry.Name())
		case strings.HasPrefix(mime, "image/"):
			imageFiles = append(imageFiles, entry.Name())
		case mime == "text/x-log" || mime == "text/x-accurip" || mime == "text/x-cue" || mime == "text/x-toc":
			metadataFiles = append(metadataFiles, entry.Name())
		}
	}

	if len(audioFiles) == 0 {
		return nil
	}

	sortedAudioEntries := processAudioFiles(audioFiles)

	disc := &DiscHandlerResult{
		Directory:      pathEntry,
		TOC:            metadata.TOC{metadata.TocPregap},
		CommonMetadata: make(map[string]string),
	}

	result, fullCRC32, fullCTDBCRC32 := sortedAudioEntries.analyze(pathEntry, panakoInstance)
	// analyze leaves the files open; release them once everything is read.
	defer func() {
		for _, entry := range result {
			entry.fileHandle.Close()
		}
	}()

	disc.CRC32 = fullCRC32
	disc.CueToolsCRC32 = fullCTDBCRC32

	// creditKinds maps raw tag keys to the Kind recorded for the credited name.
	creditKinds := map[string]string{
		"mastering": "mastering",
		"lyricist":  "lyrics",
		"guitar":    "performer, guitar",
		"arrange":   "arranger",
		"vocal":     "vocals",
		"chorus":    "vocals",
		"performer": "performer",
	}

	for _, entry := range result {
		track := DiscHandlerTrack{
			FileName:  entry.Name,
			TrackName: entry.NormalizedName,
			SortName:  entry.NormalizedSortName,
		}
		addArtist := func(kind, name string) {
			track.FileMetadata.Artists = append(track.FileMetadata.Artists, metadata.Name{
				Kind: kind,
				Name: name,
			})
		}

		track.AudioMetadata.SampleRate = entry.audioMetadata.sampleRate
		track.AudioMetadata.Channels = entry.audioMetadata.channels
		track.AudioMetadata.NumberOfFullSamples = entry.audioMetadata.samples
		// Leave the duration at zero rather than dividing by a zero sample rate.
		if track.AudioMetadata.SampleRate > 0 {
			track.AudioMetadata.Duration = time.Duration(float64(time.Second) * float64(track.AudioMetadata.NumberOfFullSamples) / float64(track.AudioMetadata.SampleRate))
		}

		track.Fingerprints.Panako = entry.panakoFingerprints

		// Each TOC entry is the previous offset plus this track's length in sectors.
		disc.TOC = append(disc.TOC, disc.TOC[len(disc.TOC)-1]+track.AudioMetadata.NumberOfFullSamples/metadata.Int16SamplesPerSector)

		if entry.hasherCrc32 != nil {
			track.Fingerprints.CRC32 = binary.BigEndian.Uint32(entry.hasherCrc32.GetResult())
			// Default to the plain CRC32; overridden below when a dedicated
			// CueTools hasher exists for this track.
			track.Fingerprints.CueToolsCRC32 = track.Fingerprints.CRC32
		}
		if entry.hasherCueToolsCrc32 != nil {
			track.Fingerprints.CueToolsCRC32 = binary.BigEndian.Uint32(entry.hasherCueToolsCrc32.GetResult())
		}
		if entry.hasherAccurateRipV1 != nil {
			track.Fingerprints.AccurateRipV1 = binary.BigEndian.Uint32(entry.hasherAccurateRipV1.GetResult())
		}
		if entry.hasherAccurateRipV2 != nil {
			track.Fingerprints.AccurateRipV2 = binary.BigEndian.Uint32(entry.hasherAccurateRipV2.GetResult())
		}

		// fileMetadata is nil when tag reading failed for this file (analyze only
		// logs that error), so every access has to be guarded. rawValues stays
		// nil in that case, which makes the track contribute no common metadata.
		var rawValues map[string]interface{}
		if meta := entry.fileMetadata; meta != nil {
			// The second return value of Disc/Track is the total count, unused here.
			track.FileMetadata.DiscNumber, _ = meta.Disc()
			track.FileMetadata.TrackNumber, _ = meta.Track()
			track.FileMetadata.Year = meta.Year()
			track.FileMetadata.Album = meta.Album()
			track.FileMetadata.Title = meta.Title()

			if artist := meta.Artist(); artist != "" {
				addArtist("artist", artist)
			}
			if albumArtist := meta.AlbumArtist(); albumArtist != "" {
				addArtist("albumartist", albumArtist)
			}
			if composer := meta.Composer(); composer != "" {
				addArtist("composer", composer)
			}
			if picture := meta.Picture(); picture != nil {
				track.FileMetadata.EmbeddedPicture = picture.Data
			}

			rawValues = meta.Raw()
		}

		for k, v := range rawValues {
			// Only string and int raw values are considered.
			var str string
			switch value := v.(type) {
			case string:
				str = value
			case int:
				str = strconv.Itoa(value)
			default:
				continue
			}
			if str == "" {
				continue
			}

			if kind, isCredit := creditKinds[k]; isCredit {
				addArtist(kind, str)
			}
			switch k {
			case "originaltitle":
				track.FileMetadata.OriginalTitle = str
			case "unsyncedlyrics":
				track.FileMetadata.Lyrics = str
			}

			// An empty string marks a key whose value differs between tracks.
			// NOTE(review): a key first seen on a later track is stored even though
			// earlier tracks lacked it — confirm whether that is intended.
			value, exists := disc.CommonMetadata[k]
			if !exists {
				disc.CommonMetadata[k] = str
			} else if len(value) > 0 && value != str {
				disc.CommonMetadata[k] = ""
			}
		}

		// Keys seen on earlier tracks but missing from this one are not common.
		for k := range disc.CommonMetadata {
			if _, exists := rawValues[k]; !exists {
				disc.CommonMetadata[k] = ""
			}
		}

		disc.Tracks = append(disc.Tracks, track)
	}

	// Drop everything that was marked as conflicting or missing.
	for k, v := range disc.CommonMetadata {
		if v == "" {
			delete(disc.CommonMetadata, k)
		}
	}

	// Catalog numbers may be stored under several keys, each possibly holding a
	// ';'-separated list.
	for _, key := range []string{"catalogid", "catalognumber", "labelno"} {
		catno, ok := disc.CommonMetadata[key]
		if !ok {
			continue
		}
		for _, n := range strings.Split(catno, ";") {
			disc.Identifiers = append(disc.Identifiers, metadata.Name{
				Kind: "catalog",
				Name: n,
			})
		}
	}

	// "TALB" is the ID3v2 frame; when both keys exist the later "album" value
	// ends up in disc.Album.
	for _, key := range []string{"TALB", "album"} {
		album, ok := disc.CommonMetadata[key]
		if !ok {
			continue
		}
		disc.Identifiers = append(disc.Identifiers, metadata.Name{
			Kind: "album",
			Name: album,
		})
		disc.Album = album
	}

	if discid, ok := disc.CommonMetadata["discid"]; ok {
		disc.Identifiers = append(disc.Identifiers, metadata.Name{
			Kind: "discid",
			Name: discid,
		})
	}

	if cdtoc, ok := disc.CommonMetadata["cdtoc"]; ok {
		// The tag is a '+'-separated list of hexadecimal numbers; the first field
		// is skipped. Any unparsable field invalidates the whole TOC.
		toc := metadata.TOC{}
		for _, v := range strings.Split(cdtoc, "+")[1:] {
			number, err := strconv.ParseInt(v, 16, 0)
			if err != nil {
				toc = metadata.TOC{}
				break
			}
			toc = append(toc, int(number))
		}
		if len(toc) > 0 {
			// Move the last value to the front.
			toc = append(metadata.TOC{toc[len(toc)-1]}, toc[0:len(toc)-1]...)
			disc.Identifiers = append(disc.Identifiers, metadata.Name{
				Kind: "toc",
				Name: toc.String(),
			})
		}
	}

	// Look for a bracketed catalog number in the directory name.
	catalogRE := regroup.MustCompile(`(?i)[\[\(\{](?P<catno>(?:[a-z]{2,}-?[0-9][a-z0-9\-~]*)|(?:[0-9 ]{5,}))[\}\)\]]`)
	m := &struct {
		CatalogNumber string `regroup:"catno"`
	}{}
	if err := catalogRE.MatchToTarget(utilities.NormalizeUnicode(disc.Directory), m); err == nil {
		disc.Identifiers = append(disc.Identifiers, metadata.Name{
			Kind: "catalog",
			Name: m.CatalogNumber,
		})
	}

	// Same rotation as for the tag TOC: the last accumulated offset moves to
	// the front.
	disc.TOC = append(metadata.TOC{disc.TOC[len(disc.TOC)-1]}, disc.TOC[0:len(disc.TOC)-1]...)

	return disc
}