Hibiki/panako/processor.go

256 lines
7.3 KiB
Go

package panako
import "C"
import (
"git.gammaspectra.live/S.O.N.G/Hibiki/cgo"
"git.gammaspectra.live/S.O.N.G/Hibiki/utilities"
"git.gammaspectra.live/S.O.N.G/Kirika/audio"
"git.gammaspectra.live/S.O.N.G/goborator"
"runtime"
"unsafe"
)
// EventPointProcessor consumes spectral magnitude frames produced by a Gabor
// transform, extracts event points (bins that are maxima both along frequency
// and along time) and combines those points into fingerprints.
type EventPointProcessor struct {
	audio.Sink

	// instance provides the filter sizes and the pairing distance limits.
	instance *Instance

	// gaborator performs the block transform; bandNumber is the number of
	// bands it reports, i.e. the length of every max-filter buffer.
	gaborator  *goborator.Gaborator
	bandNumber int

	// magnitudes and maxMagnitudes each hold one slot per frame of the time
	// window; magnitudesIndex is the slot the next frame is written to.
	magnitudes      [][]float32
	maxMagnitudes   [][]float32
	magnitudesIndex int

	// previousMagnitudes and previousMaxMagnitudes map an analysis frame index
	// to that frame's raw and frequency-max-filtered magnitudes.
	// analysisFrameIndex is the index of the frame currently being processed.
	previousMagnitudes    map[int][]float32
	previousMaxMagnitudes map[int][]float32
	analysisFrameIndex    int

	// maxFilterVertical is the max filter applied along the frequency axis.
	maxFilterVertical            *cgo.LemireMaxClampedFilter
	maxFilterWindowSizeFrequency int
	maxFilterWindowSizeTime      int
	// maxHorizontal stores, per band, the result of the max filter along time.
	maxHorizontal []float32

	// eventPoints and fingerprints accumulate the results.
	eventPoints  []EventPoint
	fingerprints []Fingerprint

	// pinner holds every buffer pinned by this processor until
	// ProcessingFinished unpins them.
	pinner runtime.Pinner
}
// NewEventPointProcessor builds a processor configured from instance.
//
// It creates the Gabor transform from the instance's transform settings,
// allocates one frequency-max-filter output buffer per slot of the time
// window (each pinned so its address stays fixed), and creates the max filter
// that runs along the frequency axis.
//
// It returns an error when the frequency max filter cannot be created. In that
// case the buffers pinned so far are unpinned before returning: a
// runtime.Pinner that is garbage collected while it still holds pinned
// pointers makes the runtime panic.
func NewEventPointProcessor(instance *Instance) (*EventPointProcessor, error) {
	ob := &EventPointProcessor{
		instance:                     instance,
		previousMaxMagnitudes:        make(map[int][]float32),
		previousMagnitudes:           make(map[int][]float32),
		maxFilterWindowSizeFrequency: instance.PointFilterMaximumFrequencyFilterSize,
		maxFilterWindowSizeTime:      instance.PointFilterMaximumTimeFilterSize,
		eventPoints:                  make([]EventPoint, 0, 8196),
		fingerprints:                 make([]Fingerprint, 0, 8196),
	}

	ob.gaborator = goborator.NewGaborator(
		instance.BlockSize,
		float64(instance.SampleRate),
		instance.GetTransformBandsPerOctave(),
		float64(instance.GetTransformMinimumFrequency()),
		float64(instance.GetTransformMaximumFrequency()),
		float64(instance.GetTransformReferenceFrequency()),
		instance.GetTransformTimeResolutionInSamples(),
	)
	ob.bandNumber = ob.gaborator.GetNumberOfBands()

	// One slot per frame of the time window. The raw magnitude slots are
	// filled in by processResult; the filtered slots are allocated and pinned
	// up front.
	ob.magnitudes = make([][]float32, ob.maxFilterWindowSizeTime)
	ob.maxMagnitudes = make([][]float32, ob.maxFilterWindowSizeTime)
	for i := range ob.maxMagnitudes {
		ob.maxMagnitudes[i] = make([]float32, ob.bandNumber)
		ob.pinner.Pin(unsafe.SliceData(ob.maxMagnitudes[i]))
	}
	ob.maxHorizontal = make([]float32, ob.bandNumber)

	filter, err := cgo.NewLemireMaxClampedFilter(ob.maxFilterWindowSizeFrequency, ob.bandNumber)
	if err != nil {
		// ob is discarded here, so release its pins now; otherwise the
		// Pinner's leak check panics once ob is collected.
		// NOTE(review): the gaborator created above is not released on this
		// path - confirm whether goborator needs an explicit release here.
		ob.pinner.Unpin()
		return nil, err
	}
	ob.maxFilterVertical = filter

	return ob, nil
}
// GetMagnitudes returns the magnitude frame stored in the slot selected by the
// current ring index. Because the index is advanced after every processed
// frame, this is the slot the next frame will be written to.
func (e *EventPointProcessor) GetMagnitudes() []float32 {
	slot := e.magnitudesIndex
	return e.magnitudes[slot]
}
// GetFingerprints returns the fingerprints collected so far. The returned
// slice is the processor's own slice, not a copy.
func (e *EventPointProcessor) GetFingerprints() []Fingerprint {
	prints := e.fingerprints
	return prints
}
// GetEventPoints returns the event points collected so far. The returned
// slice is the processor's own slice, not a copy.
func (e *EventPointProcessor) GetEventPoints() []EventPoint {
	points := e.eventPoints
	return points
}
// Process converts source with ToFloat32, takes its block channel and hands
// that channel to ProcessBlockChannel, returning whatever error it reports.
func (e *EventPointProcessor) Process(source audio.Source) error {
	blocks := source.ToFloat32().GetBlocks()
	return e.ProcessBlockChannel(blocks)
}
// ProcessBlockChannel runs the Gabor block transform over channel, feeding
// every resulting magnitude frame to processResult. When the transform
// succeeds, the collected event points are packed into fingerprints and the
// processor's resources are released.
//
// When the transform fails its error is returned as-is and the finishing step
// is skipped, so ProcessingFinished has not been called in that case.
func (e *EventPointProcessor) ProcessBlockChannel(channel chan []float32) error {
	if err := e.gaborator.GaborBlockTransform(channel, e.processResult); err != nil {
		return err
	}

	e.processResultFinish()
	return nil
}
// ProcessingFinished notifies the Gabor transform that processing is over,
// closes the frequency max filter if it is still open, and unpins every
// buffer this processor pinned.
func (e *EventPointProcessor) ProcessingFinished() {
	e.gaborator.ProcessingFinished()

	// The filter is cleared after closing so a repeated call skips this step.
	if filter := e.maxFilterVertical; filter != nil {
		filter.Close()
		e.maxFilterVertical = nil
	}

	e.pinner.Unpin()
}
// GetLatency returns the latency value reported by the underlying Gabor
// transform.
func (e *EventPointProcessor) GetLatency() int64 {
	latency := e.gaborator.GetLatency()
	return latency
}
// processResult handles one magnitude frame; it is the callback passed to the
// Gabor block transform.
//
// The frame is stored, max filtered along frequency, and recorded under the
// current analysis frame index. Once a full time window of frames is
// available, the frame in the middle of the window is scanned: a bin becomes
// an event point when its non-zero magnitude equals both the frequency-filtered
// maximum and the maximum along time. The oldest frame of the window is then
// dropped.
func (e *EventPointProcessor) processResult(currentMagnitudes []float32) {
	slot := e.magnitudesIndex
	frame := e.analysisFrameIndex
	window := e.maxFilterWindowSizeTime

	// Store the raw frame, both in the ring and under its frame index.
	e.magnitudes[slot] = currentMagnitudes
	e.previousMagnitudes[frame] = currentMagnitudes
	// The frame stays pinned until ProcessingFinished unpins everything.
	e.pinner.Pin(unsafe.SliceData(currentMagnitudes))

	// Max filter over the frequency bins, written into this slot's buffer.
	filtered := e.maxMagnitudes[slot]
	e.maxFilterVertical.Filter(currentMagnitudes, filtered)
	e.previousMaxMagnitudes[frame] = filtered

	if len(e.previousMaxMagnitudes) == window {
		// Examine the frame sitting in the middle of the time window.
		center := frame - window/2
		centerFiltered := e.previousMaxMagnitudes[center]
		centerRaw := e.previousMagnitudes[center]

		// The scan starts at bin 2 and stops before the last bin; the
		// neighbouring bins bin-1 and bin+1 are read below.
		for bin := 2; bin < len(centerRaw)-1; bin++ {
			verticalMax := centerFiltered[bin]
			value := centerRaw[bin]
			if verticalMax != value || value == 0 {
				continue
			}

			e.horizontalFilter(bin)
			if value != e.maxHorizontal[bin] {
				continue
			}

			before := e.previousMagnitudes[center-1]
			after := e.previousMagnitudes[center+1]

			// Sum the 3x3 neighbourhood so the magnitude estimate is more
			// robust against discretization effects. The terms are added in
			// a fixed left-to-right order.
			total := centerRaw[bin] + before[bin] + after[bin] +
				centerRaw[bin+1] + before[bin+1] + after[bin+1] +
				centerRaw[bin-1] + before[bin-1] + after[bin-1]

			e.eventPoints = append(e.eventPoints, EventPoint{
				Time:      uint32(center),
				Frequency: uint32(bin),
				Magnitude: total,
			})
		}

		// The oldest frame of the window is no longer needed.
		oldest := frame - window + 1
		delete(e.previousMaxMagnitudes, oldest)
		delete(e.previousMagnitudes, oldest)
	}

	// Advance the ring slot, wrapping around at the end.
	nextSlot := slot + 1
	if nextSlot == len(e.magnitudes) {
		nextSlot = 0
	}
	e.magnitudesIndex = nextSlot

	// Advance the analysis frame counter.
	e.analysisFrameIndex = frame + 1
}
// processResultFinish runs after the last frame: it first turns the collected
// event points into fingerprints and then releases the processor's resources
// through ProcessingFinished.
func (e *EventPointProcessor) processResultFinish() {
	// Fingerprints are built before the resources are released.
	e.packEventPointsIntoFingerprints()

	e.ProcessingFinished()
}
// horizontalFilter computes, for band j, the maximum of the frequency-filtered
// magnitudes over the frames around the frame under analysis and stores it in
// maxHorizontal[j]. The result starts at -1000 and is only raised by larger
// values.
//
// The scanned range begins half a window before the centre frame and ends just
// before half a window after it; the centre frame itself lies half a window
// behind the current analysis frame index.
func (e *EventPointProcessor) horizontalFilter(j int) {
	peak := &e.maxHorizontal[j]
	*peak = -1000

	half := e.maxFilterWindowSizeTime / 2
	center := e.analysisFrameIndex - half

	// Run the max filter along the time axis.
	for frame, end := center-half, center+half; frame < end; frame++ {
		row := e.previousMaxMagnitudes[frame]
		if row[j] > *peak {
			*peak = row[j]
		}
	}
}
// packEventPointsIntoFingerprints combines the collected event points into
// fingerprints made of three points p1, p2, p3 taken in slice order
// (index of p1 < index of p2 < index of p3).
//
// A pair of consecutive points in a triple (p1,p2 and p2,p3) is accepted when
// its time difference lies within the instance's minimum/maximum time distance
// and its absolute frequency difference lies within the minimum/maximum
// frequency distance. Every accepted triple is appended to e.fingerprints.
//
// Event points are appended in increasing time order by processResult, so the
// scan for a partner stops as soon as the time difference exceeds the maximum.
func (e *EventPointProcessor) packEventPointsIntoFingerprints() {
	minFreqDistance := int64(e.instance.PointFilterMinimumFrequencyDistance)
	maxFreqDistance := int64(e.instance.PointFilterMaximumFrequencyDistance)
	minTimeDistance := int64(e.instance.PointFilterMinimumTimeDistance)
	maxTimeDistance := int64(e.instance.PointFilterMaximumTimeDistance)

	points := e.eventPoints

	for i := range points {
		p1 := &points[i]

		// j and k are absolute indices into points. Ranging over a sub-slice
		// yields offsets that start at 0, which must not be used to index the
		// full slice.
		for j := i + 1; j < len(points); j++ {
			p2 := &points[j]

			fDiff := utilities.AbsInt64(int64(p1.Frequency) - int64(p2.Frequency))
			tDiff := int64(p2.Time) - int64(p1.Time)

			if tDiff > maxTimeDistance {
				// All later points are at least as far away in time.
				break
			}
			if tDiff < minTimeDistance {
				continue
			}
			if fDiff < minFreqDistance {
				continue
			}
			if fDiff > maxFreqDistance {
				continue
			}

			for k := j + 1; k < len(points); k++ {
				p3 := &points[k]

				fDiff = utilities.AbsInt64(int64(p2.Frequency) - int64(p3.Frequency))
				tDiff = int64(p3.Time) - int64(p2.Time)

				if tDiff > maxTimeDistance {
					break
				}
				if tDiff < minTimeDistance {
					continue
				}
				if fDiff < minFreqDistance {
					continue
				}
				if fDiff > maxFreqDistance {
					continue
				}

				e.fingerprints = append(e.fingerprints, NewFingerprint(p1, p2, p3))
			}
		}
	}
}