256 lines
7.3 KiB
Go
256 lines
7.3 KiB
Go
package panako
|
|
|
|
import "C"
|
|
import (
|
|
"git.gammaspectra.live/S.O.N.G/Hibiki/cgo"
|
|
"git.gammaspectra.live/S.O.N.G/Hibiki/utilities"
|
|
"git.gammaspectra.live/S.O.N.G/Kirika/audio"
|
|
"git.gammaspectra.live/S.O.N.G/goborator"
|
|
"runtime"
|
|
"unsafe"
|
|
)
|
|
|
|
type EventPointProcessor struct {
|
|
audio.Sink
|
|
instance *Instance
|
|
magnitudes [][]float32
|
|
maxMagnitudes [][]float32
|
|
magnitudesIndex int
|
|
|
|
previousMaxMagnitudes map[int][]float32
|
|
previousMagnitudes map[int][]float32
|
|
|
|
eventPoints []EventPoint
|
|
fingerprints []Fingerprint
|
|
|
|
analysisFrameIndex int
|
|
maxFilterVertical *cgo.LemireMaxClampedFilter
|
|
maxFilterWindowSizeFrequency int
|
|
maxFilterWindowSizeTime int
|
|
maxHorizontal []float32
|
|
bandNumber int
|
|
gaborator *goborator.Gaborator
|
|
pinner runtime.Pinner
|
|
}
|
|
|
|
func NewEventPointProcessor(instance *Instance) (*EventPointProcessor, error) {
|
|
ob := &EventPointProcessor{
|
|
instance: instance,
|
|
magnitudesIndex: 0,
|
|
previousMaxMagnitudes: make(map[int][]float32),
|
|
previousMagnitudes: make(map[int][]float32),
|
|
analysisFrameIndex: 0,
|
|
maxFilterWindowSizeFrequency: instance.PointFilterMaximumFrequencyFilterSize,
|
|
maxFilterWindowSizeTime: instance.PointFilterMaximumTimeFilterSize,
|
|
eventPoints: make([]EventPoint, 0, 8196),
|
|
fingerprints: make([]Fingerprint, 0, 8196),
|
|
}
|
|
|
|
ob.gaborator = goborator.NewGaborator(
|
|
instance.BlockSize,
|
|
float64(instance.SampleRate),
|
|
instance.GetTransformBandsPerOctave(),
|
|
float64(instance.GetTransformMinimumFrequency()),
|
|
float64(instance.GetTransformMaximumFrequency()),
|
|
float64(instance.GetTransformReferenceFrequency()),
|
|
instance.GetTransformTimeResolutionInSamples(),
|
|
)
|
|
|
|
ob.bandNumber = ob.gaborator.GetNumberOfBands()
|
|
|
|
ob.magnitudes = make([][]float32, ob.maxFilterWindowSizeTime)
|
|
|
|
ob.maxMagnitudes = make([][]float32, ob.maxFilterWindowSizeTime)
|
|
for i := range ob.maxMagnitudes {
|
|
ob.maxMagnitudes[i] = make([]float32, ob.bandNumber)
|
|
ob.pinner.Pin(unsafe.SliceData(ob.maxMagnitudes[i]))
|
|
}
|
|
|
|
ob.maxHorizontal = make([]float32, ob.bandNumber)
|
|
|
|
var err error
|
|
ob.maxFilterVertical, err = cgo.NewLemireMaxClampedFilter(ob.maxFilterWindowSizeFrequency, ob.bandNumber)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return ob, nil
|
|
}
|
|
|
|
func (e *EventPointProcessor) GetMagnitudes() []float32 {
|
|
return e.magnitudes[e.magnitudesIndex]
|
|
}
|
|
|
|
func (e *EventPointProcessor) GetFingerprints() []Fingerprint {
|
|
return e.fingerprints
|
|
}
|
|
|
|
func (e *EventPointProcessor) GetEventPoints() []EventPoint {
|
|
return e.eventPoints
|
|
}
|
|
|
|
func (e *EventPointProcessor) Process(source audio.Source) error {
|
|
return e.ProcessBlockChannel(source.ToFloat32().GetBlocks())
|
|
}
|
|
|
|
func (e *EventPointProcessor) ProcessBlockChannel(channel chan []float32) error {
|
|
err := e.gaborator.GaborBlockTransform(channel, e.processResult)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
e.processResultFinish()
|
|
return nil
|
|
}
|
|
|
|
func (e *EventPointProcessor) ProcessingFinished() {
|
|
e.gaborator.ProcessingFinished()
|
|
if e.maxFilterVertical != nil {
|
|
e.maxFilterVertical.Close()
|
|
e.maxFilterVertical = nil
|
|
}
|
|
e.pinner.Unpin()
|
|
}
|
|
|
|
func (e *EventPointProcessor) GetLatency() int64 {
|
|
return e.gaborator.GetLatency()
|
|
}
|
|
|
|
func (e *EventPointProcessor) processResult(currentMagnitudes []float32) {
|
|
e.magnitudes[e.magnitudesIndex] = currentMagnitudes
|
|
|
|
//store the frame magnitudes
|
|
|
|
e.previousMagnitudes[e.analysisFrameIndex] = e.magnitudes[e.magnitudesIndex]
|
|
e.pinner.Pin(unsafe.SliceData(e.magnitudes[e.magnitudesIndex]))
|
|
|
|
//run a max filter over frequency bins
|
|
e.maxFilterVertical.Filter(e.magnitudes[e.magnitudesIndex], e.maxMagnitudes[e.magnitudesIndex])
|
|
//store the max filtered frequency bins
|
|
e.previousMaxMagnitudes[e.analysisFrameIndex] = e.maxMagnitudes[e.magnitudesIndex]
|
|
|
|
//find the horizontal maxima
|
|
if len(e.previousMaxMagnitudes) == e.maxFilterWindowSizeTime {
|
|
time := e.analysisFrameIndex - e.maxFilterWindowSizeTime/2
|
|
|
|
maxFrame := e.previousMaxMagnitudes[time]
|
|
frameMagnitudes := e.previousMagnitudes[time]
|
|
|
|
for frequency := 2; frequency < len(frameMagnitudes)-1; frequency++ {
|
|
maxVal := maxFrame[frequency]
|
|
currentVal := frameMagnitudes[frequency]
|
|
|
|
if maxVal == currentVal && currentVal != 0 {
|
|
e.horizontalFilter(frequency)
|
|
maxVal = e.maxHorizontal[frequency]
|
|
if currentVal == maxVal {
|
|
prevFrameMagnitudes := e.previousMagnitudes[time-1]
|
|
nextFrameMagnitudes := e.previousMagnitudes[time+1]
|
|
|
|
//add the magnitude of surrounding bins for magnitude estimates more robust against discretization effects
|
|
totalMagnitude := frameMagnitudes[frequency] + prevFrameMagnitudes[frequency] + nextFrameMagnitudes[frequency] +
|
|
frameMagnitudes[frequency+1] + prevFrameMagnitudes[frequency+1] + nextFrameMagnitudes[frequency+1] +
|
|
frameMagnitudes[frequency-1] + prevFrameMagnitudes[frequency-1] + nextFrameMagnitudes[frequency-1]
|
|
|
|
e.eventPoints = append(e.eventPoints, EventPoint{
|
|
Time: uint32(time),
|
|
Frequency: uint32(frequency),
|
|
Magnitude: totalMagnitude,
|
|
})
|
|
}
|
|
}
|
|
}
|
|
|
|
//Remove analysis frames that are not needed any more:
|
|
ix := e.analysisFrameIndex - e.maxFilterWindowSizeTime + 1
|
|
delete(e.previousMaxMagnitudes, ix)
|
|
delete(e.previousMagnitudes, ix)
|
|
}
|
|
|
|
//magnitude index counter
|
|
e.magnitudesIndex++
|
|
if e.magnitudesIndex == len(e.magnitudes) {
|
|
e.magnitudesIndex = 0
|
|
}
|
|
|
|
//Increment analysis frame counter
|
|
e.analysisFrameIndex++
|
|
}
|
|
|
|
func (e *EventPointProcessor) processResultFinish() {
|
|
e.packEventPointsIntoFingerprints()
|
|
e.ProcessingFinished()
|
|
}
|
|
|
|
func (e *EventPointProcessor) horizontalFilter(j int) {
|
|
e.maxHorizontal[j] = -1000
|
|
|
|
centerFrameIndex := e.analysisFrameIndex - e.maxFilterWindowSizeTime/2
|
|
startFrameIndex := centerFrameIndex - e.maxFilterWindowSizeTime/2
|
|
stopFrameIndex := centerFrameIndex + e.maxFilterWindowSizeTime/2
|
|
|
|
// Run a horizontal max filter
|
|
for i := startFrameIndex; i < stopFrameIndex; i++ {
|
|
maxFrame := e.previousMaxMagnitudes[i]
|
|
if maxFrame[j] > e.maxHorizontal[j] {
|
|
e.maxHorizontal[j] = maxFrame[j]
|
|
}
|
|
}
|
|
}
|
|
|
|
func (e *EventPointProcessor) packEventPointsIntoFingerprints() {
|
|
minFreqDistance := int64(e.instance.PointFilterMinimumFrequencyDistance)
|
|
maxFreqDistance := int64(e.instance.PointFilterMaximumFrequencyDistance)
|
|
|
|
minTimeDistance := int64(e.instance.PointFilterMinimumTimeDistance)
|
|
maxTimeDistance := int64(e.instance.PointFilterMaximumTimeDistance)
|
|
|
|
for i := range e.eventPoints {
|
|
p1 := &e.eventPoints[i]
|
|
|
|
index2 := i + 1
|
|
for j := range e.eventPoints[index2:] {
|
|
p2 := &e.eventPoints[j]
|
|
|
|
fDiff := utilities.AbsInt64(int64(p1.Frequency) - int64(p2.Frequency))
|
|
tDiff := int64(p2.Time) - int64(p1.Time)
|
|
|
|
if tDiff > maxTimeDistance {
|
|
break
|
|
}
|
|
if tDiff < minTimeDistance {
|
|
continue
|
|
}
|
|
|
|
if fDiff < minFreqDistance {
|
|
continue
|
|
}
|
|
if fDiff > maxFreqDistance {
|
|
continue
|
|
}
|
|
index3 := index2 + j + 1
|
|
|
|
for k := range e.eventPoints[index3:] {
|
|
p3 := &e.eventPoints[k]
|
|
fDiff = utilities.AbsInt64(int64(p2.Frequency) - int64(p3.Frequency))
|
|
tDiff = int64(p3.Time) - int64(p2.Time)
|
|
|
|
if tDiff > maxTimeDistance {
|
|
break
|
|
}
|
|
if tDiff < minTimeDistance {
|
|
continue
|
|
}
|
|
|
|
if fDiff < minFreqDistance {
|
|
continue
|
|
}
|
|
if fDiff > maxFreqDistance {
|
|
continue
|
|
}
|
|
|
|
e.fingerprints = append(e.fingerprints, NewFingerprint(p1, p2, p3))
|
|
}
|
|
}
|
|
}
|
|
}
|