Implemented Panako with matching prints. FLAC/MP3 decoder, audio resampler. Fixed errors in transcription. Test files not included

This commit is contained in:
DataHoarder 2022-01-26 23:57:15 +01:00
parent 3297f0af3f
commit 800079940a
16 changed files with 890 additions and 298 deletions

View file

@ -1,41 +1 @@
package Hibiki
// Tuning parameters consumed by the Panako strategy.
const (
	// Audio block handed to the transform.
	PANAKO_AUDIO_BLOCK_SIZE    = 8192
	PANAKO_AUDIO_BLOCK_OVERLAP = 0

	// Window sizes of the max filters run over frequency and over time.
	PANAKO_FREQ_MAX_FILTER_SIZE = 103
	PANAKO_TIME_MAX_FILTER_SIZE = 25

	// Allowed frequency/time distances between the event points that are
	// combined into one fingerprint.
	PANAKO_FP_MIN_FREQ_DIST = 1
	PANAKO_FP_MAX_FREQ_DIST = 128
	PANAKO_FP_MIN_TIME_DIST = 2
	PANAKO_FP_MAX_TIME_DIST = 33

	// Sample rate the audio is resampled to before fingerprinting.
	PANAKO_SAMPLE_RATE = 16000

	// Spectral transform configuration.
	PANAKO_TRANSF_MIN_FREQ         = 110
	PANAKO_TRANSF_MAX_FREQ         = 7040
	PANAKO_TRANSF_REF_FREQ         = 440
	PANAKO_TRANSF_BANDS_PER_OCTAVE = 85
	PANAKO_TRANSF_TIME_RESOLUTION  = 128

	// Hash lookup range used when querying the store.
	PANAKO_QUERY_RANGE = 3

	// Minimum number of hits a resource needs before and after filtering.
	PANAKO_MIN_HITS_UNFILTERED = 10
	PANAKO_MIN_HITS_FILTERED   = 5

	// Accepted bounds for the time and frequency factors of a match.
	PANAKO_MIN_TIME_FACTOR = 0.8
	PANAKO_MAX_TIME_FACTOR = 1.2
	PANAKO_MIN_FREQ_FACTOR = 0.8
	PANAKO_MAX_FREQ_FACTOR = 1.2

	// PANAKO_MIN_MATCH_DURATION maybe 7 or 3 ?
	PANAKO_MIN_MATCH_DURATION = 3
	PANAKO_MIN_SEC_WITH_MATCH = 0.2
)
// Abs returns the absolute value of x.
//
// The negation wraps for the most negative int32, which is therefore
// returned unchanged.
func Abs(x int32) int32 {
	if x >= 0 {
		return x
	}
	return -x
}

177
Hibiki_test.go Normal file
View file

@ -0,0 +1,177 @@
package Hibiki
import (
"git.gammaspectra.live/S.O.N.G/Hibiki/strategy/panako"
"git.gammaspectra.live/S.O.N.G/Hibiki/utilities/audio/format/flac"
"log"
"os"
"testing"
)
// TestPanakoKeyValueStore is an in-memory fingerprint store used by the
// tests in this file.
type TestPanakoKeyValueStore struct {
	// prints maps a fingerprint hash to every record stored under it.
	prints map[int64][]panako.StoreRecord
}

// NewTestPanakoKeyValueStore returns an empty store that is ready for use.
func NewTestPanakoKeyValueStore() *TestPanakoKeyValueStore {
	store := new(TestPanakoKeyValueStore)
	store.prints = map[int64][]panako.StoreRecord{}
	return store
}
// GetPanakoMatch pairs record with every stored record whose hash lies within
// lookupRange of record.Hash (both ends inclusive). It returns nil when
// nothing is found.
func (s TestPanakoKeyValueStore) GetPanakoMatch(record panako.StoreRecord, lookupRange int) []panako.MatchedRecord {
	var found []panako.MatchedRecord
	span := int64(lookupRange)
	for hash := record.Hash - span; hash <= record.Hash+span; hash++ {
		// Ranging over a missing map entry yields no iterations.
		for _, stored := range s.prints[hash] {
			found = append(found, panako.MatchedRecord{
				Query: record,
				Match: stored,
			})
		}
	}
	return found
}
// GetPanakoMatches runs GetPanakoMatch for each query record, in order, and
// concatenates the results.
func (s TestPanakoKeyValueStore) GetPanakoMatches(records []panako.StoreRecord, lookupRange int) []panako.MatchedRecord {
	var all []panako.MatchedRecord
	for i := range records {
		hits := s.GetPanakoMatch(records[i], lookupRange)
		all = append(all, hits...)
	}
	return all
}
// GetPanakoRecords returns every stored record whose ResourceId equals
// resourceId, or nil when the resource has no records.
//
// The records are collected by walking the hash map, so their order is
// unspecified.
func (s TestPanakoKeyValueStore) GetPanakoRecords(resourceId int64) []panako.StoreRecord {
	var records []panako.StoreRecord
	for _, bucket := range s.prints {
		for _, stored := range bucket {
			if stored.ResourceId == resourceId {
				records = append(records, stored)
			}
		}
	}
	return records
}
// recordExists reports whether a record with the same hash, resource, time
// and frequency as record is already stored.
func (s TestPanakoKeyValueStore) recordExists(record panako.StoreRecord) bool {
	for _, stored := range s.prints[record.Hash] {
		sameResource := stored.ResourceId == record.ResourceId
		samePoint := stored.Time == record.Time && stored.Frequency == record.Frequency
		if sameResource && samePoint {
			return true
		}
	}
	return false
}
// StorePanakoPrint adds record under its hash unless an identical record is
// already stored.
func (s TestPanakoKeyValueStore) StorePanakoPrint(record panako.StoreRecord) {
	if s.recordExists(record) {
		return
	}
	// append on a missing entry starts a new one-element slice.
	s.prints[record.Hash] = append(s.prints[record.Hash], record)
}
// StorePanakoPrints stores each record in order via StorePanakoPrint.
func (s TestPanakoKeyValueStore) StorePanakoPrints(records []panako.StoreRecord) {
	for i := range records {
		s.StorePanakoPrint(records[i])
	}
}
// TestHibiki is an end-to-end smoke test of the Panako strategy. It
// fingerprints test.flac and stores the prints as resource 1, fingerprints
// test_radio.flac and test2.flac, queries the store with all three sets and
// logs the query results together with rough store statistics.
//
// The audio fixtures are read from the working directory. When one of them
// does not exist the test is skipped rather than failed, so the suite stays
// usable in checkouts that do not ship the fixtures.
func TestHibiki(t *testing.T) {
	flacFormat := flac.NewFormat()
	//mp3Format := mp3.NewFormat()

	store := NewTestPanakoKeyValueStore()
	strategy := panako.NewStrategy(store)

	// fingerprintFile decodes the FLAC file at path and returns its
	// fingerprints. The file is kept open until the test finishes.
	fingerprintFile := func(path string) []*panako.Fingerprint {
		t.Helper()

		file, err := os.Open(path)
		if os.IsNotExist(err) {
			t.Skipf("fixture %q is not available: %v", path, err)
		}
		if err != nil {
			t.Fatal(err)
		}
		t.Cleanup(func() { _ = file.Close() })

		stream, err := flacFormat.Open(file)
		if err != nil {
			t.Fatal(err)
		}
		return strategy.StreamToFingerprints(stream)
	}

	originalPrints := fingerprintFile("test.flac")
	strategy.StoreFingerprints(1, originalPrints)

	prints := fingerprintFile("test_radio.flac")
	flacTestPrints := fingerprintFile("test2.flac")

	results1 := strategy.QueryFingerprints(flacTestPrints)
	results2 := strategy.QueryFingerprints(originalPrints)
	results3 := strategy.QueryFingerprints(prints)

	for _, result := range results1 {
		log.Printf("result 1 %#v\n", result)
	}
	for _, result := range results2 {
		log.Printf("result 2 %#v\n", result)
	}
	for _, result := range results3 {
		log.Printf("result 3 %#v\n", result)
	}

	count := 0
	for _, v := range store.prints {
		count += len(v)
	}

	// len(store.prints) is the number of distinct hash keys, not of resources.
	log.Printf("hashes: %d, fingerprints: %d ~%d KiB || ~%d KiB", len(store.prints), count, (count*(8+4+4))/1024, (count*(4+4))/1024)
}

9
go.mod
View file

@ -3,11 +3,14 @@ module git.gammaspectra.live/S.O.N.G/Hibiki
go 1.18
require (
git.gammaspectra.live/S.O.N.G/goborator v0.0.0-20220123203229-147da335b6eb
git.gammaspectra.live/S.O.N.G/goborator v0.0.0-20220126140813-481f5df7947e
github.com/dh1tw/gosamplerate v0.1.2
github.com/gammazero/deque v0.1.0
github.com/hajimehoshi/go-mp3 v0.3.2
github.com/mewkiz/flac v1.0.7
)
require (
github.com/dh1tw/gosamplerate v0.1.2 // indirect
github.com/mewkiz/flac v1.0.7 // indirect
github.com/icza/bitio v1.0.0 // indirect
github.com/mewkiz/pkg v0.0.0-20190919212034-518ade7978e2 // indirect
)

13
go.sum
View file

@ -1,5 +1,5 @@
git.gammaspectra.live/S.O.N.G/goborator v0.0.0-20220123203229-147da335b6eb h1:ItDfszsa4SZTvmDYy7Ofzh6TAj3K7OSgR/5iS2oJWHY=
git.gammaspectra.live/S.O.N.G/goborator v0.0.0-20220123203229-147da335b6eb/go.mod h1:ySjuueqe5HUqvf7lWS51Cy5UP2tgJWsezOv8UIm2arA=
git.gammaspectra.live/S.O.N.G/goborator v0.0.0-20220126140813-481f5df7947e h1:dEzZ+/k/7BizZ+1FFkhtmmu4E2PhuDyq1Q3eelIMZOw=
git.gammaspectra.live/S.O.N.G/goborator v0.0.0-20220126140813-481f5df7947e/go.mod h1:ySjuueqe5HUqvf7lWS51Cy5UP2tgJWsezOv8UIm2arA=
github.com/d4l3k/messagediff v1.2.2-0.20190829033028-7e0a312ae40b/go.mod h1:Oozbb1TVXFac9FtSIxHBMnBCq2qeH/2KkEQxENCrlLo=
github.com/dh1tw/gosamplerate v0.1.2 h1:oyqtZk67xB9B4l+vIZCZ3F0RYV/z66W58VOah11/ktI=
github.com/dh1tw/gosamplerate v0.1.2/go.mod h1:zooTyHpoR7hE+FLfdE3yjLHb2QA2NpMusNfuaZqEACM=
@ -8,14 +8,23 @@ github.com/gammazero/deque v0.1.0/go.mod h1:KQw7vFau1hHuM8xmI9RbgKFbAsQFWmBpqQ2K
github.com/go-audio/audio v1.0.0/go.mod h1:6uAu0+H2lHkwdGsAY+j2wHPNPpPoeg5AaEFh9FlA+Zs=
github.com/go-audio/riff v1.0.0/go.mod h1:l3cQwc85y79NQFCRB7TiPoNiaijp6q8Z0Uv38rVG498=
github.com/go-audio/wav v1.0.0/go.mod h1:3yoReyQOsiARkvPl3ERCi8JFjihzG6WhjYpZCf5zAWE=
github.com/hajimehoshi/go-mp3 v0.3.2 h1:xSYNE2F3lxtOu9BRjCWHHceg7S91IHfXfXp5+LYQI7s=
github.com/hajimehoshi/go-mp3 v0.3.2/go.mod h1:qMJj/CSDxx6CGHiZeCgbiq2DSUkbK0UbtXShQcnfyMM=
github.com/hajimehoshi/oto v0.6.1/go.mod h1:0QXGEkbuJRohbJaxr7ZQSxnju7hEhseiPx2hrh6raOI=
github.com/icza/bitio v1.0.0 h1:squ/m1SHyFeCA6+6Gyol1AxV9nmPPlJFT8c2vKdj3U8=
github.com/icza/bitio v1.0.0/go.mod h1:0jGnlLAx8MKMr9VGnn/4YrvZiprkvBelsVIbA9Jjr9A=
github.com/icza/mighty v0.0.0-20180919140131-cfd07d671de6 h1:8UsGZ2rr2ksmEru6lToqnXgA8Mz1DP11X4zSJ159C3k=
github.com/icza/mighty v0.0.0-20180919140131-cfd07d671de6/go.mod h1:xQig96I1VNBDIWGCdTt54nHt6EeI639SmHycLYL7FkA=
github.com/mewkiz/flac v1.0.7 h1:uIXEjnuXqdRaZttmSFM5v5Ukp4U6orrZsnYGGR3yow8=
github.com/mewkiz/flac v1.0.7/go.mod h1:yU74UH277dBUpqxPouHSQIar3G1X/QIclVbFahSd1pU=
github.com/mewkiz/pkg v0.0.0-20190919212034-518ade7978e2 h1:EyTNMdePWaoWsRSGQnXiSoQu0r6RS1eA557AwJhlzHU=
github.com/mewkiz/pkg v0.0.0-20190919212034-518ade7978e2/go.mod h1:3E2FUC/qYUfM8+r9zAwpeHJzqRVVMIYnpzD/clwWxyA=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/image v0.0.0-20190220214146-31aff87c08e9/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
golang.org/x/mobile v0.0.0-20190415191353-3e0bab5405d6/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o=
golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190429190828-d89cdac9e872/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=

View file

@ -2,8 +2,9 @@ package panako
import (
"fmt"
"git.gammaspectra.live/S.O.N.G/Hibiki"
"git.gammaspectra.live/S.O.N.G/Hibiki/utilities"
"git.gammaspectra.live/S.O.N.G/Hibiki/utilities/morton2d"
"log"
"math"
)
@ -14,16 +15,23 @@ import (
type Fingerprint struct {
entries [3]EventPoint
hash *int64
hash int64
modHash int32
}
func NewFingerprint(p1, p2, p3 EventPoint) Fingerprint {
return Fingerprint{
func NewFingerprint(p1, p2, p3 EventPoint) *Fingerprint {
if p2.DeltaTime(p1) < 0 {
log.Panicf("p2.Time < p1.Time : %d < %d", p2.Time, p1.Time)
}
if p3.DeltaTime(p2) < 0 {
log.Panicf("p3.Time < p2.Time : %d < %d", p3.Time, p2.Time)
}
return &Fingerprint{
entries: [3]EventPoint{p1, p2, p3},
}
}
func (f *Fingerprint) t1() int32 {
func (f *Fingerprint) T1() int32 {
return f.entries[0].Time
}
func (f *Fingerprint) t2() int32 {
@ -33,7 +41,7 @@ func (f *Fingerprint) t3() int32 {
return f.entries[2].Time
}
func (f *Fingerprint) f1() int32 {
func (f *Fingerprint) F1() int32 {
return f.entries[0].Frequency
}
func (f *Fingerprint) f2() int32 {
@ -55,7 +63,7 @@ func (f *Fingerprint) m3() float32 {
func (f *Fingerprint) RobustHash() int32 {
f1LargerThanF2 := 0
if f.f1() > f.f2() {
if f.F1() > f.f2() {
f1LargerThanF2 = 1
}
f2LargerThanF3 := 0
@ -64,7 +72,7 @@ func (f *Fingerprint) RobustHash() int32 {
}
f3LargerThanF1 := 0
if f.f3() > f.f1() {
if f.f3() > f.F1() {
f3LargerThanF1 = 1
}
@ -83,16 +91,16 @@ func (f *Fingerprint) RobustHash() int32 {
}
dt1t2LargerThant3t2 := 0
if (f.t2() - f.t1()) > (f.t3() - f.t2()) {
if (f.t2() - f.T1()) > (f.t3() - f.t2()) {
dt1t2LargerThant3t2 = 1
}
//9 bits f in range( 0 - 512) to 2 bits
f1Range := f.f1() >> 7
f1Range := f.F1() >> 7
f2Range := f.f2() >> 7
f3Range := f.f3() >> 7
diffT2T1 := f.t2() - f.t1()
diffT2T1 := f.t2() - f.T1()
diffT3T1 := f.t3() - f.t2()
timeRatio := float64(diffT2T1) / float64(diffT3T1)
@ -104,7 +112,7 @@ func (f *Fingerprint) RobustHash() int32 {
spreadT := maxTRatio - minTRatio
timeRatioHash := int32(math.Round((mappedTRatio - minTRatio) / spreadT * (1 << 9)))
diffF2F1 := f.f2() - f.f1()
diffF2F1 := f.f2() - f.F1()
diffF3F2 := f.f3() - f.f2()
freqRatio := float64(diffF2F1) / float64(diffF3F2)
@ -135,12 +143,12 @@ func (f *Fingerprint) RobustHash() int32 {
}
func (f *Fingerprint) Hash() int64 {
if f.hash != nil {
return *f.hash
if f.hash != 0 {
return f.hash
}
var f1LargerThanF2 int64 = 0
if f.f1() > f.f2() {
if f.F1() > f.f2() {
f1LargerThanF2 = 1
}
var f2LargerThanF3 int64 = 0
@ -149,63 +157,250 @@ func (f *Fingerprint) Hash() int64 {
}
var f3LargerThanF1 int64 = 0
if f.f3() > f.f1() {
if f.f3() > f.F1() {
f3LargerThanF1 = 1
}
var m1LargerThanm2 int64 = 0
var m1LargerThanM2 int64 = 0
if f.m1() > f.m2() {
m1LargerThanm2 = 1
m1LargerThanM2 = 1
}
var m2LargerThanm3 int64 = 0
var m2LargerThanM3 int64 = 0
if f.m2() > f.m3() {
m2LargerThanm3 = 1
m2LargerThanM3 = 1
}
var m3LargerThanm1 int64 = 0
var m3LargerThanM1 int64 = 0
if f.m3() > f.m1() {
m3LargerThanm1 = 1
m3LargerThanM1 = 1
}
var dt1t2LargerThant3t2 int64 = 0
if (f.t2() - f.t1()) > (f.t3() - f.t2()) {
if (f.t2() - f.T1()) > (f.t3() - f.t2()) {
dt1t2LargerThant3t2 = 1
}
var df1f2LargerThanf3f2 int64 = 0
if Hibiki.Abs(f.f2()-f.f1()) > Hibiki.Abs(f.f3()-f.f2()) {
if utilities.Abs(f.f2()-f.F1()) > utilities.Abs(f.f3()-f.f2()) {
dt1t2LargerThant3t2 = 1
}
//9 bits f in range( 0 - 512) to 8 bits
f1Range := int64(f.f1() >> 5)
f1Range := int64(f.F1() >> 1)
//7 bits (0-128) -> 5 bits
df2f1 := int64(Hibiki.Abs(f.f2()-f.f1()) >> 2)
df3f2 := int64(Hibiki.Abs(f.f3()-f.f2()) >> 2)
df2f1 := int64(utilities.Abs(f.f2()-f.F1()) >> 2)
df3f2 := int64(utilities.Abs(f.f3()-f.f2()) >> 2)
ratioT := int64(float64(f.t2()-f.t1()) / float64(f.t3()-f.t1()) * 64)
//6 bits max
ratioT := int64(float64(f.t2()-f.T1()) / float64(f.t3()-f.T1()) * 64)
var hash int64 = 0
hash += (ratioT & ((1 << 6) - 1)) << 0
hash += (f1LargerThanF2 & ((1 << 1) - 1)) << 6
hash += (f2LargerThanF3 & ((1 << 1) - 1)) << 7
hash += (f3LargerThanF1 & ((1 << 1) - 1)) << 8
hash += (m1LargerThanm2 & ((1 << 1) - 1)) << 9
hash += (m2LargerThanm3 & ((1 << 1) - 1)) << 10
hash += (m3LargerThanm1 & ((1 << 1) - 1)) << 11
hash += (dt1t2LargerThant3t2 & ((1 << 1) - 1)) << 12
hash += (df1f2LargerThanf3f2 & ((1 << 1) - 1)) << 13
hash += (f1Range & ((1 << 8) - 1)) << 14
hash += (df2f1 & ((1 << 6) - 1)) << 22
hash += (df3f2 & ((1 << 6) - 1)) << 28
//combine the hash components into a single 64-bit integer
f.hash = &hash
return hash
//for debugging purposes, these should be inlined later on
clampBits := func(v int64, n int) int64 {
return v & ((1 << n) - 1)
}
shiftBits := func(v int64, n int) int64 {
return v << n
}
//34 bit output
// +----+----------------+---|
// | # | field | n |
// +----+----------------+---|
// | | | |
// | | | |
// | 0 | ratioT | 6 |
// | | | |
// | | | |
// | | | |
// +----+----------------+---|
// | 6 | F1 > f2 | 1 |
// +----+----------------+---|
// | 7 | f2 > f3 | 1 |
// +----+----------------+---|
// | 8 | f3 > F1 | 1 |
// +----+----------------+---|
// | 9 | m1 > m2 | 1 |
// +----+----------------+---|
// | 10 | m2 > m3 | 1 |
// +----+----------------+---|
// | 11 | m3 > m1 | 1 |
// +----+----------------+---|
// | 12 |d(T1,t2)>d(t3,t2)| 1 |
// +----+----------------+----|
// | 13 |d(F1,f2)>d(f3,f2)| 1 |
// +----+----------------+---|
// | | | |
// | | | |
// | | | |
// | 14 | f1range | 8 |
// | | | |
// | | | |
// | | | |
// | | | |
// +----+----------------+---|
// | | | |
// | | | |
// | 22 | d(f2,F1) | 6 |
// | | | |
// | | | |
// | | | |
// +----+----------------+---|
// | | | |
// | | | |
// | 28 | d(f2,F1) | 6 |
// | | | |
// | | | |
// | | | |
// +----+----------------+---|
f.hash = shiftBits(clampBits(ratioT, 6), 0) |
shiftBits(clampBits(f1LargerThanF2, 1), 6) |
shiftBits(clampBits(f2LargerThanF3, 1), 7) |
shiftBits(clampBits(f3LargerThanF1, 1), 8) |
shiftBits(clampBits(m1LargerThanM2, 1), 9) |
shiftBits(clampBits(m2LargerThanM3, 1), 10) |
shiftBits(clampBits(m3LargerThanM1, 1), 11) |
shiftBits(clampBits(dt1t2LargerThant3t2, 1), 12) |
shiftBits(clampBits(df1f2LargerThanf3f2, 1), 13) |
shiftBits(clampBits(f1Range, 8), 14) |
shiftBits(clampBits(df2f1, 6), 22) |
shiftBits(clampBits(df3f2, 6), 28)
return f.hash
}
// ModifiedHash Slightly changed fingerprint with f1Range as 6-bit instead of 8-bit so it fits into 32-bit values
// ModifiedHash returns a 32-bit variant of Hash in which f1Range takes 6 bits
// instead of 8, so the packed value fits in an int32.
//
// Bit layout, least significant bit first:
//
//	offset  width  field
//	     0      6  ratioT = (t2-t1)/(t3-t1) * 64
//	     6      1  f1 > f2
//	     7      1  f2 > f3
//	     8      1  f3 > f1
//	     9      1  m1 > m2
//	    10      1  m2 > m3
//	    11      1  m3 > m1
//	    12      1  (t2-t1) > (t3-t2)
//	    13      1  |f2-f1| > |f3-f2|
//	    14      6  f1 >> 3
//	    20      6  |f2-f1| >> 2
//	    26      6  |f3-f2| >> 2
//
// The result is cached in f.modHash. A computed value of 0 cannot be told
// apart from "not cached yet" and is recomputed on every call.
//
// Earlier revisions wrote the |f2-f1| > |f3-f2| comparison into bit 12 and
// left bit 13 always clear; values produced by those revisions can differ
// from the ones returned here.
func (f *Fingerprint) ModifiedHash() int32 {
	if f.modHash != 0 {
		return f.modHash
	}

	// flag maps a comparison result onto a single hash bit.
	flag := func(set bool) int32 {
		if set {
			return 1
		}
		return 0
	}
	// field keeps the low width bits of v and moves them to bit offset.
	field := func(v int32, width, offset int) int32 {
		return (v & ((1 << width) - 1)) << offset
	}

	absF2F1 := utilities.Abs(f.f2() - f.F1())
	absF3F2 := utilities.Abs(f.f3() - f.f2())

	f1LargerThanF2 := flag(f.F1() > f.f2())
	f2LargerThanF3 := flag(f.f2() > f.f3())
	f3LargerThanF1 := flag(f.f3() > f.F1())

	m1LargerThanM2 := flag(f.m1() > f.m2())
	m2LargerThanM3 := flag(f.m2() > f.m3())
	m3LargerThanM1 := flag(f.m3() > f.m1())

	dt1t2LargerThant3t2 := flag((f.t2() - f.T1()) > (f.t3() - f.t2()))
	df1f2LargerThanf3f2 := flag(absF2F1 > absF3F2)

	//9 bits f in range( 0 - 512) to 6 bits
	f1Range := int32(f.F1() >> 3)

	//frequency distances are shifted down by two bits and stored in 6-bit fields
	df2f1 := int32(absF2F1 >> 2)
	df3f2 := int32(absF3F2 >> 2)

	//scaled to 0..64, only the low 6 bits are kept
	ratioT := int32(float64(f.t2()-f.T1()) / float64(f.t3()-f.T1()) * 64)

	//combine the hash components into a single 32-bit integer
	f.modHash = field(ratioT, 6, 0) |
		field(f1LargerThanF2, 1, 6) |
		field(f2LargerThanF3, 1, 7) |
		field(f3LargerThanF1, 1, 8) |
		field(m1LargerThanM2, 1, 9) |
		field(m2LargerThanM3, 1, 10) |
		field(m3LargerThanM1, 1, 11) |
		field(dt1t2LargerThant3t2, 1, 12) |
		field(df1f2LargerThanf3f2, 1, 13) |
		field(f1Range, 6, 14) |
		field(df2f1, 6, 20) |
		field(df3f2, 6, 26)

	return f.modHash
}
func (f *Fingerprint) String() string {
return fmt.Sprintf("(%d,%d),(%d,%d),(%d,%d),%d", f.t1(), f.f1(), f.t2(), f.f2(), f.t3(), f.f3(), f.Hash())
return fmt.Sprintf("(%d,%d),(%d,%d),(%d,%d),%d", f.T1(), f.F1(), f.t2(), f.f2(), f.t3(), f.f3(), f.Hash())
}
func (f *Fingerprint) Equals(other *Fingerprint) bool {
@ -218,7 +413,7 @@ func (f *Fingerprint) Equals(other *Fingerprint) bool {
sameHash := other.Hash() == f.Hash()
//if closer than 100 analysis frames (of e.g. 32ms), than hash is deemed the same).
closeInTime := Hibiki.Abs(other.t1()-f.t1()) < 100
closeInTime := utilities.Abs(other.T1()-f.T1()) < 100
return sameHash && closeInTime
}

View file

@ -11,3 +11,7 @@ type EventPoint struct {
// Magnitude The energy value of the element.
Magnitude float32
}
// DeltaTime returns p.Time minus point.Time; the result is negative when
// point has the larger Time.
func (p EventPoint) DeltaTime(point EventPoint) int32 {
	delta := p.Time - point.Time
	return delta
}

View file

@ -1,10 +1,8 @@
package panako
import (
"git.gammaspectra.live/S.O.N.G/Hibiki"
"git.gammaspectra.live/S.O.N.G/Hibiki/utilities"
"git.gammaspectra.live/S.O.N.G/goborator"
"io"
)
type EventPointProcessor struct {
@ -16,7 +14,7 @@ type EventPointProcessor struct {
previousMagnitudes map[int][]float32
eventPoints []EventPoint
fingerprints []Fingerprint
fingerprints []*Fingerprint
analysisFrameIndex int
maxFilterVertical *utilities.LemireMinMaxFilter
@ -34,19 +32,19 @@ func NewEventPointProcessor(fftSize int) (*EventPointProcessor, error) {
previousMaxMagnitudes: make(map[int][]float32),
previousMagnitudes: make(map[int][]float32),
analysisFrameIndex: 0,
maxFilterWindowSizeFrequency: Hibiki.PANAKO_FREQ_MAX_FILTER_SIZE,
maxFilterWindowSizeTime: Hibiki.PANAKO_TIME_MAX_FILTER_SIZE,
maxFilterWindowSizeFrequency: utilities.PANAKO_FREQ_MAX_FILTER_SIZE,
maxFilterWindowSizeTime: utilities.PANAKO_TIME_MAX_FILTER_SIZE,
maxHorizontal: make([]float32, fftSize/2),
}
ob.gaborator = goborator.NewGaborator(
Hibiki.PANAKO_AUDIO_BLOCK_SIZE,
Hibiki.PANAKO_SAMPLE_RATE,
Hibiki.PANAKO_TRANSF_BANDS_PER_OCTAVE,
Hibiki.PANAKO_TRANSF_MIN_FREQ,
Hibiki.PANAKO_TRANSF_MAX_FREQ,
Hibiki.PANAKO_TRANSF_REF_FREQ,
Hibiki.PANAKO_TRANSF_TIME_RESOLUTION,
utilities.PANAKO_AUDIO_BLOCK_SIZE,
utilities.PANAKO_SAMPLE_RATE,
utilities.PANAKO_TRANSF_BANDS_PER_OCTAVE,
utilities.PANAKO_TRANSF_MIN_FREQ,
utilities.PANAKO_TRANSF_MAX_FREQ,
utilities.PANAKO_TRANSF_REF_FREQ,
utilities.PANAKO_TRANSF_TIME_RESOLUTION,
)
ob.magnitudes = make([][]float32, ob.maxFilterWindowSizeTime)
@ -55,7 +53,7 @@ func NewEventPointProcessor(fftSize int) (*EventPointProcessor, error) {
}
ob.maxMagnitudes = make([][]float32, ob.maxFilterWindowSizeTime)
for i := range ob.magnitudes {
for i := range ob.maxMagnitudes {
ob.maxMagnitudes[i] = make([]float32, fftSize/2)
}
@ -67,34 +65,11 @@ func NewEventPointProcessor(fftSize int) (*EventPointProcessor, error) {
return ob, nil
}
// NaiveMaxFilter returns a slice of the same length as data in which element
// i is the maximum of data over the window [i-halfFilterSize, i+halfFilterSize],
// clipped to the bounds of data.
//
// The running maximum starts at -1000000, so that value is reported for
// windows that are empty (negative halfFilterSize) or whose values are all
// smaller than it. The clamp parameter is accepted but not used.
func NaiveMaxFilter(data []float32, halfFilterSize int, clamp bool) []float32 {
	filtered := make([]float32, len(data))
	lastIndex := len(data) - 1

	for i := range data {
		startIndex := i - halfFilterSize
		if startIndex < 0 {
			startIndex = 0
		}

		// The window end is inclusive, so it must stop at the last valid
		// index rather than at len(data).
		stopIndex := i + halfFilterSize
		if stopIndex > lastIndex {
			stopIndex = lastIndex
		}

		maxValue := float32(-1000000.)
		for j := startIndex; j <= stopIndex; j++ {
			if maxValue < data[j] {
				maxValue = data[j]
			}
		}
		filtered[i] = maxValue
	}

	return filtered
}
// GetMagnitudes returns the magnitude frame stored at the current
// magnitudesIndex.
func (e *EventPointProcessor) GetMagnitudes() []float32 {
	current := e.magnitudesIndex
	return e.magnitudes[current]
}
func (e *EventPointProcessor) GetFingerprints() []Fingerprint {
func (e *EventPointProcessor) GetFingerprints() []*Fingerprint {
return e.fingerprints
}
@ -102,9 +77,14 @@ func (e *EventPointProcessor) GetEventPoints() []EventPoint {
return e.eventPoints
}
// ProcessReader Processes a stream of float32
func (e *EventPointProcessor) ProcessReader(reader io.Reader) {
e.gaborator.GaborTransform(reader)
func (e *EventPointProcessor) ProcessBlockChannel(channel chan []float32) {
e.gaborator.GaborBlockTransform(channel)
e.ProcessingFinished()
}
// ProcessChannel passes channel to the gaborator's GaborTransform and, once
// that call returns, runs ProcessingFinished on this processor.
func (e *EventPointProcessor) ProcessChannel(channel chan float32) {
e.gaborator.GaborTransform(channel)
e.ProcessingFinished()
}
func (e *EventPointProcessor) Process(block []float32) error {
@ -118,12 +98,14 @@ func (e *EventPointProcessor) GetLatency() int64 {
func (e *EventPointProcessor) ProcessingFinished() {
e.gaborator.ProcessingFinished()
//calculate the fft
allMagnitudes := e.gaborator.GetCoefficients()
for _, currentMagnitudes := range allMagnitudes {
e.magnitudes[e.magnitudesIndex] = currentMagnitudes
//store the frame magnitudes
e.previousMagnitudes[e.analysisFrameIndex] = e.magnitudes[e.magnitudesIndex]
//run a max filter over frequency bins
@ -133,30 +115,30 @@ func (e *EventPointProcessor) ProcessingFinished() {
//find the horizontal maxima
if len(e.previousMaxMagnitudes) == e.maxFilterWindowSizeTime {
t := e.analysisFrameIndex - e.maxFilterWindowSizeTime/2
time := e.analysisFrameIndex - e.maxFilterWindowSizeTime/2
maxFrame := e.previousMaxMagnitudes[t]
frameMagnitudes := e.previousMagnitudes[t]
maxFrame := e.previousMaxMagnitudes[time]
frameMagnitudes := e.previousMagnitudes[time]
for f := 2; f < len(frameMagnitudes)-1; f++ {
maxVal := maxFrame[f]
currentVal := frameMagnitudes[f]
for frequency := 2; frequency < len(frameMagnitudes)-1; frequency++ {
maxVal := maxFrame[frequency]
currentVal := frameMagnitudes[frequency]
if maxVal == currentVal {
e.horizontalFilter(f)
maxVal = e.maxHorizontal[f]
e.horizontalFilter(frequency)
maxVal = e.maxHorizontal[frequency]
if currentVal == maxVal && currentVal != 0 {
prevFrameMagnitudes := e.previousMagnitudes[t-1]
nextFrameMagnitudes := e.previousMagnitudes[t+1]
prevFrameMagnitudes := e.previousMagnitudes[time-1]
nextFrameMagnitudes := e.previousMagnitudes[time+1]
//add the magnitude of surrounding bins for magnitude estimates more robust against discretization effects
totalMagnitude := frameMagnitudes[f] + prevFrameMagnitudes[f] + nextFrameMagnitudes[f]
totalMagnitude += frameMagnitudes[f+1] + prevFrameMagnitudes[f+1] + nextFrameMagnitudes[f+1]
totalMagnitude += frameMagnitudes[f-1] + prevFrameMagnitudes[f-1] + nextFrameMagnitudes[f-1]
totalMagnitude := frameMagnitudes[frequency] + prevFrameMagnitudes[frequency] + nextFrameMagnitudes[frequency] +
frameMagnitudes[frequency+1] + prevFrameMagnitudes[frequency+1] + nextFrameMagnitudes[frequency+1] +
frameMagnitudes[frequency-1] + prevFrameMagnitudes[frequency-1] + nextFrameMagnitudes[frequency-1]
e.eventPoints = append(e.eventPoints, EventPoint{
Time: int32(t),
Frequency: int32(f),
Time: int32(time),
Frequency: int32(frequency),
Magnitude: totalMagnitude,
})
}
@ -181,18 +163,9 @@ func (e *EventPointProcessor) ProcessingFinished() {
e.packEventPointsIntoFingerprints()
}
// maxInSlice returns the largest value in array. It panics when array is
// empty, because the first element seeds the running maximum.
func maxInSlice(array []float32) float32 {
	largest := array[0]
	for i := 1; i < len(array); i++ {
		if largest < array[i] {
			largest = array[i]
		}
	}
	return largest
}
func (e *EventPointProcessor) horizontalFilter(j int) {
for i := 0; i < len(e.maxHorizontal); i++ {
for i := range e.maxHorizontal {
e.maxHorizontal[i] = -1000
}
@ -200,24 +173,28 @@ func (e *EventPointProcessor) horizontalFilter(j int) {
startFrameIndex := centerFrameIndex - e.maxFilterWindowSizeTime/2
stopFrameIndex := centerFrameIndex + e.maxFilterWindowSizeTime/2
// Run a horizontal max filter
for i := startFrameIndex; i < stopFrameIndex; i++ {
maxFrame := e.previousMagnitudes[i]
e.maxHorizontal[j] = maxInSlice(maxFrame)
maxFrame := e.previousMaxMagnitudes[i]
if maxFrame[j] > e.maxHorizontal[j] {
e.maxHorizontal[j] = maxFrame[j]
}
}
}
func (e *EventPointProcessor) packEventPointsIntoFingerprints() {
minFreqDistance := int32(Hibiki.PANAKO_FP_MIN_FREQ_DIST)
maxFreqDistance := int32(Hibiki.PANAKO_FP_MAX_FREQ_DIST)
const minFreqDistance = int32(utilities.PANAKO_FP_MIN_FREQ_DIST)
const maxFreqDistance = int32(utilities.PANAKO_FP_MAX_FREQ_DIST)
minTimeDistance := int32(Hibiki.PANAKO_FP_MIN_TIME_DIST)
maxTimeDistance := int32(Hibiki.PANAKO_FP_MAX_TIME_DIST)
const minTimeDistance = int32(utilities.PANAKO_FP_MIN_TIME_DIST)
const maxTimeDistance = int32(utilities.PANAKO_FP_MAX_TIME_DIST)
for i, p1 := range e.eventPoints {
for j, p2 := range e.eventPoints[i+1:] {
index2 := i + 1
for j, p2 := range e.eventPoints[index2:] {
fDiff := Hibiki.Abs(p1.Frequency - p2.Frequency)
fDiff := utilities.Abs(p1.Frequency - p2.Frequency)
tDiff := p2.Time - p1.Time
if tDiff > maxTimeDistance {
@ -233,10 +210,11 @@ func (e *EventPointProcessor) packEventPointsIntoFingerprints() {
if fDiff > maxFreqDistance {
continue
}
index3 := index2 + j + 1
for _, p3 := range e.eventPoints[j+1:] {
fDiff := Hibiki.Abs(p2.Frequency - p3.Frequency)
tDiff := p3.Time - p2.Time
for _, p3 := range e.eventPoints[index3:] {
fDiff = utilities.Abs(p2.Frequency - p3.Frequency)
tDiff = p3.Time - p2.Time
if tDiff > maxTimeDistance {
break
@ -257,13 +235,3 @@ func (e *EventPointProcessor) packEventPointsIntoFingerprints() {
}
}
}
// Reset discards the collected event points and fingerprints, rewinds the
// frame and magnitude indices to zero, and replaces both magnitude history
// maps with empty ones.
func (e *EventPointProcessor) Reset() {
	// Counters.
	e.analysisFrameIndex = 0
	e.magnitudesIndex = 0

	// Collected results.
	e.eventPoints = []EventPoint{}
	e.fingerprints = []Fingerprint{}

	// Per-frame history.
	e.previousMaxMagnitudes = make(map[int][]float32)
	e.previousMagnitudes = make(map[int][]float32)
}

View file

@ -24,6 +24,10 @@ func (r *StoreRecord) GetPackedPrint() (int64, int64) {
return r.Hash, (int64(r.Time) << 32) | int64(r.Frequency)
}
// GetKey returns the record's Hash together with a second value that holds
// Time shifted left by 32 bits, OR-ed with Frequency.
func (r *StoreRecord) GetKey() (int64, int64) {
	packed := int64(r.Time)<<32 | int64(r.Frequency)
	return r.Hash, packed
}
func NewStoreRecordFromPacked(resourceId, hash, packed int64) StoreRecord {
return StoreRecord{
ResourceId: resourceId,

View file

@ -1,9 +1,8 @@
package panako
import (
"git.gammaspectra.live/S.O.N.G/Hibiki"
"git.gammaspectra.live/S.O.N.G/Hibiki/utilities"
"git.gammaspectra.live/S.O.N.G/Hibiki/utilities/audio"
"io"
"log"
"math"
"sort"
@ -28,19 +27,19 @@ type QueryResult struct {
}
type Strategy struct {
store *Store
store Store
queryRange int
latency int64
}
func NewStrategy(store *Store) *Strategy {
func NewStrategy(store Store) *Strategy {
return &Strategy{
store: store,
queryRange: Hibiki.PANAKO_QUERY_RANGE,
queryRange: utilities.PANAKO_QUERY_RANGE,
}
}
func (s *Strategy) StoreStream(resourceId int64, stream audio.Stream) float64 {
func (s *Strategy) StoreStream(resourceId int64, stream *audio.Stream) float64 {
prints := s.StreamToFingerprints(stream)
s.StoreFingerprints(resourceId, prints)
if len(prints) > 0 {
@ -49,39 +48,49 @@ func (s *Strategy) StoreStream(resourceId int64, stream audio.Stream) float64 {
return 0
}
func (s *Strategy) StreamSliceToFingerprints(stream audio.Stream, startTimeOffset, duration float64) []Fingerprint {
func (s *Strategy) StreamSliceToFingerprints(stream *audio.Stream, startTimeOffset, duration float64) []*Fingerprint {
stream.AdvanceSeconds(startTimeOffset) //This can err but it's fine
stream.SetMaxDuration(duration)
return s.StreamToFingerprints(stream)
}
func (s *Strategy) StreamToFingerprints(stream audio.Stream) []Fingerprint {
resample, err := stream.DoResample(1, Hibiki.PANAKO_SAMPLE_RATE, audio.RESAMPLER_QUALITY_BEST)
func (s *Strategy) StreamToFingerprints(stream *audio.Stream) []*Fingerprint {
resample, err := stream.DoResample(1, utilities.PANAKO_SAMPLE_RATE, audio.RESAMPLER_QUALITY_MEDIUM)
if err != nil {
log.Panic(err)
}
return s.ReaderToFingerprints(resample)
return s.BlockChannelToFingerprints(resample.GetAsBlockChannel())
}
// ReaderToFingerprints Processes a stream of float32
func (s *Strategy) ReaderToFingerprints(reader io.Reader) []Fingerprint {
eventPointProcessor, err := NewEventPointProcessor(Hibiki.PANAKO_AUDIO_BLOCK_SIZE)
func (s *Strategy) BlockChannelToFingerprints(channel chan []float32) []*Fingerprint {
eventPointProcessor, err := NewEventPointProcessor(utilities.PANAKO_AUDIO_BLOCK_SIZE)
if err != nil {
log.Panic(err)
}
s.latency = eventPointProcessor.GetLatency()
eventPointProcessor.ProcessReader(reader)
eventPointProcessor.ProcessBlockChannel(channel)
return eventPointProcessor.GetFingerprints()
}
func (s *Strategy) QueryFingerprints(prints []Fingerprint) []QueryResult {
// ChannelToFingerprints feeds channel through a fresh EventPointProcessor and
// returns the fingerprints it produced. The processor's latency is recorded
// on the strategy. It panics when the processor cannot be created.
func (s *Strategy) ChannelToFingerprints(channel chan float32) []*Fingerprint {
	processor, err := NewEventPointProcessor(utilities.PANAKO_AUDIO_BLOCK_SIZE)
	if err != nil {
		log.Panic(err)
	}

	s.latency = processor.GetLatency()
	processor.ProcessChannel(channel)

	prints := processor.GetFingerprints()
	return prints
}
func (s *Strategy) QueryFingerprints(prints []*Fingerprint) []QueryResult {
hitsPerResource := make(map[int64][]Match)
var queryResults []QueryResult
for _, r := range (*s.store).GetPanakoMatches(getRecordsFromPrints(-1, prints), s.queryRange) {
for _, r := range s.store.GetPanakoMatches(getRecordsFromPrints(-1, prints), s.queryRange) {
match := Match{
ResourceId: r.Match.ResourceId,
MatchTime: r.Match.Time,
@ -102,14 +111,14 @@ func (s *Strategy) QueryFingerprints(prints []Fingerprint) []QueryResult {
var matchesToDelete []int64
for k, v := range hitsPerResource {
if len(v) < Hibiki.PANAKO_MIN_HITS_UNFILTERED {
for k, hitList := range hitsPerResource {
if len(hitList) < utilities.PANAKO_MIN_HITS_UNFILTERED {
matchesToDelete = append(matchesToDelete, k)
}
}
for _, v := range matchesToDelete {
delete(hitsPerResource, v)
for _, identifier := range matchesToDelete {
delete(hitsPerResource, identifier)
}
for resourceId, hitList := range hitsPerResource {
@ -120,8 +129,8 @@ func (s *Strategy) QueryFingerprints(prints []Fingerprint) []QueryResult {
maxListSize := 250
ix := len(hitList) / 5
if Hibiki.PANAKO_MIN_HITS_UNFILTERED > ix {
ix = Hibiki.PANAKO_MIN_HITS_UNFILTERED
if utilities.PANAKO_MIN_HITS_UNFILTERED > ix {
ix = utilities.PANAKO_MIN_HITS_UNFILTERED
}
if maxListSize < ix {
ix = maxListSize
@ -139,7 +148,7 @@ func (s *Strategy) QueryFingerprints(prints []Fingerprint) []QueryResult {
diff := hit.DeltaTime()
if diff == y1 {
x1 = hit.QueryTime
frequencyFactor = binToHz(hit.MatchTime) / binToHz(hit.QueryTime)
frequencyFactor = binToHz(hit.MatchFrequency) / binToHz(hit.QueryFrequency)
break
}
}
@ -147,7 +156,8 @@ func (s *Strategy) QueryFingerprints(prints []Fingerprint) []QueryResult {
y2 := mostCommonDeltaTforHitList(lastHits)
var x2 int32 = 0
for _, hit := range lastHits {
for i := len(lastHits) - 1; i >= 0; i-- {
hit := lastHits[i]
diff := hit.DeltaTime()
if diff == y2 {
x2 = hit.QueryTime
@ -163,7 +173,7 @@ func (s *Strategy) QueryFingerprints(prints []Fingerprint) []QueryResult {
threshold := float64(s.queryRange)
//only continue processing when time factor is reasonable
if timeFactor > Hibiki.PANAKO_MIN_TIME_FACTOR && timeFactor < Hibiki.PANAKO_MAX_TIME_FACTOR && frequencyFactor > Hibiki.PANAKO_MIN_FREQ_FACTOR && frequencyFactor < Hibiki.PANAKO_MAX_FREQ_FACTOR {
if timeFactor > utilities.PANAKO_MIN_TIME_FACTOR && timeFactor < utilities.PANAKO_MAX_TIME_FACTOR && frequencyFactor > utilities.PANAKO_MIN_FREQ_FACTOR && frequencyFactor < utilities.PANAKO_MAX_FREQ_FACTOR {
var filteredHits []Match
for _, hit := range hitList {
@ -180,8 +190,8 @@ func (s *Strategy) QueryFingerprints(prints []Fingerprint) []QueryResult {
}
//ignore resources with too few filtered hits remaining
if len(filteredHits) > Hibiki.PANAKO_MIN_HITS_FILTERED {
minDuration := float64(Hibiki.PANAKO_MIN_MATCH_DURATION)
if len(filteredHits) > utilities.PANAKO_MIN_HITS_FILTERED {
minDuration := float64(utilities.PANAKO_MIN_MATCH_DURATION)
queryStart := s.blocksToSeconds(filteredHits[0].QueryTime)
queryStop := s.blocksToSeconds(filteredHits[len(filteredHits)-1].QueryTime)
duration := queryStop - queryStart
@ -214,7 +224,7 @@ func (s *Strategy) QueryFingerprints(prints []Fingerprint) []QueryResult {
numberOfMatchingSeconds := math.Ceil(refStop - refStart)
emptySeconds := numberOfMatchingSeconds - float64(len(matchesPerSecondHistogram))
percentOfSecondsWithMatches := 1. - (emptySeconds / numberOfMatchingSeconds)
if percentOfSecondsWithMatches >= Hibiki.PANAKO_MIN_SEC_WITH_MATCH {
if percentOfSecondsWithMatches >= utilities.PANAKO_MIN_SEC_WITH_MATCH {
queryResults = append(queryResults, QueryResult{
QueryStart: queryStart,
QueryStop: queryStop,
@ -242,17 +252,17 @@ func (s *Strategy) QueryFingerprints(prints []Fingerprint) []QueryResult {
}
func (s *Strategy) blocksToSeconds(t int32) float64 {
return float64(t)*(float64(Hibiki.PANAKO_TRANSF_TIME_RESOLUTION)/float64(Hibiki.PANAKO_SAMPLE_RATE)) + float64(s.latency)/float64(Hibiki.PANAKO_SAMPLE_RATE)
return float64(t)*(float64(utilities.PANAKO_TRANSF_TIME_RESOLUTION)/float64(utilities.PANAKO_SAMPLE_RATE)) + float64(s.latency)/float64(utilities.PANAKO_SAMPLE_RATE)
}
func absoluteCentToHertz(absoluteCent float64) float64 {
return 8.17579892 * math.Pow(2.0, absoluteCent/1200.0)
return 8.17579892 * math.Pow(2.0, absoluteCent/1200.)
}
func binToHz(f int32) float64 {
centsPerBin := 1200. / Hibiki.PANAKO_TRANSF_BANDS_PER_OCTAVE
centsPerBin := 1200. / float64(utilities.PANAKO_TRANSF_BANDS_PER_OCTAVE)
diffFromMinFreqInCents := float64(f) * centsPerBin
minFreqInAbsCents := absoluteCentToHertz(Hibiki.PANAKO_TRANSF_MIN_FREQ)
minFreqInAbsCents := absoluteCentToHertz(float64(utilities.PANAKO_TRANSF_MIN_FREQ))
binInAbsCents := minFreqInAbsCents + diffFromMinFreqInCents
return absoluteCentToHertz(binInAbsCents)
@ -283,19 +293,19 @@ func mostCommonDeltaTforHitList(hitList []Match) int32 {
return mostCommonDeltaT
}
func getRecordsFromPrints(resourceId int64, prints []Fingerprint) []StoreRecord {
func getRecordsFromPrints(resourceId int64, prints []*Fingerprint) []StoreRecord {
records := make([]StoreRecord, len(prints))
for i, p := range prints {
records[i] = StoreRecord{
ResourceId: resourceId,
Hash: p.Hash(),
Time: p.t1(),
Frequency: p.f1(),
Time: p.T1(),
Frequency: p.F1(),
}
}
return records
}
func (s *Strategy) StoreFingerprints(resourceId int64, prints []Fingerprint) {
(*s.store).StorePanakoPrints(getRecordsFromPrints(resourceId, prints))
func (s *Strategy) StoreFingerprints(resourceId int64, prints []*Fingerprint) {
s.store.StorePanakoPrints(getRecordsFromPrints(resourceId, prints))
}

View file

@ -0,0 +1,113 @@
package flac
import (
"bytes"
"git.gammaspectra.live/S.O.N.G/Hibiki/utilities/audio"
flacLib "github.com/mewkiz/flac"
"io"
"log"
)
// Format is the FLAC decoder entry point. It carries no state, so the zero
// value is ready to use.
type Format struct{}

// NewFormat returns a FLAC Format value.
func NewFormat() Format {
	var format Format
	return format
}
// Open parses the FLAC stream in r and decodes it in a background goroutine.
//
// Decoded samples are normalised to float32 and delivered interleaved (one
// value per channel for each sample index) through the returned audio.Stream,
// which is created with the channel count and sample rate of the STREAMINFO
// block. The sample channel is unbuffered, so decoding only advances while the
// consumer keeps reading.
//
// The goroutine closes both the sample channel and the FLAC stream when
// decoding ends: at end of stream, or on the first decode error. Errors other
// than io.EOF are logged, because they can no longer be returned to the caller
// at that point.
func (f Format) Open(r io.ReadSeeker) (*audio.Stream, error) {
	stream, err := flacLib.Parse(r)
	if err != nil {
		return nil, err
	}

	newChannel := make(chan float32)

	go func() {
		defer stream.Close()
		defer close(newChannel)

		for {
			currentFrame, err := stream.ParseNext()
			if err != nil {
				if err != io.EOF {
					log.Printf("flac: decoding stopped: %v", err)
				}
				return
			}
			if len(currentFrame.Subframes) == 0 {
				continue
			}

			// The FLAC frame header may defer the sample size to STREAMINFO;
			// fall back to it in case the decoder leaves the field unset.
			bitsPerSample := currentFrame.BitsPerSample
			if bitsPerSample == 0 {
				bitsPerSample = stream.Info.BitsPerSample
			}
			if bitsPerSample == 0 {
				log.Printf("flac: frame %d has unknown bit depth", currentFrame.Num)
				return
			}

			// Largest positive sample value for this bit depth; constant for
			// the whole frame, so compute it once instead of per sample.
			fullScale := float32((int64(1) << (bitsPerSample - 1)) - 1)

			sampleCount := currentFrame.Subframes[0].NSamples
			for sample := 0; sample < sampleCount; sample++ {
				// Interleave: emit this sample index for every channel in turn.
				for _, subFrame := range currentFrame.Subframes {
					newChannel <- float32(subFrame.Samples[sample]) / fullScale
				}
			}
		}
	}()

	return audio.NewStream(newChannel, int(stream.Info.NChannels), float64(stream.Info.SampleRate)), nil
}
/*
func (f Format) Encode(stream *audio.Stream, writer io.Writer) error {
bitsPerSample := uint8(16)
encoder, err := flacLib.NewEncoder(writer, &meta.StreamInfo{
SampleRate: uint32(stream.GetSampleRate()),
NChannels: uint8(stream.GetChannels()),
BitsPerSample: bitsPerSample,
BlockSizeMin: 16,
BlockSizeMax: 65535,
FrameSizeMin: 0,
FrameSizeMax: 0,
})
if err != nil {
return err
}
defer encoder.Close()
for {
block, more := stream.GetBlock()
if stream.GetChannels() > 1 {
} else {
samples := make([]int32, len(block))
for i, v := range block {
samples[i] = int32(v * float32((int64(1)<<(bitsPerSample-1))-1))
}
err = encoder.WriteFrame(&frame.Frame{
Header: frame.Header{
BlockSize: uint16(len(samples)),
SampleRate: 0,
BitsPerSample: bitsPerSample,
HasFixedBlockSize: true,
},
Subframes: []*frame.Subframe{
{
SubHeader: frame.SubHeader{
Pred: frame.PredVerbatim,
},
Samples: samples,
NSamples: len(samples),
},
},
})
if err != nil {
return err
}
}
if !more {
break
}
}
return nil
}*/
// Identify reports whether the input looks like a FLAC file: peek must start
// with the "fLaC" stream marker and extension must be "flac".
//
// A peek shorter than the marker (including nil) is reported as not matching
// rather than causing an out-of-range slice panic.
func (f Format) Identify(peek []byte, extension string) bool {
	return extension == "flac" && bytes.HasPrefix(peek, []byte("fLaC"))
}

View file

@ -0,0 +1,19 @@
package format
import (
"git.gammaspectra.live/S.O.N.G/Hibiki/utilities/audio"
"io"
)
// Format is implemented by audio decoders that can recognise their own input
// and turn it into an audio.Stream.
type Format interface {
	// Identify checks whether a format is of a type. peek includes a few first bytes, extension is the lowercase file extension without a dot.
	Identify(peek []byte, extension string) bool
	// Open opens a stream and decodes it into an audio.Stream.
	// The stream is returned by pointer, matching audio.NewStream and the
	// decoders' Open methods.
	Open(r io.ReadSeeker) (*audio.Stream, error)
}
// Encoder is implemented by formats that can serialise an audio.Stream.
type Encoder interface {
// Encode receives a stream and returns an io.Reader for the encoded result,
// or an error.
Encode(stream *audio.Stream) (io.Reader, error)
}

View file

@ -0,0 +1,44 @@
package mp3
import (
"encoding/binary"
"git.gammaspectra.live/S.O.N.G/Hibiki/utilities/audio"
mp3Lib "github.com/hajimehoshi/go-mp3"
"io"
"math"
)
// Format is the MP3 decoder entry point. It carries no state, so the zero
// value is ready to use.
type Format struct{}

// NewFormat returns an MP3 Format value.
func NewFormat() Format {
	var format Format
	return format
}
// Open wraps r in an MP3 decoder and converts its PCM output to float32
// samples in a background goroutine.
//
// The decoder output is read as 16-bit little-endian samples and the returned
// audio.Stream is reported as 2 channels at the decoder's sample rate, which
// is the output layout go-mp3 documents. The sample channel is unbuffered, so
// decoding only advances while the consumer keeps reading. The goroutine
// closes the channel when the decoder reports end of stream or any other read
// error; a trailing odd byte is discarded.
func (f Format) Open(r io.ReadSeeker) (*audio.Stream, error) {
	decoder, err := mp3Lib.NewDecoder(r)
	if err != nil {
		return nil, err
	}

	newChannel := make(chan float32)

	go func() {
		defer close(newChannel)

		const bytesPerSample = 2
		// Read PCM in chunks and decode in place, rather than issuing one
		// binary.Read call per 2-byte sample.
		pcm := make([]byte, 4096*bytesPerSample)

		for {
			// readErr is local on purpose: the goroutine must not write to
			// the enclosing function's err after Open has returned.
			n, readErr := io.ReadFull(decoder, pcm)
			for offset := 0; offset+bytesPerSample <= n; offset += bytesPerSample {
				sample := int16(binary.LittleEndian.Uint16(pcm[offset:]))
				newChannel <- float32(sample) / float32(math.MaxInt16)
			}
			if readErr != nil {
				// io.EOF / io.ErrUnexpectedEOF mark the end of the stream;
				// any other error also ends decoding.
				return
			}
		}
	}()

	return audio.NewStream(newChannel, 2, float64(decoder.SampleRate())), nil
}
// Identify reports whether the input should be treated as MP3. Only the
// extension is consulted; peek is currently ignored.
func (f Format) Identify(peek []byte, extension string) bool {
	if extension == "mp3" {
		return true
	}
	return false
}

View file

@ -1,7 +1,7 @@
package audio
import (
"git.gammaspectra.live/S.O.N.G/Hibiki"
"git.gammaspectra.live/S.O.N.G/Hibiki/utilities"
"github.com/dh1tw/gosamplerate"
)
@ -19,7 +19,7 @@ const (
)
func NewResampler(from, to float64, channels, quality int) (Resampler, error) {
rs, err := gosamplerate.New(quality, channels, Hibiki.PANAKO_AUDIO_BLOCK_SIZE)
rs, err := gosamplerate.New(quality, channels, utilities.PANAKO_AUDIO_BLOCK_SIZE)
if err != nil {
return Resampler{}, err
}
@ -32,9 +32,15 @@ func NewResampler(from, to float64, channels, quality int) (Resampler, error) {
}
func (r *Resampler) GetBlock() []float32 {
s := r.buffer[0 : Hibiki.PANAKO_AUDIO_BLOCK_SIZE*r.channels]
r.buffer = r.buffer[Hibiki.PANAKO_AUDIO_BLOCK_SIZE*r.channels:]
return s
if len(r.buffer) > utilities.PANAKO_AUDIO_BLOCK_SIZE*r.channels {
s := r.buffer[0 : utilities.PANAKO_AUDIO_BLOCK_SIZE*r.channels]
r.buffer = r.buffer[utilities.PANAKO_AUDIO_BLOCK_SIZE*r.channels:]
return s
} else {
s := r.buffer
r.buffer = r.buffer[:0]
return s
}
}
func (r *Resampler) Process(block []float32) error {

View file

@ -1,99 +1,135 @@
package audio
import (
"encoding/binary"
"fmt"
"git.gammaspectra.live/S.O.N.G/Hibiki"
"io"
"unsafe"
"git.gammaspectra.live/S.O.N.G/Hibiki/utilities"
)
type Stream struct {
source io.ReadCloser
channels int
sampleRate float64
done bool
bytesRead int
stopAt int
source chan float32
channels int
sampleRate float64
samplesRead int
stopAtSample int
}
func NewStream(source io.ReadCloser, channels int, sampleRate float64) Stream {
return Stream{
func NewStream(source chan float32, channels int, sampleRate float64) *Stream {
return &Stream{
source: source,
channels: channels,
sampleRate: sampleRate,
}
}
func (s Stream) GetChannels() int {
func (s *Stream) GetChannels() int {
return s.channels
}
func (s Stream) GetSampleRate() float64 {
func (s *Stream) GetSampleRate() float64 {
return s.sampleRate
}
func (s Stream) Read(p []byte) (n int, err error) {
if s.stopAt != 0 && s.bytesRead+len(p) > s.stopAt {
if s.stopAt-s.bytesRead == 0 {
return 0, io.EOF
// GetAsChannel exposes the stream as a channel of single samples. A goroutine
// pulls values with Get and forwards them until Get reports that no more
// samples are available, then closes the channel.
func (s *Stream) GetAsChannel() chan float32 {
	out := make(chan float32)

	go func() {
		defer close(out)

		for sample, ok := s.Get(); ok; sample, ok = s.Get() {
			out <- sample
		}
	}()

	return out
}
// GetAsBlockChannel exposes the stream as a channel of sample blocks. A
// goroutine pulls blocks with GetBlock and forwards them, closing the channel
// once GetBlock reports that the stream is exhausted.
//
// GetBlock returns whatever samples it had already buffered together with
// more == false, so the final block may be shorter than a full block. That
// trailing block is forwarded when it is non-empty, so the end of the stream
// is not silently dropped.
func (s *Stream) GetAsBlockChannel() chan []float32 {
	newChannel := make(chan []float32)

	go func() {
		defer close(newChannel)

		for {
			block, more := s.GetBlock()
			if len(block) > 0 {
				newChannel <- block
			}
			if !more {
				return
			}
		}
	}()

	return newChannel
}
// Get returns the next sample from the source channel and whether one was
// available. It reports false without reading once samplesRead has reached a
// non-zero stopAtSample limit, and also when the source channel is closed.
// samplesRead is incremented only for samples actually received.
func (s *Stream) Get() (float32, bool) {
	limitReached := s.stopAtSample != 0 && s.samplesRead >= s.stopAtSample
	if limitReached {
		return 0, false
	}

	sample, ok := <-s.source
	if !ok {
		// A closed channel yields the zero value, so sample is 0 here.
		return sample, false
	}

	s.samplesRead++
	return sample, true
}
func (s *Stream) GetBlock() ([]float32, bool) {
buf := make([]float32, 0, utilities.PANAKO_AUDIO_BLOCK_SIZE*s.channels)
for {
f, more := s.Get()
if !more { //EOF
return buf, more
}
buf = append(buf, f)
if len(buf) == utilities.PANAKO_AUDIO_BLOCK_SIZE*s.channels {
return buf, true
}
r, err := s.source.Read(p[:s.stopAt-s.bytesRead])
s.bytesRead += r
return r, err
} else {
r, err := s.source.Read(p)
s.bytesRead += r
return r, err
}
}
func (s Stream) AdvanceSeconds(seconds float64) error {
n := s.byteIndex(seconds)
b := make([]byte, n)
_, err := io.ReadFull(s, b)
s.bytesRead = 0
return err
func (s *Stream) AdvanceSeconds(seconds float64) bool {
stopAt := s.secondsIndex(seconds)
for i := 0; i < stopAt; i++ {
_, more := s.Get()
if !more {
return false
}
}
return true
}
func (s Stream) SetMaxDuration(seconds float64) {
s.stopAt = s.byteIndex(seconds)
func (s *Stream) SetMaxDuration(seconds float64) {
s.stopAtSample = s.secondsIndex(seconds)
}
func (s Stream) byteIndex(seconds float64) int {
return int(seconds*s.sampleRate) * int(unsafe.Sizeof(float32(0)))
func (s *Stream) secondsIndex(seconds float64) int {
return int(seconds * s.sampleRate)
}
func (s Stream) Close() error {
return s.source.Close()
}
func (s Stream) DoResample(channels int, sampleRate float64, quality int) (Stream, error) {
func (s *Stream) DoResample(channels int, sampleRate float64, quality int) (*Stream, error) {
if channels != 1 && s.channels != channels {
return Stream{}, fmt.Errorf("cannot convert from %d channels to %d", s.channels, channels)
return nil, fmt.Errorf("cannot convert from %d channels to %d", s.channels, channels)
}
if channels == s.channels && sampleRate == s.sampleRate {
return Stream{}, nil
return nil, nil
}
rs, err := NewResampler(s.sampleRate, sampleRate, channels, quality)
if err != nil {
return Stream{}, err
return nil, err
}
reader, writer := io.Pipe()
newChannel := make(chan float32)
go func() {
var buf []float32
var f float32
defer rs.Delete()
defer close(newChannel)
handleTransform := func(end bool) error {
handleTransform := func(buf []float32, end bool) error {
if channels != s.channels && channels == 1 {
newBuf := make([]float32, 0, Hibiki.PANAKO_AUDIO_BLOCK_SIZE)
for i := 0; i < len(buf)/s.channels && i < Hibiki.PANAKO_AUDIO_BLOCK_SIZE; i++ {
newBuf := make([]float32, 0, utilities.PANAKO_AUDIO_BLOCK_SIZE)
for i := 0; i < len(buf)/s.channels && i < utilities.PANAKO_AUDIO_BLOCK_SIZE; i++ {
var sum float32
for j := 0; j < s.channels; j++ {
sum += buf[i*s.channels+j]
@ -102,27 +138,25 @@ func (s Stream) DoResample(channels int, sampleRate float64, quality int) (Strea
}
if len(newBuf) > 0 {
err := rs.Process(newBuf)
err = rs.Process(newBuf)
if err != nil {
return err
}
}
} else {
err := rs.Process(buf)
err = rs.Process(buf)
if err != nil {
return err
}
}
if end {
err := rs.Finish()
err = rs.Finish()
if err != nil {
return err
}
}
buf = buf[:0]
for {
b := rs.GetBlock()
@ -131,37 +165,33 @@ func (s Stream) DoResample(channels int, sampleRate float64, quality int) (Strea
}
for _, d := range b {
err := binary.Write(writer, binary.LittleEndian, d)
if err != nil {
return err
}
newChannel <- d
}
}
}
var buf []float32
var more bool
for {
err := binary.Read(s.source, binary.LittleEndian, &f)
if err != nil { //EOF
buf, more = s.GetBlock()
if !more { //EOF
break
}
buf = append(buf, f)
if len(buf) == Hibiki.PANAKO_AUDIO_BLOCK_SIZE*s.channels {
err := handleTransform(false)
if len(buf) == utilities.PANAKO_AUDIO_BLOCK_SIZE*s.channels {
err = handleTransform(buf, false)
if err != nil {
writer.Close()
return
}
}
}
err := handleTransform(true)
err = handleTransform(buf, true)
if err != nil {
writer.Close()
return
}
}()
return NewStream(reader, channels, sampleRate), nil
return NewStream(newChannel, channels, sampleRate), nil
}

44
utilities/constants.go Normal file
View file

@ -0,0 +1,44 @@
package utilities
// Tuning parameters for the Panako strategy. All of them are untyped
// constants so they can be used in both integer and floating point
// expressions.
const (
	// Audio block handling.
	PANAKO_AUDIO_BLOCK_SIZE    = 8192
	PANAKO_AUDIO_BLOCK_OVERLAP = 0

	// Maximum filter sizes along the frequency and time axes.
	PANAKO_FREQ_MAX_FILTER_SIZE = 103
	PANAKO_TIME_MAX_FILTER_SIZE = 25

	// Allowed frequency and time distances within a fingerprint.
	PANAKO_FP_MIN_FREQ_DIST = 1
	PANAKO_FP_MAX_FREQ_DIST = 128
	PANAKO_FP_MIN_TIME_DIST = 2
	PANAKO_FP_MAX_TIME_DIST = 33

	// Sample rate the strategy works at.
	PANAKO_SAMPLE_RATE = 16000

	// Spectral transform parameters.
	PANAKO_TRANSF_MIN_FREQ         = 110
	PANAKO_TRANSF_MAX_FREQ         = 7040
	PANAKO_TRANSF_REF_FREQ         = 440
	PANAKO_TRANSF_BANDS_PER_OCTAVE = 85

	// PANAKO_TRANSF_TIME_RESOLUTION in audio samples, 8 ms
	// (128 samples at PANAKO_SAMPLE_RATE).
	PANAKO_TRANSF_TIME_RESOLUTION = 128

	// PANAKO_QUERY_RANGE maybe set to 2?
	PANAKO_QUERY_RANGE = 3

	// Minimum hit counts before and after filtering.
	PANAKO_MIN_HITS_UNFILTERED = 10
	PANAKO_MIN_HITS_FILTERED   = 5

	// Accepted range for the time and frequency factors of a match.
	PANAKO_MIN_TIME_FACTOR = 0.8
	PANAKO_MAX_TIME_FACTOR = 1.2
	PANAKO_MIN_FREQ_FACTOR = 0.8
	PANAKO_MAX_FREQ_FACTOR = 1.2

	// PANAKO_MIN_MATCH_DURATION maybe 7 or 3 ?
	PANAKO_MIN_MATCH_DURATION = 3

	// Minimum fraction of seconds that must contain a match.
	PANAKO_MIN_SEC_WITH_MATCH = 0.2
)
// Abs returns the absolute value of x.
//
// The result is computed in int32, so the most negative int32 value has no
// positive counterpart and is returned unchanged.
func Abs(x int32) int32 {
	if x >= 0 {
		return x
	}
	return -x
}

View file

@ -34,19 +34,23 @@ func NewLemireMinMaxFilter(windowSize, dataLength int, clampEdges bool) (*Lemire
ob.dataToFilter = make([]float32, dataLength+windowSize-1)
} else {
ob.maxVal = make([]float32, dataLength+windowSize-1)
ob.minVal = make([]float32, dataLength+windowSize-1)
ob.maxVal = make([]float32, dataLength-windowSize+1)
ob.minVal = make([]float32, dataLength-windowSize+1)
ob.dataToFilter = nil
}
return ob, nil
}
func (l *LemireMinMaxFilter) GetMinVal() []float32 {
return l.minVal
valueCopy := make([]float32, len(l.minVal))
copy(valueCopy, l.minVal)
return valueCopy
}
func (l *LemireMinMaxFilter) GetMaxVal() []float32 {
return l.maxVal
valueCopy := make([]float32, len(l.maxVal))
copy(valueCopy, l.maxVal)
return valueCopy
}
// MaxFilter Run only a max filter. The resulting filtered data is stored in maxFiltered
@ -55,11 +59,12 @@ func (l *LemireMinMaxFilter) GetMaxVal() []float32 {
// @param maxFiltered the data to filter. It should have the same length as the data array
func (l *LemireMinMaxFilter) MaxFilter(array, maxFiltered []float32) {
if l.clampEdges {
copy(l.dataToFilter[l.windowSize/2:], array[0:])
for j := 0; j < (l.windowSize / 2); j++ {
ix := l.windowSize / 2
copy(l.dataToFilter[ix:ix+len(array)], array)
for j := 0; j < ix; j++ {
l.dataToFilter[j] = array[0]
}
for j := len(l.dataToFilter) - l.windowSize/2; j < len(l.dataToFilter); j++ {
for j := len(l.dataToFilter) - ix; j < len(l.dataToFilter); j++ {
l.dataToFilter[j] = array[len(array)-1]
}
array = l.dataToFilter
@ -94,7 +99,7 @@ func (l *LemireMinMaxFilter) MaxFilter(array, maxFiltered []float32) {
}
l.maxFifo.PushBack(i)
if i == l.windowSize+l.maxFifo.Front().(int) {
if i == (l.windowSize + l.maxFifo.Front().(int)) {
l.maxFifo.PopFront()
}
}
@ -106,11 +111,12 @@ func (l *LemireMinMaxFilter) MaxFilter(array, maxFiltered []float32) {
// Filter Run the filter. The resulting filtered data can requested by calling GetMaxVal and GetMinVal.
func (l *LemireMinMaxFilter) Filter(array []float32) {
if l.clampEdges {
copy(l.dataToFilter[l.windowSize/2:], array[0:])
for j := 0; j < (l.windowSize / 2); j++ {
ix := l.windowSize / 2
copy(l.dataToFilter[ix:ix+len(array)], array)
for j := 0; j < ix; j++ {
l.dataToFilter[j] = array[0]
}
for j := len(l.dataToFilter) - l.windowSize/2; j < len(l.dataToFilter); j++ {
for j := len(l.dataToFilter) - ix; j < len(l.dataToFilter); j++ {
l.dataToFilter[j] = array[len(array)-1]
}
array = l.dataToFilter