From e005fba1891e5cda695fdded0366a9b599b28a52 Mon Sep 17 00:00:00 2001 From: Sean DuBois Date: Thu, 8 Sep 2022 00:16:35 -0400 Subject: [PATCH] Finish normalizeLSFInterpolation Before we only handled when wQ2 == 4 --- internal/silk/decoder.go | 44 +++++++++++++++++++++++------------ internal/silk/decoder_test.go | 43 +++++++++++++++++++++++++++------- 2 files changed, 64 insertions(+), 23 deletions(-) diff --git a/internal/silk/decoder.go b/internal/silk/decoder.go index be7421a..d2e256a 100644 --- a/internal/silk/decoder.go +++ b/internal/silk/decoder.go @@ -15,8 +15,13 @@ type Decoder struct { // Is the previous frame a voiced frame? isPreviousFrameVoiced bool + previousLogGain uint32 + + // n0Q15 are the LSF coefficients decoded for the prior frame + // see normalizeLSFInterpolation + n0Q15 []int16 + // TODO, should have dedicated frame state - logGain uint32 subframeState [4]struct { } } @@ -130,7 +135,7 @@ func (d *Decoder) decodeSubframeQuantizations(signalType frameSignalType) (gainQ // current gain is limited as follows: // log_gain = max(gain_index, previous_log_gain - 16) if d.haveDecoded { - logGain = maxUint32(gainIndex, d.logGain-16) + logGain = maxUint32(gainIndex, d.previousLogGain-16) } else { logGain = gainIndex } @@ -144,10 +149,10 @@ func (d *Decoder) decodeSubframeQuantizations(signalType frameSignalType) (gainQ // The following formula translates this index into a quantization gain // for the current subframe using the gain from the previous subframe: // log_gain = clamp(0, max(2*delta_gain_index - 16, previous_log_gain + delta_gain_index - 4), 63) - logGain = uint32(clamp(0, maxInt32(2*int32(deltaGainIndex)-16, int32(d.logGain+deltaGainIndex)-4), 63)) + logGain = uint32(clamp(0, maxInt32(2*int32(deltaGainIndex)-16, int32(d.previousLogGain+deltaGainIndex)-4), 63)) } - d.logGain = logGain + d.previousLogGain = logGain // silk_gains_dequant() (gain_quant.c) dequantizes log_gain for the k'th // subframe and converts it into a linear Q16 scale 
factor via @@ -397,7 +402,7 @@ func (d *Decoder) normalizeLSFStabilization(nlsfQ15 []int16) { // (in the same channel) and the current frame // // https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.5.5 -func (d *Decoder) normalizeLSFInterpolation(nlsfQ15 []int16) (n1Q15 []int16, err error) { +func (d *Decoder) normalizeLSFInterpolation(n2Q15 []int16) (n1Q15 []int16) { // Let n2_Q15[k] be the normalized LSF coefficients decoded by the // procedure in Section 4.2.7.5, n0_Q15[k] be the LSF coefficients // decoded for the prior frame, and w_Q2 be the interpolation factor. @@ -405,14 +410,17 @@ func (d *Decoder) normalizeLSFInterpolation(nlsfQ15 []int16) (n1Q15 []int16, err // 20 ms frame, n1_Q15[k], are // // n1_Q15[k] = n0_Q15[k] + (w_Q2*(n2_Q15[k] - n0_Q15[k]) >> 2) - if wQ2 := d.rangeDecoder.DecodeSymbolWithICDF(icdfNormalizedLSFInterpolationIndex); wQ2 != 4 { - return nil, errUnsupportedLSFInterpolation + wQ2 := int32(d.rangeDecoder.DecodeSymbolWithICDF(icdfNormalizedLSFInterpolationIndex)) + if wQ2 == 4 || !d.haveDecoded || len(d.n0Q15) != len(n2Q15) { + return n2Q15 } - // TODO - n1Q15 = nlsfQ15 + n1Q15 = make([]int16, len(n2Q15)) + for k := range n1Q15 { + n1Q15[k] = int16(int32(d.n0Q15[k]) + ((wQ2 * (int32(n2Q15[k]) - int32(d.n0Q15[k]))) >> 2)) + } - return n1Q15, nil + return n1Q15 } func (d *Decoder) convertNormalizedLSFsToLPCCoefficients(n1Q15 []int16, bandwidth Bandwidth) (a32Q17 []int32) { @@ -1272,10 +1280,7 @@ func (d *Decoder) Decode(in []byte, out []float64, isStereo bool, nanoseconds in d.normalizeLSFStabilization(nlsfQ15) // https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.5.5 - n1Q15, err := d.normalizeLSFInterpolation(nlsfQ15) - if err != nil { - return err - } + n1Q15 := d.normalizeLSFInterpolation(nlsfQ15) // https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.5.6 a32Q17 := d.convertNormalizedLSFsToLPCCoefficients(n1Q15, bandwidth) @@ -1297,7 +1302,7 @@ func (d *Decoder) Decode(in []byte, out []float64, isStereo bool, nanoseconds in } //
https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.6.3 - _, err = d.decodeLTPScalingParamater(signalType) + _, err := d.decodeLTPScalingParamater(signalType) if err != nil { return err } @@ -1323,6 +1328,15 @@ func (d *Decoder) Decode(in []byte, out []float64, isStereo bool, nanoseconds in //https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.9.2 d.lpcSynthesis(out, bandwidth, dLPC, aQ12, res, gainQ16) + // n0Q15 is the LSF coefficients decoded for the prior frame + // see normalizeLSFInterpolation. + if len(d.n0Q15) != len(nlsfQ15) { + d.n0Q15 = make([]int16, len(nlsfQ15)) + } + + copy(d.n0Q15, nlsfQ15) d.isPreviousFrameVoiced = signalType == frameSignalTypeVoiced + d.haveDecoded = true + return nil } diff --git a/internal/silk/decoder_test.go b/internal/silk/decoder_test.go index 9e8bf82..0270eef 100644 --- a/internal/silk/decoder_test.go +++ b/internal/silk/decoder_test.go @@ -58,14 +58,7 @@ func TestDecodeSubframeQuantizations(t *testing.T) { d := &Decoder{rangeDecoder: createRangeDecoder(testSilkFrame(), 31, 482344960, 437100388)} gainQ16 := d.decodeSubframeQuantizations(frameSignalTypeInactive) - switch { - case gainQ16[0] != 210944: - t.Fatal() - case gainQ16[1] != 112640: - t.Fatal() - case gainQ16[2] != 96256: - t.Fatal() - case gainQ16[3] != 96256: + if !reflect.DeepEqual(gainQ16, []float64{210944, 112640, 96256, 96256}) { t.Fatal() } } @@ -99,6 +92,40 @@ func TestNormalizeLineSpectralFrequencyCoefficients(t *testing.T) { } } +func TestNormalizeLSFInterpolation(t *testing.T) { + t.Run("wQ2 == 4", func(t *testing.T) { + d := &Decoder{rangeDecoder: createRangeDecoder(testSilkFrame(), 55, 493249168, 174371199)} + n2Q15 := []int16{ + 2132, 3584, 5504, 7424, 9472, 11392, 13440, 15360, 17280, + 19200, 21120, 23040, 25088, 27008, 28928, 30848, + } + + if !reflect.DeepEqual(d.normalizeLSFInterpolation(n2Q15), n2Q15) { + t.Fatal() + } + }) + + t.Run("wQ2 == 1", func(t *testing.T) { + frame := []byte{0xac, 0xbd, 0xa9, 0xf7, 0x26, 0x24, 0x5a, 0xa4, 
0x00, 0x37, 0xbf, 0x9c, 0xde, 0xe, 0xcf, 0x94, 0x64, 0xaa, 0xf9, 0x87, 0xd0, 0x79, 0x19, 0xa8, 0x21, 0xc0} + d := &Decoder{ + rangeDecoder: createRangeDecoder(frame, 65, 1231761776, 1068195183), + haveDecoded: true, + n0Q15: []int16{ + 518, 380, 4444, 6982, 8752, 10510, 12381, 14102, 15892, 17651, 19340, 21888, 23936, 25984, 28160, 30208, + }, + } + n2Q15 := []int16{215, 1447, 3712, 5120, 7168, 9088, 11264, 13184, 15232, 17536, 19712, 21888, 24192, 26240, 28416, 30336} + expectedN1Q15 := []int16{ + 442, 646, 4261, 6516, 8356, 10154, 12101, 13872, 15727, + 17622, 19433, 21888, 24000, 26048, 28224, 30240, + } + + if !reflect.DeepEqual(d.normalizeLSFInterpolation(n2Q15), expectedN1Q15) { + t.Fatal() + } + }) +} + func TestConvertNormalizedLSFsToLPCCoefficients(t *testing.T) { d := &Decoder{}