Create cache for res and out values

LTP Synthesis needs values of out + res for previous frames
2022-09-22 23:32:16 -04:00 · 2022-09-22 23:32:16 -04:00 · 3a79dec65f
parent 903249e356
commit 3a79dec65f
2 changed files with 25 additions and 8 deletions
--- a/internal/silk/decoder.go
+++ b/internal/silk/decoder.go
@ -25,6 +25,11 @@ type Decoder struct {
 	// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.9.2
 	finalLPCValues []float32

+	// This requires storage to buffer up to 306 values of out[i] from
+	// previous subframes.
+	// https://www.rfc-editor.org/rfc/rfc6716#section-4.2.7.9.1
+	finalOutValues []float32
+
 	// n0Q15 are the LSF coefficients decoded for the prior frame
 	// see normalizeLSFInterpolation
 	n0Q15 []int16
@ -34,6 +39,7 @@ type Decoder struct {
 func NewDecoder() Decoder {
 	return Decoder{
 		finalLPCValues: make([]float32, 16),
+		finalOutValues: make([]float32, 306),
 	}
 }

@ -1077,14 +1083,14 @@ func (d *Decoder) limitLPCFilterPredictionGain(a32Q17 []int32) (aQ12 []float32)
 }

 // https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.6.1
-func (d *Decoder) decodePitchLags(signalType frameSignalType, bandwidth Bandwidth) (lag uint32, pitchLags []int) {
+func (d *Decoder) decodePitchLags(signalType frameSignalType, bandwidth Bandwidth) (lagMax uint32, pitchLags []int) {
 	if signalType != frameSignalTypeVoiced {
 		return
 	}

 	var (
+		lag    uint32
 		lagMin uint32
-		lagMax uint32
 	)

 	// The primary lag index is coded either relative to the primary lag of
@ -1348,7 +1354,7 @@ func (d *Decoder) ltpSynthesis(
 	LTPScaleQ14 float32,
 	bandwidth Bandwidth,
 	wQ2 int16,
-	aQ12, gainQ16, lpc, res []float32,
+	aQ12, gainQ16, lpc, res, resLag []float32,
 ) {
 	// If this is the third or fourth subframe of a 20 ms SILK frame and the LSF
 	// interpolation factor, w_Q2 (see Section 4.2.7.5.5), is less than 4,
@ -1411,7 +1417,9 @@ func (d *Decoder) ltpSynthesis(
 	// previous subframes (240 from the current SILK frame and 16 from the
 	// previous SILK frame).  This corresponds to WB with up to three
 	// previous subframes in the current SILK frame, plus 16 samples for
-	// d_LPC.  The astute reader will notice that, given the definition of
+	// d_LPC.
+
+	// The astute reader will notice that, given the definition of
 	// lpc[i] in Section 4.2.7.9.2, the output of this latter equation is
 	// merely a scaled version of the values of res[i] from previous
 	// subframes.
@ -1482,6 +1490,11 @@ func (d *Decoder) ltpSynthesis(
 //
 // https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.9.2
 func (d *Decoder) lpcSynthesis(out []float32, bandwidth Bandwidth, n, s, dLPC int, aQ12, res, gainQ16, lpc []float32) {
+	// Shift left one subframe of samples
+	for i := 0; i < len(d.finalOutValues)-n; i++ {
+		d.finalOutValues[i] = d.finalOutValues[i+n]
+	}
+
 	finalLPCValuesIndex := 0

 	// j be the index of the first sample in the residual corresponding to
@ -1530,6 +1543,7 @@ func (d *Decoder) lpcSynthesis(out []float32, bandwidth Bandwidth, n, s, dLPC in
 		//     out[i] = clamp(-1.0, lpc[i], 1.0)
 		//
 		out[i] = clampFloat(-1.0, lpc[sampleIndex], 1.0)
+		d.finalOutValues[len(d.finalOutValues)-n+i] = out[i]
 	}
 }

@ -1545,6 +1559,7 @@ func (d *Decoder) lpcSynthesis(out []float32, bandwidth Bandwidth, n, s, dLPC in
 func (d *Decoder) silkFrameReconstruction(
 	signalType frameSignalType, bandwidth Bandwidth,
 	dLPC int,
+	lagMax uint32,
 	bQ7 [][]int8,
 	pitchLags []int,
 	eQ23 []int32,
@ -1569,6 +1584,7 @@ func (d *Decoder) silkFrameReconstruction(
 	//     res[i] = ---------
 	//               2.0**23
 	res := make([]float32, len(eQ23))
+	resLag := make([]float32, lagMax)
 	for i := range res {
 		res[i] = float32(eQ23[i]) / 8388608.0
 	}
@ -1588,7 +1604,7 @@ func (d *Decoder) silkFrameReconstruction(
 		//
 		// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.9.1
 		if signalType == frameSignalTypeVoiced {
-			d.ltpSynthesis(out, signalType, bQ7, pitchLags, eQ23, n, j, s, dLPC, LTPscaleQ14, bandwidth, wQ2, aQ12, gainQ16, lpc, res)
+			d.ltpSynthesis(out, signalType, bQ7, pitchLags, eQ23, n, j, s, dLPC, LTPscaleQ14, bandwidth, wQ2, aQ12, gainQ16, lpc, res, resLag)
 		}

 		//https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.9.2
@ -1677,7 +1693,7 @@ func (d *Decoder) Decode(in []byte, out []float32, isStereo bool, nanoseconds in
 	aQ12 := d.limitLPCFilterPredictionGain(a32Q17)

 	// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.6.1
-	_, pitchLags := d.decodePitchLags(signalType, bandwidth)
+	lagMax, pitchLags := d.decodePitchLags(signalType, bandwidth)

 	// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.6.2
 	bQ7 := d.decodeLTPFilterCoefficients(signalType)
@ -1704,6 +1720,7 @@ func (d *Decoder) Decode(in []byte, out []float32, isStereo bool, nanoseconds in
 	d.silkFrameReconstruction(
 		signalType, bandwidth,
 		dLPC,
+		lagMax,
 		bQ7,
 		pitchLags,
 		eQ23,
--- a/internal/silk/decoder_test.go
+++ b/internal/silk/decoder_test.go
@ -396,8 +396,8 @@ func TestDecodePitchLags(t *testing.T) {
 	silkFrame := []byte{0xb4, 0xe2, 0x2c, 0xe, 0x10, 0x65, 0x1d, 0xa9, 0x7, 0x5c, 0x36, 0x8f, 0x96, 0x7b, 0xf4, 0x89, 0x41, 0x55, 0x98, 0x7a, 0x39, 0x2e, 0x6b, 0x71, 0xa4, 0x3, 0x70, 0xbf}
 	d := &Decoder{rangeDecoder: createRangeDecoder(silkFrame, 73, 30770362, 1380489)}

-	lag, pitchLags := d.decodePitchLags(frameSignalTypeVoiced, BandwidthWideband)
-	if lag != 206 {
+	lagMax, pitchLags := d.decodePitchLags(frameSignalTypeVoiced, BandwidthWideband)
+	if lagMax != 288 {
 		t.Fatal()
 	}