Update PitchLag decoder to be subframe aware

Previously, decodePitchLags computed the pitch lag only for the first subframe (s=0).
2022-09-17 16:19:57 -04:00 · 2022-09-17 16:19:57 -04:00 · cb995e5d2b
parent c9124c5c4e
commit cb995e5d2b
2 changed files with 38 additions and 14 deletions
--- a/internal/silk/decoder.go
+++ b/internal/silk/decoder.go
@ -1216,8 +1216,13 @@ func (d *Decoder) decodePitchLags(signalType frameSignalType, bandwidth Bandwidt
 	//
 	//     pitch_lags[k] = clamp(lag_min, lag + lag_cb[contour_index][k],
 	//                           lag_max)
-	pitchLags = []int{
-		int(clamp(int32(lagMin), int32(lag+uint32(lagCb[contourIndex][0])), int32(lagMax))),
+	pitchLags = make([]int, subframeCount)
+	for i := 0; i < subframeCount; i++ {
+		pitchLags[i] = int(clamp(
+			int32(lagMin),
+			int32(lag+uint32(lagCb[contourIndex][i])),
+			int32(lagMax)),
+		)
 	}

 	return
@ -1326,7 +1331,15 @@ func (d *Decoder) samplesInSubframe(bandwidth Bandwidth) int {
 }

 // https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.9.1
-func (d *Decoder) ltpSynthesis(signalType frameSignalType, eQ23 []int32, i int, LTPscaleQ14 float64, bandwidth Bandwidth, wQ2 int16) (res []float64) {
+func (d *Decoder) ltpSynthesis(
+	signalType frameSignalType,
+	pitchLags []int,
+	eQ23 []int32,
+	n, j, s int,
+	LTPscaleQ14 float64,
+	bandwidth Bandwidth,
+	wQ2 int16,
+) (res []float64) {
 	// For unvoiced frames (see Section 4.2.7.3), the LPC residual for i
 	// such that j <= i < (j + n) is simply a normalized copy of the
 	// excitation signal, i.e.,
@ -1337,10 +1350,9 @@ func (d *Decoder) ltpSynthesis(signalType frameSignalType, eQ23 []int32, i int,

 	res = make([]float64, len(eQ23))
 	if signalType != frameSignalTypeVoiced {
-		for i := range eQ23 {
+		for i := j; i < (j + n); i++ {
 			res[i] = float64(eQ23[i]) / 8388608
 		}
-
 		return
 	}

@ -1357,7 +1369,7 @@ func (d *Decoder) ltpSynthesis(signalType frameSignalType, eQ23 []int32, i int,
 // after either
 //
 // https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.9.2
-func (d *Decoder) lpcSynthesis(out []float64, bandwidth Bandwidth, n, currentSubframe, dLPC int, aQ12, res, gainQ16 []float64) {
+func (d *Decoder) lpcSynthesis(out []float64, bandwidth Bandwidth, n, s, dLPC int, aQ12, res, gainQ16 []float64) {
 	finalLPCValuesIndex := 0

 	// j be the index of the first sample in the residual corresponding to
@ -1379,8 +1391,8 @@ func (d *Decoder) lpcSynthesis(out []float64, bandwidth Bandwidth, n, currentSub
 	//
 	var currentLPCVal float64
 	for i := j; i < (j + n); i++ {
-		lpcVal := gainQ16[currentSubframe] / 65536.0
-		lpcVal *= res[i+(n*currentSubframe)]
+		lpcVal := gainQ16[s] / 65536.0
+		lpcVal *= res[i+(n*s)]

 		for k := 0; k < dLPC; k++ {
 			if i-k > 0 {
@ -1423,20 +1435,31 @@ func (d *Decoder) lpcSynthesis(out []float64, bandwidth Bandwidth, n, currentSub
 func (d *Decoder) silkFrameReconstruction(
 	signalType frameSignalType, bandwidth Bandwidth,
 	dLPC int,
+	pitchLags []int,
 	eQ23 []int32,
 	LTPscaleQ14 float64,
 	wQ2 int16,
 	aQ12, gainQ16, out []float64,
 ) {
 	// let n be the number of samples in a subframe
+	//
+	// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.9
 	n := d.samplesInSubframe(bandwidth)

-	for i := 0; i < subframeCount; i++ {
+	// Let s be the index of the current subframe in this SILK frame
+	// (0 or 1 for 10 ms frames, or 0 to 3 for 20 ms frames).
+	for s := 0; s < subframeCount; s++ {
+		// Let j be the index of the first sample in the residual
+		// corresponding to the current subframe.
+		//
+		// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.9
+		j := n * s
+
 		// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.9.1
-		res := d.ltpSynthesis(signalType, eQ23, subframeCount, LTPscaleQ14, bandwidth, wQ2)
+		res := d.ltpSynthesis(signalType, pitchLags, eQ23, n, j, s, LTPscaleQ14, bandwidth, wQ2)

 		//https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.9.2
-		d.lpcSynthesis(out[n*i:], bandwidth, n, i, dLPC, aQ12, res, gainQ16)
+		d.lpcSynthesis(out[n*s:], bandwidth, n, s, dLPC, aQ12, res, gainQ16)
 	}
 }

@ -1521,7 +1544,7 @@ func (d *Decoder) Decode(in []byte, out []float64, isStereo bool, nanoseconds in
 	aQ12 := d.limitLPCFilterPredictionGain(a32Q17)

 	// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.6.1
-	d.decodePitchLags(signalType, bandwidth)
+	_, pitchLags := d.decodePitchLags(signalType, bandwidth)

 	// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.6.2
 	d.decodeLTPFilterCoefficients(signalType)
@ -1547,6 +1570,7 @@ func (d *Decoder) Decode(in []byte, out []float64, isStereo bool, nanoseconds in
 	// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.9
 	d.silkFrameReconstruction(signalType, bandwidth,
 		dLPC,
+		pitchLags,
 		eQ23,
 		LTPscaleQ14,
 		wQ2,
--- a/internal/silk/decoder_test.go
+++ b/internal/silk/decoder_test.go
@ -400,7 +400,7 @@ func TestDecodePitchLags(t *testing.T) {
 		t.Fatal()
 	}

-	if !reflect.DeepEqual(pitchLags, []int{206}) {
+	if !reflect.DeepEqual(pitchLags, []int{206, 206, 206, 206}) {
 		t.Fatal()
 	}
 }
@ -433,7 +433,7 @@ func TestDecodeLTPScalingParameter(t *testing.T) {
 	})
 }

-func TestSean(t *testing.T) {
+func TestDecode(t *testing.T) {
 	d := NewDecoder()
 	out := make([]float64, 320)