Add stub for Limiting the Prediction Gain of the LPC Filter

Comes from RFC 6716, Section 4.2.7.5.8; the full algorithm can be implemented later.
2022-09-05 15:41:26 -04:00 · 2022-09-05 15:41:26 -04:00 · 5919d29d78
parent 1e8a69eba3
commit 5919d29d78
2 changed files with 62 additions and 16 deletions
--- a/internal/silk/decoder.go
+++ b/internal/silk/decoder.go
@ -12,6 +12,9 @@ type Decoder struct {
 	// Have we decoded a frame yet?
 	haveDecoded bool

+	// Is the previous frame a voiced frame?
+	isPreviousFrameVoiced bool
+
 	// TODO, should have dedicated frame state
 	logGain       uint32
 	subframeState [4]struct {
@ -385,7 +388,7 @@ func (d *Decoder) normalizeLineSpectralFrequencyCoefficients(bandwidth Bandwidth
 // percentile of a large training set).
 //
 // https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.5.4
-func (d *Decoder) normalizeLSFStabilization() {
+func (d *Decoder) normalizeLSFStabilization(nlsfQ15 []int16) {
 	// TODO
 }

@ -395,7 +398,7 @@ func (d *Decoder) normalizeLSFStabilization() {
 // (in the same channel) and the current frame
 //
 // https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.5.5
-func (d *Decoder) normalizeLSFInterpolation() error {
+func (d *Decoder) normalizeLSFInterpolation(nlsfQ15 []int16) (n1Q15 []int16, err error) {
 	// Let n2_Q15[k] be the normalized LSF coefficients decoded by the
 	// procedure in Section 4.2.7.5, n0_Q15[k] be the LSF coefficients
 	// decoded for the prior frame, and w_Q2 be the interpolation factor.
@ -404,14 +407,17 @@ func (d *Decoder) normalizeLSFInterpolation() error {
 	//
 	//      n1_Q15[k] = n0_Q15[k] + (w_Q2*(n2_Q15[k] - n0_Q15[k]) >> 2)
 	if wQ2 := d.rangeDecoder.DecodeSymbolWithICDF(icdfNormalizedLSFInterpolationIndex); wQ2 != 4 {
-		return errUnsupportedLSFInterpolation
+		return nil, errUnsupportedLSFInterpolation
 	}

-	return nil
+	// TODO
+	n1Q15 = nlsfQ15
+
+	return n1Q15, nil
 }

-func (d *Decoder) convertNormalizedLSFsToLPCCoefficients(nlsfQ15 []int16, bandwidth Bandwidth) (a32Q17 []int32) {
-	cQ17 := make([]int32, len(nlsfQ15))
+func (d *Decoder) convertNormalizedLSFsToLPCCoefficients(n1Q15 []int16, bandwidth Bandwidth) (a32Q17 []int32) {
+	cQ17 := make([]int32, len(n1Q15))
 	cosQ12 := q12CosineTableForLSFConverion

 	ordering := lsfOrderingForPolynomialEvaluationNarrowbandAndMediumband
@ -433,16 +439,16 @@ func (d *Decoder) convertNormalizedLSFsToLPCCoefficients(nlsfQ15 []int16, bandwi
 	// i'th entry of Table 28.
 	//
 	// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.5.6
-	for k := range nlsfQ15 {
-		i := int32(nlsfQ15[k] >> 8)
-		f := int32(nlsfQ15[k] & 255)
+	for k := range n1Q15 {
+		i := int32(n1Q15[k] >> 8)
+		f := int32(n1Q15[k] & 255)

 		cQ17[ordering[k]] = (cosQ12[i]*256 +
 			(cosQ12[i+1]-cosQ12[i])*f + 4) >> 3
 	}

-	pQ16 := make([]int32, (len(nlsfQ15)/2)+1)
-	qQ16 := make([]int32, (len(nlsfQ15)/2)+1)
+	pQ16 := make([]int32, (len(n1Q15)/2)+1)
+	qQ16 := make([]int32, (len(n1Q15)/2)+1)

 	// Given the list of cosine values compute the coefficients of P and Q,
 	// described here via a simple recurrence.  Let p_Q16[k][j] and q_Q16[k][j]
@ -461,7 +467,7 @@ func (d *Decoder) convertNormalizedLSFsToLPCCoefficients(nlsfQ15 []int16, bandwi
 	qQ16[0] = 1 << 16
 	pQ16[1] = -cQ17[0]
 	qQ16[1] = -cQ17[1]
-	dLPC := len(nlsfQ15)
+	dLPC := len(n1Q15)
 	d2 := dLPC / 2

 	// As boundary conditions, assume p_Q16[k][j] = q_Q16[k][j] = 0 for all j < 0.
@ -510,7 +516,7 @@ func (d *Decoder) convertNormalizedLSFsToLPCCoefficients(nlsfQ15 []int16, bandwi
 	//
 	// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.5.6

-	a32Q17 = make([]int32, len(nlsfQ15))
+	a32Q17 = make([]int32, len(n1Q15))
 	for k := 0; k < d2; k++ {
 		a32Q17[k] = -(qQ16[k+1] - qQ16[k]) - (pQ16[k+1] + pQ16[k])
 		a32Q17[dLPC-k-1] = (qQ16[k+1] - qQ16[k]) - (pQ16[k+1] + pQ16[k])
@ -1032,6 +1038,35 @@ func (d *Decoder) limitLPCCoefficientsRange(a32Q17 []int32) {
 	}
 }

+// The prediction gain of an LPC synthesis filter is the square root of
+// the output energy when the filter is excited by a unit-energy
+// impulse.  Even if the Q12 coefficients would fit, the resulting
+// filter may still have a significant gain (especially for voiced
+// sounds), making the filter unstable. silk_NLSF2A() applies up to 16
+// additional rounds of bandwidth expansion to limit the prediction
+// gain.
+//
+// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.5.8
+func (d *Decoder) limitLPCFilterPredictionGain(a32Q17 []int32) (aQ12 []float64) {
+	aQ12 = make([]float64, len(a32Q17))
+
+	// However, silk_LPC_inverse_pred_gain_QA() approximates this using
+	// fixed-point arithmetic to guarantee reproducible results across
+	// platforms and implementations.  Since small changes in the
+	// coefficients can make a stable filter unstable, it takes the real Q12
+	// coefficients that will be used during reconstruction as input.  Thus,
+	// let
+	//
+	//     a32_Q12[n] = (a32_Q17[n] + 16) >> 5
+	//
+	// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.5.8
+	for n := range a32Q17 {
+		aQ12[n] = float64((a32Q17[n] + 16) >> 5)
+
+	}
+	return
+}
+
 // Decode decodes many SILK subframes
 //   An overview of the decoder is given in Figure 14.
 //
@ -1093,17 +1128,27 @@ func (d *Decoder) Decode(in []byte, isStereo bool, nanoseconds int, bandwidth Ba
 	nlsfQ15 := d.normalizeLineSpectralFrequencyCoefficients(bandwidth, resQ10, I1)

 	// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.5.4
-	d.normalizeLSFStabilization()
+	d.normalizeLSFStabilization(nlsfQ15)

 	// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.5.5
-	if err := d.normalizeLSFInterpolation(); err != nil {
+	n1Q15, err := d.normalizeLSFInterpolation(nlsfQ15)
+	if err != nil {
 		return nil, err
 	}

 	// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.5.6
-	a32Q17 := d.convertNormalizedLSFsToLPCCoefficients(nlsfQ15, bandwidth)
+	a32Q17 := d.convertNormalizedLSFsToLPCCoefficients(n1Q15, bandwidth)

+	// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.5.7
 	d.limitLPCCoefficientsRange(a32Q17)

+	// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.5.8
+	d.limitLPCFilterPredictionGain(a32Q17)
+
+	if signalType == frameSignalTypeVoiced {
+		return nil, errUnsupportedVoicedFrames
+	}
+	d.isPreviousFrameVoiced = signalType == frameSignalTypeVoiced
+
 	return
 }
--- a/internal/silk/errors.go
+++ b/internal/silk/errors.go
@ -7,4 +7,5 @@ var (
 	errUnsupportedSilkStereo               = errors.New("silk decoder does not support stereo")
 	errUnsupportedSilkLowBitrateRedundancy = errors.New("silk decoder does not low bit-rate redundancy")
 	errUnsupportedLSFInterpolation         = errors.New("silk decoder does not support LSF Interpolation yet")
+	errUnsupportedVoicedFrames             = errors.New("silk decoder does not support voiced frames")
 )