Properly implement Q15 handling

If wQ2 != 4, the interpolated normalized value (n1Q15) is only used for the first two subframes; the remaining subframes use the current frame's coefficients.
Previously, we incorrectly used it for all subframes.
This commit is contained in:
Sean DuBois 2022-09-25 22:17:10 -04:00
parent 4c632df5ac
commit 238ccbfc34
2 changed files with 45 additions and 22 deletions

View file

@ -395,11 +395,6 @@ func (d *Decoder) normalizeLSFStabilization(nlsfQ15 []int16) {
// TODO
}
// For 20 ms SILK frames, the first half of the frame (i.e., the first
// two subframes) may use normalized LSF coefficients that are
// interpolated between the decoded LSFs for the most recent coded frame
// (in the same channel) and the current frame
//
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.5.5
func (d *Decoder) normalizeLSFInterpolation(n2Q15 []int16) (n1Q15 []int16, wQ2 int16) {
// Let n2_Q15[k] be the normalized LSF coefficients decoded by the
@ -411,7 +406,7 @@ func (d *Decoder) normalizeLSFInterpolation(n2Q15 []int16) (n1Q15 []int16, wQ2 i
// n1_Q15[k] = n0_Q15[k] + (w_Q2*(n2_Q15[k] - n0_Q15[k]) >> 2)
wQ2 = int16(d.rangeDecoder.DecodeSymbolWithICDF(icdfNormalizedLSFInterpolationIndex))
if wQ2 == 4 || !d.haveDecoded {
return n2Q15, wQ2
return nil, wQ2
}
n1Q15 = make([]int16, len(n2Q15))
@ -422,6 +417,22 @@ func (d *Decoder) normalizeLSFInterpolation(n2Q15 []int16) (n1Q15 []int16, wQ2 i
return
}
// generateAQ12 derives one set of LPC coefficients from the normalized LSF
// coefficients in Q15 and appends it to aQ12, returning the extended slice.
//
// A nil Q15 means there is nothing to convert; aQ12 is then returned
// untouched so the caller can chain calls unconditionally.
func (d *Decoder) generateAQ12(Q15 []int16, bandwidth Bandwidth, aQ12 [][]float32) [][]float32 {
	if Q15 != nil {
		// Convert the normalized LSFs to LPC coefficients.
		// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.5.6
		lpcQ17 := d.convertNormalizedLSFsToLPCCoefficients(Q15, bandwidth)

		// Limit the range of the LPC coefficients (no result is captured here,
		// so this presumably adjusts lpcQ17 in place).
		// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.5.7
		d.limitLPCCoefficientsRange(lpcQ17)

		// Limit the prediction gain and keep the resulting coefficient set.
		// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.5.8
		aQ12 = append(aQ12, d.limitLPCFilterPredictionGain(lpcQ17))
	}

	return aQ12
}
func (d *Decoder) convertNormalizedLSFsToLPCCoefficients(n1Q15 []int16, bandwidth Bandwidth) (a32Q17 []int32) {
cQ17 := make([]int32, len(n1Q15))
cosQ12 := q12CosineTableForLSFConverion
@ -1546,7 +1557,8 @@ func (d *Decoder) silkFrameReconstruction(
eQ23 []int32,
LTPscaleQ14 float32,
wQ2 int16,
aQ12, gainQ16, out []float32,
aQ12 [][]float32,
gainQ16, out []float32,
) {
// let n be the number of samples in a subframe
//
@ -1573,6 +1585,18 @@ func (d *Decoder) silkFrameReconstruction(
// s be the index of the current subframe in this SILK frame
// (0 or 1 for 10 ms frames, or 0 to 3 for 20 ms frames)
for s := 0; s < subframeCount; s++ {
// For 20 ms SILK frames, the first half of the frame (i.e., the first
// two subframes) may use normalized LSF coefficients that are
// interpolated between the decoded LSFs for the most recent coded frame
// (in the same channel) and the current frame
//
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.5.5
aQ12Index := 0
if s > 1 && len(aQ12) > 1 {
aQ12Index = 1
}
// j be the index of the first sample in the residual corresponding to
// the current subframe.
//
@ -1585,11 +1609,11 @@ func (d *Decoder) silkFrameReconstruction(
//
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.9.1
if signalType == frameSignalTypeVoiced {
d.ltpSynthesis(out, signalType, bQ7, pitchLags, eQ23, n, j, s, dLPC, LTPscaleQ14, bandwidth, wQ2, aQ12, gainQ16, lpc, res, resLag)
d.ltpSynthesis(out, signalType, bQ7, pitchLags, eQ23, n, j, s, dLPC, LTPscaleQ14, bandwidth, wQ2, aQ12[aQ12Index], gainQ16, lpc, res, resLag)
}
//https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.9.2
d.lpcSynthesis(out[n*s:], bandwidth, n, s, dLPC, aQ12, res, gainQ16, lpc)
d.lpcSynthesis(out[n*s:], bandwidth, n, s, dLPC, aQ12[aQ12Index], res, gainQ16, lpc)
}
}
@ -1664,14 +1688,15 @@ func (d *Decoder) Decode(in []byte, out []float32, isStereo bool, nanoseconds in
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.5.5
n1Q15, wQ2 := d.normalizeLSFInterpolation(nlsfQ15)
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.5.6
a32Q17 := d.convertNormalizedLSFsToLPCCoefficients(n1Q15, bandwidth)
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.5.7
d.limitLPCCoefficientsRange(a32Q17)
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.5.8
aQ12 := d.limitLPCFilterPredictionGain(a32Q17)
// For 20 ms SILK frames, the first half of the frame (i.e., the first
// two subframes) may use normalized LSF coefficients that are
// interpolated between the decoded LSFs for the most recent coded frame
// (in the same channel) and the current frame
//
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.5.5
aQ12 := [][]float32{}
aQ12 = d.generateAQ12(n1Q15, bandwidth, aQ12)
aQ12 = d.generateAQ12(nlsfQ15, bandwidth, aQ12)
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.6.1
lagMax, pitchLags := d.decodePitchLags(signalType, bandwidth)
@ -1707,7 +1732,8 @@ func (d *Decoder) Decode(in []byte, out []float32, isStereo bool, nanoseconds in
eQ23,
LTPscaleQ14,
wQ2,
aQ12, gainQ16, out,
aQ12,
gainQ16, out,
)
// n0Q15 is the LSF coefficients decoded for the prior frame

View file

@ -105,10 +105,7 @@ func TestNormalizeLineSpectralFrequencyCoefficients(t *testing.T) {
func TestNormalizeLSFInterpolation(t *testing.T) {
t.Run("wQ2 == 4", func(t *testing.T) {
d := &Decoder{rangeDecoder: createRangeDecoder(testSilkFrame(), 55, 493249168, 174371199)}
expectedN1Q15 := []int16{
2132, 3584, 5504, 7424, 9472, 11392, 13440, 15360, 17280,
19200, 21120, 23040, 25088, 27008, 28928, 30848,
}
var expectedN1Q15 []int16
actualN1Q15, _ := d.normalizeLSFInterpolation(expectedN1Q15)
if !reflect.DeepEqual(actualN1Q15, expectedN1Q15) {