diff --git a/internal/silk/decoder.go b/internal/silk/decoder.go index b29cf11..3daf3cd 100644 --- a/internal/silk/decoder.go +++ b/internal/silk/decoder.go @@ -1348,35 +1348,8 @@ func (d *Decoder) ltpSynthesis( LTPScaleQ14 float32, bandwidth Bandwidth, wQ2 int16, - aQ12, gainQ16, lpc []float32, -) (res []float32) { - // For unvoiced frames (see Section 4.2.7.3), the LPC residual for i - // such that j <= i < (j + n) is simply a normalized copy of the - // excitation signal, i.e., - // - // e_Q23[i] - // res[i] = --------- - // 2.0**23 - - res = make([]float32, len(eQ23)) - if signalType != frameSignalTypeVoiced { - for i := j; i < (j + n); i++ { - res[i] = float32(eQ23[i]) / 8388608 - } - return - } - - // Voiced SILK frames, on the other hand, pass the excitation through an - // LTP filter using the parameters decoded in Section 4.2.7.6 to produce - // an LPC residual. - for i := range res { - res[i] = float32(eQ23[i]) / 8388608.0 - } - - // Voiced SILK frames, on the other hand, pass the excitation through an - // LTP filter using the parameters decoded in Section 4.2.7.6 to produce - // an LPC residual. - + aQ12, gainQ16, lpc, res []float32, +) { // If this is the third or fourth subframe of a 20 ms SILK frame and the LSF // interpolation factor, w_Q2 (see Section 4.2.7.5.5), is less than 4, // then let out_end be set to (j - (s-2)*n) and let LTP_scale_Q14 be set @@ -1402,6 +1375,7 @@ func (d *Decoder) ltpSynthesis( // out[i] - \ out[i-k-1] * --------, 1.0) // /_ 4096.0 // k=0 + var outVal float32 for i := (j - pitchLags[s] - 2); i < out_end; i++ { index := i + j @@ -1587,6 +1561,18 @@ func (d *Decoder) silkFrameReconstruction( // previous subframe or zeros in the first subframe for this channel lpc := make([]float32, n*subframeCount) + // For unvoiced frames (see Section 4.2.7.3), the LPC residual for i + // such that j <= i < (j + n) is simply a normalized copy of the + // excitation signal, i.e., + // + // e_Q23[i] + // res[i] = --------- + // 2.0**23 + res := make([]float32, len(eQ23)) + for i := range res { + res[i] = float32(eQ23[i]) / 8388608.0 + } + // s be the index of the current subframe in this SILK frame // (0 or 1 for 10 ms frames, or 0 to 3 for 20 ms frames) for s := 0; s < subframeCount; s++ { @@ -1596,8 +1582,14 @@ func (d *Decoder) silkFrameReconstruction( // https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.9 j := n * s + // Voiced SILK frames, on the other hand, pass the excitation through an + // LTP filter using the parameters decoded in Section 4.2.7.6 to produce + // an LPC residual. + // // https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.9.1 - res := d.ltpSynthesis(out, signalType, bQ7, pitchLags, eQ23, n, j, s, dLPC, LTPscaleQ14, bandwidth, wQ2, aQ12, gainQ16, lpc) + if signalType == frameSignalTypeVoiced { + d.ltpSynthesis(out, signalType, bQ7, pitchLags, eQ23, n, j, s, dLPC, LTPscaleQ14, bandwidth, wQ2, aQ12, gainQ16, lpc, res) + } //https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.9.2 d.lpcSynthesis(out[n*s:], bandwidth, n, s, dLPC, aQ12, res, gainQ16, lpc)