Add stubs for rest of Silk Decoder

Also include the implemented excitation functions now that we have
reached them.
This commit is contained in:
Sean DuBois 2022-09-05 23:50:07 -04:00
parent 5919d29d78
commit f20a9eedf1
4 changed files with 100 additions and 10 deletions

View file

@ -38,7 +38,7 @@ func (d *Decoder) Decode(in []byte) (bandwidth Bandwidth, isStereo bool, frames
}
for _, encodedFrame := range encodedFrames {
decoded, err := d.silkDecoder.Decode(encodedFrame, tocHeader.isStereo(), cfg.frameDuration().nanoseconds(), silk.Bandwidth(cfg.bandwidth()))
decoded, err := d.silkDecoder.Decode(encodedFrame, []byte{}, tocHeader.isStereo(), cfg.frameDuration().nanoseconds(), silk.Bandwidth(cfg.bandwidth()))
if err != nil {
return 0, false, nil, err
}

View file

@ -1067,6 +1067,63 @@ func (d *Decoder) limitLPCFilterPredictionGain(a32Q17 []int32) (aQ12 []float64)
return
}
// decodePitchLags is the entry point for pitch lag decoding. Pitch lags are
// not decoded yet: a voiced frame is rejected with
// errUnsupportedVoicedFrames, while any other signal type is a no-op that
// returns nil.
//
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.6.1
func (d *Decoder) decodePitchLags(signalType frameSignalType) error {
	// Guard clause: everything except a voiced frame needs no work here.
	if signalType != frameSignalTypeVoiced {
		return nil
	}

	return errUnsupportedVoicedFrames
}
// decodeLTPScalingParamater yields the LTP scaling factor for a frame.
// The scaling parameter allows the encoder to trade off the prediction gain
// between packets against the recovery time after packet loss.
//
// Only the default case is handled so far: frames that are not voiced get
// the default factor, and voiced frames produce errUnsupportedVoicedFrames.
//
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.6.3
func (d *Decoder) decodeLTPScalingParamater(signalType frameSignalType) (float64, error) {
	// Frames that do not code the scaling parameter use the default factor
	// of 15565 (approximately 0.95).
	const defaultScalingFactor = 15565.0

	// Per the RFC, an LTP scaling parameter appears after the LTP filter
	// coefficients if and only if
	//
	//  o  This is a voiced frame (see Section 4.2.7.3), and
	//  o  Either
	//     *  This SILK frame corresponds to the first time interval of the
	//        current Opus frame for its type (LBRR or regular), or
	//
	//     *  This is an LBRR frame where the LBRR flags (see Section 4.2.4)
	//        indicate the previous LBRR frame in the same channel is not
	//        coded.
	if signalType == frameSignalTypeVoiced {
		// TODO
		return 0, errUnsupportedVoicedFrames
	}

	return defaultScalingFactor, nil
}
// decodeLTPFilterCoefficients is the entry point for decoding the LTP filter
// coefficients. SILK uses a separate 5-tap pitch filter for each subframe,
// selected from one of three codebooks.
//
// The codebook lookup is not implemented yet: voiced frames are rejected
// with errUnsupportedVoicedFrames and every other signal type returns nil
// without doing any work.
//
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.6.2
func (d *Decoder) decodeLTPFilterCoefficients(signalType frameSignalType) error {
	switch signalType {
	case frameSignalTypeVoiced:
		return errUnsupportedVoicedFrames
	default:
		// TODO
		return nil
	}
}
// ltpSynthesis is a placeholder for the LTP synthesis stage of the decoder.
// Its body is currently empty, so calling it has no effect.
//
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.9.1
func (d *Decoder) ltpSynthesis() {
}
// lpcSynthesis is a placeholder for the LPC synthesis stage of the decoder.
// Its body is currently empty, so calling it has no effect.
//
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.9.2
func (d *Decoder) lpcSynthesis() {
}
// Decode decodes many SILK subframes.
// An overview of the decoder is given in Figure 14.
//
@ -1099,7 +1156,7 @@ func (d *Decoder) limitLPCFilterPredictionGain(a32Q17 []int32) (aQ12 []float64)
// 8: Resampled signal
//
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.1
func (d *Decoder) Decode(in []byte, isStereo bool, nanoseconds int, bandwidth Bandwidth) (decoded []byte, err error) {
func (d *Decoder) Decode(in, out []byte, isStereo bool, nanoseconds int, bandwidth Bandwidth) ([]byte, error) {
if nanoseconds != nanoseconds20Ms {
return nil, errUnsupportedSilkFrameDuration
} else if isStereo {
@ -1113,7 +1170,7 @@ func (d *Decoder) Decode(in []byte, isStereo bool, nanoseconds int, bandwidth Ba
return nil, errUnsupportedSilkLowBitrateRedundancy
}
signalType, _ := d.determineFrameType(voiceActivityDetected)
signalType, quantizationOffsetType := d.determineFrameType(voiceActivityDetected)
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.4
d.decodeSubframeQuantizations(signalType)
@ -1145,10 +1202,43 @@ func (d *Decoder) Decode(in []byte, isStereo bool, nanoseconds int, bandwidth Ba
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.5.8
d.limitLPCFilterPredictionGain(a32Q17)
if signalType == frameSignalTypeVoiced {
return nil, errUnsupportedVoicedFrames
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.6.1
if err := d.decodePitchLags(signalType); err != nil {
return nil, err
}
d.isPreviousFrameVoiced = signalType == frameSignalTypeVoiced
return
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.6.2
if err := d.decodeLTPFilterCoefficients(signalType); err != nil {
return nil, err
}
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.6.3
_, err = d.decodeLTPScalingParamater(signalType)
if err != nil {
return nil, err
}
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.7
lcgSeed := d.decodeLinearCongruentialGeneratorSeed()
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.8
shellblocks := d.decodeShellblocks(nanoseconds, bandwidth)
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.8.1
rateLevel := d.decodeRatelevel(signalType == frameSignalTypeVoiced)
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.8.2
pulsecounts, lsbcounts := d.decodePulseAndLSBCounts(shellblocks, rateLevel)
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.8.6
d.decodeExcitation(signalType, quantizationOffsetType, lcgSeed, pulsecounts, lsbcounts)
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.9.1
d.ltpSynthesis()
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.9.2
d.lpcSynthesis()
d.isPreviousFrameVoiced = signalType == frameSignalTypeVoiced
return out, nil
}

View file

@ -28,7 +28,7 @@ func createRangeDecoder(data []byte, bitsRead uint, rangeSize uint32, highAndCod
func TestDecode20MsOnly(t *testing.T) {
d := &Decoder{}
_, err := d.Decode(testSilkFrame(), false, 1, BandwidthWideband)
_, err := d.Decode(testSilkFrame(), []byte{}, false, 1, BandwidthWideband)
if !errors.Is(err, errUnsupportedSilkFrameDuration) {
t.Fatal(err)
}
@ -36,7 +36,7 @@ func TestDecode20MsOnly(t *testing.T) {
func TestDecodeStereoTODO(t *testing.T) {
d := &Decoder{}
_, err := d.Decode(testSilkFrame(), true, nanoseconds20Ms, BandwidthWideband)
_, err := d.Decode(testSilkFrame(), []byte{}, true, nanoseconds20Ms, BandwidthWideband)
if !errors.Is(err, errUnsupportedSilkStereo) {
t.Fatal(err)
}

View file

@ -6,6 +6,6 @@ var (
errUnsupportedSilkFrameDuration = errors.New("only silk frames with a duration of 20ms supported")
errUnsupportedSilkStereo = errors.New("silk decoder does not support stereo")
errUnsupportedSilkLowBitrateRedundancy = errors.New("silk decoder does not low bit-rate redundancy")
errUnsupportedLSFInterpolation = errors.New("silk decoder does not support LSF Interpolation yet")
errUnsupportedLSFInterpolation = errors.New("silk decoder does not support LSF Interpolation")
errUnsupportedVoicedFrames = errors.New("silk decoder does not support voiced frames")
)