Add stubs for rest of Silk Decoder
Also include the implemented excitation functions now that we have reached them.
This commit is contained in:
parent
5919d29d78
commit
f20a9eedf1
|
@ -38,7 +38,7 @@ func (d *Decoder) Decode(in []byte) (bandwidth Bandwidth, isStereo bool, frames
|
|||
}
|
||||
|
||||
for _, encodedFrame := range encodedFrames {
|
||||
decoded, err := d.silkDecoder.Decode(encodedFrame, tocHeader.isStereo(), cfg.frameDuration().nanoseconds(), silk.Bandwidth(cfg.bandwidth()))
|
||||
decoded, err := d.silkDecoder.Decode(encodedFrame, []byte{}, tocHeader.isStereo(), cfg.frameDuration().nanoseconds(), silk.Bandwidth(cfg.bandwidth()))
|
||||
if err != nil {
|
||||
return 0, false, nil, err
|
||||
}
|
||||
|
|
|
@ -1067,6 +1067,63 @@ func (d *Decoder) limitLPCFilterPredictionGain(a32Q17 []int32) (aQ12 []float64)
|
|||
return
|
||||
}
|
||||
|
||||
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.6.1
|
||||
func (d *Decoder) decodePitchLags(signalType frameSignalType) error {
|
||||
if signalType == frameSignalTypeVoiced {
|
||||
return errUnsupportedVoicedFrames
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// This allows the encoder to trade off the prediction gain between
|
||||
// packets against the recovery time after packet loss.
|
||||
//
|
||||
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.6.3
|
||||
func (d *Decoder) decodeLTPScalingParamater(signalType frameSignalType) (float64, error) {
|
||||
// An LTP scaling parameter appears after the LTP filter coefficients if
|
||||
// and only if
|
||||
//
|
||||
// o This is a voiced frame (see Section 4.2.7.3), and
|
||||
// o Either
|
||||
// * This SILK frame corresponds to the first time interval of the
|
||||
// current Opus frame for its type (LBRR or regular), or
|
||||
//
|
||||
// * This is an LBRR frame where the LBRR flags (see Section 4.2.4)
|
||||
// indicate the previous LBRR frame in the same channel is not
|
||||
// coded.
|
||||
|
||||
// Frames that do not code the scaling parameter
|
||||
// use the default factor of 15565 (approximately 0.95).
|
||||
if signalType != frameSignalTypeVoiced {
|
||||
return 15565.0, nil
|
||||
}
|
||||
|
||||
// TODO
|
||||
return 0, errUnsupportedVoicedFrames
|
||||
}
|
||||
|
||||
// SILK uses a separate 5-tap pitch filter for each subframe, selected
|
||||
// from one of three codebooks.
|
||||
//
|
||||
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.6.2
|
||||
func (d *Decoder) decodeLTPFilterCoefficients(signalType frameSignalType) error {
|
||||
if signalType == frameSignalTypeVoiced {
|
||||
return errUnsupportedVoicedFrames
|
||||
}
|
||||
|
||||
// TODO
|
||||
return nil
|
||||
}
|
||||
|
||||
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.9.1
|
||||
func (d *Decoder) ltpSynthesis() {
|
||||
}
|
||||
|
||||
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.9.2
|
||||
func (d *Decoder) lpcSynthesis() {
|
||||
}
|
||||
|
||||
// Decode decodes many SILK subframes
|
||||
// An overview of the decoder is given in Figure 14.
|
||||
//
|
||||
|
@ -1099,7 +1156,7 @@ func (d *Decoder) limitLPCFilterPredictionGain(a32Q17 []int32) (aQ12 []float64)
|
|||
// 8: Resampled signal
|
||||
//
|
||||
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.1
|
||||
func (d *Decoder) Decode(in []byte, isStereo bool, nanoseconds int, bandwidth Bandwidth) (decoded []byte, err error) {
|
||||
func (d *Decoder) Decode(in, out []byte, isStereo bool, nanoseconds int, bandwidth Bandwidth) ([]byte, error) {
|
||||
if nanoseconds != nanoseconds20Ms {
|
||||
return nil, errUnsupportedSilkFrameDuration
|
||||
} else if isStereo {
|
||||
|
@ -1113,7 +1170,7 @@ func (d *Decoder) Decode(in []byte, isStereo bool, nanoseconds int, bandwidth Ba
|
|||
return nil, errUnsupportedSilkLowBitrateRedundancy
|
||||
}
|
||||
|
||||
signalType, _ := d.determineFrameType(voiceActivityDetected)
|
||||
signalType, quantizationOffsetType := d.determineFrameType(voiceActivityDetected)
|
||||
|
||||
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.4
|
||||
d.decodeSubframeQuantizations(signalType)
|
||||
|
@ -1145,10 +1202,43 @@ func (d *Decoder) Decode(in []byte, isStereo bool, nanoseconds int, bandwidth Ba
|
|||
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.5.8
|
||||
d.limitLPCFilterPredictionGain(a32Q17)
|
||||
|
||||
if signalType == frameSignalTypeVoiced {
|
||||
return nil, errUnsupportedVoicedFrames
|
||||
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.6.1
|
||||
if err := d.decodePitchLags(signalType); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
d.isPreviousFrameVoiced = signalType == frameSignalTypeVoiced
|
||||
|
||||
return
|
||||
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.6.2
|
||||
if err := d.decodeLTPFilterCoefficients(signalType); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.6.3
|
||||
_, err = d.decodeLTPScalingParamater(signalType)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.7
|
||||
lcgSeed := d.decodeLinearCongruentialGeneratorSeed()
|
||||
|
||||
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.8
|
||||
shellblocks := d.decodeShellblocks(nanoseconds, bandwidth)
|
||||
|
||||
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.8.1
|
||||
rateLevel := d.decodeRatelevel(signalType == frameSignalTypeVoiced)
|
||||
|
||||
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.8.2
|
||||
pulsecounts, lsbcounts := d.decodePulseAndLSBCounts(shellblocks, rateLevel)
|
||||
|
||||
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.8.6
|
||||
d.decodeExcitation(signalType, quantizationOffsetType, lcgSeed, pulsecounts, lsbcounts)
|
||||
|
||||
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.9.1
|
||||
d.ltpSynthesis()
|
||||
|
||||
//https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.9.2
|
||||
d.lpcSynthesis()
|
||||
|
||||
d.isPreviousFrameVoiced = signalType == frameSignalTypeVoiced
|
||||
return out, nil
|
||||
}
|
||||
|
|
|
@ -28,7 +28,7 @@ func createRangeDecoder(data []byte, bitsRead uint, rangeSize uint32, highAndCod
|
|||
|
||||
func TestDecode20MsOnly(t *testing.T) {
|
||||
d := &Decoder{}
|
||||
_, err := d.Decode(testSilkFrame(), false, 1, BandwidthWideband)
|
||||
_, err := d.Decode(testSilkFrame(), []byte{}, false, 1, BandwidthWideband)
|
||||
if !errors.Is(err, errUnsupportedSilkFrameDuration) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
@ -36,7 +36,7 @@ func TestDecode20MsOnly(t *testing.T) {
|
|||
|
||||
func TestDecodeStereoTODO(t *testing.T) {
|
||||
d := &Decoder{}
|
||||
_, err := d.Decode(testSilkFrame(), true, nanoseconds20Ms, BandwidthWideband)
|
||||
_, err := d.Decode(testSilkFrame(), []byte{}, true, nanoseconds20Ms, BandwidthWideband)
|
||||
if !errors.Is(err, errUnsupportedSilkStereo) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
|
|
@ -6,6 +6,6 @@ var (
|
|||
errUnsupportedSilkFrameDuration = errors.New("only silk frames with a duration of 20ms supported")
|
||||
errUnsupportedSilkStereo = errors.New("silk decoder does not support stereo")
|
||||
errUnsupportedSilkLowBitrateRedundancy = errors.New("silk decoder does not low bit-rate redundancy")
|
||||
errUnsupportedLSFInterpolation = errors.New("silk decoder does not support LSF Interpolation yet")
|
||||
errUnsupportedLSFInterpolation = errors.New("silk decoder does not support LSF Interpolation")
|
||||
errUnsupportedVoicedFrames = errors.New("silk decoder does not support voiced frames")
|
||||
)
|
||||
|
|
Loading…
Reference in a new issue