Implement Pitch Lags

Defined in RFC 6716, Section 4.2.7.6.1.
This commit is contained in:
Sean DuBois 2022-09-11 23:32:29 -04:00
parent 85ca7b30c0
commit 87d5d99155
4 changed files with 448 additions and 8 deletions

View file

@ -818,4 +818,220 @@ var (
-3703, -3745, -3784, -3822, -3857, -3889, -3920, -3948, -3973, -3997,
-4017, -4036, -4052, -4065, -4076, -4085, -4091, -4095, -4096,
}
//
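// NOTE(review): leftover stub of a C declaration, kept for reference. Its
// name and [34][4] shape suggest it corresponds to the MB/WB 20 ms codebook
// (Table 36) rather than to Table 33 directly below -- confirm.
//static const int8_t silk_pitch_offset_mbwb20ms[34][4] = {
//};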
// +-------+------------------+
// | Index | Subframe Offsets |
// +-------+------------------+
// | 0 | 0 0 |
// | | |
// | 1 | 1 0 |
// | | |
// | 2 | 0 1 |
// +-------+------------------+
//
// Table 33: Codebook Vectors for Subframe Pitch Contour: NB, 10 ms Frames
subframePitchCounterNarrowband10Ms = [][]int8{
{0, 0},
{1, 0},
{0, 1},
}
// +-------+------------------+
// | Index | Subframe Offsets |
// +-------+------------------+
// | 0 | 0 0 0 0 |
// | | |
// | 1 | 2 1 0 -1 |
// | | |
// | 2 | -1 0 1 2 |
// | | |
// | 3 | -1 0 0 1 |
// | | |
// | 4 | -1 0 0 0 |
// | | |
// | 5 | 0 0 0 1 |
// | | |
// | 6 | 0 0 1 1 |
// | | |
// | 7 | 1 1 0 0 |
// | | |
// | 8 | 1 0 0 0 |
// | | |
// | 9 | 0 0 0 -1 |
// | | |
// | 10 | 1 0 0 -1 |
// +-------+------------------+
//
// Table 34: Codebook Vectors for Subframe Pitch Contour: NB, 20 ms Frames
subframePitchCounterNarrowband20Ms = [][]int8{
{0, 0, 0, 0},
{2, 1, 0, -1},
{-1, 0, 1, 2},
{-1, 0, 0, 1},
{-1, 0, 0, 0},
{0, 0, 0, 1},
{0, 0, 1, 1},
{1, 1, 0, 0},
{1, 0, 0, 0},
{0, 0, 0, -1},
{1, 0, 0, -1},
}
// +-------+------------------+
// | Index | Subframe Offsets |
// +-------+------------------+
// | 0 | 0 0 |
// | | |
// | 1 | 0 1 |
// | | |
// | 2 | 1 0 |
// | | |
// | 3 | -1 1 |
// | | |
// | 4 | 1 -1 |
// | | |
// | 5 | -1 2 |
// | | |
// | 6 | 2 -1 |
// | | |
// | 7 | -2 2 |
// | | |
// | 8 | 2 -2 |
// | | |
// | 9 | -2 3 |
// | | |
// | 10 | 3 -2 |
// | | |
// | 11 | -3 3 |
// +-------+------------------+
//
// Table 35: Codebook Vectors for Subframe Pitch Contour: MB or WB, 10 ms Frames
subframePitchCounterMediumbandOrWideband10Ms = [][]int8{
{0, 0},
{0, 1},
{1, 0},
{-1, 1},
{1, -1},
{-1, 2},
{2, -1},
{-2, 2},
{2, -2},
{-2, 3},
{3, -2},
{-3, 3},
}
// +-------+------------------+
// | Index | Subframe Offsets |
// +-------+------------------+
// | 0 | 0 0 0 0 |
// | | |
// | 1 | 0 0 1 1 |
// | | |
// | 2 | 1 1 0 0 |
// | | |
// | 3 | -1 0 0 0 |
// | | |
// | 4 | 0 0 0 1 |
// | | |
// | 5 | 1 0 0 0 |
// | | |
// | 6 | -1 0 0 1 |
// | | |
// | 7 | 0 0 0 -1 |
// | | |
// | 8 | -1 0 1 2 |
// | | |
// | 9 | 1 0 0 -1 |
// | | |
// | 10 | -2 -1 1 2 |
// | | |
// | 11 | 2 1 0 -1 |
// | | |
// | 12 | -2 0 0 2 |
// | | |
// | 13 | -2 0 1 3 |
// | | |
// | 14 | 2 1 -1 -2 |
// | | |
// | 15 | -3 -1 1 3 |
// | | |
// | 16 | 2 0 0 -2 |
// | | |
// | 17 | 3 1 0 -2 |
// | | |
// | 18 | -3 -1 2 4 |
// | | |
// | 19 | -4 -1 1 4 |
// | | |
// | 20 | 3 1 -1 -3 |
// | | |
// | 21 | -4 -1 2 5 |
// | | |
// | 22 | 4 2 -1 -3 |
// | | |
// | 23 | 4 1 -1 -4 |
// | | |
// | 24 | -5 -1 2 6 |
// | | |
// | 25 | 5 2 -1 -4 |
// | | |
// | 26 | -6 -2 2 6 |
// | | |
// | 27 | -5 -2 2 5 |
// | | |
// | 28 | 6 2 -1 -5 |
// | | |
// | 29 | -7 -2 3 8 |
// | | |
// | 30 | 6 2 -2 -6 |
// | | |
// | 31 | 5 2 -2 -5 |
// | | |
// | 32 | 8 3 -2 -7 |
// | | |
// | 33 | -9 -3 3 9 |
// +-------+------------------+
//
// Table 36: Codebook Vectors for Subframe Pitch Contour: MB or WB, 20 ms Frames
subframePitchCounterMediumbandOrWideband20Ms = [][]int8{
{0, 0, 0, 0},
{0, 0, 1, 1},
{1, 1, 0, 0},
{-1, 0, 0, 0},
{0, 0, 0, 1},
{1, 0, 0, 0},
{-1, 0, 0, 1},
{0, 0, 0, -1},
{-1, 0, 1, 2},
{1, 0, 0, -1},
{-2, -1, 1, 2},
{2, 1, 0, -1},
{-2, 0, 0, 2},
{-2, 0, 1, 3},
{2, 1, -1, -2},
{-3, -1, 1, 3},
{2, 0, 0, -2},
{3, 1, 0, -2},
{-3, -1, 2, 4},
{-4, -1, 1, 4},
{3, 1, -1, -3},
{-4, -1, 2, 5},
{4, 2, -1, -3},
{4, 1, -1, -4},
{-5, -1, 2, 6},
{5, 2, -1, -4},
{-6, -2, 2, 6},
{-5, -2, 2, 5},
{6, 2, -1, -5},
{-7, -2, 3, 8},
{6, 2, -2, -6},
{5, 2, -2, -5},
{8, 3, -2, -7},
{-9, -3, 3, 9},
}
)

View file

@ -1076,12 +1076,150 @@ func (d *Decoder) limitLPCFilterPredictionGain(a32Q17 []int32) (aQ12 []float64)
}
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.6.1
func (d *Decoder) decodePitchLags(signalType frameSignalType) error {
if signalType == frameSignalTypeVoiced {
return errUnsupportedVoicedFrames
// decodePitchLags reads the primary pitch lag and the pitch contour index
// from the range decoder and derives the pitch lag result for the frame.
//
// For any signalType other than frameSignalTypeVoiced it returns the zero
// values (0, nil) without reading from the range decoder.
//
// Returns:
//   - lag: the primary pitch lag, lag_high*lag_scale + lag_low + lag_min.
//   - pitchLags: the clamped per-subframe lags. NOTE(review): only the entry
//     for subframe 0 is produced at the moment, so the slice has length 1.
func (d *Decoder) decodePitchLags(signalType frameSignalType, bandwidth Bandwidth) (lag uint32, pitchLags []int) {
if signalType != frameSignalTypeVoiced {
return
}
// NOTE(review): this `return nil` does not fit the (uint32, []int) result
// list; it looks like a removed line of the previous implementation that
// the diff rendering left in place.
return nil
var (
lagMin uint32
lagMax uint32
)
// The primary lag index is coded either relative to the primary lag of
// the prior frame in the same channel or as an absolute index.
// Absolute coding is used if and only if
//
// * This is the first SILK frame of its type (LBRR or regular) for
// this channel in the current Opus frame,
//
// * The previous SILK frame of the same type (LBRR or regular) for
// this channel in the same Opus frame was not coded, or
//
// * That previous SILK frame was coded, but was not voiced (see
// Section 4.2.7.3).
//
// NOTE(review): lagAbsolute is hard-coded to true, so none of the three
// conditions above is actually evaluated and the else branch below (relative
// coding) is never taken.
lagAbsolute := true
if lagAbsolute {
// With absolute coding, the primary pitch lag may range from 2 ms
// (inclusive) up to 18 ms (exclusive), corresponding to pitches from
// 500 Hz down to 55.6 Hz, respectively. It is comprised of a high part
// and a low part, where the decoder first reads the high part using the
// 32-entry codebook in Table 29 and then the low part using the
// codebook corresponding to the current audio bandwidth from Table 30.
//
// +------------+------------------------+-------+----------+----------+
// | Audio | PDF | Scale | Minimum | Maximum |
// | Bandwidth | | | Lag | Lag |
// +------------+------------------------+-------+----------+----------+
// | NB | {64, 64, 64, 64}/256 | 4 | 16 | 144 |
// | | | | | |
// | MB | {43, 42, 43, 43, 42, | 6 | 24 | 216 |
// | | 43}/256 | | | |
// | | | | | |
// | WB | {32, 32, 32, 32, 32, | 8 | 32 | 288 |
// | | 32, 32, 32}/256 | | | |
// +------------+------------------------+-------+----------+----------+
// Table 30: PDF for Low Part of Primary Pitch Lag
var (
lowPartICDF []uint
lagScale uint32
)
// NOTE(review): there is no default case. For any bandwidth other than the
// three listed, lowPartICDF stays nil and lagScale/lagMin/lagMax stay 0.
switch bandwidth {
case BandwidthNarrowband:
lowPartICDF = icdfPrimaryPitchLagLowPartNarrowband
lagScale = 4
lagMin = 16
lagMax = 144
case BandwidthMediumband:
lowPartICDF = icdfPrimaryPitchLagLowPartMediumband
lagScale = 6
lagMin = 24
lagMax = 216
case BandwidthWideband:
lowPartICDF = icdfPrimaryPitchLagLowPartWideband
lagScale = 8
lagMin = 32
lagMax = 288
}
// The high part is read before the low part, matching the order described
// above.
lagHigh := d.rangeDecoder.DecodeSymbolWithICDF(icdfPrimaryPitchLagHighPart)
lagLow := d.rangeDecoder.DecodeSymbolWithICDF(lowPartICDF)
// The final primary pitch lag is then
//
// lag = lag_high*lag_scale + lag_low + lag_min
//
// where lag_high is the high part, lag_low is the low part, and
// lag_scale and lag_min are the values from the "Scale" and "Minimum
// Lag" columns of Table 30, respectively.
lag = lagHigh*lagScale + lagLow + lagMin
} else {
// TODO
}
// After the primary pitch lag, a "pitch contour", stored as a single
// entry from one of four small VQ codebooks, gives lag offsets for each
// subframe in the current SILK frame. The codebook index is decoded
// using one of the PDFs in Table 32 depending on the current frame size
// and audio bandwidth. Tables 33 through 36 give the corresponding
// offsets to apply to the primary pitch lag for each subframe given the
// decoded codebook index.
//
// +-----------+--------+----------+-----------------------------------+
// | Audio | SILK | Codebook | PDF |
// | Bandwidth | Frame | Size | |
// | | Size | | |
// +-----------+--------+----------+-----------------------------------+
// | NB | 10 ms | 3 | {143, 50, 63}/256 |
// | | | | |
// | NB | 20 ms | 11 | {68, 12, 21, 17, 19, 22, 30, 24, |
// | | | | 17, 16, 10}/256 |
// | | | | |
// | MB or WB | 10 ms | 12 | {91, 46, 39, 19, 14, 12, 8, 7, 6, |
// | | | | 5, 5, 4}/256 |
// | | | | |
// | MB or WB | 20 ms | 34 | {33, 22, 18, 16, 15, 14, 14, 13, |
// | | | | 13, 10, 9, 9, 8, 6, 6, 6, 5, 4, |
// | | | | 4, 4, 3, 3, 3, 2, 2, 2, 2, 2, 2, |
// | | | | 2, 1, 1, 1, 1}/256 |
// +-----------+--------+----------+-----------------------------------+
//
// Table 32: PDFs for Subframe Pitch Contour
// The final pitch lag for each subframe is assembled in
// silk_decode_pitch() (decode_pitch.c). Let lag be the primary pitch
// lag for the current SILK frame, contour_index be index of the VQ
// codebook, and lag_cb[contour_index][k] be the corresponding entry of
// the codebook from the appropriate table given above for the k'th
// subframe.
var (
lagCb [][]int8
lagIcdf []uint
)
// Only the 20 ms codebooks and ICDFs are selected here; the 10 ms variants
// are never consulted by this function. NOTE(review): presumably 10 ms
// frames are not supported yet -- confirm. As above, there is no default
// case, so an unlisted bandwidth leaves lagCb and lagIcdf nil.
switch bandwidth {
case BandwidthNarrowband:
lagCb = subframePitchCounterNarrowband20Ms
lagIcdf = icdfSubframePitchContourNarrowband20Ms
case BandwidthMediumband, BandwidthWideband:
lagCb = subframePitchCounterMediumbandOrWideband20Ms
lagIcdf = icdfSubframePitchContourMediumbandOrWideband20Ms
}
contourIndex := d.rangeDecoder.DecodeSymbolWithICDF(lagIcdf)
// Then the final pitch lag for that subframe is
//
// pitch_lags[k] = clamp(lag_min, lag + lag_cb[contour_index][k],
// lag_max)
//
// Only k = 0 is evaluated below. The int8 codebook offset is converted to
// uint32 before the addition, so a negative offset relies on unsigned
// wraparound; converting the sum to int32 yields the signed result again.
// NOTE(review): clamp is defined elsewhere; it is assumed to take
// (low, value, high) as in the formula above -- confirm.
pitchLags = []int{
int(clamp(int32(lagMin), int32(lag+uint32(lagCb[contourIndex][0])), int32(lagMax))),
}
return
}
// This allows the encoder to trade off the prediction gain between
@ -1269,7 +1407,7 @@ func (d *Decoder) Decode(in []byte, out []float64, isStereo bool, nanoseconds in
gainQ16 := d.decodeSubframeQuantizations(signalType)
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.5.1
I1 := d.normalizeLineSpectralFrequencyStageOne(voiceActivityDetected, bandwidth)
I1 := d.normalizeLineSpectralFrequencyStageOne(signalType == frameSignalTypeVoiced, bandwidth)
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.5.2
dLPC, resQ10 := d.normalizeLineSpectralFrequencyStageTwo(bandwidth, I1)
@ -1293,9 +1431,7 @@ func (d *Decoder) Decode(in []byte, out []float64, isStereo bool, nanoseconds in
aQ12 := d.limitLPCFilterPredictionGain(a32Q17)
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.6.1
if err := d.decodePitchLags(signalType); err != nil {
return err
}
d.decodePitchLags(signalType, bandwidth)
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.6.2
if err := d.decodeLTPFilterCoefficients(signalType); err != nil {

View file

@ -319,3 +319,17 @@ func TestLPCSynthesis(t *testing.T) {
t.Fatal()
}
}
// TestDecodePitchLags runs decodePitchLags on a fixed SILK frame, decoded as
// a voiced wideband frame, and checks the primary pitch lag and the
// resulting pitch lags.
//
// The range decoder is built with explicit state values (presumably the
// decoder state captured just before the pitch lags in this frame), so only
// the pitch lag decoding step is exercised.
func TestDecodePitchLags(t *testing.T) {
	const wantLag = uint32(206)
	wantPitchLags := []int{206}

	silkFrame := []byte{0xb4, 0xe2, 0x2c, 0xe, 0x10, 0x65, 0x1d, 0xa9, 0x7, 0x5c, 0x36, 0x8f, 0x96, 0x7b, 0xf4, 0x89, 0x41, 0x55, 0x98, 0x7a, 0x39, 0x2e, 0x6b, 0x71, 0xa4, 0x3, 0x70, 0xbf}
	d := &Decoder{rangeDecoder: createRangeDecoder(silkFrame, 73, 30770362, 1380489)}

	lag, pitchLags := d.decodePitchLags(frameSignalTypeVoiced, BandwidthWideband)

	// Report got/want so a failure is diagnosable without a debugger.
	if lag != wantLag {
		t.Fatalf("primary pitch lag: got %d, want %d", lag, wantLag)
	}

	if !reflect.DeepEqual(pitchLags, wantPitchLags) {
		t.Fatalf("pitch lags: got %v, want %v", pitchLags, wantPitchLags)
	}
}

View file

@ -637,4 +637,78 @@ var (
icdfExcitationSignVoicedSignalHighQuantization4Pulse = []uint{256, 168, 256}
icdfExcitationSignVoicedSignalHighQuantization5Pulse = []uint{256, 161, 256}
icdfExcitationSignVoicedSignalHighQuantization6PlusPulse = []uint{256, 154, 256}
// +-------------------------------------------------------------------+
// | PDF |
// +-------------------------------------------------------------------+
// | {3, 3, 6, 11, 21, 30, 32, 19, 11, 10, 12, 13, 13, 12, 11, 9, 8, |
// | 7, 6, 4, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1}/256 |
// +-------------------------------------------------------------------+
//
// Table 29: PDF for High Part of Primary Pitch Lag
//
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.6.1
icdfPrimaryPitchLagHighPart = []uint{
256, 3, 6, 12, 23, 44, 74, 106, 125, 136,
146, 158, 171, 184, 196, 207, 216, 224, 231, 237,
241, 243, 245, 247, 248, 249, 250, 251, 252, 253,
254, 255, 256,
}
// +------------+------------------------+-------+----------+----------+
// | Audio | PDF | Scale | Minimum | Maximum |
// | Bandwidth | | | Lag | Lag |
// +------------+------------------------+-------+----------+----------+
// | NB | {64, 64, 64, 64}/256 | 4 | 16 | 144 |
// | | | | | |
// | MB | {43, 42, 43, 43, 42, | 6 | 24 | 216 |
// | | 43}/256 | | | |
// | | | | | |
// | WB | {32, 32, 32, 32, 32, | 8 | 32 | 288 |
// | | 32, 32, 32}/256 | | | |
// +------------+------------------------+-------+----------+----------+
//
// Table 30: PDF for Low Part of Primary Pitch Lag
//
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.6.1
icdfPrimaryPitchLagLowPartNarrowband = []uint{256, 64, 128, 192, 256}
icdfPrimaryPitchLagLowPartMediumband = []uint{256, 43, 85, 128, 171, 213, 256}
icdfPrimaryPitchLagLowPartWideband = []uint{256, 32, 64, 96, 128, 160, 192, 224, 256}
// +-----------+--------+----------+-----------------------------------+
// | Audio | SILK | Codebook | PDF |
// | Bandwidth | Frame | Size | |
// | | Size | | |
// +-----------+--------+----------+-----------------------------------+
// | NB | 10 ms | 3 | {143, 50, 63}/256 |
// | | | | |
// | NB | 20 ms | 11 | {68, 12, 21, 17, 19, 22, 30, 24, |
// | | | | 17, 16, 10}/256 |
// | | | | |
// | MB or WB | 10 ms | 12 | {91, 46, 39, 19, 14, 12, 8, 7, 6, |
// | | | | 5, 5, 4}/256 |
// | | | | |
// | MB or WB | 20 ms | 34 | {33, 22, 18, 16, 15, 14, 14, 13, |
// | | | | 13, 10, 9, 9, 8, 6, 6, 6, 5, 4, |
// | | | | 4, 4, 3, 3, 3, 2, 2, 2, 2, 2, 2, |
// | | | | 2, 1, 1, 1, 1}/256 |
// +-----------+--------+----------+-----------------------------------+
//
// Table 32: PDFs for Subframe Pitch Contour
//
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.6.1
icdfSubframePitchContourNarrowband10Ms = []uint{
256, 143, 193, 256,
}
icdfSubframePitchContourNarrowband20Ms = []uint{
256, 68, 80, 101, 118, 137, 159, 189, 213, 230, 246, 256,
}
icdfSubframePitchContourMediumbandOrWideband10Ms = []uint{
256, 91, 137, 176, 195, 209, 221, 229, 236, 242, 247, 252, 256,
}
icdfSubframePitchContourMediumbandOrWideband20Ms = []uint{
256, 33, 55, 73, 89, 104, 118, 132, 145, 158, 168, 177,
186, 194, 200, 206, 212, 217, 221, 225, 229, 232, 235, 238,
240, 242, 244, 246, 248, 250, 252, 253, 254, 255, 256,
}
)