Implement Excitation Sign Decoding

Specified in https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.8.5
This commit is contained in:
Sean DuBois 2022-08-08 23:50:53 -04:00
parent dd17102d36
commit cfae85659a
3 changed files with 310 additions and 6 deletions

View file

@ -491,7 +491,7 @@ func (d *Decoder) decodeLinearCongruentialGeneratorSeed() uint32 {
// position are required to have the same sign.
//
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.8
func (d *Decoder) decodeExcitation(nanoseconds int, bandwidth Bandwidth, voiceActivityDetected bool, lcgSeed uint32) {
func (d *Decoder) decodeExcitation(nanoseconds int, bandwidth Bandwidth, voiceActivityDetected bool, lcgSeed uint32, signalType frameSignalType, quantizationOffsetType frameQuantizationOffsetType) {
// SILK fixes the dimension of the codebook to N = 16. The excitation
// is made up of a number of "shell blocks", each 16 samples in size.
// Table 44 lists the number of shell blocks required for a SILK frame
@ -590,7 +590,7 @@ func (d *Decoder) decodeExcitation(nanoseconds int, bandwidth Bandwidth, voiceAc
//
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.8.3
excitation := make([]uint8, shellblocks*pulsecountLargestPartitionSize)
excitation := make([]int32, shellblocks*pulsecountLargestPartitionSize)
for i := range pulsecounts {
// This process skips partitions without any pulses, i.e., where
// the initial pulse count from Section 4.2.7.8.2 was zero, or where the
@ -601,10 +601,11 @@ func (d *Decoder) decodeExcitation(nanoseconds int, bandwidth Bandwidth, voiceAc
continue
}
excitationIndex := 16 * i
excitationIndex := pulsecountLargestPartitionSize * i
samplePartition16 := make([]uint8, 2)
samplePartition8 := make([]uint8, 2)
samplePartition4 := make([]uint8, 2)
samplePartition2 := make([]uint8, 2)
// The location of pulses is coded by recursively partitioning each
// block into halves, and coding how many pulses fall on the left side
@ -616,12 +617,171 @@ func (d *Decoder) decodeExcitation(nanoseconds int, bandwidth Bandwidth, voiceAc
for k := 0; k < 2; k++ {
d.partitionPulseCount(icdfPulseCountSplit4SamplePartitions, samplePartition8[k], samplePartition4)
for l := 0; l < 2; l++ {
d.partitionPulseCount(icdfPulseCountSplit2SamplePartitions, samplePartition4[l], excitation[excitationIndex:])
excitationIndex += 2
d.partitionPulseCount(icdfPulseCountSplit2SamplePartitions, samplePartition4[l], samplePartition2)
excitation[excitationIndex] = int32(samplePartition2[0])
excitationIndex++
excitation[excitationIndex] = int32(samplePartition2[1])
excitationIndex++
}
}
}
}
// After the decoder reads the pulse locations for all blocks, it reads
// the LSBs (if any) for each block in turn. Inside each block, it
// reads all the LSBs for each coefficient in turn, even those where no
// pulses were allocated, before proceeding to the next one.
//
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.8.4
for i := 0; i < len(excitation); i++ {
for bit := uint8(0); bit < lsbcounts[i/pulsecountLargestPartitionSize]; bit++ {
excitation[i] = (excitation[i] << 1) | int32(d.rangeDecoder.DecodeSymbolWithICDF(icdfExcitationLSB))
}
}
// After decoding the pulse locations and the LSBs, the decoder knows
// the magnitude of each coefficient in the excitation.
//
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.8.5
for i := 0; i < len(excitation); i++ {
// It then decodes a sign for all coefficients
// with a non-zero magnitude
//
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.8.5
if excitation[i] == 0 {
continue
}
var icdf []uint
pulsecount := pulsecounts[i/pulsecountLargestPartitionSize]
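// The sign is decoded using one of the PDFs from Table 52, chosen by the
// signal type, the quantization offset type, and the pulse count of the
// block that contains the coefficient.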
//
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.8.5
switch signalType {
case frameSignalTypeInactive:
switch quantizationOffsetType {
case frameQuantizationOffsetTypeLow:
switch pulsecount {
case 0:
icdf = icdfExcitationSignInactiveSignalLowQuantization0Pulse
case 1:
icdf = icdfExcitationSignInactiveSignalLowQuantization1Pulse
case 2:
icdf = icdfExcitationSignInactiveSignalLowQuantization2Pulse
case 3:
icdf = icdfExcitationSignInactiveSignalLowQuantization3Pulse
case 4:
icdf = icdfExcitationSignInactiveSignalLowQuantization4Pulse
case 5:
icdf = icdfExcitationSignInactiveSignalLowQuantization5Pulse
default:
icdf = icdfExcitationSignInactiveSignalLowQuantization6PlusPulse
}
case frameQuantizationOffsetTypeHigh:
switch pulsecount {
case 0:
icdf = icdfExcitationSignInactiveSignalHighQuantization0Pulse
case 1:
icdf = icdfExcitationSignInactiveSignalHighQuantization1Pulse
case 2:
icdf = icdfExcitationSignInactiveSignalHighQuantization2Pulse
case 3:
icdf = icdfExcitationSignInactiveSignalHighQuantization3Pulse
case 4:
icdf = icdfExcitationSignInactiveSignalHighQuantization4Pulse
case 5:
icdf = icdfExcitationSignInactiveSignalHighQuantization5Pulse
default:
icdf = icdfExcitationSignInactiveSignalHighQuantization6PlusPulse
}
}
case frameSignalTypeUnvoiced:
switch quantizationOffsetType {
case frameQuantizationOffsetTypeLow:
switch pulsecount {
case 0:
icdf = icdfExcitationSignUnvoicedSignalLowQuantization0Pulse
case 1:
icdf = icdfExcitationSignUnvoicedSignalLowQuantization1Pulse
case 2:
icdf = icdfExcitationSignUnvoicedSignalLowQuantization2Pulse
case 3:
icdf = icdfExcitationSignUnvoicedSignalLowQuantization3Pulse
case 4:
icdf = icdfExcitationSignUnvoicedSignalLowQuantization4Pulse
case 5:
icdf = icdfExcitationSignUnvoicedSignalLowQuantization5Pulse
default:
icdf = icdfExcitationSignUnvoicedSignalLowQuantization6PlusPulse
}
case frameQuantizationOffsetTypeHigh:
switch pulsecount {
case 0:
icdf = icdfExcitationSignUnvoicedSignalHighQuantization0Pulse
case 1:
icdf = icdfExcitationSignUnvoicedSignalHighQuantization1Pulse
case 2:
icdf = icdfExcitationSignUnvoicedSignalHighQuantization2Pulse
case 3:
icdf = icdfExcitationSignUnvoicedSignalHighQuantization3Pulse
case 4:
icdf = icdfExcitationSignUnvoicedSignalHighQuantization4Pulse
case 5:
icdf = icdfExcitationSignUnvoicedSignalHighQuantization5Pulse
default:
icdf = icdfExcitationSignUnvoicedSignalHighQuantization6PlusPulse
}
}
case frameSignalTypeVoiced:
switch quantizationOffsetType {
case frameQuantizationOffsetTypeLow:
switch pulsecount {
case 0:
icdf = icdfExcitationSignVoicedSignalLowQuantization0Pulse
case 1:
icdf = icdfExcitationSignVoicedSignalLowQuantization1Pulse
case 2:
icdf = icdfExcitationSignVoicedSignalLowQuantization2Pulse
case 3:
icdf = icdfExcitationSignVoicedSignalLowQuantization3Pulse
case 4:
icdf = icdfExcitationSignVoicedSignalLowQuantization4Pulse
case 5:
icdf = icdfExcitationSignVoicedSignalLowQuantization5Pulse
default:
icdf = icdfExcitationSignVoicedSignalLowQuantization6PlusPulse
}
case frameQuantizationOffsetTypeHigh:
switch pulsecount {
case 0:
icdf = icdfExcitationSignVoicedSignalHighQuantization0Pulse
case 1:
icdf = icdfExcitationSignVoicedSignalHighQuantization1Pulse
case 2:
icdf = icdfExcitationSignVoicedSignalHighQuantization2Pulse
case 3:
icdf = icdfExcitationSignVoicedSignalHighQuantization3Pulse
case 4:
icdf = icdfExcitationSignVoicedSignalHighQuantization4Pulse
case 5:
icdf = icdfExcitationSignVoicedSignalHighQuantization5Pulse
default:
icdf = icdfExcitationSignVoicedSignalHighQuantization6PlusPulse
}
}
}
// If the value decoded is 0, then the coefficient magnitude is negated.
// Otherwise, it remains positive.
if d.rangeDecoder.DecodeSymbolWithICDF(icdf) == 0 {
excitation[i] *= -1
}
}
}
// The PDF to use is chosen by the size of the current partition (16, 8, 4, or 2) and the

View file

@ -103,5 +103,5 @@ func TestExcitation(t *testing.T) {
d := &Decoder{rangeDecoder: createRangeDecoder(silkFrame, 71, 851775140, 846837397)}
lcgSeed := d.decodeLinearCongruentialGeneratorSeed()
d.decodeExcitation(nanoseconds20Ms, BandwidthWideband, false, lcgSeed)
d.decodeExcitation(nanoseconds20Ms, BandwidthWideband, false, lcgSeed, frameSignalTypeUnvoiced, frameQuantizationOffsetTypeLow)
}

View file

@ -493,4 +493,148 @@ var (
{256, 1, 3, 8, 18, 35, 60, 92, 128, 164, 196, 221, 238, 248, 253, 255, 256},
{256, 1, 3, 7, 14, 27, 48, 76, 110, 146, 180, 208, 229, 242, 249, 253, 255, 256},
}
// +----------------+
// | PDF |
// +----------------+
// | {136, 120}/256 |
// +----------------+
//
// Table 51: PDF for Excitation LSBs
icdfExcitationLSB = []uint{256, 136, 256}
// +-------------+-----------------------+-------------+---------------+
// | Signal Type | Quantization Offset | Pulse Count | PDF |
// | | Type | | |
// +-------------+-----------------------+-------------+---------------+
// | Inactive | Low | 0 | {2, 254}/256 |
// | | | | |
// | Inactive | Low | 1 | {207, 49}/256 |
// | | | | |
// | Inactive | Low | 2 | {189, 67}/256 |
// | | | | |
// | Inactive | Low | 3 | {179, 77}/256 |
// | | | | |
// | Inactive | Low | 4 | {174, 82}/256 |
// | | | | |
// | Inactive | Low | 5 | {163, 93}/256 |
// | | | | |
// | Inactive | Low | 6 or more | {157, 99}/256 |
// | | | | |
// | Inactive | High | 0 | {58, 198}/256 |
// | | | | |
// | Inactive | High | 1 | {245, 11}/256 |
// | | | | |
// | Inactive | High | 2 | {238, 18}/256 |
// | | | | |
// | Inactive | High | 3 | {232, 24}/256 |
// | | | | |
// | Inactive | High | 4 | {225, 31}/256 |
// | | | | |
// | Inactive | High | 5 | {220, 36}/256 |
// | | | | |
// | Inactive | High | 6 or more | {211, 45}/256 |
// | | | | |
// | Unvoiced | Low | 0 | {1, 255}/256 |
// | | | | |
// | Unvoiced | Low | 1 | {210, 46}/256 |
// | | | | |
// | Unvoiced | Low | 2 | {190, 66}/256 |
// | | | | |
// | Unvoiced | Low | 3 | {178, 78}/256 |
// | | | | |
// | Unvoiced | Low | 4 | {169, 87}/256 |
// | | | | |
// | Unvoiced | Low | 5 | {162, 94}/256 |
// | | | | |
// | Unvoiced | Low | 6 or more | {152,104}/256 |
// | | | | |
// | Unvoiced | High | 0 | {48, 208}/256 |
// | | | | |
// | Unvoiced | High | 1 | {242, 14}/256 |
// | | | | |
// | Unvoiced | High | 2 | {235, 21}/256 |
// | | | | |
// | Unvoiced | High | 3 | {224, 32}/256 |
// | | | | |
// | Unvoiced | High | 4 | {214, 42}/256 |
// | | | | |
// | Unvoiced | High | 5 | {205, 51}/256 |
// | | | | |
// | Unvoiced | High | 6 or more | {190, 66}/256 |
// | | | | |
// | Voiced | Low | 0 | {1, 255}/256 |
// | | | | |
// | Voiced | Low | 1 | {162, 94}/256 |
// | | | | |
// | Voiced | Low | 2 | {152, 104}/256|
// | | | | |
// | Voiced | Low | 3 | {147, 109}/256|
// | | | | |
// | Voiced | Low | 4 | {144, 112}/256|
// | | | | |
// | Voiced | Low | 5 | {141, 115}/256|
// | | | | |
// | Voiced | Low | 6 or more | {138, 118}/256|
// | | | | |
// | Voiced | High | 0 | {8, 248}/256 |
// | | | | |
// | Voiced | High | 1 | {203, 53}/256 |
// | | | | |
// | Voiced | High | 2 | {187, 69}/256 |
// | | | | |
// | Voiced | High | 3 | {176, 80}/256 |
// | | | | |
// | Voiced | High | 4 | {168, 88}/256 |
// | | | | |
// | Voiced | High | 5 | {161, 95}/256 |
// | | | | |
// | Voiced | High | 6 or more | {154,102}/256 |
// +-------------+-----------------------+-------------+---------------+
//
// Table 52: PDFs for Excitation Signs
icdfExcitationSignInactiveSignalLowQuantization0Pulse = []uint{256, 2, 256}
icdfExcitationSignInactiveSignalLowQuantization1Pulse = []uint{256, 207, 256}
icdfExcitationSignInactiveSignalLowQuantization2Pulse = []uint{256, 189, 256}
icdfExcitationSignInactiveSignalLowQuantization3Pulse = []uint{256, 179, 256}
icdfExcitationSignInactiveSignalLowQuantization4Pulse = []uint{256, 174, 256}
icdfExcitationSignInactiveSignalLowQuantization5Pulse = []uint{256, 163, 256}
icdfExcitationSignInactiveSignalLowQuantization6PlusPulse = []uint{256, 157, 256}
icdfExcitationSignInactiveSignalHighQuantization0Pulse = []uint{256, 58, 256}
icdfExcitationSignInactiveSignalHighQuantization1Pulse = []uint{256, 245, 256}
icdfExcitationSignInactiveSignalHighQuantization2Pulse = []uint{256, 238, 256}
icdfExcitationSignInactiveSignalHighQuantization3Pulse = []uint{256, 232, 256}
icdfExcitationSignInactiveSignalHighQuantization4Pulse = []uint{256, 225, 256}
icdfExcitationSignInactiveSignalHighQuantization5Pulse = []uint{256, 220, 256}
icdfExcitationSignInactiveSignalHighQuantization6PlusPulse = []uint{256, 211, 256}
icdfExcitationSignUnvoicedSignalLowQuantization0Pulse = []uint{256, 1, 256}
icdfExcitationSignUnvoicedSignalLowQuantization1Pulse = []uint{256, 210, 256}
icdfExcitationSignUnvoicedSignalLowQuantization2Pulse = []uint{256, 190, 256}
icdfExcitationSignUnvoicedSignalLowQuantization3Pulse = []uint{256, 178, 256}
icdfExcitationSignUnvoicedSignalLowQuantization4Pulse = []uint{256, 169, 256}
icdfExcitationSignUnvoicedSignalLowQuantization5Pulse = []uint{256, 162, 256}
icdfExcitationSignUnvoicedSignalLowQuantization6PlusPulse = []uint{256, 152, 256}
icdfExcitationSignUnvoicedSignalHighQuantization0Pulse = []uint{256, 48, 256}
icdfExcitationSignUnvoicedSignalHighQuantization1Pulse = []uint{256, 242, 256}
icdfExcitationSignUnvoicedSignalHighQuantization2Pulse = []uint{256, 235, 256}
icdfExcitationSignUnvoicedSignalHighQuantization3Pulse = []uint{256, 224, 256}
icdfExcitationSignUnvoicedSignalHighQuantization4Pulse = []uint{256, 214, 256}
icdfExcitationSignUnvoicedSignalHighQuantization5Pulse = []uint{256, 205, 256}
icdfExcitationSignUnvoicedSignalHighQuantization6PlusPulse = []uint{256, 190, 256}
icdfExcitationSignVoicedSignalLowQuantization0Pulse = []uint{256, 1, 256}
icdfExcitationSignVoicedSignalLowQuantization1Pulse = []uint{256, 162, 256}
icdfExcitationSignVoicedSignalLowQuantization2Pulse = []uint{256, 152, 256}
icdfExcitationSignVoicedSignalLowQuantization3Pulse = []uint{256, 147, 256}
icdfExcitationSignVoicedSignalLowQuantization4Pulse = []uint{256, 144, 256}
icdfExcitationSignVoicedSignalLowQuantization5Pulse = []uint{256, 141, 256}
icdfExcitationSignVoicedSignalLowQuantization6PlusPulse = []uint{256, 138, 256}
icdfExcitationSignVoicedSignalHighQuantization0Pulse = []uint{256, 8, 256}
icdfExcitationSignVoicedSignalHighQuantization1Pulse = []uint{256, 203, 256}
icdfExcitationSignVoicedSignalHighQuantization2Pulse = []uint{256, 187, 256}
icdfExcitationSignVoicedSignalHighQuantization3Pulse = []uint{256, 176, 256}
icdfExcitationSignVoicedSignalHighQuantization4Pulse = []uint{256, 168, 256}
icdfExcitationSignVoicedSignalHighQuantization5Pulse = []uint{256, 161, 256}
icdfExcitationSignVoicedSignalHighQuantization6PlusPulse = []uint{256, 154, 256}
)