Implement Excitation Sign Decoding
Specified in RFC 6716, Section 4.2.7.8.5 (Sign Decoding): https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.8.5
This commit is contained in:
parent
dd17102d36
commit
cfae85659a
|
@ -491,7 +491,7 @@ func (d *Decoder) decodeLinearCongruentialGeneratorSeed() uint32 {
|
|||
// position are required to have the same sign.
|
||||
//
|
||||
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.8
|
||||
func (d *Decoder) decodeExcitation(nanoseconds int, bandwidth Bandwidth, voiceActivityDetected bool, lcgSeed uint32) {
|
||||
func (d *Decoder) decodeExcitation(nanoseconds int, bandwidth Bandwidth, voiceActivityDetected bool, lcgSeed uint32, signalType frameSignalType, quantizationOffsetType frameQuantizationOffsetType) {
|
||||
// SILK fixes the dimension of the codebook to N = 16. The excitation
|
||||
// is made up of a number of "shell blocks", each 16 samples in size.
|
||||
// Table 44 lists the number of shell blocks required for a SILK frame
|
||||
|
@ -590,7 +590,7 @@ func (d *Decoder) decodeExcitation(nanoseconds int, bandwidth Bandwidth, voiceAc
|
|||
//
|
||||
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.8.3
|
||||
|
||||
excitation := make([]uint8, shellblocks*pulsecountLargestPartitionSize)
|
||||
excitation := make([]int32, shellblocks*pulsecountLargestPartitionSize)
|
||||
for i := range pulsecounts {
|
||||
// This process skips partitions without any pulses, i.e., where
|
||||
// the initial pulse count from Section 4.2.7.8.2 was zero, or where the
|
||||
|
@ -601,10 +601,11 @@ func (d *Decoder) decodeExcitation(nanoseconds int, bandwidth Bandwidth, voiceAc
|
|||
continue
|
||||
}
|
||||
|
||||
excitationIndex := 16 * i
|
||||
excitationIndex := pulsecountLargestPartitionSize * i
|
||||
samplePartition16 := make([]uint8, 2)
|
||||
samplePartition8 := make([]uint8, 2)
|
||||
samplePartition4 := make([]uint8, 2)
|
||||
samplePartition2 := make([]uint8, 2)
|
||||
|
||||
// The location of pulses is coded by recursively partitioning each
|
||||
// block into halves, and coding how many pulses fall on the left side
|
||||
|
@ -616,12 +617,171 @@ func (d *Decoder) decodeExcitation(nanoseconds int, bandwidth Bandwidth, voiceAc
|
|||
for k := 0; k < 2; k++ {
|
||||
d.partitionPulseCount(icdfPulseCountSplit4SamplePartitions, samplePartition8[k], samplePartition4)
|
||||
for l := 0; l < 2; l++ {
|
||||
d.partitionPulseCount(icdfPulseCountSplit2SamplePartitions, samplePartition4[l], excitation[excitationIndex:])
|
||||
excitationIndex += 2
|
||||
d.partitionPulseCount(icdfPulseCountSplit2SamplePartitions, samplePartition4[l], samplePartition2)
|
||||
excitation[excitationIndex] = int32(samplePartition2[0])
|
||||
excitationIndex++
|
||||
|
||||
excitation[excitationIndex] = int32(samplePartition2[1])
|
||||
excitationIndex++
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// After the decoder reads the pulse locations for all blocks, it reads
|
||||
// the LSBs (if any) for each block in turn. Inside each block, it
|
||||
// reads all the LSBs for each coefficient in turn, even those where no
|
||||
// pulses were allocated, before proceeding to the next one.
|
||||
//
|
||||
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.8.4
|
||||
for i := 0; i < len(excitation); i++ {
|
||||
for bit := uint8(0); bit < lsbcounts[i/pulsecountLargestPartitionSize]; bit++ {
|
||||
excitation[i] = (excitation[i] << 1) | int32(d.rangeDecoder.DecodeSymbolWithICDF(icdfExcitationLSB))
|
||||
}
|
||||
}
|
||||
|
||||
// After decoding the pulse locations and the LSBs, the decoder knows
|
||||
// the magnitude of each coefficient in the excitation.
|
||||
//
|
||||
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.8.5
|
||||
for i := 0; i < len(excitation); i++ {
|
||||
// It then decodes a sign for all coefficients
|
||||
// with a non-zero magnitude
|
||||
//
|
||||
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.8.5
|
||||
if excitation[i] == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
var icdf []uint
|
||||
pulsecount := pulsecounts[i/pulsecountLargestPartitionSize]
|
||||
|
||||
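// The sign is decoded using one of the PDFs from Table 52, selected by the
// signal type, the quantization offset type, and the block's pulse count.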
|
||||
//
|
||||
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.8.5
|
||||
switch signalType {
|
||||
case frameSignalTypeInactive:
|
||||
switch quantizationOffsetType {
|
||||
case frameQuantizationOffsetTypeLow:
|
||||
switch pulsecount {
|
||||
case 0:
|
||||
icdf = icdfExcitationSignInactiveSignalLowQuantization0Pulse
|
||||
case 1:
|
||||
icdf = icdfExcitationSignInactiveSignalLowQuantization1Pulse
|
||||
case 2:
|
||||
icdf = icdfExcitationSignInactiveSignalLowQuantization2Pulse
|
||||
case 3:
|
||||
icdf = icdfExcitationSignInactiveSignalLowQuantization3Pulse
|
||||
case 4:
|
||||
icdf = icdfExcitationSignInactiveSignalLowQuantization4Pulse
|
||||
case 5:
|
||||
icdf = icdfExcitationSignInactiveSignalLowQuantization5Pulse
|
||||
default:
|
||||
icdf = icdfExcitationSignInactiveSignalLowQuantization6PlusPulse
|
||||
}
|
||||
case frameQuantizationOffsetTypeHigh:
|
||||
switch pulsecount {
|
||||
case 0:
|
||||
icdf = icdfExcitationSignInactiveSignalHighQuantization0Pulse
|
||||
case 1:
|
||||
icdf = icdfExcitationSignInactiveSignalHighQuantization1Pulse
|
||||
case 2:
|
||||
icdf = icdfExcitationSignInactiveSignalHighQuantization2Pulse
|
||||
case 3:
|
||||
icdf = icdfExcitationSignInactiveSignalHighQuantization3Pulse
|
||||
case 4:
|
||||
icdf = icdfExcitationSignInactiveSignalHighQuantization4Pulse
|
||||
case 5:
|
||||
icdf = icdfExcitationSignInactiveSignalHighQuantization5Pulse
|
||||
default:
|
||||
icdf = icdfExcitationSignInactiveSignalHighQuantization6PlusPulse
|
||||
}
|
||||
|
||||
}
|
||||
case frameSignalTypeUnvoiced:
|
||||
switch quantizationOffsetType {
|
||||
case frameQuantizationOffsetTypeLow:
|
||||
switch pulsecount {
|
||||
case 0:
|
||||
icdf = icdfExcitationSignUnvoicedSignalLowQuantization0Pulse
|
||||
case 1:
|
||||
icdf = icdfExcitationSignUnvoicedSignalLowQuantization1Pulse
|
||||
case 2:
|
||||
icdf = icdfExcitationSignUnvoicedSignalLowQuantization2Pulse
|
||||
case 3:
|
||||
icdf = icdfExcitationSignUnvoicedSignalLowQuantization3Pulse
|
||||
case 4:
|
||||
icdf = icdfExcitationSignUnvoicedSignalLowQuantization4Pulse
|
||||
case 5:
|
||||
icdf = icdfExcitationSignUnvoicedSignalLowQuantization5Pulse
|
||||
default:
|
||||
icdf = icdfExcitationSignUnvoicedSignalLowQuantization6PlusPulse
|
||||
}
|
||||
case frameQuantizationOffsetTypeHigh:
|
||||
switch pulsecount {
|
||||
case 0:
|
||||
icdf = icdfExcitationSignUnvoicedSignalHighQuantization0Pulse
|
||||
case 1:
|
||||
icdf = icdfExcitationSignUnvoicedSignalHighQuantization1Pulse
|
||||
case 2:
|
||||
icdf = icdfExcitationSignUnvoicedSignalHighQuantization2Pulse
|
||||
case 3:
|
||||
icdf = icdfExcitationSignUnvoicedSignalHighQuantization3Pulse
|
||||
case 4:
|
||||
icdf = icdfExcitationSignUnvoicedSignalHighQuantization4Pulse
|
||||
case 5:
|
||||
icdf = icdfExcitationSignUnvoicedSignalHighQuantization5Pulse
|
||||
default:
|
||||
icdf = icdfExcitationSignUnvoicedSignalHighQuantization6PlusPulse
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
case frameSignalTypeVoiced:
|
||||
switch quantizationOffsetType {
|
||||
case frameQuantizationOffsetTypeLow:
|
||||
switch pulsecount {
|
||||
case 0:
|
||||
icdf = icdfExcitationSignVoicedSignalLowQuantization0Pulse
|
||||
case 1:
|
||||
icdf = icdfExcitationSignVoicedSignalLowQuantization1Pulse
|
||||
case 2:
|
||||
icdf = icdfExcitationSignVoicedSignalLowQuantization2Pulse
|
||||
case 3:
|
||||
icdf = icdfExcitationSignVoicedSignalLowQuantization3Pulse
|
||||
case 4:
|
||||
icdf = icdfExcitationSignVoicedSignalLowQuantization4Pulse
|
||||
case 5:
|
||||
icdf = icdfExcitationSignVoicedSignalLowQuantization5Pulse
|
||||
default:
|
||||
icdf = icdfExcitationSignVoicedSignalLowQuantization6PlusPulse
|
||||
}
|
||||
case frameQuantizationOffsetTypeHigh:
|
||||
switch pulsecount {
|
||||
case 0:
|
||||
icdf = icdfExcitationSignVoicedSignalHighQuantization0Pulse
|
||||
case 1:
|
||||
icdf = icdfExcitationSignVoicedSignalHighQuantization1Pulse
|
||||
case 2:
|
||||
icdf = icdfExcitationSignVoicedSignalHighQuantization2Pulse
|
||||
case 3:
|
||||
icdf = icdfExcitationSignVoicedSignalHighQuantization3Pulse
|
||||
case 4:
|
||||
icdf = icdfExcitationSignVoicedSignalHighQuantization4Pulse
|
||||
case 5:
|
||||
icdf = icdfExcitationSignVoicedSignalHighQuantization5Pulse
|
||||
default:
|
||||
icdf = icdfExcitationSignVoicedSignalHighQuantization6PlusPulse
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If the value decoded is 0, then the coefficient magnitude is negated.
|
||||
// Otherwise, it remains positive.
|
||||
if d.rangeDecoder.DecodeSymbolWithICDF(icdf) == 0 {
|
||||
excitation[i] *= -1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// The PDF to use is chosen by the size of the current partition (16, 8, 4, or 2) and the
|
||||
|
|
|
@ -103,5 +103,5 @@ func TestExcitation(t *testing.T) {
|
|||
d := &Decoder{rangeDecoder: createRangeDecoder(silkFrame, 71, 851775140, 846837397)}
|
||||
|
||||
lcgSeed := d.decodeLinearCongruentialGeneratorSeed()
|
||||
d.decodeExcitation(nanoseconds20Ms, BandwidthWideband, false, lcgSeed)
|
||||
d.decodeExcitation(nanoseconds20Ms, BandwidthWideband, false, lcgSeed, frameSignalTypeUnvoiced, frameQuantizationOffsetTypeLow)
|
||||
}
|
||||
|
|
|
@ -493,4 +493,148 @@ var (
|
|||
{256, 1, 3, 8, 18, 35, 60, 92, 128, 164, 196, 221, 238, 248, 253, 255, 256},
|
||||
{256, 1, 3, 7, 14, 27, 48, 76, 110, 146, 180, 208, 229, 242, 249, 253, 255, 256},
|
||||
}
|
||||
|
||||
// +----------------+
|
||||
// | PDF |
|
||||
// +----------------+
|
||||
// | {136, 120}/256 |
|
||||
// +----------------+
|
||||
//
|
||||
// Table 51: PDF for Excitation LSBs
|
||||
icdfExcitationLSB = []uint{256, 136, 256}
|
||||
|
||||
// +-------------+-----------------------+-------------+---------------+
|
||||
// | Signal Type | Quantization Offset | Pulse Count | PDF |
|
||||
// | | Type | | |
|
||||
// +-------------+-----------------------+-------------+---------------+
|
||||
// | Inactive | Low | 0 | {2, 254}/256 |
|
||||
// | | | | |
|
||||
// | Inactive | Low | 1 | {207, 49}/256 |
|
||||
// | | | | |
|
||||
// | Inactive | Low | 2 | {189, 67}/256 |
|
||||
// | | | | |
|
||||
// | Inactive | Low | 3 | {179, 77}/256 |
|
||||
// | | | | |
|
||||
// | Inactive | Low | 4 | {174, 82}/256 |
|
||||
// | | | | |
|
||||
// | Inactive | Low | 5 | {163, 93}/256 |
|
||||
// | | | | |
|
||||
// | Inactive | Low | 6 or more | {157, 99}/256 |
|
||||
// | | | | |
|
||||
// | Inactive | High | 0 | {58, 198}/256 |
|
||||
// | | | | |
|
||||
// | Inactive | High | 1 | {245, 11}/256 |
|
||||
// | | | | |
|
||||
// | Inactive | High | 2 | {238, 18}/256 |
|
||||
// | | | | |
|
||||
// | Inactive | High | 3 | {232, 24}/256 |
|
||||
// | | | | |
|
||||
// | Inactive | High | 4 | {225, 31}/256 |
|
||||
// | | | | |
|
||||
// | Inactive | High | 5 | {220, 36}/256 |
|
||||
// | | | | |
|
||||
// | Inactive | High | 6 or more | {211, 45}/256 |
|
||||
// | | | | |
|
||||
// | Unvoiced | Low | 0 | {1, 255}/256 |
|
||||
// | | | | |
|
||||
// | Unvoiced | Low | 1 | {210, 46}/256 |
|
||||
// | | | | |
|
||||
// | Unvoiced | Low | 2 | {190, 66}/256 |
|
||||
// | | | | |
|
||||
// | Unvoiced | Low | 3 | {178, 78}/256 |
|
||||
// | | | | |
|
||||
// | Unvoiced | Low | 4 | {169, 87}/256 |
|
||||
// | | | | |
|
||||
// | Unvoiced | Low | 5 | {162, 94}/256 |
|
||||
// | | | | |
|
||||
// | Unvoiced | Low | 6 or more | {152,104}/256 |
|
||||
// | | | | |
|
||||
// | Unvoiced | High | 0 | {48, 208}/256 |
|
||||
// | | | | |
|
||||
// | Unvoiced | High | 1 | {242, 14}/256 |
|
||||
// | | | | |
|
||||
// | Unvoiced | High | 2 | {235, 21}/256 |
|
||||
// | | | | |
|
||||
// | Unvoiced | High | 3 | {224, 32}/256 |
|
||||
// | | | | |
|
||||
// | Unvoiced | High | 4 | {214, 42}/256 |
|
||||
// | | | | |
|
||||
// | Unvoiced | High | 5 | {205, 51}/256 |
|
||||
// | | | | |
|
||||
// | Unvoiced | High | 6 or more | {190, 66}/256 |
|
||||
// | | | | |
|
||||
// | Voiced | Low | 0 | {1, 255}/256 |
|
||||
// | | | | |
|
||||
// | Voiced | Low | 1 | {162, 94}/256 |
|
||||
// | | | | |
|
||||
// | Voiced | Low | 2 | {152,104}/256 |
|
||||
// | | | | |
|
||||
// | Voiced | Low | 3 | {147,109}/256 |
|
||||
// | | | | |
|
||||
// | Voiced | Low | 4 | {144, 112}/256|
|
||||
// | | | | |
|
||||
// | Voiced | Low | 5 | {141, 115}/256|
|
||||
// | | | | |
|
||||
// | Voiced | Low | 6 or more | {138, 118}/256|
|
||||
// | | | | |
|
||||
// | Voiced | High | 0 | {8, 248}/256 |
|
||||
// | | | | |
|
||||
// | Voiced | High | 1 | {203, 53}/256 |
|
||||
// | | | | |
|
||||
// | Voiced | High | 2 | {187, 69}/256 |
|
||||
// | | | | |
|
||||
// | Voiced | High | 3 | {176, 80}/256 |
|
||||
// | | | | |
|
||||
// | Voiced | High | 4 | {168, 88}/256 |
|
||||
// | | | | |
|
||||
// | Voiced | High | 5 | {161, 95}/256 |
|
||||
// | | | | |
|
||||
// | Voiced | High | 6 or more | {154,102}/256 |
|
||||
// +-------------+-----------------------+-------------+---------------+
|
||||
//
|
||||
// Table 52: PDFs for Excitation Signs
|
||||
icdfExcitationSignInactiveSignalLowQuantization0Pulse = []uint{256, 2, 256}
|
||||
icdfExcitationSignInactiveSignalLowQuantization1Pulse = []uint{256, 207, 256}
|
||||
icdfExcitationSignInactiveSignalLowQuantization2Pulse = []uint{256, 189, 256}
|
||||
icdfExcitationSignInactiveSignalLowQuantization3Pulse = []uint{256, 179, 256}
|
||||
icdfExcitationSignInactiveSignalLowQuantization4Pulse = []uint{256, 174, 256}
|
||||
icdfExcitationSignInactiveSignalLowQuantization5Pulse = []uint{256, 163, 256}
|
||||
icdfExcitationSignInactiveSignalLowQuantization6PlusPulse = []uint{256, 157, 256}
|
||||
icdfExcitationSignInactiveSignalHighQuantization0Pulse = []uint{256, 58, 256}
|
||||
icdfExcitationSignInactiveSignalHighQuantization1Pulse = []uint{256, 245, 256}
|
||||
icdfExcitationSignInactiveSignalHighQuantization2Pulse = []uint{256, 238, 256}
|
||||
icdfExcitationSignInactiveSignalHighQuantization3Pulse = []uint{256, 232, 256}
|
||||
icdfExcitationSignInactiveSignalHighQuantization4Pulse = []uint{256, 225, 256}
|
||||
icdfExcitationSignInactiveSignalHighQuantization5Pulse = []uint{256, 220, 256}
|
||||
icdfExcitationSignInactiveSignalHighQuantization6PlusPulse = []uint{256, 211, 256}
|
||||
icdfExcitationSignUnvoicedSignalLowQuantization0Pulse = []uint{256, 1, 256}
|
||||
icdfExcitationSignUnvoicedSignalLowQuantization1Pulse = []uint{256, 210, 256}
|
||||
icdfExcitationSignUnvoicedSignalLowQuantization2Pulse = []uint{256, 190, 256}
|
||||
icdfExcitationSignUnvoicedSignalLowQuantization3Pulse = []uint{256, 178, 256}
|
||||
icdfExcitationSignUnvoicedSignalLowQuantization4Pulse = []uint{256, 169, 256}
|
||||
icdfExcitationSignUnvoicedSignalLowQuantization5Pulse = []uint{256, 162, 256}
|
||||
icdfExcitationSignUnvoicedSignalLowQuantization6PlusPulse = []uint{256, 152, 256}
|
||||
icdfExcitationSignUnvoicedSignalHighQuantization0Pulse = []uint{256, 48, 256}
|
||||
icdfExcitationSignUnvoicedSignalHighQuantization1Pulse = []uint{256, 242, 256}
|
||||
icdfExcitationSignUnvoicedSignalHighQuantization2Pulse = []uint{256, 235, 256}
|
||||
icdfExcitationSignUnvoicedSignalHighQuantization3Pulse = []uint{256, 224, 256}
|
||||
icdfExcitationSignUnvoicedSignalHighQuantization4Pulse = []uint{256, 214, 256}
|
||||
icdfExcitationSignUnvoicedSignalHighQuantization5Pulse = []uint{256, 205, 256}
|
||||
icdfExcitationSignUnvoicedSignalHighQuantization6PlusPulse = []uint{256, 190, 256}
|
||||
icdfExcitationSignVoicedSignalLowQuantization0Pulse = []uint{256, 1, 256}
|
||||
icdfExcitationSignVoicedSignalLowQuantization1Pulse = []uint{256, 162, 256}
|
||||
icdfExcitationSignVoicedSignalLowQuantization2Pulse = []uint{256, 152, 256}
|
||||
icdfExcitationSignVoicedSignalLowQuantization3Pulse = []uint{256, 147, 256}
|
||||
icdfExcitationSignVoicedSignalLowQuantization4Pulse = []uint{256, 144, 256}
|
||||
icdfExcitationSignVoicedSignalLowQuantization5Pulse = []uint{256, 141, 256}
|
||||
icdfExcitationSignVoicedSignalLowQuantization6PlusPulse = []uint{256, 138, 256}
|
||||
icdfExcitationSignVoicedSignalHighQuantization0Pulse = []uint{256, 8, 256}
|
||||
icdfExcitationSignVoicedSignalHighQuantization1Pulse = []uint{256, 203, 256}
|
||||
icdfExcitationSignVoicedSignalHighQuantization2Pulse = []uint{256, 187, 256}
|
||||
icdfExcitationSignVoicedSignalHighQuantization3Pulse = []uint{256, 176, 256}
|
||||
icdfExcitationSignVoicedSignalHighQuantization4Pulse = []uint{256, 168, 256}
|
||||
icdfExcitationSignVoicedSignalHighQuantization5Pulse = []uint{256, 161, 256}
|
||||
icdfExcitationSignVoicedSignalHighQuantization6PlusPulse = []uint{256, 154, 256}
|
||||
)
|
||||
|
|
Loading…
Reference in a new issue