opus-go/internal/silk/decoder.go
Sean DuBois e539ceec0f Continue work on Silk decoder
Start to process Silk frame
2022-07-16 00:31:59 -04:00

129 lines
4.1 KiB
Go

package silk
import (
"fmt"
"github.com/pion/opus/internal/rangecoding"
)
type (
frameSignalType byte
frameQuantizationOffsetType byte
)
const (
nanoseconds20Ms = 20000000
frameSignalTypeInactive frameSignalType = iota + 1
frameSignalTypeUnvoiced
frameSignalTypeVoiced
frameQuantizationOffsetTypeLow frameQuantizationOffsetType = iota + 1
frameQuantizationOffsetTypeHigh
)
var (
// +----------+-----------------------------+
// | VAD Flag | PDF |
// +----------+-----------------------------+
// | Inactive | {26, 230, 0, 0, 0, 0}/256 |
// | | |
// | Active | {0, 0, 24, 74, 148, 10}/256 |
// +----------+-----------------------------+
//
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.3
icdfFrameTypeVADInactive = []uint{256, 26, 256}
icdfFrameTypeVADActive = []uint{256, 24, 98, 246, 256}
)
// Decoder maintains the state needed to decode a stream
// of Silk frames
type Decoder struct {
rangeDecoder rangecoding.Decoder
}
// NewDecoder creates a new Silk Decoder
func NewDecoder() *Decoder {
return &Decoder{}
}
// Decode decodes many SILK subframes
func (d *Decoder) Decode(in []byte, isStereo bool, nanoseconds int) (decoded []byte, err error) {
if nanoseconds != nanoseconds20Ms {
return nil, errUnsupportedSilkFrameDuration
} else if isStereo {
return nil, errUnsupportedSilkStereo
}
d.rangeDecoder.Init(in)
//The LP layer begins with two to eight header bits These consist of one
// Voice Activity Detection (VAD) bit per frame (up to 3), followed by a
// single flag indicating the presence of LBRR frames.
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.3
voiceActivityDetected := d.rangeDecoder.DecodeSymbolLogP(1) == 1
lowBitRateRedundancy := d.rangeDecoder.DecodeSymbolLogP(1) == 1
if lowBitRateRedundancy {
return nil, errUnsupportedSilkLowBitrateRedundancy
}
// Each SILK frame contains a single "frame type" symbol that jointly
// codes the signal type and quantization offset type of the
// corresponding frame.
//
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.3
var frameTypeSymbol uint32
if voiceActivityDetected {
frameTypeSymbol = d.rangeDecoder.DecodeSymbolWithICDF(icdfFrameTypeVADActive)
} else {
frameTypeSymbol = d.rangeDecoder.DecodeSymbolWithICDF(icdfFrameTypeVADInactive)
}
// +------------+-------------+--------------------------+
// | Frame Type | Signal Type | Quantization Offset Type |
// +------------+-------------+--------------------------+
// | 0 | Inactive | Low |
// | | | |
// | 1 | Inactive | High |
// | | | |
// | 2 | Unvoiced | Low |
// | | | |
// | 3 | Unvoiced | High |
// | | | |
// | 4 | Voiced | Low |
// | | | |
// | 5 | Voiced | High |
// +------------+-------------+--------------------------+
//
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.3
signalType := frameSignalType(0)
quantizationOffsetType := frameQuantizationOffsetType(0)
switch frameTypeSymbol {
case 0:
signalType = frameSignalTypeInactive
quantizationOffsetType = frameQuantizationOffsetTypeLow
case 1:
signalType = frameSignalTypeInactive
quantizationOffsetType = frameQuantizationOffsetTypeHigh
case 2:
signalType = frameSignalTypeUnvoiced
quantizationOffsetType = frameQuantizationOffsetTypeLow
case 3:
signalType = frameSignalTypeUnvoiced
quantizationOffsetType = frameQuantizationOffsetTypeHigh
case 4:
signalType = frameSignalTypeVoiced
quantizationOffsetType = frameQuantizationOffsetTypeLow
case 5:
signalType = frameSignalTypeVoiced
quantizationOffsetType = frameQuantizationOffsetTypeHigh
}
fmt.Println(signalType)
fmt.Println(quantizationOffsetType)
return
}