Continue work on Silk decoder

Start to process Silk frame
Sean DuBois 2022-07-16 00:31:59 -04:00
parent cb80e84c4e
commit e539ceec0f
3 changed files with 95 additions and 6 deletions


@@ -38,7 +38,7 @@ func (d *Decoder) Decode(in []byte) (bandwidth Bandwidth, isStereo bool, frames
 	}
 	for _, encodedFrame := range encodedFrames {
-		_, decoded, err := d.silkDecoder.Decode(encodedFrame, tocHeader.isStereo(), cfg.frameDuration().nanoseconds())
+		decoded, err := d.silkDecoder.Decode(encodedFrame, tocHeader.isStereo(), cfg.frameDuration().nanoseconds())
 		if err != nil {
 			return 0, false, nil, err
 		}


@@ -6,8 +6,34 @@ import (
 	"github.com/pion/opus/internal/rangecoding"
 )
 
+type (
+	frameSignalType             byte
+	frameQuantizationOffsetType byte
+)
+
 const (
 	nanoseconds20Ms = 20000000
+
+	frameSignalTypeInactive frameSignalType = iota + 1
+	frameSignalTypeUnvoiced
+	frameSignalTypeVoiced
+
+	frameQuantizationOffsetTypeLow frameQuantizationOffsetType = iota + 1
+	frameQuantizationOffsetTypeHigh
+)
+
+var (
+	// +----------+-----------------------------+
+	// | VAD Flag |             PDF             |
+	// +----------+-----------------------------+
+	// | Inactive |  {26, 230, 0, 0, 0, 0}/256  |
+	// |          |                             |
+	// |  Active  | {0, 0, 24, 74, 148, 10}/256 |
+	// +----------+-----------------------------+
+	//
+	// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.3
+	icdfFrameTypeVADInactive = []uint{256, 26, 256}
+	icdfFrameTypeVADActive   = []uint{256, 24, 98, 246, 256}
 )
 
 // Decoder maintains the state needed to decode a stream
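Note on the new ICDF tables: they are not a verbatim copy of the RFC 6716 PDFs quoted in the comment. Each table appears to hold the total (256) followed by the running cumulative sums of the non-zero PDF weights. A minimal standalone sketch that reproduces both literals from the PDFs (pdfToCumulative is a hypothetical helper for illustration, not part of the decoder):

package main

import "fmt"

// pdfToCumulative prefixes the total (256) and accumulates the non-zero
// weights of an RFC 6716 PDF. Illustrative only; the range decoder's real
// expectations may differ.
func pdfToCumulative(pdf []uint) []uint {
	out := []uint{256}
	var sum uint
	for _, w := range pdf {
		if w == 0 {
			continue
		}
		sum += w
		out = append(out, sum)
	}
	return out
}

func main() {
	fmt.Println(pdfToCumulative([]uint{26, 230, 0, 0, 0, 0}))   // [256 26 256]
	fmt.Println(pdfToCumulative([]uint{0, 0, 24, 74, 148, 10})) // [256 24 98 246 256]
}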
@@ -22,24 +48,81 @@ func NewDecoder() *Decoder {
 }
 
 // Decode decodes many SILK subframes
-func (d *Decoder) Decode(in []byte, isStereo bool, nanoseconds int) (samples int, decoded []byte, err error) {
+func (d *Decoder) Decode(in []byte, isStereo bool, nanoseconds int) (decoded []byte, err error) {
 	if nanoseconds != nanoseconds20Ms {
-		return 0, nil, errUnsupportedSilkFrameDuration
+		return nil, errUnsupportedSilkFrameDuration
 	} else if isStereo {
-		return 0, nil, errUnsupportedSilkStereo
+		return nil, errUnsupportedSilkStereo
 	}
 
 	d.rangeDecoder.Init(in)
 
+	// The LP layer begins with two to eight header bits. These consist of one
+	// Voice Activity Detection (VAD) bit per frame (up to 3), followed by a
+	// single flag indicating the presence of LBRR frames.
+	// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.3
 	voiceActivityDetected := d.rangeDecoder.DecodeSymbolLogP(1) == 1
 	lowBitRateRedundancy := d.rangeDecoder.DecodeSymbolLogP(1) == 1
 	if lowBitRateRedundancy {
-		return 0, nil, errUnsupportedSilkLowBitrateRedundancy
+		return nil, errUnsupportedSilkLowBitrateRedundancy
 	}
 
+	// Each SILK frame contains a single "frame type" symbol that jointly
+	// codes the signal type and quantization offset type of the
+	// corresponding frame.
+	//
+	// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.3
+	var frameTypeSymbol uint32
 	if voiceActivityDetected {
-		fmt.Println("VAD")
+		frameTypeSymbol = d.rangeDecoder.DecodeSymbolWithICDF(icdfFrameTypeVADActive)
+	} else {
+		frameTypeSymbol = d.rangeDecoder.DecodeSymbolWithICDF(icdfFrameTypeVADInactive)
 	}
 
+	// +------------+-------------+--------------------------+
+	// | Frame Type | Signal Type | Quantization Offset Type |
+	// +------------+-------------+--------------------------+
+	// |     0      |  Inactive   |           Low            |
+	// |            |             |                          |
+	// |     1      |  Inactive   |           High           |
+	// |            |             |                          |
+	// |     2      |  Unvoiced   |           Low            |
+	// |            |             |                          |
+	// |     3      |  Unvoiced   |           High           |
+	// |            |             |                          |
+	// |     4      |   Voiced    |           Low            |
+	// |            |             |                          |
+	// |     5      |   Voiced    |           High           |
+	// +------------+-------------+--------------------------+
+	//
+	// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.3
+	signalType := frameSignalType(0)
+	quantizationOffsetType := frameQuantizationOffsetType(0)
+	switch frameTypeSymbol {
+	case 0:
+		signalType = frameSignalTypeInactive
+		quantizationOffsetType = frameQuantizationOffsetTypeLow
+	case 1:
+		signalType = frameSignalTypeInactive
+		quantizationOffsetType = frameQuantizationOffsetTypeHigh
+	case 2:
+		signalType = frameSignalTypeUnvoiced
+		quantizationOffsetType = frameQuantizationOffsetTypeLow
+	case 3:
+		signalType = frameSignalTypeUnvoiced
+		quantizationOffsetType = frameQuantizationOffsetTypeHigh
+	case 4:
+		signalType = frameSignalTypeVoiced
+		quantizationOffsetType = frameQuantizationOffsetTypeLow
+	case 5:
+		signalType = frameSignalTypeVoiced
+		quantizationOffsetType = frameQuantizationOffsetTypeHigh
+	}
+
+	fmt.Println(signalType)
+	fmt.Println(quantizationOffsetType)
+
 	return
 }
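The six-way switch mirrors the RFC table directly. Because the table orders frame types so that the signal type changes every two symbols while the quantization offset type alternates, the same mapping can also be expressed arithmetically. A sketch, assuming the types and constants declared earlier in this diff are in scope (only which constant is selected matters, not its numeric value):

// frameTypeFromSymbol is an illustrative alternative to the switch above.
// It relies on the table ordering: symbol/2 selects the signal type and
// symbol%2 selects the quantization offset type.
func frameTypeFromSymbol(symbol uint32) (frameSignalType, frameQuantizationOffsetType) {
	signalTypes := []frameSignalType{
		frameSignalTypeInactive, // symbols 0, 1
		frameSignalTypeUnvoiced, // symbols 2, 3
		frameSignalTypeVoiced,   // symbols 4, 5
	}
	offsetTypes := []frameQuantizationOffsetType{
		frameQuantizationOffsetTypeLow,  // even symbols
		frameQuantizationOffsetTypeHigh, // odd symbols
	}
	return signalTypes[symbol/2], offsetTypes[symbol%2]
}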


@@ -148,6 +148,8 @@ func (c configurationMode) String() string {
 	return "Invalid"
 }
 
+// See Configuration for mapping of mode to configuration numbers
+// https://datatracker.ietf.org/doc/html/rfc6716#section-3.1
 func (c Configuration) mode() configurationMode {
 	switch {
 	case c >= 0 && c <= 11:
@@ -208,6 +210,8 @@ func (f frameDuration) nanoseconds() int {
 	return 0
 }
 
+// See Configuration for mapping of frameDuration to configuration numbers
+// https://datatracker.ietf.org/doc/html/rfc6716#section-3.1
 func (c Configuration) frameDuration() frameDuration {
 	switch c {
 	case 16, 20, 24, 28:
@@ -236,6 +240,8 @@ const (
 	BandwidthFullband
 )
 
+// See Configuration for mapping of bandwidth to configuration numbers
+// https://datatracker.ietf.org/doc/html/rfc6716#section-3.1
 func (c Configuration) bandwidth() Bandwidth {
 	switch {
 	case c <= 3:
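All three accessors key off the same configuration number, which RFC 6716 Table 2 (Section 3.1) partitions as: 0-11 SILK-only (NB/MB/WB, 10-60 ms), 12-15 Hybrid (SWB/FB, 10-20 ms), and 16-31 CELT-only (NB/WB/SWB/FB, 2.5-20 ms). A standalone sketch of that partition for reference (illustrative only, not the package's own API):

package main

import "fmt"

// describeConfig classifies an Opus configuration number (the top five bits
// of the TOC byte, so 0..31) per RFC 6716, Table 2. Illustrative only.
func describeConfig(c int) (mode, bandwidth string, frameMs float64) {
	switch {
	case c >= 0 && c <= 11:
		mode = "SILK-only"
		bandwidth = []string{"NB", "MB", "WB"}[c/4]
		frameMs = []float64{10, 20, 40, 60}[c%4]
	case c <= 15:
		mode = "Hybrid"
		bandwidth = []string{"SWB", "FB"}[(c-12)/2]
		frameMs = []float64{10, 20}[(c-12)%2]
	default:
		mode = "CELT-only"
		bandwidth = []string{"NB", "WB", "SWB", "FB"}[(c-16)/4]
		frameMs = []float64{2.5, 5, 10, 20}[(c-16)%4]
	}
	return mode, bandwidth, frameMs
}

func main() {
	fmt.Println(describeConfig(20)) // CELT-only WB 2.5, matching "case 16, 20, 24, 28" above
}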