Continue work on Silk decoder

Start to process Silk frame
This commit is contained in:
Sean DuBois 2022-07-16 00:31:59 -04:00
parent cb80e84c4e
commit e539ceec0f
3 changed files with 95 additions and 6 deletions

View file

@ -38,7 +38,7 @@ func (d *Decoder) Decode(in []byte) (bandwidth Bandwidth, isStereo bool, frames
}
for _, encodedFrame := range encodedFrames {
_, decoded, err := d.silkDecoder.Decode(encodedFrame, tocHeader.isStereo(), cfg.frameDuration().nanoseconds())
decoded, err := d.silkDecoder.Decode(encodedFrame, tocHeader.isStereo(), cfg.frameDuration().nanoseconds())
if err != nil {
return 0, false, nil, err
}

View file

@ -6,8 +6,34 @@ import (
"github.com/pion/opus/internal/rangecoding"
)
type (
frameSignalType byte
frameQuantizationOffsetType byte
)
const (
nanoseconds20Ms = 20000000
frameSignalTypeInactive frameSignalType = iota + 1
frameSignalTypeUnvoiced
frameSignalTypeVoiced
frameQuantizationOffsetTypeLow frameQuantizationOffsetType = iota + 1
frameQuantizationOffsetTypeHigh
)
var (
// +----------+-----------------------------+
// | VAD Flag | PDF |
// +----------+-----------------------------+
// | Inactive | {26, 230, 0, 0, 0, 0}/256 |
// | | |
// | Active | {0, 0, 24, 74, 148, 10}/256 |
// +----------+-----------------------------+
//
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.3
icdfFrameTypeVADInactive = []uint{256, 26, 256}
icdfFrameTypeVADActive = []uint{256, 24, 98, 246, 256}
)
// Decoder maintains the state needed to decode a stream
@ -22,24 +48,81 @@ func NewDecoder() *Decoder {
}
// Decode decodes many SILK subframes
func (d *Decoder) Decode(in []byte, isStereo bool, nanoseconds int) (samples int, decoded []byte, err error) {
func (d *Decoder) Decode(in []byte, isStereo bool, nanoseconds int) (decoded []byte, err error) {
if nanoseconds != nanoseconds20Ms {
return 0, nil, errUnsupportedSilkFrameDuration
return nil, errUnsupportedSilkFrameDuration
} else if isStereo {
return 0, nil, errUnsupportedSilkStereo
return nil, errUnsupportedSilkStereo
}
d.rangeDecoder.Init(in)
// The LP layer begins with two to eight header bits. These consist of one
// Voice Activity Detection (VAD) bit per frame (up to 3), followed by a
// single flag indicating the presence of LBRR frames.
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.3
voiceActivityDetected := d.rangeDecoder.DecodeSymbolLogP(1) == 1
lowBitRateRedundancy := d.rangeDecoder.DecodeSymbolLogP(1) == 1
if lowBitRateRedundancy {
return 0, nil, errUnsupportedSilkLowBitrateRedundancy
return nil, errUnsupportedSilkLowBitrateRedundancy
}
// Each SILK frame contains a single "frame type" symbol that jointly
// codes the signal type and quantization offset type of the
// corresponding frame.
//
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.3
var frameTypeSymbol uint32
if voiceActivityDetected {
fmt.Println("VAD")
frameTypeSymbol = d.rangeDecoder.DecodeSymbolWithICDF(icdfFrameTypeVADActive)
} else {
frameTypeSymbol = d.rangeDecoder.DecodeSymbolWithICDF(icdfFrameTypeVADInactive)
}
// +------------+-------------+--------------------------+
// | Frame Type | Signal Type | Quantization Offset Type |
// +------------+-------------+--------------------------+
// | 0 | Inactive | Low |
// | | | |
// | 1 | Inactive | High |
// | | | |
// | 2 | Unvoiced | Low |
// | | | |
// | 3 | Unvoiced | High |
// | | | |
// | 4 | Voiced | Low |
// | | | |
// | 5 | Voiced | High |
// +------------+-------------+--------------------------+
//
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.3
signalType := frameSignalType(0)
quantizationOffsetType := frameQuantizationOffsetType(0)
switch frameTypeSymbol {
case 0:
signalType = frameSignalTypeInactive
quantizationOffsetType = frameQuantizationOffsetTypeLow
case 1:
signalType = frameSignalTypeInactive
quantizationOffsetType = frameQuantizationOffsetTypeHigh
case 2:
signalType = frameSignalTypeUnvoiced
quantizationOffsetType = frameQuantizationOffsetTypeLow
case 3:
signalType = frameSignalTypeUnvoiced
quantizationOffsetType = frameQuantizationOffsetTypeHigh
case 4:
signalType = frameSignalTypeVoiced
quantizationOffsetType = frameQuantizationOffsetTypeLow
case 5:
signalType = frameSignalTypeVoiced
quantizationOffsetType = frameQuantizationOffsetTypeHigh
}
fmt.Println(signalType)
fmt.Println(quantizationOffsetType)
return
}

View file

@ -148,6 +148,8 @@ func (c configurationMode) String() string {
return "Invalid"
}
// See Configuration for mapping of mode to configuration numbers
// https://datatracker.ietf.org/doc/html/rfc6716#section-3.1
func (c Configuration) mode() configurationMode {
switch {
case c >= 0 && c <= 11:
@ -208,6 +210,8 @@ func (f frameDuration) nanoseconds() int {
return 0
}
// See Configuration for mapping of frameDuration to configuration numbers
// https://datatracker.ietf.org/doc/html/rfc6716#section-3.1
func (c Configuration) frameDuration() frameDuration {
switch c {
case 16, 20, 24, 28:
@ -236,6 +240,8 @@ const (
BandwidthFullband
)
// See Configuration for mapping of bandwidth to configuration numbers
// https://datatracker.ietf.org/doc/html/rfc6716#section-3.1
func (c Configuration) bandwidth() Bandwidth {
switch {
case c <= 3: