Continue work on Silk decoder
Start to process Silk frame
This commit is contained in:
parent
cb80e84c4e
commit
e539ceec0f
|
@ -38,7 +38,7 @@ func (d *Decoder) Decode(in []byte) (bandwidth Bandwidth, isStereo bool, frames
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, encodedFrame := range encodedFrames {
|
for _, encodedFrame := range encodedFrames {
|
||||||
_, decoded, err := d.silkDecoder.Decode(encodedFrame, tocHeader.isStereo(), cfg.frameDuration().nanoseconds())
|
decoded, err := d.silkDecoder.Decode(encodedFrame, tocHeader.isStereo(), cfg.frameDuration().nanoseconds())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, false, nil, err
|
return 0, false, nil, err
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,8 +6,34 @@ import (
|
||||||
"github.com/pion/opus/internal/rangecoding"
|
"github.com/pion/opus/internal/rangecoding"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
type (
|
||||||
|
frameSignalType byte
|
||||||
|
frameQuantizationOffsetType byte
|
||||||
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
nanoseconds20Ms = 20000000
|
nanoseconds20Ms = 20000000
|
||||||
|
|
||||||
|
frameSignalTypeInactive frameSignalType = iota + 1
|
||||||
|
frameSignalTypeUnvoiced
|
||||||
|
frameSignalTypeVoiced
|
||||||
|
|
||||||
|
frameQuantizationOffsetTypeLow frameQuantizationOffsetType = iota + 1
|
||||||
|
frameQuantizationOffsetTypeHigh
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
// +----------+-----------------------------+
|
||||||
|
// | VAD Flag | PDF |
|
||||||
|
// +----------+-----------------------------+
|
||||||
|
// | Inactive | {26, 230, 0, 0, 0, 0}/256 |
|
||||||
|
// | | |
|
||||||
|
// | Active | {0, 0, 24, 74, 148, 10}/256 |
|
||||||
|
// +----------+-----------------------------+
|
||||||
|
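// NOTE(review): the Active ICDF below omits the two leading zero-probability
// entries, so its symbols 0-3 presumably map to frame types 2-5 — confirm.
//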
|
||||||
|
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.3
|
||||||
|
icdfFrameTypeVADInactive = []uint{256, 26, 256}
|
||||||
|
icdfFrameTypeVADActive = []uint{256, 24, 98, 246, 256}
|
||||||
)
|
)
|
||||||
|
|
||||||
// Decoder maintains the state needed to decode a stream
|
// Decoder maintains the state needed to decode a stream
|
||||||
|
@ -22,24 +48,81 @@ func NewDecoder() *Decoder {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Decode decodes many SILK subframes
|
// Decode decodes many SILK subframes
|
||||||
func (d *Decoder) Decode(in []byte, isStereo bool, nanoseconds int) (samples int, decoded []byte, err error) {
|
func (d *Decoder) Decode(in []byte, isStereo bool, nanoseconds int) (decoded []byte, err error) {
|
||||||
if nanoseconds != nanoseconds20Ms {
|
if nanoseconds != nanoseconds20Ms {
|
||||||
return 0, nil, errUnsupportedSilkFrameDuration
|
return nil, errUnsupportedSilkFrameDuration
|
||||||
} else if isStereo {
|
} else if isStereo {
|
||||||
return 0, nil, errUnsupportedSilkStereo
|
return nil, errUnsupportedSilkStereo
|
||||||
}
|
}
|
||||||
|
|
||||||
d.rangeDecoder.Init(in)
|
d.rangeDecoder.Init(in)
|
||||||
|
|
||||||
|
// The LP layer begins with two to eight header bits. These consist of one
|
||||||
|
// Voice Activity Detection (VAD) bit per frame (up to 3), followed by a
|
||||||
|
// single flag indicating the presence of LBRR frames.
|
||||||
|
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.3
|
||||||
voiceActivityDetected := d.rangeDecoder.DecodeSymbolLogP(1) == 1
|
voiceActivityDetected := d.rangeDecoder.DecodeSymbolLogP(1) == 1
|
||||||
lowBitRateRedundancy := d.rangeDecoder.DecodeSymbolLogP(1) == 1
|
lowBitRateRedundancy := d.rangeDecoder.DecodeSymbolLogP(1) == 1
|
||||||
if lowBitRateRedundancy {
|
if lowBitRateRedundancy {
|
||||||
return 0, nil, errUnsupportedSilkLowBitrateRedundancy
|
return nil, errUnsupportedSilkLowBitrateRedundancy
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Each SILK frame contains a single "frame type" symbol that jointly
|
||||||
|
// codes the signal type and quantization offset type of the
|
||||||
|
// corresponding frame.
|
||||||
|
//
|
||||||
|
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.3
|
||||||
|
var frameTypeSymbol uint32
|
||||||
if voiceActivityDetected {
|
if voiceActivityDetected {
|
||||||
fmt.Println("VAD")
|
frameTypeSymbol = d.rangeDecoder.DecodeSymbolWithICDF(icdfFrameTypeVADActive)
|
||||||
|
} else {
|
||||||
|
frameTypeSymbol = d.rangeDecoder.DecodeSymbolWithICDF(icdfFrameTypeVADInactive)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// +------------+-------------+--------------------------+
|
||||||
|
// | Frame Type | Signal Type | Quantization Offset Type |
|
||||||
|
// +------------+-------------+--------------------------+
|
||||||
|
// | 0 | Inactive | Low |
|
||||||
|
// | | | |
|
||||||
|
// | 1 | Inactive | High |
|
||||||
|
// | | | |
|
||||||
|
// | 2 | Unvoiced | Low |
|
||||||
|
// | | | |
|
||||||
|
// | 3 | Unvoiced | High |
|
||||||
|
// | | | |
|
||||||
|
// | 4 | Voiced | Low |
|
||||||
|
// | | | |
|
||||||
|
// | 5 | Voiced | High |
|
||||||
|
// +------------+-------------+--------------------------+
|
||||||
|
//
|
||||||
|
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.3
|
||||||
|
|
||||||
|
signalType := frameSignalType(0)
|
||||||
|
quantizationOffsetType := frameQuantizationOffsetType(0)
|
||||||
|
|
||||||
|
switch frameTypeSymbol {
|
||||||
|
case 0:
|
||||||
|
signalType = frameSignalTypeInactive
|
||||||
|
quantizationOffsetType = frameQuantizationOffsetTypeLow
|
||||||
|
case 1:
|
||||||
|
signalType = frameSignalTypeInactive
|
||||||
|
quantizationOffsetType = frameQuantizationOffsetTypeHigh
|
||||||
|
case 2:
|
||||||
|
signalType = frameSignalTypeUnvoiced
|
||||||
|
quantizationOffsetType = frameQuantizationOffsetTypeLow
|
||||||
|
case 3:
|
||||||
|
signalType = frameSignalTypeUnvoiced
|
||||||
|
quantizationOffsetType = frameQuantizationOffsetTypeHigh
|
||||||
|
case 4:
|
||||||
|
signalType = frameSignalTypeVoiced
|
||||||
|
quantizationOffsetType = frameQuantizationOffsetTypeLow
|
||||||
|
case 5:
|
||||||
|
signalType = frameSignalTypeVoiced
|
||||||
|
quantizationOffsetType = frameQuantizationOffsetTypeHigh
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Println(signalType)
|
||||||
|
fmt.Println(quantizationOffsetType)
|
||||||
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
|
@ -148,6 +148,8 @@ func (c configurationMode) String() string {
|
||||||
return "Invalid"
|
return "Invalid"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// See Configuration for mapping of mode to configuration numbers
|
||||||
|
// https://datatracker.ietf.org/doc/html/rfc6716#section-3.1
|
||||||
func (c Configuration) mode() configurationMode {
|
func (c Configuration) mode() configurationMode {
|
||||||
switch {
|
switch {
|
||||||
case c >= 0 && c <= 11:
|
case c >= 0 && c <= 11:
|
||||||
|
@ -208,6 +210,8 @@ func (f frameDuration) nanoseconds() int {
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// See Configuration for mapping of frameDuration to configuration numbers
|
||||||
|
// https://datatracker.ietf.org/doc/html/rfc6716#section-3.1
|
||||||
func (c Configuration) frameDuration() frameDuration {
|
func (c Configuration) frameDuration() frameDuration {
|
||||||
switch c {
|
switch c {
|
||||||
case 16, 20, 24, 28:
|
case 16, 20, 24, 28:
|
||||||
|
@ -236,6 +240,8 @@ const (
|
||||||
BandwidthFullband
|
BandwidthFullband
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// See Configuration for mapping of bandwidth to configuration numbers
|
||||||
|
// https://datatracker.ietf.org/doc/html/rfc6716#section-3.1
|
||||||
func (c Configuration) bandwidth() Bandwidth {
|
func (c Configuration) bandwidth() Bandwidth {
|
||||||
switch {
|
switch {
|
||||||
case c <= 3:
|
case c <= 3:
|
||||||
|
|
Loading…
Reference in a new issue