From e539ceec0ffedfe32da90bd87d1e6cf446abf5e3 Mon Sep 17 00:00:00 2001 From: Sean DuBois Date: Sat, 16 Jul 2022 00:31:59 -0400 Subject: [PATCH] Continue work on Silk decoder Start to process Silk frame --- decoder.go | 2 +- internal/silk/decoder.go | 93 +++++++++++++++++++++++++++++++++++-- table_of_contents_header.go | 6 +++ 3 files changed, 95 insertions(+), 6 deletions(-) diff --git a/decoder.go b/decoder.go index 2900c5b..35e925d 100644 --- a/decoder.go +++ b/decoder.go @@ -38,7 +38,7 @@ func (d *Decoder) Decode(in []byte) (bandwidth Bandwidth, isStereo bool, frames } for _, encodedFrame := range encodedFrames { - _, decoded, err := d.silkDecoder.Decode(encodedFrame, tocHeader.isStereo(), cfg.frameDuration().nanoseconds()) + decoded, err := d.silkDecoder.Decode(encodedFrame, tocHeader.isStereo(), cfg.frameDuration().nanoseconds()) if err != nil { return 0, false, nil, err } diff --git a/internal/silk/decoder.go b/internal/silk/decoder.go index 03486bc..95f6833 100644 --- a/internal/silk/decoder.go +++ b/internal/silk/decoder.go @@ -6,8 +6,34 @@ import ( "github.com/pion/opus/internal/rangecoding" ) +type ( + frameSignalType byte + frameQuantizationOffsetType byte +) + const ( nanoseconds20Ms = 20000000 + + frameSignalTypeInactive frameSignalType = iota + 1 + frameSignalTypeUnvoiced + frameSignalTypeVoiced + + frameQuantizationOffsetTypeLow frameQuantizationOffsetType = iota + 1 + frameQuantizationOffsetTypeHigh +) + +var ( + // +----------+-----------------------------+ + // | VAD Flag | PDF | + // +----------+-----------------------------+ + // | Inactive | {26, 230, 0, 0, 0, 0}/256 | + // | | | + // | Active | {0, 0, 24, 74, 148, 10}/256 | + // +----------+-----------------------------+ + // + // https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.3 + icdfFrameTypeVADInactive = []uint{256, 26, 256} + icdfFrameTypeVADActive = []uint{256, 24, 98, 246, 256} ) // Decoder maintains the state needed to decode a stream @@ -22,24 +48,81 @@ func NewDecoder() 
*Decoder { } // Decode decodes many SILK subframes -func (d *Decoder) Decode(in []byte, isStereo bool, nanoseconds int) (samples int, decoded []byte, err error) { +func (d *Decoder) Decode(in []byte, isStereo bool, nanoseconds int) (decoded []byte, err error) { if nanoseconds != nanoseconds20Ms { - return 0, nil, errUnsupportedSilkFrameDuration + return nil, errUnsupportedSilkFrameDuration } else if isStereo { - return 0, nil, errUnsupportedSilkStereo + return nil, errUnsupportedSilkStereo } d.rangeDecoder.Init(in) + // The LP layer begins with two to eight header bits. These consist of one + // Voice Activity Detection (VAD) bit per frame (up to 3), followed by a + // single flag indicating the presence of LBRR frames. + // https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.3 voiceActivityDetected := d.rangeDecoder.DecodeSymbolLogP(1) == 1 lowBitRateRedundancy := d.rangeDecoder.DecodeSymbolLogP(1) == 1 if lowBitRateRedundancy { - return 0, nil, errUnsupportedSilkLowBitrateRedundancy + return nil, errUnsupportedSilkLowBitrateRedundancy } + // Each SILK frame contains a single "frame type" symbol that jointly + // codes the signal type and quantization offset type of the + // corresponding frame. 
+ // + // https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.3 + var frameTypeSymbol uint32 if voiceActivityDetected { - fmt.Println("VAD") + frameTypeSymbol = d.rangeDecoder.DecodeSymbolWithICDF(icdfFrameTypeVADActive) + } else { + frameTypeSymbol = d.rangeDecoder.DecodeSymbolWithICDF(icdfFrameTypeVADInactive) } + // +------------+-------------+--------------------------+ + // | Frame Type | Signal Type | Quantization Offset Type | + // +------------+-------------+--------------------------+ + // | 0 | Inactive | Low | + // | | | | + // | 1 | Inactive | High | + // | | | | + // | 2 | Unvoiced | Low | + // | | | | + // | 3 | Unvoiced | High | + // | | | | + // | 4 | Voiced | Low | + // | | | | + // | 5 | Voiced | High | + // +------------+-------------+--------------------------+ + // + // https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.3 + + signalType := frameSignalType(0) + quantizationOffsetType := frameQuantizationOffsetType(0) + + switch frameTypeSymbol { + case 0: + signalType = frameSignalTypeInactive + quantizationOffsetType = frameQuantizationOffsetTypeLow + case 1: + signalType = frameSignalTypeInactive + quantizationOffsetType = frameQuantizationOffsetTypeHigh + case 2: + signalType = frameSignalTypeUnvoiced + quantizationOffsetType = frameQuantizationOffsetTypeLow + case 3: + signalType = frameSignalTypeUnvoiced + quantizationOffsetType = frameQuantizationOffsetTypeHigh + case 4: + signalType = frameSignalTypeVoiced + quantizationOffsetType = frameQuantizationOffsetTypeLow + case 5: + signalType = frameSignalTypeVoiced + quantizationOffsetType = frameQuantizationOffsetTypeHigh + } + + fmt.Println(signalType) + fmt.Println(quantizationOffsetType) + return } diff --git a/table_of_contents_header.go b/table_of_contents_header.go index 98904cf..bdbcd4e 100644 --- a/table_of_contents_header.go +++ b/table_of_contents_header.go @@ -148,6 +148,8 @@ func (c configurationMode) String() string { return "Invalid" } +// See Configuration for 
mapping of mode to configuration numbers +// https://datatracker.ietf.org/doc/html/rfc6716#section-3.1 func (c Configuration) mode() configurationMode { switch { case c >= 0 && c <= 11: @@ -208,6 +210,8 @@ func (f frameDuration) nanoseconds() int { return 0 } +// See Configuration for mapping of frameDuration to configuration numbers +// https://datatracker.ietf.org/doc/html/rfc6716#section-3.1 func (c Configuration) frameDuration() frameDuration { switch c { case 16, 20, 24, 28: @@ -236,6 +240,8 @@ const ( BandwidthFullband ) +// See Configuration for mapping of bandwidth to configuration numbers +// https://datatracker.ietf.org/doc/html/rfc6716#section-3.1 func (c Configuration) bandwidth() Bandwidth { switch { case c <= 3: