Continue work on Silk decoder

Start to process Silk frame
This commit is contained in:
Sean DuBois 2022-07-16 00:31:59 -04:00
parent cb80e84c4e
commit e539ceec0f
3 changed files with 95 additions and 6 deletions

View file

@ -38,7 +38,7 @@ func (d *Decoder) Decode(in []byte) (bandwidth Bandwidth, isStereo bool, frames
}
for _, encodedFrame := range encodedFrames {
_, decoded, err := d.silkDecoder.Decode(encodedFrame, tocHeader.isStereo(), cfg.frameDuration().nanoseconds())
decoded, err := d.silkDecoder.Decode(encodedFrame, tocHeader.isStereo(), cfg.frameDuration().nanoseconds())
if err != nil {
return 0, false, nil, err
}

View file

@ -6,8 +6,34 @@ import (
"github.com/pion/opus/internal/rangecoding"
)
type (
frameSignalType byte
frameQuantizationOffsetType byte
)
const (
nanoseconds20Ms = 20000000
frameSignalTypeInactive frameSignalType = iota + 1
frameSignalTypeUnvoiced
frameSignalTypeVoiced
frameQuantizationOffsetTypeLow frameQuantizationOffsetType = iota + 1
frameQuantizationOffsetTypeHigh
)
var (
// +----------+-----------------------------+
// | VAD Flag | PDF |
// +----------+-----------------------------+
// | Inactive | {26, 230, 0, 0, 0, 0}/256 |
// | | |
// | Active | {0, 0, 24, 74, 148, 10}/256 |
// +----------+-----------------------------+
//
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.3
icdfFrameTypeVADInactive = []uint{256, 26, 256}
icdfFrameTypeVADActive = []uint{256, 24, 98, 246, 256}
)
// Decoder maintains the state needed to decode a stream
@ -22,24 +48,81 @@ func NewDecoder() *Decoder {
}
// Decode decodes many SILK subframes
func (d *Decoder) Decode(in []byte, isStereo bool, nanoseconds int) (samples int, decoded []byte, err error) {
func (d *Decoder) Decode(in []byte, isStereo bool, nanoseconds int) (decoded []byte, err error) {
if nanoseconds != nanoseconds20Ms {
return 0, nil, errUnsupportedSilkFrameDuration
return nil, errUnsupportedSilkFrameDuration
} else if isStereo {
return 0, nil, errUnsupportedSilkStereo
return nil, errUnsupportedSilkStereo
}
d.rangeDecoder.Init(in)
// The LP layer begins with two to eight header bits. These consist of one
// Voice Activity Detection (VAD) bit per frame (up to 3), followed by a
// single flag indicating the presence of LBRR frames.
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.3
voiceActivityDetected := d.rangeDecoder.DecodeSymbolLogP(1) == 1
lowBitRateRedundancy := d.rangeDecoder.DecodeSymbolLogP(1) == 1
if lowBitRateRedundancy {
return 0, nil, errUnsupportedSilkLowBitrateRedundancy
return nil, errUnsupportedSilkLowBitrateRedundancy
}
// Each SILK frame contains a single "frame type" symbol that jointly
// codes the signal type and quantization offset type of the
// corresponding frame.
//
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.3
var frameTypeSymbol uint32
if voiceActivityDetected {
fmt.Println("VAD")
frameTypeSymbol = d.rangeDecoder.DecodeSymbolWithICDF(icdfFrameTypeVADActive)
} else {
frameTypeSymbol = d.rangeDecoder.DecodeSymbolWithICDF(icdfFrameTypeVADInactive)
}
// +------------+-------------+--------------------------+
// | Frame Type | Signal Type | Quantization Offset Type |
// +------------+-------------+--------------------------+
// | 0 | Inactive | Low |
// | | | |
// | 1 | Inactive | High |
// | | | |
// | 2 | Unvoiced | Low |
// | | | |
// | 3 | Unvoiced | High |
// | | | |
// | 4 | Voiced | Low |
// | | | |
// | 5 | Voiced | High |
// +------------+-------------+--------------------------+
//
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.3
signalType := frameSignalType(0)
quantizationOffsetType := frameQuantizationOffsetType(0)
switch frameTypeSymbol {
case 0:
signalType = frameSignalTypeInactive
quantizationOffsetType = frameQuantizationOffsetTypeLow
case 1:
signalType = frameSignalTypeInactive
quantizationOffsetType = frameQuantizationOffsetTypeHigh
case 2:
signalType = frameSignalTypeUnvoiced
quantizationOffsetType = frameQuantizationOffsetTypeLow
case 3:
signalType = frameSignalTypeUnvoiced
quantizationOffsetType = frameQuantizationOffsetTypeHigh
case 4:
signalType = frameSignalTypeVoiced
quantizationOffsetType = frameQuantizationOffsetTypeLow
case 5:
signalType = frameSignalTypeVoiced
quantizationOffsetType = frameQuantizationOffsetTypeHigh
}
fmt.Println(signalType)
fmt.Println(quantizationOffsetType)
return
}

View file

@ -148,6 +148,8 @@ func (c configurationMode) String() string {
return "Invalid"
}
// See Configuration for mapping of mode to configuration numbers
// https://datatracker.ietf.org/doc/html/rfc6716#section-3.1
func (c Configuration) mode() configurationMode {
switch {
case c >= 0 && c <= 11:
@ -208,6 +210,8 @@ func (f frameDuration) nanoseconds() int {
return 0
}
// See Configuration for mapping of frameDuration to configuration numbers
// https://datatracker.ietf.org/doc/html/rfc6716#section-3.1
func (c Configuration) frameDuration() frameDuration {
switch c {
case 16, 20, 24, 28:
@ -236,6 +240,8 @@ const (
BandwidthFullband
)
// See Configuration for mapping of bandwidth to configuration numbers
// https://datatracker.ietf.org/doc/html/rfc6716#section-3.1
func (c Configuration) bandwidth() Bandwidth {
switch {
case c <= 3: