package opus type ( // The table-of-contents (TOC) header that signals which of the // various modes and configurations a given packet uses. It is composed // of a configuration number, "config", a stereo flag, "s", and a frame // count code, "c", arranged as illustrated in Figure 1 // // 0 1 2 3 4 5 6 7 // +-+-+-+-+-+-+-+-+ // | config |s| c | // +-+-+-+-+-+-+-+-+ // // https://datatracker.ietf.org/doc/html/rfc6716#section-3.1 tableOfContentsHeader byte // Configuration numbers in each range (e.g., 0...3 for NB SILK- // only) correspond to the various choices of frame size, in the same // order. For example, configuration 0 has a 10 ms frame size and // configuration 3 has a 60 ms frame size. // +-----------------------+-----------+-----------+-------------------+ // | Configuration | Mode | Bandwidth | Frame Sizes | // | Number(s) | | | | // +-----------------------+-----------+-----------+-------------------+ // | 0...3 | SILK-only | NB | 10, 20, 40, 60 ms | // | | | | | // | 4...7 | SILK-only | MB | 10, 20, 40, 60 ms | // | | | | | // | 8...11 | SILK-only | WB | 10, 20, 40, 60 ms | // | | | | | // | 12...13 | Hybrid | SWB | 10, 20 ms | // | | | | | // | 14...15 | Hybrid | FB | 10, 20 ms | // | | | | | // | 16...19 | CELT-only | NB | 2.5, 5, 10, 20 ms | // | | | | | // | 20...23 | CELT-only | WB | 2.5, 5, 10, 20 ms | // | | | | | // | 24...27 | CELT-only | SWB | 2.5, 5, 10, 20 ms | // | | | | | // | 28...31 | CELT-only | FB | 2.5, 5, 10, 20 ms | // +-----------------------+-----------+-----------+-------------------+ // // https://datatracker.ietf.org/doc/html/rfc6716#section-3.1 Configuration byte // As described, the LP (SILK) layer and MDCT (CELT) layer can be // combined in three possible operating modes: // 1. A SILK-only mode for use in low bitrate connections with an audio // bandwidth of WB or less, // // 2. A Hybrid (SILK+CELT) mode for SWB or FB speech at medium // bitrates, and // // 3. A CELT-only mode for very low delay speech transmission as well // as music transmission (NB to FB). // // https://datatracker.ietf.org/doc/html/rfc6716#section-3.1 configurationMode byte // Opus can encode frames of 2.5, 5, 10, 20, 40, or 60 ms. It can also // combine multiple frames into packets of up to 120 ms. For real-time // applications, sending fewer packets per second reduces the bitrate, // since it reduces the overhead from IP, UDP, and RTP headers. // However, it increases latency and sensitivity to packet losses, as // losing one packet constitutes a loss of a bigger chunk of audio. // Increasing the frame duration also slightly improves coding // efficiency, but the gain becomes small for frame sizes above 20 ms. // For this reason, 20 ms frames are a good choice for most // // https://datatracker.ietf.org/doc/html/rfc6716#section-2.1.4 frameDuration byte // The Bandwidth the Opus codec scales from 6 kbit/s narrowband mono speech to // 510 kbit/s fullband stereo music, with algorithmic delays ranging // from 5 ms to 65.2 ms. At any given time, either the LP layer, the // MDCT layer, or both, may be active. It can seamlessly switch between // all of its various operating modes, giving it a great deal of // flexibility to adapt to varying content and network conditions // without renegotiating the current session. The codec allows input // and output of various audio bandwidths, defined as follows: // +----------------------+-----------------+-------------------------+ // | Abbreviation | Audio Bandwidth | Sample Rate (Effective) | // +----------------------+-----------------+-------------------------+ // | NB (narrowband) | 4 kHz | 8 kHz | // | | | | // | MB (medium-band) | 6 kHz | 12 kHz | // | | | | // | WB (wideband) | 8 kHz | 16 kHz | // | | | | // | SWB (super-wideband) | 12 kHz | 24 kHz | // | | | | // | FB (fullband) | 20 kHz (*) | 48 kHz | // +----------------------+-----------------+-------------------------+ // // https://datatracker.ietf.org/doc/html/rfc6716#section-2 Bandwidth byte // The remaining two bits of the TOC byte, labeled "c", code the number // of frames per packet (codes 0 to 3) as follows: // o 0: 1 frame in the packet // o 1: 2 frames in the packet, each with equal compressed size // o 2: 2 frames in the packet, with different compressed sizes // o 3: an arbitrary number of frames in the packet // // https://datatracker.ietf.org/doc/html/rfc6716#section-3.1 frameCode byte ) func (t tableOfContentsHeader) configuration() Configuration { return Configuration(t >> 3) } func (t tableOfContentsHeader) isStereo() bool { return (t & 0b00000100) != 0 } const ( frameCodeOneFrame frameCode = 0 frameCodeTwoEqualFrames = 1 frameCodeTwoDifferentFrames = 2 frameCodeArbitraryFrames = 3 ) func (t tableOfContentsHeader) frameCode() frameCode { return frameCode(t & 0b00000011) } const ( configurationModeSilkOnly configurationMode = iota + 1 configurationModeCELTOnly configurationModeHybrid ) func (c configurationMode) String() string { switch c { case configurationModeSilkOnly: return "Silk-only" case configurationModeCELTOnly: return "CELT-only" case configurationModeHybrid: return "Hybrid" } return "Invalid" } // See Configuration for mapping of mode to configuration numbers // https://datatracker.ietf.org/doc/html/rfc6716#section-3.1 func (c Configuration) mode() configurationMode { switch { case c >= 0 && c <= 11: return configurationModeSilkOnly case c >= 12 && c <= 15: return configurationModeHybrid case c >= 16 && c <= 31: return configurationModeCELTOnly default: return 0 } } const ( frameDuration2500us frameDuration = iota + 1 frameDuration5ms frameDuration10ms frameDuration20ms frameDuration40ms frameDuration60ms ) func (f frameDuration) String() string { switch f { case frameDuration2500us: return "2.5ms" case frameDuration5ms: return "5ms" case frameDuration10ms: return "10ms" case frameDuration20ms: return "20ms" case frameDuration40ms: return "40ms" case frameDuration60ms: return "60ms" } return "Invalid" } func (f frameDuration) nanoseconds() int { switch f { case frameDuration2500us: return 2500 case frameDuration5ms: return 5000000 case frameDuration10ms: return 10000000 case frameDuration20ms: return 20000000 case frameDuration40ms: return 40000000 case frameDuration60ms: return 60000000 } return 0 } // See Configuration for mapping of frameDuration to configuration numbers // https://datatracker.ietf.org/doc/html/rfc6716#section-3.1 func (c Configuration) frameDuration() frameDuration { switch c { case 16, 20, 24, 28: return frameDuration2500us case 17, 21, 25, 29: return frameDuration5ms case 0, 4, 8, 12, 14, 18, 22, 26, 30: return frameDuration10ms case 1, 5, 9, 13, 15, 19, 23, 27, 31: return frameDuration20ms case 2, 6: return frameDuration40ms case 3, 7, 11: return frameDuration60ms } return 0 } // Bandwidth constants const ( BandwidthNarrowband Bandwidth = iota + 1 BandwidthMediumband BandwidthWideband BandwidthSuperwideband BandwidthFullband ) // See Configuration for mapping of bandwidth to configuration numbers // https://datatracker.ietf.org/doc/html/rfc6716#section-3.1 func (c Configuration) bandwidth() Bandwidth { switch { case c <= 3: return BandwidthNarrowband case c <= 7: return BandwidthMediumband case c <= 11: return BandwidthWideband case c <= 13: return BandwidthSuperwideband case c <= 15: return BandwidthFullband case c <= 19: return BandwidthNarrowband case c <= 23: return BandwidthWideband case c <= 27: return BandwidthSuperwideband case c <= 31: return BandwidthFullband } return 0 } func (b Bandwidth) String() string { switch b { case BandwidthNarrowband: return "Narrowband" case BandwidthMediumband: return "Mediumband" case BandwidthWideband: return "Wideband" case BandwidthSuperwideband: return "Superwideband" case BandwidthFullband: return "Fullband" } return "Invalid" } // SampleRate returns the effective SampleRate for a given bandwidth func (b Bandwidth) SampleRate() int { switch b { case BandwidthNarrowband: return 8000 case BandwidthMediumband: return 12000 case BandwidthWideband: return 16000 case BandwidthSuperwideband: return 24000 case BandwidthFullband: return 48000 } return 0 } // The TOC byte is followed by a byte encoding the number of frames in // the packet in bits 2 to 7 (marked "M" in Figure 5), with bit 1 indicating // whether or not Opus padding is inserted (marked "p" in Figure 5), and bit 0 // indicating VBR (marked "v" in Figure 5). M MUST NOT be zero, and the audio // duration contained within a packet MUST NOT exceed 120 ms [R5]. This // limits the maximum frame count for any frame size to 48 (for 2.5 ms // frames), with lower limits for longer frame sizes. Figure 5 // illustrates the layout of the frame count byte. // // 0 // 0 1 2 3 4 5 6 7 // +-+-+-+-+-+-+-+-+ // |v|p| M | // +-+-+-+-+-+-+-+-+ // // Figure 5: The frame count byte func parseFrameCountByte(in byte) (isVBR bool, hasPadding bool, frameCount byte) { isVBR = (in & 0b10000000) == 1 hasPadding = (in & 0b01000000) == 1 frameCount = byte(in & 0b00111111) return }