// Package frame implements access to FLAC audio frames.
//
// A brief introduction of the FLAC audio format [1] follows. FLAC encoders
// divide the audio stream into blocks through a process called blocking [2]. A
// block contains the unencoded audio samples from all channels during a short
// period of time. Each audio block is divided into subblocks, one per channel.
//
// There is often a correlation between the left and right channel of stereo
// audio. Using inter-channel decorrelation [3] it is possible to store only one
// of the channels and the difference between the channels, or store the average
// of the channels and their difference. An encoder decorrelates audio samples
// as follows:
//
//	mid = (left + right)/2 // average of the channels
//	side = left - right    // difference between the channels
//
// The blocks are encoded using a variety of prediction methods [4][5] and
// stored in frames. Blocks and subblocks contain unencoded audio samples while
// frames and subframes contain encoded audio samples. A FLAC stream contains
// one or more audio frames.
//
// [1]: https://www.xiph.org/flac/format.html#architecture
// [2]: https://www.xiph.org/flac/format.html#blocking
// [3]: https://www.xiph.org/flac/format.html#interchannel
// [4]: https://www.xiph.org/flac/format.html#prediction
// [5]: https://godoc.org/git.gammaspectra.live/S.O.N.G/flacgo/frame#Pred
package frame

import (
	"encoding/binary"
	"errors"
	"fmt"
	"hash"
	"io"
	"log"

	"git.gammaspectra.live/S.O.N.G/flacgo/internal/bits"
	"git.gammaspectra.live/S.O.N.G/flacgo/internal/hashutil"
	"git.gammaspectra.live/S.O.N.G/flacgo/internal/hashutil/crc16"
	"git.gammaspectra.live/S.O.N.G/flacgo/internal/hashutil/crc8"
	"git.gammaspectra.live/S.O.N.G/flacgo/internal/utf8"
)

// A Frame contains the header and subframes of an audio frame. It holds the
// encoded samples from a block (a part) of the audio stream. Each subframe
// holds the samples from one of its channels.
// // ref: https://www.xiph.org/flac/format.html#frame type Frame struct { // Audio frame header. Header // One subframe per channel, containing encoded audio samples. Subframes []*Subframe // CRC-16 hash sum, calculated by read operations on hr. crc hashutil.Hash16 // A bit reader, wrapping read operations to hr. br *bits.Reader // A CRC-16 hash reader, wrapping read operations to r. hr io.Reader // Underlying io.Reader. r io.Reader } // New creates a new Frame for accessing the audio samples of r. It reads and // parses an audio frame header. It returns io.EOF to signal a graceful end of // FLAC stream. // // Call Frame.Parse to parse the audio samples of its subframes. func New(r io.Reader) (frame *Frame, err error) { // Create a new CRC-16 hash reader which adds the data from all read // operations to a running hash. crc := crc16.NewIBM() hr := io.TeeReader(r, crc) // Parse frame header. frame = &Frame{crc: crc, hr: hr, r: r} err = frame.parseHeader() return frame, err } // Parse reads and parses the header, and the audio samples from each subframe // of a frame. If the samples are inter-channel decorrelated between the // subframes, it correlates them. It returns io.EOF to signal a graceful end of // FLAC stream. // // ref: https://www.xiph.org/flac/format.html#interchannel func Parse(r io.Reader) (frame *Frame, err error) { // Parse frame header. frame, err = New(r) if err != nil { return frame, err } // Parse subframes. err = frame.Parse() return frame, err } // Parse reads and parses the audio samples from each subframe of the frame. If // the samples are inter-channel decorrelated between the subframes, it // correlates them. // // ref: https://www.xiph.org/flac/format.html#interchannel func (frame *Frame) Parse() error { // Parse subframes. frame.Subframes = make([]*Subframe, frame.Channels.Count()) var err error for channel := range frame.Subframes { // The side channel requires an extra bit per sample when using // inter-channel decorrelation. 
bps := uint(frame.BitsPerSample) switch frame.Channels { case ChannelsSideRight: // channel 0 is the side channel. if channel == 0 { bps++ } case ChannelsLeftSide, ChannelsMidSide: // channel 1 is the side channel. if channel == 1 { bps++ } } // Parse subframe. frame.Subframes[channel], err = frame.parseSubframe(frame.br, bps) if err != nil { return err } } // Inter-channel correlation of subframe samples. frame.correlate() // 2 bytes: CRC-16 checksum. var want uint16 if err = binary.Read(frame.r, binary.BigEndian, &want); err != nil { return unexpected(err) } got := frame.crc.Sum16() if got != want { return fmt.Errorf("frame.Frame.Parse: CRC-16 checksum mismatch; expected 0x%04X, got 0x%04X", want, got) } return nil } // Hash adds the decoded audio samples of the frame to a running MD5 hash. It // can be used in conjunction with StreamInfo.MD5sum to verify the integrity of // the decoded audio samples. // // Note: The audio samples of the frame must be decoded before calling Hash. func (frame *Frame) Hash(md5sum hash.Hash) { // Write decoded samples to a running MD5 hash. bps := frame.BitsPerSample var buf [3]byte for i := 0; i < int(frame.BlockSize); i++ { for _, subframe := range frame.Subframes { sample := subframe.Samples[i] switch bps { case 8: buf[0] = uint8(sample) md5sum.Write(buf[:1]) case 16: buf[0] = uint8(sample) buf[1] = uint8(sample >> 8) md5sum.Write(buf[:2]) case 24: buf[0] = uint8(sample) buf[1] = uint8(sample >> 8) buf[2] = uint8(sample >> 16) md5sum.Write(buf[:]) default: log.Printf("frame.Frame.Hash: support for %d-bit sample size not yet implemented", bps) } } } } // A Header contains the basic properties of an audio frame, such as its sample // rate and channel count. To facilitate random access decoding each frame // header starts with a sync-code. This allows the decoder to synchronize and // locate the start of a frame header. 
// // ref: https://www.xiph.org/flac/format.html#frame_header type Header struct { // Specifies if the block size is fixed or variable. HasFixedBlockSize bool // Block size in inter-channel samples, i.e. the number of audio samples in // each subframe. BlockSize uint16 // Sample rate in Hz; a 0 value implies unknown, get sample rate from // StreamInfo. SampleRate uint32 // Specifies the number of channels (subframes) that exist in the frame, // their order and possible inter-channel decorrelation. Channels Channels // Sample size in bits-per-sample; a 0 value implies unknown, get sample size // from StreamInfo. BitsPerSample uint8 // Specifies the frame number if the block size is fixed, and the first // sample number in the frame otherwise. When using fixed block size, the // first sample number in the frame can be derived by multiplying the frame // number with the block size (in samples). Num uint64 } // Errors returned by Frame.parseHeader. var ( ErrInvalidSync = errors.New("frame.Frame.parseHeader: invalid sync-code") ) // parseHeader reads and parses the header of an audio frame. func (frame *Frame) parseHeader() error { // Create a new CRC-8 hash reader which adds the data from all read // operations to a running hash. h := crc8.NewATM() hr := io.TeeReader(frame.hr, h) // Create bit reader. br := bits.NewReader(hr) frame.br = br // 14 bits: sync-code (11111111111110) x, err := br.Read(14) if err != nil { // This is the only place an audio frame may return io.EOF, which signals // a graceful end of a FLAC stream. return err } if x != 0x3FFE { return ErrInvalidSync } // 1 bit: reserved. x, err = br.Read(1) if err != nil { return unexpected(err) } if x != 0 { return errors.New("frame.Frame.parseHeader: non-zero reserved value") } // 1 bit: HasFixedBlockSize. x, err = br.Read(1) if err != nil { return unexpected(err) } if x == 0 { frame.HasFixedBlockSize = true } // 4 bits: BlockSize. 
The block size parsing is simplified by deferring it to // the end of the header. blockSize, err := br.Read(4) if err != nil { return unexpected(err) } // 4 bits: SampleRate. The sample rate parsing is simplified by deferring it // to the end of the header. sampleRate, err := br.Read(4) if err != nil { return unexpected(err) } // Parse channels. if err := frame.parseChannels(br); err != nil { return err } // Parse bits per sample. if err := frame.parseBitsPerSample(br); err != nil { return err } // 1 bit: reserved. x, err = br.Read(1) if err != nil { return unexpected(err) } if x != 0 { return errors.New("frame.Frame.parseHeader: non-zero reserved value") } // if (fixed block size) // 1-6 bytes: UTF-8 encoded frame number. // else // 1-7 bytes: UTF-8 encoded sample number. frame.Num, err = utf8.Decode(hr) if err != nil { return unexpected(err) } // Parse block size. if err := frame.parseBlockSize(br, blockSize); err != nil { return err } // Parse sample rate. if err := frame.parseSampleRate(br, sampleRate); err != nil { return err } // 1 byte: CRC-8 checksum. var want uint8 if err = binary.Read(frame.hr, binary.BigEndian, &want); err != nil { return unexpected(err) } got := h.Sum8() if want != got { return fmt.Errorf("frame.Frame.parseHeader: CRC-8 checksum mismatch; expected 0x%02X, got 0x%02X", want, got) } return nil } // parseBitsPerSample parses the bits per sample of the header. func (frame *Frame) parseBitsPerSample(br *bits.Reader) error { // 3 bits: BitsPerSample. x, err := br.Read(3) if err != nil { return unexpected(err) } // The 3 bits are used to specify the sample size as follows: // 000: unknown sample size; get from StreamInfo. // 001: 8 bits-per-sample. // 010: 12 bits-per-sample. // 011: reserved. // 100: 16 bits-per-sample. // 101: 20 bits-per-sample. // 110: 24 bits-per-sample. // 111: reserved. switch x { case 0x0: // 000: unknown bits-per-sample; get from StreamInfo. case 0x1: // 001: 8 bits-per-sample. 
frame.BitsPerSample = 8 case 0x2: // 010: 12 bits-per-sample. frame.BitsPerSample = 12 // TODO(u): Remove log message when the test cases have been extended. log.Printf("frame.Frame.parseHeader: The flac library test cases do not yet include any audio files with %d bits-per-sample. If possible please consider contributing this audio sample to improve the reliability of the test cases.", frame.BitsPerSample) case 0x4: // 100: 16 bits-per-sample. frame.BitsPerSample = 16 case 0x5: // 101: 20 bits-per-sample. frame.BitsPerSample = 20 // TODO(u): Remove log message when the test cases have been extended. log.Printf("frame.Frame.parseHeader: The flac library test cases do not yet include any audio files with %d bits-per-sample. If possible please consider contributing this audio sample to improve the reliability of the test cases.", frame.BitsPerSample) case 0x6: // 110: 24 bits-per-sample. frame.BitsPerSample = 24 default: // 011: reserved. // 111: reserved. return fmt.Errorf("frame.Frame.parseHeader: reserved sample size bit pattern (%03b)", x) } return nil } // parseChannels parses the channels of the header. func (frame *Frame) parseChannels(br *bits.Reader) error { // 4 bits: Channels. // // The 4 bits are used to specify the channels as follows: // 0000: (1 channel) mono. // 0001: (2 channels) left, right. // 0010: (3 channels) left, right, center. // 0011: (4 channels) left, right, left surround, right surround. // 0100: (5 channels) left, right, center, left surround, right surround. // 0101: (6 channels) left, right, center, LFE, left surround, right surround. // 0110: (7 channels) left, right, center, LFE, center surround, side left, side right. // 0111: (8 channels) left, right, center, LFE, left surround, right surround, side left, side right. // 1000: (2 channels) left, side; using inter-channel decorrelation. // 1001: (2 channels) side, right; using inter-channel decorrelation. // 1010: (2 channels) mid, side; using inter-channel decorrelation. 
// 1011: reserved. // 1100: reserved. // 1101: reserved. // 1111: reserved. x, err := br.Read(4) if err != nil { return unexpected(err) } if x >= 0xB { return fmt.Errorf("frame.Frame.parseHeader: reserved channels bit pattern (%04b)", x) } frame.Channels = Channels(x) return nil } // parseBlockSize parses the block size of the header. func (frame *Frame) parseBlockSize(br *bits.Reader, blockSize uint64) error { // The 4 bits of n are used to specify the block size as follows: // 0000: reserved. // 0001: 192 samples. // 0010-0101: 576 * 2^(n-2) samples. // 0110: get 8 bit (block size)-1 from the end of the header. // 0111: get 16 bit (block size)-1 from the end of the header. // 1000-1111: 256 * 2^(n-8) samples. n := blockSize switch { case n == 0x0: // 0000: reserved. return errors.New("frame.Frame.parseHeader: reserved block size bit pattern (0000)") case n == 0x1: // 0001: 192 samples. frame.BlockSize = 192 // TODO(u): Remove log message when the test cases have been extended. log.Printf("frame.Frame.parseHeader: The flac library test cases do not yet include any audio files with block size %d. If possible please consider contributing this audio sample to improve the reliability of the test cases.", frame.BlockSize) case n >= 0x2 && n <= 0x5: // 0010-0101: 576 * 2^(n-2) samples. frame.BlockSize = 576 * (1 << (n - 2)) case n == 0x6: // 0110: get 8 bit (block size)-1 from the end of the header. x, err := br.Read(8) if err != nil { return unexpected(err) } frame.BlockSize = uint16(x + 1) case n == 0x7: // 0111: get 16 bit (block size)-1 from the end of the header. x, err := br.Read(16) if err != nil { return unexpected(err) } frame.BlockSize = uint16(x + 1) default: // 1000-1111: 256 * 2^(n-8) samples. frame.BlockSize = 256 * (1 << (n - 8)) } return nil } // parseSampleRate parses the sample rate of the header. 
func (frame *Frame) parseSampleRate(br *bits.Reader, sampleRate uint64) error { // The 4 bits are used to specify the sample rate as follows: // 0000: unknown sample rate; get from StreamInfo. // 0001: 88.2 kHz. // 0010: 176.4 kHz. // 0011: 192 kHz. // 0100: 8 kHz. // 0101: 16 kHz. // 0110: 22.05 kHz. // 0111: 24 kHz. // 1000: 32 kHz. // 1001: 44.1 kHz. // 1010: 48 kHz. // 1011: 96 kHz. // 1100: get 8 bit sample rate (in kHz) from the end of the header. // 1101: get 16 bit sample rate (in Hz) from the end of the header. // 1110: get 16 bit sample rate (in daHz) from the end of the header. // 1111: invalid. switch sampleRate { case 0x0: // 0000: unknown sample rate; get from StreamInfo. case 0x1: // 0001: 88.2 kHz. frame.SampleRate = 88200 case 0x2: // 0010: 176.4 kHz. frame.SampleRate = 176400 // TODO(u): Remove log message when the test cases have been extended. log.Printf("frame.Frame.parseHeader: The flac library test cases do not yet include any audio files with sample rate %d. If possible please consider contributing this audio sample to improve the reliability of the test cases.", frame.SampleRate) case 0x3: // 0011: 192 kHz. frame.SampleRate = 192000 case 0x4: // 0100: 8 kHz. frame.SampleRate = 8000 case 0x5: // 0101: 16 kHz. frame.SampleRate = 16000 case 0x6: // 0110: 22.05 kHz. frame.SampleRate = 22050 case 0x7: // 0111: 24 kHz. frame.SampleRate = 24000 // TODO(u): Remove log message when the test cases have been extended. log.Printf("frame.Frame.parseHeader: The flac library test cases do not yet include any audio files with sample rate %d. If possible please consider contributing this audio sample to improve the reliability of the test cases.", frame.SampleRate) case 0x8: // 1000: 32 kHz. frame.SampleRate = 32000 case 0x9: // 1001: 44.1 kHz. frame.SampleRate = 44100 case 0xA: // 1010: 48 kHz. frame.SampleRate = 48000 case 0xB: // 1011: 96 kHz. frame.SampleRate = 96000 case 0xC: // 1100: get 8 bit sample rate (in kHz) from the end of the header. 
x, err := br.Read(8) if err != nil { return unexpected(err) } frame.SampleRate = uint32(x * 1000) // TODO(u): Remove log message when the test cases have been extended. log.Printf("frame.Frame.parseHeader: The flac library test cases do not yet include any audio files with sample rate %d. If possible please consider contributing this audio sample to improve the reliability of the test cases.", frame.SampleRate) case 0xD: // 1101: get 16 bit sample rate (in Hz) from the end of the header. x, err := br.Read(16) if err != nil { return unexpected(err) } frame.SampleRate = uint32(x) case 0xE: // 1110: get 16 bit sample rate (in daHz) from the end of the header. x, err := br.Read(16) if err != nil { return unexpected(err) } frame.SampleRate = uint32(x * 10) // TODO(u): Remove log message when the test cases have been extended. log.Printf("frame.Frame.parseHeader: The flac library test cases do not yet include any audio files with sample rate %d. If possible please consider contributing this audio sample to improve the reliability of the test cases.", frame.SampleRate) default: // 1111: invalid. return errors.New("frame.Frame.parseHeader: invalid sample rate bit pattern (1111)") } return nil } // Channels specifies the number of channels (subframes) that exist in a frame, // their order and possible inter-channel decorrelation. type Channels uint8 // Channel assignments. The following abbreviations are used: // // C: center (directly in front) // R: right (standard stereo) // Sr: side right (directly to the right) // Rs: right surround (back right) // Cs: center surround (rear center) // Ls: left surround (back left) // Sl: side left (directly to the left) // L: left (standard stereo) // Lfe: low-frequency effect (placed according to room acoustics) // // The first 6 channel constants follow the SMPTE/ITU-R channel order: // // L R C Lfe Ls Rs const ( ChannelsMono Channels = iota // 1 channel: mono. ChannelsLR // 2 channels: left, right. 
ChannelsLRC // 3 channels: left, right, center. ChannelsLRLsRs // 4 channels: left, right, left surround, right surround. ChannelsLRCLsRs // 5 channels: left, right, center, left surround, right surround. ChannelsLRCLfeLsRs // 6 channels: left, right, center, LFE, left surround, right surround. ChannelsLRCLfeCsSlSr // 7 channels: left, right, center, LFE, center surround, side left, side right. ChannelsLRCLfeLsRsSlSr // 8 channels: left, right, center, LFE, left surround, right surround, side left, side right. ChannelsLeftSide // 2 channels: left, side; using inter-channel decorrelation. ChannelsSideRight // 2 channels: side, right; using inter-channel decorrelation. ChannelsMidSide // 2 channels: mid, side; using inter-channel decorrelation. ) // nChannels specifies the number of channels used by each channel assignment. var nChannels = [...]int{ ChannelsMono: 1, ChannelsLR: 2, ChannelsLRC: 3, ChannelsLRLsRs: 4, ChannelsLRCLsRs: 5, ChannelsLRCLfeLsRs: 6, ChannelsLRCLfeCsSlSr: 7, ChannelsLRCLfeLsRsSlSr: 8, ChannelsLeftSide: 2, ChannelsSideRight: 2, ChannelsMidSide: 2, } // Count returns the number of channels (subframes) used by the provided channel // assignment. func (channels Channels) Count() int { return nChannels[channels] } // correlate reverts any inter-channel decorrelation between the samples of the // subframes. // // An encoder decorrelates audio samples as follows: // // mid = (left + right)/2 // side = left - right func (frame *Frame) correlate() { switch frame.Channels { case ChannelsLeftSide: // 2 channels: left, side; using inter-channel decorrelation. left := frame.Subframes[0].Samples side := frame.Subframes[1].Samples for i := range side { // right = left - side side[i] = left[i] - side[i] } case ChannelsSideRight: // 2 channels: side, right; using inter-channel decorrelation. 
side := frame.Subframes[0].Samples right := frame.Subframes[1].Samples // left = right + side for i := range side { side[i] += right[i] } case ChannelsMidSide: // 2 channels: mid, side; using inter-channel decorrelation. mid := frame.Subframes[0].Samples side := frame.Subframes[1].Samples for i := range side { // left = (2*mid + side)/2 // right = (2*mid - side)/2 m := mid[i] s := side[i] m *= 2 // Notice that the integer division in mid = (left + right)/2 discards // the least significant bit. It can be reconstructed however, since a // sum A+B and a difference A-B has the same least significant bit. // // ref: Data Compression: The Complete Reference (ch. 7, Decorrelation) m |= s & 1 mid[i] = (m + s) / 2 side[i] = (m - s) / 2 } } } // SampleNumber returns the first sample number contained within the frame. func (frame *Frame) SampleNumber() uint64 { if frame.HasFixedBlockSize { return frame.Num * uint64(frame.BlockSize) } return frame.Num } // unexpected returns io.ErrUnexpectedEOF if err is io.EOF, and returns err // otherwise. func unexpected(err error) error { if err == io.EOF { return io.ErrUnexpectedEOF } return err }