//go:build !disable_format_aac && !disable_codec_libfdk_aac && cgo package aac import ( "bytes" "errors" "fmt" "git.gammaspectra.live/S.O.N.G/Kirika/audio" "git.gammaspectra.live/S.O.N.G/Kirika/audio/format" "git.gammaspectra.live/S.O.N.G/go-fdkaac/fdkaac" aacAdts "github.com/Eyevinn/mp4ff/aac" "github.com/Eyevinn/mp4ff/mp4" "golang.org/x/exp/slices" "io" "runtime" "time" "unsafe" ) type Format struct { } func NewFormat() Format { return Format{} } func (f Format) Name() string { return "aac" } func (f Format) DecoderDescription() string { return fmt.Sprintf("libfdk-aac %s (S.O.N.G/go-fdkaac)", fdkaac.EncoderVersion()) } func (f Format) EncoderDescription() string { return f.DecoderDescription() } func decodeFrame(decoder *fdkaac.AacDecoder, r io.Reader) ([]int16, error) { pcm, err := tryDecodeFrame(decoder) if err != nil { return nil, err } if pcm != nil { return pcm, err } header, _, err := aacAdts.DecodeADTSHeader(r) if err != nil { return nil, err } data := make([]byte, header.PayloadLength) if _, err = io.ReadFull(r, data); err != nil { return nil, err } var n int fullData := append(header.Encode(), data...) if n, err = decoder.Fill(fullData); n != 0 { return nil, errors.New("buffer under read") } if err != nil { return nil, err } return decodeFrame(decoder, r) } func tryDecodeFrame(decoder *fdkaac.AacDecoder) ([]int16, error) { pcm, err := decoder.Decode() if err != nil { return nil, err } if pcm != nil { defer runtime.KeepAlive(pcm) return slices.Clone(unsafe.Slice((*int16)(unsafe.Pointer(unsafe.SliceData(pcm))), len(pcm)/2)), nil } return nil, nil } func decodeFrameMP4(decoder *fdkaac.AacDecoder, demuxer *mp4Decoder) (result []int16, err error) { pcm, err := tryDecodeFrame(decoder) if err != nil { return nil, err } if pcm != nil { return pcm, err } samples := demuxer.Read() if samples == nil { return nil, io.EOF } var n int for _, sample := range samples { if n, err = decoder.Fill(sample); n != 0 { return nil, errors.New("buffer under read") } if err != nil { return nil, err } pcm, err = tryDecodeFrame(decoder) if pcm != nil { result = append(result, pcm...) } } return result, nil } func (f Format) Open(r io.ReadSeekCloser) (audio.Source, error) { decoder := fdkaac.NewAacDecoder() _, _, err := aacAdts.DecodeADTSHeader(r) if err == nil { //try ADTS _, err = r.Seek(0, io.SeekStart) if err != nil { return nil, err } err = decoder.InitAdts() if err != nil { return nil, err } buf, err := decodeFrame(decoder, r) if err != nil { _ = decoder.Close() return nil, err } source := audio.NewSource[int16](16, decoder.SampleRate(), decoder.NumChannels()) go func() { defer source.Close() defer decoder.Close() if len(buf) > 0 { source.IngestInt16(buf, 16) } for { buf, err = decodeFrame(decoder, r) if err != nil { return } if len(buf) > 0 { source.IngestInt16(buf, 16) } } }() return source, nil } else { return nil, fmt.Errorf("unsupported format mp4") _, err = r.Seek(0, io.SeekStart) if err != nil { return nil, err } mp4Demuxer, err := tryDecodeMP4(r) if err != nil { return nil, err } err = decoder.InitRaw(mp4Demuxer.cookie) if err != nil { return nil, err } buf, err := decodeFrameMP4(decoder, mp4Demuxer) if err != nil { _ = decoder.Close() return nil, err } source := audio.NewSource[int16](16, decoder.SampleRate(), decoder.NumChannels()) go func() { defer source.Close() defer decoder.Close() if len(buf) > 0 { source.IngestInt16(buf, 16) } for { buf, err = decodeFrameMP4(decoder, mp4Demuxer) if err != nil { return } if len(buf) > 0 { source.IngestInt16(buf, 16) } } }() return source, nil } } func (f Format) Encode(source audio.Source, writer io.WriteCloser, options map[string]interface{}) error { var bitrate = 128 * 1024 var codecMode = 0 //0 = LC, 1 = HE, 2 = HEv2 var afterburner = 1 var format = "adts" if options != nil { var val interface{} var ok bool var intVal int var int64Val int64 var strVal string var boolVal bool if val, ok = options["bitrate"]; ok { if strVal, ok = val.(string); ok { switch strVal { case "320k": bitrate = 320 * 1024 case "256k": bitrate = 256 * 1024 case "192k": bitrate = 192 * 1024 case "128k": bitrate = 128 * 1024 case "vbr1": bitrate = -1 options["mode"] = "hev2" codecMode = 2 case "vbr2": bitrate = -2 options["mode"] = "hev1" codecMode = 1 case "vbr3": bitrate = -3 options["mode"] = "lc" codecMode = 0 case "vbr4": bitrate = -4 options["mode"] = "lc" codecMode = 0 case "vbr5": bitrate = -5 options["mode"] = "lc" codecMode = 0 default: return fmt.Errorf("unknown setting bitrate=%s", strVal) } } else if intVal, ok = val.(int); ok { bitrate = intVal * 1024 } else if int64Val, ok = val.(int64); ok { bitrate = int(int64Val) * 1024 } } if val, ok = options["afterburner"]; ok { if boolVal, ok = val.(bool); ok { if boolVal { afterburner = 1 } else { afterburner = 0 } } } if val, ok = options["mode"]; ok { if strVal, ok = val.(string); ok { switch strVal { case "lc": codecMode = 0 case "he", "hev1": codecMode = 1 case "hev2": codecMode = 2 default: return fmt.Errorf("unknown setting mode=%s", strVal) } } } if val, ok = options["format"]; ok { if strVal, ok = val.(string); ok { format = strVal } } } muxingMode := fdkaac.MuxingModeADTS if format == "adts" { muxingMode = fdkaac.MuxingModeADTS } else if format == "mp4" { muxingMode = fdkaac.MuxingModeRAW } else if format == "adif" { muxingMode = fdkaac.MuxingModeADIF } else { return fmt.Errorf("unsupported format %s", format) } encoder := fdkaac.NewAacEncoder() if codecMode == 0 { err := encoder.InitLc(source.GetChannels(), source.GetSampleRate(), bitrate, muxingMode, afterburner) if err != nil { return err } } else if codecMode == 1 { err := encoder.InitHE(source.GetChannels(), source.GetSampleRate(), bitrate, muxingMode, afterburner) if err != nil { return err } } else { err := encoder.InitHEv2(source.GetChannels(), source.GetSampleRate(), bitrate, muxingMode, afterburner) if err != nil { return err } } defer encoder.Close() frameSize := encoder.FrameSize() * encoder.Channels() encodeSource := func(source audio.Source, writeCallback func(frame []byte) error) error { buffer := make([]int16, 0, frameSize) for block := range source.ToInt16().GetBlocks() { buffer = append(buffer, block...) tempBuffer := buffer[:] for len(tempBuffer) >= frameSize { sl := tempBuffer[:frameSize] frameBuffer, err := encoder.Encode(unsafe.Slice((*byte)(unsafe.Pointer(unsafe.SliceData(sl))), len(sl)*2)) if err != nil { return err } if len(frameBuffer) > 0 { if err = writeCallback(frameBuffer); err != nil { return err } } tempBuffer = tempBuffer[frameSize:] runtime.KeepAlive(sl) } if len(tempBuffer) > 0 { copy(buffer, tempBuffer) } buffer = buffer[:len(tempBuffer)] } if len(buffer) > 0 { //pad buffer = append(buffer, make([]int16, frameSize-len(buffer))...) frameBuffer, err := encoder.Encode(unsafe.Slice((*byte)(unsafe.Pointer(unsafe.SliceData(buffer))), len(buffer)*2)) if err != nil { return err } if len(frameBuffer) > 0 { if err = writeCallback(frameBuffer); err != nil { return err } } runtime.KeepAlive(buffer) } //Do flush for { frameBuffer, err := encoder.Flush() if err != nil { return err } if len(frameBuffer) > 0 { if err = writeCallback(frameBuffer); err != nil { return err } } else { break } } return nil } if format == "mp4" { init := mp4.CreateEmptyInit() init.AddEmptyTrack(uint32(source.GetSampleRate()), "audio", "en") trackId := init.Moov.Mvhd.NextTrackID - 1 trak := init.Moov.Trak objType := aacAdts.AAClc if codecMode == 1 { objType = aacAdts.HEAACv1 } else if codecMode == 2 { objType = aacAdts.HEAACv2 } { stsd := trak.Mdia.Minf.Stbl.Stsd asc := &aacAdts.AudioSpecificConfig{ ObjectType: byte(objType), ChannelConfiguration: byte(source.GetChannels()), SamplingFrequency: source.GetSampleRate(), ExtensionFrequency: 0, SBRPresentFlag: false, PSPresentFlag: false, } switch objType { case aacAdts.HEAACv1: asc.ExtensionFrequency = 2 * source.GetSampleRate() asc.SBRPresentFlag = true case aacAdts.HEAACv2: asc.ExtensionFrequency = 2 * source.GetSampleRate() asc.SBRPresentFlag = true asc.ChannelConfiguration = 1 asc.PSPresentFlag = true } buf := &bytes.Buffer{} err := asc.Encode(buf) if err != nil { return err } ascBytes := buf.Bytes() esds := mp4.CreateEsdsBox(ascBytes) mp4a := mp4.CreateAudioSampleEntryBox("mp4a", uint16(asc.ChannelConfiguration), 16, uint16(source.GetSampleRate()), esds) stsd.AddChild(mp4a) } _ = init.Encode(writer) var seqNumber uint32 var packetsWritten uint64 outputBuffer := make([][]byte, 0, 32) segmentDuration := time.Millisecond * 1000 outputSegment := func() { seg := mp4.NewMediaSegment() frag, _ := mp4.CreateFragment(seqNumber, trackId) seg.AddFragment(frag) for _, b := range outputBuffer { _ = frag.AddFullSampleToTrack(mp4.FullSample{ Sample: mp4.Sample{ Dur: uint32(frameSize), Size: uint32(len(b)), }, DecodeTime: uint64(frameSize) * packetsWritten, Data: b, }, trackId) packetsWritten++ } _ = seg.Encode(writer) seqNumber++ outputBuffer = make([][]byte, 0, 32) } if err := encodeSource(source, func(frame []byte) error { outputBuffer = append(outputBuffer, frame) if time.Duration(float64(time.Second)*(float64(frameSize*len(outputBuffer))/float64(source.GetSampleRate()))) >= segmentDuration { outputSegment() } return nil }); err != nil { return err } if len(outputBuffer) > 0 { outputSegment() } } else { if err := encodeSource(source, func(frame []byte) error { _, err := writer.Write(frame) if err != nil { return err } return nil }); err != nil { return err } } return nil } func (f Format) Identify(_ [format.IdentifyPeekBytes]byte, extension string) bool { //TODO: add .m4a/mp4 detection return extension == "aac" || extension == "adts" }