Kirika/audio/format/aac/libfdk-aac.go
DataHoarder 514a88aec1
Some checks failed
continuous-integration/drone/push Build is failing
Update to Go 1.20
2023-04-09 13:10:30 +02:00

511 lines
10 KiB
Go

//go:build !disable_format_aac && !disable_codec_libfdk_aac && cgo
package aac
import (
"bytes"
"errors"
"fmt"
"git.gammaspectra.live/S.O.N.G/Kirika/audio"
"git.gammaspectra.live/S.O.N.G/Kirika/audio/format"
"git.gammaspectra.live/S.O.N.G/go-fdkaac/fdkaac"
aacAdts "github.com/Eyevinn/mp4ff/aac"
"github.com/Eyevinn/mp4ff/mp4"
"golang.org/x/exp/slices"
"io"
"runtime"
"time"
"unsafe"
)
// Format is the AAC audio format handler. It carries no state; all of its
// behaviour lives in its methods.
type Format struct{}
// NewFormat returns a Format value. Format has no fields, so the result is
// simply its zero value.
func NewFormat() Format {
	var f Format
	return f
}
// Name returns the short identifier of this format, "aac".
func (f Format) Name() string {
	const name = "aac"
	return name
}
// DecoderDescription returns a human-readable description of the decoder
// backend. The version embedded in the string is the one reported by
// fdkaac.EncoderVersion.
func (f Format) DecoderDescription() string {
	version := fdkaac.EncoderVersion()
	return fmt.Sprintf("libfdk-aac %s (S.O.N.G/go-fdkaac)", version)
}
// EncoderDescription returns a human-readable description of the encoder
// backend. It is the same string as DecoderDescription.
func (f Format) EncoderDescription() string {
	description := f.DecoderDescription()
	return description
}
// decodeFrame returns the next block of decoded PCM samples from an ADTS
// stream.
//
// It first asks the decoder for output it can already produce from data fed
// earlier. While there is none, it reads one ADTS frame (header plus payload)
// from r, feeds it to the decoder and tries again.
//
// Errors from the decoder or from reading r are returned unchanged, and the
// returned slice is nil whenever the error is non-nil.
func decodeFrame(decoder *fdkaac.AacDecoder, r io.Reader) ([]int16, error) {
	// Iterate rather than recurse: the number of frames that must be fed
	// before the decoder yields output is not bounded here, and Go does not
	// eliminate tail calls.
	for {
		pcm, err := tryDecodeFrame(decoder)
		if err != nil {
			return nil, err
		}
		if pcm != nil {
			return pcm, nil
		}

		header, _, err := aacAdts.DecodeADTSHeader(r)
		if err != nil {
			return nil, err
		}
		payload := make([]byte, header.PayloadLength)
		if _, err = io.ReadFull(r, payload); err != nil {
			return nil, err
		}

		// DecodeADTSHeader consumed the header bytes from r, so the header is
		// re-encoded and prepended to hand the decoder a complete ADTS frame.
		n, err := decoder.Fill(append(header.Encode(), payload...))
		// Check err before n so a real decoder error is not hidden behind the
		// generic message below.
		if err != nil {
			return nil, err
		}
		// NOTE(review): a non-zero n is treated as the decoder not having
		// accepted the whole frame; confirm the exact meaning against
		// go-fdkaac's Fill.
		if n != 0 {
			return nil, errors.New("buffer under read")
		}
	}
}
// tryDecodeFrame asks the decoder for one block of output without feeding it
// any new input.
//
// It returns (nil, nil) when the decoder reports no output and no error. When
// output is available, the decoder's buffer is reinterpreted in place as
// int16 values (two bytes per sample, native byte order) and a copy of those
// samples is returned.
func tryDecodeFrame(decoder *fdkaac.AacDecoder) ([]int16, error) {
	raw, err := decoder.Decode()
	switch {
	case err != nil:
		return nil, err
	case raw == nil:
		return nil, nil
	}

	// View the decoder's buffer as int16 without copying, then clone it so
	// the result does not alias that buffer.
	view := unsafe.Slice((*int16)(unsafe.Pointer(unsafe.SliceData(raw))), len(raw)/2)
	samples := slices.Clone(view)
	// Keep raw reachable until the copy above has completed.
	runtime.KeepAlive(raw)
	return samples, nil
}
// decodeFrameMP4 returns the next block of decoded PCM samples for an MP4
// input.
//
// It first asks the decoder for output it can already produce. If there is
// none, it reads one batch of samples from demuxer, feeds them to the decoder
// one at a time, and concatenates whatever PCM each of them yields.
//
// It returns io.EOF when demuxer.Read returns nil. Any error from feeding or
// decoding is returned and the PCM gathered so far in this call is dropped.
// The result may be (nil, nil) when a batch produced no output.
func decodeFrameMP4(decoder *fdkaac.AacDecoder, demuxer *mp4Decoder) (result []int16, err error) {
	pcm, err := tryDecodeFrame(decoder)
	if err != nil {
		return nil, err
	}
	if pcm != nil {
		return pcm, nil
	}

	samples := demuxer.Read()
	if samples == nil {
		return nil, io.EOF
	}

	for _, sample := range samples {
		var n int
		// Check err before n so a real decoder error is not hidden behind the
		// generic message below.
		if n, err = decoder.Fill(sample); err != nil {
			return nil, err
		}
		// NOTE(review): a non-zero n is treated as the decoder not having
		// accepted the whole sample; confirm the exact meaning against
		// go-fdkaac's Fill.
		if n != 0 {
			return nil, errors.New("buffer under read")
		}
		// Surface decode failures instead of letting the next iteration
		// overwrite them.
		if pcm, err = tryDecodeFrame(decoder); err != nil {
			return nil, err
		}
		// Appending a nil pcm is a no-op, so no separate nil check is needed.
		result = append(result, pcm...)
	}
	return result, nil
}
// mp4OpenEnabled gates the MP4 path of Open. It is false, which keeps the
// existing behaviour of rejecting every input that does not start with a
// parseable ADTS header.
const mp4OpenEnabled = false

// Open probes r and returns an audio.Source that streams its decoded 16-bit
// PCM.
//
// If an ADTS header can be parsed from r, the stream is decoded as ADTS.
// Otherwise Open returns an "unsupported format mp4" error, because the MP4
// path is disabled through mp4OpenEnabled.
//
// No decoder is allocated until the input has been accepted, and every
// failure after allocation closes the decoder, so no native decoder handle is
// left open on an error return.
func (f Format) Open(r io.ReadSeekCloser) (audio.Source, error) {
	if _, _, err := aacAdts.DecodeADTSHeader(r); err == nil {
		return openADTS(r)
	}
	if !mp4OpenEnabled {
		return nil, fmt.Errorf("unsupported format mp4")
	}
	return openMP4(r)
}

// openADTS rewinds r, decodes the first block synchronously so the sample rate
// and channel count are known, then decodes the rest in a goroutine that
// feeds the returned source. The goroutine stops at the first decode or read
// error and closes both the decoder and the source; that error is not
// reported to the caller.
func openADTS(r io.ReadSeekCloser) (audio.Source, error) {
	// Rewind past the bytes consumed while probing for the ADTS header.
	if _, err := r.Seek(0, io.SeekStart); err != nil {
		return nil, err
	}

	decoder := fdkaac.NewAacDecoder()
	if err := decoder.InitAdts(); err != nil {
		_ = decoder.Close()
		return nil, err
	}

	buf, err := decodeFrame(decoder, r)
	if err != nil {
		_ = decoder.Close()
		return nil, err
	}

	source := audio.NewSource[int16](16, decoder.SampleRate(), decoder.NumChannels())
	go func() {
		defer source.Close()
		defer decoder.Close()
		for {
			if len(buf) > 0 {
				source.IngestInt16(buf, 16)
			}
			var err error
			if buf, err = decodeFrame(decoder, r); err != nil {
				return
			}
		}
	}()
	return source, nil
}

// openMP4 is the MP4 counterpart of openADTS: it rewinds r, builds a demuxer
// with tryDecodeMP4, initialises the decoder in raw mode with the demuxer's
// cookie and streams decoded PCM in a goroutine. It is only reached when
// mp4OpenEnabled is true.
func openMP4(r io.ReadSeekCloser) (audio.Source, error) {
	if _, err := r.Seek(0, io.SeekStart); err != nil {
		return nil, err
	}
	demuxer, err := tryDecodeMP4(r)
	if err != nil {
		return nil, err
	}

	decoder := fdkaac.NewAacDecoder()
	if err = decoder.InitRaw(demuxer.cookie); err != nil {
		_ = decoder.Close()
		return nil, err
	}

	buf, err := decodeFrameMP4(decoder, demuxer)
	if err != nil {
		_ = decoder.Close()
		return nil, err
	}

	source := audio.NewSource[int16](16, decoder.SampleRate(), decoder.NumChannels())
	go func() {
		defer source.Close()
		defer decoder.Close()
		for {
			if len(buf) > 0 {
				source.IngestInt16(buf, 16)
			}
			var err error
			if buf, err = decodeFrameMP4(decoder, demuxer); err != nil {
				return
			}
		}
	}()
	return source, nil
}
// Encode reads 16-bit PCM from source, encodes it with libfdk-aac and writes
// the result to writer. writer is not closed.
//
// Recognised options (all optional; values of an unexpected type are ignored):
//
//   - "bitrate": string "128k", "192k", "256k", "320k" or "vbr1".."vbr5", or
//     an int/int64 multiplied by 1024. The default is 128*1024. The "vbrN"
//     presets pass -N as the bitrate and pin the codec mode (vbr1: HEv2,
//     vbr2: HE, vbr3..5: LC), overriding any "mode" option.
//   - "afterburner": bool, default true.
//   - "mode": "lc" (default), "he"/"hev1" or "hev2".
//   - "format": "adts" (default), "adif" or "mp4". "mp4" produces a
//     fragmented MP4: an init segment followed by media segments of roughly
//     one second each.
//
// An unknown bitrate string, mode or format yields an error, as does any
// encoder or write failure. The options map is never modified.
func (f Format) Encode(source audio.Source, writer io.WriteCloser, options map[string]interface{}) error {
	bitrate := 128 * 1024
	codecMode := 0 // 0 = LC, 1 = HE, 2 = HEv2
	afterburner := 1
	format := "adts"

	// Set when a VBR preset has fixed the codec mode. Tracked locally instead
	// of writing the mode back into the caller's options map. Reading from a
	// nil map is safe, so no nil check is needed.
	modePinned := false
	if val, ok := options["bitrate"]; ok {
		switch v := val.(type) {
		case string:
			switch v {
			case "320k":
				bitrate = 320 * 1024
			case "256k":
				bitrate = 256 * 1024
			case "192k":
				bitrate = 192 * 1024
			case "128k":
				bitrate = 128 * 1024
			case "vbr1":
				bitrate, codecMode, modePinned = -1, 2, true
			case "vbr2":
				bitrate, codecMode, modePinned = -2, 1, true
			case "vbr3":
				bitrate, codecMode, modePinned = -3, 0, true
			case "vbr4":
				bitrate, codecMode, modePinned = -4, 0, true
			case "vbr5":
				bitrate, codecMode, modePinned = -5, 0, true
			default:
				return fmt.Errorf("unknown setting bitrate=%s", v)
			}
		case int:
			bitrate = v * 1024
		case int64:
			bitrate = int(v) * 1024
		}
	}
	if enabled, ok := options["afterburner"].(bool); ok {
		if enabled {
			afterburner = 1
		} else {
			afterburner = 0
		}
	}
	if !modePinned {
		if mode, ok := options["mode"].(string); ok {
			switch mode {
			case "lc":
				codecMode = 0
			case "he", "hev1":
				codecMode = 1
			case "hev2":
				codecMode = 2
			default:
				return fmt.Errorf("unknown setting mode=%s", mode)
			}
		}
	}
	if s, ok := options["format"].(string); ok {
		format = s
	}

	muxingMode := fdkaac.MuxingModeADTS
	switch format {
	case "adts":
		// Default already selected.
	case "mp4":
		// MP4 samples carry raw AAC; the container is built below.
		muxingMode = fdkaac.MuxingModeRAW
	case "adif":
		muxingMode = fdkaac.MuxingModeADIF
	default:
		return fmt.Errorf("unsupported format %s", format)
	}

	encoder := fdkaac.NewAacEncoder()
	// Deferred before initialisation so the encoder is also released when an
	// Init call fails.
	defer encoder.Close()

	var err error
	switch codecMode {
	case 0:
		err = encoder.InitLc(source.GetChannels(), source.GetSampleRate(), bitrate, muxingMode, afterburner)
	case 1:
		err = encoder.InitHE(source.GetChannels(), source.GetSampleRate(), bitrate, muxingMode, afterburner)
	default:
		err = encoder.InitHEv2(source.GetChannels(), source.GetSampleRate(), bitrate, muxingMode, afterburner)
	}
	if err != nil {
		return err
	}

	// samplesPerFrame is the per-channel frame length; frameSize is the number
	// of interleaved int16 values handed to the encoder per call.
	samplesPerFrame := encoder.FrameSize()
	frameSize := samplesPerFrame * encoder.Channels()

	// encodeAll drains source, encodes it frame by frame and passes every
	// non-empty encoded frame to emit.
	encodeAll := func(emit func(frame []byte) error) error {
		// encodeFrame encodes exactly one frame worth of interleaved samples.
		encodeFrame := func(samples []int16) error {
			raw := unsafe.Slice((*byte)(unsafe.Pointer(unsafe.SliceData(samples))), len(samples)*2)
			frame, err := encoder.Encode(raw)
			runtime.KeepAlive(samples)
			if err != nil {
				return err
			}
			if len(frame) == 0 {
				return nil
			}
			return emit(frame)
		}

		pending := make([]int16, 0, frameSize)
		for block := range source.ToInt16().GetBlocks() {
			pending = append(pending, block...)
			rest := pending
			for len(rest) >= frameSize {
				if err := encodeFrame(rest[:frameSize]); err != nil {
					return err
				}
				rest = rest[frameSize:]
			}
			// Move the incomplete tail to the front for the next block.
			pending = pending[:copy(pending, rest)]
		}

		if len(pending) > 0 {
			// Zero-pad the final partial frame to a full frame.
			pending = append(pending, make([]int16, frameSize-len(pending))...)
			if err := encodeFrame(pending); err != nil {
				return err
			}
		}

		// Drain frames still buffered inside the encoder.
		for {
			frame, err := encoder.Flush()
			if err != nil {
				return err
			}
			if len(frame) == 0 {
				return nil
			}
			if err = emit(frame); err != nil {
				return err
			}
		}
	}

	if format != "mp4" {
		// ADTS and ADIF output is self-framing: write frames straight through.
		return encodeAll(func(frame []byte) error {
			_, err := writer.Write(frame)
			return err
		})
	}

	sampleRate := source.GetSampleRate()

	initSeg := mp4.CreateEmptyInit()
	initSeg.AddEmptyTrack(uint32(sampleRate), "audio", "en")
	trackID := initSeg.Moov.Mvhd.NextTrackID - 1
	trak := initSeg.Moov.Trak

	objType := aacAdts.AAClc
	switch codecMode {
	case 1:
		objType = aacAdts.HEAACv1
	case 2:
		objType = aacAdts.HEAACv2
	}

	asc := &aacAdts.AudioSpecificConfig{
		ObjectType:           byte(objType),
		ChannelConfiguration: byte(source.GetChannels()),
		SamplingFrequency:    sampleRate,
		ExtensionFrequency:   0,
		SBRPresentFlag:       false,
		PSPresentFlag:        false,
	}
	switch objType {
	case aacAdts.HEAACv1:
		asc.ExtensionFrequency = 2 * sampleRate
		asc.SBRPresentFlag = true
	case aacAdts.HEAACv2:
		asc.ExtensionFrequency = 2 * sampleRate
		asc.SBRPresentFlag = true
		asc.ChannelConfiguration = 1
		asc.PSPresentFlag = true
	}
	ascBuf := &bytes.Buffer{}
	if err = asc.Encode(ascBuf); err != nil {
		return err
	}
	esds := mp4.CreateEsdsBox(ascBuf.Bytes())
	// NOTE(review): the sample entry's rate is a uint16, so sample rates
	// above 65535 Hz are truncated by this conversion; confirm how such rates
	// should be signalled.
	mp4a := mp4.CreateAudioSampleEntryBox("mp4a",
		uint16(asc.ChannelConfiguration),
		16, uint16(sampleRate), esds)
	trak.Mdia.Minf.Stbl.Stsd.AddChild(mp4a)

	if err = initSeg.Encode(writer); err != nil {
		return err
	}

	var seqNumber uint32
	var packetsWritten uint64
	outputBuffer := make([][]byte, 0, 32)
	segmentDuration := time.Millisecond * 1000

	// outputSegment writes all buffered frames as one media segment and
	// resets the buffer.
	outputSegment := func() error {
		seg := mp4.NewMediaSegment()
		frag, err := mp4.CreateFragment(seqNumber, trackID)
		if err != nil {
			return err
		}
		seg.AddFragment(frag)
		for _, b := range outputBuffer {
			// The track timescale is the sample rate, so durations and decode
			// times count per-channel samples, not interleaved values.
			err = frag.AddFullSampleToTrack(mp4.FullSample{
				Sample: mp4.Sample{
					Dur:  uint32(samplesPerFrame),
					Size: uint32(len(b)),
				},
				DecodeTime: uint64(samplesPerFrame) * packetsWritten,
				Data:       b,
			}, trackID)
			if err != nil {
				return err
			}
			packetsWritten++
		}
		if err = seg.Encode(writer); err != nil {
			return err
		}
		seqNumber++
		outputBuffer = make([][]byte, 0, 32)
		return nil
	}

	err = encodeAll(func(frame []byte) error {
		outputBuffer = append(outputBuffer, frame)
		buffered := time.Duration(float64(time.Second) * (float64(samplesPerFrame*len(outputBuffer)) / float64(sampleRate)))
		if buffered >= segmentDuration {
			return outputSegment()
		}
		return nil
	})
	if err != nil {
		return err
	}
	if len(outputBuffer) > 0 {
		return outputSegment()
	}
	return nil
}
// Identify reports whether a file belongs to this format. Only the file
// extension is considered ("aac" or "adts"); the peeked header bytes are
// ignored.
func (f Format) Identify(_ [format.IdentifyPeekBytes]byte, extension string) bool {
	//TODO: add .m4a/mp4 detection
	switch extension {
	case "aac", "adts":
		return true
	default:
		return false
	}
}