Ignite/encoder/libaom/libaom.go

502 lines
13 KiB
Go

//go:build cgo && !disable_library_libaom
package libaom
/*
#cgo pkg-config: aom
#include "libaom.h"
*/
import "C"
import (
"errors"
"fmt"
"git.gammaspectra.live/S.O.N.G/Ignite/frame"
"git.gammaspectra.live/S.O.N.G/Ignite/utilities/obuwriter"
"golang.org/x/exp/constraints"
"io"
"runtime"
"strconv"
"sync/atomic"
"unsafe"
)
// Encoder wraps a libaom AV1 encoder context and writes the packets it
// produces through an obuwriter.Writer.
type Encoder struct {
// w receives every encoded frame packet.
w *obuwriter.Writer
// cleaned is flipped by Close so the native resources are released only once.
cleaned atomic.Bool
// cfg is the encoder configuration passed to aom_codec_enc_init_ver.
cfg C.aom_codec_enc_cfg_t
// codec is the libaom codec context used for every encode call.
codec C.aom_codec_ctx_t
// raw is a C-allocated image descriptor; Encode points its planes at the
// buffers of the frame being encoded and clears them again afterwards.
raw *C.aom_image_t
// frames counts the frames that Encode submitted without error.
frames uint32
}
// libaomVersion is built once at package initialization from
// aom_codec_version_str, prefixed with "libaom-av1 ".
var libaomVersion = "libaom-av1 " + C.GoString(C.aom_codec_version_str())
// Version returns the version string of the linked libaom library.
func Version() string {
return libaomVersion
}
// Usage presets re-exported from libaom's AOM_USAGE_* constants. NewEncoder
// stores the selected one in the g_usage field of the encoder configuration.
const (
UsageGoodQuality = C.AOM_USAGE_GOOD_QUALITY
UsageRealtime = C.AOM_USAGE_REALTIME
UsageAllIntra = C.AOM_USAGE_ALL_INTRA
)
// NewEncoder creates a libaom AV1 encoder that writes its output to w.
//
// properties supplies the picture size, frame rate, chroma subsampling, bit
// depth and color range of the input stream. settings carries encoder options:
// the keys handled explicitly below ("usage", "good", "rt", "allintra",
// "threads", "end-usage", ...) are read through the getSetting* helpers, and
// every entry still present in the map afterwards is forwarded verbatim to
// aom_codec_set_option. The settings map is mutated by this call.
//
// On failure the native resources acquired so far are released before
// returning, instead of being left to the garbage collector's finalizer.
func NewEncoder(w io.Writer, properties frame.StreamProperties, settings map[string]any) (*Encoder, error) {
	e := &Encoder{}

	encoder := C.aom_codec_av1_cx()
	if encoder == nil {
		return nil, errors.New("unsupported codec")
	}

	// The usage preset must be known before the defaults are fetched; the
	// boolean shortcuts override an explicit "usage" value.
	e.cfg.g_usage = C.uint(getSettingUnsigned(settings, "usage", uint(UsageGoodQuality)))
	if getSettingBool(settings, "good", false) {
		e.cfg.g_usage = UsageGoodQuality
	}
	if getSettingBool(settings, "rt", false) {
		e.cfg.g_usage = UsageRealtime
	}
	if getSettingBool(settings, "allintra", false) {
		e.cfg.g_usage = UsageAllIntra
	}

	var imageFormat C.aom_img_fmt_t
	var flags C.aom_codec_flags_t

	if aomErr := C.aom_codec_enc_config_default(encoder, &e.cfg, e.cfg.g_usage); aomErr != 0 {
		return nil, errors.New("failed to get default codec config")
	}

	// Map the J:a:b chroma subsampling onto an image format and AV1 profile.
	cs := properties.ColorSpace.ChromaSampling
	switch {
	case cs.J == 4 && cs.A == 4 && cs.B == 4:
		imageFormat = C.AOM_IMG_FMT_I444
		e.cfg.g_profile = 1
	case cs.J == 4 && cs.A == 2 && cs.B == 2:
		imageFormat = C.AOM_IMG_FMT_I422
		e.cfg.g_profile = 2
	case cs.J == 4 && cs.A == 2 && cs.B == 0:
		imageFormat = C.AOM_IMG_FMT_I420
		e.cfg.g_profile = 0
	case cs.J == 4 && cs.A == 0 && cs.B == 0:
		//mono is defined as 4:2:0, but monochrome is set on config
		imageFormat = C.AOM_IMG_FMT_I420
		e.cfg.g_profile = 0
		e.cfg.monochrome = 1
	default:
		return nil, errors.New("unsupported input chroma subsampling")
	}

	e.cfg.g_input_bit_depth = C.uint(properties.ColorSpace.BitDepth)
	e.cfg.g_bit_depth = C.aom_bit_depth_t(properties.ColorSpace.BitDepth)
	if e.cfg.g_bit_depth >= 12 { //only bitdepths up to 12 are supported, see aom_bit_depth_t
		e.cfg.g_bit_depth = 12
		e.cfg.g_profile = 2
	}
	if e.cfg.g_input_bit_depth > 8 {
		imageFormat |= C.AOM_IMG_FMT_HIGHBITDEPTH
	}
	if e.cfg.g_bit_depth > 8 {
		flags |= C.AOM_CODEC_USE_HIGHBITDEPTH
	}

	if e.raw = (*C.aom_image_t)(C.malloc(C.size_t(unsafe.Sizeof(C.aom_image_t{})))); e.raw == nil {
		return nil, errors.New("error allocating memory")
	}
	if C.aom_img_alloc(e.raw, imageFormat, C.uint(properties.Width), C.uint(properties.Height), 1) == nil {
		// The descriptor was malloc'd above and no finalizer is installed yet,
		// so it has to be released here or it leaks.
		C.free(unsafe.Pointer(e.raw))
		e.raw = nil
		return nil, errors.New("failed to allocate image")
	}

	// The finalizer is a safety net for callers that forget to Close.
	runtime.SetFinalizer(e, func(encoder *Encoder) {
		encoder.Close()
	})

	// From here on e owns native memory: release it right away on any error
	// path. Close is guarded, so the finalizer running later is harmless.
	initialized := false
	defer func() {
		if !initialized {
			e.Close()
		}
	}()

	e.cfg.g_w = C.uint(properties.Width)
	e.cfg.g_h = C.uint(properties.Height)

	/*!\brief Stream timebase units
	 *
	 * Indicates the smallest interval of time, in seconds, used by the stream.
	 * For fixed frame rate material, or variable frame rate material where
	 * frames are timed at a multiple of a given clock (ex: video capture),
	 * the \ref RECOMMENDED method is to set the timebase to the reciprocal
	 * of the frame rate (ex: 1001/30000 for 29.970 Hz NTSC). This allows the
	 * pts to correspond to the frame number, which can be handy. For
	 * re-encoding video from containers with absolute time timestamps, the
	 * \ref RECOMMENDED method is to set the timebase to that of the parent
	 * container or multimedia framework (ex: 1/1000 for ms, as in FLV).
	 */
	reciprocalFrameRate := properties.FrameRate.Reciprocal()
	e.cfg.g_timebase.num = C.int(reciprocalFrameRate.Numerator)
	e.cfg.g_timebase.den = C.int(reciprocalFrameRate.Denominator)

	// boolean settings (these can only switch a feature on, never off)
	if getSettingBool(settings, "large-scale-tile", e.cfg.large_scale_tile != 0) {
		e.cfg.large_scale_tile = 1
	}
	if getSettingBool(settings, "monochrome", e.cfg.monochrome != 0) {
		e.cfg.monochrome = 1
	}
	if getSettingBool(settings, "enable-fwd-kf", e.cfg.fwd_kf_enabled != 0) {
		e.cfg.fwd_kf_enabled = 1
	}
	if getSettingBool(settings, "kf-disabled", false) {
		e.cfg.kf_mode = C.AOM_KF_DISABLED
	}

	// integer settings: each config field keeps its default unless overridden
	type uintSettingPair struct {
		p *C.uint
		n string
	}
	for _, s := range []uintSettingPair{
		{&e.cfg.g_threads, "threads"},
		{&e.cfg.g_lag_in_frames, "lag-in-frames"},
		{&e.cfg.g_forced_max_frame_width, "forced_max_frame_width"},
		{&e.cfg.g_forced_max_frame_height, "forced_max_frame_height"},
		{&e.cfg.rc_dropframe_thresh, "drop-frame"},
		{&e.cfg.rc_resize_mode, "resize-mode"},
		{&e.cfg.rc_resize_denominator, "resize-denominator"},
		{&e.cfg.rc_resize_kf_denominator, "resize-kf-denominator"},
		{(*C.uint)(&e.cfg.rc_superres_mode), "superres-mode"},
		{&e.cfg.rc_superres_denominator, "superres-denominator"},
		{&e.cfg.rc_superres_kf_denominator, "superres-kf-denominator"},
		{&e.cfg.rc_superres_qthresh, "superres-qthresh"},
		{&e.cfg.rc_superres_kf_qthresh, "superres-kf-qthresh"},
		{&e.cfg.rc_target_bitrate, "target-bitrate"},
		{&e.cfg.rc_min_quantizer, "min-q"},
		{&e.cfg.rc_max_quantizer, "max-q"},
		{&e.cfg.rc_undershoot_pct, "undershoot-pct"},
		{&e.cfg.rc_overshoot_pct, "overshoot-pct"},
		{&e.cfg.rc_buf_sz, "buf-sz"},
		{&e.cfg.rc_buf_initial_sz, "buf-initial-sz"},
		{&e.cfg.rc_buf_optimal_sz, "buf-optimal-sz"},
		//{&e.cfg.rc_2pass_vbr_bias_pct, "bias-pct"},
		//{&e.cfg.rc_2pass_vbr_minsection_pct, "minsection-pct"},
		//{&e.cfg.rc_2pass_vbr_maxsection_pct, "maxsection-pct"},
		{&e.cfg.kf_min_dist, "kf-min-dist"},
		{&e.cfg.kf_max_dist, "kf-max-dist"},
		{&e.cfg.sframe_dist, "sframe-dist"},
		{&e.cfg.sframe_mode, "sframe-mode"},
	} {
		//todo: unset setting from map
		*s.p = C.uint(getSettingUnsigned(settings, s.n, uint(*s.p)))
	}

	// string/enum settings
	endUsage := getSettingString(settings, "end-usage", "vbr")
	switch endUsage {
	case "vbr":
		e.cfg.rc_end_usage = C.AOM_VBR
	case "cbr":
		e.cfg.rc_end_usage = C.AOM_CBR
	case "cq":
		e.cfg.rc_end_usage = C.AOM_CQ
	case "q":
		e.cfg.rc_end_usage = C.AOM_Q
	default:
		return nil, errors.New("unknown end-usage setting: " + endUsage)
	}

	//TODO: find all settings not set on AV1 encoder and place them on e.cfg

	if aomErr := C.aom_codec_enc_init_ver(&e.codec, encoder, &e.cfg, flags, C.AOM_ENCODER_ABI_VERSION); aomErr != 0 {
		return nil, fmt.Errorf("failed to initialize encoder: %s", C.GoString(e.codec.err_detail))
	}

	// Signal full (1) or limited (0) color range to the encoder.
	var rangeErr C.aom_codec_err_t
	if properties.FullColorRange {
		rangeErr = C.aom_codec_control_uint(&e.codec, C.AV1E_SET_COLOR_RANGE, 1)
	} else {
		rangeErr = C.aom_codec_control_uint(&e.codec, C.AV1E_SET_COLOR_RANGE, 0)
	}
	if rangeErr != 0 {
		return nil, fmt.Errorf("failed to set color range")
	}

	// Forward every remaining setting as a key/value string option. The body
	// is a closure so the C strings are freed at the end of each iteration.
	for k, v := range settings {
		if err := func() error {
			var strVal *C.char
			switch val := v.(type) {
			case string:
				strVal = C.CString(val)
			case int:
				strVal = C.CString(strconv.FormatInt(int64(val), 10))
			case int64:
				strVal = C.CString(strconv.FormatInt(val, 10))
			case uint:
				strVal = C.CString(strconv.FormatUint(uint64(val), 10))
			case uint64:
				strVal = C.CString(strconv.FormatUint(val, 10))
			case bool:
				if val {
					strVal = C.CString("1")
				} else {
					strVal = C.CString("0")
				}
			}
			if strVal == nil {
				// Value type that cannot be rendered as an option string.
				return fmt.Errorf("could not get parameter %s", k)
			}
			defer C.free(unsafe.Pointer(strVal))

			strKey := C.CString(k)
			defer C.free(unsafe.Pointer(strKey))

			if ret := C.aom_codec_set_option(&e.codec, strKey, strVal); ret != 0 {
				if ret == C.AOM_CODEC_INVALID_PARAM {
					return fmt.Errorf("bad parameter value %s for %s: %s", C.GoString(strVal), k, C.GoString(C.aom_codec_error_detail(&e.codec)))
				}
				return fmt.Errorf("error setting parameter %s: %s", k, C.GoString(C.aom_codec_error_detail(&e.codec)))
			}
			return nil
		}(); err != nil {
			return nil, err
		}
	}

	var err error
	if e.w, err = obuwriter.NewWriter(w, properties.Width, properties.Height, 0x31305641, reciprocalFrameRate); err != nil {
		return nil, err
	}

	initialized = true
	return e, nil
}
// EncodeStream encodes every frame delivered by stream and then flushes the
// encoder. It stops at the first frame that fails to encode and returns that
// error without flushing.
func (e *Encoder) EncodeStream(stream *frame.Stream) error {
	frames := stream.Channel()
	for next := range frames {
		encodeErr := e.Encode(next)
		if encodeErr != nil {
			return encodeErr
		}
	}
	flushErr := e.Flush()
	return flushErr
}
// Encode submits a single frame to the encoder and writes any packets that
// become available.
//
// The frame's luma/Cb/Cr buffers are not copied: the plane pointers of the
// encoder's image descriptor are aimed at them for the duration of the call
// and cleared again before returning.
// NOTE(review): assumes the frame's plane layout matches the image allocated
// in NewEncoder (same dimensions, subsampling and sample size) — confirm.
//
// It returns an error if the encoder has been closed, if f is not a
// frame.TypedFrame[uint8] or frame.TypedFrame[uint16], or if encoding or
// writing fails.
func (e *Encoder) Encode(f frame.Frame) error {
	if e.raw == nil {
		// Close released the image descriptor; there is nothing to encode into.
		return errors.New("encoder is closed")
	}

	switch typed := f.(type) {
	case frame.TypedFrame[uint8]:
		e.raw.planes[0] = (*C.uint8_t)(unsafe.Pointer(&typed.GetNativeLuma()[0]))
		e.raw.planes[1] = (*C.uint8_t)(unsafe.Pointer(&typed.GetNativeCb()[0]))
		e.raw.planes[2] = (*C.uint8_t)(unsafe.Pointer(&typed.GetNativeCr()[0]))
	case frame.TypedFrame[uint16]:
		e.raw.planes[0] = (*C.uint8_t)(unsafe.Pointer(&typed.GetNativeLuma()[0]))
		e.raw.planes[1] = (*C.uint8_t)(unsafe.Pointer(&typed.GetNativeCb()[0]))
		e.raw.planes[2] = (*C.uint8_t)(unsafe.Pointer(&typed.GetNativeCr()[0]))
	default:
		// Without this the image would be handed to libaom with nil planes.
		return errors.New("unsupported frame type")
	}

	// The frame's buffers must stay reachable while libaom reads them.
	defer runtime.KeepAlive(f)

	//cleanup pointers
	defer func() {
		e.raw.planes[0] = nil
		e.raw.planes[1] = nil
		e.raw.planes[2] = nil
	}()

	if _, err := e.encodeFrame(f.PTS(), e.raw); err != nil {
		return err
	}

	e.frames++
	return nil
}
// encodeFrame passes raw (or nil, to drain the encoder) to aom_codec_encode
// with the given presentation timestamp, then writes every frame packet the
// encoder has ready to the output writer.
//
// pkts is the number of packets of any kind returned by the encoder for this
// call; it is 0 whenever aom_codec_encode itself fails.
func (e *Encoder) encodeFrame(pts int64, raw *C.aom_image_t) (pkts int, err error) {
	//TODO: make this a Source channel

	switch status := C.aom_codec_encode(&e.codec, raw, C.long(pts), 1, 0); status {
	case C.AOM_CODEC_OK:
		// Submitted; fall through to packet retrieval.
	case C.AOM_CODEC_INCAPABLE:
		return 0, errors.New("error encoding frame: AOM_CODEC_INCAPABLE")
	case C.AOM_CODEC_INVALID_PARAM:
		return 0, errors.New("error encoding frame: AOM_CODEC_INVALID_PARAM")
	case C.AOM_CODEC_ERROR:
		return 0, errors.New("error encoding frame: AOM_CODEC_ERROR")
	default:
		return 0, errors.New("error encoding frame")
	}

	// iter is the cursor libaom uses to walk its list of pending packets.
	var iter C.aom_codec_iter_t
	for pkt := C.aom_codec_get_cx_data(&e.codec, &iter); pkt != nil; pkt = C.aom_codec_get_cx_data(&e.codec, &iter) {
		pkts++

		// Only compressed frame packets are written out.
		if pkt.kind != C.AOM_CODEC_CX_FRAME_PKT {
			continue
		}

		packetPTS := uint64(C.aom_get_pkt_pts(pkt))
		payload := unsafe.Slice((*byte)(C.aom_get_pkt_buf(pkt)), int(C.aom_get_pkt_sz(pkt)))
		if err = e.w.WriteFrameBytes(packetPTS, payload); err != nil {
			return pkts, err
		}
	}

	return pkts, nil
}
// Flush drains the encoder by submitting nil images until a call yields no
// more packets, then records the number of encoded frames through the writer.
//
// The result of WriteLength is discarded.
// NOTE(review): presumably best-effort because the output may not support
// rewriting the header — confirm against obuwriter.
func (e *Encoder) Flush() error {
	for {
		drained, err := e.encodeFrame(-1, nil)
		if err != nil {
			return err
		}
		if drained == 0 {
			break
		}
	}

	_ = e.w.WriteLength(e.frames)

	return nil
}
// Close releases the native image and codec context. Only the first call does
// any work; later calls (including the one made by the finalizer) return
// immediately.
func (e *Encoder) Close() {
	if alreadyClosed := e.cleaned.Swap(true); alreadyClosed {
		return
	}

	if img := e.raw; img != nil {
		// aom_img_free releases the pixel buffer; the descriptor itself was
		// malloc'd separately and needs its own free.
		C.aom_img_free(img)
		C.free(unsafe.Pointer(img))
		e.raw = nil
	}

	C.aom_codec_destroy(&e.codec)
}
// Version returns the libaom version string; it is the same value as the
// package-level Version function.
func (e *Encoder) Version() string {
return Version()
}
// getSettingBool reads a boolean setting from m.
//
// If name is absent, fallback is returned. Otherwise the value is interpreted
// by type and, for recognised types, the key is removed from m:
//   - bool: the value itself
//   - string: false for "false", "f", "n", "no" and "0"; true for anything else
//   - int, int64, uint, uint64: true when non-zero
//
// A value of any other type counts as true simply because the key is present;
// in that case the key is left in the map.
func getSettingBool(m map[string]any, name string, fallback bool) bool {
	v, ok := m[name]
	if !ok {
		return fallback
	}

	switch val := v.(type) {
	case bool:
		delete(m, name)
		return val
	case string:
		delete(m, name)
		switch val {
		case "false", "f", "n", "no", "0":
			return false
		}
		return true
	case int:
		delete(m, name)
		return val != 0
	case int64:
		delete(m, name)
		return val != 0
	case uint:
		delete(m, name)
		return val != 0
	case uint64:
		delete(m, name)
		return val != 0
	}

	return true
}
// getSettingString reads a setting from m and renders it as a string.
//
// Strings are returned unchanged, integers are formatted in base 10 and
// booleans become "1" or "0"; in all of those cases the key is removed from
// m. If the key is missing, or its value has any other type, fallback is
// returned and the map is left untouched.
func getSettingString(m map[string]any, name string, fallback string) string {
	raw, present := m[name]
	if !present {
		return fallback
	}

	var text string
	switch value := raw.(type) {
	case string:
		text = value
	case int:
		text = strconv.FormatInt(int64(value), 10)
	case int64:
		text = strconv.FormatInt(value, 10)
	case uint:
		text = strconv.FormatUint(uint64(value), 10)
	case uint64:
		text = strconv.FormatUint(value, 10)
	case bool:
		text = "0"
		if value {
			text = "1"
		}
	default:
		// Unsupported value type: keep the entry and use the fallback.
		return fallback
	}

	delete(m, name)
	return text
}
// getSettingUnsigned reads an unsigned integer setting from m.
//
// Supported value types are base-10 strings, int, int64, uint, uint64, C.int,
// C.uint and bool (true is 1, false is 0). When the value is usable the key is
// removed from m and the value, converted to T, is returned.
//
// fallback is returned, and the entry is left in the map, when the key is
// missing, the value has an unsupported type, or a string value does not
// parse as an unsigned integer.
//
// Integer values are converted to T as-is: negative or out-of-range inputs
// wrap around rather than being rejected.
func getSettingUnsigned[T constraints.Unsigned](m map[string]any, name string, fallback T) T {
	v, ok := m[name]
	if !ok {
		return fallback
	}

	var result T
	switch val := v.(type) {
	case string:
		parsed, err := strconv.ParseUint(val, 10, 0)
		if err != nil {
			// Not a number: keep the default instead of silently using 0.
			return fallback
		}
		result = T(parsed)
	case int:
		result = T(val)
	case int64:
		result = T(val)
	case uint:
		result = T(val)
	case uint64:
		result = T(val)
	case C.int:
		result = T(val)
	case C.uint:
		result = T(val)
	case bool:
		if val {
			result = 1
		}
	default:
		return fallback
	}

	delete(m, name)
	return result
}