//go:build cgo && !disable_library_libaom package libaom /* #cgo pkg-config: aom #include "libaom.h" */ import "C" import ( "errors" "fmt" "git.gammaspectra.live/S.O.N.G/Ignite/frame" "git.gammaspectra.live/S.O.N.G/Ignite/utilities/obuwriter" "golang.org/x/exp/constraints" "io" "runtime" "strconv" "sync/atomic" "unsafe" ) type Encoder struct { w *obuwriter.Writer cleaned atomic.Bool cfg C.aom_codec_enc_cfg_t codec C.aom_codec_ctx_t raw *C.aom_image_t frames uint32 } var libaomVersion = "libaom-av1 " + C.GoString(C.aom_codec_version_str()) func Version() string { return libaomVersion } const ( UsageGoodQuality = C.AOM_USAGE_GOOD_QUALITY UsageRealtime = C.AOM_USAGE_REALTIME UsageAllIntra = C.AOM_USAGE_ALL_INTRA ) func NewEncoder(w io.Writer, properties frame.StreamProperties, settings map[string]any) (*Encoder, error) { e := &Encoder{} var aomErr C.aom_codec_err_t encoder := C.aom_codec_av1_cx() if encoder == nil { return nil, errors.New("unsupported codec") } e.cfg.g_usage = C.uint(getSettingUnsigned(settings, "usage", uint(UsageGoodQuality))) if getSettingBool(settings, "good", false) { e.cfg.g_usage = UsageGoodQuality } if getSettingBool(settings, "rt", false) { e.cfg.g_usage = UsageRealtime } if getSettingBool(settings, "allintra", false) { e.cfg.g_usage = UsageAllIntra } var imageFormat C.aom_img_fmt_t var flags C.aom_codec_flags_t if aomErr = C.aom_codec_enc_config_default(encoder, &e.cfg, e.cfg.g_usage); aomErr != 0 { return nil, errors.New("failed to get default codec config") } switch true { case properties.ColorSpace.ChromaSampling.J == 4 && properties.ColorSpace.ChromaSampling.A == 4 && properties.ColorSpace.ChromaSampling.B == 4: imageFormat = C.AOM_IMG_FMT_I444 e.cfg.g_profile = 1 case properties.ColorSpace.ChromaSampling.J == 4 && properties.ColorSpace.ChromaSampling.A == 2 && properties.ColorSpace.ChromaSampling.B == 2: imageFormat = C.AOM_IMG_FMT_I422 e.cfg.g_profile = 2 case properties.ColorSpace.ChromaSampling.J == 4 && properties.ColorSpace.ChromaSampling.A == 2 && properties.ColorSpace.ChromaSampling.B == 0: imageFormat = C.AOM_IMG_FMT_I420 e.cfg.g_profile = 0 case properties.ColorSpace.ChromaSampling.J == 4 && properties.ColorSpace.ChromaSampling.A == 0 && properties.ColorSpace.ChromaSampling.B == 0: //mono is defined as 4:2:0, but monochrome is set on config imageFormat = C.AOM_IMG_FMT_I420 e.cfg.g_profile = 0 e.cfg.monochrome = 1 default: return nil, errors.New("unsupported input chroma subsampling") } e.cfg.g_input_bit_depth = C.uint(properties.ColorSpace.BitDepth) e.cfg.g_bit_depth = C.aom_bit_depth_t(properties.ColorSpace.BitDepth) if e.cfg.g_bit_depth >= 12 { //only bitdepths up to 12 are supported, see aom_bit_depth_t e.cfg.g_bit_depth = 12 e.cfg.g_profile = 2 } if e.cfg.g_input_bit_depth > 8 { imageFormat |= C.AOM_IMG_FMT_HIGHBITDEPTH } if e.cfg.g_bit_depth > 8 { flags |= C.AOM_CODEC_USE_HIGHBITDEPTH } if e.raw = (*C.aom_image_t)(C.malloc(C.size_t(unsafe.Sizeof(C.aom_image_t{})))); e.raw == nil { return nil, errors.New("error allocating memory") } if C.aom_img_alloc(e.raw, imageFormat, C.uint(properties.Width), C.uint(properties.Height), 1) == nil { return nil, errors.New("failed to allocate image") } runtime.SetFinalizer(e, func(encoder *Encoder) { encoder.Close() }) e.cfg.g_w = C.uint(properties.Width) e.cfg.g_h = C.uint(properties.Height) /*!\brief Stream timebase units * * Indicates the smallest interval of time, in seconds, used by the stream. * For fixed frame rate material, or variable frame rate material where * frames are timed at a multiple of a given clock (ex: video capture), * the \ref RECOMMENDED method is to set the timebase to the reciprocal * of the frame rate (ex: 1001/30000 for 29.970 Hz NTSC). This allows the * pts to correspond to the frame number, which can be handy. For * re-encoding video from containers with absolute time timestamps, the * \ref RECOMMENDED method is to set the timebase to that of the parent * container or multimedia framework (ex: 1/1000 for ms, as in FLV). */ reciprocalFrameRate := properties.FrameRate.Reciprocal() e.cfg.g_timebase.num = C.int(reciprocalFrameRate.Numerator) e.cfg.g_timebase.den = C.int(reciprocalFrameRate.Denominator) // boolean settings if getSettingBool(settings, "large-scale-tile", e.cfg.large_scale_tile != 0) { e.cfg.large_scale_tile = 1 } if getSettingBool(settings, "monochrome", e.cfg.monochrome != 0) { e.cfg.monochrome = 1 } if getSettingBool(settings, "enable-fwd-kf", e.cfg.fwd_kf_enabled != 0) { e.cfg.fwd_kf_enabled = 1 } if getSettingBool(settings, "kf-disabled", false) { e.cfg.kf_mode = C.AOM_KF_DISABLED } // integer settings type uintSettingPair struct { p *C.uint n string } for _, s := range []uintSettingPair{ {&e.cfg.g_threads, "threads"}, {&e.cfg.g_lag_in_frames, "lag-in-frames"}, {&e.cfg.g_forced_max_frame_width, "forced_max_frame_width"}, {&e.cfg.g_forced_max_frame_height, "forced_max_frame_height"}, {&e.cfg.rc_dropframe_thresh, "drop-frame"}, {&e.cfg.rc_resize_mode, "resize-mode"}, {&e.cfg.rc_resize_denominator, "resize-denominator"}, {&e.cfg.rc_resize_kf_denominator, "resize-kf-denominator"}, {(*C.uint)(&e.cfg.rc_superres_mode), "superres-mode"}, {&e.cfg.rc_superres_denominator, "superres-denominator"}, {&e.cfg.rc_superres_kf_denominator, "superres-kf-denominator"}, {&e.cfg.rc_superres_qthresh, "superres-qthresh"}, {&e.cfg.rc_superres_kf_qthresh, "superres-kf-qthresh"}, {&e.cfg.rc_target_bitrate, "target-bitrate"}, {&e.cfg.rc_min_quantizer, "min-q"}, {&e.cfg.rc_max_quantizer, "max-q"}, {&e.cfg.rc_undershoot_pct, "undershoot-pct"}, {&e.cfg.rc_overshoot_pct, "overshoot-pct"}, {&e.cfg.rc_buf_sz, "buf-sz"}, {&e.cfg.rc_buf_initial_sz, "buf-initial-sz"}, {&e.cfg.rc_buf_optimal_sz, "buf-optimal-sz"}, //{&e.cfg.rc_2pass_vbr_bias_pct, "bias-pct"}, //{&e.cfg.rc_2pass_vbr_minsection_pct, "minsection-pct"}, //{&e.cfg.rc_2pass_vbr_maxsection_pct, "maxsection-pct"}, {&e.cfg.kf_min_dist, "kf-min-dist"}, {&e.cfg.kf_max_dist, "kf-max-dist"}, {&e.cfg.sframe_dist, "sframe-dist"}, {&e.cfg.sframe_mode, "sframe-mode"}, } { //todo: unset setting from map *s.p = C.uint(getSettingUnsigned(settings, s.n, uint(*s.p))) } // string/enum settings endUsage := getSettingString(settings, "end-usage", "vbr") switch endUsage { case "vbr": e.cfg.rc_end_usage = C.AOM_VBR case "cbr": e.cfg.rc_end_usage = C.AOM_CBR case "cq": e.cfg.rc_end_usage = C.AOM_CQ case "q": e.cfg.rc_end_usage = C.AOM_Q default: return nil, errors.New("unknown end-usage setting: " + endUsage) } //TODO: find all settings not set on AV1 encoder and place them on e.cfg if aomErr = C.aom_codec_enc_init_ver(&e.codec, encoder, &e.cfg, flags, C.AOM_ENCODER_ABI_VERSION); aomErr != 0 { return nil, fmt.Errorf("failed to initialize encoder: %s", C.GoString(e.codec.err_detail)) } if properties.FullColorRange { if aomErr = C.aom_codec_control_uint(&e.codec, C.AV1E_SET_COLOR_RANGE, 1); aomErr != 0 { return nil, fmt.Errorf("failed to set color range") } } else { if aomErr = C.aom_codec_control_uint(&e.codec, C.AV1E_SET_COLOR_RANGE, 0); aomErr != 0 { return nil, fmt.Errorf("failed to set color range") } } for k, v := range settings { if err := func() error { var strVal *C.char if val, ok := v.(string); ok { strVal = C.CString(val) } else if val, ok := v.(int); ok { strVal = C.CString(strconv.FormatInt(int64(val), 10)) } else if val, ok := v.(int64); ok { strVal = C.CString(strconv.FormatInt(val, 10)) } else if val, ok := v.(uint); ok { strVal = C.CString(strconv.FormatUint(uint64(val), 10)) } else if val, ok := v.(uint64); ok { strVal = C.CString(strconv.FormatUint(val, 10)) } else if val, ok := v.(bool); ok { if val { strVal = C.CString("1") } else { strVal = C.CString("0") } } if strVal != nil { defer C.free(unsafe.Pointer(strVal)) } else { return fmt.Errorf("could not get parameter %s", k) } strKey := C.CString(k) defer C.free(unsafe.Pointer(strKey)) if ret := C.aom_codec_set_option(&e.codec, strKey, strVal); ret != 0 { if ret == C.AOM_CODEC_INVALID_PARAM { return fmt.Errorf("bad parameter value %s for %s: %s", C.GoString(strVal), k, C.GoString(C.aom_codec_error_detail(&e.codec))) } else if ret == C.AOM_CODEC_ERROR { return fmt.Errorf("error setting parameter %s: %s", k, C.GoString(C.aom_codec_error_detail(&e.codec))) } else { return fmt.Errorf("error setting parameter %s: %s", k, C.GoString(C.aom_codec_error_detail(&e.codec))) } } return nil }(); err != nil { return nil, err } } var err error if e.w, err = obuwriter.NewWriter(w, properties.Width, properties.Height, 0x31305641, reciprocalFrameRate); err != nil { return nil, err } return e, nil } func (e *Encoder) EncodeStream(stream *frame.Stream) error { for f := range stream.Channel() { if err := e.Encode(f); err != nil { return err } } return e.Flush() } func (e *Encoder) Encode(f frame.Frame) error { if f8, ok := f.(frame.TypedFrame[uint8]); ok { e.raw.planes[0] = (*C.uint8_t)(unsafe.Pointer(&f8.GetNativeLuma()[0])) e.raw.planes[1] = (*C.uint8_t)(unsafe.Pointer(&f8.GetNativeCb()[0])) e.raw.planes[2] = (*C.uint8_t)(unsafe.Pointer(&f8.GetNativeCr()[0])) } else if f16, ok := f.(frame.TypedFrame[uint16]); ok { e.raw.planes[0] = (*C.uint8_t)(unsafe.Pointer(&f16.GetNativeLuma()[0])) e.raw.planes[1] = (*C.uint8_t)(unsafe.Pointer(&f16.GetNativeCb()[0])) e.raw.planes[2] = (*C.uint8_t)(unsafe.Pointer(&f16.GetNativeCr()[0])) } defer runtime.KeepAlive(f) //cleanup pointers defer func() { e.raw.planes[0] = nil e.raw.planes[1] = nil e.raw.planes[2] = nil }() if _, err := e.encodeFrame(f.PTS(), e.raw); err != nil { return err } e.frames++ return nil } func (e *Encoder) encodeFrame(pts int64, raw *C.aom_image_t) (pkts int, err error) { //TODO: make this a Source channel var aomErr C.aom_codec_err_t if aomErr = C.aom_codec_encode(&e.codec, raw, C.long(pts), 1, 0); aomErr != C.AOM_CODEC_OK { if aomErr == C.AOM_CODEC_INCAPABLE { return 0, errors.New("error encoding frame: AOM_CODEC_INCAPABLE") } else if aomErr == C.AOM_CODEC_INVALID_PARAM { return 0, errors.New("error encoding frame: AOM_CODEC_INVALID_PARAM") } else if aomErr == C.AOM_CODEC_ERROR { return 0, errors.New("error encoding frame: AOM_CODEC_ERROR") } else { return 0, errors.New("error encoding frame") } } var iter C.aom_codec_iter_t for { pkt := C.aom_codec_get_cx_data(&e.codec, &iter) if pkt == nil { break } pkts++ if pkt.kind == C.AOM_CODEC_CX_FRAME_PKT { if err = e.w.WriteFrameBytes(uint64(C.aom_get_pkt_pts(pkt)), unsafe.Slice((*byte)(C.aom_get_pkt_buf(pkt)), int(C.aom_get_pkt_sz(pkt)))); err != nil { return pkts, err } } } return pkts, nil } func (e *Encoder) Flush() error { var pkts int var err error for { if pkts, err = e.encodeFrame(-1, nil); err != nil { return err } if pkts == 0 { break } } _ = e.w.WriteLength(e.frames) return nil } func (e *Encoder) Close() { if e.cleaned.Swap(true) == false { if e.raw != nil { C.aom_img_free(e.raw) C.free(unsafe.Pointer(e.raw)) e.raw = nil } C.aom_codec_destroy(&e.codec) } } func (e *Encoder) Version() string { return Version() } func getSettingBool(m map[string]any, name string, fallback bool) bool { if v, ok := m[name]; ok { if val, ok := v.(string); ok { delete(m, name) return val == "false" || val == "f" || val == "n" } if val, ok := v.(int); ok { delete(m, name) return val != 0 } if val, ok := v.(int64); ok { delete(m, name) return val != 0 } if val, ok := v.(uint); ok { delete(m, name) return val != 0 } if val, ok := v.(uint64); ok { delete(m, name) return val != 0 } return true } return fallback } func getSettingString(m map[string]any, name string, fallback string) string { if v, ok := m[name]; ok { if val, ok := v.(string); ok { delete(m, name) return val } if val, ok := v.(int); ok { delete(m, name) return strconv.FormatInt(int64(val), 10) } if val, ok := v.(int64); ok { delete(m, name) return strconv.FormatInt(val, 10) } if val, ok := v.(uint); ok { delete(m, name) return strconv.FormatUint(uint64(val), 10) } if val, ok := v.(uint64); ok { delete(m, name) return strconv.FormatUint(val, 10) } if val, ok := v.(bool); ok { delete(m, name) if val { return "1" } else { return "0" } } } return fallback } func getSettingUnsigned[T constraints.Unsigned](m map[string]any, name string, fallback T) T { if v, ok := m[name]; ok { if val, ok := v.(string); ok { if intVal, err := strconv.ParseUint(val, 10, 0); err != nil { delete(m, name) return T(intVal) } else { return fallback } } if val, ok := v.(int); ok { delete(m, name) return T(val) } if val, ok := v.(int64); ok { delete(m, name) return T(val) } if val, ok := v.(uint); ok { delete(m, name) return T(val) } if val, ok := v.(uint64); ok { delete(m, name) return T(val) } if val, ok := v.(C.int); ok { delete(m, name) return T(val) } if val, ok := v.(C.uint); ok { delete(m, name) return T(val) } if val, ok := v.(bool); ok { delete(m, name) if val { return 1 } else { return 0 } } } return fallback }