From 6ea3e971bb9be0fe37be87c39666d7b45b102230 Mon Sep 17 00:00:00 2001 From: WeebDataHoarder <57538841+WeebDataHoarder@users.noreply.github.com> Date: Tue, 31 Oct 2023 23:00:37 +0100 Subject: [PATCH] Improved libaom/libx264, proper pool, frame stats --- cli/encode-server/encode.go | 2 + cli/encode-server/job.go | 2 + cli/encode-server/main.go | 2 + decoder/libdav1d/libdav1d.go | 18 +-- decoder/y4m/y4m.go | 11 +- encoder/encoder.go | 5 + encoder/libaom/libaom.c | 12 ++ encoder/libaom/libaom.go | 288 ++++++++++++++++++++++++++++------- encoder/libaom/libaom.h | 6 +- encoder/libx264/libx264.go | 24 +-- frame/frame.go | 2 + frame/frame_uint16.go | 71 +++------ frame/frame_uint8.go | 69 +++------ frame/pool.go | 16 +- frame/statistics.go | 85 +++++++++++ frame/stream.go | 6 +- utilities/libvmaf/libvmaf.go | 26 +--- 17 files changed, 433 insertions(+), 212 deletions(-) create mode 100644 frame/statistics.go diff --git a/cli/encode-server/encode.go b/cli/encode-server/encode.go index af23f8a..4fc79d4 100644 --- a/cli/encode-server/encode.go +++ b/cli/encode-server/encode.go @@ -1,3 +1,5 @@ +//go:build cgo && !disable_library_libaom && !disable_library_libx264 + package main import ( diff --git a/cli/encode-server/job.go b/cli/encode-server/job.go index 3019d5f..d552ea7 100644 --- a/cli/encode-server/job.go +++ b/cli/encode-server/job.go @@ -1,3 +1,5 @@ +//go:build cgo && !disable_library_libaom && !disable_library_libx264 + package main import ( diff --git a/cli/encode-server/main.go b/cli/encode-server/main.go index 6c02fd2..00b84f7 100644 --- a/cli/encode-server/main.go +++ b/cli/encode-server/main.go @@ -1,3 +1,5 @@ +//go:build cgo && !disable_library_libaom && !disable_library_libx264 + package main import ( diff --git a/decoder/libdav1d/libdav1d.go b/decoder/libdav1d/libdav1d.go index d298a3f..98124b1 100644 --- a/decoder/libdav1d/libdav1d.go +++ b/decoder/libdav1d/libdav1d.go @@ -233,22 +233,14 @@ func (d *Decoder) pictureToFrame() (f frame.Frame, err error) { } } - f = d.pool.Get(int64(d.picture.m.timestamp)) + f = d.pool.Get(int64(d.picture.m.timestamp), int64(d.picture.m.timestamp)+int64(d.picture.m.duration)) var yData, uData, vData []byte - if bitDepth > 8 { //16-bit - yData = unsafe.Slice((*byte)(d.picture.data[planeY]), properties.Height*properties.Width*2) - if d.picture.p.layout != C.DAV1D_PIXEL_LAYOUT_I400 { - uData = unsafe.Slice((*byte)(d.picture.data[planeU]), chromaHeight*chromaWidth*2) - vData = unsafe.Slice((*byte)(d.picture.data[planeV]), chromaHeight*chromaWidth*2) - } - } else { - yData = unsafe.Slice((*byte)(d.picture.data[planeY]), properties.Height*properties.Width) - if d.picture.p.layout != C.DAV1D_PIXEL_LAYOUT_I400 { - uData = unsafe.Slice((*byte)(d.picture.data[planeU]), chromaHeight*chromaWidth) - vData = unsafe.Slice((*byte)(d.picture.data[planeV]), chromaHeight*chromaWidth) - } + yData = unsafe.Slice((*byte)(d.picture.data[planeY]), properties.Height*properties.Width*(bitDepth/2)) + if d.picture.p.layout != C.DAV1D_PIXEL_LAYOUT_I400 { + uData = unsafe.Slice((*byte)(d.picture.data[planeU]), chromaHeight*chromaWidth*(bitDepth/2)) + vData = unsafe.Slice((*byte)(d.picture.data[planeV]), chromaHeight*chromaWidth*(bitDepth/2)) } copy(f.GetLuma(), yData) diff --git a/decoder/y4m/y4m.go b/decoder/y4m/y4m.go index 1edc4e8..0764e91 100644 --- a/decoder/y4m/y4m.go +++ b/decoder/y4m/y4m.go @@ -209,7 +209,12 @@ func (s *Decoder) GetFrame() (parameters map[Parameter][]string, frameObject fra return nil, nil, fmt.Errorf("frame %d PTS could not be calculated", s.frameCounter) } - if frameObject, err = s.readFrameData(pts); err != nil { + nextPts := s.FramePTS(s.frameCounter + 1) + if nextPts == -1 { + nextPts = pts + (pts - s.FramePTS(s.frameCounter-1)) + } + + if frameObject, err = s.readFrameData(pts, nextPts); err != nil { return nil, nil, err } @@ -295,8 +300,8 @@ func (s *Decoder) readFrameHeader() (parameters map[Parameter][]string, err erro return parameters, nil } -func (s *Decoder) readFrameData(pts int64) (f frame.Frame, err error) { - f = s.pool.Get(pts) +func (s *Decoder) readFrameData(pts, nextPts int64) (f frame.Frame, err error) { + f = s.pool.Get(pts, nextPts) _, err = io.ReadFull(s.r, f.GetJoint()) return f, err } diff --git a/encoder/encoder.go b/encoder/encoder.go index b9f9fac..5e45997 100644 --- a/encoder/encoder.go +++ b/encoder/encoder.go @@ -9,3 +9,8 @@ type Encoder interface { Close() Version() string } + +type EncoderWithStatistics interface { + Encoder + Statistics() frame.Statistics +} diff --git a/encoder/libaom/libaom.c b/encoder/libaom/libaom.c index 0842efe..61b958d 100644 --- a/encoder/libaom/libaom.c +++ b/encoder/libaom/libaom.c @@ -8,6 +8,10 @@ aom_codec_err_t aom_codec_control_uint(aom_codec_ctx_t *ctx, int ctrl_id, unsign return aom_codec_control(ctx, ctrl_id, v); } +aom_codec_err_t aom_codec_control_intptr(aom_codec_ctx_t *ctx, int ctrl_id, int* v) { + return aom_codec_control(ctx, ctrl_id, v); +} + void* aom_get_pkt_buf(aom_codec_cx_pkt_t *pkt){ return pkt->data.frame.buf; } @@ -16,6 +20,10 @@ size_t aom_get_pkt_sz(aom_codec_cx_pkt_t *pkt){ return pkt->data.frame.sz; } +int aom_get_pkt_partition_id(aom_codec_cx_pkt_t *pkt){ + return pkt->data.frame.partition_id; +} + aom_codec_pts_t aom_get_pkt_pts(aom_codec_cx_pkt_t *pkt){ return pkt->data.frame.pts; } @@ -23,3 +31,7 @@ aom_codec_pts_t aom_get_pkt_pts(aom_codec_cx_pkt_t *pkt){ aom_codec_frame_flags_t aom_get_pkt_flags(aom_codec_cx_pkt_t *pkt){ return pkt->data.frame.flags; } + +const struct aom_codec_enc_cfg* aom_get_ctx_enc_cfg(aom_codec_ctx_t *ctx){ + return ctx->config.enc; +} \ No newline at end of file diff --git a/encoder/libaom/libaom.go b/encoder/libaom/libaom.go index 7ed9cd3..6e30b49 100644 --- a/encoder/libaom/libaom.go +++ b/encoder/libaom/libaom.go @@ -10,10 +10,12 @@ import "C" import ( "errors" "fmt" + "git.gammaspectra.live/S.O.N.G/Ignite/color" "git.gammaspectra.live/S.O.N.G/Ignite/frame" "git.gammaspectra.live/S.O.N.G/Ignite/utilities/obuwriter" "golang.org/x/exp/constraints" "io" + "log" "maps" "runtime" "strconv" @@ -22,16 +24,18 @@ import ( ) type Encoder struct { - w *obuwriter.Writer - cleaned atomic.Bool - cfg C.aom_codec_enc_cfg_t - codec C.aom_codec_ctx_t - raw *C.aom_image_t - frames uint32 - resourcePinner runtime.Pinner + w *obuwriter.Writer + cleaned atomic.Bool + cfg C.aom_codec_enc_cfg_t + codec C.aom_codec_ctx_t + raw *C.aom_image_t + rawBuffer []byte + framesIn, framesOut int + frameStatistics frame.FrameStatistics + resourcePinner runtime.Pinner } -var libaomVersion = "libaom-av1 " + C.GoString(C.aom_codec_version_str()) +var libaomVersion = "libaom-av1 " + C.GoString(C.aom_codec_version_str()) + " ABI " + strconv.FormatUint(C.AOM_ENCODER_ABI_VERSION, 10) func Version() string { return libaomVersion @@ -44,7 +48,9 @@ const ( ) func NewEncoder(w io.Writer, properties frame.StreamProperties, settings map[string]any) (*Encoder, error) { - e := &Encoder{} + e := &Encoder{ + frameStatistics: make(frame.FrameStatistics), + } clonedSettings := maps.Clone(settings) @@ -74,21 +80,56 @@ func NewEncoder(w io.Writer, properties frame.StreamProperties, settings map[str return nil, errors.New("failed to get default codec config") } + var bitsPerSample, decH, decV C.int + switch true { case properties.ColorSpace.ChromaSampling.J == 4 && properties.ColorSpace.ChromaSampling.A == 4 && properties.ColorSpace.ChromaSampling.B == 4: imageFormat = C.AOM_IMG_FMT_I444 e.cfg.g_profile = 1 + switch properties.ColorSpace.BitDepth { + case C.AOM_BITS_8: + bitsPerSample = 24 + case C.AOM_BITS_10: + bitsPerSample = 30 + case C.AOM_BITS_12: + bitsPerSample = 36 + } + decH = 1 + decV = 1 case properties.ColorSpace.ChromaSampling.J == 4 && properties.ColorSpace.ChromaSampling.A == 2 && properties.ColorSpace.ChromaSampling.B == 2: imageFormat = C.AOM_IMG_FMT_I422 e.cfg.g_profile = 2 + switch properties.ColorSpace.BitDepth { + case C.AOM_BITS_8: + bitsPerSample = 16 + case C.AOM_BITS_10: + bitsPerSample = 20 + case C.AOM_BITS_12: + bitsPerSample = 24 + } + decH = 2 + decV = 1 case properties.ColorSpace.ChromaSampling.J == 4 && properties.ColorSpace.ChromaSampling.A == 2 && properties.ColorSpace.ChromaSampling.B == 0: imageFormat = C.AOM_IMG_FMT_I420 e.cfg.g_profile = 0 + switch properties.ColorSpace.BitDepth { + case C.AOM_BITS_8: + bitsPerSample = 12 + case C.AOM_BITS_10: + bitsPerSample = 15 + case C.AOM_BITS_12: + bitsPerSample = 18 + } + decH = 2 + decV = 2 case properties.ColorSpace.ChromaSampling.J == 4 && properties.ColorSpace.ChromaSampling.A == 0 && properties.ColorSpace.ChromaSampling.B == 0: //mono is defined as 4:2:0, but monochrome is set on config imageFormat = C.AOM_IMG_FMT_I420 e.cfg.g_profile = 0 e.cfg.monochrome = 1 + bitsPerSample = C.int(properties.ColorSpace.BitDepth) + decH = 0 + decV = 2 default: return nil, errors.New("unsupported input chroma subsampling") @@ -97,31 +138,95 @@ func NewEncoder(w io.Writer, properties frame.StreamProperties, settings map[str e.cfg.g_input_bit_depth = C.uint(properties.ColorSpace.BitDepth) e.cfg.g_bit_depth = C.aom_bit_depth_t(properties.ColorSpace.BitDepth) - if e.cfg.g_bit_depth >= 12 { //only bitdepths up to 12 are supported, see aom_bit_depth_t - e.cfg.g_bit_depth = 12 + if e.cfg.g_bit_depth >= C.AOM_BITS_12 { //only bitdepths up to 12 are supported, see aom_bit_depth_t + e.cfg.g_bit_depth = C.AOM_BITS_12 e.cfg.g_profile = 2 } - if e.cfg.g_input_bit_depth > 8 { + if e.cfg.g_input_bit_depth > C.AOM_BITS_8 { imageFormat |= C.AOM_IMG_FMT_HIGHBITDEPTH } - if e.cfg.g_bit_depth > 8 { + if e.cfg.g_bit_depth > C.AOM_BITS_8 { flags |= C.AOM_CODEC_USE_HIGHBITDEPTH } - e.raw = &C.aom_image_t{} - e.resourcePinner.Pin(e.raw) - if C.aom_img_alloc(e.raw, imageFormat, C.uint(properties.Width), C.uint(properties.Height), 1) == nil { - return nil, errors.New("failed to allocate image") + frameSize, err := properties.ColorSpace.FrameSize(properties.Width, properties.Height) + if err != nil { + return nil, err } + bytesPerSample := 1 + if properties.ColorSpace.BitDepth > C.AOM_BITS_8 { + bytesPerSample = 2 + } + + e.raw = &C.aom_image_t{} + + e.raw.bit_depth = C.uint(properties.ColorSpace.BitDepth) + //todo: color primaries + e.raw.cp = C.AOM_CICP_CP_UNSPECIFIED + //todo: transfer characteristics + e.raw.tc = C.AOM_CICP_TC_UNSPECIFIED + //todo: matrix coefficients + e.raw.mc = C.AOM_CICP_MC_UNSPECIFIED + e.raw.monochrome = C.int(e.cfg.monochrome) + e.raw.fmt = imageFormat + + switch properties.ColorSpace.ChromaSamplePosition { + case color.ChromaSamplePositionLeft: + e.raw.csp = C.AOM_CSP_VERTICAL + case color.ChromaSamplePositionCenter: + e.raw.csp = C.AOM_CSP_UNKNOWN + case color.ChromaSamplePositionTopLeft: + e.raw.csp = C.AOM_CSP_COLOCATED + default: + e.raw.csp = C.AOM_CSP_UNKNOWN + } + + if properties.FullColorRange { + e.raw._range = C.AOM_CR_FULL_RANGE + } else { + e.raw._range = C.AOM_CR_STUDIO_RANGE + } + + e.cfg.g_w = C.uint(properties.Width) + e.cfg.g_h = C.uint(properties.Height) + + e.raw.w = e.cfg.g_w + e.raw.d_w = e.cfg.g_w + e.raw.h = e.cfg.g_h + e.raw.d_h = e.cfg.g_h + e.raw.bps = bitsPerSample + e.raw.x_chroma_shift = C.uint(decH >> 1) + e.raw.y_chroma_shift = C.uint(decV >> 1) + + c_w := (C.int(properties.Width) + decH - 1) / decH + c_w *= C.int(bytesPerSample) + + e.rawBuffer = make([]byte, frameSize) + e.resourcePinner.Pin(e.raw) + e.resourcePinner.Pin(unsafe.SliceData(e.rawBuffer)) + + iY := properties.ColorSpace.ChromaSampling.PlaneLumaSamples(properties.Width, properties.Height) + iCb := properties.ColorSpace.ChromaSampling.PlaneCbSamples(properties.Width, properties.Height) + iCr := properties.ColorSpace.ChromaSampling.PlaneCrSamples(properties.Width, properties.Height) + + iY *= bytesPerSample + iCb *= bytesPerSample + iCr *= bytesPerSample + + e.raw.stride[C.AOM_PLANE_Y] = C.int(properties.Width * bytesPerSample) + e.raw.stride[C.AOM_PLANE_U] = c_w + e.raw.stride[C.AOM_PLANE_V] = e.raw.stride[C.AOM_PLANE_U] + + e.raw.planes[C.AOM_PLANE_Y] = (*C.uchar)(unsafe.Pointer(unsafe.SliceData(e.rawBuffer[:iY]))) + e.raw.planes[C.AOM_PLANE_U] = (*C.uchar)(unsafe.Pointer(unsafe.SliceData(e.rawBuffer[iY : iY+iCb]))) + e.raw.planes[C.AOM_PLANE_V] = (*C.uchar)(unsafe.Pointer(unsafe.SliceData(e.rawBuffer[iY+iCb : iY+iCb+iCr]))) + runtime.SetFinalizer(e, func(encoder *Encoder) { encoder.Close() }) - e.cfg.g_w = C.uint(properties.Width) - e.cfg.g_h = C.uint(properties.Height) - /*!\brief Stream timebase units * * Indicates the smallest interval of time, in seconds, used by the stream. @@ -199,6 +304,8 @@ func NewEncoder(w io.Writer, properties frame.StreamProperties, settings map[str {&e.cfg.kf_max_dist, "kf-max-dist"}, {&e.cfg.sframe_dist, "sframe-dist"}, {&e.cfg.sframe_mode, "sframe-mode"}, + + {&e.cfg.use_fixed_qp_offsets, "use-fixed-qp-offsets"}, } { //todo: unset setting from map *s.p = C.uint(getSettingUnsigned(clonedSettings, s.n, uint(*s.p))) @@ -224,19 +331,51 @@ func NewEncoder(w io.Writer, properties frame.StreamProperties, settings map[str //TODO: find all settings not set on AV1 encoder and place them on e.cfg if aomErr = C.aom_codec_enc_init_ver(&e.codec, encoder, &e.cfg, flags, C.AOM_ENCODER_ABI_VERSION); aomErr != 0 { - return nil, fmt.Errorf("failed to initialize encoder: %s", C.GoString(e.codec.err_detail)) + return nil, fmt.Errorf("failed to initialize encoder: err %d %s", aomErr, C.GoString(e.codec.err_detail)) } if properties.FullColorRange { - if aomErr = C.aom_codec_control_uint(&e.codec, C.AV1E_SET_COLOR_RANGE, 1); aomErr != 0 { + if aomErr = C.aom_codec_control_uint(&e.codec, C.AV1E_SET_COLOR_RANGE, C.AOM_CR_FULL_RANGE); aomErr != 0 { return nil, fmt.Errorf("failed to set color range") } } else { - if aomErr = C.aom_codec_control_uint(&e.codec, C.AV1E_SET_COLOR_RANGE, 0); aomErr != 0 { + if aomErr = C.aom_codec_control_uint(&e.codec, C.AV1E_SET_COLOR_RANGE, C.AOM_CR_STUDIO_RANGE); aomErr != 0 { return nil, fmt.Errorf("failed to set color range") } } + //??? aomenc does this + if properties.ColorSpace.BitDepth == 12 { + if aomErr = C.aom_codec_control_uint(&e.codec, C.AV1E_SET_CHROMA_SUBSAMPLING_X, C.uint(decH>>1)); aomErr != 0 { + return nil, fmt.Errorf("failed to set chroma subsampling X") + } + if aomErr = C.aom_codec_control_uint(&e.codec, C.AV1E_SET_CHROMA_SUBSAMPLING_Y, C.uint(decV>>1)); aomErr != 0 { + return nil, fmt.Errorf("failed to set chroma subsampling Y") + } + } + + //TODO: fill all + controlSettings := map[string]C.int{ + "cpu-used": C.AOME_SET_CPUUSED, + "auto-alt-ref": C.AOME_SET_ENABLEAUTOALTREF, + "sharpness": C.AOME_SET_SHARPNESS, + "row-mt": C.AV1E_SET_ROW_MT, + //"fp-mt": C.AV1E_SET_FP_MT, + "tile-columns": C.AV1E_SET_TILE_COLUMNS, + "tile-rows": C.AV1E_SET_TILE_ROWS, + "cq-level": C.AOME_SET_CQ_LEVEL, + "max-reference-frames": C.AV1E_SET_MAX_REFERENCE_FRAMES, + } + + for key, controlKey := range controlSettings { + if _, ok := clonedSettings[key]; ok { + val := getSettingUnsigned[uint](clonedSettings, key, 0) + if aomErr = C.aom_codec_control_uint(&e.codec, controlKey, C.uint(val)); aomErr != 0 { + return nil, fmt.Errorf("error setting parameter %s: %s", key, C.GoString(C.aom_codec_error_detail(&e.codec))) + } + } + } + for k, v := range clonedSettings { if err := func() error { var strVal *C.char @@ -281,7 +420,6 @@ func NewEncoder(w io.Writer, properties frame.StreamProperties, settings map[str } } - var err error if e.w, err = obuwriter.NewWriter(w, properties.Width, properties.Height, 0x31305641, timeBase); err != nil { return nil, err } @@ -299,40 +437,27 @@ func (e *Encoder) EncodeStream(stream *frame.Stream) error { } func (e *Encoder) Encode(f frame.Frame) error { + /*luma := f.GetLuma() + cb := f.GetCb() + cr := f.GetCr() + copy(unsafe.Slice((*byte)(e.raw.planes[C.AOM_PLANE_Y]), len(luma)), luma) + copy(unsafe.Slice((*byte)(e.raw.planes[C.AOM_PLANE_U]), len(cb)), cb) + copy(unsafe.Slice((*byte)(e.raw.planes[C.AOM_PLANE_V]), len(cr)), cr) + */ + copy(e.rawBuffer, f.GetJoint()) - switch typedFrame := f.(type) { - case frame.TypedFrame[uint8]: - luma := typedFrame.GetNativeLuma() - cb := typedFrame.GetNativeCb() - cr := typedFrame.GetNativeCr() - copy(unsafe.Slice((*byte)(e.raw.planes[0]), len(luma)), luma) - copy(unsafe.Slice((*byte)(e.raw.planes[1]), len(cb)), cb) - copy(unsafe.Slice((*byte)(e.raw.planes[2]), len(cr)), cr) - case frame.TypedFrame[uint16]: - luma := typedFrame.GetNativeLuma() - cb := typedFrame.GetNativeCb() - cr := typedFrame.GetNativeCr() - copy(unsafe.Slice((*uint16)(unsafe.Pointer(e.raw.planes[0])), len(luma)), luma) - copy(unsafe.Slice((*uint16)(unsafe.Pointer(e.raw.planes[1])), len(cb)), cb) - copy(unsafe.Slice((*uint16)(unsafe.Pointer(e.raw.planes[2])), len(cr)), cr) - default: - return errors.New("unknown frame type") - } - - if _, err := e.encodeFrame(f.PTS(), e.raw); err != nil { + if _, err := e.encodeFrame(f.PTS(), f.NextPTS(), e.raw); err != nil { return err } - - e.frames++ + runtime.KeepAlive(f) return nil } -func (e *Encoder) encodeFrame(pts int64, raw *C.aom_image_t) (pkts int, err error) { - //TODO: make this a Source channel +func (e *Encoder) encodeFrame(pts, nextPts int64, raw *C.aom_image_t) (pkts int, err error) { var aomErr C.aom_codec_err_t - if aomErr = C.aom_codec_encode(&e.codec, raw, C.long(pts), 1, 0); aomErr != C.AOM_CODEC_OK { + if aomErr = C.aom_codec_encode(&e.codec, raw, C.long(pts), C.ulong(nextPts-pts), 0); aomErr != C.AOM_CODEC_OK { if aomErr == C.AOM_CODEC_INCAPABLE { return 0, errors.New("error encoding frame: AOM_CODEC_INCAPABLE") } else if aomErr == C.AOM_CODEC_INVALID_PARAM { @@ -344,6 +469,17 @@ func (e *Encoder) encodeFrame(pts int64, raw *C.aom_image_t) (pkts int, err erro } } + if raw != nil { + e.framesIn++ + } + + var quant64 C.int + if e.framesIn >= int(e.cfg.g_lag_in_frames) { + if aomErr = C.aom_codec_control_intptr(&e.codec, C.AOME_GET_LAST_QUANTIZER_64, &quant64); aomErr != C.AOM_CODEC_OK { + return 0, errors.New("error getting LAST_QUANTIZER_64") + } + } + var iter C.aom_codec_iter_t for { @@ -354,21 +490,63 @@ func (e *Encoder) encodeFrame(pts int64, raw *C.aom_image_t) (pkts int, err erro pkts++ if pkt.kind == C.AOM_CODEC_CX_FRAME_PKT { - if err = e.w.WriteFrameBytes(uint64(C.aom_get_pkt_pts(pkt)), unsafe.Slice((*byte)(C.aom_get_pkt_buf(pkt)), int(C.aom_get_pkt_sz(pkt)))); err != nil { + partitionId := int(C.aom_get_pkt_partition_id(pkt)) + if partitionId > 0 { + return 0, errors.New("partition id not supported") + } + packetPts := uint64(C.aom_get_pkt_pts(pkt)) + buf := unsafe.Slice((*byte)(C.aom_get_pkt_buf(pkt)), int(C.aom_get_pkt_sz(pkt))) + flags := C.aom_get_pkt_flags(pkt) + + e.frameStatistics[frame.FrameStatisticsKeyNumber] = e.framesOut + e.frameStatistics[frame.FrameStatisticsKeyPts] = int64(packetPts) + e.frameStatistics[frame.FrameStatisticsKeyQuantizer] = int(quant64) + e.frameStatistics[frame.FrameStatisticsKeySize] = len(buf) + e.frameStatistics[frame.FrameStatisticsKeyIsKeyFrame] = false + e.frameStatistics[frame.FrameStatisticsKeyIsIntraFrame] = false + + if flags&C.AOM_FRAME_IS_KEY > 0 { + e.frameStatistics[frame.FrameStatisticsKeyIsKeyFrame] = true + } + if flags&C.AOM_FRAME_IS_INTRAONLY > 0 { + e.frameStatistics[frame.FrameStatisticsKeyIsIntraFrame] = true + } + if err = e.w.WriteFrameBytes(packetPts, buf); err != nil { return pkts, err } + + e.framesOut++ + runtime.KeepAlive(buf) + } else { + + log.Printf("kind %d", pkt.kind) } } return pkts, nil } +func (e *Encoder) Statistics() frame.Statistics { + if len(e.frameStatistics) > 0 { + return frame.Statistics{ + frame.StatisticsKeyFramesIn: e.framesIn, + frame.StatisticsKeyFramesOut: e.framesOut, + frame.StatisticsKeyLastFrameOut: maps.Clone(e.frameStatistics), + } + } else { + return frame.Statistics{ + frame.StatisticsKeyFramesIn: e.framesIn, + frame.StatisticsKeyFramesOut: e.framesOut, + } + } +} + func (e *Encoder) Flush() error { var pkts int var err error for { - if pkts, err = e.encodeFrame(-1, nil); err != nil { + if pkts, err = e.encodeFrame(-1, 0, nil); err != nil { return err } if pkts == 0 { @@ -376,19 +554,21 @@ func (e *Encoder) Flush() error { } } - _ = e.w.WriteLength(e.frames) + _ = e.w.WriteLength(uint32(e.framesIn)) return nil } func (e *Encoder) Close() { if e.cleaned.Swap(true) == false { + e.resourcePinner.Unpin() if e.raw != nil { - C.aom_img_free(e.raw) e.raw = nil } + if e.rawBuffer != nil { + e.rawBuffer = nil + } C.aom_codec_destroy(&e.codec) - e.resourcePinner.Unpin() } } diff --git a/encoder/libaom/libaom.h b/encoder/libaom/libaom.h index dbea99d..deb95ba 100644 --- a/encoder/libaom/libaom.h +++ b/encoder/libaom/libaom.h @@ -9,7 +9,11 @@ aom_codec_err_t aom_codec_control_int(aom_codec_ctx_t *ctx, int ctrl_id, int v); aom_codec_err_t aom_codec_control_uint(aom_codec_ctx_t *ctx, int ctrl_id, unsigned int v); +aom_codec_err_t aom_codec_control_intptr(aom_codec_ctx_t *ctx, int ctrl_id, int* v); + void* aom_get_pkt_buf(aom_codec_cx_pkt_t *pkt); size_t aom_get_pkt_sz(aom_codec_cx_pkt_t *pkt); aom_codec_pts_t aom_get_pkt_pts(aom_codec_cx_pkt_t *pkt); -aom_codec_frame_flags_t aom_get_pkt_flags(aom_codec_cx_pkt_t *pkt); \ No newline at end of file +int aom_get_pkt_partition_id(aom_codec_cx_pkt_t *pkt); +aom_codec_frame_flags_t aom_get_pkt_flags(aom_codec_cx_pkt_t *pkt); +const struct aom_codec_enc_cfg* aom_get_ctx_enc_cfg(aom_codec_ctx_t *ctx); \ No newline at end of file diff --git a/encoder/libx264/libx264.go b/encoder/libx264/libx264.go index 56c6fb0..6481a78 100644 --- a/encoder/libx264/libx264.go +++ b/encoder/libx264/libx264.go @@ -223,24 +223,12 @@ func (e *Encoder) EncodeStream(stream *frame.Stream) error { } func (e *Encoder) Encode(f frame.Frame) error { - switch typedFrame := f.(type) { - case frame.TypedFrame[uint8]: - luma := typedFrame.GetNativeLuma() - cb := typedFrame.GetNativeCb() - cr := typedFrame.GetNativeCr() - copy(unsafe.Slice((*byte)(e.pictureIn.img.plane[0]), len(luma)), luma) - copy(unsafe.Slice((*byte)(e.pictureIn.img.plane[1]), len(cb)), cb) - copy(unsafe.Slice((*byte)(e.pictureIn.img.plane[2]), len(cr)), cr) - case frame.TypedFrame[uint16]: - luma := typedFrame.GetNativeLuma() - cb := typedFrame.GetNativeCb() - cr := typedFrame.GetNativeCr() - copy(unsafe.Slice((*uint16)(unsafe.Pointer(e.pictureIn.img.plane[0])), len(luma)), luma) - copy(unsafe.Slice((*uint16)(unsafe.Pointer(e.pictureIn.img.plane[1])), len(cb)), cb) - copy(unsafe.Slice((*uint16)(unsafe.Pointer(e.pictureIn.img.plane[2])), len(cr)), cr) - default: - return errors.New("unknown frame type") - } + luma := f.GetLuma() + cb := f.GetCb() + cr := f.GetCr() + copy(unsafe.Slice((*byte)(e.pictureIn.img.plane[0]), len(luma)), luma) + copy(unsafe.Slice((*byte)(e.pictureIn.img.plane[1]), len(cb)), cb) + copy(unsafe.Slice((*byte)(e.pictureIn.img.plane[2]), len(cr)), cr) e.pictureIn.i_pts = C.int64_t(f.PTS()) diff --git a/frame/frame.go b/frame/frame.go index fe5ba63..ccac750 100644 --- a/frame/frame.go +++ b/frame/frame.go @@ -13,6 +13,8 @@ type Frame interface { Properties() Properties // PTS usually frame number, but can differ on VFR PTS() int64 + // NextPTS Next PTS + NextPTS() int64 // Get16 get a pixel sample in 16-bit depth Get16(x, y int) (Y uint16, Cb uint16, Cr uint16) diff --git a/frame/frame_uint16.go b/frame/frame_uint16.go index afc7254..2fac536 100644 --- a/frame/frame_uint16.go +++ b/frame/frame_uint16.go @@ -1,69 +1,44 @@ package frame import ( - "errors" - "runtime" "unsafe" ) -type FrameUint16 struct { +type fUint16 struct { properties Properties ret func(f Frame) Pts int64 + NextPts int64 Y []uint16 Cb []uint16 Cr []uint16 } -// NewUint16FrameFromBytes -// Deprecated -func NewUint16FrameFromBytes(properties Properties, pts int64, data []byte) (*FrameUint16, error) { - if frameLength, _ := properties.ColorSpace.FrameSize(properties.Width, properties.Height); frameLength != len(data) { - return nil, errors.New("wrong length of data") - } - - if properties.ColorSpace.BitDepth >= 16 { - return nil, errors.New("wrong bit depth") - } - - buf := make([]uint16, len(data)/2) - copy(buf, unsafe.Slice((*uint16)(unsafe.Pointer(unsafe.SliceData(data))), len(data)/2)) - runtime.KeepAlive(data) - - iY := properties.ColorSpace.ChromaSampling.PlaneLumaSamples(properties.Width, properties.Height) - iCb := properties.ColorSpace.ChromaSampling.PlaneCbSamples(properties.Width, properties.Height) - iCr := properties.ColorSpace.ChromaSampling.PlaneCrSamples(properties.Width, properties.Height) - - return &FrameUint16{ - properties: properties, - Y: buf[:iY], - Cb: buf[iY : iY+iCb], - Cr: buf[iY+iCb : iY+iCb+iCr], - Pts: pts, - }, nil -} - -func (i *FrameUint16) Get16(x, y int) (Y uint16, Cb uint16, Cr uint16) { +func (i *fUint16) Get16(x, y int) (Y uint16, Cb uint16, Cr uint16) { cy, cb, cr := i.GetNative(x, y) return cy << (16 - i.properties.ColorSpace.BitDepth), cb << (16 - i.properties.ColorSpace.BitDepth), cr << (16 - i.properties.ColorSpace.BitDepth) } -func (i *FrameUint16) Get8(x, y int) (Y uint8, Cb uint8, Cr uint8) { +func (i *fUint16) Get8(x, y int) (Y uint8, Cb uint8, Cr uint8) { cy, cb, cr := i.GetNative(x, y) return uint8(cy >> (i.properties.ColorSpace.BitDepth - 8)), uint8(cb >> (i.properties.ColorSpace.BitDepth - 8)), uint8(cr >> (i.properties.ColorSpace.BitDepth - 8)) } -func (i *FrameUint16) Properties() Properties { +func (i *fUint16) Properties() Properties { return i.properties } -func (i *FrameUint16) PTS() int64 { +func (i *fUint16) PTS() int64 { return i.Pts } -func (i *FrameUint16) GetNative(x, y int) (Y uint16, Cb uint16, Cr uint16) { +func (i *fUint16) NextPTS() int64 { + return i.NextPts +} + +func (i *fUint16) GetNative(x, y int) (Y uint16, Cb uint16, Cr uint16) { Yindex := x + y*i.properties.Width Cwidth := (i.properties.Width * int(i.properties.ColorSpace.ChromaSampling.A)) / int(i.properties.ColorSpace.ChromaSampling.J) @@ -77,56 +52,56 @@ func (i *FrameUint16) GetNative(x, y int) (Y uint16, Cb uint16, Cr uint16) { return } -func (i *FrameUint16) FillNativeLuma(buf []uint16) { +func (i *fUint16) FillNativeLuma(buf []uint16) { copy(i.Y, buf) } -func (i *FrameUint16) FillNativeCb(buf []uint16) { +func (i *fUint16) FillNativeCb(buf []uint16) { copy(i.Cb, buf) } -func (i *FrameUint16) FillNativeCr(buf []uint16) { +func (i *fUint16) FillNativeCr(buf []uint16) { copy(i.Cr, buf) } -func (i *FrameUint16) GetNativeJoint() []uint16 { +func (i *fUint16) GetNativeJoint() []uint16 { // Component slices are allocated as a single buffer return i.Y[:len(i.Y)+len(i.Cb)+len(i.Cr)] } -func (i *FrameUint16) GetJoint() []byte { +func (i *fUint16) GetJoint() []byte { buf := i.GetNativeJoint() return unsafe.Slice((*byte)(unsafe.Pointer(unsafe.SliceData(buf))), len(buf)*2) } -func (i *FrameUint16) GetLuma() []byte { +func (i *fUint16) GetLuma() []byte { buf := i.GetNativeLuma() return unsafe.Slice((*byte)(unsafe.Pointer(unsafe.SliceData(buf))), len(buf)*2) } -func (i *FrameUint16) GetCb() []byte { +func (i *fUint16) GetCb() []byte { buf := i.GetNativeCb() return unsafe.Slice((*byte)(unsafe.Pointer(unsafe.SliceData(buf))), len(buf)*2) } -func (i *FrameUint16) GetCr() []byte { +func (i *fUint16) GetCr() []byte { buf := i.GetNativeCr() return unsafe.Slice((*byte)(unsafe.Pointer(unsafe.SliceData(buf))), len(buf)*2) } -func (i *FrameUint16) GetNativeLuma() []uint16 { +func (i *fUint16) GetNativeLuma() []uint16 { return i.Y } -func (i *FrameUint16) GetNativeCb() []uint16 { +func (i *fUint16) GetNativeCb() []uint16 { return i.Cb } -func (i *FrameUint16) GetNativeCr() []uint16 { +func (i *fUint16) GetNativeCr() []uint16 { return i.Cr } -func (i *FrameUint16) Return() { +func (i *fUint16) Return() { if i.ret != nil { i.ret(i) } diff --git a/frame/frame_uint8.go b/frame/frame_uint8.go index ae6bbe3..6a54da0 100644 --- a/frame/frame_uint8.go +++ b/frame/frame_uint8.go @@ -1,61 +1,38 @@ package frame -import ( - "errors" -) - -type FrameUint8 struct { +type fUint8 struct { properties Properties ret func(f Frame) Pts int64 + NextPts int64 Y []uint8 Cb []uint8 Cr []uint8 } -// NewUint8FrameFromBytes -// Deprecated -func NewUint8FrameFromBytes(properties Properties, pts int64, data []byte) (*FrameUint8, error) { - if frameLength, _ := properties.ColorSpace.FrameSize(properties.Width, properties.Height); frameLength != len(data) { - return nil, errors.New("wrong length of data") - } - - if properties.ColorSpace.BitDepth > 8 { - return nil, errors.New("wrong bit depth") - } - - iY := properties.ColorSpace.ChromaSampling.PlaneLumaSamples(properties.Width, properties.Height) - iCb := properties.ColorSpace.ChromaSampling.PlaneCbSamples(properties.Width, properties.Height) - iCr := properties.ColorSpace.ChromaSampling.PlaneCrSamples(properties.Width, properties.Height) - - return &FrameUint8{ - properties: properties, - Y: data[:iY], - Cb: data[iY : iY+iCb], - Cr: data[iY+iCb : iY+iCb+iCr], - Pts: pts, - }, nil -} - -func (i *FrameUint8) Get16(x, y int) (Y uint16, Cb uint16, Cr uint16) { +func (i *fUint8) Get16(x, y int) (Y uint16, Cb uint16, Cr uint16) { cy, cb, cr := i.GetNative(x, y) return uint16(cy) << (16 - i.properties.ColorSpace.BitDepth), uint16(cb) << (16 - i.properties.ColorSpace.BitDepth), uint16(cr) << (16 - i.properties.ColorSpace.BitDepth) } -func (i *FrameUint8) Get8(x, y int) (Y uint8, Cb uint8, Cr uint8) { +func (i *fUint8) Get8(x, y int) (Y uint8, Cb uint8, Cr uint8) { return i.GetNative(x, y) } -func (i *FrameUint8) Properties() Properties { +func (i *fUint8) Properties() Properties { return i.properties } -func (i *FrameUint8) PTS() int64 { +func (i *fUint8) PTS() int64 { return i.Pts } -func (i *FrameUint8) GetNative(x, y int) (Y uint8, Cb uint8, Cr uint8) { +func (i *fUint8) NextPTS() int64 { + return i.NextPts +} + +func (i *fUint8) GetNative(x, y int) (Y uint8, Cb uint8, Cr uint8) { Yindex := x + y*i.properties.Width Cwidth := (i.properties.Width * int(i.properties.ColorSpace.ChromaSampling.A)) / int(i.properties.ColorSpace.ChromaSampling.J) @@ -69,52 +46,52 @@ func (i *FrameUint8) GetNative(x, y int) (Y uint8, Cb uint8, Cr uint8) { return } -func (i *FrameUint8) FillNativeLuma(buf []uint8) { +func (i *fUint8) FillNativeLuma(buf []uint8) { copy(i.Y, buf) } -func (i *FrameUint8) FillNativeCb(buf []uint8) { +func (i *fUint8) FillNativeCb(buf []uint8) { copy(i.Cb, buf) } -func (i *FrameUint8) FillNativeCr(buf []uint8) { +func (i *fUint8) FillNativeCr(buf []uint8) { copy(i.Cr, buf) } -func (i *FrameUint8) GetNativeLuma() []uint8 { +func (i *fUint8) GetNativeLuma() []uint8 { return i.Y } -func (i *FrameUint8) GetNativeCb() []uint8 { +func (i *fUint8) GetNativeCb() []uint8 { return i.Cb } -func (i *FrameUint8) GetNativeCr() []uint8 { +func (i *fUint8) GetNativeCr() []uint8 { return i.Cr } -func (i *FrameUint8) GetNativeJoint() []uint8 { +func (i *fUint8) GetNativeJoint() []uint8 { // Component slices are allocated as a single buffer return i.Y[:len(i.Y)+len(i.Cb)+len(i.Cr)] } -func (i *FrameUint8) GetJoint() []byte { +func (i *fUint8) GetJoint() []byte { return i.GetNativeJoint() } -func (i *FrameUint8) GetLuma() []byte { +func (i *fUint8) GetLuma() []byte { return i.GetNativeLuma() } -func (i *FrameUint8) GetCb() []byte { +func (i *fUint8) GetCb() []byte { return i.GetNativeCb() } -func (i *FrameUint8) GetCr() []byte { +func (i *fUint8) GetCr() []byte { return i.GetNativeCr() } -func (i *FrameUint8) Return() { +func (i *fUint8) Return() { if i.ret != nil { i.ret(i) } diff --git a/frame/pool.go b/frame/pool.go index 3ac62bf..f5a1197 100644 --- a/frame/pool.go +++ b/frame/pool.go @@ -35,7 +35,7 @@ func NewPool(properties Properties) (*Pool, error) { if properties.ColorSpace.BitDepth > 8 { p.p.New = func() any { buf := make([]uint16, p.frameSize/2) - return &FrameUint16{ + return &fUint16{ ret: p.Put, properties: properties, Y: buf[:iY], @@ -46,7 +46,7 @@ func NewPool(properties Properties) (*Pool, error) { } else { p.p.New = func() any { buf := make([]uint8, p.frameSize) - return &FrameUint8{ + return &fUint8{ ret: p.Put, properties: properties, Y: buf[:iY], @@ -63,13 +63,15 @@ func (p *Pool) Properties() Properties { return p.properties } -func (p *Pool) Get(pts int64) Frame { +func (p *Pool) Get(pts, nextPts int64) Frame { switch tf := p.p.Get().(type) { - case *FrameUint16: + case *fUint16: tf.Pts = pts + tf.NextPts = nextPts return tf - case *FrameUint8: + case *fUint8: tf.Pts = pts + tf.NextPts = nextPts return tf default: panic("unsupported type") @@ -78,7 +80,7 @@ func (p *Pool) Get(pts int64) Frame { func (p *Pool) Put(f Frame) { switch tf := f.(type) { - case *FrameUint16: + case *fUint16: if tf.properties != p.properties { panic("unsupported properties") } @@ -86,7 +88,7 @@ func (p *Pool) Put(f Frame) { panic("unsupported data size") } p.p.Put(tf) - case *FrameUint8: + case *fUint8: if tf.properties != p.properties { panic("unsupported properties") } diff --git a/frame/statistics.go b/frame/statistics.go new file mode 100644 index 0000000..9cbee9f --- /dev/null +++ b/frame/statistics.go @@ -0,0 +1,85 @@ +package frame + +import "math" + +type Statistics map[string]any + +const ( + StatisticsKeyFramesIn = "frames_in" + StatisticsKeyFramesOut = "frames_out" + StatisticsKeyLastFrameOut = "last_frame_out" +) + +func (s Statistics) FramesIn() int { + if n, ok := s[StatisticsKeyFramesIn].(int); ok { + return n + } + return -1 +} + +func (s Statistics) FramesOut() int { + if n, ok := s[StatisticsKeyFramesOut].(int); ok { + return n + } + return -1 +} + +func (s Statistics) LastFrameOut() *FrameStatistics { + if n, ok := s[StatisticsKeyLastFrameOut].(FrameStatistics); ok { + return &n + } + return nil +} + +type FrameStatistics map[string]any + +const ( + FrameStatisticsKeyNumber = "number" + FrameStatisticsKeyPts = "pts" + FrameStatisticsKeyQuantizer = "quantizer" + FrameStatisticsKeySize = "size" + FrameStatisticsKeyIsKeyFrame = "is_key" + FrameStatisticsKeyIsIntraFrame = "is_intra" +) + +func (s FrameStatistics) Number() int { + if n, ok := s[FrameStatisticsKeyNumber].(int); ok { + return n + } + return -1 +} + +func (s FrameStatistics) Size() int { + if n, ok := s[FrameStatisticsKeySize].(int); ok { + return n + } + return -1 +} + +func (s FrameStatistics) PTS() int64 { + if n, ok := s[FrameStatisticsKeyPts].(int64); ok { + return n + } + return -1 +} + +func (s FrameStatistics) Quantizer() float64 { + if n, ok := s[FrameStatisticsKeyQuantizer].(float64); ok { + return n + } + return math.NaN() +} + +func (s FrameStatistics) IsKeyFrame() bool { + if n, ok := s[FrameStatisticsKeyIsKeyFrame].(bool); ok { + return n + } + return false +} + +func (s FrameStatistics) IsIntraFrame() bool { + if n, ok := s[FrameStatisticsKeyIsIntraFrame].(bool); ok { + return n + } + return false +} diff --git a/frame/stream.go b/frame/stream.go index d7856d6..8a8f3df 100644 --- a/frame/stream.go +++ b/frame/stream.go @@ -226,17 +226,19 @@ func (s *Stream) Monochrome() *Stream { switch typedFrame := f.(type) { case TypedFrame[uint8]: - channel <- &FrameUint8{ + channel <- &fUint8{ properties: frameProps, Pts: typedFrame.PTS(), + NextPts: typedFrame.NextPTS(), Y: typedFrame.GetNativeLuma(), Cb: nil, Cr: nil, } case TypedFrame[uint16]: - channel <- &FrameUint16{ + channel <- &fUint16{ properties: frameProps, Pts: typedFrame.PTS(), + NextPts: typedFrame.NextPTS(), Y: typedFrame.GetNativeLuma(), Cb: nil, Cr: nil, diff --git a/utilities/libvmaf/libvmaf.go b/utilities/libvmaf/libvmaf.go index 4cf6520..1e1cc7b 100644 --- a/utilities/libvmaf/libvmaf.go +++ b/utilities/libvmaf/libvmaf.go @@ -121,26 +121,12 @@ func (v *VMAF) frameToPicture(f frame.Frame) *C.VmafPicture { if p := v.allocatePicture(f.Properties()); p == nil { return nil } else { - //TODO: check validity of lengths - if f16, ok := f.(frame.TypedFrame[uint16]); ok { - yPlane := unsafe.Slice((*uint16)(p.data[0]), len(f16.GetNativeLuma())) - uPlane := unsafe.Slice((*uint16)(p.data[1]), len(f16.GetNativeCb())) - vPlane := unsafe.Slice((*uint16)(p.data[2]), len(f16.GetNativeCr())) - copy(yPlane, f16.GetNativeLuma()) - copy(uPlane, f16.GetNativeCb()) - copy(vPlane, f16.GetNativeCr()) - } else if f8, ok := f.(frame.TypedFrame[uint8]); ok { - yPlane := unsafe.Slice((*uint8)(p.data[0]), len(f8.GetNativeLuma())) - uPlane := unsafe.Slice((*uint8)(p.data[1]), len(f8.GetNativeCb())) - vPlane := unsafe.Slice((*uint8)(p.data[2]), len(f8.GetNativeCr())) - copy(yPlane, f8.GetNativeLuma()) - copy(uPlane, f8.GetNativeCb()) - copy(vPlane, f8.GetNativeCr()) - } else { - // not supported frame - v.deallocatePicture(p) - return nil - } + luma := f.GetLuma() + cb := f.GetCb() + cr := f.GetCr() + copy(unsafe.Slice((*byte)(p.data[0]), len(luma)), luma) + copy(unsafe.Slice((*byte)(p.data[1]), len(cb)), cb) + copy(unsafe.Slice((*byte)(p.data[2]), len(cr)), cr) return p }