diff --git a/README.md b/README.md index f7fd5a3..6869b3f 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ # Supported * y4m pipes * 4:4:4, 4:2:0, and probably 4:2:2 and 4:0:0. -* 8, 10, 12 bit depth. Probably 14 and 16 as well, and lower than 8 (but why). +* 8, 10, 12 bit depth. Probably 14 and 16 as well. * TODO: make list per encoder and decoder. # TODO diff --git a/color/chroma.go b/color/chroma.go new file mode 100644 index 0000000..1096793 --- /dev/null +++ b/color/chroma.go @@ -0,0 +1,50 @@ +package color + +const ( + ChromaSamplePositionUnspecified ChromaSamplePosition = iota + ChromaSamplePositionLeft + ChromaSamplePositionCenter + ChromaSamplePositionTopLeft + ChromaSamplePositionTop + ChromaSamplePositionBottomLeft + ChromaSamplePositionBottom + ChromaSamplePositionNB +) + +type ChromaSamplePosition byte + +// ChromaSampling The subsampling scheme is commonly expressed as a three-part ratio J : A : B (e.g. 4:2:2), that describe the number of luminance and chrominance samples in a conceptual region that is J pixels wide and 2 pixels high. +type ChromaSampling struct { + // J horizontal sampling reference (width of the conceptual region). Usually, 4. + J byte + // A number of chrominance samples (Cb, Cr) in the first row of J pixels. + A byte + // B number of changes of chrominance samples (Cb, Cr) between first and second row of J pixels. 
Note that B has to be either zero or equal to A + B byte +} + +// ElementPixels returns the number of pixels for a full element +func (s ChromaSampling) ElementPixels() int { + return int(s.J) * 2 +} + +// ElementSamples returns the number of actual samples (total Y, Cb, Cr) for an encoded element +func (s ChromaSampling) ElementSamples() int { + return int(s.J)*2 + s.ElementChromaSamples() +} + +func (s ChromaSampling) ElementChromaSamples() int { + return int(s.A)*2 + int(s.B)*2 +} + +func (s ChromaSampling) PlaneLumaSamples(width, height int) int { + return width * height +} + +func (s ChromaSampling) PlaneCrSamples(width, height int) int { + return int((int64(width) * int64(height) * (int64(s.A) + int64(s.B))) / int64(s.ElementPixels())) +} + +func (s ChromaSampling) PlaneCbSamples(width, height int) int { + return int((int64(width) * int64(height) * (int64(s.A) + int64(s.B))) / int64(s.ElementPixels())) +} diff --git a/color/colorspace.go b/color/colorspace.go deleted file mode 100644 index fe58fa9..0000000 --- a/color/colorspace.go +++ /dev/null @@ -1,195 +0,0 @@ -package color - -import ( - "encoding/binary" - "errors" - "fmt" - "strconv" - "strings" -) - -// SubsamplingScheme The subsampling scheme is commonly expressed as a three-part ratio J : A : B (e.g. 4:2:2), that describe the number of luminance and chrominance samples in a conceptual region that is J pixels wide and 2 pixels high. -type SubsamplingScheme struct { - // J horizontal sampling reference (width of the conceptual region). Usually, 4. - J byte - // A number of chrominance samples (Cb, Cr) in the first row of J pixels. - A byte - // B number of changes of chrominance samples (Cb, Cr) between first and second row of J pixels. 
Note that B has to be either zero or equal to A - B byte -} - -// TODO: add color primaries -type ColorFormat struct { - Subsampling SubsamplingScheme - BitDepth byte -} - -func (c ColorFormat) ToInteger() uint32 { - return binary.BigEndian.Uint32([]byte{c.Subsampling.J, c.Subsampling.A, c.Subsampling.B, c.BitDepth}) -} - -// ElementPixels returns the number of pixels for a full element -func (c ColorFormat) ElementPixels() int { - return int(c.Subsampling.J) * 2 -} - -// ElementSamples returns the number of actual samples (total Y, Cb, Cr) for an encoded element -func (c ColorFormat) ElementSamples() int { - return int(c.Subsampling.J)*2 + c.ElementChromaSamples() -} - -func (c ColorFormat) ElementChromaSamples() int { - return int(c.Subsampling.A)*2 + int(c.Subsampling.B)*2 -} - -func (c ColorFormat) PlaneLumaSamples(width, height int) int { - return width * height -} - -func (c ColorFormat) PlaneCrSamples(width, height int) int { - return int((int64(width) * int64(height) * (int64(c.Subsampling.A) + int64(c.Subsampling.B))) / int64(c.ElementPixels())) -} - -func (c ColorFormat) PlaneCbSamples(width, height int) int { - return int((int64(width) * int64(height) * (int64(c.Subsampling.A) + int64(c.Subsampling.B))) / int64(c.ElementPixels())) -} - -func (c ColorFormat) FrameSizePacked(width, height int) (int, error) { - a1 := int64(width) * int64(height) - a2 := int64(c.ElementPixels()) - if a1%a2 != 0 { - return 0, errors.New("not divisible pixels") - } - a3 := (a1 / a2) * int64(c.ElementSamples()) * int64(c.BitDepth) - if a3%8 != 0 { - return 0, errors.New("not divisible size") - } - - return int(a3 / 8), nil -} - -func (c ColorFormat) FrameSize(width, height int) (int, error) { - actualBitDepth := int64(c.BitDepth) - if actualBitDepth&0b111 != 0 { - actualBitDepth = ((actualBitDepth + 8) >> 3) << 3 - } - - a1 := int64(width) * int64(height) - a2 := int64(c.ElementPixels()) - if a1%a2 != 0 { - return 0, errors.New("not divisible pixels") - } - a3 := (a1 / a2) * 
int64(c.ElementSamples()) * actualBitDepth - if a3%8 != 0 { - return 0, errors.New("not divisible size") - } - - return int(a3 / 8), nil -} - -func (c ColorFormat) check() error { - if c.Subsampling.J <= 0 { - return fmt.Errorf("unsupported J %d", c.Subsampling.J) - } - if c.Subsampling.A < 0 && c.Subsampling.A > c.Subsampling.J { - return fmt.Errorf("unsupported A %d", c.Subsampling.A) - } - if c.Subsampling.B != 0 && c.Subsampling.B != c.Subsampling.A { - return fmt.Errorf("unsupported B %d", c.Subsampling.B) - } - if c.BitDepth != 8 && c.BitDepth != 10 && c.BitDepth != 12 && c.BitDepth != 14 && c.BitDepth != 16 { - return fmt.Errorf("unsupported BitDepth %d", c.BitDepth) - } - - return nil -} - -func NewColorFormatFromInteger(colorFormat uint32) (ColorFormat, error) { - buf := make([]byte, 4) - binary.BigEndian.PutUint32(buf, colorFormat) - - space := ColorFormat{ - Subsampling: SubsamplingScheme{ - J: buf[0], - A: buf[1], - B: buf[2], - }, - BitDepth: buf[3], - } - - return space, space.check() -} - -func NewColorFormatFromString(colorFormat string) (ColorFormat, error) { - colorFormat = strings.ToLower(colorFormat) - if colorFormat == "420mpeg2" { //todo: chroma location, left - return NewColorFormatFromString("420p8") - } else if colorFormat == "420j" || colorFormat == "420jpeg" { //todo: chroma location, center - return NewColorFormatFromString("420p8") - } else if colorFormat == "mono" { - return NewColorFormatFromString("400p8") - } else if colorFormat == "mono10" { - return NewColorFormatFromString("400p10") - } else if colorFormat == "mono12" { - return NewColorFormatFromString("400p12") - } else if colorFormat == "mono14" { - return NewColorFormatFromString("400p14") - } else if colorFormat == "mono16" { - return NewColorFormatFromString("400p16") - } - - splits := strings.Split(colorFormat, "p") - if len(splits) == 1 { //default 8 bit - splits = append(splits, "8") - } - - space := ColorFormat{} - switch strings.ReplaceAll(splits[0], ":", "") { - case 
"400": - space.Subsampling.J = 4 - space.Subsampling.A = 0 - space.Subsampling.B = 0 - case "420": - space.Subsampling.J = 4 - space.Subsampling.A = 2 - space.Subsampling.B = 0 - case "422": - space.Subsampling.J = 4 - space.Subsampling.A = 2 - space.Subsampling.B = 2 - case "444": - space.Subsampling.J = 4 - space.Subsampling.A = 4 - space.Subsampling.B = 4 - default: - return space, fmt.Errorf("unsupported Chroma Subsampling %s", splits[0]) - } - - n, err := strconv.Atoi(splits[1]) - if err != nil { - return space, err - } - space.BitDepth = byte(n) - - return space, space.check() -} - -func newColorFormatFromStringInternal(colorFormat string) ColorFormat { - space, _ := NewColorFormatFromString(colorFormat) - return space -} - -var ( - Space420 ColorFormat = newColorFormatFromStringInternal("420") - Space422 ColorFormat = newColorFormatFromStringInternal("422") - Space444 ColorFormat = newColorFormatFromStringInternal("444") - Space420P10 ColorFormat = newColorFormatFromStringInternal("420p10") - Space422P10 ColorFormat = newColorFormatFromStringInternal("422p10") - Space444P10 ColorFormat = newColorFormatFromStringInternal("444p10") - Space420P12 ColorFormat = newColorFormatFromStringInternal("420p12") - Space422P12 ColorFormat = newColorFormatFromStringInternal("422p12") - Space444P12 ColorFormat = newColorFormatFromStringInternal("444p12") - Space420P16 ColorFormat = newColorFormatFromStringInternal("420p16") - Space422P16 ColorFormat = newColorFormatFromStringInternal("422p16") - Space444P16 ColorFormat = newColorFormatFromStringInternal("444p16") -) diff --git a/color/space.go b/color/space.go new file mode 100644 index 0000000..28f10ae --- /dev/null +++ b/color/space.go @@ -0,0 +1,166 @@ +package color + +import ( + "errors" + "fmt" + "strconv" + "strings" +) + +type Space struct { + ChromaSampling ChromaSampling + ChromaSamplePosition ChromaSamplePosition + BitDepth byte +} + +func (c Space) String() string { + if c.ChromaSampling.J == 4 && 
c.ChromaSampling.A == 2 && c.ChromaSampling.B == 0 && c.BitDepth == 8 { + switch c.ChromaSamplePosition { + case ChromaSamplePositionCenter: + return "420jpeg" + case ChromaSamplePositionTopLeft: + return "420paldv" + case ChromaSamplePositionLeft: + return "420mpeg2" + } + } + if c.ChromaSampling.J == 4 && c.ChromaSampling.A == 0 && c.ChromaSampling.B == 0 { + return fmt.Sprintf("mono%d", c.BitDepth) + } + + return fmt.Sprintf("%d%d%dp%d", c.ChromaSampling.J, c.ChromaSampling.A, c.ChromaSampling.B, c.BitDepth) +} + +func (c Space) FrameSizePacked(width, height int) (int, error) { + a1 := int64(width) * int64(height) + a2 := int64(c.ChromaSampling.ElementPixels()) + if a2 == 0 || a1%a2 != 0 { // a2 is 0 when J is 0 (zero-value Space); test it first so the modulo cannot panic + return 0, errors.New("not divisible pixels") + } + a3 := (a1 / a2) * int64(c.ChromaSampling.ElementSamples()) * int64(c.BitDepth) + if a3%8 != 0 { + return 0, errors.New("not divisible size") + } + + return int(a3 / 8), nil +} + +func (c Space) FrameSize(width, height int) (int, error) { + actualBitDepth := int64(c.BitDepth) + if actualBitDepth&0b111 != 0 { + actualBitDepth = ((actualBitDepth + 8) >> 3) << 3 + } + + a1 := int64(width) * int64(height) + a2 := int64(c.ChromaSampling.ElementPixels()) + if a2 == 0 || a1%a2 != 0 { // a2 is 0 when J is 0 (zero-value Space); test it first so the modulo cannot panic + return 0, errors.New("not divisible pixels") + } + a3 := (a1 / a2) * int64(c.ChromaSampling.ElementSamples()) * actualBitDepth + if a3%8 != 0 { + return 0, errors.New("not divisible size") + } + + return int(a3 / 8), nil +} + +func (c Space) check() error { + if c.ChromaSampling.J <= 0 { + return fmt.Errorf("unsupported J %d", c.ChromaSampling.J) + } + if c.ChromaSampling.A > c.ChromaSampling.J { // A is an unsigned byte, so only the upper bound can be violated + return fmt.Errorf("unsupported A %d", c.ChromaSampling.A) + } + if c.ChromaSampling.B != 0 && c.ChromaSampling.B != c.ChromaSampling.A { + return fmt.Errorf("unsupported B %d", c.ChromaSampling.B) + } + if c.BitDepth != 8 && c.BitDepth != 10 && c.BitDepth != 12 && c.BitDepth != 14 && c.BitDepth != 16 { + return fmt.Errorf("unsupported BitDepth %d",
c.BitDepth) + } + + return nil +} + +func NewColorFormatFromString(colorFormat string) (Space, error) { + colorFormat = strings.ToLower(colorFormat) + if colorFormat == "420paldv" { + c, err := NewColorFormatFromString("420p8") + c.ChromaSamplePosition = ChromaSamplePositionTopLeft + return c, err + } else if colorFormat == "420mpeg2" { + c, err := NewColorFormatFromString("420p8") + c.ChromaSamplePosition = ChromaSamplePositionLeft + return c, err + } else if colorFormat == "420" || colorFormat == "420jpeg" || colorFormat == "420j" { + c, err := NewColorFormatFromString("420p8") + c.ChromaSamplePosition = ChromaSamplePositionCenter + return c, err + } else if colorFormat == "mono" || colorFormat == "mono8" { // "mono8" is what Space.String() emits for 4:0:0 at 8 bits + return NewColorFormatFromString("400p8") + } else if colorFormat == "mono10" { + return NewColorFormatFromString("400p10") + } else if colorFormat == "mono12" { + return NewColorFormatFromString("400p12") + } else if colorFormat == "mono14" { + return NewColorFormatFromString("400p14") + } else if colorFormat == "mono16" { + return NewColorFormatFromString("400p16") + } + + splits := strings.Split(colorFormat, "p") + if len(splits) == 1 { //default 8 bit + splits = append(splits, "8") + } + + space := Space{ + ChromaSamplePosition: ChromaSamplePositionUnspecified, + } + switch strings.ReplaceAll(splits[0], ":", "") { + case "400": + space.ChromaSampling.J = 4 + space.ChromaSampling.A = 0 + space.ChromaSampling.B = 0 + case "420": + space.ChromaSampling.J = 4 + space.ChromaSampling.A = 2 + space.ChromaSampling.B = 0 + case "422": + space.ChromaSampling.J = 4 + space.ChromaSampling.A = 2 + space.ChromaSampling.B = 2 + case "444": + space.ChromaSampling.J = 4 + space.ChromaSampling.A = 4 + space.ChromaSampling.B = 4 + default: + return space, fmt.Errorf("unsupported Chroma ChromaSampling %s", splits[0]) + } + + n, err := strconv.ParseUint(splits[1], 10, 8) // bitSize 8: reject depths that would wrap in the byte conversion below (e.g. "264" or "-248" used to pass as 8) + if err != nil { + return space, err + } + space.BitDepth = byte(n) + + return space, space.check() +} + +func
newColorFormatFromStringInternal(colorFormat string) Space { + space, _ := NewColorFormatFromString(colorFormat) + return space +} + +var ( + Space420 Space = newColorFormatFromStringInternal("420") + Space422 Space = newColorFormatFromStringInternal("422") + Space444 Space = newColorFormatFromStringInternal("444") + Space420P10 Space = newColorFormatFromStringInternal("420p10") + Space422P10 Space = newColorFormatFromStringInternal("422p10") + Space444P10 Space = newColorFormatFromStringInternal("444p10") + Space420P12 Space = newColorFormatFromStringInternal("420p12") + Space422P12 Space = newColorFormatFromStringInternal("422p12") + Space444P12 Space = newColorFormatFromStringInternal("444p12") + Space420P16 Space = newColorFormatFromStringInternal("420p16") + Space422P16 Space = newColorFormatFromStringInternal("422p16") + Space444P16 Space = newColorFormatFromStringInternal("444p16") +) diff --git a/decoder/libdav1d/libdav1d.go b/decoder/libdav1d/libdav1d.go index 3a84d8d..9575790 100644 --- a/decoder/libdav1d/libdav1d.go +++ b/decoder/libdav1d/libdav1d.go @@ -69,7 +69,7 @@ func NewDecoder(r io.Reader, settings map[string]any) (d *Decoder, err error) { Width: fP.Width, Height: fP.Height, PixelAspectRatio: fP.PixelAspectRatio, - ColorFormat: fP.ColorFormat, + ColorSpace: fP.ColorSpace, FrameRate: utilities.Ratio{ Numerator: int(d.h.TimebaseDenominator), Denominator: int(d.h.TimebaseNumerator), @@ -170,13 +170,13 @@ func (d *Decoder) pictureToFrame() (frame.Frame, error) { switch d.picture.p.layout { case C.DAV1D_PIXEL_LAYOUT_I400: - properties.ColorFormat.Subsampling = color.SubsamplingScheme{J: 4, A: 0, B: 0} + properties.ColorSpace.ChromaSampling = color.ChromaSampling{J: 4, A: 0, B: 0} case C.DAV1D_PIXEL_LAYOUT_I420: - properties.ColorFormat.Subsampling = color.SubsamplingScheme{J: 4, A: 2, B: 0} + properties.ColorSpace.ChromaSampling = color.ChromaSampling{J: 4, A: 2, B: 0} case C.DAV1D_PIXEL_LAYOUT_I422: - properties.ColorFormat.Subsampling = 
color.SubsamplingScheme{J: 4, A: 2, B: 2} + properties.ColorSpace.ChromaSampling = color.ChromaSampling{J: 4, A: 2, B: 2} case C.DAV1D_PIXEL_LAYOUT_I444: - properties.ColorFormat.Subsampling = color.SubsamplingScheme{J: 4, A: 4, B: 4} + properties.ColorSpace.ChromaSampling = color.ChromaSampling{J: 4, A: 4, B: 4} } if d.picture.p.layout != C.DAV1D_PIXEL_LAYOUT_I400 { @@ -193,7 +193,7 @@ func (d *Decoder) pictureToFrame() (frame.Frame, error) { chromaHeight = (properties.Height + ssVer) >> ssVer } - properties.ColorFormat.BitDepth = byte(bitDepth) + properties.ColorSpace.BitDepth = byte(bitDepth) properties.FullColorRange = false if d.picture.seq_hdr.color_range == 1 { diff --git a/decoder/y4m/y4m.go b/decoder/y4m/y4m.go index 99a6997..7aaafd8 100644 --- a/decoder/y4m/y4m.go +++ b/decoder/y4m/y4m.go @@ -166,7 +166,7 @@ func (s *Decoder) GetFrame() (parameters map[Parameter][]string, frameObject fra return nil, nil, err } - if s.properties.ColorFormat.BitDepth > 8 { + if s.properties.ColorSpace.BitDepth > 8 { //it's copied below defer s.bufPool.Put(buf) if frameObject, err = frame.NewUint16FrameFromBytes(s.properties.FrameProperties(), int64(s.frameCounter), buf); err != nil { @@ -312,7 +312,7 @@ func (s *Decoder) parseParameters() (err error) { return err } case ParameterColorFormat: - if s.properties.ColorFormat, err = color.NewColorFormatFromString(values[0]); err != nil { + if s.properties.ColorSpace, err = color.NewColorFormatFromString(values[0]); err != nil { return err } case ParameterExtension: @@ -336,7 +336,7 @@ func (s *Decoder) parseParameters() (err error) { //TODO: check for missing values width, height, colorformat etc. 
- s.frameSize, err = s.properties.ColorFormat.FrameSize(s.properties.Width, s.properties.Height) + s.frameSize, err = s.properties.ColorSpace.FrameSize(s.properties.Width, s.properties.Height) if seeker, ok := s.r.(io.Seeker); ok { if index, err := seeker.Seek(0, io.SeekCurrent); err == nil { diff --git a/encoder/libaom/libaom.go b/encoder/libaom/libaom.go index 0e0f27a..b6f5e91 100644 --- a/encoder/libaom/libaom.go +++ b/encoder/libaom/libaom.go @@ -69,24 +69,29 @@ func NewEncoder(w io.Writer, properties frame.StreamProperties, settings map[str } switch true { - case properties.ColorFormat.Subsampling.J == 4 && properties.ColorFormat.Subsampling.A == 4 && properties.ColorFormat.Subsampling.B == 4: + case properties.ColorSpace.ChromaSampling.J == 4 && properties.ColorSpace.ChromaSampling.A == 4 && properties.ColorSpace.ChromaSampling.B == 4: imageFormat = C.AOM_IMG_FMT_I444 e.cfg.g_profile = 1 - case properties.ColorFormat.Subsampling.J == 4 && properties.ColorFormat.Subsampling.A == 2 && properties.ColorFormat.Subsampling.B == 2: + case properties.ColorSpace.ChromaSampling.J == 4 && properties.ColorSpace.ChromaSampling.A == 2 && properties.ColorSpace.ChromaSampling.B == 2: imageFormat = C.AOM_IMG_FMT_I422 e.cfg.g_profile = 2 - case properties.ColorFormat.Subsampling.J == 4 && properties.ColorFormat.Subsampling.A == 2 && properties.ColorFormat.Subsampling.B == 0: + case properties.ColorSpace.ChromaSampling.J == 4 && properties.ColorSpace.ChromaSampling.A == 2 && properties.ColorSpace.ChromaSampling.B == 0: imageFormat = C.AOM_IMG_FMT_I420 e.cfg.g_profile = 0 + case properties.ColorSpace.ChromaSampling.J == 4 && properties.ColorSpace.ChromaSampling.A == 0 && properties.ColorSpace.ChromaSampling.B == 0: + //mono is defined as 4:2:0, but monochrome is set on config + imageFormat = C.AOM_IMG_FMT_I420 + e.cfg.g_profile = 0 + e.cfg.monochrome = 1 default: return nil, errors.New("unsupported input chroma subsampling") } - e.cfg.g_input_bit_depth = 
C.uint(properties.ColorFormat.BitDepth) - e.cfg.g_bit_depth = C.aom_bit_depth_t(properties.ColorFormat.BitDepth) + e.cfg.g_input_bit_depth = C.uint(properties.ColorSpace.BitDepth) + e.cfg.g_bit_depth = C.aom_bit_depth_t(properties.ColorSpace.BitDepth) - if e.cfg.g_bit_depth >= 12 { + if e.cfg.g_bit_depth >= 12 { //only bitdepths up to 12 are supported, see aom_bit_depth_t e.cfg.g_bit_depth = 12 e.cfg.g_profile = 2 } @@ -155,9 +160,13 @@ func NewEncoder(w io.Writer, properties frame.StreamProperties, settings map[str if val, ok := v.(string); ok { strVal = C.CString(val) } else if val, ok := v.(int); ok { - strVal = C.CString(strconv.Itoa(val)) + strVal = C.CString(strconv.FormatInt(int64(val), 10)) } else if val, ok := v.(int64); ok { - strVal = C.CString(strconv.Itoa(int(val))) + strVal = C.CString(strconv.FormatInt(val, 10)) + } else if val, ok := v.(uint); ok { + strVal = C.CString(strconv.FormatUint(uint64(val), 10)) + } else if val, ok := v.(uint64); ok { + strVal = C.CString(strconv.FormatUint(val, 10)) } if strVal != nil { @@ -202,24 +211,23 @@ func (e *Encoder) EncodeStream(stream *frame.Stream) error { func (e *Encoder) Encode(f frame.Frame) error { - if int8F, ok := f.(frame.TypedFrame[uint8]); ok { - e.raw.planes[0] = (*C.uint8_t)(unsafe.Pointer(&int8F.GetNativeLuma()[0])) - e.raw.planes[1] = (*C.uint8_t)(unsafe.Pointer(&int8F.GetNativeCb()[0])) - e.raw.planes[2] = (*C.uint8_t)(unsafe.Pointer(&int8F.GetNativeCr()[0])) - } else if int16F, ok := f.(frame.TypedFrame[uint16]); ok { - e.raw.planes[0] = (*C.uint8_t)(unsafe.Pointer(&int16F.GetNativeLuma()[0])) - e.raw.planes[1] = (*C.uint8_t)(unsafe.Pointer(&int16F.GetNativeCb()[0])) - e.raw.planes[2] = (*C.uint8_t)(unsafe.Pointer(&int16F.GetNativeCr()[0])) + if f8, ok := f.(frame.TypedFrame[uint8]); ok { + e.raw.planes[0] = (*C.uint8_t)(unsafe.Pointer(&f8.GetNativeLuma()[0])) + e.raw.planes[1] = (*C.uint8_t)(unsafe.Pointer(&f8.GetNativeCb()[0])) + e.raw.planes[2] = 
(*C.uint8_t)(unsafe.Pointer(&f8.GetNativeCr()[0])) + } else if f16, ok := f.(frame.TypedFrame[uint16]); ok { + e.raw.planes[0] = (*C.uint8_t)(unsafe.Pointer(&f16.GetNativeLuma()[0])) + e.raw.planes[1] = (*C.uint8_t)(unsafe.Pointer(&f16.GetNativeCb()[0])) + e.raw.planes[2] = (*C.uint8_t)(unsafe.Pointer(&f16.GetNativeCr()[0])) } + defer runtime.KeepAlive(f) //cleanup pointers defer func() { e.raw.planes[0] = nil e.raw.planes[1] = nil e.raw.planes[2] = nil - }() - defer runtime.KeepAlive(f) if _, err := e.encodeFrame(f.PTS(), e.raw); err != nil { return err @@ -276,6 +284,8 @@ func (e *Encoder) Flush() error { } } + _ = e.w.WriteLength() + return nil } diff --git a/encoder/libx264/libx264.go b/encoder/libx264/libx264.go index fba984d..d55d3aa 100644 --- a/encoder/libx264/libx264.go +++ b/encoder/libx264/libx264.go @@ -53,16 +53,16 @@ func NewEncoder(w io.Writer, properties frame.StreamProperties, settings map[str defaultProfile := "high" switch true { - case properties.ColorFormat.Subsampling.J == 4 && properties.ColorFormat.Subsampling.A == 4 && properties.ColorFormat.Subsampling.B == 4: + case properties.ColorSpace.ChromaSampling.J == 4 && properties.ColorSpace.ChromaSampling.A == 4 && properties.ColorSpace.ChromaSampling.B == 4: e.params.i_csp = C.X264_CSP_I444 defaultProfile = "high444" - case properties.ColorFormat.Subsampling.J == 4 && properties.ColorFormat.Subsampling.A == 2 && properties.ColorFormat.Subsampling.B == 2: + case properties.ColorSpace.ChromaSampling.J == 4 && properties.ColorSpace.ChromaSampling.A == 2 && properties.ColorSpace.ChromaSampling.B == 2: e.params.i_csp = C.X264_CSP_I422 defaultProfile = "high422" - case properties.ColorFormat.Subsampling.J == 4 && properties.ColorFormat.Subsampling.A == 2 && properties.ColorFormat.Subsampling.B == 0: + case properties.ColorSpace.ChromaSampling.J == 4 && properties.ColorSpace.ChromaSampling.A == 2 && properties.ColorSpace.ChromaSampling.B == 0: e.params.i_csp = C.X264_CSP_I420 defaultProfile = "high" - 
case properties.ColorFormat.Subsampling.J == 4 && properties.ColorFormat.Subsampling.A == 0 && properties.ColorFormat.Subsampling.B == 0: + case properties.ColorSpace.ChromaSampling.J == 4 && properties.ColorSpace.ChromaSampling.A == 0 && properties.ColorSpace.ChromaSampling.B == 0: e.params.i_csp = C.X264_CSP_I400 defaultProfile = "high" default: @@ -70,7 +70,7 @@ func NewEncoder(w io.Writer, properties frame.StreamProperties, settings map[str } - if properties.ColorFormat.BitDepth > 8 { + if properties.ColorSpace.BitDepth > 8 { e.params.i_csp |= C.X264_CSP_HIGH_DEPTH if defaultProfile == "high" { defaultProfile = "high10" @@ -80,7 +80,7 @@ func NewEncoder(w io.Writer, properties frame.StreamProperties, settings map[str profile := C.CString(getSettingString(settings, "profile", defaultProfile)) defer C.free(unsafe.Pointer(profile)) - e.params.i_bitdepth = C.int(properties.ColorFormat.BitDepth) + e.params.i_bitdepth = C.int(properties.ColorSpace.BitDepth) e.params.i_width = C.int(properties.Width) e.params.i_height = C.int(properties.Height) @@ -104,9 +104,13 @@ func NewEncoder(w io.Writer, properties frame.StreamProperties, settings map[str if val, ok := v.(string); ok { strVal = C.CString(val) } else if val, ok := v.(int); ok { - strVal = C.CString(strconv.Itoa(val)) + strVal = C.CString(strconv.FormatInt(int64(val), 10)) } else if val, ok := v.(int64); ok { - strVal = C.CString(strconv.Itoa(int(val))) + strVal = C.CString(strconv.FormatInt(val, 10)) + } else if val, ok := v.(uint); ok { + strVal = C.CString(strconv.FormatUint(uint64(val), 10)) + } else if val, ok := v.(uint64); ok { + strVal = C.CString(strconv.FormatUint(val, 10)) } if strVal != nil { @@ -170,42 +174,42 @@ func (e *Encoder) EncodeStream(stream *frame.Stream) error { func (e *Encoder) Encode(f frame.Frame) error { var nal *C.x264_nal_t - var i_nal C.int - var frame_size C.int + var iNal C.int + var frameSize C.int - if int8F, ok := f.(frame.TypedFrame[uint8]); ok { - e.pictureIn.img.plane[0] = 
(*C.uint8_t)(unsafe.Pointer(&int8F.GetNativeLuma()[0])) - e.pictureIn.img.plane[1] = (*C.uint8_t)(unsafe.Pointer(&int8F.GetNativeCb()[0])) - e.pictureIn.img.plane[2] = (*C.uint8_t)(unsafe.Pointer(&int8F.GetNativeCr()[0])) - } else if int16F, ok := f.(frame.TypedFrame[uint16]); ok { - e.pictureIn.img.plane[0] = (*C.uint8_t)(unsafe.Pointer(&int16F.GetNativeLuma()[0])) - e.pictureIn.img.plane[1] = (*C.uint8_t)(unsafe.Pointer(&int16F.GetNativeCb()[0])) - e.pictureIn.img.plane[2] = (*C.uint8_t)(unsafe.Pointer(&int16F.GetNativeCr()[0])) + if f8, ok := f.(frame.TypedFrame[uint8]); ok { + e.pictureIn.img.plane[0] = (*C.uint8_t)(unsafe.Pointer(&f8.GetNativeLuma()[0])) + e.pictureIn.img.plane[1] = (*C.uint8_t)(unsafe.Pointer(&f8.GetNativeCb()[0])) + e.pictureIn.img.plane[2] = (*C.uint8_t)(unsafe.Pointer(&f8.GetNativeCr()[0])) + } else if f16, ok := f.(frame.TypedFrame[uint16]); ok { + e.pictureIn.img.plane[0] = (*C.uint8_t)(unsafe.Pointer(&f16.GetNativeLuma()[0])) + e.pictureIn.img.plane[1] = (*C.uint8_t)(unsafe.Pointer(&f16.GetNativeCb()[0])) + e.pictureIn.img.plane[2] = (*C.uint8_t)(unsafe.Pointer(&f16.GetNativeCr()[0])) } + defer runtime.KeepAlive(f) //cleanup pointers defer func() { e.pictureIn.img.plane[0] = nil e.pictureIn.img.plane[1] = nil e.pictureIn.img.plane[2] = nil }() - defer runtime.KeepAlive(f) e.pictureIn.i_pts = C.int64_t(f.PTS()) - if frame_size = C.x264_encoder_encode(e.h, &nal, &i_nal, e.pictureIn, &e.pictureOut); frame_size < 0 { + if frameSize = C.x264_encoder_encode(e.h, &nal, &iNal, e.pictureIn, &e.pictureOut); frameSize < 0 { return errors.New("error encoding frame") } - if frame_size == 0 { + if frameSize == 0 { return nil } - if i_nal != 1 { + if iNal != 1 { //return errors.New("more than one NAL present") } - buf := unsafe.Slice((*byte)(nal.p_payload), int(frame_size)) + buf := unsafe.Slice((*byte)(nal.p_payload), int(frameSize)) if _, err := e.w.Write(buf); err != nil { return err } @@ -215,23 +219,23 @@ func (e *Encoder) Encode(f frame.Frame) 
error { func (e *Encoder) Flush() error { var nal *C.x264_nal_t - var i_nal C.int - var frame_size C.int + var iNal C.int + var frameSize C.int for C.x264_encoder_delayed_frames(e.h) > 0 { - if frame_size = C.x264_encoder_encode(e.h, &nal, &i_nal, nil, &e.pictureOut); frame_size < 0 { + if frameSize = C.x264_encoder_encode(e.h, &nal, &iNal, nil, &e.pictureOut); frameSize < 0 { return errors.New("error encoding frame") } - if frame_size == 0 { + if frameSize == 0 { return nil } - if i_nal != 1 { + if iNal != 1 { //return errors.New("more than one NAL present") } - buf := unsafe.Slice((*byte)(nal.p_payload), int(frame_size)) + buf := unsafe.Slice((*byte)(nal.p_payload), int(frameSize)) if _, err := e.w.Write(buf); err != nil { return err } diff --git a/frame/frame.go b/frame/frame.go index 78cc9da..f3c9022 100644 --- a/frame/frame.go +++ b/frame/frame.go @@ -34,6 +34,6 @@ type Properties struct { Width int Height int PixelAspectRatio utilities.Ratio - ColorFormat color.ColorFormat + ColorSpace color.Space FullColorRange bool } diff --git a/frame/frame_uint16.go b/frame/frame_uint16.go index 775979d..8e44f27 100644 --- a/frame/frame_uint16.go +++ b/frame/frame_uint16.go @@ -15,11 +15,11 @@ type FrameUint16 struct { } func NewUint16FrameFromBytes(properties Properties, pts int64, data []byte) (*FrameUint16, error) { - if frameLength, _ := properties.ColorFormat.FrameSize(properties.Width, properties.Height); frameLength != len(data) { + if frameLength, _ := properties.ColorSpace.FrameSize(properties.Width, properties.Height); frameLength != len(data) { return nil, errors.New("wrong length of data") } - if properties.ColorFormat.BitDepth >= 16 { + if properties.ColorSpace.BitDepth >= 16 { return nil, errors.New("wrong bit depth") } @@ -27,9 +27,9 @@ func NewUint16FrameFromBytes(properties Properties, pts int64, data []byte) (*Fr copy(buf, unsafe.Slice((*uint16)(unsafe.Pointer(&data[0])), len(data)/2)) runtime.KeepAlive(data) - iY := 
properties.ColorFormat.PlaneLumaSamples(properties.Width, properties.Height) - iCb := properties.ColorFormat.PlaneCbSamples(properties.Width, properties.Height) - iCr := properties.ColorFormat.PlaneCrSamples(properties.Width, properties.Height) + iY := properties.ColorSpace.ChromaSampling.PlaneLumaSamples(properties.Width, properties.Height) + iCb := properties.ColorSpace.ChromaSampling.PlaneCbSamples(properties.Width, properties.Height) + iCr := properties.ColorSpace.ChromaSampling.PlaneCrSamples(properties.Width, properties.Height) return &FrameUint16{ properties: properties, @@ -43,13 +43,13 @@ func NewUint16FrameFromBytes(properties Properties, pts int64, data []byte) (*Fr func (i *FrameUint16) Get16(x, y int) (Y uint16, Cb uint16, Cr uint16) { cy, cb, cr := i.GetNative(x, y) - return cy << (16 - i.properties.ColorFormat.BitDepth), cb << (16 - i.properties.ColorFormat.BitDepth), cr << (16 - i.properties.ColorFormat.BitDepth) + return cy << (16 - i.properties.ColorSpace.BitDepth), cb << (16 - i.properties.ColorSpace.BitDepth), cr << (16 - i.properties.ColorSpace.BitDepth) } func (i *FrameUint16) Get8(x, y int) (Y uint8, Cb uint8, Cr uint8) { cy, cb, cr := i.GetNative(x, y) - return uint8(cy >> (i.properties.ColorFormat.BitDepth - 8)), uint8(cb >> (i.properties.ColorFormat.BitDepth - 8)), uint8(cr >> (i.properties.ColorFormat.BitDepth - 8)) + return uint8(cy >> (i.properties.ColorSpace.BitDepth - 8)), uint8(cb >> (i.properties.ColorSpace.BitDepth - 8)), uint8(cr >> (i.properties.ColorSpace.BitDepth - 8)) } func (i *FrameUint16) Properties() Properties { @@ -63,11 +63,11 @@ func (i *FrameUint16) PTS() int64 { func (i *FrameUint16) GetNative(x, y int) (Y uint16, Cb uint16, Cr uint16) { Yindex := x + y*i.properties.Width - Cwidth := (i.properties.Width * int(i.properties.ColorFormat.Subsampling.A)) / int(i.properties.ColorFormat.Subsampling.J) - if i.properties.ColorFormat.Subsampling.B == 0 { + Cwidth := (i.properties.Width * 
int(i.properties.ColorSpace.ChromaSampling.A)) / int(i.properties.ColorSpace.ChromaSampling.J) + if i.properties.ColorSpace.ChromaSampling.B == 0 { y /= 2 } - Cindex := (x*int(i.properties.ColorFormat.Subsampling.A))/int(i.properties.ColorFormat.Subsampling.J) + y*Cwidth + Cindex := (x*int(i.properties.ColorSpace.ChromaSampling.A))/int(i.properties.ColorSpace.ChromaSampling.J) + y*Cwidth Y = i.Y[Yindex] Cb = i.Cb[Cindex] Cr = i.Cr[Cindex] diff --git a/frame/frame_uint8.go b/frame/frame_uint8.go index aa0940f..dfd490b 100644 --- a/frame/frame_uint8.go +++ b/frame/frame_uint8.go @@ -13,17 +13,17 @@ type FrameUint8 struct { } func NewUint8FrameFromBytes(properties Properties, pts int64, data []byte) (*FrameUint8, error) { - if frameLength, _ := properties.ColorFormat.FrameSize(properties.Width, properties.Height); frameLength != len(data) { + if frameLength, _ := properties.ColorSpace.FrameSize(properties.Width, properties.Height); frameLength != len(data) { return nil, errors.New("wrong length of data") } - if properties.ColorFormat.BitDepth > 8 { + if properties.ColorSpace.BitDepth > 8 { return nil, errors.New("wrong bit depth") } - iY := properties.ColorFormat.PlaneLumaSamples(properties.Width, properties.Height) - iCb := properties.ColorFormat.PlaneCbSamples(properties.Width, properties.Height) - iCr := properties.ColorFormat.PlaneCrSamples(properties.Width, properties.Height) + iY := properties.ColorSpace.ChromaSampling.PlaneLumaSamples(properties.Width, properties.Height) + iCb := properties.ColorSpace.ChromaSampling.PlaneCbSamples(properties.Width, properties.Height) + iCr := properties.ColorSpace.ChromaSampling.PlaneCrSamples(properties.Width, properties.Height) return &FrameUint8{ properties: properties, @@ -37,7 +37,7 @@ func NewUint8FrameFromBytes(properties Properties, pts int64, data []byte) (*Fra func (i *FrameUint8) Get16(x, y int) (Y uint16, Cb uint16, Cr uint16) { cy, cb, cr := i.GetNative(x, y) - return uint16(cy) << (16 - 
i.properties.ColorFormat.BitDepth), uint16(cb) << (16 - i.properties.ColorFormat.BitDepth), uint16(cr) << (16 - i.properties.ColorFormat.BitDepth) + return uint16(cy) << (16 - i.properties.ColorSpace.BitDepth), uint16(cb) << (16 - i.properties.ColorSpace.BitDepth), uint16(cr) << (16 - i.properties.ColorSpace.BitDepth) } func (i *FrameUint8) Get8(x, y int) (Y uint8, Cb uint8, Cr uint8) { @@ -55,11 +55,11 @@ func (i *FrameUint8) PTS() int64 { func (i *FrameUint8) GetNative(x, y int) (Y uint8, Cb uint8, Cr uint8) { Yindex := x + y*i.properties.Width - Cwidth := (i.properties.Width * int(i.properties.ColorFormat.Subsampling.A)) / int(i.properties.ColorFormat.Subsampling.J) - if i.properties.ColorFormat.Subsampling.B == 0 { + Cwidth := (i.properties.Width * int(i.properties.ColorSpace.ChromaSampling.A)) / int(i.properties.ColorSpace.ChromaSampling.J) + if i.properties.ColorSpace.ChromaSampling.B == 0 { y /= 2 } - Cindex := (x*int(i.properties.ColorFormat.Subsampling.A))/int(i.properties.ColorFormat.Subsampling.J) + y*Cwidth + Cindex := (x*int(i.properties.ColorSpace.ChromaSampling.A))/int(i.properties.ColorSpace.ChromaSampling.J) + y*Cwidth Y = i.Y[Yindex] Cb = i.Cb[Cindex] Cr = i.Cr[Cindex] diff --git a/frame/stream.go b/frame/stream.go index c192f44..02a472e 100644 --- a/frame/stream.go +++ b/frame/stream.go @@ -27,8 +27,8 @@ type StreamProperties struct { Height int // PixelAspectRatio could be not populated until the first frame is read. Frame can contain different settings. PixelAspectRatio utilities.Ratio - // ColorFormat could be not populated until the first frame is read. Frame can contain different settings. - ColorFormat color.ColorFormat + // ColorSpace could be not populated until the first frame is read. Frame can contain different settings. 
+ ColorSpace color.Space FrameRate utilities.Ratio FullColorRange bool } @@ -38,7 +38,7 @@ func (p StreamProperties) FrameProperties() Properties { Width: p.Width, Height: p.Height, PixelAspectRatio: p.PixelAspectRatio, - ColorFormat: p.ColorFormat, + ColorSpace: p.ColorSpace, FullColorRange: p.FullColorRange, } } diff --git a/utilities/libvmaf/libvmaf.go b/utilities/libvmaf/libvmaf.go index 2158409..4cf6520 100644 --- a/utilities/libvmaf/libvmaf.go +++ b/utilities/libvmaf/libvmaf.go @@ -16,8 +16,10 @@ import ( "unsafe" ) +var vmafVersion = "vmaf " + C.GoString(C.vmaf_version()) + func Version() string { - return "vmaf " + C.GoString(C.vmaf_version()) + return vmafVersion } type VMAF struct { @@ -85,20 +87,20 @@ func (v *VMAF) allocatePicture(properties frame.Properties) *C.VmafPicture { //todo: reuse these pictures pixFmt := uint32(C.VMAF_PIX_FMT_YUV420P) switch true { - case properties.ColorFormat.Subsampling.J == 4 && properties.ColorFormat.Subsampling.A == 4 && properties.ColorFormat.Subsampling.B == 4: + case properties.ColorSpace.ChromaSampling.J == 4 && properties.ColorSpace.ChromaSampling.A == 4 && properties.ColorSpace.ChromaSampling.B == 4: pixFmt = C.VMAF_PIX_FMT_YUV444P - case properties.ColorFormat.Subsampling.J == 4 && properties.ColorFormat.Subsampling.A == 2 && properties.ColorFormat.Subsampling.B == 2: + case properties.ColorSpace.ChromaSampling.J == 4 && properties.ColorSpace.ChromaSampling.A == 2 && properties.ColorSpace.ChromaSampling.B == 2: pixFmt = C.VMAF_PIX_FMT_YUV422P - case properties.ColorFormat.Subsampling.J == 4 && properties.ColorFormat.Subsampling.A == 2 && properties.ColorFormat.Subsampling.B == 0: + case properties.ColorSpace.ChromaSampling.J == 4 && properties.ColorSpace.ChromaSampling.A == 2 && properties.ColorSpace.ChromaSampling.B == 0: pixFmt = C.VMAF_PIX_FMT_YUV420P - case properties.ColorFormat.Subsampling.J == 4 && properties.ColorFormat.Subsampling.A == 0 && properties.ColorFormat.Subsampling.B == 0: + case 
properties.ColorSpace.ChromaSampling.J == 4 && properties.ColorSpace.ChromaSampling.A == 0 && properties.ColorSpace.ChromaSampling.B == 0: pixFmt = C.VMAF_PIX_FMT_YUV400P default: return nil } var p C.VmafPicture - if ret := C.vmaf_picture_alloc(&p, pixFmt, C.uint(properties.ColorFormat.BitDepth), C.uint(properties.Width), C.uint(properties.Height)); ret != 0 { + if ret := C.vmaf_picture_alloc(&p, pixFmt, C.uint(properties.ColorSpace.BitDepth), C.uint(properties.Width), C.uint(properties.Height)); ret != 0 { return nil } diff --git a/utilities/obuwriter/writer.go b/utilities/obuwriter/writer.go index d744856..a291d29 100644 --- a/utilities/obuwriter/writer.go +++ b/utilities/obuwriter/writer.go @@ -2,12 +2,14 @@ package obuwriter import ( "encoding/binary" + "errors" "git.gammaspectra.live/S.O.N.G/Ignite/utilities" "io" ) type Writer struct { - w io.Writer + w io.Writer + frames uint32 } func NewWriter(w io.Writer, width, height int, fourCC uint32, frameRate utilities.Ratio) (*Writer, error) { @@ -44,11 +46,34 @@ func NewWriter(w io.Writer, width, height int, fourCC uint32, frameRate utilitie func (w *Writer) WriteFrameBytes(pts uint64, data []byte) error { if err := binary.Write(w.w, binary.LittleEndian, uint32(len(data))); err != nil { return err - } else if err = binary.Write(w.w, binary.LittleEndian, uint64(pts)); err != nil { + } else if err = binary.Write(w.w, binary.LittleEndian, pts); err != nil { return err } else if _, err = w.w.Write(data); err != nil { return err } + w.frames++ + return nil } + +// WriteLength writes the Length field for number of frames, if writer was set to io.WriteSeeker +func (w *Writer) WriteLength() error { + if seeker, ok := w.w.(io.WriteSeeker); ok { + if currentIndex, err := seeker.Seek(0, io.SeekCurrent); err != nil { + return err + } else { + if _, err = seeker.Seek(4+2+2+4+2+2+4+4, io.SeekStart); err != nil { + return err + } else if err = binary.Write(w.w, binary.LittleEndian, w.frames); err != nil { + return err + } else 
if _, err = seeker.Seek(currentIndex, io.SeekStart); err != nil { + return err + } + + return nil + } + } else { + return errors.New("writer is not io.WriteSeeker") + } +} diff --git a/utilities/ratio.go b/utilities/ratio.go index b562ed7..9fcb07e 100644 --- a/utilities/ratio.go +++ b/utilities/ratio.go @@ -1,5 +1,7 @@ package utilities +import "fmt" + type Ratio struct { Numerator int Denominator int @@ -8,3 +10,7 @@ type Ratio struct { func (r Ratio) Float64() float64 { return float64(r.Numerator) / float64(r.Denominator) } + +func (r Ratio) String() string { + return fmt.Sprintf("%d:%d", r.Numerator, r.Denominator) +}