xyz2yuv/xyz2yuv.go

428 lines
11 KiB
Go

package main
import "C"
import (
"crypto/sha256"
"encoding/hex"
"flag"
"fmt"
"git.gammaspectra.live/WeebDataHoarder/xyz2yuv/colorspace"
"git.gammaspectra.live/WeebDataHoarder/xyz2yuv/conv"
"git.gammaspectra.live/WeebDataHoarder/xyz2yuv/libav"
"git.gammaspectra.live/WeebDataHoarder/xyz2yuv/libopenjp2"
"gonum.org/v1/gonum/mat"
"io"
"math"
"os"
"runtime"
"slices"
"strings"
"sync"
"sync/atomic"
"time"
"unsafe"
)
// space is the target colorspace description; main selects it from the
// -colorspace flag.
var space colorspace.RelativeSystem
// xyz2rgb is the XYZ -> RGB matrix taken from space.Chromaticity.ConversionXYZ
// and rounded with RoundMatToPrecision using -precision-xyz2rgb.
var xyz2rgb *mat.Dense
// xyz2rgbDenorm is xyz2rgb left-multiplied by a diagonal matrix of
// 1/colorspace.DCINormalizationFactor (computed before rounding, then rounded
// with the same precision).
var xyz2rgbDenorm *mat.Dense
// rgb2yuv is the RGB -> YUV matrix taken from space.YCbCr.ConversionRGB and
// rounded with RoundMatToPrecision using -precision-rgb2yuv.
var rgb2yuv *mat.Dense
// rgb2yuvPremultiplied is rgb2yuv right-multiplied by a diagonal matrix of
// math.MaxUint16 (computed before rounding, then rounded with the same
// precision).
var rgb2yuvPremultiplied *mat.Dense
// rgbGamma is the gamma constant chosen together with space; main passes it to
// conv.InitData when the C converter is in use.
var rgbGamma float64
// ToPacked interleaves three equally sized planes into a single slice laid out
// as a[0], b[0], c[0], a[1], b[1], c[1], ...
//
// The result has length 3*len(a) and capacity 3*len(a)+extra, so callers can
// reserve spare room past the end of the data. It panics with
// "lengths mismatch" when the planes differ in length.
func ToPacked(a, b, c []uint32, extra int) []uint32 {
	n := len(a)
	if !(n == len(b) && n == len(c)) {
		panic("lengths mismatch")
	}

	packedLen := 3 * n
	packed := make([]uint32, packedLen, packedLen+extra)

	// dst walks the packed buffer three samples at a time.
	dst := 0
	for src := 0; src < n; src++ {
		packed[dst], packed[dst+1], packed[dst+2] = a[src], b[src], c[src]
		dst += 3
	}
	return packed
}
// main is the program entry point. It parses the command-line flags, derives
// the XYZ -> RGB and RGB -> YUV matrices for the requested colorspace, and
// then streams the input through a concurrent pipeline:
//
//  1. libav.OpenXYZ12 hands packets to the packet callback.
//  2. A pool of JPEG2000 decoder goroutines decodes packets and packs the
//     X/Y/Z planes into one interleaved buffer.
//  3. A reorder goroutine restores frame order and pairs each decoded frame
//     with a reusable output buffer.
//  4. A pool of pipeline goroutines calls Process on each job.
//  5. A writer goroutine restores frame order again and writes YUV4MPEG2
//     frames to the output file or stdout.
//
// Progress and final statistics are printed to stderr. Every failure panics.
func main() {
	inFile := flag.String("in", "", "Input file")
	startFrame := flag.Uint64("start", 0, "Start frame number inclusive")
	endFrame := flag.Uint64("end", math.MaxUint64, "End frame number exclusive")
	outFile := flag.String("out", "-", "Output file. Use - for stdout")
	colorspaceRelativeSystem := flag.String("colorspace", "rec709_pure", "Colorspace and parameters to convert into. Supported: rec709, rec709_pure, rec709_pure22, rec709_pure24, rec2020, rec2020_pure, rec2020_pure24")
	xyzPrecision := flag.Int("precision-xyz2rgb", 0, "XYZ -> RGB conversion matrix precision. 0 = maximum")
	rgbPrecision := flag.Int("precision-rgb2yuv", 0, "RGB -> YUV conversion matrix precision. 0 = maximum")
	lowres := flag.Uint("lowres", 0, "Feed lowres parameter. Default is full frame")
	useFloatPipeline := flag.Bool("float", false, "Use float pipeline instead of double, although less precise. Very fast.")
	useGoPipeline := flag.Bool("use-go-pipeline", false, "Use Go pipeline, although slower. Does not support float mode.")
	hashOutput := flag.Bool("hash", false, "Hash with SHA256 each output frame for accuracy comparisons")
	decoderThreads := flag.Uint("decoder-threads", 0, "Threads for JPEG2000 decoding. Defaults to number of logical CPU")
	pipelineThreads := flag.Uint("pipeline-threads", 0, "Threads for colorspace conversion pipeline. Defaults to number of logical CPU")
	flag.Parse()

	//C.av_log_set_level(C.AV_LOG_DEBUG)

	// A thread count of 0 means "one per logical CPU".
	numDecoderCpu := int(*decoderThreads)
	if numDecoderCpu == 0 {
		numDecoderCpu = runtime.NumCPU()
	}
	numPipelineCpu := int(*pipelineThreads)
	if numPipelineCpu == 0 {
		numPipelineCpu = runtime.NumCPU()
	}

	useCConverter = !*useGoPipeline
	useFloat = *useFloatPipeline

	switch strings.ToLower(*colorspaceRelativeSystem) {
	case "rec709":
		space = colorspace.SystemRec709
		rgbGamma = colorspace.GammaRec709
	case "rec709_pure":
		space = colorspace.SystemRec709_Pure
		rgbGamma = colorspace.GammaRec709
	case "rec709_pure22":
		space = colorspace.SystemRec709_Pure22
		rgbGamma = colorspace.Gamma22
	case "rec709_pure24":
		space = colorspace.SystemRec709_Pure24
		rgbGamma = colorspace.Gamma24
	case "rec2020":
		space = colorspace.SystemRec2020
		rgbGamma = colorspace.GammaRec2020
	case "rec2020_pure":
		space = colorspace.SystemRec2020_Pure
		rgbGamma = colorspace.GammaRec2020
	case "rec2020_pure24":
		space = colorspace.SystemRec2020_Pure24
		rgbGamma = colorspace.Gamma24
	default:
		panic("unsupported colorspace")
	}

	_, xyz2rgb = space.Chromaticity.ConversionXYZ()
	_, rgb2yuv = space.YCbCr.ConversionRGB()

	//adjust xyz2rgb with normalization factor from DCI
	denorm := mat.NewDiagDense(3, []float64{
		1 / colorspace.DCINormalizationFactor,
		1 / colorspace.DCINormalizationFactor,
		1 / colorspace.DCINormalizationFactor,
	})
	xyz2rgbDenorm = mat.NewDense(3, 3, nil)
	xyz2rgbDenorm.Mul(denorm, xyz2rgb)

	// Fold the 16-bit output scale into the RGB -> YUV matrix.
	premult := mat.NewDiagDense(3, []float64{
		math.MaxUint16,
		math.MaxUint16,
		math.MaxUint16,
	})
	rgb2yuvPremultiplied = mat.NewDense(3, 3, nil)
	rgb2yuvPremultiplied.Mul(rgb2yuv, premult)

	// Rounding happens after the derived matrices were computed from the
	// full-precision ones.
	xyz2rgb = RoundMatToPrecision(xyz2rgb, *xyzPrecision)
	rgb2yuv = RoundMatToPrecision(rgb2yuv, *rgbPrecision)
	xyz2rgbDenorm = RoundMatToPrecision(xyz2rgbDenorm, *xyzPrecision)
	rgb2yuvPremultiplied = RoundMatToPrecision(rgb2yuvPremultiplied, *rgbPrecision)

	_, _ = fmt.Fprintf(os.Stderr, "\nXYZ to RGB matrix:\n%v\n\n", mat.Formatted(xyz2rgb))
	_, _ = fmt.Fprintf(os.Stderr, "\nXYZ to RGB matrix (denormalized):\n%v\n\n", mat.Formatted(xyz2rgbDenorm))
	_, _ = fmt.Fprintf(os.Stderr, "\nRGB to YUV matrix:\n%v\n\n", mat.Formatted(rgb2yuv))
	_, _ = fmt.Fprintf(os.Stderr, "\nRGB to YUV matrix (premultiplied):\n%v\n\n", mat.Formatted(rgb2yuvPremultiplied))

	if useCConverter {
		_, _ = fmt.Fprintf(os.Stderr, "\nDecoder: CGO %s\n", conv.DecoderInformation())
	} else {
		_, _ = fmt.Fprintf(os.Stderr, "\nDecoder: Go Generic scalar pipeline (1d 1f)\n")
	}
	if useFloat && useCConverter {
		_, _ = fmt.Fprintf(os.Stderr, "Data type: float32\n\n")
	} else {
		_, _ = fmt.Fprintf(os.Stderr, "Data type: float64\n\n")
	}

	if useCConverter {
		conv.InitData(xyz2rgbDenorm, rgb2yuvPremultiplied, colorspace.GammaDCIXYZ, rgbGamma)
	}

	//open and write output file header
	var output *os.File
	if *outFile == "-" {
		output = os.Stdout
	} else {
		f, err := os.Create(*outFile)
		if err != nil {
			panic(err)
		}
		output = f
	}
	defer output.Close()

	// outputFrame writes one frame ("FRAME\n" followed by the Y, Cb and Cr
	// planes). The uint16 planes are reinterpreted as bytes in place, so the
	// samples are emitted in the host's native byte order. With -hash, the
	// SHA256 of the three planes is printed to stderr first.
	outputFrame := func(number int, y, cb, cr []uint16) error {
		by := unsafe.Slice((*byte)(unsafe.Pointer(unsafe.SliceData(y))), len(y)*2)
		bcb := unsafe.Slice((*byte)(unsafe.Pointer(unsafe.SliceData(cb))), len(cb)*2)
		bcr := unsafe.Slice((*byte)(unsafe.Pointer(unsafe.SliceData(cr))), len(cr)*2)

		if *hashOutput {
			hasher := sha256.New()
			hasher.Write(by)
			hasher.Write(bcb)
			hasher.Write(bcr)
			fmt.Fprintf(os.Stderr, "\r%s\n", hex.EncodeToString(hasher.Sum(nil)))
			//fmt.Fprintf(os.Stderr, "\rFrame %d: %s\n", number, hex.EncodeToString(hasher.Sum(nil)))
		}

		if _, err := output.WriteString("FRAME\n"); err != nil {
			return err
		}
		if _, err := output.Write(by); err != nil {
			return err
		}
		if _, err := output.Write(bcb); err != nil {
			return err
		}
		if _, err := output.Write(bcr); err != nil {
			return err
		}
		return nil
	}

	// decode and processing loop
	var wg sync.WaitGroup

	// availableFrames is the pool of reusable output buffers; it is filled
	// once the first frame has been decoded and its dimensions are known.
	availableFrames := make(chan *frameJobData, numPipelineCpu*2)
	inFrameJobs := make(chan *frameJobData, numPipelineCpu)
	outFrameJobs := make(chan *frameJobData, numPipelineCpu)

	// availableDecoders holds tokens that bound how many packets may be in
	// flight between the demuxer and the pipeline.
	availableDecoders := make(chan struct{}, numDecoderCpu*2)
	outDecoderJobs := make(chan *decodedFrame, numPipelineCpu)
	jpegDecoderChannel := make(chan libav.PacketData, numDecoderCpu)

	// expectedFrame is owned by the writer goroutine and expectedFrameDecoder
	// by the reorder goroutine once those are started.
	var expectedFrame = *startFrame
	var expectedFrameDecoder = expectedFrame

	// processedFrames counts frames that finished Process; it is a count, not
	// a frame number.
	var processedFrames atomic.Uint64

	// firstFrameTime is the instant throughput timing starts. It is written by
	// a decoder goroutine and by the packet callback and read by the progress
	// goroutine, so it is accessed atomically. nil means "not started yet".
	var firstFrameTime atomic.Pointer[time.Time]
	markStart := func() {
		now := time.Now()
		firstFrameTime.Store(&now)
	}
	// elapsed reports the time since markStart was last called, and whether it
	// has been called at all.
	elapsed := func() (time.Duration, bool) {
		t := firstFrameTime.Load()
		if t == nil {
			return 0, false
		}
		return time.Since(*t), true
	}

	// Colorspace conversion workers.
	var wg2 sync.WaitGroup
	for i := 0; i < numPipelineCpu; i++ {
		wg2.Add(1)
		go func() {
			defer wg2.Done()
			for job := range inFrameJobs {
				job.Process()
				processedFrames.Add(1)
				outFrameJobs <- job
			}
		}()
	}

	// Close outFrameJobs once every worker has drained inFrameJobs.
	wg.Add(1)
	go func() {
		defer wg.Done()
		wg2.Wait()
		close(outFrameJobs)
	}()

	// Writer: workers finish out of order, so converted frames are buffered
	// and written strictly in frame order; each buffer then goes back to the
	// pool.
	wg.Add(1)
	go func() {
		defer wg.Done()
		outputs := make([]*frameJobData, 0)
		for out := range outFrameJobs {
			outputs = append(outputs, out)
			slices.SortFunc(outputs, func(a, b *frameJobData) int {
				return a.frame - b.frame
			})
			for len(outputs) > 0 {
				f := outputs[0]
				if f.frame != int(expectedFrame) {
					break
				}
				//output frame to file
				if err := outputFrame(f.frame, f.y, f.cb, f.cr); err != nil {
					panic(err)
				}
				outputs = slices.Delete(outputs, 0, 1)
				expectedFrame++
				availableFrames <- f
			}
		}
	}()

	decoder, err := libopenjp2.NewJpeg2000Decoder(libopenjp2.QualityLayersAll, *lowres)
	if err != nil {
		panic(err)
	}

	// Stream parameters are filled in by the OpenXYZ12 header callback before
	// any packet is delivered to the decoders.
	var streamFramerateNum, streamFramerateDen, streamSarNum, streamSarDen int

	// onceInit writes the stream header and allocates the buffer pool as soon
	// as the first decoded frame reveals the picture dimensions.
	var onceInit sync.Once

	var wgDecoder sync.WaitGroup
	for i := 0; i < numDecoderCpu*2; i++ {
		wg.Add(1)
		wgDecoder.Add(1)
		go func() {
			defer wg.Done()
			defer wgDecoder.Done()
			for p := range jpegDecoderChannel {
				frame, err := decoder.DecodeFrame(p.Data)
				if err != nil {
					panic(err)
				}

				onceInit.Do(func() {
					if _, err := fmt.Fprintf(output, "YUV4MPEG2 W%d H%d F%d:%d I%s A%d:%d%s%s\n",
						frame.Width,
						frame.Height,
						streamFramerateNum,
						streamFramerateDen,
						"p",
						streamSarNum,
						streamSarDen,
						" C444p16 XYSCSS=444P16",
						" XCOLORRANGE=FULL",
					); err != nil {
						panic(err)
					}

					yuvLineSize := frame.Width
					yuvFrameSize := frame.Height * yuvLineSize

					for i := 0; i < numPipelineCpu*2; i++ {
						availableFrames <- &frameJobData{
							wg:     &wg,
							frame:  0,
							width:  frame.Width,
							height: frame.Height,
							in:     nil,
							//add extra capacity for OOB writes in ASM code
							y:  make([]uint16, yuvFrameSize, yuvFrameSize+64),
							cb: make([]uint16, yuvFrameSize, yuvFrameSize+64),
							cr: make([]uint16, yuvFrameSize, yuvFrameSize+64),
						}
					}

					markStart()
				})

				outDecoderJobs <- &decodedFrame{
					Number: p.Number,
					//add extra capacity for OOB reads in ASM code
					Frame: ToPacked(frame.X, frame.Y, frame.Z, 256),
				}
			}
		}()
	}

	// Reorder: decoders finish out of order, so decoded frames are buffered
	// and handed to the pipeline strictly in frame order.
	wg.Add(1)
	go func() {
		defer wg.Done()
		defer close(inFrameJobs)
		outputs := make([]*decodedFrame, 0)
		for out := range outDecoderJobs {
			outputs = append(outputs, out)
			slices.SortFunc(outputs, func(a, b *decodedFrame) int {
				return a.Number - b.Number
			})
			for len(outputs) > 0 {
				frame := outputs[0]
				if frame.Number != int(expectedFrameDecoder) {
					break
				}

				f := <-availableFrames
				f.frame = frame.Number
				//f.inLineSize = linesize
				f.in = frame.Frame

				// NOTE(review): presumably balanced by a Done issued through
				// the job's wg field inside Process — confirm.
				wg.Add(1)
				inFrameJobs <- f

				outputs = slices.Delete(outputs, 0, 1)
				expectedFrameDecoder++
				// Return the token so the demuxer may submit another packet.
				availableDecoders <- struct{}{}
			}
		}
	}()

	// Progress reporter. It runs until the process exits and stays silent
	// until timing has started.
	go func() {
		for range time.Tick(time.Second) {
			runningTime, started := elapsed()
			if !started || runningTime <= 0 {
				continue
			}
			frames := processedFrames.Load()
			fps := float64(frames) / runningTime.Seconds()
			_, _ = fmt.Fprintf(os.Stderr, "\rFrames %d %.02f fps %s ", frames, fps, runningTime.Truncate(time.Second))
		}
	}()

	err = libav.OpenXYZ12(*inFile, func(framerateNum, framerateDen, sarNum, sarDen, width, height int) error {
		streamFramerateNum = framerateNum
		streamFramerateDen = framerateDen
		streamSarNum = sarNum
		streamSarDen = sarDen

		for i := 0; i < numDecoderCpu*2; i++ {
			availableDecoders <- struct{}{}
		}
		return nil
	}, func(p libav.PacketData) error {
		if uint64(p.Number) < *startFrame {
			// Keep pushing the start time forward while skipping so the time
			// spent before -start does not count towards the fps.
			markStart()
			return nil
		}
		if uint64(p.Number) >= *endFrame {
			// NOTE(review): assumes OpenXYZ12 treats io.EOF from the callback
			// as a clean stop rather than returning it — confirm.
			return io.EOF
		}
		<-availableDecoders
		jpegDecoderChannel <- p
		return nil
	})
	if err != nil {
		panic(err)
	}

	// Shut the stages down front to back: decoders, reorder, workers, writer.
	close(jpegDecoderChannel)
	wgDecoder.Wait()
	close(outDecoderJobs)
	wg.Wait()

	_, _ = fmt.Fprint(os.Stderr, "\n\n")

	runningTime, _ := elapsed()
	totalFrames := processedFrames.Load()
	var fps float64
	if runningTime > 0 {
		fps = float64(totalFrames) / runningTime.Seconds()
	}
	_, _ = fmt.Fprintf(os.Stderr, "\nTotal %d frames, %.02f fps, took %s \n", totalFrames, fps, runningTime.Truncate(time.Millisecond))
}