428 lines
11 KiB
Go
428 lines
11 KiB
Go
package main
|
|
|
|
import "C"
|
|
import (
|
|
"crypto/sha256"
|
|
"encoding/hex"
|
|
"flag"
|
|
"fmt"
|
|
"git.gammaspectra.live/WeebDataHoarder/xyz2yuv/colorspace"
|
|
"git.gammaspectra.live/WeebDataHoarder/xyz2yuv/conv"
|
|
"git.gammaspectra.live/WeebDataHoarder/xyz2yuv/libav"
|
|
"git.gammaspectra.live/WeebDataHoarder/xyz2yuv/libopenjp2"
|
|
"gonum.org/v1/gonum/mat"
|
|
"io"
|
|
"math"
|
|
"os"
|
|
"runtime"
|
|
"slices"
|
|
"strings"
|
|
"sync"
|
|
"sync/atomic"
|
|
"time"
|
|
"unsafe"
|
|
)
|
|
|
|
var space colorspace.RelativeSystem
|
|
|
|
var xyz2rgb *mat.Dense
|
|
var xyz2rgbDenorm *mat.Dense
|
|
|
|
var rgb2yuv *mat.Dense
|
|
var rgb2yuvPremultiplied *mat.Dense
|
|
|
|
var rgbGamma float64
|
|
|
|
// ToPacked interleaves three equally sized planes into a single packed slice
// laid out as a[0] b[0] c[0] a[1] b[1] c[1] ...
//
// The returned slice has length 3*len(a) and capacity 3*len(a)+extra, which
// lets callers reserve padding past the last sample. A negative extra is
// treated as zero rather than triggering an opaque runtime makeslice panic.
//
// ToPacked panics with "lengths mismatch" when the planes differ in length.
func ToPacked(a, b, c []uint32, extra int) []uint32 {
	if len(a) != len(b) || len(a) != len(c) {
		panic("lengths mismatch")
	}
	if extra < 0 {
		extra = 0
	}

	packedLen := len(a) * 3
	out := make([]uint32, packedLen, packedLen+extra)
	for i := range a {
		j := i * 3
		out[j], out[j+1], out[j+2] = a[i], b[i], c[i]
	}
	return out
}
|
|
|
|
func main() {
|
|
inFile := flag.String("in", "", "Input file")
|
|
startFrame := flag.Uint64("start", 0, "Start frame number inclusive")
|
|
endFrame := flag.Uint64("end", math.MaxUint64, "End frame number exclusive")
|
|
outFile := flag.String("out", "-", "Output file. Use - for stdout")
|
|
colorspaceRelativeSystem := flag.String("colorspace", "rec709_pure", "Colorspace and parameters to convert into. Supported: rec709, rec709_pure, rec709_pure22, rec709_pure24, rec2020, rec2020_pure, rec2020_pure24")
|
|
xyzPrecision := flag.Int("precision-xyz2rgb", 0, "XYZ -> RGB conversion matrix precision. 0 = maximum")
|
|
rgbPrecision := flag.Int("precision-rgb2yuv", 0, "RGB -> YUV conversion matrix precision. 0 = maximum")
|
|
lowres := flag.Uint("lowres", 0, "Feed lowres parameter. Default is full frame")
|
|
useFloatPipeline := flag.Bool("float", false, "Use float pipeline instead of double, although less precise. Very fast.")
|
|
useGoPipeline := flag.Bool("use-go-pipeline", false, "Use Go pipeline, although slower. Does not support float mode.")
|
|
hashOutput := flag.Bool("hash", false, "Hash with SHA256 each output frame for accuracy comparisons")
|
|
decoderThreads := flag.Uint("decoder-threads", 0, "Threads for JPEG2000 decoding. Defaults to number of logical CPU")
|
|
pipelineThreads := flag.Uint("pipeline-threads", 0, "Threads for colorspace conversion pipeline. Defaults to number of logical CPU")
|
|
flag.Parse()
|
|
|
|
runtime.KeepAlive(endFrame)
|
|
|
|
//C.av_log_set_level(C.AV_LOG_DEBUG)
|
|
|
|
numDecoderCpu := int(*decoderThreads)
|
|
if numDecoderCpu == 0 {
|
|
numDecoderCpu = runtime.NumCPU()
|
|
}
|
|
|
|
numPipelineCpu := int(*pipelineThreads)
|
|
if numPipelineCpu == 0 {
|
|
numPipelineCpu = runtime.NumCPU()
|
|
}
|
|
|
|
useCConverter = !*useGoPipeline
|
|
useFloat = *useFloatPipeline
|
|
|
|
switch strings.ToLower(*colorspaceRelativeSystem) {
|
|
case "rec709":
|
|
space = colorspace.SystemRec709
|
|
rgbGamma = colorspace.GammaRec709
|
|
case "rec709_pure":
|
|
space = colorspace.SystemRec709_Pure
|
|
rgbGamma = colorspace.GammaRec709
|
|
case "rec709_pure22":
|
|
space = colorspace.SystemRec709_Pure22
|
|
rgbGamma = colorspace.Gamma22
|
|
case "rec709_pure24":
|
|
space = colorspace.SystemRec709_Pure24
|
|
rgbGamma = colorspace.Gamma24
|
|
case "rec2020":
|
|
space = colorspace.SystemRec2020
|
|
rgbGamma = colorspace.GammaRec2020
|
|
case "rec2020_pure":
|
|
space = colorspace.SystemRec2020_Pure
|
|
rgbGamma = colorspace.GammaRec2020
|
|
case "rec2020_pure24":
|
|
space = colorspace.SystemRec2020_Pure24
|
|
rgbGamma = colorspace.Gamma24
|
|
|
|
default:
|
|
panic("unsupported colorspace")
|
|
}
|
|
|
|
_, xyz2rgb = space.Chromaticity.ConversionXYZ()
|
|
_, rgb2yuv = space.YCbCr.ConversionRGB()
|
|
|
|
//adjust xyz2rgb with normalization factor from DCI
|
|
denorm := mat.NewDiagDense(3, []float64{
|
|
1 / colorspace.DCINormalizationFactor,
|
|
1 / colorspace.DCINormalizationFactor,
|
|
1 / colorspace.DCINormalizationFactor,
|
|
})
|
|
|
|
xyz2rgbDenorm = mat.NewDense(3, 3, nil)
|
|
xyz2rgbDenorm.Mul(denorm, xyz2rgb)
|
|
|
|
premult := mat.NewDiagDense(3, []float64{
|
|
math.MaxUint16,
|
|
math.MaxUint16,
|
|
math.MaxUint16,
|
|
})
|
|
|
|
rgb2yuvPremultiplied = mat.NewDense(3, 3, nil)
|
|
rgb2yuvPremultiplied.Mul(rgb2yuv, premult)
|
|
|
|
xyz2rgb = RoundMatToPrecision(xyz2rgb, *xyzPrecision)
|
|
rgb2yuv = RoundMatToPrecision(rgb2yuv, *rgbPrecision)
|
|
xyz2rgbDenorm = RoundMatToPrecision(xyz2rgbDenorm, *xyzPrecision)
|
|
rgb2yuvPremultiplied = RoundMatToPrecision(rgb2yuvPremultiplied, *rgbPrecision)
|
|
|
|
_, _ = fmt.Fprintf(os.Stderr, "\nXYZ to RGB matrix:\n%v\n\n", mat.Formatted(xyz2rgb))
|
|
_, _ = fmt.Fprintf(os.Stderr, "\nXYZ to RGB matrix (denormalized):\n%v\n\n", mat.Formatted(xyz2rgbDenorm))
|
|
_, _ = fmt.Fprintf(os.Stderr, "\nRGB to YUV matrix:\n%v\n\n", mat.Formatted(rgb2yuv))
|
|
_, _ = fmt.Fprintf(os.Stderr, "\nRGB to YUV matrix (premultiplied):\n%v\n\n", mat.Formatted(rgb2yuvPremultiplied))
|
|
|
|
if useCConverter {
|
|
_, _ = fmt.Fprintf(os.Stderr, "\nDecoder: CGO %s\n", conv.DecoderInformation())
|
|
} else {
|
|
_, _ = fmt.Fprintf(os.Stderr, "\nDecoder: Go Generic scalar pipeline (1d 1f)\n")
|
|
}
|
|
|
|
if useFloat && useCConverter {
|
|
_, _ = fmt.Fprintf(os.Stderr, "Data type: float32\n\n")
|
|
} else {
|
|
_, _ = fmt.Fprintf(os.Stderr, "Data type: float64\n\n")
|
|
}
|
|
|
|
if useCConverter {
|
|
conv.InitData(xyz2rgbDenorm, rgb2yuvPremultiplied, colorspace.GammaDCIXYZ, rgbGamma)
|
|
}
|
|
|
|
//open and write output file header
|
|
var output *os.File
|
|
if *outFile == "-" {
|
|
output = os.Stdout
|
|
} else {
|
|
f, err := os.Create(*outFile)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
output = f
|
|
}
|
|
defer output.Close()
|
|
|
|
outputFrame := func(number int, y, cb, cr []uint16) error {
|
|
by := unsafe.Slice((*byte)(unsafe.Pointer(unsafe.SliceData(y))), len(y)*2)
|
|
bcb := unsafe.Slice((*byte)(unsafe.Pointer(unsafe.SliceData(cb))), len(cb)*2)
|
|
bcr := unsafe.Slice((*byte)(unsafe.Pointer(unsafe.SliceData(cr))), len(cr)*2)
|
|
if *hashOutput {
|
|
hasher := sha256.New()
|
|
hasher.Write(by)
|
|
hasher.Write(bcb)
|
|
hasher.Write(bcr)
|
|
fmt.Fprintf(os.Stderr, "\r%s\n", hex.EncodeToString(hasher.Sum(nil)))
|
|
//fmt.Fprintf(os.Stderr, "\rFrame %d: %s\n", number, hex.EncodeToString(hasher.Sum(nil)))
|
|
}
|
|
|
|
_, err := output.WriteString("FRAME\n")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
_, err = output.Write(by)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
_, err = output.Write(bcb)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
_, err = output.Write(bcr)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// decode and processing loop
|
|
var wg sync.WaitGroup
|
|
|
|
availableFrames := make(chan *frameJobData, numPipelineCpu*2)
|
|
inFrameJobs := make(chan *frameJobData, numPipelineCpu)
|
|
outFrameJobs := make(chan *frameJobData, numPipelineCpu)
|
|
|
|
availableDecoders := make(chan struct{}, numDecoderCpu*2)
|
|
outDecoderJobs := make(chan *decodedFrame, numPipelineCpu)
|
|
jpegDecoderChannel := make(chan libav.PacketData, numDecoderCpu)
|
|
|
|
var expectedFrame = max(0, *startFrame)
|
|
var expectedFrameDecoder = expectedFrame
|
|
var processedFrames atomic.Uint64
|
|
|
|
var firstFrame = expectedFrame
|
|
var firstFrameTime time.Time
|
|
|
|
var wg2 sync.WaitGroup
|
|
for i := 0; i < numPipelineCpu; i++ {
|
|
wg2.Add(1)
|
|
go func() {
|
|
defer wg2.Done()
|
|
for job := range inFrameJobs {
|
|
job.Process()
|
|
processedFrames.Add(1)
|
|
outFrameJobs <- job
|
|
}
|
|
}()
|
|
}
|
|
|
|
wg.Add(1)
|
|
go func() {
|
|
defer wg.Done()
|
|
wg2.Wait()
|
|
close(outFrameJobs)
|
|
}()
|
|
|
|
wg.Add(1)
|
|
go func() {
|
|
defer wg.Done()
|
|
|
|
outputs := make([]*frameJobData, 0)
|
|
for out := range outFrameJobs {
|
|
outputs = append(outputs, out)
|
|
slices.SortFunc(outputs, func(a, b *frameJobData) int {
|
|
return a.frame - b.frame
|
|
})
|
|
|
|
for len(outputs) > 0 {
|
|
f := outputs[0]
|
|
|
|
if f.frame != int(expectedFrame) {
|
|
break
|
|
}
|
|
|
|
//output frame to file
|
|
err := outputFrame(f.frame, f.y, f.cb, f.cr)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
|
|
outputs = slices.Delete(outputs, 0, 1)
|
|
|
|
expectedFrame++
|
|
|
|
availableFrames <- f
|
|
}
|
|
}
|
|
}()
|
|
|
|
decoder, err := libopenjp2.NewJpeg2000Decoder(libopenjp2.QualityLayersAll, *lowres)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
|
|
var streamFramerateNum, streamFramerateDen, streamSarNum, streamSarDen int
|
|
|
|
var onceInit sync.Once
|
|
var wgDecoder sync.WaitGroup
|
|
for i := 0; i < numDecoderCpu*2; i++ {
|
|
wg.Add(1)
|
|
wgDecoder.Add(1)
|
|
go func() {
|
|
defer wg.Done()
|
|
defer wgDecoder.Done()
|
|
|
|
for p := range jpegDecoderChannel {
|
|
|
|
frame, err := decoder.DecodeFrame(p.Data)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
|
|
onceInit.Do(func() {
|
|
_, err := output.WriteString(fmt.Sprintf("YUV4MPEG2 W%d H%d F%d:%d I%s A%d:%d%s%s\n",
|
|
frame.Width,
|
|
frame.Height,
|
|
streamFramerateNum,
|
|
streamFramerateDen,
|
|
"p",
|
|
streamSarNum,
|
|
streamSarDen,
|
|
" C444p16 XYSCSS=444P16",
|
|
" XCOLORRANGE=FULL",
|
|
))
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
|
|
yuvLineSize := frame.Width
|
|
yuvFrameSize := frame.Height * yuvLineSize
|
|
|
|
for i := 0; i < numPipelineCpu*2; i++ {
|
|
availableFrames <- &frameJobData{
|
|
wg: &wg,
|
|
frame: 0,
|
|
width: frame.Width,
|
|
height: frame.Height,
|
|
in: nil,
|
|
//add extra capacity for OOB writes in ASM code
|
|
y: make([]uint16, yuvFrameSize, yuvFrameSize+64),
|
|
cb: make([]uint16, yuvFrameSize, yuvFrameSize+64),
|
|
cr: make([]uint16, yuvFrameSize, yuvFrameSize+64),
|
|
}
|
|
}
|
|
firstFrameTime = time.Now().UTC()
|
|
})
|
|
|
|
outDecoderJobs <- &decodedFrame{
|
|
Number: p.Number,
|
|
//add extra capacity for OOB reads in ASM code
|
|
Frame: ToPacked(frame.X, frame.Y, frame.Z, 256),
|
|
}
|
|
}
|
|
}()
|
|
}
|
|
|
|
wg.Add(1)
|
|
go func() {
|
|
defer wg.Done()
|
|
defer close(inFrameJobs)
|
|
|
|
outputs := make([]*decodedFrame, 0)
|
|
for out := range outDecoderJobs {
|
|
outputs = append(outputs, out)
|
|
slices.SortFunc(outputs, func(a, b *decodedFrame) int {
|
|
return a.Number - b.Number
|
|
})
|
|
|
|
for len(outputs) > 0 {
|
|
frame := outputs[0]
|
|
|
|
if frame.Number != int(expectedFrameDecoder) {
|
|
break
|
|
}
|
|
|
|
f := <-availableFrames
|
|
f.frame = frame.Number
|
|
//f.inLineSize = linesize
|
|
|
|
f.in = frame.Frame
|
|
wg.Add(1)
|
|
inFrameJobs <- f
|
|
|
|
outputs = slices.Delete(outputs, 0, 1)
|
|
|
|
expectedFrameDecoder++
|
|
|
|
availableDecoders <- struct{}{}
|
|
}
|
|
}
|
|
}()
|
|
|
|
go func() {
|
|
for range time.Tick(time.Second) {
|
|
frame := int(processedFrames.Load())
|
|
runningTime := time.Now().UTC().Sub(firstFrameTime)
|
|
fps := float64(frame-int(firstFrame)+1) / runningTime.Seconds()
|
|
_, _ = fmt.Fprintf(os.Stderr, "\rFrames %d %.02f fps %s ", frame, fps, runningTime.Truncate(time.Second))
|
|
}
|
|
}()
|
|
|
|
err = libav.OpenXYZ12(*inFile, func(framerateNum, framerateDen, sarNum, sarDen, width, height int) error {
|
|
streamFramerateNum = framerateNum
|
|
streamFramerateDen = framerateDen
|
|
streamSarNum = sarNum
|
|
streamSarDen = sarDen
|
|
|
|
for i := 0; i < numDecoderCpu*2; i++ {
|
|
availableDecoders <- struct{}{}
|
|
}
|
|
|
|
return nil
|
|
}, func(p libav.PacketData) error {
|
|
if uint64(p.Number) < *startFrame {
|
|
firstFrameTime = time.Now().UTC()
|
|
return nil
|
|
}
|
|
|
|
if uint64(p.Number) >= *endFrame {
|
|
return io.EOF
|
|
}
|
|
|
|
<-availableDecoders
|
|
|
|
jpegDecoderChannel <- p
|
|
return nil
|
|
})
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
close(jpegDecoderChannel)
|
|
wgDecoder.Wait()
|
|
close(outDecoderJobs)
|
|
|
|
wg.Wait()
|
|
|
|
print("\n\n")
|
|
|
|
runningTime := time.Now().UTC().Sub(firstFrameTime)
|
|
fps := float64(int(processedFrames.Load())-int(firstFrame)+1) / runningTime.Seconds()
|
|
_, _ = fmt.Fprintf(os.Stderr, "\nTotal %d frames, %.02f fps, took %s \n", processedFrames.Load(), fps, runningTime.Truncate(time.Millisecond))
|
|
|
|
}
|