Added ISO Noise table generator for libaom

2023-11-03 18:52:15 +01:00 · 2023-11-03 18:52:15 +01:00 · c533ad4386
parent 1297a5abce
commit c533ad4386
7 changed files with 400 additions and 3 deletions
--- a/encoder/libaom/libaom.c
+++ b/encoder/libaom/libaom.c
@ -12,6 +12,10 @@ aom_codec_err_t aom_codec_control_intptr(aom_codec_ctx_t *ctx, int ctrl_id, int*
    return aom_codec_control(ctx, ctrl_id, v);
 }

+// aom_codec_control_charptr forwards a string-valued control to
+// aom_codec_control: ctx is the codec context, ctrl_id the control
+// identifier and v the const char* argument. The status returned by
+// aom_codec_control is passed through unchanged.
+// NOTE(review): presumably exists, like the sibling *_uint / *_intptr
+// wrappers, to give cgo a fixed-signature entry point - confirm.
+aom_codec_err_t aom_codec_control_charptr(aom_codec_ctx_t *ctx, int ctrl_id, const char* v) {
+    return aom_codec_control(ctx, ctrl_id, v);
+}
+
 void* aom_get_pkt_buf(aom_codec_cx_pkt_t *pkt){
    return pkt->data.frame.buf;
 }
--- a/encoder/libaom/libaom.go
+++ b/encoder/libaom/libaom.go
@ -13,11 +13,13 @@ import (
 	"git.gammaspectra.live/S.O.N.G/Ignite/color"
 	"git.gammaspectra.live/S.O.N.G/Ignite/frame"
 	"git.gammaspectra.live/S.O.N.G/Ignite/utilities"
+	"git.gammaspectra.live/S.O.N.G/Ignite/utilities/filmgrain"
 	"git.gammaspectra.live/S.O.N.G/Ignite/utilities/obuwriter"
 	"golang.org/x/exp/constraints"
 	"io"
 	"log"
 	"maps"
+	"os"
 	"runtime"
 	"strconv"
 	"strings"
@ -36,6 +38,7 @@ type Encoder struct {
 	frameStatistics     frame.FrameStatistics
 	resourcePinner      runtime.Pinner
 	logger              utilities.Logger
+	free                []func()
 }

 var libaomVersion = "libaom-av1 " + C.GoString(C.aom_codec_version_str()) + " ABI " + strconv.FormatUint(C.AOM_ENCODER_ABI_VERSION, 10)
@ -56,7 +59,35 @@ func NewEncoder(w io.Writer, properties frame.StreamProperties, settings map[str
 		logger:          logger,
 	}

-	clonedSettings := maps.Clone(settings)
+	clonedSettings := make(map[string]any)
+	maps.Copy(clonedSettings, settings)
+
+	photonNoiseIso := getSettingUnsigned[uint](clonedSettings, "photon-noise-iso", 0)
+	photonNoiseTransferFunction := filmgrain.GetTransferFunction(getSettingString(clonedSettings, "photon-noise-transfer", "bt709"))
+
+	if photonNoiseIso > 0 && photonNoiseTransferFunction != nil {
+		//create table
+		table, err := filmgrain.CreatePhotonNoiseTable(properties.Width, properties.Height, float64(photonNoiseIso), photonNoiseTransferFunction)
+		if err != nil {
+			return nil, err
+		}
+
+		// The table is handed to libaom by path, so it is spilled to a
+		// temporary file that is removed again by the e.free callbacks.
+		tmpFile, err := os.CreateTemp(os.TempDir(), "photon-table*.tbl")
+		if err != nil {
+			return nil, err
+		}
+		tmpName := tmpFile.Name()
+		_, err = tmpFile.Write(table)
+		// Always close, even after a failed write, so the descriptor is not
+		// leaked; a close error is reported when the write itself succeeded.
+		if closeErr := tmpFile.Close(); err == nil {
+			err = closeErr
+		}
+		if err != nil {
+			// Best effort: do not leave a partial table behind.
+			_ = os.Remove(tmpName)
+			return nil, err
+		}
+		e.free = append(e.free, func() {
+			os.Remove(tmpName)
+		})
+		clonedSettings["film-grain-table"] = tmpName
+
+	}

 	var aomErr C.aom_codec_err_t

@ -337,6 +368,9 @@ func NewEncoder(w io.Writer, properties frame.StreamProperties, settings map[str
 	if aomErr = C.aom_codec_enc_init_ver(&e.codec, encoder, &e.cfg, flags, C.AOM_ENCODER_ABI_VERSION); aomErr != 0 {
 		return nil, fmt.Errorf("failed to initialize encoder: err %d %s", aomErr, C.GoString(e.codec.err_detail))
 	}
+	e.free = append(e.free, func() {
+		C.aom_codec_destroy(&e.codec)
+	})

 	if properties.FullColorRange {
 		if aomErr = C.aom_codec_control_uint(&e.codec, C.AV1E_SET_COLOR_RANGE, C.AOM_CR_FULL_RANGE); aomErr != 0 {
@ -380,6 +414,14 @@ func NewEncoder(w io.Writer, properties frame.StreamProperties, settings map[str
 		}
 	}

+	if fgt := getSettingString(clonedSettings, "film-grain-table", ""); fgt != "" {
+		strVal := C.CString(fgt)
+		defer C.free(unsafe.Pointer(strVal))
+		if aomErr = C.aom_codec_control_charptr(&e.codec, C.AV1E_SET_FILM_GRAIN_TABLE, strVal); aomErr != 0 {
+			return nil, fmt.Errorf("error setting FILM_GRAIN_TABLE parameter: %s", C.GoString(C.aom_codec_error_detail(&e.codec)))
+		}
+	}
+
 	for k, v := range clonedSettings {
 		if err := func() error {
 			var strVal *C.char
--- a/encoder/libaom/libaom.h
+++ b/encoder/libaom/libaom.h
@ -11,6 +11,8 @@ aom_codec_err_t aom_codec_control_uint(aom_codec_ctx_t *ctx, int ctrl_id, unsign

 aom_codec_err_t aom_codec_control_intptr(aom_codec_ctx_t *ctx, int ctrl_id, int* v);

+aom_codec_err_t aom_codec_control_charptr(aom_codec_ctx_t *ctx, int ctrl_id, const char* v);
+
 void* aom_get_pkt_buf(aom_codec_cx_pkt_t *pkt);
 size_t aom_get_pkt_sz(aom_codec_cx_pkt_t *pkt);
 aom_codec_pts_t aom_get_pkt_pts(aom_codec_cx_pkt_t *pkt);
--- a/encoder/libaom/libaom_test.go
+++ b/encoder/libaom/libaom_test.go
@ -6,6 +6,7 @@ import (
 	"git.gammaspectra.live/S.O.N.G/Ignite/decoder/y4m"
 	"git.gammaspectra.live/S.O.N.G/Ignite/testdata"
 	"git.gammaspectra.live/S.O.N.G/Ignite/utilities/testingutils"
+	"maps"
 	"os"
 	"runtime"
 	"sync"
@ -16,7 +17,7 @@ func TestVersion(t *testing.T) {
 	t.Logf("libaom version: %s", Version())
 }

-func testEncode(sample testdata.TestSample, t *testing.T) {
+func testEncode(sample testdata.TestSample, t *testing.T, otherSettings map[string]any) {
 	reader, err := sample.Open(t)
 	if err != nil {
 		t.Fatal(err)
@ -54,6 +55,7 @@ func testEncode(sample testdata.TestSample, t *testing.T) {
 		stream := pipe.DecodeStream()

 		settings := make(map[string]any)
+		maps.Copy(settings, otherSettings)
 		settings["threads"] = runtime.NumCPU()
 		settings["row-mt"] = 1
 		settings["cpu-used"] = 8
@ -99,5 +101,23 @@ func TestEncode_YUV420_8bit(t *testing.T) {
 		t.Skip("skipping test in short mode")
 	}

-	testEncode(testdata.Y4M_Sintel_Trailer_720p24_YUV420_8bit, t)
+	testEncode(testdata.Y4M_Sintel_Trailer_720p24_YUV420_8bit, t, nil)
+}
+
+// TestEncode_YUV444_8bit runs testEncode on the
+// Y4M_Ducks_Take_Off_720p50_YUV444_8bit sample without any extra encoder
+// settings. It is skipped when tests run in short mode.
+func TestEncode_YUV444_8bit(t *testing.T) {
+	if testing.Short() {
+		t.Skip("skipping test in short mode")
+	}
+
+	testEncode(testdata.Y4M_Ducks_Take_Off_720p50_YUV444_8bit, t, nil)
+}
+
+// TestEncode_YUV444_8bit_PhotonNoise runs testEncode on the
+// Y4M_Ducks_Take_Off_720p50_YUV444_8bit sample with the "photon-noise-iso"
+// setting at 64000, so the encoder is created with a generated photon noise
+// table. It is skipped when tests run in short mode.
+func TestEncode_YUV444_8bit_PhotonNoise(t *testing.T) {
+	if testing.Short() {
+		t.Skip("skipping test in short mode")
+	}
+
+	testEncode(testdata.Y4M_Ducks_Take_Off_720p50_YUV444_8bit, t, map[string]any{
+		// Non-zero ISO enables photon noise table generation in NewEncoder.
+		"photon-noise-iso": 64000,
+	})
 }
--- a/utilities/filmgrain/isotable.go
+++ b/utilities/filmgrain/isotable.go
@ -0,0 +1,160 @@
+package filmgrain
+
+import (
+	"errors"
+	"fmt"
+	"math"
+	"strings"
+)
+
+// CreatePhotonNoiseTable builds a film grain table in the textual "filmgrn1"
+// format that models sensor photon noise for a width×height frame exposed at
+// the given ISO setting.
+//
+// The table holds a single entry with 14 luma scaling points spread evenly
+// over the encoded range; chroma scaling is left empty and no auto-regression
+// is used (lag 0). transferFunction converts between encoded values and
+// linear light and supplies the mid-tone level.
+//
+// An error is returned when transferFunction is nil, when width or height is
+// not positive, or when iso is not a positive finite number.
+func CreatePhotonNoiseTable(width, height int, iso float64, transferFunction *TransferFunction) ([]byte, error) {
+	if transferFunction == nil {
+		return nil, errors.New("unknown transfer function")
+	}
+	// A zero-sized frame would divide by zero below.
+	if width <= 0 || height <= 0 {
+		return nil, fmt.Errorf("invalid frame size %dx%d", width, height)
+	}
+	// Written as !(iso > 0) so that NaN is rejected as well.
+	if !(iso > 0) || math.IsInf(iso, 1) {
+		return nil, fmt.Errorf("invalid iso value %v", iso)
+	}
+
+	// Assumes a daylight-like spectrum.
+	// https://www.strollswithmydog.com/effective-quantum-efficiency-of-sensor/#:~:text=11%2C260%20photons/um%5E2/lx-s
+	const kPhotonsPerLxSPerUm2 = 11260
+
+	// Order of magnitude for cameras in the 2010-2020 decade, taking the CFA into
+	// account.
+	const kEffectiveQuantumEfficiency = 0.20
+
+	// Also reasonable values for current cameras. The read noise is typically
+	// higher than this at low ISO settings but it matters less there.
+	const kPhotoResponseNonUniformity = 0.005
+	const kInputReferredReadNoise = 1.5
+
+	// Focal plane exposure for a mid-tone (typically a 18% reflectance card), in
+	// lx·s.
+	midToneExposure := 10 / iso
+
+	// In square microns. Assumes a 35mm sensor (36mm × 24mm). The division is
+	// done in floating point: integer division would truncate the area (to 0
+	// for frames above 864 megapixels) and the product of the dimensions could
+	// overflow a 32-bit int.
+	pixelAreaUm2 := (36000. * 24000.) / (float64(width) * float64(height))
+
+	midToneElectronsPerPixel := kEffectiveQuantumEfficiency *
+		kPhotonsPerLxSPerUm2 *
+		midToneExposure * pixelAreaUm2
+	maxElectronsPerPixel :=
+		midToneElectronsPerPixel / transferFunction.MidTone
+
+	var filmGrain struct {
+
+		// 8 bit values
+		scalingPointsY [14][2]int
+		numYPoints     int // value: 0..14
+
+		// 8 bit values
+		scalingPointsCb [10][2]int
+		numCbPoints     int // value: 0..10
+
+		// 8 bit values
+		scalingPointsCr [10][2]int
+		numCrPoints     int // value: 0..10
+
+		arCoeffLag int // values:  0..3
+
+		// 8 bit values
+		arCoeffsY  [24]int
+		arCoeffsCb [25]int
+		arCoeffsCr [25]int
+	}
+
+	filmGrain.numYPoints = 14
+	filmGrain.numCbPoints = 0
+	filmGrain.numCrPoints = 0
+	filmGrain.arCoeffLag = 0
+
+	for i := 0; i < filmGrain.numYPoints; i++ {
+		x := float64(i) / float64(filmGrain.numYPoints-1)
+		linear := transferFunction.ToLinear(x)
+		electronsPerPixel := maxElectronsPerPixel * linear
+		// Quadrature sum of the relevant sources of noise, in electrons rms. Photon
+		// shot noise is math.Sqrt(electrons) so we can skip the square root and the
+		// squaring.
+		// https://en.wikipedia.org/wiki/Addition_in_quadrature
+		// https://doi.org/10.1117/3.725073
+		noiseInElectrons :=
+			math.Sqrt(kInputReferredReadNoise*kInputReferredReadNoise +
+				electronsPerPixel +
+				(kPhotoResponseNonUniformity * kPhotoResponseNonUniformity *
+					electronsPerPixel * electronsPerPixel))
+		linearNoise := noiseInElectrons / maxElectronsPerPixel
+		// Slope of the transfer function over ±2 sigma around the sample,
+		// clamped to the valid linear range, converts the noise to encoded units.
+		linearRangeStart := max(0., linear-2*linearNoise)
+		linearRangeEnd := min(1., linear+2*linearNoise)
+		tfSlope :=
+			(transferFunction.FromLinear(linearRangeEnd) -
+				transferFunction.FromLinear(
+					linearRangeStart)) /
+				(linearRangeEnd - linearRangeStart)
+		encodedNoise := linearNoise * tfSlope
+
+		x = math.Round(255 * x)
+		encodedNoise = min(255., math.Round(255*7.88*encodedNoise))
+
+		filmGrain.scalingPointsY[i][0] = int(x)
+		filmGrain.scalingPointsY[i][1] = int(encodedNoise)
+	}
+
+	var out strings.Builder
+
+	// writePoints emits one scaling-point line: header, then " x y" per point.
+	writePoints := func(header string, points [][2]int) {
+		out.WriteString(header)
+		for _, p := range points {
+			fmt.Fprintf(&out, " %d %d", p[0], p[1])
+		}
+		out.WriteByte('\n')
+	}
+	// writeCoeffs emits one auto-regression line: header, then " c" per coefficient.
+	writeCoeffs := func(header string, coeffs []int) {
+		out.WriteString(header)
+		for _, c := range coeffs {
+			fmt.Fprintf(&out, " %d", c)
+		}
+		out.WriteByte('\n')
+	}
+
+	out.WriteString("filmgrn1\n")
+	// end_time is converted explicitly: the untyped constant would otherwise
+	// default to int, which cannot hold it on 32-bit platforms.
+	fmt.Fprintf(&out, "E %d %d %d %d %d\n", 0 /*start_time*/, int64(math.MaxInt64) /*end_time*/, 1 /*apply_grain*/, 7391 /*seed*/, 1 /*update_parameters*/)
+	fmt.Fprintf(&out, "\tp %d %d %d %d %d %d %d %d %d %d %d %d\n",
+		filmGrain.arCoeffLag /*ar_coeff_lag*/, 6 /*ar_coeff_shift*/, 0, /*grain_scale_shift*/
+		8 /*scaling_shift*/, 0, /*chroma_scaling_from_luma*/
+		1 /*overlap_flag*/, 0 /*cb_mult*/, 0, /*cb_luma_mult*/
+		0 /*cb_offset*/, 0 /*cr_mult*/, 0, /*cr_luma_mult*/
+		0 /*cr_offset*/)
+
+	// The luma header intentionally carries a trailing space (yielding a double
+	// space before the first point); the chroma headers do not.
+	writePoints(fmt.Sprintf("\tsY %d ", filmGrain.numYPoints), filmGrain.scalingPointsY[:filmGrain.numYPoints])
+	writePoints(fmt.Sprintf("\tsCb %d", filmGrain.numCbPoints), filmGrain.scalingPointsCb[:filmGrain.numCbPoints])
+	writePoints(fmt.Sprintf("\tsCr %d", filmGrain.numCrPoints), filmGrain.scalingPointsCr[:filmGrain.numCrPoints])
+
+	// Luma has n coefficients, each chroma plane has n+1.
+	n := 2 * filmGrain.arCoeffLag * (filmGrain.arCoeffLag + 1)
+	writeCoeffs("\tcY", filmGrain.arCoeffsY[:n])
+	writeCoeffs("\tcCb", filmGrain.arCoeffsCb[:n+1])
+	writeCoeffs("\tcCr", filmGrain.arCoeffsCr[:n+1])
+
+	return []byte(out.String()), nil
+}
--- a/utilities/filmgrain/isotable_test.go
+++ b/utilities/filmgrain/isotable_test.go
@ -0,0 +1,28 @@
+package filmgrain
+
+import (
+	"bytes"
+	"testing"
+)
+
+const testGrainTable = `filmgrn1
+E 0 9223372036854775807 1 7391 1
+	p 0 6 0 8 0 1 0 0 0 0 0 0
+	sY 14  0 17 20 4 39 3 59 3 78 2 98 2 118 2 137 3 157 3 177 3 196 3 216 3 235 4 255 4
+	sCb 0
+	sCr 0
+	cY
+	cCb 0
+	cCr 0
+`
+
+// TestCreatePhotonNoiseTable generates the table for a 1280x720 frame at
+// ISO 400 with the "bt470bg" transfer function and compares it byte for byte
+// with testGrainTable.
+func TestCreatePhotonNoiseTable(t *testing.T) {
+	table, err := CreatePhotonNoiseTable(1280, 720, 400, GetTransferFunction("bt470bg"))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if !bytes.Equal(table, []byte(testGrainTable)) {
+		// Print both tables so a mismatch can be diagnosed from the log.
+		t.Fatalf("table is different\ngot:\n%s\nwant:\n%s", table, testGrainTable)
+	}
+}
--- a/utilities/filmgrain/transfer.go
+++ b/utilities/filmgrain/transfer.go
@ -0,0 +1,141 @@
+package filmgrain
+
+import (
+	"math"
+	"strings"
+)
+
+//TODO: transfer function type?
+
+// GetTransferFunction returns the TransferFunction registered under kind, or
+// nil when the name is not recognised. Matching is case-insensitive.
+//
+// Recognised names: "bt470m" (gamma 2.2), "bt470bg" (gamma 2.8), "bt601",
+// "bt709", "bt2020" (all three share one curve), "srgb", "smpte2084" / "pq"
+// and "hlg".
+func GetTransferFunction(kind string) *TransferFunction {
+	switch strings.ToLower(kind) {
+	case "bt470m":
+		//gamma22
+		return &TransferFunction{
+			ToLinear: func(in float64) (linear float64) {
+				return math.Pow(in, 2.2)
+			},
+			FromLinear: func(linear float64) (out float64) {
+				return math.Pow(linear, 1/2.2)
+			},
+			MidTone: 0.18,
+		}
+	case "bt470bg":
+		//gamma28
+		return &TransferFunction{
+			ToLinear: func(in float64) (linear float64) {
+				return math.Pow(in, 2.8)
+			},
+			FromLinear: func(linear float64) (out float64) {
+				return math.Pow(linear, 1/2.8)
+			},
+			MidTone: 0.18,
+		}
+	case "bt601", "bt709", "bt2020":
+		//TODO: are bt601 and bt709 the same?
+		//bt2020. same as bt709, just defined more precise. Can use the same for all
+		const beta = 0.018053968510807
+		const alpha = 1 + 5.5*beta
+		return &TransferFunction{
+			ToLinear: func(in float64) (linear float64) {
+				// The encoded-domain breakpoint is the image of beta under the
+				// linear segment, so that ToLinear is the exact inverse of
+				// FromLinear on both sides of the knee.
+				if in < 4.5*beta {
+					return in / 4.5
+				}
+				return math.Pow((in+(alpha-1))/alpha, 1/0.45)
+			},
+			FromLinear: func(linear float64) (out float64) {
+				if linear < beta {
+					return 4.5 * linear
+				}
+				return alpha*math.Pow(linear, 0.45) - (alpha - 1)
+			},
+			//TODO: check this for bt2020?
+			MidTone: 0.18,
+		}
+	case "srgb":
+		//sRGB
+		return &TransferFunction{
+			ToLinear: func(in float64) (linear float64) {
+				if in <= 0.04045 {
+					return in / 12.92
+				}
+				return math.Pow((in+0.055)/1.055, 2.4)
+			},
+			FromLinear: func(linear float64) (out float64) {
+				if linear <= 0.0031308 {
+					return 12.92 * linear
+				}
+				return 1.055*math.Pow(linear, 1/2.4) - 0.055
+			},
+			MidTone: 0.18,
+		}
+	case "smpte2084", "pq":
+		//pq
+		const PqM1 = 2610. / 16384
+		const PqM2 = 128 * 2523. / 4096
+		const PqC1 = 3424. / 4096
+		const PqC2 = 32 * 2413. / 4096
+		const PqC3 = 32 * 2392. / 4096
+		return &TransferFunction{
+			ToLinear: func(in float64) (linear float64) {
+				pqPowInvM2 := math.Pow(in, 1./PqM2)
+				// The numerator is clamped at 0 so that small encoded values
+				// do not feed a negative base to math.Pow.
+				return math.Pow(max(0, pqPowInvM2-PqC1)/(PqC2-PqC3*pqPowInvM2),
+					1./PqM1)
+			},
+			FromLinear: func(linear float64) (out float64) {
+				linearPowM1 := math.Pow(linear, PqM1)
+				return math.Pow((PqC1+PqC2*linearPowM1)/(1+PqC3*linearPowM1),
+					PqM2)
+			},
+			MidTone: 26. / 1000,
+		}
+	case "hlg":
+		//hlg
+		// Note: it is perhaps debatable whether “linear” for HLG should be scene light
+		// or display light. Here, it is implemented in terms of display light assuming
+		// a nominal peak display luminance of 1000 cd/m², hence the system γ of 1.2. To
+		// make it scene light instead, the OOTF (math.Pow(x, 1.2)) and its inverse should
+		// be removed from the functions below, and the TransferFunction.MidTone should be replaced
+		// with math.Pow(26. / 1000, 1 / 1.2).
+		const HlgA = 0.17883277
+		const HlgB = 0.28466892
+		const HlgC = 0.55991073
+		return &TransferFunction{
+			ToLinear: func(in float64) (linear float64) {
+				// EOTF = OOTF ∘ OETF⁻¹
+				if in <= 0.5 {
+					linear = in * in / 3
+				} else {
+					linear = (math.Exp((in-HlgC)/HlgA) + HlgB) / 12
+				}
+				return math.Pow(linear, 1.2)
+			},
+			FromLinear: func(linear float64) (out float64) {
+				// EOTF⁻¹ = OETF ∘ OOTF⁻¹
+				linear = math.Pow(linear, 1./1.2)
+				if linear <= (1. / 12) {
+					return math.Sqrt(3 * linear)
+				}
+				return HlgA*math.Log(12*linear-HlgB) + HlgC
+			},
+			MidTone: 26. / 1000,
+		}
+	}
+	return nil
+}
+
+// TransferFunction bundles the two directions of a transfer characteristic
+// with the linear-light level that is treated as a mid-tone.
+type TransferFunction struct {
+	// ToLinear maps an encoded value to linear light.
+	ToLinear func(in float64) (linear float64)
+	// FromLinear maps linear light back to an encoded value.
+	FromLinear func(linear float64) (out float64)
+
+	// MidTone is the mid-tone level in linear output light. This would typically be 0.18 for SDR (this matches
+	// the definition of Standard Output Sensitivity from ISO 12232:2019), but in
+	// HDR, we certainly do not want to consider 18% of the maximum output a
+	// “mid-tone”, as it would be e.g. 1800 cd/m² for SMPTE ST 2084 (PQ).
+	MidTone float64
+}