diff --git a/encoder/libaom/libaom.c b/encoder/libaom/libaom.c index 61b958d..704c876 100644 --- a/encoder/libaom/libaom.c +++ b/encoder/libaom/libaom.c @@ -12,6 +12,10 @@ aom_codec_err_t aom_codec_control_intptr(aom_codec_ctx_t *ctx, int ctrl_id, int* return aom_codec_control(ctx, ctrl_id, v); } +aom_codec_err_t aom_codec_control_charptr(aom_codec_ctx_t *ctx, int ctrl_id, const char* v) { + return aom_codec_control(ctx, ctrl_id, v); +} + void* aom_get_pkt_buf(aom_codec_cx_pkt_t *pkt){ return pkt->data.frame.buf; } diff --git a/encoder/libaom/libaom.go b/encoder/libaom/libaom.go index fcf424e..7ba1a78 100644 --- a/encoder/libaom/libaom.go +++ b/encoder/libaom/libaom.go @@ -13,11 +13,13 @@ import ( "git.gammaspectra.live/S.O.N.G/Ignite/color" "git.gammaspectra.live/S.O.N.G/Ignite/frame" "git.gammaspectra.live/S.O.N.G/Ignite/utilities" + "git.gammaspectra.live/S.O.N.G/Ignite/utilities/filmgrain" "git.gammaspectra.live/S.O.N.G/Ignite/utilities/obuwriter" "golang.org/x/exp/constraints" "io" "log" "maps" + "os" "runtime" "strconv" "strings" @@ -36,6 +38,7 @@ type Encoder struct { frameStatistics frame.FrameStatistics resourcePinner runtime.Pinner logger utilities.Logger + free []func() } var libaomVersion = "libaom-av1 " + C.GoString(C.aom_codec_version_str()) + " ABI " + strconv.FormatUint(C.AOM_ENCODER_ABI_VERSION, 10) @@ -56,7 +59,35 @@ func NewEncoder(w io.Writer, properties frame.StreamProperties, settings map[str logger: logger, } - clonedSettings := maps.Clone(settings) + clonedSettings := make(map[string]any) + maps.Copy(clonedSettings, settings) + + photonNoiseIso := getSettingUnsigned[uint](clonedSettings, "photon-noise-iso", 0) + photonNoiseTransferFunction := filmgrain.GetTransferFunction(getSettingString(clonedSettings, "photon-noise-transfer", "bt709")) + + if photonNoiseIso > 0 && photonNoiseTransferFunction != nil { + //create table + table, err := filmgrain.CreatePhotonNoiseTable(properties.Width, properties.Height, float64(photonNoiseIso), 
photonNoiseTransferFunction) + if err != nil { + return nil, err + } + + tmpFile, err := os.CreateTemp(os.TempDir(), "photon-table*.tbl") + if err != nil { + return nil, err + } + _, err = tmpFile.Write(table) + if err != nil { + return nil, err + } + tmpName := tmpFile.Name() + tmpFile.Close() + e.free = append(e.free, func() { + os.Remove(tmpName) + }) + clonedSettings["film-grain-table"] = tmpName + + } var aomErr C.aom_codec_err_t @@ -337,6 +368,9 @@ func NewEncoder(w io.Writer, properties frame.StreamProperties, settings map[str if aomErr = C.aom_codec_enc_init_ver(&e.codec, encoder, &e.cfg, flags, C.AOM_ENCODER_ABI_VERSION); aomErr != 0 { return nil, fmt.Errorf("failed to initialize encoder: err %d %s", aomErr, C.GoString(e.codec.err_detail)) } + e.free = append(e.free, func() { + C.aom_codec_destroy(&e.codec) + }) if properties.FullColorRange { if aomErr = C.aom_codec_control_uint(&e.codec, C.AV1E_SET_COLOR_RANGE, C.AOM_CR_FULL_RANGE); aomErr != 0 { @@ -380,6 +414,14 @@ func NewEncoder(w io.Writer, properties frame.StreamProperties, settings map[str } } + if fgt := getSettingString(clonedSettings, "film-grain-table", ""); fgt != "" { + strVal := C.CString(fgt) + defer C.free(unsafe.Pointer(strVal)) + if aomErr = C.aom_codec_control_charptr(&e.codec, C.AV1E_SET_FILM_GRAIN_TABLE, strVal); aomErr != 0 { + return nil, fmt.Errorf("error setting FILM_GRAIN_TABLE parameter: %s", C.GoString(C.aom_codec_error_detail(&e.codec))) + } + } + for k, v := range clonedSettings { if err := func() error { var strVal *C.char diff --git a/encoder/libaom/libaom.h b/encoder/libaom/libaom.h index deb95ba..b27c189 100644 --- a/encoder/libaom/libaom.h +++ b/encoder/libaom/libaom.h @@ -11,6 +11,8 @@ aom_codec_err_t aom_codec_control_uint(aom_codec_ctx_t *ctx, int ctrl_id, unsign aom_codec_err_t aom_codec_control_intptr(aom_codec_ctx_t *ctx, int ctrl_id, int* v); +aom_codec_err_t aom_codec_control_charptr(aom_codec_ctx_t *ctx, int ctrl_id, const char* v); + void* 
aom_get_pkt_buf(aom_codec_cx_pkt_t *pkt); size_t aom_get_pkt_sz(aom_codec_cx_pkt_t *pkt); aom_codec_pts_t aom_get_pkt_pts(aom_codec_cx_pkt_t *pkt); diff --git a/encoder/libaom/libaom_test.go b/encoder/libaom/libaom_test.go index d85a21d..f3eee89 100644 --- a/encoder/libaom/libaom_test.go +++ b/encoder/libaom/libaom_test.go @@ -6,6 +6,7 @@ import ( "git.gammaspectra.live/S.O.N.G/Ignite/decoder/y4m" "git.gammaspectra.live/S.O.N.G/Ignite/testdata" "git.gammaspectra.live/S.O.N.G/Ignite/utilities/testingutils" + "maps" "os" "runtime" "sync" @@ -16,7 +17,7 @@ func TestVersion(t *testing.T) { t.Logf("libaom version: %s", Version()) } -func testEncode(sample testdata.TestSample, t *testing.T) { +func testEncode(sample testdata.TestSample, t *testing.T, otherSettings map[string]any) { reader, err := sample.Open(t) if err != nil { t.Fatal(err) @@ -54,6 +55,7 @@ func testEncode(sample testdata.TestSample, t *testing.T) { stream := pipe.DecodeStream() settings := make(map[string]any) + maps.Copy(settings, otherSettings) settings["threads"] = runtime.NumCPU() settings["row-mt"] = 1 settings["cpu-used"] = 8 @@ -99,5 +101,23 @@ func TestEncode_YUV420_8bit(t *testing.T) { t.Skip("skipping test in short mode") } - testEncode(testdata.Y4M_Sintel_Trailer_720p24_YUV420_8bit, t) + testEncode(testdata.Y4M_Sintel_Trailer_720p24_YUV420_8bit, t, nil) +} + +func TestEncode_YUV444_8bit(t *testing.T) { + if testing.Short() { + t.Skip("skipping test in short mode") + } + + testEncode(testdata.Y4M_Ducks_Take_Off_720p50_YUV444_8bit, t, nil) +} + +func TestEncode_YUV444_8bit_PhotonNoise(t *testing.T) { + if testing.Short() { + t.Skip("skipping test in short mode") + } + + testEncode(testdata.Y4M_Ducks_Take_Off_720p50_YUV444_8bit, t, map[string]any{ + "photon-noise-iso": 64000, + }) } diff --git a/utilities/filmgrain/isotable.go b/utilities/filmgrain/isotable.go new file mode 100644 index 0000000..6b995eb --- /dev/null +++ b/utilities/filmgrain/isotable.go @@ -0,0 +1,160 @@ +package filmgrain + 
+import ( + "errors" + "fmt" + "math" + "strings" +) + +func CreatePhotonNoiseTable(width, height int, iso float64, transferFunction *TransferFunction) ([]byte, error) { + if transferFunction == nil { + return nil, errors.New("unknown transfer function") + } + + // Assumes a daylight-like spectrum. + // https://www.strollswithmydog.com/effective-quantum-efficiency-of-sensor/#:~:text=11%2C260%20photons/um%5E2/lx-s + const kPhotonsPerLxSPerUm2 = 11260 + + // Order of magnitude for cameras in the 2010-2020 decade, taking the CFA into + // account. + const kEffectiveQuantumEfficiency = 0.20 + + // Also reasonable values for current cameras. The read noise is typically + // higher than this at low ISO settings but it matters less there. + const kPhotoResponseNonUniformity = 0.005 + const kInputReferredReadNoise = 1.5 + + // Focal plane exposure for a mid-tone (typically a 18% reflectance card), in + // lx·s. + midToneExposure := 10 / float64(iso) + + // In microns. Assumes a 35mm sensor (36mm × 24mm). 
+ pixelAreaUm2 := float64((36000 * 24000) / (width * height)) + + midToneElectronsPerPixel := kEffectiveQuantumEfficiency * + kPhotonsPerLxSPerUm2 * + midToneExposure * pixelAreaUm2 + maxElectronsPerPixel := + midToneElectronsPerPixel / transferFunction.MidTone + + var filmGrain struct { + + // 8 bit values + scalingPointsY [14][2]int + numYPoints int // value: 0..14 + + // 8 bit values + scalingPointsCb [10][2]int + numCbPoints int // value: 0..10 + + // 8 bit values + scalingPointsCr [10][2]int + numCrPoints int // value: 0..10 + + arCoeffLag int // values: 0..3 + + // 8 bit values + arCoeffsY [24]int + arCoeffsCb [25]int + arCoeffsCr [25]int + } + + filmGrain.numYPoints = 14 + filmGrain.numCbPoints = 0 + filmGrain.numCrPoints = 0 + filmGrain.arCoeffLag = 0 + + for i := 0; i < filmGrain.numYPoints; i++ { + x := float64(i) / float64(filmGrain.numYPoints-1) + linear := transferFunction.ToLinear(x) + electronsPerPixel := maxElectronsPerPixel * linear + // Quadrature sum of the relevant sources of noise, in electrons rms. Photon + // shot noise is math.Sqrt(electrons) so we can skip the square root and the + // squaring. 
+ // https://en.wikipedia.org/wiki/Addition_in_quadrature + // https://doi.org/10.1117/3.725073 + noiseInElectrons := + math.Sqrt(kInputReferredReadNoise*kInputReferredReadNoise + + electronsPerPixel + + (kPhotoResponseNonUniformity * kPhotoResponseNonUniformity * + electronsPerPixel * electronsPerPixel)) + linearNoise := noiseInElectrons / maxElectronsPerPixel + linearRangeStart := max(0., linear-2*linearNoise) + linearRangeEnd := min(1., linear+2*linearNoise) + tfSlope := + (transferFunction.FromLinear(linearRangeEnd) - + transferFunction.FromLinear( + linearRangeStart)) / + (linearRangeEnd - linearRangeStart) + encodedNoise := linearNoise * tfSlope + + x = math.Round(255 * x) + encodedNoise = min(255., math.Round(255*7.88*encodedNoise)) + + filmGrain.scalingPointsY[i][0] = int(x) + filmGrain.scalingPointsY[i][1] = int(encodedNoise) + } + + var lines []string + lines = append(lines, "filmgrn1") + lines = append(lines, fmt.Sprintf("E %d %d %d %d %d", 0 /*start_time*/, math.MaxInt64 /*end_time*/, 1 /*apply_grain*/, 7391 /*seed*/, 1 /*update_parameters*/)) + lines = append(lines, fmt.Sprintf("\tp %d %d %d %d %d %d %d %d %d %d %d %d", + 0 /*ar_coeff_lag*/, 6 /*ar_coeff_shift*/, 0, /*grain_scale_shift*/ + 8 /*scaling_shift*/, 0, /*chroma_scaling_from_luma*/ + 1 /*overlap_flag*/, 0 /*cb_mult*/, 0, /*cb_luma_mult*/ + 0 /*cb_offset*/, 0 /*cr_mult*/, 0, /*cr_luma_mult*/ + 0 /*cr_offset*/)) + + { + line := fmt.Sprintf("\tsY %d ", filmGrain.numYPoints) + for i := 0; i < filmGrain.numYPoints; i++ { + line += fmt.Sprintf(" %d %d", filmGrain.scalingPointsY[i][0], + filmGrain.scalingPointsY[i][1]) + } + lines = append(lines, line) + } + + { + line := fmt.Sprintf("\tsCb %d", filmGrain.numCbPoints) + for i := 0; i < filmGrain.numCbPoints; i++ { + line += fmt.Sprintf(" %d %d", filmGrain.scalingPointsCb[i][0], + filmGrain.scalingPointsCb[i][1]) + } + lines = append(lines, line) + } + + { + line := fmt.Sprintf("\tsCr %d", filmGrain.numCrPoints) + for i := 0; i < 
filmGrain.numCrPoints; i++ { + line += fmt.Sprintf(" %d %d", filmGrain.scalingPointsCr[i][0], + filmGrain.scalingPointsCr[i][1]) + } + lines = append(lines, line) + } + + n := 2 * filmGrain.arCoeffLag * (filmGrain.arCoeffLag + 1) + { + line := "\tcY" + for i := 0; i < n; i++ { + line += fmt.Sprintf(" %d", filmGrain.arCoeffsY[i]) + } + lines = append(lines, line) + } + { + line := "\tcCb" + for i := 0; i <= n; i++ { + line += fmt.Sprintf(" %d", filmGrain.arCoeffsCb[i]) + } + lines = append(lines, line) + } + { + line := "\tcCr" + for i := 0; i <= n; i++ { + line += fmt.Sprintf(" %d", filmGrain.arCoeffsCr[i]) + } + lines = append(lines, line) + } + + return []byte(strings.Join(lines, "\n") + "\n"), nil +} diff --git a/utilities/filmgrain/isotable_test.go b/utilities/filmgrain/isotable_test.go new file mode 100644 index 0000000..079fd0c --- /dev/null +++ b/utilities/filmgrain/isotable_test.go @@ -0,0 +1,28 @@ +package filmgrain + +import ( + "bytes" + "testing" +) + +const testGrainTable = `filmgrn1 +E 0 9223372036854775807 1 7391 1 + p 0 6 0 8 0 1 0 0 0 0 0 0 + sY 14 0 17 20 4 39 3 59 3 78 2 98 2 118 2 137 3 157 3 177 3 196 3 216 3 235 4 255 4 + sCb 0 + sCr 0 + cY + cCb 0 + cCr 0 +` + +func TestCreatePhotonNoiseTable(t *testing.T) { + table, err := CreatePhotonNoiseTable(1280, 720, 400, GetTransferFunction("bt470bg")) + if err != nil { + t.Fatal(err) + } + + if bytes.Compare(table, []byte(testGrainTable)) != 0 { + t.Fatal("table is different") + } +} diff --git a/utilities/filmgrain/transfer.go b/utilities/filmgrain/transfer.go new file mode 100644 index 0000000..67d3051 --- /dev/null +++ b/utilities/filmgrain/transfer.go @@ -0,0 +1,141 @@ +package filmgrain + +import ( + "math" + "strings" +) + +//TODO: transfer function type? 
// GetTransferFunction returns the transfer characteristics registered under
// kind, or nil when kind is not recognized. The lookup is case-insensitive.
//
// Recognized names:
//   - "bt470m": pure power law with gamma 2.2
//   - "bt470bg": pure power law with gamma 2.8
//   - "bt601", "bt709", "bt2020": piecewise curve with a linear toe
//   - "srgb"
//   - "smpte2084", "pq"
//   - "hlg"
//
// Every call builds a new TransferFunction value.
func GetTransferFunction(kind string) *TransferFunction {
	switch strings.ToLower(kind) {
	case "bt470m":
		// gamma22
		return &TransferFunction{
			ToLinear: func(in float64) (linear float64) {
				return math.Pow(in, 2.2)
			},
			FromLinear: func(linear float64) (out float64) {
				return math.Pow(linear, 1/2.2)
			},
			MidTone: 0.18,
		}
	case "bt470bg":
		// gamma28
		return &TransferFunction{
			ToLinear: func(in float64) (linear float64) {
				return math.Pow(in, 2.8)
			},
			FromLinear: func(linear float64) (out float64) {
				return math.Pow(linear, 1/2.8)
			},
			MidTone: 0.18,
		}
	case "bt601", "bt709", "bt2020":
		// TODO: are bt601 and bt709 the same?
		// bt2020 is the same as bt709, just defined more precisely, so the
		// same constants are used for all three.
		const beta = 0.018053968510807
		const alpha = 1 + 5.5*beta
		return &TransferFunction{
			ToLinear: func(in float64) (linear float64) {
				// Linear toe below the breakpoint, power law above it.
				if in < 0.081 {
					return in / 4.5
				}
				return math.Pow((in+(alpha-1))/alpha, 1/0.45)
			},
			FromLinear: func(linear float64) (out float64) {
				if linear < beta {
					return 4.5 * linear
				}
				return alpha*math.Pow(linear, 0.45) - (alpha - 1)
			},
			// TODO: check this for bt2020?
			MidTone: 0.18,
		}
	case "srgb":
		// sRGB
		return &TransferFunction{
			ToLinear: func(in float64) (linear float64) {
				if in <= 0.04045 {
					return in / 12.92
				}
				return math.Pow((in+0.055)/1.055, 2.4)
			},
			FromLinear: func(linear float64) (out float64) {
				if linear <= 0.0031308 {
					return 12.92 * linear
				}
				return 1.055*math.Pow(linear, 1/2.4) - 0.055
			},
			MidTone: 0.18,
		}
	case "smpte2084", "pq":
		// pq
		const (
			pqM1 = 2610. / 16384
			pqM2 = 128 * 2523. / 4096
			pqC1 = 3424. / 4096
			pqC2 = 32 * 2413. / 4096
			pqC3 = 32 * 2392. / 4096
		)
		return &TransferFunction{
			ToLinear: func(in float64) (linear float64) {
				powInvM2 := math.Pow(in, 1./pqM2)
				// The numerator is clamped at 0 so that the outer power never
				// sees a negative base.
				return math.Pow(max(0, powInvM2-pqC1)/(pqC2-pqC3*powInvM2),
					1./pqM1)
			},
			FromLinear: func(linear float64) (out float64) {
				powM1 := math.Pow(linear, pqM1)
				return math.Pow((pqC1+pqC2*powM1)/(1+pqC3*powM1),
					pqM2)
			},
			MidTone: 26. / 1000,
		}
	case "hlg":
		// hlg
		// Note: it is perhaps debatable whether “linear” for HLG should be scene light
		// or display light. Here, it is implemented in terms of display light assuming
		// a nominal peak display luminance of 1000 cd/m², hence the system γ of 1.2. To
		// make it scene light instead, the OOTF (math.Pow(x, 1.2)) and its inverse should
		// be removed from the functions below, and the TransferFunction.MidTone should be replaced
		// with math.Pow(26. / 1000, 1 / 1.2).
		const (
			hlgA = 0.17883277
			hlgB = 0.28466892
			hlgC = 0.55991073
		)
		return &TransferFunction{
			ToLinear: func(in float64) (linear float64) {
				// EOTF = OOTF ∘ OETF⁻¹
				var sceneLight float64
				if in <= 0.5 {
					sceneLight = in * in / 3
				} else {
					sceneLight = (math.Exp((in-hlgC)/hlgA) + hlgB) / 12
				}
				return math.Pow(sceneLight, 1.2)
			},
			FromLinear: func(linear float64) (out float64) {
				// EOTF⁻¹ = OETF ∘ OOTF⁻¹
				sceneLight := math.Pow(linear, 1./1.2)
				if sceneLight <= (1. / 12) {
					return math.Sqrt(3 * sceneLight)
				}
				return hlgA*math.Log(12*sceneLight-hlgB) + hlgC
			},
			MidTone: 26. / 1000,
		}
	}
	return nil
}

// TransferFunction describes how encoded sample values relate to linear
// light. The functions returned by GetTransferFunction work on values
// normalized to the [0, 1] range.
type TransferFunction struct {
	// ToLinear converts an encoded value to linear light.
	ToLinear func(in float64) (linear float64)
	// FromLinear converts linear light back to an encoded value; it is the
	// inverse of ToLinear.
	FromLinear func(linear float64) (out float64)

	// MidTone is the mid-tone level, in linear output light. This would
	// typically be 0.18 for SDR (this matches the definition of Standard
	// Output Sensitivity from ISO 12232:2019), but in HDR, we certainly do not
	// want to consider 18% of the maximum output a “mid-tone”, as it would be
	// e.g. 1800 cd/m² for SMPTE ST 2084 (PQ).
	MidTone float64
}