Added ISO Noise table generator for libaom
Some checks failed
continuous-integration/drone/push Build is failing

This commit is contained in:
DataHoarder 2023-11-03 18:52:15 +01:00
parent 1297a5abce
commit c533ad4386
Signed by: DataHoarder
SSH key fingerprint: SHA256:OLTRf6Fl87G52SiR7sWLGNzlJt4WOX+tfI2yxo0z7xk
7 changed files with 400 additions and 3 deletions

View file

@ -12,6 +12,10 @@ aom_codec_err_t aom_codec_control_intptr(aom_codec_ctx_t *ctx, int ctrl_id, int*
return aom_codec_control(ctx, ctrl_id, v);
}
// Forwards a codec control whose argument is a C string to aom_codec_control
// and hands back its status code unchanged.
// NOTE(review): presumably exists so the Go side has a fixed-signature entry
// point to call through cgo -- confirm against the libaom headers in use.
aom_codec_err_t aom_codec_control_charptr(aom_codec_ctx_t *ctx, int ctrl_id, const char *v) {
    const aom_codec_err_t status = aom_codec_control(ctx, ctrl_id, v);
    return status;
}
// Accessor for the frame payload pointer stored in an encoder output packet
// (pkt->data.frame.buf). pkt must not be NULL; no check is performed.
void *aom_get_pkt_buf(aom_codec_cx_pkt_t *pkt) {
    void *payload = pkt->data.frame.buf;
    return payload;
}

View file

@ -13,11 +13,13 @@ import (
"git.gammaspectra.live/S.O.N.G/Ignite/color"
"git.gammaspectra.live/S.O.N.G/Ignite/frame"
"git.gammaspectra.live/S.O.N.G/Ignite/utilities"
"git.gammaspectra.live/S.O.N.G/Ignite/utilities/filmgrain"
"git.gammaspectra.live/S.O.N.G/Ignite/utilities/obuwriter"
"golang.org/x/exp/constraints"
"io"
"log"
"maps"
"os"
"runtime"
"strconv"
"strings"
@ -36,6 +38,7 @@ type Encoder struct {
frameStatistics frame.FrameStatistics
resourcePinner runtime.Pinner
logger utilities.Logger
free []func()
}
// libaomVersion is the identification string for this backend: the fixed
// "libaom-av1 " prefix, the version string reported by aom_codec_version_str,
// and the encoder ABI version (AOM_ENCODER_ABI_VERSION) in decimal.
var libaomVersion = "libaom-av1 " + C.GoString(C.aom_codec_version_str()) + " ABI " + strconv.FormatUint(C.AOM_ENCODER_ABI_VERSION, 10)
@ -56,7 +59,35 @@ func NewEncoder(w io.Writer, properties frame.StreamProperties, settings map[str
logger: logger,
}
clonedSettings := maps.Clone(settings)
clonedSettings := make(map[string]any)
maps.Copy(clonedSettings, settings)
photonNoiseIso := getSettingUnsigned[uint](clonedSettings, "photon-noise-iso", 0)
photonNoiseTransferFunction := filmgrain.GetTransferFunction(getSettingString(clonedSettings, "photon-noise-transfer", "bt709"))
if photonNoiseIso > 0 && photonNoiseTransferFunction != nil {
//create table
table, err := filmgrain.CreatePhotonNoiseTable(properties.Width, properties.Height, float64(photonNoiseIso), photonNoiseTransferFunction)
if err != nil {
return nil, err
}
tmpFile, err := os.CreateTemp(os.TempDir(), "photon-table*.tbl")
if err != nil {
return nil, err
}
_, err = tmpFile.Write(table)
if err != nil {
return nil, err
}
tmpName := tmpFile.Name()
tmpFile.Close()
e.free = append(e.free, func() {
os.Remove(tmpName)
})
clonedSettings["film-grain-table"] = tmpName
}
var aomErr C.aom_codec_err_t
@ -337,6 +368,9 @@ func NewEncoder(w io.Writer, properties frame.StreamProperties, settings map[str
if aomErr = C.aom_codec_enc_init_ver(&e.codec, encoder, &e.cfg, flags, C.AOM_ENCODER_ABI_VERSION); aomErr != 0 {
return nil, fmt.Errorf("failed to initialize encoder: err %d %s", aomErr, C.GoString(e.codec.err_detail))
}
e.free = append(e.free, func() {
C.aom_codec_destroy(&e.codec)
})
if properties.FullColorRange {
if aomErr = C.aom_codec_control_uint(&e.codec, C.AV1E_SET_COLOR_RANGE, C.AOM_CR_FULL_RANGE); aomErr != 0 {
@ -380,6 +414,14 @@ func NewEncoder(w io.Writer, properties frame.StreamProperties, settings map[str
}
}
if fgt := getSettingString(clonedSettings, "film-grain-table", ""); fgt != "" {
strVal := C.CString(fgt)
defer C.free(unsafe.Pointer(strVal))
if aomErr = C.aom_codec_control_charptr(&e.codec, C.AV1E_SET_FILM_GRAIN_TABLE, strVal); aomErr != 0 {
return nil, fmt.Errorf("error setting FILM_GRAIN_TABLE parameter: %s", C.GoString(C.aom_codec_error_detail(&e.codec)))
}
}
for k, v := range clonedSettings {
if err := func() error {
var strVal *C.char

View file

@ -11,6 +11,8 @@ aom_codec_err_t aom_codec_control_uint(aom_codec_ctx_t *ctx, int ctrl_id, unsign
// Forwards a control whose argument is an int pointer to aom_codec_control.
aom_codec_err_t aom_codec_control_intptr(aom_codec_ctx_t *ctx, int ctrl_id, int* v);
// Forwards a control whose argument is a C string to aom_codec_control.
aom_codec_err_t aom_codec_control_charptr(aom_codec_ctx_t *ctx, int ctrl_id, const char* v);
// Returns the frame payload pointer of the packet (pkt->data.frame.buf).
void* aom_get_pkt_buf(aom_codec_cx_pkt_t *pkt);
// NOTE(review): presumably the byte length of the frame payload returned by
// aom_get_pkt_buf -- implementation not visible here, confirm.
size_t aom_get_pkt_sz(aom_codec_cx_pkt_t *pkt);
// NOTE(review): presumably the presentation timestamp of the packet's frame --
// implementation not visible here, confirm.
aom_codec_pts_t aom_get_pkt_pts(aom_codec_cx_pkt_t *pkt);

View file

@ -6,6 +6,7 @@ import (
"git.gammaspectra.live/S.O.N.G/Ignite/decoder/y4m"
"git.gammaspectra.live/S.O.N.G/Ignite/testdata"
"git.gammaspectra.live/S.O.N.G/Ignite/utilities/testingutils"
"maps"
"os"
"runtime"
"sync"
@ -16,7 +17,7 @@ func TestVersion(t *testing.T) {
t.Logf("libaom version: %s", Version())
}
func testEncode(sample testdata.TestSample, t *testing.T) {
func testEncode(sample testdata.TestSample, t *testing.T, otherSettings map[string]any) {
reader, err := sample.Open(t)
if err != nil {
t.Fatal(err)
@ -54,6 +55,7 @@ func testEncode(sample testdata.TestSample, t *testing.T) {
stream := pipe.DecodeStream()
settings := make(map[string]any)
maps.Copy(settings, otherSettings)
settings["threads"] = runtime.NumCPU()
settings["row-mt"] = 1
settings["cpu-used"] = 8
@ -99,5 +101,23 @@ func TestEncode_YUV420_8bit(t *testing.T) {
t.Skip("skipping test in short mode")
}
testEncode(testdata.Y4M_Sintel_Trailer_720p24_YUV420_8bit, t)
testEncode(testdata.Y4M_Sintel_Trailer_720p24_YUV420_8bit, t, nil)
}
func TestEncode_YUV444_8bit(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode")
}
testEncode(testdata.Y4M_Ducks_Take_Off_720p50_YUV444_8bit, t, nil)
}
func TestEncode_YUV444_8bit_PhotonNoise(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode")
}
testEncode(testdata.Y4M_Ducks_Take_Off_720p50_YUV444_8bit, t, map[string]any{
"photon-noise-iso": 64000,
})
}

View file

@ -0,0 +1,160 @@
package filmgrain
import (
"errors"
"fmt"
"math"
"strings"
)
// CreatePhotonNoiseTable builds a film grain table in the textual "filmgrn1"
// format that models sensor noise for a frame of width×height pixels shot at
// the given ISO setting.
//
// The frame is treated as a 36mm × 24mm sensor divided into width×height
// pixels. For 14 luma levels spread evenly over the encoded range, the noise
// (read noise, photon shot noise and photo response non-uniformity added in
// quadrature) is computed in linear light and mapped back to encoded values
// through transferFunction. Only the luma scaling points are populated: the
// chroma scaling point lists are empty and every auto-regression coefficient
// is zero.
//
// The table consists of a single entry covering timestamps 0..math.MaxInt64.
//
// An error is returned when transferFunction is nil or lacks one of its
// conversion functions, when width or height is not positive, when iso is not
// a positive finite number, or when the resulting electron count is not a
// positive finite number (for example because MidTone is zero or negative).
func CreatePhotonNoiseTable(width, height int, iso float64, transferFunction *TransferFunction) ([]byte, error) {
	if transferFunction == nil {
		return nil, errors.New("unknown transfer function")
	}
	if transferFunction.ToLinear == nil || transferFunction.FromLinear == nil {
		return nil, errors.New("incomplete transfer function")
	}
	if width <= 0 || height <= 0 {
		return nil, fmt.Errorf("invalid frame size %dx%d", width, height)
	}
	if iso <= 0 || math.IsNaN(iso) || math.IsInf(iso, 0) {
		return nil, fmt.Errorf("invalid ISO value %v", iso)
	}

	const (
		// Assumes a daylight-like spectrum.
		// https://www.strollswithmydog.com/effective-quantum-efficiency-of-sensor/#:~:text=11%2C260%20photons/um%5E2/lx-s
		kPhotonsPerLxSPerUm2 = 11260

		// Order of magnitude for cameras in the 2010-2020 decade, taking the
		// CFA into account.
		kEffectiveQuantumEfficiency = 0.20

		// Also reasonable values for current cameras. The read noise is
		// typically higher than this at low ISO settings but it matters less
		// there.
		kPhotoResponseNonUniformity = 0.005
		kInputReferredReadNoise     = 1.5

		// Area of a 35mm sensor (36mm × 24mm) in square microns.
		kSensorAreaUm2 = 36000.0 * 24000.0

		// Shape of the emitted table.
		numYPoints  = 14 // value: 0..14
		numCbPoints = 0  // value: 0..10
		numCrPoints = 0  // value: 0..10
		arCoeffLag  = 0  // values: 0..3
	)

	// Focal plane exposure for a mid-tone (typically a 18% reflectance card),
	// in lx·s.
	midToneExposure := 10 / iso

	// In square microns. The division is done in floating point so the area is
	// not truncated to a whole number and large frame sizes cannot overflow
	// the integer product.
	pixelAreaUm2 := kSensorAreaUm2 / (float64(width) * float64(height))

	midToneElectronsPerPixel := kEffectiveQuantumEfficiency *
		kPhotonsPerLxSPerUm2 *
		midToneExposure * pixelAreaUm2
	maxElectronsPerPixel := midToneElectronsPerPixel / transferFunction.MidTone
	if !(maxElectronsPerPixel > 0) || math.IsInf(maxElectronsPerPixel, 0) {
		return nil, fmt.Errorf("photon noise model out of range (iso %v, mid-tone %v)", iso, transferFunction.MidTone)
	}

	// All values below are 8 bit.
	var (
		scalingPointsY  [14][2]int
		scalingPointsCb [10][2]int
		scalingPointsCr [10][2]int
		arCoeffsY       [24]int
		arCoeffsCb      [25]int
		arCoeffsCr      [25]int
	)

	for i := range scalingPointsY[:numYPoints] {
		x := float64(i) / float64(numYPoints-1)
		linear := transferFunction.ToLinear(x)
		electronsPerPixel := maxElectronsPerPixel * linear

		// Quadrature sum of the relevant sources of noise, in electrons rms.
		// Photon shot noise is math.Sqrt(electrons) so we can skip the square
		// root and the squaring.
		// https://en.wikipedia.org/wiki/Addition_in_quadrature
		// https://doi.org/10.1117/3.725073
		noiseInElectrons := math.Sqrt(kInputReferredReadNoise*kInputReferredReadNoise +
			electronsPerPixel +
			(kPhotoResponseNonUniformity * kPhotoResponseNonUniformity *
				electronsPerPixel * electronsPerPixel))
		linearNoise := noiseInElectrons / maxElectronsPerPixel

		// Slope of the transfer function measured across ±2 noise standard
		// deviations around this level, clamped to the [0, 1] linear range.
		linearRangeStart := max(0., linear-2*linearNoise)
		linearRangeEnd := min(1., linear+2*linearNoise)
		tfSlope := (transferFunction.FromLinear(linearRangeEnd) -
			transferFunction.FromLinear(linearRangeStart)) /
			(linearRangeEnd - linearRangeStart)
		encodedNoise := linearNoise * tfSlope

		// Quantize both coordinates to the 8-bit range the table stores.
		scalingPointsY[i][0] = int(math.Round(255 * x))
		scalingPointsY[i][1] = int(min(255., math.Round(255*7.88*encodedNoise)))
	}

	var b strings.Builder
	b.Grow(256)
	b.WriteString("filmgrn1\n")
	// The end time is converted to int64 explicitly: as an untyped constant it
	// would default to int and overflow on 32-bit platforms.
	fmt.Fprintf(&b, "E %d %d %d %d %d\n",
		0 /*start_time*/, int64(math.MaxInt64), /*end_time*/
		1 /*apply_grain*/, 7391 /*seed*/, 1 /*update_parameters*/)
	fmt.Fprintf(&b, "\tp %d %d %d %d %d %d %d %d %d %d %d %d\n",
		arCoeffLag /*ar_coeff_lag*/, 6 /*ar_coeff_shift*/, 0, /*grain_scale_shift*/
		8 /*scaling_shift*/, 0, /*chroma_scaling_from_luma*/
		1 /*overlap_flag*/, 0 /*cb_mult*/, 0, /*cb_luma_mult*/
		0 /*cb_offset*/, 0 /*cr_mult*/, 0, /*cr_luma_mult*/
		0 /*cr_offset*/)

	// The luma line carries an extra space after the point count; the chroma
	// lines do not.
	fmt.Fprintf(&b, "\tsY %d ", numYPoints)
	appendScalingPoints(&b, scalingPointsY[:numYPoints])
	fmt.Fprintf(&b, "\n\tsCb %d", numCbPoints)
	appendScalingPoints(&b, scalingPointsCb[:numCbPoints])
	fmt.Fprintf(&b, "\n\tsCr %d", numCrPoints)
	appendScalingPoints(&b, scalingPointsCr[:numCrPoints])

	// Luma gets n coefficients, each chroma plane gets n+1.
	n := 2 * arCoeffLag * (arCoeffLag + 1)
	b.WriteString("\n\tcY")
	appendARCoeffs(&b, arCoeffsY[:n])
	b.WriteString("\n\tcCb")
	appendARCoeffs(&b, arCoeffsCb[:n+1])
	b.WriteString("\n\tcCr")
	appendARCoeffs(&b, arCoeffsCr[:n+1])
	b.WriteByte('\n')

	return []byte(b.String()), nil
}

// appendScalingPoints writes each (value, scaling) pair as " value scaling".
func appendScalingPoints(b *strings.Builder, points [][2]int) {
	for _, p := range points {
		fmt.Fprintf(b, " %d %d", p[0], p[1])
	}
}

// appendARCoeffs writes each coefficient preceded by a single space.
func appendARCoeffs(b *strings.Builder, coeffs []int) {
	for _, c := range coeffs {
		fmt.Fprintf(b, " %d", c)
	}
}

View file

@ -0,0 +1,28 @@
package filmgrain
import (
"bytes"
"testing"
)
// testGrainTable is the table TestCreatePhotonNoiseTable expects from
// CreatePhotonNoiseTable. It is written with explicit escapes because the
// comparison is byte for byte and the generator emits significant whitespace:
// every line after the "E" entry line starts with a tab, and the sY line has
// two spaces between the point count and the first point.
const testGrainTable = "filmgrn1\n" +
	"E 0 9223372036854775807 1 7391 1\n" +
	"\tp 0 6 0 8 0 1 0 0 0 0 0 0\n" +
	"\tsY 14  0 17 20 4 39 3 59 3 78 2 98 2 118 2 137 3 157 3 177 3 196 3 216 3 235 4 255 4\n" +
	"\tsCb 0\n" +
	"\tsCr 0\n" +
	"\tcY\n" +
	"\tcCb 0\n" +
	"\tcCr 0\n"
// TestCreatePhotonNoiseTable generates the table for a 1280x720 frame at ISO
// 400 with the "bt470bg" transfer function and requires it to match
// testGrainTable byte for byte. On mismatch both tables are printed quoted so
// whitespace differences are visible.
func TestCreatePhotonNoiseTable(t *testing.T) {
	table, err := CreatePhotonNoiseTable(1280, 720, 400, GetTransferFunction("bt470bg"))
	if err != nil {
		t.Fatal(err)
	}
	if want := []byte(testGrainTable); !bytes.Equal(table, want) {
		t.Fatalf("table is different\n got: %q\nwant: %q", table, want)
	}
}

View file

@ -0,0 +1,141 @@
package filmgrain
import (
"math"
"strings"
)
// GetTransferFunction looks up a transfer characteristic by name, ignoring
// case, and returns a freshly allocated TransferFunction for it. Recognised
// names are "bt470m", "bt470bg", "bt601", "bt709", "bt2020", "srgb",
// "smpte2084", "pq" and "hlg"; any other name yields nil.
func GetTransferFunction(kind string) *TransferFunction {
	//TODO: transfer function type?
	switch strings.ToLower(kind) {
	case "bt470m":
		// gamma22
		return powerLawTransfer(2.2, 1/2.2)
	case "bt470bg":
		// gamma28
		return powerLawTransfer(2.8, 1/2.8)
	case "bt601", "bt709", "bt2020":
		return bt709Transfer()
	case "srgb":
		return srgbTransfer()
	case "smpte2084", "pq":
		return pqTransfer()
	case "hlg":
		return hlgTransfer()
	default:
		return nil
	}
}

// powerLawTransfer returns a pure power-law curve: decoding raises the encoded
// value to toLinearExp, encoding raises the linear value to fromLinearExp.
func powerLawTransfer(toLinearExp, fromLinearExp float64) *TransferFunction {
	return &TransferFunction{
		ToLinear: func(encoded float64) float64 {
			return math.Pow(encoded, toLinearExp)
		},
		FromLinear: func(light float64) float64 {
			return math.Pow(light, fromLinearExp)
		},
		MidTone: 0.18,
	}
}

// bt709Transfer returns the curve shared by the "bt601", "bt709" and "bt2020"
// names: a linear segment near black followed by a 0.45 power segment.
func bt709Transfer() *TransferFunction {
	//TODO: are bt601 and bt709 the same?
	//bt2020. same as bt709, just defined more precise. Can use the same for all
	const (
		beta  = 0.018053968510807
		alpha = 1 + 5.5*beta
	)
	decode := func(encoded float64) float64 {
		if encoded < 0.081 {
			return encoded / 4.5
		}
		return math.Pow((encoded+(alpha-1))/alpha, 1/0.45)
	}
	encode := func(light float64) float64 {
		if light < beta {
			return 4.5 * light
		}
		return alpha*math.Pow(light, 0.45) - (alpha - 1)
	}
	return &TransferFunction{
		ToLinear:   decode,
		FromLinear: encode,
		//TODO: check this for bt2020?
		MidTone: 0.18,
	}
}

// srgbTransfer returns the sRGB curve: a linear segment near black followed by
// a 2.4 power segment.
func srgbTransfer() *TransferFunction {
	decode := func(encoded float64) float64 {
		if encoded <= 0.04045 {
			return encoded / 12.92
		}
		return math.Pow((encoded+0.055)/1.055, 2.4)
	}
	encode := func(light float64) float64 {
		if light <= 0.0031308 {
			return 12.92 * light
		}
		return 1.055*math.Pow(light, 1/2.4) - 0.055
	}
	return &TransferFunction{ToLinear: decode, FromLinear: encode, MidTone: 0.18}
}

// pqTransfer returns the curve used for the "smpte2084" and "pq" names.
func pqTransfer() *TransferFunction {
	const (
		pqM1 = 2610. / 16384
		pqM2 = 128 * 2523. / 4096
		pqC1 = 3424. / 4096
		pqC2 = 32 * 2413. / 4096
		pqC3 = 32 * 2392. / 4096
	)
	decode := func(encoded float64) float64 {
		root := math.Pow(encoded, 1./pqM2)
		return math.Pow(max(0, root-pqC1)/(pqC2-pqC3*root),
			1./pqM1)
	}
	encode := func(light float64) float64 {
		powered := math.Pow(light, pqM1)
		return math.Pow((pqC1+pqC2*powered)/(1+pqC3*powered),
			pqM2)
	}
	return &TransferFunction{ToLinear: decode, FromLinear: encode, MidTone: 26. / 1000}
}

// hlgTransfer returns the curve used for the "hlg" name.
//
// Note: it is perhaps debatable whether “linear” for HLG should be scene light
// or display light. Here, it is implemented in terms of display light assuming
// a nominal peak display luminance of 1000 cd/m², hence the system γ of 1.2. To
// make it scene light instead, the OOTF (math.Pow(x, 1.2)) and its inverse
// should be removed from the functions below, and TransferFunction.MidTone
// should be replaced with math.Pow(26. / 1000, 1 / 1.2).
func hlgTransfer() *TransferFunction {
	const (
		hlgA = 0.17883277
		hlgB = 0.28466892
		hlgC = 0.55991073
	)
	// EOTF = OOTF ∘ OETF⁻¹
	decode := func(encoded float64) float64 {
		var scene float64
		if encoded <= 0.5 {
			scene = encoded * encoded / 3
		} else {
			scene = (math.Exp((encoded-hlgC)/hlgA) + hlgB) / 12
		}
		return math.Pow(scene, 1.2)
	}
	// EOTF⁻¹ = OETF ∘ OOTF⁻¹
	encode := func(light float64) float64 {
		scene := math.Pow(light, 1./1.2)
		if scene <= (1. / 12) {
			return math.Sqrt(3 * scene)
		}
		return hlgA*math.Log(12*scene-hlgB) + hlgC
	}
	return &TransferFunction{ToLinear: decode, FromLinear: encode, MidTone: 26. / 1000}
}

// TransferFunction bundles the two directions of a transfer characteristic
// with the linear level regarded as a mid-tone for that characteristic.
type TransferFunction struct {
	// ToLinear converts an encoded value to linear light.
	ToLinear func(in float64) (linear float64)
	// FromLinear converts linear light to an encoded value.
	FromLinear func(linear float64) (out float64)
	// MidTone is expressed in linear output light. This would typically be 0.18
	// for SDR (this matches the definition of Standard Output Sensitivity from
	// ISO 12232:2019), but in HDR, we certainly do not want to consider 18% of
	// the maximum output a “mid-tone”, as it would be e.g. 1800 cd/m² for
	// SMPTE ST 2084 (PQ).
	MidTone float64
}