Use float32 instead of float64 internally

This commit is contained in:
Sean DuBois 2022-09-20 14:17:13 -04:00
parent c631824345
commit 9e56e15906
4 changed files with 55 additions and 52 deletions

View file

@ -19,7 +19,7 @@ func NewDecoder() Decoder {
}
// Decode decodes the Opus bitstream into PCM
func (d *Decoder) Decode(in []byte, out []float64) (bandwidth Bandwidth, isStereo bool, err error) {
func (d *Decoder) Decode(in []byte, out []float32) (bandwidth Bandwidth, isStereo bool, err error) {
if len(in) < 1 {
return 0, false, errTooShortForTableOfContentsHeader
}

View file

@ -23,7 +23,7 @@ type Decoder struct {
// (for WB frames).
//
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.9.2
finalLPCValues []float64
finalLPCValues []float32
// n0Q15 are the LSF coefficients decoded for the prior frame
// see normalizeLSFInterpolation
@ -33,7 +33,7 @@ type Decoder struct {
// NewDecoder creates a new Silk Decoder
func NewDecoder() Decoder {
return Decoder{
finalLPCValues: make([]float64, 16),
finalLPCValues: make([]float32, 16),
}
}
@ -106,9 +106,9 @@ func (d *Decoder) determineFrameType(voiceActivityDetected bool) (signalType fra
// A separate quantization gain is coded for each 5 ms subframe
//
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.4
func (d *Decoder) decodeSubframeQuantizations(signalType frameSignalType) (gainQ16 []float64) {
func (d *Decoder) decodeSubframeQuantizations(signalType frameSignalType) (gainQ16 []float32) {
var logGain, deltaGainIndex, gainIndex int32
gainQ16 = make([]float64, 4)
gainQ16 = make([]float32, 4)
for subframeIndex := 0; subframeIndex < subframeCount; subframeIndex++ {
@ -176,7 +176,7 @@ func (d *Decoder) decodeSubframeQuantizations(signalType frameSignalType) (gainQ
// between 81920 and 1686110208, inclusive (representing scale factors
// of 1.25 to 25728, respectively).
gainQ16[subframeIndex] = float64((1 << i) + ((-174*f*(128-f)>>16)+f)*((1<<i)>>7))
gainQ16[subframeIndex] = float32((1 << i) + ((-174*f*(128-f)>>16)+f)*((1<<i)>>7))
}
return
@ -1056,8 +1056,8 @@ func (d *Decoder) limitLPCCoefficientsRange(a32Q17 []int32) {
// gain.
//
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.5.8
func (d *Decoder) limitLPCFilterPredictionGain(a32Q17 []int32) (aQ12 []float64) {
aQ12 = make([]float64, len(a32Q17))
func (d *Decoder) limitLPCFilterPredictionGain(a32Q17 []int32) (aQ12 []float32) {
aQ12 = make([]float32, len(a32Q17))
// However, silk_LPC_inverse_pred_gain_QA() approximates this using
// fixed-point arithmetic to guarantee reproducible results across
@ -1070,7 +1070,7 @@ func (d *Decoder) limitLPCFilterPredictionGain(a32Q17 []int32) (aQ12 []float64)
//
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.7.5.8
for n := range a32Q17 {
aQ12[n] = float64((a32Q17[n] + 16) >> 5)
aQ12[n] = float32((a32Q17[n] + 16) >> 5)
}
return
@ -1232,7 +1232,7 @@ func (d *Decoder) decodePitchLags(signalType frameSignalType, bandwidth Bandwidt
// packets against the recovery time after packet loss.
//
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.6.3
func (d *Decoder) decodeLTPScalingParamater(signalType frameSignalType) (LTPscaleQ14 float64) {
func (d *Decoder) decodeLTPScalingParamater(signalType frameSignalType) (LTPscaleQ14 float32) {
// An LTP scaling parameter appears after the LTP filter coefficients if
// and only if
//
@ -1332,17 +1332,17 @@ func (d *Decoder) samplesInSubframe(bandwidth Bandwidth) int {
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.9.1
func (d *Decoder) ltpSynthesis(
out []float64,
out []float32,
signalType frameSignalType,
bQ7 []int8,
pitchLags []int,
eQ23 []int32,
n, j, s, dLPC int,
LTPScaleQ14 float64,
LTPScaleQ14 float32,
bandwidth Bandwidth,
wQ2 int16,
aQ12, gainQ16, lpc []float64,
) (res []float64) {
aQ12, gainQ16, lpc []float32,
) (res []float32) {
// For unvoiced frames (see Section 4.2.7.3), the LPC residual for i
// such that j <= i < (j + n) is simply a normalized copy of the
// excitation signal, i.e.,
@ -1351,10 +1351,10 @@ func (d *Decoder) ltpSynthesis(
// res[i] = ---------
// 2.0**23
res = make([]float64, len(eQ23))
res = make([]float32, len(eQ23))
if signalType != frameSignalTypeVoiced {
for i := j; i < (j + n); i++ {
res[i] = float64(eQ23[i]) / 8388608
res[i] = float32(eQ23[i]) / 8388608
}
return
}
@ -1388,7 +1388,7 @@ func (d *Decoder) ltpSynthesis(
// out[i] - \ out[i-k-1] * --------, 1.0)
// /_ 4096.0
// k=0
var outVal float64
var outVal float32
for i := (j - pitchLags[s] - 2); i < out_end; i++ {
index := i + j
if index < 0 || index >= len(res) || index >= len(out) {
@ -1427,7 +1427,7 @@ func (d *Decoder) ltpSynthesis(
// lpc[i] in Section 4.2.7.9.2, the output of this latter equation is
// merely a scaled version of the values of res[i] from previous
// subframes.
var lpcVal float64
var lpcVal float32
for i := out_end; i < j; i++ {
index := i + j
if index < 0 || index >= len(res) {
@ -1460,7 +1460,7 @@ func (d *Decoder) ltpSynthesis(
// res[i] = --------- + \ res[i - pitch_lags[s] + 2 - k] * -------
// 2.0**23 /_ 128.0
// k=0
var resSum, resVal float64
var resSum, resVal float32
for i := j; i < (j + n); i++ {
index := i + j
if index < 0 || index >= len(res) {
@ -1473,14 +1473,14 @@ func (d *Decoder) ltpSynthesis(
if resValIndex < 0 || resValIndex >= len(res) {
resVal = 0
} else {
resVal = res[resValIndex] * (float64(bQ7[k]) / 128.0)
resVal = res[resValIndex] * (float32(bQ7[k]) / 128.0)
}
resSum += resVal
}
res[index] = (float64(eQ23[i]) / 8388608.0) + resSum
res[index] = (float32(eQ23[i]) / 8388608.0) + resSum
}
return
@ -1493,7 +1493,7 @@ func (d *Decoder) ltpSynthesis(
// after either
//
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.9.2
func (d *Decoder) lpcSynthesis(out []float64, bandwidth Bandwidth, n, s, dLPC int, aQ12, res, gainQ16, lpc []float64) {
func (d *Decoder) lpcSynthesis(out []float32, bandwidth Bandwidth, n, s, dLPC int, aQ12, res, gainQ16, lpc []float32) {
finalLPCValuesIndex := 0
// let j be the index of the first sample in the residual corresponding to
@ -1509,7 +1509,7 @@ func (d *Decoder) lpcSynthesis(out []float64, bandwidth Bandwidth, n, s, dLPC in
// 65536.0 /_ 4096.0
// k=0
//
var currentLPCVal float64
var currentLPCVal float32
for i := j; i < (j + n); i++ {
sampleIndex := i + (n * s)
@ -1560,9 +1560,9 @@ func (d *Decoder) silkFrameReconstruction(
bQ7 []int8,
pitchLags []int,
eQ23 []int32,
LTPscaleQ14 float64,
LTPscaleQ14 float32,
wQ2 int16,
aQ12, gainQ16, out []float64,
aQ12, gainQ16, out []float32,
) {
// let n be the number of samples in a subframe
//
@ -1571,7 +1571,7 @@ func (d *Decoder) silkFrameReconstruction(
// let lpc[i] be the result of LPC synthesis from the last d_LPC samples of the
// previous subframe or zeros in the first subframe for this channel
lpc := make([]float64, n*subframeCount)
lpc := make([]float32, n*subframeCount)
// let s be the index of the current subframe in this SILK frame
// (0 or 1 for 10 ms frames, or 0 to 3 for 20 ms frames)
@ -1623,7 +1623,7 @@ func (d *Decoder) silkFrameReconstruction(
// 8: Resampled signal
//
// https://datatracker.ietf.org/doc/html/rfc6716#section-4.2.1
func (d *Decoder) Decode(in []byte, out []float64, isStereo bool, nanoseconds int, bandwidth Bandwidth) error {
func (d *Decoder) Decode(in []byte, out []float32, isStereo bool, nanoseconds int, bandwidth Bandwidth) error {
subframeSize := d.samplesInSubframe(bandwidth)
switch {
case nanoseconds != nanoseconds20Ms:
@ -1695,7 +1695,8 @@ func (d *Decoder) Decode(in []byte, out []float64, isStereo bool, nanoseconds in
eQ23 := d.decodeExcitation(signalType, quantizationOffsetType, lcgSeed, pulsecounts, lsbcounts)
// https://www.rfc-editor.org/rfc/rfc6716.html#section-4.2.7.9
d.silkFrameReconstruction(signalType, bandwidth,
d.silkFrameReconstruction(
signalType, bandwidth,
dLPC,
bQ7,
pitchLags,

View file

@ -30,7 +30,7 @@ func createRangeDecoder(data []byte, bitsRead uint, rangeSize uint32, highAndCod
func TestDecode20MsOnly(t *testing.T) {
d := &Decoder{}
err := d.Decode(testSilkFrame(), []float64{}, false, 1, BandwidthWideband)
err := d.Decode(testSilkFrame(), []float32{}, false, 1, BandwidthWideband)
if !errors.Is(err, errUnsupportedSilkFrameDuration) {
t.Fatal(err)
}
@ -38,7 +38,7 @@ func TestDecode20MsOnly(t *testing.T) {
func TestDecodeStereoTODO(t *testing.T) {
d := &Decoder{}
err := d.Decode(testSilkFrame(), []float64{}, true, nanoseconds20Ms, BandwidthWideband)
err := d.Decode(testSilkFrame(), []float32{}, true, nanoseconds20Ms, BandwidthWideband)
if !errors.Is(err, errUnsupportedSilkStereo) {
t.Fatal(err)
}
@ -60,14 +60,14 @@ func TestDecodeSubframeQuantizations(t *testing.T) {
d := &Decoder{rangeDecoder: createRangeDecoder(testSilkFrame(), 31, 482344960, 437100388)}
gainQ16 := d.decodeSubframeQuantizations(frameSignalTypeInactive)
if !reflect.DeepEqual(gainQ16, []float64{210944, 112640, 96256, 96256}) {
if !reflect.DeepEqual(gainQ16, []float32{210944, 112640, 96256, 96256}) {
t.Fatal()
}
}
func TestDecodeBufferSize(t *testing.T) {
d := NewDecoder()
err := d.Decode([]byte{}, make([]float64, 50), false, nanoseconds20Ms, BandwidthWideband)
err := d.Decode([]byte{}, make([]float32, 50), false, nanoseconds20Ms, BandwidthWideband)
if !errors.Is(err, errOutBufferTooSmall) {
t.Fatal()
}
@ -213,7 +213,7 @@ func TestLimitLPCFilterPredictionGain(t *testing.T) {
-4493, -1614, -1960, -3112, -2153, -2898,
}
expectedAQ12 := []float64{
expectedAQ12 := []float32{
405, 305, 131, 114, -118, -138, -72, -146, -108, -120, -140, -50, -61,
-97, -67, -91,
}
@ -229,12 +229,12 @@ func TestLPCSynthesis(t *testing.T) {
bandwidth := BandwidthWideband
dLPC := 16
aQ12 := []float64{
aQ12 := []float32{
405, 305, 131, 114, -118, -138, -72, -146, -108, -120,
-140, -50, -61, -97, -67, -91,
}
res := []float64{
res := []float32{
7.152557373046875e-06, 7.152557373046875e-06, 7.152557373046875e-06, -7.152557373046875e-06, 7.152557373046875e-06,
-7.152557373046875e-06, 7.152557373046875e-06, -7.152557373046875e-06, 7.152557373046875e-06, 7.152557373046875e-06,
-7.152557373046875e-06, -7.152557373046875e-06, 7.152557373046875e-06, 7.152557373046875e-06, -7.152557373046875e-06,
@ -301,11 +301,11 @@ func TestLPCSynthesis(t *testing.T) {
-7.152557373046875e-06, -7.152557373046875e-06, 7.152557373046875e-06, 7.152557373046875e-06, 7.152557373046875e-06,
}
gainQ16 := []float64{
gainQ16 := []float32{
210944, 112640, 96256, 96256,
}
expectedOut := [][]float64{
expectedOut := [][]float32{
{
0.000023, 0.000025, 0.000027, -0.000018, 0.000025,
-0.000021, 0.000021, -0.000024, 0.000021, 0.000021,
@ -380,9 +380,9 @@ func TestLPCSynthesis(t *testing.T) {
},
}
lpc := make([]float64, d.samplesInSubframe(BandwidthWideband)*subframeCount)
lpc := make([]float32, d.samplesInSubframe(BandwidthWideband)*subframeCount)
for i := range expectedOut {
out := make([]float64, 80)
out := make([]float32, 80)
d.lpcSynthesis(out, bandwidth, d.samplesInSubframe(BandwidthWideband), i, dLPC, aQ12, res, gainQ16, lpc)
for j := range out {
if out[j]-expectedOut[i][j] > floatEqualityThreshold {
@ -436,9 +436,9 @@ func TestDecodeLTPScalingParameter(t *testing.T) {
func TestDecode(t *testing.T) {
d := NewDecoder()
out := make([]float64, 320)
out := make([]float32, 320)
compareBuffer := func(out, expectedOut []float64, t *testing.T) {
compareBuffer := func(out, expectedOut []float32, t *testing.T) {
for i := range expectedOut {
if out[i]-expectedOut[i] > floatEqualityThreshold {
t.Fatalf("%d (%f) != (%f)", i, out[i], expectedOut[i])
@ -451,7 +451,7 @@ func TestDecode(t *testing.T) {
t.Fatal(err)
}
expectedOut := []float64{
expectedOut := []float32{
0.000023, 0.000025, 0.000027, -0.000018, 0.000025,
-0.000021, 0.000021, -0.000024, 0.000021, 0.000021,
-0.000022, -0.000026, 0.000018, 0.000022, -0.000023,
@ -525,7 +525,7 @@ func TestDecode(t *testing.T) {
t.Fatal(err)
}
expectedOut := []float64{
expectedOut := []float32{
0.000014, -0.000006, -0.000007, -0.000009, 0.000010,
0.000011, -0.000009, 0.000011, 0.000011, -0.000009,
0.000010, -0.000010, -0.000011, -0.000014, 0.000007,

View file

@ -66,7 +66,7 @@ func clamp(low, in, high int32) int32 {
return in
}
func clampFloat(low, in, high float64) float64 {
func clampFloat(low, in, high float32) float32 {
if in > high {
return high
} else if in < low {
@ -77,9 +77,11 @@ func clampFloat(low, in, high float64) float64 {
}
// The sign of x, i.e.,
// ( -1, x < 0
// sign(x) = < 0, x == 0
// ( 1, x > 0
//
// ( -1, x < 0
// sign(x) = < 0, x == 0
// ( 1, x > 0
//
// https://datatracker.ietf.org/doc/html/rfc6716#section-1.1.4
func sign(x int) int {
switch {
@ -92,12 +94,12 @@ func sign(x int) int {
}
}
// The minimum number of bits required to store a positive integer n in
// binary, or 0 for a non-positive integer n.
// The minimum number of bits required to store a positive integer n in
// binary, or 0 for a non-positive integer n.
//
// ( 0, n <= 0
// ilog(n) = <
// ( floor(log2(n))+1, n > 0
// ( 0, n <= 0
// ilog(n) = <
// ( floor(log2(n))+1, n > 0
func ilog(n int) int {
if n <= 0 {
return 0