softfloat64/internals.go
2024-04-17 05:27:11 +02:00

152 lines
5 KiB
Go

package softfloat64
import "math/bits"
const (
mantbits64 = 52
expbits64 = 11
signshift64 = mantbits64 + expbits64
expmask64 = (1 << expbits64) - 1
expmaskshift64 = expmask64 << mantbits64
mantmask64 = (1 << mantbits64) - 1
)
func exp(f uint64) int16 {
return int16((f >> mantbits64) & expmask64)
}
func sign(f uint64) bool {
return (f >> signshift64) > 0
}
func frac(f uint64) uint64 {
return f & mantmask64
}
func pack(sign bool, exp int16, sig uint64) uint64 {
if sign {
return (1 << signshift64) + (uint64(exp) << mantbits64) + sig
} else {
return (uint64(exp) << mantbits64) + sig
}
}
/*
func shiftRightJam(a uint64, dist uint32) uint64 {
if dist < 63 {
if (a << (-dist & 63)) != 0 {
return (a >> dist) | 1
}
return a >> dist
} else {
return a & 1
}
}
*/
func normSubnormal(sig uint64) (snExp int16, snSig uint64) {
shiftDist := int16(bits.LeadingZeros64(sig) - expbits64)
return 1 - shiftDist, sig << shiftDist
}
// shiftRightJam
// Shifts 'a' right by the number of bits given in 'dist', which must not
// be zero. If any nonzero bits are shifted off, they are "jammed" into the
// least-significant bit of the shifted value by setting the least-significant
// bit to 1. This shifted-and-jammed value is returned.
//
// The value of 'dist' can be arbitrarily large. In particular, if 'dist' is
//
// greater than 64, the result will be either 0 or 1, depending on whether 'a'
// is zero or nonzero.
func shiftRightJam(a uint64, dist uint32) uint64 {
if dist < 63 {
if (a << ((-dist) & 63)) != 0 {
return (a >> dist) | 1
} else {
return a >> dist
}
} else if a != 0 {
return 1
} else {
return 0
}
}
// shortShiftRightJam
// Shifts 'a' right by the number of bits given in 'dist', which must be in
// the range 1 to 63. If any nonzero bits are shifted off, they are "jammed"
// into the least-significant bit of the shifted value by setting the least-
// significant bit to 1. This shifted-and-jammed value is returned.
func shortShiftRightJam(a uint64, dist uint32) uint64 {
if (a & ((1 << dist) - 1)) != 0 {
return (a >> dist) | 1
} else {
return a >> dist
}
}
// approxRecip32
// Returns an approximation to the reciprocal of the number represented by 'a',
// where 'a' is interpreted as an unsigned fixed-point number with one integer
// bit and 31 fraction bits. The 'a' input must be "normalized", meaning that
// its most-significant bit (bit 31) must be 1. Thus, if A is the value of
// the fixed-point interpretation of 'a', then 1 <= A < 2. The returned value
// is interpreted as a pure unsigned fraction, having no integer bits and 32
// fraction bits. The approximation returned is never greater than the true
// reciprocal 1/A, and it differs from the true reciprocal by at most 2.006 ulp
// (units in the last place).
func approxRecip32(a uint32) uint32 {
return uint32(uint64(0x7FFFFFFFFFFFFFFF) / uint64(a))
}
var approxRecipSqrt_1k0s = [16]uint16{
0xB4C9, 0xFFAB, 0xAA7D, 0xF11C, 0xA1C5, 0xE4C7, 0x9A43, 0xDA29,
0x93B5, 0xD0E5, 0x8DED, 0xC8B7, 0x88C6, 0xC16D, 0x8424, 0xBAE1,
}
var approxRecipSqrt_1k1s = [16]uint16{
0xA5A5, 0xEA42, 0x8C21, 0xC62D, 0x788F, 0xAA7F, 0x6928, 0x94B6,
0x5CC7, 0x8335, 0x52A6, 0x74E2, 0x4A3E, 0x68FE, 0x432B, 0x5EFD,
}
// approxRecipSqrt32
// Returns an approximation to the reciprocal of the square root of the number
// represented by 'a', where 'a' is interpreted as an unsigned fixed-point
// number either with one integer bit and 31 fraction bits or with two integer
// bits and 30 fraction bits. The format of 'a' is determined by 'oddExpA',
// which must be either 0 or 1. If 'oddExpA' is 1, 'a' is interpreted as
// having one integer bit, and if 'oddExpA' is 0, 'a' is interpreted as having
// two integer bits. The 'a' input must be "normalized", meaning that its
// most-significant bit (bit 31) must be 1. Thus, if A is the value of the
// fixed-point interpretation of 'a', it follows that 1 <= A < 2 when 'oddExpA'
// is 1, and 2 <= A < 4 when 'oddExpA' is 0.
//
// The returned value is interpreted as a pure unsigned fraction, having
//
// no integer bits and 32 fraction bits. The approximation returned is never
// greater than the true reciprocal 1/sqrt(A), and it differs from the true
// reciprocal by at most 2.06 ulp (units in the last place). The approximation
// returned is also always within the range 0.5 to 1; thus, the most-
// significant bit of the result is always set.
func approxRecipSqrt32(oddExpA, a uint32) uint32 {
index := ((a >> 27) & 0xE) + oddExpA
eps := uint16(a >> 12)
r0 := approxRecipSqrt_1k0s[index] - uint16((uint32(approxRecipSqrt_1k1s[index])*uint32(eps))>>20)
ESqrR0 := uint32(r0) * uint32(r0)
if oddExpA == 0 {
ESqrR0 <<= 1
}
sigma0 := ^uint32((uint64(ESqrR0) * uint64(a)) >> 23)
r := (uint32(r0) << 16) + uint32((uint64(r0)*uint64(sigma0))>>25)
sqrSigma0 := uint32((uint64(sigma0) * uint64(sigma0)) >> 32)
r += uint32((uint64((r>>1)+(r>>3)-(uint32(r0)<<14)) * uint64(sqrSigma0)) >> 48)
if (r & 0x80000000) == 0 {
r = 0x80000000
}
return r
}