softfloat64/internals.go

package softfloat64

import "math/bits"

const (
	mantbits64  = 52
	expbits64   = 11
	signshift64 = mantbits64 + expbits64

	expmask64      = (1 << expbits64) - 1
	expmaskshift64 = expmask64 << mantbits64

	mantmask64 = (1 << mantbits64) - 1
)

func exp(f uint64) int16 {
	return int16((f >> mantbits64) & expmask64)
}

func sign(f uint64) bool {
	return (f >> signshift64) > 0
}

func frac(f uint64) uint64 {
	return f & mantmask64
}

func pack(sign bool, exp int16, sig uint64) uint64 {
	if sign {
		return (1 << signshift64) + (uint64(exp) << mantbits64) + sig
	} else {
		return (uint64(exp) << mantbits64) + sig
	}
}

/*
	func shiftRightJam(a uint64, dist uint32) uint64 {
		if dist < 63 {
			if (a << (-dist & 63)) != 0 {
				return (a >> dist) | 1
			}
			return a >> dist
		} else {
			return a & 1
		}
	}
*/

func normSubnormal(sig uint64) (snExp int16, snSig uint64) {
	shiftDist := int16(bits.LeadingZeros64(sig) - expbits64)

	return 1 - shiftDist, sig << shiftDist
}

// shiftRightJam
// Shifts 'a' right by the number of bits given in 'dist', which must not
// be zero.  If any nonzero bits are shifted off, they are "jammed" into the
// least-significant bit of the shifted value by setting the least-significant
// bit to 1.  This shifted-and-jammed value is returned.
//
//	The value of 'dist' can be arbitrarily large.  In particular, if 'dist' is
//
// greater than 64, the result will be either 0 or 1, depending on whether 'a'
// is zero or nonzero.
func shiftRightJam(a uint64, dist uint32) uint64 {
	if dist < 63 {
		if (a << ((-dist) & 63)) != 0 {
			return (a >> dist) | 1
		} else {
			return a >> dist
		}
	} else if a != 0 {
		return 1
	} else {
		return 0
	}
}

// shortShiftRightJam
// Shifts 'a' right by the number of bits given in 'dist', which must be in
// the range 1 to 63.  If any nonzero bits are shifted off, they are "jammed"
// into the least-significant bit of the shifted value by setting the least-
// significant bit to 1.  This shifted-and-jammed value is returned.
func shortShiftRightJam(a uint64, dist uint32) uint64 {
	if (a & ((1 << dist) - 1)) != 0 {
		return (a >> dist) | 1
	} else {
		return a >> dist
	}
}

// approxRecip32
// Returns an approximation to the reciprocal of the number represented by 'a',
// where 'a' is interpreted as an unsigned fixed-point number with one integer
// bit and 31 fraction bits.  The 'a' input must be "normalized", meaning that
// its most-significant bit (bit 31) must be 1.  Thus, if A is the value of
// the fixed-point interpretation of 'a', then 1 <= A < 2.  The returned value
// is interpreted as a pure unsigned fraction, having no integer bits and 32
// fraction bits.  The approximation returned is never greater than the true
// reciprocal 1/A, and it differs from the true reciprocal by at most 2.006 ulp
// (units in the last place).
func approxRecip32(a uint32) uint32 {
	return uint32(uint64(0x7FFFFFFFFFFFFFFF) / uint64(a))
}

var approxRecipSqrt_1k0s = [16]uint16{
	0xB4C9, 0xFFAB, 0xAA7D, 0xF11C, 0xA1C5, 0xE4C7, 0x9A43, 0xDA29,
	0x93B5, 0xD0E5, 0x8DED, 0xC8B7, 0x88C6, 0xC16D, 0x8424, 0xBAE1,
}

var approxRecipSqrt_1k1s = [16]uint16{
	0xA5A5, 0xEA42, 0x8C21, 0xC62D, 0x788F, 0xAA7F, 0x6928, 0x94B6,
	0x5CC7, 0x8335, 0x52A6, 0x74E2, 0x4A3E, 0x68FE, 0x432B, 0x5EFD,
}

// approxRecipSqrt32
// Returns an approximation to the reciprocal of the square root of the number
// represented by 'a', where 'a' is interpreted as an unsigned fixed-point
// number either with one integer bit and 31 fraction bits or with two integer
// bits and 30 fraction bits.  The format of 'a' is determined by 'oddExpA',
// which must be either 0 or 1.  If 'oddExpA' is 1, 'a' is interpreted as
// having one integer bit, and if 'oddExpA' is 0, 'a' is interpreted as having
// two integer bits.  The 'a' input must be "normalized", meaning that its
// most-significant bit (bit 31) must be 1.  Thus, if A is the value of the
// fixed-point interpretation of 'a', it follows that 1 <= A < 2 when 'oddExpA'
// is 1, and 2 <= A < 4 when 'oddExpA' is 0.
//
//	The returned value is interpreted as a pure unsigned fraction, having
//
// no integer bits and 32 fraction bits.  The approximation returned is never
// greater than the true reciprocal 1/sqrt(A), and it differs from the true
// reciprocal by at most 2.06 ulp (units in the last place).  The approximation
// returned is also always within the range 0.5 to 1; thus, the most-
// significant bit of the result is always set.
func approxRecipSqrt32(oddExpA, a uint32) uint32 {
	index := ((a >> 27) & 0xE) + oddExpA
	eps := uint16(a >> 12)
	r0 := approxRecipSqrt_1k0s[index] - uint16((uint32(approxRecipSqrt_1k1s[index])*uint32(eps))>>20)
	ESqrR0 := uint32(r0) * uint32(r0)
	if oddExpA == 0 {
		ESqrR0 <<= 1
	}
	sigma0 := ^uint32((uint64(ESqrR0) * uint64(a)) >> 23)
	r := (uint32(r0) << 16) + uint32((uint64(r0)*uint64(sigma0))>>25)
	sqrSigma0 := uint32((uint64(sigma0) * uint64(sigma0)) >> 32)
	r += uint32((uint64((r>>1)+(r>>3)-(uint32(r0)<<14)) * uint64(sqrSigma0)) >> 48)

	if (r & 0x80000000) == 0 {
		r = 0x80000000
	}
	return r
}