diff options
Diffstat (limited to 'src/math/big/float.go')
| -rw-r--r-- | src/math/big/float.go | 213 |
1 files changed, 112 insertions, 101 deletions
diff --git a/src/math/big/float.go b/src/math/big/float.go index d46c046e67..1563528797 100644 --- a/src/math/big/float.go +++ b/src/math/big/float.go @@ -525,25 +525,6 @@ func (z *Float) round(sbit uint) { return } -// nlz returns the number of leading zero bits in x. -func nlz(x Word) uint { - return _W - uint(bitLen(x)) -} - -func nlz64(x uint64) uint { - // TODO(gri) this can be done more nicely - if _W == 32 { - if x>>32 == 0 { - return 32 + nlz(Word(x)) - } - return nlz(Word(x >> 32)) - } - if _W == 64 { - return nlz(Word(x)) - } - panic("unreachable") -} - func (z *Float) setBits64(neg bool, x uint64) *Float { if z.prec == 0 { z.prec = 64 @@ -732,25 +713,44 @@ func (z *Float) Copy(x *Float) *Float { return z } -func high32(x nat) uint32 { - // TODO(gri) This can be done more efficiently on 32bit platforms. - return uint32(high64(x) >> 32) +// msb32 returns the 32 most significant bits of x. +func msb32(x nat) uint32 { + i := len(x) - 1 + if i < 0 { + return 0 + } + if debugFloat && x[i]&(1<<(_W-1)) == 0 { + panic("x not normalized") + } + switch _W { + case 32: + return uint32(x[i]) + case 64: + return uint32(x[i] >> 32) + } + panic("unreachable") } -func high64(x nat) uint64 { - i := len(x) - if i == 0 { +// msb64 returns the 64 most significant bits of x. +func msb64(x nat) uint64 { + i := len(x) - 1 + if i < 0 { return 0 } - // i > 0 - v := uint64(x[i-1]) - if _W == 32 { - v <<= 32 - if i > 1 { - v |= uint64(x[i-2]) + if debugFloat && x[i]&(1<<(_W-1)) == 0 { + panic("x not normalized") + } + switch _W { + case 32: + v := uint64(x[i]) << 32 + if i > 0 { + v |= uint64(x[i-1]) } + return v + case 64: + return uint64(x[i]) } - return v + panic("unreachable") } // Uint64 returns the unsigned integer resulting from truncating x @@ -776,7 +776,7 @@ func (x *Float) Uint64() (uint64, Accuracy) { // 1 <= x < Inf if x.exp <= 64 { // u = trunc(x) fits into a uint64 - u := high64(x.mant) >> (64 - uint32(x.exp)) + u := msb64(x.mant) >> (64 - uint32(x.exp)) if x.MinPrec() <= 64 { return u, Exact } @@ -821,7 +821,7 @@ func (x *Float) Int64() (int64, Accuracy) { // 1 <= |x| < +Inf if x.exp <= 63 { // i = trunc(x) fits into an int64 (excluding math.MinInt64) - i := int64(high64(x.mant) >> (64 - uint32(x.exp))) + i := int64(msb64(x.mant) >> (64 - uint32(x.exp))) if x.neg { i = -i } @@ -853,9 +853,6 @@ func (x *Float) Int64() (int64, Accuracy) { panic("unreachable") } -// TODO(gri) Float32 and Float64 are very similar internally but for the -// floatxx parameters and some conversions. Should factor out shared code. - // Float32 returns the float32 value nearest to x. If x is too small to be // represented by a float32 (|x| < math.SmallestNonzeroFloat32), the result // is (0, Below) or (-0, Above), respectively, depending on the sign of x. @@ -880,64 +877,71 @@ func (x *Float) Float32() (float32, Accuracy) { emax = bias // 127 largest unbiased exponent (normal) ) - // Float mantissae m have an explicit msb and are in the range 0.5 <= m < 1.0. - // floatxx mantissae have an implicit msb and are in the range 1.0 <= m < 2.0. - // For a given mantissa m, we need to add 1 to a floatxx exponent to get the - // corresponding Float exponent. - // (see also implementation of math.Ldexp for similar code) + // Float mantissa m is 0.5 <= m < 1.0; compute exponent for floatxx mantissa. + e := x.exp - 1 // exponent for mantissa m with 1.0 <= m < 2.0 + p := mbits + 1 // precision of normal float - if x.exp < dmin+1 { - // underflow - if x.neg { - var z float32 - return -z, Above + // If the exponent is too small, we may have a denormal number + // in which case we have fewer mantissa bits available: reduce + // precision accordingly. + if e < emin { + p -= emin - int(e) + // Make sure we have at least 1 bit so that we don't + // lose numbers rounded up to the smallest denormal. + if p < 1 { + p = 1 } - return 0.0, Below } - // x.exp >= dmin+1 + // round var r Float - r.prec = mbits + 1 // +1 for implicit msb - if x.exp < emin+1 { - // denormal number - round to fewer bits - r.prec = uint32(x.exp - dmin) - } + r.prec = uint32(p) r.Set(x) + e = r.exp - 1 // Rounding may have caused r to overflow to ±Inf // (rounding never causes underflows to 0). if r.form == inf { - r.exp = emax + 2 // cause overflow below + e = emax + 1 // cause overflow below } - if r.exp > emax+1 { + // If the exponent is too large, overflow to ±Inf. + if e > emax { // overflow if x.neg { return float32(math.Inf(-1)), Below } return float32(math.Inf(+1)), Above } - // dmin+1 <= r.exp <= emax+1 + // e <= emax - var s uint32 - if r.neg { - s = 1 << (fbits - 1) + // Determine sign, biased exponent, and mantissa. + var sign, bexp, mant uint32 + if x.neg { + sign = 1 << (fbits - 1) } - m := high32(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit) - // Rounding may have caused a denormal number to // become normal. Check again. - c := float32(1.0) - if r.exp < emin+1 { + if e < emin { // denormal number - r.exp += mbits - c = 1.0 / (1 << mbits) // 2**-mbits + if e < dmin { + // underflow to ±0 + if x.neg { + var z float32 + return -z, Above + } + return 0.0, Below + } + // bexp = 0 + mant = msb32(r.mant) >> (fbits - r.prec) + } else { + // normal number: emin <= e <= emax + bexp = uint32(e+bias) << mbits + mant = msb32(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit) } - // emin+1 <= r.exp <= emax+1 - e := uint32(r.exp-emin) << mbits - return c * math.Float32frombits(s|e|m), r.acc + return math.Float32frombits(sign | bexp | mant), r.acc case zero: if x.neg { @@ -980,64 +984,71 @@ func (x *Float) Float64() (float64, Accuracy) { emax = bias // 1023 largest unbiased exponent (normal) ) - // Float mantissae m have an explicit msb and are in the range 0.5 <= m < 1.0. - // floatxx mantissae have an implicit msb and are in the range 1.0 <= m < 2.0. - // For a given mantissa m, we need to add 1 to a floatxx exponent to get the - // corresponding Float exponent. - // (see also implementation of math.Ldexp for similar code) + // Float mantissa m is 0.5 <= m < 1.0; compute exponent for floatxx mantissa. + e := x.exp - 1 // exponent for mantissa m with 1.0 <= m < 2.0 + p := mbits + 1 // precision of normal float - if x.exp < dmin+1 { - // underflow - if x.neg { - var z float64 - return -z, Above + // If the exponent is too small, we may have a denormal number + // in which case we have fewer mantissa bits available: reduce + // precision accordingly. + if e < emin { + p -= emin - int(e) + // Make sure we have at least 1 bit so that we don't + // lose numbers rounded up to the smallest denormal. + if p < 1 { + p = 1 } - return 0.0, Below } - // x.exp >= dmin+1 + // round var r Float - r.prec = mbits + 1 // +1 for implicit msb - if x.exp < emin+1 { - // denormal number - round to fewer bits - r.prec = uint32(x.exp - dmin) - } + r.prec = uint32(p) r.Set(x) + e = r.exp - 1 // Rounding may have caused r to overflow to ±Inf // (rounding never causes underflows to 0). if r.form == inf { - r.exp = emax + 2 // cause overflow below + e = emax + 1 // cause overflow below } - if r.exp > emax+1 { + // If the exponent is too large, overflow to ±Inf. + if e > emax { // overflow if x.neg { return math.Inf(-1), Below } return math.Inf(+1), Above } - // dmin+1 <= r.exp <= emax+1 + // e <= emax - var s uint64 - if r.neg { - s = 1 << (fbits - 1) + // Determine sign, biased exponent, and mantissa. + var sign, bexp, mant uint64 + if x.neg { + sign = 1 << (fbits - 1) } - m := high64(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit) - // Rounding may have caused a denormal number to // become normal. Check again. - c := 1.0 - if r.exp < emin+1 { + if e < emin { // denormal number - r.exp += mbits - c = 1.0 / (1 << mbits) // 2**-mbits + if e < dmin { + // underflow to ±0 + if x.neg { + var z float64 + return -z, Above + } + return 0.0, Below + } + // bexp = 0 + mant = msb64(r.mant) >> (fbits - r.prec) + } else { + // normal number: emin <= e <= emax + bexp = uint64(e+bias) << mbits + mant = msb64(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit) } - // emin+1 <= r.exp <= emax+1 - e := uint64(r.exp-emin) << mbits - return c * math.Float64frombits(s|e|m), r.acc + return math.Float64frombits(sign | bexp | mant), r.acc case zero: if x.neg { |
