aboutsummaryrefslogtreecommitdiff
path: root/src/math/big/float.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/math/big/float.go')
-rw-r--r--src/math/big/float.go213
1 files changed, 112 insertions, 101 deletions
diff --git a/src/math/big/float.go b/src/math/big/float.go
index d46c046e67..1563528797 100644
--- a/src/math/big/float.go
+++ b/src/math/big/float.go
@@ -525,25 +525,6 @@ func (z *Float) round(sbit uint) {
return
}
-// nlz returns the number of leading zero bits in x.
-func nlz(x Word) uint {
- return _W - uint(bitLen(x))
-}
-
-func nlz64(x uint64) uint {
- // TODO(gri) this can be done more nicely
- if _W == 32 {
- if x>>32 == 0 {
- return 32 + nlz(Word(x))
- }
- return nlz(Word(x >> 32))
- }
- if _W == 64 {
- return nlz(Word(x))
- }
- panic("unreachable")
-}
-
func (z *Float) setBits64(neg bool, x uint64) *Float {
if z.prec == 0 {
z.prec = 64
@@ -732,25 +713,44 @@ func (z *Float) Copy(x *Float) *Float {
return z
}
-func high32(x nat) uint32 {
- // TODO(gri) This can be done more efficiently on 32bit platforms.
- return uint32(high64(x) >> 32)
+// msb32 returns the 32 most significant bits of x.
+func msb32(x nat) uint32 {
+ i := len(x) - 1
+ if i < 0 {
+ return 0
+ }
+ if debugFloat && x[i]&(1<<(_W-1)) == 0 {
+ panic("x not normalized")
+ }
+ switch _W {
+ case 32:
+ return uint32(x[i])
+ case 64:
+ return uint32(x[i] >> 32)
+ }
+ panic("unreachable")
}
-func high64(x nat) uint64 {
- i := len(x)
- if i == 0 {
+// msb64 returns the 64 most significant bits of x.
+func msb64(x nat) uint64 {
+ i := len(x) - 1
+ if i < 0 {
return 0
}
- // i > 0
- v := uint64(x[i-1])
- if _W == 32 {
- v <<= 32
- if i > 1 {
- v |= uint64(x[i-2])
+ if debugFloat && x[i]&(1<<(_W-1)) == 0 {
+ panic("x not normalized")
+ }
+ switch _W {
+ case 32:
+ v := uint64(x[i]) << 32
+ if i > 0 {
+ v |= uint64(x[i-1])
}
+ return v
+ case 64:
+ return uint64(x[i])
}
- return v
+ panic("unreachable")
}
// Uint64 returns the unsigned integer resulting from truncating x
@@ -776,7 +776,7 @@ func (x *Float) Uint64() (uint64, Accuracy) {
// 1 <= x < Inf
if x.exp <= 64 {
// u = trunc(x) fits into a uint64
- u := high64(x.mant) >> (64 - uint32(x.exp))
+ u := msb64(x.mant) >> (64 - uint32(x.exp))
if x.MinPrec() <= 64 {
return u, Exact
}
@@ -821,7 +821,7 @@ func (x *Float) Int64() (int64, Accuracy) {
// 1 <= |x| < +Inf
if x.exp <= 63 {
// i = trunc(x) fits into an int64 (excluding math.MinInt64)
- i := int64(high64(x.mant) >> (64 - uint32(x.exp)))
+ i := int64(msb64(x.mant) >> (64 - uint32(x.exp)))
if x.neg {
i = -i
}
@@ -853,9 +853,6 @@ func (x *Float) Int64() (int64, Accuracy) {
panic("unreachable")
}
-// TODO(gri) Float32 and Float64 are very similar internally but for the
-// floatxx parameters and some conversions. Should factor out shared code.
-
// Float32 returns the float32 value nearest to x. If x is too small to be
// represented by a float32 (|x| < math.SmallestNonzeroFloat32), the result
// is (0, Below) or (-0, Above), respectively, depending on the sign of x.
@@ -880,64 +877,71 @@ func (x *Float) Float32() (float32, Accuracy) {
emax = bias // 127 largest unbiased exponent (normal)
)
- // Float mantissae m have an explicit msb and are in the range 0.5 <= m < 1.0.
- // floatxx mantissae have an implicit msb and are in the range 1.0 <= m < 2.0.
- // For a given mantissa m, we need to add 1 to a floatxx exponent to get the
- // corresponding Float exponent.
- // (see also implementation of math.Ldexp for similar code)
+ // Float mantissa m is 0.5 <= m < 1.0; compute exponent for floatxx mantissa.
+ e := x.exp - 1 // exponent for mantissa m with 1.0 <= m < 2.0
+ p := mbits + 1 // precision of normal float
- if x.exp < dmin+1 {
- // underflow
- if x.neg {
- var z float32
- return -z, Above
+ // If the exponent is too small, we may have a denormal number
+ // in which case we have fewer mantissa bits available: reduce
+ // precision accordingly.
+ if e < emin {
+ p -= emin - int(e)
+ // Make sure we have at least 1 bit so that we don't
+ // lose numbers rounded up to the smallest denormal.
+ if p < 1 {
+ p = 1
}
- return 0.0, Below
}
- // x.exp >= dmin+1
+ // round
var r Float
- r.prec = mbits + 1 // +1 for implicit msb
- if x.exp < emin+1 {
- // denormal number - round to fewer bits
- r.prec = uint32(x.exp - dmin)
- }
+ r.prec = uint32(p)
r.Set(x)
+ e = r.exp - 1
// Rounding may have caused r to overflow to ±Inf
// (rounding never causes underflows to 0).
if r.form == inf {
- r.exp = emax + 2 // cause overflow below
+ e = emax + 1 // cause overflow below
}
- if r.exp > emax+1 {
+ // If the exponent is too large, overflow to ±Inf.
+ if e > emax {
// overflow
if x.neg {
return float32(math.Inf(-1)), Below
}
return float32(math.Inf(+1)), Above
}
- // dmin+1 <= r.exp <= emax+1
+ // e <= emax
- var s uint32
- if r.neg {
- s = 1 << (fbits - 1)
+ // Determine sign, biased exponent, and mantissa.
+ var sign, bexp, mant uint32
+ if x.neg {
+ sign = 1 << (fbits - 1)
}
- m := high32(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
-
// Rounding may have caused a denormal number to
// become normal. Check again.
- c := float32(1.0)
- if r.exp < emin+1 {
+ if e < emin {
// denormal number
- r.exp += mbits
- c = 1.0 / (1 << mbits) // 2**-mbits
+ if e < dmin {
+ // underflow to ±0
+ if x.neg {
+ var z float32
+ return -z, Above
+ }
+ return 0.0, Below
+ }
+ // bexp = 0
+ mant = msb32(r.mant) >> (fbits - r.prec)
+ } else {
+ // normal number: emin <= e <= emax
+ bexp = uint32(e+bias) << mbits
+ mant = msb32(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
}
- // emin+1 <= r.exp <= emax+1
- e := uint32(r.exp-emin) << mbits
- return c * math.Float32frombits(s|e|m), r.acc
+ return math.Float32frombits(sign | bexp | mant), r.acc
case zero:
if x.neg {
@@ -980,64 +984,71 @@ func (x *Float) Float64() (float64, Accuracy) {
emax = bias // 1023 largest unbiased exponent (normal)
)
- // Float mantissae m have an explicit msb and are in the range 0.5 <= m < 1.0.
- // floatxx mantissae have an implicit msb and are in the range 1.0 <= m < 2.0.
- // For a given mantissa m, we need to add 1 to a floatxx exponent to get the
- // corresponding Float exponent.
- // (see also implementation of math.Ldexp for similar code)
+ // Float mantissa m is 0.5 <= m < 1.0; compute exponent for floatxx mantissa.
+ e := x.exp - 1 // exponent for mantissa m with 1.0 <= m < 2.0
+ p := mbits + 1 // precision of normal float
- if x.exp < dmin+1 {
- // underflow
- if x.neg {
- var z float64
- return -z, Above
+ // If the exponent is too small, we may have a denormal number
+ // in which case we have fewer mantissa bits available: reduce
+ // precision accordingly.
+ if e < emin {
+ p -= emin - int(e)
+ // Make sure we have at least 1 bit so that we don't
+ // lose numbers rounded up to the smallest denormal.
+ if p < 1 {
+ p = 1
}
- return 0.0, Below
}
- // x.exp >= dmin+1
+ // round
var r Float
- r.prec = mbits + 1 // +1 for implicit msb
- if x.exp < emin+1 {
- // denormal number - round to fewer bits
- r.prec = uint32(x.exp - dmin)
- }
+ r.prec = uint32(p)
r.Set(x)
+ e = r.exp - 1
// Rounding may have caused r to overflow to ±Inf
// (rounding never causes underflows to 0).
if r.form == inf {
- r.exp = emax + 2 // cause overflow below
+ e = emax + 1 // cause overflow below
}
- if r.exp > emax+1 {
+ // If the exponent is too large, overflow to ±Inf.
+ if e > emax {
// overflow
if x.neg {
return math.Inf(-1), Below
}
return math.Inf(+1), Above
}
- // dmin+1 <= r.exp <= emax+1
+ // e <= emax
- var s uint64
- if r.neg {
- s = 1 << (fbits - 1)
+ // Determine sign, biased exponent, and mantissa.
+ var sign, bexp, mant uint64
+ if x.neg {
+ sign = 1 << (fbits - 1)
}
- m := high64(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
-
// Rounding may have caused a denormal number to
// become normal. Check again.
- c := 1.0
- if r.exp < emin+1 {
+ if e < emin {
// denormal number
- r.exp += mbits
- c = 1.0 / (1 << mbits) // 2**-mbits
+ if e < dmin {
+ // underflow to ±0
+ if x.neg {
+ var z float64
+ return -z, Above
+ }
+ return 0.0, Below
+ }
+ // bexp = 0
+ mant = msb64(r.mant) >> (fbits - r.prec)
+ } else {
+ // normal number: emin <= e <= emax
+ bexp = uint64(e+bias) << mbits
+ mant = msb64(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
}
- // emin+1 <= r.exp <= emax+1
- e := uint64(r.exp-emin) << mbits
- return c * math.Float64frombits(s|e|m), r.acc
+ return math.Float64frombits(sign | bexp | mant), r.acc
case zero:
if x.neg {