aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorRobert Griesemer <gri@golang.org>2015-05-21 18:00:54 -0700
committerRobert Griesemer <gri@golang.org>2015-05-22 21:10:43 +0000
commit5ffbca4823455d42ef6314c67c1cc346599d65bf (patch)
tree73e6f57ef87431302919324c478f2b52f90f0009 /src
parent8de8925c7794a39d9747e386163ebdccda561cca (diff)
downloadgo-5ffbca4823455d42ef6314c67c1cc346599d65bf.tar.xz
math/big: fix Float.Float64 conversion for denormal corner cases
- This change uses the same code as for Float32 and fixes the case of a number that gets rounded up to the smallest denormal. - Enabled correspoding test case. Change-Id: I8aac874a566cd727863a82717854f603fbdc26c6 Reviewed-on: https://go-review.googlesource.com/10352 Reviewed-by: Alan Donovan <adonovan@google.com>
Diffstat (limited to 'src')
-rw-r--r--src/math/big/float.go74
-rw-r--r--src/math/big/float_test.go5
2 files changed, 40 insertions, 39 deletions
diff --git a/src/math/big/float.go b/src/math/big/float.go
index b666b9dd38..dcb72c5754 100644
--- a/src/math/big/float.go
+++ b/src/math/big/float.go
@@ -959,10 +959,6 @@ func (x *Float) Float32() (float32, Accuracy) {
panic("unreachable")
}
-// TODO(gri) Use same algorithm for Float64 as for Float32. The Float64 code
-// is incorrect for some corner cases (numbers that get rounded up to smallest
-// denormal - test cases are missing).
-
// Float64 returns the float64 value nearest to x. If x is too small to be
// represented by a float64 (|x| < math.SmallestNonzeroFloat64), the result
// is (0, Below) or (-0, Above), respectively, depending on the sign of x.
@@ -987,64 +983,70 @@ func (x *Float) Float64() (float64, Accuracy) {
emax = bias // 1023 largest unbiased exponent (normal)
)
- // Float mantissae m have an explicit msb and are in the range 0.5 <= m < 1.0.
- // floatxx mantissae have an implicit msb and are in the range 1.0 <= m < 2.0.
- // For a given mantissa m, we need to add 1 to a floatxx exponent to get the
- // corresponding Float exponent.
- // (see also implementation of math.Ldexp for similar code)
+ // Float mantissa m is 0.5 <= m < 1.0; compute exponent for floatxx mantissa.
+ e := x.exp - 1 // exponent for mantissa m with 1.0 <= m < 2.0
+ p := mbits + 1 // precision of normal float
- if x.exp < dmin+1 {
- // underflow
- if x.neg {
- var z float64
- return -z, Above
+ // If the exponent is too small, we may have a denormal number
+ // in which case we have fewer mantissa bits available: reduce
+ // precision accordingly.
+ if e < emin {
+ p -= emin - int(e)
+ // Make sure we have at least 1 bit so that we don't
+ // lose numbers rounded up to the smallest denormal.
+ if p < 1 {
+ p = 1
}
- return 0.0, Below
}
- // x.exp >= dmin+1
+ // round
var r Float
- r.prec = mbits + 1 // +1 for implicit msb
- if x.exp < emin+1 {
- // denormal number - round to fewer bits
- r.prec = uint32(x.exp - dmin)
- }
+ r.prec = uint32(p)
r.Set(x)
+ e = r.exp - 1
// Rounding may have caused r to overflow to ±Inf
// (rounding never causes underflows to 0).
if r.form == inf {
- r.exp = emax + 2 // cause overflow below
+ e = emax + 1 // cause overflow below
}
- if r.exp > emax+1 {
+ // If the exponent is too large, overflow to ±Inf.
+ if e > emax {
// overflow
if x.neg {
return math.Inf(-1), Below
}
return math.Inf(+1), Above
}
- // dmin+1 <= r.exp <= emax+1
- var s uint64
- if r.neg {
- s = 1 << (fbits - 1)
+ // Determine sign, biased exponent, and mantissa.
+ var sign, bexp, mant uint64
+ if x.neg {
+ sign = 1 << (fbits - 1)
}
- m := high64(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
-
// Rounding may have caused a denormal number to
// become normal. Check again.
- c := 1.0
- if r.exp < emin+1 {
+ if e < emin {
// denormal number
- r.exp += mbits
- c = 1.0 / (1 << mbits) // 2**-mbits
+ if e < dmin {
+ // underflow to ±0
+ if x.neg {
+ var z float64
+ return -z, Above
+ }
+ return 0.0, Below
+ }
+ // bexp = 0
+ mant = high64(r.mant) >> (fbits - r.prec)
+ } else {
+ // normal number: emin <= e <= emax
+ bexp = uint64(e+bias) << mbits
+ mant = high64(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
}
- // emin+1 <= r.exp <= emax+1
- e := uint64(r.exp-emin) << mbits
- return c * math.Float64frombits(s|e|m), r.acc
+ return math.Float64frombits(sign | bexp | mant), r.acc
case zero:
if x.neg {
diff --git a/src/math/big/float_test.go b/src/math/big/float_test.go
index 92eec71c5f..8bd3a9c8c9 100644
--- a/src/math/big/float_test.go
+++ b/src/math/big/float_test.go
@@ -922,9 +922,8 @@ func TestFloatFloat64(t *testing.T) {
{"0x0.00000000000008p-1022", 0, Below},
// denormals
- // TODO(gri) enable once Float64 is fixed
- // {"0x0.0000000000000cp-1022", math.SmallestNonzeroFloat64, Above}, // rounded up to smallest denormal
- {"0x0.0000000000001p-1022", math.SmallestNonzeroFloat64, Exact}, // smallest denormal
+ {"0x0.0000000000000cp-1022", math.SmallestNonzeroFloat64, Above}, // rounded up to smallest denormal
+ {"0x0.0000000000001p-1022", math.SmallestNonzeroFloat64, Exact}, // smallest denormal
{"0x.8p-1073", math.SmallestNonzeroFloat64, Exact},
{"1p-1074", math.SmallestNonzeroFloat64, Exact},
{"0x.fffffffffffffp-1022", math.Float64frombits(0x000fffffffffffff), Exact}, // largest denormal