1 files changed, 112 insertions, 101 deletions
diff --git a/src/math/big/float.go b/src/math/big/float.go
index d46c046e67..1563528797 100644
--- a/src/math/big/float.go
+++ b/src/math/big/float.go
@@ -525,25 +525,6 @@ func (z *Float) round(sbit uint) {
 	return
 }
 
-// nlz returns the number of leading zero bits in x.
-func nlz(x Word) uint {
-	return _W - uint(bitLen(x))
-}
-
-func nlz64(x uint64) uint {
-	// TODO(gri) this can be done more nicely
-	if _W == 32 {
-		if x>>32 == 0 {
-			return 32 + nlz(Word(x))
-		}
-		return nlz(Word(x >> 32))
-	}
-	if _W == 64 {
-		return nlz(Word(x))
-	}
-	panic("unreachable")
-}
-
 func (z *Float) setBits64(neg bool, x uint64) *Float {
 	if z.prec == 0 {
 		z.prec = 64
@@ -732,25 +713,44 @@ func (z *Float) Copy(x *Float) *Float {
 	return z
 }
 
-func high32(x nat) uint32 {
-	// TODO(gri) This can be done more efficiently on 32bit platforms.
-	return uint32(high64(x) >> 32)
+// msb32 returns the 32 most significant bits of x.
+func msb32(x nat) uint32 {
+	i := len(x) - 1
+	if i < 0 {
+		return 0
+	}
+	if debugFloat && x[i]&(1<<(_W-1)) == 0 {
+		panic("x not normalized")
+	}
+	switch _W {
+	case 32:
+		return uint32(x[i])
+	case 64:
+		return uint32(x[i] >> 32)
+	}
+	panic("unreachable")
 }
 
-func high64(x nat) uint64 {
-	i := len(x)
-	if i == 0 {
+// msb64 returns the 64 most significant bits of x.
+func msb64(x nat) uint64 {
+	i := len(x) - 1
+	if i < 0 {
 		return 0
 	}
-	// i > 0
-	v := uint64(x[i-1])
-	if _W == 32 {
-		v <<= 32
-		if i > 1 {
-			v |= uint64(x[i-2])
+	if debugFloat && x[i]&(1<<(_W-1)) == 0 {
+		panic("x not normalized")
+	}
+	switch _W {
+	case 32:
+		v := uint64(x[i]) << 32
+		if i > 0 {
+			v |= uint64(x[i-1])
 		}
+		return v
+	case 64:
+		return uint64(x[i])
 	}
-	return v
+	panic("unreachable")
 }
 
 // Uint64 returns the unsigned integer resulting from truncating x
@@ -776,7 +776,7 @@ func (x *Float) Uint64() (uint64, Accuracy) {
 		// 1 <= x < Inf
 		if x.exp <= 64 {
 			// u = trunc(x) fits into a uint64
-			u := high64(x.mant) >> (64 - uint32(x.exp))
+			u := msb64(x.mant) >> (64 - uint32(x.exp))
 			if x.MinPrec() <= 64 {
 				return u, Exact
 			}
@@ -821,7 +821,7 @@ func (x *Float) Int64() (int64, Accuracy) {
 		// 1 <= |x| < +Inf
 		if x.exp <= 63 {
 			// i = trunc(x) fits into an int64 (excluding math.MinInt64)
-			i := int64(high64(x.mant) >> (64 - uint32(x.exp)))
+			i := int64(msb64(x.mant) >> (64 - uint32(x.exp)))
 			if x.neg {
 				i = -i
 			}
@@ -853,9 +853,6 @@ func (x *Float) Int64() (int64, Accuracy) {
 	panic("unreachable")
 }
 
-// TODO(gri) Float32 and Float64 are very similar internally but for the
-// floatxx parameters and some conversions. Should factor out shared code.
-
 // Float32 returns the float32 value nearest to x. If x is too small to be
 // represented by a float32 (|x| < math.SmallestNonzeroFloat32), the result
 // is (0, Below) or (-0, Above), respectively, depending on the sign of x.
@@ -880,64 +877,71 @@ func (x *Float) Float32() (float32, Accuracy) {
 			emax  = bias              //   127  largest unbiased exponent (normal)
 		)
 
-		// Float mantissae m have an explicit msb and are in the range 0.5 <= m < 1.0.
-		// floatxx mantissae have an implicit msb and are in the range 1.0 <= m < 2.0.
-		// For a given mantissa m, we need to add 1 to a floatxx exponent to get the
-		// corresponding Float exponent.
-		// (see also implementation of math.Ldexp for similar code)
+		// Float mantissa m is 0.5 <= m < 1.0; compute exponent for floatxx mantissa.
+		e := x.exp - 1 // exponent for mantissa m with 1.0 <= m < 2.0
+		p := mbits + 1 // precision of normal float
 
-		if x.exp < dmin+1 {
-			// underflow
-			if x.neg {
-				var z float32
-				return -z, Above
+		// If the exponent is too small, we may have a denormal number
+		// in which case we have fewer mantissa bits available: reduce
+		// precision accordingly.
+		if e < emin {
+			p -= emin - int(e)
+			// Make sure we have at least 1 bit so that we don't
+			// lose numbers rounded up to the smallest denormal.
+			if p < 1 {
+				p = 1
 			}
-			return 0.0, Below
 		}
-		// x.exp >= dmin+1
 
+		// round
 		var r Float
-		r.prec = mbits + 1 // +1 for implicit msb
-		if x.exp < emin+1 {
-			// denormal number - round to fewer bits
-			r.prec = uint32(x.exp - dmin)
-		}
+		r.prec = uint32(p)
 		r.Set(x)
+		e = r.exp - 1
 
 		// Rounding may have caused r to overflow to ±Inf
 		// (rounding never causes underflows to 0).
 		if r.form == inf {
-			r.exp = emax + 2 // cause overflow below
+			e = emax + 1 // cause overflow below
 		}
 
-		if r.exp > emax+1 {
+		// If the exponent is too large, overflow to ±Inf.
+		if e > emax {
 			// overflow
 			if x.neg {
 				return float32(math.Inf(-1)), Below
 			}
 			return float32(math.Inf(+1)), Above
 		}
-		// dmin+1 <= r.exp <= emax+1
+		// e <= emax
 
-		var s uint32
-		if r.neg {
-			s = 1 << (fbits - 1)
+		// Determine sign, biased exponent, and mantissa.
+		var sign, bexp, mant uint32
+		if x.neg {
+			sign = 1 << (fbits - 1)
 		}
 
-		m := high32(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
-
 		// Rounding may have caused a denormal number to
 		// become normal. Check again.
-		c := float32(1.0)
-		if r.exp < emin+1 {
+		if e < emin {
 			// denormal number
-			r.exp += mbits
-			c = 1.0 / (1 << mbits) // 2**-mbits
+			if e < dmin {
+				// underflow to ±0
+				if x.neg {
+					var z float32
+					return -z, Above
+				}
+				return 0.0, Below
+			}
+			// bexp = 0
+			mant = msb32(r.mant) >> (fbits - r.prec)
+		} else {
+			// normal number: emin <= e <= emax
+			bexp = uint32(e+bias) << mbits
+			mant = msb32(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
 		}
-		// emin+1 <= r.exp <= emax+1
-		e := uint32(r.exp-emin) << mbits
 
-		return c * math.Float32frombits(s|e|m), r.acc
+		return math.Float32frombits(sign | bexp | mant), r.acc
 
 	case zero:
 		if x.neg {
@@ -980,64 +984,71 @@ func (x *Float) Float64() (float64, Accuracy) {
 			emax  = bias              //  1023  largest unbiased exponent (normal)
 		)
 
-		// Float mantissae m have an explicit msb and are in the range 0.5 <= m < 1.0.
-		// floatxx mantissae have an implicit msb and are in the range 1.0 <= m < 2.0.
-		// For a given mantissa m, we need to add 1 to a floatxx exponent to get the
-		// corresponding Float exponent.
-		// (see also implementation of math.Ldexp for similar code)
+		// Float mantissa m is 0.5 <= m < 1.0; compute exponent for floatxx mantissa.
+		e := x.exp - 1 // exponent for mantissa m with 1.0 <= m < 2.0
+		p := mbits + 1 // precision of normal float
 
-		if x.exp < dmin+1 {
-			// underflow
-			if x.neg {
-				var z float64
-				return -z, Above
+		// If the exponent is too small, we may have a denormal number
+		// in which case we have fewer mantissa bits available: reduce
+		// precision accordingly.
+		if e < emin {
+			p -= emin - int(e)
+			// Make sure we have at least 1 bit so that we don't
+			// lose numbers rounded up to the smallest denormal.
+			if p < 1 {
+				p = 1
 			}
-			return 0.0, Below
 		}
-		// x.exp >= dmin+1
 
+		// round
 		var r Float
-		r.prec = mbits + 1 // +1 for implicit msb
-		if x.exp < emin+1 {
-			// denormal number - round to fewer bits
-			r.prec = uint32(x.exp - dmin)
-		}
+		r.prec = uint32(p)
 		r.Set(x)
+		e = r.exp - 1
 
 		// Rounding may have caused r to overflow to ±Inf
 		// (rounding never causes underflows to 0).
 		if r.form == inf {
-			r.exp = emax + 2 // cause overflow below
+			e = emax + 1 // cause overflow below
 		}
 
-		if r.exp > emax+1 {
+		// If the exponent is too large, overflow to ±Inf.
+		if e > emax {
 			// overflow
 			if x.neg {
 				return math.Inf(-1), Below
 			}
 			return math.Inf(+1), Above
 		}
-		// dmin+1 <= r.exp <= emax+1
+		// e <= emax
 
-		var s uint64
-		if r.neg {
-			s = 1 << (fbits - 1)
+		// Determine sign, biased exponent, and mantissa.
+		var sign, bexp, mant uint64
+		if x.neg {
+			sign = 1 << (fbits - 1)
 		}
 
-		m := high64(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
-
 		// Rounding may have caused a denormal number to
 		// become normal. Check again.
-		c := 1.0
-		if r.exp < emin+1 {
+		if e < emin {
 			// denormal number
-			r.exp += mbits
-			c = 1.0 / (1 << mbits) // 2**-mbits
+			if e < dmin {
+				// underflow to ±0
+				if x.neg {
+					var z float64
+					return -z, Above
+				}
+				return 0.0, Below
+			}
+			// bexp = 0
+			mant = msb64(r.mant) >> (fbits - r.prec)
+		} else {
+			// normal number: emin <= e <= emax
+			bexp = uint64(e+bias) << mbits
+			mant = msb64(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
 		}
-		// emin+1 <= r.exp <= emax+1
-		e := uint64(r.exp-emin) << mbits
 
-		return c * math.Float64frombits(s|e|m), r.acc
+		return math.Float64frombits(sign | bexp | mant), r.acc
 
 	case zero:
 		if x.neg {