diff options
| author | Robert Griesemer <gri@golang.org> | 2015-02-05 17:21:48 -0800 |
|---|---|---|
| committer | Robert Griesemer <gri@golang.org> | 2015-02-06 17:21:01 +0000 |
| commit | 15594df6b4e913d1ed9d7b38fa71868be28e9b63 (patch) | |
| tree | ba08ef6bf4aa3719f91bebed379790f0634a15a7 /src/math/big/float.go | |
| parent | 9b6ccb13233f2977c74c73ae836212c55d342d28 (diff) | |
| download | go-15594df6b4e913d1ed9d7b38fa71868be28e9b63.tar.xz | |
math/big: handling of +/-Inf and zero precision, enable zero values
- clarified representation of +/-Inf
- only 0 and Inf values can have 0 precision
- a zero precision value used as result value takes the max precision
of the arguments (to be fine-tuned for setters)
- the zero precision approach makes Float zero values possible
(they represent +0)
- more tests
Missing: Filling in the blanks. More tests.
Change-Id: Ibb4f97e12e1f356c3085ce80f3464e97b82ac130
Reviewed-on: https://go-review.googlesource.com/4000
Reviewed-by: Alan Donovan <adonovan@google.com>
Diffstat (limited to 'src/math/big/float.go')
| -rw-r--r-- | src/math/big/float.go | 179 |
1 files changed, 127 insertions, 52 deletions
diff --git a/src/math/big/float.go b/src/math/big/float.go index ea42a9166e..44e75cbf39 100644 --- a/src/math/big/float.go +++ b/src/math/big/float.go @@ -18,10 +18,6 @@ import ( "math" ) -// TODO(gri): Determine if there's a more natural way to set the precision. -// Should there be a special meaning for prec 0? Such as "full precision"? -// (would be possible for all ops except quotient). - const debugFloat = true // enable for debugging // Internal representation: A floating-point value x != 0 consists @@ -45,14 +41,15 @@ const debugFloat = true // enable for debugging // // sign * mantissa * 2**exponent // -// Each value also has a precision, rounding mode, and accuracy value: -// The precision is the number of mantissa bits used to represent a -// value, and the result of operations is rounded to that many bits -// according to the value's rounding mode (unless specified othewise). +// Each value also has a precision, rounding mode, and accuracy value. +// The precision is the number of mantissa bits used to represent the +// value, and the result of an operation is rounded to that many bits +// according to the value's rounding mode (unless specified otherwise). // The accuracy value indicates the rounding error with respect to the // exact (not rounded) value. // -// The zero value for a Float represents the number 0. +// The zero (uninitialized) value for a Float is ready to use and +// represents the number 0.0 of 0 bit precision. // // By setting the desired precision to 24 (or 53) and using ToNearestEven // rounding, Float arithmetic operations emulate the corresponding float32 @@ -71,14 +68,26 @@ type Float struct { // NewFloat returns a new Float with value x rounded // to prec bits according to the given rounding mode. +// If prec == 0, the result has value 0.0 independent +// of the value of x. +// BUG(gri) For prec == 0 and x == Inf, the result +// should be Inf as well. func NewFloat(x float64, prec uint, mode RoundingMode) *Float { - // TODO(gri) should make this more efficient - z := new(Float).SetFloat64(x) - return z.Round(z, prec, mode) + var z Float + if prec > 0 { + // TODO(gri) should make this more efficient + z.SetFloat64(x) + return z.Round(&z, prec, mode) + } + z.mode = mode // TODO(gri) don't do this twice for prec > 0 + return &z } -// infExp is the exponent value for infinity. -const infExp = 1<<31 - 1 +// Special exponent values. +const ( + maxExp = math.MaxInt32 + infExp = -maxExp - 1 // exponent value for Inf values +) // NewInf returns a new Float with value positive infinity (sign >= 0), // or negative infinity (sign < 0). @@ -86,12 +95,16 @@ func NewInf(sign int) *Float { return &Float{neg: sign < 0, exp: infExp} } +// setExp sets the exponent for z. +// If the exponent is too small or too large, z becomes +/-Inf. func (z *Float) setExp(e int64) { - e32 := int32(e) - if int64(e32) != e { - panic("exponent overflow") // TODO(gri) handle this gracefully + if -maxExp <= e && e <= maxExp { + z.exp = int32(e) + return } - z.exp = e32 + // Inf + z.mant = z.mant[:0] + z.exp = infExp } // Accuracy describes the rounding error produced by the most recent @@ -155,7 +168,7 @@ func (mode RoundingMode) String() string { } // Precision returns the mantissa precision of x in bits. -// The precision may be 0 if x == 0. // TODO(gri) Determine a better approach. +// The precision may be 0 for |x| == 0 or |x| == Inf. func (x *Float) Precision() uint { return uint(x.prec) } @@ -170,9 +183,17 @@ func (x *Float) Mode() RoundingMode { return x.mode } +// IsInf reports whether x is an infinity, according to sign. +// If sign > 0, IsInf reports whether x is positive infinity. +// If sign < 0, IsInf reports whether x is negative infinity. +// If sign == 0, IsInf reports whether x is either infinity. +func (x *Float) IsInf(sign int) bool { + return x.exp == infExp && (sign == 0 || x.neg == (sign < 0)) +} + // debugging support func (x *Float) validate() { - // assumes x != 0 + // assumes x != 0 && x != Inf const msb = 1 << (_W - 1) m := len(x.mant) if x.mant[m-1]&msb == 0 { @@ -196,6 +217,9 @@ func (z *Float) round(sbit uint) { return } + // handle Inf + // TODO(gri) handle Inf + if debugFloat { z.validate() } @@ -399,10 +423,15 @@ func (z *Float) SetInt64(x int64) *Float { // SetFloat64 sets z to x and returns z. // Precision is set to 53 bits. -// TODO(gri) test denormals, +/-Inf, disallow NaN. +// TODO(gri) test denormals, disallow NaN. func (z *Float) SetFloat64(x float64) *Float { - z.prec = 53 z.neg = math.Signbit(x) // handle -0 correctly (-0 == 0) + z.prec = 53 + if math.IsInf(x, 0) { + z.mant = z.mant[:0] + z.exp = infExp + return z + } if x == 0 { z.mant = z.mant[:0] z.exp = 0 @@ -484,7 +513,7 @@ func high64(x nat) uint64 { return v } -// TODO(gri) FIX THIS (rounding mode, errors, accuracy, etc.) +// TODO(gri) FIX THIS (Inf, rounding mode, errors, accuracy, etc.) func (x *Float) Uint64() uint64 { m := high64(x.mant) s := x.exp @@ -494,7 +523,7 @@ func (x *Float) Uint64() uint64 { return 0 // imprecise } -// TODO(gri) FIX THIS (rounding mode, errors, etc.) +// TODO(gri) FIX THIS (inf, rounding mode, errors, etc.) func (x *Float) Int64() int64 { v := int64(x.Uint64()) if x.neg { @@ -507,6 +536,15 @@ func (x *Float) Int64() int64 { // by rounding to nearest with 53 bits precision. // TODO(gri) implement/document error scenarios. func (x *Float) Float64() (float64, Accuracy) { + // x == +/-Inf + if x.exp == infExp { + var sign int + if x.neg { + sign = -1 + } + return math.Inf(sign), Exact + } + // x == 0 if len(x.mant) == 0 { return 0, Exact } @@ -561,7 +599,7 @@ func (z *Float) Neg(x *Float) *Float { } // z = x + y, ignoring signs of x and y. -// x and y must not be 0. +// x and y must not be 0 or an Inf. func (z *Float) uadd(x, y *Float) { // Note: This implementation requires 2 shifts most of the // time. It is also inefficient if exponents or precisions @@ -603,7 +641,7 @@ func (z *Float) uadd(x, y *Float) { } // z = x - y for x >= y, ignoring signs of x and y. -// x and y must not be zero. +// x and y must not be 0 or an Inf. func (z *Float) usub(x, y *Float) { // This code is symmetric to uadd. // We have not factored the common code out because @@ -643,7 +681,7 @@ func (z *Float) usub(x, y *Float) { } // z = x * y, ignoring signs of x and y. -// x and y must not be zero. +// x and y must not be 0 or an Inf. func (z *Float) umul(x, y *Float) { if debugFloat && (len(x.mant) == 0 || len(y.mant) == 0) { panic("umul called with 0 argument") @@ -664,7 +702,7 @@ func (z *Float) umul(x, y *Float) { } // z = x / y, ignoring signs of x and y. -// x and y must not be zero. +// x and y must not be 0 or an Inf. func (z *Float) uquo(x, y *Float) { if debugFloat && (len(x.mant) == 0 || len(y.mant) == 0) { panic("uquo called with 0 argument") @@ -708,7 +746,7 @@ func (z *Float) uquo(x, y *Float) { } // ucmp returns -1, 0, or 1, depending on whether x < y, x == y, or x > y, -// while ignoring the signs of x and y. x and y must not be zero. +// while ignoring the signs of x and y. x and y must not be 0 or an Inf. func (x *Float) ucmp(y *Float) int { if debugFloat && (len(x.mant) == 0 || len(y.mant) == 0) { panic("ucmp called with 0 argument") @@ -765,16 +803,24 @@ func (x *Float) ucmp(y *Float) int { // sign as x even when x is zero. // Add sets z to the rounded sum x+y and returns z. +// If z's precision is 0, it is set to the larger of +// x's or y's precision before the operation. // Rounding is performed according to z's precision // and rounding mode; and z's accuracy reports the // result error relative to the exact (not rounded) // result. func (z *Float) Add(x, y *Float) *Float { + if z.prec == 0 { + z.prec = umax(x.prec, y.prec) + } + // TODO(gri) what about -0? if len(y.mant) == 0 { + // TODO(gri) handle Inf return z.Round(x, z.prec, z.mode) } if len(x.mant) == 0 { + // TODO(gri) handle Inf return z.Round(y, z.prec, z.mode) } @@ -799,13 +845,15 @@ func (z *Float) Add(x, y *Float) *Float { } // Sub sets z to the rounded difference x-y and returns z. -// Rounding is performed according to z's precision -// and rounding mode; and z's accuracy reports the -// result error relative to the exact (not rounded) -// result. +// Precision, rounding, and accuracy reporting are as for Add. func (z *Float) Sub(x, y *Float) *Float { + if z.prec == 0 { + z.prec = umax(x.prec, y.prec) + } + // TODO(gri) what about -0? if len(y.mant) == 0 { + // TODO(gri) handle Inf return z.Round(x, z.prec, z.mode) } if len(x.mant) == 0 { @@ -836,11 +884,14 @@ func (z *Float) Sub(x, y *Float) *Float { } // Mul sets z to the rounded product x*y and returns z. -// Rounding is performed according to z's precision -// and rounding mode; and z's accuracy reports the -// result error relative to the exact (not rounded) -// result. +// Precision, rounding, and accuracy reporting are as for Add. func (z *Float) Mul(x, y *Float) *Float { + if z.prec == 0 { + z.prec = umax(x.prec, y.prec) + } + + // TODO(gri) handle Inf + // TODO(gri) what about -0? if len(x.mant) == 0 || len(y.mant) == 0 { z.neg = false @@ -858,46 +909,61 @@ func (z *Float) Mul(x, y *Float) *Float { // Quo sets z to the rounded quotient x/y and returns z. // If y == 0, a division-by-zero run-time panic occurs. TODO(gri) this should become Inf -// Rounding is performed according to z's precision -// and rounding mode; and z's accuracy reports the -// result error relative to the exact (not rounded) -// result. +// Precision, rounding, and accuracy reporting are as for Add. func (z *Float) Quo(x, y *Float) *Float { - // TODO(gri) what about -0? + if z.prec == 0 { + z.prec = umax(x.prec, y.prec) + } + + // TODO(gri) handle Inf + + // TODO(gri) check that this is correct + z.neg = x.neg != y.neg + + if len(y.mant) == 0 { + z.setExp(infExp) + return z + } + if len(x.mant) == 0 { - z.neg = false z.mant = z.mant[:0] z.exp = 0 z.acc = Exact return z } - if len(y.mant) == 0 { - panic("division-by-zero") // TODO(gri) handle this better - } // x, y != 0 z.uquo(x, y) - z.neg = x.neg != y.neg return z } // Lsh sets z to the rounded x * (1<<s) and returns z. +// If z's precision is 0, it is set to x's precision. // Rounding is performed according to z's precision // and rounding mode; and z's accuracy reports the // result error relative to the exact (not rounded) // result. func (z *Float) Lsh(x *Float, s uint, mode RoundingMode) *Float { + if z.prec == 0 { + z.prec = x.prec + } + + // TODO(gri) handle Inf + z.Round(x, z.prec, mode) z.setExp(int64(z.exp) + int64(s)) return z } // Rsh sets z to the rounded x / (1<<s) and returns z. -// Rounding is performed according to z's precision -// and rounding mode; and z's accuracy reports the -// result error relative to the exact (not rounded) -// result. +// Precision, rounding, and accuracy reporting are as for Lsh. func (z *Float) Rsh(x *Float, s uint, mode RoundingMode) *Float { + if z.prec == 0 { + z.prec = x.prec + } + + // TODO(gri) handle Inf + z.Round(x, z.prec, mode) z.setExp(int64(z.exp) - int64(s)) return z @@ -910,6 +976,8 @@ func (z *Float) Rsh(x *Float, s uint, mode RoundingMode) *Float { // +1 if x > y // func (x *Float) Cmp(y *Float) int { + // TODO(gri) handle Inf + // special cases switch { case len(x.mant) == 0: @@ -943,7 +1011,7 @@ func (x *Float) Cmp(y *Float) int { // Sign returns: // // -1 if x < 0 -// 0 if x == 0 (incl. x == -0) +// 0 if x == 0 (incl. x == -0) // TODO(gri) is this correct? // +1 if x > 0 // func (x *Float) Sign() int { @@ -955,3 +1023,10 @@ func (x *Float) Sign() int { } return 1 } + +func umax(x, y uint) uint { + if x < y { + return x + } + return y +} |
