math/big: multi-precision Floats (starting point)
Implemented:
- +, -, *, /, and some unary ops
- all rounding modes
- basic conversions
- string to float conversion
- tests
Missing:
- float to string conversion, formatting
- handling of +/-0 and +/-inf (under- and overflow)
- various TODOs and cleanups
With precision set to 24 or 53, the results match
float32 or float64 operations exactly (excluding
NaNs and denormalized numbers which will not be
supported).
Change-Id: I3121e90fc4b1528e40bb6ff526008da18b3c6520
Reviewed-on: https://go-review.googlesource.com/1218
Reviewed-by: Alan Donovan <adonovan@google.com>
diff --git a/src/math/big/float.go b/src/math/big/float.go
new file mode 100644
index 0000000..bb0aa1c
--- /dev/null
+++ b/src/math/big/float.go
@@ -0,0 +1,1086 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file implements multi-precision floating-point numbers.
+// Like in the GNU MPFR library (http://www.mpfr.org/), operands
+// can be of mixed precision. Unlike MPFR, the rounding mode is
+// not specified with each operation, but with each operand. The
+// rounding mode of the result operand determines the rounding
+// mode of an operation. This is a from-scratch implementation.
+
+// CAUTION: WORK IN PROGRESS - ANY ASPECT OF THIS IMPLEMENTATION MAY CHANGE!
+
+package big
+
+import (
+ "fmt"
+ "io"
+ "math"
+ "strings"
+)
+
+// TODO(gri): Determine if there's a more natural way to set the precision.
+// Should there be a special meaning for prec 0? Such as "full precision"?
+// (would be possible for all ops except quotient).
+
+const debugFloat = true // enable for debugging
+
+// Internal representation: A floating-point value x != 0 consists
+// of a sign (x.neg), mantissa (x.mant), and exponent (x.exp) such
+// that
+//
+// x = sign * 0.mantissa * 2**exponent
+//
+// and the mantissa is interpreted as a value between 0.5 and 1:
+//
+// 0.5 <= mantissa < 1.0
+//
+// The mantissa bits are stored in the shortest nat slice long enough
+// to hold x.prec mantissa bits. The mantissa is normalized such that
+// the msb of x.mant == 1. Thus, if the precision is not a multiple of
+// the Word size _W, x.mant[0] contains trailing zero bits. The number
+// 0 is represented by an empty mantissa and a zero exponent.
+
+// A Float represents a multi-precision floating point number
+// of the form
+//
+// sign * mantissa * 2**exponent
+//
+// Each value also has a precision, rounding mode, and accuracy value:
+// The precision is the number of mantissa bits used to represent a
+// value, and the result of operations is rounded to that many bits
+// according to the value's rounding mode (unless specified othewise).
+// The accuracy value indicates the rounding error with respect to the
+// exact (not rounded) value.
+//
+// The zero value for a Float represents the number 0.
+//
+// By setting the desired precision to 24 (or 53) and using ToNearestEven
+// rounding, Float arithmetic operations emulate the corresponding float32
+// or float64 IEEE-754 operations (except for denormalized numbers and NaNs).
+//
+// CAUTION: THIS IS WORK IN PROGRESS - DO NOT USE YET.
+//
+type Float struct {
+ mode RoundingMode
+ acc Accuracy
+ neg bool
+ mant nat
+ exp int32
+ prec uint // TODO(gri) make this a 32bit field
+}
+
+// NewFloat returns a new Float with value x rounded
+// to prec bits according to the given rounding mode.
+func NewFloat(x float64, prec uint, mode RoundingMode) *Float {
+ // TODO(gri) should make this more efficient
+ z := new(Float).SetFloat64(x)
+ return z.Round(z, prec, mode)
+}
+
+// infExp is the exponent value for infinity.
+const infExp = 1<<31 - 1
+
+// NewInf returns a new Float with value positive infinity (sign >= 0),
+// or negative infinity (sign < 0).
+func NewInf(sign int) *Float {
+ return &Float{neg: sign < 0, exp: infExp}
+}
+
+func (z *Float) setExp(e int64) {
+ e32 := int32(e)
+ if int64(e32) != e {
+ panic("exponent overflow") // TODO(gri) handle this gracefully
+ }
+ z.exp = e32
+}
+
+// Accuracy describes the rounding error produced by the most recent
+// operation that generated a Float value, relative to the exact value:
+//
+// -1: below exact value
+// 0: exact value
+// +1: above exact value
+//
+type Accuracy int8
+
+// Constants describing the Accuracy of a Float.
+const (
+ Below Accuracy = -1
+ Exact Accuracy = 0
+ Above Accuracy = +1
+)
+
+func (a Accuracy) String() string {
+ switch {
+ case a < 0:
+ return "below"
+ default:
+ return "exact"
+ case a > 0:
+ return "above"
+ }
+}
+
+// RoundingMode determines how a Float value is rounded to the
+// desired precision. Rounding may change the Float value; the
+// rounding error is described by the Float's Accuracy.
+type RoundingMode uint8
+
+// The following rounding modes are supported.
+const (
+ ToNearestEven RoundingMode = iota // == IEEE 754-2008 roundTiesToEven
+ ToNearestAway // == IEEE 754-2008 roundTiesToAway
+ ToZero // == IEEE 754-2008 roundTowardZero
+ AwayFromZero // no IEEE 754-2008 equivalent
+ ToNegativeInf // == IEEE 754-2008 roundTowardNegative
+ ToPositiveInf // == IEEE 754-2008 roundTowardPositive
+)
+
+func (mode RoundingMode) String() string {
+ switch mode {
+ case ToNearestEven:
+ return "ToNearestEven"
+ case ToNearestAway:
+ return "ToNearestAway"
+ case ToZero:
+ return "ToZero"
+ case AwayFromZero:
+ return "AwayFromZero"
+ case ToNegativeInf:
+ return "ToNegativeInf"
+ case ToPositiveInf:
+ return "ToPositiveInf"
+ }
+ panic("unreachable")
+}
+
+// Precision returns the mantissa precision of x in bits.
+// The precision may be 0 if x == 0. // TODO(gri) Determine a better approach.
+func (x *Float) Precision() uint {
+ return uint(x.prec)
+}
+
+// Accuracy returns the accuracy of x produced by the most recent operation.
+func (x *Float) Accuracy() Accuracy {
+ return x.acc
+}
+
+// Mode returns the rounding mode of x.
+func (x *Float) Mode() RoundingMode {
+ return x.mode
+}
+
+// debugging support
+func (x *Float) validate() {
+ // assumes x != 0
+ const msb = 1 << (_W - 1)
+ m := len(x.mant)
+ if x.mant[m-1]&msb == 0 {
+ panic(fmt.Sprintf("msb not set in last word %#x of %s", x.mant[m-1], x.PString()))
+ }
+ if x.prec <= 0 {
+ panic(fmt.Sprintf("invalid precision %d", x.prec))
+ }
+}
+
+// round rounds z according to z.mode to z.prec bits and sets z.acc accordingly.
+// sbit must be 0 or 1 and summarizes any "sticky bit" information one might
+// have before calling round. z's mantissa must be normalized, with the msb set.
+func (z *Float) round(sbit uint) {
+ z.acc = Exact
+
+ // handle zero
+ m := uint(len(z.mant)) // mantissa length in words for current precision
+ if m == 0 {
+ z.exp = 0
+ return
+ }
+
+ if debugFloat {
+ z.validate()
+ }
+ // z.prec > 0
+
+ bits := m * _W // available mantissa bits
+ if bits == z.prec {
+ // mantissa fits Exactly => nothing to do
+ return
+ }
+
+ n := (z.prec + (_W - 1)) / _W // mantissa length in words for desired precision
+ if bits < z.prec {
+ // mantissa too small => extend
+ if m < n {
+ // slice too short => extend slice
+ if int(n) <= cap(z.mant) {
+ // reuse existing slice
+ z.mant = z.mant[:n]
+ copy(z.mant[n-m:], z.mant[:m])
+ z.mant[:n-m].clear()
+ } else {
+ // n > cap(z.mant) => allocate new slice
+ const e = 4 // extra capacity (see nat.make)
+ new := make(nat, n, n+e)
+ copy(new[n-m:], z.mant)
+ }
+ }
+ return
+ }
+
+ // Rounding is based on two bits: the rounding bit (rbit) and the
+ // sticky bit (sbit). The rbit is the bit immediately before the
+ // mantissa bits (the "0.5"). The sbit is set if any of the bits
+ // before the rbit are set (the "0.25", "0.125", etc.):
+ //
+ // rbit sbit => "fractional part"
+ //
+ // 0 0 == 0
+ // 0 1 > 0 , < 0.5
+ // 1 0 == 0.5
+ // 1 1 > 0.5, < 1.0
+
+ // bits > z.prec: mantissa too large => round
+ r := bits - z.prec - 1 // rounding bit position; r >= 0
+ rbit := z.mant.bit(r) // rounding bit
+ if sbit == 0 {
+ sbit = z.mant.sticky(r)
+ }
+ if debugFloat && sbit&^1 != 0 {
+ panic(fmt.Sprintf("invalid sbit %#x", sbit))
+ }
+
+ // convert ToXInf rounding modes
+ mode := z.mode
+ switch mode {
+ case ToNegativeInf:
+ mode = ToZero
+ if z.neg {
+ mode = AwayFromZero
+ }
+ case ToPositiveInf:
+ mode = AwayFromZero
+ if z.neg {
+ mode = ToZero
+ }
+ }
+
+ // cut off extra words
+ if m > n {
+ copy(z.mant, z.mant[m-n:]) // move n last words to front
+ z.mant = z.mant[:n]
+ }
+
+ // determine number of trailing zero bits t
+ t := n*_W - z.prec // 0 <= t < _W
+ lsb := Word(1) << t
+
+ // make rounding decision
+ // TODO(gri) This can be simplified (see roundBits in float_test.go).
+ switch mode {
+ case ToZero:
+ // nothing to do
+ case ToNearestEven, ToNearestAway:
+ if rbit == 0 {
+ // rounding bits == 0b0x
+ mode = ToZero
+ } else if sbit == 1 {
+ // rounding bits == 0b11
+ mode = AwayFromZero
+ }
+ case AwayFromZero:
+ if rbit|sbit == 0 {
+ mode = ToZero
+ }
+ default:
+ // ToXInf modes have been converted to ToZero or AwayFromZero
+ panic("unreachable")
+ }
+
+ // round and determine accuracy
+ switch mode {
+ case ToZero:
+ if rbit|sbit != 0 {
+ z.acc = Below
+ }
+
+ case ToNearestEven, ToNearestAway:
+ if debugFloat && rbit != 1 {
+ panic("internal error in rounding")
+ }
+ if mode == ToNearestEven && sbit == 0 && z.mant[0]&lsb == 0 {
+ z.acc = Below
+ break
+ }
+ // mode == ToNearestAway || sbit == 1 || z.mant[0]&lsb != 0
+ fallthrough
+
+ case AwayFromZero:
+ // add 1 to mantissa
+ if addVW(z.mant, z.mant, lsb) != 0 {
+ // overflow => shift mantissa right by 1 and add msb
+ shrVU(z.mant, z.mant, 1)
+ z.mant[n-1] |= 1 << (_W - 1)
+ // adjust exponent
+ z.exp++
+ }
+ z.acc = Above
+ }
+
+ // zero out trailing bits in least-significant word
+ z.mant[0] &^= lsb - 1
+
+ // update accuracy
+ if z.neg {
+ z.acc = -z.acc
+ }
+
+ if debugFloat {
+ z.validate()
+ }
+
+ return
+}
+
+// Round sets z to the value of x rounded according to mode to prec bits and returns z.
+func (z *Float) Round(x *Float, prec uint, mode RoundingMode) *Float {
+ z.Set(x)
+ z.prec = prec
+ z.mode = mode
+ z.round(0)
+ return z
+}
+
+// nlz returns the number of leading zero bits in x.
+func nlz(x Word) uint {
+ return _W - uint(bitLen(x))
+}
+
+// TODO(gri) this assumes a Word is 64 bits
+func nlz64(x uint64) uint {
+ if _W != 64 {
+ panic("size mismatch")
+ }
+ return nlz(Word(x))
+}
+
+// SetUint64 sets z to x and returns z.
+// Precision is set to 64 bits.
+func (z *Float) SetUint64(x uint64) *Float {
+ z.neg = false
+ z.prec = 64
+ if x == 0 {
+ z.mant = z.mant[:0]
+ z.exp = 0
+ return z
+ }
+ s := nlz64(x)
+ z.mant = z.mant.setUint64(x << s)
+ z.exp = int32(64 - s)
+ return z
+}
+
+// SetInt64 sets z to x and returns z.
+// Precision is set to 64 bits.
+func (z *Float) SetInt64(x int64) *Float {
+ u := x
+ if u < 0 {
+ u = -u
+ }
+ z.SetUint64(uint64(u))
+ z.neg = x < 0
+ return z
+}
+
+// SetFloat64 sets z to x and returns z.
+// Precision is set to 53 bits.
+// TODO(gri) test denormals, +/-Inf, disallow NaN.
+func (z *Float) SetFloat64(x float64) *Float {
+ z.prec = 53
+ if x == 0 {
+ z.neg = false
+ z.mant = z.mant[:0]
+ z.exp = 0
+ return z
+ }
+ z.neg = x < 0
+ fmant, exp := math.Frexp(x) // get normalized mantissa
+ z.mant = z.mant.setUint64(1<<63 | math.Float64bits(fmant)<<11)
+ z.exp = int32(exp)
+ return z
+}
+
+// fnorm normalizes mantissa m by shifting it to the left
+// such that the msb of the most-significant word (msw)
+// is 1. It returns the shift amount.
+// It assumes that m is not the zero nat.
+func fnorm(m nat) uint {
+ if debugFloat && (len(m) == 0 || m[len(m)-1] == 0) {
+ panic("msw of mantissa is 0")
+ }
+ s := nlz(m[len(m)-1])
+ if s > 0 {
+ c := shlVU(m, m, s)
+ if debugFloat && c != 0 {
+ panic("nlz or shlVU incorrect")
+ }
+ }
+ return s
+}
+
+// SetInt sets z to x and returns z.
+// Precision is set to the number of bits required to represent x accurately.
+// TODO(gri) what about precision for x == 0?
+func (z *Float) SetInt(x *Int) *Float {
+ if len(x.abs) == 0 {
+ z.neg = false
+ z.mant = z.mant[:0]
+ z.exp = 0
+ // z.prec = ?
+ return z
+ }
+ // x != 0
+ z.neg = x.neg
+ z.mant = z.mant.set(x.abs)
+ e := uint(len(z.mant))*_W - fnorm(z.mant)
+ z.exp = int32(e)
+ z.prec = e
+ return z
+}
+
+// SetRat sets z to x rounded to the precision of z and returns z.
+func (z *Float) SetRat(x *Rat, prec uint) *Float {
+ panic("unimplemented")
+}
+
+// Set sets z to x, with the same precision as x, and returns z.
+func (z *Float) Set(x *Float) *Float {
+ if z != x {
+ z.neg = x.neg
+ z.exp = x.exp
+ z.mant = z.mant.set(x.mant)
+ z.prec = x.prec
+ }
+ return z
+}
+
+func high64(x nat) uint64 {
+ if len(x) == 0 {
+ return 0
+ }
+ v := uint64(x[len(x)-1])
+ if _W == 32 && len(x) > 1 {
+ v = v<<32 | uint64(x[len(x)-2])
+ }
+ return v
+}
+
+// TODO(gri) FIX THIS (rounding mode, errors, accuracy, etc.)
+func (x *Float) Uint64() uint64 {
+ m := high64(x.mant)
+ s := x.exp
+ if s >= 0 {
+ return m >> (64 - uint(s))
+ }
+ return 0 // imprecise
+}
+
+// TODO(gri) FIX THIS (rounding mode, errors, etc.)
+func (x *Float) Int64() int64 {
+ v := int64(x.Uint64())
+ if x.neg {
+ return -v
+ }
+ return v
+}
+
+// Float64 returns the closest float64 value of x
+// by rounding to nearest with 53 bits precision.
+// TODO(gri) implement/document error scenarios.
+func (x *Float) Float64() (float64, Accuracy) {
+ if len(x.mant) == 0 {
+ return 0, Exact
+ }
+ // x != 0
+ r := new(Float).Round(x, 53, ToNearestEven)
+ var s uint64
+ if r.neg {
+ s = 1 << 63
+ }
+ e := uint64(1022+r.exp) & 0x7ff // TODO(gri) check for overflow
+ m := high64(r.mant) >> 11 & (1<<52 - 1)
+ return math.Float64frombits(s | e<<52 | m), r.acc
+}
+
+func (x *Float) Int() *Int {
+ if len(x.mant) == 0 {
+ return new(Int)
+ }
+ panic("unimplemented")
+}
+
+func (x *Float) Rat() *Rat {
+ panic("unimplemented")
+}
+
+func (x *Float) IsInt() bool {
+ if len(x.mant) == 0 {
+ return true
+ }
+ if x.exp <= 0 {
+ return false
+ }
+ if uint(x.exp) >= x.prec {
+ return true
+ }
+ panic("unimplemented")
+}
+
+// Abs sets z to |x| (the absolute value of x) and returns z.
+// TODO(gri) should Abs (and Neg) below ignore z's precision and rounding mode?
+func (z *Float) Abs(x *Float) *Float {
+ z.Set(x)
+ z.neg = false
+ return z
+}
+
+// Neg sets z to x with its sign negated, and returns z.
+func (z *Float) Neg(x *Float) *Float {
+ z.Set(x)
+ z.neg = !z.neg
+ return z
+}
+
+// z = x + y, ignoring signs of x and y.
+// x and y must not be 0.
+func (z *Float) uadd(x, y *Float) {
+ if debugFloat && (len(x.mant) == 0 || len(y.mant) == 0) {
+ panic("uadd called with 0 argument")
+ }
+
+ // Note: This implementation requires 2 shifts most of the
+ // time. It is also inefficient if exponents or precisions
+ // differ by wide margins. The following article describes
+ // an efficient (but much more complicated) implementation
+ // compatible with the internal representation used here:
+ //
+ // Vincent Lefèvre: "The Generic Multiple-Precision Floating-
+ // Point Addition With Exact Rounding (as in the MPFR Library)"
+ // http://www.vinc17.net/research/papers/rnc6.pdf
+
+ // order x, y by magnitude
+ ex := int(x.exp) - len(x.mant)*_W
+ ey := int(y.exp) - len(y.mant)*_W
+ if ex < ey {
+ // + is commutative => ok to swap operands
+ x, y = y, x
+ ex, ey = ey, ex
+ }
+ // ex >= ey
+ d := uint(ex - ey)
+
+ // compute adjusted xmant
+ var n0 uint // nlz(z) before addition
+ xadj := x.mant
+ if d > 0 {
+ xadj = z.mant.shl(x.mant, d) // 1st shift
+ n0 = _W - d%_W
+ }
+ z.exp = x.exp
+
+ // add numbers
+ z.mant = z.mant.add(xadj, y.mant)
+
+ // normalize mantissa
+ n1 := fnorm(z.mant) // 2nd shift (often)
+
+ // adjust exponent if the result got longer (by at most 1 bit)
+ if n1 != n0 {
+ if debugFloat && (n1+1)%_W != n0 {
+ panic(fmt.Sprintf("carry is %d bits, expected at most 1 bit", n0-n1))
+ }
+ z.exp++
+ }
+
+ z.round(0)
+}
+
+// z = x - y for x >= y, ignoring signs of x and y.
+// x and y must not be zero.
+func (z *Float) usub(x, y *Float) {
+ if debugFloat && (len(x.mant) == 0 || len(y.mant) == 0) {
+ panic("usub called with 0 argument")
+ }
+
+ // Note: Like uadd, this implementation is often doing
+ // too much work and could be optimized by separating
+ // the various special cases.
+
+ // determine magnitude difference
+ ex := int(x.exp) - len(x.mant)*_W
+ ey := int(y.exp) - len(y.mant)*_W
+
+ if ex < ey {
+ panic("underflow")
+ }
+ // ex >= ey
+ d := uint(ex - ey)
+
+ // compute adjusted x.mant
+ var n uint // nlz(z) after adjustment
+ xadj := x.mant
+ if d > 0 {
+ xadj = z.mant.shl(x.mant, d)
+ n = _W - d%_W
+ }
+ e := int64(x.exp) + int64(n)
+
+ // subtract numbers
+ z.mant = z.mant.sub(xadj, y.mant)
+
+ if len(z.mant) != 0 {
+ e -= int64(len(xadj)-len(z.mant)) * _W
+
+ // normalize mantissa
+ z.setExp(e - int64(fnorm(z.mant)))
+ }
+
+ z.round(0)
+}
+
+// z = x * y, ignoring signs of x and y.
+// x and y must not be zero.
+func (z *Float) umul(x, y *Float) {
+ if debugFloat && (len(x.mant) == 0 || len(y.mant) == 0) {
+ panic("umul called with 0 argument")
+ }
+
+ // Note: This is doing too much work if the precision
+ // of z is less than the sum of the precisions of x
+ // and y which is often the case (e.g., if all floats
+ // have the same precision).
+ // TODO(gri) Optimize this for the common case.
+
+ e := int64(x.exp) + int64(y.exp)
+ z.mant = z.mant.mul(x.mant, y.mant)
+
+ // normalize mantissa
+ z.setExp(e - int64(fnorm(z.mant)))
+ z.round(0)
+}
+
+// z = x / y, ignoring signs of x and y.
+// x and y must not be zero.
+func (z *Float) uquo(x, y *Float) {
+ if debugFloat && (len(x.mant) == 0 || len(y.mant) == 0) {
+ panic("uquo called with 0 argument")
+ }
+
+ // mantissa length in words for desired result precision + 1
+ // (at least one extra bit so we get the rounding bit after
+ // the division)
+ n := int(z.prec/_W) + 1
+
+ // compute adjusted x.mant such that we get enough result precision
+ xadj := x.mant
+ if d := n - len(x.mant) + len(y.mant); d > 0 {
+ // d extra words needed => add d "0 digits" to x
+ xadj = make(nat, len(x.mant)+d)
+ copy(xadj[d:], x.mant)
+ }
+ // TODO(gri): If we have too many digits (d < 0), we should be able
+ // to shorten x for faster division. But we must be extra careful
+ // with rounding in that case.
+
+ // divide
+ var r nat
+ z.mant, r = z.mant.div(nil, xadj, y.mant)
+
+ // determine exponent
+ e := int64(x.exp) - int64(y.exp) - int64(len(xadj)-len(y.mant)-len(z.mant))*_W
+
+ // normalize mantissa
+ z.setExp(e - int64(fnorm(z.mant)))
+
+ // The result is long enough to include (at least) the rounding bit.
+ // If there's a non-zero remainder, the corresponding fractional part
+ // (if it were computed), would have a non-zero sticky bit (if it were
+ // zero, it couldn't have a non-zero remainder).
+ var sbit uint
+ if len(r) > 0 {
+ sbit = 1
+ }
+ z.round(sbit)
+}
+
+// ucmp returns -1, 0, or 1, depending on whether x < y, x == y, or x > y,
+// while ignoring the signs of x and y. x and y must not be zero.
+func (x *Float) ucmp(y *Float) int {
+ if debugFloat && (len(x.mant) == 0 || len(y.mant) == 0) {
+ panic("ucmp called with 0 argument")
+ }
+
+ switch {
+ case x.exp < y.exp:
+ return -1
+ case x.exp > y.exp:
+ return 1
+ }
+ // x.exp == y.exp
+
+ // compare mantissas
+ i := len(x.mant)
+ j := len(y.mant)
+ for i > 0 || j > 0 {
+ var xm, ym Word
+ if i > 0 {
+ i--
+ xm = x.mant[i]
+ }
+ if j > 0 {
+ j--
+ ym = y.mant[j]
+ }
+ switch {
+ case xm < ym:
+ return -1
+ case xm > ym:
+ return 1
+ }
+ }
+
+ return 0
+}
+
+// Handling of sign bit as defined by IEEE 754-2008,
+// section 6.3 (note that there are no NaN Floats):
+//
+// When neither the inputs nor result are NaN, the sign of a product or
+// quotient is the exclusive OR of the operands’ signs; the sign of a sum,
+// or of a difference x−y regarded as a sum x+(−y), differs from at most
+// one of the addends’ signs; and the sign of the result of conversions,
+// the quantize operation, the roundToIntegral operations, and the
+// roundToIntegralExact (see 5.3.1) is the sign of the first or only operand.
+// These rules shall apply even when operands or results are zero or infinite.
+//
+// When the sum of two operands with opposite signs (or the difference of
+// two operands with like signs) is exactly zero, the sign of that sum (or
+// difference) shall be +0 in all rounding-direction attributes except
+// roundTowardNegative; under that attribute, the sign of an exact zero
+// sum (or difference) shall be −0. However, x+x = x−(−x) retains the same
+// sign as x even when x is zero.
+
+// Add sets z to the rounded sum x+y and returns z.
+// Rounding is performed according to z's precision
+// and rounding mode; and z's accuracy reports the
+// result error relative to the exact (not rounded)
+// result.
+func (z *Float) Add(x, y *Float) *Float {
+ // TODO(gri) what about -0?
+ if len(y.mant) == 0 {
+ return z.Round(x, z.prec, z.mode)
+ }
+ if len(x.mant) == 0 {
+ return z.Round(y, z.prec, z.mode)
+ }
+
+ // x, y != 0
+ neg := x.neg
+ if x.neg == y.neg {
+ // x + y == x + y
+ // (-x) + (-y) == -(x + y)
+ z.uadd(x, y)
+ } else {
+ // x + (-y) == x - y == -(y - x)
+ // (-x) + y == y - x == -(x - y)
+ if x.ucmp(y) >= 0 {
+ z.usub(x, y)
+ } else {
+ neg = !neg
+ z.usub(y, x)
+ }
+ }
+ z.neg = neg
+ return z
+}
+
+// Sub sets z to the rounded difference x-y and returns z.
+// Rounding is performed according to z's precision
+// and rounding mode; and z's accuracy reports the
+// result error relative to the exact (not rounded)
+// result.
+func (z *Float) Sub(x, y *Float) *Float {
+ // TODO(gri) what about -0?
+ if len(y.mant) == 0 {
+ return z.Round(x, z.prec, z.mode)
+ }
+ if len(x.mant) == 0 {
+ prec := z.prec
+ mode := z.mode
+ z.Neg(y)
+ return z.Round(z, prec, mode)
+ }
+
+ // x, y != 0
+ neg := x.neg
+ if x.neg != y.neg {
+ // x - (-y) == x + y
+ // (-x) - y == -(x + y)
+ z.uadd(x, y)
+ } else {
+ // x - y == x - y == -(y - x)
+ // (-x) - (-y) == y - x == -(x - y)
+ if x.ucmp(y) >= 0 {
+ z.usub(x, y)
+ } else {
+ neg = !neg
+ z.usub(y, x)
+ }
+ }
+ z.neg = neg
+ return z
+}
+
+// Mul sets z to the rounded product x*y and returns z.
+// Rounding is performed according to z's precision
+// and rounding mode; and z's accuracy reports the
+// result error relative to the exact (not rounded)
+// result.
+func (z *Float) Mul(x, y *Float) *Float {
+ // TODO(gri) what about -0?
+ if len(x.mant) == 0 || len(y.mant) == 0 {
+ z.neg = false
+ z.mant = z.mant[:0]
+ z.exp = 0
+ z.acc = Exact
+ return z
+ }
+
+ // x, y != 0
+ z.umul(x, y)
+ z.neg = x.neg != y.neg
+ return z
+}
+
+// Quo sets z to the rounded quotient x/y and returns z.
+// If y == 0, a division-by-zero run-time panic occurs. TODO(gri) this should become Inf
+// Rounding is performed according to z's precision
+// and rounding mode; and z's accuracy reports the
+// result error relative to the exact (not rounded)
+// result.
+func (z *Float) Quo(x, y *Float) *Float {
+ // TODO(gri) what about -0?
+ if len(x.mant) == 0 {
+ z.neg = false
+ z.mant = z.mant[:0]
+ z.exp = 0
+ z.acc = Exact
+ return z
+ }
+ if len(y.mant) == 0 {
+ panic("division-by-zero") // TODO(gri) handle this better
+ }
+
+ // x, y != 0
+ z.uquo(x, y)
+ z.neg = x.neg != y.neg
+ return z
+}
+
+// Lsh sets z to the rounded x * (1<<s) and returns z.
+// Rounding is performed according to z's precision
+// and rounding mode; and z's accuracy reports the
+// result error relative to the exact (not rounded)
+// result.
+func (z *Float) Lsh(x *Float, s uint, mode RoundingMode) *Float {
+ z.Round(x, z.prec, mode)
+ z.setExp(int64(z.exp) + int64(s))
+ return z
+}
+
+// Rsh sets z to the rounded x / (1<<s) and returns z.
+// Rounding is performed according to z's precision
+// and rounding mode; and z's accuracy reports the
+// result error relative to the exact (not rounded)
+// result.
+func (z *Float) Rsh(x *Float, s uint, mode RoundingMode) *Float {
+ z.Round(x, z.prec, mode)
+ z.setExp(int64(z.exp) - int64(s))
+ return z
+}
+
+// Cmp compares x and y and returns:
+//
+// -1 if x < y
+// 0 if x == y (incl. -0 == 0)
+// +1 if x > y
+//
+func (x *Float) Cmp(y *Float) int {
+ // special cases
+ switch {
+ case len(x.mant) == 0:
+ // 0 cmp y == -sign(y)
+ return -y.Sign()
+ case len(y.mant) == 0:
+ // x cmp 0 == sign(x)
+ return x.Sign()
+ }
+ // x != 0 && y != 0
+
+ // x cmp y == x cmp y
+ // x cmp (-y) == 1
+ // (-x) cmp y == -1
+ // (-x) cmp (-y) == -(x cmp y)
+ switch {
+ case x.neg == y.neg:
+ r := x.ucmp(y)
+ if x.neg {
+ r = -r
+ }
+ return r
+ case x.neg:
+ return -1
+ default:
+ return 1
+ }
+ return 0
+}
+
+// Sign returns:
+//
+// -1 if x < 0
+// 0 if x == 0 (incl. x == -0)
+// +1 if x > 0
+//
+func (x *Float) Sign() int {
+ if len(x.mant) == 0 {
+ return 0
+ }
+ if x.neg {
+ return -1
+ }
+ return 1
+}
+
+func (x *Float) String() string {
+ return x.PString() // TODO(gri) fix this
+}
+
+// PString returns x as a string in the format ["-"] "0x" mantissa "p" exponent,
+// with a hexadecimal mantissa and a signed decimal exponent.
+func (x *Float) PString() string {
+ prefix := "0."
+ if x.neg {
+ prefix = "-0."
+ }
+ return prefix + x.mant.string(lowercaseDigits[:16]) + fmt.Sprintf("p%d", x.exp)
+}
+
+// SetString sets z to the value of s and returns z and a boolean indicating
+// success. s must be a floating-point number of the form:
+//
+// number = [ sign ] mantissa [ exponent ] .
+// mantissa = digits | digits "." [ digits ] | "." digits .
+// exponent = ( "E" | "e" | "p" ) [ sign ] digits .
+// sign = "+" | "-" .
+// digits = digit { digit } .
+// digit = "0" ... "9" .
+//
+// A "p" exponent indicates power of 2 for the exponent; for instance 1.2p3
+// is 1.2 * 2**3. If the operation failed, the value of z is undefined but
+// the returned value is nil.
+//
+func (z *Float) SetString(s string) (*Float, bool) {
+ r := strings.NewReader(s)
+
+ f, err := z.scan(r)
+ if err != nil {
+ return nil, false
+ }
+
+ // there should be no unread characters left
+ if _, _, err = r.ReadRune(); err != io.EOF {
+ return nil, false
+ }
+
+ return f, true
+}
+
+// scan sets z to the value of the longest prefix of r representing
+// a floating-point number and returns z or an error, if any.
+// The number must be of the form:
+//
+// number = [ sign ] mantissa [ exponent ] .
+// mantissa = digits | digits "." [ digits ] | "." digits .
+// exponent = ( "E" | "e" | "p" ) [ sign ] digits .
+// sign = "+" | "-" .
+// digits = digit { digit } .
+// digit = "0" ... "9" .
+//
+// A "p" exponent indicates power of 2 for the exponent; for instance 1.2p3
+// is 1.2 * 2**3. If the operation failed, the value of z is undefined but
+// the returned value is nil.
+//
+func (z *Float) scan(r io.RuneScanner) (f *Float, err error) {
+ // sign
+ z.neg, err = scanSign(r)
+ if err != nil {
+ return
+ }
+
+ // mantissa
+ var ecorr int // decimal exponent correction; valid if <= 0
+ z.mant, _, ecorr, err = z.mant.scan(r, 1)
+ if err != nil {
+ return
+ }
+
+ // exponent
+ var exp int64
+ var ebase int
+ exp, ebase, err = scanExponent(r)
+ if err != nil {
+ return
+ }
+ // special-case 0
+ if len(z.mant) == 0 {
+ z.exp = 0
+ return z, nil
+ }
+ // len(z.mant) > 0
+
+ // determine binary (exp2) and decimal (exp) exponent
+ exp2 := int64(len(z.mant)*_W - int(fnorm(z.mant)))
+ if ebase == 2 {
+ exp2 += exp
+ exp = 0
+ }
+ if ecorr < 0 {
+ exp += int64(ecorr)
+ }
+
+ z.setExp(exp2)
+ if exp == 0 {
+ // no decimal exponent
+ z.round(0)
+ return z, nil
+ }
+ // exp != 0
+
+ // compute decimal exponent power
+ expabs := exp
+ if expabs < 0 {
+ expabs = -expabs
+ }
+ powTen := new(Float).SetInt(new(Int).SetBits(nat(nil).expNN(natTen, nat(nil).setWord(Word(expabs)), nil)))
+
+ // correct result
+ if exp < 0 {
+ z.uquo(z, powTen)
+ } else {
+ z.umul(z, powTen)
+ }
+
+ return z, nil
+}
diff --git a/src/math/big/float_test.go b/src/math/big/float_test.go
new file mode 100644
index 0000000..20c7d89
--- /dev/null
+++ b/src/math/big/float_test.go
@@ -0,0 +1,772 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package big
+
+import (
+ "fmt"
+ "sort"
+ "strconv"
+ "testing"
+)
+
+func fromBinary(s string) int64 {
+ x, err := strconv.ParseInt(s, 2, 64)
+ if err != nil {
+ panic(err)
+ }
+ return x
+}
+
+func toBinary(x int64) string {
+ return strconv.FormatInt(x, 2)
+}
+
+func testFloatRound(t *testing.T, x, r int64, prec uint, mode RoundingMode) {
+ // verify test data
+ var ok bool
+ switch mode {
+ case ToNearestEven, ToNearestAway:
+ ok = true // nothing to do for now
+ case ToZero:
+ if x < 0 {
+ ok = r >= x
+ } else {
+ ok = r <= x
+ }
+ case AwayFromZero:
+ if x < 0 {
+ ok = r <= x
+ } else {
+ ok = r >= x
+ }
+ case ToNegativeInf:
+ ok = r <= x
+ case ToPositiveInf:
+ ok = r >= x
+ default:
+ panic("unreachable")
+ }
+ if !ok {
+ t.Fatalf("incorrect test data for prec = %d, %s: x = %s, r = %s", prec, mode, toBinary(x), toBinary(r))
+ }
+
+ // compute expected accuracy
+ a := Exact
+ switch {
+ case r < x:
+ a = Below
+ case r > x:
+ a = Above
+ }
+
+ // round
+ f := new(Float).SetInt64(x)
+ f.Round(f, prec, mode)
+
+ // check result
+ r1 := f.Int64()
+ p1 := f.Precision()
+ a1 := f.Accuracy()
+ if r1 != r || p1 != prec || a1 != a {
+ t.Errorf("Round(%s, %d, %s): got %s (%d bits, %s); want %s (%d bits, %s)",
+ toBinary(x), prec, mode,
+ toBinary(r1), p1, a1,
+ toBinary(r), prec, a)
+ }
+}
+
+// TestFloatRound tests basic rounding.
+func TestFloatRound(t *testing.T) {
+ var tests = []struct {
+ prec uint
+ x, zero, neven, naway, away string // input, results rounded to prec bits
+ }{
+ {5, "1000", "1000", "1000", "1000", "1000"},
+ {5, "1001", "1001", "1001", "1001", "1001"},
+ {5, "1010", "1010", "1010", "1010", "1010"},
+ {5, "1011", "1011", "1011", "1011", "1011"},
+ {5, "1100", "1100", "1100", "1100", "1100"},
+ {5, "1101", "1101", "1101", "1101", "1101"},
+ {5, "1110", "1110", "1110", "1110", "1110"},
+ {5, "1111", "1111", "1111", "1111", "1111"},
+
+ {4, "1000", "1000", "1000", "1000", "1000"},
+ {4, "1001", "1001", "1001", "1001", "1001"},
+ {4, "1010", "1010", "1010", "1010", "1010"},
+ {4, "1011", "1011", "1011", "1011", "1011"},
+ {4, "1100", "1100", "1100", "1100", "1100"},
+ {4, "1101", "1101", "1101", "1101", "1101"},
+ {4, "1110", "1110", "1110", "1110", "1110"},
+ {4, "1111", "1111", "1111", "1111", "1111"},
+
+ {3, "1000", "1000", "1000", "1000", "1000"},
+ {3, "1001", "1000", "1000", "1010", "1010"},
+ {3, "1010", "1010", "1010", "1010", "1010"},
+ {3, "1011", "1010", "1100", "1100", "1100"},
+ {3, "1100", "1100", "1100", "1100", "1100"},
+ {3, "1101", "1100", "1100", "1110", "1110"},
+ {3, "1110", "1110", "1110", "1110", "1110"},
+ {3, "1111", "1110", "10000", "10000", "10000"},
+
+ {3, "1000001", "1000000", "1000000", "1000000", "1010000"},
+ {3, "1001001", "1000000", "1010000", "1010000", "1010000"},
+ {3, "1010001", "1010000", "1010000", "1010000", "1100000"},
+ {3, "1011001", "1010000", "1100000", "1100000", "1100000"},
+ {3, "1100001", "1100000", "1100000", "1100000", "1110000"},
+ {3, "1101001", "1100000", "1110000", "1110000", "1110000"},
+ {3, "1110001", "1110000", "1110000", "1110000", "10000000"},
+ {3, "1111001", "1110000", "10000000", "10000000", "10000000"},
+
+ {2, "1000", "1000", "1000", "1000", "1000"},
+ {2, "1001", "1000", "1000", "1000", "1100"},
+ {2, "1010", "1000", "1000", "1100", "1100"},
+ {2, "1011", "1000", "1100", "1100", "1100"},
+ {2, "1100", "1100", "1100", "1100", "1100"},
+ {2, "1101", "1100", "1100", "1100", "10000"},
+ {2, "1110", "1100", "10000", "10000", "10000"},
+ {2, "1111", "1100", "10000", "10000", "10000"},
+
+ {2, "1000001", "1000000", "1000000", "1000000", "1100000"},
+ {2, "1001001", "1000000", "1000000", "1000000", "1100000"},
+ {2, "1010001", "1000000", "1100000", "1100000", "1100000"},
+ {2, "1011001", "1000000", "1100000", "1100000", "1100000"},
+ {2, "1100001", "1100000", "1100000", "1100000", "10000000"},
+ {2, "1101001", "1100000", "1100000", "1100000", "10000000"},
+ {2, "1110001", "1100000", "10000000", "10000000", "10000000"},
+ {2, "1111001", "1100000", "10000000", "10000000", "10000000"},
+
+ {1, "1000", "1000", "1000", "1000", "1000"},
+ {1, "1001", "1000", "1000", "1000", "10000"},
+ {1, "1010", "1000", "1000", "1000", "10000"},
+ {1, "1011", "1000", "1000", "1000", "10000"},
+ {1, "1100", "1000", "10000", "10000", "10000"},
+ {1, "1101", "1000", "10000", "10000", "10000"},
+ {1, "1110", "1000", "10000", "10000", "10000"},
+ {1, "1111", "1000", "10000", "10000", "10000"},
+
+ {1, "1000001", "1000000", "1000000", "1000000", "10000000"},
+ {1, "1001001", "1000000", "1000000", "1000000", "10000000"},
+ {1, "1010001", "1000000", "1000000", "1000000", "10000000"},
+ {1, "1011001", "1000000", "1000000", "1000000", "10000000"},
+ {1, "1100001", "1000000", "10000000", "10000000", "10000000"},
+ {1, "1101001", "1000000", "10000000", "10000000", "10000000"},
+ {1, "1110001", "1000000", "10000000", "10000000", "10000000"},
+ {1, "1111001", "1000000", "10000000", "10000000", "10000000"},
+ }
+
+ for _, test := range tests {
+ x := fromBinary(test.x)
+ z := fromBinary(test.zero)
+ e := fromBinary(test.neven)
+ n := fromBinary(test.naway)
+ a := fromBinary(test.away)
+ prec := test.prec
+
+ testFloatRound(t, x, z, prec, ToZero)
+ testFloatRound(t, x, e, prec, ToNearestEven)
+ testFloatRound(t, x, n, prec, ToNearestAway)
+ testFloatRound(t, x, a, prec, AwayFromZero)
+
+ testFloatRound(t, x, z, prec, ToNegativeInf)
+ testFloatRound(t, x, a, prec, ToPositiveInf)
+
+ testFloatRound(t, -x, -a, prec, ToNegativeInf)
+ testFloatRound(t, -x, -z, prec, ToPositiveInf)
+ }
+}
+
+// TestFloatRound24 tests that rounding a float64 to 24 bits
+// matches IEEE-754 rounding to nearest when converting a
+// float64 to a float32.
+func TestFloatRound24(t *testing.T) {
+ const x0 = 1<<26 - 0x10 // 11...110000 (26 bits)
+ for d := 0; d <= 0x10; d++ {
+ x := float64(x0 + d)
+ f := new(Float).SetFloat64(x)
+ f.Round(f, 24, ToNearestEven)
+ got, _ := f.Float64()
+ want := float64(float32(x))
+ if got != want {
+ t.Errorf("Round(%g, 24) = %g; want %g", x, got, want)
+ }
+ }
+}
+
+func TestFloatSetUint64(t *testing.T) {
+ var tests = []uint64{
+ 0,
+ 1,
+ 2,
+ 10,
+ 100,
+ 1<<32 - 1,
+ 1 << 32,
+ 1<<64 - 1,
+ }
+ for _, want := range tests {
+ f := new(Float).SetUint64(want)
+ if got := f.Uint64(); got != want {
+ t.Errorf("got %d (%s); want %d", got, f.PString(), want)
+ }
+ }
+}
+
+func TestFloatSetInt64(t *testing.T) {
+ var tests = []int64{
+ 0,
+ 1,
+ 2,
+ 10,
+ 100,
+ 1<<32 - 1,
+ 1 << 32,
+ 1<<63 - 1,
+ }
+ for _, want := range tests {
+ for i := range [2]int{} {
+ if i&1 != 0 {
+ want = -want
+ }
+ f := new(Float).SetInt64(want)
+ if got := f.Int64(); got != want {
+ t.Errorf("got %d (%s); want %d", got, f.PString(), want)
+ }
+ }
+ }
+}
+
+func TestFloatSetFloat64(t *testing.T) {
+ var tests = []float64{
+ 0,
+ 1,
+ 2,
+ 12345,
+ 1e10,
+ 1e100,
+ 3.14159265e10,
+ 2.718281828e-123,
+ 1.0 / 3,
+ }
+ for _, want := range tests {
+ for i := range [2]int{} {
+ if i&1 != 0 {
+ want = -want
+ }
+ f := new(Float).SetFloat64(want)
+ if got, _ := f.Float64(); got != want {
+ t.Errorf("got %g (%s); want %g", got, f.PString(), want)
+ }
+ }
+ }
+}
+
+func TestFloatSetInt(t *testing.T) {
+ // TODO(gri) implement
+}
+
+// Selected precisions with which to run various tests.
+var precList = [...]uint{1, 2, 5, 8, 10, 16, 23, 24, 32, 50, 53, 64, 100, 128, 500, 511, 512, 513, 1000, 10000}
+
+// Selected bits with which to run various tests.
+// Each entry is a list of bits representing a floating-point number (see fromBits).
+var bitsList = [...][]int{
+ {}, // = 0
+ {0}, // = 1
+ {1}, // = 2
+ {-1}, // = 1/2
+ {10}, // = 2**10 == 1024
+ {-10}, // = 2**-10 == 1/1024
+ {100, 10, 1}, // = 2**100 + 2**10 + 2**1
+ {0, -1, -2, -10},
+ // TODO(gri) add more test cases
+}
+
+// TestFloatAdd tests Float.Add/Sub by comparing the result of a "manual"
+// addition/subtraction of arguments represented by bits lists with the
+// respective floating-point addition/subtraction for a variety of precisions
+// and rounding modes.
+func TestFloatAdd(t *testing.T) {
+ for _, xbits := range bitsList {
+ for _, ybits := range bitsList {
+ // exact values
+ x := fromBits(xbits...)
+ y := fromBits(ybits...)
+ zbits := append(xbits, ybits...)
+ z := fromBits(zbits...)
+
+ for i, mode := range [...]RoundingMode{ToZero, ToNearestEven, AwayFromZero} {
+ for _, prec := range precList {
+ // +
+ got := NewFloat(0, prec, mode)
+ got.Add(x, y)
+ want := roundBits(zbits, prec, mode)
+ if got.Cmp(want) != 0 {
+ t.Errorf("i = %d, prec = %d, %s:\n\t %s %v\n\t+ %s %v\n\t= %s\n\twant %s",
+ i, prec, mode, x, xbits, y, ybits, got, want)
+ return
+ }
+
+ // -
+ got.Sub(z, x)
+ want = roundBits(ybits, prec, mode)
+ if got.Cmp(want) != 0 {
+ t.Errorf("i = %d, prec = %d, %s:\n\t %s\n\t- %s\n\t= %s\n\twant %s",
+ i, prec, mode, x, y, got, want)
+ }
+ }
+ }
+ }
+ }
+}
+
+// TestFloatAdd32 tests that Float.Add/Sub of numbers with
+// 24bit mantissa behaves like float32 addition/subtraction.
+func TestFloatAdd32(t *testing.T) {
+ // chose base such that we cross the mantissa precision limit
+ const base = 1<<26 - 0x10 // 11...110000 (26 bits)
+ for d := 0; d <= 0x10; d++ {
+ for i := range [2]int{} {
+ x0, y0 := float64(base), float64(d)
+ if i&1 != 0 {
+ x0, y0 = y0, x0
+ }
+
+ x := new(Float).SetFloat64(x0)
+ y := new(Float).SetFloat64(y0)
+ z := NewFloat(0, 24, ToNearestEven)
+
+ z.Add(x, y)
+ got, acc := z.Float64()
+ want := float64(float32(y0) + float32(x0))
+ if got != want || acc != Exact {
+ t.Errorf("d = %d: %g + %g = %g (%s); want %g exactly", d, x0, y0, got, acc, want)
+ }
+
+ z.Sub(z, y)
+ got, acc = z.Float64()
+ want = float64(float32(want) - float32(y0))
+ if got != want || acc != Exact {
+ t.Errorf("d = %d: %g - %g = %g (%s); want %g exactly", d, x0+y0, y0, got, acc, want)
+ }
+ }
+ }
+}
+
+// TestFloatAdd64 tests that Float.Add/Sub of numbers with
+// 53bit mantissa behaves like float64 addition/subtraction.
+func TestFloatAdd64(t *testing.T) {
+ // chose base such that we cross the mantissa precision limit
+ const base = 1<<55 - 0x10 // 11...110000 (55 bits)
+ for d := 0; d <= 0x10; d++ {
+ for i := range [2]int{} {
+ x0, y0 := float64(base), float64(d)
+ if i&1 != 0 {
+ x0, y0 = y0, x0
+ }
+
+ x := new(Float).SetFloat64(x0)
+ y := new(Float).SetFloat64(y0)
+ z := NewFloat(0, 53, ToNearestEven)
+
+ z.Add(x, y)
+ got, acc := z.Float64()
+ want := x0 + y0
+ if got != want || acc != Exact {
+ t.Errorf("d = %d: %g + %g = %g; want %g exactly", d, x0, y0, got, acc, want)
+ }
+
+ z.Sub(z, y)
+ got, acc = z.Float64()
+ want -= y0
+ if got != want || acc != Exact {
+ t.Errorf("d = %d: %g - %g = %g; want %g exactly", d, x0+y0, y0, got, acc, want)
+ }
+ }
+ }
+}
+
+func TestFloatMul(t *testing.T) {
+}
+
+// TestFloatMul64 tests that Float.Mul/Quo of numbers with
+// 53bit mantissa behaves like float64 multiplication/division.
+func TestFloatMul64(t *testing.T) {
+ var tests = []struct {
+ x, y float64
+ }{
+ {0, 0},
+ {0, 1},
+ {1, 1},
+ {1, 1.5},
+ {1.234, 0.5678},
+ {2.718281828, 3.14159265358979},
+ {2.718281828e10, 3.14159265358979e-32},
+ {1.0 / 3, 1e200},
+ }
+ for _, test := range tests {
+ for i := range [8]int{} {
+ x0, y0 := test.x, test.y
+ if i&1 != 0 {
+ x0 = -x0
+ }
+ if i&2 != 0 {
+ y0 = -y0
+ }
+ if i&4 != 0 {
+ x0, y0 = y0, x0
+ }
+
+ x := new(Float).SetFloat64(x0)
+ y := new(Float).SetFloat64(y0)
+ z := NewFloat(0, 53, ToNearestEven)
+
+ z.Mul(x, y)
+ got, _ := z.Float64()
+ want := x0 * y0
+ if got != want {
+ t.Errorf("%g * %g = %g; want %g", x0, y0, got, want)
+ }
+
+ if y0 == 0 {
+ continue // avoid division-by-zero
+ }
+ z.Quo(z, y)
+ got, _ = z.Float64()
+ want /= y0
+ if got != want {
+ t.Errorf("%g / %g = %g; want %g", x0*y0, y0, got, want)
+ }
+ }
+ }
+}
+
+func TestIssue6866(t *testing.T) {
+ for _, prec := range precList {
+ two := NewFloat(2, prec, ToNearestEven)
+ one := NewFloat(1, prec, ToNearestEven)
+ three := NewFloat(3, prec, ToNearestEven)
+ msix := NewFloat(-6, prec, ToNearestEven)
+ psix := NewFloat(+6, prec, ToNearestEven)
+
+ p := NewFloat(0, prec, ToNearestEven)
+ z1 := NewFloat(0, prec, ToNearestEven)
+ z2 := NewFloat(0, prec, ToNearestEven)
+
+ // z1 = 2 + 1.0/3*-6
+ p.Quo(one, three)
+ p.Mul(p, msix)
+ z1.Add(two, p)
+
+ // z2 = 2 - 1.0/3*+6
+ p.Quo(one, three)
+ p.Mul(p, psix)
+ z2.Sub(two, p)
+
+ if z1.Cmp(z2) != 0 {
+ t.Fatalf("prec %d: got z1 = %s != z2 = %s; want z1 == z2\n", prec, z1, z2)
+ }
+ if z1.Sign() != 0 {
+ t.Errorf("prec %d: got z1 = %s; want 0", prec, z1)
+ }
+ if z2.Sign() != 0 {
+ t.Errorf("prec %d: got z2 = %s; want 0", prec, z2)
+ }
+ }
+}
+
+func TestFloatQuo(t *testing.T) {
+ // TODO(gri) make the test vary these precisions
+ preci := 200 // precision of integer part
+ precf := 20 // precision of fractional part
+
+ for i := 0; i < 8; i++ {
+ // compute accurate (not rounded) result z
+ bits := []int{preci - 1}
+ if i&3 != 0 {
+ bits = append(bits, 0)
+ }
+ if i&2 != 0 {
+ bits = append(bits, -1)
+ }
+ if i&1 != 0 {
+ bits = append(bits, -precf)
+ }
+ z := fromBits(bits...)
+
+ // compute accurate x as z*y
+ y := new(Float).SetFloat64(3.14159265358979323e123)
+
+ x := NewFloat(0, z.Precision()+y.Precision(), ToZero)
+ x.Mul(z, y)
+
+ // leave for debugging
+ // fmt.Printf("x = %s\ny = %s\nz = %s\n", x, y, z)
+
+ if got := x.Accuracy(); got != Exact {
+ t.Errorf("got acc = %s; want exact", got)
+ }
+
+ // round accurate z for a variety of precisions and
+ // modes and compare against result of x / y.
+ for _, mode := range [...]RoundingMode{ToZero, ToNearestEven, AwayFromZero} {
+ for d := -5; d < 5; d++ {
+ prec := uint(preci + d)
+ got := NewFloat(0, prec, mode).Quo(x, y)
+ want := roundBits(bits, prec, mode)
+ if got.Cmp(want) != 0 {
+ t.Errorf("i = %d, prec = %d, %s:\n\t %s\n\t/ %s\n\t= %s\n\twant %s",
+ i, prec, mode, x, y, got, want)
+ }
+ }
+ }
+ }
+}
+
+// normBits returns the normalized bits for x: It
+// removes multiple equal entries by treating them
+// as an addition (e.g., []int{5, 5} => []int{6}),
+// and it sorts the result list for reproducible
+// results.
+func normBits(x []int) []int {
+ m := make(map[int]bool)
+ for _, b := range x {
+ for m[b] {
+ m[b] = false
+ b++
+ }
+ m[b] = true
+ }
+ var z []int
+ for b, set := range m {
+ if set {
+ z = append(z, b)
+ }
+ }
+ sort.Ints(z)
+ return z
+}
+
+func TestNormBits(t *testing.T) {
+ var tests = []struct {
+ x, want []int
+ }{
+ {nil, nil},
+ {[]int{}, []int{}},
+ {[]int{0}, []int{0}},
+ {[]int{0, 0}, []int{1}},
+ {[]int{3, 1, 1}, []int{2, 3}},
+ {[]int{10, 9, 8, 7, 6, 6}, []int{11}},
+ }
+
+ for _, test := range tests {
+ got := fmt.Sprintf("%v", normBits(test.x))
+ want := fmt.Sprintf("%v", test.want)
+ if got != want {
+ t.Errorf("normBits(%v) = %s; want %s", test.x, got, want)
+ }
+
+ }
+}
+
+// roundBits returns the Float value rounded to prec bits
+// according to mode from the bit set x.
+func roundBits(x []int, prec uint, mode RoundingMode) *Float {
+ x = normBits(x)
+
+ // determine range
+ var min, max int
+ for i, b := range x {
+ if i == 0 || b < min {
+ min = b
+ }
+ if i == 0 || b > max {
+ max = b
+ }
+ }
+ prec0 := uint(max + 1 - min)
+ if prec >= prec0 {
+ return fromBits(x...)
+ }
+ // prec < prec0
+
+ // determine bit 0, rounding, and sticky bit, and result bits z
+ var bit0, rbit, sbit uint
+ var z []int
+ r := max - int(prec)
+ for _, b := range x {
+ switch {
+ case b == r:
+ rbit = 1
+ case b < r:
+ sbit = 1
+ default:
+ // b > r
+ if b == r+1 {
+ bit0 = 1
+ }
+ z = append(z, b)
+ }
+ }
+
+ // round
+ f := fromBits(z...) // rounded to zero
+ if mode == ToNearestAway {
+ panic("not yet implemented")
+ }
+ if mode == ToNearestEven && rbit == 1 && (sbit == 1 || sbit == 0 && bit0 != 0) || mode == AwayFromZero {
+ // round away from zero
+ f.Round(f, prec, ToZero) // extend precision // TODO(gri) better approach?
+ f.Add(f, fromBits(int(r)+1))
+ }
+ return f
+}
+
+// fromBits returns the *Float z of the smallest possible precision
+// such that z = sum(2**bits[i]), with i = range bits.
+// If multiple bits[i] are equal, they are added: fromBits(0, 1, 0)
+// == 2**1 + 2**0 + 2**0 = 4.
+func fromBits(bits ...int) *Float {
+ // handle 0
+ if len(bits) == 0 {
+ return new(Float)
+ // z.prec = ?
+ }
+ // len(bits) > 0
+
+ // determine lsb exponent
+ var min int
+ for i, b := range bits {
+ if i == 0 || b < min {
+ min = b
+ }
+ }
+
+ // create bit pattern
+ x := NewInt(0)
+ for _, b := range bits {
+ badj := b - min
+ // propagate carry if necessary
+ for x.Bit(badj) != 0 {
+ x.SetBit(x, badj, 0)
+ badj++
+ }
+ x.SetBit(x, badj, 1)
+ }
+
+ // create corresponding float
+ z := new(Float).SetInt(x) // normalized
+ z.setExp(int64(z.exp) + int64(min))
+ return z
+}
+
+func TestFromBits(t *testing.T) {
+ var tests = []struct {
+ bits []int
+ want string
+ }{
+ // all different bit numbers
+ {nil, "0.0p0"},
+ {[]int{0}, "0.8000000000000000p1"},
+ {[]int{1}, "0.8000000000000000p2"},
+ {[]int{-1}, "0.8000000000000000p0"},
+ {[]int{63}, "0.8000000000000000p64"},
+ {[]int{33, -30}, "0.8000000000000001p34"},
+ {[]int{255, 0}, "0.8000000000000000000000000000000000000000000000000000000000000001p256"},
+
+ // multiple equal bit numbers
+ {[]int{0, 0}, "0.8000000000000000p2"},
+ {[]int{0, 0, 0, 0}, "0.8000000000000000p3"},
+ {[]int{0, 1, 0}, "0.8000000000000000p3"},
+ {append([]int{2, 1, 0} /* 7 */, []int{3, 1} /* 10 */ ...), "0.8800000000000000p5" /* 17 */},
+ }
+
+ for _, test := range tests {
+ f := fromBits(test.bits...)
+ if got := f.PString(); got != test.want {
+ t.Errorf("setBits(%v) = %s; want %s", test.bits, got, test.want)
+ }
+ }
+}
+
+var floatSetFloat64StringTests = []struct {
+ s string
+ x float64
+}{
+ {"0", 0},
+ {"-0", -0},
+ {"+0", 0},
+ {"1", 1},
+ {"-1", -1},
+ {"+1", 1},
+ {"1.234", 1.234},
+ {"-1.234", -1.234},
+ {"+1.234", 1.234},
+ {".1", 0.1},
+ {"1.", 1},
+ {"+1.", 1},
+
+ {"0e100", 0},
+ {"-0e+100", 0},
+ {"+0e-100", 0},
+ {"0E100", 0},
+ {"-0E+100", 0},
+ {"+0E-100", 0},
+ {"0p100", 0},
+ {"-0p+100", 0},
+ {"+0p-100", 0},
+
+ {"1.e10", 1e10},
+ {"1e+10", 1e10},
+ {"+1e-10", 1e-10},
+ {"1E10", 1e10},
+ {"1.E+10", 1e10},
+ {"+1E-10", 1e-10},
+ {"1p10", 1 << 10},
+ {"1p+10", 1 << 10},
+ {"+1.p-10", 1.0 / (1 << 10)},
+
+ {"-687436.79457e-245", -687436.79457e-245},
+ {"-687436.79457E245", -687436.79457e245},
+ {"1024.p-12", 0.25},
+ {"-1.p10", -1024},
+ {"0.25p2", 1},
+
+ {".0000000000000000000000000000000000000001", 1e-40},
+ {"+10000000000000000000000000000000000000000e-0", 1e40},
+}
+
+func TestFloatSetFloat64String(t *testing.T) {
+ for _, test := range floatSetFloat64StringTests {
+ var x Float
+ x.prec = 53 // TODO(gri) find better solution
+ _, ok := x.SetString(test.s)
+ if !ok {
+ t.Errorf("%s: parse error", test.s)
+ continue
+ }
+ f, _ := x.Float64()
+ want := new(Float).SetFloat64(test.x)
+ if x.Cmp(want) != 0 {
+ t.Errorf("%s: got %s (%v); want %v", test.s, &x, f, test.x)
+ }
+ }
+}
+
+func TestFloatPString(t *testing.T) {
+ var tests = []struct {
+ x Float
+ want string
+ }{
+ {Float{}, "0.0p0"},
+ {Float{neg: true}, "-0.0p0"},
+ {Float{mant: nat{0x87654321}}, "0.87654321p0"},
+ {Float{mant: nat{0x87654321}, exp: -10}, "0.87654321p-10"},
+ }
+ for _, test := range tests {
+ if got := test.x.PString(); got != test.want {
+ t.Errorf("%v: got %s; want %s", test.x, got, test.want)
+ }
+ }
+}
diff --git a/src/math/big/int.go b/src/math/big/int.go
index 98f0b24..e574cd0 100644
--- a/src/math/big/int.go
+++ b/src/math/big/int.go
@@ -463,18 +463,10 @@
//
func (z *Int) scan(r io.RuneScanner, base int) (*Int, int, error) {
// determine sign
- ch, _, err := r.ReadRune()
+ neg, err := scanSign(r)
if err != nil {
return nil, 0, err
}
- neg := false
- switch ch {
- case '-':
- neg = true
- case '+': // nothing to do
- default:
- r.UnreadRune()
- }
// determine mantissa
z.abs, base, _, err = z.abs.scan(r, base)
@@ -486,6 +478,22 @@
return z, base, nil
}
+func scanSign(r io.RuneScanner) (neg bool, err error) {
+ var ch rune
+ if ch, _, err = r.ReadRune(); err != nil {
+ return false, err
+ }
+ switch ch {
+ case '-':
+ neg = true
+ case '+':
+ // nothing to do
+ default:
+ r.UnreadRune()
+ }
+ return
+}
+
// Scan is a support routine for fmt.Scanner; it sets z to the value of
// the scanned number. It accepts the formats 'b' (binary), 'o' (octal),
// 'd' (decimal), 'x' (lowercase hexadecimal), and 'X' (uppercase hexadecimal).
diff --git a/src/math/big/nat.go b/src/math/big/nat.go
index c26734f..6ef376c 100644
--- a/src/math/big/nat.go
+++ b/src/math/big/nat.go
@@ -5,18 +5,22 @@
// Package big implements multi-precision arithmetic (big numbers).
// The following numeric types are supported:
//
-// - Int signed integers
-// - Rat rational numbers
+// Int signed integers
+// Rat rational numbers
+// Float floating-point numbers
//
// Methods are typically of the form:
//
-// func (z *Int) Op(x, y *Int) *Int (similar for *Rat)
+// func (z *T) Unary(x *T) *T // z = op x
+// func (z *T) Binary(x, y *T) *T // z = x op y
+// func (x *T) M() T1 // v = x.M()
//
-// and implement operations z = x Op y with the result as receiver; if it
-// is one of the operands it may be overwritten (and its memory reused).
+// with T one of Int, Rat, or Float. For unary and binary operations, the
+// result is the receiver (usually named z in that case); if it is one of
+// the operands x or y it may be overwritten (and its memory reused).
// To enable chaining of operations, the result is also returned. Methods
-// returning a result other than *Int or *Rat take one of the operands as
-// the receiver.
+// returning a result other than *Int, *Rat, or *Float take an operand as
+// the receiver (usually named x in that case).
//
package big
@@ -1198,6 +1202,28 @@
return uint(x[j] >> (i % _W) & 1)
}
+// sticky returns 1 if there's a 1 bit within the
+// i least significant bits, otherwise it returns 0.
+func (x nat) sticky(i uint) uint {
+ j := i / _W
+ if j >= uint(len(x)) {
+ if len(x) == 0 {
+ return 0
+ }
+ return 1
+ }
+ // 0 <= j < len(x)
+ for _, x := range x[:j] {
+ if x != 0 {
+ return 1
+ }
+ }
+ if x[j]<<(_W-i%_W) != 0 {
+ return 1
+ }
+ return 0
+}
+
func (z nat) and(x, y nat) nat {
m := len(x)
n := len(y)
diff --git a/src/math/big/nat_test.go b/src/math/big/nat_test.go
index d24ce60..42d8619 100644
--- a/src/math/big/nat_test.go
+++ b/src/math/big/nat_test.go
@@ -894,3 +894,43 @@
}
}
}
+
+var stickyTests = []struct {
+ x string
+ i uint
+ want uint
+}{
+ {"0", 0, 0},
+ {"0", 1, 0},
+ {"0", 1000, 0},
+
+ {"0x1", 0, 0},
+ {"0x1", 1, 1},
+
+ {"0x1350", 0, 0},
+ {"0x1350", 4, 0},
+ {"0x1350", 5, 1},
+
+ {"0x8000000000000000", 63, 0},
+ {"0x8000000000000000", 64, 1},
+
+ {"0x1" + strings.Repeat("0", 100), 400, 0},
+ {"0x1" + strings.Repeat("0", 100), 401, 1},
+}
+
+func TestSticky(t *testing.T) {
+ for i, test := range stickyTests {
+ x := natFromString(test.x)
+ if got := x.sticky(test.i); got != test.want {
+ t.Errorf("#%d: %s.sticky(%d) = %v; want %v", i, test.x, test.i, got, test.want)
+ }
+ if test.want == 1 {
+ // all subsequent i's should also return 1
+ for d := uint(1); d <= 3; d++ {
+ if got := x.sticky(test.i + d); got != 1 {
+ t.Errorf("#%d: %s.sticky(%d) = %v; want %v", i, test.x, test.i+d, got, 1)
+ }
+ }
+ }
+ }
+}
diff --git a/src/math/big/rat.go b/src/math/big/rat.go
index e21b4a9..dec3100 100644
--- a/src/math/big/rat.go
+++ b/src/math/big/rat.go
@@ -326,14 +326,14 @@
// SetInt sets z to x (by making a copy of x) and returns z.
func (z *Rat) SetInt(x *Int) *Rat {
z.a.Set(x)
- z.b.abs = z.b.abs.make(0)
+ z.b.abs = z.b.abs[:0]
return z
}
// SetInt64 sets z to x and returns z.
func (z *Rat) SetInt64(x int64) *Rat {
z.a.SetInt64(x)
- z.b.abs = z.b.abs.make(0)
+ z.b.abs = z.b.abs[:0]
return z
}
@@ -372,7 +372,7 @@
}
b := z.a.abs
if b.cmp(natOne) == 0 {
- b = b.make(0) // normalize denominator
+ b = b[:0] // normalize denominator
}
z.a.abs, z.b.abs = a, b // sign doesn't change
return z
@@ -417,12 +417,12 @@
case len(z.a.abs) == 0:
// z == 0 - normalize sign and denominator
z.a.neg = false
- z.b.abs = z.b.abs.make(0)
+ z.b.abs = z.b.abs[:0]
case len(z.b.abs) == 0:
// z is normalized int - nothing to do
case z.b.abs.cmp(natOne) == 0:
// z is int - normalize denominator
- z.b.abs = z.b.abs.make(0)
+ z.b.abs = z.b.abs[:0]
default:
neg := z.a.neg
z.a.neg = false
@@ -432,7 +432,7 @@
z.b.abs, _ = z.b.abs.div(nil, z.b.abs, f.abs)
if z.b.abs.cmp(natOne) == 0 {
// z is int - normalize denominator
- z.b.abs = z.b.abs.make(0)
+ z.b.abs = z.b.abs[:0]
}
}
z.a.neg = neg
@@ -561,32 +561,26 @@
}
// parse floating-point number
-
- // parse sign
- var neg bool
- switch s[0] {
- case '-':
- neg = true
- fallthrough
- case '+':
- s = s[1:]
- }
-
- // parse exponent, if any
- var exp int64
- if sep := strings.IndexAny(s, "eE"); sep >= 0 {
- var err error
- if exp, err = strconv.ParseInt(s[sep+1:], 10, 64); err != nil {
- return nil, false
- }
- s = s[:sep]
- }
-
- // parse mantissa
- var err error
- var ecorr int // exponent correction, valid if ecorr <= 0
r := strings.NewReader(s)
- if z.a.abs, _, ecorr, err = z.a.abs.scan(r, 1); err != nil {
+
+ // sign
+ neg, err := scanSign(r)
+ if err != nil {
+ return nil, false
+ }
+
+ // mantissa
+ var ecorr int
+ z.a.abs, _, ecorr, err = z.a.abs.scan(r, 1)
+ if err != nil {
+ return nil, false
+ }
+
+ // exponent
+ var exp int64
+ var ebase int
+ exp, ebase, err = scanExponent(r)
+ if ebase == 2 || err != nil {
return nil, false
}
@@ -600,7 +594,7 @@
exp += int64(ecorr)
}
- // compute exponent factor
+ // compute exponent power
expabs := exp
if expabs < 0 {
expabs = -expabs
@@ -621,6 +615,64 @@
return z, true
}
+func scanExponent(r io.RuneScanner) (exp int64, base int, err error) {
+ base = 10
+
+ var ch rune
+ if ch, _, err = r.ReadRune(); err != nil {
+ if err == io.EOF {
+ err = nil // no exponent; same as e0
+ }
+ return
+ }
+
+ switch ch {
+ case 'e', 'E':
+ // ok
+ case 'p':
+ base = 2
+ default:
+ r.UnreadRune()
+ return // no exponent; same as e0
+ }
+
+ var neg bool
+ if neg, err = scanSign(r); err != nil {
+ return
+ }
+
+ var digits []byte
+ if neg {
+ digits = append(digits, '-')
+ }
+
+ // no need to use nat.scan for exponent digits
+ // since we only care about int64 values - the
+ // from-scratch scan is easy enough and faster
+ for i := 0; ; i++ {
+ if ch, _, err = r.ReadRune(); err != nil {
+ if err != io.EOF || i == 0 {
+ return
+ }
+ err = nil
+ break // i > 0
+ }
+ if ch < '0' || '9' < ch {
+ if i == 0 {
+ r.UnreadRune()
+ err = fmt.Errorf("invalid exponent (missing digits)")
+ return
+ }
+ break // i > 0
+ }
+ digits = append(digits, byte(ch))
+ }
+ // i > 0 => we have at least one digit
+
+ exp, err = strconv.ParseInt(string(digits), 10, 64)
+ return
+}
+
// String returns a string representation of x in the form "a/b" (even if b == 1).
func (x *Rat) String() string {
s := "/1"