| // Copyright 2014 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| // This file implements multi-precision floating-point numbers. |
| // Like in the GNU MPFR library (https://www.mpfr.org/), operands |
| // can be of mixed precision. Unlike MPFR, the rounding mode is |
| // not specified with each operation, but with each operand. The |
| // rounding mode of the result operand determines the rounding |
| // mode of an operation. This is a from-scratch implementation. |
| |
| package big |
| |
| import ( |
| "fmt" |
| "math" |
| "math/bits" |
| ) |
| |
| const debugFloat = false // enable for debugging |
| |
| // A nonzero finite Float represents a multi-precision floating point number |
| // |
| // sign × mantissa × 2**exponent |
| // |
| // with 0.5 <= mantissa < 1.0, and MinExp <= exponent <= MaxExp. |
| // A Float may also be zero (+0, -0) or infinite (+Inf, -Inf). |
| // All Floats are ordered, and the ordering of two Floats x and y |
| // is defined by x.Cmp(y). |
| // |
| // Each Float value also has a precision, rounding mode, and accuracy. |
| // The precision is the maximum number of mantissa bits available to |
| // represent the value. The rounding mode specifies how a result should |
| // be rounded to fit into the mantissa bits, and accuracy describes the |
| // rounding error with respect to the exact result. |
| // |
| // Unless specified otherwise, all operations (including setters) that |
| // specify a *Float variable for the result (usually via the receiver |
| // with the exception of MantExp), round the numeric result according |
| // to the precision and rounding mode of the result variable. |
| // |
| // If the provided result precision is 0 (see below), it is set to the |
| // precision of the argument with the largest precision value before any |
| // rounding takes place, and the rounding mode remains unchanged. Thus, |
| // uninitialized Floats provided as result arguments will have their |
| // precision set to a reasonable value determined by the operands, and |
| // their mode is the zero value for RoundingMode (ToNearestEven). |
| // |
| // By setting the desired precision to 24 or 53 and using matching rounding |
| // mode (typically ToNearestEven), Float operations produce the same results |
| // as the corresponding float32 or float64 IEEE-754 arithmetic for operands |
| // that correspond to normal (i.e., not denormal) float32 or float64 numbers. |
| // Exponent underflow and overflow lead to a 0 or an Infinity for different |
| // values than IEEE-754 because Float exponents have a much larger range. |
| // |
| // The zero (uninitialized) value for a Float is ready to use and represents |
| // the number +0.0 exactly, with precision 0 and rounding mode ToNearestEven. |
| // |
| // Operations always take pointer arguments (*Float) rather |
| // than Float values, and each unique Float value requires |
| // its own unique *Float pointer. To "copy" a Float value, |
| // an existing (or newly allocated) Float must be set to |
| // a new value using the Float.Set method; shallow copies |
| // of Floats are not supported and may lead to errors. |
| type Float struct { |
| prec uint32 |
| mode RoundingMode |
| acc Accuracy |
| form form |
| neg bool |
| mant nat |
| exp int32 |
| } |
| |
| // An ErrNaN panic is raised by a Float operation that would lead to |
| // a NaN under IEEE-754 rules. An ErrNaN implements the error interface. |
| type ErrNaN struct { |
| msg string |
| } |
| |
| func (err ErrNaN) Error() string { |
| return err.msg |
| } |
| |
| // NewFloat allocates and returns a new Float set to x, |
| // with precision 53 and rounding mode ToNearestEven. |
| // NewFloat panics with ErrNaN if x is a NaN. |
| func NewFloat(x float64) *Float { |
| if math.IsNaN(x) { |
| panic(ErrNaN{"NewFloat(NaN)"}) |
| } |
| return new(Float).SetFloat64(x) |
| } |
| |
| // Exponent and precision limits. |
| const ( |
| MaxExp = math.MaxInt32 // largest supported exponent |
| MinExp = math.MinInt32 // smallest supported exponent |
| MaxPrec = math.MaxUint32 // largest (theoretically) supported precision; likely memory-limited |
| ) |
| |
| // Internal representation: The mantissa bits x.mant of a nonzero finite |
| // Float x are stored in a nat slice long enough to hold up to x.prec bits; |
| // the slice may (but doesn't have to) be shorter if the mantissa contains |
| // trailing 0 bits. x.mant is normalized if the msb of x.mant == 1 (i.e., |
| // the msb is shifted all the way "to the left"). Thus, if the mantissa has |
| // trailing 0 bits or x.prec is not a multiple of the Word size _W, |
| // x.mant[0] has trailing zero bits. The msb of the mantissa corresponds |
| // to the value 0.5; the exponent x.exp shifts the binary point as needed. |
| // |
| // A zero or non-finite Float x ignores x.mant and x.exp. |
| // |
| // x form neg mant exp |
| // ---------------------------------------------------------- |
| // ±0 zero sign - - |
| // 0 < |x| < +Inf finite sign mantissa exponent |
| // ±Inf inf sign - - |
| |
| // A form value describes the internal representation. |
| type form byte |
| |
| // The form value order is relevant - do not change! |
| const ( |
| zero form = iota |
| finite |
| inf |
| ) |
| |
| // RoundingMode determines how a Float value is rounded to the |
| // desired precision. Rounding may change the Float value; the |
| // rounding error is described by the Float's Accuracy. |
| type RoundingMode byte |
| |
| // These constants define supported rounding modes. |
| const ( |
| ToNearestEven RoundingMode = iota // == IEEE 754-2008 roundTiesToEven |
| ToNearestAway // == IEEE 754-2008 roundTiesToAway |
| ToZero // == IEEE 754-2008 roundTowardZero |
| AwayFromZero // no IEEE 754-2008 equivalent |
| ToNegativeInf // == IEEE 754-2008 roundTowardNegative |
| ToPositiveInf // == IEEE 754-2008 roundTowardPositive |
| ) |
| |
| //go:generate stringer -type=RoundingMode |
| |
| // Accuracy describes the rounding error produced by the most recent |
| // operation that generated a Float value, relative to the exact value. |
| type Accuracy int8 |
| |
| // Constants describing the Accuracy of a Float. |
| const ( |
| Below Accuracy = -1 |
| Exact Accuracy = 0 |
| Above Accuracy = +1 |
| ) |
| |
| //go:generate stringer -type=Accuracy |
| |
| // SetPrec sets z's precision to prec and returns the (possibly) rounded |
| // value of z. Rounding occurs according to z's rounding mode if the mantissa |
| // cannot be represented in prec bits without loss of precision. |
| // SetPrec(0) maps all finite values to ±0; infinite values remain unchanged. |
| // If prec > MaxPrec, it is set to MaxPrec. |
| func (z *Float) SetPrec(prec uint) *Float { |
| z.acc = Exact // optimistically assume no rounding is needed |
| |
| // special case |
| if prec == 0 { |
| z.prec = 0 |
| if z.form == finite { |
| // truncate z to 0 |
| z.acc = makeAcc(z.neg) |
| z.form = zero |
| } |
| return z |
| } |
| |
| // general case |
| if prec > MaxPrec { |
| prec = MaxPrec |
| } |
| old := z.prec |
| z.prec = uint32(prec) |
| if z.prec < old { |
| z.round(0) |
| } |
| return z |
| } |
| |
| func makeAcc(above bool) Accuracy { |
| if above { |
| return Above |
| } |
| return Below |
| } |
| |
| // SetMode sets z's rounding mode to mode and returns an exact z. |
| // z remains unchanged otherwise. |
| // z.SetMode(z.Mode()) is a cheap way to set z's accuracy to Exact. |
| func (z *Float) SetMode(mode RoundingMode) *Float { |
| z.mode = mode |
| z.acc = Exact |
| return z |
| } |
| |
| // Prec returns the mantissa precision of x in bits. |
| // The result may be 0 for |x| == 0 and |x| == Inf. |
| func (x *Float) Prec() uint { |
| return uint(x.prec) |
| } |
| |
| // MinPrec returns the minimum precision required to represent x exactly |
| // (i.e., the smallest prec before x.SetPrec(prec) would start rounding x). |
| // The result is 0 for |x| == 0 and |x| == Inf. |
| func (x *Float) MinPrec() uint { |
| if x.form != finite { |
| return 0 |
| } |
| return uint(len(x.mant))*_W - x.mant.trailingZeroBits() |
| } |
| |
| // Mode returns the rounding mode of x. |
| func (x *Float) Mode() RoundingMode { |
| return x.mode |
| } |
| |
| // Acc returns the accuracy of x produced by the most recent |
| // operation, unless explicitly documented otherwise by that |
| // operation. |
| func (x *Float) Acc() Accuracy { |
| return x.acc |
| } |
| |
| // Sign returns: |
| // |
| // -1 if x < 0 |
| // 0 if x is ±0 |
| // +1 if x > 0 |
| // |
| func (x *Float) Sign() int { |
| if debugFloat { |
| x.validate() |
| } |
| if x.form == zero { |
| return 0 |
| } |
| if x.neg { |
| return -1 |
| } |
| return 1 |
| } |
| |
| // MantExp breaks x into its mantissa and exponent components |
| // and returns the exponent. If a non-nil mant argument is |
| // provided its value is set to the mantissa of x, with the |
| // same precision and rounding mode as x. The components |
| // satisfy x == mant × 2**exp, with 0.5 <= |mant| < 1.0. |
| // Calling MantExp with a nil argument is an efficient way to |
| // get the exponent of the receiver. |
| // |
| // Special cases are: |
| // |
| // ( ±0).MantExp(mant) = 0, with mant set to ±0 |
| // (±Inf).MantExp(mant) = 0, with mant set to ±Inf |
| // |
| // x and mant may be the same in which case x is set to its |
| // mantissa value. |
| func (x *Float) MantExp(mant *Float) (exp int) { |
| if debugFloat { |
| x.validate() |
| } |
| if x.form == finite { |
| exp = int(x.exp) |
| } |
| if mant != nil { |
| mant.Copy(x) |
| if mant.form == finite { |
| mant.exp = 0 |
| } |
| } |
| return |
| } |
| |
| func (z *Float) setExpAndRound(exp int64, sbit uint) { |
| if exp < MinExp { |
| // underflow |
| z.acc = makeAcc(z.neg) |
| z.form = zero |
| return |
| } |
| |
| if exp > MaxExp { |
| // overflow |
| z.acc = makeAcc(!z.neg) |
| z.form = inf |
| return |
| } |
| |
| z.form = finite |
| z.exp = int32(exp) |
| z.round(sbit) |
| } |
| |
| // SetMantExp sets z to mant × 2**exp and returns z. |
| // The result z has the same precision and rounding mode |
| // as mant. SetMantExp is an inverse of MantExp but does |
| // not require 0.5 <= |mant| < 1.0. Specifically: |
| // |
| // mant := new(Float) |
| // new(Float).SetMantExp(mant, x.MantExp(mant)).Cmp(x) == 0 |
| // |
| // Special cases are: |
| // |
| // z.SetMantExp( ±0, exp) = ±0 |
| // z.SetMantExp(±Inf, exp) = ±Inf |
| // |
| // z and mant may be the same in which case z's exponent |
| // is set to exp. |
| func (z *Float) SetMantExp(mant *Float, exp int) *Float { |
| if debugFloat { |
| z.validate() |
| mant.validate() |
| } |
| z.Copy(mant) |
| if z.form != finite { |
| return z |
| } |
| z.setExpAndRound(int64(z.exp)+int64(exp), 0) |
| return z |
| } |
| |
| // Signbit reports whether x is negative or negative zero. |
| func (x *Float) Signbit() bool { |
| return x.neg |
| } |
| |
| // IsInf reports whether x is +Inf or -Inf. |
| func (x *Float) IsInf() bool { |
| return x.form == inf |
| } |
| |
| // IsInt reports whether x is an integer. |
| // ±Inf values are not integers. |
| func (x *Float) IsInt() bool { |
| if debugFloat { |
| x.validate() |
| } |
| // special cases |
| if x.form != finite { |
| return x.form == zero |
| } |
| // x.form == finite |
| if x.exp <= 0 { |
| return false |
| } |
| // x.exp > 0 |
| return x.prec <= uint32(x.exp) || x.MinPrec() <= uint(x.exp) // not enough bits for fractional mantissa |
| } |
| |
| // debugging support |
| func (x *Float) validate() { |
| if !debugFloat { |
| // avoid performance bugs |
| panic("validate called but debugFloat is not set") |
| } |
| if x.form != finite { |
| return |
| } |
| m := len(x.mant) |
| if m == 0 { |
| panic("nonzero finite number with empty mantissa") |
| } |
| const msb = 1 << (_W - 1) |
| if x.mant[m-1]&msb == 0 { |
| panic(fmt.Sprintf("msb not set in last word %#x of %s", x.mant[m-1], x.Text('p', 0))) |
| } |
| if x.prec == 0 { |
| panic("zero precision finite number") |
| } |
| } |
| |
| // round rounds z according to z.mode to z.prec bits and sets z.acc accordingly. |
| // sbit must be 0 or 1 and summarizes any "sticky bit" information one might |
| // have before calling round. z's mantissa must be normalized (with the msb set) |
| // or empty. |
| // |
| // CAUTION: The rounding modes ToNegativeInf, ToPositiveInf are affected by the |
| // sign of z. For correct rounding, the sign of z must be set correctly before |
| // calling round. |
| func (z *Float) round(sbit uint) { |
| if debugFloat { |
| z.validate() |
| } |
| |
| z.acc = Exact |
| if z.form != finite { |
| // ±0 or ±Inf => nothing left to do |
| return |
| } |
| // z.form == finite && len(z.mant) > 0 |
| // m > 0 implies z.prec > 0 (checked by validate) |
| |
| m := uint32(len(z.mant)) // present mantissa length in words |
| bits := m * _W // present mantissa bits; bits > 0 |
| if bits <= z.prec { |
| // mantissa fits => nothing to do |
| return |
| } |
| // bits > z.prec |
| |
| // Rounding is based on two bits: the rounding bit (rbit) and the |
| // sticky bit (sbit). The rbit is the bit immediately before the |
| // z.prec leading mantissa bits (the "0.5"). The sbit is set if any |
| // of the bits before the rbit are set (the "0.25", "0.125", etc.): |
| // |
| // rbit sbit => "fractional part" |
| // |
| // 0 0 == 0 |
| // 0 1 > 0 , < 0.5 |
| // 1 0 == 0.5 |
| // 1 1 > 0.5, < 1.0 |
| |
| // bits > z.prec: mantissa too large => round |
| r := uint(bits - z.prec - 1) // rounding bit position; r >= 0 |
| rbit := z.mant.bit(r) & 1 // rounding bit; be safe and ensure it's a single bit |
| // The sticky bit is only needed for rounding ToNearestEven |
| // or when the rounding bit is zero. Avoid computation otherwise. |
| if sbit == 0 && (rbit == 0 || z.mode == ToNearestEven) { |
| sbit = z.mant.sticky(r) |
| } |
| sbit &= 1 // be safe and ensure it's a single bit |
| |
| // cut off extra words |
| n := (z.prec + (_W - 1)) / _W // mantissa length in words for desired precision |
| if m > n { |
| copy(z.mant, z.mant[m-n:]) // move n last words to front |
| z.mant = z.mant[:n] |
| } |
| |
| // determine number of trailing zero bits (ntz) and compute lsb mask of mantissa's least-significant word |
| ntz := n*_W - z.prec // 0 <= ntz < _W |
| lsb := Word(1) << ntz |
| |
| // round if result is inexact |
| if rbit|sbit != 0 { |
| // Make rounding decision: The result mantissa is truncated ("rounded down") |
| // by default. Decide if we need to increment, or "round up", the (unsigned) |
| // mantissa. |
| inc := false |
| switch z.mode { |
| case ToNegativeInf: |
| inc = z.neg |
| case ToZero: |
| // nothing to do |
| case ToNearestEven: |
| inc = rbit != 0 && (sbit != 0 || z.mant[0]&lsb != 0) |
| case ToNearestAway: |
| inc = rbit != 0 |
| case AwayFromZero: |
| inc = true |
| case ToPositiveInf: |
| inc = !z.neg |
| default: |
| panic("unreachable") |
| } |
| |
| // A positive result (!z.neg) is Above the exact result if we increment, |
| // and it's Below if we truncate (Exact results require no rounding). |
| // For a negative result (z.neg) it is exactly the opposite. |
| z.acc = makeAcc(inc != z.neg) |
| |
| if inc { |
| // add 1 to mantissa |
| if addVW(z.mant, z.mant, lsb) != 0 { |
| // mantissa overflow => adjust exponent |
| if z.exp >= MaxExp { |
| // exponent overflow |
| z.form = inf |
| return |
| } |
| z.exp++ |
| // adjust mantissa: divide by 2 to compensate for exponent adjustment |
| shrVU(z.mant, z.mant, 1) |
| // set msb == carry == 1 from the mantissa overflow above |
| const msb = 1 << (_W - 1) |
| z.mant[n-1] |= msb |
| } |
| } |
| } |
| |
| // zero out trailing bits in least-significant word |
| z.mant[0] &^= lsb - 1 |
| |
| if debugFloat { |
| z.validate() |
| } |
| } |
| |
| func (z *Float) setBits64(neg bool, x uint64) *Float { |
| if z.prec == 0 { |
| z.prec = 64 |
| } |
| z.acc = Exact |
| z.neg = neg |
| if x == 0 { |
| z.form = zero |
| return z |
| } |
| // x != 0 |
| z.form = finite |
| s := bits.LeadingZeros64(x) |
| z.mant = z.mant.setUint64(x << uint(s)) |
| z.exp = int32(64 - s) // always fits |
| if z.prec < 64 { |
| z.round(0) |
| } |
| return z |
| } |
| |
| // SetUint64 sets z to the (possibly rounded) value of x and returns z. |
| // If z's precision is 0, it is changed to 64 (and rounding will have |
| // no effect). |
| func (z *Float) SetUint64(x uint64) *Float { |
| return z.setBits64(false, x) |
| } |
| |
| // SetInt64 sets z to the (possibly rounded) value of x and returns z. |
| // If z's precision is 0, it is changed to 64 (and rounding will have |
| // no effect). |
| func (z *Float) SetInt64(x int64) *Float { |
| u := x |
| if u < 0 { |
| u = -u |
| } |
| // We cannot simply call z.SetUint64(uint64(u)) and change |
| // the sign afterwards because the sign affects rounding. |
| return z.setBits64(x < 0, uint64(u)) |
| } |
| |
| // SetFloat64 sets z to the (possibly rounded) value of x and returns z. |
| // If z's precision is 0, it is changed to 53 (and rounding will have |
| // no effect). SetFloat64 panics with ErrNaN if x is a NaN. |
| func (z *Float) SetFloat64(x float64) *Float { |
| if z.prec == 0 { |
| z.prec = 53 |
| } |
| if math.IsNaN(x) { |
| panic(ErrNaN{"Float.SetFloat64(NaN)"}) |
| } |
| z.acc = Exact |
| z.neg = math.Signbit(x) // handle -0, -Inf correctly |
| if x == 0 { |
| z.form = zero |
| return z |
| } |
| if math.IsInf(x, 0) { |
| z.form = inf |
| return z |
| } |
| // normalized x != 0 |
| z.form = finite |
| fmant, exp := math.Frexp(x) // get normalized mantissa |
| z.mant = z.mant.setUint64(1<<63 | math.Float64bits(fmant)<<11) |
| z.exp = int32(exp) // always fits |
| if z.prec < 53 { |
| z.round(0) |
| } |
| return z |
| } |
| |
| // fnorm normalizes mantissa m by shifting it to the left |
| // such that the msb of the most-significant word (msw) is 1. |
| // It returns the shift amount. It assumes that len(m) != 0. |
| func fnorm(m nat) int64 { |
| if debugFloat && (len(m) == 0 || m[len(m)-1] == 0) { |
| panic("msw of mantissa is 0") |
| } |
| s := nlz(m[len(m)-1]) |
| if s > 0 { |
| c := shlVU(m, m, s) |
| if debugFloat && c != 0 { |
| panic("nlz or shlVU incorrect") |
| } |
| } |
| return int64(s) |
| } |
| |
| // SetInt sets z to the (possibly rounded) value of x and returns z. |
| // If z's precision is 0, it is changed to the larger of x.BitLen() |
| // or 64 (and rounding will have no effect). |
| func (z *Float) SetInt(x *Int) *Float { |
| // TODO(gri) can be more efficient if z.prec > 0 |
| // but small compared to the size of x, or if there |
| // are many trailing 0's. |
| bits := uint32(x.BitLen()) |
| if z.prec == 0 { |
| z.prec = umax32(bits, 64) |
| } |
| z.acc = Exact |
| z.neg = x.neg |
| if len(x.abs) == 0 { |
| z.form = zero |
| return z |
| } |
| // x != 0 |
| z.mant = z.mant.set(x.abs) |
| fnorm(z.mant) |
| z.setExpAndRound(int64(bits), 0) |
| return z |
| } |
| |
| // SetRat sets z to the (possibly rounded) value of x and returns z. |
| // If z's precision is 0, it is changed to the largest of a.BitLen(), |
| // b.BitLen(), or 64; with x = a/b. |
| func (z *Float) SetRat(x *Rat) *Float { |
| if x.IsInt() { |
| return z.SetInt(x.Num()) |
| } |
| var a, b Float |
| a.SetInt(x.Num()) |
| b.SetInt(x.Denom()) |
| if z.prec == 0 { |
| z.prec = umax32(a.prec, b.prec) |
| } |
| return z.Quo(&a, &b) |
| } |
| |
| // SetInf sets z to the infinite Float -Inf if signbit is |
| // set, or +Inf if signbit is not set, and returns z. The |
| // precision of z is unchanged and the result is always |
| // Exact. |
| func (z *Float) SetInf(signbit bool) *Float { |
| z.acc = Exact |
| z.form = inf |
| z.neg = signbit |
| return z |
| } |
| |
| // Set sets z to the (possibly rounded) value of x and returns z. |
| // If z's precision is 0, it is changed to the precision of x |
| // before setting z (and rounding will have no effect). |
| // Rounding is performed according to z's precision and rounding |
| // mode; and z's accuracy reports the result error relative to the |
| // exact (not rounded) result. |
| func (z *Float) Set(x *Float) *Float { |
| if debugFloat { |
| x.validate() |
| } |
| z.acc = Exact |
| if z != x { |
| z.form = x.form |
| z.neg = x.neg |
| if x.form == finite { |
| z.exp = x.exp |
| z.mant = z.mant.set(x.mant) |
| } |
| if z.prec == 0 { |
| z.prec = x.prec |
| } else if z.prec < x.prec { |
| z.round(0) |
| } |
| } |
| return z |
| } |
| |
| // Copy sets z to x, with the same precision, rounding mode, and |
| // accuracy as x, and returns z. x is not changed even if z and |
| // x are the same. |
| func (z *Float) Copy(x *Float) *Float { |
| if debugFloat { |
| x.validate() |
| } |
| if z != x { |
| z.prec = x.prec |
| z.mode = x.mode |
| z.acc = x.acc |
| z.form = x.form |
| z.neg = x.neg |
| if z.form == finite { |
| z.mant = z.mant.set(x.mant) |
| z.exp = x.exp |
| } |
| } |
| return z |
| } |
| |
| // msb32 returns the 32 most significant bits of x. |
| func msb32(x nat) uint32 { |
| i := len(x) - 1 |
| if i < 0 { |
| return 0 |
| } |
| if debugFloat && x[i]&(1<<(_W-1)) == 0 { |
| panic("x not normalized") |
| } |
| switch _W { |
| case 32: |
| return uint32(x[i]) |
| case 64: |
| return uint32(x[i] >> 32) |
| } |
| panic("unreachable") |
| } |
| |
| // msb64 returns the 64 most significant bits of x. |
| func msb64(x nat) uint64 { |
| i := len(x) - 1 |
| if i < 0 { |
| return 0 |
| } |
| if debugFloat && x[i]&(1<<(_W-1)) == 0 { |
| panic("x not normalized") |
| } |
| switch _W { |
| case 32: |
| v := uint64(x[i]) << 32 |
| if i > 0 { |
| v |= uint64(x[i-1]) |
| } |
| return v |
| case 64: |
| return uint64(x[i]) |
| } |
| panic("unreachable") |
| } |
| |
| // Uint64 returns the unsigned integer resulting from truncating x |
| // towards zero. If 0 <= x <= math.MaxUint64, the result is Exact |
| // if x is an integer and Below otherwise. |
| // The result is (0, Above) for x < 0, and (math.MaxUint64, Below) |
| // for x > math.MaxUint64. |
| func (x *Float) Uint64() (uint64, Accuracy) { |
| if debugFloat { |
| x.validate() |
| } |
| |
| switch x.form { |
| case finite: |
| if x.neg { |
| return 0, Above |
| } |
| // 0 < x < +Inf |
| if x.exp <= 0 { |
| // 0 < x < 1 |
| return 0, Below |
| } |
| // 1 <= x < Inf |
| if x.exp <= 64 { |
| // u = trunc(x) fits into a uint64 |
| u := msb64(x.mant) >> (64 - uint32(x.exp)) |
| if x.MinPrec() <= 64 { |
| return u, Exact |
| } |
| return u, Below // x truncated |
| } |
| // x too large |
| return math.MaxUint64, Below |
| |
| case zero: |
| return 0, Exact |
| |
| case inf: |
| if x.neg { |
| return 0, Above |
| } |
| return math.MaxUint64, Below |
| } |
| |
| panic("unreachable") |
| } |
| |
| // Int64 returns the integer resulting from truncating x towards zero. |
| // If math.MinInt64 <= x <= math.MaxInt64, the result is Exact if x is |
| // an integer, and Above (x < 0) or Below (x > 0) otherwise. |
| // The result is (math.MinInt64, Above) for x < math.MinInt64, |
| // and (math.MaxInt64, Below) for x > math.MaxInt64. |
| func (x *Float) Int64() (int64, Accuracy) { |
| if debugFloat { |
| x.validate() |
| } |
| |
| switch x.form { |
| case finite: |
| // 0 < |x| < +Inf |
| acc := makeAcc(x.neg) |
| if x.exp <= 0 { |
| // 0 < |x| < 1 |
| return 0, acc |
| } |
| // x.exp > 0 |
| |
| // 1 <= |x| < +Inf |
| if x.exp <= 63 { |
| // i = trunc(x) fits into an int64 (excluding math.MinInt64) |
| i := int64(msb64(x.mant) >> (64 - uint32(x.exp))) |
| if x.neg { |
| i = -i |
| } |
| if x.MinPrec() <= uint(x.exp) { |
| return i, Exact |
| } |
| return i, acc // x truncated |
| } |
| if x.neg { |
| // check for special case x == math.MinInt64 (i.e., x == -(0.5 << 64)) |
| if x.exp == 64 && x.MinPrec() == 1 { |
| acc = Exact |
| } |
| return math.MinInt64, acc |
| } |
| // x too large |
| return math.MaxInt64, Below |
| |
| case zero: |
| return 0, Exact |
| |
| case inf: |
| if x.neg { |
| return math.MinInt64, Above |
| } |
| return math.MaxInt64, Below |
| } |
| |
| panic("unreachable") |
| } |
| |
| // Float32 returns the float32 value nearest to x. If x is too small to be |
| // represented by a float32 (|x| < math.SmallestNonzeroFloat32), the result |
| // is (0, Below) or (-0, Above), respectively, depending on the sign of x. |
| // If x is too large to be represented by a float32 (|x| > math.MaxFloat32), |
| // the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x. |
| func (x *Float) Float32() (float32, Accuracy) { |
| if debugFloat { |
| x.validate() |
| } |
| |
| switch x.form { |
| case finite: |
| // 0 < |x| < +Inf |
| |
| const ( |
| fbits = 32 // float size |
| mbits = 23 // mantissa size (excluding implicit msb) |
| ebits = fbits - mbits - 1 // 8 exponent size |
| bias = 1<<(ebits-1) - 1 // 127 exponent bias |
| dmin = 1 - bias - mbits // -149 smallest unbiased exponent (denormal) |
| emin = 1 - bias // -126 smallest unbiased exponent (normal) |
| emax = bias // 127 largest unbiased exponent (normal) |
| ) |
| |
| // Float mantissa m is 0.5 <= m < 1.0; compute exponent e for float32 mantissa. |
| e := x.exp - 1 // exponent for normal mantissa m with 1.0 <= m < 2.0 |
| |
| // Compute precision p for float32 mantissa. |
| // If the exponent is too small, we have a denormal number before |
| // rounding and fewer than p mantissa bits of precision available |
| // (the exponent remains fixed but the mantissa gets shifted right). |
| p := mbits + 1 // precision of normal float |
| if e < emin { |
| // recompute precision |
| p = mbits + 1 - emin + int(e) |
| // If p == 0, the mantissa of x is shifted so much to the right |
| // that its msb falls immediately to the right of the float32 |
| // mantissa space. In other words, if the smallest denormal is |
| // considered "1.0", for p == 0, the mantissa value m is >= 0.5. |
| // If m > 0.5, it is rounded up to 1.0; i.e., the smallest denormal. |
| // If m == 0.5, it is rounded down to even, i.e., 0.0. |
| // If p < 0, the mantissa value m is <= "0.25" which is never rounded up. |
| if p < 0 /* m <= 0.25 */ || p == 0 && x.mant.sticky(uint(len(x.mant))*_W-1) == 0 /* m == 0.5 */ { |
| // underflow to ±0 |
| if x.neg { |
| var z float32 |
| return -z, Above |
| } |
| return 0.0, Below |
| } |
| // otherwise, round up |
| // We handle p == 0 explicitly because it's easy and because |
| // Float.round doesn't support rounding to 0 bits of precision. |
| if p == 0 { |
| if x.neg { |
| return -math.SmallestNonzeroFloat32, Below |
| } |
| return math.SmallestNonzeroFloat32, Above |
| } |
| } |
| // p > 0 |
| |
| // round |
| var r Float |
| r.prec = uint32(p) |
| r.Set(x) |
| e = r.exp - 1 |
| |
| // Rounding may have caused r to overflow to ±Inf |
| // (rounding never causes underflows to 0). |
| // If the exponent is too large, also overflow to ±Inf. |
| if r.form == inf || e > emax { |
| // overflow |
| if x.neg { |
| return float32(math.Inf(-1)), Below |
| } |
| return float32(math.Inf(+1)), Above |
| } |
| // e <= emax |
| |
| // Determine sign, biased exponent, and mantissa. |
| var sign, bexp, mant uint32 |
| if x.neg { |
| sign = 1 << (fbits - 1) |
| } |
| |
| // Rounding may have caused a denormal number to |
| // become normal. Check again. |
| if e < emin { |
| // denormal number: recompute precision |
| // Since rounding may have at best increased precision |
| // and we have eliminated p <= 0 early, we know p > 0. |
| // bexp == 0 for denormals |
| p = mbits + 1 - emin + int(e) |
| mant = msb32(r.mant) >> uint(fbits-p) |
| } else { |
| // normal number: emin <= e <= emax |
| bexp = uint32(e+bias) << mbits |
| mant = msb32(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit) |
| } |
| |
| return math.Float32frombits(sign | bexp | mant), r.acc |
| |
| case zero: |
| if x.neg { |
| var z float32 |
| return -z, Exact |
| } |
| return 0.0, Exact |
| |
| case inf: |
| if x.neg { |
| return float32(math.Inf(-1)), Exact |
| } |
| return float32(math.Inf(+1)), Exact |
| } |
| |
| panic("unreachable") |
| } |
| |
| // Float64 returns the float64 value nearest to x. If x is too small to be |
| // represented by a float64 (|x| < math.SmallestNonzeroFloat64), the result |
| // is (0, Below) or (-0, Above), respectively, depending on the sign of x. |
| // If x is too large to be represented by a float64 (|x| > math.MaxFloat64), |
| // the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x. |
| func (x *Float) Float64() (float64, Accuracy) { |
| if debugFloat { |
| x.validate() |
| } |
| |
| switch x.form { |
| case finite: |
| // 0 < |x| < +Inf |
| |
| const ( |
| fbits = 64 // float size |
| mbits = 52 // mantissa size (excluding implicit msb) |
| ebits = fbits - mbits - 1 // 11 exponent size |
| bias = 1<<(ebits-1) - 1 // 1023 exponent bias |
| dmin = 1 - bias - mbits // -1074 smallest unbiased exponent (denormal) |
| emin = 1 - bias // -1022 smallest unbiased exponent (normal) |
| emax = bias // 1023 largest unbiased exponent (normal) |
| ) |
| |
| // Float mantissa m is 0.5 <= m < 1.0; compute exponent e for float64 mantissa. |
| e := x.exp - 1 // exponent for normal mantissa m with 1.0 <= m < 2.0 |
| |
| // Compute precision p for float64 mantissa. |
| // If the exponent is too small, we have a denormal number before |
| // rounding and fewer than p mantissa bits of precision available |
| // (the exponent remains fixed but the mantissa gets shifted right). |
| p := mbits + 1 // precision of normal float |
| if e < emin { |
| // recompute precision |
| p = mbits + 1 - emin + int(e) |
| // If p == 0, the mantissa of x is shifted so much to the right |
| // that its msb falls immediately to the right of the float64 |
| // mantissa space. In other words, if the smallest denormal is |
| // considered "1.0", for p == 0, the mantissa value m is >= 0.5. |
| // If m > 0.5, it is rounded up to 1.0; i.e., the smallest denormal. |
| // If m == 0.5, it is rounded down to even, i.e., 0.0. |
| // If p < 0, the mantissa value m is <= "0.25" which is never rounded up. |
| if p < 0 /* m <= 0.25 */ || p == 0 && x.mant.sticky(uint(len(x.mant))*_W-1) == 0 /* m == 0.5 */ { |
| // underflow to ±0 |
| if x.neg { |
| var z float64 |
| return -z, Above |
| } |
| return 0.0, Below |
| } |
| // otherwise, round up |
| // We handle p == 0 explicitly because it's easy and because |
| // Float.round doesn't support rounding to 0 bits of precision. |
| if p == 0 { |
| if x.neg { |
| return -math.SmallestNonzeroFloat64, Below |
| } |
| return math.SmallestNonzeroFloat64, Above |
| } |
| } |
| // p > 0 |
| |
| // round |
| var r Float |
| r.prec = uint32(p) |
| r.Set(x) |
| e = r.exp - 1 |
| |
| // Rounding may have caused r to overflow to ±Inf |
| // (rounding never causes underflows to 0). |
| // If the exponent is too large, also overflow to ±Inf. |
| if r.form == inf || e > emax { |
| // overflow |
| if x.neg { |
| return math.Inf(-1), Below |
| } |
| return math.Inf(+1), Above |
| } |
| // e <= emax |
| |
| // Determine sign, biased exponent, and mantissa. |
| var sign, bexp, mant uint64 |
| if x.neg { |
| sign = 1 << (fbits - 1) |
| } |
| |
| // Rounding may have caused a denormal number to |
| // become normal. Check again. |
| if e < emin { |
| // denormal number: recompute precision |
| // Since rounding may have at best increased precision |
| // and we have eliminated p <= 0 early, we know p > 0. |
| // bexp == 0 for denormals |
| p = mbits + 1 - emin + int(e) |
| mant = msb64(r.mant) >> uint(fbits-p) |
| } else { |
| // normal number: emin <= e <= emax |
| bexp = uint64(e+bias) << mbits |
| mant = msb64(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit) |
| } |
| |
| return math.Float64frombits(sign | bexp | mant), r.acc |
| |
| case zero: |
| if x.neg { |
| var z float64 |
| return -z, Exact |
| } |
| return 0.0, Exact |
| |
| case inf: |
| if x.neg { |
| return math.Inf(-1), Exact |
| } |
| return math.Inf(+1), Exact |
| } |
| |
| panic("unreachable") |
| } |
| |
| // Int returns the result of truncating x towards zero; |
| // or nil if x is an infinity. |
| // The result is Exact if x.IsInt(); otherwise it is Below |
| // for x > 0, and Above for x < 0. |
| // If a non-nil *Int argument z is provided, Int stores |
| // the result in z instead of allocating a new Int. |
| func (x *Float) Int(z *Int) (*Int, Accuracy) { |
| if debugFloat { |
| x.validate() |
| } |
| |
| if z == nil && x.form <= finite { |
| z = new(Int) |
| } |
| |
| switch x.form { |
| case finite: |
| // 0 < |x| < +Inf |
| acc := makeAcc(x.neg) |
| if x.exp <= 0 { |
| // 0 < |x| < 1 |
| return z.SetInt64(0), acc |
| } |
| // x.exp > 0 |
| |
| // 1 <= |x| < +Inf |
| // determine minimum required precision for x |
| allBits := uint(len(x.mant)) * _W |
| exp := uint(x.exp) |
| if x.MinPrec() <= exp { |
| acc = Exact |
| } |
| // shift mantissa as needed |
| if z == nil { |
| z = new(Int) |
| } |
| z.neg = x.neg |
| switch { |
| case exp > allBits: |
| z.abs = z.abs.shl(x.mant, exp-allBits) |
| default: |
| z.abs = z.abs.set(x.mant) |
| case exp < allBits: |
| z.abs = z.abs.shr(x.mant, allBits-exp) |
| } |
| return z, acc |
| |
| case zero: |
| return z.SetInt64(0), Exact |
| |
| case inf: |
| return nil, makeAcc(x.neg) |
| } |
| |
| panic("unreachable") |
| } |
| |
| // Rat returns the rational number corresponding to x; |
| // or nil if x is an infinity. |
| // The result is Exact if x is not an Inf. |
| // If a non-nil *Rat argument z is provided, Rat stores |
| // the result in z instead of allocating a new Rat. |
| func (x *Float) Rat(z *Rat) (*Rat, Accuracy) { |
| if debugFloat { |
| x.validate() |
| } |
| |
| if z == nil && x.form <= finite { |
| z = new(Rat) |
| } |
| |
| switch x.form { |
| case finite: |
| // 0 < |x| < +Inf |
| allBits := int32(len(x.mant)) * _W |
| // build up numerator and denominator |
| z.a.neg = x.neg |
| switch { |
| case x.exp > allBits: |
| z.a.abs = z.a.abs.shl(x.mant, uint(x.exp-allBits)) |
| z.b.abs = z.b.abs[:0] // == 1 (see Rat) |
| // z already in normal form |
| default: |
| z.a.abs = z.a.abs.set(x.mant) |
| z.b.abs = z.b.abs[:0] // == 1 (see Rat) |
| // z already in normal form |
| case x.exp < allBits: |
| z.a.abs = z.a.abs.set(x.mant) |
| t := z.b.abs.setUint64(1) |
| z.b.abs = t.shl(t, uint(allBits-x.exp)) |
| z.norm() |
| } |
| return z, Exact |
| |
| case zero: |
| return z.SetInt64(0), Exact |
| |
| case inf: |
| return nil, makeAcc(x.neg) |
| } |
| |
| panic("unreachable") |
| } |
| |
| // Abs sets z to the (possibly rounded) value |x| (the absolute value of x) |
| // and returns z. |
| func (z *Float) Abs(x *Float) *Float { |
| z.Set(x) |
| z.neg = false |
| return z |
| } |
| |
| // Neg sets z to the (possibly rounded) value of x with its sign negated, |
| // and returns z. |
| func (z *Float) Neg(x *Float) *Float { |
| z.Set(x) |
| z.neg = !z.neg |
| return z |
| } |
| |
| func validateBinaryOperands(x, y *Float) { |
| if !debugFloat { |
| // avoid performance bugs |
| panic("validateBinaryOperands called but debugFloat is not set") |
| } |
| if len(x.mant) == 0 { |
| panic("empty mantissa for x") |
| } |
| if len(y.mant) == 0 { |
| panic("empty mantissa for y") |
| } |
| } |
| |
| // z = x + y, ignoring signs of x and y for the addition |
| // but using the sign of z for rounding the result. |
| // x and y must have a non-empty mantissa and valid exponent. |
| func (z *Float) uadd(x, y *Float) { |
| // Note: This implementation requires 2 shifts most of the |
| // time. It is also inefficient if exponents or precisions |
| // differ by wide margins. The following article describes |
| // an efficient (but much more complicated) implementation |
| // compatible with the internal representation used here: |
| // |
| // Vincent Lefèvre: "The Generic Multiple-Precision Floating- |
| // Point Addition With Exact Rounding (as in the MPFR Library)" |
| // http://www.vinc17.net/research/papers/rnc6.pdf |
| |
| if debugFloat { |
| validateBinaryOperands(x, y) |
| } |
| |
| // compute exponents ex, ey for mantissa with "binary point" |
| // on the right (mantissa.0) - use int64 to avoid overflow |
| ex := int64(x.exp) - int64(len(x.mant))*_W |
| ey := int64(y.exp) - int64(len(y.mant))*_W |
| |
| al := alias(z.mant, x.mant) || alias(z.mant, y.mant) |
| |
| // TODO(gri) having a combined add-and-shift primitive |
| // could make this code significantly faster |
| switch { |
| case ex < ey: |
| if al { |
| t := nat(nil).shl(y.mant, uint(ey-ex)) |
| z.mant = z.mant.add(x.mant, t) |
| } else { |
| z.mant = z.mant.shl(y.mant, uint(ey-ex)) |
| z.mant = z.mant.add(x.mant, z.mant) |
| } |
| default: |
| // ex == ey, no shift needed |
| z.mant = z.mant.add(x.mant, y.mant) |
| case ex > ey: |
| if al { |
| t := nat(nil).shl(x.mant, uint(ex-ey)) |
| z.mant = z.mant.add(t, y.mant) |
| } else { |
| z.mant = z.mant.shl(x.mant, uint(ex-ey)) |
| z.mant = z.mant.add(z.mant, y.mant) |
| } |
| ex = ey |
| } |
| // len(z.mant) > 0 |
| |
| z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0) |
| } |
| |
| // z = x - y for |x| > |y|, ignoring signs of x and y for the subtraction |
| // but using the sign of z for rounding the result. |
| // x and y must have a non-empty mantissa and valid exponent. |
| func (z *Float) usub(x, y *Float) { |
| // This code is symmetric to uadd. |
| // We have not factored the common code out because |
| // eventually uadd (and usub) should be optimized |
| // by special-casing, and the code will diverge. |
| |
| if debugFloat { |
| validateBinaryOperands(x, y) |
| } |
| |
| ex := int64(x.exp) - int64(len(x.mant))*_W |
| ey := int64(y.exp) - int64(len(y.mant))*_W |
| |
| al := alias(z.mant, x.mant) || alias(z.mant, y.mant) |
| |
| switch { |
| case ex < ey: |
| if al { |
| t := nat(nil).shl(y.mant, uint(ey-ex)) |
| z.mant = t.sub(x.mant, t) |
| } else { |
| z.mant = z.mant.shl(y.mant, uint(ey-ex)) |
| z.mant = z.mant.sub(x.mant, z.mant) |
| } |
| default: |
| // ex == ey, no shift needed |
| z.mant = z.mant.sub(x.mant, y.mant) |
| case ex > ey: |
| if al { |
| t := nat(nil).shl(x.mant, uint(ex-ey)) |
| z.mant = t.sub(t, y.mant) |
| } else { |
| z.mant = z.mant.shl(x.mant, uint(ex-ey)) |
| z.mant = z.mant.sub(z.mant, y.mant) |
| } |
| ex = ey |
| } |
| |
| // operands may have canceled each other out |
| if len(z.mant) == 0 { |
| z.acc = Exact |
| z.form = zero |
| z.neg = false |
| return |
| } |
| // len(z.mant) > 0 |
| |
| z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0) |
| } |
| |
| // z = x * y, ignoring signs of x and y for the multiplication |
| // but using the sign of z for rounding the result. |
| // x and y must have a non-empty mantissa and valid exponent. |
| func (z *Float) umul(x, y *Float) { |
| if debugFloat { |
| validateBinaryOperands(x, y) |
| } |
| |
| // Note: This is doing too much work if the precision |
| // of z is less than the sum of the precisions of x |
| // and y which is often the case (e.g., if all floats |
| // have the same precision). |
| // TODO(gri) Optimize this for the common case. |
| |
| e := int64(x.exp) + int64(y.exp) |
| if x == y { |
| z.mant = z.mant.sqr(x.mant) |
| } else { |
| z.mant = z.mant.mul(x.mant, y.mant) |
| } |
| z.setExpAndRound(e-fnorm(z.mant), 0) |
| } |
| |
| // z = x / y, ignoring signs of x and y for the division |
| // but using the sign of z for rounding the result. |
| // x and y must have a non-empty mantissa and valid exponent. |
| func (z *Float) uquo(x, y *Float) { |
| if debugFloat { |
| validateBinaryOperands(x, y) |
| } |
| |
| // mantissa length in words for desired result precision + 1 |
| // (at least one extra bit so we get the rounding bit after |
| // the division) |
| n := int(z.prec/_W) + 1 |
| |
| // compute adjusted x.mant such that we get enough result precision |
| xadj := x.mant |
| if d := n - len(x.mant) + len(y.mant); d > 0 { |
| // d extra words needed => add d "0 digits" to x |
| xadj = make(nat, len(x.mant)+d) |
| copy(xadj[d:], x.mant) |
| } |
| // TODO(gri): If we have too many digits (d < 0), we should be able |
| // to shorten x for faster division. But we must be extra careful |
| // with rounding in that case. |
| |
| // Compute d before division since there may be aliasing of x.mant |
| // (via xadj) or y.mant with z.mant. |
| d := len(xadj) - len(y.mant) |
| |
| // divide |
| var r nat |
| z.mant, r = z.mant.div(nil, xadj, y.mant) |
| e := int64(x.exp) - int64(y.exp) - int64(d-len(z.mant))*_W |
| |
| // The result is long enough to include (at least) the rounding bit. |
| // If there's a non-zero remainder, the corresponding fractional part |
| // (if it were computed), would have a non-zero sticky bit (if it were |
| // zero, it couldn't have a non-zero remainder). |
| var sbit uint |
| if len(r) > 0 { |
| sbit = 1 |
| } |
| |
| z.setExpAndRound(e-fnorm(z.mant), sbit) |
| } |
| |
| // ucmp returns -1, 0, or +1, depending on whether |
| // |x| < |y|, |x| == |y|, or |x| > |y|. |
| // x and y must have a non-empty mantissa and valid exponent. |
| func (x *Float) ucmp(y *Float) int { |
| if debugFloat { |
| validateBinaryOperands(x, y) |
| } |
| |
| switch { |
| case x.exp < y.exp: |
| return -1 |
| case x.exp > y.exp: |
| return +1 |
| } |
| // x.exp == y.exp |
| |
| // compare mantissas |
| i := len(x.mant) |
| j := len(y.mant) |
| for i > 0 || j > 0 { |
| var xm, ym Word |
| if i > 0 { |
| i-- |
| xm = x.mant[i] |
| } |
| if j > 0 { |
| j-- |
| ym = y.mant[j] |
| } |
| switch { |
| case xm < ym: |
| return -1 |
| case xm > ym: |
| return +1 |
| } |
| } |
| |
| return 0 |
| } |
| |
| // Handling of sign bit as defined by IEEE 754-2008, section 6.3: |
| // |
| // When neither the inputs nor result are NaN, the sign of a product or |
| // quotient is the exclusive OR of the operands’ signs; the sign of a sum, |
| // or of a difference x−y regarded as a sum x+(−y), differs from at most |
| // one of the addends’ signs; and the sign of the result of conversions, |
| // the quantize operation, the roundToIntegral operations, and the |
| // roundToIntegralExact (see 5.3.1) is the sign of the first or only operand. |
| // These rules shall apply even when operands or results are zero or infinite. |
| // |
| // When the sum of two operands with opposite signs (or the difference of |
| // two operands with like signs) is exactly zero, the sign of that sum (or |
| // difference) shall be +0 in all rounding-direction attributes except |
| // roundTowardNegative; under that attribute, the sign of an exact zero |
| // sum (or difference) shall be −0. However, x+x = x−(−x) retains the same |
| // sign as x even when x is zero. |
| // |
| // See also: https://play.golang.org/p/RtH3UCt5IH |
| |
| // Add sets z to the rounded sum x+y and returns z. If z's precision is 0, |
| // it is changed to the larger of x's or y's precision before the operation. |
| // Rounding is performed according to z's precision and rounding mode; and |
| // z's accuracy reports the result error relative to the exact (not rounded) |
| // result. Add panics with ErrNaN if x and y are infinities with opposite |
| // signs. The value of z is undefined in that case. |
| func (z *Float) Add(x, y *Float) *Float { |
| if debugFloat { |
| x.validate() |
| y.validate() |
| } |
| |
| if z.prec == 0 { |
| z.prec = umax32(x.prec, y.prec) |
| } |
| |
| if x.form == finite && y.form == finite { |
| // x + y (common case) |
| |
| // Below we set z.neg = x.neg, and when z aliases y this will |
| // change the y operand's sign. This is fine, because if an |
| // operand aliases the receiver it'll be overwritten, but we still |
| // want the original x.neg and y.neg values when we evaluate |
| // x.neg != y.neg, so we need to save y.neg before setting z.neg. |
| yneg := y.neg |
| |
| z.neg = x.neg |
| if x.neg == yneg { |
| // x + y == x + y |
| // (-x) + (-y) == -(x + y) |
| z.uadd(x, y) |
| } else { |
| // x + (-y) == x - y == -(y - x) |
| // (-x) + y == y - x == -(x - y) |
| if x.ucmp(y) > 0 { |
| z.usub(x, y) |
| } else { |
| z.neg = !z.neg |
| z.usub(y, x) |
| } |
| } |
| if z.form == zero && z.mode == ToNegativeInf && z.acc == Exact { |
| z.neg = true |
| } |
| return z |
| } |
| |
| if x.form == inf && y.form == inf && x.neg != y.neg { |
| // +Inf + -Inf |
| // -Inf + +Inf |
| // value of z is undefined but make sure it's valid |
| z.acc = Exact |
| z.form = zero |
| z.neg = false |
| panic(ErrNaN{"addition of infinities with opposite signs"}) |
| } |
| |
| if x.form == zero && y.form == zero { |
| // ±0 + ±0 |
| z.acc = Exact |
| z.form = zero |
| z.neg = x.neg && y.neg // -0 + -0 == -0 |
| return z |
| } |
| |
| if x.form == inf || y.form == zero { |
| // ±Inf + y |
| // x + ±0 |
| return z.Set(x) |
| } |
| |
| // ±0 + y |
| // x + ±Inf |
| return z.Set(y) |
| } |
| |
| // Sub sets z to the rounded difference x-y and returns z. |
| // Precision, rounding, and accuracy reporting are as for Add. |
| // Sub panics with ErrNaN if x and y are infinities with equal |
| // signs. The value of z is undefined in that case. |
| func (z *Float) Sub(x, y *Float) *Float { |
| if debugFloat { |
| x.validate() |
| y.validate() |
| } |
| |
| if z.prec == 0 { |
| z.prec = umax32(x.prec, y.prec) |
| } |
| |
| if x.form == finite && y.form == finite { |
| // x - y (common case) |
| yneg := y.neg |
| z.neg = x.neg |
| if x.neg != yneg { |
| // x - (-y) == x + y |
| // (-x) - y == -(x + y) |
| z.uadd(x, y) |
| } else { |
| // x - y == x - y == -(y - x) |
| // (-x) - (-y) == y - x == -(x - y) |
| if x.ucmp(y) > 0 { |
| z.usub(x, y) |
| } else { |
| z.neg = !z.neg |
| z.usub(y, x) |
| } |
| } |
| if z.form == zero && z.mode == ToNegativeInf && z.acc == Exact { |
| z.neg = true |
| } |
| return z |
| } |
| |
| if x.form == inf && y.form == inf && x.neg == y.neg { |
| // +Inf - +Inf |
| // -Inf - -Inf |
| // value of z is undefined but make sure it's valid |
| z.acc = Exact |
| z.form = zero |
| z.neg = false |
| panic(ErrNaN{"subtraction of infinities with equal signs"}) |
| } |
| |
| if x.form == zero && y.form == zero { |
| // ±0 - ±0 |
| z.acc = Exact |
| z.form = zero |
| z.neg = x.neg && !y.neg // -0 - +0 == -0 |
| return z |
| } |
| |
| if x.form == inf || y.form == zero { |
| // ±Inf - y |
| // x - ±0 |
| return z.Set(x) |
| } |
| |
| // ±0 - y |
| // x - ±Inf |
| return z.Neg(y) |
| } |
| |
| // Mul sets z to the rounded product x*y and returns z. |
| // Precision, rounding, and accuracy reporting are as for Add. |
| // Mul panics with ErrNaN if one operand is zero and the other |
| // operand an infinity. The value of z is undefined in that case. |
| func (z *Float) Mul(x, y *Float) *Float { |
| if debugFloat { |
| x.validate() |
| y.validate() |
| } |
| |
| if z.prec == 0 { |
| z.prec = umax32(x.prec, y.prec) |
| } |
| |
| z.neg = x.neg != y.neg |
| |
| if x.form == finite && y.form == finite { |
| // x * y (common case) |
| z.umul(x, y) |
| return z |
| } |
| |
| z.acc = Exact |
| if x.form == zero && y.form == inf || x.form == inf && y.form == zero { |
| // ±0 * ±Inf |
| // ±Inf * ±0 |
| // value of z is undefined but make sure it's valid |
| z.form = zero |
| z.neg = false |
| panic(ErrNaN{"multiplication of zero with infinity"}) |
| } |
| |
| if x.form == inf || y.form == inf { |
| // ±Inf * y |
| // x * ±Inf |
| z.form = inf |
| return z |
| } |
| |
| // ±0 * y |
| // x * ±0 |
| z.form = zero |
| return z |
| } |
| |
| // Quo sets z to the rounded quotient x/y and returns z. |
| // Precision, rounding, and accuracy reporting are as for Add. |
| // Quo panics with ErrNaN if both operands are zero or infinities. |
| // The value of z is undefined in that case. |
| func (z *Float) Quo(x, y *Float) *Float { |
| if debugFloat { |
| x.validate() |
| y.validate() |
| } |
| |
| if z.prec == 0 { |
| z.prec = umax32(x.prec, y.prec) |
| } |
| |
| z.neg = x.neg != y.neg |
| |
| if x.form == finite && y.form == finite { |
| // x / y (common case) |
| z.uquo(x, y) |
| return z |
| } |
| |
| z.acc = Exact |
| if x.form == zero && y.form == zero || x.form == inf && y.form == inf { |
| // ±0 / ±0 |
| // ±Inf / ±Inf |
| // value of z is undefined but make sure it's valid |
| z.form = zero |
| z.neg = false |
| panic(ErrNaN{"division of zero by zero or infinity by infinity"}) |
| } |
| |
| if x.form == zero || y.form == inf { |
| // ±0 / y |
| // x / ±Inf |
| z.form = zero |
| return z |
| } |
| |
| // x / ±0 |
| // ±Inf / y |
| z.form = inf |
| return z |
| } |
| |
| // Cmp compares x and y and returns: |
| // |
| // -1 if x < y |
| // 0 if x == y (incl. -0 == 0, -Inf == -Inf, and +Inf == +Inf) |
| // +1 if x > y |
| // |
| func (x *Float) Cmp(y *Float) int { |
| if debugFloat { |
| x.validate() |
| y.validate() |
| } |
| |
| mx := x.ord() |
| my := y.ord() |
| switch { |
| case mx < my: |
| return -1 |
| case mx > my: |
| return +1 |
| } |
| // mx == my |
| |
| // only if |mx| == 1 we have to compare the mantissae |
| switch mx { |
| case -1: |
| return y.ucmp(x) |
| case +1: |
| return x.ucmp(y) |
| } |
| |
| return 0 |
| } |
| |
| // ord classifies x and returns: |
| // |
| // -2 if -Inf == x |
| // -1 if -Inf < x < 0 |
| // 0 if x == 0 (signed or unsigned) |
| // +1 if 0 < x < +Inf |
| // +2 if x == +Inf |
| // |
| func (x *Float) ord() int { |
| var m int |
| switch x.form { |
| case finite: |
| m = 1 |
| case zero: |
| return 0 |
| case inf: |
| m = 2 |
| } |
| if x.neg { |
| m = -m |
| } |
| return m |
| } |
| |
| func umax32(x, y uint32) uint32 { |
| if x > y { |
| return x |
| } |
| return y |
| } |