blob: 1678e8f9f1d1a8daa26de042d0e0a233979333e4 [file] [log] [blame]
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Software IEEE754 64-bit floating point.
// Only referred to (and thus linked in) by arm port
// and by tests in this directory.
package runtime
// Field widths, exponent biases and special bit patterns for the IEEE 754
// binary64 and binary32 formats.
const (
	// Number of explicitly stored fraction bits in a float64.
	mantbits64 uint = 52
	// Number of exponent bits in a float64.
	expbits64 uint = 11
	// Value added to a stored float64 exponent to unbias it (-1023).
	bias64 = -1<<(expbits64-1) + 1

	// +Inf: all-ones exponent, zero fraction.
	inf64 uint64 = (1<<expbits64 - 1) << mantbits64
	// Canonical quiet NaN: all-ones exponent with only the top fraction
	// bit set. IEEE 754 requires operations to deliver quiet NaNs; a
	// fraction whose top bit is clear would encode a signaling NaN.
	nan64 uint64 = inf64 | 1<<(mantbits64-1)
	// Sign bit of a float64.
	neg64 uint64 = 1 << (expbits64 + mantbits64)

	// Number of explicitly stored fraction bits in a float32.
	mantbits32 uint = 23
	// Number of exponent bits in a float32.
	expbits32 uint = 8
	// Value added to a stored float32 exponent to unbias it (-127).
	bias32 = -1<<(expbits32-1) + 1

	// +Inf: all-ones exponent, zero fraction.
	inf32 uint32 = (1<<expbits32 - 1) << mantbits32
	// Canonical quiet NaN (see nan64).
	nan32 uint32 = inf32 | 1<<(mantbits32-1)
	// Sign bit of a float32.
	neg32 uint32 = 1 << (expbits32 + mantbits32)
)
// funpack64 splits the float64 bit pattern f into its components.
//
// sign is the sign bit, left in place at bit 63. For Inf and NaN the
// corresponding flag is set and mant and exp hold the raw stored fields.
// For zero, mant and exp are both 0. For every other value mant is
// normalized so that bit mantbits64 (the implicit leading 1) is set, and
// exp is the unbiased exponent; denormals are shifted up to that form.
func funpack64(f uint64) (sign, mant uint64, exp int, inf, nan bool) {
	const (
		hidden  = 1 << mantbits64
		expOnes = 1<<expbits64 - 1
	)

	sign = f & neg64
	mant = f & (hidden - 1)
	exp = int(f>>mantbits64) & expOnes

	switch {
	case exp == expOnes:
		// All-ones exponent: a zero fraction means Inf, anything else NaN.
		if mant == 0 {
			inf = true
		} else {
			nan = true
		}

	case exp != 0:
		// Normal number: restore the implicit leading bit.
		mant |= hidden
		exp += bias64

	case mant != 0:
		// Denormal: no implicit bit, so shift the fraction up until the
		// leading bit lands where a normal number would have it.
		exp = bias64 + 1
		for mant>>mantbits64 == 0 {
			mant <<= 1
			exp--
		}
	}
	return sign, mant, exp, inf, nan
}
// funpack32 splits the float32 bit pattern f into its components.
//
// sign is the sign bit, left in place at bit 31. For Inf and NaN the
// corresponding flag is set and mant and exp hold the raw stored fields.
// For zero, mant and exp are both 0. For every other value mant is
// normalized so that bit mantbits32 (the implicit leading 1) is set, and
// exp is the unbiased exponent; denormals are shifted up to that form.
func funpack32(f uint32) (sign, mant uint32, exp int, inf, nan bool) {
	const (
		hidden  = 1 << mantbits32
		expOnes = 1<<expbits32 - 1
	)

	sign = f & neg32
	mant = f & (hidden - 1)
	exp = int(f>>mantbits32) & expOnes

	switch {
	case exp == expOnes:
		// All-ones exponent: a zero fraction means Inf, anything else NaN.
		if mant == 0 {
			inf = true
		} else {
			nan = true
		}

	case exp != 0:
		// Normal number: restore the implicit leading bit.
		mant |= hidden
		exp += bias32

	case mant != 0:
		// Denormal: no implicit bit, so shift the fraction up until the
		// leading bit lands where a normal number would have it.
		exp = bias32 + 1
		for mant>>mantbits32 == 0 {
			mant <<= 1
			exp--
		}
	}
	return sign, mant, exp, inf, nan
}
// fpack64 assembles a float64 bit pattern for the value
// mant × 2^(exp-mantbits64), rounding to nearest even.
//
// sign must already be positioned at bit 63. mant need not be normalized.
// trunc is a sticky word: any nonzero value means that nonzero bits below
// mant were discarded earlier, which matters only for breaking ties.
// Results too large become ±Inf; results too small become denormals or ±0.
func fpack64(sign, mant uint64, exp int, trunc uint64) uint64 {
	const (
		hidden   = 1 << mantbits64
		fracMask = hidden - 1
		// Smallest unbiased exponent that no longer fits a finite value.
		infExp = 1<<expbits64 - 1 + bias64
		// Smallest unbiased exponent of a normal number.
		minNormalExp = bias64 + 1
	)

	// The denormal path below restarts from the untouched inputs.
	origMant, origExp, origTrunc := mant, exp, trunc

	if mant == 0 {
		return sign
	}

	// Bring the leading bit up to the implicit-bit position ...
	for mant < hidden {
		mant <<= 1
		exp--
	}
	// ... or down to at most one position above it, remembering in trunc
	// whether any nonzero bit fell off.
	for mant >= 4*hidden {
		trunc |= mant & 1
		mant >>= 1
		exp++
	}
	if mant >= 2*hidden {
		// Exactly one surplus low bit remains. Round to nearest even
		// before dropping it.
		half := mant&1 != 0
		odd := mant&2 != 0
		if half && (odd || trunc != 0) {
			mant++
			if mant >= 4*hidden {
				// Rounding carried out of the top; renormalize.
				mant >>= 1
				exp++
			}
		}
		mant >>= 1
		exp++
	}

	if exp >= infExp {
		return sign ^ inf64
	}

	if exp < minNormalExp {
		if exp < bias64-int(mantbits64) {
			// Too small even for the smallest denormal.
			return sign
		}

		// Redo the shift from the original inputs so that rounding
		// happens once, at the denormal's precision.
		mant, exp, trunc = origMant, origExp, origTrunc
		for exp < bias64 {
			trunc |= mant & 1
			mant >>= 1
			exp++
		}
		if mant&1 != 0 && (trunc != 0 || mant&2 != 0) {
			mant++
		}
		mant >>= 1
		exp++
		if mant < hidden {
			// Still denormal: stored exponent field is zero.
			return sign | mant
		}
		// Rounding promoted the value to a normal number; fall through.
	}

	return sign | uint64(exp-bias64)<<mantbits64 | mant&fracMask
}
// fpack32 assembles a float32 bit pattern for the value
// mant × 2^(exp-mantbits32), rounding to nearest even.
//
// sign must already be positioned at bit 31. mant need not be normalized.
// trunc is a sticky word: any nonzero value means that nonzero bits below
// mant were discarded earlier, which matters only for breaking ties.
// Results too large become ±Inf; results too small become denormals or ±0.
func fpack32(sign, mant uint32, exp int, trunc uint32) uint32 {
	const (
		hidden   = 1 << mantbits32
		fracMask = hidden - 1
		// Smallest unbiased exponent that no longer fits a finite value.
		infExp = 1<<expbits32 - 1 + bias32
		// Smallest unbiased exponent of a normal number.
		minNormalExp = bias32 + 1
	)

	// The denormal path below restarts from the untouched inputs.
	origMant, origExp, origTrunc := mant, exp, trunc

	if mant == 0 {
		return sign
	}

	// Bring the leading bit up to the implicit-bit position ...
	for mant < hidden {
		mant <<= 1
		exp--
	}
	// ... or down to at most one position above it, remembering in trunc
	// whether any nonzero bit fell off.
	for mant >= 4*hidden {
		trunc |= mant & 1
		mant >>= 1
		exp++
	}
	if mant >= 2*hidden {
		// Exactly one surplus low bit remains. Round to nearest even
		// before dropping it.
		half := mant&1 != 0
		odd := mant&2 != 0
		if half && (odd || trunc != 0) {
			mant++
			if mant >= 4*hidden {
				// Rounding carried out of the top; renormalize.
				mant >>= 1
				exp++
			}
		}
		mant >>= 1
		exp++
	}

	if exp >= infExp {
		return sign ^ inf32
	}

	if exp < minNormalExp {
		if exp < bias32-int(mantbits32) {
			// Too small even for the smallest denormal.
			return sign
		}

		// Redo the shift from the original inputs so that rounding
		// happens once, at the denormal's precision.
		mant, exp, trunc = origMant, origExp, origTrunc
		for exp < bias32 {
			trunc |= mant & 1
			mant >>= 1
			exp++
		}
		if mant&1 != 0 && (trunc != 0 || mant&2 != 0) {
			mant++
		}
		mant >>= 1
		exp++
		if mant < hidden {
			// Still denormal: stored exponent field is zero.
			return sign | mant
		}
		// Rounding promoted the value to a normal number; fall through.
	}

	return sign | uint32(exp-bias32)<<mantbits32 | mant&fracMask
}
// fadd64 returns the float64 bit pattern of f + g, where f and g are
// float64 bit patterns.
func fadd64(f, g uint64) uint64 {
	fs, fm, fe, fi, fn := funpack64(f)
	gs, gm, ge, gi, gn := funpack64(g)

	// Operands that need no arithmetic.
	if fn || gn {
		// NaN + x and x + NaN are NaN.
		return nan64
	}
	if fi && gi && fs != gs {
		// Infinities of opposite sign cancel to NaN.
		return nan64
	}
	if fi {
		// ±Inf + g = ±Inf
		return f
	}
	if gi {
		// f + ±Inf = ±Inf
		return g
	}
	if fm == 0 && gm == 0 && fs != 0 && gs != 0 {
		// -0 + -0 = -0
		return f
	}
	if fm == 0 {
		// 0 + g = g, except that any other sum of two zeros is +0.
		if gm == 0 {
			g ^= gs
		}
		return g
	}
	if gm == 0 {
		// f + 0 = f
		return f
	}

	// Make f the operand of larger magnitude so the result takes its
	// sign and the subtraction below cannot go negative.
	if fe < ge || (fe == ge && fm < gm) {
		fs, gs = gs, fs
		fm, gm = gm, fm
		fe, ge = ge, fe
	}

	// Give both mantissas two extra low bits, then line g up with f,
	// keeping the bits shifted out of g as a sticky value.
	shift := uint(fe - ge)
	fm <<= 2
	gm <<= 2
	trunc := gm & (1<<shift - 1)
	gm >>= shift

	if fs != gs {
		fm -= gm
		if trunc != 0 {
			// The discarded part of g would have been subtracted too;
			// borrow one unit for it and leave trunc set as sticky.
			fm--
		}
	} else {
		fm += gm
	}
	if fm == 0 {
		// Exact cancellation yields +0.
		fs = 0
	}
	return fpack64(fs, fm, fe-2, trunc)
}
func fsub64(f, g uint64) uint64 {
return fadd64(f, fneg64(g))
}
func fneg64(f uint64) uint64 {
return f ^ (1 << (mantbits64 + expbits64))
}
// fmul64 returns the float64 bit pattern of f * g, where f and g are
// float64 bit patterns.
func fmul64(f, g uint64) uint64 {
	fs, fm, fe, fi, fn := funpack64(f)
	gs, gm, ge, gi, gn := funpack64(g)

	// Operands that need no arithmetic. funpack64 reports a zero
	// mantissa for infinities as well as for zeros, so the last two
	// checks also cover Inf times a finite nonzero value.
	if fn || gn {
		// NaN * g and f * NaN are NaN.
		return nan64
	}
	if fi && gi {
		// Inf * Inf = Inf, with the product of the signs.
		return f ^ gs
	}
	if (fi && gm == 0) || (fm == 0 && gi) {
		// Inf * 0 and 0 * Inf are NaN.
		return nan64
	}
	if fm == 0 {
		// f is the answer up to sign.
		return f ^ gs
	}
	if gm == 0 {
		// g is the answer up to sign.
		return g ^ fs
	}

	// A 53-bit by 53-bit product occupies 105 or 106 bits. Keep the top
	// bits as the mantissa and hand the low ones to fpack64 as sticky.
	const shift = mantbits64 - 1
	lo, hi := mullu(fm, gm)
	sticky := lo & (1<<shift - 1)
	product := hi<<(64-shift) | lo>>shift
	return fpack64(fs^gs, product, fe+ge-1, sticky)
}
// fdiv64 returns the float64 bit pattern of f / g, where f and g are
// float64 bit patterns.
func fdiv64(f, g uint64) uint64 {
	fs, fm, fe, fi, fn := funpack64(f)
	gs, gm, ge, gi, gn := funpack64(g)
	sign := fs ^ gs

	// Operands that need no arithmetic.
	switch {
	case fn || gn, // NaN / g and f / NaN
		fi && gi, // ±Inf / ±Inf
		!fi && !gi && fm == 0 && gm == 0: // 0 / 0
		return nan64

	case fi || (!gi && gm == 0): // Inf / g = f / 0 = Inf
		return sign ^ inf64

	case gi || fm == 0: // f / Inf = 0 / g = 0
		return sign
	}

	// Divide the mantissa of f, scaled up by 2^54, by the mantissa of g.
	// The remainder is passed on as the sticky value for rounding.
	const shift = mantbits64 + 2
	quo, rem := divlu(fm>>(64-shift), fm<<shift, gm)
	return fpack64(sign, quo, fe-ge-2, rem)
}
func f64to32(f uint64) uint32 {
fs, fm, fe, fi, fn := funpack64(f)
if fn {
return nan32
}
fs32 := uint32(fs >> 32)
if fi {
return fs32 ^ inf32
}
const d = mantbits64 - mantbits32 - 1
return fpack32(fs32, uint32(fm>>d), fe-1, uint32(fm&(1<<d-1)))
}
// f32to64 converts the float32 bit pattern f to a float64 bit pattern.
// NaN maps to nan64 and ±Inf to the float64 infinity of the same sign.
func f32to64(f uint32) uint64 {
	// Extra fraction bits a float64 has over a float32.
	const widen = mantbits64 - mantbits32

	fs, fm, fe, fi, fn := funpack32(f)
	// Move the sign from bit 31 up to bit 63.
	sign := uint64(fs) << 32

	switch {
	case fn:
		return nan64
	case fi:
		return sign ^ inf64
	}
	return fpack64(sign, uint64(fm)<<widen, fe, 0)
}
// fcmp64 compares the float64 bit patterns f and g.
//
// cmp is -1, 0 or +1 for f < g, f == g or f > g. If either operand is a
// NaN, isnan is true and cmp is 0.
func fcmp64(f, g uint64) (cmp int32, isnan bool) {
	fs, fm, _, fi, fn := funpack64(f)
	gs, gm, _, gi, gn := funpack64(g)

	if fn || gn {
		return 0, true
	}

	// +0 and -0 compare equal. Infinities also unpack with a zero
	// mantissa, so they must be excluded explicitly.
	if !fi && !gi && fm == 0 && gm == 0 {
		return 0, false
	}

	// Opposite signs: the operand with the sign bit set is the smaller.
	if fs != gs {
		if fs > gs {
			return -1, false
		}
		return +1, false
	}

	// Same sign, no NaN: the encodings order like the magnitudes, so
	// compare them directly and flip the answer for negative values.
	if f == g {
		return 0, false
	}
	smaller := f < g
	if fs != 0 {
		smaller = !smaller
	}
	if smaller {
		return -1, false
	}
	return +1, false
}
// f64toint converts the float64 bit pattern f to an int64, discarding
// the fraction (truncation toward zero).
//
// ok reports whether the conversion produced a usable value. It is false,
// with val == 0, when f is NaN or ±Inf and when the truncated value does
// not fit in an int64 (f >= 2^63 or f < -2^63).
//
// NOTE(review): magnitudes below 0.5 also report ok == false, although
// values in [0.5, 1) and ±0 report (0, true). That asymmetry is kept
// as-is because callers outside this block may rely on it; confirm
// whether it is intended.
func f64toint(f uint64) (val int64, ok bool) {
	fs, fm, fe, fi, fn := funpack64(f)

	switch {
	case fi, fn: // ±Inf or NaN
		return 0, false

	case fe < -1: // |f| < 0.5
		return 0, false

	case fe >= 63: // |f| >= 2^63
		// Unpacked nonzero values always carry the implicit leading bit
		// in fm, so -2^63 (the one representable value in this range)
		// is exponent 63 with no fraction bits set.
		if fs != 0 && fe == 63 && fm == 1<<mantbits64 {
			return -1 << 63, true
		}
		return 0, false
	}

	// Here -1 <= fe <= 62 (or f is ±0 with fm == 0), so aligning the
	// binary point leaves a magnitude below 2^63 that fits an int64.
	if fe > int(mantbits64) {
		fm <<= uint(fe - int(mantbits64))
	} else {
		fm >>= uint(int(mantbits64) - fe)
	}

	val = int64(fm)
	if fs != 0 {
		val = -val
	}
	return val, true
}
// fintto64 converts the int64 val to a float64 bit pattern, letting
// fpack64 round when val has more significant bits than the mantissa
// holds.
func fintto64(val int64) (f uint64) {
	magnitude := uint64(val)
	var sign uint64
	if val < 0 {
		sign = 1 << 63
		// Two's-complement negation in uint64; the most negative int64
		// comes out as 2^63, which is its correct magnitude.
		magnitude = -magnitude
	}
	// An exponent of mantbits64 tells fpack64 the mantissa is an integer.
	return fpack64(sign, magnitude, int(mantbits64), 0)
}
// mullu returns the full 128-bit product of u and v as its low and high
// 64-bit halves (note the order: lo first).
// Adapted from Hacker's Delight.
func mullu(u, v uint64) (lo, hi uint64) {
	const (
		half    = 32
		lowMask = 1<<half - 1
	)

	// Split each operand into 32-bit halves so that no partial product
	// overflows 64 bits.
	uLo, uHi := u&lowMask, u>>half
	vLo, vHi := v&lowMask, v>>half

	// First cross product plus the carry out of the low partial product.
	cross := uHi*vLo + (uLo*vLo)>>half
	// Second cross product added to the low half of the first.
	mid := cross&lowMask + uLo*vHi

	hi = uHi*vHi + cross>>half + mid>>half
	// The low word is just the wrapping 64-bit product.
	lo = u * v
	return lo, hi
}
// divlu divides the 128-bit value u1:u0 (u1 is the high word) by v and
// returns the 64-bit quotient and remainder.
//
// If u1 >= v the quotient would not fit in 64 bits; this includes v == 0.
// In that case both results are all ones.
// Adapted from Hacker's Delight.
func divlu(u1, u0, v uint64) (q, r uint64) {
	const (
		base    = 1 << 32
		lowMask = base - 1
	)

	if u1 >= v {
		return 1<<64 - 1, 1<<64 - 1
	}

	// Normalize: shift the divisor left until its top bit is set,
	// counting the shift so the dividend and remainder can be adjusted.
	// v is nonzero here, so the loop terminates.
	var s uint
	for ; v>>63 == 0; v <<= 1 {
		s++
	}
	vHi, vLo := v>>32, v&lowMask

	// Apply the same shift to the dividend and split its low word into
	// two 32-bit digits.
	numHi := u1<<s | u0>>(64-s)
	numLo := u0 << s
	digit1, digit0 := numLo>>32, numLo&lowMask

	// Estimate the high quotient digit from the top divisor digit, then
	// correct the estimate downward while it is too large.
	qHi := numHi / vHi
	rhat := numHi - qHi*vHi
	for qHi >= base || qHi*vLo > base*rhat+digit1 {
		qHi--
		rhat += vHi
		if rhat >= base {
			break
		}
	}

	// Remove qHi*v from the dividend and repeat for the low digit.
	rest := numHi*base + digit1 - qHi*v
	qLo := rest / vHi
	rhat = rest - qLo*vHi
	for qLo >= base || qLo*vLo > base*rhat+digit0 {
		qLo--
		rhat += vHi
		if rhat >= base {
			break
		}
	}

	// Undo the normalization shift on the remainder.
	return qHi*base + qLo, (rest*base + digit0 - qLo*v) >> s
}