arm: precise float64 software floating point

Adds softfloat64 to generic runtime
(will be discarded by linker when unused)
and adds test for it.  I used the test to check
the software code against amd64 hardware
and then check the software code against
the arm and its simulation of hardware.
The latter should have been a no-op (testing
against itself) but turned up a bug in 5c causing
the vlrt.c routines to miscompile.

These changes make the cmath, math,
and strconv tests pass without any special
accommodations for arm.

R=ken2
CC=golang-dev
https://golang.org/cl/2713042
diff --git a/src/pkg/Makefile b/src/pkg/Makefile
index 34bd834..0bd56764 100644
--- a/src/pkg/Makefile
+++ b/src/pkg/Makefile
@@ -149,7 +149,6 @@
 	image/jpeg\
 	net/dict\
 	rand\
-	runtime\
 	runtime/pprof\
 	syscall\
 	testing/iotest\
@@ -201,14 +200,6 @@
 NOTEST+=time         # no syscall.Kill, syscall.SIGCHLD for sleep tests
 endif
 
-ifeq ($(GOARCH),arm)
-# Tests that fail, probably 5g bugs.
-# Disable so that dashboard all.bash can catch regressions.
-NOTEST+=cmath        # software floating point (lack of) accuracy
-NOTEST+=math         # software floating point (lack of) accuracy
-NOTEST+=strconv      # software floating point (lack of) accuracy
-endif
-
 TEST=\
 	$(filter-out $(NOTEST),$(DIRS))
 
diff --git a/src/pkg/runtime/Makefile b/src/pkg/runtime/Makefile
index 4c8d549..58e0e76 100644
--- a/src/pkg/runtime/Makefile
+++ b/src/pkg/runtime/Makefile
@@ -25,6 +25,7 @@
 	error.go\
 	extern.go\
 	sig.go\
+	softfloat64.go\
 	type.go\
 	version.go\
 
diff --git a/src/pkg/runtime/arm/softfloat.c b/src/pkg/runtime/arm/softfloat.c
index 396072f..a5a6ba1 100644
--- a/src/pkg/runtime/arm/softfloat.c
+++ b/src/pkg/runtime/arm/softfloat.c
@@ -2,6 +2,10 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
+// Software floating point interpretaton of ARM 7500 FP instructions.
+// The interpretation is not bit compatible with the 7500.
+// It uses true little-endian doubles, while the 7500 used mixed-endian.
+
 #include "runtime.h"
 
 void	abort(void);
@@ -18,22 +22,6 @@
 static uint32 doabort = 0;
 static uint32 trace = 0;
 
-#define DOUBLE_EXPBIAS 1023
-#define DOUBLE_MANT_MASK 0xfffffffffffffull
-#define DOUBLE_MANT_TOP_BIT 0x10000000000000ull
-#define DZERO 0x0000000000000000ull
-#define DNZERO 0x8000000000000000ull
-#define DONE 0x3ff0000000000000ull
-#define DINF 0x7ff0000000000000ull
-#define DNINF 0xfff0000000000000ull
-#define DNAN 0x7FF0000000000001ull
-
-#define SINGLE_EXPBIAS 127
-#define FINF 0x7f800000ul
-#define FNINF 0xff800000ul
-#define FNAN 0x7f800000ul
-
-
 static const int8* opnames[] = {
 	// binary
 	"adf",
@@ -115,24 +103,6 @@
 	}
 }
 
-static int32
-fexp(uint64 f)
-{
-	return (int32)((uint32)(f >> 52) & 0x7ff) - DOUBLE_EXPBIAS;
-}
-
-static uint32
-fsign(uint64 f)
-{
-	return (uint32)(f >> 63) & 0x1;
-}
-
-static uint64
-fmantissa(uint64 f)
-{
-	return f &0x000fffffffffffffll;
-}
-
 static void
 fprint(void)
 {
@@ -145,42 +115,19 @@
 static uint32
 d2s(uint64 d)
 {
-	if ((d & ~(1ull << 63)) == 0)
-		return (uint32)(d>>32);
-	if (d == DINF)
-		return FINF;
-	if (d == DNINF)
-		return FNINF;
-	if ((d & ~(1ull << 63)) == DNAN)
-		return FNAN;
-	return (d>>32 & 0x80000000) |	//sign
-		((uint32)(fexp(d) + SINGLE_EXPBIAS) & 0xff) << 23 |	// exponent
-		(d >> 29 & 0x7fffff);	// mantissa
+	uint32 x;
+	
+	·f64to32c(d, &x);
+	return x;
 }
 
 static uint64
 s2d(uint32 s)
 {
-	if ((s & ~(1ul << 31)) == 0)
-		return (uint64)(s) << 32;
-	if (s == FINF)
-		return DINF;
-	if (s == FNINF)
-		return DNINF;
-	if ((s & ~(1ul << 31)) == FNAN)
-		return DNAN;
-	return (uint64)(s & 0x80000000) << 32 |	// sign
-		(uint64)((s >> 23 &0xff) + (DOUBLE_EXPBIAS - SINGLE_EXPBIAS)) << 52  |	// exponent
-		(uint64)(s & 0x7fffff) << 29;	// mantissa
-}
-
-static int64
-rsh(int64 f, int32 s)
-{
-	if (s >= 0)
-		return f>>s;
-	else
-		return f<<-s;
+	uint64 x;
+	
+	·f32to64c(s, &x);
+	return x;
 }
 
 // cdp, data processing instructions
@@ -188,12 +135,8 @@
 dataprocess(uint32* pc)
 {
 	uint32 i, opcode, unary, dest, lhs, rhs, prec;
-	uint32 high;
-	int32 expd, exp0, exp1;
-	uint64 fraw0, fraw1, exp, sign;
-	uint64 fd, f0, f1;
-	int64 fsd, fs0, fs1;
-
+	uint64 l, r;
+	uint64 fd;
 	i = *pc;
 
 	// data processing
@@ -220,129 +163,25 @@
 			goto undef;
 		}
 	} else {
-		fraw0 = m->freg[lhs];
-		fraw1 = frhs(rhs);
-		if (isNaN(float64frombits(fraw0)) || isNaN(float64frombits(fraw1))) {
-			m->freg[dest] = DNAN;
-			goto ret;
-		}
+		l = m->freg[lhs];
+		r = frhs(rhs);
 		switch (opcode) {
-		case 2: // suf
-			fraw1 ^= 0x1ll << 63;
-			// fallthrough
-		case 0: // adf
-			if (fraw0 == DZERO || fraw0 == DNZERO) {
-				m->freg[dest] = fraw1;
-				goto ret;
-			}
-			if (fraw1 == DZERO || fraw1 == DNZERO) {
-				m->freg[dest] = fraw0;
-				goto ret;
-			}
-			fs0 = fraw0 & DOUBLE_MANT_MASK | DOUBLE_MANT_TOP_BIT;
-			fs1 = fraw1 & DOUBLE_MANT_MASK | DOUBLE_MANT_TOP_BIT;
-			exp0 = fexp(fraw0);
-			exp1 = fexp(fraw1);
-			if (exp0 > exp1)
-				fs1 = rsh(fs1, exp0-exp1);
-			else
-				fs0 = rsh(fs0, exp1-exp0);
-			if (fraw0 & 0x1ll<<63)
-				fs0 = -fs0;
-			if (fraw1 & 0x1ll<<63)
-				fs1 = -fs1;
-			fsd = fs0 + fs1;
-			if (fsd == 0) {
-				m->freg[dest] = DZERO;
-				goto ret;
-			}
-			sign = (uint64)fsd & 0x1ll<<63;
-			if (fsd < 0)
-				fsd = -fsd;
-			for (expd = 55; expd > 0; expd--) {
-				if (0x1ll<<expd & fsd)
-					break;
-			}
-			if (expd - 52 < 0)
-				fsd <<= -(expd - 52);
-			else
-				fsd >>= expd - 52;
-			if (exp0 > exp1)
-				exp = expd + exp0 - 52;
-			else
-				exp = expd + exp1 - 52;
-			// too small value, can't represent
-			if (1<<31 & expd) {
-				m->freg[dest] = DZERO;
-				goto ret;
-			}
-			// infinity
-			if (expd > 1<<12) {
-				m->freg[dest] = DINF;
-				goto ret;
-			}
-			fd = sign | (exp + DOUBLE_EXPBIAS)<<52 | (uint64)fsd & DOUBLE_MANT_MASK;
-			m->freg[dest] = fd;
-			goto ret;
-
-		case 4: //dvf
-			if ((fraw1 & ~(1ull<<63)) == 0) {
-				if ((fraw0 & ~(1ull<<63)) == 0) {
-					m->freg[dest] = DNAN;
-				} else {
-					sign = fraw0 & 1ull<<63 ^ fraw1 & 1ull<<63;
-					m->freg[dest] = sign | DINF;
-				}
-				goto ret;
-			}
-			// reciprocal for fraw1
-			if (fraw1 == DONE)
-				goto muf;
-			f0 = 0x1ll << 63;
-			f1 = fraw1 & DOUBLE_MANT_MASK | DOUBLE_MANT_TOP_BIT;
-			f1 >>= 21;
-			fd = f0/f1;
-			fd <<= 21;
-			fd &= DOUBLE_MANT_MASK;
-			exp1 = -fexp(fraw1) - 1;
-			sign = fraw1 & 0x1ll<<63;
-			fraw1 = sign | (uint64)(exp1 + DOUBLE_EXPBIAS)<<52 | fd;
-			// fallthrough
-		case 1: // muf
-muf:			
-			if (fraw0 == DNZERO || fraw1 == DNZERO) {
-				m->freg[dest] = DNZERO;
-				goto ret;
-			}
-			if (fraw0 == DZERO || fraw1 == DZERO) {
-				m->freg[dest] = DZERO;
-				goto ret;
-			}
-			if (fraw0 == DONE) {
-				m->freg[dest] = fraw1;
-				goto ret;
-			}
-			if (fraw1 == DONE) {
-				m->freg[dest] = fraw0;
-				goto ret;
-			}
-			f0 = fraw0>>21 & 0x7fffffff | 0x1ll<<31;
-			f1 = fraw1>>21 & 0x7fffffff | 0x1ll<<31;
-			fd = f0*f1;
-			high = fd >> 63;
-			if (high)
-				fd = fd >> 11 & DOUBLE_MANT_MASK;
-			else
-				fd = fd >> 10 & DOUBLE_MANT_MASK;
-			exp = (uint64)(fexp(fraw0) + fexp(fraw1) + !!high + DOUBLE_EXPBIAS) & 0x7ff;
-			sign = fraw0 >> 63 ^ fraw1 >> 63;
-			fd = sign<<63 | exp<<52 | fd;
-			m->freg[dest] = fd;
-			goto ret;
-
 		default:
 			goto undef;
+		case 0:
+			·fadd64c(l, r, &m->freg[dest]);
+			break;
+		case 1:
+			·fmul64c(l, r, &m->freg[dest]);
+			break;
+		case 2:
+			·fsub64c(l, r, &m->freg[dest]);
+			break;
+		case 4:
+			·fdiv64c(l, r, &m->freg[dest]);
+			break;
 		}
+		goto ret;
 	}
 
 
@@ -375,61 +214,28 @@
 static void
 compare(uint32 *pc, uint32 *regs)
 {
-	uint32 i, flags, lhs, rhs, sign0, sign1;
-	uint64 f0, f1, mant0, mant1;
-	int32 exp0, exp1;
+	uint32 i, flags, lhs, rhs;
+	uint64 l, r;
+	int32 cmp;
+	bool nan;
 
 	i = *pc;
 	flags = 0;
 	lhs = i>>16 & 0x7;
 	rhs = i & 0xf;
 
-	f0 = m->freg[lhs];
-	f1 = frhs(rhs);
-	if (isNaN(float64frombits(f0)) || isNaN(float64frombits(f1))) {
+	l = m->freg[lhs];
+	r = frhs(rhs);
+	·fcmp64c(l, r, &cmp, &nan);
+	if (nan)
 		flags = FLAGS_C | FLAGS_V;
-		goto ret;
-	}
-	if (f0 == f1) {
+	else if (cmp == 0)
 		flags = FLAGS_Z | FLAGS_C;
-		goto ret;
-	}
-
-	sign0 = fsign(f0);
-	sign1 = fsign(f1);
-	if (sign0 == 1 && sign1 == 0) {
+	else if (cmp < 0)
 		flags = FLAGS_N;
-		goto ret;
-	}
-	if (sign0 == 0 && sign1 == 1) {
+	else
 		flags = FLAGS_C;
-		goto ret;
-	}
 
-	if (sign0 == 0) {
-		exp0 = fexp(f0);
-		exp1 = fexp(f1);
-		mant0 = fmantissa(f0);
-		mant1 = fmantissa(f1);
-	} else {
-		exp0 = fexp(f1);
-		exp1 = fexp(f0);
-		mant0 = fmantissa(f1);
-		mant1 = fmantissa(f0);
-	}
-
-	if (exp0 > exp1) {
-		flags = FLAGS_C;
-	} else if (exp0 < exp1) {
-		flags = FLAGS_N;
-	} else {
-		if (mant0 > mant1)
-			flags = FLAGS_C;
-		else
-			flags = FLAGS_N;
-	}
-
-ret:
 	if (trace) {
 		printf(" %p %x\tcmf\tf%d, ", pc, *pc, lhs);
 		if (rhs & 0x8)
@@ -503,9 +309,10 @@
 static void
 fltfix(uint32 *pc, uint32 *regs)
 {
-	uint32 i, toarm, freg, reg, sign, val, prec;
-	int32 rd, exp;
-	uint64 fd, f0;
+	uint32 i, toarm, freg, reg, prec;
+	int64 val;
+	uint64 f0;
+	bool ok;
 	
 	i = *pc;
 	toarm = i>>20 & 0x1;
@@ -513,33 +320,15 @@
 	reg = i>>12 & 0xf;
 	prec = precision(i);
 
-	if (toarm) { //fix
+	if (toarm) { // fix
 		f0 = m->freg[freg];
-		fd = f0 & DOUBLE_MANT_MASK | DOUBLE_MANT_TOP_BIT;
-		exp = fexp(f0) - 52;
-		if (exp < 0)
-			fd = fd>>(-exp);
-		else
-			fd = fd<<exp;
-		rd = ((int32)fd & 0x7fffffff);
-		if (f0 & 0x1ll<<63)
-			rd = -rd;
-		regs[reg] = (uint32)rd;
+		·f64tointc(f0, &val, &ok);
+		if (!ok || (int32)val != val)
+			val = 0;
+		regs[reg] = val;
 	} else { // flt
-		if (regs[reg] == 0) {
-			m->freg[freg] = DZERO;
-			goto ret;
-		}
-		sign = regs[reg] >> 31 & 0x1;
-		val = regs[reg];
-		if (sign) val = -val;
-		for (exp = 31; exp >= 0; exp--) {
-			if (1<<(exp) & val)
-				break;
-		}
-		fd = (uint64)val<<(52-exp) & DOUBLE_MANT_MASK;
-		m->freg[freg] = (uint64)(sign) << 63 | 
-			(uint64)(exp + DOUBLE_EXPBIAS) << 52 | fd;
+		·fintto64c((int32)regs[reg], &f0);
+		m->freg[freg] = f0;
 	}
 	goto ret;
 	
@@ -581,7 +370,8 @@
 		loadstore(pc, regs);
 		return 1;
 	case 7: // 111
-		if (i>>24 & 1) return 0; // ignore swi
+		if (i>>24 & 1)
+			return 0; // ignore swi
 
 		if (i>>4 & 1) { //data transfer
 			if ((i&0x00f0ff00) == 0x0090f100) {
@@ -606,6 +396,14 @@
 		regs[11] = *(uint32*)((uint8*)pc + (i&0xfff) + 8);
 		return 1;
 	}
+	
+	if(i == 0xe08bb00d) {
+		// add sp to 11.
+		// might be part of a large stack offset address
+		// (or might not, but again no harm done).
+		regs[11] += regs[13];
+		return 1;
+	}
 
 	return 0;
 }
@@ -615,11 +413,9 @@
 _sfloat2(uint32 *lr, uint32 r0)
 {
 	uint32 skip;
-//	uint32 cpsr;
 
-	while(skip = stepflt(lr, &r0)) {
+	while(skip = stepflt(lr, &r0))
 		lr += skip;
-	}
 	return lr;
 }
 
diff --git a/src/pkg/runtime/export_test.go b/src/pkg/runtime/export_test.go
new file mode 100644
index 0000000..58631c7
--- /dev/null
+++ b/src/pkg/runtime/export_test.go
@@ -0,0 +1,17 @@
+// Copyright 2010 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Export guts for testing.
+
+package runtime
+
+var Fadd64 = fadd64
+var Fsub64 = fsub64
+var Fmul64 = fmul64
+var Fdiv64 = fdiv64
+var F64to32 = f64to32
+var F32to64 = f32to64
+var Fcmp64 = fcmp64
+var Fintto64 = fintto64
+var F64toint = f64toint
diff --git a/src/pkg/runtime/runtime.h b/src/pkg/runtime/runtime.h
index 92da669..0a36d27 100644
--- a/src/pkg/runtime/runtime.h
+++ b/src/pkg/runtime/runtime.h
@@ -558,6 +558,7 @@
 void	·panic(Eface);
 void	·panicindex(void);
 void	·panicslice(void);
+
 /*
  * runtime c-called (but written in Go)
  */
@@ -565,6 +566,16 @@
 void	·printany(Eface);
 void	·newTypeAssertionError(Type*, Type*, Type*, String*, String*, String*, String*, Eface*);
 void	·newErrorString(String, Eface*);
+void	·fadd64c(uint64, uint64, uint64*);
+void	·fsub64c(uint64, uint64, uint64*);
+void	·fmul64c(uint64, uint64, uint64*);
+void	·fdiv64c(uint64, uint64, uint64*);
+void	·fneg64c(uint64, uint64*);
+void	·f32to64c(uint32, uint64*);
+void	·f64to32c(uint64, uint32*);
+void	·fcmp64c(uint64, uint64, int32*, bool*);
+void	·fintto64c(int64, uint64*);
+void	·f64tointc(uint64, int64*, bool*);
 
 /*
  * wrapped for go users
diff --git a/src/pkg/runtime/softfloat64.go b/src/pkg/runtime/softfloat64.go
new file mode 100644
index 0000000..d9bbe5d
--- /dev/null
+++ b/src/pkg/runtime/softfloat64.go
@@ -0,0 +1,498 @@
+// Copyright 2010 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Software IEEE754 64-bit floating point.
+// Only referred to (and thus linked in) by arm port
+// and by gotest in this directory.
+
+package runtime
+
+const (
+	mantbits64 uint = 52
+	expbits64  uint = 11
+	bias64     = -1<<(expbits64-1) + 1
+
+	nan64 uint64 = (1<<expbits64-1)<<mantbits64 + 1
+	inf64 uint64 = (1<<expbits64 - 1) << mantbits64
+	neg64 uint64 = 1 << (expbits64 + mantbits64)
+
+	mantbits32 uint = 23
+	expbits32  uint = 8
+	bias32     = -1<<(expbits32-1) + 1
+
+	nan32 uint32 = (1<<expbits32-1)<<mantbits32 + 1
+	inf32 uint32 = (1<<expbits32 - 1) << mantbits32
+	neg32 uint32 = 1 << (expbits32 + mantbits32)
+)
+
+func funpack64(f uint64) (sign, mant uint64, exp int, inf, nan bool) {
+	sign = f & (1 << (mantbits64 + expbits64))
+	mant = f & (1<<mantbits64 - 1)
+	exp = int(f>>mantbits64) & (1<<expbits64 - 1)
+
+	switch exp {
+	case 1<<expbits64 - 1:
+		if mant != 0 {
+			nan = true
+			return
+		}
+		inf = true
+		return
+
+	case 0:
+		// denormalized
+		if mant != 0 {
+			exp += bias64 + 1
+			for mant < 1<<mantbits64 {
+				mant <<= 1
+				exp--
+			}
+		}
+
+	default:
+		// add implicit top bit
+		mant |= 1 << mantbits64
+		exp += bias64
+	}
+	return
+}
+
+func funpack32(f uint32) (sign, mant uint32, exp int, inf, nan bool) {
+	sign = f & (1 << (mantbits32 + expbits32))
+	mant = f & (1<<mantbits32 - 1)
+	exp = int(f>>mantbits32) & (1<<expbits32 - 1)
+
+	switch exp {
+	case 1<<expbits32 - 1:
+		if mant != 0 {
+			nan = true
+			return
+		}
+		inf = true
+		return
+
+	case 0:
+		// denormalized
+		if mant != 0 {
+			exp += bias32 + 1
+			for mant < 1<<mantbits32 {
+				mant <<= 1
+				exp--
+			}
+		}
+
+	default:
+		// add implicit top bit
+		mant |= 1 << mantbits32
+		exp += bias32
+	}
+	return
+}
+
+func fpack64(sign, mant uint64, exp int, trunc uint64) uint64 {
+	mant0, exp0, trunc0 := mant, exp, trunc
+	if mant == 0 {
+		return sign
+	}
+	for mant < 1<<mantbits64 {
+		mant <<= 1
+		exp--
+	}
+	for mant >= 4<<mantbits64 {
+		trunc |= mant & 1
+		mant >>= 1
+		exp++
+	}
+	if mant >= 2<<mantbits64 {
+		if mant&1 != 0 && (trunc != 0 || mant&2 != 0) {
+			mant++
+			if mant >= 4<<mantbits64 {
+				mant >>= 1
+				exp++
+			}
+		}
+		mant >>= 1
+		exp++
+	}
+	if exp >= 1<<expbits64-1+bias64 {
+		return sign ^ inf64
+	}
+	if exp < bias64+1 {
+		if exp < bias64-int(mantbits64) {
+			return sign | 0
+		}
+		// repeat expecting denormal
+		mant, exp, trunc = mant0, exp0, trunc0
+		for exp < bias64 {
+			trunc |= mant & 1
+			mant >>= 1
+			exp++
+		}
+		if mant&1 != 0 && (trunc != 0 || mant&2 != 0) {
+			mant++
+		}
+		mant >>= 1
+		exp++
+		if mant < 1<<mantbits64 {
+			return sign | mant
+		}
+	}
+	return sign | uint64(exp-bias64)<<mantbits64 | mant&(1<<mantbits64-1)
+}
+
+func fpack32(sign, mant uint32, exp int, trunc uint32) uint32 {
+	mant0, exp0, trunc0 := mant, exp, trunc
+	if mant == 0 {
+		return sign
+	}
+	for mant < 1<<mantbits32 {
+		mant <<= 1
+		exp--
+	}
+	for mant >= 4<<mantbits32 {
+		trunc |= mant & 1
+		mant >>= 1
+		exp++
+	}
+	if mant >= 2<<mantbits32 {
+		if mant&1 != 0 && (trunc != 0 || mant&2 != 0) {
+			mant++
+			if mant >= 4<<mantbits32 {
+				mant >>= 1
+				exp++
+			}
+		}
+		mant >>= 1
+		exp++
+	}
+	if exp >= 1<<expbits32-1+bias32 {
+		return sign ^ inf32
+	}
+	if exp < bias32+1 {
+		if exp < bias32-int(mantbits32) {
+			return sign | 0
+		}
+		// repeat expecting denormal
+		mant, exp, trunc = mant0, exp0, trunc0
+		for exp < bias32 {
+			trunc |= mant & 1
+			mant >>= 1
+			exp++
+		}
+		if mant&1 != 0 && (trunc != 0 || mant&2 != 0) {
+			mant++
+		}
+		mant >>= 1
+		exp++
+		if mant < 1<<mantbits32 {
+			return sign | mant
+		}
+	}
+	return sign | uint32(exp-bias32)<<mantbits32 | mant&(1<<mantbits32-1)
+}
+
+func fadd64(f, g uint64) uint64 {
+	fs, fm, fe, fi, fn := funpack64(f)
+	gs, gm, ge, gi, gn := funpack64(g)
+
+	// Special cases.
+	switch {
+	case fn || gn: // NaN + x or x + NaN = NaN
+		return nan64
+
+	case fi && gi && fs != gs: // +Inf + -Inf or -Inf + +Inf = NaN
+		return nan64
+
+	case fi: // ±Inf + g = ±Inf
+		return f
+
+	case gi: // f + ±Inf = ±Inf
+		return g
+
+	case fm == 0 && gm == 0 && fs != 0 && gs != 0: // -0 + -0 = -0
+		return f
+
+	case fm == 0: // 0 + g = g but 0 + -0 = +0
+		if gm == 0 {
+			g ^= gs
+		}
+		return g
+
+	case gm == 0: // f + 0 = f
+		return f
+
+	}
+
+	if fe < ge || fe == ge && fm < gm {
+		f, g, fs, fm, fe, gs, gm, ge = g, f, gs, gm, ge, fs, fm, fe
+	}
+
+	shift := uint(fe - ge)
+	fm <<= 2
+	gm <<= 2
+	trunc := gm & (1<<shift - 1)
+	gm >>= shift
+	if fs == gs {
+		fm += gm
+	} else {
+		fm -= gm
+		if trunc != 0 {
+			fm--
+		}
+	}
+	if fm == 0 {
+		fs = 0
+	}
+	return fpack64(fs, fm, fe-2, trunc)
+}
+
+func fsub64(f, g uint64) uint64 {
+	return fadd64(f, fneg64(g))
+}
+
+func fneg64(f uint64) uint64 {
+	return f ^ (1 << (mantbits64 + expbits64))
+}
+
+func fmul64(f, g uint64) uint64 {
+	fs, fm, fe, fi, fn := funpack64(f)
+	gs, gm, ge, gi, gn := funpack64(g)
+
+	// Special cases.
+	switch {
+	case fn || gn: // NaN * g or f * NaN = NaN
+		return nan64
+
+	case fi && gi: // Inf * Inf = Inf (with sign adjusted)
+		return f ^ gs
+
+	case fi && gm == 0, fm == 0 && gi: // 0 * Inf = Inf * 0 = NaN
+		return nan64
+
+	case fm == 0: // 0 * x = 0 (with sign adjusted)
+		return f ^ gs
+
+	case gm == 0: // x * 0 = 0 (with sign adjusted)
+		return g ^ fs
+	}
+
+	// 53-bit * 53-bit = 107- or 108-bit
+	lo, hi := mullu(fm, gm)
+	shift := mantbits64 - 1
+	trunc := lo & (1<<shift - 1)
+	mant := hi<<(64-shift) | lo>>shift
+	return fpack64(fs^gs, mant, fe+ge-1, trunc)
+}
+
+func fdiv64(f, g uint64) uint64 {
+	fs, fm, fe, fi, fn := funpack64(f)
+	gs, gm, ge, gi, gn := funpack64(g)
+
+	// Special cases.
+	switch {
+	case fn || gn: // NaN / g = f / NaN = NaN
+		return nan64
+
+	case fi && gi: // ±Inf / ±Inf = NaN
+		return nan64
+
+	case !fi && !gi && fm == 0 && gm == 0: // 0 / 0 = NaN
+		return nan64
+
+	case fi, !gi && gm == 0: // Inf / g = f / 0 = Inf
+		return fs ^ gs ^ inf64
+
+	case gi, fm == 0: // f / Inf = 0 / g = Inf
+		return fs ^ gs ^ 0
+	}
+	_, _, _, _ = fi, fn, gi, gn
+
+	// 53-bit<<54 / 53-bit = 53- or 54-bit.
+	shift := mantbits64 + 2
+	q, r := divlu(fm>>(64-shift), fm<<shift, gm)
+	return fpack64(fs^gs, q, fe-ge-2, r)
+}
+
+func f64to32(f uint64) uint32 {
+	fs, fm, fe, fi, fn := funpack64(f)
+	if fn {
+		return nan32
+	}
+	fs32 := uint32(fs >> 32)
+	if fi {
+		return fs32 ^ inf32
+	}
+	const d = mantbits64 - mantbits32 - 1
+	return fpack32(fs32, uint32(fm>>d), fe-1, uint32(fm&(1<<d-1)))
+}
+
+func f32to64(f uint32) uint64 {
+	const d = mantbits64 - mantbits32
+	fs, fm, fe, fi, fn := funpack32(f)
+	if fn {
+		return nan64
+	}
+	fs64 := uint64(fs) << 32
+	if fi {
+		return fs64 ^ inf64
+	}
+	return fpack64(fs64, uint64(fm)<<d, fe, 0)
+}
+
+func fcmp64(f, g uint64) (cmp int, isnan bool) {
+	fs, fm, _, fi, fn := funpack64(f)
+	gs, gm, _, gi, gn := funpack64(g)
+
+	switch {
+	case fn, gn: // flag NaN
+		return 0, true
+
+	case !fi && !gi && fm == 0 && gm == 0: // ±0 == ±0
+		return 0, false
+
+	case fs > gs: // f < 0, g > 0
+		return -1, false
+
+	case fs < gs: // f > 0, g < 0
+		return +1, false
+
+	// Same sign, not NaN.
+	// Can compare encodings directly now.
+	// Reverse for sign.
+	case fs == 0 && f < g, fs != 0 && f > g:
+		return -1, false
+
+	case fs == 0 && f > g, fs != 0 && f < g:
+		return +1, false
+	}
+
+	// f == g
+	return 0, false
+}
+
+func f64toint(f uint64) (val int64, ok bool) {
+	fs, fm, fe, fi, fn := funpack64(f)
+
+	switch {
+	case fi, fn: // NaN
+		return 0, false
+
+	case fe < -1: // f < 0.5
+		return 0, false
+
+	case fe > 63: // f >= 2^63
+		if fs != 0 && fm == 0 { // f == -2^63
+			return -1 << 63, true
+		}
+		if fs != 0 {
+			return 0, false
+		}
+		return 0, false
+	}
+
+	for fe > int(mantbits64) {
+		fe--
+		fm <<= 1
+	}
+	for fe < int(mantbits64) {
+		fe++
+		fm >>= 1
+	}
+	val = int64(fm)
+	if fs != 0 {
+		val = -val
+	}
+	return val, true
+}
+
+func fintto64(val int64) (f uint64) {
+	fs := uint64(val) & (1 << 63)
+	mant := uint64(val)
+	if fs != 0 {
+		mant = -mant
+	}
+	return fpack64(fs, mant, int(mantbits64), 0)
+}
+
+// 64x64 -> 128 multiply.
+// adapted from hacker's delight.
+func mullu(u, v uint64) (lo, hi uint64) {
+	const (
+		s    = 32
+		mask = 1<<s - 1
+	)
+	u0 := u & mask
+	u1 := u >> s
+	v0 := v & mask
+	v1 := v >> s
+	w0 := u0 * v0
+	t := u1*v0 + w0>>s
+	w1 := t & mask
+	w2 := t >> s
+	w1 += u0 * v1
+	return u * v, u1*v1 + w2 + w1>>s
+}
+
+// 128/64 -> 64 quotient, 64 remainder.
+// adapted from hacker's delight
+func divlu(u1, u0, v uint64) (q, r uint64) {
+	const b = 1 << 32
+
+	if u1 >= v {
+		return 1<<64 - 1, 1<<64 - 1
+	}
+
+	// s = nlz(v); v <<= s
+	s := uint(0)
+	for v&(1<<63) == 0 {
+		s++
+		v <<= 1
+	}
+
+	vn1 := v >> 32
+	vn0 := v & (1<<32 - 1)
+	un32 := u1<<s | u0>>(64-s)
+	un10 := u0 << s
+	un1 := un10 >> 32
+	un0 := un10 & (1<<32 - 1)
+	q1 := un32 / vn1
+	rhat := un32 - q1*vn1
+
+again1:
+	if q1 >= b || q1*vn0 > b*rhat+un1 {
+		q1--
+		rhat += vn1
+		if rhat < b {
+			goto again1
+		}
+	}
+
+	un21 := un32*b + un1 - q1*v
+	q0 := un21 / vn1
+	rhat = un21 - q0*vn1
+
+again2:
+	if q0 >= b || q0*vn0 > b*rhat+un0 {
+		q0--
+		rhat += vn1
+		if rhat < b {
+			goto again2
+		}
+	}
+
+	return q1*b + q0, (un21*b + un0 - q0*v) >> s
+}
+
+// callable from C
+
+func fadd64c(f, g uint64, ret *uint64)            { *ret = fadd64(f, g) }
+func fsub64c(f, g uint64, ret *uint64)            { *ret = fsub64(f, g) }
+func fmul64c(f, g uint64, ret *uint64)            { *ret = fmul64(f, g) }
+func fdiv64c(f, g uint64, ret *uint64)            { *ret = fdiv64(f, g) }
+func fneg64c(f uint64, ret *uint64)               { *ret = fneg64(f) }
+func f32to64c(f uint32, ret *uint64)              { *ret = f32to64(f) }
+func f64to32c(f uint64, ret *uint32)              { *ret = f64to32(f) }
+func fcmp64c(f, g uint64, ret *int, retnan *bool) { *ret, *retnan = fcmp64(f, g) }
+func fintto64c(val int64, ret *uint64)            { *ret = fintto64(val) }
+func f64tointc(f uint64, ret *int64, retok *bool) { *ret, *retok = f64toint(f) }
diff --git a/src/pkg/runtime/softfloat64_test.go b/src/pkg/runtime/softfloat64_test.go
new file mode 100644
index 0000000..fb7f3d3
--- /dev/null
+++ b/src/pkg/runtime/softfloat64_test.go
@@ -0,0 +1,198 @@
+// Copyright 2010 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+	"math"
+	"rand"
+	. "runtime"
+	"testing"
+)
+
+// turn uint64 op into float64 op
+func fop(f func(x, y uint64) uint64) func(x, y float64) float64 {
+	return func(x, y float64) float64 {
+		bx := math.Float64bits(x)
+		by := math.Float64bits(y)
+		return math.Float64frombits(f(bx, by))
+	}
+}
+
+func add(x, y float64) float64 { return x + y }
+func sub(x, y float64) float64 { return x - y }
+func mul(x, y float64) float64 { return x * y }
+func div(x, y float64) float64 { return x / y }
+
+func TestFloat64(t *testing.T) {
+	base := []float64{
+		0,
+		math.Copysign(0, -1),
+		-1,
+		1,
+		math.NaN(),
+		math.Inf(+1),
+		math.Inf(-1),
+		0.1,
+		1.5,
+		1.9999999999999998,     // all 1s mantissa
+		1.3333333333333333,     // 1.010101010101...
+		1.1428571428571428,     // 1.001001001001...
+		1.112536929253601e-308, // first normal
+		2,
+		4,
+		8,
+		16,
+		32,
+		64,
+		128,
+		256,
+		3,
+		12,
+		1234,
+		123456,
+		-0.1,
+		-1.5,
+		-1.9999999999999998,
+		-1.3333333333333333,
+		-1.1428571428571428,
+		-2,
+		-3,
+		1e-200,
+		1e-300,
+		1e-310,
+		5e-324,
+		1e-105,
+		1e-305,
+		1e+200,
+		1e+306,
+		1e+307,
+		1e+308,
+	}
+	all := make([]float64, 200)
+	copy(all, base)
+	for i := len(base); i < len(all); i++ {
+		all[i] = rand.NormFloat64()
+	}
+
+	test(t, "+", add, fop(Fadd64), all)
+	test(t, "-", sub, fop(Fsub64), all)
+	if GOARCH != "386" { // 386 is not precise!
+		test(t, "*", mul, fop(Fmul64), all)
+		test(t, "/", div, fop(Fdiv64), all)
+	}
+}
+
+// 64 -hw-> 32 -hw-> 64
+func trunc32(f float64) float64 {
+	return float64(float32(f))
+}
+
+// 64 -sw->32 -hw-> 64
+func to32sw(f float64) float64 {
+	return float64(math.Float32frombits(F64to32(math.Float64bits(f))))
+}
+
+// 64 -hw->32 -sw-> 64
+func to64sw(f float64) float64 {
+	return math.Float64frombits(F32to64(math.Float32bits(float32(f))))
+}
+
+// float64 -hw-> int64 -hw-> float64
+func hwint64(f float64) float64 {
+	return float64(int64(f))
+}
+
+// float64 -hw-> int32 -hw-> float64
+func hwint32(f float64) float64 {
+	return float64(int32(f))
+}
+
+// float64 -sw-> int64 -hw-> float64
+func toint64sw(f float64) float64 {
+	i, ok := F64toint(math.Float64bits(f))
+	if !ok {
+		// There's no right answer for out of range.
+		// Match the hardware to pass the test.
+		i = int64(f)
+	}
+	return float64(i)
+}
+
+// float64 -hw-> int64 -sw-> float64
+func fromint64sw(f float64) float64 {
+	return math.Float64frombits(Fintto64(int64(f)))
+}
+
+var nerr int
+
+func err(t *testing.T, format string, args ...interface{}) {
+	t.Errorf(format, args...)
+
+	// cut errors off after a while.
+	// otherwise we spend all our time
+	// allocating memory to hold the
+	// formatted output.
+	if nerr++; nerr >= 10 {
+		t.Fatal("too many errors")
+	}
+}
+
+func test(t *testing.T, op string, hw, sw func(float64, float64) float64, all []float64) {
+	for _, f := range all {
+		for _, g := range all {
+			h := hw(f, g)
+			s := sw(f, g)
+			if !same(h, s) {
+				err(t, "%g %s %g = sw %g, hw %g\n", f, op, g, s, h)
+			}
+			testu(t, "to32", trunc32, to32sw, h)
+			testu(t, "to64", trunc32, to64sw, h)
+			testu(t, "toint64", hwint64, toint64sw, h)
+			testu(t, "fromint64", hwint64, fromint64sw, h)
+			testcmp(t, f, h)
+			testcmp(t, h, f)
+			testcmp(t, g, h)
+			testcmp(t, h, g)
+		}
+	}
+}
+
+func testu(t *testing.T, op string, hw, sw func(float64) float64, v float64) {
+	h := hw(v)
+	s := sw(v)
+	if !same(h, s) {
+		err(t, "%s %g = sw %g, hw %g\n", op, v, s, h)
+	}
+}
+
+func hwcmp(f, g float64) (cmp int, isnan bool) {
+	switch {
+	case f < g:
+		return -1, false
+	case f > g:
+		return +1, false
+	case f == g:
+		return 0, false
+	}
+	return 0, true // must be NaN
+}
+
+func testcmp(t *testing.T, f, g float64) {
+	hcmp, hisnan := hwcmp(f, g)
+	scmp, sisnan := Fcmp64(math.Float64bits(f), math.Float64bits(g))
+	if hcmp != scmp || hisnan != sisnan {
+		err(t, "cmp(%g, %g) = sw %v, %v, hw %v, %v\n", f, g, scmp, sisnan, hcmp, hisnan)
+	}
+}
+
+func same(f, g float64) bool {
+	if math.IsNaN(f) && math.IsNaN(g) {
+		return true
+	}
+	if math.Copysign(1, f) != math.Copysign(1, g) {
+		return false
+	}
+	return f == g
+}