strconv: faster float conversion

- added AppendFloat benchmarks (BenchmarkAppendFloat, ...Decimal, ...Exp, ...Big)
- 2% to 13% better performance
- check for illegal bitSize: FormatFloat/AppendFloat now panic unless bitSize is 32 or 64

benchmark                                   old ns/op    new ns/op    delta
strconv_test.BenchmarkFormatFloatDecimal         2993         2733   -8.69%
strconv_test.BenchmarkFormatFloat                3384         3141   -7.18%
strconv_test.BenchmarkFormatFloatExp             9192         9010   -1.98%
strconv_test.BenchmarkFormatFloatBig             3279         3207   -2.20%
strconv_test.BenchmarkAppendFloatDecimal         2837         2478  -12.65%
strconv_test.BenchmarkAppendFloat                3196         2928   -8.39%
strconv_test.BenchmarkAppendFloatExp             9028         8773   -2.82%
strconv_test.BenchmarkAppendFloatBig             3151         2782  -11.71%

R=rsc, bradfitz
CC=golang-dev
https://golang.org/cl/5448122
diff --git a/src/pkg/strconv/ftoa.go b/src/pkg/strconv/ftoa.go
index e1ea0a35..b2413ee 100644
--- a/src/pkg/strconv/ftoa.go
+++ b/src/pkg/strconv/ftoa.go
@@ -45,20 +45,30 @@
 // Ftoa32(f) is not the same as Ftoa64(float32(f)),
 // because correct rounding and the number of digits
 // needed to identify f depend on the precision of the representation.
-func FormatFloat(f float64, fmt byte, prec int, n int) string {
-	if n == 32 {
-		return genericFtoa(uint64(math.Float32bits(float32(f))), fmt, prec, &float32info)
-	}
-	return genericFtoa(math.Float64bits(f), fmt, prec, &float64info)
+func FormatFloat(f float64, fmt byte, prec, bitSize int) string {
+	return string(genericFtoa(make([]byte, 0, 16), f, fmt, prec, bitSize))
 }
 
 // AppendFloat appends the string form of the floating-point number f,
 // as generated by FormatFloat, to dst and returns the extended buffer.
-func AppendFloat(dst []byte, f float64, fmt byte, prec int, n int) []byte {
-	return append(dst, FormatFloat(f, fmt, prec, n)...)
+func AppendFloat(dst []byte, f float64, fmt byte, prec int, bitSize int) []byte {
+	return genericFtoa(dst, f, fmt, prec, bitSize)
 }
 
-func genericFtoa(bits uint64, fmt byte, prec int, flt *floatInfo) string {
+func genericFtoa(dst []byte, val float64, fmt byte, prec, bitSize int) []byte {
+	var bits uint64
+	var flt *floatInfo
+	switch bitSize {
+	case 32:
+		bits = uint64(math.Float32bits(float32(val)))
+		flt = &float32info
+	case 64:
+		bits = math.Float64bits(val)
+		flt = &float64info
+	default:
+		panic("strconv: illegal AppendFloat/FormatFloat bitSize")
+	}
+
 	neg := bits>>(flt.expbits+flt.mantbits) != 0
 	exp := int(bits>>flt.mantbits) & (1<<flt.expbits - 1)
 	mant := bits & (uint64(1)<<flt.mantbits - 1)
@@ -66,13 +76,16 @@
 	switch exp {
 	case 1<<flt.expbits - 1:
 		// Inf, NaN
-		if mant != 0 {
-			return "NaN"
+		var s string
+		switch {
+		case mant != 0:
+			s = "NaN"
+		case neg:
+			s = "-Inf"
+		default:
+			s = "+Inf"
 		}
-		if neg {
-			return "-Inf"
-		}
-		return "+Inf"
+		return append(dst, s...)
 
 	case 0:
 		// denormalized
@@ -86,7 +99,7 @@
 
 	// Pick off easy binary format.
 	if fmt == 'b' {
-		return fmtB(neg, mant, exp, flt)
+		return fmtB(dst, neg, mant, exp, flt)
 	}
 
 	// Create exact decimal representation.
@@ -127,9 +140,9 @@
 
 	switch fmt {
 	case 'e', 'E':
-		return fmtE(neg, d, prec, fmt)
+		return fmtE(dst, neg, d, prec, fmt)
 	case 'f':
-		return fmtF(neg, d, prec)
+		return fmtF(dst, neg, d, prec)
 	case 'g', 'G':
 		// trailing fractional zeros in 'e' form will be trimmed.
 		eprec := prec
@@ -147,15 +160,16 @@
 			if prec > d.nd {
 				prec = d.nd
 			}
-			return fmtE(neg, d, prec-1, fmt+'e'-'g')
+			return fmtE(dst, neg, d, prec-1, fmt+'e'-'g')
 		}
 		if prec > d.dp {
 			prec = d.nd
 		}
-		return fmtF(neg, d, max(prec-d.dp, 0))
+		return fmtF(dst, neg, d, max(prec-d.dp, 0))
 	}
 
-	return "%" + string(fmt)
+	// unknown format
+	return append(dst, '%', fmt)
 }
 
 // Round d (= mant * 2^exp) to the shortest number of digits
@@ -250,121 +264,103 @@
 }
 
 // %e: -d.ddddde±dd
-func fmtE(neg bool, d *decimal, prec int, fmt byte) string {
-	buf := make([]byte, 3+max(prec, 0)+30) // "-0." + prec digits + exp
-	w := 0                                 // write index
-
+func fmtE(dst []byte, neg bool, d *decimal, prec int, fmt byte) []byte {
 	// sign
 	if neg {
-		buf[w] = '-'
-		w++
+		dst = append(dst, '-')
 	}
 
 	// first digit
-	if d.nd == 0 {
-		buf[w] = '0'
-	} else {
-		buf[w] = d.d[0]
+	ch := byte('0')
+	if d.nd != 0 {
+		ch = d.d[0]
 	}
-	w++
+	dst = append(dst, ch)
 
 	// .moredigits
 	if prec > 0 {
-		buf[w] = '.'
-		w++
-		for i := 0; i < prec; i++ {
-			if 1+i < d.nd {
-				buf[w] = d.d[1+i]
-			} else {
-				buf[w] = '0'
+		dst = append(dst, '.')
+		for i := 1; i <= prec; i++ {
+			ch = '0'
+			if i < d.nd {
+				ch = d.d[i]
 			}
-			w++
+			dst = append(dst, ch)
 		}
 	}
 
 	// e±
-	buf[w] = fmt
-	w++
+	dst = append(dst, fmt)
 	exp := d.dp - 1
 	if d.nd == 0 { // special case: 0 has exponent 0
 		exp = 0
 	}
 	if exp < 0 {
-		buf[w] = '-'
+		ch = '-'
 		exp = -exp
 	} else {
-		buf[w] = '+'
+		ch = '+'
 	}
-	w++
+	dst = append(dst, ch)
 
 	// dddd
-	// count digits
-	n := 0
-	for e := exp; e > 0; e /= 10 {
-		n++
+	var buf [3]byte
+	i := len(buf)
+	for exp >= 10 {
+		i--
+		buf[i] = byte(exp%10 + '0')
+		exp /= 10
 	}
-	// leading zeros
-	for i := n; i < 2; i++ {
-		buf[w] = '0'
-		w++
-	}
-	// digits
-	w += n
-	n = 0
-	for e := exp; e > 0; e /= 10 {
-		n++
-		buf[w-n] = byte(e%10 + '0')
+	// exp < 10
+	i--
+	buf[i] = byte(exp + '0')
+
+	// leading zeroes
+	if i > len(buf)-2 {
+		i--
+		buf[i] = '0'
 	}
 
-	return string(buf[0:w])
+	return append(dst, buf[i:]...)
 }
 
 // %f: -ddddddd.ddddd
-func fmtF(neg bool, d *decimal, prec int) string {
-	buf := make([]byte, 1+max(d.dp, 1)+1+max(prec, 0))
-	w := 0
-
+func fmtF(dst []byte, neg bool, d *decimal, prec int) []byte {
 	// sign
 	if neg {
-		buf[w] = '-'
-		w++
+		dst = append(dst, '-')
 	}
 
 	// integer, padded with zeros as needed.
 	if d.dp > 0 {
 		var i int
 		for i = 0; i < d.dp && i < d.nd; i++ {
-			buf[w] = d.d[i]
-			w++
+			dst = append(dst, d.d[i])
 		}
 		for ; i < d.dp; i++ {
-			buf[w] = '0'
-			w++
+			dst = append(dst, '0')
 		}
 	} else {
-		buf[w] = '0'
-		w++
+		dst = append(dst, '0')
 	}
 
 	// fraction
 	if prec > 0 {
-		buf[w] = '.'
-		w++
+		dst = append(dst, '.')
 		for i := 0; i < prec; i++ {
-			if d.dp+i < 0 || d.dp+i >= d.nd {
-				buf[w] = '0'
-			} else {
-				buf[w] = d.d[d.dp+i]
+			ch := byte('0')
+			if j := d.dp + i; 0 <= j && j < d.nd {
+				ch = d.d[j]
 			}
-			w++
+			dst = append(dst, ch)
 		}
 	}
 
-	return string(buf[0:w])
+	return dst
 }
 
 // %b: -ddddddddp+ddd
-func fmtB(neg bool, mant uint64, exp int, flt *floatInfo) string {
+func fmtB(dst []byte, neg bool, mant uint64, exp int, flt *floatInfo) []byte {
 	var buf [50]byte
 	w := len(buf)
 	exp -= int(flt.mantbits)
@@ -395,7 +391,7 @@
 		w--
 		buf[w] = '-'
 	}
-	return string(buf[w:])
+	return append(dst, buf[w:]...)
 }
 
 func max(a, b int) int {