strconv: faster FormatFloat(x, *, -1, 64) using Grisu3 algorithm.

The implementation is similar to the one from the double-conversion
library used in the Chrome V8 engine.

                            old ns/op   new ns/op  speedup
BenchmarkAppendFloatDecimal      591         480      1.2x
BenchmarkAppendFloat            2956         486      6.1x
BenchmarkAppendFloatExp        10622         503     21.1x
BenchmarkAppendFloatNegExp     40343         483     83.5x
BenchmarkAppendFloatBig         2798         664      4.2x

See F. Loitsch, ``Printing Floating-Point Numbers Quickly and
Accurately with Integers'', Proceedings of the ACM SIGPLAN Conference
on Programming Language Design and Implementation (PLDI), 2010.

R=rsc
CC=golang-dev, remy
https://golang.org/cl/5502079
diff --git a/src/pkg/strconv/ftoa.go b/src/pkg/strconv/ftoa.go
index ab8dd2b..8eefbee 100644
--- a/src/pkg/strconv/ftoa.go
+++ b/src/pkg/strconv/ftoa.go
@@ -98,29 +98,43 @@
 		return fmtB(dst, neg, mant, exp, flt)
 	}
 
-	// Create exact decimal representation.
-	// The shift is exp - flt.mantbits because mant is a 1-bit integer
-	// followed by a flt.mantbits fraction, and we are treating it as
-	// a 1+flt.mantbits-bit integer.
-	d := new(decimal)
-	d.Assign(mant)
-	d.Shift(exp - int(flt.mantbits))
-
-	// Round appropriately.
 	// Negative precision means "only as much as needed to be exact."
-	shortest := false
-	if prec < 0 {
-		shortest = true
-		roundShortest(d, mant, exp, flt)
-		switch fmt {
-		case 'e', 'E':
-			prec = d.nd - 1
-		case 'f':
-			prec = max(d.nd-d.dp, 0)
-		case 'g', 'G':
-			prec = d.nd
+	shortest := prec < 0
+
+	d := new(decimal)
+	if shortest {
+		ok := false
+		if optimize && bitSize == 64 {
+			// Try Grisu3 algorithm.
+			f := new(extFloat)
+			lower, upper := f.AssignComputeBounds(val)
+			ok = f.ShortestDecimal(d, &lower, &upper)
+		}
+		if !ok {
+			// Create exact decimal representation.
+			// The shift is exp - flt.mantbits because mant is a 1-bit integer
+			// followed by a flt.mantbits fraction, and we are treating it as
+			// a 1+flt.mantbits-bit integer.
+			d.Assign(mant)
+			d.Shift(exp - int(flt.mantbits))
+			roundShortest(d, mant, exp, flt)
+		}
+		// Precision for shortest representation mode.
+		if prec < 0 {
+			switch fmt {
+			case 'e', 'E':
+				prec = d.nd - 1
+			case 'f':
+				prec = max(d.nd-d.dp, 0)
+			case 'g', 'G':
+				prec = d.nd
+			}
 		}
 	} else {
+		// Create exact decimal representation.
+		d.Assign(mant)
+		d.Shift(exp - int(flt.mantbits))
+		// Round appropriately.
 		switch fmt {
 		case 'e', 'E':
 			d.Round(prec + 1)