Make printing faster by avoiding mallocs and making some other improvements.
Roughly 33% faster for simple cases, probably more for complex ones.

Before:

mallocs per Sprintf(""): 4
mallocs per Sprintf("xxx"): 6
mallocs per Sprintf("%x"): 10
mallocs per Sprintf("%x %x"): 12

Now:

mallocs per Sprintf(""): 2
mallocs per Sprintf("xxx"): 3
mallocs per Sprintf("%x"): 5
mallocs per Sprintf("%x %x"): 7

Speed improves both by avoiding mallocs and by sharing a bytes.Buffer
between print.go and format.go rather than copying the data back after each
printed item.

Before:

fmt_test.BenchmarkSprintfEmpty	1000000	      1346 ns/op
fmt_test.BenchmarkSprintfString	500000	      3461 ns/op
fmt_test.BenchmarkSprintfInt	500000	      3671 ns/op

Now:

fmt_test.BenchmarkSprintfEmpty	 2000000	       995 ns/op
fmt_test.BenchmarkSprintfString	 1000000	      2745 ns/op
fmt_test.BenchmarkSprintfInt	 1000000	      2391 ns/op
fmt_test.BenchmarkSprintfIntInt	  500000	      3751 ns/op

I believe there is more to get but this is a good milestone.

R=rsc
CC=golang-dev, hong
https://golang.org/cl/166076
diff --git a/src/pkg/bytes/buffer.go b/src/pkg/bytes/buffer.go
index 0920250..6178094 100644
--- a/src/pkg/bytes/buffer.go
+++ b/src/pkg/bytes/buffer.go
@@ -32,9 +32,10 @@
 // with Read and Write methods.
 // The zero value for Buffer is an empty buffer ready to use.
 type Buffer struct {
-	buf	[]byte;		// contents are the bytes buf[off : len(buf)]
-	off	int;		// read at &buf[off], write at &buf[len(buf)]
-	oneByte	[1]byte;	// avoid allocation of slice on each WriteByte
+	buf		[]byte;		// contents are the bytes buf[off : len(buf)]
+	off		int;		// read at &buf[off], write at &buf[len(buf)]
+	oneByte		[1]byte;	// avoid allocation of slice on each WriteByte
+	bootstrap	[64]byte;	// memory to hold first slice; helps small buffers (Printf) avoid allocation.
 }
 
 // Bytes returns the contents of the unread portion of the buffer;
@@ -69,29 +70,51 @@
 // b.Reset() is the same as b.Truncate(0).
 func (b *Buffer) Reset()	{ b.Truncate(0) }
 
+// Resize buffer to guarantee enough space for n more bytes.
+// After this call, the state of b.buf is inconsistent.
+// It must be fixed up as is done in Write and WriteString.
+func (b *Buffer) resize(n int) {
+	var buf []byte;
+	if b.buf == nil && n <= len(b.bootstrap) {
+		buf = &b.bootstrap
+	} else {
+		buf = b.buf;
+		if len(b.buf)+n > cap(b.buf) {
+			// not enough space anywhere
+			buf = make([]byte, 2*cap(b.buf)+n)
+		}
+		copy(buf, b.buf[b.off:]);
+	}
+	b.buf = buf;
+	b.off = 0;
+}
+
 // Write appends the contents of p to the buffer.  The return
 // value n is the length of p; err is always nil.
 func (b *Buffer) Write(p []byte) (n int, err os.Error) {
 	m := b.Len();
 	n = len(p);
-
 	if len(b.buf)+n > cap(b.buf) {
-		// not enough space at end
-		buf := b.buf;
-		if m+n > cap(b.buf) {
-			// not enough space anywhere
-			buf = make([]byte, 2*cap(b.buf)+n)
-		}
-		copyBytes(buf, 0, b.buf[b.off:b.off+m]);
-		b.buf = buf;
-		b.off = 0;
+		b.resize(n)
 	}
-
 	b.buf = b.buf[0 : b.off+m+n];
 	copyBytes(b.buf, b.off+m, p);
 	return n, nil;
 }
 
+// WriteString appends the contents of s to the buffer.  The return
+// value n is the length of s; err is always nil.
+func (b *Buffer) WriteString(s string) (n int, err os.Error) {
+	m := b.Len();
+	n = len(s);
+	if len(b.buf)+n > cap(b.buf) {
+		b.resize(n)
+	}
+	b.buf = b.buf[0 : b.off+m+n];
+	copyString(b.buf, b.off+m, s);
+	return n, nil;
+}
+
 // MinRead is the minimum slice size passed to a Read call by
 // Buffer.ReadFrom.  As long as the Buffer has at least MinRead bytes beyond
 // what is required to hold the contents of r, ReadFrom will not grow the
@@ -146,29 +169,6 @@
 	return;
 }
 
-// WriteString appends the contents of s to the buffer.  The return
-// value n is the length of s; err is always nil.
-func (b *Buffer) WriteString(s string) (n int, err os.Error) {
-	m := b.Len();
-	n = len(s);
-
-	if len(b.buf)+n > cap(b.buf) {
-		// not enough space at end
-		buf := b.buf;
-		if m+n > cap(b.buf) {
-			// not enough space anywhere
-			buf = make([]byte, 2*cap(b.buf)+n)
-		}
-		copyBytes(buf, 0, b.buf[b.off:b.off+m]);
-		b.buf = buf;
-		b.off = 0;
-	}
-
-	b.buf = b.buf[0 : b.off+m+n];
-	copyString(b.buf, b.off+m, s);
-	return n, nil;
-}
-
 // WriteByte appends the byte c to the buffer.
 // The returned error is always nil, but is included
 // to match bufio.Writer's WriteByte.