Make printing faster by avoiding mallocs, plus some other improvements.
Roughly 33% faster for simple cases, probably more for complex ones.

Before:

mallocs per Sprintf(""): 4
mallocs per Sprintf("xxx"): 6
mallocs per Sprintf("%x"): 10
mallocs per Sprintf("%x %x"): 12

Now:

mallocs per Sprintf(""): 2
mallocs per Sprintf("xxx"): 3
mallocs per Sprintf("%x"): 5
mallocs per Sprintf("%x %x"): 7

Speed improves both because mallocs are avoided and because print.go and
format.go now share a single bytes.Buffer, rather than copying the formatted
data back after each printed item.

Before:

fmt_test.BenchmarkSprintfEmpty	1000000	      1346 ns/op
fmt_test.BenchmarkSprintfString	500000	      3461 ns/op
fmt_test.BenchmarkSprintfInt	500000	      3671 ns/op

Now (BenchmarkSprintfIntInt is new in this change, so it has no "before" figure):

fmt_test.BenchmarkSprintfEmpty	 2000000	       995 ns/op
fmt_test.BenchmarkSprintfString	 1000000	      2745 ns/op
fmt_test.BenchmarkSprintfInt	 1000000	      2391 ns/op
fmt_test.BenchmarkSprintfIntInt	  500000	      3751 ns/op

I believe there is more to get but this is a good milestone.

R=rsc
CC=golang-dev, hong
https://golang.org/cl/166076
diff --git a/src/pkg/bytes/buffer.go b/src/pkg/bytes/buffer.go
index 0920250..6178094 100644
--- a/src/pkg/bytes/buffer.go
+++ b/src/pkg/bytes/buffer.go
@@ -32,9 +32,10 @@
 // with Read and Write methods.
 // The zero value for Buffer is an empty buffer ready to use.
 type Buffer struct {
-	buf	[]byte;		// contents are the bytes buf[off : len(buf)]
-	off	int;		// read at &buf[off], write at &buf[len(buf)]
-	oneByte	[1]byte;	// avoid allocation of slice on each WriteByte
+	buf		[]byte;		// contents are the bytes buf[off : len(buf)]
+	off		int;		// read at &buf[off], write at &buf[len(buf)]
+	oneByte		[1]byte;	// avoid allocation of slice on each WriteByte
+	bootstrap	[64]byte;	// memory to hold first slice; helps small buffers (Printf) avoid allocation.
 }
 
 // Bytes returns the contents of the unread portion of the buffer;
@@ -69,29 +70,51 @@
 // b.Reset() is the same as b.Truncate(0).
 func (b *Buffer) Reset()	{ b.Truncate(0) }
 
+// Resize buffer to guarantee enough space for n more bytes.
+// After this call, the state of b.buf is inconsistent.
+// It must be fixed up as is done in Write and WriteString.
+func (b *Buffer) resize(n int) {
+	var buf []byte;
+	if b.buf == nil && n <= len(b.bootstrap) {
+		buf = &b.bootstrap
+	} else {
+		buf = b.buf;
+		if len(b.buf)+n > cap(b.buf) {
+			// not enough space anywhere
+			buf = make([]byte, 2*cap(b.buf)+n)
+		}
+		copy(buf, b.buf[b.off:]);
+	}
+	b.buf = buf;
+	b.off = 0;
+}
+
 // Write appends the contents of p to the buffer.  The return
 // value n is the length of p; err is always nil.
 func (b *Buffer) Write(p []byte) (n int, err os.Error) {
 	m := b.Len();
 	n = len(p);
-
 	if len(b.buf)+n > cap(b.buf) {
-		// not enough space at end
-		buf := b.buf;
-		if m+n > cap(b.buf) {
-			// not enough space anywhere
-			buf = make([]byte, 2*cap(b.buf)+n)
-		}
-		copyBytes(buf, 0, b.buf[b.off:b.off+m]);
-		b.buf = buf;
-		b.off = 0;
+		b.resize(n)
 	}
-
 	b.buf = b.buf[0 : b.off+m+n];
 	copyBytes(b.buf, b.off+m, p);
 	return n, nil;
 }
 
+// WriteString appends the contents of s to the buffer.  The return
+// value n is the length of s; err is always nil.
+func (b *Buffer) WriteString(s string) (n int, err os.Error) {
+	m := b.Len();
+	n = len(s);
+	if len(b.buf)+n > cap(b.buf) {
+		b.resize(n)
+	}
+	b.buf = b.buf[0 : b.off+m+n];
+	copyString(b.buf, b.off+m, s);
+	return n, nil;
+}
+
 // MinRead is the minimum slice size passed to a Read call by
 // Buffer.ReadFrom.  As long as the Buffer has at least MinRead bytes beyond
 // what is required to hold the contents of r, ReadFrom will not grow the
@@ -146,29 +169,6 @@
 	return;
 }
 
-// WriteString appends the contents of s to the buffer.  The return
-// value n is the length of s; err is always nil.
-func (b *Buffer) WriteString(s string) (n int, err os.Error) {
-	m := b.Len();
-	n = len(s);
-
-	if len(b.buf)+n > cap(b.buf) {
-		// not enough space at end
-		buf := b.buf;
-		if m+n > cap(b.buf) {
-			// not enough space anywhere
-			buf = make([]byte, 2*cap(b.buf)+n)
-		}
-		copyBytes(buf, 0, b.buf[b.off:b.off+m]);
-		b.buf = buf;
-		b.off = 0;
-	}
-
-	b.buf = b.buf[0 : b.off+m+n];
-	copyString(b.buf, b.off+m, s);
-	return n, nil;
-}
-
 // WriteByte appends the byte c to the buffer.
 // The returned error is always nil, but is included
 // to match bufio.Writer's WriteByte.
diff --git a/src/pkg/fmt/fmt_test.go b/src/pkg/fmt/fmt_test.go
index 0556c5d..51a159f 100644
--- a/src/pkg/fmt/fmt_test.go
+++ b/src/pkg/fmt/fmt_test.go
@@ -7,6 +7,7 @@
 import (
 	. "fmt";
 	"io";
+	"malloc";	// for the malloc count test only
 	"math";
 	"strings";
 	"testing";
@@ -242,7 +243,7 @@
 			if _, ok := tt.val.(string); ok {
 				// Don't requote the already-quoted strings.
 				// It's too confusing to read the errors.
-				t.Errorf("Sprintf(%q, %q) = %s want %s", tt.fmt, tt.val, s, tt.out)
+				t.Errorf("Sprintf(%q, %q) = <%s> want <%s>", tt.fmt, tt.val, s, tt.out)
 			} else {
 				t.Errorf("Sprintf(%q, %v) = %q want %q", tt.fmt, tt.val, s, tt.out)
 			}
@@ -268,6 +269,39 @@
 	}
 }
 
+func BenchmarkSprintfIntInt(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		Sprintf("%d %d", 5, 6)
+	}
+}
+
+func TestCountMallocs(t *testing.T) {
+	mallocs := 0 - malloc.GetStats().Mallocs;
+	for i := 0; i < 100; i++ {
+		Sprintf("")
+	}
+	mallocs += malloc.GetStats().Mallocs;
+	Printf("mallocs per Sprintf(\"\"): %d\n", mallocs/100);
+	mallocs = 0 - malloc.GetStats().Mallocs;
+	for i := 0; i < 100; i++ {
+		Sprintf("xxx")
+	}
+	mallocs += malloc.GetStats().Mallocs;
+	Printf("mallocs per Sprintf(\"xxx\"): %d\n", mallocs/100);
+	mallocs = 0 - malloc.GetStats().Mallocs;
+	for i := 0; i < 100; i++ {
+		Sprintf("%x", i)
+	}
+	mallocs += malloc.GetStats().Mallocs;
+	Printf("mallocs per Sprintf(\"%%x\"): %d\n", mallocs/100);
+	mallocs = 0 - malloc.GetStats().Mallocs;
+	for i := 0; i < 100; i++ {
+		Sprintf("%x %x", i, i)
+	}
+	mallocs += malloc.GetStats().Mallocs;
+	Printf("mallocs per Sprintf(\"%%x %%x\"): %d\n", mallocs/100);
+}
+
 type flagPrinter struct{}
 
 func (*flagPrinter) Format(f State, c int) {
diff --git a/src/pkg/fmt/format.go b/src/pkg/fmt/format.go
index c7b7b9b..d09f352 100644
--- a/src/pkg/fmt/format.go
+++ b/src/pkg/fmt/format.go
@@ -5,6 +5,7 @@
 package fmt
 
 import (
+	"bytes";
 	"strconv";
 )
 
@@ -16,34 +17,30 @@
 	udigits	= "0123456789ABCDEF";
 )
 
-const padZeros = "0000000000000000000000000000000000000000000000000000000000000000"
-const padSpaces = "                                                                "
+var padZeroBytes = make([]byte, nByte)
+var padSpaceBytes = make([]byte, nByte)
+
+var newline = []byte{'\n'}
 
 func init() {
-	if len(padZeros) != nByte || len(padSpaces) != nByte {
-		panic("fmt padding wrong length")
+	for i := 0; i < nByte; i++ {
+		padZeroBytes[i] = '0';
+		padSpaceBytes[i] = ' ';
 	}
 }
 
 /*
 	Fmt is the raw formatter used by Printf etc.  Not meant for normal use.
+	It prints into a bytes.Buffer that must be set up externally.
 	See print.go for a more palatable interface.
-
-	The model is to accumulate operands into an internal buffer and then
-	retrieve the buffer in one hit using Str(), Putnl(), etc.  The formatting
-	methods return ``self'' so the operations can be chained.
-
-		f := fmt.New();
-		print(f.Fmt_d(1234).Fmt_s("\n").Str());  // create string, print it
-		f.Fmt_d(-1234).Fmt_s("\n").Put();  // print string
-		f.Fmt_ud(1<<63).Putnl();  // print string with automatic newline
 */
 type Fmt struct {
-	buf		string;
+	intbuf		[nByte]byte;
+	buf		*bytes.Buffer;
 	wid		int;
-	wid_present	bool;
+	widPresent	bool;
 	prec		int;
-	prec_present	bool;
+	precPresent	bool;
 	// flags
 	minus	bool;
 	plus	bool;
@@ -52,11 +49,11 @@
 	zero	bool;
 }
 
-func (f *Fmt) clearflags() {
+func (f *Fmt) ClearFlags() {
 	f.wid = 0;
-	f.wid_present = false;
+	f.widPresent = false;
 	f.prec = 0;
-	f.prec_present = false;
+	f.precPresent = false;
 	f.minus = false;
 	f.plus = false;
 	f.sharp = false;
@@ -64,94 +61,98 @@
 	f.zero = false;
 }
 
-func (f *Fmt) clearbuf()	{ f.buf = "" }
-
-func (f *Fmt) init() {
-	f.clearbuf();
-	f.clearflags();
+func (f *Fmt) Init(buf *bytes.Buffer) {
+	f.buf = buf;
+	f.ClearFlags();
 }
 
-// New returns a new initialized Fmt
-func New() *Fmt {
-	f := new(Fmt);
-	f.init();
-	return f;
-}
-
-// Str returns the buffered contents as a string and resets the Fmt.
-func (f *Fmt) Str() string {
-	s := f.buf;
-	f.clearbuf();
-	f.clearflags();
-	f.buf = "";
-	return s;
-}
-
-// Put writes the buffered contents to stdout and resets the Fmt.
-func (f *Fmt) Put() {
-	print(f.buf);
-	f.clearbuf();
-	f.clearflags();
-}
-
-// Putnl writes the buffered contents to stdout, followed by a newline, and resets the Fmt.
-func (f *Fmt) Putnl() {
-	print(f.buf, "\n");
-	f.clearbuf();
-	f.clearflags();
-}
+func (f *Fmt) Reset()	{ f.ClearFlags() }
 
 // Wp sets the width and precision for formatting the next item.
-func (f *Fmt) Wp(w, p int) *Fmt {
-	f.wid_present = true;
+func (f *Fmt) Wp(w, p int) {
+	f.widPresent = true;
 	f.wid = w;
-	f.prec_present = true;
+	f.precPresent = true;
 	f.prec = p;
-	return f;
 }
 
 // P sets the precision for formatting the next item.
-func (f *Fmt) P(p int) *Fmt {
-	f.prec_present = true;
+func (f *Fmt) P(p int) {
+	f.precPresent = true;
 	f.prec = p;
-	return f;
 }
 
 // W sets the width for formatting the next item.
-func (f *Fmt) W(x int) *Fmt {
-	f.wid_present = true;
+func (f *Fmt) W(x int) {
+	f.widPresent = true;
 	f.wid = x;
-	return f;
+}
+
+// Compute left and right padding widths (only one will be non-zero).
+func (f *Fmt) computePadding(width int) (padding []byte, leftWidth, rightWidth int) {
+	left := !f.minus;
+	w := f.wid;
+	if w < 0 {
+		left = false;
+		w = -w;
+	}
+	w -= width;
+	if w > 0 {
+		if left && f.zero {
+			return padZeroBytes, w, 0
+		}
+		if left {
+			return padSpaceBytes, w, 0
+		} else {
+			// can't be zero padding on the right
+			return padSpaceBytes, 0, w
+		}
+	}
+	return;
+}
+
+// Generate n bytes of padding.
+func (f *Fmt) writePadding(n int, padding []byte) {
+	for n > 0 {
+		m := n;
+		if m > nByte {
+			m = nByte
+		}
+		f.buf.Write(padding[0:m]);
+		n -= m;
+	}
+}
+
+// Append b to f.buf, padded on left (w > 0) or right (w < 0 or f.minus)
+func (f *Fmt) padBytes(b []byte) {
+	var padding []byte;
+	var left, right int;
+	if f.widPresent && f.wid != 0 {
+		padding, left, right = f.computePadding(len(b))
+	}
+	if left > 0 {
+		f.writePadding(left, padding)
+	}
+	f.buf.Write(b);
+	if right > 0 {
+		f.writePadding(right, padding)
+	}
 }
 
 // append s to buf, padded on left (w > 0) or right (w < 0 or f.minus)
-// padding is in bytes, not characters (agrees with ANSIC C, not Plan 9 C)
 func (f *Fmt) pad(s string) {
-	if f.wid_present && f.wid != 0 {
-		left := !f.minus;
-		w := f.wid;
-		if w < 0 {
-			left = false;
-			w = -w;
-		}
-		w -= len(s);
-		padding := padSpaces;
-		if left && f.zero {
-			padding = padZeros
-		}
-		if w > 0 {
-			if w > nByte {
-				w = nByte
-			}
-			padding = padding[0:w];
-			if left {
-				s = padding + s
-			} else {
-				s += padding
-			}
-		}
+	var padding []byte;
+	var left, right int;
+	if f.widPresent && f.wid != 0 {
+		padding, left, right = f.computePadding(len(s))
 	}
-	f.buf += s;
+	if left > 0 {
+		f.writePadding(left, padding)
+	}
+	f.buf.WriteString(s);
+	if right > 0 {
+		f.writePadding(right, padding)
+	}
 }
 
 // format val into buf, ending at buf[i].  (printing is easier right-to-left;
@@ -171,19 +172,18 @@
 }
 
 // Fmt_boolean formats a boolean.
-func (f *Fmt) Fmt_boolean(v bool) *Fmt {
+func (f *Fmt) Fmt_boolean(v bool) {
 	if v {
 		f.pad("true")
 	} else {
 		f.pad("false")
 	}
-	f.clearflags();
-	return f;
+	f.ClearFlags();
 }
 
 // integer; interprets prec but not wid.
-func (f *Fmt) integer(a int64, base uint, is_signed bool, digits string) string {
-	var buf [nByte]byte;
+func (f *Fmt) integer(a int64, base uint, is_signed bool, digits string) []byte {
+	var buf []byte = &f.intbuf;
 	negative := is_signed && a < 0;
 	if negative {
 		a = -a
@@ -192,17 +192,17 @@
 	// two ways to ask for extra leading zero digits: %.3d or %03d.
 	// apparently the first cancels the second.
 	prec := 0;
-	if f.prec_present {
+	if f.precPresent {
 		prec = f.prec;
 		f.zero = false;
-	} else if f.zero && f.wid_present && !f.minus && f.wid > 0 {
+	} else if f.zero && f.widPresent && !f.minus && f.wid > 0 {
 		prec = f.wid;
 		if negative || f.plus || f.space {
 			prec--	// leave room for sign
 		}
 	}
 
-	i := putint(&buf, uint64(base), uint64(a), digits);
+	i := putint(buf, uint64(base), uint64(a), digits);
 	for i > 0 && prec > (nByte-1-i) {
 		buf[i] = '0';
 		i--;
@@ -233,147 +233,137 @@
 		buf[i] = ' ';
 		i--;
 	}
-	return string(buf[i+1 : nByte]);
+	return buf[i+1 : nByte];
 }
 
 // Fmt_d64 formats an int64 in decimal.
-func (f *Fmt) Fmt_d64(v int64) *Fmt {
-	f.pad(f.integer(v, 10, true, ldigits));
-	f.clearflags();
-	return f;
+func (f *Fmt) Fmt_d64(v int64) {
+	f.padBytes(f.integer(v, 10, true, ldigits));
+	f.ClearFlags();
 }
 
 // Fmt_d32 formats an int32 in decimal.
-func (f *Fmt) Fmt_d32(v int32) *Fmt	{ return f.Fmt_d64(int64(v)) }
+func (f *Fmt) Fmt_d32(v int32)	{ f.Fmt_d64(int64(v)) }
 
 // Fmt_d formats an int in decimal.
-func (f *Fmt) Fmt_d(v int) *Fmt	{ return f.Fmt_d64(int64(v)) }
+func (f *Fmt) Fmt_d(v int)	{ f.Fmt_d64(int64(v)) }
 
 // Fmt_ud64 formats a uint64 in decimal.
 func (f *Fmt) Fmt_ud64(v uint64) *Fmt {
-	f.pad(f.integer(int64(v), 10, false, ldigits));
-	f.clearflags();
+	f.padBytes(f.integer(int64(v), 10, false, ldigits));
+	f.ClearFlags();
 	return f;
 }
 
 // Fmt_ud32 formats a uint32 in decimal.
-func (f *Fmt) Fmt_ud32(v uint32) *Fmt	{ return f.Fmt_ud64(uint64(v)) }
+func (f *Fmt) Fmt_ud32(v uint32)	{ f.Fmt_ud64(uint64(v)) }
 
 // Fmt_ud formats a uint in decimal.
-func (f *Fmt) Fmt_ud(v uint) *Fmt	{ return f.Fmt_ud64(uint64(v)) }
+func (f *Fmt) Fmt_ud(v uint)	{ f.Fmt_ud64(uint64(v)) }
 
 // Fmt_x64 formats an int64 in hexadecimal.
-func (f *Fmt) Fmt_x64(v int64) *Fmt {
-	f.pad(f.integer(v, 16, true, ldigits));
-	f.clearflags();
-	return f;
+func (f *Fmt) Fmt_x64(v int64) {
+	f.padBytes(f.integer(v, 16, true, ldigits));
+	f.ClearFlags();
 }
 
 // Fmt_x32 formats an int32 in hexadecimal.
-func (f *Fmt) Fmt_x32(v int32) *Fmt	{ return f.Fmt_x64(int64(v)) }
+func (f *Fmt) Fmt_x32(v int32)	{ f.Fmt_x64(int64(v)) }
 
 // Fmt_x formats an int in hexadecimal.
-func (f *Fmt) Fmt_x(v int) *Fmt	{ return f.Fmt_x64(int64(v)) }
+func (f *Fmt) Fmt_x(v int)	{ f.Fmt_x64(int64(v)) }
 
 // Fmt_ux64 formats a uint64 in hexadecimal.
-func (f *Fmt) Fmt_ux64(v uint64) *Fmt {
-	f.pad(f.integer(int64(v), 16, false, ldigits));
-	f.clearflags();
-	return f;
+func (f *Fmt) Fmt_ux64(v uint64) {
+	f.padBytes(f.integer(int64(v), 16, false, ldigits));
+	f.ClearFlags();
 }
 
 // Fmt_ux32 formats a uint32 in hexadecimal.
-func (f *Fmt) Fmt_ux32(v uint32) *Fmt	{ return f.Fmt_ux64(uint64(v)) }
+func (f *Fmt) Fmt_ux32(v uint32)	{ f.Fmt_ux64(uint64(v)) }
 
 // Fmt_ux formats a uint in hexadecimal.
-func (f *Fmt) Fmt_ux(v uint) *Fmt	{ return f.Fmt_ux64(uint64(v)) }
+func (f *Fmt) Fmt_ux(v uint)	{ f.Fmt_ux64(uint64(v)) }
 
 // Fmt_X64 formats an int64 in upper case hexadecimal.
-func (f *Fmt) Fmt_X64(v int64) *Fmt {
-	f.pad(f.integer(v, 16, true, udigits));
-	f.clearflags();
-	return f;
+func (f *Fmt) Fmt_X64(v int64) {
+	f.padBytes(f.integer(v, 16, true, udigits));
+	f.ClearFlags();
 }
 
 // Fmt_X32 formats an int32 in upper case hexadecimal.
-func (f *Fmt) Fmt_X32(v int32) *Fmt	{ return f.Fmt_X64(int64(v)) }
+func (f *Fmt) Fmt_X32(v int32)	{ f.Fmt_X64(int64(v)) }
 
 // Fmt_X formats an int in upper case hexadecimal.
-func (f *Fmt) Fmt_X(v int) *Fmt	{ return f.Fmt_X64(int64(v)) }
+func (f *Fmt) Fmt_X(v int)	{ f.Fmt_X64(int64(v)) }
 
 // Fmt_uX64 formats a uint64 in upper case hexadecimal.
-func (f *Fmt) Fmt_uX64(v uint64) *Fmt {
-	f.pad(f.integer(int64(v), 16, false, udigits));
-	f.clearflags();
-	return f;
+func (f *Fmt) Fmt_uX64(v uint64) {
+	f.padBytes(f.integer(int64(v), 16, false, udigits));
+	f.ClearFlags();
 }
 
 // Fmt_uX32 formats a uint32 in upper case hexadecimal.
-func (f *Fmt) Fmt_uX32(v uint32) *Fmt	{ return f.Fmt_uX64(uint64(v)) }
+func (f *Fmt) Fmt_uX32(v uint32)	{ f.Fmt_uX64(uint64(v)) }
 
 // Fmt_uX formats a uint in upper case hexadecimal.
-func (f *Fmt) Fmt_uX(v uint) *Fmt	{ return f.Fmt_uX64(uint64(v)) }
+func (f *Fmt) Fmt_uX(v uint)	{ f.Fmt_uX64(uint64(v)) }
 
 // Fmt_o64 formats an int64 in octal.
-func (f *Fmt) Fmt_o64(v int64) *Fmt {
-	f.pad(f.integer(v, 8, true, ldigits));
-	f.clearflags();
-	return f;
+func (f *Fmt) Fmt_o64(v int64) {
+	f.padBytes(f.integer(v, 8, true, ldigits));
+	f.ClearFlags();
 }
 
 // Fmt_o32 formats an int32 in octal.
-func (f *Fmt) Fmt_o32(v int32) *Fmt	{ return f.Fmt_o64(int64(v)) }
+func (f *Fmt) Fmt_o32(v int32)	{ f.Fmt_o64(int64(v)) }
 
 // Fmt_o formats an int in octal.
-func (f *Fmt) Fmt_o(v int) *Fmt	{ return f.Fmt_o64(int64(v)) }
+func (f *Fmt) Fmt_o(v int)	{ f.Fmt_o64(int64(v)) }
 
 // Fmt_uo64 formats a uint64 in octal.
-func (f *Fmt) Fmt_uo64(v uint64) *Fmt {
-	f.pad(f.integer(int64(v), 8, false, ldigits));
-	f.clearflags();
-	return f;
+func (f *Fmt) Fmt_uo64(v uint64) {
+	f.padBytes(f.integer(int64(v), 8, false, ldigits));
+	f.ClearFlags();
 }
 
 // Fmt_uo32 formats a uint32 in octal.
-func (f *Fmt) Fmt_uo32(v uint32) *Fmt	{ return f.Fmt_uo64(uint64(v)) }
+func (f *Fmt) Fmt_uo32(v uint32)	{ f.Fmt_uo64(uint64(v)) }
 
 // Fmt_uo formats a uint in octal.
-func (f *Fmt) Fmt_uo(v uint) *Fmt	{ return f.Fmt_uo64(uint64(v)) }
+func (f *Fmt) Fmt_uo(v uint)	{ f.Fmt_uo64(uint64(v)) }
 
 // Fmt_b64 formats a uint64 in binary.
-func (f *Fmt) Fmt_b64(v uint64) *Fmt {
-	f.pad(f.integer(int64(v), 2, false, ldigits));
-	f.clearflags();
-	return f;
+func (f *Fmt) Fmt_b64(v uint64) {
+	f.padBytes(f.integer(int64(v), 2, false, ldigits));
+	f.ClearFlags();
 }
 
 // Fmt_b32 formats a uint32 in binary.
-func (f *Fmt) Fmt_b32(v uint32) *Fmt	{ return f.Fmt_b64(uint64(v)) }
+func (f *Fmt) Fmt_b32(v uint32)	{ f.Fmt_b64(uint64(v)) }
 
 // Fmt_b formats a uint in binary.
-func (f *Fmt) Fmt_b(v uint) *Fmt	{ return f.Fmt_b64(uint64(v)) }
+func (f *Fmt) Fmt_b(v uint)	{ f.Fmt_b64(uint64(v)) }
 
 // Fmt_c formats a Unicode character.
-func (f *Fmt) Fmt_c(v int) *Fmt {
+func (f *Fmt) Fmt_c(v int) {
 	f.pad(string(v));
-	f.clearflags();
-	return f;
+	f.ClearFlags();
 }
 
 // Fmt_s formats a string.
-func (f *Fmt) Fmt_s(s string) *Fmt {
-	if f.prec_present {
+func (f *Fmt) Fmt_s(s string) {
+	if f.precPresent {
 		if f.prec < len(s) {
 			s = s[0:f.prec]
 		}
 	}
 	f.pad(s);
-	f.clearflags();
-	return f;
+	f.ClearFlags();
 }
 
 // Fmt_sx formats a string as a hexadecimal encoding of its bytes.
-func (f *Fmt) Fmt_sx(s string) *Fmt {
+func (f *Fmt) Fmt_sx(s string) {
 	t := "";
 	for i := 0; i < len(s); i++ {
 		if i > 0 && f.space {
@@ -384,12 +374,11 @@
 		t += string(ldigits[v&0xF]);
 	}
 	f.pad(t);
-	f.clearflags();
-	return f;
+	f.ClearFlags();
 }
 
 // Fmt_sX formats a string as an uppercase hexadecimal encoding of its bytes.
-func (f *Fmt) Fmt_sX(s string) *Fmt {
+func (f *Fmt) Fmt_sX(s string) {
 	t := "";
 	for i := 0; i < len(s); i++ {
 		v := s[i];
@@ -397,12 +386,11 @@
 		t += string(udigits[v&0xF]);
 	}
 	f.pad(t);
-	f.clearflags();
-	return f;
+	f.ClearFlags();
 }
 
 // Fmt_q formats a string as a double-quoted, escaped Go string constant.
-func (f *Fmt) Fmt_q(s string) *Fmt {
+func (f *Fmt) Fmt_q(s string) {
 	var quoted string;
 	if f.sharp && strconv.CanBackquote(s) {
 		quoted = "`" + s + "`"
@@ -410,27 +398,25 @@
 		quoted = strconv.Quote(s)
 	}
 	f.pad(quoted);
-	f.clearflags();
-	return f;
+	f.ClearFlags();
 }
 
 // floating-point
 
 func doPrec(f *Fmt, def int) int {
-	if f.prec_present {
+	if f.precPresent {
 		return f.prec
 	}
 	return def;
 }
 
-func fmtString(f *Fmt, s string) *Fmt {
+func fmtString(f *Fmt, s string) {
 	f.pad(s);
-	f.clearflags();
-	return f;
+	f.ClearFlags();
 }
 
 // Add a plus sign or space to the string if missing and required.
-func (f *Fmt) plusSpace(s string) *Fmt {
+func (f *Fmt) plusSpace(s string) {
 	if s[0] != '-' {
 		if f.plus {
 			s = "+" + s
@@ -438,94 +424,78 @@
 			s = " " + s
 		}
 	}
-	return fmtString(f, s);
+	fmtString(f, s);
 }
 
 // Fmt_e64 formats a float64 in the form -1.23e+12.
-func (f *Fmt) Fmt_e64(v float64) *Fmt {
-	return f.plusSpace(strconv.Ftoa64(v, 'e', doPrec(f, 6)))
-}
+func (f *Fmt) Fmt_e64(v float64)	{ f.plusSpace(strconv.Ftoa64(v, 'e', doPrec(f, 6))) }
 
 // Fmt_E64 formats a float64 in the form -1.23E+12.
-func (f *Fmt) Fmt_E64(v float64) *Fmt {
-	return f.plusSpace(strconv.Ftoa64(v, 'E', doPrec(f, 6)))
-}
+func (f *Fmt) Fmt_E64(v float64)	{ f.plusSpace(strconv.Ftoa64(v, 'E', doPrec(f, 6))) }
 
 // Fmt_f64 formats a float64 in the form -1.23.
-func (f *Fmt) Fmt_f64(v float64) *Fmt {
-	return f.plusSpace(strconv.Ftoa64(v, 'f', doPrec(f, 6)))
-}
+func (f *Fmt) Fmt_f64(v float64)	{ f.plusSpace(strconv.Ftoa64(v, 'f', doPrec(f, 6))) }
 
 // Fmt_g64 formats a float64 in the 'f' or 'e' form according to size.
-func (f *Fmt) Fmt_g64(v float64) *Fmt {
-	return f.plusSpace(strconv.Ftoa64(v, 'g', doPrec(f, -1)))
-}
+func (f *Fmt) Fmt_g64(v float64)	{ f.plusSpace(strconv.Ftoa64(v, 'g', doPrec(f, -1))) }
 
 // Fmt_g64 formats a float64 in the 'f' or 'E' form according to size.
-func (f *Fmt) Fmt_G64(v float64) *Fmt {
-	return f.plusSpace(strconv.Ftoa64(v, 'G', doPrec(f, -1)))
-}
+func (f *Fmt) Fmt_G64(v float64)	{ f.plusSpace(strconv.Ftoa64(v, 'G', doPrec(f, -1))) }
 
 // Fmt_fb64 formats a float64 in the form -123p3 (exponent is power of 2).
-func (f *Fmt) Fmt_fb64(v float64) *Fmt	{ return f.plusSpace(strconv.Ftoa64(v, 'b', 0)) }
+func (f *Fmt) Fmt_fb64(v float64)	{ f.plusSpace(strconv.Ftoa64(v, 'b', 0)) }
 
 // float32
 // cannot defer to float64 versions
 // because it will get rounding wrong in corner cases.
 
 // Fmt_e32 formats a float32 in the form -1.23e+12.
-func (f *Fmt) Fmt_e32(v float32) *Fmt {
-	return f.plusSpace(strconv.Ftoa32(v, 'e', doPrec(f, 6)))
-}
+func (f *Fmt) Fmt_e32(v float32)	{ f.plusSpace(strconv.Ftoa32(v, 'e', doPrec(f, 6))) }
 
 // Fmt_E32 formats a float32 in the form -1.23E+12.
-func (f *Fmt) Fmt_E32(v float32) *Fmt {
-	return f.plusSpace(strconv.Ftoa32(v, 'E', doPrec(f, 6)))
-}
+func (f *Fmt) Fmt_E32(v float32)	{ f.plusSpace(strconv.Ftoa32(v, 'E', doPrec(f, 6))) }
 
 // Fmt_f32 formats a float32 in the form -1.23.
-func (f *Fmt) Fmt_f32(v float32) *Fmt {
-	return f.plusSpace(strconv.Ftoa32(v, 'f', doPrec(f, 6)))
-}
+func (f *Fmt) Fmt_f32(v float32)	{ f.plusSpace(strconv.Ftoa32(v, 'f', doPrec(f, 6))) }
 
 // Fmt_g32 formats a float32 in the 'f' or 'e' form according to size.
-func (f *Fmt) Fmt_g32(v float32) *Fmt {
-	return f.plusSpace(strconv.Ftoa32(v, 'g', doPrec(f, -1)))
-}
+func (f *Fmt) Fmt_g32(v float32)	{ f.plusSpace(strconv.Ftoa32(v, 'g', doPrec(f, -1))) }
 
 // Fmt_G32 formats a float32 in the 'f' or 'E' form according to size.
-func (f *Fmt) Fmt_G32(v float32) *Fmt {
-	return f.plusSpace(strconv.Ftoa32(v, 'G', doPrec(f, -1)))
-}
+func (f *Fmt) Fmt_G32(v float32)	{ f.plusSpace(strconv.Ftoa32(v, 'G', doPrec(f, -1))) }
 
 // Fmt_fb32 formats a float32 in the form -123p3 (exponent is power of 2).
-func (f *Fmt) Fmt_fb32(v float32) *Fmt	{ return fmtString(f, strconv.Ftoa32(v, 'b', 0)) }
+func (f *Fmt) Fmt_fb32(v float32)	{ fmtString(f, strconv.Ftoa32(v, 'b', 0)) }
 
 // float
-func (x *Fmt) f(a float) *Fmt {
+func (x *Fmt) f(a float) {
 	if strconv.FloatSize == 32 {
-		return x.Fmt_f32(float32(a))
+		x.Fmt_f32(float32(a))
+	} else {
+		x.Fmt_f64(float64(a))
 	}
-	return x.Fmt_f64(float64(a));
 }
 
-func (x *Fmt) e(a float) *Fmt {
+func (x *Fmt) e(a float) {
 	if strconv.FloatSize == 32 {
-		return x.Fmt_e32(float32(a))
+		x.Fmt_e32(float32(a))
+	} else {
+		x.Fmt_e64(float64(a))
 	}
-	return x.Fmt_e64(float64(a));
 }
 
-func (x *Fmt) g(a float) *Fmt {
+func (x *Fmt) g(a float) {
 	if strconv.FloatSize == 32 {
-		return x.Fmt_g32(float32(a))
+		x.Fmt_g32(float32(a))
+	} else {
+		x.Fmt_g64(float64(a))
 	}
-	return x.Fmt_g64(float64(a));
 }
 
-func (x *Fmt) fb(a float) *Fmt {
+func (x *Fmt) fb(a float) {
 	if strconv.FloatSize == 32 {
-		return x.Fmt_fb32(float32(a))
+		x.Fmt_fb32(float32(a))
+	} else {
+		x.Fmt_fb64(float64(a))
 	}
-	return x.Fmt_fb64(float64(a));
 }
diff --git a/src/pkg/fmt/print.go b/src/pkg/fmt/print.go
index cecdda0..bf54b64 100644
--- a/src/pkg/fmt/print.go
+++ b/src/pkg/fmt/print.go
@@ -77,12 +77,27 @@
 
 
 import (
+	"bytes";
 	"io";
 	"os";
 	"reflect";
 	"utf8";
 )
 
+// Some constants in the form of bytes, to avoid string overhead.
+// Needlessly fastidious, I suppose.
+var (
+	trueBytes	= []byte{'t', 'r', 'u', 'e'};
+	falseBytes	= []byte{'f', 'a', 'l', 's', 'e'};
+	commaSpaceBytes	= []byte{',', ' '};
+	nilAngleBytes	= []byte{'<', 'n', 'i', 'l', '>'};
+	nilParenBytes	= []byte{'(', 'n', 'i', 'l', ')'};
+	nilBytes	= []byte{'n', 'i', 'l'};
+	mapBytes	= []byte{'m', 'a', 'p', '['};
+	missingBytes	= []byte{'m', 'i', 's', 's', 'i', 'n', 'g'};
+	extraBytes	= []byte{'?', '(', 'e', 'x', 't', 'r', 'a', ' '};
+)
+
 // State represents the printer state passed to custom formatters.
 // It provides access to the io.Writer interface plus information about
 // the flags and options for the operand's format specifier.
@@ -126,19 +141,29 @@
 
 type pp struct {
 	n	int;
-	buf	[]byte;
-	fmt	*Fmt;
+	buf	bytes.Buffer;
+	runeBuf	[utf8.UTFMax]byte;
+	fmt	Fmt;
 }
 
+// A leaky bucket of reusable pp structures.
+var ppFree = make(chan *pp, 100)
+
 func newPrinter() *pp {
-	p := new(pp);
-	p.fmt = New();
+	p, ok := <-ppFree;
+	if !ok {
+		p = new(pp)
+	}
+	p.buf.Reset();
+	p.fmt.Init(&p.buf);
 	return p;
 }
 
-func (p *pp) Width() (wid int, ok bool)	{ return p.fmt.wid, p.fmt.wid_present }
+func (p *pp) free()	{ _ = ppFree <- p }
 
-func (p *pp) Precision() (prec int, ok bool)	{ return p.fmt.prec, p.fmt.prec_present }
+func (p *pp) Width() (wid int, ok bool)	{ return p.fmt.wid, p.fmt.widPresent }
+
+func (p *pp) Precision() (prec int, ok bool)	{ return p.fmt.prec, p.fmt.precPresent }
 
 func (p *pp) Flag(b int) bool {
 	switch b {
@@ -156,52 +181,19 @@
 	return false;
 }
 
-func (p *pp) ensure(n int) {
-	if len(p.buf) < n {
-		newn := allocSize + len(p.buf);
-		if newn < n {
-			newn = n + allocSize
-		}
-		b := make([]byte, newn);
-		for i := 0; i < p.n; i++ {
-			b[i] = p.buf[i]
-		}
-		p.buf = b;
-	}
-}
-
-func (p *pp) addstr(s string) {
-	n := len(s);
-	p.ensure(p.n + n);
-	for i := 0; i < n; i++ {
-		p.buf[p.n] = s[i];
-		p.n++;
-	}
-}
-
-func (p *pp) addbytes(b []byte, start, end int) {
-	p.ensure(p.n + end - start);
-	for i := start; i < end; i++ {
-		p.buf[p.n] = b[i];
-		p.n++;
-	}
-}
-
 func (p *pp) add(c int) {
-	p.ensure(p.n + 1);
 	if c < runeSelf {
-		p.buf[p.n] = byte(c);
-		p.n++;
+		p.buf.WriteByte(byte(c))
 	} else {
-		p.addstr(string(c))
+		w := utf8.EncodeRune(c, &p.runeBuf);
+		p.buf.Write(p.runeBuf[0:w]);
 	}
 }
 
-// Implement Write so we can call fprintf on a P, for
+// Implement Write so we can call Fprintf on a pp (through State), for
 // recursive use in custom verbs.
 func (p *pp) Write(b []byte) (ret int, err os.Error) {
-	p.addbytes(b, 0, len(b));
-	return len(b), nil;
+	return p.buf.Write(b)
 }
 
 // These routines end in 'f' and take a format string.
@@ -211,8 +203,9 @@
 	v := reflect.NewValue(a).(*reflect.StructValue);
 	p := newPrinter();
 	p.doprintf(format, v);
-	n, error = w.Write(p.buf[0:p.n]);
-	return n, error;
+	n64, error := p.buf.WriteTo(w);
+	p.free();
+	return int(n64), error;
 }
 
 // Printf formats according to a format specifier and writes to standard output.
@@ -226,7 +219,8 @@
 	v := reflect.NewValue(a).(*reflect.StructValue);
 	p := newPrinter();
 	p.doprintf(format, v);
-	s := string(p.buf)[0:p.n];
+	s := p.buf.String();
+	p.free();
 	return s;
 }
 
@@ -238,8 +232,9 @@
 	v := reflect.NewValue(a).(*reflect.StructValue);
 	p := newPrinter();
 	p.doprint(v, false, false);
-	n, error = w.Write(p.buf[0:p.n]);
-	return n, error;
+	n64, error := p.buf.WriteTo(w);
+	p.free();
+	return int(n64), error;
 }
 
 // Print formats using the default formats for its operands and writes to standard output.
@@ -255,8 +250,9 @@
 	v := reflect.NewValue(a).(*reflect.StructValue);
 	p := newPrinter();
 	p.doprint(v, false, false);
-	s := string(p.buf)[0:p.n];
+	s := p.buf.String();	// copy the result out before p goes back on the free list
+	p.free();
 	return s;
 }
 
 // These routines end in 'ln', do not take a format string,
@@ -269,8 +264,9 @@
 	v := reflect.NewValue(a).(*reflect.StructValue);
 	p := newPrinter();
 	p.doprint(v, true, true);
-	n, error = w.Write(p.buf[0:p.n]);
-	return n, error;
+	n64, error := p.buf.WriteTo(w);
+	p.free();
+	return int(n64), error;
 }
 
 // Println formats using the default formats for its operands and writes to standard output.
@@ -286,7 +282,8 @@
 	v := reflect.NewValue(a).(*reflect.StructValue);
 	p := newPrinter();
 	p.doprint(v, true, true);
-	s := string(p.buf)[0:p.n];
+	s := p.buf.String();
+	p.free();
 	return s;
 }
 
@@ -409,121 +406,120 @@
 		switch {
 		default:
 			if stringer, ok := inter.(Stringer); ok {
-				p.addstr(stringer.String());
+				p.buf.WriteString(stringer.String());
 				return false;	// this value is not a string
 			}
 		case sharp:
 			if stringer, ok := inter.(GoStringer); ok {
-				p.addstr(stringer.GoString());
+				p.buf.WriteString(stringer.GoString());
 				return false;	// this value is not a string
 			}
 		}
 	}
-	s := "";
 BigSwitch:
 	switch f := field.(type) {
 	case *reflect.BoolValue:
-		s = p.fmt.Fmt_boolean(f.Get()).Str()
+		p.fmt.Fmt_boolean(f.Get())
 	case *reflect.Float32Value:
-		s = p.fmt.Fmt_g32(f.Get()).Str()
+		p.fmt.Fmt_g32(f.Get())
 	case *reflect.Float64Value:
-		s = p.fmt.Fmt_g64(f.Get()).Str()
+		p.fmt.Fmt_g64(f.Get())
 	case *reflect.FloatValue:
 		if field.Type().Size()*8 == 32 {
-			s = p.fmt.Fmt_g32(float32(f.Get())).Str()
+			p.fmt.Fmt_g32(float32(f.Get()))
 		} else {
-			s = p.fmt.Fmt_g64(float64(f.Get())).Str()
+			p.fmt.Fmt_g64(float64(f.Get()))
 		}
 	case *reflect.StringValue:
 		if sharp {
-			s = p.fmt.Fmt_q(f.Get()).Str()
+			p.fmt.Fmt_q(f.Get())
 		} else {
-			s = p.fmt.Fmt_s(f.Get()).Str();
+			p.fmt.Fmt_s(f.Get());
 			was_string = true;
 		}
 	case *reflect.MapValue:
 		if sharp {
-			p.addstr(field.Type().String());
-			p.addstr("{");
+			p.buf.WriteString(field.Type().String());
+			p.buf.WriteByte('{');
 		} else {
-			p.addstr("map[")
+			p.buf.Write(mapBytes)
 		}
 		keys := f.Keys();
 		for i, key := range keys {
 			if i > 0 {
 				if sharp {
-					p.addstr(", ")
+					p.buf.Write(commaSpaceBytes)
 				} else {
-					p.addstr(" ")
+					p.buf.WriteByte(' ')
 				}
 			}
 			p.printField(key, plus, sharp, depth+1);
-			p.addstr(":");
+			p.buf.WriteByte(':');
 			p.printField(f.Elem(key), plus, sharp, depth+1);
 		}
 		if sharp {
-			p.addstr("}")
+			p.buf.WriteByte('}')
 		} else {
-			p.addstr("]")
+			p.buf.WriteByte(']')
 		}
 	case *reflect.StructValue:
 		if sharp {
-			p.addstr(field.Type().String())
+			p.buf.WriteString(field.Type().String())
 		}
 		p.add('{');
 		v := f;
 		t := v.Type().(*reflect.StructType);
-		p.fmt.clearflags();	// clear flags for p.printField
+		p.fmt.ClearFlags();	// clear flags for p.printField
 		for i := 0; i < v.NumField(); i++ {
 			if i > 0 {
 				if sharp {
-					p.addstr(", ")
+					p.buf.Write(commaSpaceBytes)
 				} else {
-					p.addstr(" ")
+					p.buf.WriteByte(' ')
 				}
 			}
 			if plus || sharp {
 				if f := t.Field(i); f.Name != "" {
-					p.addstr(f.Name);
-					p.add(':');
+					p.buf.WriteString(f.Name);
+					p.buf.WriteByte(':');
 				}
 			}
 			p.printField(getField(v, i), plus, sharp, depth+1);
 		}
-		p.addstr("}");
+		p.buf.WriteByte('}');
 	case *reflect.InterfaceValue:
 		value := f.Elem();
 		if value == nil {
 			if sharp {
-				p.addstr(field.Type().String());
-				p.addstr("(nil)");
+				p.buf.WriteString(field.Type().String());
+				p.buf.Write(nilParenBytes);
 			} else {
-				s = "<nil>"
+				p.buf.Write(nilAngleBytes)
 			}
 		} else {
 			return p.printField(value, plus, sharp, depth+1)
 		}
 	case reflect.ArrayOrSliceValue:
 		if sharp {
-			p.addstr(field.Type().String());
-			p.addstr("{");
+			p.buf.WriteString(field.Type().String());
+			p.buf.WriteByte('{');
 		} else {
-			p.addstr("[")
+			p.buf.WriteByte('[')
 		}
 		for i := 0; i < f.Len(); i++ {
 			if i > 0 {
 				if sharp {
-					p.addstr(", ")
+					p.buf.Write(commaSpaceBytes)
 				} else {
-					p.addstr(" ")
+					p.buf.WriteByte(' ')
 				}
 			}
 			p.printField(f.Elem(i), plus, sharp, depth+1);
 		}
 		if sharp {
-			p.addstr("}")
+			p.buf.WriteByte('}')
 		} else {
-			p.addstr("]")
+			p.buf.WriteByte(']')
 		}
 	case *reflect.PtrValue:
 		v := f.Get();
@@ -532,86 +528,92 @@
 		if v != 0 && depth == 0 {
 			switch a := f.Elem().(type) {
 			case reflect.ArrayOrSliceValue:
-				p.addstr("&");
+				p.buf.WriteByte('&');
 				p.printField(a, plus, sharp, depth+1);
 				break BigSwitch;
 			case *reflect.StructValue:
-				p.addstr("&");
+				p.buf.WriteByte('&');
 				p.printField(a, plus, sharp, depth+1);
 				break BigSwitch;
 			}
 		}
 		if sharp {
-			p.addstr("(");
-			p.addstr(field.Type().String());
-			p.addstr(")(");
+			p.buf.WriteByte('(');
+			p.buf.WriteString(field.Type().String());
+			p.buf.WriteByte(')');
+			p.buf.WriteByte('(');
 			if v == 0 {
-				p.addstr("nil")
+				p.buf.Write(nilBytes)
 			} else {
 				p.fmt.sharp = true;
-				p.addstr(p.fmt.Fmt_ux64(uint64(v)).Str());
+				p.fmt.Fmt_ux64(uint64(v));
 			}
-			p.addstr(")");
+			p.buf.WriteByte(')');
 			break;
 		}
 		if v == 0 {
-			s = "<nil>";
+			p.buf.Write(nilAngleBytes);
 			break;
 		}
 		p.fmt.sharp = true;	// turn 0x on
-		s = p.fmt.Fmt_ux64(uint64(v)).Str();
+		p.fmt.Fmt_ux64(uint64(v));
 	case uintptrGetter:
 		v := f.Get();
 		if sharp {
-			p.addstr("(");
-			p.addstr(field.Type().String());
-			p.addstr(")(");
+			p.buf.WriteByte('(');
+			p.buf.WriteString(field.Type().String());
+			p.buf.WriteByte(')');
+			p.buf.WriteByte('(');
 			if v == 0 {
-				p.addstr("nil")
+				p.buf.Write(nilBytes)
 			} else {
 				p.fmt.sharp = true;
-				p.addstr(p.fmt.Fmt_ux64(uint64(v)).Str());
+				p.fmt.Fmt_ux64(uint64(v));
 			}
-			p.addstr(")");
+			p.buf.WriteByte(')');
 		} else {
 			p.fmt.sharp = true;	// turn 0x on
-			p.addstr(p.fmt.Fmt_ux64(uint64(f.Get())).Str());
+			p.fmt.Fmt_ux64(uint64(v));
 		}
 	default:
 		v, signed, ok := getInt(field);
 		if ok {
 			if signed {
-				s = p.fmt.Fmt_d64(v).Str()
+				p.fmt.Fmt_d64(v)
 			} else {
 				if sharp {
 					p.fmt.sharp = true;	// turn on 0x
-					s = p.fmt.Fmt_ux64(uint64(v)).Str();
+					p.fmt.Fmt_ux64(uint64(v));
 				} else {
-					s = p.fmt.Fmt_ud64(uint64(v)).Str()
+					p.fmt.Fmt_ud64(uint64(v))
 				}
 			}
 			break;
 		}
-		s = "?" + field.Type().String() + "?";
+		p.buf.WriteByte('?');
+		p.buf.WriteString(field.Type().String());
+		p.buf.WriteByte('?');
 	}
-	p.addstr(s);
 	return was_string;
 }
 
 func (p *pp) doprintf(format string, v *reflect.StructValue) {
-	p.ensure(len(format));	// a good starting size
 	end := len(format) - 1;
 	fieldnum := 0;	// we process one field per non-trivial format
 	for i := 0; i <= end; {
 		c, w := utf8.DecodeRuneInString(format[i:]);
 		if c != '%' || i == end {
-			p.add(c);
+			if w == 1 {
+				p.buf.WriteByte(format[i])	// copy the raw byte: on invalid UTF-8 c is RuneError and byte(c) would emit 0xFD
+			} else {
+				p.buf.WriteString(format[i : i+w])
+			}
 			i += w;
 			continue;
 		}
 		i++;
 		// flags and widths
-		p.fmt.clearflags();
+		p.fmt.ClearFlags();
 	F:	for ; i < end; i++ {
 			switch format[i] {
 			case '#':
@@ -629,22 +631,22 @@
 			}
 		}
 		// do we have 20 (width)?
-		p.fmt.wid, p.fmt.wid_present, i = parsenum(format, i, end);
+		p.fmt.wid, p.fmt.widPresent, i = parsenum(format, i, end);
 		// do we have .20 (precision)?
 		if i < end && format[i] == '.' {
-			p.fmt.prec, p.fmt.prec_present, i = parsenum(format, i+1, end)
+			p.fmt.prec, p.fmt.precPresent, i = parsenum(format, i+1, end)
 		}
 		c, w = utf8.DecodeRuneInString(format[i:]);
 		i += w;
 		// percent is special - absorbs no operand
 		if c == '%' {
-			p.add('%');	// TODO: should we bother with width & prec?
+			p.buf.WriteByte('%');	// TODO: should we bother with width & prec?
 			continue;
 		}
 		if fieldnum >= v.NumField() {	// out of operands
-			p.add('%');
+			p.buf.WriteByte('%');
 			p.add(c);
-			p.addstr("(missing)");
+			p.buf.Write(missingBytes);
 			continue;
 		}
 		field := getField(v, fieldnum);
@@ -660,15 +662,14 @@
 			}
 		}
 
-		s := "";
 		switch c {
 		// bool
 		case 't':
 			if v, ok := getBool(field); ok {
 				if v {
-					s = "true"
+					p.buf.Write(trueBytes)
 				} else {
-					s = "false"
+					p.buf.Write(falseBytes)
 				}
 			} else {
 				goto badtype
@@ -677,26 +678,26 @@
 		// int
 		case 'b':
 			if v, _, ok := getInt(field); ok {
-				s = p.fmt.Fmt_b64(uint64(v)).Str()	// always unsigned
+				p.fmt.Fmt_b64(uint64(v))	// always unsigned
 			} else if v, ok := getFloat32(field); ok {
-				s = p.fmt.Fmt_fb32(v).Str()
+				p.fmt.Fmt_fb32(v)
 			} else if v, ok := getFloat64(field); ok {
-				s = p.fmt.Fmt_fb64(v).Str()
+				p.fmt.Fmt_fb64(v)
 			} else {
 				goto badtype
 			}
 		case 'c':
 			if v, _, ok := getInt(field); ok {
-				s = p.fmt.Fmt_c(int(v)).Str()
+				p.fmt.Fmt_c(int(v))
 			} else {
 				goto badtype
 			}
 		case 'd':
 			if v, signed, ok := getInt(field); ok {
 				if signed {
-					s = p.fmt.Fmt_d64(v).Str()
+					p.fmt.Fmt_d64(v)
 				} else {
-					s = p.fmt.Fmt_ud64(uint64(v)).Str()
+					p.fmt.Fmt_ud64(uint64(v))
 				}
 			} else {
 				goto badtype
@@ -704,9 +705,9 @@
 		case 'o':
 			if v, signed, ok := getInt(field); ok {
 				if signed {
-					s = p.fmt.Fmt_o64(v).Str()
+					p.fmt.Fmt_o64(v)
 				} else {
-					s = p.fmt.Fmt_uo64(uint64(v)).Str()
+					p.fmt.Fmt_uo64(uint64(v))
 				}
 			} else {
 				goto badtype
@@ -714,24 +715,24 @@
 		case 'x':
 			if v, signed, ok := getInt(field); ok {
 				if signed {
-					s = p.fmt.Fmt_x64(v).Str()
+					p.fmt.Fmt_x64(v)
 				} else {
-					s = p.fmt.Fmt_ux64(uint64(v)).Str()
+					p.fmt.Fmt_ux64(uint64(v))
 				}
 			} else if v, ok := getString(field); ok {
-				s = p.fmt.Fmt_sx(v).Str()
+				p.fmt.Fmt_sx(v)
 			} else {
 				goto badtype
 			}
 		case 'X':
 			if v, signed, ok := getInt(field); ok {
 				if signed {
-					s = p.fmt.Fmt_X64(v).Str()
+					p.fmt.Fmt_X64(v)
 				} else {
-					s = p.fmt.Fmt_uX64(uint64(v)).Str()
+					p.fmt.Fmt_uX64(uint64(v))
 				}
 			} else if v, ok := getString(field); ok {
-				s = p.fmt.Fmt_sX(v).Str()
+				p.fmt.Fmt_sX(v)
 			} else {
 				goto badtype
 			}
@@ -739,41 +740,41 @@
 		// float
 		case 'e':
 			if v, ok := getFloat32(field); ok {
-				s = p.fmt.Fmt_e32(v).Str()
+				p.fmt.Fmt_e32(v)
 			} else if v, ok := getFloat64(field); ok {
-				s = p.fmt.Fmt_e64(v).Str()
+				p.fmt.Fmt_e64(v)
 			} else {
 				goto badtype
 			}
 		case 'E':
 			if v, ok := getFloat32(field); ok {
-				s = p.fmt.Fmt_E32(v).Str()
+				p.fmt.Fmt_E32(v)
 			} else if v, ok := getFloat64(field); ok {
-				s = p.fmt.Fmt_E64(v).Str()
+				p.fmt.Fmt_E64(v)
 			} else {
 				goto badtype
 			}
 		case 'f':
 			if v, ok := getFloat32(field); ok {
-				s = p.fmt.Fmt_f32(v).Str()
+				p.fmt.Fmt_f32(v)
 			} else if v, ok := getFloat64(field); ok {
-				s = p.fmt.Fmt_f64(v).Str()
+				p.fmt.Fmt_f64(v)
 			} else {
 				goto badtype
 			}
 		case 'g':
 			if v, ok := getFloat32(field); ok {
-				s = p.fmt.Fmt_g32(v).Str()
+				p.fmt.Fmt_g32(v)
 			} else if v, ok := getFloat64(field); ok {
-				s = p.fmt.Fmt_g64(v).Str()
+				p.fmt.Fmt_g64(v)
 			} else {
 				goto badtype
 			}
 		case 'G':
 			if v, ok := getFloat32(field); ok {
-				s = p.fmt.Fmt_G32(v).Str()
+				p.fmt.Fmt_G32(v)
 			} else if v, ok := getFloat64(field); ok {
-				s = p.fmt.Fmt_G64(v).Str()
+				p.fmt.Fmt_G64(v)
 			} else {
 				goto badtype
 			}
@@ -783,18 +784,18 @@
 			if inter != nil {
 				// if object implements String, use the result.
 				if stringer, ok := inter.(Stringer); ok {
-					s = p.fmt.Fmt_s(stringer.String()).Str();
+					p.fmt.Fmt_s(stringer.String());
 					break;
 				}
 			}
 			if v, ok := getString(field); ok {
-				s = p.fmt.Fmt_s(v).Str()
+				p.fmt.Fmt_s(v)
 			} else {
 				goto badtype
 			}
 		case 'q':
 			if v, ok := getString(field); ok {
-				s = p.fmt.Fmt_q(v).Str()
+				p.fmt.Fmt_q(v)
 			} else {
 				goto badtype
 			}
@@ -803,9 +804,10 @@
 		case 'p':
 			if v, ok := getPtr(field); ok {
 				if v == 0 {
-					s = "<nil>"
+					p.buf.Write(nilAngleBytes)
 				} else {
-					s = "0x" + p.fmt.Fmt_uX64(uint64(v)).Str()
+					p.buf.WriteString("0x");	// literal prefix, written directly so p.fmt's width/precision flags are not applied to it
+					p.fmt.Fmt_uX64(uint64(v));
 				}
 			} else {
 				goto badtype
@@ -820,29 +822,31 @@
 
 		// the value's type
 		case 'T':
-			s = field.Type().String()
+			p.buf.WriteString(field.Type().String())
 
 		default:
 		badtype:
-			s = "%" + string(c) + "(" + field.Type().String() + "=";
-			p.addstr(s);
+			p.buf.WriteByte('%');
+			p.add(c);
+			p.buf.WriteByte('(');
+			p.buf.WriteString(field.Type().String());
+			p.buf.WriteByte('=');
 			p.printField(field, false, false, 0);
-			s = ")";
+			p.buf.WriteByte(')');
 		}
-		p.addstr(s);
 	}
 	if fieldnum < v.NumField() {
-		p.addstr("?(extra ");
+		p.buf.Write(extraBytes);
 		for ; fieldnum < v.NumField(); fieldnum++ {
 			field := getField(v, fieldnum);
-			p.addstr(field.Type().String());
-			p.addstr("=");
+			p.buf.WriteString(field.Type().String());
+			p.buf.WriteByte('=');
 			p.printField(field, false, false, 0);
 			if fieldnum+1 < v.NumField() {
-				p.addstr(", ")
+				p.buf.Write(commaSpaceBytes)
 			}
 		}
-		p.addstr(")");
+		p.buf.WriteByte(')');
 	}
 }
 
@@ -854,12 +858,12 @@
 		if fieldnum > 0 {
 			_, is_string := field.(*reflect.StringValue);
 			if addspace || !is_string && !prev_string {
-				p.add(' ')
+				p.buf.WriteByte(' ')
 			}
 		}
 		prev_string = p.printField(field, false, false, 0);
 	}
 	if addnewline {
-		p.add('\n')
+		p.buf.WriteByte('\n')
 	}
 }