proto: improve slice growth in MarshalAppend

When allocating more space for the destination message in MarshalAppend,
use the same slice growth algorithm as the Go runtime's append rather
than allocating precisely the desired space.

Change-Id: If6033f6f7abdca473bc5188c4d3938ce57d3bdd2
Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/197758
Reviewed-by: Joe Tsai <joetsai@google.com>
diff --git a/proto/encode.go b/proto/encode.go
index a57133c..756082c 100644
--- a/proto/encode.go
+++ b/proto/encode.go
@@ -101,7 +101,7 @@
 		!(o.Deterministic && methods.Flags&protoiface.SupportMarshalDeterministic == 0) {
 		sz := methods.Size(m, protoiface.MarshalOptions(o))
 		if cap(b) < len(b)+sz {
-			x := make([]byte, len(b), len(b)+sz)
+			x := make([]byte, len(b), growcap(cap(b), len(b)+sz))
 			copy(x, b)
 			b = x
 		}
@@ -111,6 +111,32 @@
 	return o.marshalMessageSlow(b, m)
 }
 
+// growcap scales up the capacity of a slice.
+//
+// Given a slice with a current capacity of oldcap and a desired
+// capacity of wantcap, growcap returns a new capacity >= wantcap.
+//
+// The algorithm is mostly identical to the one used by append as of Go 1.14.
+func growcap(oldcap, wantcap int) (newcap int) {
+	if wantcap > oldcap*2 {
+		newcap = wantcap
+	} else if oldcap < 1024 {
+		// The Go 1.14 runtime takes this case when len(s) < 1024,
+		// not when cap(s) < 1024. The difference doesn't seem
+		// significant here.
+		newcap = oldcap * 2
+	} else {
+		newcap = oldcap
+		for 0 < newcap && newcap < wantcap {
+			newcap += newcap / 4
+		}
+		if newcap <= 0 {
+			newcap = wantcap
+		}
+	}
+	return newcap
+}
+
 func (o MarshalOptions) marshalMessageSlow(b []byte, m protoreflect.Message) ([]byte, error) {
 	if messageset.IsMessageSet(m.Descriptor()) {
 		return marshalMessageSet(b, m, o)
diff --git a/proto/encode_test.go b/proto/encode_test.go
index b5af461..663612d 100644
--- a/proto/encode_test.go
+++ b/proto/encode_test.go
@@ -155,3 +155,30 @@
 		t.Errorf("Marshal return non-empty, want empty")
 	}
 }
+
+func TestMarshalAppendAllocations(t *testing.T) {
+	m := &test3pb.TestAllTypes{OptionalInt32: 1}
+	size := proto.Size(m)
+	const count = 1000
+	b := make([]byte, size)
+	// AllocsPerRun returns an integral value.
+	marshalAllocs := testing.AllocsPerRun(count, func() {
+		_, err := proto.MarshalOptions{}.MarshalAppend(b[:0], m)
+		if err != nil {
+			t.Fatal(err)
+		}
+	})
+	b = nil
+	marshalAppendAllocs := testing.AllocsPerRun(count, func() {
+		var err error
+		b, err = proto.MarshalOptions{}.MarshalAppend(b, m)
+		if err != nil {
+			t.Fatal(err)
+		}
+	})
+	if marshalAllocs != marshalAppendAllocs {
+		t.Errorf("%v allocs/op when writing to a preallocated buffer", marshalAllocs)
+		t.Errorf("%v allocs/op when repeatedly appending to a slice", marshalAppendAllocs)
+		t.Errorf("expect amortized allocs/op to be identical")
+	}
+}