compiler,runtime: pass old slice's ptr/len/cap by value to growslice

In the C calling convention, on AMD64, and probably a number of
other architectures, a 3-word struct argument is passed on stack.
This is less efficient than passing in three registers. Further,
this may affect the code generation in other part of the program,
even if the function is not actually called.

Slices are common in Go and append is a common slice operation,
which calls growslice in the growing path. To improve the code
generation, pass the slice header's three fields as separate
values, instead of a struct, to growslice.

The drawback is that this makes the runtime implementation
slightly diverges from the gc runtime.

Change-Id: Ieea46ab1c4e526ff3e7409d6af25e1f216dd1afa
Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/168277
Reviewed-by: Ian Lance Taylor <iant@golang.org>
diff --git a/go/expressions.cc b/go/expressions.cc
index f41647a..018fdbb 100644
--- a/go/expressions.cc
+++ b/go/expressions.cc
@@ -7986,23 +7986,33 @@
   // Using uint here means that if the computation of ntmp overflowed,
   // we will call growslice which will panic.
 
-  Expression* left = Expression::make_temporary_reference(ntmp, loc);
-  left = Expression::make_cast(uint_type, left, loc);
-
   Named_object* capfn = gogo->lookup_global("cap");
   Expression* capref = Expression::make_func_reference(capfn, NULL, loc);
   call_args = new Expression_list();
   call_args->push_back(Expression::make_temporary_reference(s1tmp, loc));
-  Expression* right = Expression::make_call(capref, call_args, false, loc);
+  Expression* cap = Expression::make_call(capref, call_args, false, loc);
+  gogo->lower_expression(function, inserter, &cap);
+  gogo->flatten_expression(function, inserter, &cap);
+  Temporary_statement* c1tmp = Statement::make_temporary(int_type, cap, loc);
+  inserter->insert(c1tmp);
+
+  Expression* left = Expression::make_temporary_reference(ntmp, loc);
+  left = Expression::make_cast(uint_type, left, loc);
+  Expression* right = Expression::make_temporary_reference(c1tmp, loc);
   right = Expression::make_cast(uint_type, right, loc);
 
   Expression* cond = Expression::make_binary(OPERATOR_GT, left, right, loc);
 
+  Type* unsafe_ptr_type = Type::make_pointer_type(Type::make_void_type());
   Expression* a1 = Expression::make_type_descriptor(element_type, loc);
   Expression* a2 = Expression::make_temporary_reference(s1tmp, loc);
-  Expression* a3 = Expression::make_temporary_reference(ntmp, loc);
-  Expression* call = Runtime::make_call(Runtime::GROWSLICE, loc, 3,
-					a1, a2, a3);
+  a2 = slice_type->array_type()->get_value_pointer(gogo, a2, false);
+  a2 = Expression::make_cast(unsafe_ptr_type, a2, loc);
+  Expression* a3 = Expression::make_temporary_reference(l1tmp, loc);
+  Expression* a4 = Expression::make_temporary_reference(c1tmp, loc);
+  Expression* a5 = Expression::make_temporary_reference(ntmp, loc);
+  Expression* call = Runtime::make_call(Runtime::GROWSLICE, loc, 5,
+					a1, a2, a3, a4, a5);
   call = Expression::make_unsafe_cast(slice_type, call, loc);
 
   ref = Expression::make_temporary_reference(s1tmp, loc);
diff --git a/go/runtime.def b/go/runtime.def
index 273c860..83a7152 100644
--- a/go/runtime.def
+++ b/go/runtime.def
@@ -202,7 +202,8 @@
 
 
 // Grow a slice for append.
-DEF_GO_RUNTIME(GROWSLICE, "runtime.growslice", P3(TYPE, SLICE, INT), R1(SLICE))
+DEF_GO_RUNTIME(GROWSLICE, "runtime.growslice",
+               P5(TYPE, POINTER, INT, INT, INT), R1(SLICE))
 
 
 // Register roots (global variables) for the garbage collector.
diff --git a/libgo/go/runtime/slice.go b/libgo/go/runtime/slice.go
index 335532d..9137951 100644
--- a/libgo/go/runtime/slice.go
+++ b/libgo/go/runtime/slice.go
@@ -77,31 +77,31 @@
 // and it returns a new slice with at least that capacity, with the old data
 // copied into it.
 // The new slice's length is set to the requested capacity.
-func growslice(et *_type, old slice, cap int) slice {
+func growslice(et *_type, oldarray unsafe.Pointer, oldlen, oldcap, cap int) slice {
 	if raceenabled {
 		callerpc := getcallerpc()
-		racereadrangepc(old.array, uintptr(old.len*int(et.size)), callerpc, funcPC(growslice))
+		racereadrangepc(oldarray, uintptr(oldlen*int(et.size)), callerpc, funcPC(growslice))
 	}
 	if msanenabled {
-		msanread(old.array, uintptr(old.len*int(et.size)))
+		msanread(oldarray, uintptr(oldlen*int(et.size)))
 	}
 
-	if cap < old.cap {
+	if cap < oldcap {
 		panic(errorString("growslice: cap out of range"))
 	}
 
 	if et.size == 0 {
 		// append should not create a slice with nil pointer but non-zero len.
-		// We assume that append doesn't need to preserve old.array in this case.
+		// We assume that append doesn't need to preserve oldarray in this case.
 		return slice{unsafe.Pointer(&zerobase), cap, cap}
 	}
 
-	newcap := old.cap
+	newcap := oldcap
 	doublecap := newcap + newcap
 	if cap > doublecap {
 		newcap = cap
 	} else {
-		if old.len < 1024 {
+		if oldlen < 1024 {
 			newcap = doublecap
 		} else {
 			// Check 0 < newcap to detect overflow
@@ -125,13 +125,13 @@
 	// For powers of 2, use a variable shift.
 	switch {
 	case et.size == 1:
-		lenmem = uintptr(old.len)
+		lenmem = uintptr(oldlen)
 		newlenmem = uintptr(cap)
 		capmem = roundupsize(uintptr(newcap))
 		overflow = uintptr(newcap) > maxAlloc
 		newcap = int(capmem)
 	case et.size == sys.PtrSize:
-		lenmem = uintptr(old.len) * sys.PtrSize
+		lenmem = uintptr(oldlen) * sys.PtrSize
 		newlenmem = uintptr(cap) * sys.PtrSize
 		capmem = roundupsize(uintptr(newcap) * sys.PtrSize)
 		overflow = uintptr(newcap) > maxAlloc/sys.PtrSize
@@ -144,13 +144,13 @@
 		} else {
 			shift = uintptr(sys.Ctz32(uint32(et.size))) & 31
 		}
-		lenmem = uintptr(old.len) << shift
+		lenmem = uintptr(oldlen) << shift
 		newlenmem = uintptr(cap) << shift
 		capmem = roundupsize(uintptr(newcap) << shift)
 		overflow = uintptr(newcap) > (maxAlloc >> shift)
 		newcap = int(capmem >> shift)
 	default:
-		lenmem = uintptr(old.len) * et.size
+		lenmem = uintptr(oldlen) * et.size
 		newlenmem = uintptr(cap) * et.size
 		capmem, overflow = math.MulUintptr(et.size, uintptr(newcap))
 		capmem = roundupsize(capmem)
@@ -177,19 +177,19 @@
 	var p unsafe.Pointer
 	if et.kind&kindNoPointers != 0 {
 		p = mallocgc(capmem, nil, false)
-		// The append() that calls growslice is going to overwrite from old.len to cap (which will be the new length).
+		// The append() that calls growslice is going to overwrite from oldlen to cap (which will be the new length).
 		// Only clear the part that will not be overwritten.
 		memclrNoHeapPointers(add(p, newlenmem), capmem-newlenmem)
 	} else {
 		// Note: can't use rawmem (which avoids zeroing of memory), because then GC can scan uninitialized memory.
 		p = mallocgc(capmem, et, true)
 		if writeBarrier.enabled {
-			// Only shade the pointers in old.array since we know the destination slice p
+			// Only shade the pointers in oldarray since we know the destination slice p
 			// only contains nil pointers because it has been cleared during alloc.
-			bulkBarrierPreWriteSrcOnly(uintptr(p), uintptr(old.array), lenmem)
+			bulkBarrierPreWriteSrcOnly(uintptr(p), uintptr(oldarray), lenmem)
 		}
 	}
-	memmove(p, old.array, lenmem)
+	memmove(p, oldarray, lenmem)
 
 	return slice{p, cap, newcap}
 }