syscall/js: allocate arg slices on stack for small numbers of args

The existing implementation causes unnecessary heap allocations for
javascript syscalls: Call, Invoke, and New. The new change seeks to
hint the Go compiler to allocate arg slices with length <=16 to the
stack.

Original Work: CL 367045
- Calling a JavaScript function with 16 arguments or fewer will not
induce two additional heap allocations, at least with the current Go
compiler.
- Using syscall/js features with slices and strings of
statically-known length will not cause them to be escaped to the heap,
at least with the current Go compiler.
- The reduction in allocations has the additional benefit that the
garbage collector runs less often, blocking WebAssembly's one and only
thread less often.

Fixes #39740

Change-Id: I815047e1d4f8ada796318e2064d38d3e63f73098
GitHub-Last-Rev: 36df1b33a4506e216767d8a73395f2fafdd80eba
GitHub-Pull-Request: golang/go#66684
Reviewed-on: https://go-review.googlesource.com/c/go/+/576575
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
diff --git a/src/syscall/js/js.go b/src/syscall/js/js.go
index f7e32eb..74c02cd 100644
--- a/src/syscall/js/js.go
+++ b/src/syscall/js/js.go
@@ -210,7 +210,13 @@
 	}
 }
 
+// stringVal copies string x to Javascript and returns a ref.
+//
+// (noescape): This is safe because no references are maintained to the
+//             Go string x after the syscall returns.
+//
 //go:wasmimport gojs syscall/js.stringVal
+//go:noescape
 func stringVal(x string) ref
 
 // Type represents the JavaScript type of a Value.
@@ -294,7 +300,13 @@
 	return r
 }
 
+// valueGet returns a ref to JavaScript property p of ref v.
+//
+// (noescape): This is safe because no references are maintained to the
+//             Go string p after the syscall returns.
+//
 //go:wasmimport gojs syscall/js.valueGet
+//go:noescape
 func valueGet(v ref, p string) ref
 
 // Set sets the JavaScript property p of value v to ValueOf(x).
@@ -309,7 +321,13 @@
 	runtime.KeepAlive(xv)
 }
 
+// valueSet sets property p of ref v to ref x.
+//
+// (noescape): This is safe because no references are maintained to the
+//             Go string p after the syscall returns.
+//
 //go:wasmimport gojs syscall/js.valueSet
+//go:noescape
 func valueSet(v ref, p string, x ref)
 
 // Delete deletes the JavaScript property p of value v.
@@ -322,7 +340,13 @@
 	runtime.KeepAlive(v)
 }
 
+// valueDelete deletes the JavaScript property p of ref v.
+//
+// (noescape): This is safe because no references are maintained to the
+//             Go string p after the syscall returns.
+//
 //go:wasmimport gojs syscall/js.valueDelete
+//go:noescape
 func valueDelete(v ref, p string)
 
 // Index returns JavaScript index i of value v.
@@ -354,15 +378,36 @@
 //go:wasmimport gojs syscall/js.valueSetIndex
 func valueSetIndex(v ref, i int, x ref)
 
-func makeArgs(args []any) ([]Value, []ref) {
-	argVals := make([]Value, len(args))
-	argRefs := make([]ref, len(args))
+// makeArgSlices makes two slices to hold JavaScript arg data.
+// It can be paired with storeArgs to make-and-store JavaScript arg slices.
+// However, the two functions are separated to ensure makeArgSlices is inlined
+// which will prevent the slices from being heap allocated for small (<=16)
+// numbers of args.
+func makeArgSlices(size int) (argVals []Value, argRefs []ref) {
+	// value chosen for being power of two, and enough to handle all web APIs
+	// in particular, note that WebGL2's texImage2D takes up to 10 arguments
+	const maxStackArgs = 16
+	if size <= maxStackArgs {
+		// as long as makeArgs is inlined, these will be stack-allocated
+		argVals = make([]Value, size, maxStackArgs)
+		argRefs = make([]ref, size, maxStackArgs)
+	} else {
+		// allocates on the heap, but exceeding maxStackArgs should be rare
+		argVals = make([]Value, size)
+		argRefs = make([]ref, size)
+	}
+	return
+}
+
+// storeArgs maps input args onto respective Value and ref slices.
+// It can be paired with makeArgSlices to make-and-store JavaScript arg slices.
+func storeArgs(args []any, argValsDst []Value, argRefsDst []ref) {
+	// would go in makeArgs if the combined func was simple enough to inline
 	for i, arg := range args {
 		v := ValueOf(arg)
-		argVals[i] = v
-		argRefs[i] = v.ref
+		argValsDst[i] = v
+		argRefsDst[i] = v.ref
 	}
-	return argVals, argRefs
 }
 
 // Length returns the JavaScript property "length" of v.
@@ -383,7 +428,8 @@
 // It panics if v has no method m.
 // The arguments get mapped to JavaScript values according to the ValueOf function.
 func (v Value) Call(m string, args ...any) Value {
-	argVals, argRefs := makeArgs(args)
+	argVals, argRefs := makeArgSlices(len(args))
+	storeArgs(args, argVals, argRefs)
 	res, ok := valueCall(v.ref, m, argRefs)
 	runtime.KeepAlive(v)
 	runtime.KeepAlive(argVals)
@@ -399,15 +445,24 @@
 	return makeValue(res)
 }
 
+// valueCall does a JavaScript call to the method name m of ref v with the given arguments.
+//
+// (noescape): This is safe because no references are maintained to the
+//             Go string m after the syscall returns. Additionally, the args slice
+//             is only used temporarily to collect the JavaScript objects for
+//             the JavaScript method invocation.
+//
 //go:wasmimport gojs syscall/js.valueCall
 //go:nosplit
+//go:noescape
 func valueCall(v ref, m string, args []ref) (ref, bool)
 
 // Invoke does a JavaScript call of the value v with the given arguments.
 // It panics if v is not a JavaScript function.
 // The arguments get mapped to JavaScript values according to the ValueOf function.
 func (v Value) Invoke(args ...any) Value {
-	argVals, argRefs := makeArgs(args)
+	argVals, argRefs := makeArgSlices(len(args))
+	storeArgs(args, argVals, argRefs)
 	res, ok := valueInvoke(v.ref, argRefs)
 	runtime.KeepAlive(v)
 	runtime.KeepAlive(argVals)
@@ -420,14 +475,22 @@
 	return makeValue(res)
 }
 
+// valueInvoke does a JavaScript call to value v with the given arguments.
+//
+// (noescape): This is safe because the args slice is only used temporarily
+//             to collect the JavaScript objects for the JavaScript method
+//             invocation.
+//
 //go:wasmimport gojs syscall/js.valueInvoke
+//go:noescape
 func valueInvoke(v ref, args []ref) (ref, bool)
 
 // New uses JavaScript's "new" operator with value v as constructor and the given arguments.
 // It panics if v is not a JavaScript function.
 // The arguments get mapped to JavaScript values according to the ValueOf function.
 func (v Value) New(args ...any) Value {
-	argVals, argRefs := makeArgs(args)
+	argVals, argRefs := makeArgSlices(len(args))
+	storeArgs(args, argVals, argRefs)
 	res, ok := valueNew(v.ref, argRefs)
 	runtime.KeepAlive(v)
 	runtime.KeepAlive(argVals)
@@ -440,7 +503,13 @@
 	return makeValue(res)
 }
 
+// valueNew uses JavaScript's "new" operator with value v as a constructor and the given arguments.
+//
+// (noescape): This is safe because the args slice is only used temporarily
+//             to collect the JavaScript objects for the constructor execution.
+//
 //go:wasmimport gojs syscall/js.valueNew
+//go:noescape
 func valueNew(v ref, args []ref) (ref, bool)
 
 func (v Value) isNumber() bool {
@@ -543,7 +612,13 @@
 //go:wasmimport gojs syscall/js.valuePrepareString
 func valuePrepareString(v ref) (ref, int)
 
+// valueLoadString loads string data located at ref v into byte slice b.
+//
+// (noescape): This is safe because the byte slice is only used as a destination
+//             for storing the string data and references to it are not maintained.
+//
 //go:wasmimport gojs syscall/js.valueLoadString
+//go:noescape
 func valueLoadString(v ref, b []byte)
 
 // InstanceOf reports whether v is an instance of type t according to JavaScript's instanceof operator.
@@ -581,7 +656,13 @@
 	return n
 }
 
+// copyBytesToGo copies bytes from src to dst.
+//
+// (noescape): This is safe because the dst byte slice is only used as a dst
+//             copy buffer and no references to it are maintained.
+//
 //go:wasmimport gojs syscall/js.copyBytesToGo
+//go:noescape
 func copyBytesToGo(dst []byte, src ref) (int, bool)
 
 // CopyBytesToJS copies bytes from src to dst.
@@ -596,5 +677,11 @@
 	return n
 }
 
+// copyBytesToJs copies bytes from src to dst.
+//
+// (noescape): This is safe because the src byte slice is only used as a src
+//             copy buffer and no references to it are maintained.
+//
 //go:wasmimport gojs syscall/js.copyBytesToJS
+//go:noescape
 func copyBytesToJS(dst ref, src []byte) (int, bool)
diff --git a/src/syscall/js/js_test.go b/src/syscall/js/js_test.go
index 8823421..cc809ac 100644
--- a/src/syscall/js/js_test.go
+++ b/src/syscall/js/js_test.go
@@ -581,6 +581,80 @@
 	}
 }
 
+// This table is used for allocation tests. We expect a specific allocation
+// behavior to be seen, depending on the number of arguments applied to various
+// JavaScript functions.
+// Note: All JavaScript functions return a JavaScript array, which will cause
+// one allocation to be created to track the Value.gcPtr for the Value finalizer.
+var allocTests = []struct {
+	argLen  int // The number of arguments to use for the syscall
+	expected int // The expected number of allocations
+}{
+	// For less than or equal to 16 arguments, we expect 1 alloction:
+	// - makeValue new(ref)
+	{0,  1},
+	{2,  1},
+	{15, 1},
+	{16, 1},
+	// For greater than 16 arguments, we expect 3 alloction:
+	// - makeValue: new(ref)
+	// - makeArgSlices: argVals = make([]Value, size)
+	// - makeArgSlices: argRefs = make([]ref, size)
+	{17, 3},
+	{32, 3},
+	{42, 3},
+}
+
+// TestCallAllocations ensures the correct allocation profile for Value.Call
+func TestCallAllocations(t *testing.T) {
+	for _, test := range allocTests {
+		args := make([]any, test.argLen)
+
+		tmpArray := js.Global().Get("Array").New(0)
+		numAllocs := testing.AllocsPerRun(100, func() {
+			tmpArray.Call("concat", args...)
+		});
+
+		if numAllocs != float64(test.expected) {
+			t.Errorf("got numAllocs %#v, want %#v", numAllocs, test.expected)
+		}
+	}
+}
+
+// TestInvokeAllocations ensures the correct allocation profile for Value.Invoke
+func TestInvokeAllocations(t *testing.T) {
+	for _, test := range allocTests {
+		args := make([]any, test.argLen)
+
+		tmpArray := js.Global().Get("Array").New(0)
+		concatFunc := tmpArray.Get("concat").Call("bind", tmpArray)
+		numAllocs := testing.AllocsPerRun(100, func() {
+			concatFunc.Invoke(args...)
+		});
+
+		if numAllocs != float64(test.expected) {
+			t.Errorf("got numAllocs %#v, want %#v", numAllocs, test.expected)
+		}
+	}
+}
+
+// TestNewAllocations ensures the correct allocation profile for Value.New
+func TestNewAllocations(t *testing.T) {
+	arrayConstructor := js.Global().Get("Array")
+
+	for _, test := range allocTests {
+		args := make([]any, test.argLen)
+
+		numAllocs := testing.AllocsPerRun(100, func() {
+			arrayConstructor.New(args...)
+		});
+
+		if numAllocs != float64(test.expected) {
+			t.Errorf("got numAllocs %#v, want %#v", numAllocs, test.expected)
+		}
+	}
+}
+
 // BenchmarkDOM is a simple benchmark which emulates a webapp making DOM operations.
 // It creates a div, and sets its id. Then searches by that id and sets some data.
 // Finally it removes that div.