cmd/cgo: optimize cgoCheckPointer call

Currently cgoCheckPointer is only used with one optional argument.
Using a slice for the optional arguments is quite expensive, hence
replace it with a single interface{}. This results in ~30% improvement.

When checking struct fields, they quite often end up being without
pointers. Check this before calling cgoCheckPointer, which results in
additional ~20% improvement.

Inline some p == nil checks from cgoIsGoPointer which gives
additional ~15% improvement.

All of this translates to:

name                             old time/op  new time/op  delta
CgoCall/add-int-32               46.9ns ± 1%  46.6ns ± 1%   -0.75%  (p=0.000 n=18+20)
CgoCall/one-pointer-32            143ns ± 1%    87ns ± 1%  -38.96%  (p=0.000 n=20+20)
CgoCall/eight-pointers-32         767ns ± 0%   327ns ± 1%  -57.30%  (p=0.000 n=18+16)
CgoCall/eight-pointers-nil-32     110ns ± 1%    89ns ± 2%  -19.10%  (p=0.000 n=19+19)
CgoCall/eight-pointers-array-32  5.09µs ± 1%  3.56µs ± 2%  -30.09%  (p=0.000 n=19+19)
CgoCall/eight-pointers-slice-32  3.92µs ± 0%  2.57µs ± 2%  -34.48%  (p=0.000 n=20+20)

Change-Id: I2aa9f5ae8962a9a41a7fb1db0c300893109d0d75
Reviewed-on: https://go-review.googlesource.com/c/go/+/198081
Run-TryBot: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
diff --git a/misc/cgo/test/test.go b/misc/cgo/test/test.go
index 0aa80eb..68bfa90 100644
--- a/misc/cgo/test/test.go
+++ b/misc/cgo/test/test.go
@@ -115,6 +115,44 @@
 	return x+y;
 };
 
+// Following mimicks vulkan complex definitions for benchmarking cgocheck overhead.
+
+typedef uint32_t VkFlags;
+typedef VkFlags  VkDeviceQueueCreateFlags;
+typedef uint32_t VkStructureType;
+
+typedef struct VkDeviceQueueCreateInfo {
+    VkStructureType             sType;
+    const void*                 pNext;
+    VkDeviceQueueCreateFlags    flags;
+    uint32_t                    queueFamilyIndex;
+    uint32_t                    queueCount;
+    const float*                pQueuePriorities;
+} VkDeviceQueueCreateInfo;
+
+typedef struct VkPhysicalDeviceFeatures {
+    uint32_t bools[56];
+} VkPhysicalDeviceFeatures;
+
+typedef struct VkDeviceCreateInfo {
+    VkStructureType                    sType;
+    const void*                        pNext;
+    VkFlags                            flags;
+    uint32_t                           queueCreateInfoCount;
+    const VkDeviceQueueCreateInfo*     pQueueCreateInfos;
+    uint32_t                           enabledLayerCount;
+    const char* const*                 ppEnabledLayerNames;
+    uint32_t                           enabledExtensionCount;
+    const char* const*                 ppEnabledExtensionNames;
+    const VkPhysicalDeviceFeatures*    pEnabledFeatures;
+} VkDeviceCreateInfo;
+
+void handleComplexPointer(VkDeviceCreateInfo *a0) {}
+void handleComplexPointer8(
+	VkDeviceCreateInfo *a0, VkDeviceCreateInfo *a1, VkDeviceCreateInfo *a2, VkDeviceCreateInfo *a3,
+	VkDeviceCreateInfo *a4, VkDeviceCreateInfo *a5, VkDeviceCreateInfo *a6, VkDeviceCreateInfo *a7
+) {}
+
 // complex alignment
 
 struct {
@@ -993,11 +1031,45 @@
 }
 
 func benchCgoCall(b *testing.B) {
-	const x = C.int(2)
-	const y = C.int(3)
-	for i := 0; i < b.N; i++ {
-		C.add(x, y)
-	}
+	b.Run("add-int", func(b *testing.B) {
+		const x = C.int(2)
+		const y = C.int(3)
+
+		for i := 0; i < b.N; i++ {
+			C.add(x, y)
+		}
+	})
+
+	b.Run("one-pointer", func(b *testing.B) {
+		var a0 C.VkDeviceCreateInfo
+		for i := 0; i < b.N; i++ {
+			C.handleComplexPointer(&a0)
+		}
+	})
+	b.Run("eight-pointers", func(b *testing.B) {
+		var a0, a1, a2, a3, a4, a5, a6, a7 C.VkDeviceCreateInfo
+		for i := 0; i < b.N; i++ {
+			C.handleComplexPointer8(&a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7)
+		}
+	})
+	b.Run("eight-pointers-nil", func(b *testing.B) {
+		var a0, a1, a2, a3, a4, a5, a6, a7 *C.VkDeviceCreateInfo
+		for i := 0; i < b.N; i++ {
+			C.handleComplexPointer8(a0, a1, a2, a3, a4, a5, a6, a7)
+		}
+	})
+	b.Run("eight-pointers-array", func(b *testing.B) {
+		var a [8]C.VkDeviceCreateInfo
+		for i := 0; i < b.N; i++ {
+			C.handleComplexPointer8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7])
+		}
+	})
+	b.Run("eight-pointers-slice", func(b *testing.B) {
+		a := make([]C.VkDeviceCreateInfo, 8)
+		for i := 0; i < b.N; i++ {
+			C.handleComplexPointer8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7])
+		}
+	})
 }
 
 // Benchmark measuring overhead from Go to C and back to Go (via a callback)
diff --git a/src/cmd/cgo/gcc.go b/src/cmd/cgo/gcc.go
index 01b86ad..12d4749 100644
--- a/src/cmd/cgo/gcc.go
+++ b/src/cmd/cgo/gcc.go
@@ -816,7 +816,7 @@
 	// Rewrite C.f(p) to
 	//    func() {
 	//            _cgo0 := p
-	//            _cgoCheckPointer(_cgo0)
+	//            _cgoCheckPointer(_cgo0, nil)
 	//            C.f(_cgo0)
 	//    }()
 	// Using a function literal like this lets us evaluate the
@@ -834,7 +834,7 @@
 	//    defer func() func() {
 	//            _cgo0 := p
 	//            return func() {
-	//                    _cgoCheckPointer(_cgo0)
+	//                    _cgoCheckPointer(_cgo0, nil)
 	//                    C.f(_cgo0)
 	//            }
 	//    }()()
@@ -921,7 +921,7 @@
 		}
 
 		fmt.Fprintf(&sb, "_cgo%d := %s; ", i, gofmtPos(arg, origArg.Pos()))
-		fmt.Fprintf(&sbCheck, "_cgoCheckPointer(_cgo%d); ", i)
+		fmt.Fprintf(&sbCheck, "_cgoCheckPointer(_cgo%d, nil); ", i)
 	}
 
 	if call.Deferred {
diff --git a/src/cmd/cgo/out.go b/src/cmd/cgo/out.go
index 1fddbb6..6bee9b1 100644
--- a/src/cmd/cgo/out.go
+++ b/src/cmd/cgo/out.go
@@ -1631,14 +1631,14 @@
 func _cgo_runtime_cgocallback(unsafe.Pointer, unsafe.Pointer, uintptr, uintptr)
 
 //go:linkname _cgoCheckPointer runtime.cgoCheckPointer
-func _cgoCheckPointer(interface{}, ...interface{})
+func _cgoCheckPointer(interface{}, interface{})
 
 //go:linkname _cgoCheckResult runtime.cgoCheckResult
 func _cgoCheckResult(interface{})
 `
 
 const gccgoGoProlog = `
-func _cgoCheckPointer(interface{}, ...interface{})
+func _cgoCheckPointer(interface{}, interface{})
 
 func _cgoCheckResult(interface{})
 `
@@ -1825,16 +1825,16 @@
 	void *__object;
 } Eface;
 
-extern void runtimeCgoCheckPointer(Eface, Slice)
+extern void runtimeCgoCheckPointer(Eface, Eface)
 	__asm__("runtime.cgoCheckPointer")
 	__attribute__((weak));
 
-extern void localCgoCheckPointer(Eface, Slice)
+extern void localCgoCheckPointer(Eface, Eface)
 	__asm__("GCCGOSYMBOLPREF._cgoCheckPointer");
 
-void localCgoCheckPointer(Eface ptr, Slice args) {
+void localCgoCheckPointer(Eface ptr, Eface arg) {
 	if(runtimeCgoCheckPointer) {
-		runtimeCgoCheckPointer(ptr, args);
+		runtimeCgoCheckPointer(ptr, arg);
 	}
 }
 
diff --git a/src/runtime/cgocall.go b/src/runtime/cgocall.go
index a881ae1..3595e49 100644
--- a/src/runtime/cgocall.go
+++ b/src/runtime/cgocall.go
@@ -406,7 +406,7 @@
 
 // cgoCheckPointer checks if the argument contains a Go pointer that
 // points to a Go pointer, and panics if it does.
-func cgoCheckPointer(ptr interface{}, args ...interface{}) {
+func cgoCheckPointer(ptr interface{}, arg interface{}) {
 	if debug.cgocheck == 0 {
 		return
 	}
@@ -415,15 +415,15 @@
 	t := ep._type
 
 	top := true
-	if len(args) > 0 && (t.kind&kindMask == kindPtr || t.kind&kindMask == kindUnsafePointer) {
+	if arg != nil && (t.kind&kindMask == kindPtr || t.kind&kindMask == kindUnsafePointer) {
 		p := ep.data
 		if t.kind&kindDirectIface == 0 {
 			p = *(*unsafe.Pointer)(p)
 		}
-		if !cgoIsGoPointer(p) {
+		if p == nil || !cgoIsGoPointer(p) {
 			return
 		}
-		aep := (*eface)(unsafe.Pointer(&args[0]))
+		aep := (*eface)(unsafe.Pointer(&arg))
 		switch aep._type.kind & kindMask {
 		case kindBool:
 			if t.kind&kindMask == kindUnsafePointer {
@@ -460,7 +460,7 @@
 // depending on indir. The top parameter is whether we are at the top
 // level, where Go pointers are allowed.
 func cgoCheckArg(t *_type, p unsafe.Pointer, indir, top bool, msg string) {
-	if t.ptrdata == 0 {
+	if t.ptrdata == 0 || p == nil {
 		// If the type has no pointers there is nothing to do.
 		return
 	}
@@ -517,7 +517,7 @@
 		st := (*slicetype)(unsafe.Pointer(t))
 		s := (*slice)(p)
 		p = s.array
-		if !cgoIsGoPointer(p) {
+		if p == nil || !cgoIsGoPointer(p) {
 			return
 		}
 		if !top {
@@ -548,11 +548,17 @@
 			return
 		}
 		for _, f := range st.fields {
+			if f.typ.ptrdata == 0 {
+				continue
+			}
 			cgoCheckArg(f.typ, add(p, f.offset()), true, top, msg)
 		}
 	case kindPtr, kindUnsafePointer:
 		if indir {
 			p = *(*unsafe.Pointer)(p)
+			if p == nil {
+				return
+			}
 		}
 
 		if !cgoIsGoPointer(p) {