compiler, runtime: allocate defer records on the stack

When a defer is executed at most once in a function body,
we can allocate the defer record for it on the stack instead
of on the heap.

This should make such defers (which are very common) faster.

This is a port of CL 171758 from the gc repo.

Change-Id: Ib9b83ebeeb5862194d862253ec587b66a2761f3f
Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/190410
Reviewed-by: Ian Lance Taylor <iant@golang.org>
diff --git a/go/runtime.def b/go/runtime.def
index dfd6ebd..7eac880 100644
--- a/go/runtime.def
+++ b/go/runtime.def
@@ -287,6 +287,10 @@
 DEF_GO_RUNTIME(DEFERPROC, "runtime.deferproc", P3(BOOLPTR, UINTPTR, POINTER),
 	       R0())
 
+// Defer a function, with stack-allocated defer structure.
+DEF_GO_RUNTIME(DEFERPROCSTACK, "runtime.deferprocStack",
+               P4(POINTER, BOOLPTR, UINTPTR, POINTER), R0())
+
 
 // Convert an empty interface to an empty interface, returning ok.
 DEF_GO_RUNTIME(IFACEE2E2, "runtime.ifaceE2E2", P1(EFACE), R2(EFACE, BOOL))
diff --git a/go/statements.cc b/go/statements.cc
index 6d9c0eb..27c309e 100644
--- a/go/statements.cc
+++ b/go/statements.cc
@@ -2614,7 +2614,11 @@
   if (this->classification() == STATEMENT_GO)
     s = Statement::make_go_statement(call, location);
   else if (this->classification() == STATEMENT_DEFER)
-    s = Statement::make_defer_statement(call, location);
+    {
+      s = Statement::make_defer_statement(call, location);
+      if ((Node::make_node(this)->encoding() & ESCAPE_MASK) == Node::ESCAPE_NONE)
+        s->defer_statement()->set_on_stack();
+    }
   else
     go_unreachable();
 
@@ -3019,13 +3023,45 @@
   Location loc = this->location();
   Expression* ds = context->function()->func_value()->defer_stack(loc);
 
-  Expression* call = Runtime::make_call(Runtime::DEFERPROC, loc, 3,
-					ds, fn, arg);
+  Expression* call;
+  if (this->on_stack_)
+    {
+      if (context->gogo()->debug_optimization())
+        go_debug(loc, "stack allocated defer");
+
+      Type* defer_type = Defer_statement::defer_struct_type();
+      Expression* defer = Expression::make_allocation(defer_type, loc);
+      defer->allocation_expression()->set_allocate_on_stack();
+      defer->allocation_expression()->set_no_zero();
+      call = Runtime::make_call(Runtime::DEFERPROCSTACK, loc, 4,
+                                defer, ds, fn, arg);
+    }
+  else
+    call = Runtime::make_call(Runtime::DEFERPROC, loc, 3,
+                              ds, fn, arg);
   Bexpression* bcall = call->get_backend(context);
   Bfunction* bfunction = context->function()->func_value()->get_decl();
   return context->backend()->expression_statement(bfunction, bcall);
 }
 
+Type*
+Defer_statement::defer_struct_type()
+{
+  Type* ptr_type = Type::make_pointer_type(Type::make_void_type());
+  Type* uintptr_type = Type::lookup_integer_type("uintptr");
+  Type* bool_type = Type::make_boolean_type();
+  return Type::make_builtin_struct_type(9, // must match runtime._defer in libgo
+                                        "link", ptr_type,
+                                        "frame", ptr_type,
+                                        "panicStack", ptr_type,
+                                        "_panic", ptr_type,
+                                        "pfn", uintptr_type,
+                                        "arg", ptr_type,
+                                        "retaddr", uintptr_type,
+                                        "makefunccanrecover", bool_type,
+                                        "heap", bool_type);
+}
+
 // Dump the AST representation for defer statement.
 
 void
diff --git a/go/statements.h b/go/statements.h
index 7c254d0..311bbaa 100644
--- a/go/statements.h
+++ b/go/statements.h
@@ -24,6 +24,7 @@
 class Block_statement;
 class Return_statement;
 class Thunk_statement;
+class Defer_statement;
 class Goto_statement;
 class Goto_unnamed_statement;
 class Label_statement;
@@ -403,6 +404,11 @@
   Thunk_statement*
   thunk_statement();
 
+  // If this is a defer statement, return it.  Otherwise return NULL.
+  Defer_statement*
+  defer_statement()
+  { return this->convert<Defer_statement, STATEMENT_DEFER>(); }
+
   // If this is a goto statement, return it.  Otherwise return NULL.
   Goto_statement*
   goto_statement()
@@ -1419,15 +1425,26 @@
 {
  public:
   Defer_statement(Call_expression* call, Location location)
-    : Thunk_statement(STATEMENT_DEFER, call, location)
+    : Thunk_statement(STATEMENT_DEFER, call, location),
+      on_stack_(false)
   { }
 
+  void
+  set_on_stack()
+  { this->on_stack_ = true; }
+
  protected:
   Bstatement*
   do_get_backend(Translate_context*);
 
   void
   do_dump_statement(Ast_dump_context*) const;
+
+ private:
+  static Type*
+  defer_struct_type();
+
+  bool on_stack_;
 };
 
 // A goto statement.
diff --git a/libgo/go/runtime/mgcmark.go b/libgo/go/runtime/mgcmark.go
index 1b8a7a3..2463a48 100644
--- a/libgo/go/runtime/mgcmark.go
+++ b/libgo/go/runtime/mgcmark.go
@@ -657,6 +657,11 @@
 		scanstackblock(uintptr(unsafe.Pointer(&gp.context)), unsafe.Sizeof(gp.context), gcw)
 	}
 
+	// Note: in the gc runtime scanstack also scans defer records.
+	// This is necessary as it uses stack objects (a.k.a. stack tracing).
+	// We don't (yet) do stack objects, and regular stack/heap scan
+	// will take care of defer records just fine.
+
 	gp.gcscanvalid = true
 }
 
diff --git a/libgo/go/runtime/panic.go b/libgo/go/runtime/panic.go
index 264ad38..88c0a4d 100644
--- a/libgo/go/runtime/panic.go
+++ b/libgo/go/runtime/panic.go
@@ -13,6 +13,7 @@
 // themselves, so that the compiler will export them.
 //
 //go:linkname deferproc runtime.deferproc
+//go:linkname deferprocStack runtime.deferprocStack
 //go:linkname deferreturn runtime.deferreturn
 //go:linkname setdeferretaddr runtime.setdeferretaddr
 //go:linkname checkdefer runtime.checkdefer
@@ -124,6 +125,38 @@
 	d.makefunccanrecover = false
 }
 
+// deferprocStack queues a new deferred function using the stack-allocated
+// defer record d. d need not be initialized: every field is assigned below.
+// Other arguments are the same as in deferproc.
+//go:nosplit
+func deferprocStack(d *_defer, frame *bool, pfn uintptr, arg unsafe.Pointer) {
+	gp := getg()
+	if gp.m.curg != gp {
+		// go code on the system stack can't defer
+		throw("defer on system stack")
+	}
+	d.pfn = pfn
+	d.retaddr = 0
+	d.makefunccanrecover = false
+	d.heap = false // tells freedefer not to put this record in the defer pool
+	// The lines below implement:
+	//   d.frame = frame
+	//   d.arg = arg
+	//   d._panic = nil
+	//   d.panicStack = gp._panic
+	//   d.link = gp._defer
+	// But without write barriers. They are writes to the stack so they
+	// don't need a write barrier, and furthermore are to uninitialized
+	// memory, so they must not use a write barrier.
+	*(*uintptr)(unsafe.Pointer(&d.frame)) = uintptr(unsafe.Pointer(frame))
+	*(*uintptr)(unsafe.Pointer(&d.arg)) = uintptr(unsafe.Pointer(arg))
+	*(*uintptr)(unsafe.Pointer(&d._panic)) = 0
+	*(*uintptr)(unsafe.Pointer(&d.panicStack)) = uintptr(unsafe.Pointer(gp._panic))
+	*(*uintptr)(unsafe.Pointer(&d.link)) = uintptr(unsafe.Pointer(gp._defer))
+
+	gp._defer = d
+}
+
 // Allocate a Defer, usually using per-P pool.
 // Each defer must be released with freedefer.
 func newdefer() *_defer {
@@ -155,11 +188,13 @@
 			// Duplicate the tail below so if there's a
 			// crash in checkPut we can tell if d was just
 			// allocated or came from the pool.
+			d.heap = true
 			d.link = gp._defer
 			gp._defer = d
 			return d
 		}
 	}
+	d.heap = true
 	d.link = gp._defer
 	gp._defer = d
 	return d
@@ -179,6 +214,9 @@
 	if d.pfn != 0 {
 		freedeferfn()
 	}
+	if !d.heap {
+		return
+	}
 	pp := getg().m.p.ptr()
 	if len(pp.deferpool) == cap(pp.deferpool) {
 		// Transfer half of local cache to the central cache.
diff --git a/libgo/go/runtime/runtime2.go b/libgo/go/runtime/runtime2.go
index 4f823e0..e4dfbdf 100644
--- a/libgo/go/runtime/runtime2.go
+++ b/libgo/go/runtime/runtime2.go
@@ -746,6 +746,12 @@
 
 // A _defer holds an entry on the list of deferred calls.
 // If you add a field here, add code to clear it in freedefer.
+// This struct must match the code in Defer_statement::defer_struct_type
+// in the compiler.
+// Some defers will be allocated on the stack and some on the heap.
+// All defers are logically part of the stack, so write barriers to
+// initialize them are not required. Unlike in the gc runtime, defers are
+// not scanned manually here; the regular stack/heap scan covers them.
 type _defer struct {
 	// The next entry in the stack.
 	link *_defer
@@ -781,6 +787,9 @@
 	// function function will be somewhere in libffi, so __retaddr
 	// is not useful.
 	makefunccanrecover bool
+
+	// Whether the _defer is heap allocated.
+	heap bool
 }
 
 // panics
diff --git a/libgo/go/runtime/stack_test.go b/libgo/go/runtime/stack_test.go
new file mode 100644
index 0000000..b696253
--- /dev/null
+++ b/libgo/go/runtime/stack_test.go
@@ -0,0 +1,62 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import "testing"
+
+func TestDeferHeapAndStack(t *testing.T) {
+	P := 4     // processors
+	N := 10000 // iterations
+	D := 200   // stack depth
+
+	if testing.Short() {
+		P /= 2
+		N /= 10
+		D /= 10
+	}
+	c := make(chan bool) // each worker goroutine sends once when it has finished
+	for p := 0; p < P; p++ {
+		go func() {
+			for i := 0; i < N; i++ {
+				if deferHeapAndStack(D) != 2*D {
+					panic("bad result")
+				}
+			}
+			c <- true
+		}()
+	}
+	for p := 0; p < P; p++ {
+		<-c
+	}
+}
+
+// deferHeapAndStack(n) computes 2*n: each of the n recursion levels defers two increments of r.
+func deferHeapAndStack(n int) (r int) {
+	if n == 0 {
+		return 0
+	}
+	if n%2 == 0 {
+		// heap-allocated defers: a defer inside a loop may execute more than once
+		for i := 0; i < 2; i++ {
+			defer func() {
+				r++
+			}()
+		}
+	} else {
+		// stack-allocated defers: each defer statement executes at most once
+		defer func() {
+			r++
+		}()
+		defer func() {
+			r++
+		}()
+	}
+	r = deferHeapAndStack(n - 1)
+	escapeMe(new([1024]byte)) // force some GCs
+	return
+}
+
+// Pass a value to escapeMe to force it to escape (NOTE(review): presumably a call through a func variable is opaque to escape analysis; confirm).
+var escapeMe = func(x interface{}) {}