runtime: allow physical page aligned stacks to be allocated

Add a physPageAlignedStacks boolean which, if set, causes stack allocations
to be over-allocated by one physical page; the allocation is then rounded up
to physical page alignment and the unused memory around it is freed again.
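
Roughly, the scheme looks like this (a standalone sketch, not the actual
runtime code; alloc and free stand in for the page allocator's alloc/free
and all names are illustrative):

    // allocAligned over-allocates by one physical page, rounds the base
    // up to a physical page boundary and frees the slack on both sides.
    func allocAligned(npages, pageSize, physPageSize uintptr,
        alloc func(npages uintptr) uintptr,
        free func(base, npages uintptr)) uintptr {

        extra := physPageSize / pageSize
        base := alloc(npages + extra) // pageSize-aligned only
        aligned := (base + physPageSize - 1) &^ (physPageSize - 1) // alignUp
        if before := aligned - base; before > 0 {
            free(base, before/pageSize) // slack before the aligned region
        }
        if after := physPageSize - (aligned - base); after > 0 {
            free(aligned+npages*pageSize, after/pageSize) // slack after
        }
        return aligned
    }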

OpenBSD/octeon has 16KB physical pages and requires stacks to be physical page
aligned in order for them to be remapped as MAP_STACK. This change allows Go
to work on this platform.
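
For example, with 16KB physical pages and Go's 8KB runtime pages, a stack
allocation grows by two runtime pages (16KB). Rounding the base up to a
16KB boundary then leaves either both extra pages at the end or one page
on each side, and that slack is handed back to the page allocator.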

Based on a suggestion from mknyszek in issue #41008.

Updates #40995
Fixes #41008

Change-Id: Ia5d652292b515916db473043b41f6030094461d8
Reviewed-on: https://go-review.googlesource.com/c/go/+/266919
Trust: Joel Sing <joel@sing.id.au>
Reviewed-by: Austin Clements <austin@google.com>
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go
index b8429ee..1855330 100644
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -44,6 +44,11 @@
 	// Must be a multiple of the pageInUse bitmap element size and
 	// must also evenly divide pagesPerArena.
 	pagesPerReclaimerChunk = 512
+
+	// physPageAlignedStacks indicates whether stack allocations must be
+	// physical page aligned. This is a requirement for MAP_STACK on
+	// OpenBSD.
+	physPageAlignedStacks = GOOS == "openbsd"
 )
 
 // Main malloc heap.
@@ -1121,9 +1126,16 @@
 	gp := getg()
 	base, scav := uintptr(0), uintptr(0)
 
+	// On some platforms we need to provide physical page aligned stack
+	// allocations. Where the page size is at least the physical page
+	// size, we already manage to do this by default.
+	needPhysPageAlign := physPageAlignedStacks && typ == spanAllocStack && pageSize < physPageSize
+
 	// If the allocation is small enough, try the page cache!
+	// The page cache does not support aligned allocations, so we cannot use
+	// it if we need to provide a physical page aligned stack allocation.
 	pp := gp.m.p.ptr()
-	if pp != nil && npages < pageCachePages/4 {
+	if !needPhysPageAlign && pp != nil && npages < pageCachePages/4 {
 		c := &pp.pcache
 
 		// If the cache is empty, refill it.
@@ -1149,6 +1161,13 @@
 	// whole job done without the heap lock.
 	lock(&h.lock)
 
+	if needPhysPageAlign {
+		// Overallocate by a physical page to allow for later alignment.
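+		// physPageSize is a larger power of two than pageSize here, so
+		// this adds a whole number of runtime pages.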
+		npages += physPageSize / pageSize
+	}
+
 	if base == 0 {
 		// Try to acquire a base address.
 		base, scav = h.pages.alloc(npages)
@@ -1168,6 +1187,25 @@
 		// one now that we have the heap lock.
 		s = h.allocMSpanLocked()
 	}
+
+	if needPhysPageAlign {
+		allocBase, allocPages := base, npages
+		base = alignUp(allocBase, physPageSize)
+		npages -= physPageSize / pageSize
+
+		// Return memory around the aligned allocation.
+		spaceBefore := base - allocBase
+		if spaceBefore > 0 {
+			h.pages.free(allocBase, spaceBefore/pageSize)
+		}
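+		// Whatever of the extra physical page was not trimmed from the
+		// front of the allocation is left over at the end.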
+		spaceAfter := (allocPages-npages)*pageSize - spaceBefore
+		if spaceAfter > 0 {
+			h.pages.free(base+npages*pageSize, spaceAfter/pageSize)
+		}
+	}
+
 	unlock(&h.lock)
 
 HaveSpan: