runtime: validate candidate searchAddr in pageAlloc.find

Currently pageAlloc.find attempts to find a better estimate for the
first free page in the heap, even if the space its looking for isn't
necessarily going to be the first free page in the heap (e.g. if npages
>= 2). However, in doing so it has the potential to return a searchAddr
candidate that doesn't actually correspond to mapped memory, but this
candidate might still be adopted. As a result, pageAlloc.alloc's fast
path may look at unmapped summary memory and segfault. This case is rare
on most operating systems since the heap is kept fairly contiguous, so
the chance that the candidate searchAddr discovered is unmapped is
fairly low. Even so, this is totally possible and outside the user's
control when it happens (in fact, it's likely to happen consistently for
a given user on a given system).

Fix this problem by ensuring that our candidate always points to mapped
memory. We do this by looking at mheap's arenas structure first. If it
turns out our candidate doesn't correspond to mapped memory, then we
look at inUse to round up the searchAddr to the next mapped address.

While we're here, clean up some documentation related to searchAddr.

Fixes #40191.

Change-Id: I759efec78987e4a8fde466ae45aabbaa3d9d4214
Reviewed-on: https://go-review.googlesource.com/c/go/+/242680
Run-TryBot: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Austin Clements <austin@google.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
diff --git a/src/runtime/mpagealloc.go b/src/runtime/mpagealloc.go
index 60f7f9f..8b3c62c 100644
--- a/src/runtime/mpagealloc.go
+++ b/src/runtime/mpagealloc.go
@@ -233,16 +233,12 @@
 
 	// The address to start an allocation search with. It must never
 	// point to any memory that is not contained in inUse, i.e.
-	// inUse.contains(searchAddr) must always be true.
+	// inUse.contains(searchAddr.addr()) must always be true. The one
+	// exception to this rule is that it may take on the value of
+	// maxOffAddr to indicate that the heap is exhausted.
 	//
-	// When added with arenaBaseOffset, we guarantee that
-	// all valid heap addresses (when also added with
-	// arenaBaseOffset) below this value are allocated and
-	// not worth searching.
-	//
-	// Note that adding in arenaBaseOffset transforms addresses
-	// to a new address space with a linear view of the full address
-	// space on architectures with segmented address spaces.
+	// We guarantee that all valid heap addresses below this value
+	// are allocated and not worth searching.
 	searchAddr offAddr
 
 	// start and end represent the chunk indices
@@ -518,6 +514,30 @@
 	return uintptr(scav) * pageSize
 }
 
+// findMappedAddr returns the smallest mapped offAddr that is
+// >= addr. That is, if addr refers to mapped memory, then it is
+// returned. If addr is higher than any mapped region, then
+// it returns maxOffAddr.
+//
+// s.mheapLock must be held.
+func (s *pageAlloc) findMappedAddr(addr offAddr) offAddr {
+	// If we're not in a test, validate first by checking mheap_.arenas.
+	// This is a fast path which is only safe to use outside of testing.
+	ai := arenaIndex(addr.addr())
+	if s.test || mheap_.arenas[ai.l1()] == nil || mheap_.arenas[ai.l1()][ai.l2()] == nil {
+		vAddr, ok := s.inUse.findAddrGreaterEqual(addr.addr())
+		if ok {
+			return offAddr{vAddr}
+		} else {
+			// The candidate search address is greater than any
+			// known address, which means we definitely have no
+			// free memory left.
+			return maxOffAddr
+		}
+	}
+	return addr
+}
+
 // find searches for the first (address-ordered) contiguous free region of
 // npages in size and returns a base address for that region.
 //
@@ -526,6 +546,7 @@
 //
 // find also computes and returns a candidate s.searchAddr, which may or
 // may not prune more of the address space than s.searchAddr already does.
+// This candidate is always a valid s.searchAddr.
 //
 // find represents the slow path and the full radix tree search.
 //
@@ -695,7 +716,7 @@
 			// We found a sufficiently large run of free pages straddling
 			// some boundary, so compute the address and return it.
 			addr := levelIndexToOffAddr(l, i).add(uintptr(base) * pageSize).addr()
-			return addr, firstFree.base
+			return addr, s.findMappedAddr(firstFree.base)
 		}
 		if l == 0 {
 			// We're at level zero, so that means we've exhausted our search.
@@ -741,7 +762,7 @@
 	// found an even narrower free window.
 	searchAddr := chunkBase(ci) + uintptr(searchIdx)*pageSize
 	foundFree(offAddr{searchAddr}, chunkBase(ci+1)-searchAddr)
-	return addr, firstFree.base
+	return addr, s.findMappedAddr(firstFree.base)
 }
 
 // alloc allocates npages worth of memory from the page heap, returning the base
diff --git a/src/runtime/mpagealloc_test.go b/src/runtime/mpagealloc_test.go
index 89a4a25..65ba71d 100644
--- a/src/runtime/mpagealloc_test.go
+++ b/src/runtime/mpagealloc_test.go
@@ -612,6 +612,63 @@
 				baseChunkIdx + chunkIdxBigJump:     {{0, PallocChunkPages}},
 			},
 		}
+
+		// Test to check for issue #40191. Essentially, the candidate searchAddr
+		// discovered by find may not point to mapped memory, so we need to handle
+		// that explicitly.
+		//
+		// chunkIdxSmallOffset is an offset intended to be used within chunkIdxBigJump.
+		// It is far enough within chunkIdxBigJump that the summaries at the beginning
+		// of an address range the size of chunkIdxBigJump will not be mapped in.
+		const chunkIdxSmallOffset = 0x503
+		tests["DiscontiguousBadSearchAddr"] = test{
+			before: map[ChunkIdx][]BitRange{
+				// The mechanism for the bug involves three chunks, A, B, and C, which are
+				// far apart in the address space. In particular, B is chunkIdxBigJump +
+				// chunkIdxSmalloffset chunks away from B, and C is 2*chunkIdxBigJump chunks
+				// away from A. A has 1 page free, B has several (NOT at the end of B), and
+				// C is totally free.
+				// Note that B's free memory must not be at the end of B because the fast
+				// path in the page allocator will check if the searchAddr even gives us
+				// enough space to place the allocation in a chunk before accessing the
+				// summary.
+				BaseChunkIdx + chunkIdxBigJump*0: {{0, PallocChunkPages - 1}},
+				BaseChunkIdx + chunkIdxBigJump*1 + chunkIdxSmallOffset: {
+					{0, PallocChunkPages - 10},
+					{PallocChunkPages - 1, 1},
+				},
+				BaseChunkIdx + chunkIdxBigJump*2: {},
+			},
+			scav: map[ChunkIdx][]BitRange{
+				BaseChunkIdx + chunkIdxBigJump*0:                       {},
+				BaseChunkIdx + chunkIdxBigJump*1 + chunkIdxSmallOffset: {},
+				BaseChunkIdx + chunkIdxBigJump*2:                       {},
+			},
+			hits: []hit{
+				// We first allocate into A to set the page allocator's searchAddr to the
+				// end of that chunk. That is the only purpose A serves.
+				{1, PageBase(BaseChunkIdx, PallocChunkPages-1), 0},
+				// Then, we make a big allocation that doesn't fit into B, and so must be
+				// fulfilled by C.
+				//
+				// On the way to fulfilling the allocation into C, we estimate searchAddr
+				// using the summary structure, but that will give us a searchAddr of
+				// B's base address minus chunkIdxSmallOffset chunks. These chunks will
+				// not be mapped.
+				{100, PageBase(baseChunkIdx+chunkIdxBigJump*2, 0), 0},
+				// Now we try to make a smaller allocation that can be fulfilled by B.
+				// In an older implementation of the page allocator, this will segfault,
+				// because this last allocation will first try to access the summary
+				// for B's base address minus chunkIdxSmallOffset chunks in the fast path,
+				// and this will not be mapped.
+				{9, PageBase(baseChunkIdx+chunkIdxBigJump*1+chunkIdxSmallOffset, PallocChunkPages-10), 0},
+			},
+			after: map[ChunkIdx][]BitRange{
+				BaseChunkIdx + chunkIdxBigJump*0:                       {{0, PallocChunkPages}},
+				BaseChunkIdx + chunkIdxBigJump*1 + chunkIdxSmallOffset: {{0, PallocChunkPages}},
+				BaseChunkIdx + chunkIdxBigJump*2:                       {{0, 100}},
+			},
+		}
 	}
 	for name, v := range tests {
 		v := v
diff --git a/src/runtime/mranges.go b/src/runtime/mranges.go
index e23d077..2c0eb2c 100644
--- a/src/runtime/mranges.go
+++ b/src/runtime/mranges.go
@@ -188,6 +188,25 @@
 	return len(a.ranges)
 }
 
+// findAddrGreaterEqual returns the smallest address represented by a
+// that is >= addr. Thus, if the address is represented by a,
+// then it returns addr. The second return value indicates whether
+// such an address exists for addr in a. That is, if addr is larger than
+// any address known to a, the second return value will be false.
+func (a *addrRanges) findAddrGreaterEqual(addr uintptr) (uintptr, bool) {
+	i := a.findSucc(addr)
+	if i == 0 {
+		return a.ranges[0].base.addr(), true
+	}
+	if a.ranges[i-1].contains(addr) {
+		return addr, true
+	}
+	if i < len(a.ranges) {
+		return a.ranges[i].base.addr(), true
+	}
+	return 0, false
+}
+
 // contains returns true if a covers the address addr.
 func (a *addrRanges) contains(addr uintptr) bool {
 	i := a.findSucc(addr)