[dev.boringcrypto.go1.14] all: merge go1.14.10 into dev.boringcrypto.go1.14

Change-Id: I1a32a12c342fca3b4d6bbf446e297c1adf6ce99b
diff --git a/src/cmd/compile/internal/gc/plive.go b/src/cmd/compile/internal/gc/plive.go
index d406780..2e1e839 100644
--- a/src/cmd/compile/internal/gc/plive.go
+++ b/src/cmd/compile/internal/gc/plive.go
@@ -126,24 +126,14 @@
 	regMaps     []liveRegMask
 
 	cache progeffectscache
-
-	// These are only populated if open-coded defers are being used.
-	// List of vars/stack slots storing defer args
-	openDeferVars []openDeferVarInfo
-	// Map from defer arg OpVarDef to the block where the OpVarDef occurs.
-	openDeferVardefToBlockMap map[*Node]*ssa.Block
-	// Map of blocks that cannot reach a return or exit (panic)
-	nonReturnBlocks map[*ssa.Block]bool
-}
-
-type openDeferVarInfo struct {
-	n         *Node // Var/stack slot storing a defer arg
-	varsIndex int   // Index of variable in lv.vars
 }
 
 // LivenessMap maps from *ssa.Value to LivenessIndex.
 type LivenessMap struct {
 	m []LivenessIndex
+	// The set of live, pointer-containing variables at the deferreturn
+	// call (only set when open-coded defers are used).
+	deferreturn LivenessIndex
 }
 
 func (m *LivenessMap) reset(ids int) {
@@ -158,6 +148,7 @@
 		m2[i] = none
 	}
 	m.m = m2
+	m.deferreturn = LivenessInvalid
 }
 
 func (m *LivenessMap) set(v *ssa.Value, i LivenessIndex) {
@@ -505,7 +496,7 @@
 		if cap(lc.be) >= f.NumBlocks() {
 			lv.be = lc.be[:f.NumBlocks()]
 		}
-		lv.livenessMap = LivenessMap{lc.livenessMap.m[:0]}
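+		// The deferreturn liveness index is recomputed per function,
+		// so reset it to invalid when reusing the cached map storage.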
+		lv.livenessMap = LivenessMap{m: lc.livenessMap.m[:0], deferreturn: LivenessInvalid}
 	}
 	if lv.be == nil {
 		lv.be = make([]BlockEffects, f.NumBlocks())
@@ -838,58 +829,12 @@
 func (lv *Liveness) prologue() {
 	lv.initcache()
 
-	if lv.fn.Func.HasDefer() && !lv.fn.Func.OpenCodedDeferDisallowed() {
-		lv.openDeferVardefToBlockMap = make(map[*Node]*ssa.Block)
-		for i, n := range lv.vars {
-			if n.Name.OpenDeferSlot() {
-				lv.openDeferVars = append(lv.openDeferVars, openDeferVarInfo{n: n, varsIndex: i})
-			}
-		}
-
-		// Find any blocks that cannot reach a return or a BlockExit
-		// (panic) -- these must be because of an infinite loop.
-		reachesRet := make(map[ssa.ID]bool)
-		blockList := make([]*ssa.Block, 0, 256)
-
-		for _, b := range lv.f.Blocks {
-			if b.Kind == ssa.BlockRet || b.Kind == ssa.BlockRetJmp || b.Kind == ssa.BlockExit {
-				blockList = append(blockList, b)
-			}
-		}
-
-		for len(blockList) > 0 {
-			b := blockList[0]
-			blockList = blockList[1:]
-			if reachesRet[b.ID] {
-				continue
-			}
-			reachesRet[b.ID] = true
-			for _, e := range b.Preds {
-				blockList = append(blockList, e.Block())
-			}
-		}
-
-		lv.nonReturnBlocks = make(map[*ssa.Block]bool)
-		for _, b := range lv.f.Blocks {
-			if !reachesRet[b.ID] {
-				lv.nonReturnBlocks[b] = true
-				//fmt.Println("No reach ret", lv.f.Name, b.ID, b.Kind)
-			}
-		}
-	}
-
 	for _, b := range lv.f.Blocks {
 		be := lv.blockEffects(b)
 
 		// Walk the block instructions backward and update the block
 		// effects with each prog's effects.
 		for j := len(b.Values) - 1; j >= 0; j-- {
-			if b.Values[j].Op == ssa.OpVarDef {
-				n := b.Values[j].Aux.(*Node)
-				if n.Name.OpenDeferSlot() {
-					lv.openDeferVardefToBlockMap[n] = b
-				}
-			}
 			pos, e := lv.valueEffects(b.Values[j])
 			regUevar, regKill := lv.regEffects(b.Values[j])
 			if e&varkill != 0 {
@@ -906,20 +851,6 @@
 	}
 }
 
-// markDeferVarsLive marks each variable storing an open-coded defer arg as
-// specially live in block b if the variable definition dominates block b.
-func (lv *Liveness) markDeferVarsLive(b *ssa.Block, newliveout *varRegVec) {
-	// Only force computation of dominators if we have a block where we need
-	// to specially mark defer args live.
-	sdom := lv.f.Sdom()
-	for _, info := range lv.openDeferVars {
-		defB := lv.openDeferVardefToBlockMap[info.n]
-		if sdom.IsAncestorEq(defB, b) {
-			newliveout.vars.Set(int32(info.varsIndex))
-		}
-	}
-}
-
 // Solve the liveness dataflow equations.
 func (lv *Liveness) solve() {
 	// These temporary bitvectors exist to avoid successive allocations and
@@ -963,23 +894,6 @@
 				}
 			}
 
-			if lv.fn.Func.HasDefer() && !lv.fn.Func.OpenCodedDeferDisallowed() &&
-				(b.Kind == ssa.BlockExit || lv.nonReturnBlocks[b]) {
-				// Open-coded defer args slots must be live
-				// everywhere in a function, since a panic can
-				// occur (almost) anywhere. Force all appropriate
-				// defer arg slots to be live in BlockExit (panic)
-				// blocks and in blocks that do not reach a return
-				// (because of infinite loop).
-				//
-				// We are assuming that the defer exit code at
-				// BlockReturn/BlockReturnJmp accesses all of the
-				// defer args (with pointers), and so keeps them
-				// live. This analysis may have to be adjusted if
-				// that changes (because of optimizations).
-				lv.markDeferVarsLive(b, &newliveout)
-			}
-
 			if !be.liveout.Eq(newliveout) {
 				change = true
 				be.liveout.Copy(newliveout)
@@ -1032,6 +946,17 @@
 				n.Name.SetNeedzero(true)
 				livedefer.Set(int32(i))
 			}
+			if n.Name.OpenDeferSlot() {
+				// Open-coded defer arg slots must be live
+				// everywhere in a function, since a panic can
+				// occur (almost) anywhere. Because they are live
+				// everywhere, they must be zeroed on entry.
+				livedefer.Set(int32(i))
+				// It was already marked as Needzero when created.
+				if !n.Name.Needzero() {
+					Fatalf("all pointer-containing defer arg slots should have Needzero set")
+				}
+			}
 		}
 	}
 
@@ -1133,6 +1058,16 @@
 		lv.compact(b)
 	}
 
+	// If we have an open-coded deferreturn call, make a liveness map for it.
+	if lv.fn.Func.OpenCodedDeferDisallowed() {
+		lv.livenessMap.deferreturn = LivenessInvalid
+	} else {
+		lv.livenessMap.deferreturn = LivenessIndex{
+			stackMapIndex: lv.stackMapSet.add(livedefer),
+			regMapIndex:   0, // entry regMap, containing no live registers
+		}
+	}
+
 	// Done compacting. Throw out the stack map set.
 	lv.stackMaps = lv.stackMapSet.extractUniqe()
 	lv.stackMapSet = bvecSet{}
diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index b8e99f0..32d947b 100644
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -4309,12 +4309,6 @@
 			}
 		}
 
-		if i == len(s.openDefers)-1 {
-			// Record the call of the first defer. This will be used
-			// to set liveness info for the deferreturn (which is also
-			// used for any location that causes a runtime panic)
-			s.f.LastDeferExit = call
-		}
 		s.endBlock()
 		s.startBlock(bEnd)
 	}
@@ -5773,11 +5767,6 @@
 
 	// wasm: The number of values on the WebAssembly stack. This is only used as a safeguard.
 	OnWasmStackSkipped int
-
-	// Liveness index for the first function call in the final defer exit code
-	// path that we generated. All defer functions and args should be live at
-	// this point. This will be used to set the liveness for the deferreturn.
-	lastDeferLiveness LivenessIndex
 }
 
 // Prog appends a new Prog.
@@ -6022,12 +6011,6 @@
 				// instruction.
 				s.pp.nextLive = s.livenessMap.Get(v)
 
-				// Remember the liveness index of the first defer call of
-				// the last defer exit
-				if v.Block.Func.LastDeferExit != nil && v == v.Block.Func.LastDeferExit {
-					s.lastDeferLiveness = s.pp.nextLive
-				}
-
 				// Special case for first line in function; move it to the start.
 				if firstPos != src.NoXPos {
 					s.SetPos(firstPos)
@@ -6088,7 +6071,7 @@
 		// When doing open-coded defers, generate a disconnected call to
 		// deferreturn and a return. This will be used during panic
 		// recovery to unwind the stack and return to the runtime.
-		s.pp.nextLive = s.lastDeferLiveness
+		s.pp.nextLive = s.livenessMap.deferreturn
 		gencallret(pp, Deferreturn)
 	}
 
diff --git a/src/cmd/compile/internal/ssa/func.go b/src/cmd/compile/internal/ssa/func.go
index 7cf72a8..4b9189f 100644
--- a/src/cmd/compile/internal/ssa/func.go
+++ b/src/cmd/compile/internal/ssa/func.go
@@ -33,15 +33,8 @@
 	Blocks []*Block    // unordered set of all basic blocks (note: not indexable by ID)
 	Entry  *Block      // the entry basic block
 
-	// If we are using open-coded defers, this is the first call to a deferred
-	// function in the final defer exit sequence that we generated. This call
-	// should be after all defer statements, and will have all args, etc. of
-	// all defer calls as live. The liveness info of this call will be used
-	// for the deferreturn/ret segment generated for functions with open-coded
-	// defers.
-	LastDeferExit *Value
-	bid           idAlloc // block ID allocator
-	vid           idAlloc // value ID allocator
+	bid idAlloc // block ID allocator
+	vid idAlloc // value ID allocator
 
 	// Given an environment variable used for debug hash match,
 	// what file (if any) receives the yes/no logging?
diff --git a/src/cmd/compile/internal/x86/387.go b/src/cmd/compile/internal/x86/387.go
index 18838fb..37e8e71 100644
--- a/src/cmd/compile/internal/x86/387.go
+++ b/src/cmd/compile/internal/x86/387.go
@@ -139,12 +139,18 @@
 		// Set precision if needed.  64 bits is the default.
 		switch v.Op {
 		case ssa.Op386ADDSS, ssa.Op386SUBSS, ssa.Op386MULSS, ssa.Op386DIVSS:
-			p := s.Prog(x86.AFSTCW)
+			// Save AX so we can use it as scratch space.
+			p := s.Prog(x86.AMOVL)
+			p.From.Type = obj.TYPE_REG
+			p.From.Reg = x86.REG_AX
 			s.AddrScratch(&p.To)
-			p = s.Prog(x86.AFLDCW)
-			p.From.Type = obj.TYPE_MEM
-			p.From.Name = obj.NAME_EXTERN
-			p.From.Sym = gc.ControlWord32
+			// Install a 32-bit version of the control word.
+			installControlWord(s, gc.ControlWord32, x86.REG_AX)
+			// Restore AX.
+			p = s.Prog(x86.AMOVL)
+			s.AddrScratch(&p.From)
+			p.To.Type = obj.TYPE_REG
+			p.To.Reg = x86.REG_AX
 		}
 
 		var op obj.As
@@ -167,8 +173,7 @@
 		// Restore precision if needed.
 		switch v.Op {
 		case ssa.Op386ADDSS, ssa.Op386SUBSS, ssa.Op386MULSS, ssa.Op386DIVSS:
-			p := s.Prog(x86.AFLDCW)
-			s.AddrScratch(&p.From)
+			restoreControlWord(s)
 		}
 
 	case ssa.Op386UCOMISS, ssa.Op386UCOMISD:
@@ -225,19 +230,11 @@
 	case ssa.Op386CVTTSD2SL, ssa.Op386CVTTSS2SL:
 		push(s, v.Args[0])
 
-		// Save control word.
-		p := s.Prog(x86.AFSTCW)
-		s.AddrScratch(&p.To)
-		p.To.Offset += 4
-
 		// Load control word which truncates (rounds towards zero).
-		p = s.Prog(x86.AFLDCW)
-		p.From.Type = obj.TYPE_MEM
-		p.From.Name = obj.NAME_EXTERN
-		p.From.Sym = gc.ControlWord64trunc
+		installControlWord(s, gc.ControlWord64trunc, v.Reg())
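+		// v.Reg() is dead at this point, so it can serve as the
+		// scratch register; the result is only moved into it below.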
 
 		// Now do the conversion.
-		p = s.Prog(x86.AFMOVLP)
+		p := s.Prog(x86.AFMOVLP)
 		p.From.Type = obj.TYPE_REG
 		p.From.Reg = x86.REG_F0
 		s.AddrScratch(&p.To)
@@ -247,9 +244,7 @@
 		p.To.Reg = v.Reg()
 
 		// Restore control word.
-		p = s.Prog(x86.AFLDCW)
-		s.AddrScratch(&p.From)
-		p.From.Offset += 4
+		restoreControlWord(s)
 
 	case ssa.Op386CVTSS2SD:
 		// float32 -> float64 is a nop
@@ -373,3 +368,36 @@
 
 	ssaGenBlock(s, b, next)
 }
+
+// installControlWord saves the current floating-point control
+// word and installs a new one loaded from cw.
+// scratchReg must be an unused register.
+// This call must be paired with restoreControlWord.
+// Bytes 4-5 of the scratch space (s.AddrScratch) are used between
+// this call and restoreControlWord.
+func installControlWord(s *gc.SSAGenState, cw *obj.LSym, scratchReg int16) {
+	// Save current control word.
+	p := s.Prog(x86.AFSTCW)
+	s.AddrScratch(&p.To)
+	p.To.Offset += 4
+
+	// Materialize address of new control word.
+	// Note: this must be a separate instruction to handle PIE correctly.
+	// See issue 41503.
+	p = s.Prog(x86.ALEAL)
+	p.From.Type = obj.TYPE_MEM
+	p.From.Name = obj.NAME_EXTERN
+	p.From.Sym = cw
+	p.To.Type = obj.TYPE_REG
+	p.To.Reg = scratchReg
+
+	// Load replacement control word.
+	p = s.Prog(x86.AFLDCW)
+	p.From.Type = obj.TYPE_MEM
+	p.From.Reg = scratchReg
+}
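+
+// restoreControlWord restores the floating-point control word that
+// installControlWord saved in the scratch space.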
+func restoreControlWord(s *gc.SSAGenState) {
+	p := s.Prog(x86.AFLDCW)
+	s.AddrScratch(&p.From)
+	p.From.Offset += 4
+}
diff --git a/src/cmd/internal/obj/s390x/objz.go b/src/cmd/internal/obj/s390x/objz.go
index b14dc81..ef6335d 100644
--- a/src/cmd/internal/obj/s390x/objz.go
+++ b/src/cmd/internal/obj/s390x/objz.go
@@ -283,17 +283,6 @@
 			ACMPUBNE:
 			q = p
 			p.Mark |= BRANCH
-			if p.Pcond != nil {
-				q := p.Pcond
-				for q.As == obj.ANOP {
-					q = q.Link
-					p.Pcond = q
-				}
-			}
-
-		case obj.ANOP:
-			q.Link = p.Link /* q is non-nop */
-			p.Link.Mark |= p.Mark
 
 		default:
 			q = p
diff --git a/src/database/sql/sql.go b/src/database/sql/sql.go
index a0b7ca8..74447d1 100644
--- a/src/database/sql/sql.go
+++ b/src/database/sql/sql.go
@@ -2729,10 +2729,17 @@
 	return err
 }
 
+// bypassRowsAwaitDone is only used for testing.
+// If true, the Rows are not closed automatically when the context
+// is canceled.
+var bypassRowsAwaitDone = false
+
 func (rs *Rows) initContextClose(ctx, txctx context.Context) {
 	if ctx.Done() == nil && (txctx == nil || txctx.Done() == nil) {
 		return
 	}
+	if bypassRowsAwaitDone {
+		return
+	}
 	ctx, rs.cancel = context.WithCancel(ctx)
 	go rs.awaitDone(ctx, txctx)
 }
diff --git a/src/database/sql/sql_test.go b/src/database/sql/sql_test.go
index a9e1800..7be5fc9 100644
--- a/src/database/sql/sql_test.go
+++ b/src/database/sql/sql_test.go
@@ -2724,7 +2724,7 @@
 	}
 }
 
-// Issue 34755: Ensure that a Tx cannot commit after a rollback.
+// Issue 34775: Ensure that a Tx cannot commit after a rollback.
 func TestTxCannotCommitAfterRollback(t *testing.T) {
 	db := newTestDB(t, "tx_status")
 	defer closeDB(t, db)
@@ -2766,6 +2766,9 @@
 	// 2. (A) Start a query, (B) begin Tx rollback through a ctx cancel.
 	// 3. Check if 2.A has committed in Tx (pass) or outside of Tx (fail).
 	sendQuery := make(chan struct{})
+	// The Tx status is returned through the row results; ensure
+	// that the row results are not canceled.
+	bypassRowsAwaitDone = true
 	hookTxGrabConn = func() {
 		cancel()
 		<-sendQuery
@@ -2776,6 +2779,7 @@
 	defer func() {
 		hookTxGrabConn = nil
 		rollbackHook = nil
+		bypassRowsAwaitDone = false
 	}()
 
 	err = tx.QueryRow("SELECT|tx_status|tx_status|").Scan(&txStatus)
diff --git a/src/runtime/chan.go b/src/runtime/chan.go
index c953b23..17ec2e1 100644
--- a/src/runtime/chan.go
+++ b/src/runtime/chan.go
@@ -233,6 +233,11 @@
 	gp.waiting = mysg
 	gp.param = nil
 	c.sendq.enqueue(mysg)
+	// Signal to anyone trying to shrink our stack that we're about
+	// to park on a channel. The window between when this G's status
+	// changes and when we set gp.activeStackChans is not safe for
+	// stack shrinking.
+	atomic.Store8(&gp.parkingOnChan, 1)
 	gopark(chanparkcommit, unsafe.Pointer(&c.lock), waitReasonChanSend, traceEvGoBlockSend, 2)
 	// Ensure the value being sent is kept alive until the
 	// receiver copies it out. The sudog has a pointer to the
@@ -522,6 +527,11 @@
 	mysg.c = c
 	gp.param = nil
 	c.recvq.enqueue(mysg)
+	// Signal to anyone trying to shrink our stack that we're about
+	// to park on a channel. The window between when this G's status
+	// changes and when we set gp.activeStackChans is not safe for
+	// stack shrinking.
+	atomic.Store8(&gp.parkingOnChan, 1)
 	gopark(chanparkcommit, unsafe.Pointer(&c.lock), waitReasonChanReceive, traceEvGoBlockRecv, 2)
 
 	// someone woke us up
@@ -599,7 +609,19 @@
 func chanparkcommit(gp *g, chanLock unsafe.Pointer) bool {
 	// There are unlocked sudogs that point into gp's stack. Stack
 	// copying must lock the channels of those sudogs.
+	// Set activeStackChans here instead of before we try parking
+	// because we could self-deadlock in stack growth on the
+	// channel lock.
 	gp.activeStackChans = true
+	// Mark that it's safe for stack shrinking to occur now,
+	// because any thread acquiring this G's stack for shrinking
+	// is guaranteed to observe activeStackChans after this store.
+	atomic.Store8(&gp.parkingOnChan, 0)
+	// Make sure we unlock after setting activeStackChans and
+	// unsetting parkingOnChan. The moment we unlock chanLock
+	// we risk gp getting readied by a channel operation and
+	// so gp could continue running before everything before
+	// the unlock is visible (even to gp itself).
 	unlock((*mutex)(chanLock))
 	return true
 }
diff --git a/src/runtime/chan_test.go b/src/runtime/chan_test.go
index 1180e76..cf596f9 100644
--- a/src/runtime/chan_test.go
+++ b/src/runtime/chan_test.go
@@ -623,6 +623,62 @@
 	<-done
 }
 
+func TestNoShrinkStackWhileParking(t *testing.T) {
+	// The goal of this test is to trigger a "racy sudog adjustment"
+	// throw. Basically, there's a window between when a goroutine
+	// becomes available for preemption for stack scanning (and thus,
+	// stack shrinking) but before the goroutine has fully parked on a
+	// channel. See issue 40641 for more details on the problem.
+	//
+	// The way we try to induce this failure is to set up two
+	// goroutines: a sender and a reciever that communicate across
+	// a channel. We try to set up a situation where the sender
+	// grows its stack temporarily then *fully* blocks on a channel
+	// often. Meanwhile a GC is triggered so that we try to get a
+	// mark worker to shrink the sender's stack and race with the
+	// sender parking.
+	//
+	// Unfortunately the race window here is so small that we
+	// either need a ridiculous number of iterations, or we add
+	// "usleep(1000)" to park_m, just before the unlockf call.
+	const n = 10
+	send := func(c chan<- int, done chan struct{}) {
+		for i := 0; i < n; i++ {
+			c <- i
+			// Use lots of stack briefly so that
+			// the GC is going to want to shrink us
+			// when it scans us. Avoid making any
+			// other function calls, so that we
+			// don't shrink our own stack when
+			// we're preempted.
+			stackGrowthRecursive(20)
+		}
+		done <- struct{}{}
+	}
+	recv := func(c <-chan int, done chan struct{}) {
+		for i := 0; i < n; i++ {
+			// Sleep here so that the sender always
+			// fully blocks.
+			time.Sleep(10 * time.Microsecond)
+			<-c
+		}
+		done <- struct{}{}
+	}
+	for i := 0; i < n*20; i++ {
+		c := make(chan int)
+		done := make(chan struct{})
+		go recv(c, done)
+		go send(c, done)
+		// Wait a little bit before triggering
+		// the GC to make sure the sender and
+		// receiver have gotten into their groove.
+		time.Sleep(50 * time.Microsecond)
+		runtime.GC()
+		<-done
+		<-done
+	}
+}
+
 func TestSelectDuplicateChannel(t *testing.T) {
 	// This test makes sure we can queue a G on
 	// the same channel multiple times.
diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go
index 6a8d00c..cf07a37 100644
--- a/src/runtime/export_test.go
+++ b/src/runtime/export_test.go
@@ -360,7 +360,11 @@
 		}
 
 		for i := mheap_.pages.start; i < mheap_.pages.end; i++ {
-			pg := mheap_.pages.chunkOf(i).scavenged.popcntRange(0, pallocChunkPages)
+			chunk := mheap_.pages.tryChunkOf(i)
+			if chunk == nil {
+				continue
+			}
+			pg := chunk.scavenged.popcntRange(0, pallocChunkPages)
 			slow.HeapReleased += uint64(pg) * pageSize
 		}
 		for _, p := range allp {
@@ -753,11 +757,7 @@
 // Returns nil if the PallocData's L2 is missing.
 func (p *PageAlloc) PallocData(i ChunkIdx) *PallocData {
 	ci := chunkIdx(i)
-	l2 := (*pageAlloc)(p).chunks[ci.l1()]
-	if l2 == nil {
-		return nil
-	}
-	return (*PallocData)(&l2[ci.l2()])
+	return (*PallocData)((*pageAlloc)(p).tryChunkOf(ci))
 }
 
 // AddrRange represents a range over addresses.
@@ -896,7 +896,10 @@
 		lock(&mheap_.lock)
 	chunkLoop:
 		for i := mheap_.pages.start; i < mheap_.pages.end; i++ {
-			chunk := mheap_.pages.chunkOf(i)
+			chunk := mheap_.pages.tryChunkOf(i)
+			if chunk == nil {
+				continue
+			}
 			for j := 0; j < pallocChunkPages/64; j++ {
 				// Run over each 64-bit bitmap section and ensure
 				// scavenged is being cleared properly on allocation.
diff --git a/src/runtime/mpagealloc.go b/src/runtime/mpagealloc.go
index 3c56b60..3291e0c 100644
--- a/src/runtime/mpagealloc.go
+++ b/src/runtime/mpagealloc.go
@@ -331,7 +331,20 @@
 	return 0
 }
 
+// tryChunkOf returns the bitmap data for the given chunk.
+//
+// Returns nil if the chunk data has not been mapped.
+func (s *pageAlloc) tryChunkOf(ci chunkIdx) *pallocData {
+	l2 := s.chunks[ci.l1()]
+	if l2 == nil {
+		return nil
+	}
+	return &l2[ci.l2()]
+}
+
 // chunkOf returns the chunk at the given chunk index.
+//
+// The chunk index must be valid or this method may throw.
 func (s *pageAlloc) chunkOf(ci chunkIdx) *pallocData {
 	return &s.chunks[ci.l1()][ci.l2()]
 }
diff --git a/src/runtime/proc_test.go b/src/runtime/proc_test.go
index 8c70c19..db3362d 100644
--- a/src/runtime/proc_test.go
+++ b/src/runtime/proc_test.go
@@ -517,9 +517,17 @@
 	<-done
 }
 
+var padData [128]uint64
+
 func stackGrowthRecursive(i int) {
 	var pad [128]uint64
-	if i != 0 && pad[0] == 0 {
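+	// Copy the global into pad and read every element back so that
+	// the compiler cannot optimize away the stack-resident array.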
+	pad = padData
+	for j := range pad {
+		if pad[j] != 0 {
+			return
+		}
+	}
+	if i != 0 {
 		stackGrowthRecursive(i - 1)
 	}
 }
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
index 2a872bf..c4b27e9 100644
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -436,6 +436,10 @@
 	// copying needs to acquire channel locks to protect these
 	// areas of the stack.
 	activeStackChans bool
+	// parkingOnChan indicates that the goroutine is about to
+	// park on a chansend or chanrecv. Used to signal an unsafe point
+	// for stack shrinking. It's a boolean value, but is updated atomically.
+	parkingOnChan uint8
 
 	raceignore     int8     // ignore race detection events
 	sysblocktraced bool     // StartTrace has emitted EvGoInSyscall about this goroutine
diff --git a/src/runtime/select.go b/src/runtime/select.go
index 8033b65..f8f7683 100644
--- a/src/runtime/select.go
+++ b/src/runtime/select.go
@@ -7,6 +7,7 @@
 // This file contains the implementation of Go select statements.
 
 import (
+	"runtime/internal/atomic"
 	"unsafe"
 )
 
@@ -77,7 +78,20 @@
 func selparkcommit(gp *g, _ unsafe.Pointer) bool {
 	// There are unlocked sudogs that point into gp's stack. Stack
 	// copying must lock the channels of those sudogs.
+	// Set activeStackChans here instead of before we try parking
+	// because we could self-deadlock in stack growth on a
+	// channel lock.
 	gp.activeStackChans = true
+	// Mark that it's safe for stack shrinking to occur now,
+	// because any thread acquiring this G's stack for shrinking
+	// is guaranteed to observe activeStackChans after this store.
+	atomic.Store8(&gp.parkingOnChan, 0)
+	// Make sure we unlock after setting activeStackChans and
+	// unsetting parkingOnChan. The moment we unlock any of the
+	// channel locks we risk gp getting readied by a channel operation
+	// and so gp could continue running before everything before the
+	// unlock is visible (even to gp itself).
+
 	// This must not access gp's stack (see gopark). In
 	// particular, it must not access the *hselect. That's okay,
 	// because by the time this is called, gp.waiting has all
@@ -313,6 +327,11 @@
 
 	// wait for someone to wake us up
 	gp.param = nil
+	// Signal to anyone trying to shrink our stack that we're about
+	// to park on a channel. The window between when this G's status
+	// changes and when we set gp.activeStackChans is not safe for
+	// stack shrinking.
+	atomic.Store8(&gp.parkingOnChan, 1)
 	gopark(selparkcommit, nil, waitReasonSelect, traceEvGoBlockSelect, 1)
 	gp.activeStackChans = false
 
diff --git a/src/runtime/stack.go b/src/runtime/stack.go
index 0f5b165..e49c905 100644
--- a/src/runtime/stack.go
+++ b/src/runtime/stack.go
@@ -850,6 +850,13 @@
 	// Adjust sudogs, synchronizing with channel ops if necessary.
 	ncopy := used
 	if !gp.activeStackChans {
+		if newsize < old.hi-old.lo && atomic.Load8(&gp.parkingOnChan) != 0 {
+			// It's not safe for someone to shrink this stack while we're actively
+			// parking on a channel, but it is safe to grow since we do that
+			// ourselves and explicitly don't want to synchronize with channels
+			// since we could self-deadlock.
+			throw("racy sudog adjustment due to parking on channel")
+		}
 		adjustsudogs(gp, &adjinfo)
 	} else {
 		// sudogs may be pointing in to the stack and gp has
@@ -1078,7 +1085,11 @@
 	// We also can't copy the stack if we're at an asynchronous
 	// safe-point because we don't have precise pointer maps for
 	// all frames.
-	return gp.syscallsp == 0 && !gp.asyncSafePoint
+	//
+	// We also can't *shrink* the stack in the window between the
+	// goroutine calling gopark to park on a channel and
+	// gp.activeStackChans being set.
+	return gp.syscallsp == 0 && !gp.asyncSafePoint && atomic.Load8(&gp.parkingOnChan) == 0
 }
 
 // Maybe shrink the stack being used by gp.
diff --git a/src/runtime/stubs32.go b/src/runtime/stubs32.go
index a7f52f6..c4715fe 100644
--- a/src/runtime/stubs32.go
+++ b/src/runtime/stubs32.go
@@ -11,4 +11,4 @@
 // Declarations for runtime services implemented in C or assembly that
 // are only present on 32 bit systems.
 
-func call16(fn, arg unsafe.Pointer, n, retoffset uint32)
+func call16(typ, fn, arg unsafe.Pointer, n, retoffset uint32)
diff --git a/src/testing/benchmark.go b/src/testing/benchmark.go
index 5276600..e9687bf 100644
--- a/src/testing/benchmark.go
+++ b/src/testing/benchmark.go
@@ -242,7 +242,7 @@
 		if b.skipped {
 			tag = "SKIP"
 		}
-		if b.chatty && (len(b.output) > 0 || b.finished) {
+		if b.chatty != nil && (len(b.output) > 0 || b.finished) {
 			b.trimOutput()
 			fmt.Fprintf(b.w, "--- %s: %s\n%s", tag, b.name, b.output)
 		}
@@ -523,10 +523,9 @@
 	}
 	main := &B{
 		common: common{
-			name:   "Main",
-			w:      os.Stdout,
-			chatty: *chatty,
-			bench:  true,
+			name:  "Main",
+			w:     os.Stdout,
+			bench: true,
 		},
 		importPath: importPath,
 		benchFunc: func(b *B) {
@@ -537,6 +536,9 @@
 		benchTime: benchTime,
 		context:   ctx,
 	}
+	if Verbose() {
+		main.chatty = newChattyPrinter(main.w)
+	}
 	main.runN(1)
 	return !main.failed
 }
@@ -549,7 +551,7 @@
 			benchName := benchmarkName(b.name, procs)
 
 			// If it's chatty, we've already printed this information.
-			if !b.chatty {
+			if b.chatty == nil {
 				fmt.Fprintf(b.w, "%-*s\t", ctx.maxLen, benchName)
 			}
 			// Recompute the running time for all but the first iteration.
@@ -576,7 +578,7 @@
 				continue
 			}
 			results := r.String()
-			if b.chatty {
+			if b.chatty != nil {
 				fmt.Fprintf(b.w, "%-*s\t", ctx.maxLen, benchName)
 			}
 			if *benchmarkMemory || b.showAllocResult {
@@ -639,7 +641,7 @@
 		atomic.StoreInt32(&sub.hasSub, 1)
 	}
 
-	if b.chatty {
+	if b.chatty != nil {
 		labelsOnce.Do(func() {
 			fmt.Printf("goos: %s\n", runtime.GOOS)
 			fmt.Printf("goarch: %s\n", runtime.GOARCH)
diff --git a/src/testing/sub_test.go b/src/testing/sub_test.go
index 8eb0084..5ed3fc4 100644
--- a/src/testing/sub_test.go
+++ b/src/testing/sub_test.go
@@ -483,10 +483,12 @@
 					signal: make(chan bool),
 					name:   "Test",
 					w:      buf,
-					chatty: tc.chatty,
 				},
 				context: ctx,
 			}
+			if tc.chatty {
+				root.chatty = newChattyPrinter(root.w)
+			}
 			ok := root.Run(tc.desc, tc.f)
 			ctx.release()
 
@@ -665,11 +667,13 @@
 					signal: make(chan bool),
 					name:   "root",
 					w:      buf,
-					chatty: tc.chatty,
 				},
 				benchFunc: func(b *B) { ok = b.Run("test", tc.f) }, // Use Run to catch failure.
 				benchTime: benchTimeFlag{d: 1 * time.Microsecond},
 			}
+			if tc.chatty {
+				root.chatty = newChattyPrinter(root.w)
+			}
 			root.runN(1)
 			if ok != !tc.failed {
 				t.Errorf("%s:ok: got %v; want %v", tc.desc, ok, !tc.failed)
@@ -741,9 +745,13 @@
 	}
 }
 
-type funcWriter func([]byte) (int, error)
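+// funcWriter wraps a write function in a struct so that the io.Writer
+// stored in common.w is comparable: flushToParent compares it against
+// the chatty printer's writer, and function values are not comparable.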
+type funcWriter struct {
+	write func([]byte) (int, error)
+}
 
-func (fw funcWriter) Write(b []byte) (int, error) { return fw(b) }
+func (fw *funcWriter) Write(b []byte) (int, error) {
+	return fw.write(b)
+}
 
 func TestRacyOutput(t *T) {
 	var runs int32  // The number of running Writes
@@ -761,9 +769,10 @@
 
 	var wg sync.WaitGroup
 	root := &T{
-		common:  common{w: funcWriter(raceDetector), chatty: true},
+		common:  common{w: &funcWriter{raceDetector}},
 		context: newTestContext(1, newMatcher(regexp.MatchString, "", "")),
 	}
+	root.chatty = newChattyPrinter(root.w)
 	root.Run("", func(t *T) {
 		for i := 0; i < 100; i++ {
 			wg.Add(1)
diff --git a/src/testing/testing.go b/src/testing/testing.go
index e3dcee5..bf9fce6 100644
--- a/src/testing/testing.go
+++ b/src/testing/testing.go
@@ -320,7 +320,6 @@
 	cpuListStr           *string
 	parallel             *int
 	testlog              *string
-	printer              *testPrinter
 
 	haveExamples bool // are there examples?
 
@@ -330,55 +329,45 @@
 	numFailed uint32 // number of test failures
 )
 
-type testPrinter struct {
-	chatty bool
-
+type chattyPrinter struct {
+	w          io.Writer
 	lastNameMu sync.Mutex // guards lastName
 	lastName   string     // last printed test name in chatty mode
 }
 
-func newTestPrinter(chatty bool) *testPrinter {
-	return &testPrinter{
-		chatty: chatty,
-	}
+func newChattyPrinter(w io.Writer) *chattyPrinter {
+	return &chattyPrinter{w: w}
 }
 
-func (p *testPrinter) Print(testName, out string) {
-	p.Fprint(os.Stdout, testName, out)
-}
-
-func (p *testPrinter) Fprint(w io.Writer, testName, out string) {
+// Updatef prints a message about the status of the named test to w.
+//
+// The formatted message must include the test name itself.
+func (p *chattyPrinter) Updatef(testName, format string, args ...interface{}) {
 	p.lastNameMu.Lock()
 	defer p.lastNameMu.Unlock()
 
-	if !p.chatty ||
-		strings.HasPrefix(out, "--- PASS: ") ||
-		strings.HasPrefix(out, "--- FAIL: ") ||
-		strings.HasPrefix(out, "--- SKIP: ") ||
-		strings.HasPrefix(out, "=== RUN   ") ||
-		strings.HasPrefix(out, "=== CONT  ") ||
-		strings.HasPrefix(out, "=== PAUSE ") {
-		// If we're buffering test output (!p.chatty), we don't really care which
-		// test is emitting which line so long as they are serialized.
-		//
-		// If the message already implies an association with a specific new test,
-		// we don't need to check what the old test name was or log an extra CONT
-		// line for it. (We're updating it anyway, and the current message already
-		// includes the test name.)
-		p.lastName = testName
-		fmt.Fprint(w, out)
-		return
-	}
+	// Since the message already implies an association with a specific new test,
+	// we don't need to check what the old test name was or log an extra CONT line
+	// for it. (We're updating it anyway, and the current message already includes
+	// the test name.)
+	p.lastName = testName
+	fmt.Fprintf(p.w, format, args...)
+}
+
+// Printf prints a message, generated by the named test, that does not
+// necessarily mention that test's name itself.
+func (p *chattyPrinter) Printf(testName, format string, args ...interface{}) {
+	p.lastNameMu.Lock()
+	defer p.lastNameMu.Unlock()
 
 	if p.lastName == "" {
 		p.lastName = testName
 	} else if p.lastName != testName {
-		// Always printed as-is, with 0 decoration or indentation. So, we skip
-		// printing to w.
-		fmt.Printf("=== CONT  %s\n", testName)
+		fmt.Fprintf(p.w, "=== CONT  %s\n", testName)
 		p.lastName = testName
 	}
-	fmt.Fprint(w, out)
+
+	fmt.Fprintf(p.w, format, args...)
 }
 
 // The maximum number of stack frames to go through when skipping helper functions for
@@ -398,12 +387,12 @@
 	helpers map[string]struct{} // functions to be skipped when writing file/line info
 	cleanup func()              // optional function to be called at the end of the test
 
-	chatty     bool   // A copy of the chatty flag.
-	bench      bool   // Whether the current test is a benchmark.
-	finished   bool   // Test function has completed.
-	hasSub     int32  // Written atomically.
-	raceErrors int    // Number of races detected during test.
-	runner     string // Function name of tRunner running the test.
+	chatty     *chattyPrinter // A copy of chattyPrinter, if the chatty flag is set.
+	bench      bool           // Whether the current test is a benchmark.
+	finished   bool           // Test function has completed.
+	hasSub     int32          // Written atomically.
+	raceErrors int            // Number of races detected during test.
+	runner     string         // Function name of tRunner running the test.
 
 	parent   *common
 	level    int       // Nesting depth of test or benchmark.
@@ -556,12 +545,31 @@
 	p.mu.Lock()
 	defer p.mu.Unlock()
 
-	printer.Fprint(p.w, testName, fmt.Sprintf(format, args...))
-
 	c.mu.Lock()
 	defer c.mu.Unlock()
-	io.Copy(p.w, bytes.NewReader(c.output))
-	c.output = c.output[:0]
+
+	if len(c.output) > 0 {
+		format += "%s"
+		args = append(args[:len(args):len(args)], c.output)
+		c.output = c.output[:0] // contents were copied into args above
+	}
+
+	if c.chatty != nil && p.w == c.chatty.w {
+		// We're flushing to the actual output, so track that this output is
+		// associated with a specific test (and, specifically, that the next output
+		// is *not* associated with that test).
+		//
+		// Moreover, if c.output is non-empty it is important that this write be
+		// atomic with respect to the output of other tests, so that we don't end up
+		// with confusing '=== CONT' lines in the middle of our '--- PASS' block.
+		// Neither humans nor cmd/test2json can parse those easily.
+		// (See https://golang.org/issue/40771.)
+		c.chatty.Updatef(testName, format, args...)
+	} else {
+		// We're flushing to the output buffer of the parent test, which will
+		// itself follow a test-name header when it is finally flushed to stdout.
+		fmt.Fprintf(p.w, format, args...)
+	}
 }
 
 type indenter struct {
@@ -729,13 +737,13 @@
 		}
 		panic("Log in goroutine after " + c.name + " has completed")
 	} else {
-		if c.chatty {
+		if c.chatty != nil {
 			if c.bench {
 				// Benchmarks don't print === CONT, so we should skip the test
 				// printer and just print straight to stdout.
 				fmt.Print(c.decorate(s, depth+1))
 			} else {
-				printer.Print(c.name, c.decorate(s, depth+1))
+				c.chatty.Printf(c.name, "%s", c.decorate(s, depth+1))
 			}
 
 			return
@@ -910,34 +918,22 @@
 	t.parent.sub = append(t.parent.sub, t)
 	t.raceErrors += race.Errors()
 
-	if t.chatty {
-		// Print directly to root's io.Writer so there is no delay.
-		root := t.parent
-		for ; root.parent != nil; root = root.parent {
-		}
-		root.mu.Lock()
+	if t.chatty != nil {
 		// Unfortunately, even though PAUSE indicates that the named test is *no
 		// longer* running, cmd/test2json interprets it as changing the active test
 		// for the purpose of log parsing. We could fix cmd/test2json, but that
 		// won't fix existing deployments of third-party tools that already shell
 		// out to older builds of cmd/test2json — so merely fixing cmd/test2json
 		// isn't enough for now.
-		printer.Fprint(root.w, t.name, fmt.Sprintf("=== PAUSE %s\n", t.name))
-		root.mu.Unlock()
+		t.chatty.Updatef(t.name, "=== PAUSE %s\n", t.name)
 	}
 
 	t.signal <- true   // Release calling test.
 	<-t.parent.barrier // Wait for the parent test to complete.
 	t.context.waitParallel()
 
-	if t.chatty {
-		// Print directly to root's io.Writer so there is no delay.
-		root := t.parent
-		for ; root.parent != nil; root = root.parent {
-		}
-		root.mu.Lock()
-		printer.Fprint(root.w, t.name, fmt.Sprintf("=== CONT  %s\n", t.name))
-		root.mu.Unlock()
+	if t.chatty != nil {
+		t.chatty.Updatef(t.name, "=== CONT  %s\n", t.name)
 	}
 
 	t.start = time.Now()
@@ -1088,14 +1084,8 @@
 	}
 	t.w = indenter{&t.common}
 
-	if t.chatty {
-		// Print directly to root's io.Writer so there is no delay.
-		root := t.parent
-		for ; root.parent != nil; root = root.parent {
-		}
-		root.mu.Lock()
-		printer.Fprint(root.w, t.name, fmt.Sprintf("=== RUN   %s\n", t.name))
-		root.mu.Unlock()
+	if t.chatty != nil {
+		t.chatty.Updatef(t.name, "=== RUN   %s\n", t.name)
 	}
 	// Instead of reducing the running count of this test before calling the
 	// tRunner and increasing it afterwards, we rely on tRunner keeping the
@@ -1242,8 +1232,6 @@
 		flag.Parse()
 	}
 
-	printer = newTestPrinter(Verbose())
-
 	if *parallel < 1 {
 		fmt.Fprintln(os.Stderr, "testing: -parallel can only be given a positive integer")
 		flag.Usage()
@@ -1284,7 +1272,7 @@
 	format := "--- %s: %s (%s)\n"
 	if t.Failed() {
 		t.flushToParent(t.name, format, "FAIL", t.name, dstr)
-	} else if t.chatty {
+	} else if t.chatty != nil {
 		if t.Skipped() {
 			t.flushToParent(t.name, format, "SKIP", t.name, dstr)
 		} else {
@@ -1340,10 +1328,12 @@
 					signal:  make(chan bool),
 					barrier: make(chan bool),
 					w:       os.Stdout,
-					chatty:  *chatty,
 				},
 				context: ctx,
 			}
+			if Verbose() {
+				t.chatty = newChattyPrinter(t.w)
+			}
 			tRunner(t, func(t *T) {
 				for _, test := range tests {
 					t.Run(test.Name, test.F)
diff --git a/test/fixedbugs/issue40629.go b/test/fixedbugs/issue40629.go
new file mode 100644
index 0000000..c6ef408
--- /dev/null
+++ b/test/fixedbugs/issue40629.go
@@ -0,0 +1,69 @@
+// run
+
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import "fmt"
+
+const N = 40
+
+func main() {
+	var x [N]int // stack-allocated memory
+	for i := range x {
+		x[i] = 0x999
+	}
+
+	// This defer checks to see if x is uncorrupted.
+	defer func(p *[N]int) {
+		recover()
+		for i := range p {
+			if p[i] != 0x999 {
+				for j := range p {
+					fmt.Printf("p[%d]=0x%x\n", j, p[j])
+				}
+				panic("corrupted stack variable")
+			}
+		}
+	}(&x)
+
+	// This defer starts a new goroutine, which will (hopefully)
+	// overwrite x on the garbage stack.
+	defer func() {
+		c := make(chan bool)
+		go func() {
+			useStack(1000)
+			c <- true
+		}()
+		<-c
+
+	}()
+
+	// This defer causes a stack copy.
+	// The old stack is now garbage.
+	defer func() {
+		useStack(1000)
+	}()
+
+	// Trigger a segfault.
+	*g = 0
+
+	// Make the return statement unreachable.
+	// That makes the stack map at the deferreturn call empty.
+	// In particular, the argument to the first defer is not
+	// marked as a pointer, so it doesn't get adjusted
+	// during the stack copy.
+	for {
+	}
+}
+
+var g *int64
+
+func useStack(n int) {
+	if n == 0 {
+		return
+	}
+	useStack(n - 1)
+}