[dev.boringcrypto.go1.14] all: merge go1.14.9 into dev.boringcrypto.go1.14

Change-Id: I810ef54d98b21ce3dfa51e55ec17ba2bab72f3e4
diff --git a/doc/go1.14.html b/doc/go1.14.html
index 35a9f3c..410e0cb 100644
--- a/doc/go1.14.html
+++ b/doc/go1.14.html
@@ -609,6 +609,12 @@
       If a program needs to accept invalid numbers like the empty string,
       consider wrapping the type with <a href="/pkg/encoding/json/#Unmarshaler"><code>Unmarshaler</code></a>.
     </p>
+
+    <p><!-- CL 200237 -->
+      <a href="/pkg/encoding/json/#Unmarshal"><code>Unmarshal</code></a>
+      now supports map keys with a string underlying type that implement
+      <a href="/pkg/encoding/#TextUnmarshaler"><code>encoding.TextUnmarshaler</code></a>.
+    </p>
   </dd>
 </dl><!-- encoding/json -->
 
diff --git a/misc/cgo/testshared/shared_test.go b/misc/cgo/testshared/shared_test.go
index b9ef6da..f6cefa3 100644
--- a/misc/cgo/testshared/shared_test.go
+++ b/misc/cgo/testshared/shared_test.go
@@ -38,7 +38,15 @@
 
 // run runs a command and calls t.Errorf if it fails.
 func run(t *testing.T, msg string, args ...string) {
+	runWithEnv(t, msg, nil, args...)
+}
+
+// runWithEnv runs a command under the given environment and calls t.Errorf if it fails.
+func runWithEnv(t *testing.T, msg string, env []string, args ...string) {
 	c := exec.Command(args[0], args[1:]...)
+	if len(env) != 0 {
+		c.Env = append(os.Environ(), env...)
+	}
 	if output, err := c.CombinedOutput(); err != nil {
 		t.Errorf("executing %s (%s) failed %s:\n%s", strings.Join(args, " "), msg, err, output)
 	}
@@ -1030,3 +1038,11 @@
 	goCmd(nil, "install", "-buildmode=shared", "-linkshared", "./issue30768/issue30768lib")
 	goCmd(nil, "test", "-linkshared", "./issue30768")
 }
+
+// Test that GC data are generated correctly by the linker when it needs a type defined in
+// a shared library. See issue 39927.
+func TestGCData(t *testing.T) {
+	goCmd(t, "install", "-buildmode=shared", "-linkshared", "./gcdata/p")
+	goCmd(t, "build", "-linkshared", "./gcdata/main")
+	runWithEnv(t, "running gcdata/main", []string{"GODEBUG=clobberfree=1"}, "./main")
+}
diff --git a/misc/cgo/testshared/testdata/gcdata/main/main.go b/misc/cgo/testshared/testdata/gcdata/main/main.go
new file mode 100644
index 0000000..394862f
--- /dev/null
+++ b/misc/cgo/testshared/testdata/gcdata/main/main.go
@@ -0,0 +1,37 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Test that GC data is generated correctly for global
+// variables with types defined in a shared library.
+// See issue 39927.
+
+// This test runs under GODEBUG=clobberfree=1. The check
+// *x[i] == 12345 depends on this debug mode to clobber
+// the value if the object is freed prematurely.
+
+package main
+
+import (
+	"fmt"
+	"runtime"
+	"testshared/gcdata/p"
+)
+
+var x p.T
+
+func main() {
+	for i := range x {
+		x[i] = new(int)
+		*x[i] = 12345
+	}
+	runtime.GC()
+	runtime.GC()
+	runtime.GC()
+	for i := range x {
+		if *x[i] != 12345 {
+			fmt.Printf("x[%d] == %d, want 12345\n", i, *x[i])
+			panic("FAIL")
+		}
+	}
+}
diff --git a/misc/cgo/testshared/testdata/gcdata/p/p.go b/misc/cgo/testshared/testdata/gcdata/p/p.go
new file mode 100644
index 0000000..1fee754
--- /dev/null
+++ b/misc/cgo/testshared/testdata/gcdata/p/p.go
@@ -0,0 +1,7 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package p
+
+type T [10]*int
diff --git a/src/cmd/compile/internal/gc/syntax.go b/src/cmd/compile/internal/gc/syntax.go
index de18795..8bad47d 100644
--- a/src/cmd/compile/internal/gc/syntax.go
+++ b/src/cmd/compile/internal/gc/syntax.go
@@ -141,8 +141,8 @@
 	nodeInitorder, _                   // tracks state during init1; two bits
 	_, _                               // second nodeInitorder bit
 	_, nodeHasBreak
-	_, nodeNoInline // used internally by inliner to indicate that a function call should not be inlined; set for OCALLFUNC and OCALLMETH only
-	_, nodeImplicit
+	_, nodeNoInline  // used internally by inliner to indicate that a function call should not be inlined; set for OCALLFUNC and OCALLMETH only
+	_, nodeImplicit  // implicit OADDR or ODEREF; ++/-- statement represented as OASOP; or ANDNOT lowered to OAND
 	_, nodeIsDDD     // is the argument variadic
 	_, nodeDiag      // already printed error about this
 	_, nodeColas     // OAS resulting from :=
diff --git a/src/cmd/compile/internal/gc/walk.go b/src/cmd/compile/internal/gc/walk.go
index b8b954c..483f338 100644
--- a/src/cmd/compile/internal/gc/walk.go
+++ b/src/cmd/compile/internal/gc/walk.go
@@ -984,6 +984,7 @@
 	case OANDNOT:
 		n.Left = walkexpr(n.Left, init)
 		n.Op = OAND
+		n.SetImplicit(true) // for walkCheckPtrArithmetic
 		n.Right = nod(OBITNOT, n.Right, nil)
 		n.Right = typecheck(n.Right, ctxExpr)
 		n.Right = walkexpr(n.Right, init)
@@ -4056,8 +4057,12 @@
 		case OADD:
 			walk(n.Left)
 			walk(n.Right)
-		case OSUB, OANDNOT:
+		case OSUB:
 			walk(n.Left)
+		case OAND:
+			if n.Implicit() { // was OANDNOT
+				walk(n.Left)
+			}
 		case OCONVNOP:
 			if n.Left.Type.Etype == TUNSAFEPTR {
 				n.Left = cheapexpr(n.Left, init)
diff --git a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go
index ab671a2..d657957 100644
--- a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go
@@ -561,9 +561,9 @@
 		{name: "LoweredAtomicOr8", argLength: 3, reg: gpstore, asm: "OR", faultOnNilArg0: true, hasSideEffects: true},
 
 		// LoweredWB invokes runtime.gcWriteBarrier. arg0=destptr, arg1=srcptr, arg2=mem, aux=runtime.gcWriteBarrier
-		// It preserves R0 through R15, g, and its arguments R20 and R21,
+		// It preserves R0 through R17 (except special registers R1, R2, R11, R12, R13), g, and its arguments R20 and R21,
 		// but may clobber anything else, including R31 (REGTMP).
-		{name: "LoweredWB", argLength: 3, reg: regInfo{inputs: []regMask{buildReg("R20"), buildReg("R21")}, clobbers: (callerSave &^ buildReg("R0 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R20 R21 g")) | buildReg("R31")}, clobberFlags: true, aux: "Sym", symEffect: "None"},
+		{name: "LoweredWB", argLength: 3, reg: regInfo{inputs: []regMask{buildReg("R20"), buildReg("R21")}, clobbers: (callerSave &^ buildReg("R0 R3 R4 R5 R6 R7 R8 R9 R10 R14 R15 R16 R17 R20 R21 g")) | buildReg("R31")}, clobberFlags: true, aux: "Sym", symEffect: "None"},
 
 		// There are three of these functions so that they can have three different register inputs.
 		// When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the
diff --git a/src/cmd/compile/internal/ssa/gen/S390XOps.go b/src/cmd/compile/internal/ssa/gen/S390XOps.go
index 6517957..62d56bf 100644
--- a/src/cmd/compile/internal/ssa/gen/S390XOps.go
+++ b/src/cmd/compile/internal/ssa/gen/S390XOps.go
@@ -391,18 +391,20 @@
 
 		{name: "MOVDconst", reg: gp01, asm: "MOVD", typ: "UInt64", aux: "Int64", rematerializeable: true}, // auxint
 
-		{name: "LDGR", argLength: 1, reg: gpfp, asm: "LDGR"},     // move int64 to float64 (no conversion)
-		{name: "LGDR", argLength: 1, reg: fpgp, asm: "LGDR"},     // move float64 to int64 (no conversion)
-		{name: "CFDBRA", argLength: 1, reg: fpgp, asm: "CFDBRA"}, // convert float64 to int32
-		{name: "CGDBRA", argLength: 1, reg: fpgp, asm: "CGDBRA"}, // convert float64 to int64
-		{name: "CFEBRA", argLength: 1, reg: fpgp, asm: "CFEBRA"}, // convert float32 to int32
-		{name: "CGEBRA", argLength: 1, reg: fpgp, asm: "CGEBRA"}, // convert float32 to int64
-		{name: "CEFBRA", argLength: 1, reg: gpfp, asm: "CEFBRA"}, // convert int32 to float32
-		{name: "CDFBRA", argLength: 1, reg: gpfp, asm: "CDFBRA"}, // convert int32 to float64
-		{name: "CEGBRA", argLength: 1, reg: gpfp, asm: "CEGBRA"}, // convert int64 to float32
-		{name: "CDGBRA", argLength: 1, reg: gpfp, asm: "CDGBRA"}, // convert int64 to float64
-		{name: "LEDBR", argLength: 1, reg: fp11, asm: "LEDBR"},   // convert float64 to float32
-		{name: "LDEBR", argLength: 1, reg: fp11, asm: "LDEBR"},   // convert float32 to float64
+		{name: "LDGR", argLength: 1, reg: gpfp, asm: "LDGR"}, // move int64 to float64 (no conversion)
+		{name: "LGDR", argLength: 1, reg: fpgp, asm: "LGDR"}, // move float64 to int64 (no conversion)
+
+		{name: "CFDBRA", argLength: 1, reg: fpgp, asm: "CFDBRA", clobberFlags: true}, // convert float64 to int32
+		{name: "CGDBRA", argLength: 1, reg: fpgp, asm: "CGDBRA", clobberFlags: true}, // convert float64 to int64
+		{name: "CFEBRA", argLength: 1, reg: fpgp, asm: "CFEBRA", clobberFlags: true}, // convert float32 to int32
+		{name: "CGEBRA", argLength: 1, reg: fpgp, asm: "CGEBRA", clobberFlags: true}, // convert float32 to int64
+		{name: "CEFBRA", argLength: 1, reg: gpfp, asm: "CEFBRA", clobberFlags: true}, // convert int32 to float32
+		{name: "CDFBRA", argLength: 1, reg: gpfp, asm: "CDFBRA", clobberFlags: true}, // convert int32 to float64
+		{name: "CEGBRA", argLength: 1, reg: gpfp, asm: "CEGBRA", clobberFlags: true}, // convert int64 to float32
+		{name: "CDGBRA", argLength: 1, reg: gpfp, asm: "CDGBRA", clobberFlags: true}, // convert int64 to float64
+
+		{name: "LEDBR", argLength: 1, reg: fp11, asm: "LEDBR"}, // convert float64 to float32
+		{name: "LDEBR", argLength: 1, reg: fp11, asm: "LDEBR"}, // convert float32 to float64
 
 		{name: "MOVDaddr", argLength: 1, reg: addr, aux: "SymOff", rematerializeable: true, symEffect: "Read"}, // arg0 + auxint + offset encoded in aux
 		{name: "MOVDaddridx", argLength: 2, reg: addridx, aux: "SymOff", symEffect: "Read"},                    // arg0 + arg1 + auxint + aux
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 86428a3..be3f5ee 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -24940,7 +24940,7 @@
 				{0, 1048576}, // R20
 				{1, 2097152}, // R21
 			},
-			clobbers: 576460746931503104, // R16 R17 R18 R19 R22 R23 R24 R25 R26 R27 R28 R29 R31 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26
+			clobbers: 576460746931312640, // R11 R12 R18 R19 R22 R23 R24 R25 R26 R27 R28 R29 R31 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26
 		},
 	},
 	{
@@ -28192,9 +28192,10 @@
 		},
 	},
 	{
-		name:   "CFDBRA",
-		argLen: 1,
-		asm:    s390x.ACFDBRA,
+		name:         "CFDBRA",
+		argLen:       1,
+		clobberFlags: true,
+		asm:          s390x.ACFDBRA,
 		reg: regInfo{
 			inputs: []inputInfo{
 				{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
@@ -28205,9 +28206,10 @@
 		},
 	},
 	{
-		name:   "CGDBRA",
-		argLen: 1,
-		asm:    s390x.ACGDBRA,
+		name:         "CGDBRA",
+		argLen:       1,
+		clobberFlags: true,
+		asm:          s390x.ACGDBRA,
 		reg: regInfo{
 			inputs: []inputInfo{
 				{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
@@ -28218,9 +28220,10 @@
 		},
 	},
 	{
-		name:   "CFEBRA",
-		argLen: 1,
-		asm:    s390x.ACFEBRA,
+		name:         "CFEBRA",
+		argLen:       1,
+		clobberFlags: true,
+		asm:          s390x.ACFEBRA,
 		reg: regInfo{
 			inputs: []inputInfo{
 				{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
@@ -28231,9 +28234,10 @@
 		},
 	},
 	{
-		name:   "CGEBRA",
-		argLen: 1,
-		asm:    s390x.ACGEBRA,
+		name:         "CGEBRA",
+		argLen:       1,
+		clobberFlags: true,
+		asm:          s390x.ACGEBRA,
 		reg: regInfo{
 			inputs: []inputInfo{
 				{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
@@ -28244,9 +28248,10 @@
 		},
 	},
 	{
-		name:   "CEFBRA",
-		argLen: 1,
-		asm:    s390x.ACEFBRA,
+		name:         "CEFBRA",
+		argLen:       1,
+		clobberFlags: true,
+		asm:          s390x.ACEFBRA,
 		reg: regInfo{
 			inputs: []inputInfo{
 				{0, 23551}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R11 R12 R14
@@ -28257,9 +28262,10 @@
 		},
 	},
 	{
-		name:   "CDFBRA",
-		argLen: 1,
-		asm:    s390x.ACDFBRA,
+		name:         "CDFBRA",
+		argLen:       1,
+		clobberFlags: true,
+		asm:          s390x.ACDFBRA,
 		reg: regInfo{
 			inputs: []inputInfo{
 				{0, 23551}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R11 R12 R14
@@ -28270,9 +28276,10 @@
 		},
 	},
 	{
-		name:   "CEGBRA",
-		argLen: 1,
-		asm:    s390x.ACEGBRA,
+		name:         "CEGBRA",
+		argLen:       1,
+		clobberFlags: true,
+		asm:          s390x.ACEGBRA,
 		reg: regInfo{
 			inputs: []inputInfo{
 				{0, 23551}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R11 R12 R14
@@ -28283,9 +28290,10 @@
 		},
 	},
 	{
-		name:   "CDGBRA",
-		argLen: 1,
-		asm:    s390x.ACDGBRA,
+		name:         "CDGBRA",
+		argLen:       1,
+		clobberFlags: true,
+		asm:          s390x.ACDGBRA,
 		reg: regInfo{
 			inputs: []inputInfo{
 				{0, 23551}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R11 R12 R14
diff --git a/src/cmd/compile/internal/ssa/prove.go b/src/cmd/compile/internal/ssa/prove.go
index 774fa94..0506200 100644
--- a/src/cmd/compile/internal/ssa/prove.go
+++ b/src/cmd/compile/internal/ssa/prove.go
@@ -1069,6 +1069,11 @@
 	//
 	// If all of these conditions are true, then i1 < max and i1 >= min.
 
+	// To ensure this is a loop header node.
+	if len(b.Preds) != 2 {
+		return
+	}
+
 	for _, i1 := range b.Values {
 		if i1.Op != OpPhi {
 			continue
@@ -1111,6 +1116,9 @@
 				}
 				br = negative
 			}
+			if br == unknown {
+				continue
+			}
 
 			tr, has := domainRelationTable[control.Op]
 			if !has {
diff --git a/src/cmd/go/testdata/script/test_json_interleaved.txt b/src/cmd/go/testdata/script/test_json_interleaved.txt
new file mode 100644
index 0000000..e2d349e
--- /dev/null
+++ b/src/cmd/go/testdata/script/test_json_interleaved.txt
@@ -0,0 +1,27 @@
+# Regression test for https://golang.org/issue/40657: output from the main test
+# function should be attributed correctly even if interleaved with the PAUSE
+# line for a new parallel subtest.
+
+[short] skip
+
+go test -json
+stdout '"Test":"TestWeirdTiming","Output":"[^"]* logging to outer again\\n"'
+
+-- go.mod --
+module example.com
+go 1.15
+-- main_test.go --
+package main
+
+import (
+	"testing"
+)
+
+func TestWeirdTiming(outer *testing.T) {
+	outer.Run("pauser", func(pauser *testing.T) {
+		outer.Logf("logging to outer")
+		pauser.Parallel()
+	})
+
+	outer.Logf("logging to outer again")
+}
diff --git a/src/cmd/internal/obj/arm/asm5.go b/src/cmd/internal/obj/arm/asm5.go
index bc55dac..bde643c 100644
--- a/src/cmd/internal/obj/arm/asm5.go
+++ b/src/cmd/internal/obj/arm/asm5.go
@@ -327,6 +327,9 @@
 	{obj.APCDATA, C_LCON, C_NONE, C_LCON, 0, 0, 0, 0, 0, 0},
 	{obj.AFUNCDATA, C_LCON, C_NONE, C_ADDR, 0, 0, 0, 0, 0, 0},
 	{obj.ANOP, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0, 0},
+	{obj.ANOP, C_LCON, C_NONE, C_NONE, 0, 0, 0, 0, 0, 0}, // nop variants, see #40689
+	{obj.ANOP, C_REG, C_NONE, C_NONE, 0, 0, 0, 0, 0, 0},
+	{obj.ANOP, C_FREG, C_NONE, C_NONE, 0, 0, 0, 0, 0, 0},
 	{obj.ADUFFZERO, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0, 0}, // same as ABL
 	{obj.ADUFFCOPY, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0, 0}, // same as ABL
 	{obj.AXXX, C_NONE, C_NONE, C_NONE, 0, 4, 0, 0, 0, 0},
diff --git a/src/cmd/internal/obj/arm/obj5.go b/src/cmd/internal/obj/arm/obj5.go
index a895929..60a3532 100644
--- a/src/cmd/internal/obj/arm/obj5.go
+++ b/src/cmd/internal/obj/arm/obj5.go
@@ -276,67 +276,21 @@
 
 	/*
 	 * find leaf subroutines
-	 * strip NOPs
-	 * expand RET
-	 * expand BECOME pseudo
 	 */
-	var q1 *obj.Prog
-	var q *obj.Prog
 	for p := cursym.Func.Text; p != nil; p = p.Link {
 		switch p.As {
 		case obj.ATEXT:
 			p.Mark |= LEAF
 
-		case obj.ARET:
-			break
-
 		case ADIV, ADIVU, AMOD, AMODU:
-			q = p
 			cursym.Func.Text.Mark &^= LEAF
-			continue
-
-		case obj.ANOP:
-			q1 = p.Link
-			q.Link = q1 /* q is non-nop */
-			if q1 != nil {
-				q1.Mark |= p.Mark
-			}
-			continue
 
 		case ABL,
 			ABX,
 			obj.ADUFFZERO,
 			obj.ADUFFCOPY:
 			cursym.Func.Text.Mark &^= LEAF
-			fallthrough
-
-		case AB,
-			ABEQ,
-			ABNE,
-			ABCS,
-			ABHS,
-			ABCC,
-			ABLO,
-			ABMI,
-			ABPL,
-			ABVS,
-			ABVC,
-			ABHI,
-			ABLS,
-			ABGE,
-			ABLT,
-			ABGT,
-			ABLE:
-			q1 = p.Pcond
-			if q1 != nil {
-				for q1.As == obj.ANOP {
-					q1 = q1.Link
-					p.Pcond = q1
-				}
-			}
 		}
-
-		q = p
 	}
 
 	var q2 *obj.Prog
diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go
index 971e1bd..72fde52 100644
--- a/src/cmd/internal/obj/arm64/asm7.go
+++ b/src/cmd/internal/obj/arm64/asm7.go
@@ -838,6 +838,9 @@
 	{obj.APCDATA, C_VCON, C_NONE, C_NONE, C_VCON, 0, 0, 0, 0, 0},
 	{obj.AFUNCDATA, C_VCON, C_NONE, C_NONE, C_ADDR, 0, 0, 0, 0, 0},
 	{obj.ANOP, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0},
+	{obj.ANOP, C_LCON, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0}, // nop variants, see #40689
+	{obj.ANOP, C_REG, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0},
+	{obj.ANOP, C_VREG, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0},
 	{obj.ADUFFZERO, C_NONE, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0}, // same as AB/ABL
 	{obj.ADUFFCOPY, C_NONE, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0}, // same as AB/ABL
 
diff --git a/src/cmd/internal/obj/arm64/obj7.go b/src/cmd/internal/obj/arm64/obj7.go
index 09f603a..c9b4774 100644
--- a/src/cmd/internal/obj/arm64/obj7.go
+++ b/src/cmd/internal/obj/arm64/obj7.go
@@ -467,73 +467,21 @@
 
 	/*
 	 * find leaf subroutines
-	 * strip NOPs
-	 * expand RET
 	 */
-	q := (*obj.Prog)(nil)
-	var q1 *obj.Prog
 	for p := c.cursym.Func.Text; p != nil; p = p.Link {
 		switch p.As {
 		case obj.ATEXT:
 			p.Mark |= LEAF
 
-		case obj.ARET:
-			break
-
-		case obj.ANOP:
-			if p.Link != nil {
-				q1 = p.Link
-				q.Link = q1 /* q is non-nop */
-				q1.Mark |= p.Mark
-			}
-			continue
-
 		case ABL,
 			obj.ADUFFZERO,
 			obj.ADUFFCOPY:
 			c.cursym.Func.Text.Mark &^= LEAF
-			fallthrough
-
-		case ACBNZ,
-			ACBZ,
-			ACBNZW,
-			ACBZW,
-			ATBZ,
-			ATBNZ,
-			AB,
-			ABEQ,
-			ABNE,
-			ABCS,
-			ABHS,
-			ABCC,
-			ABLO,
-			ABMI,
-			ABPL,
-			ABVS,
-			ABVC,
-			ABHI,
-			ABLS,
-			ABGE,
-			ABLT,
-			ABGT,
-			ABLE,
-			AADR, /* strange */
-			AADRP:
-			q1 = p.Pcond
-
-			if q1 != nil {
-				for q1.As == obj.ANOP {
-					q1 = q1.Link
-					p.Pcond = q1
-				}
-			}
-
-			break
 		}
-
-		q = p
 	}
 
+	var q *obj.Prog
+	var q1 *obj.Prog
 	var retjmp *obj.LSym
 	for p := c.cursym.Func.Text; p != nil; p = p.Link {
 		o := p.As
diff --git a/src/cmd/internal/obj/mips/asm0.go b/src/cmd/internal/obj/mips/asm0.go
index 934f88a..dfe8634 100644
--- a/src/cmd/internal/obj/mips/asm0.go
+++ b/src/cmd/internal/obj/mips/asm0.go
@@ -386,6 +386,9 @@
 	{obj.APCDATA, C_LCON, C_NONE, C_LCON, 0, 0, 0, 0, 0},
 	{obj.AFUNCDATA, C_SCON, C_NONE, C_ADDR, 0, 0, 0, 0, 0},
 	{obj.ANOP, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0},
+	{obj.ANOP, C_LCON, C_NONE, C_NONE, 0, 0, 0, 0, 0}, // nop variants, see #40689
+	{obj.ANOP, C_REG, C_NONE, C_NONE, 0, 0, 0, 0, 0},
+	{obj.ANOP, C_FREG, C_NONE, C_NONE, 0, 0, 0, 0, 0},
 	{obj.ADUFFZERO, C_NONE, C_NONE, C_LBRA, 11, 4, 0, 0, 0}, // same as AJMP
 	{obj.ADUFFCOPY, C_NONE, C_NONE, C_LBRA, 11, 4, 0, 0, 0}, // same as AJMP
 
diff --git a/src/cmd/internal/obj/mips/obj0.go b/src/cmd/internal/obj/mips/obj0.go
index 3106143..77cad97 100644
--- a/src/cmd/internal/obj/mips/obj0.go
+++ b/src/cmd/internal/obj/mips/obj0.go
@@ -158,19 +158,14 @@
 
 	/*
 	 * find leaf subroutines
-	 * strip NOPs
 	 * expand RET
 	 * expand BECOME pseudo
 	 */
 
-	var q *obj.Prog
-	var q1 *obj.Prog
 	for p := c.cursym.Func.Text; p != nil; p = p.Link {
 		switch p.As {
 		/* too hard, just leave alone */
 		case obj.ATEXT:
-			q = p
-
 			p.Mark |= LABEL | LEAF | SYNC
 			if p.Link != nil {
 				p.Link.Mark |= LABEL
@@ -179,7 +174,6 @@
 		/* too hard, just leave alone */
 		case AMOVW,
 			AMOVV:
-			q = p
 			if p.To.Type == obj.TYPE_REG && p.To.Reg >= REG_SPECIAL {
 				p.Mark |= LABEL | SYNC
 				break
@@ -195,11 +189,9 @@
 			ATLBWI,
 			ATLBP,
 			ATLBR:
-			q = p
 			p.Mark |= LABEL | SYNC
 
 		case ANOR:
-			q = p
 			if p.To.Type == obj.TYPE_REG {
 				if p.To.Reg == REGZERO {
 					p.Mark |= LABEL | SYNC
@@ -235,8 +227,7 @@
 			} else {
 				p.Mark |= BRANCH
 			}
-			q = p
-			q1 = p.Pcond
+			q1 := p.Pcond
 			if q1 != nil {
 				for q1.As == obj.ANOP {
 					q1 = q1.Link
@@ -254,24 +245,11 @@
 			if q1 != nil {
 				q1.Mark |= LABEL
 			}
-			continue
 
 		case ARET:
-			q = p
 			if p.Link != nil {
 				p.Link.Mark |= LABEL
 			}
-			continue
-
-		case obj.ANOP:
-			q1 = p.Link
-			q.Link = q1 /* q is non-nop */
-			q1.Mark |= p.Mark
-			continue
-
-		default:
-			q = p
-			continue
 		}
 	}
 
@@ -284,6 +262,8 @@
 		mov = AMOVW
 	}
 
+	var q *obj.Prog
+	var q1 *obj.Prog
 	autosize := int32(0)
 	var p1 *obj.Prog
 	var p2 *obj.Prog
diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go
index 1496045..0472375 100644
--- a/src/cmd/internal/obj/ppc64/asm9.go
+++ b/src/cmd/internal/obj/ppc64/asm9.go
@@ -607,6 +607,9 @@
 	{obj.APCDATA, C_LCON, C_NONE, C_NONE, C_LCON, 0, 0, 0},
 	{obj.AFUNCDATA, C_SCON, C_NONE, C_NONE, C_ADDR, 0, 0, 0},
 	{obj.ANOP, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0},
+	{obj.ANOP, C_LCON, C_NONE, C_NONE, C_NONE, 0, 0, 0}, // NOP operand variations added for #40689
+	{obj.ANOP, C_REG, C_NONE, C_NONE, C_NONE, 0, 0, 0},  // to preserve previous behavior
+	{obj.ANOP, C_FREG, C_NONE, C_NONE, C_NONE, 0, 0, 0},
 	{obj.ADUFFZERO, C_NONE, C_NONE, C_NONE, C_LBRA, 11, 4, 0}, // same as ABR/ABL
 	{obj.ADUFFCOPY, C_NONE, C_NONE, C_NONE, C_LBRA, 11, 4, 0}, // same as ABR/ABL
 	{obj.APCALIGN, C_LCON, C_NONE, C_NONE, C_NONE, 0, 0, 0},   // align code
diff --git a/src/cmd/internal/obj/ppc64/obj9.go b/src/cmd/internal/obj/ppc64/obj9.go
index 7135488..a2f16f2e 100644
--- a/src/cmd/internal/obj/ppc64/obj9.go
+++ b/src/cmd/internal/obj/ppc64/obj9.go
@@ -427,7 +427,6 @@
 
 	/*
 	 * find leaf subroutines
-	 * strip NOPs
 	 * expand RET
 	 * expand BECOME pseudo
 	 */
@@ -557,10 +556,7 @@
 			q = p
 			q1 = p.Pcond
 			if q1 != nil {
-				for q1.As == obj.ANOP {
-					q1 = q1.Link
-					p.Pcond = q1
-				}
+				// NOPs are not removed due to #40689.
 
 				if q1.Mark&LEAF == 0 {
 					q1.Mark |= LABEL
@@ -587,9 +583,8 @@
 			continue
 
 		case obj.ANOP:
-			q1 = p.Link
-			q.Link = q1 /* q is non-nop */
-			q1.Mark |= p.Mark
+			// NOPs are not removed due to
+			// #40689
 			continue
 
 		default:
diff --git a/src/cmd/link/internal/ld/decodesym.go b/src/cmd/link/internal/ld/decodesym.go
index 3271c85..88176a3 100644
--- a/src/cmd/link/internal/ld/decodesym.go
+++ b/src/cmd/link/internal/ld/decodesym.go
@@ -11,6 +11,7 @@
 	"cmd/link/internal/sym"
 	"debug/elf"
 	"fmt"
+	"log"
 )
 
 // Decoding the type.* symbols.	 This has to be in sync with
@@ -93,7 +94,7 @@
 func findShlibSection(ctxt *Link, path string, addr uint64) *elf.Section {
 	for _, shlib := range ctxt.Shlibs {
 		if shlib.Path == path {
-			for _, sect := range shlib.File.Sections {
+			for _, sect := range shlib.File.Sections[1:] { // skip the NULL section
 				if sect.Addr <= addr && addr <= sect.Addr+sect.Size {
 					return sect
 				}
@@ -112,9 +113,15 @@
 			// A gcprog is a 4-byte uint32 indicating length, followed by
 			// the actual program.
 			progsize := make([]byte, 4)
-			sect.ReadAt(progsize, int64(addr-sect.Addr))
+			_, err := sect.ReadAt(progsize, int64(addr-sect.Addr))
+			if err != nil {
+				log.Fatal(err)
+			}
 			progbytes := make([]byte, ctxt.Arch.ByteOrder.Uint32(progsize))
-			sect.ReadAt(progbytes, int64(addr-sect.Addr+4))
+			_, err = sect.ReadAt(progbytes, int64(addr-sect.Addr+4))
+			if err != nil {
+				log.Fatal(err)
+			}
 			return append(progsize, progbytes...)
 		}
 		Exitf("cannot find gcprog for %s", s.Name)
@@ -124,14 +131,6 @@
 }
 
 func decodetypeGcprogShlib(ctxt *Link, s *sym.Symbol) uint64 {
-	if ctxt.Arch.Family == sys.ARM64 {
-		for _, shlib := range ctxt.Shlibs {
-			if shlib.Path == s.File {
-				return shlib.gcdataAddresses[s]
-			}
-		}
-		return 0
-	}
 	return decodeInuxi(ctxt.Arch, s.P[2*int32(ctxt.Arch.PtrSize)+8+1*int32(ctxt.Arch.PtrSize):], ctxt.Arch.PtrSize)
 }
 
@@ -141,8 +140,15 @@
 		ptrdata := decodetypePtrdata(ctxt.Arch, s.P)
 		sect := findShlibSection(ctxt, s.File, addr)
 		if sect != nil {
-			r := make([]byte, ptrdata/int64(ctxt.Arch.PtrSize))
-			sect.ReadAt(r, int64(addr-sect.Addr))
+			bits := ptrdata / int64(ctxt.Arch.PtrSize)
+			r := make([]byte, (bits+7)/8)
+			// ldshlibsyms avoids closing the ELF file so sect.ReadAt works.
+			// If we remove this read (and the ones in decodetypeGcprog), we
+			// can close the file.
+			_, err := sect.ReadAt(r, int64(addr-sect.Addr))
+			if err != nil {
+				log.Fatal(err)
+			}
 			return r
 		}
 		Exitf("cannot find gcmask for %s", s.Name)
diff --git a/src/cmd/link/internal/ld/lib.go b/src/cmd/link/internal/ld/lib.go
index 0afaf5f..a3a9da4 100644
--- a/src/cmd/link/internal/ld/lib.go
+++ b/src/cmd/link/internal/ld/lib.go
@@ -2001,7 +2001,9 @@
 		Errorf(nil, "cannot open shared library: %s", libpath)
 		return
 	}
-	defer f.Close()
+	// Keep the file open as decodetypeGcprog needs to read from it.
+	// TODO: fix. Maybe mmap the file.
+	//defer f.Close()
 
 	hash, err := readnote(f, ELF_NOTE_GO_NAME, ELF_NOTE_GOABIHASH_TAG)
 	if err != nil {
diff --git a/src/net/http/cgi/child.go b/src/net/http/cgi/child.go
index cb140f8..2b210ea 100644
--- a/src/net/http/cgi/child.go
+++ b/src/net/http/cgi/child.go
@@ -165,10 +165,12 @@
 }
 
 type response struct {
-	req        *http.Request
-	header     http.Header
-	bufw       *bufio.Writer
-	headerSent bool
+	req            *http.Request
+	header         http.Header
+	code           int
+	wroteHeader    bool
+	wroteCGIHeader bool
+	bufw           *bufio.Writer
 }
 
 func (r *response) Flush() {
@@ -180,26 +182,38 @@
 }
 
 func (r *response) Write(p []byte) (n int, err error) {
-	if !r.headerSent {
+	if !r.wroteHeader {
 		r.WriteHeader(http.StatusOK)
 	}
+	if !r.wroteCGIHeader {
+		r.writeCGIHeader(p)
+	}
 	return r.bufw.Write(p)
 }
 
 func (r *response) WriteHeader(code int) {
-	if r.headerSent {
+	if r.wroteHeader {
 		// Note: explicitly using Stderr, as Stdout is our HTTP output.
 		fmt.Fprintf(os.Stderr, "CGI attempted to write header twice on request for %s", r.req.URL)
 		return
 	}
-	r.headerSent = true
-	fmt.Fprintf(r.bufw, "Status: %d %s\r\n", code, http.StatusText(code))
+	r.wroteHeader = true
+	r.code = code
+}
 
-	// Set a default Content-Type
-	if _, hasType := r.header["Content-Type"]; !hasType {
-		r.header.Add("Content-Type", "text/html; charset=utf-8")
+// writeCGIHeader finalizes the header sent to the client and writes it to the output.
+// p is not written by writeCGIHeader, but is the first chunk of the body
+// that will be written. It is sniffed for a Content-Type if none is
+// set explicitly.
+func (r *response) writeCGIHeader(p []byte) {
+	if r.wroteCGIHeader {
+		return
 	}
-
+	r.wroteCGIHeader = true
+	fmt.Fprintf(r.bufw, "Status: %d %s\r\n", r.code, http.StatusText(r.code))
+	if _, hasType := r.header["Content-Type"]; !hasType {
+		r.header.Set("Content-Type", http.DetectContentType(p))
+	}
 	r.header.Write(r.bufw)
 	r.bufw.WriteString("\r\n")
 	r.bufw.Flush()
diff --git a/src/net/http/cgi/child_test.go b/src/net/http/cgi/child_test.go
index 14e0af4..f6ecb6e 100644
--- a/src/net/http/cgi/child_test.go
+++ b/src/net/http/cgi/child_test.go
@@ -7,6 +7,11 @@
 package cgi
 
 import (
+	"bufio"
+	"bytes"
+	"net/http"
+	"net/http/httptest"
+	"strings"
 	"testing"
 )
 
@@ -148,3 +153,67 @@
 		t.Errorf("RemoteAddr: got %q; want %q", g, e)
 	}
 }
+
+type countingWriter int
+
+func (c *countingWriter) Write(p []byte) (int, error) {
+	*c += countingWriter(len(p))
+	return len(p), nil
+}
+func (c *countingWriter) WriteString(p string) (int, error) {
+	*c += countingWriter(len(p))
+	return len(p), nil
+}
+
+func TestResponse(t *testing.T) {
+	var tests = []struct {
+		name   string
+		body   string
+		wantCT string
+	}{
+		{
+			name:   "no body",
+			wantCT: "text/plain; charset=utf-8",
+		},
+		{
+			name:   "html",
+			body:   "<html><head><title>test page</title></head><body>This is a body</body></html>",
+			wantCT: "text/html; charset=utf-8",
+		},
+		{
+			name:   "text",
+			body:   strings.Repeat("gopher", 86),
+			wantCT: "text/plain; charset=utf-8",
+		},
+		{
+			name:   "jpg",
+			body:   "\xFF\xD8\xFF" + strings.Repeat("B", 1024),
+			wantCT: "image/jpeg",
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			var buf bytes.Buffer
+			resp := response{
+				req:    httptest.NewRequest("GET", "/", nil),
+				header: http.Header{},
+				bufw:   bufio.NewWriter(&buf),
+			}
+			n, err := resp.Write([]byte(tt.body))
+			if err != nil {
+				t.Errorf("Write: unexpected %v", err)
+			}
+			if want := len(tt.body); n != want {
+				t.Errorf("reported short Write: got %v want %v", n, want)
+			}
+			resp.writeCGIHeader(nil)
+			resp.Flush()
+			if got := resp.Header().Get("Content-Type"); got != tt.wantCT {
+				t.Errorf("wrong content-type: got %q, want %q", got, tt.wantCT)
+			}
+			if !bytes.HasSuffix(buf.Bytes(), []byte(tt.body)) {
+				t.Errorf("body was not correctly written")
+			}
+		})
+	}
+}
diff --git a/src/net/http/cgi/integration_test.go b/src/net/http/cgi/integration_test.go
index 32d59c0..295c3b8 100644
--- a/src/net/http/cgi/integration_test.go
+++ b/src/net/http/cgi/integration_test.go
@@ -16,7 +16,9 @@
 	"io"
 	"net/http"
 	"net/http/httptest"
+	"net/url"
 	"os"
+	"strings"
 	"testing"
 	"time"
 )
@@ -52,7 +54,7 @@
 	}
 	replay := runCgiTest(t, h, "GET /test.go?foo=bar&a=b HTTP/1.0\nHost: example.com\n\n", expectedMap)
 
-	if expected, got := "text/html; charset=utf-8", replay.Header().Get("Content-Type"); got != expected {
+	if expected, got := "text/plain; charset=utf-8", replay.Header().Get("Content-Type"); got != expected {
 		t.Errorf("got a Content-Type of %q; expected %q", got, expected)
 	}
 	if expected, got := "X-Test-Value", replay.Header().Get("X-Test-Header"); got != expected {
@@ -152,6 +154,51 @@
 	}
 }
 
+func TestChildContentType(t *testing.T) {
+	testenv.MustHaveExec(t)
+
+	h := &Handler{
+		Path: os.Args[0],
+		Root: "/test.go",
+		Args: []string{"-test.run=TestBeChildCGIProcess"},
+	}
+	var tests = []struct {
+		name   string
+		body   string
+		wantCT string
+	}{
+		{
+			name:   "no body",
+			wantCT: "text/plain; charset=utf-8",
+		},
+		{
+			name:   "html",
+			body:   "<html><head><title>test page</title></head><body>This is a body</body></html>",
+			wantCT: "text/html; charset=utf-8",
+		},
+		{
+			name:   "text",
+			body:   strings.Repeat("gopher", 86),
+			wantCT: "text/plain; charset=utf-8",
+		},
+		{
+			name:   "jpg",
+			body:   "\xFF\xD8\xFF" + strings.Repeat("B", 1024),
+			wantCT: "image/jpeg",
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			expectedMap := map[string]string{"_body": tt.body}
+			req := fmt.Sprintf("GET /test.go?exact-body=%s HTTP/1.0\nHost: example.com\n\n", url.QueryEscape(tt.body))
+			replay := runCgiTest(t, h, req, expectedMap)
+			if got := replay.Header().Get("Content-Type"); got != tt.wantCT {
+				t.Errorf("got a Content-Type of %q; expected %q", got, tt.wantCT)
+			}
+		})
+	}
+}
+
 // golang.org/issue/7198
 func Test500WithNoHeaders(t *testing.T)     { want500Test(t, "/immediate-disconnect") }
 func Test500WithNoContentType(t *testing.T) { want500Test(t, "/no-content-type") }
@@ -203,6 +250,10 @@
 		if req.FormValue("no-body") == "1" {
 			return
 		}
+		if eb, ok := req.Form["exact-body"]; ok {
+			io.WriteString(rw, eb[0])
+			return
+		}
 		if req.FormValue("write-forever") == "1" {
 			io.Copy(rw, neverEnding('a'))
 			for {
diff --git a/src/net/http/fcgi/child.go b/src/net/http/fcgi/child.go
index 30a6b2c..a31273b 100644
--- a/src/net/http/fcgi/child.go
+++ b/src/net/http/fcgi/child.go
@@ -74,10 +74,12 @@
 
 // response implements http.ResponseWriter.
 type response struct {
-	req         *request
-	header      http.Header
-	w           *bufWriter
-	wroteHeader bool
+	req            *request
+	header         http.Header
+	code           int
+	wroteHeader    bool
+	wroteCGIHeader bool
+	w              *bufWriter
 }
 
 func newResponse(c *child, req *request) *response {
@@ -92,11 +94,14 @@
 	return r.header
 }
 
-func (r *response) Write(data []byte) (int, error) {
+func (r *response) Write(p []byte) (n int, err error) {
 	if !r.wroteHeader {
 		r.WriteHeader(http.StatusOK)
 	}
-	return r.w.Write(data)
+	if !r.wroteCGIHeader {
+		r.writeCGIHeader(p)
+	}
+	return r.w.Write(p)
 }
 
 func (r *response) WriteHeader(code int) {
@@ -104,22 +109,34 @@
 		return
 	}
 	r.wroteHeader = true
+	r.code = code
 	if code == http.StatusNotModified {
 		// Must not have body.
 		r.header.Del("Content-Type")
 		r.header.Del("Content-Length")
 		r.header.Del("Transfer-Encoding")
-	} else if r.header.Get("Content-Type") == "" {
-		r.header.Set("Content-Type", "text/html; charset=utf-8")
 	}
-
 	if r.header.Get("Date") == "" {
 		r.header.Set("Date", time.Now().UTC().Format(http.TimeFormat))
 	}
+}
 
-	fmt.Fprintf(r.w, "Status: %d %s\r\n", code, http.StatusText(code))
+// writeCGIHeader finalizes the header sent to the client and writes it to the output.
+// p is not written by writeCGIHeader, but is the first chunk of the body
+// that will be written. It is sniffed for a Content-Type if none is
+// set explicitly.
+func (r *response) writeCGIHeader(p []byte) {
+	if r.wroteCGIHeader {
+		return
+	}
+	r.wroteCGIHeader = true
+	fmt.Fprintf(r.w, "Status: %d %s\r\n", r.code, http.StatusText(r.code))
+	if _, hasType := r.header["Content-Type"]; r.code != http.StatusNotModified && !hasType {
+		r.header.Set("Content-Type", http.DetectContentType(p))
+	}
 	r.header.Write(r.w)
 	r.w.WriteString("\r\n")
+	r.w.Flush()
 }
 
 func (r *response) Flush() {
@@ -290,6 +307,8 @@
 		httpReq = httpReq.WithContext(envVarCtx)
 		c.handler.ServeHTTP(r, httpReq)
 	}
+	// Make sure we serve something even if nothing was written to r
+	r.Write(nil)
 	r.Close()
 	c.mu.Lock()
 	delete(c.requests, req.reqId)
diff --git a/src/net/http/fcgi/fcgi_test.go b/src/net/http/fcgi/fcgi_test.go
index e9d2b34..59246c26 100644
--- a/src/net/http/fcgi/fcgi_test.go
+++ b/src/net/http/fcgi/fcgi_test.go
@@ -10,6 +10,7 @@
 	"io"
 	"io/ioutil"
 	"net/http"
+	"strings"
 	"testing"
 )
 
@@ -344,3 +345,55 @@
 		<-done
 	}
 }
+
+func TestResponseWriterSniffsContentType(t *testing.T) {
+	t.Skip("this test is flaky, see Issue 41167")
+	var tests = []struct {
+		name   string
+		body   string
+		wantCT string
+	}{
+		{
+			name:   "no body",
+			wantCT: "text/plain; charset=utf-8",
+		},
+		{
+			name:   "html",
+			body:   "<html><head><title>test page</title></head><body>This is a body</body></html>",
+			wantCT: "text/html; charset=utf-8",
+		},
+		{
+			name:   "text",
+			body:   strings.Repeat("gopher", 86),
+			wantCT: "text/plain; charset=utf-8",
+		},
+		{
+			name:   "jpg",
+			body:   "\xFF\xD8\xFF" + strings.Repeat("B", 1024),
+			wantCT: "image/jpeg",
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			input := make([]byte, len(streamFullRequestStdin))
+			copy(input, streamFullRequestStdin)
+			rc := nopWriteCloser{bytes.NewBuffer(input)}
+			done := make(chan bool)
+			var resp *response
+			c := newChild(rc, http.HandlerFunc(func(
+				w http.ResponseWriter,
+				r *http.Request,
+			) {
+				io.WriteString(w, tt.body)
+				resp = w.(*response)
+				done <- true
+			}))
+			defer c.cleanUp()
+			go c.serve()
+			<-done
+			if got := resp.Header().Get("Content-Type"); got != tt.wantCT {
+				t.Errorf("got a Content-Type of %q; expected it to start with %q", got, tt.wantCT)
+			}
+		})
+	}
+}
diff --git a/src/net/http/transfer.go b/src/net/http/transfer.go
index 2e01a07..5486bf9 100644
--- a/src/net/http/transfer.go
+++ b/src/net/http/transfer.go
@@ -335,7 +335,7 @@
 	var ncopy int64
 
 	// Write body. We "unwrap" the body first if it was wrapped in a
-	// nopCloser. This is to ensure that we can take advantage of
+	// nopCloser or readTrackingBody. This is to ensure that we can take advantage of
 	// OS-level optimizations in the event that the body is an
 	// *os.File.
 	if t.Body != nil {
@@ -413,7 +413,10 @@
 	if reflect.TypeOf(t.Body) == nopCloserType {
 		return reflect.ValueOf(t.Body).Field(0).Interface().(io.Reader)
 	}
-
+	if r, ok := t.Body.(*readTrackingBody); ok {
+		r.didRead = true
+		return r.ReadCloser
+	}
 	return t.Body
 }
 
@@ -1092,6 +1095,9 @@
 	if reflect.TypeOf(r) == nopCloserType {
 		return isKnownInMemoryReader(reflect.ValueOf(r).Field(0).Interface().(io.Reader))
 	}
+	if r, ok := r.(*readTrackingBody); ok {
+		return isKnownInMemoryReader(r.ReadCloser)
+	}
 	return false
 }
 
diff --git a/src/net/http/transport.go b/src/net/http/transport.go
index d0bfdb4..a01a375 100644
--- a/src/net/http/transport.go
+++ b/src/net/http/transport.go
@@ -100,7 +100,7 @@
 	idleLRU      connLRU
 
 	reqMu       sync.Mutex
-	reqCanceler map[*Request]func(error)
+	reqCanceler map[cancelKey]func(error)
 
 	altMu    sync.Mutex   // guards changing altProto only
 	altProto atomic.Value // of nil or map[string]RoundTripper, key is URI scheme
@@ -273,6 +273,13 @@
 	ForceAttemptHTTP2 bool
 }
 
+// A cancelKey is the key of the reqCanceler map.
+// We wrap the *Request in this type since we want to use the original request,
+// not any transient one created by roundTrip.
+type cancelKey struct {
+	req *Request
+}
+
 func (t *Transport) writeBufferSize() int {
 	if t.WriteBufferSize > 0 {
 		return t.WriteBufferSize
@@ -433,9 +440,10 @@
 // optional extra headers to write and stores any error to return
 // from roundTrip.
 type transportRequest struct {
-	*Request                        // original request, not to be mutated
-	extra    Header                 // extra headers to write, or nil
-	trace    *httptrace.ClientTrace // optional
+	*Request                         // original request, not to be mutated
+	extra     Header                 // extra headers to write, or nil
+	trace     *httptrace.ClientTrace // optional
+	cancelKey cancelKey
 
 	mu  sync.Mutex // guards err
 	err error      // first setError value for mapRoundTripError to consider
@@ -511,10 +519,19 @@
 		}
 	}
 
+	origReq := req
+	cancelKey := cancelKey{origReq}
+	req = setupRewindBody(req)
+
 	if altRT := t.alternateRoundTripper(req); altRT != nil {
 		if resp, err := altRT.RoundTrip(req); err != ErrSkipAltProtocol {
 			return resp, err
 		}
+		var err error
+		req, err = rewindBody(req)
+		if err != nil {
+			return nil, err
+		}
 	}
 	if !isHTTP {
 		req.closeBody()
@@ -538,7 +555,7 @@
 		}
 
 		// treq gets modified by roundTrip, so we need to recreate for each retry.
-		treq := &transportRequest{Request: req, trace: trace}
+		treq := &transportRequest{Request: req, trace: trace, cancelKey: cancelKey}
 		cm, err := t.connectMethodForRequest(treq)
 		if err != nil {
 			req.closeBody()
@@ -551,7 +568,7 @@
 		// to send it requests.
 		pconn, err := t.getConn(treq, cm)
 		if err != nil {
-			t.setReqCanceler(req, nil)
+			t.setReqCanceler(cancelKey, nil)
 			req.closeBody()
 			return nil, err
 		}
@@ -559,12 +576,13 @@
 		var resp *Response
 		if pconn.alt != nil {
 			// HTTP/2 path.
-			t.setReqCanceler(req, nil) // not cancelable with CancelRequest
+			t.setReqCanceler(cancelKey, nil) // not cancelable with CancelRequest
 			resp, err = pconn.alt.RoundTrip(req)
 		} else {
 			resp, err = pconn.roundTrip(treq)
 		}
 		if err == nil {
+			resp.Request = origReq
 			return resp, nil
 		}
 
@@ -587,18 +605,59 @@
 		testHookRoundTripRetried()
 
 		// Rewind the body if we're able to.
-		if req.GetBody != nil {
-			newReq := *req
-			var err error
-			newReq.Body, err = req.GetBody()
-			if err != nil {
-				return nil, err
-			}
-			req = &newReq
+		req, err = rewindBody(req)
+		if err != nil {
+			return nil, err
 		}
 	}
 }
 
+var errCannotRewind = errors.New("net/http: cannot rewind body after connection loss")
+
+type readTrackingBody struct {
+	io.ReadCloser
+	didRead bool
+}
+
+func (r *readTrackingBody) Read(data []byte) (int, error) {
+	r.didRead = true
+	return r.ReadCloser.Read(data)
+}
+
+// setupRewindBody returns a new request with a custom body wrapper
+// that can report whether the body needs rewinding.
+// This lets rewindBody avoid an error result when the request
+// does not have GetBody but the body hasn't been read at all yet.
+func setupRewindBody(req *Request) *Request {
+	if req.Body == nil || req.Body == NoBody {
+		return req
+	}
+	newReq := *req
+	newReq.Body = &readTrackingBody{ReadCloser: req.Body}
+	return &newReq
+}
+
+// rewindBody returns a new request with the body rewound.
+// It returns req unmodified if the body does not need rewinding.
+// rewindBody takes care of closing req.Body when appropriate
+// (in all cases except when rewindBody returns req unmodified).
+func rewindBody(req *Request) (rewound *Request, err error) {
+	if req.Body == nil || req.Body == NoBody || !req.Body.(*readTrackingBody).didRead {
+		return req, nil // nothing to rewind
+	}
+	req.closeBody()
+	if req.GetBody == nil {
+		return nil, errCannotRewind
+	}
+	body, err := req.GetBody()
+	if err != nil {
+		return nil, err
+	}
+	newReq := *req
+	newReq.Body = &readTrackingBody{ReadCloser: body}
+	return &newReq, nil
+}
+
 // shouldRetryRequest reports whether we should retry sending a failed
 // HTTP request on a new connection. The non-nil input error is the
 // error from roundTrip.
@@ -706,14 +765,14 @@
 // cancelable context instead. CancelRequest cannot cancel HTTP/2
 // requests.
 func (t *Transport) CancelRequest(req *Request) {
-	t.cancelRequest(req, errRequestCanceled)
+	t.cancelRequest(cancelKey{req}, errRequestCanceled)
 }
 
 // Cancel an in-flight request, recording the error value.
-func (t *Transport) cancelRequest(req *Request, err error) {
+func (t *Transport) cancelRequest(key cancelKey, err error) {
 	t.reqMu.Lock()
-	cancel := t.reqCanceler[req]
-	delete(t.reqCanceler, req)
+	cancel := t.reqCanceler[key]
+	delete(t.reqCanceler, key)
 	t.reqMu.Unlock()
 	if cancel != nil {
 		cancel(err)
@@ -1046,16 +1105,16 @@
 	return removed
 }
 
-func (t *Transport) setReqCanceler(r *Request, fn func(error)) {
+func (t *Transport) setReqCanceler(key cancelKey, fn func(error)) {
 	t.reqMu.Lock()
 	defer t.reqMu.Unlock()
 	if t.reqCanceler == nil {
-		t.reqCanceler = make(map[*Request]func(error))
+		t.reqCanceler = make(map[cancelKey]func(error))
 	}
 	if fn != nil {
-		t.reqCanceler[r] = fn
+		t.reqCanceler[key] = fn
 	} else {
-		delete(t.reqCanceler, r)
+		delete(t.reqCanceler, key)
 	}
 }
 
@@ -1063,17 +1122,17 @@
 // for the request, we don't set the function and return false.
 // Since CancelRequest will clear the canceler, we can use the return value to detect if
 // the request was canceled since the last setReqCancel call.
-func (t *Transport) replaceReqCanceler(r *Request, fn func(error)) bool {
+func (t *Transport) replaceReqCanceler(key cancelKey, fn func(error)) bool {
 	t.reqMu.Lock()
 	defer t.reqMu.Unlock()
-	_, ok := t.reqCanceler[r]
+	_, ok := t.reqCanceler[key]
 	if !ok {
 		return false
 	}
 	if fn != nil {
-		t.reqCanceler[r] = fn
+		t.reqCanceler[key] = fn
 	} else {
-		delete(t.reqCanceler, r)
+		delete(t.reqCanceler, key)
 	}
 	return true
 }
@@ -1277,12 +1336,12 @@
 		// set request canceler to some non-nil function so we
 		// can detect whether it was cleared between now and when
 		// we enter roundTrip
-		t.setReqCanceler(req, func(error) {})
+		t.setReqCanceler(treq.cancelKey, func(error) {})
 		return pc, nil
 	}
 
 	cancelc := make(chan error, 1)
-	t.setReqCanceler(req, func(err error) { cancelc <- err })
+	t.setReqCanceler(treq.cancelKey, func(err error) { cancelc <- err })
 
 	// Queue for permission to dial.
 	t.queueForDial(w)
@@ -2025,7 +2084,7 @@
 		}
 
 		if !hasBody || bodyWritable {
-			pc.t.setReqCanceler(rc.req, nil)
+			pc.t.setReqCanceler(rc.cancelKey, nil)
 
 			// Put the idle conn back into the pool before we send the response
 			// so if they process it quickly and make another request, they'll
@@ -2098,7 +2157,7 @@
 		// reading the response body. (or for cancellation or death)
 		select {
 		case bodyEOF := <-waitForBodyRead:
-			pc.t.setReqCanceler(rc.req, nil) // before pc might return to idle pool
+			pc.t.setReqCanceler(rc.cancelKey, nil) // before pc might return to idle pool
 			alive = alive &&
 				bodyEOF &&
 				!pc.sawEOF &&
@@ -2112,7 +2171,7 @@
 			pc.t.CancelRequest(rc.req)
 		case <-rc.req.Context().Done():
 			alive = false
-			pc.t.cancelRequest(rc.req, rc.req.Context().Err())
+			pc.t.cancelRequest(rc.cancelKey, rc.req.Context().Err())
 		case <-pc.closech:
 			alive = false
 		}
@@ -2353,8 +2412,9 @@
 }
 
 type requestAndChan struct {
-	req *Request
-	ch  chan responseAndError // unbuffered; always send in select on callerGone
+	req       *Request
+	cancelKey cancelKey
+	ch        chan responseAndError // unbuffered; always send in select on callerGone
 
 	// whether the Transport (as opposed to the user client code)
 	// added the Accept-Encoding gzip header. If the Transport
@@ -2416,7 +2476,7 @@
 
 func (pc *persistConn) roundTrip(req *transportRequest) (resp *Response, err error) {
 	testHookEnterRoundTrip()
-	if !pc.t.replaceReqCanceler(req.Request, pc.cancelRequest) {
+	if !pc.t.replaceReqCanceler(req.cancelKey, pc.cancelRequest) {
 		pc.t.putOrCloseIdleConn(pc)
 		return nil, errRequestCanceled
 	}
@@ -2468,7 +2528,7 @@
 
 	defer func() {
 		if err != nil {
-			pc.t.setReqCanceler(req.Request, nil)
+			pc.t.setReqCanceler(req.cancelKey, nil)
 		}
 	}()
 
@@ -2484,6 +2544,7 @@
 	resc := make(chan responseAndError)
 	pc.reqch <- requestAndChan{
 		req:        req.Request,
+		cancelKey:  req.cancelKey,
 		ch:         resc,
 		addedGzip:  requestedGzip,
 		continueCh: continueCh,
@@ -2535,10 +2596,10 @@
 			}
 			return re.res, nil
 		case <-cancelChan:
-			pc.t.CancelRequest(req.Request)
+			pc.t.cancelRequest(req.cancelKey, errRequestCanceled)
 			cancelChan = nil
 		case <-ctxDoneChan:
-			pc.t.cancelRequest(req.Request, req.Context().Err())
+			pc.t.cancelRequest(req.cancelKey, req.Context().Err())
 			cancelChan = nil
 			ctxDoneChan = nil
 		}
diff --git a/src/net/http/transport_test.go b/src/net/http/transport_test.go
index 3ca7ce9..fa0c370 100644
--- a/src/net/http/transport_test.go
+++ b/src/net/http/transport_test.go
@@ -2346,6 +2346,50 @@
 	}
 }
 
+func testTransportCancelRequestInDo(t *testing.T, body io.Reader) {
+	setParallel(t)
+	defer afterTest(t)
+	if testing.Short() {
+		t.Skip("skipping test in -short mode")
+	}
+	unblockc := make(chan bool)
+	ts := httptest.NewServer(HandlerFunc(func(w ResponseWriter, r *Request) {
+		<-unblockc
+	}))
+	defer ts.Close()
+	defer close(unblockc)
+
+	c := ts.Client()
+	tr := c.Transport.(*Transport)
+
+	donec := make(chan bool)
+	req, _ := NewRequest("GET", ts.URL, body)
+	go func() {
+		defer close(donec)
+		c.Do(req)
+	}()
+	start := time.Now()
+	timeout := 10 * time.Second
+	for time.Since(start) < timeout {
+		time.Sleep(100 * time.Millisecond)
+		tr.CancelRequest(req)
+		select {
+		case <-donec:
+			return
+		default:
+		}
+	}
+	t.Errorf("Do of canceled request has not returned after %v", timeout)
+}
+
+func TestTransportCancelRequestInDo(t *testing.T) {
+	testTransportCancelRequestInDo(t, nil)
+}
+
+func TestTransportCancelRequestWithBodyInDo(t *testing.T) {
+	testTransportCancelRequestInDo(t, bytes.NewBuffer([]byte{0}))
+}
+
 func TestTransportCancelRequestInDial(t *testing.T) {
 	defer afterTest(t)
 	if testing.Short() {
@@ -3490,7 +3534,8 @@
 
 			for i := 0; i < 3; i++ {
 				t0 := time.Now()
-				res, err := c.Do(tc.req())
+				req := tc.req()
+				res, err := c.Do(req)
 				if err != nil {
 					if time.Since(t0) < MaxWriteWaitBeforeConnReuse/2 {
 						mu.Lock()
@@ -3501,6 +3546,9 @@
 					t.Skipf("connection likely wasn't recycled within %d, interfering with actual test; skipping", MaxWriteWaitBeforeConnReuse)
 				}
 				res.Body.Close()
+				if res.Request != req {
+					t.Errorf("Response.Request != original request; want identical Request")
+				}
 			}
 
 			mu.Lock()
@@ -6175,3 +6223,29 @@
 		return nil, errors.New("request was not canceled")
 	}
 }
+
+type roundTripFunc func(r *Request) (*Response, error)
+
+func (f roundTripFunc) RoundTrip(r *Request) (*Response, error) { return f(r) }
+
+// Issue 32441: body is not reset after ErrSkipAltProtocol
+func TestIssue32441(t *testing.T) {
+	defer afterTest(t)
+	ts := httptest.NewServer(HandlerFunc(func(w ResponseWriter, r *Request) {
+		if n, _ := io.Copy(ioutil.Discard, r.Body); n == 0 {
+			t.Error("body length is zero")
+		}
+	}))
+	defer ts.Close()
+	c := ts.Client()
+	c.Transport.(*Transport).RegisterProtocol("http", roundTripFunc(func(r *Request) (*Response, error) {
+		// Draining body to trigger failure condition on actual request to server.
+		if n, _ := io.Copy(ioutil.Discard, r.Body); n == 0 {
+			t.Error("body length is zero during round trip")
+		}
+		return nil, ErrSkipAltProtocol
+	}))
+	if _, err := c.Post(ts.URL, "application/octet-stream", bytes.NewBufferString("data")); err != nil {
+		t.Error(err)
+	}
+}
diff --git a/src/runtime/asm_ppc64x.s b/src/runtime/asm_ppc64x.s
index 11d2f2f..23387a2 100644
--- a/src/runtime/asm_ppc64x.s
+++ b/src/runtime/asm_ppc64x.s
@@ -916,23 +916,23 @@
 // - R20 is the destination of the write
 // - R21 is the value being written at R20.
 // It clobbers condition codes.
-// It does not clobber R0 through R15,
+// It does not clobber R0 through R17 (except special registers),
 // but may clobber any other register, *including* R31.
 TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$112
 	// The standard prologue clobbers R31.
-	// We use R16 and R17 as scratch registers.
-	MOVD	g_m(g), R16
-	MOVD	m_p(R16), R16
-	MOVD	(p_wbBuf+wbBuf_next)(R16), R17
+	// We use R18 and R19 as scratch registers.
+	MOVD	g_m(g), R18
+	MOVD	m_p(R18), R18
+	MOVD	(p_wbBuf+wbBuf_next)(R18), R19
 	// Increment wbBuf.next position.
-	ADD	$16, R17
-	MOVD	R17, (p_wbBuf+wbBuf_next)(R16)
-	MOVD	(p_wbBuf+wbBuf_end)(R16), R16
-	CMP	R16, R17
+	ADD	$16, R19
+	MOVD	R19, (p_wbBuf+wbBuf_next)(R18)
+	MOVD	(p_wbBuf+wbBuf_end)(R18), R18
+	CMP	R18, R19
 	// Record the write.
-	MOVD	R21, -16(R17)	// Record value
-	MOVD	(R20), R16	// TODO: This turns bad writes into bad reads.
-	MOVD	R16, -8(R17)	// Record *slot
+	MOVD	R21, -16(R19)	// Record value
+	MOVD	(R20), R18	// TODO: This turns bad writes into bad reads.
+	MOVD	R18, -8(R19)	// Record *slot
 	// Is the buffer full? (flags set in CMP above)
 	BEQ	flush
 ret:
@@ -956,11 +956,12 @@
 	MOVD	R8, (FIXED_FRAME+56)(R1)
 	MOVD	R9, (FIXED_FRAME+64)(R1)
 	MOVD	R10, (FIXED_FRAME+72)(R1)
-	MOVD	R11, (FIXED_FRAME+80)(R1)
-	MOVD	R12, (FIXED_FRAME+88)(R1)
+	// R11, R12 may be clobbered by external-linker-inserted trampoline
 	// R13 is REGTLS
-	MOVD	R14, (FIXED_FRAME+96)(R1)
-	MOVD	R15, (FIXED_FRAME+104)(R1)
+	MOVD	R14, (FIXED_FRAME+80)(R1)
+	MOVD	R15, (FIXED_FRAME+88)(R1)
+	MOVD	R16, (FIXED_FRAME+96)(R1)
+	MOVD	R17, (FIXED_FRAME+104)(R1)
 
 	// This takes arguments R20 and R21.
 	CALL	runtime·wbBufFlush(SB)
@@ -975,10 +976,10 @@
 	MOVD	(FIXED_FRAME+56)(R1), R8
 	MOVD	(FIXED_FRAME+64)(R1), R9
 	MOVD	(FIXED_FRAME+72)(R1), R10
-	MOVD	(FIXED_FRAME+80)(R1), R11
-	MOVD	(FIXED_FRAME+88)(R1), R12
-	MOVD	(FIXED_FRAME+96)(R1), R14
-	MOVD	(FIXED_FRAME+104)(R1), R15
+	MOVD	(FIXED_FRAME+80)(R1), R14
+	MOVD	(FIXED_FRAME+88)(R1), R15
+	MOVD	(FIXED_FRAME+96)(R1), R16
+	MOVD	(FIXED_FRAME+104)(R1), R17
 	JMP	ret
 
 // Note: these functions use a special calling convention to save generated code space.
diff --git a/src/runtime/checkptr_test.go b/src/runtime/checkptr_test.go
index 624f1b6..8887254 100644
--- a/src/runtime/checkptr_test.go
+++ b/src/runtime/checkptr_test.go
@@ -27,6 +27,7 @@
 		{"CheckPtrAlignmentPtr", "fatal error: checkptr: unsafe pointer conversion\n"},
 		{"CheckPtrAlignmentNoPtr", ""},
 		{"CheckPtrArithmetic", "fatal error: checkptr: unsafe pointer arithmetic\n"},
+		{"CheckPtrArithmetic2", "fatal error: checkptr: unsafe pointer arithmetic\n"},
 		{"CheckPtrSize", "fatal error: checkptr: unsafe pointer conversion\n"},
 		{"CheckPtrSmall", "fatal error: checkptr: unsafe pointer arithmetic\n"},
 	}
diff --git a/src/runtime/mpagealloc.go b/src/runtime/mpagealloc.go
index bb751f1..3c56b60 100644
--- a/src/runtime/mpagealloc.go
+++ b/src/runtime/mpagealloc.go
@@ -227,7 +227,9 @@
 
 	// The address to start an allocation search with. It must never
 	// point to any memory that is not contained in inUse, i.e.
-	// inUse.contains(searchAddr) must always be true.
+	// inUse.contains(searchAddr) must always be true. The one
+	// exception to this rule is that it may take on the value of
+	// maxSearchAddr to indicate that the heap is exhausted.
 	//
 	// When added with arenaBaseOffset, we guarantee that
 	// all valid heap addresses (when also added with
@@ -517,6 +519,30 @@
 	return uintptr(scav) * pageSize
 }
 
+// findMappedAddr returns the smallest mapped virtual address that is
+// >= addr. That is, if addr refers to mapped memory, then it is
+// returned. If addr is higher than any mapped region, then
+// it returns maxSearchAddr.
+//
+// s.mheapLock must be held.
+func (s *pageAlloc) findMappedAddr(addr uintptr) uintptr {
+	// If we're not in a test, validate first by checking mheap_.arenas.
+	// This is a fast path which is only safe to use outside of testing.
+	ai := arenaIndex(addr)
+	if s.test || mheap_.arenas[ai.l1()] == nil || mheap_.arenas[ai.l1()][ai.l2()] == nil {
+		vAddr, ok := s.inUse.findAddrGreaterEqual(addr)
+		if ok {
+			return vAddr
+		} else {
+			// The candidate search address is greater than any
+			// known address, which means we definitely have no
+			// free memory left.
+			return maxSearchAddr
+		}
+	}
+	return addr
+}
+
 // find searches for the first (address-ordered) contiguous free region of
 // npages in size and returns a base address for that region.
 //
@@ -525,6 +551,7 @@
 //
 // find also computes and returns a candidate s.searchAddr, which may or
 // may not prune more of the address space than s.searchAddr already does.
+// This candidate is always a valid s.searchAddr.
 //
 // find represents the slow path and the full radix tree search.
 //
@@ -694,7 +721,7 @@
 			// We found a sufficiently large run of free pages straddling
 			// some boundary, so compute the address and return it.
 			addr := uintptr(i<<levelShift[l]) - arenaBaseOffset + uintptr(base)*pageSize
-			return addr, firstFree.base - arenaBaseOffset
+			return addr, s.findMappedAddr(firstFree.base - arenaBaseOffset)
 		}
 		if l == 0 {
 			// We're at level zero, so that means we've exhausted our search.
@@ -740,7 +767,7 @@
 	// found an even narrower free window.
 	searchAddr := chunkBase(ci) + uintptr(searchIdx)*pageSize
 	foundFree(searchAddr+arenaBaseOffset, chunkBase(ci+1)-searchAddr)
-	return addr, firstFree.base - arenaBaseOffset
+	return addr, s.findMappedAddr(firstFree.base - arenaBaseOffset)
 }
 
 // alloc allocates npages worth of memory from the page heap, returning the base
diff --git a/src/runtime/mpagealloc_test.go b/src/runtime/mpagealloc_test.go
index 89a4a25..65ba71d 100644
--- a/src/runtime/mpagealloc_test.go
+++ b/src/runtime/mpagealloc_test.go
@@ -612,6 +612,63 @@
 				baseChunkIdx + chunkIdxBigJump:     {{0, PallocChunkPages}},
 			},
 		}
+
+		// Test to check for issue #40191. Essentially, the candidate searchAddr
+		// discovered by find may not point to mapped memory, so we need to handle
+		// that explicitly.
+		//
+		// chunkIdxSmallOffset is an offset intended to be used within chunkIdxBigJump.
+		// It is far enough within chunkIdxBigJump that the summaries at the beginning
+		// of an address range the size of chunkIdxBigJump will not be mapped in.
+		const chunkIdxSmallOffset = 0x503
+		tests["DiscontiguousBadSearchAddr"] = test{
+			before: map[ChunkIdx][]BitRange{
+				// The mechanism for the bug involves three chunks, A, B, and C, which are
+				// far apart in the address space. In particular, B is chunkIdxBigJump +
+				// chunkIdxSmallOffset chunks away from A, and C is 2*chunkIdxBigJump chunks
+				// away from A. A has 1 page free, B has several (NOT at the end of B), and
+				// C is totally free.
+				// Note that B's free memory must not be at the end of B because the fast
+				// path in the page allocator will check if the searchAddr even gives us
+				// enough space to place the allocation in a chunk before accessing the
+				// summary.
+				BaseChunkIdx + chunkIdxBigJump*0: {{0, PallocChunkPages - 1}},
+				BaseChunkIdx + chunkIdxBigJump*1 + chunkIdxSmallOffset: {
+					{0, PallocChunkPages - 10},
+					{PallocChunkPages - 1, 1},
+				},
+				BaseChunkIdx + chunkIdxBigJump*2: {},
+			},
+			scav: map[ChunkIdx][]BitRange{
+				BaseChunkIdx + chunkIdxBigJump*0:                       {},
+				BaseChunkIdx + chunkIdxBigJump*1 + chunkIdxSmallOffset: {},
+				BaseChunkIdx + chunkIdxBigJump*2:                       {},
+			},
+			hits: []hit{
+				// We first allocate into A to set the page allocator's searchAddr to the
+				// end of that chunk. That is the only purpose A serves.
+				{1, PageBase(BaseChunkIdx, PallocChunkPages-1), 0},
+				// Then, we make a big allocation that doesn't fit into B, and so must be
+				// fulfilled by C.
+				//
+				// On the way to fulfilling the allocation into C, we estimate searchAddr
+				// using the summary structure, but that will give us a searchAddr of
+				// B's base address minus chunkIdxSmallOffset chunks. These chunks will
+				// not be mapped.
+				{100, PageBase(baseChunkIdx+chunkIdxBigJump*2, 0), 0},
+				// Now we try to make a smaller allocation that can be fulfilled by B.
+				// In an older implementation of the page allocator, this will segfault,
+				// because this last allocation will first try to access the summary
+				// for B's base address minus chunkIdxSmallOffset chunks in the fast path,
+				// and this will not be mapped.
+				{9, PageBase(baseChunkIdx+chunkIdxBigJump*1+chunkIdxSmallOffset, PallocChunkPages-10), 0},
+			},
+			after: map[ChunkIdx][]BitRange{
+				BaseChunkIdx + chunkIdxBigJump*0:                       {{0, PallocChunkPages}},
+				BaseChunkIdx + chunkIdxBigJump*1 + chunkIdxSmallOffset: {{0, PallocChunkPages}},
+				BaseChunkIdx + chunkIdxBigJump*2:                       {{0, 100}},
+			},
+		}
 	}
 	for name, v := range tests {
 		v := v
diff --git a/src/runtime/mranges.go b/src/runtime/mranges.go
index b133851..89e9fd5 100644
--- a/src/runtime/mranges.go
+++ b/src/runtime/mranges.go
@@ -92,6 +92,25 @@
 	return len(a.ranges)
 }
 
+// findAddrGreaterEqual returns the smallest address represented by a
+// that is >= addr. Thus, if the address is represented by a,
+// then it returns addr. The second return value indicates whether
+// such an address exists for addr in a. That is, if addr is larger than
+// any address known to a, the second return value will be false.
+func (a *addrRanges) findAddrGreaterEqual(addr uintptr) (uintptr, bool) {
+	i := a.findSucc(addr)
+	if i == 0 {
+		return a.ranges[0].base, true
+	}
+	if a.ranges[i-1].contains(addr) {
+		return addr, true
+	}
+	if i < len(a.ranges) {
+		return a.ranges[i].base, true
+	}
+	return 0, false
+}
+
 // contains returns true if a covers the address addr.
 func (a *addrRanges) contains(addr uintptr) bool {
 	i := a.findSucc(addr)
diff --git a/src/runtime/os_windows.go b/src/runtime/os_windows.go
index 7f4ce14..4aaa318 100644
--- a/src/runtime/os_windows.go
+++ b/src/runtime/os_windows.go
@@ -36,7 +36,10 @@
 //go:cgo_import_dynamic runtime._SetThreadContext SetThreadContext%2 "kernel32.dll"
 //go:cgo_import_dynamic runtime._LoadLibraryW LoadLibraryW%1 "kernel32.dll"
 //go:cgo_import_dynamic runtime._LoadLibraryA LoadLibraryA%1 "kernel32.dll"
+//go:cgo_import_dynamic runtime._OpenProcess OpenProcess%3 "kernel32.dll"
 //go:cgo_import_dynamic runtime._PostQueuedCompletionStatus PostQueuedCompletionStatus%4 "kernel32.dll"
+//go:cgo_import_dynamic runtime._ProcessIdToSessionId ProcessIdToSessionId%2 "kernel32.dll"
+//go:cgo_import_dynamic runtime._QueryFullProcessImageNameA QueryFullProcessImageNameA%4 "kernel32.dll"
 //go:cgo_import_dynamic runtime._ResumeThread ResumeThread%1 "kernel32.dll"
 //go:cgo_import_dynamic runtime._SetConsoleCtrlHandler SetConsoleCtrlHandler%2 "kernel32.dll"
 //go:cgo_import_dynamic runtime._SetErrorMode SetErrorMode%1 "kernel32.dll"
@@ -84,7 +87,10 @@
 	_SetThreadContext,
 	_LoadLibraryW,
 	_LoadLibraryA,
+	_OpenProcess,
 	_PostQueuedCompletionStatus,
+	_ProcessIdToSessionId,
+	_QueryFullProcessImageNameA,
 	_QueryPerformanceCounter,
 	_QueryPerformanceFrequency,
 	_ResumeThread,
@@ -129,7 +135,8 @@
 	// Load ntdll.dll manually during startup, otherwise Mingw
 	// links wrong printf function to cgo executable (see issue
 	// 12030 for details).
-	_NtWaitForSingleObject stdFunction
+	_NtWaitForSingleObject     stdFunction
+	_NtQueryInformationProcess stdFunction
 
 	// These are from non-kernel32.dll, so we prefer to LoadLibraryEx them.
 	_timeBeginPeriod,
@@ -257,6 +264,7 @@
 		throw("ntdll.dll not found")
 	}
 	_NtWaitForSingleObject = windowsFindfunc(n32, []byte("NtWaitForSingleObject\000"))
+	_NtQueryInformationProcess = windowsFindfunc(n32, []byte("NtQueryInformationProcess\000"))
 
 	if GOARCH == "arm" {
 		_QueryPerformanceCounter = windowsFindfunc(k32, []byte("QueryPerformanceCounter\000"))
@@ -997,6 +1005,63 @@
 	onosstack(usleep2Addr, 10*us)
 }
 
+// isWindowsService returns whether the process is currently executing as a
+// Windows service. The below technique looks a bit hairy, but it's actually
+// exactly what the .NET framework does for the similarly named function:
+// https://github.com/dotnet/extensions/blob/f4066026ca06984b07e90e61a6390ac38152ba93/src/Hosting/WindowsServices/src/WindowsServiceHelpers.cs#L26-L31
+// Specifically, it looks up whether the parent process has session ID zero
+// and is called "services".
+func isWindowsService() bool {
+	const (
+		_CURRENT_PROCESS                   = ^uintptr(0)
+		_PROCESS_QUERY_LIMITED_INFORMATION = 0x1000
+	)
+	// pbi is a PROCESS_BASIC_INFORMATION struct, where we just care about
+	// the 6th pointer inside of it, which contains the pid of the process
+	// parent:
+	// https://github.com/wine-mirror/wine/blob/42cb7d2ad1caba08de235e6319b9967296b5d554/include/winternl.h#L1294
+	var pbi [6]uintptr
+	var pbiLen uint32
+	err := stdcall5(_NtQueryInformationProcess, _CURRENT_PROCESS, 0, uintptr(unsafe.Pointer(&pbi[0])), uintptr(unsafe.Sizeof(pbi)), uintptr(unsafe.Pointer(&pbiLen)))
+	if err != 0 {
+		return false
+	}
+	var psid uint32
+	err = stdcall2(_ProcessIdToSessionId, pbi[5], uintptr(unsafe.Pointer(&psid)))
+	if err == 0 || psid != 0 {
+		return false
+	}
+	pproc := stdcall3(_OpenProcess, _PROCESS_QUERY_LIMITED_INFORMATION, 0, pbi[5])
+	if pproc == 0 {
+		return false
+	}
+	defer stdcall1(_CloseHandle, pproc)
+	// exeName gets the path to the executable image of the parent process
+	var exeName [261]byte
+	exeNameLen := uint32(len(exeName) - 1)
+	err = stdcall4(_QueryFullProcessImageNameA, pproc, 0, uintptr(unsafe.Pointer(&exeName[0])), uintptr(unsafe.Pointer(&exeNameLen)))
+	if err == 0 || exeNameLen == 0 {
+		return false
+	}
+	servicesLower := "services.exe"
+	servicesUpper := "SERVICES.EXE"
+	i := int(exeNameLen) - 1
+	j := len(servicesLower) - 1
+	if i < j {
+		return false
+	}
+	for {
+		if j == -1 {
+			return i == -1 || exeName[i] == '\\'
+		}
+		if exeName[i] != servicesLower[j] && exeName[i] != servicesUpper[j] {
+			return false
+		}
+		i--
+		j--
+	}
+}
+
 func ctrlhandler1(_type uint32) uint32 {
 	var s uint32
 
@@ -1012,7 +1077,11 @@
 	if sigsend(s) {
 		return 1
 	}
-	exit(2) // SIGINT, SIGTERM, etc
+	if !islibrary && !isarchive && !isWindowsService() {
+		// Only exit the program if we don't have a DLL or service.
+		// See https://golang.org/issues/35965 and https://golang.org/issues/40167
+		exit(2) // SIGINT, SIGTERM, etc
+	}
 	return 0
 }
 
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index a66b4d0..378d02d 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -131,7 +131,7 @@
 
 	if GOARCH != "wasm" { // no threads on wasm yet, so no sysmon
 		systemstack(func() {
-			newm(sysmon, nil)
+			newm(sysmon, nil, -1)
 		})
 	}
 
@@ -544,7 +544,7 @@
 	stackinit()
 	mallocinit()
 	fastrandinit() // must run before mcommoninit
-	mcommoninit(_g_.m)
+	mcommoninit(_g_.m, -1)
 	cpuinit()       // must run before alginit
 	alginit()       // maps must not be used before this call
 	modulesinit()   // provides activeModules
@@ -605,7 +605,22 @@
 	}
 }
 
-func mcommoninit(mp *m) {
+// mReserveID returns the next ID to use for a new m and advances sched.mnext.
+// This new m is immediately considered 'running' by checkdead.
+//
+// sched.lock must be held.
+func mReserveID() int64 {
+	if sched.mnext+1 < sched.mnext { // incrementing the counter would wrap around
+		throw("runtime: thread ID overflow")
+	}
+	id := sched.mnext
+	sched.mnext++
+	checkmcount() // NOTE(review): presumably enforces the limit on the number of Ms — confirm
+	return id
+}
+
+// Pre-allocated ID may be passed as 'id', or omitted by passing -1.
+func mcommoninit(mp *m, id int64) {
 	_g_ := getg()
 
 	// g0 stack won't make sense for user (and is not necessary unwindable).
@@ -614,12 +629,12 @@
 	}
 
 	lock(&sched.lock)
-	if sched.mnext+1 < sched.mnext {
-		throw("runtime: thread ID overflow")
+
+	if id >= 0 {
+		mp.id = id
+	} else {
+		mp.id = mReserveID()
 	}
-	mp.id = sched.mnext
-	sched.mnext++
-	checkmcount()
 
 	mp.fastrand[0] = uint32(int64Hash(uint64(mp.id), fastrandseed))
 	mp.fastrand[1] = uint32(int64Hash(uint64(cputicks()), ^fastrandseed))
@@ -1006,7 +1021,7 @@
 			notewakeup(&mp.park)
 		} else {
 			// Start M to run P.  Do not start another M below.
-			newm(nil, p)
+			newm(nil, p, -1)
 		}
 	}
 
@@ -1353,12 +1368,13 @@
 // Allocate a new m unassociated with any thread.
 // Can use p for allocation context if needed.
 // fn is recorded as the new m's m.mstartfn.
+// id is an optional pre-allocated m ID; pass -1 to have one allocated.
 //
 // This function is allowed to have write barriers even if the caller
 // isn't because it borrows _p_.
 //
 //go:yeswritebarrierrec
-func allocm(_p_ *p, fn func()) *m {
+func allocm(_p_ *p, fn func(), id int64) *m {
 	_g_ := getg()
 	acquirem() // disable GC because it can be called from sysmon
 	if _g_.m.p == 0 {
@@ -1387,7 +1403,7 @@
 
 	mp := new(m)
 	mp.mstartfn = fn
-	mcommoninit(mp)
+	mcommoninit(mp, id)
 
 	// In case of cgo or Solaris or illumos or Darwin, pthread_create will make us a stack.
 	// Windows and Plan 9 will layout sched stack on OS stack.
@@ -1526,7 +1542,7 @@
 	// The sched.pc will never be returned to, but setting it to
 	// goexit makes clear to the traceback routines where
 	// the goroutine stack ends.
-	mp := allocm(nil, nil)
+	mp := allocm(nil, nil, -1)
 	gp := malg(4096)
 	gp.sched.pc = funcPC(goexit) + sys.PCQuantum
 	gp.sched.sp = gp.stack.hi
@@ -1699,9 +1715,11 @@
 // Create a new m. It will start off with a call to fn, or else the scheduler.
 // fn needs to be static and not a heap allocated closure.
 // May run with m.p==nil, so write barriers are not allowed.
+//
+// id is an optional pre-allocated m ID; pass -1 to have one allocated.
 //go:nowritebarrierrec
-func newm(fn func(), _p_ *p) {
-	mp := allocm(_p_, fn)
+func newm(fn func(), _p_ *p, id int64) {
+	mp := allocm(_p_, fn, id)
 	mp.nextp.set(_p_)
 	mp.sigmask = initSigmask
 	if gp := getg(); gp != nil && gp.m != nil && (gp.m.lockedExt != 0 || gp.m.incgo) && GOOS != "plan9" {
@@ -1770,7 +1788,7 @@
 		releasem(mp)
 		return
 	}
-	newm(templateThread, nil)
+	newm(templateThread, nil, -1)
 	releasem(mp)
 }
 
@@ -1865,16 +1883,31 @@
 		}
 	}
 	mp := mget()
-	unlock(&sched.lock)
 	if mp == nil {
+		// No M is available, we must drop sched.lock and call newm.
+		// However, we already own a P to assign to the M.
+		//
+		// Once sched.lock is released, another G (e.g., in a syscall),
+		// could find no idle P while checkdead finds a runnable G but
+		// no running M's because this new M hasn't started yet, thus
+		// throwing in an apparent deadlock.
+		//
+		// Avoid this situation by pre-allocating the ID for the new M,
+		// thus marking it as 'running' before we drop sched.lock. This
+		// new M will eventually run the scheduler to execute any
+		// queued G's.
+		id := mReserveID()
+		unlock(&sched.lock)
+
 		var fn func()
 		if spinning {
 			// The caller incremented nmspinning, so set m.spinning in the new M.
 			fn = mspinning
 		}
-		newm(fn, _p_)
+		newm(fn, _p_, id)
 		return
 	}
+	unlock(&sched.lock)
 	if mp.spinning {
 		throw("startm: m is spinning")
 	}
diff --git a/src/runtime/testdata/testprog/checkptr.go b/src/runtime/testdata/testprog/checkptr.go
index 45e6fb1..e0a2794 100644
--- a/src/runtime/testdata/testprog/checkptr.go
+++ b/src/runtime/testdata/testprog/checkptr.go
@@ -10,6 +10,7 @@
 	register("CheckPtrAlignmentNoPtr", CheckPtrAlignmentNoPtr)
 	register("CheckPtrAlignmentPtr", CheckPtrAlignmentPtr)
 	register("CheckPtrArithmetic", CheckPtrArithmetic)
+	register("CheckPtrArithmetic2", CheckPtrArithmetic2)
 	register("CheckPtrSize", CheckPtrSize)
 	register("CheckPtrSmall", CheckPtrSmall)
 }
@@ -32,6 +33,13 @@
 	sink2 = (*int)(unsafe.Pointer(i))
 }
 
+func CheckPtrArithmetic2() { // registered above under the name "CheckPtrArithmetic2"
+	var x [2]int64
+	p := unsafe.Pointer(&x[1])
+	var one uintptr = 1
+	sink2 = unsafe.Pointer(uintptr(p) & ^one) // clear the low bit of p's address and convert back to a pointer
+}
+
 func CheckPtrSize() {
 	p := new(int64)
 	sink2 = p
diff --git a/src/testing/testing.go b/src/testing/testing.go
index 75f1b54..e3dcee5 100644
--- a/src/testing/testing.go
+++ b/src/testing/testing.go
@@ -352,10 +352,19 @@
 	defer p.lastNameMu.Unlock()
 
 	if !p.chatty ||
-		strings.HasPrefix(out, "--- PASS") ||
-		strings.HasPrefix(out, "--- FAIL") ||
-		strings.HasPrefix(out, "=== CONT") ||
-		strings.HasPrefix(out, "=== RUN") {
+		strings.HasPrefix(out, "--- PASS: ") ||
+		strings.HasPrefix(out, "--- FAIL: ") ||
+		strings.HasPrefix(out, "--- SKIP: ") ||
+		strings.HasPrefix(out, "=== RUN   ") ||
+		strings.HasPrefix(out, "=== CONT  ") ||
+		strings.HasPrefix(out, "=== PAUSE ") {
+		// If we're buffering test output (!p.chatty), we don't really care which
+		// test is emitting which line so long as they are serialized.
+		//
+		// If the message already implies an association with a specific new test,
+		// we don't need to check what the old test name was or log an extra CONT
+		// line for it. (We're updating it anyway, and the current message already
+		// includes the test name.)
 		p.lastName = testName
 		fmt.Fprint(w, out)
 		return
@@ -907,7 +916,13 @@
 		for ; root.parent != nil; root = root.parent {
 		}
 		root.mu.Lock()
-		fmt.Fprintf(root.w, "=== PAUSE %s\n", t.name)
+		// Unfortunately, even though PAUSE indicates that the named test is *no
+		// longer* running, cmd/test2json interprets it as changing the active test
+		// for the purpose of log parsing. We could fix cmd/test2json, but that
+		// won't fix existing deployments of third-party tools that already shell
+		// out to older builds of cmd/test2json — so merely fixing cmd/test2json
+		// isn't enough for now.
+		printer.Fprint(root.w, t.name, fmt.Sprintf("=== PAUSE %s\n", t.name))
 		root.mu.Unlock()
 	}
 
diff --git a/test/fixedbugs/issue39651.go b/test/fixedbugs/issue39651.go
new file mode 100644
index 0000000..256a34d
--- /dev/null
+++ b/test/fixedbugs/issue39651.go
@@ -0,0 +1,26 @@
+// run
+
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Test that float -> integer conversion doesn't clobber
+// flags.
+
+package main
+
+//go:noinline
+func f(x, y float64, a, b *bool, r *int64) { // results are written through a, b and r
+	*a = x < y    // set flags
+	*r = int64(x) // clobber flags
+	*b = x == y   // use flags
+}
+
+func main() {
+	var a, b bool
+	var r int64
+	f(1, 1, &a, &b, &r) // x == y, so a (x < y) must be false and b (x == y) must be true
+	if a || !b {
+		panic("comparison incorrect")
+	}
+}
diff --git a/test/fixedbugs/issue40367.go b/test/fixedbugs/issue40367.go
new file mode 100644
index 0000000..0dc5ad7
--- /dev/null
+++ b/test/fixedbugs/issue40367.go
@@ -0,0 +1,41 @@
+// run
+
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+func case1() {
+	rates := []int32{1,2,3,4,5,6}
+	var sink [6]int
+	j := len(sink) // one past the last index; decremented before every store
+	for star, _ := range rates { // six iterations, one per element of rates
+		if star+1 < 1 { // never true for the indices 0..5 produced by the range
+			panic("")
+		}
+		j--
+		sink[j] = j // j runs 5, 4, ..., 0, so every store is in bounds
+	}
+}
+
+func case2() {
+	i := 0
+	var sink [3]int
+	j := len(sink) // one past the last index; decremented before every store
+top: // hand-written loop: the body below runs three times, entered with i = 0, 1, 2
+	j--
+	sink[j] = j // j runs 2, 1, 0, so every store is in bounds
+	if i < 2 {
+		i++
+		if i < 1 { // never true here: i was just incremented from 0 or 1
+			return
+		}
+		goto top
+	}
+}
+
+func main() { // runs both cases in order; each panics only if its loop misbehaves
+	case1()
+	case2()
+}
\ No newline at end of file
diff --git a/test/fixedbugs/issue40917.go b/test/fixedbugs/issue40917.go
new file mode 100644
index 0000000..2128be5
--- /dev/null
+++ b/test/fixedbugs/issue40917.go
@@ -0,0 +1,23 @@
+// run -gcflags=-d=checkptr
+
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import "unsafe"
+
+func main() {
+	var x [2]uint64
+	a := unsafe.Pointer(&x[1])
+
+	b := a
+	b = unsafe.Pointer(uintptr(b) + 2)  // address of x[1] plus 2
+	b = unsafe.Pointer(uintptr(b) - 1)  // net offset from a is now +1
+	b = unsafe.Pointer(uintptr(b) &^ 1) // clear the low bit; gives a back whenever a's address is even
+
+	if a != b {
+		panic("pointer arithmetic failed")
+	}
+}
diff --git a/test/prove.go b/test/prove.go
index eba0f79..6e562b1 100644
--- a/test/prove.go
+++ b/test/prove.go
@@ -670,7 +670,8 @@
 	i := 0
 	if len(b) > i {
 	top:
-		println(b[i]) // ERROR "Induction variable: limits \[0,\?\), increment 1$" "Proved IsInBounds$"
+		// TODO: remove the todo of next line once we complete the following optimization of CL 244579
+		// println(b[i]) // todo: ERROR "Induction variable: limits \[0,\?\), increment 1$" "Proved IsInBounds$"
 		i++
 		if i < len(b) {
 			goto top
@@ -720,7 +721,8 @@
 // range2 elements are larger, so they use the general form of a range loop.
 func range2(b [][32]int) {
 	for i, v := range b {
-		b[i][0] = v[0] + 1 // ERROR "Induction variable: limits \[0,\?\), increment 1$" "Proved IsInBounds$"
+		// TODO: remove the todo of next line once we complete the following optimization of CL 244579
+		b[i][0] = v[0] + 1 // todo: ERROR "Induction variable: limits \[0,\?\), increment 1$" "Proved IsInBounds$"
 		if i < len(b) {    // ERROR "Proved Less64$"
 			println("x")
 		}