[release-branch.go1.15] all: merge master into release-branch.go1.15

ba9e108899 cmd: update golang.org/x/xerrors
027d7241ce encoding/binary: read at most MaxVarintLen64 bytes in ReadUvarint
6f08e89ec3 cmd/go: fix error stacks when there are scanner errors
f235275097 net/http: fix cancelation of requests with a readTrackingBody wrapper
f92337422e runtime/race: fix ppc64le build
e49b2308a5 runtime/race: rebuild some .syso files to remove getauxval dependency
10523c0efb doc/go1.15: fix a few trivial inconsistencies
7388956b76 cmd/cgo: fix mangling of enum and union types
b56791cdea runtime: validate candidate searchAddr in pageAlloc.find
10374e2435 testing: fix quotation marks
7f86080476 cmd/compile: don't addLocalInductiveFacts if there is no direct edge from if block to phi block
54e75e8f9d crypto/ed25519: remove s390x KDSA implementation
6b4dcf19fa runtime: hold sched.lock over globrunqputbatch in runqputbatch
85afa2eb19 runtime: ensure startm new M is consistently visible to checkdead
c4fed25553 cmd/compile: add floating point load+op operations to addressing modes pass
19a932ceb8 cmd/link: don't mark shared library symbols reachable unconditionally
8696ae82c9 syscall: use correct file descriptor in dup2 fallback path
9591515f51 runtime, sync: add copyright headers to new files
074f2d800f doc/go1.15: surface the crypto/x509 CommonName deprecation note

Change-Id: I0bfcff1fc2de723960909d9dda718fee6abc2912
diff --git a/doc/go1.15.html b/doc/go1.15.html
index 0cbc9be..17e442d 100644
--- a/doc/go1.15.html
+++ b/doc/go1.15.html
@@ -386,6 +386,23 @@
   documentation</a> for more information.
 </p>
 
+<h3 id="commonname">X.509 CommonName deprecation</h3>
+
+<p><!-- CL 231379 -->
+  The deprecated, legacy behavior of treating the <code>CommonName</code>
+  field on X.509 certificates as a host name when no Subject Alternative Names
+  are present is now disabled by default. It can be temporarily re-enabled by
+  adding the value <code>x509ignoreCN=0</code> to the <code>GODEBUG</code>
+  environment variable.
+</p>
+
+<p>
+  Note that if the <code>CommonName</code> is an invalid host name, it's always
+  ignored, regardless of <code>GODEBUG</code> settings. Invalid names include
+  those with any characters other than letters, digits, hyphens and underscores,
+  and those with empty labels or trailing dots.
+</p>
+
 <h3 id="minor_library_changes">Minor changes to the library</h3>
 
 <p>
@@ -523,15 +540,6 @@
       certificates with trailing dots.
     </p>
 
-    <p><!-- CL 231379 -->
-      The deprecated, legacy behavior of treating the <code>CommonName</code>
-      field as a hostname when no Subject Alternative Names are present is now
-      disabled by default. It can be temporarily re-enabled by adding the value
-      <code>x509ignoreCN=0</code> to the <code>GODEBUG</code> environment
-      variable. If the <code>CommonName</code> is an invalid hostname, it's
-      always ignored.
-    </p>
-
     <p><!-- CL 217298 -->
       The new <a href="/pkg/crypto/x509/#CreateRevocationList"><code>CreateRevocationList</code></a>
       function and <a href="/pkg/crypto/x509/#RevocationList"><code>RevocationList</code></a> type
@@ -647,8 +655,8 @@
 <dl id="flag"><dt><a href="/pkg/flag/">flag</a></dt>
   <dd>
     <p><!-- CL 221427 -->
-      When the flag package sees <code>-h</code> or <code>-help</code>, and
-      those flags are not defined, it now prints a usage message.
+      When the <code>flag</code> package sees <code>-h</code> or <code>-help</code>,
+      and those flags are not defined, it now prints a usage message.
       If the <a href="/pkg/flag/#FlagSet"><code>FlagSet</code></a> was created with
       <a href="/pkg/flag/#ExitOnError"><code>ExitOnError</code></a>,
       <a href="/pkg/flag/#FlagSet.Parse"><code>FlagSet.Parse</code></a> would then
@@ -893,9 +901,9 @@
 <dl id="pkg-runtime-pprof"><dt><a href="/pkg/runtime/pprof/">runtime/pprof</a></dt>
   <dd>
     <p><!-- CL 189318 -->
-      The goroutine profile includes the profile labels associated with each goroutine
-      at the time of profiling. This feature is not yet implemented for the profile
-      reported with <code>debug=2</code>.
+      The goroutine profile now includes the profile labels associated with each
+      goroutine at the time of profiling. This feature is not yet implemented for
+      the profile reported with <code>debug=2</code>.
     </p>
   </dd>
 </dl>
@@ -926,6 +934,7 @@
       <a href="/pkg/sync/#Map.Delete"><code>Map.Delete</code></a>
       is more efficient.
     </p>
+  </dd>
 </dl><!-- sync -->
 
 <dl id="syscall"><dt><a href="/pkg/syscall/">syscall</a></dt>
diff --git a/misc/cgo/test/test.go b/misc/cgo/test/test.go
index 8c69ad9..35bc3a1 100644
--- a/misc/cgo/test/test.go
+++ b/misc/cgo/test/test.go
@@ -901,6 +901,12 @@
 // issue 38649
 // Test that #define'd type aliases work.
 #define netbsd_gid unsigned int
+
+// issue 40494
+// Inconsistent handling of tagged enum and union types.
+enum Enum40494 { X_40494 };
+union Union40494 { int x; };
+void issue40494(enum Enum40494 e, union Union40494* up) {}
 */
 import "C"
 
@@ -2204,3 +2210,10 @@
 // issue 39877
 
 var issue39877 *C.void = nil
+
+// issue 40494
+// No runtime test; just make sure it compiles.
+
+func Issue40494() {
+	C.issue40494(C.enum_Enum40494(C.X_40494), (*C.union_Union40494)(nil))
+}
diff --git a/misc/cgo/testshared/shared_test.go b/misc/cgo/testshared/shared_test.go
index f8dabbe..5e08937 100644
--- a/misc/cgo/testshared/shared_test.go
+++ b/misc/cgo/testshared/shared_test.go
@@ -462,6 +462,7 @@
 	run(t, "trivial executable", "../../bin/trivial")
 	AssertIsLinkedTo(t, "../../bin/trivial", soname)
 	AssertHasRPath(t, "../../bin/trivial", gorootInstallDir)
+	checkSize(t, "../../bin/trivial", 100000) // it is 19K on linux/amd64, 100K should be enough
 }
 
 // Build a trivial program in PIE mode that links against the shared runtime and check it runs.
@@ -470,6 +471,18 @@
 	run(t, "trivial executable", "./trivial.pie")
 	AssertIsLinkedTo(t, "./trivial.pie", soname)
 	AssertHasRPath(t, "./trivial.pie", gorootInstallDir)
+	checkSize(t, "./trivial.pie", 100000) // it is 19K on linux/amd64, 100K should be enough
+}
+
+// Check that the file size does not exceed a limit.
+func checkSize(t *testing.T, f string, limit int64) {
+	fi, err := os.Stat(f)
+	if err != nil {
+		t.Fatalf("stat failed: %v", err)
+	}
+	if sz := fi.Size(); sz > limit {
+		t.Errorf("file too large: got %d, want <= %d", sz, limit)
+	}
 }
 
 // Build a division test program and check it runs.
diff --git a/src/cmd/cgo/out.go b/src/cmd/cgo/out.go
index 6c22147..4064f0a 100644
--- a/src/cmd/cgo/out.go
+++ b/src/cmd/cgo/out.go
@@ -123,7 +123,9 @@
 		// Moreover, empty file name makes compile emit no source debug info at all.
 		var buf bytes.Buffer
 		noSourceConf.Fprint(&buf, fset, def.Go)
-		if bytes.HasPrefix(buf.Bytes(), []byte("_Ctype_")) {
+		if bytes.HasPrefix(buf.Bytes(), []byte("_Ctype_")) ||
+			strings.HasPrefix(name, "_Ctype_enum_") ||
+			strings.HasPrefix(name, "_Ctype_union_") {
 			// This typedef is of the form `typedef a b` and should be an alias.
 			fmt.Fprintf(fgo2, "= ")
 		}
diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go
index 47cb422..9d8a092 100644
--- a/src/cmd/compile/internal/amd64/ssa.go
+++ b/src/cmd/compile/internal/amd64/ssa.go
@@ -874,7 +874,11 @@
 		ssa.OpAMD64SUBLloadidx1, ssa.OpAMD64SUBLloadidx4, ssa.OpAMD64SUBLloadidx8, ssa.OpAMD64SUBQloadidx1, ssa.OpAMD64SUBQloadidx8,
 		ssa.OpAMD64ANDLloadidx1, ssa.OpAMD64ANDLloadidx4, ssa.OpAMD64ANDLloadidx8, ssa.OpAMD64ANDQloadidx1, ssa.OpAMD64ANDQloadidx8,
 		ssa.OpAMD64ORLloadidx1, ssa.OpAMD64ORLloadidx4, ssa.OpAMD64ORLloadidx8, ssa.OpAMD64ORQloadidx1, ssa.OpAMD64ORQloadidx8,
-		ssa.OpAMD64XORLloadidx1, ssa.OpAMD64XORLloadidx4, ssa.OpAMD64XORLloadidx8, ssa.OpAMD64XORQloadidx1, ssa.OpAMD64XORQloadidx8:
+		ssa.OpAMD64XORLloadidx1, ssa.OpAMD64XORLloadidx4, ssa.OpAMD64XORLloadidx8, ssa.OpAMD64XORQloadidx1, ssa.OpAMD64XORQloadidx8,
+		ssa.OpAMD64ADDSSloadidx1, ssa.OpAMD64ADDSSloadidx4, ssa.OpAMD64ADDSDloadidx1, ssa.OpAMD64ADDSDloadidx8,
+		ssa.OpAMD64SUBSSloadidx1, ssa.OpAMD64SUBSSloadidx4, ssa.OpAMD64SUBSDloadidx1, ssa.OpAMD64SUBSDloadidx8,
+		ssa.OpAMD64MULSSloadidx1, ssa.OpAMD64MULSSloadidx4, ssa.OpAMD64MULSDloadidx1, ssa.OpAMD64MULSDloadidx8,
+		ssa.OpAMD64DIVSSloadidx1, ssa.OpAMD64DIVSSloadidx4, ssa.OpAMD64DIVSDloadidx1, ssa.OpAMD64DIVSDloadidx8:
 		p := s.Prog(v.Op.Asm())
 
 		r, i := v.Args[1].Reg(), v.Args[2].Reg()
diff --git a/src/cmd/compile/internal/ssa/addressingmodes.go b/src/cmd/compile/internal/ssa/addressingmodes.go
index 78c979b..97a5ab4 100644
--- a/src/cmd/compile/internal/ssa/addressingmodes.go
+++ b/src/cmd/compile/internal/ssa/addressingmodes.go
@@ -321,6 +321,23 @@
 	[2]Op{OpAMD64XORQconstmodify, OpAMD64LEAQ1}: OpAMD64XORQconstmodifyidx1,
 	[2]Op{OpAMD64XORQconstmodify, OpAMD64LEAQ8}: OpAMD64XORQconstmodifyidx8,
 
+	[2]Op{OpAMD64ADDSSload, OpAMD64LEAQ1}: OpAMD64ADDSSloadidx1,
+	[2]Op{OpAMD64ADDSSload, OpAMD64LEAQ4}: OpAMD64ADDSSloadidx4,
+	[2]Op{OpAMD64ADDSDload, OpAMD64LEAQ1}: OpAMD64ADDSDloadidx1,
+	[2]Op{OpAMD64ADDSDload, OpAMD64LEAQ8}: OpAMD64ADDSDloadidx8,
+	[2]Op{OpAMD64SUBSSload, OpAMD64LEAQ1}: OpAMD64SUBSSloadidx1,
+	[2]Op{OpAMD64SUBSSload, OpAMD64LEAQ4}: OpAMD64SUBSSloadidx4,
+	[2]Op{OpAMD64SUBSDload, OpAMD64LEAQ1}: OpAMD64SUBSDloadidx1,
+	[2]Op{OpAMD64SUBSDload, OpAMD64LEAQ8}: OpAMD64SUBSDloadidx8,
+	[2]Op{OpAMD64MULSSload, OpAMD64LEAQ1}: OpAMD64MULSSloadidx1,
+	[2]Op{OpAMD64MULSSload, OpAMD64LEAQ4}: OpAMD64MULSSloadidx4,
+	[2]Op{OpAMD64MULSDload, OpAMD64LEAQ1}: OpAMD64MULSDloadidx1,
+	[2]Op{OpAMD64MULSDload, OpAMD64LEAQ8}: OpAMD64MULSDloadidx8,
+	[2]Op{OpAMD64DIVSSload, OpAMD64LEAQ1}: OpAMD64DIVSSloadidx1,
+	[2]Op{OpAMD64DIVSSload, OpAMD64LEAQ4}: OpAMD64DIVSSloadidx4,
+	[2]Op{OpAMD64DIVSDload, OpAMD64LEAQ1}: OpAMD64DIVSDloadidx1,
+	[2]Op{OpAMD64DIVSDload, OpAMD64LEAQ8}: OpAMD64DIVSDloadidx8,
+
 	// 386
 	[2]Op{Op386MOVBload, Op386ADDL}:  Op386MOVBloadidx1,
 	[2]Op{Op386MOVWload, Op386ADDL}:  Op386MOVWloadidx1,
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
index cd9cb51..a3b2904 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
@@ -149,14 +149,15 @@
 		gpstorexchg     = regInfo{inputs: []regMask{gp, gpspsb, 0}, outputs: []regMask{gp}}
 		cmpxchg         = regInfo{inputs: []regMask{gp, ax, gp, 0}, outputs: []regMask{gp, 0}, clobbers: ax}
 
-		fp01     = regInfo{inputs: nil, outputs: fponly}
-		fp21     = regInfo{inputs: []regMask{fp, fp}, outputs: fponly}
-		fp31     = regInfo{inputs: []regMask{fp, fp, fp}, outputs: fponly}
-		fp21load = regInfo{inputs: []regMask{fp, gpspsb, 0}, outputs: fponly}
-		fpgp     = regInfo{inputs: fponly, outputs: gponly}
-		gpfp     = regInfo{inputs: gponly, outputs: fponly}
-		fp11     = regInfo{inputs: fponly, outputs: fponly}
-		fp2flags = regInfo{inputs: []regMask{fp, fp}}
+		fp01        = regInfo{inputs: nil, outputs: fponly}
+		fp21        = regInfo{inputs: []regMask{fp, fp}, outputs: fponly}
+		fp31        = regInfo{inputs: []regMask{fp, fp, fp}, outputs: fponly}
+		fp21load    = regInfo{inputs: []regMask{fp, gpspsb, 0}, outputs: fponly}
+		fp21loadidx = regInfo{inputs: []regMask{fp, gpspsb, gpspsb, 0}, outputs: fponly}
+		fpgp        = regInfo{inputs: fponly, outputs: gponly}
+		gpfp        = regInfo{inputs: gponly, outputs: fponly}
+		fp11        = regInfo{inputs: fponly, outputs: fponly}
+		fp2flags    = regInfo{inputs: []regMask{fp, fp}}
 
 		fpload    = regInfo{inputs: []regMask{gpspsb, 0}, outputs: fponly}
 		fploadidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}, outputs: fponly}
@@ -201,6 +202,23 @@
 		{name: "DIVSSload", argLength: 3, reg: fp21load, asm: "DIVSS", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, symEffect: "Read"}, // fp32 arg0 / tmp, tmp loaded from arg1+auxint+aux, arg2 = mem
 		{name: "DIVSDload", argLength: 3, reg: fp21load, asm: "DIVSD", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, symEffect: "Read"}, // fp64 arg0 / tmp, tmp loaded from arg1+auxint+aux, arg2 = mem
 
+		{name: "ADDSSloadidx1", argLength: 4, reg: fp21loadidx, asm: "ADDSS", scale: 1, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, // fp32 arg0 + tmp, tmp loaded from arg1+arg2+auxint+aux, arg3 = mem
+		{name: "ADDSSloadidx4", argLength: 4, reg: fp21loadidx, asm: "ADDSS", scale: 4, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, // fp32 arg0 + tmp, tmp loaded from arg1+4*arg2+auxint+aux, arg3 = mem
+		{name: "ADDSDloadidx1", argLength: 4, reg: fp21loadidx, asm: "ADDSD", scale: 1, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, // fp64 arg0 + tmp, tmp loaded from arg1+arg2+auxint+aux, arg3 = mem
+		{name: "ADDSDloadidx8", argLength: 4, reg: fp21loadidx, asm: "ADDSD", scale: 8, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, // fp64 arg0 + tmp, tmp loaded from arg1+8*arg2+auxint+aux, arg3 = mem
+		{name: "SUBSSloadidx1", argLength: 4, reg: fp21loadidx, asm: "SUBSS", scale: 1, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, // fp32 arg0 - tmp, tmp loaded from arg1+arg2+auxint+aux, arg3 = mem
+		{name: "SUBSSloadidx4", argLength: 4, reg: fp21loadidx, asm: "SUBSS", scale: 4, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, // fp32 arg0 - tmp, tmp loaded from arg1+4*arg2+auxint+aux, arg3 = mem
+		{name: "SUBSDloadidx1", argLength: 4, reg: fp21loadidx, asm: "SUBSD", scale: 1, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, // fp64 arg0 - tmp, tmp loaded from arg1+arg2+auxint+aux, arg3 = mem
+		{name: "SUBSDloadidx8", argLength: 4, reg: fp21loadidx, asm: "SUBSD", scale: 8, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, // fp64 arg0 - tmp, tmp loaded from arg1+8*arg2+auxint+aux, arg3 = mem
+		{name: "MULSSloadidx1", argLength: 4, reg: fp21loadidx, asm: "MULSS", scale: 1, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, // fp32 arg0 * tmp, tmp loaded from arg1+arg2+auxint+aux, arg3 = mem
+		{name: "MULSSloadidx4", argLength: 4, reg: fp21loadidx, asm: "MULSS", scale: 4, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, // fp32 arg0 * tmp, tmp loaded from arg1+4*arg2+auxint+aux, arg3 = mem
+		{name: "MULSDloadidx1", argLength: 4, reg: fp21loadidx, asm: "MULSD", scale: 1, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, // fp64 arg0 * tmp, tmp loaded from arg1+arg2+auxint+aux, arg3 = mem
+		{name: "MULSDloadidx8", argLength: 4, reg: fp21loadidx, asm: "MULSD", scale: 8, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, // fp64 arg0 * tmp, tmp loaded from arg1+8*arg2+auxint+aux, arg3 = mem
+		{name: "DIVSSloadidx1", argLength: 4, reg: fp21loadidx, asm: "DIVSS", scale: 1, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, // fp32 arg0 / tmp, tmp loaded from arg1+arg2+auxint+aux, arg3 = mem
+		{name: "DIVSSloadidx4", argLength: 4, reg: fp21loadidx, asm: "DIVSS", scale: 4, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, // fp32 arg0 / tmp, tmp loaded from arg1+4*arg2+auxint+aux, arg3 = mem
+		{name: "DIVSDloadidx1", argLength: 4, reg: fp21loadidx, asm: "DIVSD", scale: 1, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, // fp64 arg0 / tmp, tmp loaded from arg1+arg2+auxint+aux, arg3 = mem
+		{name: "DIVSDloadidx8", argLength: 4, reg: fp21loadidx, asm: "DIVSD", scale: 8, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, // fp64 arg0 / tmp, tmp loaded from arg1+8*arg2+auxint+aux, arg3 = mem
+
 		// binary ops
 		{name: "ADDQ", argLength: 2, reg: gp21sp, asm: "ADDQ", commutative: true, clobberFlags: true},                                                                   // arg0 + arg1
 		{name: "ADDL", argLength: 2, reg: gp21sp, asm: "ADDL", commutative: true, clobberFlags: true},                                                                   // arg0 + arg1
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index d27682e..9efa1bf 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -572,6 +572,22 @@
 	OpAMD64MULSDload
 	OpAMD64DIVSSload
 	OpAMD64DIVSDload
+	OpAMD64ADDSSloadidx1
+	OpAMD64ADDSSloadidx4
+	OpAMD64ADDSDloadidx1
+	OpAMD64ADDSDloadidx8
+	OpAMD64SUBSSloadidx1
+	OpAMD64SUBSSloadidx4
+	OpAMD64SUBSDloadidx1
+	OpAMD64SUBSDloadidx8
+	OpAMD64MULSSloadidx1
+	OpAMD64MULSSloadidx4
+	OpAMD64MULSDloadidx1
+	OpAMD64MULSDloadidx8
+	OpAMD64DIVSSloadidx1
+	OpAMD64DIVSSloadidx4
+	OpAMD64DIVSDloadidx1
+	OpAMD64DIVSDloadidx8
 	OpAMD64ADDQ
 	OpAMD64ADDL
 	OpAMD64ADDQconst
@@ -6577,6 +6593,310 @@
 		},
 	},
 	{
+		name:         "ADDSSloadidx1",
+		auxType:      auxSymOff,
+		argLen:       4,
+		resultInArg0: true,
+		symEffect:    SymRead,
+		asm:          x86.AADDSS,
+		scale:        1,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+				{1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+				{2, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+			},
+			outputs: []outputInfo{
+				{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+			},
+		},
+	},
+	{
+		name:         "ADDSSloadidx4",
+		auxType:      auxSymOff,
+		argLen:       4,
+		resultInArg0: true,
+		symEffect:    SymRead,
+		asm:          x86.AADDSS,
+		scale:        4,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+				{1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+				{2, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+			},
+			outputs: []outputInfo{
+				{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+			},
+		},
+	},
+	{
+		name:         "ADDSDloadidx1",
+		auxType:      auxSymOff,
+		argLen:       4,
+		resultInArg0: true,
+		symEffect:    SymRead,
+		asm:          x86.AADDSD,
+		scale:        1,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+				{1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+				{2, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+			},
+			outputs: []outputInfo{
+				{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+			},
+		},
+	},
+	{
+		name:         "ADDSDloadidx8",
+		auxType:      auxSymOff,
+		argLen:       4,
+		resultInArg0: true,
+		symEffect:    SymRead,
+		asm:          x86.AADDSD,
+		scale:        8,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+				{1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+				{2, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+			},
+			outputs: []outputInfo{
+				{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+			},
+		},
+	},
+	{
+		name:         "SUBSSloadidx1",
+		auxType:      auxSymOff,
+		argLen:       4,
+		resultInArg0: true,
+		symEffect:    SymRead,
+		asm:          x86.ASUBSS,
+		scale:        1,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+				{1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+				{2, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+			},
+			outputs: []outputInfo{
+				{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+			},
+		},
+	},
+	{
+		name:         "SUBSSloadidx4",
+		auxType:      auxSymOff,
+		argLen:       4,
+		resultInArg0: true,
+		symEffect:    SymRead,
+		asm:          x86.ASUBSS,
+		scale:        4,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+				{1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+				{2, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+			},
+			outputs: []outputInfo{
+				{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+			},
+		},
+	},
+	{
+		name:         "SUBSDloadidx1",
+		auxType:      auxSymOff,
+		argLen:       4,
+		resultInArg0: true,
+		symEffect:    SymRead,
+		asm:          x86.ASUBSD,
+		scale:        1,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+				{1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+				{2, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+			},
+			outputs: []outputInfo{
+				{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+			},
+		},
+	},
+	{
+		name:         "SUBSDloadidx8",
+		auxType:      auxSymOff,
+		argLen:       4,
+		resultInArg0: true,
+		symEffect:    SymRead,
+		asm:          x86.ASUBSD,
+		scale:        8,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+				{1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+				{2, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+			},
+			outputs: []outputInfo{
+				{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+			},
+		},
+	},
+	{
+		name:         "MULSSloadidx1",
+		auxType:      auxSymOff,
+		argLen:       4,
+		resultInArg0: true,
+		symEffect:    SymRead,
+		asm:          x86.AMULSS,
+		scale:        1,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+				{1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+				{2, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+			},
+			outputs: []outputInfo{
+				{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+			},
+		},
+	},
+	{
+		name:         "MULSSloadidx4",
+		auxType:      auxSymOff,
+		argLen:       4,
+		resultInArg0: true,
+		symEffect:    SymRead,
+		asm:          x86.AMULSS,
+		scale:        4,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+				{1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+				{2, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+			},
+			outputs: []outputInfo{
+				{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+			},
+		},
+	},
+	{
+		name:         "MULSDloadidx1",
+		auxType:      auxSymOff,
+		argLen:       4,
+		resultInArg0: true,
+		symEffect:    SymRead,
+		asm:          x86.AMULSD,
+		scale:        1,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+				{1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+				{2, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+			},
+			outputs: []outputInfo{
+				{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+			},
+		},
+	},
+	{
+		name:         "MULSDloadidx8",
+		auxType:      auxSymOff,
+		argLen:       4,
+		resultInArg0: true,
+		symEffect:    SymRead,
+		asm:          x86.AMULSD,
+		scale:        8,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+				{1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+				{2, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+			},
+			outputs: []outputInfo{
+				{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+			},
+		},
+	},
+	{
+		name:         "DIVSSloadidx1",
+		auxType:      auxSymOff,
+		argLen:       4,
+		resultInArg0: true,
+		symEffect:    SymRead,
+		asm:          x86.ADIVSS,
+		scale:        1,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+				{1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+				{2, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+			},
+			outputs: []outputInfo{
+				{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+			},
+		},
+	},
+	{
+		name:         "DIVSSloadidx4",
+		auxType:      auxSymOff,
+		argLen:       4,
+		resultInArg0: true,
+		symEffect:    SymRead,
+		asm:          x86.ADIVSS,
+		scale:        4,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+				{1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+				{2, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+			},
+			outputs: []outputInfo{
+				{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+			},
+		},
+	},
+	{
+		name:         "DIVSDloadidx1",
+		auxType:      auxSymOff,
+		argLen:       4,
+		resultInArg0: true,
+		symEffect:    SymRead,
+		asm:          x86.ADIVSD,
+		scale:        1,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+				{1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+				{2, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+			},
+			outputs: []outputInfo{
+				{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+			},
+		},
+	},
+	{
+		name:         "DIVSDloadidx8",
+		auxType:      auxSymOff,
+		argLen:       4,
+		resultInArg0: true,
+		symEffect:    SymRead,
+		asm:          x86.ADIVSD,
+		scale:        8,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+				{1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+				{2, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+			},
+			outputs: []outputInfo{
+				{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+			},
+		},
+	},
+	{
 		name:         "ADDQ",
 		argLen:       2,
 		commutative:  true,
diff --git a/src/cmd/compile/internal/ssa/prove.go b/src/cmd/compile/internal/ssa/prove.go
index a8e43d0..6c6be39 100644
--- a/src/cmd/compile/internal/ssa/prove.go
+++ b/src/cmd/compile/internal/ssa/prove.go
@@ -1051,6 +1051,11 @@
 	//
 	// If all of these conditions are true, then i1 < max and i1 >= min.
 
+	// To ensure this is a loop header node.
+	if len(b.Preds) != 2 {
+		return
+	}
+
 	for _, i1 := range b.Values {
 		if i1.Op != OpPhi {
 			continue
@@ -1093,6 +1098,9 @@
 				}
 				br = negative
 			}
+			if br == unknown {
+				continue
+			}
 
 			tr, has := domainRelationTable[control.Op]
 			if !has {
diff --git a/src/cmd/go.mod b/src/cmd/go.mod
index 274ef0d..6d57cee 100644
--- a/src/cmd/go.mod
+++ b/src/cmd/go.mod
@@ -10,4 +10,5 @@
 	golang.org/x/mod v0.3.0
 	golang.org/x/sys v0.0.0-20200501145240-bc7a7d42d5c3 // indirect
 	golang.org/x/tools v0.0.0-20200616133436-c1934b75d054
+	golang.org/x/xerrors v0.0.0-20200806184451-1a77d5e9f316 // indirect
 )
diff --git a/src/cmd/go.sum b/src/cmd/go.sum
index 30a0be0..3fc693e 100644
--- a/src/cmd/go.sum
+++ b/src/cmd/go.sum
@@ -34,4 +34,6 @@
 golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20200806184451-1a77d5e9f316 h1:Jhw4VC65LaKnpq9FvcK+a8ZzrFm3D+UygvMMrhkOw70=
+golang.org/x/xerrors v0.0.0-20200806184451-1a77d5e9f316/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=
diff --git a/src/cmd/go/internal/load/pkg.go b/src/cmd/go/internal/load/pkg.go
index fcc47bd..2b5fbb1 100644
--- a/src/cmd/go/internal/load/pkg.go
+++ b/src/cmd/go/internal/load/pkg.go
@@ -239,11 +239,25 @@
 		err = &NoGoError{Package: p}
 	}
 
+	// Take only the first error from a scanner.ErrorList. PackageError only
+	// has room for one position, so we report the first error with a position
+	// instead of all of the errors without a position.
+	var pos string
+	var isScanErr bool
+	if scanErr, ok := err.(scanner.ErrorList); ok && len(scanErr) > 0 {
+		isScanErr = true // For stack push/pop below.
+
+		scanPos := scanErr[0].Pos
+		scanPos.Filename = base.ShortPath(scanPos.Filename)
+		pos = scanPos.String()
+		err = errors.New(scanErr[0].Msg)
+	}
+
 	// Report the error on the importing package if the problem is with the import declaration
 	// for example, if the package doesn't exist or if the import path is malformed.
 	// On the other hand, don't include a position if the problem is with the imported package,
 	// for example there are no Go files (NoGoError), or there's a problem in the imported
-	// package's source files themselves.
+	// package's source files themselves (scanner errors).
 	//
 	// TODO(matloob): Perhaps make each of those the errors in the first group
 	// (including modload.ImportMissingError, and the corresponding
@@ -254,22 +268,11 @@
 	// to make it easier to check for them? That would save us from having to
 	// move the modload errors into this package to avoid a package import cycle,
 	// and from having to export an error type for the errors produced in build.
-	if !isMatchErr && nogoErr != nil {
+	if !isMatchErr && (nogoErr != nil || isScanErr) {
 		stk.Push(path)
 		defer stk.Pop()
 	}
 
-	// Take only the first error from a scanner.ErrorList. PackageError only
-	// has room for one position, so we report the first error with a position
-	// instead of all of the errors without a position.
-	var pos string
-	if scanErr, ok := err.(scanner.ErrorList); ok && len(scanErr) > 0 {
-		scanPos := scanErr[0].Pos
-		scanPos.Filename = base.ShortPath(scanPos.Filename)
-		pos = scanPos.String()
-		err = errors.New(scanErr[0].Msg)
-	}
-
 	p.Error = &PackageError{
 		ImportStack: stk.Copy(),
 		Pos:         pos,
diff --git a/src/cmd/go/testdata/script/list_err_stack.txt b/src/cmd/go/testdata/script/list_err_stack.txt
new file mode 100644
index 0000000..a7be9fd
--- /dev/null
+++ b/src/cmd/go/testdata/script/list_err_stack.txt
@@ -0,0 +1,27 @@
+
+# golang.org/issue/40544: regression in error stacks for parse errors
+
+env GO111MODULE=off
+cd sandbox/foo
+go list -e -json .
+stdout '"sandbox/foo"'
+stdout '"sandbox/bar"'
+stdout '"Pos": "..(/|\\\\)bar(/|\\\\)bar.go:1:1"'
+stdout '"Err": "expected ''package'', found ackage"'
+
+env GO111MODULE=on
+go list -e -json .
+stdout '"sandbox/foo"'
+stdout '"sandbox/bar"'
+stdout '"Pos": "..(/|\\\\)bar(/|\\\\)bar.go:1:1"'
+stdout '"Err": "expected ''package'', found ackage"'
+
+-- sandbox/go.mod --
+module sandbox
+
+-- sandbox/foo/foo.go --
+package pkg
+
+import "sandbox/bar"
+-- sandbox/bar/bar.go --
+ackage bar
\ No newline at end of file
diff --git a/src/cmd/link/internal/ld/lib.go b/src/cmd/link/internal/ld/lib.go
index b0a9613..0366bc7 100644
--- a/src/cmd/link/internal/ld/lib.go
+++ b/src/cmd/link/internal/ld/lib.go
@@ -2191,17 +2191,6 @@
 		l.SetSymElfType(s, elf.ST_TYPE(elfsym.Info))
 		su.SetSize(int64(elfsym.Size))
 		if elfsym.Section != elf.SHN_UNDEF {
-			// If it's not undefined, mark the symbol as reachable
-			// so as to protect it from dead code elimination,
-			// even if there aren't any explicit references to it.
-			// Under the previous sym.Symbol based regime this
-			// wasn't necessary, but for the loader-based deadcode
-			// it is definitely needed.
-			//
-			// FIXME: have a more general/flexible mechanism for this?
-			//
-			l.SetAttrReachable(s, true)
-
 			// Set .File for the library that actually defines the symbol.
 			l.SetSymPkg(s, libpath)
 
diff --git a/src/cmd/vendor/modules.txt b/src/cmd/vendor/modules.txt
index 7ee7478..21fc78c 100644
--- a/src/cmd/vendor/modules.txt
+++ b/src/cmd/vendor/modules.txt
@@ -84,6 +84,7 @@
 golang.org/x/tools/go/types/objectpath
 golang.org/x/tools/go/types/typeutil
 golang.org/x/tools/internal/analysisinternal
-# golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543
+# golang.org/x/xerrors v0.0.0-20200806184451-1a77d5e9f316
+## explicit
 golang.org/x/xerrors
 golang.org/x/xerrors/internal
diff --git a/src/crypto/ed25519/ed25519.go b/src/crypto/ed25519/ed25519.go
index 5766970..6f59bb5 100644
--- a/src/crypto/ed25519/ed25519.go
+++ b/src/crypto/ed25519/ed25519.go
@@ -154,7 +154,7 @@
 	return signature
 }
 
-func signGeneric(signature, privateKey, message []byte) {
+func sign(signature, privateKey, message []byte) {
 	if l := len(privateKey); l != PrivateKeySize {
 		panic("ed25519: bad private key length: " + strconv.Itoa(l))
 	}
@@ -201,10 +201,6 @@
 // Verify reports whether sig is a valid signature of message by publicKey. It
 // will panic if len(publicKey) is not PublicKeySize.
 func Verify(publicKey PublicKey, message, sig []byte) bool {
-	return verify(publicKey, message, sig)
-}
-
-func verifyGeneric(publicKey PublicKey, message, sig []byte) bool {
 	if l := len(publicKey); l != PublicKeySize {
 		panic("ed25519: bad public key length: " + strconv.Itoa(l))
 	}
diff --git a/src/crypto/ed25519/ed25519_noasm.go b/src/crypto/ed25519/ed25519_noasm.go
deleted file mode 100644
index caa84f7..0000000
--- a/src/crypto/ed25519/ed25519_noasm.go
+++ /dev/null
@@ -1,15 +0,0 @@
-// Copyright 2020 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build !s390x
-
-package ed25519
-
-func sign(signature, privateKey, message []byte) {
-	signGeneric(signature, privateKey, message)
-}
-
-func verify(publicKey PublicKey, message, sig []byte) bool {
-	return verifyGeneric(publicKey, message, sig)
-}
diff --git a/src/crypto/ed25519/ed25519_s390x.go b/src/crypto/ed25519/ed25519_s390x.go
deleted file mode 100644
index c8627a0..0000000
--- a/src/crypto/ed25519/ed25519_s390x.go
+++ /dev/null
@@ -1,51 +0,0 @@
-// Copyright 2020 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package ed25519
-
-import (
-	"internal/cpu"
-	"strconv"
-)
-
-//go:noescape
-func kdsaSign(message, signature, privateKey []byte) bool
-
-//go:noescape
-func kdsaVerify(message, signature, publicKey []byte) bool
-
-// sign does a check to see if hardware has Edwards Curve instruction available.
-// If it does, use the hardware implementation. Otherwise, use the generic version.
-func sign(signature, privateKey, message []byte) {
-	if cpu.S390X.HasEDDSA {
-		if l := len(privateKey); l != PrivateKeySize {
-			panic("ed25519: bad private key length: " + strconv.Itoa(l))
-		}
-
-		ret := kdsaSign(message, signature, privateKey[:32])
-		if !ret {
-			panic("ed25519: kdsa sign has a failure")
-		}
-		return
-	}
-	signGeneric(signature, privateKey, message)
-}
-
-// verify does a check to see if hardware has Edwards Curve instruction available.
-// If it does, use the hardware implementation for eddsa verfication. Otherwise, the generic
-// version is used
-func verify(publicKey PublicKey, message, sig []byte) bool {
-	if cpu.S390X.HasEDDSA {
-		if l := len(publicKey); l != PublicKeySize {
-			panic("ed25519: bad public key length: " + strconv.Itoa(l))
-		}
-
-		if len(sig) != SignatureSize || sig[63]&224 != 0 {
-			return false
-		}
-
-		return kdsaVerify(message, sig, publicKey)
-	}
-	return verifyGeneric(publicKey, message, sig)
-}
diff --git a/src/crypto/ed25519/ed25519_s390x.s b/src/crypto/ed25519/ed25519_s390x.s
deleted file mode 100644
index 1c77b51..0000000
--- a/src/crypto/ed25519/ed25519_s390x.s
+++ /dev/null
@@ -1,161 +0,0 @@
-// Copyright 2020 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "textflag.h"
-
-// func kdsaSign(message, signature, privateKey []byte) bool
-TEXT ·kdsaSign(SB), $4096-73
-	// The kdsa instruction takes function code,
-	// buffer's location, message's location and message len
-	// as parameters. Out of those, the function code and buffer's location
-	// should be placed in R0 and R1 respectively. The message's location
-	// and message length should be placed in an even-odd register pair. (e.g: R2 and R3)
-
-	// The content of parameter block(buffer) looks like the following:
-	// Signature R, Signature S and Private Key all take 32 bytes.
-	// In the signing case, the signatures(R and S) will be generated by
-	// the signing instruction and get placed in the locations shown in the parameter block.
-	//    0 +---------------+
-	//      |  Signature(R) |
-	//   32 +---------------+
-	//      |  Signature(S) |
-	//   64 +---------------+
-	//      |  Private Key  |
-	//   96 +---------------+
-	//      |   Reserved    |
-	//   112+---------------+
-	//      |               |
-	//      |     ...       |
-	//      |               |
-	// 4088 +---------------+
-
-	// The following code section setups the buffer from stack:
-	// Get the address of the buffer stack variable.
-	MOVD $buffer-4096(SP), R1
-
-	// Zero the buffer.
-	MOVD R1, R2
-	MOVD $(4096/256), R0 // number of 256 byte chunks to clear
-
-clear:
-	XC    $256, (R2), (R2)
-	MOVD  $256(R2), R2
-	BRCTG R0, clear
-
-	MOVD $40, R0                   // EDDSA-25519 sign has a function code of 40
-	LMG  message+0(FP), R2, R3     // R2=base R3=len
-	LMG  signature+24(FP), R4, R5  // R4=base R5=len
-	LMG  privateKey+48(FP), R6, R7 // R6=base R7=len
-
-	// Checks the length of signature and private key
-	CMPBNE R5, $64, panic
-	CMPBNE R7, $32, panic
-
-	// The instruction uses RFC 8032's private key, which is the first 32 bytes
-	// of the private key in this package. So we copy that into the buffer.
-	MVC $32, (R6), 64(R1)
-
-loop:
-	WORD $0xB93A0002 // The KDSA instruction
-	BVS  loop        // The instruction is exectued by hardware and can be interrupted. This does a retry when that happens.
-	BNE  error
-
-success:
-	// The signatures generated are in big-endian form, so we
-	// need to reverse the bytes of Signature(R) and Signature(S) in the buffers to transform
-	// them from big-endian to little-endian.
-
-	// Transform Signature(R) from big endian to little endian and copy into the signature
-	MVCIN $32, 31(R1), (R4)
-
-	// Transform Signature(S) from big endian to little endian and copy into the signature
-	MVCIN $32, 63(R1), 32(R4)
-
-	MOVB $1, ret+72(FP)
-	RET
-
-error:
-	// return false
-	MOVB $0, ret+72(FP)
-	RET
-
-panic:
-	UNDEF
-
-// func kdsaVerify(message, signature, publicKey []byte) bool
-TEXT ·kdsaVerify(SB), $4096-73
-	// The kdsa instruction takes function code,
-	// buffer's location, message's location and message len
-	// as parameters. Out of those, the function code and buffer's location
-	// should be placed in R0 and R1 respectively. The message's location
-	// and message length should be placed in an even-odd register pair. (e.g: R2 and R3)
-
-	// The parameter block(buffer) is similar to that of signing, except that
-	// we use public key for verification, and Signatures(R and S) are provided
-	// as input parameters to the parameter block.
-	//    0 +---------------+
-	//      |  Signature(R) |
-	//   32 +---------------+
-	//      |  Signature(S) |
-	//   64 +---------------+
-	//      |  Public Key   |
-	//   96 +---------------+
-	//      |   Reserved    |
-	//   112+---------------+
-	//      |               |
-	//      |     ...       |
-	//      |               |
-	// 4088 +---------------+
-
-	// The following code section setups the buffer from stack:
-	// Get the address of the buffer stack variable.
-	MOVD $buffer-4096(SP), R1
-
-	// Zero the buffer.
-	MOVD R1, R2
-	MOVD $(4096/256), R0 // number of 256 byte chunks to clear
-
-clear:
-	XC    $256, (R2), (R2)
-	MOVD  $256(R2), R2
-	BRCTG R0, clear
-
-	MOVD $32, R0                  // EDDSA-25519 verify has a function code of 32
-	LMG  message+0(FP), R2, R3    // R2=base R3=len
-	LMG  signature+24(FP), R4, R5 // R4=base R5=len
-	LMG  publicKey+48(FP), R6, R7 // R6=base R7=len
-
-	// Checks the length of public key and signature
-	CMPBNE R5, $64, panic
-	CMPBNE R7, $32, panic
-
-verify:
-	// The instruction needs Signature(R), Signature(S) and public key
-	// to be in big-endian form during computation. Therefore,
-	// we do the transformation (from little endian to big endian) and copy those into the buffer.
-
-	// Transform Signature(R) from little endian to big endian and copy into the buffer
-	MVCIN $32, 31(R4), (R1)
-
-	// Transform Signature(S) from little endian to big endian and copy into the buffer
-	MVCIN $32, 63(R4), 32(R1)
-
-	// Transform Public Key from little endian to big endian and copy into the buffer
-	MVCIN $32, 31(R6), 64(R1)
-
-verifyLoop:
-	WORD $0xB93A0002 // KDSA instruction
-	BVS  verifyLoop  // Retry upon hardware interrupt
-	BNE  error
-
-success:
-	MOVB $1, ret+72(FP)
-	RET
-
-error:
-	MOVB $0, ret+72(FP)
-	RET
-
-panic:
-	UNDEF
diff --git a/src/crypto/ed25519/ed25519_test.go b/src/crypto/ed25519/ed25519_test.go
index f77d463..adb09e4 100644
--- a/src/crypto/ed25519/ed25519_test.go
+++ b/src/crypto/ed25519/ed25519_test.go
@@ -26,14 +26,6 @@
 	return len(buf), nil
 }
 
-// signGenericWrapper is identical to Sign except that it unconditionally calls signGeneric directly
-// rather than going through the sign function that might call assembly code.
-func signGenericWrapper(privateKey PrivateKey, msg []byte) []byte {
-	sig := make([]byte, SignatureSize)
-	signGeneric(sig, privateKey, msg)
-	return sig
-}
-
 func TestUnmarshalMarshal(t *testing.T) {
 	pub, _, _ := GenerateKey(rand.Reader)
 
@@ -53,33 +45,22 @@
 }
 
 func TestSignVerify(t *testing.T) {
-	t.Run("Generic", func(t *testing.T) { testSignVerify(t, signGenericWrapper, verifyGeneric) })
-	t.Run("Native", func(t *testing.T) { testSignVerify(t, Sign, Verify) })
-}
-
-func testSignVerify(t *testing.T, signImpl func(privateKey PrivateKey, message []byte) []byte,
-	verifyImpl func(publicKey PublicKey, message, sig []byte) bool) {
 	var zero zeroReader
 	public, private, _ := GenerateKey(zero)
 
 	message := []byte("test message")
-	sig := signImpl(private, message)
-	if !verifyImpl(public, message, sig) {
+	sig := Sign(private, message)
+	if !Verify(public, message, sig) {
 		t.Errorf("valid signature rejected")
 	}
 
 	wrongMessage := []byte("wrong message")
-	if verifyImpl(public, wrongMessage, sig) {
+	if Verify(public, wrongMessage, sig) {
 		t.Errorf("signature of different message accepted")
 	}
 }
 
 func TestCryptoSigner(t *testing.T) {
-	t.Run("Generic", func(t *testing.T) { testCryptoSigner(t, verifyGeneric) })
-	t.Run("Native", func(t *testing.T) { testCryptoSigner(t, Verify) })
-}
-
-func testCryptoSigner(t *testing.T, verifyImpl func(publicKey PublicKey, message, sig []byte) bool) {
 	var zero zeroReader
 	public, private, _ := GenerateKey(zero)
 
@@ -102,7 +83,7 @@
 		t.Fatalf("error from Sign(): %s", err)
 	}
 
-	if !verifyImpl(public, message, signature) {
+	if !Verify(public, message, signature) {
 		t.Errorf("Verify failed on signature from Sign()")
 	}
 }
@@ -130,12 +111,6 @@
 }
 
 func TestGolden(t *testing.T) {
-	t.Run("Generic", func(t *testing.T) { testGolden(t, signGenericWrapper, verifyGeneric) })
-	t.Run("Native", func(t *testing.T) { testGolden(t, Sign, Verify) })
-}
-
-func testGolden(t *testing.T, signImpl func(privateKey PrivateKey, message []byte) []byte,
-	verifyImpl func(publicKey PublicKey, message, sig []byte) bool) {
 	// sign.input.gz is a selection of test cases from
 	// https://ed25519.cr.yp.to/python/sign.input
 	testDataZ, err := os.Open("testdata/sign.input.gz")
@@ -177,12 +152,12 @@
 		copy(priv[:], privBytes)
 		copy(priv[32:], pubKey)
 
-		sig2 := signImpl(priv[:], msg)
+		sig2 := Sign(priv[:], msg)
 		if !bytes.Equal(sig, sig2[:]) {
 			t.Errorf("different signature result on line %d: %x vs %x", lineNo, sig, sig2)
 		}
 
-		if !verifyImpl(pubKey, msg, sig2) {
+		if !Verify(pubKey, msg, sig2) {
 			t.Errorf("signature failed to verify on line %d", lineNo)
 		}
 
@@ -206,11 +181,6 @@
 }
 
 func TestMalleability(t *testing.T) {
-	t.Run("Generic", func(t *testing.T) { testMalleability(t, verifyGeneric) })
-	t.Run("Native", func(t *testing.T) { testMalleability(t, Verify) })
-}
-
-func testMalleability(t *testing.T, verifyImpl func(publicKey PublicKey, message, sig []byte) bool) {
 	// https://tools.ietf.org/html/rfc8032#section-5.1.7 adds an additional test
 	// that s be in [0, order). This prevents someone from adding a multiple of
 	// order to s and obtaining a second valid signature for the same message.
@@ -229,7 +199,7 @@
 		0xb1, 0x08, 0xc3, 0xbd, 0xae, 0x36, 0x9e, 0xf5, 0x49, 0xfa,
 	}
 
-	if verifyImpl(publicKey, msg, sig) {
+	if Verify(publicKey, msg, sig) {
 		t.Fatal("non-canonical signature accepted")
 	}
 }
diff --git a/src/encoding/binary/varint.go b/src/encoding/binary/varint.go
index bcb8ac9..38af610 100644
--- a/src/encoding/binary/varint.go
+++ b/src/encoding/binary/varint.go
@@ -106,13 +106,13 @@
 func ReadUvarint(r io.ByteReader) (uint64, error) {
 	var x uint64
 	var s uint
-	for i := 0; ; i++ {
+	for i := 0; i < MaxVarintLen64; i++ {
 		b, err := r.ReadByte()
 		if err != nil {
 			return x, err
 		}
 		if b < 0x80 {
-			if i > 9 || i == 9 && b > 1 {
+			if i == 9 && b > 1 {
 				return x, overflow
 			}
 			return x | uint64(b)<<s, nil
@@ -120,6 +120,7 @@
 		x |= uint64(b&0x7f) << s
 		s += 7
 	}
+	return x, overflow
 }
 
 // ReadVarint reads an encoded signed integer from r and returns it as an int64.
diff --git a/src/encoding/binary/varint_test.go b/src/encoding/binary/varint_test.go
index ca411ec..6ef4c99 100644
--- a/src/encoding/binary/varint_test.go
+++ b/src/encoding/binary/varint_test.go
@@ -121,21 +121,27 @@
 	}
 }
 
-func testOverflow(t *testing.T, buf []byte, n0 int, err0 error) {
+func testOverflow(t *testing.T, buf []byte, x0 uint64, n0 int, err0 error) {
 	x, n := Uvarint(buf)
 	if x != 0 || n != n0 {
 		t.Errorf("Uvarint(%v): got x = %d, n = %d; want 0, %d", buf, x, n, n0)
 	}
 
-	x, err := ReadUvarint(bytes.NewReader(buf))
-	if x != 0 || err != err0 {
-		t.Errorf("ReadUvarint(%v): got x = %d, err = %s; want 0, %s", buf, x, err, err0)
+	r := bytes.NewReader(buf)
+	len := r.Len()
+	x, err := ReadUvarint(r)
+	if x != x0 || err != err0 {
+		t.Errorf("ReadUvarint(%v): got x = %d, err = %s; want %d, %s", buf, x, err, x0, err0)
+	}
+	if read := len - r.Len(); read > MaxVarintLen64 {
+		t.Errorf("ReadUvarint(%v): read more than MaxVarintLen64 bytes, got %d", buf, read)
 	}
 }
 
 func TestOverflow(t *testing.T) {
-	testOverflow(t, []byte{0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x2}, -10, overflow)
-	testOverflow(t, []byte{0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x1, 0, 0}, -13, overflow)
+	testOverflow(t, []byte{0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x2}, 0, -10, overflow)
+	testOverflow(t, []byte{0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x1, 0, 0}, 0, -13, overflow)
+	testOverflow(t, []byte{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}, 1<<64-1, 0, overflow) // 11 bytes, should overflow
 }
 
 func TestNonCanonicalZero(t *testing.T) {
diff --git a/src/net/http/transport.go b/src/net/http/transport.go
index a41e732..d37b52b 100644
--- a/src/net/http/transport.go
+++ b/src/net/http/transport.go
@@ -100,7 +100,7 @@
 	idleLRU      connLRU
 
 	reqMu       sync.Mutex
-	reqCanceler map[*Request]func(error)
+	reqCanceler map[cancelKey]func(error)
 
 	altMu    sync.Mutex   // guards changing altProto only
 	altProto atomic.Value // of nil or map[string]RoundTripper, key is URI scheme
@@ -273,6 +273,13 @@
 	ForceAttemptHTTP2 bool
 }
 
+// A cancelKey is the key of the reqCanceler map.
+// We wrap the *Request in this type since we want to use the original request,
+// not any transient one created by roundTrip.
+type cancelKey struct {
+	req *Request
+}
+
 func (t *Transport) writeBufferSize() int {
 	if t.WriteBufferSize > 0 {
 		return t.WriteBufferSize
@@ -433,9 +440,10 @@
 // optional extra headers to write and stores any error to return
 // from roundTrip.
 type transportRequest struct {
-	*Request                        // original request, not to be mutated
-	extra    Header                 // extra headers to write, or nil
-	trace    *httptrace.ClientTrace // optional
+	*Request                         // original request, not to be mutated
+	extra     Header                 // extra headers to write, or nil
+	trace     *httptrace.ClientTrace // optional
+	cancelKey cancelKey
 
 	mu  sync.Mutex // guards err
 	err error      // first setError value for mapRoundTripError to consider
@@ -512,6 +520,7 @@
 	}
 
 	origReq := req
+	cancelKey := cancelKey{origReq}
 	req = setupRewindBody(req)
 
 	if altRT := t.alternateRoundTripper(req); altRT != nil {
@@ -546,7 +555,7 @@
 		}
 
 		// treq gets modified by roundTrip, so we need to recreate for each retry.
-		treq := &transportRequest{Request: req, trace: trace}
+		treq := &transportRequest{Request: req, trace: trace, cancelKey: cancelKey}
 		cm, err := t.connectMethodForRequest(treq)
 		if err != nil {
 			req.closeBody()
@@ -559,7 +568,7 @@
 		// to send it requests.
 		pconn, err := t.getConn(treq, cm)
 		if err != nil {
-			t.setReqCanceler(req, nil)
+			t.setReqCanceler(cancelKey, nil)
 			req.closeBody()
 			return nil, err
 		}
@@ -567,7 +576,7 @@
 		var resp *Response
 		if pconn.alt != nil {
 			// HTTP/2 path.
-			t.setReqCanceler(req, nil) // not cancelable with CancelRequest
+			t.setReqCanceler(cancelKey, nil) // not cancelable with CancelRequest
 			resp, err = pconn.alt.RoundTrip(req)
 		} else {
 			resp, err = pconn.roundTrip(treq)
@@ -753,14 +762,14 @@
 // cancelable context instead. CancelRequest cannot cancel HTTP/2
 // requests.
 func (t *Transport) CancelRequest(req *Request) {
-	t.cancelRequest(req, errRequestCanceled)
+	t.cancelRequest(cancelKey{req}, errRequestCanceled)
 }
 
 // Cancel an in-flight request, recording the error value.
-func (t *Transport) cancelRequest(req *Request, err error) {
+func (t *Transport) cancelRequest(key cancelKey, err error) {
 	t.reqMu.Lock()
-	cancel := t.reqCanceler[req]
-	delete(t.reqCanceler, req)
+	cancel := t.reqCanceler[key]
+	delete(t.reqCanceler, key)
 	t.reqMu.Unlock()
 	if cancel != nil {
 		cancel(err)
@@ -1093,16 +1102,16 @@
 	return removed
 }
 
-func (t *Transport) setReqCanceler(r *Request, fn func(error)) {
+func (t *Transport) setReqCanceler(key cancelKey, fn func(error)) {
 	t.reqMu.Lock()
 	defer t.reqMu.Unlock()
 	if t.reqCanceler == nil {
-		t.reqCanceler = make(map[*Request]func(error))
+		t.reqCanceler = make(map[cancelKey]func(error))
 	}
 	if fn != nil {
-		t.reqCanceler[r] = fn
+		t.reqCanceler[key] = fn
 	} else {
-		delete(t.reqCanceler, r)
+		delete(t.reqCanceler, key)
 	}
 }
 
@@ -1110,17 +1119,17 @@
 // for the request, we don't set the function and return false.
 // Since CancelRequest will clear the canceler, we can use the return value to detect if
 // the request was canceled since the last setReqCancel call.
-func (t *Transport) replaceReqCanceler(r *Request, fn func(error)) bool {
+func (t *Transport) replaceReqCanceler(key cancelKey, fn func(error)) bool {
 	t.reqMu.Lock()
 	defer t.reqMu.Unlock()
-	_, ok := t.reqCanceler[r]
+	_, ok := t.reqCanceler[key]
 	if !ok {
 		return false
 	}
 	if fn != nil {
-		t.reqCanceler[r] = fn
+		t.reqCanceler[key] = fn
 	} else {
-		delete(t.reqCanceler, r)
+		delete(t.reqCanceler, key)
 	}
 	return true
 }
@@ -1324,12 +1333,12 @@
 		// set request canceler to some non-nil function so we
 		// can detect whether it was cleared between now and when
 		// we enter roundTrip
-		t.setReqCanceler(req, func(error) {})
+		t.setReqCanceler(treq.cancelKey, func(error) {})
 		return pc, nil
 	}
 
 	cancelc := make(chan error, 1)
-	t.setReqCanceler(req, func(err error) { cancelc <- err })
+	t.setReqCanceler(treq.cancelKey, func(err error) { cancelc <- err })
 
 	// Queue for permission to dial.
 	t.queueForDial(w)
@@ -2078,7 +2087,7 @@
 		}
 
 		if !hasBody || bodyWritable {
-			pc.t.setReqCanceler(rc.req, nil)
+			pc.t.setReqCanceler(rc.cancelKey, nil)
 
 			// Put the idle conn back into the pool before we send the response
 			// so if they process it quickly and make another request, they'll
@@ -2151,7 +2160,7 @@
 		// reading the response body. (or for cancellation or death)
 		select {
 		case bodyEOF := <-waitForBodyRead:
-			pc.t.setReqCanceler(rc.req, nil) // before pc might return to idle pool
+			pc.t.setReqCanceler(rc.cancelKey, nil) // before pc might return to idle pool
 			alive = alive &&
 				bodyEOF &&
 				!pc.sawEOF &&
@@ -2165,7 +2174,7 @@
 			pc.t.CancelRequest(rc.req)
 		case <-rc.req.Context().Done():
 			alive = false
-			pc.t.cancelRequest(rc.req, rc.req.Context().Err())
+			pc.t.cancelRequest(rc.cancelKey, rc.req.Context().Err())
 		case <-pc.closech:
 			alive = false
 		}
@@ -2408,9 +2417,10 @@
 }
 
 type requestAndChan struct {
-	_   incomparable
-	req *Request
-	ch  chan responseAndError // unbuffered; always send in select on callerGone
+	_         incomparable
+	req       *Request
+	cancelKey cancelKey
+	ch        chan responseAndError // unbuffered; always send in select on callerGone
 
 	// whether the Transport (as opposed to the user client code)
 	// added the Accept-Encoding gzip header. If the Transport
@@ -2472,7 +2482,7 @@
 
 func (pc *persistConn) roundTrip(req *transportRequest) (resp *Response, err error) {
 	testHookEnterRoundTrip()
-	if !pc.t.replaceReqCanceler(req.Request, pc.cancelRequest) {
+	if !pc.t.replaceReqCanceler(req.cancelKey, pc.cancelRequest) {
 		pc.t.putOrCloseIdleConn(pc)
 		return nil, errRequestCanceled
 	}
@@ -2524,7 +2534,7 @@
 
 	defer func() {
 		if err != nil {
-			pc.t.setReqCanceler(req.Request, nil)
+			pc.t.setReqCanceler(req.cancelKey, nil)
 		}
 	}()
 
@@ -2540,6 +2550,7 @@
 	resc := make(chan responseAndError)
 	pc.reqch <- requestAndChan{
 		req:        req.Request,
+		cancelKey:  req.cancelKey,
 		ch:         resc,
 		addedGzip:  requestedGzip,
 		continueCh: continueCh,
@@ -2591,10 +2602,10 @@
 			}
 			return re.res, nil
 		case <-cancelChan:
-			pc.t.CancelRequest(req.Request)
+			pc.t.cancelRequest(req.cancelKey, errRequestCanceled)
 			cancelChan = nil
 		case <-ctxDoneChan:
-			pc.t.cancelRequest(req.Request, req.Context().Err())
+			pc.t.cancelRequest(req.cancelKey, req.Context().Err())
 			cancelChan = nil
 			ctxDoneChan = nil
 		}
diff --git a/src/net/http/transport_test.go b/src/net/http/transport_test.go
index 31a41f5..0a47687 100644
--- a/src/net/http/transport_test.go
+++ b/src/net/http/transport_test.go
@@ -2364,6 +2364,50 @@
 	}
 }
 
+func testTransportCancelRequestInDo(t *testing.T, body io.Reader) {
+	setParallel(t)
+	defer afterTest(t)
+	if testing.Short() {
+		t.Skip("skipping test in -short mode")
+	}
+	unblockc := make(chan bool)
+	ts := httptest.NewServer(HandlerFunc(func(w ResponseWriter, r *Request) {
+		<-unblockc
+	}))
+	defer ts.Close()
+	defer close(unblockc)
+
+	c := ts.Client()
+	tr := c.Transport.(*Transport)
+
+	donec := make(chan bool)
+	req, _ := NewRequest("GET", ts.URL, body)
+	go func() {
+		defer close(donec)
+		c.Do(req)
+	}()
+	start := time.Now()
+	timeout := 10 * time.Second
+	for time.Since(start) < timeout {
+		time.Sleep(100 * time.Millisecond)
+		tr.CancelRequest(req)
+		select {
+		case <-donec:
+			return
+		default:
+		}
+	}
+	t.Errorf("Do of canceled request has not returned after %v", timeout)
+}
+
+func TestTransportCancelRequestInDo(t *testing.T) {
+	testTransportCancelRequestInDo(t, nil)
+}
+
+func TestTransportCancelRequestWithBodyInDo(t *testing.T) {
+	testTransportCancelRequestInDo(t, bytes.NewBuffer([]byte{0}))
+}
+
 func TestTransportCancelRequestInDial(t *testing.T) {
 	defer afterTest(t)
 	if testing.Short() {
diff --git a/src/runtime/lockrank_off.go b/src/runtime/lockrank_off.go
index 891589c..425ca8d 100644
--- a/src/runtime/lockrank_off.go
+++ b/src/runtime/lockrank_off.go
@@ -1,3 +1,7 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
 // +build !goexperiment.staticlockranking
 
 package runtime
diff --git a/src/runtime/lockrank_on.go b/src/runtime/lockrank_on.go
index cf4151f..fbc5ff5 100644
--- a/src/runtime/lockrank_on.go
+++ b/src/runtime/lockrank_on.go
@@ -1,3 +1,7 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
 // +build goexperiment.staticlockranking
 
 package runtime
diff --git a/src/runtime/mpagealloc.go b/src/runtime/mpagealloc.go
index 60f7f9f..8b3c62c 100644
--- a/src/runtime/mpagealloc.go
+++ b/src/runtime/mpagealloc.go
@@ -233,16 +233,12 @@
 
 	// The address to start an allocation search with. It must never
 	// point to any memory that is not contained in inUse, i.e.
-	// inUse.contains(searchAddr) must always be true.
+	// inUse.contains(searchAddr.addr()) must always be true. The one
+	// exception to this rule is that it may take on the value of
+	// maxOffAddr to indicate that the heap is exhausted.
 	//
-	// When added with arenaBaseOffset, we guarantee that
-	// all valid heap addresses (when also added with
-	// arenaBaseOffset) below this value are allocated and
-	// not worth searching.
-	//
-	// Note that adding in arenaBaseOffset transforms addresses
-	// to a new address space with a linear view of the full address
-	// space on architectures with segmented address spaces.
+	// We guarantee that all valid heap addresses below this value
+	// are allocated and not worth searching.
 	searchAddr offAddr
 
 	// start and end represent the chunk indices
@@ -518,6 +514,30 @@
 	return uintptr(scav) * pageSize
 }
 
+// findMappedAddr returns the smallest mapped offAddr that is
+// >= addr. That is, if addr refers to mapped memory, then it is
+// returned. If addr is higher than any mapped region, then
+// it returns maxOffAddr.
+//
+// s.mheapLock must be held.
+func (s *pageAlloc) findMappedAddr(addr offAddr) offAddr {
+	// If we're not in a test, validate first by checking mheap_.arenas.
+	// This is a fast path which is only safe to use outside of testing.
+	ai := arenaIndex(addr.addr())
+	if s.test || mheap_.arenas[ai.l1()] == nil || mheap_.arenas[ai.l1()][ai.l2()] == nil {
+		vAddr, ok := s.inUse.findAddrGreaterEqual(addr.addr())
+		if ok {
+			return offAddr{vAddr}
+		} else {
+			// The candidate search address is greater than any
+			// known address, which means we definitely have no
+			// free memory left.
+			return maxOffAddr
+		}
+	}
+	return addr
+}
+
 // find searches for the first (address-ordered) contiguous free region of
 // npages in size and returns a base address for that region.
 //
@@ -526,6 +546,7 @@
 //
 // find also computes and returns a candidate s.searchAddr, which may or
 // may not prune more of the address space than s.searchAddr already does.
+// This candidate is always a valid s.searchAddr.
 //
 // find represents the slow path and the full radix tree search.
 //
@@ -695,7 +716,7 @@
 			// We found a sufficiently large run of free pages straddling
 			// some boundary, so compute the address and return it.
 			addr := levelIndexToOffAddr(l, i).add(uintptr(base) * pageSize).addr()
-			return addr, firstFree.base
+			return addr, s.findMappedAddr(firstFree.base)
 		}
 		if l == 0 {
 			// We're at level zero, so that means we've exhausted our search.
@@ -741,7 +762,7 @@
 	// found an even narrower free window.
 	searchAddr := chunkBase(ci) + uintptr(searchIdx)*pageSize
 	foundFree(offAddr{searchAddr}, chunkBase(ci+1)-searchAddr)
-	return addr, firstFree.base
+	return addr, s.findMappedAddr(firstFree.base)
 }
 
 // alloc allocates npages worth of memory from the page heap, returning the base
diff --git a/src/runtime/mpagealloc_test.go b/src/runtime/mpagealloc_test.go
index 89a4a25..65ba71d 100644
--- a/src/runtime/mpagealloc_test.go
+++ b/src/runtime/mpagealloc_test.go
@@ -612,6 +612,63 @@
 				baseChunkIdx + chunkIdxBigJump:     {{0, PallocChunkPages}},
 			},
 		}
+
+		// Test to check for issue #40191. Essentially, the candidate searchAddr
+		// discovered by find may not point to mapped memory, so we need to handle
+		// that explicitly.
+		//
+		// chunkIdxSmallOffset is an offset intended to be used within chunkIdxBigJump.
+		// It is far enough within chunkIdxBigJump that the summaries at the beginning
+		// of an address range the size of chunkIdxBigJump will not be mapped in.
+		const chunkIdxSmallOffset = 0x503
+		tests["DiscontiguousBadSearchAddr"] = test{
+			before: map[ChunkIdx][]BitRange{
+				// The mechanism for the bug involves three chunks, A, B, and C, which are
+				// far apart in the address space. In particular, B is chunkIdxBigJump +
+				// chunkIdxSmalloffset chunks away from B, and C is 2*chunkIdxBigJump chunks
+				// away from A. A has 1 page free, B has several (NOT at the end of B), and
+				// C is totally free.
+				// Note that B's free memory must not be at the end of B because the fast
+				// path in the page allocator will check if the searchAddr even gives us
+				// enough space to place the allocation in a chunk before accessing the
+				// summary.
+				BaseChunkIdx + chunkIdxBigJump*0: {{0, PallocChunkPages - 1}},
+				BaseChunkIdx + chunkIdxBigJump*1 + chunkIdxSmallOffset: {
+					{0, PallocChunkPages - 10},
+					{PallocChunkPages - 1, 1},
+				},
+				BaseChunkIdx + chunkIdxBigJump*2: {},
+			},
+			scav: map[ChunkIdx][]BitRange{
+				BaseChunkIdx + chunkIdxBigJump*0:                       {},
+				BaseChunkIdx + chunkIdxBigJump*1 + chunkIdxSmallOffset: {},
+				BaseChunkIdx + chunkIdxBigJump*2:                       {},
+			},
+			hits: []hit{
+				// We first allocate into A to set the page allocator's searchAddr to the
+				// end of that chunk. That is the only purpose A serves.
+				{1, PageBase(BaseChunkIdx, PallocChunkPages-1), 0},
+				// Then, we make a big allocation that doesn't fit into B, and so must be
+				// fulfilled by C.
+				//
+				// On the way to fulfilling the allocation into C, we estimate searchAddr
+				// using the summary structure, but that will give us a searchAddr of
+				// B's base address minus chunkIdxSmallOffset chunks. These chunks will
+				// not be mapped.
+				{100, PageBase(baseChunkIdx+chunkIdxBigJump*2, 0), 0},
+				// Now we try to make a smaller allocation that can be fulfilled by B.
+				// In an older implementation of the page allocator, this will segfault,
+				// because this last allocation will first try to access the summary
+				// for B's base address minus chunkIdxSmallOffset chunks in the fast path,
+				// and this will not be mapped.
+				{9, PageBase(baseChunkIdx+chunkIdxBigJump*1+chunkIdxSmallOffset, PallocChunkPages-10), 0},
+			},
+			after: map[ChunkIdx][]BitRange{
+				BaseChunkIdx + chunkIdxBigJump*0:                       {{0, PallocChunkPages}},
+				BaseChunkIdx + chunkIdxBigJump*1 + chunkIdxSmallOffset: {{0, PallocChunkPages}},
+				BaseChunkIdx + chunkIdxBigJump*2:                       {{0, 100}},
+			},
+		}
 	}
 	for name, v := range tests {
 		v := v
diff --git a/src/runtime/mranges.go b/src/runtime/mranges.go
index e23d077..2c0eb2c 100644
--- a/src/runtime/mranges.go
+++ b/src/runtime/mranges.go
@@ -188,6 +188,25 @@
 	return len(a.ranges)
 }
 
+// findAddrGreaterEqual returns the smallest address represented by a
+// that is >= addr. Thus, if the address is represented by a,
+// then it returns addr. The second return value indicates whether
+// such an address exists for addr in a. That is, if addr is larger than
+// any address known to a, the second return value will be false.
+func (a *addrRanges) findAddrGreaterEqual(addr uintptr) (uintptr, bool) {
+	i := a.findSucc(addr)
+	if i == 0 {
+		return a.ranges[0].base.addr(), true
+	}
+	if a.ranges[i-1].contains(addr) {
+		return addr, true
+	}
+	if i < len(a.ranges) {
+		return a.ranges[i].base.addr(), true
+	}
+	return 0, false
+}
+
 // contains returns true if a covers the address addr.
 func (a *addrRanges) contains(addr uintptr) bool {
 	i := a.findSucc(addr)
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index 2399f0a..0358222 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -132,7 +132,7 @@
 
 	if GOARCH != "wasm" { // no threads on wasm yet, so no sysmon
 		systemstack(func() {
-			newm(sysmon, nil)
+			newm(sysmon, nil, -1)
 		})
 	}
 
@@ -562,7 +562,7 @@
 	stackinit()
 	mallocinit()
 	fastrandinit() // must run before mcommoninit
-	mcommoninit(_g_.m)
+	mcommoninit(_g_.m, -1)
 	cpuinit()       // must run before alginit
 	alginit()       // maps must not be used before this call
 	modulesinit()   // provides activeModules
@@ -623,7 +623,22 @@
 	}
 }
 
-func mcommoninit(mp *m) {
+// mReserveID returns the next ID to use for a new m. This new m is immediately
+// considered 'running' by checkdead.
+//
+// sched.lock must be held.
+func mReserveID() int64 {
+	if sched.mnext+1 < sched.mnext {
+		throw("runtime: thread ID overflow")
+	}
+	id := sched.mnext
+	sched.mnext++
+	checkmcount()
+	return id
+}
+
+// Pre-allocated ID may be passed as 'id', or omitted by passing -1.
+func mcommoninit(mp *m, id int64) {
 	_g_ := getg()
 
 	// g0 stack won't make sense for user (and is not necessary unwindable).
@@ -632,12 +647,12 @@
 	}
 
 	lock(&sched.lock)
-	if sched.mnext+1 < sched.mnext {
-		throw("runtime: thread ID overflow")
+
+	if id >= 0 {
+		mp.id = id
+	} else {
+		mp.id = mReserveID()
 	}
-	mp.id = sched.mnext
-	sched.mnext++
-	checkmcount()
 
 	mp.fastrand[0] = uint32(int64Hash(uint64(mp.id), fastrandseed))
 	mp.fastrand[1] = uint32(int64Hash(uint64(cputicks()), ^fastrandseed))
@@ -1068,7 +1083,7 @@
 			notewakeup(&mp.park)
 		} else {
 			// Start M to run P.  Do not start another M below.
-			newm(nil, p)
+			newm(nil, p, -1)
 		}
 	}
 
@@ -1413,12 +1428,13 @@
 // Allocate a new m unassociated with any thread.
 // Can use p for allocation context if needed.
 // fn is recorded as the new m's m.mstartfn.
+// id is optional pre-allocated m ID. Omit by passing -1.
 //
 // This function is allowed to have write barriers even if the caller
 // isn't because it borrows _p_.
 //
 //go:yeswritebarrierrec
-func allocm(_p_ *p, fn func()) *m {
+func allocm(_p_ *p, fn func(), id int64) *m {
 	_g_ := getg()
 	acquirem() // disable GC because it can be called from sysmon
 	if _g_.m.p == 0 {
@@ -1447,7 +1463,7 @@
 
 	mp := new(m)
 	mp.mstartfn = fn
-	mcommoninit(mp)
+	mcommoninit(mp, id)
 
 	// In case of cgo or Solaris or illumos or Darwin, pthread_create will make us a stack.
 	// Windows and Plan 9 will layout sched stack on OS stack.
@@ -1586,7 +1602,7 @@
 	// The sched.pc will never be returned to, but setting it to
 	// goexit makes clear to the traceback routines where
 	// the goroutine stack ends.
-	mp := allocm(nil, nil)
+	mp := allocm(nil, nil, -1)
 	gp := malg(4096)
 	gp.sched.pc = funcPC(goexit) + sys.PCQuantum
 	gp.sched.sp = gp.stack.hi
@@ -1757,9 +1773,11 @@
 // Create a new m. It will start off with a call to fn, or else the scheduler.
 // fn needs to be static and not a heap allocated closure.
 // May run with m.p==nil, so write barriers are not allowed.
+//
+// id is optional pre-allocated m ID. Omit by passing -1.
 //go:nowritebarrierrec
-func newm(fn func(), _p_ *p) {
-	mp := allocm(_p_, fn)
+func newm(fn func(), _p_ *p, id int64) {
+	mp := allocm(_p_, fn, id)
 	mp.nextp.set(_p_)
 	mp.sigmask = initSigmask
 	if gp := getg(); gp != nil && gp.m != nil && (gp.m.lockedExt != 0 || gp.m.incgo) && GOOS != "plan9" {
@@ -1828,7 +1846,7 @@
 		releasem(mp)
 		return
 	}
-	newm(templateThread, nil)
+	newm(templateThread, nil, -1)
 	releasem(mp)
 }
 
@@ -1923,16 +1941,31 @@
 		}
 	}
 	mp := mget()
-	unlock(&sched.lock)
 	if mp == nil {
+		// No M is available, we must drop sched.lock and call newm.
+		// However, we already own a P to assign to the M.
+		//
+		// Once sched.lock is released, another G (e.g., in a syscall),
+		// could find no idle P while checkdead finds a runnable G but
+		// no running M's because this new M hasn't started yet, thus
+		// throwing in an apparent deadlock.
+		//
+		// Avoid this situation by pre-allocating the ID for the new M,
+		// thus marking it as 'running' before we drop sched.lock. This
+		// new M will eventually run the scheduler to execute any
+		// queued G's.
+		id := mReserveID()
+		unlock(&sched.lock)
+
 		var fn func()
 		if spinning {
 			// The caller incremented nmspinning, so set m.spinning in the new M.
 			fn = mspinning
 		}
-		newm(fn, _p_)
+		newm(fn, _p_, id)
 		return
 	}
+	unlock(&sched.lock)
 	if mp.spinning {
 		throw("startm: m is spinning")
 	}
@@ -5192,7 +5225,9 @@
 
 	atomic.StoreRel(&pp.runqtail, t)
 	if !q.empty() {
+		lock(&sched.lock)
 		globrunqputbatch(q, int32(qsize))
+		unlock(&sched.lock)
 	}
 }
 
diff --git a/src/runtime/race/README b/src/runtime/race/README
index 65378c8..34485f0 100644
--- a/src/runtime/race/README
+++ b/src/runtime/race/README
@@ -6,8 +6,8 @@
 
 race_darwin_amd64.syso built with LLVM 3496d6e4bea9cb99cb382939b7e79a50a3b863a5 and Go 553e003414d3aa90cc39830ee22f08453d9f3408.
 race_freebsd_amd64.syso built with LLVM 3496d6e4bea9cb99cb382939b7e79a50a3b863a5 and Go 553e003414d3aa90cc39830ee22f08453d9f3408.
-race_linux_amd64.syso built with LLVM 3496d6e4bea9cb99cb382939b7e79a50a3b863a5 and Go 553e003414d3aa90cc39830ee22f08453d9f3408.
-race_linux_ppc64le.syso built with LLVM 3496d6e4bea9cb99cb382939b7e79a50a3b863a5 and Go 553e003414d3aa90cc39830ee22f08453d9f3408.
+race_linux_amd64.syso built with LLVM 6c75db8b4bc59eace18143ce086419d37da24746 and Go 7388956b76ce15a11346cebefcf6193db044caaf.
+race_linux_ppc64le.syso built with LLVM 6c75db8b4bc59eace18143ce086419d37da24746 and Go 7388956b76ce15a11346cebefcf6193db044caaf.
 race_netbsd_amd64.syso built with LLVM 3496d6e4bea9cb99cb382939b7e79a50a3b863a5 and Go 553e003414d3aa90cc39830ee22f08453d9f3408.
 race_windows_amd64.syso built with LLVM 3496d6e4bea9cb99cb382939b7e79a50a3b863a5 and Go 553e003414d3aa90cc39830ee22f08453d9f3408.
-race_linux_arm64.syso built with LLVM 3496d6e4bea9cb99cb382939b7e79a50a3b863a5 and Go 553e003414d3aa90cc39830ee22f08453d9f3408.
+race_linux_arm64.syso built with LLVM 6c75db8b4bc59eace18143ce086419d37da24746 and Go 7388956b76ce15a11346cebefcf6193db044caaf.
diff --git a/src/runtime/race/race_linux_amd64.syso b/src/runtime/race/race_linux_amd64.syso
index 255b2e5..d31f85d 100644
--- a/src/runtime/race/race_linux_amd64.syso
+++ b/src/runtime/race/race_linux_amd64.syso
Binary files differ
diff --git a/src/runtime/race/race_linux_arm64.syso b/src/runtime/race/race_linux_arm64.syso
index f15c599..7c74171 100644
--- a/src/runtime/race/race_linux_arm64.syso
+++ b/src/runtime/race/race_linux_arm64.syso
Binary files differ
diff --git a/src/runtime/race/race_linux_ppc64le.syso b/src/runtime/race/race_linux_ppc64le.syso
index 2bf5029..a3c72be 100644
--- a/src/runtime/race/race_linux_ppc64le.syso
+++ b/src/runtime/race/race_linux_ppc64le.syso
Binary files differ
diff --git a/src/runtime/race/syso_test.go b/src/runtime/race/syso_test.go
new file mode 100644
index 0000000..db846c5
--- /dev/null
+++ b/src/runtime/race/syso_test.go
@@ -0,0 +1,39 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !android,!js,!ppc64le
+
+// Note: we don't run on Android or ppc64 because if there is any non-race test
+// file in this package, the OS tries to link the .syso file into the
+// test (even when we're not in race mode), which fails. I'm not sure
+// why, but easiest to just punt - as long as a single builder runs
+// this test, we're good.
+
+package race
+
+import (
+	"bytes"
+	"os/exec"
+	"path/filepath"
+	"runtime"
+	"testing"
+)
+
+func TestIssue37485(t *testing.T) {
+	files, err := filepath.Glob("./*.syso")
+	if err != nil {
+		t.Fatalf("can't find syso files: %s", err)
+	}
+	for _, f := range files {
+		cmd := exec.Command(filepath.Join(runtime.GOROOT(), "bin", "go"), "tool", "nm", f)
+		res, err := cmd.CombinedOutput()
+		if err != nil {
+			t.Errorf("nm of %s failed: %s", f, err)
+			continue
+		}
+		if bytes.Contains(res, []byte("getauxval")) {
+			t.Errorf("%s contains getauxval", f)
+		}
+	}
+}
diff --git a/src/sync/runtime2.go b/src/sync/runtime2.go
index 931edad..f10c4e8 100644
--- a/src/sync/runtime2.go
+++ b/src/sync/runtime2.go
@@ -1,3 +1,7 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
 // +build !goexperiment.staticlockranking
 
 package sync
diff --git a/src/sync/runtime2_lockrank.go b/src/sync/runtime2_lockrank.go
index 5a68e90..aaa1c27 100644
--- a/src/sync/runtime2_lockrank.go
+++ b/src/sync/runtime2_lockrank.go
@@ -1,3 +1,7 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
 // +build goexperiment.staticlockranking
 
 package sync
diff --git a/src/syscall/exec_linux.go b/src/syscall/exec_linux.go
index 23d7343..b7351cd 100644
--- a/src/syscall/exec_linux.go
+++ b/src/syscall/exec_linux.go
@@ -465,7 +465,7 @@
 			}
 			_, _, err1 = RawSyscall(SYS_DUP3, uintptr(fd[i]), uintptr(nextfd), O_CLOEXEC)
 			if _SYS_dup != SYS_DUP3 && err1 == ENOSYS {
-				_, _, err1 = RawSyscall(_SYS_dup, uintptr(pipe), uintptr(nextfd), 0)
+				_, _, err1 = RawSyscall(_SYS_dup, uintptr(fd[i]), uintptr(nextfd), 0)
 				if err1 != 0 {
 					goto childerror
 				}
diff --git a/src/testing/testing.go b/src/testing/testing.go
index 85da6bb..061142b 100644
--- a/src/testing/testing.go
+++ b/src/testing/testing.go
@@ -3,7 +3,7 @@
 // license that can be found in the LICENSE file.
 
 // Package testing provides support for automated testing of Go packages.
-// It is intended to be used in concert with the ``go test'' command, which automates
+// It is intended to be used in concert with the "go test" command, which automates
 // execution of any function of the form
 //     func TestXxx(*testing.T)
 // where Xxx does not start with a lowercase letter. The function name
@@ -14,8 +14,8 @@
 // To write a new test suite, create a file whose name ends _test.go that
 // contains the TestXxx functions as described here. Put the file in the same
 // package as the one being tested. The file will be excluded from regular
-// package builds but will be included when the ``go test'' command is run.
-// For more detail, run ``go help test'' and ``go help testflag''.
+// package builds but will be included when the "go test" command is run.
+// For more detail, run "go help test" and "go help testflag".
 //
 // A simple test function looks like this:
 //
diff --git a/test/codegen/memops.go b/test/codegen/memops.go
index cd35910..a234283 100644
--- a/test/codegen/memops.go
+++ b/test/codegen/memops.go
@@ -354,3 +354,26 @@
 	}
 	return 1
 }
+
+func idxFloatOps(a []float64, b []float32, i int) (float64, float32) {
+	c := float64(7)
+	// amd64: `ADDSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
+	c += a[i+1]
+	// amd64: `SUBSD\t16\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
+	c -= a[i+2]
+	// amd64: `MULSD\t24\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
+	c *= a[i+3]
+	// amd64: `DIVSD\t32\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
+	c /= a[i+4]
+
+	d := float32(8)
+	// amd64: `ADDSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
+	d += b[i+1]
+	// amd64: `SUBSS\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
+	d -= b[i+2]
+	// amd64: `MULSS\t12\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
+	d *= b[i+3]
+	// amd64: `DIVSS\t16\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
+	d /= b[i+4]
+	return c, d
+}
diff --git a/test/fixedbugs/issue40367.go b/test/fixedbugs/issue40367.go
new file mode 100644
index 0000000..0dc5ad7
--- /dev/null
+++ b/test/fixedbugs/issue40367.go
@@ -0,0 +1,41 @@
+// run
+
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+func case1() {
+	rates := []int32{1,2,3,4,5,6}
+	var sink [6]int
+	j := len(sink)
+	for star, _ := range rates {
+		if star+1 < 1 {
+			panic("")
+		}
+		j--
+		sink[j] = j
+	}
+}
+
+func case2() {
+	i := 0
+	var sink [3]int
+	j := len(sink)
+top:
+	j--
+	sink[j] = j
+	if i < 2 {
+		i++
+		if i < 1 {
+			return
+		}
+		goto top
+	}
+}
+
+func main() {
+	case1()
+	case2()
+}
\ No newline at end of file
diff --git a/test/prove.go b/test/prove.go
index d37021d..3c19c51 100644
--- a/test/prove.go
+++ b/test/prove.go
@@ -670,7 +670,8 @@
 	i := 0
 	if len(b) > i {
 	top:
-		println(b[i]) // ERROR "Induction variable: limits \[0,\?\), increment 1$" "Proved IsInBounds$"
+		// TODO: remove the todo of next line once we complete the following optimization of CL 244579
+		// println(b[i]) // todo: ERROR "Induction variable: limits \[0,\?\), increment 1$" "Proved IsInBounds$"
 		i++
 		if i < len(b) {
 			goto top
@@ -720,7 +721,8 @@
 // range2 elements are larger, so they use the general form of a range loop.
 func range2(b [][32]int) {
 	for i, v := range b {
-		b[i][0] = v[0] + 1 // ERROR "Induction variable: limits \[0,\?\), increment 1$" "Proved IsInBounds$"
+		// TODO: remove the todo of next line once we complete the following optimization of CL 244579
+		b[i][0] = v[0] + 1 // todo: ERROR "Induction variable: limits \[0,\?\), increment 1$" "Proved IsInBounds$"
 		if i < len(b) {    // ERROR "Proved Less64$"
 			println("x")
 		}