[dev.link] all: merge branch 'master' into dev.link

Clean merge.

Change-Id: If2280b25fedfea9aca8560301cfa54ba4990e47b
diff --git a/doc/go1.15.html b/doc/go1.15.html
index 79b18a3..ba68c65 100644
--- a/doc/go1.15.html
+++ b/doc/go1.15.html
@@ -26,7 +26,7 @@
 <h2 id="language">Changes to the language</h2>
 
 <p>
-TODO
+  There are no changes to the language.
 </p>
 
 <h2 id="ports">Ports</h2>
diff --git a/src/cmd/compile/fmtmap_test.go b/src/cmd/compile/fmtmap_test.go
index 5a24296..6f69abf 100644
--- a/src/cmd/compile/fmtmap_test.go
+++ b/src/cmd/compile/fmtmap_test.go
@@ -151,9 +151,11 @@
 	"int %x":                                          "",
 	"int16 %d":                                        "",
 	"int16 %x":                                        "",
+	"int32 %#x":                                       "",
 	"int32 %d":                                        "",
 	"int32 %v":                                        "",
 	"int32 %x":                                        "",
+	"int64 %#x":                                       "",
 	"int64 %+d":                                       "",
 	"int64 %-10d":                                     "",
 	"int64 %.5d":                                      "",
diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go
index b58696d..47cb422 100644
--- a/src/cmd/compile/internal/amd64/ssa.go
+++ b/src/cmd/compile/internal/amd64/ssa.go
@@ -1252,11 +1252,11 @@
 	ssa.BlockAMD64NAN: {x86.AJPS, x86.AJPC},
 }
 
-var eqfJumps = [2][2]gc.FloatingEQNEJump{
+var eqfJumps = [2][2]gc.IndexJump{
 	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
 	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
 }
-var nefJumps = [2][2]gc.FloatingEQNEJump{
+var nefJumps = [2][2]gc.IndexJump{
 	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
 	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
 }
@@ -1296,10 +1296,10 @@
 		p.To.Sym = b.Aux.(*obj.LSym)
 
 	case ssa.BlockAMD64EQF:
-		s.FPJump(b, next, &eqfJumps)
+		s.CombJump(b, next, &eqfJumps)
 
 	case ssa.BlockAMD64NEF:
-		s.FPJump(b, next, &nefJumps)
+		s.CombJump(b, next, &nefJumps)
 
 	case ssa.BlockAMD64EQ, ssa.BlockAMD64NE,
 		ssa.BlockAMD64LT, ssa.BlockAMD64GE,
diff --git a/src/cmd/compile/internal/arm64/ssa.go b/src/cmd/compile/internal/arm64/ssa.go
index 06c520d..709253b 100644
--- a/src/cmd/compile/internal/arm64/ssa.go
+++ b/src/cmd/compile/internal/arm64/ssa.go
@@ -998,6 +998,20 @@
 	ssa.BlockARM64FGE:  {arm64.ABGE, arm64.ABLT},
 	ssa.BlockARM64FLE:  {arm64.ABLS, arm64.ABHI},
 	ssa.BlockARM64FGT:  {arm64.ABGT, arm64.ABLE},
+	ssa.BlockARM64LTnoov: {arm64.ABMI, arm64.ABPL},
+	ssa.BlockARM64GEnoov: {arm64.ABPL, arm64.ABMI},
+}
+
+// leJumps models a 'LEnoov' ('<=' without honoring overflow) branch as two consecutive jumps
+var leJumps = [2][2]gc.IndexJump{
+	{{Jump: arm64.ABEQ, Index: 0}, {Jump: arm64.ABPL, Index: 1}}, // next == b.Succs[0]
+	{{Jump: arm64.ABMI, Index: 0}, {Jump: arm64.ABEQ, Index: 0}}, // next == b.Succs[1]
+}
+
+// gtJumps models a 'GTnoov' ('>' without honoring overflow) branch as two consecutive jumps
+var gtJumps = [2][2]gc.IndexJump{
+	{{Jump: arm64.ABMI, Index: 1}, {Jump: arm64.ABEQ, Index: 1}}, // next == b.Succs[0]
+	{{Jump: arm64.ABEQ, Index: 1}, {Jump: arm64.ABPL, Index: 0}}, // next == b.Succs[1]
 }
 
 func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
@@ -1045,7 +1059,8 @@
 		ssa.BlockARM64Z, ssa.BlockARM64NZ,
 		ssa.BlockARM64ZW, ssa.BlockARM64NZW,
 		ssa.BlockARM64FLT, ssa.BlockARM64FGE,
-		ssa.BlockARM64FLE, ssa.BlockARM64FGT:
+		ssa.BlockARM64FLE, ssa.BlockARM64FGT,
+		ssa.BlockARM64LTnoov, ssa.BlockARM64GEnoov:
 		jmp := blockJump[b.Kind]
 		var p *obj.Prog
 		switch next {
@@ -1087,6 +1102,10 @@
 		p.From.Type = obj.TYPE_CONST
 		p.Reg = b.Controls[0].Reg()
 
+	case ssa.BlockARM64LEnoov:
+		s.CombJump(b, next, &leJumps)
+	case ssa.BlockARM64GTnoov:
+		s.CombJump(b, next, &gtJumps)
 	default:
 		b.Fatalf("branch not implemented: %s", b.LongString())
 	}
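
To illustrate how these jump tables are consumed (a sketch only, using the
IndexJump machinery introduced in gc/ssa.go below): for a 'LEnoov' block whose
fallthrough successor is b.Succs[0], the generator emits the leJumps[0] pair
(BEQ to Succs[0], then BPL to Succs[1]); the remaining negative (MI) case
falls through to Succs[0]:

	// Sketch: emitting one row of leJumps inside the code generator.
	for i := range leJumps[0] {
		j := &leJumps[0][i]
		p := s.Br(j.Jump, b.Succs[j.Index].Block()) // conditional branch to the indexed successor
		p.Pos = b.Pos
	}
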
diff --git a/src/cmd/compile/internal/gc/plive.go b/src/cmd/compile/internal/gc/plive.go
index a4c051b..e2de628 100644
--- a/src/cmd/compile/internal/gc/plive.go
+++ b/src/cmd/compile/internal/gc/plive.go
@@ -1249,7 +1249,7 @@
 		if go115ReduceLiveness {
 			hasStackMap := lv.hasStackMap(v)
 			isUnsafePoint := lv.allUnsafe || lv.unsafePoints.Get(int32(v.ID))
-			idx := LivenessIndex{StackMapDontCare, 0, isUnsafePoint}
+			idx := LivenessIndex{StackMapDontCare, StackMapDontCare, isUnsafePoint}
 			if hasStackMap {
 				idx.stackMapIndex = lv.stackMapSet.add(lv.livevars[pos].vars)
 				pos++
diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index 70f6dd6..c0902cd 100644
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -6313,34 +6313,39 @@
 	thearch.ZeroRange(pp, p, frame+lo, hi-lo, &state)
 }
 
-type FloatingEQNEJump struct {
+// An IndexJump describes one of the consecutive jump instructions used to
+// model a non-standard branch condition; Index selects the target successor.
+type IndexJump struct {
 	Jump  obj.As
 	Index int
 }
 
-func (s *SSAGenState) oneFPJump(b *ssa.Block, jumps *FloatingEQNEJump) {
-	p := s.Prog(jumps.Jump)
-	p.To.Type = obj.TYPE_BRANCH
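+// oneJump emits the conditional branch described by jump, targeting the
+// successor selected by jump.Index.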
+func (s *SSAGenState) oneJump(b *ssa.Block, jump *IndexJump) {
+	p := s.Br(jump.Jump, b.Succs[jump.Index].Block())
 	p.Pos = b.Pos
-	to := jumps.Index
-	s.Branches = append(s.Branches, Branch{p, b.Succs[to].Block()})
 }
 
-func (s *SSAGenState) FPJump(b, next *ssa.Block, jumps *[2][2]FloatingEQNEJump) {
+// CombJump generates combinational instructions (currently 2) for a block jump,
+// so that the behavior of non-standard condition codes can be simulated.
+func (s *SSAGenState) CombJump(b, next *ssa.Block, jumps *[2][2]IndexJump) {
 	switch next {
 	case b.Succs[0].Block():
-		s.oneFPJump(b, &jumps[0][0])
-		s.oneFPJump(b, &jumps[0][1])
+		s.oneJump(b, &jumps[0][0])
+		s.oneJump(b, &jumps[0][1])
 	case b.Succs[1].Block():
-		s.oneFPJump(b, &jumps[1][0])
-		s.oneFPJump(b, &jumps[1][1])
+		s.oneJump(b, &jumps[1][0])
+		s.oneJump(b, &jumps[1][1])
 	default:
-		s.oneFPJump(b, &jumps[1][0])
-		s.oneFPJump(b, &jumps[1][1])
-		q := s.Prog(obj.AJMP)
+		var q *obj.Prog
+		if b.Likely != ssa.BranchUnlikely {
+			s.oneJump(b, &jumps[1][0])
+			s.oneJump(b, &jumps[1][1])
+			q = s.Br(obj.AJMP, b.Succs[1].Block())
+		} else {
+			s.oneJump(b, &jumps[0][0])
+			s.oneJump(b, &jumps[0][1])
+			q = s.Br(obj.AJMP, b.Succs[0].Block())
+		}
 		q.Pos = b.Pos
-		q.To.Type = obj.TYPE_BRANCH
-		s.Branches = append(s.Branches, Branch{q, b.Succs[1].Block()})
 	}
 }
 
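For reference, a usage sketch with hypothetical names (it mirrors the amd64 and
x86 call sites later in this CL): a backend models each non-standard block kind
by pairing CombJump with a [2][2]IndexJump table.

	// Hypothetical table for a block taken when ZF is set and PF is clear:
	var exampleJumps = [2][2]gc.IndexJump{
		{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
		{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
	}

	// ...and in the backend's ssaGenBlock:
	//	case ssa.Block386EQF:
	//		s.CombJump(b, next, &exampleJumps)
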
diff --git a/src/cmd/compile/internal/ssa/gen/ARM64.rules b/src/cmd/compile/internal/ssa/gen/ARM64.rules
index 0320241..47f2214 100644
--- a/src/cmd/compile/internal/ssa/gen/ARM64.rules
+++ b/src/cmd/compile/internal/ssa/gen/ARM64.rules
@@ -598,31 +598,31 @@
 
 (EQ (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (EQ (CMNconst [c] y) yes no)
 (NE (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (NE (CMNconst [c] y) yes no)
-(LT (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (LT (CMNconst [c] y) yes no)
-(LE (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (LE (CMNconst [c] y) yes no)
-(GT (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (GT (CMNconst [c] y) yes no)
-(GE (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (GE (CMNconst [c] y) yes no)
+(LT (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (LTnoov (CMNconst [c] y) yes no)
+(LE (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (LEnoov (CMNconst [c] y) yes no)
+(GT (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (GTnoov (CMNconst [c] y) yes no)
+(GE (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (GEnoov (CMNconst [c] y) yes no)
 
 (EQ (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (EQ (CMNWconst [int32(c)] y) yes no)
 (NE (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (NE (CMNWconst [int32(c)] y) yes no)
-(LT (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (LT (CMNWconst [int32(c)] y) yes no)
-(LE (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (LE (CMNWconst [int32(c)] y) yes no)
-(GT (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (GT (CMNWconst [int32(c)] y) yes no)
-(GE (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (GE (CMNWconst [int32(c)] y) yes no)
+(LT (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (LTnoov (CMNWconst [int32(c)] y) yes no)
+(LE (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (LEnoov (CMNWconst [int32(c)] y) yes no)
+(GT (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (GTnoov (CMNWconst [int32(c)] y) yes no)
+(GE (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (GEnoov (CMNWconst [int32(c)] y) yes no)
 
 (EQ (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (EQ (CMN x y) yes no)
 (NE (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (NE (CMN x y) yes no)
-(LT (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (LT (CMN x y) yes no)
-(LE (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (LE (CMN x y) yes no)
-(GT (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (GT (CMN x y) yes no)
-(GE (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (GE (CMN x y) yes no)
+(LT (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (LTnoov (CMN x y) yes no)
+(LE (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (LEnoov (CMN x y) yes no)
+(GT (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (GTnoov (CMN x y) yes no)
+(GE (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (GEnoov (CMN x y) yes no)
 
 (EQ (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (EQ (CMNW x y) yes no)
 (NE (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (NE (CMNW x y) yes no)
-(LT (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (LT (CMNW x y) yes no)
-(LE (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (LE (CMNW x y) yes no)
-(GT (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (GT (CMNW x y) yes no)
-(GE (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (GE (CMNW x y) yes no)
+(LT (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (LTnoov (CMNW x y) yes no)
+(LE (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (LEnoov (CMNW x y) yes no)
+(GT (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (GTnoov (CMNW x y) yes no)
+(GE (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (GEnoov (CMNW x y) yes no)
 
 (EQ (CMP x z:(NEG y)) yes no) && z.Uses == 1 => (EQ (CMN x y) yes no)
 (NE (CMP x z:(NEG y)) yes no) && z.Uses == 1 => (NE (CMN x y) yes no)
@@ -645,31 +645,31 @@
 
 (EQ (CMPconst [0]  z:(MADD a x y)) yes no) && z.Uses==1 => (EQ (CMN a (MUL <x.Type> x y)) yes no)
 (NE (CMPconst [0]  z:(MADD a x y)) yes no) && z.Uses==1 => (NE (CMN a (MUL <x.Type> x y)) yes no)
-(LT (CMPconst [0]  z:(MADD a x y)) yes no) && z.Uses==1 => (LT (CMN a (MUL <x.Type> x y)) yes no)
-(LE (CMPconst [0]  z:(MADD a x y)) yes no) && z.Uses==1 => (LE (CMN a (MUL <x.Type> x y)) yes no)
-(GT (CMPconst [0]  z:(MADD a x y)) yes no) && z.Uses==1 => (GT (CMN a (MUL <x.Type> x y)) yes no)
-(GE (CMPconst [0]  z:(MADD a x y)) yes no) && z.Uses==1 => (GE (CMN a (MUL <x.Type> x y)) yes no)
+(LT (CMPconst [0]  z:(MADD a x y)) yes no) && z.Uses==1 => (LTnoov (CMN a (MUL <x.Type> x y)) yes no)
+(LE (CMPconst [0]  z:(MADD a x y)) yes no) && z.Uses==1 => (LEnoov (CMN a (MUL <x.Type> x y)) yes no)
+(GT (CMPconst [0]  z:(MADD a x y)) yes no) && z.Uses==1 => (GTnoov (CMN a (MUL <x.Type> x y)) yes no)
+(GE (CMPconst [0]  z:(MADD a x y)) yes no) && z.Uses==1 => (GEnoov (CMN a (MUL <x.Type> x y)) yes no)
 
 (EQ (CMPconst [0]  z:(MSUB a x y)) yes no) && z.Uses==1 => (EQ (CMP a (MUL <x.Type> x y)) yes no)
 (NE (CMPconst [0]  z:(MSUB a x y)) yes no) && z.Uses==1 => (NE (CMP a (MUL <x.Type> x y)) yes no)
-(LE (CMPconst [0]  z:(MSUB a x y)) yes no) && z.Uses==1 => (LE (CMP a (MUL <x.Type> x y)) yes no)
-(LT (CMPconst [0]  z:(MSUB a x y)) yes no) && z.Uses==1 => (LT (CMP a (MUL <x.Type> x y)) yes no)
-(GE (CMPconst [0]  z:(MSUB a x y)) yes no) && z.Uses==1 => (GE (CMP a (MUL <x.Type> x y)) yes no)
-(GT (CMPconst [0]  z:(MSUB a x y)) yes no) && z.Uses==1 => (GT (CMP a (MUL <x.Type> x y)) yes no)
+(LE (CMPconst [0]  z:(MSUB a x y)) yes no) && z.Uses==1 => (LEnoov (CMP a (MUL <x.Type> x y)) yes no)
+(LT (CMPconst [0]  z:(MSUB a x y)) yes no) && z.Uses==1 => (LTnoov (CMP a (MUL <x.Type> x y)) yes no)
+(GE (CMPconst [0]  z:(MSUB a x y)) yes no) && z.Uses==1 => (GEnoov (CMP a (MUL <x.Type> x y)) yes no)
+(GT (CMPconst [0]  z:(MSUB a x y)) yes no) && z.Uses==1 => (GTnoov (CMP a (MUL <x.Type> x y)) yes no)
 
 (EQ (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => (EQ (CMNW a (MULW <x.Type> x y)) yes no)
 (NE (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => (NE (CMNW a (MULW <x.Type> x y)) yes no)
-(LE (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => (LE (CMNW a (MULW <x.Type> x y)) yes no)
-(LT (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => (LT (CMNW a (MULW <x.Type> x y)) yes no)
-(GE (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => (GE (CMNW a (MULW <x.Type> x y)) yes no)
-(GT (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => (GT (CMNW a (MULW <x.Type> x y)) yes no)
+(LE (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => (LEnoov (CMNW a (MULW <x.Type> x y)) yes no)
+(LT (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => (LTnoov (CMNW a (MULW <x.Type> x y)) yes no)
+(GE (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => (GEnoov (CMNW a (MULW <x.Type> x y)) yes no)
+(GT (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => (GTnoov (CMNW a (MULW <x.Type> x y)) yes no)
 
 (EQ (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => (EQ (CMPW a (MULW <x.Type> x y)) yes no)
 (NE (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => (NE (CMPW a (MULW <x.Type> x y)) yes no)
-(LE (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => (LE (CMPW a (MULW <x.Type> x y)) yes no)
-(LT (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => (LT (CMPW a (MULW <x.Type> x y)) yes no)
-(GE (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => (GE (CMPW a (MULW <x.Type> x y)) yes no)
-(GT (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => (GT (CMPW a (MULW <x.Type> x y)) yes no)
+(LE (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => (LEnoov (CMPW a (MULW <x.Type> x y)) yes no)
+(LT (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => (LTnoov (CMPW a (MULW <x.Type> x y)) yes no)
+(GE (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => (GEnoov (CMPW a (MULW <x.Type> x y)) yes no)
+(GT (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => (GTnoov (CMPW a (MULW <x.Type> x y)) yes no)
 
 // Absorb bit-tests into block
 (Z  (ANDconst [c] x) yes no) && oneBit(c) => (TBZ  [int64(ntz64(c))] x yes no)
@@ -1503,6 +1503,10 @@
 (FGT (InvertFlags cmp) yes no) -> (FLT cmp yes no)
 (FLE (InvertFlags cmp) yes no) -> (FGE cmp yes no)
 (FGE (InvertFlags cmp) yes no) -> (FLE cmp yes no)
+(LTnoov (InvertFlags cmp) yes no) => (GTnoov cmp yes no)
+(GEnoov (InvertFlags cmp) yes no) => (LEnoov cmp yes no)
+(LEnoov (InvertFlags cmp) yes no) => (GEnoov cmp yes no)
+(GTnoov (InvertFlags cmp) yes no) => (LTnoov cmp yes no)
 
 // absorb InvertFlags into CSEL(0)
 (CSEL {cc} x y (InvertFlags cmp)) -> (CSEL {arm64Invert(cc.(Op))} x y cmp)
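
As a hedged example of the source pattern these rules target: a comparison of
an addition (or multiply-add/sub) against zero. Go's + wraps on overflow, so
the branch must follow the sign bit of the wrapped sum; once the ADD is fused
into CMN, the plain LT/GE conditions (N != V, N == V) would consult the
overflow flag and mishandle exactly those wrapped cases, hence the new 'noov'
blocks that test N alone (BMI/BPL, per the arm64 table above).

	// Illustrative Go source matched by the rules above:
	func f(x, y int64) int {
		if x+y < 0 { // z := ADD x y; (LT (CMPconst [0] z)) is rewritten to (LTnoov (CMN x y))
			return -1
		}
		return 1
	}
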
diff --git a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
index 73e18bc..63faab2 100644
--- a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
@@ -701,6 +701,10 @@
 		{name: "FLE", controls: 1},
 		{name: "FGT", controls: 1},
 		{name: "FGE", controls: 1},
+		{name: "LTnoov", controls: 1}, // 'LT' but without honoring overflow
+		{name: "LEnoov", controls: 1}, // 'LE' but without honoring overflow
+		{name: "GTnoov", controls: 1}, // 'GT' but without honoring overflow
+		{name: "GEnoov", controls: 1}, // 'GE' but without honoring overflow
 	}
 
 	archs = append(archs, arch{
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index d619f36..4a83a46 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -82,6 +82,10 @@
 	BlockARM64FLE
 	BlockARM64FGT
 	BlockARM64FGE
+	BlockARM64LTnoov
+	BlockARM64LEnoov
+	BlockARM64GTnoov
+	BlockARM64GEnoov
 
 	BlockMIPSEQ
 	BlockMIPSNE
@@ -192,26 +196,30 @@
 	BlockARMUGT: "UGT",
 	BlockARMUGE: "UGE",
 
-	BlockARM64EQ:   "EQ",
-	BlockARM64NE:   "NE",
-	BlockARM64LT:   "LT",
-	BlockARM64LE:   "LE",
-	BlockARM64GT:   "GT",
-	BlockARM64GE:   "GE",
-	BlockARM64ULT:  "ULT",
-	BlockARM64ULE:  "ULE",
-	BlockARM64UGT:  "UGT",
-	BlockARM64UGE:  "UGE",
-	BlockARM64Z:    "Z",
-	BlockARM64NZ:   "NZ",
-	BlockARM64ZW:   "ZW",
-	BlockARM64NZW:  "NZW",
-	BlockARM64TBZ:  "TBZ",
-	BlockARM64TBNZ: "TBNZ",
-	BlockARM64FLT:  "FLT",
-	BlockARM64FLE:  "FLE",
-	BlockARM64FGT:  "FGT",
-	BlockARM64FGE:  "FGE",
+	BlockARM64EQ:     "EQ",
+	BlockARM64NE:     "NE",
+	BlockARM64LT:     "LT",
+	BlockARM64LE:     "LE",
+	BlockARM64GT:     "GT",
+	BlockARM64GE:     "GE",
+	BlockARM64ULT:    "ULT",
+	BlockARM64ULE:    "ULE",
+	BlockARM64UGT:    "UGT",
+	BlockARM64UGE:    "UGE",
+	BlockARM64Z:      "Z",
+	BlockARM64NZ:     "NZ",
+	BlockARM64ZW:     "ZW",
+	BlockARM64NZW:    "NZW",
+	BlockARM64TBZ:    "TBZ",
+	BlockARM64TBNZ:   "TBNZ",
+	BlockARM64FLT:    "FLT",
+	BlockARM64FLE:    "FLE",
+	BlockARM64FGT:    "FGT",
+	BlockARM64FGE:    "FGE",
+	BlockARM64LTnoov: "LTnoov",
+	BlockARM64LEnoov: "LEnoov",
+	BlockARM64GTnoov: "GTnoov",
+	BlockARM64GEnoov: "GEnoov",
 
 	BlockMIPSEQ:  "EQ",
 	BlockMIPSNE:  "NE",
diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go
index d243ea9..4b8ef43 100644
--- a/src/cmd/compile/internal/ssa/rewriteARM64.go
+++ b/src/cmd/compile/internal/ssa/rewriteARM64.go
@@ -26436,7 +26436,7 @@
 		}
 		// match: (GE (CMPconst [0] x:(ADDconst [c] y)) yes no)
 		// cond: x.Uses == 1
-		// result: (GE (CMNconst [c] y) yes no)
+		// result: (GEnoov (CMNconst [c] y) yes no)
 		for b.Controls[0].Op == OpARM64CMPconst {
 			v_0 := b.Controls[0]
 			if auxIntToInt64(v_0.AuxInt) != 0 {
@@ -26454,12 +26454,12 @@
 			v0 := b.NewValue0(v_0.Pos, OpARM64CMNconst, types.TypeFlags)
 			v0.AuxInt = int64ToAuxInt(c)
 			v0.AddArg(y)
-			b.resetWithControl(BlockARM64GE, v0)
+			b.resetWithControl(BlockARM64GEnoov, v0)
 			return true
 		}
 		// match: (GE (CMPWconst [0] x:(ADDconst [c] y)) yes no)
 		// cond: x.Uses == 1
-		// result: (GE (CMNWconst [int32(c)] y) yes no)
+		// result: (GEnoov (CMNWconst [int32(c)] y) yes no)
 		for b.Controls[0].Op == OpARM64CMPWconst {
 			v_0 := b.Controls[0]
 			if auxIntToInt32(v_0.AuxInt) != 0 {
@@ -26477,12 +26477,12 @@
 			v0 := b.NewValue0(v_0.Pos, OpARM64CMNWconst, types.TypeFlags)
 			v0.AuxInt = int32ToAuxInt(int32(c))
 			v0.AddArg(y)
-			b.resetWithControl(BlockARM64GE, v0)
+			b.resetWithControl(BlockARM64GEnoov, v0)
 			return true
 		}
 		// match: (GE (CMPconst [0] z:(ADD x y)) yes no)
 		// cond: z.Uses == 1
-		// result: (GE (CMN x y) yes no)
+		// result: (GEnoov (CMN x y) yes no)
 		for b.Controls[0].Op == OpARM64CMPconst {
 			v_0 := b.Controls[0]
 			if auxIntToInt64(v_0.AuxInt) != 0 {
@@ -26503,14 +26503,14 @@
 				}
 				v0 := b.NewValue0(v_0.Pos, OpARM64CMN, types.TypeFlags)
 				v0.AddArg2(x, y)
-				b.resetWithControl(BlockARM64GE, v0)
+				b.resetWithControl(BlockARM64GEnoov, v0)
 				return true
 			}
 			break
 		}
 		// match: (GE (CMPWconst [0] z:(ADD x y)) yes no)
 		// cond: z.Uses == 1
-		// result: (GE (CMNW x y) yes no)
+		// result: (GEnoov (CMNW x y) yes no)
 		for b.Controls[0].Op == OpARM64CMPWconst {
 			v_0 := b.Controls[0]
 			if auxIntToInt32(v_0.AuxInt) != 0 {
@@ -26531,7 +26531,7 @@
 				}
 				v0 := b.NewValue0(v_0.Pos, OpARM64CMNW, types.TypeFlags)
 				v0.AddArg2(x, y)
-				b.resetWithControl(BlockARM64GE, v0)
+				b.resetWithControl(BlockARM64GEnoov, v0)
 				return true
 			}
 			break
@@ -26578,7 +26578,7 @@
 		}
 		// match: (GE (CMPconst [0] z:(MADD a x y)) yes no)
 		// cond: z.Uses==1
-		// result: (GE (CMN a (MUL <x.Type> x y)) yes no)
+		// result: (GEnoov (CMN a (MUL <x.Type> x y)) yes no)
 		for b.Controls[0].Op == OpARM64CMPconst {
 			v_0 := b.Controls[0]
 			if auxIntToInt64(v_0.AuxInt) != 0 {
@@ -26598,12 +26598,12 @@
 			v1 := b.NewValue0(v_0.Pos, OpARM64MUL, x.Type)
 			v1.AddArg2(x, y)
 			v0.AddArg2(a, v1)
-			b.resetWithControl(BlockARM64GE, v0)
+			b.resetWithControl(BlockARM64GEnoov, v0)
 			return true
 		}
 		// match: (GE (CMPconst [0] z:(MSUB a x y)) yes no)
 		// cond: z.Uses==1
-		// result: (GE (CMP a (MUL <x.Type> x y)) yes no)
+		// result: (GEnoov (CMP a (MUL <x.Type> x y)) yes no)
 		for b.Controls[0].Op == OpARM64CMPconst {
 			v_0 := b.Controls[0]
 			if auxIntToInt64(v_0.AuxInt) != 0 {
@@ -26623,12 +26623,12 @@
 			v1 := b.NewValue0(v_0.Pos, OpARM64MUL, x.Type)
 			v1.AddArg2(x, y)
 			v0.AddArg2(a, v1)
-			b.resetWithControl(BlockARM64GE, v0)
+			b.resetWithControl(BlockARM64GEnoov, v0)
 			return true
 		}
 		// match: (GE (CMPWconst [0] z:(MADDW a x y)) yes no)
 		// cond: z.Uses==1
-		// result: (GE (CMNW a (MULW <x.Type> x y)) yes no)
+		// result: (GEnoov (CMNW a (MULW <x.Type> x y)) yes no)
 		for b.Controls[0].Op == OpARM64CMPWconst {
 			v_0 := b.Controls[0]
 			if auxIntToInt32(v_0.AuxInt) != 0 {
@@ -26648,12 +26648,12 @@
 			v1 := b.NewValue0(v_0.Pos, OpARM64MULW, x.Type)
 			v1.AddArg2(x, y)
 			v0.AddArg2(a, v1)
-			b.resetWithControl(BlockARM64GE, v0)
+			b.resetWithControl(BlockARM64GEnoov, v0)
 			return true
 		}
 		// match: (GE (CMPWconst [0] z:(MSUBW a x y)) yes no)
 		// cond: z.Uses==1
-		// result: (GE (CMPW a (MULW <x.Type> x y)) yes no)
+		// result: (GEnoov (CMPW a (MULW <x.Type> x y)) yes no)
 		for b.Controls[0].Op == OpARM64CMPWconst {
 			v_0 := b.Controls[0]
 			if auxIntToInt32(v_0.AuxInt) != 0 {
@@ -26673,7 +26673,7 @@
 			v1 := b.NewValue0(v_0.Pos, OpARM64MULW, x.Type)
 			v1.AddArg2(x, y)
 			v0.AddArg2(a, v1)
-			b.resetWithControl(BlockARM64GE, v0)
+			b.resetWithControl(BlockARM64GEnoov, v0)
 			return true
 		}
 		// match: (GE (CMPWconst [0] x) yes no)
@@ -26740,6 +26740,15 @@
 			b.resetWithControl(BlockARM64LE, cmp)
 			return true
 		}
+	case BlockARM64GEnoov:
+		// match: (GEnoov (InvertFlags cmp) yes no)
+		// result: (LEnoov cmp yes no)
+		for b.Controls[0].Op == OpARM64InvertFlags {
+			v_0 := b.Controls[0]
+			cmp := v_0.Args[0]
+			b.resetWithControl(BlockARM64LEnoov, cmp)
+			return true
+		}
 	case BlockARM64GT:
 		// match: (GT (CMPWconst [0] x:(ANDconst [c] y)) yes no)
 		// cond: x.Uses == 1
@@ -26845,7 +26854,7 @@
 		}
 		// match: (GT (CMPconst [0] x:(ADDconst [c] y)) yes no)
 		// cond: x.Uses == 1
-		// result: (GT (CMNconst [c] y) yes no)
+		// result: (GTnoov (CMNconst [c] y) yes no)
 		for b.Controls[0].Op == OpARM64CMPconst {
 			v_0 := b.Controls[0]
 			if auxIntToInt64(v_0.AuxInt) != 0 {
@@ -26863,12 +26872,12 @@
 			v0 := b.NewValue0(v_0.Pos, OpARM64CMNconst, types.TypeFlags)
 			v0.AuxInt = int64ToAuxInt(c)
 			v0.AddArg(y)
-			b.resetWithControl(BlockARM64GT, v0)
+			b.resetWithControl(BlockARM64GTnoov, v0)
 			return true
 		}
 		// match: (GT (CMPWconst [0] x:(ADDconst [c] y)) yes no)
 		// cond: x.Uses == 1
-		// result: (GT (CMNWconst [int32(c)] y) yes no)
+		// result: (GTnoov (CMNWconst [int32(c)] y) yes no)
 		for b.Controls[0].Op == OpARM64CMPWconst {
 			v_0 := b.Controls[0]
 			if auxIntToInt32(v_0.AuxInt) != 0 {
@@ -26886,12 +26895,12 @@
 			v0 := b.NewValue0(v_0.Pos, OpARM64CMNWconst, types.TypeFlags)
 			v0.AuxInt = int32ToAuxInt(int32(c))
 			v0.AddArg(y)
-			b.resetWithControl(BlockARM64GT, v0)
+			b.resetWithControl(BlockARM64GTnoov, v0)
 			return true
 		}
 		// match: (GT (CMPconst [0] z:(ADD x y)) yes no)
 		// cond: z.Uses == 1
-		// result: (GT (CMN x y) yes no)
+		// result: (GTnoov (CMN x y) yes no)
 		for b.Controls[0].Op == OpARM64CMPconst {
 			v_0 := b.Controls[0]
 			if auxIntToInt64(v_0.AuxInt) != 0 {
@@ -26912,14 +26921,14 @@
 				}
 				v0 := b.NewValue0(v_0.Pos, OpARM64CMN, types.TypeFlags)
 				v0.AddArg2(x, y)
-				b.resetWithControl(BlockARM64GT, v0)
+				b.resetWithControl(BlockARM64GTnoov, v0)
 				return true
 			}
 			break
 		}
 		// match: (GT (CMPWconst [0] z:(ADD x y)) yes no)
 		// cond: z.Uses == 1
-		// result: (GT (CMNW x y) yes no)
+		// result: (GTnoov (CMNW x y) yes no)
 		for b.Controls[0].Op == OpARM64CMPWconst {
 			v_0 := b.Controls[0]
 			if auxIntToInt32(v_0.AuxInt) != 0 {
@@ -26940,7 +26949,7 @@
 				}
 				v0 := b.NewValue0(v_0.Pos, OpARM64CMNW, types.TypeFlags)
 				v0.AddArg2(x, y)
-				b.resetWithControl(BlockARM64GT, v0)
+				b.resetWithControl(BlockARM64GTnoov, v0)
 				return true
 			}
 			break
@@ -26987,7 +26996,7 @@
 		}
 		// match: (GT (CMPconst [0] z:(MADD a x y)) yes no)
 		// cond: z.Uses==1
-		// result: (GT (CMN a (MUL <x.Type> x y)) yes no)
+		// result: (GTnoov (CMN a (MUL <x.Type> x y)) yes no)
 		for b.Controls[0].Op == OpARM64CMPconst {
 			v_0 := b.Controls[0]
 			if auxIntToInt64(v_0.AuxInt) != 0 {
@@ -27007,12 +27016,12 @@
 			v1 := b.NewValue0(v_0.Pos, OpARM64MUL, x.Type)
 			v1.AddArg2(x, y)
 			v0.AddArg2(a, v1)
-			b.resetWithControl(BlockARM64GT, v0)
+			b.resetWithControl(BlockARM64GTnoov, v0)
 			return true
 		}
 		// match: (GT (CMPconst [0] z:(MSUB a x y)) yes no)
 		// cond: z.Uses==1
-		// result: (GT (CMP a (MUL <x.Type> x y)) yes no)
+		// result: (GTnoov (CMP a (MUL <x.Type> x y)) yes no)
 		for b.Controls[0].Op == OpARM64CMPconst {
 			v_0 := b.Controls[0]
 			if auxIntToInt64(v_0.AuxInt) != 0 {
@@ -27032,12 +27041,12 @@
 			v1 := b.NewValue0(v_0.Pos, OpARM64MUL, x.Type)
 			v1.AddArg2(x, y)
 			v0.AddArg2(a, v1)
-			b.resetWithControl(BlockARM64GT, v0)
+			b.resetWithControl(BlockARM64GTnoov, v0)
 			return true
 		}
 		// match: (GT (CMPWconst [0] z:(MADDW a x y)) yes no)
 		// cond: z.Uses==1
-		// result: (GT (CMNW a (MULW <x.Type> x y)) yes no)
+		// result: (GTnoov (CMNW a (MULW <x.Type> x y)) yes no)
 		for b.Controls[0].Op == OpARM64CMPWconst {
 			v_0 := b.Controls[0]
 			if auxIntToInt32(v_0.AuxInt) != 0 {
@@ -27057,12 +27066,12 @@
 			v1 := b.NewValue0(v_0.Pos, OpARM64MULW, x.Type)
 			v1.AddArg2(x, y)
 			v0.AddArg2(a, v1)
-			b.resetWithControl(BlockARM64GT, v0)
+			b.resetWithControl(BlockARM64GTnoov, v0)
 			return true
 		}
 		// match: (GT (CMPWconst [0] z:(MSUBW a x y)) yes no)
 		// cond: z.Uses==1
-		// result: (GT (CMPW a (MULW <x.Type> x y)) yes no)
+		// result: (GTnoov (CMPW a (MULW <x.Type> x y)) yes no)
 		for b.Controls[0].Op == OpARM64CMPWconst {
 			v_0 := b.Controls[0]
 			if auxIntToInt32(v_0.AuxInt) != 0 {
@@ -27082,7 +27091,7 @@
 			v1 := b.NewValue0(v_0.Pos, OpARM64MULW, x.Type)
 			v1.AddArg2(x, y)
 			v0.AddArg2(a, v1)
-			b.resetWithControl(BlockARM64GT, v0)
+			b.resetWithControl(BlockARM64GTnoov, v0)
 			return true
 		}
 		// match: (GT (FlagEQ) yes no)
@@ -27126,6 +27135,15 @@
 			b.resetWithControl(BlockARM64LT, cmp)
 			return true
 		}
+	case BlockARM64GTnoov:
+		// match: (GTnoov (InvertFlags cmp) yes no)
+		// result: (LTnoov cmp yes no)
+		for b.Controls[0].Op == OpARM64InvertFlags {
+			v_0 := b.Controls[0]
+			cmp := v_0.Args[0]
+			b.resetWithControl(BlockARM64LTnoov, cmp)
+			return true
+		}
 	case BlockIf:
 		// match: (If (Equal cc) yes no)
 		// result: (EQ cc yes no)
@@ -27351,7 +27369,7 @@
 		}
 		// match: (LE (CMPconst [0] x:(ADDconst [c] y)) yes no)
 		// cond: x.Uses == 1
-		// result: (LE (CMNconst [c] y) yes no)
+		// result: (LEnoov (CMNconst [c] y) yes no)
 		for b.Controls[0].Op == OpARM64CMPconst {
 			v_0 := b.Controls[0]
 			if auxIntToInt64(v_0.AuxInt) != 0 {
@@ -27369,12 +27387,12 @@
 			v0 := b.NewValue0(v_0.Pos, OpARM64CMNconst, types.TypeFlags)
 			v0.AuxInt = int64ToAuxInt(c)
 			v0.AddArg(y)
-			b.resetWithControl(BlockARM64LE, v0)
+			b.resetWithControl(BlockARM64LEnoov, v0)
 			return true
 		}
 		// match: (LE (CMPWconst [0] x:(ADDconst [c] y)) yes no)
 		// cond: x.Uses == 1
-		// result: (LE (CMNWconst [int32(c)] y) yes no)
+		// result: (LEnoov (CMNWconst [int32(c)] y) yes no)
 		for b.Controls[0].Op == OpARM64CMPWconst {
 			v_0 := b.Controls[0]
 			if auxIntToInt32(v_0.AuxInt) != 0 {
@@ -27392,12 +27410,12 @@
 			v0 := b.NewValue0(v_0.Pos, OpARM64CMNWconst, types.TypeFlags)
 			v0.AuxInt = int32ToAuxInt(int32(c))
 			v0.AddArg(y)
-			b.resetWithControl(BlockARM64LE, v0)
+			b.resetWithControl(BlockARM64LEnoov, v0)
 			return true
 		}
 		// match: (LE (CMPconst [0] z:(ADD x y)) yes no)
 		// cond: z.Uses == 1
-		// result: (LE (CMN x y) yes no)
+		// result: (LEnoov (CMN x y) yes no)
 		for b.Controls[0].Op == OpARM64CMPconst {
 			v_0 := b.Controls[0]
 			if auxIntToInt64(v_0.AuxInt) != 0 {
@@ -27418,14 +27436,14 @@
 				}
 				v0 := b.NewValue0(v_0.Pos, OpARM64CMN, types.TypeFlags)
 				v0.AddArg2(x, y)
-				b.resetWithControl(BlockARM64LE, v0)
+				b.resetWithControl(BlockARM64LEnoov, v0)
 				return true
 			}
 			break
 		}
 		// match: (LE (CMPWconst [0] z:(ADD x y)) yes no)
 		// cond: z.Uses == 1
-		// result: (LE (CMNW x y) yes no)
+		// result: (LEnoov (CMNW x y) yes no)
 		for b.Controls[0].Op == OpARM64CMPWconst {
 			v_0 := b.Controls[0]
 			if auxIntToInt32(v_0.AuxInt) != 0 {
@@ -27446,7 +27464,7 @@
 				}
 				v0 := b.NewValue0(v_0.Pos, OpARM64CMNW, types.TypeFlags)
 				v0.AddArg2(x, y)
-				b.resetWithControl(BlockARM64LE, v0)
+				b.resetWithControl(BlockARM64LEnoov, v0)
 				return true
 			}
 			break
@@ -27493,7 +27511,7 @@
 		}
 		// match: (LE (CMPconst [0] z:(MADD a x y)) yes no)
 		// cond: z.Uses==1
-		// result: (LE (CMN a (MUL <x.Type> x y)) yes no)
+		// result: (LEnoov (CMN a (MUL <x.Type> x y)) yes no)
 		for b.Controls[0].Op == OpARM64CMPconst {
 			v_0 := b.Controls[0]
 			if auxIntToInt64(v_0.AuxInt) != 0 {
@@ -27513,12 +27531,12 @@
 			v1 := b.NewValue0(v_0.Pos, OpARM64MUL, x.Type)
 			v1.AddArg2(x, y)
 			v0.AddArg2(a, v1)
-			b.resetWithControl(BlockARM64LE, v0)
+			b.resetWithControl(BlockARM64LEnoov, v0)
 			return true
 		}
 		// match: (LE (CMPconst [0] z:(MSUB a x y)) yes no)
 		// cond: z.Uses==1
-		// result: (LE (CMP a (MUL <x.Type> x y)) yes no)
+		// result: (LEnoov (CMP a (MUL <x.Type> x y)) yes no)
 		for b.Controls[0].Op == OpARM64CMPconst {
 			v_0 := b.Controls[0]
 			if auxIntToInt64(v_0.AuxInt) != 0 {
@@ -27538,12 +27556,12 @@
 			v1 := b.NewValue0(v_0.Pos, OpARM64MUL, x.Type)
 			v1.AddArg2(x, y)
 			v0.AddArg2(a, v1)
-			b.resetWithControl(BlockARM64LE, v0)
+			b.resetWithControl(BlockARM64LEnoov, v0)
 			return true
 		}
 		// match: (LE (CMPWconst [0] z:(MADDW a x y)) yes no)
 		// cond: z.Uses==1
-		// result: (LE (CMNW a (MULW <x.Type> x y)) yes no)
+		// result: (LEnoov (CMNW a (MULW <x.Type> x y)) yes no)
 		for b.Controls[0].Op == OpARM64CMPWconst {
 			v_0 := b.Controls[0]
 			if auxIntToInt32(v_0.AuxInt) != 0 {
@@ -27563,12 +27581,12 @@
 			v1 := b.NewValue0(v_0.Pos, OpARM64MULW, x.Type)
 			v1.AddArg2(x, y)
 			v0.AddArg2(a, v1)
-			b.resetWithControl(BlockARM64LE, v0)
+			b.resetWithControl(BlockARM64LEnoov, v0)
 			return true
 		}
 		// match: (LE (CMPWconst [0] z:(MSUBW a x y)) yes no)
 		// cond: z.Uses==1
-		// result: (LE (CMPW a (MULW <x.Type> x y)) yes no)
+		// result: (LEnoov (CMPW a (MULW <x.Type> x y)) yes no)
 		for b.Controls[0].Op == OpARM64CMPWconst {
 			v_0 := b.Controls[0]
 			if auxIntToInt32(v_0.AuxInt) != 0 {
@@ -27588,7 +27606,7 @@
 			v1 := b.NewValue0(v_0.Pos, OpARM64MULW, x.Type)
 			v1.AddArg2(x, y)
 			v0.AddArg2(a, v1)
-			b.resetWithControl(BlockARM64LE, v0)
+			b.resetWithControl(BlockARM64LEnoov, v0)
 			return true
 		}
 		// match: (LE (FlagEQ) yes no)
@@ -27631,6 +27649,15 @@
 			b.resetWithControl(BlockARM64GE, cmp)
 			return true
 		}
+	case BlockARM64LEnoov:
+		// match: (LEnoov (InvertFlags cmp) yes no)
+		// result: (GEnoov cmp yes no)
+		for b.Controls[0].Op == OpARM64InvertFlags {
+			v_0 := b.Controls[0]
+			cmp := v_0.Args[0]
+			b.resetWithControl(BlockARM64GEnoov, cmp)
+			return true
+		}
 	case BlockARM64LT:
 		// match: (LT (CMPWconst [0] x:(ANDconst [c] y)) yes no)
 		// cond: x.Uses == 1
@@ -27736,7 +27763,7 @@
 		}
 		// match: (LT (CMPconst [0] x:(ADDconst [c] y)) yes no)
 		// cond: x.Uses == 1
-		// result: (LT (CMNconst [c] y) yes no)
+		// result: (LTnoov (CMNconst [c] y) yes no)
 		for b.Controls[0].Op == OpARM64CMPconst {
 			v_0 := b.Controls[0]
 			if auxIntToInt64(v_0.AuxInt) != 0 {
@@ -27754,12 +27781,12 @@
 			v0 := b.NewValue0(v_0.Pos, OpARM64CMNconst, types.TypeFlags)
 			v0.AuxInt = int64ToAuxInt(c)
 			v0.AddArg(y)
-			b.resetWithControl(BlockARM64LT, v0)
+			b.resetWithControl(BlockARM64LTnoov, v0)
 			return true
 		}
 		// match: (LT (CMPWconst [0] x:(ADDconst [c] y)) yes no)
 		// cond: x.Uses == 1
-		// result: (LT (CMNWconst [int32(c)] y) yes no)
+		// result: (LTnoov (CMNWconst [int32(c)] y) yes no)
 		for b.Controls[0].Op == OpARM64CMPWconst {
 			v_0 := b.Controls[0]
 			if auxIntToInt32(v_0.AuxInt) != 0 {
@@ -27777,12 +27804,12 @@
 			v0 := b.NewValue0(v_0.Pos, OpARM64CMNWconst, types.TypeFlags)
 			v0.AuxInt = int32ToAuxInt(int32(c))
 			v0.AddArg(y)
-			b.resetWithControl(BlockARM64LT, v0)
+			b.resetWithControl(BlockARM64LTnoov, v0)
 			return true
 		}
 		// match: (LT (CMPconst [0] z:(ADD x y)) yes no)
 		// cond: z.Uses == 1
-		// result: (LT (CMN x y) yes no)
+		// result: (LTnoov (CMN x y) yes no)
 		for b.Controls[0].Op == OpARM64CMPconst {
 			v_0 := b.Controls[0]
 			if auxIntToInt64(v_0.AuxInt) != 0 {
@@ -27803,14 +27830,14 @@
 				}
 				v0 := b.NewValue0(v_0.Pos, OpARM64CMN, types.TypeFlags)
 				v0.AddArg2(x, y)
-				b.resetWithControl(BlockARM64LT, v0)
+				b.resetWithControl(BlockARM64LTnoov, v0)
 				return true
 			}
 			break
 		}
 		// match: (LT (CMPWconst [0] z:(ADD x y)) yes no)
 		// cond: z.Uses == 1
-		// result: (LT (CMNW x y) yes no)
+		// result: (LTnoov (CMNW x y) yes no)
 		for b.Controls[0].Op == OpARM64CMPWconst {
 			v_0 := b.Controls[0]
 			if auxIntToInt32(v_0.AuxInt) != 0 {
@@ -27831,7 +27858,7 @@
 				}
 				v0 := b.NewValue0(v_0.Pos, OpARM64CMNW, types.TypeFlags)
 				v0.AddArg2(x, y)
-				b.resetWithControl(BlockARM64LT, v0)
+				b.resetWithControl(BlockARM64LTnoov, v0)
 				return true
 			}
 			break
@@ -27878,7 +27905,7 @@
 		}
 		// match: (LT (CMPconst [0] z:(MADD a x y)) yes no)
 		// cond: z.Uses==1
-		// result: (LT (CMN a (MUL <x.Type> x y)) yes no)
+		// result: (LTnoov (CMN a (MUL <x.Type> x y)) yes no)
 		for b.Controls[0].Op == OpARM64CMPconst {
 			v_0 := b.Controls[0]
 			if auxIntToInt64(v_0.AuxInt) != 0 {
@@ -27898,12 +27925,12 @@
 			v1 := b.NewValue0(v_0.Pos, OpARM64MUL, x.Type)
 			v1.AddArg2(x, y)
 			v0.AddArg2(a, v1)
-			b.resetWithControl(BlockARM64LT, v0)
+			b.resetWithControl(BlockARM64LTnoov, v0)
 			return true
 		}
 		// match: (LT (CMPconst [0] z:(MSUB a x y)) yes no)
 		// cond: z.Uses==1
-		// result: (LT (CMP a (MUL <x.Type> x y)) yes no)
+		// result: (LTnoov (CMP a (MUL <x.Type> x y)) yes no)
 		for b.Controls[0].Op == OpARM64CMPconst {
 			v_0 := b.Controls[0]
 			if auxIntToInt64(v_0.AuxInt) != 0 {
@@ -27923,12 +27950,12 @@
 			v1 := b.NewValue0(v_0.Pos, OpARM64MUL, x.Type)
 			v1.AddArg2(x, y)
 			v0.AddArg2(a, v1)
-			b.resetWithControl(BlockARM64LT, v0)
+			b.resetWithControl(BlockARM64LTnoov, v0)
 			return true
 		}
 		// match: (LT (CMPWconst [0] z:(MADDW a x y)) yes no)
 		// cond: z.Uses==1
-		// result: (LT (CMNW a (MULW <x.Type> x y)) yes no)
+		// result: (LTnoov (CMNW a (MULW <x.Type> x y)) yes no)
 		for b.Controls[0].Op == OpARM64CMPWconst {
 			v_0 := b.Controls[0]
 			if auxIntToInt32(v_0.AuxInt) != 0 {
@@ -27948,12 +27975,12 @@
 			v1 := b.NewValue0(v_0.Pos, OpARM64MULW, x.Type)
 			v1.AddArg2(x, y)
 			v0.AddArg2(a, v1)
-			b.resetWithControl(BlockARM64LT, v0)
+			b.resetWithControl(BlockARM64LTnoov, v0)
 			return true
 		}
 		// match: (LT (CMPWconst [0] z:(MSUBW a x y)) yes no)
 		// cond: z.Uses==1
-		// result: (LT (CMPW a (MULW <x.Type> x y)) yes no)
+		// result: (LTnoov (CMPW a (MULW <x.Type> x y)) yes no)
 		for b.Controls[0].Op == OpARM64CMPWconst {
 			v_0 := b.Controls[0]
 			if auxIntToInt32(v_0.AuxInt) != 0 {
@@ -27973,7 +28000,7 @@
 			v1 := b.NewValue0(v_0.Pos, OpARM64MULW, x.Type)
 			v1.AddArg2(x, y)
 			v0.AddArg2(a, v1)
-			b.resetWithControl(BlockARM64LT, v0)
+			b.resetWithControl(BlockARM64LTnoov, v0)
 			return true
 		}
 		// match: (LT (CMPWconst [0] x) yes no)
@@ -28041,6 +28068,15 @@
 			b.resetWithControl(BlockARM64GT, cmp)
 			return true
 		}
+	case BlockARM64LTnoov:
+		// match: (LTnoov (InvertFlags cmp) yes no)
+		// result: (GTnoov cmp yes no)
+		for b.Controls[0].Op == OpARM64InvertFlags {
+			v_0 := b.Controls[0]
+			cmp := v_0.Args[0]
+			b.resetWithControl(BlockARM64GTnoov, cmp)
+			return true
+		}
 	case BlockARM64NE:
 		// match: (NE (CMPWconst [0] x:(ANDconst [c] y)) yes no)
 		// cond: x.Uses == 1
diff --git a/src/cmd/compile/internal/ssa/rewriteCond_test.go b/src/cmd/compile/internal/ssa/rewriteCond_test.go
new file mode 100644
index 0000000..b8feff7
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/rewriteCond_test.go
@@ -0,0 +1,536 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+import (
+	"math"
+	"math/rand"
+	"runtime"
+	"testing"
+)
+
+var (
+	x64   int64 = math.MaxInt64 - 2
+	x64b  int64 = math.MaxInt64 - 2
+	x64c  int64 = math.MaxInt64 - 2
+	y64   int64 = math.MinInt64 + 1
+	x32   int32 = math.MaxInt32 - 2
+	x32b  int32 = math.MaxInt32 - 2
+	y32   int32 = math.MinInt32 + 1
+	one64 int64 = 1
+	one32 int32 = 1
+	v64   int64 = 11 // ensure it's not 2**n +/- 1
+	v64_n int64 = -11
+	v32   int32 = 11
+	v32_n int32 = -11
+)
+
+var crTests = []struct {
+	name string
+	tf   func(t *testing.T)
+}{
+	{"AddConst64", testAddConst64},
+	{"AddConst32", testAddConst32},
+	{"AddVar64", testAddVar64},
+	{"AddVar32", testAddVar32},
+	{"MAddVar64", testMAddVar64},
+	{"MAddVar32", testMAddVar32},
+	{"MSubVar64", testMSubVar64},
+	{"MSubVar32", testMSubVar32},
+}
+
+var crBenches = []struct {
+	name string
+	bf   func(b *testing.B)
+}{
+	{"SoloJump", benchSoloJump},
+	{"CombJump", benchCombJump},
+}
+
+// TestCondRewrite tests int32/int64 add/sub/madd/msub operations with boundary
+// values to ensure that the optimization of 'compare to zero' expressions in
+// if statements yields the expected results.
+// 32 rewriting rules are covered, along with at least two scenarios for
+// "Canonicalize the order of arguments to comparisons", which helps with CSE.
+// The tedious if-else structures are necessary to ensure that all concerned
+// rules and machine code sequences are covered.
+// This initially targets arm64; see https://github.com/golang/go/issues/38740
+func TestCondRewrite(t *testing.T) {
+	if runtime.GOARCH == "arm" {
+		t.Skip("fix on arm expected!")
+	}
+	for _, test := range crTests {
+		t.Run(test.name, test.tf)
+	}
+}
+
+// BenchmarkCondRewrite profiles the aforementioned optimization from two angles:
+//   SoloJump: generated branching code has one 'jump', for '<' and '>='
+//   CombJump: generated branching code has two consecutive 'jumps', for '<=' and '>'
+// We expect 'CombJump' to be generally on par with the non-optimized code, and
+// 'SoloJump' to demonstrate some improvement.
+// This initially targets arm64; see https://github.com/golang/go/issues/38740
+func BenchmarkCondRewrite(b *testing.B) {
+	for _, bench := range crBenches {
+		b.Run(bench.name, bench.bf)
+	}
+}
+
+// var +/- const
+func testAddConst64(t *testing.T) {
+	if x64+11 < 0 {
+	} else {
+		t.Errorf("'%#x + 11 < 0' failed", x64)
+	}
+
+	if x64+13 <= 0 {
+	} else {
+		t.Errorf("'%#x + 13 <= 0' failed", x64)
+	}
+
+	if y64-11 > 0 {
+	} else {
+		t.Errorf("'%#x - 11 > 0' failed", y64)
+	}
+
+	if y64-13 >= 0 {
+	} else {
+		t.Errorf("'%#x - 13 >= 0' failed", y64)
+	}
+
+	if x64+19 > 0 {
+		t.Errorf("'%#x + 19 > 0' failed", x64)
+	}
+
+	if x64+23 >= 0 {
+		t.Errorf("'%#x + 23 >= 0' failed", x64)
+	}
+
+	if y64-19 < 0 {
+		t.Errorf("'%#x - 19 < 0' failed", y64)
+	}
+
+	if y64-23 <= 0 {
+		t.Errorf("'%#x - 23 <= 0' failed", y64)
+	}
+}
+
+// 32-bit var +/- const
+func testAddConst32(t *testing.T) {
+	if x32+11 < 0 {
+	} else {
+		t.Errorf("'%#x + 11 < 0' failed", x32)
+	}
+
+	if x32+13 <= 0 {
+	} else {
+		t.Errorf("'%#x + 13 <= 0' failed", x32)
+	}
+
+	if y32-11 > 0 {
+	} else {
+		t.Errorf("'%#x - 11 > 0' failed", y32)
+	}
+
+	if y32-13 >= 0 {
+	} else {
+		t.Errorf("'%#x - 13 >= 0' failed", y32)
+	}
+
+	if x32+19 > 0 {
+		t.Errorf("'%#x + 19 > 0' failed", x32)
+	}
+
+	if x32+23 >= 0 {
+		t.Errorf("'%#x + 23 >= 0' failed", x32)
+	}
+
+	if y32-19 < 0 {
+		t.Errorf("'%#x - 19 < 0' failed", y32)
+	}
+
+	if y32-23 <= 0 {
+		t.Errorf("'%#x - 23 <= 0' failed", y32)
+	}
+}
+
+// var + var
+func testAddVar64(t *testing.T) {
+	if x64+v64 < 0 {
+	} else {
+		t.Errorf("'%#x + %#x < 0' failed", x64, v64)
+	}
+
+	if x64+v64 <= 0 {
+	} else {
+		t.Errorf("'%#x + %#x <= 0' failed", x64, v64)
+	}
+
+	if y64+v64_n > 0 {
+	} else {
+		t.Errorf("'%#x + %#x > 0' failed", y64, v64_n)
+	}
+
+	if y64+v64_n >= 0 {
+	} else {
+		t.Errorf("'%#x + %#x >= 0' failed", y64, v64_n)
+	}
+
+	if x64+v64 > 0 {
+		t.Errorf("'%#x + %#x > 0' failed", x64, v64)
+	}
+
+	if x64+v64 >= 0 {
+		t.Errorf("'%#x + %#x >= 0' failed", x64, v64)
+	}
+
+	if y64+v64_n < 0 {
+		t.Errorf("'%#x + %#x < 0' failed", y64, v64_n)
+	}
+
+	if y64+v64_n <= 0 {
+		t.Errorf("'%#x + %#x <= 0' failed", y64, v64_n)
+	}
+}
+
+// 32-bit var + var
+func testAddVar32(t *testing.T) {
+	if x32+v32 < 0 {
+	} else {
+		t.Errorf("'%#x + %#x < 0' failed", x32, v32)
+	}
+
+	if x32+v32 <= 0 {
+	} else {
+		t.Errorf("'%#x + %#x <= 0' failed", x32, v32)
+	}
+
+	if y32+v32_n > 0 {
+	} else {
+		t.Errorf("'%#x + %#x > 0' failed", y32, v32_n)
+	}
+
+	if y32+v32_n >= 0 {
+	} else {
+		t.Errorf("'%#x + %#x >= 0' failed", y32, v32_n)
+	}
+
+	if x32+v32 > 0 {
+		t.Errorf("'%#x + %#x > 0' failed", x32, v32)
+	}
+
+	if x32+v32 >= 0 {
+		t.Errorf("'%#x + %#x >= 0' failed", x32, v32)
+	}
+
+	if y32+v32_n < 0 {
+		t.Errorf("'%#x + %#x < 0' failed", y32, v32_n)
+	}
+
+	if y32+v32_n <= 0 {
+		t.Errorf("'%#x + %#x <= 0' failed", y32, v32_n)
+	}
+}
+
+// multiply-add
+func testMAddVar64(t *testing.T) {
+	if x64+v64*one64 < 0 {
+	} else {
+		t.Errorf("'%#x + %#x*1 < 0' failed", x64, v64)
+	}
+
+	if x64+v64*one64 <= 0 {
+	} else {
+		t.Errorf("'%#x + %#x*1 <= 0' failed", x64, v64)
+	}
+
+	if y64+v64_n*one64 > 0 {
+	} else {
+		t.Errorf("'%#x + %#x*1 > 0' failed", y64, v64_n)
+	}
+
+	if y64+v64_n*one64 >= 0 {
+	} else {
+		t.Errorf("'%#x + %#x*1 >= 0' failed", y64, v64_n)
+	}
+
+	if x64+v64*one64 > 0 {
+		t.Errorf("'%#x + %#x*1 > 0' failed", x64, v64)
+	}
+
+	if x64+v64*one64 >= 0 {
+		t.Errorf("'%#x + %#x*1 >= 0' failed", x64, v64)
+	}
+
+	if y64+v64_n*one64 < 0 {
+		t.Errorf("'%#x + %#x*1 < 0' failed", y64, v64_n)
+	}
+
+	if y64+v64_n*one64 <= 0 {
+		t.Errorf("'%#x + %#x*1 <= 0' failed", y64, v64_n)
+	}
+}
+
+// 32-bit multiply-add
+func testMAddVar32(t *testing.T) {
+	if x32+v32*one32 < 0 {
+	} else {
+		t.Errorf("'%#x + %#x*1 < 0' failed", x32, v32)
+	}
+
+	if x32+v32*one32 <= 0 {
+	} else {
+		t.Errorf("'%#x + %#x*1 <= 0' failed", x32, v32)
+	}
+
+	if y32+v32_n*one32 > 0 {
+	} else {
+		t.Errorf("'%#x + %#x*1 > 0' failed", y32, v32_n)
+	}
+
+	if y32+v32_n*one32 >= 0 {
+	} else {
+		t.Errorf("'%#x + %#x*1 >= 0' failed", y32, v32_n)
+	}
+
+	if x32+v32*one32 > 0 {
+		t.Errorf("'%#x + %#x*1 > 0' failed", x32, v32)
+	}
+
+	if x32+v32*one32 >= 0 {
+		t.Errorf("'%#x + %#x*1 >= 0' failed", x32, v32)
+	}
+
+	if y32+v32_n*one32 < 0 {
+		t.Errorf("'%#x + %#x*1 < 0' failed", y32, v32_n)
+	}
+
+	if y32+v32_n*one32 <= 0 {
+		t.Errorf("'%#x + %#x*1 <= 0' failed", y32, v32_n)
+	}
+}
+
+// multiply-sub
+func testMSubVar64(t *testing.T) {
+	if x64-v64_n*one64 < 0 {
+	} else {
+		t.Errorf("'%#x - %#x*1 < 0' failed", x64, v64_n)
+	}
+
+	if x64-v64_n*one64 <= 0 {
+	} else {
+		t.Errorf("'%#x - %#x*1 <= 0' failed", x64, v64_n)
+	}
+
+	if y64-v64*one64 > 0 {
+	} else {
+		t.Errorf("'%#x - %#x*1 > 0' failed", y64, v64)
+	}
+
+	if y64-v64*one64 >= 0 {
+	} else {
+		t.Errorf("'%#x - %#x*1 >= 0' failed", y64, v64)
+	}
+
+	if x64-v64_n*one64 > 0 {
+		t.Errorf("'%#x - %#x*1 > 0' failed", x64, v64_n)
+	}
+
+	if x64-v64_n*one64 >= 0 {
+		t.Errorf("'%#x - %#x*1 >= 0' failed", x64, v64_n)
+	}
+
+	if y64-v64*one64 < 0 {
+		t.Errorf("'%#x - %#x*1 < 0' failed", y64, v64)
+	}
+
+	if y64-v64*one64 <= 0 {
+		t.Errorf("'%#x - %#x*1 <= 0' failed", y64, v64)
+	}
+
+	if x64-x64b*one64 < 0 {
+		t.Errorf("'%#x - %#x*1 < 0' failed", x64, x64b)
+	}
+
+	if x64-x64b*one64 >= 0 {
+	} else {
+		t.Errorf("'%#x - %#x*1 >= 0' failed", x64, x64b)
+	}
+}
+
+// 32-bit multiply-sub
+func testMSubVar32(t *testing.T) {
+	if x32-v32_n*one32 < 0 {
+	} else {
+		t.Errorf("'%#x - %#x*1 < 0' failed", x32, v32_n)
+	}
+
+	if x32-v32_n*one32 <= 0 {
+	} else {
+		t.Errorf("'%#x - %#x*1 <= 0' failed", x32, v32_n)
+	}
+
+	if y32-v32*one32 > 0 {
+	} else {
+		t.Errorf("'%#x - %#x*1 > 0' failed", y32, v32)
+	}
+
+	if y32-v32*one32 >= 0 {
+	} else {
+		t.Errorf("'%#x - %#x*1 >= 0' failed", y32, v32)
+	}
+
+	if x32-v32_n*one32 > 0 {
+		t.Errorf("'%#x - %#x*1 > 0' failed", x32, v32_n)
+	}
+
+	if x32-v32_n*one32 >= 0 {
+		t.Errorf("'%#x - %#x*1 >= 0' failed", x32, v32_n)
+	}
+
+	if y32-v32*one32 < 0 {
+		t.Errorf("'%#x - %#x*1 < 0' failed", y32, v32)
+	}
+
+	if y32-v32*one32 <= 0 {
+		t.Errorf("'%#x - %#x*1 <= 0' failed", y32, v32)
+	}
+
+	if x32-x32b*one32 < 0 {
+		t.Errorf("'%#x - %#x*1 < 0' failed", x32, x32b)
+	}
+
+	if x32-x32b*one32 >= 0 {
+	} else {
+		t.Errorf("'%#x - %#x*1 >= 0' failed", x32, x32b)
+	}
+}
+
+var rnd = rand.New(rand.NewSource(0))
+var sink int64
+
+func benchSoloJump(b *testing.B) {
+	r1 := x64
+	r2 := x64b
+	r3 := x64c
+	r4 := y64
+	d := rnd.Int63n(10)
+
+	// 6 out of 10 conditions evaluate to true
+	for i := 0; i < b.N; i++ {
+		if r1+r2 < 0 {
+			d *= 2
+			d /= 2
+		}
+
+		if r1+r3 >= 0 {
+			d *= 2
+			d /= 2
+		}
+
+		if r1+r2*one64 < 0 {
+			d *= 2
+			d /= 2
+		}
+
+		if r2+r3*one64 >= 0 {
+			d *= 2
+			d /= 2
+		}
+
+		if r1-r2*v64 >= 0 {
+			d *= 2
+			d /= 2
+		}
+
+		if r3-r4*v64 < 0 {
+			d *= 2
+			d /= 2
+		}
+
+		if r1+11 < 0 {
+			d *= 2
+			d /= 2
+		}
+
+		if r1+13 >= 0 {
+			d *= 2
+			d /= 2
+		}
+
+		if r4-17 < 0 {
+			d *= 2
+			d /= 2
+		}
+
+		if r4-19 >= 0 {
+			d *= 2
+			d /= 2
+		}
+	}
+	sink = d
+}
+
+func benchCombJump(b *testing.B) {
+	r1 := x64
+	r2 := x64b
+	r3 := x64c
+	r4 := y64
+	d := rnd.Int63n(10)
+
+	// 6 out of 10 conditions evaluate to true
+	for i := 0; i < b.N; i++ {
+		if r1+r2 <= 0 {
+			d *= 2
+			d /= 2
+		}
+
+		if r1+r3 > 0 {
+			d *= 2
+			d /= 2
+		}
+
+		if r1+r2*one64 <= 0 {
+			d *= 2
+			d /= 2
+		}
+
+		if r2+r3*one64 > 0 {
+			d *= 2
+			d /= 2
+		}
+
+		if r1-r2*v64 > 0 {
+			d *= 2
+			d /= 2
+		}
+
+		if r3-r4*v64 <= 0 {
+			d *= 2
+			d /= 2
+		}
+
+		if r1+11 <= 0 {
+			d *= 2
+			d /= 2
+		}
+
+		if r1+13 > 0 {
+			d *= 2
+			d /= 2
+		}
+
+		if r4-17 <= 0 {
+			d *= 2
+			d /= 2
+		}
+
+		if r4-19 > 0 {
+			d *= 2
+			d /= 2
+		}
+	}
+	sink = d
+}
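
A note on what the two benchmarks distinguish, per the arm64 tables earlier in
this CL: '<' and '>=' against zero lower to a single conditional branch on the
sign flag (BMI/BPL, hence SoloJump), while '<=' and '>' need the two
consecutive branches of LEnoov/GTnoov (a sign test plus an equality test,
hence CombJump). A sketch of the two shapes, assuming that lowering:

	func solo(x, y int64) int {
		if x+y < 0 { // LTnoov: CMN then BMI (one jump)
			return 1
		}
		return 0
	}

	func comb(x, y int64) int {
		if x+y <= 0 { // LEnoov: CMN then BMI, BEQ (two consecutive jumps)
			return 1
		}
		return 0
	}
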
diff --git a/src/cmd/compile/internal/x86/ssa.go b/src/cmd/compile/internal/x86/ssa.go
index 0c7e5bd..2de978c 100644
--- a/src/cmd/compile/internal/x86/ssa.go
+++ b/src/cmd/compile/internal/x86/ssa.go
@@ -885,11 +885,11 @@
 	ssa.Block386NAN: {x86.AJPS, x86.AJPC},
 }
 
-var eqfJumps = [2][2]gc.FloatingEQNEJump{
+var eqfJumps = [2][2]gc.IndexJump{
 	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
 	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
 }
-var nefJumps = [2][2]gc.FloatingEQNEJump{
+var nefJumps = [2][2]gc.IndexJump{
 	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
 	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
 }
@@ -929,10 +929,10 @@
 		p.To.Sym = b.Aux.(*obj.LSym)
 
 	case ssa.Block386EQF:
-		s.FPJump(b, next, &eqfJumps)
+		s.CombJump(b, next, &eqfJumps)
 
 	case ssa.Block386NEF:
-		s.FPJump(b, next, &nefJumps)
+		s.CombJump(b, next, &nefJumps)
 
 	case ssa.Block386EQ, ssa.Block386NE,
 		ssa.Block386LT, ssa.Block386GE,
diff --git a/src/cmd/go/internal/clean/clean.go b/src/cmd/go/internal/clean/clean.go
index d5028de..99704cb 100644
--- a/src/cmd/go/internal/clean/clean.go
+++ b/src/cmd/go/internal/clean/clean.go
@@ -137,20 +137,27 @@
 				if cfg.BuildN || cfg.BuildX {
 					b.Showcmd("", "rm -r %s", strings.Join(subdirs, " "))
 				}
-				for _, d := range subdirs {
-					// Only print the first error - there may be many.
-					// This also mimics what os.RemoveAll(dir) would do.
-					if err := os.RemoveAll(d); err != nil && !printedErrors {
-						printedErrors = true
-						base.Errorf("go clean -cache: %v", err)
+				if !cfg.BuildN {
+					for _, d := range subdirs {
+						// Only print the first error - there may be many.
+						// This also mimics what os.RemoveAll(dir) would do.
+						if err := os.RemoveAll(d); err != nil && !printedErrors {
+							printedErrors = true
+							base.Errorf("go clean -cache: %v", err)
+						}
 					}
 				}
 			}
 
 			logFile := filepath.Join(dir, "log.txt")
-			if err := os.RemoveAll(logFile); err != nil && !printedErrors {
-				printedErrors = true
-				base.Errorf("go clean -cache: %v", err)
+			if cfg.BuildN || cfg.BuildX {
+				b.Showcmd("", "rm -f %s", logFile)
+			}
+			if !cfg.BuildN {
+				if err := os.RemoveAll(logFile); err != nil && !printedErrors {
+					printedErrors = true
+					base.Errorf("go clean -cache: %v", err)
+				}
 			}
 		}
 	}
diff --git a/src/cmd/go/internal/modfetch/proxy.go b/src/cmd/go/internal/modfetch/proxy.go
index 3971598..1c35d0b 100644
--- a/src/cmd/go/internal/modfetch/proxy.go
+++ b/src/cmd/go/internal/modfetch/proxy.go
@@ -171,6 +171,14 @@
 				fallBackOnError: fallBackOnError,
 			})
 		}
+
+		if len(proxyOnce.list) == 0 ||
+			len(proxyOnce.list) == 1 && proxyOnce.list[0].url == "noproxy" {
+			// There were no proxies, other than the implicit "noproxy" added when
+			// GONOPROXY is set. This can happen if GOPROXY is a non-empty string
+			// like "," or " ".
+			proxyOnce.err = fmt.Errorf("GOPROXY list is not the empty string, but contains no entries")
+		}
 	})
 
 	return proxyOnce.list, proxyOnce.err
@@ -191,7 +199,7 @@
 		return err
 	}
 	if len(proxies) == 0 {
-		return f("off")
+		panic("GOPROXY list is empty")
 	}
 
 	// We try to report the most helpful error to the user. "direct" and "noproxy"
diff --git a/src/cmd/go/testdata/script/clean_cache_n.txt b/src/cmd/go/testdata/script/clean_cache_n.txt
new file mode 100644
index 0000000..4497b36
--- /dev/null
+++ b/src/cmd/go/testdata/script/clean_cache_n.txt
@@ -0,0 +1,25 @@
+# We're testing cache behavior, so start with a clean GOCACHE.
+env GOCACHE=$WORK/cache
+
+# Build something so that the cache gets populated
+go build main.go
+
+# Check that the cache contains directories before running go clean
+exists $GOCACHE/00
+
+# Run go clean -cache -n and ensure that directories weren't deleted
+go clean -cache -n
+exists $GOCACHE/00
+
+# Re-run go clean -cache without the -n flag to ensure that directories were properly removed
+go clean -cache
+! exists $GOCACHE/00
+
+-- main.go --
+package main
+
+import "fmt"
+
+func main() {
+	fmt.Println("hello!")
+}
diff --git a/src/cmd/go/testdata/script/mod_gonoproxy.txt b/src/cmd/go/testdata/script/mod_gonoproxy.txt
index d7848c7..a9e0ca4 100644
--- a/src/cmd/go/testdata/script/mod_gonoproxy.txt
+++ b/src/cmd/go/testdata/script/mod_gonoproxy.txt
@@ -18,6 +18,11 @@
 env GONOPROXY=none # that is, proxy all despite GOPRIVATE
 go get rsc.io/quote
 
+# When GOPROXY is not empty but contains no entries, an error should be reported.
+env GOPROXY=','
+! go get golang.org/x/text
+stderr '^go get golang.org/x/text: GOPROXY list is not the empty string, but contains no entries$'
+
 # When GOPROXY=off, fetching modules not matched by GONOPROXY fails.
 env GONOPROXY=*/fortune
 env GOPROXY=off
diff --git a/src/cmd/go/testdata/script/test_benchmark_chatty_fail.txt b/src/cmd/go/testdata/script/test_benchmark_chatty_fail.txt
new file mode 100644
index 0000000..6031ead
--- /dev/null
+++ b/src/cmd/go/testdata/script/test_benchmark_chatty_fail.txt
@@ -0,0 +1,32 @@
+# Run chatty tests. Assert on CONT lines.
+! go test chatty_test.go -v -bench . chatty_bench
+
+# Sanity check that output occurs.
+stdout -count=2 'this is sub-0'
+stdout -count=2 'this is sub-1'
+stdout -count=2 'this is sub-2'
+stdout -count=1 'error from sub-0'
+stdout -count=1 'error from sub-1'
+stdout -count=1 'error from sub-2'
+
+# Benchmarks should not print CONT.
+! stdout CONT
+
+-- chatty_test.go --
+package chatty_bench
+
+import (
+	"testing"
+	"fmt"
+)
+
+func BenchmarkChatty(b *testing.B) {
+	for i := 0; i < 3; i++ {
+		b.Run(fmt.Sprintf("sub-%d", i), func(b *testing.B) {
+			for j := 0; j < 2; j++ {
+				b.Logf("this is sub-%d", i)
+			}
+			b.Errorf("error from sub-%d", i)
+		})
+	}
+}
\ No newline at end of file
diff --git a/src/cmd/go/testdata/script/test_benchmark_chatty_success.txt b/src/cmd/go/testdata/script/test_benchmark_chatty_success.txt
new file mode 100644
index 0000000..a1c0d65
--- /dev/null
+++ b/src/cmd/go/testdata/script/test_benchmark_chatty_success.txt
@@ -0,0 +1,29 @@
+# Run chatty tests. Assert on CONT lines.
+go test chatty_test.go -v -bench . chatty_bench
+
+# Sanity check that output happens. We don't provide -count because the amount
+# of output is variable.
+stdout 'this is sub-0'
+stdout 'this is sub-1'
+stdout 'this is sub-2'
+
+# Benchmarks should not print CONT.
+! stdout CONT
+
+-- chatty_test.go --
+package chatty_bench
+
+import (
+	"testing"
+	"fmt"
+)
+
+func BenchmarkChatty(b *testing.B) {
+	for i := 0; i < 3; i++ {
+		b.Run(fmt.Sprintf("sub-%d", i), func(b *testing.B) {
+			for j := 0; j < 2; j++ {
+				b.Logf("this is sub-%d", i)
+			}
+		})
+	}
+}
\ No newline at end of file
diff --git a/src/cmd/go/testdata/script/test_chatty_fail.txt b/src/cmd/go/testdata/script/test_chatty_fail.txt
new file mode 100644
index 0000000..a5ef559
--- /dev/null
+++ b/src/cmd/go/testdata/script/test_chatty_fail.txt
@@ -0,0 +1,32 @@
+# Run chatty tests. Assert on CONT lines.
+! go test chatty_test.go -v
+
+# Sanity check that output occurs.
+stdout -count=2 'this is sub-0'
+stdout -count=2 'this is sub-1'
+stdout -count=2 'this is sub-2'
+stdout -count=1 'error from sub-0'
+stdout -count=1 'error from sub-1'
+stdout -count=1 'error from sub-2'
+
+# Non-parallel tests should not print CONT.
+! stdout CONT
+
+-- chatty_test.go --
+package chatty_test
+
+import (
+	"testing"
+	"fmt"
+)
+
+func TestChatty(t *testing.T) {
+	for i := 0; i < 3; i++ {
+		t.Run(fmt.Sprintf("sub-%d", i), func(t *testing.T) {
+			for j := 0; j < 2; j++ {
+				t.Logf("this is sub-%d", i)
+			}
+			t.Errorf("error from sub-%d", i)
+		})
+	}
+}
\ No newline at end of file
diff --git a/src/cmd/go/testdata/script/test_chatty_parallel_fail.txt b/src/cmd/go/testdata/script/test_chatty_parallel_fail.txt
new file mode 100644
index 0000000..3f7360b
--- /dev/null
+++ b/src/cmd/go/testdata/script/test_chatty_parallel_fail.txt
@@ -0,0 +1,58 @@
+# Run parallel chatty tests. Assert on CONT lines. This test makes sure that
+# multiple parallel outputs have the appropriate CONT lines between them.
+! go test -parallel 3 chatty_parallel_test.go -v
+
+stdout -count=1 '^=== CONT  TestChattyParallel/sub-0\n    chatty_parallel_test.go:38: error from sub-0$'
+stdout -count=1 '^=== CONT  TestChattyParallel/sub-1\n    chatty_parallel_test.go:38: error from sub-1$'
+stdout -count=1 '^=== CONT  TestChattyParallel/sub-2\n    chatty_parallel_test.go:38: error from sub-2$'
+
+# Run parallel chatty tests with -json. Assert on CONT lines as above - make
+# sure there are CONT lines before each output line.
+! go test -json -parallel 3 chatty_parallel_test.go -v
+stdout -count=1 '{"Time":"[0-9TZ:.+-]{20,40}","Action":"output","Package":"command-line-arguments","Test":"TestChattyParallel/sub-0","Output":"=== CONT  TestChattyParallel/sub-0\\n"}\n{"Time":"[0-9TZ:.+-]{20,40}","Action":"output","Package":"command-line-arguments","Test":"TestChattyParallel/sub-0","Output":"    chatty_parallel_test.go:38: error from sub-0\\n"}'
+stdout -count=1 '{"Time":"[0-9TZ:.+-]{20,40}","Action":"output","Package":"command-line-arguments","Test":"TestChattyParallel/sub-1","Output":"=== CONT  TestChattyParallel/sub-1\\n"}\n{"Time":"[0-9TZ:.+-]{20,40}","Action":"output","Package":"command-line-arguments","Test":"TestChattyParallel/sub-1","Output":"    chatty_parallel_test.go:38: error from sub-1\\n"}'
+stdout -count=1 '{"Time":"[0-9TZ:.+-]{20,40}","Action":"output","Package":"command-line-arguments","Test":"TestChattyParallel/sub-2","Output":"=== CONT  TestChattyParallel/sub-2\\n"}\n{"Time":"[0-9TZ:.+-]{20,40}","Action":"output","Package":"command-line-arguments","Test":"TestChattyParallel/sub-2","Output":"    chatty_parallel_test.go:38: error from sub-2\\n"}'
+
+-- chatty_parallel_test.go --
+package chatty_parallel_test
+
+import (
+	"flag"
+	"fmt"
+	"testing"
+)
+
+// This test ensures the order of CONT lines in parallel chatty tests.
+func TestChattyParallel(t *testing.T) {
+	t.Parallel()
+
+	// The number of concurrent tests running. This is closely tied to the
+	// -parallel test flag, so we grab it from the flag rather than setting it
+	// to some constant.
+	parallel := flag.Lookup("test.parallel").Value.(flag.Getter).Get().(int)
+
+	// ready is a synchronization mechanism that causes subtests to execute
+	// round robin.
+	ready := make([]chan bool, parallel)
+	for i := range ready {
+		ready[i] = make(chan bool, 1)
+	}
+	ready[0] <- true
+
+	for i := range ready {
+		i := i
+		t.Run(fmt.Sprintf("sub-%d", i), func(t *testing.T) {
+			t.Parallel()
+
+			// Some basic log output to precede the failures.
+			<-ready[i]
+			t.Logf("this is sub-%d", i)
+			ready[(i+1)%len(ready)] <- true
+
+			// The actual failure messages we care about.
+			<-ready[i]
+			t.Errorf("error from sub-%d", i)
+			ready[(i+1)%len(ready)] <- true
+		})
+	}
+}
diff --git a/src/cmd/go/testdata/script/test_chatty_parallel_success.txt b/src/cmd/go/testdata/script/test_chatty_parallel_success.txt
new file mode 100644
index 0000000..4a86d74
--- /dev/null
+++ b/src/cmd/go/testdata/script/test_chatty_parallel_success.txt
@@ -0,0 +1,52 @@
+# Run parallel chatty tests. Assert on CONT lines. This test makes sure that
+# multiple parallel outputs have the appropriate CONT lines between them.
+go test -parallel 3 chatty_parallel_test.go -v
+stdout -count=2 '^=== CONT  TestChattyParallel/sub-0\n    chatty_parallel_test.go:32: this is sub-0$'
+stdout -count=2 '^=== CONT  TestChattyParallel/sub-1\n    chatty_parallel_test.go:32: this is sub-1$'
+stdout -count=2 '^=== CONT  TestChattyParallel/sub-2\n    chatty_parallel_test.go:32: this is sub-2$'
+
+# Run parallel chatty tests with -json. Assert on CONT lines as above - make
+# sure there are CONT lines before each output line.
+go test -json -parallel 3 chatty_parallel_test.go -v
+stdout -count=2 '{"Time":"[0-9TZ:.+-]{20,40}","Action":"output","Package":"command-line-arguments","Test":"TestChattyParallel/sub-0","Output":"=== CONT  TestChattyParallel/sub-0\\n"}\n{"Time":"[0-9TZ:.+-]{20,40}","Action":"output","Package":"command-line-arguments","Test":"TestChattyParallel/sub-0","Output":"    chatty_parallel_test.go:32: this is sub-0\\n"}'
+stdout -count=2 '{"Time":"[0-9TZ:.+-]{20,40}","Action":"output","Package":"command-line-arguments","Test":"TestChattyParallel/sub-1","Output":"=== CONT  TestChattyParallel/sub-1\\n"}\n{"Time":"[0-9TZ:.+-]{20,40}","Action":"output","Package":"command-line-arguments","Test":"TestChattyParallel/sub-1","Output":"    chatty_parallel_test.go:32: this is sub-1\\n"}'
+stdout -count=2 '{"Time":"[0-9TZ:.+-]{20,40}","Action":"output","Package":"command-line-arguments","Test":"TestChattyParallel/sub-2","Output":"=== CONT  TestChattyParallel/sub-2\\n"}\n{"Time":"[0-9TZ:.+-]{20,40}","Action":"output","Package":"command-line-arguments","Test":"TestChattyParallel/sub-2","Output":"    chatty_parallel_test.go:32: this is sub-2\\n"}'
+
+-- chatty_parallel_test.go --
+package chatty_parallel_test
+
+import (
+	"flag"
+	"fmt"
+	"testing"
+)
+
+// This test ensures the order of CONT lines in parallel chatty tests.
+func TestChattyParallel(t *testing.T) {
+	t.Parallel()
+
+	// The number of concurrent tests running. This is closely tied to the
+	// -parallel test flag, so we grab it from the flag rather than setting it
+	// to some constant.
+	parallel := flag.Lookup("test.parallel").Value.(flag.Getter).Get().(int)
+
+	// ready is a synchronization mechanism that causes subtests to execute
+	// round robin.
+	ready := make([]chan bool, parallel)
+	for i := range ready {
+		ready[i] = make(chan bool, 1)
+	}
+	ready[0] <- true
+
+	for i := range ready {
+		i := i
+		t.Run(fmt.Sprintf("sub-%d", i), func(t *testing.T) {
+			t.Parallel()
+			for j := 0; j < 2; j++ {
+				<-ready[i]
+				t.Logf("this is sub-%d", i)
+				ready[(i+1)%len(ready)] <- true
+			}
+		})
+	}
+}
diff --git a/src/cmd/go/testdata/script/test_chatty_success.txt b/src/cmd/go/testdata/script/test_chatty_success.txt
new file mode 100644
index 0000000..8bfa569
--- /dev/null
+++ b/src/cmd/go/testdata/script/test_chatty_success.txt
@@ -0,0 +1,27 @@
+# Run chatty tests. Assert on CONT lines.
+go test chatty_test.go -v
+
+# Non-parallel tests should not print CONT.
+! stdout CONT
+
+# The assertion is condensed into one line so that it precisely matches the
+# output, rather than skipping lines and allowing rogue CONT lines.
+stdout '=== RUN   TestChatty\n=== RUN   TestChatty/sub-0\n    chatty_test.go:12: this is sub-0\n    chatty_test.go:12: this is sub-0\n=== RUN   TestChatty/sub-1\n    chatty_test.go:12: this is sub-1\n    chatty_test.go:12: this is sub-1\n=== RUN   TestChatty/sub-2\n    chatty_test.go:12: this is sub-2\n    chatty_test.go:12: this is sub-2\n--- PASS: TestChatty'
+
+-- chatty_test.go --
+package chatty_test
+
+import (
+	"fmt"
+	"testing"
+)
+
+func TestChatty(t *testing.T) {
+	for i := 0; i < 3; i++ {
+		t.Run(fmt.Sprintf("sub-%d", i), func(t *testing.T) {
+			for j := 0; j < 2; j++ {
+				t.Logf("this is sub-%d", i)
+			}
+		})
+	}
+}
\ No newline at end of file
diff --git a/src/cmd/go/testdata/script/test_flags.txt b/src/cmd/go/testdata/script/test_flags.txt
index 226b8d1..27d718a 100644
--- a/src/cmd/go/testdata/script/test_flags.txt
+++ b/src/cmd/go/testdata/script/test_flags.txt
@@ -30,23 +30,23 @@
 # with the 'test.' prefix in the GOFLAGS entry...
 env GOFLAGS='-test.timeout=24h0m0s -count=1'
 go test -v -x ./x
-stdout 'TestLogTimeout: .*: 24h0m0s$'
+stdout '.*: 24h0m0s$'
 stderr '-test.count=1'
 
 # ...or without.
 env GOFLAGS='-timeout=24h0m0s -count=1'
 go test -v -x ./x
-stdout 'TestLogTimeout: .*: 24h0m0s$'
+stdout '.*: 24h0m0s$'
 stderr '-test.count=1'
 
 # Arguments from the command line should override GOFLAGS...
 go test -v -x -timeout=25h0m0s ./x
-stdout 'TestLogTimeout: .*: 25h0m0s$'
+stdout '.*: 25h0m0s$'
 stderr '-test.count=1'
 
 # ...even if they use a different flag name.
 go test -v -x -test.timeout=26h0m0s ./x
-stdout 'TestLogTimeout: .*: 26h0m0s$'
+stdout '.*: 26h0m0s$'
 stderr '-test\.timeout=26h0m0s'
 ! stderr 'timeout=24h0m0s'
 stderr '-test.count=1'
diff --git a/src/cmd/go/testdata/script/test_regexps.txt b/src/cmd/go/testdata/script/test_regexps.txt
index 39dedbf..a616195 100644
--- a/src/cmd/go/testdata/script/test_regexps.txt
+++ b/src/cmd/go/testdata/script/test_regexps.txt
@@ -4,28 +4,28 @@
 
 # TestX is run, twice
 stdout -count=2 '^=== RUN   TestX$'
-stdout -count=2 '^    TestX: x_test.go:6: LOG: X running$'
+stdout -count=2 '^    x_test.go:6: LOG: X running$'
 
 # TestX/Y is run, twice
 stdout -count=2 '^=== RUN   TestX/Y$'
-stdout -count=2 '^    TestX/Y: x_test.go:8: LOG: Y running$'
+stdout -count=2 '^    x_test.go:8: LOG: Y running$'
 
 # TestXX is run, twice
 stdout -count=2 '^=== RUN   TestXX$'
-stdout -count=2 '^    TestXX: z_test.go:10: LOG: XX running'
+stdout -count=2 '^    z_test.go:10: LOG: XX running'
 
 # TestZ is not run
 ! stdout '^=== RUN   TestZ$'
 
 # BenchmarkX is run with N=1 once, only to discover what sub-benchmarks it has,
 # and should not print a final summary line.
-stdout -count=1 '^\s+BenchmarkX: x_test.go:13: LOG: X running N=1$'
+stdout -count=1 '^    x_test.go:13: LOG: X running N=1$'
 ! stdout '^\s+BenchmarkX: x_test.go:13: LOG: X running N=\d\d+'
 ! stdout 'BenchmarkX\s+\d+'
 
 # Same for BenchmarkXX.
-stdout -count=1 '^\s+BenchmarkXX: z_test.go:18: LOG: XX running N=1$'
-! stdout  '^\s+BenchmarkXX: z_test.go:18: LOG: XX running N=\d\d+'
+stdout -count=1 '^    z_test.go:18: LOG: XX running N=1$'
+! stdout  '^    z_test.go:18: LOG: XX running N=\d\d+'
 ! stdout 'BenchmarkXX\s+\d+'
 
 # BenchmarkX/Y is run in full twice due to -count=2.
@@ -33,7 +33,7 @@
 # but may cap out at N=1e9.
 # We don't actually care what the final iteration count is, but it should be
 # a large number, and the last iteration count prints right before the results.
-stdout -count=2 '^\s+BenchmarkX/Y: x_test.go:15: LOG: Y running N=[1-9]\d{4,}\nBenchmarkX/Y\s+\d+'
+stdout -count=2 '^    x_test.go:15: LOG: Y running N=[1-9]\d{4,}\nBenchmarkX/Y\s+\d+'
 
 -- testregexp/x_test.go --
 package x
diff --git a/src/cmd/link/internal/ld/lib.go b/src/cmd/link/internal/ld/lib.go
index b0c8f91..429ea98 100644
--- a/src/cmd/link/internal/ld/lib.go
+++ b/src/cmd/link/internal/ld/lib.go
@@ -1423,6 +1423,7 @@
 		}
 	}
 
+	var altLinker string
 	if ctxt.IsELF && ctxt.DynlinkingGo() {
 		// We force all symbol resolution to be done at program startup
 		// because lazy PLT resolution can use large amounts of stack at
@@ -1434,6 +1435,11 @@
 		// from the beginning of the section (like sym.STYPE).
 		argv = append(argv, "-Wl,-znocopyreloc")
 
+		if objabi.GOOS == "android" {
+			// Use lld to avoid errors from the default linker (issue #38838)
+			altLinker = "lld"
+		}
+
 		if ctxt.Arch.InFamily(sys.ARM, sys.ARM64) && objabi.GOOS == "linux" {
 			// On ARM, the GNU linker will generate COPY relocations
 			// even with -znocopyreloc set.
@@ -1443,7 +1449,7 @@
 			// generating COPY relocations.
 			//
 			// In both cases, switch to gold.
-			argv = append(argv, "-fuse-ld=gold")
+			altLinker = "gold"
 
 			// If gold is not installed, gcc will silently switch
 			// back to ld.bfd. So we parse the version information
@@ -1456,10 +1462,9 @@
 			}
 		}
 	}
-
 	if ctxt.Arch.Family == sys.ARM64 && objabi.GOOS == "freebsd" {
 		// Switch to ld.bfd on freebsd/arm64.
-		argv = append(argv, "-fuse-ld=bfd")
+		altLinker = "bfd"
 
 		// Provide a useful error if ld.bfd is missing.
 		cmd := exec.Command(*flagExtld, "-fuse-ld=bfd", "-Wl,--version")
@@ -1469,6 +1474,9 @@
 			}
 		}
 	}
+	if altLinker != "" {
+		argv = append(argv, "-fuse-ld="+altLinker)
+	}
 
 	if ctxt.IsELF && len(buildinfo) > 0 {
 		argv = append(argv, fmt.Sprintf("-Wl,--build-id=0x%x", buildinfo))
@@ -1505,7 +1513,7 @@
 	}
 
 	const compressDWARF = "-Wl,--compress-debug-sections=zlib-gnu"
-	if ctxt.compressDWARF && linkerFlagSupported(argv[0], compressDWARF) {
+	if ctxt.compressDWARF && linkerFlagSupported(argv[0], altLinker, compressDWARF) {
 		argv = append(argv, compressDWARF)
 	}
 
@@ -1595,7 +1603,7 @@
 	if ctxt.BuildMode == BuildModeExe && !ctxt.linkShared {
 		// GCC uses -no-pie, clang uses -nopie.
 		for _, nopie := range []string{"-no-pie", "-nopie"} {
-			if linkerFlagSupported(argv[0], nopie) {
+			if linkerFlagSupported(argv[0], altLinker, nopie) {
 				argv = append(argv, nopie)
 				break
 			}
@@ -1696,7 +1704,7 @@
 
 var createTrivialCOnce sync.Once
 
-func linkerFlagSupported(linker, flag string) bool {
+func linkerFlagSupported(linker, altLinker, flag string) bool {
 	createTrivialCOnce.Do(func() {
 		src := filepath.Join(*flagTmpdir, "trivial.c")
 		if err := ioutil.WriteFile(src, []byte("int main() { return 0; }"), 0666); err != nil {
@@ -1756,6 +1764,9 @@
 		}
 	}
 
+	if altLinker != "" {
+		flags = append(flags, "-fuse-ld="+altLinker)
+	}
 	flags = append(flags, flag, "trivial.c")
 
 	cmd := exec.Command(linker, flags...)
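
The probe above works by compiling a trivial C file with the candidate flag and
treating any failure as "unsupported"; with this change it also passes
-fuse-ld=<altLinker> so the check runs against the linker that will actually be
used. A minimal standalone sketch of the same idea follows; the helper name
linkerAccepts, the fixed temp directory handling, and the assumption of a host
compiler driver named cc are illustrative only, not the linker's actual code.

	package main

	import (
		"fmt"
		"io/ioutil"
		"os/exec"
		"path/filepath"
	)

	// linkerAccepts reports whether the compiler driver accepts flag by
	// compiling a trivial C program with it; any error, including a
	// nonzero exit status, is treated as "unsupported".
	func linkerAccepts(linker, flag string) bool {
		dir, err := ioutil.TempDir("", "flagprobe")
		if err != nil {
			return false
		}
		src := filepath.Join(dir, "trivial.c")
		if err := ioutil.WriteFile(src, []byte("int main() { return 0; }"), 0666); err != nil {
			return false
		}
		cmd := exec.Command(linker, flag, src, "-o", filepath.Join(dir, "a.out"))
		return cmd.Run() == nil
	}

	func main() {
		fmt.Println(linkerAccepts("cc", "-fuse-ld=gold"))
	}
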
diff --git a/src/cmd/link/internal/ld/outbuf.go b/src/cmd/link/internal/ld/outbuf.go
index 4ce2111..09162ae 100644
--- a/src/cmd/link/internal/ld/outbuf.go
+++ b/src/cmd/link/internal/ld/outbuf.go
@@ -115,7 +115,6 @@
 	if out.isMmapped() {
 		out.copyHeap()
 		out.munmap()
-		return nil
 	}
 	if out.f == nil {
 		return nil
diff --git a/src/crypto/x509/x509.go b/src/crypto/x509/x509.go
index ecf4407..288c9c6 100644
--- a/src/crypto/x509/x509.go
+++ b/src/crypto/x509/x509.go
@@ -2100,7 +2100,7 @@
 		return nil, errors.New("x509: no SerialNumber given")
 	}
 
-	if template.BasicConstraintsValid && !template.IsCA && (template.MaxPathLen != 0 || template.MaxPathLenZero) {
+	if template.BasicConstraintsValid && !template.IsCA && template.MaxPathLen != -1 && (template.MaxPathLen != 0 || template.MaxPathLenZero) {
 		return nil, errors.New("x509: only CAs are allowed to specify MaxPathLen")
 	}
 
diff --git a/src/crypto/x509/x509_test.go b/src/crypto/x509/x509_test.go
index 7e00147..0141021 100644
--- a/src/crypto/x509/x509_test.go
+++ b/src/crypto/x509/x509_test.go
@@ -1674,11 +1674,15 @@
 		BasicConstraintsValid: true,
 		IsCA:                  false,
 	}
-	cert := serialiseAndParse(t, template)
-	if m := cert.MaxPathLen; m != -1 {
+	if m := serialiseAndParse(t, template).MaxPathLen; m != -1 {
 		t.Errorf("MaxPathLen should be -1 when IsCa is false, got %d", m)
 	}
 
+	template.MaxPathLen = -1
+	if m := serialiseAndParse(t, template).MaxPathLen; m != -1 {
+		t.Errorf("MaxPathLen should be -1 when IsCA is false and MaxPathLen set to -1, got %d", m)
+	}
+
 	template.MaxPathLen = 5
 	if _, err := CreateCertificate(rand.Reader, template, template, &testPrivateKey.PublicKey, testPrivateKey); err == nil {
 		t.Error("specifying a MaxPathLen when IsCA is false should fail")
@@ -1691,8 +1695,7 @@
 	}
 
 	template.BasicConstraintsValid = false
-	cert2 := serialiseAndParse(t, template)
-	if m := cert2.MaxPathLen; m != 0 {
+	if m := serialiseAndParse(t, template).MaxPathLen; m != 0 {
 		t.Errorf("Bad MaxPathLen should be ignored if BasicConstraintsValid is false, got %d", m)
 	}
 }
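
The user-visible effect of the x509 change above: a non-CA template may now set
MaxPathLen to -1 explicitly instead of relying on the zero value. A hedged
sketch (self-signed with a throwaway key; not code from the change itself):

	package main

	import (
		"crypto/rand"
		"crypto/rsa"
		"crypto/x509"
		"fmt"
		"math/big"
	)

	func main() {
		key, err := rsa.GenerateKey(rand.Reader, 2048)
		if err != nil {
			panic(err)
		}
		// With the added "template.MaxPathLen != -1" condition, -1 on a
		// non-CA template is accepted as "no path length constraint"
		// rather than rejected as a CA-only field.
		tmpl := &x509.Certificate{
			SerialNumber:          big.NewInt(1),
			BasicConstraintsValid: true,
			IsCA:                  false,
			MaxPathLen:            -1,
		}
		der, err := x509.CreateCertificate(rand.Reader, tmpl, tmpl, &key.PublicKey, key)
		fmt.Println(len(der) > 0, err) // expect: true <nil>
	}
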
diff --git a/src/encoding/asn1/asn1.go b/src/encoding/asn1/asn1.go
index 90ba577..d809dde 100644
--- a/src/encoding/asn1/asn1.go
+++ b/src/encoding/asn1/asn1.go
@@ -1037,6 +1037,12 @@
 // Because Unmarshal uses the reflect package, the structs
 // being written to must use upper case field names.
 //
+// After parsing b, any bytes that were leftover and not used to fill
+// val will be returned in rest. When parsing a SEQUENCE into a struct,
+// any trailing elements of the SEQUENCE that do not have matching
+// fields in val will not be included in rest, as these are considered
+// valid elements of the SEQUENCE and not trailing data.
+//
 // An ASN.1 INTEGER can be written to an int, int32, int64,
 // or *big.Int (from the math/big package).
 // If the encoded value does not fit in the Go type,
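
A small example of the rest behavior documented above; the DER bytes encode
SEQUENCE { INTEGER 7, INTEGER 8 } followed by one stray byte, and the values
are illustrative only:

	package main

	import (
		"encoding/asn1"
		"fmt"
	)

	// Point has a field for only the first SEQUENCE element.
	type Point struct {
		X int
	}

	func main() {
		// SEQUENCE { INTEGER 7, INTEGER 8 } followed by one trailing byte.
		b := []byte{0x30, 0x06, 0x02, 0x01, 0x07, 0x02, 0x01, 0x08, 0xFF}
		var p Point
		// The second INTEGER has no matching field in Point, but it is
		// still part of the SEQUENCE, so it is consumed; only the final
		// stray byte comes back in rest.
		rest, err := asn1.Unmarshal(b, &p)
		fmt.Println(p.X, rest, err) // expect: 7 [255] <nil>
	}
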
diff --git a/src/encoding/json/decode.go b/src/encoding/json/decode.go
index 5acc6d8..5f34af4 100644
--- a/src/encoding/json/decode.go
+++ b/src/encoding/json/decode.go
@@ -677,6 +677,7 @@
 		return nil
 	}
 
+	var mapElem reflect.Value
 	origErrorContext := d.errorContext
 
 	for {
@@ -700,66 +701,17 @@
 		}
 
 		// Figure out field corresponding to key.
-		var kv, subv reflect.Value
+		var subv reflect.Value
 		destring := false // whether the value is wrapped in a string to be decoded first
 
 		if v.Kind() == reflect.Map {
-			// First, figure out the key value from the input.
-			kt := t.Key()
-			switch {
-			case reflect.PtrTo(kt).Implements(textUnmarshalerType):
-				kv = reflect.New(kt)
-				if err := d.literalStore(item, kv, true); err != nil {
-					return err
-				}
-				kv = kv.Elem()
-			case kt.Kind() == reflect.String:
-				kv = reflect.ValueOf(key).Convert(kt)
-			default:
-				switch kt.Kind() {
-				case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
-					s := string(key)
-					n, err := strconv.ParseInt(s, 10, 64)
-					if err != nil || reflect.Zero(kt).OverflowInt(n) {
-						d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: kt, Offset: int64(start + 1)})
-						break
-					}
-					kv = reflect.ValueOf(n).Convert(kt)
-				case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
-					s := string(key)
-					n, err := strconv.ParseUint(s, 10, 64)
-					if err != nil || reflect.Zero(kt).OverflowUint(n) {
-						d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: kt, Offset: int64(start + 1)})
-						break
-					}
-					kv = reflect.ValueOf(n).Convert(kt)
-				default:
-					panic("json: Unexpected key type") // should never occur
-				}
+			elemType := t.Elem()
+			if !mapElem.IsValid() {
+				mapElem = reflect.New(elemType).Elem()
+			} else {
+				mapElem.Set(reflect.Zero(elemType))
 			}
-
-			// Then, decide what element value we'll decode into.
-			et := t.Elem()
-			if kv.IsValid() {
-				if existing := v.MapIndex(kv); !existing.IsValid() {
-					// Nothing to reuse.
-				} else if et.Kind() == reflect.Ptr {
-					// Pointer; decode directly into it if non-nil.
-					if !existing.IsNil() {
-						subv = existing
-					}
-				} else {
-					// Non-pointer. Make a copy and decode into the
-					// addressable copy. Don't just use a new/zero
-					// value, as that would lose existing data.
-					subv = reflect.New(et).Elem()
-					subv.Set(existing)
-				}
-			}
-			if !subv.IsValid() {
-				// We couldn't reuse an existing value.
-				subv = reflect.New(et).Elem()
-			}
+			subv = mapElem
 		} else {
 			var f *field
 			if i, ok := fields.nameIndex[string(key)]; ok {
@@ -838,8 +790,43 @@
 
 		// Write value back to map;
 		// if using struct, subv points into struct already.
-		if v.Kind() == reflect.Map && kv.IsValid() {
-			v.SetMapIndex(kv, subv)
+		if v.Kind() == reflect.Map {
+			kt := t.Key()
+			var kv reflect.Value
+			switch {
+			case reflect.PtrTo(kt).Implements(textUnmarshalerType):
+				kv = reflect.New(kt)
+				if err := d.literalStore(item, kv, true); err != nil {
+					return err
+				}
+				kv = kv.Elem()
+			case kt.Kind() == reflect.String:
+				kv = reflect.ValueOf(key).Convert(kt)
+			default:
+				switch kt.Kind() {
+				case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
+					s := string(key)
+					n, err := strconv.ParseInt(s, 10, 64)
+					if err != nil || reflect.Zero(kt).OverflowInt(n) {
+						d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: kt, Offset: int64(start + 1)})
+						break
+					}
+					kv = reflect.ValueOf(n).Convert(kt)
+				case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
+					s := string(key)
+					n, err := strconv.ParseUint(s, 10, 64)
+					if err != nil || reflect.Zero(kt).OverflowUint(n) {
+						d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: kt, Offset: int64(start + 1)})
+						break
+					}
+					kv = reflect.ValueOf(n).Convert(kt)
+				default:
+					panic("json: Unexpected key type") // should never occur
+				}
+			}
+			if kv.IsValid() {
+				v.SetMapIndex(kv, subv)
+			}
 		}
 
 		// Next token must be , or }.
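
One observable consequence of dropping the element-reuse logic above: decoding
into a map always fills a fresh zero element, so fields absent from the JSON
are reset rather than merged into the existing entry. A sketch:

	package main

	import (
		"encoding/json"
		"fmt"
	)

	type S struct{ A, B int }

	func main() {
		m := map[string]S{"k": {A: 1, B: 2}}
		// The element for "k" is decoded into a fresh zero S and then
		// stored with SetMapIndex, so B does not keep its old value.
		if err := json.Unmarshal([]byte(`{"k":{"A":5}}`), &m); err != nil {
			panic(err)
		}
		fmt.Println(m["k"]) // expect: {5 0}
	}
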
diff --git a/src/encoding/json/decode_test.go b/src/encoding/json/decode_test.go
index a62488d..5ac1022 100644
--- a/src/encoding/json/decode_test.go
+++ b/src/encoding/json/decode_test.go
@@ -2569,57 +2569,3 @@
 		}
 	}
 }
-
-func TestUnmarshalMapPointerElem(t *testing.T) {
-	type S struct{ Unchanged, Changed int }
-	input := []byte(`{"S":{"Changed":5}}`)
-	want := S{1, 5}
-
-	// First, a map with struct pointer elements. The key-value pair exists,
-	// so reuse the existing value.
-	s := &S{1, 2}
-	ptrMap := map[string]*S{"S": s}
-	if err := Unmarshal(input, &ptrMap); err != nil {
-		t.Fatal(err)
-	}
-	if s != ptrMap["S"] {
-		t.Fatal("struct pointer element in map was completely replaced")
-	}
-	if got := *s; got != want {
-		t.Fatalf("want %#v, got %#v", want, got)
-	}
-
-	// Second, a map with struct elements. The key-value pair exists, but
-	// the value isn't addresable, so make a copy and use that.
-	s = &S{1, 2}
-	strMap := map[string]S{"S": *s}
-	if err := Unmarshal(input, &strMap); err != nil {
-		t.Fatal(err)
-	}
-	if *s == strMap["S"] {
-		t.Fatal("struct element in map wasn't copied")
-	}
-	if got := strMap["S"]; got != want {
-		t.Fatalf("want %#v, got %#v", want, got)
-	}
-
-	// Finally, check the cases where the key-value pair exists, but the
-	// value is zero.
-	want = S{0, 5}
-
-	ptrMap = map[string]*S{"S": nil}
-	if err := Unmarshal(input, &ptrMap); err != nil {
-		t.Fatal(err)
-	}
-	if got := *ptrMap["S"]; got != want {
-		t.Fatalf("want %#v, got %#v", want, got)
-	}
-
-	strMap = map[string]S{"S": {}}
-	if err := Unmarshal(input, &strMap); err != nil {
-		t.Fatal(err)
-	}
-	if got := strMap["S"]; got != want {
-		t.Fatalf("want %#v, got %#v", want, got)
-	}
-}
diff --git a/src/encoding/xml/marshal.go b/src/encoding/xml/marshal.go
index 2440a51..d8a04a9 100644
--- a/src/encoding/xml/marshal.go
+++ b/src/encoding/xml/marshal.go
@@ -482,8 +482,11 @@
 		xmlname := tinfo.xmlname
 		if xmlname.name != "" {
 			start.Name.Space, start.Name.Local = xmlname.xmlns, xmlname.name
-		} else if v, ok := xmlname.value(val).Interface().(Name); ok && v.Local != "" {
-			start.Name = v
+		} else {
+			fv := xmlname.value(val, dontInitNilPointers)
+			if v, ok := fv.Interface().(Name); ok && v.Local != "" {
+				start.Name = v
+			}
 		}
 	}
 	if start.Name.Local == "" && finfo != nil {
@@ -503,7 +506,7 @@
 		if finfo.flags&fAttr == 0 {
 			continue
 		}
-		fv := finfo.value(val)
+		fv := finfo.value(val, dontInitNilPointers)
 
 		if finfo.flags&fOmitEmpty != 0 && isEmptyValue(fv) {
 			continue
@@ -806,7 +809,12 @@
 		if finfo.flags&fAttr != 0 {
 			continue
 		}
-		vf := finfo.value(val)
+		vf := finfo.value(val, dontInitNilPointers)
+		if !vf.IsValid() {
+			// The field is behind an anonymous struct field that's
+			// nil. Skip it.
+			continue
+		}
 
 		switch finfo.flags & fMode {
 		case fCDATA, fCharData:
diff --git a/src/encoding/xml/marshal_test.go b/src/encoding/xml/marshal_test.go
index 6085ddb..d2e5137 100644
--- a/src/encoding/xml/marshal_test.go
+++ b/src/encoding/xml/marshal_test.go
@@ -309,6 +309,11 @@
 	Contents *string `xml:",chardata"`
 }
 
+type PointerAnonFields struct {
+	*MyInt
+	*NamedType
+}
+
 type MyMarshalerTest struct {
 }
 
@@ -889,6 +894,18 @@
 			`</EmbedA>`,
 	},
 
+	// Anonymous struct pointer field which is nil
+	{
+		Value:     &EmbedB{},
+		ExpectXML: `<EmbedB><FieldB></FieldB></EmbedB>`,
+	},
+
+	// Other kinds of nil anonymous fields
+	{
+		Value:     &PointerAnonFields{},
+		ExpectXML: `<PointerAnonFields></PointerAnonFields>`,
+	},
+
 	// Test that name casing matters
 	{
 		Value:     &NameCasing{Xy: "mixed", XY: "upper", XyA: "mixedA", XYA: "upperA"},
diff --git a/src/encoding/xml/read.go b/src/encoding/xml/read.go
index 10a60ee..ef5df3f 100644
--- a/src/encoding/xml/read.go
+++ b/src/encoding/xml/read.go
@@ -435,7 +435,7 @@
 				}
 				return UnmarshalError(e)
 			}
-			fv := finfo.value(sv)
+			fv := finfo.value(sv, initNilPointers)
 			if _, ok := fv.Interface().(Name); ok {
 				fv.Set(reflect.ValueOf(start.Name))
 			}
@@ -449,7 +449,7 @@
 				finfo := &tinfo.fields[i]
 				switch finfo.flags & fMode {
 				case fAttr:
-					strv := finfo.value(sv)
+					strv := finfo.value(sv, initNilPointers)
 					if a.Name.Local == finfo.name && (finfo.xmlns == "" || finfo.xmlns == a.Name.Space) {
 						if err := d.unmarshalAttr(strv, a); err != nil {
 							return err
@@ -465,7 +465,7 @@
 			}
 			if !handled && any >= 0 {
 				finfo := &tinfo.fields[any]
-				strv := finfo.value(sv)
+				strv := finfo.value(sv, initNilPointers)
 				if err := d.unmarshalAttr(strv, a); err != nil {
 					return err
 				}
@@ -478,22 +478,22 @@
 			switch finfo.flags & fMode {
 			case fCDATA, fCharData:
 				if !saveData.IsValid() {
-					saveData = finfo.value(sv)
+					saveData = finfo.value(sv, initNilPointers)
 				}
 
 			case fComment:
 				if !saveComment.IsValid() {
-					saveComment = finfo.value(sv)
+					saveComment = finfo.value(sv, initNilPointers)
 				}
 
 			case fAny, fAny | fElement:
 				if !saveAny.IsValid() {
-					saveAny = finfo.value(sv)
+					saveAny = finfo.value(sv, initNilPointers)
 				}
 
 			case fInnerXML:
 				if !saveXML.IsValid() {
-					saveXML = finfo.value(sv)
+					saveXML = finfo.value(sv, initNilPointers)
 					if d.saved == nil {
 						saveXMLIndex = 0
 						d.saved = new(bytes.Buffer)
@@ -687,7 +687,7 @@
 		}
 		if len(finfo.parents) == len(parents) && finfo.name == start.Name.Local {
 			// It's a perfect match, unmarshal the field.
-			return true, d.unmarshal(finfo.value(sv), start)
+			return true, d.unmarshal(finfo.value(sv, initNilPointers), start)
 		}
 		if len(finfo.parents) > len(parents) && finfo.parents[len(parents)] == start.Name.Local {
 			// It's a prefix for the field. Break and recurse
diff --git a/src/encoding/xml/typeinfo.go b/src/encoding/xml/typeinfo.go
index 639952c..f30fe58 100644
--- a/src/encoding/xml/typeinfo.go
+++ b/src/encoding/xml/typeinfo.go
@@ -344,15 +344,25 @@
 	return fmt.Sprintf("%s field %q with tag %q conflicts with field %q with tag %q", e.Struct, e.Field1, e.Tag1, e.Field2, e.Tag2)
 }
 
+const (
+	initNilPointers     = true
+	dontInitNilPointers = false
+)
+
 // value returns v's field value corresponding to finfo.
-// It's equivalent to v.FieldByIndex(finfo.idx), but initializes
-// and dereferences pointers as necessary.
-func (finfo *fieldInfo) value(v reflect.Value) reflect.Value {
+// It's equivalent to v.FieldByIndex(finfo.idx), but when passed
+// initNilPointers, it initializes and dereferences pointers as necessary.
+// When passed dontInitNilPointers and a nil pointer is reached, the function
+// returns a zero reflect.Value.
+func (finfo *fieldInfo) value(v reflect.Value, shouldInitNilPointers bool) reflect.Value {
 	for i, x := range finfo.idx {
 		if i > 0 {
 			t := v.Type()
 			if t.Kind() == reflect.Ptr && t.Elem().Kind() == reflect.Struct {
 				if v.IsNil() {
+					if !shouldInitNilPointers {
+						return reflect.Value{}
+					}
 					v.Set(reflect.New(v.Type().Elem()))
 				}
 				v = v.Elem()
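
Together with the marshal changes above, dontInitNilPointers means a nil
anonymous struct pointer is simply skipped during Marshal instead of being
initialized just to read its zero fields. A sketch with made-up types:

	package main

	import (
		"encoding/xml"
		"fmt"
	)

	type Inner struct {
		Value string
	}

	type Outer struct {
		*Inner        // nil anonymous pointer: its fields are skipped
		Name   string
	}

	func main() {
		// Inner is nil, so no <Value> element is emitted and the nil
		// pointer is left untouched.
		out, err := xml.Marshal(&Outer{Name: "x"})
		fmt.Println(string(out), err) // expect: <Outer><Name>x</Name></Outer> <nil>
	}
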
diff --git a/src/go/build/build_test.go b/src/go/build/build_test.go
index 7151ba1..a7f2a3e 100644
--- a/src/go/build/build_test.go
+++ b/src/go/build/build_test.go
@@ -5,6 +5,7 @@
 package build
 
 import (
+	"flag"
 	"internal/testenv"
 	"io"
 	"io/ioutil"
@@ -16,6 +17,14 @@
 	"testing"
 )
 
+func TestMain(m *testing.M) {
+	flag.Parse()
+	if goTool, err := testenv.GoTool(); err == nil {
+		os.Setenv("PATH", filepath.Dir(goTool)+string(os.PathListSeparator)+os.Getenv("PATH"))
+	}
+	os.Exit(m.Run())
+}
+
 func TestMatch(t *testing.T) {
 	ctxt := Default
 	what := "default"
diff --git a/src/go/internal/srcimporter/srcimporter_test.go b/src/go/internal/srcimporter/srcimporter_test.go
index c456b8e..102ac43 100644
--- a/src/go/internal/srcimporter/srcimporter_test.go
+++ b/src/go/internal/srcimporter/srcimporter_test.go
@@ -5,11 +5,13 @@
 package srcimporter
 
 import (
+	"flag"
 	"go/build"
 	"go/token"
 	"go/types"
 	"internal/testenv"
 	"io/ioutil"
+	"os"
 	"path"
 	"path/filepath"
 	"runtime"
@@ -18,6 +20,14 @@
 	"time"
 )
 
+func TestMain(m *testing.M) {
+	flag.Parse()
+	if goTool, err := testenv.GoTool(); err == nil {
+		os.Setenv("PATH", filepath.Dir(goTool)+string(os.PathListSeparator)+os.Getenv("PATH"))
+	}
+	os.Exit(m.Run())
+}
+
 const maxTime = 2 * time.Second
 
 var importer = New(&build.Default, token.NewFileSet(), make(map[string]*types.Package))
diff --git a/src/math/exp_amd64.s b/src/math/exp_amd64.s
index 525745d..b3e1c22 100644
--- a/src/math/exp_amd64.s
+++ b/src/math/exp_amd64.s
@@ -8,7 +8,7 @@
 // methods of elementary functions suitable for SIMD computation", Proc.
 // of International Supercomputing Conference 2010 (ISC'10), pp. 25 -- 32
 // (May 2010). The paper is available at
-// https://www.springerlink.com/content/340228x165742104/
+// https://link.springer.com/article/10.1007/s00450-010-0108-2
 //
 // The original code and the constants below are from the author's
 // implementation available at http://freshmeat.net/projects/sleef.
diff --git a/src/net/http/request.go b/src/net/http/request.go
index e386f13..e924e2a 100644
--- a/src/net/http/request.go
+++ b/src/net/http/request.go
@@ -425,6 +425,8 @@
 // AddCookie does not attach more than one Cookie header field. That
 // means all cookies, if any, are written into the same line,
 // separated by semicolon.
+// AddCookie only sanitizes c's name and value, and does not sanitize
+// a Cookie header already present in the request.
 func (r *Request) AddCookie(c *Cookie) {
 	s := fmt.Sprintf("%s=%s", sanitizeCookieName(c.Name), sanitizeCookieValue(c.Value))
 	if c := r.Header.Get("Cookie"); c != "" {
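
A small illustration of the documented behavior: AddCookie sanitizes the
cookie it is given but leaves a Cookie header that is already set alone.

	package main

	import (
		"fmt"
		"net/http"
	)

	func main() {
		req, err := http.NewRequest("GET", "https://example.com/", nil)
		if err != nil {
			panic(err)
		}
		// A pre-existing Cookie header is not sanitized by AddCookie.
		req.Header.Set("Cookie", "raw=unsanitized value")
		// The added cookie's value is sanitized (quoted because of the
		// space) and appended to the same header line.
		req.AddCookie(&http.Cookie{Name: "session", Value: "a b"})
		fmt.Println(req.Header.Get("Cookie"))
		// expect: raw=unsanitized value; session="a b"
	}
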
diff --git a/src/net/http/transfer.go b/src/net/http/transfer.go
index 6d5ea05..9019afb 100644
--- a/src/net/http/transfer.go
+++ b/src/net/http/transfer.go
@@ -335,7 +335,7 @@
 	var ncopy int64
 
 	// Write body. We "unwrap" the body first if it was wrapped in a
-	// nopCloser. This is to ensure that we can take advantage of
+	// nopCloser or readTrackingBody. This is to ensure that we can take advantage of
 	// OS-level optimizations in the event that the body is an
 	// *os.File.
 	if t.Body != nil {
@@ -413,7 +413,10 @@
 	if reflect.TypeOf(t.Body) == nopCloserType {
 		return reflect.ValueOf(t.Body).Field(0).Interface().(io.Reader)
 	}
-
+	if r, ok := t.Body.(*readTrackingBody); ok {
+		r.didRead = true
+		return r.ReadCloser
+	}
 	return t.Body
 }
 
@@ -1075,6 +1078,9 @@
 	if reflect.TypeOf(r) == nopCloserType {
 		return isKnownInMemoryReader(reflect.ValueOf(r).Field(0).Interface().(io.Reader))
 	}
+	if r, ok := r.(*readTrackingBody); ok {
+		return isKnownInMemoryReader(r.ReadCloser)
+	}
 	return false
 }
 
diff --git a/src/net/http/transport.go b/src/net/http/transport.go
index b1705d5..da86b261 100644
--- a/src/net/http/transport.go
+++ b/src/net/http/transport.go
@@ -511,10 +511,17 @@
 		}
 	}
 
+	req = setupRewindBody(req)
+
 	if altRT := t.alternateRoundTripper(req); altRT != nil {
 		if resp, err := altRT.RoundTrip(req); err != ErrSkipAltProtocol {
 			return resp, err
 		}
+		var err error
+		req, err = rewindBody(req)
+		if err != nil {
+			return nil, err
+		}
 	}
 	if !isHTTP {
 		req.closeBody()
@@ -584,18 +591,59 @@
 		testHookRoundTripRetried()
 
 		// Rewind the body if we're able to.
-		if req.GetBody != nil {
-			newReq := *req
-			var err error
-			newReq.Body, err = req.GetBody()
-			if err != nil {
-				return nil, err
-			}
-			req = &newReq
+		req, err = rewindBody(req)
+		if err != nil {
+			return nil, err
 		}
 	}
 }
 
+var errCannotRewind = errors.New("net/http: cannot rewind body after connection loss")
+
+type readTrackingBody struct {
+	io.ReadCloser
+	didRead bool
+}
+
+func (r *readTrackingBody) Read(data []byte) (int, error) {
+	r.didRead = true
+	return r.ReadCloser.Read(data)
+}
+
+// setupRewindBody returns a new request with a custom body wrapper
+// that can report whether the body needs rewinding.
+// This lets rewindBody avoid an error result when the request
+// does not have GetBody but the body hasn't been read at all yet.
+func setupRewindBody(req *Request) *Request {
+	if req.Body == nil || req.Body == NoBody {
+		return req
+	}
+	newReq := *req
+	newReq.Body = &readTrackingBody{ReadCloser: req.Body}
+	return &newReq
+}
+
+// rewindBody returns a new request with the body rewound.
+// It returns req unmodified if the body does not need rewinding.
+// rewindBody takes care of closing req.Body when appropriate
+// (in all cases except when rewindBody returns req unmodified).
+func rewindBody(req *Request) (rewound *Request, err error) {
+	if req.Body == nil || req.Body == NoBody || !req.Body.(*readTrackingBody).didRead {
+		return req, nil // nothing to rewind
+	}
+	req.closeBody()
+	if req.GetBody == nil {
+		return nil, errCannotRewind
+	}
+	body, err := req.GetBody()
+	if err != nil {
+		return nil, err
+	}
+	newReq := *req
+	newReq.Body = &readTrackingBody{ReadCloser: body}
+	return &newReq, nil
+}
+
 // shouldRetryRequest reports whether we should retry sending a failed
 // HTTP request on a new connection. The non-nil input error is the
 // error from roundTrip.
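
rewindBody can only recover a consumed body when req.GetBody is set.
http.NewRequest sets it automatically for common in-memory body types; a
hand-rolled equivalent looks roughly like this (the URL is a placeholder):

	package main

	import (
		"bytes"
		"fmt"
		"io"
		"io/ioutil"
		"net/http"
	)

	func main() {
		payload := []byte("data")
		req, err := http.NewRequest("POST", "http://example.invalid/", bytes.NewReader(payload))
		if err != nil {
			panic(err)
		}
		// NewRequest already sets GetBody for *bytes.Reader bodies; this
		// explicit version is what rewindBody relies on to replay a body
		// that has already been (partially) read.
		req.GetBody = func() (io.ReadCloser, error) {
			return ioutil.NopCloser(bytes.NewReader(payload)), nil
		}
		rc, _ := req.GetBody()
		b, _ := ioutil.ReadAll(rc)
		fmt.Println(string(b)) // expect: data
	}
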
diff --git a/src/net/http/transport_test.go b/src/net/http/transport_test.go
index f4014d9..5ccb3d1 100644
--- a/src/net/http/transport_test.go
+++ b/src/net/http/transport_test.go
@@ -6196,3 +6196,29 @@
 		return nil, errors.New("request was not canceled")
 	}
 }
+
+type roundTripFunc func(r *Request) (*Response, error)
+
+func (f roundTripFunc) RoundTrip(r *Request) (*Response, error) { return f(r) }
+
+// Issue 32441: body is not reset after ErrSkipAltProtocol
+func TestIssue32441(t *testing.T) {
+	defer afterTest(t)
+	ts := httptest.NewServer(HandlerFunc(func(w ResponseWriter, r *Request) {
+		if n, _ := io.Copy(ioutil.Discard, r.Body); n == 0 {
+			t.Error("body length is zero")
+		}
+	}))
+	defer ts.Close()
+	c := ts.Client()
+	c.Transport.(*Transport).RegisterProtocol("http", roundTripFunc(func(r *Request) (*Response, error) {
+		// Drain the body to trigger the failure condition on the actual request to the server.
+		if n, _ := io.Copy(ioutil.Discard, r.Body); n == 0 {
+			t.Error("body length is zero during round trip")
+		}
+		return nil, ErrSkipAltProtocol
+	}))
+	if _, err := c.Post(ts.URL, "application/octet-stream", bytes.NewBufferString("data")); err != nil {
+		t.Error(err)
+	}
+}
diff --git a/src/os/file.go b/src/os/file.go
index 93ba4d7..a2b71cb 100644
--- a/src/os/file.go
+++ b/src/os/file.go
@@ -384,7 +384,7 @@
 // within this one and use that.
 //
 // On Unix systems, it returns $XDG_CACHE_HOME as specified by
-// https://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html if
+// https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html if
 // non-empty, else $HOME/.cache.
 // On Darwin, it returns $HOME/Library/Caches.
 // On Windows, it returns %LocalAppData%.
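
Usage follows the doc comment above: take the returned base directory and
create an application-specific subdirectory inside it ("myapp" here is a
placeholder name):

	package main

	import (
		"fmt"
		"os"
		"path/filepath"
	)

	func main() {
		dir, err := os.UserCacheDir()
		if err != nil {
			// No cache location could be determined (e.g. $HOME unset).
			fmt.Println("no cache dir:", err)
			return
		}
		// Per the doc comment, use an application-specific subdirectory.
		appCache := filepath.Join(dir, "myapp")
		fmt.Println(os.MkdirAll(appCache, 0700), appCache)
	}
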
diff --git a/src/os/file_windows.go b/src/os/file_windows.go
index 0d8c0fd..cc695fd 100644
--- a/src/os/file_windows.go
+++ b/src/os/file_windows.go
@@ -330,8 +330,18 @@
 
 	// need the exact location of the oldname when it's relative to determine if it's a directory
 	destpath := oldname
-	if !isAbs(oldname) {
-		destpath = dirname(newname) + `\` + oldname
+	if v := volumeName(oldname); v == "" {
+		if len(oldname) > 0 && IsPathSeparator(oldname[0]) {
+			// oldname is relative to the volume containing newname.
+			if v = volumeName(newname); v != "" {
+				// Prepend the volume explicitly, because it may be different from the
+				// volume of the current working directory.
+				destpath = v + oldname
+			}
+		} else {
+			// oldname is relative to newname.
+			destpath = dirname(newname) + `\` + oldname
+		}
 	}
 
 	fi, err := Stat(destpath)
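
The three branches above distinguish how oldname is anchored. A rough
standalone rendering of the same decision, using the exported
filepath.VolumeName in place of the unexported helpers (a sketch; it behaves
as shown only on Windows):

	package main

	import (
		"fmt"
		"path/filepath"
	)

	// classify mirrors the cases handled above: oldname may carry its own
	// volume, be rooted in newname's volume, or be relative to newname's
	// directory.
	func classify(oldname, newname string) string {
		if v := filepath.VolumeName(oldname); v != "" {
			return oldname // already has an explicit volume
		}
		if len(oldname) > 0 && oldname[0] == '\\' {
			return filepath.VolumeName(newname) + oldname // volume-relative
		}
		return filepath.Dir(newname) + `\` + oldname // relative to newname
	}

	func main() {
		fmt.Println(classify(`\temp\dir`, `D:\links\link`)) // D:\temp\dir
		fmt.Println(classify(`dir`, `D:\links\link`))       // D:\links\dir
	}
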
diff --git a/src/os/os_windows_test.go b/src/os/os_windows_test.go
index 8c14103..f03ec75 100644
--- a/src/os/os_windows_test.go
+++ b/src/os/os_windows_test.go
@@ -965,9 +965,10 @@
 // works on Windows when developer mode is active.
 // This is supported starting Windows 10 (1703, v10.0.14972).
 func TestSymlinkCreation(t *testing.T) {
-	if !isWindowsDeveloperModeActive() {
+	if !testenv.HasSymlink() && !isWindowsDeveloperModeActive() {
 		t.Skip("Windows developer mode is not active")
 	}
+	t.Parallel()
 
 	temp, err := ioutil.TempDir("", "TestSymlinkCreation")
 	if err != nil {
@@ -1005,6 +1006,122 @@
 	return val != 0
 }
 
+// TestRootRelativeDirSymlink verifies that symlinks to paths relative to the
+// drive root (beginning with "\" but no volume name) are created with the
+// correct symlink type.
+// (See https://golang.org/issue/39183#issuecomment-632175728.)
+func TestRootRelativeDirSymlink(t *testing.T) {
+	testenv.MustHaveSymlink(t)
+	t.Parallel()
+
+	temp := t.TempDir()
+	dir := filepath.Join(temp, "dir")
+	if err := os.Mkdir(dir, 0755); err != nil {
+		t.Fatal(err)
+	}
+
+	volumeRelDir := strings.TrimPrefix(dir, filepath.VolumeName(dir)) // leaves leading backslash
+
+	link := filepath.Join(temp, "link")
+	err := os.Symlink(volumeRelDir, link)
+	if err != nil {
+		t.Fatal(err)
+	}
+	t.Logf("Symlink(%#q, %#q)", volumeRelDir, link)
+
+	f, err := os.Open(link)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer f.Close()
+	if fi, err := f.Stat(); err != nil {
+		t.Fatal(err)
+	} else if !fi.IsDir() {
+		t.Errorf("Open(%#q).Stat().IsDir() = false; want true", f.Name())
+	}
+}
+
+// TestWorkingDirectoryRelativeSymlink verifies that symlinks to paths relative
+// to the current working directory for the drive, such as "C:File.txt", are
+// correctly converted to absolute links of the correct symlink type (per
+// https://docs.microsoft.com/en-us/windows/win32/fileio/creating-symbolic-links).
+func TestWorkingDirectoryRelativeSymlink(t *testing.T) {
+	testenv.MustHaveSymlink(t)
+
+	// Construct a directory to be symlinked.
+	temp := t.TempDir()
+	if v := filepath.VolumeName(temp); len(v) < 2 || v[1] != ':' {
+		t.Skipf("Can't test relative symlinks: t.TempDir() (%#q) does not begin with a drive letter.", temp)
+	}
+
+	absDir := filepath.Join(temp, `dir\sub`)
+	if err := os.MkdirAll(absDir, 0755); err != nil {
+		t.Fatal(err)
+	}
+
+	// Change to the temporary directory and construct a
+	// working-directory-relative symlink.
+	oldwd, err := os.Getwd()
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer func() {
+		if err := os.Chdir(oldwd); err != nil {
+			t.Fatal(err)
+		}
+	}()
+	if err := os.Chdir(temp); err != nil {
+		t.Fatal(err)
+	}
+	t.Logf("Chdir(%#q)", temp)
+
+	wdRelDir := filepath.VolumeName(temp) + `dir\sub` // no backslash after volume.
+	absLink := filepath.Join(temp, "link")
+	err = os.Symlink(wdRelDir, absLink)
+	if err != nil {
+		t.Fatal(err)
+	}
+	t.Logf("Symlink(%#q, %#q)", wdRelDir, absLink)
+
+	// Now change back to the original working directory and verify that the
+	// symlink still refers to its original path and is correctly marked as a
+	// directory.
+	if err := os.Chdir(oldwd); err != nil {
+		t.Fatal(err)
+	}
+	t.Logf("Chdir(%#q)", oldwd)
+
+	resolved, err := os.Readlink(absLink)
+	if err != nil {
+		t.Errorf("Readlink(%#q): %v", absLink, err)
+	} else if resolved != absDir {
+		t.Errorf("Readlink(%#q) = %#q; want %#q", absLink, resolved, absDir)
+	}
+
+	linkFile, err := os.Open(absLink)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer linkFile.Close()
+
+	linkInfo, err := linkFile.Stat()
+	if err != nil {
+		t.Fatal(err)
+	}
+	if !linkInfo.IsDir() {
+		t.Errorf("Open(%#q).Stat().IsDir() = false; want true", absLink)
+	}
+
+	absInfo, err := os.Stat(absDir)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if !os.SameFile(absInfo, linkInfo) {
+		t.Errorf("SameFile(Stat(%#q), Open(%#q).Stat()) = false; want true", absDir, absLink)
+	}
+}
+
 // TestStatOfInvalidName is regression test for issue #24999.
 func TestStatOfInvalidName(t *testing.T) {
 	_, err := os.Stat("*.go")
diff --git a/src/run.bat b/src/run.bat
index 896c4ac..69c1818 100644
--- a/src/run.bat
+++ b/src/run.bat
@@ -30,7 +30,7 @@
 
 if x%1==x--no-rebuild goto norebuild
 echo ##### Building packages and commands.
-go install -a -v std cmd
+..\bin\go install -a -v std cmd
 if errorlevel 1 goto fail
 echo.
 :norebuild
diff --git a/src/runtime/crash_test.go b/src/runtime/crash_test.go
index 5333b60..34f30c9 100644
--- a/src/runtime/crash_test.go
+++ b/src/runtime/crash_test.go
@@ -55,6 +55,16 @@
 		t.Fatal(err)
 	}
 
+	return runBuiltTestProg(t, exe, name, env...)
+}
+
+func runBuiltTestProg(t *testing.T, exe, name string, env ...string) string {
+	if *flagQuick {
+		t.Skip("-quick")
+	}
+
+	testenv.MustHaveGoBuild(t)
+
 	cmd := testenv.CleanCmdEnv(exec.Command(exe, name))
 	cmd.Env = append(cmd.Env, env...)
 	if testing.Short() {
@@ -64,7 +74,7 @@
 	cmd.Stdout = &b
 	cmd.Stderr = &b
 	if err := cmd.Start(); err != nil {
-		t.Fatalf("starting %s %s: %v", binary, name, err)
+		t.Fatalf("starting %s %s: %v", exe, name, err)
 	}
 
 	// If the process doesn't complete within 1 minute,
@@ -92,7 +102,7 @@
 	}()
 
 	if err := cmd.Wait(); err != nil {
-		t.Logf("%s %s exit status: %v", binary, name, err)
+		t.Logf("%s %s exit status: %v", exe, name, err)
 	}
 	close(done)
 
diff --git a/src/runtime/gc_test.go b/src/runtime/gc_test.go
index 4c281ce..c5c8a4c 100644
--- a/src/runtime/gc_test.go
+++ b/src/runtime/gc_test.go
@@ -12,6 +12,7 @@
 	"runtime"
 	"runtime/debug"
 	"sort"
+	"strings"
 	"sync"
 	"sync/atomic"
 	"testing"
@@ -192,6 +193,15 @@
 	}
 }
 
+func TestGcZombieReporting(t *testing.T) {
+	// This test is somewhat sensitive to how the allocator works.
+	got := runTestProg(t, "testprog", "GCZombie")
+	want := "found pointer to free object"
+	if !strings.Contains(got, want) {
+		t.Fatalf("expected %q in output, but got %q", want, got)
+	}
+}
+
 func BenchmarkSetTypePtr(b *testing.B) {
 	benchSetType(b, new(*byte))
 }
diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go
index f9b03d3..3aa3afc 100644
--- a/src/runtime/mgcsweep.go
+++ b/src/runtime/mgcsweep.go
@@ -439,10 +439,31 @@
 		}
 	}
 
+	// Check for zombie objects.
+	if s.freeindex < s.nelems {
+		// Everything < freeindex is allocated and hence
+		// cannot be a zombie.
+		//
+		// Check the first bitmap byte, where we have to be
+		// careful with freeindex.
+		obj := s.freeindex
+		if (*s.gcmarkBits.bytep(obj / 8)&^*s.allocBits.bytep(obj / 8))>>(obj%8) != 0 {
+			s.reportZombies()
+		}
+		// Check remaining bytes.
+		for i := obj/8 + 1; i < divRoundUp(s.nelems, 8); i++ {
+			if *s.gcmarkBits.bytep(i)&^*s.allocBits.bytep(i) != 0 {
+				s.reportZombies()
+			}
+		}
+	}
+
 	// Count the number of free objects in this span.
 	nalloc := uint16(s.countAlloc())
 	nfreed := s.allocCount - nalloc
 	if nalloc > s.allocCount {
+		// The zombie check above should have caught this in
+		// more detail.
 		print("runtime: nelems=", s.nelems, " nalloc=", nalloc, " previous allocCount=", s.allocCount, " nfreed=", nfreed, "\n")
 		throw("sweep increased allocation count")
 	}
@@ -755,6 +776,57 @@
 	return res
 }
 
+// reportZombies reports any marked but free objects in s and throws.
+//
+// This generally means one of the following:
+//
+// 1. User code converted a pointer to a uintptr and then back
+// unsafely, and a GC ran while the uintptr was the only reference to
+// an object.
+//
+// 2. User code (or a compiler bug) constructed a bad pointer that
+// points to a free slot, often a past-the-end pointer.
+//
+// 3. The GC two cycles ago missed a pointer and freed a live object,
+// but it was still live in the last cycle, so this GC cycle found a
+// pointer to that object and marked it.
+func (s *mspan) reportZombies() {
+	printlock()
+	print("runtime: marked free object in span ", s, ", elemsize=", s.elemsize, " freeindex=", s.freeindex, " (bad use of unsafe.Pointer? try -d=checkptr)\n")
+	mbits := s.markBitsForBase()
+	abits := s.allocBitsForIndex(0)
+	for i := uintptr(0); i < s.nelems; i++ {
+		addr := s.base() + i*s.elemsize
+		print(hex(addr))
+		alloc := i < s.freeindex || abits.isMarked()
+		if alloc {
+			print(" alloc")
+		} else {
+			print(" free ")
+		}
+		if mbits.isMarked() {
+			print(" marked  ")
+		} else {
+			print(" unmarked")
+		}
+		zombie := mbits.isMarked() && !alloc
+		if zombie {
+			print(" zombie")
+		}
+		print("\n")
+		if zombie {
+			length := s.elemsize
+			if length > 1024 {
+				length = 1024
+			}
+			hexdumpWords(addr, addr+length, nil)
+		}
+		mbits.advance()
+		abits.advance()
+	}
+	throw("found pointer to free object")
+}
+
 // deductSweepCredit deducts sweep credit for allocating a span of
 // size spanBytes. This must be performed *before* the span is
 // allocated to ensure the system has enough credit. If necessary, it
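
Cause 1 in the reportZombies comment is the classic unsafe round-trip through
uintptr. A deliberately wrong sketch of that pattern (do not write code like
this; it is exactly the kind of dangling reference the zombie check and
-d=checkptr exist to diagnose):

	package main

	import (
		"fmt"
		"runtime"
		"unsafe"
	)

	func main() {
		// Stash a pointer as a uintptr, drop the real reference, and
		// force a GC. The uintptr does not keep the object alive, so
		// converting it back may yield a pointer into a freed (or
		// reused) slot.
		p := new(int)
		*p = 42
		addr := uintptr(unsafe.Pointer(p))
		p = nil
		runtime.GC()
		bad := (*int)(unsafe.Pointer(addr)) // invalid use of unsafe.Pointer
		fmt.Println(bad)
	}
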
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index b423026..e5823dd 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -1820,10 +1820,16 @@
 	if GOARCH == "wasm" { // no threads on wasm yet
 		return
 	}
+
+	// Disable preemption to guarantee that the template thread will be
+	// created before a park once haveTemplateThread is set.
+	mp := acquirem()
 	if !atomic.Cas(&newmHandoff.haveTemplateThread, 0, 1) {
+		releasem(mp)
 		return
 	}
 	newm(templateThread, nil)
+	releasem(mp)
 }
 
 // templateThread is a thread in a known-good state that exists solely
diff --git a/src/runtime/proc_test.go b/src/runtime/proc_test.go
index 764a279..de4dec3 100644
--- a/src/runtime/proc_test.go
+++ b/src/runtime/proc_test.go
@@ -7,6 +7,7 @@
 import (
 	"fmt"
 	"internal/race"
+	"internal/testenv"
 	"math"
 	"net"
 	"runtime"
@@ -929,6 +930,29 @@
 	}
 }
 
+func TestLockOSThreadTemplateThreadRace(t *testing.T) {
+	testenv.MustHaveGoRun(t)
+
+	exe, err := buildTestProg(t, "testprog")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	iterations := 100
+	if testing.Short() {
+		// Reduce run time to ~100ms, with much lower probability of
+		// catching issues.
+		iterations = 5
+	}
+	for i := 0; i < iterations; i++ {
+		want := "OK\n"
+		output := runBuiltTestProg(t, exe, "LockOSThreadTemplateThreadRace")
+		if output != want {
+			t.Fatalf("run %d: want %q, got %q", i, want, output)
+		}
+	}
+}
+
 // fakeSyscall emulates a system call.
 //go:nosplit
 func fakeSyscall(duration time.Duration) {
diff --git a/src/runtime/rt0_openbsd_arm64.s b/src/runtime/rt0_openbsd_arm64.s
index ab8ea97..12408f2 100644
--- a/src/runtime/rt0_openbsd_arm64.s
+++ b/src/runtime/rt0_openbsd_arm64.s
@@ -4,6 +4,12 @@
 
 #include "textflag.h"
 
+// See comment in runtime/sys_openbsd_arm64.s re this construction.
+#define	INVOKE_SYSCALL	\
+	SVC;		\
+	NOOP;		\
+	NOOP
+
 TEXT _rt0_arm64_openbsd(SB),NOSPLIT|NOFRAME,$0
 	MOVD	0(RSP), R0	// argc
 	ADD	$8, RSP, R1	// argv
@@ -101,5 +107,5 @@
 exit:
 	MOVD	$0, R0
 	MOVD	$1, R8		// sys_exit
-	SVC
+	INVOKE_SYSCALL
 	B	exit
diff --git a/src/runtime/runtime-gdb_test.go b/src/runtime/runtime-gdb_test.go
index bb625aa..2818ada 100644
--- a/src/runtime/runtime-gdb_test.go
+++ b/src/runtime/runtime-gdb_test.go
@@ -241,8 +241,11 @@
 		"-ex", "echo END\n",
 		filepath.Join(dir, "a.exe"),
 	)
-	got, _ := exec.Command("gdb", args...).CombinedOutput()
-	t.Logf("gdb output: %s\n", got)
+	got, err := exec.Command("gdb", args...).CombinedOutput()
+	t.Logf("gdb output:\n%s", got)
+	if err != nil {
+		t.Fatalf("gdb exited with error: %v", err)
+	}
 
 	firstLine := bytes.SplitN(got, []byte("\n"), 2)[0]
 	if string(firstLine) != "Loading Go Runtime support." {
@@ -388,7 +391,11 @@
 		"-ex", "continue",
 		filepath.Join(dir, "a.exe"),
 	}
-	got, _ := exec.Command("gdb", args...).CombinedOutput()
+	got, err := exec.Command("gdb", args...).CombinedOutput()
+	t.Logf("gdb output:\n%s", got)
+	if err != nil {
+		t.Fatalf("gdb exited with error: %v", err)
+	}
 
 	// Check that the backtrace matches the source code.
 	bt := []string{
@@ -403,8 +410,7 @@
 		s := fmt.Sprintf("#%v.*main\\.%v", i, name)
 		re := regexp.MustCompile(s)
 		if found := re.Find(got) != nil; !found {
-			t.Errorf("could not find '%v' in backtrace", s)
-			t.Fatalf("gdb output:\n%v", string(got))
+			t.Fatalf("could not find '%v' in backtrace", s)
 		}
 	}
 }
@@ -463,7 +469,11 @@
 		"-ex", "info types astruct",
 		filepath.Join(dir, "a.exe"),
 	}
-	got, _ := exec.Command("gdb", args...).CombinedOutput()
+	got, err := exec.Command("gdb", args...).CombinedOutput()
+	t.Logf("gdb output:\n%s", got)
+	if err != nil {
+		t.Fatalf("gdb exited with error: %v", err)
+	}
 
 	sgot := string(got)
 
@@ -477,8 +487,7 @@
 	}
 	for _, name := range types {
 		if !strings.Contains(sgot, name) {
-			t.Errorf("could not find %s in 'info typrs astruct' output", name)
-			t.Fatalf("gdb output:\n%v", sgot)
+			t.Fatalf("could not find %s in 'info types astruct' output", name)
 		}
 	}
 }
@@ -532,12 +541,14 @@
 		"-ex", "print 'runtime._PageSize'",
 		filepath.Join(dir, "a.exe"),
 	}
-	got, _ := exec.Command("gdb", args...).CombinedOutput()
+	got, err := exec.Command("gdb", args...).CombinedOutput()
+	t.Logf("gdb output:\n%s", got)
+	if err != nil {
+		t.Fatalf("gdb exited with error: %v", err)
+	}
 
 	sgot := strings.ReplaceAll(string(got), "\r\n", "\n")
 
-	t.Logf("output %q", sgot)
-
 	if !strings.Contains(sgot, "\n$1 = 42\n$2 = 18446744073709551615\n$3 = -1\n$4 = 1 '\\001'\n$5 = 8192") {
 		t.Fatalf("output mismatch")
 	}
@@ -592,7 +603,11 @@
 		"-ex", "backtrace",
 		filepath.Join(dir, "a.exe"),
 	}
-	got, _ := exec.Command("gdb", args...).CombinedOutput()
+	got, err := exec.Command("gdb", args...).CombinedOutput()
+	t.Logf("gdb output:\n%s", got)
+	if err != nil {
+		t.Fatalf("gdb exited with error: %v", err)
+	}
 
 	// Check that the backtrace matches the source code.
 	bt := []string{
@@ -603,8 +618,7 @@
 		s := fmt.Sprintf("(#.* .* in )?main\\.%v", name)
 		re := regexp.MustCompile(s)
 		if found := re.Find(got) != nil; !found {
-			t.Errorf("could not find '%v' in backtrace", s)
-			t.Fatalf("gdb output:\n%v", string(got))
+			t.Fatalf("could not find '%v' in backtrace", s)
 		}
 	}
 }
@@ -671,7 +685,11 @@
 		"-ex", "continue",
 		filepath.Join(dir, "a.exe"),
 	}
-	got, _ := exec.Command("gdb", args...).CombinedOutput()
+	got, err := exec.Command("gdb", args...).CombinedOutput()
+	t.Logf("gdb output:\n%s", got)
+	if err != nil {
+		t.Fatalf("gdb exited with error: %v", err)
+	}
 
 	// Check that the backtrace matches
 	// We check the 3 inner most frames only as they are present certainly, according to gcc_<OS>_arm64.c
@@ -684,8 +702,7 @@
 		s := fmt.Sprintf("#%v.*%v", i, name)
 		re := regexp.MustCompile(s)
 		if found := re.Find(got) != nil; !found {
-			t.Errorf("could not find '%v' in backtrace", s)
-			t.Fatalf("gdb output:\n%v", string(got))
+			t.Fatalf("could not find '%v' in backtrace", s)
 		}
 	}
 }
diff --git a/src/runtime/sizeof_test.go b/src/runtime/sizeof_test.go
index d615690..736e848 100644
--- a/src/runtime/sizeof_test.go
+++ b/src/runtime/sizeof_test.go
@@ -21,7 +21,7 @@
 		_32bit uintptr     // size on 32bit platforms
 		_64bit uintptr     // size on 64bit platforms
 	}{
-		{runtime.G{}, 216, 376}, // g, but exported for testing
+		{runtime.G{}, 216, 376},   // g, but exported for testing
 		{runtime.Sudog{}, 56, 88}, // sudog, but exported for testing
 	}
 
diff --git a/src/runtime/sys_openbsd_arm.s b/src/runtime/sys_openbsd_arm.s
index 11f6e00..9e18ce0 100644
--- a/src/runtime/sys_openbsd_arm.s
+++ b/src/runtime/sys_openbsd_arm.s
@@ -13,11 +13,23 @@
 #define CLOCK_REALTIME	$0
 #define	CLOCK_MONOTONIC	$3
 
+// With OpenBSD 6.7 onwards, an armv7 syscall returns two instructions
+// after the SWI instruction, to allow for a speculative execution
+// barrier to be placed after the SWI without impacting performance.
+// For now use hardware no-ops as this works with both older and newer
+// kernels. After OpenBSD 6.8 is released this should be changed to
+// speculation barriers.
+#define NOOP	MOVW    R0, R0
+#define	INVOKE_SYSCALL	\
+	SWI	$0;	\
+	NOOP;		\
+	NOOP
+
 // Exit the entire program (like C exit)
 TEXT runtime·exit(SB),NOSPLIT|NOFRAME,$0
 	MOVW	code+0(FP), R0	// arg 1 - status
 	MOVW	$1, R12			// sys_exit
-	SWI	$0
+	INVOKE_SYSCALL
 	MOVW.CS	$0, R8			// crash on syscall failure
 	MOVW.CS	R8, (R8)
 	RET
@@ -26,7 +38,7 @@
 TEXT runtime·exitThread(SB),NOSPLIT,$0-4
 	MOVW	wait+0(FP), R0		// arg 1 - notdead
 	MOVW	$302, R12		// sys___threxit
-	SWI	$0
+	INVOKE_SYSCALL
 	MOVW.CS	$1, R8			// crash on syscall failure
 	MOVW.CS	R8, (R8)
 	JMP	0(PC)
@@ -36,7 +48,7 @@
 	MOVW	mode+4(FP), R1		// arg 2 - mode
 	MOVW	perm+8(FP), R2		// arg 3 - perm
 	MOVW	$5, R12			// sys_open
-	SWI	$0
+	INVOKE_SYSCALL
 	MOVW.CS	$-1, R0
 	MOVW	R0, ret+12(FP)
 	RET
@@ -44,7 +56,7 @@
 TEXT runtime·closefd(SB),NOSPLIT|NOFRAME,$0
 	MOVW	fd+0(FP), R0		// arg 1 - fd
 	MOVW	$6, R12			// sys_close
-	SWI	$0
+	INVOKE_SYSCALL
 	MOVW.CS	$-1, R0
 	MOVW	R0, ret+4(FP)
 	RET
@@ -54,7 +66,7 @@
 	MOVW	p+4(FP), R1		// arg 2 - buf
 	MOVW	n+8(FP), R2		// arg 3 - nbyte
 	MOVW	$3, R12			// sys_read
-	SWI	$0
+	INVOKE_SYSCALL
 	RSB.CS	$0, R0		// caller expects negative errno
 	MOVW	R0, ret+12(FP)
 	RET
@@ -63,7 +75,7 @@
 TEXT runtime·pipe(SB),NOSPLIT,$0-12
 	MOVW	$r+0(FP), R0
 	MOVW	$263, R12
-	SWI	$0
+	INVOKE_SYSCALL
 	MOVW	R0, errno+8(FP)
 	RET
 
@@ -72,7 +84,7 @@
 	MOVW	$r+4(FP), R0
 	MOVW	flags+0(FP), R1
 	MOVW	$101, R12
-	SWI	$0
+	INVOKE_SYSCALL
 	MOVW	R0, errno+12(FP)
 	RET
 
@@ -81,7 +93,7 @@
 	MOVW	p+4(FP), R1		// arg 2 - buf
 	MOVW	n+8(FP), R2		// arg 3 - nbyte
 	MOVW	$4, R12			// sys_write
-	SWI	$0
+	INVOKE_SYSCALL
 	RSB.CS	$0, R0		// caller expects negative errno
 	MOVW	R0, ret+12(FP)
 	RET
@@ -99,12 +111,12 @@
 	MOVW	$4(R13), R0		// arg 1 - rqtp
 	MOVW	$0, R1			// arg 2 - rmtp
 	MOVW	$91, R12		// sys_nanosleep
-	SWI	$0
+	INVOKE_SYSCALL
 	RET
 
 TEXT runtime·getthrid(SB),NOSPLIT,$0-4
 	MOVW	$299, R12		// sys_getthrid
-	SWI	$0
+	INVOKE_SYSCALL
 	MOVW	R0, ret+0(FP)
 	RET
 
@@ -113,16 +125,16 @@
 	MOVW	sig+4(FP), R1		// arg 2 - signum
 	MOVW	$0, R2			// arg 3 - tcb
 	MOVW	$119, R12		// sys_thrkill
-	SWI	$0
+	INVOKE_SYSCALL
 	RET
 
 TEXT runtime·raiseproc(SB),NOSPLIT,$12
-	MOVW	$20, R12
-	SWI	$0			// sys_getpid
+	MOVW	$20, R12		// sys_getpid
+	INVOKE_SYSCALL
 					// arg 1 - pid, already in R0
 	MOVW	sig+0(FP), R1		// arg 2 - signum
 	MOVW	$122, R12		// sys_kill
-	SWI	$0
+	INVOKE_SYSCALL
 	RET
 
 TEXT runtime·mmap(SB),NOSPLIT,$16
@@ -140,7 +152,7 @@
 	MOVW	R7, 16(R13)		// high 32 bits
 	ADD	$4, R13
 	MOVW	$197, R12		// sys_mmap
-	SWI	$0
+	INVOKE_SYSCALL
 	SUB	$4, R13
 	MOVW	$0, R1
 	MOVW.CS	R0, R1			// if error, move to R1
@@ -153,7 +165,7 @@
 	MOVW	addr+0(FP), R0		// arg 1 - addr
 	MOVW	n+4(FP), R1		// arg 2 - len
 	MOVW	$73, R12		// sys_munmap
-	SWI	$0
+	INVOKE_SYSCALL
 	MOVW.CS	$0, R8			// crash on syscall failure
 	MOVW.CS	R8, (R8)
 	RET
@@ -163,7 +175,7 @@
 	MOVW	n+4(FP), R1		// arg 2 - len
 	MOVW	flags+8(FP), R2		// arg 2 - flags
 	MOVW	$75, R12		// sys_madvise
-	SWI	$0
+	INVOKE_SYSCALL
 	MOVW.CS	$-1, R0
 	MOVW	R0, ret+12(FP)
 	RET
@@ -173,7 +185,7 @@
 	MOVW	new+4(FP), R1		// arg 2 - new value
 	MOVW	old+8(FP), R2		// arg 3 - old value
 	MOVW	$69, R12		// sys_setitimer
-	SWI	$0
+	INVOKE_SYSCALL
 	RET
 
 // func walltime1() (sec int64, nsec int32)
@@ -181,7 +193,7 @@
 	MOVW	CLOCK_REALTIME, R0	// arg 1 - clock_id
 	MOVW	$8(R13), R1		// arg 2 - tp
 	MOVW	$87, R12		// sys_clock_gettime
-	SWI	$0
+	INVOKE_SYSCALL
 
 	MOVW	8(R13), R0		// sec - l32
 	MOVW	12(R13), R1		// sec - h32
@@ -199,7 +211,7 @@
 	MOVW	CLOCK_MONOTONIC, R0	// arg 1 - clock_id
 	MOVW	$8(R13), R1		// arg 2 - tp
 	MOVW	$87, R12		// sys_clock_gettime
-	SWI	$0
+	INVOKE_SYSCALL
 
 	MOVW	8(R13), R0		// sec - l32
 	MOVW	12(R13), R4		// sec - h32
@@ -220,7 +232,7 @@
 	MOVW	new+4(FP), R1		// arg 2 - new sigaction
 	MOVW	old+8(FP), R2		// arg 3 - old sigaction
 	MOVW	$46, R12		// sys_sigaction
-	SWI	$0
+	INVOKE_SYSCALL
 	MOVW.CS	$3, R8			// crash on syscall failure
 	MOVW.CS	R8, (R8)
 	RET
@@ -229,7 +241,7 @@
 	MOVW	how+0(FP), R0		// arg 1 - mode
 	MOVW	new+4(FP), R1		// arg 2 - new
 	MOVW	$48, R12		// sys_sigprocmask
-	SWI	$0
+	INVOKE_SYSCALL
 	MOVW.CS	$3, R8			// crash on syscall failure
 	MOVW.CS	R8, (R8)
 	MOVW	R0, ret+8(FP)
@@ -280,7 +292,7 @@
 	MOVW	param+0(FP), R0		// arg 1 - param
 	MOVW	psize+4(FP), R1		// arg 2 - psize
 	MOVW	$8, R12			// sys___tfork
-	SWI	$0
+	INVOKE_SYSCALL
 
 	// Return if syscall failed.
 	B.CC	4(PC)
@@ -313,14 +325,14 @@
 	MOVW	new+0(FP), R0		// arg 1 - new sigaltstack
 	MOVW	old+4(FP), R1		// arg 2 - old sigaltstack
 	MOVW	$288, R12		// sys_sigaltstack
-	SWI	$0
+	INVOKE_SYSCALL
 	MOVW.CS	$0, R8			// crash on syscall failure
 	MOVW.CS	R8, (R8)
 	RET
 
 TEXT runtime·osyield(SB),NOSPLIT,$0
 	MOVW	$298, R12		// sys_sched_yield
-	SWI	$0
+	INVOKE_SYSCALL
 	RET
 
 TEXT runtime·thrsleep(SB),NOSPLIT,$4
@@ -332,7 +344,7 @@
 	MOVW	R4, 4(R13)
 	ADD	$4, R13
 	MOVW	$94, R12		// sys___thrsleep
-	SWI	$0
+	INVOKE_SYSCALL
 	SUB	$4, R13
 	MOVW	R0, ret+20(FP)
 	RET
@@ -341,7 +353,7 @@
 	MOVW	ident+0(FP), R0		// arg 1 - ident
 	MOVW	n+4(FP), R1		// arg 2 - n
 	MOVW	$301, R12		// sys___thrwakeup
-	SWI	$0
+	INVOKE_SYSCALL
 	MOVW	R0, ret+8(FP)
 	RET
 
@@ -356,7 +368,7 @@
 	MOVW	R5, 8(R13)
 	ADD	$4, R13
 	MOVW	$202, R12		// sys___sysctl
-	SWI	$0
+	INVOKE_SYSCALL
 	SUB	$4, R13
 	MOVW.CC	$0, R0
 	RSB.CS	$0, R0
@@ -366,7 +378,7 @@
 // int32 runtime·kqueue(void);
 TEXT runtime·kqueue(SB),NOSPLIT,$0
 	MOVW	$269, R12		// sys_kqueue
-	SWI	$0
+	INVOKE_SYSCALL
 	RSB.CS	$0, R0
 	MOVW	R0, ret+0(FP)
 	RET
@@ -383,7 +395,7 @@
 	MOVW	R5, 8(R13)
 	ADD	$4, R13
 	MOVW	$72, R12		// sys_kevent
-	SWI	$0
+	INVOKE_SYSCALL
 	RSB.CS	$0, R0
 	SUB	$4, R13
 	MOVW	R0, ret+24(FP)
@@ -395,7 +407,7 @@
 	MOVW	$2, R1			// arg 2 - cmd (F_SETFD)
 	MOVW	$1, R2			// arg 3 - arg (FD_CLOEXEC)
 	MOVW	$92, R12		// sys_fcntl
-	SWI	$0
+	INVOKE_SYSCALL
 	RET
 
 // func runtime·setNonblock(fd int32)
@@ -404,12 +416,12 @@
 	MOVW	$3, R1	// F_GETFL
 	MOVW	$0, R2
 	MOVW	$92, R12
-	SWI	$0
+	INVOKE_SYSCALL
 	ORR	$0x4, R0, R2	// O_NONBLOCK
 	MOVW	fd+0(FP), R0	// fd
 	MOVW	$4, R1	// F_SETFL
 	MOVW	$92, R12
-	SWI	$0
+	INVOKE_SYSCALL
 	RET
 
 TEXT ·publicationBarrier(SB),NOSPLIT|NOFRAME,$0-0
@@ -418,6 +430,6 @@
 TEXT runtime·read_tls_fallback(SB),NOSPLIT|NOFRAME,$0
 	MOVM.WP	[R1, R2, R3, R12], (R13)
 	MOVW	$330, R12		// sys___get_tcb
-	SWI	$0
+	INVOKE_SYSCALL
 	MOVM.IAW (R13), [R1, R2, R3, R12]
 	RET
diff --git a/src/runtime/sys_openbsd_arm64.s b/src/runtime/sys_openbsd_arm64.s
index 839aa57..621b1b1 100644
--- a/src/runtime/sys_openbsd_arm64.s
+++ b/src/runtime/sys_openbsd_arm64.s
@@ -13,11 +13,22 @@
 #define CLOCK_REALTIME	$0
 #define	CLOCK_MONOTONIC	$3
 
+// From OpenBSD 6.7 onwards, an arm64 syscall returns two instructions
+// after the SVC instruction, allowing a speculative execution barrier
+// to be placed after the SVC without impacting performance.
+// For now use hardware no-ops, as these work with both older and newer
+// kernels. After OpenBSD 6.8 is released this should be changed to
+// speculation barriers.
+#define	INVOKE_SYSCALL	\
+	SVC;		\
+	NOOP;		\
+	NOOP
+
 // Exit the entire program (like C exit)
 TEXT runtime·exit(SB),NOSPLIT|NOFRAME,$0
 	MOVW	code+0(FP), R0		// arg 1 - status
 	MOVD	$1, R8			// sys_exit
-	SVC
+	INVOKE_SYSCALL
 	BCC	3(PC)
 	MOVD	$0, R0			// crash on syscall failure
 	MOVD	R0, (R0)
@@ -27,7 +38,7 @@
 TEXT runtime·exitThread(SB),NOSPLIT,$0
 	MOVD	wait+0(FP), R0		// arg 1 - notdead
 	MOVD	$302, R8		// sys___threxit
-	SVC
+	INVOKE_SYSCALL
 	MOVD	$0, R0			// crash on syscall failure
 	MOVD	R0, (R0)
 	JMP	0(PC)
@@ -37,7 +48,7 @@
 	MOVW	mode+8(FP), R1		// arg 2 - mode
 	MOVW	perm+12(FP), R2		// arg 3 - perm
 	MOVD	$5, R8			// sys_open
-	SVC
+	INVOKE_SYSCALL
 	BCC	2(PC)
 	MOVW	$-1, R0
 	MOVW	R0, ret+16(FP)
@@ -46,7 +57,7 @@
 TEXT runtime·closefd(SB),NOSPLIT|NOFRAME,$0
 	MOVW	fd+0(FP), R0		// arg 1 - fd
 	MOVD	$6, R8			// sys_close
-	SVC
+	INVOKE_SYSCALL
 	BCC	2(PC)
 	MOVW	$-1, R0
 	MOVW	R0, ret+8(FP)
@@ -57,7 +68,7 @@
 	MOVD	p+8(FP), R1		// arg 2 - buf
 	MOVW	n+16(FP), R2		// arg 3 - nbyte
 	MOVD	$3, R8			// sys_read
-	SVC
+	INVOKE_SYSCALL
 	BCC	2(PC)
 	NEG	R0, R0
 	MOVW	R0, ret+24(FP)
@@ -68,7 +79,7 @@
 	MOVD	$r+0(FP), R0
 	MOVW	$0, R1
 	MOVD	$101, R8		// sys_pipe2
-	SVC
+	INVOKE_SYSCALL
 	BCC	2(PC)
 	NEG	R0, R0
 	MOVW	R0, errno+8(FP)
@@ -79,7 +90,7 @@
 	MOVD	$r+8(FP), R0
 	MOVW	flags+0(FP), R1
 	MOVD	$101, R8		// sys_pipe2
-	SVC
+	INVOKE_SYSCALL
 	BCC	2(PC)
 	NEG	R0, R0
 	MOVW	R0, errno+16(FP)
@@ -90,7 +101,7 @@
 	MOVD	p+8(FP), R1		// arg 2 - buf
 	MOVW	n+16(FP), R2		// arg 3 - nbyte
 	MOVD	$4, R8			// sys_write
-	SVC
+	INVOKE_SYSCALL
 	BCC	2(PC)
 	NEG	R0, R0
 	MOVW	R0, ret+24(FP)
@@ -111,12 +122,12 @@
 	ADD	$8, RSP, R0		// arg 1 - rqtp
 	MOVD	$0, R1			// arg 2 - rmtp
 	MOVD	$91, R8			// sys_nanosleep
-	SVC
+	INVOKE_SYSCALL
 	RET
 
 TEXT runtime·getthrid(SB),NOSPLIT,$0-4
 	MOVD	$299, R8		// sys_getthrid
-	SVC
+	INVOKE_SYSCALL
 	MOVW	R0, ret+0(FP)
 	RET
 
@@ -125,16 +136,16 @@
 	MOVD	sig+8(FP), R1		// arg 2 - signum
 	MOVW	$0, R2			// arg 3 - tcb
 	MOVD	$119, R8		// sys_thrkill
-	SVC
+	INVOKE_SYSCALL
 	RET
 
 TEXT runtime·raiseproc(SB),NOSPLIT,$0
 	MOVD	$20, R8			// sys_getpid
-	SVC
+	INVOKE_SYSCALL
 					// arg 1 - pid, already in R0
 	MOVW	sig+0(FP), R1		// arg 2 - signum
 	MOVD	$122, R8		// sys_kill
-	SVC
+	INVOKE_SYSCALL
 	RET
 
 TEXT runtime·mmap(SB),NOSPLIT,$0
@@ -146,7 +157,7 @@
 	MOVW	$0, R5			// arg 6 - pad
 	MOVW	off+28(FP), R6		// arg 7 - offset
 	MOVD	$197, R8		// sys_mmap
-	SVC
+	INVOKE_SYSCALL
 	MOVD	$0, R1
 	BCC	3(PC)
 	MOVD	R0, R1			// if error, move to R1
@@ -159,7 +170,7 @@
 	MOVD	addr+0(FP), R0		// arg 1 - addr
 	MOVD	n+8(FP), R1		// arg 2 - len
 	MOVD	$73, R8			// sys_munmap
-	SVC
+	INVOKE_SYSCALL
 	BCC	3(PC)
 	MOVD	$0, R0			// crash on syscall failure
 	MOVD	R0, (R0)
@@ -170,7 +181,7 @@
 	MOVD	n+8(FP), R1		// arg 2 - len
 	MOVW	flags+16(FP), R2	// arg 3 - flags
 	MOVD	$75, R8			// sys_madvise
-	SVC
+	INVOKE_SYSCALL
 	BCC	2(PC)
 	MOVW	$-1, R0
 	MOVW	R0, ret+24(FP)
@@ -181,7 +192,7 @@
 	MOVD	new+8(FP), R1		// arg 2 - new value
 	MOVD	old+16(FP), R2		// arg 3 - old value
 	MOVD	$69, R8			// sys_setitimer
-	SVC
+	INVOKE_SYSCALL
 	RET
 
 // func walltime1() (sec int64, nsec int32)
@@ -189,7 +200,7 @@
 	MOVW	CLOCK_REALTIME, R0	// arg 1 - clock_id
 	MOVD	$8(RSP), R1		// arg 2 - tp
 	MOVD	$87, R8			// sys_clock_gettime
-	SVC
+	INVOKE_SYSCALL
 
 	MOVD	8(RSP), R0		// sec
 	MOVD	16(RSP), R1		// nsec
@@ -204,7 +215,7 @@
 	MOVW	CLOCK_MONOTONIC, R0	// arg 1 - clock_id
 	MOVD	$8(RSP), R1		// arg 2 - tp
 	MOVD	$87, R8			// sys_clock_gettime
-	SVC
+	INVOKE_SYSCALL
 
 	MOVW	8(RSP), R3		// sec
 	MOVW	16(RSP), R5		// nsec
@@ -220,7 +231,7 @@
 	MOVD	new+8(FP), R1		// arg 2 - new sigaction
 	MOVD	old+16(FP), R2		// arg 3 - old sigaction
 	MOVD	$46, R8			// sys_sigaction
-	SVC
+	INVOKE_SYSCALL
 	BCC	3(PC)
 	MOVD	$3, R0			// crash on syscall failure
 	MOVD	R0, (R0)
@@ -230,7 +241,7 @@
 	MOVW	how+0(FP), R0		// arg 1 - mode
 	MOVW	new+4(FP), R1		// arg 2 - new
 	MOVD	$48, R8			// sys_sigprocmask
-	SVC
+	INVOKE_SYSCALL
 	BCC	3(PC)
 	MOVD	$3, R8			// crash on syscall failure
 	MOVD	R8, (R8)
@@ -314,7 +325,7 @@
 	MOVD	param+0(FP), R0		// arg 1 - param
 	MOVD	psize+8(FP), R1		// arg 2 - psize
 	MOVD	$8, R8			// sys___tfork
-	SVC
+	INVOKE_SYSCALL
 
 	// Return if syscall failed.
 	BCC	4(PC)
@@ -344,7 +355,7 @@
 	MOVD	new+0(FP), R0		// arg 1 - new sigaltstack
 	MOVD	old+8(FP), R1		// arg 2 - old sigaltstack
 	MOVD	$288, R8		// sys_sigaltstack
-	SVC
+	INVOKE_SYSCALL
 	BCC	3(PC)
 	MOVD	$0, R8			// crash on syscall failure
 	MOVD	R8, (R8)
@@ -352,7 +363,7 @@
 
 TEXT runtime·osyield(SB),NOSPLIT,$0
 	MOVD	$298, R8		// sys_sched_yield
-	SVC
+	INVOKE_SYSCALL
 	RET
 
 TEXT runtime·thrsleep(SB),NOSPLIT,$0
@@ -362,7 +373,7 @@
 	MOVD	lock+24(FP), R3		// arg 4 - lock
 	MOVD	abort+32(FP), R4	// arg 5 - abort
 	MOVD	$94, R8			// sys___thrsleep
-	SVC
+	INVOKE_SYSCALL
 	MOVW	R0, ret+40(FP)
 	RET
 
@@ -370,7 +381,7 @@
 	MOVD	ident+0(FP), R0		// arg 1 - ident
 	MOVW	n+8(FP), R1		// arg 2 - n
 	MOVD	$301, R8		// sys___thrwakeup
-	SVC
+	INVOKE_SYSCALL
 	MOVW	R0, ret+16(FP)
 	RET
 
@@ -382,7 +393,7 @@
 	MOVD	dst+32(FP), R4		// arg 5 - dest
 	MOVD	ndst+40(FP), R5		// arg 6 - newlen
 	MOVD	$202, R8		// sys___sysctl
-	SVC
+	INVOKE_SYSCALL
 	BCC	2(PC)
 	NEG	R0, R0
 	MOVW	R0, ret+48(FP)
@@ -391,7 +402,7 @@
 // int32 runtime·kqueue(void);
 TEXT runtime·kqueue(SB),NOSPLIT,$0
 	MOVD	$269, R8		// sys_kqueue
-	SVC
+	INVOKE_SYSCALL
 	BCC	2(PC)
 	NEG	R0, R0
 	MOVW	R0, ret+0(FP)
@@ -406,7 +417,7 @@
 	MOVW	nev+32(FP), R4		// arg 5 - nevents
 	MOVD	ts+40(FP), R5		// arg 6 - timeout
 	MOVD	$72, R8			// sys_kevent
-	SVC
+	INVOKE_SYSCALL
 	BCC	2(PC)
 	NEG	R0, R0
 	MOVW	R0, ret+48(FP)
@@ -418,7 +429,7 @@
 	MOVD	$2, R1			// arg 2 - cmd (F_SETFD)
 	MOVD	$1, R2			// arg 3 - arg (FD_CLOEXEC)
 	MOVD	$92, R8			// sys_fcntl
-	SVC
+	INVOKE_SYSCALL
 	RET
 
 // func runtime·setNonblock(fd int32)
@@ -427,11 +438,11 @@
 	MOVD	$3, R1			// arg 2 - cmd (F_GETFL)
 	MOVD	$0, R2			// arg 3
 	MOVD	$92, R8			// sys_fcntl
-	SVC
+	INVOKE_SYSCALL
 	MOVD	$4, R2			// O_NONBLOCK
 	ORR	R0, R2			// arg 3 - flags
 	MOVW	fd+0(FP), R0		// arg 1 - fd
 	MOVD	$4, R1			// arg 2 - cmd (F_SETFL)
 	MOVD	$92, R8			// sys_fcntl
-	SVC
+	INVOKE_SYSCALL
 	RET
diff --git a/src/runtime/testdata/testprog/gc.go b/src/runtime/testdata/testprog/gc.go
index f691a1d..74732cd 100644
--- a/src/runtime/testdata/testprog/gc.go
+++ b/src/runtime/testdata/testprog/gc.go
@@ -11,6 +11,7 @@
 	"runtime/debug"
 	"sync/atomic"
 	"time"
+	"unsafe"
 )
 
 func init() {
@@ -19,6 +20,7 @@
 	register("GCSys", GCSys)
 	register("GCPhys", GCPhys)
 	register("DeferLiveness", DeferLiveness)
+	register("GCZombie", GCZombie)
 }
 
 func GCSys() {
@@ -264,3 +266,37 @@
 func escape(x interface{}) { sink2 = x; sink2 = nil }
 
 var sink2 interface{}
+
+// Test zombie object detection and reporting.
+func GCZombie() {
+	// Allocate several objects of unusual size (so free slots are
+	// unlikely to all be re-allocated by the runtime).
+	const size = 190
+	const count = 8192 / size
+	keep := make([]*byte, 0, (count+1)/2)
+	free := make([]uintptr, 0, (count+1)/2)
+	zombies := make([]*byte, 0, len(free))
+	for i := 0; i < count; i++ {
+		obj := make([]byte, size)
+		p := &obj[0]
+		if i%2 == 0 {
+			keep = append(keep, p)
+		} else {
+			free = append(free, uintptr(unsafe.Pointer(p)))
+		}
+	}
+
+	// Free the unreferenced objects.
+	runtime.GC()
+
+	// Bring the free objects back to life.
+	for _, p := range free {
+		zombies = append(zombies, (*byte)(unsafe.Pointer(p)))
+	}
+
+	// GC should detect the zombie objects.
+	runtime.GC()
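+	// If the zombie check fires, the runtime throws during the GC above
+	// and execution never reaches the println below; printing "failed"
+	// therefore means the resurrected pointers went undetected.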
+	println("failed")
+	runtime.KeepAlive(keep)
+	runtime.KeepAlive(zombies)
+}
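
The test above hinges on the uintptr round-trip: once a pointer survives only
as a uintptr, the collector no longer treats it as a reference, so the object
can be swept and then illegitimately brought back to life. A minimal sketch of
that laundering pattern, for illustration only (assumes imports of "runtime"
and "unsafe"; not part of this change):

	// zombieSketch manufactures a pointer into freed memory, which is
	// exactly the condition the new runtime check is meant to report.
	func zombieSketch() {
		obj := new(byte)
		addr := uintptr(unsafe.Pointer(obj)) // GC no longer sees a reference
		obj = nil
		runtime.GC()                            // the allocation may now be swept
		zombie := (*byte)(unsafe.Pointer(addr)) // invalid: resurrects freed memory
		runtime.KeepAlive(zombie)
	}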
diff --git a/src/runtime/testdata/testprog/lockosthread.go b/src/runtime/testdata/testprog/lockosthread.go
index fd3123e..e9d7fdb 100644
--- a/src/runtime/testdata/testprog/lockosthread.go
+++ b/src/runtime/testdata/testprog/lockosthread.go
@@ -7,6 +7,7 @@
 import (
 	"os"
 	"runtime"
+	"sync"
 	"time"
 )
 
@@ -30,6 +31,7 @@
 		runtime.LockOSThread()
 	})
 	register("LockOSThreadAvoidsStatePropagation", LockOSThreadAvoidsStatePropagation)
+	register("LockOSThreadTemplateThreadRace", LockOSThreadTemplateThreadRace)
 }
 
 func LockOSThreadMain() {
@@ -195,3 +197,50 @@
 	runtime.UnlockOSThread()
 	println("OK")
 }
+
+func LockOSThreadTemplateThreadRace() {
+	// This test attempts to reproduce the race described in
+	// golang.org/issue/38931. To do so, we must have a stop-the-world
+	// (achieved via ReadMemStats) racing with two LockOSThread calls.
+	//
+	// While this test attempts to line up the timing, it is only expected
+	// to fail (and thus hang) around 2% of the time if the race is
+	// present.
+
+	// Ensure enough Ps to actually run everything in parallel. Though on
+	// machines with fewer than 4 cores, we are still at the whim of the
+	// kernel scheduler.
+	runtime.GOMAXPROCS(4)
+
+	go func() {
+		// Stop the world; race with LockOSThread below.
+		var m runtime.MemStats
+		for {
+			runtime.ReadMemStats(&m)
+		}
+	}()
+
+	// Try to synchronize both LockOSThreads.
+	start := time.Now().Add(10 * time.Millisecond)
+
+	var wg sync.WaitGroup
+	wg.Add(2)
+
+	for i := 0; i < 2; i++ {
+		go func() {
+			for time.Now().Before(start) {
+			}
+
+			// Add work to the local runq to trigger early startm
+			// in handoffp.
+			go func() {}()
+
+			runtime.LockOSThread()
+			runtime.Gosched() // add a preemption point.
+			wg.Done()
+		}()
+	}
+
+	wg.Wait()
+	// If both LockOSThreads completed then we did not hit the race.
+	println("OK")
+}
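
Entries registered here are driven from the runtime test suite by running the
testprog binary with the entry name and matching its output. A hedged sketch of
such a driver follows; runTestProg is the helper the runtime tests already use
for testprog entries, and the companion test itself is not part of this hunk:

	func TestLockOSThreadTemplateThreadRace(t *testing.T) {
		// The testprog entry prints "OK" once both LockOSThread calls
		// complete; a hang here would indicate the issue 38931 race.
		output := runTestProg(t, "testprog", "LockOSThreadTemplateThreadRace")
		if want := "OK\n"; output != want {
			t.Errorf("want %q, got %q", want, output)
		}
	}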
diff --git a/src/syscall/asm_openbsd_arm.s b/src/syscall/asm_openbsd_arm.s
index 9279ed9..26fd791 100644
--- a/src/syscall/asm_openbsd_arm.s
+++ b/src/syscall/asm_openbsd_arm.s
@@ -15,13 +15,20 @@
 // func RawSyscall(trap int32, a1, a2, a3 int32) (r1, r2, err int32);
 // func RawSyscall6(trap int32, a1, a2, a3, a4, a5, a6 int32) (r1, r2, err int32);
 
+// See comment in runtime/sys_openbsd_arm.s re this construction.
+#define NOOP	MOVW    R0, R0
+#define	INVOKE_SYSCALL	\
+	SWI	$0;	\
+	NOOP;		\
+	NOOP
+
 TEXT	·Syscall(SB),NOSPLIT,$0-28
 	BL runtime·entersyscall(SB)
 	MOVW trap+0(FP), R12		// syscall number
 	MOVW a1+4(FP), R0		// arg 1
 	MOVW a2+8(FP), R1		// arg 2
 	MOVW a3+12(FP), R2		// arg 3
-	SWI $0
+	INVOKE_SYSCALL
 	MOVW $0, R2
 	BCS error
 	MOVW R0, r1+16(FP)		// ret 1
@@ -46,7 +53,7 @@
 	MOVW a4+16(FP), R3		// arg 4
 	MOVW R13, R4
 	MOVW $a5+20(FP), R13		// args 5 and 6 are passed on the stack
-	SWI $0
+	INVOKE_SYSCALL
 	MOVW R4, R13
 	MOVW $0, R2
 	BCS error6
@@ -72,7 +79,7 @@
 	MOVW a4+16(FP), R3		// arg 4
 	MOVW R13, R4
 	MOVW $a5+20(FP), R13		// args 5 through 9 are passed on the stack
-	SWI $0
+	INVOKE_SYSCALL
 	MOVW R4, R13
 	MOVW $0, R2
 	BCS error9
@@ -94,7 +101,7 @@
 	MOVW a1+4(FP), R0		// arg 1
 	MOVW a2+8(FP), R1		// arg 2
 	MOVW a3+12(FP), R2		// arg 3
-	SWI $0
+	INVOKE_SYSCALL
 	MOVW $0, R2
 	BCS errorr
 	MOVW R0, r1+16(FP)		// ret 1
@@ -116,7 +123,7 @@
 	MOVW a4+16(FP), R3		// arg 4
 	MOVW R13, R4
 	MOVW $a5+20(FP), R13		// args 5 and 6 are passed on the stack
-	SWI $0
+	INVOKE_SYSCALL
 	MOVW R4, R13
 	MOVW $0, R2
 	BCS errorr6
diff --git a/src/syscall/asm_openbsd_arm64.s b/src/syscall/asm_openbsd_arm64.s
index 16be5fb..dcbed10 100644
--- a/src/syscall/asm_openbsd_arm64.s
+++ b/src/syscall/asm_openbsd_arm64.s
@@ -4,6 +4,12 @@
 
 #include "textflag.h"
 
+// See comment in runtime/sys_openbsd_arm64.s re this construction.
+#define	INVOKE_SYSCALL	\
+	SVC;		\
+	NOOP;		\
+	NOOP
+
 // func Syscall(trap int64, a1, a2, a3 int64) (r1, r2, err int64);
 TEXT ·Syscall(SB),NOSPLIT,$0-56
 	BL	runtime·entersyscall(SB)
@@ -14,7 +20,7 @@
 	MOVD	$0, R4
 	MOVD	$0, R5
 	MOVD	trap+0(FP), R8	// syscall number
-	SVC
+	INVOKE_SYSCALL
 	BCC	ok
 	MOVD	$-1, R4
 	MOVD	R4, r1+32(FP)	// r1
@@ -38,7 +44,7 @@
 	MOVD	a5+40(FP), R4
 	MOVD	a6+48(FP), R5
 	MOVD	trap+0(FP), R8	// syscall number
-	SVC
+	INVOKE_SYSCALL
 	BCC	ok
 	MOVD	$-1, R4
 	MOVD	R4, r1+56(FP)	// r1
@@ -66,7 +72,7 @@
 	MOVD	a9+72(FP), R8	// on stack
 	MOVD	R8, 8(RSP)
 	MOVD	num+0(FP), R8	// syscall number
-	SVC
+	INVOKE_SYSCALL
 	BCC	ok
 	MOVD	$-1, R4
 	MOVD	R4, r1+80(FP)	// r1
@@ -89,7 +95,7 @@
 	MOVD	$0, R4
 	MOVD	$0, R5
 	MOVD	trap+0(FP), R8	// syscall number
-	SVC
+	INVOKE_SYSCALL
 	BCC	ok
 	MOVD	$-1, R4
 	MOVD	R4, r1+32(FP)	// r1
@@ -110,7 +116,7 @@
 	MOVD	a5+40(FP), R4
 	MOVD	a6+48(FP), R5
 	MOVD	trap+0(FP), R8	// syscall number
-	SVC
+	INVOKE_SYSCALL
 	BCC	ok
 	MOVD	$-1, R4
 	MOVD	R4, r1+56(FP)	// r1
diff --git a/src/syscall/js/func.go b/src/syscall/js/func.go
index 9e99027..da4cf68 100644
--- a/src/syscall/js/func.go
+++ b/src/syscall/js/func.go
@@ -39,7 +39,7 @@
 // immediate deadlock. Therefore a blocking function should explicitly start a
 // new goroutine.
 //
-// Func.Release must be called to free up resources when the function will not be used any more.
+// Func.Release must be called to free up resources when the function will not be invoked any more.
 func FuncOf(fn func(this Value, args []Value) interface{}) Func {
 	funcsMu.Lock()
 	id := nextFuncID
@@ -54,6 +54,7 @@
 
 // Release frees up resources allocated for the function.
 // The function must not be invoked after calling Release.
+// It is allowed to call Release while the function is still running.
 func (c Func) Release() {
 	funcsMu.Lock()
 	delete(funcs, c.id)
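
A short usage sketch of the documented lifecycle (hypothetical wiring; only
FuncOf and Release are taken from this file): the added sentence means a
callback may release itself from inside its own invocation.

	var cb js.Func
	cb = js.FuncOf(func(this js.Value, args []js.Value) interface{} {
		cb.Release() // now documented as safe while the function is running
		return nil
	})
	js.Global().Call("setTimeout", cb, 100) // hypothetical one-shot callback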
diff --git a/src/syscall/syscall_dup2_linux.go b/src/syscall/syscall_dup2_linux.go
new file mode 100644
index 0000000..f03a923
--- /dev/null
+++ b/src/syscall/syscall_dup2_linux.go
@@ -0,0 +1,10 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !android
+// +build 386 amd64 arm mips mipsle mips64 mips64le ppc64 ppc64le s390x
+
+package syscall
+
+const _SYS_dup = SYS_DUP2
diff --git a/src/syscall/syscall_dup3_linux.go b/src/syscall/syscall_dup3_linux.go
new file mode 100644
index 0000000..1ebdcb2
--- /dev/null
+++ b/src/syscall/syscall_dup3_linux.go
@@ -0,0 +1,9 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build android arm64 riscv64
+
+package syscall
+
+const _SYS_dup = SYS_DUP3
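
For context, a hedged sketch of how a per-architecture _SYS_dup constant is
typically consumed inside package syscall; dupCloexec is a hypothetical helper
(the real consumer is the exec support code), shown only to illustrate why the
constant moves into build-tagged files:

	// dupCloexec duplicates oldfd onto newfd with close-on-exec set.
	func dupCloexec(oldfd, newfd int) error {
		if _SYS_dup == SYS_DUP3 {
			// dup3 takes a flags argument, so O_CLOEXEC is applied atomically.
			if _, _, e := RawSyscall(_SYS_dup, uintptr(oldfd), uintptr(newfd), O_CLOEXEC); e != 0 {
				return e
			}
			return nil
		}
		// dup2 has no flags argument; set FD_CLOEXEC in a second step.
		if _, _, e := RawSyscall(_SYS_dup, uintptr(oldfd), uintptr(newfd), 0); e != 0 {
			return e
		}
		if _, _, e := RawSyscall(SYS_FCNTL, uintptr(newfd), F_SETFD, FD_CLOEXEC); e != 0 {
			return e
		}
		return nil
	}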
diff --git a/src/syscall/syscall_linux_386.go b/src/syscall/syscall_linux_386.go
index 3c1e6e4..5076dd9 100644
--- a/src/syscall/syscall_linux_386.go
+++ b/src/syscall/syscall_linux_386.go
@@ -9,10 +9,7 @@
 
 import "unsafe"
 
-const (
-	_SYS_dup       = SYS_DUP2
-	_SYS_setgroups = SYS_SETGROUPS32
-)
+const _SYS_setgroups = SYS_SETGROUPS32
 
 func setTimespec(sec, nsec int64) Timespec {
 	return Timespec{Sec: int32(sec), Nsec: int32(nsec)}
diff --git a/src/syscall/syscall_linux_amd64.go b/src/syscall/syscall_linux_amd64.go
index 0f28b55..bf340d9 100644
--- a/src/syscall/syscall_linux_amd64.go
+++ b/src/syscall/syscall_linux_amd64.go
@@ -4,10 +4,7 @@
 
 package syscall
 
-const (
-	_SYS_dup       = SYS_DUP2
-	_SYS_setgroups = SYS_SETGROUPS
-)
+const _SYS_setgroups = SYS_SETGROUPS
 
 //sys	Dup2(oldfd int, newfd int) (err error)
 //sysnb	EpollCreate(size int) (fd int, err error)
diff --git a/src/syscall/syscall_linux_arm.go b/src/syscall/syscall_linux_arm.go
index d346029..c4c403a 100644
--- a/src/syscall/syscall_linux_arm.go
+++ b/src/syscall/syscall_linux_arm.go
@@ -6,10 +6,7 @@
 
 import "unsafe"
 
-const (
-	_SYS_dup       = SYS_DUP2
-	_SYS_setgroups = SYS_SETGROUPS32
-)
+const _SYS_setgroups = SYS_SETGROUPS32
 
 func setTimespec(sec, nsec int64) Timespec {
 	return Timespec{Sec: int32(sec), Nsec: int32(nsec)}
diff --git a/src/syscall/syscall_linux_arm64.go b/src/syscall/syscall_linux_arm64.go
index 1ad9dd8..61014b2 100644
--- a/src/syscall/syscall_linux_arm64.go
+++ b/src/syscall/syscall_linux_arm64.go
@@ -6,10 +6,7 @@
 
 import "unsafe"
 
-const (
-	_SYS_dup       = SYS_DUP3
-	_SYS_setgroups = SYS_SETGROUPS
-)
+const _SYS_setgroups = SYS_SETGROUPS
 
 func EpollCreate(size int) (fd int, err error) {
 	if size <= 0 {
diff --git a/src/syscall/syscall_linux_mips64x.go b/src/syscall/syscall_linux_mips64x.go
index 157c323..e3683de 100644
--- a/src/syscall/syscall_linux_mips64x.go
+++ b/src/syscall/syscall_linux_mips64x.go
@@ -7,10 +7,7 @@
 
 package syscall
 
-const (
-	_SYS_dup       = SYS_DUP2
-	_SYS_setgroups = SYS_SETGROUPS
-)
+const _SYS_setgroups = SYS_SETGROUPS
 
 //sys	Dup2(oldfd int, newfd int) (err error)
 //sysnb	EpollCreate(size int) (fd int, err error)
diff --git a/src/syscall/syscall_linux_mipsx.go b/src/syscall/syscall_linux_mipsx.go
index f2fea71..cbe2f02 100644
--- a/src/syscall/syscall_linux_mipsx.go
+++ b/src/syscall/syscall_linux_mipsx.go
@@ -9,10 +9,7 @@
 
 import "unsafe"
 
-const (
-	_SYS_dup       = SYS_DUP2
-	_SYS_setgroups = SYS_SETGROUPS
-)
+const _SYS_setgroups = SYS_SETGROUPS
 
 func Syscall9(trap, a1, a2, a3, a4, a5, a6, a7, a8, a9 uintptr) (r1, r2 uintptr, err Errno)
 
diff --git a/src/syscall/syscall_linux_ppc64x.go b/src/syscall/syscall_linux_ppc64x.go
index 22d6e56..ba52e5a 100644
--- a/src/syscall/syscall_linux_ppc64x.go
+++ b/src/syscall/syscall_linux_ppc64x.go
@@ -7,10 +7,7 @@
 
 package syscall
 
-const (
-	_SYS_dup       = SYS_DUP2
-	_SYS_setgroups = SYS_SETGROUPS
-)
+const _SYS_setgroups = SYS_SETGROUPS
 
 //sys	Dup2(oldfd int, newfd int) (err error)
 //sysnb	EpollCreate(size int) (fd int, err error)
diff --git a/src/syscall/syscall_linux_riscv64.go b/src/syscall/syscall_linux_riscv64.go
index 61e9c60..d54bd38 100644
--- a/src/syscall/syscall_linux_riscv64.go
+++ b/src/syscall/syscall_linux_riscv64.go
@@ -6,10 +6,7 @@
 
 import "unsafe"
 
-const (
-	_SYS_dup       = SYS_DUP3
-	_SYS_setgroups = SYS_SETGROUPS
-)
+const _SYS_setgroups = SYS_SETGROUPS
 
 func EpollCreate(size int) (fd int, err error) {
 	if size <= 0 {
diff --git a/src/syscall/syscall_linux_s390x.go b/src/syscall/syscall_linux_s390x.go
index fcedf59..80cb1cc 100644
--- a/src/syscall/syscall_linux_s390x.go
+++ b/src/syscall/syscall_linux_s390x.go
@@ -6,10 +6,7 @@
 
 import "unsafe"
 
-const (
-	_SYS_dup       = SYS_DUP2
-	_SYS_setgroups = SYS_SETGROUPS
-)
+const _SYS_setgroups = SYS_SETGROUPS
 
 //sys	Dup2(oldfd int, newfd int) (err error)
 //sysnb	EpollCreate(size int) (fd int, err error)
diff --git a/src/testing/benchmark.go b/src/testing/benchmark.go
index 88ba0f0..5276600 100644
--- a/src/testing/benchmark.go
+++ b/src/testing/benchmark.go
@@ -526,6 +526,7 @@
 			name:   "Main",
 			w:      os.Stdout,
 			chatty: *chatty,
+			bench:  true,
 		},
 		importPath: importPath,
 		benchFunc: func(b *B) {
@@ -559,6 +560,7 @@
 						name:   b.name,
 						w:      b.w,
 						chatty: b.chatty,
+						bench:  true,
 					},
 					benchFunc: b.benchFunc,
 					benchTime: b.benchTime,
@@ -624,6 +626,7 @@
 			creator: pc[:n],
 			w:       b.w,
 			chatty:  b.chatty,
+			bench:   true,
 		},
 		importPath: b.importPath,
 		benchFunc:  f,
diff --git a/src/testing/sub_test.go b/src/testing/sub_test.go
index 95f8220..8eb0084 100644
--- a/src/testing/sub_test.go
+++ b/src/testing/sub_test.go
@@ -438,8 +438,6 @@
 	}, {
 		// A chatty test should always log with fmt.Print, even if the
 		// parent test has completed.
-		// TODO(deklerk) Capture the log of fmt.Print and assert that the
-		// subtest message is not lost.
 		desc:   "log in finished sub test with chatty",
 		ok:     false,
 		chatty: true,
@@ -477,35 +475,37 @@
 		},
 	}}
 	for _, tc := range testCases {
-		ctx := newTestContext(tc.maxPar, newMatcher(regexp.MatchString, "", ""))
-		buf := &bytes.Buffer{}
-		root := &T{
-			common: common{
-				signal: make(chan bool),
-				name:   "Test",
-				w:      buf,
-				chatty: tc.chatty,
-			},
-			context: ctx,
-		}
-		ok := root.Run(tc.desc, tc.f)
-		ctx.release()
+		t.Run(tc.desc, func(t *T) {
+			ctx := newTestContext(tc.maxPar, newMatcher(regexp.MatchString, "", ""))
+			buf := &bytes.Buffer{}
+			root := &T{
+				common: common{
+					signal: make(chan bool),
+					name:   "Test",
+					w:      buf,
+					chatty: tc.chatty,
+				},
+				context: ctx,
+			}
+			ok := root.Run(tc.desc, tc.f)
+			ctx.release()
 
-		if ok != tc.ok {
-			t.Errorf("%s:ok: got %v; want %v", tc.desc, ok, tc.ok)
-		}
-		if ok != !root.Failed() {
-			t.Errorf("%s:root failed: got %v; want %v", tc.desc, !ok, root.Failed())
-		}
-		if ctx.running != 0 || ctx.numWaiting != 0 {
-			t.Errorf("%s:running and waiting non-zero: got %d and %d", tc.desc, ctx.running, ctx.numWaiting)
-		}
-		got := strings.TrimSpace(buf.String())
-		want := strings.TrimSpace(tc.output)
-		re := makeRegexp(want)
-		if ok, err := regexp.MatchString(re, got); !ok || err != nil {
-			t.Errorf("%s:output:\ngot:\n%s\nwant:\n%s", tc.desc, got, want)
-		}
+			if ok != tc.ok {
+				t.Errorf("%s:ok: got %v; want %v", tc.desc, ok, tc.ok)
+			}
+			if ok != !root.Failed() {
+				t.Errorf("%s:root failed: got %v; want %v", tc.desc, !ok, root.Failed())
+			}
+			if ctx.running != 0 || ctx.numWaiting != 0 {
+				t.Errorf("%s:running and waiting non-zero: got %d and %d", tc.desc, ctx.running, ctx.numWaiting)
+			}
+			got := strings.TrimSpace(buf.String())
+			want := strings.TrimSpace(tc.output)
+			re := makeRegexp(want)
+			if ok, err := regexp.MatchString(re, got); !ok || err != nil {
+				t.Errorf("%s:output:\ngot:\n%s\nwant:\n%s", tc.desc, got, want)
+			}
+		})
 	}
 }
 
@@ -655,43 +655,45 @@
 		},
 	}}
 	for _, tc := range testCases {
-		var ok bool
-		buf := &bytes.Buffer{}
-		// This is almost like the Benchmark function, except that we override
-		// the benchtime and catch the failure result of the subbenchmark.
-		root := &B{
-			common: common{
-				signal: make(chan bool),
-				name:   "root",
-				w:      buf,
-				chatty: tc.chatty,
-			},
-			benchFunc: func(b *B) { ok = b.Run("test", tc.f) }, // Use Run to catch failure.
-			benchTime: benchTimeFlag{d: 1 * time.Microsecond},
-		}
-		root.runN(1)
-		if ok != !tc.failed {
-			t.Errorf("%s:ok: got %v; want %v", tc.desc, ok, !tc.failed)
-		}
-		if !ok != root.Failed() {
-			t.Errorf("%s:root failed: got %v; want %v", tc.desc, !ok, root.Failed())
-		}
-		// All tests are run as subtests
-		if root.result.N != 1 {
-			t.Errorf("%s: N for parent benchmark was %d; want 1", tc.desc, root.result.N)
-		}
-		got := strings.TrimSpace(buf.String())
-		want := strings.TrimSpace(tc.output)
-		re := makeRegexp(want)
-		if ok, err := regexp.MatchString(re, got); !ok || err != nil {
-			t.Errorf("%s:output:\ngot:\n%s\nwant:\n%s", tc.desc, got, want)
-		}
+		t.Run(tc.desc, func(t *T) {
+			var ok bool
+			buf := &bytes.Buffer{}
+			// This is almost like the Benchmark function, except that we override
+			// the benchtime and catch the failure result of the subbenchmark.
+			root := &B{
+				common: common{
+					signal: make(chan bool),
+					name:   "root",
+					w:      buf,
+					chatty: tc.chatty,
+				},
+				benchFunc: func(b *B) { ok = b.Run("test", tc.f) }, // Use Run to catch failure.
+				benchTime: benchTimeFlag{d: 1 * time.Microsecond},
+			}
+			root.runN(1)
+			if ok != !tc.failed {
+				t.Errorf("%s:ok: got %v; want %v", tc.desc, ok, !tc.failed)
+			}
+			if !ok != root.Failed() {
+				t.Errorf("%s:root failed: got %v; want %v", tc.desc, !ok, root.Failed())
+			}
+			// All tests are run as subtests
+			if root.result.N != 1 {
+				t.Errorf("%s: N for parent benchmark was %d; want 1", tc.desc, root.result.N)
+			}
+			got := strings.TrimSpace(buf.String())
+			want := strings.TrimSpace(tc.output)
+			re := makeRegexp(want)
+			if ok, err := regexp.MatchString(re, got); !ok || err != nil {
+				t.Errorf("%s:output:\ngot:\n%s\nwant:\n%s", tc.desc, got, want)
+			}
+		})
 	}
 }
 
 func makeRegexp(s string) string {
 	s = regexp.QuoteMeta(s)
-	s = strings.ReplaceAll(s, ":NNN:", `:\d\d\d:`)
+	s = strings.ReplaceAll(s, ":NNN:", `:\d\d\d\d?:`)
 	s = strings.ReplaceAll(s, "N\\.NNs", `\d*\.\d*s`)
 	return s
 }
diff --git a/src/testing/testing.go b/src/testing/testing.go
index 608bb39..4a14d49 100644
--- a/src/testing/testing.go
+++ b/src/testing/testing.go
@@ -325,6 +325,7 @@
 	cpuListStr           *string
 	parallel             *int
 	testlog              *string
+	printer              *testPrinter
 
 	haveExamples bool // are there examples?
 
@@ -334,6 +335,49 @@
 	numFailed uint32 // number of test failures
 )
 
+type testPrinter struct {
+	chatty bool
+
+	lastNameMu sync.Mutex // guards lastName
+	lastName   string     // last printed test name in chatty mode
+}
+
+func newTestPrinter(chatty bool) *testPrinter {
+	return &testPrinter{
+		chatty: chatty,
+	}
+}
+
+func (p *testPrinter) Print(testName, out string) {
+	p.Fprint(os.Stdout, testName, out)
+}
+
+func (p *testPrinter) Fprint(w io.Writer, testName, out string) {
+	if !p.chatty || strings.HasPrefix(out, "--- PASS") || strings.HasPrefix(out, "--- FAIL") {
+		fmt.Fprint(w, out)
+		return
+	}
+
+	p.lastNameMu.Lock()
+	defer p.lastNameMu.Unlock()
+
+	if strings.HasPrefix(out, "=== CONT") || strings.HasPrefix(out, "=== RUN") {
+		p.lastName = testName
+		fmt.Fprint(w, out)
+		return
+	}
+
+	if p.lastName == "" {
+		p.lastName = testName
+	} else if p.lastName != testName {
+		// "=== CONT" banners are always printed as-is, with no decoration or
+		// indentation, so print straight to stdout rather than to w.
+		fmt.Printf("=== CONT  %s\n", testName)
+		p.lastName = testName
+	}
+	fmt.Fprint(w, out)
+}
+
 // The maximum number of stack frames to go through when skipping helper functions for
 // the purpose of decorating log messages.
 const maxStackLen = 50
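
To illustrate what the printer buys in verbose mode, here is a constructed
transcript (not output from this change): when parallel tests interleave their
logging, each change of ownership is re-announced with an "=== CONT" banner so
every log line can be attributed to its test:

	=== RUN   TestA
	=== RUN   TestB
	=== CONT  TestA
	    a_test.go:12: log from A
	=== CONT  TestB
	    b_test.go:34: log from B
	--- PASS: TestA (0.01s)
	--- PASS: TestB (0.01s)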
@@ -354,10 +398,11 @@
 	cleanupPc   []uintptr           // The stack trace at the point where Cleanup was called.
 
 	chatty     bool   // A copy of the chatty flag.
+	bench      bool   // Whether the current test is a benchmark.
 	finished   bool   // Test function has completed.
-	hasSub     int32  // written atomically
-	raceErrors int    // number of races detected during test
-	runner     string // function name of tRunner running the test
+	hasSub     int32  // Written atomically.
+	raceErrors int    // Number of races detected during test.
+	runner     string // Function name of tRunner running the test.
 
 	parent   *common
 	level    int       // Nesting depth of test or benchmark.
@@ -496,9 +541,6 @@
 	buf := new(strings.Builder)
 	// Every line is indented at least 4 spaces.
 	buf.WriteString("    ")
-	if c.chatty {
-		fmt.Fprintf(buf, "%s: ", c.name)
-	}
 	fmt.Fprintf(buf, "%s:%d: ", file, line)
 	lines := strings.Split(s, "\n")
 	if l := len(lines); l > 1 && lines[l-1] == "" {
@@ -517,12 +559,12 @@
 
 // flushToParent writes c.output to the parent after first writing the header
 // with the given format and arguments.
-func (c *common) flushToParent(format string, args ...interface{}) {
+func (c *common) flushToParent(testName, format string, args ...interface{}) {
 	p := c.parent
 	p.mu.Lock()
 	defer p.mu.Unlock()
 
-	fmt.Fprintf(p.w, format, args...)
+	printer.Fprint(p.w, testName, fmt.Sprintf(format, args...))
 
 	c.mu.Lock()
 	defer c.mu.Unlock()
@@ -697,7 +739,14 @@
 		panic("Log in goroutine after " + c.name + " has completed")
 	} else {
 		if c.chatty {
-			fmt.Print(c.decorate(s, depth+1))
+			if c.bench {
+				// Benchmarks don't print === CONT, so we should skip the test
+				// printer and just print straight to stdout.
+				fmt.Print(c.decorate(s, depth+1))
+			} else {
+				printer.Print(c.name, c.decorate(s, depth+1))
+			}
+
 			return
 		}
 		c.output = append(c.output, c.decorate(s, depth+1)...)
@@ -942,7 +991,7 @@
 		for ; root.parent != nil; root = root.parent {
 		}
 		root.mu.Lock()
-		fmt.Fprintf(root.w, "=== CONT  %s\n", t.name)
+		printer.Fprint(root.w, t.name, fmt.Sprintf("=== CONT  %s\n", t.name))
 		root.mu.Unlock()
 	}
 
@@ -1001,7 +1050,7 @@
 				root.duration += time.Since(root.start)
 				d := root.duration
 				root.mu.Unlock()
-				root.flushToParent("--- FAIL: %s (%s)\n", root.name, fmtDuration(d))
+				root.flushToParent(root.name, "--- FAIL: %s (%s)\n", root.name, fmtDuration(d))
 				if r := root.parent.runCleanup(recoverAndReturnPanic); r != nil {
 					fmt.Fprintf(root.parent.w, "cleanup panicked with %v", r)
 				}
@@ -1100,7 +1149,7 @@
 		for ; root.parent != nil; root = root.parent {
 		}
 		root.mu.Lock()
-		fmt.Fprintf(root.w, "=== RUN   %s\n", t.name)
+		printer.Fprint(root.w, t.name, fmt.Sprintf("=== RUN   %s\n", t.name))
 		root.mu.Unlock()
 	}
 	// Instead of reducing the running count of this test before calling the
@@ -1266,6 +1315,8 @@
 		flag.Parse()
 	}
 
+	printer = newTestPrinter(Verbose())
+
 	if *parallel < 1 {
 		fmt.Fprintln(os.Stderr, "testing: -parallel can only be given a positive integer")
 		flag.Usage()
@@ -1309,12 +1360,12 @@
 	dstr := fmtDuration(t.duration)
 	format := "--- %s: %s (%s)\n"
 	if t.Failed() {
-		t.flushToParent(format, "FAIL", t.name, dstr)
+		t.flushToParent(t.name, format, "FAIL", t.name, dstr)
 	} else if t.chatty {
 		if t.Skipped() {
-			t.flushToParent(format, "SKIP", t.name, dstr)
+			t.flushToParent(t.name, format, "SKIP", t.name, dstr)
 		} else {
-			t.flushToParent(format, "PASS", t.name, dstr)
+			t.flushToParent(t.name, format, "PASS", t.name, dstr)
 		}
 	}
 }
diff --git a/src/text/template/link_test.go b/src/text/template/link_test.go
index b7415d2..4eac7e6 100644
--- a/src/text/template/link_test.go
+++ b/src/text/template/link_test.go
@@ -49,7 +49,7 @@
 	if err := ioutil.WriteFile(filepath.Join(td, "x.go"), []byte(prog), 0644); err != nil {
 		t.Fatal(err)
 	}
-	cmd := exec.Command("go", "build", "-o", "x.exe", "x.go")
+	cmd := exec.Command(testenv.GoToolPath(t), "build", "-o", "x.exe", "x.go")
 	cmd.Dir = td
 	if out, err := cmd.CombinedOutput(); err != nil {
 		t.Fatalf("go build: %v, %s", err, out)
diff --git a/src/time/example_test.go b/src/time/example_test.go
index 15811a6..0afb18a 100644
--- a/src/time/example_test.go
+++ b/src/time/example_test.go
@@ -50,10 +50,11 @@
 }
 
 func ExampleDuration_String() {
-	t1 := time.Date(2016, time.August, 15, 0, 0, 0, 0, time.UTC)
-	t2 := time.Date(2017, time.February, 16, 0, 0, 0, 0, time.UTC)
-	fmt.Println(t2.Sub(t1).String())
-	// Output: 4440h0m0s
+	fmt.Println(1*time.Hour + 2*time.Minute + 300*time.Millisecond)
+	fmt.Println(300 * time.Millisecond)
+	// Output:
+	// 1h2m0.3s
+	// 300ms
 }
 
 func ExampleDuration_Truncate() {
diff --git a/test/codegen/comparisons.go b/test/codegen/comparisons.go
index c020ea8..eb2f331 100644
--- a/test/codegen/comparisons.go
+++ b/test/codegen/comparisons.go
@@ -248,3 +248,139 @@
 	}
 	return 0
 }
+
+// The following CmpToZero_ex* functions check that cmp|cmn with bmi|bpl are
+// generated for 'comparing to zero' expressions.
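+//
+// Each backquoted pattern in an arm64:`...` annotation is an assertion
+// checked by the codegen harness: the pattern must match an instruction in
+// the generated code, and a leading '-' asserts that it must not appear.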
+
+// var + const
+func CmpToZero_ex1(a int64, e int32) int {
+	// arm64:`CMN`,-`ADD`,`(BMI|BPL)`
+	if a+3 < 0 {
+		return 1
+	}
+
+	// arm64:`CMN`,-`ADD`,`BEQ`,`(BMI|BPL)`
+	if a+5 <= 0 {
+		return 1
+	}
+
+	// arm64:`CMN`,-`ADD`,`(BMI|BPL)`
+	if a+13 >= 0 {
+		return 2
+	}
+
+	// arm64:`CMP`,-`SUB`,`(BMI|BPL)`
+	if a-7 < 0 {
+		return 3
+	}
+
+	// arm64:`CMP`,-`SUB`,`(BMI|BPL)`
+	if a-11 >= 0 {
+		return 4
+	}
+
+	// arm64:`CMP`,-`SUB`,`BEQ`,`(BMI|BPL)`
+	if a-19 > 0 {
+		return 4
+	}
+
+	// arm64:`CMNW`,-`ADDW`,`(BMI|BPL)`
+	if e+3 < 0 {
+		return 5
+	}
+
+	// arm64:`CMNW`,-`ADDW`,`(BMI|BPL)`
+	if e+13 >= 0 {
+		return 6
+	}
+
+	// arm64:`CMPW`,-`SUBW`,`(BMI|BPL)`
+	if e-7 < 0 {
+		return 7
+	}
+
+	// arm64:`CMPW`,-`SUBW`,`(BMI|BPL)`
+	if e-11 >= 0 {
+		return 8
+	}
+
+	return 0
+}
+
+// var + var
+// TODO: optimize 'var - var'
+func CmpToZero_ex2(a, b, c int64, e, f, g int32) int {
+	// arm64:`CMN`,-`ADD`,`(BMI|BPL)`
+	if a+b < 0 {
+		return 1
+	}
+
+	// arm64:`CMN`,-`ADD`,`BEQ`,`(BMI|BPL)`
+	if a+c <= 0 {
+		return 1
+	}
+
+	// arm64:`CMN`,-`ADD`,`(BMI|BPL)`
+	if b+c >= 0 {
+		return 2
+	}
+
+	// arm64:`CMNW`,-`ADDW`,`(BMI|BPL)`
+	if e+f < 0 {
+		return 5
+	}
+
+	// arm64:`CMNW`,-`ADDW`,`(BMI|BPL)`
+	if f+g >= 0 {
+		return 6
+	}
+	return 0
+}
+
+// var + var*var
+func CmpToZero_ex3(a, b, c, d int64, e, f, g, h int32) int {
+	// arm64:`CMN`,-`MADD`,`MUL`,`(BMI|BPL)`
+	if a+b*c < 0 {
+		return 1
+	}
+
+	// arm64:`CMN`,-`MADD`,`MUL`,`(BMI|BPL)`
+	if b+c*d >= 0 {
+		return 2
+	}
+
+	// arm64:`CMNW`,-`MADDW`,`MULW`,`BEQ`,`(BMI|BPL)`
+	if e+f*g > 0 {
+		return 5
+	}
+
+	// arm64:`CMNW`,-`MADDW`,`MULW`,`BEQ`,`(BMI|BPL)`
+	if f+g*h <= 0 {
+		return 6
+	}
+	return 0
+}
+
+// var - var*var
+func CmpToZero_ex4(a, b, c, d int64, e, f, g, h int32) int {
+	// arm64:`CMP`,-`MSUB`,`MUL`,`BEQ`,`(BMI|BPL)`
+	if a-b*c > 0 {
+		return 1
+	}
+
+	// arm64:`CMP`,-`MSUB`,`MUL`,`(BMI|BPL)`
+	if b-c*d >= 0 {
+		return 2
+	}
+
+	// arm64:`CMPW`,-`MSUBW`,`MULW`,`(BMI|BPL)`
+	if e-f*g < 0 {
+		return 5
+	}
+
+	// arm64:`CMPW`,-`MSUBW`,`MULW`,`(BMI|BPL)`
+	if f-g*h >= 0 {
+		return 6
+	}
+	return 0
+}