[dev.boringcrypto.go1.16] all: merge go1.16.10 into dev.boringcrypto.go1.16

Change-Id: I164105d3036f0729da3f0b1dfa034f1d1d6a3a27
diff --git a/misc/wasm/wasm_exec.js b/misc/wasm/wasm_exec.js
index a0a2642..48164c3 100644
--- a/misc/wasm/wasm_exec.js
+++ b/misc/wasm/wasm_exec.js
@@ -566,9 +566,9 @@
 
 			// The linker guarantees global data starts from at least wasmMinDataAddr.
 			// Keep in sync with cmd/link/internal/ld/data.go:wasmMinDataAddr.
-			const wasmMinDataAddr = 4096 + 4096;
+			const wasmMinDataAddr = 4096 + 8192;
 			if (offset >= wasmMinDataAddr) {
-				throw new Error("command line too long");
+				throw new Error("total length of command line and environment variables exceeds limit");
 			}
 
 			this._inst.exports.run(argc, argv);
diff --git a/src/archive/zip/reader.go b/src/archive/zip/reader.go
index 801d131..b68d323 100644
--- a/src/archive/zip/reader.go
+++ b/src/archive/zip/reader.go
@@ -692,6 +692,9 @@
 		for _, file := range r.File {
 			isDir := len(file.Name) > 0 && file.Name[len(file.Name)-1] == '/'
 			name := toValidName(file.Name)
+			if name == "" {
+				continue
+			}
 			for dir := path.Dir(name); dir != "."; dir = path.Dir(dir) {
 				dirs[dir] = true
 			}
@@ -733,8 +736,11 @@
 func (r *Reader) Open(name string) (fs.File, error) {
 	r.initFileList()
 
+	if !fs.ValidPath(name) {
+		return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrInvalid}
+	}
 	e := r.openLookup(name)
-	if e == nil || !fs.ValidPath(name) {
+	if e == nil {
 		return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrNotExist}
 	}
 	if e.isDir {
@@ -748,7 +754,7 @@
 }
 
 func split(name string) (dir, elem string, isDir bool) {
-	if name[len(name)-1] == '/' {
+	if len(name) > 0 && name[len(name)-1] == '/' {
 		isDir = true
 		name = name[:len(name)-1]
 	}
diff --git a/src/archive/zip/reader_test.go b/src/archive/zip/reader_test.go
index 99f1334..1682c58 100644
--- a/src/archive/zip/reader_test.go
+++ b/src/archive/zip/reader_test.go
@@ -13,6 +13,7 @@
 	"io/fs"
 	"os"
 	"path/filepath"
+	"reflect"
 	"regexp"
 	"strings"
 	"testing"
@@ -1165,6 +1166,15 @@
 	if err != nil {
 		t.Errorf("Error reading file: %v", err)
 	}
+	if len(r.File) != 1 {
+		t.Fatalf("No entries in the file list")
+	}
+	if r.File[0].Name != "../test.txt" {
+		t.Errorf("Unexpected entry name: %s", r.File[0].Name)
+	}
+	if _, err := r.File[0].Open(); err != nil {
+		t.Errorf("Error opening file: %v", err)
+	}
 }
 
 func TestCVE202133196(t *testing.T) {
@@ -1243,3 +1253,121 @@
 		t.Fatalf("unexpected error, got: %v, want: %v", err, ErrFormat)
 	}
 }
+
+func TestCVE202141772(t *testing.T) {
+	// Archive contains a file whose name is exclusively made up of '/', '\'
+	// characters, or "../", "..\" paths, which would previously cause a panic.
+	//
+	//  Length   Method    Size  Cmpr    Date    Time   CRC-32   Name
+	// --------  ------  ------- ---- ---------- ----- --------  ----
+	//        0  Stored        0   0% 08-05-2021 18:32 00000000  /
+	//        0  Stored        0   0% 09-14-2021 12:59 00000000  //
+	//        0  Stored        0   0% 09-14-2021 12:59 00000000  \
+	//       11  Stored       11   0% 09-14-2021 13:04 0d4a1185  /test.txt
+	// --------          -------  ---                            -------
+	//       11               11   0%                            4 files
+	data := []byte{
+		0x50, 0x4b, 0x03, 0x04, 0x0a, 0x00, 0x00, 0x08,
+		0x00, 0x00, 0x06, 0x94, 0x05, 0x53, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2f, 0x50,
+		0x4b, 0x03, 0x04, 0x0a, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x78, 0x67, 0x2e, 0x53, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x02, 0x00, 0x00, 0x00, 0x2f, 0x2f, 0x50,
+		0x4b, 0x03, 0x04, 0x0a, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x78, 0x67, 0x2e, 0x53, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x01, 0x00, 0x00, 0x00, 0x5c, 0x50, 0x4b,
+		0x03, 0x04, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x91, 0x68, 0x2e, 0x53, 0x85, 0x11, 0x4a, 0x0d,
+		0x0b, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
+		0x09, 0x00, 0x00, 0x00, 0x2f, 0x74, 0x65, 0x73,
+		0x74, 0x2e, 0x74, 0x78, 0x74, 0x68, 0x65, 0x6c,
+		0x6c, 0x6f, 0x20, 0x77, 0x6f, 0x72, 0x6c, 0x64,
+		0x50, 0x4b, 0x01, 0x02, 0x14, 0x03, 0x0a, 0x00,
+		0x00, 0x08, 0x00, 0x00, 0x06, 0x94, 0x05, 0x53,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00,
+		0xed, 0x41, 0x00, 0x00, 0x00, 0x00, 0x2f, 0x50,
+		0x4b, 0x01, 0x02, 0x3f, 0x00, 0x0a, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x78, 0x67, 0x2e, 0x53, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x02, 0x00, 0x24, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00,
+		0x00, 0x1f, 0x00, 0x00, 0x00, 0x2f, 0x2f, 0x0a,
+		0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
+		0x00, 0x18, 0x00, 0x93, 0x98, 0x25, 0x57, 0x25,
+		0xa9, 0xd7, 0x01, 0x93, 0x98, 0x25, 0x57, 0x25,
+		0xa9, 0xd7, 0x01, 0x93, 0x98, 0x25, 0x57, 0x25,
+		0xa9, 0xd7, 0x01, 0x50, 0x4b, 0x01, 0x02, 0x3f,
+		0x00, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x78,
+		0x67, 0x2e, 0x53, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
+		0x00, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x20, 0x00, 0x00, 0x00, 0x3f, 0x00, 0x00,
+		0x00, 0x5c, 0x0a, 0x00, 0x20, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x01, 0x00, 0x18, 0x00, 0x93, 0x98,
+		0x25, 0x57, 0x25, 0xa9, 0xd7, 0x01, 0x93, 0x98,
+		0x25, 0x57, 0x25, 0xa9, 0xd7, 0x01, 0x93, 0x98,
+		0x25, 0x57, 0x25, 0xa9, 0xd7, 0x01, 0x50, 0x4b,
+		0x01, 0x02, 0x3f, 0x00, 0x0a, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x91, 0x68, 0x2e, 0x53, 0x85, 0x11,
+		0x4a, 0x0d, 0x0b, 0x00, 0x00, 0x00, 0x0b, 0x00,
+		0x00, 0x00, 0x09, 0x00, 0x24, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
+		0x5e, 0x00, 0x00, 0x00, 0x2f, 0x74, 0x65, 0x73,
+		0x74, 0x2e, 0x74, 0x78, 0x74, 0x0a, 0x00, 0x20,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x18,
+		0x00, 0xa9, 0x80, 0x51, 0x01, 0x26, 0xa9, 0xd7,
+		0x01, 0x31, 0xd1, 0x57, 0x01, 0x26, 0xa9, 0xd7,
+		0x01, 0xdf, 0x48, 0x85, 0xf9, 0x25, 0xa9, 0xd7,
+		0x01, 0x50, 0x4b, 0x05, 0x06, 0x00, 0x00, 0x00,
+		0x00, 0x04, 0x00, 0x04, 0x00, 0x31, 0x01, 0x00,
+		0x00, 0x90, 0x00, 0x00, 0x00, 0x00, 0x00,
+	}
+	r, err := NewReader(bytes.NewReader([]byte(data)), int64(len(data)))
+	if err != nil {
+		t.Fatalf("Error reading the archive: %v", err)
+	}
+	entryNames := []string{`/`, `//`, `\`, `/test.txt`}
+	var names []string
+	for _, f := range r.File {
+		names = append(names, f.Name)
+		if _, err := f.Open(); err != nil {
+			t.Errorf("Error opening %q: %v", f.Name, err)
+		}
+		if _, err := r.Open(f.Name); err == nil {
+			t.Errorf("Opening %q with fs.FS API succeeded", f.Name)
+		}
+	}
+	if !reflect.DeepEqual(names, entryNames) {
+		t.Errorf("Unexpected file entries: %q", names)
+	}
+	if _, err := r.Open(""); err == nil {
+		t.Errorf("Opening %q with fs.FS API succeeded", "")
+	}
+	if _, err := r.Open("test.txt"); err != nil {
+		t.Errorf("Error opening %q with fs.FS API: %v", "test.txt", err)
+	}
+	dirEntries, err := fs.ReadDir(r, ".")
+	if err != nil {
+		t.Fatalf("Error reading the root directory: %v", err)
+	}
+	if len(dirEntries) != 1 || dirEntries[0].Name() != "test.txt" {
+		t.Errorf("Unexpected directory entries")
+		for _, dirEntry := range dirEntries {
+			_, err := r.Open(dirEntry.Name())
+			t.Logf("%q (Open error: %v)", dirEntry.Name(), err)
+		}
+		t.FailNow()
+	}
+	info, err := dirEntries[0].Info()
+	if err != nil {
+		t.Fatalf("Error reading info entry: %v", err)
+	}
+	if name := info.Name(); name != "test.txt" {
+		t.Errorf("Inconsistent name in info entry: %v", name)
+	}
+}
diff --git a/src/cmd/compile/internal/ssa/gen/ARM.rules b/src/cmd/compile/internal/ssa/gen/ARM.rules
index 11c36b5..69989b0 100644
--- a/src/cmd/compile/internal/ssa/gen/ARM.rules
+++ b/src/cmd/compile/internal/ssa/gen/ARM.rules
@@ -496,9 +496,9 @@
 (XOR x (MOVWconst [c])) => (XORconst [c] x)
 (BIC x (MOVWconst [c])) => (BICconst [c] x)
 
-(SLL x (MOVWconst [c])) => (SLLconst x [c&31]) // Note: I don't think we ever generate bad constant shifts (i.e. c>=32)
-(SRL x (MOVWconst [c])) => (SRLconst x [c&31])
-(SRA x (MOVWconst [c])) => (SRAconst x [c&31])
+(SLL x (MOVWconst [c])) && 0 <= c && c < 32 => (SLLconst x [c])
+(SRL x (MOVWconst [c])) && 0 <= c && c < 32 => (SRLconst x [c])
+(SRA x (MOVWconst [c])) && 0 <= c && c < 32 => (SRAconst x [c])
 
 (CMP x (MOVWconst [c])) => (CMPconst [c] x)
 (CMP (MOVWconst [c]) x) => (InvertFlags (CMPconst [c] x))
@@ -1067,60 +1067,60 @@
 (CMNshiftRL x (MOVWconst [c]) [d]) => (CMNconst x [int32(uint32(c)>>uint64(d))])
 (CMNshiftRA x (MOVWconst [c]) [d]) => (CMNconst x [c>>uint64(d)])
 
-(ADDshiftLLreg x y (MOVWconst [c])) => (ADDshiftLL x y [c])
-(ADDshiftRLreg x y (MOVWconst [c])) => (ADDshiftRL x y [c])
-(ADDshiftRAreg x y (MOVWconst [c])) => (ADDshiftRA x y [c])
-(ADCshiftLLreg x y (MOVWconst [c]) flags) => (ADCshiftLL x y [c] flags)
-(ADCshiftRLreg x y (MOVWconst [c]) flags) => (ADCshiftRL x y [c] flags)
-(ADCshiftRAreg x y (MOVWconst [c]) flags) => (ADCshiftRA x y [c] flags)
-(ADDSshiftLLreg x y (MOVWconst [c])) => (ADDSshiftLL x y [c])
-(ADDSshiftRLreg x y (MOVWconst [c])) => (ADDSshiftRL x y [c])
-(ADDSshiftRAreg x y (MOVWconst [c])) => (ADDSshiftRA x y [c])
-(SUBshiftLLreg x y (MOVWconst [c])) => (SUBshiftLL x y [c])
-(SUBshiftRLreg x y (MOVWconst [c])) => (SUBshiftRL x y [c])
-(SUBshiftRAreg x y (MOVWconst [c])) => (SUBshiftRA x y [c])
-(SBCshiftLLreg x y (MOVWconst [c]) flags) => (SBCshiftLL x y [c] flags)
-(SBCshiftRLreg x y (MOVWconst [c]) flags) => (SBCshiftRL x y [c] flags)
-(SBCshiftRAreg x y (MOVWconst [c]) flags) => (SBCshiftRA x y [c] flags)
-(SUBSshiftLLreg x y (MOVWconst [c])) => (SUBSshiftLL x y [c])
-(SUBSshiftRLreg x y (MOVWconst [c])) => (SUBSshiftRL x y [c])
-(SUBSshiftRAreg x y (MOVWconst [c])) => (SUBSshiftRA x y [c])
-(RSBshiftLLreg x y (MOVWconst [c])) => (RSBshiftLL x y [c])
-(RSBshiftRLreg x y (MOVWconst [c])) => (RSBshiftRL x y [c])
-(RSBshiftRAreg x y (MOVWconst [c])) => (RSBshiftRA x y [c])
-(RSCshiftLLreg x y (MOVWconst [c]) flags) => (RSCshiftLL x y [c] flags)
-(RSCshiftRLreg x y (MOVWconst [c]) flags) => (RSCshiftRL x y [c] flags)
-(RSCshiftRAreg x y (MOVWconst [c]) flags) => (RSCshiftRA x y [c] flags)
-(RSBSshiftLLreg x y (MOVWconst [c])) => (RSBSshiftLL x y [c])
-(RSBSshiftRLreg x y (MOVWconst [c])) => (RSBSshiftRL x y [c])
-(RSBSshiftRAreg x y (MOVWconst [c])) => (RSBSshiftRA x y [c])
-(ANDshiftLLreg x y (MOVWconst [c])) => (ANDshiftLL x y [c])
-(ANDshiftRLreg x y (MOVWconst [c])) => (ANDshiftRL x y [c])
-(ANDshiftRAreg x y (MOVWconst [c])) => (ANDshiftRA x y [c])
-(ORshiftLLreg x y (MOVWconst [c])) => (ORshiftLL x y [c])
-(ORshiftRLreg x y (MOVWconst [c])) => (ORshiftRL x y [c])
-(ORshiftRAreg x y (MOVWconst [c])) => (ORshiftRA x y [c])
-(XORshiftLLreg x y (MOVWconst [c])) => (XORshiftLL x y [c])
-(XORshiftRLreg x y (MOVWconst [c])) => (XORshiftRL x y [c])
-(XORshiftRAreg x y (MOVWconst [c])) => (XORshiftRA x y [c])
-(BICshiftLLreg x y (MOVWconst [c])) => (BICshiftLL x y [c])
-(BICshiftRLreg x y (MOVWconst [c])) => (BICshiftRL x y [c])
-(BICshiftRAreg x y (MOVWconst [c])) => (BICshiftRA x y [c])
-(MVNshiftLLreg x (MOVWconst [c])) => (MVNshiftLL x [c])
-(MVNshiftRLreg x (MOVWconst [c])) => (MVNshiftRL x [c])
-(MVNshiftRAreg x (MOVWconst [c])) => (MVNshiftRA x [c])
-(CMPshiftLLreg x y (MOVWconst [c])) => (CMPshiftLL x y [c])
-(CMPshiftRLreg x y (MOVWconst [c])) => (CMPshiftRL x y [c])
-(CMPshiftRAreg x y (MOVWconst [c])) => (CMPshiftRA x y [c])
-(TSTshiftLLreg x y (MOVWconst [c])) => (TSTshiftLL x y [c])
-(TSTshiftRLreg x y (MOVWconst [c])) => (TSTshiftRL x y [c])
-(TSTshiftRAreg x y (MOVWconst [c])) => (TSTshiftRA x y [c])
-(TEQshiftLLreg x y (MOVWconst [c])) => (TEQshiftLL x y [c])
-(TEQshiftRLreg x y (MOVWconst [c])) => (TEQshiftRL x y [c])
-(TEQshiftRAreg x y (MOVWconst [c])) => (TEQshiftRA x y [c])
-(CMNshiftLLreg x y (MOVWconst [c])) => (CMNshiftLL x y [c])
-(CMNshiftRLreg x y (MOVWconst [c])) => (CMNshiftRL x y [c])
-(CMNshiftRAreg x y (MOVWconst [c])) => (CMNshiftRA x y [c])
+(ADDshiftLLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (ADDshiftLL x y [c])
+(ADDshiftRLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (ADDshiftRL x y [c])
+(ADDshiftRAreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (ADDshiftRA x y [c])
+(ADCshiftLLreg x y (MOVWconst [c]) flags) && 0 <= c && c < 32 => (ADCshiftLL x y [c] flags)
+(ADCshiftRLreg x y (MOVWconst [c]) flags) && 0 <= c && c < 32 => (ADCshiftRL x y [c] flags)
+(ADCshiftRAreg x y (MOVWconst [c]) flags) && 0 <= c && c < 32 => (ADCshiftRA x y [c] flags)
+(ADDSshiftLLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (ADDSshiftLL x y [c])
+(ADDSshiftRLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (ADDSshiftRL x y [c])
+(ADDSshiftRAreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (ADDSshiftRA x y [c])
+(SUBshiftLLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (SUBshiftLL x y [c])
+(SUBshiftRLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (SUBshiftRL x y [c])
+(SUBshiftRAreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (SUBshiftRA x y [c])
+(SBCshiftLLreg x y (MOVWconst [c]) flags) && 0 <= c && c < 32 => (SBCshiftLL x y [c] flags)
+(SBCshiftRLreg x y (MOVWconst [c]) flags) && 0 <= c && c < 32 => (SBCshiftRL x y [c] flags)
+(SBCshiftRAreg x y (MOVWconst [c]) flags) && 0 <= c && c < 32 => (SBCshiftRA x y [c] flags)
+(SUBSshiftLLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (SUBSshiftLL x y [c])
+(SUBSshiftRLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (SUBSshiftRL x y [c])
+(SUBSshiftRAreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (SUBSshiftRA x y [c])
+(RSBshiftLLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (RSBshiftLL x y [c])
+(RSBshiftRLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (RSBshiftRL x y [c])
+(RSBshiftRAreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (RSBshiftRA x y [c])
+(RSCshiftLLreg x y (MOVWconst [c]) flags) && 0 <= c && c < 32 => (RSCshiftLL x y [c] flags)
+(RSCshiftRLreg x y (MOVWconst [c]) flags) && 0 <= c && c < 32 => (RSCshiftRL x y [c] flags)
+(RSCshiftRAreg x y (MOVWconst [c]) flags) && 0 <= c && c < 32 => (RSCshiftRA x y [c] flags)
+(RSBSshiftLLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (RSBSshiftLL x y [c])
+(RSBSshiftRLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (RSBSshiftRL x y [c])
+(RSBSshiftRAreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (RSBSshiftRA x y [c])
+(ANDshiftLLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (ANDshiftLL x y [c])
+(ANDshiftRLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (ANDshiftRL x y [c])
+(ANDshiftRAreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (ANDshiftRA x y [c])
+(ORshiftLLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (ORshiftLL x y [c])
+(ORshiftRLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (ORshiftRL x y [c])
+(ORshiftRAreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (ORshiftRA x y [c])
+(XORshiftLLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (XORshiftLL x y [c])
+(XORshiftRLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (XORshiftRL x y [c])
+(XORshiftRAreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (XORshiftRA x y [c])
+(BICshiftLLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (BICshiftLL x y [c])
+(BICshiftRLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (BICshiftRL x y [c])
+(BICshiftRAreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (BICshiftRA x y [c])
+(MVNshiftLLreg x (MOVWconst [c])) && 0 <= c && c < 32 => (MVNshiftLL x [c])
+(MVNshiftRLreg x (MOVWconst [c])) && 0 <= c && c < 32 => (MVNshiftRL x [c])
+(MVNshiftRAreg x (MOVWconst [c])) && 0 <= c && c < 32 => (MVNshiftRA x [c])
+(CMPshiftLLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (CMPshiftLL x y [c])
+(CMPshiftRLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (CMPshiftRL x y [c])
+(CMPshiftRAreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (CMPshiftRA x y [c])
+(TSTshiftLLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (TSTshiftLL x y [c])
+(TSTshiftRLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (TSTshiftRL x y [c])
+(TSTshiftRAreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (TSTshiftRA x y [c])
+(TEQshiftLLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (TEQshiftLL x y [c])
+(TEQshiftRLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (TEQshiftRL x y [c])
+(TEQshiftRAreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (TEQshiftRA x y [c])
+(CMNshiftLLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (CMNshiftLL x y [c])
+(CMNshiftRLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (CMNshiftRL x y [c])
+(CMNshiftRAreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (CMNshiftRA x y [c])
 
 // Generate rotates
 (ADDshiftLL [c] (SRLconst x [32-c]) x) => (SRRconst [32-c] x)
@@ -1232,24 +1232,24 @@
 (AND x (MVN y)) => (BIC x y)
 
 // simplification with *shift ops
-(SUBshiftLL x (SLLconst x [c]) [d]) && c==d => (MOVWconst [0])
-(SUBshiftRL x (SRLconst x [c]) [d]) && c==d => (MOVWconst [0])
-(SUBshiftRA x (SRAconst x [c]) [d]) && c==d => (MOVWconst [0])
-(RSBshiftLL x (SLLconst x [c]) [d]) && c==d => (MOVWconst [0])
-(RSBshiftRL x (SRLconst x [c]) [d]) && c==d => (MOVWconst [0])
-(RSBshiftRA x (SRAconst x [c]) [d]) && c==d => (MOVWconst [0])
-(ANDshiftLL x y:(SLLconst x [c]) [d]) && c==d => y
-(ANDshiftRL x y:(SRLconst x [c]) [d]) && c==d => y
-(ANDshiftRA x y:(SRAconst x [c]) [d]) && c==d => y
-(ORshiftLL x y:(SLLconst x [c]) [d]) && c==d => y
-(ORshiftRL x y:(SRLconst x [c]) [d]) && c==d => y
-(ORshiftRA x y:(SRAconst x [c]) [d]) && c==d => y
-(XORshiftLL x (SLLconst x [c]) [d]) && c==d => (MOVWconst [0])
-(XORshiftRL x (SRLconst x [c]) [d]) && c==d => (MOVWconst [0])
-(XORshiftRA x (SRAconst x [c]) [d]) && c==d => (MOVWconst [0])
-(BICshiftLL x (SLLconst x [c]) [d]) && c==d => (MOVWconst [0])
-(BICshiftRL x (SRLconst x [c]) [d]) && c==d => (MOVWconst [0])
-(BICshiftRA x (SRAconst x [c]) [d]) && c==d => (MOVWconst [0])
+(SUBshiftLL (SLLconst x [c]) x [c]) => (MOVWconst [0])
+(SUBshiftRL (SRLconst x [c]) x [c]) => (MOVWconst [0])
+(SUBshiftRA (SRAconst x [c]) x [c]) => (MOVWconst [0])
+(RSBshiftLL (SLLconst x [c]) x [c]) => (MOVWconst [0])
+(RSBshiftRL (SRLconst x [c]) x [c]) => (MOVWconst [0])
+(RSBshiftRA (SRAconst x [c]) x [c]) => (MOVWconst [0])
+(ANDshiftLL y:(SLLconst x [c]) x [c]) => y
+(ANDshiftRL y:(SRLconst x [c]) x [c]) => y
+(ANDshiftRA y:(SRAconst x [c]) x [c]) => y
+(ORshiftLL y:(SLLconst x [c]) x [c]) => y
+(ORshiftRL y:(SRLconst x [c]) x [c]) => y
+(ORshiftRA y:(SRAconst x [c]) x [c]) => y
+(XORshiftLL (SLLconst x [c]) x [c]) => (MOVWconst [0])
+(XORshiftRL (SRLconst x [c]) x [c]) => (MOVWconst [0])
+(XORshiftRA (SRAconst x [c]) x [c]) => (MOVWconst [0])
+(BICshiftLL (SLLconst x [c]) x [c]) => (MOVWconst [0])
+(BICshiftRL (SRLconst x [c]) x [c]) => (MOVWconst [0])
+(BICshiftRA (SRAconst x [c]) x [c]) => (MOVWconst [0])
 (AND x (MVNshiftLL y [c])) => (BICshiftLL x y [c])
 (AND x (MVNshiftRL y [c])) => (BICshiftRL x y [c])
 (AND x (MVNshiftRA y [c])) => (BICshiftRA x y [c])
diff --git a/src/cmd/compile/internal/ssa/gen/ARM64.rules b/src/cmd/compile/internal/ssa/gen/ARM64.rules
index 3f4d0c1..80b4005 100644
--- a/src/cmd/compile/internal/ssa/gen/ARM64.rules
+++ b/src/cmd/compile/internal/ssa/gen/ARM64.rules
@@ -1652,27 +1652,27 @@
 (TSTshiftRA x (MOVDconst [c]) [d]) => (TSTconst x [c>>uint64(d)])
 
 // simplification with *shift ops
-(SUBshiftLL x (SLLconst x [c]) [d]) && c==d => (MOVDconst [0])
-(SUBshiftRL x (SRLconst x [c]) [d]) && c==d => (MOVDconst [0])
-(SUBshiftRA x (SRAconst x [c]) [d]) && c==d => (MOVDconst [0])
-(ANDshiftLL x y:(SLLconst x [c]) [d]) && c==d => y
-(ANDshiftRL x y:(SRLconst x [c]) [d]) && c==d => y
-(ANDshiftRA x y:(SRAconst x [c]) [d]) && c==d => y
-(ORshiftLL  x y:(SLLconst x [c]) [d]) && c==d => y
-(ORshiftRL  x y:(SRLconst x [c]) [d]) && c==d => y
-(ORshiftRA  x y:(SRAconst x [c]) [d]) && c==d => y
-(XORshiftLL x (SLLconst x [c]) [d]) && c==d => (MOVDconst [0])
-(XORshiftRL x (SRLconst x [c]) [d]) && c==d => (MOVDconst [0])
-(XORshiftRA x (SRAconst x [c]) [d]) && c==d => (MOVDconst [0])
-(BICshiftLL x (SLLconst x [c]) [d]) && c==d => (MOVDconst [0])
-(BICshiftRL x (SRLconst x [c]) [d]) && c==d => (MOVDconst [0])
-(BICshiftRA x (SRAconst x [c]) [d]) && c==d => (MOVDconst [0])
-(EONshiftLL x (SLLconst x [c]) [d]) && c==d => (MOVDconst [-1])
-(EONshiftRL x (SRLconst x [c]) [d]) && c==d => (MOVDconst [-1])
-(EONshiftRA x (SRAconst x [c]) [d]) && c==d => (MOVDconst [-1])
-(ORNshiftLL x (SLLconst x [c]) [d]) && c==d => (MOVDconst [-1])
-(ORNshiftRL x (SRLconst x [c]) [d]) && c==d => (MOVDconst [-1])
-(ORNshiftRA x (SRAconst x [c]) [d]) && c==d => (MOVDconst [-1])
+(SUBshiftLL (SLLconst x [c]) x [c]) => (MOVDconst [0])
+(SUBshiftRL (SRLconst x [c]) x [c]) => (MOVDconst [0])
+(SUBshiftRA (SRAconst x [c]) x [c]) => (MOVDconst [0])
+(ANDshiftLL y:(SLLconst x [c]) x [c]) => y
+(ANDshiftRL y:(SRLconst x [c]) x [c]) => y
+(ANDshiftRA y:(SRAconst x [c]) x [c]) => y
+(ORshiftLL  y:(SLLconst x [c]) x [c]) => y
+(ORshiftRL  y:(SRLconst x [c]) x [c]) => y
+(ORshiftRA  y:(SRAconst x [c]) x [c]) => y
+(XORshiftLL (SLLconst x [c]) x [c]) => (MOVDconst [0])
+(XORshiftRL (SRLconst x [c]) x [c]) => (MOVDconst [0])
+(XORshiftRA (SRAconst x [c]) x [c]) => (MOVDconst [0])
+(BICshiftLL (SLLconst x [c]) x [c]) => (MOVDconst [0])
+(BICshiftRL (SRLconst x [c]) x [c]) => (MOVDconst [0])
+(BICshiftRA (SRAconst x [c]) x [c]) => (MOVDconst [0])
+(EONshiftLL (SLLconst x [c]) x [c]) => (MOVDconst [-1])
+(EONshiftRL (SRLconst x [c]) x [c]) => (MOVDconst [-1])
+(EONshiftRA (SRAconst x [c]) x [c]) => (MOVDconst [-1])
+(ORNshiftLL (SLLconst x [c]) x [c]) => (MOVDconst [-1])
+(ORNshiftRL (SRLconst x [c]) x [c]) => (MOVDconst [-1])
+(ORNshiftRA (SRAconst x [c]) x [c]) => (MOVDconst [-1])
 
 // Generate rotates with const shift
 (ADDshiftLL [c] (SRLconst x [64-c]) x) => (RORconst [64-c] x)
diff --git a/src/cmd/compile/internal/ssa/gen/ARMOps.go b/src/cmd/compile/internal/ssa/gen/ARMOps.go
index 70c7899..1a7eefa 100644
--- a/src/cmd/compile/internal/ssa/gen/ARMOps.go
+++ b/src/cmd/compile/internal/ssa/gen/ARMOps.go
@@ -226,14 +226,15 @@
 
 		// shifts
 		{name: "SLL", argLength: 2, reg: gp21, asm: "SLL"},                    // arg0 << arg1, shift amount is mod 256
-		{name: "SLLconst", argLength: 1, reg: gp11, asm: "SLL", aux: "Int32"}, // arg0 << auxInt
+		{name: "SLLconst", argLength: 1, reg: gp11, asm: "SLL", aux: "Int32"}, // arg0 << auxInt, 0 <= auxInt < 32
 		{name: "SRL", argLength: 2, reg: gp21, asm: "SRL"},                    // arg0 >> arg1, unsigned, shift amount is mod 256
-		{name: "SRLconst", argLength: 1, reg: gp11, asm: "SRL", aux: "Int32"}, // arg0 >> auxInt, unsigned
+		{name: "SRLconst", argLength: 1, reg: gp11, asm: "SRL", aux: "Int32"}, // arg0 >> auxInt, unsigned, 0 <= auxInt < 32
 		{name: "SRA", argLength: 2, reg: gp21, asm: "SRA"},                    // arg0 >> arg1, signed, shift amount is mod 256
-		{name: "SRAconst", argLength: 1, reg: gp11, asm: "SRA", aux: "Int32"}, // arg0 >> auxInt, signed
+		{name: "SRAconst", argLength: 1, reg: gp11, asm: "SRA", aux: "Int32"}, // arg0 >> auxInt, signed, 0 <= auxInt < 32
 		{name: "SRR", argLength: 2, reg: gp21},                                // arg0 right rotate by arg1 bits
-		{name: "SRRconst", argLength: 1, reg: gp11, aux: "Int32"},             // arg0 right rotate by auxInt bits
+		{name: "SRRconst", argLength: 1, reg: gp11, aux: "Int32"},             // arg0 right rotate by auxInt bits, 0 <= auxInt < 32
 
+		// auxInt for all of these satisfy 0 <= auxInt < 32
 		{name: "ADDshiftLL", argLength: 2, reg: gp21, asm: "ADD", aux: "Int32"}, // arg0 + arg1<<auxInt
 		{name: "ADDshiftRL", argLength: 2, reg: gp21, asm: "ADD", aux: "Int32"}, // arg0 + arg1>>auxInt, unsigned shift
 		{name: "ADDshiftRA", argLength: 2, reg: gp21, asm: "ADD", aux: "Int32"}, // arg0 + arg1>>auxInt, signed shift
diff --git a/src/cmd/compile/internal/ssa/gen/genericOps.go b/src/cmd/compile/internal/ssa/gen/genericOps.go
index 8cfda35..0a7d5dd 100644
--- a/src/cmd/compile/internal/ssa/gen/genericOps.go
+++ b/src/cmd/compile/internal/ssa/gen/genericOps.go
@@ -105,7 +105,7 @@
 
 	// For shifts, AxB means the shifted value has A bits and the shift amount has B bits.
 	// Shift amounts are considered unsigned.
-	// If arg1 is known to be less than the number of bits in arg0,
+	// If arg1 is known to be nonnegative and less than the number of bits in arg0,
 	// then auxInt may be set to 1.
 	// This enables better code generation on some platforms.
 	{name: "Lsh8x8", argLength: 2, aux: "Bool"}, // arg0 << arg1
diff --git a/src/cmd/compile/internal/ssa/rewriteARM.go b/src/cmd/compile/internal/ssa/rewriteARM.go
index d9d439f..1f25005 100644
--- a/src/cmd/compile/internal/ssa/rewriteARM.go
+++ b/src/cmd/compile/internal/ssa/rewriteARM.go
@@ -1114,6 +1114,7 @@
 		return true
 	}
 	// match: (ADCshiftLLreg x y (MOVWconst [c]) flags)
+	// cond: 0 <= c && c < 32
 	// result: (ADCshiftLL x y [c] flags)
 	for {
 		x := v_0
@@ -1123,6 +1124,9 @@
 		}
 		c := auxIntToInt32(v_2.AuxInt)
 		flags := v_3
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMADCshiftLL)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg3(x, y, flags)
@@ -1194,6 +1198,7 @@
 		return true
 	}
 	// match: (ADCshiftRAreg x y (MOVWconst [c]) flags)
+	// cond: 0 <= c && c < 32
 	// result: (ADCshiftRA x y [c] flags)
 	for {
 		x := v_0
@@ -1203,6 +1208,9 @@
 		}
 		c := auxIntToInt32(v_2.AuxInt)
 		flags := v_3
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMADCshiftRA)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg3(x, y, flags)
@@ -1274,6 +1282,7 @@
 		return true
 	}
 	// match: (ADCshiftRLreg x y (MOVWconst [c]) flags)
+	// cond: 0 <= c && c < 32
 	// result: (ADCshiftRL x y [c] flags)
 	for {
 		x := v_0
@@ -1283,6 +1292,9 @@
 		}
 		c := auxIntToInt32(v_2.AuxInt)
 		flags := v_3
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMADCshiftRL)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg3(x, y, flags)
@@ -1735,6 +1747,7 @@
 		return true
 	}
 	// match: (ADDSshiftLLreg x y (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (ADDSshiftLL x y [c])
 	for {
 		x := v_0
@@ -1743,6 +1756,9 @@
 			break
 		}
 		c := auxIntToInt32(v_2.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMADDSshiftLL)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
@@ -1809,6 +1825,7 @@
 		return true
 	}
 	// match: (ADDSshiftRAreg x y (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (ADDSshiftRA x y [c])
 	for {
 		x := v_0
@@ -1817,6 +1834,9 @@
 			break
 		}
 		c := auxIntToInt32(v_2.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMADDSshiftRA)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
@@ -1883,6 +1903,7 @@
 		return true
 	}
 	// match: (ADDSshiftRLreg x y (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (ADDSshiftRL x y [c])
 	for {
 		x := v_0
@@ -1891,6 +1912,9 @@
 			break
 		}
 		c := auxIntToInt32(v_2.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMADDSshiftRL)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
@@ -2119,6 +2143,7 @@
 		return true
 	}
 	// match: (ADDshiftLLreg x y (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (ADDshiftLL x y [c])
 	for {
 		x := v_0
@@ -2127,6 +2152,9 @@
 			break
 		}
 		c := auxIntToInt32(v_2.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMADDshiftLL)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
@@ -2193,6 +2221,7 @@
 		return true
 	}
 	// match: (ADDshiftRAreg x y (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (ADDshiftRA x y [c])
 	for {
 		x := v_0
@@ -2201,6 +2230,9 @@
 			break
 		}
 		c := auxIntToInt32(v_2.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMADDshiftRA)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
@@ -2283,6 +2315,7 @@
 		return true
 	}
 	// match: (ADDshiftRLreg x y (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (ADDshiftRL x y [c])
 	for {
 		x := v_0
@@ -2291,6 +2324,9 @@
 			break
 		}
 		c := auxIntToInt32(v_2.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMADDshiftRL)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
@@ -2609,18 +2645,16 @@
 		v.AddArg(x)
 		return true
 	}
-	// match: (ANDshiftLL x y:(SLLconst x [c]) [d])
-	// cond: c==d
+	// match: (ANDshiftLL y:(SLLconst x [c]) x [c])
 	// result: y
 	for {
-		d := auxIntToInt32(v.AuxInt)
-		x := v_0
-		y := v_1
-		if y.Op != OpARMSLLconst {
+		c := auxIntToInt32(v.AuxInt)
+		y := v_0
+		if y.Op != OpARMSLLconst || auxIntToInt32(y.AuxInt) != c {
 			break
 		}
-		c := auxIntToInt32(y.AuxInt)
-		if x != y.Args[0] || !(c == d) {
+		x := y.Args[0]
+		if x != v_1 {
 			break
 		}
 		v.copyOf(y)
@@ -2650,6 +2684,7 @@
 		return true
 	}
 	// match: (ANDshiftLLreg x y (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (ANDshiftLL x y [c])
 	for {
 		x := v_0
@@ -2658,6 +2693,9 @@
 			break
 		}
 		c := auxIntToInt32(v_2.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMANDshiftLL)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
@@ -2700,18 +2738,16 @@
 		v.AddArg(x)
 		return true
 	}
-	// match: (ANDshiftRA x y:(SRAconst x [c]) [d])
-	// cond: c==d
+	// match: (ANDshiftRA y:(SRAconst x [c]) x [c])
 	// result: y
 	for {
-		d := auxIntToInt32(v.AuxInt)
-		x := v_0
-		y := v_1
-		if y.Op != OpARMSRAconst {
+		c := auxIntToInt32(v.AuxInt)
+		y := v_0
+		if y.Op != OpARMSRAconst || auxIntToInt32(y.AuxInt) != c {
 			break
 		}
-		c := auxIntToInt32(y.AuxInt)
-		if x != y.Args[0] || !(c == d) {
+		x := y.Args[0]
+		if x != v_1 {
 			break
 		}
 		v.copyOf(y)
@@ -2741,6 +2777,7 @@
 		return true
 	}
 	// match: (ANDshiftRAreg x y (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (ANDshiftRA x y [c])
 	for {
 		x := v_0
@@ -2749,6 +2786,9 @@
 			break
 		}
 		c := auxIntToInt32(v_2.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMANDshiftRA)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
@@ -2791,18 +2831,16 @@
 		v.AddArg(x)
 		return true
 	}
-	// match: (ANDshiftRL x y:(SRLconst x [c]) [d])
-	// cond: c==d
+	// match: (ANDshiftRL y:(SRLconst x [c]) x [c])
 	// result: y
 	for {
-		d := auxIntToInt32(v.AuxInt)
-		x := v_0
-		y := v_1
-		if y.Op != OpARMSRLconst {
+		c := auxIntToInt32(v.AuxInt)
+		y := v_0
+		if y.Op != OpARMSRLconst || auxIntToInt32(y.AuxInt) != c {
 			break
 		}
-		c := auxIntToInt32(y.AuxInt)
-		if x != y.Args[0] || !(c == d) {
+		x := y.Args[0]
+		if x != v_1 {
 			break
 		}
 		v.copyOf(y)
@@ -2832,6 +2870,7 @@
 		return true
 	}
 	// match: (ANDshiftRLreg x y (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (ANDshiftRL x y [c])
 	for {
 		x := v_0
@@ -2840,6 +2879,9 @@
 			break
 		}
 		c := auxIntToInt32(v_2.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMANDshiftRL)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
@@ -3086,17 +3128,15 @@
 		v.AddArg(x)
 		return true
 	}
-	// match: (BICshiftLL x (SLLconst x [c]) [d])
-	// cond: c==d
+	// match: (BICshiftLL (SLLconst x [c]) x [c])
 	// result: (MOVWconst [0])
 	for {
-		d := auxIntToInt32(v.AuxInt)
-		x := v_0
-		if v_1.Op != OpARMSLLconst {
+		c := auxIntToInt32(v.AuxInt)
+		if v_0.Op != OpARMSLLconst || auxIntToInt32(v_0.AuxInt) != c {
 			break
 		}
-		c := auxIntToInt32(v_1.AuxInt)
-		if x != v_1.Args[0] || !(c == d) {
+		x := v_0.Args[0]
+		if x != v_1 {
 			break
 		}
 		v.reset(OpARMMOVWconst)
@@ -3110,6 +3150,7 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (BICshiftLLreg x y (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (BICshiftLL x y [c])
 	for {
 		x := v_0
@@ -3118,6 +3159,9 @@
 			break
 		}
 		c := auxIntToInt32(v_2.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMBICshiftLL)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
@@ -3142,17 +3186,15 @@
 		v.AddArg(x)
 		return true
 	}
-	// match: (BICshiftRA x (SRAconst x [c]) [d])
-	// cond: c==d
+	// match: (BICshiftRA (SRAconst x [c]) x [c])
 	// result: (MOVWconst [0])
 	for {
-		d := auxIntToInt32(v.AuxInt)
-		x := v_0
-		if v_1.Op != OpARMSRAconst {
+		c := auxIntToInt32(v.AuxInt)
+		if v_0.Op != OpARMSRAconst || auxIntToInt32(v_0.AuxInt) != c {
 			break
 		}
-		c := auxIntToInt32(v_1.AuxInt)
-		if x != v_1.Args[0] || !(c == d) {
+		x := v_0.Args[0]
+		if x != v_1 {
 			break
 		}
 		v.reset(OpARMMOVWconst)
@@ -3166,6 +3208,7 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (BICshiftRAreg x y (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (BICshiftRA x y [c])
 	for {
 		x := v_0
@@ -3174,6 +3217,9 @@
 			break
 		}
 		c := auxIntToInt32(v_2.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMBICshiftRA)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
@@ -3198,17 +3244,15 @@
 		v.AddArg(x)
 		return true
 	}
-	// match: (BICshiftRL x (SRLconst x [c]) [d])
-	// cond: c==d
+	// match: (BICshiftRL (SRLconst x [c]) x [c])
 	// result: (MOVWconst [0])
 	for {
-		d := auxIntToInt32(v.AuxInt)
-		x := v_0
-		if v_1.Op != OpARMSRLconst {
+		c := auxIntToInt32(v.AuxInt)
+		if v_0.Op != OpARMSRLconst || auxIntToInt32(v_0.AuxInt) != c {
 			break
 		}
-		c := auxIntToInt32(v_1.AuxInt)
-		if x != v_1.Args[0] || !(c == d) {
+		x := v_0.Args[0]
+		if x != v_1 {
 			break
 		}
 		v.reset(OpARMMOVWconst)
@@ -3222,6 +3266,7 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (BICshiftRLreg x y (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (BICshiftRL x y [c])
 	for {
 		x := v_0
@@ -3230,6 +3275,9 @@
 			break
 		}
 		c := auxIntToInt32(v_2.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMBICshiftRL)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
@@ -3432,6 +3480,7 @@
 		return true
 	}
 	// match: (CMNshiftLLreg x y (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (CMNshiftLL x y [c])
 	for {
 		x := v_0
@@ -3440,6 +3489,9 @@
 			break
 		}
 		c := auxIntToInt32(v_2.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMCMNshiftLL)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
@@ -3506,6 +3558,7 @@
 		return true
 	}
 	// match: (CMNshiftRAreg x y (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (CMNshiftRA x y [c])
 	for {
 		x := v_0
@@ -3514,6 +3567,9 @@
 			break
 		}
 		c := auxIntToInt32(v_2.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMCMNshiftRA)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
@@ -3580,6 +3636,7 @@
 		return true
 	}
 	// match: (CMNshiftRLreg x y (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (CMNshiftRL x y [c])
 	for {
 		x := v_0
@@ -3588,6 +3645,9 @@
 			break
 		}
 		c := auxIntToInt32(v_2.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMCMNshiftRL)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
@@ -4085,6 +4145,7 @@
 		return true
 	}
 	// match: (CMPshiftLLreg x y (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (CMPshiftLL x y [c])
 	for {
 		x := v_0
@@ -4093,6 +4154,9 @@
 			break
 		}
 		c := auxIntToInt32(v_2.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMCMPshiftLL)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
@@ -4163,6 +4227,7 @@
 		return true
 	}
 	// match: (CMPshiftRAreg x y (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (CMPshiftRA x y [c])
 	for {
 		x := v_0
@@ -4171,6 +4236,9 @@
 			break
 		}
 		c := auxIntToInt32(v_2.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMCMPshiftRA)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
@@ -4241,6 +4309,7 @@
 		return true
 	}
 	// match: (CMPshiftRLreg x y (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (CMPshiftRL x y [c])
 	for {
 		x := v_0
@@ -4249,6 +4318,9 @@
 			break
 		}
 		c := auxIntToInt32(v_2.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMCMPshiftRL)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
@@ -8081,6 +8153,7 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (MVNshiftLLreg x (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (MVNshiftLL x [c])
 	for {
 		x := v_0
@@ -8088,6 +8161,9 @@
 			break
 		}
 		c := auxIntToInt32(v_1.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMMVNshiftLL)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg(x)
@@ -8115,6 +8191,7 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (MVNshiftRAreg x (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (MVNshiftRA x [c])
 	for {
 		x := v_0
@@ -8122,6 +8199,9 @@
 			break
 		}
 		c := auxIntToInt32(v_1.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMMVNshiftRA)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg(x)
@@ -8149,6 +8229,7 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (MVNshiftRLreg x (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (MVNshiftRL x [c])
 	for {
 		x := v_0
@@ -8156,6 +8237,9 @@
 			break
 		}
 		c := auxIntToInt32(v_1.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMMVNshiftRL)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg(x)
@@ -8536,18 +8620,16 @@
 		v.AddArg(x)
 		return true
 	}
-	// match: (ORshiftLL x y:(SLLconst x [c]) [d])
-	// cond: c==d
+	// match: (ORshiftLL y:(SLLconst x [c]) x [c])
 	// result: y
 	for {
-		d := auxIntToInt32(v.AuxInt)
-		x := v_0
-		y := v_1
-		if y.Op != OpARMSLLconst {
+		c := auxIntToInt32(v.AuxInt)
+		y := v_0
+		if y.Op != OpARMSLLconst || auxIntToInt32(y.AuxInt) != c {
 			break
 		}
-		c := auxIntToInt32(y.AuxInt)
-		if x != y.Args[0] || !(c == d) {
+		x := y.Args[0]
+		if x != v_1 {
 			break
 		}
 		v.copyOf(y)
@@ -8577,6 +8659,7 @@
 		return true
 	}
 	// match: (ORshiftLLreg x y (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (ORshiftLL x y [c])
 	for {
 		x := v_0
@@ -8585,6 +8668,9 @@
 			break
 		}
 		c := auxIntToInt32(v_2.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMORshiftLL)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
@@ -8627,18 +8713,16 @@
 		v.AddArg(x)
 		return true
 	}
-	// match: (ORshiftRA x y:(SRAconst x [c]) [d])
-	// cond: c==d
+	// match: (ORshiftRA y:(SRAconst x [c]) x [c])
 	// result: y
 	for {
-		d := auxIntToInt32(v.AuxInt)
-		x := v_0
-		y := v_1
-		if y.Op != OpARMSRAconst {
+		c := auxIntToInt32(v.AuxInt)
+		y := v_0
+		if y.Op != OpARMSRAconst || auxIntToInt32(y.AuxInt) != c {
 			break
 		}
-		c := auxIntToInt32(y.AuxInt)
-		if x != y.Args[0] || !(c == d) {
+		x := y.Args[0]
+		if x != v_1 {
 			break
 		}
 		v.copyOf(y)
@@ -8668,6 +8752,7 @@
 		return true
 	}
 	// match: (ORshiftRAreg x y (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (ORshiftRA x y [c])
 	for {
 		x := v_0
@@ -8676,6 +8761,9 @@
 			break
 		}
 		c := auxIntToInt32(v_2.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMORshiftRA)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
@@ -8734,18 +8822,16 @@
 		v.AddArg(x)
 		return true
 	}
-	// match: (ORshiftRL x y:(SRLconst x [c]) [d])
-	// cond: c==d
+	// match: (ORshiftRL y:(SRLconst x [c]) x [c])
 	// result: y
 	for {
-		d := auxIntToInt32(v.AuxInt)
-		x := v_0
-		y := v_1
-		if y.Op != OpARMSRLconst {
+		c := auxIntToInt32(v.AuxInt)
+		y := v_0
+		if y.Op != OpARMSRLconst || auxIntToInt32(y.AuxInt) != c {
 			break
 		}
-		c := auxIntToInt32(y.AuxInt)
-		if x != y.Args[0] || !(c == d) {
+		x := y.Args[0]
+		if x != v_1 {
 			break
 		}
 		v.copyOf(y)
@@ -8775,6 +8861,7 @@
 		return true
 	}
 	// match: (ORshiftRLreg x y (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (ORshiftRL x y [c])
 	for {
 		x := v_0
@@ -8783,6 +8870,9 @@
 			break
 		}
 		c := auxIntToInt32(v_2.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMORshiftRL)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
@@ -9070,6 +9160,7 @@
 		return true
 	}
 	// match: (RSBSshiftLLreg x y (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (RSBSshiftLL x y [c])
 	for {
 		x := v_0
@@ -9078,6 +9169,9 @@
 			break
 		}
 		c := auxIntToInt32(v_2.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMRSBSshiftLL)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
@@ -9144,6 +9238,7 @@
 		return true
 	}
 	// match: (RSBSshiftRAreg x y (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (RSBSshiftRA x y [c])
 	for {
 		x := v_0
@@ -9152,6 +9247,9 @@
 			break
 		}
 		c := auxIntToInt32(v_2.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMRSBSshiftRA)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
@@ -9218,6 +9316,7 @@
 		return true
 	}
 	// match: (RSBSshiftRLreg x y (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (RSBSshiftRL x y [c])
 	for {
 		x := v_0
@@ -9226,6 +9325,9 @@
 			break
 		}
 		c := auxIntToInt32(v_2.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMRSBSshiftRL)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
@@ -9326,17 +9428,15 @@
 		v.AddArg(x)
 		return true
 	}
-	// match: (RSBshiftLL x (SLLconst x [c]) [d])
-	// cond: c==d
+	// match: (RSBshiftLL (SLLconst x [c]) x [c])
 	// result: (MOVWconst [0])
 	for {
-		d := auxIntToInt32(v.AuxInt)
-		x := v_0
-		if v_1.Op != OpARMSLLconst {
+		c := auxIntToInt32(v.AuxInt)
+		if v_0.Op != OpARMSLLconst || auxIntToInt32(v_0.AuxInt) != c {
 			break
 		}
-		c := auxIntToInt32(v_1.AuxInt)
-		if x != v_1.Args[0] || !(c == d) {
+		x := v_0.Args[0]
+		if x != v_1 {
 			break
 		}
 		v.reset(OpARMMOVWconst)
@@ -9367,6 +9467,7 @@
 		return true
 	}
 	// match: (RSBshiftLLreg x y (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (RSBshiftLL x y [c])
 	for {
 		x := v_0
@@ -9375,6 +9476,9 @@
 			break
 		}
 		c := auxIntToInt32(v_2.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMRSBshiftLL)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
@@ -9417,17 +9521,15 @@
 		v.AddArg(x)
 		return true
 	}
-	// match: (RSBshiftRA x (SRAconst x [c]) [d])
-	// cond: c==d
+	// match: (RSBshiftRA (SRAconst x [c]) x [c])
 	// result: (MOVWconst [0])
 	for {
-		d := auxIntToInt32(v.AuxInt)
-		x := v_0
-		if v_1.Op != OpARMSRAconst {
+		c := auxIntToInt32(v.AuxInt)
+		if v_0.Op != OpARMSRAconst || auxIntToInt32(v_0.AuxInt) != c {
 			break
 		}
-		c := auxIntToInt32(v_1.AuxInt)
-		if x != v_1.Args[0] || !(c == d) {
+		x := v_0.Args[0]
+		if x != v_1 {
 			break
 		}
 		v.reset(OpARMMOVWconst)
@@ -9458,6 +9560,7 @@
 		return true
 	}
 	// match: (RSBshiftRAreg x y (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (RSBshiftRA x y [c])
 	for {
 		x := v_0
@@ -9466,6 +9569,9 @@
 			break
 		}
 		c := auxIntToInt32(v_2.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMRSBshiftRA)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
@@ -9508,17 +9614,15 @@
 		v.AddArg(x)
 		return true
 	}
-	// match: (RSBshiftRL x (SRLconst x [c]) [d])
-	// cond: c==d
+	// match: (RSBshiftRL (SRLconst x [c]) x [c])
 	// result: (MOVWconst [0])
 	for {
-		d := auxIntToInt32(v.AuxInt)
-		x := v_0
-		if v_1.Op != OpARMSRLconst {
+		c := auxIntToInt32(v.AuxInt)
+		if v_0.Op != OpARMSRLconst || auxIntToInt32(v_0.AuxInt) != c {
 			break
 		}
-		c := auxIntToInt32(v_1.AuxInt)
-		if x != v_1.Args[0] || !(c == d) {
+		x := v_0.Args[0]
+		if x != v_1 {
 			break
 		}
 		v.reset(OpARMMOVWconst)
@@ -9549,6 +9653,7 @@
 		return true
 	}
 	// match: (RSBshiftRLreg x y (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (RSBshiftRL x y [c])
 	for {
 		x := v_0
@@ -9557,6 +9662,9 @@
 			break
 		}
 		c := auxIntToInt32(v_2.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMRSBshiftRL)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
@@ -9663,6 +9771,7 @@
 		return true
 	}
 	// match: (RSCshiftLLreg x y (MOVWconst [c]) flags)
+	// cond: 0 <= c && c < 32
 	// result: (RSCshiftLL x y [c] flags)
 	for {
 		x := v_0
@@ -9672,6 +9781,9 @@
 		}
 		c := auxIntToInt32(v_2.AuxInt)
 		flags := v_3
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMRSCshiftLL)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg3(x, y, flags)
@@ -9743,6 +9855,7 @@
 		return true
 	}
 	// match: (RSCshiftRAreg x y (MOVWconst [c]) flags)
+	// cond: 0 <= c && c < 32
 	// result: (RSCshiftRA x y [c] flags)
 	for {
 		x := v_0
@@ -9752,6 +9865,9 @@
 		}
 		c := auxIntToInt32(v_2.AuxInt)
 		flags := v_3
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMRSCshiftRA)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg3(x, y, flags)
@@ -9823,6 +9939,7 @@
 		return true
 	}
 	// match: (RSCshiftRLreg x y (MOVWconst [c]) flags)
+	// cond: 0 <= c && c < 32
 	// result: (RSCshiftRL x y [c] flags)
 	for {
 		x := v_0
@@ -9832,6 +9949,9 @@
 		}
 		c := auxIntToInt32(v_2.AuxInt)
 		flags := v_3
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMRSCshiftRL)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg3(x, y, flags)
@@ -10146,6 +10266,7 @@
 		return true
 	}
 	// match: (SBCshiftLLreg x y (MOVWconst [c]) flags)
+	// cond: 0 <= c && c < 32
 	// result: (SBCshiftLL x y [c] flags)
 	for {
 		x := v_0
@@ -10155,6 +10276,9 @@
 		}
 		c := auxIntToInt32(v_2.AuxInt)
 		flags := v_3
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMSBCshiftLL)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg3(x, y, flags)
@@ -10226,6 +10350,7 @@
 		return true
 	}
 	// match: (SBCshiftRAreg x y (MOVWconst [c]) flags)
+	// cond: 0 <= c && c < 32
 	// result: (SBCshiftRA x y [c] flags)
 	for {
 		x := v_0
@@ -10235,6 +10360,9 @@
 		}
 		c := auxIntToInt32(v_2.AuxInt)
 		flags := v_3
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMSBCshiftRA)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg3(x, y, flags)
@@ -10306,6 +10434,7 @@
 		return true
 	}
 	// match: (SBCshiftRLreg x y (MOVWconst [c]) flags)
+	// cond: 0 <= c && c < 32
 	// result: (SBCshiftRL x y [c] flags)
 	for {
 		x := v_0
@@ -10315,6 +10444,9 @@
 		}
 		c := auxIntToInt32(v_2.AuxInt)
 		flags := v_3
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMSBCshiftRL)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg3(x, y, flags)
@@ -10326,15 +10458,19 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (SLL x (MOVWconst [c]))
-	// result: (SLLconst x [c&31])
+	// cond: 0 <= c && c < 32
+	// result: (SLLconst x [c])
 	for {
 		x := v_0
 		if v_1.Op != OpARMMOVWconst {
 			break
 		}
 		c := auxIntToInt32(v_1.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMSLLconst)
-		v.AuxInt = int32ToAuxInt(c & 31)
+		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg(x)
 		return true
 	}
@@ -10360,15 +10496,19 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (SRA x (MOVWconst [c]))
-	// result: (SRAconst x [c&31])
+	// cond: 0 <= c && c < 32
+	// result: (SRAconst x [c])
 	for {
 		x := v_0
 		if v_1.Op != OpARMMOVWconst {
 			break
 		}
 		c := auxIntToInt32(v_1.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMSRAconst)
-		v.AuxInt = int32ToAuxInt(c & 31)
+		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg(x)
 		return true
 	}
@@ -10452,15 +10592,19 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (SRL x (MOVWconst [c]))
-	// result: (SRLconst x [c&31])
+	// cond: 0 <= c && c < 32
+	// result: (SRLconst x [c])
 	for {
 		x := v_0
 		if v_1.Op != OpARMMOVWconst {
 			break
 		}
 		c := auxIntToInt32(v_1.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMSRLconst)
-		v.AuxInt = int32ToAuxInt(c & 31)
+		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg(x)
 		return true
 	}
@@ -11038,6 +11182,7 @@
 		return true
 	}
 	// match: (SUBSshiftLLreg x y (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (SUBSshiftLL x y [c])
 	for {
 		x := v_0
@@ -11046,6 +11191,9 @@
 			break
 		}
 		c := auxIntToInt32(v_2.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMSUBSshiftLL)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
@@ -11112,6 +11260,7 @@
 		return true
 	}
 	// match: (SUBSshiftRAreg x y (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (SUBSshiftRA x y [c])
 	for {
 		x := v_0
@@ -11120,6 +11269,9 @@
 			break
 		}
 		c := auxIntToInt32(v_2.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMSUBSshiftRA)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
@@ -11186,6 +11338,7 @@
 		return true
 	}
 	// match: (SUBSshiftRLreg x y (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (SUBSshiftRL x y [c])
 	for {
 		x := v_0
@@ -11194,6 +11347,9 @@
 			break
 		}
 		c := auxIntToInt32(v_2.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMSUBSshiftRL)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
@@ -11348,17 +11504,15 @@
 		v.AddArg(x)
 		return true
 	}
-	// match: (SUBshiftLL x (SLLconst x [c]) [d])
-	// cond: c==d
+	// match: (SUBshiftLL (SLLconst x [c]) x [c])
 	// result: (MOVWconst [0])
 	for {
-		d := auxIntToInt32(v.AuxInt)
-		x := v_0
-		if v_1.Op != OpARMSLLconst {
+		c := auxIntToInt32(v.AuxInt)
+		if v_0.Op != OpARMSLLconst || auxIntToInt32(v_0.AuxInt) != c {
 			break
 		}
-		c := auxIntToInt32(v_1.AuxInt)
-		if x != v_1.Args[0] || !(c == d) {
+		x := v_0.Args[0]
+		if x != v_1 {
 			break
 		}
 		v.reset(OpARMMOVWconst)
@@ -11389,6 +11543,7 @@
 		return true
 	}
 	// match: (SUBshiftLLreg x y (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (SUBshiftLL x y [c])
 	for {
 		x := v_0
@@ -11397,6 +11552,9 @@
 			break
 		}
 		c := auxIntToInt32(v_2.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMSUBshiftLL)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
@@ -11439,17 +11597,15 @@
 		v.AddArg(x)
 		return true
 	}
-	// match: (SUBshiftRA x (SRAconst x [c]) [d])
-	// cond: c==d
+	// match: (SUBshiftRA (SRAconst x [c]) x [c])
 	// result: (MOVWconst [0])
 	for {
-		d := auxIntToInt32(v.AuxInt)
-		x := v_0
-		if v_1.Op != OpARMSRAconst {
+		c := auxIntToInt32(v.AuxInt)
+		if v_0.Op != OpARMSRAconst || auxIntToInt32(v_0.AuxInt) != c {
 			break
 		}
-		c := auxIntToInt32(v_1.AuxInt)
-		if x != v_1.Args[0] || !(c == d) {
+		x := v_0.Args[0]
+		if x != v_1 {
 			break
 		}
 		v.reset(OpARMMOVWconst)
@@ -11480,6 +11636,7 @@
 		return true
 	}
 	// match: (SUBshiftRAreg x y (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (SUBshiftRA x y [c])
 	for {
 		x := v_0
@@ -11488,6 +11645,9 @@
 			break
 		}
 		c := auxIntToInt32(v_2.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMSUBshiftRA)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
@@ -11530,17 +11690,15 @@
 		v.AddArg(x)
 		return true
 	}
-	// match: (SUBshiftRL x (SRLconst x [c]) [d])
-	// cond: c==d
+	// match: (SUBshiftRL (SRLconst x [c]) x [c])
 	// result: (MOVWconst [0])
 	for {
-		d := auxIntToInt32(v.AuxInt)
-		x := v_0
-		if v_1.Op != OpARMSRLconst {
+		c := auxIntToInt32(v.AuxInt)
+		if v_0.Op != OpARMSRLconst || auxIntToInt32(v_0.AuxInt) != c {
 			break
 		}
-		c := auxIntToInt32(v_1.AuxInt)
-		if x != v_1.Args[0] || !(c == d) {
+		x := v_0.Args[0]
+		if x != v_1 {
 			break
 		}
 		v.reset(OpARMMOVWconst)
@@ -11571,6 +11729,7 @@
 		return true
 	}
 	// match: (SUBshiftRLreg x y (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (SUBshiftRL x y [c])
 	for {
 		x := v_0
@@ -11579,6 +11738,9 @@
 			break
 		}
 		c := auxIntToInt32(v_2.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMSUBshiftRL)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
@@ -11781,6 +11943,7 @@
 		return true
 	}
 	// match: (TEQshiftLLreg x y (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (TEQshiftLL x y [c])
 	for {
 		x := v_0
@@ -11789,6 +11952,9 @@
 			break
 		}
 		c := auxIntToInt32(v_2.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMTEQshiftLL)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
@@ -11855,6 +12021,7 @@
 		return true
 	}
 	// match: (TEQshiftRAreg x y (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (TEQshiftRA x y [c])
 	for {
 		x := v_0
@@ -11863,6 +12030,9 @@
 			break
 		}
 		c := auxIntToInt32(v_2.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMTEQshiftRA)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
@@ -11929,6 +12099,7 @@
 		return true
 	}
 	// match: (TEQshiftRLreg x y (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (TEQshiftRL x y [c])
 	for {
 		x := v_0
@@ -11937,6 +12108,9 @@
 			break
 		}
 		c := auxIntToInt32(v_2.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMTEQshiftRL)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
@@ -12139,6 +12313,7 @@
 		return true
 	}
 	// match: (TSTshiftLLreg x y (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (TSTshiftLL x y [c])
 	for {
 		x := v_0
@@ -12147,6 +12322,9 @@
 			break
 		}
 		c := auxIntToInt32(v_2.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMTSTshiftLL)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
@@ -12213,6 +12391,7 @@
 		return true
 	}
 	// match: (TSTshiftRAreg x y (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (TSTshiftRA x y [c])
 	for {
 		x := v_0
@@ -12221,6 +12400,9 @@
 			break
 		}
 		c := auxIntToInt32(v_2.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMTSTshiftRA)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
@@ -12287,6 +12469,7 @@
 		return true
 	}
 	// match: (TSTshiftRLreg x y (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (TSTshiftRL x y [c])
 	for {
 		x := v_0
@@ -12295,6 +12478,9 @@
 			break
 		}
 		c := auxIntToInt32(v_2.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMTSTshiftRL)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
@@ -12575,17 +12761,15 @@
 		v.AddArg(x)
 		return true
 	}
-	// match: (XORshiftLL x (SLLconst x [c]) [d])
-	// cond: c==d
+	// match: (XORshiftLL (SLLconst x [c]) x [c])
 	// result: (MOVWconst [0])
 	for {
-		d := auxIntToInt32(v.AuxInt)
-		x := v_0
-		if v_1.Op != OpARMSLLconst {
+		c := auxIntToInt32(v.AuxInt)
+		if v_0.Op != OpARMSLLconst || auxIntToInt32(v_0.AuxInt) != c {
 			break
 		}
-		c := auxIntToInt32(v_1.AuxInt)
-		if x != v_1.Args[0] || !(c == d) {
+		x := v_0.Args[0]
+		if x != v_1 {
 			break
 		}
 		v.reset(OpARMMOVWconst)
@@ -12616,6 +12800,7 @@
 		return true
 	}
 	// match: (XORshiftLLreg x y (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (XORshiftLL x y [c])
 	for {
 		x := v_0
@@ -12624,6 +12809,9 @@
 			break
 		}
 		c := auxIntToInt32(v_2.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMXORshiftLL)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
@@ -12666,17 +12854,15 @@
 		v.AddArg(x)
 		return true
 	}
-	// match: (XORshiftRA x (SRAconst x [c]) [d])
-	// cond: c==d
+	// match: (XORshiftRA (SRAconst x [c]) x [c])
 	// result: (MOVWconst [0])
 	for {
-		d := auxIntToInt32(v.AuxInt)
-		x := v_0
-		if v_1.Op != OpARMSRAconst {
+		c := auxIntToInt32(v.AuxInt)
+		if v_0.Op != OpARMSRAconst || auxIntToInt32(v_0.AuxInt) != c {
 			break
 		}
-		c := auxIntToInt32(v_1.AuxInt)
-		if x != v_1.Args[0] || !(c == d) {
+		x := v_0.Args[0]
+		if x != v_1 {
 			break
 		}
 		v.reset(OpARMMOVWconst)
@@ -12707,6 +12893,7 @@
 		return true
 	}
 	// match: (XORshiftRAreg x y (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (XORshiftRA x y [c])
 	for {
 		x := v_0
@@ -12715,6 +12902,9 @@
 			break
 		}
 		c := auxIntToInt32(v_2.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMXORshiftRA)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
@@ -12773,17 +12963,15 @@
 		v.AddArg(x)
 		return true
 	}
-	// match: (XORshiftRL x (SRLconst x [c]) [d])
-	// cond: c==d
+	// match: (XORshiftRL (SRLconst x [c]) x [c])
 	// result: (MOVWconst [0])
 	for {
-		d := auxIntToInt32(v.AuxInt)
-		x := v_0
-		if v_1.Op != OpARMSRLconst {
+		c := auxIntToInt32(v.AuxInt)
+		if v_0.Op != OpARMSRLconst || auxIntToInt32(v_0.AuxInt) != c {
 			break
 		}
-		c := auxIntToInt32(v_1.AuxInt)
-		if x != v_1.Args[0] || !(c == d) {
+		x := v_0.Args[0]
+		if x != v_1 {
 			break
 		}
 		v.reset(OpARMMOVWconst)
@@ -12814,6 +13002,7 @@
 		return true
 	}
 	// match: (XORshiftRLreg x y (MOVWconst [c]))
+	// cond: 0 <= c && c < 32
 	// result: (XORshiftRL x y [c])
 	for {
 		x := v_0
@@ -12822,6 +13011,9 @@
 			break
 		}
 		c := auxIntToInt32(v_2.AuxInt)
+		if !(0 <= c && c < 32) {
+			break
+		}
 		v.reset(OpARMXORshiftRL)
 		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go
index 5d5e526..e61d899 100644
--- a/src/cmd/compile/internal/ssa/rewriteARM64.go
+++ b/src/cmd/compile/internal/ssa/rewriteARM64.go
@@ -2167,18 +2167,16 @@
 		v.AddArg(x)
 		return true
 	}
-	// match: (ANDshiftLL x y:(SLLconst x [c]) [d])
-	// cond: c==d
+	// match: (ANDshiftLL y:(SLLconst x [c]) x [c])
 	// result: y
 	for {
-		d := auxIntToInt64(v.AuxInt)
-		x := v_0
-		y := v_1
-		if y.Op != OpARM64SLLconst {
+		c := auxIntToInt64(v.AuxInt)
+		y := v_0
+		if y.Op != OpARM64SLLconst || auxIntToInt64(y.AuxInt) != c {
 			break
 		}
-		c := auxIntToInt64(y.AuxInt)
-		if x != y.Args[0] || !(c == d) {
+		x := y.Args[0]
+		if x != v_1 {
 			break
 		}
 		v.copyOf(y)
@@ -2221,18 +2219,16 @@
 		v.AddArg(x)
 		return true
 	}
-	// match: (ANDshiftRA x y:(SRAconst x [c]) [d])
-	// cond: c==d
+	// match: (ANDshiftRA y:(SRAconst x [c]) x [c])
 	// result: y
 	for {
-		d := auxIntToInt64(v.AuxInt)
-		x := v_0
-		y := v_1
-		if y.Op != OpARM64SRAconst {
+		c := auxIntToInt64(v.AuxInt)
+		y := v_0
+		if y.Op != OpARM64SRAconst || auxIntToInt64(y.AuxInt) != c {
 			break
 		}
-		c := auxIntToInt64(y.AuxInt)
-		if x != y.Args[0] || !(c == d) {
+		x := y.Args[0]
+		if x != v_1 {
 			break
 		}
 		v.copyOf(y)
@@ -2275,18 +2271,16 @@
 		v.AddArg(x)
 		return true
 	}
-	// match: (ANDshiftRL x y:(SRLconst x [c]) [d])
-	// cond: c==d
+	// match: (ANDshiftRL y:(SRLconst x [c]) x [c])
 	// result: y
 	for {
-		d := auxIntToInt64(v.AuxInt)
-		x := v_0
-		y := v_1
-		if y.Op != OpARM64SRLconst {
+		c := auxIntToInt64(v.AuxInt)
+		y := v_0
+		if y.Op != OpARM64SRLconst || auxIntToInt64(y.AuxInt) != c {
 			break
 		}
-		c := auxIntToInt64(y.AuxInt)
-		if x != y.Args[0] || !(c == d) {
+		x := y.Args[0]
+		if x != v_1 {
 			break
 		}
 		v.copyOf(y)
@@ -2397,17 +2391,15 @@
 		v.AddArg(x)
 		return true
 	}
-	// match: (BICshiftLL x (SLLconst x [c]) [d])
-	// cond: c==d
+	// match: (BICshiftLL (SLLconst x [c]) x [c])
 	// result: (MOVDconst [0])
 	for {
-		d := auxIntToInt64(v.AuxInt)
-		x := v_0
-		if v_1.Op != OpARM64SLLconst {
+		c := auxIntToInt64(v.AuxInt)
+		if v_0.Op != OpARM64SLLconst || auxIntToInt64(v_0.AuxInt) != c {
 			break
 		}
-		c := auxIntToInt64(v_1.AuxInt)
-		if x != v_1.Args[0] || !(c == d) {
+		x := v_0.Args[0]
+		if x != v_1 {
 			break
 		}
 		v.reset(OpARM64MOVDconst)
@@ -2433,17 +2425,15 @@
 		v.AddArg(x)
 		return true
 	}
-	// match: (BICshiftRA x (SRAconst x [c]) [d])
-	// cond: c==d
+	// match: (BICshiftRA (SRAconst x [c]) x [c])
 	// result: (MOVDconst [0])
 	for {
-		d := auxIntToInt64(v.AuxInt)
-		x := v_0
-		if v_1.Op != OpARM64SRAconst {
+		c := auxIntToInt64(v.AuxInt)
+		if v_0.Op != OpARM64SRAconst || auxIntToInt64(v_0.AuxInt) != c {
 			break
 		}
-		c := auxIntToInt64(v_1.AuxInt)
-		if x != v_1.Args[0] || !(c == d) {
+		x := v_0.Args[0]
+		if x != v_1 {
 			break
 		}
 		v.reset(OpARM64MOVDconst)
@@ -2469,17 +2459,15 @@
 		v.AddArg(x)
 		return true
 	}
-	// match: (BICshiftRL x (SRLconst x [c]) [d])
-	// cond: c==d
+	// match: (BICshiftRL (SRLconst x [c]) x [c])
 	// result: (MOVDconst [0])
 	for {
-		d := auxIntToInt64(v.AuxInt)
-		x := v_0
-		if v_1.Op != OpARM64SRLconst {
+		c := auxIntToInt64(v.AuxInt)
+		if v_0.Op != OpARM64SRLconst || auxIntToInt64(v_0.AuxInt) != c {
 			break
 		}
-		c := auxIntToInt64(v_1.AuxInt)
-		if x != v_1.Args[0] || !(c == d) {
+		x := v_0.Args[0]
+		if x != v_1 {
 			break
 		}
 		v.reset(OpARM64MOVDconst)
@@ -3543,17 +3531,15 @@
 		v.AddArg(x)
 		return true
 	}
-	// match: (EONshiftLL x (SLLconst x [c]) [d])
-	// cond: c==d
+	// match: (EONshiftLL (SLLconst x [c]) x [c])
 	// result: (MOVDconst [-1])
 	for {
-		d := auxIntToInt64(v.AuxInt)
-		x := v_0
-		if v_1.Op != OpARM64SLLconst {
+		c := auxIntToInt64(v.AuxInt)
+		if v_0.Op != OpARM64SLLconst || auxIntToInt64(v_0.AuxInt) != c {
 			break
 		}
-		c := auxIntToInt64(v_1.AuxInt)
-		if x != v_1.Args[0] || !(c == d) {
+		x := v_0.Args[0]
+		if x != v_1 {
 			break
 		}
 		v.reset(OpARM64MOVDconst)
@@ -3579,17 +3565,15 @@
 		v.AddArg(x)
 		return true
 	}
-	// match: (EONshiftRA x (SRAconst x [c]) [d])
-	// cond: c==d
+	// match: (EONshiftRA (SRAconst x [c]) x [c])
 	// result: (MOVDconst [-1])
 	for {
-		d := auxIntToInt64(v.AuxInt)
-		x := v_0
-		if v_1.Op != OpARM64SRAconst {
+		c := auxIntToInt64(v.AuxInt)
+		if v_0.Op != OpARM64SRAconst || auxIntToInt64(v_0.AuxInt) != c {
 			break
 		}
-		c := auxIntToInt64(v_1.AuxInt)
-		if x != v_1.Args[0] || !(c == d) {
+		x := v_0.Args[0]
+		if x != v_1 {
 			break
 		}
 		v.reset(OpARM64MOVDconst)
@@ -3615,17 +3599,15 @@
 		v.AddArg(x)
 		return true
 	}
-	// match: (EONshiftRL x (SRLconst x [c]) [d])
-	// cond: c==d
+	// match: (EONshiftRL (SRLconst x [c]) x [c])
 	// result: (MOVDconst [-1])
 	for {
-		d := auxIntToInt64(v.AuxInt)
-		x := v_0
-		if v_1.Op != OpARM64SRLconst {
+		c := auxIntToInt64(v.AuxInt)
+		if v_0.Op != OpARM64SRLconst || auxIntToInt64(v_0.AuxInt) != c {
 			break
 		}
-		c := auxIntToInt64(v_1.AuxInt)
-		if x != v_1.Args[0] || !(c == d) {
+		x := v_0.Args[0]
+		if x != v_1 {
 			break
 		}
 		v.reset(OpARM64MOVDconst)
@@ -17071,17 +17053,15 @@
 		v.AddArg(x)
 		return true
 	}
-	// match: (ORNshiftLL x (SLLconst x [c]) [d])
-	// cond: c==d
+	// match: (ORNshiftLL (SLLconst x [c]) x [c])
 	// result: (MOVDconst [-1])
 	for {
-		d := auxIntToInt64(v.AuxInt)
-		x := v_0
-		if v_1.Op != OpARM64SLLconst {
+		c := auxIntToInt64(v.AuxInt)
+		if v_0.Op != OpARM64SLLconst || auxIntToInt64(v_0.AuxInt) != c {
 			break
 		}
-		c := auxIntToInt64(v_1.AuxInt)
-		if x != v_1.Args[0] || !(c == d) {
+		x := v_0.Args[0]
+		if x != v_1 {
 			break
 		}
 		v.reset(OpARM64MOVDconst)
@@ -17107,17 +17087,15 @@
 		v.AddArg(x)
 		return true
 	}
-	// match: (ORNshiftRA x (SRAconst x [c]) [d])
-	// cond: c==d
+	// match: (ORNshiftRA (SRAconst x [c]) x [c])
 	// result: (MOVDconst [-1])
 	for {
-		d := auxIntToInt64(v.AuxInt)
-		x := v_0
-		if v_1.Op != OpARM64SRAconst {
+		c := auxIntToInt64(v.AuxInt)
+		if v_0.Op != OpARM64SRAconst || auxIntToInt64(v_0.AuxInt) != c {
 			break
 		}
-		c := auxIntToInt64(v_1.AuxInt)
-		if x != v_1.Args[0] || !(c == d) {
+		x := v_0.Args[0]
+		if x != v_1 {
 			break
 		}
 		v.reset(OpARM64MOVDconst)
@@ -17143,17 +17121,15 @@
 		v.AddArg(x)
 		return true
 	}
-	// match: (ORNshiftRL x (SRLconst x [c]) [d])
-	// cond: c==d
+	// match: (ORNshiftRL (SRLconst x [c]) x [c])
 	// result: (MOVDconst [-1])
 	for {
-		d := auxIntToInt64(v.AuxInt)
-		x := v_0
-		if v_1.Op != OpARM64SRLconst {
+		c := auxIntToInt64(v.AuxInt)
+		if v_0.Op != OpARM64SRLconst || auxIntToInt64(v_0.AuxInt) != c {
 			break
 		}
-		c := auxIntToInt64(v_1.AuxInt)
-		if x != v_1.Args[0] || !(c == d) {
+		x := v_0.Args[0]
+		if x != v_1 {
 			break
 		}
 		v.reset(OpARM64MOVDconst)
@@ -17266,18 +17242,16 @@
 		v.AddArg(x)
 		return true
 	}
-	// match: (ORshiftLL x y:(SLLconst x [c]) [d])
-	// cond: c==d
+	// match: (ORshiftLL y:(SLLconst x [c]) x [c])
 	// result: y
 	for {
-		d := auxIntToInt64(v.AuxInt)
-		x := v_0
-		y := v_1
-		if y.Op != OpARM64SLLconst {
+		c := auxIntToInt64(v.AuxInt)
+		y := v_0
+		if y.Op != OpARM64SLLconst || auxIntToInt64(y.AuxInt) != c {
 			break
 		}
-		c := auxIntToInt64(y.AuxInt)
-		if x != y.Args[0] || !(c == d) {
+		x := y.Args[0]
+		if x != v_1 {
 			break
 		}
 		v.copyOf(y)
@@ -18871,18 +18845,16 @@
 		v.AddArg(x)
 		return true
 	}
-	// match: (ORshiftRA x y:(SRAconst x [c]) [d])
-	// cond: c==d
+	// match: (ORshiftRA y:(SRAconst x [c]) x [c])
 	// result: y
 	for {
-		d := auxIntToInt64(v.AuxInt)
-		x := v_0
-		y := v_1
-		if y.Op != OpARM64SRAconst {
+		c := auxIntToInt64(v.AuxInt)
+		y := v_0
+		if y.Op != OpARM64SRAconst || auxIntToInt64(y.AuxInt) != c {
 			break
 		}
-		c := auxIntToInt64(y.AuxInt)
-		if x != y.Args[0] || !(c == d) {
+		x := y.Args[0]
+		if x != v_1 {
 			break
 		}
 		v.copyOf(y)
@@ -18925,18 +18897,16 @@
 		v.AddArg(x)
 		return true
 	}
-	// match: (ORshiftRL x y:(SRLconst x [c]) [d])
-	// cond: c==d
+	// match: (ORshiftRL y:(SRLconst x [c]) x [c])
 	// result: y
 	for {
-		d := auxIntToInt64(v.AuxInt)
-		x := v_0
-		y := v_1
-		if y.Op != OpARM64SRLconst {
+		c := auxIntToInt64(v.AuxInt)
+		y := v_0
+		if y.Op != OpARM64SRLconst || auxIntToInt64(y.AuxInt) != c {
 			break
 		}
-		c := auxIntToInt64(y.AuxInt)
-		if x != y.Args[0] || !(c == d) {
+		x := y.Args[0]
+		if x != v_1 {
 			break
 		}
 		v.copyOf(y)
@@ -19968,17 +19938,15 @@
 		v.AddArg(x)
 		return true
 	}
-	// match: (SUBshiftLL x (SLLconst x [c]) [d])
-	// cond: c==d
+	// match: (SUBshiftLL (SLLconst x [c]) x [c])
 	// result: (MOVDconst [0])
 	for {
-		d := auxIntToInt64(v.AuxInt)
-		x := v_0
-		if v_1.Op != OpARM64SLLconst {
+		c := auxIntToInt64(v.AuxInt)
+		if v_0.Op != OpARM64SLLconst || auxIntToInt64(v_0.AuxInt) != c {
 			break
 		}
-		c := auxIntToInt64(v_1.AuxInt)
-		if x != v_1.Args[0] || !(c == d) {
+		x := v_0.Args[0]
+		if x != v_1 {
 			break
 		}
 		v.reset(OpARM64MOVDconst)
@@ -20004,17 +19972,15 @@
 		v.AddArg(x)
 		return true
 	}
-	// match: (SUBshiftRA x (SRAconst x [c]) [d])
-	// cond: c==d
+	// match: (SUBshiftRA (SRAconst x [c]) x [c])
 	// result: (MOVDconst [0])
 	for {
-		d := auxIntToInt64(v.AuxInt)
-		x := v_0
-		if v_1.Op != OpARM64SRAconst {
+		c := auxIntToInt64(v.AuxInt)
+		if v_0.Op != OpARM64SRAconst || auxIntToInt64(v_0.AuxInt) != c {
 			break
 		}
-		c := auxIntToInt64(v_1.AuxInt)
-		if x != v_1.Args[0] || !(c == d) {
+		x := v_0.Args[0]
+		if x != v_1 {
 			break
 		}
 		v.reset(OpARM64MOVDconst)
@@ -20040,17 +20006,15 @@
 		v.AddArg(x)
 		return true
 	}
-	// match: (SUBshiftRL x (SRLconst x [c]) [d])
-	// cond: c==d
+	// match: (SUBshiftRL (SRLconst x [c]) x [c])
 	// result: (MOVDconst [0])
 	for {
-		d := auxIntToInt64(v.AuxInt)
-		x := v_0
-		if v_1.Op != OpARM64SRLconst {
+		c := auxIntToInt64(v.AuxInt)
+		if v_0.Op != OpARM64SRLconst || auxIntToInt64(v_0.AuxInt) != c {
 			break
 		}
-		c := auxIntToInt64(v_1.AuxInt)
-		if x != v_1.Args[0] || !(c == d) {
+		x := v_0.Args[0]
+		if x != v_1 {
 			break
 		}
 		v.reset(OpARM64MOVDconst)
@@ -21139,17 +21103,15 @@
 		v.AddArg(x)
 		return true
 	}
-	// match: (XORshiftLL x (SLLconst x [c]) [d])
-	// cond: c==d
+	// match: (XORshiftLL (SLLconst x [c]) x [c])
 	// result: (MOVDconst [0])
 	for {
-		d := auxIntToInt64(v.AuxInt)
-		x := v_0
-		if v_1.Op != OpARM64SLLconst {
+		c := auxIntToInt64(v.AuxInt)
+		if v_0.Op != OpARM64SLLconst || auxIntToInt64(v_0.AuxInt) != c {
 			break
 		}
-		c := auxIntToInt64(v_1.AuxInt)
-		if x != v_1.Args[0] || !(c == d) {
+		x := v_0.Args[0]
+		if x != v_1 {
 			break
 		}
 		v.reset(OpARM64MOVDconst)
@@ -21276,17 +21238,15 @@
 		v.AddArg(x)
 		return true
 	}
-	// match: (XORshiftRA x (SRAconst x [c]) [d])
-	// cond: c==d
+	// match: (XORshiftRA (SRAconst x [c]) x [c])
 	// result: (MOVDconst [0])
 	for {
-		d := auxIntToInt64(v.AuxInt)
-		x := v_0
-		if v_1.Op != OpARM64SRAconst {
+		c := auxIntToInt64(v.AuxInt)
+		if v_0.Op != OpARM64SRAconst || auxIntToInt64(v_0.AuxInt) != c {
 			break
 		}
-		c := auxIntToInt64(v_1.AuxInt)
-		if x != v_1.Args[0] || !(c == d) {
+		x := v_0.Args[0]
+		if x != v_1 {
 			break
 		}
 		v.reset(OpARM64MOVDconst)
@@ -21330,17 +21290,15 @@
 		v.AddArg(x)
 		return true
 	}
-	// match: (XORshiftRL x (SRLconst x [c]) [d])
-	// cond: c==d
+	// match: (XORshiftRL (SRLconst x [c]) x [c])
 	// result: (MOVDconst [0])
 	for {
-		d := auxIntToInt64(v.AuxInt)
-		x := v_0
-		if v_1.Op != OpARM64SRLconst {
+		c := auxIntToInt64(v.AuxInt)
+		if v_0.Op != OpARM64SRLconst || auxIntToInt64(v_0.AuxInt) != c {
 			break
 		}
-		c := auxIntToInt64(v_1.AuxInt)
-		if x != v_1.Args[0] || !(c == d) {
+		x := v_0.Args[0]
+		if x != v_1 {
 			break
 		}
 		v.reset(OpARM64MOVDconst)
diff --git a/src/cmd/link/internal/ld/data.go b/src/cmd/link/internal/ld/data.go
index 54a1d18..7028b85 100644
--- a/src/cmd/link/internal/ld/data.go
+++ b/src/cmd/link/internal/ld/data.go
@@ -2330,10 +2330,11 @@
 	return sect, n, va
 }
 
-// On Wasm, we reserve 4096 bytes for zero page, then 4096 bytes for wasm_exec.js
-// to store command line args. Data sections starts from at least address 8192.
+// On Wasm, we reserve 4096 bytes for zero page, then 8192 bytes for wasm_exec.js
+// to store command line args and environment variables.
+// Data sections starts from at least address 12288.
 // Keep in sync with wasm_exec.js.
-const wasmMinDataAddr = 4096 + 4096
+const wasmMinDataAddr = 4096 + 8192
 
 // address assigns virtual addresses to all segments and sections and
 // returns all segments in file order.
diff --git a/src/debug/macho/file.go b/src/debug/macho/file.go
index 085b0c8..73cfce3 100644
--- a/src/debug/macho/file.go
+++ b/src/debug/macho/file.go
@@ -345,6 +345,15 @@
 			if err := binary.Read(b, bo, &hdr); err != nil {
 				return nil, err
 			}
+			if hdr.Iundefsym > uint32(len(f.Symtab.Syms)) {
+				return nil, &FormatError{offset, fmt.Sprintf(
+					"undefined symbols index in dynamic symbol table command is greater than symbol table length (%d > %d)",
+					hdr.Iundefsym, len(f.Symtab.Syms)), nil}
+			} else if hdr.Iundefsym+hdr.Nundefsym > uint32(len(f.Symtab.Syms)) {
+				return nil, &FormatError{offset, fmt.Sprintf(
+					"number of undefined symbols after index in dynamic symbol table command is greater than symbol table length (%d > %d)",
+					hdr.Iundefsym+hdr.Nundefsym, len(f.Symtab.Syms)), nil}
+			}
 			dat := make([]byte, hdr.Nindirectsyms*4)
 			if _, err := r.ReadAt(dat, int64(hdr.Indirectsymoff)); err != nil {
 				return nil, err
diff --git a/src/debug/macho/file_test.go b/src/debug/macho/file_test.go
index 03915c8..9beeb80 100644
--- a/src/debug/macho/file_test.go
+++ b/src/debug/macho/file_test.go
@@ -416,3 +416,10 @@
 		t.Errorf("got %v, want %v", TypeExec.GoString(), "macho.Exec")
 	}
 }
+
+func TestOpenBadDysymCmd(t *testing.T) {
+	_, err := openObscured("testdata/gcc-amd64-darwin-exec-with-bad-dysym.base64")
+	if err == nil {
+		t.Fatal("openObscured did not fail when opening a file with an invalid dynamic symbol table command")
+	}
+}
diff --git a/src/debug/macho/testdata/gcc-amd64-darwin-exec-with-bad-dysym.base64 b/src/debug/macho/testdata/gcc-amd64-darwin-exec-with-bad-dysym.base64
new file mode 100644
index 0000000..8e04366
--- /dev/null
+++ b/src/debug/macho/testdata/gcc-amd64-darwin-exec-with-bad-dysym.base64
@@ -0,0 +1 @@
+z/rt/gcAAAEDAACAAgAAAAsAAABoBQAAhQAAAAAAAAAZAAAASAAAAF9fUEFHRVpFUk8AAAAAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAZAAAA2AEAAF9fVEVYVAAAAAAAAAAAAAAAAAAAAQAAAAAQAAAAAAAAAAAAAAAAAAAAEAAAAAAAAAcAAAAFAAAABQAAAAAAAABfX3RleHQAAAAAAAAAAAAAX19URVhUAAAAAAAAAAAAABQPAAABAAAAbQAAAAAAAAAUDwAAAgAAAAAAAAAAAAAAAAQAgAAAAAAAAAAAAAAAAF9fc3ltYm9sX3N0dWIxAABfX1RFWFQAAAAAAAAAAAAAgQ8AAAEAAAAMAAAAAAAAAIEPAAAAAAAAAAAAAAAAAAAIBACAAAAAAAYAAAAAAAAAX19zdHViX2hlbHBlcgAAAF9fVEVYVAAAAAAAAAAAAACQDwAAAQAAABgAAAAAAAAAkA8AAAIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABfX2NzdHJpbmcAAAAAAAAAX19URVhUAAAAAAAAAAAAAKgPAAABAAAADQAAAAAAAACoDwAAAAAAAAAAAAAAAAAAAgAAAAAAAAAAAAAAAAAAAF9fZWhfZnJhbWUAAAAAAABfX1RFWFQAAAAAAAAAAAAAuA8AAAEAAABIAAAAAAAAALgPAAADAAAAAAAAAAAAAAALAABgAAAAAAAAAAAAAAAAGQAAADgBAABfX0RBVEEAAAAAAAAAAAAAABAAAAEAAAAAEAAAAAAAAAAQAAAAAAAAABAAAAAAAAAHAAAAAwAAAAMAAAAAAAAAX19kYXRhAAAAAAAAAAAAAF9fREFUQQAAAAAAAAAAAAAAEAAAAQAAABwAAAAAAAAAABAAAAMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABfX2R5bGQAAAAAAAAAAAAAX19EQVRBAAAAAAAAAAAAACAQAAABAAAAOAAAAAAAAAAgEAAAAwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAF9fbGFfc3ltYm9sX3B0cgBfX0RBVEEAAAAAAAAAAAAAWBAAAAEAAAAQAAAAAAAAAFgQAAACAAAAAAAAAAAAAAAHAAAAAgAAAAAAAAAAAAAAGQAAAEgAAABfX0xJTktFRElUAAAAAAAAACAAAAEAAAAAEAAAAAAAAAAgAAAAAAAAQAEAAAAAAAAHAAAAAQAAAAAAAAAAAAAAAgAAABgAAAAAIAAACwAAAMAgAACAAAAACwAAAFAAAAAAAAAAAgAAAAIAAAAHAAAACQAAAP8AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACwIAAABAAAAAAAAAAAAAAAAAAAAAAAAAAOAAAAIAAAAAwAAAAvdXNyL2xpYi9keWxkAAAAAAAAABsAAAAYAAAAOyS4cg5FdtQoqu6JsMEhXQUAAAC4AAAABAAAACoAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABQPAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAMAAAAOAAAABgAAAACAAAAAAABAAAAAQAvdXNyL2xpYi9saWJnY2Nfcy4xLmR5bGliAAAAAAAAAAwAAAA4AAAAGAAAAAIAAAAEAW8AAAABAC91c3IvbGliL2xpYlN5c3RlbS5CLmR5bGliAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABqAEiJ5UiD5PBIi30ISI11EIn6g8IBweIDSAHySInR6wRIg8EISIM5AHX2SIPBCOgiAAAAicfoMgAAAPRBU0yNHafw//9BU/8lvwAAAA8fAP8lvgAAAFVIieVIjT0zAAAA6A0AAAC4AAAAAMnD/yXRAAAA/yXTAAAAAAAATI0dwQAAAOm0////TI0dvQAAAOmo////aGVsbG8sIHdvcmxkAAAAABQAAAAAAAAAAXpSAAF4EAEQDAcIkAEAACwAAAAcAAAAkv////////8XAAAAAAAAAAAEAQAAAA4QhgIEAwAAAA0GAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABDAX/9/AAAIEMBf/38AAAAAAAABAAAAGBAAAAEAAAAQEAAAAQAAAAgQAAABAAAAABAAAAEAAACQDwAAAQAAAJwPAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAHgEAAFAPAAABAAAAGwAAAB4BAABkDwAAAQAAAC4AAAAPBgAAGBAAAAEAAAA2AAAADwYAABAQAAABAAAAPgAAAA8GAAAAEAAAAQAAAEoAAAADABAAAAAAAAEAAABeAAAADwYAAAgQAAABAAAAZwAAAA8BAABqDwAAAQAAAG0AAAAPAQAAFA8AAAEAAABzAAAAAQABAgAAAAAAAAAAeQAAAAEAAQIAAAAAAAAAAAkAAAAKAAAACQAAAAoAAAAgAGR5bGRfc3R1Yl9iaW5kaW5nX2hlbHBlcgBfX2R5bGRfZnVuY19sb29rdXAAX05YQXJnYwBfTlhBcmd2AF9fX3Byb2duYW1lAF9fbWhfZXhlY3V0ZV9oZWFkZXIAX2Vudmlyb24AX21haW4Ac3RhcnQAX2V4aXQAX3B1dHMAAA==
\ No newline at end of file
diff --git a/src/go.mod b/src/go.mod
index 17fc364..25e7992 100644
--- a/src/go.mod
+++ b/src/go.mod
@@ -4,7 +4,7 @@
 
 require (
 	golang.org/x/crypto v0.0.0-20201016220609-9e8e0b390897
-	golang.org/x/net v0.0.0-20210901185431-d2e9a4ea682f
+	golang.org/x/net v0.0.0-20211101194150-d8c3cde3c676
 	golang.org/x/sys v0.0.0-20201204225414-ed752295db88 // indirect
 	golang.org/x/text v0.3.4 // indirect
 )
diff --git a/src/go.sum b/src/go.sum
index cea3ab4..e5467ea 100644
--- a/src/go.sum
+++ b/src/go.sum
@@ -2,8 +2,8 @@
 golang.org/x/crypto v0.0.0-20201016220609-9e8e0b390897 h1:pLI5jrR7OSLijeIDcmRxNmw2api+jEfxLoykJVice/E=
 golang.org/x/crypto v0.0.0-20201016220609-9e8e0b390897/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
 golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
-golang.org/x/net v0.0.0-20210901185431-d2e9a4ea682f h1:pkBhRt1hlsctT9xYRDknb4y+e/yiacRWlnft5a4QH8Y=
-golang.org/x/net v0.0.0-20210901185431-d2e9a4ea682f/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
+golang.org/x/net v0.0.0-20211101194150-d8c3cde3c676 h1:HdGRNh1uWfD+9WfeSGYgD64N7Nmu3oMnSkdZAONtoU8=
+golang.org/x/net v0.0.0-20211101194150-d8c3cde3c676/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
 golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
diff --git a/src/net/http/h2_bundle.go b/src/net/http/h2_bundle.go
index 22f1e78..0fd8da1 100644
--- a/src/net/http/h2_bundle.go
+++ b/src/net/http/h2_bundle.go
@@ -52,6 +52,48 @@
 	"golang.org/x/net/idna"
 )
 
+// asciiEqualFold is strings.EqualFold, ASCII only. It reports whether s and t
+// are equal, ASCII-case-insensitively.
+func http2asciiEqualFold(s, t string) bool {
+	if len(s) != len(t) {
+		return false
+	}
+	for i := 0; i < len(s); i++ {
+		if http2lower(s[i]) != http2lower(t[i]) {
+			return false
+		}
+	}
+	return true
+}
+
+// lower returns the ASCII lowercase version of b.
+func http2lower(b byte) byte {
+	if 'A' <= b && b <= 'Z' {
+		return b + ('a' - 'A')
+	}
+	return b
+}
+
+// isASCIIPrint returns whether s is ASCII and printable according to
+// https://tools.ietf.org/html/rfc20#section-4.2.
+func http2isASCIIPrint(s string) bool {
+	for i := 0; i < len(s); i++ {
+		if s[i] < ' ' || s[i] > '~' {
+			return false
+		}
+	}
+	return true
+}
+
+// asciiToLower returns the lowercase version of s if s is ASCII and printable,
+// and whether or not it was.
+func http2asciiToLower(s string) (lower string, ok bool) {
+	if !http2isASCIIPrint(s) {
+		return "", false
+	}
+	return strings.ToLower(s), true
+}
+
 // A list of the possible cipher suite ids. Taken from
 // https://www.iana.org/assignments/tls-parameters/tls-parameters.txt
 
@@ -690,6 +732,12 @@
 
 // ClientConnPool manages a pool of HTTP/2 client connections.
 type http2ClientConnPool interface {
+	// GetClientConn returns a specific HTTP/2 connection (usually
+	// a TLS-TCP connection) to an HTTP/2 server. On success, the
+	// returned ClientConn accounts for the upcoming RoundTrip
+	// call, so the caller should not omit it. If the caller needs
+	// to, ClientConn.RoundTrip can be called with a bogus
+	// new(http.Request) to release the stream reservation.
 	GetClientConn(req *Request, addr string) (*http2ClientConn, error)
 	MarkDead(*http2ClientConn)
 }
@@ -716,7 +764,7 @@
 	conns        map[string][]*http2ClientConn // key is host:port
 	dialing      map[string]*http2dialCall     // currently in-flight dials
 	keys         map[*http2ClientConn][]string
-	addConnCalls map[string]*http2addConnCall // in-flight addConnIfNeede calls
+	addConnCalls map[string]*http2addConnCall // in-flight addConnIfNeeded calls
 }
 
 func (p *http2clientConnPool) GetClientConn(req *Request, addr string) (*http2ClientConn, error) {
@@ -728,87 +776,85 @@
 	http2noDialOnMiss = false
 )
 
-// shouldTraceGetConn reports whether getClientConn should call any
-// ClientTrace.GetConn hook associated with the http.Request.
-//
-// This complexity is needed to avoid double calls of the GetConn hook
-// during the back-and-forth between net/http and x/net/http2 (when the
-// net/http.Transport is upgraded to also speak http2), as well as support
-// the case where x/net/http2 is being used directly.
-func (p *http2clientConnPool) shouldTraceGetConn(st http2clientConnIdleState) bool {
-	// If our Transport wasn't made via ConfigureTransport, always
-	// trace the GetConn hook if provided, because that means the
-	// http2 package is being used directly and it's the one
-	// dialing, as opposed to net/http.
-	if _, ok := p.t.ConnPool.(http2noDialClientConnPool); !ok {
-		return true
-	}
-	// Otherwise, only use the GetConn hook if this connection has
-	// been used previously for other requests. For fresh
-	// connections, the net/http package does the dialing.
-	return !st.freshConn
-}
-
 func (p *http2clientConnPool) getClientConn(req *Request, addr string, dialOnMiss bool) (*http2ClientConn, error) {
+	// TODO(dneil): Dial a new connection when t.DisableKeepAlives is set?
 	if http2isConnectionCloseRequest(req) && dialOnMiss {
 		// It gets its own connection.
 		http2traceGetConn(req, addr)
 		const singleUse = true
-		cc, err := p.t.dialClientConn(addr, singleUse)
+		cc, err := p.t.dialClientConn(req.Context(), addr, singleUse)
 		if err != nil {
 			return nil, err
 		}
 		return cc, nil
 	}
-	p.mu.Lock()
-	for _, cc := range p.conns[addr] {
-		if st := cc.idleState(); st.canTakeNewRequest {
-			if p.shouldTraceGetConn(st) {
-				http2traceGetConn(req, addr)
+	for {
+		p.mu.Lock()
+		for _, cc := range p.conns[addr] {
+			if cc.ReserveNewRequest() {
+				// When a connection is presented to us by the net/http package,
+				// the GetConn hook has already been called.
+				// Don't call it a second time here.
+				if !cc.getConnCalled {
+					http2traceGetConn(req, addr)
+				}
+				cc.getConnCalled = false
+				p.mu.Unlock()
+				return cc, nil
 			}
+		}
+		if !dialOnMiss {
 			p.mu.Unlock()
+			return nil, http2ErrNoCachedConn
+		}
+		http2traceGetConn(req, addr)
+		call := p.getStartDialLocked(req.Context(), addr)
+		p.mu.Unlock()
+		<-call.done
+		if http2shouldRetryDial(call, req) {
+			continue
+		}
+		cc, err := call.res, call.err
+		if err != nil {
+			return nil, err
+		}
+		if cc.ReserveNewRequest() {
 			return cc, nil
 		}
 	}
-	if !dialOnMiss {
-		p.mu.Unlock()
-		return nil, http2ErrNoCachedConn
-	}
-	http2traceGetConn(req, addr)
-	call := p.getStartDialLocked(addr)
-	p.mu.Unlock()
-	<-call.done
-	return call.res, call.err
 }
 
 // dialCall is an in-flight Transport dial call to a host.
 type http2dialCall struct {
-	_    http2incomparable
-	p    *http2clientConnPool
+	_ http2incomparable
+	p *http2clientConnPool
+	// the context associated with the request
+	// that created this dialCall
+	ctx  context.Context
 	done chan struct{}    // closed when done
 	res  *http2ClientConn // valid after done is closed
 	err  error            // valid after done is closed
 }
 
 // requires p.mu is held.
-func (p *http2clientConnPool) getStartDialLocked(addr string) *http2dialCall {
+func (p *http2clientConnPool) getStartDialLocked(ctx context.Context, addr string) *http2dialCall {
 	if call, ok := p.dialing[addr]; ok {
 		// A dial is already in-flight. Don't start another.
 		return call
 	}
-	call := &http2dialCall{p: p, done: make(chan struct{})}
+	call := &http2dialCall{p: p, done: make(chan struct{}), ctx: ctx}
 	if p.dialing == nil {
 		p.dialing = make(map[string]*http2dialCall)
 	}
 	p.dialing[addr] = call
-	go call.dial(addr)
+	go call.dial(call.ctx, addr)
 	return call
 }
 
 // run in its own goroutine.
-func (c *http2dialCall) dial(addr string) {
+func (c *http2dialCall) dial(ctx context.Context, addr string) {
 	const singleUse = false // shared conn
-	c.res, c.err = c.p.t.dialClientConn(addr, singleUse)
+	c.res, c.err = c.p.t.dialClientConn(ctx, addr, singleUse)
 	close(c.done)
 
 	c.p.mu.Lock()
@@ -871,6 +917,7 @@
 	if err != nil {
 		c.err = err
 	} else {
+		cc.getConnCalled = true // already called by the net/http package
 		p.addConnLocked(key, cc)
 	}
 	delete(p.addConnCalls, key)
@@ -953,6 +1000,31 @@
 	return p.getClientConn(req, addr, http2noDialOnMiss)
 }
 
+// shouldRetryDial reports whether the current request should
+// retry dialing after the call finished unsuccessfully, for example
+// if the dial was canceled because of a context cancellation or
+// deadline expiry.
+func http2shouldRetryDial(call *http2dialCall, req *Request) bool {
+	if call.err == nil {
+		// No error, no need to retry
+		return false
+	}
+	if call.ctx == req.Context() {
+		// If the call has the same context as the request, the dial
+		// should not be retried, since any cancellation will have come
+		// from this request.
+		return false
+	}
+	if !errors.Is(call.err, context.Canceled) && !errors.Is(call.err, context.DeadlineExceeded) {
+		// If the call error is not because of a context cancellation or a deadline expiry,
+		// the dial should not be retried.
+		return false
+	}
+	// Only retry if the error is a context cancellation error or deadline expiry
+	// and the context associated with the call was canceled or expired.
+	return call.ctx.Err() != nil
+}
+
 // Buffer chunks are allocated from a pool to reduce pressure on GC.
 // The maximum wasted space per dataBuffer is 2x the largest size class,
 // which happens when the dataBuffer has multiple chunks and there is
@@ -1148,6 +1220,11 @@
 	Cause    error // optional additional detail
 }
 
+// errFromPeer is a sentinel error value for StreamError.Cause to
+// indicate that the StreamError was sent from the peer over the wire
+// and wasn't locally generated in the Transport.
+var http2errFromPeer = errors.New("received from peer")
+
 func http2streamError(id uint32, code http2ErrCode) http2StreamError {
 	return http2StreamError{StreamID: id, Code: code}
 }
@@ -2261,7 +2338,7 @@
 			return nil, err
 		}
 	}
-	if len(p)-int(padLength) <= 0 {
+	if len(p)-int(padLength) < 0 {
 		return nil, http2streamError(fh.StreamID, http2ErrCodeProtocol)
 	}
 	hf.headerFragBuf = p[:len(p)-int(padLength)]
@@ -3094,12 +3171,12 @@
 	}
 }
 
-func http2lowerHeader(v string) string {
+func http2lowerHeader(v string) (lower string, ascii bool) {
 	http2buildCommonHeaderMapsOnce()
 	if s, ok := http2commonLowerHeader[v]; ok {
-		return s
+		return s, true
 	}
-	return strings.ToLower(v)
+	return http2asciiToLower(v)
 }
 
 var (
@@ -3480,6 +3557,17 @@
 	io.Reader
 }
 
+// setBuffer initializes the pipe buffer.
+// It has no effect if the pipe is already closed.
+func (p *http2pipe) setBuffer(b http2pipeBuffer) {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	if p.err != nil || p.breakErr != nil {
+		return
+	}
+	p.b = b
+}
+
 func (p *http2pipe) Len() int {
 	p.mu.Lock()
 	defer p.mu.Unlock()
@@ -3831,16 +3919,12 @@
 
 	s.TLSConfig.PreferServerCipherSuites = true
 
-	haveNPN := false
-	for _, p := range s.TLSConfig.NextProtos {
-		if p == http2NextProtoTLS {
-			haveNPN = true
-			break
-		}
-	}
-	if !haveNPN {
+	if !http2strSliceContains(s.TLSConfig.NextProtos, http2NextProtoTLS) {
 		s.TLSConfig.NextProtos = append(s.TLSConfig.NextProtos, http2NextProtoTLS)
 	}
+	if !http2strSliceContains(s.TLSConfig.NextProtos, "http/1.1") {
+		s.TLSConfig.NextProtos = append(s.TLSConfig.NextProtos, "http/1.1")
+	}
 
 	if s.TLSNextProto == nil {
 		s.TLSNextProto = map[string]func(*Server, *tls.Conn, Handler){}
@@ -4873,7 +4957,9 @@
 	sc.shutdownOnce.Do(func() { sc.sendServeMsg(http2gracefulShutdownMsg) })
 }
 
-// After sending GOAWAY, the connection will close after goAwayTimeout.
+// After sending GOAWAY with an error code (non-graceful shutdown), the
+// connection will close after goAwayTimeout.
+//
 // If we close the connection immediately after sending GOAWAY, there may
 // be unsent data in our kernel receive buffer, which will cause the kernel
 // to send a TCP RST on close() instead of a FIN. This RST will abort the
@@ -5209,23 +5295,37 @@
 
 func (sc *http2serverConn) processData(f *http2DataFrame) error {
 	sc.serveG.check()
-	if sc.inGoAway && sc.goAwayCode != http2ErrCodeNo {
+	id := f.Header().StreamID
+	if sc.inGoAway && (sc.goAwayCode != http2ErrCodeNo || id > sc.maxClientStreamID) {
+		// Discard all DATA frames if the GOAWAY is due to an
+		// error, or:
+		//
+		// Section 6.8: After sending a GOAWAY frame, the sender
+		// can discard frames for streams initiated by the
+		// receiver with identifiers higher than the identified
+		// last stream.
 		return nil
 	}
-	data := f.Data()
 
-	// "If a DATA frame is received whose stream is not in "open"
-	// or "half closed (local)" state, the recipient MUST respond
-	// with a stream error (Section 5.4.2) of type STREAM_CLOSED."
-	id := f.Header().StreamID
+	data := f.Data()
 	state, st := sc.state(id)
 	if id == 0 || state == http2stateIdle {
+		// Section 6.1: "DATA frames MUST be associated with a
+		// stream. If a DATA frame is received whose stream
+		// identifier field is 0x0, the recipient MUST respond
+		// with a connection error (Section 5.4.1) of type
+		// PROTOCOL_ERROR."
+		//
 		// Section 5.1: "Receiving any frame other than HEADERS
 		// or PRIORITY on a stream in this state MUST be
 		// treated as a connection error (Section 5.4.1) of
 		// type PROTOCOL_ERROR."
 		return http2ConnectionError(http2ErrCodeProtocol)
 	}
+
+	// "If a DATA frame is received whose stream is not in "open"
+	// or "half closed (local)" state, the recipient MUST respond
+	// with a stream error (Section 5.4.2) of type STREAM_CLOSED."
 	if st == nil || state != http2stateOpen || st.gotTrailerHeader || st.resetQueued {
 		// This includes sending a RST_STREAM if the stream is
 		// in stateHalfClosedLocal (which currently means that
@@ -6353,8 +6453,12 @@
 		// but PUSH_PROMISE requests cannot have a body.
 		// http://tools.ietf.org/html/rfc7540#section-8.2
 		// Also disallow Host, since the promised URL must be absolute.
-		switch strings.ToLower(k) {
-		case "content-length", "content-encoding", "trailer", "te", "expect", "host":
+		if http2asciiEqualFold(k, "content-length") ||
+			http2asciiEqualFold(k, "content-encoding") ||
+			http2asciiEqualFold(k, "trailer") ||
+			http2asciiEqualFold(k, "te") ||
+			http2asciiEqualFold(k, "expect") ||
+			http2asciiEqualFold(k, "host") {
 			return fmt.Errorf("promised request headers cannot include %q", k)
 		}
 	}
@@ -6562,6 +6666,15 @@
 	http2transportDefaultStreamMinRefresh = 4 << 10
 
 	http2defaultUserAgent = "Go-http-client/2.0"
+
+	// initialMaxConcurrentStreams is a connections maxConcurrentStreams until
+	// it's received servers initial SETTINGS frame, which corresponds with the
+	// spec's minimum recommended value.
+	http2initialMaxConcurrentStreams = 100
+
+	// defaultMaxConcurrentStreams is a connections default maxConcurrentStreams
+	// if the server doesn't include one in its initial SETTINGS frame.
+	http2defaultMaxConcurrentStreams = 1000
 )
 
 // Transport is an HTTP/2 Transport.
@@ -6738,11 +6851,12 @@
 // ClientConn is the state of a single HTTP/2 client connection to an
 // HTTP/2 server.
 type http2ClientConn struct {
-	t         *http2Transport
-	tconn     net.Conn             // usually *tls.Conn, except specialized impls
-	tlsState  *tls.ConnectionState // nil only for specialized impls
-	reused    uint32               // whether conn is being reused; atomic
-	singleUse bool                 // whether being used for a single http.Request
+	t             *http2Transport
+	tconn         net.Conn             // usually *tls.Conn, except specialized impls
+	tlsState      *tls.ConnectionState // nil only for specialized impls
+	reused        uint32               // whether conn is being reused; atomic
+	singleUse     bool                 // whether being used for a single http.Request
+	getConnCalled bool                 // used by clientConnPool
 
 	// readLoop goroutine fields:
 	readerDone chan struct{} // closed on error
@@ -6755,87 +6869,94 @@
 	cond            *sync.Cond // hold mu; broadcast on flow/closed changes
 	flow            http2flow  // our conn-level flow control quota (cs.flow is per stream)
 	inflow          http2flow  // peer's conn-level flow control
+	doNotReuse      bool       // whether conn is marked to not be reused for any future requests
 	closing         bool
 	closed          bool
+	seenSettings    bool                          // true if we've seen a settings frame, false otherwise
 	wantSettingsAck bool                          // we sent a SETTINGS frame and haven't heard back
 	goAway          *http2GoAwayFrame             // if non-nil, the GoAwayFrame we received
 	goAwayDebug     string                        // goAway frame's debug data, retained as a string
 	streams         map[uint32]*http2clientStream // client-initiated
+	streamsReserved int                           // incr by ReserveNewRequest; decr on RoundTrip
 	nextStreamID    uint32
 	pendingRequests int                       // requests blocked and waiting to be sent because len(streams) == maxConcurrentStreams
 	pings           map[[8]byte]chan struct{} // in flight ping data to notification channel
-	bw              *bufio.Writer
 	br              *bufio.Reader
-	fr              *http2Framer
 	lastActive      time.Time
 	lastIdle        time.Time // time last idle
-	// Settings from peer: (also guarded by mu)
+	// Settings from peer: (also guarded by wmu)
 	maxFrameSize          uint32
 	maxConcurrentStreams  uint32
 	peerMaxHeaderListSize uint64
 	initialWindowSize     uint32
 
-	hbuf    bytes.Buffer // HPACK encoder writes into this
-	henc    *hpack.Encoder
-	freeBuf [][]byte
+	// reqHeaderMu is a 1-element semaphore channel controlling access to sending new requests.
+	// Write to reqHeaderMu to lock it, read from it to unlock.
+	// Lock reqmu BEFORE mu or wmu.
+	reqHeaderMu chan struct{}
 
-	wmu  sync.Mutex // held while writing; acquire AFTER mu if holding both
-	werr error      // first write error that has occurred
+	// wmu is held while writing.
+	// Acquire BEFORE mu when holding both, to avoid blocking mu on network writes.
+	// Only acquire both at the same time when changing peer settings.
+	wmu  sync.Mutex
+	bw   *bufio.Writer
+	fr   *http2Framer
+	werr error        // first write error that has occurred
+	hbuf bytes.Buffer // HPACK encoder writes into this
+	henc *hpack.Encoder
 }
 
 // clientStream is the state for a single HTTP/2 stream. One of these
 // is created for each Transport.RoundTrip call.
 type http2clientStream struct {
-	cc            *http2ClientConn
-	req           *Request
+	cc *http2ClientConn
+
+	// Fields of Request that we may access even after the response body is closed.
+	ctx       context.Context
+	reqCancel <-chan struct{}
+
 	trace         *httptrace.ClientTrace // or nil
 	ID            uint32
-	resc          chan http2resAndError
 	bufPipe       http2pipe // buffered pipe with the flow-controlled response payload
-	startedWrite  bool      // started request body write; guarded by cc.mu
 	requestedGzip bool
-	on100         func() // optional code to run if get a 100 continue response
+	isHead        bool
+
+	abortOnce sync.Once
+	abort     chan struct{} // closed to signal stream should end immediately
+	abortErr  error         // set if abort is closed
+
+	peerClosed chan struct{} // closed when the peer sends an END_STREAM flag
+	donec      chan struct{} // closed after the stream is in the closed state
+	on100      chan struct{} // buffered; written to if a 100 is received
+
+	respHeaderRecv chan struct{} // closed when headers are received
+	res            *Response     // set if respHeaderRecv is closed
 
 	flow        http2flow // guarded by cc.mu
 	inflow      http2flow // guarded by cc.mu
 	bytesRemain int64     // -1 means unknown; owned by transportResponseBody.Read
 	readErr     error     // sticky read error; owned by transportResponseBody.Read
-	stopReqBody error     // if non-nil, stop writing req body; guarded by cc.mu
-	didReset    bool      // whether we sent a RST_STREAM to the server; guarded by cc.mu
 
-	peerReset chan struct{} // closed on peer reset
-	resetErr  error         // populated before peerReset is closed
+	reqBody              io.ReadCloser
+	reqBodyContentLength int64 // -1 means unknown
+	reqBodyClosed        bool  // body has been closed; guarded by cc.mu
 
-	done chan struct{} // closed when stream remove from cc.streams map; close calls guarded by cc.mu
+	// owned by writeRequest:
+	sentEndStream bool // sent an END_STREAM flag to the peer
+	sentHeaders   bool
 
 	// owned by clientConnReadLoop:
 	firstByte    bool  // got the first response byte
 	pastHeaders  bool  // got first MetaHeadersFrame (actual headers)
 	pastTrailers bool  // got optional second MetaHeadersFrame (trailers)
 	num1xx       uint8 // number of 1xx responses seen
+	readClosed   bool  // peer sent an END_STREAM flag
+	readAborted  bool  // read loop reset the stream
 
 	trailer    Header  // accumulated trailers
 	resTrailer *Header // client's Response.Trailer
 }
 
-// awaitRequestCancel waits for the user to cancel a request or for the done
-// channel to be signaled. A non-nil error is returned only if the request was
-// canceled.
-func http2awaitRequestCancel(req *Request, done <-chan struct{}) error {
-	ctx := req.Context()
-	if req.Cancel == nil && ctx.Done() == nil {
-		return nil
-	}
-	select {
-	case <-req.Cancel:
-		return http2errRequestCanceled
-	case <-ctx.Done():
-		return ctx.Err()
-	case <-done:
-		return nil
-	}
-}
-
 var http2got1xxFuncForTests func(int, textproto.MIMEHeader) error
 
 // get1xxTraceFunc returns the value of request's httptrace.ClientTrace.Got1xxResponse func,
@@ -6847,59 +6968,37 @@
 	return http2traceGot1xxResponseFunc(cs.trace)
 }
 
-// awaitRequestCancel waits for the user to cancel a request, its context to
-// expire, or for the request to be done (any way it might be removed from the
-// cc.streams map: peer reset, successful completion, TCP connection breakage,
-// etc). If the request is canceled, then cs will be canceled and closed.
-func (cs *http2clientStream) awaitRequestCancel(req *Request) {
-	if err := http2awaitRequestCancel(req, cs.done); err != nil {
-		cs.cancelStream()
-		cs.bufPipe.CloseWithError(err)
+func (cs *http2clientStream) abortStream(err error) {
+	cs.cc.mu.Lock()
+	defer cs.cc.mu.Unlock()
+	cs.abortStreamLocked(err)
+}
+
+func (cs *http2clientStream) abortStreamLocked(err error) {
+	cs.abortOnce.Do(func() {
+		cs.abortErr = err
+		close(cs.abort)
+	})
+	if cs.reqBody != nil && !cs.reqBodyClosed {
+		cs.reqBody.Close()
+		cs.reqBodyClosed = true
+	}
+	// TODO(dneil): Clean up tests where cs.cc.cond is nil.
+	if cs.cc.cond != nil {
+		// Wake up writeRequestBody if it is waiting on flow control.
+		cs.cc.cond.Broadcast()
 	}
 }
 
-func (cs *http2clientStream) cancelStream() {
-	cc := cs.cc
-	cc.mu.Lock()
-	didReset := cs.didReset
-	cs.didReset = true
-	cc.mu.Unlock()
-
-	if !didReset {
-		cc.writeStreamReset(cs.ID, http2ErrCodeCancel, nil)
-		cc.forgetStreamID(cs.ID)
-	}
-}
-
-// checkResetOrDone reports any error sent in a RST_STREAM frame by the
-// server, or errStreamClosed if the stream is complete.
-func (cs *http2clientStream) checkResetOrDone() error {
-	select {
-	case <-cs.peerReset:
-		return cs.resetErr
-	case <-cs.done:
-		return http2errStreamClosed
-	default:
-		return nil
-	}
-}
-
-func (cs *http2clientStream) getStartedWrite() bool {
+func (cs *http2clientStream) abortRequestBodyWrite() {
 	cc := cs.cc
 	cc.mu.Lock()
 	defer cc.mu.Unlock()
-	return cs.startedWrite
-}
-
-func (cs *http2clientStream) abortRequestBodyWrite(err error) {
-	if err == nil {
-		panic("nil error")
+	if cs.reqBody != nil && !cs.reqBodyClosed {
+		cs.reqBody.Close()
+		cs.reqBodyClosed = true
+		cc.cond.Broadcast()
 	}
-	cc := cs.cc
-	cc.mu.Lock()
-	cs.stopReqBody = err
-	cc.cond.Broadcast()
-	cc.mu.Unlock()
 }
 
 type http2stickyErrWriter struct {
@@ -6987,9 +7086,9 @@
 		}
 		reused := !atomic.CompareAndSwapUint32(&cc.reused, 0, 1)
 		http2traceGotConn(req, cc, reused)
-		res, gotErrAfterReqBodyWrite, err := cc.roundTrip(req)
+		res, err := cc.RoundTrip(req)
 		if err != nil && retry <= 6 {
-			if req, err = http2shouldRetryRequest(req, err, gotErrAfterReqBodyWrite); err == nil {
+			if req, err = http2shouldRetryRequest(req, err); err == nil {
 				// After the first retry, do exponential backoff with 10% jitter.
 				if retry == 0 {
 					continue
@@ -7000,7 +7099,7 @@
 				case <-time.After(time.Second * time.Duration(backoff)):
 					continue
 				case <-req.Context().Done():
-					return nil, req.Context().Err()
+					err = req.Context().Err()
 				}
 			}
 		}
@@ -7031,7 +7130,7 @@
 // response headers. It is always called with a non-nil error.
 // It returns either a request to retry (either the same request, or a
 // modified clone), or an error if the request can't be replayed.
-func http2shouldRetryRequest(req *Request, err error, afterBodyWrite bool) (*Request, error) {
+func http2shouldRetryRequest(req *Request, err error) (*Request, error) {
 	if !http2canRetryError(err) {
 		return nil, err
 	}
@@ -7044,7 +7143,6 @@
 	// If the request body can be reset back to its original
 	// state via the optional req.GetBody, do that.
 	if req.GetBody != nil {
-		// TODO: consider a req.Body.Close here? or audit that all caller paths do?
 		body, err := req.GetBody()
 		if err != nil {
 			return nil, err
@@ -7056,10 +7154,8 @@
 
 	// The Request.Body can't reset back to the beginning, but we
 	// don't seem to have started to read from it yet, so reuse
-	// the request directly. The "afterBodyWrite" means the
-	// bodyWrite process has started, which becomes true before
-	// the first Read.
-	if !afterBodyWrite {
+	// the request directly.
+	if err == http2errClientConnUnusable {
 		return req, nil
 	}
 
@@ -7071,17 +7167,21 @@
 		return true
 	}
 	if se, ok := err.(http2StreamError); ok {
+		if se.Code == http2ErrCodeProtocol && se.Cause == http2errFromPeer {
+			// See golang/go#47635, golang/go#42777
+			return true
+		}
 		return se.Code == http2ErrCodeRefusedStream
 	}
 	return false
 }
 
-func (t *http2Transport) dialClientConn(addr string, singleUse bool) (*http2ClientConn, error) {
+func (t *http2Transport) dialClientConn(ctx context.Context, addr string, singleUse bool) (*http2ClientConn, error) {
 	host, _, err := net.SplitHostPort(addr)
 	if err != nil {
 		return nil, err
 	}
-	tconn, err := t.dialTLS()("tcp", addr, t.newTLSConfig(host))
+	tconn, err := t.dialTLS(ctx)("tcp", addr, t.newTLSConfig(host))
 	if err != nil {
 		return nil, err
 	}
@@ -7102,34 +7202,28 @@
 	return cfg
 }
 
-func (t *http2Transport) dialTLS() func(string, string, *tls.Config) (net.Conn, error) {
+func (t *http2Transport) dialTLS(ctx context.Context) func(string, string, *tls.Config) (net.Conn, error) {
 	if t.DialTLS != nil {
 		return t.DialTLS
 	}
-	return t.dialTLSDefault
-}
-
-func (t *http2Transport) dialTLSDefault(network, addr string, cfg *tls.Config) (net.Conn, error) {
-	cn, err := tls.Dial(network, addr, cfg)
-	if err != nil {
-		return nil, err
-	}
-	if err := cn.Handshake(); err != nil {
-		return nil, err
-	}
-	if !cfg.InsecureSkipVerify {
-		if err := cn.VerifyHostname(cfg.ServerName); err != nil {
+	return func(network, addr string, cfg *tls.Config) (net.Conn, error) {
+		dialer := &tls.Dialer{
+			Config: cfg,
+		}
+		cn, err := dialer.DialContext(ctx, network, addr)
+		if err != nil {
 			return nil, err
 		}
+		tlsCn := cn.(*tls.Conn) // DialContext comment promises this will always succeed
+		state := tlsCn.ConnectionState()
+		if p := state.NegotiatedProtocol; p != http2NextProtoTLS {
+			return nil, fmt.Errorf("http2: unexpected ALPN protocol %q; want %q", p, http2NextProtoTLS)
+		}
+		if !state.NegotiatedProtocolIsMutual {
+			return nil, errors.New("http2: could not negotiate protocol mutually")
+		}
+		return cn, nil
 	}
-	state := cn.ConnectionState()
-	if p := state.NegotiatedProtocol; p != http2NextProtoTLS {
-		return nil, fmt.Errorf("http2: unexpected ALPN protocol %q; want %q", p, http2NextProtoTLS)
-	}
-	if !state.NegotiatedProtocolIsMutual {
-		return nil, errors.New("http2: could not negotiate protocol mutually")
-	}
-	return cn, nil
 }
 
 // disableKeepAlives reports whether connections should be closed as
@@ -7155,14 +7249,15 @@
 		tconn:                 c,
 		readerDone:            make(chan struct{}),
 		nextStreamID:          1,
-		maxFrameSize:          16 << 10,           // spec default
-		initialWindowSize:     65535,              // spec default
-		maxConcurrentStreams:  1000,               // "infinite", per spec. 1000 seems good enough.
-		peerMaxHeaderListSize: 0xffffffffffffffff, // "infinite", per spec. Use 2^64-1 instead.
+		maxFrameSize:          16 << 10,                         // spec default
+		initialWindowSize:     65535,                            // spec default
+		maxConcurrentStreams:  http2initialMaxConcurrentStreams, // "infinite", per spec. Use a smaller value until we have received server settings.
+		peerMaxHeaderListSize: 0xffffffffffffffff,               // "infinite", per spec. Use 2^64-1 instead.
 		streams:               make(map[uint32]*http2clientStream),
 		singleUse:             singleUse,
 		wantSettingsAck:       true,
 		pings:                 make(map[[8]byte]chan struct{}),
+		reqHeaderMu:           make(chan struct{}, 1),
 	}
 	if d := t.idleConnTimeout(); d != 0 {
 		cc.idleTimeout = d
@@ -7232,6 +7327,13 @@
 	}
 }
 
+// SetDoNotReuse marks cc as not reusable for future HTTP requests.
+func (cc *http2ClientConn) SetDoNotReuse() {
+	cc.mu.Lock()
+	defer cc.mu.Unlock()
+	cc.doNotReuse = true
+}
+
 func (cc *http2ClientConn) setGoAway(f *http2GoAwayFrame) {
 	cc.mu.Lock()
 	defer cc.mu.Unlock()
@@ -7249,27 +7351,39 @@
 	last := f.LastStreamID
 	for streamID, cs := range cc.streams {
 		if streamID > last {
-			select {
-			case cs.resc <- http2resAndError{err: http2errClientConnGotGoAway}:
-			default:
-			}
+			cs.abortStreamLocked(http2errClientConnGotGoAway)
 		}
 	}
 }
 
 // CanTakeNewRequest reports whether the connection can take a new request,
 // meaning it has not been closed or received or sent a GOAWAY.
+//
+// If the caller is going to immediately make a new request on this
+// connection, use ReserveNewRequest instead.
 func (cc *http2ClientConn) CanTakeNewRequest() bool {
 	cc.mu.Lock()
 	defer cc.mu.Unlock()
 	return cc.canTakeNewRequestLocked()
 }
 
+// ReserveNewRequest is like CanTakeNewRequest but also reserves a
+// concurrent stream in cc. The reservation is decremented on the
+// next call to RoundTrip.
+func (cc *http2ClientConn) ReserveNewRequest() bool {
+	cc.mu.Lock()
+	defer cc.mu.Unlock()
+	if st := cc.idleStateLocked(); !st.canTakeNewRequest {
+		return false
+	}
+	cc.streamsReserved++
+	return true
+}
+
 // clientConnIdleState describes the suitability of a client
 // connection to initiate a new RoundTrip request.
 type http2clientConnIdleState struct {
 	canTakeNewRequest bool
-	freshConn         bool // whether it's unused by any previous request
 }
 
 func (cc *http2ClientConn) idleState() http2clientConnIdleState {
@@ -7290,13 +7404,13 @@
 		// writing it.
 		maxConcurrentOkay = true
 	} else {
-		maxConcurrentOkay = int64(len(cc.streams)+1) < int64(cc.maxConcurrentStreams)
+		maxConcurrentOkay = int64(len(cc.streams)+cc.streamsReserved+1) <= int64(cc.maxConcurrentStreams)
 	}
 
 	st.canTakeNewRequest = cc.goAway == nil && !cc.closed && !cc.closing && maxConcurrentOkay &&
+		!cc.doNotReuse &&
 		int64(cc.nextStreamID)+2*int64(cc.pendingRequests) < math.MaxInt32 &&
 		!cc.tooIdleLocked()
-	st.freshConn = cc.nextStreamID == 1 && st.canTakeNewRequest
 	return
 }
 
@@ -7327,7 +7441,7 @@
 
 func (cc *http2ClientConn) closeIfIdle() {
 	cc.mu.Lock()
-	if len(cc.streams) > 0 {
+	if len(cc.streams) > 0 || cc.streamsReserved > 0 {
 		cc.mu.Unlock()
 		return
 	}
@@ -7342,9 +7456,15 @@
 	cc.tconn.Close()
 }
 
+func (cc *http2ClientConn) isDoNotReuseAndIdle() bool {
+	cc.mu.Lock()
+	defer cc.mu.Unlock()
+	return cc.doNotReuse && len(cc.streams) == 0
+}
+
 var http2shutdownEnterWaitStateHook = func() {}
 
-// Shutdown gracefully close the client connection, waiting for running streams to complete.
+// Shutdown gracefully closes the client connection, waiting for running streams to complete.
 func (cc *http2ClientConn) Shutdown(ctx context.Context) error {
 	if err := cc.sendGoAway(); err != nil {
 		return err
@@ -7383,15 +7503,18 @@
 
 func (cc *http2ClientConn) sendGoAway() error {
 	cc.mu.Lock()
-	defer cc.mu.Unlock()
-	cc.wmu.Lock()
-	defer cc.wmu.Unlock()
-	if cc.closing {
+	closing := cc.closing
+	cc.closing = true
+	maxStreamID := cc.nextStreamID
+	cc.mu.Unlock()
+	if closing {
 		// GOAWAY sent already
 		return nil
 	}
+
+	cc.wmu.Lock()
+	defer cc.wmu.Unlock()
 	// Send a graceful shutdown frame to server
-	maxStreamID := cc.nextStreamID
 	if err := cc.fr.WriteGoAway(maxStreamID, http2ErrCodeNo, nil); err != nil {
 		return err
 	}
@@ -7399,7 +7522,6 @@
 		return err
 	}
 	// Prevent new requests
-	cc.closing = true
 	return nil
 }
 
@@ -7407,17 +7529,12 @@
 // err is sent to streams.
 func (cc *http2ClientConn) closeForError(err error) error {
 	cc.mu.Lock()
+	cc.closed = true
+	for _, cs := range cc.streams {
+		cs.abortStreamLocked(err)
+	}
 	defer cc.cond.Broadcast()
 	defer cc.mu.Unlock()
-	for id, cs := range cc.streams {
-		select {
-		case cs.resc <- http2resAndError{err: err}:
-		default:
-		}
-		cs.bufPipe.CloseWithError(err)
-		delete(cc.streams, id)
-	}
-	cc.closed = true
 	return cc.tconn.Close()
 }
 
@@ -7435,46 +7552,6 @@
 	return cc.closeForError(err)
 }
 
-const http2maxAllocFrameSize = 512 << 10
-
-// frameBuffer returns a scratch buffer suitable for writing DATA frames.
-// They're capped at the min of the peer's max frame size or 512KB
-// (kinda arbitrarily), but definitely capped so we don't allocate 4GB
-// bufers.
-func (cc *http2ClientConn) frameScratchBuffer() []byte {
-	cc.mu.Lock()
-	size := cc.maxFrameSize
-	if size > http2maxAllocFrameSize {
-		size = http2maxAllocFrameSize
-	}
-	for i, buf := range cc.freeBuf {
-		if len(buf) >= int(size) {
-			cc.freeBuf[i] = nil
-			cc.mu.Unlock()
-			return buf[:size]
-		}
-	}
-	cc.mu.Unlock()
-	return make([]byte, size)
-}
-
-func (cc *http2ClientConn) putFrameScratchBuffer(buf []byte) {
-	cc.mu.Lock()
-	defer cc.mu.Unlock()
-	const maxBufs = 4 // arbitrary; 4 concurrent requests per conn? investigate.
-	if len(cc.freeBuf) < maxBufs {
-		cc.freeBuf = append(cc.freeBuf, buf)
-		return
-	}
-	for i, old := range cc.freeBuf {
-		if old == nil {
-			cc.freeBuf[i] = buf
-			return
-		}
-	}
-	// forget about it.
-}
-
 // errRequestCanceled is a copy of net/http's errRequestCanceled because it's not
 // exported. At least they'll be DeepEqual for h1-vs-h2 comparisons tests.
 var http2errRequestCanceled = errors.New("net/http: request canceled")
@@ -7517,7 +7594,7 @@
 	if vv := req.Header["Transfer-Encoding"]; len(vv) > 0 && (len(vv) > 1 || vv[0] != "" && vv[0] != "chunked") {
 		return fmt.Errorf("http2: invalid Transfer-Encoding request header: %q", vv)
 	}
-	if vv := req.Header["Connection"]; len(vv) > 0 && (len(vv) > 1 || vv[0] != "" && !strings.EqualFold(vv[0], "close") && !strings.EqualFold(vv[0], "keep-alive")) {
+	if vv := req.Header["Connection"]; len(vv) > 0 && (len(vv) > 1 || vv[0] != "" && !http2asciiEqualFold(vv[0], "close") && !http2asciiEqualFold(vv[0], "keep-alive")) {
 		return fmt.Errorf("http2: invalid Connection request header: %q", vv)
 	}
 	return nil
@@ -7536,41 +7613,142 @@
 	return -1
 }
 
-func (cc *http2ClientConn) RoundTrip(req *Request) (*Response, error) {
-	resp, _, err := cc.roundTrip(req)
-	return resp, err
+func (cc *http2ClientConn) decrStreamReservations() {
+	cc.mu.Lock()
+	defer cc.mu.Unlock()
+	cc.decrStreamReservationsLocked()
 }
 
-func (cc *http2ClientConn) roundTrip(req *Request) (res *Response, gotErrAfterReqBodyWrite bool, err error) {
-	if err := http2checkConnHeaders(req); err != nil {
-		return nil, false, err
+func (cc *http2ClientConn) decrStreamReservationsLocked() {
+	if cc.streamsReserved > 0 {
+		cc.streamsReserved--
 	}
+}
+
+func (cc *http2ClientConn) RoundTrip(req *Request) (*Response, error) {
+	ctx := req.Context()
+	cs := &http2clientStream{
+		cc:                   cc,
+		ctx:                  ctx,
+		reqCancel:            req.Cancel,
+		isHead:               req.Method == "HEAD",
+		reqBody:              req.Body,
+		reqBodyContentLength: http2actualContentLength(req),
+		trace:                httptrace.ContextClientTrace(ctx),
+		peerClosed:           make(chan struct{}),
+		abort:                make(chan struct{}),
+		respHeaderRecv:       make(chan struct{}),
+		donec:                make(chan struct{}),
+	}
+	go cs.doRequest(req)
+
+	waitDone := func() error {
+		select {
+		case <-cs.donec:
+			return nil
+		case <-ctx.Done():
+			return ctx.Err()
+		case <-cs.reqCancel:
+			return http2errRequestCanceled
+		}
+	}
+
+	for {
+		select {
+		case <-cs.respHeaderRecv:
+			res := cs.res
+			if res.StatusCode > 299 {
+				// On error or status code 3xx, 4xx, 5xx, etc abort any
+				// ongoing write, assuming that the server doesn't care
+				// about our request body. If the server replied with 1xx or
+				// 2xx, however, then assume the server DOES potentially
+				// want our body (e.g. full-duplex streaming:
+				// golang.org/issue/13444). If it turns out the server
+				// doesn't, they'll RST_STREAM us soon enough. This is a
+				// heuristic to avoid adding knobs to Transport. Hopefully
+				// we can keep it.
+				cs.abortRequestBodyWrite()
+			}
+			res.Request = req
+			res.TLS = cc.tlsState
+			if res.Body == http2noBody && http2actualContentLength(req) == 0 {
+				// If there isn't a request or response body still being
+				// written, then wait for the stream to be closed before
+				// RoundTrip returns.
+				if err := waitDone(); err != nil {
+					return nil, err
+				}
+			}
+			return res, nil
+		case <-cs.abort:
+			waitDone()
+			return nil, cs.abortErr
+		case <-ctx.Done():
+			err := ctx.Err()
+			cs.abortStream(err)
+			return nil, err
+		case <-cs.reqCancel:
+			cs.abortStream(http2errRequestCanceled)
+			return nil, http2errRequestCanceled
+		}
+	}
+}
+
+// writeRequest runs for the duration of the request lifetime.
+//
+// It sends the request and performs post-request cleanup (closing Request.Body, etc.).
+func (cs *http2clientStream) doRequest(req *Request) {
+	err := cs.writeRequest(req)
+	cs.cleanupWriteRequest(err)
+}
+
+// writeRequest sends a request.
+//
+// It returns nil after the request is written, the response read,
+// and the request stream is half-closed by the peer.
+//
+// It returns non-nil if the request ends otherwise.
+// If the returned error is StreamError, the error Code may be used in resetting the stream.
+func (cs *http2clientStream) writeRequest(req *Request) (err error) {
+	cc := cs.cc
+	ctx := cs.ctx
+
+	if err := http2checkConnHeaders(req); err != nil {
+		return err
+	}
+
+	// Acquire the new-request lock by writing to reqHeaderMu.
+	// This lock guards the critical section covering allocating a new stream ID
+	// (requires mu) and creating the stream (requires wmu).
+	if cc.reqHeaderMu == nil {
+		panic("RoundTrip on uninitialized ClientConn") // for tests
+	}
+	select {
+	case cc.reqHeaderMu <- struct{}{}:
+	case <-cs.reqCancel:
+		return http2errRequestCanceled
+	case <-ctx.Done():
+		return ctx.Err()
+	}
+
+	cc.mu.Lock()
 	if cc.idleTimer != nil {
 		cc.idleTimer.Stop()
 	}
-
-	trailers, err := http2commaSeparatedTrailers(req)
-	if err != nil {
-		return nil, false, err
-	}
-	hasTrailers := trailers != ""
-
-	cc.mu.Lock()
-	if err := cc.awaitOpenSlotForRequest(req); err != nil {
+	cc.decrStreamReservationsLocked()
+	if err := cc.awaitOpenSlotForStreamLocked(cs); err != nil {
 		cc.mu.Unlock()
-		return nil, false, err
+		<-cc.reqHeaderMu
+		return err
 	}
-
-	body := req.Body
-	contentLen := http2actualContentLength(req)
-	hasBody := contentLen != 0
+	cc.addStreamLocked(cs) // assigns stream ID
+	cc.mu.Unlock()
 
 	// TODO(bradfitz): this is a copy of the logic in net/http. Unify somewhere?
-	var requestedGzip bool
 	if !cc.t.disableCompression() &&
 		req.Header.Get("Accept-Encoding") == "" &&
 		req.Header.Get("Range") == "" &&
-		req.Method != "HEAD" {
+		!cs.isHead {
 		// Request gzip only, not deflate. Deflate is ambiguous and
 		// not as universally supported anyway.
 		// See: https://zlib.net/zlib_faq.html#faq39
@@ -7583,195 +7761,223 @@
 		// We don't request gzip if the request is for a range, since
 		// auto-decoding a portion of a gzipped document will just fail
 		// anyway. See https://golang.org/issue/8923
-		requestedGzip = true
+		cs.requestedGzip = true
 	}
 
-	// we send: HEADERS{1}, CONTINUATION{0,} + DATA{0,} (DATA is
-	// sent by writeRequestBody below, along with any Trailers,
-	// again in form HEADERS{1}, CONTINUATION{0,})
-	hdrs, err := cc.encodeHeaders(req, requestedGzip, trailers, contentLen)
+	continueTimeout := cc.t.expectContinueTimeout()
+	if continueTimeout != 0 &&
+		!httpguts.HeaderValuesContainsToken(
+			req.Header["Expect"],
+			"100-continue") {
+		continueTimeout = 0
+		cs.on100 = make(chan struct{}, 1)
+	}
+
+	// Past this point (where we send request headers), it is possible for
+	// RoundTrip to return successfully. Since the RoundTrip contract permits
+	// the caller to "mutate or reuse" the Request after closing the Response's Body,
+	// we must take care when referencing the Request from here on.
+	err = cs.encodeAndWriteHeaders(req)
+	<-cc.reqHeaderMu
 	if err != nil {
-		cc.mu.Unlock()
-		return nil, false, err
+		return err
 	}
 
-	cs := cc.newStream()
-	cs.req = req
-	cs.trace = httptrace.ContextClientTrace(req.Context())
-	cs.requestedGzip = requestedGzip
-	bodyWriter := cc.t.getBodyWriterState(cs, body)
-	cs.on100 = bodyWriter.on100
-
-	defer func() {
-		cc.wmu.Lock()
-		werr := cc.werr
-		cc.wmu.Unlock()
-		if werr != nil {
-			cc.Close()
+	hasBody := cs.reqBodyContentLength != 0
+	if !hasBody {
+		cs.sentEndStream = true
+	} else {
+		if continueTimeout != 0 {
+			http2traceWait100Continue(cs.trace)
+			timer := time.NewTimer(continueTimeout)
+			select {
+			case <-timer.C:
+				err = nil
+			case <-cs.on100:
+				err = nil
+			case <-cs.abort:
+				err = cs.abortErr
+			case <-ctx.Done():
+				err = ctx.Err()
+			case <-cs.reqCancel:
+				err = http2errRequestCanceled
+			}
+			timer.Stop()
+			if err != nil {
+				http2traceWroteRequest(cs.trace, err)
+				return err
+			}
 		}
-	}()
 
-	cc.wmu.Lock()
-	endStream := !hasBody && !hasTrailers
-	werr := cc.writeHeaders(cs.ID, endStream, int(cc.maxFrameSize), hdrs)
-	cc.wmu.Unlock()
-	http2traceWroteHeaders(cs.trace)
-	cc.mu.Unlock()
-
-	if werr != nil {
-		if hasBody {
-			req.Body.Close() // per RoundTripper contract
-			bodyWriter.cancel()
+		if err = cs.writeRequestBody(req); err != nil {
+			if err != http2errStopReqBodyWrite {
+				http2traceWroteRequest(cs.trace, err)
+				return err
+			}
+		} else {
+			cs.sentEndStream = true
 		}
-		cc.forgetStreamID(cs.ID)
-		// Don't bother sending a RST_STREAM (our write already failed;
-		// no need to keep writing)
-		http2traceWroteRequest(cs.trace, werr)
-		return nil, false, werr
 	}
 
+	http2traceWroteRequest(cs.trace, err)
+
 	var respHeaderTimer <-chan time.Time
-	if hasBody {
-		bodyWriter.scheduleBodyWrite()
-	} else {
-		http2traceWroteRequest(cs.trace, nil)
-		if d := cc.responseHeaderTimeout(); d != 0 {
-			timer := time.NewTimer(d)
-			defer timer.Stop()
-			respHeaderTimer = timer.C
-		}
+	var respHeaderRecv chan struct{}
+	if d := cc.responseHeaderTimeout(); d != 0 {
+		timer := time.NewTimer(d)
+		defer timer.Stop()
+		respHeaderTimer = timer.C
+		respHeaderRecv = cs.respHeaderRecv
 	}
-
-	readLoopResCh := cs.resc
-	bodyWritten := false
-	ctx := req.Context()
-
-	handleReadLoopResponse := func(re http2resAndError) (*Response, bool, error) {
-		res := re.res
-		if re.err != nil || res.StatusCode > 299 {
-			// On error or status code 3xx, 4xx, 5xx, etc abort any
-			// ongoing write, assuming that the server doesn't care
-			// about our request body. If the server replied with 1xx or
-			// 2xx, however, then assume the server DOES potentially
-			// want our body (e.g. full-duplex streaming:
-			// golang.org/issue/13444). If it turns out the server
-			// doesn't, they'll RST_STREAM us soon enough. This is a
-			// heuristic to avoid adding knobs to Transport. Hopefully
-			// we can keep it.
-			bodyWriter.cancel()
-			cs.abortRequestBodyWrite(http2errStopReqBodyWrite)
-			if hasBody && !bodyWritten {
-				<-bodyWriter.resc
-			}
-		}
-		if re.err != nil {
-			cc.forgetStreamID(cs.ID)
-			return nil, cs.getStartedWrite(), re.err
-		}
-		res.Request = req
-		res.TLS = cc.tlsState
-		return res, false, nil
-	}
-
+	// Wait until the peer half-closes its end of the stream,
+	// or until the request is aborted (via context, error, or otherwise),
+	// whichever comes first.
 	for {
 		select {
-		case re := <-readLoopResCh:
-			return handleReadLoopResponse(re)
+		case <-cs.peerClosed:
+			return nil
 		case <-respHeaderTimer:
-			if !hasBody || bodyWritten {
-				cc.writeStreamReset(cs.ID, http2ErrCodeCancel, nil)
-			} else {
-				bodyWriter.cancel()
-				cs.abortRequestBodyWrite(http2errStopReqBodyWriteAndCancel)
-				<-bodyWriter.resc
-			}
-			cc.forgetStreamID(cs.ID)
-			return nil, cs.getStartedWrite(), http2errTimeout
+			return http2errTimeout
+		case <-respHeaderRecv:
+			respHeaderTimer = nil // keep waiting for END_STREAM
+		case <-cs.abort:
+			return cs.abortErr
 		case <-ctx.Done():
-			if !hasBody || bodyWritten {
-				cc.writeStreamReset(cs.ID, http2ErrCodeCancel, nil)
-			} else {
-				bodyWriter.cancel()
-				cs.abortRequestBodyWrite(http2errStopReqBodyWriteAndCancel)
-				<-bodyWriter.resc
-			}
-			cc.forgetStreamID(cs.ID)
-			return nil, cs.getStartedWrite(), ctx.Err()
-		case <-req.Cancel:
-			if !hasBody || bodyWritten {
-				cc.writeStreamReset(cs.ID, http2ErrCodeCancel, nil)
-			} else {
-				bodyWriter.cancel()
-				cs.abortRequestBodyWrite(http2errStopReqBodyWriteAndCancel)
-				<-bodyWriter.resc
-			}
-			cc.forgetStreamID(cs.ID)
-			return nil, cs.getStartedWrite(), http2errRequestCanceled
-		case <-cs.peerReset:
-			// processResetStream already removed the
-			// stream from the streams map; no need for
-			// forgetStreamID.
-			return nil, cs.getStartedWrite(), cs.resetErr
-		case err := <-bodyWriter.resc:
-			bodyWritten = true
-			// Prefer the read loop's response, if available. Issue 16102.
-			select {
-			case re := <-readLoopResCh:
-				return handleReadLoopResponse(re)
-			default:
-			}
-			if err != nil {
-				cc.forgetStreamID(cs.ID)
-				return nil, cs.getStartedWrite(), err
-			}
-			if d := cc.responseHeaderTimeout(); d != 0 {
-				timer := time.NewTimer(d)
-				defer timer.Stop()
-				respHeaderTimer = timer.C
-			}
+			return ctx.Err()
+		case <-cs.reqCancel:
+			return http2errRequestCanceled
 		}
 	}
 }
 
-// awaitOpenSlotForRequest waits until len(streams) < maxConcurrentStreams.
+func (cs *http2clientStream) encodeAndWriteHeaders(req *Request) error {
+	cc := cs.cc
+	ctx := cs.ctx
+
+	cc.wmu.Lock()
+	defer cc.wmu.Unlock()
+
+	// If the request was canceled while waiting for cc.mu, just quit.
+	select {
+	case <-cs.abort:
+		return cs.abortErr
+	case <-ctx.Done():
+		return ctx.Err()
+	case <-cs.reqCancel:
+		return http2errRequestCanceled
+	default:
+	}
+
+	// Encode headers.
+	//
+	// we send: HEADERS{1}, CONTINUATION{0,} + DATA{0,} (DATA is
+	// sent by writeRequestBody below, along with any Trailers,
+	// again in form HEADERS{1}, CONTINUATION{0,})
+	trailers, err := http2commaSeparatedTrailers(req)
+	if err != nil {
+		return err
+	}
+	hasTrailers := trailers != ""
+	contentLen := http2actualContentLength(req)
+	hasBody := contentLen != 0
+	hdrs, err := cc.encodeHeaders(req, cs.requestedGzip, trailers, contentLen)
+	if err != nil {
+		return err
+	}
+
+	// Write the request.
+	endStream := !hasBody && !hasTrailers
+	cs.sentHeaders = true
+	err = cc.writeHeaders(cs.ID, endStream, int(cc.maxFrameSize), hdrs)
+	http2traceWroteHeaders(cs.trace)
+	return err
+}
+
+// cleanupWriteRequest performs post-request tasks.
+//
+// If err (the result of writeRequest) is non-nil and the stream is not closed,
+// cleanupWriteRequest will send a reset to the peer.
+func (cs *http2clientStream) cleanupWriteRequest(err error) {
+	cc := cs.cc
+
+	if cs.ID == 0 {
+		// We were canceled before creating the stream, so return our reservation.
+		cc.decrStreamReservations()
+	}
+
+	// TODO: write h12Compare test showing whether
+	// Request.Body is closed by the Transport,
+	// and in multiple cases: server replies <=299 and >299
+	// while still writing request body
+	cc.mu.Lock()
+	bodyClosed := cs.reqBodyClosed
+	cs.reqBodyClosed = true
+	cc.mu.Unlock()
+	if !bodyClosed && cs.reqBody != nil {
+		cs.reqBody.Close()
+	}
+
+	if err != nil && cs.sentEndStream {
+		// If the connection is closed immediately after the response is read,
+		// we may be aborted before finishing up here. If the stream was closed
+		// cleanly on both sides, there is no error.
+		select {
+		case <-cs.peerClosed:
+			err = nil
+		default:
+		}
+	}
+	if err != nil {
+		cs.abortStream(err) // possibly redundant, but harmless
+		if cs.sentHeaders {
+			if se, ok := err.(http2StreamError); ok {
+				if se.Cause != http2errFromPeer {
+					cc.writeStreamReset(cs.ID, se.Code, err)
+				}
+			} else {
+				cc.writeStreamReset(cs.ID, http2ErrCodeCancel, err)
+			}
+		}
+		cs.bufPipe.CloseWithError(err) // no-op if already closed
+	} else {
+		if cs.sentHeaders && !cs.sentEndStream {
+			cc.writeStreamReset(cs.ID, http2ErrCodeNo, nil)
+		}
+		cs.bufPipe.CloseWithError(http2errRequestCanceled)
+	}
+	if cs.ID != 0 {
+		cc.forgetStreamID(cs.ID)
+	}
+
+	cc.wmu.Lock()
+	werr := cc.werr
+	cc.wmu.Unlock()
+	if werr != nil {
+		cc.Close()
+	}
+
+	close(cs.donec)
+}
+
+// awaitOpenSlotForStream waits until len(streams) < maxConcurrentStreams.
 // Must hold cc.mu.
-func (cc *http2ClientConn) awaitOpenSlotForRequest(req *Request) error {
-	var waitingForConn chan struct{}
-	var waitingForConnErr error // guarded by cc.mu
+func (cc *http2ClientConn) awaitOpenSlotForStreamLocked(cs *http2clientStream) error {
 	for {
 		cc.lastActive = time.Now()
 		if cc.closed || !cc.canTakeNewRequestLocked() {
-			if waitingForConn != nil {
-				close(waitingForConn)
-			}
 			return http2errClientConnUnusable
 		}
 		cc.lastIdle = time.Time{}
-		if int64(len(cc.streams))+1 <= int64(cc.maxConcurrentStreams) {
-			if waitingForConn != nil {
-				close(waitingForConn)
-			}
+		if int64(len(cc.streams)) < int64(cc.maxConcurrentStreams) {
 			return nil
 		}
-		// Unfortunately, we cannot wait on a condition variable and channel at
-		// the same time, so instead, we spin up a goroutine to check if the
-		// request is canceled while we wait for a slot to open in the connection.
-		if waitingForConn == nil {
-			waitingForConn = make(chan struct{})
-			go func() {
-				if err := http2awaitRequestCancel(req, waitingForConn); err != nil {
-					cc.mu.Lock()
-					waitingForConnErr = err
-					cc.cond.Broadcast()
-					cc.mu.Unlock()
-				}
-			}()
-		}
 		cc.pendingRequests++
 		cc.cond.Wait()
 		cc.pendingRequests--
-		if waitingForConnErr != nil {
-			return waitingForConnErr
+		select {
+		case <-cs.abort:
+			return cs.abortErr
+		default:
 		}
 	}
 }
@@ -7798,10 +8004,6 @@
 			cc.fr.WriteContinuation(streamID, endHeaders, chunk)
 		}
 	}
-	// TODO(bradfitz): this Flush could potentially block (as
-	// could the WriteHeaders call(s) above), which means they
-	// wouldn't respond to Request.Cancel being readable. That's
-	// rare, but this should probably be in a goroutine.
 	cc.bw.Flush()
 	return cc.werr
 }
@@ -7817,32 +8019,59 @@
 	http2errReqBodyTooLong = errors.New("http2: request body larger than specified content length")
 )
 
-func (cs *http2clientStream) writeRequestBody(body io.Reader, bodyCloser io.Closer) (err error) {
+// frameScratchBufferLen returns the length of a buffer to use for
+// outgoing request bodies to read/write to/from.
+//
+// It returns max(1, min(peer's advertised max frame size,
+// Request.ContentLength+1, 512KB)).
+func (cs *http2clientStream) frameScratchBufferLen(maxFrameSize int) int {
+	const max = 512 << 10
+	n := int64(maxFrameSize)
+	if n > max {
+		n = max
+	}
+	if cl := cs.reqBodyContentLength; cl != -1 && cl+1 < n {
+		// Add an extra byte past the declared content-length to
+		// give the caller's Request.Body io.Reader a chance to
+		// give us more bytes than they declared, so we can catch it
+		// early.
+		n = cl + 1
+	}
+	if n < 1 {
+		return 1
+	}
+	return int(n) // doesn't truncate; max is 512K
+}
+
+var http2bufPool sync.Pool // of *[]byte
+
+func (cs *http2clientStream) writeRequestBody(req *Request) (err error) {
 	cc := cs.cc
+	body := cs.reqBody
 	sentEnd := false // whether we sent the final DATA frame w/ END_STREAM
-	buf := cc.frameScratchBuffer()
-	defer cc.putFrameScratchBuffer(buf)
 
-	defer func() {
-		http2traceWroteRequest(cs.trace, err)
-		// TODO: write h12Compare test showing whether
-		// Request.Body is closed by the Transport,
-		// and in multiple cases: server replies <=299 and >299
-		// while still writing request body
-		cerr := bodyCloser.Close()
-		if err == nil {
-			err = cerr
-		}
-	}()
-
-	req := cs.req
 	hasTrailers := req.Trailer != nil
-	remainLen := http2actualContentLength(req)
+	remainLen := cs.reqBodyContentLength
 	hasContentLen := remainLen != -1
 
+	cc.mu.Lock()
+	maxFrameSize := int(cc.maxFrameSize)
+	cc.mu.Unlock()
+
+	// Scratch buffer for reading into & writing from.
+	scratchLen := cs.frameScratchBufferLen(maxFrameSize)
+	var buf []byte
+	if bp, ok := http2bufPool.Get().(*[]byte); ok && len(*bp) >= scratchLen {
+		defer http2bufPool.Put(bp)
+		buf = *bp
+	} else {
+		buf = make([]byte, scratchLen)
+		defer http2bufPool.Put(&buf)
+	}
+
 	var sawEOF bool
 	for !sawEOF {
-		n, err := body.Read(buf[:len(buf)-1])
+		n, err := body.Read(buf[:len(buf)])
 		if hasContentLen {
 			remainLen -= int64(n)
 			if remainLen == 0 && err == nil {
@@ -7853,13 +8082,13 @@
 				// to send the END_STREAM bit early, double-check that we're actually
 				// at EOF. Subsequent reads should return (0, EOF) at this point.
 				// If either value is different, we return an error in one of two ways below.
+				var scratch [1]byte
 				var n1 int
-				n1, err = body.Read(buf[n:])
+				n1, err = body.Read(scratch[:])
 				remainLen -= int64(n1)
 			}
 			if remainLen < 0 {
 				err = http2errReqBodyTooLong
-				cc.writeStreamReset(cs.ID, http2ErrCodeCancel, err)
 				return err
 			}
 		}
@@ -7867,7 +8096,6 @@
 			sawEOF = true
 			err = nil
 		} else if err != nil {
-			cc.writeStreamReset(cs.ID, http2ErrCodeCancel, err)
 			return err
 		}
 
@@ -7875,13 +8103,7 @@
 		for len(remain) > 0 && err == nil {
 			var allowed int32
 			allowed, err = cs.awaitFlowControl(len(remain))
-			switch {
-			case err == http2errStopReqBodyWrite:
-				return err
-			case err == http2errStopReqBodyWriteAndCancel:
-				cc.writeStreamReset(cs.ID, http2ErrCodeCancel, nil)
-				return err
-			case err != nil:
+			if err != nil {
 				return err
 			}
 			cc.wmu.Lock()
@@ -7912,24 +8134,26 @@
 		return nil
 	}
 
-	var trls []byte
-	if hasTrailers {
-		cc.mu.Lock()
-		trls, err = cc.encodeTrailers(req)
-		cc.mu.Unlock()
-		if err != nil {
-			cc.writeStreamReset(cs.ID, http2ErrCodeInternal, err)
-			cc.forgetStreamID(cs.ID)
-			return err
-		}
-	}
-
+	// Since the RoundTrip contract permits the caller to "mutate or reuse"
+	// a request after the Response's Body is closed, verify that this hasn't
+	// happened before accessing the trailers.
 	cc.mu.Lock()
-	maxFrameSize := int(cc.maxFrameSize)
+	trailer := req.Trailer
+	err = cs.abortErr
 	cc.mu.Unlock()
+	if err != nil {
+		return err
+	}
 
 	cc.wmu.Lock()
 	defer cc.wmu.Unlock()
+	var trls []byte
+	if len(trailer) > 0 {
+		trls, err = cc.encodeTrailers(trailer)
+		if err != nil {
+			return err
+		}
+	}
 
 	// Two ways to send END_STREAM: either with trailers, or
 	// with an empty DATA frame.
@@ -7950,17 +8174,24 @@
 // if the stream is dead.
 func (cs *http2clientStream) awaitFlowControl(maxBytes int) (taken int32, err error) {
 	cc := cs.cc
+	ctx := cs.ctx
 	cc.mu.Lock()
 	defer cc.mu.Unlock()
 	for {
 		if cc.closed {
 			return 0, http2errClientConnClosed
 		}
-		if cs.stopReqBody != nil {
-			return 0, cs.stopReqBody
+		if cs.reqBodyClosed {
+			return 0, http2errStopReqBodyWrite
 		}
-		if err := cs.checkResetOrDone(); err != nil {
-			return 0, err
+		select {
+		case <-cs.abort:
+			return 0, cs.abortErr
+		case <-ctx.Done():
+			return 0, ctx.Err()
+		case <-cs.reqCancel:
+			return 0, http2errRequestCanceled
+		default:
 		}
 		if a := cs.flow.available(); a > 0 {
 			take := a
@@ -7978,9 +8209,14 @@
 	}
 }
 
-// requires cc.mu be held.
+var http2errNilRequestURL = errors.New("http2: Request.URI is nil")
+
+// requires cc.wmu be held.
 func (cc *http2ClientConn) encodeHeaders(req *Request, addGzipHeader bool, trailers string, contentLength int64) ([]byte, error) {
 	cc.hbuf.Reset()
+	if req.URL == nil {
+		return nil, http2errNilRequestURL
+	}
 
 	host := req.Host
 	if host == "" {
@@ -8043,19 +8279,21 @@
 
 		var didUA bool
 		for k, vv := range req.Header {
-			if strings.EqualFold(k, "host") || strings.EqualFold(k, "content-length") {
+			if http2asciiEqualFold(k, "host") || http2asciiEqualFold(k, "content-length") {
 				// Host is :authority, already sent.
 				// Content-Length is automatic, set below.
 				continue
-			} else if strings.EqualFold(k, "connection") || strings.EqualFold(k, "proxy-connection") ||
-				strings.EqualFold(k, "transfer-encoding") || strings.EqualFold(k, "upgrade") ||
-				strings.EqualFold(k, "keep-alive") {
+			} else if http2asciiEqualFold(k, "connection") ||
+				http2asciiEqualFold(k, "proxy-connection") ||
+				http2asciiEqualFold(k, "transfer-encoding") ||
+				http2asciiEqualFold(k, "upgrade") ||
+				http2asciiEqualFold(k, "keep-alive") {
 				// Per 8.1.2.2 Connection-Specific Header
 				// Fields, don't send connection-specific
 				// fields. We have already checked if any
 				// are error-worthy so just ignore the rest.
 				continue
-			} else if strings.EqualFold(k, "user-agent") {
+			} else if http2asciiEqualFold(k, "user-agent") {
 				// Match Go's http1 behavior: at most one
 				// User-Agent. If set to nil or empty string,
 				// then omit it. Otherwise if not mentioned,
@@ -8068,7 +8306,7 @@
 				if vv[0] == "" {
 					continue
 				}
-			} else if strings.EqualFold(k, "cookie") {
+			} else if http2asciiEqualFold(k, "cookie") {
 				// Per 8.1.2.5 To allow for better compression efficiency, the
 				// Cookie header field MAY be split into separate header fields,
 				// each with one or more cookie-pairs.
@@ -8127,7 +8365,12 @@
 
 	// Header list size is ok. Write the headers.
 	enumerateHeaders(func(name, value string) {
-		name = strings.ToLower(name)
+		name, ascii := http2asciiToLower(name)
+		if !ascii {
+			// Skip writing invalid headers. Per RFC 7540, Section 8.1.2, header
+			// field names have to be ASCII characters (just as in HTTP/1.x).
+			return
+		}
 		cc.writeHeader(name, value)
 		if traceHeaders {
 			http2traceWroteHeaderField(trace, name, value)
@@ -8159,12 +8402,12 @@
 	}
 }
 
-// requires cc.mu be held.
-func (cc *http2ClientConn) encodeTrailers(req *Request) ([]byte, error) {
+// requires cc.wmu be held.
+func (cc *http2ClientConn) encodeTrailers(trailer Header) ([]byte, error) {
 	cc.hbuf.Reset()
 
 	hlSize := uint64(0)
-	for k, vv := range req.Trailer {
+	for k, vv := range trailer {
 		for _, v := range vv {
 			hf := hpack.HeaderField{Name: k, Value: v}
 			hlSize += uint64(hf.Size())
@@ -8174,10 +8417,15 @@
 		return nil, http2errRequestHeaderListSize
 	}
 
-	for k, vv := range req.Trailer {
+	for k, vv := range trailer {
+		lowKey, ascii := http2asciiToLower(k)
+		if !ascii {
+			// Skip writing invalid headers. Per RFC 7540, Section 8.1.2, header
+			// field names have to be ASCII characters (just as in HTTP/1.x).
+			continue
+		}
 		// Transfer-Encoding, etc.. have already been filtered at the
 		// start of RoundTrip
-		lowKey := strings.ToLower(k)
 		for _, v := range vv {
 			cc.writeHeader(lowKey, v)
 		}
@@ -8199,51 +8447,51 @@
 }
 
 // requires cc.mu be held.
-func (cc *http2ClientConn) newStream() *http2clientStream {
-	cs := &http2clientStream{
-		cc:        cc,
-		ID:        cc.nextStreamID,
-		resc:      make(chan http2resAndError, 1),
-		peerReset: make(chan struct{}),
-		done:      make(chan struct{}),
-	}
+func (cc *http2ClientConn) addStreamLocked(cs *http2clientStream) {
 	cs.flow.add(int32(cc.initialWindowSize))
 	cs.flow.setConnFlow(&cc.flow)
 	cs.inflow.add(http2transportDefaultStreamFlow)
 	cs.inflow.setConnFlow(&cc.inflow)
+	cs.ID = cc.nextStreamID
 	cc.nextStreamID += 2
 	cc.streams[cs.ID] = cs
-	return cs
+	if cs.ID == 0 {
+		panic("assigned stream ID 0")
+	}
 }
 
 func (cc *http2ClientConn) forgetStreamID(id uint32) {
-	cc.streamByID(id, true)
-}
-
-func (cc *http2ClientConn) streamByID(id uint32, andRemove bool) *http2clientStream {
 	cc.mu.Lock()
-	defer cc.mu.Unlock()
-	cs := cc.streams[id]
-	if andRemove && cs != nil && !cc.closed {
-		cc.lastActive = time.Now()
-		delete(cc.streams, id)
-		if len(cc.streams) == 0 && cc.idleTimer != nil {
-			cc.idleTimer.Reset(cc.idleTimeout)
-			cc.lastIdle = time.Now()
-		}
-		close(cs.done)
-		// Wake up checkResetOrDone via clientStream.awaitFlowControl and
-		// wake up RoundTrip if there is a pending request.
-		cc.cond.Broadcast()
+	slen := len(cc.streams)
+	delete(cc.streams, id)
+	if len(cc.streams) != slen-1 {
+		panic("forgetting unknown stream id")
 	}
-	return cs
+	cc.lastActive = time.Now()
+	if len(cc.streams) == 0 && cc.idleTimer != nil {
+		cc.idleTimer.Reset(cc.idleTimeout)
+		cc.lastIdle = time.Now()
+	}
+	// Wake up writeRequestBody via clientStream.awaitFlowControl and
+	// wake up RoundTrip if there is a pending request.
+	cc.cond.Broadcast()
+
+	closeOnIdle := cc.singleUse || cc.doNotReuse || cc.t.disableKeepAlives()
+	if closeOnIdle && cc.streamsReserved == 0 && len(cc.streams) == 0 {
+		if http2VerboseLogs {
+			cc.vlogf("http2: Transport closing idle conn %p (forSingleUse=%v, maxStream=%v)", cc, cc.singleUse, cc.nextStreamID-2)
+		}
+		cc.closed = true
+		defer cc.tconn.Close()
+	}
+
+	cc.mu.Unlock()
 }
 
 // clientConnReadLoop is the state owned by the clientConn's frame-reading readLoop.
 type http2clientConnReadLoop struct {
-	_             http2incomparable
-	cc            *http2ClientConn
-	closeWhenIdle bool
+	_  http2incomparable
+	cc *http2ClientConn
 }
 
 // readLoop runs in its own goroutine and reads and dispatches frames.
@@ -8303,23 +8551,22 @@
 	} else if err == io.EOF {
 		err = io.ErrUnexpectedEOF
 	}
-	for _, cs := range cc.streams {
-		cs.bufPipe.CloseWithError(err) // no-op if already closed
-		select {
-		case cs.resc <- http2resAndError{err: err}:
-		default:
-		}
-		close(cs.done)
-	}
 	cc.closed = true
+	for _, cs := range cc.streams {
+		select {
+		case <-cs.peerClosed:
+			// The server closed the stream before closing the conn,
+			// so no need to interrupt it.
+		default:
+			cs.abortStreamLocked(err)
+		}
+	}
 	cc.cond.Broadcast()
 	cc.mu.Unlock()
 }
 
 func (rl *http2clientConnReadLoop) run() error {
 	cc := rl.cc
-	rl.closeWhenIdle = cc.t.disableKeepAlives() || cc.singleUse
-	gotReply := false // ever saw a HEADERS reply
 	gotSettings := false
 	readIdleTimeout := cc.t.ReadIdleTimeout
 	var t *time.Timer
@@ -8336,9 +8583,7 @@
 			cc.vlogf("http2: Transport readFrame error on conn %p: (%T) %v", cc, err, err)
 		}
 		if se, ok := err.(http2StreamError); ok {
-			if cs := cc.streamByID(se.StreamID, false); cs != nil {
-				cs.cc.writeStreamReset(cs.ID, se.Code, err)
-				cs.cc.forgetStreamID(cs.ID)
+			if cs := rl.streamByID(se.StreamID); cs != nil {
 				if se.Cause == nil {
 					se.Cause = cc.fr.errDetail
 				}
@@ -8358,22 +8603,16 @@
 			}
 			gotSettings = true
 		}
-		maybeIdle := false // whether frame might transition us to idle
 
 		switch f := f.(type) {
 		case *http2MetaHeadersFrame:
 			err = rl.processHeaders(f)
-			maybeIdle = true
-			gotReply = true
 		case *http2DataFrame:
 			err = rl.processData(f)
-			maybeIdle = true
 		case *http2GoAwayFrame:
 			err = rl.processGoAway(f)
-			maybeIdle = true
 		case *http2RSTStreamFrame:
 			err = rl.processResetStream(f)
-			maybeIdle = true
 		case *http2SettingsFrame:
 			err = rl.processSettings(f)
 		case *http2PushPromiseFrame:
@@ -8391,38 +8630,24 @@
 			}
 			return err
 		}
-		if rl.closeWhenIdle && gotReply && maybeIdle {
-			cc.closeIfIdle()
-		}
 	}
 }
 
 func (rl *http2clientConnReadLoop) processHeaders(f *http2MetaHeadersFrame) error {
-	cc := rl.cc
-	cs := cc.streamByID(f.StreamID, false)
+	cs := rl.streamByID(f.StreamID)
 	if cs == nil {
 		// We'd get here if we canceled a request while the
 		// server had its response still in flight. So if this
 		// was just something we canceled, ignore it.
 		return nil
 	}
-	if f.StreamEnded() {
-		// Issue 20521: If the stream has ended, streamByID() causes
-		// clientStream.done to be closed, which causes the request's bodyWriter
-		// to be closed with an errStreamClosed, which may be received by
-		// clientConn.RoundTrip before the result of processing these headers.
-		// Deferring stream closure allows the header processing to occur first.
-		// clientConn.RoundTrip may still receive the bodyWriter error first, but
-		// the fix for issue 16102 prioritises any response.
-		//
-		// Issue 22413: If there is no request body, we should close the
-		// stream before writing to cs.resc so that the stream is closed
-		// immediately once RoundTrip returns.
-		if cs.req.Body != nil {
-			defer cc.forgetStreamID(f.StreamID)
-		} else {
-			cc.forgetStreamID(f.StreamID)
-		}
+	if cs.readClosed {
+		rl.endStreamError(cs, http2StreamError{
+			StreamID: f.StreamID,
+			Code:     http2ErrCodeProtocol,
+			Cause:    errors.New("protocol error: headers after END_STREAM"),
+		})
+		return nil
 	}
 	if !cs.firstByte {
 		if cs.trace != nil {
@@ -8446,9 +8671,11 @@
 			return err
 		}
 		// Any other error type is a stream error.
-		cs.cc.writeStreamReset(f.StreamID, http2ErrCodeProtocol, err)
-		cc.forgetStreamID(cs.ID)
-		cs.resc <- http2resAndError{err: err}
+		rl.endStreamError(cs, http2StreamError{
+			StreamID: f.StreamID,
+			Code:     http2ErrCodeProtocol,
+			Cause:    err,
+		})
 		return nil // return nil from process* funcs to keep conn alive
 	}
 	if res == nil {
@@ -8456,7 +8683,11 @@
 		return nil
 	}
 	cs.resTrailer = &res.Trailer
-	cs.resc <- http2resAndError{res: res}
+	cs.res = res
+	close(cs.respHeaderRecv)
+	if f.StreamEnded() {
+		rl.endStream(cs)
+	}
 	return nil
 }
 
@@ -8518,6 +8749,9 @@
 	}
 
 	if statusCode >= 100 && statusCode <= 199 {
+		if f.StreamEnded() {
+			return nil, errors.New("1xx informational response with END_STREAM flag")
+		}
 		cs.num1xx++
 		const max1xxResponses = 5 // arbitrary bound on number of informational responses, same as net/http
 		if cs.num1xx > max1xxResponses {
@@ -8530,40 +8764,47 @@
 		}
 		if statusCode == 100 {
 			http2traceGot100Continue(cs.trace)
-			if cs.on100 != nil {
-				cs.on100() // forces any write delay timer to fire
+			select {
+			case cs.on100 <- struct{}{}:
+			default:
 			}
 		}
 		cs.pastHeaders = false // do it all again
 		return nil, nil
 	}
 
-	streamEnded := f.StreamEnded()
-	isHead := cs.req.Method == "HEAD"
-	if !streamEnded || isHead {
-		res.ContentLength = -1
-		if clens := res.Header["Content-Length"]; len(clens) == 1 {
-			if cl, err := strconv.ParseUint(clens[0], 10, 63); err == nil {
-				res.ContentLength = int64(cl)
-			} else {
-				// TODO: care? unlike http/1, it won't mess up our framing, so it's
-				// more safe smuggling-wise to ignore.
-			}
-		} else if len(clens) > 1 {
+	res.ContentLength = -1
+	if clens := res.Header["Content-Length"]; len(clens) == 1 {
+		if cl, err := strconv.ParseUint(clens[0], 10, 63); err == nil {
+			res.ContentLength = int64(cl)
+		} else {
 			// TODO: care? unlike http/1, it won't mess up our framing, so it's
 			// more safe smuggling-wise to ignore.
 		}
+	} else if len(clens) > 1 {
+		// TODO: care? unlike http/1, it won't mess up our framing, so it's
+		// more safe smuggling-wise to ignore.
+	} else if f.StreamEnded() && !cs.isHead {
+		res.ContentLength = 0
 	}
 
-	if streamEnded || isHead {
+	if cs.isHead {
 		res.Body = http2noBody
 		return res, nil
 	}
 
-	cs.bufPipe = http2pipe{b: &http2dataBuffer{expected: res.ContentLength}}
+	if f.StreamEnded() {
+		if res.ContentLength > 0 {
+			res.Body = http2missingBody{}
+		} else {
+			res.Body = http2noBody
+		}
+		return res, nil
+	}
+
+	cs.bufPipe.setBuffer(&http2dataBuffer{expected: res.ContentLength})
 	cs.bytesRemain = res.ContentLength
 	res.Body = http2transportResponseBody{cs}
-	go cs.awaitRequestCancel(cs.req)
 
 	if cs.requestedGzip && res.Header.Get("Content-Encoding") == "gzip" {
 		res.Header.Del("Content-Encoding")
@@ -8604,8 +8845,7 @@
 }
 
 // transportResponseBody is the concrete type of Transport.RoundTrip's
-// Response.Body. It is an io.ReadCloser. On Read, it reads from cs.body.
-// On Close it sends RST_STREAM if EOF wasn't already seen.
+// Response.Body. It is an io.ReadCloser.
 type http2transportResponseBody struct {
 	cs *http2clientStream
 }
@@ -8623,7 +8863,7 @@
 			n = int(cs.bytesRemain)
 			if err == nil {
 				err = errors.New("net/http: server replied with more than declared Content-Length; truncated")
-				cc.writeStreamReset(cs.ID, http2ErrCodeProtocol, err)
+				cs.abortStream(err)
 			}
 			cs.readErr = err
 			return int(cs.bytesRemain), err
@@ -8641,8 +8881,6 @@
 	}
 
 	cc.mu.Lock()
-	defer cc.mu.Unlock()
-
 	var connAdd, streamAdd int32
 	// Check the conn-level first, before the stream-level.
 	if v := cc.inflow.available(); v < http2transportDefaultConnFlow/2 {
@@ -8659,6 +8897,8 @@
 			cs.inflow.add(streamAdd)
 		}
 	}
+	cc.mu.Unlock()
+
 	if connAdd != 0 || streamAdd != 0 {
 		cc.wmu.Lock()
 		defer cc.wmu.Unlock()
@@ -8679,34 +8919,42 @@
 	cs := b.cs
 	cc := cs.cc
 
-	serverSentStreamEnd := cs.bufPipe.Err() == io.EOF
 	unread := cs.bufPipe.Len()
-
-	if unread > 0 || !serverSentStreamEnd {
+	if unread > 0 {
 		cc.mu.Lock()
-		cc.wmu.Lock()
-		if !serverSentStreamEnd {
-			cc.fr.WriteRSTStream(cs.ID, http2ErrCodeCancel)
-			cs.didReset = true
-		}
 		// Return connection-level flow control.
 		if unread > 0 {
 			cc.inflow.add(int32(unread))
+		}
+		cc.mu.Unlock()
+
+		// TODO(dneil): Acquiring this mutex can block indefinitely.
+		// Move flow control return to a goroutine?
+		cc.wmu.Lock()
+		// Return connection-level flow control.
+		if unread > 0 {
 			cc.fr.WriteWindowUpdate(0, uint32(unread))
 		}
 		cc.bw.Flush()
 		cc.wmu.Unlock()
-		cc.mu.Unlock()
 	}
 
 	cs.bufPipe.BreakWithError(http2errClosedResponseBody)
-	cc.forgetStreamID(cs.ID)
+	cs.abortStream(http2errClosedResponseBody)
+
+	select {
+	case <-cs.donec:
+	case <-cs.ctx.Done():
+		return cs.ctx.Err()
+	case <-cs.reqCancel:
+		return http2errRequestCanceled
+	}
 	return nil
 }
 
 func (rl *http2clientConnReadLoop) processData(f *http2DataFrame) error {
 	cc := rl.cc
-	cs := cc.streamByID(f.StreamID, f.StreamEnded())
+	cs := rl.streamByID(f.StreamID)
 	data := f.Data()
 	if cs == nil {
 		cc.mu.Lock()
@@ -8735,6 +8983,14 @@
 		}
 		return nil
 	}
+	if cs.readClosed {
+		cc.logf("protocol error: received DATA after END_STREAM")
+		rl.endStreamError(cs, http2StreamError{
+			StreamID: f.StreamID,
+			Code:     http2ErrCodeProtocol,
+		})
+		return nil
+	}
 	if !cs.firstByte {
 		cc.logf("protocol error: received DATA before a HEADERS frame")
 		rl.endStreamError(cs, http2StreamError{
@@ -8744,7 +9000,7 @@
 		return nil
 	}
 	if f.Length > 0 {
-		if cs.req.Method == "HEAD" && len(data) > 0 {
+		if cs.isHead && len(data) > 0 {
 			cc.logf("protocol error: received DATA on a HEAD request")
 			rl.endStreamError(cs, http2StreamError{
 				StreamID: f.StreamID,
@@ -8766,30 +9022,39 @@
 		if pad := int(f.Length) - len(data); pad > 0 {
 			refund += pad
 		}
-		// Return len(data) now if the stream is already closed,
-		// since data will never be read.
-		didReset := cs.didReset
-		if didReset {
-			refund += len(data)
+
+		didReset := false
+		var err error
+		if len(data) > 0 {
+			if _, err = cs.bufPipe.Write(data); err != nil {
+				// Return len(data) now if the stream is already closed,
+				// since data will never be read.
+				didReset = true
+				refund += len(data)
+			}
 		}
+
 		if refund > 0 {
 			cc.inflow.add(int32(refund))
+			if !didReset {
+				cs.inflow.add(int32(refund))
+			}
+		}
+		cc.mu.Unlock()
+
+		if refund > 0 {
 			cc.wmu.Lock()
 			cc.fr.WriteWindowUpdate(0, uint32(refund))
 			if !didReset {
-				cs.inflow.add(int32(refund))
 				cc.fr.WriteWindowUpdate(cs.ID, uint32(refund))
 			}
 			cc.bw.Flush()
 			cc.wmu.Unlock()
 		}
-		cc.mu.Unlock()
 
-		if len(data) > 0 && !didReset {
-			if _, err := cs.bufPipe.Write(data); err != nil {
-				rl.endStreamError(cs, err)
-				return err
-			}
+		if err != nil {
+			rl.endStreamError(cs, err)
+			return nil
 		}
 	}
 
@@ -8802,24 +9067,26 @@
 func (rl *http2clientConnReadLoop) endStream(cs *http2clientStream) {
 	// TODO: check that any declared content-length matches, like
 	// server.go's (*stream).endStream method.
-	rl.endStreamError(cs, nil)
+	if !cs.readClosed {
+		cs.readClosed = true
+		cs.bufPipe.closeWithErrorAndCode(io.EOF, cs.copyTrailers)
+		close(cs.peerClosed)
+	}
 }
 
 func (rl *http2clientConnReadLoop) endStreamError(cs *http2clientStream, err error) {
-	var code func()
-	if err == nil {
-		err = io.EOF
-		code = cs.copyTrailers
-	}
-	if http2isConnectionCloseRequest(cs.req) {
-		rl.closeWhenIdle = true
-	}
-	cs.bufPipe.closeWithErrorAndCode(err, code)
+	cs.readAborted = true
+	cs.abortStream(err)
+}
 
-	select {
-	case cs.resc <- http2resAndError{err: err}:
-	default:
+func (rl *http2clientConnReadLoop) streamByID(id uint32) *http2clientStream {
+	rl.cc.mu.Lock()
+	defer rl.cc.mu.Unlock()
+	cs := rl.cc.streams[id]
+	if cs != nil && !cs.readAborted {
+		return cs
 	}
+	return nil
 }
 
 func (cs *http2clientStream) copyTrailers() {
@@ -8845,6 +9112,23 @@
 
 func (rl *http2clientConnReadLoop) processSettings(f *http2SettingsFrame) error {
 	cc := rl.cc
+	// Locking both mu and wmu here allows frame encoding to read settings with only wmu held.
+	// Acquiring wmu when f.IsAck() is unnecessary, but convenient and mostly harmless.
+	cc.wmu.Lock()
+	defer cc.wmu.Unlock()
+
+	if err := rl.processSettingsNoWrite(f); err != nil {
+		return err
+	}
+	if !f.IsAck() {
+		cc.fr.WriteSettingsAck()
+		cc.bw.Flush()
+	}
+	return nil
+}
+
+func (rl *http2clientConnReadLoop) processSettingsNoWrite(f *http2SettingsFrame) error {
+	cc := rl.cc
 	cc.mu.Lock()
 	defer cc.mu.Unlock()
 
@@ -8856,12 +9140,14 @@
 		return http2ConnectionError(http2ErrCodeProtocol)
 	}
 
+	var seenMaxConcurrentStreams bool
 	err := f.ForeachSetting(func(s http2Setting) error {
 		switch s.ID {
 		case http2SettingMaxFrameSize:
 			cc.maxFrameSize = s.Val
 		case http2SettingMaxConcurrentStreams:
 			cc.maxConcurrentStreams = s.Val
+			seenMaxConcurrentStreams = true
 		case http2SettingMaxHeaderListSize:
 			cc.peerMaxHeaderListSize = uint64(s.Val)
 		case http2SettingInitialWindowSize:
@@ -8893,17 +9179,23 @@
 		return err
 	}
 
-	cc.wmu.Lock()
-	defer cc.wmu.Unlock()
+	if !cc.seenSettings {
+		if !seenMaxConcurrentStreams {
+			// This was the servers initial SETTINGS frame and it
+			// didn't contain a MAX_CONCURRENT_STREAMS field so
+			// increase the number of concurrent streams this
+			// connection can establish to our default.
+			cc.maxConcurrentStreams = http2defaultMaxConcurrentStreams
+		}
+		cc.seenSettings = true
+	}
 
-	cc.fr.WriteSettingsAck()
-	cc.bw.Flush()
-	return cc.werr
+	return nil
 }
 
 func (rl *http2clientConnReadLoop) processWindowUpdate(f *http2WindowUpdateFrame) error {
 	cc := rl.cc
-	cs := cc.streamByID(f.StreamID, false)
+	cs := rl.streamByID(f.StreamID)
 	if f.StreamID != 0 && cs == nil {
 		return nil
 	}
@@ -8923,24 +9215,19 @@
 }
 
 func (rl *http2clientConnReadLoop) processResetStream(f *http2RSTStreamFrame) error {
-	cs := rl.cc.streamByID(f.StreamID, true)
+	cs := rl.streamByID(f.StreamID)
 	if cs == nil {
 		// TODO: return error if server tries to RST_STEAM an idle stream
 		return nil
 	}
-	select {
-	case <-cs.peerReset:
-		// Already reset.
-		// This is the only goroutine
-		// which closes this, so there
-		// isn't a race.
-	default:
-		err := http2streamError(cs.ID, f.ErrCode)
-		cs.resetErr = err
-		close(cs.peerReset)
-		cs.bufPipe.CloseWithError(err)
-		cs.cc.cond.Broadcast() // wake up checkResetOrDone via clientStream.awaitFlowControl
+	serr := http2streamError(cs.ID, f.ErrCode)
+	serr.Cause = http2errFromPeer
+	if f.ErrCode == http2ErrCodeProtocol {
+		rl.cc.SetDoNotReuse()
 	}
+	cs.abortStream(serr)
+
+	cs.bufPipe.CloseWithError(serr)
 	return nil
 }
 
@@ -8962,19 +9249,24 @@
 		}
 		cc.mu.Unlock()
 	}
-	cc.wmu.Lock()
-	if err := cc.fr.WritePing(false, p); err != nil {
-		cc.wmu.Unlock()
-		return err
-	}
-	if err := cc.bw.Flush(); err != nil {
-		cc.wmu.Unlock()
-		return err
-	}
-	cc.wmu.Unlock()
+	errc := make(chan error, 1)
+	go func() {
+		cc.wmu.Lock()
+		defer cc.wmu.Unlock()
+		if err := cc.fr.WritePing(false, p); err != nil {
+			errc <- err
+			return
+		}
+		if err := cc.bw.Flush(); err != nil {
+			errc <- err
+			return
+		}
+	}()
 	select {
 	case <-c:
 		return nil
+	case err := <-errc:
+		return err
 	case <-ctx.Done():
 		return ctx.Err()
 	case <-cc.readerDone:
@@ -9051,6 +9343,12 @@
 
 var http2noBody io.ReadCloser = ioutil.NopCloser(bytes.NewReader(nil))
 
+type http2missingBody struct{}
+
+func (http2missingBody) Close() error { return nil }
+
+func (http2missingBody) Read([]byte) (int, error) { return 0, io.ErrUnexpectedEOF }
+
 func http2strSliceContains(ss []string, s string) bool {
 	for _, v := range ss {
 		if v == s {
@@ -9097,87 +9395,6 @@
 
 func (r http2errorReader) Read(p []byte) (int, error) { return 0, r.err }
 
-// bodyWriterState encapsulates various state around the Transport's writing
-// of the request body, particularly regarding doing delayed writes of the body
-// when the request contains "Expect: 100-continue".
-type http2bodyWriterState struct {
-	cs     *http2clientStream
-	timer  *time.Timer   // if non-nil, we're doing a delayed write
-	fnonce *sync.Once    // to call fn with
-	fn     func()        // the code to run in the goroutine, writing the body
-	resc   chan error    // result of fn's execution
-	delay  time.Duration // how long we should delay a delayed write for
-}
-
-func (t *http2Transport) getBodyWriterState(cs *http2clientStream, body io.Reader) (s http2bodyWriterState) {
-	s.cs = cs
-	if body == nil {
-		return
-	}
-	resc := make(chan error, 1)
-	s.resc = resc
-	s.fn = func() {
-		cs.cc.mu.Lock()
-		cs.startedWrite = true
-		cs.cc.mu.Unlock()
-		resc <- cs.writeRequestBody(body, cs.req.Body)
-	}
-	s.delay = t.expectContinueTimeout()
-	if s.delay == 0 ||
-		!httpguts.HeaderValuesContainsToken(
-			cs.req.Header["Expect"],
-			"100-continue") {
-		return
-	}
-	s.fnonce = new(sync.Once)
-
-	// Arm the timer with a very large duration, which we'll
-	// intentionally lower later. It has to be large now because
-	// we need a handle to it before writing the headers, but the
-	// s.delay value is defined to not start until after the
-	// request headers were written.
-	const hugeDuration = 365 * 24 * time.Hour
-	s.timer = time.AfterFunc(hugeDuration, func() {
-		s.fnonce.Do(s.fn)
-	})
-	return
-}
-
-func (s http2bodyWriterState) cancel() {
-	if s.timer != nil {
-		if s.timer.Stop() {
-			s.resc <- nil
-		}
-	}
-}
-
-func (s http2bodyWriterState) on100() {
-	if s.timer == nil {
-		// If we didn't do a delayed write, ignore the server's
-		// bogus 100 continue response.
-		return
-	}
-	s.timer.Stop()
-	go func() { s.fnonce.Do(s.fn) }()
-}
-
-// scheduleBodyWrite starts writing the body, either immediately (in
-// the common case) or after the delay timeout. It should not be
-// called until after the headers have been written.
-func (s http2bodyWriterState) scheduleBodyWrite() {
-	if s.timer == nil {
-		// We're not doing a delayed write (see
-		// getBodyWriterState), so just start the writing
-		// goroutine immediately.
-		go s.fn()
-		return
-	}
-	http2traceWait100Continue(s.cs.trace)
-	if s.timer.Stop() {
-		s.timer.Reset(s.delay)
-	}
-}
-
 // isConnectionCloseRequest reports whether req should use its own
 // connection for a single request and then close the connection.
 func http2isConnectionCloseRequest(req *Request) bool {
@@ -9600,7 +9817,12 @@
 	}
 	for _, k := range keys {
 		vv := h[k]
-		k = http2lowerHeader(k)
+		k, ascii := http2lowerHeader(k)
+		if !ascii {
+			// Skip writing invalid headers. Per RFC 7540, Section 8.1.2, header
+			// field names have to be ASCII characters (just as in HTTP/1.x).
+			continue
+		}
 		if !http2validWireHeaderFieldName(k) {
 			// Skip it as backup paranoia. Per
 			// golang.org/issue/14048, these should
diff --git a/src/runtime/netpoll.go b/src/runtime/netpoll.go
index 77eb3aa4..f296b0a 100644
--- a/src/runtime/netpoll.go
+++ b/src/runtime/netpoll.go
@@ -74,6 +74,7 @@
 	// pollReset, pollWait, pollWaitCanceled and runtime·netpollready (IO readiness notification)
 	// proceed w/o taking the lock. So closing, everr, rg, rd, wg and wd are manipulated
 	// in a lock-free way by all operations.
+	// TODO(golang.org/issue/49008): audit these lock-free fields for continued correctness.
 	// NOTE(dvyukov): the following code uses uintptr to store *g (rg/wg),
 	// that will blow up when GC starts moving objects.
 	lock    mutex // protects the following fields
@@ -82,11 +83,11 @@
 	everr   bool      // marks event scanning error happened
 	user    uint32    // user settable cookie
 	rseq    uintptr   // protects from stale read timers
-	rg      uintptr   // pdReady, pdWait, G waiting for read or nil
+	rg      uintptr   // pdReady, pdWait, G waiting for read or nil. Accessed atomically.
 	rt      timer     // read deadline timer (set if rt.f != nil)
 	rd      int64     // read deadline
 	wseq    uintptr   // protects from stale write timers
-	wg      uintptr   // pdReady, pdWait, G waiting for write or nil
+	wg      uintptr   // pdReady, pdWait, G waiting for write or nil. Accessed atomically.
 	wt      timer     // write deadline timer
 	wd      int64     // write deadline
 	self    *pollDesc // storage for indirect interface. See (*pollDesc).makeArg.
@@ -143,20 +144,22 @@
 func poll_runtime_pollOpen(fd uintptr) (*pollDesc, int) {
 	pd := pollcache.alloc()
 	lock(&pd.lock)
-	if pd.wg != 0 && pd.wg != pdReady {
+	wg := atomic.Loaduintptr(&pd.wg)
+	if wg != 0 && wg != pdReady {
 		throw("runtime: blocked write on free polldesc")
 	}
-	if pd.rg != 0 && pd.rg != pdReady {
+	rg := atomic.Loaduintptr(&pd.rg)
+	if rg != 0 && rg != pdReady {
 		throw("runtime: blocked read on free polldesc")
 	}
 	pd.fd = fd
 	pd.closing = false
 	pd.everr = false
 	pd.rseq++
-	pd.rg = 0
+	atomic.Storeuintptr(&pd.rg, 0)
 	pd.rd = 0
 	pd.wseq++
-	pd.wg = 0
+	atomic.Storeuintptr(&pd.wg, 0)
 	pd.wd = 0
 	pd.self = pd
 	unlock(&pd.lock)
@@ -171,10 +174,12 @@
 	if !pd.closing {
 		throw("runtime: close polldesc w/o unblock")
 	}
-	if pd.wg != 0 && pd.wg != pdReady {
+	wg := atomic.Loaduintptr(&pd.wg)
+	if wg != 0 && wg != pdReady {
 		throw("runtime: blocked write on closing polldesc")
 	}
-	if pd.rg != 0 && pd.rg != pdReady {
+	rg := atomic.Loaduintptr(&pd.rg)
+	if rg != 0 && rg != pdReady {
 		throw("runtime: blocked read on closing polldesc")
 	}
 	netpollclose(pd.fd)
@@ -198,9 +203,9 @@
 		return errcode
 	}
 	if mode == 'r' {
-		pd.rg = 0
+		atomic.Storeuintptr(&pd.rg, 0)
 	} else if mode == 'w' {
-		pd.wg = 0
+		atomic.Storeuintptr(&pd.wg, 0)
 	}
 	return pollNoError
 }
@@ -410,6 +415,8 @@
 
 // returns true if IO is ready, or false if timedout or closed
 // waitio - wait only for completed IO, ignore errors
+// Concurrent calls to netpollblock in the same mode are forbidden, as pollDesc
+// can hold only a single waiting goroutine for each mode.
 func netpollblock(pd *pollDesc, mode int32, waitio bool) bool {
 	gpp := &pd.rg
 	if mode == 'w' {
@@ -418,17 +425,19 @@
 
 	// set the gpp semaphore to pdWait
 	for {
-		old := *gpp
-		if old == pdReady {
-			*gpp = 0
+		// Consume notification if already ready.
+		if atomic.Casuintptr(gpp, pdReady, 0) {
 			return true
 		}
-		if old != 0 {
-			throw("runtime: double wait")
-		}
 		if atomic.Casuintptr(gpp, 0, pdWait) {
 			break
 		}
+
+		// Double check that this isn't corrupt; otherwise we'd loop
+		// forever.
+		if v := atomic.Loaduintptr(gpp); v != pdReady && v != 0 {
+			throw("runtime: double wait")
+		}
 	}
 
 	// need to recheck error states after setting gpp to pdWait
@@ -452,7 +461,7 @@
 	}
 
 	for {
-		old := *gpp
+		old := atomic.Loaduintptr(gpp)
 		if old == pdReady {
 			return nil
 		}
diff --git a/src/vendor/modules.txt b/src/vendor/modules.txt
index 061e0ac..a3768fd 100644
--- a/src/vendor/modules.txt
+++ b/src/vendor/modules.txt
@@ -8,7 +8,7 @@
 golang.org/x/crypto/hkdf
 golang.org/x/crypto/internal/subtle
 golang.org/x/crypto/poly1305
-# golang.org/x/net v0.0.0-20210901185431-d2e9a4ea682f
+# golang.org/x/net v0.0.0-20211101194150-d8c3cde3c676
 ## explicit
 golang.org/x/net/dns/dnsmessage
 golang.org/x/net/http/httpguts
diff --git a/test/fixedbugs/issue48473.go b/test/fixedbugs/issue48473.go
new file mode 100644
index 0000000..8edef1f
--- /dev/null
+++ b/test/fixedbugs/issue48473.go
@@ -0,0 +1,30 @@
+// run
+
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import "fmt"
+
+func f(x uint64) uint64 {
+	s := "\x04"
+	c := s[0]
+	return x - x<<c<<4
+}
+
+func g(x uint32) uint32 {
+	s := "\x04"
+	c := s[0]
+	return x - x<<c<<4
+}
+
+func main() {
+	if want, got := uint64(0xffffffffffffff01), f(1); want != got {
+		panic(fmt.Sprintf("want %x got %x", want, got))
+	}
+	if want, got := uint32(0xffffff01), g(1); want != got {
+		panic(fmt.Sprintf("want %x got %x", want, got))
+	}
+}
diff --git a/test/fixedbugs/issue48476.go b/test/fixedbugs/issue48476.go
new file mode 100644
index 0000000..6b77f7c
--- /dev/null
+++ b/test/fixedbugs/issue48476.go
@@ -0,0 +1,21 @@
+// run
+
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import "fmt"
+
+//go:noinline
+func f(x uint64) uint64 {
+	s := "\x04"
+	c := s[0]
+	return x << c << 4
+}
+func main() {
+	if want, got := uint64(1<<8), f(1); want != got {
+		panic(fmt.Sprintf("want %x got %x", want, got))
+	}
+}