[dev.ssa] Merge remote-tracking branch 'origin/master' into mergebranch

The only major fixup is that duffzero changed from
8-byte writes to 16-byte writes.

Change-Id: I1762b74ce67a8e4b81c11568027cdb3572f7f87c
diff --git a/src/archive/tar/reader.go b/src/archive/tar/reader.go
index 67daca2..b2c45fd 100644
--- a/src/archive/tar/reader.go
+++ b/src/archive/tar/reader.go
@@ -12,6 +12,7 @@
 	"errors"
 	"io"
 	"io/ioutil"
+	"math"
 	"os"
 	"strconv"
 	"strings"
@@ -49,12 +50,36 @@
 	nb int64     // number of unread bytes for current file entry
 }
 
-// A sparseFileReader is a numBytesReader for reading sparse file data from a tar archive.
+// A sparseFileReader is a numBytesReader for reading sparse file data from a
+// tar archive.
 type sparseFileReader struct {
-	rfr *regFileReader // reads the sparse-encoded file data
-	sp  []sparseEntry  // the sparse map for the file
-	pos int64          // keeps track of file position
-	tot int64          // total size of the file
+	rfr   numBytesReader // Reads the sparse-encoded file data
+	sp    []sparseEntry  // The sparse map for the file
+	pos   int64          // Keeps track of file position
+	total int64          // Total size of the file
+}
+
+// A sparseEntry holds a single entry in a sparse file's sparse map.
+//
+// Sparse files are represented using a series of sparseEntrys.
+// Despite the name, a sparseEntry represents an actual data fragment that
+// references data found in the underlying archive stream. All regions not
+// covered by a sparseEntry are logically filled with zeros.
+//
+// For example, if the underlying raw file contains the 8-byte data:
+//	var compactData = "abcdefgh"
+//
+// And the sparse map has the following entries:
+//	var sp = []sparseEntry{
+//		{offset: 2,  numBytes: 5}, // Data fragment for [2..6]
+//		{offset: 18, numBytes: 3}, // Data fragment for [18..20]
+//	}
+//
+// Then the content of the resulting sparse file with a "real" size of 25 is:
+//	var sparseData = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4
+type sparseEntry struct {
+	offset   int64 // Starting position of the fragment
+	numBytes int64 // Length of the fragment
 }
 
 // Keywords for GNU sparse files in a PAX extended header
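
As a hedged aside (not part of the patch): the arithmetic in the new doc
comment can be reproduced with a small helper. expandSparse is an
illustrative assumption built on the sparseEntry type above, and it omits
the validation that newSparseFileReader adds later in this CL.

	// expandSparse expands compact data and a sparse map into the logical
	// file contents. Entries copy from the compact data in order; regions
	// not covered by any entry remain zero.
	func expandSparse(compact []byte, sp []sparseEntry, total int64) []byte {
		out := make([]byte, total) // holes are implicitly zero-filled
		var pos int64
		for _, s := range sp {
			pos += int64(copy(out[s.offset:s.offset+s.numBytes], compact[pos:]))
		}
		return out
	}

With compactData and the two-entry map above, expandSparse returns exactly
the 25-byte sparseData string shown in the comment.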
@@ -128,7 +153,10 @@
 		if sp != nil {
 			// Current file is a PAX format GNU sparse file.
 			// Set the current file reader to a sparse file reader.
-			tr.curr = &sparseFileReader{rfr: tr.curr.(*regFileReader), sp: sp, tot: hdr.Size}
+			tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size)
+			if tr.err != nil {
+				return nil, tr.err
+			}
 		}
 		return hdr, nil
 	case TypeGNULongName:
@@ -137,18 +165,24 @@
 		if err != nil {
 			return nil, err
 		}
-		hdr, err := tr.Next()
+		hdr, tr.err = tr.Next()
+		if tr.err != nil {
+			return nil, tr.err
+		}
 		hdr.Name = cString(realname)
-		return hdr, err
+		return hdr, nil
 	case TypeGNULongLink:
 		// We have a GNU long link header.
 		realname, err := ioutil.ReadAll(tr)
 		if err != nil {
 			return nil, err
 		}
-		hdr, err := tr.Next()
+		hdr, tr.err = tr.Next()
+		if tr.err != nil {
+			return nil, tr.err
+		}
 		hdr.Linkname = cString(realname)
-		return hdr, err
+		return hdr, nil
 	}
 	return hdr, tr.err
 }
@@ -541,21 +575,17 @@
 		if tr.err != nil {
 			return nil
 		}
+
 		// Current file is a GNU sparse file. Update the current file reader.
-		tr.curr = &sparseFileReader{rfr: tr.curr.(*regFileReader), sp: sp, tot: hdr.Size}
+		tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size)
+		if tr.err != nil {
+			return nil
+		}
 	}
 
 	return hdr
 }
 
-// A sparseEntry holds a single entry in a sparse file's sparse map.
-// A sparse entry indicates the offset and size in a sparse file of a
-// block of data.
-type sparseEntry struct {
-	offset   int64
-	numBytes int64
-}
-
 // readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format.
 // The sparse map is stored in the tar header if it's small enough. If it's larger than four entries,
 // then one or more extension headers are used to store the rest of the sparse map.
@@ -688,40 +718,37 @@
 	return sp, nil
 }
 
-// readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format version 0.1.
-// The sparse map is stored in the PAX headers.
-func readGNUSparseMap0x1(headers map[string]string) ([]sparseEntry, error) {
-	// Get number of entries
-	numEntriesStr, ok := headers[paxGNUSparseNumBlocks]
-	if !ok {
-		return nil, ErrHeader
-	}
-	numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0)
-	if err != nil {
+// readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format
+// version 0.1. The sparse map is stored in the PAX headers.
+func readGNUSparseMap0x1(extHdrs map[string]string) ([]sparseEntry, error) {
+	// Get number of entries.
+	// Use integer overflow resistant math to check this.
+	numEntriesStr := extHdrs[paxGNUSparseNumBlocks]
+	numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) // Intentionally parse as native int
+	if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
 		return nil, ErrHeader
 	}
 
-	sparseMap := strings.Split(headers[paxGNUSparseMap], ",")
-
-	// There should be two numbers in sparseMap for each entry
+	// There should be two numbers in sparseMap for each entry.
+	sparseMap := strings.Split(extHdrs[paxGNUSparseMap], ",")
 	if int64(len(sparseMap)) != 2*numEntries {
 		return nil, ErrHeader
 	}
 
-	// Loop through the entries in the sparse map
+	// Loop through the entries in the sparse map.
+	// numEntries is trusted now.
 	sp := make([]sparseEntry, 0, numEntries)
 	for i := int64(0); i < numEntries; i++ {
-		offset, err := strconv.ParseInt(sparseMap[2*i], 10, 0)
+		offset, err := strconv.ParseInt(sparseMap[2*i], 10, 64)
 		if err != nil {
 			return nil, ErrHeader
 		}
-		numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 0)
+		numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 64)
 		if err != nil {
 			return nil, ErrHeader
 		}
 		sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
 	}
-
 	return sp, nil
 }
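
A hedged note on the guard above: Go's signed integers wrap on overflow, so
for any numEntries larger than half the int range, 2*numEntries comes out
negative and therefore smaller than numEntries. A minimal standalone
demonstration, assuming a 64-bit int:

	package main

	import (
		"fmt"
		"math"
	)

	func main() {
		n := int64(math.MaxInt64/2 + 1)
		fmt.Println(2*n < n) // true: 2*n wrapped to a negative value
	}

This is why the length check against len(sparseMap) can safely multiply by
two before the entries themselves are trusted.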
 
@@ -739,9 +766,13 @@
 // It returns 0, io.EOF when it reaches the end of that entry,
 // until Next is called to advance to the next entry.
 func (tr *Reader) Read(b []byte) (n int, err error) {
+	if tr.err != nil {
+		return 0, tr.err
+	}
 	if tr.curr == nil {
 		return 0, io.EOF
 	}
+
 	n, err = tr.curr.Read(b)
 	if err != nil && err != io.EOF {
 		tr.err = err
@@ -771,9 +802,33 @@
 	return rfr.nb
 }
 
-// readHole reads a sparse file hole ending at offset toOffset
-func (sfr *sparseFileReader) readHole(b []byte, toOffset int64) int {
-	n64 := toOffset - sfr.pos
+// newSparseFileReader creates a new sparseFileReader, but validates all of the
+// sparse entries before doing so.
+func newSparseFileReader(rfr numBytesReader, sp []sparseEntry, total int64) (*sparseFileReader, error) {
+	if total < 0 {
+		return nil, ErrHeader // Total size cannot be negative
+	}
+
+	// Validate all sparse entries. These are the same checks as performed by
+	// the BSD tar utility.
+	for i, s := range sp {
+		switch {
+		case s.offset < 0 || s.numBytes < 0:
+			return nil, ErrHeader // Negative values are never okay
+		case s.offset > math.MaxInt64-s.numBytes:
+			return nil, ErrHeader // Integer overflow with large length
+		case s.offset+s.numBytes > total:
+			return nil, ErrHeader // Region extends beyond the "real" size
+		case i > 0 && sp[i-1].offset+sp[i-1].numBytes > s.offset:
+			return nil, ErrHeader // Regions can't overlap and must be in order
+		}
+	}
+	return &sparseFileReader{rfr: rfr, sp: sp, total: total}, nil
+}
+
+// readHole reads a sparse hole ending at endOffset.
+func (sfr *sparseFileReader) readHole(b []byte, endOffset int64) int {
+	n64 := endOffset - sfr.pos
 	if n64 > int64(len(b)) {
 		n64 = int64(len(b))
 	}
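
A hedged usage sketch of the new constructor, reusing the regFileReader
literal style from this patch's tests (the byte values are illustrative):

	rfr := &regFileReader{r: bytes.NewReader([]byte("abcdefgh")), nb: 8}
	sp := []sparseEntry{{offset: 2, numBytes: 5}, {offset: 18, numBytes: 3}}
	sfr, err := newSparseFileReader(rfr, sp, 25)
	if err != nil {
		// Negative, overlapping, out-of-order, or out-of-range entries
		// are rejected here with ErrHeader, before any data is read.
		return
	}
	buf, _ := ioutil.ReadAll(sfr)
	fmt.Printf("%q\n", buf) // 2 zeros, "abcde", 11 zeros, "fgh", 4 zeros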
@@ -787,49 +842,54 @@
 
 // Read reads the sparse file data in expanded form.
 func (sfr *sparseFileReader) Read(b []byte) (n int, err error) {
-	if len(sfr.sp) == 0 {
-		// No more data fragments to read from.
-		if sfr.pos < sfr.tot {
-			// We're in the last hole
-			n = sfr.readHole(b, sfr.tot)
-			return
-		}
-		// Otherwise, we're at the end of the file
-		return 0, io.EOF
-	}
-	if sfr.tot < sfr.sp[0].offset {
-		return 0, io.ErrUnexpectedEOF
-	}
-	if sfr.pos < sfr.sp[0].offset {
-		// We're in a hole
-		n = sfr.readHole(b, sfr.sp[0].offset)
-		return
+	// Skip past all empty fragments.
+	for len(sfr.sp) > 0 && sfr.sp[0].numBytes == 0 {
+		sfr.sp = sfr.sp[1:]
 	}
 
-	// We're not in a hole, so we'll read from the next data fragment
-	posInFragment := sfr.pos - sfr.sp[0].offset
-	bytesLeft := sfr.sp[0].numBytes - posInFragment
+	// If there are no more fragments, then it is possible that there
+	// is one last sparse hole.
+	if len(sfr.sp) == 0 {
+		// This behavior matches the BSD tar utility.
+		// However, GNU tar stops returning data even if sfr.total is unmet.
+		if sfr.pos < sfr.total {
+			return sfr.readHole(b, sfr.total), nil
+		}
+		return 0, io.EOF
+	}
+
+	// In front of a data fragment, so read a hole.
+	if sfr.pos < sfr.sp[0].offset {
+		return sfr.readHole(b, sfr.sp[0].offset), nil
+	}
+
+	// In a data fragment, so read from it.
+	// This math is overflow free since we verify that offset and numBytes can
+	// be safely added when creating the sparseFileReader.
+	endPos := sfr.sp[0].offset + sfr.sp[0].numBytes // End offset of fragment
+	bytesLeft := endPos - sfr.pos                   // Bytes left in fragment
 	if int64(len(b)) > bytesLeft {
-		b = b[0:bytesLeft]
+		b = b[:bytesLeft]
 	}
 
 	n, err = sfr.rfr.Read(b)
 	sfr.pos += int64(n)
-
-	if int64(n) == bytesLeft {
-		// We're done with this fragment
-		sfr.sp = sfr.sp[1:]
+	if err == io.EOF {
+		if sfr.pos < endPos {
+			err = io.ErrUnexpectedEOF // There was supposed to be more data
+		} else if sfr.pos < sfr.total {
+			err = nil // There is still an implicit sparse hole at the end
+		}
 	}
 
-	if err == io.EOF && sfr.pos < sfr.tot {
-		// We reached the end of the last fragment's data, but there's a final hole
-		err = nil
+	if sfr.pos == endPos {
+		sfr.sp = sfr.sp[1:] // We are done with this fragment, so pop it
 	}
-	return
+	return n, err
 }
 
 // numBytes returns the number of bytes left to read in the sparse file's
 // sparse-encoded data in the tar archive.
 func (sfr *sparseFileReader) numBytes() int64 {
-	return sfr.rfr.nb
+	return sfr.rfr.numBytes()
 }
diff --git a/src/archive/tar/reader_test.go b/src/archive/tar/reader_test.go
index da01f26..4a6d1a9 100644
--- a/src/archive/tar/reader_test.go
+++ b/src/archive/tar/reader_test.go
@@ -10,6 +10,7 @@
 	"fmt"
 	"io"
 	"io/ioutil"
+	"math"
 	"os"
 	"reflect"
 	"strings"
@@ -18,9 +19,10 @@
 )
 
 type untarTest struct {
-	file    string
-	headers []*Header
-	cksums  []string
+	file    string    // Test input file
+	headers []*Header // Expected output headers
+	chksums []string  // MD5 checksums of files; leave as nil if not checked
+	err     error     // Expected error to occur
 }
 
 var gnuTarTest = &untarTest{
@@ -49,7 +51,7 @@
 			Gname:    "eng",
 		},
 	},
-	cksums: []string{
+	chksums: []string{
 		"e38b27eaccb4391bdec553a7f3ae6b2f",
 		"c65bd2e50a56a2138bf1716f2fd56fe9",
 	},
@@ -129,7 +131,7 @@
 			Devminor: 0,
 		},
 	},
-	cksums: []string{
+	chksums: []string{
 		"6f53234398c2449fe67c1812d993012f",
 		"6f53234398c2449fe67c1812d993012f",
 		"6f53234398c2449fe67c1812d993012f",
@@ -286,37 +288,101 @@
 			},
 		},
 	},
+	{
+		file: "testdata/neg-size.tar",
+		err:  ErrHeader,
+	},
+	{
+		file: "testdata/issue10968.tar",
+		err:  ErrHeader,
+	},
+	{
+		file: "testdata/issue11169.tar",
+		// TODO(dsnet): Currently the library does not detect that this file is
+		// malformed. Instead, it incorrectly believes that the file just ends.
+		// At least the library doesn't crash anymore.
+		// err:  ErrHeader,
+	},
+	{
+		file: "testdata/issue12435.tar",
+		// TODO(dsnet): Currently the library does not detect that this file is
+		// malformed. Instead, it incorrectly believes that file just ends.
+		// At least the library doesn't crash anymore.
+		// err:  ErrHeader,
+	},
 }
 
 func TestReader(t *testing.T) {
-testLoop:
-	for i, test := range untarTests {
-		f, err := os.Open(test.file)
+	for i, v := range untarTests {
+		f, err := os.Open(v.file)
 		if err != nil {
-			t.Errorf("test %d: Unexpected error: %v", i, err)
+			t.Errorf("file %s, test %d: unexpected error: %v", v.file, i, err)
 			continue
 		}
 		defer f.Close()
-		tr := NewReader(f)
-		for j, header := range test.headers {
-			hdr, err := tr.Next()
-			if err != nil || hdr == nil {
-				t.Errorf("test %d, entry %d: Didn't get entry: %v", i, j, err)
-				f.Close()
-				continue testLoop
+
+		// Capture all headers and checksums.
+		var (
+			tr      = NewReader(f)
+			hdrs    []*Header
+			chksums []string
+			rdbuf   = make([]byte, 8)
+		)
+		for {
+			var hdr *Header
+			hdr, err = tr.Next()
+			if err != nil {
+				if err == io.EOF {
+					err = nil // Reaching io.EOF is the expected outcome
+				}
+				break
 			}
-			if !reflect.DeepEqual(*hdr, *header) {
-				t.Errorf("test %d, entry %d: Incorrect header:\nhave %+v\nwant %+v",
-					i, j, *hdr, *header)
+			hdrs = append(hdrs, hdr)
+
+			if v.chksums == nil {
+				continue
+			}
+			h := md5.New()
+			_, err = io.CopyBuffer(h, tr, rdbuf) // Effectively an incremental read
+			if err != nil {
+				break
+			}
+			chksums = append(chksums, fmt.Sprintf("%x", h.Sum(nil)))
+		}
+
+		for j, hdr := range hdrs {
+			if j >= len(v.headers) {
+				t.Errorf("file %s, test %d, entry %d: unexpected header:\ngot %+v",
+					v.file, i, j, *hdr)
+				continue
+			}
+			if !reflect.DeepEqual(*hdr, *v.headers[j]) {
+				t.Errorf("file %s, test %d, entry %d: incorrect header:\ngot  %+v\nwant %+v",
+					v.file, i, j, *hdr, *v.headers[j])
 			}
 		}
-		hdr, err := tr.Next()
-		if err == io.EOF {
-			continue testLoop
+		if len(hdrs) != len(v.headers) {
+			t.Errorf("file %s, test %d: got %d headers, want %d headers",
+				v.file, i, len(hdrs), len(v.headers))
 		}
-		if hdr != nil || err != nil {
-			t.Errorf("test %d: Unexpected entry or error: hdr=%v err=%v", i, hdr, err)
+
+		for j, sum := range chksums {
+			if j >= len(v.chksums) {
+				t.Errorf("file %s, test %d, entry %d: unexpected sum: got %s",
+					v.file, i, j, sum)
+				continue
+			}
+			if sum != v.chksums[j] {
+				t.Errorf("file %s, test %d, entry %d: incorrect checksum: got %s, want %s",
+					v.file, i, j, sum, v.chksums[j])
+			}
 		}
+
+		if err != v.err {
+			t.Errorf("file %s, test %d: unexpected error: got %v, want %v",
+				v.file, i, err, v.err)
+		}
+		f.Close()
 	}
 }
 
@@ -356,60 +422,6 @@
 	}
 }
 
-func TestIncrementalRead(t *testing.T) {
-	test := gnuTarTest
-	f, err := os.Open(test.file)
-	if err != nil {
-		t.Fatalf("Unexpected error: %v", err)
-	}
-	defer f.Close()
-
-	tr := NewReader(f)
-
-	headers := test.headers
-	cksums := test.cksums
-	nread := 0
-
-	// loop over all files
-	for ; ; nread++ {
-		hdr, err := tr.Next()
-		if hdr == nil || err == io.EOF {
-			break
-		}
-
-		// check the header
-		if !reflect.DeepEqual(*hdr, *headers[nread]) {
-			t.Errorf("Incorrect header:\nhave %+v\nwant %+v",
-				*hdr, headers[nread])
-		}
-
-		// read file contents in little chunks EOF,
-		// checksumming all the way
-		h := md5.New()
-		rdbuf := make([]uint8, 8)
-		for {
-			nr, err := tr.Read(rdbuf)
-			if err == io.EOF {
-				break
-			}
-			if err != nil {
-				t.Errorf("Read: unexpected error %v\n", err)
-				break
-			}
-			h.Write(rdbuf[0:nr])
-		}
-		// verify checksum
-		have := fmt.Sprintf("%x", h.Sum(nil))
-		want := cksums[nread]
-		if want != have {
-			t.Errorf("Bad checksum on file %s:\nhave %+v\nwant %+v", hdr.Name, have, want)
-		}
-	}
-	if nread != len(headers) {
-		t.Errorf("Didn't process all files\nexpected: %d\nprocessed %d\n", len(headers), nread)
-	}
-}
-
 func TestNonSeekable(t *testing.T) {
 	test := gnuTarTest
 	f, err := os.Open(test.file)
@@ -514,187 +526,232 @@
 	}
 }
 
-func TestSparseEndToEnd(t *testing.T) {
-	test := sparseTarTest
-	f, err := os.Open(test.file)
-	if err != nil {
-		t.Fatalf("Unexpected error: %v", err)
-	}
-	defer f.Close()
-
-	tr := NewReader(f)
-
-	headers := test.headers
-	cksums := test.cksums
-	nread := 0
-
-	// loop over all files
-	for ; ; nread++ {
-		hdr, err := tr.Next()
-		if hdr == nil || err == io.EOF {
-			break
-		}
-
-		// check the header
-		if !reflect.DeepEqual(*hdr, *headers[nread]) {
-			t.Errorf("Incorrect header:\nhave %+v\nwant %+v",
-				*hdr, headers[nread])
-		}
-
-		// read and checksum the file data
-		h := md5.New()
-		_, err = io.Copy(h, tr)
-		if err != nil {
-			t.Fatalf("Unexpected error: %v", err)
-		}
-
-		// verify checksum
-		have := fmt.Sprintf("%x", h.Sum(nil))
-		want := cksums[nread]
-		if want != have {
-			t.Errorf("Bad checksum on file %s:\nhave %+v\nwant %+v", hdr.Name, have, want)
-		}
-	}
-	if nread != len(headers) {
-		t.Errorf("Didn't process all files\nexpected: %d\nprocessed %d\n", len(headers), nread)
-	}
-}
-
-type sparseFileReadTest struct {
-	sparseData []byte
-	sparseMap  []sparseEntry
-	realSize   int64
-	expected   []byte
-}
-
-var sparseFileReadTests = []sparseFileReadTest{
-	{
-		sparseData: []byte("abcde"),
-		sparseMap: []sparseEntry{
-			{offset: 0, numBytes: 2},
-			{offset: 5, numBytes: 3},
-		},
-		realSize: 8,
-		expected: []byte("ab\x00\x00\x00cde"),
-	},
-	{
-		sparseData: []byte("abcde"),
-		sparseMap: []sparseEntry{
-			{offset: 0, numBytes: 2},
-			{offset: 5, numBytes: 3},
-		},
-		realSize: 10,
-		expected: []byte("ab\x00\x00\x00cde\x00\x00"),
-	},
-	{
-		sparseData: []byte("abcde"),
-		sparseMap: []sparseEntry{
-			{offset: 1, numBytes: 3},
-			{offset: 6, numBytes: 2},
-		},
-		realSize: 8,
-		expected: []byte("\x00abc\x00\x00de"),
-	},
-	{
-		sparseData: []byte("abcde"),
-		sparseMap: []sparseEntry{
-			{offset: 1, numBytes: 3},
-			{offset: 6, numBytes: 2},
-		},
-		realSize: 10,
-		expected: []byte("\x00abc\x00\x00de\x00\x00"),
-	},
-	{
-		sparseData: []byte(""),
-		sparseMap:  nil,
-		realSize:   2,
-		expected:   []byte("\x00\x00"),
-	},
-}
-
 func TestSparseFileReader(t *testing.T) {
-	for i, test := range sparseFileReadTests {
-		r := bytes.NewReader(test.sparseData)
-		nb := int64(r.Len())
-		sfr := &sparseFileReader{
-			rfr: &regFileReader{r: r, nb: nb},
-			sp:  test.sparseMap,
-			pos: 0,
-			tot: test.realSize,
-		}
-		if sfr.numBytes() != nb {
-			t.Errorf("test %d: Before reading, sfr.numBytes() = %d, want %d", i, sfr.numBytes(), nb)
-		}
-		buf, err := ioutil.ReadAll(sfr)
+	var vectors = []struct {
+		realSize   int64         // Real size of the output file
+		sparseMap  []sparseEntry // Input sparse map
+		sparseData string        // Input compact data
+		expected   string        // Expected output data
+		err        error         // Expected error outcome
+	}{{
+		realSize: 8,
+		sparseMap: []sparseEntry{
+			{offset: 0, numBytes: 2},
+			{offset: 5, numBytes: 3},
+		},
+		sparseData: "abcde",
+		expected:   "ab\x00\x00\x00cde",
+	}, {
+		realSize: 10,
+		sparseMap: []sparseEntry{
+			{offset: 0, numBytes: 2},
+			{offset: 5, numBytes: 3},
+		},
+		sparseData: "abcde",
+		expected:   "ab\x00\x00\x00cde\x00\x00",
+	}, {
+		realSize: 8,
+		sparseMap: []sparseEntry{
+			{offset: 1, numBytes: 3},
+			{offset: 6, numBytes: 2},
+		},
+		sparseData: "abcde",
+		expected:   "\x00abc\x00\x00de",
+	}, {
+		realSize: 8,
+		sparseMap: []sparseEntry{
+			{offset: 1, numBytes: 3},
+			{offset: 6, numBytes: 0},
+			{offset: 6, numBytes: 0},
+			{offset: 6, numBytes: 2},
+		},
+		sparseData: "abcde",
+		expected:   "\x00abc\x00\x00de",
+	}, {
+		realSize: 10,
+		sparseMap: []sparseEntry{
+			{offset: 1, numBytes: 3},
+			{offset: 6, numBytes: 2},
+		},
+		sparseData: "abcde",
+		expected:   "\x00abc\x00\x00de\x00\x00",
+	}, {
+		realSize: 10,
+		sparseMap: []sparseEntry{
+			{offset: 1, numBytes: 3},
+			{offset: 6, numBytes: 2},
+			{offset: 8, numBytes: 0},
+			{offset: 8, numBytes: 0},
+			{offset: 8, numBytes: 0},
+			{offset: 8, numBytes: 0},
+		},
+		sparseData: "abcde",
+		expected:   "\x00abc\x00\x00de\x00\x00",
+	}, {
+		realSize:   2,
+		sparseMap:  []sparseEntry{},
+		sparseData: "",
+		expected:   "\x00\x00",
+	}, {
+		realSize:  -2,
+		sparseMap: []sparseEntry{},
+		err:       ErrHeader,
+	}, {
+		realSize: -10,
+		sparseMap: []sparseEntry{
+			{offset: 1, numBytes: 3},
+			{offset: 6, numBytes: 2},
+		},
+		sparseData: "abcde",
+		err:        ErrHeader,
+	}, {
+		realSize: 10,
+		sparseMap: []sparseEntry{
+			{offset: 1, numBytes: 3},
+			{offset: 6, numBytes: 5},
+		},
+		sparseData: "abcde",
+		err:        ErrHeader,
+	}, {
+		realSize: 35,
+		sparseMap: []sparseEntry{
+			{offset: 1, numBytes: 3},
+			{offset: 6, numBytes: 5},
+		},
+		sparseData: "abcde",
+		err:        io.ErrUnexpectedEOF,
+	}, {
+		realSize: 35,
+		sparseMap: []sparseEntry{
+			{offset: 1, numBytes: 3},
+			{offset: 6, numBytes: -5},
+		},
+		sparseData: "abcde",
+		err:        ErrHeader,
+	}, {
+		realSize: 35,
+		sparseMap: []sparseEntry{
+			{offset: math.MaxInt64, numBytes: 3},
+			{offset: 6, numBytes: -5},
+		},
+		sparseData: "abcde",
+		err:        ErrHeader,
+	}, {
+		realSize: 10,
+		sparseMap: []sparseEntry{
+			{offset: 1, numBytes: 3},
+			{offset: 2, numBytes: 2},
+		},
+		sparseData: "abcde",
+		err:        ErrHeader,
+	}}
+
+	for i, v := range vectors {
+		r := bytes.NewReader([]byte(v.sparseData))
+		rfr := &regFileReader{r: r, nb: int64(len(v.sparseData))}
+
+		var sfr *sparseFileReader
+		var err error
+		var buf []byte
+
+		sfr, err = newSparseFileReader(rfr, v.sparseMap, v.realSize)
 		if err != nil {
-			t.Errorf("test %d: Unexpected error: %v", i, err)
+			goto fail
 		}
-		if e := test.expected; !bytes.Equal(buf, e) {
-			t.Errorf("test %d: Contents = %v, want %v", i, buf, e)
+		if sfr.numBytes() != int64(len(v.sparseData)) {
+			t.Errorf("test %d, numBytes() before reading: got %d, want %d", i, sfr.numBytes(), len(v.sparseData))
+		}
+		buf, err = ioutil.ReadAll(sfr)
+		if err != nil {
+			goto fail
+		}
+		if string(buf) != v.expected {
+			t.Errorf("test %d, ReadAll(): got %q, want %q", i, string(buf), v.expected)
 		}
 		if sfr.numBytes() != 0 {
-			t.Errorf("test %d: After draining the reader, numBytes() was nonzero", i)
+			t.Errorf("test %d, numBytes() after reading: got %d, want %d", i, sfr.numBytes(), 0)
 		}
-	}
-}
 
-func TestSparseIncrementalRead(t *testing.T) {
-	sparseMap := []sparseEntry{{10, 2}}
-	sparseData := []byte("Go")
-	expected := "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00Go\x00\x00\x00\x00\x00\x00\x00\x00"
-
-	r := bytes.NewReader(sparseData)
-	nb := int64(r.Len())
-	sfr := &sparseFileReader{
-		rfr: &regFileReader{r: r, nb: nb},
-		sp:  sparseMap,
-		pos: 0,
-		tot: int64(len(expected)),
-	}
-
-	// We'll read the data 6 bytes at a time, with a hole of size 10 at
-	// the beginning and one of size 8 at the end.
-	var outputBuf bytes.Buffer
-	buf := make([]byte, 6)
-	for {
-		n, err := sfr.Read(buf)
-		if err == io.EOF {
-			break
+	fail:
+		if err != v.err {
+			t.Errorf("test %d, unexpected error: got %v, want %v", i, err, v.err)
 		}
-		if err != nil {
-			t.Errorf("Read: unexpected error %v\n", err)
-		}
-		if n > 0 {
-			_, err := outputBuf.Write(buf[:n])
-			if err != nil {
-				t.Errorf("Write: unexpected error %v\n", err)
-			}
-		}
-	}
-	got := outputBuf.String()
-	if got != expected {
-		t.Errorf("Contents = %v, want %v", got, expected)
 	}
 }
 
 func TestReadGNUSparseMap0x1(t *testing.T) {
-	headers := map[string]string{
-		paxGNUSparseNumBlocks: "4",
-		paxGNUSparseMap:       "0,5,10,5,20,5,30,5",
-	}
-	expected := []sparseEntry{
-		{offset: 0, numBytes: 5},
-		{offset: 10, numBytes: 5},
-		{offset: 20, numBytes: 5},
-		{offset: 30, numBytes: 5},
-	}
+	const (
+		maxUint = ^uint(0)
+		maxInt  = int(maxUint >> 1)
+	)
+	var (
+		big1 = fmt.Sprintf("%d", int64(maxInt))
+		big2 = fmt.Sprintf("%d", (int64(maxInt)/2)+1)
+		big3 = fmt.Sprintf("%d", int64(maxInt)/3)
+	)
 
-	sp, err := readGNUSparseMap0x1(headers)
-	if err != nil {
-		t.Errorf("Unexpected error: %v", err)
-	}
-	if !reflect.DeepEqual(sp, expected) {
-		t.Errorf("Incorrect sparse map: got %v, wanted %v", sp, expected)
+	var vectors = []struct {
+		extHdrs   map[string]string // Input data
+		sparseMap []sparseEntry     // Expected sparse entries to be outputted
+		err       error             // Expected errors that may be raised
+	}{{
+		extHdrs: map[string]string{paxGNUSparseNumBlocks: "-4"},
+		err:     ErrHeader,
+	}, {
+		extHdrs: map[string]string{paxGNUSparseNumBlocks: "fee "},
+		err:     ErrHeader,
+	}, {
+		extHdrs: map[string]string{
+			paxGNUSparseNumBlocks: big1,
+			paxGNUSparseMap:       "0,5,10,5,20,5,30,5",
+		},
+		err: ErrHeader,
+	}, {
+		extHdrs: map[string]string{
+			paxGNUSparseNumBlocks: big2,
+			paxGNUSparseMap:       "0,5,10,5,20,5,30,5",
+		},
+		err: ErrHeader,
+	}, {
+		extHdrs: map[string]string{
+			paxGNUSparseNumBlocks: big3,
+			paxGNUSparseMap:       "0,5,10,5,20,5,30,5",
+		},
+		err: ErrHeader,
+	}, {
+		extHdrs: map[string]string{
+			paxGNUSparseNumBlocks: "4",
+			paxGNUSparseMap:       "0.5,5,10,5,20,5,30,5",
+		},
+		err: ErrHeader,
+	}, {
+		extHdrs: map[string]string{
+			paxGNUSparseNumBlocks: "4",
+			paxGNUSparseMap:       "0,5.5,10,5,20,5,30,5",
+		},
+		err: ErrHeader,
+	}, {
+		extHdrs: map[string]string{
+			paxGNUSparseNumBlocks: "4",
+			paxGNUSparseMap:       "0,fewafewa.5,fewafw,5,20,5,30,5",
+		},
+		err: ErrHeader,
+	}, {
+		extHdrs: map[string]string{
+			paxGNUSparseNumBlocks: "4",
+			paxGNUSparseMap:       "0,5,10,5,20,5,30,5",
+		},
+		sparseMap: []sparseEntry{{0, 5}, {10, 5}, {20, 5}, {30, 5}},
+	}}
+
+	for i, v := range vectors {
+		sp, err := readGNUSparseMap0x1(v.extHdrs)
+		if !reflect.DeepEqual(sp, v.sparseMap) && !(len(sp) == 0 && len(v.sparseMap) == 0) {
+			t.Errorf("test %d, readGNUSparseMap0x1(...): got %v, want %v", i, sp, v.sparseMap)
+		}
+		if err != v.err {
+			t.Errorf("test %d, unexpected error: got %v, want %v", i, err, v.err)
+		}
 	}
 }
 
@@ -746,53 +803,3 @@
 	}
 
 }
-
-// Negative header size should not cause panic.
-// Issues 10959 and 10960.
-func TestNegativeHdrSize(t *testing.T) {
-	f, err := os.Open("testdata/neg-size.tar")
-	if err != nil {
-		t.Fatal(err)
-	}
-	defer f.Close()
-	r := NewReader(f)
-	_, err = r.Next()
-	if err != ErrHeader {
-		t.Error("want ErrHeader, got", err)
-	}
-	io.Copy(ioutil.Discard, r)
-}
-
-// This used to hang in (*sparseFileReader).readHole due to missing
-// verification of sparse offsets against file size.
-func TestIssue10968(t *testing.T) {
-	f, err := os.Open("testdata/issue10968.tar")
-	if err != nil {
-		t.Fatal(err)
-	}
-	defer f.Close()
-	r := NewReader(f)
-	_, err = r.Next()
-	if err != nil {
-		t.Fatal(err)
-	}
-	_, err = io.Copy(ioutil.Discard, r)
-	if err != io.ErrUnexpectedEOF {
-		t.Fatalf("expected %q, got %q", io.ErrUnexpectedEOF, err)
-	}
-}
-
-// Do not panic if there are errors in header blocks after the pax header.
-// Issue 11169
-func TestIssue11169(t *testing.T) {
-	f, err := os.Open("testdata/issue11169.tar")
-	if err != nil {
-		t.Fatal(err)
-	}
-	defer f.Close()
-	r := NewReader(f)
-	_, err = r.Next()
-	if err == nil {
-		t.Fatal("Unexpected success")
-	}
-}
diff --git a/src/archive/tar/testdata/issue12435.tar b/src/archive/tar/testdata/issue12435.tar
new file mode 100644
index 0000000..3542dd8
--- /dev/null
+++ b/src/archive/tar/testdata/issue12435.tar
Binary files differ
diff --git a/src/archive/tar/writer.go b/src/archive/tar/writer.go
index 9dbc01a..3547c17 100644
--- a/src/archive/tar/writer.go
+++ b/src/archive/tar/writer.go
@@ -23,7 +23,6 @@
 	ErrWriteTooLong    = errors.New("archive/tar: write too long")
 	ErrFieldTooLong    = errors.New("archive/tar: header field too long")
 	ErrWriteAfterClose = errors.New("archive/tar: write after close")
-	errNameTooLong     = errors.New("archive/tar: name too long")
 	errInvalidHeader   = errors.New("archive/tar: header field too long or contains invalid values")
 )
 
@@ -215,26 +214,14 @@
 	_, paxPathUsed := paxHeaders[paxPath]
 	// try to use a ustar header when only the name is too long
 	if !tw.preferPax && len(paxHeaders) == 1 && paxPathUsed {
-		suffix := hdr.Name
-		prefix := ""
-		if len(hdr.Name) > fileNameSize && isASCII(hdr.Name) {
-			var err error
-			prefix, suffix, err = tw.splitUSTARLongName(hdr.Name)
-			if err == nil {
-				// ok we can use a ustar long name instead of pax, now correct the fields
+		prefix, suffix, ok := splitUSTARPath(hdr.Name)
+		if ok {
+			// Since we can encode in USTAR format, disable PAX header.
+			delete(paxHeaders, paxPath)
 
-				// remove the path field from the pax header. this will suppress the pax header
-				delete(paxHeaders, paxPath)
-
-				// update the path fields
-				tw.cString(pathHeaderBytes, suffix, false, paxNone, nil)
-				tw.cString(prefixHeaderBytes, prefix, false, paxNone, nil)
-
-				// Use the ustar magic if we used ustar long names.
-				if len(prefix) > 0 && !tw.usedBinary {
-					copy(header[257:265], []byte("ustar\x00"))
-				}
-			}
+			// Update the path fields
+			tw.cString(pathHeaderBytes, suffix, false, paxNone, nil)
+			tw.cString(prefixHeaderBytes, prefix, false, paxNone, nil)
 		}
 	}
 
@@ -270,28 +257,25 @@
 	return tw.err
 }
 
-// writeUSTARLongName splits a USTAR long name hdr.Name.
-// name must be < 256 characters. errNameTooLong is returned
-// if hdr.Name can't be split. The splitting heuristic
-// is compatible with gnu tar.
-func (tw *Writer) splitUSTARLongName(name string) (prefix, suffix string, err error) {
+// splitUSTARPath splits a path according to USTAR prefix and suffix rules.
+// If the path is not splittable, then it will return ("", "", false).
+func splitUSTARPath(name string) (prefix, suffix string, ok bool) {
 	length := len(name)
-	if length > fileNamePrefixSize+1 {
+	if length <= fileNameSize || !isASCII(name) {
+		return "", "", false
+	} else if length > fileNamePrefixSize+1 {
 		length = fileNamePrefixSize + 1
 	} else if name[length-1] == '/' {
 		length--
 	}
+
 	i := strings.LastIndex(name[:length], "/")
-	// nlen contains the resulting length in the name field.
-	// plen contains the resulting length in the prefix field.
-	nlen := len(name) - i - 1
-	plen := i
+	nlen := len(name) - i - 1 // nlen is length of suffix
+	plen := i                 // plen is length of prefix
 	if i <= 0 || nlen > fileNameSize || nlen == 0 || plen > fileNamePrefixSize {
-		err = errNameTooLong
-		return
+		return "", "", false
 	}
-	prefix, suffix = name[:i], name[i+1:]
-	return
+	return name[:i], name[i+1:], true
 }
 
 // writePaxHeader writes an extended pax header to the
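
A hedged worked example of splitUSTARPath (the path literal is
illustrative): a 148-byte ASCII path overflows the 100-byte USTAR name
field, but splits at its last slash into a prefix within the 155-byte limit
and a short suffix:

	long := strings.Repeat("d/", 70) + "file.txt" // 148 bytes, all ASCII
	prefix, suffix, ok := splitUSTARPath(long)
	// ok == true
	// prefix == "d/d/.../d" (139 bytes, fits the 155-byte prefix field)
	// suffix == "file.txt" (fits the 100-byte name field)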
diff --git a/src/archive/tar/writer_test.go b/src/archive/tar/writer_test.go
index fe46a67..caf40a8 100644
--- a/src/archive/tar/writer_test.go
+++ b/src/archive/tar/writer_test.go
@@ -544,3 +544,37 @@
 		t.Fatalf("Write: got %v; want ErrWriteAfterClose", err)
 	}
 }
+
+func TestSplitUSTARPath(t *testing.T) {
+	var sr = strings.Repeat
+
+	var vectors = []struct {
+		input  string // Input path
+		prefix string // Expected output prefix
+		suffix string // Expected output suffix
+		ok     bool   // Split success?
+	}{
+		{"", "", "", false},
+		{"abc", "", "", false},
+		{"用戶名", "", "", false},
+		{sr("a", fileNameSize), "", "", false},
+		{sr("a", fileNameSize) + "/", "", "", false},
+		{sr("a", fileNameSize) + "/a", sr("a", fileNameSize), "a", true},
+		{sr("a", fileNamePrefixSize) + "/", "", "", false},
+		{sr("a", fileNamePrefixSize) + "/a", sr("a", fileNamePrefixSize), "a", true},
+		{sr("a", fileNameSize+1), "", "", false},
+		{sr("/", fileNameSize+1), sr("/", fileNameSize-1), "/", true},
+		{sr("a", fileNamePrefixSize) + "/" + sr("b", fileNameSize),
+			sr("a", fileNamePrefixSize), sr("b", fileNameSize), true},
+		{sr("a", fileNamePrefixSize) + "//" + sr("b", fileNameSize), "", "", false},
+		{sr("a/", fileNameSize), sr("a/", 77) + "a", sr("a/", 22), true},
+	}
+
+	for _, v := range vectors {
+		prefix, suffix, ok := splitUSTARPath(v.input)
+		if prefix != v.prefix || suffix != v.suffix || ok != v.ok {
+			t.Errorf("splitUSTARPath(%q):\ngot  (%q, %q, %v)\nwant (%q, %q, %v)",
+				v.input, prefix, suffix, ok, v.prefix, v.suffix, v.ok)
+		}
+	}
+}
diff --git a/src/archive/zip/reader.go b/src/archive/zip/reader.go
index 519748ba..0f70860 100644
--- a/src/archive/zip/reader.go
+++ b/src/archive/zip/reader.go
@@ -376,14 +376,16 @@
 	}
 	d.comment = string(b[:l])
 
-	p, err := findDirectory64End(r, directoryEndOffset)
-	if err == nil && p >= 0 {
-		err = readDirectory64End(r, p, d)
+	// These values mean that the file can be a zip64 file
+	if d.directoryRecords == 0xffff || d.directorySize == 0xffff || d.directoryOffset == 0xffffffff {
+		p, err := findDirectory64End(r, directoryEndOffset)
+		if err == nil && p >= 0 {
+			err = readDirectory64End(r, p, d)
+		}
+		if err != nil {
+			return nil, err
+		}
 	}
-	if err != nil {
-		return nil, err
-	}
-
 	// Make sure directoryOffset points to somewhere in our file.
 	if o := int64(d.directoryOffset); o < 0 || o >= size {
 		return nil, ErrFormat
@@ -407,8 +409,13 @@
 	if sig := b.uint32(); sig != directory64LocSignature {
 		return -1, nil
 	}
-	b = b[4:]       // skip number of the disk with the start of the zip64 end of central directory
-	p := b.uint64() // relative offset of the zip64 end of central directory record
+	if b.uint32() != 0 { // number of the disk with the start of the zip64 end of central directory
+		return -1, nil // the file is not a valid zip64 file
+	}
+	p := b.uint64()      // relative offset of the zip64 end of central directory record
+	if b.uint32() != 1 { // total number of disks
+		return -1, nil // the file is not a valid zip64 file
+	}
 	return int64(p), nil
 }
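
For reference, a hedged sketch of the 20-byte zip64 end-of-central-directory
locator record that findDirectory64End now validates field by field (the
reader parses it inline rather than through a struct like this):

	type zip64Locator struct {
		signature  uint32 // directory64LocSignature (0x07064b50)
		diskNumber uint32 // disk holding the zip64 directory; must be 0
		offset     uint64 // offset of the zip64 end-of-central-directory record
		totalDisks uint32 // total number of disks; must be 1
	}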
 
diff --git a/src/archive/zip/reader_test.go b/src/archive/zip/reader_test.go
index 547dd39..8f7e8bf 100644
--- a/src/archive/zip/reader_test.go
+++ b/src/archive/zip/reader_test.go
@@ -605,3 +605,40 @@
 	}
 	r.Close()
 }
+
+// Verify we do not treat non-zip64 archives as zip64
+func TestIssue12449(t *testing.T) {
+	data := []byte{
+		0x50, 0x4b, 0x03, 0x04, 0x14, 0x00, 0x08, 0x00,
+		0x00, 0x00, 0x6b, 0xb4, 0xba, 0x46, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x03, 0x00, 0x18, 0x00, 0xca, 0x64,
+		0x55, 0x75, 0x78, 0x0b, 0x00, 0x50, 0x4b, 0x05,
+		0x06, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01,
+		0x00, 0x49, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00,
+		0x00, 0x31, 0x31, 0x31, 0x32, 0x32, 0x32, 0x0a,
+		0x50, 0x4b, 0x07, 0x08, 0x1d, 0x88, 0x77, 0xb0,
+		0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
+		0x50, 0x4b, 0x01, 0x02, 0x14, 0x03, 0x14, 0x00,
+		0x08, 0x00, 0x00, 0x00, 0x6b, 0xb4, 0xba, 0x46,
+		0x1d, 0x88, 0x77, 0xb0, 0x07, 0x00, 0x00, 0x00,
+		0x07, 0x00, 0x00, 0x00, 0x03, 0x00, 0x18, 0x00,
+		0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0xa0, 0x81, 0x00, 0x00, 0x00, 0x00, 0xca, 0x64,
+		0x55, 0x75, 0x78, 0x0b, 0x00, 0x50, 0x4b, 0x05,
+		0x06, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01,
+		0x00, 0x49, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00,
+		0x00, 0x97, 0x2b, 0x49, 0x23, 0x05, 0xc5, 0x0b,
+		0xa7, 0xd1, 0x52, 0xa2, 0x9c, 0x50, 0x4b, 0x06,
+		0x07, 0xc8, 0x19, 0xc1, 0xaf, 0x94, 0x9c, 0x61,
+		0x44, 0xbe, 0x94, 0x19, 0x42, 0x58, 0x12, 0xc6,
+		0x5b, 0x50, 0x4b, 0x05, 0x06, 0x00, 0x00, 0x00,
+		0x00, 0x01, 0x00, 0x01, 0x00, 0x69, 0x00, 0x00,
+		0x00, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00,
+	}
+	// Read in the archive.
+	_, err := NewReader(bytes.NewReader(data), int64(len(data)))
+	if err != nil {
+		t.Errorf("Error reading the archive: %v", err)
+	}
+}
diff --git a/src/bufio/example_test.go b/src/bufio/example_test.go
index 3da9141..4666e6d 100644
--- a/src/bufio/example_test.go
+++ b/src/bufio/example_test.go
@@ -80,3 +80,32 @@
 	// 5678
 	// Invalid input: strconv.ParseInt: parsing "1234567901234567890": value out of range
 }
+
+// Use a Scanner with a custom split function to parse a comma-separated
+// list with an empty final value.
+func ExampleScanner_emptyFinalToken() {
+	// Comma-separated list; last entry is empty.
+	const input = "1,2,3,4,"
+	scanner := bufio.NewScanner(strings.NewReader(input))
+	// Define a split function that separates on commas.
+	onComma := func(data []byte, atEOF bool) (advance int, token []byte, err error) {
+		for i := 0; i < len(data); i++ {
+			if data[i] == ',' {
+				return i + 1, data[:i], nil
+			}
+		}
+		// There is one final token to be delivered, which may be the empty string.
+		// Returning bufio.ErrFinalToken here tells Scan there are no more tokens after this
+		// but does not trigger an error to be returned from Scan itself.
+		return 0, data, bufio.ErrFinalToken
+	}
+	scanner.Split(onComma)
+	// Scan.
+	for scanner.Scan() {
+		fmt.Printf("%q ", scanner.Text())
+	}
+	if err := scanner.Err(); err != nil {
+		fmt.Fprintln(os.Stderr, "reading input:", err)
+	}
+	// Output: "1" "2" "3" "4" ""
+}
diff --git a/src/bufio/scan.go b/src/bufio/scan.go
index 7a349fa..27a0f00 100644
--- a/src/bufio/scan.go
+++ b/src/bufio/scan.go
@@ -37,6 +37,8 @@
 	end          int       // End of data in buf.
 	err          error     // Sticky error.
 	empties      int       // Count of successive empty tokens.
+	scanCalled   bool      // Scan has been called; buffer is in use.
+	done         bool      // Scan has finished.
 }
 
 // SplitFunc is the signature of the split function used to tokenize the
@@ -65,10 +67,13 @@
 )
 
 const (
-	// MaxScanTokenSize is the maximum size used to buffer a token.
+	// MaxScanTokenSize is the maximum size used to buffer a token
+	// unless the user provides an explicit buffer with Scanner.Buffer.
 	// The actual maximum token size may be smaller as the buffer
 	// may need to include, for instance, a newline.
 	MaxScanTokenSize = 64 * 1024
+
+	startBufSize = 4096 // Size of initial allocation for buffer.
 )
 
 // NewScanner returns a new Scanner to read from r.
@@ -78,7 +83,6 @@
 		r:            r,
 		split:        ScanLines,
 		maxTokenSize: MaxScanTokenSize,
-		buf:          make([]byte, 4096), // Plausible starting size; needn't be large.
 	}
 }
 
@@ -103,6 +107,16 @@
 	return string(s.token)
 }
 
+// ErrFinalToken is a special sentinel error value. It is intended to be
+// returned by a Split function to indicate that the token being delivered
+// with the error is the last token and scanning should stop after this one.
+// After ErrFinalToken is received by Scan, scanning stops with no error.
+// The value is useful to stop processing early or when it is necessary to
+// deliver a final empty token. One could achieve the same behavior
+// with a custom error value but providing one here is tidier.
+// See the emptyFinalToken example for a use of this value.
+var ErrFinalToken = errors.New("final token")
+
 // Scan advances the Scanner to the next token, which will then be
 // available through the Bytes or Text method. It returns false when the
 // scan stops, either by reaching the end of the input or an error.
@@ -112,6 +126,10 @@
 // Scan panics if the split function returns 100 empty tokens without
 // advancing the input. This is a common error mode for scanners.
 func (s *Scanner) Scan() bool {
+	if s.done {
+		return false
+	}
+	s.scanCalled = true
 	// Loop until we have a token.
 	for {
 		// See if we can get a token with what we already have.
@@ -120,6 +138,11 @@
 		if s.end > s.start || s.err != nil {
 			advance, token, err := s.split(s.buf[s.start:s.end], s.err != nil)
 			if err != nil {
+				if err == ErrFinalToken {
+					s.token = token
+					s.done = true
+					return true
+				}
 				s.setErr(err)
 				return false
 			}
@@ -158,11 +181,16 @@
 		}
 		// Is the buffer full? If so, resize.
 		if s.end == len(s.buf) {
-			if len(s.buf) >= s.maxTokenSize {
+			// Guarantee no overflow in the multiplication below.
+			const maxInt = int(^uint(0) >> 1)
+			if len(s.buf) >= s.maxTokenSize || len(s.buf) > maxInt/2 {
 				s.setErr(ErrTooLong)
 				return false
 			}
 			newSize := len(s.buf) * 2
+			if newSize == 0 {
+				newSize = startBufSize
+			}
 			if newSize > s.maxTokenSize {
 				newSize = s.maxTokenSize
 			}
@@ -217,9 +245,31 @@
 	}
 }
 
-// Split sets the split function for the Scanner. If called, it must be
-// called before Scan. The default split function is ScanLines.
+// Buffer sets the initial buffer to use when scanning and the maximum
+// size of buffer that may be allocated during scanning. The maximum
+// token size is the larger of max and cap(buf). If max <= cap(buf),
+// Scan will use this buffer only and do no allocation.
+//
+// By default, Scan uses an internal buffer and sets the
+// maximum token size to MaxScanTokenSize.
+//
+// Buffer panics if it is called after scanning has started.
+func (s *Scanner) Buffer(buf []byte, max int) {
+	if s.scanCalled {
+		panic("Buffer called after Scan")
+	}
+	s.buf = buf[0:cap(buf)]
+	s.maxTokenSize = max
+}
+
+// Split sets the split function for the Scanner.
+// The default split function is ScanLines.
+//
+// Split panics if it is called after scanning has started.
 func (s *Scanner) Split(split SplitFunc) {
+	if s.scanCalled {
+		panic("Split called after Scan")
+	}
 	s.split = split
 }
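
A hedged usage sketch of the new API (r and handle are placeholders): both
Buffer and Split must be called before the first Scan, which is exactly what
the new scanCalled panics enforce.

	scanner := bufio.NewScanner(r)
	scanner.Buffer(make([]byte, 0, 64*1024), 1<<20) // allow tokens up to 1 MiB
	scanner.Split(bufio.ScanLines)
	for scanner.Scan() {
		handle(scanner.Text())
	}
	if err := scanner.Err(); err != nil {
		// handle the scan error
	}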
 
diff --git a/src/bufio/scan_test.go b/src/bufio/scan_test.go
index eea87cb..07b1a56 100644
--- a/src/bufio/scan_test.go
+++ b/src/bufio/scan_test.go
@@ -429,33 +429,37 @@
 			return i + 1, data[:i], nil
 		}
 	}
-	if !atEOF {
-		return 0, nil, nil
-	}
-	return 0, data, nil
+	return 0, data, ErrFinalToken
 }
 
-func TestEmptyTokens(t *testing.T) {
-	s := NewScanner(strings.NewReader("1,2,3,"))
-	values := []string{"1", "2", "3", ""}
+func testEmptyTokens(t *testing.T, text string, values []string) {
+	s := NewScanner(strings.NewReader(text))
 	s.Split(commaSplit)
 	var i int
-	for i = 0; i < len(values); i++ {
-		if !s.Scan() {
-			break
+	for i = 0; s.Scan(); i++ {
+		if i >= len(values) {
+			t.Fatalf("got %d fields, expected %d", i+1, len(values))
 		}
 		if s.Text() != values[i] {
 			t.Errorf("%d: expected %q got %q", i, values[i], s.Text())
 		}
 	}
 	if i != len(values) {
-		t.Errorf("got %d fields, expected %d", i, len(values))
+		t.Fatalf("got %d fields, expected %d", i, len(values))
 	}
 	if err := s.Err(); err != nil {
 		t.Fatal(err)
 	}
 }
 
+func TestEmptyTokens(t *testing.T) {
+	testEmptyTokens(t, "1,2,3,", []string{"1", "2", "3", ""})
+}
+
+func TestWithNoEmptyTokens(t *testing.T) {
+	testEmptyTokens(t, "1,2,3", []string{"1", "2", "3"})
+}
+
 func loopAtEOFSplit(data []byte, atEOF bool) (advance int, token []byte, err error) {
 	if len(data) > 0 {
 		return 1, data[:1], nil
@@ -522,3 +526,19 @@
 		t.Fatalf("stopped with %d left to process", c)
 	}
 }
+
+// Make sure we can read a huge token if a big enough buffer is provided.
+func TestHugeBuffer(t *testing.T) {
+	text := strings.Repeat("x", 2*MaxScanTokenSize)
+	s := NewScanner(strings.NewReader(text + "\n"))
+	s.Buffer(make([]byte, 100), 3*MaxScanTokenSize)
+	for s.Scan() {
+		token := s.Text()
+		if token != text {
+			t.Errorf("scan got incorrect token of length %d", len(token))
+		}
+	}
+	if s.Err() != nil {
+		t.Fatal("after scan:", s.Err())
+	}
+}
diff --git a/src/cmd/api/goapi.go b/src/cmd/api/goapi.go
index 01b6def..5d1cf05 100644
--- a/src/cmd/api/goapi.go
+++ b/src/cmd/api/goapi.go
@@ -428,10 +428,15 @@
 	}
 	w.imported[name] = &importing
 
+	root := w.root
+	if strings.HasPrefix(name, "golang.org/x/") {
+		root = filepath.Join(root, "vendor")
+	}
+
 	// Determine package files.
-	dir := filepath.Join(w.root, filepath.FromSlash(name))
+	dir := filepath.Join(root, filepath.FromSlash(name))
 	if fi, err := os.Stat(dir); err != nil || !fi.IsDir() {
-		log.Fatalf("no source in tree for package %q", pkg)
+		log.Fatalf("no source in tree for import %q: %v", name, err)
 	}
 
 	context := w.context
diff --git a/src/cmd/asm/internal/arch/arch.go b/src/cmd/asm/internal/arch/arch.go
index e6901eb..04622e6 100644
--- a/src/cmd/asm/internal/arch/arch.go
+++ b/src/cmd/asm/internal/arch/arch.go
@@ -252,7 +252,9 @@
 	register["EQ"] = arm64.COND_EQ
 	register["NE"] = arm64.COND_NE
 	register["HS"] = arm64.COND_HS
+	register["CS"] = arm64.COND_HS
 	register["LO"] = arm64.COND_LO
+	register["CC"] = arm64.COND_LO
 	register["MI"] = arm64.COND_MI
 	register["PL"] = arm64.COND_PL
 	register["VS"] = arm64.COND_VS
diff --git a/src/cmd/asm/internal/asm/asm.go b/src/cmd/asm/internal/asm/asm.go
index e098961..6128ca8 100644
--- a/src/cmd/asm/internal/asm/asm.go
+++ b/src/cmd/asm/internal/asm/asm.go
@@ -27,15 +27,18 @@
 		case '5':
 			if !arch.ARMConditionCodes(prog, cond) {
 				p.errorf("unrecognized condition code .%q", cond)
+				return
 			}
 
 		case '7':
 			if !arch.ARM64Suffix(prog, cond) {
 				p.errorf("unrecognized suffix .%q", cond)
+				return
 			}
 
 		default:
 			p.errorf("unrecognized suffix .%q", cond)
+			return
 		}
 	}
 	if p.firstProg == nil {
@@ -49,6 +52,7 @@
 		for _, label := range p.pendingLabels {
 			if p.labels[label] != nil {
 				p.errorf("label %q multiply defined", label)
+				return
 			}
 			p.labels[label] = prog
 		}
@@ -63,14 +67,17 @@
 	}
 }
 
-// validateSymbol checks that addr represents a valid name for a pseudo-op.
-func (p *Parser) validateSymbol(pseudo string, addr *obj.Addr, offsetOk bool) {
+// validSymbol checks that addr represents a valid name for a pseudo-op.
+func (p *Parser) validSymbol(pseudo string, addr *obj.Addr, offsetOk bool) bool {
 	if addr.Name != obj.NAME_EXTERN && addr.Name != obj.NAME_STATIC || addr.Scale != 0 || addr.Reg != 0 {
 		p.errorf("%s symbol %q must be a symbol(SB)", pseudo, symbolName(addr))
+		return false
 	}
 	if !offsetOk && addr.Offset != 0 {
 		p.errorf("%s symbol %q must not be offset from SB", pseudo, symbolName(addr))
+		return false
 	}
+	return true
 }
 
 // evalInteger evaluates an integer constant for a pseudo-op.
@@ -79,11 +86,13 @@
 	return p.getConstantPseudo(pseudo, &addr)
 }
 
-// validateImmediate checks that addr represents an immediate constant.
-func (p *Parser) validateImmediate(pseudo string, addr *obj.Addr) {
+// validImmediate checks that addr represents an immediate constant.
+func (p *Parser) validImmediate(pseudo string, addr *obj.Addr) bool {
 	if addr.Type != obj.TYPE_CONST || addr.Name != 0 || addr.Reg != 0 || addr.Index != 0 {
 		p.errorf("%s: expected immediate constant; found %s", pseudo, obj.Dconv(&emptyProg, addr))
+		return false
 	}
+	return true
 }
 
 // asmText assembles a TEXT pseudo-op.
@@ -102,7 +111,9 @@
 	// Operand 0 is the symbol name in the form foo(SB).
 	// That means symbol plus indirect on SB and no offset.
 	nameAddr := p.address(operands[0])
-	p.validateSymbol("TEXT", &nameAddr, false)
+	if !p.validSymbol("TEXT", &nameAddr, false) {
+		return
+	}
 	name := symbolName(&nameAddr)
 	next := 1
 
@@ -144,6 +155,7 @@
 		// There is an argument size. It must be a minus sign followed by a non-negative integer literal.
 		if len(op) != 2 || op[0].ScanToken != '-' || op[1].ScanToken != scanner.Int {
 			p.errorf("TEXT %s: argument size must be of form -integer", name)
+			return
 		}
 		argSize = p.positiveAtoi(op[1].String())
 	}
@@ -185,7 +197,9 @@
 	scale := p.parseScale(op[n-1].String())
 	op = op[:n-2]
 	nameAddr := p.address(op)
-	p.validateSymbol("DATA", &nameAddr, true)
+	if !p.validSymbol("DATA", &nameAddr, true) {
+		return
+	}
 	name := symbolName(&nameAddr)
 
 	// Operand 1 is an immediate constant or address.
@@ -195,11 +209,13 @@
 		// OK
 	default:
 		p.errorf("DATA value must be an immediate constant or address")
+		return
 	}
 
 	// The addresses must not overlap. Easiest test: require monotonicity.
 	if lastAddr, ok := p.dataAddr[name]; ok && nameAddr.Offset < lastAddr {
 		p.errorf("overlapping DATA entry for %s", name)
+		return
 	}
 	p.dataAddr[name] = nameAddr.Offset + int64(scale)
 
@@ -228,7 +244,9 @@
 
 	// Operand 0 has the general form foo<>+0x04(SB).
 	nameAddr := p.address(operands[0])
-	p.validateSymbol("GLOBL", &nameAddr, false)
+	if !p.validSymbol("GLOBL", &nameAddr, false) {
+		return
+	}
 	next := 1
 
 	// Next operand is the optional flag, a literal integer.
@@ -240,7 +258,9 @@
 
 	// Final operand is an immediate constant.
 	addr := p.address(operands[next])
-	p.validateImmediate("GLOBL", &addr)
+	if !p.validImmediate("GLOBL", &addr) {
+		return
+	}
 
 	// log.Printf("GLOBL %s %d, $%d", name, flag, size)
 	prog := &obj.Prog{
@@ -266,11 +286,15 @@
 
 	// Operand 0 must be an immediate constant.
 	key := p.address(operands[0])
-	p.validateImmediate("PCDATA", &key)
+	if !p.validImmediate("PCDATA", &key) {
+		return
+	}
 
 	// Operand 1 must be an immediate constant.
 	value := p.address(operands[1])
-	p.validateImmediate("PCDATA", &value)
+	if !p.validImmediate("PCDATA", &value) {
+		return
+	}
 
 	// log.Printf("PCDATA $%d, $%d", key.Offset, value.Offset)
 	prog := &obj.Prog{
@@ -293,11 +317,15 @@
 
 	// Operand 0 must be an immediate constant.
 	valueAddr := p.address(operands[0])
-	p.validateImmediate("FUNCDATA", &valueAddr)
+	if !p.validImmediate("FUNCDATA", &valueAddr) {
+		return
+	}
 
 	// Operand 1 is a symbol name in the form foo(SB).
 	nameAddr := p.address(operands[1])
-	p.validateSymbol("FUNCDATA", &nameAddr, true)
+	if !p.validSymbol("FUNCDATA", &nameAddr, true) {
+		return
+	}
 
 	prog := &obj.Prog{
 		Ctxt:   p.ctxt,
@@ -340,6 +368,7 @@
 			reg, ok := p.arch.RegisterNumber("R", int16(reg))
 			if !ok {
 				p.errorf("bad register number %d", reg)
+				return
 			}
 			prog.Reg = reg
 			break
@@ -390,6 +419,7 @@
 		prog.To = a[0]
 	default:
 		p.errorf("cannot assemble jump %+v", target)
+		return
 	}
 
 	p.append(prog, cond, true)
@@ -400,9 +430,9 @@
 		targetProg := p.labels[patch.label]
 		if targetProg == nil {
 			p.errorf("undefined label %s", patch.label)
-		} else {
-			p.branch(patch.prog, targetProg)
+			return
 		}
+		p.branch(patch.prog, targetProg)
 	}
 	p.toPatch = p.toPatch[:0]
 }
@@ -468,6 +498,7 @@
 					break
 				}
 				p.errorf("unrecognized addressing for %s", obj.Aconv(op))
+				return
 			}
 			if arch.IsARMFloatCmp(op) {
 				prog.From = a[0]
@@ -506,6 +537,7 @@
 				prog.To = a[1]
 				if a[2].Type != obj.TYPE_REG {
 					p.errorf("invalid addressing modes for third operand to %s instruction, must be register", obj.Aconv(op))
+					return
 				}
 				prog.RegTo2 = a[2].Reg
 				break
@@ -541,9 +573,11 @@
 				prog.To = a[2]
 			default:
 				p.errorf("invalid addressing modes for %s instruction", obj.Aconv(op))
+				return
 			}
 		default:
 			p.errorf("TODO: implement three-operand instructions for this architecture")
+			return
 		}
 	case 4:
 		if p.arch.Thechar == '5' && arch.IsARMMULA(op) {
@@ -577,6 +611,7 @@
 			break
 		}
 		p.errorf("can't handle %s instruction with 4 operands", obj.Aconv(op))
+		return
 	case 5:
 		if p.arch.Thechar == '9' && arch.IsPPC64RLD(op) {
 			// Always reg, reg, con, con, reg.  (con, con is a 'mask').
@@ -598,6 +633,7 @@
 			break
 		}
 		p.errorf("can't handle %s instruction with 5 operands", obj.Aconv(op))
+		return
 	case 6:
 		if p.arch.Thechar == '5' && arch.IsARMMRC(op) {
 			// Strange special case: MCR, MRC.
@@ -621,6 +657,7 @@
 		fallthrough
 	default:
 		p.errorf("can't handle %s instruction with %d operands", obj.Aconv(op), len(a))
+		return
 	}
 
 	p.append(prog, cond, true)
diff --git a/src/cmd/asm/internal/asm/operand_test.go b/src/cmd/asm/internal/asm/operand_test.go
index d196574..0f8271b5 100644
--- a/src/cmd/asm/internal/asm/operand_test.go
+++ b/src/cmd/asm/internal/asm/operand_test.go
@@ -293,6 +293,7 @@
 	{"[R0,R1,g,R15", ""}, // Issue 11764 - asm hung parsing ']' missing register lists.
 	{"[):[o-FP", ""},     // Issue 12469 - there was no infinite loop for ARM; these are just sanity checks.
 	{"[):[R0-FP", ""},
+	{"(", ""}, // Issue 12466 - backed up before beginning of line.
 }
 
 var ppc64OperandTests = []operandTest{
diff --git a/src/cmd/asm/internal/asm/parse.go b/src/cmd/asm/internal/asm/parse.go
index 7d81d4b..9a42838 100644
--- a/src/cmd/asm/internal/asm/parse.go
+++ b/src/cmd/asm/internal/asm/parse.go
@@ -38,6 +38,7 @@
 	firstProg     *obj.Prog
 	lastProg      *obj.Prog
 	dataAddr      map[string]int64 // Most recent address for DATA for this symbol.
+	isJump        bool             // Instruction being assembled is a jump.
 	errorWriter   io.Writer
 }
 
@@ -155,6 +156,7 @@
 					// Remember this location so we can swap the operands below.
 					if colon >= 0 {
 						p.errorf("invalid ':' in operand")
+						return true
 					}
 					colon = len(operands)
 				}
@@ -196,15 +198,15 @@
 
 func (p *Parser) instruction(op int, word, cond string, operands [][]lex.Token) {
 	p.addr = p.addr[0:0]
-	isJump := p.arch.IsJump(word)
+	p.isJump = p.arch.IsJump(word)
 	for _, op := range operands {
 		addr := p.address(op)
-		if !isJump && addr.Reg < 0 { // Jumps refer to PC, a pseudo.
+		if !p.isJump && addr.Reg < 0 { // Jumps refer to PC, a pseudo.
 			p.errorf("illegal use of pseudo-register in %s", word)
 		}
 		p.addr = append(p.addr, addr)
 	}
-	if isJump {
+	if p.isJump {
 		p.asmJump(op, cond, p.addr)
 		return
 	}
@@ -338,8 +340,13 @@
 	case scanner.Int, scanner.Float, scanner.String, scanner.Char, '+', '-', '~':
 		haveConstant = true
 	case '(':
-		// Could be parenthesized expression or (R).
-		rname := p.next().String()
+		// Could be parenthesized expression or (R). Must be something, though.
+		tok := p.next()
+		if tok.ScanToken == scanner.EOF {
+			p.errorf("missing right parenthesis")
+			return false
+		}
+		rname := tok.String()
 		p.back()
 		haveConstant = !p.atStartOfRegister(rname)
 		if !haveConstant {
@@ -361,6 +368,7 @@
 		if p.have(scanner.String) {
 			if prefix != '$' {
 				p.errorf("string constant must be an immediate")
+				return false
 			}
 			str, err := strconv.Unquote(p.get(scanner.String).String())
 			if err != nil {
@@ -568,12 +576,14 @@
 	}
 	a.Sym = obj.Linklookup(p.ctxt, name, isStatic)
 	if p.peek() == scanner.EOF {
-		if prefix != 0 {
-			p.errorf("illegal addressing mode for symbol %s", name)
+		if prefix == 0 && p.isJump {
+			// Symbols without prefix or suffix are jump labels.
+			return
 		}
+		p.errorf("illegal or missing addressing mode for symbol %s", name)
 		return
 	}
-	// Expect (SB) or (FP), (PC), (SB), or (SP)
+	// Expect (SB), (FP), (PC), or (SP)
 	p.get('(')
 	reg := p.get(scanner.Ident).String()
 	p.get(')')
@@ -952,7 +962,11 @@
 }
 
 func (p *Parser) back() {
-	p.inputPos--
+	if p.inputPos == 0 {
+		p.errorf("internal error: backing up before BOL")
+	} else {
+		p.inputPos--
+	}
 }
 
 func (p *Parser) peek() lex.ScanToken {
diff --git a/src/cmd/asm/internal/asm/pseudo_test.go b/src/cmd/asm/internal/asm/pseudo_test.go
index df1adc5..ee13b72 100644
--- a/src/cmd/asm/internal/asm/pseudo_test.go
+++ b/src/cmd/asm/internal/asm/pseudo_test.go
@@ -35,6 +35,8 @@
 		{"TEXT", "%", "expect two or three operands for TEXT"},
 		{"TEXT", "1, 1", "TEXT symbol \"<erroneous symbol>\" must be a symbol(SB)"},
 		{"TEXT", "$\"foo\", 0, $1", "TEXT symbol \"<erroneous symbol>\" must be a symbol(SB)"},
+		{"TEXT", "$0É:0, 0, $1", "expected EOF, found É"},   // Issue #12467.
+		{"TEXT", "$:0:(SB, 0, $1", "expected '(', found 0"}, // Issue 12468.
 		{"FUNCDATA", "", "expect two operands for FUNCDATA"},
 		{"FUNCDATA", "(SB ", "expect two operands for FUNCDATA"},
 		{"DATA", "", "expect two operands for DATA"},
diff --git a/src/cmd/asm/internal/asm/testdata/arm64.out b/src/cmd/asm/internal/asm/testdata/arm64.out
index 0b7b430..37944bc 100644
--- a/src/cmd/asm/internal/asm/testdata/arm64.out
+++ b/src/cmd/asm/internal/asm/testdata/arm64.out
@@ -37,7 +37,7 @@
 147 00037 (testdata/arm64.s:147)	CSEL	LT, R1, R2, ZR
 148 00038 (testdata/arm64.s:148)	CSINC	GT, R1, ZR, R3
 149 00039 (testdata/arm64.s:149)	CSNEG	MI, R1, R2, R3
-150 00040 (testdata/arm64.s:150)	CSINV	0, R1, R2, R3
+150 00040 (testdata/arm64.s:150)	CSINV	HS, R1, R2, R3
 156 00041 (testdata/arm64.s:156)	CSEL	LT, R1, R2
 164 00042 (testdata/arm64.s:164)	CCMN	MI, ZR, R1, $4
 173 00043 (testdata/arm64.s:173)	FADDD	$(0.5), F1
diff --git a/src/cmd/asm/internal/lex/input.go b/src/cmd/asm/internal/lex/input.go
index cd91680..e5a3301 100644
--- a/src/cmd/asm/internal/lex/input.go
+++ b/src/cmd/asm/internal/lex/input.go
@@ -63,7 +63,12 @@
 	return macros
 }
 
+var panicOnError bool // For testing.
+
 func (in *Input) Error(args ...interface{}) {
+	if panicOnError {
+		panic(fmt.Errorf("%s:%d: %s", in.File(), in.Line(), fmt.Sprintln(args...)))
+	}
 	fmt.Fprintf(os.Stderr, "%s:%d: %s", in.File(), in.Line(), fmt.Sprintln(args...))
 	os.Exit(1)
 }
@@ -113,6 +118,10 @@
 			}
 			fallthrough
 		default:
+			if tok == scanner.EOF && len(in.ifdefStack) > 0 {
+				// We have run out of input with an unclosed #ifdef or #ifndef.
+				in.Error("unclosed #ifdef or #ifndef")
+			}
 			in.beginningOfLine = tok == '\n'
 			if in.enabled() {
 				in.text = in.Stack.Text()
@@ -251,6 +260,9 @@
 	var tokens []Token
 	// Scan to newline. Backslashes escape newlines.
 	for tok != '\n' {
+		if tok == scanner.EOF {
+			in.Error("missing newline in macro definition for %q\n", name)
+		}
 		if tok == '\\' {
 			tok = in.Stack.Next()
 			if tok != '\n' && tok != '\\' {
diff --git a/src/cmd/asm/internal/lex/lex_test.go b/src/cmd/asm/internal/lex/lex_test.go
index 32cc13e..f606ffe 100644
--- a/src/cmd/asm/internal/lex/lex_test.go
+++ b/src/cmd/asm/internal/lex/lex_test.go
@@ -226,6 +226,35 @@
 		),
 		"C.\n",
 	},
+	{
+		"nested #define",
+		lines(
+			"#define A #define B THIS",
+			"A",
+			"B",
+		),
+		"THIS.\n",
+	},
+	{
+		"nested #define with args",
+		lines(
+			"#define A #define B(x) x",
+			"A",
+			"B(THIS)",
+		),
+		"THIS.\n",
+	},
+	/* This one fails. See comment in Slice.Col.
+	{
+		"nested #define with args",
+		lines(
+			"#define A #define B (x) x",
+			"A",
+			"B(THIS)",
+		),
+		"x.\n",
+	},
+	*/
 }
 
 func TestLex(t *testing.T) {
@@ -258,3 +287,76 @@
 		buf.WriteString(input.Text())
 	}
 }
+
+type badLexTest struct {
+	input string
+	error string
+}
+
+var badLexTests = []badLexTest{
+	{
+		"3 #define foo bar\n",
+		"'#' must be first item on line",
+	},
+	{
+		"#ifdef foo\nhello",
+		"unclosed #ifdef or #ifndef",
+	},
+	{
+		"#ifndef foo\nhello",
+		"unclosed #ifdef or #ifndef",
+	},
+	{
+		"#ifdef foo\nhello\n#else\nbye",
+		"unclosed #ifdef or #ifndef",
+	},
+	{
+		"#define A() A()\nA()",
+		"recursive macro invocation",
+	},
+	{
+		"#define A a\n#define A a\n",
+		"redefinition of macro",
+	},
+	{
+		"#define A a",
+		"no newline after macro definition",
+	},
+}
+
+func TestBadLex(t *testing.T) {
+	for _, test := range badLexTests {
+		input := NewInput(test.error)
+		input.Push(NewTokenizer(test.error, strings.NewReader(test.input), nil))
+		err := firstError(input)
+		if err == nil {
+			t.Errorf("%s: got no error", test.error)
+			continue
+		}
+		if !strings.Contains(err.Error(), test.error) {
+			t.Errorf("got error %q expected %q", err.Error(), test.error)
+		}
+	}
+}
+
+// firstError returns the first error value triggered by the input.
+func firstError(input *Input) (err error) {
+	panicOnError = true
+	defer func() {
+		panicOnError = false
+		switch e := recover().(type) {
+		case nil:
+		case error:
+			err = e
+		default:
+			panic(e)
+		}
+	}()
+
+	for {
+		tok := input.Next()
+		if tok == scanner.EOF {
+			return
+		}
+	}
+}
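
panicOnError and firstError convert the lexer's fatal print-and-exit path
into a value a test can inspect. The same pattern in isolation, with
hypothetical names and assuming imports of errors, fmt, and os (a sketch,
not part of the change):

	var panicOnFatal bool // set by tests to capture fatal errors

	func fatal(msg string) {
		if panicOnFatal {
			panic(errors.New(msg))
		}
		fmt.Fprintln(os.Stderr, msg)
		os.Exit(1)
	}

	func captureFatal(f func()) (err error) {
		panicOnFatal = true
		defer func() {
			panicOnFatal = false
			switch e := recover().(type) {
			case nil: // no fatal error
			case error:
				err = e
			default:
				panic(e) // not ours; re-raise
			}
		}()
		f()
		return nil
	}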
diff --git a/src/cmd/asm/internal/lex/slice.go b/src/cmd/asm/internal/lex/slice.go
index e94106b..b0d5429 100644
--- a/src/cmd/asm/internal/lex/slice.go
+++ b/src/cmd/asm/internal/lex/slice.go
@@ -44,8 +44,16 @@
 }
 
 func (s *Slice) Col() int {
-	// Col is only called when defining a macro, which can't reach here.
-	panic("cannot happen: slice col")
+	// TODO: Col is only called when defining a macro and all it cares about is increasing
+	// position to discover whether there is a blank before the parenthesis.
+	// We only get here if defining a macro inside a macro.
+	// This imperfect implementation means we cannot tell the difference between
+	//	#define A #define B(x) x
+	// and
+	//	#define A #define B (x) x
+	// In the first, the definition of B has an argument; in the second it doesn't.
+	// Because we let text/scanner strip the blanks for us, this is extremely rare,
+	// hard to fix, and not worth it.
+	return s.pos
 }
 
 func (s *Slice) SetPos(line int, file string) {
diff --git a/src/cmd/cgo/doc.go b/src/cmd/cgo/doc.go
index b2a5428..f82d782 100644
--- a/src/cmd/cgo/doc.go
+++ b/src/cmd/cgo/doc.go
@@ -391,17 +391,13 @@
 
 Translating Go
 
-[The rest of this comment refers to 6g, the Go compiler that is part
-of the amd64 port of the gc Go toolchain. Everything here applies to
-another architecture's compilers as well.]
-
 Given the input Go files x.go and y.go, cgo generates these source
 files:
 
-	x.cgo1.go       # for 6g
-	y.cgo1.go       # for 6g
-	_cgo_gotypes.go # for 6g
-	_cgo_import.go  # for 6g (if -dynout _cgo_import.go)
+	x.cgo1.go       # for gc (cmd/compile)
+	y.cgo1.go       # for gc
+	_cgo_gotypes.go # for gc
+	_cgo_import.go  # for gc (if -dynout _cgo_import.go)
 	x.cgo2.c        # for gcc
 	y.cgo2.c        # for gcc
 	_cgo_defun.c    # for gcc (if -gccgo)
@@ -464,7 +460,7 @@
 
 Once the _cgo_export.c and *.cgo2.c files have been compiled with gcc,
 they need to be linked into the final binary, along with the libraries
-they might depend on (in the case of puts, stdio). 6l has been
+they might depend on (in the case of puts, stdio). cmd/link has been
 extended to understand basic ELF files, but it does not understand ELF
 in the full complexity that modern C libraries embrace, so it cannot
 in general generate direct references to the system libraries.
@@ -495,23 +491,23 @@
 	//go:cgo_import_dynamic _ _ "libc.so.6"
 
 In the end, the compiled Go package, which will eventually be
-presented to 6l as part of a larger program, contains:
+presented to cmd/link as part of a larger program, contains:
 
-	_go_.6        # 6g-compiled object for _cgo_gotypes.go, _cgo_import.go, *.cgo1.go
+	_go_.o        # gc-compiled object for _cgo_gotypes.go, _cgo_import.go, *.cgo1.go
 	_all.o        # gcc-compiled object for _cgo_export.c, *.cgo2.c
 
-The final program will be a dynamic executable, so that 6l can avoid
+The final program will be a dynamic executable, so that cmd/link can avoid
 needing to process arbitrary .o files. It only needs to process the .o
 files generated from C files that cgo writes, and those are much more
 limited in the ELF or other features that they use.
 
-In essence, the _cgo_import.6 file includes the extra linking
-directives that 6l is not sophisticated enough to derive from _all.o
+In essence, the _cgo_import.o file includes the extra linking
+directives that cmd/link is not sophisticated enough to derive from _all.o
 on its own. Similarly, the _all.o uses dynamic references to real
-system object code because 6l is not sophisticated enough to process
+system object code because cmd/link is not sophisticated enough to process
 the real code.
 
-The main benefits of this system are that 6l remains relatively simple
+The main benefits of this system are that cmd/link remains relatively simple
 (it does not need to implement a complete ELF and Mach-O linker) and
 that gcc is not needed after the package is compiled. For example,
 package net uses cgo for access to name resolution functions provided
@@ -540,17 +536,17 @@
 
 Internal and External Linking
 
-The text above describes "internal" linking, in which 6l parses and
+The text above describes "internal" linking, in which cmd/link parses and
 links host object files (ELF, Mach-O, PE, and so on) into the final
-executable itself. Keeping 6l simple means we cannot possibly
+executable itself. Keeping cmd/link simple means we cannot possibly
 implement the full semantics of the host linker, so the kinds of
 objects that can be linked directly into the binary are limited (other
 code can only be used as a dynamic library). On the other hand, when
-using internal linking, 6l can generate Go binaries by itself.
+using internal linking, cmd/link can generate Go binaries by itself.
 
 In order to allow linking arbitrary object files without requiring
 dynamic libraries, cgo supports an "external" linking mode too. In
-external linking mode, 6l does not process any host object files.
+external linking mode, cmd/link does not process any host object files.
 Instead, it collects all the Go code and writes a single go.o object
 file containing it. Then it invokes the host linker (usually gcc) to
 combine the go.o object file and any supporting non-Go code into a
@@ -582,8 +578,8 @@
 Linking Directives
 
 In either linking mode, package-specific directives must be passed
-through to 6l. These are communicated by writing //go: directives in a
-Go source file compiled by 6g. The directives are copied into the .6
+through to cmd/link. These are communicated by writing //go: directives in a
+Go source file compiled by gc. The directives are copied into the .o
 object file and then processed by the linker.
 
 The directives are:
@@ -672,7 +668,7 @@
 As a simple example, consider a package that uses cgo to call C.sin.
 The following code will be generated by cgo:
 
-	// compiled by 6g
+	// compiled by gc
 
 	//go:cgo_ldflag "-lm"
 
@@ -708,7 +704,7 @@
 The linking directives are used according to the kind of final link
 used.
 
-In internal mode, 6l itself processes all the host object files, in
+In internal mode, cmd/link itself processes all the host object files, in
 particular foo.cgo2.o. To do so, it uses the cgo_import_dynamic and
 cgo_dynamic_linker directives to learn that the otherwise undefined
 reference to sin in foo.cgo2.o should be rewritten to refer to the
@@ -716,56 +712,56 @@
 "libm.so.6", and the binary should request "/lib/ld-linux.so.2" as its
 runtime dynamic linker.
 
-In external mode, 6l does not process any host object files, in
-particular foo.cgo2.o. It links together the 6g-generated object
+In external mode, cmd/link does not process any host object files, in
+particular foo.cgo2.o. It links together the gc-generated object
 files, along with any other Go code, into a go.o file. While doing
-that, 6l will discover that there is no definition for
-_cgo_gcc_Cfunc_sin, referred to by the 6g-compiled source file. This
-is okay, because 6l also processes the cgo_import_static directive and
+that, cmd/link will discover that there is no definition for
+_cgo_gcc_Cfunc_sin, referred to by the gc-compiled source file. This
+is okay, because cmd/link also processes the cgo_import_static directive and
 knows that _cgo_gcc_Cfunc_sin is expected to be supplied by a host
-object file, so 6l does not treat the missing symbol as an error when
+object file, so cmd/link does not treat the missing symbol as an error when
 creating go.o. Indeed, the definition for _cgo_gcc_Cfunc_sin will be
 provided to the host linker by foo.cgo2.o, which in turn will need the
-symbol 'sin'. 6l also processes the cgo_ldflag directives, so that it
+symbol 'sin'. cmd/link also processes the cgo_ldflag directives, so that it
 knows that the eventual host link command must include the -lm
 argument, so that the host linker will be able to find 'sin' in the
 math library.
 
-6l Command Line Interface
+cmd/link Command Line Interface
 
-The go command and any other Go-aware build systems invoke 6l
-to link a collection of packages into a single binary. By default, 6l will
+The go command and any other Go-aware build systems invoke cmd/link
+to link a collection of packages into a single binary. By default, cmd/link will
 present the same interface it does today:
 
-	6l main.a
+	cmd/link main.a
 
-produces a file named 6.out, even if 6l does so by invoking the host
+produces a file named a.out, even if cmd/link does so by invoking the host
 linker in external linking mode.
 
-By default, 6l will decide the linking mode as follows: if the only
+By default, cmd/link will decide the linking mode as follows: if the only
 packages using cgo are those on a whitelist of standard library
-packages (net, os/user, runtime/cgo), 6l will use internal linking
-mode. Otherwise, there are non-standard cgo packages involved, and 6l
+packages (net, os/user, runtime/cgo), cmd/link will use internal linking
+mode. Otherwise, there are non-standard cgo packages involved, and cmd/link
 will use external linking mode. The first rule means that a build of
 the godoc binary, which uses net but no other cgo, can run without
 needing gcc available. The second rule means that a build of a
 cgo-wrapped library like sqlite3 can generate a standalone executable
 instead of needing to refer to a dynamic library. The specific choice
-can be overridden using a command line flag: 6l -linkmode=internal or
-6l -linkmode=external.
+can be overridden using a command line flag: cmd/link -linkmode=internal or
+cmd/link -linkmode=external.
 
-In an external link, 6l will create a temporary directory, write any
+In an external link, cmd/link will create a temporary directory, write any
 host object files found in package archives to that directory (renamed
 to avoid conflicts), write the go.o file to that directory, and invoke
 the host linker. The default value for the host linker is $CC, split
 into fields, or else "gcc". The specific host linker command line can
-be overridden using command line flags: 6l -extld=clang
+be overridden using command line flags: cmd/link -extld=clang
 -extldflags='-ggdb -O3'.  If any package in a build includes a .cc or
 other file compiled by the C++ compiler, the go tool will use the
 -extld option to set the host linker to the C++ compiler.
 
 These defaults mean that Go-aware build systems can ignore the linking
-changes and keep running plain '6l' and get reasonable results, but
+changes and keep running plain 'cmd/link' and get reasonable results, but
 they can also control the linking details if desired.
 
 */
diff --git a/src/cmd/cgo/gcc.go b/src/cmd/cgo/gcc.go
index b64849a..b65b6cb 100644
--- a/src/cmd/cgo/gcc.go
+++ b/src/cmd/cgo/gcc.go
@@ -607,6 +607,10 @@
 			if r.Name.Kind != "func" {
 				if r.Name.Kind == "type" {
 					r.Context = "type"
+					if r.Name.Type == nil {
+						error_(r.Pos(), "invalid conversion to C.%s: undefined C type '%s'", fixGo(r.Name.Go), r.Name.C)
+						break
+					}
 					expr = r.Name.Type.Go
 					break
 				}
@@ -658,6 +662,10 @@
 				}
 			} else if r.Name.Kind == "type" {
 				// Okay - might be new(T)
+				if r.Name.Type == nil {
+					error_(r.Pos(), "expression C.%s: undefined C type '%s'", fixGo(r.Name.Go), r.Name.C)
+					break
+				}
 				expr = r.Name.Type.Go
 			} else if r.Name.Kind == "var" {
 				expr = &ast.StarExpr{Star: (*r.Expr).Pos(), X: expr}
diff --git a/src/cmd/cgo/godefs.go b/src/cmd/cgo/godefs.go
index 1b0ece2..aff616e 100644
--- a/src/cmd/cgo/godefs.go
+++ b/src/cmd/cgo/godefs.go
@@ -11,6 +11,7 @@
 	"go/printer"
 	"go/token"
 	"os"
+	"path/filepath"
 	"strings"
 )
 
@@ -19,7 +20,7 @@
 	var buf bytes.Buffer
 
 	fmt.Fprintf(&buf, "// Created by cgo -godefs - DO NOT EDIT\n")
-	fmt.Fprintf(&buf, "// %s\n", strings.Join(os.Args, " "))
+	fmt.Fprintf(&buf, "// %s %s\n", filepath.Base(os.Args[0]), strings.Join(os.Args[1:], " "))
 	fmt.Fprintf(&buf, "\n")
 
 	override := make(map[string]string)
diff --git a/src/cmd/cgo/main.go b/src/cmd/cgo/main.go
index 02d297c..5e7520d 100644
--- a/src/cmd/cgo/main.go
+++ b/src/cmd/cgo/main.go
@@ -279,11 +279,7 @@
 		if nerrors > 0 {
 			os.Exit(2)
 		}
-		pkg := f.Package
-		if dir := os.Getenv("CGOPKGPATH"); dir != "" {
-			pkg = filepath.Join(dir, pkg)
-		}
-		p.PackagePath = pkg
+		p.PackagePath = f.Package
 		p.Record(f)
 		if *godefs {
 			os.Stdout.WriteString(p.godefs(f, input))
diff --git a/src/cmd/cgo/out.go b/src/cmd/cgo/out.go
index aaa105d..b69f410 100644
--- a/src/cmd/cgo/out.go
+++ b/src/cmd/cgo/out.go
@@ -933,23 +933,15 @@
 			fmt.Fprintf(fgcch, "\n%s", exp.Doc)
 		}
 
+		fmt.Fprintf(fgcch, "extern %s %s %s;\n", cRet, exp.ExpName, cParams)
+
 		// We need to use a name that will be exported by the
 		// Go code; otherwise gccgo will make it static and we
 		// will not be able to link against it from the C
 		// code.
 		goName := "Cgoexp_" + exp.ExpName
-		fmt.Fprintf(fgcch, `extern %s %s %s __asm__("%s.%s");`, cRet, goName, cParams, gccgoSymbolPrefix, goName)
-		fmt.Fprint(fgcch, "\n")
-
-		// Use a #define so that the C code that includes
-		// cgo_export.h will be able to refer to the Go
-		// function using the expected name.
-		fmt.Fprintf(fgcch, "#define %s %s\n", exp.ExpName, goName)
-
-		// Use a #undef in _cgo_export.c so that we ignore the
-		// #define from cgo_export.h, since here we are
-		// defining the real function.
-		fmt.Fprintf(fgcc, "#undef %s\n", exp.ExpName)
+		fmt.Fprintf(fgcc, `extern %s %s %s __asm__("%s.%s");`, cRet, goName, cParams, gccgoSymbolPrefix, goName)
+		fmt.Fprint(fgcc, "\n")
 
 		fmt.Fprint(fgcc, "\n")
 		fmt.Fprintf(fgcc, "%s %s %s {\n", cRet, exp.ExpName, cParams)
diff --git a/src/cmd/compile/internal/amd64/cgen.go b/src/cmd/compile/internal/amd64/cgen.go
index 71f8f88..4b00003 100644
--- a/src/cmd/compile/internal/amd64/cgen.go
+++ b/src/cmd/compile/internal/amd64/cgen.go
@@ -80,17 +80,27 @@
 		gins(x86.ACLD, nil, nil)
 	} else {
 		// normal direction
-		if q > 128 || (gc.Nacl && q >= 4) {
+		if q > 128 || (gc.Nacl && q >= 4) || (obj.Getgoos() == "plan9" && q >= 4) {
 			gconreg(movptr, q, x86.REG_CX)
 			gins(x86.AREP, nil, nil)   // repeat
 			gins(x86.AMOVSQ, nil, nil) // MOVQ *(SI)+,*(DI)+
 		} else if q >= 4 {
+			var oldx0 gc.Node
+			var x0 gc.Node
+			savex(x86.REG_X0, &x0, &oldx0, nil, gc.Types[gc.TFLOAT64])
+
 			p := gins(obj.ADUFFCOPY, nil, nil)
 			p.To.Type = obj.TYPE_ADDR
 			p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg))
 
-			// 14 and 128 = magic constants: see ../../runtime/asm_amd64.s
-			p.To.Offset = 14 * (128 - q)
+			// 64 blocks taking 14 bytes each
+			// see ../../../../runtime/mkduff.go
+			p.To.Offset = 14 * (64 - q/2)
+			restx(&x0, &oldx0)
+
+			if q%2 != 0 {
+				gins(x86.AMOVSQ, nil, nil) // MOVQ *(SI)+,*(DI)+
+			}
 		} else if !gc.Nacl && c == 0 {
 			// We don't need the MOVSQ side-effect of updating SI and DI,
 			// and issuing a sequence of MOVQs directly is faster.
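
The duffcopy path above can be checked by hand: the rewritten duffcopy is
64 blocks, each copying 2 quads (16 bytes) in 14 bytes of code, and an odd
quad is mopped up by the trailing MOVSQ. A sketch of the arithmetic
(illustrative only, not compiler code):

	// duffcopyOffset mirrors p.To.Offset = 14 * (64 - q/2) above.
	func duffcopyOffset(q int64) int64 {
		const blockSize = 14 // code bytes per 2-quad block
		const blocks = 64    // total blocks in duffcopy
		return blockSize * (blocks - q/2)
	}

For example, q = 9 jumps to 14*(64-4) = 840, executing the final 4 blocks
(8 quads), and the q%2 != 0 branch emits one MOVSQ for the 9th quad.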
diff --git a/src/cmd/compile/internal/amd64/ggen.go b/src/cmd/compile/internal/amd64/ggen.go
index f1f4955..a4f1ec9 100644
--- a/src/cmd/compile/internal/amd64/ggen.go
+++ b/src/cmd/compile/internal/amd64/ggen.go
@@ -28,6 +28,7 @@
 	hi := int64(0)
 	lo := hi
 	ax := uint32(0)
+	x0 := uint32(0)
 
 	// iterate through declarations - they are sorted in decreasing xoffset order.
 	for l := gc.Curfn.Func.Dcl; l != nil; l = l.Next {
@@ -50,7 +51,7 @@
 		}
 
 		// zero old range
-		p = zerorange(p, int64(frame), lo, hi, &ax)
+		p = zerorange(p, int64(frame), lo, hi, &ax, &x0)
 
 		// set new range
 		hi = n.Xoffset + n.Type.Width
@@ -59,88 +60,104 @@
 	}
 
 	// zero final range
-	zerorange(p, int64(frame), lo, hi, &ax)
+	zerorange(p, int64(frame), lo, hi, &ax, &x0)
 }
 
-// DUFFZERO consists of repeated blocks of 4 MOVs + ADD,
-// with 4 STOSQs at the very end.
-// The trailing STOSQs prevent the need for a DI preadjustment
-// for small numbers of words to clear.
+// DUFFZERO consists of repeated blocks of 4 MOVUPSs + ADD.
 // See runtime/mkduff.go.
 const (
-	dzBlocks    = 31 // number of MOV/ADD blocks
+	dzBlocks    = 16 // number of MOV/ADD blocks
 	dzBlockLen  = 4  // number of clears per block
 	dzBlockSize = 19 // size of instructions in a single block
 	dzMovSize   = 4  // size of single MOV instruction w/ offset
 	dzAddSize   = 4  // size of single ADD instruction
-	dzDIStep    = 8  // number of bytes cleared by each MOV instruction
+	dzClearStep = 16 // number of bytes cleared by each MOV instruction
 
-	dzTailLen  = 4 // number of final STOSQ instructions
-	dzTailSize = 2 // size of single STOSQ instruction
-
-	dzSize = dzBlocks*dzBlockSize + dzTailLen*dzTailSize // total size of DUFFZERO routine
+	dzClearLen = dzClearStep * dzBlockLen // bytes cleared by one block
+	dzSize     = dzBlocks * dzBlockSize
 )
 
-// duffzeroDI returns the pre-adjustment to DI for a call to DUFFZERO.
-// q is the number of words to zero.
-func dzDI(q int64) int64 {
-	if q < dzTailLen {
-		return 0
-	}
-	q -= dzTailLen
-	if q%dzBlockLen == 0 {
-		return 0
-	}
-	return -dzDIStep * (dzBlockLen - q%dzBlockLen)
-}
-
 // dzOff returns the offset for a jump into DUFFZERO.
-// q is the number of words to zero.
-func dzOff(q int64) int64 {
+// b is the number of bytes to zero.
+func dzOff(b int64) int64 {
 	off := int64(dzSize)
-	if q < dzTailLen {
-		return off - q*dzTailSize
-	}
-	off -= dzTailLen * dzTailSize
-	q -= dzTailLen
-	blocks, steps := q/dzBlockLen, q%dzBlockLen
-	off -= dzBlockSize * blocks
-	if steps > 0 {
-		off -= dzAddSize + dzMovSize*steps
+	off -= b / dzClearLen * dzBlockSize
+	tailLen := b % dzClearLen
+	if tailLen >= dzClearStep {
+		off -= dzAddSize + dzMovSize*(tailLen/dzClearStep)
 	}
 	return off
 }
 
-func zerorange(p *obj.Prog, frame int64, lo int64, hi int64, ax *uint32) *obj.Prog {
+// dzDI returns the pre-adjustment to DI for a call to DUFFZERO.
+// b is the number of bytes to zero.
+func dzDI(b int64) int64 {
+	tailLen := b % dzClearLen
+	if tailLen < dzClearStep {
+		return 0
+	}
+	tailSteps := tailLen / dzClearStep
+	return -dzClearStep * (dzBlockLen - tailSteps)
+}
+
+func zerorange(p *obj.Prog, frame int64, lo int64, hi int64, ax *uint32, x0 *uint32) *obj.Prog {
 	cnt := hi - lo
 	if cnt == 0 {
 		return p
 	}
-	if *ax == 0 {
-		p = appendpp(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0)
-		*ax = 1
-	}
 
 	if cnt%int64(gc.Widthreg) != 0 {
 		// should only happen with nacl
 		if cnt%int64(gc.Widthptr) != 0 {
 			gc.Fatalf("zerorange count not a multiple of widthptr %d", cnt)
 		}
+		if *ax == 0 {
+			p = appendpp(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0)
+			*ax = 1
+		}
 		p = appendpp(p, x86.AMOVL, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_MEM, x86.REG_SP, frame+lo)
 		lo += int64(gc.Widthptr)
 		cnt -= int64(gc.Widthptr)
 	}
 
-	if cnt <= int64(4*gc.Widthreg) {
-		for i := int64(0); i < cnt; i += int64(gc.Widthreg) {
-			p = appendpp(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_MEM, x86.REG_SP, frame+lo+i)
+	if cnt == 8 {
+		if *ax == 0 {
+			p = appendpp(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0)
+			*ax = 1
+		}
+		p = appendpp(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_MEM, x86.REG_SP, frame+lo)
+	} else if cnt <= int64(8*gc.Widthreg) {
+		if *x0 == 0 {
+			p = appendpp(p, x86.AXORPS, obj.TYPE_REG, x86.REG_X0, 0, obj.TYPE_REG, x86.REG_X0, 0)
+			*x0 = 1
+		}
+
+		for i := int64(0); i < cnt/16; i++ {
+			p = appendpp(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X0, 0, obj.TYPE_MEM, x86.REG_SP, frame+lo+i*16)
+		}
+
+		if cnt%16 != 0 {
+			p = appendpp(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X0, 0, obj.TYPE_MEM, x86.REG_SP, frame+lo+cnt-int64(16))
 		}
 	} else if !gc.Nacl && (cnt <= int64(128*gc.Widthreg)) {
-		q := cnt / int64(gc.Widthreg)
-		p = appendpp(p, leaptr, obj.TYPE_MEM, x86.REG_SP, frame+lo+dzDI(q), obj.TYPE_REG, x86.REG_DI, 0)
-		p = appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_ADDR, 0, dzOff(q))
+		if *x0 == 0 {
+			p = appendpp(p, x86.AXORPS, obj.TYPE_REG, x86.REG_X0, 0, obj.TYPE_REG, x86.REG_X0, 0)
+			*x0 = 1
+		}
+
+		p = appendpp(p, leaptr, obj.TYPE_MEM, x86.REG_SP, frame+lo+dzDI(cnt), obj.TYPE_REG, x86.REG_DI, 0)
+		p = appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_ADDR, 0, dzOff(cnt))
 		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))
+
+		if cnt%16 != 0 {
+			p = appendpp(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X0, 0, obj.TYPE_MEM, x86.REG_DI, -int64(8))
+		}
 	} else {
+		if *ax == 0 {
+			p = appendpp(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0)
+			*ax = 1
+		}
+
 		p = appendpp(p, x86.AMOVQ, obj.TYPE_CONST, 0, cnt/int64(gc.Widthreg), obj.TYPE_REG, x86.REG_CX, 0)
 		p = appendpp(p, leaptr, obj.TYPE_MEM, x86.REG_SP, frame+lo, obj.TYPE_REG, x86.REG_DI, 0)
 		p = appendpp(p, x86.AREP, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
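
A worked example of the byte-based entry math above, with dzSize = 16*19 =
304 and each block clearing dzClearLen = 64 bytes via four MOVUPS:

	// Illustrative arithmetic, not compiler code.
	//   b = 96: one full block (96/64) plus a 32-byte tail (2 MOVUPS steps).
	//   dzOff(96) = 304 - 1*19 - (4 + 4*2) = 273 // enter two MOVs before a block's ADD
	//   dzDI(96)  = -16 * (4 - 2)          = -32 // back DI up so those MOVs land at 0(DI)
	// The entry runs the two tail MOVUPS, the ADD, then one full block:
	// 32 + 64 = 96 bytes cleared. When cnt%16 == 8, the extra
	// MOVUPS X0, -8(DI) after the call overlaps eight already-zeroed bytes
	// to cover the remainder.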
@@ -537,106 +554,150 @@
 		gc.Dump("\nclearfat", nl)
 	}
 
-	w := nl.Type.Width
-
 	// Avoid taking the address for simple enough types.
 	if gc.Componentgen(nil, nl) {
 		return
 	}
 
-	c := w % 8 // bytes
-	q := w / 8 // quads
+	w := nl.Type.Width
 
-	if q < 4 {
-		// Write sequence of MOV 0, off(base) instead of using STOSQ.
-		// The hope is that although the code will be slightly longer,
-		// the MOVs will have no dependencies and pipeline better
-		// than the unrolled STOSQ loop.
-		// NOTE: Must use agen, not igen, so that optimizer sees address
-		// being taken. We are not writing on field boundaries.
+	if w > 1024 || (gc.Nacl && w >= 64) {
+		var oldn1 gc.Node
 		var n1 gc.Node
-		gc.Agenr(nl, &n1, nil)
+		savex(x86.REG_DI, &n1, &oldn1, nil, gc.Types[gc.Tptr])
+		gc.Agen(nl, &n1)
 
-		n1.Op = gc.OINDREG
-		var z gc.Node
-		gc.Nodconst(&z, gc.Types[gc.TUINT64], 0)
-		for ; q > 0; q-- {
-			n1.Type = z.Type
-			gins(x86.AMOVQ, &z, &n1)
-			n1.Xoffset += 8
+		var ax gc.Node
+		var oldax gc.Node
+		savex(x86.REG_AX, &ax, &oldax, nil, gc.Types[gc.Tptr])
+		gconreg(x86.AMOVL, 0, x86.REG_AX)
+		gconreg(movptr, w/8, x86.REG_CX)
+
+		gins(x86.AREP, nil, nil)   // repeat
+		gins(x86.ASTOSQ, nil, nil) // STOQ AL,*(DI)+
+
+		if w%8 != 0 {
+			n1.Op = gc.OINDREG
+			clearfat_tail(&n1, w%8)
 		}
 
-		if c >= 4 {
-			gc.Nodconst(&z, gc.Types[gc.TUINT32], 0)
-			n1.Type = z.Type
-			gins(x86.AMOVL, &z, &n1)
-			n1.Xoffset += 4
-			c -= 4
-		}
-
-		gc.Nodconst(&z, gc.Types[gc.TUINT8], 0)
-		for ; c > 0; c-- {
-			n1.Type = z.Type
-			gins(x86.AMOVB, &z, &n1)
-			n1.Xoffset++
-		}
-
-		gc.Regfree(&n1)
+		restx(&n1, &oldn1)
+		restx(&ax, &oldax)
 		return
 	}
 
-	var oldn1 gc.Node
-	var n1 gc.Node
-	savex(x86.REG_DI, &n1, &oldn1, nil, gc.Types[gc.Tptr])
-	gc.Agen(nl, &n1)
+	if w >= 64 {
+		var oldn1 gc.Node
+		var n1 gc.Node
+		savex(x86.REG_DI, &n1, &oldn1, nil, gc.Types[gc.Tptr])
+		gc.Agen(nl, &n1)
 
-	var ax gc.Node
-	var oldax gc.Node
-	savex(x86.REG_AX, &ax, &oldax, nil, gc.Types[gc.Tptr])
-	gconreg(x86.AMOVL, 0, x86.REG_AX)
+		var vec_zero gc.Node
+		var old_x0 gc.Node
+		savex(x86.REG_X0, &vec_zero, &old_x0, nil, gc.Types[gc.TFLOAT64])
+		gins(x86.AXORPS, &vec_zero, &vec_zero)
 
-	if q > 128 || gc.Nacl {
-		gconreg(movptr, q, x86.REG_CX)
-		gins(x86.AREP, nil, nil)   // repeat
-		gins(x86.ASTOSQ, nil, nil) // STOQ AL,*(DI)+
-	} else {
-		if di := dzDI(q); di != 0 {
+		if di := dzDI(w); di != 0 {
 			gconreg(addptr, di, x86.REG_DI)
 		}
 		p := gins(obj.ADUFFZERO, nil, nil)
 		p.To.Type = obj.TYPE_ADDR
 		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))
-		p.To.Offset = dzOff(q)
+		p.To.Offset = dzOff(w)
+
+		if w%16 != 0 {
+			n1.Op = gc.OINDREG
+			n1.Xoffset -= 16 - w%16
+			gins(x86.AMOVUPS, &vec_zero, &n1)
+		}
+
+		restx(&vec_zero, &old_x0)
+		restx(&n1, &oldn1)
+		return
 	}
 
-	z := ax
-	di := n1
-	if w >= 8 && c >= 4 {
-		di.Op = gc.OINDREG
-		z.Type = gc.Types[gc.TINT64]
-		di.Type = z.Type
-		p := gins(x86.AMOVQ, &z, &di)
-		p.To.Scale = 1
-		p.To.Offset = c - 8
-	} else if c >= 4 {
-		di.Op = gc.OINDREG
-		z.Type = gc.Types[gc.TINT32]
-		di.Type = z.Type
-		gins(x86.AMOVL, &z, &di)
-		if c > 4 {
-			p := gins(x86.AMOVL, &z, &di)
-			p.To.Scale = 1
-			p.To.Offset = c - 4
+	// NOTE: Must use agen, not igen, so that optimizer sees address
+	// being taken. We are not writing on field boundaries.
+	var n1 gc.Node
+	gc.Agenr(nl, &n1, nil)
+	n1.Op = gc.OINDREG
+
+	clearfat_tail(&n1, w)
+
+	gc.Regfree(&n1)
+}
+
+func clearfat_tail(n1 *gc.Node, b int64) {
+	if b >= 16 {
+		var vec_zero gc.Node
+		gc.Regalloc(&vec_zero, gc.Types[gc.TFLOAT64], nil)
+		gins(x86.AXORPS, &vec_zero, &vec_zero)
+
+		for b >= 16 {
+			gins(x86.AMOVUPS, &vec_zero, n1)
+			n1.Xoffset += 16
+			b -= 16
 		}
-	} else {
-		for c > 0 {
-			gins(x86.ASTOSB, nil, nil) // STOB AL,*(DI)+
-			c--
+
+		// MOVUPS X0, off(base) is a few bytes shorter than MOV 0, off(base)
+		if b != 0 {
+			n1.Xoffset -= 16 - b
+			gins(x86.AMOVUPS, &vec_zero, n1)
 		}
+
+		gc.Regfree(&vec_zero)
+		return
 	}
 
-	restx(&n1, &oldn1)
-	restx(&ax, &oldax)
+	// Write sequence of MOV 0, off(base) instead of using STOSQ.
+	// The hope is that although the code will be slightly longer,
+	// the MOVs will have no dependencies and pipeline better
+	// than the unrolled STOSQ loop.
+	var z gc.Node
+	gc.Nodconst(&z, gc.Types[gc.TUINT64], 0)
+	if b >= 8 {
+		n1.Type = z.Type
+		gins(x86.AMOVQ, &z, n1)
+		n1.Xoffset += 8
+		b -= 8
+
+		if b != 0 {
+			n1.Xoffset -= 8 - b
+			gins(x86.AMOVQ, &z, n1)
+		}
+		return
+	}
+
+	if b >= 4 {
+		gc.Nodconst(&z, gc.Types[gc.TUINT32], 0)
+		n1.Type = z.Type
+		gins(x86.AMOVL, &z, n1)
+		n1.Xoffset += 4
+		b -= 4
+
+		if b != 0 {
+			n1.Xoffset -= 4 - b
+			gins(x86.AMOVL, &z, n1)
+		}
+		return
+	}
+
+	if b >= 2 {
+		gc.Nodconst(&z, gc.Types[gc.TUINT16], 0)
+		n1.Type = z.Type
+		gins(x86.AMOVW, &z, n1)
+		n1.Xoffset += 2
+		b -= 2
+	}
+
+	gc.Nodconst(&z, gc.Types[gc.TUINT8], 0)
+	for b > 0 {
+		n1.Type = z.Type
+		gins(x86.AMOVB, &z, n1)
+		n1.Xoffset++
+		b--
+	}
 }
 
 // Called after regopt and peep have run.
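
clearfat_tail prefers one overlapping full-width store to a series of
narrow tail writes. A pure-Go analogue of the trick, assuming len(buf) >=
16 (a sketch, not generated code):

	func zero16(buf []byte) {
		var zero [16]byte
		i := 0
		for ; i+16 <= len(buf); i += 16 {
			copy(buf[i:i+16], zero[:]) // MOVUPS X0, off(base)
		}
		if i < len(buf) {
			copy(buf[len(buf)-16:], zero[:]) // back up and overlap the final store
		}
	}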
diff --git a/src/cmd/compile/internal/amd64/prog.go b/src/cmd/compile/internal/amd64/prog.go
index 59fb0761..9502cf9 100644
--- a/src/cmd/compile/internal/amd64/prog.go
+++ b/src/cmd/compile/internal/amd64/prog.go
@@ -136,6 +136,7 @@
 	x86.AMOVL:      {Flags: gc.SizeL | gc.LeftRead | gc.RightWrite | gc.Move},
 	x86.AMOVQ:      {Flags: gc.SizeQ | gc.LeftRead | gc.RightWrite | gc.Move},
 	x86.AMOVW:      {Flags: gc.SizeW | gc.LeftRead | gc.RightWrite | gc.Move},
+	x86.AMOVUPS:    {Flags: gc.LeftRead | gc.RightWrite | gc.Move},
 	x86.AMOVSB:     {Flags: gc.OK, Reguse: DI | SI, Regset: DI | SI},
 	x86.AMOVSL:     {Flags: gc.OK, Reguse: DI | SI, Regset: DI | SI},
 	x86.AMOVSQ:     {Flags: gc.OK, Reguse: DI | SI, Regset: DI | SI},
@@ -248,6 +249,7 @@
 	x86.AXORL:     {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry},
 	x86.AXORQ:     {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry},
 	x86.AXORW:     {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry},
+	x86.AXORPS:    {Flags: gc.LeftRead | RightRdwr},
 }
 
 func progflags(p *obj.Prog) uint32 {
diff --git a/src/cmd/compile/internal/arm64/gsubr.go b/src/cmd/compile/internal/arm64/gsubr.go
index 1121478..50ff29bf 100644
--- a/src/cmd/compile/internal/arm64/gsubr.go
+++ b/src/cmd/compile/internal/arm64/gsubr.go
@@ -467,30 +467,18 @@
 	return
 }
 
-func intLiteral(n *gc.Node) (x int64, ok bool) {
-	switch {
-	case n == nil:
-		return
-	case gc.Isconst(n, gc.CTINT):
-		return n.Int(), true
-	case gc.Isconst(n, gc.CTBOOL):
-		return int64(obj.Bool2int(n.Bool())), true
-	}
-	return
-}
-
 // gins is called by the front end.
 // It synthesizes some multiple-instruction sequences
 // so the front end can stay simpler.
 func gins(as int, f, t *gc.Node) *obj.Prog {
 	if as >= obj.A_ARCHSPECIFIC {
-		if x, ok := intLiteral(f); ok {
+		if x, ok := f.IntLiteral(); ok {
 			ginscon(as, x, t)
 			return nil // caller must not use
 		}
 	}
 	if as == arm64.ACMP {
-		if x, ok := intLiteral(t); ok {
+		if x, ok := t.IntLiteral(); ok {
 			ginscon2(as, f, x)
 			return nil // caller must not use
 		}
diff --git a/src/cmd/compile/internal/big/float.go b/src/cmd/compile/internal/big/float.go
index d7aa895..1f8edee 100644
--- a/src/cmd/compile/internal/big/float.go
+++ b/src/cmd/compile/internal/big/float.go
@@ -1272,7 +1272,7 @@
 		ex = ey
 	}
 
-	// operands may have cancelled each other out
+	// operands may have canceled each other out
 	if len(z.mant) == 0 {
 		z.acc = Exact
 		z.form = zero
diff --git a/src/cmd/compile/internal/big/int_test.go b/src/cmd/compile/internal/big/int_test.go
index 9787462..bf3b832 100644
--- a/src/cmd/compile/internal/big/int_test.go
+++ b/src/cmd/compile/internal/big/int_test.go
@@ -698,7 +698,9 @@
 		testGcd(t, d, x, y, a, b)
 	}
 
-	quick.Check(checkGcd, nil)
+	if err := quick.Check(checkGcd, nil); err != nil {
+		t.Error(err)
+	}
 }
 
 var primes = []string{
diff --git a/src/cmd/compile/internal/gc/cgen.go b/src/cmd/compile/internal/gc/cgen.go
index 951f84f..d6538a3 100644
--- a/src/cmd/compile/internal/gc/cgen.go
+++ b/src/cmd/compile/internal/gc/cgen.go
@@ -810,10 +810,7 @@
 	a := &p.To
 	a.Type = obj.TYPE_MEM
 	a.Reg = int16(Thearch.REGSP)
-	a.Offset = 0
-	if HasLinkRegister() {
-		a.Offset += int64(Widthptr)
-	}
+	a.Offset = Ctxt.FixedFrameSize()
 	p2 := Thearch.Gins(Thearch.Optoas(OAS, Types[Tptr]), &src, nil)
 	p2.To = p.To
 	p2.To.Offset += int64(Widthptr)
@@ -849,10 +846,7 @@
 	a := &p.To
 	a.Type = obj.TYPE_MEM
 	a.Reg = int16(Thearch.REGSP)
-	a.Offset = 0
-	if HasLinkRegister() {
-		a.Offset += int64(Widthptr)
-	}
+	a.Offset = Ctxt.FixedFrameSize()
 	if needType {
 		a.Offset += int64(Widthptr)
 	}
@@ -1686,10 +1680,7 @@
 		a.Op = OINDREG
 		a.Reg = int16(Thearch.REGSP)
 		a.Addable = true
-		a.Xoffset = fp.Width
-		if HasLinkRegister() {
-			a.Xoffset += int64(Ctxt.Arch.Ptrsize)
-		}
+		a.Xoffset = fp.Width + Ctxt.FixedFrameSize()
 		a.Type = n.Type
 		return
 
@@ -2219,11 +2210,7 @@
 		var flist Iter
 		t = Structfirst(&flist, Getoutarg(t))
 		if t != nil {
-			w := t.Width
-			if HasLinkRegister() {
-				w += int64(Ctxt.Arch.Ptrsize)
-			}
-			return w
+			return t.Width + Ctxt.FixedFrameSize()
 		}
 	}
 
@@ -2379,17 +2366,11 @@
 		// size of arguments at 0(SP)
 		stk.Op = OINDREG
 		stk.Reg = int16(Thearch.REGSP)
-		stk.Xoffset = 0
-		if HasLinkRegister() {
-			stk.Xoffset += int64(Ctxt.Arch.Ptrsize)
-		}
+		stk.Xoffset = Ctxt.FixedFrameSize()
 		Thearch.Ginscon(Thearch.Optoas(OAS, Types[TINT32]), int64(Argsize(f.Type)), &stk)
 
 		// FuncVal* at 8(SP)
-		stk.Xoffset = int64(Widthptr)
-		if HasLinkRegister() {
-			stk.Xoffset += int64(Ctxt.Arch.Ptrsize)
-		}
+		stk.Xoffset = int64(Widthptr) + Ctxt.FixedFrameSize()
 
 		var reg Node
 		Nodreg(&reg, Types[Tptr], Thearch.REGCALLX2)
@@ -2447,10 +2428,7 @@
 
 	var nodsp Node
 	Nodindreg(&nodsp, Types[Tptr], Thearch.REGSP)
-	nodsp.Xoffset = 0
-	if HasLinkRegister() {
-		nodsp.Xoffset += int64(Ctxt.Arch.Ptrsize)
-	}
+	nodsp.Xoffset = Ctxt.FixedFrameSize()
 	if proc != 0 {
 		nodsp.Xoffset += 2 * int64(Widthptr) // leave room for size & fn
 	}
@@ -2541,11 +2519,6 @@
 	Ginscall(n.Left, proc)
 }
 
-func HasLinkRegister() bool {
-	c := Ctxt.Arch.Thechar
-	return c != '6' && c != '8'
-}
-
 /*
  * call to n has already been generated.
  * generate:
@@ -2568,10 +2541,7 @@
 	nod.Reg = int16(Thearch.REGSP)
 	nod.Addable = true
 
-	nod.Xoffset = fp.Width
-	if HasLinkRegister() {
-		nod.Xoffset += int64(Ctxt.Arch.Ptrsize)
-	}
+	nod.Xoffset = fp.Width + Ctxt.FixedFrameSize()
 	nod.Type = fp.Type
 	Cgen_as(res, &nod)
 }
@@ -2597,10 +2567,7 @@
 	nod1.Op = OINDREG
 	nod1.Reg = int16(Thearch.REGSP)
 	nod1.Addable = true
-	nod1.Xoffset = fp.Width
-	if HasLinkRegister() {
-		nod1.Xoffset += int64(Ctxt.Arch.Ptrsize)
-	}
+	nod1.Xoffset = fp.Width + Ctxt.FixedFrameSize()
 	nod1.Type = fp.Type
 
 	if res.Op != OREGISTER {
@@ -2858,10 +2825,7 @@
 	arg.Op = OINDREG
 	arg.Reg = int16(Thearch.REGSP)
 	arg.Addable = true
-	arg.Xoffset = 0
-	if HasLinkRegister() {
-		arg.Xoffset = int64(Ctxt.Arch.Ptrsize)
-	}
+	arg.Xoffset = Ctxt.FixedFrameSize()
 	arg.Type = Ptrto(Types[TUINT8])
 	Cgen(typename(res.Type), &arg)
 	arg.Xoffset += int64(Widthptr)
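
The cgen.go hunks above replace scattered HasLinkRegister adjustments with
a single Ctxt.FixedFrameSize() query. A minimal sketch of the semantics,
assuming only the architecture character and pointer size (not the
verbatim obj implementation):

	// fixedFrameSize: x86 frames have no fixed prefix because CALL pushes
	// the return address, while link-register machines reserve one
	// pointer-sized slot at 0(SP) for the saved LR.
	func fixedFrameSize(thechar, ptrsize int) int64 {
		switch thechar {
		case '6', '8': // amd64, 386
			return 0
		default: // LR machines: '5' (arm), '7' (arm64), '9' (ppc64), ...
			return int64(ptrsize)
		}
	}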
diff --git a/src/cmd/compile/internal/gc/const.go b/src/cmd/compile/internal/gc/const.go
index de23190..71b582b 100644
--- a/src/cmd/compile/internal/gc/const.go
+++ b/src/cmd/compile/internal/gc/const.go
@@ -10,6 +10,19 @@
 	"strings"
 )
 
+// IntLiteral returns the Node's literal value as an integer.
+func (n *Node) IntLiteral() (x int64, ok bool) {
+	switch {
+	case n == nil:
+		return
+	case Isconst(n, CTINT):
+		return n.Int(), true
+	case Isconst(n, CTBOOL):
+		return int64(obj.Bool2int(n.Bool())), true
+	}
+	return
+}
+
 // Int returns n as an int.
 // n must be an integer constant.
 func (n *Node) Int() int64 {
@@ -434,19 +447,8 @@
 		return
 	}
 
-	if !doesoverflow(v, t) {
-		return
-	}
-
-	switch v.Ctype() {
-	case CTINT, CTRUNE:
-		Yyerror("constant %v overflows %v", v.U.(*Mpint), t)
-
-	case CTFLT:
-		Yyerror("constant %v overflows %v", Fconv(v.U.(*Mpflt), obj.FmtSharp), t)
-
-	case CTCPLX:
-		Yyerror("constant %v overflows %v", Fconv(v.U.(*Mpflt), obj.FmtSharp), t)
+	if doesoverflow(v, t) {
+		Yyerror("constant %s overflows %v", Vconv(v, 0), t)
 	}
 }
 
@@ -997,37 +999,37 @@
 		goto setfalse
 
 	case OEQ<<16 | CTSTR:
-		if cmpslit(nl, nr) == 0 {
+		if strlit(nl) == strlit(nr) {
 			goto settrue
 		}
 		goto setfalse
 
 	case ONE<<16 | CTSTR:
-		if cmpslit(nl, nr) != 0 {
+		if strlit(nl) != strlit(nr) {
 			goto settrue
 		}
 		goto setfalse
 
 	case OLT<<16 | CTSTR:
-		if cmpslit(nl, nr) < 0 {
+		if strlit(nl) < strlit(nr) {
 			goto settrue
 		}
 		goto setfalse
 
 	case OLE<<16 | CTSTR:
-		if cmpslit(nl, nr) <= 0 {
+		if strlit(nl) <= strlit(nr) {
 			goto settrue
 		}
 		goto setfalse
 
 	case OGE<<16 | CTSTR:
-		if cmpslit(nl, nr) >= 0 {
+		if strlit(nl) >= strlit(nr) {
 			goto settrue
 		}
 		goto setfalse
 
 	case OGT<<16 | CTSTR:
-		if cmpslit(nl, nr) > 0 {
+		if strlit(nl) > strlit(nr) {
 			goto settrue
 		}
 		goto setfalse
@@ -1352,8 +1354,9 @@
 	Convlit(rp, Types[TINT])
 }
 
-func cmpslit(l, r *Node) int {
-	return stringsCompare(l.Val().U.(string), r.Val().U.(string))
+// strlit returns the value of a literal string Node as a string.
+func strlit(n *Node) string {
+	return n.Val().U.(string)
 }
 
 func Smallintconst(n *Node) bool {
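
With strlit exposing the literal value, the evconst cases above fold
constant string comparisons using Go's native string ordering, e.g.:

	const lt = "abc" < "abd"  // folded at compile time to true
	const eq = "abc" == "abc" // folded at compile time to true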
diff --git a/src/cmd/compile/internal/gc/dcl.go b/src/cmd/compile/internal/gc/dcl.go
index 0609274..771fe93 100644
--- a/src/cmd/compile/internal/gc/dcl.go
+++ b/src/cmd/compile/internal/gc/dcl.go
@@ -79,16 +79,6 @@
 	block = d.Block
 }
 
-func poptodcl() {
-	// pop the old marker and push a new one
-	// (cannot reuse the existing one)
-	// because we use the markers to identify blocks
-	// for the goto restriction checks.
-	popdcl()
-
-	markdcl()
-}
-
 func markdcl() {
 	d := push()
 	d.Name = "" // used as a mark in fifo
@@ -192,7 +182,7 @@
 
 	gen := 0
 	if ctxt == PEXTERN {
-		externdcl = list(externdcl, n)
+		externdcl = append(externdcl, n)
 		if dflag() {
 			fmt.Printf("\t%v global decl %v %p\n", Ctxt.Line(int(lineno)), s, n)
 		}
@@ -1509,5 +1499,5 @@
 	s1 := funcsym(s)
 	s1.Def = newfuncname(s1)
 	s1.Def.Func.Shortname = newname(s)
-	funcsyms = list(funcsyms, s1.Def)
+	funcsyms = append(funcsyms, s1.Def)
 }
diff --git a/src/cmd/compile/internal/gc/esc.go b/src/cmd/compile/internal/gc/esc.go
index 585b327..c989f51 100644
--- a/src/cmd/compile/internal/gc/esc.go
+++ b/src/cmd/compile/internal/gc/esc.go
@@ -248,17 +248,6 @@
 	return x + 1
 }
 
-func satAdd8(x, y int8) int8 {
-	z := x + y
-	if x^y < 0 || x^z >= 0 {
-		return z
-	}
-	if x < 0 {
-		return -128
-	}
-	return 127
-}
-
 func min8(a, b int8) int8 {
 	if a < b {
 		return a
@@ -385,10 +374,9 @@
 // something whose address is returned -- but that implies stored into the heap,
 // hence EscHeap, which means that the details are not currently relevant. )
 const (
-	bitsPerOutputInTag = 3                                         // For each output, the number of bits for a tag
-	bitsMaskForTag     = uint16(1<<bitsPerOutputInTag) - 1         // The bit mask to extract a single tag.
-	outputsPerTag      = (16 - EscReturnBits) / bitsPerOutputInTag // The number of outputs that can be tagged.
-	maxEncodedLevel    = int(bitsMaskForTag - 1)                   // The largest level that can be stored in a tag.
+	bitsPerOutputInTag = 3                                 // For each output, the number of bits for a tag
+	bitsMaskForTag     = uint16(1<<bitsPerOutputInTag) - 1 // The bit mask to extract a single tag.
+	maxEncodedLevel    = int(bitsMaskForTag - 1)           // The largest level that can be stored in a tag.
 )
 
 type EscState struct {
@@ -662,10 +650,15 @@
 
 		n.Left.Sym.Label = nil
 
-		// Everything but fixed array is a dereference.
 	case ORANGE:
 		if n.List != nil && n.List.Next != nil {
-			if Isfixedarray(n.Type) {
+			// Everything but fixed array is a dereference.
+
+			// If the fixed array is really the address of a fixed array,
+			// it is also a dereference, because it is implicitly
+			// dereferenced (see #12588).
+			if Isfixedarray(n.Type) &&
+				!(Isptr[n.Right.Type.Etype] && Eqtype(n.Right.Type.Type, n.Type)) {
 				escassign(e, n.List.Next.N, n.Right)
 			} else {
 				escassignDereference(e, n.List.Next.N, n.Right)
@@ -958,6 +951,7 @@
 		OMAPLIT,
 		OSTRUCTLIT,
 		OPTRLIT,
+		ODDDARG,
 		OCALLPART:
 		break
 
@@ -1463,8 +1457,9 @@
 		}
 	}
 
+	var src *Node
 	for t := getinargx(fntype).Type; ll != nil; ll = ll.Next {
-		src := ll.N
+		src = ll.N
 		if t.Isddd && !n.Isddd {
 			// Introduce ODDDARG node to represent ... allocation.
 			src = Nod(ODDDARG, nil, nil)
@@ -1505,17 +1500,17 @@
 		}
 
 		if src != ll.N {
+			// This occurs when the function parameter is ... (Isddd) but the call is not (n.Isddd is false).
 			break
 		}
 		t = t.Down
 	}
 
-	// "..." arguments are untracked
 	for ; ll != nil; ll = ll.Next {
-		escassign(e, &e.theSink, ll.N)
 		if Debug['m'] > 2 {
-			fmt.Printf("%v::esccall:: ... <- %v, untracked\n", Ctxt.Line(int(lineno)), Nconv(ll.N, obj.FmtShort))
+			fmt.Printf("%v::esccall:: ... <- %v\n", Ctxt.Line(int(lineno)), Nconv(ll.N, obj.FmtShort))
 		}
+		escassign(e, src, ll.N) // args to slice
 	}
 }
 
@@ -1701,6 +1696,16 @@
 	case OAPPEND:
 		escwalk(e, level, dst, src.List.N)
 
+	case ODDDARG:
+		if leaks {
+			src.Esc = EscHeap
+			if Debug['m'] != 0 {
+				Warnl(int(src.Lineno), "%v escapes to heap", Nconv(src, obj.FmtShort))
+			}
+		}
+		// similar to a slice arraylit and its args.
+		level = level.dec()
+
 	case OARRAYLIT:
 		if Isfixedarray(src.Type) {
 			break
@@ -1711,8 +1716,7 @@
 
 		fallthrough
 
-	case ODDDARG,
-		OMAKECHAN,
+	case OMAKECHAN,
 		OMAKEMAP,
 		OMAKESLICE,
 		OARRAYRUNESTR,
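
The net effect of the ODDDARG changes above: arguments passed through ...
are assigned to the implicit slice rather than to the sink, so they escape
only if that slice leaks. An illustrative consequence (not part of the
diff):

	// The backing array of the implicit []int below can stay on the
	// stack, because sum does not leak xs.
	func sum(xs ...int) (t int) {
		for _, x := range xs {
			t += x
		}
		return
	}

	func caller() int {
		return sum(1, 2, 3)
	}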
diff --git a/src/cmd/compile/internal/gc/export.go b/src/cmd/compile/internal/gc/export.go
index de3edfe..878554b 100644
--- a/src/cmd/compile/internal/gc/export.go
+++ b/src/cmd/compile/internal/gc/export.go
@@ -253,21 +253,12 @@
 	}
 }
 
+// methodbyname sorts types by symbol name.
 type methodbyname []*Type
 
-func (x methodbyname) Len() int {
-	return len(x)
-}
-
-func (x methodbyname) Swap(i, j int) {
-	x[i], x[j] = x[j], x[i]
-}
-
-func (x methodbyname) Less(i, j int) bool {
-	a := x[i]
-	b := x[j]
-	return stringsCompare(a.Sym.Name, b.Sym.Name) < 0
-}
+func (x methodbyname) Len() int           { return len(x) }
+func (x methodbyname) Swap(i, j int)      { x[i], x[j] = x[j], x[i] }
+func (x methodbyname) Less(i, j int) bool { return x[i].Sym.Name < x[j].Sym.Name }
 
 func dumpexporttype(t *Type) {
 	if t == nil {
@@ -289,24 +280,15 @@
 		return
 	}
 
-	n := 0
+	var m []*Type
 	for f := t.Method; f != nil; f = f.Down {
 		dumpexporttype(f)
-		n++
+		m = append(m, f)
 	}
-
-	m := make([]*Type, n)
-	i := 0
-	for f := t.Method; f != nil; f = f.Down {
-		m[i] = f
-		i++
-	}
-	sort.Sort(methodbyname(m[:n]))
+	sort.Sort(methodbyname(m))
 
 	fmt.Fprintf(bout, "\ttype %v %v\n", Sconv(t.Sym, obj.FmtSharp), Tconv(t, obj.FmtSharp|obj.FmtLong))
-	var f *Type
-	for i := 0; i < n; i++ {
-		f = m[i]
+	for _, f := range m {
 		if f.Nointerface {
 			fmt.Fprintf(bout, "\t//go:nointerface\n")
 		}
diff --git a/src/cmd/compile/internal/gc/fmt.go b/src/cmd/compile/internal/gc/fmt.go
index 08994aa..4eb989e 100644
--- a/src/cmd/compile/internal/gc/fmt.go
+++ b/src/cmd/compile/internal/gc/fmt.go
@@ -1596,8 +1596,6 @@
 
 	sf := flag
 	sm := setfmode(&flag)
-	var r int
-	_ = r
 	str := symfmt(s, flag)
 	flag = sf
 	fmtmode = sm
@@ -1632,8 +1630,6 @@
 		flag |= obj.FmtUnsigned
 	}
 
-	var r int
-	_ = r
 	str := typefmt(t, flag)
 
 	if fmtmode == FTypeId && (sf&obj.FmtUnsigned != 0) {
@@ -1660,8 +1656,6 @@
 	sf := flag
 	sm := setfmode(&flag)
 
-	var r int
-	_ = r
 	var str string
 	switch fmtmode {
 	case FErr, FExp:
@@ -1694,8 +1688,6 @@
 
 	sf := flag
 	sm := setfmode(&flag)
-	var r int
-	_ = r
 	sep := "; "
 	if fmtmode == FDbg {
 		sep = "\n"
diff --git a/src/cmd/compile/internal/gc/go.go b/src/cmd/compile/internal/gc/go.go
index a0c1ab8..dcc16d0 100644
--- a/src/cmd/compile/internal/gc/go.go
+++ b/src/cmd/compile/internal/gc/go.go
@@ -210,7 +210,7 @@
 	Embedlineno int32 // first use of TFORW as embedded type
 
 	// for TFORW, where to copy the eventual value to
-	Copyto *NodeList
+	Copyto []*Node
 
 	Lastfn *Node // for usefield
 }
@@ -376,18 +376,17 @@
 	type_  *Type
 	mtype  *Type
 	offset int32
-	link   *Sig
 }
 
 type Io struct {
 	infile     string
 	bin        *obj.Biobuf
-	nlsemi     int
-	eofnl      int
+	cp         string // used for content when bin==nil
 	last       int
 	peekc      int
-	peekc1     int    // second peekc for ...
-	cp         string // used for content when bin==nil
+	peekc1     int // second peekc for ...
+	nlsemi     bool
+	eofnl      bool
 	importsafe bool
 }
 
@@ -584,13 +583,13 @@
 
 var xtop *NodeList
 
-var externdcl *NodeList
+var externdcl []*Node
 
 var exportlist []*Node
 
 var importlist []*Node // imported functions and methods with inlinable bodies
 
-var funcsyms *NodeList
+var funcsyms []*Node
 
 var dclcontext uint8 // PEXTERN/PAUTO
 
@@ -598,7 +597,7 @@
 
 var statuniqgen int // name generator for static temps
 
-var loophack int
+var loophack bool
 
 var iota_ int32
 
@@ -630,12 +629,6 @@
 
 var nblank *Node
 
-var hunk string
-
-var nhunk int32
-
-var thunk int32
-
 var Funcdepth int32
 
 var typecheckok bool
diff --git a/src/cmd/compile/internal/gc/go.y b/src/cmd/compile/internal/gc/go.y
index 6d148e4..599449f 100644
--- a/src/cmd/compile/internal/gc/go.y
+++ b/src/cmd/compile/internal/gc/go.y
@@ -2311,6 +2311,6 @@
 	// set up for another one now that we're done.
 	// See comment in lex.C about loophack.
 	if lbr == LBODY {
-		loophack = 1
+		loophack = true
 	}
 }
diff --git a/src/cmd/compile/internal/gc/gsubr.go b/src/cmd/compile/internal/gc/gsubr.go
index ecb2303..b52e14d 100644
--- a/src/cmd/compile/internal/gc/gsubr.go
+++ b/src/cmd/compile/internal/gc/gsubr.go
@@ -87,7 +87,9 @@
 	p.To.Val = nil
 	if as != obj.AJMP && likely != 0 && Thearch.Thechar != '9' && Thearch.Thechar != '7' {
 		p.From.Type = obj.TYPE_CONST
-		p.From.Offset = int64(obj.Bool2int(likely > 0))
+		if likely > 0 {
+			p.From.Offset = 1
+		}
 	}
 
 	if Debug['g'] != 0 {
@@ -576,9 +578,7 @@
 		n.Op = OINDREG
 
 		n.Reg = int16(Thearch.REGSP)
-		if HasLinkRegister() {
-			n.Xoffset += int64(Ctxt.Arch.Ptrsize)
-		}
+		n.Xoffset += Ctxt.FixedFrameSize()
 
 	case 1: // input arg
 		n.Class = PPARAM
diff --git a/src/cmd/compile/internal/gc/lex.go b/src/cmd/compile/internal/gc/lex.go
index 5150e2b..340e37f 100644
--- a/src/cmd/compile/internal/gc/lex.go
+++ b/src/cmd/compile/internal/gc/lex.go
@@ -26,7 +26,7 @@
 
 var yylast int
 
-var imported_unsafe int
+var imported_unsafe bool
 
 var (
 	goos    string
@@ -60,26 +60,6 @@
 	{"wb", &Debug_wb},                 // print information about write barriers
 }
 
-// Our own isdigit, isspace, isalpha, isalnum that take care
-// of EOF and other out of range arguments.
-func yy_isdigit(c int) bool {
-	return c >= 0 && c <= 0xFF && isdigit(c)
-}
-
-func yy_isspace(c int) bool {
-	return c == ' ' || c == '\t' || c == '\n' || c == '\r'
-}
-
-func yy_isalpha(c int) bool {
-	return 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z'
-}
-
-func yy_isalnum(c int) bool {
-	return c >= 0 && c <= 0xFF && isalnum(c)
-}
-
-// Disallow use of isdigit etc.
-
 const (
 	EOF = -1
 )
@@ -334,8 +314,8 @@
 
 		curio.peekc = 0
 		curio.peekc1 = 0
-		curio.nlsemi = 0
-		curio.eofnl = 0
+		curio.nlsemi = false
+		curio.eofnl = false
 		curio.last = 0
 
 		// Skip initial BOM if present.
@@ -346,7 +326,7 @@
 		block = 1
 		iota_ = -1000000
 
-		imported_unsafe = 0
+		imported_unsafe = false
 
 		yyparse()
 		if nsyntaxerrors != 0 {
@@ -484,9 +464,9 @@
 	}
 
 	// Phase 9: Check external declarations.
-	for l := externdcl; l != nil; l = l.Next {
-		if l.N.Op == ONAME {
-			typecheck(&l.N, Erv)
+	for i, n := range externdcl {
+		if n.Op == ONAME {
+			typecheck(&externdcl[i], Erv)
 		}
 	}
 
@@ -587,7 +567,7 @@
 // is this path a local name?  begins with ./ or ../ or /
 func islocalname(name string) bool {
 	return strings.HasPrefix(name, "/") ||
-		Ctxt.Windows != 0 && len(name) >= 3 && yy_isalpha(int(name[0])) && name[1] == ':' && name[2] == '/' ||
+		Ctxt.Windows != 0 && len(name) >= 3 && isAlpha(int(name[0])) && name[1] == ':' && name[2] == '/' ||
 		strings.HasPrefix(name, "./") || name == "." ||
 		strings.HasPrefix(name, "../") || name == ".."
 }
@@ -615,9 +595,7 @@
 	// local imports should be canonicalized already.
 	// don't want to see "encoding/../encoding/base64"
 	// as different from "encoding/base64".
-	var q string
-	_ = q
-	if path.Clean(name) != name {
+	if q := path.Clean(name); q != name {
 		Yyerror("non-canonical import path %q (should be %q)", name, q)
 		return "", false
 	}
@@ -702,7 +680,7 @@
 
 		importpkg = mkpkg(f.U.(string))
 		cannedimports("unsafe.o", unsafeimport)
-		imported_unsafe = 1
+		imported_unsafe = true
 		return
 	}
 
@@ -803,19 +781,18 @@
 	curio.peekc = 0
 	curio.peekc1 = 0
 	curio.infile = file
-	curio.nlsemi = 0
+	curio.nlsemi = false
 	typecheckok = true
 
-	var c int32
 	for {
-		c = int32(getc())
+		c := getc()
 		if c == EOF {
 			break
 		}
 		if c != '$' {
 			continue
 		}
-		c = int32(getc())
+		c = getc()
 		if c == EOF {
 			break
 		}
@@ -854,17 +831,44 @@
 	curio.peekc1 = 0
 	curio.infile = file
 	curio.cp = cp
-	curio.nlsemi = 0
+	curio.nlsemi = false
 	curio.importsafe = false
 
 	typecheckok = true
 	incannedimport = 1
 }
 
+func isSpace(c int) bool {
+	return c == ' ' || c == '\t' || c == '\n' || c == '\r'
+}
+
+func isAlpha(c int) bool {
+	return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
+}
+
+func isDigit(c int) bool {
+	return '0' <= c && c <= '9'
+}
+
+func isAlnum(c int) bool {
+	return isAlpha(c) || isDigit(c)
+}
+
+func plan9quote(s string) string {
+	if s == "" {
+		return "''"
+	}
+	for _, c := range s {
+		if c <= ' ' || c == '\'' {
+			return "'" + strings.Replace(s, "'", "''", -1) + "'"
+		}
+	}
+	return s
+}
+
 func isfrog(c int) bool {
 	// complain about possibly invisible control characters
 	if c < ' ' {
-		return !yy_isspace(c) // exclude good white space
+		return !isSpace(c) // exclude good white space
 	}
 
 	if 0x7f <= c && c <= 0xa0 { // DEL, unicode block including unbreakable space.
@@ -874,8 +878,8 @@
 }
 
 type Loophack struct {
-	v    int
 	next *Loophack
+	v    bool
 }
 
 var _yylex_lstk *Loophack
@@ -885,7 +889,6 @@
 	var escflag int
 	var v int64
 	var cp *bytes.Buffer
-	var rune_ uint
 	var s *Sym
 	var h *Loophack
 	var str string
@@ -894,8 +897,8 @@
 
 l0:
 	c := getc()
-	if yy_isspace(c) {
-		if c == '\n' && curio.nlsemi != 0 {
+	if isSpace(c) {
+		if c == '\n' && curio.nlsemi {
 			ungetc(c)
 			if Debug['x'] != 0 {
 				fmt.Printf("lex: implicit semi\n")
@@ -916,20 +919,20 @@
 		goto talph
 	}
 
-	if yy_isalpha(c) {
+	if isAlpha(c) {
 		cp = &lexbuf
 		cp.Reset()
 		goto talph
 	}
 
-	if yy_isdigit(c) {
+	if isDigit(c) {
 		cp = &lexbuf
 		cp.Reset()
 		if c != '0' {
 			for {
 				cp.WriteByte(byte(c))
 				c = getc()
-				if yy_isdigit(c) {
+				if isDigit(c) {
 					continue
 				}
 				if c == '.' {
@@ -951,7 +954,7 @@
 			for {
 				cp.WriteByte(byte(c))
 				c = getc()
-				if yy_isdigit(c) {
+				if isDigit(c) {
 					continue
 				}
 				if c >= 'a' && c <= 'f' {
@@ -976,7 +979,7 @@
 
 		c1 = 0
 		for {
-			if !yy_isdigit(c) {
+			if !isDigit(c) {
 				break
 			}
 			if c < '0' || c > '7' {
@@ -1014,7 +1017,7 @@
 
 	case '.':
 		c1 = getc()
-		if yy_isdigit(c1) {
+		if isDigit(c1) {
 			cp = &lexbuf
 			cp.Reset()
 			cp.WriteByte(byte(c))
@@ -1048,8 +1051,7 @@
 			if v < utf8.RuneSelf || escflag != 0 {
 				cp.WriteByte(byte(v))
 			} else {
-				rune_ = uint(v)
-				cp.WriteRune(rune(rune_))
+				cp.WriteRune(rune(v))
 			}
 		}
 
@@ -1106,23 +1108,23 @@
 	case '/':
 		c1 = getc()
 		if c1 == '*' {
-			nl := 0
+			nl := false
 			for {
 				c = int(getr())
 				if c == '\n' {
-					nl = 1
+					nl = true
 				}
 				for c == '*' {
 					c = int(getr())
 					if c == '/' {
-						if nl != 0 {
+						if nl {
 							ungetc('\n')
 						}
 						goto l0
 					}
 
 					if c == '\n' {
-						nl = 1
+						nl = true
 					}
 				}
 
@@ -1308,15 +1310,15 @@
 		 *
 		 * when we see the keyword, the next
 		 * non-parenthesized '{' becomes an LBODY.
-		 * loophack is normally 0.
-		 * a keyword makes it go up to 1.
-		 * parens push loophack onto a stack and go back to 0.
-		 * a '{' with loophack == 1 becomes LBODY and disables loophack.
+		 * loophack is normally false.
+		 * a keyword sets it to true.
+		 * parens push loophack onto a stack and go back to false.
+		 * a '{' with loophack == true becomes LBODY and disables loophack.
 		 *
 		 * i said it was clumsy.
 		 */
 	case '(', '[':
-		if loophack != 0 || _yylex_lstk != nil {
+		if loophack || _yylex_lstk != nil {
 			h = new(Loophack)
 			if h == nil {
 				Flusherrors()
@@ -1327,7 +1329,7 @@
 			h.v = loophack
 			h.next = _yylex_lstk
 			_yylex_lstk = h
-			loophack = 0
+			loophack = false
 		}
 
 		goto lx
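
The loophack comment above is easier to see with a concrete case. An
illustrative snippet (not from the diff):

	type T struct{ x int }

	func f(v T) {
		// Without parentheses, the '{' after T would become LBODY and
		//	if v == T{x: 1} { ... }
		// would not parse as a composite literal.
		if v == (T{x: 1}) { // '(' pushes loophack onto the stack
			println("match")
		}
	}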
@@ -1342,11 +1344,11 @@
 		goto lx
 
 	case '{':
-		if loophack == 1 {
+		if loophack {
 			if Debug['x'] != 0 {
 				fmt.Printf("%v lex: LBODY\n", Ctxt.Line(int(lexlineno)))
 			}
-			loophack = 0
+			loophack = false
 			return LBODY
 		}
 
@@ -1395,14 +1397,14 @@
 	for {
 		if c >= utf8.RuneSelf {
 			ungetc(c)
-			rune_ = uint(getr())
+			r := rune(getr())
 
 			// 0xb7 · is used for internal names
-			if !unicode.IsLetter(rune(rune_)) && !unicode.IsDigit(rune(rune_)) && (importpkg == nil || rune_ != 0xb7) {
-				Yyerror("invalid identifier character U+%04x", rune_)
+			if !unicode.IsLetter(r) && !unicode.IsDigit(r) && (importpkg == nil || r != 0xb7) {
+				Yyerror("invalid identifier character U+%04x", r)
 			}
-			cp.WriteRune(rune(rune_))
-		} else if !yy_isalnum(c) && c != '_' {
+			cp.WriteRune(r)
+		} else if !isAlnum(c) && c != '_' {
 			break
 		} else {
 			cp.WriteByte(byte(c))
@@ -1419,7 +1421,7 @@
 		goto l0
 
 	case LFOR, LIF, LSWITCH, LSELECT:
-		loophack = 1 // see comment about loophack above
+		loophack = true // see comment about loophack above
 	}
 
 	if Debug['x'] != 0 {
@@ -1450,7 +1452,7 @@
 	for {
 		cp.WriteByte(byte(c))
 		c = getc()
-		if !yy_isdigit(c) {
+		if !isDigit(c) {
 			break
 		}
 	}
@@ -1475,10 +1477,10 @@
 		c = getc()
 	}
 
-	if !yy_isdigit(c) {
+	if !isDigit(c) {
 		Yyerror("malformed floating point constant exponent")
 	}
-	for yy_isdigit(c) {
+	for isDigit(c) {
 		cp.WriteByte(byte(c))
 		c = getc()
 	}
@@ -1548,7 +1550,7 @@
 
 func more(pp *string) bool {
 	p := *pp
-	for p != "" && yy_isspace(int(p[0])) {
+	for p != "" && isSpace(int(p[0])) {
 		p = p[1:]
 	}
 	*pp = p
@@ -1594,7 +1596,7 @@
 		}
 
 		if verb == "go:linkname" {
-			if imported_unsafe == 0 {
+			if !imported_unsafe {
 				Yyerror("//go:linkname only allowed in Go files that import \"unsafe\"")
 			}
 			f := strings.Fields(cmd)
@@ -1711,7 +1713,7 @@
 		return ""
 	}
 	i := 0
-	for i < len(p) && !yy_isspace(int(p[i])) && p[i] != '"' {
+	for i < len(p) && !isSpace(int(p[i])) && p[i] != '"' {
 		i++
 	}
 	sym := p[:i]
@@ -1746,9 +1748,7 @@
 	verb := text[3:] // skip "go:"
 
 	if verb == "cgo_dynamic_linker" || verb == "dynlinker" {
-		var ok bool
-		var p string
-		p, ok = getquoted(&q)
+		p, ok := getquoted(&q)
 		if !ok {
 			Yyerror("usage: //go:cgo_dynamic_linker \"path\"")
 			return
@@ -1830,9 +1830,7 @@
 	}
 
 	if verb == "cgo_ldflag" {
-		var ok bool
-		var p string
-		p, ok = getquoted(&q)
+		p, ok := getquoted(&q)
 		if !ok {
 			Yyerror("usage: //go:cgo_ldflag \"arg\"")
 			return
@@ -1866,7 +1864,7 @@
 func yylex(yylval *yySymType) int32 {
 	lx := int(_yylex(yylval))
 
-	if curio.nlsemi != 0 && lx == EOF {
+	if curio.nlsemi && lx == EOF {
 		// Treat EOF as "end of line" for the purposes
 		// of inserting a semicolon.
 		lx = ';'
@@ -1884,10 +1882,10 @@
 		')',
 		'}',
 		']':
-		curio.nlsemi = 1
+		curio.nlsemi = true
 
 	default:
-		curio.nlsemi = 0
+		curio.nlsemi = false
 	}
 
 	// Track last two tokens returned by yylex.
@@ -1942,10 +1940,10 @@
 
 		// insert \n at EOF
 	case EOF:
-		if curio.eofnl != 0 || curio.last == '\n' {
+		if curio.eofnl || curio.last == '\n' {
 			return EOF
 		}
-		curio.eofnl = 1
+		curio.eofnl = true
 		c = '\n'
 		fallthrough
 
@@ -2189,32 +2187,22 @@
 	{"insofaras", LIGNORE, Txxx, OXXX},
 }
 
+// lexinit initializes known symbols and the basic types.
 func lexinit() {
-	var lex int
-	var s *Sym
-	var s1 *Sym
-	var t *Type
-	var etype int
+	for _, s := range syms {
+		lex := s.lexical
+		s1 := Lookup(s.name)
+		s1.Lexical = uint16(lex)
 
-	/*
-	 * initialize basic types array
-	 * initialize known symbols
-	 */
-	for i := 0; i < len(syms); i++ {
-		lex = syms[i].lexical
-		s = Lookup(syms[i].name)
-		s.Lexical = uint16(lex)
-
-		etype = syms[i].etype
-		if etype != Txxx {
+		if etype := s.etype; etype != Txxx {
 			if etype < 0 || etype >= len(Types) {
-				Fatalf("lexinit: %s bad etype", s.Name)
+				Fatalf("lexinit: %s bad etype", s.name)
 			}
-			s1 = Pkglookup(syms[i].name, builtinpkg)
-			t = Types[etype]
+			s2 := Pkglookup(s.name, builtinpkg)
+			t := Types[etype]
 			if t == nil {
 				t = typ(etype)
-				t.Sym = s1
+				t.Sym = s2
 
 				if etype != TANY && etype != TSTRING {
 					dowidth(t)
@@ -2222,19 +2210,18 @@
 				Types[etype] = t
 			}
 
-			s1.Lexical = LNAME
-			s1.Def = typenod(t)
-			s1.Def.Name = new(Name)
+			s2.Lexical = LNAME
+			s2.Def = typenod(t)
+			s2.Def.Name = new(Name)
 			continue
 		}
 
-		etype = syms[i].op
-		if etype != OXXX {
-			s1 = Pkglookup(syms[i].name, builtinpkg)
-			s1.Lexical = LNAME
-			s1.Def = Nod(ONAME, nil, nil)
-			s1.Def.Sym = s1
-			s1.Def.Etype = uint8(etype)
+		if etype := s.op; etype != OXXX {
+			s2 := Pkglookup(s.name, builtinpkg)
+			s2.Lexical = LNAME
+			s2.Def = Nod(ONAME, nil, nil)
+			s2.Def.Sym = s2
+			s2.Def.Etype = uint8(etype)
 		}
 	}
 
@@ -2247,7 +2234,7 @@
 
 	idealbool = typ(TBOOL)
 
-	s = Pkglookup("true", builtinpkg)
+	s := Pkglookup("true", builtinpkg)
 	s.Def = Nodbool(true)
 	s.Def.Sym = Lookup("true")
 	s.Def.Name = new(Name)
@@ -2446,136 +2433,121 @@
 	nodfp.Sym = Lookup(".fp")
 }
 
-var lexn = []struct {
-	lex  int
-	name string
-}{
-	{LANDAND, "ANDAND"},
-	{LANDNOT, "ANDNOT"},
-	{LASOP, "ASOP"},
-	{LBREAK, "BREAK"},
-	{LCASE, "CASE"},
-	{LCHAN, "CHAN"},
-	{LCOLAS, "COLAS"},
-	{LCOMM, "<-"},
-	{LCONST, "CONST"},
-	{LCONTINUE, "CONTINUE"},
-	{LDDD, "..."},
-	{LDEC, "DEC"},
-	{LDEFAULT, "DEFAULT"},
-	{LDEFER, "DEFER"},
-	{LELSE, "ELSE"},
-	{LEQ, "EQ"},
-	{LFALL, "FALL"},
-	{LFOR, "FOR"},
-	{LFUNC, "FUNC"},
-	{LGE, "GE"},
-	{LGO, "GO"},
-	{LGOTO, "GOTO"},
-	{LGT, "GT"},
-	{LIF, "IF"},
-	{LIMPORT, "IMPORT"},
-	{LINC, "INC"},
-	{LINTERFACE, "INTERFACE"},
-	{LLE, "LE"},
-	{LLITERAL, "LITERAL"},
-	{LLSH, "LSH"},
-	{LLT, "LT"},
-	{LMAP, "MAP"},
-	{LNAME, "NAME"},
-	{LNE, "NE"},
-	{LOROR, "OROR"},
-	{LPACKAGE, "PACKAGE"},
-	{LRANGE, "RANGE"},
-	{LRETURN, "RETURN"},
-	{LRSH, "RSH"},
-	{LSELECT, "SELECT"},
-	{LSTRUCT, "STRUCT"},
-	{LSWITCH, "SWITCH"},
-	{LTYPE, "TYPE"},
-	{LVAR, "VAR"},
+var lexn = map[int]string{
+	LANDAND:    "ANDAND",
+	LANDNOT:    "ANDNOT",
+	LASOP:      "ASOP",
+	LBREAK:     "BREAK",
+	LCASE:      "CASE",
+	LCHAN:      "CHAN",
+	LCOLAS:     "COLAS",
+	LCOMM:      "<-",
+	LCONST:     "CONST",
+	LCONTINUE:  "CONTINUE",
+	LDDD:       "...",
+	LDEC:       "DEC",
+	LDEFAULT:   "DEFAULT",
+	LDEFER:     "DEFER",
+	LELSE:      "ELSE",
+	LEQ:        "EQ",
+	LFALL:      "FALL",
+	LFOR:       "FOR",
+	LFUNC:      "FUNC",
+	LGE:        "GE",
+	LGO:        "GO",
+	LGOTO:      "GOTO",
+	LGT:        "GT",
+	LIF:        "IF",
+	LIMPORT:    "IMPORT",
+	LINC:       "INC",
+	LINTERFACE: "INTERFACE",
+	LLE:        "LE",
+	LLITERAL:   "LITERAL",
+	LLSH:       "LSH",
+	LLT:        "LT",
+	LMAP:       "MAP",
+	LNAME:      "NAME",
+	LNE:        "NE",
+	LOROR:      "OROR",
+	LPACKAGE:   "PACKAGE",
+	LRANGE:     "RANGE",
+	LRETURN:    "RETURN",
+	LRSH:       "RSH",
+	LSELECT:    "SELECT",
+	LSTRUCT:    "STRUCT",
+	LSWITCH:    "SWITCH",
+	LTYPE:      "TYPE",
+	LVAR:       "VAR",
 }
 
 func lexname(lex int) string {
-	for i := 0; i < len(lexn); i++ {
-		if lexn[i].lex == lex {
-			return lexn[i].name
-		}
+	if s, ok := lexn[lex]; ok {
+		return s
 	}
 	return fmt.Sprintf("LEX-%d", lex)
 }
 
-var yytfix = []struct {
-	have string
-	want string
-}{
-	{"$end", "EOF"},
-	{"LASOP", "op="},
-	{"LBREAK", "break"},
-	{"LCASE", "case"},
-	{"LCHAN", "chan"},
-	{"LCOLAS", ":="},
-	{"LCONST", "const"},
-	{"LCONTINUE", "continue"},
-	{"LDDD", "..."},
-	{"LDEFAULT", "default"},
-	{"LDEFER", "defer"},
-	{"LELSE", "else"},
-	{"LFALL", "fallthrough"},
-	{"LFOR", "for"},
-	{"LFUNC", "func"},
-	{"LGO", "go"},
-	{"LGOTO", "goto"},
-	{"LIF", "if"},
-	{"LIMPORT", "import"},
-	{"LINTERFACE", "interface"},
-	{"LMAP", "map"},
-	{"LNAME", "name"},
-	{"LPACKAGE", "package"},
-	{"LRANGE", "range"},
-	{"LRETURN", "return"},
-	{"LSELECT", "select"},
-	{"LSTRUCT", "struct"},
-	{"LSWITCH", "switch"},
-	{"LTYPE", "type"},
-	{"LVAR", "var"},
-	{"LANDAND", "&&"},
-	{"LANDNOT", "&^"},
-	{"LBODY", "{"},
-	{"LCOMM", "<-"},
-	{"LDEC", "--"},
-	{"LINC", "++"},
-	{"LEQ", "=="},
-	{"LGE", ">="},
-	{"LGT", ">"},
-	{"LLE", "<="},
-	{"LLT", "<"},
-	{"LLSH", "<<"},
-	{"LRSH", ">>"},
-	{"LOROR", "||"},
-	{"LNE", "!="},
+var yytfix = map[string]string{
+	"$end":       "EOF",
+	"LASOP":      "op=",
+	"LBREAK":     "break",
+	"LCASE":      "case",
+	"LCHAN":      "chan",
+	"LCOLAS":     ":=",
+	"LCONST":     "const",
+	"LCONTINUE":  "continue",
+	"LDDD":       "...",
+	"LDEFAULT":   "default",
+	"LDEFER":     "defer",
+	"LELSE":      "else",
+	"LFALL":      "fallthrough",
+	"LFOR":       "for",
+	"LFUNC":      "func",
+	"LGO":        "go",
+	"LGOTO":      "goto",
+	"LIF":        "if",
+	"LIMPORT":    "import",
+	"LINTERFACE": "interface",
+	"LMAP":       "map",
+	"LNAME":      "name",
+	"LPACKAGE":   "package",
+	"LRANGE":     "range",
+	"LRETURN":    "return",
+	"LSELECT":    "select",
+	"LSTRUCT":    "struct",
+	"LSWITCH":    "switch",
+	"LTYPE":      "type",
+	"LVAR":       "var",
+	"LANDAND":    "&&",
+	"LANDNOT":    "&^",
+	"LBODY":      "{",
+	"LCOMM":      "<-",
+	"LDEC":       "--",
+	"LINC":       "++",
+	"LEQ":        "==",
+	"LGE":        ">=",
+	"LGT":        ">",
+	"LLE":        "<=",
+	"LLT":        "<",
+	"LLSH":       "<<",
+	"LRSH":       ">>",
+	"LOROR":      "||",
+	"LNE":        "!=",
 	// spell out to avoid confusion with punctuation in error messages
-	{"';'", "semicolon or newline"},
-	{"','", "comma"},
+	"';'": "semicolon or newline",
+	"','": "comma",
 }
 
 func init() {
 	yyErrorVerbose = true
 
-Outer:
 	for i, s := range yyToknames {
 		// Apply yytfix if possible.
-		for _, fix := range yytfix {
-			if s == fix.have {
-				yyToknames[i] = fix.want
-				continue Outer
-			}
-		}
-
-		// Turn 'x' into x.
-		if len(s) == 3 && s[0] == '\'' && s[2] == '\'' {
+		if fix, ok := yytfix[s]; ok {
+			yyToknames[i] = fix
+		} else if len(s) == 3 && s[0] == '\'' && s[2] == '\'' {
+			// Turn 'x' into x.
 			yyToknames[i] = s[1:2]
-			continue
 		}
 	}
 }
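
The lexn and yytfix rewrites above share one idiom: a slice of pairs that was
scanned linearly becomes a map keyed by the lookup value, and the Outer-labeled
loop in init collapses to a single map probe. A minimal sketch of the pattern,
with illustrative names rather than the compiler's (assumes import "fmt"):

	var tokenNames = map[int]string{
		1: "EOF",
		2: "NAME",
	}

	func tokenName(tok int) string {
		if s, ok := tokenNames[tok]; ok {
			return s
		}
		return fmt.Sprintf("TOK-%d", tok) // same fallback shape as lexname
	}
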
diff --git a/src/cmd/compile/internal/gc/mparith3.go b/src/cmd/compile/internal/gc/mparith3.go
index bf37f2d..f91a64b 100644
--- a/src/cmd/compile/internal/gc/mparith3.go
+++ b/src/cmd/compile/internal/gc/mparith3.go
@@ -9,7 +9,6 @@
 	"cmd/internal/obj"
 	"fmt"
 	"math"
-	"strings"
 )
 
 /// implements float arithmetic
@@ -154,30 +153,6 @@
 		as = as[1:]
 	}
 
-	// The spec requires accepting exponents that fit in int32.
-	// Don't accept much more than that.
-	// Count digits in exponent and stop early if there are too many.
-	if i := strings.Index(as, "e"); i >= 0 {
-		i++
-		if i < len(as) && (as[i] == '-' || as[i] == '+') {
-			i++
-		}
-		for i < len(as) && as[i] == '0' {
-			i++
-		}
-		// TODO(rsc): This should be > 10, because we're supposed
-		// to accept any signed 32-bit int as an exponent.
-		// But that's not working terribly well, so we deviate from the
-		// spec in order to make sure that what we accept works.
-		// We can remove this restriction once those larger exponents work.
-		// See golang.org/issue/11326 and test/fixedbugs/issue11326*.go.
-		if len(as)-i > 8 {
-			Yyerror("malformed constant: %s (exponent too large)", as)
-			a.Val.SetUint64(0)
-			return
-		}
-	}
-
 	f, ok := a.Val.SetString(as)
 	if !ok {
 		// At the moment we lose precise error cause;
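
The block deleted above pre-screened exponent digits before handing the literal
to math/big; removing it trusts big.Float.SetString to accept or reject the
constant on its own, with the !ok branch still reporting failures. A standalone
sketch of the behavior now relied on, using only the standard library:

	package main

	import (
		"fmt"
		"math/big"
	)

	func main() {
		f, ok := new(big.Float).SetString("1e5000")
		fmt.Println(ok, f.Sign()) // true 1: a large exponent parses fine
	}
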
diff --git a/src/cmd/compile/internal/gc/obj.go b/src/cmd/compile/internal/gc/obj.go
index ec74009..9d35dfd 100644
--- a/src/cmd/compile/internal/gc/obj.go
+++ b/src/cmd/compile/internal/gc/obj.go
@@ -72,10 +72,7 @@
 
 	fmt.Fprintf(bout, "\n!\n")
 
-	var externs *NodeList
-	if externdcl != nil {
-		externs = externdcl.End
-	}
+	externs := len(externdcl)
 
 	dumpglobls()
 	dumptypestructs()
@@ -83,8 +80,8 @@
 	// Dump extra globals.
 	tmp := externdcl
 
-	if externs != nil {
-		externdcl = externs.Next
+	if externdcl != nil {
+		externdcl = externdcl[externs:]
 	}
 	dumpglobls()
 	externdcl = tmp
@@ -107,11 +104,8 @@
 }
 
 func dumpglobls() {
-	var n *Node
-
 	// add globals
-	for l := externdcl; l != nil; l = l.Next {
-		n = l.N
+	for _, n := range externdcl {
 		if n.Op != ONAME {
 			continue
 		}
@@ -126,12 +120,10 @@
 			continue
 		}
 		dowidth(n.Type)
-
 		ggloblnod(n)
 	}
 
-	for l := funcsyms; l != nil; l = l.Next {
-		n = l.N
+	for _, n := range funcsyms {
 		dsymptr(n.Sym, 0, n.Sym.Def.Func.Shortname.Sym, 0)
 		ggloblsym(n.Sym, int32(Widthptr), obj.DUPOK|obj.RODATA)
 	}
@@ -187,10 +179,6 @@
 	return duintxx(s, off, uint64(v), 4)
 }
 
-func duint64(s *Sym, off int, v uint64) int {
-	return duintxx(s, off, v, 8)
-}
-
 func duintptr(s *Sym, off int, v uint64) int {
 	return duintxx(s, off, v, Widthptr)
 }
@@ -284,25 +272,6 @@
 	duintxx(nam.Sym, off, uint64(len), Widthint)
 }
 
-func dstringptr(s *Sym, off int, str string) int {
-	off = int(Rnd(int64(off), int64(Widthptr)))
-	p := Thearch.Gins(obj.ADATA, nil, nil)
-	p.From.Type = obj.TYPE_MEM
-	p.From.Name = obj.NAME_EXTERN
-	p.From.Sym = Linksym(s)
-	p.From.Offset = int64(off)
-	p.From3 = new(obj.Addr)
-	p.From3.Type = obj.TYPE_CONST
-	p.From3.Offset = int64(Widthptr)
-
-	Datastring(str+"\x00", &p.To) // TODO(rsc): Remove NUL
-	p.To.Type = obj.TYPE_ADDR
-	p.To.Etype = Simtype[TINT]
-	off += Widthptr
-
-	return off
-}
-
 func Datastring(s string, a *obj.Addr) {
 	_, symdata := stringsym(s)
 	a.Type = obj.TYPE_MEM
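
The externdcl bookkeeping above reflects the list's migration from a linked
*NodeList to a plain slice: "remember where the list currently ends" turns into
recording len() and reslicing past that mark later. The same idiom, sketched on
an ordinary slice:

	func demo() []string {
		decls := []string{"a", "b"}
		mark := len(decls) // remember the current end

		decls = append(decls, "c", "d")

		return decls[mark:] // only the entries appended after the mark: [c d]
	}
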
diff --git a/src/cmd/compile/internal/gc/pgen.go b/src/cmd/compile/internal/gc/pgen.go
index b15fb6d..a5010a3 100644
--- a/src/cmd/compile/internal/gc/pgen.go
+++ b/src/cmd/compile/internal/gc/pgen.go
@@ -165,6 +165,8 @@
 	ggloblsym(sym, int32(off), obj.RODATA|obj.LOCAL)
 }
 
+// cmpstackvarlt reports whether the stack variable a sorts before b.
+//
 // Sort the list of stack variables. Autos after anything else,
 // within autos, unused after used, within used, things with
 // pointers first, zeroed things first, and then decreasing size.
@@ -173,48 +175,48 @@
 // really means, in memory, things with pointers needing zeroing at
 // the top of the stack and increasing in size.
 // Non-autos sort on offset.
-func cmpstackvar(a *Node, b *Node) int {
+func cmpstackvarlt(a, b *Node) bool {
 	if a.Class != b.Class {
 		if a.Class == PAUTO {
-			return +1
+			return false
 		}
-		return -1
+		return true
 	}
 
 	if a.Class != PAUTO {
 		if a.Xoffset < b.Xoffset {
-			return -1
+			return true
 		}
 		if a.Xoffset > b.Xoffset {
-			return +1
+			return false
 		}
-		return 0
+		return false
 	}
 
 	if a.Used != b.Used {
-		return obj.Bool2int(b.Used) - obj.Bool2int(a.Used)
+		return a.Used
 	}
 
-	ap := obj.Bool2int(haspointers(a.Type))
-	bp := obj.Bool2int(haspointers(b.Type))
+	ap := haspointers(a.Type)
+	bp := haspointers(b.Type)
 	if ap != bp {
-		return bp - ap
+		return ap
 	}
 
-	ap = obj.Bool2int(a.Name.Needzero)
-	bp = obj.Bool2int(b.Name.Needzero)
+	ap = a.Name.Needzero
+	bp = b.Name.Needzero
 	if ap != bp {
-		return bp - ap
+		return ap
 	}
 
 	if a.Type.Width < b.Type.Width {
-		return +1
+		return false
 	}
 	if a.Type.Width > b.Type.Width {
-		return -1
+		return true
 	}
 
-	return stringsCompare(a.Sym.Name, b.Sym.Name)
+	return a.Sym.Name < b.Sym.Name
 }
 
 // stkdelta records the stack offset delta for a node
@@ -240,7 +242,7 @@
 
 	markautoused(ptxt)
 
-	listsort(&Curfn.Func.Dcl, cmpstackvar)
+	listsort(&Curfn.Func.Dcl, cmpstackvarlt)
 
 	// Unused autos are at the end, chop 'em off.
 	ll := Curfn.Func.Dcl
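
cmpstackvarlt is the standard three-way-comparator-to-less conversion: every
return -1 becomes return true, every return +1 and return 0 become return
false, and the trailing stringsCompare turns into a direct < on the names. The
shape of the rewrite, on plain ints:

	// Three-way comparator: -1, 0, or +1.
	func cmp(a, b int) int {
		switch {
		case a < b:
			return -1
		case a > b:
			return +1
		}
		return 0
	}

	// Less-style report: true only for "strictly before"; ties are false.
	func lt(a, b int) bool { return cmp(a, b) < 0 }
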
diff --git a/src/cmd/compile/internal/gc/pgen_test.go b/src/cmd/compile/internal/gc/pgen_test.go
new file mode 100644
index 0000000..ebc91011
--- /dev/null
+++ b/src/cmd/compile/internal/gc/pgen_test.go
@@ -0,0 +1,176 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gc
+
+import (
+	"reflect"
+	"testing"
+)
+
+// Test all code paths for cmpstackvarlt.
+func TestCmpstackvar(t *testing.T) {
+	testdata := []struct {
+		a, b Node
+		lt   bool
+	}{
+		{
+			Node{Class: PAUTO},
+			Node{Class: PFUNC},
+			false,
+		},
+		{
+			Node{Class: PFUNC},
+			Node{Class: PAUTO},
+			true,
+		},
+		{
+			Node{Class: PFUNC, Xoffset: 0},
+			Node{Class: PFUNC, Xoffset: 10},
+			true,
+		},
+		{
+			Node{Class: PFUNC, Xoffset: 20},
+			Node{Class: PFUNC, Xoffset: 10},
+			false,
+		},
+		{
+			Node{Class: PFUNC, Xoffset: 10},
+			Node{Class: PFUNC, Xoffset: 10},
+			false,
+		},
+		{
+			Node{Class: PAUTO, Used: true},
+			Node{Class: PAUTO, Used: false},
+			true,
+		},
+		{
+			Node{Class: PAUTO, Used: false},
+			Node{Class: PAUTO, Used: true},
+			false,
+		},
+		{
+			Node{Class: PAUTO, Type: &Type{Haspointers: 1}}, // haspointers -> false
+			Node{Class: PAUTO, Type: &Type{Haspointers: 2}}, // haspointers -> true
+			false,
+		},
+		{
+			Node{Class: PAUTO, Type: &Type{Haspointers: 2}}, // haspointers -> true
+			Node{Class: PAUTO, Type: &Type{Haspointers: 1}}, // haspointers -> false
+			true,
+		},
+		{
+			Node{Class: PAUTO, Type: &Type{}, Name: &Name{Needzero: true}},
+			Node{Class: PAUTO, Type: &Type{}, Name: &Name{Needzero: false}},
+			true,
+		},
+		{
+			Node{Class: PAUTO, Type: &Type{}, Name: &Name{Needzero: false}},
+			Node{Class: PAUTO, Type: &Type{}, Name: &Name{Needzero: true}},
+			false,
+		},
+		{
+			Node{Class: PAUTO, Type: &Type{Width: 1}, Name: &Name{}},
+			Node{Class: PAUTO, Type: &Type{Width: 2}, Name: &Name{}},
+			false,
+		},
+		{
+			Node{Class: PAUTO, Type: &Type{Width: 2}, Name: &Name{}},
+			Node{Class: PAUTO, Type: &Type{Width: 1}, Name: &Name{}},
+			true,
+		},
+		{
+			Node{Class: PAUTO, Type: &Type{}, Name: &Name{}, Sym: &Sym{Name: "abc"}},
+			Node{Class: PAUTO, Type: &Type{}, Name: &Name{}, Sym: &Sym{Name: "xyz"}},
+			true,
+		},
+		{
+			Node{Class: PAUTO, Type: &Type{}, Name: &Name{}, Sym: &Sym{Name: "abc"}},
+			Node{Class: PAUTO, Type: &Type{}, Name: &Name{}, Sym: &Sym{Name: "abc"}},
+			false,
+		},
+		{
+			Node{Class: PAUTO, Type: &Type{}, Name: &Name{}, Sym: &Sym{Name: "xyz"}},
+			Node{Class: PAUTO, Type: &Type{}, Name: &Name{}, Sym: &Sym{Name: "abc"}},
+			false,
+		},
+	}
+	for _, d := range testdata {
+		got := cmpstackvarlt(&d.a, &d.b)
+		if got != d.lt {
+			t.Errorf("cmpstackvarlt(%#v, %#v) = %v, want %v", d.a, d.b, got, d.lt)
+		}
+	}
+}
+
+func slice2nodelist(s []*Node) *NodeList {
+	var nl *NodeList
+	for _, n := range s {
+		nl = list(nl, n)
+	}
+	return nl
+}
+
+func nodelist2slice(nl *NodeList) []*Node {
+	var s []*Node
+	for l := nl; l != nil; l = l.Next {
+		s = append(s, l.N)
+	}
+	return s
+}
+
+func TestListsort(t *testing.T) {
+	inp := []*Node{
+		{Class: PFUNC, Type: &Type{}, Name: &Name{}, Sym: &Sym{}},
+		{Class: PAUTO, Type: &Type{}, Name: &Name{}, Sym: &Sym{}},
+		{Class: PFUNC, Xoffset: 0, Type: &Type{}, Name: &Name{}, Sym: &Sym{}},
+		{Class: PFUNC, Xoffset: 10, Type: &Type{}, Name: &Name{}, Sym: &Sym{}},
+		{Class: PFUNC, Xoffset: 20, Type: &Type{}, Name: &Name{}, Sym: &Sym{}},
+		{Class: PAUTO, Used: true, Type: &Type{}, Name: &Name{}, Sym: &Sym{}},
+		{Class: PAUTO, Type: &Type{Haspointers: 1}, Name: &Name{}, Sym: &Sym{}}, // haspointers -> false
+		{Class: PAUTO, Type: &Type{}, Name: &Name{}, Sym: &Sym{}},
+		{Class: PAUTO, Type: &Type{}, Name: &Name{Needzero: true}, Sym: &Sym{}},
+		{Class: PAUTO, Type: &Type{Width: 1}, Name: &Name{}, Sym: &Sym{}},
+		{Class: PAUTO, Type: &Type{Width: 2}, Name: &Name{}, Sym: &Sym{}},
+		{Class: PAUTO, Type: &Type{}, Name: &Name{}, Sym: &Sym{Name: "abc"}},
+		{Class: PAUTO, Type: &Type{}, Name: &Name{}, Sym: &Sym{Name: "xyz"}},
+	}
+	want := []*Node{
+		{Class: PFUNC, Type: &Type{}, Name: &Name{}, Sym: &Sym{}},
+		{Class: PFUNC, Xoffset: 0, Type: &Type{}, Name: &Name{}, Sym: &Sym{}},
+		{Class: PFUNC, Xoffset: 10, Type: &Type{}, Name: &Name{}, Sym: &Sym{}},
+		{Class: PFUNC, Xoffset: 20, Type: &Type{}, Name: &Name{}, Sym: &Sym{}},
+		{Class: PAUTO, Used: true, Type: &Type{}, Name: &Name{}, Sym: &Sym{}},
+		{Class: PAUTO, Type: &Type{}, Name: &Name{Needzero: true}, Sym: &Sym{}},
+		{Class: PAUTO, Type: &Type{Width: 2}, Name: &Name{}, Sym: &Sym{}},
+		{Class: PAUTO, Type: &Type{Width: 1}, Name: &Name{}, Sym: &Sym{}},
+		{Class: PAUTO, Type: &Type{}, Name: &Name{}, Sym: &Sym{}},
+		{Class: PAUTO, Type: &Type{}, Name: &Name{}, Sym: &Sym{}},
+		{Class: PAUTO, Type: &Type{}, Name: &Name{}, Sym: &Sym{Name: "abc"}},
+		{Class: PAUTO, Type: &Type{}, Name: &Name{}, Sym: &Sym{Name: "xyz"}},
+		{Class: PAUTO, Type: &Type{Haspointers: 1}, Name: &Name{}, Sym: &Sym{}}, // haspointers -> false
+	}
+	// haspointers updates Type.Haspointers as a side effect, so
+	// exercise it on all inputs up front; otherwise reflect.DeepEqual
+	// would report spurious mismatches.
+	for i := range want {
+		haspointers(want[i].Type)
+		haspointers(inp[i].Type)
+	}
+
+	nl := slice2nodelist(inp)
+	listsort(&nl, cmpstackvarlt)
+	got := nodelist2slice(nl)
+	if !reflect.DeepEqual(want, got) {
+		t.Error("listsort failed")
+		for i := range got {
+			g := got[i]
+			w := want[i]
+			eq := reflect.DeepEqual(w, g)
+			if !eq {
+				t.Log(i, w, g)
+			}
+		}
+	}
+}
diff --git a/src/cmd/compile/internal/gc/plive.go b/src/cmd/compile/internal/gc/plive.go
index c7414d0..00ae807 100644
--- a/src/cmd/compile/internal/gc/plive.go
+++ b/src/cmd/compile/internal/gc/plive.go
@@ -92,14 +92,6 @@
 	livepointers     []Bvec
 }
 
-func xmalloc(size uint32) interface{} {
-	result := (interface{})(make([]byte, size))
-	if result == nil {
-		Fatalf("malloc failed")
-	}
-	return result
-}
-
 // Constructs a new basic block containing a single instruction.
 func newblock(prog *obj.Prog) *BasicBlock {
 	if prog == nil {
@@ -115,13 +107,6 @@
 	return result
 }
 
-// Frees a basic block and all of its leaf data structures.
-func freeblock(bb *BasicBlock) {
-	if bb == nil {
-		Fatalf("freeblock: cannot free nil")
-	}
-}
-
 // Adds an edge between two basic blocks by making from a predecessor of to and
 // to a successor of from.
 func addedge(from *BasicBlock, to *BasicBlock) {
diff --git a/src/cmd/compile/internal/gc/popt.go b/src/cmd/compile/internal/gc/popt.go
index 0b3bde5..985ebb6 100644
--- a/src/cmd/compile/internal/gc/popt.go
+++ b/src/cmd/compile/internal/gc/popt.go
@@ -523,20 +523,15 @@
 	merge   *TempVar // merge var with this one
 	start   int64    // smallest Prog.pc in live range
 	end     int64    // largest Prog.pc in live range
-	addr    uint8    // address taken - no accurate end
-	removed uint8    // removed from program
+	addr    bool     // address taken - no accurate end
+	removed bool     // removed from program
 }
 
+// startcmp sorts TempVars by start, then id, then symbol name.
 type startcmp []*TempVar
 
-func (x startcmp) Len() int {
-	return len(x)
-}
-
-func (x startcmp) Swap(i, j int) {
-	x[i], x[j] = x[j], x[i]
-}
-
+func (x startcmp) Len() int      { return len(x) }
+func (x startcmp) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
 func (x startcmp) Less(i, j int) bool {
 	a := x[i]
 	b := x[j]
@@ -556,7 +551,7 @@
 		return int(a.def.Id-b.def.Id) < 0
 	}
 	if a.node != b.node {
-		return stringsCompare(a.node.Sym.Name, b.node.Sym.Name) < 0
+		return a.node.Sym.Name < b.node.Sym.Name
 	}
 	return false
 }
@@ -577,22 +572,11 @@
 	}
 
 	// Build list of all mergeable variables.
-	nvar := 0
+	var vars []*TempVar
 	for l := Curfn.Func.Dcl; l != nil; l = l.Next {
-		if canmerge(l.N) {
-			nvar++
-		}
-	}
-
-	var_ := make([]TempVar, nvar)
-	nvar = 0
-	var n *Node
-	var v *TempVar
-	for l := Curfn.Func.Dcl; l != nil; l = l.Next {
-		n = l.N
-		if canmerge(n) {
-			v = &var_[nvar]
-			nvar++
+		if n := l.N; canmerge(n) {
+			v := &TempVar{}
+			vars = append(vars, v)
 			n.SetOpt(v)
 			v.node = n
 		}
@@ -607,8 +591,8 @@
 		if p.From.Node != nil && ((p.From.Node).(*Node)).Opt() != nil && p.To.Node != nil && ((p.To.Node).(*Node)).Opt() != nil {
 			Fatalf("double node %v", p)
 		}
-		v = nil
-		n, _ = p.From.Node.(*Node)
+		var v *TempVar
+		n, _ := p.From.Node.(*Node)
 		if n != nil {
 			v, _ = n.Opt().(*TempVar)
 		}
@@ -625,7 +609,7 @@
 			f.Data = v.use
 			v.use = f
 			if n == p.From.Node && (p.Info.Flags&LeftAddr != 0) {
-				v.addr = 1
+				v.addr = true
 			}
 		}
 	}
@@ -637,9 +621,8 @@
 	nkill := 0
 
 	// Special case.
-	for i := 0; i < len(var_); i++ {
-		v = &var_[i]
-		if v.addr != 0 {
+	for _, v := range vars {
+		if v.addr {
 			continue
 		}
 
@@ -650,7 +633,7 @@
 			if p.To.Node == v.node && (p.Info.Flags&RightWrite != 0) && p.Info.Flags&RightRead == 0 {
 				p.As = obj.ANOP
 				p.To = obj.Addr{}
-				v.removed = 1
+				v.removed = true
 				if debugmerge > 0 && Debug['v'] != 0 {
 					fmt.Printf("drop write-only %v\n", v.node.Sym)
 				}
@@ -673,7 +656,7 @@
 			if p.From.Node == v.node && p1.To.Node == v.node && (p.Info.Flags&Move != 0) && (p.Info.Flags|p1.Info.Flags)&(LeftAddr|RightAddr) == 0 && p.Info.Flags&SizeAny == p1.Info.Flags&SizeAny {
 				p1.From = p.From
 				Thearch.Excise(f)
-				v.removed = 1
+				v.removed = true
 				if debugmerge > 0 && Debug['v'] != 0 {
 					fmt.Printf("drop immediate-use %v\n", v.node.Sym)
 				}
@@ -687,29 +670,25 @@
 	// Traverse live range of each variable to set start, end.
 	// Each flood uses a new value of gen so that we don't have
 	// to clear all the r->active words after each variable.
-	gen := int32(0)
+	gen := uint32(0)
 
-	for i := 0; i < len(var_); i++ {
-		v = &var_[i]
+	for _, v := range vars {
 		gen++
 		for f := v.use; f != nil; f = f.Data.(*Flow) {
-			mergewalk(v, f, uint32(gen))
+			mergewalk(v, f, gen)
 		}
-		if v.addr != 0 {
+		if v.addr {
 			gen++
 			for f := v.use; f != nil; f = f.Data.(*Flow) {
-				varkillwalk(v, f, uint32(gen))
+				varkillwalk(v, f, gen)
 			}
 		}
 	}
 
 	// Sort variables by start.
-	bystart := make([]*TempVar, len(var_))
-
-	for i := 0; i < len(var_); i++ {
-		bystart[i] = &var_[i]
-	}
-	sort.Sort(startcmp(bystart[:len(var_)]))
+	bystart := make([]*TempVar, len(vars))
+	copy(bystart, vars)
+	sort.Sort(startcmp(bystart))
 
 	// List of in-use variables, sorted by end, so that the ones that
 	// will last the longest are the earliest ones in the array.
@@ -717,40 +696,35 @@
 	// In theory we should use a sorted tree so that insertions are
 	// guaranteed O(log n) and then the loop is guaranteed O(n log n).
 	// In practice, it doesn't really matter.
-	inuse := make([]*TempVar, len(var_))
+	inuse := make([]*TempVar, len(bystart))
 
 	ninuse := 0
-	nfree := len(var_)
-	var t *Type
-	var v1 *TempVar
-	var j int
-	for i := 0; i < len(var_); i++ {
-		v = bystart[i]
+	nfree := len(bystart)
+	for _, v := range bystart {
 		if debugmerge > 0 && Debug['v'] != 0 {
-			fmt.Printf("consider %v: removed=%d\n", Nconv(v.node, obj.FmtSharp), v.removed)
+			fmt.Printf("consider %v: removed=%t\n", Nconv(v.node, obj.FmtSharp), v.removed)
 		}
 
-		if v.removed != 0 {
+		if v.removed {
 			continue
 		}
 
 		// Expire no longer in use.
 		for ninuse > 0 && inuse[ninuse-1].end < v.start {
 			ninuse--
-			v1 = inuse[ninuse]
 			nfree--
-			inuse[nfree] = v1
+			inuse[nfree] = inuse[ninuse]
 		}
 
 		if debugmerge > 0 && Debug['v'] != 0 {
-			fmt.Printf("consider %v: removed=%d nfree=%d nvar=%d\n", Nconv(v.node, obj.FmtSharp), v.removed, nfree, len(var_))
+			fmt.Printf("consider %v: removed=%t nfree=%d nvar=%d\n", Nconv(v.node, obj.FmtSharp), v.removed, nfree, len(bystart))
 		}
 
 		// Find old temp to reuse if possible.
-		t = v.node.Type
+		t := v.node.Type
 
-		for j = nfree; j < len(var_); j++ {
-			v1 = inuse[j]
+		for j := nfree; j < len(inuse); j++ {
+			v1 := inuse[j]
 			if debugmerge > 0 && Debug['v'] != 0 {
 				fmt.Printf("consider %v: maybe %v: type=%v,%v addrtaken=%v,%v\n", Nconv(v.node, obj.FmtSharp), Nconv(v1.node, obj.FmtSharp), t, v1.node.Type, v.node.Addrtaken, v1.node.Addrtaken)
 			}
@@ -774,7 +748,7 @@
 		}
 
 		// Sort v into inuse.
-		j = ninuse
+		j := ninuse
 		ninuse++
 
 		for j > 0 && inuse[j-1].end < v.end {
@@ -786,16 +760,14 @@
 	}
 
 	if debugmerge > 0 && Debug['v'] != 0 {
-		fmt.Printf("%v [%d - %d]\n", Curfn.Func.Nname.Sym, len(var_), nkill)
-		var v *TempVar
-		for i := 0; i < len(var_); i++ {
-			v = &var_[i]
+		fmt.Printf("%v [%d - %d]\n", Curfn.Func.Nname.Sym, len(vars), nkill)
+		for _, v := range vars {
 			fmt.Printf("var %v %v %d-%d", Nconv(v.node, obj.FmtSharp), v.node.Type, v.start, v.end)
-			if v.addr != 0 {
-				fmt.Printf(" addr=1")
+			if v.addr {
+				fmt.Printf(" addr=true")
 			}
-			if v.removed != 0 {
-				fmt.Printf(" dead=1")
+			if v.removed {
+				fmt.Printf(" removed=true")
 			}
 			if v.merge != nil {
 				fmt.Printf(" merge %v", Nconv(v.merge.node, obj.FmtSharp))
@@ -814,16 +786,16 @@
 	// Update node references to use merged temporaries.
 	for f := g.Start; f != nil; f = f.Link {
 		p := f.Prog
-		n, _ = p.From.Node.(*Node)
+		n, _ := p.From.Node.(*Node)
 		if n != nil {
-			v, _ = n.Opt().(*TempVar)
+			v, _ := n.Opt().(*TempVar)
 			if v != nil && v.merge != nil {
 				p.From.Node = v.merge.node
 			}
 		}
 		n, _ = p.To.Node.(*Node)
 		if n != nil {
-			v, _ = n.Opt().(*TempVar)
+			v, _ := n.Opt().(*TempVar)
 			if v != nil && v.merge != nil {
 				p.To.Node = v.merge.node
 			}
@@ -831,17 +803,16 @@
 	}
 
 	// Delete merged nodes from declaration list.
-	var l *NodeList
 	for lp := &Curfn.Func.Dcl; ; {
-		l = *lp
+		l := *lp
 		if l == nil {
 			break
 		}
 
 		Curfn.Func.Dcl.End = l
-		n = l.N
-		v, _ = n.Opt().(*TempVar)
-		if v != nil && (v.merge != nil || v.removed != 0) {
+		n := l.N
+		v, _ := n.Opt().(*TempVar)
+		if v != nil && (v.merge != nil || v.removed) {
 			*lp = l.Next
 			continue
 		}
@@ -850,8 +821,8 @@
 	}
 
 	// Clear aux structures.
-	for i := 0; i < len(var_); i++ {
-		var_[i].node.SetOpt(nil)
+	for _, v := range vars {
+		v.node.SetOpt(nil)
 	}
 
 	Flowend(g)
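
TempVar's addr and removed fields drop from uint8 flags to bool, so call sites
read v.addr instead of v.addr != 0, assignments become v.addr = true, and the
debug prints switch their verbs from %d to %t. The mechanical shape of the
change, on an illustrative struct:

	type tempVar struct {
		addr bool // was: addr uint8
	}

	func mark(v *tempVar) {
		v.addr = true // was: v.addr = 1
		if v.addr {   // was: if v.addr != 0
			// ...
		}
	}
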
diff --git a/src/cmd/compile/internal/gc/racewalk.go b/src/cmd/compile/internal/gc/racewalk.go
index 852ae98..f127dfd 100644
--- a/src/cmd/compile/internal/gc/racewalk.go
+++ b/src/cmd/compile/internal/gc/racewalk.go
@@ -32,8 +32,8 @@
 
 func ispkgin(pkgs []string) bool {
 	if myimportpath != "" {
-		for i := 0; i < len(pkgs); i++ {
-			if myimportpath == pkgs[i] {
+		for _, p := range pkgs {
+			if myimportpath == p {
 				return true
 			}
 		}
diff --git a/src/cmd/compile/internal/gc/range.go b/src/cmd/compile/internal/gc/range.go
index dbfd674..59f7d40 100644
--- a/src/cmd/compile/internal/gc/range.go
+++ b/src/cmd/compile/internal/gc/range.go
@@ -167,87 +167,10 @@
 	default:
 		Fatalf("walkrange")
 
-		// Lower n into runtime·memclr if possible, for
-	// fast zeroing of slices and arrays (issue 5373).
-	// Look for instances of
-	//
-	// for i := range a {
-	// 	a[i] = zero
-	// }
-	//
-	// in which the evaluation of a is side-effect-free.
 	case TARRAY:
-		if Debug['N'] == 0 {
-			if flag_race == 0 {
-				if v1 != nil {
-					if v2 == nil {
-						if n.Nbody != nil {
-							if n.Nbody.N != nil { // at least one statement in body
-								if n.Nbody.Next == nil { // at most one statement in body
-									tmp := n.Nbody.N // first statement of body
-									if tmp.Op == OAS {
-										if tmp.Left.Op == OINDEX {
-											if samesafeexpr(tmp.Left.Left, a) {
-												if samesafeexpr(tmp.Left.Right, v1) {
-													if t.Type.Width > 0 {
-														if iszero(tmp.Right) {
-															// Convert to
-															// if len(a) != 0 {
-															// 	hp = &a[0]
-															// 	hn = len(a)*sizeof(elem(a))
-															// 	memclr(hp, hn)
-															// 	i = len(a) - 1
-															// }
-															n.Op = OIF
-
-															n.Nbody = nil
-															n.Left = Nod(ONE, Nod(OLEN, a, nil), Nodintconst(0))
-
-															// hp = &a[0]
-															hp := temp(Ptrto(Types[TUINT8]))
-
-															tmp := Nod(OINDEX, a, Nodintconst(0))
-															tmp.Bounded = true
-															tmp = Nod(OADDR, tmp, nil)
-															tmp = Nod(OCONVNOP, tmp, nil)
-															tmp.Type = Ptrto(Types[TUINT8])
-															n.Nbody = list(n.Nbody, Nod(OAS, hp, tmp))
-
-															// hn = len(a) * sizeof(elem(a))
-															hn := temp(Types[TUINTPTR])
-
-															tmp = Nod(OLEN, a, nil)
-															tmp = Nod(OMUL, tmp, Nodintconst(t.Type.Width))
-															tmp = conv(tmp, Types[TUINTPTR])
-															n.Nbody = list(n.Nbody, Nod(OAS, hn, tmp))
-
-															// memclr(hp, hn)
-															fn := mkcall("memclr", nil, nil, hp, hn)
-
-															n.Nbody = list(n.Nbody, fn)
-
-															// i = len(a) - 1
-															v1 = Nod(OAS, v1, Nod(OSUB, Nod(OLEN, a, nil), Nodintconst(1)))
-
-															n.Nbody = list(n.Nbody, v1)
-
-															typecheck(&n.Left, Erv)
-															typechecklist(n.Nbody, Etop)
-															walkstmt(&n)
-															lineno = int32(lno)
-															return
-														}
-													}
-												}
-											}
-										}
-									}
-								}
-							}
-						}
-					}
-				}
-			}
+		if memclrrange(n, v1, v2, a) {
+			lineno = int32(lno)
+			return
 		}
 
 		// orderstmt arranged for a copy of the array/slice variable if needed.
@@ -404,3 +327,82 @@
 
 	lineno = int32(lno)
 }
+
+// Lower n into runtime·memclr if possible, for
+// fast zeroing of slices and arrays (issue 5373).
+// Look for instances of
+//
+// for i := range a {
+// 	a[i] = zero
+// }
+//
+// in which the evaluation of a is side-effect-free.
+//
+// Parameters are as in walkrange: "for v1, v2 = range a".
+func memclrrange(n, v1, v2, a *Node) bool {
+	if Debug['N'] != 0 || flag_race != 0 {
+		return false
+	}
+	if v1 == nil || v2 != nil {
+		return false
+	}
+	if n.Nbody == nil || n.Nbody.N == nil || n.Nbody.Next != nil {
+		return false
+	}
+	stmt := n.Nbody.N // only stmt in body
+	if stmt.Op != OAS || stmt.Left.Op != OINDEX {
+		return false
+	}
+	if !samesafeexpr(stmt.Left.Left, a) || !samesafeexpr(stmt.Left.Right, v1) {
+		return false
+	}
+	elemsize := n.Type.Type.Width
+	if elemsize <= 0 || !iszero(stmt.Right) {
+		return false
+	}
+
+	// Convert to
+	// if len(a) != 0 {
+	// 	hp = &a[0]
+	// 	hn = len(a)*sizeof(elem(a))
+	// 	memclr(hp, hn)
+	// 	i = len(a) - 1
+	// }
+	n.Op = OIF
+
+	n.Nbody = nil
+	n.Left = Nod(ONE, Nod(OLEN, a, nil), Nodintconst(0))
+
+	// hp = &a[0]
+	hp := temp(Ptrto(Types[TUINT8]))
+
+	tmp := Nod(OINDEX, a, Nodintconst(0))
+	tmp.Bounded = true
+	tmp = Nod(OADDR, tmp, nil)
+	tmp = Nod(OCONVNOP, tmp, nil)
+	tmp.Type = Ptrto(Types[TUINT8])
+	n.Nbody = list(n.Nbody, Nod(OAS, hp, tmp))
+
+	// hn = len(a) * sizeof(elem(a))
+	hn := temp(Types[TUINTPTR])
+
+	tmp = Nod(OLEN, a, nil)
+	tmp = Nod(OMUL, tmp, Nodintconst(elemsize))
+	tmp = conv(tmp, Types[TUINTPTR])
+	n.Nbody = list(n.Nbody, Nod(OAS, hn, tmp))
+
+	// memclr(hp, hn)
+	fn := mkcall("memclr", nil, nil, hp, hn)
+
+	n.Nbody = list(n.Nbody, fn)
+
+	// i = len(a) - 1
+	v1 = Nod(OAS, v1, Nod(OSUB, Nod(OLEN, a, nil), Nodintconst(1)))
+
+	n.Nbody = list(n.Nbody, v1)
+
+	typecheck(&n.Left, Erv)
+	typechecklist(n.Nbody, Etop)
+	walkstmt(&n)
+	return true
+}
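
Besides moving the rewrite out of walkrange, memclrrange replaces the old
nine-deep if pyramid with guard clauses: each condition that used to add a
nesting level now rejects with an early return, and the surviving body reads
straight down. The refactor in miniature:

	// Before: the success case is buried under nested conditions.
	func ok1(a, b, c bool) bool {
		if a {
			if b {
				if c {
					return true
				}
			}
		}
		return false
	}

	// After: guard clauses bail out early; behavior is identical.
	func ok2(a, b, c bool) bool {
		if !a || !b {
			return false
		}
		return c
	}
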
diff --git a/src/cmd/compile/internal/gc/reflect.go b/src/cmd/compile/internal/gc/reflect.go
index f579ef8..e7138d9 100644
--- a/src/cmd/compile/internal/gc/reflect.go
+++ b/src/cmd/compile/internal/gc/reflect.go
@@ -9,6 +9,7 @@
 	"cmd/internal/obj"
 	"fmt"
 	"os"
+	"sort"
 )
 
 /*
@@ -16,93 +17,30 @@
  */
 var signatlist *NodeList
 
-func sigcmp(a *Sig, b *Sig) int {
-	i := stringsCompare(a.name, b.name)
-	if i != 0 {
-		return i
-	}
-	if a.pkg == b.pkg {
-		return 0
-	}
-	if a.pkg == nil {
-		return -1
-	}
-	if b.pkg == nil {
-		return +1
-	}
-	return stringsCompare(a.pkg.Path, b.pkg.Path)
+// byMethodNameAndPackagePath sorts method signatures by name, then package path.
+type byMethodNameAndPackagePath []*Sig
+
+func (x byMethodNameAndPackagePath) Len() int      { return len(x) }
+func (x byMethodNameAndPackagePath) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
+func (x byMethodNameAndPackagePath) Less(i, j int) bool {
+	return siglt(x[i], x[j])
 }
 
-func lsort(l *Sig, f func(*Sig, *Sig) int) *Sig {
-	if l == nil || l.link == nil {
-		return l
+// siglt reports whether a sorts before b.
+func siglt(a, b *Sig) bool {
+	if a.name != b.name {
+		return a.name < b.name
 	}
-
-	l1 := l
-	l2 := l
-	for {
-		l2 = l2.link
-		if l2 == nil {
-			break
-		}
-		l2 = l2.link
-		if l2 == nil {
-			break
-		}
-		l1 = l1.link
+	if a.pkg == b.pkg {
+		return false
 	}
-
-	l2 = l1.link
-	l1.link = nil
-	l1 = lsort(l, f)
-	l2 = lsort(l2, f)
-
-	/* set up lead element */
-	if f(l1, l2) < 0 {
-		l = l1
-		l1 = l1.link
-	} else {
-		l = l2
-		l2 = l2.link
+	if a.pkg == nil {
+		return true
 	}
-
-	le := l
-
-	for {
-		if l1 == nil {
-			for l2 != nil {
-				le.link = l2
-				le = l2
-				l2 = l2.link
-			}
-
-			le.link = nil
-			break
-		}
-
-		if l2 == nil {
-			for l1 != nil {
-				le.link = l1
-				le = l1
-				l1 = l1.link
-			}
-
-			break
-		}
-
-		if f(l1, l2) < 0 {
-			le.link = l1
-			le = l1
-			l1 = l1.link
-		} else {
-			le.link = l2
-			le = l2
-			l2 = l2.link
-		}
+	if b.pkg == nil {
+		return false
 	}
-
-	le.link = nil
-	return l
+	return a.pkg.Path < b.pkg.Path
 }
 
 // Builds a type representing a Bucket structure for
@@ -335,11 +273,9 @@
 	return t
 }
 
-/*
- * return methods of non-interface type t, sorted by name.
- * generates stub functions as needed.
- */
-func methods(t *Type) *Sig {
+// methods returns the methods of the non-interface type t, sorted by name.
+// Generates stub functions as needed.
+func methods(t *Type) []*Sig {
 	// method type
 	mt := methtype(t, 0)
 
@@ -357,11 +293,7 @@
 
 	// make list of methods for t,
 	// generating code if necessary.
-	var a *Sig
-
-	var this *Type
-	var b *Sig
-	var method *Sym
+	var ms []*Sig
 	for f := mt.Xmethod; f != nil; f = f.Down {
 		if f.Etype != TFIELD {
 			Fatalf("methods: not field %v", f)
@@ -376,7 +308,7 @@
 			continue
 		}
 
-		method = f.Sym
+		method := f.Sym
 		if method == nil {
 			continue
 		}
@@ -385,7 +317,7 @@
 		// if pointer receiver but non-pointer t and
 		// this is not an embedded pointer inside a struct,
 		// method does not apply.
-		this = getthisx(f.Type).Type.Type
+		this := getthisx(f.Type).Type.Type
 
 		if Isptr[this.Etype] && this.Type == t {
 			continue
@@ -394,55 +326,48 @@
 			continue
 		}
 
-		b = new(Sig)
-		b.link = a
-		a = b
+		var sig Sig
+		ms = append(ms, &sig)
 
-		a.name = method.Name
+		sig.name = method.Name
 		if !exportname(method.Name) {
 			if method.Pkg == nil {
 				Fatalf("methods: missing package")
 			}
-			a.pkg = method.Pkg
+			sig.pkg = method.Pkg
 		}
 
-		a.isym = methodsym(method, it, 1)
-		a.tsym = methodsym(method, t, 0)
-		a.type_ = methodfunc(f.Type, t)
-		a.mtype = methodfunc(f.Type, nil)
+		sig.isym = methodsym(method, it, 1)
+		sig.tsym = methodsym(method, t, 0)
+		sig.type_ = methodfunc(f.Type, t)
+		sig.mtype = methodfunc(f.Type, nil)
 
-		if a.isym.Flags&SymSiggen == 0 {
-			a.isym.Flags |= SymSiggen
+		if sig.isym.Flags&SymSiggen == 0 {
+			sig.isym.Flags |= SymSiggen
 			if !Eqtype(this, it) || this.Width < Types[Tptr].Width {
 				compiling_wrappers = 1
-				genwrapper(it, f, a.isym, 1)
+				genwrapper(it, f, sig.isym, 1)
 				compiling_wrappers = 0
 			}
 		}
 
-		if a.tsym.Flags&SymSiggen == 0 {
-			a.tsym.Flags |= SymSiggen
+		if sig.tsym.Flags&SymSiggen == 0 {
+			sig.tsym.Flags |= SymSiggen
 			if !Eqtype(this, t) {
 				compiling_wrappers = 1
-				genwrapper(t, f, a.tsym, 0)
+				genwrapper(t, f, sig.tsym, 0)
 				compiling_wrappers = 0
 			}
 		}
 	}
 
-	return lsort(a, sigcmp)
+	sort.Sort(byMethodNameAndPackagePath(ms))
+	return ms
 }
 
-/*
- * return methods of interface type t, sorted by name.
- */
-func imethods(t *Type) *Sig {
-	var a *Sig
-	var method *Sym
-	var isym *Sym
-
-	var all *Sig
-	var last *Sig
+// imethods returns the methods of the interface type t, sorted by name.
+func imethods(t *Type) []*Sig {
+	var methods []*Sig
 	for f := t.Type; f != nil; f = f.Down {
 		if f.Etype != TFIELD {
 			Fatalf("imethods: not field")
@@ -450,29 +375,28 @@
 		if f.Type.Etype != TFUNC || f.Sym == nil {
 			continue
 		}
-		method = f.Sym
-		a = new(Sig)
-		a.name = method.Name
+		method := f.Sym
+		var sig = Sig{
+			name: method.Name,
+		}
 		if !exportname(method.Name) {
 			if method.Pkg == nil {
 				Fatalf("imethods: missing package")
 			}
-			a.pkg = method.Pkg
+			sig.pkg = method.Pkg
 		}
 
-		a.mtype = f.Type
-		a.offset = 0
-		a.type_ = methodfunc(f.Type, nil)
+		sig.mtype = f.Type
+		sig.offset = 0
+		sig.type_ = methodfunc(f.Type, nil)
 
-		if last != nil && sigcmp(last, a) >= 0 {
-			Fatalf("sigcmp vs sortinter %s %s", last.name, a.name)
+		if n := len(methods); n > 0 {
+			last := methods[n-1]
+			if !siglt(last, &sig) {
+				Fatalf("siglt vs sortinter %s %s", last.name, sig.name)
+			}
 		}
-		if last == nil {
-			all = a
-		} else {
-			last.link = a
-		}
-		last = a
+		methods = append(methods, &sig)
 
 		// Compiler can only refer to wrappers for non-blank methods.
 		if isblanksym(method) {
@@ -483,7 +407,7 @@
 		// IfaceType.Method is not in the reflect data.
 		// Generate the method body, so that compiled
 		// code can refer to it.
-		isym = methodsym(method, t, 0)
+		isym := methodsym(method, t, 0)
 
 		if isym.Flags&SymSiggen == 0 {
 			isym.Flags |= SymSiggen
@@ -491,7 +415,7 @@
 		}
 	}
 
-	return all
+	return methods
 }
 
 var dimportpath_gopkg *Pkg
@@ -559,7 +483,7 @@
  */
 func dextratype(sym *Sym, off int, t *Type, ptroff int) int {
 	m := methods(t)
-	if t.Sym == nil && m == nil {
+	if t.Sym == nil && len(m) == 0 {
 		return off
 	}
 
@@ -568,10 +492,8 @@
 
 	dsymptr(sym, ptroff, sym, off)
 
-	n := 0
-	for a := m; a != nil; a = a.link {
+	for _, a := range m {
 		dtypesym(a.type_)
-		n++
 	}
 
 	ot := off
@@ -591,11 +513,12 @@
 	// slice header
 	ot = dsymptr(s, ot, s, ot+Widthptr+2*Widthint)
 
+	n := len(m)
 	ot = duintxx(s, ot, uint64(n), Widthint)
 	ot = duintxx(s, ot, uint64(n), Widthint)
 
 	// methods
-	for a := m; a != nil; a = a.link {
+	for _, a := range m {
 		// method
 		// ../../runtime/type.go:/method
 		ot = dgostringptr(s, ot, a.name)
@@ -943,10 +866,8 @@
 	return s
 }
 
-/*
- * Returns 1 if t has a reflexive equality operator.
- * That is, if x==x for all x of type t.
- */
+// isreflexive reports whether t has a reflexive equality operator.
+// That is, whether x==x for all x of type t.
 func isreflexive(t *Type) bool {
 	switch t.Etype {
 	case TBOOL,
@@ -987,7 +908,6 @@
 				return false
 			}
 		}
-
 		return true
 
 	default:
@@ -996,6 +916,56 @@
 	}
 }
 
+// needkeyupdate reports whether map updates with t as a key
+// need the key to be updated.
+func needkeyupdate(t *Type) bool {
+	switch t.Etype {
+	case TBOOL,
+		TINT,
+		TUINT,
+		TINT8,
+		TUINT8,
+		TINT16,
+		TUINT16,
+		TINT32,
+		TUINT32,
+		TINT64,
+		TUINT64,
+		TUINTPTR,
+		TPTR32,
+		TPTR64,
+		TUNSAFEPTR,
+		TCHAN:
+		return false
+
+	case TFLOAT32, // floats can be +0/-0
+		TFLOAT64,
+		TCOMPLEX64,
+		TCOMPLEX128,
+		TINTER,
+		TSTRING: // strings might have smaller backing stores
+		return true
+
+	case TARRAY:
+		if Isslice(t) {
+			Fatalf("slice can't be a map key: %v", t)
+		}
+		return needkeyupdate(t.Type)
+
+	case TSTRUCT:
+		for t1 := t.Type; t1 != nil; t1 = t1.Down {
+			if needkeyupdate(t1.Type) {
+				return true
+			}
+		}
+		return false
+
+	default:
+		Fatalf("bad type for map key: %v", t)
+		return true
+	}
+}
+
 func dtypesym(t *Type) *Sym {
 	// Replace byte, rune aliases with real type.
 	// They've been separate internally to make error messages
@@ -1124,28 +1094,27 @@
 
 	case TINTER:
 		m := imethods(t)
-		n := 0
-		for a := m; a != nil; a = a.link {
+		n := len(m)
+		for _, a := range m {
 			dtypesym(a.type_)
-			n++
 		}
 
-		// ../../runtime/type.go:/InterfaceType
+		// ../../../runtime/type.go:/InterfaceType
 		ot = dcommontype(s, ot, t)
 
 		xt = ot - 2*Widthptr
 		ot = dsymptr(s, ot, s, ot+Widthptr+2*Widthint)
 		ot = duintxx(s, ot, uint64(n), Widthint)
 		ot = duintxx(s, ot, uint64(n), Widthint)
-		for a := m; a != nil; a = a.link {
-			// ../../runtime/type.go:/imethod
+		for _, a := range m {
+			// ../../../runtime/type.go:/imethod
 			ot = dgostringptr(s, ot, a.name)
 
 			ot = dgopkgpath(s, ot, a.pkg)
 			ot = dsymptr(s, ot, dtypesym(a.type_), 0)
 		}
 
-		// ../../runtime/type.go:/MapType
+		// ../../../runtime/type.go:/MapType
 	case TMAP:
 		s1 := dtypesym(t.Down)
 
@@ -1176,6 +1145,7 @@
 
 		ot = duint16(s, ot, uint16(mapbucket(t).Width))
 		ot = duint8(s, ot, uint8(obj.Bool2int(isreflexive(t.Down))))
+		ot = duint8(s, ot, uint8(obj.Bool2int(needkeyupdate(t.Down))))
 
 	case TPTR32, TPTR64:
 		if t.Type.Etype == TANY {
@@ -1269,8 +1239,7 @@
 	var n *Node
 
 	// copy types from externdcl list to signatlist
-	for l := externdcl; l != nil; l = l.Next {
-		n = l.N
+	for _, n := range externdcl {
 		if n.Op != OTYPE {
 			continue
 		}
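
Deleting lsort retires a hand-rolled recursive merge sort over the Sig linked
list; methods now collects into a []*Sig and lets sort.Sort do the ordering
through the byMethodNameAndPackagePath adapter, while imethods merely verifies
the incoming order with siglt. The adapter pattern in miniature, on a
hypothetical element type:

	type item struct{ name string }

	type byName []*item

	func (x byName) Len() int           { return len(x) }
	func (x byName) Swap(i, j int)      { x[i], x[j] = x[j], x[i] }
	func (x byName) Less(i, j int) bool { return x[i].name < x[j].name }

	// sort.Sort(byName(items)) replaces the manual splitting and merging.
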
diff --git a/src/cmd/compile/internal/gc/reflect_test.go b/src/cmd/compile/internal/gc/reflect_test.go
new file mode 100644
index 0000000..9e39933
--- /dev/null
+++ b/src/cmd/compile/internal/gc/reflect_test.go
@@ -0,0 +1,47 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gc
+
+import (
+	"reflect"
+	"sort"
+	"testing"
+)
+
+func TestSortingByMethodNameAndPackagePath(t *testing.T) {
+	data := []*Sig{
+		&Sig{name: "b", pkg: &Pkg{Path: "abc"}},
+		&Sig{name: "b", pkg: nil},
+		&Sig{name: "c", pkg: nil},
+		&Sig{name: "c", pkg: &Pkg{Path: "uvw"}},
+		&Sig{name: "c", pkg: nil},
+		&Sig{name: "b", pkg: &Pkg{Path: "xyz"}},
+		&Sig{name: "a", pkg: &Pkg{Path: "abc"}},
+		&Sig{name: "b", pkg: nil},
+	}
+	want := []*Sig{
+		&Sig{name: "a", pkg: &Pkg{Path: "abc"}},
+		&Sig{name: "b", pkg: nil},
+		&Sig{name: "b", pkg: nil},
+		&Sig{name: "b", pkg: &Pkg{Path: "abc"}},
+		&Sig{name: "b", pkg: &Pkg{Path: "xyz"}},
+		&Sig{name: "c", pkg: nil},
+		&Sig{name: "c", pkg: nil},
+		&Sig{name: "c", pkg: &Pkg{Path: "uvw"}},
+	}
+	if len(data) != len(want) {
+		t.Fatal("want and data must have the same length")
+	}
+	if reflect.DeepEqual(data, want) {
+		t.Fatal("data must be shuffled")
+	}
+	sort.Sort(byMethodNameAndPackagePath(data))
+	if !reflect.DeepEqual(data, want) {
+		t.Logf("want: %#v", want)
+		t.Logf("data: %#v", data)
+		t.Errorf("sorting failed")
+	}
+}
diff --git a/src/cmd/compile/internal/gc/reg.go b/src/cmd/compile/internal/gc/reg.go
index b3e9621..ff6ec32 100644
--- a/src/cmd/compile/internal/gc/reg.go
+++ b/src/cmd/compile/internal/gc/reg.go
@@ -170,7 +170,7 @@
 
 	for bany(&bit) {
 		// convert each bit to a variable
-		i = bnum(bit)
+		i = bnum(&bit)
 
 		node = vars[i].node
 		n = int(vars[i].name)
@@ -1321,7 +1321,7 @@
 			bit.b[z] = LOAD(r, z) &^ (r.act.b[z] | addrs.b[z])
 		}
 		for bany(&bit) {
-			i = bnum(bit)
+			i = bnum(&bit)
 			change = 0
 			paint1(f, i)
 			biclr(&bit, uint(i))
@@ -1465,7 +1465,7 @@
 }
 
 // bnum reports the lowest index of a 1 bit in a.
-func bnum(a Bits) int {
+func bnum(a *Bits) int {
 	for i, x := range &a.b { // & to avoid making a copy of a.b
 		if x != 0 {
 			return 64*i + Bitno(x)
@@ -1541,7 +1541,7 @@
 	var buf bytes.Buffer
 	sep := ""
 	for bany(&bits) {
-		i := bnum(bits)
+		i := bnum(&bits)
 		buf.WriteString(sep)
 		sep = " "
 		v := &vars[i]
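
bnum now takes *Bits: the struct wraps a fixed-size word array, so passing it
by value copied the whole array on every call, which is also why the body
already ranged over &a.b. A sketch of the same function with an assumed array
size (assumes import "math/bits"):

	type Bits struct{ b [4]uint64 } // illustrative size

	// lowest reports the index of the lowest 1 bit, or -1 if none is set.
	// Taking *Bits avoids copying the array at every call.
	func lowest(a *Bits) int {
		for i, x := range &a.b {
			if x != 0 {
				return 64*i + bits.TrailingZeros64(x)
			}
		}
		return -1
	}
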
diff --git a/src/cmd/compile/internal/gc/sinit.go b/src/cmd/compile/internal/gc/sinit.go
index a6eeaf0..70d32f7 100644
--- a/src/cmd/compile/internal/gc/sinit.go
+++ b/src/cmd/compile/internal/gc/sinit.go
@@ -329,13 +329,13 @@
 			// copy slice
 			a := inittemps[r]
 
-			n1 := *l
-			n1.Xoffset = l.Xoffset + int64(Array_array)
-			gdata(&n1, Nod(OADDR, a, nil), Widthptr)
-			n1.Xoffset = l.Xoffset + int64(Array_nel)
-			gdata(&n1, r.Right, Widthint)
-			n1.Xoffset = l.Xoffset + int64(Array_cap)
-			gdata(&n1, r.Right, Widthint)
+			n := *l
+			n.Xoffset = l.Xoffset + int64(Array_array)
+			gdata(&n, Nod(OADDR, a, nil), Widthptr)
+			n.Xoffset = l.Xoffset + int64(Array_nel)
+			gdata(&n, r.Right, Widthint)
+			n.Xoffset = l.Xoffset + int64(Array_cap)
+			gdata(&n, r.Right, Widthint)
 			return true
 		}
 		fallthrough
@@ -344,24 +344,21 @@
 	case OSTRUCTLIT:
 		p := initplans[r]
 
-		n1 := *l
-		var e *InitEntry
-		var ll *Node
-		var rr *Node
-		for i := 0; i < len(p.E); i++ {
-			e = &p.E[i]
-			n1.Xoffset = l.Xoffset + e.Xoffset
-			n1.Type = e.Expr.Type
+		n := *l
+		for i := range p.E {
+			e := &p.E[i]
+			n.Xoffset = l.Xoffset + e.Xoffset
+			n.Type = e.Expr.Type
 			if e.Expr.Op == OLITERAL {
-				gdata(&n1, e.Expr, int(n1.Type.Width))
+				gdata(&n, e.Expr, int(n.Type.Width))
 			} else {
-				ll = Nod(OXXX, nil, nil)
-				*ll = n1
+				ll := Nod(OXXX, nil, nil)
+				*ll = n
 				ll.Orig = ll // completely separate copy
 				if !staticassign(ll, e.Expr, out) {
 					// Requires computation, but we're
 					// copying someone else's computation.
-					rr = Nod(OXXX, nil, nil)
+					rr := Nod(OXXX, nil, nil)
 
 					*rr = *orig
 					rr.Orig = rr // completely separate copy
@@ -380,8 +377,6 @@
 }
 
 func staticassign(l *Node, r *Node, out **NodeList) bool {
-	var n1 Node
-
 	for r.Op == OCONVNOP {
 		r = r.Left
 	}
@@ -404,9 +399,9 @@
 	case OADDR:
 		var nam Node
 		if stataddr(&nam, r.Left) {
-			n1 := *r
-			n1.Left = &nam
-			gdata(l, &n1, int(l.Type.Width))
+			n := *r
+			n.Left = &nam
+			gdata(l, &n, int(l.Type.Width))
 			return true
 		}
 		fallthrough
@@ -448,13 +443,13 @@
 			ta.Bound = Mpgetfix(r.Right.Val().U.(*Mpint))
 			a := staticname(ta, 1)
 			inittemps[r] = a
-			n1 = *l
-			n1.Xoffset = l.Xoffset + int64(Array_array)
-			gdata(&n1, Nod(OADDR, a, nil), Widthptr)
-			n1.Xoffset = l.Xoffset + int64(Array_nel)
-			gdata(&n1, r.Right, Widthint)
-			n1.Xoffset = l.Xoffset + int64(Array_cap)
-			gdata(&n1, r.Right, Widthint)
+			n := *l
+			n.Xoffset = l.Xoffset + int64(Array_array)
+			gdata(&n, Nod(OADDR, a, nil), Widthptr)
+			n.Xoffset = l.Xoffset + int64(Array_nel)
+			gdata(&n, r.Right, Widthint)
+			n.Xoffset = l.Xoffset + int64(Array_cap)
+			gdata(&n, r.Right, Widthint)
 
 			// Fall through to init underlying array.
 			l = a
@@ -466,19 +461,17 @@
 		initplan(r)
 
 		p := initplans[r]
-		n1 = *l
-		var e *InitEntry
-		var a *Node
-		for i := 0; i < len(p.E); i++ {
-			e = &p.E[i]
-			n1.Xoffset = l.Xoffset + e.Xoffset
-			n1.Type = e.Expr.Type
+		n := *l
+		for i := range p.E {
+			e := &p.E[i]
+			n.Xoffset = l.Xoffset + e.Xoffset
+			n.Type = e.Expr.Type
 			if e.Expr.Op == OLITERAL {
-				gdata(&n1, e.Expr, int(n1.Type.Width))
+				gdata(&n, e.Expr, int(n.Type.Width))
 			} else {
 				setlineno(e.Expr)
-				a = Nod(OXXX, nil, nil)
-				*a = n1
+				a := Nod(OXXX, nil, nil)
+				*a = n
 				a.Orig = a // completely separate copy
 				if !staticassign(a, e.Expr, out) {
 					*out = list(*out, Nod(OAS, a, e.Expr))
@@ -569,9 +562,8 @@
 		break
 	}
 
-	var value *Node
 	for nl := n.List; nl != nil; nl = nl.Next {
-		value = nl.N.Right
+		value := nl.N.Right
 		mode |= getdyn(value, 0)
 		if mode == MODEDYNAM|MODECONST {
 			break
@@ -582,18 +574,15 @@
 }
 
 func structlit(ctxt int, pass int, n *Node, var_ *Node, init **NodeList) {
-	var r *Node
-	var a *Node
-	var index *Node
-	var value *Node
-
 	for nl := n.List; nl != nil; nl = nl.Next {
-		r = nl.N
+		r := nl.N
 		if r.Op != OKEY {
 			Fatalf("structlit: rhs not OKEY: %v", r)
 		}
-		index = r.Left
-		value = r.Right
+		index := r.Left
+		value := r.Right
+
+		var a *Node
 
 		switch value.Op {
 		case OARRAYLIT:
@@ -650,18 +639,15 @@
 }
 
 func arraylit(ctxt int, pass int, n *Node, var_ *Node, init **NodeList) {
-	var r *Node
-	var a *Node
-	var index *Node
-	var value *Node
-
 	for l := n.List; l != nil; l = l.Next {
-		r = l.N
+		r := l.N
 		if r.Op != OKEY {
 			Fatalf("arraylit: rhs not OKEY: %v", r)
 		}
-		index = r.Left
-		value = r.Right
+		index := r.Left
+		value := r.Right
+
+		var a *Node
 
 		switch value.Op {
 		case OARRAYLIT:
@@ -828,17 +814,14 @@
 	*init = list(*init, a)
 
 	// put dynamics into slice (6)
-	var value *Node
-	var r *Node
-	var index *Node
 	for l := n.List; l != nil; l = l.Next {
-		r = l.N
+		r := l.N
 		if r.Op != OKEY {
 			Fatalf("slicelit: rhs not OKEY: %v", r)
 		}
-		index = r.Left
-		value = r.Right
-		a = Nod(OINDEX, var_, index)
+		index := r.Left
+		value := r.Right
+		a := Nod(OINDEX, var_, index)
 		a.Bounded = true
 
 		// TODO need to check bounds?
@@ -872,10 +855,6 @@
 }
 
 func maplit(ctxt int, n *Node, var_ *Node, init **NodeList) {
-	var r *Node
-	var index *Node
-	var value *Node
-
 	ctxt = 0
 
 	// make the map var
@@ -889,13 +868,12 @@
 	b := int64(0)
 
 	for l := n.List; l != nil; l = l.Next {
-		r = l.N
-
+		r := l.N
 		if r.Op != OKEY {
 			Fatalf("maplit: rhs not OKEY: %v", r)
 		}
-		index = r.Left
-		value = r.Right
+		index := r.Left
+		value := r.Right
 
 		if isliteral(index) && isliteral(value) {
 			b++
@@ -936,17 +914,14 @@
 		vstat := staticname(t, ctxt)
 
 		b := int64(0)
-		var index *Node
-		var r *Node
-		var value *Node
 		for l := n.List; l != nil; l = l.Next {
-			r = l.N
+			r := l.N
 
 			if r.Op != OKEY {
 				Fatalf("maplit: rhs not OKEY: %v", r)
 			}
-			index = r.Left
-			value = r.Right
+			index := r.Left
+			value := r.Right
 
 			if isliteral(index) && isliteral(value) {
 				// build vstat[b].a = key;
@@ -981,13 +956,13 @@
 		// for i = 0; i < len(vstat); i++ {
 		//	map[vstat[i].a] = vstat[i].b
 		// }
-		index = temp(Types[TINT])
+		index := temp(Types[TINT])
 
 		a = Nod(OINDEX, vstat, index)
 		a.Bounded = true
 		a = Nod(ODOT, a, newname(symb))
 
-		r = Nod(OINDEX, vstat, index)
+		r := Nod(OINDEX, vstat, index)
 		r.Bounded = true
 		r = Nod(ODOT, r, newname(syma))
 		r = Nod(OINDEX, var_, r)
@@ -1011,13 +986,13 @@
 
 	var val *Node
 	for l := n.List; l != nil; l = l.Next {
-		r = l.N
+		r := l.N
 
 		if r.Op != OKEY {
 			Fatalf("maplit: rhs not OKEY: %v", r)
 		}
-		index = r.Left
-		value = r.Right
+		index := r.Left
+		value := r.Right
 
 		if isliteral(index) && isliteral(value) {
 			continue
@@ -1291,9 +1266,8 @@
 		Fatalf("initplan")
 
 	case OARRAYLIT:
-		var a *Node
 		for l := n.List; l != nil; l = l.Next {
-			a = l.N
+			a := l.N
 			if a.Op != OKEY || !Smallintconst(a.Left) {
 				Fatalf("initplan arraylit")
 			}
@@ -1301,9 +1275,8 @@
 		}
 
 	case OSTRUCTLIT:
-		var a *Node
 		for l := n.List; l != nil; l = l.Next {
-			a = l.N
+			a := l.N
 			if a.Op != OKEY || a.Left.Type == nil {
 				Fatalf("initplan structlit")
 			}
@@ -1311,9 +1284,8 @@
 		}
 
 	case OMAPLIT:
-		var a *Node
 		for l := n.List; l != nil; l = l.Next {
-			a = l.N
+			a := l.N
 			if a.Op != OKEY {
 				Fatalf("initplan maplit")
 			}
@@ -1333,13 +1305,11 @@
 	if isvaluelit(n) {
 		initplan(n)
 		q := initplans[n]
-		var e *InitEntry
-		for i := 0; i < len(q.E); i++ {
-			e = entry(p)
-			*e = q.E[i]
+		for _, qe := range q.E {
+			e := entry(p)
+			*e = qe
 			e.Xoffset += xoffset
 		}
-
 		return
 	}
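
The sinit.go changes are one cleanup applied repeatedly: variables hoisted
above loops (r, index, value, a, e) move into the loop body with :=, so every
iteration gets a fresh, tightly scoped binding and nothing leaks past the loop.
The pattern in isolation:

	// Before: one shared variable outlives the loop.
	func before() int {
		var v int
		for i := 0; i < 3; i++ {
			v = i * i
		}
		return v // v is still in scope here
	}

	// After: v exists only inside an iteration.
	func after() {
		for i := 0; i < 3; i++ {
			v := i * i
			_ = v
		}
	}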
 
diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index 7219ffd..36f7435 100644
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -2120,10 +2120,7 @@
 
 	// Set receiver (for interface calls)
 	if rcvr != nil {
-		var argStart int64
-		if HasLinkRegister() {
-			argStart += int64(Widthptr)
-		}
+		argStart := Ctxt.FixedFrameSize()
 		if k != callNormal {
 			argStart += int64(2 * Widthptr)
 		}
@@ -3737,6 +3734,12 @@
 		p.To.Type = obj.TYPE_ADDR
 		p.To.Sym = Linksym(Pkglookup("duffzero", Runtimepkg))
 		p.To.Offset = v.AuxInt
+	case ssa.OpAMD64MOVOconst:
+		if v.AuxInt != 0 {
+			v.Unimplementedf("MOVOconst can only do constant=0")
+		}
+		r := regnum(v)
+		opregreg(x86.AXORPS, r, r)
 
 	case ssa.OpCopy: // TODO: lower to MOVQ earlier?
 		if v.Type.IsMemory() {
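
The new MOVOconst case zeroes a 16-byte register with the self-XOR idiom
instead of materializing a zero constant: on x86, XORPS of a register with
itself is recognized as a dependency-breaking zeroing instruction, and the
Unimplementedf guard keeps the op honest until non-zero constants are needed.
In assembler terms, the case emits roughly:

	XORPS X0, X0   // X0 = 0; no constant load, no input dependency
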
diff --git a/src/cmd/compile/internal/gc/subr.go b/src/cmd/compile/internal/gc/subr.go
index df5e398..df6b6f6 100644
--- a/src/cmd/compile/internal/gc/subr.go
+++ b/src/cmd/compile/internal/gc/subr.go
@@ -59,26 +59,21 @@
 	})
 }
 
+// errcmp sorts errors by line, then seq, then message.
 type errcmp []Error
 
-func (x errcmp) Len() int {
-	return len(x)
-}
-
-func (x errcmp) Swap(i, j int) {
-	x[i], x[j] = x[j], x[i]
-}
-
+func (x errcmp) Len() int      { return len(x) }
+func (x errcmp) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
 func (x errcmp) Less(i, j int) bool {
 	a := &x[i]
 	b := &x[j]
 	if a.lineno != b.lineno {
-		return a.lineno-b.lineno < 0
+		return a.lineno < b.lineno
 	}
 	if a.seq != b.seq {
-		return a.seq-b.seq < 0
+		return a.seq < b.seq
 	}
-	return stringsCompare(a.msg, b.msg) < 0
+	return a.msg < b.msg
 }
 
 func Flusherrors() {
@@ -86,7 +81,7 @@
 	if len(errors) == 0 {
 		return
 	}
-	sort.Sort(errcmp(errors[:len(errors)]))
+	sort.Sort(errcmp(errors))
 	for i := 0; i < len(errors); i++ {
 		if i == 0 || errors[i].msg != errors[i-1].msg {
 			fmt.Printf("%s", errors[i].msg)
@@ -127,7 +122,7 @@
 
 		// An unexpected EOF caused a syntax error. Use the previous
 		// line number since getc generated a fake newline character.
-		if curio.eofnl != 0 {
+		if curio.eofnl {
 			lexlineno = prevlineno
 		}
 
@@ -352,23 +347,6 @@
 	}
 }
 
-func gethunk() {
-	nh := int32(NHUNK)
-	if thunk >= 10*NHUNK {
-		nh = 10 * NHUNK
-	}
-	h := string(make([]byte, nh))
-	if h == "" {
-		Flusherrors()
-		Yyerror("out of memory")
-		errorexit()
-	}
-
-	hunk = h
-	nhunk = nh
-	thunk += nh
-}
-
 func Nod(op int, nleft *Node, nright *Node) *Node {
 	n := new(Node)
 	n.Op = uint8(op)
@@ -612,16 +590,11 @@
 	return t
 }
 
+// methcmp sorts by symbol name, then by package path for unexported symbols.
 type methcmp []*Type
 
-func (x methcmp) Len() int {
-	return len(x)
-}
-
-func (x methcmp) Swap(i, j int) {
-	x[i], x[j] = x[j], x[i]
-}
-
+func (x methcmp) Len() int      { return len(x) }
+func (x methcmp) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
 func (x methcmp) Less(i, j int) bool {
 	a := x[i]
 	b := x[j]
@@ -632,16 +605,14 @@
 		return true
 	}
 	if b.Sym == nil {
-		return 1 < 0
+		return false
 	}
-	k := stringsCompare(a.Sym.Name, b.Sym.Name)
-	if k != 0 {
-		return k < 0
+	if a.Sym.Name != b.Sym.Name {
+		return a.Sym.Name < b.Sym.Name
 	}
 	if !exportname(a.Sym.Name) {
-		k := stringsCompare(a.Sym.Pkg.Path, b.Sym.Pkg.Path)
-		if k != 0 {
-			return k < 0
+		if a.Sym.Pkg.Path != b.Sym.Pkg.Path {
+			return a.Sym.Pkg.Path < b.Sym.Pkg.Path
 		}
 	}
 
@@ -653,24 +624,19 @@
 		return t
 	}
 
-	i := 0
+	var a []*Type
 	for f := t.Type; f != nil; f = f.Down {
-		i++
+		a = append(a, f)
 	}
-	a := make([]*Type, i)
-	i = 0
-	var f *Type
-	for f = t.Type; f != nil; f = f.Down {
-		a[i] = f
-		i++
-	}
-	sort.Sort(methcmp(a[:i]))
-	for i--; i >= 0; i-- {
-		a[i].Down = f
-		f = a[i]
-	}
+	sort.Sort(methcmp(a))
 
-	t.Type = f
+	n := len(a) // n > 0 due to initial conditions.
+	for i := 0; i < n-1; i++ {
+		a[i].Down = a[i+1]
+	}
+	a[n-1].Down = nil
+
+	t.Type = a[0]
 	return t
 }
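
The method-sorting rewrite above swaps a count-allocate-fill-relink dance for
append, sort.Sort over methcmp, and a single forward pass that stitches the
Down pointers. The relinking idiom, on a hypothetical node type:

	type node struct {
		name string
		down *node
	}

	// relink chains a sorted, non-empty slice into a linked list.
	func relink(a []*node) *node {
		for i := 0; i < len(a)-1; i++ {
			a[i].down = a[i+1]
		}
		a[len(a)-1].down = nil
		return a[0]
	}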
 
@@ -1618,33 +1584,32 @@
 }
 
 func frame(context int) {
-	var l *NodeList
-
 	if context != 0 {
 		fmt.Printf("--- external frame ---\n")
-		l = externdcl
-	} else if Curfn != nil {
-		fmt.Printf("--- %v frame ---\n", Curfn.Func.Nname.Sym)
-		l = Curfn.Func.Dcl
-	} else {
+		for _, n := range externdcl {
+			printframenode(n)
+		}
 		return
 	}
 
-	var n *Node
-	var w int64
-	for ; l != nil; l = l.Next {
-		n = l.N
-		w = -1
-		if n.Type != nil {
-			w = n.Type.Width
+	if Curfn != nil {
+		fmt.Printf("--- %v frame ---\n", Curfn.Func.Nname.Sym)
+		for l := Curfn.Func.Dcl; l != nil; l = l.Next {
+			printframenode(l.N)
 		}
-		switch n.Op {
-		case ONAME:
-			fmt.Printf("%v %v G%d %v width=%d\n", Oconv(int(n.Op), 0), n.Sym, n.Name.Vargen, n.Type, w)
+	}
+}
 
-		case OTYPE:
-			fmt.Printf("%v %v width=%d\n", Oconv(int(n.Op), 0), n.Type, w)
-		}
+func printframenode(n *Node) {
+	w := int64(-1)
+	if n.Type != nil {
+		w = n.Type.Width
+	}
+	switch n.Op {
+	case ONAME:
+		fmt.Printf("%v %v G%d %v width=%d\n", Oconv(int(n.Op), 0), n.Sym, n.Name.Vargen, n.Type, w)
+	case OTYPE:
+		fmt.Printf("%v %v width=%d\n", Oconv(int(n.Op), 0), n.Type, w)
 	}
 }
 
@@ -1983,19 +1948,6 @@
 	return copyexpr(n, n.Type, init)
 }
 
-/*
- * return n in a local variable of type t if it is not already.
- * the value is guaranteed not to change except by direct
- * assignment to it.
- */
-func localexpr(n *Node, t *Type, init **NodeList) *Node {
-	if n.Op == ONAME && (!n.Addrtaken || strings.HasPrefix(n.Sym.Name, "autotmp_")) && (n.Class == PAUTO || n.Class == PPARAM || n.Class == PPARAMOUT) && convertop(n.Type, t, nil) == OCONVNOP {
-		return n
-	}
-
-	return copyexpr(n, t, init)
-}
-
 func Setmaxarg(t *Type, extra int32) {
 	dowidth(t)
 	w := t.Argwid
@@ -2163,17 +2115,17 @@
  */
 type Symlink struct {
 	field     *Type
-	good      uint8
-	followptr uint8
 	link      *Symlink
+	good      bool
+	followptr bool
 }
 
 var slist *Symlink
 
-func expand0(t *Type, followptr int) {
+func expand0(t *Type, followptr bool) {
 	u := t
 	if Isptr[u.Etype] {
-		followptr = 1
+		followptr = true
 		u = u.Type
 	}
 
@@ -2187,7 +2139,7 @@
 			sl = new(Symlink)
 			sl.field = f
 			sl.link = slist
-			sl.followptr = uint8(followptr)
+			sl.followptr = followptr
 			slist = sl
 		}
 
@@ -2205,13 +2157,13 @@
 			sl = new(Symlink)
 			sl.field = f
 			sl.link = slist
-			sl.followptr = uint8(followptr)
+			sl.followptr = followptr
 			slist = sl
 		}
 	}
 }
 
-func expand1(t *Type, d int, followptr int) {
+func expand1(t *Type, d int, followptr bool) {
 	if t.Trecur != 0 {
 		return
 	}
@@ -2226,7 +2178,7 @@
 
 	u := t
 	if Isptr[u.Etype] {
-		followptr = 1
+		followptr = true
 		u = u.Type
 	}
 
@@ -2263,7 +2215,7 @@
 	// generate all reachable methods
 	slist = nil
 
-	expand1(t, len(dotlist)-1, 0)
+	expand1(t, len(dotlist)-1, false)
 
 	// check each method to be uniquely reachable
 	var c int
@@ -2278,7 +2230,7 @@
 			if c == 1 {
 				// addot1 may have dug out arbitrary fields, we only want methods.
 				if f.Type.Etype == TFUNC && f.Type.Thistuple > 0 {
-					sl.good = 1
+					sl.good = true
 					sl.field = f
 				}
 			}
@@ -2293,13 +2245,13 @@
 
 	t.Xmethod = t.Method
 	for sl := slist; sl != nil; sl = sl.link {
-		if sl.good != 0 {
+		if sl.good {
 			// add it to the base type method list
 			f = typ(TFIELD)
 
 			*f = *sl.field
 			f.Embedded = 1 // needs a trampoline
-			if sl.followptr != 0 {
+			if sl.followptr {
 				f.Embedded = 2
 			}
 			f.Down = t.Xmethod
@@ -2616,21 +2568,6 @@
 		colasdefn(n.List, n)
 		ni = n.List.N
 
-		// TODO: with aeshash we don't need these shift/mul parts
-
-		// h = h<<3 | h>>61
-		n.Nbody = list(n.Nbody, Nod(OAS, nh, Nod(OOR, Nod(OLSH, nh, Nodintconst(3)), Nod(ORSH, nh, Nodintconst(int64(Widthptr)*8-3)))))
-
-		// h *= mul
-		// Same multipliers as in runtime.memhash.
-		var mul int64
-		if Widthptr == 4 {
-			mul = 3267000013
-		} else {
-			mul = 23344194077549503
-		}
-		n.Nbody = list(n.Nbody, Nod(OAS, nh, Nod(OMUL, nh, Nodintconst(mul))))
-
 		// h = hashel(&p[i], h)
 		call := Nod(OCALL, hashel, nil)
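
The deleted statements built AST nodes for a rotate-and-multiply mixing step between element hashes; per the deleted TODO, the runtime's hash functions presumably make that extra mixing unnecessary. In scalar form, the removed step for Widthptr == 8 was equivalent to this sketch (multiplier copied from the deleted code):

	package main

	import "fmt"

	// mix is the scalar form of the deleted AST-building code for
	// Widthptr == 8; the multiplier is the one shared with
	// runtime.memhash in the deleted comment.
	func mix(h uint64) uint64 {
		h = h<<3 | h>>61
		h *= 23344194077549503
		return h
	}

	func main() {
		fmt.Printf("%#x\n", mix(1))
	}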
 
@@ -2968,8 +2905,8 @@
 	safemode = old_safemode
 }
 
-func ifacelookdot(s *Sym, t *Type, followptr *int, ignorecase int) *Type {
-	*followptr = 0
+func ifacelookdot(s *Sym, t *Type, followptr *bool, ignorecase int) *Type {
+	*followptr = false
 
 	if t == nil {
 		return nil
@@ -2988,7 +2925,7 @@
 		if c == 1 {
 			for i = 0; i < d; i++ {
 				if Isptr[dotlist[i].field.Type.Etype] {
-					*followptr = 1
+					*followptr = true
 					break
 				}
 			}
@@ -3046,9 +2983,12 @@
 	}
 	var tm *Type
 	var imtype *Type
-	var followptr int
+	var followptr bool
 	var rcvr *Type
 	for im := iface.Type; im != nil; im = im.Down {
+		if im.Broke {
+			continue
+		}
 		imtype = methodfunc(im.Type, nil)
 		tm = ifacelookdot(im.Sym, t, &followptr, 0)
 		if tm == nil || tm.Nointerface || !Eqtype(methodfunc(tm.Type, nil), imtype) {
@@ -3065,7 +3005,7 @@
 		// the method does not exist for value types.
 		rcvr = getthisx(tm.Type).Type.Type
 
-		if Isptr[rcvr.Etype] && !Isptr[t0.Etype] && followptr == 0 && !isifacemethod(tm.Type) {
+		if Isptr[rcvr.Etype] && !Isptr[t0.Etype] && !followptr && !isifacemethod(tm.Type) {
 			if false && Debug['r'] != 0 {
 				Yyerror("interface pointer mismatch")
 			}
@@ -3420,7 +3360,6 @@
  * only in the last segment of the path, and it makes for happier
  * users if we escape that as little as possible.
  *
- * If you edit this, edit ../ld/lib.c:/^pathtoprefix too.
  * If you edit this, edit ../../debug/goobj/read.go:/importPathToPrefix too.
  */
 func pathtoprefix(s string) string {
@@ -3492,17 +3431,13 @@
 		return true
 	}
 
-	for i := 0; i < len(reservedimports); i++ {
-		if path == reservedimports[i] {
+	for _, ri := range reservedimports {
+		if path == ri {
 			Yyerror("import path %q is reserved and cannot be used", path)
 			return true
 		}
 	}
 
-	var s string
-	_ = s
-	var r uint
-	_ = r
 	for _, r := range path {
 		if r == utf8.RuneError {
 			Yyerror("import path contains invalid UTF-8 sequence: %q", path)
diff --git a/src/cmd/compile/internal/gc/swt.go b/src/cmd/compile/internal/gc/swt.go
index a0e0c41..9ed30b2 100644
--- a/src/cmd/compile/internal/gc/swt.go
+++ b/src/cmd/compile/internal/gc/swt.go
@@ -6,7 +6,6 @@
 
 import (
 	"cmd/internal/obj"
-	"fmt"
 	"sort"
 	"strconv"
 )
@@ -779,7 +778,13 @@
 		if len(a) > len(b) {
 			return +1
 		}
-		return stringsCompare(a, b)
+		if a == b {
+			return 0
+		}
+		if a < b {
+			return -1
+		}
+		return +1
 	}
 
 	return 0
@@ -806,43 +811,3 @@
 	// sort by ordinal
 	return c1.ordinal < c2.ordinal
 }
-
-func dumpcase(cc []*caseClause) {
-	for _, c := range cc {
-		switch c.typ {
-		case caseKindDefault:
-			fmt.Printf("case-default\n")
-			fmt.Printf("\tord=%d\n", c.ordinal)
-
-		case caseKindExprConst:
-			fmt.Printf("case-exprconst\n")
-			fmt.Printf("\tord=%d\n", c.ordinal)
-
-		case caseKindExprVar:
-			fmt.Printf("case-exprvar\n")
-			fmt.Printf("\tord=%d\n", c.ordinal)
-			fmt.Printf("\top=%v\n", Oconv(int(c.node.Left.Op), 0))
-
-		case caseKindTypeNil:
-			fmt.Printf("case-typenil\n")
-			fmt.Printf("\tord=%d\n", c.ordinal)
-
-		case caseKindTypeConst:
-			fmt.Printf("case-typeconst\n")
-			fmt.Printf("\tord=%d\n", c.ordinal)
-			fmt.Printf("\thash=%x\n", c.hash)
-
-		case caseKindTypeVar:
-			fmt.Printf("case-typevar\n")
-			fmt.Printf("\tord=%d\n", c.ordinal)
-
-		default:
-			fmt.Printf("case-???\n")
-			fmt.Printf("\tord=%d\n", c.ordinal)
-			fmt.Printf("\top=%v\n", Oconv(int(c.node.Left.Op), 0))
-			fmt.Printf("\thash=%x\n", c.hash)
-		}
-	}
-
-	fmt.Printf("\n")
-}
diff --git a/src/cmd/compile/internal/gc/swt_test.go b/src/cmd/compile/internal/gc/swt_test.go
new file mode 100644
index 0000000..c1ee895
--- /dev/null
+++ b/src/cmd/compile/internal/gc/swt_test.go
@@ -0,0 +1,144 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gc
+
+import (
+	"cmd/compile/internal/big"
+	"testing"
+)
+
+func TestExprcmp(t *testing.T) {
+	testdata := []struct {
+		a, b caseClause
+		want int
+	}{
+		// Non-constants.
+		{
+			caseClause{node: Nod(OXXX, nil, nil), typ: caseKindExprVar},
+			caseClause{node: Nod(OXXX, nil, nil), typ: caseKindExprConst},
+			+1,
+		},
+		{
+			caseClause{node: Nod(OXXX, nil, nil), typ: caseKindExprConst},
+			caseClause{node: Nod(OXXX, nil, nil), typ: caseKindExprVar},
+			-1,
+		},
+		// Type switches
+		{
+			caseClause{node: Nod(OXXX, Nodintconst(0), nil), typ: caseKindExprConst},
+			caseClause{node: Nod(OXXX, Nodbool(true), nil), typ: caseKindExprConst},
+			-1,
+		},
+		{
+			caseClause{node: Nod(OXXX, Nodbool(true), nil), typ: caseKindExprConst},
+			caseClause{node: Nod(OXXX, Nodintconst(1), nil), typ: caseKindExprConst},
+			+1,
+		},
+		{
+			caseClause{node: Nod(OXXX, &Node{Type: &Type{Etype: TBOOL, Vargen: 1}}, nil), typ: caseKindExprConst},
+			caseClause{node: Nod(OXXX, &Node{Type: &Type{Etype: TINT, Vargen: 0}}, nil), typ: caseKindExprConst},
+			+1,
+		},
+		{
+			caseClause{node: Nod(OXXX, &Node{Type: &Type{Etype: TBOOL, Vargen: 1}}, nil), typ: caseKindExprConst},
+			caseClause{node: Nod(OXXX, &Node{Type: &Type{Etype: TINT, Vargen: 1}}, nil), typ: caseKindExprConst},
+			-1,
+		},
+		{
+			caseClause{node: Nod(OXXX, &Node{Type: &Type{Etype: TBOOL, Vargen: 0}}, nil), typ: caseKindExprConst},
+			caseClause{node: Nod(OXXX, &Node{Type: &Type{Etype: TINT, Vargen: 1}}, nil), typ: caseKindExprConst},
+			-1,
+		},
+		// Constant values.
+		// CTFLT
+		{
+			caseClause{node: Nod(OXXX, nodlit(Val{&Mpflt{Val: *big.NewFloat(0.1)}}), nil), typ: caseKindExprConst},
+			caseClause{node: Nod(OXXX, nodlit(Val{&Mpflt{Val: *big.NewFloat(0.2)}}), nil), typ: caseKindExprConst},
+			-1,
+		},
+		{
+			caseClause{node: Nod(OXXX, nodlit(Val{&Mpflt{Val: *big.NewFloat(0.1)}}), nil), typ: caseKindExprConst},
+			caseClause{node: Nod(OXXX, nodlit(Val{&Mpflt{Val: *big.NewFloat(0.1)}}), nil), typ: caseKindExprConst},
+			0,
+		},
+		{
+			caseClause{node: Nod(OXXX, nodlit(Val{&Mpflt{Val: *big.NewFloat(0.2)}}), nil), typ: caseKindExprConst},
+			caseClause{node: Nod(OXXX, nodlit(Val{&Mpflt{Val: *big.NewFloat(0.1)}}), nil), typ: caseKindExprConst},
+			+1,
+		},
+		// CTINT
+		{
+			caseClause{node: Nod(OXXX, Nodintconst(0), nil), typ: caseKindExprConst},
+			caseClause{node: Nod(OXXX, Nodintconst(1), nil), typ: caseKindExprConst},
+			-1,
+		},
+		{
+			caseClause{node: Nod(OXXX, Nodintconst(1), nil), typ: caseKindExprConst},
+			caseClause{node: Nod(OXXX, Nodintconst(1), nil), typ: caseKindExprConst},
+			0,
+		},
+		{
+			caseClause{node: Nod(OXXX, Nodintconst(1), nil), typ: caseKindExprConst},
+			caseClause{node: Nod(OXXX, Nodintconst(0), nil), typ: caseKindExprConst},
+			+1,
+		},
+		// CTRUNE
+		{
+			caseClause{node: Nod(OXXX, nodlit(Val{&Mpint{Val: *big.NewInt('a'), Rune: true}}), nil), typ: caseKindExprConst},
+			caseClause{node: Nod(OXXX, nodlit(Val{&Mpint{Val: *big.NewInt('b'), Rune: true}}), nil), typ: caseKindExprConst},
+			-1,
+		},
+		{
+			caseClause{node: Nod(OXXX, nodlit(Val{&Mpint{Val: *big.NewInt('b'), Rune: true}}), nil), typ: caseKindExprConst},
+			caseClause{node: Nod(OXXX, nodlit(Val{&Mpint{Val: *big.NewInt('b'), Rune: true}}), nil), typ: caseKindExprConst},
+			0,
+		},
+		{
+			caseClause{node: Nod(OXXX, nodlit(Val{&Mpint{Val: *big.NewInt('b'), Rune: true}}), nil), typ: caseKindExprConst},
+			caseClause{node: Nod(OXXX, nodlit(Val{&Mpint{Val: *big.NewInt('a'), Rune: true}}), nil), typ: caseKindExprConst},
+			+1,
+		},
+		// CTSTR
+		{
+			caseClause{node: Nod(OXXX, nodlit(Val{"ab"}), nil), typ: caseKindExprConst},
+			caseClause{node: Nod(OXXX, nodlit(Val{"abc"}), nil), typ: caseKindExprConst},
+			-1,
+		},
+		{
+			caseClause{node: Nod(OXXX, nodlit(Val{"abc"}), nil), typ: caseKindExprConst},
+			caseClause{node: Nod(OXXX, nodlit(Val{"xyz"}), nil), typ: caseKindExprConst},
+			-1,
+		},
+		{
+			caseClause{node: Nod(OXXX, nodlit(Val{"abc"}), nil), typ: caseKindExprConst},
+			caseClause{node: Nod(OXXX, nodlit(Val{"abc"}), nil), typ: caseKindExprConst},
+			0,
+		},
+		{
+			caseClause{node: Nod(OXXX, nodlit(Val{"abc"}), nil), typ: caseKindExprConst},
+			caseClause{node: Nod(OXXX, nodlit(Val{"ab"}), nil), typ: caseKindExprConst},
+			+1,
+		},
+		{
+			caseClause{node: Nod(OXXX, nodlit(Val{"xyz"}), nil), typ: caseKindExprConst},
+			caseClause{node: Nod(OXXX, nodlit(Val{"abc"}), nil), typ: caseKindExprConst},
+			+1,
+		},
+		// Everything else should compare equal.
+		{
+			caseClause{node: Nod(OXXX, nodnil(), nil), typ: caseKindExprConst},
+			caseClause{node: Nod(OXXX, nodnil(), nil), typ: caseKindExprConst},
+			0,
+		},
+	}
+	for i, d := range testdata {
+		got := exprcmp(&d.a, &d.b)
+		if d.want != got {
+			t.Errorf("%d: exprcmp(a, b) = %d; want %d", i, got, d.want)
+			t.Logf("\ta = caseClause{node: %#v, typ: %#v}", d.a.node, d.a.typ)
+			t.Logf("\tb = caseClause{node: %#v, typ: %#v}", d.b.node, d.b.typ)
+		}
+	}
+}
diff --git a/src/cmd/compile/internal/gc/syntax.go b/src/cmd/compile/internal/gc/syntax.go
index b71a1e7..6ef0cb0 100644
--- a/src/cmd/compile/internal/gc/syntax.go
+++ b/src/cmd/compile/internal/gc/syntax.go
@@ -409,9 +409,10 @@
 	return concat(l, list1(n))
 }
 
-// listsort sorts *l in place according to the 3-way comparison function f.
+// listsort sorts *l in place according to the comparison function lt.
+// The algorithm expects lt(a, b) to be equivalent to a < b.
 // The algorithm is mergesort, so it is guaranteed to be O(n log n).
-func listsort(l **NodeList, f func(*Node, *Node) int) {
+func listsort(l **NodeList, lt func(*Node, *Node) bool) {
 	if *l == nil || (*l).Next == nil {
 		return
 	}
@@ -436,10 +437,10 @@
 	(*l).End = l1
 
 	l1 = *l
-	listsort(&l1, f)
-	listsort(&l2, f)
+	listsort(&l1, lt)
+	listsort(&l2, lt)
 
-	if f(l1.N, l2.N) < 0 {
+	if lt(l1.N, l2.N) {
 		*l = l1
 	} else {
 		*l = l2
@@ -451,7 +452,7 @@
 
 	var le *NodeList
 	for (l1 != nil) && (l2 != nil) {
-		for (l1.Next != nil) && f(l1.Next.N, l2.N) < 0 {
+		for (l1.Next != nil) && lt(l1.Next.N, l2.N) {
 			l1 = l1.Next
 		}
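
listsort's comparator changes from a 3-way int result to a boolean less-than, so tests like f(l1.N, l2.N) < 0 become lt(l1.N, l2.N). A minimal sketch of adapting an old-style 3-way comparator to the new form (the int element type is illustrative):

	package main

	import "fmt"

	// threeWay is a stand-in for an old-style 3-way comparator.
	func threeWay(a, b int) int {
		switch {
		case a < b:
			return -1
		case a > b:
			return +1
		}
		return 0
	}

	func main() {
		// The new listsort signature wants a less-than predicate.
		lt := func(a, b int) bool { return threeWay(a, b) < 0 }
		fmt.Println(lt(1, 2), lt(2, 1), lt(2, 2)) // true false false
	}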
 
diff --git a/src/cmd/compile/internal/gc/typecheck.go b/src/cmd/compile/internal/gc/typecheck.go
index 314c3a9..f30d071 100644
--- a/src/cmd/compile/internal/gc/typecheck.go
+++ b/src/cmd/compile/internal/gc/typecheck.go
@@ -18,7 +18,7 @@
  * marks variables that escape the local frame.
  * rewrites n->op to be more specific in some cases.
  */
-var typecheckdefstack *NodeList
+var typecheckdefstack []*Node
 
 /*
  * resolve ONONAME to definition, if any.
@@ -1026,11 +1026,11 @@
 				break
 			}
 
-			if Isconst(n.Right, CTINT) {
+			if !n.Bounded && Isconst(n.Right, CTINT) {
 				x := Mpgetfix(n.Right.Val().U.(*Mpint))
 				if x < 0 {
 					Yyerror("invalid %s index %v (index must be non-negative)", why, n.Right)
-				} else if Isfixedarray(t) && t.Bound > 0 && x >= t.Bound {
+				} else if Isfixedarray(t) && x >= t.Bound {
 					Yyerror("invalid array index %v (out of bounds for %d-element array)", n.Right, t.Bound)
 				} else if Isconst(n.Left, CTSTR) && x >= int64(len(n.Left.Val().U.(string))) {
 					Yyerror("invalid string index %v (out of bounds for %d-byte string)", n.Right, len(n.Left.Val().U.(string)))
@@ -1160,16 +1160,16 @@
 		}
 
 		lo := n.Right.Left
-		if lo != nil && checksliceindex(l, lo, tp) < 0 {
+		if lo != nil && !checksliceindex(l, lo, tp) {
 			n.Type = nil
 			return
 		}
 		hi := n.Right.Right
-		if hi != nil && checksliceindex(l, hi, tp) < 0 {
+		if hi != nil && !checksliceindex(l, hi, tp) {
 			n.Type = nil
 			return
 		}
-		if checksliceconst(lo, hi) < 0 {
+		if !checksliceconst(lo, hi) {
 			n.Type = nil
 			return
 		}
@@ -1227,21 +1227,21 @@
 		}
 
 		lo := n.Right.Left
-		if lo != nil && checksliceindex(l, lo, tp) < 0 {
+		if lo != nil && !checksliceindex(l, lo, tp) {
 			n.Type = nil
 			return
 		}
 		mid := n.Right.Right.Left
-		if mid != nil && checksliceindex(l, mid, tp) < 0 {
+		if mid != nil && !checksliceindex(l, mid, tp) {
 			n.Type = nil
 			return
 		}
 		hi := n.Right.Right.Right
-		if hi != nil && checksliceindex(l, hi, tp) < 0 {
+		if hi != nil && !checksliceindex(l, hi, tp) {
 			n.Type = nil
 			return
 		}
-		if checksliceconst(lo, hi) < 0 || checksliceconst(lo, mid) < 0 || checksliceconst(mid, hi) < 0 {
+		if !checksliceconst(lo, hi) || !checksliceconst(lo, mid) || !checksliceconst(mid, hi) {
 			n.Type = nil
 			return
 		}
@@ -1300,7 +1300,7 @@
 
 			n.Op = OCONV
 			n.Type = l.Type
-			if onearg(n, "conversion to %v", l.Type) < 0 {
+			if !onearg(n, "conversion to %v", l.Type) {
 				n.Type = nil
 				return
 			}
@@ -1388,7 +1388,7 @@
 
 	case OCAP, OLEN, OREAL, OIMAG:
 		ok |= Erv
-		if onearg(n, "%v", Oconv(int(n.Op), 0)) < 0 {
+		if !onearg(n, "%v", Oconv(int(n.Op), 0)) {
 			n.Type = nil
 			return
 		}
@@ -1484,7 +1484,7 @@
 			l = t.Nname
 			r = t.Down.Nname
 		} else {
-			if twoarg(n) < 0 {
+			if !twoarg(n) {
 				n.Type = nil
 				return
 			}
@@ -1538,7 +1538,7 @@
 		break OpSwitch
 
 	case OCLOSE:
-		if onearg(n, "%v", Oconv(int(n.Op), 0)) < 0 {
+		if !onearg(n, "%v", Oconv(int(n.Op), 0)) {
 			n.Type = nil
 			return
 		}
@@ -1837,9 +1837,7 @@
 				n.Type = nil
 				return
 			}
-			et := obj.Bool2int(checkmake(t, "len", l) < 0)
-			et |= obj.Bool2int(r != nil && checkmake(t, "cap", r) < 0)
-			if et != 0 {
+			if !checkmake(t, "len", l) || r != nil && !checkmake(t, "cap", r) {
 				n.Type = nil
 				return
 			}
@@ -1863,7 +1861,7 @@
 					n.Type = nil
 					return
 				}
-				if checkmake(t, "size", l) < 0 {
+				if !checkmake(t, "size", l) {
 					n.Type = nil
 					return
 				}
@@ -1884,7 +1882,7 @@
 					n.Type = nil
 					return
 				}
-				if checkmake(t, "buffer", l) < 0 {
+				if !checkmake(t, "buffer", l) {
 					n.Type = nil
 					return
 				}
@@ -1947,7 +1945,7 @@
 
 	case OPANIC:
 		ok |= Etop
-		if onearg(n, "panic") < 0 {
+		if !onearg(n, "panic") {
 			n.Type = nil
 			return
 		}
@@ -2228,42 +2226,42 @@
 	*/
 }
 
-func checksliceindex(l *Node, r *Node, tp *Type) int {
+func checksliceindex(l *Node, r *Node, tp *Type) bool {
 	t := r.Type
 	if t == nil {
-		return -1
+		return false
 	}
 	if !Isint[t.Etype] {
 		Yyerror("invalid slice index %v (type %v)", r, t)
-		return -1
+		return false
 	}
 
 	if r.Op == OLITERAL {
 		if Mpgetfix(r.Val().U.(*Mpint)) < 0 {
 			Yyerror("invalid slice index %v (index must be non-negative)", r)
-			return -1
+			return false
 		} else if tp != nil && tp.Bound > 0 && Mpgetfix(r.Val().U.(*Mpint)) > tp.Bound {
 			Yyerror("invalid slice index %v (out of bounds for %d-element array)", r, tp.Bound)
-			return -1
+			return false
 		} else if Isconst(l, CTSTR) && Mpgetfix(r.Val().U.(*Mpint)) > int64(len(l.Val().U.(string))) {
 			Yyerror("invalid slice index %v (out of bounds for %d-byte string)", r, len(l.Val().U.(string)))
-			return -1
+			return false
 		} else if Mpcmpfixfix(r.Val().U.(*Mpint), Maxintval[TINT]) > 0 {
 			Yyerror("invalid slice index %v (index too large)", r)
-			return -1
+			return false
 		}
 	}
 
-	return 0
+	return true
 }
 
-func checksliceconst(lo *Node, hi *Node) int {
+func checksliceconst(lo *Node, hi *Node) bool {
 	if lo != nil && hi != nil && lo.Op == OLITERAL && hi.Op == OLITERAL && Mpcmpfixfix(lo.Val().U.(*Mpint), hi.Val().U.(*Mpint)) > 0 {
 		Yyerror("invalid slice index: %v > %v", lo, hi)
-		return -1
+		return false
 	}
 
-	return 0
+	return true
 }
 
 func checkdefergo(n *Node) {
@@ -2341,14 +2339,14 @@
 	*nn = n
 }
 
-func onearg(n *Node, f string, args ...interface{}) int {
+func onearg(n *Node, f string, args ...interface{}) bool {
 	if n.Left != nil {
-		return 0
+		return true
 	}
 	if n.List == nil {
 		p := fmt.Sprintf(f, args...)
 		Yyerror("missing argument to %s: %v", p, n)
-		return -1
+		return false
 	}
 
 	if n.List.Next != nil {
@@ -2356,39 +2354,39 @@
 		Yyerror("too many arguments to %s: %v", p, n)
 		n.Left = n.List.N
 		n.List = nil
-		return -1
+		return false
 	}
 
 	n.Left = n.List.N
 	n.List = nil
-	return 0
+	return true
 }
 
-func twoarg(n *Node) int {
+func twoarg(n *Node) bool {
 	if n.Left != nil {
-		return 0
+		return true
 	}
 	if n.List == nil {
 		Yyerror("missing argument to %v - %v", Oconv(int(n.Op), 0), n)
-		return -1
+		return false
 	}
 
 	n.Left = n.List.N
 	if n.List.Next == nil {
 		Yyerror("missing argument to %v - %v", Oconv(int(n.Op), 0), n)
 		n.List = nil
-		return -1
+		return false
 	}
 
 	if n.List.Next.Next != nil {
 		Yyerror("too many arguments to %v - %v", Oconv(int(n.Op), 0), n)
 		n.List = nil
-		return -1
+		return false
 	}
 
 	n.Right = n.List.Next.N
 	n.List = nil
-	return 0
+	return true
 }
 
 func lookdot1(errnode *Node, s *Sym, t *Type, f *Type, dostrcmp int) *Type {
@@ -2849,20 +2847,25 @@
 	for _, a := range hash[h] {
 		cmp.Op = OEQ
 		cmp.Left = n
-		b := uint32(0)
+		b := false
 		if a.Op == OCONVIFACE && orign.Op == OCONVIFACE {
 			if Eqtype(a.Left.Type, n.Type) {
 				cmp.Right = a.Left
 				evconst(&cmp)
-				b = uint32(obj.Bool2int(cmp.Val().U.(bool)))
+				if cmp.Op == OLITERAL {
+					// Sometimes evconst fails.  See issue 12536.
+					b = cmp.Val().U.(bool)
+				}
 			}
 		} else if Eqtype(a.Type, n.Type) {
 			cmp.Right = a
 			evconst(&cmp)
-			b = uint32(obj.Bool2int(cmp.Val().U.(bool)))
+			if cmp.Op == OLITERAL {
+				b = cmp.Val().U.(bool)
+			}
 		}
 
-		if b != 0 {
+		if b {
 			Yyerror("duplicate key %v in map literal", n)
 			return
 		}
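
evconst can fail to fold the comparison to a constant (issue 12536), in which case cmp.Op is not OLITERAL and cmp.Val() must not be read; the rewrite treats an unfolded comparison as "not a duplicate". A self-contained sketch of the same read-only-on-success guard (the fold function is a stand-in, not the compiler's API):

	package main

	import "fmt"

	// fold stands in for an evaluator that sometimes cannot reduce
	// an expression to a constant (cf. issue 12536).
	func fold(x, y int) (val, ok bool) {
		if y == 0 {
			return false, false // not folded; val is meaningless
		}
		return x == y, true
	}

	func main() {
		b := false
		if v, ok := fold(3, 3); ok {
			b = v // read the result only when folding succeeded
		}
		fmt.Println(b) // true
	}
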
@@ -3539,7 +3542,7 @@
 func copytype(n *Node, t *Type) {
 	if t.Etype == TFORW {
 		// This type isn't computed yet; when it is, update n.
-		t.Copyto = list(t.Copyto, n)
+		t.Copyto = append(t.Copyto, n)
 
 		return
 	}
@@ -3564,8 +3567,8 @@
 	t.Copyto = nil
 
 	// Update nodes waiting on this type.
-	for ; l != nil; l = l.Next {
-		copytype(l.N, t)
+	for _, n := range l {
+		copytype(n, t)
 	}
 
 	// Double-check use of type as embedded type.
@@ -3674,16 +3677,13 @@
 		return n
 	}
 
-	l := new(NodeList)
-	l.N = n
-	l.Next = typecheckdefstack
-	typecheckdefstack = l
-
+	typecheckdefstack = append(typecheckdefstack, n)
 	if n.Walkdef == 2 {
 		Flusherrors()
 		fmt.Printf("typecheckdef loop:")
-		for l := typecheckdefstack; l != nil; l = l.Next {
-			fmt.Printf(" %v", l.N.Sym)
+		for i := len(typecheckdefstack) - 1; i >= 0; i-- {
+			n := typecheckdefstack[i]
+			fmt.Printf(" %v", n.Sym)
 		}
 		fmt.Printf("\n")
 		Fatalf("typecheckdef loop")
@@ -3819,37 +3819,38 @@
 	if n.Op != OLITERAL && n.Type != nil && isideal(n.Type) {
 		Fatalf("got %v for %v", n.Type, n)
 	}
-	if typecheckdefstack.N != n {
+	last := len(typecheckdefstack) - 1
+	if typecheckdefstack[last] != n {
 		Fatalf("typecheckdefstack mismatch")
 	}
-	l = typecheckdefstack
-	typecheckdefstack = l.Next
+	typecheckdefstack[last] = nil
+	typecheckdefstack = typecheckdefstack[:last]
 
 	lineno = int32(lno)
 	n.Walkdef = 1
 	return n
 }
 
-func checkmake(t *Type, arg string, n *Node) int {
+func checkmake(t *Type, arg string, n *Node) bool {
 	if n.Op == OLITERAL {
 		switch n.Val().Ctype() {
 		case CTINT, CTRUNE, CTFLT, CTCPLX:
 			n.SetVal(toint(n.Val()))
 			if mpcmpfixc(n.Val().U.(*Mpint), 0) < 0 {
 				Yyerror("negative %s argument in make(%v)", arg, t)
-				return -1
+				return false
 			}
 
 			if Mpcmpfixfix(n.Val().U.(*Mpint), Maxintval[TINT]) > 0 {
 				Yyerror("%s argument too large in make(%v)", arg, t)
-				return -1
+				return false
 			}
 
 			// Delay defaultlit until after we've checked range, to avoid
 			// a redundant "constant NNN overflows int" error.
 			defaultlit(&n, Types[TINT])
 
-			return 0
+			return true
 
 		default:
 			break
@@ -3858,13 +3859,13 @@
 
 	if !Isint[n.Type.Etype] && n.Type.Etype != TIDEAL {
 		Yyerror("non-integer %s argument in make(%v) - %v", arg, t, n.Type)
-		return -1
+		return false
 	}
 
 	// Defaultlit still necessary for non-constant: n might be 1<<k.
 	defaultlit(&n, Types[TINT])
 
-	return 0
+	return true
 }
 
 func markbreak(n *Node, implicit *Node) {
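
typecheckdefstack changes from a linked NodeList to a []*Node used as a stack; note that the pop also nils the vacated slot so the backing array does not keep the Node reachable. A self-contained sketch of the same push/pop discipline (element type illustrative):

	package main

	import "fmt"

	type Node struct{ name string }

	var stack []*Node

	func push(n *Node) { stack = append(stack, n) }

	func pop() *Node {
		last := len(stack) - 1
		n := stack[last]
		stack[last] = nil // drop the reference so GC can reclaim it
		stack = stack[:last]
		return n
	}

	func main() {
		push(&Node{"a"})
		push(&Node{"b"})
		fmt.Println(pop().name, pop().name) // b a
	}
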
diff --git a/src/cmd/compile/internal/gc/util.go b/src/cmd/compile/internal/gc/util.go
index 8620e0b..6533c9a 100644
--- a/src/cmd/compile/internal/gc/util.go
+++ b/src/cmd/compile/internal/gc/util.go
@@ -5,7 +5,6 @@
 	"runtime"
 	"runtime/pprof"
 	"strconv"
-	"strings"
 )
 
 func (n *Node) Line() string {
@@ -18,41 +17,6 @@
 	return int(n)
 }
 
-func isalnum(c int) bool {
-	return isalpha(c) || isdigit(c)
-}
-
-func isalpha(c int) bool {
-	return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
-}
-
-func isdigit(c int) bool {
-	return '0' <= c && c <= '9'
-}
-
-func plan9quote(s string) string {
-	if s == "" {
-		return "'" + strings.Replace(s, "'", "''", -1) + "'"
-	}
-	for i := 0; i < len(s); i++ {
-		if s[i] <= ' ' || s[i] == '\'' {
-			return "'" + strings.Replace(s, "'", "''", -1) + "'"
-		}
-	}
-	return s
-}
-
-// strings.Compare, introduced in Go 1.5.
-func stringsCompare(a, b string) int {
-	if a == b {
-		return 0
-	}
-	if a < b {
-		return -1
-	}
-	return +1
-}
-
 var atExitFuncs []func()
 
 func AtExit(f func()) {
diff --git a/src/cmd/compile/internal/gc/walk.go b/src/cmd/compile/internal/gc/walk.go
index 27890f2..22d4478 100644
--- a/src/cmd/compile/internal/gc/walk.go
+++ b/src/cmd/compile/internal/gc/walk.go
@@ -1356,7 +1356,7 @@
 		}
 
 		// s + "badgerbadgerbadger" == "badgerbadgerbadger"
-		if (n.Etype == OEQ || n.Etype == ONE) && Isconst(n.Right, CTSTR) && n.Left.Op == OADDSTR && count(n.Left.List) == 2 && Isconst(n.Left.List.Next.N, CTSTR) && cmpslit(n.Right, n.Left.List.Next.N) == 0 {
+		if (n.Etype == OEQ || n.Etype == ONE) && Isconst(n.Right, CTSTR) && n.Left.Op == OADDSTR && count(n.Left.List) == 2 && Isconst(n.Left.List.Next.N, CTSTR) && strlit(n.Right) == strlit(n.Left.List.Next.N) {
 			r := Nod(int(n.Etype), Nod(OLEN, n.Left.List.N, nil), Nodintconst(0))
 			typecheck(&r, Erv)
 			walkexpr(&r, init)
@@ -2225,8 +2225,6 @@
 
 // TODO(rsc): Perhaps componentgen should run before this.
 
-var applywritebarrier_bv Bvec
-
 func applywritebarrier(n *Node, init **NodeList) *Node {
 	if n.Left != nil && n.Right != nil && needwritebarrier(n.Left, n.Right) {
 		if Debug_wb > 1 {
diff --git a/src/cmd/compile/internal/gc/y.go b/src/cmd/compile/internal/gc/y.go
index fafbdf1..dfb0fa4 100644
--- a/src/cmd/compile/internal/gc/y.go
+++ b/src/cmd/compile/internal/gc/y.go
@@ -160,7 +160,7 @@
 	// set up for another one now that we're done.
 	// See comment in lex.C about loophack.
 	if lbr == LBODY {
-		loophack = 1
+		loophack = true
 	}
 }
 
diff --git a/src/cmd/compile/internal/mips64/gsubr.go b/src/cmd/compile/internal/mips64/gsubr.go
index 4ef928c..dde05c4 100644
--- a/src/cmd/compile/internal/mips64/gsubr.go
+++ b/src/cmd/compile/internal/mips64/gsubr.go
@@ -545,30 +545,18 @@
 	return
 }
 
-func intLiteral(n *gc.Node) (x int64, ok bool) {
-	switch {
-	case n == nil:
-		return
-	case gc.Isconst(n, gc.CTINT):
-		return n.Int(), true
-	case gc.Isconst(n, gc.CTBOOL):
-		return int64(obj.Bool2int(n.Bool())), true
-	}
-	return
-}
-
 // gins is called by the front end.
 // It synthesizes some multiple-instruction sequences
 // so the front end can stay simpler.
 func gins(as int, f, t *gc.Node) *obj.Prog {
 	if as >= obj.A_ARCHSPECIFIC {
-		if x, ok := intLiteral(f); ok {
+		if x, ok := f.IntLiteral(); ok {
 			ginscon(as, x, t)
 			return nil // caller must not use
 		}
 	}
 	if as == ppc64.ACMP || as == ppc64.ACMPU {
-		if x, ok := intLiteral(t); ok {
+		if x, ok := t.IntLiteral(); ok {
 			ginscon2(as, f, x)
 			return nil // caller must not use
 		}
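
Both the mips64 and ppc64 copies of intLiteral are deleted in favor of a method on gc.Node. The method's body is not shown in this diff; judging from the deleted helper it presumably looks like this sketch (compiler-internal identifiers, not runnable on its own; obj.Bool2int is written out as an if):

	// IntLiteral reports the value of n if n is an integer or
	// boolean constant; a guess at the new method, mirroring the
	// deleted helper.
	func (n *Node) IntLiteral() (x int64, ok bool) {
		switch {
		case n == nil:
			return
		case Isconst(n, CTINT):
			return n.Int(), true
		case Isconst(n, CTBOOL):
			if n.Bool() {
				return 1, true
			}
			return 0, true
		}
		return
	}
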
diff --git a/src/cmd/compile/internal/ppc64/ggen.go b/src/cmd/compile/internal/ppc64/ggen.go
index 173e2f0..d0bdebb 100644
--- a/src/cmd/compile/internal/ppc64/ggen.go
+++ b/src/cmd/compile/internal/ppc64/ggen.go
@@ -69,13 +69,10 @@
 	}
 	if cnt < int64(4*gc.Widthptr) {
 		for i := int64(0); i < cnt; i += int64(gc.Widthptr) {
-			p = appendpp(p, ppc64.AMOVD, obj.TYPE_REG, ppc64.REGZERO, 0, obj.TYPE_MEM, ppc64.REGSP, 8+frame+lo+i)
+			p = appendpp(p, ppc64.AMOVD, obj.TYPE_REG, ppc64.REGZERO, 0, obj.TYPE_MEM, ppc64.REGSP, gc.Ctxt.FixedFrameSize()+frame+lo+i)
 		}
-		// TODO(dfc): https://golang.org/issue/12108
-		// If DUFFZERO is used inside a tail call (see genwrapper) it will
-		// overwrite the link register.
-	} else if false && cnt <= int64(128*gc.Widthptr) {
-		p = appendpp(p, ppc64.AADD, obj.TYPE_CONST, 0, 8+frame+lo-8, obj.TYPE_REG, ppc64.REGRT1, 0)
+	} else if cnt <= int64(128*gc.Widthptr) {
+		p = appendpp(p, ppc64.AADD, obj.TYPE_CONST, 0, gc.Ctxt.FixedFrameSize()+frame+lo-8, obj.TYPE_REG, ppc64.REGRT1, 0)
 		p.Reg = ppc64.REGSP
 		p = appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_MEM, 0, 0)
 		f := gc.Sysfunc("duffzero")
@@ -83,7 +80,7 @@
 		gc.Afunclit(&p.To, f)
 		p.To.Offset = 4 * (128 - cnt/int64(gc.Widthptr))
 	} else {
-		p = appendpp(p, ppc64.AMOVD, obj.TYPE_CONST, 0, 8+frame+lo-8, obj.TYPE_REG, ppc64.REGTMP, 0)
+		p = appendpp(p, ppc64.AMOVD, obj.TYPE_CONST, 0, gc.Ctxt.FixedFrameSize()+frame+lo-8, obj.TYPE_REG, ppc64.REGTMP, 0)
 		p = appendpp(p, ppc64.AADD, obj.TYPE_REG, ppc64.REGTMP, 0, obj.TYPE_REG, ppc64.REGRT1, 0)
 		p.Reg = ppc64.REGSP
 		p = appendpp(p, ppc64.AMOVD, obj.TYPE_CONST, 0, cnt, obj.TYPE_REG, ppc64.REGTMP, 0)
@@ -443,10 +440,7 @@
 
 		// The loop leaves R3 on the last zeroed dword
 		boff = 8
-		// TODO(dfc): https://golang.org/issue/12108
-		// If DUFFZERO is used inside a tail call (see genwrapper) it will
-		// overwrite the link register.
-	} else if false && q >= 4 {
+	} else if q >= 4 {
 		p := gins(ppc64.ASUB, nil, &dst)
 		p.From.Type = obj.TYPE_CONST
 		p.From.Offset = 8
diff --git a/src/cmd/compile/internal/ppc64/gsubr.go b/src/cmd/compile/internal/ppc64/gsubr.go
index 4ef928c..dde05c4 100644
--- a/src/cmd/compile/internal/ppc64/gsubr.go
+++ b/src/cmd/compile/internal/ppc64/gsubr.go
@@ -545,30 +545,18 @@
 	return
 }
 
-func intLiteral(n *gc.Node) (x int64, ok bool) {
-	switch {
-	case n == nil:
-		return
-	case gc.Isconst(n, gc.CTINT):
-		return n.Int(), true
-	case gc.Isconst(n, gc.CTBOOL):
-		return int64(obj.Bool2int(n.Bool())), true
-	}
-	return
-}
-
 // gins is called by the front end.
 // It synthesizes some multiple-instruction sequences
 // so the front end can stay simpler.
 func gins(as int, f, t *gc.Node) *obj.Prog {
 	if as >= obj.A_ARCHSPECIFIC {
-		if x, ok := intLiteral(f); ok {
+		if x, ok := f.IntLiteral(); ok {
 			ginscon(as, x, t)
 			return nil // caller must not use
 		}
 	}
 	if as == ppc64.ACMP || as == ppc64.ACMPU {
-		if x, ok := intLiteral(t); ok {
+		if x, ok := t.IntLiteral(); ok {
 			ginscon2(as, f, x)
 			return nil // caller must not use
 		}
diff --git a/src/cmd/compile/internal/ppc64/prog.go b/src/cmd/compile/internal/ppc64/prog.go
index 9b8719b..92293be 100644
--- a/src/cmd/compile/internal/ppc64/prog.go
+++ b/src/cmd/compile/internal/ppc64/prog.go
@@ -183,89 +183,89 @@
 // for the "base" form of each instruction.  On the first call to
 // as2variant or variant2as, we'll add the variants to the table.
 var varianttable = [ppc64.ALAST][4]int{
-	ppc64.AADD:     [4]int{ppc64.AADD, ppc64.AADDCC, ppc64.AADDV, ppc64.AADDVCC},
-	ppc64.AADDC:    [4]int{ppc64.AADDC, ppc64.AADDCCC, ppc64.AADDCV, ppc64.AADDCVCC},
-	ppc64.AADDE:    [4]int{ppc64.AADDE, ppc64.AADDECC, ppc64.AADDEV, ppc64.AADDEVCC},
-	ppc64.AADDME:   [4]int{ppc64.AADDME, ppc64.AADDMECC, ppc64.AADDMEV, ppc64.AADDMEVCC},
-	ppc64.AADDZE:   [4]int{ppc64.AADDZE, ppc64.AADDZECC, ppc64.AADDZEV, ppc64.AADDZEVCC},
-	ppc64.AAND:     [4]int{ppc64.AAND, ppc64.AANDCC, 0, 0},
-	ppc64.AANDN:    [4]int{ppc64.AANDN, ppc64.AANDNCC, 0, 0},
-	ppc64.ACNTLZD:  [4]int{ppc64.ACNTLZD, ppc64.ACNTLZDCC, 0, 0},
-	ppc64.ACNTLZW:  [4]int{ppc64.ACNTLZW, ppc64.ACNTLZWCC, 0, 0},
-	ppc64.ADIVD:    [4]int{ppc64.ADIVD, ppc64.ADIVDCC, ppc64.ADIVDV, ppc64.ADIVDVCC},
-	ppc64.ADIVDU:   [4]int{ppc64.ADIVDU, ppc64.ADIVDUCC, ppc64.ADIVDUV, ppc64.ADIVDUVCC},
-	ppc64.ADIVW:    [4]int{ppc64.ADIVW, ppc64.ADIVWCC, ppc64.ADIVWV, ppc64.ADIVWVCC},
-	ppc64.ADIVWU:   [4]int{ppc64.ADIVWU, ppc64.ADIVWUCC, ppc64.ADIVWUV, ppc64.ADIVWUVCC},
-	ppc64.AEQV:     [4]int{ppc64.AEQV, ppc64.AEQVCC, 0, 0},
-	ppc64.AEXTSB:   [4]int{ppc64.AEXTSB, ppc64.AEXTSBCC, 0, 0},
-	ppc64.AEXTSH:   [4]int{ppc64.AEXTSH, ppc64.AEXTSHCC, 0, 0},
-	ppc64.AEXTSW:   [4]int{ppc64.AEXTSW, ppc64.AEXTSWCC, 0, 0},
-	ppc64.AFABS:    [4]int{ppc64.AFABS, ppc64.AFABSCC, 0, 0},
-	ppc64.AFADD:    [4]int{ppc64.AFADD, ppc64.AFADDCC, 0, 0},
-	ppc64.AFADDS:   [4]int{ppc64.AFADDS, ppc64.AFADDSCC, 0, 0},
-	ppc64.AFCFID:   [4]int{ppc64.AFCFID, ppc64.AFCFIDCC, 0, 0},
-	ppc64.AFCTID:   [4]int{ppc64.AFCTID, ppc64.AFCTIDCC, 0, 0},
-	ppc64.AFCTIDZ:  [4]int{ppc64.AFCTIDZ, ppc64.AFCTIDZCC, 0, 0},
-	ppc64.AFCTIW:   [4]int{ppc64.AFCTIW, ppc64.AFCTIWCC, 0, 0},
-	ppc64.AFCTIWZ:  [4]int{ppc64.AFCTIWZ, ppc64.AFCTIWZCC, 0, 0},
-	ppc64.AFDIV:    [4]int{ppc64.AFDIV, ppc64.AFDIVCC, 0, 0},
-	ppc64.AFDIVS:   [4]int{ppc64.AFDIVS, ppc64.AFDIVSCC, 0, 0},
-	ppc64.AFMADD:   [4]int{ppc64.AFMADD, ppc64.AFMADDCC, 0, 0},
-	ppc64.AFMADDS:  [4]int{ppc64.AFMADDS, ppc64.AFMADDSCC, 0, 0},
-	ppc64.AFMOVD:   [4]int{ppc64.AFMOVD, ppc64.AFMOVDCC, 0, 0},
-	ppc64.AFMSUB:   [4]int{ppc64.AFMSUB, ppc64.AFMSUBCC, 0, 0},
-	ppc64.AFMSUBS:  [4]int{ppc64.AFMSUBS, ppc64.AFMSUBSCC, 0, 0},
-	ppc64.AFMUL:    [4]int{ppc64.AFMUL, ppc64.AFMULCC, 0, 0},
-	ppc64.AFMULS:   [4]int{ppc64.AFMULS, ppc64.AFMULSCC, 0, 0},
-	ppc64.AFNABS:   [4]int{ppc64.AFNABS, ppc64.AFNABSCC, 0, 0},
-	ppc64.AFNEG:    [4]int{ppc64.AFNEG, ppc64.AFNEGCC, 0, 0},
-	ppc64.AFNMADD:  [4]int{ppc64.AFNMADD, ppc64.AFNMADDCC, 0, 0},
-	ppc64.AFNMADDS: [4]int{ppc64.AFNMADDS, ppc64.AFNMADDSCC, 0, 0},
-	ppc64.AFNMSUB:  [4]int{ppc64.AFNMSUB, ppc64.AFNMSUBCC, 0, 0},
-	ppc64.AFNMSUBS: [4]int{ppc64.AFNMSUBS, ppc64.AFNMSUBSCC, 0, 0},
-	ppc64.AFRES:    [4]int{ppc64.AFRES, ppc64.AFRESCC, 0, 0},
-	ppc64.AFRSP:    [4]int{ppc64.AFRSP, ppc64.AFRSPCC, 0, 0},
-	ppc64.AFRSQRTE: [4]int{ppc64.AFRSQRTE, ppc64.AFRSQRTECC, 0, 0},
-	ppc64.AFSEL:    [4]int{ppc64.AFSEL, ppc64.AFSELCC, 0, 0},
-	ppc64.AFSQRT:   [4]int{ppc64.AFSQRT, ppc64.AFSQRTCC, 0, 0},
-	ppc64.AFSQRTS:  [4]int{ppc64.AFSQRTS, ppc64.AFSQRTSCC, 0, 0},
-	ppc64.AFSUB:    [4]int{ppc64.AFSUB, ppc64.AFSUBCC, 0, 0},
-	ppc64.AFSUBS:   [4]int{ppc64.AFSUBS, ppc64.AFSUBSCC, 0, 0},
-	ppc64.AMTFSB0:  [4]int{ppc64.AMTFSB0, ppc64.AMTFSB0CC, 0, 0},
-	ppc64.AMTFSB1:  [4]int{ppc64.AMTFSB1, ppc64.AMTFSB1CC, 0, 0},
-	ppc64.AMULHD:   [4]int{ppc64.AMULHD, ppc64.AMULHDCC, 0, 0},
-	ppc64.AMULHDU:  [4]int{ppc64.AMULHDU, ppc64.AMULHDUCC, 0, 0},
-	ppc64.AMULHW:   [4]int{ppc64.AMULHW, ppc64.AMULHWCC, 0, 0},
-	ppc64.AMULHWU:  [4]int{ppc64.AMULHWU, ppc64.AMULHWUCC, 0, 0},
-	ppc64.AMULLD:   [4]int{ppc64.AMULLD, ppc64.AMULLDCC, ppc64.AMULLDV, ppc64.AMULLDVCC},
-	ppc64.AMULLW:   [4]int{ppc64.AMULLW, ppc64.AMULLWCC, ppc64.AMULLWV, ppc64.AMULLWVCC},
-	ppc64.ANAND:    [4]int{ppc64.ANAND, ppc64.ANANDCC, 0, 0},
-	ppc64.ANEG:     [4]int{ppc64.ANEG, ppc64.ANEGCC, ppc64.ANEGV, ppc64.ANEGVCC},
-	ppc64.ANOR:     [4]int{ppc64.ANOR, ppc64.ANORCC, 0, 0},
-	ppc64.AOR:      [4]int{ppc64.AOR, ppc64.AORCC, 0, 0},
-	ppc64.AORN:     [4]int{ppc64.AORN, ppc64.AORNCC, 0, 0},
-	ppc64.AREM:     [4]int{ppc64.AREM, ppc64.AREMCC, ppc64.AREMV, ppc64.AREMVCC},
-	ppc64.AREMD:    [4]int{ppc64.AREMD, ppc64.AREMDCC, ppc64.AREMDV, ppc64.AREMDVCC},
-	ppc64.AREMDU:   [4]int{ppc64.AREMDU, ppc64.AREMDUCC, ppc64.AREMDUV, ppc64.AREMDUVCC},
-	ppc64.AREMU:    [4]int{ppc64.AREMU, ppc64.AREMUCC, ppc64.AREMUV, ppc64.AREMUVCC},
-	ppc64.ARLDC:    [4]int{ppc64.ARLDC, ppc64.ARLDCCC, 0, 0},
-	ppc64.ARLDCL:   [4]int{ppc64.ARLDCL, ppc64.ARLDCLCC, 0, 0},
-	ppc64.ARLDCR:   [4]int{ppc64.ARLDCR, ppc64.ARLDCRCC, 0, 0},
-	ppc64.ARLDMI:   [4]int{ppc64.ARLDMI, ppc64.ARLDMICC, 0, 0},
-	ppc64.ARLWMI:   [4]int{ppc64.ARLWMI, ppc64.ARLWMICC, 0, 0},
-	ppc64.ARLWNM:   [4]int{ppc64.ARLWNM, ppc64.ARLWNMCC, 0, 0},
-	ppc64.ASLD:     [4]int{ppc64.ASLD, ppc64.ASLDCC, 0, 0},
-	ppc64.ASLW:     [4]int{ppc64.ASLW, ppc64.ASLWCC, 0, 0},
-	ppc64.ASRAD:    [4]int{ppc64.ASRAD, ppc64.ASRADCC, 0, 0},
-	ppc64.ASRAW:    [4]int{ppc64.ASRAW, ppc64.ASRAWCC, 0, 0},
-	ppc64.ASRD:     [4]int{ppc64.ASRD, ppc64.ASRDCC, 0, 0},
-	ppc64.ASRW:     [4]int{ppc64.ASRW, ppc64.ASRWCC, 0, 0},
-	ppc64.ASUB:     [4]int{ppc64.ASUB, ppc64.ASUBCC, ppc64.ASUBV, ppc64.ASUBVCC},
-	ppc64.ASUBC:    [4]int{ppc64.ASUBC, ppc64.ASUBCCC, ppc64.ASUBCV, ppc64.ASUBCVCC},
-	ppc64.ASUBE:    [4]int{ppc64.ASUBE, ppc64.ASUBECC, ppc64.ASUBEV, ppc64.ASUBEVCC},
-	ppc64.ASUBME:   [4]int{ppc64.ASUBME, ppc64.ASUBMECC, ppc64.ASUBMEV, ppc64.ASUBMEVCC},
-	ppc64.ASUBZE:   [4]int{ppc64.ASUBZE, ppc64.ASUBZECC, ppc64.ASUBZEV, ppc64.ASUBZEVCC},
-	ppc64.AXOR:     [4]int{ppc64.AXOR, ppc64.AXORCC, 0, 0},
+	ppc64.AADD:     {ppc64.AADD, ppc64.AADDCC, ppc64.AADDV, ppc64.AADDVCC},
+	ppc64.AADDC:    {ppc64.AADDC, ppc64.AADDCCC, ppc64.AADDCV, ppc64.AADDCVCC},
+	ppc64.AADDE:    {ppc64.AADDE, ppc64.AADDECC, ppc64.AADDEV, ppc64.AADDEVCC},
+	ppc64.AADDME:   {ppc64.AADDME, ppc64.AADDMECC, ppc64.AADDMEV, ppc64.AADDMEVCC},
+	ppc64.AADDZE:   {ppc64.AADDZE, ppc64.AADDZECC, ppc64.AADDZEV, ppc64.AADDZEVCC},
+	ppc64.AAND:     {ppc64.AAND, ppc64.AANDCC, 0, 0},
+	ppc64.AANDN:    {ppc64.AANDN, ppc64.AANDNCC, 0, 0},
+	ppc64.ACNTLZD:  {ppc64.ACNTLZD, ppc64.ACNTLZDCC, 0, 0},
+	ppc64.ACNTLZW:  {ppc64.ACNTLZW, ppc64.ACNTLZWCC, 0, 0},
+	ppc64.ADIVD:    {ppc64.ADIVD, ppc64.ADIVDCC, ppc64.ADIVDV, ppc64.ADIVDVCC},
+	ppc64.ADIVDU:   {ppc64.ADIVDU, ppc64.ADIVDUCC, ppc64.ADIVDUV, ppc64.ADIVDUVCC},
+	ppc64.ADIVW:    {ppc64.ADIVW, ppc64.ADIVWCC, ppc64.ADIVWV, ppc64.ADIVWVCC},
+	ppc64.ADIVWU:   {ppc64.ADIVWU, ppc64.ADIVWUCC, ppc64.ADIVWUV, ppc64.ADIVWUVCC},
+	ppc64.AEQV:     {ppc64.AEQV, ppc64.AEQVCC, 0, 0},
+	ppc64.AEXTSB:   {ppc64.AEXTSB, ppc64.AEXTSBCC, 0, 0},
+	ppc64.AEXTSH:   {ppc64.AEXTSH, ppc64.AEXTSHCC, 0, 0},
+	ppc64.AEXTSW:   {ppc64.AEXTSW, ppc64.AEXTSWCC, 0, 0},
+	ppc64.AFABS:    {ppc64.AFABS, ppc64.AFABSCC, 0, 0},
+	ppc64.AFADD:    {ppc64.AFADD, ppc64.AFADDCC, 0, 0},
+	ppc64.AFADDS:   {ppc64.AFADDS, ppc64.AFADDSCC, 0, 0},
+	ppc64.AFCFID:   {ppc64.AFCFID, ppc64.AFCFIDCC, 0, 0},
+	ppc64.AFCTID:   {ppc64.AFCTID, ppc64.AFCTIDCC, 0, 0},
+	ppc64.AFCTIDZ:  {ppc64.AFCTIDZ, ppc64.AFCTIDZCC, 0, 0},
+	ppc64.AFCTIW:   {ppc64.AFCTIW, ppc64.AFCTIWCC, 0, 0},
+	ppc64.AFCTIWZ:  {ppc64.AFCTIWZ, ppc64.AFCTIWZCC, 0, 0},
+	ppc64.AFDIV:    {ppc64.AFDIV, ppc64.AFDIVCC, 0, 0},
+	ppc64.AFDIVS:   {ppc64.AFDIVS, ppc64.AFDIVSCC, 0, 0},
+	ppc64.AFMADD:   {ppc64.AFMADD, ppc64.AFMADDCC, 0, 0},
+	ppc64.AFMADDS:  {ppc64.AFMADDS, ppc64.AFMADDSCC, 0, 0},
+	ppc64.AFMOVD:   {ppc64.AFMOVD, ppc64.AFMOVDCC, 0, 0},
+	ppc64.AFMSUB:   {ppc64.AFMSUB, ppc64.AFMSUBCC, 0, 0},
+	ppc64.AFMSUBS:  {ppc64.AFMSUBS, ppc64.AFMSUBSCC, 0, 0},
+	ppc64.AFMUL:    {ppc64.AFMUL, ppc64.AFMULCC, 0, 0},
+	ppc64.AFMULS:   {ppc64.AFMULS, ppc64.AFMULSCC, 0, 0},
+	ppc64.AFNABS:   {ppc64.AFNABS, ppc64.AFNABSCC, 0, 0},
+	ppc64.AFNEG:    {ppc64.AFNEG, ppc64.AFNEGCC, 0, 0},
+	ppc64.AFNMADD:  {ppc64.AFNMADD, ppc64.AFNMADDCC, 0, 0},
+	ppc64.AFNMADDS: {ppc64.AFNMADDS, ppc64.AFNMADDSCC, 0, 0},
+	ppc64.AFNMSUB:  {ppc64.AFNMSUB, ppc64.AFNMSUBCC, 0, 0},
+	ppc64.AFNMSUBS: {ppc64.AFNMSUBS, ppc64.AFNMSUBSCC, 0, 0},
+	ppc64.AFRES:    {ppc64.AFRES, ppc64.AFRESCC, 0, 0},
+	ppc64.AFRSP:    {ppc64.AFRSP, ppc64.AFRSPCC, 0, 0},
+	ppc64.AFRSQRTE: {ppc64.AFRSQRTE, ppc64.AFRSQRTECC, 0, 0},
+	ppc64.AFSEL:    {ppc64.AFSEL, ppc64.AFSELCC, 0, 0},
+	ppc64.AFSQRT:   {ppc64.AFSQRT, ppc64.AFSQRTCC, 0, 0},
+	ppc64.AFSQRTS:  {ppc64.AFSQRTS, ppc64.AFSQRTSCC, 0, 0},
+	ppc64.AFSUB:    {ppc64.AFSUB, ppc64.AFSUBCC, 0, 0},
+	ppc64.AFSUBS:   {ppc64.AFSUBS, ppc64.AFSUBSCC, 0, 0},
+	ppc64.AMTFSB0:  {ppc64.AMTFSB0, ppc64.AMTFSB0CC, 0, 0},
+	ppc64.AMTFSB1:  {ppc64.AMTFSB1, ppc64.AMTFSB1CC, 0, 0},
+	ppc64.AMULHD:   {ppc64.AMULHD, ppc64.AMULHDCC, 0, 0},
+	ppc64.AMULHDU:  {ppc64.AMULHDU, ppc64.AMULHDUCC, 0, 0},
+	ppc64.AMULHW:   {ppc64.AMULHW, ppc64.AMULHWCC, 0, 0},
+	ppc64.AMULHWU:  {ppc64.AMULHWU, ppc64.AMULHWUCC, 0, 0},
+	ppc64.AMULLD:   {ppc64.AMULLD, ppc64.AMULLDCC, ppc64.AMULLDV, ppc64.AMULLDVCC},
+	ppc64.AMULLW:   {ppc64.AMULLW, ppc64.AMULLWCC, ppc64.AMULLWV, ppc64.AMULLWVCC},
+	ppc64.ANAND:    {ppc64.ANAND, ppc64.ANANDCC, 0, 0},
+	ppc64.ANEG:     {ppc64.ANEG, ppc64.ANEGCC, ppc64.ANEGV, ppc64.ANEGVCC},
+	ppc64.ANOR:     {ppc64.ANOR, ppc64.ANORCC, 0, 0},
+	ppc64.AOR:      {ppc64.AOR, ppc64.AORCC, 0, 0},
+	ppc64.AORN:     {ppc64.AORN, ppc64.AORNCC, 0, 0},
+	ppc64.AREM:     {ppc64.AREM, ppc64.AREMCC, ppc64.AREMV, ppc64.AREMVCC},
+	ppc64.AREMD:    {ppc64.AREMD, ppc64.AREMDCC, ppc64.AREMDV, ppc64.AREMDVCC},
+	ppc64.AREMDU:   {ppc64.AREMDU, ppc64.AREMDUCC, ppc64.AREMDUV, ppc64.AREMDUVCC},
+	ppc64.AREMU:    {ppc64.AREMU, ppc64.AREMUCC, ppc64.AREMUV, ppc64.AREMUVCC},
+	ppc64.ARLDC:    {ppc64.ARLDC, ppc64.ARLDCCC, 0, 0},
+	ppc64.ARLDCL:   {ppc64.ARLDCL, ppc64.ARLDCLCC, 0, 0},
+	ppc64.ARLDCR:   {ppc64.ARLDCR, ppc64.ARLDCRCC, 0, 0},
+	ppc64.ARLDMI:   {ppc64.ARLDMI, ppc64.ARLDMICC, 0, 0},
+	ppc64.ARLWMI:   {ppc64.ARLWMI, ppc64.ARLWMICC, 0, 0},
+	ppc64.ARLWNM:   {ppc64.ARLWNM, ppc64.ARLWNMCC, 0, 0},
+	ppc64.ASLD:     {ppc64.ASLD, ppc64.ASLDCC, 0, 0},
+	ppc64.ASLW:     {ppc64.ASLW, ppc64.ASLWCC, 0, 0},
+	ppc64.ASRAD:    {ppc64.ASRAD, ppc64.ASRADCC, 0, 0},
+	ppc64.ASRAW:    {ppc64.ASRAW, ppc64.ASRAWCC, 0, 0},
+	ppc64.ASRD:     {ppc64.ASRD, ppc64.ASRDCC, 0, 0},
+	ppc64.ASRW:     {ppc64.ASRW, ppc64.ASRWCC, 0, 0},
+	ppc64.ASUB:     {ppc64.ASUB, ppc64.ASUBCC, ppc64.ASUBV, ppc64.ASUBVCC},
+	ppc64.ASUBC:    {ppc64.ASUBC, ppc64.ASUBCCC, ppc64.ASUBCV, ppc64.ASUBCVCC},
+	ppc64.ASUBE:    {ppc64.ASUBE, ppc64.ASUBECC, ppc64.ASUBEV, ppc64.ASUBEVCC},
+	ppc64.ASUBME:   {ppc64.ASUBME, ppc64.ASUBMECC, ppc64.ASUBMEV, ppc64.ASUBMEVCC},
+	ppc64.ASUBZE:   {ppc64.ASUBZE, ppc64.ASUBZECC, ppc64.ASUBZEV, ppc64.ASUBZEVCC},
+	ppc64.AXOR:     {ppc64.AXOR, ppc64.AXORCC, 0, 0},
 }
 
 var initvariants_initialized int
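
The varianttable cleanup relies on elided composite literals: inside an array, slice, or map literal, Go lets the element type be omitted. A self-contained example of the same simplification:

	package main

	import "fmt"

	func main() {
		// Fully spelled out:
		a := [2][4]int{0: [4]int{1, 2, 3, 4}, 1: [4]int{5, 6, 7, 8}}
		// Element type elided, as in the rewritten varianttable:
		b := [2][4]int{0: {1, 2, 3, 4}, 1: {5, 6, 7, 8}}
		fmt.Println(a == b) // true
	}
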
diff --git a/src/cmd/compile/internal/ppc64/reg.go b/src/cmd/compile/internal/ppc64/reg.go
index fa1cb71..a301836 100644
--- a/src/cmd/compile/internal/ppc64/reg.go
+++ b/src/cmd/compile/internal/ppc64/reg.go
@@ -111,7 +111,7 @@
 
 func excludedregs() uint64 {
 	// Exclude registers with fixed functions
-	regbits := uint64(1<<0 | RtoB(ppc64.REGSP) | RtoB(ppc64.REGG) | RtoB(ppc64.REGTLS))
+	regbits := uint64(1<<0 | RtoB(ppc64.REGSP) | RtoB(ppc64.REGG) | RtoB(ppc64.REGTLS) | RtoB(ppc64.REGTMP))
 
 	// Also exclude floating point registers with fixed constants
 	regbits |= RtoB(ppc64.REG_F27) | RtoB(ppc64.REG_F28) | RtoB(ppc64.REG_F29) | RtoB(ppc64.REG_F30) | RtoB(ppc64.REG_F31)
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index f160ce8..ffa5fea 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -602,8 +602,10 @@
 				(MOVQstore destptr (MOVQconst [0]) mem))))
 
 // Medium zeroing uses a duff device.
-(Zero [size] destptr mem) && size <= 1024 && size%8 == 0 ->
-	(DUFFZERO [duffStart(size)] (ADDQconst [duffAdj(size)] destptr) (MOVQconst [0]) mem)
+(Zero [size] destptr mem) && size <= 1024 && size%8 == 0 && size%16 != 0 ->
+	(Zero [size-8] (ADDQconst [8] destptr) (MOVQstore destptr (MOVQconst [0]) mem))
+(Zero [size] destptr mem) && size <= 1024 && size%16 == 0 ->
+	(DUFFZERO [duffStart(size)] (ADDQconst [duffAdj(size)] destptr) (MOVOconst [0]) mem)
 
 // Large zeroing uses REP STOSQ.
 (Zero [size] destptr mem) && size > 1024 && size%8 == 0 ->
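
DUFFZERO now clears 16 bytes per MOVUPS, so a size that is a multiple of 8 but not of 16 first peels off one 8-byte MOVQstore and recurses on the 16-byte-aligned remainder; zeroing 40 bytes, for example, stores 8 bytes and then duff-zeroes the remaining 32. A sketch of the size dispatch these rules imply (sizes below 32 bytes are left to the earlier small-size rules):

	package main

	import "fmt"

	// lower reports how a Zero of the given size is dispatched,
	// following the conditions in the rewritten rules.
	func lower(size int64) string {
		switch {
		case size > 1024 && size%8 == 0:
			return "REP STOSQ"
		case size >= 32 && size%16 == 0:
			return "DUFFZERO"
		case size >= 32 && size%8 == 0:
			return "MOVQstore, then Zero(size-8)"
		}
		return "small-size rules"
	}

	func main() {
		fmt.Println(lower(40)) // MOVQstore, then Zero(size-8)
		fmt.Println(lower(32)) // DUFFZERO
	}
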
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
index 5d171dc..c9b7f31 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
@@ -388,10 +388,11 @@
 		{
 			name: "DUFFZERO",
 			reg: regInfo{
-				inputs:   []regMask{buildReg("DI"), buildReg("AX")},
+				inputs:   []regMask{buildReg("DI"), buildReg("X0")},
 				clobbers: buildReg("DI FLAGS"),
 			},
 		},
+		{name: "MOVOconst", reg: regInfo{nil, 0, []regMask{fp}}, typ: "Float64"},
 
 		// arg0 = address of memory to zero
 		// arg1 = # of 8-byte words to zero
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index c52ef2d..d86dce3 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -261,6 +261,7 @@
 	OpAMD64MOVQstore
 	OpAMD64MOVQstoreidx8
 	OpAMD64DUFFZERO
+	OpAMD64MOVOconst
 	OpAMD64REPSTOSQ
 	OpAMD64CALLstatic
 	OpAMD64CALLclosure
@@ -3039,13 +3040,21 @@
 		name: "DUFFZERO",
 		reg: regInfo{
 			inputs: []inputInfo{
-				{0, 128}, // .DI
-				{1, 1},   // .AX
+				{0, 128},   // .DI
+				{1, 65536}, // .X0
 			},
 			clobbers: 8589934720, // .DI .FLAGS
 		},
 	},
 	{
+		name: "MOVOconst",
+		reg: regInfo{
+			outputs: []regMask{
+				4294901760, // .X0 .X1 .X2 .X3 .X4 .X5 .X6 .X7 .X8 .X9 .X10 .X11 .X12 .X13 .X14 .X15
+			},
+		},
+	},
+	{
 		name: "REPSTOSQ",
 		reg: regInfo{
 			inputs: []inputInfo{
diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go
index 4e78306..c1e446f 100644
--- a/src/cmd/compile/internal/ssa/rewrite.go
+++ b/src/cmd/compile/internal/ssa/rewrite.go
@@ -179,23 +179,21 @@
 	return int64(math.Float64bits(f))
 }
 
-// DUFFZERO consists of repeated blocks of 4 MOVs + ADD,
-// with 4 STOSQs at the very end.
-// The trailing STOSQs prevent the need for a DI preadjustment
-// for small numbers of words to clear.
+// DUFFZERO consists of repeated blocks of 4 MOVUPSs + ADD.
 // See runtime/mkduff.go.
 const (
-	dzBlocks    = 31 // number of MOV/ADD blocks
+	dzBlocks    = 16 // number of MOV/ADD blocks
 	dzBlockLen  = 4  // number of clears per block
 	dzBlockSize = 19 // size of instructions in a single block
 	dzMovSize   = 4  // size of single MOV instruction w/ offset
 	dzAddSize   = 4  // size of single ADD instruction
-	dzDIStep    = 8  // number of bytes cleared by each MOV instruction
+	dzClearStep = 16 // number of bytes cleared by each MOV instruction
 
 	dzTailLen  = 4 // number of final STOSQ instructions
 	dzTailSize = 2 // size of single STOSQ instruction
 
-	dzSize = dzBlocks*dzBlockSize + dzTailLen*dzTailSize // total size of DUFFZERO routine
+	dzClearLen = dzClearStep * dzBlockLen // bytes cleared by one block
+	dzSize     = dzBlocks * dzBlockSize
 )
 
 func duffStart(size int64) int64 {
@@ -210,20 +208,19 @@
 // duff returns the offset (from duffzero, in bytes) and pointer adjust (in bytes)
 // required to use the duffzero mechanism for a block of the given size.
 func duff(size int64) (int64, int64) {
-	if size < 32 || size > 1024 || size%8 != 0 {
+	if size < 32 || size > 1024 || size%dzClearStep != 0 {
 		panic("bad duffzero size")
 	}
 	// TODO: arch-dependent
-	off := int64(dzSize)
-	off -= dzTailLen * dzTailSize
-	size -= dzTailLen * dzDIStep
-	q := size / dzDIStep
-	blocks, singles := q/dzBlockLen, q%dzBlockLen
-	off -= dzBlockSize * blocks
+	steps := size / dzClearStep
+	blocks := steps / dzBlockLen
+	steps %= dzBlockLen
+	off := dzBlockSize * (dzBlocks - blocks)
 	var adj int64
-	if singles > 0 {
-		off -= dzAddSize + dzMovSize*singles
-		adj -= dzDIStep * (dzBlockLen - singles)
+	if steps != 0 {
+		off -= dzAddSize
+		off -= dzMovSize * steps
+		adj -= dzClearStep * (dzBlockLen - steps)
 	}
 	return off, adj
 }
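
duff now walks the 16-byte layout: each block is dzBlockLen (4) clears of dzClearStep (16) bytes, so one block covers 64 bytes, and a partial block backs the entry offset up over the unused MOVs while pre-adjusting the pointer. For size = 96: steps = 6, blocks = 1, leftover steps = 2, giving off = 19*(16-1) - 4 - 4*2 = 273 and adj = -16*(4-2) = -32. A self-contained copy of the computation for checking such values:

	package main

	import "fmt"

	const (
		dzBlocks    = 16 // number of MOV/ADD blocks
		dzBlockLen  = 4  // number of clears per block
		dzBlockSize = 19 // size of instructions in a single block
		dzMovSize   = 4  // size of single MOV instruction w/ offset
		dzAddSize   = 4  // size of single ADD instruction
		dzClearStep = 16 // number of bytes cleared by each MOV
	)

	// duff mirrors the rewritten computation above.
	func duff(size int64) (off, adj int64) {
		if size < 32 || size > 1024 || size%dzClearStep != 0 {
			panic("bad duffzero size")
		}
		steps := size / dzClearStep
		blocks := steps / dzBlockLen
		steps %= dzBlockLen
		off = dzBlockSize * (dzBlocks - blocks)
		if steps != 0 {
			off -= dzAddSize
			off -= dzMovSize * steps
			adj -= dzClearStep * (dzBlockLen - steps)
		}
		return off, adj
	}

	func main() {
		fmt.Println(duff(96)) // 273 -32
	}
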
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index 4ac4744..8160156 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -10635,14 +10635,48 @@
 	end282b5e36693f06e2cd1ac563e0d419b5:
 		;
 		// match: (Zero [size] destptr mem)
-		// cond: size <= 1024 && size%8 == 0
-		// result: (DUFFZERO [duffStart(size)] (ADDQconst [duffAdj(size)] destptr) (MOVQconst [0]) mem)
+		// cond: size <= 1024 && size%8 == 0 && size%16 != 0
+		// result: (Zero [size-8] (ADDQconst [8] destptr) (MOVQstore destptr (MOVQconst [0]) mem))
 		{
 			size := v.AuxInt
 			destptr := v.Args[0]
 			mem := v.Args[1]
-			if !(size <= 1024 && size%8 == 0) {
-				goto endfae59ebc96f670276efea844c3b302ac
+			if !(size <= 1024 && size%8 == 0 && size%16 != 0) {
+				goto end240266449c3e493db1c3b38a78682ff0
+			}
+			v.Op = OpZero
+			v.AuxInt = 0
+			v.Aux = nil
+			v.resetArgs()
+			v.AuxInt = size - 8
+			v0 := b.NewValue0(v.Line, OpAMD64ADDQconst, TypeInvalid)
+			v0.AuxInt = 8
+			v0.AddArg(destptr)
+			v0.Type = config.fe.TypeUInt64()
+			v.AddArg(v0)
+			v1 := b.NewValue0(v.Line, OpAMD64MOVQstore, TypeInvalid)
+			v1.AddArg(destptr)
+			v2 := b.NewValue0(v.Line, OpAMD64MOVQconst, TypeInvalid)
+			v2.AuxInt = 0
+			v2.Type = config.fe.TypeUInt64()
+			v1.AddArg(v2)
+			v1.AddArg(mem)
+			v1.Type = TypeMem
+			v.AddArg(v1)
+			return true
+		}
+		goto end240266449c3e493db1c3b38a78682ff0
+	end240266449c3e493db1c3b38a78682ff0:
+		;
+		// match: (Zero [size] destptr mem)
+		// cond: size <= 1024 && size%16 == 0
+		// result: (DUFFZERO [duffStart(size)] (ADDQconst [duffAdj(size)] destptr) (MOVOconst [0]) mem)
+		{
+			size := v.AuxInt
+			destptr := v.Args[0]
+			mem := v.Args[1]
+			if !(size <= 1024 && size%16 == 0) {
+				goto endf508bb887eee9119069b22c23dbca138
 			}
 			v.Op = OpAMD64DUFFZERO
 			v.AuxInt = 0
@@ -10654,15 +10688,15 @@
 			v0.AddArg(destptr)
 			v0.Type = config.fe.TypeUInt64()
 			v.AddArg(v0)
-			v1 := b.NewValue0(v.Line, OpAMD64MOVQconst, TypeInvalid)
+			v1 := b.NewValue0(v.Line, OpAMD64MOVOconst, TypeInvalid)
 			v1.AuxInt = 0
-			v1.Type = config.fe.TypeUInt64()
+			v1.Type = config.fe.TypeFloat64()
 			v.AddArg(v1)
 			v.AddArg(mem)
 			return true
 		}
-		goto endfae59ebc96f670276efea844c3b302ac
-	endfae59ebc96f670276efea844c3b302ac:
+		goto endf508bb887eee9119069b22c23dbca138
+	endf508bb887eee9119069b22c23dbca138:
 		;
 		// match: (Zero [size] destptr mem)
 		// cond: size > 1024 && size%8 == 0
diff --git a/src/cmd/dist/build.go b/src/cmd/dist/build.go
index 184f973..bbf7968 100644
--- a/src/cmd/dist/build.go
+++ b/src/cmd/dist/build.go
@@ -11,7 +11,9 @@
 	"os"
 	"os/exec"
 	"path/filepath"
+	"sort"
 	"strings"
+	"sync"
 )
 
 // Initialization for any invocation.
@@ -487,9 +489,20 @@
 	{"anames9.c", nil},
 }
 
+// installed maps from a dir name (as given to install) to a channel
+// that is closed once that dir's package has been installed.
+var installed = make(map[string]chan struct{})
+
 // install installs the library, package, or binary associated with dir,
 // which is relative to $GOROOT/src.
 func install(dir string) {
+	if ch, ok := installed[dir]; ok {
+		defer close(ch)
+	}
+	for _, dep := range builddeps[dir] {
+		<-installed[dep]
+	}
+
 	if vflag > 0 {
 		if goos != gohostos || goarch != gohostarch {
 			errprintf("%s (%s/%s)\n", dir, goos, goarch)
@@ -498,6 +511,9 @@
 		}
 	}
 
+	workdir := pathf("%s/%s", workdir, dir)
+	xmkdirall(workdir)
+
 	var clean []string
 	defer func() {
 		for _, name := range clean {
@@ -610,6 +626,8 @@
 			pathf("%s/src/runtime/textflag.h", goroot), 0)
 		copyfile(pathf("%s/pkg/include/funcdata.h", goroot),
 			pathf("%s/src/runtime/funcdata.h", goroot), 0)
+		copyfile(pathf("%s/pkg/include/asm_ppc64x.h", goroot),
+			pathf("%s/src/runtime/asm_ppc64x.h", goroot), 0)
 	}
 
 	// Generate any missing files; regenerate existing ones.
@@ -673,6 +691,7 @@
 	run(path, CheckExit|ShowOutput, compile...)
 
 	// Compile the files.
+	var wg sync.WaitGroup
 	for _, p := range files {
 		if !strings.HasSuffix(p, ".s") {
 			continue
@@ -695,14 +714,14 @@
 		// Change the last character of the output file (which was c or s).
 		b = b[:len(b)-1] + "o"
 		compile = append(compile, "-o", b, p)
-		bgrun(path, compile...)
+		bgrun(&wg, path, compile...)
 
 		link = append(link, b)
 		if doclean {
 			clean = append(clean, b)
 		}
 	}
-	bgwait()
+	bgwait(&wg)
 
 	if ispackcmd {
 		xremove(link[targ])
@@ -839,62 +858,19 @@
 	writefile(bdst.String(), dst, 0)
 }
 
-// buildorder records the order of builds for the 'go bootstrap' command.
-// The Go packages and commands must be in dependency order,
-// maintained by hand, but the order doesn't change often.
-var buildorder = []string{
-	// Go libraries and programs for bootstrap.
-	"runtime",
-	"errors",
-	"sync/atomic",
-	"sync",
-	"internal/singleflight",
-	"io",
-	"unicode",
-	"unicode/utf8",
-	"unicode/utf16",
-	"bytes",
-	"math",
-	"strings",
-	"strconv",
-	"bufio",
-	"sort",
-	"container/heap",
-	"encoding/base64",
-	"syscall",
-	"internal/syscall/windows/registry",
-	"time",
-	"internal/syscall/windows",
-	"os",
-	"reflect",
-	"fmt",
-	"encoding",
-	"encoding/binary",
-	"encoding/json",
-	"flag",
-	"path/filepath",
-	"path",
-	"io/ioutil",
-	"log",
-	"regexp/syntax",
-	"regexp",
-	"go/token",
-	"go/scanner",
-	"go/ast",
-	"go/parser",
-	"os/exec",
-	"os/signal",
-	"net/url",
-	"text/template/parse",
-	"text/template",
-	"go/doc",
-	"go/build",
-	"hash",
-	"crypto",
-	"crypto/sha1",
-	"debug/dwarf",
-	"debug/elf",
-	"cmd/go",
+// builddeps records the build dependencies for the 'go bootstrap' command.
+// It is a map[string][]string, generated by mkdeps.bash into deps.go.
+
+// buildlist is the list of directories being built, sorted by name.
+var buildlist = makeBuildlist()
+
+func makeBuildlist() []string {
+	var all []string
+	for dir := range builddeps {
+		all = append(all, dir)
+	}
+	sort.Strings(all)
+	return all
 }
 
 var runtimegen = []string{
@@ -903,7 +879,7 @@
 }
 
 func clean() {
-	for _, name := range buildorder {
+	for _, name := range buildlist {
 		path := pathf("%s/src/%s", goroot, name)
 		// Remove generated files.
 		for _, elem := range xreaddir(path) {
@@ -1044,19 +1020,30 @@
 	// than in a standard release like Go 1.4, so don't do this rebuild by default.
 	if false {
 		xprintf("##### Building Go toolchain using itself.\n")
-		for _, dir := range buildorder {
-			if dir == "cmd/go" {
-				break
-			}
-			install(dir)
+		for _, dir := range buildlist {
+			installed[dir] = make(chan struct{})
 		}
+		var wg sync.WaitGroup
+		for _, dir := range builddeps["cmd/go"] {
+			wg.Add(1)
+			dir := dir
+			go func() {
+				defer wg.Done()
+				install(dir)
+			}()
+		}
+		wg.Wait()
 		xprintf("\n")
 	}
 
 	xprintf("##### Building go_bootstrap for host, %s/%s.\n", gohostos, gohostarch)
-	for _, dir := range buildorder {
-		install(dir)
+	for _, dir := range buildlist {
+		installed[dir] = make(chan struct{})
 	}
+	for _, dir := range buildlist {
+		go install(dir)
+	}
+	<-installed["cmd/go"]
 
 	goos = oldgoos
 	goarch = oldgoarch
@@ -1065,6 +1052,7 @@
 
 	// Build runtime for actual goos/goarch too.
 	if goos != gohostos || goarch != gohostarch {
+		installed["runtime"] = make(chan struct{})
 		install("runtime")
 	}
 }
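
The bootstrap build becomes concurrent: each directory gets a channel in installed, every install first blocks on its dependencies' channels and closes its own on return (the deferred close fires even on early exit), so receiving from installed["cmd/go"] waits for the entire dependency graph. A self-contained sketch of the channel-per-node scheme (package names are placeholders):

	package main

	import "fmt"

	var deps = map[string][]string{
		"a": {},
		"b": {"a"},
		"c": {"a", "b"},
	}

	var done = make(map[string]chan struct{})

	func install(dir string) {
		defer close(done[dir]) // signal completion even on early return
		for _, dep := range deps[dir] {
			<-done[dep] // block until every dependency is installed
		}
		fmt.Println("built", dir)
	}

	func main() {
		for dir := range deps {
			done[dir] = make(chan struct{})
		}
		for dir := range deps {
			go install(dir)
		}
		<-done["c"] // "c" transitively depends on everything
	}
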
diff --git a/src/cmd/dist/deps.go b/src/cmd/dist/deps.go
new file mode 100644
index 0000000..01d134e
--- /dev/null
+++ b/src/cmd/dist/deps.go
@@ -0,0 +1,57 @@
+// generated by mkdeps.bash
+
+package main
+
+var builddeps = map[string][]string{
+	"bufio":                             {"bytes", "errors", "io", "runtime", "sync", "sync/atomic", "unicode", "unicode/utf8"},
+	"bytes":                             {"errors", "io", "runtime", "sync", "sync/atomic", "unicode", "unicode/utf8"},
+	"container/heap":                    {"runtime", "sort"},
+	"crypto":                            {"errors", "hash", "io", "math", "runtime", "strconv", "sync", "sync/atomic", "unicode/utf8"},
+	"crypto/sha1":                       {"crypto", "errors", "hash", "io", "math", "runtime", "strconv", "sync", "sync/atomic", "unicode/utf8"},
+	"debug/dwarf":                       {"encoding/binary", "errors", "fmt", "internal/syscall/windows", "internal/syscall/windows/registry", "io", "math", "os", "path", "reflect", "runtime", "sort", "strconv", "strings", "sync", "sync/atomic", "syscall", "time", "unicode", "unicode/utf16", "unicode/utf8"},
+	"debug/elf":                         {"bytes", "debug/dwarf", "encoding/binary", "errors", "fmt", "internal/syscall/windows", "internal/syscall/windows/registry", "io", "math", "os", "path", "reflect", "runtime", "sort", "strconv", "strings", "sync", "sync/atomic", "syscall", "time", "unicode", "unicode/utf16", "unicode/utf8"},
+	"encoding":                          {"runtime"},
+	"encoding/base64":                   {"errors", "io", "math", "runtime", "strconv", "sync", "sync/atomic", "unicode/utf8"},
+	"encoding/binary":                   {"errors", "io", "math", "reflect", "runtime", "strconv", "sync", "sync/atomic", "unicode/utf8"},
+	"encoding/json":                     {"bytes", "encoding", "encoding/base64", "errors", "fmt", "internal/syscall/windows", "internal/syscall/windows/registry", "io", "math", "os", "reflect", "runtime", "sort", "strconv", "strings", "sync", "sync/atomic", "syscall", "time", "unicode", "unicode/utf16", "unicode/utf8"},
+	"errors":                            {"runtime"},
+	"flag":                              {"errors", "fmt", "internal/syscall/windows", "internal/syscall/windows/registry", "io", "math", "os", "reflect", "runtime", "sort", "strconv", "sync", "sync/atomic", "syscall", "time", "unicode/utf16", "unicode/utf8"},
+	"fmt":                               {"errors", "internal/syscall/windows", "internal/syscall/windows/registry", "io", "math", "os", "reflect", "runtime", "strconv", "sync", "sync/atomic", "syscall", "time", "unicode/utf16", "unicode/utf8"},
+	"go/ast":                            {"bytes", "errors", "fmt", "go/scanner", "go/token", "internal/syscall/windows", "internal/syscall/windows/registry", "io", "math", "os", "path/filepath", "reflect", "runtime", "sort", "strconv", "strings", "sync", "sync/atomic", "syscall", "time", "unicode", "unicode/utf16", "unicode/utf8"},
+	"go/build":                          {"bufio", "bytes", "errors", "fmt", "go/ast", "go/doc", "go/parser", "go/scanner", "go/token", "internal/syscall/windows", "internal/syscall/windows/registry", "io", "io/ioutil", "log", "math", "net/url", "os", "path", "path/filepath", "reflect", "regexp", "regexp/syntax", "runtime", "sort", "strconv", "strings", "sync", "sync/atomic", "syscall", "text/template", "text/template/parse", "time", "unicode", "unicode/utf16", "unicode/utf8"},
+	"go/doc":                            {"bytes", "errors", "fmt", "go/ast", "go/scanner", "go/token", "internal/syscall/windows", "internal/syscall/windows/registry", "io", "io/ioutil", "math", "net/url", "os", "path", "path/filepath", "reflect", "regexp", "regexp/syntax", "runtime", "sort", "strconv", "strings", "sync", "sync/atomic", "syscall", "text/template", "text/template/parse", "time", "unicode", "unicode/utf16", "unicode/utf8"},
+	"go/parser":                         {"bytes", "errors", "fmt", "go/ast", "go/scanner", "go/token", "internal/syscall/windows", "internal/syscall/windows/registry", "io", "io/ioutil", "math", "os", "path/filepath", "reflect", "runtime", "sort", "strconv", "strings", "sync", "sync/atomic", "syscall", "time", "unicode", "unicode/utf16", "unicode/utf8"},
+	"go/scanner":                        {"bytes", "errors", "fmt", "go/token", "internal/syscall/windows", "internal/syscall/windows/registry", "io", "math", "os", "path/filepath", "reflect", "runtime", "sort", "strconv", "strings", "sync", "sync/atomic", "syscall", "time", "unicode", "unicode/utf16", "unicode/utf8"},
+	"go/token":                          {"errors", "fmt", "internal/syscall/windows", "internal/syscall/windows/registry", "io", "math", "os", "reflect", "runtime", "sort", "strconv", "sync", "sync/atomic", "syscall", "time", "unicode/utf16", "unicode/utf8"},
+	"hash":                              {"errors", "io", "runtime", "sync", "sync/atomic"},
+	"internal/singleflight":             {"runtime", "sync", "sync/atomic"},
+	"internal/syscall/windows":          {"errors", "runtime", "sync", "sync/atomic", "syscall", "unicode/utf16"},
+	"internal/syscall/windows/registry": {"errors", "io", "runtime", "sync", "sync/atomic", "syscall", "unicode/utf16"},
+	"io":                  {"errors", "runtime", "sync", "sync/atomic"},
+	"io/ioutil":           {"bytes", "errors", "internal/syscall/windows", "internal/syscall/windows/registry", "io", "math", "os", "path/filepath", "runtime", "sort", "strconv", "strings", "sync", "sync/atomic", "syscall", "time", "unicode", "unicode/utf16", "unicode/utf8"},
+	"log":                 {"errors", "fmt", "internal/syscall/windows", "internal/syscall/windows/registry", "io", "math", "os", "reflect", "runtime", "strconv", "sync", "sync/atomic", "syscall", "time", "unicode/utf16", "unicode/utf8"},
+	"math":                {"runtime"},
+	"net/url":             {"bytes", "errors", "fmt", "internal/syscall/windows", "internal/syscall/windows/registry", "io", "math", "os", "reflect", "runtime", "sort", "strconv", "strings", "sync", "sync/atomic", "syscall", "time", "unicode", "unicode/utf16", "unicode/utf8"},
+	"os":                  {"errors", "internal/syscall/windows", "internal/syscall/windows/registry", "io", "runtime", "sync", "sync/atomic", "syscall", "time", "unicode/utf16", "unicode/utf8"},
+	"os/exec":             {"bytes", "errors", "internal/syscall/windows", "internal/syscall/windows/registry", "io", "math", "os", "path/filepath", "runtime", "sort", "strconv", "strings", "sync", "sync/atomic", "syscall", "time", "unicode", "unicode/utf16", "unicode/utf8"},
+	"os/signal":           {"errors", "internal/syscall/windows", "internal/syscall/windows/registry", "io", "os", "runtime", "sync", "sync/atomic", "syscall", "time", "unicode/utf16", "unicode/utf8"},
+	"path":                {"errors", "io", "runtime", "strings", "sync", "sync/atomic", "unicode", "unicode/utf8"},
+	"path/filepath":       {"bytes", "errors", "internal/syscall/windows", "internal/syscall/windows/registry", "io", "os", "runtime", "sort", "strings", "sync", "sync/atomic", "syscall", "time", "unicode", "unicode/utf16", "unicode/utf8"},
+	"reflect":             {"errors", "math", "runtime", "strconv", "sync", "sync/atomic", "unicode/utf8"},
+	"regexp":              {"bytes", "errors", "io", "math", "regexp/syntax", "runtime", "sort", "strconv", "strings", "sync", "sync/atomic", "unicode", "unicode/utf8"},
+	"regexp/syntax":       {"bytes", "errors", "io", "math", "runtime", "sort", "strconv", "strings", "sync", "sync/atomic", "unicode", "unicode/utf8"},
+	"runtime":             {},
+	"sort":                {"runtime"},
+	"strconv":             {"errors", "math", "runtime", "unicode/utf8"},
+	"strings":             {"errors", "io", "runtime", "sync", "sync/atomic", "unicode", "unicode/utf8"},
+	"sync":                {"runtime", "sync/atomic"},
+	"sync/atomic":         {"runtime"},
+	"syscall":             {"errors", "runtime", "sync", "sync/atomic", "unicode/utf16"},
+	"text/template":       {"bytes", "errors", "fmt", "internal/syscall/windows", "internal/syscall/windows/registry", "io", "io/ioutil", "math", "net/url", "os", "path/filepath", "reflect", "runtime", "sort", "strconv", "strings", "sync", "sync/atomic", "syscall", "text/template/parse", "time", "unicode", "unicode/utf16", "unicode/utf8"},
+	"text/template/parse": {"bytes", "errors", "fmt", "internal/syscall/windows", "internal/syscall/windows/registry", "io", "math", "os", "reflect", "runtime", "strconv", "strings", "sync", "sync/atomic", "syscall", "time", "unicode", "unicode/utf16", "unicode/utf8"},
+	"time":                {"errors", "internal/syscall/windows/registry", "io", "runtime", "sync", "sync/atomic", "syscall", "unicode/utf16"},
+	"unicode":             {"runtime"},
+	"unicode/utf16":       {"runtime"},
+	"unicode/utf8":        {"runtime"},
+	"cmd/go":              {"bufio", "bytes", "container/heap", "crypto", "crypto/sha1", "debug/dwarf", "debug/elf", "encoding", "encoding/base64", "encoding/binary", "encoding/json", "errors", "flag", "fmt", "go/ast", "go/build", "go/doc", "go/parser", "go/scanner", "go/token", "hash", "internal/singleflight", "internal/syscall/windows", "internal/syscall/windows/registry", "io", "io/ioutil", "log", "math", "net/url", "os", "os/exec", "os/signal", "path", "path/filepath", "reflect", "regexp", "regexp/syntax", "runtime", "sort", "strconv", "strings", "sync", "sync/atomic", "syscall", "text/template", "text/template/parse", "time", "unicode", "unicode/utf16", "unicode/utf8"},
+}
diff --git a/src/cmd/dist/mkdeps.bash b/src/cmd/dist/mkdeps.bash
new file mode 100755
index 0000000..283d6bf
--- /dev/null
+++ b/src/cmd/dist/mkdeps.bash
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+set -e
+
+# Windows has the most dependencies.
+export GOOS=windows
+
+(
+	echo '// generated by mkdeps.bash'
+	echo
+	echo 'package main'
+	echo
+	echo 'var builddeps = map[string][]string{'
+
+	deps=$(GOOS=windows go list -tags cmd_go_bootstrap -f '{{join .Deps "\n"}}' cmd/go | grep -v '^unsafe$')
+	GOOS=windows go list -tags cmd_go_bootstrap -f '{{printf "%q" .ImportPath}}: { {{range .Deps}}{{if not (eq . "unsafe")}}{{printf "%q" .}}, {{end}}{{end}} },' $deps cmd/go
+
+	echo '}'
+) |gofmt >deps.go
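
The deps.go that this script regenerates drives cmd/dist's bootstrap build of cmd/go. As a rough sketch of how such a map can be consumed (buildOrder and the trimmed map below are illustrative, not dist's actual code), a postorder walk yields an order in which every package is built after its dependencies:

	package main

	import "fmt"

	// A trimmed stand-in for the builddeps map that mkdeps.bash generates.
	var builddeps = map[string][]string{
		"errors":      {"runtime"},
		"io":          {"errors", "runtime", "sync", "sync/atomic"},
		"runtime":     {},
		"sync":        {"runtime", "sync/atomic"},
		"sync/atomic": {"runtime"},
	}

	// buildOrder appends pkg and its dependencies to out in postorder,
	// so each package appears after everything it depends on.
	func buildOrder(pkg string, seen map[string]bool, out *[]string) {
		if seen[pkg] {
			return
		}
		seen[pkg] = true
		for _, dep := range builddeps[pkg] {
			buildOrder(dep, seen, out)
		}
		*out = append(*out, pkg)
	}

	func main() {
		var order []string
		buildOrder("io", map[string]bool{}, &order)
		fmt.Println(order) // [runtime errors sync/atomic sync io]
	}
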
diff --git a/src/cmd/dist/test.go b/src/cmd/dist/test.go
index 2cc4610..c92109a 100644
--- a/src/cmd/dist/test.go
+++ b/src/cmd/dist/test.go
@@ -9,6 +9,7 @@
 	"errors"
 	"flag"
 	"fmt"
+	"io/ioutil"
 	"log"
 	"os"
 	"os/exec"
@@ -389,7 +390,7 @@
 		name:    testName,
 		heading: "GOMAXPROCS=2 runtime -cpu=1,2,4",
 		fn: func() error {
-			cmd := t.dirCmd("src", "go", "test", "-short", t.timeout(300), t.tags(), "runtime", "-cpu=1,2,4")
+			cmd := t.dirCmd("src", "go", "test", "-short", t.timeout(300), t.tags(), "-cpu=1,2,4", "runtime")
 			// We set GOMAXPROCS=2 in addition to -cpu=1,2,4 in order to test runtime bootstrap code,
 			// creation of first goroutines and first garbage collections in the parallel setting.
 			cmd.Env = mergeEnvLists([]string{"GOMAXPROCS=2"}, os.Environ())
@@ -402,7 +403,7 @@
 		name:    "sync_cpu",
 		heading: "sync -cpu=10",
 		fn: func() error {
-			return t.dirCmd("src", "go", "test", "sync", "-short", t.timeout(120), t.tags(), "-cpu=10").Run()
+			return t.dirCmd("src", "go", "test", "-short", t.timeout(120), t.tags(), "-cpu=10", "sync").Run()
 		},
 	})
 
@@ -477,6 +478,9 @@
 		if t.gohostos == "linux" && t.goarch == "amd64" {
 			t.registerTest("testasan", "../misc/cgo/testasan", "go", "run", "main.go")
 		}
+		if t.gohostos == "linux" && t.goarch == "amd64" {
+			t.registerTest("testsanitizers", "../misc/cgo/testsanitizers", "./test.bash")
+		}
 		if t.hasBash() && t.goos != "android" && !t.iOS() && t.gohostos != "windows" {
 			t.registerTest("cgo_errors", "../misc/cgo/errors", "./test.bash")
 		}
@@ -488,7 +492,14 @@
 		t.registerTest("doc_progs", "../doc/progs", "time", "go", "run", "run.go")
 		t.registerTest("wiki", "../doc/articles/wiki", "./test.bash")
 		t.registerTest("codewalk", "../doc/codewalk", "time", "./run")
-		t.registerTest("shootout", "../test/bench/shootout", "time", "./timing.sh", "-test")
+		for _, name := range t.shootoutTests() {
+			if name == "spectralnorm" && os.Getenv("GO_BUILDER_NAME") == "linux-arm-arm5" {
+				// Heavy on floating point and takes over 20 minutes with softfloat.
+				// Disabled per Issue 12688.
+				continue
+			}
+			t.registerTest("shootout:"+name, "../test/bench/shootout", "time", "./timing.sh", "-test", name)
+		}
 	}
 	if t.goos != "android" && !t.iOS() {
 		t.registerTest("bench_go1", "../test/bench/go1", "go", "test")
@@ -601,7 +612,7 @@
 			return false
 		}
 		switch pair {
-		case "darwin-amd64", "darwin-arm", "darwin-arm64",
+		case "darwin-386", "darwin-amd64", "darwin-arm", "darwin-arm64",
 			"linux-amd64", "linux-386":
 			return true
 		}
@@ -609,7 +620,7 @@
 	case "c-shared":
 		// TODO(hyangah): add linux-386.
 		switch pair {
-		case "linux-amd64", "darwin-amd64", "android-arm":
+		case "linux-amd64", "darwin-amd64", "android-arm", "linux-arm":
 			return true
 		}
 		return false
@@ -886,6 +897,18 @@
 	).Run()
 }
 
+func (t *tester) shootoutTests() []string {
+	sh, err := ioutil.ReadFile(filepath.Join(t.goroot, "test", "bench", "shootout", "timing.sh"))
+	if err != nil {
+		log.Fatal(err)
+	}
+	m := regexp.MustCompile(`(?m)^\s+run="([\w+ ]+)"\s*$`).FindSubmatch(sh)
+	if m == nil {
+		log.Fatal("failed to find run=\"...\" line in test/bench/shootout/timing.sh")
+	}
+	return strings.Fields(string(m[1]))
+}
+
 // mergeEnvLists merges the two environment lists such that
 // variables with the same name in "in" replace those in "out".
 // out may be mutated.
diff --git a/src/cmd/dist/util.go b/src/cmd/dist/util.go
index f13210f..2fcd9ca 100644
--- a/src/cmd/dist/util.go
+++ b/src/cmd/dist/util.go
@@ -16,7 +16,6 @@
 	"strconv"
 	"strings"
 	"sync"
-	"sync/atomic"
 	"time"
 )
 
@@ -109,7 +108,9 @@
 		}
 		outputLock.Unlock()
 		if mode&Background != 0 {
-			bgdied.Done()
+			// Prevent fatal from deadlocking by waiting for our
+			// own bghelper goroutine to exit:
+			bghelpers.Done()
 		}
 		fatal("FAILED: %v: %v", strings.Join(cmd, " "), err)
 	}
@@ -130,62 +131,60 @@
 	bgwork = make(chan func(), 1e5)
 	bgdone = make(chan struct{}, 1e5)
 
-	bgdied sync.WaitGroup
-	nwork  int32
-	ndone  int32
+	bghelpers sync.WaitGroup
 
-	dying  = make(chan bool)
-	nfatal int32
+	dieOnce sync.Once // guards close of dying
+	dying   = make(chan struct{})
 )
 
 func bginit() {
-	bgdied.Add(maxbg)
+	bghelpers.Add(maxbg)
 	for i := 0; i < maxbg; i++ {
 		go bghelper()
 	}
 }
 
 func bghelper() {
+	defer bghelpers.Done()
 	for {
-		w := <-bgwork
-		w()
-
-		// Stop if we're dying.
-		if atomic.LoadInt32(&nfatal) > 0 {
-			bgdied.Done()
+		select {
+		case <-dying:
 			return
+		case w := <-bgwork:
+			// Dying takes precedence over doing more work.
+			select {
+			case <-dying:
+				return
+			default:
+				w()
+			}
 		}
 	}
 }
 
 // bgrun is like run but runs the command in the background.
 // CheckExit|ShowOutput mode is implied (since output cannot be returned).
-func bgrun(dir string, cmd ...string) {
+// bgrun adds 1 to wg immediately, and calls Done when the work completes.
+func bgrun(wg *sync.WaitGroup, dir string, cmd ...string) {
+	wg.Add(1)
 	bgwork <- func() {
+		defer wg.Done()
 		run(dir, CheckExit|ShowOutput|Background, cmd...)
 	}
 }
 
 // bgwait waits for pending bgruns to finish.
 // bgwait must be called from only a single goroutine at a time.
-func bgwait() {
-	var wg sync.WaitGroup
-	wg.Add(maxbg)
-	done := make(chan bool)
-	for i := 0; i < maxbg; i++ {
-		bgwork <- func() {
-			wg.Done()
-
-			// Hold up bg goroutine until either the wait finishes
-			// or the program starts dying due to a call to fatal.
-			select {
-			case <-dying:
-			case <-done:
-			}
-		}
+func bgwait(wg *sync.WaitGroup) {
+	done := make(chan struct{})
+	go func() {
+		wg.Wait()
+		close(done)
+	}()
+	select {
+	case <-done:
+	case <-dying:
 	}
-	wg.Wait()
-	close(done)
 }
 
 // xgetwd returns the current directory.
@@ -355,16 +354,12 @@
 func fatal(format string, args ...interface{}) {
 	fmt.Fprintf(os.Stderr, "go tool dist: %s\n", fmt.Sprintf(format, args...))
 
+	dieOnce.Do(func() { close(dying) })
+
 	// Wait for background goroutines to finish,
 	// so that exit handler that removes the work directory
 	// is not fighting with active writes or open files.
-	if atomic.AddInt32(&nfatal, 1) == 1 {
-		close(dying)
-	}
-	for i := 0; i < maxbg; i++ {
-		bgwork <- func() {} // wake up workers so they notice nfatal > 0
-	}
-	bgdied.Wait()
+	bghelpers.Wait()
 
 	xexit(2)
 }
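
The rework above replaces the nfatal counter and the wake-up sends with a sync.Once that closes a channel, plus a WaitGroup covering the helper goroutines themselves. A standalone sketch of the same shutdown pattern (the names and buffer size here are illustrative, not dist's actual values):

	package main

	import (
		"fmt"
		"sync"
		"time"
	)

	var (
		bgwork  = make(chan func(), 16)
		helpers sync.WaitGroup
		dieOnce sync.Once // guards close of dying
		dying   = make(chan struct{})
	)

	func helper() {
		defer helpers.Done()
		for {
			select {
			case <-dying:
				return
			case w := <-bgwork:
				w()
			}
		}
	}

	// die closes dying exactly once, then waits for every helper to
	// notice and exit. No wake-up sends are needed: each helper is
	// always selecting on the dying channel.
	func die() {
		dieOnce.Do(func() { close(dying) })
		helpers.Wait()
	}

	func main() {
		helpers.Add(2)
		go helper()
		go helper()
		bgwork <- func() { fmt.Println("work 1") }
		bgwork <- func() { fmt.Println("work 2") }
		time.Sleep(100 * time.Millisecond) // let the work drain (demo only)
		die()
	}
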
@@ -480,6 +475,14 @@
 		}
 	}
 
+	if len(os.Args) > 1 && os.Args[1] == "-check-goarm" {
+		useVFPv1() // might fail with SIGILL
+		println("VFPv1 OK.")
+		useVFPv3() // might fail with SIGILL
+		println("VFPv3 OK.")
+		os.Exit(0)
+	}
+
 	xinit()
 	xmain()
 	xexit(0)
@@ -515,40 +518,21 @@
 		// OpenBSD currently only supports softfloat.
 		return "5"
 	}
-	if goos != "linux" {
-		// All other arm platforms that we support
-		// require ARMv7.
+
+	// Try to exec ourselves in a mode to detect VFP support.
+	// Seeing how far it gets before crashing tells us which instructions are supported.
+	// The test is OS-agnostic.
+	out := run("", 0, os.Args[0], "-check-goarm")
+	v1ok := strings.Contains(out, "VFPv1 OK.")
+	v3ok := strings.Contains(out, "VFPv3 OK.")
+
+	if v1ok && v3ok {
 		return "7"
 	}
-	cpuinfo := readfile("/proc/cpuinfo")
-	goarm := "5"
-	for _, line := range splitlines(cpuinfo) {
-		line := strings.SplitN(line, ":", 2)
-		if len(line) < 2 {
-			continue
-		}
-		if strings.TrimSpace(line[0]) != "Features" {
-			continue
-		}
-		features := splitfields(line[1])
-		sort.Strings(features) // so vfpv3 sorts after vfp
-
-		// Infer GOARM value from the vfp features available
-		// on this host. Values of GOARM detected are:
-		// 5: no vfp support was found
-		// 6: vfp (v1) support was detected, but no higher
-		// 7: vfpv3 support was detected.
-		// This matches the assertions in runtime.checkarm.
-		for _, f := range features {
-			switch f {
-			case "vfp":
-				goarm = "6"
-			case "vfpv3":
-				goarm = "7"
-			}
-		}
+	if v1ok {
+		return "6"
 	}
-	return goarm
+	return "5"
 }
 
 func min(a, b int) int {
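
The new GOARM detection re-execs the dist binary in -check-goarm mode and checks how far the child's output got before a SIGILL (if any) killed it. A hedged sketch of the general probe pattern (-probe, step1/step2, and the messages are hypothetical stand-ins for -check-goarm and the VFP stubs):

	package main

	import (
		"fmt"
		"os"
		"os/exec"
		"strings"
	)

	func main() {
		if len(os.Args) > 1 && os.Args[1] == "-probe" {
			// In dist, each step runs a progressively newer instruction
			// (useVFPv1, then useVFPv3) and may crash with SIGILL.
			fmt.Println("step1 OK.")
			fmt.Println("step2 OK.")
			os.Exit(0)
		}
		// The error is deliberately ignored: a crash mid-probe is an
		// expected outcome, and the partial output tells us enough.
		out, _ := exec.Command(os.Args[0], "-probe").CombinedOutput()
		switch {
		case strings.Contains(string(out), "step2 OK."):
			fmt.Println("GOARM=7 equivalent")
		case strings.Contains(string(out), "step1 OK."):
			fmt.Println("GOARM=6 equivalent")
		default:
			fmt.Println("GOARM=5 equivalent")
		}
	}
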
diff --git a/src/cmd/dist/util_gc.go b/src/cmd/dist/util_gc.go
index 9f6cfd0..81e52b6 100644
--- a/src/cmd/dist/util_gc.go
+++ b/src/cmd/dist/util_gc.go
@@ -17,3 +17,11 @@
 	cpuid(&info, 1)
 	return info[3]&(1<<26) != 0 // SSE2
 }
+
+// useVFPv1 tries to execute one VFPv1 instruction on ARM.
+// It will crash the current process if VFPv1 is missing.
+func useVFPv1()
+
+// useVFPv3 tries to execute one VFPv3 instruction on ARM.
+// It will crash the current process if VFPv3 is missing.
+func useVFPv3()
diff --git a/src/cmd/dist/util_gccgo.go b/src/cmd/dist/util_gccgo.go
index 14ac70b..18e328f 100644
--- a/src/cmd/dist/util_gccgo.go
+++ b/src/cmd/dist/util_gccgo.go
@@ -18,3 +18,7 @@
 import "C"
 
 func cansse2() bool { return C.supports_sse2() != 0 }
+
+func useVFPv1() {}
+
+func useVFPv3() {}
diff --git a/src/cmd/dist/vfp_arm.s b/src/cmd/dist/vfp_arm.s
new file mode 100644
index 0000000..647c439
--- /dev/null
+++ b/src/cmd/dist/vfp_arm.s
@@ -0,0 +1,17 @@
+// Copyright 2015 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build gc,arm
+
+#include "textflag.h"
+
+// try to run "vmov.f64 d0, d0" instruction
+TEXT ·useVFPv1(SB),NOSPLIT,$0
+	WORD $0xeeb00b40	// vmov.f64 d0, d0
+	RET
+
+// try to run VFPv3-only "vmov.f64 d0, #112" instruction
+TEXT ·useVFPv3(SB),NOSPLIT,$0
+	WORD $0xeeb70b00	// vmov.f64 d0, #112
+	RET
diff --git a/src/cmd/dist/vfp_default.s b/src/cmd/dist/vfp_default.s
new file mode 100644
index 0000000..5cf9997
--- /dev/null
+++ b/src/cmd/dist/vfp_default.s
@@ -0,0 +1,13 @@
+// Copyright 2015 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !arm,gc
+
+#include "textflag.h"
+
+TEXT ·useVFPv1(SB),NOSPLIT,$0
+	RET
+
+TEXT ·useVFPv3(SB),NOSPLIT,$0
+	RET
diff --git a/src/cmd/doc/dirs.go b/src/cmd/doc/dirs.go
new file mode 100644
index 0000000..e33cbc8
--- /dev/null
+++ b/src/cmd/doc/dirs.go
@@ -0,0 +1,118 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+	"go/build"
+	"os"
+	"path"
+	"path/filepath"
+	"strings"
+)
+
+// Dirs is a structure for scanning the directory tree.
+// Its Next method returns the next Go source directory it finds.
+// Although it can be used to scan the tree multiple times, it
+// only walks the tree once, caching the data it finds.
+type Dirs struct {
+	scan   chan string // directories generated by walk.
+	paths  []string    // Cache of known paths.
+	offset int         // Counter for Next.
+}
+
+var dirs Dirs
+
+func init() {
+	dirs.paths = make([]string, 0, 1000)
+	dirs.scan = make(chan string)
+	go dirs.walk()
+}
+
+// Reset puts the scan back at the beginning.
+func (d *Dirs) Reset() {
+	d.offset = 0
+}
+
+// Next returns the next directory in the scan. The boolean
+// is false when the scan is done.
+func (d *Dirs) Next() (string, bool) {
+	if d.offset < len(d.paths) {
+		path := d.paths[d.offset]
+		d.offset++
+		return path, true
+	}
+	path, ok := <-d.scan
+	if !ok {
+		return "", false
+	}
+	d.paths = append(d.paths, path)
+	d.offset++
+	return path, ok
+}
+
+// walk walks the trees in GOROOT and GOPATH.
+func (d *Dirs) walk() {
+	d.walkRoot(build.Default.GOROOT)
+	for _, root := range splitGopath() {
+		d.walkRoot(root)
+	}
+	close(d.scan)
+}
+
+// walkRoot walks a single directory tree. Each Go source directory it finds is
+// delivered on d.scan.
+func (d *Dirs) walkRoot(root string) {
+	root = path.Join(root, "src")
+	slashDot := string(filepath.Separator) + "."
+	// A path element beginning with a dot (such as .git) marks a
+	// directory to skip; slashDot lets the visit closure detect that.
+
+	visit := func(pathName string, f os.FileInfo, err error) error {
+		if err != nil {
+			return nil
+		}
+		// One package per directory. Ignore the files themselves.
+		if !f.IsDir() {
+			return nil
+		}
+		// No .git or other dot nonsense please.
+		if strings.Contains(pathName, slashDot) {
+			return filepath.SkipDir
+		}
+		// Does the directory contain any Go files? If so, it's a candidate.
+		if hasGoFiles(pathName) {
+			d.scan <- pathName
+			return nil
+		}
+		return nil
+	}
+
+	filepath.Walk(root, visit)
+}
+
+// hasGoFiles reports whether the directory contains at least one file with
+// a ".go" extension.
+func hasGoFiles(path string) bool {
+	dir, err := os.Open(path)
+	if err != nil {
+		// ignore unreadable directories
+		return false
+	}
+	defer dir.Close()
+
+	names, err := dir.Readdirnames(0)
+	if err != nil {
+		// ignore unreadable directories
+		return false
+	}
+
+	for _, name := range names {
+		if strings.HasSuffix(name, ".go") {
+			return true
+		}
+	}
+
+	return false
+}
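
Dirs deliberately separates walking from consuming: walk runs once in a background goroutine, Next streams results as they arrive (caching them), and Reset replays the cache without touching the file system again. A small usage sketch (listSourceDirs is hypothetical and assumes the package-level dirs variable above):

	// listSourceDirs drains the scan; a second call returns the same
	// paths from the cache without re-walking GOROOT and GOPATH.
	func listSourceDirs() []string {
		var out []string
		dirs.Reset()
		for {
			dir, ok := dirs.Next()
			if !ok {
				break
			}
			out = append(out, dir)
		}
		return out
	}
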
diff --git a/src/cmd/doc/doc_test.go b/src/cmd/doc/doc_test.go
index b97cc76..7c72b87 100644
--- a/src/cmd/doc/doc_test.go
+++ b/src/cmd/doc/doc_test.go
@@ -7,13 +7,21 @@
 import (
 	"bytes"
 	"flag"
-	"os"
-	"os/exec"
 	"regexp"
 	"runtime"
+	"strings"
 	"testing"
 )
 
+func maybeSkip(t *testing.T) {
+	if strings.HasPrefix(runtime.GOOS, "nacl") {
+		t.Skip("nacl does not have a full file tree")
+	}
+	if runtime.GOOS == "darwin" && strings.HasPrefix(runtime.GOARCH, "arm") {
+		t.Skip("darwin/arm does not have a full file tree")
+	}
+}
+
 const (
 	dataDir = "testdata"
 	binary  = "testdoc"
@@ -54,6 +62,7 @@
 			`Package comment`,
 			`const ExportedConstant = 1`,                            // Simple constant.
 			`const ConstOne = 1`,                                    // First entry in constant block.
+			`const ConstFive ...`,                                   // From block starting with unexported constant.
 			`var ExportedVariable = 1`,                              // Simple variable.
 			`var VarOne = 1`,                                        // First entry in variable block.
 			`func ExportedFunc\(a int\) bool`,                       // Function.
@@ -73,6 +82,7 @@
 			`Comment before VarOne`,             // No comment for first entry in variable block.
 			`ConstTwo = 2`,                      // No second entry in constant block.
 			`VarTwo = 2`,                        // No second entry in variable block.
+			`VarFive = 5`,                       // From block starting with unexported variable.
 			`type unexportedType`,               // No unexported type.
 			`unexportedTypedConstant`,           // No unexported typed constant.
 			`Field`,                             // No fields.
@@ -299,9 +309,7 @@
 }
 
 func TestDoc(t *testing.T) {
-	if runtime.GOOS == "darwin" && (runtime.GOARCH == "arm" || runtime.GOARCH == "arm64") {
-		t.Skip("TODO: on darwin/arm, test fails: no such package cmd/doc/testdata")
-	}
+	maybeSkip(t)
 	for _, test := range tests {
 		var b bytes.Buffer
 		var flagSet flag.FlagSet
@@ -337,12 +345,89 @@
 	}
 }
 
-// run runs the command, but calls t.Fatal if there is an error.
-func run(c *exec.Cmd, t *testing.T) []byte {
-	output, err := c.CombinedOutput()
-	if err != nil {
-		os.Stdout.Write(output)
-		t.Fatal(err)
+// Test the code to try multiple packages. Our test case is
+//	go doc rand.Float64
+// This needs to find math/rand.Float64; however crypto/rand, which doesn't
+// have the symbol, usually appears first in the directory listing.
+func TestMultiplePackages(t *testing.T) {
+	if testing.Short() {
+		t.Skip("scanning file system takes too long")
 	}
-	return output
+	maybeSkip(t)
+	var b bytes.Buffer // We don't care about the output.
+	// Make sure crypto/rand does not have the symbol.
+	{
+		var flagSet flag.FlagSet
+		err := do(&b, &flagSet, []string{"crypto/rand.float64"})
+		if err == nil {
+			t.Errorf("expected error from crypto/rand.float64")
+		} else if !strings.Contains(err.Error(), "no symbol float64") {
+			t.Errorf("unexpected error %q from crypto/rand.float64", err)
+		}
+	}
+	// Make sure math/rand does have the symbol.
+	{
+		var flagSet flag.FlagSet
+		err := do(&b, &flagSet, []string{"math/rand.float64"})
+		if err != nil {
+			t.Errorf("unexpected error %q from math/rand.float64", err)
+		}
+	}
+	// Try the shorthand.
+	{
+		var flagSet flag.FlagSet
+		err := do(&b, &flagSet, []string{"rand.float64"})
+		if err != nil {
+			t.Errorf("unexpected error %q from rand.float64", err)
+		}
+	}
+	// Now try a missing symbol. We should see both packages in the error.
+	{
+		var flagSet flag.FlagSet
+		err := do(&b, &flagSet, []string{"rand.doesnotexit"})
+		if err == nil {
+			t.Errorf("expected error from rand.doesnotexit")
+		} else {
+			errStr := err.Error()
+			if !strings.Contains(errStr, "no symbol") {
+				t.Errorf("error %q should contain 'no symbol", errStr)
+			}
+			if !strings.Contains(errStr, "crypto/rand") {
+				t.Errorf("error %q should contain crypto/rand", errStr)
+			}
+			if !strings.Contains(errStr, "math/rand") {
+				t.Errorf("error %q should contain math/rand", errStr)
+			}
+		}
+	}
+}
+
+type trimTest struct {
+	path   string
+	prefix string
+	result string
+	ok     bool
+}
+
+var trimTests = []trimTest{
+	{"", "", "", true},
+	{"/usr/gopher", "/usr/gopher", "/usr/gopher", true},
+	{"/usr/gopher/bar", "/usr/gopher", "bar", true},
+	{"/usr/gopher", "/usr/gopher", "/usr/gopher", true},
+	{"/usr/gopherflakes", "/usr/gopher", "/usr/gopherflakes", false},
+	{"/usr/gopher/bar", "/usr/zot", "/usr/gopher/bar", false},
+}
+
+func TestTrim(t *testing.T) {
+	for _, test := range trimTests {
+		result, ok := trim(test.path, test.prefix)
+		if ok != test.ok {
+			t.Errorf("%s %s expected %t got %t", test.path, test.prefix, test.ok, ok)
+			continue
+		}
+		if result != test.result {
+			t.Errorf("%s %s expected %q got %q", test.path, test.prefix, test.result, result)
+			continue
+		}
+	}
 }
diff --git a/src/cmd/doc/main.go b/src/cmd/doc/main.go
index 0985d95..df1890f 100644
--- a/src/cmd/doc/main.go
+++ b/src/cmd/doc/main.go
@@ -32,13 +32,13 @@
 package main
 
 import (
+	"bytes"
 	"flag"
 	"fmt"
 	"go/build"
 	"io"
 	"log"
 	"os"
-	"path"
 	"path/filepath"
 	"strings"
 	"unicode"
@@ -84,32 +84,72 @@
 	flagSet.BoolVar(&matchCase, "c", false, "symbol matching honors case (paths not affected)")
 	flagSet.BoolVar(&showCmd, "cmd", false, "show symbols with package docs even if package is a command")
 	flagSet.Parse(args)
-	buildPackage, userPath, symbol := parseArgs(flagSet.Args())
-	symbol, method := parseSymbol(symbol)
-	pkg := parsePackage(writer, buildPackage, userPath)
-	defer func() {
-		pkg.flush()
-		e := recover()
-		if e == nil {
-			return
+	var paths []string
+	var symbol, method string
+	// Loop until something is printed.
+	dirs.Reset()
+	for i := 0; ; i++ {
+		buildPackage, userPath, sym, more := parseArgs(flagSet.Args())
+		if i > 0 && !more { // Ignore the "more" bit on the first iteration.
+			return failMessage(paths, symbol, method)
 		}
-		pkgError, ok := e.(PackageError)
-		if ok {
-			err = pkgError
-			return
+		symbol, method = parseSymbol(sym)
+		pkg := parsePackage(writer, buildPackage, userPath)
+		paths = append(paths, pkg.prettyPath())
+
+		defer func() {
+			pkg.flush()
+			e := recover()
+			if e == nil {
+				return
+			}
+			pkgError, ok := e.(PackageError)
+			if ok {
+				err = pkgError
+				return
+			}
+			panic(e)
+		}()
+
+		// The builtin package needs special treatment: its symbols are lower
+		// case but we want to see them, always.
+		if pkg.build.ImportPath == "builtin" {
+			unexported = true
 		}
-		panic(e)
-	}()
-	switch {
-	case symbol == "":
-		pkg.packageDoc()
-		return
-	case method == "":
-		pkg.symbolDoc(symbol)
-	default:
-		pkg.methodDoc(symbol, method)
+
+		switch {
+		case symbol == "":
+			pkg.packageDoc() // The package exists, so we got some output.
+			return
+		case method == "":
+			if pkg.symbolDoc(symbol) {
+				return
+			}
+		default:
+			if pkg.methodDoc(symbol, method) {
+				return
+			}
+		}
 	}
-	return nil
+}
+
+// failMessage creates a nicely formatted error message when there is no result to show.
+func failMessage(paths []string, symbol, method string) error {
+	var b bytes.Buffer
+	if len(paths) > 1 {
+		b.WriteString("s")
+	}
+	b.WriteString(" ")
+	for i, path := range paths {
+		if i > 0 {
+			b.WriteString(", ")
+		}
+		b.WriteString(path)
+	}
+	if method == "" {
+		return fmt.Errorf("no symbol %s in package%s", symbol, &b)
+	}
+	return fmt.Errorf("no method %s.%s in package%s", symbol, method, &b)
 }
 
 // parseArgs analyzes the arguments (if any) and returns the package
@@ -117,13 +157,19 @@
 // the path (or "" if it's the current package) and the symbol
 // (possibly with a .method) within that package.
 // parseSymbol is used to analyze the symbol itself.
-func parseArgs(args []string) (*build.Package, string, string) {
+// The boolean final argument reports whether it is possible that
+// there may be more directories worth looking at. It will only
+// be true if the package path is a partial match for some directory
+// and there may be more matches. For example, if the argument
+// is rand.Float64, we must scan both crypto/rand and math/rand
+// to find the symbol, and the first call will return crypto/rand, true.
+func parseArgs(args []string) (pkg *build.Package, path, symbol string, more bool) {
 	switch len(args) {
 	default:
 		usage()
 	case 0:
 		// Easy: current directory.
-		return importDir(pwd()), "", ""
+		return importDir(pwd()), "", "", false
 	case 1:
 		// Done below.
 	case 2:
@@ -132,7 +178,7 @@
 		if err != nil {
 			log.Fatalf("%s", err)
 		}
-		return pkg, args[0], args[1]
+		return pkg, args[0], args[1], false
 	}
 	// Usual case: one argument.
 	arg := args[0]
@@ -142,7 +188,7 @@
 	// package paths as their prefix.
 	pkg, err := build.Import(arg, "", build.ImportComment)
 	if err == nil {
-		return pkg, arg, ""
+		return pkg, arg, "", false
 	}
 	// Another disambiguator: If the symbol starts with an upper
 	// case letter, it can only be a symbol in the current directory.
@@ -151,7 +197,7 @@
 	if isUpper(arg) {
 		pkg, err := build.ImportDir(".", build.ImportComment)
 		if err == nil {
-			return pkg, "", arg
+			return pkg, "", arg, false
 		}
 	}
 	// If it has a slash, it must be a package path but there is a symbol.
@@ -177,21 +223,23 @@
 		// Have we identified a package already?
 		pkg, err := build.Import(arg[0:period], "", build.ImportComment)
 		if err == nil {
-			return pkg, arg[0:period], symbol
+			return pkg, arg[0:period], symbol, false
 		}
 		// See if we have the basename or tail of a package, as in json for encoding/json
 		// or ivy/value for robpike.io/ivy/value.
-		path := findPackage(arg[0:period])
-		if path != "" {
-			return importDir(path), arg[0:period], symbol
+		// findPackage resumes the shared directory scan on each call,
+		// so repeated calls can return successive matching paths.
+		path, ok := findPackage(arg[0:period])
+		if ok {
+			return importDir(path), arg[0:period], symbol, true
 		}
+		dirs.Reset() // The next iteration of the loop must scan all the directories again.
 	}
 	// If it has a slash, we've failed.
 	if slash >= 0 {
 		log.Fatalf("no such package %s", arg[0:period])
 	}
 	// Guess it's a symbol in the current directory.
-	return importDir(pwd()), "", arg
+	return importDir(pwd()), "", arg, false
 }
 
 // importDir is just an error-catching wrapper for build.ImportDir.
@@ -252,26 +300,22 @@
 	return unicode.IsUpper(ch)
 }
 
-// findPackage returns the full file name path specified by the
-// (perhaps partial) package path pkg.
-func findPackage(pkg string) string {
-	if pkg == "" {
-		return ""
+// findPackage returns the full file name path of the first directory that
+// matches the (perhaps partial) package path pkg. The boolean reports whether
+// any match was found.
+func findPackage(pkg string) (string, bool) {
+	if pkg == "" || isUpper(pkg) { // Upper case symbol cannot be a package name.
+		return "", false
 	}
-	if isUpper(pkg) {
-		return "" // Upper case symbol cannot be a package name.
-	}
-	path := pathFor(build.Default.GOROOT, pkg)
-	if path != "" {
-		return path
-	}
-	for _, root := range splitGopath() {
-		path = pathFor(root, pkg)
-		if path != "" {
-			return path
+	pkgString := filepath.Clean(string(filepath.Separator) + pkg)
+	for {
+		path, ok := dirs.Next()
+		if !ok {
+			return "", false
+		}
+		if strings.HasSuffix(path, pkgString) {
+			return path, true
 		}
 	}
-	return ""
 }
 
 // splitGopath splits $GOPATH into a list of roots.
@@ -279,73 +323,6 @@
 	return filepath.SplitList(build.Default.GOPATH)
 }
 
-// pathsFor recursively walks the tree at root looking for possible directories for the package:
-// those whose package path is pkg or which have a proper suffix pkg.
-func pathFor(root, pkg string) (result string) {
-	root = path.Join(root, "src")
-	slashDot := string(filepath.Separator) + "."
-	// We put a slash on the pkg so can use simple string comparison below
-	// yet avoid inadvertent matches, like /foobar matching bar.
-	pkgString := filepath.Clean(string(filepath.Separator) + pkg)
-
-	// We use panic/defer to short-circuit processing at the first match.
-	// A nil panic reports that the path has been found.
-	defer func() {
-		err := recover()
-		if err != nil {
-			panic(err)
-		}
-	}()
-
-	visit := func(pathName string, f os.FileInfo, err error) error {
-		if err != nil {
-			return nil
-		}
-		// One package per directory. Ignore the files themselves.
-		if !f.IsDir() {
-			return nil
-		}
-		// No .git or other dot nonsense please.
-		if strings.Contains(pathName, slashDot) {
-			return filepath.SkipDir
-		}
-		// Is the tail of the path correct?
-		if strings.HasSuffix(pathName, pkgString) && hasGoFiles(pathName) {
-			result = pathName
-			panic(nil)
-		}
-		return nil
-	}
-
-	filepath.Walk(root, visit)
-	return "" // Call to panic above sets the real value.
-}
-
-// hasGoFiles tests whether the directory contains at least one file with ".go"
-// extension
-func hasGoFiles(path string) bool {
-	dir, err := os.Open(path)
-	if err != nil {
-		// ignore unreadable directories
-		return false
-	}
-	defer dir.Close()
-
-	names, err := dir.Readdirnames(0)
-	if err != nil {
-		// ignore unreadable directories
-		return false
-	}
-
-	for _, name := range names {
-		if strings.HasSuffix(name, ".go") {
-			return true
-		}
-	}
-
-	return false
-}
-
 // pwd returns the current directory.
 func pwd() string {
 	wd, err := os.Getwd()
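
Because findPackage consumes the shared Dirs scan, a failed symbol lookup can simply loop: the next parseArgs call resumes the scan and finds the next candidate (crypto/rand first, then math/rand for the rand.Float64 example). A hypothetical helper making that property explicit (not part of cmd/doc):

	// allMatches collects every directory whose path ends in pkg,
	// e.g. allMatches("rand") -> [".../crypto/rand", ".../math/rand"].
	func allMatches(pkg string) []string {
		var paths []string
		dirs.Reset()
		for {
			path, ok := findPackage(pkg)
			if !ok {
				return paths
			}
			paths = append(paths, path)
		}
	}
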
diff --git a/src/cmd/doc/pkg.go b/src/cmd/doc/pkg.go
index 569c7a3..f99df59 100644
--- a/src/cmd/doc/pkg.go
+++ b/src/cmd/doc/pkg.go
@@ -16,6 +16,8 @@
 	"io"
 	"log"
 	"os"
+	"path/filepath"
+	"strings"
 	"unicode"
 	"unicode/utf8"
 )
@@ -46,6 +48,51 @@
 	return string(p)
 }
 
+// prettyPath returns a version of the package path that is suitable for an
+// error message. It obeys the import comment if present. Since
+// pkg.build.ImportPath is sometimes the unhelpful "" or ".", it falls back
+// to a directory name in GOROOT or GOPATH in that case.
+func (pkg *Package) prettyPath() string {
+	path := pkg.build.ImportComment
+	if path == "" {
+		path = pkg.build.ImportPath
+	}
+	if path != "." && path != "" {
+		return path
+	}
+	// Convert the source directory into a more useful path.
+	// Also convert everything to slash-separated paths for uniform handling.
+	path = filepath.Clean(filepath.ToSlash(pkg.build.Dir))
+	// Can we find a decent prefix?
+	goroot := filepath.Join(build.Default.GOROOT, "src")
+	if p, ok := trim(path, filepath.ToSlash(goroot)); ok {
+		return p
+	}
+	for _, gopath := range splitGopath() {
+		if p, ok := trim(path, filepath.ToSlash(gopath)); ok {
+			return p
+		}
+	}
+	return path
+}
+
+// trim trims the directory prefix from the path, paying attention
+// to the path separator. If they are the same string, or the prefix
+// is not present, the original is returned. The boolean reports whether
+// the prefix is present. Both path and prefix must use slashes as separators.
+func trim(path, prefix string) (string, bool) {
+	if !strings.HasPrefix(path, prefix) {
+		return path, false
+	}
+	if path == prefix {
+		return path, true
+	}
+	if path[len(prefix)] == '/' {
+		return path[len(prefix)+1:], true
+	}
+	return path, false // Textual prefix but not a path prefix.
+}
+
 // pkg.Fatalf is like log.Fatalf, but panics so it can be recovered in the
 // main do function, so it doesn't cause an exit. Allows testing to work
 // without running a subprocess. The log prefix will be added when
@@ -140,10 +187,12 @@
 			log.Fatal(err)
 		}
 		if comment != "" {
-			pkg.newlines(2) // Guarantee blank line before comment.
+			pkg.newlines(1)
 			doc.ToText(&pkg.buf, comment, "    ", indent, indentedWidth)
+			pkg.newlines(2) // Blank line after comment to separate from next item.
+		} else {
+			pkg.newlines(1)
 		}
-		pkg.newlines(1)
 	}
 }
 
@@ -218,7 +267,7 @@
 		return
 	}
 
-	pkg.newlines(1)
+	pkg.newlines(2) // Guarantee blank line before the components.
 	pkg.valueSummary(pkg.doc.Consts)
 	pkg.valueSummary(pkg.doc.Vars)
 	pkg.funcSummary(pkg.doc.Funcs)
@@ -257,18 +306,7 @@
 // valueSummary prints a one-line summary for each set of values and constants.
 func (pkg *Package) valueSummary(values []*doc.Value) {
 	for _, value := range values {
-		// Only print first item in spec, show ... to stand for the rest.
-		spec := value.Decl.Specs[0].(*ast.ValueSpec) // Must succeed.
-		exported := true
-		for _, name := range spec.Names {
-			if !isExported(name.Name) {
-				exported = false
-				break
-			}
-		}
-		if exported {
-			pkg.oneLineValueGenDecl(value.Decl)
-		}
+		pkg.oneLineValueGenDecl(value.Decl)
 	}
 }
 
@@ -355,7 +393,7 @@
 // symbolDoc prints the docs for symbol. There may be multiple matches.
 // If symbol matches a type, output includes its methods factories and associated constants.
 // If there is no top-level symbol, symbolDoc looks for methods that match.
-func (pkg *Package) symbolDoc(symbol string) {
+func (pkg *Package) symbolDoc(symbol string) bool {
 	defer pkg.flush()
 	found := false
 	// Functions.
@@ -424,9 +462,10 @@
 	if !found {
 		// See if there are methods.
 		if !pkg.printMethodDoc("", symbol) {
-			log.Printf("symbol %s not present in package %s installed in %q", symbol, pkg.name, pkg.build.ImportPath)
+			return false
 		}
 	}
+	return true
 }
 
 // trimUnexportedElems modifies spec in place to elide unexported fields from
@@ -467,11 +506,7 @@
 	unexportedField := &ast.Field{
 		Type: ast.NewIdent(""), // Hack: printer will treat this as a field with a named type.
 		Comment: &ast.CommentGroup{
-			List: []*ast.Comment{
-				&ast.Comment{
-					Text: fmt.Sprintf("// Has unexported %s.\n", what),
-				},
-			},
+			List: []*ast.Comment{{Text: fmt.Sprintf("// Has unexported %s.\n", what)}},
 		},
 	}
 	return &ast.FieldList{
@@ -508,11 +543,9 @@
 }
 
 // methodDoc prints the docs for matches of symbol.method.
-func (pkg *Package) methodDoc(symbol, method string) {
+func (pkg *Package) methodDoc(symbol, method string) bool {
 	defer pkg.flush()
-	if !pkg.printMethodDoc(symbol, method) {
-		pkg.Fatalf("no method %s.%s in package %s installed in %q", symbol, method, pkg.name, pkg.build.ImportPath)
-	}
+	return pkg.printMethodDoc(symbol, method)
 }
 
 // match reports whether the user's symbol matches the program's.
diff --git a/src/cmd/doc/testdata/pkg.go b/src/cmd/doc/testdata/pkg.go
index ebefb50..0f06651 100644
--- a/src/cmd/doc/testdata/pkg.go
+++ b/src/cmd/doc/testdata/pkg.go
@@ -21,6 +21,13 @@
 	constThree = 3 // Comment on line with constThree.
 )
 
+// Const block where first entry is unexported.
+const (
+	constFour = iota
+	ConstFive
+	ConstSix
+)
+
 // Variables
 
 // Comment about exported variable.
@@ -37,6 +44,13 @@
 	varThree = 3 // Comment on line with varThree.
 )
 
+// Var block where first entry is unexported.
+var (
+	varFour = 4
+	VarFive = 5
+	varSix  = 6
+)
+
 // Comment about exported function.
 func ExportedFunc(a int) bool
 
diff --git a/src/cmd/go/alldocs.go b/src/cmd/go/alldocs.go
index d11a5a0..228ad19 100644
--- a/src/cmd/go/alldocs.go
+++ b/src/cmd/go/alldocs.go
@@ -886,7 +886,7 @@
 so that you can add DIR/bin to your PATH to get at the
 installed commands.  If the GOBIN environment variable is
 set, commands are installed to the directory it names instead
-of DIR/bin.
+of DIR/bin. GOBIN must be an absolute path.
 
 Here's an example directory layout:
 
diff --git a/src/cmd/go/build.go b/src/cmd/go/build.go
index df74338..d4aeb70 100644
--- a/src/cmd/go/build.go
+++ b/src/cmd/go/build.go
@@ -353,10 +353,28 @@
 		}
 		ldBuildmode = "c-shared"
 	case "default":
-		ldBuildmode = "exe"
+		switch platform {
+		case "android/arm":
+			codegenArg = "-shared"
+			ldBuildmode = "pie"
+		default:
+			ldBuildmode = "exe"
+		}
 	case "exe":
 		pkgsFilter = pkgsMain
 		ldBuildmode = "exe"
+	case "pie":
+		if gccgo {
+			fatalf("-buildmode=pie not supported by gccgo")
+		} else {
+			switch platform {
+			case "android/arm":
+				codegenArg = "-shared"
+			default:
+				fatalf("-buildmode=pie not supported on %s\n", platform)
+			}
+		}
+		ldBuildmode = "pie"
 	case "shared":
 		pkgsFilter = pkgsNotMain
 		if gccgo {
@@ -501,6 +519,10 @@
 }
 
 func runInstall(cmd *Command, args []string) {
+	if gobin != "" && !filepath.IsAbs(gobin) {
+		fatalf("cannot install, GOBIN must be an absolute path")
+	}
+
 	raceInit()
 	buildModeInit()
 	pkgs := pkgsFilter(packagesForBuild(args))
@@ -1235,12 +1257,6 @@
 		fmt.Fprintf(os.Stderr, "%s\n", a.p.ImportPath)
 	}
 
-	if a.p.Standard && a.p.ImportPath == "runtime" && buildContext.Compiler == "gc" &&
-		(!hasString(a.p.GoFiles, "zgoos_"+buildContext.GOOS+".go") ||
-			!hasString(a.p.GoFiles, "zgoarch_"+buildContext.GOARCH+".go")) {
-		return fmt.Errorf("%s/%s must be bootstrapped using make%v", buildContext.GOOS, buildContext.GOARCH, defaultSuffix())
-	}
-
 	// Make build directory.
 	obj := a.objdir
 	if err := b.mkdir(obj); err != nil {
@@ -2875,7 +2891,7 @@
 	cgoCPPFLAGS = append(cgoCPPFLAGS, "-I", obj)
 
 	// cgo
-	// TODO: CGOPKGPATH, CGO_FLAGS?
+	// TODO: CGO_FLAGS?
 	gofiles := []string{obj + "_cgo_gotypes.go"}
 	cfiles := []string{"_cgo_main.c", "_cgo_export.c"}
 	for _, fn := range cgofiles {
@@ -2943,7 +2959,9 @@
 	var linkobj []string
 
 	var bareLDFLAGS []string
-	// filter out -lsomelib, -l somelib, *.{so,dll,dylib}, and (on Darwin) -framework X
+	// When linking relocatable objects, various flags need to be
+	// filtered out as they are inapplicable and can cause some linkers
+	// to fail.
 	for i := 0; i < len(cgoLDFLAGS); i++ {
 		f := cgoLDFLAGS[i]
 		switch {
@@ -2959,7 +2977,6 @@
 		case strings.HasSuffix(f, ".dylib"),
 			strings.HasSuffix(f, ".so"),
 			strings.HasSuffix(f, ".dll"):
-			continue
 		// Remove any -fsanitize=foo flags.
 		// Otherwise the compiler driver thinks that we are doing final link
 		// and links sanitizer runtime into the object file. But we are not doing
@@ -2968,6 +2985,16 @@
 		// See issue 8788 for details.
 		case strings.HasPrefix(f, "-fsanitize="):
 			continue
+		// runpath flags are not applicable unless building a shared
+		// object or executable; see issue 12115 for details. This
+		// is necessary as Go currently does not offer a way to
+		// specify the set of LDFLAGS that only apply to shared
+		// objects.
+		case strings.HasPrefix(f, "-Wl,-rpath"):
+			if f == "-Wl,-rpath" || f == "-Wl,-rpath-link" {
+				// Skip following argument to -rpath* too.
+				i++
+			}
 		default:
 			bareLDFLAGS = append(bareLDFLAGS, f)
 		}
@@ -3336,6 +3363,10 @@
 		fmt.Fprintf(os.Stderr, "go %s: -race is only supported on linux/amd64, freebsd/amd64, darwin/amd64 and windows/amd64\n", flag.Args()[0])
 		os.Exit(2)
 	}
+	if !buildContext.CgoEnabled {
+		fmt.Fprintf(os.Stderr, "go %s: -race requires cgo; enable cgo by setting CGO_ENABLED=1\n", flag.Args()[0])
+		os.Exit(2)
+	}
 	buildGcflags = append(buildGcflags, "-race")
 	buildLdflags = append(buildLdflags, "-race")
 	if buildContext.InstallSuffix != "" {
@@ -3344,16 +3375,3 @@
 	buildContext.InstallSuffix += "race"
 	buildContext.BuildTags = append(buildContext.BuildTags, "race")
 }
-
-// defaultSuffix returns file extension used for command files in
-// current os environment.
-func defaultSuffix() string {
-	switch runtime.GOOS {
-	case "windows":
-		return ".bat"
-	case "plan9":
-		return ".rc"
-	default:
-		return ".bash"
-	}
-}
diff --git a/src/cmd/go/discovery.go b/src/cmd/go/discovery.go
index b9f4279..4d1df2f 100644
--- a/src/cmd/go/discovery.go
+++ b/src/cmd/go/discovery.go
@@ -41,7 +41,7 @@
 	d.Strict = false
 	var t xml.Token
 	for {
-		t, err = d.Token()
+		t, err = d.RawToken()
 		if err != nil {
 			if err == io.EOF {
 				err = nil
diff --git a/src/cmd/go/go_test.go b/src/cmd/go/go_test.go
index 6ee9343..a4c91c9 100644
--- a/src/cmd/go/go_test.go
+++ b/src/cmd/go/go_test.go
@@ -31,8 +31,7 @@
 
 	exeSuffix string // ".exe" on Windows
 
-	builder             = testenv.Builder()
-	skipExternalBuilder = false // skip external tests on this builder
+	skipExternal = false // skip external tests
 )
 
 func init() {
@@ -44,14 +43,21 @@
 		case "arm", "arm64":
 			canRun = false
 		}
-	}
-
-	if strings.HasPrefix(builder+"-", "freebsd-arm-") {
-		skipExternalBuilder = true
-		canRun = false
-	}
-
-	switch runtime.GOOS {
+	case "linux":
+		switch runtime.GOARCH {
+		case "arm":
+			// many linux/arm machines are too slow to run
+			// the full set of external tests.
+			skipExternal = true
+		}
+	case "freebsd":
+		switch runtime.GOARCH {
+		case "arm":
+			// many freebsd/arm machines are too slow to run
+			// the full set of external tests.
+			skipExternal = true
+			canRun = false
+		}
 	case "windows":
 		exeSuffix = ".exe"
 	}
@@ -138,8 +144,8 @@
 func testgo(t *testing.T) *testgoData {
 	testenv.MustHaveGoBuild(t)
 
-	if skipExternalBuilder {
-		t.Skip("skipping external tests on %s builder", builder)
+	if skipExternal {
+		t.Skip("skipping external tests on %s/%s", runtime.GOOS, runtime.GOARCH)
 	}
 
 	return &testgoData{t: t}
@@ -1139,6 +1145,15 @@
 	tg.grepStderr("no buildable Go source files", "go install cgotest did not report 'no buildable Go Source files'")
 }
 
+func TestRelativeGOBINFail(t *testing.T) {
+	tg := testgo(t)
+	defer tg.cleanup()
+	tg.tempFile("triv.go", `package main; func main() {}`)
+	tg.setenv("GOBIN", ".")
+	tg.runFail("install")
+	tg.grepStderr("cannot install, GOBIN must be an absolute path", "go install must fail if $GOBIN is a relative path")
+}
+
 // Test that without $GOBIN set, binaries get installed
 // into the GOPATH bin directory.
 func TestInstallIntoGOPATH(t *testing.T) {
@@ -1692,7 +1707,7 @@
 	tg := testgo(t)
 	defer tg.cleanup()
 	tg.creatingTemp("testdata/cover.out")
-	tg.run("test", "-short", "-cover", "encoding/binary", "-coverprofile=testdata/cover.out")
+	tg.run("test", "-short", "-coverprofile=testdata/cover.out", "encoding/binary")
 	data := tg.getStdout() + tg.getStderr()
 	if out, err := ioutil.ReadFile("testdata/cover.out"); err != nil {
 		t.Error(err)
@@ -1715,7 +1730,7 @@
 	tg := testgo(t)
 	defer tg.cleanup()
 	tg.creatingTemp("testdata/cover.out")
-	tg.run("test", "-short", "-race", "-cover", "encoding/binary", "-coverprofile=testdata/cover.out")
+	tg.run("test", "-short", "-race", "-coverprofile=testdata/cover.out", "encoding/binary")
 	data := tg.getStdout() + tg.getStderr()
 	if out, err := ioutil.ReadFile("testdata/cover.out"); err != nil {
 		t.Error(err)
@@ -1738,7 +1753,7 @@
 	tg := testgo(t)
 	defer tg.cleanup()
 	tg.creatingTemp("testdata/cover.out")
-	tg.run("test", "-short", "-race", "-cover", "encoding/binary", "-covermode=count", "-coverprofile=testdata/cover.out")
+	tg.run("test", "-short", "-race", "-covermode=count", "-coverprofile=testdata/cover.out", "encoding/binary")
 	data := tg.getStdout() + tg.getStderr()
 	if out, err := ioutil.ReadFile("testdata/cover.out"); err != nil {
 		t.Error(err)
@@ -1833,6 +1848,9 @@
 
 // cmd/cgo: undefined reference when linking a C-library using gccgo
 func TestIssue7573(t *testing.T) {
+	if !canCgo {
+		t.Skip("skipping because cgo not enabled")
+	}
 	if _, err := exec.LookPath("gccgo"); err != nil {
 		t.Skip("skipping because no gccgo compiler found")
 	}
@@ -1931,6 +1949,12 @@
 	tg.run("test", "testdata/standalone_test.go")
 }
 
+func TestGoTestFlagsAfterPackage(t *testing.T) {
+	tg := testgo(t)
+	defer tg.cleanup()
+	tg.run("test", "-v", "testdata/flag_test.go", "-v=7") // Two distinct -v flags.
+}
+
 func TestGoTestXtestonlyWorks(t *testing.T) {
 	tg := testgo(t)
 	defer tg.cleanup()
diff --git a/src/cmd/go/help.go b/src/cmd/go/help.go
index 034dfc3..244451d 100644
--- a/src/cmd/go/help.go
+++ b/src/cmd/go/help.go
@@ -307,7 +307,7 @@
 so that you can add DIR/bin to your PATH to get at the
 installed commands.  If the GOBIN environment variable is
 set, commands are installed to the directory it names instead
-of DIR/bin.
+of DIR/bin. GOBIN must be an absolute path.
 
 Here's an example directory layout:
 
diff --git a/src/cmd/go/main.go b/src/cmd/go/main.go
index 8ebde89..b66049f 100644
--- a/src/cmd/go/main.go
+++ b/src/cmd/go/main.go
@@ -285,8 +285,8 @@
 func usage() {
 	// special case "go test -h"
 	if len(os.Args) > 1 && os.Args[1] == "test" {
-		os.Stdout.WriteString(testUsage + "\n\n" +
-			strings.TrimSpace(testFlag1) + "\n\n" +
+		os.Stderr.WriteString(testUsage + "\n\n" +
+			strings.TrimSpace(testFlag1) + "\n\n\t" +
 			strings.TrimSpace(testFlag2) + "\n")
 		os.Exit(2)
 	}
diff --git a/src/cmd/go/note_test.go b/src/cmd/go/note_test.go
index 0764b73..1809f94 100644
--- a/src/cmd/go/note_test.go
+++ b/src/cmd/go/note_test.go
@@ -24,23 +24,23 @@
 		t.Fatalf("buildID in hello binary = %q, want %q", id, buildID)
 	}
 
-	if runtime.GOOS == "linux" && (runtime.GOARCH == "ppc64le" || runtime.GOARCH == "ppc64") {
-		t.Skipf("skipping - golang.org/issue/11184")
+	switch {
+	case runtime.GOOS == "linux" && (runtime.GOARCH == "ppc64le" || runtime.GOARCH == "ppc64"):
+		t.Skipf("skipping - external linking not supported, golang.org/issue/11184")
+	case runtime.GOOS == "linux" && (runtime.GOARCH == "mips64le" || runtime.GOARCH == "mips64"):
+		t.Skipf("skipping - external linking not supported, golang.org/issue/12560")
+	case runtime.GOOS == "openbsd" && runtime.GOARCH == "arm":
+		t.Skipf("skipping - external linking not supported, golang.org/issue/10619")
+	case runtime.GOOS == "plan9":
+		t.Skipf("skipping - external linking not supported")
 	}
 
-	switch runtime.GOOS {
-	case "plan9":
-		// no external linking
-		t.Logf("no external linking - skipping linkmode=external test")
-
-	default:
-		tg.run("build", "-ldflags", "-buildid="+buildID+" -linkmode=external", "-o", tg.path("hello.exe"), tg.path("hello.go"))
-		id, err := main.ReadBuildIDFromBinary(tg.path("hello.exe"))
-		if err != nil {
-			t.Fatalf("reading build ID from hello binary (linkmode=external): %v", err)
-		}
-		if id != buildID {
-			t.Fatalf("buildID in hello binary = %q, want %q (linkmode=external)", id, buildID)
-		}
+	tg.run("build", "-ldflags", "-buildid="+buildID+" -linkmode=external", "-o", tg.path("hello.exe"), tg.path("hello.go"))
+	id, err = main.ReadBuildIDFromBinary(tg.path("hello.exe"))
+	if err != nil {
+		t.Fatalf("reading build ID from hello binary (linkmode=external): %v", err)
+	}
+	if id != buildID {
+		t.Fatalf("buildID in hello binary = %q, want %q (linkmode=external)", id, buildID)
 	}
 }
diff --git a/src/cmd/go/test.go b/src/cmd/go/test.go
index ba1ab82..f72c227 100644
--- a/src/cmd/go/test.go
+++ b/src/cmd/go/test.go
@@ -13,7 +13,6 @@
 	"go/doc"
 	"go/parser"
 	"go/token"
-	"log"
 	"os"
 	"os/exec"
 	"path"
@@ -439,7 +438,7 @@
 		}
 		for _, p := range testCoverPkgs {
 			if !used[p.ImportPath] {
-				log.Printf("warning: no packages being tested depend on %s", p.ImportPath)
+				fmt.Fprintf(os.Stderr, "warning: no packages being tested depend on %s\n", p.ImportPath)
 			}
 		}
 
@@ -817,10 +816,12 @@
 		}
 	}
 
-	// writeTestmain writes _testmain.go. This must happen after recompileForTest,
-	// because recompileForTest modifies XXX.
-	if err := writeTestmain(filepath.Join(testDir, "_testmain.go"), t); err != nil {
-		return nil, nil, nil, err
+	if !buildN {
+		// writeTestmain writes _testmain.go. This must happen after recompileForTest,
+		// because recompileForTest modifies XXX.
+		if err := writeTestmain(filepath.Join(testDir, "_testmain.go"), t); err != nil {
+			return nil, nil, nil, err
+		}
 	}
 
 	computeStale(pmain)
diff --git a/src/cmd/go/testdata/flag_test.go b/src/cmd/go/testdata/flag_test.go
new file mode 100644
index 0000000..ddf613d
--- /dev/null
+++ b/src/cmd/go/testdata/flag_test.go
@@ -0,0 +1,16 @@
+package flag_test
+
+import (
+	"flag"
+	"log"
+	"testing"
+)
+
+var v = flag.Int("v", 0, "v flag")
+
+// Run this as go test pkg -v=7
+func TestVFlagIsSet(t *testing.T) {
+	if *v != 7 {
+		log.Fatal("v flag not set")
+	}
+}
diff --git a/src/cmd/go/testflag.go b/src/cmd/go/testflag.go
index 1f3e3d3..32a84b6 100644
--- a/src/cmd/go/testflag.go
+++ b/src/cmd/go/testflag.go
@@ -80,40 +80,29 @@
 // Unfortunately for us, we need to do our own flag processing because go test
 // grabs some flags but otherwise its command line is just a holding place for
 // pkg.test's arguments.
-// We allow known flags both before and after the package name list,
-// to allow both
-//	go test fmt -custom-flag-for-fmt-test
-//	go test -x math
+// The usage is:
+//	go test [test flags] [packages] [flags for test binary]
+// Thus we process test flags (adding -test. to each) until we find a non-flag,
+// which introduces the optional list of packages. We collect the package paths
+// until we find another -flag, and pass that and the rest of the command line
+// to the test binary untouched.
+// For backwards compatibility with a poor design, if while processing test
+// flags we see an unrecognized flag, we accept it as an argument to the binary.
+// For this to work in general, one must say -foo=xxx not -foo xxx or else
+// xxx will be taken to be a package path. As said, the design is poor.
 func testFlags(args []string) (packageNames, passToTest []string) {
-	inPkg := false
 	outputDir := ""
-	for i := 0; i < len(args); i++ {
+	// Flags.
+	var i int
+	for i = 0; i < len(args); i++ {
 		if !strings.HasPrefix(args[i], "-") {
-			if !inPkg && packageNames == nil {
-				// First package name we've seen.
-				inPkg = true
-			}
-			if inPkg {
-				packageNames = append(packageNames, args[i])
-				continue
-			}
-		}
-
-		if inPkg {
-			// Found an argument beginning with "-"; end of package list.
-			inPkg = false
+			break // Start of packages.
 		}
 
 		f, value, extraWord := testFlag(args, i)
 		if f == nil {
-			// This is a flag we do not know; we must assume
-			// that any args we see after this might be flag
-			// arguments, not package names.
-			inPkg = false
-			if packageNames == nil {
-				// make non-nil: we have seen the empty package list
-				packageNames = []string{}
-			}
+			// This is a flag we do not know. Pass it to the test but keep
+			// processing flags.
 			passToTest = append(passToTest, args[i])
 			continue
 		}
@@ -174,6 +163,15 @@
 			passToTest = append(passToTest, "-test."+f.name+"="+value)
 		}
 	}
+	// Package names.
+	for ; i < len(args); i++ {
+		if strings.HasPrefix(args[i], "-") {
+			break // Start of trailing arguments.
+		}
+		packageNames = append(packageNames, args[i])
+	}
+	// Trailing arguments.
+	passToTest = append(passToTest, args[i:]...)
 
 	if testCoverMode == "" {
 		testCoverMode = "set"
diff --git a/src/cmd/go/tool.go b/src/cmd/go/tool.go
index 937ca1f..9ff0dcf 100644
--- a/src/cmd/go/tool.go
+++ b/src/cmd/go/tool.go
@@ -98,6 +98,7 @@
 		fmt.Printf("%s\n", cmd)
 		return
 	}
+	args[0] = toolPath // in case the tool wants to re-exec itself, e.g. cmd/dist
 	toolCmd := &exec.Cmd{
 		Path:   toolPath,
 		Args:   args,
diff --git a/src/cmd/go/vcs.go b/src/cmd/go/vcs.go
index 28a7540..a110096 100644
--- a/src/cmd/go/vcs.go
+++ b/src/cmd/go/vcs.go
@@ -567,16 +567,8 @@
 			lookup = lookup[:i]
 		}
 		rr, err = repoRootForImportDynamic(lookup, security)
-
-		// repoRootForImportDynamic returns error detail
-		// that is irrelevant if the user didn't intend to use a
-		// dynamic import in the first place.
-		// Squelch it.
 		if err != nil {
-			if buildV {
-				log.Printf("import %q: %v", importPath, err)
-			}
-			err = fmt.Errorf("unrecognized import path %q", importPath)
+			err = fmt.Errorf("unrecognized import path %q (%v)", importPath, err)
 		}
 	}
 	if err != nil {
diff --git a/src/cmd/gofmt/gofmt.go b/src/cmd/gofmt/gofmt.go
index b2805ac..cfebeff 100644
--- a/src/cmd/gofmt/gofmt.go
+++ b/src/cmd/gofmt/gofmt.go
@@ -13,7 +13,6 @@
 	"go/printer"
 	"go/scanner"
 	"go/token"
-	"internal/format"
 	"io"
 	"io/ioutil"
 	"os"
@@ -88,7 +87,7 @@
 		return err
 	}
 
-	file, sourceAdj, indentAdj, err := format.Parse(fileSet, filename, src, stdin)
+	file, sourceAdj, indentAdj, err := parse(fileSet, filename, src, stdin)
 	if err != nil {
 		return err
 	}
@@ -107,7 +106,7 @@
 		simplify(file)
 	}
 
-	res, err := format.Format(fileSet, file, sourceAdj, indentAdj, src, printer.Config{Mode: printerMode, Tabwidth: tabWidth})
+	res, err := format(fileSet, file, sourceAdj, indentAdj, src, printer.Config{Mode: printerMode, Tabwidth: tabWidth})
 	if err != nil {
 		return err
 	}
diff --git a/src/internal/format/format.go b/src/cmd/gofmt/internal.go
similarity index 80%
copy from src/internal/format/format.go
copy to src/cmd/gofmt/internal.go
index a8270ba..f764b10 100644
--- a/src/internal/format/format.go
+++ b/src/cmd/gofmt/internal.go
@@ -2,7 +2,12 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-package format
+// TODO(gri): This file and the file src/go/format/internal.go are
+// the same (but for this comment and the package name). Do not modify
+// one without the other. Determine if we can factor out functionality
+// in a public API. See also #11844 for context.
+
+package main
 
 import (
 	"bytes"
@@ -13,11 +18,9 @@
 	"strings"
 )
 
-const parserMode = parser.ParseComments
-
-// Parse parses src, which was read from the named file,
+// parse parses src, which was read from the named file,
 // as a Go source file, declaration, or statement list.
-func Parse(fset *token.FileSet, filename string, src []byte, fragmentOk bool) (
+func parse(fset *token.FileSet, filename string, src []byte, fragmentOk bool) (
 	file *ast.File,
 	sourceAdj func(src []byte, indent int) []byte,
 	indentAdj int,
@@ -85,10 +88,10 @@
 	return
 }
 
-// Format formats the given package file originally obtained from src
+// format formats the given package file originally obtained from src
 // and adjusts the result based on the original source via sourceAdj
 // and indentAdj.
-func Format(
+func format(
 	fset *token.FileSet,
 	file *ast.File,
 	sourceAdj func(src []byte, indent int) []byte,
@@ -109,7 +112,7 @@
 	// Partial source file.
 	// Determine and prepend leading space.
 	i, j := 0, 0
-	for j < len(src) && IsSpace(src[j]) {
+	for j < len(src) && isSpace(src[j]) {
 		if src[j] == '\n' {
 			i = j + 1 // byte offset of last line in leading space
 		}
@@ -146,18 +149,28 @@
 	if err != nil {
 		return nil, err
 	}
-	res = append(res, sourceAdj(buf.Bytes(), cfg.Indent)...)
+	out := sourceAdj(buf.Bytes(), cfg.Indent)
+
+	// If the adjusted output is empty, the source
+	// was empty except (possibly) for white space.
+	// The result is the incoming source.
+	if len(out) == 0 {
+		return src, nil
+	}
+
+	// Otherwise, append output to leading space.
+	res = append(res, out...)
 
 	// Determine and append trailing space.
 	i = len(src)
-	for i > 0 && IsSpace(src[i-1]) {
+	for i > 0 && isSpace(src[i-1]) {
 		i--
 	}
 	return append(res, src[i:]...), nil
 }
 
-// IsSpace reports whether the byte is a space character.
-// IsSpace defines a space as being among the following bytes: ' ', '\t', '\n' and '\r'.
-func IsSpace(b byte) bool {
+// isSpace reports whether the byte is a space character.
+// isSpace defines a space as being among the following bytes: ' ', '\t', '\n' and '\r'.
+func isSpace(b byte) bool {
 	return b == ' ' || b == '\t' || b == '\n' || b == '\r'
 }
diff --git a/src/cmd/gofmt/long_test.go b/src/cmd/gofmt/long_test.go
index df9a878..237b860 100644
--- a/src/cmd/gofmt/long_test.go
+++ b/src/cmd/gofmt/long_test.go
@@ -15,7 +15,6 @@
 	"go/ast"
 	"go/printer"
 	"go/token"
-	"internal/format"
 	"io"
 	"os"
 	"path/filepath"
@@ -33,7 +32,7 @@
 )
 
 func gofmt(fset *token.FileSet, filename string, src *bytes.Buffer) error {
-	f, _, _, err := format.Parse(fset, filename, src.Bytes(), false)
+	f, _, _, err := parse(fset, filename, src.Bytes(), false)
 	if err != nil {
 		return err
 	}
@@ -61,7 +60,7 @@
 
 	// exclude files w/ syntax errors (typically test cases)
 	fset := token.NewFileSet()
-	if _, _, _, err = format.Parse(fset, filename, b1.Bytes(), false); err != nil {
+	if _, _, _, err = parse(fset, filename, b1.Bytes(), false); err != nil {
 		if *verbose {
 			fmt.Fprintf(os.Stderr, "ignoring %s\n", err)
 		}
diff --git a/src/cmd/internal/obj/arm/a.out.go b/src/cmd/internal/obj/arm/a.out.go
index 6c9cfd5..ca166e1 100644
--- a/src/cmd/internal/obj/arm/a.out.go
+++ b/src/cmd/internal/obj/arm/a.out.go
@@ -155,6 +155,16 @@
 	C_HREG
 
 	C_ADDR /* reference to relocatable address */
+
+	// TLS "var" in local exec mode: will become a constant offset from
+	// thread local base that is ultimately chosen by the program linker.
+	C_TLS_LE
+
+	// TLS "var" in initial exec mode: will become a memory address (chosen
+	// by the program linker) that the dynamic linker will fill with the
+	// offset from the thread local base.
+	C_TLS_IE
+
 	C_TEXTSIZE
 
 	C_GOK
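
The choice between the two classes reduces to one toolchain flag; a hedged
restatement (tlsClass is a hypothetical helper mirroring the aclass change in
asm5.go below):

	func tlsClass(flagShared bool) int {
		if flagShared {
			return C_TLS_IE // dynamic linker fills a GOT slot with the offset
		}
		return C_TLS_LE // offset becomes a link-time constant
	}
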
diff --git a/src/cmd/internal/obj/arm/anames5.go b/src/cmd/internal/obj/arm/anames5.go
index 2e3a1f9..e3f98ce 100644
--- a/src/cmd/internal/obj/arm/anames5.go
+++ b/src/cmd/internal/obj/arm/anames5.go
@@ -41,6 +41,8 @@
 	"SP",
 	"HREG",
 	"ADDR",
+	"C_TLS_LE",
+	"C_TLS_IE",
 	"TEXTSIZE",
 	"GOK",
 	"NCLASS",
diff --git a/src/cmd/internal/obj/arm/asm5.go b/src/cmd/internal/obj/arm/asm5.go
index 1201296..3bf4a317 100644
--- a/src/cmd/internal/obj/arm/asm5.go
+++ b/src/cmd/internal/obj/arm/asm5.go
@@ -67,197 +67,201 @@
 var optab = []Optab{
 	/* struct Optab:
 	OPCODE,	from, prog->reg, to,		 type,size,param,flag */
-	Optab{obj.ATEXT, C_ADDR, C_NONE, C_TEXTSIZE, 0, 0, 0, 0, 0},
-	Optab{AADD, C_REG, C_REG, C_REG, 1, 4, 0, 0, 0},
-	Optab{AADD, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0},
-	Optab{AMOVW, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0},
-	Optab{AMVN, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0},
-	Optab{ACMP, C_REG, C_REG, C_NONE, 1, 4, 0, 0, 0},
-	Optab{AADD, C_RCON, C_REG, C_REG, 2, 4, 0, 0, 0},
-	Optab{AADD, C_RCON, C_NONE, C_REG, 2, 4, 0, 0, 0},
-	Optab{AMOVW, C_RCON, C_NONE, C_REG, 2, 4, 0, 0, 0},
-	Optab{AMVN, C_RCON, C_NONE, C_REG, 2, 4, 0, 0, 0},
-	Optab{ACMP, C_RCON, C_REG, C_NONE, 2, 4, 0, 0, 0},
-	Optab{AADD, C_SHIFT, C_REG, C_REG, 3, 4, 0, 0, 0},
-	Optab{AADD, C_SHIFT, C_NONE, C_REG, 3, 4, 0, 0, 0},
-	Optab{AMVN, C_SHIFT, C_NONE, C_REG, 3, 4, 0, 0, 0},
-	Optab{ACMP, C_SHIFT, C_REG, C_NONE, 3, 4, 0, 0, 0},
-	Optab{AMOVW, C_RACON, C_NONE, C_REG, 4, 4, REGSP, 0, 0},
-	Optab{AB, C_NONE, C_NONE, C_SBRA, 5, 4, 0, LPOOL, 0},
-	Optab{ABL, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0},
-	Optab{ABX, C_NONE, C_NONE, C_SBRA, 74, 20, 0, 0, 0},
-	Optab{ABEQ, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0},
-	Optab{ABEQ, C_RCON, C_NONE, C_SBRA, 5, 4, 0, 0, 0}, // prediction hinted form, hint ignored
+	{obj.ATEXT, C_ADDR, C_NONE, C_TEXTSIZE, 0, 0, 0, 0, 0},
+	{AADD, C_REG, C_REG, C_REG, 1, 4, 0, 0, 0},
+	{AADD, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0},
+	{AMOVW, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0},
+	{AMVN, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0},
+	{ACMP, C_REG, C_REG, C_NONE, 1, 4, 0, 0, 0},
+	{AADD, C_RCON, C_REG, C_REG, 2, 4, 0, 0, 0},
+	{AADD, C_RCON, C_NONE, C_REG, 2, 4, 0, 0, 0},
+	{AMOVW, C_RCON, C_NONE, C_REG, 2, 4, 0, 0, 0},
+	{AMVN, C_RCON, C_NONE, C_REG, 2, 4, 0, 0, 0},
+	{ACMP, C_RCON, C_REG, C_NONE, 2, 4, 0, 0, 0},
+	{AADD, C_SHIFT, C_REG, C_REG, 3, 4, 0, 0, 0},
+	{AADD, C_SHIFT, C_NONE, C_REG, 3, 4, 0, 0, 0},
+	{AMVN, C_SHIFT, C_NONE, C_REG, 3, 4, 0, 0, 0},
+	{ACMP, C_SHIFT, C_REG, C_NONE, 3, 4, 0, 0, 0},
+	{AMOVW, C_RACON, C_NONE, C_REG, 4, 4, REGSP, 0, 0},
+	{AB, C_NONE, C_NONE, C_SBRA, 5, 4, 0, LPOOL, 0},
+	{ABL, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0},
+	{ABX, C_NONE, C_NONE, C_SBRA, 74, 20, 0, 0, 0},
+	{ABEQ, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0},
+	{ABEQ, C_RCON, C_NONE, C_SBRA, 5, 4, 0, 0, 0}, // prediction hinted form, hint ignored
 
-	Optab{AB, C_NONE, C_NONE, C_ROREG, 6, 4, 0, LPOOL, 0},
-	Optab{ABL, C_NONE, C_NONE, C_ROREG, 7, 4, 0, 0, 0},
-	Optab{ABL, C_REG, C_NONE, C_ROREG, 7, 4, 0, 0, 0},
-	Optab{ABX, C_NONE, C_NONE, C_ROREG, 75, 12, 0, 0, 0},
-	Optab{ABXRET, C_NONE, C_NONE, C_ROREG, 76, 4, 0, 0, 0},
-	Optab{ASLL, C_RCON, C_REG, C_REG, 8, 4, 0, 0, 0},
-	Optab{ASLL, C_RCON, C_NONE, C_REG, 8, 4, 0, 0, 0},
-	Optab{ASLL, C_REG, C_NONE, C_REG, 9, 4, 0, 0, 0},
-	Optab{ASLL, C_REG, C_REG, C_REG, 9, 4, 0, 0, 0},
-	Optab{ASWI, C_NONE, C_NONE, C_NONE, 10, 4, 0, 0, 0},
-	Optab{ASWI, C_NONE, C_NONE, C_LOREG, 10, 4, 0, 0, 0},
-	Optab{ASWI, C_NONE, C_NONE, C_LCON, 10, 4, 0, 0, 0},
-	Optab{AWORD, C_NONE, C_NONE, C_LCON, 11, 4, 0, 0, 0},
-	Optab{AWORD, C_NONE, C_NONE, C_LCONADDR, 11, 4, 0, 0, 0},
-	Optab{AWORD, C_NONE, C_NONE, C_ADDR, 11, 4, 0, 0, 0},
-	Optab{AMOVW, C_NCON, C_NONE, C_REG, 12, 4, 0, 0, 0},
-	Optab{AMOVW, C_LCON, C_NONE, C_REG, 12, 4, 0, LFROM, 0},
-	Optab{AMOVW, C_LCONADDR, C_NONE, C_REG, 12, 4, 0, LFROM | LPCREL, 4},
-	Optab{AADD, C_NCON, C_REG, C_REG, 13, 8, 0, 0, 0},
-	Optab{AADD, C_NCON, C_NONE, C_REG, 13, 8, 0, 0, 0},
-	Optab{AMVN, C_NCON, C_NONE, C_REG, 13, 8, 0, 0, 0},
-	Optab{ACMP, C_NCON, C_REG, C_NONE, 13, 8, 0, 0, 0},
-	Optab{AADD, C_LCON, C_REG, C_REG, 13, 8, 0, LFROM, 0},
-	Optab{AADD, C_LCON, C_NONE, C_REG, 13, 8, 0, LFROM, 0},
-	Optab{AMVN, C_LCON, C_NONE, C_REG, 13, 8, 0, LFROM, 0},
-	Optab{ACMP, C_LCON, C_REG, C_NONE, 13, 8, 0, LFROM, 0},
-	Optab{AMOVB, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0},
-	Optab{AMOVBS, C_REG, C_NONE, C_REG, 14, 8, 0, 0, 0},
-	Optab{AMOVBU, C_REG, C_NONE, C_REG, 58, 4, 0, 0, 0},
-	Optab{AMOVH, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0},
-	Optab{AMOVHS, C_REG, C_NONE, C_REG, 14, 8, 0, 0, 0},
-	Optab{AMOVHU, C_REG, C_NONE, C_REG, 14, 8, 0, 0, 0},
-	Optab{AMUL, C_REG, C_REG, C_REG, 15, 4, 0, 0, 0},
-	Optab{AMUL, C_REG, C_NONE, C_REG, 15, 4, 0, 0, 0},
-	Optab{ADIV, C_REG, C_REG, C_REG, 16, 4, 0, 0, 0},
-	Optab{ADIV, C_REG, C_NONE, C_REG, 16, 4, 0, 0, 0},
-	Optab{AMULL, C_REG, C_REG, C_REGREG, 17, 4, 0, 0, 0},
-	Optab{AMULA, C_REG, C_REG, C_REGREG2, 17, 4, 0, 0, 0},
-	Optab{AMOVW, C_REG, C_NONE, C_SAUTO, 20, 4, REGSP, 0, 0},
-	Optab{AMOVW, C_REG, C_NONE, C_SOREG, 20, 4, 0, 0, 0},
-	Optab{AMOVB, C_REG, C_NONE, C_SAUTO, 20, 4, REGSP, 0, 0},
-	Optab{AMOVB, C_REG, C_NONE, C_SOREG, 20, 4, 0, 0, 0},
-	Optab{AMOVBS, C_REG, C_NONE, C_SAUTO, 20, 4, REGSP, 0, 0},
-	Optab{AMOVBS, C_REG, C_NONE, C_SOREG, 20, 4, 0, 0, 0},
-	Optab{AMOVBU, C_REG, C_NONE, C_SAUTO, 20, 4, REGSP, 0, 0},
-	Optab{AMOVBU, C_REG, C_NONE, C_SOREG, 20, 4, 0, 0, 0},
-	Optab{AMOVW, C_SAUTO, C_NONE, C_REG, 21, 4, REGSP, 0, 0},
-	Optab{AMOVW, C_SOREG, C_NONE, C_REG, 21, 4, 0, 0, 0},
-	Optab{AMOVBU, C_SAUTO, C_NONE, C_REG, 21, 4, REGSP, 0, 0},
-	Optab{AMOVBU, C_SOREG, C_NONE, C_REG, 21, 4, 0, 0, 0},
-	Optab{AMOVW, C_REG, C_NONE, C_LAUTO, 30, 8, REGSP, LTO, 0},
-	Optab{AMOVW, C_REG, C_NONE, C_LOREG, 30, 8, 0, LTO, 0},
-	Optab{AMOVW, C_REG, C_NONE, C_ADDR, 64, 8, 0, LTO | LPCREL, 4},
-	Optab{AMOVB, C_REG, C_NONE, C_LAUTO, 30, 8, REGSP, LTO, 0},
-	Optab{AMOVB, C_REG, C_NONE, C_LOREG, 30, 8, 0, LTO, 0},
-	Optab{AMOVB, C_REG, C_NONE, C_ADDR, 64, 8, 0, LTO | LPCREL, 4},
-	Optab{AMOVBS, C_REG, C_NONE, C_LAUTO, 30, 8, REGSP, LTO, 0},
-	Optab{AMOVBS, C_REG, C_NONE, C_LOREG, 30, 8, 0, LTO, 0},
-	Optab{AMOVBS, C_REG, C_NONE, C_ADDR, 64, 8, 0, LTO | LPCREL, 4},
-	Optab{AMOVBU, C_REG, C_NONE, C_LAUTO, 30, 8, REGSP, LTO, 0},
-	Optab{AMOVBU, C_REG, C_NONE, C_LOREG, 30, 8, 0, LTO, 0},
-	Optab{AMOVBU, C_REG, C_NONE, C_ADDR, 64, 8, 0, LTO | LPCREL, 4},
-	Optab{AMOVW, C_LAUTO, C_NONE, C_REG, 31, 8, REGSP, LFROM, 0},
-	Optab{AMOVW, C_LOREG, C_NONE, C_REG, 31, 8, 0, LFROM, 0},
-	Optab{AMOVW, C_ADDR, C_NONE, C_REG, 65, 8, 0, LFROM | LPCREL, 4},
-	Optab{AMOVBU, C_LAUTO, C_NONE, C_REG, 31, 8, REGSP, LFROM, 0},
-	Optab{AMOVBU, C_LOREG, C_NONE, C_REG, 31, 8, 0, LFROM, 0},
-	Optab{AMOVBU, C_ADDR, C_NONE, C_REG, 65, 8, 0, LFROM | LPCREL, 4},
-	Optab{AMOVW, C_LACON, C_NONE, C_REG, 34, 8, REGSP, LFROM, 0},
-	Optab{AMOVW, C_PSR, C_NONE, C_REG, 35, 4, 0, 0, 0},
-	Optab{AMOVW, C_REG, C_NONE, C_PSR, 36, 4, 0, 0, 0},
-	Optab{AMOVW, C_RCON, C_NONE, C_PSR, 37, 4, 0, 0, 0},
-	Optab{AMOVM, C_REGLIST, C_NONE, C_SOREG, 38, 4, 0, 0, 0},
-	Optab{AMOVM, C_SOREG, C_NONE, C_REGLIST, 39, 4, 0, 0, 0},
-	Optab{ASWPW, C_SOREG, C_REG, C_REG, 40, 4, 0, 0, 0},
-	Optab{ARFE, C_NONE, C_NONE, C_NONE, 41, 4, 0, 0, 0},
-	Optab{AMOVF, C_FREG, C_NONE, C_FAUTO, 50, 4, REGSP, 0, 0},
-	Optab{AMOVF, C_FREG, C_NONE, C_FOREG, 50, 4, 0, 0, 0},
-	Optab{AMOVF, C_FAUTO, C_NONE, C_FREG, 51, 4, REGSP, 0, 0},
-	Optab{AMOVF, C_FOREG, C_NONE, C_FREG, 51, 4, 0, 0, 0},
-	Optab{AMOVF, C_FREG, C_NONE, C_LAUTO, 52, 12, REGSP, LTO, 0},
-	Optab{AMOVF, C_FREG, C_NONE, C_LOREG, 52, 12, 0, LTO, 0},
-	Optab{AMOVF, C_LAUTO, C_NONE, C_FREG, 53, 12, REGSP, LFROM, 0},
-	Optab{AMOVF, C_LOREG, C_NONE, C_FREG, 53, 12, 0, LFROM, 0},
-	Optab{AMOVF, C_FREG, C_NONE, C_ADDR, 68, 8, 0, LTO | LPCREL, 4},
-	Optab{AMOVF, C_ADDR, C_NONE, C_FREG, 69, 8, 0, LFROM | LPCREL, 4},
-	Optab{AADDF, C_FREG, C_NONE, C_FREG, 54, 4, 0, 0, 0},
-	Optab{AADDF, C_FREG, C_REG, C_FREG, 54, 4, 0, 0, 0},
-	Optab{AMOVF, C_FREG, C_NONE, C_FREG, 54, 4, 0, 0, 0},
-	Optab{AMOVW, C_REG, C_NONE, C_FCR, 56, 4, 0, 0, 0},
-	Optab{AMOVW, C_FCR, C_NONE, C_REG, 57, 4, 0, 0, 0},
-	Optab{AMOVW, C_SHIFT, C_NONE, C_REG, 59, 4, 0, 0, 0},
-	Optab{AMOVBU, C_SHIFT, C_NONE, C_REG, 59, 4, 0, 0, 0},
-	Optab{AMOVB, C_SHIFT, C_NONE, C_REG, 60, 4, 0, 0, 0},
-	Optab{AMOVBS, C_SHIFT, C_NONE, C_REG, 60, 4, 0, 0, 0},
-	Optab{AMOVW, C_REG, C_NONE, C_SHIFT, 61, 4, 0, 0, 0},
-	Optab{AMOVB, C_REG, C_NONE, C_SHIFT, 61, 4, 0, 0, 0},
-	Optab{AMOVBS, C_REG, C_NONE, C_SHIFT, 61, 4, 0, 0, 0},
-	Optab{AMOVBU, C_REG, C_NONE, C_SHIFT, 61, 4, 0, 0, 0},
-	Optab{AMOVH, C_REG, C_NONE, C_HAUTO, 70, 4, REGSP, 0, 0},
-	Optab{AMOVH, C_REG, C_NONE, C_HOREG, 70, 4, 0, 0, 0},
-	Optab{AMOVHS, C_REG, C_NONE, C_HAUTO, 70, 4, REGSP, 0, 0},
-	Optab{AMOVHS, C_REG, C_NONE, C_HOREG, 70, 4, 0, 0, 0},
-	Optab{AMOVHU, C_REG, C_NONE, C_HAUTO, 70, 4, REGSP, 0, 0},
-	Optab{AMOVHU, C_REG, C_NONE, C_HOREG, 70, 4, 0, 0, 0},
-	Optab{AMOVB, C_HAUTO, C_NONE, C_REG, 71, 4, REGSP, 0, 0},
-	Optab{AMOVB, C_HOREG, C_NONE, C_REG, 71, 4, 0, 0, 0},
-	Optab{AMOVBS, C_HAUTO, C_NONE, C_REG, 71, 4, REGSP, 0, 0},
-	Optab{AMOVBS, C_HOREG, C_NONE, C_REG, 71, 4, 0, 0, 0},
-	Optab{AMOVH, C_HAUTO, C_NONE, C_REG, 71, 4, REGSP, 0, 0},
-	Optab{AMOVH, C_HOREG, C_NONE, C_REG, 71, 4, 0, 0, 0},
-	Optab{AMOVHS, C_HAUTO, C_NONE, C_REG, 71, 4, REGSP, 0, 0},
-	Optab{AMOVHS, C_HOREG, C_NONE, C_REG, 71, 4, 0, 0, 0},
-	Optab{AMOVHU, C_HAUTO, C_NONE, C_REG, 71, 4, REGSP, 0, 0},
-	Optab{AMOVHU, C_HOREG, C_NONE, C_REG, 71, 4, 0, 0, 0},
-	Optab{AMOVH, C_REG, C_NONE, C_LAUTO, 72, 8, REGSP, LTO, 0},
-	Optab{AMOVH, C_REG, C_NONE, C_LOREG, 72, 8, 0, LTO, 0},
-	Optab{AMOVH, C_REG, C_NONE, C_ADDR, 94, 8, 0, LTO | LPCREL, 4},
-	Optab{AMOVHS, C_REG, C_NONE, C_LAUTO, 72, 8, REGSP, LTO, 0},
-	Optab{AMOVHS, C_REG, C_NONE, C_LOREG, 72, 8, 0, LTO, 0},
-	Optab{AMOVHS, C_REG, C_NONE, C_ADDR, 94, 8, 0, LTO | LPCREL, 4},
-	Optab{AMOVHU, C_REG, C_NONE, C_LAUTO, 72, 8, REGSP, LTO, 0},
-	Optab{AMOVHU, C_REG, C_NONE, C_LOREG, 72, 8, 0, LTO, 0},
-	Optab{AMOVHU, C_REG, C_NONE, C_ADDR, 94, 8, 0, LTO | LPCREL, 4},
-	Optab{AMOVB, C_LAUTO, C_NONE, C_REG, 73, 8, REGSP, LFROM, 0},
-	Optab{AMOVB, C_LOREG, C_NONE, C_REG, 73, 8, 0, LFROM, 0},
-	Optab{AMOVB, C_ADDR, C_NONE, C_REG, 93, 8, 0, LFROM | LPCREL, 4},
-	Optab{AMOVBS, C_LAUTO, C_NONE, C_REG, 73, 8, REGSP, LFROM, 0},
-	Optab{AMOVBS, C_LOREG, C_NONE, C_REG, 73, 8, 0, LFROM, 0},
-	Optab{AMOVBS, C_ADDR, C_NONE, C_REG, 93, 8, 0, LFROM | LPCREL, 4},
-	Optab{AMOVH, C_LAUTO, C_NONE, C_REG, 73, 8, REGSP, LFROM, 0},
-	Optab{AMOVH, C_LOREG, C_NONE, C_REG, 73, 8, 0, LFROM, 0},
-	Optab{AMOVH, C_ADDR, C_NONE, C_REG, 93, 8, 0, LFROM | LPCREL, 4},
-	Optab{AMOVHS, C_LAUTO, C_NONE, C_REG, 73, 8, REGSP, LFROM, 0},
-	Optab{AMOVHS, C_LOREG, C_NONE, C_REG, 73, 8, 0, LFROM, 0},
-	Optab{AMOVHS, C_ADDR, C_NONE, C_REG, 93, 8, 0, LFROM | LPCREL, 4},
-	Optab{AMOVHU, C_LAUTO, C_NONE, C_REG, 73, 8, REGSP, LFROM, 0},
-	Optab{AMOVHU, C_LOREG, C_NONE, C_REG, 73, 8, 0, LFROM, 0},
-	Optab{AMOVHU, C_ADDR, C_NONE, C_REG, 93, 8, 0, LFROM | LPCREL, 4},
-	Optab{ALDREX, C_SOREG, C_NONE, C_REG, 77, 4, 0, 0, 0},
-	Optab{ASTREX, C_SOREG, C_REG, C_REG, 78, 4, 0, 0, 0},
-	Optab{AMOVF, C_ZFCON, C_NONE, C_FREG, 80, 8, 0, 0, 0},
-	Optab{AMOVF, C_SFCON, C_NONE, C_FREG, 81, 4, 0, 0, 0},
-	Optab{ACMPF, C_FREG, C_REG, C_NONE, 82, 8, 0, 0, 0},
-	Optab{ACMPF, C_FREG, C_NONE, C_NONE, 83, 8, 0, 0, 0},
-	Optab{AMOVFW, C_FREG, C_NONE, C_FREG, 84, 4, 0, 0, 0},
-	Optab{AMOVWF, C_FREG, C_NONE, C_FREG, 85, 4, 0, 0, 0},
-	Optab{AMOVFW, C_FREG, C_NONE, C_REG, 86, 8, 0, 0, 0},
-	Optab{AMOVWF, C_REG, C_NONE, C_FREG, 87, 8, 0, 0, 0},
-	Optab{AMOVW, C_REG, C_NONE, C_FREG, 88, 4, 0, 0, 0},
-	Optab{AMOVW, C_FREG, C_NONE, C_REG, 89, 4, 0, 0, 0},
-	Optab{ATST, C_REG, C_NONE, C_NONE, 90, 4, 0, 0, 0},
-	Optab{ALDREXD, C_SOREG, C_NONE, C_REG, 91, 4, 0, 0, 0},
-	Optab{ASTREXD, C_SOREG, C_REG, C_REG, 92, 4, 0, 0, 0},
-	Optab{APLD, C_SOREG, C_NONE, C_NONE, 95, 4, 0, 0, 0},
-	Optab{obj.AUNDEF, C_NONE, C_NONE, C_NONE, 96, 4, 0, 0, 0},
-	Optab{ACLZ, C_REG, C_NONE, C_REG, 97, 4, 0, 0, 0},
-	Optab{AMULWT, C_REG, C_REG, C_REG, 98, 4, 0, 0, 0},
-	Optab{AMULAWT, C_REG, C_REG, C_REGREG2, 99, 4, 0, 0, 0},
-	Optab{obj.AUSEFIELD, C_ADDR, C_NONE, C_NONE, 0, 0, 0, 0, 0},
-	Optab{obj.APCDATA, C_LCON, C_NONE, C_LCON, 0, 0, 0, 0, 0},
-	Optab{obj.AFUNCDATA, C_LCON, C_NONE, C_ADDR, 0, 0, 0, 0, 0},
-	Optab{obj.ANOP, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0},
-	Optab{obj.ADUFFZERO, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0}, // same as ABL
-	Optab{obj.ADUFFCOPY, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0}, // same as ABL
+	{AB, C_NONE, C_NONE, C_ROREG, 6, 4, 0, LPOOL, 0},
+	{ABL, C_NONE, C_NONE, C_ROREG, 7, 4, 0, 0, 0},
+	{ABL, C_REG, C_NONE, C_ROREG, 7, 4, 0, 0, 0},
+	{ABX, C_NONE, C_NONE, C_ROREG, 75, 12, 0, 0, 0},
+	{ABXRET, C_NONE, C_NONE, C_ROREG, 76, 4, 0, 0, 0},
+	{ASLL, C_RCON, C_REG, C_REG, 8, 4, 0, 0, 0},
+	{ASLL, C_RCON, C_NONE, C_REG, 8, 4, 0, 0, 0},
+	{ASLL, C_REG, C_NONE, C_REG, 9, 4, 0, 0, 0},
+	{ASLL, C_REG, C_REG, C_REG, 9, 4, 0, 0, 0},
+	{ASWI, C_NONE, C_NONE, C_NONE, 10, 4, 0, 0, 0},
+	{ASWI, C_NONE, C_NONE, C_LOREG, 10, 4, 0, 0, 0},
+	{ASWI, C_NONE, C_NONE, C_LCON, 10, 4, 0, 0, 0},
+	{AWORD, C_NONE, C_NONE, C_LCON, 11, 4, 0, 0, 0},
+	{AWORD, C_NONE, C_NONE, C_LCONADDR, 11, 4, 0, 0, 0},
+	{AWORD, C_NONE, C_NONE, C_ADDR, 11, 4, 0, 0, 0},
+	{AWORD, C_NONE, C_NONE, C_TLS_LE, 103, 4, 0, 0, 0},
+	{AWORD, C_NONE, C_NONE, C_TLS_IE, 104, 4, 0, 0, 0},
+	{AMOVW, C_NCON, C_NONE, C_REG, 12, 4, 0, 0, 0},
+	{AMOVW, C_LCON, C_NONE, C_REG, 12, 4, 0, LFROM, 0},
+	{AMOVW, C_LCONADDR, C_NONE, C_REG, 12, 4, 0, LFROM | LPCREL, 4},
+	{AADD, C_NCON, C_REG, C_REG, 13, 8, 0, 0, 0},
+	{AADD, C_NCON, C_NONE, C_REG, 13, 8, 0, 0, 0},
+	{AMVN, C_NCON, C_NONE, C_REG, 13, 8, 0, 0, 0},
+	{ACMP, C_NCON, C_REG, C_NONE, 13, 8, 0, 0, 0},
+	{AADD, C_LCON, C_REG, C_REG, 13, 8, 0, LFROM, 0},
+	{AADD, C_LCON, C_NONE, C_REG, 13, 8, 0, LFROM, 0},
+	{AMVN, C_LCON, C_NONE, C_REG, 13, 8, 0, LFROM, 0},
+	{ACMP, C_LCON, C_REG, C_NONE, 13, 8, 0, LFROM, 0},
+	{AMOVB, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0},
+	{AMOVBS, C_REG, C_NONE, C_REG, 14, 8, 0, 0, 0},
+	{AMOVBU, C_REG, C_NONE, C_REG, 58, 4, 0, 0, 0},
+	{AMOVH, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0},
+	{AMOVHS, C_REG, C_NONE, C_REG, 14, 8, 0, 0, 0},
+	{AMOVHU, C_REG, C_NONE, C_REG, 14, 8, 0, 0, 0},
+	{AMUL, C_REG, C_REG, C_REG, 15, 4, 0, 0, 0},
+	{AMUL, C_REG, C_NONE, C_REG, 15, 4, 0, 0, 0},
+	{ADIV, C_REG, C_REG, C_REG, 16, 4, 0, 0, 0},
+	{ADIV, C_REG, C_NONE, C_REG, 16, 4, 0, 0, 0},
+	{AMULL, C_REG, C_REG, C_REGREG, 17, 4, 0, 0, 0},
+	{AMULA, C_REG, C_REG, C_REGREG2, 17, 4, 0, 0, 0},
+	{AMOVW, C_REG, C_NONE, C_SAUTO, 20, 4, REGSP, 0, 0},
+	{AMOVW, C_REG, C_NONE, C_SOREG, 20, 4, 0, 0, 0},
+	{AMOVB, C_REG, C_NONE, C_SAUTO, 20, 4, REGSP, 0, 0},
+	{AMOVB, C_REG, C_NONE, C_SOREG, 20, 4, 0, 0, 0},
+	{AMOVBS, C_REG, C_NONE, C_SAUTO, 20, 4, REGSP, 0, 0},
+	{AMOVBS, C_REG, C_NONE, C_SOREG, 20, 4, 0, 0, 0},
+	{AMOVBU, C_REG, C_NONE, C_SAUTO, 20, 4, REGSP, 0, 0},
+	{AMOVBU, C_REG, C_NONE, C_SOREG, 20, 4, 0, 0, 0},
+	{AMOVW, C_SAUTO, C_NONE, C_REG, 21, 4, REGSP, 0, 0},
+	{AMOVW, C_SOREG, C_NONE, C_REG, 21, 4, 0, 0, 0},
+	{AMOVBU, C_SAUTO, C_NONE, C_REG, 21, 4, REGSP, 0, 0},
+	{AMOVBU, C_SOREG, C_NONE, C_REG, 21, 4, 0, 0, 0},
+	{AMOVW, C_REG, C_NONE, C_LAUTO, 30, 8, REGSP, LTO, 0},
+	{AMOVW, C_REG, C_NONE, C_LOREG, 30, 8, 0, LTO, 0},
+	{AMOVW, C_REG, C_NONE, C_ADDR, 64, 8, 0, LTO | LPCREL, 4},
+	{AMOVB, C_REG, C_NONE, C_LAUTO, 30, 8, REGSP, LTO, 0},
+	{AMOVB, C_REG, C_NONE, C_LOREG, 30, 8, 0, LTO, 0},
+	{AMOVB, C_REG, C_NONE, C_ADDR, 64, 8, 0, LTO | LPCREL, 4},
+	{AMOVBS, C_REG, C_NONE, C_LAUTO, 30, 8, REGSP, LTO, 0},
+	{AMOVBS, C_REG, C_NONE, C_LOREG, 30, 8, 0, LTO, 0},
+	{AMOVBS, C_REG, C_NONE, C_ADDR, 64, 8, 0, LTO | LPCREL, 4},
+	{AMOVBU, C_REG, C_NONE, C_LAUTO, 30, 8, REGSP, LTO, 0},
+	{AMOVBU, C_REG, C_NONE, C_LOREG, 30, 8, 0, LTO, 0},
+	{AMOVBU, C_REG, C_NONE, C_ADDR, 64, 8, 0, LTO | LPCREL, 4},
+	{AMOVW, C_TLS_LE, C_NONE, C_REG, 101, 4, 0, LFROM, 0},
+	{AMOVW, C_TLS_IE, C_NONE, C_REG, 102, 8, 0, LFROM, 0},
+	{AMOVW, C_LAUTO, C_NONE, C_REG, 31, 8, REGSP, LFROM, 0},
+	{AMOVW, C_LOREG, C_NONE, C_REG, 31, 8, 0, LFROM, 0},
+	{AMOVW, C_ADDR, C_NONE, C_REG, 65, 8, 0, LFROM | LPCREL, 4},
+	{AMOVBU, C_LAUTO, C_NONE, C_REG, 31, 8, REGSP, LFROM, 0},
+	{AMOVBU, C_LOREG, C_NONE, C_REG, 31, 8, 0, LFROM, 0},
+	{AMOVBU, C_ADDR, C_NONE, C_REG, 65, 8, 0, LFROM | LPCREL, 4},
+	{AMOVW, C_LACON, C_NONE, C_REG, 34, 8, REGSP, LFROM, 0},
+	{AMOVW, C_PSR, C_NONE, C_REG, 35, 4, 0, 0, 0},
+	{AMOVW, C_REG, C_NONE, C_PSR, 36, 4, 0, 0, 0},
+	{AMOVW, C_RCON, C_NONE, C_PSR, 37, 4, 0, 0, 0},
+	{AMOVM, C_REGLIST, C_NONE, C_SOREG, 38, 4, 0, 0, 0},
+	{AMOVM, C_SOREG, C_NONE, C_REGLIST, 39, 4, 0, 0, 0},
+	{ASWPW, C_SOREG, C_REG, C_REG, 40, 4, 0, 0, 0},
+	{ARFE, C_NONE, C_NONE, C_NONE, 41, 4, 0, 0, 0},
+	{AMOVF, C_FREG, C_NONE, C_FAUTO, 50, 4, REGSP, 0, 0},
+	{AMOVF, C_FREG, C_NONE, C_FOREG, 50, 4, 0, 0, 0},
+	{AMOVF, C_FAUTO, C_NONE, C_FREG, 51, 4, REGSP, 0, 0},
+	{AMOVF, C_FOREG, C_NONE, C_FREG, 51, 4, 0, 0, 0},
+	{AMOVF, C_FREG, C_NONE, C_LAUTO, 52, 12, REGSP, LTO, 0},
+	{AMOVF, C_FREG, C_NONE, C_LOREG, 52, 12, 0, LTO, 0},
+	{AMOVF, C_LAUTO, C_NONE, C_FREG, 53, 12, REGSP, LFROM, 0},
+	{AMOVF, C_LOREG, C_NONE, C_FREG, 53, 12, 0, LFROM, 0},
+	{AMOVF, C_FREG, C_NONE, C_ADDR, 68, 8, 0, LTO | LPCREL, 4},
+	{AMOVF, C_ADDR, C_NONE, C_FREG, 69, 8, 0, LFROM | LPCREL, 4},
+	{AADDF, C_FREG, C_NONE, C_FREG, 54, 4, 0, 0, 0},
+	{AADDF, C_FREG, C_REG, C_FREG, 54, 4, 0, 0, 0},
+	{AMOVF, C_FREG, C_NONE, C_FREG, 54, 4, 0, 0, 0},
+	{AMOVW, C_REG, C_NONE, C_FCR, 56, 4, 0, 0, 0},
+	{AMOVW, C_FCR, C_NONE, C_REG, 57, 4, 0, 0, 0},
+	{AMOVW, C_SHIFT, C_NONE, C_REG, 59, 4, 0, 0, 0},
+	{AMOVBU, C_SHIFT, C_NONE, C_REG, 59, 4, 0, 0, 0},
+	{AMOVB, C_SHIFT, C_NONE, C_REG, 60, 4, 0, 0, 0},
+	{AMOVBS, C_SHIFT, C_NONE, C_REG, 60, 4, 0, 0, 0},
+	{AMOVW, C_REG, C_NONE, C_SHIFT, 61, 4, 0, 0, 0},
+	{AMOVB, C_REG, C_NONE, C_SHIFT, 61, 4, 0, 0, 0},
+	{AMOVBS, C_REG, C_NONE, C_SHIFT, 61, 4, 0, 0, 0},
+	{AMOVBU, C_REG, C_NONE, C_SHIFT, 61, 4, 0, 0, 0},
+	{AMOVH, C_REG, C_NONE, C_HAUTO, 70, 4, REGSP, 0, 0},
+	{AMOVH, C_REG, C_NONE, C_HOREG, 70, 4, 0, 0, 0},
+	{AMOVHS, C_REG, C_NONE, C_HAUTO, 70, 4, REGSP, 0, 0},
+	{AMOVHS, C_REG, C_NONE, C_HOREG, 70, 4, 0, 0, 0},
+	{AMOVHU, C_REG, C_NONE, C_HAUTO, 70, 4, REGSP, 0, 0},
+	{AMOVHU, C_REG, C_NONE, C_HOREG, 70, 4, 0, 0, 0},
+	{AMOVB, C_HAUTO, C_NONE, C_REG, 71, 4, REGSP, 0, 0},
+	{AMOVB, C_HOREG, C_NONE, C_REG, 71, 4, 0, 0, 0},
+	{AMOVBS, C_HAUTO, C_NONE, C_REG, 71, 4, REGSP, 0, 0},
+	{AMOVBS, C_HOREG, C_NONE, C_REG, 71, 4, 0, 0, 0},
+	{AMOVH, C_HAUTO, C_NONE, C_REG, 71, 4, REGSP, 0, 0},
+	{AMOVH, C_HOREG, C_NONE, C_REG, 71, 4, 0, 0, 0},
+	{AMOVHS, C_HAUTO, C_NONE, C_REG, 71, 4, REGSP, 0, 0},
+	{AMOVHS, C_HOREG, C_NONE, C_REG, 71, 4, 0, 0, 0},
+	{AMOVHU, C_HAUTO, C_NONE, C_REG, 71, 4, REGSP, 0, 0},
+	{AMOVHU, C_HOREG, C_NONE, C_REG, 71, 4, 0, 0, 0},
+	{AMOVH, C_REG, C_NONE, C_LAUTO, 72, 8, REGSP, LTO, 0},
+	{AMOVH, C_REG, C_NONE, C_LOREG, 72, 8, 0, LTO, 0},
+	{AMOVH, C_REG, C_NONE, C_ADDR, 94, 8, 0, LTO | LPCREL, 4},
+	{AMOVHS, C_REG, C_NONE, C_LAUTO, 72, 8, REGSP, LTO, 0},
+	{AMOVHS, C_REG, C_NONE, C_LOREG, 72, 8, 0, LTO, 0},
+	{AMOVHS, C_REG, C_NONE, C_ADDR, 94, 8, 0, LTO | LPCREL, 4},
+	{AMOVHU, C_REG, C_NONE, C_LAUTO, 72, 8, REGSP, LTO, 0},
+	{AMOVHU, C_REG, C_NONE, C_LOREG, 72, 8, 0, LTO, 0},
+	{AMOVHU, C_REG, C_NONE, C_ADDR, 94, 8, 0, LTO | LPCREL, 4},
+	{AMOVB, C_LAUTO, C_NONE, C_REG, 73, 8, REGSP, LFROM, 0},
+	{AMOVB, C_LOREG, C_NONE, C_REG, 73, 8, 0, LFROM, 0},
+	{AMOVB, C_ADDR, C_NONE, C_REG, 93, 8, 0, LFROM | LPCREL, 4},
+	{AMOVBS, C_LAUTO, C_NONE, C_REG, 73, 8, REGSP, LFROM, 0},
+	{AMOVBS, C_LOREG, C_NONE, C_REG, 73, 8, 0, LFROM, 0},
+	{AMOVBS, C_ADDR, C_NONE, C_REG, 93, 8, 0, LFROM | LPCREL, 4},
+	{AMOVH, C_LAUTO, C_NONE, C_REG, 73, 8, REGSP, LFROM, 0},
+	{AMOVH, C_LOREG, C_NONE, C_REG, 73, 8, 0, LFROM, 0},
+	{AMOVH, C_ADDR, C_NONE, C_REG, 93, 8, 0, LFROM | LPCREL, 4},
+	{AMOVHS, C_LAUTO, C_NONE, C_REG, 73, 8, REGSP, LFROM, 0},
+	{AMOVHS, C_LOREG, C_NONE, C_REG, 73, 8, 0, LFROM, 0},
+	{AMOVHS, C_ADDR, C_NONE, C_REG, 93, 8, 0, LFROM | LPCREL, 4},
+	{AMOVHU, C_LAUTO, C_NONE, C_REG, 73, 8, REGSP, LFROM, 0},
+	{AMOVHU, C_LOREG, C_NONE, C_REG, 73, 8, 0, LFROM, 0},
+	{AMOVHU, C_ADDR, C_NONE, C_REG, 93, 8, 0, LFROM | LPCREL, 4},
+	{ALDREX, C_SOREG, C_NONE, C_REG, 77, 4, 0, 0, 0},
+	{ASTREX, C_SOREG, C_REG, C_REG, 78, 4, 0, 0, 0},
+	{AMOVF, C_ZFCON, C_NONE, C_FREG, 80, 8, 0, 0, 0},
+	{AMOVF, C_SFCON, C_NONE, C_FREG, 81, 4, 0, 0, 0},
+	{ACMPF, C_FREG, C_REG, C_NONE, 82, 8, 0, 0, 0},
+	{ACMPF, C_FREG, C_NONE, C_NONE, 83, 8, 0, 0, 0},
+	{AMOVFW, C_FREG, C_NONE, C_FREG, 84, 4, 0, 0, 0},
+	{AMOVWF, C_FREG, C_NONE, C_FREG, 85, 4, 0, 0, 0},
+	{AMOVFW, C_FREG, C_NONE, C_REG, 86, 8, 0, 0, 0},
+	{AMOVWF, C_REG, C_NONE, C_FREG, 87, 8, 0, 0, 0},
+	{AMOVW, C_REG, C_NONE, C_FREG, 88, 4, 0, 0, 0},
+	{AMOVW, C_FREG, C_NONE, C_REG, 89, 4, 0, 0, 0},
+	{ATST, C_REG, C_NONE, C_NONE, 90, 4, 0, 0, 0},
+	{ALDREXD, C_SOREG, C_NONE, C_REG, 91, 4, 0, 0, 0},
+	{ASTREXD, C_SOREG, C_REG, C_REG, 92, 4, 0, 0, 0},
+	{APLD, C_SOREG, C_NONE, C_NONE, 95, 4, 0, 0, 0},
+	{obj.AUNDEF, C_NONE, C_NONE, C_NONE, 96, 4, 0, 0, 0},
+	{ACLZ, C_REG, C_NONE, C_REG, 97, 4, 0, 0, 0},
+	{AMULWT, C_REG, C_REG, C_REG, 98, 4, 0, 0, 0},
+	{AMULAWT, C_REG, C_REG, C_REGREG2, 99, 4, 0, 0, 0},
+	{obj.AUSEFIELD, C_ADDR, C_NONE, C_NONE, 0, 0, 0, 0, 0},
+	{obj.APCDATA, C_LCON, C_NONE, C_LCON, 0, 0, 0, 0, 0},
+	{obj.AFUNCDATA, C_LCON, C_NONE, C_ADDR, 0, 0, 0, 0, 0},
+	{obj.ANOP, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0},
+	{obj.ADUFFZERO, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0}, // same as ABL
+	{obj.ADUFFCOPY, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0}, // same as ABL
 
-	Optab{ADATABUNDLE, C_NONE, C_NONE, C_NONE, 100, 4, 0, 0, 0},
-	Optab{ADATABUNDLEEND, C_NONE, C_NONE, C_NONE, 100, 0, 0, 0, 0},
-	Optab{obj.AXXX, C_NONE, C_NONE, C_NONE, 0, 4, 0, 0, 0},
+	{ADATABUNDLE, C_NONE, C_NONE, C_NONE, 100, 4, 0, 0, 0},
+	{ADATABUNDLEEND, C_NONE, C_NONE, C_NONE, 100, 0, 0, 0, 0},
+	{obj.AXXX, C_NONE, C_NONE, C_NONE, 0, 4, 0, 0, 0},
 }
 
 var pool struct {
@@ -735,9 +739,6 @@
 	 * code references to be relocated too, and then
 	 * perhaps we'd be able to parallelize the span loop above.
 	 */
-	if ctxt.Tlsg == nil {
-		ctxt.Tlsg = obj.Linklookup(ctxt, "runtime.tlsg", 0)
-	}
 
 	p = cursym.Text
 	ctxt.Autosize = int32(p.To.Offset + 4)
@@ -1016,6 +1017,14 @@
 			}
 
 			ctxt.Instoffset = 0 // s.b. unused but just in case
+			if a.Sym.Type == obj.STLSBSS {
+				if ctxt.Flag_shared != 0 {
+					return C_TLS_IE
+				} else {
+					return C_TLS_LE
+				}
+			}
+
 			return C_ADDR
 
 		case obj.NAME_AUTO:
@@ -1634,19 +1643,7 @@
 			rel.Sym = p.To.Sym
 			rel.Add = p.To.Offset
 
-			// runtime.tlsg is special.
-			// Its "address" is the offset from the TLS thread pointer
-			// to the thread-local g and m pointers.
-			// Emit a TLS relocation instead of a standard one if its
-			// type is not explicitly set by runtime. This assumes that
-			// all references to runtime.tlsg should be accompanied with
-			// its type declaration if necessary.
-			if rel.Sym == ctxt.Tlsg && ctxt.Tlsg.Type == 0 {
-				rel.Type = obj.R_TLS
-				if ctxt.Flag_shared != 0 {
-					rel.Add += ctxt.Pc - p.Rel.Pc - 8 - int64(rel.Siz)
-				}
-			} else if ctxt.Flag_shared != 0 {
+			if ctxt.Flag_shared != 0 {
 				rel.Type = obj.R_PCREL
 				rel.Add += ctxt.Pc - p.Rel.Pc - 8
 			} else {
@@ -2037,6 +2034,50 @@
 			o2 = oprrr(ctxt, AADD, int(p.Scond)) | REGTMP&15 | (REGPC&15)<<16 | (REGTMP&15)<<12
 		}
 
+	case 101: /* movw tlsvar,R, local exec */
+		if p.Scond&C_SCOND != C_SCOND_NONE {
+			ctxt.Diag("conditional tls")
+		}
+		o1 = omvl(ctxt, p, &p.From, int(p.To.Reg))
+
+	case 102: /* movw tlsvar,R, initial exec */
+		if p.Scond&C_SCOND != C_SCOND_NONE {
+			ctxt.Diag("conditional tls")
+		}
+		o1 = omvl(ctxt, p, &p.From, int(p.To.Reg))
+		o2 = olrr(ctxt, int(p.To.Reg)&15, (REGPC & 15), int(p.To.Reg), int(p.Scond))
+
+	case 103: /* word tlsvar, local exec */
+		if p.To.Sym == nil {
+			ctxt.Diag("nil sym in tls %v", p)
+		}
+		if p.To.Offset != 0 {
+			ctxt.Diag("offset against tls var in %v", p)
+		}
+		// This case happens with words generated in the PC stream as part of
+		// the literal pool.
+		rel := obj.Addrel(ctxt.Cursym)
+
+		rel.Off = int32(ctxt.Pc)
+		rel.Siz = 4
+		rel.Sym = p.To.Sym
+		rel.Type = obj.R_TLS_LE
+		o1 = 0
+
+	case 104: /* word tlsvar, initial exec */
+		if p.To.Sym == nil {
+			ctxt.Diag("nil sym in tls %v", p)
+		}
+		if p.To.Offset != 0 {
+			ctxt.Diag("offset against tls var in %v", p)
+		}
+		rel := obj.Addrel(ctxt.Cursym)
+		rel.Off = int32(ctxt.Pc)
+		rel.Siz = 4
+		rel.Sym = p.To.Sym
+		rel.Type = obj.R_TLS_IE
+		rel.Add = ctxt.Pc - p.Rel.Pc - 8 - int64(rel.Siz)
+
 	case 68: /* floating point store -> ADDR */
 		o1 = omvl(ctxt, p, &p.To, REGTMP)
 
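
The addend in case 104 folds in ARM's pipeline skew: an instruction that reads
PC observes its own address plus 8, so the literal-pool word carries the
distance from the loading instruction to the word itself, less the skew and
the word's own size. A hedged restatement of that arithmetic (ieAddend is a
hypothetical helper):

	func ieAddend(wordPC, loadPC int64, relSiz uint8) int64 {
		// mirrors rel.Add = ctxt.Pc - p.Rel.Pc - 8 - int64(rel.Siz) above
		return wordPC - loadPC - 8 - int64(relSiz)
	}
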
diff --git a/src/cmd/internal/obj/arm/obj5.go b/src/cmd/internal/obj/arm/obj5.go
index d9f587b..00ab13c 100644
--- a/src/cmd/internal/obj/arm/obj5.go
+++ b/src/cmd/internal/obj/arm/obj5.go
@@ -129,24 +129,6 @@
 			p.From.Offset = 0
 		}
 	}
-
-	if ctxt.Flag_shared != 0 {
-		// Shared libraries use R_ARM_TLS_IE32 instead of
-		// R_ARM_TLS_LE32, replacing the link time constant TLS offset in
-		// runtime.tlsg with an address to a GOT entry containing the
-		// offset. Rewrite $runtime.tlsg(SB) to runtime.tlsg(SB) to
-		// compensate.
-		if ctxt.Tlsg == nil {
-			ctxt.Tlsg = obj.Linklookup(ctxt, "runtime.tlsg", 0)
-		}
-
-		if p.From.Type == obj.TYPE_ADDR && p.From.Name == obj.NAME_EXTERN && p.From.Sym == ctxt.Tlsg {
-			p.From.Type = obj.TYPE_MEM
-		}
-		if p.To.Type == obj.TYPE_ADDR && p.To.Name == obj.NAME_EXTERN && p.To.Sym == ctxt.Tlsg {
-			p.To.Type = obj.TYPE_MEM
-		}
-	}
 }
 
 // Prog.mark
diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go
index f34dd6d..a78c37e 100644
--- a/src/cmd/internal/obj/arm64/asm7.go
+++ b/src/cmd/internal/obj/arm64/asm7.go
@@ -631,9 +631,6 @@
 	/*
 	 * lay out the code, emitting code and data relocations.
 	 */
-	if ctxt.Tlsg == nil {
-		ctxt.Tlsg = obj.Linklookup(ctxt, "runtime.tlsg", 0)
-	}
 	obj.Symgrow(ctxt, cursym, cursym.Size)
 	bp := cursym.P
 	psz := int32(0)
@@ -673,7 +670,7 @@
  * drop the pool now, and branch round it.
  */
 func checkpool(ctxt *obj.Link, p *obj.Prog, skip int) {
-	if pool.size >= 0xffff0 || !(ispcdisp(int32(p.Pc+4+int64(pool.size)-int64(pool.start)+8)) != 0) {
+	if pool.size >= 0xffff0 || !ispcdisp(int32(p.Pc+4+int64(pool.size)-int64(pool.start)+8)) {
 		flushpool(ctxt, p, skip)
 	} else if p.Link == nil {
 		flushpool(ctxt, p, 2)
@@ -826,27 +823,27 @@
 	return uint32(ctxt.Instoffset)
 }
 
-func ispcdisp(v int32) int {
+func ispcdisp(v int32) bool {
 	/* pc-relative addressing will reach? */
-	return obj.Bool2int(v >= -0xfffff && v <= 0xfffff && (v&3) == 0)
+	return v >= -0xfffff && v <= 0xfffff && (v&3) == 0
 }
 
-func isaddcon(v int64) int {
+func isaddcon(v int64) bool {
 	/* uimm12 or uimm24? */
 	if v < 0 {
-		return 0
+		return false
 	}
 	if (v & 0xFFF) == 0 {
 		v >>= 12
 	}
-	return obj.Bool2int(v <= 0xFFF)
+	return v <= 0xFFF
 }
 
-func isbitcon(v uint64) int {
+func isbitcon(v uint64) bool {
 	/*  fancy bimm32 or bimm64? */
 	// TODO(aram):
-	return 0
-	// return obj.Bool2int(findmask(v) != nil || (v>>32) == 0 && findmask(v|(v<<32)) != nil)
+	return false
+	// return findmask(v) != nil || (v>>32) == 0 && findmask(v|(v<<32)) != nil
 }
 
 func autoclass(l int64) int {
@@ -1007,11 +1004,11 @@
 			if v == 0 {
 				return C_ZCON
 			}
-			if isaddcon(v) != 0 {
+			if isaddcon(v) {
 				if v <= 0xFFF {
 					return C_ADDCON0
 				}
-				if isbitcon(uint64(v)) != 0 {
+				if isbitcon(uint64(v)) {
 					return C_ABCON
 				}
 				return C_ADDCON
@@ -1019,7 +1016,7 @@
 
 			t := movcon(v)
 			if t >= 0 {
-				if isbitcon(uint64(v)) != 0 {
+				if isbitcon(uint64(v)) {
 					return C_MBCON
 				}
 				return C_MOVCON
@@ -1027,13 +1024,13 @@
 
 			t = movcon(^v)
 			if t >= 0 {
-				if isbitcon(uint64(v)) != 0 {
+				if isbitcon(uint64(v)) {
 					return C_MBCON
 				}
 				return C_MOVCON
 			}
 
-			if isbitcon(uint64(v)) != 0 {
+			if isbitcon(uint64(v)) {
 				return C_BITCON
 			}
 
@@ -1062,7 +1059,7 @@
 		return C_GOK
 
 	aconsize:
-		if isaddcon(ctxt.Instoffset) != 0 {
+		if isaddcon(ctxt.Instoffset) {
 			return C_AACON
 		}
 		return C_LACON
@@ -2182,14 +2179,14 @@
 	case 24: /* mov/mvn Rs,Rd -> add $0,Rs,Rd or orr Rs,ZR,Rd */
 		rf := int(p.From.Reg)
 		rt := int(p.To.Reg)
-		s := obj.Bool2int(rf == REGSP || rt == REGSP)
+		s := rf == REGSP || rt == REGSP
 		if p.As == AMVN || p.As == AMVNW {
-			if s != 0 {
+			if s {
 				ctxt.Diag("illegal SP reference\n%v", p)
 			}
 			o1 = oprrr(ctxt, int(p.As))
 			o1 |= (uint32(rf&31) << 16) | (REGZERO & 31 << 5) | uint32(rt&31)
-		} else if s != 0 {
+		} else if s {
 			o1 = opirr(ctxt, int(p.As))
 			o1 |= (uint32(rf&31) << 5) | uint32(rt&31)
 		} else {
diff --git a/src/cmd/internal/obj/data.go b/src/cmd/internal/obj/data.go
index d3d6786..96129fd 100644
--- a/src/cmd/internal/obj/data.go
+++ b/src/cmd/internal/obj/data.go
@@ -36,10 +36,6 @@
 	"math"
 )
 
-func mangle(file string) {
-	log.Fatalf("%s: mangled input file", file)
-}
-
 func Symgrow(ctxt *Link, s *LSym, lsiz int64) {
 	siz := int(lsiz)
 	if int64(siz) != lsiz {
@@ -48,17 +44,19 @@
 	if len(s.P) >= siz {
 		return
 	}
+	// TODO(dfc) append cap-len at once, rather than
+	// one byte at a time.
 	for cap(s.P) < siz {
 		s.P = append(s.P[:cap(s.P)], 0)
 	}
 	s.P = s.P[:siz]
 }
 
-func savedata(ctxt *Link, s *LSym, p *Prog, pn string) {
+func savedata(ctxt *Link, s *LSym, p *Prog, file string) {
 	off := int32(p.From.Offset)
 	siz := int32(p.From3.Offset)
 	if off < 0 || siz < 0 || off >= 1<<30 || siz >= 100 {
-		mangle(pn)
+		log.Fatalf("%s: mangled input file", file)
 	}
 	if ctxt.Enforce_data_order != 0 && off < int32(len(s.P)) {
 		ctxt.Diag("data out of order (already have %d)\n%v", len(s.P), p)
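
The TODO(dfc) in Symgrow amounts to the following sketch (symgrowFast is
hypothetical, not code from this patch): grow with one zero-filled append
rather than a byte at a time.

	func symgrowFast(p []byte, siz int) []byte {
		if len(p) >= siz {
			return p
		}
		return append(p, make([]byte, siz-len(p))...) // zero-filled tail in one step
	}
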
@@ -142,115 +140,3 @@
 
 	return off + wid
 }
-
-func adduintxx(ctxt *Link, s *LSym, v uint64, wid int) int64 {
-	off := s.Size
-	Setuintxx(ctxt, s, off, v, int64(wid))
-	return off
-}
-
-func adduint8(ctxt *Link, s *LSym, v uint8) int64 {
-	return adduintxx(ctxt, s, uint64(v), 1)
-}
-
-func adduint16(ctxt *Link, s *LSym, v uint16) int64 {
-	return adduintxx(ctxt, s, uint64(v), 2)
-}
-
-func Adduint32(ctxt *Link, s *LSym, v uint32) int64 {
-	return adduintxx(ctxt, s, uint64(v), 4)
-}
-
-func Adduint64(ctxt *Link, s *LSym, v uint64) int64 {
-	return adduintxx(ctxt, s, v, 8)
-}
-
-func setuint8(ctxt *Link, s *LSym, r int64, v uint8) int64 {
-	return Setuintxx(ctxt, s, r, uint64(v), 1)
-}
-
-func setuint16(ctxt *Link, s *LSym, r int64, v uint16) int64 {
-	return Setuintxx(ctxt, s, r, uint64(v), 2)
-}
-
-func setuint32(ctxt *Link, s *LSym, r int64, v uint32) int64 {
-	return Setuintxx(ctxt, s, r, uint64(v), 4)
-}
-
-func setuint64(ctxt *Link, s *LSym, r int64, v uint64) int64 {
-	return Setuintxx(ctxt, s, r, v, 8)
-}
-
-func addaddrplus(ctxt *Link, s *LSym, t *LSym, add int64) int64 {
-	if s.Type == 0 {
-		s.Type = SDATA
-	}
-	i := s.Size
-	s.Size += int64(ctxt.Arch.Ptrsize)
-	Symgrow(ctxt, s, s.Size)
-	r := Addrel(s)
-	r.Sym = t
-	r.Off = int32(i)
-	r.Siz = uint8(ctxt.Arch.Ptrsize)
-	r.Type = R_ADDR
-	r.Add = add
-	return i + int64(r.Siz)
-}
-
-func addpcrelplus(ctxt *Link, s *LSym, t *LSym, add int64) int64 {
-	if s.Type == 0 {
-		s.Type = SDATA
-	}
-	i := s.Size
-	s.Size += 4
-	Symgrow(ctxt, s, s.Size)
-	r := Addrel(s)
-	r.Sym = t
-	r.Off = int32(i)
-	r.Add = add
-	r.Type = R_PCREL
-	r.Siz = 4
-	return i + int64(r.Siz)
-}
-
-func addaddr(ctxt *Link, s *LSym, t *LSym) int64 {
-	return addaddrplus(ctxt, s, t, 0)
-}
-
-func setaddrplus(ctxt *Link, s *LSym, off int64, t *LSym, add int64) int64 {
-	if s.Type == 0 {
-		s.Type = SDATA
-	}
-	if off+int64(ctxt.Arch.Ptrsize) > s.Size {
-		s.Size = off + int64(ctxt.Arch.Ptrsize)
-		Symgrow(ctxt, s, s.Size)
-	}
-
-	r := Addrel(s)
-	r.Sym = t
-	r.Off = int32(off)
-	r.Siz = uint8(ctxt.Arch.Ptrsize)
-	r.Type = R_ADDR
-	r.Add = add
-	return off + int64(r.Siz)
-}
-
-func setaddr(ctxt *Link, s *LSym, off int64, t *LSym) int64 {
-	return setaddrplus(ctxt, s, off, t, 0)
-}
-
-func addaddrplus4(ctxt *Link, s *LSym, t *LSym, add int64) int64 {
-	if s.Type == 0 {
-		s.Type = SDATA
-	}
-	i := s.Size
-	s.Size += 4
-	Symgrow(ctxt, s, s.Size)
-	r := Addrel(s)
-	r.Sym = t
-	r.Off = int32(i)
-	r.Siz = 4
-	r.Type = R_ADDR
-	r.Add = add
-	return i + int64(r.Siz)
-}
diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go
index a5d622a..22dbc86 100644
--- a/src/cmd/internal/obj/link.go
+++ b/src/cmd/internal/obj/link.go
@@ -247,11 +247,11 @@
 // by clients such as the compiler. The exact meaning of this
 // data is up to the client and is not interpreted by the cmd/internal/obj/... packages.
 type ProgInfo struct {
+	_        struct{} // to prevent unkeyed literals. Placed first because a trailing zero-sized field would take space.
 	Flags    uint32   // flag bits
 	Reguse   uint64   // registers implicitly used by this instruction
 	Regset   uint64   // registers implicitly set by this instruction
 	Regindex uint64   // registers used by addressing mode
-	_        struct{} // to prevent unkeyed literals
 }
 
 // Prog.as opcodes.
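
The reason the blank field moved to the front: Go pads a struct whose final
field is zero-sized so that a pointer to that field cannot point one past the
object, whereas a leading zero-sized field costs nothing. A self-contained
check (type names here are illustrative only):

	package main

	import (
		"fmt"
		"unsafe"
	)

	type tail struct {
		x int64
		_ struct{} // trailing zero-sized field: forces padding
	}

	type head struct {
		_ struct{} // leading zero-sized field: free
		x int64
	}

	func main() {
		fmt.Println(unsafe.Sizeof(tail{}), unsafe.Sizeof(head{})) // 16 8 on 64-bit
	}
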
@@ -334,6 +334,7 @@
 	Sxxx = iota
 	STEXT
 	SELFRXSECT
+
 	STYPE
 	SSTRING
 	SGOSTRING
@@ -341,6 +342,25 @@
 	SGCBITS
 	SRODATA
 	SFUNCTAB
+
+	// Types STYPE-SFUNCTAB above are written to the .rodata section by default.
+	// When linking a shared object, some conceptually "read only" types need to
+	// be written to by relocations and putting them in a section called
+	// ".rodata" interacts poorly with the system linkers. The GNU linkers
+	// support this situation by arranging for sections of the name
+	// ".data.rel.ro.XXX" to be mprotected read only by the dynamic linker after
+	// relocations have been applied, so when the Go linker is creating a shared
+	// object it checks all objects of the above types and bumps any object that
+	// has a relocation to it to the corresponding type below, which are then
+	// written to sections with appropriate magic names.
+	STYPERELRO
+	SSTRINGRELRO
+	SGOSTRINGRELRO
+	SGOFUNCRELRO
+	SGCBITSRELRO
+	SRODATARELRO
+	SFUNCTABRELRO
+
 	STYPELINK
 	SSYMTAB
 	SPCLNTAB
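
The "bump" described in the comment above is a one-to-one mapping from each
read-only kind to its RELRO variant; a hedged sketch (relroVariant is a
hypothetical helper, not the linker's code):

	func relroVariant(t int) int {
		switch t {
		case STYPE:
			return STYPERELRO
		case SSTRING:
			return SSTRINGRELRO
		case SGOSTRING:
			return SGOSTRINGRELRO
		case SGOFUNC:
			return SGOFUNCRELRO
		case SGCBITS:
			return SGCBITSRELRO
		case SRODATA:
			return SRODATARELRO
		case SFUNCTAB:
			return SFUNCTABRELRO
		}
		return t // other kinds are unaffected
	}
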
@@ -394,23 +414,16 @@
 	R_CALLPOWER
 	R_CONST
 	R_PCREL
-	// R_TLS (only used on arm currently, and not on android and darwin where tlsg is
-	// a regular variable) resolves to data needed to access the thread-local g. It is
-	// interpreted differently depending on toolchain flags to implement either the
-	// "local exec" or "inital exec" model for tls access.
-	// TODO(mwhudson): change to use R_TLS_LE or R_TLS_IE as appropriate, not having
-	// R_TLS do double duty.
-	R_TLS
-	// R_TLS_LE (only used on 386 and amd64 currently) resolves to the offset of the
-	// thread-local g from the thread local base and is used to implement the "local
-	// exec" model for tls access (r.Sym is not set by the compiler for this case but
-	// is set to Tlsg in the linker when externally linking).
+	// R_TLS_LE, used on 386, amd64, and ARM, resolves to the offset of the
+	// thread-local symbol from the thread local base and is used to implement the
+	// "local exec" model for tls access (r.Sym is not set on intel platforms but is
+	// set to a TLS symbol -- runtime.tlsg -- in the linker when externally linking).
 	R_TLS_LE
-	// R_TLS_IE (only used on 386 and amd64 currently) resolves to the PC-relative
-	// offset to a GOT slot containing the offset the thread-local g from the thread
-	// local base and is used to implemented the "initial exec" model for tls access
-	// (r.Sym is not set by the compiler for this case but is set to Tlsg in the
-	// linker when externally linking).
+	// R_TLS_IE, used on 386, amd64, and ARM, resolves to the PC-relative offset to a
+	// GOT slot containing the offset of the thread-local symbol from the thread local
+	// base and is used to implement the "initial exec" model for tls access (r.Sym
+	// is not set on intel platforms but is set to a TLS symbol -- runtime.tlsg -- in
+	// the linker when externally linking).
 	R_TLS_IE
 	R_GOTOFF
 	R_PLT0
@@ -485,13 +498,13 @@
 	Sym_divu           *LSym
 	Sym_mod            *LSym
 	Sym_modu           *LSym
-	Tlsg               *LSym
 	Plan9privates      *LSym
 	Curp               *Prog
 	Printp             *Prog
 	Blitrl             *Prog
 	Elitrl             *Prog
 	Rexflag            int
+	Vexflag            int
 	Rep                int
 	Repn               int
 	Lock               int
@@ -510,6 +523,19 @@
 	Etextp             *LSym
 }
 
+// FixedFrameSize returns the smallest possible offset from the hardware stack
+// pointer to a local variable on the stack. Architectures that use a link
+// register save its value on the stack in the function prologue and so always
+// have a pointer between the hardware stack pointer and the local variable
+// area.
+func (ctxt *Link) FixedFrameSize() int64 {
+	switch ctxt.Arch.Thechar {
+	case '6', '8':
+		return 0
+	default:
+		return int64(ctxt.Arch.Ptrsize)
+	}
+}
+
 type SymVer struct {
 	Name    string
 	Version int // TODO: make int16 to match LSym.Version?
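
FixedFrameSize in use, as a hedged sketch (expectedFrameSize is a hypothetical
stand-in; the Thechar letters follow the toolchain convention of '6' for amd64
and '8' for 386):

	func expectedFrameSize(thechar byte, ptrsize int64) int64 {
		switch thechar {
		case '6', '8': // amd64, 386: CALL pushes the return address itself
			return 0
		default: // link-register architectures reserve one pointer slot for LR
			return ptrsize
		}
	}
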
diff --git a/src/cmd/internal/obj/mips/asm0.go b/src/cmd/internal/obj/mips/asm0.go
index 2955a00..c7498ea 100644
--- a/src/cmd/internal/obj/mips/asm0.go
+++ b/src/cmd/internal/obj/mips/asm0.go
@@ -504,9 +504,6 @@
 	/*
 	 * lay out the code, emitting code and data relocations.
 	 */
-	if ctxt.Tlsg == nil {
-		ctxt.Tlsg = obj.Linklookup(ctxt, "runtime.tlsg", 0)
-	}
 
 	obj.Symgrow(ctxt, cursym, cursym.Size)
 
diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go
index 2955a00..c7bd835 100644
--- a/src/cmd/internal/obj/ppc64/asm9.go
+++ b/src/cmd/internal/obj/ppc64/asm9.go
@@ -59,342 +59,342 @@
 }
 
 var optab = []Optab{
-	Optab{obj.ATEXT, C_LEXT, C_NONE, C_NONE, C_TEXTSIZE, 0, 0, 0},
-	Optab{obj.ATEXT, C_LEXT, C_NONE, C_LCON, C_TEXTSIZE, 0, 0, 0},
-	Optab{obj.ATEXT, C_ADDR, C_NONE, C_NONE, C_TEXTSIZE, 0, 0, 0},
-	Optab{obj.ATEXT, C_ADDR, C_NONE, C_LCON, C_TEXTSIZE, 0, 0, 0},
+	{obj.ATEXT, C_LEXT, C_NONE, C_NONE, C_TEXTSIZE, 0, 0, 0},
+	{obj.ATEXT, C_LEXT, C_NONE, C_LCON, C_TEXTSIZE, 0, 0, 0},
+	{obj.ATEXT, C_ADDR, C_NONE, C_NONE, C_TEXTSIZE, 0, 0, 0},
+	{obj.ATEXT, C_ADDR, C_NONE, C_LCON, C_TEXTSIZE, 0, 0, 0},
 	/* move register */
-	Optab{AMOVD, C_REG, C_NONE, C_NONE, C_REG, 1, 4, 0},
-	Optab{AMOVB, C_REG, C_NONE, C_NONE, C_REG, 12, 4, 0},
-	Optab{AMOVBZ, C_REG, C_NONE, C_NONE, C_REG, 13, 4, 0},
-	Optab{AMOVW, C_REG, C_NONE, C_NONE, C_REG, 12, 4, 0},
-	Optab{AMOVWZ, C_REG, C_NONE, C_NONE, C_REG, 13, 4, 0},
-	Optab{AADD, C_REG, C_REG, C_NONE, C_REG, 2, 4, 0},
-	Optab{AADD, C_REG, C_NONE, C_NONE, C_REG, 2, 4, 0},
-	Optab{AADD, C_ADDCON, C_REG, C_NONE, C_REG, 4, 4, 0},
-	Optab{AADD, C_ADDCON, C_NONE, C_NONE, C_REG, 4, 4, 0},
-	Optab{AADD, C_UCON, C_REG, C_NONE, C_REG, 20, 4, 0},
-	Optab{AADD, C_UCON, C_NONE, C_NONE, C_REG, 20, 4, 0},
-	Optab{AADD, C_LCON, C_REG, C_NONE, C_REG, 22, 12, 0},
-	Optab{AADD, C_LCON, C_NONE, C_NONE, C_REG, 22, 12, 0},
-	Optab{AADDC, C_REG, C_REG, C_NONE, C_REG, 2, 4, 0},
-	Optab{AADDC, C_REG, C_NONE, C_NONE, C_REG, 2, 4, 0},
-	Optab{AADDC, C_ADDCON, C_REG, C_NONE, C_REG, 4, 4, 0},
-	Optab{AADDC, C_ADDCON, C_NONE, C_NONE, C_REG, 4, 4, 0},
-	Optab{AADDC, C_LCON, C_REG, C_NONE, C_REG, 22, 12, 0},
-	Optab{AADDC, C_LCON, C_NONE, C_NONE, C_REG, 22, 12, 0},
-	Optab{AAND, C_REG, C_REG, C_NONE, C_REG, 6, 4, 0}, /* logical, no literal */
-	Optab{AAND, C_REG, C_NONE, C_NONE, C_REG, 6, 4, 0},
-	Optab{AANDCC, C_REG, C_REG, C_NONE, C_REG, 6, 4, 0},
-	Optab{AANDCC, C_REG, C_NONE, C_NONE, C_REG, 6, 4, 0},
-	Optab{AANDCC, C_ANDCON, C_NONE, C_NONE, C_REG, 58, 4, 0},
-	Optab{AANDCC, C_ANDCON, C_REG, C_NONE, C_REG, 58, 4, 0},
-	Optab{AANDCC, C_UCON, C_NONE, C_NONE, C_REG, 59, 4, 0},
-	Optab{AANDCC, C_UCON, C_REG, C_NONE, C_REG, 59, 4, 0},
-	Optab{AANDCC, C_LCON, C_NONE, C_NONE, C_REG, 23, 12, 0},
-	Optab{AANDCC, C_LCON, C_REG, C_NONE, C_REG, 23, 12, 0},
-	Optab{AMULLW, C_REG, C_REG, C_NONE, C_REG, 2, 4, 0},
-	Optab{AMULLW, C_REG, C_NONE, C_NONE, C_REG, 2, 4, 0},
-	Optab{AMULLW, C_ADDCON, C_REG, C_NONE, C_REG, 4, 4, 0},
-	Optab{AMULLW, C_ADDCON, C_NONE, C_NONE, C_REG, 4, 4, 0},
-	Optab{AMULLW, C_ANDCON, C_REG, C_NONE, C_REG, 4, 4, 0},
-	Optab{AMULLW, C_ANDCON, C_NONE, C_NONE, C_REG, 4, 4, 0},
-	Optab{AMULLW, C_LCON, C_REG, C_NONE, C_REG, 22, 12, 0},
-	Optab{AMULLW, C_LCON, C_NONE, C_NONE, C_REG, 22, 12, 0},
-	Optab{ASUBC, C_REG, C_REG, C_NONE, C_REG, 10, 4, 0},
-	Optab{ASUBC, C_REG, C_NONE, C_NONE, C_REG, 10, 4, 0},
-	Optab{ASUBC, C_REG, C_NONE, C_ADDCON, C_REG, 27, 4, 0},
-	Optab{ASUBC, C_REG, C_NONE, C_LCON, C_REG, 28, 12, 0},
-	Optab{AOR, C_REG, C_REG, C_NONE, C_REG, 6, 4, 0}, /* logical, literal not cc (or/xor) */
-	Optab{AOR, C_REG, C_NONE, C_NONE, C_REG, 6, 4, 0},
-	Optab{AOR, C_ANDCON, C_NONE, C_NONE, C_REG, 58, 4, 0},
-	Optab{AOR, C_ANDCON, C_REG, C_NONE, C_REG, 58, 4, 0},
-	Optab{AOR, C_UCON, C_NONE, C_NONE, C_REG, 59, 4, 0},
-	Optab{AOR, C_UCON, C_REG, C_NONE, C_REG, 59, 4, 0},
-	Optab{AOR, C_LCON, C_NONE, C_NONE, C_REG, 23, 12, 0},
-	Optab{AOR, C_LCON, C_REG, C_NONE, C_REG, 23, 12, 0},
-	Optab{ADIVW, C_REG, C_REG, C_NONE, C_REG, 2, 4, 0}, /* op r1[,r2],r3 */
-	Optab{ADIVW, C_REG, C_NONE, C_NONE, C_REG, 2, 4, 0},
-	Optab{ASUB, C_REG, C_REG, C_NONE, C_REG, 10, 4, 0}, /* op r2[,r1],r3 */
-	Optab{ASUB, C_REG, C_NONE, C_NONE, C_REG, 10, 4, 0},
-	Optab{ASLW, C_REG, C_NONE, C_NONE, C_REG, 6, 4, 0},
-	Optab{ASLW, C_REG, C_REG, C_NONE, C_REG, 6, 4, 0},
-	Optab{ASLD, C_REG, C_NONE, C_NONE, C_REG, 6, 4, 0},
-	Optab{ASLD, C_REG, C_REG, C_NONE, C_REG, 6, 4, 0},
-	Optab{ASLD, C_SCON, C_REG, C_NONE, C_REG, 25, 4, 0},
-	Optab{ASLD, C_SCON, C_NONE, C_NONE, C_REG, 25, 4, 0},
-	Optab{ASLW, C_SCON, C_REG, C_NONE, C_REG, 57, 4, 0},
-	Optab{ASLW, C_SCON, C_NONE, C_NONE, C_REG, 57, 4, 0},
-	Optab{ASRAW, C_REG, C_NONE, C_NONE, C_REG, 6, 4, 0},
-	Optab{ASRAW, C_REG, C_REG, C_NONE, C_REG, 6, 4, 0},
-	Optab{ASRAW, C_SCON, C_REG, C_NONE, C_REG, 56, 4, 0},
-	Optab{ASRAW, C_SCON, C_NONE, C_NONE, C_REG, 56, 4, 0},
-	Optab{ASRAD, C_REG, C_NONE, C_NONE, C_REG, 6, 4, 0},
-	Optab{ASRAD, C_REG, C_REG, C_NONE, C_REG, 6, 4, 0},
-	Optab{ASRAD, C_SCON, C_REG, C_NONE, C_REG, 56, 4, 0},
-	Optab{ASRAD, C_SCON, C_NONE, C_NONE, C_REG, 56, 4, 0},
-	Optab{ARLWMI, C_SCON, C_REG, C_LCON, C_REG, 62, 4, 0},
-	Optab{ARLWMI, C_REG, C_REG, C_LCON, C_REG, 63, 4, 0},
-	Optab{ARLDMI, C_SCON, C_REG, C_LCON, C_REG, 30, 4, 0},
-	Optab{ARLDC, C_SCON, C_REG, C_LCON, C_REG, 29, 4, 0},
-	Optab{ARLDCL, C_SCON, C_REG, C_LCON, C_REG, 29, 4, 0},
-	Optab{ARLDCL, C_REG, C_REG, C_LCON, C_REG, 14, 4, 0},
-	Optab{ARLDCL, C_REG, C_NONE, C_LCON, C_REG, 14, 4, 0},
-	Optab{AFADD, C_FREG, C_NONE, C_NONE, C_FREG, 2, 4, 0},
-	Optab{AFADD, C_FREG, C_REG, C_NONE, C_FREG, 2, 4, 0},
-	Optab{AFABS, C_FREG, C_NONE, C_NONE, C_FREG, 33, 4, 0},
-	Optab{AFABS, C_NONE, C_NONE, C_NONE, C_FREG, 33, 4, 0},
-	Optab{AFMOVD, C_FREG, C_NONE, C_NONE, C_FREG, 33, 4, 0},
-	Optab{AFMADD, C_FREG, C_REG, C_FREG, C_FREG, 34, 4, 0},
-	Optab{AFMUL, C_FREG, C_NONE, C_NONE, C_FREG, 32, 4, 0},
-	Optab{AFMUL, C_FREG, C_REG, C_NONE, C_FREG, 32, 4, 0},
+	{AMOVD, C_REG, C_NONE, C_NONE, C_REG, 1, 4, 0},
+	{AMOVB, C_REG, C_NONE, C_NONE, C_REG, 12, 4, 0},
+	{AMOVBZ, C_REG, C_NONE, C_NONE, C_REG, 13, 4, 0},
+	{AMOVW, C_REG, C_NONE, C_NONE, C_REG, 12, 4, 0},
+	{AMOVWZ, C_REG, C_NONE, C_NONE, C_REG, 13, 4, 0},
+	{AADD, C_REG, C_REG, C_NONE, C_REG, 2, 4, 0},
+	{AADD, C_REG, C_NONE, C_NONE, C_REG, 2, 4, 0},
+	{AADD, C_ADDCON, C_REG, C_NONE, C_REG, 4, 4, 0},
+	{AADD, C_ADDCON, C_NONE, C_NONE, C_REG, 4, 4, 0},
+	{AADD, C_UCON, C_REG, C_NONE, C_REG, 20, 4, 0},
+	{AADD, C_UCON, C_NONE, C_NONE, C_REG, 20, 4, 0},
+	{AADD, C_LCON, C_REG, C_NONE, C_REG, 22, 12, 0},
+	{AADD, C_LCON, C_NONE, C_NONE, C_REG, 22, 12, 0},
+	{AADDC, C_REG, C_REG, C_NONE, C_REG, 2, 4, 0},
+	{AADDC, C_REG, C_NONE, C_NONE, C_REG, 2, 4, 0},
+	{AADDC, C_ADDCON, C_REG, C_NONE, C_REG, 4, 4, 0},
+	{AADDC, C_ADDCON, C_NONE, C_NONE, C_REG, 4, 4, 0},
+	{AADDC, C_LCON, C_REG, C_NONE, C_REG, 22, 12, 0},
+	{AADDC, C_LCON, C_NONE, C_NONE, C_REG, 22, 12, 0},
+	{AAND, C_REG, C_REG, C_NONE, C_REG, 6, 4, 0}, /* logical, no literal */
+	{AAND, C_REG, C_NONE, C_NONE, C_REG, 6, 4, 0},
+	{AANDCC, C_REG, C_REG, C_NONE, C_REG, 6, 4, 0},
+	{AANDCC, C_REG, C_NONE, C_NONE, C_REG, 6, 4, 0},
+	{AANDCC, C_ANDCON, C_NONE, C_NONE, C_REG, 58, 4, 0},
+	{AANDCC, C_ANDCON, C_REG, C_NONE, C_REG, 58, 4, 0},
+	{AANDCC, C_UCON, C_NONE, C_NONE, C_REG, 59, 4, 0},
+	{AANDCC, C_UCON, C_REG, C_NONE, C_REG, 59, 4, 0},
+	{AANDCC, C_LCON, C_NONE, C_NONE, C_REG, 23, 12, 0},
+	{AANDCC, C_LCON, C_REG, C_NONE, C_REG, 23, 12, 0},
+	{AMULLW, C_REG, C_REG, C_NONE, C_REG, 2, 4, 0},
+	{AMULLW, C_REG, C_NONE, C_NONE, C_REG, 2, 4, 0},
+	{AMULLW, C_ADDCON, C_REG, C_NONE, C_REG, 4, 4, 0},
+	{AMULLW, C_ADDCON, C_NONE, C_NONE, C_REG, 4, 4, 0},
+	{AMULLW, C_ANDCON, C_REG, C_NONE, C_REG, 4, 4, 0},
+	{AMULLW, C_ANDCON, C_NONE, C_NONE, C_REG, 4, 4, 0},
+	{AMULLW, C_LCON, C_REG, C_NONE, C_REG, 22, 12, 0},
+	{AMULLW, C_LCON, C_NONE, C_NONE, C_REG, 22, 12, 0},
+	{ASUBC, C_REG, C_REG, C_NONE, C_REG, 10, 4, 0},
+	{ASUBC, C_REG, C_NONE, C_NONE, C_REG, 10, 4, 0},
+	{ASUBC, C_REG, C_NONE, C_ADDCON, C_REG, 27, 4, 0},
+	{ASUBC, C_REG, C_NONE, C_LCON, C_REG, 28, 12, 0},
+	{AOR, C_REG, C_REG, C_NONE, C_REG, 6, 4, 0}, /* logical, literal not cc (or/xor) */
+	{AOR, C_REG, C_NONE, C_NONE, C_REG, 6, 4, 0},
+	{AOR, C_ANDCON, C_NONE, C_NONE, C_REG, 58, 4, 0},
+	{AOR, C_ANDCON, C_REG, C_NONE, C_REG, 58, 4, 0},
+	{AOR, C_UCON, C_NONE, C_NONE, C_REG, 59, 4, 0},
+	{AOR, C_UCON, C_REG, C_NONE, C_REG, 59, 4, 0},
+	{AOR, C_LCON, C_NONE, C_NONE, C_REG, 23, 12, 0},
+	{AOR, C_LCON, C_REG, C_NONE, C_REG, 23, 12, 0},
+	{ADIVW, C_REG, C_REG, C_NONE, C_REG, 2, 4, 0}, /* op r1[,r2],r3 */
+	{ADIVW, C_REG, C_NONE, C_NONE, C_REG, 2, 4, 0},
+	{ASUB, C_REG, C_REG, C_NONE, C_REG, 10, 4, 0}, /* op r2[,r1],r3 */
+	{ASUB, C_REG, C_NONE, C_NONE, C_REG, 10, 4, 0},
+	{ASLW, C_REG, C_NONE, C_NONE, C_REG, 6, 4, 0},
+	{ASLW, C_REG, C_REG, C_NONE, C_REG, 6, 4, 0},
+	{ASLD, C_REG, C_NONE, C_NONE, C_REG, 6, 4, 0},
+	{ASLD, C_REG, C_REG, C_NONE, C_REG, 6, 4, 0},
+	{ASLD, C_SCON, C_REG, C_NONE, C_REG, 25, 4, 0},
+	{ASLD, C_SCON, C_NONE, C_NONE, C_REG, 25, 4, 0},
+	{ASLW, C_SCON, C_REG, C_NONE, C_REG, 57, 4, 0},
+	{ASLW, C_SCON, C_NONE, C_NONE, C_REG, 57, 4, 0},
+	{ASRAW, C_REG, C_NONE, C_NONE, C_REG, 6, 4, 0},
+	{ASRAW, C_REG, C_REG, C_NONE, C_REG, 6, 4, 0},
+	{ASRAW, C_SCON, C_REG, C_NONE, C_REG, 56, 4, 0},
+	{ASRAW, C_SCON, C_NONE, C_NONE, C_REG, 56, 4, 0},
+	{ASRAD, C_REG, C_NONE, C_NONE, C_REG, 6, 4, 0},
+	{ASRAD, C_REG, C_REG, C_NONE, C_REG, 6, 4, 0},
+	{ASRAD, C_SCON, C_REG, C_NONE, C_REG, 56, 4, 0},
+	{ASRAD, C_SCON, C_NONE, C_NONE, C_REG, 56, 4, 0},
+	{ARLWMI, C_SCON, C_REG, C_LCON, C_REG, 62, 4, 0},
+	{ARLWMI, C_REG, C_REG, C_LCON, C_REG, 63, 4, 0},
+	{ARLDMI, C_SCON, C_REG, C_LCON, C_REG, 30, 4, 0},
+	{ARLDC, C_SCON, C_REG, C_LCON, C_REG, 29, 4, 0},
+	{ARLDCL, C_SCON, C_REG, C_LCON, C_REG, 29, 4, 0},
+	{ARLDCL, C_REG, C_REG, C_LCON, C_REG, 14, 4, 0},
+	{ARLDCL, C_REG, C_NONE, C_LCON, C_REG, 14, 4, 0},
+	{AFADD, C_FREG, C_NONE, C_NONE, C_FREG, 2, 4, 0},
+	{AFADD, C_FREG, C_REG, C_NONE, C_FREG, 2, 4, 0},
+	{AFABS, C_FREG, C_NONE, C_NONE, C_FREG, 33, 4, 0},
+	{AFABS, C_NONE, C_NONE, C_NONE, C_FREG, 33, 4, 0},
+	{AFMOVD, C_FREG, C_NONE, C_NONE, C_FREG, 33, 4, 0},
+	{AFMADD, C_FREG, C_REG, C_FREG, C_FREG, 34, 4, 0},
+	{AFMUL, C_FREG, C_NONE, C_NONE, C_FREG, 32, 4, 0},
+	{AFMUL, C_FREG, C_REG, C_NONE, C_FREG, 32, 4, 0},
 
 	/* store, short offset */
-	Optab{AMOVD, C_REG, C_REG, C_NONE, C_ZOREG, 7, 4, REGZERO},
-	Optab{AMOVW, C_REG, C_REG, C_NONE, C_ZOREG, 7, 4, REGZERO},
-	Optab{AMOVWZ, C_REG, C_REG, C_NONE, C_ZOREG, 7, 4, REGZERO},
-	Optab{AMOVBZ, C_REG, C_REG, C_NONE, C_ZOREG, 7, 4, REGZERO},
-	Optab{AMOVBZU, C_REG, C_REG, C_NONE, C_ZOREG, 7, 4, REGZERO},
-	Optab{AMOVB, C_REG, C_REG, C_NONE, C_ZOREG, 7, 4, REGZERO},
-	Optab{AMOVBU, C_REG, C_REG, C_NONE, C_ZOREG, 7, 4, REGZERO},
-	Optab{AMOVD, C_REG, C_NONE, C_NONE, C_SEXT, 7, 4, REGSB},
-	Optab{AMOVW, C_REG, C_NONE, C_NONE, C_SEXT, 7, 4, REGSB},
-	Optab{AMOVWZ, C_REG, C_NONE, C_NONE, C_SEXT, 7, 4, REGSB},
-	Optab{AMOVBZ, C_REG, C_NONE, C_NONE, C_SEXT, 7, 4, REGSB},
-	Optab{AMOVB, C_REG, C_NONE, C_NONE, C_SEXT, 7, 4, REGSB},
-	Optab{AMOVD, C_REG, C_NONE, C_NONE, C_SAUTO, 7, 4, REGSP},
-	Optab{AMOVW, C_REG, C_NONE, C_NONE, C_SAUTO, 7, 4, REGSP},
-	Optab{AMOVWZ, C_REG, C_NONE, C_NONE, C_SAUTO, 7, 4, REGSP},
-	Optab{AMOVBZ, C_REG, C_NONE, C_NONE, C_SAUTO, 7, 4, REGSP},
-	Optab{AMOVB, C_REG, C_NONE, C_NONE, C_SAUTO, 7, 4, REGSP},
-	Optab{AMOVD, C_REG, C_NONE, C_NONE, C_SOREG, 7, 4, REGZERO},
-	Optab{AMOVW, C_REG, C_NONE, C_NONE, C_SOREG, 7, 4, REGZERO},
-	Optab{AMOVWZ, C_REG, C_NONE, C_NONE, C_SOREG, 7, 4, REGZERO},
-	Optab{AMOVBZ, C_REG, C_NONE, C_NONE, C_SOREG, 7, 4, REGZERO},
-	Optab{AMOVBZU, C_REG, C_NONE, C_NONE, C_SOREG, 7, 4, REGZERO},
-	Optab{AMOVB, C_REG, C_NONE, C_NONE, C_SOREG, 7, 4, REGZERO},
-	Optab{AMOVBU, C_REG, C_NONE, C_NONE, C_SOREG, 7, 4, REGZERO},
+	{AMOVD, C_REG, C_REG, C_NONE, C_ZOREG, 7, 4, REGZERO},
+	{AMOVW, C_REG, C_REG, C_NONE, C_ZOREG, 7, 4, REGZERO},
+	{AMOVWZ, C_REG, C_REG, C_NONE, C_ZOREG, 7, 4, REGZERO},
+	{AMOVBZ, C_REG, C_REG, C_NONE, C_ZOREG, 7, 4, REGZERO},
+	{AMOVBZU, C_REG, C_REG, C_NONE, C_ZOREG, 7, 4, REGZERO},
+	{AMOVB, C_REG, C_REG, C_NONE, C_ZOREG, 7, 4, REGZERO},
+	{AMOVBU, C_REG, C_REG, C_NONE, C_ZOREG, 7, 4, REGZERO},
+	{AMOVD, C_REG, C_NONE, C_NONE, C_SEXT, 7, 4, REGSB},
+	{AMOVW, C_REG, C_NONE, C_NONE, C_SEXT, 7, 4, REGSB},
+	{AMOVWZ, C_REG, C_NONE, C_NONE, C_SEXT, 7, 4, REGSB},
+	{AMOVBZ, C_REG, C_NONE, C_NONE, C_SEXT, 7, 4, REGSB},
+	{AMOVB, C_REG, C_NONE, C_NONE, C_SEXT, 7, 4, REGSB},
+	{AMOVD, C_REG, C_NONE, C_NONE, C_SAUTO, 7, 4, REGSP},
+	{AMOVW, C_REG, C_NONE, C_NONE, C_SAUTO, 7, 4, REGSP},
+	{AMOVWZ, C_REG, C_NONE, C_NONE, C_SAUTO, 7, 4, REGSP},
+	{AMOVBZ, C_REG, C_NONE, C_NONE, C_SAUTO, 7, 4, REGSP},
+	{AMOVB, C_REG, C_NONE, C_NONE, C_SAUTO, 7, 4, REGSP},
+	{AMOVD, C_REG, C_NONE, C_NONE, C_SOREG, 7, 4, REGZERO},
+	{AMOVW, C_REG, C_NONE, C_NONE, C_SOREG, 7, 4, REGZERO},
+	{AMOVWZ, C_REG, C_NONE, C_NONE, C_SOREG, 7, 4, REGZERO},
+	{AMOVBZ, C_REG, C_NONE, C_NONE, C_SOREG, 7, 4, REGZERO},
+	{AMOVBZU, C_REG, C_NONE, C_NONE, C_SOREG, 7, 4, REGZERO},
+	{AMOVB, C_REG, C_NONE, C_NONE, C_SOREG, 7, 4, REGZERO},
+	{AMOVBU, C_REG, C_NONE, C_NONE, C_SOREG, 7, 4, REGZERO},
 
 	/* load, short offset */
-	Optab{AMOVD, C_ZOREG, C_REG, C_NONE, C_REG, 8, 4, REGZERO},
-	Optab{AMOVW, C_ZOREG, C_REG, C_NONE, C_REG, 8, 4, REGZERO},
-	Optab{AMOVWZ, C_ZOREG, C_REG, C_NONE, C_REG, 8, 4, REGZERO},
-	Optab{AMOVBZ, C_ZOREG, C_REG, C_NONE, C_REG, 8, 4, REGZERO},
-	Optab{AMOVBZU, C_ZOREG, C_REG, C_NONE, C_REG, 8, 4, REGZERO},
-	Optab{AMOVB, C_ZOREG, C_REG, C_NONE, C_REG, 9, 8, REGZERO},
-	Optab{AMOVBU, C_ZOREG, C_REG, C_NONE, C_REG, 9, 8, REGZERO},
-	Optab{AMOVD, C_SEXT, C_NONE, C_NONE, C_REG, 8, 4, REGSB},
-	Optab{AMOVW, C_SEXT, C_NONE, C_NONE, C_REG, 8, 4, REGSB},
-	Optab{AMOVWZ, C_SEXT, C_NONE, C_NONE, C_REG, 8, 4, REGSB},
-	Optab{AMOVBZ, C_SEXT, C_NONE, C_NONE, C_REG, 8, 4, REGSB},
-	Optab{AMOVB, C_SEXT, C_NONE, C_NONE, C_REG, 9, 8, REGSB},
-	Optab{AMOVD, C_SAUTO, C_NONE, C_NONE, C_REG, 8, 4, REGSP},
-	Optab{AMOVW, C_SAUTO, C_NONE, C_NONE, C_REG, 8, 4, REGSP},
-	Optab{AMOVWZ, C_SAUTO, C_NONE, C_NONE, C_REG, 8, 4, REGSP},
-	Optab{AMOVBZ, C_SAUTO, C_NONE, C_NONE, C_REG, 8, 4, REGSP},
-	Optab{AMOVB, C_SAUTO, C_NONE, C_NONE, C_REG, 9, 8, REGSP},
-	Optab{AMOVD, C_SOREG, C_NONE, C_NONE, C_REG, 8, 4, REGZERO},
-	Optab{AMOVW, C_SOREG, C_NONE, C_NONE, C_REG, 8, 4, REGZERO},
-	Optab{AMOVWZ, C_SOREG, C_NONE, C_NONE, C_REG, 8, 4, REGZERO},
-	Optab{AMOVBZ, C_SOREG, C_NONE, C_NONE, C_REG, 8, 4, REGZERO},
-	Optab{AMOVBZU, C_SOREG, C_NONE, C_NONE, C_REG, 8, 4, REGZERO},
-	Optab{AMOVB, C_SOREG, C_NONE, C_NONE, C_REG, 9, 8, REGZERO},
-	Optab{AMOVBU, C_SOREG, C_NONE, C_NONE, C_REG, 9, 8, REGZERO},
+	{AMOVD, C_ZOREG, C_REG, C_NONE, C_REG, 8, 4, REGZERO},
+	{AMOVW, C_ZOREG, C_REG, C_NONE, C_REG, 8, 4, REGZERO},
+	{AMOVWZ, C_ZOREG, C_REG, C_NONE, C_REG, 8, 4, REGZERO},
+	{AMOVBZ, C_ZOREG, C_REG, C_NONE, C_REG, 8, 4, REGZERO},
+	{AMOVBZU, C_ZOREG, C_REG, C_NONE, C_REG, 8, 4, REGZERO},
+	{AMOVB, C_ZOREG, C_REG, C_NONE, C_REG, 9, 8, REGZERO},
+	{AMOVBU, C_ZOREG, C_REG, C_NONE, C_REG, 9, 8, REGZERO},
+	{AMOVD, C_SEXT, C_NONE, C_NONE, C_REG, 8, 4, REGSB},
+	{AMOVW, C_SEXT, C_NONE, C_NONE, C_REG, 8, 4, REGSB},
+	{AMOVWZ, C_SEXT, C_NONE, C_NONE, C_REG, 8, 4, REGSB},
+	{AMOVBZ, C_SEXT, C_NONE, C_NONE, C_REG, 8, 4, REGSB},
+	{AMOVB, C_SEXT, C_NONE, C_NONE, C_REG, 9, 8, REGSB},
+	{AMOVD, C_SAUTO, C_NONE, C_NONE, C_REG, 8, 4, REGSP},
+	{AMOVW, C_SAUTO, C_NONE, C_NONE, C_REG, 8, 4, REGSP},
+	{AMOVWZ, C_SAUTO, C_NONE, C_NONE, C_REG, 8, 4, REGSP},
+	{AMOVBZ, C_SAUTO, C_NONE, C_NONE, C_REG, 8, 4, REGSP},
+	{AMOVB, C_SAUTO, C_NONE, C_NONE, C_REG, 9, 8, REGSP},
+	{AMOVD, C_SOREG, C_NONE, C_NONE, C_REG, 8, 4, REGZERO},
+	{AMOVW, C_SOREG, C_NONE, C_NONE, C_REG, 8, 4, REGZERO},
+	{AMOVWZ, C_SOREG, C_NONE, C_NONE, C_REG, 8, 4, REGZERO},
+	{AMOVBZ, C_SOREG, C_NONE, C_NONE, C_REG, 8, 4, REGZERO},
+	{AMOVBZU, C_SOREG, C_NONE, C_NONE, C_REG, 8, 4, REGZERO},
+	{AMOVB, C_SOREG, C_NONE, C_NONE, C_REG, 9, 8, REGZERO},
+	{AMOVBU, C_SOREG, C_NONE, C_NONE, C_REG, 9, 8, REGZERO},
 
 	/* store, long offset */
-	Optab{AMOVD, C_REG, C_NONE, C_NONE, C_LEXT, 35, 8, REGSB},
-	Optab{AMOVW, C_REG, C_NONE, C_NONE, C_LEXT, 35, 8, REGSB},
-	Optab{AMOVWZ, C_REG, C_NONE, C_NONE, C_LEXT, 35, 8, REGSB},
-	Optab{AMOVBZ, C_REG, C_NONE, C_NONE, C_LEXT, 35, 8, REGSB},
-	Optab{AMOVB, C_REG, C_NONE, C_NONE, C_LEXT, 35, 8, REGSB},
-	Optab{AMOVD, C_REG, C_NONE, C_NONE, C_LAUTO, 35, 8, REGSP},
-	Optab{AMOVW, C_REG, C_NONE, C_NONE, C_LAUTO, 35, 8, REGSP},
-	Optab{AMOVWZ, C_REG, C_NONE, C_NONE, C_LAUTO, 35, 8, REGSP},
-	Optab{AMOVBZ, C_REG, C_NONE, C_NONE, C_LAUTO, 35, 8, REGSP},
-	Optab{AMOVB, C_REG, C_NONE, C_NONE, C_LAUTO, 35, 8, REGSP},
-	Optab{AMOVD, C_REG, C_NONE, C_NONE, C_LOREG, 35, 8, REGZERO},
-	Optab{AMOVW, C_REG, C_NONE, C_NONE, C_LOREG, 35, 8, REGZERO},
-	Optab{AMOVWZ, C_REG, C_NONE, C_NONE, C_LOREG, 35, 8, REGZERO},
-	Optab{AMOVBZ, C_REG, C_NONE, C_NONE, C_LOREG, 35, 8, REGZERO},
-	Optab{AMOVB, C_REG, C_NONE, C_NONE, C_LOREG, 35, 8, REGZERO},
-	Optab{AMOVD, C_REG, C_NONE, C_NONE, C_ADDR, 74, 8, 0},
-	Optab{AMOVW, C_REG, C_NONE, C_NONE, C_ADDR, 74, 8, 0},
-	Optab{AMOVWZ, C_REG, C_NONE, C_NONE, C_ADDR, 74, 8, 0},
-	Optab{AMOVBZ, C_REG, C_NONE, C_NONE, C_ADDR, 74, 8, 0},
-	Optab{AMOVB, C_REG, C_NONE, C_NONE, C_ADDR, 74, 8, 0},
+	{AMOVD, C_REG, C_NONE, C_NONE, C_LEXT, 35, 8, REGSB},
+	{AMOVW, C_REG, C_NONE, C_NONE, C_LEXT, 35, 8, REGSB},
+	{AMOVWZ, C_REG, C_NONE, C_NONE, C_LEXT, 35, 8, REGSB},
+	{AMOVBZ, C_REG, C_NONE, C_NONE, C_LEXT, 35, 8, REGSB},
+	{AMOVB, C_REG, C_NONE, C_NONE, C_LEXT, 35, 8, REGSB},
+	{AMOVD, C_REG, C_NONE, C_NONE, C_LAUTO, 35, 8, REGSP},
+	{AMOVW, C_REG, C_NONE, C_NONE, C_LAUTO, 35, 8, REGSP},
+	{AMOVWZ, C_REG, C_NONE, C_NONE, C_LAUTO, 35, 8, REGSP},
+	{AMOVBZ, C_REG, C_NONE, C_NONE, C_LAUTO, 35, 8, REGSP},
+	{AMOVB, C_REG, C_NONE, C_NONE, C_LAUTO, 35, 8, REGSP},
+	{AMOVD, C_REG, C_NONE, C_NONE, C_LOREG, 35, 8, REGZERO},
+	{AMOVW, C_REG, C_NONE, C_NONE, C_LOREG, 35, 8, REGZERO},
+	{AMOVWZ, C_REG, C_NONE, C_NONE, C_LOREG, 35, 8, REGZERO},
+	{AMOVBZ, C_REG, C_NONE, C_NONE, C_LOREG, 35, 8, REGZERO},
+	{AMOVB, C_REG, C_NONE, C_NONE, C_LOREG, 35, 8, REGZERO},
+	{AMOVD, C_REG, C_NONE, C_NONE, C_ADDR, 74, 8, 0},
+	{AMOVW, C_REG, C_NONE, C_NONE, C_ADDR, 74, 8, 0},
+	{AMOVWZ, C_REG, C_NONE, C_NONE, C_ADDR, 74, 8, 0},
+	{AMOVBZ, C_REG, C_NONE, C_NONE, C_ADDR, 74, 8, 0},
+	{AMOVB, C_REG, C_NONE, C_NONE, C_ADDR, 74, 8, 0},
 
 	/* load, long offset */
-	Optab{AMOVD, C_LEXT, C_NONE, C_NONE, C_REG, 36, 8, REGSB},
-	Optab{AMOVW, C_LEXT, C_NONE, C_NONE, C_REG, 36, 8, REGSB},
-	Optab{AMOVWZ, C_LEXT, C_NONE, C_NONE, C_REG, 36, 8, REGSB},
-	Optab{AMOVBZ, C_LEXT, C_NONE, C_NONE, C_REG, 36, 8, REGSB},
-	Optab{AMOVB, C_LEXT, C_NONE, C_NONE, C_REG, 37, 12, REGSB},
-	Optab{AMOVD, C_LAUTO, C_NONE, C_NONE, C_REG, 36, 8, REGSP},
-	Optab{AMOVW, C_LAUTO, C_NONE, C_NONE, C_REG, 36, 8, REGSP},
-	Optab{AMOVWZ, C_LAUTO, C_NONE, C_NONE, C_REG, 36, 8, REGSP},
-	Optab{AMOVBZ, C_LAUTO, C_NONE, C_NONE, C_REG, 36, 8, REGSP},
-	Optab{AMOVB, C_LAUTO, C_NONE, C_NONE, C_REG, 37, 12, REGSP},
-	Optab{AMOVD, C_LOREG, C_NONE, C_NONE, C_REG, 36, 8, REGZERO},
-	Optab{AMOVW, C_LOREG, C_NONE, C_NONE, C_REG, 36, 8, REGZERO},
-	Optab{AMOVWZ, C_LOREG, C_NONE, C_NONE, C_REG, 36, 8, REGZERO},
-	Optab{AMOVBZ, C_LOREG, C_NONE, C_NONE, C_REG, 36, 8, REGZERO},
-	Optab{AMOVB, C_LOREG, C_NONE, C_NONE, C_REG, 37, 12, REGZERO},
-	Optab{AMOVD, C_ADDR, C_NONE, C_NONE, C_REG, 75, 8, 0},
-	Optab{AMOVW, C_ADDR, C_NONE, C_NONE, C_REG, 75, 8, 0},
-	Optab{AMOVWZ, C_ADDR, C_NONE, C_NONE, C_REG, 75, 8, 0},
-	Optab{AMOVBZ, C_ADDR, C_NONE, C_NONE, C_REG, 75, 8, 0},
-	Optab{AMOVB, C_ADDR, C_NONE, C_NONE, C_REG, 76, 12, 0},
+	{AMOVD, C_LEXT, C_NONE, C_NONE, C_REG, 36, 8, REGSB},
+	{AMOVW, C_LEXT, C_NONE, C_NONE, C_REG, 36, 8, REGSB},
+	{AMOVWZ, C_LEXT, C_NONE, C_NONE, C_REG, 36, 8, REGSB},
+	{AMOVBZ, C_LEXT, C_NONE, C_NONE, C_REG, 36, 8, REGSB},
+	{AMOVB, C_LEXT, C_NONE, C_NONE, C_REG, 37, 12, REGSB},
+	{AMOVD, C_LAUTO, C_NONE, C_NONE, C_REG, 36, 8, REGSP},
+	{AMOVW, C_LAUTO, C_NONE, C_NONE, C_REG, 36, 8, REGSP},
+	{AMOVWZ, C_LAUTO, C_NONE, C_NONE, C_REG, 36, 8, REGSP},
+	{AMOVBZ, C_LAUTO, C_NONE, C_NONE, C_REG, 36, 8, REGSP},
+	{AMOVB, C_LAUTO, C_NONE, C_NONE, C_REG, 37, 12, REGSP},
+	{AMOVD, C_LOREG, C_NONE, C_NONE, C_REG, 36, 8, REGZERO},
+	{AMOVW, C_LOREG, C_NONE, C_NONE, C_REG, 36, 8, REGZERO},
+	{AMOVWZ, C_LOREG, C_NONE, C_NONE, C_REG, 36, 8, REGZERO},
+	{AMOVBZ, C_LOREG, C_NONE, C_NONE, C_REG, 36, 8, REGZERO},
+	{AMOVB, C_LOREG, C_NONE, C_NONE, C_REG, 37, 12, REGZERO},
+	{AMOVD, C_ADDR, C_NONE, C_NONE, C_REG, 75, 8, 0},
+	{AMOVW, C_ADDR, C_NONE, C_NONE, C_REG, 75, 8, 0},
+	{AMOVWZ, C_ADDR, C_NONE, C_NONE, C_REG, 75, 8, 0},
+	{AMOVBZ, C_ADDR, C_NONE, C_NONE, C_REG, 75, 8, 0},
+	{AMOVB, C_ADDR, C_NONE, C_NONE, C_REG, 76, 12, 0},
 
 	/* load constant */
-	Optab{AMOVD, C_SECON, C_NONE, C_NONE, C_REG, 3, 4, REGSB},
-	Optab{AMOVD, C_SACON, C_NONE, C_NONE, C_REG, 3, 4, REGSP},
-	Optab{AMOVD, C_LECON, C_NONE, C_NONE, C_REG, 26, 8, REGSB},
-	Optab{AMOVD, C_LACON, C_NONE, C_NONE, C_REG, 26, 8, REGSP},
-	Optab{AMOVD, C_ADDCON, C_NONE, C_NONE, C_REG, 3, 4, REGZERO},
-	Optab{AMOVW, C_SECON, C_NONE, C_NONE, C_REG, 3, 4, REGSB}, /* TO DO: check */
-	Optab{AMOVW, C_SACON, C_NONE, C_NONE, C_REG, 3, 4, REGSP},
-	Optab{AMOVW, C_LECON, C_NONE, C_NONE, C_REG, 26, 8, REGSB},
-	Optab{AMOVW, C_LACON, C_NONE, C_NONE, C_REG, 26, 8, REGSP},
-	Optab{AMOVW, C_ADDCON, C_NONE, C_NONE, C_REG, 3, 4, REGZERO},
-	Optab{AMOVWZ, C_SECON, C_NONE, C_NONE, C_REG, 3, 4, REGSB}, /* TO DO: check */
-	Optab{AMOVWZ, C_SACON, C_NONE, C_NONE, C_REG, 3, 4, REGSP},
-	Optab{AMOVWZ, C_LECON, C_NONE, C_NONE, C_REG, 26, 8, REGSB},
-	Optab{AMOVWZ, C_LACON, C_NONE, C_NONE, C_REG, 26, 8, REGSP},
-	Optab{AMOVWZ, C_ADDCON, C_NONE, C_NONE, C_REG, 3, 4, REGZERO},
+	{AMOVD, C_SECON, C_NONE, C_NONE, C_REG, 3, 4, REGSB},
+	{AMOVD, C_SACON, C_NONE, C_NONE, C_REG, 3, 4, REGSP},
+	{AMOVD, C_LECON, C_NONE, C_NONE, C_REG, 26, 8, REGSB},
+	{AMOVD, C_LACON, C_NONE, C_NONE, C_REG, 26, 8, REGSP},
+	{AMOVD, C_ADDCON, C_NONE, C_NONE, C_REG, 3, 4, REGZERO},
+	{AMOVW, C_SECON, C_NONE, C_NONE, C_REG, 3, 4, REGSB}, /* TODO: check */
+	{AMOVW, C_SACON, C_NONE, C_NONE, C_REG, 3, 4, REGSP},
+	{AMOVW, C_LECON, C_NONE, C_NONE, C_REG, 26, 8, REGSB},
+	{AMOVW, C_LACON, C_NONE, C_NONE, C_REG, 26, 8, REGSP},
+	{AMOVW, C_ADDCON, C_NONE, C_NONE, C_REG, 3, 4, REGZERO},
+	{AMOVWZ, C_SECON, C_NONE, C_NONE, C_REG, 3, 4, REGSB}, /* TODO: check */
+	{AMOVWZ, C_SACON, C_NONE, C_NONE, C_REG, 3, 4, REGSP},
+	{AMOVWZ, C_LECON, C_NONE, C_NONE, C_REG, 26, 8, REGSB},
+	{AMOVWZ, C_LACON, C_NONE, C_NONE, C_REG, 26, 8, REGSP},
+	{AMOVWZ, C_ADDCON, C_NONE, C_NONE, C_REG, 3, 4, REGZERO},
 
 	/* load unsigned/long constants (TO DO: check) */
-	Optab{AMOVD, C_UCON, C_NONE, C_NONE, C_REG, 3, 4, REGZERO},
-	Optab{AMOVD, C_LCON, C_NONE, C_NONE, C_REG, 19, 8, 0},
-	Optab{AMOVW, C_UCON, C_NONE, C_NONE, C_REG, 3, 4, REGZERO},
-	Optab{AMOVW, C_LCON, C_NONE, C_NONE, C_REG, 19, 8, 0},
-	Optab{AMOVWZ, C_UCON, C_NONE, C_NONE, C_REG, 3, 4, REGZERO},
-	Optab{AMOVWZ, C_LCON, C_NONE, C_NONE, C_REG, 19, 8, 0},
-	Optab{AMOVHBR, C_ZOREG, C_REG, C_NONE, C_REG, 45, 4, 0},
-	Optab{AMOVHBR, C_ZOREG, C_NONE, C_NONE, C_REG, 45, 4, 0},
-	Optab{AMOVHBR, C_REG, C_REG, C_NONE, C_ZOREG, 44, 4, 0},
-	Optab{AMOVHBR, C_REG, C_NONE, C_NONE, C_ZOREG, 44, 4, 0},
-	Optab{ASYSCALL, C_NONE, C_NONE, C_NONE, C_NONE, 5, 4, 0},
-	Optab{ASYSCALL, C_REG, C_NONE, C_NONE, C_NONE, 77, 12, 0},
-	Optab{ASYSCALL, C_SCON, C_NONE, C_NONE, C_NONE, 77, 12, 0},
-	Optab{ABEQ, C_NONE, C_NONE, C_NONE, C_SBRA, 16, 4, 0},
-	Optab{ABEQ, C_CREG, C_NONE, C_NONE, C_SBRA, 16, 4, 0},
-	Optab{ABR, C_NONE, C_NONE, C_NONE, C_LBRA, 11, 4, 0},
-	Optab{ABC, C_SCON, C_REG, C_NONE, C_SBRA, 16, 4, 0},
-	Optab{ABC, C_SCON, C_REG, C_NONE, C_LBRA, 17, 4, 0},
-	Optab{ABR, C_NONE, C_NONE, C_NONE, C_LR, 18, 4, 0},
-	Optab{ABR, C_NONE, C_NONE, C_NONE, C_CTR, 18, 4, 0},
-	Optab{ABR, C_REG, C_NONE, C_NONE, C_CTR, 18, 4, 0},
-	Optab{ABR, C_NONE, C_NONE, C_NONE, C_ZOREG, 15, 8, 0},
-	Optab{ABC, C_NONE, C_REG, C_NONE, C_LR, 18, 4, 0},
-	Optab{ABC, C_NONE, C_REG, C_NONE, C_CTR, 18, 4, 0},
-	Optab{ABC, C_SCON, C_REG, C_NONE, C_LR, 18, 4, 0},
-	Optab{ABC, C_SCON, C_REG, C_NONE, C_CTR, 18, 4, 0},
-	Optab{ABC, C_NONE, C_NONE, C_NONE, C_ZOREG, 15, 8, 0},
-	Optab{AFMOVD, C_SEXT, C_NONE, C_NONE, C_FREG, 8, 4, REGSB},
-	Optab{AFMOVD, C_SAUTO, C_NONE, C_NONE, C_FREG, 8, 4, REGSP},
-	Optab{AFMOVD, C_SOREG, C_NONE, C_NONE, C_FREG, 8, 4, REGZERO},
-	Optab{AFMOVD, C_LEXT, C_NONE, C_NONE, C_FREG, 36, 8, REGSB},
-	Optab{AFMOVD, C_LAUTO, C_NONE, C_NONE, C_FREG, 36, 8, REGSP},
-	Optab{AFMOVD, C_LOREG, C_NONE, C_NONE, C_FREG, 36, 8, REGZERO},
-	Optab{AFMOVD, C_ADDR, C_NONE, C_NONE, C_FREG, 75, 8, 0},
-	Optab{AFMOVD, C_FREG, C_NONE, C_NONE, C_SEXT, 7, 4, REGSB},
-	Optab{AFMOVD, C_FREG, C_NONE, C_NONE, C_SAUTO, 7, 4, REGSP},
-	Optab{AFMOVD, C_FREG, C_NONE, C_NONE, C_SOREG, 7, 4, REGZERO},
-	Optab{AFMOVD, C_FREG, C_NONE, C_NONE, C_LEXT, 35, 8, REGSB},
-	Optab{AFMOVD, C_FREG, C_NONE, C_NONE, C_LAUTO, 35, 8, REGSP},
-	Optab{AFMOVD, C_FREG, C_NONE, C_NONE, C_LOREG, 35, 8, REGZERO},
-	Optab{AFMOVD, C_FREG, C_NONE, C_NONE, C_ADDR, 74, 8, 0},
-	Optab{ASYNC, C_NONE, C_NONE, C_NONE, C_NONE, 46, 4, 0},
-	Optab{AWORD, C_LCON, C_NONE, C_NONE, C_NONE, 40, 4, 0},
-	Optab{ADWORD, C_LCON, C_NONE, C_NONE, C_NONE, 31, 8, 0},
-	Optab{ADWORD, C_DCON, C_NONE, C_NONE, C_NONE, 31, 8, 0},
-	Optab{AADDME, C_REG, C_NONE, C_NONE, C_REG, 47, 4, 0},
-	Optab{AEXTSB, C_REG, C_NONE, C_NONE, C_REG, 48, 4, 0},
-	Optab{AEXTSB, C_NONE, C_NONE, C_NONE, C_REG, 48, 4, 0},
-	Optab{ANEG, C_REG, C_NONE, C_NONE, C_REG, 47, 4, 0},
-	Optab{ANEG, C_NONE, C_NONE, C_NONE, C_REG, 47, 4, 0},
-	Optab{AREM, C_REG, C_NONE, C_NONE, C_REG, 50, 12, 0},
-	Optab{AREM, C_REG, C_REG, C_NONE, C_REG, 50, 12, 0},
-	Optab{AREMU, C_REG, C_NONE, C_NONE, C_REG, 50, 16, 0},
-	Optab{AREMU, C_REG, C_REG, C_NONE, C_REG, 50, 16, 0},
-	Optab{AREMD, C_REG, C_NONE, C_NONE, C_REG, 51, 12, 0},
-	Optab{AREMD, C_REG, C_REG, C_NONE, C_REG, 51, 12, 0},
-	Optab{AREMDU, C_REG, C_NONE, C_NONE, C_REG, 51, 12, 0},
-	Optab{AREMDU, C_REG, C_REG, C_NONE, C_REG, 51, 12, 0},
-	Optab{AMTFSB0, C_SCON, C_NONE, C_NONE, C_NONE, 52, 4, 0},
-	Optab{AMOVFL, C_FPSCR, C_NONE, C_NONE, C_FREG, 53, 4, 0},
-	Optab{AMOVFL, C_FREG, C_NONE, C_NONE, C_FPSCR, 64, 4, 0},
-	Optab{AMOVFL, C_FREG, C_NONE, C_LCON, C_FPSCR, 64, 4, 0},
-	Optab{AMOVFL, C_LCON, C_NONE, C_NONE, C_FPSCR, 65, 4, 0},
-	Optab{AMOVD, C_MSR, C_NONE, C_NONE, C_REG, 54, 4, 0},  /* mfmsr */
-	Optab{AMOVD, C_REG, C_NONE, C_NONE, C_MSR, 54, 4, 0},  /* mtmsrd */
-	Optab{AMOVWZ, C_REG, C_NONE, C_NONE, C_MSR, 54, 4, 0}, /* mtmsr */
+	{AMOVD, C_UCON, C_NONE, C_NONE, C_REG, 3, 4, REGZERO},
+	{AMOVD, C_LCON, C_NONE, C_NONE, C_REG, 19, 8, 0},
+	{AMOVW, C_UCON, C_NONE, C_NONE, C_REG, 3, 4, REGZERO},
+	{AMOVW, C_LCON, C_NONE, C_NONE, C_REG, 19, 8, 0},
+	{AMOVWZ, C_UCON, C_NONE, C_NONE, C_REG, 3, 4, REGZERO},
+	{AMOVWZ, C_LCON, C_NONE, C_NONE, C_REG, 19, 8, 0},
+	{AMOVHBR, C_ZOREG, C_REG, C_NONE, C_REG, 45, 4, 0},
+	{AMOVHBR, C_ZOREG, C_NONE, C_NONE, C_REG, 45, 4, 0},
+	{AMOVHBR, C_REG, C_REG, C_NONE, C_ZOREG, 44, 4, 0},
+	{AMOVHBR, C_REG, C_NONE, C_NONE, C_ZOREG, 44, 4, 0},
+	{ASYSCALL, C_NONE, C_NONE, C_NONE, C_NONE, 5, 4, 0},
+	{ASYSCALL, C_REG, C_NONE, C_NONE, C_NONE, 77, 12, 0},
+	{ASYSCALL, C_SCON, C_NONE, C_NONE, C_NONE, 77, 12, 0},
+	{ABEQ, C_NONE, C_NONE, C_NONE, C_SBRA, 16, 4, 0},
+	{ABEQ, C_CREG, C_NONE, C_NONE, C_SBRA, 16, 4, 0},
+	{ABR, C_NONE, C_NONE, C_NONE, C_LBRA, 11, 4, 0},
+	{ABC, C_SCON, C_REG, C_NONE, C_SBRA, 16, 4, 0},
+	{ABC, C_SCON, C_REG, C_NONE, C_LBRA, 17, 4, 0},
+	{ABR, C_NONE, C_NONE, C_NONE, C_LR, 18, 4, 0},
+	{ABR, C_NONE, C_NONE, C_NONE, C_CTR, 18, 4, 0},
+	{ABR, C_REG, C_NONE, C_NONE, C_CTR, 18, 4, 0},
+	{ABR, C_NONE, C_NONE, C_NONE, C_ZOREG, 15, 8, 0},
+	{ABC, C_NONE, C_REG, C_NONE, C_LR, 18, 4, 0},
+	{ABC, C_NONE, C_REG, C_NONE, C_CTR, 18, 4, 0},
+	{ABC, C_SCON, C_REG, C_NONE, C_LR, 18, 4, 0},
+	{ABC, C_SCON, C_REG, C_NONE, C_CTR, 18, 4, 0},
+	{ABC, C_NONE, C_NONE, C_NONE, C_ZOREG, 15, 8, 0},
+	{AFMOVD, C_SEXT, C_NONE, C_NONE, C_FREG, 8, 4, REGSB},
+	{AFMOVD, C_SAUTO, C_NONE, C_NONE, C_FREG, 8, 4, REGSP},
+	{AFMOVD, C_SOREG, C_NONE, C_NONE, C_FREG, 8, 4, REGZERO},
+	{AFMOVD, C_LEXT, C_NONE, C_NONE, C_FREG, 36, 8, REGSB},
+	{AFMOVD, C_LAUTO, C_NONE, C_NONE, C_FREG, 36, 8, REGSP},
+	{AFMOVD, C_LOREG, C_NONE, C_NONE, C_FREG, 36, 8, REGZERO},
+	{AFMOVD, C_ADDR, C_NONE, C_NONE, C_FREG, 75, 8, 0},
+	{AFMOVD, C_FREG, C_NONE, C_NONE, C_SEXT, 7, 4, REGSB},
+	{AFMOVD, C_FREG, C_NONE, C_NONE, C_SAUTO, 7, 4, REGSP},
+	{AFMOVD, C_FREG, C_NONE, C_NONE, C_SOREG, 7, 4, REGZERO},
+	{AFMOVD, C_FREG, C_NONE, C_NONE, C_LEXT, 35, 8, REGSB},
+	{AFMOVD, C_FREG, C_NONE, C_NONE, C_LAUTO, 35, 8, REGSP},
+	{AFMOVD, C_FREG, C_NONE, C_NONE, C_LOREG, 35, 8, REGZERO},
+	{AFMOVD, C_FREG, C_NONE, C_NONE, C_ADDR, 74, 8, 0},
+	{ASYNC, C_NONE, C_NONE, C_NONE, C_NONE, 46, 4, 0},
+	{AWORD, C_LCON, C_NONE, C_NONE, C_NONE, 40, 4, 0},
+	{ADWORD, C_LCON, C_NONE, C_NONE, C_NONE, 31, 8, 0},
+	{ADWORD, C_DCON, C_NONE, C_NONE, C_NONE, 31, 8, 0},
+	{AADDME, C_REG, C_NONE, C_NONE, C_REG, 47, 4, 0},
+	{AEXTSB, C_REG, C_NONE, C_NONE, C_REG, 48, 4, 0},
+	{AEXTSB, C_NONE, C_NONE, C_NONE, C_REG, 48, 4, 0},
+	{ANEG, C_REG, C_NONE, C_NONE, C_REG, 47, 4, 0},
+	{ANEG, C_NONE, C_NONE, C_NONE, C_REG, 47, 4, 0},
+	{AREM, C_REG, C_NONE, C_NONE, C_REG, 50, 12, 0},
+	{AREM, C_REG, C_REG, C_NONE, C_REG, 50, 12, 0},
+	{AREMU, C_REG, C_NONE, C_NONE, C_REG, 50, 16, 0},
+	{AREMU, C_REG, C_REG, C_NONE, C_REG, 50, 16, 0},
+	{AREMD, C_REG, C_NONE, C_NONE, C_REG, 51, 12, 0},
+	{AREMD, C_REG, C_REG, C_NONE, C_REG, 51, 12, 0},
+	{AREMDU, C_REG, C_NONE, C_NONE, C_REG, 51, 12, 0},
+	{AREMDU, C_REG, C_REG, C_NONE, C_REG, 51, 12, 0},
+	{AMTFSB0, C_SCON, C_NONE, C_NONE, C_NONE, 52, 4, 0},
+	{AMOVFL, C_FPSCR, C_NONE, C_NONE, C_FREG, 53, 4, 0},
+	{AMOVFL, C_FREG, C_NONE, C_NONE, C_FPSCR, 64, 4, 0},
+	{AMOVFL, C_FREG, C_NONE, C_LCON, C_FPSCR, 64, 4, 0},
+	{AMOVFL, C_LCON, C_NONE, C_NONE, C_FPSCR, 65, 4, 0},
+	{AMOVD, C_MSR, C_NONE, C_NONE, C_REG, 54, 4, 0},  /* mfmsr */
+	{AMOVD, C_REG, C_NONE, C_NONE, C_MSR, 54, 4, 0},  /* mtmsrd */
+	{AMOVWZ, C_REG, C_NONE, C_NONE, C_MSR, 54, 4, 0}, /* mtmsr */
 
 	/* 64-bit special registers */
-	Optab{AMOVD, C_REG, C_NONE, C_NONE, C_SPR, 66, 4, 0},
-	Optab{AMOVD, C_REG, C_NONE, C_NONE, C_LR, 66, 4, 0},
-	Optab{AMOVD, C_REG, C_NONE, C_NONE, C_CTR, 66, 4, 0},
-	Optab{AMOVD, C_REG, C_NONE, C_NONE, C_XER, 66, 4, 0},
-	Optab{AMOVD, C_SPR, C_NONE, C_NONE, C_REG, 66, 4, 0},
-	Optab{AMOVD, C_LR, C_NONE, C_NONE, C_REG, 66, 4, 0},
-	Optab{AMOVD, C_CTR, C_NONE, C_NONE, C_REG, 66, 4, 0},
-	Optab{AMOVD, C_XER, C_NONE, C_NONE, C_REG, 66, 4, 0},
+	{AMOVD, C_REG, C_NONE, C_NONE, C_SPR, 66, 4, 0},
+	{AMOVD, C_REG, C_NONE, C_NONE, C_LR, 66, 4, 0},
+	{AMOVD, C_REG, C_NONE, C_NONE, C_CTR, 66, 4, 0},
+	{AMOVD, C_REG, C_NONE, C_NONE, C_XER, 66, 4, 0},
+	{AMOVD, C_SPR, C_NONE, C_NONE, C_REG, 66, 4, 0},
+	{AMOVD, C_LR, C_NONE, C_NONE, C_REG, 66, 4, 0},
+	{AMOVD, C_CTR, C_NONE, C_NONE, C_REG, 66, 4, 0},
+	{AMOVD, C_XER, C_NONE, C_NONE, C_REG, 66, 4, 0},
 
 	/* 32-bit special registers (gloss over sign-extension or not?) */
-	Optab{AMOVW, C_REG, C_NONE, C_NONE, C_SPR, 66, 4, 0},
-	Optab{AMOVW, C_REG, C_NONE, C_NONE, C_CTR, 66, 4, 0},
-	Optab{AMOVW, C_REG, C_NONE, C_NONE, C_XER, 66, 4, 0},
-	Optab{AMOVW, C_SPR, C_NONE, C_NONE, C_REG, 66, 4, 0},
-	Optab{AMOVW, C_XER, C_NONE, C_NONE, C_REG, 66, 4, 0},
-	Optab{AMOVWZ, C_REG, C_NONE, C_NONE, C_SPR, 66, 4, 0},
-	Optab{AMOVWZ, C_REG, C_NONE, C_NONE, C_CTR, 66, 4, 0},
-	Optab{AMOVWZ, C_REG, C_NONE, C_NONE, C_XER, 66, 4, 0},
-	Optab{AMOVWZ, C_SPR, C_NONE, C_NONE, C_REG, 66, 4, 0},
-	Optab{AMOVWZ, C_XER, C_NONE, C_NONE, C_REG, 66, 4, 0},
-	Optab{AMOVFL, C_FPSCR, C_NONE, C_NONE, C_CREG, 73, 4, 0},
-	Optab{AMOVFL, C_CREG, C_NONE, C_NONE, C_CREG, 67, 4, 0},
-	Optab{AMOVW, C_CREG, C_NONE, C_NONE, C_REG, 68, 4, 0},
-	Optab{AMOVWZ, C_CREG, C_NONE, C_NONE, C_REG, 68, 4, 0},
-	Optab{AMOVFL, C_REG, C_NONE, C_LCON, C_CREG, 69, 4, 0},
-	Optab{AMOVFL, C_REG, C_NONE, C_NONE, C_CREG, 69, 4, 0},
-	Optab{AMOVW, C_REG, C_NONE, C_NONE, C_CREG, 69, 4, 0},
-	Optab{AMOVWZ, C_REG, C_NONE, C_NONE, C_CREG, 69, 4, 0},
-	Optab{ACMP, C_REG, C_NONE, C_NONE, C_REG, 70, 4, 0},
-	Optab{ACMP, C_REG, C_REG, C_NONE, C_REG, 70, 4, 0},
-	Optab{ACMP, C_REG, C_NONE, C_NONE, C_ADDCON, 71, 4, 0},
-	Optab{ACMP, C_REG, C_REG, C_NONE, C_ADDCON, 71, 4, 0},
-	Optab{ACMPU, C_REG, C_NONE, C_NONE, C_REG, 70, 4, 0},
-	Optab{ACMPU, C_REG, C_REG, C_NONE, C_REG, 70, 4, 0},
-	Optab{ACMPU, C_REG, C_NONE, C_NONE, C_ANDCON, 71, 4, 0},
-	Optab{ACMPU, C_REG, C_REG, C_NONE, C_ANDCON, 71, 4, 0},
-	Optab{AFCMPO, C_FREG, C_NONE, C_NONE, C_FREG, 70, 4, 0},
-	Optab{AFCMPO, C_FREG, C_REG, C_NONE, C_FREG, 70, 4, 0},
-	Optab{ATW, C_LCON, C_REG, C_NONE, C_REG, 60, 4, 0},
-	Optab{ATW, C_LCON, C_REG, C_NONE, C_ADDCON, 61, 4, 0},
-	Optab{ADCBF, C_ZOREG, C_NONE, C_NONE, C_NONE, 43, 4, 0},
-	Optab{ADCBF, C_ZOREG, C_REG, C_NONE, C_NONE, 43, 4, 0},
-	Optab{AECOWX, C_REG, C_REG, C_NONE, C_ZOREG, 44, 4, 0},
-	Optab{AECIWX, C_ZOREG, C_REG, C_NONE, C_REG, 45, 4, 0},
-	Optab{AECOWX, C_REG, C_NONE, C_NONE, C_ZOREG, 44, 4, 0},
-	Optab{AECIWX, C_ZOREG, C_NONE, C_NONE, C_REG, 45, 4, 0},
-	Optab{AEIEIO, C_NONE, C_NONE, C_NONE, C_NONE, 46, 4, 0},
-	Optab{ATLBIE, C_REG, C_NONE, C_NONE, C_NONE, 49, 4, 0},
-	Optab{ATLBIE, C_SCON, C_NONE, C_NONE, C_REG, 49, 4, 0},
-	Optab{ASLBMFEE, C_REG, C_NONE, C_NONE, C_REG, 55, 4, 0},
-	Optab{ASLBMTE, C_REG, C_NONE, C_NONE, C_REG, 55, 4, 0},
-	Optab{ASTSW, C_REG, C_NONE, C_NONE, C_ZOREG, 44, 4, 0},
-	Optab{ASTSW, C_REG, C_NONE, C_LCON, C_ZOREG, 41, 4, 0},
-	Optab{ALSW, C_ZOREG, C_NONE, C_NONE, C_REG, 45, 4, 0},
-	Optab{ALSW, C_ZOREG, C_NONE, C_LCON, C_REG, 42, 4, 0},
-	Optab{obj.AUNDEF, C_NONE, C_NONE, C_NONE, C_NONE, 78, 4, 0},
-	Optab{obj.AUSEFIELD, C_ADDR, C_NONE, C_NONE, C_NONE, 0, 0, 0},
-	Optab{obj.APCDATA, C_LCON, C_NONE, C_NONE, C_LCON, 0, 0, 0},
-	Optab{obj.AFUNCDATA, C_SCON, C_NONE, C_NONE, C_ADDR, 0, 0, 0},
-	Optab{obj.ANOP, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0},
-	Optab{obj.ADUFFZERO, C_NONE, C_NONE, C_NONE, C_LBRA, 11, 4, 0}, // same as ABR/ABL
-	Optab{obj.ADUFFCOPY, C_NONE, C_NONE, C_NONE, C_LBRA, 11, 4, 0}, // same as ABR/ABL
+	{AMOVW, C_REG, C_NONE, C_NONE, C_SPR, 66, 4, 0},
+	{AMOVW, C_REG, C_NONE, C_NONE, C_CTR, 66, 4, 0},
+	{AMOVW, C_REG, C_NONE, C_NONE, C_XER, 66, 4, 0},
+	{AMOVW, C_SPR, C_NONE, C_NONE, C_REG, 66, 4, 0},
+	{AMOVW, C_XER, C_NONE, C_NONE, C_REG, 66, 4, 0},
+	{AMOVWZ, C_REG, C_NONE, C_NONE, C_SPR, 66, 4, 0},
+	{AMOVWZ, C_REG, C_NONE, C_NONE, C_CTR, 66, 4, 0},
+	{AMOVWZ, C_REG, C_NONE, C_NONE, C_XER, 66, 4, 0},
+	{AMOVWZ, C_SPR, C_NONE, C_NONE, C_REG, 66, 4, 0},
+	{AMOVWZ, C_XER, C_NONE, C_NONE, C_REG, 66, 4, 0},
+	{AMOVFL, C_FPSCR, C_NONE, C_NONE, C_CREG, 73, 4, 0},
+	{AMOVFL, C_CREG, C_NONE, C_NONE, C_CREG, 67, 4, 0},
+	{AMOVW, C_CREG, C_NONE, C_NONE, C_REG, 68, 4, 0},
+	{AMOVWZ, C_CREG, C_NONE, C_NONE, C_REG, 68, 4, 0},
+	{AMOVFL, C_REG, C_NONE, C_LCON, C_CREG, 69, 4, 0},
+	{AMOVFL, C_REG, C_NONE, C_NONE, C_CREG, 69, 4, 0},
+	{AMOVW, C_REG, C_NONE, C_NONE, C_CREG, 69, 4, 0},
+	{AMOVWZ, C_REG, C_NONE, C_NONE, C_CREG, 69, 4, 0},
+	{ACMP, C_REG, C_NONE, C_NONE, C_REG, 70, 4, 0},
+	{ACMP, C_REG, C_REG, C_NONE, C_REG, 70, 4, 0},
+	{ACMP, C_REG, C_NONE, C_NONE, C_ADDCON, 71, 4, 0},
+	{ACMP, C_REG, C_REG, C_NONE, C_ADDCON, 71, 4, 0},
+	{ACMPU, C_REG, C_NONE, C_NONE, C_REG, 70, 4, 0},
+	{ACMPU, C_REG, C_REG, C_NONE, C_REG, 70, 4, 0},
+	{ACMPU, C_REG, C_NONE, C_NONE, C_ANDCON, 71, 4, 0},
+	{ACMPU, C_REG, C_REG, C_NONE, C_ANDCON, 71, 4, 0},
+	{AFCMPO, C_FREG, C_NONE, C_NONE, C_FREG, 70, 4, 0},
+	{AFCMPO, C_FREG, C_REG, C_NONE, C_FREG, 70, 4, 0},
+	{ATW, C_LCON, C_REG, C_NONE, C_REG, 60, 4, 0},
+	{ATW, C_LCON, C_REG, C_NONE, C_ADDCON, 61, 4, 0},
+	{ADCBF, C_ZOREG, C_NONE, C_NONE, C_NONE, 43, 4, 0},
+	{ADCBF, C_ZOREG, C_REG, C_NONE, C_NONE, 43, 4, 0},
+	{AECOWX, C_REG, C_REG, C_NONE, C_ZOREG, 44, 4, 0},
+	{AECIWX, C_ZOREG, C_REG, C_NONE, C_REG, 45, 4, 0},
+	{AECOWX, C_REG, C_NONE, C_NONE, C_ZOREG, 44, 4, 0},
+	{AECIWX, C_ZOREG, C_NONE, C_NONE, C_REG, 45, 4, 0},
+	{AEIEIO, C_NONE, C_NONE, C_NONE, C_NONE, 46, 4, 0},
+	{ATLBIE, C_REG, C_NONE, C_NONE, C_NONE, 49, 4, 0},
+	{ATLBIE, C_SCON, C_NONE, C_NONE, C_REG, 49, 4, 0},
+	{ASLBMFEE, C_REG, C_NONE, C_NONE, C_REG, 55, 4, 0},
+	{ASLBMTE, C_REG, C_NONE, C_NONE, C_REG, 55, 4, 0},
+	{ASTSW, C_REG, C_NONE, C_NONE, C_ZOREG, 44, 4, 0},
+	{ASTSW, C_REG, C_NONE, C_LCON, C_ZOREG, 41, 4, 0},
+	{ALSW, C_ZOREG, C_NONE, C_NONE, C_REG, 45, 4, 0},
+	{ALSW, C_ZOREG, C_NONE, C_LCON, C_REG, 42, 4, 0},
+	{obj.AUNDEF, C_NONE, C_NONE, C_NONE, C_NONE, 78, 4, 0},
+	{obj.AUSEFIELD, C_ADDR, C_NONE, C_NONE, C_NONE, 0, 0, 0},
+	{obj.APCDATA, C_LCON, C_NONE, C_NONE, C_LCON, 0, 0, 0},
+	{obj.AFUNCDATA, C_SCON, C_NONE, C_NONE, C_ADDR, 0, 0, 0},
+	{obj.ANOP, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0},
+	{obj.ADUFFZERO, C_NONE, C_NONE, C_NONE, C_LBRA, 11, 4, 0}, // same as ABR/ABL
+	{obj.ADUFFCOPY, C_NONE, C_NONE, C_NONE, C_LBRA, 11, 4, 0}, // same as ABR/ABL
 
-	Optab{obj.AXXX, C_NONE, C_NONE, C_NONE, C_NONE, 0, 4, 0},
+	{obj.AXXX, C_NONE, C_NONE, C_NONE, C_NONE, 0, 4, 0},
 }
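
The mechanical rewrite above leans on Go's elided composite-literal types:
inside a []Optab literal each element may drop the leading Optab, which is
exactly the simplification gofmt -s applies. A minimal sketch of the rule
(toy struct, not the real Optab):

	type Optab struct{ as, a1, a2 int }

	var optab = []Optab{
		{1, 2, 3},      // same as Optab{1, 2, 3}
		Optab{4, 5, 6}, // the long form still compiles; gofmt -s removes it
	}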
 
 type Oprang struct {
@@ -412,7 +412,7 @@
 		return
 	}
 	ctxt.Cursym = cursym
-	ctxt.Autosize = int32(p.To.Offset + 8)
+	ctxt.Autosize = int32(p.To.Offset)
 
 	if oprange[AANDN&obj.AMask].start == nil {
 		buildop(ctxt)
@@ -504,9 +504,6 @@
 	/*
 	 * lay out the code, emitting code and data relocations.
 	 */
-	if ctxt.Tlsg == nil {
-		ctxt.Tlsg = obj.Linklookup(ctxt, "runtime.tlsg", 0)
-	}
 
 	obj.Symgrow(ctxt, cursym, cursym.Size)
 
@@ -598,7 +595,7 @@
 			return C_LAUTO
 
 		case obj.NAME_PARAM:
-			ctxt.Instoffset = int64(ctxt.Autosize) + a.Offset + 8
+			ctxt.Instoffset = int64(ctxt.Autosize) + a.Offset + ctxt.FixedFrameSize()
 			if ctxt.Instoffset >= -BIG && ctxt.Instoffset < BIG {
 				return C_SAUTO
 			}
@@ -661,7 +658,7 @@
 			return C_LACON
 
 		case obj.NAME_PARAM:
-			ctxt.Instoffset = int64(ctxt.Autosize) + a.Offset + 8
+			ctxt.Instoffset = int64(ctxt.Autosize) + a.Offset + ctxt.FixedFrameSize()
 			if ctxt.Instoffset >= -BIG && ctxt.Instoffset < BIG {
 				return C_SACON
 			}
diff --git a/src/cmd/internal/obj/ppc64/obj9.go b/src/cmd/internal/obj/ppc64/obj9.go
index 1eddc6f..e72832d 100644
--- a/src/cmd/internal/obj/ppc64/obj9.go
+++ b/src/cmd/internal/obj/ppc64/obj9.go
@@ -124,6 +124,19 @@
 
 	p := cursym.Text
 	textstksiz := p.To.Offset
+	if textstksiz == -8 {
+		// Compatibility hack.
+		p.From3.Offset |= obj.NOFRAME
+		textstksiz = 0
+	}
+	if textstksiz%8 != 0 {
+		ctxt.Diag("frame size %d not a multiple of 8", textstksiz)
+	}
+	if p.From3.Offset&obj.NOFRAME != 0 {
+		if textstksiz != 0 {
+			ctxt.Diag("NOFRAME functions must have a frame size of 0, not %d", textstksiz)
+		}
+	}
 
 	cursym.Args = p.To.Val.(int32)
 	cursym.Locals = int32(textstksiz)
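
The new prologue checks normalize the declared frame size before any code is
laid out. A standalone sketch of the same logic (NOFRAME mirrors the constant
added to textflag.go below; the error return stands in for ctxt.Diag):

	package main

	import "fmt"

	const NOFRAME = 512

	func normalizeFrame(textstksiz, flags int64) (int64, int64, error) {
		if textstksiz == -8 {
			// Compatibility hack: a declared frame size of $-8 used to
			// mean "no frame"; fold it into the new NOFRAME flag.
			flags |= NOFRAME
			textstksiz = 0
		}
		if textstksiz%8 != 0 {
			return 0, 0, fmt.Errorf("frame size %d not a multiple of 8", textstksiz)
		}
		if flags&NOFRAME != 0 && textstksiz != 0 {
			return 0, 0, fmt.Errorf("NOFRAME functions must have a frame size of 0, not %d", textstksiz)
		}
		return textstksiz, flags, nil
	}

	func main() {
		size, flags, _ := normalizeFrame(-8, 0)
		fmt.Println(size, flags&NOFRAME != 0) // 0 true
	}
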
@@ -314,13 +327,20 @@
 		case obj.ATEXT:
 			mov = AMOVD
 			aoffset = 0
-			autosize = int32(textstksiz + 8)
-			if (p.Mark&LEAF != 0) && autosize <= 8 {
-				autosize = 0
-			} else if autosize&4 != 0 {
-				autosize += 4
+			autosize = int32(textstksiz)
+
+			if p.Mark&LEAF != 0 && autosize == 0 && p.From3.Offset&obj.NOFRAME == 0 {
+				// A leaf function with no locals has no frame.
+				p.From3.Offset |= obj.NOFRAME
 			}
-			p.To.Offset = int64(autosize) - 8
+
+			if p.From3.Offset&obj.NOFRAME == 0 {
+				// If there is a stack frame at all, it includes
+				// space to save the LR.
+				autosize += int32(ctxt.FixedFrameSize())
+			}
+
+			p.To.Offset = int64(autosize)
 
 			if p.From3.Offset&obj.NOSPLIT == 0 {
 				p = stacksplit(ctxt, p, autosize) // emit split check
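
With NOFRAME in place, the ATEXT case derives autosize directly: a leaf with
no locals is promoted to NOFRAME, and any function that does get a frame
reserves ctxt.FixedFrameSize() extra bytes for the saved LR. A sketch of the
arithmetic (assuming the fixed frame size is 8 bytes on ppc64, which is what
the old hard-coded +8 encoded):

	// computeAutosize mirrors the ATEXT logic above in isolation.
	func computeAutosize(textstksiz int32, leaf, noframe bool) (autosize int32, frameless bool) {
		const fixedFrameSize = 8 // assumed ctxt.FixedFrameSize() on ppc64
		if leaf && textstksiz == 0 {
			noframe = true // a leaf function with no locals has no frame
		}
		autosize = textstksiz
		if !noframe {
			autosize += fixedFrameSize // space to save the LR
		}
		return autosize, noframe
	}

So a non-leaf function declaring $16 of locals ends up with autosize 24,
while a leaf declaring $0 stays at 0 and gets no prologue at all.
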
@@ -344,11 +364,9 @@
 					q.Spadj = +autosize
 				}
 			} else if cursym.Text.Mark&LEAF == 0 {
-				if ctxt.Debugvlog != 0 {
-					fmt.Fprintf(ctxt.Bso, "save suppressed in: %s\n", cursym.Name)
-					ctxt.Bso.Flush()
-				}
-
+				// A very few functions that do not return to their caller
+				// (e.g. gogo) are not identified as leaves but still have
+				// no frame.
 				cursym.Text.Mark |= LEAF
 			}
 
@@ -427,7 +445,7 @@
 				q = obj.Appendp(ctxt, q)
 				q.As = AADD
 				q.From.Type = obj.TYPE_CONST
-				q.From.Offset = int64(autosize) + 8
+				q.From.Offset = int64(autosize) + ctxt.FixedFrameSize()
 				q.Reg = REGSP
 				q.To.Type = obj.TYPE_REG
 				q.To.Reg = REG_R5
@@ -447,7 +465,7 @@
 				q = obj.Appendp(ctxt, q)
 				q.As = AADD
 				q.From.Type = obj.TYPE_CONST
-				q.From.Offset = 8
+				q.From.Offset = ctxt.FixedFrameSize()
 				q.Reg = REGSP
 				q.To.Type = obj.TYPE_REG
 				q.To.Reg = REG_R6
@@ -473,18 +491,19 @@
 				break
 			}
 
-			if p.To.Sym != nil { // retjmp
-				p.As = ABR
-				p.To.Type = obj.TYPE_BRANCH
-				break
-			}
+			retTarget := p.To.Sym
 
 			if cursym.Text.Mark&LEAF != 0 {
 				if autosize == 0 {
 					p.As = ABR
 					p.From = obj.Addr{}
-					p.To.Type = obj.TYPE_REG
-					p.To.Reg = REG_LR
+					if retTarget == nil {
+						p.To.Type = obj.TYPE_REG
+						p.To.Reg = REG_LR
+					} else {
+						p.To.Type = obj.TYPE_BRANCH
+						p.To.Sym = retTarget
+					}
 					p.Mark |= BRANCH
 					break
 				}
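
Threading retTarget through both return paths, rather than short-circuiting
on p.To.Sym as before, means a RET that names a target symbol (retjmp) in a
function with a frame now branches only after the frame is popped and the LR
restored, instead of jumping away with the frame still in place.
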
@@ -562,14 +581,18 @@
 			q1 = ctxt.NewProg()
 			q1.As = ABR
 			q1.Lineno = p.Lineno
-			q1.To.Type = obj.TYPE_REG
-			q1.To.Reg = REG_LR
+			if retTarget == nil {
+				q1.To.Type = obj.TYPE_REG
+				q1.To.Reg = REG_LR
+			} else {
+				q1.To.Type = obj.TYPE_BRANCH
+				q1.To.Sym = retTarget
+			}
 			q1.Mark |= BRANCH
 			q1.Spadj = +autosize
 
 			q1.Link = q.Link
 			q.Link = q1
-
 		case AADD:
 			if p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP && p.From.Type == obj.TYPE_CONST {
 				p.Spadj = int32(-p.From.Offset)
diff --git a/src/cmd/internal/obj/textflag.go b/src/cmd/internal/obj/textflag.go
index 77766c9..3d3b3b8 100644
--- a/src/cmd/internal/obj/textflag.go
+++ b/src/cmd/internal/obj/textflag.go
@@ -39,4 +39,9 @@
 	// Allocate a word of thread local storage and store the offset from the
 	// thread local base to the thread local storage in this variable.
 	TLSBSS = 256
+
+	// Do not insert instructions to allocate a stack frame for this function.
+	// Only valid on functions that declare a frame size of 0.
+	// TODO(mwhudson): only implemented for ppc64x at present.
+	NOFRAME = 512
 )
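
For reference, a NOFRAME function is declared like any other flagged TEXT
symbol, e.g. TEXT ·spin(SB), NOSPLIT|NOFRAME, $0-0 (hypothetical symbol; the
flag is rejected unless the declared frame size is $0, and per the TODO it is
only honored on ppc64x at this point).
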
diff --git a/src/cmd/internal/obj/util.go b/src/cmd/internal/obj/util.go
index f03eb69..73d3366 100644
--- a/src/cmd/internal/obj/util.go
+++ b/src/cmd/internal/obj/util.go
@@ -457,13 +457,25 @@
 		}
 
 	case NAME_EXTERN:
-		str = fmt.Sprintf("%s%s(SB)", a.Sym.Name, offConv(a.Offset))
+		if a.Sym != nil {
+			str = fmt.Sprintf("%s%s(SB)", a.Sym.Name, offConv(a.Offset))
+		} else {
+			str = fmt.Sprintf("%s(SB)", offConv(a.Offset))
+		}
 
 	case NAME_GOTREF:
-		str = fmt.Sprintf("%s%s@GOT(SB)", a.Sym.Name, offConv(a.Offset))
+		if a.Sym != nil {
+			str = fmt.Sprintf("%s%s@GOT(SB)", a.Sym.Name, offConv(a.Offset))
+		} else {
+			str = fmt.Sprintf("%s@GOT(SB)", offConv(a.Offset))
+		}
 
 	case NAME_STATIC:
-		str = fmt.Sprintf("%s<>%s(SB)", a.Sym.Name, offConv(a.Offset))
+		if a.Sym != nil {
+			str = fmt.Sprintf("%s<>%s(SB)", a.Sym.Name, offConv(a.Offset))
+		} else {
+			str = fmt.Sprintf("<>%s(SB)", offConv(a.Offset))
+		}
 
 	case NAME_AUTO:
 		if a.Sym != nil {
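
With these nil guards, an address whose symbol slot is empty degrades to just
the offset form when printed, e.g. +8(SB) for a NAME_EXTERN operand with
offset 8, instead of a nil-pointer panic in the operand printer.
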
diff --git a/src/cmd/internal/obj/x86/a.out.go b/src/cmd/internal/obj/x86/a.out.go
index e36cb9e..f8f3777 100644
--- a/src/cmd/internal/obj/x86/a.out.go
+++ b/src/cmd/internal/obj/x86/a.out.go
@@ -734,9 +734,19 @@
 	AAESIMC
 	AAESKEYGENASSIST
 
+	AROUNDPS
+	AROUNDSS
+	AROUNDPD
+	AROUNDSD
+
 	APSHUFD
 	APCLMULQDQ
 
+	AVZEROUPPER
+	AMOVHDU
+	AMOVNTHD
+	AMOVHDA
+
 	// from 386
 	AJCXZW
 	AFCMOVCC
diff --git a/src/cmd/internal/obj/x86/anames.go b/src/cmd/internal/obj/x86/anames.go
index 7f7708c..c075a15 100644
--- a/src/cmd/internal/obj/x86/anames.go
+++ b/src/cmd/internal/obj/x86/anames.go
@@ -677,8 +677,16 @@
 	"AESDECLAST",
 	"AESIMC",
 	"AESKEYGENASSIST",
+	"ROUNDPS",
+	"ROUNDSS",
+	"ROUNDPD",
+	"ROUNDSD",
 	"PSHUFD",
 	"PCLMULQDQ",
+	"VZEROUPPER",
+	"MOVHDU",
+	"MOVNTHD",
+	"MOVHDA",
 	"JCXZW",
 	"FCMOVCC",
 	"FCMOVCS",
diff --git a/src/cmd/internal/obj/x86/asm6.go b/src/cmd/internal/obj/x86/asm6.go
index 0aa9861..f3be6cd 100644
--- a/src/cmd/internal/obj/x86/asm6.go
+++ b/src/cmd/internal/obj/x86/asm6.go
@@ -181,6 +181,7 @@
 	Zm_r
 	Zm2_r
 	Zm_r_xm
+	Zm_r_xm_vex
 	Zm_r_i_xm
 	Zm_r_3d
 	Zm_r_xm_nr
@@ -193,6 +194,7 @@
 	Zpseudo
 	Zr_m
 	Zr_m_xm
+	Zr_m_xm_vex
 	Zrp_
 	Z_ib
 	Z_il
@@ -206,21 +208,23 @@
 )
 
 const (
-	Px  = 0
-	Px1 = 1    // symbolic; exact value doesn't matter
-	P32 = 0x32 /* 32-bit only */
-	Pe  = 0x66 /* operand escape */
-	Pm  = 0x0f /* 2byte opcode escape */
-	Pq  = 0xff /* both escapes: 66 0f */
-	Pb  = 0xfe /* byte operands */
-	Pf2 = 0xf2 /* xmm escape 1: f2 0f */
-	Pf3 = 0xf3 /* xmm escape 2: f3 0f */
-	Pq3 = 0x67 /* xmm escape 3: 66 48 0f */
-	Pw  = 0x48 /* Rex.w */
-	Pw8 = 0x90 // symbolic; exact value doesn't matter
-	Py  = 0x80 /* defaults to 64-bit mode */
-	Py1 = 0x81 // symbolic; exact value doesn't matter
-	Py3 = 0x83 // symbolic; exact value doesn't matter
+	Px    = 0
+	Px1   = 1    // symbolic; exact value doesn't matter
+	P32   = 0x32 /* 32-bit only */
+	Pe    = 0x66 /* operand escape */
+	Pm    = 0x0f /* 2byte opcode escape */
+	Pq    = 0xff /* both escapes: 66 0f */
+	Pb    = 0xfe /* byte operands */
+	Pf2   = 0xf2 /* xmm escape 1: f2 0f */
+	Pf3   = 0xf3 /* xmm escape 2: f3 0f */
+	Pq3   = 0x67 /* xmm escape 3: 66 48 0f */
+	Pvex1 = 0xc5 /* 66 escape, vex encoding */
+	Pvex2 = 0xc6 /* f3 escape, vex encoding */
+	Pw    = 0x48 /* Rex.w */
+	Pw8   = 0x90 // symbolic; exact value doesn't matter
+	Py    = 0x80 /* defaults to 64-bit mode */
+	Py1   = 0x81 // symbolic; exact value doesn't matter
+	Py3   = 0x83 // symbolic; exact value doesn't matter
 
 	Rxw = 1 << 3 /* =1, 64-bit operand size */
 	Rxr = 1 << 2 /* extend modrm reg */
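
Pvex1 and Pvex2 mark instructions that must be emitted with a two-byte VEX
prefix rather than the legacy escapes; like Pw8 and Py1 above, the values are
symbolic tags (0xc6 is not a real VEX lead byte), with the comments recording
which implied prefix (66 or F3) the pp field selects. A sketch of the
two-byte form such instructions encode to, following the standard layout with
R and vvvv stored inverted (assumed helper, not the asm6.go emitter):

	// vexC5 packs a 2-byte VEX prefix: 0xC5, then ~R ~vvvv L pp.
	// pp selects the implied legacy prefix: 1 = 66, 2 = F3, 3 = F2.
	func vexC5(rexR bool, vvvv uint8, l256 bool, pp uint8) [2]byte {
		b := byte(0)
		if !rexR {
			b |= 0x80 // R is stored inverted
		}
		b |= (^vvvv & 0x0f) << 3 // vvvv is stored inverted
		if l256 {
			b |= 0x04 // L = 1 selects 256-bit vector length
		}
		b |= pp & 0x03
		return [2]byte{0xc5, b}
	}
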
@@ -622,6 +626,10 @@
 	{Yxr, Ynone, Yml, Zr_m_xm, 1},
 }
 
+var yxr_ml_vex = []ytab{
+	{Yxr, Ynone, Yml, Zr_m_xm_vex, 1},
+}
+
 var ymr = []ytab{
 	{Ymr, Ynone, Ymr, Zm_r, 1},
 }
@@ -638,6 +646,11 @@
 	{Yxm, Yxr, Yi8, Zm_r_i_xm, 2},
 }
 
+var yxmov_vex = []ytab{
+	{Yxm, Ynone, Yxr, Zm_r_xm_vex, 1},
+	{Yxr, Ynone, Yxm, Zr_m_xm_vex, 1},
+}
+
 var yxmov = []ytab{
 	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
 	{Yxr, Ynone, Yxm, Zr_m_xm, 1},
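
Each ytab row pairs the operand classes an instruction form accepts (from,
from3, to) with the Z* case that says how to lay down bytes, so adding the
VEX variants only needs the two new rows above plus the new Z cases. A
simplified sketch of how a row is selected (the real matcher in doasm
consults the ycover compatibility table; match here is a stand-in):

	type ytab struct {
		from, from3, to int // Y* operand classes
		zcase, zoffset  int // Z* encoding case, opcode-table offset
	}

	func pick(rows []ytab, from, from3, to int, match func(have, want int) bool) (ytab, bool) {
		for _, r := range rows {
			if match(from, r.from) && match(from3, r.from3) && match(to, r.to) {
				return r, true // first matching row wins
			}
		}
		return ytab{}, false
	}
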
@@ -789,704 +802,712 @@
 var optab =
 /*	as, ytab, andproto, opcode */
 []Optab{
-	Optab{obj.AXXX, nil, 0, [23]uint8{}},
-	Optab{AAAA, ynone, P32, [23]uint8{0x37}},
-	Optab{AAAD, ynone, P32, [23]uint8{0xd5, 0x0a}},
-	Optab{AAAM, ynone, P32, [23]uint8{0xd4, 0x0a}},
-	Optab{AAAS, ynone, P32, [23]uint8{0x3f}},
-	Optab{AADCB, yxorb, Pb, [23]uint8{0x14, 0x80, 02, 0x10, 0x10}},
-	Optab{AADCL, yxorl, Px, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
-	Optab{AADCQ, yxorl, Pw, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
-	Optab{AADCW, yxorl, Pe, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
-	Optab{AADDB, yxorb, Pb, [23]uint8{0x04, 0x80, 00, 0x00, 0x02}},
-	Optab{AADDL, yaddl, Px, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
-	Optab{AADDPD, yxm, Pq, [23]uint8{0x58}},
-	Optab{AADDPS, yxm, Pm, [23]uint8{0x58}},
-	Optab{AADDQ, yaddl, Pw, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
-	Optab{AADDSD, yxm, Pf2, [23]uint8{0x58}},
-	Optab{AADDSS, yxm, Pf3, [23]uint8{0x58}},
-	Optab{AADDW, yaddl, Pe, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
-	Optab{AADJSP, nil, 0, [23]uint8{}},
-	Optab{AANDB, yxorb, Pb, [23]uint8{0x24, 0x80, 04, 0x20, 0x22}},
-	Optab{AANDL, yxorl, Px, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
-	Optab{AANDNPD, yxm, Pq, [23]uint8{0x55}},
-	Optab{AANDNPS, yxm, Pm, [23]uint8{0x55}},
-	Optab{AANDPD, yxm, Pq, [23]uint8{0x54}},
-	Optab{AANDPS, yxm, Pq, [23]uint8{0x54}},
-	Optab{AANDQ, yxorl, Pw, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
-	Optab{AANDW, yxorl, Pe, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
-	Optab{AARPL, yrl_ml, P32, [23]uint8{0x63}},
-	Optab{ABOUNDL, yrl_m, P32, [23]uint8{0x62}},
-	Optab{ABOUNDW, yrl_m, Pe, [23]uint8{0x62}},
-	Optab{ABSFL, yml_rl, Pm, [23]uint8{0xbc}},
-	Optab{ABSFQ, yml_rl, Pw, [23]uint8{0x0f, 0xbc}},
-	Optab{ABSFW, yml_rl, Pq, [23]uint8{0xbc}},
-	Optab{ABSRL, yml_rl, Pm, [23]uint8{0xbd}},
-	Optab{ABSRQ, yml_rl, Pw, [23]uint8{0x0f, 0xbd}},
-	Optab{ABSRW, yml_rl, Pq, [23]uint8{0xbd}},
-	Optab{ABSWAPL, ybswap, Px, [23]uint8{0x0f, 0xc8}},
-	Optab{ABSWAPQ, ybswap, Pw, [23]uint8{0x0f, 0xc8}},
-	Optab{ABTCL, ybtl, Pm, [23]uint8{0xba, 07, 0xbb}},
-	Optab{ABTCQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 07, 0x0f, 0xbb}},
-	Optab{ABTCW, ybtl, Pq, [23]uint8{0xba, 07, 0xbb}},
-	Optab{ABTL, ybtl, Pm, [23]uint8{0xba, 04, 0xa3}},
-	Optab{ABTQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 04, 0x0f, 0xa3}},
-	Optab{ABTRL, ybtl, Pm, [23]uint8{0xba, 06, 0xb3}},
-	Optab{ABTRQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 06, 0x0f, 0xb3}},
-	Optab{ABTRW, ybtl, Pq, [23]uint8{0xba, 06, 0xb3}},
-	Optab{ABTSL, ybtl, Pm, [23]uint8{0xba, 05, 0xab}},
-	Optab{ABTSQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 05, 0x0f, 0xab}},
-	Optab{ABTSW, ybtl, Pq, [23]uint8{0xba, 05, 0xab}},
-	Optab{ABTW, ybtl, Pq, [23]uint8{0xba, 04, 0xa3}},
-	Optab{ABYTE, ybyte, Px, [23]uint8{1}},
-	Optab{obj.ACALL, ycall, Px, [23]uint8{0xff, 02, 0xff, 0x15, 0xe8}},
-	Optab{ACDQ, ynone, Px, [23]uint8{0x99}},
-	Optab{ACLC, ynone, Px, [23]uint8{0xf8}},
-	Optab{ACLD, ynone, Px, [23]uint8{0xfc}},
-	Optab{ACLI, ynone, Px, [23]uint8{0xfa}},
-	Optab{ACLTS, ynone, Pm, [23]uint8{0x06}},
-	Optab{ACMC, ynone, Px, [23]uint8{0xf5}},
-	Optab{ACMOVLCC, yml_rl, Pm, [23]uint8{0x43}},
-	Optab{ACMOVLCS, yml_rl, Pm, [23]uint8{0x42}},
-	Optab{ACMOVLEQ, yml_rl, Pm, [23]uint8{0x44}},
-	Optab{ACMOVLGE, yml_rl, Pm, [23]uint8{0x4d}},
-	Optab{ACMOVLGT, yml_rl, Pm, [23]uint8{0x4f}},
-	Optab{ACMOVLHI, yml_rl, Pm, [23]uint8{0x47}},
-	Optab{ACMOVLLE, yml_rl, Pm, [23]uint8{0x4e}},
-	Optab{ACMOVLLS, yml_rl, Pm, [23]uint8{0x46}},
-	Optab{ACMOVLLT, yml_rl, Pm, [23]uint8{0x4c}},
-	Optab{ACMOVLMI, yml_rl, Pm, [23]uint8{0x48}},
-	Optab{ACMOVLNE, yml_rl, Pm, [23]uint8{0x45}},
-	Optab{ACMOVLOC, yml_rl, Pm, [23]uint8{0x41}},
-	Optab{ACMOVLOS, yml_rl, Pm, [23]uint8{0x40}},
-	Optab{ACMOVLPC, yml_rl, Pm, [23]uint8{0x4b}},
-	Optab{ACMOVLPL, yml_rl, Pm, [23]uint8{0x49}},
-	Optab{ACMOVLPS, yml_rl, Pm, [23]uint8{0x4a}},
-	Optab{ACMOVQCC, yml_rl, Pw, [23]uint8{0x0f, 0x43}},
-	Optab{ACMOVQCS, yml_rl, Pw, [23]uint8{0x0f, 0x42}},
-	Optab{ACMOVQEQ, yml_rl, Pw, [23]uint8{0x0f, 0x44}},
-	Optab{ACMOVQGE, yml_rl, Pw, [23]uint8{0x0f, 0x4d}},
-	Optab{ACMOVQGT, yml_rl, Pw, [23]uint8{0x0f, 0x4f}},
-	Optab{ACMOVQHI, yml_rl, Pw, [23]uint8{0x0f, 0x47}},
-	Optab{ACMOVQLE, yml_rl, Pw, [23]uint8{0x0f, 0x4e}},
-	Optab{ACMOVQLS, yml_rl, Pw, [23]uint8{0x0f, 0x46}},
-	Optab{ACMOVQLT, yml_rl, Pw, [23]uint8{0x0f, 0x4c}},
-	Optab{ACMOVQMI, yml_rl, Pw, [23]uint8{0x0f, 0x48}},
-	Optab{ACMOVQNE, yml_rl, Pw, [23]uint8{0x0f, 0x45}},
-	Optab{ACMOVQOC, yml_rl, Pw, [23]uint8{0x0f, 0x41}},
-	Optab{ACMOVQOS, yml_rl, Pw, [23]uint8{0x0f, 0x40}},
-	Optab{ACMOVQPC, yml_rl, Pw, [23]uint8{0x0f, 0x4b}},
-	Optab{ACMOVQPL, yml_rl, Pw, [23]uint8{0x0f, 0x49}},
-	Optab{ACMOVQPS, yml_rl, Pw, [23]uint8{0x0f, 0x4a}},
-	Optab{ACMOVWCC, yml_rl, Pq, [23]uint8{0x43}},
-	Optab{ACMOVWCS, yml_rl, Pq, [23]uint8{0x42}},
-	Optab{ACMOVWEQ, yml_rl, Pq, [23]uint8{0x44}},
-	Optab{ACMOVWGE, yml_rl, Pq, [23]uint8{0x4d}},
-	Optab{ACMOVWGT, yml_rl, Pq, [23]uint8{0x4f}},
-	Optab{ACMOVWHI, yml_rl, Pq, [23]uint8{0x47}},
-	Optab{ACMOVWLE, yml_rl, Pq, [23]uint8{0x4e}},
-	Optab{ACMOVWLS, yml_rl, Pq, [23]uint8{0x46}},
-	Optab{ACMOVWLT, yml_rl, Pq, [23]uint8{0x4c}},
-	Optab{ACMOVWMI, yml_rl, Pq, [23]uint8{0x48}},
-	Optab{ACMOVWNE, yml_rl, Pq, [23]uint8{0x45}},
-	Optab{ACMOVWOC, yml_rl, Pq, [23]uint8{0x41}},
-	Optab{ACMOVWOS, yml_rl, Pq, [23]uint8{0x40}},
-	Optab{ACMOVWPC, yml_rl, Pq, [23]uint8{0x4b}},
-	Optab{ACMOVWPL, yml_rl, Pq, [23]uint8{0x49}},
-	Optab{ACMOVWPS, yml_rl, Pq, [23]uint8{0x4a}},
-	Optab{ACMPB, ycmpb, Pb, [23]uint8{0x3c, 0x80, 07, 0x38, 0x3a}},
-	Optab{ACMPL, ycmpl, Px, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
-	Optab{ACMPPD, yxcmpi, Px, [23]uint8{Pe, 0xc2}},
-	Optab{ACMPPS, yxcmpi, Pm, [23]uint8{0xc2, 0}},
-	Optab{ACMPQ, ycmpl, Pw, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
-	Optab{ACMPSB, ynone, Pb, [23]uint8{0xa6}},
-	Optab{ACMPSD, yxcmpi, Px, [23]uint8{Pf2, 0xc2}},
-	Optab{ACMPSL, ynone, Px, [23]uint8{0xa7}},
-	Optab{ACMPSQ, ynone, Pw, [23]uint8{0xa7}},
-	Optab{ACMPSS, yxcmpi, Px, [23]uint8{Pf3, 0xc2}},
-	Optab{ACMPSW, ynone, Pe, [23]uint8{0xa7}},
-	Optab{ACMPW, ycmpl, Pe, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
-	Optab{ACOMISD, yxcmp, Pe, [23]uint8{0x2f}},
-	Optab{ACOMISS, yxcmp, Pm, [23]uint8{0x2f}},
-	Optab{ACPUID, ynone, Pm, [23]uint8{0xa2}},
-	Optab{ACVTPL2PD, yxcvm2, Px, [23]uint8{Pf3, 0xe6, Pe, 0x2a}},
-	Optab{ACVTPL2PS, yxcvm2, Pm, [23]uint8{0x5b, 0, 0x2a, 0}},
-	Optab{ACVTPD2PL, yxcvm1, Px, [23]uint8{Pf2, 0xe6, Pe, 0x2d}},
-	Optab{ACVTPD2PS, yxm, Pe, [23]uint8{0x5a}},
-	Optab{ACVTPS2PL, yxcvm1, Px, [23]uint8{Pe, 0x5b, Pm, 0x2d}},
-	Optab{ACVTPS2PD, yxm, Pm, [23]uint8{0x5a}},
-	Optab{API2FW, ymfp, Px, [23]uint8{0x0c}},
-	Optab{ACVTSD2SL, yxcvfl, Pf2, [23]uint8{0x2d}},
-	Optab{ACVTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2d}},
-	Optab{ACVTSD2SS, yxm, Pf2, [23]uint8{0x5a}},
-	Optab{ACVTSL2SD, yxcvlf, Pf2, [23]uint8{0x2a}},
-	Optab{ACVTSQ2SD, yxcvqf, Pw, [23]uint8{Pf2, 0x2a}},
-	Optab{ACVTSL2SS, yxcvlf, Pf3, [23]uint8{0x2a}},
-	Optab{ACVTSQ2SS, yxcvqf, Pw, [23]uint8{Pf3, 0x2a}},
-	Optab{ACVTSS2SD, yxm, Pf3, [23]uint8{0x5a}},
-	Optab{ACVTSS2SL, yxcvfl, Pf3, [23]uint8{0x2d}},
-	Optab{ACVTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2d}},
-	Optab{ACVTTPD2PL, yxcvm1, Px, [23]uint8{Pe, 0xe6, Pe, 0x2c}},
-	Optab{ACVTTPS2PL, yxcvm1, Px, [23]uint8{Pf3, 0x5b, Pm, 0x2c}},
-	Optab{ACVTTSD2SL, yxcvfl, Pf2, [23]uint8{0x2c}},
-	Optab{ACVTTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2c}},
-	Optab{ACVTTSS2SL, yxcvfl, Pf3, [23]uint8{0x2c}},
-	Optab{ACVTTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2c}},
-	Optab{ACWD, ynone, Pe, [23]uint8{0x99}},
-	Optab{ACQO, ynone, Pw, [23]uint8{0x99}},
-	Optab{ADAA, ynone, P32, [23]uint8{0x27}},
-	Optab{ADAS, ynone, P32, [23]uint8{0x2f}},
-	Optab{obj.ADATA, nil, 0, [23]uint8{}},
-	Optab{ADECB, yincb, Pb, [23]uint8{0xfe, 01}},
-	Optab{ADECL, yincl, Px1, [23]uint8{0x48, 0xff, 01}},
-	Optab{ADECQ, yincq, Pw, [23]uint8{0xff, 01}},
-	Optab{ADECW, yincw, Pe, [23]uint8{0xff, 01}},
-	Optab{ADIVB, ydivb, Pb, [23]uint8{0xf6, 06}},
-	Optab{ADIVL, ydivl, Px, [23]uint8{0xf7, 06}},
-	Optab{ADIVPD, yxm, Pe, [23]uint8{0x5e}},
-	Optab{ADIVPS, yxm, Pm, [23]uint8{0x5e}},
-	Optab{ADIVQ, ydivl, Pw, [23]uint8{0xf7, 06}},
-	Optab{ADIVSD, yxm, Pf2, [23]uint8{0x5e}},
-	Optab{ADIVSS, yxm, Pf3, [23]uint8{0x5e}},
-	Optab{ADIVW, ydivl, Pe, [23]uint8{0xf7, 06}},
-	Optab{AEMMS, ynone, Pm, [23]uint8{0x77}},
-	Optab{AENTER, nil, 0, [23]uint8{}}, /* botch */
-	Optab{AFXRSTOR, ysvrs, Pm, [23]uint8{0xae, 01, 0xae, 01}},
-	Optab{AFXSAVE, ysvrs, Pm, [23]uint8{0xae, 00, 0xae, 00}},
-	Optab{AFXRSTOR64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
-	Optab{AFXSAVE64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
-	Optab{obj.AGLOBL, nil, 0, [23]uint8{}},
-	Optab{AHLT, ynone, Px, [23]uint8{0xf4}},
-	Optab{AIDIVB, ydivb, Pb, [23]uint8{0xf6, 07}},
-	Optab{AIDIVL, ydivl, Px, [23]uint8{0xf7, 07}},
-	Optab{AIDIVQ, ydivl, Pw, [23]uint8{0xf7, 07}},
-	Optab{AIDIVW, ydivl, Pe, [23]uint8{0xf7, 07}},
-	Optab{AIMULB, ydivb, Pb, [23]uint8{0xf6, 05}},
-	Optab{AIMULL, yimul, Px, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
-	Optab{AIMULQ, yimul, Pw, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
-	Optab{AIMULW, yimul, Pe, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
-	Optab{AIMUL3Q, yimul3, Pw, [23]uint8{0x6b, 00}},
-	Optab{AINB, yin, Pb, [23]uint8{0xe4, 0xec}},
-	Optab{AINCB, yincb, Pb, [23]uint8{0xfe, 00}},
-	Optab{AINCL, yincl, Px1, [23]uint8{0x40, 0xff, 00}},
-	Optab{AINCQ, yincq, Pw, [23]uint8{0xff, 00}},
-	Optab{AINCW, yincw, Pe, [23]uint8{0xff, 00}},
-	Optab{AINL, yin, Px, [23]uint8{0xe5, 0xed}},
-	Optab{AINSB, ynone, Pb, [23]uint8{0x6c}},
-	Optab{AINSL, ynone, Px, [23]uint8{0x6d}},
-	Optab{AINSW, ynone, Pe, [23]uint8{0x6d}},
-	Optab{AINT, yint, Px, [23]uint8{0xcd}},
-	Optab{AINTO, ynone, P32, [23]uint8{0xce}},
-	Optab{AINW, yin, Pe, [23]uint8{0xe5, 0xed}},
-	Optab{AIRETL, ynone, Px, [23]uint8{0xcf}},
-	Optab{AIRETQ, ynone, Pw, [23]uint8{0xcf}},
-	Optab{AIRETW, ynone, Pe, [23]uint8{0xcf}},
-	Optab{AJCC, yjcond, Px, [23]uint8{0x73, 0x83, 00}},
-	Optab{AJCS, yjcond, Px, [23]uint8{0x72, 0x82}},
-	Optab{AJCXZL, yloop, Px, [23]uint8{0xe3}},
-	Optab{AJCXZW, yloop, Px, [23]uint8{0xe3}},
-	Optab{AJCXZQ, yloop, Px, [23]uint8{0xe3}},
-	Optab{AJEQ, yjcond, Px, [23]uint8{0x74, 0x84}},
-	Optab{AJGE, yjcond, Px, [23]uint8{0x7d, 0x8d}},
-	Optab{AJGT, yjcond, Px, [23]uint8{0x7f, 0x8f}},
-	Optab{AJHI, yjcond, Px, [23]uint8{0x77, 0x87}},
-	Optab{AJLE, yjcond, Px, [23]uint8{0x7e, 0x8e}},
-	Optab{AJLS, yjcond, Px, [23]uint8{0x76, 0x86}},
-	Optab{AJLT, yjcond, Px, [23]uint8{0x7c, 0x8c}},
-	Optab{AJMI, yjcond, Px, [23]uint8{0x78, 0x88}},
-	Optab{obj.AJMP, yjmp, Px, [23]uint8{0xff, 04, 0xeb, 0xe9}},
-	Optab{AJNE, yjcond, Px, [23]uint8{0x75, 0x85}},
-	Optab{AJOC, yjcond, Px, [23]uint8{0x71, 0x81, 00}},
-	Optab{AJOS, yjcond, Px, [23]uint8{0x70, 0x80, 00}},
-	Optab{AJPC, yjcond, Px, [23]uint8{0x7b, 0x8b}},
-	Optab{AJPL, yjcond, Px, [23]uint8{0x79, 0x89}},
-	Optab{AJPS, yjcond, Px, [23]uint8{0x7a, 0x8a}},
-	Optab{ALAHF, ynone, Px, [23]uint8{0x9f}},
-	Optab{ALARL, yml_rl, Pm, [23]uint8{0x02}},
-	Optab{ALARW, yml_rl, Pq, [23]uint8{0x02}},
-	Optab{ALDMXCSR, ysvrs, Pm, [23]uint8{0xae, 02, 0xae, 02}},
-	Optab{ALEAL, ym_rl, Px, [23]uint8{0x8d}},
-	Optab{ALEAQ, ym_rl, Pw, [23]uint8{0x8d}},
-	Optab{ALEAVEL, ynone, P32, [23]uint8{0xc9}},
-	Optab{ALEAVEQ, ynone, Py, [23]uint8{0xc9}},
-	Optab{ALEAVEW, ynone, Pe, [23]uint8{0xc9}},
-	Optab{ALEAW, ym_rl, Pe, [23]uint8{0x8d}},
-	Optab{ALOCK, ynone, Px, [23]uint8{0xf0}},
-	Optab{ALODSB, ynone, Pb, [23]uint8{0xac}},
-	Optab{ALODSL, ynone, Px, [23]uint8{0xad}},
-	Optab{ALODSQ, ynone, Pw, [23]uint8{0xad}},
-	Optab{ALODSW, ynone, Pe, [23]uint8{0xad}},
-	Optab{ALONG, ybyte, Px, [23]uint8{4}},
-	Optab{ALOOP, yloop, Px, [23]uint8{0xe2}},
-	Optab{ALOOPEQ, yloop, Px, [23]uint8{0xe1}},
-	Optab{ALOOPNE, yloop, Px, [23]uint8{0xe0}},
-	Optab{ALSLL, yml_rl, Pm, [23]uint8{0x03}},
-	Optab{ALSLW, yml_rl, Pq, [23]uint8{0x03}},
-	Optab{AMASKMOVOU, yxr, Pe, [23]uint8{0xf7}},
-	Optab{AMASKMOVQ, ymr, Pm, [23]uint8{0xf7}},
-	Optab{AMAXPD, yxm, Pe, [23]uint8{0x5f}},
-	Optab{AMAXPS, yxm, Pm, [23]uint8{0x5f}},
-	Optab{AMAXSD, yxm, Pf2, [23]uint8{0x5f}},
-	Optab{AMAXSS, yxm, Pf3, [23]uint8{0x5f}},
-	Optab{AMINPD, yxm, Pe, [23]uint8{0x5d}},
-	Optab{AMINPS, yxm, Pm, [23]uint8{0x5d}},
-	Optab{AMINSD, yxm, Pf2, [23]uint8{0x5d}},
-	Optab{AMINSS, yxm, Pf3, [23]uint8{0x5d}},
-	Optab{AMOVAPD, yxmov, Pe, [23]uint8{0x28, 0x29}},
-	Optab{AMOVAPS, yxmov, Pm, [23]uint8{0x28, 0x29}},
-	Optab{AMOVB, ymovb, Pb, [23]uint8{0x88, 0x8a, 0xb0, 0xc6, 00}},
-	Optab{AMOVBLSX, ymb_rl, Pm, [23]uint8{0xbe}},
-	Optab{AMOVBLZX, ymb_rl, Pm, [23]uint8{0xb6}},
-	Optab{AMOVBQSX, ymb_rl, Pw, [23]uint8{0x0f, 0xbe}},
-	Optab{AMOVBQZX, ymb_rl, Pm, [23]uint8{0xb6}},
-	Optab{AMOVBWSX, ymb_rl, Pq, [23]uint8{0xbe}},
-	Optab{AMOVBWZX, ymb_rl, Pq, [23]uint8{0xb6}},
-	Optab{AMOVO, yxmov, Pe, [23]uint8{0x6f, 0x7f}},
-	Optab{AMOVOU, yxmov, Pf3, [23]uint8{0x6f, 0x7f}},
-	Optab{AMOVHLPS, yxr, Pm, [23]uint8{0x12}},
-	Optab{AMOVHPD, yxmov, Pe, [23]uint8{0x16, 0x17}},
-	Optab{AMOVHPS, yxmov, Pm, [23]uint8{0x16, 0x17}},
-	Optab{AMOVL, ymovl, Px, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
-	Optab{AMOVLHPS, yxr, Pm, [23]uint8{0x16}},
-	Optab{AMOVLPD, yxmov, Pe, [23]uint8{0x12, 0x13}},
-	Optab{AMOVLPS, yxmov, Pm, [23]uint8{0x12, 0x13}},
-	Optab{AMOVLQSX, yml_rl, Pw, [23]uint8{0x63}},
-	Optab{AMOVLQZX, yml_rl, Px, [23]uint8{0x8b}},
-	Optab{AMOVMSKPD, yxrrl, Pq, [23]uint8{0x50}},
-	Optab{AMOVMSKPS, yxrrl, Pm, [23]uint8{0x50}},
-	Optab{AMOVNTO, yxr_ml, Pe, [23]uint8{0xe7}},
-	Optab{AMOVNTPD, yxr_ml, Pe, [23]uint8{0x2b}},
-	Optab{AMOVNTPS, yxr_ml, Pm, [23]uint8{0x2b}},
-	Optab{AMOVNTQ, ymr_ml, Pm, [23]uint8{0xe7}},
-	Optab{AMOVQ, ymovq, Pw8, [23]uint8{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0x31, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
-	Optab{AMOVQOZX, ymrxr, Pf3, [23]uint8{0xd6, 0x7e}},
-	Optab{AMOVSB, ynone, Pb, [23]uint8{0xa4}},
-	Optab{AMOVSD, yxmov, Pf2, [23]uint8{0x10, 0x11}},
-	Optab{AMOVSL, ynone, Px, [23]uint8{0xa5}},
-	Optab{AMOVSQ, ynone, Pw, [23]uint8{0xa5}},
-	Optab{AMOVSS, yxmov, Pf3, [23]uint8{0x10, 0x11}},
-	Optab{AMOVSW, ynone, Pe, [23]uint8{0xa5}},
-	Optab{AMOVUPD, yxmov, Pe, [23]uint8{0x10, 0x11}},
-	Optab{AMOVUPS, yxmov, Pm, [23]uint8{0x10, 0x11}},
-	Optab{AMOVW, ymovw, Pe, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0}},
-	Optab{AMOVWLSX, yml_rl, Pm, [23]uint8{0xbf}},
-	Optab{AMOVWLZX, yml_rl, Pm, [23]uint8{0xb7}},
-	Optab{AMOVWQSX, yml_rl, Pw, [23]uint8{0x0f, 0xbf}},
-	Optab{AMOVWQZX, yml_rl, Pw, [23]uint8{0x0f, 0xb7}},
-	Optab{AMULB, ydivb, Pb, [23]uint8{0xf6, 04}},
-	Optab{AMULL, ydivl, Px, [23]uint8{0xf7, 04}},
-	Optab{AMULPD, yxm, Pe, [23]uint8{0x59}},
-	Optab{AMULPS, yxm, Ym, [23]uint8{0x59}},
-	Optab{AMULQ, ydivl, Pw, [23]uint8{0xf7, 04}},
-	Optab{AMULSD, yxm, Pf2, [23]uint8{0x59}},
-	Optab{AMULSS, yxm, Pf3, [23]uint8{0x59}},
-	Optab{AMULW, ydivl, Pe, [23]uint8{0xf7, 04}},
-	Optab{ANEGB, yscond, Pb, [23]uint8{0xf6, 03}},
-	Optab{ANEGL, yscond, Px, [23]uint8{0xf7, 03}},
-	Optab{ANEGQ, yscond, Pw, [23]uint8{0xf7, 03}},
-	Optab{ANEGW, yscond, Pe, [23]uint8{0xf7, 03}},
-	Optab{obj.ANOP, ynop, Px, [23]uint8{0, 0}},
-	Optab{ANOTB, yscond, Pb, [23]uint8{0xf6, 02}},
-	Optab{ANOTL, yscond, Px, [23]uint8{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
-	Optab{ANOTQ, yscond, Pw, [23]uint8{0xf7, 02}},
-	Optab{ANOTW, yscond, Pe, [23]uint8{0xf7, 02}},
-	Optab{AORB, yxorb, Pb, [23]uint8{0x0c, 0x80, 01, 0x08, 0x0a}},
-	Optab{AORL, yxorl, Px, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
-	Optab{AORPD, yxm, Pq, [23]uint8{0x56}},
-	Optab{AORPS, yxm, Pm, [23]uint8{0x56}},
-	Optab{AORQ, yxorl, Pw, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
-	Optab{AORW, yxorl, Pe, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
-	Optab{AOUTB, yin, Pb, [23]uint8{0xe6, 0xee}},
-	Optab{AOUTL, yin, Px, [23]uint8{0xe7, 0xef}},
-	Optab{AOUTSB, ynone, Pb, [23]uint8{0x6e}},
-	Optab{AOUTSL, ynone, Px, [23]uint8{0x6f}},
-	Optab{AOUTSW, ynone, Pe, [23]uint8{0x6f}},
-	Optab{AOUTW, yin, Pe, [23]uint8{0xe7, 0xef}},
-	Optab{APACKSSLW, ymm, Py1, [23]uint8{0x6b, Pe, 0x6b}},
-	Optab{APACKSSWB, ymm, Py1, [23]uint8{0x63, Pe, 0x63}},
-	Optab{APACKUSWB, ymm, Py1, [23]uint8{0x67, Pe, 0x67}},
-	Optab{APADDB, ymm, Py1, [23]uint8{0xfc, Pe, 0xfc}},
-	Optab{APADDL, ymm, Py1, [23]uint8{0xfe, Pe, 0xfe}},
-	Optab{APADDQ, yxm, Pe, [23]uint8{0xd4}},
-	Optab{APADDSB, ymm, Py1, [23]uint8{0xec, Pe, 0xec}},
-	Optab{APADDSW, ymm, Py1, [23]uint8{0xed, Pe, 0xed}},
-	Optab{APADDUSB, ymm, Py1, [23]uint8{0xdc, Pe, 0xdc}},
-	Optab{APADDUSW, ymm, Py1, [23]uint8{0xdd, Pe, 0xdd}},
-	Optab{APADDW, ymm, Py1, [23]uint8{0xfd, Pe, 0xfd}},
-	Optab{APAND, ymm, Py1, [23]uint8{0xdb, Pe, 0xdb}},
-	Optab{APANDN, ymm, Py1, [23]uint8{0xdf, Pe, 0xdf}},
-	Optab{APAUSE, ynone, Px, [23]uint8{0xf3, 0x90}},
-	Optab{APAVGB, ymm, Py1, [23]uint8{0xe0, Pe, 0xe0}},
-	Optab{APAVGW, ymm, Py1, [23]uint8{0xe3, Pe, 0xe3}},
-	Optab{APCMPEQB, ymm, Py1, [23]uint8{0x74, Pe, 0x74}},
-	Optab{APCMPEQL, ymm, Py1, [23]uint8{0x76, Pe, 0x76}},
-	Optab{APCMPEQW, ymm, Py1, [23]uint8{0x75, Pe, 0x75}},
-	Optab{APCMPGTB, ymm, Py1, [23]uint8{0x64, Pe, 0x64}},
-	Optab{APCMPGTL, ymm, Py1, [23]uint8{0x66, Pe, 0x66}},
-	Optab{APCMPGTW, ymm, Py1, [23]uint8{0x65, Pe, 0x65}},
-	Optab{APEXTRW, yextrw, Pq, [23]uint8{0xc5, 00}},
-	Optab{APF2IL, ymfp, Px, [23]uint8{0x1d}},
-	Optab{APF2IW, ymfp, Px, [23]uint8{0x1c}},
-	Optab{API2FL, ymfp, Px, [23]uint8{0x0d}},
-	Optab{APFACC, ymfp, Px, [23]uint8{0xae}},
-	Optab{APFADD, ymfp, Px, [23]uint8{0x9e}},
-	Optab{APFCMPEQ, ymfp, Px, [23]uint8{0xb0}},
-	Optab{APFCMPGE, ymfp, Px, [23]uint8{0x90}},
-	Optab{APFCMPGT, ymfp, Px, [23]uint8{0xa0}},
-	Optab{APFMAX, ymfp, Px, [23]uint8{0xa4}},
-	Optab{APFMIN, ymfp, Px, [23]uint8{0x94}},
-	Optab{APFMUL, ymfp, Px, [23]uint8{0xb4}},
-	Optab{APFNACC, ymfp, Px, [23]uint8{0x8a}},
-	Optab{APFPNACC, ymfp, Px, [23]uint8{0x8e}},
-	Optab{APFRCP, ymfp, Px, [23]uint8{0x96}},
-	Optab{APFRCPIT1, ymfp, Px, [23]uint8{0xa6}},
-	Optab{APFRCPI2T, ymfp, Px, [23]uint8{0xb6}},
-	Optab{APFRSQIT1, ymfp, Px, [23]uint8{0xa7}},
-	Optab{APFRSQRT, ymfp, Px, [23]uint8{0x97}},
-	Optab{APFSUB, ymfp, Px, [23]uint8{0x9a}},
-	Optab{APFSUBR, ymfp, Px, [23]uint8{0xaa}},
-	Optab{APINSRW, yinsrw, Pq, [23]uint8{0xc4, 00}},
-	Optab{APINSRD, yinsr, Pq, [23]uint8{0x3a, 0x22, 00}},
-	Optab{APINSRQ, yinsr, Pq3, [23]uint8{0x3a, 0x22, 00}},
-	Optab{APMADDWL, ymm, Py1, [23]uint8{0xf5, Pe, 0xf5}},
-	Optab{APMAXSW, yxm, Pe, [23]uint8{0xee}},
-	Optab{APMAXUB, yxm, Pe, [23]uint8{0xde}},
-	Optab{APMINSW, yxm, Pe, [23]uint8{0xea}},
-	Optab{APMINUB, yxm, Pe, [23]uint8{0xda}},
-	Optab{APMOVMSKB, ymskb, Px, [23]uint8{Pe, 0xd7, 0xd7}},
-	Optab{APMULHRW, ymfp, Px, [23]uint8{0xb7}},
-	Optab{APMULHUW, ymm, Py1, [23]uint8{0xe4, Pe, 0xe4}},
-	Optab{APMULHW, ymm, Py1, [23]uint8{0xe5, Pe, 0xe5}},
-	Optab{APMULLW, ymm, Py1, [23]uint8{0xd5, Pe, 0xd5}},
-	Optab{APMULULQ, ymm, Py1, [23]uint8{0xf4, Pe, 0xf4}},
-	Optab{APOPAL, ynone, P32, [23]uint8{0x61}},
-	Optab{APOPAW, ynone, Pe, [23]uint8{0x61}},
-	Optab{APOPFL, ynone, P32, [23]uint8{0x9d}},
-	Optab{APOPFQ, ynone, Py, [23]uint8{0x9d}},
-	Optab{APOPFW, ynone, Pe, [23]uint8{0x9d}},
-	Optab{APOPL, ypopl, P32, [23]uint8{0x58, 0x8f, 00}},
-	Optab{APOPQ, ypopl, Py, [23]uint8{0x58, 0x8f, 00}},
-	Optab{APOPW, ypopl, Pe, [23]uint8{0x58, 0x8f, 00}},
-	Optab{APOR, ymm, Py1, [23]uint8{0xeb, Pe, 0xeb}},
-	Optab{APSADBW, yxm, Pq, [23]uint8{0xf6}},
-	Optab{APSHUFHW, yxshuf, Pf3, [23]uint8{0x70, 00}},
-	Optab{APSHUFL, yxshuf, Pq, [23]uint8{0x70, 00}},
-	Optab{APSHUFLW, yxshuf, Pf2, [23]uint8{0x70, 00}},
-	Optab{APSHUFW, ymshuf, Pm, [23]uint8{0x70, 00}},
-	Optab{APSHUFB, ymshufb, Pq, [23]uint8{0x38, 0x00}},
-	Optab{APSLLO, ypsdq, Pq, [23]uint8{0x73, 07}},
-	Optab{APSLLL, yps, Py3, [23]uint8{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
-	Optab{APSLLQ, yps, Py3, [23]uint8{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
-	Optab{APSLLW, yps, Py3, [23]uint8{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
-	Optab{APSRAL, yps, Py3, [23]uint8{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
-	Optab{APSRAW, yps, Py3, [23]uint8{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
-	Optab{APSRLO, ypsdq, Pq, [23]uint8{0x73, 03}},
-	Optab{APSRLL, yps, Py3, [23]uint8{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
-	Optab{APSRLQ, yps, Py3, [23]uint8{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
-	Optab{APSRLW, yps, Py3, [23]uint8{0xd1, 0x71, 02, Pe, 0xe1, Pe, 0x71, 02}},
-	Optab{APSUBB, yxm, Pe, [23]uint8{0xf8}},
-	Optab{APSUBL, yxm, Pe, [23]uint8{0xfa}},
-	Optab{APSUBQ, yxm, Pe, [23]uint8{0xfb}},
-	Optab{APSUBSB, yxm, Pe, [23]uint8{0xe8}},
-	Optab{APSUBSW, yxm, Pe, [23]uint8{0xe9}},
-	Optab{APSUBUSB, yxm, Pe, [23]uint8{0xd8}},
-	Optab{APSUBUSW, yxm, Pe, [23]uint8{0xd9}},
-	Optab{APSUBW, yxm, Pe, [23]uint8{0xf9}},
-	Optab{APSWAPL, ymfp, Px, [23]uint8{0xbb}},
-	Optab{APUNPCKHBW, ymm, Py1, [23]uint8{0x68, Pe, 0x68}},
-	Optab{APUNPCKHLQ, ymm, Py1, [23]uint8{0x6a, Pe, 0x6a}},
-	Optab{APUNPCKHQDQ, yxm, Pe, [23]uint8{0x6d}},
-	Optab{APUNPCKHWL, ymm, Py1, [23]uint8{0x69, Pe, 0x69}},
-	Optab{APUNPCKLBW, ymm, Py1, [23]uint8{0x60, Pe, 0x60}},
-	Optab{APUNPCKLLQ, ymm, Py1, [23]uint8{0x62, Pe, 0x62}},
-	Optab{APUNPCKLQDQ, yxm, Pe, [23]uint8{0x6c}},
-	Optab{APUNPCKLWL, ymm, Py1, [23]uint8{0x61, Pe, 0x61}},
-	Optab{APUSHAL, ynone, P32, [23]uint8{0x60}},
-	Optab{APUSHAW, ynone, Pe, [23]uint8{0x60}},
-	Optab{APUSHFL, ynone, P32, [23]uint8{0x9c}},
-	Optab{APUSHFQ, ynone, Py, [23]uint8{0x9c}},
-	Optab{APUSHFW, ynone, Pe, [23]uint8{0x9c}},
-	Optab{APUSHL, ypushl, P32, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
-	Optab{APUSHQ, ypushl, Py, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
-	Optab{APUSHW, ypushl, Pe, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
-	Optab{APXOR, ymm, Py1, [23]uint8{0xef, Pe, 0xef}},
-	Optab{AQUAD, ybyte, Px, [23]uint8{8}},
-	Optab{ARCLB, yshb, Pb, [23]uint8{0xd0, 02, 0xc0, 02, 0xd2, 02}},
-	Optab{ARCLL, yshl, Px, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
-	Optab{ARCLQ, yshl, Pw, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
-	Optab{ARCLW, yshl, Pe, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
-	Optab{ARCPPS, yxm, Pm, [23]uint8{0x53}},
-	Optab{ARCPSS, yxm, Pf3, [23]uint8{0x53}},
-	Optab{ARCRB, yshb, Pb, [23]uint8{0xd0, 03, 0xc0, 03, 0xd2, 03}},
-	Optab{ARCRL, yshl, Px, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
-	Optab{ARCRQ, yshl, Pw, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
-	Optab{ARCRW, yshl, Pe, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
-	Optab{AREP, ynone, Px, [23]uint8{0xf3}},
-	Optab{AREPN, ynone, Px, [23]uint8{0xf2}},
-	Optab{obj.ARET, ynone, Px, [23]uint8{0xc3}},
-	Optab{ARETFW, yret, Pe, [23]uint8{0xcb, 0xca}},
-	Optab{ARETFL, yret, Px, [23]uint8{0xcb, 0xca}},
-	Optab{ARETFQ, yret, Pw, [23]uint8{0xcb, 0xca}},
-	Optab{AROLB, yshb, Pb, [23]uint8{0xd0, 00, 0xc0, 00, 0xd2, 00}},
-	Optab{AROLL, yshl, Px, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
-	Optab{AROLQ, yshl, Pw, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
-	Optab{AROLW, yshl, Pe, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
-	Optab{ARORB, yshb, Pb, [23]uint8{0xd0, 01, 0xc0, 01, 0xd2, 01}},
-	Optab{ARORL, yshl, Px, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
-	Optab{ARORQ, yshl, Pw, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
-	Optab{ARORW, yshl, Pe, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
-	Optab{ARSQRTPS, yxm, Pm, [23]uint8{0x52}},
-	Optab{ARSQRTSS, yxm, Pf3, [23]uint8{0x52}},
-	Optab{ASAHF, ynone, Px1, [23]uint8{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, /* XCHGB AH,AL; PUSH AX; POPFL */
-	Optab{ASALB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
-	Optab{ASALL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
-	Optab{ASALQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
-	Optab{ASALW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
-	Optab{ASARB, yshb, Pb, [23]uint8{0xd0, 07, 0xc0, 07, 0xd2, 07}},
-	Optab{ASARL, yshl, Px, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
-	Optab{ASARQ, yshl, Pw, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
-	Optab{ASARW, yshl, Pe, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
-	Optab{ASBBB, yxorb, Pb, [23]uint8{0x1c, 0x80, 03, 0x18, 0x1a}},
-	Optab{ASBBL, yxorl, Px, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
-	Optab{ASBBQ, yxorl, Pw, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
-	Optab{ASBBW, yxorl, Pe, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
-	Optab{ASCASB, ynone, Pb, [23]uint8{0xae}},
-	Optab{ASCASL, ynone, Px, [23]uint8{0xaf}},
-	Optab{ASCASQ, ynone, Pw, [23]uint8{0xaf}},
-	Optab{ASCASW, ynone, Pe, [23]uint8{0xaf}},
-	Optab{ASETCC, yscond, Pb, [23]uint8{0x0f, 0x93, 00}},
-	Optab{ASETCS, yscond, Pb, [23]uint8{0x0f, 0x92, 00}},
-	Optab{ASETEQ, yscond, Pb, [23]uint8{0x0f, 0x94, 00}},
-	Optab{ASETGE, yscond, Pb, [23]uint8{0x0f, 0x9d, 00}},
-	Optab{ASETGT, yscond, Pb, [23]uint8{0x0f, 0x9f, 00}},
-	Optab{ASETHI, yscond, Pb, [23]uint8{0x0f, 0x97, 00}},
-	Optab{ASETLE, yscond, Pb, [23]uint8{0x0f, 0x9e, 00}},
-	Optab{ASETLS, yscond, Pb, [23]uint8{0x0f, 0x96, 00}},
-	Optab{ASETLT, yscond, Pb, [23]uint8{0x0f, 0x9c, 00}},
-	Optab{ASETMI, yscond, Pb, [23]uint8{0x0f, 0x98, 00}},
-	Optab{ASETNE, yscond, Pb, [23]uint8{0x0f, 0x95, 00}},
-	Optab{ASETOC, yscond, Pb, [23]uint8{0x0f, 0x91, 00}},
-	Optab{ASETOS, yscond, Pb, [23]uint8{0x0f, 0x90, 00}},
-	Optab{ASETPC, yscond, Pb, [23]uint8{0x0f, 0x9b, 00}},
-	Optab{ASETPL, yscond, Pb, [23]uint8{0x0f, 0x99, 00}},
-	Optab{ASETPS, yscond, Pb, [23]uint8{0x0f, 0x9a, 00}},
-	Optab{ASHLB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
-	Optab{ASHLL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
-	Optab{ASHLQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
-	Optab{ASHLW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
-	Optab{ASHRB, yshb, Pb, [23]uint8{0xd0, 05, 0xc0, 05, 0xd2, 05}},
-	Optab{ASHRL, yshl, Px, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
-	Optab{ASHRQ, yshl, Pw, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
-	Optab{ASHRW, yshl, Pe, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
-	Optab{ASHUFPD, yxshuf, Pq, [23]uint8{0xc6, 00}},
-	Optab{ASHUFPS, yxshuf, Pm, [23]uint8{0xc6, 00}},
-	Optab{ASQRTPD, yxm, Pe, [23]uint8{0x51}},
-	Optab{ASQRTPS, yxm, Pm, [23]uint8{0x51}},
-	Optab{ASQRTSD, yxm, Pf2, [23]uint8{0x51}},
-	Optab{ASQRTSS, yxm, Pf3, [23]uint8{0x51}},
-	Optab{ASTC, ynone, Px, [23]uint8{0xf9}},
-	Optab{ASTD, ynone, Px, [23]uint8{0xfd}},
-	Optab{ASTI, ynone, Px, [23]uint8{0xfb}},
-	Optab{ASTMXCSR, ysvrs, Pm, [23]uint8{0xae, 03, 0xae, 03}},
-	Optab{ASTOSB, ynone, Pb, [23]uint8{0xaa}},
-	Optab{ASTOSL, ynone, Px, [23]uint8{0xab}},
-	Optab{ASTOSQ, ynone, Pw, [23]uint8{0xab}},
-	Optab{ASTOSW, ynone, Pe, [23]uint8{0xab}},
-	Optab{ASUBB, yxorb, Pb, [23]uint8{0x2c, 0x80, 05, 0x28, 0x2a}},
-	Optab{ASUBL, yaddl, Px, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
-	Optab{ASUBPD, yxm, Pe, [23]uint8{0x5c}},
-	Optab{ASUBPS, yxm, Pm, [23]uint8{0x5c}},
-	Optab{ASUBQ, yaddl, Pw, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
-	Optab{ASUBSD, yxm, Pf2, [23]uint8{0x5c}},
-	Optab{ASUBSS, yxm, Pf3, [23]uint8{0x5c}},
-	Optab{ASUBW, yaddl, Pe, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
-	Optab{ASWAPGS, ynone, Pm, [23]uint8{0x01, 0xf8}},
-	Optab{ASYSCALL, ynone, Px, [23]uint8{0x0f, 0x05}}, /* fast syscall */
-	Optab{ATESTB, ytestb, Pb, [23]uint8{0xa8, 0xf6, 00, 0x84, 0x84}},
-	Optab{ATESTL, ytestl, Px, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
-	Optab{ATESTQ, ytestl, Pw, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
-	Optab{ATESTW, ytestl, Pe, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
-	Optab{obj.ATEXT, ytext, Px, [23]uint8{}},
-	Optab{AUCOMISD, yxcmp, Pe, [23]uint8{0x2e}},
-	Optab{AUCOMISS, yxcmp, Pm, [23]uint8{0x2e}},
-	Optab{AUNPCKHPD, yxm, Pe, [23]uint8{0x15}},
-	Optab{AUNPCKHPS, yxm, Pm, [23]uint8{0x15}},
-	Optab{AUNPCKLPD, yxm, Pe, [23]uint8{0x14}},
-	Optab{AUNPCKLPS, yxm, Pm, [23]uint8{0x14}},
-	Optab{AVERR, ydivl, Pm, [23]uint8{0x00, 04}},
-	Optab{AVERW, ydivl, Pm, [23]uint8{0x00, 05}},
-	Optab{AWAIT, ynone, Px, [23]uint8{0x9b}},
-	Optab{AWORD, ybyte, Px, [23]uint8{2}},
-	Optab{AXCHGB, yml_mb, Pb, [23]uint8{0x86, 0x86}},
-	Optab{AXCHGL, yxchg, Px, [23]uint8{0x90, 0x90, 0x87, 0x87}},
-	Optab{AXCHGQ, yxchg, Pw, [23]uint8{0x90, 0x90, 0x87, 0x87}},
-	Optab{AXCHGW, yxchg, Pe, [23]uint8{0x90, 0x90, 0x87, 0x87}},
-	Optab{AXLAT, ynone, Px, [23]uint8{0xd7}},
-	Optab{AXORB, yxorb, Pb, [23]uint8{0x34, 0x80, 06, 0x30, 0x32}},
-	Optab{AXORL, yxorl, Px, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
-	Optab{AXORPD, yxm, Pe, [23]uint8{0x57}},
-	Optab{AXORPS, yxm, Pm, [23]uint8{0x57}},
-	Optab{AXORQ, yxorl, Pw, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
-	Optab{AXORW, yxorl, Pe, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
-	Optab{AFMOVB, yfmvx, Px, [23]uint8{0xdf, 04}},
-	Optab{AFMOVBP, yfmvp, Px, [23]uint8{0xdf, 06}},
-	Optab{AFMOVD, yfmvd, Px, [23]uint8{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
-	Optab{AFMOVDP, yfmvdp, Px, [23]uint8{0xdd, 03, 0xdd, 03}},
-	Optab{AFMOVF, yfmvf, Px, [23]uint8{0xd9, 00, 0xd9, 02}},
-	Optab{AFMOVFP, yfmvp, Px, [23]uint8{0xd9, 03}},
-	Optab{AFMOVL, yfmvf, Px, [23]uint8{0xdb, 00, 0xdb, 02}},
-	Optab{AFMOVLP, yfmvp, Px, [23]uint8{0xdb, 03}},
-	Optab{AFMOVV, yfmvx, Px, [23]uint8{0xdf, 05}},
-	Optab{AFMOVVP, yfmvp, Px, [23]uint8{0xdf, 07}},
-	Optab{AFMOVW, yfmvf, Px, [23]uint8{0xdf, 00, 0xdf, 02}},
-	Optab{AFMOVWP, yfmvp, Px, [23]uint8{0xdf, 03}},
-	Optab{AFMOVX, yfmvx, Px, [23]uint8{0xdb, 05}},
-	Optab{AFMOVXP, yfmvp, Px, [23]uint8{0xdb, 07}},
-	Optab{AFCMOVCC, yfcmv, Px, [23]uint8{0xdb, 00}},
-	Optab{AFCMOVCS, yfcmv, Px, [23]uint8{0xda, 00}},
-	Optab{AFCMOVEQ, yfcmv, Px, [23]uint8{0xda, 01}},
-	Optab{AFCMOVHI, yfcmv, Px, [23]uint8{0xdb, 02}},
-	Optab{AFCMOVLS, yfcmv, Px, [23]uint8{0xda, 02}},
-	Optab{AFCMOVNE, yfcmv, Px, [23]uint8{0xdb, 01}},
-	Optab{AFCMOVNU, yfcmv, Px, [23]uint8{0xdb, 03}},
-	Optab{AFCMOVUN, yfcmv, Px, [23]uint8{0xda, 03}},
-	Optab{AFCOMB, nil, 0, [23]uint8{}},
-	Optab{AFCOMBP, nil, 0, [23]uint8{}},
-	Optab{AFCOMD, yfadd, Px, [23]uint8{0xdc, 02, 0xd8, 02, 0xdc, 02}},  /* botch */
-	Optab{AFCOMDP, yfadd, Px, [23]uint8{0xdc, 03, 0xd8, 03, 0xdc, 03}}, /* botch */
-	Optab{AFCOMDPP, ycompp, Px, [23]uint8{0xde, 03}},
-	Optab{AFCOMF, yfmvx, Px, [23]uint8{0xd8, 02}},
-	Optab{AFCOMFP, yfmvx, Px, [23]uint8{0xd8, 03}},
-	Optab{AFCOMI, yfmvx, Px, [23]uint8{0xdb, 06}},
-	Optab{AFCOMIP, yfmvx, Px, [23]uint8{0xdf, 06}},
-	Optab{AFCOML, yfmvx, Px, [23]uint8{0xda, 02}},
-	Optab{AFCOMLP, yfmvx, Px, [23]uint8{0xda, 03}},
-	Optab{AFCOMW, yfmvx, Px, [23]uint8{0xde, 02}},
-	Optab{AFCOMWP, yfmvx, Px, [23]uint8{0xde, 03}},
-	Optab{AFUCOM, ycompp, Px, [23]uint8{0xdd, 04}},
-	Optab{AFUCOMI, ycompp, Px, [23]uint8{0xdb, 05}},
-	Optab{AFUCOMIP, ycompp, Px, [23]uint8{0xdf, 05}},
-	Optab{AFUCOMP, ycompp, Px, [23]uint8{0xdd, 05}},
-	Optab{AFUCOMPP, ycompp, Px, [23]uint8{0xda, 13}},
-	Optab{AFADDDP, yfaddp, Px, [23]uint8{0xde, 00}},
-	Optab{AFADDW, yfmvx, Px, [23]uint8{0xde, 00}},
-	Optab{AFADDL, yfmvx, Px, [23]uint8{0xda, 00}},
-	Optab{AFADDF, yfmvx, Px, [23]uint8{0xd8, 00}},
-	Optab{AFADDD, yfadd, Px, [23]uint8{0xdc, 00, 0xd8, 00, 0xdc, 00}},
-	Optab{AFMULDP, yfaddp, Px, [23]uint8{0xde, 01}},
-	Optab{AFMULW, yfmvx, Px, [23]uint8{0xde, 01}},
-	Optab{AFMULL, yfmvx, Px, [23]uint8{0xda, 01}},
-	Optab{AFMULF, yfmvx, Px, [23]uint8{0xd8, 01}},
-	Optab{AFMULD, yfadd, Px, [23]uint8{0xdc, 01, 0xd8, 01, 0xdc, 01}},
-	Optab{AFSUBDP, yfaddp, Px, [23]uint8{0xde, 05}},
-	Optab{AFSUBW, yfmvx, Px, [23]uint8{0xde, 04}},
-	Optab{AFSUBL, yfmvx, Px, [23]uint8{0xda, 04}},
-	Optab{AFSUBF, yfmvx, Px, [23]uint8{0xd8, 04}},
-	Optab{AFSUBD, yfadd, Px, [23]uint8{0xdc, 04, 0xd8, 04, 0xdc, 05}},
-	Optab{AFSUBRDP, yfaddp, Px, [23]uint8{0xde, 04}},
-	Optab{AFSUBRW, yfmvx, Px, [23]uint8{0xde, 05}},
-	Optab{AFSUBRL, yfmvx, Px, [23]uint8{0xda, 05}},
-	Optab{AFSUBRF, yfmvx, Px, [23]uint8{0xd8, 05}},
-	Optab{AFSUBRD, yfadd, Px, [23]uint8{0xdc, 05, 0xd8, 05, 0xdc, 04}},
-	Optab{AFDIVDP, yfaddp, Px, [23]uint8{0xde, 07}},
-	Optab{AFDIVW, yfmvx, Px, [23]uint8{0xde, 06}},
-	Optab{AFDIVL, yfmvx, Px, [23]uint8{0xda, 06}},
-	Optab{AFDIVF, yfmvx, Px, [23]uint8{0xd8, 06}},
-	Optab{AFDIVD, yfadd, Px, [23]uint8{0xdc, 06, 0xd8, 06, 0xdc, 07}},
-	Optab{AFDIVRDP, yfaddp, Px, [23]uint8{0xde, 06}},
-	Optab{AFDIVRW, yfmvx, Px, [23]uint8{0xde, 07}},
-	Optab{AFDIVRL, yfmvx, Px, [23]uint8{0xda, 07}},
-	Optab{AFDIVRF, yfmvx, Px, [23]uint8{0xd8, 07}},
-	Optab{AFDIVRD, yfadd, Px, [23]uint8{0xdc, 07, 0xd8, 07, 0xdc, 06}},
-	Optab{AFXCHD, yfxch, Px, [23]uint8{0xd9, 01, 0xd9, 01}},
-	Optab{AFFREE, nil, 0, [23]uint8{}},
-	Optab{AFLDCW, ystcw, Px, [23]uint8{0xd9, 05, 0xd9, 05}},
-	Optab{AFLDENV, ystcw, Px, [23]uint8{0xd9, 04, 0xd9, 04}},
-	Optab{AFRSTOR, ysvrs, Px, [23]uint8{0xdd, 04, 0xdd, 04}},
-	Optab{AFSAVE, ysvrs, Px, [23]uint8{0xdd, 06, 0xdd, 06}},
-	Optab{AFSTCW, ystcw, Px, [23]uint8{0xd9, 07, 0xd9, 07}},
-	Optab{AFSTENV, ystcw, Px, [23]uint8{0xd9, 06, 0xd9, 06}},
-	Optab{AFSTSW, ystsw, Px, [23]uint8{0xdd, 07, 0xdf, 0xe0}},
-	Optab{AF2XM1, ynone, Px, [23]uint8{0xd9, 0xf0}},
-	Optab{AFABS, ynone, Px, [23]uint8{0xd9, 0xe1}},
-	Optab{AFCHS, ynone, Px, [23]uint8{0xd9, 0xe0}},
-	Optab{AFCLEX, ynone, Px, [23]uint8{0xdb, 0xe2}},
-	Optab{AFCOS, ynone, Px, [23]uint8{0xd9, 0xff}},
-	Optab{AFDECSTP, ynone, Px, [23]uint8{0xd9, 0xf6}},
-	Optab{AFINCSTP, ynone, Px, [23]uint8{0xd9, 0xf7}},
-	Optab{AFINIT, ynone, Px, [23]uint8{0xdb, 0xe3}},
-	Optab{AFLD1, ynone, Px, [23]uint8{0xd9, 0xe8}},
-	Optab{AFLDL2E, ynone, Px, [23]uint8{0xd9, 0xea}},
-	Optab{AFLDL2T, ynone, Px, [23]uint8{0xd9, 0xe9}},
-	Optab{AFLDLG2, ynone, Px, [23]uint8{0xd9, 0xec}},
-	Optab{AFLDLN2, ynone, Px, [23]uint8{0xd9, 0xed}},
-	Optab{AFLDPI, ynone, Px, [23]uint8{0xd9, 0xeb}},
-	Optab{AFLDZ, ynone, Px, [23]uint8{0xd9, 0xee}},
-	Optab{AFNOP, ynone, Px, [23]uint8{0xd9, 0xd0}},
-	Optab{AFPATAN, ynone, Px, [23]uint8{0xd9, 0xf3}},
-	Optab{AFPREM, ynone, Px, [23]uint8{0xd9, 0xf8}},
-	Optab{AFPREM1, ynone, Px, [23]uint8{0xd9, 0xf5}},
-	Optab{AFPTAN, ynone, Px, [23]uint8{0xd9, 0xf2}},
-	Optab{AFRNDINT, ynone, Px, [23]uint8{0xd9, 0xfc}},
-	Optab{AFSCALE, ynone, Px, [23]uint8{0xd9, 0xfd}},
-	Optab{AFSIN, ynone, Px, [23]uint8{0xd9, 0xfe}},
-	Optab{AFSINCOS, ynone, Px, [23]uint8{0xd9, 0xfb}},
-	Optab{AFSQRT, ynone, Px, [23]uint8{0xd9, 0xfa}},
-	Optab{AFTST, ynone, Px, [23]uint8{0xd9, 0xe4}},
-	Optab{AFXAM, ynone, Px, [23]uint8{0xd9, 0xe5}},
-	Optab{AFXTRACT, ynone, Px, [23]uint8{0xd9, 0xf4}},
-	Optab{AFYL2X, ynone, Px, [23]uint8{0xd9, 0xf1}},
-	Optab{AFYL2XP1, ynone, Px, [23]uint8{0xd9, 0xf9}},
-	Optab{ACMPXCHGB, yrb_mb, Pb, [23]uint8{0x0f, 0xb0}},
-	Optab{ACMPXCHGL, yrl_ml, Px, [23]uint8{0x0f, 0xb1}},
-	Optab{ACMPXCHGW, yrl_ml, Pe, [23]uint8{0x0f, 0xb1}},
-	Optab{ACMPXCHGQ, yrl_ml, Pw, [23]uint8{0x0f, 0xb1}},
-	Optab{ACMPXCHG8B, yscond, Pm, [23]uint8{0xc7, 01}},
-	Optab{AINVD, ynone, Pm, [23]uint8{0x08}},
-	Optab{AINVLPG, ymbs, Pm, [23]uint8{0x01, 07}},
-	Optab{ALFENCE, ynone, Pm, [23]uint8{0xae, 0xe8}},
-	Optab{AMFENCE, ynone, Pm, [23]uint8{0xae, 0xf0}},
-	Optab{AMOVNTIL, yrl_ml, Pm, [23]uint8{0xc3}},
-	Optab{AMOVNTIQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc3}},
-	Optab{ARDMSR, ynone, Pm, [23]uint8{0x32}},
-	Optab{ARDPMC, ynone, Pm, [23]uint8{0x33}},
-	Optab{ARDTSC, ynone, Pm, [23]uint8{0x31}},
-	Optab{ARSM, ynone, Pm, [23]uint8{0xaa}},
-	Optab{ASFENCE, ynone, Pm, [23]uint8{0xae, 0xf8}},
-	Optab{ASYSRET, ynone, Pm, [23]uint8{0x07}},
-	Optab{AWBINVD, ynone, Pm, [23]uint8{0x09}},
-	Optab{AWRMSR, ynone, Pm, [23]uint8{0x30}},
-	Optab{AXADDB, yrb_mb, Pb, [23]uint8{0x0f, 0xc0}},
-	Optab{AXADDL, yrl_ml, Px, [23]uint8{0x0f, 0xc1}},
-	Optab{AXADDQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc1}},
-	Optab{AXADDW, yrl_ml, Pe, [23]uint8{0x0f, 0xc1}},
-	Optab{ACRC32B, ycrc32l, Px, [23]uint8{0xf2, 0x0f, 0x38, 0xf0, 0}},
-	Optab{ACRC32Q, ycrc32l, Pw, [23]uint8{0xf2, 0x0f, 0x38, 0xf1, 0}},
-	Optab{APREFETCHT0, yprefetch, Pm, [23]uint8{0x18, 01}},
-	Optab{APREFETCHT1, yprefetch, Pm, [23]uint8{0x18, 02}},
-	Optab{APREFETCHT2, yprefetch, Pm, [23]uint8{0x18, 03}},
-	Optab{APREFETCHNTA, yprefetch, Pm, [23]uint8{0x18, 00}},
-	Optab{AMOVQL, yrl_ml, Px, [23]uint8{0x89}},
-	Optab{obj.AUNDEF, ynone, Px, [23]uint8{0x0f, 0x0b}},
-	Optab{AAESENC, yaes, Pq, [23]uint8{0x38, 0xdc, 0}},
-	Optab{AAESENCLAST, yaes, Pq, [23]uint8{0x38, 0xdd, 0}},
-	Optab{AAESDEC, yaes, Pq, [23]uint8{0x38, 0xde, 0}},
-	Optab{AAESDECLAST, yaes, Pq, [23]uint8{0x38, 0xdf, 0}},
-	Optab{AAESIMC, yaes, Pq, [23]uint8{0x38, 0xdb, 0}},
-	Optab{AAESKEYGENASSIST, yaes2, Pq, [23]uint8{0x3a, 0xdf, 0}},
-	Optab{APSHUFD, yxshuf, Pq, [23]uint8{0x70, 0}},
-	Optab{APCLMULQDQ, yxshuf, Pq, [23]uint8{0x3a, 0x44, 0}},
-	Optab{obj.AUSEFIELD, ynop, Px, [23]uint8{0, 0}},
-	Optab{obj.ATYPE, nil, 0, [23]uint8{}},
-	Optab{obj.AFUNCDATA, yfuncdata, Px, [23]uint8{0, 0}},
-	Optab{obj.APCDATA, ypcdata, Px, [23]uint8{0, 0}},
-	Optab{obj.ACHECKNIL, nil, 0, [23]uint8{}},
-	Optab{obj.AVARDEF, nil, 0, [23]uint8{}},
-	Optab{obj.AVARKILL, nil, 0, [23]uint8{}},
-	Optab{obj.ADUFFCOPY, yduff, Px, [23]uint8{0xe8}},
-	Optab{obj.ADUFFZERO, yduff, Px, [23]uint8{0xe8}},
-	Optab{obj.AEND, nil, 0, [23]uint8{}},
-	Optab{0, nil, 0, [23]uint8{}},
+	{obj.AXXX, nil, 0, [23]uint8{}},
+	{AAAA, ynone, P32, [23]uint8{0x37}},
+	{AAAD, ynone, P32, [23]uint8{0xd5, 0x0a}},
+	{AAAM, ynone, P32, [23]uint8{0xd4, 0x0a}},
+	{AAAS, ynone, P32, [23]uint8{0x3f}},
+	{AADCB, yxorb, Pb, [23]uint8{0x14, 0x80, 02, 0x10, 0x10}},
+	{AADCL, yxorl, Px, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
+	{AADCQ, yxorl, Pw, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
+	{AADCW, yxorl, Pe, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
+	{AADDB, yxorb, Pb, [23]uint8{0x04, 0x80, 00, 0x00, 0x02}},
+	{AADDL, yaddl, Px, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
+	{AADDPD, yxm, Pq, [23]uint8{0x58}},
+	{AADDPS, yxm, Pm, [23]uint8{0x58}},
+	{AADDQ, yaddl, Pw, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
+	{AADDSD, yxm, Pf2, [23]uint8{0x58}},
+	{AADDSS, yxm, Pf3, [23]uint8{0x58}},
+	{AADDW, yaddl, Pe, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
+	{AADJSP, nil, 0, [23]uint8{}},
+	{AANDB, yxorb, Pb, [23]uint8{0x24, 0x80, 04, 0x20, 0x22}},
+	{AANDL, yxorl, Px, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
+	{AANDNPD, yxm, Pq, [23]uint8{0x55}},
+	{AANDNPS, yxm, Pm, [23]uint8{0x55}},
+	{AANDPD, yxm, Pq, [23]uint8{0x54}},
+	{AANDPS, yxm, Pq, [23]uint8{0x54}},
+	{AANDQ, yxorl, Pw, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
+	{AANDW, yxorl, Pe, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
+	{AARPL, yrl_ml, P32, [23]uint8{0x63}},
+	{ABOUNDL, yrl_m, P32, [23]uint8{0x62}},
+	{ABOUNDW, yrl_m, Pe, [23]uint8{0x62}},
+	{ABSFL, yml_rl, Pm, [23]uint8{0xbc}},
+	{ABSFQ, yml_rl, Pw, [23]uint8{0x0f, 0xbc}},
+	{ABSFW, yml_rl, Pq, [23]uint8{0xbc}},
+	{ABSRL, yml_rl, Pm, [23]uint8{0xbd}},
+	{ABSRQ, yml_rl, Pw, [23]uint8{0x0f, 0xbd}},
+	{ABSRW, yml_rl, Pq, [23]uint8{0xbd}},
+	{ABSWAPL, ybswap, Px, [23]uint8{0x0f, 0xc8}},
+	{ABSWAPQ, ybswap, Pw, [23]uint8{0x0f, 0xc8}},
+	{ABTCL, ybtl, Pm, [23]uint8{0xba, 07, 0xbb}},
+	{ABTCQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 07, 0x0f, 0xbb}},
+	{ABTCW, ybtl, Pq, [23]uint8{0xba, 07, 0xbb}},
+	{ABTL, ybtl, Pm, [23]uint8{0xba, 04, 0xa3}},
+	{ABTQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 04, 0x0f, 0xa3}},
+	{ABTRL, ybtl, Pm, [23]uint8{0xba, 06, 0xb3}},
+	{ABTRQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 06, 0x0f, 0xb3}},
+	{ABTRW, ybtl, Pq, [23]uint8{0xba, 06, 0xb3}},
+	{ABTSL, ybtl, Pm, [23]uint8{0xba, 05, 0xab}},
+	{ABTSQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 05, 0x0f, 0xab}},
+	{ABTSW, ybtl, Pq, [23]uint8{0xba, 05, 0xab}},
+	{ABTW, ybtl, Pq, [23]uint8{0xba, 04, 0xa3}},
+	{ABYTE, ybyte, Px, [23]uint8{1}},
+	{obj.ACALL, ycall, Px, [23]uint8{0xff, 02, 0xff, 0x15, 0xe8}},
+	{ACDQ, ynone, Px, [23]uint8{0x99}},
+	{ACLC, ynone, Px, [23]uint8{0xf8}},
+	{ACLD, ynone, Px, [23]uint8{0xfc}},
+	{ACLI, ynone, Px, [23]uint8{0xfa}},
+	{ACLTS, ynone, Pm, [23]uint8{0x06}},
+	{ACMC, ynone, Px, [23]uint8{0xf5}},
+	{ACMOVLCC, yml_rl, Pm, [23]uint8{0x43}},
+	{ACMOVLCS, yml_rl, Pm, [23]uint8{0x42}},
+	{ACMOVLEQ, yml_rl, Pm, [23]uint8{0x44}},
+	{ACMOVLGE, yml_rl, Pm, [23]uint8{0x4d}},
+	{ACMOVLGT, yml_rl, Pm, [23]uint8{0x4f}},
+	{ACMOVLHI, yml_rl, Pm, [23]uint8{0x47}},
+	{ACMOVLLE, yml_rl, Pm, [23]uint8{0x4e}},
+	{ACMOVLLS, yml_rl, Pm, [23]uint8{0x46}},
+	{ACMOVLLT, yml_rl, Pm, [23]uint8{0x4c}},
+	{ACMOVLMI, yml_rl, Pm, [23]uint8{0x48}},
+	{ACMOVLNE, yml_rl, Pm, [23]uint8{0x45}},
+	{ACMOVLOC, yml_rl, Pm, [23]uint8{0x41}},
+	{ACMOVLOS, yml_rl, Pm, [23]uint8{0x40}},
+	{ACMOVLPC, yml_rl, Pm, [23]uint8{0x4b}},
+	{ACMOVLPL, yml_rl, Pm, [23]uint8{0x49}},
+	{ACMOVLPS, yml_rl, Pm, [23]uint8{0x4a}},
+	{ACMOVQCC, yml_rl, Pw, [23]uint8{0x0f, 0x43}},
+	{ACMOVQCS, yml_rl, Pw, [23]uint8{0x0f, 0x42}},
+	{ACMOVQEQ, yml_rl, Pw, [23]uint8{0x0f, 0x44}},
+	{ACMOVQGE, yml_rl, Pw, [23]uint8{0x0f, 0x4d}},
+	{ACMOVQGT, yml_rl, Pw, [23]uint8{0x0f, 0x4f}},
+	{ACMOVQHI, yml_rl, Pw, [23]uint8{0x0f, 0x47}},
+	{ACMOVQLE, yml_rl, Pw, [23]uint8{0x0f, 0x4e}},
+	{ACMOVQLS, yml_rl, Pw, [23]uint8{0x0f, 0x46}},
+	{ACMOVQLT, yml_rl, Pw, [23]uint8{0x0f, 0x4c}},
+	{ACMOVQMI, yml_rl, Pw, [23]uint8{0x0f, 0x48}},
+	{ACMOVQNE, yml_rl, Pw, [23]uint8{0x0f, 0x45}},
+	{ACMOVQOC, yml_rl, Pw, [23]uint8{0x0f, 0x41}},
+	{ACMOVQOS, yml_rl, Pw, [23]uint8{0x0f, 0x40}},
+	{ACMOVQPC, yml_rl, Pw, [23]uint8{0x0f, 0x4b}},
+	{ACMOVQPL, yml_rl, Pw, [23]uint8{0x0f, 0x49}},
+	{ACMOVQPS, yml_rl, Pw, [23]uint8{0x0f, 0x4a}},
+	{ACMOVWCC, yml_rl, Pq, [23]uint8{0x43}},
+	{ACMOVWCS, yml_rl, Pq, [23]uint8{0x42}},
+	{ACMOVWEQ, yml_rl, Pq, [23]uint8{0x44}},
+	{ACMOVWGE, yml_rl, Pq, [23]uint8{0x4d}},
+	{ACMOVWGT, yml_rl, Pq, [23]uint8{0x4f}},
+	{ACMOVWHI, yml_rl, Pq, [23]uint8{0x47}},
+	{ACMOVWLE, yml_rl, Pq, [23]uint8{0x4e}},
+	{ACMOVWLS, yml_rl, Pq, [23]uint8{0x46}},
+	{ACMOVWLT, yml_rl, Pq, [23]uint8{0x4c}},
+	{ACMOVWMI, yml_rl, Pq, [23]uint8{0x48}},
+	{ACMOVWNE, yml_rl, Pq, [23]uint8{0x45}},
+	{ACMOVWOC, yml_rl, Pq, [23]uint8{0x41}},
+	{ACMOVWOS, yml_rl, Pq, [23]uint8{0x40}},
+	{ACMOVWPC, yml_rl, Pq, [23]uint8{0x4b}},
+	{ACMOVWPL, yml_rl, Pq, [23]uint8{0x49}},
+	{ACMOVWPS, yml_rl, Pq, [23]uint8{0x4a}},
+	{ACMPB, ycmpb, Pb, [23]uint8{0x3c, 0x80, 07, 0x38, 0x3a}},
+	{ACMPL, ycmpl, Px, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
+	{ACMPPD, yxcmpi, Px, [23]uint8{Pe, 0xc2}},
+	{ACMPPS, yxcmpi, Pm, [23]uint8{0xc2, 0}},
+	{ACMPQ, ycmpl, Pw, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
+	{ACMPSB, ynone, Pb, [23]uint8{0xa6}},
+	{ACMPSD, yxcmpi, Px, [23]uint8{Pf2, 0xc2}},
+	{ACMPSL, ynone, Px, [23]uint8{0xa7}},
+	{ACMPSQ, ynone, Pw, [23]uint8{0xa7}},
+	{ACMPSS, yxcmpi, Px, [23]uint8{Pf3, 0xc2}},
+	{ACMPSW, ynone, Pe, [23]uint8{0xa7}},
+	{ACMPW, ycmpl, Pe, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
+	{ACOMISD, yxcmp, Pe, [23]uint8{0x2f}},
+	{ACOMISS, yxcmp, Pm, [23]uint8{0x2f}},
+	{ACPUID, ynone, Pm, [23]uint8{0xa2}},
+	{ACVTPL2PD, yxcvm2, Px, [23]uint8{Pf3, 0xe6, Pe, 0x2a}},
+	{ACVTPL2PS, yxcvm2, Pm, [23]uint8{0x5b, 0, 0x2a, 0}},
+	{ACVTPD2PL, yxcvm1, Px, [23]uint8{Pf2, 0xe6, Pe, 0x2d}},
+	{ACVTPD2PS, yxm, Pe, [23]uint8{0x5a}},
+	{ACVTPS2PL, yxcvm1, Px, [23]uint8{Pe, 0x5b, Pm, 0x2d}},
+	{ACVTPS2PD, yxm, Pm, [23]uint8{0x5a}},
+	{API2FW, ymfp, Px, [23]uint8{0x0c}},
+	{ACVTSD2SL, yxcvfl, Pf2, [23]uint8{0x2d}},
+	{ACVTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2d}},
+	{ACVTSD2SS, yxm, Pf2, [23]uint8{0x5a}},
+	{ACVTSL2SD, yxcvlf, Pf2, [23]uint8{0x2a}},
+	{ACVTSQ2SD, yxcvqf, Pw, [23]uint8{Pf2, 0x2a}},
+	{ACVTSL2SS, yxcvlf, Pf3, [23]uint8{0x2a}},
+	{ACVTSQ2SS, yxcvqf, Pw, [23]uint8{Pf3, 0x2a}},
+	{ACVTSS2SD, yxm, Pf3, [23]uint8{0x5a}},
+	{ACVTSS2SL, yxcvfl, Pf3, [23]uint8{0x2d}},
+	{ACVTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2d}},
+	{ACVTTPD2PL, yxcvm1, Px, [23]uint8{Pe, 0xe6, Pe, 0x2c}},
+	{ACVTTPS2PL, yxcvm1, Px, [23]uint8{Pf3, 0x5b, Pm, 0x2c}},
+	{ACVTTSD2SL, yxcvfl, Pf2, [23]uint8{0x2c}},
+	{ACVTTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2c}},
+	{ACVTTSS2SL, yxcvfl, Pf3, [23]uint8{0x2c}},
+	{ACVTTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2c}},
+	{ACWD, ynone, Pe, [23]uint8{0x99}},
+	{ACQO, ynone, Pw, [23]uint8{0x99}},
+	{ADAA, ynone, P32, [23]uint8{0x27}},
+	{ADAS, ynone, P32, [23]uint8{0x2f}},
+	{obj.ADATA, nil, 0, [23]uint8{}},
+	{ADECB, yincb, Pb, [23]uint8{0xfe, 01}},
+	{ADECL, yincl, Px1, [23]uint8{0x48, 0xff, 01}},
+	{ADECQ, yincq, Pw, [23]uint8{0xff, 01}},
+	{ADECW, yincw, Pe, [23]uint8{0xff, 01}},
+	{ADIVB, ydivb, Pb, [23]uint8{0xf6, 06}},
+	{ADIVL, ydivl, Px, [23]uint8{0xf7, 06}},
+	{ADIVPD, yxm, Pe, [23]uint8{0x5e}},
+	{ADIVPS, yxm, Pm, [23]uint8{0x5e}},
+	{ADIVQ, ydivl, Pw, [23]uint8{0xf7, 06}},
+	{ADIVSD, yxm, Pf2, [23]uint8{0x5e}},
+	{ADIVSS, yxm, Pf3, [23]uint8{0x5e}},
+	{ADIVW, ydivl, Pe, [23]uint8{0xf7, 06}},
+	{AEMMS, ynone, Pm, [23]uint8{0x77}},
+	{AENTER, nil, 0, [23]uint8{}}, /* botch */
+	{AFXRSTOR, ysvrs, Pm, [23]uint8{0xae, 01, 0xae, 01}},
+	{AFXSAVE, ysvrs, Pm, [23]uint8{0xae, 00, 0xae, 00}},
+	{AFXRSTOR64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
+	{AFXSAVE64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
+	{obj.AGLOBL, nil, 0, [23]uint8{}},
+	{AHLT, ynone, Px, [23]uint8{0xf4}},
+	{AIDIVB, ydivb, Pb, [23]uint8{0xf6, 07}},
+	{AIDIVL, ydivl, Px, [23]uint8{0xf7, 07}},
+	{AIDIVQ, ydivl, Pw, [23]uint8{0xf7, 07}},
+	{AIDIVW, ydivl, Pe, [23]uint8{0xf7, 07}},
+	{AIMULB, ydivb, Pb, [23]uint8{0xf6, 05}},
+	{AIMULL, yimul, Px, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
+	{AIMULQ, yimul, Pw, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
+	{AIMULW, yimul, Pe, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
+	{AIMUL3Q, yimul3, Pw, [23]uint8{0x6b, 00}},
+	{AINB, yin, Pb, [23]uint8{0xe4, 0xec}},
+	{AINCB, yincb, Pb, [23]uint8{0xfe, 00}},
+	{AINCL, yincl, Px1, [23]uint8{0x40, 0xff, 00}},
+	{AINCQ, yincq, Pw, [23]uint8{0xff, 00}},
+	{AINCW, yincw, Pe, [23]uint8{0xff, 00}},
+	{AINL, yin, Px, [23]uint8{0xe5, 0xed}},
+	{AINSB, ynone, Pb, [23]uint8{0x6c}},
+	{AINSL, ynone, Px, [23]uint8{0x6d}},
+	{AINSW, ynone, Pe, [23]uint8{0x6d}},
+	{AINT, yint, Px, [23]uint8{0xcd}},
+	{AINTO, ynone, P32, [23]uint8{0xce}},
+	{AINW, yin, Pe, [23]uint8{0xe5, 0xed}},
+	{AIRETL, ynone, Px, [23]uint8{0xcf}},
+	{AIRETQ, ynone, Pw, [23]uint8{0xcf}},
+	{AIRETW, ynone, Pe, [23]uint8{0xcf}},
+	{AJCC, yjcond, Px, [23]uint8{0x73, 0x83, 00}},
+	{AJCS, yjcond, Px, [23]uint8{0x72, 0x82}},
+	{AJCXZL, yloop, Px, [23]uint8{0xe3}},
+	{AJCXZW, yloop, Px, [23]uint8{0xe3}},
+	{AJCXZQ, yloop, Px, [23]uint8{0xe3}},
+	{AJEQ, yjcond, Px, [23]uint8{0x74, 0x84}},
+	{AJGE, yjcond, Px, [23]uint8{0x7d, 0x8d}},
+	{AJGT, yjcond, Px, [23]uint8{0x7f, 0x8f}},
+	{AJHI, yjcond, Px, [23]uint8{0x77, 0x87}},
+	{AJLE, yjcond, Px, [23]uint8{0x7e, 0x8e}},
+	{AJLS, yjcond, Px, [23]uint8{0x76, 0x86}},
+	{AJLT, yjcond, Px, [23]uint8{0x7c, 0x8c}},
+	{AJMI, yjcond, Px, [23]uint8{0x78, 0x88}},
+	{obj.AJMP, yjmp, Px, [23]uint8{0xff, 04, 0xeb, 0xe9}},
+	{AJNE, yjcond, Px, [23]uint8{0x75, 0x85}},
+	{AJOC, yjcond, Px, [23]uint8{0x71, 0x81, 00}},
+	{AJOS, yjcond, Px, [23]uint8{0x70, 0x80, 00}},
+	{AJPC, yjcond, Px, [23]uint8{0x7b, 0x8b}},
+	{AJPL, yjcond, Px, [23]uint8{0x79, 0x89}},
+	{AJPS, yjcond, Px, [23]uint8{0x7a, 0x8a}},
+	{ALAHF, ynone, Px, [23]uint8{0x9f}},
+	{ALARL, yml_rl, Pm, [23]uint8{0x02}},
+	{ALARW, yml_rl, Pq, [23]uint8{0x02}},
+	{ALDMXCSR, ysvrs, Pm, [23]uint8{0xae, 02, 0xae, 02}},
+	{ALEAL, ym_rl, Px, [23]uint8{0x8d}},
+	{ALEAQ, ym_rl, Pw, [23]uint8{0x8d}},
+	{ALEAVEL, ynone, P32, [23]uint8{0xc9}},
+	{ALEAVEQ, ynone, Py, [23]uint8{0xc9}},
+	{ALEAVEW, ynone, Pe, [23]uint8{0xc9}},
+	{ALEAW, ym_rl, Pe, [23]uint8{0x8d}},
+	{ALOCK, ynone, Px, [23]uint8{0xf0}},
+	{ALODSB, ynone, Pb, [23]uint8{0xac}},
+	{ALODSL, ynone, Px, [23]uint8{0xad}},
+	{ALODSQ, ynone, Pw, [23]uint8{0xad}},
+	{ALODSW, ynone, Pe, [23]uint8{0xad}},
+	{ALONG, ybyte, Px, [23]uint8{4}},
+	{ALOOP, yloop, Px, [23]uint8{0xe2}},
+	{ALOOPEQ, yloop, Px, [23]uint8{0xe1}},
+	{ALOOPNE, yloop, Px, [23]uint8{0xe0}},
+	{ALSLL, yml_rl, Pm, [23]uint8{0x03}},
+	{ALSLW, yml_rl, Pq, [23]uint8{0x03}},
+	{AMASKMOVOU, yxr, Pe, [23]uint8{0xf7}},
+	{AMASKMOVQ, ymr, Pm, [23]uint8{0xf7}},
+	{AMAXPD, yxm, Pe, [23]uint8{0x5f}},
+	{AMAXPS, yxm, Pm, [23]uint8{0x5f}},
+	{AMAXSD, yxm, Pf2, [23]uint8{0x5f}},
+	{AMAXSS, yxm, Pf3, [23]uint8{0x5f}},
+	{AMINPD, yxm, Pe, [23]uint8{0x5d}},
+	{AMINPS, yxm, Pm, [23]uint8{0x5d}},
+	{AMINSD, yxm, Pf2, [23]uint8{0x5d}},
+	{AMINSS, yxm, Pf3, [23]uint8{0x5d}},
+	{AMOVAPD, yxmov, Pe, [23]uint8{0x28, 0x29}},
+	{AMOVAPS, yxmov, Pm, [23]uint8{0x28, 0x29}},
+	{AMOVB, ymovb, Pb, [23]uint8{0x88, 0x8a, 0xb0, 0xc6, 00}},
+	{AMOVBLSX, ymb_rl, Pm, [23]uint8{0xbe}},
+	{AMOVBLZX, ymb_rl, Pm, [23]uint8{0xb6}},
+	{AMOVBQSX, ymb_rl, Pw, [23]uint8{0x0f, 0xbe}},
+	{AMOVBQZX, ymb_rl, Pm, [23]uint8{0xb6}},
+	{AMOVBWSX, ymb_rl, Pq, [23]uint8{0xbe}},
+	{AMOVBWZX, ymb_rl, Pq, [23]uint8{0xb6}},
+	{AMOVO, yxmov, Pe, [23]uint8{0x6f, 0x7f}},
+	{AMOVOU, yxmov, Pf3, [23]uint8{0x6f, 0x7f}},
+	{AMOVHLPS, yxr, Pm, [23]uint8{0x12}},
+	{AMOVHPD, yxmov, Pe, [23]uint8{0x16, 0x17}},
+	{AMOVHPS, yxmov, Pm, [23]uint8{0x16, 0x17}},
+	{AMOVL, ymovl, Px, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
+	{AMOVLHPS, yxr, Pm, [23]uint8{0x16}},
+	{AMOVLPD, yxmov, Pe, [23]uint8{0x12, 0x13}},
+	{AMOVLPS, yxmov, Pm, [23]uint8{0x12, 0x13}},
+	{AMOVLQSX, yml_rl, Pw, [23]uint8{0x63}},
+	{AMOVLQZX, yml_rl, Px, [23]uint8{0x8b}},
+	{AMOVMSKPD, yxrrl, Pq, [23]uint8{0x50}},
+	{AMOVMSKPS, yxrrl, Pm, [23]uint8{0x50}},
+	{AMOVNTO, yxr_ml, Pe, [23]uint8{0xe7}},
+	{AMOVNTPD, yxr_ml, Pe, [23]uint8{0x2b}},
+	{AMOVNTPS, yxr_ml, Pm, [23]uint8{0x2b}},
+	{AMOVNTQ, ymr_ml, Pm, [23]uint8{0xe7}},
+	{AMOVQ, ymovq, Pw8, [23]uint8{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0x31, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
+	{AMOVQOZX, ymrxr, Pf3, [23]uint8{0xd6, 0x7e}},
+	{AMOVSB, ynone, Pb, [23]uint8{0xa4}},
+	{AMOVSD, yxmov, Pf2, [23]uint8{0x10, 0x11}},
+	{AMOVSL, ynone, Px, [23]uint8{0xa5}},
+	{AMOVSQ, ynone, Pw, [23]uint8{0xa5}},
+	{AMOVSS, yxmov, Pf3, [23]uint8{0x10, 0x11}},
+	{AMOVSW, ynone, Pe, [23]uint8{0xa5}},
+	{AMOVUPD, yxmov, Pe, [23]uint8{0x10, 0x11}},
+	{AMOVUPS, yxmov, Pm, [23]uint8{0x10, 0x11}},
+	{AMOVW, ymovw, Pe, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0}},
+	{AMOVWLSX, yml_rl, Pm, [23]uint8{0xbf}},
+	{AMOVWLZX, yml_rl, Pm, [23]uint8{0xb7}},
+	{AMOVWQSX, yml_rl, Pw, [23]uint8{0x0f, 0xbf}},
+	{AMOVWQZX, yml_rl, Pw, [23]uint8{0x0f, 0xb7}},
+	{AMULB, ydivb, Pb, [23]uint8{0xf6, 04}},
+	{AMULL, ydivl, Px, [23]uint8{0xf7, 04}},
+	{AMULPD, yxm, Pe, [23]uint8{0x59}},
+	{AMULPS, yxm, Ym, [23]uint8{0x59}},
+	{AMULQ, ydivl, Pw, [23]uint8{0xf7, 04}},
+	{AMULSD, yxm, Pf2, [23]uint8{0x59}},
+	{AMULSS, yxm, Pf3, [23]uint8{0x59}},
+	{AMULW, ydivl, Pe, [23]uint8{0xf7, 04}},
+	{ANEGB, yscond, Pb, [23]uint8{0xf6, 03}},
+	{ANEGL, yscond, Px, [23]uint8{0xf7, 03}},
+	{ANEGQ, yscond, Pw, [23]uint8{0xf7, 03}},
+	{ANEGW, yscond, Pe, [23]uint8{0xf7, 03}},
+	{obj.ANOP, ynop, Px, [23]uint8{0, 0}},
+	{ANOTB, yscond, Pb, [23]uint8{0xf6, 02}},
+	{ANOTL, yscond, Px, [23]uint8{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
+	{ANOTQ, yscond, Pw, [23]uint8{0xf7, 02}},
+	{ANOTW, yscond, Pe, [23]uint8{0xf7, 02}},
+	{AORB, yxorb, Pb, [23]uint8{0x0c, 0x80, 01, 0x08, 0x0a}},
+	{AORL, yxorl, Px, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
+	{AORPD, yxm, Pq, [23]uint8{0x56}},
+	{AORPS, yxm, Pm, [23]uint8{0x56}},
+	{AORQ, yxorl, Pw, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
+	{AORW, yxorl, Pe, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
+	{AOUTB, yin, Pb, [23]uint8{0xe6, 0xee}},
+	{AOUTL, yin, Px, [23]uint8{0xe7, 0xef}},
+	{AOUTSB, ynone, Pb, [23]uint8{0x6e}},
+	{AOUTSL, ynone, Px, [23]uint8{0x6f}},
+	{AOUTSW, ynone, Pe, [23]uint8{0x6f}},
+	{AOUTW, yin, Pe, [23]uint8{0xe7, 0xef}},
+	{APACKSSLW, ymm, Py1, [23]uint8{0x6b, Pe, 0x6b}},
+	{APACKSSWB, ymm, Py1, [23]uint8{0x63, Pe, 0x63}},
+	{APACKUSWB, ymm, Py1, [23]uint8{0x67, Pe, 0x67}},
+	{APADDB, ymm, Py1, [23]uint8{0xfc, Pe, 0xfc}},
+	{APADDL, ymm, Py1, [23]uint8{0xfe, Pe, 0xfe}},
+	{APADDQ, yxm, Pe, [23]uint8{0xd4}},
+	{APADDSB, ymm, Py1, [23]uint8{0xec, Pe, 0xec}},
+	{APADDSW, ymm, Py1, [23]uint8{0xed, Pe, 0xed}},
+	{APADDUSB, ymm, Py1, [23]uint8{0xdc, Pe, 0xdc}},
+	{APADDUSW, ymm, Py1, [23]uint8{0xdd, Pe, 0xdd}},
+	{APADDW, ymm, Py1, [23]uint8{0xfd, Pe, 0xfd}},
+	{APAND, ymm, Py1, [23]uint8{0xdb, Pe, 0xdb}},
+	{APANDN, ymm, Py1, [23]uint8{0xdf, Pe, 0xdf}},
+	{APAUSE, ynone, Px, [23]uint8{0xf3, 0x90}},
+	{APAVGB, ymm, Py1, [23]uint8{0xe0, Pe, 0xe0}},
+	{APAVGW, ymm, Py1, [23]uint8{0xe3, Pe, 0xe3}},
+	{APCMPEQB, ymm, Py1, [23]uint8{0x74, Pe, 0x74}},
+	{APCMPEQL, ymm, Py1, [23]uint8{0x76, Pe, 0x76}},
+	{APCMPEQW, ymm, Py1, [23]uint8{0x75, Pe, 0x75}},
+	{APCMPGTB, ymm, Py1, [23]uint8{0x64, Pe, 0x64}},
+	{APCMPGTL, ymm, Py1, [23]uint8{0x66, Pe, 0x66}},
+	{APCMPGTW, ymm, Py1, [23]uint8{0x65, Pe, 0x65}},
+	{APEXTRW, yextrw, Pq, [23]uint8{0xc5, 00}},
+	{APF2IL, ymfp, Px, [23]uint8{0x1d}},
+	{APF2IW, ymfp, Px, [23]uint8{0x1c}},
+	{API2FL, ymfp, Px, [23]uint8{0x0d}},
+	{APFACC, ymfp, Px, [23]uint8{0xae}},
+	{APFADD, ymfp, Px, [23]uint8{0x9e}},
+	{APFCMPEQ, ymfp, Px, [23]uint8{0xb0}},
+	{APFCMPGE, ymfp, Px, [23]uint8{0x90}},
+	{APFCMPGT, ymfp, Px, [23]uint8{0xa0}},
+	{APFMAX, ymfp, Px, [23]uint8{0xa4}},
+	{APFMIN, ymfp, Px, [23]uint8{0x94}},
+	{APFMUL, ymfp, Px, [23]uint8{0xb4}},
+	{APFNACC, ymfp, Px, [23]uint8{0x8a}},
+	{APFPNACC, ymfp, Px, [23]uint8{0x8e}},
+	{APFRCP, ymfp, Px, [23]uint8{0x96}},
+	{APFRCPIT1, ymfp, Px, [23]uint8{0xa6}},
+	{APFRCPI2T, ymfp, Px, [23]uint8{0xb6}},
+	{APFRSQIT1, ymfp, Px, [23]uint8{0xa7}},
+	{APFRSQRT, ymfp, Px, [23]uint8{0x97}},
+	{APFSUB, ymfp, Px, [23]uint8{0x9a}},
+	{APFSUBR, ymfp, Px, [23]uint8{0xaa}},
+	{APINSRW, yinsrw, Pq, [23]uint8{0xc4, 00}},
+	{APINSRD, yinsr, Pq, [23]uint8{0x3a, 0x22, 00}},
+	{APINSRQ, yinsr, Pq3, [23]uint8{0x3a, 0x22, 00}},
+	{APMADDWL, ymm, Py1, [23]uint8{0xf5, Pe, 0xf5}},
+	{APMAXSW, yxm, Pe, [23]uint8{0xee}},
+	{APMAXUB, yxm, Pe, [23]uint8{0xde}},
+	{APMINSW, yxm, Pe, [23]uint8{0xea}},
+	{APMINUB, yxm, Pe, [23]uint8{0xda}},
+	{APMOVMSKB, ymskb, Px, [23]uint8{Pe, 0xd7, 0xd7}},
+	{APMULHRW, ymfp, Px, [23]uint8{0xb7}},
+	{APMULHUW, ymm, Py1, [23]uint8{0xe4, Pe, 0xe4}},
+	{APMULHW, ymm, Py1, [23]uint8{0xe5, Pe, 0xe5}},
+	{APMULLW, ymm, Py1, [23]uint8{0xd5, Pe, 0xd5}},
+	{APMULULQ, ymm, Py1, [23]uint8{0xf4, Pe, 0xf4}},
+	{APOPAL, ynone, P32, [23]uint8{0x61}},
+	{APOPAW, ynone, Pe, [23]uint8{0x61}},
+	{APOPFL, ynone, P32, [23]uint8{0x9d}},
+	{APOPFQ, ynone, Py, [23]uint8{0x9d}},
+	{APOPFW, ynone, Pe, [23]uint8{0x9d}},
+	{APOPL, ypopl, P32, [23]uint8{0x58, 0x8f, 00}},
+	{APOPQ, ypopl, Py, [23]uint8{0x58, 0x8f, 00}},
+	{APOPW, ypopl, Pe, [23]uint8{0x58, 0x8f, 00}},
+	{APOR, ymm, Py1, [23]uint8{0xeb, Pe, 0xeb}},
+	{APSADBW, yxm, Pq, [23]uint8{0xf6}},
+	{APSHUFHW, yxshuf, Pf3, [23]uint8{0x70, 00}},
+	{APSHUFL, yxshuf, Pq, [23]uint8{0x70, 00}},
+	{APSHUFLW, yxshuf, Pf2, [23]uint8{0x70, 00}},
+	{APSHUFW, ymshuf, Pm, [23]uint8{0x70, 00}},
+	{APSHUFB, ymshufb, Pq, [23]uint8{0x38, 0x00}},
+	{APSLLO, ypsdq, Pq, [23]uint8{0x73, 07}},
+	{APSLLL, yps, Py3, [23]uint8{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
+	{APSLLQ, yps, Py3, [23]uint8{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
+	{APSLLW, yps, Py3, [23]uint8{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
+	{APSRAL, yps, Py3, [23]uint8{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
+	{APSRAW, yps, Py3, [23]uint8{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
+	{APSRLO, ypsdq, Pq, [23]uint8{0x73, 03}},
+	{APSRLL, yps, Py3, [23]uint8{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
+	{APSRLQ, yps, Py3, [23]uint8{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
+	{APSRLW, yps, Py3, [23]uint8{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}},
+	{APSUBB, yxm, Pe, [23]uint8{0xf8}},
+	{APSUBL, yxm, Pe, [23]uint8{0xfa}},
+	{APSUBQ, yxm, Pe, [23]uint8{0xfb}},
+	{APSUBSB, yxm, Pe, [23]uint8{0xe8}},
+	{APSUBSW, yxm, Pe, [23]uint8{0xe9}},
+	{APSUBUSB, yxm, Pe, [23]uint8{0xd8}},
+	{APSUBUSW, yxm, Pe, [23]uint8{0xd9}},
+	{APSUBW, yxm, Pe, [23]uint8{0xf9}},
+	{APSWAPL, ymfp, Px, [23]uint8{0xbb}},
+	{APUNPCKHBW, ymm, Py1, [23]uint8{0x68, Pe, 0x68}},
+	{APUNPCKHLQ, ymm, Py1, [23]uint8{0x6a, Pe, 0x6a}},
+	{APUNPCKHQDQ, yxm, Pe, [23]uint8{0x6d}},
+	{APUNPCKHWL, ymm, Py1, [23]uint8{0x69, Pe, 0x69}},
+	{APUNPCKLBW, ymm, Py1, [23]uint8{0x60, Pe, 0x60}},
+	{APUNPCKLLQ, ymm, Py1, [23]uint8{0x62, Pe, 0x62}},
+	{APUNPCKLQDQ, yxm, Pe, [23]uint8{0x6c}},
+	{APUNPCKLWL, ymm, Py1, [23]uint8{0x61, Pe, 0x61}},
+	{APUSHAL, ynone, P32, [23]uint8{0x60}},
+	{APUSHAW, ynone, Pe, [23]uint8{0x60}},
+	{APUSHFL, ynone, P32, [23]uint8{0x9c}},
+	{APUSHFQ, ynone, Py, [23]uint8{0x9c}},
+	{APUSHFW, ynone, Pe, [23]uint8{0x9c}},
+	{APUSHL, ypushl, P32, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
+	{APUSHQ, ypushl, Py, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
+	{APUSHW, ypushl, Pe, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
+	{APXOR, ymm, Py1, [23]uint8{0xef, Pe, 0xef}},
+	{AQUAD, ybyte, Px, [23]uint8{8}},
+	{ARCLB, yshb, Pb, [23]uint8{0xd0, 02, 0xc0, 02, 0xd2, 02}},
+	{ARCLL, yshl, Px, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
+	{ARCLQ, yshl, Pw, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
+	{ARCLW, yshl, Pe, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
+	{ARCPPS, yxm, Pm, [23]uint8{0x53}},
+	{ARCPSS, yxm, Pf3, [23]uint8{0x53}},
+	{ARCRB, yshb, Pb, [23]uint8{0xd0, 03, 0xc0, 03, 0xd2, 03}},
+	{ARCRL, yshl, Px, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
+	{ARCRQ, yshl, Pw, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
+	{ARCRW, yshl, Pe, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
+	{AREP, ynone, Px, [23]uint8{0xf3}},
+	{AREPN, ynone, Px, [23]uint8{0xf2}},
+	{obj.ARET, ynone, Px, [23]uint8{0xc3}},
+	{ARETFW, yret, Pe, [23]uint8{0xcb, 0xca}},
+	{ARETFL, yret, Px, [23]uint8{0xcb, 0xca}},
+	{ARETFQ, yret, Pw, [23]uint8{0xcb, 0xca}},
+	{AROLB, yshb, Pb, [23]uint8{0xd0, 00, 0xc0, 00, 0xd2, 00}},
+	{AROLL, yshl, Px, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
+	{AROLQ, yshl, Pw, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
+	{AROLW, yshl, Pe, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
+	{ARORB, yshb, Pb, [23]uint8{0xd0, 01, 0xc0, 01, 0xd2, 01}},
+	{ARORL, yshl, Px, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
+	{ARORQ, yshl, Pw, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
+	{ARORW, yshl, Pe, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
+	{ARSQRTPS, yxm, Pm, [23]uint8{0x52}},
+	{ARSQRTSS, yxm, Pf3, [23]uint8{0x52}},
+	{ASAHF, ynone, Px1, [23]uint8{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, /* XCHGB AH,AL; PUSH AX; POPFL */
+	{ASALB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
+	{ASALL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
+	{ASALQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
+	{ASALW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
+	{ASARB, yshb, Pb, [23]uint8{0xd0, 07, 0xc0, 07, 0xd2, 07}},
+	{ASARL, yshl, Px, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
+	{ASARQ, yshl, Pw, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
+	{ASARW, yshl, Pe, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
+	{ASBBB, yxorb, Pb, [23]uint8{0x1c, 0x80, 03, 0x18, 0x1a}},
+	{ASBBL, yxorl, Px, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
+	{ASBBQ, yxorl, Pw, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
+	{ASBBW, yxorl, Pe, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
+	{ASCASB, ynone, Pb, [23]uint8{0xae}},
+	{ASCASL, ynone, Px, [23]uint8{0xaf}},
+	{ASCASQ, ynone, Pw, [23]uint8{0xaf}},
+	{ASCASW, ynone, Pe, [23]uint8{0xaf}},
+	{ASETCC, yscond, Pb, [23]uint8{0x0f, 0x93, 00}},
+	{ASETCS, yscond, Pb, [23]uint8{0x0f, 0x92, 00}},
+	{ASETEQ, yscond, Pb, [23]uint8{0x0f, 0x94, 00}},
+	{ASETGE, yscond, Pb, [23]uint8{0x0f, 0x9d, 00}},
+	{ASETGT, yscond, Pb, [23]uint8{0x0f, 0x9f, 00}},
+	{ASETHI, yscond, Pb, [23]uint8{0x0f, 0x97, 00}},
+	{ASETLE, yscond, Pb, [23]uint8{0x0f, 0x9e, 00}},
+	{ASETLS, yscond, Pb, [23]uint8{0x0f, 0x96, 00}},
+	{ASETLT, yscond, Pb, [23]uint8{0x0f, 0x9c, 00}},
+	{ASETMI, yscond, Pb, [23]uint8{0x0f, 0x98, 00}},
+	{ASETNE, yscond, Pb, [23]uint8{0x0f, 0x95, 00}},
+	{ASETOC, yscond, Pb, [23]uint8{0x0f, 0x91, 00}},
+	{ASETOS, yscond, Pb, [23]uint8{0x0f, 0x90, 00}},
+	{ASETPC, yscond, Pb, [23]uint8{0x0f, 0x9b, 00}},
+	{ASETPL, yscond, Pb, [23]uint8{0x0f, 0x99, 00}},
+	{ASETPS, yscond, Pb, [23]uint8{0x0f, 0x9a, 00}},
+	{ASHLB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
+	{ASHLL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
+	{ASHLQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
+	{ASHLW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
+	{ASHRB, yshb, Pb, [23]uint8{0xd0, 05, 0xc0, 05, 0xd2, 05}},
+	{ASHRL, yshl, Px, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
+	{ASHRQ, yshl, Pw, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
+	{ASHRW, yshl, Pe, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
+	{ASHUFPD, yxshuf, Pq, [23]uint8{0xc6, 00}},
+	{ASHUFPS, yxshuf, Pm, [23]uint8{0xc6, 00}},
+	{ASQRTPD, yxm, Pe, [23]uint8{0x51}},
+	{ASQRTPS, yxm, Pm, [23]uint8{0x51}},
+	{ASQRTSD, yxm, Pf2, [23]uint8{0x51}},
+	{ASQRTSS, yxm, Pf3, [23]uint8{0x51}},
+	{ASTC, ynone, Px, [23]uint8{0xf9}},
+	{ASTD, ynone, Px, [23]uint8{0xfd}},
+	{ASTI, ynone, Px, [23]uint8{0xfb}},
+	{ASTMXCSR, ysvrs, Pm, [23]uint8{0xae, 03, 0xae, 03}},
+	{ASTOSB, ynone, Pb, [23]uint8{0xaa}},
+	{ASTOSL, ynone, Px, [23]uint8{0xab}},
+	{ASTOSQ, ynone, Pw, [23]uint8{0xab}},
+	{ASTOSW, ynone, Pe, [23]uint8{0xab}},
+	{ASUBB, yxorb, Pb, [23]uint8{0x2c, 0x80, 05, 0x28, 0x2a}},
+	{ASUBL, yaddl, Px, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
+	{ASUBPD, yxm, Pe, [23]uint8{0x5c}},
+	{ASUBPS, yxm, Pm, [23]uint8{0x5c}},
+	{ASUBQ, yaddl, Pw, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
+	{ASUBSD, yxm, Pf2, [23]uint8{0x5c}},
+	{ASUBSS, yxm, Pf3, [23]uint8{0x5c}},
+	{ASUBW, yaddl, Pe, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
+	{ASWAPGS, ynone, Pm, [23]uint8{0x01, 0xf8}},
+	{ASYSCALL, ynone, Px, [23]uint8{0x0f, 0x05}}, /* fast syscall */
+	{ATESTB, ytestb, Pb, [23]uint8{0xa8, 0xf6, 00, 0x84, 0x84}},
+	{ATESTL, ytestl, Px, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
+	{ATESTQ, ytestl, Pw, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
+	{ATESTW, ytestl, Pe, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
+	{obj.ATEXT, ytext, Px, [23]uint8{}},
+	{AUCOMISD, yxcmp, Pe, [23]uint8{0x2e}},
+	{AUCOMISS, yxcmp, Pm, [23]uint8{0x2e}},
+	{AUNPCKHPD, yxm, Pe, [23]uint8{0x15}},
+	{AUNPCKHPS, yxm, Pm, [23]uint8{0x15}},
+	{AUNPCKLPD, yxm, Pe, [23]uint8{0x14}},
+	{AUNPCKLPS, yxm, Pm, [23]uint8{0x14}},
+	{AVERR, ydivl, Pm, [23]uint8{0x00, 04}},
+	{AVERW, ydivl, Pm, [23]uint8{0x00, 05}},
+	{AWAIT, ynone, Px, [23]uint8{0x9b}},
+	{AWORD, ybyte, Px, [23]uint8{2}},
+	{AXCHGB, yml_mb, Pb, [23]uint8{0x86, 0x86}},
+	{AXCHGL, yxchg, Px, [23]uint8{0x90, 0x90, 0x87, 0x87}},
+	{AXCHGQ, yxchg, Pw, [23]uint8{0x90, 0x90, 0x87, 0x87}},
+	{AXCHGW, yxchg, Pe, [23]uint8{0x90, 0x90, 0x87, 0x87}},
+	{AXLAT, ynone, Px, [23]uint8{0xd7}},
+	{AXORB, yxorb, Pb, [23]uint8{0x34, 0x80, 06, 0x30, 0x32}},
+	{AXORL, yxorl, Px, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
+	{AXORPD, yxm, Pe, [23]uint8{0x57}},
+	{AXORPS, yxm, Pm, [23]uint8{0x57}},
+	{AXORQ, yxorl, Pw, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
+	{AXORW, yxorl, Pe, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
+	{AFMOVB, yfmvx, Px, [23]uint8{0xdf, 04}},
+	{AFMOVBP, yfmvp, Px, [23]uint8{0xdf, 06}},
+	{AFMOVD, yfmvd, Px, [23]uint8{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
+	{AFMOVDP, yfmvdp, Px, [23]uint8{0xdd, 03, 0xdd, 03}},
+	{AFMOVF, yfmvf, Px, [23]uint8{0xd9, 00, 0xd9, 02}},
+	{AFMOVFP, yfmvp, Px, [23]uint8{0xd9, 03}},
+	{AFMOVL, yfmvf, Px, [23]uint8{0xdb, 00, 0xdb, 02}},
+	{AFMOVLP, yfmvp, Px, [23]uint8{0xdb, 03}},
+	{AFMOVV, yfmvx, Px, [23]uint8{0xdf, 05}},
+	{AFMOVVP, yfmvp, Px, [23]uint8{0xdf, 07}},
+	{AFMOVW, yfmvf, Px, [23]uint8{0xdf, 00, 0xdf, 02}},
+	{AFMOVWP, yfmvp, Px, [23]uint8{0xdf, 03}},
+	{AFMOVX, yfmvx, Px, [23]uint8{0xdb, 05}},
+	{AFMOVXP, yfmvp, Px, [23]uint8{0xdb, 07}},
+	{AFCMOVCC, yfcmv, Px, [23]uint8{0xdb, 00}},
+	{AFCMOVCS, yfcmv, Px, [23]uint8{0xda, 00}},
+	{AFCMOVEQ, yfcmv, Px, [23]uint8{0xda, 01}},
+	{AFCMOVHI, yfcmv, Px, [23]uint8{0xdb, 02}},
+	{AFCMOVLS, yfcmv, Px, [23]uint8{0xda, 02}},
+	{AFCMOVNE, yfcmv, Px, [23]uint8{0xdb, 01}},
+	{AFCMOVNU, yfcmv, Px, [23]uint8{0xdb, 03}},
+	{AFCMOVUN, yfcmv, Px, [23]uint8{0xda, 03}},
+	{AFCOMB, nil, 0, [23]uint8{}},
+	{AFCOMBP, nil, 0, [23]uint8{}},
+	{AFCOMD, yfadd, Px, [23]uint8{0xdc, 02, 0xd8, 02, 0xdc, 02}},  /* botch */
+	{AFCOMDP, yfadd, Px, [23]uint8{0xdc, 03, 0xd8, 03, 0xdc, 03}}, /* botch */
+	{AFCOMDPP, ycompp, Px, [23]uint8{0xde, 03}},
+	{AFCOMF, yfmvx, Px, [23]uint8{0xd8, 02}},
+	{AFCOMFP, yfmvx, Px, [23]uint8{0xd8, 03}},
+	{AFCOMI, yfmvx, Px, [23]uint8{0xdb, 06}},
+	{AFCOMIP, yfmvx, Px, [23]uint8{0xdf, 06}},
+	{AFCOML, yfmvx, Px, [23]uint8{0xda, 02}},
+	{AFCOMLP, yfmvx, Px, [23]uint8{0xda, 03}},
+	{AFCOMW, yfmvx, Px, [23]uint8{0xde, 02}},
+	{AFCOMWP, yfmvx, Px, [23]uint8{0xde, 03}},
+	{AFUCOM, ycompp, Px, [23]uint8{0xdd, 04}},
+	{AFUCOMI, ycompp, Px, [23]uint8{0xdb, 05}},
+	{AFUCOMIP, ycompp, Px, [23]uint8{0xdf, 05}},
+	{AFUCOMP, ycompp, Px, [23]uint8{0xdd, 05}},
+	{AFUCOMPP, ycompp, Px, [23]uint8{0xda, 13}},
+	{AFADDDP, yfaddp, Px, [23]uint8{0xde, 00}},
+	{AFADDW, yfmvx, Px, [23]uint8{0xde, 00}},
+	{AFADDL, yfmvx, Px, [23]uint8{0xda, 00}},
+	{AFADDF, yfmvx, Px, [23]uint8{0xd8, 00}},
+	{AFADDD, yfadd, Px, [23]uint8{0xdc, 00, 0xd8, 00, 0xdc, 00}},
+	{AFMULDP, yfaddp, Px, [23]uint8{0xde, 01}},
+	{AFMULW, yfmvx, Px, [23]uint8{0xde, 01}},
+	{AFMULL, yfmvx, Px, [23]uint8{0xda, 01}},
+	{AFMULF, yfmvx, Px, [23]uint8{0xd8, 01}},
+	{AFMULD, yfadd, Px, [23]uint8{0xdc, 01, 0xd8, 01, 0xdc, 01}},
+	{AFSUBDP, yfaddp, Px, [23]uint8{0xde, 05}},
+	{AFSUBW, yfmvx, Px, [23]uint8{0xde, 04}},
+	{AFSUBL, yfmvx, Px, [23]uint8{0xda, 04}},
+	{AFSUBF, yfmvx, Px, [23]uint8{0xd8, 04}},
+	{AFSUBD, yfadd, Px, [23]uint8{0xdc, 04, 0xd8, 04, 0xdc, 05}},
+	{AFSUBRDP, yfaddp, Px, [23]uint8{0xde, 04}},
+	{AFSUBRW, yfmvx, Px, [23]uint8{0xde, 05}},
+	{AFSUBRL, yfmvx, Px, [23]uint8{0xda, 05}},
+	{AFSUBRF, yfmvx, Px, [23]uint8{0xd8, 05}},
+	{AFSUBRD, yfadd, Px, [23]uint8{0xdc, 05, 0xd8, 05, 0xdc, 04}},
+	{AFDIVDP, yfaddp, Px, [23]uint8{0xde, 07}},
+	{AFDIVW, yfmvx, Px, [23]uint8{0xde, 06}},
+	{AFDIVL, yfmvx, Px, [23]uint8{0xda, 06}},
+	{AFDIVF, yfmvx, Px, [23]uint8{0xd8, 06}},
+	{AFDIVD, yfadd, Px, [23]uint8{0xdc, 06, 0xd8, 06, 0xdc, 07}},
+	{AFDIVRDP, yfaddp, Px, [23]uint8{0xde, 06}},
+	{AFDIVRW, yfmvx, Px, [23]uint8{0xde, 07}},
+	{AFDIVRL, yfmvx, Px, [23]uint8{0xda, 07}},
+	{AFDIVRF, yfmvx, Px, [23]uint8{0xd8, 07}},
+	{AFDIVRD, yfadd, Px, [23]uint8{0xdc, 07, 0xd8, 07, 0xdc, 06}},
+	{AFXCHD, yfxch, Px, [23]uint8{0xd9, 01, 0xd9, 01}},
+	{AFFREE, nil, 0, [23]uint8{}},
+	{AFLDCW, ystcw, Px, [23]uint8{0xd9, 05, 0xd9, 05}},
+	{AFLDENV, ystcw, Px, [23]uint8{0xd9, 04, 0xd9, 04}},
+	{AFRSTOR, ysvrs, Px, [23]uint8{0xdd, 04, 0xdd, 04}},
+	{AFSAVE, ysvrs, Px, [23]uint8{0xdd, 06, 0xdd, 06}},
+	{AFSTCW, ystcw, Px, [23]uint8{0xd9, 07, 0xd9, 07}},
+	{AFSTENV, ystcw, Px, [23]uint8{0xd9, 06, 0xd9, 06}},
+	{AFSTSW, ystsw, Px, [23]uint8{0xdd, 07, 0xdf, 0xe0}},
+	{AF2XM1, ynone, Px, [23]uint8{0xd9, 0xf0}},
+	{AFABS, ynone, Px, [23]uint8{0xd9, 0xe1}},
+	{AFCHS, ynone, Px, [23]uint8{0xd9, 0xe0}},
+	{AFCLEX, ynone, Px, [23]uint8{0xdb, 0xe2}},
+	{AFCOS, ynone, Px, [23]uint8{0xd9, 0xff}},
+	{AFDECSTP, ynone, Px, [23]uint8{0xd9, 0xf6}},
+	{AFINCSTP, ynone, Px, [23]uint8{0xd9, 0xf7}},
+	{AFINIT, ynone, Px, [23]uint8{0xdb, 0xe3}},
+	{AFLD1, ynone, Px, [23]uint8{0xd9, 0xe8}},
+	{AFLDL2E, ynone, Px, [23]uint8{0xd9, 0xea}},
+	{AFLDL2T, ynone, Px, [23]uint8{0xd9, 0xe9}},
+	{AFLDLG2, ynone, Px, [23]uint8{0xd9, 0xec}},
+	{AFLDLN2, ynone, Px, [23]uint8{0xd9, 0xed}},
+	{AFLDPI, ynone, Px, [23]uint8{0xd9, 0xeb}},
+	{AFLDZ, ynone, Px, [23]uint8{0xd9, 0xee}},
+	{AFNOP, ynone, Px, [23]uint8{0xd9, 0xd0}},
+	{AFPATAN, ynone, Px, [23]uint8{0xd9, 0xf3}},
+	{AFPREM, ynone, Px, [23]uint8{0xd9, 0xf8}},
+	{AFPREM1, ynone, Px, [23]uint8{0xd9, 0xf5}},
+	{AFPTAN, ynone, Px, [23]uint8{0xd9, 0xf2}},
+	{AFRNDINT, ynone, Px, [23]uint8{0xd9, 0xfc}},
+	{AFSCALE, ynone, Px, [23]uint8{0xd9, 0xfd}},
+	{AFSIN, ynone, Px, [23]uint8{0xd9, 0xfe}},
+	{AFSINCOS, ynone, Px, [23]uint8{0xd9, 0xfb}},
+	{AFSQRT, ynone, Px, [23]uint8{0xd9, 0xfa}},
+	{AFTST, ynone, Px, [23]uint8{0xd9, 0xe4}},
+	{AFXAM, ynone, Px, [23]uint8{0xd9, 0xe5}},
+	{AFXTRACT, ynone, Px, [23]uint8{0xd9, 0xf4}},
+	{AFYL2X, ynone, Px, [23]uint8{0xd9, 0xf1}},
+	{AFYL2XP1, ynone, Px, [23]uint8{0xd9, 0xf9}},
+	{ACMPXCHGB, yrb_mb, Pb, [23]uint8{0x0f, 0xb0}},
+	{ACMPXCHGL, yrl_ml, Px, [23]uint8{0x0f, 0xb1}},
+	{ACMPXCHGW, yrl_ml, Pe, [23]uint8{0x0f, 0xb1}},
+	{ACMPXCHGQ, yrl_ml, Pw, [23]uint8{0x0f, 0xb1}},
+	{ACMPXCHG8B, yscond, Pm, [23]uint8{0xc7, 01}},
+	{AINVD, ynone, Pm, [23]uint8{0x08}},
+	{AINVLPG, ymbs, Pm, [23]uint8{0x01, 07}},
+	{ALFENCE, ynone, Pm, [23]uint8{0xae, 0xe8}},
+	{AMFENCE, ynone, Pm, [23]uint8{0xae, 0xf0}},
+	{AMOVNTIL, yrl_ml, Pm, [23]uint8{0xc3}},
+	{AMOVNTIQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc3}},
+	{ARDMSR, ynone, Pm, [23]uint8{0x32}},
+	{ARDPMC, ynone, Pm, [23]uint8{0x33}},
+	{ARDTSC, ynone, Pm, [23]uint8{0x31}},
+	{ARSM, ynone, Pm, [23]uint8{0xaa}},
+	{ASFENCE, ynone, Pm, [23]uint8{0xae, 0xf8}},
+	{ASYSRET, ynone, Pm, [23]uint8{0x07}},
+	{AWBINVD, ynone, Pm, [23]uint8{0x09}},
+	{AWRMSR, ynone, Pm, [23]uint8{0x30}},
+	{AXADDB, yrb_mb, Pb, [23]uint8{0x0f, 0xc0}},
+	{AXADDL, yrl_ml, Px, [23]uint8{0x0f, 0xc1}},
+	{AXADDQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc1}},
+	{AXADDW, yrl_ml, Pe, [23]uint8{0x0f, 0xc1}},
+	{ACRC32B, ycrc32l, Px, [23]uint8{0xf2, 0x0f, 0x38, 0xf0, 0}},
+	{ACRC32Q, ycrc32l, Pw, [23]uint8{0xf2, 0x0f, 0x38, 0xf1, 0}},
+	{APREFETCHT0, yprefetch, Pm, [23]uint8{0x18, 01}},
+	{APREFETCHT1, yprefetch, Pm, [23]uint8{0x18, 02}},
+	{APREFETCHT2, yprefetch, Pm, [23]uint8{0x18, 03}},
+	{APREFETCHNTA, yprefetch, Pm, [23]uint8{0x18, 00}},
+	{AMOVQL, yrl_ml, Px, [23]uint8{0x89}},
+	{obj.AUNDEF, ynone, Px, [23]uint8{0x0f, 0x0b}},
+	{AAESENC, yaes, Pq, [23]uint8{0x38, 0xdc, 0}},
+	{AAESENCLAST, yaes, Pq, [23]uint8{0x38, 0xdd, 0}},
+	{AAESDEC, yaes, Pq, [23]uint8{0x38, 0xde, 0}},
+	{AAESDECLAST, yaes, Pq, [23]uint8{0x38, 0xdf, 0}},
+	{AAESIMC, yaes, Pq, [23]uint8{0x38, 0xdb, 0}},
+	{AAESKEYGENASSIST, yaes2, Pq, [23]uint8{0x3a, 0xdf, 0}},
+	{AROUNDPD, yaes2, Pq, [23]uint8{0x3a, 0x09, 0}},
+	{AROUNDPS, yaes2, Pq, [23]uint8{0x3a, 0x08, 0}},
+	{AROUNDSD, yaes2, Pq, [23]uint8{0x3a, 0x0b, 0}},
+	{AROUNDSS, yaes2, Pq, [23]uint8{0x3a, 0x0a, 0}},
+	{APSHUFD, yxshuf, Pq, [23]uint8{0x70, 0}},
+	{APCLMULQDQ, yxshuf, Pq, [23]uint8{0x3a, 0x44, 0}},
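+	// AVX: VZEROUPPER is emitted as literal prefix bytes; the 256-bit
+	// moves below carry Pvex prefixes and are assembled through vexprefix.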
+	{AVZEROUPPER, ynone, Px, [23]uint8{0xc5, 0xf8, 0x77}},
+	{AMOVHDU, yxmov_vex, Pvex2, [23]uint8{0x6f, 0x7f}},
+	{AMOVNTHD, yxr_ml_vex, Pvex1, [23]uint8{0xe7}},
+	{AMOVHDA, yxmov_vex, Pvex1, [23]uint8{0x6f, 0x7f}},
+	{obj.AUSEFIELD, ynop, Px, [23]uint8{0, 0}},
+	{obj.ATYPE, nil, 0, [23]uint8{}},
+	{obj.AFUNCDATA, yfuncdata, Px, [23]uint8{0, 0}},
+	{obj.APCDATA, ypcdata, Px, [23]uint8{0, 0}},
+	{obj.ACHECKNIL, nil, 0, [23]uint8{}},
+	{obj.AVARDEF, nil, 0, [23]uint8{}},
+	{obj.AVARKILL, nil, 0, [23]uint8{}},
+	{obj.ADUFFCOPY, yduff, Px, [23]uint8{0xe8}},
+	{obj.ADUFFZERO, yduff, Px, [23]uint8{0xe8}},
+	{obj.AEND, nil, 0, [23]uint8{}},
+	{0, nil, 0, [23]uint8{}},
 }
 
 var opindex [(ALAST + 1) & obj.AMask]*Optab
@@ -1505,15 +1526,15 @@
 // constructed by hand and disassembled with gdb to verify.
 // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
 var nop = [][16]uint8{
-	[16]uint8{0x90},
-	[16]uint8{0x66, 0x90},
-	[16]uint8{0x0F, 0x1F, 0x00},
-	[16]uint8{0x0F, 0x1F, 0x40, 0x00},
-	[16]uint8{0x0F, 0x1F, 0x44, 0x00, 0x00},
-	[16]uint8{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
-	[16]uint8{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
-	[16]uint8{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
-	[16]uint8{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+	{0x90},
+	{0x66, 0x90},
+	{0x0F, 0x1F, 0x00},
+	{0x0F, 0x1F, 0x40, 0x00},
+	{0x0F, 0x1F, 0x44, 0x00, 0x00},
+	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
+	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
+	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
 }
 
 // Native Client rejects the repeated 0x66 prefix.
@@ -2690,112 +2711,112 @@
 
 var ymovtab = []Movtab{
 	/* push */
-	Movtab{APUSHL, Ycs, Ynone, Ynone, 0, [4]uint8{0x0e, E, 0, 0}},
-	Movtab{APUSHL, Yss, Ynone, Ynone, 0, [4]uint8{0x16, E, 0, 0}},
-	Movtab{APUSHL, Yds, Ynone, Ynone, 0, [4]uint8{0x1e, E, 0, 0}},
-	Movtab{APUSHL, Yes, Ynone, Ynone, 0, [4]uint8{0x06, E, 0, 0}},
-	Movtab{APUSHL, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
-	Movtab{APUSHL, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
-	Movtab{APUSHQ, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
-	Movtab{APUSHQ, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
-	Movtab{APUSHW, Ycs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0e, E, 0}},
-	Movtab{APUSHW, Yss, Ynone, Ynone, 0, [4]uint8{Pe, 0x16, E, 0}},
-	Movtab{APUSHW, Yds, Ynone, Ynone, 0, [4]uint8{Pe, 0x1e, E, 0}},
-	Movtab{APUSHW, Yes, Ynone, Ynone, 0, [4]uint8{Pe, 0x06, E, 0}},
-	Movtab{APUSHW, Yfs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa0, E}},
-	Movtab{APUSHW, Ygs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa8, E}},
+	{APUSHL, Ycs, Ynone, Ynone, 0, [4]uint8{0x0e, E, 0, 0}},
+	{APUSHL, Yss, Ynone, Ynone, 0, [4]uint8{0x16, E, 0, 0}},
+	{APUSHL, Yds, Ynone, Ynone, 0, [4]uint8{0x1e, E, 0, 0}},
+	{APUSHL, Yes, Ynone, Ynone, 0, [4]uint8{0x06, E, 0, 0}},
+	{APUSHL, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
+	{APUSHL, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
+	{APUSHQ, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
+	{APUSHQ, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
+	{APUSHW, Ycs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0e, E, 0}},
+	{APUSHW, Yss, Ynone, Ynone, 0, [4]uint8{Pe, 0x16, E, 0}},
+	{APUSHW, Yds, Ynone, Ynone, 0, [4]uint8{Pe, 0x1e, E, 0}},
+	{APUSHW, Yes, Ynone, Ynone, 0, [4]uint8{Pe, 0x06, E, 0}},
+	{APUSHW, Yfs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa0, E}},
+	{APUSHW, Ygs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa8, E}},
 
 	/* pop */
-	Movtab{APOPL, Ynone, Ynone, Yds, 0, [4]uint8{0x1f, E, 0, 0}},
-	Movtab{APOPL, Ynone, Ynone, Yes, 0, [4]uint8{0x07, E, 0, 0}},
-	Movtab{APOPL, Ynone, Ynone, Yss, 0, [4]uint8{0x17, E, 0, 0}},
-	Movtab{APOPL, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
-	Movtab{APOPL, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
-	Movtab{APOPQ, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
-	Movtab{APOPQ, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
-	Movtab{APOPW, Ynone, Ynone, Yds, 0, [4]uint8{Pe, 0x1f, E, 0}},
-	Movtab{APOPW, Ynone, Ynone, Yes, 0, [4]uint8{Pe, 0x07, E, 0}},
-	Movtab{APOPW, Ynone, Ynone, Yss, 0, [4]uint8{Pe, 0x17, E, 0}},
-	Movtab{APOPW, Ynone, Ynone, Yfs, 0, [4]uint8{Pe, 0x0f, 0xa1, E}},
-	Movtab{APOPW, Ynone, Ynone, Ygs, 0, [4]uint8{Pe, 0x0f, 0xa9, E}},
+	{APOPL, Ynone, Ynone, Yds, 0, [4]uint8{0x1f, E, 0, 0}},
+	{APOPL, Ynone, Ynone, Yes, 0, [4]uint8{0x07, E, 0, 0}},
+	{APOPL, Ynone, Ynone, Yss, 0, [4]uint8{0x17, E, 0, 0}},
+	{APOPL, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
+	{APOPL, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
+	{APOPQ, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
+	{APOPQ, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
+	{APOPW, Ynone, Ynone, Yds, 0, [4]uint8{Pe, 0x1f, E, 0}},
+	{APOPW, Ynone, Ynone, Yes, 0, [4]uint8{Pe, 0x07, E, 0}},
+	{APOPW, Ynone, Ynone, Yss, 0, [4]uint8{Pe, 0x17, E, 0}},
+	{APOPW, Ynone, Ynone, Yfs, 0, [4]uint8{Pe, 0x0f, 0xa1, E}},
+	{APOPW, Ynone, Ynone, Ygs, 0, [4]uint8{Pe, 0x0f, 0xa9, E}},
 
 	/* mov seg */
-	Movtab{AMOVW, Yes, Ynone, Yml, 1, [4]uint8{0x8c, 0, 0, 0}},
-	Movtab{AMOVW, Ycs, Ynone, Yml, 1, [4]uint8{0x8c, 1, 0, 0}},
-	Movtab{AMOVW, Yss, Ynone, Yml, 1, [4]uint8{0x8c, 2, 0, 0}},
-	Movtab{AMOVW, Yds, Ynone, Yml, 1, [4]uint8{0x8c, 3, 0, 0}},
-	Movtab{AMOVW, Yfs, Ynone, Yml, 1, [4]uint8{0x8c, 4, 0, 0}},
-	Movtab{AMOVW, Ygs, Ynone, Yml, 1, [4]uint8{0x8c, 5, 0, 0}},
-	Movtab{AMOVW, Yml, Ynone, Yes, 2, [4]uint8{0x8e, 0, 0, 0}},
-	Movtab{AMOVW, Yml, Ynone, Ycs, 2, [4]uint8{0x8e, 1, 0, 0}},
-	Movtab{AMOVW, Yml, Ynone, Yss, 2, [4]uint8{0x8e, 2, 0, 0}},
-	Movtab{AMOVW, Yml, Ynone, Yds, 2, [4]uint8{0x8e, 3, 0, 0}},
-	Movtab{AMOVW, Yml, Ynone, Yfs, 2, [4]uint8{0x8e, 4, 0, 0}},
-	Movtab{AMOVW, Yml, Ynone, Ygs, 2, [4]uint8{0x8e, 5, 0, 0}},
+	{AMOVW, Yes, Ynone, Yml, 1, [4]uint8{0x8c, 0, 0, 0}},
+	{AMOVW, Ycs, Ynone, Yml, 1, [4]uint8{0x8c, 1, 0, 0}},
+	{AMOVW, Yss, Ynone, Yml, 1, [4]uint8{0x8c, 2, 0, 0}},
+	{AMOVW, Yds, Ynone, Yml, 1, [4]uint8{0x8c, 3, 0, 0}},
+	{AMOVW, Yfs, Ynone, Yml, 1, [4]uint8{0x8c, 4, 0, 0}},
+	{AMOVW, Ygs, Ynone, Yml, 1, [4]uint8{0x8c, 5, 0, 0}},
+	{AMOVW, Yml, Ynone, Yes, 2, [4]uint8{0x8e, 0, 0, 0}},
+	{AMOVW, Yml, Ynone, Ycs, 2, [4]uint8{0x8e, 1, 0, 0}},
+	{AMOVW, Yml, Ynone, Yss, 2, [4]uint8{0x8e, 2, 0, 0}},
+	{AMOVW, Yml, Ynone, Yds, 2, [4]uint8{0x8e, 3, 0, 0}},
+	{AMOVW, Yml, Ynone, Yfs, 2, [4]uint8{0x8e, 4, 0, 0}},
+	{AMOVW, Yml, Ynone, Ygs, 2, [4]uint8{0x8e, 5, 0, 0}},
 
 	/* mov cr */
-	Movtab{AMOVL, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
-	Movtab{AMOVL, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
-	Movtab{AMOVL, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
-	Movtab{AMOVL, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
-	Movtab{AMOVL, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
-	Movtab{AMOVQ, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
-	Movtab{AMOVQ, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
-	Movtab{AMOVQ, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
-	Movtab{AMOVQ, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
-	Movtab{AMOVQ, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
-	Movtab{AMOVL, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
-	Movtab{AMOVL, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
-	Movtab{AMOVL, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
-	Movtab{AMOVL, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
-	Movtab{AMOVL, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},
-	Movtab{AMOVQ, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
-	Movtab{AMOVQ, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
-	Movtab{AMOVQ, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
-	Movtab{AMOVQ, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
-	Movtab{AMOVQ, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},
+	{AMOVL, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
+	{AMOVL, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
+	{AMOVL, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
+	{AMOVL, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
+	{AMOVL, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
+	{AMOVQ, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
+	{AMOVQ, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
+	{AMOVQ, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
+	{AMOVQ, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
+	{AMOVQ, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
+	{AMOVL, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
+	{AMOVL, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
+	{AMOVL, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
+	{AMOVL, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
+	{AMOVL, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},
+	{AMOVQ, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
+	{AMOVQ, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
+	{AMOVQ, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
+	{AMOVQ, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
+	{AMOVQ, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},
 
 	/* mov dr */
-	Movtab{AMOVL, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
-	Movtab{AMOVL, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
-	Movtab{AMOVL, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
-	Movtab{AMOVQ, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
-	Movtab{AMOVQ, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
-	Movtab{AMOVQ, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
-	Movtab{AMOVL, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
-	Movtab{AMOVL, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
-	Movtab{AMOVL, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},
-	Movtab{AMOVQ, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
-	Movtab{AMOVQ, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
-	Movtab{AMOVQ, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},
+	{AMOVL, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
+	{AMOVL, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
+	{AMOVL, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
+	{AMOVQ, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
+	{AMOVQ, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
+	{AMOVQ, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
+	{AMOVL, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
+	{AMOVL, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
+	{AMOVL, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},
+	{AMOVQ, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
+	{AMOVQ, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
+	{AMOVQ, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},
 
 	/* mov tr */
-	Movtab{AMOVL, Ytr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 6, 0}},
-	Movtab{AMOVL, Ytr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 7, 0}},
-	Movtab{AMOVL, Yml, Ynone, Ytr6, 4, [4]uint8{0x0f, 0x26, 6, E}},
-	Movtab{AMOVL, Yml, Ynone, Ytr7, 4, [4]uint8{0x0f, 0x26, 7, E}},
+	{AMOVL, Ytr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 6, 0}},
+	{AMOVL, Ytr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 7, 0}},
+	{AMOVL, Yml, Ynone, Ytr6, 4, [4]uint8{0x0f, 0x26, 6, E}},
+	{AMOVL, Yml, Ynone, Ytr7, 4, [4]uint8{0x0f, 0x26, 7, E}},
 
 	/* lgdt, sgdt, lidt, sidt */
-	Movtab{AMOVL, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
-	Movtab{AMOVL, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
-	Movtab{AMOVL, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
-	Movtab{AMOVL, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},
-	Movtab{AMOVQ, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
-	Movtab{AMOVQ, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
-	Movtab{AMOVQ, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
-	Movtab{AMOVQ, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},
+	{AMOVL, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
+	{AMOVL, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
+	{AMOVL, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
+	{AMOVL, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},
+	{AMOVQ, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
+	{AMOVQ, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
+	{AMOVQ, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
+	{AMOVQ, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},
 
 	/* lldt, sldt */
-	Movtab{AMOVW, Yml, Ynone, Yldtr, 4, [4]uint8{0x0f, 0x00, 2, 0}},
-	Movtab{AMOVW, Yldtr, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 0, 0}},
+	{AMOVW, Yml, Ynone, Yldtr, 4, [4]uint8{0x0f, 0x00, 2, 0}},
+	{AMOVW, Yldtr, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 0, 0}},
 
 	/* lmsw, smsw */
-	Movtab{AMOVW, Yml, Ynone, Ymsw, 4, [4]uint8{0x0f, 0x01, 6, 0}},
-	Movtab{AMOVW, Ymsw, Ynone, Yml, 3, [4]uint8{0x0f, 0x01, 4, 0}},
+	{AMOVW, Yml, Ynone, Ymsw, 4, [4]uint8{0x0f, 0x01, 6, 0}},
+	{AMOVW, Ymsw, Ynone, Yml, 3, [4]uint8{0x0f, 0x01, 4, 0}},
 
 	/* ltr, str */
-	Movtab{AMOVW, Yml, Ynone, Ytask, 4, [4]uint8{0x0f, 0x00, 3, 0}},
-	Movtab{AMOVW, Ytask, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 1, 0}},
+	{AMOVW, Yml, Ynone, Ytask, 4, [4]uint8{0x0f, 0x00, 3, 0}},
+	{AMOVW, Ytask, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 1, 0}},
 
 	/* load full pointer - unsupported
 	Movtab{AMOVL, Yml, Ycol, 5, [4]uint8{0, 0, 0, 0}},
@@ -2803,29 +2824,29 @@
 	*/
 
 	/* double shift */
-	Movtab{ASHLL, Yi8, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
-	Movtab{ASHLL, Ycl, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
-	Movtab{ASHLL, Ycx, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
-	Movtab{ASHRL, Yi8, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
-	Movtab{ASHRL, Ycl, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
-	Movtab{ASHRL, Ycx, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
-	Movtab{ASHLQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
-	Movtab{ASHLQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
-	Movtab{ASHLQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
-	Movtab{ASHRQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
-	Movtab{ASHRQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
-	Movtab{ASHRQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
-	Movtab{ASHLW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
-	Movtab{ASHLW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
-	Movtab{ASHLW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
-	Movtab{ASHRW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
-	Movtab{ASHRW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
-	Movtab{ASHRW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
+	{ASHLL, Yi8, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
+	{ASHLL, Ycl, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
+	{ASHLL, Ycx, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
+	{ASHRL, Yi8, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
+	{ASHRL, Ycl, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
+	{ASHRL, Ycx, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
+	{ASHLQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
+	{ASHLQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
+	{ASHLQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
+	{ASHRQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
+	{ASHRQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
+	{ASHRQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
+	{ASHLW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
+	{ASHLW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
+	{ASHLW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
+	{ASHRW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
+	{ASHRW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
+	{ASHRW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
 
 	/* load TLS base */
-	Movtab{AMOVL, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
-	Movtab{AMOVQ, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
-	Movtab{0, 0, 0, 0, 0, [4]uint8{}},
+	{AMOVL, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
+	{AMOVQ, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
+	{0, 0, 0, 0, 0, [4]uint8{}},
 }
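
The change to this table is purely cosmetic: Go composite literals may elide the element type when the slice type already names it, which is the simplification gofmt -s applies. In miniature, with a toy type standing in for Movtab:

	package main

	import "fmt"

	type point struct{ x, y int }

	var ps = []point{
		point{1, 2}, // explicit element type: legal but redundant
		{3, 4},      // elided element type: the form used table-wide above
	}

	func main() { fmt.Println(ps) }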
 
 func isax(a *obj.Addr) bool {
@@ -2907,6 +2928,50 @@
 	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
 }
 
+func vexprefix(ctxt *obj.Link, to *obj.Addr, from *obj.Addr, pref uint8) {
+	rexR := regrex[to.Reg]
+	rexB := regrex[from.Reg]
+	rexX := regrex[from.Index]
+	var prefBit uint8
+	if pref == Pvex1 {
+		prefBit = 1
+	} else if pref == Pvex2 {
+		prefBit = 2
+	} // TODO: add Pvex0, Pvex3
+
+	if rexX == 0 && rexB == 0 { // 2-byte vex prefix
+		ctxt.Andptr[0] = 0xc5
+		ctxt.Andptr = ctxt.Andptr[1:]
+
+		if rexR != 0 {
+			ctxt.Andptr[0] = 0x7c
+		} else {
+			ctxt.Andptr[0] = 0xfc
+		}
+		ctxt.Andptr[0] |= prefBit
+		ctxt.Andptr = ctxt.Andptr[1:]
+	} else {
+		ctxt.Andptr[0] = 0xc4
+		ctxt.Andptr = ctxt.Andptr[1:]
+
+		ctxt.Andptr[0] = 0x1 // TODO handle different prefix
+		if rexR == 0 {
+			ctxt.Andptr[0] |= 0x80
+		}
+		if rexX == 0 {
+			ctxt.Andptr[0] |= 0x40
+		}
+		if rexB == 0 {
+			ctxt.Andptr[0] |= 0x20
+		}
+		ctxt.Andptr = ctxt.Andptr[1:]
+
+		ctxt.Andptr[0] = 0x7c
+		ctxt.Andptr[0] |= prefBit
+		ctxt.Andptr = ctxt.Andptr[1:]
+	}
+}
+
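
vexprefix above picks between the two VEX encodings: the 2-byte 0xC5 form is only usable when neither the index nor the base register needs the REX.X/REX.B extension bits; otherwise the 3-byte 0xC4 form is emitted. A self-contained sketch of the 2-byte case, unpacking the magic constants (names here are illustrative, not part of the patch):

	package main

	import "fmt"

	// vex2 builds the 2-byte VEX prefix: 0xfc packs ~R=1, vvvv=1111 (unused),
	// L=1 (256-bit), pp=00, so the encoder starts from 0xfc (or 0x7c when ~R
	// must be 0) and ORs in the pp bits selected by Pvex1/Pvex2.
	func vex2(rexR bool, pp uint8) [2]byte {
		b := byte(0xfc)
		if rexR {
			b &^= 0x80 // clear ~R: the ModRM reg field needs extension
		}
		return [2]byte{0xc5, b | pp}
	}

	func main() {
		fmt.Printf("% x\n", vex2(false, 1)) // c5 fd
	}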
 func doasm(ctxt *obj.Link, p *obj.Prog) {
 	ctxt.Curp = p // TODO
 
@@ -3140,6 +3205,13 @@
 				mediaop(ctxt, o, op, int(yt.zoffset), z)
 				asmand(ctxt, p, &p.From, &p.To)
 
+			case Zm_r_xm_vex:
+				ctxt.Vexflag = 1
+				vexprefix(ctxt, &p.To, &p.From, o.prefix)
+				ctxt.Andptr[0] = byte(op)
+				ctxt.Andptr = ctxt.Andptr[1:]
+				asmand(ctxt, p, &p.From, &p.To)
+
 			case Zm_r_xm_nr:
 				ctxt.Rexflag = 0
 				mediaop(ctxt, o, op, int(yt.zoffset), z)
@@ -3195,6 +3267,13 @@
 				ctxt.Andptr = ctxt.Andptr[1:]
 				asmand(ctxt, p, &p.To, &p.From)
 
+			case Zr_m_xm_vex:
+				ctxt.Vexflag = 1
+				vexprefix(ctxt, &p.From, &p.To, o.prefix)
+				ctxt.Andptr[0] = byte(op)
+				ctxt.Andptr = ctxt.Andptr[1:]
+				asmand(ctxt, p, &p.To, &p.From)
+
 			case Zr_m_xm:
 				mediaop(ctxt, o, op, int(yt.zoffset), z)
 				asmand(ctxt, p, &p.To, &p.From)
@@ -4303,10 +4382,11 @@
 	}
 
 	ctxt.Rexflag = 0
+	ctxt.Vexflag = 0
 	and0 := ctxt.Andptr
 	ctxt.Asmode = int(p.Mode)
 	doasm(ctxt, p)
-	if ctxt.Rexflag != 0 {
+	if ctxt.Rexflag != 0 && ctxt.Vexflag == 0 {
 		/*
 		 * as befits the whole approach of the architecture,
 		 * the rex prefix must appear before the first opcode byte
diff --git a/src/cmd/internal/obj/x86/obj6.go b/src/cmd/internal/obj/x86/obj6.go
index d55dcc7..503971a 100644
--- a/src/cmd/internal/obj/x86/obj6.go
+++ b/src/cmd/internal/obj/x86/obj6.go
@@ -437,10 +437,6 @@
 }
 
 func preprocess(ctxt *obj.Link, cursym *obj.LSym) {
-	if ctxt.Tlsg == nil {
-		ctxt.Tlsg = obj.Linklookup(ctxt, "runtime.tlsg", 0)
-	}
-
 	if ctxt.Headtype == obj.Hplan9 && ctxt.Plan9privates == nil {
 		ctxt.Plan9privates = obj.Linklookup(ctxt, "_privates", 0)
 	}
diff --git a/src/cmd/link/internal/arm/asm.go b/src/cmd/link/internal/arm/asm.go
index a0e31a3..01b2aaa 100644
--- a/src/cmd/link/internal/arm/asm.go
+++ b/src/cmd/link/internal/arm/asm.go
@@ -223,16 +223,11 @@
 			return -1
 		}
 
-	case obj.R_TLS:
-		if r.Siz == 4 {
-			if ld.Buildmode == ld.BuildmodeCShared {
-				ld.Thearch.Lput(ld.R_ARM_TLS_IE32 | uint32(elfsym)<<8)
-			} else {
-				ld.Thearch.Lput(ld.R_ARM_TLS_LE32 | uint32(elfsym)<<8)
-			}
-		} else {
-			return -1
-		}
+	case obj.R_TLS_LE:
+		ld.Thearch.Lput(ld.R_ARM_TLS_LE32 | uint32(elfsym)<<8)
+
+	case obj.R_TLS_IE:
+		ld.Thearch.Lput(ld.R_ARM_TLS_IE32 | uint32(elfsym)<<8)
 	}
 
 	return 0
@@ -339,7 +334,7 @@
 				rs = rs.Outer
 			}
 
-			if rs.Type != obj.SHOSTOBJ && rs.Sect == nil {
+			if rs.Type != obj.SHOSTOBJ && rs.Type != obj.SDYNIMPORT && rs.Sect == nil {
 				ld.Diag("missing section for %s", rs.Name)
 			}
 			r.Xsym = rs
diff --git a/src/cmd/link/internal/arm/obj.go b/src/cmd/link/internal/arm/obj.go
index c467820..10eb723 100644
--- a/src/cmd/link/internal/arm/obj.go
+++ b/src/cmd/link/internal/arm/obj.go
@@ -86,6 +86,10 @@
 		ld.Linkmode = ld.LinkInternal
 	}
 
+	if ld.Buildmode == ld.BuildmodeCArchive || ld.Buildmode == ld.BuildmodeCShared || ld.DynlinkingGo() {
+		ld.Linkmode = ld.LinkExternal
+	}
+
 	switch ld.HEADTYPE {
 	default:
 		if ld.Linkmode == ld.LinkAuto {
diff --git a/src/cmd/link/internal/ld/data.go b/src/cmd/link/internal/ld/data.go
index f1561d3..2855e55 100644
--- a/src/cmd/link/internal/ld/data.go
+++ b/src/cmd/link/internal/ld/data.go
@@ -362,11 +362,6 @@
 			Diag("unreachable sym in relocation: %s %s", s.Name, r.Sym.Name)
 		}
 
-		// Android emulates runtime.tlsg as a regular variable.
-		if r.Type == obj.R_TLS && goos == "android" {
-			r.Type = obj.R_ADDR
-		}
-
 		switch r.Type {
 		default:
 			switch siz {
@@ -385,39 +380,13 @@
 				Diag("unknown reloc %d", r.Type)
 			}
 
-		case obj.R_TLS:
-			if Linkmode == LinkExternal && Iself && HEADTYPE != obj.Hopenbsd {
-				r.Done = 0
-				r.Sym = Ctxt.Tlsg
-				r.Xsym = Ctxt.Tlsg
-				r.Xadd = r.Add
-				o = r.Add
-				break
-			}
-			if Linkmode == LinkInternal && Iself && Thearch.Thechar == '5' {
-				// On ELF ARM, the thread pointer is 8 bytes before
-				// the start of the thread-local data block, so add 8
-				// to the actual TLS offset (r->sym->value).
-				// This 8 seems to be a fundamental constant of
-				// ELF on ARM (or maybe Glibc on ARM); it is not
-				// related to the fact that our own TLS storage happens
-				// to take up 8 bytes.
-				o = 8 + r.Sym.Value
-
-				break
-			}
-
-			r.Done = 0
-			o = 0
-			if Thearch.Thechar != '6' {
-				o = r.Add
-			}
-
 		case obj.R_TLS_LE:
 			if Linkmode == LinkExternal && Iself && HEADTYPE != obj.Hopenbsd {
 				r.Done = 0
-				r.Sym = Ctxt.Tlsg
-				r.Xsym = Ctxt.Tlsg
+				if r.Sym == nil {
+					r.Sym = Ctxt.Tlsg
+				}
+				r.Xsym = r.Sym
 				r.Xadd = r.Add
 				o = 0
 				if Thearch.Thechar != '6' {
@@ -426,7 +395,16 @@
 				break
 			}
 
-			if Iself || Ctxt.Headtype == obj.Hplan9 || Ctxt.Headtype == obj.Hdarwin {
+			if Iself && Thearch.Thechar == '5' {
+				// On ELF ARM, the thread pointer is 8 bytes before
+				// the start of the thread-local data block, so add 8
+				// to the actual TLS offset (r->sym->value).
+				// This 8 seems to be a fundamental constant of
+				// ELF on ARM (or maybe Glibc on ARM); it is not
+				// related to the fact that our own TLS storage happens
+				// to take up 8 bytes.
+				o = 8 + r.Sym.Value
+			} else if Iself || Ctxt.Headtype == obj.Hplan9 || Ctxt.Headtype == obj.Hdarwin {
 				o = int64(Ctxt.Tlsoffset) + r.Add
 			} else if Ctxt.Headtype == obj.Hwindows {
 				o = r.Add
@@ -437,8 +415,10 @@
 		case obj.R_TLS_IE:
 			if Linkmode == LinkExternal && Iself && HEADTYPE != obj.Hopenbsd {
 				r.Done = 0
-				r.Sym = Ctxt.Tlsg
-				r.Xsym = Ctxt.Tlsg
+				if r.Sym == nil {
+					r.Sym = Ctxt.Tlsg
+				}
+				r.Xsym = r.Sym
 				r.Xadd = r.Add
 				o = 0
 				if Thearch.Thechar != '6' {
@@ -1207,6 +1187,31 @@
 
 	*l = nil
 
+	if UseRelro() {
+		// "read only" data with relocations needs to go in its own section
+		// when building a shared library. We do this by boosting objects of
+		// type SXXX with relocations to type SXXXRELRO.
+		for s := datap; s != nil; s = s.Next {
+			if (s.Type >= obj.STYPE && s.Type <= obj.SFUNCTAB && len(s.R) > 0) || s.Type == obj.SGOSTRING {
+				s.Type += (obj.STYPERELRO - obj.STYPE)
+				if s.Outer != nil {
+					s.Outer.Type = s.Type
+				}
+			}
+		}
+		// Check that we haven't made two symbols with the same .Outer into
+		// different types (because references to symbols with non-nil Outer
+		// become references to the outer symbol plus an offset, it is vital
+		// that the symbol and its outer end up in the same section).
+		for s := datap; s != nil; s = s.Next {
+			if s.Outer != nil && s.Outer.Type != s.Type {
+				Diag("inconsistent types for %s and its Outer %s (%d != %d)",
+					s.Name, s.Outer.Name, s.Type, s.Outer.Type)
+			}
+		}
+
+	}
+
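
The boost only works if the RELRO symbol kinds form a run of constants parallel to the plain kinds, so that one shared offset maps each kind to its RELRO counterpart. A toy sketch of that invariant (stand-in constants, not the real obj package):

	package main

	import "fmt"

	const (
		STYPE = iota
		SSTRING
		SGOSTRING
		STYPERELRO // the parallel RELRO run starts here
		SSTRINGRELRO
		SGOSTRINGRELRO
	)

	// toRelro maps a plain kind to its RELRO twin by the shared offset,
	// the same s.Type += (obj.STYPERELRO - obj.STYPE) trick used above.
	func toRelro(t int) int { return t + (STYPERELRO - STYPE) }

	func main() {
		fmt.Println(toRelro(SSTRING) == SSTRINGRELRO) // true
	}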
 	datap = listsort(datap, datcmp, listnextp)
 
 	if Iself {
@@ -1465,12 +1470,12 @@
 	/* read-only data */
 	sect = addsection(segro, ".rodata", 04)
 
-	sect.Align = maxalign(s, obj.STYPELINK-1)
+	sect.Align = maxalign(s, obj.STYPERELRO-1)
 	datsize = Rnd(datsize, int64(sect.Align))
 	sect.Vaddr = 0
 	Linklookup(Ctxt, "runtime.rodata", 0).Sect = sect
 	Linklookup(Ctxt, "runtime.erodata", 0).Sect = sect
-	for ; s != nil && s.Type < obj.STYPELINK; s = s.Next {
+	for ; s != nil && s.Type < obj.STYPERELRO; s = s.Next {
 		datsize = aligndatsize(datsize, s)
 		s.Sect = sect
 		s.Type = obj.SRODATA
@@ -1480,8 +1485,45 @@
 
 	sect.Length = uint64(datsize) - sect.Vaddr
 
+	// There is some data that is conceptually read-only but is written to by
+	// relocations. On GNU systems, we can arrange for the dynamic linker to
+	// mprotect sections after relocations are applied by giving them write
+	// permissions in the object file and calling them ".data.rel.ro.FOO". We
+	// divide the .rodata section between actual .rodata and .data.rel.ro.rodata,
+	// but for the other sections that this applies to, we just write a read-only
+	// .FOO section or a read-write .data.rel.ro.FOO section depending on the
+	// situation.
+	// TODO(mwhudson): It would make sense to do this more widely, but it makes
+	// the system linker segfault on darwin.
+	relro_perms := 04
+	relro_prefix := ""
+
+	if UseRelro() {
+		relro_perms = 06
+		relro_prefix = ".data.rel.ro"
+		/* data only written by relocations */
+		sect = addsection(segro, ".data.rel.ro", 06)
+
+		sect.Align = maxalign(s, obj.STYPELINK-1)
+		datsize = Rnd(datsize, int64(sect.Align))
+		sect.Vaddr = 0
+		for ; s != nil && s.Type < obj.STYPELINK; s = s.Next {
+			datsize = aligndatsize(datsize, s)
+			if s.Outer != nil && s.Outer.Sect != nil && s.Outer.Sect != sect {
+				Diag("s.Outer (%s) in different section from s (%s)", s.Outer.Name, s.Name)
+			}
+			s.Sect = sect
+			s.Type = obj.SRODATA
+			s.Value = int64(uint64(datsize) - sect.Vaddr)
+			growdatsize(&datsize, s)
+		}
+
+		sect.Length = uint64(datsize) - sect.Vaddr
+
+	}
+
 	/* typelink */
-	sect = addsection(segro, ".typelink", 04)
+	sect = addsection(segro, relro_prefix+".typelink", relro_perms)
 
 	sect.Align = maxalign(s, obj.STYPELINK)
 	datsize = Rnd(datsize, int64(sect.Align))
@@ -1499,7 +1541,7 @@
 	sect.Length = uint64(datsize) - sect.Vaddr
 
 	/* gosymtab */
-	sect = addsection(segro, ".gosymtab", 04)
+	sect = addsection(segro, relro_prefix+".gosymtab", relro_perms)
 
 	sect.Align = maxalign(s, obj.SPCLNTAB-1)
 	datsize = Rnd(datsize, int64(sect.Align))
@@ -1517,7 +1559,7 @@
 	sect.Length = uint64(datsize) - sect.Vaddr
 
 	/* gopclntab */
-	sect = addsection(segro, ".gopclntab", 04)
+	sect = addsection(segro, relro_prefix+".gopclntab", relro_perms)
 
 	sect.Align = maxalign(s, obj.SELFROSECT-1)
 	datsize = Rnd(datsize, int64(sect.Align))
@@ -1723,6 +1765,11 @@
 		rodata = text.Next
 	}
 	typelink := rodata.Next
+	if UseRelro() {
+		// There is another section (.data.rel.ro) when building a shared
+		// object on elf systems.
+		typelink = typelink.Next
+	}
 	symtab := typelink.Next
 	pclntab := symtab.Next
 
diff --git a/src/cmd/link/internal/ld/elf.go b/src/cmd/link/internal/ld/elf.go
index baca032..94b4753 100644
--- a/src/cmd/link/internal/ld/elf.go
+++ b/src/cmd/link/internal/ld/elf.go
@@ -206,6 +206,7 @@
 	SHT_GNU_VERNEED      = 0x6ffffffe
 	SHT_GNU_VERSYM       = 0x6fffffff
 	SHT_LOPROC           = 0x70000000
+	SHT_ARM_ATTRIBUTES   = 0x70000003
 	SHT_HIPROC           = 0x7fffffff
 	SHT_LOUSER           = 0x80000000
 	SHT_HIUSER           = 0xffffffff
@@ -776,11 +777,19 @@
 		ehdr.phentsize = ELF64PHDRSIZE /* Must be ELF64PHDRSIZE */
 		ehdr.shentsize = ELF64SHDRSIZE /* Must be ELF64SHDRSIZE */
 
-		// we use EABI on both linux/arm and freebsd/arm.
 	// 32-bit architectures
 	case '5':
 		// we use EABI on both linux/arm and freebsd/arm.
 		if HEADTYPE == obj.Hlinux || HEADTYPE == obj.Hfreebsd {
+			// We set a value here that gives no indication of which
+			// float ABI the object uses, because that information is
+			// used by the dynamic linker to compare executables and
+			// shared libraries -- so it only matters for cgo calls, and
+			// the information properly comes from the object files
+			// produced by the host C compiler. parseArmAttributes in
+			// ldelf.go reads that information and updates this field as
+			// appropriate.
 			ehdr.flags = 0x5000002 // has entry point, Version5 EABI
 		}
 		fallthrough
@@ -1497,9 +1506,7 @@
 		sh.flags |= SHF_WRITE
 	}
 	if sect.Name == ".tbss" {
-		if goos != "android" {
-			sh.flags |= SHF_TLS // no TLS on android
-		}
+		sh.flags |= SHF_TLS
 		sh.type_ = SHT_NOBITS
 	}
 
@@ -1508,7 +1515,7 @@
 	}
 	sh.addralign = uint64(sect.Align)
 	sh.size = sect.Length
-	if sect.Name != ".tbss" || goos == "android" {
+	if sect.Name != ".tbss" {
 		sh.off = sect.Seg.Fileoff + sect.Vaddr - sect.Seg.Vaddr
 	}
 
@@ -1690,9 +1697,15 @@
 	}
 	Addstring(shstrtab, ".elfdata")
 	Addstring(shstrtab, ".rodata")
-	Addstring(shstrtab, ".typelink")
-	Addstring(shstrtab, ".gosymtab")
-	Addstring(shstrtab, ".gopclntab")
+	// See the comment about data.rel.ro.FOO section names in data.go.
+	relro_prefix := ""
+	if UseRelro() {
+		Addstring(shstrtab, ".data.rel.ro")
+		relro_prefix = ".data.rel.ro"
+	}
+	Addstring(shstrtab, relro_prefix+".typelink")
+	Addstring(shstrtab, relro_prefix+".gosymtab")
+	Addstring(shstrtab, relro_prefix+".gopclntab")
 
 	if Linkmode == LinkExternal {
 		Debug['d'] = 1
@@ -1701,20 +1714,26 @@
 		case '6', '7', '9':
 			Addstring(shstrtab, ".rela.text")
 			Addstring(shstrtab, ".rela.rodata")
-			Addstring(shstrtab, ".rela.typelink")
-			Addstring(shstrtab, ".rela.gosymtab")
-			Addstring(shstrtab, ".rela.gopclntab")
+			Addstring(shstrtab, ".rela"+relro_prefix+".typelink")
+			Addstring(shstrtab, ".rela"+relro_prefix+".gosymtab")
+			Addstring(shstrtab, ".rela"+relro_prefix+".gopclntab")
 			Addstring(shstrtab, ".rela.noptrdata")
 			Addstring(shstrtab, ".rela.data")
+			if UseRelro() {
+				Addstring(shstrtab, ".rela.data.rel.ro")
+			}
 
 		default:
 			Addstring(shstrtab, ".rel.text")
 			Addstring(shstrtab, ".rel.rodata")
-			Addstring(shstrtab, ".rel.typelink")
-			Addstring(shstrtab, ".rel.gosymtab")
-			Addstring(shstrtab, ".rel.gopclntab")
+			Addstring(shstrtab, ".rel"+relro_prefix+".typelink")
+			Addstring(shstrtab, ".rel"+relro_prefix+".gosymtab")
+			Addstring(shstrtab, ".rel"+relro_prefix+".gopclntab")
 			Addstring(shstrtab, ".rel.noptrdata")
 			Addstring(shstrtab, ".rel.data")
+			if UseRelro() {
+				Addstring(shstrtab, ".rel.data.rel.ro")
+			}
 		}
 
 		// add a .note.GNU-stack section to mark the stack as non-executable
diff --git a/src/cmd/link/internal/ld/ldelf.go b/src/cmd/link/internal/ld/ldelf.go
index 3efdb75..20e2311 100644
--- a/src/cmd/link/internal/ld/ldelf.go
+++ b/src/cmd/link/internal/ld/ldelf.go
@@ -5,6 +5,7 @@
 	"cmd/internal/obj"
 	"encoding/binary"
 	"fmt"
+	"io"
 	"log"
 	"sort"
 	"strings"
@@ -315,6 +316,135 @@
 	return 0
 }
 
+const (
+	Tag_file                 = 1
+	Tag_CPU_name             = 4
+	Tag_CPU_raw_name         = 5
+	Tag_compatibility        = 32
+	Tag_nodefaults           = 64
+	Tag_also_compatible_with = 65
+	Tag_ABI_VFP_args         = 28
+)
+
+type elfAttribute struct {
+	tag  uint64
+	sval string
+	ival uint64
+}
+
+type elfAttributeList struct {
+	data []byte
+	err  error
+}
+
+func (a *elfAttributeList) string() string {
+	if a.err != nil {
+		return ""
+	}
+	nul := bytes.IndexByte(a.data, 0)
+	if nul < 0 {
+		a.err = io.EOF
+		return ""
+	}
+	s := string(a.data[:nul])
+	a.data = a.data[nul+1:]
+	return s
+}
+
+func (a *elfAttributeList) uleb128() uint64 {
+	if a.err != nil {
+		return 0
+	}
+	v, size := binary.Uvarint(a.data)
+	a.data = a.data[size:]
+	return v
+}
+
+// armAttr reads an elfAttribute from the list, following the rules used on ARM systems.
+func (a *elfAttributeList) armAttr() elfAttribute {
+	attr := elfAttribute{tag: a.uleb128()}
+	switch {
+	case attr.tag == Tag_compatibility:
+		attr.ival = a.uleb128()
+		attr.sval = a.string()
+
+	case attr.tag == Tag_nodefaults: // no argument
+
+	case attr.tag == Tag_also_compatible_with:
+		// Not really, but we don't actually care about this tag.
+		attr.sval = a.string()
+
+	// Tag with string argument
+	case attr.tag == Tag_CPU_name || attr.tag == Tag_CPU_raw_name || (attr.tag >= 32 && attr.tag&1 != 0):
+		attr.sval = a.string()
+
+	default: // Tag with integer argument
+		attr.ival = a.uleb128()
+	}
+	return attr
+}
+
+func (a *elfAttributeList) done() bool {
+	return a.err != nil || len(a.data) == 0
+}
+
+// parseArmAttributes looks for the attribute that indicates the object uses
+// the hard-float ABI (a file-level attribute with tag Tag_ABI_VFP_args and
+// value 1). Unfortunately the format means that we have to parse all of the
+// file-level attributes to find the one we are looking for. This format is
+// slightly documented in "ELF for the ARM Architecture" but mostly this is
+// derived from reading the source to gold and readelf.
+func parseArmAttributes(e binary.ByteOrder, data []byte) {
+	// We assume the soft-float ABI unless we see a tag indicating otherwise.
+	if ehdr.flags == 0x5000002 {
+		ehdr.flags = 0x5000202
+	}
+	if data[0] != 'A' {
+		fmt.Fprintf(&Bso, ".ARM.attributes has unexpected format %c\n", data[0])
+		return
+	}
+	data = data[1:]
+	for len(data) != 0 {
+		sectionlength := e.Uint32(data)
+		sectiondata := data[4:sectionlength]
+		data = data[sectionlength:]
+
+		nulIndex := bytes.IndexByte(sectiondata, 0)
+		if nulIndex < 0 {
+			fmt.Fprintf(&Bso, "corrupt .ARM.attributes (section name not NUL-terminated)\n")
+			return
+		}
+		name := string(sectiondata[:nulIndex])
+		sectiondata = sectiondata[nulIndex+1:]
+
+		if name != "aeabi" {
+			continue
+		}
+		for len(sectiondata) != 0 {
+			subsectiontag, sz := binary.Uvarint(sectiondata)
+			subsectionsize := e.Uint32(sectiondata[sz:])
+			subsectiondata := sectiondata[sz+4 : subsectionsize]
+			sectiondata = sectiondata[subsectionsize:]
+
+			if subsectiontag == Tag_file {
+				attrList := elfAttributeList{data: subsectiondata}
+				for !attrList.done() {
+					attr := attrList.armAttr()
+					if attr.tag == Tag_ABI_VFP_args && attr.ival == 1 {
+						ehdr.flags = 0x5000402 // has entry point, Version5 EABI, hard-float ABI
+					}
+				}
+				if attrList.err != nil {
+					fmt.Fprintf(&Bso, "could not parse .ARM.attributes\n")
+				}
+			}
+		}
+	}
+}
+
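
For reference, a hand-assembled blob that the parser above would walk; real .ARM.attributes sections come from the host C compiler, so this is only an in-package sketch (it calls parseArmAttributes and reads the ehdr global directly). Per the code, the layout is a leading 'A', then sections of {uint32 length, NUL-terminated vendor name, subsections}, each subsection being {uleb128 tag, uint32 size, attribute data}, with both lengths counted from their own first byte:

	blob := []byte{
		'A',
		17, 0, 0, 0, // section length (little-endian), includes these 4 bytes
		'a', 'e', 'a', 'b', 'i', 0, // vendor name
		1, // subsection tag: Tag_file
		7, 0, 0, 0, // subsection size, counted from the tag byte
		28, 1, // Tag_ABI_VFP_args = 1: hard-float ABI
	}
	parseArmAttributes(binary.LittleEndian, blob)
	// ehdr.flags is now 0x5000402: entry point, Version5 EABI, hard-float.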
 func ldelf(f *obj.Biobuf, pkg string, length int64, pn string) {
 	if Debug['v'] != 0 {
 		fmt.Fprintf(&Bso, "%5.2f ldelf %s\n", obj.Cputime(), pn)
@@ -549,6 +679,12 @@
 	// create symbols for elfmapped sections
 	for i := 0; uint(i) < elfobj.nsect; i++ {
 		sect = &elfobj.sect[i]
+		if sect.type_ == SHT_ARM_ATTRIBUTES && sect.name == ".ARM.attributes" {
+			if err = elfmap(elfobj, sect); err != nil {
+				goto bad
+			}
+			parseArmAttributes(e, sect.base[:sect.size])
+		}
 		if (sect.type_ != ElfSectProgbits && sect.type_ != ElfSectNobits) || sect.flags&ElfSectFlagAlloc == 0 {
 			continue
 		}
diff --git a/src/cmd/link/internal/ld/lib.go b/src/cmd/link/internal/ld/lib.go
index e7aa217..764f610 100644
--- a/src/cmd/link/internal/ld/lib.go
+++ b/src/cmd/link/internal/ld/lib.go
@@ -174,6 +174,17 @@
 	return Buildmode == BuildmodeShared || Linkshared
 }
 
+// UseRelro returns whether to make use of "read only relocations" aka
+// relro.
+func UseRelro() bool {
+	switch Buildmode {
+	case BuildmodeCShared, BuildmodeShared, BuildmodePIE:
+		return Iself
+	default:
+		return false
+	}
+}
+
 var (
 	Thestring          string
 	Thelinkarch        *LinkArch
@@ -272,6 +283,7 @@
 const (
 	BuildmodeUnset BuildMode = iota
 	BuildmodeExe
+	BuildmodePIE
 	BuildmodeCArchive
 	BuildmodeCShared
 	BuildmodeShared
@@ -288,6 +300,13 @@
 		return fmt.Errorf("invalid buildmode: %q", s)
 	case "exe":
 		*mode = BuildmodeExe
+	case "pie":
+		switch goos {
+		case "android":
+		default:
+			return badmode()
+		}
+		*mode = BuildmodePIE
 	case "c-archive":
 		switch goos {
 		case "darwin", "linux":
@@ -301,7 +320,7 @@
 		}
 		*mode = BuildmodeCShared
 	case "shared":
-		if goos != "linux" || goarch != "amd64" {
+		if goos != "linux" || (goarch != "amd64" && goarch != "arm") {
 			return badmode()
 		}
 		*mode = BuildmodeShared
@@ -315,6 +334,8 @@
 		return "" // avoid showing a default in usage message
 	case BuildmodeExe:
 		return "exe"
+	case BuildmodePIE:
+		return "pie"
 	case BuildmodeCArchive:
 		return "c-archive"
 	case BuildmodeCShared:
@@ -369,7 +390,7 @@
 		switch Buildmode {
 		case BuildmodeCShared, BuildmodeCArchive:
 			INITENTRY = fmt.Sprintf("_rt0_%s_%s_lib", goarch, goos)
-		case BuildmodeExe:
+		case BuildmodeExe, BuildmodePIE:
 			INITENTRY = fmt.Sprintf("_rt0_%s_%s", goarch, goos)
 		case BuildmodeShared:
 			// No INITENTRY for -buildmode=shared
@@ -543,17 +564,14 @@
 
 	tlsg := Linklookup(Ctxt, "runtime.tlsg", 0)
 
-	// For most ports, runtime.tlsg is a placeholder symbol for TLS
-	// relocation. However, the Android and Darwin arm ports need it
-	// to be a real variable.
-	//
-	// TODO(crawshaw): android should require leaving the tlsg->type
-	// alone (as the runtime-provided SNOPTRBSS) just like darwin/arm.
-	// But some other part of the linker is expecting STLSBSS.
-	if tlsg.Type != obj.SDYNIMPORT && (goos != "darwin" || Thearch.Thechar != '5') {
+	// runtime.tlsg is used for external linking on platforms that do not define
+	// a variable to hold g in assembly (currently only intel).
+	if tlsg.Type == 0 {
 		tlsg.Type = obj.STLSBSS
+		tlsg.Size = int64(Thearch.Ptrsize)
+	} else if tlsg.Type != obj.SDYNIMPORT {
+		Diag("internal error: runtime declared tlsg variable %d", tlsg.Type)
 	}
-	tlsg.Size = int64(Thearch.Ptrsize)
 	tlsg.Reachable = true
 	Ctxt.Tlsg = tlsg
 
@@ -620,8 +638,11 @@
 	// binaries, so leave it enabled on OS X (Mach-O) binaries.
 	// Also leave it enabled on Solaris which doesn't support
 	// statically linked binaries.
-	if Buildmode == BuildmodeExe && havedynamic == 0 && HEADTYPE != obj.Hdarwin && HEADTYPE != obj.Hsolaris {
-		Debug['d'] = 1
+	switch Buildmode {
+	case BuildmodeExe, BuildmodePIE:
+		if havedynamic == 0 && HEADTYPE != obj.Hdarwin && HEADTYPE != obj.Hsolaris {
+			Debug['d'] = 1
+		}
 	}
 
 	importcycles()
@@ -975,12 +996,20 @@
 		if HEADTYPE == obj.Hdarwin {
 			argv = append(argv, "-Wl,-pagezero_size,4000000")
 		}
+	case BuildmodePIE:
+		argv = append(argv, "-pie")
 	case BuildmodeCShared:
 		if HEADTYPE == obj.Hdarwin {
 			argv = append(argv, "-dynamiclib")
 		} else {
+			// ELF.
 			argv = append(argv, "-Wl,-Bsymbolic")
-			argv = append(argv, "-shared")
+			if UseRelro() {
+				argv = append(argv, "-Wl,-z,relro")
+			}
+			// Pass -z nodelete to mark the shared library as
+			// non-closeable: a dlclose will do nothing.
+			argv = append(argv, "-shared", "-Wl,-z,nodelete")
 		}
 	case BuildmodeShared:
 		// TODO(mwhudson): unless you do this, dynamic relocations fill
@@ -991,10 +1020,13 @@
 		// think we may well end up wanting to use -Bsymbolic here
 		// anyway.
 		argv = append(argv, "-Wl,-Bsymbolic-functions")
+		if UseRelro() {
+			argv = append(argv, "-Wl,-z,relro")
+		}
 		argv = append(argv, "-shared")
 	}
 
-	if Linkshared && Iself {
+	if Iself && DynlinkingGo() {
 		// We force all symbol resolution to be done at program startup
 		// because lazy PLT resolution can use large amounts of stack at
 		// times we cannot allow it to do so.
@@ -1771,6 +1803,13 @@
 			obj.SGOSTRING,
 			obj.SGOFUNC,
 			obj.SGCBITS,
+			obj.STYPERELRO,
+			obj.SSTRINGRELRO,
+			obj.SGOSTRINGRELRO,
+			obj.SGOFUNCRELRO,
+			obj.SGCBITSRELRO,
+			obj.SRODATARELRO,
+			obj.STYPELINK,
 			obj.SWINDOWS:
 			if !s.Reachable {
 				continue
@@ -1802,13 +1841,7 @@
 
 		case obj.STLSBSS:
 			if Linkmode == LinkExternal && HEADTYPE != obj.Hopenbsd {
-				var type_ int
-				if goos == "android" {
-					type_ = 'B'
-				} else {
-					type_ = 't'
-				}
-				put(s, s.Name, type_, Symaddr(s), s.Size, int(s.Version), s.Gotype)
+				put(s, s.Name, 't', Symaddr(s), s.Size, int(s.Version), s.Gotype)
 			}
 		}
 	}
diff --git a/src/cmd/link/internal/ld/macho.go b/src/cmd/link/internal/ld/macho.go
index ccc8491..af93361 100644
--- a/src/cmd/link/internal/ld/macho.go
+++ b/src/cmd/link/internal/ld/macho.go
@@ -444,7 +444,7 @@
 		ms = newMachoSeg("", 40)
 
 		ms.fileoffset = Segtext.Fileoff
-		if Thearch.Thechar == '5' {
+		if Thearch.Thechar == '5' || Buildmode == BuildmodeCArchive {
 			ms.filesize = Segdata.Fileoff + Segdata.Filelen - Segtext.Fileoff
 		} else {
 			ms.filesize = Segdwarf.Fileoff + Segdwarf.Filelen - Segtext.Fileoff
@@ -625,10 +625,10 @@
 	k1 := symkind(s1)
 	k2 := symkind(s2)
 	if k1 != k2 {
-		return k1-k2 < 0
+		return k1 < k2
 	}
 
-	return stringsCompare(s1.Extname, s2.Extname) < 0
+	return s1.Extname < s2.Extname
 }
 
 func machogenasmsym(put func(*LSym, string, int, int64, int64, int, *LSym)) {
diff --git a/src/cmd/link/internal/ld/pe.go b/src/cmd/link/internal/ld/pe.go
index 4a7d710..16ce7bd 100644
--- a/src/cmd/link/internal/ld/pe.go
+++ b/src/cmd/link/internal/ld/pe.go
@@ -684,21 +684,11 @@
 	Cseek(endoff)
 }
 
-type pescmp []*LSym
+type byExtname []*LSym
 
-func (x pescmp) Len() int {
-	return len(x)
-}
-
-func (x pescmp) Swap(i, j int) {
-	x[i], x[j] = x[j], x[i]
-}
-
-func (x pescmp) Less(i, j int) bool {
-	s1 := x[i]
-	s2 := x[j]
-	return stringsCompare(s1.Extname, s2.Extname) < 0
-}
+func (s byExtname) Len() int           { return len(s) }
+func (s byExtname) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }
+func (s byExtname) Less(i, j int) bool { return s[i].Extname < s[j].Extname }
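
Two simplifications land here: Go strings compare directly with <, so stringsCompare drops out, and a named slice type gives sort.Interface in three one-liners. The same pattern, self-contained:

	package main

	import (
		"fmt"
		"sort"
	)

	type byLen []string

	func (s byLen) Len() int           { return len(s) }
	func (s byLen) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }
	func (s byLen) Less(i, j int) bool { return len(s[i]) < len(s[j]) }

	func main() {
		words := []string{"ccc", "a", "bb"}
		sort.Sort(byLen(words))
		fmt.Println(words) // [a bb ccc]
	}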
 
 func initdynexport() {
 	nexport = 0
@@ -715,7 +705,7 @@
 		nexport++
 	}
 
-	sort.Sort(pescmp(dexport[:nexport]))
+	sort.Sort(byExtname(dexport[:nexport]))
 }
 
 func addexports() {
diff --git a/src/cmd/link/internal/ld/pobj.go b/src/cmd/link/internal/ld/pobj.go
index 5ce1977..f6c47dd 100644
--- a/src/cmd/link/internal/ld/pobj.go
+++ b/src/cmd/link/internal/ld/pobj.go
@@ -83,7 +83,6 @@
 	obj.Flagint32("R", "set address rounding `quantum`", &INITRND)
 	obj.Flagint64("T", "set text segment `address`", &INITTEXT)
 	obj.Flagfn0("V", "print version and exit", doversion)
-	obj.Flagcount("W", "disassemble input", &Debug['W'])
 	obj.Flagfn1("X", "add string value `definition` of the form importpath.name=value", addstrdata1)
 	obj.Flagcount("a", "disassemble output", &Debug['a'])
 	obj.Flagstr("buildid", "record `id` as Go toolchain build id", &buildid)
diff --git a/src/cmd/link/internal/ld/symtab.go b/src/cmd/link/internal/ld/symtab.go
index 250c053..f711586 100644
--- a/src/cmd/link/internal/ld/symtab.go
+++ b/src/cmd/link/internal/ld/symtab.go
@@ -350,13 +350,29 @@
 
 	// pseudo-symbols to mark locations of type, string, and go string data.
 	var symtype *LSym
-	if !DynlinkingGo() {
+	var symtyperel *LSym
+	if UseRelro() && (Buildmode == BuildmodeCShared || Buildmode == BuildmodePIE) {
 		s = Linklookup(Ctxt, "type.*", 0)
 
 		s.Type = obj.STYPE
 		s.Size = 0
 		s.Reachable = true
 		symtype = s
+
+		s = Linklookup(Ctxt, "typerel.*", 0)
+
+		s.Type = obj.STYPERELRO
+		s.Size = 0
+		s.Reachable = true
+		symtyperel = s
+	} else if !DynlinkingGo() {
+		s = Linklookup(Ctxt, "type.*", 0)
+
+		s.Type = obj.STYPE
+		s.Size = 0
+		s.Reachable = true
+		symtype = s
+		symtyperel = s
 	}
 
 	s = Linklookup(Ctxt, "go.string.*", 0)
@@ -381,6 +397,7 @@
 	symgcbits := s
 
 	symtypelink := Linklookup(Ctxt, "runtime.typelink", 0)
+	symtypelink.Type = obj.STYPELINK
 
 	symt = Linklookup(Ctxt, "runtime.symtab", 0)
 	symt.Local = true
@@ -400,9 +417,14 @@
 		}
 
 		if strings.HasPrefix(s.Name, "type.") && !DynlinkingGo() {
-			s.Type = obj.STYPE
 			s.Hide = 1
-			s.Outer = symtype
+			if UseRelro() && len(s.R) > 0 {
+				s.Type = obj.STYPERELRO
+				s.Outer = symtyperel
+			} else {
+				s.Type = obj.STYPE
+				s.Outer = symtype
+			}
 		}
 
 		if strings.HasPrefix(s.Name, "go.typelink.") {
@@ -490,7 +512,8 @@
 	adduint(Ctxt, moduledata, uint64(ntypelinks))
 	if len(Ctxt.Shlibs) > 0 {
 		thismodulename := filepath.Base(outfile)
-		if Buildmode == BuildmodeExe {
+		switch Buildmode {
+		case BuildmodeExe, BuildmodePIE:
 			// When linking an executable, outfile is just "a.out". Make
 			// it something slightly more comprehensible.
 			thismodulename = "the executable"
diff --git a/src/cmd/newlink/testdata/autosection.6 b/src/cmd/newlink/testdata/autosection.6
index 3681f70..90c8427 100644
--- a/src/cmd/newlink/testdata/autosection.6
+++ b/src/cmd/newlink/testdata/autosection.6
Binary files differ
diff --git a/src/cmd/newlink/testdata/autoweak.6 b/src/cmd/newlink/testdata/autoweak.6
index 99cf465..c95dd20 100644
--- a/src/cmd/newlink/testdata/autoweak.6
+++ b/src/cmd/newlink/testdata/autoweak.6
Binary files differ
diff --git a/src/cmd/newlink/testdata/dead.6 b/src/cmd/newlink/testdata/dead.6
index 5b17ef1..a3ec719 100644
--- a/src/cmd/newlink/testdata/dead.6
+++ b/src/cmd/newlink/testdata/dead.6
Binary files differ
diff --git a/src/cmd/newlink/testdata/hello.6 b/src/cmd/newlink/testdata/hello.6
index 1f08d21..6731f52 100644
--- a/src/cmd/newlink/testdata/hello.6
+++ b/src/cmd/newlink/testdata/hello.6
Binary files differ
diff --git a/src/cmd/newlink/testdata/layout.6 b/src/cmd/newlink/testdata/layout.6
index d166986..fcfbe1b 100644
--- a/src/cmd/newlink/testdata/layout.6
+++ b/src/cmd/newlink/testdata/layout.6
Binary files differ
diff --git a/src/cmd/newlink/testdata/pclntab.6 b/src/cmd/newlink/testdata/pclntab.6
index dfe53dd..abc8aef 100644
--- a/src/cmd/newlink/testdata/pclntab.6
+++ b/src/cmd/newlink/testdata/pclntab.6
Binary files differ
diff --git a/src/cmd/objdump/objdump_test.go b/src/cmd/objdump/objdump_test.go
index b6c339b..8ceaba0 100644
--- a/src/cmd/objdump/objdump_test.go
+++ b/src/cmd/objdump/objdump_test.go
@@ -105,6 +105,8 @@
 		t.Skipf("skipping on %s, issue 9039", runtime.GOARCH)
 	case "arm64":
 		t.Skipf("skipping on %s, issue 10106", runtime.GOARCH)
+	case "mips64", "mips64le":
+		t.Skipf("skipping on %s, issue 12559", runtime.GOARCH)
 	}
 	testDisasm(t)
 }
@@ -119,6 +121,8 @@
 		t.Skipf("skipping on %s, no support for external linking, issue 9038", runtime.GOARCH)
 	case "arm64":
 		t.Skipf("skipping on %s, issue 10106", runtime.GOARCH)
+	case "mips64", "mips64le":
+		t.Skipf("skipping on %s, issues 12559 and 12560", runtime.GOARCH)
 	}
 	// TODO(jsing): Reenable once openbsd/arm has external linking support.
 	if runtime.GOOS == "openbsd" && runtime.GOARCH == "arm" {
diff --git a/src/cmd/vet/copylock.go b/src/cmd/vet/copylock.go
index 6c71061..8d8399a 100644
--- a/src/cmd/vet/copylock.go
+++ b/src/cmd/vet/copylock.go
@@ -18,7 +18,7 @@
 	register("copylocks",
 		"check that locks are not passed by value",
 		checkCopyLocks,
-		funcDecl, rangeStmt, funcLit)
+		funcDecl, rangeStmt, funcLit, assignStmt)
 }
 
 // checkCopyLocks checks whether node might
@@ -31,6 +31,18 @@
 		checkCopyLocksFunc(f, node.Name.Name, node.Recv, node.Type)
 	case *ast.FuncLit:
 		checkCopyLocksFunc(f, "func", nil, node.Type)
+	case *ast.AssignStmt:
+		checkCopyLocksAssign(f, node)
+	}
+}
+
+// checkCopyLocksAssign checks whether an assignment
+// copies a lock.
+func checkCopyLocksAssign(f *File, as *ast.AssignStmt) {
+	for _, x := range as.Lhs {
+		if path := lockPath(f.pkg.typesPkg, f.pkg.types[x].Type); path != nil {
+			f.Badf(x.Pos(), "assignment copies lock value to %v: %v", f.gofmt(x), path)
+		}
 	}
 }
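
The kind of assignment the new check flags, mirroring the testdata added below (a sketch; the diagnostic text comes from the Badf call above):

	package demo

	import "sync"

	func copies() {
		var x, y sync.Mutex
		p := &x // OK: copying a pointer to a lock is fine
		*p = y  // flagged: assignment copies lock value to *p: sync.Mutex
		x = y   // flagged: assignment copies lock value to x: sync.Mutex
	}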
 
@@ -42,7 +54,7 @@
 	if recv != nil && len(recv.List) > 0 {
 		expr := recv.List[0].Type
 		if path := lockPath(f.pkg.typesPkg, f.pkg.types[expr].Type); path != nil {
-			f.Badf(expr.Pos(), "%s passes Lock by value: %v", name, path)
+			f.Badf(expr.Pos(), "%s passes lock by value: %v", name, path)
 		}
 	}
 
@@ -50,7 +62,7 @@
 		for _, field := range typ.Params.List {
 			expr := field.Type
 			if path := lockPath(f.pkg.typesPkg, f.pkg.types[expr].Type); path != nil {
-				f.Badf(expr.Pos(), "%s passes Lock by value: %v", name, path)
+				f.Badf(expr.Pos(), "%s passes lock by value: %v", name, path)
 			}
 		}
 	}
@@ -59,7 +71,7 @@
 		for _, field := range typ.Results.List {
 			expr := field.Type
 			if path := lockPath(f.pkg.typesPkg, f.pkg.types[expr].Type); path != nil {
-				f.Badf(expr.Pos(), "%s returns Lock by value: %v", name, path)
+				f.Badf(expr.Pos(), "%s returns lock by value: %v", name, path)
 			}
 		}
 	}
@@ -100,7 +112,7 @@
 		return
 	}
 	if path := lockPath(f.pkg.typesPkg, typ); path != nil {
-		f.Badf(e.Pos(), "range var %s copies Lock: %v", f.gofmt(e), path)
+		f.Badf(e.Pos(), "range var %s copies lock: %v", f.gofmt(e), path)
 	}
 }
 
diff --git a/src/cmd/vet/doc.go b/src/cmd/vet/doc.go
index ea4654a..55daf50 100644
--- a/src/cmd/vet/doc.go
+++ b/src/cmd/vet/doc.go
@@ -37,50 +37,6 @@
 
 Available checks:
 
-Printf family
-
-Flag: -printf
-
-Suspicious calls to functions in the Printf family, including any functions
-with these names, disregarding case:
-	Print Printf Println
-	Fprint Fprintf Fprintln
-	Sprint Sprintf Sprintln
-	Error Errorf
-	Fatal Fatalf
-	Log Logf
-	Panic Panicf Panicln
-The -printfuncs flag can be used to redefine this list.
-If the function name ends with an 'f', the function is assumed to take
-a format descriptor string in the manner of fmt.Printf. If not, vet
-complains about arguments that look like format descriptor strings.
-
-It also checks for errors such as using a Writer as the first argument of
-Printf.
-
-Methods
-
-Flag: -methods
-
-Non-standard signatures for methods with familiar names, including:
-	Format GobEncode GobDecode MarshalJSON MarshalXML
-	Peek ReadByte ReadFrom ReadRune Scan Seek
-	UnmarshalJSON UnreadByte UnreadRune WriteByte
-	WriteTo
-
-Struct tags
-
-Flag: -structtags
-
-Struct tags that do not follow the format understood by reflect.StructTag.Get.
-Well-known encoding struct tags (json, xml) used with unexported fields.
-
-Unkeyed composite literals
-
-Flag: -composites
-
-Composite struct literals that do not use the field-keyed syntax.
-
 Assembly declarations
 
 Flag: -asmdecl
@@ -111,36 +67,93 @@
 
 Badly formed or misplaced +build tags.
 
+Unkeyed composite literals
+
+Flag: -composites
+
+Composite struct literals that do not use the field-keyed syntax.
+
 Copying locks
 
 Flag: -copylocks
 
 Locks that are erroneously passed by value.
 
+Documentation examples
+
+Flag: -example
+
+Mistakes involving example tests, including examples with incorrect names or
+function signatures, or that document identifiers not in the package.
+
+Methods
+
+Flag: -methods
+
+Non-standard signatures for methods with familiar names, including:
+	Format GobEncode GobDecode MarshalJSON MarshalXML
+	Peek ReadByte ReadFrom ReadRune Scan Seek
+	UnmarshalJSON UnreadByte UnreadRune WriteByte
+	WriteTo
+
 Nil function comparison
 
 Flag: -nilfunc
 
 Comparisons between functions and nil.
 
+Printf family
+
+Flag: -printf
+
+Suspicious calls to functions in the Printf family, including any functions
+with these names, disregarding case:
+	Print Printf Println
+	Fprint Fprintf Fprintln
+	Sprint Sprintf Sprintln
+	Error Errorf
+	Fatal Fatalf
+	Log Logf
+	Panic Panicf Panicln
+The -printfuncs flag can be used to redefine this list.
+If the function name ends with an 'f', the function is assumed to take
+a format descriptor string in the manner of fmt.Printf. If not, vet
+complains about arguments that look like format descriptor strings.
+
+It also checks for errors such as using a Writer as the first argument of
+Printf.
+
 Range loop variables
 
 Flag: -rangeloops
 
 Incorrect uses of range loop variables in closures.
 
-Unreachable code
-
-Flag: -unreachable
-
-Unreachable code.
-
 Shadowed variables
 
 Flag: -shadow=false (experimental; must be set explicitly)
 
 Variables that may have been unintentionally shadowed.
 
+Shifts
+
+Flag: -shift
+
+Shifts equal to or longer than the variable's length.
+
+Struct tags
+
+Flag: -structtags
+
+Struct tags that do not follow the format understood by reflect.StructTag.Get.
+Well-known encoding struct tags (json, xml) used with unexported fields.
+
+Unreachable code
+
+Flag: -unreachable
+
+Unreachable code.
+
 Misuse of unsafe Pointers
 
 Flag: -unsafeptr
@@ -160,12 +173,6 @@
 fmt.Sprintf and methods like String and Error. The flags -unusedfuncs
 and -unusedstringmethods control the set.
 
-Shifts
-
-Flag: -shift
-
-Shifts equal to or longer than the variable's length.
-
 Other flags
 
 These flags configure the behavior of vet:
diff --git a/src/cmd/vet/example.go b/src/cmd/vet/example.go
new file mode 100644
index 0000000..797c3ce
--- /dev/null
+++ b/src/cmd/vet/example.go
@@ -0,0 +1,124 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+	"go/ast"
+	"go/types"
+	"strings"
+	"unicode"
+	"unicode/utf8"
+)
+
+func init() {
+	register("example",
+		"check for common mistaken usages of documentation examples",
+		checkExample,
+		funcDecl)
+}
+
+func isExampleSuffix(s string) bool {
+	r, size := utf8.DecodeRuneInString(s)
+	return size > 0 && unicode.IsLower(r)
+}
+
+// checkExample walks the documentation example functions, checking for common
+// mistakes such as misnamed functions and functions that refer to identifiers
+// not present in the package.
+func checkExample(f *File, node ast.Node) {
+	if !strings.HasSuffix(f.name, "_test.go") {
+		return
+	}
+	var (
+		pkg     = f.pkg
+		pkgName = pkg.typesPkg.Name()
+		scopes  = []*types.Scope{pkg.typesPkg.Scope()}
+		lookup  = func(name string) types.Object {
+			for _, scope := range scopes {
+				if o := scope.Lookup(name); o != nil {
+					return o
+				}
+			}
+			return nil
+		}
+	)
+	if strings.HasSuffix(pkgName, "_test") {
+		// Treat 'package foo_test' as an alias for 'package foo'.
+		basePkg := strings.TrimSuffix(pkgName, "_test")
+		for _, p := range pkg.typesPkg.Imports() {
+			if p.Name() == basePkg {
+				scopes = append(scopes, p.Scope())
+				break
+			}
+		}
+	}
+	fn, ok := node.(*ast.FuncDecl)
+	if !ok {
+		// Ignore non-functions.
+		return
+	}
+	var (
+		fnName = fn.Name.Name
+		report = func(format string, args ...interface{}) { f.Badf(node.Pos(), format, args...) }
+	)
+	if fn.Recv != nil || !strings.HasPrefix(fnName, "Example") {
+		// Ignore methods and functions not prefixed with "Example".
+		return
+	}
+	if params := fn.Type.Params; len(params.List) != 0 {
+		report("%s should be niladic", fnName)
+	}
+	if results := fn.Type.Results; results != nil && len(results.List) != 0 {
+		report("%s should return nothing", fnName)
+	}
+	if fnName == "Example" {
+		// Nothing more to do.
+		return
+	}
+	if filesRun && !includesNonTest {
+		// The coherence checks between a test and the package it tests
+		// will report false positives if no non-test files have
+		// been provided.
+		return
+	}
+	var (
+		exName = strings.TrimPrefix(fnName, "Example")
+		elems  = strings.SplitN(exName, "_", 3)
+		ident  = elems[0]
+		obj    = lookup(ident)
+	)
+	if ident != "" && obj == nil {
+		// Check ExampleFoo and ExampleBadFoo.
+		report("%s refers to unknown identifier: %s", fnName, ident)
+		// Abort since obj is absent and no subsequent checks can be performed.
+		return
+	}
+	if elemCnt := strings.Count(exName, "_"); elemCnt == 0 {
+		// Nothing more to do.
+		return
+	}
+	mmbr := elems[1]
+	if ident == "" {
+		// Check Example_suffix and Example_BadSuffix.
+		if residual := strings.TrimPrefix(exName, "_"); !isExampleSuffix(residual) {
+			report("%s has malformed example suffix: %s", fnName, residual)
+		}
+		return
+	}
+	if !isExampleSuffix(mmbr) {
+		// Check ExampleFoo_Method and ExampleFoo_BadMethod.
+		if obj, _, _ := types.LookupFieldOrMethod(obj.Type(), true, obj.Pkg(), mmbr); obj == nil {
+			report("%s refers to unknown field or method: %s.%s", fnName, ident, mmbr)
+		}
+	}
+	if len(elems) == 3 && !isExampleSuffix(elems[2]) {
+		// Check ExampleFoo_Method_suffix and ExampleFoo_Method_Badsuffix.
+		report("%s has malformed example suffix: %s", fnName, elems[2])
+	}
+}
diff --git a/src/cmd/vet/main.go b/src/cmd/vet/main.go
index 453cfe0c..fbba009 100644
--- a/src/cmd/vet/main.go
+++ b/src/cmd/vet/main.go
@@ -51,6 +51,14 @@
 // setTrueCount record how many flags are explicitly set to true.
 var setTrueCount int
 
+// dirsRun and filesRun indicate whether the vet is applied to directory or
+// file targets. The distinction affects which checks are run.
+var dirsRun, filesRun bool
+
+// includesNonTest indicates whether the vet is applied to non-test targets.
+// Certain checks are relevant only if they touch both test and non-test files.
+var includesNonTest bool
+
 // A triState is a boolean that knows whether it has been set to either true or false.
 // It is used to identify if a flag appears; the standard boolean flag cannot
 // distinguish missing from unset. It also satisfies flag.Value.
@@ -207,8 +215,6 @@
 	if flag.NArg() == 0 {
 		Usage()
 	}
-	dirs := false
-	files := false
 	for _, name := range flag.Args() {
 		// Is it a directory?
 		fi, err := os.Stat(name)
@@ -217,15 +223,18 @@
 			continue
 		}
 		if fi.IsDir() {
-			dirs = true
+			dirsRun = true
 		} else {
-			files = true
+			filesRun = true
+			if !strings.HasSuffix(name, "_test.go") {
+				includesNonTest = true
+			}
 		}
 	}
-	if dirs && files {
+	if dirsRun && filesRun {
 		Usage()
 	}
-	if dirs {
+	if dirsRun {
 		for _, name := range flag.Args() {
 			walkDir(name)
 		}
diff --git a/src/cmd/vet/testdata/copylock.go b/src/cmd/vet/testdata/copylock.go
new file mode 100644
index 0000000..03d0c33
--- /dev/null
+++ b/src/cmd/vet/testdata/copylock.go
@@ -0,0 +1,28 @@
+package testdata
+
+import "sync"
+
+func OkFunc() {
+	var x *sync.Mutex
+	p := x
+	var y sync.Mutex
+	p = &y
+}
+
+type Tlock struct {
+	once sync.Once
+}
+
+func BadFunc() {
+	var x *sync.Mutex
+	p := x
+	var y sync.Mutex
+	p = &y
+	*p = *x // ERROR "assignment copies lock value to \*p: sync.Mutex"
+
+	var t Tlock
+	var tp *Tlock
+	tp = &t
+	*tp = t // ERROR "assignment copies lock value to \*tp: testdata.Tlock contains sync.Once contains sync.Mutex"
+	t = *tp // ERROR "assignment copies lock value to t: testdata.Tlock contains sync.Once contains sync.Mutex"
+}
diff --git a/src/cmd/vet/testdata/copylock_func.go b/src/cmd/vet/testdata/copylock_func.go
index d83957f..62725d9 100644
--- a/src/cmd/vet/testdata/copylock_func.go
+++ b/src/cmd/vet/testdata/copylock_func.go
@@ -10,13 +10,13 @@
 import "sync"
 
 func OkFunc(*sync.Mutex) {}
-func BadFunc(sync.Mutex) {} // ERROR "BadFunc passes Lock by value: sync.Mutex"
+func BadFunc(sync.Mutex) {} // ERROR "BadFunc passes lock by value: sync.Mutex"
 func OkRet() *sync.Mutex {}
-func BadRet() sync.Mutex {} // ERROR "BadRet returns Lock by value: sync.Mutex"
+func BadRet() sync.Mutex {} // ERROR "BadRet returns lock by value: sync.Mutex"
 
 var (
 	OkClosure  = func(*sync.Mutex) {}
-	BadClosure = func(sync.Mutex) {} // ERROR "func passes Lock by value: sync.Mutex"
+	BadClosure = func(sync.Mutex) {} // ERROR "func passes lock by value: sync.Mutex"
 )
 
 type EmbeddedRWMutex struct {
@@ -24,20 +24,20 @@
 }
 
 func (*EmbeddedRWMutex) OkMeth() {}
-func (EmbeddedRWMutex) BadMeth() {} // ERROR "BadMeth passes Lock by value: testdata.EmbeddedRWMutex"
+func (EmbeddedRWMutex) BadMeth() {} // ERROR "BadMeth passes lock by value: testdata.EmbeddedRWMutex"
 func OkFunc(e *EmbeddedRWMutex)  {}
-func BadFunc(EmbeddedRWMutex)    {} // ERROR "BadFunc passes Lock by value: testdata.EmbeddedRWMutex"
+func BadFunc(EmbeddedRWMutex)    {} // ERROR "BadFunc passes lock by value: testdata.EmbeddedRWMutex"
 func OkRet() *EmbeddedRWMutex    {}
-func BadRet() EmbeddedRWMutex    {} // ERROR "BadRet returns Lock by value: testdata.EmbeddedRWMutex"
+func BadRet() EmbeddedRWMutex    {} // ERROR "BadRet returns lock by value: testdata.EmbeddedRWMutex"
 
 type FieldMutex struct {
 	s sync.Mutex
 }
 
 func (*FieldMutex) OkMeth()   {}
-func (FieldMutex) BadMeth()   {} // ERROR "BadMeth passes Lock by value: testdata.FieldMutex contains sync.Mutex"
+func (FieldMutex) BadMeth()   {} // ERROR "BadMeth passes lock by value: testdata.FieldMutex contains sync.Mutex"
 func OkFunc(*FieldMutex)      {}
-func BadFunc(FieldMutex, int) {} // ERROR "BadFunc passes Lock by value: testdata.FieldMutex contains sync.Mutex"
+func BadFunc(FieldMutex, int) {} // ERROR "BadFunc passes lock by value: testdata.FieldMutex contains sync.Mutex"
 
 type L0 struct {
 	L1
@@ -52,7 +52,7 @@
 }
 
 func (*L0) Ok() {}
-func (L0) Bad() {} // ERROR "Bad passes Lock by value: testdata.L0 contains testdata.L1 contains testdata.L2"
+func (L0) Bad() {} // ERROR "Bad passes lock by value: testdata.L0 contains testdata.L1 contains testdata.L2"
 
 type EmbeddedMutexPointer struct {
 	s *sync.Mutex // safe to copy this pointer
@@ -76,7 +76,7 @@
 func (*CustomLock) Unlock() {}
 
 func Ok(*CustomLock) {}
-func Bad(CustomLock) {} // ERROR "Bad passes Lock by value: testdata.CustomLock"
+func Bad(CustomLock) {} // ERROR "Bad passes lock by value: testdata.CustomLock"
 
 // TODO: Unfortunate cases
 
@@ -85,7 +85,7 @@
 // sync.Mutex gets called out, but without any reference to the sync.Once.
 type LocalOnce sync.Once
 
-func (LocalOnce) Bad() {} // ERROR "Bad passes Lock by value: testdata.LocalOnce contains sync.Mutex"
+func (LocalOnce) Bad() {} // ERROR "Bad passes lock by value: testdata.LocalOnce contains sync.Mutex"
 
 // False negative:
 // LocalMutex doesn't have a Lock method.
diff --git a/src/cmd/vet/testdata/copylock_range.go b/src/cmd/vet/testdata/copylock_range.go
index f95b025..f127381 100644
--- a/src/cmd/vet/testdata/copylock_range.go
+++ b/src/cmd/vet/testdata/copylock_range.go
@@ -24,37 +24,37 @@
 	}
 	for i, _ := range s {
 	}
-	for _, mu = range s { // ERROR "range var mu copies Lock: sync.Mutex"
+	for _, mu = range s { // ERROR "range var mu copies lock: sync.Mutex"
 	}
-	for _, m := range s { // ERROR "range var m copies Lock: sync.Mutex"
+	for _, m := range s { // ERROR "range var m copies lock: sync.Mutex"
 	}
-	for i, mu = range s { // ERROR "range var mu copies Lock: sync.Mutex"
+	for i, mu = range s { // ERROR "range var mu copies lock: sync.Mutex"
 	}
-	for i, m := range s { // ERROR "range var m copies Lock: sync.Mutex"
+	for i, m := range s { // ERROR "range var m copies lock: sync.Mutex"
 	}
 
 	var a [3]sync.Mutex
-	for _, m := range a { // ERROR "range var m copies Lock: sync.Mutex"
+	for _, m := range a { // ERROR "range var m copies lock: sync.Mutex"
 	}
 
 	var m map[sync.Mutex]sync.Mutex
-	for k := range m { // ERROR "range var k copies Lock: sync.Mutex"
+	for k := range m { // ERROR "range var k copies lock: sync.Mutex"
 	}
-	for mu, _ = range m { // ERROR "range var mu copies Lock: sync.Mutex"
+	for mu, _ = range m { // ERROR "range var mu copies lock: sync.Mutex"
 	}
-	for k, _ := range m { // ERROR "range var k copies Lock: sync.Mutex"
+	for k, _ := range m { // ERROR "range var k copies lock: sync.Mutex"
 	}
-	for _, mu = range m { // ERROR "range var mu copies Lock: sync.Mutex"
+	for _, mu = range m { // ERROR "range var mu copies lock: sync.Mutex"
 	}
-	for _, v := range m { // ERROR "range var v copies Lock: sync.Mutex"
+	for _, v := range m { // ERROR "range var v copies lock: sync.Mutex"
 	}
 
 	var c chan sync.Mutex
 	for range c {
 	}
-	for mu = range c { // ERROR "range var mu copies Lock: sync.Mutex"
+	for mu = range c { // ERROR "range var mu copies lock: sync.Mutex"
 	}
-	for v := range c { // ERROR "range var v copies Lock: sync.Mutex"
+	for v := range c { // ERROR "range var v copies lock: sync.Mutex"
 	}
 
 	// Test non-idents in range variables
@@ -62,6 +62,6 @@
 		i  int
 		mu sync.Mutex
 	}
-	for t.i, t.mu = range s { // ERROR "range var t.mu copies Lock: sync.Mutex"
+	for t.i, t.mu = range s { // ERROR "range var t.mu copies lock: sync.Mutex"
 	}
 }
diff --git a/src/cmd/vet/testdata/divergent/buf.go b/src/cmd/vet/testdata/divergent/buf.go
new file mode 100644
index 0000000..0efe0f8
--- /dev/null
+++ b/src/cmd/vet/testdata/divergent/buf.go
@@ -0,0 +1,17 @@
+// Test of examples with divergent packages.
+
+// Package buf ...
+package buf
+
+// Buf is a ...
+type Buf []byte
+
+// Append ...
+func (*Buf) Append([]byte) {}
+
+func (Buf) Reset() {}
+
+func (Buf) Len() int { return 0 }
+
+// DefaultBuf is a ...
+var DefaultBuf Buf
diff --git a/src/cmd/vet/testdata/divergent/buf_test.go b/src/cmd/vet/testdata/divergent/buf_test.go
new file mode 100644
index 0000000..6b9cba3
--- /dev/null
+++ b/src/cmd/vet/testdata/divergent/buf_test.go
@@ -0,0 +1,35 @@
+// Test of examples with divergent packages.
+
+package buf_test
+
+func Example() {} // OK because is package-level.
+
+func Example_suffix() // OK because refers to suffix annotation.
+
+func Example_BadSuffix() // ERROR "Example_BadSuffix has malformed example suffix: BadSuffix"
+
+func ExampleBuf() // OK because refers to known top-level type.
+
+func ExampleBuf_Append() {} // OK because refers to known method.
+
+func ExampleBuf_Clear() {} // ERROR "ExampleBuf_Clear refers to unknown field or method: Buf.Clear"
+
+func ExampleBuf_suffix() {} // OK because refers to suffix annotation.
+
+func ExampleBuf_Append_Bad() {} // ERROR "ExampleBuf_Append_Bad has malformed example suffix: Bad"
+
+func ExampleBuf_Append_suffix() {} // OK because refers to known method with valid suffix.
+
+func ExampleDefaultBuf() {} // OK because refers to top-level identifier.
+
+func ExampleBuf_Reset() bool { return true } // ERROR "ExampleBuf_Reset should return nothing"
+
+func ExampleBuf_Len(i int) {} // ERROR "ExampleBuf_Len should be niladic"
+
+// "Puffer" is German for "Buffer".
+
+func ExamplePuffer() // ERROR "ExamplePuffer refers to unknown identifier: Puffer"
+
+func ExamplePuffer_Append() // ERROR "ExamplePuffer_Append refers to unknown identifier: Puffer"
+
+func ExamplePuffer_suffix() // ERROR "ExamplePuffer_suffix refers to unknown identifier: Puffer"
diff --git a/src/cmd/vet/testdata/examples_test.go b/src/cmd/vet/testdata/examples_test.go
new file mode 100644
index 0000000..9c53672
--- /dev/null
+++ b/src/cmd/vet/testdata/examples_test.go
@@ -0,0 +1,48 @@
+// Test of examples.
+
+package testdata
+
+// Buf is a ...
+type Buf []byte
+
+// Append ...
+func (*Buf) Append([]byte) {}
+
+func (Buf) Reset() {}
+
+func (Buf) Len() int { return 0 }
+
+// DefaultBuf is a ...
+var DefaultBuf Buf
+
+func Example() {} // OK because is package-level.
+
+func Example_goodSuffix() // OK because refers to suffix annotation.
+
+func Example_BadSuffix() // ERROR "Example_BadSuffix has malformed example suffix: BadSuffix"
+
+func ExampleBuf() // OK because refers to known top-level type.
+
+func ExampleBuf_Append() {} // OK because refers to known method.
+
+func ExampleBuf_Clear() {} // ERROR "ExampleBuf_Clear refers to unknown field or method: Buf.Clear"
+
+func ExampleBuf_suffix() {} // OK because refers to suffix annotation.
+
+func ExampleBuf_Append_Bad() {} // ERROR "ExampleBuf_Append_Bad has malformed example suffix: Bad"
+
+func ExampleBuf_Append_suffix() {} // OK because refers to known method with valid suffix.
+
+func ExampleDefaultBuf() {} // OK because refers to top-level identifier.
+
+func ExampleBuf_Reset() bool { return true } // ERROR "ExampleBuf_Reset should return nothing"
+
+func ExampleBuf_Len(i int) {} // ERROR "ExampleBuf_Len should be niladic"
+
+// "Puffer" is German for "Buffer".
+
+func ExamplePuffer() // ERROR "ExamplePuffer refers to unknown identifier: Puffer"
+
+func ExamplePuffer_Append() // ERROR "ExamplePuffer_Append refers to unknown identifier: Puffer"
+
+func ExamplePuffer_suffix() // ERROR "ExamplePuffer_suffix refers to unknown identifier: Puffer"
diff --git a/src/cmd/vet/testdata/incomplete/examples_test.go b/src/cmd/vet/testdata/incomplete/examples_test.go
new file mode 100644
index 0000000..445502b
--- /dev/null
+++ b/src/cmd/vet/testdata/incomplete/examples_test.go
@@ -0,0 +1,33 @@
+// Test of examples.
+
+package testdata
+
+func Example() {} // OK because is package-level.
+
+func Example_suffix() // OK because refers to suffix annotation.
+
+func Example_BadSuffix() // OK because non-test package was excluded.  No false positives wanted.
+
+func ExampleBuf() // OK because non-test package was excluded.  No false positives wanted.
+
+func ExampleBuf_Append() {} // OK because non-test package was excluded.  No false positives wanted.
+
+func ExampleBuf_Clear() {} // OK because non-test package was excluded.  No false positives wanted.
+
+func ExampleBuf_suffix() {} // OK because refers to suffix annotation.
+
+func ExampleBuf_Append_Bad() {} // OK because non-test package was excluded.  No false positives wanted.
+
+func ExampleBuf_Append_suffix() {} // OK because refers to known method with valid suffix.
+
+func ExampleBuf_Reset() bool { return true } // ERROR "ExampleBuf_Reset should return nothing"
+
+func ExampleBuf_Len(i int) {} // ERROR "ExampleBuf_Len should be niladic"
+
+// "Puffer" is German for "Buffer".
+
+func ExamplePuffer() // OK because non-test package was excluded.  No false positives wanted.
+
+func ExamplePuffer_Append() // OK because non-test package was excluded.  No false positives wanted.
+
+func ExamplePuffer_suffix() // OK because non-test package was excluded.  No false positives wanted.
diff --git a/src/cmd/vet/vet_test.go b/src/cmd/vet/vet_test.go
index 9aae8dd..ea402ff 100644
--- a/src/cmd/vet/vet_test.go
+++ b/src/cmd/vet/vet_test.go
@@ -2,11 +2,16 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
+// No testdata on Android.
+
+// +build !android
+
 package main_test
 
 import (
 	"bytes"
-	"internal/testenv"
+	"flag"
+	"fmt"
 	"os"
 	"os/exec"
 	"path/filepath"
@@ -19,26 +24,70 @@
 	binary  = "testvet.exe"
 )
 
+// We implement TestMain so that we can remove the test binary when all is done.
+func TestMain(m *testing.M) {
+	flag.Parse()
+	result := m.Run()
+	os.Remove(binary)
+	os.Exit(result)
+}
+
+func CanRun() bool {
+	switch runtime.GOOS {
+	case "plan9", "windows":
+		// No Perl installed, can't run errcheck.
+		return false
+	case "nacl":
+		// Minimal and problematic file system.
+		return false
+	}
+	return true
+}
+
+var (
+	built  = false // We have built the binary.
+	failed = false // We have failed to build the binary, don't try again.
+)
+
+func Build(t *testing.T) {
+	if built {
+		return
+	}
+	if !CanRun() || failed {
+		t.Skip("cannot run on this environment")
+		return
+	}
+	cmd := exec.Command("go", "build", "-o", binary)
+	output, err := cmd.CombinedOutput()
+	if err != nil {
+		failed = true
+		fmt.Fprintf(os.Stderr, "%s\n", output)
+		t.Fatal(err)
+	}
+	built = true
+}
+
+func Vet(t *testing.T, files []string) {
+	errchk := filepath.Join(runtime.GOROOT(), "test", "errchk")
+	flags := []string{
+		"./" + binary,
+		"-printfuncs=Warn:1,Warnf:1",
+		"-test", // TODO: Delete once -shadow is part of -all.
+	}
+	cmd := exec.Command(errchk, append(flags, files...)...)
+	if !run(cmd, t) {
+		t.Fatal("vet command failed")
+	}
+}
+
 // Run this shell script, but do it in Go so it can be run by "go test".
 // 	go build -o testvet
 // 	$(GOROOT)/test/errchk ./testvet -shadow -printfuncs='Warn:1,Warnf:1' testdata/*.go testdata/*.s
 // 	rm testvet
 //
+
 func TestVet(t *testing.T) {
-	testenv.MustHaveGoBuild(t)
-
-	switch runtime.GOOS {
-	case "plan9", "windows":
-		// Plan 9 and Windows systems can't be guaranteed to have Perl and so can't run errchk.
-		t.Skipf("skipping test; no Perl on %q", runtime.GOOS)
-	}
-
-	// go build
-	cmd := exec.Command("go", "build", "-o", binary)
-	run(cmd, t)
-
-	// defer removal of vet
-	defer os.Remove(binary)
+	Build(t)
 
 	// errchk ./testvet
 	gos, err := filepath.Glob(filepath.Join(dataDir, "*.go"))
@@ -50,16 +99,19 @@
 		t.Fatal(err)
 	}
 	files := append(gos, asms...)
-	errchk := filepath.Join(runtime.GOROOT(), "test", "errchk")
-	flags := []string{
-		"./" + binary,
-		"-printfuncs=Warn:1,Warnf:1",
-		"-test", // TODO: Delete once -shadow is part of -all.
-	}
-	cmd = exec.Command(errchk, append(flags, files...)...)
-	if !run(cmd, t) {
-		t.Fatal("vet command failed")
-	}
+	Vet(t, files)
+}
+
+func TestDivergentPackagesExamples(t *testing.T) {
+	Build(t)
+	// errchk ./testvet
+	Vet(t, []string{"testdata/divergent/buf.go", "testdata/divergent/buf_test.go"})
+}
+
+func TestIncompleteExamples(t *testing.T) {
+	Build(t)
+	// errchk ./testvet
+	Vet(t, []string{"testdata/incomplete/examples_test.go"})
 }
 
 func run(c *exec.Cmd, t *testing.T) bool {
@@ -78,21 +130,13 @@
 
 // TestTags verifies that the -tags argument controls which files to check.
 func TestTags(t *testing.T) {
-	testenv.MustHaveGoBuild(t)
-
-	// go build
-	cmd := exec.Command("go", "build", "-o", binary)
-	run(cmd, t)
-
-	// defer removal of vet
-	defer os.Remove(binary)
-
+	Build(t)
 	args := []string{
 		"-tags=testtag",
 		"-v", // We're going to look at the files it examines.
 		"testdata/tagtest",
 	}
-	cmd = exec.Command("./"+binary, args...)
+	cmd := exec.Command("./"+binary, args...)
 	output, err := cmd.CombinedOutput()
 	if err != nil {
 		t.Fatal(err)
diff --git a/src/cmd/yacc/yacc.go b/src/cmd/yacc/yacc.go
index 4f83f50..32d2e5e 100644
--- a/src/cmd/yacc/yacc.go
+++ b/src/cmd/yacc/yacc.go
@@ -1284,8 +1284,9 @@
 func cpyact(curprod []int, max int) {
 
 	if !lflag {
-		fmt.Fprintf(fcode, "\n\t\t//line %v:%v\n\t\t", infile, lineno)
+		fmt.Fprintf(fcode, "\n\t\t//line %v:%v", infile, lineno)
 	}
+	fmt.Fprint(fcode, "\n\t\t")
 
 	lno := lineno
 	brac := 0
diff --git a/src/compress/flate/flate_test.go b/src/compress/flate/flate_test.go
index 3f67025..341d807 100644
--- a/src/compress/flate/flate_test.go
+++ b/src/compress/flate/flate_test.go
@@ -11,7 +11,9 @@
 import (
 	"bytes"
 	"encoding/hex"
+	"io"
 	"io/ioutil"
+	"strings"
 	"testing"
 )
 
@@ -258,3 +260,15 @@
 		}
 	}
 }
+
+func TestTruncatedStreams(t *testing.T) {
+	const data = "\x00\f\x00\xf3\xffhello, world\x01\x00\x00\xff\xff"
+
+	for i := 0; i < len(data)-1; i++ {
+		r := NewReader(strings.NewReader(data[:i]))
+		_, err := io.Copy(ioutil.Discard, r)
+		if err != io.ErrUnexpectedEOF {
+			t.Errorf("io.Copy(%d) on truncated stream: got %v, want %v", i, err, io.ErrUnexpectedEOF)
+		}
+	}
+}
diff --git a/src/compress/flate/inflate.go b/src/compress/flate/inflate.go
index 09f6115..42261e9 100644
--- a/src/compress/flate/inflate.go
+++ b/src/compress/flate/inflate.go
@@ -42,6 +42,8 @@
 func (e InternalError) Error() string { return "flate: internal error: " + string(e) }
 
 // A ReadError reports an error encountered while reading input.
+//
+// Deprecated: No longer returned.
 type ReadError struct {
 	Offset int64 // byte offset where error occurred
 	Err    error // error returned by underlying Read
@@ -52,6 +54,8 @@
 }
 
 // A WriteError reports an error encountered while writing output.
+//
+// Deprecated: No longer returned.
 type WriteError struct {
 	Offset int64 // byte offset where error occurred
 	Err    error // error returned by underlying Write
@@ -461,6 +465,14 @@
 		return CorruptInputError(f.roffset)
 	}
 
+	// As an optimization, we can initialize the min bits to read at a time
+	// for the HLIT tree to the length of the EOB marker since we know that
+	// every block must terminate with one. This preserves the property that
+	// we never read any extra bytes after the end of the DEFLATE stream.
+	if f.h1.min < f.bits[endBlockMarker] {
+		f.h1.min = f.bits[endBlockMarker]
+	}
+
 	return nil
 }
 
@@ -637,7 +649,10 @@
 	nr, err := io.ReadFull(f.r, f.buf[0:4])
 	f.roffset += int64(nr)
 	if err != nil {
-		f.err = &ReadError{f.roffset, err}
+		if err == io.EOF {
+			err = io.ErrUnexpectedEOF
+		}
+		f.err = err
 		return
 	}
 	n := int(f.buf[0]) | int(f.buf[1])<<8
@@ -669,7 +684,10 @@
 		m, err := io.ReadFull(f.r, f.hist[f.hp:f.hp+m])
 		f.roffset += int64(m)
 		if err != nil {
-			f.err = &ReadError{f.roffset, err}
+			if err == io.EOF {
+				err = io.ErrUnexpectedEOF
+			}
+			f.err = err
 			return
 		}
 		n -= m
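
The pattern in the two hunks above recurs throughout this change: io.ReadFull reports a bare io.EOF only when zero bytes were read, so a reader that has already committed to a fixed-size value must translate that into io.ErrUnexpectedEOF itself. A minimal sketch of the convention, independent of the flate internals:

	package main

	import (
		"bytes"
		"fmt"
		"io"
	)

	// readExactly fills buf and never reports a bare io.EOF: once a
	// fixed-size value has been started, truncation is unexpected.
	func readExactly(r io.Reader, buf []byte) error {
		_, err := io.ReadFull(r, buf)
		if err == io.EOF {
			err = io.ErrUnexpectedEOF
		}
		return err
	}

	func main() {
		err := readExactly(bytes.NewReader(nil), make([]byte, 4))
		fmt.Println(err) // unexpected EOF
	}
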
diff --git a/src/compress/gzip/gunzip.go b/src/compress/gzip/gunzip.go
index dc27653..91473bf 100644
--- a/src/compress/gzip/gunzip.go
+++ b/src/compress/gzip/gunzip.go
@@ -167,6 +167,9 @@
 func (z *Reader) read2() (uint32, error) {
 	_, err := io.ReadFull(z.r, z.buf[0:2])
 	if err != nil {
+		if err == io.EOF {
+			err = io.ErrUnexpectedEOF
+		}
 		return 0, err
 	}
 	return uint32(z.buf[0]) | uint32(z.buf[1])<<8, nil
@@ -175,6 +178,13 @@
 func (z *Reader) readHeader(save bool) error {
 	_, err := io.ReadFull(z.r, z.buf[0:10])
 	if err != nil {
+		// RFC1952 section 2.2 says the following:
+		//	A gzip file consists of a series of "members" (compressed data sets).
+		//
+		// Other than this, the specification does not clarify whether a
+		// "series" is defined as "one or more" or "zero or more". To err on the
+		// side of caution, Go interprets this to mean "zero or more".
+		// Thus, it is okay to return io.EOF here.
 		return err
 	}
 	if z.buf[0] != gzipID1 || z.buf[1] != gzipID2 || z.buf[2] != gzipDeflate {
@@ -196,6 +206,9 @@
 		}
 		data := make([]byte, n)
 		if _, err = io.ReadFull(z.r, data); err != nil {
+			if err == io.EOF {
+				err = io.ErrUnexpectedEOF
+			}
 			return err
 		}
 		if save {
@@ -260,6 +273,9 @@
 
 	// Finished file; check checksum + size.
 	if _, err := io.ReadFull(z.r, z.buf[0:8]); err != nil {
+		if err == io.EOF {
+			err = io.ErrUnexpectedEOF
+		}
 		z.err = err
 		return 0, err
 	}
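
Given the "zero or more members" reading of RFC 1952 adopted in readHeader, an empty input is a valid degenerate stream rather than a corruption error, so NewReader surfaces a clean io.EOF. A short usage sketch:

	package main

	import (
		"bytes"
		"compress/gzip"
		"fmt"
		"io"
	)

	func main() {
		// An empty stream is a gzip file with zero members.
		_, err := gzip.NewReader(bytes.NewReader(nil))
		fmt.Println(err == io.EOF) // true
	}
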
diff --git a/src/compress/gzip/gunzip_test.go b/src/compress/gzip/gunzip_test.go
index 0636dec..007d958 100644
--- a/src/compress/gzip/gunzip_test.go
+++ b/src/compress/gzip/gunzip_test.go
@@ -6,6 +6,7 @@
 
 import (
 	"bytes"
+	"compress/flate"
 	"io"
 	"io/ioutil"
 	"os"
@@ -408,3 +409,34 @@
 		t.Fatalf("third reset: err=%v, want io.EOF", err)
 	}
 }
+
+func TestNilStream(t *testing.T) {
+	// Go liberally interprets RFC1952 section 2.2 to mean that a gzip file
+	// consists of zero or more members. Thus, we test that a nil stream is okay.
+	_, err := NewReader(bytes.NewReader(nil))
+	if err != io.EOF {
+		t.Fatalf("NewReader(nil) on empty stream: got %v, want io.EOF", err)
+	}
+}
+
+func TestTruncatedStreams(t *testing.T) {
+	const data = "\x1f\x8b\b\x04\x00\tn\x88\x00\xff\a\x00foo bar\xcbH\xcd\xc9\xc9\xd7Q(\xcf/\xcaI\x01\x04:r\xab\xff\f\x00\x00\x00"
+
+	// Intentionally iterate starting with at least one byte in the stream.
+	for i := 1; i < len(data)-1; i++ {
+		r, err := NewReader(strings.NewReader(data[:i]))
+		if err != nil {
+			if err != io.ErrUnexpectedEOF {
+				t.Errorf("NewReader(%d) on truncated stream: got %v, want %v", i, err, io.ErrUnexpectedEOF)
+			}
+			continue
+		}
+		_, err = io.Copy(ioutil.Discard, r)
+		if ferr, ok := err.(*flate.ReadError); ok {
+			err = ferr.Err
+		}
+		if err != io.ErrUnexpectedEOF {
+			t.Errorf("io.Copy(%d) on truncated stream: got %v, want %v", i, err, io.ErrUnexpectedEOF)
+		}
+	}
+}
diff --git a/src/compress/lzw/reader.go b/src/compress/lzw/reader.go
index 1353831..9eef2b2 100644
--- a/src/compress/lzw/reader.go
+++ b/src/compress/lzw/reader.go
@@ -132,6 +132,7 @@
 // litWidth is the width in bits of literal codes.
 func (d *decoder) decode() {
 	// Loop over the code stream, converting codes into decompressed bytes.
+loop:
 	for {
 		code, err := d.read(d)
 		if err != nil {
@@ -139,8 +140,7 @@
 				err = io.ErrUnexpectedEOF
 			}
 			d.err = err
-			d.flush()
-			return
+			break
 		}
 		switch {
 		case code < d.clear:
@@ -159,9 +159,8 @@
 			d.last = decoderInvalidCode
 			continue
 		case code == d.eof:
-			d.flush()
 			d.err = io.EOF
-			return
+			break loop
 		case code <= d.hi:
 			c, i := code, len(d.output)-1
 			if code == d.hi {
@@ -191,8 +190,7 @@
 			}
 		default:
 			d.err = errors.New("lzw: invalid code")
-			d.flush()
-			return
+			break loop
 		}
 		d.last, d.hi = code, d.hi+1
 		if d.hi >= d.overflow {
@@ -204,13 +202,10 @@
 			}
 		}
 		if d.o >= flushBuffer {
-			d.flush()
-			return
+			break
 		}
 	}
-}
-
-func (d *decoder) flush() {
+	// Flush pending output.
 	d.toRead = d.output[:d.o]
 	d.o = 0
 }
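
The restructured decode relies on a labeled break: a plain break inside the switch would only leave the switch, so the eof and invalid-code cases use the loop label to reach the single flush at the bottom. A standalone illustration of the construct:

	package main

	import "fmt"

	func main() {
		items := []int{1, 2, -1, 3}
	loop:
		for _, v := range items {
			switch {
			case v < 0:
				break loop // exits the for loop, not just the switch
			default:
				fmt.Println(v)
			}
		}
		fmt.Println("flush") // single exit point, as in decode
	}
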
diff --git a/src/compress/zlib/reader.go b/src/compress/zlib/reader.go
index 816f1bf..78ea704 100644
--- a/src/compress/zlib/reader.go
+++ b/src/compress/zlib/reader.go
@@ -101,6 +101,9 @@
 
 	// Finished file; check checksum.
 	if _, err := io.ReadFull(z.r, z.scratch[0:4]); err != nil {
+		if err == io.EOF {
+			err = io.ErrUnexpectedEOF
+		}
 		z.err = err
 		return 0, err
 	}
@@ -130,6 +133,9 @@
 	}
 	_, err := io.ReadFull(z.r, z.scratch[0:2])
 	if err != nil {
+		if err == io.EOF {
+			err = io.ErrUnexpectedEOF
+		}
 		return err
 	}
 	h := uint(z.scratch[0])<<8 | uint(z.scratch[1])
@@ -140,6 +146,9 @@
 	if haveDict {
 		_, err = io.ReadFull(z.r, z.scratch[0:4])
 		if err != nil {
+			if err == io.EOF {
+				err = io.ErrUnexpectedEOF
+			}
 			return err
 		}
 		checksum := uint32(z.scratch[0])<<24 | uint32(z.scratch[1])<<16 | uint32(z.scratch[2])<<8 | uint32(z.scratch[3])
diff --git a/src/compress/zlib/reader_test.go b/src/compress/zlib/reader_test.go
index 218ccba..449f446 100644
--- a/src/compress/zlib/reader_test.go
+++ b/src/compress/zlib/reader_test.go
@@ -23,6 +23,30 @@
 
 var zlibTests = []zlibTest{
 	{
+		"truncated empty",
+		"",
+		[]byte{},
+		nil,
+		io.ErrUnexpectedEOF,
+	},
+	{
+		"truncated dict",
+		"",
+		[]byte{0x78, 0xbb},
+		[]byte{0x00},
+		io.ErrUnexpectedEOF,
+	},
+	{
+		"truncated checksum",
+		"",
+		[]byte{0x78, 0xbb, 0x00, 0x01, 0x00, 0x01, 0xca, 0x48,
+			0xcd, 0xc9, 0xc9, 0xd7, 0x51, 0x28, 0xcf, 0x2f,
+			0xca, 0x49, 0x01, 0x04, 0x00, 0x00, 0xff, 0xff,
+		},
+		[]byte{0x00},
+		io.ErrUnexpectedEOF,
+	},
+	{
 		"empty",
 		"",
 		[]byte{0x78, 0x9c, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01},
diff --git a/src/crypto/tls/tls.go b/src/crypto/tls/tls.go
index fb399d0..2554af6 100644
--- a/src/crypto/tls/tls.go
+++ b/src/crypto/tls/tls.go
@@ -12,6 +12,7 @@
 	"crypto/x509"
 	"encoding/pem"
 	"errors"
+	"fmt"
 	"io/ioutil"
 	"net"
 	"strings"
@@ -182,32 +183,50 @@
 // X509KeyPair parses a public/private key pair from a pair of
 // PEM encoded data.
 func X509KeyPair(certPEMBlock, keyPEMBlock []byte) (Certificate, error) {
-	var cert Certificate
-	var certDERBlock *pem.Block
 	fail := func(err error) (Certificate, error) { return Certificate{}, err }
+
+	var cert Certificate
+	var skippedBlockTypes []string
 	for {
+		var certDERBlock *pem.Block
 		certDERBlock, certPEMBlock = pem.Decode(certPEMBlock)
 		if certDERBlock == nil {
 			break
 		}
 		if certDERBlock.Type == "CERTIFICATE" {
 			cert.Certificate = append(cert.Certificate, certDERBlock.Bytes)
+		} else {
+			skippedBlockTypes = append(skippedBlockTypes, certDERBlock.Type)
 		}
 	}
 
 	if len(cert.Certificate) == 0 {
-		return fail(errors.New("crypto/tls: failed to parse certificate PEM data"))
+		if len(skippedBlockTypes) == 0 {
+			return fail(errors.New("crypto/tls: failed to find any PEM data in certificate input"))
+		} else if len(skippedBlockTypes) == 1 && strings.HasSuffix(skippedBlockTypes[0], "PRIVATE KEY") {
+			return fail(errors.New("crypto/tls: failed to find certificate PEM data in certificate input, but did find a private key; PEM inputs may have been switched"))
+		} else {
+			return fail(fmt.Errorf("crypto/tls: failed to find \"CERTIFICATE\" PEM block in certificate input after skipping PEM blocks of the following types: %v", skippedBlockTypes))
+		}
 	}
 
+	skippedBlockTypes = skippedBlockTypes[:0]
 	var keyDERBlock *pem.Block
 	for {
 		keyDERBlock, keyPEMBlock = pem.Decode(keyPEMBlock)
 		if keyDERBlock == nil {
-			return fail(errors.New("crypto/tls: failed to parse key PEM data"))
+			if len(skippedBlockTypes) == 0 {
+				return fail(errors.New("crypto/tls: failed to find any PEM data in key input"))
+			} else if len(skippedBlockTypes) == 1 && skippedBlockTypes[0] == "CERTIFICATE" {
+				return fail(errors.New("crypto/tls: found a certificate rather than a key in the PEM for the private key"))
+			} else {
+				return fail(fmt.Errorf("crypto/tls: failed to find PEM block with type ending in \"PRIVATE KEY\" in key input after skipping PEM blocks of the following types: %v", skippedBlockTypes))
+			}
 		}
 		if keyDERBlock.Type == "PRIVATE KEY" || strings.HasSuffix(keyDERBlock.Type, " PRIVATE KEY") {
 			break
 		}
+		skippedBlockTypes = append(skippedBlockTypes, keyDERBlock.Type)
 	}
 
 	var err error
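
With the block-type bookkeeping above, the common mistake of swapping the certificate and key arguments now yields a targeted diagnostic instead of a generic parse failure. A sketch; the base64 payloads below are nonsense, since only the PEM block types matter for the message:

	package main

	import (
		"crypto/tls"
		"fmt"
	)

	// Nonsense payloads with real PEM block types.
	var keyish = []byte("-----BEGIN RSA PRIVATE KEY-----\n" +
		"Zm9vZm9vZm9v\n" +
		"-----END RSA PRIVATE KEY-----\n")

	var certish = []byte("-----BEGIN CERTIFICATE-----\n" +
		"Zm9vZm9vZm9v\n" +
		"-----END CERTIFICATE-----\n")

	func main() {
		// Arguments deliberately switched: the key block is passed
		// where the certificate is expected.
		_, err := tls.X509KeyPair(keyish, certish)
		fmt.Println(err) // ...did find a private key; PEM inputs may have been switched
	}
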
diff --git a/src/crypto/tls/tls_test.go b/src/crypto/tls/tls_test.go
index c45c103..6b5d455 100644
--- a/src/crypto/tls/tls_test.go
+++ b/src/crypto/tls/tls_test.go
@@ -104,6 +104,38 @@
 	}
 }
 
+func TestX509KeyPairErrors(t *testing.T) {
+	_, err := X509KeyPair([]byte(rsaKeyPEM), []byte(rsaCertPEM))
+	if err == nil {
+		t.Fatalf("X509KeyPair didn't return an error when arguments were switched")
+	}
+	if subStr := "been switched"; !strings.Contains(err.Error(), subStr) {
+		t.Fatalf("Expected %q in the error when switching arguments to X509KeyPair, but the error was %q", subStr, err)
+	}
+
+	_, err = X509KeyPair([]byte(rsaCertPEM), []byte(rsaCertPEM))
+	if err == nil {
+		t.Fatalf("X509KeyPair didn't return an error when both arguments were certificates")
+	}
+	if subStr := "certificate"; !strings.Contains(err.Error(), subStr) {
+		t.Fatalf("Expected %q in the error when both arguments to X509KeyPair were certificates, but the error was %q", subStr, err)
+	}
+
+	const nonsensePEM = `
+-----BEGIN NONSENSE-----
+Zm9vZm9vZm9v
+-----END NONSENSE-----
+`
+
+	_, err = X509KeyPair([]byte(nonsensePEM), []byte(nonsensePEM))
+	if err == nil {
+		t.Fatalf("X509KeyPair didn't return an error when both arguments were nonsense")
+	}
+	if subStr := "NONSENSE"; !strings.Contains(err.Error(), subStr) {
+		t.Fatalf("Expected %q in the error when both arguments to X509KeyPair were nonsense, but the error was %q", subStr, err)
+	}
+}
+
 func TestX509MixedKeyPair(t *testing.T) {
 	if _, err := X509KeyPair([]byte(rsaCertPEM), []byte(ecdsaKeyPEM)); err == nil {
 		t.Error("Load of RSA certificate succeeded with ECDSA private key")
diff --git a/src/crypto/x509/verify.go b/src/crypto/x509/verify.go
index 21b870c..27e9bbf 100644
--- a/src/crypto/x509/verify.go
+++ b/src/crypto/x509/verify.go
@@ -5,6 +5,7 @@
 package x509
 
 import (
+	"errors"
 	"fmt"
 	"net"
 	"runtime"
@@ -122,6 +123,10 @@
 	return "x509: failed to load system roots and no roots provided"
 }
 
+// errNotParsed is returned when a certificate without ASN.1 contents is
+// verified. Platform-specific verification needs the ASN.1 contents.
+var errNotParsed = errors.New("x509: missing ASN.1 contents; use ParseCertificate")
+
 // VerifyOptions contains parameters for Certificate.Verify. It's a structure
 // because other PKIX verification APIs have ended up needing many options.
 type VerifyOptions struct {
@@ -210,6 +215,19 @@
 //
 // WARNING: this doesn't do any revocation checking.
 func (c *Certificate) Verify(opts VerifyOptions) (chains [][]*Certificate, err error) {
+	// Platform-specific verification needs the ASN.1 contents so
+	// this makes the behaviour consistent across platforms.
+	if len(c.Raw) == 0 {
+		return nil, errNotParsed
+	}
+	if opts.Intermediates != nil {
+		for _, intermediate := range opts.Intermediates.certs {
+			if len(intermediate.Raw) == 0 {
+				return nil, errNotParsed
+			}
+		}
+	}
+
 	// Use Windows's own verification and chain building.
 	if opts.Roots == nil && runtime.GOOS == "windows" {
 		return c.systemVerify(&opts)
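
From the caller's side, the new check means Verify fails fast on a Certificate that never went through ParseCertificate, and does so consistently on every platform, including the Windows path below. A quick sketch:

	package main

	import (
		"crypto/x509"
		"fmt"
	)

	func main() {
		// A zero Certificate carries no raw ASN.1 contents.
		_, err := new(x509.Certificate).Verify(x509.VerifyOptions{})
		fmt.Println(err) // x509: missing ASN.1 contents; use ParseCertificate
	}
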
diff --git a/src/crypto/x509/x509.go b/src/crypto/x509/x509.go
index be6c013..bbc63241 100644
--- a/src/crypto/x509/x509.go
+++ b/src/crypto/x509/x509.go
@@ -56,6 +56,9 @@
 			N: pub.N,
 			E: pub.E,
 		})
+		if err != nil {
+			return nil, pkix.AlgorithmIdentifier{}, err
+		}
 		publicKeyAlgorithm.Algorithm = oidPublicKeyRSA
 		// This is a NULL parameters value which is technically
 		// superfluous, but most other code includes it and, by
@@ -1704,9 +1707,7 @@
 
 	Subject pkix.Name
 
-	// Attributes is a collection of attributes providing
-	// additional information about the subject of the certificate.
-	// See RFC 2986 section 4.1.
+	// Attributes is the dried husk of a bug and shouldn't be used.
 	Attributes []pkix.AttributeTypeAndValueSET
 
 	// Extensions contains raw X.509 extensions. When parsing CSRs, this
@@ -1781,6 +1782,38 @@
 	return attributes
 }
 
+// parseCSRExtensions parses the attributes from a CSR and extracts any
+// requested extensions.
+func parseCSRExtensions(rawAttributes []asn1.RawValue) ([]pkix.Extension, error) {
+	// pkcs10Attribute reflects the Attribute structure from section 4.1 of
+	// https://tools.ietf.org/html/rfc2986.
+	type pkcs10Attribute struct {
+		Id     asn1.ObjectIdentifier
+		Values []asn1.RawValue `asn1:"set"`
+	}
+
+	var ret []pkix.Extension
+	for _, rawAttr := range rawAttributes {
+		var attr pkcs10Attribute
+		if rest, err := asn1.Unmarshal(rawAttr.FullBytes, &attr); err != nil || len(rest) != 0 || len(attr.Values) == 0 {
+			// Ignore attributes that don't parse.
+			continue
+		}
+
+		if !attr.Id.Equal(oidExtensionRequest) {
+			continue
+		}
+
+		var extensions []pkix.Extension
+		if _, err := asn1.Unmarshal(attr.Values[0].FullBytes, &extensions); err != nil {
+			return nil, err
+		}
+		ret = append(ret, extensions...)
+	}
+
+	return ret, nil
+}
+
 // CreateCertificateRequest creates a new certificate request based on a template. The
 // following members of template are used: Subject, Attributes,
 // SignatureAlgorithm, Extensions, DNSNames, EmailAddresses, and IPAddresses.
@@ -1983,38 +2016,15 @@
 
 	out.Subject.FillFromRDNSequence(&subject)
 
-	var extensions []pkix.AttributeTypeAndValue
-
-	for _, atvSet := range out.Attributes {
-		if !atvSet.Type.Equal(oidExtensionRequest) {
-			continue
-		}
-
-		for _, atvs := range atvSet.Value {
-			extensions = append(extensions, atvs...)
-		}
+	if out.Extensions, err = parseCSRExtensions(in.TBSCSR.RawAttributes); err != nil {
+		return nil, err
 	}
 
-	out.Extensions = make([]pkix.Extension, 0, len(extensions))
-
-	for _, e := range extensions {
-		value, ok := e.Value.([]byte)
-		if !ok {
-			return nil, errors.New("x509: extension attribute contained non-OCTET STRING data")
-		}
-
-		out.Extensions = append(out.Extensions, pkix.Extension{
-			Id:    e.Type,
-			Value: value,
-		})
-
-		if len(e.Type) == 4 && e.Type[0] == 2 && e.Type[1] == 5 && e.Type[2] == 29 {
-			switch e.Type[3] {
-			case 17:
-				out.DNSNames, out.EmailAddresses, out.IPAddresses, err = parseSANExtension(value)
-				if err != nil {
-					return nil, err
-				}
+	for _, extension := range out.Extensions {
+		if extension.Id.Equal(oidExtensionSubjectAltName) {
+			out.DNSNames, out.EmailAddresses, out.IPAddresses, err = parseSANExtension(extension.Value)
+			if err != nil {
+				return nil, err
 			}
 		}
 	}
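
The net effect for callers is that ParseCertificateRequest now surfaces requested extensions, critical or not, through the Extensions field. A hedged sketch, where csrDER is assumed to hold a DER-encoded PKCS#10 request:

	package main

	import (
		"crypto/x509"
		"fmt"
	)

	// csrDER is a placeholder: e.g. the decoded body of a PEM
	// "CERTIFICATE REQUEST" block.
	var csrDER []byte

	func main() {
		csr, err := x509.ParseCertificateRequest(csrDER)
		if err != nil {
			fmt.Println(err)
			return
		}
		for _, ext := range csr.Extensions {
			fmt.Printf("requested extension %v (%d bytes)\n", ext.Id, len(ext.Value))
		}
	}
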
diff --git a/src/crypto/x509/x509_test.go b/src/crypto/x509/x509_test.go
index f4f9fa2..b1fe719 100644
--- a/src/crypto/x509/x509_test.go
+++ b/src/crypto/x509/x509_test.go
@@ -504,9 +504,9 @@
 
 	oids := []asn1.ObjectIdentifier{
 		// This OID is in the PKIX arc, but unknown.
-		asn1.ObjectIdentifier{2, 5, 29, 999999},
+		{2, 5, 29, 999999},
 		// This is a nonsense, unassigned OID.
-		asn1.ObjectIdentifier{1, 2, 3, 4},
+		{1, 2, 3, 4},
 	}
 
 	for _, oid := range oids {
@@ -1074,6 +1074,40 @@
 	}
 }
 
+func TestCriticalFlagInCSRRequestedExtensions(t *testing.T) {
+	// This CSR contains an extension request where the extensions have a
+	// critical flag in them. In the past we failed to handle this.
+	const csrBase64 = "MIICrTCCAZUCAQIwMzEgMB4GA1UEAwwXU0NFUCBDQSBmb3IgRGV2ZWxlciBTcmwxDzANBgNVBAsMBjQzNTk3MTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBALFMAJ7Zy9YyfgbNlbUWAW0LalNRMPs7aXmLANsCpjhnw3lLlfDPaLeWyKh1nK5I5ojaJOW6KIOSAcJkDUe3rrE0wR0RVt3UxArqs0R/ND3u5Q+bDQY2X1HAFUHzUzcdm5JRAIA355v90teMckaWAIlkRQjDE22Lzc6NAl64KOd1rqOUNj8+PfX6fSo20jm94Pp1+a6mfk3G/RUWVuSm7owO5DZI/Fsi2ijdmb4NUar6K/bDKYTrDFkzcqAyMfP3TitUtBp19Mp3B1yAlHjlbp/r5fSSXfOGHZdgIvp0WkLuK2u5eQrX5l7HMB/5epgUs3HQxKY6ljhh5wAjDwz//LsCAwEAAaA1MDMGCSqGSIb3DQEJDjEmMCQwEgYDVR0TAQH/BAgwBgEB/wIBADAOBgNVHQ8BAf8EBAMCAoQwDQYJKoZIhvcNAQEFBQADggEBAAMq3bxJSPQEgzLYR/yaVvgjCDrc3zUbIwdOis6Go06Q4RnjH5yRaSZAqZQTDsPurQcnz2I39VMGEiSkFJFavf4QHIZ7QFLkyXadMtALc87tm17Ej719SbHcBSSZayR9VYJUNXRLayI6HvyUrmqcMKh+iX3WY3ICr59/wlM0tYa8DYN4yzmOa2Onb29gy3YlaF5A2AKAMmk003cRT9gY26mjpv7d21czOSSeNyVIoZ04IR9ee71vWTMdv0hu/af5kSjQ+ZG5/Qgc0+mnECLz/1gtxt1srLYbtYQ/qAY8oX1DCSGFS61tN/vl+4cxGMD/VGcGzADRLRHSlVqy2Qgss6Q="
+
+	csrBytes := fromBase64(csrBase64)
+	csr, err := ParseCertificateRequest(csrBytes)
+	if err != nil {
+		t.Fatalf("failed to parse CSR: %s", err)
+	}
+
+	expected := []struct {
+		Id    asn1.ObjectIdentifier
+		Value []byte
+	}{
+		{oidExtensionBasicConstraints, fromBase64("MAYBAf8CAQA=")},
+		{oidExtensionKeyUsage, fromBase64("AwIChA==")},
+	}
+
+	if n := len(csr.Extensions); n != len(expected) {
+		t.Fatalf("expected to find %d extensions but found %d", len(expected), n)
+	}
+
+	for i, extension := range csr.Extensions {
+		if !extension.Id.Equal(expected[i].Id) {
+			t.Fatalf("extension #%d has unexpected type %v (expected %v)", i, extension.Id, expected[i].Id)
+		}
+
+		if !bytes.Equal(extension.Value, expected[i].Value) {
+			t.Fatalf("extension #%d has unexpected contents %x (expected %x)", i, extension.Value, expected[i].Value)
+		}
+	}
+}
+
 func TestMaxPathLen(t *testing.T) {
 	block, _ := pem.Decode([]byte(pemPrivateKey))
 	rsaPriv, err := ParsePKCS1PrivateKey(block.Bytes)
@@ -1159,6 +1193,12 @@
 	}
 }
 
+func TestVerifyEmptyCertificate(t *testing.T) {
+	if _, err := new(Certificate).Verify(VerifyOptions{}); err != errNotParsed {
+		t.Errorf("Verifying empty certificate resulted in unexpected error: %q (wanted %q)", err, errNotParsed)
+	}
+}
+
 // This CSR was generated with OpenSSL:
 //  openssl req -out CSR.csr -new -sha256 -nodes -keyout privateKey.key -config openssl.cnf
 //
diff --git a/src/database/sql/convert.go b/src/database/sql/convert.go
index c0b38a2..bba5a88 100644
--- a/src/database/sql/convert.go
+++ b/src/database/sql/convert.go
@@ -203,11 +203,16 @@
 	}
 
 	dv := reflect.Indirect(dpv)
-	if dv.Kind() == sv.Kind() {
+	if sv.IsValid() && sv.Type().AssignableTo(dv.Type()) {
 		dv.Set(sv)
 		return nil
 	}
 
+	if dv.Kind() == sv.Kind() && sv.Type().ConvertibleTo(dv.Type()) {
+		dv.Set(sv.Convert(dv.Type()))
+		return nil
+	}
+
 	switch dv.Kind() {
 	case reflect.Ptr:
 		if src == nil {
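
The two reflect checks above do different jobs: AssignableTo handles identical types, while the Kind-guarded ConvertibleTo admits user-defined types such as type Celsius float64 without opening the door to cross-kind conversions on this path. A standalone sketch of the distinction:

	package main

	import (
		"fmt"
		"reflect"
	)

	type Celsius float64

	func main() {
		src := reflect.ValueOf(36.6)
		dst := reflect.Indirect(reflect.ValueOf(new(Celsius)))

		fmt.Println(src.Type().AssignableTo(dst.Type()))  // false: distinct named type
		fmt.Println(src.Type().ConvertibleTo(dst.Type())) // true: same underlying kind

		if dst.Kind() == src.Kind() && src.Type().ConvertibleTo(dst.Type()) {
			dst.Set(src.Convert(dst.Type()))
		}
		fmt.Println(dst.Interface()) // 36.6, now a Celsius
	}
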
diff --git a/src/database/sql/convert_test.go b/src/database/sql/convert_test.go
index 98af9fb..1fab282 100644
--- a/src/database/sql/convert_test.go
+++ b/src/database/sql/convert_test.go
@@ -16,23 +16,28 @@
 var someTime = time.Unix(123, 0)
 var answer int64 = 42
 
+type userDefined float64
+
+type userDefinedSlice []int
+
 type conversionTest struct {
 	s, d interface{} // source and destination
 
 	// following are used if they're non-zero
-	wantint   int64
-	wantuint  uint64
-	wantstr   string
-	wantbytes []byte
-	wantraw   RawBytes
-	wantf32   float32
-	wantf64   float64
-	wanttime  time.Time
-	wantbool  bool // used if d is of type *bool
-	wanterr   string
-	wantiface interface{}
-	wantptr   *int64 // if non-nil, *d's pointed value must be equal to *wantptr
-	wantnil   bool   // if true, *d must be *int64(nil)
+	wantint    int64
+	wantuint   uint64
+	wantstr    string
+	wantbytes  []byte
+	wantraw    RawBytes
+	wantf32    float32
+	wantf64    float64
+	wanttime   time.Time
+	wantbool   bool // used if d is of type *bool
+	wanterr    string
+	wantiface  interface{}
+	wantptr    *int64 // if non-nil, *d's pointed value must be equal to *wantptr
+	wantnil    bool   // if true, *d must be *int64(nil)
+	wantusrdef userDefined
 }
 
 // Target variables for scanning into.
@@ -145,6 +150,12 @@
 	{s: true, d: &scaniface, wantiface: true},
 	{s: nil, d: &scaniface},
 	{s: []byte(nil), d: &scaniface, wantiface: []byte(nil)},
+
+	// To a user-defined type
+	{s: 1.5, d: new(userDefined), wantusrdef: 1.5},
+	{s: int64(123), d: new(userDefined), wantusrdef: 123},
+	{s: "1.5", d: new(userDefined), wantusrdef: 1.5},
+	{s: []byte{1, 2, 3}, d: new(userDefinedSlice), wanterr: `unsupported driver -> Scan pair: []uint8 -> *sql.userDefinedSlice`},
 }
 
 func intPtrValue(intptr interface{}) interface{} {
@@ -228,6 +239,9 @@
 				}
 			}
 		}
+		if ct.wantusrdef != 0 && ct.wantusrdef != *ct.d.(*userDefined) {
+			errf("want userDefined %f, got %f", ct.wantusrdef, *ct.d.(*userDefined))
+		}
 	}
 }
 
diff --git a/src/database/sql/fakedb_test.go b/src/database/sql/fakedb_test.go
index 112f280e..f1e8f6c 100644
--- a/src/database/sql/fakedb_test.go
+++ b/src/database/sql/fakedb_test.go
@@ -153,12 +153,32 @@
 	}
 }
 
+// hookOpenErr is a hook to simulate connection failures.
+var hookOpenErr struct {
+	sync.Mutex
+	fn func() error
+}
+
+func setHookOpenErr(fn func() error) {
+	hookOpenErr.Lock()
+	defer hookOpenErr.Unlock()
+	hookOpenErr.fn = fn
+}
+
 // Supports dsn forms:
 //    <dbname>
 //    <dbname>;<opts>  (only currently supported option is `badConn`,
 //                      which causes driver.ErrBadConn to be returned on
 //                      every other conn.Begin())
 func (d *fakeDriver) Open(dsn string) (driver.Conn, error) {
+	hookOpenErr.Lock()
+	fn := hookOpenErr.fn
+	hookOpenErr.Unlock()
+	if fn != nil {
+		if err := fn(); err != nil {
+			return nil, err
+		}
+	}
 	parts := strings.Split(dsn, ";")
 	if len(parts) < 1 {
 		return nil, errors.New("fakedb: no database name")
diff --git a/src/database/sql/sql.go b/src/database/sql/sql.go
index 0120ae8..f3fed95 100644
--- a/src/database/sql/sql.go
+++ b/src/database/sql/sql.go
@@ -229,8 +229,7 @@
 	mu           sync.Mutex // protects following fields
 	freeConn     []*driverConn
 	connRequests []chan connRequest
-	numOpen      int
-	pendingOpens int
+	numOpen      int // number of opened and pending open connections
 	// Used to signal the need for new connections
 	// a goroutine running connectionOpener() reads on this chan and
 	// maybeOpenNewConnections sends on the chan (one send per needed connection)
@@ -441,7 +440,7 @@
 	}
 }
 
-// This is the size of the connectionOpener request chan (dn.openerCh).
+// This is the size of the connectionOpener request chan (DB.openerCh).
 // This value should be larger than the maximum typical value
 // used for db.maxOpen. If maxOpen is significantly larger than
 // connectionRequestQueueSize then it is possible for ALL calls into the *DB
@@ -615,15 +614,15 @@
 // If there are connRequests and the connection limit hasn't been reached,
 // then tell the connectionOpener to open new connections.
 func (db *DB) maybeOpenNewConnections() {
-	numRequests := len(db.connRequests) - db.pendingOpens
+	numRequests := len(db.connRequests)
 	if db.maxOpen > 0 {
-		numCanOpen := db.maxOpen - (db.numOpen + db.pendingOpens)
+		numCanOpen := db.maxOpen - db.numOpen
 		if numRequests > numCanOpen {
 			numRequests = numCanOpen
 		}
 	}
 	for numRequests > 0 {
-		db.pendingOpens++
+		db.numOpen++ // optimistically
 		numRequests--
 		db.openerCh <- struct{}{}
 	}
@@ -638,6 +637,9 @@
 
 // Open one new connection
 func (db *DB) openNewConnection() {
+	// maybeOpenNewConnections has already executed db.numOpen++ before it sent
+	// on db.openerCh. This function must execute db.numOpen-- if the
+	// connection fails or is closed before returning.
 	ci, err := db.driver.Open(db.dsn)
 	db.mu.Lock()
 	defer db.mu.Unlock()
@@ -645,11 +647,13 @@
 		if err == nil {
 			ci.Close()
 		}
+		db.numOpen--
 		return
 	}
-	db.pendingOpens--
 	if err != nil {
+		db.numOpen--
 		db.putConnDBLocked(nil, err)
+		db.maybeOpenNewConnections()
 		return
 	}
 	dc := &driverConn{
@@ -658,8 +662,8 @@
 	}
 	if db.putConnDBLocked(dc, err) {
 		db.addDepLocked(dc, dc)
-		db.numOpen++
 	} else {
+		db.numOpen--
 		ci.Close()
 	}
 }
@@ -701,7 +705,10 @@
 		req := make(chan connRequest, 1)
 		db.connRequests = append(db.connRequests, req)
 		db.mu.Unlock()
-		ret := <-req
+		ret, ok := <-req
+		if !ok {
+			return nil, errDBClosed
+		}
 		return ret.conn, ret.err
 	}
 
@@ -711,6 +718,7 @@
 	if err != nil {
 		db.mu.Lock()
 		db.numOpen-- // correct for earlier optimism
+		db.maybeOpenNewConnections()
 		db.mu.Unlock()
 		return nil, err
 	}
@@ -1576,9 +1584,9 @@
 	s.closed = true
 
 	if s.tx != nil {
-		s.txsi.Close()
+		err := s.txsi.Close()
 		s.mu.Unlock()
-		return nil
+		return err
 	}
 	s.mu.Unlock()
 
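
Folding pendingOpens into numOpen means the counter is bumped optimistically before the dial and rolled back on every failure path, with maybeOpenNewConnections re-run so queued requests are not orphaned (the deadlock in issue 10886). A minimal sketch of the invariant, with hypothetical names:

	package main

	import (
		"errors"
		"fmt"
		"sync"
	)

	// pool sketches only the accounting: numOpen counts open plus
	// in-flight connections, so maxOpen is never exceeded.
	type pool struct {
		mu      sync.Mutex
		numOpen int
		maxOpen int
	}

	func (p *pool) open(dial func() error) error {
		p.mu.Lock()
		if p.numOpen >= p.maxOpen {
			p.mu.Unlock()
			return errors.New("pool: at limit")
		}
		p.numOpen++ // optimistically reserve a slot
		p.mu.Unlock()

		if err := dial(); err != nil {
			p.mu.Lock()
			p.numOpen-- // roll the reservation back
			// The real code also re-runs maybeOpenNewConnections here
			// so waiting requests get another chance.
			p.mu.Unlock()
			return err
		}
		return nil
	}

	func main() {
		p := &pool{maxOpen: 1}
		_ = p.open(func() error { return errors.New("offline") })
		fmt.Println(p.numOpen) // 0: the failed dial released its slot
	}
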
diff --git a/src/database/sql/sql_test.go b/src/database/sql/sql_test.go
index b4135a3..d835bc1 100644
--- a/src/database/sql/sql_test.go
+++ b/src/database/sql/sql_test.go
@@ -356,6 +356,44 @@
 	}
 }
 
+type stubDriverStmt struct {
+	err error
+}
+
+func (s stubDriverStmt) Close() error {
+	return s.err
+}
+
+func (s stubDriverStmt) NumInput() int {
+	return -1
+}
+
+func (s stubDriverStmt) Exec(args []driver.Value) (driver.Result, error) {
+	return nil, nil
+}
+
+func (s stubDriverStmt) Query(args []driver.Value) (driver.Rows, error) {
+	return nil, nil
+}
+
+// golang.org/issue/12798
+func TestStatementClose(t *testing.T) {
+	want := errors.New("STMT ERROR")
+
+	tests := []struct {
+		stmt *Stmt
+		msg  string
+	}{
+		{&Stmt{stickyErr: want}, "stickyErr not propagated"},
+		{&Stmt{tx: &Tx{}, txsi: &driverStmt{&sync.Mutex{}, stubDriverStmt{want}}}, "driverStmt.Close() error not propagated"},
+	}
+	for _, test := range tests {
+		if err := test.stmt.Close(); err != want {
+			t.Errorf("%s. Got stmt.Close() = %v, want = %v", test.msg, err, want)
+		}
+	}
+}
+
 // golang.org/issue/3734
 func TestStatementQueryRowConcurrent(t *testing.T) {
 	db := newTestDB(t, "people")
@@ -1121,6 +1159,67 @@
 	}
 }
 
+// Issue 10886: tests that all connection attempts return when more than
+// DB.maxOpen connections are in flight and the first DB.maxOpen fail.
+func TestPendingConnsAfterErr(t *testing.T) {
+	const (
+		maxOpen = 2
+		tryOpen = maxOpen*2 + 2
+	)
+
+	db := newTestDB(t, "people")
+	defer closeDB(t, db)
+	defer func() {
+		for k, v := range db.lastPut {
+			t.Logf("%p: %v", k, v)
+		}
+	}()
+
+	db.SetMaxOpenConns(maxOpen)
+	db.SetMaxIdleConns(0)
+
+	errOffline := errors.New("db offline")
+	defer func() { setHookOpenErr(nil) }()
+
+	errs := make(chan error, tryOpen)
+
+	unblock := make(chan struct{})
+	setHookOpenErr(func() error {
+		<-unblock // block until all connections are in flight
+		return errOffline
+	})
+
+	var opening sync.WaitGroup
+	opening.Add(tryOpen)
+	for i := 0; i < tryOpen; i++ {
+		go func() {
+			opening.Done() // signal one connection is in flight
+			_, err := db.Exec("INSERT|people|name=Julia,age=19")
+			errs <- err
+		}()
+	}
+
+	opening.Wait()                    // wait for all workers to begin running
+	time.Sleep(10 * time.Millisecond) // make extra sure all workers are blocked
+	close(unblock)                    // let all workers proceed
+
+	const timeout = 100 * time.Millisecond
+	to := time.NewTimer(timeout)
+	defer to.Stop()
+
+	// check that all connections fail without deadlock
+	for i := 0; i < tryOpen; i++ {
+		select {
+		case err := <-errs:
+			if got, want := err, errOffline; got != want {
+				t.Errorf("unexpected err: got %v, want %v", got, want)
+			}
+		case <-to.C:
+			t.Fatalf("orphaned connection request(s), still waiting after %v", timeout)
+		}
+	}
+}
+
 func TestSingleOpenConn(t *testing.T) {
 	db := newTestDB(t, "people")
 	defer closeDB(t, db)
diff --git a/src/debug/dwarf/class_string.go b/src/debug/dwarf/class_string.go
index 0b1206b..d57d9f7 100644
--- a/src/debug/dwarf/class_string.go
+++ b/src/debug/dwarf/class_string.go
@@ -4,14 +4,13 @@
 
 import "fmt"
 
-const _Class_name = "ClassAddressClassBlockClassConstantClassExprLocClassFlagClassLinePtrClassLocListPtrClassMacPtrClassRangeListPtrClassReferenceClassReferenceSigClassStringClassReferenceAltClassStringAlt"
+const _Class_name = "ClassUnknownClassAddressClassBlockClassConstantClassExprLocClassFlagClassLinePtrClassLocListPtrClassMacPtrClassRangeListPtrClassReferenceClassReferenceSigClassStringClassReferenceAltClassStringAlt"
 
-var _Class_index = [...]uint8{0, 12, 22, 35, 47, 56, 68, 83, 94, 111, 125, 142, 153, 170, 184}
+var _Class_index = [...]uint8{0, 12, 24, 34, 47, 59, 68, 80, 95, 106, 123, 137, 154, 165, 182, 196}
 
 func (i Class) String() string {
-	i -= 1
 	if i < 0 || i+1 >= Class(len(_Class_index)) {
-		return fmt.Sprintf("Class(%d)", i+1)
+		return fmt.Sprintf("Class(%d)", i)
 	}
 	return _Class_name[_Class_index[i]:_Class_index[i+1]]
 }
diff --git a/src/debug/dwarf/entry.go b/src/debug/dwarf/entry.go
index d607e5b..5ca8667 100644
--- a/src/debug/dwarf/entry.go
+++ b/src/debug/dwarf/entry.go
@@ -193,8 +193,7 @@
 		if class, ok := attrPtrClass[attr]; ok {
 			return class
 		}
-		b.error("cannot determine class of unknown attribute with formSecOffset")
-		return 0
+		return ClassUnknown
 
 	case formExprloc:
 		return ClassExprLoc
@@ -235,6 +234,9 @@
 //    loclistptr        int64          ClassLocListPtr
 //    macptr            int64          ClassMacPtr
 //    rangelistptr      int64          ClassRangeListPtr
+//
+// For unrecognized or vendor-defined attributes, Class may be
+// ClassUnknown.
 type Field struct {
 	Attr  Attr
 	Val   interface{}
@@ -258,9 +260,12 @@
 type Class int
 
 const (
+	// ClassUnknown represents values of unknown DWARF class.
+	ClassUnknown Class = iota
+
 	// ClassAddress represents values of type uint64 that are
 	// addresses on the target machine.
-	ClassAddress Class = 1 + iota
+	ClassAddress
 
 	// ClassBlock represents values of type []byte whose
 	// interpretation depends on the attribute.
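
Placing ClassUnknown at iota zero also gives Field an honest zero value: an uninitialized Class now stringifies as ClassUnknown rather than the out-of-range Class(0) the old 1-based numbering produced. A small check:

	package main

	import (
		"debug/dwarf"
		"fmt"
	)

	func main() {
		var f dwarf.Field // zero value
		fmt.Println(f.Class)                       // ClassUnknown
		fmt.Println(f.Class == dwarf.ClassUnknown) // true
	}
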
diff --git a/src/debug/dwarf/entry_test.go b/src/debug/dwarf/entry_test.go
new file mode 100644
index 0000000..8bd2d2a
--- /dev/null
+++ b/src/debug/dwarf/entry_test.go
@@ -0,0 +1,36 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package dwarf_test
+
+import (
+	. "debug/dwarf"
+	"testing"
+)
+
+func TestSplit(t *testing.T) {
+	// debug/dwarf doesn't (currently) support split DWARF, but
+	// the attributes that pointed to the split DWARF used to
+	// cause loading the DWARF data to fail entirely (issue
+	// #12592). Test that we can at least read the DWARF data.
+	d := elfData(t, "testdata/split.elf")
+	r := d.Reader()
+	e, err := r.Next()
+	if err != nil {
+		t.Fatal(err)
+	}
+	if e.Tag != TagCompileUnit {
+		t.Fatalf("bad tag: have %s, want %s", e.Tag, TagCompileUnit)
+	}
+	// Check that we were able to parse the unknown section offset
+	// field, even if we can't figure out its DWARF class.
+	const AttrGNUAddrBase Attr = 0x2133
+	f := e.AttrField(AttrGNUAddrBase)
+	if _, ok := f.Val.(int64); !ok {
+		t.Fatalf("bad attribute value type: have %T, want int64", f.Val)
+	}
+	if f.Class != ClassUnknown {
+		t.Fatalf("bad class: have %s, want %s", f.Class, ClassUnknown)
+	}
+}
diff --git a/src/debug/dwarf/testdata/split.c b/src/debug/dwarf/testdata/split.c
new file mode 100644
index 0000000..0ef3427
--- /dev/null
+++ b/src/debug/dwarf/testdata/split.c
@@ -0,0 +1,5 @@
+// gcc -gsplit-dwarf split.c -o split.elf
+
+int main() 
+{
+}
diff --git a/src/debug/dwarf/testdata/split.elf b/src/debug/dwarf/testdata/split.elf
new file mode 100644
index 0000000..99ee2c2
--- /dev/null
+++ b/src/debug/dwarf/testdata/split.elf
Binary files differ
diff --git a/src/debug/gosym/pclntab_test.go b/src/debug/gosym/pclntab_test.go
index 53f3e95..8d4aa54 100644
--- a/src/debug/gosym/pclntab_test.go
+++ b/src/debug/gosym/pclntab_test.go
@@ -6,6 +6,7 @@
 
 import (
 	"debug/elf"
+	"internal/testenv"
 	"io/ioutil"
 	"os"
 	"os/exec"
@@ -20,25 +21,16 @@
 	pclinetestBinary string
 )
 
-func dotest(self bool) bool {
+func dotest(t *testing.T) {
+	testenv.MustHaveGoBuild(t)
 	// For now, only works on amd64 platforms.
 	if runtime.GOARCH != "amd64" {
-		return false
-	}
-	// Self test reads test binary; only works on Linux.
-	if self && runtime.GOOS != "linux" {
-		return false
-	}
-	if pclinetestBinary != "" {
-		return true
+		t.Skipf("skipping on non-AMD64 system %s", runtime.GOARCH)
 	}
 	var err error
 	pclineTempDir, err = ioutil.TempDir("", "pclinetest")
 	if err != nil {
-		panic(err)
-	}
-	if strings.Contains(pclineTempDir, " ") {
-		panic("unexpected space in tempdir")
+		t.Fatal(err)
 	}
 	// This command builds pclinetest from pclinetest.asm;
 	// the resulting binary looks like it was built from pclinetest.s,
@@ -48,16 +40,15 @@
 	cmd.Stdout = os.Stdout
 	cmd.Stderr = os.Stderr
 	if err := cmd.Run(); err != nil {
-		panic(err)
+		t.Fatal(err)
 	}
 	cmd = exec.Command("go", "tool", "link", "-H", "linux", "-E", "main",
 		"-o", pclinetestBinary, pclinetestBinary+".o")
 	cmd.Stdout = os.Stdout
 	cmd.Stderr = os.Stderr
 	if err := cmd.Run(); err != nil {
-		panic(err)
+		t.Fatal(err)
 	}
-	return true
 }
 
 func endtest() {
@@ -68,6 +59,17 @@
 	}
 }
 
+// skipIfNotELF skips the test if we are not running on an ELF system.
+// These tests open and examine the test binary, and use elf.Open to do so.
+func skipIfNotELF(t *testing.T) {
+	switch runtime.GOOS {
+	case "dragonfly", "freebsd", "linux", "netbsd", "openbsd", "solaris":
+		// OK.
+	default:
+		t.Skipf("skipping on non-ELF system %s", runtime.GOOS)
+	}
+}
+
 func getTable(t *testing.T) *Table {
 	f, tab := crack(os.Args[0], t)
 	f.Close()
@@ -112,10 +114,7 @@
 var goarch = os.Getenv("O")
 
 func TestLineFromAline(t *testing.T) {
-	if !dotest(true) {
-		return
-	}
-	defer endtest()
+	skipIfNotELF(t)
 
 	tab := getTable(t)
 	if tab.go12line != nil {
@@ -164,10 +163,7 @@
 }
 
 func TestLineAline(t *testing.T) {
-	if !dotest(true) {
-		return
-	}
-	defer endtest()
+	skipIfNotELF(t)
 
 	tab := getTable(t)
 	if tab.go12line != nil {
@@ -210,9 +206,7 @@
 }
 
 func TestPCLine(t *testing.T) {
-	if !dotest(false) {
-		return
-	}
+	dotest(t)
 	defer endtest()
 
 	f, tab := crack(pclinetestBinary, t)
diff --git a/src/encoding/base64/base64_test.go b/src/encoding/base64/base64_test.go
index d144b96..4bbb2dd 100644
--- a/src/encoding/base64/base64_test.go
+++ b/src/encoding/base64/base64_test.go
@@ -80,11 +80,11 @@
 }
 
 var encodingTests = []encodingTest{
-	encodingTest{StdEncoding, stdRef},
-	encodingTest{URLEncoding, urlRef},
-	encodingTest{RawStdEncoding, rawRef},
-	encodingTest{RawURLEncoding, rawUrlRef},
-	encodingTest{funnyEncoding, funnyRef},
+	{StdEncoding, stdRef},
+	{URLEncoding, urlRef},
+	{RawStdEncoding, rawRef},
+	{RawURLEncoding, rawUrlRef},
+	{funnyEncoding, funnyRef},
 }
 
 var bigtest = testpair{
diff --git a/src/encoding/binary/binary.go b/src/encoding/binary/binary.go
index 2bbe07c..1c2577b 100644
--- a/src/encoding/binary/binary.go
+++ b/src/encoding/binary/binary.go
@@ -135,6 +135,10 @@
 // blank (_) field names is skipped; i.e., blank field names
 // may be used for padding.
 // When reading into a struct, all non-blank fields must be exported.
+//
+// The error is EOF only if no bytes were read.
+// If an EOF happens after reading some but not all the bytes,
+// Read returns ErrUnexpectedEOF.
 func Read(r io.Reader, order ByteOrder, data interface{}) error {
 	// Fast path for basic types and slices.
 	if n := intDataSize(data); n != 0 {
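
The documented contract mirrors io.ReadFull: a bare io.EOF only when no bytes were read at all, io.ErrUnexpectedEOF once a value was started. A quick sketch:

	package main

	import (
		"bytes"
		"encoding/binary"
		"fmt"
	)

	func main() {
		var x uint32
		err := binary.Read(bytes.NewReader(nil), binary.LittleEndian, &x)
		fmt.Println(err) // EOF: no bytes read

		err = binary.Read(bytes.NewReader([]byte{1, 2}), binary.LittleEndian, &x)
		fmt.Println(err) // unexpected EOF: 2 of 4 bytes
	}
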
diff --git a/src/encoding/binary/binary_test.go b/src/encoding/binary/binary_test.go
index 8ee595f..7fd36fa 100644
--- a/src/encoding/binary/binary_test.go
+++ b/src/encoding/binary/binary_test.go
@@ -309,6 +309,36 @@
 	read(&p)
 }
 
+func TestReadTruncated(t *testing.T) {
+	const data = "0123456789abcdef"
+
+	var b1 = make([]int32, 4)
+	var b2 struct {
+		A, B, C, D byte
+		E          int32
+		F          float64
+	}
+
+	for i := 0; i <= len(data); i++ {
+		var errWant error
+		switch i {
+		case 0:
+			errWant = io.EOF
+		case len(data):
+			errWant = nil
+		default:
+			errWant = io.ErrUnexpectedEOF
+		}
+
+		if err := Read(strings.NewReader(data[:i]), LittleEndian, &b1); err != errWant {
+			t.Errorf("Read(%d) with slice: got %v, want %v", i, err, errWant)
+		}
+		if err := Read(strings.NewReader(data[:i]), LittleEndian, &b2); err != errWant {
+			t.Errorf("Read(%d) with struct: got %v, want %v", i, err, errWant)
+		}
+	}
+}
+
 type byteSliceReader struct {
 	remain []byte
 }
diff --git a/src/encoding/csv/reader.go b/src/encoding/csv/reader.go
index 37bf80c..a6bb780 100644
--- a/src/encoding/csv/reader.go
+++ b/src/encoding/csv/reader.go
@@ -155,7 +155,7 @@
 
 // ReadAll reads all the remaining records from r.
 // Each record is a slice of fields.
-// A successful call returns err == nil, not err == EOF. Because ReadAll is
+// A successful call returns err == nil, not err == io.EOF. Because ReadAll is
 // defined to read until EOF, it does not treat end of file as an error to be
 // reported.
 func (r *Reader) ReadAll() (records [][]string, err error) {
diff --git a/src/encoding/gob/doc.go b/src/encoding/gob/doc.go
index 4d3d007..481c757 100644
--- a/src/encoding/gob/doc.go
+++ b/src/encoding/gob/doc.go
@@ -82,6 +82,12 @@
 allocated. Regardless, the length of the resulting slice reports the number of
 elements decoded.
 
+In general, if allocation is required, the decoder will allocate memory. If not,
+it will update the destination variables with values read from the stream. It does
+not initialize them first, so if the destination is a compound value such as a
+map, struct, or slice, the decoded values will be merged elementwise into the
+existing variables.
+
 Functions and channels will not be sent in a gob. Attempting to encode such a value
 at the top level will fail. A struct field of chan or func type is treated exactly
 like an unexported field and is ignored.
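
The merge behaviour documented above is easiest to see with a map destination: entries already present survive a Decode unless the stream overwrites them. A short illustration:

	package main

	import (
		"bytes"
		"encoding/gob"
		"fmt"
	)

	func main() {
		var buf bytes.Buffer
		if err := gob.NewEncoder(&buf).Encode(map[string]int{"b": 2}); err != nil {
			panic(err)
		}

		dst := map[string]int{"a": 1} // existing state is not cleared
		if err := gob.NewDecoder(&buf).Decode(&dst); err != nil {
			panic(err)
		}
		fmt.Println(dst) // map[a:1 b:2]
	}
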
diff --git a/src/encoding/gob/type.go b/src/encoding/gob/type.go
index a49b71a..cf5cec0 100644
--- a/src/encoding/gob/type.go
+++ b/src/encoding/gob/type.go
@@ -787,7 +787,7 @@
 // contain things such as private fields, channels, and functions,
 // which are not usually transmissible in gob streams.
 //
-// Note: Since gobs can be stored permanently, It is good design
+// Note: Since gobs can be stored permanently, it is good design
 // to guarantee the encoding used by a GobEncoder is stable as the
 // software evolves.  For instance, it might make sense for GobEncode
 // to include a version number in the encoding.
diff --git a/src/encoding/json/decode.go b/src/encoding/json/decode.go
index 530e852..e7e8d0b 100644
--- a/src/encoding/json/decode.go
+++ b/src/encoding/json/decode.go
@@ -57,7 +57,7 @@
 //
 // If a JSON value is not appropriate for a given target type,
 // or if a JSON number overflows the target type, Unmarshal
-// skips that field and completes the unmarshalling as best it can.
+// skips that field and completes the unmarshaling as best it can.
 // If no more serious errors are encountered, Unmarshal returns
 // an UnmarshalTypeError describing the earliest such error.
 //
@@ -241,7 +241,7 @@
 			newOp = d.scan.eof()
 			d.off = len(d.data) + 1 // mark processed EOF with len+1
 		} else {
-			c := int(d.data[d.off])
+			c := d.data[d.off]
 			d.off++
 			newOp = d.scan.step(&d.scan, c)
 		}
diff --git a/src/encoding/json/decode_test.go b/src/encoding/json/decode_test.go
index 8aa158f..e9e00e5 100644
--- a/src/encoding/json/decode_test.go
+++ b/src/encoding/json/decode_test.go
@@ -716,7 +716,7 @@
 }
 
 func noSpace(c rune) rune {
-	if isSpace(c) {
+	if isSpace(byte(c)) { // only used for ASCII
 		return -1
 	}
 	return c
@@ -1206,12 +1206,12 @@
 
 	data, err := Marshal(m1)
 	if err != nil {
-		t.Errorf("Unexpected error marshalling: %v", err)
+		t.Errorf("Unexpected error marshaling: %v", err)
 	}
 
 	err = Unmarshal(data, &m2)
 	if err != nil {
-		t.Errorf("Unexpected error unmarshalling: %v", err)
+		t.Errorf("Unexpected error unmarshaling: %v", err)
 	}
 
 	if !reflect.DeepEqual(m1, m2) {
diff --git a/src/encoding/json/encode.go b/src/encoding/json/encode.go
index 90782de..60d1c90 100644
--- a/src/encoding/json/encode.go
+++ b/src/encoding/json/encode.go
@@ -30,7 +30,10 @@
 // Marshal traverses the value v recursively.
 // If an encountered value implements the Marshaler interface
 // and is not a nil pointer, Marshal calls its MarshalJSON method
-// to produce JSON.  The nil pointer exception is not strictly necessary
+// to produce JSON. If no MarshalJSON method is present but the
+// value implements encoding.TextMarshaler instead, Marshal calls
+// its MarshalText method.
+// The nil pointer exception is not strictly necessary
 // but mimics a similar, necessary exception in the behavior of
 // UnmarshalJSON.
 //
@@ -445,12 +448,10 @@
 	}
 	m := v.Interface().(encoding.TextMarshaler)
 	b, err := m.MarshalText()
-	if err == nil {
-		_, err = e.stringBytes(b)
-	}
 	if err != nil {
 		e.error(&MarshalerError{v.Type(), err})
 	}
+	e.stringBytes(b)
 }
 
 func addrTextMarshalerEncoder(e *encodeState, v reflect.Value, quoted bool) {
@@ -461,12 +462,10 @@
 	}
 	m := va.Interface().(encoding.TextMarshaler)
 	b, err := m.MarshalText()
-	if err == nil {
-		_, err = e.stringBytes(b)
-	}
 	if err != nil {
 		e.error(&MarshalerError{v.Type(), err})
 	}
+	e.stringBytes(b)
 }
 
 func boolEncoder(e *encodeState, v reflect.Value, quoted bool) {
@@ -780,7 +779,7 @@
 func (sv stringValues) get(i int) string   { return sv[i].String() }
 
 // NOTE: keep in sync with stringBytes below.
-func (e *encodeState) string(s string) (int, error) {
+func (e *encodeState) string(s string) int {
 	len0 := e.Len()
 	e.WriteByte('"')
 	start := 0
@@ -852,11 +851,11 @@
 		e.WriteString(s[start:])
 	}
 	e.WriteByte('"')
-	return e.Len() - len0, nil
+	return e.Len() - len0
 }
 
 // NOTE: keep in sync with string above.
-func (e *encodeState) stringBytes(s []byte) (int, error) {
+func (e *encodeState) stringBytes(s []byte) int {
 	len0 := e.Len()
 	e.WriteByte('"')
 	start := 0
@@ -928,7 +927,7 @@
 		e.Write(s[start:])
 	}
 	e.WriteByte('"')
-	return e.Len() - len0, nil
+	return e.Len() - len0
 }
 
 // A field represents a single field found in a struct.
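
The clarified Marshal doc describes the TextMarshaler fallback: a type with MarshalText but no MarshalJSON is rendered as a JSON string holding its text form. A small example:

	package main

	import (
		"encoding/json"
		"fmt"
	)

	type Point struct{ X, Y int }

	// MarshalText makes Point an encoding.TextMarshaler; with no
	// MarshalJSON defined, json.Marshal falls back to this method.
	func (p Point) MarshalText() ([]byte, error) {
		return []byte(fmt.Sprintf("%d,%d", p.X, p.Y)), nil
	}

	func main() {
		b, err := json.Marshal(Point{1, 2})
		fmt.Println(string(b), err) // "1,2" <nil>
	}
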
diff --git a/src/encoding/json/encode_test.go b/src/encoding/json/encode_test.go
index 7abfa85..2206b2e 100644
--- a/src/encoding/json/encode_test.go
+++ b/src/encoding/json/encode_test.go
@@ -381,16 +381,10 @@
 		r = append(r, i)
 	}
 	s := string(r) + "\xff\xff\xffhello" // some invalid UTF-8 too
-	_, err := es.string(s)
-	if err != nil {
-		t.Fatal(err)
-	}
+	es.string(s)
 
 	esBytes := &encodeState{}
-	_, err = esBytes.stringBytes([]byte(s))
-	if err != nil {
-		t.Fatal(err)
-	}
+	esBytes.stringBytes([]byte(s))
 
 	enc := es.Buffer.String()
 	encBytes := esBytes.Buffer.String()
diff --git a/src/encoding/json/indent.go b/src/encoding/json/indent.go
index e1bacafd6..153109f 100644
--- a/src/encoding/json/indent.go
+++ b/src/encoding/json/indent.go
@@ -36,7 +36,7 @@
 			dst.WriteByte(hex[src[i+2]&0xF])
 			start = i + 3
 		}
-		v := scan.step(&scan, int(c))
+		v := scan.step(&scan, c)
 		if v >= scanSkipSpace {
 			if v == scanError {
 				break
@@ -80,7 +80,7 @@
 	depth := 0
 	for _, c := range src {
 		scan.bytes++
-		v := scan.step(&scan, int(c))
+		v := scan.step(&scan, c)
 		if v == scanSkipSpace {
 			continue
 		}
diff --git a/src/encoding/json/scanner.go b/src/encoding/json/scanner.go
index 38d0b08..ee6622e 100644
--- a/src/encoding/json/scanner.go
+++ b/src/encoding/json/scanner.go
@@ -21,7 +21,7 @@
 	scan.reset()
 	for _, c := range data {
 		scan.bytes++
-		if scan.step(scan, int(c)) == scanError {
+		if scan.step(scan, c) == scanError {
 			return scan.err
 		}
 	}
@@ -37,7 +37,7 @@
 func nextValue(data []byte, scan *scanner) (value, rest []byte, err error) {
 	scan.reset()
 	for i, c := range data {
-		v := scan.step(scan, int(c))
+		v := scan.step(scan, c)
 		if v >= scanEndObject {
 			switch v {
 			// probe the scanner with a space to determine whether we will
@@ -50,7 +50,7 @@
 			case scanError:
 				return nil, nil, scan.err
 			case scanEnd:
-				return data[0:i], data[i:], nil
+				return data[:i], data[i:], nil
 			}
 		}
 	}
@@ -85,7 +85,7 @@
 	// Also tried using an integer constant and a single func
 	// with a switch, but using the func directly was 10% faster
 	// on a 64-bit Mac Mini, and it's nicer to read.
-	step func(*scanner, int) int
+	step func(*scanner, byte) int
 
 	// Reached end of top-level value.
 	endTop bool
@@ -99,7 +99,7 @@
 	// 1-byte redo (see undo method)
 	redo      bool
 	redoCode  int
-	redoState func(*scanner, int) int
+	redoState func(*scanner, byte) int
 
 	// total bytes consumed, updated by decoder.Decode
 	bytes int64
@@ -188,13 +188,13 @@
 	}
 }
 
-func isSpace(c rune) bool {
+func isSpace(c byte) bool {
 	return c == ' ' || c == '\t' || c == '\r' || c == '\n'
 }
 
 // stateBeginValueOrEmpty is the state after reading `[`.
-func stateBeginValueOrEmpty(s *scanner, c int) int {
-	if c <= ' ' && isSpace(rune(c)) {
+func stateBeginValueOrEmpty(s *scanner, c byte) int {
+	if c <= ' ' && isSpace(c) {
 		return scanSkipSpace
 	}
 	if c == ']' {
@@ -204,8 +204,8 @@
 }
 
 // stateBeginValue is the state at the beginning of the input.
-func stateBeginValue(s *scanner, c int) int {
-	if c <= ' ' && isSpace(rune(c)) {
+func stateBeginValue(s *scanner, c byte) int {
+	if c <= ' ' && isSpace(c) {
 		return scanSkipSpace
 	}
 	switch c {
@@ -244,8 +244,8 @@
 }
 
 // stateBeginStringOrEmpty is the state after reading `{`.
-func stateBeginStringOrEmpty(s *scanner, c int) int {
-	if c <= ' ' && isSpace(rune(c)) {
+func stateBeginStringOrEmpty(s *scanner, c byte) int {
+	if c <= ' ' && isSpace(c) {
 		return scanSkipSpace
 	}
 	if c == '}' {
@@ -257,8 +257,8 @@
 }
 
 // stateBeginString is the state after reading `{"key": value,`.
-func stateBeginString(s *scanner, c int) int {
-	if c <= ' ' && isSpace(rune(c)) {
+func stateBeginString(s *scanner, c byte) int {
+	if c <= ' ' && isSpace(c) {
 		return scanSkipSpace
 	}
 	if c == '"' {
@@ -270,7 +270,7 @@
 
 // stateEndValue is the state after completing a value,
 // such as after reading `{}` or `true` or `["x"`.
-func stateEndValue(s *scanner, c int) int {
+func stateEndValue(s *scanner, c byte) int {
 	n := len(s.parseState)
 	if n == 0 {
 		// Completed top-level before the current byte.
@@ -278,7 +278,7 @@
 		s.endTop = true
 		return stateEndTop(s, c)
 	}
-	if c <= ' ' && isSpace(rune(c)) {
+	if c <= ' ' && isSpace(c) {
 		s.step = stateEndValue
 		return scanSkipSpace
 	}
@@ -319,7 +319,7 @@
 // stateEndTop is the state after finishing the top-level value,
 // such as after reading `{}` or `[1,2,3]`.
 // Only space characters should be seen now.
-func stateEndTop(s *scanner, c int) int {
+func stateEndTop(s *scanner, c byte) int {
 	if c != ' ' && c != '\t' && c != '\r' && c != '\n' {
 		// Complain about non-space byte on next call.
 		s.error(c, "after top-level value")
@@ -328,7 +328,7 @@
 }
 
 // stateInString is the state after reading `"`.
-func stateInString(s *scanner, c int) int {
+func stateInString(s *scanner, c byte) int {
 	if c == '"' {
 		s.step = stateEndValue
 		return scanContinue
@@ -344,13 +344,12 @@
 }
 
 // stateInStringEsc is the state after reading `"\` during a quoted string.
-func stateInStringEsc(s *scanner, c int) int {
+func stateInStringEsc(s *scanner, c byte) int {
 	switch c {
 	case 'b', 'f', 'n', 'r', 't', '\\', '/', '"':
 		s.step = stateInString
 		return scanContinue
-	}
-	if c == 'u' {
+	case 'u':
 		s.step = stateInStringEscU
 		return scanContinue
 	}
@@ -358,7 +357,7 @@
 }
 
 // stateInStringEscU is the state after reading `"\u` during a quoted string.
-func stateInStringEscU(s *scanner, c int) int {
+func stateInStringEscU(s *scanner, c byte) int {
 	if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
 		s.step = stateInStringEscU1
 		return scanContinue
@@ -368,7 +367,7 @@
 }
 
 // stateInStringEscU1 is the state after reading `"\u1` during a quoted string.
-func stateInStringEscU1(s *scanner, c int) int {
+func stateInStringEscU1(s *scanner, c byte) int {
 	if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
 		s.step = stateInStringEscU12
 		return scanContinue
@@ -378,7 +377,7 @@
 }
 
 // stateInStringEscU12 is the state after reading `"\u12` during a quoted string.
-func stateInStringEscU12(s *scanner, c int) int {
+func stateInStringEscU12(s *scanner, c byte) int {
 	if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
 		s.step = stateInStringEscU123
 		return scanContinue
@@ -388,7 +387,7 @@
 }
 
 // stateInStringEscU123 is the state after reading `"\u123` during a quoted string.
-func stateInStringEscU123(s *scanner, c int) int {
+func stateInStringEscU123(s *scanner, c byte) int {
 	if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
 		s.step = stateInString
 		return scanContinue
@@ -398,7 +397,7 @@
 }
 
 // stateNeg is the state after reading `-` during a number.
-func stateNeg(s *scanner, c int) int {
+func stateNeg(s *scanner, c byte) int {
 	if c == '0' {
 		s.step = state0
 		return scanContinue
@@ -412,7 +411,7 @@
 
 // state1 is the state after reading a non-zero integer during a number,
 // such as after reading `1` or `100` but not `0`.
-func state1(s *scanner, c int) int {
+func state1(s *scanner, c byte) int {
 	if '0' <= c && c <= '9' {
 		s.step = state1
 		return scanContinue
@@ -421,7 +420,7 @@
 }
 
 // state0 is the state after reading `0` during a number.
-func state0(s *scanner, c int) int {
+func state0(s *scanner, c byte) int {
 	if c == '.' {
 		s.step = stateDot
 		return scanContinue
@@ -435,7 +434,7 @@
 
 // stateDot is the state after reading the integer and decimal point in a number,
 // such as after reading `1.`.
-func stateDot(s *scanner, c int) int {
+func stateDot(s *scanner, c byte) int {
 	if '0' <= c && c <= '9' {
 		s.step = stateDot0
 		return scanContinue
@@ -445,9 +444,8 @@
 
 // stateDot0 is the state after reading the integer, decimal point, and subsequent
 // digits of a number, such as after reading `3.14`.
-func stateDot0(s *scanner, c int) int {
+func stateDot0(s *scanner, c byte) int {
 	if '0' <= c && c <= '9' {
-		s.step = stateDot0
 		return scanContinue
 	}
 	if c == 'e' || c == 'E' {
@@ -459,12 +457,8 @@
 
 // stateE is the state after reading the mantissa and e in a number,
 // such as after reading `314e` or `0.314e`.
-func stateE(s *scanner, c int) int {
-	if c == '+' {
-		s.step = stateESign
-		return scanContinue
-	}
-	if c == '-' {
+func stateE(s *scanner, c byte) int {
+	if c == '+' || c == '-' {
 		s.step = stateESign
 		return scanContinue
 	}
@@ -473,7 +467,7 @@
 
 // stateESign is the state after reading the mantissa, e, and sign in a number,
 // such as after reading `314e-` or `0.314e+`.
-func stateESign(s *scanner, c int) int {
+func stateESign(s *scanner, c byte) int {
 	if '0' <= c && c <= '9' {
 		s.step = stateE0
 		return scanContinue
@@ -484,16 +478,15 @@
 // stateE0 is the state after reading the mantissa, e, optional sign,
 // and at least one digit of the exponent in a number,
 // such as after reading `314e-2` or `0.314e+1` or `3.14e0`.
-func stateE0(s *scanner, c int) int {
+func stateE0(s *scanner, c byte) int {
 	if '0' <= c && c <= '9' {
-		s.step = stateE0
 		return scanContinue
 	}
 	return stateEndValue(s, c)
 }
 
 // stateT is the state after reading `t`.
-func stateT(s *scanner, c int) int {
+func stateT(s *scanner, c byte) int {
 	if c == 'r' {
 		s.step = stateTr
 		return scanContinue
@@ -502,7 +495,7 @@
 }
 
 // stateTr is the state after reading `tr`.
-func stateTr(s *scanner, c int) int {
+func stateTr(s *scanner, c byte) int {
 	if c == 'u' {
 		s.step = stateTru
 		return scanContinue
@@ -511,7 +504,7 @@
 }
 
 // stateTru is the state after reading `tru`.
-func stateTru(s *scanner, c int) int {
+func stateTru(s *scanner, c byte) int {
 	if c == 'e' {
 		s.step = stateEndValue
 		return scanContinue
@@ -520,7 +513,7 @@
 }
 
 // stateF is the state after reading `f`.
-func stateF(s *scanner, c int) int {
+func stateF(s *scanner, c byte) int {
 	if c == 'a' {
 		s.step = stateFa
 		return scanContinue
@@ -529,7 +522,7 @@
 }
 
 // stateFa is the state after reading `fa`.
-func stateFa(s *scanner, c int) int {
+func stateFa(s *scanner, c byte) int {
 	if c == 'l' {
 		s.step = stateFal
 		return scanContinue
@@ -538,7 +531,7 @@
 }
 
 // stateFal is the state after reading `fal`.
-func stateFal(s *scanner, c int) int {
+func stateFal(s *scanner, c byte) int {
 	if c == 's' {
 		s.step = stateFals
 		return scanContinue
@@ -547,7 +540,7 @@
 }
 
 // stateFals is the state after reading `fals`.
-func stateFals(s *scanner, c int) int {
+func stateFals(s *scanner, c byte) int {
 	if c == 'e' {
 		s.step = stateEndValue
 		return scanContinue
@@ -556,7 +549,7 @@
 }
 
 // stateN is the state after reading `n`.
-func stateN(s *scanner, c int) int {
+func stateN(s *scanner, c byte) int {
 	if c == 'u' {
 		s.step = stateNu
 		return scanContinue
@@ -565,7 +558,7 @@
 }
 
 // stateNu is the state after reading `nu`.
-func stateNu(s *scanner, c int) int {
+func stateNu(s *scanner, c byte) int {
 	if c == 'l' {
 		s.step = stateNul
 		return scanContinue
@@ -574,7 +567,7 @@
 }
 
 // stateNul is the state after reading `nul`.
-func stateNul(s *scanner, c int) int {
+func stateNul(s *scanner, c byte) int {
 	if c == 'l' {
 		s.step = stateEndValue
 		return scanContinue
@@ -584,19 +577,19 @@
 
 // stateError is the state after reaching a syntax error,
 // such as after reading `[1}` or `5.1.2`.
-func stateError(s *scanner, c int) int {
+func stateError(s *scanner, c byte) int {
 	return scanError
 }
 
 // error records an error and switches to the error state.
-func (s *scanner) error(c int, context string) int {
+func (s *scanner) error(c byte, context string) int {
 	s.step = stateError
 	s.err = &SyntaxError{"invalid character " + quoteChar(c) + " " + context, s.bytes}
 	return scanError
 }
 
 // quoteChar formats c as a quoted character literal
-func quoteChar(c int) string {
+func quoteChar(c byte) string {
 	// special cases - different from quoted strings
 	if c == '\'' {
 		return `'\''`
@@ -623,7 +616,7 @@
 }
 
 // stateRedo helps implement the scanner's 1-byte undo.
-func stateRedo(s *scanner, c int) int {
+func stateRedo(s *scanner, c byte) int {
 	s.redo = false
 	s.step = s.redoState
 	return s.redoCode
diff --git a/src/encoding/json/stream.go b/src/encoding/json/stream.go
index dc53bce..8ddcf4d 100644
--- a/src/encoding/json/stream.go
+++ b/src/encoding/json/stream.go
@@ -90,7 +90,7 @@
 		// Look in the buffer for a new value.
 		for i, c := range dec.buf[scanp:] {
 			dec.scan.bytes++
-			v := dec.scan.step(&dec.scan, int(c))
+			v := dec.scan.step(&dec.scan, c)
 			if v == scanEnd {
 				scanp += i
 				break Input
@@ -157,7 +157,7 @@
 
 func nonSpace(b []byte) bool {
 	for _, c := range b {
-		if !isSpace(rune(c)) {
+		if !isSpace(c) {
 			return true
 		}
 	}
@@ -433,7 +433,7 @@
 	case tokenObjectComma:
 		context = " after object key:value pair"
 	}
-	return nil, &SyntaxError{"invalid character " + quoteChar(int(c)) + " " + context, 0}
+	return nil, &SyntaxError{"invalid character " + quoteChar(c) + " " + context, 0}
 }
 
 // More reports whether there is another element in the
@@ -448,7 +448,7 @@
 	for {
 		for i := dec.scanp; i < len(dec.buf); i++ {
 			c := dec.buf[i]
-			if isSpace(rune(c)) {
+			if isSpace(c) {
 				continue
 			}
 			dec.scanp = i
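
Note: the scanner's step functions now consume raw bytes, so the decode
loop above can feed buffer bytes directly instead of converting through
int. A minimal, runnable sketch of the same state-machine shape (the
types and state names here are illustrative, not the json package's
unexported ones):

	package main

	import "fmt"

	const (
		scanContinue = iota // byte consumed, keep scanning
		scanError           // invalid byte
	)

	// scanner mirrors the shape of encoding/json's state machine
	// after this change: step takes a byte, not an int.
	type scanner struct {
		step func(s *scanner, c byte) int
	}

	// stateDigits accepts ASCII digits and rejects everything else.
	func stateDigits(s *scanner, c byte) int {
		if '0' <= c && c <= '9' {
			return scanContinue
		}
		return scanError
	}

	func main() {
		s := &scanner{step: stateDigits}
		for _, c := range []byte("314e") {
			fmt.Printf("%q -> %d\n", c, s.step(s, c))
		}
	}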
diff --git a/src/encoding/xml/marshal_test.go b/src/encoding/xml/marshal_test.go
index 66675d7..330fbee 100644
--- a/src/encoding/xml/marshal_test.go
+++ b/src/encoding/xml/marshal_test.go
@@ -1695,7 +1695,7 @@
 	for i := 0; i < 2; i++ {
 		wg.Add(1)
 		go func() {
-			Marshal(B{[]A{A{}}})
+			Marshal(B{[]A{{}}})
 			wg.Done()
 		}()
 	}
diff --git a/src/encoding/xml/xml.go b/src/encoding/xml/xml.go
index 0a21c93..6c7debe 100644
--- a/src/encoding/xml/xml.go
+++ b/src/encoding/xml/xml.go
@@ -245,6 +245,9 @@
 		t = d.nextToken
 		d.nextToken = nil
 	} else if t, err = d.rawToken(); err != nil {
+		if err == io.EOF && d.stk != nil && d.stk.kind != stkEOF {
+			err = d.syntaxError("unexpected EOF")
+		}
 		return
 	}
 
diff --git a/src/encoding/xml/xml_test.go b/src/encoding/xml/xml_test.go
index 312a7c9..19465d7 100644
--- a/src/encoding/xml/xml_test.go
+++ b/src/encoding/xml/xml_test.go
@@ -750,3 +750,24 @@
 		t.Errorf("Marshal generated invalid UTF-8: %x", data)
 	}
 }
+
+func TestIssue11405(t *testing.T) {
+	testCases := []string{
+		"<root>",
+		"<root><foo>",
+		"<root><foo></foo>",
+	}
+	for _, tc := range testCases {
+		d := NewDecoder(strings.NewReader(tc))
+		var err error
+		for {
+			_, err = d.Token()
+			if err != nil {
+				break
+			}
+		}
+		if _, ok := err.(*SyntaxError); !ok {
+			t.Errorf("%s: Token: Got error %v, want SyntaxError", tc, err)
+		}
+	}
+}
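
Note: with the xml.go change above, a document that ends while elements
are still open now yields a *SyntaxError rather than a bare io.EOF,
which is what TestIssue11405 verifies. A small driver showing the new
behavior:

	package main

	import (
		"encoding/xml"
		"fmt"
		"strings"
	)

	func main() {
		d := xml.NewDecoder(strings.NewReader("<root><foo>"))
		for {
			if _, err := d.Token(); err != nil {
				// Prints something like:
				// *xml.SyntaxError: XML syntax error on line 1: unexpected EOF
				fmt.Printf("%T: %v\n", err, err)
				return
			}
		}
	}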
diff --git a/src/fmt/doc.go b/src/fmt/doc.go
index 20a09c5..4eea48e 100644
--- a/src/fmt/doc.go
+++ b/src/fmt/doc.go
@@ -138,8 +138,8 @@
 	formatting considerations apply for operands that implement
 	certain interfaces. In order of application:
 
-	1. If the operand is a reflect.Value, the concrete value it
-	holds is printed as if it was the operand.
+	1. If the operand is a reflect.Value, the operand is replaced by the
+	concrete value that it holds, and printing continues with the next rule.
 
 	2. If an operand implements the Formatter interface, it will
 	be invoked. Formatter provides fine control of formatting.
diff --git a/src/fmt/fmt_test.go b/src/fmt/fmt_test.go
index 90a4031..26383f6 100644
--- a/src/fmt/fmt_test.go
+++ b/src/fmt/fmt_test.go
@@ -1188,6 +1188,11 @@
 	{"%.*d", args(4, 42), "0042"},
 	{"%*.*d", args(8, 4, 42), "    0042"},
 	{"%0*d", args(4, 42), "0042"},
+	// Some non-int types for width. (Issue 10732).
+	{"%0*d", args(uint(4), 42), "0042"},
+	{"%0*d", args(uint64(4), 42), "0042"},
+	{"%0*d", args('\x04', 42), "0042"},
+	{"%0*d", args(uintptr(4), 42), "0042"},
 
 	// erroneous
 	{"%*d", args(nil, 42), "%!(BADWIDTH)42"},
@@ -1196,17 +1201,19 @@
 	{"%.*d", args(nil, 42), "%!(BADPREC)42"},
 	{"%.*d", args(-1, 42), "%!(BADPREC)42"},
 	{"%.*d", args(int(1e7), 42), "%!(BADPREC)42"},
+	{"%.*d", args(uint(1e7), 42), "%!(BADPREC)42"},
+	{"%.*d", args(uint64(1<<63), 42), "%!(BADPREC)42"},   // Huge negative (-inf).
+	{"%.*d", args(uint64(1<<64-1), 42), "%!(BADPREC)42"}, // Small negative (-1).
 	{"%*d", args(5, "foo"), "%!d(string=  foo)"},
 	{"%*% %d", args(20, 5), "% 5"},
 	{"%*", args(4), "%!(NOVERB)"},
-	{"%*d", args(int32(4), 42), "%!(BADWIDTH)42"},
 }
 
 func TestWidthAndPrecision(t *testing.T) {
-	for _, tt := range startests {
+	for i, tt := range startests {
 		s := Sprintf(tt.fmt, tt.in...)
 		if s != tt.out {
-			t.Errorf("%q: got %q expected %q", tt.fmt, s, tt.out)
+			t.Errorf("#%d: %q: got %q expected %q", i, tt.fmt, s, tt.out)
 		}
 	}
 }
diff --git a/src/fmt/print.go b/src/fmt/print.go
index 8d3e97c..ebfa13e 100644
--- a/src/fmt/print.go
+++ b/src/fmt/print.go
@@ -1024,11 +1024,30 @@
 	return wasString
 }
 
-// intFromArg gets the argNumth element of a. On return, isInt reports whether the argument has type int.
+// intFromArg gets the argNumth element of a. On return, isInt reports whether the argument has integer type.
 func intFromArg(a []interface{}, argNum int) (num int, isInt bool, newArgNum int) {
 	newArgNum = argNum
 	if argNum < len(a) {
-		num, isInt = a[argNum].(int)
+		num, isInt = a[argNum].(int) // Almost always OK.
+		if !isInt {
+			// Work harder.
+			switch v := reflect.ValueOf(a[argNum]); v.Kind() {
+			case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
+				n := v.Int()
+				if int64(int(n)) == n {
+					num = int(n)
+					isInt = true
+				}
+			case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
+				n := v.Uint()
+				if int64(n) >= 0 && uint64(int(n)) == n {
+					num = int(n)
+					isInt = true
+				}
+			default:
+				// Already 0, false.
+			}
+		}
 		newArgNum = argNum + 1
 		if tooLarge(num) {
 			num = 0
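
Note: intFromArg now accepts any integer kind for a * width or
precision, falling back to reflection when the common int case fails;
values that do not fit in an int still report BADWIDTH or BADPREC.
For example:

	package main

	import "fmt"

	func main() {
		fmt.Printf("%0*d\n", uint(4), 42)        // 0042 (previously %!(BADWIDTH)42)
		fmt.Printf("%0*d\n", int32(4), 42)       // 0042
		fmt.Printf("%.*f\n", uint64(2), 3.14159) // 3.14
		fmt.Printf("%.*d\n", uint64(1)<<63, 42)  // %!(BADPREC)42: too large for int
	}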
diff --git a/src/go/build/deps_test.go b/src/go/build/deps_test.go
index 7cea949..e3c1469 100644
--- a/src/go/build/deps_test.go
+++ b/src/go/build/deps_test.go
@@ -271,8 +271,8 @@
 
 	// Basic networking.
 	// Because net must be used by any package that wants to
-	// do networking portably, it must have a small dependency set: just L1+basic os.
-	"net": {"L1", "CGO", "os", "syscall", "time", "internal/syscall/windows", "internal/singleflight"},
+	// do networking portably, it must have a small dependency set: just L0+basic os.
+	"net": {"L0", "CGO", "math/rand", "os", "sort", "syscall", "time", "internal/syscall/windows", "internal/singleflight"},
 
 	// NET enables use of basic network-related packages.
 	"NET": {
@@ -351,6 +351,7 @@
 		"L4", "NET", "OS",
 		"compress/gzip", "crypto/tls", "mime/multipart", "runtime/debug",
 		"net/http/internal",
+		"golang.org/x/net/http2/hpack",
 	},
 	"net/http/internal": {"L4"},
 
diff --git a/src/go/format/format.go b/src/go/format/format.go
index 1adfd7d..b9cacfe 100644
--- a/src/go/format/format.go
+++ b/src/go/format/format.go
@@ -12,7 +12,6 @@
 	"go/parser"
 	"go/printer"
 	"go/token"
-	"internal/format"
 	"io"
 )
 
@@ -82,7 +81,7 @@
 //
 func Source(src []byte) ([]byte, error) {
 	fset := token.NewFileSet()
-	file, sourceAdj, indentAdj, err := format.Parse(fset, "", src, true)
+	file, sourceAdj, indentAdj, err := parse(fset, "", src, true)
 	if err != nil {
 		return nil, err
 	}
@@ -93,7 +92,7 @@
 		ast.SortImports(fset, file)
 	}
 
-	return format.Format(fset, file, sourceAdj, indentAdj, src, config)
+	return format(fset, file, sourceAdj, indentAdj, src, config)
 }
 
 func hasUnsortedImports(file *ast.File) bool {
diff --git a/src/go/format/format_test.go b/src/go/format/format_test.go
index 000c611..b5817a5 100644
--- a/src/go/format/format_test.go
+++ b/src/go/format/format_test.go
@@ -72,6 +72,7 @@
 }
 
 // Test cases that are expected to fail are marked by the prefix "ERROR".
+// The formatted result must look the same as the input for successful tests.
 var tests = []string{
 	// declaration lists
 	`import "go/format"`,
@@ -91,11 +92,23 @@
 	"\n\t\t\n\n\t\t\tx := 0\n\t\t\tconst s = `\n\t\tfoo\n`\n\n\n", // no indentation removed inside raw strings
 
 	// comments
-	"i := 5 /* Comment */",         // Issue 5551.
-	"\ta()\n//line :1",             // Issue 11276.
-	"\t//xxx\n\ta()\n//line :2",    // Issue 11276.
-	"\ta() //line :1\n\tb()\n",     // Issue 11276.
-	"x := 0\n//line :1\n//line :2", // Issue 11276.
+	"/* Comment */",
+	"\t/* Comment */ ",
+	"\n/* Comment */ ",
+	"i := 5 /* Comment */",         // issue #5551
+	"\ta()\n//line :1",             // issue #11276
+	"\t//xxx\n\ta()\n//line :2",    // issue #11276
+	"\ta() //line :1\n\tb()\n",     // issue #11276
+	"x := 0\n//line :1\n//line :2", // issue #11276
+
+	// whitespace
+	"",     // issue #11275
+	" ",    // issue #11275
+	"\t",   // issue #11275
+	"\t\t", // issue #11275
+	"\n",   // issue #11275
+	"\n\n", // issue #11275
+	"\t\n", // issue #11275
 
 	// erroneous programs
 	"ERROR1 + 2 +",
diff --git a/src/internal/format/format.go b/src/go/format/internal.go
similarity index 80%
rename from src/internal/format/format.go
rename to src/go/format/internal.go
index a8270ba..9d04878f 100644
--- a/src/internal/format/format.go
+++ b/src/go/format/internal.go
@@ -2,6 +2,11 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
+// TODO(gri): This file and the file src/cmd/gofmt/internal.go are
+// the same (but for this comment and the package name). Do not modify
+// one without the other. Determine if we can factor out functionality
+// in a public API. See also #11844 for context.
+
 package format
 
 import (
@@ -13,11 +18,9 @@
 	"strings"
 )
 
-const parserMode = parser.ParseComments
-
-// Parse parses src, which was read from the named file,
+// parse parses src, which was read from the named file,
 // as a Go source file, declaration, or statement list.
-func Parse(fset *token.FileSet, filename string, src []byte, fragmentOk bool) (
+func parse(fset *token.FileSet, filename string, src []byte, fragmentOk bool) (
 	file *ast.File,
 	sourceAdj func(src []byte, indent int) []byte,
 	indentAdj int,
@@ -85,10 +88,10 @@
 	return
 }
 
-// Format formats the given package file originally obtained from src
+// format formats the given package file originally obtained from src
 // and adjusts the result based on the original source via sourceAdj
 // and indentAdj.
-func Format(
+func format(
 	fset *token.FileSet,
 	file *ast.File,
 	sourceAdj func(src []byte, indent int) []byte,
@@ -109,7 +112,7 @@
 	// Partial source file.
 	// Determine and prepend leading space.
 	i, j := 0, 0
-	for j < len(src) && IsSpace(src[j]) {
+	for j < len(src) && isSpace(src[j]) {
 		if src[j] == '\n' {
 			i = j + 1 // byte offset of last line in leading space
 		}
@@ -146,18 +149,28 @@
 	if err != nil {
 		return nil, err
 	}
-	res = append(res, sourceAdj(buf.Bytes(), cfg.Indent)...)
+	out := sourceAdj(buf.Bytes(), cfg.Indent)
+
+	// If the adjusted output is empty, the source
+	// was empty except (possibly) for white space.
+	// The result is the incoming source.
+	if len(out) == 0 {
+		return src, nil
+	}
+
+	// Otherwise, append output to leading space.
+	res = append(res, out...)
 
 	// Determine and append trailing space.
 	i = len(src)
-	for i > 0 && IsSpace(src[i-1]) {
+	for i > 0 && isSpace(src[i-1]) {
 		i--
 	}
 	return append(res, src[i:]...), nil
 }
 
-// IsSpace reports whether the byte is a space character.
-// IsSpace defines a space as being among the following bytes: ' ', '\t', '\n' and '\r'.
-func IsSpace(b byte) bool {
+// isSpace reports whether the byte is a space character.
+// isSpace defines a space as being among the following bytes: ' ', '\t', '\n' and '\r'.
+func isSpace(b byte) bool {
 	return b == ' ' || b == '\t' || b == '\n' || b == '\r'
 }
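
Note: the empty-output check above makes whitespace-only fragments
round-trip through go/format unchanged (issue #11275). For instance:

	package main

	import (
		"fmt"
		"go/format"
	)

	func main() {
		for _, src := range []string{"", " ", "\t\n"} {
			out, err := format.Source([]byte(src))
			fmt.Printf("%q -> %q (err=%v)\n", src, out, err) // input comes back verbatim
		}
	}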
diff --git a/src/go/parser/parser.go b/src/go/parser/parser.go
index e82c0bd..73edaa0 100644
--- a/src/go/parser/parser.go
+++ b/src/go/parser/parser.go
@@ -410,9 +410,14 @@
 func (p *parser) expectSemi() {
 	// semicolon is optional before a closing ')' or '}'
 	if p.tok != token.RPAREN && p.tok != token.RBRACE {
-		if p.tok == token.SEMICOLON {
+		switch p.tok {
+		case token.COMMA:
+			// permit a ',' instead of a ';' but complain
+			p.errorExpected(p.pos, "';'")
+			fallthrough
+		case token.SEMICOLON:
 			p.next()
-		} else {
+		default:
 			p.errorExpected(p.pos, "';'")
 			syncStmt(p)
 		}
@@ -690,16 +695,19 @@
 
 	doc := p.leadComment
 
-	// FieldDecl
-	list, typ := p.parseVarList(false)
-
-	// Tag
-	var tag *ast.BasicLit
-	if p.tok == token.STRING {
-		tag = &ast.BasicLit{ValuePos: p.pos, Kind: p.tok, Value: p.lit}
+	// 1st FieldDecl
+	// A type name used as an anonymous field looks like a field identifier.
+	var list []ast.Expr
+	for {
+		list = append(list, p.parseVarType(false))
+		if p.tok != token.COMMA {
+			break
+		}
 		p.next()
 	}
 
+	typ := p.tryVarType(false)
+
 	// analyze case
 	var idents []*ast.Ident
 	if typ != nil {
@@ -708,13 +716,22 @@
 	} else {
 		// ["*"] TypeName (AnonymousField)
 		typ = list[0] // we always have at least one element
-		if n := len(list); n > 1 || !isTypeName(deref(typ)) {
-			pos := typ.Pos()
-			p.errorExpected(pos, "anonymous field")
-			typ = &ast.BadExpr{From: pos, To: p.safePos(list[n-1].End())}
+		if n := len(list); n > 1 {
+			p.errorExpected(p.pos, "type")
+			typ = &ast.BadExpr{From: p.pos, To: p.pos}
+		} else if !isTypeName(deref(typ)) {
+			p.errorExpected(typ.Pos(), "anonymous field")
+			typ = &ast.BadExpr{From: typ.Pos(), To: p.safePos(typ.End())}
 		}
 	}
 
+	// Tag
+	var tag *ast.BasicLit
+	if p.tok == token.STRING {
+		tag = &ast.BasicLit{ValuePos: p.pos, Kind: p.tok, Value: p.lit}
+		p.next()
+	}
+
 	p.expectSemi() // call before accessing p.linecomment
 
 	field := &ast.Field{Doc: doc, Names: idents, Type: typ, Tag: tag, Comment: p.lineComment}
@@ -791,42 +808,27 @@
 	return typ
 }
 
-// If any of the results are identifiers, they are not resolved.
-func (p *parser) parseVarList(isParam bool) (list []ast.Expr, typ ast.Expr) {
-	if p.trace {
-		defer un(trace(p, "VarList"))
-	}
-
-	// a list of identifiers looks like a list of type names
-	//
-	// parse/tryVarType accepts any type (including parenthesized
-	// ones) even though the syntax does not permit them here: we
-	// accept them all for more robust parsing and complain later
-	for typ := p.parseVarType(isParam); typ != nil; {
-		list = append(list, typ)
-		if p.tok != token.COMMA {
-			break
-		}
-		p.next()
-		typ = p.tryVarType(isParam) // maybe nil as in: func f(int,) {}
-	}
-
-	// if we had a list of identifiers, it must be followed by a type
-	typ = p.tryVarType(isParam)
-
-	return
-}
-
 func (p *parser) parseParameterList(scope *ast.Scope, ellipsisOk bool) (params []*ast.Field) {
 	if p.trace {
 		defer un(trace(p, "ParameterList"))
 	}
 
-	// ParameterDecl
-	list, typ := p.parseVarList(ellipsisOk)
+	// 1st ParameterDecl
+	// A list of identifiers looks like a list of type names.
+	var list []ast.Expr
+	for {
+		list = append(list, p.parseVarType(ellipsisOk))
+		if p.tok != token.COMMA {
+			break
+		}
+		p.next()
+		if p.tok == token.RPAREN {
+			break
+		}
+	}
 
 	// analyze case
-	if typ != nil {
+	if typ := p.tryVarType(ellipsisOk); typ != nil {
 		// IdentifierList Type
 		idents := p.makeIdentList(list)
 		field := &ast.Field{Names: idents, Type: typ}
@@ -1908,14 +1910,23 @@
 	return ok && a.Type == nil
 }
 
-func isTypeSwitchGuard(s ast.Stmt) bool {
+func (p *parser) isTypeSwitchGuard(s ast.Stmt) bool {
 	switch t := s.(type) {
 	case *ast.ExprStmt:
-		// x.(nil)
+		// x.(type)
 		return isTypeSwitchAssert(t.X)
 	case *ast.AssignStmt:
-		// v := x.(nil)
-		return len(t.Lhs) == 1 && t.Tok == token.DEFINE && len(t.Rhs) == 1 && isTypeSwitchAssert(t.Rhs[0])
+		// v := x.(type)
+		if len(t.Lhs) == 1 && len(t.Rhs) == 1 && isTypeSwitchAssert(t.Rhs[0]) {
+			switch t.Tok {
+			case token.ASSIGN:
+				// permit v = x.(type) but complain
+				p.error(t.TokPos, "expected ':=', found '='")
+				fallthrough
+			case token.DEFINE:
+				return true
+			}
+		}
 	}
 	return false
 }
@@ -1961,7 +1972,7 @@
 		p.exprLev = prevLev
 	}
 
-	typeSwitch := isTypeSwitchGuard(s2)
+	typeSwitch := p.isTypeSwitchGuard(s2)
 	lbrace := p.expect(token.LBRACE)
 	var list []ast.Stmt
 	for p.tok == token.CASE || p.tok == token.DEFAULT {
diff --git a/src/go/parser/short_test.go b/src/go/parser/short_test.go
index ef2ffad..e05ae8e 100644
--- a/src/go/parser/short_test.go
+++ b/src/go/parser/short_test.go
@@ -64,7 +64,7 @@
 	`package p; func f() { for _ = range x ; /* ERROR "expected '{'" */ ; {} };`,
 	`package p; func f() { for ; ; _ = range /* ERROR "expected operand" */ x {} };`,
 	`package p; func f() { for ; _ /* ERROR "expected boolean or range expression" */ = range x ; {} };`,
-	`package p; func f() { switch t /* ERROR "expected switch expression" */ = t.(type) {} };`,
+	`package p; func f() { switch t = /* ERROR "expected ':=', found '='" */ t.(type) {} };`,
 	`package p; func f() { switch t /* ERROR "expected switch expression" */ , t = t.(type) {} };`,
 	`package p; func f() { switch t /* ERROR "expected switch expression" */ = t.(type), t {} };`,
 	`package p; var a = [ /* ERROR "expected expression" */ 1]int;`,
@@ -101,11 +101,26 @@
 	`package p; func f() { defer func() {} /* ERROR HERE "function must be invoked" */ }`,
 	`package p; func f() { go func() { func() { f(x func /* ERROR "missing ','" */ (){}) } } }`,
 	`package p; func f(x func(), u v func /* ERROR "missing ','" */ ()){}`,
-	`package p; func f() (a b string /* ERROR "missing ','" */ , ok bool)`,           // issue 8656
-	`package p; var x /* ERROR "missing variable type or initialization" */ , y, z;`, // issue 9639
-	`package p; const x /* ERROR "missing constant value" */ ;`,                      // issue 9639
-	`package p; const x /* ERROR "missing constant value" */ int;`,                   // issue 9639
-	`package p; const (x = 0; y; z /* ERROR "missing constant value" */ int);`,       // issue 9639
+
+	// issue 8656
+	`package p; func f() (a b string /* ERROR "missing ','" */ , ok bool)`,
+
+	// issue 9639
+	`package p; var x /* ERROR "missing variable type or initialization" */ , y, z;`,
+	`package p; const x /* ERROR "missing constant value" */ ;`,
+	`package p; const x /* ERROR "missing constant value" */ int;`,
+	`package p; const (x = 0; y; z /* ERROR "missing constant value" */ int);`,
+
+	// issue 12437
+	`package p; var _ = struct { x int, /* ERROR "expected ';', found ','" */ }{};`,
+	`package p; var _ = struct { x int, /* ERROR "expected ';', found ','" */ y float }{};`,
+
+	// issue 11611
+	`package p; type _ struct { int, } /* ERROR "expected type, found '}'" */ ;`,
+	`package p; type _ struct { int, float } /* ERROR "expected type, found '}'" */ ;`,
+	`package p; type _ struct { ( /* ERROR "expected anonymous field" */ int) };`,
+	`package p; func _()(x, y, z ... /* ERROR "expected '\)', found '...'" */ int){}`,
+	`package p; func _()(... /* ERROR "expected type, found '...'" */ int){}`,
 }
 
 func TestInvalid(t *testing.T) {
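
Note: these parser changes recover after reporting their error, so
tools still get a usable AST for the offending declarations. A quick
demonstration (error wording as in the test cases above):

	package main

	import (
		"fmt"
		"go/parser"
		"go/token"
	)

	func main() {
		srcs := []string{
			// ',' instead of ';' between struct fields: "expected ';', found ','"
			`package p; var _ = struct{ x int, y float64 }{}`,
			// '=' instead of ':=' in a type switch guard: "expected ':=', found '='"
			`package p; func f(x interface{}) { switch t = x.(type) {} }`,
		}
		fset := token.NewFileSet()
		for _, src := range srcs {
			_, err := parser.ParseFile(fset, "src.go", src, 0)
			fmt.Println(err)
		}
	}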
diff --git a/src/go/printer/nodes.go b/src/go/printer/nodes.go
index fe04705..35c017d 100644
--- a/src/go/printer/nodes.go
+++ b/src/go/printer/nodes.go
@@ -747,13 +747,7 @@
 		}
 
 	case *ast.SelectorExpr:
-		p.expr1(x.X, token.HighestPrec, depth)
-		p.print(token.PERIOD)
-		if line := p.lineFor(x.Sel.Pos()); p.pos.IsValid() && p.pos.Line < line {
-			p.print(indent, newline, x.Sel.Pos(), x.Sel, unindent)
-		} else {
-			p.print(x.Sel.Pos(), x.Sel)
-		}
+		p.selectorExpr(x, depth, false)
 
 	case *ast.TypeAssertExpr:
 		p.expr1(x.X, token.HighestPrec, depth)
@@ -802,13 +796,14 @@
 		if len(x.Args) > 1 {
 			depth++
 		}
+		var wasIndented bool
 		if _, ok := x.Fun.(*ast.FuncType); ok {
 			// conversions to literal function types require parentheses around the type
 			p.print(token.LPAREN)
-			p.expr1(x.Fun, token.HighestPrec, depth)
+			wasIndented = p.possibleSelectorExpr(x.Fun, token.HighestPrec, depth)
 			p.print(token.RPAREN)
 		} else {
-			p.expr1(x.Fun, token.HighestPrec, depth)
+			wasIndented = p.possibleSelectorExpr(x.Fun, token.HighestPrec, depth)
 		}
 		p.print(x.Lparen, token.LPAREN)
 		if x.Ellipsis.IsValid() {
@@ -821,6 +816,9 @@
 			p.exprList(x.Lparen, x.Args, depth, commaTerm, x.Rparen)
 		}
 		p.print(x.Rparen, token.RPAREN)
+		if wasIndented {
+			p.print(unindent)
+		}
 
 	case *ast.CompositeLit:
 		// composite literal elements that are composite literals themselves may have the type omitted
@@ -891,6 +889,30 @@
 	return
 }
 
+func (p *printer) possibleSelectorExpr(expr ast.Expr, prec1, depth int) bool {
+	if x, ok := expr.(*ast.SelectorExpr); ok {
+		return p.selectorExpr(x, depth, true)
+	}
+	p.expr1(expr, prec1, depth)
+	return false
+}
+
+// selectorExpr handles an *ast.SelectorExpr node and returns whether x spans
+// multiple lines.
+func (p *printer) selectorExpr(x *ast.SelectorExpr, depth int, isMethod bool) bool {
+	p.expr1(x.X, token.HighestPrec, depth)
+	p.print(token.PERIOD)
+	if line := p.lineFor(x.Sel.Pos()); p.pos.IsValid() && p.pos.Line < line {
+		p.print(indent, newline, x.Sel.Pos(), x.Sel)
+		if !isMethod {
+			p.print(unindent)
+		}
+		return true
+	}
+	p.print(x.Sel.Pos(), x.Sel)
+	return false
+}
+
 func (p *printer) expr0(x ast.Expr, depth int) {
 	p.expr1(x, token.LowestPrec, depth)
 }
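
Note: the net effect of possibleSelectorExpr/selectorExpr is that the
arguments of a call following a multiline selector are indented one
level deeper, with the unindent emitted only after the argument list
(see the golden files below). For example:

	package main

	import (
		"fmt"
		"go/format"
	)

	func main() {
		src := []byte("package p\nfunc _() {\n\t_ = new(T).\n\t\tfoo(\n\t\t1).\n\t\tfoo(2)\n}\n")
		out, err := format.Source(src)
		if err != nil {
			panic(err)
		}
		fmt.Print(string(out)) // the argument 1 now gets an extra level of indentation
	}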
diff --git a/src/go/printer/printer.go b/src/go/printer/printer.go
index f9343d3..a3eaa66 100644
--- a/src/go/printer/printer.go
+++ b/src/go/printer/printer.go
@@ -1178,7 +1178,9 @@
 			case '\n', '\f':
 				_, err = p.output.Write(data[m:n])
 				p.resetSpace()
-				_, err = p.output.Write(aNewline)
+				if err == nil {
+					_, err = p.output.Write(aNewline)
+				}
 			case tabwriter.Escape:
 				_, err = p.output.Write(data[m:n])
 				p.state = inEscape
diff --git a/src/go/printer/printer_test.go b/src/go/printer/printer_test.go
index 3b0570e..73f9ead 100644
--- a/src/go/printer/printer_test.go
+++ b/src/go/printer/printer_test.go
@@ -12,6 +12,7 @@
 	"go/ast"
 	"go/parser"
 	"go/token"
+	"io"
 	"io/ioutil"
 	"path/filepath"
 	"testing"
@@ -548,6 +549,46 @@
 	}
 }
 
+type limitWriter struct {
+	remaining int
+	errCount  int
+}
+
+func (l *limitWriter) Write(buf []byte) (n int, err error) {
+	n = len(buf)
+	if n >= l.remaining {
+		n = l.remaining
+		err = io.EOF
+		l.errCount++
+	}
+	l.remaining -= n
+	return n, err
+}
+
+// Test whether the printer stops writing after the first error
+func TestWriteErrors(t *testing.T) {
+	const filename = "printer.go"
+	src, err := ioutil.ReadFile(filename)
+	if err != nil {
+		panic(err) // error in test
+	}
+	file, err := parser.ParseFile(fset, filename, src, 0)
+	if err != nil {
+		panic(err) // error in test
+	}
+	for i := 0; i < 20; i++ {
+		lw := &limitWriter{remaining: i}
+		err := (&Config{Mode: RawFormat}).Fprint(lw, fset, file)
+		if lw.errCount > 1 {
+			t.Fatal("Writes continued after first error returned")
+		}
+		// We expect errCount to be 1 iff err is set.
+		if (lw.errCount != 0) != (err != nil) {
+			t.Fatal("Expected err when errCount != 0")
+		}
+	}
+}
+
 // TestX is a skeleton test that can be filled in for debugging one-off cases.
 // Do not remove.
 func TestX(t *testing.T) {
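
Note: the printer.go fix guards the newline write so an error from the
first Write is not clobbered, and TestWriteErrors checks that no writes
continue past the first failure. The guarded-write pattern in
isolation:

	package main

	import (
		"errors"
		"fmt"
	)

	// failWriter fails once its budget is exhausted, like limitWriter above.
	type failWriter struct{ budget int }

	func (w *failWriter) Write(p []byte) (int, error) {
		if len(p) > w.budget {
			n := w.budget
			w.budget = 0
			return n, errors.New("write limit reached")
		}
		w.budget -= len(p)
		return len(p), nil
	}

	func main() {
		w := &failWriter{budget: 3}
		_, err := w.Write([]byte("data"))
		if err == nil { // without this guard the second Write would overwrite err
			_, err = w.Write([]byte("\n"))
		}
		fmt.Println(err) // write limit reached
	}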
diff --git a/src/go/printer/testdata/expressions.golden b/src/go/printer/testdata/expressions.golden
index e3d17a4..cab991f 100644
--- a/src/go/printer/testdata/expressions.golden
+++ b/src/go/printer/testdata/expressions.golden
@@ -567,7 +567,7 @@
 	// handle multiline argument list correctly
 	_ = new(T).
 		foo(
-		1).
+			1).
 		foo(2)
 
 	_ = new(T).foo(
@@ -614,7 +614,7 @@
 		Blob.(*Type).
 		Slices[1:4].
 		Method(1, 2,
-		3).
+			3).
 		Thingy
 
 	_ = a.b.c
@@ -684,3 +684,21 @@
 	_ = (func(x int) float)(nil)
 	_ = (func() func() func())(nil)
 }
+
+func _() {
+	_ = f().
+		f(func() {
+			f()
+		}).
+		f(map[int]int{
+			1:	2,
+			3:	4,
+		})
+
+	_ = f().
+		f(
+			func() {
+				f()
+			},
+		)
+}
diff --git a/src/go/printer/testdata/expressions.input b/src/go/printer/testdata/expressions.input
index d20a593..7c88042 100644
--- a/src/go/printer/testdata/expressions.input
+++ b/src/go/printer/testdata/expressions.input
@@ -713,3 +713,21 @@
 	_ = (func(x int)(float))(nil)
 	_ = (func() func() func()())(nil)
 }
+
+func _() {
+	_ = f().
+	f(func() {
+		f()
+	}).
+	f(map[int]int{
+	1: 2,
+	3: 4,
+})
+
+	_ = f().
+	f(
+	func() {
+		f()
+	},
+	)
+}
diff --git a/src/go/printer/testdata/expressions.raw b/src/go/printer/testdata/expressions.raw
index 2357336..d906062 100644
--- a/src/go/printer/testdata/expressions.raw
+++ b/src/go/printer/testdata/expressions.raw
@@ -567,7 +567,7 @@
 	// handle multiline argument list correctly
 	_ = new(T).
 		foo(
-		1).
+			1).
 		foo(2)
 
 	_ = new(T).foo(
@@ -614,7 +614,7 @@
 		Blob.(*Type).
 		Slices[1:4].
 		Method(1, 2,
-		3).
+			3).
 		Thingy
 
 	_ = a.b.c
@@ -684,3 +684,21 @@
 	_ = (func(x int) float)(nil)
 	_ = (func() func() func())(nil)
 }
+
+func _() {
+	_ = f().
+		f(func() {
+			f()
+		}).
+		f(map[int]int{
+			1:	2,
+			3:	4,
+		})
+
+	_ = f().
+		f(
+			func() {
+				f()
+			},
+		)
+}
diff --git a/src/go/types/api.go b/src/go/types/api.go
index b3bf6f0..d252259 100644
--- a/src/go/types/api.go
+++ b/src/go/types/api.go
@@ -142,7 +142,7 @@
 	//
 	//	*ast.ImportSpec    *PkgName for dot-imports and imports without renames
 	//	*ast.CaseClause    type-specific *Var for each type switch case clause (incl. default)
-	//      *ast.Field         anonymous struct field or parameter *Var
+	//      *ast.Field         anonymous parameter *Var
 	//
 	Implicits map[ast.Node]Object
 
@@ -297,8 +297,10 @@
 	return buf.String()
 }
 
-// Check type-checks a package and returns the resulting package object,
-// the first error if any, and if info != nil, additional type information.
+// Check type-checks a package and returns the resulting package object and
+// the first error if any. Additionally, if info != nil, Check populates each
+// of the non-nil maps in the Info struct.
+//
 // The package is marked as complete if no errors occurred, otherwise it is
 // incomplete. See Config.Error for controlling behavior in the presence of
 // errors.
@@ -320,7 +322,7 @@
 // AssignableTo reports whether a value of type V is assignable to a variable of type T.
 func AssignableTo(V, T Type) bool {
 	x := operand{mode: value, typ: V}
-	return x.assignableTo(nil, T) // config not needed for non-constant x
+	return x.assignableTo(nil, T, nil) // config not needed for non-constant x
 }
 
 // ConvertibleTo reports whether a value of type V is convertible to a value of type T.
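
Note: AssignableTo's exported behavior is unchanged; only the internal
operand.assignableTo signature gains a reason parameter (see operand.go
below). For reference:

	package main

	import (
		"fmt"
		"go/token"
		"go/types"
	)

	func main() {
		slice := types.NewSlice(types.Typ[types.Int])
		named := types.NewNamed(types.NewTypeName(token.NoPos, nil, "IntSlice", nil), slice, nil)
		// Identical underlying types and one side unnamed: assignable both ways.
		fmt.Println(types.AssignableTo(slice, named)) // true
		fmt.Println(types.AssignableTo(named, slice)) // true
	}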
diff --git a/src/go/types/api_test.go b/src/go/types/api_test.go
index eeda0d8..76c34b4 100644
--- a/src/go/types/api_test.go
+++ b/src/go/types/api_test.go
@@ -797,7 +797,7 @@
 	makePkg("main", mainSrc)
 
 	for e, sel := range selections {
-		sel.String() // assertion: must not panic
+		_ = sel.String() // assertion: must not panic
 
 		start := fset.Position(e.Pos()).Offset
 		end := fset.Position(e.End()).Offset
diff --git a/src/go/types/assignments.go b/src/go/types/assignments.go
index e88de56..10ab17b 100644
--- a/src/go/types/assignments.go
+++ b/src/go/types/assignments.go
@@ -13,31 +13,21 @@
 
 // assignment reports whether x can be assigned to a variable of type T,
 // if necessary by attempting to convert untyped values to the appropriate
-// type. If x.mode == invalid upon return, then assignment has already
-// issued an error message and the caller doesn't have to report another.
+// type. context describes the context in which the assignment takes place.
 // Use T == nil to indicate assignment to an untyped blank identifier.
-//
-// TODO(gri) Should find a better way to handle in-band errors.
-//
-func (check *Checker) assignment(x *operand, T Type) bool {
+// x.mode is set to invalid if the assignment failed.
+func (check *Checker) assignment(x *operand, T Type, context string) {
+	check.singleValue(x)
+
 	switch x.mode {
 	case invalid:
-		return true // error reported before
+		return // error reported before
 	case constant_, variable, mapindex, value, commaok:
 		// ok
 	default:
 		unreachable()
 	}
 
-	// x must be a single value
-	// (tuple types are never named - no need for underlying type)
-	if t, _ := x.typ.(*Tuple); t != nil {
-		assert(t.Len() > 1)
-		check.errorf(x.pos(), "%d-valued expression %s used as single value", t.Len(), x)
-		x.mode = invalid
-		return false
-	}
-
 	if isUntyped(x.typ) {
 		target := T
 		// spec: "If an untyped constant is assigned to a variable of interface
@@ -47,22 +37,34 @@
 		// or string constant."
 		if T == nil || IsInterface(T) {
 			if T == nil && x.typ == Typ[UntypedNil] {
-				check.errorf(x.pos(), "use of untyped nil")
+				check.errorf(x.pos(), "use of untyped nil in %s", context)
 				x.mode = invalid
-				return false
+				return
 			}
 			target = defaultType(x.typ)
 		}
 		check.convertUntyped(x, target)
 		if x.mode == invalid {
-			return false
+			return
 		}
 	}
+	// x.typ is typed
 
 	// spec: "If a left-hand side is the blank identifier, any typed or
 	// non-constant value except for the predeclared identifier nil may
 	// be assigned to it."
-	return T == nil || x.assignableTo(check.conf, T)
+	if T == nil {
+		return
+	}
+
+	if reason := ""; !x.assignableTo(check.conf, T, &reason) {
+		if reason != "" {
+			check.errorf(x.pos(), "cannot use %s as %s value in %s: %s", x, T, context, reason)
+		} else {
+			check.errorf(x.pos(), "cannot use %s as %s value in %s", x, T, context)
+		}
+		x.mode = invalid
+	}
 }
 
 func (check *Checker) initConst(lhs *Const, x *operand) {
@@ -88,18 +90,15 @@
 		lhs.typ = x.typ
 	}
 
-	if !check.assignment(x, lhs.typ) {
-		if x.mode != invalid {
-			check.errorf(x.pos(), "cannot define constant %s (type %s) as %s", lhs.Name(), lhs.typ, x)
-		}
+	check.assignment(x, lhs.typ, "constant declaration")
+	if x.mode == invalid {
 		return
 	}
 
 	lhs.val = x.val
 }
 
-// If result is set, lhs is a function result parameter and x is a return result.
-func (check *Checker) initVar(lhs *Var, x *operand, result bool) Type {
+func (check *Checker) initVar(lhs *Var, x *operand, context string) Type {
 	if x.mode == invalid || x.typ == Typ[Invalid] || lhs.typ == Typ[Invalid] {
 		if lhs.typ == nil {
 			lhs.typ = Typ[Invalid]
@@ -113,7 +112,7 @@
 		if isUntyped(typ) {
 			// convert untyped types to default types
 			if typ == Typ[UntypedNil] {
-				check.errorf(x.pos(), "use of untyped nil")
+				check.errorf(x.pos(), "use of untyped nil in %s", context)
 				lhs.typ = Typ[Invalid]
 				return nil
 			}
@@ -122,15 +121,8 @@
 		lhs.typ = typ
 	}
 
-	if !check.assignment(x, lhs.typ) {
-		if x.mode != invalid {
-			if result {
-				// don't refer to lhs.name because it may be an anonymous result parameter
-				check.errorf(x.pos(), "cannot return %s as value of type %s", x, lhs.typ)
-			} else {
-				check.errorf(x.pos(), "cannot initialize %s with %s", lhs, x)
-			}
-		}
+	check.assignment(x, lhs.typ, context)
+	if x.mode == invalid {
 		return nil
 	}
 
@@ -148,9 +140,9 @@
 	// Don't evaluate lhs if it is the blank identifier.
 	if ident != nil && ident.Name == "_" {
 		check.recordDef(ident, nil)
-		if !check.assignment(x, nil) {
-			assert(x.mode == invalid)
-			x.typ = nil
+		check.assignment(x, nil, "assignment to _ identifier")
+		if x.mode == invalid {
+			return nil
 		}
 		return x.typ
 	}
@@ -191,10 +183,8 @@
 		return nil
 	}
 
-	if !check.assignment(x, z.typ) {
-		if x.mode != invalid {
-			check.errorf(x.pos(), "cannot assign %s to %s", x, &z)
-		}
+	check.assignment(x, z.typ, "assignment")
+	if x.mode == invalid {
 		return nil
 	}
 
@@ -205,7 +195,7 @@
 // return expressions, and returnPos is the position of the return statement.
 func (check *Checker) initVars(lhs []*Var, rhs []ast.Expr, returnPos token.Pos) {
 	l := len(lhs)
-	get, r, commaOk := unpack(func(x *operand, i int) { check.expr(x, rhs[i]) }, len(rhs), l == 2 && !returnPos.IsValid())
+	get, r, commaOk := unpack(func(x *operand, i int) { check.multiExpr(x, rhs[i]) }, len(rhs), l == 2 && !returnPos.IsValid())
 	if get == nil || l != r {
 		// invalidate lhs and use rhs
 		for _, obj := range lhs {
@@ -225,12 +215,17 @@
 		return
 	}
 
+	context := "assignment"
+	if returnPos.IsValid() {
+		context = "return statement"
+	}
+
 	var x operand
 	if commaOk {
 		var a [2]Type
 		for i := range a {
 			get(&x, i)
-			a[i] = check.initVar(lhs[i], &x, returnPos.IsValid())
+			a[i] = check.initVar(lhs[i], &x, context)
 		}
 		check.recordCommaOkTypes(rhs[0], a)
 		return
@@ -238,13 +233,13 @@
 
 	for i, lhs := range lhs {
 		get(&x, i)
-		check.initVar(lhs, &x, returnPos.IsValid())
+		check.initVar(lhs, &x, context)
 	}
 }
 
 func (check *Checker) assignVars(lhs, rhs []ast.Expr) {
 	l := len(lhs)
-	get, r, commaOk := unpack(func(x *operand, i int) { check.expr(x, rhs[i]) }, len(rhs), l == 2)
+	get, r, commaOk := unpack(func(x *operand, i int) { check.multiExpr(x, rhs[i]) }, len(rhs), l == 2)
 	if get == nil {
 		return // error reported by unpack
 	}
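
Note: the context string threads through to the error text, so a failed
assignment now says where it happened. A sketch (the exact operand
wording is the checker's; the comments approximate it):

	package main

	import (
		"fmt"
		"go/ast"
		"go/parser"
		"go/token"
		"go/types"
	)

	func main() {
		src := "package p\n" +
			"func f() string { return \"\" }\n" +
			"var x int = f()\n" +
			"func g() int { var s string; return s }\n"
		fset := token.NewFileSet()
		file, err := parser.ParseFile(fset, "p.go", src, 0)
		if err != nil {
			panic(err)
		}
		// Expected errors, roughly:
		//   cannot use f() (...) as int value in variable declaration
		//   cannot use s (...) as int value in return statement
		conf := types.Config{Error: func(err error) { fmt.Println(err) }}
		conf.Check("p", fset, []*ast.File{file}, nil)
	}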
diff --git a/src/go/types/builtins.go b/src/go/types/builtins.go
index 7857a44..c288024 100644
--- a/src/go/types/builtins.go
+++ b/src/go/types/builtins.go
@@ -44,7 +44,7 @@
 	switch id {
 	default:
 		// make argument getter
-		arg, nargs, _ = unpack(func(x *operand, i int) { check.expr(x, call.Args[i]) }, nargs, false)
+		arg, nargs, _ = unpack(func(x *operand, i int) { check.multiExpr(x, call.Args[i]) }, nargs, false)
 		if arg == nil {
 			return
 		}
@@ -95,7 +95,7 @@
 		// spec: "As a special case, append also accepts a first argument assignable
 		// to type []byte with a second argument of string type followed by ... .
 		// This form appends the bytes of the string.
-		if nargs == 2 && call.Ellipsis.IsValid() && x.assignableTo(check.conf, NewSlice(universeByte)) {
+		if nargs == 2 && call.Ellipsis.IsValid() && x.assignableTo(check.conf, NewSlice(universeByte), nil) {
 			arg(x, 1)
 			if x.mode == invalid {
 				return
@@ -341,7 +341,7 @@
 			return
 		}
 
-		if !x.assignableTo(check.conf, m.key) {
+		if !x.assignableTo(check.conf, m.key, nil) {
 			check.invalidArg(x.pos(), "%s is not assignable to %s", x, m.key)
 			return
 		}
@@ -471,8 +471,8 @@
 	case _Panic:
 		// panic(x)
 		T := new(Interface)
-		if !check.assignment(x, T) {
-			assert(x.mode == invalid)
+		check.assignment(x, T, "argument to panic")
+		if x.mode == invalid {
 			return
 		}
 
@@ -491,8 +491,9 @@
 				if i > 0 {
 					arg(x, i) // first argument already evaluated
 				}
-				if !check.assignment(x, nil) {
-					assert(x.mode == invalid)
+				check.assignment(x, nil, "argument to "+predeclaredFuncs[id].name)
+				if x.mode == invalid {
+					// TODO(gri) "use" all arguments?
 					return
 				}
 				params[i] = x.typ
@@ -514,8 +515,8 @@
 
 	case _Alignof:
 		// unsafe.Alignof(x T) uintptr
-		if !check.assignment(x, nil) {
-			assert(x.mode == invalid)
+		check.assignment(x, nil, "argument to unsafe.Alignof")
+		if x.mode == invalid {
 			return
 		}
 
@@ -571,8 +572,8 @@
 
 	case _Sizeof:
 		// unsafe.Sizeof(x T) uintptr
-		if !check.assignment(x, nil) {
-			assert(x.mode == invalid)
+		check.assignment(x, nil, "argument to unsafe.Sizeof")
+		if x.mode == invalid {
 			return
 		}
 
diff --git a/src/go/types/call.go b/src/go/types/call.go
index 62cefc0..8aeb862 100644
--- a/src/go/types/call.go
+++ b/src/go/types/call.go
@@ -61,7 +61,7 @@
 			return statement
 		}
 
-		arg, n, _ := unpack(func(x *operand, i int) { check.expr(x, e.Args[i]) }, len(e.Args), false)
+		arg, n, _ := unpack(func(x *operand, i int) { check.multiExpr(x, e.Args[i]) }, len(e.Args), false)
 		if arg == nil {
 			x.mode = invalid
 			x.expr = e
@@ -181,14 +181,14 @@
 func (check *Checker) arguments(x *operand, call *ast.CallExpr, sig *Signature, arg getter, n int) {
 	if call.Ellipsis.IsValid() {
 		// last argument is of the form x...
-		if len(call.Args) == 1 && n > 1 {
-			// f()... is not permitted if f() is multi-valued
-			check.errorf(call.Ellipsis, "cannot use ... with %d-valued expression %s", n, call.Args[0])
+		if !sig.variadic {
+			check.errorf(call.Ellipsis, "cannot use ... in call to non-variadic %s", call.Fun)
 			check.useGetter(arg, n)
 			return
 		}
-		if !sig.variadic {
-			check.errorf(call.Ellipsis, "cannot use ... in call to non-variadic %s", call.Fun)
+		if len(call.Args) == 1 && n > 1 {
+			// f()... is not permitted if f() is multi-valued
+			check.errorf(call.Ellipsis, "cannot use ... with %d-valued %s", n, call.Args[0])
 			check.useGetter(arg, n)
 			return
 		}
@@ -202,7 +202,7 @@
 			if i == n-1 && call.Ellipsis.IsValid() {
 				ellipsis = call.Ellipsis
 			}
-			check.argument(sig, i, x, ellipsis)
+			check.argument(call.Fun, sig, i, x, ellipsis)
 		}
 	}
 
@@ -220,7 +220,12 @@
 
 // argument checks passing of argument x to the i'th parameter of the given signature.
 // If ellipsis is valid, the argument is followed by ... at that position in the call.
-func (check *Checker) argument(sig *Signature, i int, x *operand, ellipsis token.Pos) {
+func (check *Checker) argument(fun ast.Expr, sig *Signature, i int, x *operand, ellipsis token.Pos) {
+	check.singleValue(x)
+	if x.mode == invalid {
+		return
+	}
+
 	n := sig.params.Len()
 
 	// determine parameter type
@@ -241,18 +246,12 @@
 	}
 
 	if ellipsis.IsValid() {
-		// argument is of the form x...
+		// argument is of the form x... and x is single-valued
 		if i != n-1 {
 			check.errorf(ellipsis, "can only use ... with matching parameter")
 			return
 		}
-		switch t := x.typ.Underlying().(type) {
-		case *Slice:
-			// ok
-		case *Tuple:
-			check.errorf(ellipsis, "cannot use ... with %d-valued expression %s", t.Len(), x)
-			return
-		default:
+		if _, ok := x.typ.Underlying().(*Slice); !ok {
 			check.errorf(x.pos(), "cannot use %s as parameter of type %s", x, typ)
 			return
 		}
@@ -261,9 +260,7 @@
 		typ = typ.(*Slice).elem
 	}
 
-	if !check.assignment(x, typ) && x.mode != invalid {
-		check.errorf(x.pos(), "cannot pass argument %s to parameter of type %s", x, typ)
-	}
+	check.assignment(x, typ, check.sprintf("argument to %s", fun))
 }
 
 func (check *Checker) selector(x *operand, e *ast.SelectorExpr) {
diff --git a/src/go/types/check_test.go b/src/go/types/check_test.go
index 5e34c65..5e2043b 100644
--- a/src/go/types/check_test.go
+++ b/src/go/types/check_test.go
@@ -55,6 +55,7 @@
 	{"testdata/errors.src"},
 	{"testdata/importdecl0a.src", "testdata/importdecl0b.src"},
 	{"testdata/importdecl1a.src", "testdata/importdecl1b.src"},
+	{"testdata/importC.src"}, // special handling in checkFiles
 	{"testdata/cycles.src"},
 	{"testdata/cycles1.src"},
 	{"testdata/cycles2.src"},
@@ -245,6 +246,10 @@
 
 	// typecheck and collect typechecker errors
 	var conf Config
+	// special case for importC.src
+	if len(testfiles) == 1 && testfiles[0] == "testdata/importC.src" {
+		conf.FakeImportC = true
+	}
 	conf.Importer = importer.Default()
 	conf.Error = func(err error) {
 		if *listErrors {
diff --git a/src/go/types/conversions.go b/src/go/types/conversions.go
index 74826ce..33e8930 100644
--- a/src/go/types/conversions.go
+++ b/src/go/types/conversions.go
@@ -65,7 +65,7 @@
 
 func (x *operand) convertibleTo(conf *Config, T Type) bool {
 	// "x is assignable to T"
-	if x.assignableTo(conf, T) {
+	if x.assignableTo(conf, T, nil) {
 		return true
 	}
 
diff --git a/src/go/types/decl.go b/src/go/types/decl.go
index 8e9e5f3..f064f68 100644
--- a/src/go/types/decl.go
+++ b/src/go/types/decl.go
@@ -156,7 +156,7 @@
 		assert(lhs == nil || lhs[0] == obj)
 		var x operand
 		check.expr(&x, init)
-		check.initVar(obj, &x, false)
+		check.initVar(obj, &x, "variable declaration")
 		return
 	}
 
diff --git a/src/go/types/expr.go b/src/go/types/expr.go
index e26607b..387a32f 100644
--- a/src/go/types/expr.go
+++ b/src/go/types/expr.go
@@ -570,7 +570,7 @@
 	// spec: "In any comparison, the first operand must be assignable
 	// to the type of the second operand, or vice versa."
 	err := ""
-	if x.assignableTo(check.conf, y.typ) || y.assignableTo(check.conf, x.typ) {
+	if x.assignableTo(check.conf, y.typ, nil) || y.assignableTo(check.conf, x.typ, nil) {
 		defined := false
 		switch op {
 		case token.EQL, token.NEQ:
@@ -618,7 +618,7 @@
 	x.typ = Typ[UntypedBool]
 }
 
-func (check *Checker) shift(x, y *operand, op token.Token) {
+func (check *Checker) shift(x, y *operand, e *ast.BinaryExpr, op token.Token) {
 	untypedx := isUntyped(x.typ)
 
 	// The lhs must be of integer type or be representable
@@ -671,6 +671,14 @@
 				x.typ = Typ[UntypedInt]
 			}
 			x.val = constant.Shift(x.val, op, uint(s))
+			// Typed constants must be representable in
+			// their type after each constant operation.
+			if isTyped(x.typ) {
+				if e != nil {
+					x.expr = e // for better error message
+				}
+				check.representable(x, x.typ.Underlying().(*Basic))
+			}
 			return
 		}
 
@@ -753,7 +761,7 @@
 	}
 
 	if isShift(op) {
-		check.shift(x, &y, op)
+		check.shift(x, &y, e, op)
 		return
 	}
 
@@ -898,9 +906,7 @@
 		// check element against composite literal element type
 		var x operand
 		check.exprWithHint(&x, eval, typ)
-		if !check.assignment(&x, typ) && x.mode != invalid {
-			check.errorf(x.pos(), "cannot use %s as %s value in array or slice literal", &x, typ)
-		}
+		check.assignment(&x, typ, "array or slice literal")
 	}
 	return max
 }
@@ -1062,12 +1068,7 @@
 					visited[i] = true
 					check.expr(x, kv.Value)
 					etyp := fld.typ
-					if !check.assignment(x, etyp) {
-						if x.mode != invalid {
-							check.errorf(x.pos(), "cannot use %s as %s value in struct literal", x, etyp)
-						}
-						continue
-					}
+					check.assignment(x, etyp, "struct literal")
 				}
 			} else {
 				// no element must have a key
@@ -1088,12 +1089,7 @@
 						continue
 					}
 					etyp := fld.typ
-					if !check.assignment(x, etyp) {
-						if x.mode != invalid {
-							check.errorf(x.pos(), "cannot use %s as %s value in struct literal", x, etyp)
-						}
-						continue
-					}
+					check.assignment(x, etyp, "struct literal")
 				}
 				if len(e.Elts) < len(fields) {
 					check.error(e.Rbrace, "too few values in struct literal")
@@ -1120,10 +1116,8 @@
 					continue
 				}
 				check.exprWithHint(x, kv.Key, utyp.key)
-				if !check.assignment(x, utyp.key) {
-					if x.mode != invalid {
-						check.errorf(x.pos(), "cannot use %s as %s key in map literal", x, utyp.key)
-					}
+				check.assignment(x, utyp.key, "map literal")
+				if x.mode == invalid {
 					continue
 				}
 				if x.mode == constant_ {
@@ -1147,12 +1141,7 @@
 					}
 				}
 				check.exprWithHint(x, kv.Value, utyp.elem)
-				if !check.assignment(x, utyp.elem) {
-					if x.mode != invalid {
-						check.errorf(x.pos(), "cannot use %s as %s value in map literal", x, utyp.elem)
-					}
-					continue
-				}
+				check.assignment(x, utyp.elem, "map literal")
 			}
 
 		default:
@@ -1220,10 +1209,8 @@
 		case *Map:
 			var key operand
 			check.expr(&key, e.Index)
-			if !check.assignment(&key, typ.key) {
-				if key.mode != invalid {
-					check.invalidOp(key.pos(), "cannot use %s as map index of type %s", &key, typ.key)
-				}
+			check.assignment(&key, typ.key, "map index")
+			if x.mode == invalid {
 				goto Error
 			}
 			x.mode = mapindex
@@ -1453,23 +1440,41 @@
 	check.errorf(pos, "%s cannot have dynamic type %s (%s %s)", x, T, msg, method.name)
 }
 
+func (check *Checker) singleValue(x *operand) {
+	if x.mode == value {
+		// tuple types are never named - no need for underlying type below
+		if t, ok := x.typ.(*Tuple); ok {
+			assert(t.Len() != 1)
+			check.errorf(x.pos(), "%d-valued %s where single value is expected", t.Len(), x)
+			x.mode = invalid
+		}
+	}
+}
+
 // expr typechecks expression e and initializes x with the expression value.
+// The result must be a single value.
 // If an error occurred, x.mode is set to invalid.
 //
 func (check *Checker) expr(x *operand, e ast.Expr) {
+	check.multiExpr(x, e)
+	check.singleValue(x)
+}
+
+// multiExpr is like expr but the result may be a multi-value.
+func (check *Checker) multiExpr(x *operand, e ast.Expr) {
 	check.rawExpr(x, e, nil)
 	var msg string
 	switch x.mode {
 	default:
 		return
 	case novalue:
-		msg = "used as value"
+		msg = "%s used as value"
 	case builtin:
-		msg = "must be called"
+		msg = "%s must be called"
 	case typexpr:
-		msg = "is not an expression"
+		msg = "%s is not an expression"
 	}
-	check.errorf(x.pos(), "%s %s", x, msg)
+	check.errorf(x.pos(), msg, x)
 	x.mode = invalid
 }
 
@@ -1480,18 +1485,19 @@
 func (check *Checker) exprWithHint(x *operand, e ast.Expr, hint Type) {
 	assert(hint != nil)
 	check.rawExpr(x, e, hint)
+	check.singleValue(x)
 	var msg string
 	switch x.mode {
 	default:
 		return
 	case novalue:
-		msg = "used as value"
+		msg = "%s used as value"
 	case builtin:
-		msg = "must be called"
+		msg = "%s must be called"
 	case typexpr:
-		msg = "is not an expression"
+		msg = "%s is not an expression"
 	}
-	check.errorf(x.pos(), "%s %s", x, msg)
+	check.errorf(x.pos(), msg, x)
 	x.mode = invalid
 }
 
@@ -1500,6 +1506,7 @@
 //
 func (check *Checker) exprOrType(x *operand, e ast.Expr) {
 	check.rawExpr(x, e, nil)
+	check.singleValue(x)
 	if x.mode == novalue {
 		check.errorf(x.pos(), "%s used as value or type", x)
 		x.mode = invalid
diff --git a/src/go/types/gotype.go b/src/go/types/gotype.go
new file mode 100644
index 0000000..0a36c08
--- /dev/null
+++ b/src/go/types/gotype.go
@@ -0,0 +1,322 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+// Build this command explicitly: go build gotype.go
+
+/*
+The gotype command does syntactic and semantic analysis of Go files
+and packages like the front-end of a Go compiler. Errors are reported
+if the analysis fails; otherwise gotype is quiet (unless -v is set).
+
+Without a list of paths, gotype reads from standard input, which
+must provide a single Go source file defining a complete package.
+
+If a single path is specified that is a directory, gotype checks
+the Go files in that directory; they must all belong to the same
+package.
+
+Otherwise, each path must be the filename of a Go file belonging to
+the same package.
+
+Usage:
+	gotype [flags] [path...]
+
+The flags are:
+	-a
+		use all (incl. _test.go) files when processing a directory
+	-e
+		report all errors (not just the first 10)
+	-v
+		verbose mode
+	-c
+		compiler used to compile packages (gc or gccgo); default: gc
+		(gotype based on Go1.5 and up only)
+	-gccgo
+		use gccimporter instead of gcimporter
+		(gotype based on Go1.4 and before only)
+
+Debugging flags:
+	-seq
+		parse sequentially, rather than in parallel
+	-ast
+		print AST (forces -seq)
+	-trace
+		print parse trace (forces -seq)
+	-comments
+		parse comments (ignored unless -ast or -trace is provided)
+
+Examples:
+
+To check the files a.go, b.go, and c.go:
+
+	gotype a.go b.go c.go
+
+To check an entire package in the directory dir and print the processed files:
+
+	gotype -v dir
+
+To check an entire package including tests in the local directory:
+
+	gotype -a .
+
+To verify the output of a pipe:
+
+	echo "package foo" | gotype
+
+*/
+package main
+
+import (
+	"flag"
+	"fmt"
+	"go/ast"
+	"go/build"
+	"go/importer"
+	"go/parser"
+	"go/scanner"
+	"go/token"
+	"go/types"
+	"io/ioutil"
+	"os"
+	"path/filepath"
+	"time"
+)
+
+var (
+	// main operation modes
+	allFiles  = flag.Bool("a", false, "use all (incl. _test.go) files when processing a directory")
+	allErrors = flag.Bool("e", false, "report all errors (not just the first 10)")
+	verbose   = flag.Bool("v", false, "verbose mode")
+	gccgo     = flag.Bool("gccgo", false, "use gccgoimporter instead of gcimporter")
+
+	// debugging support
+	sequential    = flag.Bool("seq", false, "parse sequentially, rather than in parallel")
+	printAST      = flag.Bool("ast", false, "print AST (forces -seq)")
+	printTrace    = flag.Bool("trace", false, "print parse trace (forces -seq)")
+	parseComments = flag.Bool("comments", false, "parse comments (ignored unless -ast or -trace is provided)")
+)
+
+var (
+	fset       = token.NewFileSet()
+	errorCount = 0
+	parserMode parser.Mode
+	sizes      types.Sizes
+)
+
+func initParserMode() {
+	if *allErrors {
+		parserMode |= parser.AllErrors
+	}
+	if *printTrace {
+		parserMode |= parser.Trace
+	}
+	if *parseComments && (*printAST || *printTrace) {
+		parserMode |= parser.ParseComments
+	}
+}
+
+func initSizes() {
+	wordSize := 8
+	maxAlign := 8
+	switch build.Default.GOARCH {
+	case "386", "arm":
+		wordSize = 4
+		maxAlign = 4
+		// add more cases as needed
+	}
+	sizes = &types.StdSizes{WordSize: int64(wordSize), MaxAlign: int64(maxAlign)}
+}
+
+func usage() {
+	fmt.Fprintln(os.Stderr, "usage: gotype [flags] [path ...]")
+	flag.PrintDefaults()
+	os.Exit(2)
+}
+
+func report(err error) {
+	scanner.PrintError(os.Stderr, err)
+	if list, ok := err.(scanner.ErrorList); ok {
+		errorCount += len(list)
+		return
+	}
+	errorCount++
+}
+
+// parse may be called concurrently
+func parse(filename string, src interface{}) (*ast.File, error) {
+	if *verbose {
+		fmt.Println(filename)
+	}
+	file, err := parser.ParseFile(fset, filename, src, parserMode) // ok to access fset concurrently
+	if *printAST {
+		ast.Print(fset, file)
+	}
+	return file, err
+}
+
+func parseStdin() (*ast.File, error) {
+	src, err := ioutil.ReadAll(os.Stdin)
+	if err != nil {
+		return nil, err
+	}
+	return parse("<standard input>", src)
+}
+
+func parseFiles(filenames []string) ([]*ast.File, error) {
+	files := make([]*ast.File, len(filenames))
+
+	if *sequential {
+		for i, filename := range filenames {
+			var err error
+			files[i], err = parse(filename, nil)
+			if err != nil {
+				return nil, err
+			}
+		}
+	} else {
+		type parseResult struct {
+			file *ast.File
+			err  error
+		}
+
+		out := make(chan parseResult)
+		for _, filename := range filenames {
+			go func(filename string) {
+				file, err := parse(filename, nil)
+				out <- parseResult{file, err}
+			}(filename)
+		}
+
+		for i := range filenames {
+			res := <-out
+			if res.err != nil {
+				return nil, res.err // leave unfinished goroutines hanging
+			}
+			files[i] = res.file
+		}
+	}
+
+	return files, nil
+}
+
+func parseDir(dirname string) ([]*ast.File, error) {
+	ctxt := build.Default
+	pkginfo, err := ctxt.ImportDir(dirname, 0)
+	if _, nogo := err.(*build.NoGoError); err != nil && !nogo {
+		return nil, err
+	}
+	filenames := append(pkginfo.GoFiles, pkginfo.CgoFiles...)
+	if *allFiles {
+		filenames = append(filenames, pkginfo.TestGoFiles...)
+	}
+
+	// complete file names
+	for i, filename := range filenames {
+		filenames[i] = filepath.Join(dirname, filename)
+	}
+
+	return parseFiles(filenames)
+}
+
+func getPkgFiles(args []string) ([]*ast.File, error) {
+	if len(args) == 0 {
+		// stdin
+		file, err := parseStdin()
+		if err != nil {
+			return nil, err
+		}
+		return []*ast.File{file}, nil
+	}
+
+	if len(args) == 1 {
+		// possibly a directory
+		path := args[0]
+		info, err := os.Stat(path)
+		if err != nil {
+			return nil, err
+		}
+		if info.IsDir() {
+			return parseDir(path)
+		}
+	}
+
+	// list of files
+	return parseFiles(args)
+}
+
+func checkPkgFiles(files []*ast.File) {
+	compiler := "gc"
+	if *gccgo {
+		compiler = "gccgo"
+	}
+	type bailout struct{}
+	conf := types.Config{
+		FakeImportC: true,
+		Error: func(err error) {
+			if !*allErrors && errorCount >= 10 {
+				panic(bailout{})
+			}
+			report(err)
+		},
+		Importer: importer.For(compiler, nil),
+		Sizes:    sizes,
+	}
+
+	defer func() {
+		switch p := recover().(type) {
+		case nil, bailout:
+			// normal return or early exit
+		default:
+			// re-panic
+			panic(p)
+		}
+	}()
+
+	const path = "pkg" // any non-empty string will do for now
+	conf.Check(path, fset, files, nil)
+}
+
+func printStats(d time.Duration) {
+	fileCount := 0
+	lineCount := 0
+	fset.Iterate(func(f *token.File) bool {
+		fileCount++
+		lineCount += f.LineCount()
+		return true
+	})
+
+	fmt.Printf(
+		"%s (%d files, %d lines, %d lines/s)\n",
+		d, fileCount, lineCount, int64(float64(lineCount)/d.Seconds()),
+	)
+}
+
+func main() {
+	flag.Usage = usage
+	flag.Parse()
+	if *printAST || *printTrace {
+		*sequential = true
+	}
+	initParserMode()
+	initSizes()
+
+	start := time.Now()
+
+	files, err := getPkgFiles(flag.Args())
+	if err != nil {
+		report(err)
+		os.Exit(2)
+	}
+
+	checkPkgFiles(files)
+	if errorCount > 0 {
+		os.Exit(2)
+	}
+
+	if *verbose {
+		printStats(time.Since(start))
+	}
+}
diff --git a/src/go/types/operand.go b/src/go/types/operand.go
index d3bab51..09eac835 100644
--- a/src/go/types/operand.go
+++ b/src/go/types/operand.go
@@ -202,7 +202,9 @@
 //           overlapping in functionality. Need to simplify and clean up.
 
 // assignableTo reports whether x is assignable to a variable of type T.
-func (x *operand) assignableTo(conf *Config, T Type) bool {
+// If the result is false and a non-nil reason is provided, it may be set
+// to a more detailed explanation of the failure (*reason != "").
+func (x *operand) assignableTo(conf *Config, T Type, reason *string) bool {
 	if x.mode == invalid || T == Typ[Invalid] {
 		return true // avoid spurious errors
 	}
@@ -217,49 +219,15 @@
 	Vu := V.Underlying()
 	Tu := T.Underlying()
 
-	// T is an interface type and x implements T
-	// (Do this check first as it might succeed early.)
-	if Ti, ok := Tu.(*Interface); ok {
-		if Implements(x.typ, Ti) {
-			return true
-		}
-	}
-
-	// x's type V and T have identical underlying types
-	// and at least one of V or T is not a named type
-	if Identical(Vu, Tu) && (!isNamed(V) || !isNamed(T)) {
-		return true
-	}
-
-	// x is a bidirectional channel value, T is a channel
-	// type, x's type V and T have identical element types,
-	// and at least one of V or T is not a named type
-	if Vc, ok := Vu.(*Chan); ok && Vc.dir == SendRecv {
-		if Tc, ok := Tu.(*Chan); ok && Identical(Vc.elem, Tc.elem) {
-			return !isNamed(V) || !isNamed(T)
-		}
-	}
-
-	// x is the predeclared identifier nil and T is a pointer,
-	// function, slice, map, channel, or interface type
-	if x.isNil() {
-		switch t := Tu.(type) {
-		case *Basic:
-			if t.kind == UnsafePointer {
-				return true
-			}
-		case *Pointer, *Signature, *Slice, *Map, *Chan, *Interface:
-			return true
-		}
-		return false
-	}
-
-	// x is an untyped constant representable by a value of type T
+	// x is an untyped value representable by a value of type T
 	// TODO(gri) This is borrowing from checker.convertUntyped and
 	//           checker.representable. Need to clean up.
 	if isUntyped(Vu) {
 		switch t := Tu.(type) {
 		case *Basic:
+			if x.isNil() && t.kind == UnsafePointer {
+				return true
+			}
 			if x.mode == constant_ {
 				return representableConst(x.val, conf, t.kind, nil)
 			}
@@ -274,6 +242,37 @@
 			return x.isNil()
 		}
 	}
+	// Vu is typed
+
+	// x's type V and T have identical underlying types
+	// and at least one of V or T is not a named type
+	if Identical(Vu, Tu) && (!isNamed(V) || !isNamed(T)) {
+		return true
+	}
+
+	// T is an interface type and x implements T
+	if Ti, ok := Tu.(*Interface); ok {
+		if m, wrongType := MissingMethod(x.typ, Ti, true); m != nil /* Implements(x.typ, Ti) */ {
+			if reason != nil {
+				if wrongType {
+					*reason = "wrong type for method " + m.Name()
+				} else {
+					*reason = "missing method " + m.Name()
+				}
+			}
+			return false
+		}
+		return true
+	}
+
+	// x is a bidirectional channel value, T is a channel
+	// type, x's type V and T have identical element types,
+	// and at least one of V or T is not a named type
+	if Vc, ok := Vu.(*Chan); ok && Vc.dir == SendRecv {
+		if Tc, ok := Tu.(*Chan); ok && Identical(Vc.elem, Tc.elem) {
+			return !isNamed(V) || !isNamed(T)
+		}
+	}
 
 	return false
 }
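
Note: when T is an interface, assignableTo now consults MissingMethod
to fill in reason, and the assignment error appends it. Type-checking
this source, for example, reports roughly "cannot use T{} (...) as I
value in variable declaration: missing method M":

	package p

	type I interface{ M() }
	type T struct{}

	var _ I = T{}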
diff --git a/src/go/types/resolver.go b/src/go/types/resolver.go
index c31ef42..b52c3b2 100644
--- a/src/go/types/resolver.go
+++ b/src/go/types/resolver.go
@@ -202,6 +202,11 @@
 						name := imp.name
 						if s.Name != nil {
 							name = s.Name.Name
+							if path == "C" {
+								// match cmd/compile (not prescribed by spec)
+								check.errorf(s.Name.Pos(), `cannot rename import "C"`)
+								continue
+							}
 							if name == "init" {
 								check.errorf(s.Name.Pos(), "cannot declare init - must be func")
 								continue
@@ -216,6 +221,11 @@
 							check.recordImplicit(s, obj)
 						}
 
+						if path == "C" {
+							// match cmd/compile (not prescribed by spec)
+							obj.used = true
+						}
+
 						// add import to file scope
 						if name == "." {
 							// merge imported scope with file scope
diff --git a/src/go/types/stdlib_test.go b/src/go/types/stdlib_test.go
index c6c946e..8fc2ee1 100644
--- a/src/go/types/stdlib_test.go
+++ b/src/go/types/stdlib_test.go
@@ -150,6 +150,7 @@
 		"issue7746.go",   // large constants - consumes too much memory
 		"issue11326.go",  // large constants
 		"issue11326b.go", // large constants
+		"issue11362.go",  // canonical import path check
 	)
 }
 
diff --git a/src/go/types/stmt.go b/src/go/types/stmt.go
index 062b767..973af42 100644
--- a/src/go/types/stmt.go
+++ b/src/go/types/stmt.go
@@ -321,12 +321,20 @@
 		if ch.mode == invalid || x.mode == invalid {
 			return
 		}
-		if tch, ok := ch.typ.Underlying().(*Chan); !ok || tch.dir == RecvOnly || !check.assignment(&x, tch.elem) {
-			if x.mode != invalid {
-				check.invalidOp(ch.pos(), "cannot send %s to channel %s", &x, &ch)
-			}
+
+		tch, ok := ch.typ.Underlying().(*Chan)
+		if !ok {
+			check.invalidOp(s.Arrow, "cannot send to non-chan type %s", ch.typ)
+			return
 		}
 
+		if tch.dir == RecvOnly {
+			check.invalidOp(s.Arrow, "cannot send to receive-only type %s", tch)
+			return
+		}
+
+		check.assignment(&x, tch.elem, "send")
+
 	case *ast.IncDecStmt:
 		var op token.Token
 		switch s.Tok {
@@ -464,7 +472,7 @@
 			check.expr(&x, s.Tag)
 			// By checking assignment of x to an invisible temporary
 			// (as a compiler would), we get all the relevant checks.
-			check.assignment(&x, nil)
+			check.assignment(&x, nil, "switch expression")
 		} else {
 			// spec: "A missing switch expression is
 			// equivalent to the boolean value true."
@@ -766,7 +774,7 @@
 					x.mode = value
 					x.expr = lhs // we don't have a better rhs expression to use here
 					x.typ = typ
-					check.initVar(obj, &x, false)
+					check.initVar(obj, &x, "range clause")
 				} else {
 					obj.typ = Typ[Invalid]
 					obj.used = true // don't complain about unused variable
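
A hedged sketch of driving go/types over a snippet to surface the
reworded send-statement errors above (the exact message text depends on
the go/types version in use; this program is illustration only):

	package main

	import (
		"fmt"
		"go/ast"
		"go/parser"
		"go/token"
		"go/types"
	)

	const src = `package p

	func f() {
		var ch <-chan int
		ch <- 1 // send to receive-only channel
		var n int
		n <- 1 // send to non-channel type
	}`

	func main() {
		fset := token.NewFileSet()
		file, err := parser.ParseFile(fset, "p.go", src, 0)
		if err != nil {
			panic(err)
		}
		conf := types.Config{
			// Report every error instead of stopping at the first.
			Error: func(err error) { fmt.Println(err) },
		}
		conf.Check("p", fset, []*ast.File{file}, nil)
	}
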
diff --git a/src/go/types/testdata/builtins.src b/src/go/types/testdata/builtins.src
index 1931d56..7fb7b58 100644
--- a/src/go/types/testdata/builtins.src
+++ b/src/go/types/testdata/builtins.src
@@ -22,12 +22,12 @@
 	append /* ERROR not used */ (s)
 
 	_ = append(s, b)
-	_ = append(s, x /* ERROR cannot pass argument x */ )
-	_ = append(s, s /* ERROR cannot pass argument s */ )
+	_ = append(s, x /* ERROR cannot use x */ )
+	_ = append(s, s /* ERROR cannot use s */ )
 	_ = append(s... /* ERROR can only use ... with matching parameter */ )
 	_ = append(s, b, s... /* ERROR can only use ... with matching parameter */ )
 	_ = append(s, 1, 2, 3)
-	_ = append(s, 1, 2, 3, x /* ERROR cannot pass argument x */ , 5, 6, 6)
+	_ = append(s, 1, 2, 3, x /* ERROR cannot use x */ , 5, 6, 6)
 	_ = append(s, 1, 2, s... /* ERROR can only use ... with matching parameter */ )
 	_ = append([]interface{}(nil), 1, 2, "foo", x, 3.1425, false)
 
@@ -38,13 +38,13 @@
 	_ = append(s, "foo"...)
 	_ = append(S(s), "foo" /* ERROR cannot convert */ )
 	_ = append(S(s), "foo"...)
-	_ = append(s, t /* ERROR cannot pass argument t */ )
+	_ = append(s, t /* ERROR cannot use t */ )
 	_ = append(s, t...)
 	_ = append(s, T("foo")...)
-	_ = append(S(s), t /* ERROR cannot pass argument t */ )
+	_ = append(S(s), t /* ERROR cannot use t */ )
 	_ = append(S(s), t...)
 	_ = append(S(s), T("foo")...)
-	_ = append([]string{}, t /* ERROR cannot pass argument t */ , "foo")
+	_ = append([]string{}, t /* ERROR cannot use t */ , "foo")
 	_ = append([]T{}, t, "foo")
 }
 
@@ -192,9 +192,9 @@
 	complex /* ERROR not used */ (1, 2)
 
 	var _ complex64 = complex(f32, f32)
-	var _ complex64 = complex /* ERROR cannot initialize */ (f64, f64)
+	var _ complex64 = complex /* ERROR cannot use .* in variable declaration */ (f64, f64)
 
-	var _ complex128 = complex /* ERROR cannot initialize */ (f32, f32)
+	var _ complex128 = complex /* ERROR cannot use .* in variable declaration */ (f32, f32)
 	var _ complex128 = complex(f64, f64)
 
 	// untyped constants
@@ -213,7 +213,7 @@
 	var s uint
 	_ = complex(1 /* ERROR integer */ <<s, 0)
 	const _ = complex /* ERROR not constant */ (1 /* ERROR integer */ <<s, 0)
-	var _ int = complex /* ERROR cannot initialize */ (1 /* ERROR integer */ <<s, 0)
+	var _ int = complex /* ERROR cannot use .* in variable declaration */ (1 /* ERROR integer */ <<s, 0)
 
 	// floating-point argument types must be identical
 	type F32 float32
@@ -319,8 +319,8 @@
 	assert(_6 == 0)
 	f32 = imag(c64)
 	f64 = imag(c128)
-	f32 = imag /* ERROR cannot assign */ (c128)
-	f64 = imag /* ERROR cannot assign */ (c64)
+	f32 = imag /* ERROR cannot use .* in assignment */ (c128)
+	f64 = imag /* ERROR cannot use .* in assignment */ (c64)
 	imag /* ERROR not used */ (c64)
 	_, _ = f32, f64
 
@@ -599,8 +599,8 @@
 	assert(_6 == 0)
 	f32 = real(c64)
 	f64 = real(c128)
-	f32 = real /* ERROR cannot assign */ (c128)
-	f64 = real /* ERROR cannot assign */ (c64)
+	f32 = real /* ERROR cannot use .* in assignment */ (c128)
+	f64 = real /* ERROR cannot use .* in assignment */ (c64)
 	real /* ERROR not used */ (c64)
 
 	// complex type may not be predeclared
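
The updated messages reflect that complex(f64, f64) has type complex128
and so cannot initialize a complex64 variable. The same boundary in a
compilable sketch:

	package main

	import "fmt"

	func main() {
		var f32 float32 = 1
		var f64 float64 = 2

		var a complex64 = complex(f32, f32)  // complex64
		var b complex128 = complex(f64, f64) // complex128
		// var c complex64 = complex(f64, f64) // rejected: complex128
		//                                     // value in a complex64
		//                                     // variable declaration

		fmt.Println(a, b)
	}
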
diff --git a/src/go/types/testdata/const1.src b/src/go/types/testdata/const1.src
index 88e9fad..d827704 100644
--- a/src/go/types/testdata/const1.src
+++ b/src/go/types/testdata/const1.src
@@ -312,3 +312,11 @@
 	y64 = float64(f64)
 	_ = assert(x64 - y64 == 0)
 )
+
+const (
+	_ = int8(-1) << 7
+	_ = int8 /* ERROR "overflows" */ (-1) << 8
+
+	_ = uint32(1) << 31
+	_ = uint32 /* ERROR "overflows" */ (1) << 32
+)
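
The new cases pin down the boundary for shifts of typed constants: the
shift is rejected as soon as the result no longer fits the type. The
same boundary, in a sketch that compiles:

	package main

	import "fmt"

	const (
		a = int8(-1) << 7   // -128: still representable as int8
		b = uint32(1) << 31 // 2147483648: still representable as uint32

		// c = int8(-1) << 8   // rejected: -256 overflows int8
		// d = uint32(1) << 32 // rejected: 4294967296 overflows uint32
	)

	func main() {
		fmt.Println(a, b)
	}
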
diff --git a/src/go/types/testdata/decls1.src b/src/go/types/testdata/decls1.src
index 7855e46..cb162f7 100644
--- a/src/go/types/testdata/decls1.src
+++ b/src/go/types/testdata/decls1.src
@@ -25,7 +25,7 @@
 	s, t string
 	array []byte
 	iface interface{}
-	
+
 	blank _ /* ERROR "cannot use _" */
 )
 
@@ -43,33 +43,33 @@
 	s11 = &v
 	s12 = -(u + *t11) / *&v
 	s13 = a /* ERROR "shifted operand" */ << d
-	s14 = i << j /* ERROR "must be unsigned" */ 
+	s14 = i << j /* ERROR "must be unsigned" */
 	s18 = math.Pi * 10.0
 	s19 = s1 /* ERROR "cannot call" */ ()
  	s20 = f0 /* ERROR "no value" */ ()
 	s21 = f6(1, s1, i)
-	s22 = f6(1, s1, uu /* ERROR "cannot pass argument" */ )
-	
+	s22 = f6(1, s1, uu /* ERROR "cannot use .* in argument" */ )
+
 	t1 int = i + j
 	t2 int = i /* ERROR "mismatched types" */ + x
-	t3 int = c /* ERROR "cannot initialize" */ + d
+	t3 int = c /* ERROR "cannot use .* variable declaration" */ + d
 	t4 string = s + t
 	t5 string = s /* ERROR "invalid operation" */ / t
 	t6 byte = array[t1]
 	t7 byte = array[x /* ERROR "must be integer" */]
-	t8 *int = & /* ERROR "cannot initialize" */ a
+	t8 *int = & /* ERROR "cannot use .* variable declaration" */ a
 	t10 *int = &42 /* ERROR "cannot take address" */
 	t11 *complex64 = &v
 	t12 complex64 = -(u + *t11) / *&v
 	t13 int = a /* ERROR "shifted operand" */ << d
-	t14 int = i << j /* ERROR "must be unsigned" */ 
+	t14 int = i << j /* ERROR "must be unsigned" */
 	t15 math /* ERROR "not in selector" */
 	t16 math /* ERROR "not declared" */ .xxx
 	t17 math /* ERROR "not a type" */ .Pi
 	t18 float64 = math.Pi * 10.0
 	t19 int = t1 /* ERROR "cannot call" */ ()
 	t20 int = f0 /* ERROR "no value" */ ()
-	t21 int = a /* ERROR "cannot initialize" */
+	t21 int = a /* ERROR "cannot use .* variable declaration" */
 )
 
 // Various more complex expressions
diff --git a/src/go/types/testdata/expr0.src b/src/go/types/testdata/expr0.src
index 3120c6f..1aac726 100644
--- a/src/go/types/testdata/expr0.src
+++ b/src/go/types/testdata/expr0.src
@@ -144,7 +144,7 @@
 	ch10, ok = <-ch
 	// ok is of type bool
 	ch11, myok = <-ch
-	_ mybool = myok /* ERROR "cannot initialize" */
+	_ mybool = myok /* ERROR "cannot use .* in variable declaration" */
 )
 
 // address of composite literals
@@ -172,3 +172,9 @@
 	p3 P = &p2
 )
 
+func g() (a, b int) { return }
+
+func _() {
+	_ = -g /* ERROR 2-valued g */ ()
+	_ = <-g /* ERROR 2-valued g */ ()
+}
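
The new "2-valued g" wording names the offending call. The underlying
rule, in a compilable sketch: a multi-valued call is fine on the right
of a matching assignment but not as a single-valued operand.

	package main

	import "fmt"

	func g() (a, b int) { return 1, 2 }

	func main() {
		x, y := g() // ok: 2-valued call in a 2-value assignment
		fmt.Println(x + y)

		// _ = -g()  // rejected: 2-valued g() as operand of unary minus
		// _ = <-g() // rejected: 2-valued g() as operand of receive
	}
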
diff --git a/src/go/types/testdata/expr1.src b/src/go/types/testdata/expr1.src
index 8ef0aed..eaaf610 100644
--- a/src/go/types/testdata/expr1.src
+++ b/src/go/types/testdata/expr1.src
@@ -5,3 +5,123 @@
 // binary expressions
 
 package expr1
+
+type mybool bool
+
+func _(x, y bool, z mybool) {
+	x = x || y
+	x = x || true
+	x = x || false
+	x = x && y
+	x = x && true
+	x = x && false
+
+	z = z /* ERROR mismatched types */ || y
+	z = z || true
+	z = z || false
+	z = z /* ERROR mismatched types */ && y
+	z = z && true
+	z = z && false
+}
+
+type myint int
+
+func _(x, y int, z myint) {
+	x = x + 1
+	x = x + 1.0
+	x = x + 1.1 // ERROR truncated to int
+	x = x + y
+	x = x - y
+	x = x * y
+	x = x / y
+	x = x % y
+	x = x << y // ERROR must be unsigned integer
+	x = x >> y // ERROR must be unsigned integer
+
+	z = z + 1
+	z = z + 1.0
+	z = z + 1.1 // ERROR truncated to int
+	z = z /* ERROR mismatched types */ + y
+	z = z /* ERROR mismatched types */ - y
+	z = z /* ERROR mismatched types */ * y
+	z = z /* ERROR mismatched types */ / y
+	z = z /* ERROR mismatched types */ % y
+	z = z << y // ERROR must be unsigned integer
+	z = z >> y // ERROR must be unsigned integer
+}
+
+type myuint uint
+
+func _(x, y uint, z myuint) {
+	x = x + 1
+	x = x + - /* ERROR overflows uint */ 1
+	x = x + 1.0
+	x = x + 1.1 // ERROR truncated to uint
+	x = x + y
+	x = x - y
+	x = x * y
+	x = x / y
+	x = x % y
+	x = x << y
+	x = x >> y
+
+	z = z + 1
+	z = x + - /* ERROR overflows uint */ 1
+	z = z + 1.0
+	z = z + 1.1 // ERROR truncated to uint
+	z = z /* ERROR mismatched types */ + y
+	z = z /* ERROR mismatched types */ - y
+	z = z /* ERROR mismatched types */ * y
+	z = z /* ERROR mismatched types */ / y
+	z = z /* ERROR mismatched types */ % y
+	z = z << y
+	z = z >> y
+}
+
+type myfloat64 float64
+
+func _(x, y float64, z myfloat64) {
+	x = x + 1
+	x = x + -1
+	x = x + 1.0
+	x = x + 1.1
+	x = x + y
+	x = x - y
+	x = x * y
+	x = x / y
+	x = x /* ERROR not defined */ % y
+	x = x /* ERROR operand x .* must be integer */ << y
+	x = x /* ERROR operand x .* must be integer */ >> y
+
+	z = z + 1
+	z = z + -1
+	z = z + 1.0
+	z = z + 1.1
+	z = z /* ERROR mismatched types */ + y
+	z = z /* ERROR mismatched types */ - y
+	z = z /* ERROR mismatched types */ * y
+	z = z /* ERROR mismatched types */ / y
+	z = z /* ERROR mismatched types */ % y
+	z = z /* ERROR operand z .* must be integer */ << y
+	z = z /* ERROR operand z .* must be integer */ >> y
+}
+
+type mystring string
+
+func _(x, y string, z mystring) {
+	x = x + "foo"
+	x = x /* ERROR not defined */ - "foo"
+	x = x + 1 // ERROR cannot convert
+	x = x + y
+	x = x /* ERROR not defined */ - y
+	x = x * 10 // ERROR cannot convert
+}
+
+func f() (a, b int) { return }
+
+func _(x int) {
+	_ = f /* ERROR 2-valued f */ () + 1
+	_ = x + f /* ERROR 2-valued f */ ()
+	_ = f /* ERROR 2-valued f */ () + f
+	_ = f /* ERROR 2-valued f */ () + f /* ERROR 2-valued f */ ()
+}
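
The "mismatched types" cases above all stem from one rule: a defined
(named) type and its underlying type do not mix in binary expressions
without a conversion, though untyped constants still convert
implicitly. A compilable sketch:

	package main

	import "fmt"

	type myint int

	func main() {
		var x int = 1
		var z myint = 2

		z = z + 1        // ok: untyped constant converts to myint
		z = z + myint(x) // ok: explicit conversion aligns the types
		// z = z + x     // rejected: mismatched types myint and int
		// z = z + 1.1   // rejected: constant truncated to integer

		fmt.Println(z, x)
	}
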
diff --git a/src/go/types/testdata/expr3.src b/src/go/types/testdata/expr3.src
index 5772095..53c03e7 100644
--- a/src/go/types/testdata/expr3.src
+++ b/src/go/types/testdata/expr3.src
@@ -28,7 +28,7 @@
 	a0 = a[0]
 	_ = a0
 	var a1 int32
-	a1 = a /* ERROR "cannot assign" */ [1]
+	a1 = a /* ERROR "cannot use .* in assignment" */ [1]
 	_ = a1
 
 	_ = a[9]
@@ -113,7 +113,7 @@
 	t0 = t[0]
 	_ = t0
 	var t1 rune
-	t1 = t /* ERROR "cannot assign" */ [2]
+	t1 = t /* ERROR "cannot use .* in assignment" */ [2]
 	_ = t1
 	_ = ("foo" + "bar")[5]
 	_ = ("foo" + "bar")[6 /* ERROR "index .* out of bounds" */ ]
@@ -126,7 +126,7 @@
 	c0 = c[0]
 	_ = c0
 	var c2 float32
-	c2 = c /* ERROR "cannot assign" */ [2]
+	c2 = c /* ERROR "cannot use .* in assignment" */ [2]
 	_ = c[3 /* ERROR "index .* out of bounds" */ ]
 	_ = ""[0 /* ERROR "index .* out of bounds" */ ]
 	_ = c2
@@ -140,8 +140,8 @@
 	var i, j int
 	ss = "foo"[1:2]
 	ss = "foo"[i:j]
-	ms = "foo" /* ERROR "cannot assign" */ [1:2]
-	ms = "foo" /* ERROR "cannot assign" */ [i:j]
+	ms = "foo" /* ERROR "cannot use .* in assignment" */ [1:2]
+	ms = "foo" /* ERROR "cannot use .* in assignment" */ [i:j]
 	_, _ = ss, ms
 }
 
@@ -253,7 +253,7 @@
 	var a13 [3]int
 	var a14 [4]int
 	a13 = a1
-	a14 = a1 /* ERROR "cannot assign" */
+	a14 = a1 /* ERROR "cannot use .* in assignment" */
 	_, _ = a13, a14
 
 	a2 := [...]int{- /* ERROR "negative" */ 1: 0}
@@ -465,7 +465,7 @@
 	f1(10.0)
 	f1() /* ERROR "too few arguments" */
 	f1(x, y /* ERROR "too many arguments" */ )
-	f1(s /* ERROR "cannot pass" */ )
+	f1(s /* ERROR "cannot use .* in argument" */ )
 	f1(x ... /* ERROR "cannot use ..." */ )
 	f1(g0 /* ERROR "used as value" */ ())
 	f1(g1())
@@ -474,57 +474,57 @@
 	f2() /* ERROR "too few arguments" */
 	f2(3.14) /* ERROR "too few arguments" */
 	f2(3.14, "foo")
-	f2(x /* ERROR "cannot pass" */ , "foo")
+	f2(x /* ERROR "cannot use .* in argument" */ , "foo")
 	f2(g0 /* ERROR "used as value" */ ())
-	f2(g1 /* ERROR "cannot pass" */ ()) /* ERROR "too few arguments" */
+	f2(g1 /* ERROR "cannot use .* in argument" */ ()) /* ERROR "too few arguments" */
 	f2(g2())
 
 	fs() /* ERROR "too few arguments" */
 	fs(g0 /* ERROR "used as value" */ ())
-	fs(g1 /* ERROR "cannot pass" */ ())
-	fs(g2 /* ERROR "cannot pass" */ /* ERROR "too many arguments" */ ())
+	fs(g1 /* ERROR "cannot use .* in argument" */ ())
+	fs(g2 /* ERROR "cannot use .* in argument" */ /* ERROR "too many arguments" */ ())
 	fs(gs())
 
 	fv()
 	fv(1, 2.0, x)
-	fv(s /* ERROR "cannot pass" */ )
+	fv(s /* ERROR "cannot use .* in argument" */ )
 	fv(s...)
 	fv(x /* ERROR "cannot use" */ ...)
 	fv(1, s... /* ERROR "can only use ... with matching parameter" */ )
-	fv(gs /* ERROR "cannot pass" */ ())
-	fv(gs /* ERROR "cannot pass" */ ()...)
+	fv(gs /* ERROR "cannot use .* in argument" */ ())
+	fv(gs /* ERROR "cannot use .* in argument" */ ()...)
 
 	var t T
 	t.fm()
 	t.fm(1, 2.0, x)
-	t.fm(s /* ERROR "cannot pass" */ )
+	t.fm(s /* ERROR "cannot use .* in argument" */ )
 	t.fm(g1())
 	t.fm(1, s... /* ERROR "can only use ... with matching parameter" */ )
-	t.fm(gs /* ERROR "cannot pass" */ ())
-	t.fm(gs /* ERROR "cannot pass" */ ()...)
+	t.fm(gs /* ERROR "cannot use .* in argument" */ ())
+	t.fm(gs /* ERROR "cannot use .* in argument" */ ()...)
 
 	T.fm(t, )
 	T.fm(t, 1, 2.0, x)
-	T.fm(t, s /* ERROR "cannot pass" */ )
+	T.fm(t, s /* ERROR "cannot use .* in argument" */ )
 	T.fm(t, g1())
 	T.fm(t, 1, s... /* ERROR "can only use ... with matching parameter" */ )
-	T.fm(t, gs /* ERROR "cannot pass" */ ())
-	T.fm(t, gs /* ERROR "cannot pass" */ ()...)
+	T.fm(t, gs /* ERROR "cannot use .* in argument" */ ())
+	T.fm(t, gs /* ERROR "cannot use .* in argument" */ ()...)
 
 	var i interface{ fm(x ...int) } = t
 	i.fm()
 	i.fm(1, 2.0, x)
-	i.fm(s /* ERROR "cannot pass" */ )
+	i.fm(s /* ERROR "cannot use .* in argument" */ )
 	i.fm(g1())
 	i.fm(1, s... /* ERROR "can only use ... with matching parameter" */ )
-	i.fm(gs /* ERROR "cannot pass" */ ())
-	i.fm(gs /* ERROR "cannot pass" */ ()...)
+	i.fm(gs /* ERROR "cannot use .* in argument" */ ())
+	i.fm(gs /* ERROR "cannot use .* in argument" */ ()...)
 
 	fi()
 	fi(1, 2.0, x, 3.14, "foo")
 	fi(g2())
 	fi(0, g2)
-	fi(0, g2 /* ERROR "2-valued expression" */ ())
+	fi(0, g2 /* ERROR "2-valued g2" */ ())
 }
 
 func issue6344() {
diff --git a/src/go/types/testdata/importC.src b/src/go/types/testdata/importC.src
new file mode 100644
index 0000000..31436be6
--- /dev/null
+++ b/src/go/types/testdata/importC.src
@@ -0,0 +1,10 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package importC
+
+import "C"
+import _ /* ERROR cannot rename import "C" */ "C"
+import foo /* ERROR cannot rename import "C" */ "C"
+import . /* ERROR cannot rename import "C" */ "C"
diff --git a/src/go/types/testdata/issues.src b/src/go/types/testdata/issues.src
index 9e2c901..564d064 100644
--- a/src/go/types/testdata/issues.src
+++ b/src/go/types/testdata/issues.src
@@ -17,8 +17,7 @@
 
 func issue8066() {
 	const (
-		// TODO(gri) Enable test below for releases 1.4 and higher
-		// _ = float32(340282356779733661637539395458142568447)
+		_ = float32(340282356779733661637539395458142568447)
 		_ = float32(340282356779733661637539395458142568448 /* ERROR cannot convert */ )
 	)
 }
@@ -53,23 +52,23 @@
 	_ = append(f0())
 	_ = append(f0(), f0()...)
 	_ = append(f1())
-	_ = append(f2 /* ERROR cannot pass argument */ ())
+	_ = append(f2 /* ERROR cannot use .* in argument */ ())
 	_ = append(f2()... /* ERROR cannot use ... */ )
-	_ = append(f0(), f1 /* ERROR 2-valued expression */ ())
-	_ = append(f0(), f2 /* ERROR 2-valued expression */ ())
-	_ = append(f0(), f1()... /* ERROR cannot use ... */ )
-	_ = append(f0(), f2()... /* ERROR cannot use ... */ )
+	_ = append(f0(), f1 /* ERROR 2-valued f1 */ ())
+	_ = append(f0(), f2 /* ERROR 2-valued f2 */ ())
+	_ = append(f0(), f1 /* ERROR 2-valued f1 */ ()...)
+	_ = append(f0(), f2 /* ERROR 2-valued f2 */ ()...)
 
 	// variadic user-defined function
 	append_(f0())
 	append_(f0(), f0()...)
 	append_(f1())
-	append_(f2 /* ERROR cannot pass argument */ ())
+	append_(f2 /* ERROR cannot use .* in argument */ ())
 	append_(f2()... /* ERROR cannot use ... */ )
-	append_(f0(), f1 /* ERROR 2-valued expression */ ())
-	append_(f0(), f2 /* ERROR 2-valued expression */ ())
-	append_(f0(), f1()... /* ERROR cannot use */ )
-	append_(f0(), f2()... /* ERROR cannot use */ )
+	append_(f0(), f1 /* ERROR 2-valued f1 */ ())
+	append_(f0(), f2 /* ERROR 2-valued f2 */ ())
+	append_(f0(), f1 /* ERROR 2-valued f1 */ ()...)
+	append_(f0(), f2 /* ERROR 2-valued f2 */ ()...)
 }
 
 // Check that embedding a non-interface type in an interface results in a good error message.
@@ -101,3 +100,56 @@
 var a1, b1 /* ERROR cycle */ , c1 /* ERROR cycle */ b1 = 0 > 0<<""[""[c1]]>c1
 var a2, b2 /* ERROR cycle */ = 0 /* ERROR mismatch */ /* ERROR mismatch */ > 0<<""[b2]
 var a3, b3 /* ERROR cycle */ = int /* ERROR mismatch */ /* ERROR mismatch */ (1<<""[b3])
+
+// issue10260
+// Check that error messages explain reason for interface assignment failures.
+type (
+	I0 interface{}
+	I1 interface{ foo() }
+	I2 interface{ foo(x int) }
+	T0 struct{}
+	T1 struct{}
+	T2 struct{}
+)
+
+func (*T1) foo() {}
+func (*T2) foo(x int) {}
+
+func issue10260() {
+	var (
+		i0 I0
+		i1 I1
+		i2 I2
+		t0 *T0
+		t1 *T1
+		t2 *T2
+	)
+	i1 = i0 /* ERROR cannot use .* missing method foo */
+	i1 = t0 /* ERROR cannot use .* missing method foo */
+	i1 = i2 /* ERROR cannot use .* wrong type for method foo */
+	i1 = t2 /* ERROR cannot use .* wrong type for method foo */
+	i2 = i1 /* ERROR cannot use .* wrong type for method foo */
+	i2 = t1 /* ERROR cannot use .* wrong type for method foo */
+
+	_ = func() I1 { return i0 /* ERROR cannot use .* missing method foo */ }
+	_ = func() I1 { return t0 /* ERROR cannot use .* missing method foo */ }
+	_ = func() I1 { return i2 /* ERROR cannot use .* wrong type for method foo */ }
+	_ = func() I1 { return t2 /* ERROR cannot use .* wrong type for method foo */ }
+	_ = func() I2 { return i1 /* ERROR cannot use .* wrong type for method foo */ }
+	_ = func() I2 { return t1 /* ERROR cannot use .* wrong type for method foo */ }
+
+	// a few more - less exhaustive now
+
+	f := func(I1, I2){}
+	f(i0 /* ERROR cannot use .* missing method foo */ , i1 /* ERROR cannot use .* wrong type for method foo */)
+
+	_ = [...]I1{i0 /* ERROR cannot use .* missing method foo */ }
+	_ = [...]I1{i2 /* ERROR cannot use .* wrong type for method foo */ }
+	_ = []I1{i0 /* ERROR cannot use .* missing method foo */ }
+	_ = []I1{i2 /* ERROR cannot use .* wrong type for method foo */ }
+	_ = map[int]I1{0: i0 /* ERROR cannot use .* missing method foo */ }
+	_ = map[int]I1{0: i2 /* ERROR cannot use .* wrong type for method foo */ }
+
+	make(chan I1) <- i0 /* ERROR cannot use .* in send: missing method foo */
+	make(chan I1) <- i2 /* ERROR cannot use .* in send: wrong type for method foo */
+}
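
The issue10260 cases exercise the two reason strings that assignableTo
now produces. The underlying language rule, in a compilable sketch
(these types are made up):

	package main

	import "fmt"

	type I interface{ foo() }

	type T0 struct{} // no methods: missing method foo
	type T2 struct{} // has foo, but with the wrong signature

	func (*T2) foo(x int) {}

	func main() {
		var i I
		// i = &T0{} // rejected: *T0 is missing method foo
		// i = &T2{} // rejected: *T2 has the wrong type for method foo
		fmt.Println(i == nil)
	}
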
diff --git a/src/go/types/testdata/stmt0.src b/src/go/types/testdata/stmt0.src
index e946066..b7966ed 100644
--- a/src/go/types/testdata/stmt0.src
+++ b/src/go/types/testdata/stmt0.src
@@ -37,11 +37,11 @@
 
 func assignments1() {
 	b, i, f, c, s := false, 1, 1.0, 1i, "foo"
-	b = i /* ERROR "cannot assign" */
-	i = f /* ERROR "cannot assign" */
-	f = c /* ERROR "cannot assign" */
-	c = s /* ERROR "cannot assign" */
-	s = b /* ERROR "cannot assign" */
+	b = i /* ERROR "cannot use .* in assignment" */
+	i = f /* ERROR "cannot use .* in assignment" */
+	f = c /* ERROR "cannot use .* in assignment" */
+	c = s /* ERROR "cannot use .* in assignment" */
+	s = b /* ERROR "cannot use .* in assignment" */
 
 	v0, v1, v2 := 1 /* ERROR "mismatch" */ , 2, 3, 4
 	_, _, _ = v0, v1, v2
@@ -180,8 +180,8 @@
 	var ch chan int
 	var rch <-chan int
 	var x int
-	x /* ERROR "cannot send" */ <- x
-	rch /* ERROR "cannot send" */ <- x
+	x <- /* ERROR "cannot send" */ x
+	rch <- /* ERROR "cannot send" */ x
 	ch <- "foo" /* ERROR "cannot convert" */
 	ch <- x
 }
@@ -381,7 +381,7 @@
 func returns1(x float64) (int, *float64) {
 	return 0, &x
 	return /* ERROR wrong number of return values */
-	return "foo" /* ERROR "cannot convert" */, x /* ERROR "cannot return" */
+	return "foo" /* ERROR "cannot convert" */, x /* ERROR "cannot use .* in return statement" */
 	return /* ERROR wrong number of return values */ 0, &x, 1
 }
 
@@ -421,7 +421,7 @@
 
 	true := "false"
 	_ = true
-	// A tagless switch is equivalent to the bool 
+	// A tagless switch is equivalent to the bool
         // constant true, not the identifier 'true'.
 	switch {
 	case "false" /* ERROR "cannot convert" */:
@@ -631,14 +631,14 @@
 
 func issue11687() {
 	f := func() (_, _ int) { return }
-	switch f /* ERROR "2-valued expression" */ () {
+	switch f /* ERROR "2-valued f" */ () {
 	}
 	var x int
-	switch f /* ERROR "2-valued expression" */ () {
+	switch f /* ERROR "2-valued f" */ () {
 	case x:
 	}
 	switch x {
-	case f /* ERROR "cannot compare" */ (): // TODO(gri) better error message (issue 11896)
+	case f /* ERROR "2-valued f" */ ():
 	}
 }
 
@@ -682,16 +682,16 @@
 
 	switch t := x.(type) {
 	case nil:
-		var v bool = t /* ERROR "cannot initialize" */
+		var v bool = t /* ERROR "cannot use .* in variable declaration" */
 		_ = v
 	case int:
 		var v int = t
 		_ = v
 	case float32, complex64:
-		var v float32 = t /* ERROR "cannot initialize" */
+		var v float32 = t /* ERROR "cannot use .* in variable declaration" */
 		_ = v
 	default:
-		var v float32 = t /* ERROR "cannot initialize" */
+		var v float32 = t /* ERROR "cannot use .* in variable declaration" */
 		_ = v
 	}
 
@@ -801,7 +801,7 @@
 		ii = i
 		_ = ii
 		var xx float64
-		xx = x /* ERROR "cannot assign" */
+		xx = x /* ERROR "cannot use .* in assignment" */
 		_ = xx
 	}
 	var ii int
@@ -852,7 +852,7 @@
 	for range m {}
 	for k := range m {
 		var kk int32
-		kk = k /* ERROR "cannot assign" */
+		kk = k /* ERROR "cannot use .* in assignment" */
 		_ = kk
 	}
 	for k, v := range m {
@@ -894,17 +894,17 @@
 	var a [10]int
 	var i I
 	_ = i
-	for i /* ERROR cannot assign */ = range a {}
-	for i /* ERROR cannot assign */ = range &a {}
-	for i /* ERROR cannot assign */ = range a[:] {}
+	for i /* ERROR cannot use .* in assignment */ = range a {}
+	for i /* ERROR cannot use .* in assignment */ = range &a {}
+	for i /* ERROR cannot use .* in assignment */ = range a[:] {}
 
 	var s string
 	var r R
 	_ = r
-	for i /* ERROR cannot assign */ = range s {}
-	for i /* ERROR cannot assign */ = range "foo" {}
-	for _, r /* ERROR cannot assign */ = range s {}
-	for _, r /* ERROR cannot assign */ = range "foo" {}
+	for i /* ERROR cannot use .* in assignment */ = range s {}
+	for i /* ERROR cannot use .* in assignment */ = range "foo" {}
+	for _, r /* ERROR cannot use .* in assignment */ = range s {}
+	for _, r /* ERROR cannot use .* in assignment */ = range "foo" {}
 }
 
 func issue6766b() {
@@ -937,7 +937,7 @@
 	L1:
 	L1 /* ERROR "already declared" */ :
 	if true {
-		goto L2		
+		goto L2
 		L2:
 		L0 /* ERROR "already declared" */ :
 	}
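
The type-switch cases above pin down the type of the switch variable in
each clause: the case type for a single type, and the type of the
switched operand for nil, multi-type, and default clauses. A runnable
sketch of the same rule:

	package main

	import "fmt"

	func describe(x interface{}) {
		switch t := x.(type) {
		case nil:
			fmt.Println("nil; t has the type of x:", t)
		case int:
			fmt.Println("int:", t+1) // t is an int in this clause
		case float32, complex64:
			// More than one type: t keeps the type of x.
			fmt.Println("float32 or complex64:", t)
		default:
			fmt.Println("something else:", t)
		}
	}

	func main() {
		describe(nil)
		describe(42)
		describe(float32(1.5))
		describe("hello")
	}
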
diff --git a/src/go/types/testdata/vardecl.src b/src/go/types/testdata/vardecl.src
index fb6b5f7..0082537 100644
--- a/src/go/types/testdata/vardecl.src
+++ b/src/go/types/testdata/vardecl.src
@@ -31,7 +31,7 @@
 var _, _ = 1 /* ERROR "assignment count mismatch" */
 var _, _, _ /* ERROR "missing init expr for _" */ = 1, 2
 
-var _ = g /* ERROR "2-valued expr" */ ()
+var _ = g /* ERROR "2-valued g" */ ()
 var _, _ = g()
 var _, _, _ = g /* ERROR "assignment count mismatch" */ ()
 
@@ -50,7 +50,7 @@
 	_, _ = 1 /* ERROR "assignment count mismatch" */
 	_, _, _ /* ERROR "missing init expr for _" */ = 1, 2
 
-	_ = g /* ERROR "2-valued expr" */ ()
+	_ = g /* ERROR "2-valued g" */ ()
 	_, _ = g()
 	_, _, _ = g /* ERROR "assignment count mismatch" */ ()
 
diff --git a/src/go/types/type.go b/src/go/types/type.go
index 1df8b45..b93039a 100644
--- a/src/go/types/type.go
+++ b/src/go/types/type.go
@@ -360,7 +360,7 @@
 // A ChanDir value indicates a channel direction.
 type ChanDir int
 
-// The direction of a channel is indicated by one of the following constants.
+// The direction of a channel is indicated by one of these constants.
 const (
 	SendRecv ChanDir = iota
 	SendOnly
diff --git a/src/hash/adler32/adler32.go b/src/hash/adler32/adler32.go
index 7c80796..0c733f7 100644
--- a/src/hash/adler32/adler32.go
+++ b/src/hash/adler32/adler32.go
@@ -33,6 +33,7 @@
 func (d *digest) Reset() { *d = 1 }
 
 // New returns a new hash.Hash32 computing the Adler-32 checksum.
+// Its Sum method will lay the value out in big-endian byte order.
 func New() hash.Hash32 {
 	d := new(digest)
 	d.Reset()
diff --git a/src/hash/crc32/crc32.go b/src/hash/crc32/crc32.go
index 228cc04..d417555 100644
--- a/src/hash/crc32/crc32.go
+++ b/src/hash/crc32/crc32.go
@@ -113,10 +113,12 @@
 
 // New creates a new hash.Hash32 computing the CRC-32 checksum
 // using the polynomial represented by the Table.
+// Its Sum method will lay the value out in big-endian byte order.
 func New(tab *Table) hash.Hash32 { return &digest{0, tab} }
 
 // NewIEEE creates a new hash.Hash32 computing the CRC-32 checksum
 // using the IEEE polynomial.
+// Its Sum method will lay the value out in big-endian byte order.
 func NewIEEE() hash.Hash32 { return New(IEEETable) }
 
 func (d *digest) Size() int { return Size }
@@ -149,15 +151,11 @@
 
 // Update returns the result of adding the bytes in p to the crc.
 func Update(crc uint32, tab *Table, p []byte) uint32 {
-	if tab == castagnoliTable {
+	switch tab {
+	case castagnoliTable:
 		return updateCastagnoli(crc, p)
-	}
-	// only use slicing-by-8 when input is larger than 4KB
-	if tab == IEEETable && len(p) >= 4096 {
-		iEEETable8Once.Do(func() {
-			iEEETable8 = makeTable8(IEEE)
-		})
-		return updateSlicingBy8(crc, iEEETable8, p)
+	case IEEETable:
+		return updateIEEE(crc, p)
 	}
 	return update(crc, tab, p)
 }
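
With Update dispatching through the switch, both built-in tables reach
their fast paths whether the data arrives in one shot or in chunks. For
example, incremental Update matches a one-shot Checksum:

	package main

	import (
		"fmt"
		"hash/crc32"
	)

	func main() {
		tab := crc32.MakeTable(crc32.Castagnoli)

		var crc uint32
		for _, chunk := range [][]byte{[]byte("hello, "), []byte("world")} {
			crc = crc32.Update(crc, tab, chunk)
		}
		fmt.Printf("%08x\n", crc)

		// Same result in one shot.
		fmt.Printf("%08x\n", crc32.Checksum([]byte("hello, world"), tab))
	}
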
diff --git a/src/hash/crc32/crc32_amd64.go b/src/hash/crc32/crc32_amd64.go
new file mode 100644
index 0000000..13e483d
--- /dev/null
+++ b/src/hash/crc32/crc32_amd64.go
@@ -0,0 +1,54 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package crc32
+
+// This file contains the code to call the SSE 4.2 version of the Castagnoli
+// and IEEE CRC.
+
+// haveSSE41/haveSSE42/haveCLMUL are defined in crc_amd64.s and use
+// CPUID to test for SSE 4.1, 4.2 and CLMUL support.
+func haveSSE41() bool
+func haveSSE42() bool
+func haveCLMUL() bool
+
+// castagnoliSSE42 is defined in crc_amd64.s and uses the SSE4.2 CRC32
+// instruction.
+func castagnoliSSE42(crc uint32, p []byte) uint32
+
+// ieeeCLMUL is defined in crc_amd64.s and uses the PCLMULQDQ
+// instruction as well as SSE 4.1.
+func ieeeCLMUL(crc uint32, p []byte) uint32
+
+var sse42 = haveSSE42()
+var useFastIEEE = haveCLMUL() && haveSSE41()
+
+func updateCastagnoli(crc uint32, p []byte) uint32 {
+	if sse42 {
+		return castagnoliSSE42(crc, p)
+	}
+	return update(crc, castagnoliTable, p)
+}
+
+func updateIEEE(crc uint32, p []byte) uint32 {
+	if useFastIEEE && len(p) >= 64 {
+		left := len(p) & 15
+		do := len(p) - left
+		crc = ^ieeeCLMUL(^crc, p[:do])
+		if left > 0 {
+			crc = update(crc, IEEETable, p[do:])
+		}
+		return crc
+	}
+
+	// only use slicing-by-8 when input is >= 4KB
+	if len(p) >= 4096 {
+		iEEETable8Once.Do(func() {
+			iEEETable8 = makeTable8(IEEE)
+		})
+		return updateSlicingBy8(crc, iEEETable8, p)
+	}
+
+	return update(crc, IEEETable, p)
+}
diff --git a/src/hash/crc32/crc32_amd64.s b/src/hash/crc32/crc32_amd64.s
index 30b0d06..11d9bb5 100644
--- a/src/hash/crc32/crc32_amd64.s
+++ b/src/hash/crc32/crc32_amd64.s
@@ -62,3 +62,172 @@
 	MOVB CX, ret+0(FP)
 	RET
 
+// func haveCLMUL() bool
+TEXT ·haveCLMUL(SB),NOSPLIT,$0
+	XORQ AX, AX
+	INCL AX
+	CPUID
+	SHRQ $1, CX
+	ANDQ $1, CX
+	MOVB CX, ret+0(FP)
+	RET
+
+// func haveSSE41() bool
+TEXT ·haveSSE41(SB),NOSPLIT,$0
+	XORQ AX, AX
+	INCL AX
+	CPUID
+	SHRQ $19, CX
+	ANDQ $1, CX
+	MOVB CX, ret+0(FP)
+	RET
+
+// CRC32 polynomial data
+//
+// These constants are lifted from the
+// Linux kernel, since they avoid the costly
+// PSHUFB 16 byte reversal proposed in the
+// original Intel paper.
+DATA r2r1<>+0(SB)/8, $0x154442bd4
+DATA r2r1<>+8(SB)/8, $0x1c6e41596
+DATA r4r3<>+0(SB)/8, $0x1751997d0
+DATA r4r3<>+8(SB)/8, $0x0ccaa009e
+DATA rupoly<>+0(SB)/8, $0x1db710641
+DATA rupoly<>+8(SB)/8, $0x1f7011641
+DATA r5<>+0(SB)/8, $0x163cd6124
+
+GLOBL r2r1<>(SB),RODATA,$16
+GLOBL r4r3<>(SB),RODATA,$16
+GLOBL rupoly<>(SB),RODATA,$16
+GLOBL r5<>(SB),RODATA,$8
+
+// Based on http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
+// len(p) must be at least 64, and must be a multiple of 16.
+
+// func ieeeCLMUL(crc uint32, p []byte) uint32
+TEXT ·ieeeCLMUL(SB),NOSPLIT,$0
+	MOVL   crc+0(FP), X0             // Initial CRC value
+	MOVQ   p+8(FP), SI               // data pointer
+	MOVQ   p_len+16(FP), CX          // len(p)
+
+	MOVOU  (SI), X1
+	MOVOU  16(SI), X2
+	MOVOU  32(SI), X3
+	MOVOU  48(SI), X4
+	PXOR   X0, X1
+	ADDQ   $64, SI                  // buf+=64
+	SUBQ   $64, CX                  // len-=64
+	CMPQ   CX, $64                  // Less than 64 bytes left?
+	JB     remain64
+
+	MOVOA  r2r1<>+0(SB), X0
+loopback64:
+	MOVOA  X1, X5
+	MOVOA  X2, X6
+	MOVOA  X3, X7
+	MOVOA  X4, X8
+
+	PCLMULQDQ $0, X0, X1
+	PCLMULQDQ $0, X0, X2
+	PCLMULQDQ $0, X0, X3
+	PCLMULQDQ $0, X0, X4
+
+	/* Load next early */
+	MOVOU    (SI), X11
+	MOVOU    16(SI), X12
+	MOVOU    32(SI), X13
+	MOVOU    48(SI), X14
+
+	PCLMULQDQ $0x11, X0, X5
+	PCLMULQDQ $0x11, X0, X6
+	PCLMULQDQ $0x11, X0, X7
+	PCLMULQDQ $0x11, X0, X8
+
+	PXOR     X5, X1
+	PXOR     X6, X2
+	PXOR     X7, X3
+	PXOR     X8, X4
+
+	PXOR     X11, X1
+	PXOR     X12, X2
+	PXOR     X13, X3
+	PXOR     X14, X4
+
+	ADDQ    $0x40, DI
+	ADDQ    $64, SI      // buf+=64
+	SUBQ    $64, CX      // len-=64
+	CMPQ    CX, $64      // Less than 64 bytes left?
+	JGE     loopback64
+
+	/* Fold result into a single register (X1) */
+remain64:
+	MOVOA       r4r3<>+0(SB), X0
+
+	MOVOA       X1, X5
+	PCLMULQDQ   $0, X0, X1
+	PCLMULQDQ   $0x11, X0, X5
+	PXOR        X5, X1
+	PXOR        X2, X1
+
+	MOVOA       X1, X5
+	PCLMULQDQ   $0, X0, X1
+	PCLMULQDQ   $0x11, X0, X5
+	PXOR        X5, X1
+	PXOR        X3, X1
+
+	MOVOA       X1, X5
+	PCLMULQDQ   $0, X0, X1
+	PCLMULQDQ   $0x11, X0, X5
+	PXOR        X5, X1
+	PXOR        X4, X1
+
+	/* If there are fewer than 16 bytes left we are done */
+	CMPQ        CX, $16
+	JB          finish
+
+	/* Encode 16 bytes */
+remain16:
+	MOVOU       (SI), X10
+	MOVOA       X1, X5
+	PCLMULQDQ   $0, X0, X1
+	PCLMULQDQ   $0x11, X0, X5
+	PXOR        X5, X1
+	PXOR        X10, X1
+	SUBQ        $16, CX
+	ADDQ        $16, SI
+	CMPQ        CX, $16
+	JGE         remain16
+
+finish:
+	/* Fold final result into 32 bits and return it */
+	PCMPEQB     X3, X3
+	PCLMULQDQ   $1, X1, X0
+	PSRLDQ      $8, X1
+	PXOR        X0, X1
+
+	MOVOA       X1, X2
+	MOVQ        r5<>+0(SB), X0
+
+	/* Creates 32 bit mask. Note that we don't care about upper half. */
+	PSRLQ       $32, X3
+
+	PSRLDQ      $4, X2
+	PAND        X3, X1
+	PCLMULQDQ   $0, X0, X1
+	PXOR        X2, X1
+
+	MOVOA       rupoly<>+0(SB), X0
+
+	MOVOA       X1, X2
+	PAND        X3, X1
+	PCLMULQDQ   $0x10, X0, X1
+	PAND        X3, X1
+	PCLMULQDQ   $0, X0, X1
+	PXOR        X2, X1
+
+	/* PEXTRD   $1, X1, AX  (SSE 4.1) */
+	BYTE $0x66; BYTE $0x0f; BYTE $0x3a;
+	BYTE $0x16; BYTE $0xc8; BYTE $0x01;
+	MOVL        AX, ret+32(FP)
+
+	RET
diff --git a/src/hash/crc32/crc32_amd64x.go b/src/hash/crc32/crc32_amd64p32.go
similarity index 63%
rename from src/hash/crc32/crc32_amd64x.go
rename to src/hash/crc32/crc32_amd64p32.go
index b7e3599..40241c5 100644
--- a/src/hash/crc32/crc32_amd64x.go
+++ b/src/hash/crc32/crc32_amd64p32.go
@@ -2,14 +2,12 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// +build amd64 amd64p32
-
 package crc32
 
 // This file contains the code to call the SSE 4.2 version of the Castagnoli
 // CRC.
 
-// haveSSE42 is defined in crc_amd64.s and uses CPUID to test for SSE 4.2
+// haveSSE42 is defined in crc_amd64p32.s and uses CPUID to test for SSE 4.2
 // support.
 func haveSSE42() bool
 
@@ -25,3 +23,15 @@
 	}
 	return update(crc, castagnoliTable, p)
 }
+
+func updateIEEE(crc uint32, p []byte) uint32 {
+	// only use slicing-by-8 when input is >= 4KB
+	if len(p) >= 4096 {
+		iEEETable8Once.Do(func() {
+			iEEETable8 = makeTable8(IEEE)
+		})
+		return updateSlicingBy8(crc, iEEETable8, p)
+	}
+
+	return update(crc, IEEETable, p)
+}
diff --git a/src/hash/crc32/crc32_generic.go b/src/hash/crc32/crc32_generic.go
index 416c1b7..d2355c8 100644
--- a/src/hash/crc32/crc32_generic.go
+++ b/src/hash/crc32/crc32_generic.go
@@ -12,3 +12,14 @@
 func updateCastagnoli(crc uint32, p []byte) uint32 {
 	return update(crc, castagnoliTable, p)
 }
+
+func updateIEEE(crc uint32, p []byte) uint32 {
+	// only use slicing-by-8 when input is >= 4KB
+	if len(p) >= 4096 {
+		iEEETable8Once.Do(func() {
+			iEEETable8 = makeTable8(IEEE)
+		})
+		return updateSlicingBy8(crc, iEEETable8, p)
+	}
+	return update(crc, IEEETable, p)
+}
diff --git a/src/hash/crc64/crc64.go b/src/hash/crc64/crc64.go
index b420a22..54cc560 100644
--- a/src/hash/crc64/crc64.go
+++ b/src/hash/crc64/crc64.go
@@ -50,6 +50,7 @@
 
 // New creates a new hash.Hash64 computing the CRC-64 checksum
 // using the polynomial represented by the Table.
+// Its Sum method will lay the value out in big-endian byte order.
 func New(tab *Table) hash.Hash64 { return &digest{0, tab} }
 
 func (d *digest) Size() int { return Size }
diff --git a/src/hash/fnv/fnv.go b/src/hash/fnv/fnv.go
index 09c5b2a..f1fbb25 100644
--- a/src/hash/fnv/fnv.go
+++ b/src/hash/fnv/fnv.go
@@ -27,24 +27,28 @@
 )
 
 // New32 returns a new 32-bit FNV-1 hash.Hash.
+// Its Sum method will lay the value out in big-endian byte order.
 func New32() hash.Hash32 {
 	var s sum32 = offset32
 	return &s
 }
 
 // New32a returns a new 32-bit FNV-1a hash.Hash.
+// Its Sum method will lay the value out in big-endian byte order.
 func New32a() hash.Hash32 {
 	var s sum32a = offset32
 	return &s
 }
 
 // New64 returns a new 64-bit FNV-1 hash.Hash.
+// Its Sum method will lay the value out in big-endian byte order.
 func New64() hash.Hash64 {
 	var s sum64 = offset64
 	return &s
 }
 
 // New64a returns a new 64-bit FNV-1a hash.Hash.
+// Its Sum method will lay the value out in big-endian byte order.
 func New64a() hash.Hash64 {
 	var s sum64a = offset64
 	return &s
diff --git a/src/html/template/clone_test.go b/src/html/template/clone_test.go
index c89d22a..a0f1d6a 100644
--- a/src/html/template/clone_test.go
+++ b/src/html/template/clone_test.go
@@ -78,9 +78,17 @@
 	Must(t0.Parse(`{{define "lhs"}} ( {{end}}`))
 	Must(t0.Parse(`{{define "rhs"}} ) {{end}}`))
 
-	// Clone t0 as t4. Redefining the "lhs" template should fail.
+	// Clone t0 as t4. Redefining the "lhs" template should not fail.
 	t4 := Must(t0.Clone())
-	if _, err := t4.Parse(`{{define "lhs"}} FAIL {{end}}`); err == nil {
+	if _, err := t4.Parse(`{{define "lhs"}} OK {{end}}`); err != nil {
+		t.Errorf(`redefine "lhs": got err %v, want nil`, err)
+	}
+	// Cloning t1 should fail as it has been executed.
+	if _, err := t1.Clone(); err == nil {
+		t.Error("cloning t1: got nil err want non-nil")
+	}
+	// Redefining the "lhs" template in t1 should fail as it has been executed.
+	if _, err := t1.Parse(`{{define "lhs"}} OK {{end}}`); err == nil {
 		t.Error(`redefine "lhs": got nil err want non-nil`)
 	}
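
A sketch of the behavior the updated test pins down: cloning is fine
before any execution, and both Clone and redefinition fail once the
original has executed and been escaped (error handling elided):

	package main

	import (
		"fmt"
		"html/template"
		"os"
	)

	func main() {
		t := template.Must(template.New("root").Parse(
			`{{define "greeting"}}hello{{end}}{{template "greeting"}}` + "\n"))

		// Cloning before execution yields an independent, still
		// modifiable copy.
		c := template.Must(t.Clone())
		template.Must(c.Parse(`{{define "greeting"}}goodbye{{end}}`))

		t.Execute(os.Stdout, nil) // hello
		c.Execute(os.Stdout, nil) // goodbye

		// After t has executed (and been escaped), Clone fails.
		if _, err := t.Clone(); err != nil {
			fmt.Println(err)
		}
	}
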
 
diff --git a/src/html/template/context.go b/src/html/template/context.go
index 59e794d..c90fc1f 100644
--- a/src/html/template/context.go
+++ b/src/html/template/context.go
@@ -310,7 +310,8 @@
 	return fmt.Sprintf("illegal element %d", int(e))
 }
 
-// attr identifies the most recent HTML attribute when inside a start tag.
+// attr identifies the current HTML attribute when inside the attribute,
+// that is, starting from stateAttrName until stateTag/stateText (exclusive).
 type attr uint8
 
 const (
diff --git a/src/html/template/escape_test.go b/src/html/template/escape_test.go
index bea2d13..707394e 100644
--- a/src/html/template/escape_test.go
+++ b/src/html/template/escape_test.go
@@ -1054,7 +1054,7 @@
 		},
 		{
 			`<a href=x`,
-			context{state: stateURL, delim: delimSpaceOrTagEnd, urlPart: urlPartPreQuery},
+			context{state: stateURL, delim: delimSpaceOrTagEnd, urlPart: urlPartPreQuery, attr: attrURL},
 		},
 		{
 			`<a href=x `,
@@ -1070,7 +1070,7 @@
 		},
 		{
 			`<a href ='`,
-			context{state: stateURL, delim: delimSingleQuote},
+			context{state: stateURL, delim: delimSingleQuote, attr: attrURL},
 		},
 		{
 			`<a href=''`,
@@ -1078,7 +1078,7 @@
 		},
 		{
 			`<a href= "`,
-			context{state: stateURL, delim: delimDoubleQuote},
+			context{state: stateURL, delim: delimDoubleQuote, attr: attrURL},
 		},
 		{
 			`<a href=""`,
@@ -1090,35 +1090,35 @@
 		},
 		{
 			`<a HREF='http:`,
-			context{state: stateURL, delim: delimSingleQuote, urlPart: urlPartPreQuery},
+			context{state: stateURL, delim: delimSingleQuote, urlPart: urlPartPreQuery, attr: attrURL},
 		},
 		{
 			`<a Href='/`,
-			context{state: stateURL, delim: delimSingleQuote, urlPart: urlPartPreQuery},
+			context{state: stateURL, delim: delimSingleQuote, urlPart: urlPartPreQuery, attr: attrURL},
 		},
 		{
 			`<a href='"`,
-			context{state: stateURL, delim: delimSingleQuote, urlPart: urlPartPreQuery},
+			context{state: stateURL, delim: delimSingleQuote, urlPart: urlPartPreQuery, attr: attrURL},
 		},
 		{
 			`<a href="'`,
-			context{state: stateURL, delim: delimDoubleQuote, urlPart: urlPartPreQuery},
+			context{state: stateURL, delim: delimDoubleQuote, urlPart: urlPartPreQuery, attr: attrURL},
 		},
 		{
 			`<a href='&apos;`,
-			context{state: stateURL, delim: delimSingleQuote, urlPart: urlPartPreQuery},
+			context{state: stateURL, delim: delimSingleQuote, urlPart: urlPartPreQuery, attr: attrURL},
 		},
 		{
 			`<a href="&quot;`,
-			context{state: stateURL, delim: delimDoubleQuote, urlPart: urlPartPreQuery},
+			context{state: stateURL, delim: delimDoubleQuote, urlPart: urlPartPreQuery, attr: attrURL},
 		},
 		{
 			`<a href="&#34;`,
-			context{state: stateURL, delim: delimDoubleQuote, urlPart: urlPartPreQuery},
+			context{state: stateURL, delim: delimDoubleQuote, urlPart: urlPartPreQuery, attr: attrURL},
 		},
 		{
 			`<a href=&quot;`,
-			context{state: stateURL, delim: delimSpaceOrTagEnd, urlPart: urlPartPreQuery},
+			context{state: stateURL, delim: delimSpaceOrTagEnd, urlPart: urlPartPreQuery, attr: attrURL},
 		},
 		{
 			`<img alt="1">`,
@@ -1138,83 +1138,83 @@
 		},
 		{
 			`<a onclick="`,
-			context{state: stateJS, delim: delimDoubleQuote},
+			context{state: stateJS, delim: delimDoubleQuote, attr: attrScript},
 		},
 		{
 			`<a onclick="//foo`,
-			context{state: stateJSLineCmt, delim: delimDoubleQuote},
+			context{state: stateJSLineCmt, delim: delimDoubleQuote, attr: attrScript},
 		},
 		{
 			"<a onclick='//\n",
-			context{state: stateJS, delim: delimSingleQuote},
+			context{state: stateJS, delim: delimSingleQuote, attr: attrScript},
 		},
 		{
 			"<a onclick='//\r\n",
-			context{state: stateJS, delim: delimSingleQuote},
+			context{state: stateJS, delim: delimSingleQuote, attr: attrScript},
 		},
 		{
 			"<a onclick='//\u2028",
-			context{state: stateJS, delim: delimSingleQuote},
+			context{state: stateJS, delim: delimSingleQuote, attr: attrScript},
 		},
 		{
 			`<a onclick="/*`,
-			context{state: stateJSBlockCmt, delim: delimDoubleQuote},
+			context{state: stateJSBlockCmt, delim: delimDoubleQuote, attr: attrScript},
 		},
 		{
 			`<a onclick="/*/`,
-			context{state: stateJSBlockCmt, delim: delimDoubleQuote},
+			context{state: stateJSBlockCmt, delim: delimDoubleQuote, attr: attrScript},
 		},
 		{
 			`<a onclick="/**/`,
-			context{state: stateJS, delim: delimDoubleQuote},
+			context{state: stateJS, delim: delimDoubleQuote, attr: attrScript},
 		},
 		{
 			`<a onkeypress="&quot;`,
-			context{state: stateJSDqStr, delim: delimDoubleQuote},
+			context{state: stateJSDqStr, delim: delimDoubleQuote, attr: attrScript},
 		},
 		{
 			`<a onclick='&quot;foo&quot;`,
-			context{state: stateJS, delim: delimSingleQuote, jsCtx: jsCtxDivOp},
+			context{state: stateJS, delim: delimSingleQuote, jsCtx: jsCtxDivOp, attr: attrScript},
 		},
 		{
 			`<a onclick=&#39;foo&#39;`,
-			context{state: stateJS, delim: delimSpaceOrTagEnd, jsCtx: jsCtxDivOp},
+			context{state: stateJS, delim: delimSpaceOrTagEnd, jsCtx: jsCtxDivOp, attr: attrScript},
 		},
 		{
 			`<a onclick=&#39;foo`,
-			context{state: stateJSSqStr, delim: delimSpaceOrTagEnd},
+			context{state: stateJSSqStr, delim: delimSpaceOrTagEnd, attr: attrScript},
 		},
 		{
 			`<a onclick="&quot;foo'`,
-			context{state: stateJSDqStr, delim: delimDoubleQuote},
+			context{state: stateJSDqStr, delim: delimDoubleQuote, attr: attrScript},
 		},
 		{
 			`<a onclick="'foo&quot;`,
-			context{state: stateJSSqStr, delim: delimDoubleQuote},
+			context{state: stateJSSqStr, delim: delimDoubleQuote, attr: attrScript},
 		},
 		{
 			`<A ONCLICK="'`,
-			context{state: stateJSSqStr, delim: delimDoubleQuote},
+			context{state: stateJSSqStr, delim: delimDoubleQuote, attr: attrScript},
 		},
 		{
 			`<a onclick="/`,
-			context{state: stateJSRegexp, delim: delimDoubleQuote},
+			context{state: stateJSRegexp, delim: delimDoubleQuote, attr: attrScript},
 		},
 		{
 			`<a onclick="'foo'`,
-			context{state: stateJS, delim: delimDoubleQuote, jsCtx: jsCtxDivOp},
+			context{state: stateJS, delim: delimDoubleQuote, jsCtx: jsCtxDivOp, attr: attrScript},
 		},
 		{
 			`<a onclick="'foo\'`,
-			context{state: stateJSSqStr, delim: delimDoubleQuote},
+			context{state: stateJSSqStr, delim: delimDoubleQuote, attr: attrScript},
 		},
 		{
 			`<a onclick="'foo\'`,
-			context{state: stateJSSqStr, delim: delimDoubleQuote},
+			context{state: stateJSSqStr, delim: delimDoubleQuote, attr: attrScript},
 		},
 		{
 			`<a onclick="/foo/`,
-			context{state: stateJS, delim: delimDoubleQuote, jsCtx: jsCtxDivOp},
+			context{state: stateJS, delim: delimDoubleQuote, jsCtx: jsCtxDivOp, attr: attrScript},
 		},
 		{
 			`<script>/foo/ /=`,
@@ -1222,111 +1222,111 @@
 		},
 		{
 			`<a onclick="1 /foo`,
-			context{state: stateJS, delim: delimDoubleQuote, jsCtx: jsCtxDivOp},
+			context{state: stateJS, delim: delimDoubleQuote, jsCtx: jsCtxDivOp, attr: attrScript},
 		},
 		{
 			`<a onclick="1 /*c*/ /foo`,
-			context{state: stateJS, delim: delimDoubleQuote, jsCtx: jsCtxDivOp},
+			context{state: stateJS, delim: delimDoubleQuote, jsCtx: jsCtxDivOp, attr: attrScript},
 		},
 		{
 			`<a onclick="/foo[/]`,
-			context{state: stateJSRegexp, delim: delimDoubleQuote},
+			context{state: stateJSRegexp, delim: delimDoubleQuote, attr: attrScript},
 		},
 		{
 			`<a onclick="/foo\/`,
-			context{state: stateJSRegexp, delim: delimDoubleQuote},
+			context{state: stateJSRegexp, delim: delimDoubleQuote, attr: attrScript},
 		},
 		{
 			`<a onclick="/foo/`,
-			context{state: stateJS, delim: delimDoubleQuote, jsCtx: jsCtxDivOp},
+			context{state: stateJS, delim: delimDoubleQuote, jsCtx: jsCtxDivOp, attr: attrScript},
 		},
 		{
 			`<input checked style="`,
-			context{state: stateCSS, delim: delimDoubleQuote},
+			context{state: stateCSS, delim: delimDoubleQuote, attr: attrStyle},
 		},
 		{
 			`<a style="//`,
-			context{state: stateCSSLineCmt, delim: delimDoubleQuote},
+			context{state: stateCSSLineCmt, delim: delimDoubleQuote, attr: attrStyle},
 		},
 		{
 			`<a style="//</script>`,
-			context{state: stateCSSLineCmt, delim: delimDoubleQuote},
+			context{state: stateCSSLineCmt, delim: delimDoubleQuote, attr: attrStyle},
 		},
 		{
 			"<a style='//\n",
-			context{state: stateCSS, delim: delimSingleQuote},
+			context{state: stateCSS, delim: delimSingleQuote, attr: attrStyle},
 		},
 		{
 			"<a style='//\r",
-			context{state: stateCSS, delim: delimSingleQuote},
+			context{state: stateCSS, delim: delimSingleQuote, attr: attrStyle},
 		},
 		{
 			`<a style="/*`,
-			context{state: stateCSSBlockCmt, delim: delimDoubleQuote},
+			context{state: stateCSSBlockCmt, delim: delimDoubleQuote, attr: attrStyle},
 		},
 		{
 			`<a style="/*/`,
-			context{state: stateCSSBlockCmt, delim: delimDoubleQuote},
+			context{state: stateCSSBlockCmt, delim: delimDoubleQuote, attr: attrStyle},
 		},
 		{
 			`<a style="/**/`,
-			context{state: stateCSS, delim: delimDoubleQuote},
+			context{state: stateCSS, delim: delimDoubleQuote, attr: attrStyle},
 		},
 		{
 			`<a style="background: '`,
-			context{state: stateCSSSqStr, delim: delimDoubleQuote},
+			context{state: stateCSSSqStr, delim: delimDoubleQuote, attr: attrStyle},
 		},
 		{
 			`<a style="background: &quot;`,
-			context{state: stateCSSDqStr, delim: delimDoubleQuote},
+			context{state: stateCSSDqStr, delim: delimDoubleQuote, attr: attrStyle},
 		},
 		{
 			`<a style="background: '/foo?img=`,
-			context{state: stateCSSSqStr, delim: delimDoubleQuote, urlPart: urlPartQueryOrFrag},
+			context{state: stateCSSSqStr, delim: delimDoubleQuote, urlPart: urlPartQueryOrFrag, attr: attrStyle},
 		},
 		{
 			`<a style="background: '/`,
-			context{state: stateCSSSqStr, delim: delimDoubleQuote, urlPart: urlPartPreQuery},
+			context{state: stateCSSSqStr, delim: delimDoubleQuote, urlPart: urlPartPreQuery, attr: attrStyle},
 		},
 		{
 			`<a style="background: url(&#x22;/`,
-			context{state: stateCSSDqURL, delim: delimDoubleQuote, urlPart: urlPartPreQuery},
+			context{state: stateCSSDqURL, delim: delimDoubleQuote, urlPart: urlPartPreQuery, attr: attrStyle},
 		},
 		{
 			`<a style="background: url('/`,
-			context{state: stateCSSSqURL, delim: delimDoubleQuote, urlPart: urlPartPreQuery},
+			context{state: stateCSSSqURL, delim: delimDoubleQuote, urlPart: urlPartPreQuery, attr: attrStyle},
 		},
 		{
 			`<a style="background: url('/)`,
-			context{state: stateCSSSqURL, delim: delimDoubleQuote, urlPart: urlPartPreQuery},
+			context{state: stateCSSSqURL, delim: delimDoubleQuote, urlPart: urlPartPreQuery, attr: attrStyle},
 		},
 		{
 			`<a style="background: url('/ `,
-			context{state: stateCSSSqURL, delim: delimDoubleQuote, urlPart: urlPartPreQuery},
+			context{state: stateCSSSqURL, delim: delimDoubleQuote, urlPart: urlPartPreQuery, attr: attrStyle},
 		},
 		{
 			`<a style="background: url(/`,
-			context{state: stateCSSURL, delim: delimDoubleQuote, urlPart: urlPartPreQuery},
+			context{state: stateCSSURL, delim: delimDoubleQuote, urlPart: urlPartPreQuery, attr: attrStyle},
 		},
 		{
 			`<a style="background: url( `,
-			context{state: stateCSSURL, delim: delimDoubleQuote},
+			context{state: stateCSSURL, delim: delimDoubleQuote, attr: attrStyle},
 		},
 		{
 			`<a style="background: url( /image?name=`,
-			context{state: stateCSSURL, delim: delimDoubleQuote, urlPart: urlPartQueryOrFrag},
+			context{state: stateCSSURL, delim: delimDoubleQuote, urlPart: urlPartQueryOrFrag, attr: attrStyle},
 		},
 		{
 			`<a style="background: url(x)`,
-			context{state: stateCSS, delim: delimDoubleQuote},
+			context{state: stateCSS, delim: delimDoubleQuote, attr: attrStyle},
 		},
 		{
 			`<a style="background: url('x'`,
-			context{state: stateCSS, delim: delimDoubleQuote},
+			context{state: stateCSS, delim: delimDoubleQuote, attr: attrStyle},
 		},
 		{
 			`<a style="background: url( x `,
-			context{state: stateCSS, delim: delimDoubleQuote},
+			context{state: stateCSS, delim: delimDoubleQuote, attr: attrStyle},
 		},
 		{
 			`<!-- foo`,
@@ -1466,7 +1466,7 @@
 		},
 		{
 			`<a svg:style='`,
-			context{state: stateCSS, delim: delimSingleQuote},
+			context{state: stateCSS, delim: delimSingleQuote, attr: attrStyle},
 		},
 		{
 			`<svg:font-face`,
@@ -1474,7 +1474,11 @@
 		},
 		{
 			`<svg:a svg:onclick="`,
-			context{state: stateJS, delim: delimDoubleQuote},
+			context{state: stateJS, delim: delimDoubleQuote, attr: attrScript},
+		},
+		{
+			`<svg:a svg:onclick="x()">`,
+			context{},
 		},
 	}
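
Tracking attr across the whole attribute (not just the most recent
name) is what lets the escaper choose URL, script, or style escaping
for each interpolation. A small end-to-end sketch (the exact escaped
output depends on the escaper version):

	package main

	import (
		"html/template"
		"os"
	)

	func main() {
		t := template.Must(template.New("t").Parse(
			`<a href="{{.}}" onclick="alert({{.}})">x</a>` + "\n"))

		// The first {{.}} is escaped as part of a URL, the second as a
		// JavaScript value, based on the attribute each sits in.
		t.Execute(os.Stdout, "a<b")
	}
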
 
diff --git a/src/html/template/example_test.go b/src/html/template/example_test.go
index a75ceec..40fc3ba 100644
--- a/src/html/template/example_test.go
+++ b/src/html/template/example_test.go
@@ -9,6 +9,7 @@
 	"html/template"
 	"log"
 	"os"
+	"strings"
 )
 
 func Example() {
@@ -120,3 +121,38 @@
 	// %22Fran+%26+Freddie%27s+Diner%2232%3Ctasty%40example.com%3E
 
 }
+
+// The following example is duplicated in text/template; keep them in sync.
+
+func ExampleTemplate_block() {
+	const (
+		master  = `Names:{{block "list" .}}{{"\n"}}{{range .}}{{println "-" .}}{{end}}{{end}}`
+		overlay = `{{define "list"}} {{join . ", "}}{{end}} `
+	)
+	var (
+		funcs     = template.FuncMap{"join": strings.Join}
+		guardians = []string{"Gamora", "Groot", "Nebula", "Rocket", "Star-Lord"}
+	)
+	masterTmpl, err := template.New("master").Funcs(funcs).Parse(master)
+	if err != nil {
+		log.Fatal(err)
+	}
+	overlayTmpl, err := template.Must(masterTmpl.Clone()).Parse(overlay)
+	if err != nil {
+		log.Fatal(err)
+	}
+	if err := masterTmpl.Execute(os.Stdout, guardians); err != nil {
+		log.Fatal(err)
+	}
+	if err := overlayTmpl.Execute(os.Stdout, guardians); err != nil {
+		log.Fatal(err)
+	}
+	// Output:
+	// Names:
+	// - Gamora
+	// - Groot
+	// - Nebula
+	// - Rocket
+	// - Star-Lord
+	// Names: Gamora, Groot, Nebula, Rocket, Star-Lord
+}
diff --git a/src/html/template/template.go b/src/html/template/template.go
index bb9140a..5cfac49 100644
--- a/src/html/template/template.go
+++ b/src/html/template/template.go
@@ -17,7 +17,7 @@
 // Template is a specialized Template from "text/template" that produces a safe
 // HTML document fragment.
 type Template struct {
-	// Sticky error if escaping fails.
+	// Sticky error if escaping fails, or escapeOK if it succeeded.
 	escapeErr error
 	// We could embed the text/template field, but it's safer not to because
 	// we need to keep our version of the name space and the underlying
@@ -169,6 +169,8 @@
 		tmpl := t.set[name]
 		if tmpl == nil {
 			tmpl = t.new(name)
+		} else if tmpl.escapeErr != nil {
+			return nil, fmt.Errorf("html/template: cannot redefine %q after it has executed", name)
 		}
 		// Restore our record of this text/template to its unescaped original state.
 		tmpl.escapeErr = nil
@@ -413,3 +415,10 @@
 	}
 	return parseFiles(t, filenames...)
 }
+
+// IsTrue reports whether the value is 'true', in the sense of not the zero of its type,
+// and whether the value has a meaningful truth value. This is the definition of
+// truth used by if and other such actions.
+func IsTrue(val interface{}) (truth, ok bool) {
+	return template.IsTrue(val)
+}
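
IsTrue exposes the truth rule used by {{if}} and friends: zero values
are false, and ok reports whether the value has a meaningful truth
value at all. For example:

	package main

	import (
		"fmt"
		"html/template"
	)

	func main() {
		for _, v := range []interface{}{0, 1, "", "x", []int(nil), []int{1}} {
			truth, ok := template.IsTrue(v)
			fmt.Printf("%#v: truth=%v ok=%v\n", v, truth, ok)
		}
	}
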
diff --git a/src/html/template/transition.go b/src/html/template/transition.go
index d2e0287..aefe035 100644
--- a/src/html/template/transition.go
+++ b/src/html/template/transition.go
@@ -169,7 +169,7 @@
 	case '"':
 		delim, i = delimDoubleQuote, i+1
 	}
-	c.state, c.delim, c.attr = attrStartStates[c.attr], delim, attrNone
+	c.state, c.delim = attrStartStates[c.attr], delim
 	return c, i
 }
 
diff --git a/src/image/color/ycbcr.go b/src/image/color/ycbcr.go
index 4bcb07d..0c2ba84 100644
--- a/src/image/color/ycbcr.go
+++ b/src/image/color/ycbcr.go
@@ -137,6 +137,46 @@
 	return YCbCr{y, u, v}
 }
 
+// NYCbCrA represents a non-alpha-premultiplied Y'CbCr-with-alpha color, having
+// 8 bits each for one luma, two chroma and one alpha component.
+type NYCbCrA struct {
+	YCbCr
+	A uint8
+}
+
+func (c NYCbCrA) RGBA() (r, g, b, a uint32) {
+	r8, g8, b8 := YCbCrToRGB(c.Y, c.Cb, c.Cr)
+	a = uint32(c.A) * 0x101
+	r = uint32(r8) * 0x101 * a / 0xffff
+	g = uint32(g8) * 0x101 * a / 0xffff
+	b = uint32(b8) * 0x101 * a / 0xffff
+	return
+}
+
+// NYCbCrAModel is the Model for non-alpha-premultiplied Y'CbCr-with-alpha
+// colors.
+var NYCbCrAModel Model = ModelFunc(nYCbCrAModel)
+
+func nYCbCrAModel(c Color) Color {
+	switch c := c.(type) {
+	case NYCbCrA:
+		return c
+	case YCbCr:
+		return NYCbCrA{c, 0xff}
+	}
+	r, g, b, a := c.RGBA()
+
+	// Convert from alpha-premultiplied to non-alpha-premultiplied.
+	if a != 0 {
+		r = (r * 0xffff) / a
+		g = (g * 0xffff) / a
+		b = (b * 0xffff) / a
+	}
+
+	y, u, v := RGBToYCbCr(uint8(r>>8), uint8(g>>8), uint8(b>>8))
+	return NYCbCrA{YCbCr{Y: y, Cb: u, Cr: v}, uint8(a >> 8)}
+}
+
 // RGBToCMYK converts an RGB triple to a CMYK quadruple.
 func RGBToCMYK(r, g, b uint8) (uint8, uint8, uint8, uint8) {
 	rr := uint32(r)
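
A small sketch of the non-premultiplied semantics: NYCbCrA stores the
color channels at full strength alongside the alpha, and RGBA scales
them down on the way out.

	package main

	import (
		"fmt"
		"image/color"
	)

	func main() {
		// Mid-gray at 50% alpha, stored non-premultiplied.
		c := color.NYCbCrA{
			YCbCr: color.YCbCr{Y: 0x80, Cb: 0x80, Cr: 0x80},
			A:     0x80,
		}

		// RGBA returns alpha-premultiplied values, so r, g and b come
		// back scaled down by the alpha.
		r, g, b, a := c.RGBA()
		fmt.Println(r, g, b, a)
	}
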
diff --git a/src/image/image.go b/src/image/image.go
index 20b64d7..bebb9f7 100644
--- a/src/image/image.go
+++ b/src/image/image.go
@@ -148,7 +148,7 @@
 	return true
 }
 
-// NewRGBA returns a new RGBA with the given bounds.
+// NewRGBA returns a new RGBA image with the given bounds.
 func NewRGBA(r Rectangle) *RGBA {
 	w, h := r.Dx(), r.Dy()
 	buf := make([]uint8, 4*w*h)
@@ -260,7 +260,7 @@
 	return true
 }
 
-// NewRGBA64 returns a new RGBA64 with the given bounds.
+// NewRGBA64 returns a new RGBA64 image with the given bounds.
 func NewRGBA64(r Rectangle) *RGBA64 {
 	w, h := r.Dx(), r.Dy()
 	pix := make([]uint8, 8*w*h)
@@ -359,7 +359,7 @@
 	return true
 }
 
-// NewNRGBA returns a new NRGBA with the given bounds.
+// NewNRGBA returns a new NRGBA image with the given bounds.
 func NewNRGBA(r Rectangle) *NRGBA {
 	w, h := r.Dx(), r.Dy()
 	pix := make([]uint8, 4*w*h)
@@ -471,7 +471,7 @@
 	return true
 }
 
-// NewNRGBA64 returns a new NRGBA64 with the given bounds.
+// NewNRGBA64 returns a new NRGBA64 image with the given bounds.
 func NewNRGBA64(r Rectangle) *NRGBA64 {
 	w, h := r.Dx(), r.Dy()
 	pix := make([]uint8, 8*w*h)
@@ -563,7 +563,7 @@
 	return true
 }
 
-// NewAlpha returns a new Alpha with the given bounds.
+// NewAlpha returns a new Alpha image with the given bounds.
 func NewAlpha(r Rectangle) *Alpha {
 	w, h := r.Dx(), r.Dy()
 	pix := make([]uint8, 1*w*h)
@@ -658,7 +658,7 @@
 	return true
 }
 
-// NewAlpha16 returns a new Alpha16 with the given bounds.
+// NewAlpha16 returns a new Alpha16 image with the given bounds.
 func NewAlpha16(r Rectangle) *Alpha16 {
 	w, h := r.Dx(), r.Dy()
 	pix := make([]uint8, 2*w*h)
@@ -737,7 +737,7 @@
 	return true
 }
 
-// NewGray returns a new Gray with the given bounds.
+// NewGray returns a new Gray image with the given bounds.
 func NewGray(r Rectangle) *Gray {
 	w, h := r.Dx(), r.Dy()
 	pix := make([]uint8, 1*w*h)
@@ -819,7 +819,7 @@
 	return true
 }
 
-// NewGray16 returns a new Gray16 with the given bounds.
+// NewGray16 returns a new Gray16 image with the given bounds.
 func NewGray16(r Rectangle) *Gray16 {
 	w, h := r.Dx(), r.Dy()
 	pix := make([]uint8, 2*w*h)
@@ -905,7 +905,7 @@
 	return true
 }
 
-// NewCMYK returns a new CMYK with the given bounds.
+// NewCMYK returns a new CMYK image with the given bounds.
 func NewCMYK(r Rectangle) *CMYK {
 	w, h := r.Dx(), r.Dy()
 	buf := make([]uint8, 4*w*h)
@@ -1014,7 +1014,8 @@
 	return true
 }
 
-// NewPaletted returns a new Paletted with the given width, height and palette.
+// NewPaletted returns a new Paletted image with the given width, height and
+// palette.
 func NewPaletted(r Rectangle, p color.Palette) *Paletted {
 	w, h := r.Dx(), r.Dy()
 	pix := make([]uint8, 1*w*h)
diff --git a/src/image/png/reader.go b/src/image/png/reader.go
index bbd6f75..9e6f985 100644
--- a/src/image/png/reader.go
+++ b/src/image/png/reader.go
@@ -154,8 +154,8 @@
 	d.interlace = int(d.tmp[12])
 	w := int32(binary.BigEndian.Uint32(d.tmp[0:4]))
 	h := int32(binary.BigEndian.Uint32(d.tmp[4:8]))
-	if w < 0 || h < 0 {
-		return FormatError("negative dimension")
+	if w <= 0 || h <= 0 {
+		return FormatError("non-positive dimension")
 	}
 	nPixels := int64(w) * int64(h)
 	if nPixels != int64(int(nPixels)) {
@@ -727,6 +727,9 @@
 		d.stage = dsSeenIEND
 		return d.parseIEND(length)
 	}
+	if length > 0x7fffffff {
+		return FormatError(fmt.Sprintf("Bad chunk length: %d", length))
+	}
 	// Ignore this chunk (of a known length).
 	var ignored [4096]byte
 	for length > 0 {
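
The length guard added above presumably exists because the chunk length is an untrusted 32-bit value: converted to int on a 32-bit platform, anything above 0x7fffffff wraps negative and derails the skip loop's accounting. A minimal sketch of the wrap (illustration only, not the decoder's code):

	package main

	import "fmt"

	func main() {
		// An untrusted chunk length as read from the header.
		length := uint32(0xffffffff)

		// On a 32-bit platform int is 32 bits wide, so the conversion
		// wraps negative; slice bounds and loop accounting derived
		// from it misbehave.
		fmt.Println(int32(length)) // -1 (what int(length) yields there)
	}
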
diff --git a/src/image/png/reader_test.go b/src/image/png/reader_test.go
index f89e7ef..f058f6b 100644
--- a/src/image/png/reader_test.go
+++ b/src/image/png/reader_test.go
@@ -408,6 +408,18 @@
 	}
 }
 
+func TestUnknownChunkLengthUnderflow(t *testing.T) {
+	data := []byte{0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a, 0xff, 0xff,
+		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x06, 0xf4, 0x7c, 0x55, 0x04, 0x1a,
+		0xd3, 0x11, 0x9a, 0x73, 0x00, 0x00, 0xf8, 0x1e, 0xf3, 0x2e, 0x00, 0x00,
+		0x01, 0x00, 0xff, 0xff, 0xff, 0xff, 0x07, 0xf4, 0x7c, 0x55, 0x04, 0x1a,
+		0xd3}
+	_, err := Decode(bytes.NewReader(data))
+	if err == nil {
+		t.Errorf("Didn't fail reading an unknown chunk with length 0xffffffff")
+	}
+}
+
 func benchmarkDecode(b *testing.B, filename string, bytesPerPixel int) {
 	b.StopTimer()
 	data, err := ioutil.ReadFile(filename)
diff --git a/src/image/ycbcr.go b/src/image/ycbcr.go
index 93c354b3..71c0518 100644
--- a/src/image/ycbcr.go
+++ b/src/image/ycbcr.go
@@ -138,9 +138,8 @@
 	return true
 }
 
-// NewYCbCr returns a new YCbCr with the given bounds and subsample ratio.
-func NewYCbCr(r Rectangle, subsampleRatio YCbCrSubsampleRatio) *YCbCr {
-	w, h, cw, ch := r.Dx(), r.Dy(), 0, 0
+func yCbCrSize(r Rectangle, subsampleRatio YCbCrSubsampleRatio) (w, h, cw, ch int) {
+	w, h = r.Dx(), r.Dy()
 	switch subsampleRatio {
 	case YCbCrSubsampleRatio422:
 		cw = (r.Max.X+1)/2 - r.Min.X/2
@@ -162,6 +161,13 @@
 		cw = w
 		ch = h
 	}
+	return
+}
+
+// NewYCbCr returns a new YCbCr image with the given bounds and subsample
+// ratio.
+func NewYCbCr(r Rectangle, subsampleRatio YCbCrSubsampleRatio) *YCbCr {
+	w, h, cw, ch := yCbCrSize(r, subsampleRatio)
 	i0 := w*h + 0*cw*ch
 	i1 := w*h + 1*cw*ch
 	i2 := w*h + 2*cw*ch
@@ -176,3 +182,117 @@
 		Rect:           r,
 	}
 }
+
+// NYCbCrA is an in-memory image of non-alpha-premultiplied Y'CbCr-with-alpha
+// colors. A and AStride are analogous to the Y and YStride fields of the
+// embedded YCbCr.
+type NYCbCrA struct {
+	YCbCr
+	A       []uint8
+	AStride int
+}
+
+func (p *NYCbCrA) ColorModel() color.Model {
+	return color.NYCbCrAModel
+}
+
+func (p *NYCbCrA) At(x, y int) color.Color {
+	return p.NYCbCrAAt(x, y)
+}
+
+func (p *NYCbCrA) NYCbCrAAt(x, y int) color.NYCbCrA {
+	if !(Point{X: x, Y: y}.In(p.Rect)) {
+		return color.NYCbCrA{}
+	}
+	yi := p.YOffset(x, y)
+	ci := p.COffset(x, y)
+	ai := p.AOffset(x, y)
+	return color.NYCbCrA{
+		color.YCbCr{
+			Y:  p.Y[yi],
+			Cb: p.Cb[ci],
+			Cr: p.Cr[ci],
+		},
+		p.A[ai],
+	}
+}
+
+// AOffset returns the index of the first element of A that corresponds to the
+// pixel at (x, y).
+func (p *NYCbCrA) AOffset(x, y int) int {
+	return (y-p.Rect.Min.Y)*p.AStride + (x - p.Rect.Min.X)
+}
+
+// SubImage returns an image representing the portion of the image p visible
+// through r. The returned value shares pixels with the original image.
+func (p *NYCbCrA) SubImage(r Rectangle) Image {
+	r = r.Intersect(p.Rect)
+	// If r1 and r2 are Rectangles, r1.Intersect(r2) is not guaranteed to be inside
+	// either r1 or r2 if the intersection is empty. Without explicitly checking for
+	// this, the slice expressions below can panic.
+	if r.Empty() {
+		return &NYCbCrA{
+			YCbCr: YCbCr{
+				SubsampleRatio: p.SubsampleRatio,
+			},
+		}
+	}
+	yi := p.YOffset(r.Min.X, r.Min.Y)
+	ci := p.COffset(r.Min.X, r.Min.Y)
+	ai := p.AOffset(r.Min.X, r.Min.Y)
+	return &NYCbCrA{
+		YCbCr: YCbCr{
+			Y:              p.Y[yi:],
+			Cb:             p.Cb[ci:],
+			Cr:             p.Cr[ci:],
+			SubsampleRatio: p.SubsampleRatio,
+			YStride:        p.YStride,
+			CStride:        p.CStride,
+			Rect:           r,
+		},
+		A:       p.A[ai:],
+		AStride: p.AStride,
+	}
+}
+
+// Opaque scans the entire image and reports whether it is fully opaque.
+func (p *NYCbCrA) Opaque() bool {
+	if p.Rect.Empty() {
+		return true
+	}
+	i0, i1 := 0, p.Rect.Dx()
+	for y := p.Rect.Min.Y; y < p.Rect.Max.Y; y++ {
+		for _, a := range p.A[i0:i1] {
+			if a != 0xff {
+				return false
+			}
+		}
+		i0 += p.AStride
+		i1 += p.AStride
+	}
+	return true
+}
+
+// NewNYCbCrA returns a new NYCbCrA image with the given bounds and subsample
+// ratio.
+func NewNYCbCrA(r Rectangle, subsampleRatio YCbCrSubsampleRatio) *NYCbCrA {
+	w, h, cw, ch := yCbCrSize(r, subsampleRatio)
+	i0 := 1*w*h + 0*cw*ch
+	i1 := 1*w*h + 1*cw*ch
+	i2 := 1*w*h + 2*cw*ch
+	i3 := 2*w*h + 2*cw*ch
+	b := make([]byte, i3)
+	return &NYCbCrA{
+		YCbCr: YCbCr{
+			Y:              b[:i0:i0],
+			Cb:             b[i0:i1:i1],
+			Cr:             b[i1:i2:i2],
+			SubsampleRatio: subsampleRatio,
+			YStride:        w,
+			CStride:        cw,
+			Rect:           r,
+		},
+		A:       b[i2:],
+		AStride: w,
+	}
+}
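
As a usage sketch of the single-allocation layout above (assuming a toolchain that includes the new type): Y and A each cover every pixel, while Cb and Cr share the subsampled plane size.

	package main

	import (
		"fmt"
		"image"
	)

	func main() {
		// 4x2 image, 4:2:0 subsampling: cw = 2, ch = 1.
		m := image.NewNYCbCrA(image.Rect(0, 0, 4, 2), image.YCbCrSubsampleRatio420)

		fmt.Println(len(m.Y), len(m.A))   // 8 8 (one byte per pixel each)
		fmt.Println(len(m.Cb), len(m.Cr)) // 2 2 (subsampled planes)
	}
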
diff --git a/src/internal/trace/parser.go b/src/internal/trace/parser.go
index 1eb39dd..11f9aba 100644
--- a/src/internal/trace/parser.go
+++ b/src/internal/trace/parser.go
@@ -479,7 +479,7 @@
 			p.g = ev.G
 			if g.evCreate != nil {
 				// +1 because symbolizer expects return pc.
-				ev.Stk = []*Frame{&Frame{PC: g.evCreate.Args[1] + 1}}
+				ev.Stk = []*Frame{{PC: g.evCreate.Args[1] + 1}}
 				g.evCreate = nil
 			}
 
diff --git a/src/io/io.go b/src/io/io.go
index 8851eaf..8e7855c 100644
--- a/src/io/io.go
+++ b/src/io/io.go
@@ -95,14 +95,14 @@
 // Seeker is the interface that wraps the basic Seek method.
 //
 // Seek sets the offset for the next Read or Write to offset,
-// interpreted according to whence: 0 means relative to the origin of
+// interpreted according to whence: 0 means relative to the start of
 // the file, 1 means relative to the current offset, and 2 means
-// relative to the end.  Seek returns the new offset and an error, if
-// any.
+// relative to the end. Seek returns the new offset relative to the
+// start of the file and an error, if any.
 //
-// Seeking to a negative offset is an error. Seeking to any positive
-// offset is legal, but the behavior of subsequent I/O operations on
-// the underlying object is implementation-dependent.
+// Seeking to an offset before the start of the file is an error.
+// Seeking to any positive offset is legal, but the behavior of subsequent
+// I/O operations on the underlying object is implementation-dependent.
 type Seeker interface {
 	Seek(offset int64, whence int) (int64, error)
 }
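
A small illustration of the clarified contract, using strings.Reader as a stand-in Seeker: with whence 2 the offset argument is relative to the end, and the returned offset is relative to the start.

	package main

	import (
		"fmt"
		"strings"
	)

	func main() {
		r := strings.NewReader("hello, world")

		// whence 2: seek relative to the end; the return value is the
		// new offset relative to the start of the stream.
		off, _ := r.Seek(-5, 2)
		fmt.Println(off) // 7

		buf := make([]byte, 5)
		r.Read(buf)
		fmt.Println(string(buf)) // world
	}
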
@@ -225,7 +225,6 @@
 // ByteReader is the interface that wraps the ReadByte method.
 //
 // ReadByte reads and returns the next byte from the input.
-// If no byte is available, err will be set.
 type ByteReader interface {
 	ReadByte() (c byte, err error)
 }
diff --git a/src/math/all_test.go b/src/math/all_test.go
index e18e45e..38f6f95 100644
--- a/src/math/all_test.go
+++ b/src/math/all_test.go
@@ -447,7 +447,7 @@
 var modf = [][2]float64{
 	{4.0000000000000000e+00, 9.7901192488367350108546816e-01},
 	{7.0000000000000000e+00, 7.3887247457810456552351752e-01},
-	{0.0000000000000000e+00, -2.7688005719200159404635997e-01},
+	{Copysign(0, -1), -2.7688005719200159404635997e-01},
 	{-5.0000000000000000e+00, -1.060361827107492160848778e-02},
 	{9.0000000000000000e+00, 6.3629370719841737980004837e-01},
 	{2.0000000000000000e+00, 9.2637723924396464525443662e-01},
@@ -1356,12 +1356,14 @@
 
 var vfmodfSC = []float64{
 	Inf(-1),
+	Copysign(0, -1),
 	Inf(1),
 	NaN(),
 }
 var modfSC = [][2]float64{
 	{Inf(-1), NaN()}, // [2]float64{Copysign(0, -1), Inf(-1)},
-	{Inf(1), NaN()},  // [2]float64{0, Inf(1)},
+	{Copysign(0, -1), Copysign(0, -1)},
+	{Inf(1), NaN()}, // [2]float64{0, Inf(1)},
 	{NaN(), NaN()},
 }
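
The Copysign(0, -1) entries pin down Modf's negative-zero behavior; a quick sketch of what it returns for a negative input:

	package main

	import (
		"fmt"
		"math"
	)

	func main() {
		i, f := math.Modf(-0.5)
		// Both results carry the input's sign; the integer part is
		// negative zero, not positive zero.
		fmt.Println(i, f)                             // -0 -0.5
		fmt.Println(math.Signbit(i), math.Signbit(f)) // true true
	}
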
 
diff --git a/src/math/big/decimal.go b/src/math/big/decimal.go
index 2595e5f..2c0c9da 100644
--- a/src/math/big/decimal.go
+++ b/src/math/big/decimal.go
@@ -20,7 +20,7 @@
 package big
 
 // A decimal represents an unsigned floating-point number in decimal representation.
-// The value of a non-zero decimal x is x.mant * 10 ** x.exp with 0.5 <= x.mant < 1,
+// The value of a non-zero decimal d is d.mant * 10**d.exp with 0.5 <= d.mant < 1,
 // with the most-significant mantissa digit at index 0. For the zero decimal, the
 // mantissa length and exponent are 0.
 // The zero value for decimal represents a ready-to-use 0.0.
@@ -29,6 +29,14 @@
 	exp  int    // exponent
 }
 
+// at returns the i'th mantissa digit, starting with the most significant digit at 0.
+func (d *decimal) at(i int) byte {
+	if 0 <= i && i < len(d.mant) {
+		return d.mant[i]
+	}
+	return '0'
+}
+
 // Maximum shift amount that can be done in one pass without overflow.
 // A Word has _W bits and (1<<maxShift - 1)*10 + 9 must fit into Word.
 const maxShift = _W - 4
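
For illustration, a standalone mirror of the unexported at helper (a copy for demonstration, not the method itself): positions outside the stored mantissa read as '0', so callers can index freely.

	package main

	import "fmt"

	// at mirrors decimal.at: digit positions outside the stored
	// mantissa read as '0', so callers need no bounds checks.
	func at(mant []byte, i int) byte {
		if 0 <= i && i < len(mant) {
			return mant[i]
		}
		return '0'
	}

	func main() {
		mant := []byte("12345")
		fmt.Printf("%c %c\n", at(mant, 2), at(mant, 99)) // 3 0
	}
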
@@ -72,7 +80,7 @@
 	}
 
 	// Convert mantissa into decimal representation.
-	s := m.decimalString() // TODO(gri) avoid string conversion here
+	s := m.utoa(10)
 	n := len(s)
 	x.exp = n
 	// Trim trailing zeros; instead the exponent is tracking
@@ -92,12 +100,6 @@
 	}
 }
 
-// Possibly optimization: The current implementation of nat.string takes
-// a charset argument. When a right shift is needed, we could provide
-// "\x00\x01...\x09" instead of "012..9" (as in nat.decimalString) and
-// avoid the repeated +'0' and -'0' operations in decimal.shr (and do a
-// single +'0' pass at the end).
-
 // shr implements x >> s, for s <= maxShift.
 func shr(x *decimal, s uint) {
 	// Division by 1<<s using shift-and-subtract algorithm.
diff --git a/src/math/big/decimal_test.go b/src/math/big/decimal_test.go
index 81e022a..15bdb18 100644
--- a/src/math/big/decimal_test.go
+++ b/src/math/big/decimal_test.go
@@ -104,3 +104,13 @@
 		}
 	}
 }
+
+func BenchmarkDecimalConversion(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		for shift := -100; shift <= +100; shift++ {
+			var d decimal
+			d.init(natOne, shift)
+			d.String()
+		}
+	}
+}
diff --git a/src/math/big/doc.go b/src/math/big/doc.go
new file mode 100644
index 0000000..a3c2375
--- /dev/null
+++ b/src/math/big/doc.go
@@ -0,0 +1,99 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+Package big implements arbitrary-precision arithmetic (big numbers).
+The following numeric types are supported:
+
+	Int    signed integers
+	Rat    rational numbers
+	Float  floating-point numbers
+
+The zero value for an Int, Rat, or Float corresponds to 0. Thus, new
+values can be declared in the usual ways and denote 0 without further
+initialization:
+
+	var x Int        // &x is an *Int of value 0
+	var r = &Rat{}   // r is a *Rat of value 0
+	y := new(Float)  // y is a *Float of value 0
+
+Alternatively, new values can be allocated and initialized with factory
+functions of the form:
+
+	func NewT(v V) *T
+
+For instance, NewInt(x) returns an *Int set to the value of the int64
+argument x, NewRat(a, b) returns a *Rat set to the fraction a/b where
+a and b are int64 values, and NewFloat(f) returns a *Float initialized
+to the float64 argument f. More flexibility is provided with explicit
+setters, for instance:
+
+	var z1 Int
+	z1.SetUint64(123)                 // z1 := 123
+	z2 := new(Rat).SetFloat64(1.25)   // z2 := 5/4
+	z3 := new(Float).SetInt(&z1)      // z3 := 123.0
+
+Setters, numeric operations and predicates are represented as methods of
+the form:
+
+	func (z *T) SetV(v V) *T          // z = v
+	func (z *T) Unary(x *T) *T        // z = unary x
+	func (z *T) Binary(x, y *T) *T    // z = x binary y
+	func (x *T) Pred() P              // p = pred(x)
+
+with T one of Int, Rat, or Float. For unary and binary operations, the
+result is the receiver (usually named z in that case; see below); if it
+is one of the operands x or y it may be safely overwritten (and its memory
+reused).
+
+Arithmetic expressions are typically written as a sequence of individual
+method calls, with each call corresponding to an operation. The receiver
+denotes the result and the method arguments are the operation's operands.
+For instance, given three *Int values a, b and c, the invocation
+
+	c.Add(a, b)
+
+computes the sum a + b and stores the result in c, overwriting whatever
+value was held in c before. Unless specified otherwise, operations permit
+aliasing of parameters, so it is perfectly ok to write
+
+	sum.Add(sum, x)
+
+to accumulate values x in a sum.
+
+(By always passing in a result value via the receiver, memory use can be
+much better controlled. Instead of having to allocate new memory for each
+result, an operation can reuse the space allocated for the result value,
+and overwrite that value with the new result in the process.)
+
+Notational convention: Incoming method parameters (including the receiver)
+are named consistently in the API to clarify their use. Incoming operands
+are usually named x, y, a, b, and so on, but never z. A parameter specifying
+the result is named z (typically the receiver).
+
+For instance, the arguments for (*Int).Add are named x and y, and because
+the receiver specifies the result destination, it is called z:
+
+	func (z *Int) Add(x, y *Int) *Int
+
+Methods of this form typically return the incoming receiver as well, to
+enable simple call chaining.
+
+Methods that don't require a result value to be passed in (for instance,
+Int.Sign) simply return the result. In this case, the receiver is typically
+the first operand, named x:
+
+	func (x *Int) Sign() int
+
+Various methods support conversions between strings and the corresponding
+numeric values: *Int, *Rat, and *Float values implement
+the Stringer interface for a (default) string representation of the value,
+but also provide SetString methods to initialize a value from a string in
+a variety of supported formats (see the respective SetString documentation).
+
+Finally, *Int, *Rat, and *Float satisfy the fmt package's Scanner interface
+for scanning and (except for *Rat) the Formatter interface for formatted
+printing.
+*/
+package big
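
A short sketch of the receiver-as-result convention the new package comment describes; the result value is passed in, so storage is reused and calls chain:

	package main

	import (
		"fmt"
		"math/big"
	)

	func main() {
		a := big.NewInt(3)
		b := big.NewInt(4)

		// z receives the result; reusing it as an operand is fine.
		z := new(big.Int)
		z.Add(a, b) // z = 7
		z.Mul(z, z) // z = 49, same storage reused

		fmt.Println(z) // 49
	}
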
diff --git a/src/math/big/example_rat_test.go b/src/math/big/example_rat_test.go
new file mode 100644
index 0000000..a971170
--- /dev/null
+++ b/src/math/big/example_rat_test.go
@@ -0,0 +1,65 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package big_test
+
+import (
+	"fmt"
+	"math/big"
+)
+
+// Use the classic continued fraction for e
+//     e = [1; 0, 1, 1, 2, 1, 1, ... 2n, 1, 1, ...]
+// i.e., for the nth term, use
+//     1          if   n mod 3 != 1
+//  (n-1)/3 * 2   if   n mod 3 == 1
+func recur(n, lim int64) *big.Rat {
+	term := new(big.Rat)
+	if n%3 != 1 {
+		term.SetInt64(1)
+	} else {
+		term.SetInt64((n - 1) / 3 * 2)
+	}
+
+	if n > lim {
+		return term
+	}
+
+	// Directly initialize frac as the fractional
+	// inverse of the result of recur.
+	frac := new(big.Rat).Inv(recur(n+1, lim))
+
+	return term.Add(term, frac)
+}
+
+// This example demonstrates how to use big.Rat to compute the
+// first 15 terms in the sequence of rational convergents for
+// the constant e (the base of the natural logarithm).
+func Example_eConvergents() {
+	for i := 1; i <= 15; i++ {
+		r := recur(0, int64(i))
+
+		// Print r both as a fraction and as a floating-point number.
+		// Since big.Rat implements fmt.Formatter, we can use %-13s to
+		// get a left-aligned string representation of the fraction.
+		fmt.Printf("%-13s = %s\n", r, r.FloatString(8))
+	}
+
+	// Output:
+	// 2/1           = 2.00000000
+	// 3/1           = 3.00000000
+	// 8/3           = 2.66666667
+	// 11/4          = 2.75000000
+	// 19/7          = 2.71428571
+	// 87/32         = 2.71875000
+	// 106/39        = 2.71794872
+	// 193/71        = 2.71830986
+	// 1264/465      = 2.71827957
+	// 1457/536      = 2.71828358
+	// 2721/1001     = 2.71828172
+	// 23225/8544    = 2.71828184
+	// 25946/9545    = 2.71828182
+	// 49171/18089   = 2.71828183
+	// 517656/190435 = 2.71828183
+}
diff --git a/src/math/big/float.go b/src/math/big/float.go
index d7aa895..b92d5ce 100644
--- a/src/math/big/float.go
+++ b/src/math/big/float.go
@@ -124,7 +124,7 @@
 // rounding error is described by the Float's Accuracy.
 type RoundingMode byte
 
-// The following rounding modes are supported.
+// These constants define supported rounding modes.
 const (
 	ToNearestEven RoundingMode = iota // == IEEE 754-2008 roundTiesToEven
 	ToNearestAway                     // == IEEE 754-2008 roundTiesToAway
@@ -1272,7 +1272,7 @@
 		ex = ey
 	}
 
-	// operands may have cancelled each other out
+	// operands may have canceled each other out
 	if len(z.mant) == 0 {
 		z.acc = Exact
 		z.form = zero
diff --git a/src/math/big/floatconv.go b/src/math/big/floatconv.go
index 0e8b7b6..37d5c06 100644
--- a/src/math/big/floatconv.go
+++ b/src/math/big/floatconv.go
@@ -72,37 +72,46 @@
 	// ebase**exp. Finally, mantissa normalization (shift left) requires
 	// a correcting multiplication by 2**(-shiftcount). Multiplications
 	// are commutative, so we can apply them in any order as long as there
-	// is no loss of precision. We only have powers of 2 and 10; keep
-	// track via separate exponents exp2 and exp10.
+	// is no loss of precision. We only have powers of 2 and 10, and
+	// we split powers of 10 into the product of the same powers of
+	// 2 and 5. This reduces the size of the multiplication factor
+	// needed for base-10 exponents.
 
-	// normalize mantissa and get initial binary exponent
-	var exp2 = int64(len(z.mant))*_W - fnorm(z.mant)
+	// normalize mantissa and determine initial exponent contributions
+	exp2 := int64(len(z.mant))*_W - fnorm(z.mant)
+	exp5 := int64(0)
 
 	// determine binary or decimal exponent contribution of decimal point
-	var exp10 int64
 	if fcount < 0 {
 		// The mantissa has a "decimal" point ddd.dddd; and
 		// -fcount is the number of digits to the right of '.'.
 		// Adjust relevant exponent accordingly.
+		d := int64(fcount)
 		switch b {
-		case 16:
-			fcount *= 4 // hexadecimal digits are 4 bits each
-			fallthrough
+		case 10:
+			exp5 = d
+			fallthrough // 10**e == 5**e * 2**e
 		case 2:
-			exp2 += int64(fcount)
-		default: // b == 10
-			exp10 = int64(fcount)
+			exp2 += d
+		case 16:
+			exp2 += d * 4 // hexadecimal digits are 4 bits each
+		default:
+			panic("unexpected mantissa base")
 		}
-		// we don't need fcount anymore
+		// fcount consumed - not needed anymore
 	}
 
 	// take actual exponent into account
-	if ebase == 2 {
+	switch ebase {
+	case 10:
+		exp5 += exp
+		fallthrough
+	case 2:
 		exp2 += exp
-	} else { // ebase == 10
-		exp10 += exp
+	default:
+		panic("unexpected exponent base")
 	}
-	// we don't need exp anymore
+	// exp consumed - not needed anymore
 
 	// apply 2**exp2
 	if MinExp <= exp2 && exp2 <= MaxExp {
@@ -115,49 +124,76 @@
 		return
 	}
 
-	if exp10 == 0 {
-		// no decimal exponent to consider
+	if exp5 == 0 {
+		// no decimal exponent contribution
 		z.round(0)
 		return
 	}
-	// exp10 != 0
+	// exp5 != 0
 
-	// apply 10**exp10
+	// apply 5**exp5
 	p := new(Float).SetPrec(z.Prec() + 64) // use more bits for p -- TODO(gri) what is the right number?
-	if exp10 < 0 {
-		z.Quo(z, p.pow10(-exp10))
+	if exp5 < 0 {
+		z.Quo(z, p.pow5(uint64(-exp5)))
 	} else {
-		z.Mul(z, p.pow10(exp10))
+		z.Mul(z, p.pow5(uint64(exp5)))
 	}
 
 	return
 }
 
-// These powers of 10 can be represented exactly as a float64.
-var pow10tab = [...]float64{
-	1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
-	1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
+// These powers of 5 fit into a uint64.
+//
+//	for p, q := uint64(0), uint64(1); p < q; p, q = q, q*5 {
+//		fmt.Println(q)
+//	}
+//
+var pow5tab = [...]uint64{
+	1,
+	5,
+	25,
+	125,
+	625,
+	3125,
+	15625,
+	78125,
+	390625,
+	1953125,
+	9765625,
+	48828125,
+	244140625,
+	1220703125,
+	6103515625,
+	30517578125,
+	152587890625,
+	762939453125,
+	3814697265625,
+	19073486328125,
+	95367431640625,
+	476837158203125,
+	2384185791015625,
+	11920928955078125,
+	59604644775390625,
+	298023223876953125,
+	1490116119384765625,
+	7450580596923828125,
 }
 
-// pow10 sets z to 10**n and returns z.
+// pow5 sets z to 5**n and returns z.
 // n must not be negative.
-func (z *Float) pow10(n int64) *Float {
-	if n < 0 {
-		panic("pow10 called with negative argument")
-	}
-
-	const m = int64(len(pow10tab) - 1)
+func (z *Float) pow5(n uint64) *Float {
+	const m = uint64(len(pow5tab) - 1)
 	if n <= m {
-		return z.SetFloat64(pow10tab[n])
+		return z.SetUint64(pow5tab[n])
 	}
 	// n > m
 
-	z.SetFloat64(pow10tab[m])
+	z.SetUint64(pow5tab[m])
 	n -= m
 
 	// use more bits for f than for z
 	// TODO(gri) what is the right number?
-	f := new(Float).SetPrec(z.Prec() + 64).SetInt64(10)
+	f := new(Float).SetPrec(z.Prec() + 64).SetUint64(5)
 
 	for n > 0 {
 		if n&1 != 0 {
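
The rewrite above rests on the identity 10**e == 5**e * 2**e; the 2**e half folds into the binary exponent for free, leaving only the much smaller 5**e multiplier. A sketch checking the identity with big.Int (illustration only, not the parser's code):

	package main

	import (
		"fmt"
		"math/big"
	)

	func main() {
		const e = 20

		five := new(big.Int).Exp(big.NewInt(5), big.NewInt(e), nil)
		got := new(big.Int).Lsh(five, e) // 5**e << e == 5**e * 2**e

		want := new(big.Int).Exp(big.NewInt(10), big.NewInt(e), nil)
		fmt.Println(got.Cmp(want) == 0) // true: 10**e == 5**e * 2**e
	}
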
diff --git a/src/math/big/floatconv_test.go b/src/math/big/floatconv_test.go
index 156e1af..b6f9993 100644
--- a/src/math/big/floatconv_test.go
+++ b/src/math/big/floatconv_test.go
@@ -139,6 +139,8 @@
 	}
 }
 
+func fdiv(a, b float64) float64 { return a / b }
+
 const (
 	below1e23 = 99999999999999974834176
 	above1e23 = 100000000000000008388608
@@ -187,11 +189,11 @@
 		{1, 'e', 5, "1.00000e+00"},
 		{1, 'f', 5, "1.00000"},
 		{1, 'g', 5, "1"},
-		// {1, 'g', -1, "1"},
-		// {20, 'g', -1, "20"},
-		// {1234567.8, 'g', -1, "1.2345678e+06"},
-		// {200000, 'g', -1, "200000"},
-		// {2000000, 'g', -1, "2e+06"},
+		{1, 'g', -1, "1"},
+		{20, 'g', -1, "20"},
+		{1234567.8, 'g', -1, "1.2345678e+06"},
+		{200000, 'g', -1, "200000"},
+		{2000000, 'g', -1, "2e+06"},
 
 		// g conversion and zero suppression
 		{400, 'g', 2, "4e+02"},
@@ -207,22 +209,22 @@
 		{0, 'e', 5, "0.00000e+00"},
 		{0, 'f', 5, "0.00000"},
 		{0, 'g', 5, "0"},
-		// {0, 'g', -1, "0"},
+		{0, 'g', -1, "0"},
 
 		{-1, 'e', 5, "-1.00000e+00"},
 		{-1, 'f', 5, "-1.00000"},
 		{-1, 'g', 5, "-1"},
-		// {-1, 'g', -1, "-1"},
+		{-1, 'g', -1, "-1"},
 
 		{12, 'e', 5, "1.20000e+01"},
 		{12, 'f', 5, "12.00000"},
 		{12, 'g', 5, "12"},
-		// {12, 'g', -1, "12"},
+		{12, 'g', -1, "12"},
 
 		{123456700, 'e', 5, "1.23457e+08"},
 		{123456700, 'f', 5, "123456700.00000"},
 		{123456700, 'g', 5, "1.2346e+08"},
-		// {123456700, 'g', -1, "1.234567e+08"},
+		{123456700, 'g', -1, "1.234567e+08"},
 
 		{1.2345e6, 'e', 5, "1.23450e+06"},
 		{1.2345e6, 'f', 5, "1234500.00000"},
@@ -232,36 +234,38 @@
 		{1e23, 'f', 17, "99999999999999991611392.00000000000000000"},
 		{1e23, 'g', 17, "9.9999999999999992e+22"},
 
-		// {1e23, 'e', -1, "1e+23"},
-		// {1e23, 'f', -1, "100000000000000000000000"},
-		// {1e23, 'g', -1, "1e+23"},
+		{1e23, 'e', -1, "1e+23"},
+		{1e23, 'f', -1, "100000000000000000000000"},
+		{1e23, 'g', -1, "1e+23"},
 
 		{below1e23, 'e', 17, "9.99999999999999748e+22"},
 		{below1e23, 'f', 17, "99999999999999974834176.00000000000000000"},
 		{below1e23, 'g', 17, "9.9999999999999975e+22"},
 
-		// {below1e23, 'e', -1, "9.999999999999997e+22"},
-		// {below1e23, 'f', -1, "99999999999999970000000"},
-		// {below1e23, 'g', -1, "9.999999999999997e+22"},
+		{below1e23, 'e', -1, "9.999999999999997e+22"},
+		{below1e23, 'f', -1, "99999999999999970000000"},
+		{below1e23, 'g', -1, "9.999999999999997e+22"},
 
 		{above1e23, 'e', 17, "1.00000000000000008e+23"},
 		{above1e23, 'f', 17, "100000000000000008388608.00000000000000000"},
-		// {above1e23, 'g', 17, "1.0000000000000001e+23"},
+		{above1e23, 'g', 17, "1.0000000000000001e+23"},
 
-		// {above1e23, 'e', -1, "1.0000000000000001e+23"},
-		// {above1e23, 'f', -1, "100000000000000010000000"},
-		// {above1e23, 'g', -1, "1.0000000000000001e+23"},
+		{above1e23, 'e', -1, "1.0000000000000001e+23"},
+		{above1e23, 'f', -1, "100000000000000010000000"},
+		{above1e23, 'g', -1, "1.0000000000000001e+23"},
 
-		// {fdiv(5e-304, 1e20), 'g', -1, "5e-324"},
-		// {fdiv(-5e-304, 1e20), 'g', -1, "-5e-324"},
+		{5e-304 / 1e20, 'g', -1, "5e-324"},
+		{-5e-304 / 1e20, 'g', -1, "-5e-324"},
+		{fdiv(5e-304, 1e20), 'g', -1, "5e-324"},   // avoid constant arithmetic
+		{fdiv(-5e-304, 1e20), 'g', -1, "-5e-324"}, // avoid constant arithmetic
 
-		// {32, 'g', -1, "32"},
-		// {32, 'g', 0, "3e+01"},
+		{32, 'g', -1, "32"},
+		{32, 'g', 0, "3e+01"},
 
-		// {100, 'x', -1, "%x"},
+		{100, 'x', -1, "%x"},
 
-		// {math.NaN(), 'g', -1, "NaN"},
-		// {-math.NaN(), 'g', -1, "NaN"},
+		// {math.NaN(), 'g', -1, "NaN"},  // Float doesn't support NaNs
+		// {-math.NaN(), 'g', -1, "NaN"}, // Float doesn't support NaNs
 		{math.Inf(0), 'g', -1, "+Inf"},
 		{math.Inf(-1), 'g', -1, "-Inf"},
 		{-math.Inf(0), 'g', -1, "-Inf"},
@@ -279,18 +283,24 @@
 		{1.5, 'f', 0, "2"},
 
 		// http://www.exploringbinary.com/java-hangs-when-converting-2-2250738585072012e-308/
-		// {2.2250738585072012e-308, 'g', -1, "2.2250738585072014e-308"},
+		{2.2250738585072012e-308, 'g', -1, "2.2250738585072014e-308"},
 		// http://www.exploringbinary.com/php-hangs-on-numeric-value-2-2250738585072011e-308/
-		// {2.2250738585072011e-308, 'g', -1, "2.225073858507201e-308"},
+		{2.2250738585072011e-308, 'g', -1, "2.225073858507201e-308"},
 
 		// Issue 2625.
 		{383260575764816448, 'f', 0, "383260575764816448"},
-		// {383260575764816448, 'g', -1, "3.8326057576481645e+17"},
+		{383260575764816448, 'g', -1, "3.8326057576481645e+17"},
 	} {
-		f := new(Float).SetFloat64(test.x)
+		// The test cases are from the strconv package, which tests float64 values.
+		// When formatting values with prec = -1 (shortest representation),
+		// the actually available mantissa precision matters.
+		// For denormalized values, that precision is < 53 (SetFloat64 default).
+		// Compute and set the actual precision explicitly.
+		f := new(Float).SetPrec(actualPrec(test.x)).SetFloat64(test.x)
 		got := f.Text(test.format, test.prec)
 		if got != test.want {
 			t.Errorf("%v: got %s; want %s", test, got, test.want)
+			continue
 		}
 
 		if test.format == 'b' && test.x == 0 {
@@ -308,6 +318,15 @@
 	}
 }
 
+// actualPrec returns the number of actually used mantissa bits.
+func actualPrec(x float64) uint {
+	if bits := math.Float64bits(x); x != 0 && bits&(0x7ff<<52) == 0 {
+		// x is denormalized
+		return 64 - nlz64(bits&(1<<52-1))
+	}
+	return 53
+}
+
 func TestFloatText(t *testing.T) {
 	for _, test := range []struct {
 		x      string
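
The nlz64 helper used by actualPrec is unexported; for reference, a self-contained sketch of the same computation written against the standard math/bits package (a later addition to the library, assumed here purely for the demonstration):

	package main

	import (
		"fmt"
		"math"
		"math/bits"
	)

	// Effective mantissa bits of a float64: 53 for normal values,
	// fewer for subnormals, which lack the implicit leading 1.
	func actualPrec(x float64) uint {
		if b := math.Float64bits(x); x != 0 && b&(0x7ff<<52) == 0 {
			return 64 - uint(bits.LeadingZeros64(b&(1<<52-1)))
		}
		return 53
	}

	func main() {
		fmt.Println(actualPrec(1.5))    // 53
		fmt.Println(actualPrec(5e-324)) // 1 (smallest subnormal)
	}
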
@@ -367,10 +386,20 @@
 
 		// make sure "stupid" exponents don't stall the machine
 		{"1e1000000", 64, 'p', 0, "0x.88b3a28a05eade3ap+3321929"},
+		{"1e646456992", 64, 'p', 0, "0x.e883a0c5c8c7c42ap+2147483644"},
+		{"1e646456993", 64, 'p', 0, "+Inf"},
 		{"1e1000000000", 64, 'p', 0, "+Inf"},
 		{"1e-1000000", 64, 'p', 0, "0x.efb4542cc8ca418ap-3321928"},
+		{"1e-646456993", 64, 'p', 0, "0x.e17c8956983d9d59p-2147483647"},
+		{"1e-646456994", 64, 'p', 0, "0"},
 		{"1e-1000000000", 64, 'p', 0, "0"},
 
+		// minimum and maximum values
+		{"1p2147483646", 64, 'p', 0, "0x.8p+2147483647"},
+		{"0x.8p2147483647", 64, 'p', 0, "0x.8p+2147483647"},
+		{"0x.8p-2147483647", 64, 'p', 0, "0x.8p-2147483647"},
+		{"1p-2147483649", 64, 'p', 0, "0x.8p-2147483648"},
+
 		// TODO(gri) need tests for actual large Floats
 
 		{"0", 53, 'b', 0, "0"},
@@ -438,9 +467,6 @@
 		value  interface{} // float32, float64, or string (== 512bit *Float)
 		want   string
 	}{
-		// TODO(gri) uncomment the disabled 'g'/'G' formats
-		// 	     below once (*Float).Text supports prec < 0
-
 		// from fmt/fmt_test.go
 		{"%+.3e", 0.0, "+0.000e+00"},
 		{"%+.3e", 1.0, "+1.000e+00"},
@@ -471,9 +497,9 @@
 		{"%f", 1234.5678e-8, "0.000012"},
 		{"%f", -7.0, "-7.000000"},
 		{"%f", -1e-9, "-0.000000"},
-		// {"%g", 1234.5678e3, "1.2345678e+06"},
-		// {"%g", float32(1234.5678e3), "1.2345678e+06"},
-		// {"%g", 1234.5678e-8, "1.2345678e-05"},
+		{"%g", 1234.5678e3, "1.2345678e+06"},
+		{"%g", float32(1234.5678e3), "1.2345678e+06"},
+		{"%g", 1234.5678e-8, "1.2345678e-05"},
 		{"%g", -7.0, "-7"},
 		{"%g", -1e-9, "-1e-09"},
 		{"%g", float32(-1e-9), "-1e-09"},
@@ -482,9 +508,9 @@
 		{"%E", 1234.5678e-8, "1.234568E-05"},
 		{"%E", -7.0, "-7.000000E+00"},
 		{"%E", -1e-9, "-1.000000E-09"},
-		// {"%G", 1234.5678e3, "1.2345678E+06"},
-		// {"%G", float32(1234.5678e3), "1.2345678E+06"},
-		// {"%G", 1234.5678e-8, "1.2345678E-05"},
+		{"%G", 1234.5678e3, "1.2345678E+06"},
+		{"%G", float32(1234.5678e3), "1.2345678E+06"},
+		{"%G", 1234.5678e-8, "1.2345678E-05"},
 		{"%G", -7.0, "-7"},
 		{"%G", -1e-9, "-1E-09"},
 		{"%G", float32(-1e-9), "-1E-09"},
@@ -500,9 +526,9 @@
 		{"%-20f", 1.23456789e3, "1234.567890         "},
 		{"%20.8f", 1.23456789e3, "       1234.56789000"},
 		{"%20.8f", 1.23456789e-3, "          0.00123457"},
-		// {"%g", 1.23456789e3, "1234.56789"},
-		// {"%g", 1.23456789e-3, "0.00123456789"},
-		// {"%g", 1.23456789e20, "1.23456789e+20"},
+		{"%g", 1.23456789e3, "1234.56789"},
+		{"%g", 1.23456789e-3, "0.00123456789"},
+		{"%g", 1.23456789e20, "1.23456789e+20"},
 		{"%20e", math.Inf(1), "                +Inf"},
 		{"%-20f", math.Inf(-1), "-Inf                "},
 
@@ -541,7 +567,6 @@
 		{"%v", -1e-9, "-1e-09"},
 		{"%v", float32(-1e-9), "-1e-09"},
 		{"%010v", 0.0, "0000000000"},
-		{"%010v", 0.0, "0000000000"},
 
 		// *Float cases
 		{"%.20f", "1e-20", "0.00000000000000000001"},
@@ -571,3 +596,67 @@
 		}
 	}
 }
+
+func BenchmarkParseFloatSmallExp(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		for _, s := range []string{
+			"1e0",
+			"1e-1",
+			"1e-2",
+			"1e-3",
+			"1e-4",
+			"1e-5",
+			"1e-10",
+			"1e-20",
+			"1e-50",
+			"1e1",
+			"1e2",
+			"1e3",
+			"1e4",
+			"1e5",
+			"1e10",
+			"1e20",
+			"1e50",
+		} {
+			var x Float
+			_, _, err := x.Parse(s, 0)
+			if err != nil {
+				b.Fatalf("%s: %v", s, err)
+			}
+		}
+	}
+}
+
+func BenchmarkParseFloatLargeExp(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		for _, s := range []string{
+			"1e0",
+			"1e-10",
+			"1e-20",
+			"1e-30",
+			"1e-40",
+			"1e-50",
+			"1e-100",
+			"1e-500",
+			"1e-1000",
+			"1e-5000",
+			"1e-10000",
+			"1e10",
+			"1e20",
+			"1e30",
+			"1e40",
+			"1e50",
+			"1e100",
+			"1e500",
+			"1e1000",
+			"1e5000",
+			"1e10000",
+		} {
+			var x Float
+			_, _, err := x.Parse(s, 0)
+			if err != nil {
+				b.Fatalf("%s: %v", s, err)
+			}
+		}
+	}
+}
diff --git a/src/math/big/floatexample_test.go b/src/math/big/floatexample_test.go
index 358776e..fb799d5 100644
--- a/src/math/big/floatexample_test.go
+++ b/src/math/big/floatexample_test.go
@@ -109,3 +109,33 @@
 	// +Inf   1.2    1
 	// +Inf  +Inf    0
 }
+
+func ExampleRoundingMode() {
+	operands := []float64{2.6, 2.5, 2.1, -2.1, -2.5, -2.6}
+
+	fmt.Print("   x")
+	for mode := big.ToNearestEven; mode <= big.ToPositiveInf; mode++ {
+		fmt.Printf("  %s", mode)
+	}
+	fmt.Println()
+
+	for _, f64 := range operands {
+		fmt.Printf("%4g", f64)
+		for mode := big.ToNearestEven; mode <= big.ToPositiveInf; mode++ {
+			// The sample operands above require 2 bits to represent their
+			// mantissa; set the binary precision to 2 to round them to integers.
+			f := new(big.Float).SetPrec(2).SetMode(mode).SetFloat64(f64)
+			fmt.Printf("  %*g", len(mode.String()), f)
+		}
+		fmt.Println()
+	}
+
+	// Output:
+	//    x  ToNearestEven  ToNearestAway  ToZero  AwayFromZero  ToNegativeInf  ToPositiveInf
+	//  2.6              3              3       2             3              2              3
+	//  2.5              2              3       2             3              2              3
+	//  2.1              2              2       2             3              2              3
+	// -2.1             -2             -2      -2            -3             -3             -2
+	// -2.5             -2             -3      -2            -3             -3             -2
+	// -2.6             -3             -3      -2            -3             -3             -2
+}
diff --git a/src/math/big/floatmarsh.go b/src/math/big/floatmarsh.go
new file mode 100644
index 0000000..44987ee
--- /dev/null
+++ b/src/math/big/floatmarsh.go
@@ -0,0 +1,33 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file implements encoding/decoding of Floats.
+
+package big
+
+import "fmt"
+
+// MarshalText implements the encoding.TextMarshaler interface.
+// Only the Float value is marshaled (in full precision), other
+// attributes such as precision or accuracy are ignored.
+func (x *Float) MarshalText() (text []byte, err error) {
+	if x == nil {
+		return []byte("<nil>"), nil
+	}
+	var buf []byte
+	return x.Append(buf, 'g', -1), nil
+}
+
+// UnmarshalText implements the encoding.TextUnmarshaler interface.
+// The result is rounded per the precision and rounding mode of z.
+// If z's precision is 0, it is changed to 64 before rounding takes
+// effect.
+func (z *Float) UnmarshalText(text []byte) error {
+	// TODO(gri): get rid of the []byte/string conversion
+	_, _, err := z.Parse(string(text), 0)
+	if err != nil {
+		err = fmt.Errorf("math/big: cannot unmarshal %q into a *big.Float (%v)", text, err)
+	}
+	return err
+}
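
With MarshalText and UnmarshalText in place, a *Float round-trips through encoding/json as a quoted string, and the receiver's precision governs rounding on the way back in. A usage sketch:

	package main

	import (
		"encoding/json"
		"fmt"
		"math/big"
	)

	func main() {
		x, _, err := big.ParseFloat("3.14e1234", 10, 100, big.ToNearestEven)
		if err != nil {
			panic(err)
		}

		b, _ := json.Marshal(x) // quoted string via MarshalText
		fmt.Println(string(b))

		// The receiver's precision decides the rounding on the way in.
		y := new(big.Float).SetPrec(100)
		if err := json.Unmarshal(b, y); err != nil {
			panic(err)
		}
		fmt.Println(x.Cmp(y) == 0) // true
	}
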
diff --git a/src/math/big/floatmarsh_test.go b/src/math/big/floatmarsh_test.go
new file mode 100644
index 0000000..d7ef2fc
--- /dev/null
+++ b/src/math/big/floatmarsh_test.go
@@ -0,0 +1,54 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package big
+
+import (
+	"encoding/json"
+	"testing"
+)
+
+var floatVals = []string{
+	"0",
+	"1",
+	"0.1",
+	"2.71828",
+	"1234567890",
+	"3.14e1234",
+	"3.14e-1234",
+	"0.738957395793475734757349579759957975985497e100",
+	"0.73895739579347546656564656573475734957975995797598589749859834759476745986795497e100",
+	"inf",
+	"Inf",
+}
+
+func TestFloatJSONEncoding(t *testing.T) {
+	for _, test := range floatVals {
+		for _, sign := range []string{"", "+", "-"} {
+			for _, prec := range []uint{0, 1, 2, 10, 53, 64, 100, 1000} {
+				x := sign + test
+				var tx Float
+				_, _, err := tx.SetPrec(prec).Parse(x, 0)
+				if err != nil {
+					t.Errorf("parsing of %s (prec = %d) failed (invalid test case): %v", x, prec, err)
+					continue
+				}
+				b, err := json.Marshal(&tx)
+				if err != nil {
+					t.Errorf("marshaling of %v (prec = %d) failed: %v", &tx, prec, err)
+					continue
+				}
+				var rx Float
+				rx.SetPrec(prec)
+				if err := json.Unmarshal(b, &rx); err != nil {
+					t.Errorf("unmarshaling of %v (prec = %d) failed: %v", &tx, prec, err)
+					continue
+				}
+				if rx.Cmp(&tx) != 0 {
+					t.Errorf("JSON encoding of %v (prec = %d) failed: got %v want %v", &tx, prec, &rx, &tx)
+				}
+			}
+		}
+	}
+}
diff --git a/src/math/big/ftoa.go b/src/math/big/ftoa.go
index 5c5f2ce..c5cdb5e 100644
--- a/src/math/big/ftoa.go
+++ b/src/math/big/ftoa.go
@@ -9,9 +9,9 @@
 package big
 
 import (
+	"bytes"
 	"fmt"
 	"strconv"
-	"strings"
 )
 
 // Text converts the floating-point number x to a string according
@@ -37,16 +37,16 @@
 // printed by the 'e', 'E', 'f', 'g', and 'G' formats. For 'e', 'E', and 'f'
 // it is the number of digits after the decimal point. For 'g' and 'G' it is
 // the total number of digits. A negative precision selects the smallest
-// number of digits necessary to identify the value x uniquely.
+// number of decimal digits necessary to identify the value x uniquely using
+// x.Prec() mantissa bits.
 // The prec value is ignored for the 'b' or 'p' format.
-//
-// BUG(gri) Float.Text does not accept negative precisions (issue #10991).
 func (x *Float) Text(format byte, prec int) string {
 	const extra = 10 // TODO(gri) determine a good/better value here
 	return string(x.Append(make([]byte, 0, prec+extra), format, prec))
 }
 
 // String formats x like x.Text('g', 10).
+// (String must be called explicitly; Float.Format does not support the %s verb.)
 func (x *Float) String() string {
 	return x.Text('g', 10)
 }
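
With negative precisions now accepted by Text, a quick sketch contrasting a fixed digit count with the shortest unique representation:

	package main

	import (
		"fmt"
		"math/big"
	)

	func main() {
		x := big.NewFloat(1.0 / 3.0) // 53-bit mantissa

		fmt.Println(x.Text('g', 10)) // 0.3333333333
		fmt.Println(x.Text('g', -1)) // 0.3333333333333333 (shortest unique)
	}
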
@@ -83,6 +83,7 @@
 	// 1) convert Float to multiprecision decimal
 	var d decimal // == 0.0
 	if x.form == finite {
+		// x != 0
 		d.init(x.mant, int(x.exp)-x.mant.bitLen())
 	}
 
@@ -90,9 +91,7 @@
 	shortest := false
 	if prec < 0 {
 		shortest = true
-		panic("unimplemented")
-		// TODO(gri) complete this
-		// roundShortest(&d, f.mant, int(f.exp))
+		roundShortest(&d, x)
 		// Precision for shortest representation mode.
 		switch fmt {
 		case 'e', 'E':
@@ -158,6 +157,80 @@
 	return append(buf, '%', fmt)
 }
 
+func roundShortest(d *decimal, x *Float) {
+	// if the mantissa is zero, the number is zero - stop now
+	if len(d.mant) == 0 {
+		return
+	}
+
+	// Approach: All numbers in the interval [x - 1/2ulp, x + 1/2ulp]
+	// (possibly exclusive) round to x for the given precision of x.
+	// Compute the lower and upper bound in decimal form and find the
+	// shortest decimal number d such that lower <= d <= upper.
+
+	// TODO(gri) strconv/ftoa.go describes a shortcut in some cases.
+	// See if we can use it (in adjusted form) here as well.
+
+	// 1) Compute normalized mantissa mant and exponent exp for x such
+	// that the lsb of mant corresponds to 1/2 ulp for the precision of
+	// x (i.e., for mant we want x.prec + 1 bits).
+	mant := nat(nil).set(x.mant)
+	exp := int(x.exp) - mant.bitLen()
+	s := mant.bitLen() - int(x.prec+1)
+	switch {
+	case s < 0:
+		mant = mant.shl(mant, uint(-s))
+	case s > 0:
+		mant = mant.shr(mant, uint(+s))
+	}
+	exp += s
+	// x = mant * 2**exp with lsb(mant) == 1/2 ulp of x.prec
+
+	// 2) Compute lower bound by subtracting 1/2 ulp.
+	var lower decimal
+	var tmp nat
+	lower.init(tmp.sub(mant, natOne), exp)
+
+	// 3) Compute upper bound by adding 1/2 ulp.
+	var upper decimal
+	upper.init(tmp.add(mant, natOne), exp)
+
+	// The upper and lower bounds are possible outputs only if
+	// the original mantissa is even, so that ToNearestEven rounding
+	// would round to the original mantissa and not the neighbors.
+	inclusive := mant[0]&2 == 0 // test bit 1 since original mantissa was shifted by 1
+
+	// Now we can figure out the minimum number of digits required.
+	// Walk along until d has distinguished itself from upper and lower.
+	for i, m := range d.mant {
+		l := lower.at(i)
+		u := upper.at(i)
+
+		// Okay to round down (truncate) if lower has a different digit
+		// or if lower is inclusive and is exactly the result of rounding
+		// down (i.e., and we have reached the final digit of lower).
+		okdown := l != m || inclusive && i+1 == len(lower.mant)
+
+		// Okay to round up if upper has a different digit and either upper
+		// is inclusive or upper is bigger than the result of rounding up.
+		okup := m != u && (inclusive || m+1 < u || i+1 < len(upper.mant))
+
+		// If it's okay to do either, then round to the nearest one.
+		// If it's okay to do only one, do it.
+		switch {
+		case okdown && okup:
+			d.round(i + 1)
+			return
+		case okdown:
+			d.roundDown(i + 1)
+			return
+		case okup:
+			d.roundUp(i + 1)
+			return
+		}
+	}
+}
+
 // %e: d.ddddde±dd
 func fmtE(buf []byte, fmt byte, prec int, d decimal) []byte {
 	// first digit
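
The same lower/upper-bound digit walk underlies strconv's shortest float64 formatting, which gives an end-to-end feel for what roundShortest produces. (Runtime variables are used deliberately: constant arithmetic would fold 0.1+0.2 exactly, the same pitfall the fdiv helper in the tests avoids.)

	package main

	import (
		"fmt"
		"strconv"
	)

	func main() {
		// Shortest formatting emits just enough digits to parse back
		// to the identical value, and no more.
		x, y := 0.1, 0.2
		fmt.Println(strconv.FormatFloat(x+y, 'g', -1, 64)) // 0.30000000000000004
		fmt.Println(strconv.FormatFloat(0.3, 'g', -1, 64)) // 0.3
	}
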
@@ -219,11 +292,7 @@
 	if prec > 0 {
 		buf = append(buf, '.')
 		for i := 0; i < prec; i++ {
-			ch := byte('0')
-			if j := d.exp + i; 0 <= j && j < len(d.mant) {
-				ch = d.mant[j]
-			}
-			buf = append(buf, ch)
+			buf = append(buf, d.at(d.exp+i))
 		}
 	}
 
@@ -255,7 +324,7 @@
 		m = nat(nil).shr(m, uint(w-x.prec))
 	}
 
-	buf = append(buf, m.decimalString()...)
+	buf = append(buf, m.utoa(10)...)
 	buf = append(buf, 'p')
 	e := int64(x.exp) - int64(x.prec)
 	if e >= 0 {
@@ -289,7 +358,7 @@
 	m = m[i:]
 
 	buf = append(buf, "0x."...)
-	buf = append(buf, strings.TrimRight(m.hexString(), "0")...)
+	buf = append(buf, bytes.TrimRight(m.utoa(16), "0")...)
 	buf = append(buf, 'p')
 	if x.exp >= 0 {
 		buf = append(buf, '+')
@@ -314,10 +383,6 @@
 // '+' and ' ' for sign control, '0' for space or zero padding,
 // and '-' for left or right justification. See the fmt package
 // for details.
-//
-// BUG(gri) A missing precision for the 'g' format, or a negative
-//          (via '*') precision is not yet supported. Instead the
-//          default precision (6) is used in that case (issue #10991).
 func (x *Float) Format(s fmt.State, format rune) {
 	prec, hasPrec := s.Precision()
 	if !hasPrec {
@@ -336,8 +401,7 @@
 		fallthrough
 	case 'g', 'G':
 		if !hasPrec {
-			// TODO(gri) uncomment once (*Float).Text handles prec < 0
-			// prec = -1 // default precision for 'g', 'G'
+			prec = -1 // default precision for 'g', 'G'
 		}
 	default:
 		fmt.Fprintf(s, "%%!%c(*big.Float=%s)", format, x.String())
diff --git a/src/math/big/int.go b/src/math/big/int.go
index 84485a2..16b7cd1 100644
--- a/src/math/big/int.go
+++ b/src/math/big/int.go
@@ -551,8 +551,11 @@
 }
 
 // ProbablyPrime performs n Miller-Rabin tests to check whether x is prime.
-// If it returns true, x is prime with probability 1 - 1/4^n.
-// If it returns false, x is not prime. n must be > 0.
+// If x is prime, it returns true.
+// If x is not prime, it returns false with probability at least 1 - ¼ⁿ.
+//
+// It is not suitable for judging primes that an adversary may have crafted
+// to fool this test.
 func (x *Int) ProbablyPrime(n int) bool {
 	if n <= 0 {
 		panic("non-positive n for ProbablyPrime")
@@ -929,65 +932,3 @@
 	z.neg = true // z cannot be zero if x is positive
 	return z
 }
-
-// Gob codec version. Permits backward-compatible changes to the encoding.
-const intGobVersion byte = 1
-
-// GobEncode implements the gob.GobEncoder interface.
-func (x *Int) GobEncode() ([]byte, error) {
-	if x == nil {
-		return nil, nil
-	}
-	buf := make([]byte, 1+len(x.abs)*_S) // extra byte for version and sign bit
-	i := x.abs.bytes(buf) - 1            // i >= 0
-	b := intGobVersion << 1              // make space for sign bit
-	if x.neg {
-		b |= 1
-	}
-	buf[i] = b
-	return buf[i:], nil
-}
-
-// GobDecode implements the gob.GobDecoder interface.
-func (z *Int) GobDecode(buf []byte) error {
-	if len(buf) == 0 {
-		// Other side sent a nil or default value.
-		*z = Int{}
-		return nil
-	}
-	b := buf[0]
-	if b>>1 != intGobVersion {
-		return fmt.Errorf("Int.GobDecode: encoding version %d not supported", b>>1)
-	}
-	z.neg = b&1 != 0
-	z.abs = z.abs.setBytes(buf[1:])
-	return nil
-}
-
-// MarshalJSON implements the json.Marshaler interface.
-func (z *Int) MarshalJSON() ([]byte, error) {
-	// TODO(gri): get rid of the []byte/string conversions
-	return []byte(z.String()), nil
-}
-
-// UnmarshalJSON implements the json.Unmarshaler interface.
-func (z *Int) UnmarshalJSON(text []byte) error {
-	// TODO(gri): get rid of the []byte/string conversions
-	if _, ok := z.SetString(string(text), 0); !ok {
-		return fmt.Errorf("math/big: cannot unmarshal %q into a *big.Int", text)
-	}
-	return nil
-}
-
-// MarshalText implements the encoding.TextMarshaler interface.
-func (z *Int) MarshalText() (text []byte, err error) {
-	return []byte(z.String()), nil
-}
-
-// UnmarshalText implements the encoding.TextUnmarshaler interface.
-func (z *Int) UnmarshalText(text []byte) error {
-	if _, ok := z.SetString(string(text), 0); !ok {
-		return fmt.Errorf("math/big: cannot unmarshal %q into a *big.Int", text)
-	}
-	return nil
-}
diff --git a/src/math/big/int_test.go b/src/math/big/int_test.go
index 5b80509..5d65217 100644
--- a/src/math/big/int_test.go
+++ b/src/math/big/int_test.go
@@ -6,10 +6,7 @@
 
 import (
 	"bytes"
-	"encoding/gob"
 	"encoding/hex"
-	"encoding/json"
-	"encoding/xml"
 	"fmt"
 	"math/rand"
 	"testing"
@@ -698,7 +695,9 @@
 		testGcd(t, d, x, y, a, b)
 	}
 
-	quick.Check(checkGcd, nil)
+	if err := quick.Check(checkGcd, nil); err != nil {
+		t.Error(err)
+	}
 }
 
 var primes = []string{
@@ -1453,138 +1452,6 @@
 	panic(failureMsg)
 }
 
-var encodingTests = []string{
-	"-539345864568634858364538753846587364875430589374589",
-	"-678645873",
-	"-100",
-	"-2",
-	"-1",
-	"0",
-	"1",
-	"2",
-	"10",
-	"42",
-	"1234567890",
-	"298472983472983471903246121093472394872319615612417471234712061",
-}
-
-func TestIntGobEncoding(t *testing.T) {
-	var medium bytes.Buffer
-	enc := gob.NewEncoder(&medium)
-	dec := gob.NewDecoder(&medium)
-	for _, test := range encodingTests {
-		medium.Reset() // empty buffer for each test case (in case of failures)
-		var tx Int
-		tx.SetString(test, 10)
-		if err := enc.Encode(&tx); err != nil {
-			t.Errorf("encoding of %s failed: %s", &tx, err)
-		}
-		var rx Int
-		if err := dec.Decode(&rx); err != nil {
-			t.Errorf("decoding of %s failed: %s", &tx, err)
-		}
-		if rx.Cmp(&tx) != 0 {
-			t.Errorf("transmission of %s failed: got %s want %s", &tx, &rx, &tx)
-		}
-	}
-}
-
-// Sending a nil Int pointer (inside a slice) on a round trip through gob should yield a zero.
-// TODO: top-level nils.
-func TestGobEncodingNilIntInSlice(t *testing.T) {
-	buf := new(bytes.Buffer)
-	enc := gob.NewEncoder(buf)
-	dec := gob.NewDecoder(buf)
-
-	var in = make([]*Int, 1)
-	err := enc.Encode(&in)
-	if err != nil {
-		t.Errorf("gob encode failed: %q", err)
-	}
-	var out []*Int
-	err = dec.Decode(&out)
-	if err != nil {
-		t.Fatalf("gob decode failed: %q", err)
-	}
-	if len(out) != 1 {
-		t.Fatalf("wrong len; want 1 got %d", len(out))
-	}
-	var zero Int
-	if out[0].Cmp(&zero) != 0 {
-		t.Errorf("transmission of (*Int)(nill) failed: got %s want 0", out)
-	}
-}
-
-func TestIntJSONEncoding(t *testing.T) {
-	for _, test := range encodingTests {
-		var tx Int
-		tx.SetString(test, 10)
-		b, err := json.Marshal(&tx)
-		if err != nil {
-			t.Errorf("marshaling of %s failed: %s", &tx, err)
-		}
-		var rx Int
-		if err := json.Unmarshal(b, &rx); err != nil {
-			t.Errorf("unmarshaling of %s failed: %s", &tx, err)
-		}
-		if rx.Cmp(&tx) != 0 {
-			t.Errorf("JSON encoding of %s failed: got %s want %s", &tx, &rx, &tx)
-		}
-	}
-}
-
-var intVals = []string{
-	"-141592653589793238462643383279502884197169399375105820974944592307816406286",
-	"-1415926535897932384626433832795028841971",
-	"-141592653589793",
-	"-1",
-	"0",
-	"1",
-	"141592653589793",
-	"1415926535897932384626433832795028841971",
-	"141592653589793238462643383279502884197169399375105820974944592307816406286",
-}
-
-func TestIntJSONEncodingTextMarshaller(t *testing.T) {
-	for _, num := range intVals {
-		var tx Int
-		tx.SetString(num, 0)
-		b, err := json.Marshal(&tx)
-		if err != nil {
-			t.Errorf("marshaling of %s failed: %s", &tx, err)
-			continue
-		}
-		var rx Int
-		if err := json.Unmarshal(b, &rx); err != nil {
-			t.Errorf("unmarshaling of %s failed: %s", &tx, err)
-			continue
-		}
-		if rx.Cmp(&tx) != 0 {
-			t.Errorf("JSON encoding of %s failed: got %s want %s", &tx, &rx, &tx)
-		}
-	}
-}
-
-func TestIntXMLEncodingTextMarshaller(t *testing.T) {
-	for _, num := range intVals {
-		var tx Int
-		tx.SetString(num, 0)
-		b, err := xml.Marshal(&tx)
-		if err != nil {
-			t.Errorf("marshaling of %s failed: %s", &tx, err)
-			continue
-		}
-		var rx Int
-		if err := xml.Unmarshal(b, &rx); err != nil {
-			t.Errorf("unmarshaling of %s failed: %s", &tx, err)
-			continue
-		}
-		if rx.Cmp(&tx) != 0 {
-			t.Errorf("XML encoding of %s failed: got %s want %s", &tx, &rx, &tx)
-		}
-	}
-}
-
 func TestIssue2607(t *testing.T) {
 	// This code sequence used to hang.
 	n := NewInt(10)
diff --git a/src/math/big/intconv.go b/src/math/big/intconv.go
index 737d176..56a75f8 100644
--- a/src/math/big/intconv.go
+++ b/src/math/big/intconv.go
@@ -12,30 +12,34 @@
 	"io"
 )
 
-func (x *Int) String() string {
-	switch {
-	case x == nil:
+// TODO(gri) Should rename itoa to utoa (there's no sign). That
+// would permit the introduction of itoa which is like utoa but
+// reserves a byte for a possible sign that's passed in. That
+// would permit Int.Text to be implemented w/o the need for
+// string copy if the number is negative.
+
+// Text returns the string representation of x in the given base.
+// Base must be between 2 and 36, inclusive. The result uses the
+// lower-case letters 'a' to 'z' for digit values >= 10. No base
+// prefix (such as "0x") is added to the string.
+func (x *Int) Text(base int) string {
+	if x == nil {
 		return "<nil>"
-	case x.neg:
-		return "-" + x.abs.decimalString()
 	}
-	return x.abs.decimalString()
+	return string(x.abs.itoa(x.neg, base))
 }
 
-func charset(ch rune) string {
-	switch ch {
-	case 'b':
-		return lowercaseDigits[0:2]
-	case 'o':
-		return lowercaseDigits[0:8]
-	case 'd', 's', 'v':
-		return lowercaseDigits[0:10]
-	case 'x':
-		return lowercaseDigits[0:16]
-	case 'X':
-		return uppercaseDigits[0:16]
+// Append appends the string representation of x, as generated by
+// x.Text(base), to buf and returns the extended buffer.
+func (x *Int) Append(buf []byte, base int) []byte {
+	if x == nil {
+		return append(buf, "<nil>"...)
 	}
-	return "" // unknown format
+	return append(buf, x.abs.itoa(x.neg, base)...)
+}
+
+func (x *Int) String() string {
+	return x.Text(10)
 }
 
 // write count copies of text to s
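
A usage sketch of the new Text and Append methods; Append extends an existing buffer and skips the intermediate string:

	package main

	import (
		"fmt"
		"math/big"
	)

	func main() {
		x := big.NewInt(255)

		fmt.Println(x.Text(16)) // ff
		fmt.Println(x.Text(2))  // 11111111

		// Append writes into an existing buffer, avoiding a string copy.
		buf := x.Append([]byte("x = "), 10)
		fmt.Println(string(buf)) // x = 255
	}
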
@@ -60,15 +64,24 @@
 // right justification.
 //
 func (x *Int) Format(s fmt.State, ch rune) {
-	cs := charset(ch)
-
-	// special cases
-	switch {
-	case cs == "":
+	// determine base
+	var base int
+	switch ch {
+	case 'b':
+		base = 2
+	case 'o':
+		base = 8
+	case 'd', 's', 'v':
+		base = 10
+	case 'x', 'X':
+		base = 16
+	default:
 		// unknown format
 		fmt.Fprintf(s, "%%!%c(big.Int=%s)", ch, x.String())
 		return
-	case x == nil:
+	}
+
+	if x == nil {
 		fmt.Fprint(s, "<nil>")
 		return
 	}
@@ -97,8 +110,15 @@
 		}
 	}
 
-	// determine digits with base set by len(cs) and digit characters from cs
-	digits := x.abs.string(cs)
+	digits := x.abs.utoa(base)
+	if ch == 'X' {
+		// faster than bytes.ToUpper
+		for i, d := range digits {
+			if 'a' <= d && d <= 'z' {
+				digits[i] = 'A' + (d - 'a')
+			}
+		}
+	}
 
 	// number of characters for the three classes of number padding
 	var left int  // space characters to left of digits for right justification ("%8d")
@@ -111,7 +131,7 @@
 		switch {
 		case len(digits) < precision:
 			zeros = precision - len(digits) // count of zero padding
-		case digits == "0" && precision == 0:
+		case len(digits) == 1 && digits[0] == '0' && precision == 0:
 			return // print nothing if zero value (x == 0) and zero precision ("." or ".0")
 		}
 	}
@@ -137,7 +157,7 @@
 	writeMultiple(s, sign, 1)
 	writeMultiple(s, prefix, 1)
 	writeMultiple(s, "0", zeros)
-	writeMultiple(s, digits, 1)
+	s.Write(digits)
 	writeMultiple(s, " ", right)
 }
 
diff --git a/src/math/big/intconv_test.go b/src/math/big/intconv_test.go
index 2deb84b..5142081 100644
--- a/src/math/big/intconv_test.go
+++ b/src/math/big/intconv_test.go
@@ -17,19 +17,19 @@
 	val  int64
 	ok   bool
 }{
-	{in: "", ok: false},
-	{in: "a", ok: false},
-	{in: "z", ok: false},
-	{in: "+", ok: false},
-	{in: "-", ok: false},
-	{in: "0b", ok: false},
-	{in: "0x", ok: false},
-	{in: "2", base: 2, ok: false},
-	{in: "0b2", base: 0, ok: false},
-	{in: "08", ok: false},
-	{in: "8", base: 8, ok: false},
-	{in: "0xg", base: 0, ok: false},
-	{in: "g", base: 16, ok: false},
+	{in: ""},
+	{in: "a"},
+	{in: "z"},
+	{in: "+"},
+	{in: "-"},
+	{in: "0b"},
+	{in: "0x"},
+	{in: "2", base: 2},
+	{in: "0b2", base: 0},
+	{in: "08"},
+	{in: "8", base: 8},
+	{in: "0xg", base: 0},
+	{in: "g", base: 16},
 	{"0", "0", 0, 0, true},
 	{"0", "0", 10, 0, true},
 	{"0", "0", 16, 0, true},
@@ -41,7 +41,7 @@
 	{"-10", "-10", 16, -16, true},
 	{"+10", "10", 16, 16, true},
 	{"0x10", "16", 0, 16, true},
-	{in: "0x10", base: 16, ok: false},
+	{in: "0x10", base: 16},
 	{"-0x10", "-16", 0, -16, true},
 	{"+0x10", "16", 0, 16, true},
 	{"00", "0", 0, 0, true},
@@ -58,6 +58,57 @@
 	{"1001010111", "1001010111", 2, 0x257, true},
 }
 
+func TestIntText(t *testing.T) {
+	z := new(Int)
+	for _, test := range stringTests {
+		if !test.ok {
+			continue
+		}
+
+		_, ok := z.SetString(test.in, test.base)
+		if !ok {
+			t.Errorf("%v: failed to parse", test)
+			continue
+		}
+
+		base := test.base
+		if base == 0 {
+			base = 10
+		}
+
+		if got := z.Text(base); got != test.out {
+			t.Errorf("%v: got %s; want %s", test, got, test.out)
+		}
+	}
+}
+
+func TestAppendText(t *testing.T) {
+	z := new(Int)
+	var buf []byte
+	for _, test := range stringTests {
+		if !test.ok {
+			continue
+		}
+
+		_, ok := z.SetString(test.in, test.base)
+		if !ok {
+			t.Errorf("%v: failed to parse", test)
+			continue
+		}
+
+		base := test.base
+		if base == 0 {
+			base = 10
+		}
+
+		i := len(buf)
+		buf = z.Append(buf, base)
+		if got := string(buf[i:]); got != test.out {
+			t.Errorf("%v: got %s; want %s", test, got, test.out)
+		}
+	}
+}
+
 func format(base int) string {
 	switch base {
 	case 2:
@@ -79,15 +130,13 @@
 		z.SetInt64(test.val)
 
 		if test.base == 10 {
-			s := z.String()
-			if s != test.out {
-				t.Errorf("#%da got %s; want %s", i, s, test.out)
+			if got := z.String(); got != test.out {
+				t.Errorf("#%da got %s; want %s", i, got, test.out)
 			}
 		}
 
-		s := fmt.Sprintf(format(test.base), z)
-		if s != test.out {
-			t.Errorf("#%db got %s; want %s", i, s, test.out)
+		if got := fmt.Sprintf(format(test.base), z); got != test.out {
+			t.Errorf("#%db got %s; want %s", i, got, test.out)
 		}
 	}
 }
diff --git a/src/math/big/intmarsh.go b/src/math/big/intmarsh.go
new file mode 100644
index 0000000..4ff57b6
--- /dev/null
+++ b/src/math/big/intmarsh.go
@@ -0,0 +1,74 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file implements encoding/decoding of Ints.
+
+package big
+
+import "fmt"
+
+// Gob codec version. Permits backward-compatible changes to the encoding.
+const intGobVersion byte = 1
+
+// GobEncode implements the gob.GobEncoder interface.
+func (x *Int) GobEncode() ([]byte, error) {
+	if x == nil {
+		return nil, nil
+	}
+	buf := make([]byte, 1+len(x.abs)*_S) // extra byte for version and sign bit
+	i := x.abs.bytes(buf) - 1            // i >= 0
+	b := intGobVersion << 1              // make space for sign bit
+	if x.neg {
+		b |= 1
+	}
+	buf[i] = b
+	return buf[i:], nil
+}
+
+// GobDecode implements the gob.GobDecoder interface.
+func (z *Int) GobDecode(buf []byte) error {
+	if len(buf) == 0 {
+		// Other side sent a nil or default value.
+		*z = Int{}
+		return nil
+	}
+	b := buf[0]
+	if b>>1 != intGobVersion {
+		return fmt.Errorf("Int.GobDecode: encoding version %d not supported", b>>1)
+	}
+	z.neg = b&1 != 0
+	z.abs = z.abs.setBytes(buf[1:])
+	return nil
+}
+
+// MarshalText implements the encoding.TextMarshaler interface.
+func (x *Int) MarshalText() (text []byte, err error) {
+	if x == nil {
+		return []byte("<nil>"), nil
+	}
+	return x.abs.itoa(x.neg, 10), nil
+}
+
+// UnmarshalText implements the encoding.TextUnmarshaler interface.
+func (z *Int) UnmarshalText(text []byte) error {
+	// TODO(gri): get rid of the []byte/string conversion
+	if _, ok := z.SetString(string(text), 0); !ok {
+		return fmt.Errorf("math/big: cannot unmarshal %q into a *big.Int", text)
+	}
+	return nil
+}
+
+// The JSON marshallers are only here for API backward compatibility
+// (programs that explicitly look for these two methods). JSON works
+// fine with the TextMarshaler only.
+
+// MarshalJSON implements the json.Marshaler interface.
+func (x *Int) MarshalJSON() ([]byte, error) {
+	return x.MarshalText()
+}
+
+// UnmarshalJSON implements the json.Unmarshaler interface.
+func (z *Int) UnmarshalJSON(text []byte) error {
+	return z.UnmarshalText(text)
+}
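
The relocated codecs behave as before; a round-trip sketch through encoding/gob:

	package main

	import (
		"bytes"
		"encoding/gob"
		"fmt"
		"math/big"
	)

	func main() {
		var buf bytes.Buffer

		x := new(big.Int).Lsh(big.NewInt(-1), 100) // -2**100
		if err := gob.NewEncoder(&buf).Encode(x); err != nil {
			panic(err)
		}

		y := new(big.Int)
		if err := gob.NewDecoder(&buf).Decode(y); err != nil {
			panic(err)
		}
		fmt.Println(x.Cmp(y) == 0) // true
	}
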
diff --git a/src/math/big/intmarsh_test.go b/src/math/big/intmarsh_test.go
new file mode 100644
index 0000000..f82956c
--- /dev/null
+++ b/src/math/big/intmarsh_test.go
@@ -0,0 +1,121 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package big
+
+import (
+	"bytes"
+	"encoding/gob"
+	"encoding/json"
+	"encoding/xml"
+	"testing"
+)
+
+var encodingTests = []string{
+	"0",
+	"1",
+	"2",
+	"10",
+	"1000",
+	"1234567890",
+	"298472983472983471903246121093472394872319615612417471234712061",
+}
+
+func TestIntGobEncoding(t *testing.T) {
+	var medium bytes.Buffer
+	enc := gob.NewEncoder(&medium)
+	dec := gob.NewDecoder(&medium)
+	for _, test := range encodingTests {
+		for _, sign := range []string{"", "+", "-"} {
+			x := sign + test
+			medium.Reset() // empty buffer for each test case (in case of failures)
+			var tx Int
+			tx.SetString(x, 10)
+			if err := enc.Encode(&tx); err != nil {
+				t.Errorf("encoding of %s failed: %s", &tx, err)
+				continue
+			}
+			var rx Int
+			if err := dec.Decode(&rx); err != nil {
+				t.Errorf("decoding of %s failed: %s", &tx, err)
+				continue
+			}
+			if rx.Cmp(&tx) != 0 {
+				t.Errorf("transmission of %s failed: got %s want %s", &tx, &rx, &tx)
+			}
+		}
+	}
+}
+
+// Sending a nil Int pointer (inside a slice) on a round trip through gob should yield a zero.
+// TODO: top-level nils.
+func TestGobEncodingNilIntInSlice(t *testing.T) {
+	buf := new(bytes.Buffer)
+	enc := gob.NewEncoder(buf)
+	dec := gob.NewDecoder(buf)
+
+	var in = make([]*Int, 1)
+	err := enc.Encode(&in)
+	if err != nil {
+		t.Errorf("gob encode failed: %q", err)
+	}
+	var out []*Int
+	err = dec.Decode(&out)
+	if err != nil {
+		t.Fatalf("gob decode failed: %q", err)
+	}
+	if len(out) != 1 {
+		t.Fatalf("wrong len; want 1 got %d", len(out))
+	}
+	var zero Int
+	if out[0].Cmp(&zero) != 0 {
+		t.Fatalf("transmission of (*Int)(nil) failed: got %s want 0", out)
+	}
+}
+
+func TestIntJSONEncoding(t *testing.T) {
+	for _, test := range encodingTests {
+		for _, sign := range []string{"", "+", "-"} {
+			x := sign + test
+			var tx Int
+			tx.SetString(x, 10)
+			b, err := json.Marshal(&tx)
+			if err != nil {
+				t.Errorf("marshaling of %s failed: %s", &tx, err)
+				continue
+			}
+			var rx Int
+			if err := json.Unmarshal(b, &rx); err != nil {
+				t.Errorf("unmarshaling of %s failed: %s", &tx, err)
+				continue
+			}
+			if rx.Cmp(&tx) != 0 {
+				t.Errorf("JSON encoding of %s failed: got %s want %s", &tx, &rx, &tx)
+			}
+		}
+	}
+}
+
+func TestIntXMLEncoding(t *testing.T) {
+	for _, test := range encodingTests {
+		for _, sign := range []string{"", "+", "-"} {
+			x := sign + test
+			var tx Int
+			tx.SetString(x, 0)
+			b, err := xml.Marshal(&tx)
+			if err != nil {
+				t.Errorf("marshaling of %s failed: %s", &tx, err)
+				continue
+			}
+			var rx Int
+			if err := xml.Unmarshal(b, &rx); err != nil {
+				t.Errorf("unmarshaling of %s failed: %s", &tx, err)
+				continue
+			}
+			if rx.Cmp(&tx) != 0 {
+				t.Errorf("XML encoding of %s failed: got %s want %s", &tx, &rx, &tx)
+			}
+		}
+	}
+}
diff --git a/src/math/big/nat.go b/src/math/big/nat.go
index 6545bc1..54f4011 100644
--- a/src/math/big/nat.go
+++ b/src/math/big/nat.go
@@ -2,31 +2,11 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// Package big implements multi-precision arithmetic (big numbers).
-// The following numeric types are supported:
-//
-//   Int    signed integers
-//   Rat    rational numbers
-//   Float  floating-point numbers
-//
-// Methods are typically of the form:
-//
-//   func (z *T) Unary(x *T) *T        // z = op x
-//   func (z *T) Binary(x, y *T) *T    // z = x op y
-//   func (x *T) M() T1                // v = x.M()
-//
-// with T one of Int, Rat, or Float. For unary and binary operations, the
-// result is the receiver (usually named z in that case); if it is one of
-// the operands x or y it may be overwritten (and its memory reused).
-// To enable chaining of operations, the result is also returned. Methods
-// returning a result other than *Int, *Rat, or *Float take an operand as
-// the receiver (usually named x in that case).
-//
-package big
+// This file implements unsigned multi-precision integers (natural
+// numbers). They are the building blocks for the implementation
+// of signed integers, rationals, and floating-point numbers.
 
-// This file contains operations on unsigned multi-precision integers.
-// These are the building blocks for the operations on signed integers
-// and rationals.
+package big
 
 import "math/rand"
 
@@ -1141,9 +1121,12 @@
 	return zz.norm()
 }
 
-// probablyPrime performs reps Miller-Rabin tests to check whether n is prime.
-// If it returns true, n is prime with probability 1 - 1/4^reps.
-// If it returns false, n is not prime.
+// probablyPrime performs reps Miller-Rabin tests to check whether n is prime.
+// If n is prime, it returns true.
+// If n is not prime, it returns false with probability at least 1 - (1/4)^reps.
+//
+// It is not suitable for judging primes that an adversary may have crafted
+// to fool this test.
 func (n nat) probablyPrime(reps int) bool {
 	if len(n) == 0 {
 		return false
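
The exported counterpart is (*Int).ProbablyPrime, which passes reps through
to this test. A short usage sketch (2^61-1, a Mersenne prime, is only a
convenient input):

	package main

	import (
		"fmt"
		"math/big"
	)

	func main() {
		p := new(big.Int).Lsh(big.NewInt(1), 61)
		p.Sub(p, big.NewInt(1)) // 2^61 - 1
		// 20 rounds: a composite passes with probability at most (1/4)^20.
		fmt.Println(p.ProbablyPrime(20)) // true
	}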
diff --git a/src/math/big/nat_test.go b/src/math/big/nat_test.go
index 7ac3cb8..3eefffc 100644
--- a/src/math/big/nat_test.go
+++ b/src/math/big/nat_test.go
@@ -158,7 +158,7 @@
 
 func TestMulRangeN(t *testing.T) {
 	for i, r := range mulRangesN {
-		prod := nat(nil).mulRange(r.a, r.b).decimalString()
+		prod := string(nat(nil).mulRange(r.a, r.b).utoa(10))
 		if prod != r.prod {
 			t.Errorf("#%d: got %s; want %s", i, prod, r.prod)
 		}
@@ -326,7 +326,7 @@
 	for i := uint(0); i <= 3*_W; i++ {
 		n := y.trailingZeroBits()
 		if n != i {
-			t.Errorf("got 0x%s.trailingZeroBits() = %d; want %d", y.hexString(), n, i)
+			t.Errorf("got 0x%s.trailingZeroBits() = %d; want %d", y.utoa(16), n, i)
 		}
 		y = y.shl(y, 1)
 	}
@@ -388,7 +388,7 @@
 		z := nat(nil).montgomery(x, y, m, k0, len(m))
 		z = z.norm()
 		if z.cmp(out) != 0 {
-			t.Errorf("#%d got %s want %s", i, z.decimalString(), out.decimalString())
+			t.Errorf("#%d got %s want %s", i, z.utoa(10), out.utoa(10))
 		}
 	}
 }
@@ -429,7 +429,7 @@
 
 		z := nat(nil).expNN(x, y, m)
 		if z.cmp(out) != 0 {
-			t.Errorf("#%d got %s want %s", i, z.decimalString(), out.decimalString())
+			t.Errorf("#%d got %s want %s", i, z.utoa(10), out.utoa(10))
 		}
 	}
 }
@@ -486,7 +486,7 @@
 func TestFibo(t *testing.T) {
 	for i, want := range fiboNums {
 		n := i * 10
-		got := fibo(n).decimalString()
+		got := string(fibo(n).utoa(10))
 		if got != want {
 			t.Errorf("fibo(%d) failed: got %s want %s", n, got, want)
 		}
diff --git a/src/math/big/natconv.go b/src/math/big/natconv.go
index 80da307..d2ce667 100644
--- a/src/math/big/natconv.go
+++ b/src/math/big/natconv.go
@@ -14,6 +14,11 @@
 	"sync"
 )
 
+const digits = "0123456789abcdefghijklmnopqrstuvwxyz"
+
+// Note: MaxBase = len(digits), but it must remain a rune constant
+//       for API compatibility.
+
 // MaxBase is the largest number base accepted for string conversions.
 const MaxBase = 'z' - 'a' + 10 + 1
 
@@ -229,45 +234,33 @@
 	return
 }
 
-// Character sets for string conversion.
-const (
-	lowercaseDigits = "0123456789abcdefghijklmnopqrstuvwxyz"
-	uppercaseDigits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
-)
-
-// decimalString returns a decimal representation of x.
-// It calls x.string with the charset "0123456789".
-func (x nat) decimalString() string {
-	return x.string(lowercaseDigits[:10])
+// utoa converts x to an ASCII representation in the given base;
+// base must be between 2 and MaxBase, inclusive.
+func (x nat) utoa(base int) []byte {
+	return x.itoa(false, base)
 }
 
-// hexString returns a hexadecimal representation of x.
-// It calls x.string with the charset "0123456789abcdef".
-func (x nat) hexString() string {
-	return x.string(lowercaseDigits[:16])
-}
-
-// string converts x to a string using digits from a charset; a digit with
-// value d is represented by charset[d]. The conversion base is determined
-// by len(charset), which must be >= 2 and <= 256.
-func (x nat) string(charset string) string {
-	b := Word(len(charset))
-	if b < 2 || b > 256 {
-		panic("invalid character set length")
+// itoa is like utoa but it prepends a '-' if neg && x != 0.
+func (x nat) itoa(neg bool, base int) []byte {
+	if base < 2 || base > MaxBase {
+		panic("invalid base")
 	}
 
 	// x == 0
 	if len(x) == 0 {
-		return string(charset[0])
+		return []byte("0")
 	}
 	// len(x) > 0
 
 	// allocate buffer for conversion
-	i := int(float64(x.bitLen())/math.Log2(float64(b))) + 1 // off by one at most
+	i := int(float64(x.bitLen())/math.Log2(float64(base))) + 1 // off by 1 at most
+	if neg {
+		i++
+	}
 	s := make([]byte, i)
 
 	// convert power of two and non power of two bases separately
-	if b == b&-b {
+	if b := Word(base); b == b&-b {
 		// shift is base b digit size in bits
 		shift := trailingZeroBits(b) // shift > 0 because b >= 2
 		mask := Word(1<<shift - 1)
@@ -279,7 +272,7 @@
 			// convert full digits
 			for nbits >= shift {
 				i--
-				s[i] = charset[w&mask]
+				s[i] = digits[w&mask]
 				w >>= shift
 				nbits -= shift
 			}
@@ -293,7 +286,7 @@
 				// partial digit in current word w (== x[k-1]) and next word x[k]
 				w |= x[k] << nbits
 				i--
-				s[i] = charset[w&mask]
+				s[i] = digits[w&mask]
 
 				// advance
 				w = x[k] >> (shift - nbits)
@@ -304,7 +297,7 @@
 		// convert digits of most-significant word w (omit leading zeros)
 		for w != 0 {
 			i--
-			s[i] = charset[w&mask]
+			s[i] = digits[w&mask]
 			w >>= shift
 		}
 
@@ -319,18 +312,23 @@
 		q := nat(nil).set(x)
 
 		// convert q to string s in base b
-		q.convertWords(s, charset, b, ndigits, bb, table)
+		q.convertWords(s, b, ndigits, bb, table)
 
 		// strip leading zeros
 		// (x != 0; thus s must contain at least one non-zero digit
 		// and the loop will terminate)
 		i = 0
-		for zero := charset[0]; s[i] == zero; {
+		for s[i] == '0' {
 			i++
 		}
 	}
 
-	return string(s[i:])
+	if neg {
+		i--
+		s[i] = '-'
+	}
+
+	return s[i:]
 }
 
 // Convert words of q to base b digits in s. If q is large, it is recursively "split in half"
@@ -349,7 +347,7 @@
 // ~30x for 20000 digits. Use nat_test.go's BenchmarkLeafSize tests to optimize leafSize for
 // specific hardware.
 //
-func (q nat) convertWords(s []byte, charset string, b Word, ndigits int, bb Word, table []divisor) {
+func (q nat) convertWords(s []byte, b Word, ndigits int, bb Word, table []divisor) {
 	// split larger blocks recursively
 	if table != nil {
 		// len(q) > leafSize > 0
@@ -374,8 +372,8 @@
 
 			// convert subblocks and collect results in s[:h] and s[h:]
 			h := len(s) - table[index].ndigits
-			r.convertWords(s[h:], charset, b, ndigits, bb, table[0:index])
-			s = s[:h] // == q.convertWords(s, charset, b, ndigits, bb, table[0:index+1])
+			r.convertWords(s[h:], b, ndigits, bb, table[0:index])
+			s = s[:h] // == q.convertWords(s, b, ndigits, bb, table[0:index+1])
 		}
 	}
 
@@ -393,7 +391,7 @@
 				// this appears to be faster for BenchmarkString10000Base10
 				// and smaller strings (but a bit slower for larger ones)
 				t := r / 10
-				s[i] = charset[r-t<<3-t-t] // TODO(gri) replace w/ t*10 once compiler produces better code
+				s[i] = '0' + byte(r-t<<3-t-t) // TODO(gri) replace w/ t*10 once compiler produces better code
 				r = t
 			}
 		}
@@ -403,17 +401,16 @@
 			q, r = q.divW(q, bb)
 			for j := 0; j < ndigits && i > 0; j++ {
 				i--
-				s[i] = charset[r%b]
+				s[i] = digits[r%b]
 				r /= b
 			}
 		}
 	}
 
 	// prepend high-order zeros
-	zero := charset[0]
 	for i > 0 { // while need more leading zeros
 		i--
-		s[i] = zero
+		s[i] = '0'
 	}
 }
 
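
The power-of-two branch of itoa never divides: for base 1<<shift it peels
shift bits per digit with a mask. A standalone sketch of the same idea on a
single machine word (the names below are local to the sketch, not the
package's internals):

	package main

	import "fmt"

	const digits = "0123456789abcdef"

	// hex converts w to base 16 by masking 4 bits per digit, mirroring
	// the shift/mask loop in itoa with shift = 4, mask = 1<<4 - 1.
	func hex(w uint64) string {
		if w == 0 {
			return "0"
		}
		var buf [16]byte
		i := len(buf)
		for w != 0 {
			i--
			buf[i] = digits[w&0xf]
			w >>= 4
		}
		return string(buf[i:])
	}

	func main() {
		fmt.Println(hex(0xdeadbeef)) // deadbeef
	}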
diff --git a/src/math/big/natconv_test.go b/src/math/big/natconv_test.go
index f321fbc..028e5a8 100644
--- a/src/math/big/natconv_test.go
+++ b/src/math/big/natconv_test.go
@@ -5,20 +5,19 @@
 package big
 
 import (
+	"bytes"
 	"io"
 	"strings"
 	"testing"
 )
 
-func toString(x nat, charset string) string {
-	base := len(charset)
-
+func itoa(x nat, base int) []byte {
 	// special cases
 	switch {
 	case base < 2:
 		panic("illegal base")
 	case len(x) == 0:
-		return string(charset[0])
+		return []byte("0")
 	}
 
 	// allocate buffer for conversion
@@ -33,54 +32,53 @@
 		i--
 		var r Word
 		q, r = q.divW(q, Word(base))
-		s[i] = charset[r]
+		s[i] = digits[r]
 	}
 
-	return string(s[i:])
+	return s[i:]
 }
 
 var strTests = []struct {
 	x nat    // nat value to be converted
-	c string // conversion charset
+	b int    // conversion base
 	s string // expected result
 }{
-	{nil, "01", "0"},
-	{nat{1}, "01", "1"},
-	{nat{0xc5}, "01", "11000101"},
-	{nat{03271}, lowercaseDigits[:8], "3271"},
-	{nat{10}, lowercaseDigits[:10], "10"},
-	{nat{1234567890}, uppercaseDigits[:10], "1234567890"},
-	{nat{0xdeadbeef}, lowercaseDigits[:16], "deadbeef"},
-	{nat{0xdeadbeef}, uppercaseDigits[:16], "DEADBEEF"},
-	{nat{0x229be7}, lowercaseDigits[:17], "1a2b3c"},
-	{nat{0x309663e6}, uppercaseDigits[:32], "O9COV6"},
+	{nil, 2, "0"},
+	{nat{1}, 2, "1"},
+	{nat{0xc5}, 2, "11000101"},
+	{nat{03271}, 8, "3271"},
+	{nat{10}, 10, "10"},
+	{nat{1234567890}, 10, "1234567890"},
+	{nat{0xdeadbeef}, 16, "deadbeef"},
+	{nat{0x229be7}, 17, "1a2b3c"},
+	{nat{0x309663e6}, 32, "o9cov6"},
 }
 
 func TestString(t *testing.T) {
-	// test invalid character set explicitly
+	// test invalid base explicitly
 	var panicStr string
 	func() {
 		defer func() {
 			panicStr = recover().(string)
 		}()
-		natOne.string("0")
+		natOne.utoa(1)
 	}()
-	if panicStr != "invalid character set length" {
-		t.Errorf("expected panic for invalid character set")
+	if panicStr != "invalid base" {
+		t.Errorf("expected panic for invalid base")
 	}
 
 	for _, a := range strTests {
-		s := a.x.string(a.c)
+		s := string(a.x.utoa(a.b))
 		if s != a.s {
 			t.Errorf("string%+v\n\tgot s = %s; want %s", a, s, a.s)
 		}
 
-		x, b, _, err := nat(nil).scan(strings.NewReader(a.s), len(a.c), false)
+		x, b, _, err := nat(nil).scan(strings.NewReader(a.s), a.b, false)
 		if x.cmp(a.x) != 0 {
 			t.Errorf("scan%+v\n\tgot z = %v; want %v", a, x, a.x)
 		}
-		if b != len(a.c) {
-			t.Errorf("scan%+v\n\tgot b = %d; want %d", a, b, len(a.c))
+		if b != a.b {
+			t.Errorf("scan%+v\n\tgot b = %d; want %d", a, b, a.b)
 		}
 		if err != nil {
 			t.Errorf("scan%+v\n\tgot error = %s", a, err)
@@ -236,7 +234,7 @@
 	if err != nil {
 		t.Errorf("scanning pi: %s", err)
 	}
-	if s := z.decimalString(); s != pi {
+	if s := string(z.utoa(10)); s != pi {
 		t.Errorf("scanning pi: got %s", s)
 	}
 }
@@ -265,12 +263,12 @@
 func BenchmarkStringPiParallel(b *testing.B) {
 	var x nat
 	x, _, _, _ = x.scan(strings.NewReader(pi), 0, false)
-	if x.decimalString() != pi {
+	if string(x.utoa(10)) != pi {
 		panic("benchmark incorrect: conversion failed")
 	}
 	b.RunParallel(func(pb *testing.PB) {
 		for pb.Next() {
-			x.decimalString()
+			x.utoa(10)
 		}
 	})
 }
@@ -304,15 +302,14 @@
 	var z nat
 	z = z.expWW(x, y)
 
-	var s string
-	s = z.string(lowercaseDigits[:base])
-	if t := toString(z, lowercaseDigits[:base]); t != s {
+	s := z.utoa(base)
+	if t := itoa(z, base); !bytes.Equal(s, t) {
 		b.Fatalf("scanning: got %s; want %s", s, t)
 	}
 	b.StartTimer()
 
 	for i := 0; i < b.N; i++ {
-		z.scan(strings.NewReader(s), base, false)
+		z.scan(bytes.NewReader(s), base, false)
 	}
 }
 
@@ -344,11 +341,11 @@
 	b.StopTimer()
 	var z nat
 	z = z.expWW(x, y)
-	z.string(lowercaseDigits[:base]) // warm divisor cache
+	z.utoa(base) // warm divisor cache
 	b.StartTimer()
 
 	for i := 0; i < b.N; i++ {
-		_ = z.string(lowercaseDigits[:base])
+		_ = z.utoa(base)
 	}
 }
 
@@ -372,7 +369,7 @@
 func BenchmarkLeafSize32(b *testing.B) { LeafSizeHelper(b, 10, 32) } // try some large lengths
 func BenchmarkLeafSize64(b *testing.B) { LeafSizeHelper(b, 10, 64) }
 
-func LeafSizeHelper(b *testing.B, base Word, size int) {
+func LeafSizeHelper(b *testing.B, base, size int) {
 	b.StopTimer()
 	originalLeafSize := leafSize
 	resetTable(cacheBase10.table[:])
@@ -382,12 +379,12 @@
 	for d := 1; d <= 10000; d *= 10 {
 		b.StopTimer()
 		var z nat
-		z = z.expWW(base, Word(d))           // build target number
-		_ = z.string(lowercaseDigits[:base]) // warm divisor cache
+		z = z.expWW(Word(base), Word(d)) // build target number
+		_ = z.utoa(base)                 // warm divisor cache
 		b.StartTimer()
 
 		for i := 0; i < b.N; i++ {
-			_ = z.string(lowercaseDigits[:base])
+			_ = z.utoa(base)
 		}
 	}
 
@@ -408,13 +405,13 @@
 }
 
 func TestStringPowers(t *testing.T) {
-	var b, p Word
-	for b = 2; b <= 16; b++ {
+	var p Word
+	for b := 2; b <= 16; b++ {
 		for p = 0; p <= 512; p++ {
-			x := nat(nil).expWW(b, p)
-			xs := x.string(lowercaseDigits[:b])
-			xs2 := toString(x, lowercaseDigits[:b])
-			if xs != xs2 {
+			x := nat(nil).expWW(Word(b), p)
+			xs := x.utoa(b)
+			xs2 := itoa(x, b)
+			if !bytes.Equal(xs, xs2) {
 				t.Errorf("failed at %d ** %d in base %d: %s != %s", b, p, b, xs, xs2)
 			}
 		}
diff --git a/src/math/big/rat.go b/src/math/big/rat.go
index fb16f18..2cd9ed0 100644
--- a/src/math/big/rat.go
+++ b/src/math/big/rat.go
@@ -7,8 +7,6 @@
 package big
 
 import (
-	"encoding/binary"
-	"errors"
 	"fmt"
 	"math"
 )
@@ -510,61 +508,3 @@
 	z.a.neg = a.neg != b.neg
 	return z.norm()
 }
-
-// Gob codec version. Permits backward-compatible changes to the encoding.
-const ratGobVersion byte = 1
-
-// GobEncode implements the gob.GobEncoder interface.
-func (x *Rat) GobEncode() ([]byte, error) {
-	if x == nil {
-		return nil, nil
-	}
-	buf := make([]byte, 1+4+(len(x.a.abs)+len(x.b.abs))*_S) // extra bytes for version and sign bit (1), and numerator length (4)
-	i := x.b.abs.bytes(buf)
-	j := x.a.abs.bytes(buf[:i])
-	n := i - j
-	if int(uint32(n)) != n {
-		// this should never happen
-		return nil, errors.New("Rat.GobEncode: numerator too large")
-	}
-	binary.BigEndian.PutUint32(buf[j-4:j], uint32(n))
-	j -= 1 + 4
-	b := ratGobVersion << 1 // make space for sign bit
-	if x.a.neg {
-		b |= 1
-	}
-	buf[j] = b
-	return buf[j:], nil
-}
-
-// GobDecode implements the gob.GobDecoder interface.
-func (z *Rat) GobDecode(buf []byte) error {
-	if len(buf) == 0 {
-		// Other side sent a nil or default value.
-		*z = Rat{}
-		return nil
-	}
-	b := buf[0]
-	if b>>1 != ratGobVersion {
-		return fmt.Errorf("Rat.GobDecode: encoding version %d not supported", b>>1)
-	}
-	const j = 1 + 4
-	i := j + binary.BigEndian.Uint32(buf[j-4:j])
-	z.a.neg = b&1 != 0
-	z.a.abs = z.a.abs.setBytes(buf[j:i])
-	z.b.abs = z.b.abs.setBytes(buf[i:])
-	return nil
-}
-
-// MarshalText implements the encoding.TextMarshaler interface.
-func (r *Rat) MarshalText() (text []byte, err error) {
-	return []byte(r.RatString()), nil
-}
-
-// UnmarshalText implements the encoding.TextUnmarshaler interface.
-func (r *Rat) UnmarshalText(text []byte) error {
-	if _, ok := r.SetString(string(text)); !ok {
-		return fmt.Errorf("math/big: cannot unmarshal %q into a *big.Rat", text)
-	}
-	return nil
-}
diff --git a/src/math/big/rat_test.go b/src/math/big/rat_test.go
index 012d0c4..3a06fca 100644
--- a/src/math/big/rat_test.go
+++ b/src/math/big/rat_test.go
@@ -5,10 +5,6 @@
 package big
 
 import (
-	"bytes"
-	"encoding/gob"
-	"encoding/json"
-	"encoding/xml"
 	"math"
 	"testing"
 )
@@ -280,116 +276,6 @@
 	}
 }
 
-func TestRatGobEncoding(t *testing.T) {
-	var medium bytes.Buffer
-	enc := gob.NewEncoder(&medium)
-	dec := gob.NewDecoder(&medium)
-	for _, test := range encodingTests {
-		medium.Reset() // empty buffer for each test case (in case of failures)
-		var tx Rat
-		tx.SetString(test + ".14159265")
-		if err := enc.Encode(&tx); err != nil {
-			t.Errorf("encoding of %s failed: %s", &tx, err)
-		}
-		var rx Rat
-		if err := dec.Decode(&rx); err != nil {
-			t.Errorf("decoding of %s failed: %s", &tx, err)
-		}
-		if rx.Cmp(&tx) != 0 {
-			t.Errorf("transmission of %s failed: got %s want %s", &tx, &rx, &tx)
-		}
-	}
-}
-
-// Sending a nil Rat pointer (inside a slice) on a round trip through gob should yield a zero.
-// TODO: top-level nils.
-func TestGobEncodingNilRatInSlice(t *testing.T) {
-	buf := new(bytes.Buffer)
-	enc := gob.NewEncoder(buf)
-	dec := gob.NewDecoder(buf)
-
-	var in = make([]*Rat, 1)
-	err := enc.Encode(&in)
-	if err != nil {
-		t.Errorf("gob encode failed: %q", err)
-	}
-	var out []*Rat
-	err = dec.Decode(&out)
-	if err != nil {
-		t.Fatalf("gob decode failed: %q", err)
-	}
-	if len(out) != 1 {
-		t.Fatalf("wrong len; want 1 got %d", len(out))
-	}
-	var zero Rat
-	if out[0].Cmp(&zero) != 0 {
-		t.Errorf("transmission of (*Int)(nill) failed: got %s want 0", out)
-	}
-}
-
-var ratNums = []string{
-	"-141592653589793238462643383279502884197169399375105820974944592307816406286",
-	"-1415926535897932384626433832795028841971",
-	"-141592653589793",
-	"-1",
-	"0",
-	"1",
-	"141592653589793",
-	"1415926535897932384626433832795028841971",
-	"141592653589793238462643383279502884197169399375105820974944592307816406286",
-}
-
-var ratDenoms = []string{
-	"1",
-	"718281828459045",
-	"7182818284590452353602874713526624977572",
-	"718281828459045235360287471352662497757247093699959574966967627724076630353",
-}
-
-func TestRatJSONEncoding(t *testing.T) {
-	for _, num := range ratNums {
-		for _, denom := range ratDenoms {
-			var tx Rat
-			tx.SetString(num + "/" + denom)
-			b, err := json.Marshal(&tx)
-			if err != nil {
-				t.Errorf("marshaling of %s failed: %s", &tx, err)
-				continue
-			}
-			var rx Rat
-			if err := json.Unmarshal(b, &rx); err != nil {
-				t.Errorf("unmarshaling of %s failed: %s", &tx, err)
-				continue
-			}
-			if rx.Cmp(&tx) != 0 {
-				t.Errorf("JSON encoding of %s failed: got %s want %s", &tx, &rx, &tx)
-			}
-		}
-	}
-}
-
-func TestRatXMLEncoding(t *testing.T) {
-	for _, num := range ratNums {
-		for _, denom := range ratDenoms {
-			var tx Rat
-			tx.SetString(num + "/" + denom)
-			b, err := xml.Marshal(&tx)
-			if err != nil {
-				t.Errorf("marshaling of %s failed: %s", &tx, err)
-				continue
-			}
-			var rx Rat
-			if err := xml.Unmarshal(b, &rx); err != nil {
-				t.Errorf("unmarshaling of %s failed: %s", &tx, err)
-				continue
-			}
-			if rx.Cmp(&tx) != 0 {
-				t.Errorf("XML encoding of %s failed: got %s want %s", &tx, &rx, &tx)
-			}
-		}
-	}
-}
-
 func TestIssue2379(t *testing.T) {
 	// 1) no aliasing
 	q := NewRat(3, 2)
diff --git a/src/math/big/ratconv.go b/src/math/big/ratconv.go
index 961ff64..4566ff4 100644
--- a/src/math/big/ratconv.go
+++ b/src/math/big/ratconv.go
@@ -188,11 +188,15 @@
 
 // String returns a string representation of x in the form "a/b" (even if b == 1).
 func (x *Rat) String() string {
-	s := "/1"
+	var buf []byte
+	buf = x.a.Append(buf, 10)
+	buf = append(buf, '/')
 	if len(x.b.abs) != 0 {
-		s = "/" + x.b.abs.decimalString()
+		buf = x.b.Append(buf, 10)
+	} else {
+		buf = append(buf, '1')
 	}
-	return x.a.String() + s
+	return string(buf)
 }
 
 // RatString returns a string representation of x in the form "a/b" if b != 1,
@@ -208,12 +212,17 @@
 // digits of precision after the decimal point. The last digit is rounded to
 // nearest, with halves rounded away from zero.
 func (x *Rat) FloatString(prec int) string {
+	var buf []byte
+
 	if x.IsInt() {
-		s := x.a.String()
+		buf = x.a.Append(buf, 10)
 		if prec > 0 {
-			s += "." + strings.Repeat("0", prec)
+			buf = append(buf, '.')
+			for i := prec; i > 0; i-- {
+				buf = append(buf, '0')
+			}
 		}
-		return s
+		return string(buf)
 	}
 	// x.b.abs != 0
 
@@ -237,16 +246,19 @@
 		}
 	}
 
-	s := q.decimalString()
 	if x.a.neg {
-		s = "-" + s
+		buf = append(buf, '-')
 	}
+	buf = append(buf, q.utoa(10)...) // itoa ignores sign if q == 0
 
 	if prec > 0 {
-		rs := r.decimalString()
-		leadingZeros := prec - len(rs)
-		s += "." + strings.Repeat("0", leadingZeros) + rs
+		buf = append(buf, '.')
+		rs := r.utoa(10)
+		for i := prec - len(rs); i > 0; i-- {
+			buf = append(buf, '0')
+		}
+		buf = append(buf, rs...)
 	}
 
-	return s
+	return string(buf)
 }
diff --git a/src/math/big/ratmarsh.go b/src/math/big/ratmarsh.go
new file mode 100644
index 0000000..b82e8d4
--- /dev/null
+++ b/src/math/big/ratmarsh.go
@@ -0,0 +1,73 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file implements encoding/decoding of Rats.
+
+package big
+
+import (
+	"encoding/binary"
+	"errors"
+	"fmt"
+)
+
+// Gob codec version. Permits backward-compatible changes to the encoding.
+const ratGobVersion byte = 1
+
+// GobEncode implements the gob.GobEncoder interface.
+func (x *Rat) GobEncode() ([]byte, error) {
+	if x == nil {
+		return nil, nil
+	}
+	buf := make([]byte, 1+4+(len(x.a.abs)+len(x.b.abs))*_S) // extra bytes for version and sign bit (1), and numerator length (4)
+	i := x.b.abs.bytes(buf)
+	j := x.a.abs.bytes(buf[:i])
+	n := i - j
+	if int(uint32(n)) != n {
+		// this should never happen
+		return nil, errors.New("Rat.GobEncode: numerator too large")
+	}
+	binary.BigEndian.PutUint32(buf[j-4:j], uint32(n))
+	j -= 1 + 4
+	b := ratGobVersion << 1 // make space for sign bit
+	if x.a.neg {
+		b |= 1
+	}
+	buf[j] = b
+	return buf[j:], nil
+}
+
+// GobDecode implements the gob.GobDecoder interface.
+func (z *Rat) GobDecode(buf []byte) error {
+	if len(buf) == 0 {
+		// Other side sent a nil or default value.
+		*z = Rat{}
+		return nil
+	}
+	b := buf[0]
+	if b>>1 != ratGobVersion {
+		return fmt.Errorf("Rat.GobDecode: encoding version %d not supported", b>>1)
+	}
+	const j = 1 + 4
+	i := j + binary.BigEndian.Uint32(buf[j-4:j])
+	z.a.neg = b&1 != 0
+	z.a.abs = z.a.abs.setBytes(buf[j:i])
+	z.b.abs = z.b.abs.setBytes(buf[i:])
+	return nil
+}
+
+// MarshalText implements the encoding.TextMarshaler interface.
+func (x *Rat) MarshalText() (text []byte, err error) {
+	// TODO(gri): get rid of the []byte/string conversion
+	return []byte(x.RatString()), nil
+}
+
+// UnmarshalText implements the encoding.TextUnmarshaler interface.
+func (z *Rat) UnmarshalText(text []byte) error {
+	// TODO(gri): get rid of the []byte/string conversion
+	if _, ok := z.SetString(string(text)); !ok {
+		return fmt.Errorf("math/big: cannot unmarshal %q into a *big.Rat", text)
+	}
+	return nil
+}
diff --git a/src/math/big/ratmarsh_test.go b/src/math/big/ratmarsh_test.go
new file mode 100644
index 0000000..351d109
--- /dev/null
+++ b/src/math/big/ratmarsh_test.go
@@ -0,0 +1,125 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package big
+
+import (
+	"bytes"
+	"encoding/gob"
+	"encoding/json"
+	"encoding/xml"
+	"testing"
+)
+
+func TestRatGobEncoding(t *testing.T) {
+	var medium bytes.Buffer
+	enc := gob.NewEncoder(&medium)
+	dec := gob.NewDecoder(&medium)
+	for _, test := range encodingTests {
+		medium.Reset() // empty buffer for each test case (in case of failures)
+		var tx Rat
+		tx.SetString(test + ".14159265")
+		if err := enc.Encode(&tx); err != nil {
+			t.Errorf("encoding of %s failed: %s", &tx, err)
+			continue
+		}
+		var rx Rat
+		if err := dec.Decode(&rx); err != nil {
+			t.Errorf("decoding of %s failed: %s", &tx, err)
+			continue
+		}
+		if rx.Cmp(&tx) != 0 {
+			t.Errorf("transmission of %s failed: got %s want %s", &tx, &rx, &tx)
+		}
+	}
+}
+
+// Sending a nil Rat pointer (inside a slice) on a round trip through gob should yield a zero.
+// TODO: top-level nils.
+func TestGobEncodingNilRatInSlice(t *testing.T) {
+	buf := new(bytes.Buffer)
+	enc := gob.NewEncoder(buf)
+	dec := gob.NewDecoder(buf)
+
+	var in = make([]*Rat, 1)
+	err := enc.Encode(&in)
+	if err != nil {
+		t.Errorf("gob encode failed: %q", err)
+	}
+	var out []*Rat
+	err = dec.Decode(&out)
+	if err != nil {
+		t.Fatalf("gob decode failed: %q", err)
+	}
+	if len(out) != 1 {
+		t.Fatalf("wrong len; want 1 got %d", len(out))
+	}
+	var zero Rat
+	if out[0].Cmp(&zero) != 0 {
+		t.Fatalf("transmission of (*Int)(nil) failed: got %s want 0", out)
+	}
+}
+
+var ratNums = []string{
+	"-141592653589793238462643383279502884197169399375105820974944592307816406286",
+	"-1415926535897932384626433832795028841971",
+	"-141592653589793",
+	"-1",
+	"0",
+	"1",
+	"141592653589793",
+	"1415926535897932384626433832795028841971",
+	"141592653589793238462643383279502884197169399375105820974944592307816406286",
+}
+
+var ratDenoms = []string{
+	"1",
+	"718281828459045",
+	"7182818284590452353602874713526624977572",
+	"718281828459045235360287471352662497757247093699959574966967627724076630353",
+}
+
+func TestRatJSONEncoding(t *testing.T) {
+	for _, num := range ratNums {
+		for _, denom := range ratDenoms {
+			var tx Rat
+			tx.SetString(num + "/" + denom)
+			b, err := json.Marshal(&tx)
+			if err != nil {
+				t.Errorf("marshaling of %s failed: %s", &tx, err)
+				continue
+			}
+			var rx Rat
+			if err := json.Unmarshal(b, &rx); err != nil {
+				t.Errorf("unmarshaling of %s failed: %s", &tx, err)
+				continue
+			}
+			if rx.Cmp(&tx) != 0 {
+				t.Errorf("JSON encoding of %s failed: got %s want %s", &tx, &rx, &tx)
+			}
+		}
+	}
+}
+
+func TestRatXMLEncoding(t *testing.T) {
+	for _, num := range ratNums {
+		for _, denom := range ratDenoms {
+			var tx Rat
+			tx.SetString(num + "/" + denom)
+			b, err := xml.Marshal(&tx)
+			if err != nil {
+				t.Errorf("marshaling of %s failed: %s", &tx, err)
+				continue
+			}
+			var rx Rat
+			if err := xml.Unmarshal(b, &rx); err != nil {
+				t.Errorf("unmarshaling of %s failed: %s", &tx, err)
+				continue
+			}
+			if rx.Cmp(&tx) != 0 {
+				t.Errorf("XML encoding of %s failed: got %s want %s", &tx, &rx, &tx)
+			}
+		}
+	}
+}
diff --git a/src/math/cmplx/cmath_test.go b/src/math/cmplx/cmath_test.go
index f285646..18d9be8 100644
--- a/src/math/cmplx/cmath_test.go
+++ b/src/math/cmplx/cmath_test.go
@@ -438,8 +438,10 @@
 		d = -d
 	}
 
-	if a != 0 {
-		e = e * a
+	// note: b is the correct (expected) value, a is the actual value.
+	// make the error tolerance a fraction of b, not a.
+	if b != 0 {
+		e = e * b
 		if e < 0 {
 			e = -e
 		}
@@ -460,8 +462,8 @@
 
 func cTolerance(a, b complex128, e float64) bool {
 	d := Abs(a - b)
-	if a != 0 {
-		e = e * Abs(a)
+	if b != 0 {
+		e = e * Abs(b)
 		if e < 0 {
 			e = -e
 		}
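
The direction of the scaling matters: scaling the tolerance by the actual
value a lets a badly wrong result widen its own error budget, while scaling
by the expected value b keeps the bound fixed. A minimal sketch of the
corrected check:

	package main

	import (
		"fmt"
		"math"
	)

	// tolerance reports whether actual value a is within relative error e
	// of expected value b.
	func tolerance(a, b, e float64) bool {
		d := math.Abs(a - b)
		if b != 0 {
			e = math.Abs(e * b) // scale by the expected value, not the actual one
		}
		return d < e
	}

	func main() {
		fmt.Println(tolerance(1.0000001, 1.0, 1e-6)) // true
		fmt.Println(tolerance(2.0, 1.0, 1e-6))       // false
	}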
diff --git a/src/math/cmplx/sqrt.go b/src/math/cmplx/sqrt.go
index 4ef6807..276be07 100644
--- a/src/math/cmplx/sqrt.go
+++ b/src/math/cmplx/sqrt.go
@@ -40,7 +40,7 @@
 //                       1/2
 // Im w  =  [ (r - x)/2 ]   .
 //
-// Cancellation error in r-x or r+x is avoided by using the
+// Cancelation error in r-x or r+x is avoided by using the
 // identity  2 Re w Im w  =  y.
 //
 // Note that -w is also a square root of z.  The root chosen
diff --git a/src/math/floor_amd64.s b/src/math/floor_amd64.s
index 67b7cde..7f512e7 100644
--- a/src/math/floor_amd64.s
+++ b/src/math/floor_amd64.s
@@ -6,8 +6,25 @@
 
 #define Big		0x4330000000000000 // 2**52
 
+// func hasSSE4() bool
+// returns whether SSE4.1 is supported
+TEXT ·hasSSE4(SB),NOSPLIT,$0
+	XORQ AX, AX
+	INCL AX
+	CPUID
+	SHRQ $19, CX
+	ANDQ $1, CX
+	MOVB CX, ret+0(FP)
+	RET
+
 // func Floor(x float64) float64
 TEXT ·Floor(SB),NOSPLIT,$0
+	CMPB    math·useSSE4(SB), $1
+	JNE     nosse4
+	ROUNDSD $1, x+0(FP), X0
+	MOVQ X0, ret+8(FP)
+	RET
+nosse4:
 	MOVQ	x+0(FP), AX
 	MOVQ	$~(1<<63), DX // sign bit mask
 	ANDQ	AX,DX // DX = |x|
@@ -30,6 +47,12 @@
 
 // func Ceil(x float64) float64
 TEXT ·Ceil(SB),NOSPLIT,$0
+	CMPB    math·useSSE4(SB), $1
+	JNE     nosse4
+	ROUNDSD $2, x+0(FP), X0
+	MOVQ X0, ret+8(FP)
+	RET
+nosse4:
 	MOVQ	x+0(FP), AX
 	MOVQ	$~(1<<63), DX // sign bit mask
 	MOVQ	AX, BX // BX = copy of x
diff --git a/src/math/floor_asm.go b/src/math/floor_asm.go
new file mode 100644
index 0000000..28e56a5
--- /dev/null
+++ b/src/math/floor_asm.go
@@ -0,0 +1,12 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build amd64 amd64p32
+
+package math
+
+// defined in floor_amd64.s
+func hasSSE4() bool
+
+var useSSE4 = hasSSE4()
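
The shape of this change: a one-time CPUID probe stored in a package
variable, then a cheap branch per call between the single-instruction
ROUNDSD path and the portable fallback. A hedged pure-Go sketch of the same
dispatch pattern (the probe and names are illustrative, not the real hooks):

	package main

	import "fmt"

	// hasFastRound stands in for the hasSSE4 assembly probe; in the real
	// code the answer comes from CPUID. Assumed true for this sketch.
	func hasFastRound() bool { return true }

	// Evaluated once at package init, like useSSE4 above.
	var useFastRound = hasFastRound()

	func floor(x float64) float64 {
		if useFastRound {
			return hwFloor(x) // would be a single ROUNDSD $1 in assembly
		}
		return swFloor(x)
	}

	func hwFloor(x float64) float64 { return swFloor(x) } // placeholder

	// swFloor is a portable fallback, valid for |x| < 2^62 (enough here).
	func swFloor(x float64) float64 {
		t := float64(int64(x)) // truncate toward zero
		if t > x {
			t-- // x was negative with a fractional part
		}
		return t
	}

	func main() {
		fmt.Println(floor(-1.5), floor(1.5)) // -2 1
	}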
diff --git a/src/math/j0.go b/src/math/j0.go
index c20a9b2..de77388 100644
--- a/src/math/j0.go
+++ b/src/math/j0.go
@@ -38,7 +38,7 @@
 //                      = 1/sqrt(2) * (cos(x) + sin(x))
 //              sin(x0) = sin(x)cos(pi/4)-cos(x)sin(pi/4)
 //                      = 1/sqrt(2) * (sin(x) - cos(x))
-//         (To avoid cancellation, use
+//         (To avoid cancelation, use
 //              sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
 //         to compute the worse one.)
 //
@@ -188,7 +188,7 @@
 		//             =  1/sqrt(2) * (sin(x) + cos(x))
 		//     sin(x0) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4)
 		//             =  1/sqrt(2) * (sin(x) - cos(x))
-		// To avoid cancellation, use
+		// To avoid cancelation, use
 		//     sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
 		// to compute the worse one.
 
diff --git a/src/math/j1.go b/src/math/j1.go
index 7ac186b..c537a72 100644
--- a/src/math/j1.go
+++ b/src/math/j1.go
@@ -39,7 +39,7 @@
 //                      =  1/sqrt(2) * (sin(x) - cos(x))
 //              sin(x1) =  sin(x)cos(3pi/4)-cos(x)sin(3pi/4)
 //                      = -1/sqrt(2) * (sin(x) + cos(x))
-//         (To avoid cancellation, use
+//         (To avoid cancelation, use
 //              sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
 //         to compute the worse one.)
 //
@@ -197,7 +197,7 @@
 		//                 =  1/sqrt(2) * (sin(x) - cos(x))
 		//         sin(x0) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4)
 		//                 = -1/sqrt(2) * (cos(x) + sin(x))
-		// To avoid cancellation, use
+		// To avoid cancelation, use
 		//     sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
 		// to compute the worse one.
 
diff --git a/src/math/modf.go b/src/math/modf.go
index 1e8376a..81cb8b5 100644
--- a/src/math/modf.go
+++ b/src/math/modf.go
@@ -14,9 +14,12 @@
 
 func modf(f float64) (int float64, frac float64) {
 	if f < 1 {
-		if f < 0 {
+		switch {
+		case f < 0:
 			int, frac = Modf(-f)
 			return -int, -frac
+		case f == 0:
+			return f, f // Return -0, -0 when f == -0
 		}
 		return 0, f
 	}
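
The new case fixes the sign of zero: previously Modf(-0) fell through to
"return 0, f" and produced a +0 integer part. A quick check through the
exported function:

	package main

	import (
		"fmt"
		"math"
	)

	func main() {
		i, f := math.Modf(math.Copysign(0, -1)) // Modf(-0)
		fmt.Println(math.Signbit(i), math.Signbit(f)) // true true: both are -0
	}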
diff --git a/src/math/modf_386.s b/src/math/modf_386.s
index 3debd3b..d549f1d 100644
--- a/src/math/modf_386.s
+++ b/src/math/modf_386.s
@@ -6,6 +6,19 @@
 
 // func Modf(f float64) (int float64, frac float64)
 TEXT ·Modf(SB),NOSPLIT,$0
+	// special case for f == -0.0
+	MOVL f+4(FP), DX	// high word
+	MOVL f+0(FP), AX	// low word
+	CMPL DX, $(1<<31)	// beginning of -0.0
+	JNE notNegativeZero
+	CMPL AX, $0			// could be denormalized
+	JNE notNegativeZero
+	MOVL AX, int+8(FP)
+	MOVL DX, int+12(FP)
+	MOVL AX, frac+16(FP)
+	MOVL DX, frac+20(FP)
+	RET
+notNegativeZero:
 	FMOVD   f+0(FP), F0  // F0=f
 	FMOVD   F0, F1       // F0=f, F1=f
 	FSTCW   -2(SP)       // save old Control Word
diff --git a/src/math/rand/rand.go b/src/math/rand/rand.go
index 6360128..075b0e5 100644
--- a/src/math/rand/rand.go
+++ b/src/math/rand/rand.go
@@ -156,6 +156,19 @@
 	return m
 }
 
+// Read generates len(p) random bytes and writes them into p. It
+// always returns len(p) and a nil error.
+func (r *Rand) Read(p []byte) (n int, err error) {
+	for i := 0; i < len(p); i += 7 {
+		val := r.src.Int63()
+		for j := 0; i+j < len(p) && j < 7; j++ {
+			p[i+j] = byte(val)
+			val >>= 8
+		}
+	}
+	return len(p), nil
+}
+
 /*
  * Top-level convenience functions
  */
@@ -209,6 +222,10 @@
 // from the default Source.
 func Perm(n int) []int { return globalRand.Perm(n) }
 
+// Read generates len(p) random bytes from the default Source and
+// writes them into p. It always returns len(p) and a nil error.
+func Read(p []byte) (n int, err error) { return globalRand.Read(p) }
+
 // NormFloat64 returns a normally distributed float64 in the range
 // [-math.MaxFloat64, +math.MaxFloat64] with
 // standard normal distribution (mean = 0, stddev = 1)
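
Read draws one Int63 per seven output bytes: each call yields 63 uniformly
random bits, enough for seven full bytes but not eight, so the loop advances
seven bytes per call. Usage is the usual io.Reader shape, deterministic for
a fixed seed:

	package main

	import (
		"fmt"
		"math/rand"
	)

	func main() {
		r := rand.New(rand.NewSource(1))
		buf := make([]byte, 10)
		n, err := r.Read(buf) // always returns len(buf) and a nil error
		fmt.Println(n, err, buf)
	}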
diff --git a/src/math/rand/rand_test.go b/src/math/rand/rand_test.go
index c61494f..6a1df4d 100644
--- a/src/math/rand/rand_test.go
+++ b/src/math/rand/rand_test.go
@@ -342,6 +342,59 @@
 	}
 }
 
+func testReadUniformity(t *testing.T, n int, seed int64) {
+	r := New(NewSource(seed))
+	buf := make([]byte, n)
+	nRead, err := r.Read(buf)
+	if err != nil {
+		t.Errorf("Read err %v", err)
+	}
+	if nRead != n {
+		t.Errorf("Read returned unexpected n; %d != %d", nRead, n)
+	}
+
+	// Expect a uniform distribution of byte values, which lie in [0, 255].
+	var (
+		mean       = 255.0 / 2
+		stddev     = math.Sqrt(255.0 * 255.0 / 12.0)
+		errorScale = stddev / math.Sqrt(float64(n))
+	)
+
+	expected := &statsResults{mean, stddev, 0.10 * errorScale, 0.08 * errorScale}
+
+	// Cast bytes as floats to use the common distribution-validity checks.
+	samples := make([]float64, n)
+	for i, val := range buf {
+		samples[i] = float64(val)
+	}
+	// Make sure that the entire set matches the expected distribution.
+	checkSampleDistribution(t, samples, expected)
+}
+
+func TestRead(t *testing.T) {
+	testBufferSizes := []int{
+		2, 4, 7, 64, 1024, 1 << 16, 1 << 20,
+	}
+	for _, seed := range testSeeds {
+		for _, n := range testBufferSizes {
+			testReadUniformity(t, n, seed)
+		}
+	}
+}
+
+func TestReadEmpty(t *testing.T) {
+	r := New(NewSource(1))
+	buf := make([]byte, 0)
+	n, err := r.Read(buf)
+	if err != nil {
+		t.Errorf("Read err into empty buffer; %v", err)
+	}
+	if n != 0 {
+		t.Errorf("Read into empty buffer returned unexpected n of %d", n)
+	}
+
+}
+
 // Benchmarks
 
 func BenchmarkInt63Threadsafe(b *testing.B) {
@@ -405,3 +458,30 @@
 		r.Perm(30)
 	}
 }
+
+func BenchmarkRead3(b *testing.B) {
+	r := New(NewSource(1))
+	buf := make([]byte, 3)
+	b.ResetTimer()
+	for n := b.N; n > 0; n-- {
+		r.Read(buf)
+	}
+}
+
+func BenchmarkRead64(b *testing.B) {
+	r := New(NewSource(1))
+	buf := make([]byte, 64)
+	b.ResetTimer()
+	for n := b.N; n > 0; n-- {
+		r.Read(buf)
+	}
+}
+
+func BenchmarkRead1000(b *testing.B) {
+	r := New(NewSource(1))
+	buf := make([]byte, 1000)
+	b.ResetTimer()
+	for n := b.N; n > 0; n-- {
+		r.Read(buf)
+	}
+}
diff --git a/src/math/rand/regress_test.go b/src/math/rand/regress_test.go
index 2b012af..9ae5357 100644
--- a/src/math/rand/regress_test.go
+++ b/src/math/rand/regress_test.go
@@ -25,6 +25,7 @@
 	var int32s = []int32{1, 10, 32, 1 << 20, 1<<20 + 1, 1000000000, 1 << 30, 1<<31 - 2, 1<<31 - 1}
 	var int64s = []int64{1, 10, 32, 1 << 20, 1<<20 + 1, 1000000000, 1 << 30, 1<<31 - 2, 1<<31 - 1, 1000000000000000000, 1 << 60, 1<<63 - 2, 1<<63 - 1}
 	var permSizes = []int{0, 1, 5, 8, 9, 10, 16}
+	var readBufferSizes = []int{1, 7, 8, 9, 10}
 	r := New(NewSource(0))
 
 	rv := reflect.ValueOf(r)
@@ -40,9 +41,6 @@
 		if mt.NumOut() == 0 {
 			continue
 		}
-		if mt.NumOut() != 1 {
-			t.Fatalf("unexpected result count for r.%s", m.Name)
-		}
 		r.Seed(0)
 		for repeat := 0; repeat < 20; repeat++ {
 			var args []reflect.Value
@@ -74,14 +72,25 @@
 
 				case reflect.Int64:
 					x = int64s[repeat%len(int64s)]
+
+				case reflect.Slice:
+					if m.Name == "Read" {
+						n := readBufferSizes[repeat%len(readBufferSizes)]
+						x = make([]byte, n)
+					}
 				}
 				argstr = fmt.Sprint(x)
 				args = append(args, reflect.ValueOf(x))
 			}
-			out := mv.Call(args)[0].Interface()
+
+			var out interface{}
+			out = mv.Call(args)[0].Interface()
 			if m.Name == "Int" || m.Name == "Intn" {
 				out = int64(out.(int))
 			}
+			if m.Name == "Read" {
+				out = args[0].Interface().([]byte)
+			}
 			if *printgolden {
 				var val string
 				big := int64(1 << 60)
@@ -332,24 +341,44 @@
 	[]int{2, 1, 7, 0, 6, 3, 4, 5},       // Perm(8)
 	[]int{8, 7, 5, 3, 4, 6, 0, 1, 2},    // Perm(9)
 	[]int{1, 0, 2, 5, 7, 6, 9, 8, 3, 4}, // Perm(10)
-	uint32(4059586549),                  // Uint32()
-	uint32(1052117029),                  // Uint32()
-	uint32(2817310706),                  // Uint32()
-	uint32(233405013),                   // Uint32()
-	uint32(1578775030),                  // Uint32()
-	uint32(1243308993),                  // Uint32()
-	uint32(826517535),                   // Uint32()
-	uint32(2814630155),                  // Uint32()
-	uint32(3853314576),                  // Uint32()
-	uint32(718781857),                   // Uint32()
-	uint32(1239465936),                  // Uint32()
-	uint32(3876658295),                  // Uint32()
-	uint32(3649778518),                  // Uint32()
-	uint32(1172727096),                  // Uint32()
-	uint32(2615979505),                  // Uint32()
-	uint32(1089444252),                  // Uint32()
-	uint32(3327114623),                  // Uint32()
-	uint32(75079301),                    // Uint32()
-	uint32(3380456901),                  // Uint32()
-	uint32(3433369789),                  // Uint32()
+	[]byte{0x1},                         // Read([0])
+	[]byte{0xc0, 0x41, 0xd3, 0xff, 0x12, 0x4, 0x5b},                   // Read([0 0 0 0 0 0 0])
+	[]byte{0x73, 0xc8, 0x6e, 0x4f, 0xf9, 0x5f, 0xf6, 0x62},            // Read([0 0 0 0 0 0 0 0])
+	[]byte{0x4a, 0x2d, 0xb, 0x75, 0xfb, 0x18, 0xd, 0xaf, 0x48},        // Read([0 0 0 0 0 0 0 0 0])
+	[]byte{0x39, 0x46, 0x51, 0x85, 0xf, 0xd4, 0xa1, 0x78, 0x89, 0x2e}, // Read([0 0 0 0 0 0 0 0 0 0])
+	[]byte{0x51}, // Read([0])
+	[]byte{0x4e, 0xe2, 0xd3, 0xd0, 0xd0, 0xde, 0x6b},                   // Read([0 0 0 0 0 0 0])
+	[]byte{0xf8, 0xf9, 0xb4, 0x4c, 0xe8, 0x5f, 0xf0, 0x44},             // Read([0 0 0 0 0 0 0 0])
+	[]byte{0x3b, 0xbf, 0x85, 0x7a, 0xab, 0x99, 0xc5, 0xb2, 0x52},       // Read([0 0 0 0 0 0 0 0 0])
+	[]byte{0xa8, 0xae, 0xb7, 0x9e, 0xf8, 0x56, 0xf6, 0x59, 0xc1, 0x8f}, // Read([0 0 0 0 0 0 0 0 0 0])
+	[]byte{0xc7}, // Read([0])
+	[]byte{0x5f, 0x67, 0xcf, 0xe2, 0x42, 0xcf, 0x3c},                   // Read([0 0 0 0 0 0 0])
+	[]byte{0xc3, 0x54, 0xf3, 0xed, 0xe2, 0xd6, 0xbe, 0xcc},             // Read([0 0 0 0 0 0 0 0])
+	[]byte{0x6a, 0x9f, 0x4a, 0x57, 0x8b, 0xcb, 0x9e, 0xf2, 0xd4},       // Read([0 0 0 0 0 0 0 0 0])
+	[]byte{0x6d, 0x29, 0x97, 0x61, 0xea, 0x9e, 0x4f, 0x5a, 0xa6, 0xae}, // Read([0 0 0 0 0 0 0 0 0 0])
+	[]byte{0xaa}, // Read([0])
+	[]byte{0x20, 0xef, 0xcd, 0x6c, 0xea, 0x84, 0xb6},                   // Read([0 0 0 0 0 0 0])
+	[]byte{0x92, 0x5e, 0x60, 0x7b, 0xe0, 0x63, 0x71, 0x6f},             // Read([0 0 0 0 0 0 0 0])
+	[]byte{0x4, 0x5c, 0x3f, 0x0, 0xf, 0x8a, 0x79, 0x6b, 0xce},          // Read([0 0 0 0 0 0 0 0 0])
+	[]byte{0xaa, 0xca, 0xee, 0xdf, 0xad, 0x5b, 0x50, 0x66, 0x64, 0xe8}, // Read([0 0 0 0 0 0 0 0 0 0])
+	uint32(4059586549),                                                 // Uint32()
+	uint32(1052117029),                                                 // Uint32()
+	uint32(2817310706),                                                 // Uint32()
+	uint32(233405013),                                                  // Uint32()
+	uint32(1578775030),                                                 // Uint32()
+	uint32(1243308993),                                                 // Uint32()
+	uint32(826517535),                                                  // Uint32()
+	uint32(2814630155),                                                 // Uint32()
+	uint32(3853314576),                                                 // Uint32()
+	uint32(718781857),                                                  // Uint32()
+	uint32(1239465936),                                                 // Uint32()
+	uint32(3876658295),                                                 // Uint32()
+	uint32(3649778518),                                                 // Uint32()
+	uint32(1172727096),                                                 // Uint32()
+	uint32(2615979505),                                                 // Uint32()
+	uint32(1089444252),                                                 // Uint32()
+	uint32(3327114623),                                                 // Uint32()
+	uint32(75079301),                                                   // Uint32()
+	uint32(3380456901),                                                 // Uint32()
+	uint32(3433369789),                                                 // Uint32()
 }
diff --git a/src/mime/encodedword.go b/src/mime/encodedword.go
index 9796f50..db4b5f4 100644
--- a/src/mime/encodedword.go
+++ b/src/mime/encodedword.go
@@ -54,35 +54,129 @@
 	buf := getBuffer()
 	defer putBuffer(buf)
 
+	e.openWord(buf, charset)
+	if e == BEncoding {
+		e.bEncode(buf, charset, s)
+	} else {
+		e.qEncode(buf, charset, s)
+	}
+	closeWord(buf)
+
+	return buf.String()
+}
+
+const (
+	// The maximum length of an encoded-word is 75 characters.
+	// See RFC 2047, section 2.
+	maxEncodedWordLen = 75
+	// maxContentLen is how much content can be encoded, ignoring the header and
+	// 2-byte footer.
+	maxContentLen = maxEncodedWordLen - len("=?UTF-8?") - len("?=")
+)
+
+var maxBase64Len = base64.StdEncoding.DecodedLen(maxContentLen)
+
+// bEncode encodes s using base64 encoding and writes it to buf.
+func (e WordEncoder) bEncode(buf *bytes.Buffer, charset, s string) {
+	w := base64.NewEncoder(base64.StdEncoding, buf)
+	// If the charset is not UTF-8 or if the content is short, do not bother
+	// splitting the encoded-word.
+	if !isUTF8(charset) || base64.StdEncoding.EncodedLen(len(s)) <= maxContentLen {
+		io.WriteString(w, s)
+		w.Close()
+		return
+	}
+
+	var currentLen, last, runeLen int
+	for i := 0; i < len(s); i += runeLen {
+		// Multi-byte characters must not be split across encoded-words.
+		// See RFC 2047, section 5.3.
+		_, runeLen = utf8.DecodeRuneInString(s[i:])
+
+		if currentLen+runeLen <= maxBase64Len {
+			currentLen += runeLen
+		} else {
+			io.WriteString(w, s[last:i])
+			w.Close()
+			e.splitWord(buf, charset)
+			last = i
+			currentLen = runeLen
+		}
+	}
+	io.WriteString(w, s[last:])
+	w.Close()
+}
+
+// qEncode encodes s using Q encoding and writes it to buf. It splits the
+// encoded-words when necessary.
+func (e WordEncoder) qEncode(buf *bytes.Buffer, charset, s string) {
+	// We only split encoded-words when the charset is UTF-8.
+	if !isUTF8(charset) {
+		writeQString(buf, s)
+		return
+	}
+
+	var currentLen, runeLen int
+	for i := 0; i < len(s); i += runeLen {
+		b := s[i]
+		// Multi-byte characters must not be split across encoded-words.
+		// See RFC 2047, section 5.3.
+		var encLen int
+		if b >= ' ' && b <= '~' && b != '=' && b != '?' && b != '_' {
+			runeLen, encLen = 1, 1
+		} else {
+			_, runeLen = utf8.DecodeRuneInString(s[i:])
+			encLen = 3 * runeLen
+		}
+
+		if currentLen+encLen > maxContentLen {
+			e.splitWord(buf, charset)
+			currentLen = 0
+		}
+		writeQString(buf, s[i:i+runeLen])
+		currentLen += encLen
+	}
+}
+
+// writeQString encodes s using Q encoding and writes it to buf.
+func writeQString(buf *bytes.Buffer, s string) {
+	for i := 0; i < len(s); i++ {
+		switch b := s[i]; {
+		case b == ' ':
+			buf.WriteByte('_')
+		case b >= '!' && b <= '~' && b != '=' && b != '?' && b != '_':
+			buf.WriteByte(b)
+		default:
+			buf.WriteByte('=')
+			buf.WriteByte(upperhex[b>>4])
+			buf.WriteByte(upperhex[b&0x0f])
+		}
+	}
+}
+
+// openWord writes the beginning of an encoded-word into buf.
+func (e WordEncoder) openWord(buf *bytes.Buffer, charset string) {
 	buf.WriteString("=?")
 	buf.WriteString(charset)
 	buf.WriteByte('?')
 	buf.WriteByte(byte(e))
 	buf.WriteByte('?')
+}
 
-	if e == BEncoding {
-		w := base64.NewEncoder(base64.StdEncoding, buf)
-		io.WriteString(w, s)
-		w.Close()
-	} else {
-		enc := make([]byte, 3)
-		for i := 0; i < len(s); i++ {
-			b := s[i]
-			switch {
-			case b == ' ':
-				buf.WriteByte('_')
-			case b <= '~' && b >= '!' && b != '=' && b != '?' && b != '_':
-				buf.WriteByte(b)
-			default:
-				enc[0] = '='
-				enc[1] = upperhex[b>>4]
-				enc[2] = upperhex[b&0x0f]
-				buf.Write(enc)
-			}
-		}
-	}
+// closeWord writes the end of an encoded-word into buf.
+func closeWord(buf *bytes.Buffer) {
 	buf.WriteString("?=")
-	return buf.String()
+}
+
+// splitWord closes the current encoded-word and opens a new one.
+func (e WordEncoder) splitWord(buf *bytes.Buffer, charset string) {
+	closeWord(buf)
+	buf.WriteByte(' ')
+	e.openWord(buf, charset)
+}
+
+func isUTF8(charset string) bool {
+	return strings.EqualFold(charset, "UTF-8")
 }
 
 const upperhex = "0123456789ABCDEF"
@@ -98,15 +192,26 @@
 	CharsetReader func(charset string, input io.Reader) (io.Reader, error)
 }
 
-// Decode decodes an encoded-word. If word is not a valid RFC 2047 encoded-word,
-// word is returned unchanged.
+// Decode decodes an RFC 2047 encoded-word.
 func (d *WordDecoder) Decode(word string) (string, error) {
-	fields := strings.Split(word, "?") // TODO: remove allocation?
-	if len(fields) != 5 || fields[0] != "=" || fields[4] != "=" || len(fields[2]) != 1 {
+	if !strings.HasPrefix(word, "=?") || !strings.HasSuffix(word, "?=") || strings.Count(word, "?") != 4 {
+		return "", errInvalidWord
+	}
+	word = word[2 : len(word)-2]
+
+	// split delimits the first 2 fields
+	split := strings.IndexByte(word, '?')
+	// the field after split must only be one byte
+	if word[split+2] != '?' {
 		return "", errInvalidWord
 	}
 
-	content, err := decode(fields[2][0], fields[3])
+	// split word "UTF-8?q?ascii" into "UTF-8", 'q', and "ascii"
+	charset := word[:split]
+	encoding := word[split+1]
+	text := word[split+3:]
+
+	content, err := decode(encoding, text)
 	if err != nil {
 		return "", err
 	}
@@ -114,7 +219,7 @@
 	buf := getBuffer()
 	defer putBuffer(buf)
 
-	if err := d.convert(buf, fields[1], content); err != nil {
+	if err := d.convert(buf, charset, content); err != nil {
 		return "", err
 	}
 
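
With splitting in place, an over-long UTF-8 input comes back as several
space-separated encoded-words, each under the 75-character cap and never
split mid-rune. A small usage sketch (mirroring the new test cases below):

	package main

	import (
		"fmt"
		"mime"
		"strings"
	)

	func main() {
		// Each "é" Q-encodes to "=C3=A9" (6 bytes); only 10 fit in one
		// encoded-word, so the 11th starts a second one.
		fmt.Println(mime.QEncoding.Encode("utf-8", strings.Repeat("é", 11)))
	}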
diff --git a/src/mime/encodedword_test.go b/src/mime/encodedword_test.go
index b30ecba..5fcd7a0 100644
--- a/src/mime/encodedword_test.go
+++ b/src/mime/encodedword_test.go
@@ -27,6 +27,14 @@
 		{QEncoding, iso88591, "a", "a"},
 		{QEncoding, utf8, "123 456", "123 456"},
 		{QEncoding, utf8, "\t !\"#$%&'()*+,-./ :;<>?@[\\]^_`{|}~", "\t !\"#$%&'()*+,-./ :;<>?@[\\]^_`{|}~"},
+		{QEncoding, utf8, strings.Repeat("é", 10), "=?utf-8?q?" + strings.Repeat("=C3=A9", 10) + "?="},
+		{QEncoding, utf8, strings.Repeat("é", 11), "=?utf-8?q?" + strings.Repeat("=C3=A9", 10) + "?= =?utf-8?q?=C3=A9?="},
+		{QEncoding, iso88591, strings.Repeat("\xe9", 22), "=?iso-8859-1?q?" + strings.Repeat("=E9", 22) + "?="},
+		{QEncoding, utf8, strings.Repeat("\x80", 22), "=?utf-8?q?" + strings.Repeat("=80", 21) + "?= =?utf-8?q?=80?="},
+		{BEncoding, utf8, strings.Repeat("é", 24), "=?utf-8?b?" + strings.Repeat("w6nDqcOp", 8) + "?="},
+		{BEncoding, utf8, strings.Repeat("é", 27), "=?utf-8?b?" + strings.Repeat("w6nDqcOp", 8) + "?= =?utf-8?b?w6nDqcOp?="},
+		{BEncoding, iso88591, strings.Repeat("\xe9", 45), "=?iso-8859-1?b?" + strings.Repeat("6enp", 15) + "?="},
+		{BEncoding, utf8, strings.Repeat("\x80", 51), "=?utf-8?b?" + strings.Repeat("gICA", 16) + "?= =?utf-8?b?gICA?="},
 	}
 
 	for _, test := range tests {
diff --git a/src/mime/multipart/multipart.go b/src/mime/multipart/multipart.go
index 6f65a55..3b746a5 100644
--- a/src/mime/multipart/multipart.go
+++ b/src/mime/multipart/multipart.go
@@ -25,6 +25,11 @@
 
 var emptyParams = make(map[string]string)
 
+// This constant needs to be at least 76 for this package to work correctly.
+// This is because \r\n--separator_of_len_70- would fill the buffer and it
+// wouldn't be safe to consume a single byte from it.
+const peekBufferSize = 4096
+
 // A Part represents a single part in a multipart body.
 type Part struct {
 	// The headers of the body, if any, with the keys canonicalized
@@ -91,7 +96,7 @@
 func NewReader(r io.Reader, boundary string) *Reader {
 	b := []byte("\r\n--" + boundary + "--")
 	return &Reader{
-		bufReader:        bufio.NewReader(r),
+		bufReader:        bufio.NewReaderSize(r, peekBufferSize),
 		nl:               b[:2],
 		nlDashBoundary:   b[:len(b)-2],
 		dashBoundaryDash: b[2:],
@@ -148,7 +153,7 @@
 		// the read request.  No need to parse more at the moment.
 		return p.buffer.Read(d)
 	}
-	peek, err := p.mr.bufReader.Peek(4096) // TODO(bradfitz): add buffer size accessor
+	peek, err := p.mr.bufReader.Peek(peekBufferSize) // TODO(bradfitz): add buffer size accessor
 
 	// Look for an immediate empty part without a leading \r\n
 	// before the boundary separator.  Some MIME code makes empty
@@ -229,6 +234,7 @@
 	expectNewPart := false
 	for {
 		line, err := r.bufReader.ReadSlice('\n')
+
 		if err == io.EOF && r.isFinalBoundary(line) {
 			// If the buffer ends in "--boundary--" without the
 			// trailing "\r\n", ReadSlice will return an error
@@ -343,20 +349,24 @@
 // peekBufferSeparatorIndex returns the index of mr.nlDashBoundary in
 // peek and whether it is a real boundary (and not a prefix of an
 // unrelated separator). To be the end, the peek buffer must contain a
-// newline after the boundary.
+// newline after the boundary or contain the ending boundary (--separator--).
 func (mr *Reader) peekBufferSeparatorIndex(peek []byte) (idx int, isEnd bool) {
 	idx = bytes.Index(peek, mr.nlDashBoundary)
 	if idx == -1 {
 		return
 	}
+
 	peek = peek[idx+len(mr.nlDashBoundary):]
+	if len(peek) == 0 || len(peek) == 1 && peek[0] == '-' {
+		return idx, false
+	}
 	if len(peek) > 1 && peek[0] == '-' && peek[1] == '-' {
 		return idx, true
 	}
 	peek = skipLWSPChar(peek)
 	// Don't have a complete line after the peek.
 	if bytes.IndexByte(peek, '\n') == -1 {
-		return -1, false
+		return idx, false
 	}
 	if len(peek) > 0 && peek[0] == '\n' {
 		return idx, true
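
The 76-byte floor on peekBufferSize comes from the worst case in which the
peek window ends with an incomplete terminator: "\r\n", "--", a
maximum-length boundary, and one trailing dash. Checking that arithmetic:

	package main

	import "fmt"

	func main() {
		const maxBoundaryLen = 70 // RFC 2046, section 5.1.1
		worst := len("\r\n--") + maxBoundaryLen + len("-")
		fmt.Println(worst) // 75: a smaller peek buffer could be all boundary prefix
	}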
diff --git a/src/mime/multipart/multipart_test.go b/src/mime/multipart/multipart_test.go
index 30452d1..d06bb41 100644
--- a/src/mime/multipart/multipart_test.go
+++ b/src/mime/multipart/multipart_test.go
@@ -616,6 +616,86 @@
 			},
 		},
 	},
+	// Issue 12662: Check that we don't consume the leading \r if the peekBuffer
+	// ends in '\r\n--separator-'
+	{
+		name: "peek buffer boundary condition",
+		sep:  "00ffded004d4dd0fdf945fbdef9d9050cfd6a13a821846299b27fc71b9db",
+		in: strings.Replace(`--00ffded004d4dd0fdf945fbdef9d9050cfd6a13a821846299b27fc71b9db
+Content-Disposition: form-data; name="block"; filename="block"
+Content-Type: application/octet-stream
+
+`+strings.Repeat("A", peekBufferSize-65)+"\n--00ffded004d4dd0fdf945fbdef9d9050cfd6a13a821846299b27fc71b9db--", "\n", "\r\n", -1),
+		want: []headerBody{
+			{textproto.MIMEHeader{"Content-Type": {`application/octet-stream`}, "Content-Disposition": {`form-data; name="block"; filename="block"`}},
+				strings.Repeat("A", peekBufferSize-65),
+			},
+		},
+	},
+	// Issue 12662: Same test as above with \r\n at the end
+	{
+		name: "peek buffer boundary condition",
+		sep:  "00ffded004d4dd0fdf945fbdef9d9050cfd6a13a821846299b27fc71b9db",
+		in: strings.Replace(`--00ffded004d4dd0fdf945fbdef9d9050cfd6a13a821846299b27fc71b9db
+Content-Disposition: form-data; name="block"; filename="block"
+Content-Type: application/octet-stream
+
+`+strings.Repeat("A", peekBufferSize-65)+"\n--00ffded004d4dd0fdf945fbdef9d9050cfd6a13a821846299b27fc71b9db--\n", "\n", "\r\n", -1),
+		want: []headerBody{
+			{textproto.MIMEHeader{"Content-Type": {`application/octet-stream`}, "Content-Disposition": {`form-data; name="block"; filename="block"`}},
+				strings.Repeat("A", peekBufferSize-65),
+			},
+		},
+	},
+	// Issue 12662v2: We want to make sure that for short buffers that end with
+	// '\r\n--separator-' we always consume at least one (valid) symbol from the
+	// peekBuffer
+	{
+		name: "peek buffer boundary condition",
+		sep:  "aaaaaaaaaa00ffded004d4dd0fdf945fbdef9d9050cfd6a13a821846299b27fc71b9db",
+		in: strings.Replace(`--aaaaaaaaaa00ffded004d4dd0fdf945fbdef9d9050cfd6a13a821846299b27fc71b9db
+Content-Disposition: form-data; name="block"; filename="block"
+Content-Type: application/octet-stream
+
+`+strings.Repeat("A", peekBufferSize)+"\n--aaaaaaaaaa00ffded004d4dd0fdf945fbdef9d9050cfd6a13a821846299b27fc71b9db--", "\n", "\r\n", -1),
+		want: []headerBody{
+			{textproto.MIMEHeader{"Content-Type": {`application/octet-stream`}, "Content-Disposition": {`form-data; name="block"; filename="block"`}},
+				strings.Repeat("A", peekBufferSize),
+			},
+		},
+	},
+	// Context: https://github.com/camlistore/camlistore/issues/642
+	// If the file contents in the form happens to have a size such as:
+	// size = peekBufferSize - (len("\n--") + len(boundary) + len("\r") + 1) (modulo peekBufferSize),
+	// then peekBufferSeparatorIndex was wrongly returning (-1, false), which was leading to an nCopy
+	// cut such as:
+	// "somedata\r| |\n--Boundary\r" (instead of "somedata| |\r\n--Boundary\r"), which was making the
+	// subsequent Read miss the boundary.
+	{
+		name: "safeCount off by one",
+		sep:  "08b84578eabc563dcba967a945cdf0d9f613864a8f4a716f0e81caa71a74",
+		in: strings.Replace(`--08b84578eabc563dcba967a945cdf0d9f613864a8f4a716f0e81caa71a74
+Content-Disposition: form-data; name="myfile"; filename="my-file.txt"
+Content-Type: application/octet-stream
+
+`, "\n", "\r\n", -1) +
+			strings.Repeat("A", peekBufferSize-(len("\n--")+len("08b84578eabc563dcba967a945cdf0d9f613864a8f4a716f0e81caa71a74")+len("\r")+1)) +
+			strings.Replace(`
+--08b84578eabc563dcba967a945cdf0d9f613864a8f4a716f0e81caa71a74
+Content-Disposition: form-data; name="key"
+
+val
+--08b84578eabc563dcba967a945cdf0d9f613864a8f4a716f0e81caa71a74--
+`, "\n", "\r\n", -1),
+		want: []headerBody{
+			{textproto.MIMEHeader{"Content-Type": {`application/octet-stream`}, "Content-Disposition": {`form-data; name="myfile"; filename="my-file.txt"`}},
+				strings.Repeat("A", peekBufferSize-(len("\n--")+len("08b84578eabc563dcba967a945cdf0d9f613864a8f4a716f0e81caa71a74")+len("\r")+1)),
+			},
+			{textproto.MIMEHeader{"Content-Disposition": {`form-data; name="key"`}},
+				"val",
+			},
+		},
+	},
 
 	roundTripParseTest(),
 }
@@ -656,6 +736,53 @@
 	}
 }
 
+func partsFromReader(r *Reader) ([]headerBody, error) {
+	got := []headerBody{}
+	for {
+		p, err := r.NextPart()
+		if err == io.EOF {
+			return got, nil
+		}
+		if err != nil {
+			return nil, fmt.Errorf("NextPart: %v", err)
+		}
+		pbody, err := ioutil.ReadAll(p)
+		if err != nil {
+			return nil, fmt.Errorf("error reading part: %v", err)
+		}
+		got = append(got, headerBody{p.Header, string(pbody)})
+	}
+}
+
+func TestParseAllSizes(t *testing.T) {
+	const maxSize = 5 << 10
+	var buf bytes.Buffer
+	body := strings.Repeat("a", maxSize)
+	bodyb := []byte(body)
+	for size := 0; size < maxSize; size++ {
+		buf.Reset()
+		w := NewWriter(&buf)
+		part, _ := w.CreateFormField("f")
+		part.Write(bodyb[:size])
+		part, _ = w.CreateFormField("key")
+		part.Write([]byte("val"))
+		w.Close()
+		r := NewReader(&buf, w.Boundary())
+		got, err := partsFromReader(r)
+		if err != nil {
+			t.Errorf("For size %d: %v", size, err)
+			continue
+		}
+		if len(got) != 2 {
+			t.Errorf("For size %d, num parts = %d; want 2", size, len(got))
+			continue
+		}
+		if got[0].body != body[:size] {
+			t.Errorf("For size %d, got unexpected len %d: %q", size, len(got[0].body), got[0].body)
+		}
+	}
+}
+
 func roundTripParseTest() parseTest {
 	t := parseTest{
 		name: "round trip",
diff --git a/src/net/conf.go b/src/net/conf.go
index c92e579..ddaa978 100644
--- a/src/net/conf.go
+++ b/src/net/conf.go
@@ -9,7 +9,6 @@
 import (
 	"os"
 	"runtime"
-	"strconv"
 	"sync"
 	"syscall"
 )
@@ -293,7 +292,7 @@
 			return
 		}
 		if '0' <= s[0] && s[0] <= '9' {
-			debugLevel, _ = strconv.Atoi(s)
+			debugLevel, _, _ = dtoi(s, 0)
 		} else {
 			dnsMode = s
 		}
diff --git a/src/net/dial_test.go b/src/net/dial_test.go
index 9843306..bd3b2dd 100644
--- a/src/net/dial_test.go
+++ b/src/net/dial_test.go
@@ -509,6 +509,9 @@
 }
 
 func TestDialSerialAsyncSpuriousConnection(t *testing.T) {
+	if runtime.GOOS == "plan9" {
+		t.Skip("skipping on plan9; no deadline support, golang.org/issue/11932")
+	}
 	ln, err := newLocalListener("tcp")
 	if err != nil {
 		t.Fatal(err)
diff --git a/src/net/dnsclient_unix.go b/src/net/dnsclient_unix.go
index 1fbe085..94282ee 100644
--- a/src/net/dnsclient_unix.go
+++ b/src/net/dnsclient_unix.go
@@ -20,7 +20,6 @@
 	"io"
 	"math/rand"
 	"os"
-	"strconv"
 	"sync"
 	"time"
 )
@@ -371,7 +370,7 @@
 	if s, ok := lookupOrderName[o]; ok {
 		return s
 	}
-	return "hostLookupOrder=" + strconv.Itoa(int(o)) + "??"
+	return "hostLookupOrder=" + itoa(int(o)) + "??"
 }
 
 // goLookupHost is the native Go implementation of LookupHost.
diff --git a/src/net/dnsmsg.go b/src/net/dnsmsg.go
index 6ecaa94..93078fe 100644
--- a/src/net/dnsmsg.go
+++ b/src/net/dnsmsg.go
@@ -691,6 +691,9 @@
 	// off1 is end of header
 	// off2 is end of rr
 	off1, ok = packStruct(rr.Header(), msg, off)
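+	// Stop early if the header failed to pack; off1 is not a valid
+	// offset to continue from in that case.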
+	if !ok {
+		return len(msg), false
+	}
 	off2, ok = packStruct(rr, msg, off)
 	if !ok {
 		return len(msg), false
diff --git a/src/net/fd_plan9.go b/src/net/fd_plan9.go
index 32766f5..cec8860 100644
--- a/src/net/fd_plan9.go
+++ b/src/net/fd_plan9.go
@@ -171,6 +171,14 @@
 	if !fd.ok() {
 		return syscall.EINVAL
 	}
+	if fd.net == "tcp" {
+		// The following line is required to unblock Reads.
+		// For some reason, WriteString returns an error:
+		// "write /net/tcp/39/listen: inappropriate use of fd"
+		// But without it, Reads on dead conns hang forever.
+		// See Issue 9554.
+		fd.ctl.WriteString("hangup")
+	}
 	err := fd.ctl.Close()
 	if fd.data != nil {
 		if err1 := fd.data.Close(); err1 != nil && err == nil {
diff --git a/src/net/fd_windows.go b/src/net/fd_windows.go
index 205daff..43b2f0c 100644
--- a/src/net/fd_windows.go
+++ b/src/net/fd_windows.go
@@ -208,7 +208,7 @@
 		s.req <- ioSrvReq{o, nil}
 		<-o.errc
 	}
-	// Wait for cancellation to complete.
+	// Wait for cancelation to complete.
 	fd.pd.WaitCanceled(int(o.mode))
 	if o.errno != 0 {
 		err = syscall.Errno(o.errno)
@@ -217,8 +217,8 @@
 		}
 		return 0, err
 	}
-	// We issued cancellation request. But, it seems, IO operation succeeded
-	// before cancellation request run. We need to treat IO operation as
+	// We issued a cancelation request. But, it seems, the IO operation succeeded
+	// before the cancelation request ran. We need to treat the IO operation as
 	// succeeded (the bytes are actually sent/recv from network).
 	return int(o.qty), nil
 }
diff --git a/src/net/hosts.go b/src/net/hosts.go
index 27958c7..aba2cea 100644
--- a/src/net/hosts.go
+++ b/src/net/hosts.go
@@ -60,9 +60,11 @@
 				continue
 			}
 			for i := 1; i < len(f); i++ {
-				h := f[i]
-				hs[h] = append(hs[h], addr)
-				is[addr] = append(is[addr], h)
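+				// Keys are stored lowercased so the lookup
+				// functions can match host names case-insensitively.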
+				h := []byte(f[i])
+				lowerASCIIBytes(h)
+				lh := string(h)
+				hs[lh] = append(hs[lh], addr)
+				is[addr] = append(is[addr], lh)
 			}
 		}
 		// Update the data cache.
@@ -80,7 +82,11 @@
 	defer hosts.Unlock()
 	readHosts()
 	if len(hosts.byName) != 0 {
-		if ips, ok := hosts.byName[host]; ok {
+		// TODO(jbd,bradfitz): avoid this alloc if host is already all lowercase?
+		// or linear scan the byName map if it's small enough?
+		lowerHost := []byte(host)
+		lowerASCIIBytes(lowerHost)
+		if ips, ok := hosts.byName[string(lowerHost)]; ok {
 			return ips
 		}
 	}
diff --git a/src/net/hosts_test.go b/src/net/hosts_test.go
index aca64c3..99569cd 100644
--- a/src/net/hosts_test.go
+++ b/src/net/hosts_test.go
@@ -74,6 +74,7 @@
 			{"255.255.255.255", []string{"broadcasthost"}},
 			{"127.0.0.2", []string{"odin"}},
 			{"127.0.0.3", []string{"odin"}},
+			{"127.0.0.4", []string{"bor"}},
 			{"::2", []string{"odin"}},
 			{"127.1.1.1", []string{"thor"}},
 			{"127.1.1.2", []string{"ullr", "ullrhost"}},
diff --git a/src/net/http/cgi/host.go b/src/net/http/cgi/host.go
index 4efbe7a..9b4d875 100644
--- a/src/net/http/cgi/host.go
+++ b/src/net/http/cgi/host.go
@@ -77,15 +77,15 @@
 //      Env: []string{"SCRIPT_FILENAME=foo.php"},
 //    }
 func removeLeadingDuplicates(env []string) (ret []string) {
-	n := len(env)
-	for i := 0; i < n; i++ {
-		e := env[i]
-		s := strings.SplitN(e, "=", 2)[0]
+	for i, e := range env {
 		found := false
-		for j := i + 1; j < n; j++ {
-			if s == strings.SplitN(env[j], "=", 2)[0] {
-				found = true
-				break
+		if eq := strings.IndexByte(e, '='); eq != -1 {
+			keq := e[:eq+1] // "key="
+			for _, e2 := range env[i+1:] {
+				if strings.HasPrefix(e2, keq) {
+					found = true
+					break
+				}
 			}
 		}
 		if !found {
@@ -159,10 +159,6 @@
 		env = append(env, "CONTENT_TYPE="+ctype)
 	}
 
-	if h.Env != nil {
-		env = append(env, h.Env...)
-	}
-
 	envPath := os.Getenv("PATH")
 	if envPath == "" {
 		envPath = "/bin:/usr/bin:/usr/ucb:/usr/bsd:/usr/local/bin"
@@ -181,6 +177,10 @@
 		}
 	}
 
+	if h.Env != nil {
+		env = append(env, h.Env...)
+	}
+
 	env = removeLeadingDuplicates(env)
 
 	var cwd, path string
diff --git a/src/net/http/cgi/host_test.go b/src/net/http/cgi/host_test.go
index 4aa67e4..3327764 100644
--- a/src/net/http/cgi/host_test.go
+++ b/src/net/http/cgi/host_test.go
@@ -16,6 +16,7 @@
 	"os"
 	"os/exec"
 	"path/filepath"
+	"reflect"
 	"runtime"
 	"strconv"
 	"strings"
@@ -487,12 +488,36 @@
 		Args: []string{cgifile},
 		Env: []string{
 			"SCRIPT_FILENAME=" + cgifile,
-			"REQUEST_URI=/foo/bar"},
+			"REQUEST_URI=/foo/bar",
+			"PATH=/wibble"},
 	}
 	expectedMap := map[string]string{
 		"cwd": cwd,
 		"env-SCRIPT_FILENAME": cgifile,
 		"env-REQUEST_URI":     "/foo/bar",
+		"env-PATH":            "/wibble",
 	}
 	runCgiTest(t, h, "GET /test.cgi HTTP/1.0\nHost: example.com\n\n", expectedMap)
 }
+
+func TestRemoveLeadingDuplicates(t *testing.T) {
+	tests := []struct {
+		env  []string
+		want []string
+	}{
+		{
+			env:  []string{"a=b", "b=c", "a=b2"},
+			want: []string{"b=c", "a=b2"},
+		},
+		{
+			env:  []string{"a=b", "b=c", "d", "e=f"},
+			want: []string{"a=b", "b=c", "d", "e=f"},
+		},
+	}
+	for _, tt := range tests {
+		got := removeLeadingDuplicates(tt.env)
+		if !reflect.DeepEqual(got, tt.want) {
+			t.Errorf("removeLeadingDuplicates(%q) = %q; want %q", tt.env, got, tt.want)
+		}
+	}
+}
diff --git a/src/net/http/client_test.go b/src/net/http/client_test.go
index 7b524d3..86ec83a 100644
--- a/src/net/http/client_test.go
+++ b/src/net/http/client_test.go
@@ -983,13 +983,12 @@
 	if err == nil {
 		t.Fatal("got response from Get; expected error")
 	}
-	ue, ok := err.(*url.Error)
-	if !ok {
+	if _, ok := err.(*url.Error); !ok {
 		t.Fatalf("Got error of type %T; want *url.Error", err)
 	}
-	ne, ok := ue.Err.(net.Error)
+	ne, ok := err.(net.Error)
 	if !ok {
-		t.Fatalf("Got url.Error.Err of type %T; want some net.Error", err)
+		t.Fatalf("Got error of type %T; want some net.Error", err)
 	}
 	if !ne.Timeout() {
 		t.Error("net.Error.Timeout = false; want true")
diff --git a/src/net/http/export_test.go b/src/net/http/export_test.go
index 0457be5..68fbd43 100644
--- a/src/net/http/export_test.go
+++ b/src/net/http/export_test.go
@@ -130,3 +130,5 @@
 var ExportCloseWriteAndWait = (*conn).closeWriteAndWait
 
 var ExportErrRequestCanceled = errRequestCanceled
+
+var ExportServeFile = serveFile
diff --git a/src/net/http/fcgi/child.go b/src/net/http/fcgi/child.go
index da824ed..8870424 100644
--- a/src/net/http/fcgi/child.go
+++ b/src/net/http/fcgi/child.go
@@ -56,6 +56,9 @@
 			return
 		}
 		text = text[n:]
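+		// Bounds check: a malformed record can declare key/value
+		// lengths larger than the remaining payload
+		// (see TestMalformedParams).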
+		if int(keyLen)+int(valLen) > len(text) {
+			return
+		}
 		key := readString(text, keyLen)
 		text = text[keyLen:]
 		val := readString(text, valLen)
diff --git a/src/net/http/fcgi/fcgi_test.go b/src/net/http/fcgi/fcgi_test.go
index de0f7f8..b6013bf 100644
--- a/src/net/http/fcgi/fcgi_test.go
+++ b/src/net/http/fcgi/fcgi_test.go
@@ -254,3 +254,27 @@
 		<-done
 	}
 }
+
+type rwNopCloser struct {
+	io.Reader
+	io.Writer
+}
+
+func (rwNopCloser) Close() error {
+	return nil
+}
+
+// Verifies it doesn't crash. Issue 11824.
+func TestMalformedParams(t *testing.T) {
+	input := []byte{
+		// beginRequest, requestId=1, contentLength=8, role=1, keepConn=1
+		1, 1, 0, 1, 0, 8, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
+		// params, requestId=1, contentLength=10, k1Len=50, v1Len=50 (malformed, wrong length)
+		1, 4, 0, 1, 0, 10, 0, 0, 50, 50, 3, 4, 5, 6, 7, 8, 9, 10,
+		// end of params
+		1, 4, 0, 1, 0, 0, 0, 0,
+	}
+	rw := rwNopCloser{bytes.NewReader(input), ioutil.Discard}
+	c := newChild(rw, http.DefaultServeMux)
+	c.serve()
+}
diff --git a/src/net/http/fs.go b/src/net/http/fs.go
index 394c87d2..ac7e1b5 100644
--- a/src/net/http/fs.go
+++ b/src/net/http/fs.go
@@ -369,8 +369,8 @@
 	}
 	defer f.Close()
 
-	d, err1 := f.Stat()
-	if err1 != nil {
+	d, err := f.Stat()
+	if err != nil {
 		msg, code := toHTTPError(err)
 		Error(w, msg, code)
 		return
diff --git a/src/net/http/fs_test.go b/src/net/http/fs_test.go
index 9b235d2..a3d64f3 100644
--- a/src/net/http/fs_test.go
+++ b/src/net/http/fs_test.go
@@ -850,6 +850,28 @@
 	}
 }
 
+// Issue 12991
+func TestServerFileStatError(t *testing.T) {
+	rec := httptest.NewRecorder()
+	r, _ := NewRequest("GET", "http://foo/", nil)
+	redirect := false
+	name := "file.txt"
+	fs := issue12991FS{}
+	ExportServeFile(rec, r, fs, name, redirect)
+	if body := rec.Body.String(); !strings.Contains(body, "403") || !strings.Contains(body, "Forbidden") {
+		t.Errorf("wanted 403 forbidden message; got: %s", body)
+	}
+}
+
+type issue12991FS struct{}
+
+func (issue12991FS) Open(string) (File, error) { return issue12991File{}, nil }
+
+type issue12991File struct{ File }
+
+func (issue12991File) Stat() (os.FileInfo, error) { return nil, os.ErrPermission }
+func (issue12991File) Close() error               { return nil }
+
 func TestServeContentErrorMessages(t *testing.T) {
 	defer afterTest(t)
 	fs := fakeFS{
diff --git a/src/net/http/h2_bundle.go b/src/net/http/h2_bundle.go
new file mode 100644
index 0000000..b3ef715
--- /dev/null
+++ b/src/net/http/h2_bundle.go
@@ -0,0 +1,4334 @@
+// This file is autogenerated using x/tools/cmd/bundle from
+// https://go-review.googlesource.com/#/c/15850/
+// Usage:
+// $ bundle golang.org/x/net/http2 net/http http2 > /tmp/x.go; mv /tmp/x.go $GOROOT/src/net/http/h2_bundle.go
+
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package http
+
+import (
+	"bufio"
+	"bytes"
+	"crypto/tls"
+	"encoding/binary"
+	"errors"
+	"fmt"
+	"io"
+	"log"
+	"net"
+	"net/url"
+	"os"
+	"runtime"
+	"strconv"
+	"strings"
+	"sync"
+	"time"
+
+	"golang.org/x/net/http2/hpack"
+)
+
+// buffer is an io.ReadWriteCloser backed by a fixed size buffer.
+// It never allocates, but moves old data as new data is written.
+type http2buffer struct {
+	buf    []byte
+	r, w   int
+	closed bool
+	err    error // err to return to reader
+}
+
+var (
+	http2errReadEmpty   = errors.New("read from empty buffer")
+	http2errWriteClosed = errors.New("write on closed buffer")
+	http2errWriteFull   = errors.New("write on full buffer")
+)
+
+// Read copies bytes from the buffer into p.
+// It is an error to read when no data is available.
+func (b *http2buffer) Read(p []byte) (n int, err error) {
+	n = copy(p, b.buf[b.r:b.w])
+	b.r += n
+	if b.closed && b.r == b.w {
+		err = b.err
+	} else if b.r == b.w && n == 0 {
+		err = http2errReadEmpty
+	}
+	return n, err
+}
+
+// Len returns the number of bytes of the unread portion of the buffer.
+func (b *http2buffer) Len() int {
+	return b.w - b.r
+}
+
+// Write copies bytes from p into the buffer.
+// It is an error to write more data than the buffer can hold.
+func (b *http2buffer) Write(p []byte) (n int, err error) {
+	if b.closed {
+		return 0, http2errWriteClosed
+	}
+
+	if b.r > 0 && len(p) > len(b.buf)-b.w {
+		copy(b.buf, b.buf[b.r:b.w])
+		b.w -= b.r
+		b.r = 0
+	}
+
+	n = copy(b.buf[b.w:], p)
+	b.w += n
+	if n < len(p) {
+		err = http2errWriteFull
+	}
+	return n, err
+}
+
+// Close marks the buffer as closed. Future calls to Write will
+// return an error. Future calls to Read, once the buffer is
+// empty, will return err.
+func (b *http2buffer) Close(err error) {
+	if !b.closed {
+		b.closed = true
+		b.err = err
+	}
+}
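+
+// A minimal usage sketch (editor's illustration, not part of the generated
+// bundle; assumes in-package access):
+//
+//	b := &http2buffer{buf: make([]byte, 8)}
+//	b.Write([]byte("hi"))   // returns n == 2
+//	p := make([]byte, 8)
+//	n, _ := b.Read(p)       // n == 2, string(p[:n]) == "hi"
+//	b.Close(io.EOF)         // once drained, subsequent Reads return io.EOF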
+
+// An ErrCode is an unsigned 32-bit error code as defined in the HTTP/2 spec.
+type http2ErrCode uint32
+
+const (
+	http2ErrCodeNo                 http2ErrCode = 0x0
+	http2ErrCodeProtocol           http2ErrCode = 0x1
+	http2ErrCodeInternal           http2ErrCode = 0x2
+	http2ErrCodeFlowControl        http2ErrCode = 0x3
+	http2ErrCodeSettingsTimeout    http2ErrCode = 0x4
+	http2ErrCodeStreamClosed       http2ErrCode = 0x5
+	http2ErrCodeFrameSize          http2ErrCode = 0x6
+	http2ErrCodeRefusedStream      http2ErrCode = 0x7
+	http2ErrCodeCancel             http2ErrCode = 0x8
+	http2ErrCodeCompression        http2ErrCode = 0x9
+	http2ErrCodeConnect            http2ErrCode = 0xa
+	http2ErrCodeEnhanceYourCalm    http2ErrCode = 0xb
+	http2ErrCodeInadequateSecurity http2ErrCode = 0xc
+	http2ErrCodeHTTP11Required     http2ErrCode = 0xd
+)
+
+var http2errCodeName = map[http2ErrCode]string{
+	http2ErrCodeNo:                 "NO_ERROR",
+	http2ErrCodeProtocol:           "PROTOCOL_ERROR",
+	http2ErrCodeInternal:           "INTERNAL_ERROR",
+	http2ErrCodeFlowControl:        "FLOW_CONTROL_ERROR",
+	http2ErrCodeSettingsTimeout:    "SETTINGS_TIMEOUT",
+	http2ErrCodeStreamClosed:       "STREAM_CLOSED",
+	http2ErrCodeFrameSize:          "FRAME_SIZE_ERROR",
+	http2ErrCodeRefusedStream:      "REFUSED_STREAM",
+	http2ErrCodeCancel:             "CANCEL",
+	http2ErrCodeCompression:        "COMPRESSION_ERROR",
+	http2ErrCodeConnect:            "CONNECT_ERROR",
+	http2ErrCodeEnhanceYourCalm:    "ENHANCE_YOUR_CALM",
+	http2ErrCodeInadequateSecurity: "INADEQUATE_SECURITY",
+	http2ErrCodeHTTP11Required:     "HTTP_1_1_REQUIRED",
+}
+
+func (e http2ErrCode) String() string {
+	if s, ok := http2errCodeName[e]; ok {
+		return s
+	}
+	return fmt.Sprintf("unknown error code 0x%x", uint32(e))
+}
+
+// ConnectionError is an error that results in the termination of the
+// entire connection.
+type http2ConnectionError http2ErrCode
+
+func (e http2ConnectionError) Error() string {
+	return fmt.Sprintf("connection error: %s", http2ErrCode(e))
+}
+
+// StreamError is an error that only affects one stream within an
+// HTTP/2 connection.
+type http2StreamError struct {
+	StreamID uint32
+	Code     http2ErrCode
+}
+
+func (e http2StreamError) Error() string {
+	return fmt.Sprintf("stream error: stream ID %d; %v", e.StreamID, e.Code)
+}
+
+// 6.9.1 The Flow Control Window
+// "If a sender receives a WINDOW_UPDATE that causes a flow control
+// window to exceed this maximum it MUST terminate either the stream
+// or the connection, as appropriate. For streams, [...]; for the
+// connection, a GOAWAY frame with a FLOW_CONTROL_ERROR code."
+type http2goAwayFlowError struct{}
+
+func (http2goAwayFlowError) Error() string { return "connection exceeded flow control window size" }
+
+// flow is the flow control window's size.
+type http2flow struct {
+	// n is the number of DATA bytes we're allowed to send.
+	// A flow is kept both on a conn and a per-stream.
+	n int32
+
+	// conn points to the shared connection-level flow that is
+	// shared by all streams on that conn. It is nil for the flow
+	// that's on the conn directly.
+	conn *http2flow
+}
+
+func (f *http2flow) setConnFlow(cf *http2flow) { f.conn = cf }
+
+func (f *http2flow) available() int32 {
+	n := f.n
+	if f.conn != nil && f.conn.n < n {
+		n = f.conn.n
+	}
+	return n
+}
+
+func (f *http2flow) take(n int32) {
+	if n > f.available() {
+		panic("internal error: took too much")
+	}
+	f.n -= n
+	if f.conn != nil {
+		f.conn.n -= n
+	}
+}
+
+// add adds n bytes (positive or negative) to the flow control window.
+// It returns false if the sum would exceed 2^31-1.
+func (f *http2flow) add(n int32) bool {
+	remain := (1<<31 - 1) - f.n
+	if n > remain {
+		return false
+	}
+	f.n += n
+	return true
+}
+
+const http2frameHeaderLen = 9
+
+var http2padZeros = make([]byte, 255) // zeros for padding
+
+// A FrameType is a registered frame type as defined in
+// http://http2.github.io/http2-spec/#rfc.section.11.2
+type http2FrameType uint8
+
+const (
+	http2FrameData         http2FrameType = 0x0
+	http2FrameHeaders      http2FrameType = 0x1
+	http2FramePriority     http2FrameType = 0x2
+	http2FrameRSTStream    http2FrameType = 0x3
+	http2FrameSettings     http2FrameType = 0x4
+	http2FramePushPromise  http2FrameType = 0x5
+	http2FramePing         http2FrameType = 0x6
+	http2FrameGoAway       http2FrameType = 0x7
+	http2FrameWindowUpdate http2FrameType = 0x8
+	http2FrameContinuation http2FrameType = 0x9
+)
+
+var http2frameName = map[http2FrameType]string{
+	http2FrameData:         "DATA",
+	http2FrameHeaders:      "HEADERS",
+	http2FramePriority:     "PRIORITY",
+	http2FrameRSTStream:    "RST_STREAM",
+	http2FrameSettings:     "SETTINGS",
+	http2FramePushPromise:  "PUSH_PROMISE",
+	http2FramePing:         "PING",
+	http2FrameGoAway:       "GOAWAY",
+	http2FrameWindowUpdate: "WINDOW_UPDATE",
+	http2FrameContinuation: "CONTINUATION",
+}
+
+func (t http2FrameType) String() string {
+	if s, ok := http2frameName[t]; ok {
+		return s
+	}
+	return fmt.Sprintf("UNKNOWN_FRAME_TYPE_%d", uint8(t))
+}
+
+// Flags is a bitmask of HTTP/2 flags.
+// The meaning of flags varies depending on the frame type.
+type http2Flags uint8
+
+// Has reports whether f contains all (0 or more) flags in v.
+func (f http2Flags) Has(v http2Flags) bool {
+	return (f & v) == v
+}
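+
+// For example (editor's sketch, using the constants defined below):
+//
+//	f := http2FlagHeadersEndStream | http2FlagHeadersEndHeaders
+//	f.Has(http2FlagHeadersEndStream) // true
+//	f.Has(http2FlagHeadersPadded)    // false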
+
+// Frame-specific FrameHeader flag bits.
+const (
+	// Data Frame
+	http2FlagDataEndStream http2Flags = 0x1
+	http2FlagDataPadded    http2Flags = 0x8
+
+	// Headers Frame
+	http2FlagHeadersEndStream  http2Flags = 0x1
+	http2FlagHeadersEndHeaders http2Flags = 0x4
+	http2FlagHeadersPadded     http2Flags = 0x8
+	http2FlagHeadersPriority   http2Flags = 0x20
+
+	// Settings Frame
+	http2FlagSettingsAck http2Flags = 0x1
+
+	// Ping Frame
+	http2FlagPingAck http2Flags = 0x1
+
+	// Continuation Frame
+	http2FlagContinuationEndHeaders http2Flags = 0x4
+
+	http2FlagPushPromiseEndHeaders http2Flags = 0x4
+	http2FlagPushPromisePadded     http2Flags = 0x8
+)
+
+var http2flagName = map[http2FrameType]map[http2Flags]string{
+	http2FrameData: {
+		http2FlagDataEndStream: "END_STREAM",
+		http2FlagDataPadded:    "PADDED",
+	},
+	http2FrameHeaders: {
+		http2FlagHeadersEndStream:  "END_STREAM",
+		http2FlagHeadersEndHeaders: "END_HEADERS",
+		http2FlagHeadersPadded:     "PADDED",
+		http2FlagHeadersPriority:   "PRIORITY",
+	},
+	http2FrameSettings: {
+		http2FlagSettingsAck: "ACK",
+	},
+	http2FramePing: {
+		http2FlagPingAck: "ACK",
+	},
+	http2FrameContinuation: {
+		http2FlagContinuationEndHeaders: "END_HEADERS",
+	},
+	http2FramePushPromise: {
+		http2FlagPushPromiseEndHeaders: "END_HEADERS",
+		http2FlagPushPromisePadded:     "PADDED",
+	},
+}
+
+// a frameParser parses a frame given its FrameHeader and payload
+// bytes. The length of payload will always equal fh.Length (which
+// might be 0).
+type http2frameParser func(fh http2FrameHeader, payload []byte) (http2Frame, error)
+
+var http2frameParsers = map[http2FrameType]http2frameParser{
+	http2FrameData:         http2parseDataFrame,
+	http2FrameHeaders:      http2parseHeadersFrame,
+	http2FramePriority:     http2parsePriorityFrame,
+	http2FrameRSTStream:    http2parseRSTStreamFrame,
+	http2FrameSettings:     http2parseSettingsFrame,
+	http2FramePushPromise:  http2parsePushPromise,
+	http2FramePing:         http2parsePingFrame,
+	http2FrameGoAway:       http2parseGoAwayFrame,
+	http2FrameWindowUpdate: http2parseWindowUpdateFrame,
+	http2FrameContinuation: http2parseContinuationFrame,
+}
+
+func http2typeFrameParser(t http2FrameType) http2frameParser {
+	if f := http2frameParsers[t]; f != nil {
+		return f
+	}
+	return http2parseUnknownFrame
+}
+
+// A FrameHeader is the 9 byte header of all HTTP/2 frames.
+//
+// See http://http2.github.io/http2-spec/#FrameHeader
+type http2FrameHeader struct {
+	valid bool // caller can access []byte fields in the Frame
+
+	// Type is the 1 byte frame type. There are ten standard frame
+	// types, but extension frame types may be written by WriteRawFrame
+	// and will be returned by ReadFrame (as UnknownFrame).
+	Type http2FrameType
+
+	// Flags are the 1 byte of 8 potential bit flags per frame.
+	// They are specific to the frame type.
+	Flags http2Flags
+
+	// Length is the length of the frame, not including the 9 byte header.
+	// The maximum size is one byte less than 16MB (uint24), but only
+	// frames up to 16KB are allowed without peer agreement.
+	Length uint32
+
+	// StreamID is which stream this frame is for. Certain frames
+	// are not stream-specific, in which case this field is 0.
+	StreamID uint32
+}
+
+// Header returns h. It exists so FrameHeaders can be embedded in other
+// specific frame types and implement the Frame interface.
+func (h http2FrameHeader) Header() http2FrameHeader { return h }
+
+func (h http2FrameHeader) String() string {
+	var buf bytes.Buffer
+	buf.WriteString("[FrameHeader ")
+	buf.WriteString(h.Type.String())
+	if h.Flags != 0 {
+		buf.WriteString(" flags=")
+		set := 0
+		for i := uint8(0); i < 8; i++ {
+			if h.Flags&(1<<i) == 0 {
+				continue
+			}
+			set++
+			if set > 1 {
+				buf.WriteByte('|')
+			}
+			name := http2flagName[h.Type][http2Flags(1<<i)]
+			if name != "" {
+				buf.WriteString(name)
+			} else {
+				fmt.Fprintf(&buf, "0x%x", 1<<i)
+			}
+		}
+	}
+	if h.StreamID != 0 {
+		fmt.Fprintf(&buf, " stream=%d", h.StreamID)
+	}
+	fmt.Fprintf(&buf, " len=%d]", h.Length)
+	return buf.String()
+}
+
+func (h *http2FrameHeader) checkValid() {
+	if !h.valid {
+		panic("Frame accessor called on non-owned Frame")
+	}
+}
+
+func (h *http2FrameHeader) invalidate() { h.valid = false }
+
+// frame header bytes.
+// Used only by ReadFrameHeader.
+var http2fhBytes = sync.Pool{
+	New: func() interface{} {
+		buf := make([]byte, http2frameHeaderLen)
+		return &buf
+	},
+}
+
+// ReadFrameHeader reads 9 bytes from r and returns a FrameHeader.
+// Most users should use Framer.ReadFrame instead.
+func http2ReadFrameHeader(r io.Reader) (http2FrameHeader, error) {
+	bufp := http2fhBytes.Get().(*[]byte)
+	defer http2fhBytes.Put(bufp)
+	return http2readFrameHeader(*bufp, r)
+}
+
+func http2readFrameHeader(buf []byte, r io.Reader) (http2FrameHeader, error) {
+	_, err := io.ReadFull(r, buf[:http2frameHeaderLen])
+	if err != nil {
+		return http2FrameHeader{}, err
+	}
+	return http2FrameHeader{
+		Length:   (uint32(buf[0])<<16 | uint32(buf[1])<<8 | uint32(buf[2])),
+		Type:     http2FrameType(buf[3]),
+		Flags:    http2Flags(buf[4]),
+		StreamID: binary.BigEndian.Uint32(buf[5:]) & (1<<31 - 1),
+		valid:    true,
+	}, nil
+}
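+
+// Editor's sketch of the wire layout decoded above: a 24-bit length, an 8-bit
+// type, an 8-bit flags byte, and a 31-bit stream ID (high bit masked off):
+//
+//	hdr := []byte{0x00, 0x00, 0x08, 0x06, 0x01, 0x00, 0x00, 0x00, 0x00}
+//	fh, _ := http2ReadFrameHeader(bytes.NewReader(hdr))
+//	// fh.Length == 8, fh.Type == http2FramePing,
+//	// fh.Flags.Has(http2FlagPingAck) == true, fh.StreamID == 0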
+
+// A Frame is the base interface implemented by all frame types.
+// Callers will generally type-assert the specific frame type:
+// *HeadersFrame, *SettingsFrame, *WindowUpdateFrame, etc.
+//
+// Frames are only valid until the next call to Framer.ReadFrame.
+type http2Frame interface {
+	Header() http2FrameHeader
+
+	// invalidate is called by Framer.ReadFrame to mark this
+	// frame's buffers as invalid, since the subsequent
+	// frame will reuse them.
+	invalidate()
+}
+
+// A Framer reads and writes Frames.
+type http2Framer struct {
+	r         io.Reader
+	lastFrame http2Frame
+
+	maxReadSize uint32
+	headerBuf   [http2frameHeaderLen]byte
+
+	// TODO: let getReadBuf be configurable, and use a less memory-pinning
+	// allocator in server.go to minimize memory pinned for many idle conns.
+	// Will probably also need to make frame invalidation have a hook too.
+	getReadBuf func(size uint32) []byte
+	readBuf    []byte // cache for default getReadBuf
+
+	maxWriteSize uint32 // zero means unlimited; TODO: implement
+
+	w    io.Writer
+	wbuf []byte
+
+	// AllowIllegalWrites permits the Framer's Write methods to
+	// write frames that do not conform to the HTTP/2 spec.  This
+	// permits using the Framer to test other HTTP/2
+	// implementations' conformance to the spec.
+	// If false, the Write methods will prefer to return an error
+	// rather than comply.
+	AllowIllegalWrites bool
+}
+
+func (f *http2Framer) startWrite(ftype http2FrameType, flags http2Flags, streamID uint32) {
+
+	f.wbuf = append(f.wbuf[:0],
+		0,
+		0,
+		0,
+		byte(ftype),
+		byte(flags),
+		byte(streamID>>24),
+		byte(streamID>>16),
+		byte(streamID>>8),
+		byte(streamID))
+}
+
+func (f *http2Framer) endWrite() error {
+
+	length := len(f.wbuf) - http2frameHeaderLen
+	if length >= (1 << 24) {
+		return http2ErrFrameTooLarge
+	}
+	_ = append(f.wbuf[:0],
+		byte(length>>16),
+		byte(length>>8),
+		byte(length))
+	n, err := f.w.Write(f.wbuf)
+	if err == nil && n != len(f.wbuf) {
+		err = io.ErrShortWrite
+	}
+	return err
+}
+
+func (f *http2Framer) writeByte(v byte) { f.wbuf = append(f.wbuf, v) }
+
+func (f *http2Framer) writeBytes(v []byte) { f.wbuf = append(f.wbuf, v...) }
+
+func (f *http2Framer) writeUint16(v uint16) { f.wbuf = append(f.wbuf, byte(v>>8), byte(v)) }
+
+func (f *http2Framer) writeUint32(v uint32) {
+	f.wbuf = append(f.wbuf, byte(v>>24), byte(v>>16), byte(v>>8), byte(v))
+}
+
+const (
+	http2minMaxFrameSize = 1 << 14
+	http2maxFrameSize    = 1<<24 - 1
+)
+
+// NewFramer returns a Framer that writes frames to w and reads them from r.
+func http2NewFramer(w io.Writer, r io.Reader) *http2Framer {
+	fr := &http2Framer{
+		w: w,
+		r: r,
+	}
+	fr.getReadBuf = func(size uint32) []byte {
+		if cap(fr.readBuf) >= int(size) {
+			return fr.readBuf[:size]
+		}
+		fr.readBuf = make([]byte, size)
+		return fr.readBuf
+	}
+	fr.SetMaxReadFrameSize(http2maxFrameSize)
+	return fr
+}
+
+// SetMaxReadFrameSize sets the maximum size of a frame
+// that will be read by a subsequent call to ReadFrame.
+// It is the caller's responsibility to advertise this
+// limit with a SETTINGS frame.
+func (fr *http2Framer) SetMaxReadFrameSize(v uint32) {
+	if v > http2maxFrameSize {
+		v = http2maxFrameSize
+	}
+	fr.maxReadSize = v
+}
+
+// ErrFrameTooLarge is returned from Framer.ReadFrame when the peer
+// sends a frame that is larger than declared with SetMaxReadFrameSize.
+var http2ErrFrameTooLarge = errors.New("http2: frame too large")
+
+// ReadFrame reads a single frame. The returned Frame is only valid
+// until the next call to ReadFrame.
+// If the frame is larger than previously set with SetMaxReadFrameSize,
+// the returned error is ErrFrameTooLarge.
+func (fr *http2Framer) ReadFrame() (http2Frame, error) {
+	if fr.lastFrame != nil {
+		fr.lastFrame.invalidate()
+	}
+	fh, err := http2readFrameHeader(fr.headerBuf[:], fr.r)
+	if err != nil {
+		return nil, err
+	}
+	if fh.Length > fr.maxReadSize {
+		return nil, http2ErrFrameTooLarge
+	}
+	payload := fr.getReadBuf(fh.Length)
+	if _, err := io.ReadFull(fr.r, payload); err != nil {
+		return nil, err
+	}
+	f, err := http2typeFrameParser(fh.Type)(fh, payload)
+	if err != nil {
+		return nil, err
+	}
+	fr.lastFrame = f
+	return f, nil
+}
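+
+// Editor's round-trip sketch (in-package, error handling elided):
+//
+//	var buf bytes.Buffer
+//	fr := http2NewFramer(&buf, &buf)
+//	fr.WriteSettings(http2Setting{ID: http2SettingMaxFrameSize, Val: 1 << 14})
+//	f, _ := fr.ReadFrame()
+//	sf := f.(*http2SettingsFrame)
+//	v, ok := sf.Value(http2SettingMaxFrameSize) // v == 16384, ok == true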
+
+// A DataFrame conveys arbitrary, variable-length sequences of octets
+// associated with a stream.
+// See http://http2.github.io/http2-spec/#rfc.section.6.1
+type http2DataFrame struct {
+	http2FrameHeader
+	data []byte
+}
+
+func (f *http2DataFrame) StreamEnded() bool {
+	return f.http2FrameHeader.Flags.Has(http2FlagDataEndStream)
+}
+
+// Data returns the frame's data octets, not including any padding
+// size byte or padding suffix bytes.
+// The caller must not retain the returned memory past the next
+// call to ReadFrame.
+func (f *http2DataFrame) Data() []byte {
+	f.checkValid()
+	return f.data
+}
+
+func http2parseDataFrame(fh http2FrameHeader, payload []byte) (http2Frame, error) {
+	if fh.StreamID == 0 {
+
+		return nil, http2ConnectionError(http2ErrCodeProtocol)
+	}
+	f := &http2DataFrame{
+		http2FrameHeader: fh,
+	}
+	var padSize byte
+	if fh.Flags.Has(http2FlagDataPadded) {
+		var err error
+		payload, padSize, err = http2readByte(payload)
+		if err != nil {
+			return nil, err
+		}
+	}
+	if int(padSize) > len(payload) {
+
+		return nil, http2ConnectionError(http2ErrCodeProtocol)
+	}
+	f.data = payload[:len(payload)-int(padSize)]
+	return f, nil
+}
+
+var http2errStreamID = errors.New("invalid streamid")
+
+func http2validStreamID(streamID uint32) bool {
+	return streamID != 0 && streamID&(1<<31) == 0
+}
+
+// WriteData writes a DATA frame.
+//
+// It will perform exactly one Write to the underlying Writer.
+// It is the caller's responsibility to not call other Write methods concurrently.
+func (f *http2Framer) WriteData(streamID uint32, endStream bool, data []byte) error {
+
+	if !http2validStreamID(streamID) && !f.AllowIllegalWrites {
+		return http2errStreamID
+	}
+	var flags http2Flags
+	if endStream {
+		flags |= http2FlagDataEndStream
+	}
+	f.startWrite(http2FrameData, flags, streamID)
+	f.wbuf = append(f.wbuf, data...)
+	return f.endWrite()
+}
+
+// A SettingsFrame conveys configuration parameters that affect how
+// endpoints communicate, such as preferences and constraints on peer
+// behavior.
+//
+// See http://http2.github.io/http2-spec/#SETTINGS
+type http2SettingsFrame struct {
+	http2FrameHeader
+	p []byte
+}
+
+func http2parseSettingsFrame(fh http2FrameHeader, p []byte) (http2Frame, error) {
+	if fh.Flags.Has(http2FlagSettingsAck) && fh.Length > 0 {
+
+		return nil, http2ConnectionError(http2ErrCodeFrameSize)
+	}
+	if fh.StreamID != 0 {
+
+		return nil, http2ConnectionError(http2ErrCodeProtocol)
+	}
+	if len(p)%6 != 0 {
+
+		return nil, http2ConnectionError(http2ErrCodeFrameSize)
+	}
+	f := &http2SettingsFrame{http2FrameHeader: fh, p: p}
+	if v, ok := f.Value(http2SettingInitialWindowSize); ok && v > (1<<31)-1 {
+
+		return nil, http2ConnectionError(http2ErrCodeFlowControl)
+	}
+	return f, nil
+}
+
+func (f *http2SettingsFrame) IsAck() bool {
+	return f.http2FrameHeader.Flags.Has(http2FlagSettingsAck)
+}
+
+func (f *http2SettingsFrame) Value(s http2SettingID) (v uint32, ok bool) {
+	f.checkValid()
+	buf := f.p
+	for len(buf) > 0 {
+		settingID := http2SettingID(binary.BigEndian.Uint16(buf[:2]))
+		if settingID == s {
+			return binary.BigEndian.Uint32(buf[2:6]), true
+		}
+		buf = buf[6:]
+	}
+	return 0, false
+}
+
+// ForeachSetting runs fn for each setting.
+// It stops and returns the first error.
+func (f *http2SettingsFrame) ForeachSetting(fn func(http2Setting) error) error {
+	f.checkValid()
+	buf := f.p
+	for len(buf) > 0 {
+		if err := fn(http2Setting{
+			http2SettingID(binary.BigEndian.Uint16(buf[:2])),
+			binary.BigEndian.Uint32(buf[2:6]),
+		}); err != nil {
+			return err
+		}
+		buf = buf[6:]
+	}
+	return nil
+}
+
+// WriteSettings writes a SETTINGS frame with zero or more settings
+// specified and the ACK bit not set.
+//
+// It will perform exactly one Write to the underlying Writer.
+// It is the caller's responsibility to not call other Write methods concurrently.
+func (f *http2Framer) WriteSettings(settings ...http2Setting) error {
+	f.startWrite(http2FrameSettings, 0, 0)
+	for _, s := range settings {
+		f.writeUint16(uint16(s.ID))
+		f.writeUint32(s.Val)
+	}
+	return f.endWrite()
+}
+
+// WriteSettingsAck writes an empty SETTINGS frame with the ACK bit set.
+//
+// It will perform exactly one Write to the underlying Writer.
+// It is the caller's responsibility to not call other Write methods concurrently.
+func (f *http2Framer) WriteSettingsAck() error {
+	f.startWrite(http2FrameSettings, http2FlagSettingsAck, 0)
+	return f.endWrite()
+}
+
+// A PingFrame is a mechanism for measuring a minimal round trip time
+// from the sender, as well as determining whether an idle connection
+// is still functional.
+// See http://http2.github.io/http2-spec/#rfc.section.6.7
+type http2PingFrame struct {
+	http2FrameHeader
+	Data [8]byte
+}
+
+func http2parsePingFrame(fh http2FrameHeader, payload []byte) (http2Frame, error) {
+	if len(payload) != 8 {
+		return nil, http2ConnectionError(http2ErrCodeFrameSize)
+	}
+	if fh.StreamID != 0 {
+		return nil, http2ConnectionError(http2ErrCodeProtocol)
+	}
+	f := &http2PingFrame{http2FrameHeader: fh}
+	copy(f.Data[:], payload)
+	return f, nil
+}
+
+func (f *http2Framer) WritePing(ack bool, data [8]byte) error {
+	var flags http2Flags
+	if ack {
+		flags = http2FlagPingAck
+	}
+	f.startWrite(http2FramePing, flags, 0)
+	f.writeBytes(data[:])
+	return f.endWrite()
+}
+
+// A GoAwayFrame informs the remote peer to stop creating streams on this connection.
+// See http://http2.github.io/http2-spec/#rfc.section.6.8
+type http2GoAwayFrame struct {
+	http2FrameHeader
+	LastStreamID uint32
+	ErrCode      http2ErrCode
+	debugData    []byte
+}
+
+// DebugData returns any debug data in the GOAWAY frame. Its contents
+// are not defined.
+// The caller must not retain the returned memory past the next
+// call to ReadFrame.
+func (f *http2GoAwayFrame) DebugData() []byte {
+	f.checkValid()
+	return f.debugData
+}
+
+func http2parseGoAwayFrame(fh http2FrameHeader, p []byte) (http2Frame, error) {
+	if fh.StreamID != 0 {
+		return nil, http2ConnectionError(http2ErrCodeProtocol)
+	}
+	if len(p) < 8 {
+		return nil, http2ConnectionError(http2ErrCodeFrameSize)
+	}
+	return &http2GoAwayFrame{
+		http2FrameHeader: fh,
+		LastStreamID:     binary.BigEndian.Uint32(p[:4]) & (1<<31 - 1),
+		ErrCode:          http2ErrCode(binary.BigEndian.Uint32(p[4:8])),
+		debugData:        p[8:],
+	}, nil
+}
+
+func (f *http2Framer) WriteGoAway(maxStreamID uint32, code http2ErrCode, debugData []byte) error {
+	f.startWrite(http2FrameGoAway, 0, 0)
+	f.writeUint32(maxStreamID & (1<<31 - 1))
+	f.writeUint32(uint32(code))
+	f.writeBytes(debugData)
+	return f.endWrite()
+}
+
+// An UnknownFrame is the frame type returned when the frame type is unknown
+// or no specific frame type parser exists.
+type http2UnknownFrame struct {
+	http2FrameHeader
+	p []byte
+}
+
+// Payload returns the frame's payload (after the header).  It is not
+// valid to call this method after a subsequent call to
+// Framer.ReadFrame, nor is it valid to retain the returned slice.
+// The memory is owned by the Framer and is invalidated when the next
+// frame is read.
+func (f *http2UnknownFrame) Payload() []byte {
+	f.checkValid()
+	return f.p
+}
+
+func http2parseUnknownFrame(fh http2FrameHeader, p []byte) (http2Frame, error) {
+	return &http2UnknownFrame{fh, p}, nil
+}
+
+// A WindowUpdateFrame is used to implement flow control.
+// See http://http2.github.io/http2-spec/#rfc.section.6.9
+type http2WindowUpdateFrame struct {
+	http2FrameHeader
+	Increment uint32
+}
+
+func http2parseWindowUpdateFrame(fh http2FrameHeader, p []byte) (http2Frame, error) {
+	if len(p) != 4 {
+		return nil, http2ConnectionError(http2ErrCodeFrameSize)
+	}
+	inc := binary.BigEndian.Uint32(p[:4]) & 0x7fffffff
+	if inc == 0 {
+
+		if fh.StreamID == 0 {
+			return nil, http2ConnectionError(http2ErrCodeProtocol)
+		}
+		return nil, http2StreamError{fh.StreamID, http2ErrCodeProtocol}
+	}
+	return &http2WindowUpdateFrame{
+		http2FrameHeader: fh,
+		Increment:        inc,
+	}, nil
+}
+
+// WriteWindowUpdate writes a WINDOW_UPDATE frame.
+// The increment value must be between 1 and 2,147,483,647, inclusive.
+// If the Stream ID is zero, the window update applies to the
+// connection as a whole.
+func (f *http2Framer) WriteWindowUpdate(streamID, incr uint32) error {
+
+	if (incr < 1 || incr > 2147483647) && !f.AllowIllegalWrites {
+		return errors.New("illegal window increment value")
+	}
+	f.startWrite(http2FrameWindowUpdate, 0, streamID)
+	f.writeUint32(incr)
+	return f.endWrite()
+}
+
+// A HeadersFrame is used to open a stream and additionally carries a
+// header block fragment.
+type http2HeadersFrame struct {
+	http2FrameHeader
+
+	// Priority is set if FlagHeadersPriority is set in the FrameHeader.
+	Priority http2PriorityParam
+
+	headerFragBuf []byte // not owned
+}
+
+func (f *http2HeadersFrame) HeaderBlockFragment() []byte {
+	f.checkValid()
+	return f.headerFragBuf
+}
+
+func (f *http2HeadersFrame) HeadersEnded() bool {
+	return f.http2FrameHeader.Flags.Has(http2FlagHeadersEndHeaders)
+}
+
+func (f *http2HeadersFrame) StreamEnded() bool {
+	return f.http2FrameHeader.Flags.Has(http2FlagHeadersEndStream)
+}
+
+func (f *http2HeadersFrame) HasPriority() bool {
+	return f.http2FrameHeader.Flags.Has(http2FlagHeadersPriority)
+}
+
+func http2parseHeadersFrame(fh http2FrameHeader, p []byte) (_ http2Frame, err error) {
+	hf := &http2HeadersFrame{
+		http2FrameHeader: fh,
+	}
+	if fh.StreamID == 0 {
+
+		return nil, http2ConnectionError(http2ErrCodeProtocol)
+	}
+	var padLength uint8
+	if fh.Flags.Has(http2FlagHeadersPadded) {
+		if p, padLength, err = http2readByte(p); err != nil {
+			return
+		}
+	}
+	if fh.Flags.Has(http2FlagHeadersPriority) {
+		var v uint32
+		p, v, err = http2readUint32(p)
+		if err != nil {
+			return nil, err
+		}
+		hf.Priority.StreamDep = v & 0x7fffffff
+		hf.Priority.Exclusive = (v != hf.Priority.StreamDep)
+		p, hf.Priority.Weight, err = http2readByte(p)
+		if err != nil {
+			return nil, err
+		}
+	}
+	if len(p)-int(padLength) <= 0 {
+		return nil, http2StreamError{fh.StreamID, http2ErrCodeProtocol}
+	}
+	hf.headerFragBuf = p[:len(p)-int(padLength)]
+	return hf, nil
+}
+
+// HeadersFrameParam are the parameters for writing a HEADERS frame.
+type http2HeadersFrameParam struct {
+	// StreamID is the required Stream ID to initiate.
+	StreamID uint32
+	// BlockFragment is part (or all) of a Header Block.
+	BlockFragment []byte
+
+	// EndStream indicates that the header block is the last that
+	// the endpoint will send for the identified stream. Setting
+	// this flag causes the stream to enter one of "half closed"
+	// states.
+	EndStream bool
+
+	// EndHeaders indicates that this frame contains an entire
+	// header block and is not followed by any
+	// CONTINUATION frames.
+	EndHeaders bool
+
+	// PadLength is the optional number of bytes of zeros to add
+	// to this frame.
+	PadLength uint8
+
+	// Priority, if non-zero, includes stream priority information
+	// in the HEADER frame.
+	Priority http2PriorityParam
+}
+
+// WriteHeaders writes a single HEADERS frame.
+//
+// This is a low-level header writing method. Encoding headers and
+// splitting them into any necessary CONTINUATION frames is handled
+// elsewhere.
+//
+// It will perform exactly one Write to the underlying Writer.
+// It is the caller's responsibility to not call other Write methods concurrently.
+func (f *http2Framer) WriteHeaders(p http2HeadersFrameParam) error {
+	if !http2validStreamID(p.StreamID) && !f.AllowIllegalWrites {
+		return http2errStreamID
+	}
+	var flags http2Flags
+	if p.PadLength != 0 {
+		flags |= http2FlagHeadersPadded
+	}
+	if p.EndStream {
+		flags |= http2FlagHeadersEndStream
+	}
+	if p.EndHeaders {
+		flags |= http2FlagHeadersEndHeaders
+	}
+	if !p.Priority.IsZero() {
+		flags |= http2FlagHeadersPriority
+	}
+	f.startWrite(http2FrameHeaders, flags, p.StreamID)
+	if p.PadLength != 0 {
+		f.writeByte(p.PadLength)
+	}
+	if !p.Priority.IsZero() {
+		v := p.Priority.StreamDep
+		if !http2validStreamID(v) && !f.AllowIllegalWrites {
+			return errors.New("invalid dependent stream id")
+		}
+		if p.Priority.Exclusive {
+			v |= 1 << 31
+		}
+		f.writeUint32(v)
+		f.writeByte(p.Priority.Weight)
+	}
+	f.wbuf = append(f.wbuf, p.BlockFragment...)
+	f.wbuf = append(f.wbuf, http2padZeros[:p.PadLength]...)
+	return f.endWrite()
+}
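+
+// Editor's sketch of a typical call opening stream 1; fr is an *http2Framer
+// and hpackBytes is assumed to be an already HPACK-encoded header block:
+//
+//	fr.WriteHeaders(http2HeadersFrameParam{
+//		StreamID:      1,
+//		BlockFragment: hpackBytes,
+//		EndStream:     false,
+//		EndHeaders:    true,
+//	})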
+
+// A PriorityFrame specifies the sender-advised priority of a stream.
+// See http://http2.github.io/http2-spec/#rfc.section.6.3
+type http2PriorityFrame struct {
+	http2FrameHeader
+	http2PriorityParam
+}
+
+// PriorityParam are the stream prioritization parameters.
+type http2PriorityParam struct {
+	// StreamDep is a 31-bit stream identifier for the
+	// stream that this stream depends on. Zero means no
+	// dependency.
+	StreamDep uint32
+
+	// Exclusive is whether the dependency is exclusive.
+	Exclusive bool
+
+	// Weight is the stream's zero-indexed weight. It should be
+	// set together with StreamDep, or neither should be set.  Per
+	// the spec, "Add one to the value to obtain a weight between
+	// 1 and 256."
+	Weight uint8
+}
+
+func (p http2PriorityParam) IsZero() bool {
+	return p == http2PriorityParam{}
+}
+
+func http2parsePriorityFrame(fh http2FrameHeader, payload []byte) (http2Frame, error) {
+	if fh.StreamID == 0 {
+		return nil, http2ConnectionError(http2ErrCodeProtocol)
+	}
+	if len(payload) != 5 {
+		return nil, http2ConnectionError(http2ErrCodeFrameSize)
+	}
+	v := binary.BigEndian.Uint32(payload[:4])
+	streamID := v & 0x7fffffff
+	return &http2PriorityFrame{
+		http2FrameHeader: fh,
+		http2PriorityParam: http2PriorityParam{
+			Weight:    payload[4],
+			StreamDep: streamID,
+			Exclusive: streamID != v,
+		},
+	}, nil
+}
+
+// WritePriority writes a PRIORITY frame.
+//
+// It will perform exactly one Write to the underlying Writer.
+// It is the caller's responsibility to not call other Write methods concurrently.
+func (f *http2Framer) WritePriority(streamID uint32, p http2PriorityParam) error {
+	if !http2validStreamID(streamID) && !f.AllowIllegalWrites {
+		return http2errStreamID
+	}
+	f.startWrite(http2FramePriority, 0, streamID)
+	v := p.StreamDep
+	if p.Exclusive {
+		v |= 1 << 31
+	}
+	f.writeUint32(v)
+	f.writeByte(p.Weight)
+	return f.endWrite()
+}
+
+// A RSTStreamFrame allows for abnormal termination of a stream.
+// See http://http2.github.io/http2-spec/#rfc.section.6.4
+type http2RSTStreamFrame struct {
+	http2FrameHeader
+	ErrCode http2ErrCode
+}
+
+func http2parseRSTStreamFrame(fh http2FrameHeader, p []byte) (http2Frame, error) {
+	if len(p) != 4 {
+		return nil, http2ConnectionError(http2ErrCodeFrameSize)
+	}
+	if fh.StreamID == 0 {
+		return nil, http2ConnectionError(http2ErrCodeProtocol)
+	}
+	return &http2RSTStreamFrame{fh, http2ErrCode(binary.BigEndian.Uint32(p[:4]))}, nil
+}
+
+// WriteRSTStream writes a RST_STREAM frame.
+//
+// It will perform exactly one Write to the underlying Writer.
+// It is the caller's responsibility to not call other Write methods concurrently.
+func (f *http2Framer) WriteRSTStream(streamID uint32, code http2ErrCode) error {
+	if !http2validStreamID(streamID) && !f.AllowIllegalWrites {
+		return http2errStreamID
+	}
+	f.startWrite(http2FrameRSTStream, 0, streamID)
+	f.writeUint32(uint32(code))
+	return f.endWrite()
+}
+
+// A ContinuationFrame is used to continue a sequence of header block fragments.
+// See http://http2.github.io/http2-spec/#rfc.section.6.10
+type http2ContinuationFrame struct {
+	http2FrameHeader
+	headerFragBuf []byte
+}
+
+func http2parseContinuationFrame(fh http2FrameHeader, p []byte) (http2Frame, error) {
+	return &http2ContinuationFrame{fh, p}, nil
+}
+
+func (f *http2ContinuationFrame) StreamEnded() bool {
+	return f.http2FrameHeader.Flags.Has(http2FlagDataEndStream)
+}
+
+func (f *http2ContinuationFrame) HeaderBlockFragment() []byte {
+	f.checkValid()
+	return f.headerFragBuf
+}
+
+func (f *http2ContinuationFrame) HeadersEnded() bool {
+	return f.http2FrameHeader.Flags.Has(http2FlagContinuationEndHeaders)
+}
+
+// WriteContinuation writes a CONTINUATION frame.
+//
+// It will perform exactly one Write to the underlying Writer.
+// It is the caller's responsibility to not call other Write methods concurrently.
+func (f *http2Framer) WriteContinuation(streamID uint32, endHeaders bool, headerBlockFragment []byte) error {
+	if !http2validStreamID(streamID) && !f.AllowIllegalWrites {
+		return http2errStreamID
+	}
+	var flags http2Flags
+	if endHeaders {
+		flags |= http2FlagContinuationEndHeaders
+	}
+	f.startWrite(http2FrameContinuation, flags, streamID)
+	f.wbuf = append(f.wbuf, headerBlockFragment...)
+	return f.endWrite()
+}
+
+// A PushPromiseFrame is used to initiate a server stream.
+// See http://http2.github.io/http2-spec/#rfc.section.6.6
+type http2PushPromiseFrame struct {
+	http2FrameHeader
+	PromiseID     uint32
+	headerFragBuf []byte // not owned
+}
+
+func (f *http2PushPromiseFrame) HeaderBlockFragment() []byte {
+	f.checkValid()
+	return f.headerFragBuf
+}
+
+func (f *http2PushPromiseFrame) HeadersEnded() bool {
+	return f.http2FrameHeader.Flags.Has(http2FlagPushPromiseEndHeaders)
+}
+
+func http2parsePushPromise(fh http2FrameHeader, p []byte) (_ http2Frame, err error) {
+	pp := &http2PushPromiseFrame{
+		http2FrameHeader: fh,
+	}
+	if pp.StreamID == 0 {
+
+		return nil, http2ConnectionError(http2ErrCodeProtocol)
+	}
+	// The PUSH_PROMISE frame includes optional padding.
+	// Padding fields and flags are identical to those defined for DATA frames.
+	var padLength uint8
+	if fh.Flags.Has(http2FlagPushPromisePadded) {
+		if p, padLength, err = http2readByte(p); err != nil {
+			return
+		}
+	}
+
+	p, pp.PromiseID, err = http2readUint32(p)
+	if err != nil {
+		return
+	}
+	pp.PromiseID = pp.PromiseID & (1<<31 - 1)
+
+	if int(padLength) > len(p) {
+
+		return nil, http2ConnectionError(http2ErrCodeProtocol)
+	}
+	pp.headerFragBuf = p[:len(p)-int(padLength)]
+	return pp, nil
+}
+
+// PushPromiseParam are the parameters for writing a PUSH_PROMISE frame.
+type http2PushPromiseParam struct {
+	// StreamID is the required Stream ID to initiate.
+	StreamID uint32
+
+	// PromiseID is the required Stream ID that this
+	// Push Promise announces.
+	PromiseID uint32
+
+	// BlockFragment is part (or all) of a Header Block.
+	BlockFragment []byte
+
+	// EndHeaders indicates that this frame contains an entire
+	// header block and is not followed by any
+	// CONTINUATION frames.
+	EndHeaders bool
+
+	// PadLength is the optional number of bytes of zeros to add
+	// to this frame.
+	PadLength uint8
+}
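+
+// Editor's sketch of the per-stream/per-conn coupling:
+//
+//	var connFlow http2flow
+//	connFlow.add(100)
+//	var st http2flow
+//	st.add(1000)
+//	st.setConnFlow(&connFlow)
+//	st.available() // 100: capped by the shared conn-level window
+//	st.take(60)    // debits both: st.n == 940, connFlow.n == 40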
+
+// WritePushPromise writes a single PushPromise Frame.
+//
+// As with Header Frames, this is the low-level call for writing
+// individual frames. Continuation frames are handled elsewhere.
+//
+// It will perform exactly one Write to the underlying Writer.
+// It is the caller's responsibility to not call other Write methods concurrently.
+func (f *http2Framer) WritePushPromise(p http2PushPromiseParam) error {
+	if !http2validStreamID(p.StreamID) && !f.AllowIllegalWrites {
+		return http2errStreamID
+	}
+	var flags http2Flags
+	if p.PadLength != 0 {
+		flags |= http2FlagPushPromisePadded
+	}
+	if p.EndHeaders {
+		flags |= http2FlagPushPromiseEndHeaders
+	}
+	f.startWrite(http2FramePushPromise, flags, p.StreamID)
+	if p.PadLength != 0 {
+		f.writeByte(p.PadLength)
+	}
+	if !http2validStreamID(p.PromiseID) && !f.AllowIllegalWrites {
+		return http2errStreamID
+	}
+	f.writeUint32(p.PromiseID)
+	f.wbuf = append(f.wbuf, p.BlockFragment...)
+	f.wbuf = append(f.wbuf, http2padZeros[:p.PadLength]...)
+	return f.endWrite()
+}
+
+// WriteRawFrame writes a raw frame. This can be used to write
+// extension frames unknown to this package.
+func (f *http2Framer) WriteRawFrame(t http2FrameType, flags http2Flags, streamID uint32, payload []byte) error {
+	f.startWrite(t, flags, streamID)
+	f.writeBytes(payload)
+	return f.endWrite()
+}
+
+func http2readByte(p []byte) (remain []byte, b byte, err error) {
+	if len(p) == 0 {
+		return nil, 0, io.ErrUnexpectedEOF
+	}
+	return p[1:], p[0], nil
+}
+
+func http2readUint32(p []byte) (remain []byte, v uint32, err error) {
+	if len(p) < 4 {
+		return nil, 0, io.ErrUnexpectedEOF
+	}
+	return p[4:], binary.BigEndian.Uint32(p[:4]), nil
+}
+
+type http2streamEnder interface {
+	StreamEnded() bool
+}
+
+type http2headersEnder interface {
+	HeadersEnded() bool
+}
+
+var http2DebugGoroutines = os.Getenv("DEBUG_HTTP2_GOROUTINES") == "1"
+
+type http2goroutineLock uint64
+
+func http2newGoroutineLock() http2goroutineLock {
+	if !http2DebugGoroutines {
+		return 0
+	}
+	return http2goroutineLock(http2curGoroutineID())
+}
+
+func (g http2goroutineLock) check() {
+	if !http2DebugGoroutines {
+		return
+	}
+	if http2curGoroutineID() != uint64(g) {
+		panic("running on the wrong goroutine")
+	}
+}
+
+func (g http2goroutineLock) checkNotOn() {
+	if !http2DebugGoroutines {
+		return
+	}
+	if http2curGoroutineID() == uint64(g) {
+		panic("running on the wrong goroutine")
+	}
+}
+
+var http2goroutineSpace = []byte("goroutine ")
+
+func http2curGoroutineID() uint64 {
+	bp := http2littleBuf.Get().(*[]byte)
+	defer http2littleBuf.Put(bp)
+	b := *bp
+	b = b[:runtime.Stack(b, false)]
+
+	b = bytes.TrimPrefix(b, http2goroutineSpace)
+	i := bytes.IndexByte(b, ' ')
+	if i < 0 {
+		panic(fmt.Sprintf("No space found in %q", b))
+	}
+	b = b[:i]
+	n, err := http2parseUintBytes(b, 10, 64)
+	if err != nil {
+		panic(fmt.Sprintf("Failed to parse goroutine ID out of %q: %v", b, err))
+	}
+	return n
+}
+
+var http2littleBuf = sync.Pool{
+	New: func() interface{} {
+		buf := make([]byte, 64)
+		return &buf
+	},
+}
+
+// parseUintBytes is like strconv.ParseUint, but using a []byte.
+func http2parseUintBytes(s []byte, base int, bitSize int) (n uint64, err error) {
+	var cutoff, maxVal uint64
+
+	if bitSize == 0 {
+		bitSize = int(strconv.IntSize)
+	}
+
+	s0 := s
+	switch {
+	case len(s) < 1:
+		err = strconv.ErrSyntax
+		goto Error
+
+	case 2 <= base && base <= 36:
+
+	case base == 0:
+
+		switch {
+		case s[0] == '0' && len(s) > 1 && (s[1] == 'x' || s[1] == 'X'):
+			base = 16
+			s = s[2:]
+			if len(s) < 1 {
+				err = strconv.ErrSyntax
+				goto Error
+			}
+		case s[0] == '0':
+			base = 8
+		default:
+			base = 10
+		}
+
+	default:
+		err = errors.New("invalid base " + strconv.Itoa(base))
+		goto Error
+	}
+
+	n = 0
+	cutoff = http2cutoff64(base)
+	maxVal = 1<<uint(bitSize) - 1
+
+	for i := 0; i < len(s); i++ {
+		var v byte
+		d := s[i]
+		switch {
+		case '0' <= d && d <= '9':
+			v = d - '0'
+		case 'a' <= d && d <= 'z':
+			v = d - 'a' + 10
+		case 'A' <= d && d <= 'Z':
+			v = d - 'A' + 10
+		default:
+			n = 0
+			err = strconv.ErrSyntax
+			goto Error
+		}
+		if int(v) >= base {
+			n = 0
+			err = strconv.ErrSyntax
+			goto Error
+		}
+
+		if n >= cutoff {
+
+			n = 1<<64 - 1
+			err = strconv.ErrRange
+			goto Error
+		}
+		n *= uint64(base)
+
+		n1 := n + uint64(v)
+		if n1 < n || n1 > maxVal {
+
+			n = 1<<64 - 1
+			err = strconv.ErrRange
+			goto Error
+		}
+		n = n1
+	}
+
+	return n, nil
+
+Error:
+	return n, &strconv.NumError{Func: "ParseUint", Num: string(s0), Err: err}
+}
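+
+// Editor's sketch, mirroring strconv.ParseUint:
+//
+//	n, _ := http2parseUintBytes([]byte("123"), 10, 64) // n == 123
+//	m, _ := http2parseUintBytes([]byte("0x1f"), 0, 64) // base auto-detected as hex: m == 31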
+
+// Return the first number n such that n*base >= 1<<64.
+func http2cutoff64(base int) uint64 {
+	if base < 2 {
+		return 0
+	}
+	return (1<<64-1)/uint64(base) + 1
+}
+
+var (
+	http2commonLowerHeader = map[string]string{} // Go-Canonical-Case -> lower-case
+	http2commonCanonHeader = map[string]string{} // lower-case -> Go-Canonical-Case
+)
+
+func init() {
+	for _, v := range []string{
+		"accept",
+		"accept-charset",
+		"accept-encoding",
+		"accept-language",
+		"accept-ranges",
+		"age",
+		"access-control-allow-origin",
+		"allow",
+		"authorization",
+		"cache-control",
+		"content-disposition",
+		"content-encoding",
+		"content-language",
+		"content-length",
+		"content-location",
+		"content-range",
+		"content-type",
+		"cookie",
+		"date",
+		"etag",
+		"expect",
+		"expires",
+		"from",
+		"host",
+		"if-match",
+		"if-modified-since",
+		"if-none-match",
+		"if-unmodified-since",
+		"last-modified",
+		"link",
+		"location",
+		"max-forwards",
+		"proxy-authenticate",
+		"proxy-authorization",
+		"range",
+		"referer",
+		"refresh",
+		"retry-after",
+		"server",
+		"set-cookie",
+		"strict-transport-security",
+		"transfer-encoding",
+		"user-agent",
+		"vary",
+		"via",
+		"www-authenticate",
+	} {
+		chk := CanonicalHeaderKey(v)
+		http2commonLowerHeader[chk] = v
+		http2commonCanonHeader[v] = chk
+	}
+}
+
+func http2lowerHeader(v string) string {
+	if s, ok := http2commonLowerHeader[v]; ok {
+		return s
+	}
+	return strings.ToLower(v)
+}
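+
+// Editor's sketch:
+//
+//	http2lowerHeader("Content-Type") // "content-type" (table hit, no alloc)
+//	http2lowerHeader("X-Custom")     // "x-custom" (strings.ToLower fallback)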
+
+var http2VerboseLogs = false
+
+const (
+	// ClientPreface is the string that must be sent by new
+	// connections from clients.
+	http2ClientPreface = "PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n"
+
+	// SETTINGS_MAX_FRAME_SIZE default
+	// http://http2.github.io/http2-spec/#rfc.section.6.5.2
+	http2initialMaxFrameSize = 16384
+
+	// NextProtoTLS is the NPN/ALPN protocol negotiated during
+	// HTTP/2's TLS setup.
+	http2NextProtoTLS = "h2"
+
+	// http://http2.github.io/http2-spec/#SettingValues
+	http2initialHeaderTableSize = 4096
+
+	http2initialWindowSize = 65535 // 6.9.2 Initial Flow Control Window Size
+
+	http2defaultMaxReadFrameSize = 1 << 20
+)
+
+var (
+	http2clientPreface = []byte(http2ClientPreface)
+)
+
+type http2streamState int
+
+const (
+	http2stateIdle http2streamState = iota
+	http2stateOpen
+	http2stateHalfClosedLocal
+	http2stateHalfClosedRemote
+	http2stateResvLocal
+	http2stateResvRemote
+	http2stateClosed
+)
+
+var http2stateName = [...]string{
+	http2stateIdle:             "Idle",
+	http2stateOpen:             "Open",
+	http2stateHalfClosedLocal:  "HalfClosedLocal",
+	http2stateHalfClosedRemote: "HalfClosedRemote",
+	http2stateResvLocal:        "ResvLocal",
+	http2stateResvRemote:       "ResvRemote",
+	http2stateClosed:           "Closed",
+}
+
+func (st http2streamState) String() string {
+	return http2stateName[st]
+}
+
+// Setting is a setting parameter: which setting it is, and its value.
+type http2Setting struct {
+	// ID is which setting is being set.
+	// See http://http2.github.io/http2-spec/#SettingValues
+	ID http2SettingID
+
+	// Val is the value.
+	Val uint32
+}
+
+func (s http2Setting) String() string {
+	return fmt.Sprintf("[%v = %d]", s.ID, s.Val)
+}
+
+// Valid reports whether the setting is valid.
+func (s http2Setting) Valid() error {
+
+	switch s.ID {
+	case http2SettingEnablePush:
+		if s.Val != 1 && s.Val != 0 {
+			return http2ConnectionError(http2ErrCodeProtocol)
+		}
+	case http2SettingInitialWindowSize:
+		if s.Val > 1<<31-1 {
+			return http2ConnectionError(http2ErrCodeFlowControl)
+		}
+	case http2SettingMaxFrameSize:
+		if s.Val < 16384 || s.Val > 1<<24-1 {
+			return http2ConnectionError(http2ErrCodeProtocol)
+		}
+	}
+	return nil
+}
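+
+// Editor's sketch: ENABLE_PUSH only admits 0 or 1, so
+//
+//	http2Setting{ID: http2SettingEnablePush, Val: 2}.Valid()
+//	// == http2ConnectionError(http2ErrCodeProtocol)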
+
+// A SettingID is an HTTP/2 setting as defined in
+// http://http2.github.io/http2-spec/#iana-settings
+type http2SettingID uint16
+
+const (
+	http2SettingHeaderTableSize      http2SettingID = 0x1
+	http2SettingEnablePush           http2SettingID = 0x2
+	http2SettingMaxConcurrentStreams http2SettingID = 0x3
+	http2SettingInitialWindowSize    http2SettingID = 0x4
+	http2SettingMaxFrameSize         http2SettingID = 0x5
+	http2SettingMaxHeaderListSize    http2SettingID = 0x6
+)
+
+var http2settingName = map[http2SettingID]string{
+	http2SettingHeaderTableSize:      "HEADER_TABLE_SIZE",
+	http2SettingEnablePush:           "ENABLE_PUSH",
+	http2SettingMaxConcurrentStreams: "MAX_CONCURRENT_STREAMS",
+	http2SettingInitialWindowSize:    "INITIAL_WINDOW_SIZE",
+	http2SettingMaxFrameSize:         "MAX_FRAME_SIZE",
+	http2SettingMaxHeaderListSize:    "MAX_HEADER_LIST_SIZE",
+}
+
+func (s http2SettingID) String() string {
+	if v, ok := http2settingName[s]; ok {
+		return v
+	}
+	return fmt.Sprintf("UNKNOWN_SETTING_%d", uint16(s))
+}
+
+func http2validHeader(v string) bool {
+	if len(v) == 0 {
+		return false
+	}
+	for _, r := range v {
+
+		if r >= 127 || ('A' <= r && r <= 'Z') {
+			return false
+		}
+	}
+	return true
+}
+
+var http2httpCodeStringCommon = map[int]string{} // n -> strconv.Itoa(n)
+
+func init() {
+	for i := 100; i <= 999; i++ {
+		if v := StatusText(i); v != "" {
+			http2httpCodeStringCommon[i] = strconv.Itoa(i)
+		}
+	}
+}
+
+func http2httpCodeString(code int) string {
+	if s, ok := http2httpCodeStringCommon[code]; ok {
+		return s
+	}
+	return strconv.Itoa(code)
+}
+
+// from pkg io
+type http2stringWriter interface {
+	WriteString(s string) (n int, err error)
+}
+
+// A gate lets two goroutines coordinate their activities.
+type http2gate chan struct{}
+
+func (g http2gate) Done() { g <- struct{}{} }
+
+func (g http2gate) Wait() { <-g }
+
+// A closeWaiter is like a sync.WaitGroup but only goes 1 to 0 (open to closed).
+type http2closeWaiter chan struct{}
+
+// Init makes a closeWaiter usable.
+// It exists so that a closeWaiter value can be placed inside a
+// larger struct and have the Mutex and Cond's memory in the same
+// allocation.
+func (cw *http2closeWaiter) Init() {
+	*cw = make(chan struct{})
+}
+
+// Close marks the closeWaiter as closed and unblocks any waiters.
+func (cw http2closeWaiter) Close() {
+	close(cw)
+}
+
+// Wait waits for the closeWaiter to become closed.
+func (cw http2closeWaiter) Wait() {
+	<-cw
+}
+
+// bufferedWriter is a buffered writer that writes to w.
+// Its buffered writer is lazily allocated as needed, to minimize
+// idle memory usage with many connections.
+type http2bufferedWriter struct {
+	w  io.Writer     // immutable
+	bw *bufio.Writer // non-nil when data is buffered
+}
+
+func http2newBufferedWriter(w io.Writer) *http2bufferedWriter {
+	return &http2bufferedWriter{w: w}
+}
+
+var http2bufWriterPool = sync.Pool{
+	New: func() interface{} {
+		return bufio.NewWriterSize(nil, 4<<10)
+	},
+}
+
+func (w *http2bufferedWriter) Write(p []byte) (n int, err error) {
+	if w.bw == nil {
+		bw := http2bufWriterPool.Get().(*bufio.Writer)
+		bw.Reset(w.w)
+		w.bw = bw
+	}
+	return w.bw.Write(p)
+}
+
+func (w *http2bufferedWriter) Flush() error {
+	bw := w.bw
+	if bw == nil {
+		return nil
+	}
+	err := bw.Flush()
+	bw.Reset(nil)
+	http2bufWriterPool.Put(bw)
+	w.bw = nil
+	return err
+}
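+
+// Usage sketch (w is any io.Writer):
+//
+//	bw := http2newBufferedWriter(w)
+//	bw.Write(p)  // the first Write grabs a *bufio.Writer from the pool
+//	bw.Flush()   // flushes to w and returns the *bufio.Writer to the pool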
+
+type http2pipe struct {
+	b http2buffer
+	c sync.Cond
+	m sync.Mutex
+}
+
+// Read waits until data is available and copies bytes
+// from the buffer into p.
+func (r *http2pipe) Read(p []byte) (n int, err error) {
+	r.c.L.Lock()
+	defer r.c.L.Unlock()
+	for r.b.Len() == 0 && !r.b.closed {
+		r.c.Wait()
+	}
+	return r.b.Read(p)
+}
+
+// Write copies bytes from p into the buffer and wakes a reader.
+// It is an error to write more data than the buffer can hold.
+func (w *http2pipe) Write(p []byte) (n int, err error) {
+	w.c.L.Lock()
+	defer w.c.L.Unlock()
+	defer w.c.Signal()
+	return w.b.Write(p)
+}
+
+func (c *http2pipe) Close(err error) {
+	c.c.L.Lock()
+	defer c.c.L.Unlock()
+	defer c.c.Signal()
+	c.b.Close(err)
+}
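+
+// Usage sketch: the serve loop Writes incoming DATA frames into the
+// pipe, the handler's Request.Body Reads from it, and Close(err)
+// makes subsequent Reads return err (io.EOF at a clean end of body).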
+
+const (
+	http2prefaceTimeout        = 10 * time.Second
+	http2firstSettingsTimeout  = 2 * time.Second // should be in-flight with preface anyway
+	http2handlerChunkWriteSize = 4 << 10
+	http2defaultMaxStreams     = 250 // TODO: make this 100 as the GFE seems to?
+)
+
+var (
+	http2errClientDisconnected = errors.New("client disconnected")
+	http2errClosedBody         = errors.New("body closed by handler")
+	http2errStreamBroken       = errors.New("http2: stream broken")
+)
+
+var http2responseWriterStatePool = sync.Pool{
+	New: func() interface{} {
+		rws := &http2responseWriterState{}
+		rws.bw = bufio.NewWriterSize(http2chunkWriter{rws}, http2handlerChunkWriteSize)
+		return rws
+	},
+}
+
+// Test hooks.
+var (
+	http2testHookOnConn        func()
+	http2testHookGetServerConn func(*http2serverConn)
+	http2testHookOnPanicMu     *sync.Mutex // nil except in tests
+	http2testHookOnPanic       func(sc *http2serverConn, panicVal interface{}) (rePanic bool)
+)
+
+// Server is an HTTP/2 server.
+type http2Server struct {
+	// MaxHandlers limits the number of http.Handler ServeHTTP goroutines
+	// which may run at a time over all connections.
+	// Negative or zero means no limit.
+	// TODO: implement
+	MaxHandlers int
+
+	// MaxConcurrentStreams optionally specifies the number of
+	// concurrent streams that each client may have open at a
+	// time. This is unrelated to the number of http.Handler goroutines
+	// which may be active globally, which is MaxHandlers.
+	// If zero, MaxConcurrentStreams defaults to at least 100, per
+	// the HTTP/2 spec's recommendations.
+	MaxConcurrentStreams uint32
+
+	// MaxReadFrameSize optionally specifies the largest frame
+	// this server is willing to read. A valid value is between
+	// 16k and 16M, inclusive. If zero or otherwise invalid, a
+	// default value is used.
+	MaxReadFrameSize uint32
+
+	// PermitProhibitedCipherSuites, if true, permits the use of
+	// cipher suites prohibited by the HTTP/2 spec.
+	PermitProhibitedCipherSuites bool
+}
+
+func (s *http2Server) maxReadFrameSize() uint32 {
+	if v := s.MaxReadFrameSize; v >= http2minMaxFrameSize && v <= http2maxFrameSize {
+		return v
+	}
+	return http2defaultMaxReadFrameSize
+}
+
+func (s *http2Server) maxConcurrentStreams() uint32 {
+	if v := s.MaxConcurrentStreams; v > 0 {
+		return v
+	}
+	return http2defaultMaxStreams
+}
+
+// ConfigureServer adds HTTP/2 support to a net/http Server.
+//
+// The configuration conf may be nil.
+//
+// ConfigureServer must be called before s begins serving.
+func http2ConfigureServer(s *Server, conf *http2Server) {
+	if conf == nil {
+		conf = new(http2Server)
+	}
+	if s.TLSConfig == nil {
+		s.TLSConfig = new(tls.Config)
+	}
+
+	if s.TLSConfig.CipherSuites != nil {
+		const requiredCipher = tls.TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256
+		haveRequired := false
+		for _, v := range s.TLSConfig.CipherSuites {
+			if v == requiredCipher {
+				haveRequired = true
+				break
+			}
+		}
+		if !haveRequired {
+			s.TLSConfig.CipherSuites = append(s.TLSConfig.CipherSuites, requiredCipher)
+		}
+	}
+
+	haveNPN := false
+	for _, p := range s.TLSConfig.NextProtos {
+		if p == http2NextProtoTLS {
+			haveNPN = true
+			break
+		}
+	}
+	if !haveNPN {
+		s.TLSConfig.NextProtos = append(s.TLSConfig.NextProtos, http2NextProtoTLS)
+	}
+
+	s.TLSConfig.NextProtos = append(s.TLSConfig.NextProtos, "h2-14")
+
+	if s.TLSNextProto == nil {
+		s.TLSNextProto = map[string]func(*Server, *tls.Conn, Handler){}
+	}
+	protoHandler := func(hs *Server, c *tls.Conn, h Handler) {
+		if http2testHookOnConn != nil {
+			http2testHookOnConn()
+		}
+		conf.handleConn(hs, c, h)
+	}
+	s.TLSNextProto[http2NextProtoTLS] = protoHandler
+	s.TLSNextProto["h2-14"] = protoHandler
+}
+
+func (srv *http2Server) handleConn(hs *Server, c net.Conn, h Handler) {
+	sc := &http2serverConn{
+		srv:              srv,
+		hs:               hs,
+		conn:             c,
+		remoteAddrStr:    c.RemoteAddr().String(),
+		bw:               http2newBufferedWriter(c),
+		handler:          h,
+		streams:          make(map[uint32]*http2stream),
+		readFrameCh:      make(chan http2readFrameResult),
+		wantWriteFrameCh: make(chan http2frameWriteMsg, 8),
+		wroteFrameCh:     make(chan struct{}, 1),
+		bodyReadCh:       make(chan http2bodyReadMsg),
+		doneServing:      make(chan struct{}),
+		advMaxStreams:    srv.maxConcurrentStreams(),
+		writeSched: http2writeScheduler{
+			maxFrameSize: http2initialMaxFrameSize,
+		},
+		initialWindowSize: http2initialWindowSize,
+		headerTableSize:   http2initialHeaderTableSize,
+		serveG:            http2newGoroutineLock(),
+		pushEnabled:       true,
+	}
+	sc.flow.add(http2initialWindowSize)
+	sc.inflow.add(http2initialWindowSize)
+	sc.hpackEncoder = hpack.NewEncoder(&sc.headerWriteBuf)
+	sc.hpackDecoder = hpack.NewDecoder(http2initialHeaderTableSize, sc.onNewHeaderField)
+	sc.hpackDecoder.SetMaxStringLength(sc.maxHeaderStringLen())
+
+	fr := http2NewFramer(sc.bw, c)
+	fr.SetMaxReadFrameSize(srv.maxReadFrameSize())
+	sc.framer = fr
+
+	if tc, ok := c.(*tls.Conn); ok {
+		sc.tlsState = new(tls.ConnectionState)
+		*sc.tlsState = tc.ConnectionState()
+
+		if sc.tlsState.Version < tls.VersionTLS12 {
+			sc.rejectConn(http2ErrCodeInadequateSecurity, "TLS version too low")
+			return
+		}
+
+		if sc.tlsState.ServerName == "" {
+
+		}
+
+		if !srv.PermitProhibitedCipherSuites && http2isBadCipher(sc.tlsState.CipherSuite) {
+
+			sc.rejectConn(http2ErrCodeInadequateSecurity, fmt.Sprintf("Prohibited TLS 1.2 Cipher Suite: %x", sc.tlsState.CipherSuite))
+			return
+		}
+	}
+
+	if hook := http2testHookGetServerConn; hook != nil {
+		hook(sc)
+	}
+	sc.serve()
+}
+
+// isBadCipher reports whether the cipher is blacklisted by the HTTP/2 spec.
+func http2isBadCipher(cipher uint16) bool {
+	switch cipher {
+	case tls.TLS_RSA_WITH_RC4_128_SHA,
+		tls.TLS_RSA_WITH_3DES_EDE_CBC_SHA,
+		tls.TLS_RSA_WITH_AES_128_CBC_SHA,
+		tls.TLS_RSA_WITH_AES_256_CBC_SHA,
+		tls.TLS_ECDHE_ECDSA_WITH_RC4_128_SHA,
+		tls.TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA,
+		tls.TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA,
+		tls.TLS_ECDHE_RSA_WITH_RC4_128_SHA,
+		tls.TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA,
+		tls.TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,
+		tls.TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA:
+
+		return true
+	default:
+		return false
+	}
+}
+
+func (sc *http2serverConn) rejectConn(err http2ErrCode, debug string) {
+	sc.vlogf("REJECTING conn: %v, %s", err, debug)
+
+	sc.framer.WriteGoAway(0, err, []byte(debug))
+	sc.bw.Flush()
+	sc.conn.Close()
+}
+
+type http2serverConn struct {
+	// Immutable:
+	srv              *http2Server
+	hs               *Server
+	conn             net.Conn
+	bw               *http2bufferedWriter // writing to conn
+	handler          Handler
+	framer           *http2Framer
+	hpackDecoder     *hpack.Decoder
+	doneServing      chan struct{}             // closed when serverConn.serve ends
+	readFrameCh      chan http2readFrameResult // written by serverConn.readFrames
+	wantWriteFrameCh chan http2frameWriteMsg   // from handlers -> serve
+	wroteFrameCh     chan struct{}             // from writeFrameAsync -> serve, tickles more frame writes
+	bodyReadCh       chan http2bodyReadMsg     // from handlers -> serve
+	testHookCh       chan func(int)            // code to run on the serve loop
+	flow             http2flow                 // conn-wide (not stream-specific) outbound flow control
+	inflow           http2flow                 // conn-wide inbound flow control
+	tlsState         *tls.ConnectionState      // shared by all handlers, like net/http
+	remoteAddrStr    string
+
+	// Everything following is owned by the serve loop; use serveG.check():
+	serveG                http2goroutineLock // used to verify funcs are on serve()
+	pushEnabled           bool
+	sawFirstSettings      bool // got the initial SETTINGS frame after the preface
+	needToSendSettingsAck bool
+	unackedSettings       int    // how many SETTINGS have we sent without ACKs?
+	clientMaxStreams      uint32 // SETTINGS_MAX_CONCURRENT_STREAMS from client (our PUSH_PROMISE limit)
+	advMaxStreams         uint32 // our SETTINGS_MAX_CONCURRENT_STREAMS advertised to the client
+	curOpenStreams        uint32 // client's number of open streams
+	maxStreamID           uint32 // max ever seen
+	streams               map[uint32]*http2stream
+	initialWindowSize     int32
+	headerTableSize       uint32
+	peerMaxHeaderListSize uint32            // zero means unknown (default)
+	canonHeader           map[string]string // http2-lower-case -> Go-Canonical-Case
+	req                   http2requestParam // non-zero while reading request headers
+	writingFrame          bool              // started write goroutine but haven't heard back on wroteFrameCh
+	needsFrameFlush       bool              // last frame write wasn't a flush
+	writeSched            http2writeScheduler
+	inGoAway              bool // we've started to or sent GOAWAY
+	needToSendGoAway      bool // we need to schedule a GOAWAY frame write
+	goAwayCode            http2ErrCode
+	shutdownTimerCh       <-chan time.Time // nil until used
+	shutdownTimer         *time.Timer      // nil until used
+
+	// Owned by the writeFrameAsync goroutine:
+	headerWriteBuf bytes.Buffer
+	hpackEncoder   *hpack.Encoder
+}
+
+func (sc *http2serverConn) maxHeaderStringLen() int {
+	v := sc.maxHeaderListSize()
+	if uint32(int(v)) == v {
+		return int(v)
+	}
+	// The configured limit doesn't fit in an int on this platform;
+	// fall back to 0 (no limit).
+	return 0
+}
+
+func (sc *http2serverConn) maxHeaderListSize() uint32 {
+	n := sc.hs.MaxHeaderBytes
+	if n <= 0 {
+		n = DefaultMaxHeaderBytes
+	}
+	// http2's count is in a slightly different unit and includes 32 bytes per pair.
+	// So, take the net/http.Server value and pad it up a bit, assuming 10 headers.
+	const perFieldOverhead = 32 // per http2 spec
+	const typicalHeaders = 10   // conservative
+	return uint32(n + typicalHeaders*perFieldOverhead)
+}
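+
+// For example, with the net/http default of 1 MB (DefaultMaxHeaderBytes),
+// maxHeaderListSize advertises 1<<20 + 10*32 = 1048896 bytes.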
+
+// requestParam is the state of the next request, initialized over
+// potentially several frames: a HEADERS frame plus zero or more
+// CONTINUATION frames.
+type http2requestParam struct {
+	// stream is non-nil if we're reading (HEADER or CONTINUATION)
+	// frames for a request (but not DATA).
+	stream            *http2stream
+	header            Header
+	method, path      string
+	scheme, authority string
+	sawRegularHeader  bool  // saw a non-pseudo header already
+	invalidHeader     bool  // an invalid header was seen
+	headerListSize    int64 // actually uint32, but easier math this way
+}
+
+// stream represents a stream. This is the minimal metadata needed by
+// the serve goroutine. Most of the actual stream state is owned by
+// the http.Handler's goroutine in the responseWriter. Because the
+// responseWriter's responseWriterState is recycled at the end of a
+// handler, this struct intentionally has no pointer to the
+// *responseWriter{,State} itself, as the Handler ending nils out the
+// responseWriter's state field.
+type http2stream struct {
+	// immutable:
+	id   uint32
+	body *http2pipe       // non-nil if expecting DATA frames
+	cw   http2closeWaiter // closed when stream transitions to the closed state
+
+	// owned by serverConn's serve loop:
+	bodyBytes     int64        // body bytes seen so far
+	declBodyBytes int64        // or -1 if undeclared
+	flow          http2flow    // limits writing from Handler to client
+	inflow        http2flow    // what the client is allowed to POST/etc to us
+	parent        *http2stream // or nil
+	weight        uint8
+	state         http2streamState
+	sentReset     bool // only true once detached from streams map
+	gotReset      bool // only true once detached from streams map
+}
+
+func (sc *http2serverConn) Framer() *http2Framer { return sc.framer }
+
+func (sc *http2serverConn) CloseConn() error { return sc.conn.Close() }
+
+func (sc *http2serverConn) Flush() error { return sc.bw.Flush() }
+
+func (sc *http2serverConn) HeaderEncoder() (*hpack.Encoder, *bytes.Buffer) {
+	return sc.hpackEncoder, &sc.headerWriteBuf
+}
+
+func (sc *http2serverConn) state(streamID uint32) (http2streamState, *http2stream) {
+	sc.serveG.check()
+
+	if st, ok := sc.streams[streamID]; ok {
+		return st.state, st
+	}
+
+	// Per the spec, a new stream ID implicitly closes all idle
+	// streams with lower IDs, so anything at or below maxStreamID
+	// that we no longer track is closed.
+	if streamID <= sc.maxStreamID {
+		return http2stateClosed, nil
+	}
+	return http2stateIdle, nil
+}
+
+func (sc *http2serverConn) vlogf(format string, args ...interface{}) {
+	if http2VerboseLogs {
+		sc.logf(format, args...)
+	}
+}
+
+func (sc *http2serverConn) logf(format string, args ...interface{}) {
+	if lg := sc.hs.ErrorLog; lg != nil {
+		lg.Printf(format, args...)
+	} else {
+		log.Printf(format, args...)
+	}
+}
+
+func (sc *http2serverConn) condlogf(err error, format string, args ...interface{}) {
+	if err == nil {
+		return
+	}
+	str := err.Error()
+	if err == io.EOF || strings.Contains(str, "use of closed network connection") {
+		// Expected, boring errors: log only in verbose mode.
+		sc.vlogf(format, args...)
+	} else {
+		sc.logf(format, args...)
+	}
+}
+
+func (sc *http2serverConn) onNewHeaderField(f hpack.HeaderField) {
+	sc.serveG.check()
+	sc.vlogf("got header field %+v", f)
+	switch {
+	case !http2validHeader(f.Name):
+		sc.req.invalidHeader = true
+	case strings.HasPrefix(f.Name, ":"):
+		if sc.req.sawRegularHeader {
+			sc.logf("pseudo-header after regular header")
+			sc.req.invalidHeader = true
+			return
+		}
+		var dst *string
+		switch f.Name {
+		case ":method":
+			dst = &sc.req.method
+		case ":path":
+			dst = &sc.req.path
+		case ":scheme":
+			dst = &sc.req.scheme
+		case ":authority":
+			dst = &sc.req.authority
+		default:
+			sc.logf("invalid pseudo-header %q", f.Name)
+			sc.req.invalidHeader = true
+			return
+		}
+		if *dst != "" {
+			sc.logf("duplicate pseudo-header %q sent", f.Name)
+			sc.req.invalidHeader = true
+			return
+		}
+		*dst = f.Value
+	default:
+		sc.req.sawRegularHeader = true
+		sc.req.header.Add(sc.canonicalHeader(f.Name), f.Value)
+		const headerFieldOverhead = 32 // per spec
+		sc.req.headerListSize += int64(len(f.Name)) + int64(len(f.Value)) + headerFieldOverhead
+		if sc.req.headerListSize > int64(sc.maxHeaderListSize()) {
+			sc.hpackDecoder.SetEmitEnabled(false)
+		}
+	}
+}
+
+func (sc *http2serverConn) canonicalHeader(v string) string {
+	sc.serveG.check()
+	cv, ok := http2commonCanonHeader[v]
+	if ok {
+		return cv
+	}
+	cv, ok = sc.canonHeader[v]
+	if ok {
+		return cv
+	}
+	if sc.canonHeader == nil {
+		sc.canonHeader = make(map[string]string)
+	}
+	cv = CanonicalHeaderKey(v)
+	sc.canonHeader[v] = cv
+	return cv
+}
+
+type http2readFrameResult struct {
+	f   http2Frame // valid until readMore is called
+	err error
+
+	// readMore should be called once the consumer no longer needs or
+	// retains f. After readMore, f is invalid and more frames can be
+	// read.
+	readMore func()
+}
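+
+// The serve loop consumes a readFrameResult like this (handle is a
+// placeholder):
+//
+//	res := <-sc.readFrameCh
+//	handle(res.f)  // res.f is only valid until readMore is called
+//	res.readMore() // lets readFrames read the next frame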
+
+// readFrames is the loop that reads incoming frames.
+// It takes care to only read one frame at a time, blocking until the
+// consumer is done with the frame.
+// It's run on its own goroutine.
+func (sc *http2serverConn) readFrames() {
+	gate := make(http2gate)
+	for {
+		f, err := sc.framer.ReadFrame()
+		select {
+		case sc.readFrameCh <- http2readFrameResult{f, err, gate.Done}:
+		case <-sc.doneServing:
+			return
+		}
+		select {
+		case <-gate:
+		case <-sc.doneServing:
+			return
+		}
+	}
+}
+
+// writeFrameAsync runs in its own goroutine and writes a single frame
+// and then reports when it's done.
+// At most one goroutine can be running writeFrameAsync at a time per
+// serverConn.
+func (sc *http2serverConn) writeFrameAsync(wm http2frameWriteMsg) {
+	err := wm.write.writeFrame(sc)
+	if ch := wm.done; ch != nil {
+		select {
+		case ch <- err:
+		default:
+			panic(fmt.Sprintf("unbuffered done channel passed in for type %T", wm.write))
+		}
+	}
+	sc.wroteFrameCh <- struct{}{}
+}
+
+func (sc *http2serverConn) closeAllStreamsOnConnClose() {
+	sc.serveG.check()
+	for _, st := range sc.streams {
+		sc.closeStream(st, http2errClientDisconnected)
+	}
+}
+
+func (sc *http2serverConn) stopShutdownTimer() {
+	sc.serveG.check()
+	if t := sc.shutdownTimer; t != nil {
+		t.Stop()
+	}
+}
+
+func (sc *http2serverConn) notePanic() {
+	if http2testHookOnPanicMu != nil {
+		http2testHookOnPanicMu.Lock()
+		defer http2testHookOnPanicMu.Unlock()
+	}
+	if http2testHookOnPanic != nil {
+		if e := recover(); e != nil {
+			if http2testHookOnPanic(sc, e) {
+				panic(e)
+			}
+		}
+	}
+}
+
+func (sc *http2serverConn) serve() {
+	sc.serveG.check()
+	defer sc.notePanic()
+	defer sc.conn.Close()
+	defer sc.closeAllStreamsOnConnClose()
+	defer sc.stopShutdownTimer()
+	defer close(sc.doneServing)
+
+	sc.vlogf("HTTP/2 connection from %v on %p", sc.conn.RemoteAddr(), sc.hs)
+
+	sc.writeFrame(http2frameWriteMsg{
+		write: http2writeSettings{
+			{http2SettingMaxFrameSize, sc.srv.maxReadFrameSize()},
+			{http2SettingMaxConcurrentStreams, sc.advMaxStreams},
+			{http2SettingMaxHeaderListSize, sc.maxHeaderListSize()},
+		},
+	})
+	sc.unackedSettings++
+
+	if err := sc.readPreface(); err != nil {
+		sc.condlogf(err, "error reading preface from client %v: %v", sc.conn.RemoteAddr(), err)
+		return
+	}
+
+	go sc.readFrames()
+
+	settingsTimer := time.NewTimer(http2firstSettingsTimeout)
+	loopNum := 0
+	for {
+		loopNum++
+		select {
+		case wm := <-sc.wantWriteFrameCh:
+			sc.writeFrame(wm)
+		case <-sc.wroteFrameCh:
+			if !sc.writingFrame {
+				panic("internal error: expected to be already writing a frame")
+			}
+			sc.writingFrame = false
+			sc.scheduleFrameWrite()
+		case res := <-sc.readFrameCh:
+			if !sc.processFrameFromReader(res) {
+				return
+			}
+			res.readMore()
+			if settingsTimer.C != nil {
+				settingsTimer.Stop()
+				settingsTimer.C = nil
+			}
+		case m := <-sc.bodyReadCh:
+			sc.noteBodyRead(m.st, m.n)
+		case <-settingsTimer.C:
+			sc.logf("timeout waiting for SETTINGS frames from %v", sc.conn.RemoteAddr())
+			return
+		case <-sc.shutdownTimerCh:
+			sc.vlogf("GOAWAY close timer fired; closing conn from %v", sc.conn.RemoteAddr())
+			return
+		case fn := <-sc.testHookCh:
+			fn(loopNum)
+		}
+	}
+}
+
+// readPreface reads the ClientPreface greeting from the peer
+// or returns an error on timeout or an invalid greeting.
+func (sc *http2serverConn) readPreface() error {
+	errc := make(chan error, 1)
+	go func() {
+		// Read the client preface.
+		buf := make([]byte, len(http2ClientPreface))
+		if _, err := io.ReadFull(sc.conn, buf); err != nil {
+			errc <- err
+		} else if !bytes.Equal(buf, http2clientPreface) {
+			errc <- fmt.Errorf("bogus greeting %q", buf)
+		} else {
+			errc <- nil
+		}
+	}()
+	timer := time.NewTimer(http2prefaceTimeout)
+	defer timer.Stop()
+	select {
+	case <-timer.C:
+		return errors.New("timeout waiting for client preface")
+	case err := <-errc:
+		if err == nil {
+			sc.vlogf("client %v said hello", sc.conn.RemoteAddr())
+		}
+		return err
+	}
+}
+
+var http2errChanPool = sync.Pool{
+	New: func() interface{} { return make(chan error, 1) },
+}
+
+// writeDataFromHandler writes the data described in writeData to the
+// given stream.
+//
+// Flow control currently happens in the Handler, which waits for one
+// or more bytes of quota to become available before writing here, so
+// at this point we know we have flow control. That may have to change
+// when priority is implemented, so that the serve goroutine knows the
+// total number of bytes waiting to be sent and has more scheduling
+// decisions available.
+func (sc *http2serverConn) writeDataFromHandler(stream *http2stream, writeData *http2writeData) error {
+	ch := http2errChanPool.Get().(chan error)
+	sc.writeFrameFromHandler(http2frameWriteMsg{
+		write:  writeData,
+		stream: stream,
+		done:   ch,
+	})
+	select {
+	case err := <-ch:
+		http2errChanPool.Put(ch)
+		return err
+	case <-sc.doneServing:
+		return http2errClientDisconnected
+	case <-stream.cw:
+		return http2errStreamBroken
+	}
+}
+
+// writeFrameFromHandler sends wm to sc.wantWriteFrameCh, but aborts
+// if the connection has gone away.
+//
+// This must not be run from the serve goroutine itself, else it might
+// deadlock writing to sc.wantWriteFrameCh (which is only mildly
+// buffered and is read by serve itself). If you're on the serve
+// goroutine, call writeFrame instead.
+func (sc *http2serverConn) writeFrameFromHandler(wm http2frameWriteMsg) {
+	sc.serveG.checkNotOn()
+	var scheduled bool
+	select {
+	case sc.wantWriteFrameCh <- wm:
+		scheduled = true
+	case <-sc.doneServing:
+
+	case <-wm.stream.cw:
+
+	}
+
+	if !scheduled && wm.done != nil {
+		select {
+		case wm.done <- http2errStreamBroken:
+		default:
+			panic("expected buffered channel")
+		}
+	}
+}
+
+// writeFrame schedules a frame to write and sends it if there's nothing
+// already being written.
+//
+// There is no pushback here (the serve goroutine never blocks). It's
+// the http.Handlers that block, waiting for their previous frames to
+// make it onto the wire.
+//
+// If you're not on the serve goroutine, use writeFrameFromHandler instead.
+func (sc *http2serverConn) writeFrame(wm http2frameWriteMsg) {
+	sc.serveG.check()
+	sc.writeSched.add(wm)
+	sc.scheduleFrameWrite()
+}
+
+// startFrameWrite starts a goroutine to write wm (writing in a
+// separate goroutine since the write may block on the network) and
+// updates the serve goroutine's bookkeeping based on wm.
+func (sc *http2serverConn) startFrameWrite(wm http2frameWriteMsg) {
+	sc.serveG.check()
+	if sc.writingFrame {
+		panic("internal error: can only be writing one frame at a time")
+	}
+	sc.writingFrame = true
+
+	st := wm.stream
+	if st != nil {
+		switch st.state {
+		case http2stateHalfClosedLocal:
+			panic("internal error: attempt to send frame on half-closed-local stream")
+		case http2stateClosed:
+			if st.sentReset || st.gotReset {
+				// The stream was reset; skip the frame but fake a
+				// write completion so the scheduler keeps moving.
+				sc.wroteFrameCh <- struct{}{}
+				return
+			}
+			panic(fmt.Sprintf("internal error: attempt to send a write %v on a closed stream", wm))
+		}
+	}
+
+	sc.needsFrameFlush = true
+	if http2endsStream(wm.write) {
+		if st == nil {
+			panic("internal error: expecting non-nil stream")
+		}
+		switch st.state {
+		case http2stateOpen:
+			// The handler is done writing but the peer may still be
+			// sending; go half-closed (local) and cancel the stream.
+			st.state = http2stateHalfClosedLocal
+			errCancel := http2StreamError{st.id, http2ErrCodeCancel}
+			sc.resetStream(errCancel)
+		case http2stateHalfClosedRemote:
+			sc.closeStream(st, nil)
+		}
+	}
+	go sc.writeFrameAsync(wm)
+}
+
+// scheduleFrameWrite tickles the frame writing scheduler.
+//
+// If a frame is already being written, nothing happens. This will be called again
+// when the frame is done being written.
+//
+// If a frame isn't being written and we need to send one, the best frame
+// to send is selected, preferring first things that aren't
+// stream-specific (e.g. ACKing settings), and then finding the
+// highest priority stream.
+//
+// If a frame isn't being written and there's nothing else to send, we
+// flush the write buffer.
+func (sc *http2serverConn) scheduleFrameWrite() {
+	sc.serveG.check()
+	if sc.writingFrame {
+		return
+	}
+	if sc.needToSendGoAway {
+		sc.needToSendGoAway = false
+		sc.startFrameWrite(http2frameWriteMsg{
+			write: &http2writeGoAway{
+				maxStreamID: sc.maxStreamID,
+				code:        sc.goAwayCode,
+			},
+		})
+		return
+	}
+	if sc.needToSendSettingsAck {
+		sc.needToSendSettingsAck = false
+		sc.startFrameWrite(http2frameWriteMsg{write: http2writeSettingsAck{}})
+		return
+	}
+	if !sc.inGoAway {
+		if wm, ok := sc.writeSched.take(); ok {
+			sc.startFrameWrite(wm)
+			return
+		}
+	}
+	if sc.needsFrameFlush {
+		sc.startFrameWrite(http2frameWriteMsg{write: http2flushFrameWriter{}})
+		sc.needsFrameFlush = false
+		return
+	}
+}
+
+func (sc *http2serverConn) goAway(code http2ErrCode) {
+	sc.serveG.check()
+	if sc.inGoAway {
+		return
+	}
+	if code != http2ErrCodeNo {
+		sc.shutDownIn(250 * time.Millisecond)
+	} else {
+		sc.shutDownIn(1 * time.Second)
+	}
+	sc.inGoAway = true
+	sc.needToSendGoAway = true
+	sc.goAwayCode = code
+	sc.scheduleFrameWrite()
+}
+
+func (sc *http2serverConn) shutDownIn(d time.Duration) {
+	sc.serveG.check()
+	sc.shutdownTimer = time.NewTimer(d)
+	sc.shutdownTimerCh = sc.shutdownTimer.C
+}
+
+func (sc *http2serverConn) resetStream(se http2StreamError) {
+	sc.serveG.check()
+	sc.writeFrame(http2frameWriteMsg{write: se})
+	if st, ok := sc.streams[se.StreamID]; ok {
+		st.sentReset = true
+		sc.closeStream(st, se)
+	}
+}
+
+// curHeaderStreamID returns the stream ID of the header block we're
+// currently in the middle of reading. If this returns non-zero, the
+// next frame must be a CONTINUATION with this stream id.
+func (sc *http2serverConn) curHeaderStreamID() uint32 {
+	sc.serveG.check()
+	st := sc.req.stream
+	if st == nil {
+		return 0
+	}
+	return st.id
+}
+
+// processFrameFromReader processes the serve loop's read from readFrameCh from the
+// frame-reading goroutine.
+// processFrameFromReader returns whether the connection should be kept open.
+func (sc *http2serverConn) processFrameFromReader(res http2readFrameResult) bool {
+	sc.serveG.check()
+	err := res.err
+	if err != nil {
+		if err == http2ErrFrameTooLarge {
+			sc.goAway(http2ErrCodeFrameSize)
+			return true
+		}
+		clientGone := err == io.EOF || strings.Contains(err.Error(), "use of closed network connection")
+		if clientGone {
+			// The client went away; nothing left to do but close.
+			return false
+		}
+	} else {
+		f := res.f
+		sc.vlogf("got %v: %#v", f.Header(), f)
+		err = sc.processFrame(f)
+		if err == nil {
+			return true
+		}
+	}
+
+	switch ev := err.(type) {
+	case http2StreamError:
+		sc.resetStream(ev)
+		return true
+	case http2goAwayFlowError:
+		sc.goAway(http2ErrCodeFlowControl)
+		return true
+	case http2ConnectionError:
+		sc.logf("%v: %v", sc.conn.RemoteAddr(), ev)
+		sc.goAway(http2ErrCode(ev))
+		return true
+	default:
+		if res.err != nil {
+			sc.logf("disconnecting; error reading frame from client %s: %v", sc.conn.RemoteAddr(), err)
+		} else {
+			sc.logf("disconnection due to other error: %v", err)
+		}
+		return false
+	}
+}
+
+func (sc *http2serverConn) processFrame(f http2Frame) error {
+	sc.serveG.check()
+
+	if !sc.sawFirstSettings {
+		if _, ok := f.(*http2SettingsFrame); !ok {
+			return http2ConnectionError(http2ErrCodeProtocol)
+		}
+		sc.sawFirstSettings = true
+	}
+
+	if s := sc.curHeaderStreamID(); s != 0 {
+		if cf, ok := f.(*http2ContinuationFrame); !ok {
+			return http2ConnectionError(http2ErrCodeProtocol)
+		} else if cf.Header().StreamID != s {
+			return http2ConnectionError(http2ErrCodeProtocol)
+		}
+	}
+
+	switch f := f.(type) {
+	case *http2SettingsFrame:
+		return sc.processSettings(f)
+	case *http2HeadersFrame:
+		return sc.processHeaders(f)
+	case *http2ContinuationFrame:
+		return sc.processContinuation(f)
+	case *http2WindowUpdateFrame:
+		return sc.processWindowUpdate(f)
+	case *http2PingFrame:
+		return sc.processPing(f)
+	case *http2DataFrame:
+		return sc.processData(f)
+	case *http2RSTStreamFrame:
+		return sc.processResetStream(f)
+	case *http2PriorityFrame:
+		return sc.processPriority(f)
+	case *http2PushPromiseFrame:
+		// Clients must not push; per the spec, receiving a
+		// PUSH_PROMISE is a connection error.
+		return http2ConnectionError(http2ErrCodeProtocol)
+	default:
+		sc.vlogf("Ignoring frame: %v", f.Header())
+		return nil
+	}
+}
+
+func (sc *http2serverConn) processPing(f *http2PingFrame) error {
+	sc.serveG.check()
+	if f.Flags.Has(http2FlagPingAck) {
+		// Per the spec, an endpoint must not respond to PING frames
+		// that carry the ACK flag.
+		return nil
+	}
+	if f.StreamID != 0 {
+		// PING frames are not associated with a stream; a non-zero
+		// stream ID is a connection error.
+		return http2ConnectionError(http2ErrCodeProtocol)
+	}
+	sc.writeFrame(http2frameWriteMsg{write: http2writePingAck{f}})
+	return nil
+}
+
+func (sc *http2serverConn) processWindowUpdate(f *http2WindowUpdateFrame) error {
+	sc.serveG.check()
+	switch {
+	case f.StreamID != 0:
+		st := sc.streams[f.StreamID]
+		if st == nil {
+			// The spec permits WINDOW_UPDATE on closed streams;
+			// ignore updates for streams we no longer track.
+			return nil
+		}
+		if !st.flow.add(int32(f.Increment)) {
+			return http2StreamError{f.StreamID, http2ErrCodeFlowControl}
+		}
+	default:
+		if !sc.flow.add(int32(f.Increment)) {
+			return http2goAwayFlowError{}
+		}
+	}
+	sc.scheduleFrameWrite()
+	return nil
+}
+
+func (sc *http2serverConn) processResetStream(f *http2RSTStreamFrame) error {
+	sc.serveG.check()
+
+	state, st := sc.state(f.StreamID)
+	if state == http2stateIdle {
+		// Per the spec, RST_STREAM on an idle stream is a
+		// connection error.
+		return http2ConnectionError(http2ErrCodeProtocol)
+	}
+	if st != nil {
+		st.gotReset = true
+		sc.closeStream(st, http2StreamError{f.StreamID, f.ErrCode})
+	}
+	return nil
+}
+
+func (sc *http2serverConn) closeStream(st *http2stream, err error) {
+	sc.serveG.check()
+	if st.state == http2stateIdle || st.state == http2stateClosed {
+		panic(fmt.Sprintf("invariant; can't close stream in state %v", st.state))
+	}
+	st.state = http2stateClosed
+	sc.curOpenStreams--
+	delete(sc.streams, st.id)
+	if p := st.body; p != nil {
+		p.Close(err)
+	}
+	st.cw.Close()
+	sc.writeSched.forgetStream(st.id)
+}
+
+func (sc *http2serverConn) processSettings(f *http2SettingsFrame) error {
+	sc.serveG.check()
+	if f.IsAck() {
+		sc.unackedSettings--
+		if sc.unackedSettings < 0 {
+			// The peer ACKed settings we never sent.
+			return http2ConnectionError(http2ErrCodeProtocol)
+		}
+		return nil
+	}
+	if err := f.ForeachSetting(sc.processSetting); err != nil {
+		return err
+	}
+	sc.needToSendSettingsAck = true
+	sc.scheduleFrameWrite()
+	return nil
+}
+
+func (sc *http2serverConn) processSetting(s http2Setting) error {
+	sc.serveG.check()
+	if err := s.Valid(); err != nil {
+		return err
+	}
+	sc.vlogf("processing setting %v", s)
+	switch s.ID {
+	case http2SettingHeaderTableSize:
+		sc.headerTableSize = s.Val
+		sc.hpackEncoder.SetMaxDynamicTableSize(s.Val)
+	case http2SettingEnablePush:
+		sc.pushEnabled = s.Val != 0
+	case http2SettingMaxConcurrentStreams:
+		sc.clientMaxStreams = s.Val
+	case http2SettingInitialWindowSize:
+		return sc.processSettingInitialWindowSize(s.Val)
+	case http2SettingMaxFrameSize:
+		sc.writeSched.maxFrameSize = s.Val
+	case http2SettingMaxHeaderListSize:
+		sc.peerMaxHeaderListSize = s.Val
+	default:
+		// Unknown settings must be ignored per the spec.
+	}
+	return nil
+}
+
+func (sc *http2serverConn) processSettingInitialWindowSize(val uint32) error {
+	sc.serveG.check()
+	// Per the spec, a change to SETTINGS_INITIAL_WINDOW_SIZE adjusts
+	// every stream's flow-control window by the delta.
+	old := sc.initialWindowSize
+	sc.initialWindowSize = int32(val)
+	growth := sc.initialWindowSize - old
+	for _, st := range sc.streams {
+		if !st.flow.add(growth) {
+			// Overflowing a stream's window is a connection error.
+			return http2ConnectionError(http2ErrCodeFlowControl)
+		}
+	}
+	return nil
+}
+
+func (sc *http2serverConn) processData(f *http2DataFrame) error {
+	sc.serveG.check()
+
+	id := f.Header().StreamID
+	st, ok := sc.streams[id]
+	if !ok || st.state != http2stateOpen {
+		// DATA on an unknown or no-longer-open stream is a
+		// stream error.
+		return http2StreamError{id, http2ErrCodeStreamClosed}
+	}
+	if st.body == nil {
+		panic("internal error: should have a body in this state")
+	}
+	data := f.Data()
+
+	if st.declBodyBytes != -1 && st.bodyBytes+int64(len(data)) > st.declBodyBytes {
+		st.body.Close(fmt.Errorf("sender tried to send more than declared Content-Length of %d bytes", st.declBodyBytes))
+		return http2StreamError{id, http2ErrCodeStreamClosed}
+	}
+	if len(data) > 0 {
+		// Check whether the client has flow-control quota.
+		if int(st.inflow.available()) < len(data) {
+			return http2StreamError{id, http2ErrCodeFlowControl}
+		}
+		st.inflow.take(int32(len(data)))
+		wrote, err := st.body.Write(data)
+		if err != nil {
+			return http2StreamError{id, http2ErrCodeStreamClosed}
+		}
+		if wrote != len(data) {
+			panic("internal error: bad Writer")
+		}
+		st.bodyBytes += int64(len(data))
+	}
+	if f.StreamEnded() {
+		if st.declBodyBytes != -1 && st.declBodyBytes != st.bodyBytes {
+			st.body.Close(fmt.Errorf("request declared a Content-Length of %d but only wrote %d bytes",
+				st.declBodyBytes, st.bodyBytes))
+		} else {
+			st.body.Close(io.EOF)
+		}
+		st.state = http2stateHalfClosedRemote
+	}
+	return nil
+}
+
+func (sc *http2serverConn) processHeaders(f *http2HeadersFrame) error {
+	sc.serveG.check()
+	id := f.Header().StreamID
+	if sc.inGoAway {
+		// Ignore new streams once we've started shutting down.
+		return nil
+	}
+
+	if id%2 != 1 || id <= sc.maxStreamID || sc.req.stream != nil {
+		// Client-initiated stream IDs must be odd and strictly
+		// increasing, and must not arrive mid-header-block.
+		return http2ConnectionError(http2ErrCodeProtocol)
+	}
+	if id > sc.maxStreamID {
+		sc.maxStreamID = id
+	}
+	st := &http2stream{
+		id:    id,
+		state: http2stateOpen,
+	}
+	if f.StreamEnded() {
+		st.state = http2stateHalfClosedRemote
+	}
+	st.cw.Init()
+
+	st.flow.conn = &sc.flow
+	st.flow.add(sc.initialWindowSize)
+	st.inflow.conn = &sc.inflow
+	st.inflow.add(http2initialWindowSize)
+
+	sc.streams[id] = st
+	if f.HasPriority() {
+		http2adjustStreamPriority(sc.streams, st.id, f.Priority)
+	}
+	sc.curOpenStreams++
+	sc.req = http2requestParam{
+		stream: st,
+		header: make(Header),
+	}
+	sc.hpackDecoder.SetEmitEnabled(true)
+	return sc.processHeaderBlockFragment(st, f.HeaderBlockFragment(), f.HeadersEnded())
+}
+
+func (sc *http2serverConn) processContinuation(f *http2ContinuationFrame) error {
+	sc.serveG.check()
+	st := sc.streams[f.Header().StreamID]
+	if st == nil || sc.curHeaderStreamID() != st.id {
+		return http2ConnectionError(http2ErrCodeProtocol)
+	}
+	return sc.processHeaderBlockFragment(st, f.HeaderBlockFragment(), f.HeadersEnded())
+}
+
+func (sc *http2serverConn) processHeaderBlockFragment(st *http2stream, frag []byte, end bool) error {
+	sc.serveG.check()
+	if _, err := sc.hpackDecoder.Write(frag); err != nil {
+		return http2ConnectionError(http2ErrCodeCompression)
+	}
+	if !end {
+		return nil
+	}
+	if err := sc.hpackDecoder.Close(); err != nil {
+		return http2ConnectionError(http2ErrCodeCompression)
+	}
+	defer sc.resetPendingRequest()
+	if sc.curOpenStreams > sc.advMaxStreams {
+		// The client exceeded our advertised stream limit.
+		if sc.unackedSettings == 0 {
+			// It has ACKed our SETTINGS and should know better.
+			return http2StreamError{st.id, http2ErrCodeProtocol}
+		}
+		// Our SETTINGS may still be in flight; refuse the stream so
+		// the client can retry it.
+		return http2StreamError{st.id, http2ErrCodeRefusedStream}
+	}
+
+	rw, req, err := sc.newWriterAndRequest()
+	if err != nil {
+		return err
+	}
+	st.body = req.Body.(*http2requestBody).pipe
+	st.declBodyBytes = req.ContentLength
+
+	handler := sc.handler.ServeHTTP
+	if !sc.hpackDecoder.EmitEnabled() {
+		// The request's header list was too large; serve a 431
+		// instead of invoking the user's handler.
+		handler = http2handleHeaderListTooLong
+	}
+
+	go sc.runHandler(rw, req, handler)
+	return nil
+}
+
+func (sc *http2serverConn) processPriority(f *http2PriorityFrame) error {
+	http2adjustStreamPriority(sc.streams, f.StreamID, f.http2PriorityParam)
+	return nil
+}
+
+func http2adjustStreamPriority(streams map[uint32]*http2stream, streamID uint32, priority http2PriorityParam) {
+	st, ok := streams[streamID]
+	if !ok {
+		// Ignore priority information for streams we don't track.
+		return
+	}
+	st.weight = priority.Weight
+	parent := streams[priority.StreamDep]
+	if parent == st {
+		// A stream cannot depend on itself; ignore (buggy client).
+		return
+	}
+
+	for piter := parent; piter != nil; piter = piter.parent {
+		if piter == st {
+			parent.parent = st.parent
+			break
+		}
+	}
+	st.parent = parent
+	if priority.Exclusive && (st.parent != nil || priority.StreamDep == 0) {
+		for _, openStream := range streams {
+			if openStream != st && openStream.parent == st.parent {
+				openStream.parent = st
+			}
+		}
+	}
+}
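+
+// For example, if stream B currently depends on stream A and a
+// PRIORITY frame asks A to depend on B, the loop above first
+// reparents B onto A's old parent, so the dependency tree never
+// gains a cycle before A's new parent is set.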
+
+// resetPendingRequest zeros out all state related to a HEADERS frame
+// and its zero or more CONTINUATION frames sent to start a new
+// request.
+func (sc *http2serverConn) resetPendingRequest() {
+	sc.serveG.check()
+	sc.req = http2requestParam{}
+}
+
+func (sc *http2serverConn) newWriterAndRequest() (*http2responseWriter, *Request, error) {
+	sc.serveG.check()
+	rp := &sc.req
+	if rp.invalidHeader || rp.method == "" || rp.path == "" ||
+		(rp.scheme != "https" && rp.scheme != "http") {
+		// Malformed request: missing or invalid pseudo-headers.
+		return nil, nil, http2StreamError{rp.stream.id, http2ErrCodeProtocol}
+	}
+	var tlsState *tls.ConnectionState // nil if not scheme https
+	if rp.scheme == "https" {
+		tlsState = sc.tlsState
+	}
+	authority := rp.authority
+	if authority == "" {
+		authority = rp.header.Get("Host")
+	}
+	needsContinue := rp.header.Get("Expect") == "100-continue"
+	if needsContinue {
+		rp.header.Del("Expect")
+	}
+
+	if cookies := rp.header["Cookie"]; len(cookies) > 1 {
+		rp.header.Set("Cookie", strings.Join(cookies, "; "))
+	}
+	bodyOpen := rp.stream.state == http2stateOpen
+	body := &http2requestBody{
+		conn:          sc,
+		stream:        rp.stream,
+		needsContinue: needsContinue,
+	}
+
+	url, err := url.ParseRequestURI(rp.path)
+	if err != nil {
+		// An unparseable :path is a stream-level protocol error.
+		return nil, nil, http2StreamError{rp.stream.id, http2ErrCodeProtocol}
+	}
+	req := &Request{
+		Method:     rp.method,
+		URL:        url,
+		RemoteAddr: sc.remoteAddrStr,
+		Header:     rp.header,
+		RequestURI: rp.path,
+		Proto:      "HTTP/2.0",
+		ProtoMajor: 2,
+		ProtoMinor: 0,
+		TLS:        tlsState,
+		Host:       authority,
+		Body:       body,
+	}
+	if bodyOpen {
+		body.pipe = &http2pipe{
+			b: http2buffer{buf: make([]byte, http2initialWindowSize)},
+		}
+		body.pipe.c.L = &body.pipe.m
+
+		if vv, ok := rp.header["Content-Length"]; ok {
+			req.ContentLength, _ = strconv.ParseInt(vv[0], 10, 64)
+		} else {
+			req.ContentLength = -1
+		}
+	}
+
+	rws := http2responseWriterStatePool.Get().(*http2responseWriterState)
+	bwSave := rws.bw
+	*rws = http2responseWriterState{}
+	rws.conn = sc
+	rws.bw = bwSave
+	rws.bw.Reset(http2chunkWriter{rws})
+	rws.stream = rp.stream
+	rws.req = req
+	rws.body = body
+
+	rw := &http2responseWriter{rws: rws}
+	return rw, req, nil
+}
+
+// Run on its own goroutine.
+func (sc *http2serverConn) runHandler(rw *http2responseWriter, req *Request, handler func(ResponseWriter, *Request)) {
+	defer rw.handlerDone()
+
+	handler(rw, req)
+}
+
+func http2handleHeaderListTooLong(w ResponseWriter, r *Request) {
+	// 10.5.1 Limits on Header Block Size:
+	// .. "A server that receives a larger header block than it is
+	// willing to handle can send an HTTP 431 (Request Header Fields Too
+	// Large) status code"
+	const statusRequestHeaderFieldsTooLarge = 431 // only in Go 1.6+
+	w.WriteHeader(statusRequestHeaderFieldsTooLarge)
+	io.WriteString(w, "<h1>HTTP Error 431</h1><p>Request Header Field(s) Too Large</p>")
+}
+
+// called from handler goroutines.
+// h may be nil.
+func (sc *http2serverConn) writeHeaders(st *http2stream, headerData *http2writeResHeaders) {
+	sc.serveG.checkNotOn()
+	var errc chan error
+	if headerData.h != nil {
+		errc = http2errChanPool.Get().(chan error)
+	}
+	sc.writeFrameFromHandler(http2frameWriteMsg{
+		write:  headerData,
+		stream: st,
+		done:   errc,
+	})
+	if errc != nil {
+		select {
+		case <-errc:
+			// The header write completed.
+			http2errChanPool.Put(errc)
+		case <-sc.doneServing:
+			// The connection is shutting down.
+		case <-st.cw:
+			// The stream was closed or reset.
+		}
+	}
+}
+
+// called from handler goroutines.
+func (sc *http2serverConn) write100ContinueHeaders(st *http2stream) {
+	sc.writeFrameFromHandler(http2frameWriteMsg{
+		write:  http2write100ContinueHeadersFrame{st.id},
+		stream: st,
+	})
+}
+
+// A bodyReadMsg tells the server loop that the http.Handler read n
+// bytes of the DATA from the client on the given stream.
+type http2bodyReadMsg struct {
+	st *http2stream
+	n  int
+}
+
+// called from handler goroutines.
+// Notes that the handler for the given stream ID read n bytes of its body
+// and schedules flow control tokens to be sent.
+func (sc *http2serverConn) noteBodyReadFromHandler(st *http2stream, n int) {
+	sc.serveG.checkNotOn()
+	sc.bodyReadCh <- http2bodyReadMsg{st, n}
+}
+
+func (sc *http2serverConn) noteBodyRead(st *http2stream, n int) {
+	sc.serveG.check()
+	sc.sendWindowUpdate(nil, n)
+	if st.state != http2stateHalfClosedRemote && st.state != http2stateClosed {
+		// Only refill the stream-level window while the peer can
+		// still send on this stream.
+		sc.sendWindowUpdate(st, n)
+	}
+}
+
+// st may be nil for conn-level
+func (sc *http2serverConn) sendWindowUpdate(st *http2stream, n int) {
+	sc.serveG.check()
+	// "The legal range for the increment to the flow control
+	// window is 1 to 2^31-1 (2,147,483,647) octets."
+	// A Go Read call on 64-bit machines could in theory read
+	// a larger Read than this. Very unlikely, but we handle it here
+	// rather than elsewhere for now.
+	const maxUint31 = 1<<31 - 1
+	for n >= maxUint31 {
+		sc.sendWindowUpdate32(st, maxUint31)
+		n -= maxUint31
+	}
+	sc.sendWindowUpdate32(st, int32(n))
+}
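+
+// For example, a (theoretical) single 3 GiB body read is split as:
+//
+//	n := 3 << 30                       // 3221225472 bytes
+//	sendWindowUpdate32(st, maxUint31)  // 2147483647
+//	sendWindowUpdate32(st, 1073741825) // the remainder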
+
+// st may be nil for conn-level
+func (sc *http2serverConn) sendWindowUpdate32(st *http2stream, n int32) {
+	sc.serveG.check()
+	if n == 0 {
+		return
+	}
+	if n < 0 {
+		panic("negative update")
+	}
+	var streamID uint32
+	if st != nil {
+		streamID = st.id
+	}
+	sc.writeFrame(http2frameWriteMsg{
+		write:  http2writeWindowUpdate{streamID: streamID, n: uint32(n)},
+		stream: st,
+	})
+	var ok bool
+	if st == nil {
+		ok = sc.inflow.add(n)
+	} else {
+		ok = st.inflow.add(n)
+	}
+	if !ok {
+		panic("internal error; sent too many window updates without decrements?")
+	}
+}
+
+type http2requestBody struct {
+	stream        *http2stream
+	conn          *http2serverConn
+	closed        bool
+	pipe          *http2pipe // non-nil if we have a HTTP entity message body
+	needsContinue bool       // need to send a 100-continue
+}
+
+func (b *http2requestBody) Close() error {
+	if b.pipe != nil {
+		b.pipe.Close(http2errClosedBody)
+	}
+	b.closed = true
+	return nil
+}
+
+func (b *http2requestBody) Read(p []byte) (n int, err error) {
+	if b.needsContinue {
+		b.needsContinue = false
+		b.conn.write100ContinueHeaders(b.stream)
+	}
+	if b.pipe == nil {
+		return 0, io.EOF
+	}
+	n, err = b.pipe.Read(p)
+	if n > 0 {
+		b.conn.noteBodyReadFromHandler(b.stream, n)
+	}
+	return
+}
+
+// responseWriter is the http.ResponseWriter implementation.  It's
+// intentionally small (1 pointer wide) to minimize garbage.  The
+// responseWriterState pointer inside is zeroed at the end of a
+// request (in handlerDone) and calls on the responseWriter thereafter
+// simply crash (caller's mistake), but the much larger responseWriterState
+// and buffers are reused between multiple requests.
+type http2responseWriter struct {
+	rws *http2responseWriterState
+}
+
+// Optional http.ResponseWriter interfaces implemented.
+var (
+	_ CloseNotifier     = (*http2responseWriter)(nil)
+	_ Flusher           = (*http2responseWriter)(nil)
+	_ http2stringWriter = (*http2responseWriter)(nil)
+)
+
+type http2responseWriterState struct {
+	// immutable within a request:
+	stream *http2stream
+	req    *Request
+	body   *http2requestBody // to close at end of request, if DATA frames didn't
+	conn   *http2serverConn
+
+	// TODO: adjust buffer writing sizes based on server config, frame size updates from peer, etc
+	bw *bufio.Writer // writing to a chunkWriter{this *responseWriterState}
+
+	// mutated by http.Handler goroutine:
+	handlerHeader Header // nil until called
+	snapHeader    Header // snapshot of handlerHeader at WriteHeader time
+	status        int    // status code passed to WriteHeader
+	wroteHeader   bool   // WriteHeader called (explicitly or implicitly). Not necessarily sent to user yet.
+	sentHeader    bool   // have we sent the header frame?
+	handlerDone   bool   // handler has finished
+	curWrite      http2writeData
+
+	closeNotifierMu sync.Mutex // guards closeNotifierCh
+	closeNotifierCh chan bool  // nil until first used
+}
+
+type http2chunkWriter struct{ rws *http2responseWriterState }
+
+func (cw http2chunkWriter) Write(p []byte) (n int, err error) { return cw.rws.writeChunk(p) }
+
+// writeChunk writes chunks from the bufio.Writer. But because
+// bufio.Writer passes large writes straight through, bypassing its
+// own buffering, p may be arbitrarily large.
+//
+// writeChunk is also responsible (on the first chunk) for sending the
+// HEADER response.
+func (rws *http2responseWriterState) writeChunk(p []byte) (n int, err error) {
+	if !rws.wroteHeader {
+		rws.writeHeader(200)
+	}
+	if !rws.sentHeader {
+		rws.sentHeader = true
+		var ctype, clen string // implicit ones, if we can calculate them
+		if rws.handlerDone && rws.snapHeader.Get("Content-Length") == "" {
+			clen = strconv.Itoa(len(p))
+		}
+		if rws.snapHeader.Get("Content-Type") == "" {
+			ctype = DetectContentType(p)
+		}
+		endStream := rws.handlerDone && len(p) == 0
+		rws.conn.writeHeaders(rws.stream, &http2writeResHeaders{
+			streamID:      rws.stream.id,
+			httpResCode:   rws.status,
+			h:             rws.snapHeader,
+			endStream:     endStream,
+			contentType:   ctype,
+			contentLength: clen,
+		})
+		if endStream {
+			return 0, nil
+		}
+	}
+	if len(p) == 0 && !rws.handlerDone {
+		return 0, nil
+	}
+	curWrite := &rws.curWrite
+	curWrite.streamID = rws.stream.id
+	curWrite.p = p
+	curWrite.endStream = rws.handlerDone
+	if err := rws.conn.writeDataFromHandler(rws.stream, curWrite); err != nil {
+		return 0, err
+	}
+	return len(p), nil
+}
+
+func (w *http2responseWriter) Flush() {
+	rws := w.rws
+	if rws == nil {
+		panic("Flush called after Handler finished")
+	}
+	if rws.bw.Buffered() > 0 {
+		if err := rws.bw.Flush(); err != nil {
+			// Ignore the error; the frame writer already knows.
+			return
+		}
+	} else {
+		// The bufio.Writer won't call writeChunk with zero bytes, so
+		// force an empty chunk to flush headers (and END_STREAM if
+		// the handler is done).
+		rws.writeChunk(nil)
+	}
+}
+
+func (w *http2responseWriter) CloseNotify() <-chan bool {
+	rws := w.rws
+	if rws == nil {
+		panic("CloseNotify called after Handler finished")
+	}
+	rws.closeNotifierMu.Lock()
+	ch := rws.closeNotifierCh
+	if ch == nil {
+		ch = make(chan bool, 1)
+		rws.closeNotifierCh = ch
+		go func() {
+			rws.stream.cw.Wait()
+			ch <- true
+		}()
+	}
+	rws.closeNotifierMu.Unlock()
+	return ch
+}
+
+func (w *http2responseWriter) Header() Header {
+	rws := w.rws
+	if rws == nil {
+		panic("Header called after Handler finished")
+	}
+	if rws.handlerHeader == nil {
+		rws.handlerHeader = make(Header)
+	}
+	return rws.handlerHeader
+}
+
+func (w *http2responseWriter) WriteHeader(code int) {
+	rws := w.rws
+	if rws == nil {
+		panic("WriteHeader called after Handler finished")
+	}
+	rws.writeHeader(code)
+}
+
+func (rws *http2responseWriterState) writeHeader(code int) {
+	if !rws.wroteHeader {
+		rws.wroteHeader = true
+		rws.status = code
+		if len(rws.handlerHeader) > 0 {
+			rws.snapHeader = http2cloneHeader(rws.handlerHeader)
+		}
+	}
+}
+
+func http2cloneHeader(h Header) Header {
+	h2 := make(Header, len(h))
+	for k, vv := range h {
+		vv2 := make([]string, len(vv))
+		copy(vv2, vv)
+		h2[k] = vv2
+	}
+	return h2
+}
+
+// The Life Of A Write is like this:
+//
+// * Handler calls w.Write or w.WriteString ->
+// * -> rws.bw (*bufio.Writer) ->
+// * (Handler might call Flush)
+// * -> chunkWriter{rws} ->
+// * -> responseWriterState.writeChunk(p []byte)
+//      (most of the magic; see the comment there)
+func (w *http2responseWriter) Write(p []byte) (n int, err error) {
+	return w.write(len(p), p, "")
+}
+
+func (w *http2responseWriter) WriteString(s string) (n int, err error) {
+	return w.write(len(s), nil, s)
+}
+
+// Exactly one of dataB or dataS is set.
+func (w *http2responseWriter) write(lenData int, dataB []byte, dataS string) (n int, err error) {
+	rws := w.rws
+	if rws == nil {
+		panic("Write called after Handler finished")
+	}
+	if !rws.wroteHeader {
+		w.WriteHeader(200)
+	}
+	if dataB != nil {
+		return rws.bw.Write(dataB)
+	} else {
+		return rws.bw.WriteString(dataS)
+	}
+}
+
+func (w *http2responseWriter) handlerDone() {
+	rws := w.rws
+	if rws == nil {
+		panic("handlerDone called twice")
+	}
+	rws.handlerDone = true
+	w.Flush()
+	w.rws = nil
+	http2responseWriterStatePool.Put(rws)
+}
+
+type http2Transport struct {
+	Fallback RoundTripper
+
+	// TODO: remove this and make more general with a TLS dial hook, like http
+	InsecureTLSDial bool
+
+	connMu sync.Mutex
+	conns  map[string][]*http2clientConn // key is host:port
+}
+
+type http2clientConn struct {
+	t        *http2Transport
+	tconn    *tls.Conn
+	tlsState *tls.ConnectionState
+	connKey  []string // key(s) this connection is cached in, in t.conns
+
+	readerDone chan struct{} // closed on error
+	readerErr  error         // set before readerDone is closed
+	hdec       *hpack.Decoder
+	nextRes    *Response
+
+	mu           sync.Mutex
+	closed       bool
+	goAway       *http2GoAwayFrame // if non-nil, the GoAwayFrame we received
+	streams      map[uint32]*http2clientStream
+	nextStreamID uint32
+	bw           *bufio.Writer
+	werr         error // first write error that has occurred
+	br           *bufio.Reader
+	fr           *http2Framer
+	// Settings from peer:
+	maxFrameSize         uint32
+	maxConcurrentStreams uint32
+	initialWindowSize    uint32
+	hbuf                 bytes.Buffer // HPACK encoder writes into this
+	henc                 *hpack.Encoder
+}
+
+type http2clientStream struct {
+	ID   uint32
+	resc chan http2resAndError
+	pw   *io.PipeWriter
+	pr   *io.PipeReader
+}
+
+type http2stickyErrWriter struct {
+	w   io.Writer
+	err *error
+}
+
+func (sew http2stickyErrWriter) Write(p []byte) (n int, err error) {
+	if *sew.err != nil {
+		return 0, *sew.err
+	}
+	n, err = sew.w.Write(p)
+	*sew.err = err
+	return
+}
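+
+// stickyErrWriter funnels all writes through one shared error slot,
+// as newClientConn does below. Sketch (conn is any io.Writer):
+//
+//	var werr error
+//	bw := bufio.NewWriter(http2stickyErrWriter{conn, &werr})
+//	bw.Write(p)
+//	bw.Flush()
+//	// werr now holds the first write error, if any occurred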
+
+func (t *http2Transport) RoundTrip(req *Request) (*Response, error) {
+	if req.URL.Scheme != "https" {
+		if t.Fallback == nil {
+			return nil, errors.New("http2: unsupported scheme and no Fallback")
+		}
+		return t.Fallback.RoundTrip(req)
+	}
+
+	host, port, err := net.SplitHostPort(req.URL.Host)
+	if err != nil {
+		host = req.URL.Host
+		port = "443"
+	}
+
+	for {
+		cc, err := t.getClientConn(host, port)
+		if err != nil {
+			return nil, err
+		}
+		res, err := cc.roundTrip(req)
+		if http2shouldRetryRequest(err) {
+			continue
+		}
+		if err != nil {
+			return nil, err
+		}
+		return res, nil
+	}
+}
+
+// CloseIdleConnections closes any connections that were established
+// by previous requests but are now sitting idle.
+// It does not interrupt any connections currently in use.
+func (t *http2Transport) CloseIdleConnections() {
+	t.connMu.Lock()
+	defer t.connMu.Unlock()
+	for _, vv := range t.conns {
+		for _, cc := range vv {
+			cc.closeIfIdle()
+		}
+	}
+}
+
+var http2errClientConnClosed = errors.New("http2: client conn is closed")
+
+func http2shouldRetryRequest(err error) bool {
+	// Retry only when the cached connection was already closed;
+	// in that case the request never reached the wire.
+	return err == http2errClientConnClosed
+}
+
+func (t *http2Transport) removeClientConn(cc *http2clientConn) {
+	t.connMu.Lock()
+	defer t.connMu.Unlock()
+	for _, key := range cc.connKey {
+		vv, ok := t.conns[key]
+		if !ok {
+			continue
+		}
+		newList := http2filterOutClientConn(vv, cc)
+		if len(newList) > 0 {
+			t.conns[key] = newList
+		} else {
+			delete(t.conns, key)
+		}
+	}
+}
+
+func http2filterOutClientConn(in []*http2clientConn, exclude *http2clientConn) []*http2clientConn {
+	out := in[:0]
+	for _, v := range in {
+		if v != exclude {
+			out = append(out, v)
+		}
+	}
+	return out
+}
+
+func (t *http2Transport) getClientConn(host, port string) (*http2clientConn, error) {
+	t.connMu.Lock()
+	defer t.connMu.Unlock()
+
+	key := net.JoinHostPort(host, port)
+
+	for _, cc := range t.conns[key] {
+		if cc.canTakeNewRequest() {
+			return cc, nil
+		}
+	}
+	if t.conns == nil {
+		t.conns = make(map[string][]*http2clientConn)
+	}
+	cc, err := t.newClientConn(host, port, key)
+	if err != nil {
+		return nil, err
+	}
+	t.conns[key] = append(t.conns[key], cc)
+	return cc, nil
+}
+
+func (t *http2Transport) newClientConn(host, port, key string) (*http2clientConn, error) {
+	cfg := &tls.Config{
+		ServerName:         host,
+		NextProtos:         []string{http2NextProtoTLS},
+		InsecureSkipVerify: t.InsecureTLSDial,
+	}
+	tconn, err := tls.Dial("tcp", net.JoinHostPort(host, port), cfg)
+	if err != nil {
+		return nil, err
+	}
+	if err := tconn.Handshake(); err != nil {
+		return nil, err
+	}
+	if !t.InsecureTLSDial {
+		if err := tconn.VerifyHostname(cfg.ServerName); err != nil {
+			return nil, err
+		}
+	}
+	state := tconn.ConnectionState()
+	if p := state.NegotiatedProtocol; p != http2NextProtoTLS {
+		return nil, fmt.Errorf("bad protocol: %v", p)
+	}
+	if !state.NegotiatedProtocolIsMutual {
+		return nil, errors.New("could not negotiate protocol mutually")
+	}
+	if _, err := tconn.Write(http2clientPreface); err != nil {
+		return nil, err
+	}
+
+	cc := &http2clientConn{
+		t:                    t,
+		tconn:                tconn,
+		connKey:              []string{key},
+		tlsState:             &state,
+		readerDone:           make(chan struct{}),
+		nextStreamID:         1,
+		maxFrameSize:         16 << 10,
+		initialWindowSize:    65535,
+		maxConcurrentStreams: 1000,
+		streams:              make(map[uint32]*http2clientStream),
+	}
+	cc.bw = bufio.NewWriter(http2stickyErrWriter{tconn, &cc.werr})
+	cc.br = bufio.NewReader(tconn)
+	cc.fr = http2NewFramer(cc.bw, cc.br)
+	cc.henc = hpack.NewEncoder(&cc.hbuf)
+
+	cc.fr.WriteSettings()
+	// Enlarge the connection-level flow-control window up front.
+	cc.fr.WriteWindowUpdate(0, 1<<30)
+	cc.bw.Flush()
+	if cc.werr != nil {
+		return nil, cc.werr
+	}
+
+	f, err := cc.fr.ReadFrame()
+	if err != nil {
+		return nil, err
+	}
+	sf, ok := f.(*http2SettingsFrame)
+	if !ok {
+		return nil, fmt.Errorf("expected settings frame, got: %T", f)
+	}
+	cc.fr.WriteSettingsAck()
+	cc.bw.Flush()
+
+	sf.ForeachSetting(func(s http2Setting) error {
+		switch s.ID {
+		case http2SettingMaxFrameSize:
+			cc.maxFrameSize = s.Val
+		case http2SettingMaxConcurrentStreams:
+			cc.maxConcurrentStreams = s.Val
+		case http2SettingInitialWindowSize:
+			cc.initialWindowSize = s.Val
+		default:
+			log.Printf("Unhandled Setting: %v", s)
+		}
+		return nil
+	})
+
+	cc.hdec = hpack.NewDecoder(http2initialHeaderTableSize, cc.onNewHeaderField)
+
+	go cc.readLoop()
+	return cc, nil
+}
+
+func (cc *http2clientConn) setGoAway(f *http2GoAwayFrame) {
+	cc.mu.Lock()
+	defer cc.mu.Unlock()
+	cc.goAway = f
+}
+
+func (cc *http2clientConn) canTakeNewRequest() bool {
+	cc.mu.Lock()
+	defer cc.mu.Unlock()
+	return cc.goAway == nil &&
+		int64(len(cc.streams)+1) < int64(cc.maxConcurrentStreams) &&
+		cc.nextStreamID < 2147483647
+}
+
+func (cc *http2clientConn) closeIfIdle() {
+	cc.mu.Lock()
+	if len(cc.streams) > 0 {
+		cc.mu.Unlock()
+		return
+	}
+	cc.closed = true
+	cc.mu.Unlock()
+	cc.tconn.Close()
+}
+
+func (cc *http2clientConn) roundTrip(req *Request) (*Response, error) {
+	cc.mu.Lock()
+
+	if cc.closed {
+		cc.mu.Unlock()
+		return nil, http2errClientConnClosed
+	}
+
+	cs := cc.newStream()
+	hasBody := false // TODO: request bodies are not supported yet
+
+	hdrs := cc.encodeHeaders(req)
+	first := true
+	for len(hdrs) > 0 {
+		chunk := hdrs
+		if len(chunk) > int(cc.maxFrameSize) {
+			chunk = chunk[:cc.maxFrameSize]
+		}
+		hdrs = hdrs[len(chunk):]
+		endHeaders := len(hdrs) == 0
+		if first {
+			cc.fr.WriteHeaders(http2HeadersFrameParam{
+				StreamID:      cs.ID,
+				BlockFragment: chunk,
+				EndStream:     !hasBody,
+				EndHeaders:    endHeaders,
+			})
+			first = false
+		} else {
+			cc.fr.WriteContinuation(cs.ID, endHeaders, chunk)
+		}
+	}
+	cc.bw.Flush()
+	werr := cc.werr
+	cc.mu.Unlock()
+
+	if hasBody {
+		// TODO: write the request body; unimplemented (hasBody is
+		// always false above).
+	}
+
+	if werr != nil {
+		return nil, werr
+	}
+
+	re := <-cs.resc
+	if re.err != nil {
+		return nil, re.err
+	}
+	res := re.res
+	res.Request = req
+	res.TLS = cc.tlsState
+	return res, nil
+}
+
+// requires cc.mu be held.
+func (cc *http2clientConn) encodeHeaders(req *Request) []byte {
+	cc.hbuf.Reset()
+
+	host := req.Host
+	if host == "" {
+		host = req.URL.Host
+	}
+
+	path := req.URL.Path
+	if path == "" {
+		path = "/"
+	}
+
+	cc.writeHeader(":authority", host)
+	cc.writeHeader(":method", req.Method)
+	cc.writeHeader(":path", path)
+	cc.writeHeader(":scheme", "https")
+
+	for k, vv := range req.Header {
+		lowKey := strings.ToLower(k)
+		if lowKey == "host" {
+			continue
+		}
+		for _, v := range vv {
+			cc.writeHeader(lowKey, v)
+		}
+	}
+	return cc.hbuf.Bytes()
+}
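+
+// For example, a GET for https://example.com/x is encoded as the
+// pseudo-header fields
+//
+//	:authority = example.com
+//	:method    = GET
+//	:path      = /x
+//	:scheme    = https
+//
+// followed by the request's regular headers with lowercased names
+// (the Host header is skipped, since :authority carries it).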
+
+func (cc *http2clientConn) writeHeader(name, value string) {
+	log.Printf("sending %q = %q", name, value)
+	cc.henc.WriteField(hpack.HeaderField{Name: name, Value: value})
+}
+
+type http2resAndError struct {
+	res *Response
+	err error
+}
+
+// requires cc.mu be held.
+func (cc *http2clientConn) newStream() *http2clientStream {
+	cs := &http2clientStream{
+		ID:   cc.nextStreamID,
+		resc: make(chan http2resAndError, 1),
+	}
+	cc.nextStreamID += 2
+	cc.streams[cs.ID] = cs
+	return cs
+}
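+
+// Client-initiated stream IDs must be odd (RFC 7540, section 5.1.1),
+// which is why newStream advances nextStreamID by 2: successive calls
+// hand out IDs like 1, 3, 5, ...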
+
+func (cc *http2clientConn) streamByID(id uint32, andRemove bool) *http2clientStream {
+	cc.mu.Lock()
+	defer cc.mu.Unlock()
+	cs := cc.streams[id]
+	if andRemove {
+		delete(cc.streams, id)
+	}
+	return cs
+}
+
+// runs in its own goroutine.
+func (cc *http2clientConn) readLoop() {
+	defer cc.t.removeClientConn(cc)
+	defer close(cc.readerDone)
+
+	activeRes := map[uint32]*http2clientStream{}
+
+	defer func() {
+		err := cc.readerErr
+		if err == io.EOF {
+			err = io.ErrUnexpectedEOF
+		}
+		for _, cs := range activeRes {
+			cs.pw.CloseWithError(err)
+		}
+	}()
+
+	// continueStreamID is the stream ID for which we're currently
+	// expecting CONTINUATION frames.
+	var continueStreamID uint32
+
+	for {
+		f, err := cc.fr.ReadFrame()
+		if err != nil {
+			cc.readerErr = err
+			return
+		}
+		log.Printf("Transport received %v: %#v", f.Header(), f)
+
+		streamID := f.Header().StreamID
+
+		_, isContinue := f.(*http2ContinuationFrame)
+		if isContinue {
+			if streamID != continueStreamID {
+				log.Printf("Protocol violation: got CONTINUATION with id %d; want %d", streamID, continueStreamID)
+				cc.readerErr = http2ConnectionError(http2ErrCodeProtocol)
+				return
+			}
+		} else if continueStreamID != 0 {
+
+			log.Printf("Protocol violation: got %T for stream %d, want CONTINUATION for %d", f, streamID, continueStreamID)
+			cc.readerErr = http2ConnectionError(http2ErrCodeProtocol)
+			return
+		}
+
+		if streamID%2 == 0 {
+			// Ignore frames on even (server-initiated) stream IDs;
+			// this client doesn't handle server push.
+			continue
+		}
+		streamEnded := false
+		if ff, ok := f.(http2streamEnder); ok {
+			streamEnded = ff.StreamEnded()
+		}
+
+		cs := cc.streamByID(streamID, streamEnded)
+		if cs == nil {
+			log.Printf("Received frame for untracked stream ID %d", streamID)
+			continue
+		}
+
+		switch f := f.(type) {
+		case *http2HeadersFrame:
+			cc.nextRes = &Response{
+				Proto:      "HTTP/2.0",
+				ProtoMajor: 2,
+				Header:     make(Header),
+			}
+			cs.pr, cs.pw = io.Pipe()
+			cc.hdec.Write(f.HeaderBlockFragment())
+		case *http2ContinuationFrame:
+			cc.hdec.Write(f.HeaderBlockFragment())
+		case *http2DataFrame:
+			log.Printf("DATA: %q", f.Data())
+			cs.pw.Write(f.Data())
+		case *http2GoAwayFrame:
+			cc.t.removeClientConn(cc)
+			if f.ErrCode != 0 {
+
+				log.Printf("transport got GOAWAY with error code = %v", f.ErrCode)
+			}
+			cc.setGoAway(f)
+		default:
+			log.Printf("Transport: unhandled response frame type %T", f)
+		}
+		headersEnded := false
+		if he, ok := f.(http2headersEnder); ok {
+			headersEnded = he.HeadersEnded()
+			if headersEnded {
+				continueStreamID = 0
+			} else {
+				continueStreamID = streamID
+			}
+		}
+
+		if streamEnded {
+			cs.pw.Close()
+			delete(activeRes, streamID)
+		}
+		if headersEnded {
+			if cs == nil {
+				panic("couldn't find stream")
+			}
+
+			cc.nextRes.Body = cs.pr
+			res := cc.nextRes
+			activeRes[streamID] = cs
+			cs.resc <- http2resAndError{res: res}
+		}
+	}
+}
+
+func (cc *http2clientConn) onNewHeaderField(f hpack.HeaderField) {
+
+	log.Printf("Header field: %+v", f)
+	if f.Name == ":status" {
+		code, err := strconv.Atoi(f.Value)
+		if err != nil {
+			panic("TODO: be graceful")
+		}
+		cc.nextRes.Status = f.Value + " " + StatusText(code)
+		cc.nextRes.StatusCode = code
+		return
+	}
+	if strings.HasPrefix(f.Name, ":") {
+
+		return
+	}
+	cc.nextRes.Header.Add(CanonicalHeaderKey(f.Name), f.Value)
+}
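+
+// onNewHeaderField is invoked by the hpack decoder once per decoded
+// field, in order, as readLoop writes HEADERS and CONTINUATION block
+// fragments into cc.hdec; cc.nextRes is the response being assembled.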
+
+// writeFramer is implemented by any type that is used to write frames.
+type http2writeFramer interface {
+	writeFrame(http2writeContext) error
+}
+
+// writeContext is the interface needed by the various frame writer
+// types below. All the writeFrame methods below are scheduled via the
+// frame writing scheduler (see writeScheduler in writesched.go).
+//
+// This interface is implemented by *serverConn.
+// TODO: use it from the client code too, once it exists.
+type http2writeContext interface {
+	Framer() *http2Framer
+	Flush() error
+	CloseConn() error
+	// HeaderEncoder returns an HPACK encoder that writes to the
+	// returned buffer.
+	HeaderEncoder() (*hpack.Encoder, *bytes.Buffer)
+}
+
+// endsStream reports whether the given frame writer w will locally
+// close the stream.
+func http2endsStream(w http2writeFramer) bool {
+	switch v := w.(type) {
+	case *http2writeData:
+		return v.endStream
+	case *http2writeResHeaders:
+		return v.endStream
+	}
+	return false
+}
+
+type http2flushFrameWriter struct{}
+
+func (http2flushFrameWriter) writeFrame(ctx http2writeContext) error {
+	return ctx.Flush()
+}
+
+type http2writeSettings []http2Setting
+
+func (s http2writeSettings) writeFrame(ctx http2writeContext) error {
+	return ctx.Framer().WriteSettings([]http2Setting(s)...)
+}
+
+type http2writeGoAway struct {
+	maxStreamID uint32
+	code        http2ErrCode
+}
+
+func (p *http2writeGoAway) writeFrame(ctx http2writeContext) error {
+	err := ctx.Framer().WriteGoAway(p.maxStreamID, p.code, nil)
+	if p.code != 0 {
+		ctx.Flush()
+		time.Sleep(50 * time.Millisecond)
+		ctx.CloseConn()
+	}
+	return err
+}
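+
+// The Flush and brief sleep above are a best-effort attempt to get the
+// GOAWAY onto the wire before the connection is closed; GOAWAY has no
+// acknowledgment to wait for.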
+
+type http2writeData struct {
+	streamID  uint32
+	p         []byte
+	endStream bool
+}
+
+func (w *http2writeData) String() string {
+	return fmt.Sprintf("writeData(stream=%d, p=%d, endStream=%v)", w.streamID, len(w.p), w.endStream)
+}
+
+func (w *http2writeData) writeFrame(ctx http2writeContext) error {
+	return ctx.Framer().WriteData(w.streamID, w.endStream, w.p)
+}
+
+func (se http2StreamError) writeFrame(ctx http2writeContext) error {
+	return ctx.Framer().WriteRSTStream(se.StreamID, se.Code)
+}
+
+type http2writePingAck struct{ pf *http2PingFrame }
+
+func (w http2writePingAck) writeFrame(ctx http2writeContext) error {
+	return ctx.Framer().WritePing(true, w.pf.Data)
+}
+
+type http2writeSettingsAck struct{}
+
+func (http2writeSettingsAck) writeFrame(ctx http2writeContext) error {
+	return ctx.Framer().WriteSettingsAck()
+}
+
+// writeResHeaders is a request to write a HEADERS and 0+ CONTINUATION frames
+// for HTTP response headers from a server handler.
+type http2writeResHeaders struct {
+	streamID    uint32
+	httpResCode int
+	h           Header // may be nil
+	endStream   bool
+
+	contentType   string
+	contentLength string
+}
+
+func (w *http2writeResHeaders) writeFrame(ctx http2writeContext) error {
+	enc, buf := ctx.HeaderEncoder()
+	buf.Reset()
+	enc.WriteField(hpack.HeaderField{Name: ":status", Value: http2httpCodeString(w.httpResCode)})
+	for k, vv := range w.h {
+		k = http2lowerHeader(k)
+		for _, v := range vv {
+
+			if k == "transfer-encoding" && v != "trailers" {
+				continue
+			}
+			enc.WriteField(hpack.HeaderField{Name: k, Value: v})
+		}
+	}
+	if w.contentType != "" {
+		enc.WriteField(hpack.HeaderField{Name: "content-type", Value: w.contentType})
+	}
+	if w.contentLength != "" {
+		enc.WriteField(hpack.HeaderField{Name: "content-length", Value: w.contentLength})
+	}
+
+	headerBlock := buf.Bytes()
+	if len(headerBlock) == 0 {
+		panic("unexpected empty hpack")
+	}
+
+	// For now we're lazy and just pick the minimum MAX_FRAME_SIZE
+	// that all peers must support (16KB). Later we could care
+	// more and send larger frames if the peer advertised it, but
+	// there's little point. Most headers are small anyway (so we
+	// generally won't have CONTINUATION frames), and extra frames
+	// only cost 9 bytes of overhead each.
+	const maxFrameSize = 16384
+
+	first := true
+	for len(headerBlock) > 0 {
+		frag := headerBlock
+		if len(frag) > maxFrameSize {
+			frag = frag[:maxFrameSize]
+		}
+		headerBlock = headerBlock[len(frag):]
+		endHeaders := len(headerBlock) == 0
+		var err error
+		if first {
+			first = false
+			err = ctx.Framer().WriteHeaders(http2HeadersFrameParam{
+				StreamID:      w.streamID,
+				BlockFragment: frag,
+				EndStream:     w.endStream,
+				EndHeaders:    endHeaders,
+			})
+		} else {
+			err = ctx.Framer().WriteContinuation(w.streamID, endHeaders, frag)
+		}
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
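+
+// To illustrate the loop above: a 40,000-byte header block with
+// maxFrameSize 16384 goes out as a HEADERS frame carrying bytes
+// [0:16384), a CONTINUATION carrying [16384:32768), and a final
+// CONTINUATION carrying the remaining 7,232 bytes with END_HEADERS set.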
+
+type http2write100ContinueHeadersFrame struct {
+	streamID uint32
+}
+
+func (w http2write100ContinueHeadersFrame) writeFrame(ctx http2writeContext) error {
+	enc, buf := ctx.HeaderEncoder()
+	buf.Reset()
+	enc.WriteField(hpack.HeaderField{Name: ":status", Value: "100"})
+	return ctx.Framer().WriteHeaders(http2HeadersFrameParam{
+		StreamID:      w.streamID,
+		BlockFragment: buf.Bytes(),
+		EndStream:     false,
+		EndHeaders:    true,
+	})
+}
+
+type http2writeWindowUpdate struct {
+	streamID uint32 // or 0 for conn-level
+	n        uint32
+}
+
+func (wu http2writeWindowUpdate) writeFrame(ctx http2writeContext) error {
+	return ctx.Framer().WriteWindowUpdate(wu.streamID, wu.n)
+}
+
+// frameWriteMsg is a request to write a frame.
+type http2frameWriteMsg struct {
+	// write is the interface value that does the writing, once the
+	// writeScheduler (below) has decided to select this frame
+	// to write. The write functions are all defined in write.go.
+	write http2writeFramer
+
+	stream *http2stream // used for prioritization. nil for non-stream frames.
+
+	// done, if non-nil, must be a buffered channel with space for
+	// 1 message and is sent the return value from write (or an
+	// earlier error) when the frame has been written.
+	done chan error
+}
+
+// for debugging only:
+func (wm http2frameWriteMsg) String() string {
+	var streamID uint32
+	if wm.stream != nil {
+		streamID = wm.stream.id
+	}
+	var des string
+	if s, ok := wm.write.(fmt.Stringer); ok {
+		des = s.String()
+	} else {
+		des = fmt.Sprintf("%T", wm.write)
+	}
+	return fmt.Sprintf("[frameWriteMsg stream=%d, ch=%v, type: %v]", streamID, wm.done != nil, des)
+}
+
+// writeScheduler tracks pending frames to write and their priorities, and
+// decides which frame to write next. It is not safe for concurrent use.
+type http2writeScheduler struct {
+	// zero are frames not associated with a specific stream.
+	// They're sent before any stream-specific frames.
+	zero http2writeQueue
+
+	// maxFrameSize is the maximum size of a DATA frame
+	// we'll write. Must be non-zero and between 16K-16M.
+	maxFrameSize uint32
+
+	// sq contains the stream-specific queues, keyed by stream ID.
+	// When a stream is idle, it's deleted from the map.
+	sq map[uint32]*http2writeQueue
+
+	// canSend is a slice of memory that's reused between frame
+	// scheduling decisions to hold the list of writeQueues (from sq)
+	// which have enough flow control data to send. After canSend is
+	// built, the best is selected.
+	canSend []*http2writeQueue
+
+	// pool of empty queues for reuse.
+	queuePool []*http2writeQueue
+}
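+
+// A minimal sketch of how the owner of a writeContext might drain the
+// scheduler (the loop is illustrative; add, empty, and take are the
+// real methods defined below):
+//
+//	var ws http2writeScheduler
+//	ws.maxFrameSize = 16384
+//	ws.add(http2frameWriteMsg{write: http2writeSettingsAck{}})
+//	for !ws.empty() {
+//		wm, ok := ws.take()
+//		if !ok {
+//			break // everything left is blocked on flow control
+//		}
+//		wm.write.writeFrame(ctx) // ctx is some http2writeContext
+//	}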
+
+func (ws *http2writeScheduler) putEmptyQueue(q *http2writeQueue) {
+	if len(q.s) != 0 {
+		panic("queue must be empty")
+	}
+	ws.queuePool = append(ws.queuePool, q)
+}
+
+func (ws *http2writeScheduler) getEmptyQueue() *http2writeQueue {
+	ln := len(ws.queuePool)
+	if ln == 0 {
+		return new(http2writeQueue)
+	}
+	q := ws.queuePool[ln-1]
+	ws.queuePool = ws.queuePool[:ln-1]
+	return q
+}
+
+func (ws *http2writeScheduler) empty() bool { return ws.zero.empty() && len(ws.sq) == 0 }
+
+func (ws *http2writeScheduler) add(wm http2frameWriteMsg) {
+	st := wm.stream
+	if st == nil {
+		ws.zero.push(wm)
+	} else {
+		ws.streamQueue(st.id).push(wm)
+	}
+}
+
+func (ws *http2writeScheduler) streamQueue(streamID uint32) *http2writeQueue {
+	if q, ok := ws.sq[streamID]; ok {
+		return q
+	}
+	if ws.sq == nil {
+		ws.sq = make(map[uint32]*http2writeQueue)
+	}
+	q := ws.getEmptyQueue()
+	ws.sq[streamID] = q
+	return q
+}
+
+// take returns the most important frame to write and removes it from the scheduler.
+// It is illegal to call this if the scheduler is empty or if there are no connection-level
+// flow control bytes available.
+func (ws *http2writeScheduler) take() (wm http2frameWriteMsg, ok bool) {
+	if ws.maxFrameSize == 0 {
+		panic("internal error: ws.maxFrameSize not initialized or invalid")
+	}
+
+	if !ws.zero.empty() {
+		return ws.zero.shift(), true
+	}
+	if len(ws.sq) == 0 {
+		return
+	}
+
+	for id, q := range ws.sq {
+		if q.firstIsNoCost() {
+			return ws.takeFrom(id, q)
+		}
+	}
+
+	if len(ws.canSend) != 0 {
+		panic("should be empty")
+	}
+	for _, q := range ws.sq {
+		if n := ws.streamWritableBytes(q); n > 0 {
+			ws.canSend = append(ws.canSend, q)
+		}
+	}
+	if len(ws.canSend) == 0 {
+		return
+	}
+	defer ws.zeroCanSend()
+
+	q := ws.canSend[0]
+
+	return ws.takeFrom(q.streamID(), q)
+}
+
+// zeroCanSend is deferred from take.
+func (ws *http2writeScheduler) zeroCanSend() {
+	for i := range ws.canSend {
+		ws.canSend[i] = nil
+	}
+	ws.canSend = ws.canSend[:0]
+}
+
+// streamWritableBytes returns the number of DATA bytes we could write
+// from the given queue's stream, if this stream/queue were
+// selected. It is an error to call this if q's head isn't a
+// *writeData.
+func (ws *http2writeScheduler) streamWritableBytes(q *http2writeQueue) int32 {
+	wm := q.head()
+	ret := wm.stream.flow.available()
+	if ret == 0 {
+		return 0
+	}
+	if int32(ws.maxFrameSize) < ret {
+		ret = int32(ws.maxFrameSize)
+	}
+	if ret == 0 {
+		panic("internal error: ws.maxFrameSize not initialized or invalid")
+	}
+	wd := wm.write.(*http2writeData)
+	if len(wd.p) < int(ret) {
+		ret = int32(len(wd.p))
+	}
+	return ret
+}
+
+func (ws *http2writeScheduler) takeFrom(id uint32, q *http2writeQueue) (wm http2frameWriteMsg, ok bool) {
+	wm = q.head()
+
+	if wd, ok := wm.write.(*http2writeData); ok && len(wd.p) > 0 {
+		allowed := wm.stream.flow.available()
+		if allowed == 0 {
+
+			return http2frameWriteMsg{}, false
+		}
+		if int32(ws.maxFrameSize) < allowed {
+			allowed = int32(ws.maxFrameSize)
+		}
+
+		if len(wd.p) > int(allowed) {
+			wm.stream.flow.take(allowed)
+			chunk := wd.p[:allowed]
+			wd.p = wd.p[allowed:]
+
+			return http2frameWriteMsg{
+				stream: wm.stream,
+				write: &http2writeData{
+					streamID: wd.streamID,
+					p:        chunk,
+
+					endStream: false,
+				},
+
+				done: nil,
+			}, true
+		}
+		wm.stream.flow.take(int32(len(wd.p)))
+	}
+
+	q.shift()
+	if q.empty() {
+		ws.putEmptyQueue(q)
+		delete(ws.sq, id)
+	}
+	return wm, true
+}
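+
+// For example, if the head of a stream's queue is a *writeData holding
+// 100 bytes but the stream's flow-control window only has 40 bytes
+// available, takeFrom returns a synthesized 40-byte writeData (endStream
+// false, no done channel) and leaves the remaining 60 bytes queued for
+// a later call.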
+
+func (ws *http2writeScheduler) forgetStream(id uint32) {
+	q, ok := ws.sq[id]
+	if !ok {
+		return
+	}
+	delete(ws.sq, id)
+
+	for i := range q.s {
+		q.s[i] = http2frameWriteMsg{}
+	}
+	q.s = q.s[:0]
+	ws.putEmptyQueue(q)
+}
+
+type http2writeQueue struct {
+	s []http2frameWriteMsg
+}
+
+// streamID returns the stream ID for a non-empty stream-specific queue.
+func (q *http2writeQueue) streamID() uint32 { return q.s[0].stream.id }
+
+func (q *http2writeQueue) empty() bool { return len(q.s) == 0 }
+
+func (q *http2writeQueue) push(wm http2frameWriteMsg) {
+	q.s = append(q.s, wm)
+}
+
+// head returns the next item that would be removed by shift.
+func (q *http2writeQueue) head() http2frameWriteMsg {
+	if len(q.s) == 0 {
+		panic("invalid use of queue")
+	}
+	return q.s[0]
+}
+
+func (q *http2writeQueue) shift() http2frameWriteMsg {
+	if len(q.s) == 0 {
+		panic("invalid use of queue")
+	}
+	wm := q.s[0]
+
+	copy(q.s, q.s[1:])
+	q.s[len(q.s)-1] = http2frameWriteMsg{}
+	q.s = q.s[:len(q.s)-1]
+	return wm
+}
+
+func (q *http2writeQueue) firstIsNoCost() bool {
+	if df, ok := q.s[0].write.(*http2writeData); ok {
+		return len(df.p) == 0
+	}
+	return true
+}
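+
+// firstIsNoCost treats everything except a non-empty DATA frame as free
+// to send: only DATA payloads count against HTTP/2 flow control, so
+// HEADERS, RST_STREAM, WINDOW_UPDATE, and empty DATA frames can always
+// be scheduled.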
diff --git a/src/net/http/httputil/dump.go b/src/net/http/httputil/dump.go
index ca2d1cd..0a7003d 100644
--- a/src/net/http/httputil/dump.go
+++ b/src/net/http/httputil/dump.go
@@ -55,9 +55,9 @@
 	return len(p), nil
 }
 
-// DumpRequestOut is like DumpRequest but includes
-// headers that the standard http.Transport adds,
-// such as User-Agent.
+// DumpRequestOut is like DumpRequest but for outgoing client requests. It
+// includes any headers that the standard http.Transport adds, such as
+// User-Agent.
 func DumpRequestOut(req *http.Request, body bool) ([]byte, error) {
 	save := req.Body
 	dummyBody := false
@@ -175,8 +175,10 @@
 	return nil
 }
 
-// DumpRequest returns the as-received wire representation of req,
-// optionally including the request body, for debugging.
+// DumpRequest returns the as-received wire representation of req, optionally
+// including the request body, for debugging. It is for use in servers; use
+// DumpRequestOut for client requests.
+//
 // DumpRequest is semantically a no-op, but in order to
 // dump the body, it reads the body data into memory and
 // changes req.Body to refer to the in-memory copy.
diff --git a/src/net/http/httputil/reverseproxy.go b/src/net/http/httputil/reverseproxy.go
index c8e1132..95a99dd 100644
--- a/src/net/http/httputil/reverseproxy.go
+++ b/src/net/http/httputil/reverseproxy.go
@@ -60,10 +60,13 @@
 	return a + b
 }
 
-// NewSingleHostReverseProxy returns a new ReverseProxy that rewrites
+// NewSingleHostReverseProxy returns a new ReverseProxy that routes
 // URLs to the scheme, host, and base path provided in target. If the
 // target's path is "/base" and the incoming request was for "/dir",
 // the target request will be for /base/dir.
+// NewSingleHostReverseProxy does not rewrite the Host header.
+// To rewrite Host headers, use ReverseProxy directly with a custom
+// Director policy.
 func NewSingleHostReverseProxy(target *url.URL) *ReverseProxy {
 	targetQuery := target.RawQuery
 	director := func(req *http.Request) {
diff --git a/src/net/http/httputil/reverseproxy_test.go b/src/net/http/httputil/reverseproxy_test.go
index 80a26ab..1d30961 100644
--- a/src/net/http/httputil/reverseproxy_test.go
+++ b/src/net/http/httputil/reverseproxy_test.go
@@ -14,7 +14,6 @@
 	"net/http/httptest"
 	"net/url"
 	"reflect"
-	"runtime"
 	"strings"
 	"testing"
 	"time"
@@ -225,10 +224,7 @@
 	}
 }
 
-func TestReverseProxyCancellation(t *testing.T) {
-	if runtime.GOOS == "plan9" {
-		t.Skip("skipping test; see https://golang.org/issue/9554")
-	}
+func TestReverseProxyCancelation(t *testing.T) {
 	const backendResponse = "I am the backend"
 
 	reqInFlight := make(chan struct{})
diff --git a/src/net/http/request.go b/src/net/http/request.go
index 31fe45a..8467dec 100644
--- a/src/net/http/request.go
+++ b/src/net/http/request.go
@@ -354,7 +354,7 @@
 // hasn't been set to "identity", Write adds "Transfer-Encoding:
 // chunked" to the header. Body is closed after it is sent.
 func (r *Request) Write(w io.Writer) error {
-	return r.write(w, false, nil)
+	return r.write(w, false, nil, nil)
 }
 
 // WriteProxy is like Write but writes the request in the form
@@ -364,11 +364,12 @@
 // In either case, WriteProxy also writes a Host header, using
 // either r.Host or r.URL.Host.
 func (r *Request) WriteProxy(w io.Writer) error {
-	return r.write(w, true, nil)
+	return r.write(w, true, nil, nil)
 }
 
 // extraHeaders may be nil
-func (req *Request) write(w io.Writer, usingProxy bool, extraHeaders Header) error {
+// waitForContinue may be nil
+func (req *Request) write(w io.Writer, usingProxy bool, extraHeaders Header, waitForContinue func() bool) error {
 	// Find the target host. Prefer the Host: header, but if that
 	// is not given, use the host from the request URL.
 	//
@@ -458,6 +459,21 @@
 		return err
 	}
 
+	// Flush and wait for 100-continue if expected.
+	if waitForContinue != nil {
+		if bw, ok := w.(*bufio.Writer); ok {
+			err = bw.Flush()
+			if err != nil {
+				return err
+			}
+		}
+
+		if !waitForContinue() {
+			req.closeBody()
+			return nil
+		}
+	}
+
 	// Write body and trailer
 	err = tw.WriteBody(w)
 	if err != nil {
diff --git a/src/net/http/response_test.go b/src/net/http/response_test.go
index 421cf55f..d1c5a61 100644
--- a/src/net/http/response_test.go
+++ b/src/net/http/response_test.go
@@ -456,6 +456,55 @@
 
 		"",
 	},
+
+	// Issue 12785: HTTP/1.0 response with bogus (to be ignored) Transfer-Encoding.
+	// Without a Content-Length.
+	{
+		"HTTP/1.0 200 OK\r\n" +
+			"Transfer-Encoding: bogus\r\n" +
+			"\r\n" +
+			"Body here\n",
+
+		Response{
+			Status:        "200 OK",
+			StatusCode:    200,
+			Proto:         "HTTP/1.0",
+			ProtoMajor:    1,
+			ProtoMinor:    0,
+			Request:       dummyReq("GET"),
+			Header:        Header{},
+			Close:         true,
+			ContentLength: -1,
+		},
+
+		"Body here\n",
+	},
+
+	// Issue 12785: HTTP/1.0 response with bogus (to be ignored) Transfer-Encoding.
+	// With a Content-Length.
+	{
+		"HTTP/1.0 200 OK\r\n" +
+			"Transfer-Encoding: bogus\r\n" +
+			"Content-Length: 10\r\n" +
+			"\r\n" +
+			"Body here\n",
+
+		Response{
+			Status:     "200 OK",
+			StatusCode: 200,
+			Proto:      "HTTP/1.0",
+			ProtoMajor: 1,
+			ProtoMinor: 0,
+			Request:    dummyReq("GET"),
+			Header: Header{
+				"Content-Length": {"10"},
+			},
+			Close:         true,
+			ContentLength: 10,
+		},
+
+		"Body here\n",
+	},
 }
 
 func TestReadResponse(t *testing.T) {
diff --git a/src/net/http/serve_test.go b/src/net/http/serve_test.go
index 7840742..9def81a 100644
--- a/src/net/http/serve_test.go
+++ b/src/net/http/serve_test.go
@@ -26,6 +26,7 @@
 	"os/exec"
 	"reflect"
 	"runtime"
+	"sort"
 	"strconv"
 	"strings"
 	"sync"
@@ -755,6 +756,107 @@
 	}
 }
 
+type blockingRemoteAddrListener struct {
+	net.Listener
+	conns chan<- net.Conn
+}
+
+func (l *blockingRemoteAddrListener) Accept() (net.Conn, error) {
+	c, err := l.Listener.Accept()
+	if err != nil {
+		return nil, err
+	}
+	brac := &blockingRemoteAddrConn{
+		Conn:  c,
+		addrs: make(chan net.Addr, 1),
+	}
+	l.conns <- brac
+	return brac, nil
+}
+
+type blockingRemoteAddrConn struct {
+	net.Conn
+	addrs chan net.Addr
+}
+
+func (c *blockingRemoteAddrConn) RemoteAddr() net.Addr {
+	return <-c.addrs
+}
+
+// Issue 12943
+func TestServerAllowsBlockingRemoteAddr(t *testing.T) {
+	defer afterTest(t)
+	ts := httptest.NewUnstartedServer(HandlerFunc(func(w ResponseWriter, r *Request) {
+		fmt.Fprintf(w, "RA:%s", r.RemoteAddr)
+	}))
+	conns := make(chan net.Conn)
+	ts.Listener = &blockingRemoteAddrListener{
+		Listener: ts.Listener,
+		conns:    conns,
+	}
+	ts.Start()
+	defer ts.Close()
+
+	tr := &Transport{DisableKeepAlives: true}
+	defer tr.CloseIdleConnections()
+	c := &Client{Transport: tr, Timeout: time.Second}
+
+	fetch := func(response chan string) {
+		resp, err := c.Get(ts.URL)
+		if err != nil {
+			t.Error(err)
+			response <- ""
+			return
+		}
+		defer resp.Body.Close()
+		body, err := ioutil.ReadAll(resp.Body)
+		if err != nil {
+			t.Error(err)
+			response <- ""
+			return
+		}
+		response <- string(body)
+	}
+
+	// Start a request. The server will block on getting conn.RemoteAddr.
+	response1c := make(chan string, 1)
+	go fetch(response1c)
+
+	// Wait for the server to accept it; grab the connection.
+	conn1 := <-conns
+
+	// Start another request and grab its connection
+	response2c := make(chan string, 1)
+	go fetch(response2c)
+	var conn2 net.Conn
+
+	select {
+	case conn2 = <-conns:
+	case <-time.After(time.Second):
+		t.Fatal("Second Accept didn't happen")
+	}
+
+	// Send a response on connection 2.
+	conn2.(*blockingRemoteAddrConn).addrs <- &net.TCPAddr{
+		IP: net.ParseIP("12.12.12.12"), Port: 12}
+
+	// ... and see it
+	response2 := <-response2c
+	if g, e := response2, "RA:12.12.12.12:12"; g != e {
+		t.Fatalf("response 2 addr = %q; want %q", g, e)
+	}
+
+	// Finish the first response.
+	conn1.(*blockingRemoteAddrConn).addrs <- &net.TCPAddr{
+		IP: net.ParseIP("21.21.21.21"), Port: 21}
+
+	// ... and see it
+	response1 := <-response1c
+	if g, e := response1, "RA:21.21.21.21:21"; g != e {
+		t.Fatalf("response 1 addr = %q; want %q", g, e)
+	}
+}
+
 func TestChunkedResponseHeaders(t *testing.T) {
 	defer afterTest(t)
 	log.SetOutput(ioutil.Discard) // is noisy otherwise
@@ -967,6 +1069,22 @@
 	})
 }
 
+func TestAutomaticHTTP2(t *testing.T) {
+	ln, err := net.Listen("tcp", ":0")
+	if err != nil {
+		t.Fatal(err)
+	}
+	ln.Close() // immediately (not a defer!)
+	var s Server
+	if err := s.Serve(ln); err == nil {
+		t.Fatal("expected an error")
+	}
+	on := s.TLSNextProto["h2"] != nil
+	if !on {
+		t.Errorf("http2 wasn't automatically enabled")
+	}
+}
+
 type serverExpectTest struct {
 	contentLength    int // of request body
 	chunked          bool
@@ -2863,6 +2981,7 @@
 		if _, err := io.WriteString(c, "BOGUS REQUEST\r\n\r\n"); err != nil {
 			t.Fatal(err)
 		}
+		c.Read(make([]byte, 1)) // block until server hangs up on us
 		c.Close()
 	}
 
@@ -2896,9 +3015,14 @@
 	}
 	logString := func(m map[int][]ConnState) string {
 		var b bytes.Buffer
-		for id, l := range m {
+		var keys []int
+		for id := range m {
+			keys = append(keys, id)
+		}
+		sort.Ints(keys)
+		for _, id := range keys {
 			fmt.Fprintf(&b, "Conn %d: ", id)
-			for _, s := range l {
+			for _, s := range m[id] {
 				fmt.Fprintf(&b, "%s ", s)
 			}
 			b.WriteString("\n")
diff --git a/src/net/http/server.go b/src/net/http/server.go
index f525815..dc4f100 100644
--- a/src/net/http/server.go
+++ b/src/net/http/server.go
@@ -470,7 +470,6 @@
 // Create new connection from rwc.
 func (srv *Server) newConn(rwc net.Conn) (c *conn, err error) {
 	c = new(conn)
-	c.remoteAddr = rwc.RemoteAddr().String()
 	c.server = srv
 	c.rwc = rwc
 	c.w = rwc
@@ -1290,6 +1289,7 @@
 
 // Serve a new connection.
 func (c *conn) serve() {
+	c.remoteAddr = c.rwc.RemoteAddr().String()
 	origConn := c.rwc // copy it before it's set nil on Close or Hijack
 	defer func() {
 		if err := recover(); err != nil {
@@ -1339,7 +1339,7 @@
 				// responding to them and hanging up
 				// while they're still writing their
 				// request.  Undefined behavior.
-				io.WriteString(c.rwc, "HTTP/1.1 413 Request Entity Too Large\r\n\r\n")
+				io.WriteString(c.rwc, "HTTP/1.1 413 Request Entity Too Large\r\nContent-Type: text/plain\r\nConnection: close\r\n\r\n413 Request Entity Too Large")
 				c.closeWriteAndWait()
 				break
 			} else if err == io.EOF {
@@ -1347,7 +1347,7 @@
 			} else if neterr, ok := err.(net.Error); ok && neterr.Timeout() {
 				break // Don't reply
 			}
-			io.WriteString(c.rwc, "HTTP/1.1 400 Bad Request\r\n\r\n")
+			io.WriteString(c.rwc, "HTTP/1.1 400 Bad Request\r\nContent-Type: text/plain\r\nConnection: close\r\n\r\n400 Bad Request")
 			break
 		}
 
@@ -1806,7 +1806,8 @@
 	// standard logger.
 	ErrorLog *log.Logger
 
-	disableKeepAlives int32 // accessed atomically.
+	disableKeepAlives int32     // accessed atomically.
+	nextProtoOnce     sync.Once // guards initialization of TLSNextProto in Serve
 }
 
 // A ConnState represents the state of a client connection to a server.
@@ -1896,6 +1897,7 @@
 func (srv *Server) Serve(l net.Listener) error {
 	defer l.Close()
 	var tempDelay time.Duration // how long to sleep on accept failure
+	srv.nextProtoOnce.Do(srv.setNextProtoDefaults)
 	for {
 		rw, e := l.Accept()
 		if e != nil {
@@ -2052,6 +2054,14 @@
 	return srv.Serve(tlsListener)
 }
 
+func (srv *Server) setNextProtoDefaults() {
+	// Enable HTTP/2 by default if the user hasn't otherwise
+	// configured their TLSNextProto map.
+	if srv.TLSNextProto == nil {
+		http2ConfigureServer(srv, nil)
+	}
+}
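+
+// To opt out of the automatic upgrade, callers can set a non-nil
+// (possibly empty) TLSNextProto map before serving, e.g. (the
+// certificate paths here are placeholders):
+//
+//	srv := &http.Server{Addr: ":8443"}
+//	srv.TLSNextProto = map[string]func(*http.Server, *tls.Conn, http.Handler){}
+//	log.Fatal(srv.ListenAndServeTLS("cert.pem", "key.pem"))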
+
 // TimeoutHandler returns a Handler that runs h with the given time limit.
 //
 // The new Handler calls h.ServeHTTP to handle each request, but if a
diff --git a/src/net/http/status.go b/src/net/http/status.go
index d253bd5..b071c5d 100644
--- a/src/net/http/status.go
+++ b/src/net/http/status.go
@@ -44,20 +44,17 @@
 	StatusRequestedRangeNotSatisfiable = 416
 	StatusExpectationFailed            = 417
 	StatusTeapot                       = 418
+	StatusPreconditionRequired         = 428
+	StatusTooManyRequests              = 429
+	StatusRequestHeaderFieldsTooLarge  = 431
 
-	StatusInternalServerError     = 500
-	StatusNotImplemented          = 501
-	StatusBadGateway              = 502
-	StatusServiceUnavailable      = 503
-	StatusGatewayTimeout          = 504
-	StatusHTTPVersionNotSupported = 505
-
-	// New HTTP status codes from RFC 6585. Not exported yet in Go 1.1.
-	// See discussion at https://codereview.appspot.com/7678043/
-	statusPreconditionRequired          = 428
-	statusTooManyRequests               = 429
-	statusRequestHeaderFieldsTooLarge   = 431
-	statusNetworkAuthenticationRequired = 511
+	StatusInternalServerError           = 500
+	StatusNotImplemented                = 501
+	StatusBadGateway                    = 502
+	StatusServiceUnavailable            = 503
+	StatusGatewayTimeout                = 504
+	StatusHTTPVersionNotSupported       = 505
+	StatusNetworkAuthenticationRequired = 511
 )
 
 var statusText = map[int]string{
@@ -99,18 +96,17 @@
 	StatusRequestedRangeNotSatisfiable: "Requested Range Not Satisfiable",
 	StatusExpectationFailed:            "Expectation Failed",
 	StatusTeapot:                       "I'm a teapot",
+	StatusPreconditionRequired:         "Precondition Required",
+	StatusTooManyRequests:              "Too Many Requests",
+	StatusRequestHeaderFieldsTooLarge:  "Request Header Fields Too Large",
 
-	StatusInternalServerError:     "Internal Server Error",
-	StatusNotImplemented:          "Not Implemented",
-	StatusBadGateway:              "Bad Gateway",
-	StatusServiceUnavailable:      "Service Unavailable",
-	StatusGatewayTimeout:          "Gateway Timeout",
-	StatusHTTPVersionNotSupported: "HTTP Version Not Supported",
-
-	statusPreconditionRequired:          "Precondition Required",
-	statusTooManyRequests:               "Too Many Requests",
-	statusRequestHeaderFieldsTooLarge:   "Request Header Fields Too Large",
-	statusNetworkAuthenticationRequired: "Network Authentication Required",
+	StatusInternalServerError:           "Internal Server Error",
+	StatusNotImplemented:                "Not Implemented",
+	StatusBadGateway:                    "Bad Gateway",
+	StatusServiceUnavailable:            "Service Unavailable",
+	StatusGatewayTimeout:                "Gateway Timeout",
+	StatusHTTPVersionNotSupported:       "HTTP Version Not Supported",
+	StatusNetworkAuthenticationRequired: "Network Authentication Required",
 }
 
 // StatusText returns a text for the HTTP status code. It returns the empty
diff --git a/src/net/http/transfer.go b/src/net/http/transfer.go
index a8736b2..38b6f67 100644
--- a/src/net/http/transfer.go
+++ b/src/net/http/transfer.go
@@ -271,6 +271,10 @@
 	Trailer          Header
 }
 
+func (t *transferReader) protoAtLeast(m, n int) bool {
+	return t.ProtoMajor > m || (t.ProtoMajor == m && t.ProtoMinor >= n)
+}
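+
+// For example, protoAtLeast(1, 1) reports true for HTTP/1.1 and
+// HTTP/2.0 messages but false for HTTP/1.0, which is what
+// fixTransferEncoding below relies on.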
+
 // bodyAllowedForStatus reports whether a given response status code
 // permits a body.  See RFC2616, section 4.4.
 func bodyAllowedForStatus(status int) bool {
@@ -337,7 +341,7 @@
 	}
 
 	// Transfer encoding, content length
-	t.TransferEncoding, err = fixTransferEncoding(isResponse, t.RequestMethod, t.Header)
+	err = t.fixTransferEncoding()
 	if err != nil {
 		return err
 	}
@@ -424,13 +428,18 @@
 // Checks whether the encoding is explicitly "identity".
 func isIdentity(te []string) bool { return len(te) == 1 && te[0] == "identity" }
 
-// Sanitize transfer encoding
-func fixTransferEncoding(isResponse bool, requestMethod string, header Header) ([]string, error) {
-	raw, present := header["Transfer-Encoding"]
+// fixTransferEncoding sanitizes t.TransferEncoding, if needed.
+func (t *transferReader) fixTransferEncoding() error {
+	raw, present := t.Header["Transfer-Encoding"]
 	if !present {
-		return nil, nil
+		return nil
 	}
-	delete(header, "Transfer-Encoding")
+	delete(t.Header, "Transfer-Encoding")
+
+	// Issue 12785; ignore Transfer-Encoding on HTTP/1.0 requests.
+	if !t.protoAtLeast(1, 1) {
+		return nil
+	}
 
 	encodings := strings.Split(raw[0], ",")
 	te := make([]string, 0, len(encodings))
@@ -445,13 +454,13 @@
 			break
 		}
 		if encoding != "chunked" {
-			return nil, &badStringError{"unsupported transfer encoding", encoding}
+			return &badStringError{"unsupported transfer encoding", encoding}
 		}
 		te = te[0 : len(te)+1]
 		te[len(te)-1] = encoding
 	}
 	if len(te) > 1 {
-		return nil, &badStringError{"too many transfer encodings", strings.Join(te, ",")}
+		return &badStringError{"too many transfer encodings", strings.Join(te, ",")}
 	}
 	if len(te) > 0 {
 		// RFC 7230 3.3.2 says "A sender MUST NOT send a
@@ -470,11 +479,12 @@
 		// such a message downstream."
 		//
 		// Reportedly, these appear in the wild.
-		delete(header, "Content-Length")
-		return te, nil
+		delete(t.Header, "Content-Length")
+		t.TransferEncoding = te
+		return nil
 	}
 
-	return nil, nil
+	return nil
 }
 
 // Determine the expected body length, using RFC 2616 Section 4.4. This
diff --git a/src/net/http/transport.go b/src/net/http/transport.go
index 70d1864..3159923 100644
--- a/src/net/http/transport.go
+++ b/src/net/http/transport.go
@@ -36,7 +36,8 @@
 		Timeout:   30 * time.Second,
 		KeepAlive: 30 * time.Second,
 	}).Dial,
-	TLSHandshakeTimeout: 10 * time.Second,
+	TLSHandshakeTimeout:   10 * time.Second,
+	ExpectContinueTimeout: 1 * time.Second,
 }
 
 // DefaultMaxIdleConnsPerHost is the default value of Transport's
@@ -113,6 +114,13 @@
 	// time does not include the time to read the response body.
 	ResponseHeaderTimeout time.Duration
 
+	// ExpectContinueTimeout, if non-zero, specifies the amount of
+	// time to wait for a server's first response headers after fully
+	// writing the request headers if the request has an
+	// "Expect: 100-continue" header. Zero means no timeout.
+	// This time does not include the time to send the request header.
+	ExpectContinueTimeout time.Duration
+
 	// TODO: tunable on global max cached connections
 	// TODO: tunable on timeout on cached connections
 }
@@ -894,13 +902,17 @@
 		var resp *Response
 		if err == nil {
 			resp, err = ReadResponse(pc.br, rc.req)
-			if err == nil && resp.StatusCode == 100 {
-				// Skip any 100-continue for now.
-				// TODO(bradfitz): if rc.req had "Expect: 100-continue",
-				// actually block the request body write and signal the
-				// writeLoop now to begin sending it. (Issue 2184) For now we
-				// eat it, since we're never expecting one.
-				resp, err = ReadResponse(pc.br, rc.req)
+			if err == nil {
+				if rc.continueCh != nil {
+					if resp.StatusCode == 100 {
+						rc.continueCh <- struct{}{}
+					} else {
+						close(rc.continueCh)
+					}
+				}
+				if resp.StatusCode == 100 {
+					resp, err = ReadResponse(pc.br, rc.req)
+				}
 			}
 		}
 
@@ -1004,6 +1016,28 @@
 	pc.close()
 }
 
+// waitForContinue returns a function that blocks until any response,
+// timeout, or connection close occurs. The returned function reports
+// whether the request body should be sent.
+func (pc *persistConn) waitForContinue(continueCh <-chan struct{}) func() bool {
+	if continueCh == nil {
+		return nil
+	}
+	return func() bool {
+		timer := time.NewTimer(pc.t.ExpectContinueTimeout)
+		defer timer.Stop()
+
+		select {
+		case _, ok := <-continueCh:
+			return ok
+		case <-timer.C:
+			return true
+		case <-pc.closech:
+			return false
+		}
+	}
+}
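+
+// In other words: the body is sent as soon as the server replies 100
+// Continue (readLoop sends a value on continueCh), skipped if a final
+// status arrives first (readLoop closes continueCh), sent anyway once
+// ExpectContinueTimeout fires, and abandoned if the connection closes.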
+
 func (pc *persistConn) writeLoop() {
 	for {
 		select {
@@ -1012,7 +1046,7 @@
 				wr.ch <- errors.New("http: can't write HTTP request on broken connection")
 				continue
 			}
-			err := wr.req.Request.write(pc.bw, pc.isProxy, wr.req.extra)
+			err := wr.req.Request.write(pc.bw, pc.isProxy, wr.req.extra, pc.waitForContinue(wr.continueCh))
 			if err == nil {
 				err = pc.bw.Flush()
 			}
@@ -1069,6 +1103,12 @@
 	// Accept-Encoding gzip header? only if it we set it do
 	// we transparently decode the gzip.
 	addedGzip bool
+
+	// Optional blocking chan for Expect: 100-continue (for send).
+	// If the request has an "Expect: 100-continue" header and
+	// the server responds 100 Continue, readLoop sends a value
+	// to writeLoop via this chan.
+	continueCh chan<- struct{}
 }
 
 // A writeRequest is sent by the readLoop's goroutine to the
@@ -1078,6 +1118,11 @@
 type writeRequest struct {
 	req *transportRequest
 	ch  chan<- error
+
+	// Optional blocking chan for Expect: 100-continue (for receive).
+	// If not nil, writeLoop blocks sending request body until
+	// it receives from this chan.
+	continueCh <-chan struct{}
 }
 
 type httpError struct {
@@ -1143,6 +1188,11 @@
 		req.extraHeaders().Set("Accept-Encoding", "gzip")
 	}
 
+	var continueCh chan struct{}
+	if req.ProtoAtLeast(1, 1) && req.Body != nil && req.expectsContinue() {
+		continueCh = make(chan struct{}, 1)
+	}
+
 	if pc.t.DisableKeepAlives {
 		req.extraHeaders().Set("Connection", "close")
 	}
@@ -1151,10 +1201,10 @@
 	// in case the server decides to reply before reading our full
 	// request body.
 	writeErrCh := make(chan error, 1)
-	pc.writech <- writeRequest{req, writeErrCh}
+	pc.writech <- writeRequest{req, writeErrCh, continueCh}
 
 	resc := make(chan responseAndError, 1)
-	pc.reqch <- requestAndChan{req.Request, resc, requestedGzip}
+	pc.reqch <- requestAndChan{req.Request, resc, requestedGzip, continueCh}
 
 	var re responseAndError
 	var respHeaderTimer <-chan time.Time
diff --git a/src/net/http/transport_test.go b/src/net/http/transport_test.go
index cc19342..f721fd5 100644
--- a/src/net/http/transport_test.go
+++ b/src/net/http/transport_test.go
@@ -790,6 +790,94 @@
 	}
 }
 
+// If a request has an Expect: 100-continue header, it blocks sending its
+// body until the first response arrives; the body must not be consumed prematurely.
+func TestTransportExpect100Continue(t *testing.T) {
+	defer afterTest(t)
+
+	ts := httptest.NewServer(HandlerFunc(func(rw ResponseWriter, req *Request) {
+		switch req.URL.Path {
+		case "/100":
+			// This endpoint implicitly responds 100 Continue and reads body.
+			if _, err := io.Copy(ioutil.Discard, req.Body); err != nil {
+				t.Error("Failed to read Body", err)
+			}
+			rw.WriteHeader(StatusOK)
+		case "/200":
+			// Go 1.5 adds a Connection: close header if the client
+			// expects a continue but the entire request body isn't consumed.
+			rw.WriteHeader(StatusOK)
+		case "/500":
+			rw.WriteHeader(StatusInternalServerError)
+		case "/keepalive":
+			// This hijacked endpoint responds with an error, without Connection: close.
+			_, bufrw, err := rw.(Hijacker).Hijack()
+			if err != nil {
+				log.Fatal(err)
+			}
+			bufrw.WriteString("HTTP/1.1 500 Internal Server Error\r\n")
+			bufrw.WriteString("Content-Length: 0\r\n\r\n")
+			bufrw.Flush()
+		case "/timeout":
+			// This endpoint reads the body without sending a 100 Continue
+			// response; the client starts sending only after ExpectContinueTimeout.
+			conn, bufrw, err := rw.(Hijacker).Hijack()
+			if err != nil {
+				log.Fatal(err)
+			}
+			if _, err := io.CopyN(ioutil.Discard, bufrw, req.ContentLength); err != nil {
+				t.Error("Failed to read Body", err)
+			}
+			bufrw.WriteString("HTTP/1.1 200 OK\r\n\r\n")
+			bufrw.Flush()
+			conn.Close()
+		}
+
+	}))
+	defer ts.Close()
+
+	tests := []struct {
+		path   string
+		body   []byte
+		sent   int
+		status int
+	}{
+		{path: "/100", body: []byte("hello"), sent: 5, status: 200},       // Got 100 followed by 200, entire body is sent.
+		{path: "/200", body: []byte("hello"), sent: 0, status: 200},       // Got 200 without 100. body isn't sent.
+		{path: "/500", body: []byte("hello"), sent: 0, status: 500},       // Got 500 without 100. body isn't sent.
+		{path: "/keepalive", body: []byte("hello"), sent: 0, status: 500}, // Althogh without Connection:close, body isn't sent.
+		{path: "/timeout", body: []byte("hello"), sent: 5, status: 200},   // Timeout exceeded and entire body is sent.
+	}
+
+	for i, v := range tests {
+		tr := &Transport{ExpectContinueTimeout: 2 * time.Second}
+		defer tr.CloseIdleConnections()
+		c := &Client{Transport: tr}
+
+		body := bytes.NewReader(v.body)
+		req, err := NewRequest("PUT", ts.URL+v.path, body)
+		if err != nil {
+			t.Fatal(err)
+		}
+		req.Header.Set("Expect", "100-continue")
+		req.ContentLength = int64(len(v.body))
+
+		resp, err := c.Do(req)
+		if err != nil {
+			t.Fatal(err)
+		}
+		resp.Body.Close()
+
+		sent := len(v.body) - body.Len()
+		if v.status != resp.StatusCode {
+			t.Errorf("test %d: status code should be %d but got %d. (%s)", i, v.status, resp.StatusCode, v.path)
+		}
+		if v.sent != sent {
+			t.Errorf("test %d: sent body should be %d but sent %d. (%s)", i, v.sent, sent, v.path)
+		}
+	}
+}
+
 func TestTransportProxy(t *testing.T) {
 	defer afterTest(t)
 	ch := make(chan string, 1)
@@ -874,9 +962,6 @@
 
 // tests that persistent goroutine connections shut down when no longer desired.
 func TestTransportPersistConnLeak(t *testing.T) {
-	if runtime.GOOS == "plan9" {
-		t.Skip("skipping test; see https://golang.org/issue/7237")
-	}
 	defer afterTest(t)
 	gotReqCh := make(chan bool)
 	unblockCh := make(chan bool)
@@ -943,9 +1028,6 @@
 // golang.org/issue/4531: Transport leaks goroutines when
 // request.ContentLength is explicitly short
 func TestTransportPersistConnLeakShortBody(t *testing.T) {
-	if runtime.GOOS == "plan9" {
-		t.Skip("skipping test; see https://golang.org/issue/7237")
-	}
 	defer afterTest(t)
 	ts := httptest.NewServer(HandlerFunc(func(w ResponseWriter, r *Request) {
 	}))
@@ -2291,15 +2373,28 @@
 
 func (e errorReader) Read(p []byte) (int, error) { return 0, e.err }
 
+type plan9SleepReader struct{}
+
+func (plan9SleepReader) Read(p []byte) (int, error) {
+	if runtime.GOOS == "plan9" {
+		// After the fix to unblock TCP Reads in
+		// https://golang.org/cl/15941, this sleep is required
+		// on plan9 to make sure TCP Writes before an
+		// immediate TCP close go out on the wire.  On Plan 9,
+		// it seems that a hangup of a TCP connection with
+		// queued data doesn't send the queued data first.
+		// https://golang.org/issue/9554
+		time.Sleep(50 * time.Millisecond)
+	}
+	return 0, io.EOF
+}
+
 type closerFunc func() error
 
 func (f closerFunc) Close() error { return f() }
 
 // Issue 6981
 func TestTransportClosesBodyOnError(t *testing.T) {
-	if runtime.GOOS == "plan9" {
-		t.Skip("skipping test; see https://golang.org/issue/7782")
-	}
 	defer afterTest(t)
 	readBody := make(chan error, 1)
 	ts := httptest.NewServer(HandlerFunc(func(w ResponseWriter, r *Request) {
@@ -2313,7 +2408,7 @@
 		io.Reader
 		io.Closer
 	}{
-		io.MultiReader(io.LimitReader(neverEnding('x'), 1<<20), errorReader{fakeErr}),
+		io.MultiReader(io.LimitReader(neverEnding('x'), 1<<20), plan9SleepReader{}, errorReader{fakeErr}),
 		closerFunc(func() error {
 			select {
 			case didClose <- true:
diff --git a/src/net/lookup_windows_test.go b/src/net/lookup_windows_test.go
index 3f64d8c..8368ad4 100644
--- a/src/net/lookup_windows_test.go
+++ b/src/net/lookup_windows_test.go
@@ -12,7 +12,6 @@
 	"reflect"
 	"regexp"
 	"sort"
-	"strconv"
 	"strings"
 	"testing"
 )
@@ -184,14 +183,14 @@
 	// golang.org      mail exchanger = 2 alt1.aspmx.l.google.com.
 	rx := regexp.MustCompile(`(?m)^([a-z0-9.\-]+)\s+mail exchanger\s*=\s*([0-9]+)\s*([a-z0-9.\-]+)$`)
 	for _, ans := range rx.FindAllStringSubmatch(r, -1) {
-		pref, _ := strconv.Atoi(ans[2])
+		pref, _, _ := dtoi(ans[2], 0)
 		mx = append(mx, &MX{fqdn(ans[3]), uint16(pref)})
 	}
 	// windows nslookup syntax
 	// gmail.com       MX preference = 30, mail exchanger = alt3.gmail-smtp-in.l.google.com
 	rx = regexp.MustCompile(`(?m)^([a-z0-9.\-]+)\s+MX preference\s*=\s*([0-9]+)\s*,\s*mail exchanger\s*=\s*([a-z0-9.\-]+)$`)
 	for _, ans := range rx.FindAllStringSubmatch(r, -1) {
-		pref, _ := strconv.Atoi(ans[2])
+		pref, _, _ := dtoi(ans[2], 0)
 		mx = append(mx, &MX{fqdn(ans[3]), uint16(pref)})
 	}
 	return
diff --git a/src/net/net.go b/src/net/net.go
index 4f1bf9d..89212e6 100644
--- a/src/net/net.go
+++ b/src/net/net.go
@@ -345,7 +345,7 @@
 // Multiple goroutines may invoke methods on a Listener simultaneously.
 type Listener interface {
 	// Accept waits for and returns the next connection to the listener.
-	Accept() (c Conn, err error)
+	Accept() (Conn, error)
 
 	// Close closes the listener.
 	// Any blocked Accept operations will be unblocked and return errors.
diff --git a/src/net/net_test.go b/src/net/net_test.go
index 3907ce4..b91a9e3 100644
--- a/src/net/net_test.go
+++ b/src/net/net_test.go
@@ -208,7 +208,6 @@
 		case "unix", "unixpacket":
 			defer os.Remove(ln.Addr().String())
 		}
-		defer ln.Close()
 
 		if err := ln.Close(); err != nil {
 			if perr := parseCloseError(err); perr != nil {
@@ -221,6 +220,14 @@
 			c.Close()
 			t.Fatal("should fail")
 		}
+
+		if network == "tcp" {
+			cc, err := Dial("tcp", ln.Addr().String())
+			if err == nil {
+				t.Error("Dial to closed TCP listener succeeeded.")
+				cc.Close()
+			}
+		}
 	}
 }
 
diff --git a/src/net/parse.go b/src/net/parse.go
index c72e1c2..2c686f5 100644
--- a/src/net/parse.go
+++ b/src/net/parse.go
@@ -10,6 +10,7 @@
 import (
 	"io"
 	"os"
+	_ "unsafe" // For go:linkname
 )
 
 type file struct {
@@ -70,14 +71,11 @@
 	return &file{fd, make([]byte, 0, os.Getpagesize()), false}, nil
 }
 
-func byteIndex(s string, c byte) int {
-	for i := 0; i < len(s); i++ {
-		if s[i] == c {
-			return i
-		}
-	}
-	return -1
-}
+// byteIndex is strings.IndexByte. It returns the index of the
+// first instance of c in s, or -1 if c is not present in s.
+// strings.IndexByte is implemented in runtime/asm_$GOARCH.s.
+//go:linkname byteIndex strings.IndexByte
+func byteIndex(s string, c byte) int
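+
+// Note that the blank import of "unsafe" added above is what permits the
+// go:linkname directives in this file; the compiler rejects go:linkname
+// in files that don't import unsafe.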
 
 // Count occurrences in s of any bytes in t.
 func countAnyByte(s string, t string) int {
@@ -314,14 +312,9 @@
 
 // bytesIndexByte is bytes.IndexByte. It returns the index of the
 // first instance of c in s, or -1 if c is not present in s.
-func bytesIndexByte(s []byte, c byte) int {
-	for i, b := range s {
-		if b == c {
-			return i
-		}
-	}
-	return -1
-}
+// bytes.IndexByte is implemented in runtime/asm_$GOARCH.s.
+//go:linkname bytesIndexByte bytes.IndexByte
+func bytesIndexByte(s []byte, c byte) int
 
 // stringsHasSuffix is strings.HasSuffix. It reports whether s ends in
 // suffix.
diff --git a/src/net/platform_test.go b/src/net/platform_test.go
index c9415d1..76c5313 100644
--- a/src/net/platform_test.go
+++ b/src/net/platform_test.go
@@ -32,7 +32,7 @@
 		}
 	case "unix", "unixgram":
 		switch runtime.GOOS {
-		case "nacl", "plan9", "windows":
+		case "android", "nacl", "plan9", "windows":
 			return false
 		}
 		// iOS does not support unix, unixgram.
diff --git a/src/net/tcpsockopt_plan9.go b/src/net/tcpsockopt_plan9.go
index 9abe186..157282a 100644
--- a/src/net/tcpsockopt_plan9.go
+++ b/src/net/tcpsockopt_plan9.go
@@ -7,13 +7,12 @@
 package net
 
 import (
-	"strconv"
 	"time"
 )
 
 // Set keep alive period.
 func setKeepAlivePeriod(fd *netFD, d time.Duration) error {
-	cmd := "keepalive " + strconv.Itoa(int(d/time.Millisecond))
+	cmd := "keepalive " + itoa(int(d/time.Millisecond))
 	_, e := fd.ctl.WriteAt([]byte(cmd), 0)
 	return e
 }
diff --git a/src/net/testdata/hosts b/src/net/testdata/hosts
index b601763..4b8abb4 100644
--- a/src/net/testdata/hosts
+++ b/src/net/testdata/hosts
@@ -1,6 +1,8 @@
 255.255.255.255	broadcasthost
 127.0.0.2	odin
 127.0.0.3	odin  # inline comment 
+# case insensitivity
+127.0.0.4	Bor
 ::2             odin
 127.1.1.1	thor
 # aliases
diff --git a/src/net/textproto/reader.go b/src/net/textproto/reader.go
index 91303fe..17edfad 100644
--- a/src/net/textproto/reader.go
+++ b/src/net/textproto/reader.go
@@ -150,7 +150,7 @@
 			break
 		}
 		r.buf = append(r.buf, ' ')
-		r.buf = append(r.buf, line...)
+		r.buf = append(r.buf, trim(line)...)
 	}
 	return r.buf, nil
 }
diff --git a/src/net/textproto/reader_test.go b/src/net/textproto/reader_test.go
index 8fce7dd..db7d8ab 100644
--- a/src/net/textproto/reader_test.go
+++ b/src/net/textproto/reader_test.go
@@ -205,6 +205,32 @@
 	}
 }
 
+// Test that continued lines are properly trimmed. Issue 11204.
+func TestReadMIMEHeaderTrimContinued(t *testing.T) {
+	// In this header, \n and \r\n terminated lines are mixed on purpose.
+	// We expect each line to be trimmed (prefix and suffix) before being concatenated.
+	// Keep the spaces as they are.
+	r := reader("" + // for code formatting purpose.
+		"a:\n" +
+		" 0 \r\n" +
+		"b:1 \t\r\n" +
+		"c: 2\r\n" +
+		" 3\t\n" +
+		"  \t 4  \r\n\n")
+	m, err := r.ReadMIMEHeader()
+	if err != nil {
+		t.Fatal(err)
+	}
+	want := MIMEHeader{
+		"A": {"0"},
+		"B": {"1"},
+		"C": {"2 3 4"},
+	}
+	if !reflect.DeepEqual(m, want) {
+		t.Fatalf("ReadMIMEHeader mismatch.\n got: %q\nwant: %q", m, want)
+	}
+}
+
 type readResponseTest struct {
 	in       string
 	inCode   int
diff --git a/src/net/url/url.go b/src/net/url/url.go
index 8ffad66..7f648e3 100644
--- a/src/net/url/url.go
+++ b/src/net/url/url.go
@@ -24,6 +24,24 @@
 
 func (e *Error) Error() string { return e.Op + " " + e.URL + ": " + e.Err.Error() }
 
+type timeout interface {
+	Timeout() bool
+}
+
+func (e *Error) Timeout() bool {
+	t, ok := e.Err.(timeout)
+	return ok && t.Timeout()
+}
+
+type temporary interface {
+	Temporary() bool
+}
+
+func (e *Error) Temporary() bool {
+	t, ok := e.Err.(temporary)
+	return ok && t.Temporary()
+}
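+
+// With Timeout and Temporary (plus Error), *Error satisfies net.Error,
+// so callers can classify failures without unwrapping:
+//
+//	if ne, ok := err.(net.Error); ok && ne.Timeout() {
+//		// retry the request
+//	}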
+
 func ishex(c byte) bool {
 	switch {
 	case '0' <= c && c <= '9':
diff --git a/src/net/url/url_test.go b/src/net/url/url_test.go
index ff6e9e4..dbac91b 100644
--- a/src/net/url/url_test.go
+++ b/src/net/url/url_test.go
@@ -6,6 +6,8 @@
 
 import (
 	"fmt"
+	"io"
+	"net"
 	"reflect"
 	"strings"
 	"testing"
@@ -1229,3 +1231,84 @@
 		}
 	}
 }
+
+type timeoutError struct {
+	timeout bool
+}
+
+func (e *timeoutError) Error() string { return "timeout error" }
+func (e *timeoutError) Timeout() bool { return e.timeout }
+
+type temporaryError struct {
+	temporary bool
+}
+
+func (e *temporaryError) Error() string   { return "temporary error" }
+func (e *temporaryError) Temporary() bool { return e.temporary }
+
+type timeoutTemporaryError struct {
+	timeoutError
+	temporaryError
+}
+
+func (e *timeoutTemporaryError) Error() string { return "timeout/temporary error" }
+
+var netErrorTests = []struct {
+	err       error
+	timeout   bool
+	temporary bool
+}{{
+	err:       &Error{"Get", "http://google.com/", &timeoutError{timeout: true}},
+	timeout:   true,
+	temporary: false,
+}, {
+	err:       &Error{"Get", "http://google.com/", &timeoutError{timeout: false}},
+	timeout:   false,
+	temporary: false,
+}, {
+	err:       &Error{"Get", "http://google.com/", &temporaryError{temporary: true}},
+	timeout:   false,
+	temporary: true,
+}, {
+	err:       &Error{"Get", "http://google.com/", &temporaryError{temporary: false}},
+	timeout:   false,
+	temporary: false,
+}, {
+	err:       &Error{"Get", "http://google.com/", &timeoutTemporaryError{timeoutError{timeout: true}, temporaryError{temporary: true}}},
+	timeout:   true,
+	temporary: true,
+}, {
+	err:       &Error{"Get", "http://google.com/", &timeoutTemporaryError{timeoutError{timeout: false}, temporaryError{temporary: true}}},
+	timeout:   false,
+	temporary: true,
+}, {
+	err:       &Error{"Get", "http://google.com/", &timeoutTemporaryError{timeoutError{timeout: true}, temporaryError{temporary: false}}},
+	timeout:   true,
+	temporary: false,
+}, {
+	err:       &Error{"Get", "http://google.com/", &timeoutTemporaryError{timeoutError{timeout: false}, temporaryError{temporary: false}}},
+	timeout:   false,
+	temporary: false,
+}, {
+	err:       &Error{"Get", "http://google.com/", io.EOF},
+	timeout:   false,
+	temporary: false,
+}}
+
+// Test that url.Error implements net.Error and forwards Timeout and Temporary.
+func TestURLErrorImplementsNetError(t *testing.T) {
+	for i, tt := range netErrorTests {
+		err, ok := tt.err.(net.Error)
+		if !ok {
+			t.Errorf("%d: %T does not implement net.Error", i+1, tt.err)
+			continue
+		}
+		if err.Timeout() != tt.timeout {
+			t.Errorf("%d: err.Timeout(): want %v, have %v", i+1, tt.timeout, err.Timeout())
+			continue
+		}
+		if err.Temporary() != tt.temporary {
+			t.Errorf("%d: err.Temporary(): want %v, have %v", i+1, tt.temporary, err.Temporary())
+		}
+	}
+}
diff --git a/src/os/error_plan9.go b/src/os/error_plan9.go
index 001cdfc..2dc6b39 100644
--- a/src/os/error_plan9.go
+++ b/src/os/error_plan9.go
@@ -12,6 +12,8 @@
 		err = pe.Err
 	case *LinkError:
 		err = pe.Err
+	case *SyscallError:
+		err = pe.Err
 	}
 	return contains(err.Error(), " exists")
 }
@@ -24,6 +26,8 @@
 		err = pe.Err
 	case *LinkError:
 		err = pe.Err
+	case *SyscallError:
+		err = pe.Err
 	}
 	return contains(err.Error(), "does not exist") || contains(err.Error(), "not found") ||
 		contains(err.Error(), "has been removed") || contains(err.Error(), "no parent")
@@ -37,6 +41,8 @@
 		err = pe.Err
 	case *LinkError:
 		err = pe.Err
+	case *SyscallError:
+		err = pe.Err
 	}
 	return contains(err.Error(), "permission denied")
 }
diff --git a/src/os/error_test.go b/src/os/error_test.go
index 02ed235..5477e7e 100644
--- a/src/os/error_test.go
+++ b/src/os/error_test.go
@@ -93,6 +93,8 @@
 	{&os.LinkError{Err: os.ErrPermission}, false, false},
 	{&os.LinkError{Err: os.ErrExist}, true, false},
 	{&os.LinkError{Err: os.ErrNotExist}, false, true},
+	{&os.SyscallError{Err: os.ErrNotExist}, false, true},
+	{&os.SyscallError{Err: os.ErrExist}, true, false},
 	{nil, false, false},
 }
 
@@ -107,6 +109,23 @@
 	}
 }
 
+var isPermissionTests = []struct {
+	err  error
+	want bool
+}{
+	{nil, false},
+	{&os.PathError{Err: os.ErrPermission}, true},
+	{&os.SyscallError{Err: os.ErrPermission}, true},
+}
+
+func TestIsPermission(t *testing.T) {
+	for _, tt := range isPermissionTests {
+		if got := os.IsPermission(tt.err); got != tt.want {
+			t.Errorf("os.IsPermission(%#v) = %v; want %v", tt.err, got, tt.want)
+		}
+	}
+}
+
 func TestErrPathNUL(t *testing.T) {
 	f, err := ioutil.TempFile("", "_Go_ErrPathNUL\x00")
 	if err == nil {
diff --git a/src/os/error_unix.go b/src/os/error_unix.go
index f2aabbb..c600227 100644
--- a/src/os/error_unix.go
+++ b/src/os/error_unix.go
@@ -16,6 +16,8 @@
 		err = pe.Err
 	case *LinkError:
 		err = pe.Err
+	case *SyscallError:
+		err = pe.Err
 	}
 	return err == syscall.EEXIST || err == ErrExist
 }
@@ -28,6 +30,8 @@
 		err = pe.Err
 	case *LinkError:
 		err = pe.Err
+	case *SyscallError:
+		err = pe.Err
 	}
 	return err == syscall.ENOENT || err == ErrNotExist
 }
@@ -40,6 +44,8 @@
 		err = pe.Err
 	case *LinkError:
 		err = pe.Err
+	case *SyscallError:
+		err = pe.Err
 	}
 	return err == syscall.EACCES || err == syscall.EPERM || err == ErrPermission
 }
diff --git a/src/os/error_windows.go b/src/os/error_windows.go
index 83db6c0..2c1c39c 100644
--- a/src/os/error_windows.go
+++ b/src/os/error_windows.go
@@ -14,11 +14,15 @@
 		err = pe.Err
 	case *LinkError:
 		err = pe.Err
+	case *SyscallError:
+		err = pe.Err
 	}
 	return err == syscall.ERROR_ALREADY_EXISTS ||
 		err == syscall.ERROR_FILE_EXISTS || err == ErrExist
 }
 
+const _ERROR_BAD_NETPATH = syscall.Errno(53)
+
 func isNotExist(err error) bool {
 	switch pe := err.(type) {
 	case nil:
@@ -27,8 +31,11 @@
 		err = pe.Err
 	case *LinkError:
 		err = pe.Err
+	case *SyscallError:
+		err = pe.Err
 	}
 	return err == syscall.ERROR_FILE_NOT_FOUND ||
+		err == _ERROR_BAD_NETPATH ||
 		err == syscall.ERROR_PATH_NOT_FOUND || err == ErrNotExist
 }
 
@@ -40,6 +47,8 @@
 		err = pe.Err
 	case *LinkError:
 		err = pe.Err
+	case *SyscallError:
+		err = pe.Err
 	}
 	return err == syscall.ERROR_ACCESS_DENIED || err == ErrPermission
 }
diff --git a/src/os/file.go b/src/os/file.go
index 69338ce..a662197 100644
--- a/src/os/file.go
+++ b/src/os/file.go
@@ -173,6 +173,7 @@
 // according to whence: 0 means relative to the origin of the file, 1 means
 // relative to the current offset, and 2 means relative to the end.
 // It returns the new offset and an error, if any.
+// The behavior of Seek on a file opened with O_APPEND is not specified.
 func (f *File) Seek(offset int64, whence int) (ret int64, err error) {
 	if f == nil {
 		return 0, ErrInvalid
diff --git a/src/os/file_unix.go b/src/os/file_unix.go
index 0677707..68d0a6e 100644
--- a/src/os/file_unix.go
+++ b/src/os/file_unix.go
@@ -90,8 +90,21 @@
 		}
 	}
 
-	r, e := syscall.Open(name, flag|syscall.O_CLOEXEC, syscallMode(perm))
-	if e != nil {
+	var r int
+	for {
+		var e error
+		r, e = syscall.Open(name, flag|syscall.O_CLOEXEC, syscallMode(perm))
+		if e == nil {
+			break
+		}
+
+		// On OS X, sigaction(2) doesn't guarantee that SA_RESTART will cause
+		// open(2) to be restarted for regular files. This is easy to reproduce on
+		// fuse file systems (see http://golang.org/issue/11180).
+		if runtime.GOOS == "darwin" && e == syscall.EINTR {
+			continue
+		}
+
 		return nil, &PathError{"open", name, e}
 	}
 
diff --git a/src/os/os_windows_test.go b/src/os/os_windows_test.go
index ee19b2b..4de208e 100644
--- a/src/os/os_windows_test.go
+++ b/src/os/os_windows_test.go
@@ -126,3 +126,23 @@
 	defer p.Wait()
 	t.Fatalf("StartProcess expected to fail, but succeeded.")
 }
+
+func TestShareNotExistError(t *testing.T) {
+	if testing.Short() {
+		t.Skip("slow test that uses network; skipping")
+	}
+	_, err := os.Stat(`\\no_such_server\no_such_share\no_such_file`)
+	if err == nil {
+		t.Fatal("stat succeeded, but expected to fail")
+	}
+	if !os.IsNotExist(err) {
+		t.Fatalf("os.Stat failed with %q, but os.IsNotExist(err) is false", err)
+	}
+}
+
+func TestBadNetPathError(t *testing.T) {
+	const ERROR_BAD_NETPATH = syscall.Errno(53)
+	if !os.IsNotExist(ERROR_BAD_NETPATH) {
+		t.Fatal("os.IsNotExist(syscall.Errno(53)) is false, but want true")
+	}
+}
diff --git a/src/os/signal/sig.s b/src/os/signal/sig.s
index 7fa6c92..2e94c91 100644
--- a/src/os/signal/sig.s
+++ b/src/os/signal/sig.s
@@ -2,31 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// Assembly to get into package runtime without using exported symbols.
-
-// +build amd64 amd64p32 arm arm64 386 ppc64 ppc64le
-
-#include "textflag.h"
-
-#ifdef GOARCH_arm
-#define JMP B
-#endif
-#ifdef GOARCH_ppc64
-#define JMP BR
-#endif
-#ifdef GOARCH_ppc64le
-#define JMP BR
-#endif
-
-TEXT ·signal_disable(SB),NOSPLIT,$0
-	JMP runtime·signal_disable(SB)
-
-TEXT ·signal_enable(SB),NOSPLIT,$0
-	JMP runtime·signal_enable(SB)
-
-TEXT ·signal_ignore(SB),NOSPLIT,$0
-	JMP runtime·signal_ignore(SB)
-
-TEXT ·signal_recv(SB),NOSPLIT,$0
-	JMP runtime·signal_recv(SB)
-
+// The runtime package uses //go:linkname to push a few functions into this
+// package, but we still need a .s file so the go tool does not pass -complete
+// to the compiler; otherwise the compiler would complain about the package's
+// Go functions that have no bodies.
diff --git a/src/os/signal/signal_unix.go b/src/os/signal/signal_unix.go
index 1bdf1d7..01b1b14 100644
--- a/src/os/signal/signal_unix.go
+++ b/src/os/signal/signal_unix.go
@@ -11,7 +11,7 @@
 	"syscall"
 )
 
-// In assembly.
+// Defined by the runtime package.
 func signal_disable(uint32)
 func signal_enable(uint32)
 func signal_ignore(uint32)
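
The deleted assembly trampolines are unnecessary because the runtime now pushes these functions into os/signal directly. Roughly, the provider side looks like the sketch below (illustrative, not the exact runtime source); the consuming package declares the function with no body, as above, and keeps an empty .s file so the compiler accepts the declaration:

	package runtime // sketch of the mechanism only

	import _ "unsafe" // required for go:linkname

	//go:linkname signal_enable os/signal.signal_enable
	func signal_enable(s uint32) {
		// real implementation lives in the runtime
	}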
diff --git a/src/reflect/asm_ppc64x.s b/src/reflect/asm_ppc64x.s
index d5f7f8f..0fa570c 100644
--- a/src/reflect/asm_ppc64x.s
+++ b/src/reflect/asm_ppc64x.s
@@ -6,6 +6,7 @@
 
 #include "textflag.h"
 #include "funcdata.h"
+#include "asm_ppc64x.h"
 
 // makeFuncStub is the code half of the function returned by MakeFunc.
 // See the comment on the declaration of makeFuncStub in makefunc.go
@@ -13,9 +14,9 @@
 // No arg size here, runtime pulls arg map out of the func value.
 TEXT ·makeFuncStub(SB),(NOSPLIT|WRAPPER),$16
 	NO_LOCAL_POINTERS
-	MOVD	R11, 8(R1)
+	MOVD	R11, FIXED_FRAME+0(R1)
 	MOVD	$argframe+0(FP), R3
-	MOVD	R3, 16(R1)
+	MOVD	R3, FIXED_FRAME+8(R1)
 	BL	·callReflect(SB)
 	RET
 
@@ -25,8 +26,8 @@
 // No arg size here; runtime pulls arg map out of the func value.
 TEXT ·methodValueCall(SB),(NOSPLIT|WRAPPER),$16
 	NO_LOCAL_POINTERS
-	MOVD	R11, 8(R1)
+	MOVD	R11, FIXED_FRAME+0(R1)
 	MOVD	$argframe+0(FP), R3
-	MOVD	R3, 16(R1)
+	MOVD	R3, FIXED_FRAME+8(R1)
 	BL	·callMethod(SB)
 	RET
diff --git a/src/reflect/type.go b/src/reflect/type.go
index 95eaf2c..e98c960 100644
--- a/src/reflect/type.go
+++ b/src/reflect/type.go
@@ -347,6 +347,7 @@
 	indirectvalue uint8  // store ptr to value instead of value itself
 	bucketsize    uint16 // size of bucket
 	reflexivekey  bool   // true if k==k for all keys
+	needkeyupdate bool   // true if we need to update key on an overwrite
 }
 
 // ptrType represents a pointer type.
@@ -1525,6 +1526,7 @@
 	}
 	mt.bucketsize = uint16(mt.bucket.size)
 	mt.reflexivekey = isReflexive(ktyp)
+	mt.needkeyupdate = needKeyUpdate(ktyp)
 	mt.uncommonType = nil
 	mt.ptrToThis = nil
 
@@ -1669,6 +1671,33 @@
 	}
 }
 
+// needKeyUpdate reports whether map overwrites require the key to be copied.
+func needKeyUpdate(t *rtype) bool {
+	switch t.Kind() {
+	case Bool, Int, Int8, Int16, Int32, Int64, Uint, Uint8, Uint16, Uint32, Uint64, Uintptr, Chan, Ptr, UnsafePointer:
+		return false
+	case Float32, Float64, Complex64, Complex128, Interface, String:
+		// Float keys can be updated from +0 to -0.
+		// String keys can be updated to use a smaller backing store.
+		// Interfaces might have floats or strings in them.
+		return true
+	case Array:
+		tt := (*arrayType)(unsafe.Pointer(t))
+		return needKeyUpdate(tt.elem)
+	case Struct:
+		tt := (*structType)(unsafe.Pointer(t))
+		for _, f := range tt.fields {
+			if needKeyUpdate(f.typ) {
+				return true
+			}
+		}
+		return false
+	default:
+		// Func, Map, Slice, Invalid
+		panic("needKeyUpdate called on non-key type " + t.String())
+	}
+}
+
 // Make sure these routines stay in sync with ../../runtime/hashmap.go!
 // These types exist only for GC, so we only fill out GC relevant info.
 // Currently, that's just size and the GC program.  We also fill in string
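
needkeyupdate exists because keys can compare equal without being bit-identical, so an insert over an existing key must decide whether to overwrite the stored key. Floats are the classic case (+0 == -0); a small standalone demonstration, independent of the patch:

	package main

	import (
		"fmt"
		"math"
	)

	func main() {
		negZero := math.Copysign(0, -1)
		m := map[float64]int{}
		m[0.0] = 1
		m[negZero] = 2 // same key by ==, different bits
		for k := range m {
			// With key updates, the stored key is the most recent
			// equal key, so its sign bit is set here.
			fmt.Println(math.Signbit(k), m[k])
		}
	}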
diff --git a/src/regexp/example_test.go b/src/regexp/example_test.go
index a4e0da8..d05e87b 100644
--- a/src/regexp/example_test.go
+++ b/src/regexp/example_test.go
@@ -146,3 +146,25 @@
 	// ${last} ${first}
 	// Turing Alan
 }
+
+func ExampleRegexp_Split() {
+	a := regexp.MustCompile("a")
+	fmt.Println(a.Split("banana", -1))
+	fmt.Println(a.Split("banana", 0))
+	fmt.Println(a.Split("banana", 1))
+	fmt.Println(a.Split("banana", 2))
+	zp := regexp.MustCompile("z+")
+	fmt.Println(zp.Split("pizza", -1))
+	fmt.Println(zp.Split("pizza", 0))
+	fmt.Println(zp.Split("pizza", 1))
+	fmt.Println(zp.Split("pizza", 2))
+	// Output:
+	// [b n n ]
+	// []
+	// [banana]
+	// [b nana]
+	// [pi a]
+	// []
+	// [pizza]
+	// [pi a]
+}
diff --git a/src/runtime/alg.go b/src/runtime/alg.go
index c666836..bb2f2b8 100644
--- a/src/runtime/alg.go
+++ b/src/runtime/alg.go
@@ -335,5 +335,8 @@
 		return
 	}
 	getRandomData((*[len(hashkey) * ptrSize]byte)(unsafe.Pointer(&hashkey))[:])
-	hashkey[0] |= 1 // make sure this number is odd
+	hashkey[0] |= 1 // make sure these numbers are odd
+	hashkey[1] |= 1
+	hashkey[2] |= 1
+	hashkey[3] |= 1
 }
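
Forcing every word of hashkey to be odd keeps multiplicative mixing steps invertible mod 2^64: multiplying by an odd constant is a bijection, so no seed bits are silently discarded. A standalone illustration (the constant is arbitrary, not the runtime's):

	package main

	import "fmt"

	func main() {
		const k uint64 = 0x9E3779B97F4A7C15 | 1 // any odd constant
		seen := make(map[uint64]bool)
		for x := uint64(0); x < 1000; x++ {
			h := x * k
			if seen[h] {
				fmt.Println("collision") // cannot happen for odd k
			}
			seen[h] = true
		}
		fmt.Println("distinct outputs:", len(seen))
	}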
diff --git a/src/runtime/arch1_386.go b/src/runtime/arch1_386.go
deleted file mode 100644
index d41696a..0000000
--- a/src/runtime/arch1_386.go
+++ /dev/null
@@ -1,15 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-const (
-	thechar        = '8'
-	_BigEndian     = 0
-	_CacheLineSize = 64
-	_PhysPageSize  = goos_nacl*65536 + (1-goos_nacl)*4096 // 4k normally; 64k on NaCl
-	_PCQuantum     = 1
-	_Int64Align    = 4
-	hugePageSize   = 1 << 21
-)
diff --git a/src/runtime/arch1_amd64.go b/src/runtime/arch1_amd64.go
deleted file mode 100644
index 15f4cc6..0000000
--- a/src/runtime/arch1_amd64.go
+++ /dev/null
@@ -1,15 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-const (
-	thechar        = '6'
-	_BigEndian     = 0
-	_CacheLineSize = 64
-	_PhysPageSize  = 4096
-	_PCQuantum     = 1
-	_Int64Align    = 8
-	hugePageSize   = 1 << 21
-)
diff --git a/src/runtime/arch1_amd64p32.go b/src/runtime/arch1_amd64p32.go
deleted file mode 100644
index 3c5456f..0000000
--- a/src/runtime/arch1_amd64p32.go
+++ /dev/null
@@ -1,15 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-const (
-	thechar        = '6'
-	_BigEndian     = 0
-	_CacheLineSize = 64
-	_PhysPageSize  = 65536*goos_nacl + 4096*(1-goos_nacl)
-	_PCQuantum     = 1
-	_Int64Align    = 8
-	hugePageSize   = 1 << 21
-)
diff --git a/src/runtime/arch1_arm.go b/src/runtime/arch1_arm.go
deleted file mode 100644
index 0ec2093..0000000
--- a/src/runtime/arch1_arm.go
+++ /dev/null
@@ -1,15 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-const (
-	thechar        = '5'
-	_BigEndian     = 0
-	_CacheLineSize = 32
-	_PhysPageSize  = 65536*goos_nacl + 4096*(1-goos_nacl)
-	_PCQuantum     = 4
-	_Int64Align    = 4
-	hugePageSize   = 0
-)
diff --git a/src/runtime/arch1_arm64.go b/src/runtime/arch1_arm64.go
deleted file mode 100644
index 29a87db..0000000
--- a/src/runtime/arch1_arm64.go
+++ /dev/null
@@ -1,15 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-const (
-	thechar        = '7'
-	_BigEndian     = 0
-	_CacheLineSize = 32
-	_PhysPageSize  = 65536
-	_PCQuantum     = 4
-	_Int64Align    = 8
-	hugePageSize   = 0
-)
diff --git a/src/runtime/arch1_ppc64.go b/src/runtime/arch1_ppc64.go
deleted file mode 100644
index de6dd91..0000000
--- a/src/runtime/arch1_ppc64.go
+++ /dev/null
@@ -1,15 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-const (
-	thechar        = '9'
-	_BigEndian     = 1
-	_CacheLineSize = 64
-	_PhysPageSize  = 65536
-	_PCQuantum     = 4
-	_Int64Align    = 8
-	hugePageSize   = 0
-)
diff --git a/src/runtime/arch1_ppc64le.go b/src/runtime/arch1_ppc64le.go
deleted file mode 100644
index 9a55c71..0000000
--- a/src/runtime/arch1_ppc64le.go
+++ /dev/null
@@ -1,15 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-const (
-	thechar        = '9'
-	_BigEndian     = 0
-	_CacheLineSize = 64
-	_PhysPageSize  = 65536
-	_PCQuantum     = 4
-	_Int64Align    = 8
-	hugePageSize   = 0
-)
diff --git a/src/runtime/arch_386.go b/src/runtime/arch_386.go
index 79d38c7..75e94ec 100644
--- a/src/runtime/arch_386.go
+++ b/src/runtime/arch_386.go
@@ -4,5 +4,16 @@
 
 package runtime
 
+const (
+	thechar        = '8'
+	_BigEndian     = 0
+	_CacheLineSize = 64
+	_PhysPageSize  = goos_nacl*65536 + (1-goos_nacl)*4096 // 4k normally; 64k on NaCl
+	_PCQuantum     = 1
+	_Int64Align    = 4
+	hugePageSize   = 1 << 21
+	minFrameSize   = 0
+)
+
 type uintreg uint32
 type intptr int32 // TODO(rsc): remove
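
Merging the arch1_*.go constants into arch_*.go also keeps the goos_nacl selection trick in one place: goos_nacl is 1 when GOOS=nacl and 0 otherwise, so the arithmetic picks a page size at compile time without build tags. A sketch with a stand-in constant:

	// goosNacl stands in for the runtime's generated goos_nacl constant.
	const goosNacl = 0 // 1 when building for GOOS=nacl

	// 64 KiB pages on NaCl, 4 KiB everywhere else, with no
	// build-tagged files and no runtime branch.
	const physPageSize = goosNacl*65536 + (1-goosNacl)*4096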
diff --git a/src/runtime/arch_amd64.go b/src/runtime/arch_amd64.go
index 270cd7b..d7721f7 100644
--- a/src/runtime/arch_amd64.go
+++ b/src/runtime/arch_amd64.go
@@ -4,5 +4,16 @@
 
 package runtime
 
+const (
+	thechar        = '6'
+	_BigEndian     = 0
+	_CacheLineSize = 64
+	_PhysPageSize  = 4096
+	_PCQuantum     = 1
+	_Int64Align    = 8
+	hugePageSize   = 1 << 21
+	minFrameSize   = 0
+)
+
 type uintreg uint64
 type intptr int64 // TODO(rsc): remove
diff --git a/src/runtime/arch_amd64p32.go b/src/runtime/arch_amd64p32.go
index 5c636ae..aa8343a 100644
--- a/src/runtime/arch_amd64p32.go
+++ b/src/runtime/arch_amd64p32.go
@@ -4,5 +4,16 @@
 
 package runtime
 
+const (
+	thechar        = '6'
+	_BigEndian     = 0
+	_CacheLineSize = 64
+	_PhysPageSize  = 65536*goos_nacl + 4096*(1-goos_nacl)
+	_PCQuantum     = 1
+	_Int64Align    = 8
+	hugePageSize   = 1 << 21
+	minFrameSize   = 0
+)
+
 type uintreg uint64
 type intptr int32 // TODO(rsc): remove
diff --git a/src/runtime/arch_arm.go b/src/runtime/arch_arm.go
index 79d38c7..aa3e180 100644
--- a/src/runtime/arch_arm.go
+++ b/src/runtime/arch_arm.go
@@ -4,5 +4,16 @@
 
 package runtime
 
+const (
+	thechar        = '5'
+	_BigEndian     = 0
+	_CacheLineSize = 32
+	_PhysPageSize  = 65536*goos_nacl + 4096*(1-goos_nacl)
+	_PCQuantum     = 4
+	_Int64Align    = 4
+	hugePageSize   = 0
+	minFrameSize   = 4
+)
+
 type uintreg uint32
 type intptr int32 // TODO(rsc): remove
diff --git a/src/runtime/arch_arm64.go b/src/runtime/arch_arm64.go
index 270cd7b..f01c26d 100644
--- a/src/runtime/arch_arm64.go
+++ b/src/runtime/arch_arm64.go
@@ -4,5 +4,16 @@
 
 package runtime
 
+const (
+	thechar        = '7'
+	_BigEndian     = 0
+	_CacheLineSize = 32
+	_PhysPageSize  = 65536
+	_PCQuantum     = 4
+	_Int64Align    = 8
+	hugePageSize   = 0
+	minFrameSize   = 8
+)
+
 type uintreg uint64
 type intptr int64 // TODO(rsc): remove
diff --git a/src/runtime/arch_ppc64.go b/src/runtime/arch_ppc64.go
index 270cd7b..273cc56 100644
--- a/src/runtime/arch_ppc64.go
+++ b/src/runtime/arch_ppc64.go
@@ -4,5 +4,16 @@
 
 package runtime
 
+const (
+	thechar        = '9'
+	_BigEndian     = 1
+	_CacheLineSize = 64
+	_PhysPageSize  = 65536
+	_PCQuantum     = 4
+	_Int64Align    = 8
+	hugePageSize   = 0
+	minFrameSize   = 8
+)
+
 type uintreg uint64
 type intptr int64 // TODO(rsc): remove
diff --git a/src/runtime/arch_ppc64le.go b/src/runtime/arch_ppc64le.go
index 270cd7b..e4eb9e5 100644
--- a/src/runtime/arch_ppc64le.go
+++ b/src/runtime/arch_ppc64le.go
@@ -4,5 +4,16 @@
 
 package runtime
 
+const (
+	thechar        = '9'
+	_BigEndian     = 0
+	_CacheLineSize = 64
+	_PhysPageSize  = 65536
+	_PCQuantum     = 4
+	_Int64Align    = 8
+	hugePageSize   = 0
+	minFrameSize   = 8
+)
+
 type uintreg uint64
 type intptr int64 // TODO(rsc): remove
diff --git a/src/runtime/asm_386.s b/src/runtime/asm_386.s
index 2bc5d8b..fa74853 100644
--- a/src/runtime/asm_386.s
+++ b/src/runtime/asm_386.s
@@ -964,10 +964,13 @@
 // CX: length
 // DX: address to put return value
 TEXT runtime·aeshashbody(SB),NOSPLIT,$0-0
-	MOVL	h+4(FP), X6	// seed to low 64 bits of xmm6
-	PINSRD	$2, CX, X6	// size to high 64 bits of xmm6
-	PSHUFHW	$0, X6, X6	// replace size with its low 2 bytes repeated 4 times
-	MOVO	runtime·aeskeysched(SB), X7
+	MOVL	h+4(FP), X0	            // 32 bits of per-table hash seed
+	PINSRW	$4, CX, X0	            // 16 bits of length
+	PSHUFHW	$0, X0, X0	            // replace size with its low 2 bytes repeated 4 times
+	MOVO	X0, X1                      // save unscrambled seed
+	PXOR	runtime·aeskeysched(SB), X0 // xor in per-process seed
+	AESENC	X0, X0                      // scramble seed
+
 	CMPL	CX, $16
 	JB	aes0to15
 	JE	aes16
@@ -987,100 +990,117 @@
 
 	// 16 bytes loaded at this address won't cross
 	// a page boundary, so we can load it directly.
-	MOVOU	-16(AX), X0
+	MOVOU	-16(AX), X1
 	ADDL	CX, CX
-	PAND	masks<>(SB)(CX*8), X0
+	PAND	masks<>(SB)(CX*8), X1
 
-	// scramble 3 times
-	AESENC	X6, X0
-	AESENC	X7, X0
-	AESENC	X7, X0
-	MOVL	X0, (DX)
+final1:	
+	AESENC	X0, X1  // scramble input, xor in seed
+	AESENC	X1, X1  // scramble combo 2 times
+	AESENC	X1, X1
+	MOVL	X1, (DX)
 	RET
 
 endofpage:
 	// address ends in 1111xxxx.  Might be up against
 	// a page boundary, so load ending at last byte.
 	// Then shift bytes down using pshufb.
-	MOVOU	-32(AX)(CX*1), X0
+	MOVOU	-32(AX)(CX*1), X1
 	ADDL	CX, CX
-	PSHUFB	shifts<>(SB)(CX*8), X0
-	AESENC	X6, X0
-	AESENC	X7, X0
-	AESENC	X7, X0
-	MOVL	X0, (DX)
-	RET
+	PSHUFB	shifts<>(SB)(CX*8), X1
+	JMP	final1
 
 aes0:
-	// return input seed
-	MOVL	h+4(FP), AX
-	MOVL	AX, (DX)
+	// Return scrambled input seed
+	AESENC	X0, X0
+	MOVL	X0, (DX)
 	RET
 
 aes16:
-	MOVOU	(AX), X0
-	AESENC	X6, X0
-	AESENC	X7, X0
-	AESENC	X7, X0
-	MOVL	X0, (DX)
-	RET
-
+	MOVOU	(AX), X1
+	JMP	final1
 
 aes17to32:
+	// make second starting seed
+	PXOR	runtime·aeskeysched+16(SB), X1
+	AESENC	X1, X1
+	
 	// load data to be hashed
-	MOVOU	(AX), X0
-	MOVOU	-16(AX)(CX*1), X1
+	MOVOU	(AX), X2
+	MOVOU	-16(AX)(CX*1), X3
 
 	// scramble 3 times
-	AESENC	X6, X0
-	AESENC	runtime·aeskeysched+16(SB), X1
-	AESENC	X7, X0
-	AESENC	X7, X1
-	AESENC	X7, X0
-	AESENC	X7, X1
+	AESENC	X0, X2
+	AESENC	X1, X3
+	AESENC	X2, X2
+	AESENC	X3, X3
+	AESENC	X2, X2
+	AESENC	X3, X3
 
 	// combine results
-	PXOR	X1, X0
-	MOVL	X0, (DX)
+	PXOR	X3, X2
+	MOVL	X2, (DX)
 	RET
 
 aes33to64:
-	MOVOU	(AX), X0
-	MOVOU	16(AX), X1
-	MOVOU	-32(AX)(CX*1), X2
-	MOVOU	-16(AX)(CX*1), X3
+	// make 3 more starting seeds
+	MOVO	X1, X2
+	MOVO	X1, X3
+	PXOR	runtime·aeskeysched+16(SB), X1
+	PXOR	runtime·aeskeysched+32(SB), X2
+	PXOR	runtime·aeskeysched+48(SB), X3
+	AESENC	X1, X1
+	AESENC	X2, X2
+	AESENC	X3, X3
 	
-	AESENC	X6, X0
-	AESENC	runtime·aeskeysched+16(SB), X1
-	AESENC	runtime·aeskeysched+32(SB), X2
-	AESENC	runtime·aeskeysched+48(SB), X3
-	AESENC	X7, X0
-	AESENC	X7, X1
-	AESENC	X7, X2
-	AESENC	X7, X3
-	AESENC	X7, X0
-	AESENC	X7, X1
-	AESENC	X7, X2
-	AESENC	X7, X3
+	MOVOU	(AX), X4
+	MOVOU	16(AX), X5
+	MOVOU	-32(AX)(CX*1), X6
+	MOVOU	-16(AX)(CX*1), X7
+	
+	AESENC	X0, X4
+	AESENC	X1, X5
+	AESENC	X2, X6
+	AESENC	X3, X7
+	
+	AESENC	X4, X4
+	AESENC	X5, X5
+	AESENC	X6, X6
+	AESENC	X7, X7
+	
+	AESENC	X4, X4
+	AESENC	X5, X5
+	AESENC	X6, X6
+	AESENC	X7, X7
 
-	PXOR	X2, X0
-	PXOR	X3, X1
-	PXOR	X1, X0
-	MOVL	X0, (DX)
+	PXOR	X6, X4
+	PXOR	X7, X5
+	PXOR	X5, X4
+	MOVL	X4, (DX)
 	RET
 
 aes65plus:
+	// make 3 more starting seeds
+	MOVO	X1, X2
+	MOVO	X1, X3
+	PXOR	runtime·aeskeysched+16(SB), X1
+	PXOR	runtime·aeskeysched+32(SB), X2
+	PXOR	runtime·aeskeysched+48(SB), X3
+	AESENC	X1, X1
+	AESENC	X2, X2
+	AESENC	X3, X3
+	
 	// start with last (possibly overlapping) block
-	MOVOU	-64(AX)(CX*1), X0
-	MOVOU	-48(AX)(CX*1), X1
-	MOVOU	-32(AX)(CX*1), X2
-	MOVOU	-16(AX)(CX*1), X3
+	MOVOU	-64(AX)(CX*1), X4
+	MOVOU	-48(AX)(CX*1), X5
+	MOVOU	-32(AX)(CX*1), X6
+	MOVOU	-16(AX)(CX*1), X7
 
 	// scramble state once
-	AESENC	X6, X0
-	AESENC	runtime·aeskeysched+16(SB), X1
-	AESENC	runtime·aeskeysched+32(SB), X2
-	AESENC	runtime·aeskeysched+48(SB), X3
+	AESENC	X0, X4
+	AESENC	X1, X5
+	AESENC	X2, X6
+	AESENC	X3, X7
 
 	// compute number of remaining 64-byte blocks
 	DECL	CX
@@ -1088,39 +1108,40 @@
 	
 aesloop:
 	// scramble state, xor in a block
-	MOVOU	(AX), X4
-	MOVOU	16(AX), X5
-	AESENC	X4, X0
-	AESENC	X5, X1
-	MOVOU	32(AX), X4
-	MOVOU	48(AX), X5
-	AESENC	X4, X2
-	AESENC	X5, X3
+	MOVOU	(AX), X0
+	MOVOU	16(AX), X1
+	MOVOU	32(AX), X2
+	MOVOU	48(AX), X3
+	AESENC	X0, X4
+	AESENC	X1, X5
+	AESENC	X2, X6
+	AESENC	X3, X7
 
 	// scramble state
-	AESENC	X7, X0
-	AESENC	X7, X1
-	AESENC	X7, X2
-	AESENC	X7, X3
+	AESENC	X4, X4
+	AESENC	X5, X5
+	AESENC	X6, X6
+	AESENC	X7, X7
 
 	ADDL	$64, AX
 	DECL	CX
 	JNE	aesloop
 
 	// 2 more scrambles to finish
-	AESENC	X7, X0
-	AESENC	X7, X1
-	AESENC	X7, X2
-	AESENC	X7, X3
-	AESENC	X7, X0
-	AESENC	X7, X1
-	AESENC	X7, X2
-	AESENC	X7, X3
+	AESENC	X4, X4
+	AESENC	X5, X5
+	AESENC	X6, X6
+	AESENC	X7, X7
+	
+	AESENC	X4, X4
+	AESENC	X5, X5
+	AESENC	X6, X6
+	AESENC	X7, X7
 
-	PXOR	X2, X0
-	PXOR	X3, X1
-	PXOR	X1, X0
-	MOVL	X0, (DX)
+	PXOR	X6, X4
+	PXOR	X7, X5
+	PXOR	X5, X4
+	MOVL	X4, (DX)
 	RET
 
 TEXT runtime·aeshash32(SB),NOSPLIT,$0-12
diff --git a/src/runtime/asm_amd64.s b/src/runtime/asm_amd64.s
index dc975be..39602ec 100644
--- a/src/runtime/asm_amd64.s
+++ b/src/runtime/asm_amd64.s
@@ -951,10 +951,14 @@
 // CX: length
 // DX: address to put return value
 TEXT runtime·aeshashbody(SB),NOSPLIT,$0-0
-	MOVQ	h+8(FP), X6	// seed to low 64 bits of xmm6
-	PINSRQ	$1, CX, X6	// size to high 64 bits of xmm6
-	PSHUFHW	$0, X6, X6	// replace size with its low 2 bytes repeated 4 times
-	MOVO	runtime·aeskeysched(SB), X7
+	// Fill an SSE register with our seeds.
+	MOVQ	h+8(FP), X0			// 64 bits of per-table hash seed
+	PINSRW	$4, CX, X0			// 16 bits of length
+	PSHUFHW $0, X0, X0			// repeat length 4 times total
+	MOVO	X0, X1				// save unscrambled seed
+	PXOR	runtime·aeskeysched(SB), X0	// xor in per-process seed
+	AESENC	X0, X0				// scramble seed
+
 	CMPQ	CX, $16
 	JB	aes0to15
 	JE	aes16
@@ -976,218 +980,275 @@
 
 	// 16 bytes loaded at this address won't cross
 	// a page boundary, so we can load it directly.
-	MOVOU	-16(AX), X0
+	MOVOU	-16(AX), X1
 	ADDQ	CX, CX
 	MOVQ	$masks<>(SB), AX
-	PAND	(AX)(CX*8), X0
-
-	// scramble 3 times
-	AESENC	X6, X0
-	AESENC	X7, X0
-	AESENC	X7, X0
-	MOVQ	X0, (DX)
+	PAND	(AX)(CX*8), X1
+final1:
+	AESENC	X0, X1	// scramble input, xor in seed
+	AESENC	X1, X1  // scramble combo 2 times
+	AESENC	X1, X1
+	MOVQ	X1, (DX)
 	RET
 
 endofpage:
 	// address ends in 1111xxxx.  Might be up against
 	// a page boundary, so load ending at last byte.
 	// Then shift bytes down using pshufb.
-	MOVOU	-32(AX)(CX*1), X0
+	MOVOU	-32(AX)(CX*1), X1
 	ADDQ	CX, CX
 	MOVQ	$shifts<>(SB), AX
-	PSHUFB	(AX)(CX*8), X0
-	AESENC	X6, X0
-	AESENC	X7, X0
-	AESENC	X7, X0
-	MOVQ	X0, (DX)
-	RET
+	PSHUFB	(AX)(CX*8), X1
+	JMP	final1
 
 aes0:
-	// return input seed
-	MOVQ	h+8(FP), AX
-	MOVQ	AX, (DX)
+	// Return scrambled input seed
+	AESENC	X0, X0
+	MOVQ	X0, (DX)
 	RET
 
 aes16:
-	MOVOU	(AX), X0
-	AESENC	X6, X0
-	AESENC	X7, X0
-	AESENC	X7, X0
-	MOVQ	X0, (DX)
-	RET
+	MOVOU	(AX), X1
+	JMP	final1
 
 aes17to32:
+	// make second starting seed
+	PXOR	runtime·aeskeysched+16(SB), X1
+	AESENC	X1, X1
+	
 	// load data to be hashed
-	MOVOU	(AX), X0
-	MOVOU	-16(AX)(CX*1), X1
+	MOVOU	(AX), X2
+	MOVOU	-16(AX)(CX*1), X3
 
 	// scramble 3 times
-	AESENC	X6, X0
-	AESENC	runtime·aeskeysched+16(SB), X1
-	AESENC	X7, X0
-	AESENC	X7, X1
-	AESENC	X7, X0
-	AESENC	X7, X1
+	AESENC	X0, X2
+	AESENC	X1, X3
+	AESENC	X2, X2
+	AESENC	X3, X3
+	AESENC	X2, X2
+	AESENC	X3, X3
 
 	// combine results
-	PXOR	X1, X0
-	MOVQ	X0, (DX)
+	PXOR	X3, X2
+	MOVQ	X2, (DX)
 	RET
 
 aes33to64:
-	MOVOU	(AX), X0
-	MOVOU	16(AX), X1
-	MOVOU	-32(AX)(CX*1), X2
-	MOVOU	-16(AX)(CX*1), X3
+	// make 3 more starting seeds
+	MOVO	X1, X2
+	MOVO	X1, X3
+	PXOR	runtime·aeskeysched+16(SB), X1
+	PXOR	runtime·aeskeysched+32(SB), X2
+	PXOR	runtime·aeskeysched+48(SB), X3
+	AESENC	X1, X1
+	AESENC	X2, X2
+	AESENC	X3, X3
 	
-	AESENC	X6, X0
-	AESENC	runtime·aeskeysched+16(SB), X1
-	AESENC	runtime·aeskeysched+32(SB), X2
-	AESENC	runtime·aeskeysched+48(SB), X3
-	AESENC	X7, X0
-	AESENC	X7, X1
-	AESENC	X7, X2
-	AESENC	X7, X3
-	AESENC	X7, X0
-	AESENC	X7, X1
-	AESENC	X7, X2
-	AESENC	X7, X3
+	MOVOU	(AX), X4
+	MOVOU	16(AX), X5
+	MOVOU	-32(AX)(CX*1), X6
+	MOVOU	-16(AX)(CX*1), X7
+	
+	AESENC	X0, X4
+	AESENC	X1, X5
+	AESENC	X2, X6
+	AESENC	X3, X7
+	
+	AESENC	X4, X4
+	AESENC	X5, X5
+	AESENC	X6, X6
+	AESENC	X7, X7
+	
+	AESENC	X4, X4
+	AESENC	X5, X5
+	AESENC	X6, X6
+	AESENC	X7, X7
 
-	PXOR	X2, X0
-	PXOR	X3, X1
-	PXOR	X1, X0
-	MOVQ	X0, (DX)
+	PXOR	X6, X4
+	PXOR	X7, X5
+	PXOR	X5, X4
+	MOVQ	X4, (DX)
 	RET
 
 aes65to128:
-	MOVOU	(AX), X0
-	MOVOU	16(AX), X1
-	MOVOU	32(AX), X2
-	MOVOU	48(AX), X3
-	MOVOU	-64(AX)(CX*1), X4
-	MOVOU	-48(AX)(CX*1), X5
-	MOVOU	-32(AX)(CX*1), X8
-	MOVOU	-16(AX)(CX*1), X9
-	
-	AESENC	X6, X0
-	AESENC	runtime·aeskeysched+16(SB), X1
-	AESENC	runtime·aeskeysched+32(SB), X2
-	AESENC	runtime·aeskeysched+48(SB), X3
-	AESENC	runtime·aeskeysched+64(SB), X4
-	AESENC	runtime·aeskeysched+80(SB), X5
-	AESENC	runtime·aeskeysched+96(SB), X8
-	AESENC	runtime·aeskeysched+112(SB), X9
-	AESENC	X7, X0
-	AESENC	X7, X1
-	AESENC	X7, X2
-	AESENC	X7, X3
-	AESENC	X7, X4
-	AESENC	X7, X5
-	AESENC	X7, X8
-	AESENC	X7, X9
-	AESENC	X7, X0
-	AESENC	X7, X1
-	AESENC	X7, X2
-	AESENC	X7, X3
-	AESENC	X7, X4
-	AESENC	X7, X5
-	AESENC	X7, X8
-	AESENC	X7, X9
+	// make 7 more starting seeds
+	MOVO	X1, X2
+	MOVO	X1, X3
+	MOVO	X1, X4
+	MOVO	X1, X5
+	MOVO	X1, X6
+	MOVO	X1, X7
+	PXOR	runtime·aeskeysched+16(SB), X1
+	PXOR	runtime·aeskeysched+32(SB), X2
+	PXOR	runtime·aeskeysched+48(SB), X3
+	PXOR	runtime·aeskeysched+64(SB), X4
+	PXOR	runtime·aeskeysched+80(SB), X5
+	PXOR	runtime·aeskeysched+96(SB), X6
+	PXOR	runtime·aeskeysched+112(SB), X7
+	AESENC	X1, X1
+	AESENC	X2, X2
+	AESENC	X3, X3
+	AESENC	X4, X4
+	AESENC	X5, X5
+	AESENC	X6, X6
+	AESENC	X7, X7
 
-	PXOR	X4, X0
-	PXOR	X5, X1
-	PXOR	X8, X2
-	PXOR	X9, X3
-	PXOR	X2, X0
-	PXOR	X3, X1
-	PXOR	X1, X0
-	MOVQ	X0, (DX)
+	// load data
+	MOVOU	(AX), X8
+	MOVOU	16(AX), X9
+	MOVOU	32(AX), X10
+	MOVOU	48(AX), X11
+	MOVOU	-64(AX)(CX*1), X12
+	MOVOU	-48(AX)(CX*1), X13
+	MOVOU	-32(AX)(CX*1), X14
+	MOVOU	-16(AX)(CX*1), X15
+
+	// scramble data, xor in seed
+	AESENC	X0, X8
+	AESENC	X1, X9
+	AESENC	X2, X10
+	AESENC	X3, X11
+	AESENC	X4, X12
+	AESENC	X5, X13
+	AESENC	X6, X14
+	AESENC	X7, X15
+
+	// scramble twice
+	AESENC	X8, X8
+	AESENC	X9, X9
+	AESENC	X10, X10
+	AESENC	X11, X11
+	AESENC	X12, X12
+	AESENC	X13, X13
+	AESENC	X14, X14
+	AESENC	X15, X15
+	
+	AESENC	X8, X8
+	AESENC	X9, X9
+	AESENC	X10, X10
+	AESENC	X11, X11
+	AESENC	X12, X12
+	AESENC	X13, X13
+	AESENC	X14, X14
+	AESENC	X15, X15
+
+	// combine results
+	PXOR	X12, X8
+	PXOR	X13, X9
+	PXOR	X14, X10
+	PXOR	X15, X11
+	PXOR	X10, X8
+	PXOR	X11, X9
+	PXOR	X9, X8
+	MOVQ	X8, (DX)
 	RET
 
 aes129plus:
+	// make 7 more starting seeds
+	MOVO	X1, X2
+	MOVO	X1, X3
+	MOVO	X1, X4
+	MOVO	X1, X5
+	MOVO	X1, X6
+	MOVO	X1, X7
+	PXOR	runtime·aeskeysched+16(SB), X1
+	PXOR	runtime·aeskeysched+32(SB), X2
+	PXOR	runtime·aeskeysched+48(SB), X3
+	PXOR	runtime·aeskeysched+64(SB), X4
+	PXOR	runtime·aeskeysched+80(SB), X5
+	PXOR	runtime·aeskeysched+96(SB), X6
+	PXOR	runtime·aeskeysched+112(SB), X7
+	AESENC	X1, X1
+	AESENC	X2, X2
+	AESENC	X3, X3
+	AESENC	X4, X4
+	AESENC	X5, X5
+	AESENC	X6, X6
+	AESENC	X7, X7
+	
 	// start with last (possibly overlapping) block
-	MOVOU	-128(AX)(CX*1), X0
-	MOVOU	-112(AX)(CX*1), X1
-	MOVOU	-96(AX)(CX*1), X2
-	MOVOU	-80(AX)(CX*1), X3
-	MOVOU	-64(AX)(CX*1), X4
-	MOVOU	-48(AX)(CX*1), X5
-	MOVOU	-32(AX)(CX*1), X8
-	MOVOU	-16(AX)(CX*1), X9
+	MOVOU	-128(AX)(CX*1), X8
+	MOVOU	-112(AX)(CX*1), X9
+	MOVOU	-96(AX)(CX*1), X10
+	MOVOU	-80(AX)(CX*1), X11
+	MOVOU	-64(AX)(CX*1), X12
+	MOVOU	-48(AX)(CX*1), X13
+	MOVOU	-32(AX)(CX*1), X14
+	MOVOU	-16(AX)(CX*1), X15
 
-	// scramble state once
-	AESENC	X6, X0
-	AESENC	runtime·aeskeysched+16(SB), X1
-	AESENC	runtime·aeskeysched+32(SB), X2
-	AESENC	runtime·aeskeysched+48(SB), X3
-	AESENC	runtime·aeskeysched+64(SB), X4
-	AESENC	runtime·aeskeysched+80(SB), X5
-	AESENC	runtime·aeskeysched+96(SB), X8
-	AESENC	runtime·aeskeysched+112(SB), X9
-
+	// scramble input once, xor in seed
+	AESENC	X0, X8
+	AESENC	X1, X9
+	AESENC	X2, X10
+	AESENC	X3, X11
+	AESENC	X4, X12
+	AESENC	X5, X13
+	AESENC	X6, X14
+	AESENC	X7, X15
+	
 	// compute number of remaining 128-byte blocks
 	DECQ	CX
 	SHRQ	$7, CX
 	
 aesloop:
 	// scramble state, xor in a block
-	MOVOU	(AX), X10
-	MOVOU	16(AX), X11
-	MOVOU	32(AX), X12
-	MOVOU	48(AX), X13
-	AESENC	X10, X0
-	AESENC	X11, X1
-	AESENC	X12, X2
-	AESENC	X13, X3
-	MOVOU	64(AX), X10
-	MOVOU	80(AX), X11
-	MOVOU	96(AX), X12
-	MOVOU	112(AX), X13
-	AESENC	X10, X4
-	AESENC	X11, X5
-	AESENC	X12, X8
-	AESENC	X13, X9
+	MOVOU	(AX), X0
+	MOVOU	16(AX), X1
+	MOVOU	32(AX), X2
+	MOVOU	48(AX), X3
+	AESENC	X0, X8
+	AESENC	X1, X9
+	AESENC	X2, X10
+	AESENC	X3, X11
+	MOVOU	64(AX), X4
+	MOVOU	80(AX), X5
+	MOVOU	96(AX), X6
+	MOVOU	112(AX), X7
+	AESENC	X4, X12
+	AESENC	X5, X13
+	AESENC	X6, X14
+	AESENC	X7, X15
 
 	// scramble state
-	AESENC	X7, X0
-	AESENC	X7, X1
-	AESENC	X7, X2
-	AESENC	X7, X3
-	AESENC	X7, X4
-	AESENC	X7, X5
-	AESENC	X7, X8
-	AESENC	X7, X9
+	AESENC	X8, X8
+	AESENC	X9, X9
+	AESENC	X10, X10
+	AESENC	X11, X11
+	AESENC	X12, X12
+	AESENC	X13, X13
+	AESENC	X14, X14
+	AESENC	X15, X15
 
 	ADDQ	$128, AX
 	DECQ	CX
 	JNE	aesloop
 
 	// 2 more scrambles to finish
-	AESENC	X7, X0
-	AESENC	X7, X1
-	AESENC	X7, X2
-	AESENC	X7, X3
-	AESENC	X7, X4
-	AESENC	X7, X5
-	AESENC	X7, X8
-	AESENC	X7, X9
-	AESENC	X7, X0
-	AESENC	X7, X1
-	AESENC	X7, X2
-	AESENC	X7, X3
-	AESENC	X7, X4
-	AESENC	X7, X5
-	AESENC	X7, X8
-	AESENC	X7, X9
+	AESENC	X8, X8
+	AESENC	X9, X9
+	AESENC	X10, X10
+	AESENC	X11, X11
+	AESENC	X12, X12
+	AESENC	X13, X13
+	AESENC	X14, X14
+	AESENC	X15, X15
+	AESENC	X8, X8
+	AESENC	X9, X9
+	AESENC	X10, X10
+	AESENC	X11, X11
+	AESENC	X12, X12
+	AESENC	X13, X13
+	AESENC	X14, X14
+	AESENC	X15, X15
 
-	PXOR	X4, X0
-	PXOR	X5, X1
-	PXOR	X8, X2
-	PXOR	X9, X3
-	PXOR	X2, X0
-	PXOR	X3, X1
-	PXOR	X1, X0
-	MOVQ	X0, (DX)
+	PXOR	X12, X8
+	PXOR	X13, X9
+	PXOR	X14, X10
+	PXOR	X15, X11
+	PXOR	X10, X8
+	PXOR	X11, X9
+	PXOR	X9, X8
+	MOVQ	X8, (DX)
 	RET
 	
 TEXT runtime·aeshash32(SB),NOSPLIT,$0-24
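
The rewrite gives each 16-byte lane its own starting seed (the per-table seed XORed with a slice of aeskeysched, then scrambled once) and scrambles lanes against themselves instead of reusing a shared round key in X7. Go-flavored pseudocode of the structure; aesenc and xor16 stand in for the AESENC instruction and a 128-bit XOR and are not real Go APIs:

	// hash sketches the per-lane flow of the new aeshashbody.
	func hash(blocks [][16]byte, tableSeed [16]byte, keysched [][16]byte) [16]byte {
		var out [16]byte
		for i, b := range blocks {
			s := xor16(tableSeed, keysched[i]) // mix in per-process entropy
			s = aesenc(s, s)                   // scramble the seed
			v := aesenc(s, b)                  // mix in the data
			v = aesenc(v, v)                   // two more self-scrambles
			v = aesenc(v, v)
			out = xor16(out, v) // combine lanes
		}
		return out
	}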
diff --git a/src/runtime/asm_amd64p32.s b/src/runtime/asm_amd64p32.s
index e8b1491..78b16f4 100644
--- a/src/runtime/asm_amd64p32.s
+++ b/src/runtime/asm_amd64p32.s
@@ -620,12 +620,12 @@
 	MOVL	ptr+0(FP), DI
 	MOVL	n+4(FP), CX
 	MOVQ	CX, BX
-	ANDQ	$7, BX
-	SHRQ	$3, CX
+	ANDQ	$3, BX
+	SHRQ	$2, CX
 	MOVQ	$0, AX
 	CLD
 	REP
-	STOSQ
+	STOSL
 	MOVQ	BX, CX
 	REP
 	STOSB
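
On amd64p32 the clear loop now stores 4-byte words (REP STOSL) rather than 8-byte words, with the tail handled a byte at a time. The equivalent logic in Go, as a sketch:

	// memclrSketch mirrors the rewritten loop: whole 4-byte words
	// first (REP STOSL), then the remaining bytes (REP STOSB).
	func memclrSketch(b []byte) {
		i := 0
		for ; i+4 <= len(b); i += 4 {
			b[i], b[i+1], b[i+2], b[i+3] = 0, 0, 0, 0
		}
		for ; i < len(b); i++ {
			b[i] = 0
		}
	}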
diff --git a/src/runtime/asm_arm64.s b/src/runtime/asm_arm64.s
index 797c95e..2f2d8ca 100644
--- a/src/runtime/asm_arm64.s
+++ b/src/runtime/asm_arm64.s
@@ -36,9 +36,9 @@
 	MRS_TPIDR_R0			// load TLS base pointer
 	MOVD	R0, R3			// arg 3: TLS base pointer
 #ifdef TLSG_IS_VARIABLE
-	MOVD	$runtime·tls_g(SB), R2 	// arg 2: tlsg
+	MOVD	$runtime·tls_g(SB), R2 	// arg 2: &tls_g
 #else
-	MOVD	$0x10, R2		// arg 2: tlsg TODO(minux): hardcoded for linux
+	MOVD	$0, R2		        // arg 2: not used when using platform's TLS
 #endif
 	MOVD	$setg_gcc<>(SB), R1	// arg 1: setg
 	MOVD	g, R0			// arg 0: G
diff --git a/src/runtime/asm_ppc64x.h b/src/runtime/asm_ppc64x.h
new file mode 100644
index 0000000..a2d2e5b
--- /dev/null
+++ b/src/runtime/asm_ppc64x.h
@@ -0,0 +1,31 @@
+// Copyright 2015 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// FIXED_FRAME defines the size of the fixed part of a stack frame. A stack
+// frame looks like this:
+//
+// +---------------------+
+// | local variable area |
+// +---------------------+
+// | argument area       |
+// +---------------------+ <- R1+FIXED_FRAME
+// | fixed area          |
+// +---------------------+ <- R1
+//
+// So a function that sets up a stack frame at all uses at least FIXED_FRAME
+// bytes of stack.  This mostly affects assembly that calls other functions
+// with arguments (the arguments should be stored at FIXED_FRAME+0(R1),
+// FIXED_FRAME+8(R1), etc.) and some other low-level places.
+//
+// The reason for using a constant is that when code is compiled as PIC on
+// ppc64le, the fixed part of the stack is 32 bytes (although PIC is not
+// actually supported yet).
+
+#ifdef GOARCH_ppc64
+#define FIXED_FRAME 8
+#endif
+
+#ifdef GOARCH_ppc64le
+#define FIXED_FRAME 8
+#endif
diff --git a/src/runtime/asm_ppc64x.s b/src/runtime/asm_ppc64x.s
index 6646dd8..27c6492 100644
--- a/src/runtime/asm_ppc64x.s
+++ b/src/runtime/asm_ppc64x.s
@@ -8,6 +8,7 @@
 #include "go_tls.h"
 #include "funcdata.h"
 #include "textflag.h"
+#include "asm_ppc64x.h"
 
 TEXT runtime·rt0_go(SB),NOSPLIT,$0
 	// R1 = stack; R3 = argc; R4 = argv; R13 = C TLS base pointer
@@ -15,9 +16,9 @@
 	// initialize essential registers
 	BL	runtime·reginit(SB)
 
-	SUB	$24, R1
-	MOVW	R3, 8(R1) // argc
-	MOVD	R4, 16(R1) // argv
+	SUB	$(FIXED_FRAME+16), R1
+	MOVW	R3, FIXED_FRAME+0(R1) // argc
+	MOVD	R4, FIXED_FRAME+8(R1) // argv
 
 	// create istack out of the given (operating system) stack.
 	// _cgo_init may update stackguard.
@@ -85,19 +86,19 @@
 DATA	runtime·mainPC+0(SB)/8,$runtime·main(SB)
 GLOBL	runtime·mainPC(SB),RODATA,$8
 
-TEXT runtime·breakpoint(SB),NOSPLIT,$-8-0
+TEXT runtime·breakpoint(SB),NOSPLIT|NOFRAME,$0-0
 	MOVD	R0, 2(R0) // TODO: TD
 	RET
 
-TEXT runtime·asminit(SB),NOSPLIT,$-8-0
+TEXT runtime·asminit(SB),NOSPLIT|NOFRAME,$0-0
 	RET
 
-TEXT _cgo_reginit(SB),NOSPLIT,$-8-0
+TEXT _cgo_reginit(SB),NOSPLIT|NOFRAME,$0-0
 	// crosscall_ppc64 and crosscall2 need to reginit, but can't
 	// get at the 'runtime.reginit' symbol.
 	BR	runtime·reginit(SB)
 
-TEXT runtime·reginit(SB),NOSPLIT,$-8-0
+TEXT runtime·reginit(SB),NOSPLIT|NOFRAME,$0-0
 	// set R0 to zero, it's expected by the toolchain
 	XOR R0, R0
 	// initialize essential FP registers
@@ -114,7 +115,7 @@
 
 // void gosave(Gobuf*)
 // save state in Gobuf; setjmp
-TEXT runtime·gosave(SB), NOSPLIT, $-8-8
+TEXT runtime·gosave(SB), NOSPLIT|NOFRAME, $0-8
 	MOVD	buf+0(FP), R3
 	MOVD	R1, gobuf_sp(R3)
 	MOVD	LR, R31
@@ -127,7 +128,7 @@
 
 // void gogo(Gobuf*)
 // restore state from Gobuf; longjmp
-TEXT runtime·gogo(SB), NOSPLIT, $-8-8
+TEXT runtime·gogo(SB), NOSPLIT|NOFRAME, $0-8
 	MOVD	buf+0(FP), R5
 	MOVD	gobuf_g(R5), g	// make sure g is not nil
 	BL	runtime·save_g(SB)
@@ -143,15 +144,15 @@
 	MOVD	R0, gobuf_lr(R5)
 	MOVD	R0, gobuf_ctxt(R5)
 	CMP	R0, R0 // set condition codes for == test, needed by stack split
-	MOVD	gobuf_pc(R5), R31
-	MOVD	R31, CTR
+	MOVD	gobuf_pc(R5), R12
+	MOVD	R12, CTR
 	BR	(CTR)
 
 // void mcall(fn func(*g))
 // Switch to m->g0's stack, call fn(g).
 // Fn must never return.  It should gogo(&g->sched)
 // to keep running g.
-TEXT runtime·mcall(SB), NOSPLIT, $-8-8
+TEXT runtime·mcall(SB), NOSPLIT|NOFRAME, $0-8
 	// Save caller state in g->sched
 	MOVD	R1, (g_sched+gobuf_sp)(g)
 	MOVD	LR, R31
@@ -168,8 +169,8 @@
 	BNE	2(PC)
 	BR	runtime·badmcall(SB)
 	MOVD	fn+0(FP), R11			// context
-	MOVD	0(R11), R4			// code pointer
-	MOVD	R4, CTR
+	MOVD	0(R11), R12			// code pointer
+	MOVD	R12, CTR
 	MOVD	(g_sched+gobuf_sp)(g), R1	// sp = m->g0->sched.sp
 	MOVDU	R3, -8(R1)
 	MOVDU	R0, -8(R1)
@@ -206,8 +207,8 @@
 
 	// Bad: g is not gsignal, not g0, not curg. What is it?
 	// Hide call from linker nosplit analysis.
-	MOVD	$runtime·badsystemstack(SB), R3
-	MOVD	R3, CTR
+	MOVD	$runtime·badsystemstack(SB), R12
+	MOVD	R12, CTR
 	BL	(CTR)
 
 switch:
@@ -225,14 +226,14 @@
 	BL	runtime·save_g(SB)
 	MOVD	(g_sched+gobuf_sp)(g), R3
 	// make it look like mstart called systemstack on g0, to stop traceback
-	SUB	$8, R3
+	SUB	$FIXED_FRAME, R3
 	MOVD	$runtime·mstart(SB), R4
 	MOVD	R4, 0(R3)
 	MOVD	R3, R1
 
 	// call target function
-	MOVD	0(R11), R3	// code pointer
-	MOVD	R3, CTR
+	MOVD	0(R11), R12	// code pointer
+	MOVD	R12, CTR
 	BL	(CTR)
 
 	// switch back to g
@@ -245,8 +246,8 @@
 
 noswitch:
 	// already on m stack, just call directly
-	MOVD	0(R11), R3	// code pointer
-	MOVD	R3, CTR
+	MOVD	0(R11), R12	// code pointer
+	MOVD	R12, CTR
 	BL	(CTR)
 	RET
 
@@ -262,7 +263,7 @@
 // the top of a stack (for example, morestack calling newstack
 // calling the scheduler calling newm calling gc), so we must
 // record an argument size. For that purpose, it has no arguments.
-TEXT runtime·morestack(SB),NOSPLIT,$-8-0
+TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
 	// Cannot grow scheduler stack (m->g0).
 	MOVD	g_m(g), R7
 	MOVD	m_g0(R7), R8
@@ -300,7 +301,7 @@
 	// is still in this function, and not the beginning of the next.
 	UNDEF
 
-TEXT runtime·morestack_noctxt(SB),NOSPLIT,$-8-0
+TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0
 	MOVD	R0, R11
 	BR	runtime·morestack(SB)
 
@@ -332,15 +333,15 @@
 	MOVD	$MAXSIZE, R31;		\
 	CMP	R3, R31;		\
 	BGT	4(PC);			\
-	MOVD	$NAME(SB), R31;	\
-	MOVD	R31, CTR;		\
+	MOVD	$NAME(SB), R12;		\
+	MOVD	R12, CTR;		\
 	BR	(CTR)
 // Note: can't just "BR NAME(SB)" - bad inlining results.
 
 TEXT reflect·call(SB), NOSPLIT, $0-0
 	BR	·reflectcall(SB)
 
-TEXT ·reflectcall(SB), NOSPLIT, $-8-32
+TEXT ·reflectcall(SB), NOSPLIT|NOFRAME, $0-32
 	MOVWZ argsize+24(FP), R3
 	// NOTE(rsc): No call16, because CALLFN needs four words
 	// of argument space to invoke callwritebarrier.
@@ -370,8 +371,8 @@
 	DISPATCH(runtime·call268435456, 268435456)
 	DISPATCH(runtime·call536870912, 536870912)
 	DISPATCH(runtime·call1073741824, 1073741824)
-	MOVD	$runtime·badreflectcall(SB), R31
-	MOVD	R31, CTR
+	MOVD	$runtime·badreflectcall(SB), R12
+	MOVD	R12, CTR
 	BR	(CTR)
 
 #define CALLFN(NAME,MAXSIZE)			\
@@ -381,7 +382,7 @@
 	MOVD	arg+16(FP), R3;			\
 	MOVWZ	argsize+24(FP), R4;			\
 	MOVD	R1, R5;				\
-	ADD	$(8-1), R5;			\
+	ADD	$(FIXED_FRAME-1), R5;			\
 	SUB	$1, R3;				\
 	ADD	R5, R4;				\
 	CMP	R5, R4;				\
@@ -391,8 +392,8 @@
 	BR	-4(PC);				\
 	/* call function */			\
 	MOVD	f+8(FP), R11;			\
-	MOVD	(R11), R31;			\
-	MOVD	R31, CTR;			\
+	MOVD	(R11), R12;			\
+	MOVD	R12, CTR;			\
 	PCDATA  $PCDATA_StackMapIndex, $0;	\
 	BL	(CTR);				\
 	/* copy return values back */		\
@@ -403,7 +404,7 @@
 	ADD	R6, R5; 			\
 	ADD	R6, R3;				\
 	SUB	R6, R4;				\
-	ADD	$(8-1), R5;			\
+	ADD	$(FIXED_FRAME-1), R5;			\
 	SUB	$1, R3;				\
 	ADD	R5, R4;				\
 loop:						\
@@ -418,10 +419,10 @@
 	MOVD	arg+16(FP), R3;			\
 	MOVWZ	n+24(FP), R4;			\
 	MOVWZ	retoffset+28(FP), R6;		\
-	MOVD	R7, 8(R1);			\
-	MOVD	R3, 16(R1);			\
-	MOVD	R4, 24(R1);			\
-	MOVD	R6, 32(R1);			\
+	MOVD	R7, FIXED_FRAME+0(R1);			\
+	MOVD	R3, FIXED_FRAME+8(R1);			\
+	MOVD	R4, FIXED_FRAME+16(R1);			\
+	MOVD	R6, FIXED_FRAME+24(R1);			\
 	BL	runtime·callwritebarrier(SB);	\
 	RET
 
@@ -511,10 +512,10 @@
 TEXT runtime·casuintptr(SB), NOSPLIT, $0-25
 	BR	runtime·cas64(SB)
 
-TEXT runtime·atomicloaduintptr(SB), NOSPLIT, $-8-16
+TEXT runtime·atomicloaduintptr(SB), NOSPLIT|NOFRAME, $0-16
 	BR	runtime·atomicload64(SB)
 
-TEXT runtime·atomicloaduint(SB), NOSPLIT, $-8-16
+TEXT runtime·atomicloaduint(SB), NOSPLIT|NOFRAME, $0-16
 	BR	runtime·atomicload64(SB)
 
 TEXT runtime·atomicstoreuintptr(SB), NOSPLIT, $0-16
@@ -669,20 +670,20 @@
 // 1. grab stored LR for caller
 // 2. sub 4 bytes to get back to BL deferreturn
 // 3. BR to fn
-TEXT runtime·jmpdefer(SB), NOSPLIT, $-8-16
+TEXT runtime·jmpdefer(SB), NOSPLIT|NOFRAME, $0-16
 	MOVD	0(R1), R31
 	SUB	$4, R31
 	MOVD	R31, LR
 
 	MOVD	fv+0(FP), R11
 	MOVD	argp+8(FP), R1
-	SUB	$8, R1
-	MOVD	0(R11), R3
-	MOVD	R3, CTR
+	SUB	$FIXED_FRAME, R1
+	MOVD	0(R11), R12
+	MOVD	R12, CTR
 	BR	(CTR)
 
 // Save state of caller into g->sched. Smashes R31.
-TEXT gosave<>(SB),NOSPLIT,$-8
+TEXT gosave<>(SB),NOSPLIT|NOFRAME,$0
 	MOVD	LR, R31
 	MOVD	R31, (g_sched+gobuf_pc)(g)
 	MOVD	R1, (g_sched+gobuf_sp)(g)
@@ -699,7 +700,7 @@
 	MOVD	fn+0(FP), R3
 	MOVD	arg+8(FP), R4
 
-	MOVD	R1, R2		// save original stack pointer
+	MOVD	R1, R7		// save original stack pointer
 	MOVD	g, R5
 
 	// Figure out if we need to switch to m->g0 stack.
@@ -722,7 +723,7 @@
 	RLDCR	$0, R1, $~15, R1	// 16-byte alignment for gcc ABI
 	MOVD	R5, 40(R1)	// save old g on stack
 	MOVD	(g_stack+stack_hi)(R5), R5
-	SUB	R2, R5
+	SUB	R7, R5
 	MOVD	R5, 32(R1)	// save depth in old g stack (can't just save SP, as stack might be copied during a callback)
 	MOVD	R0, 0(R1)	// clear back chain pointer (TODO can we give it real back trace information?)
 	// This is a "global call", so put the global entry point in r12
@@ -750,13 +751,13 @@
 // cgocallback_gofunc.
 TEXT runtime·cgocallback(SB),NOSPLIT,$24-24
 	MOVD	$fn+0(FP), R3
-	MOVD	R3, 8(R1)
+	MOVD	R3, FIXED_FRAME+0(R1)
 	MOVD	frame+8(FP), R3
-	MOVD	R3, 16(R1)
+	MOVD	R3, FIXED_FRAME+8(R1)
 	MOVD	framesize+16(FP), R3
-	MOVD	R3, 24(R1)
-	MOVD	$runtime·cgocallback_gofunc(SB), R3
-	MOVD	R3, CTR
+	MOVD	R3, FIXED_FRAME+16(R1)
+	MOVD	$runtime·cgocallback_gofunc(SB), R12
+	MOVD	R12, CTR
 	BL	(CTR)
 	RET
 
@@ -780,8 +781,8 @@
 	CMP	g, $0
 	BNE	havem
 	MOVD	g, savedm-8(SP) // g is zero, so is m.
-	MOVD	$runtime·needm(SB), R3
-	MOVD	R3, CTR
+	MOVD	$runtime·needm(SB), R12
+	MOVD	R12, CTR
 	BL	(CTR)
 
 	// Set m->sched.sp = SP, so that if a panic happens
@@ -831,14 +832,14 @@
 	BL	runtime·save_g(SB)
 	MOVD	(g_sched+gobuf_sp)(g), R4 // prepare stack as R4
 	MOVD	(g_sched+gobuf_pc)(g), R5
-	MOVD	R5, -24(R4)
-	MOVD	$-24(R4), R1
+	MOVD	R5, -(FIXED_FRAME+16)(R4)
+	MOVD	$-(FIXED_FRAME+16)(R4), R1
 	BL	runtime·cgocallbackg(SB)
 
 	// Restore g->sched (== m->curg->sched) from saved values.
 	MOVD	0(R1), R5
 	MOVD	R5, (g_sched+gobuf_pc)(g)
-	MOVD	$24(R1), R4
+	MOVD	$(FIXED_FRAME+16)(R1), R4
 	MOVD	R4, (g_sched+gobuf_sp)(g)
 
 	// Switch back to m->g0's stack and restore m->g0->sched.sp.
@@ -856,8 +857,8 @@
 	MOVD	savedm-8(SP), R6
 	CMP	R6, $0
 	BNE	droppedm
-	MOVD	$runtime·dropm(SB), R3
-	MOVD	R3, CTR
+	MOVD	$runtime·dropm(SB), R12
+	MOVD	R12, CTR
 	BL	(CTR)
 droppedm:
 
@@ -873,7 +874,7 @@
 
 // void setg_gcc(G*); set g in C TLS.
 // Must obey the gcc calling convention.
-TEXT setg_gcc<>(SB),NOSPLIT,$-8-0
+TEXT setg_gcc<>(SB),NOSPLIT|NOFRAME,$0-0
 	// The standard prologue clobbers R31, which is callee-save in
 	// the C ABI, so we have to use $-8-0 and save LR ourselves.
 	MOVD	LR, R4
@@ -890,38 +891,38 @@
 	RET
 
 TEXT runtime·getcallerpc(SB),NOSPLIT,$8-16
-	MOVD	16(R1), R3		// LR saved by caller
+	MOVD	FIXED_FRAME+8(R1), R3		// LR saved by caller
 	MOVD	runtime·stackBarrierPC(SB), R4
 	CMP	R3, R4
 	BNE	nobar
 	// Get original return PC.
 	BL	runtime·nextBarrierPC(SB)
-	MOVD	8(R1), R3
+	MOVD	FIXED_FRAME+0(R1), R3
 nobar:
 	MOVD	R3, ret+8(FP)
 	RET
 
 TEXT runtime·setcallerpc(SB),NOSPLIT,$8-16
 	MOVD	pc+8(FP), R3
-	MOVD	16(R1), R4
+	MOVD	FIXED_FRAME+8(R1), R4
 	MOVD	runtime·stackBarrierPC(SB), R5
 	CMP	R4, R5
 	BEQ	setbar
-	MOVD	R3, 16(R1)		// set LR in caller
+	MOVD	R3, FIXED_FRAME+8(R1)		// set LR in caller
 	RET
 setbar:
 	// Set the stack barrier return PC.
-	MOVD	R3, 8(R1)
+	MOVD	R3, FIXED_FRAME+0(R1)
 	BL	runtime·setNextBarrierPC(SB)
 	RET
 
 TEXT runtime·getcallersp(SB),NOSPLIT,$0-16
 	MOVD	argp+0(FP), R3
-	SUB	$8, R3
+	SUB	$FIXED_FRAME, R3
 	MOVD	R3, ret+8(FP)
 	RET
 
-TEXT runtime·abort(SB),NOSPLIT,$-8-0
+TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0
 	MOVW	(R0), R0
 	UNDEF
 
@@ -949,25 +950,25 @@
 	MOVD	p+0(FP), R3
 	MOVD	h+8(FP), R4
 	MOVD	8(R11), R5
-	MOVD	R3, 8(R1)
-	MOVD	R4, 16(R1)
-	MOVD	R5, 24(R1)
+	MOVD	R3, FIXED_FRAME+0(R1)
+	MOVD	R4, FIXED_FRAME+8(R1)
+	MOVD	R5, FIXED_FRAME+16(R1)
 	BL	runtime·memhash(SB)
-	MOVD	32(R1), R3
+	MOVD	FIXED_FRAME+24(R1), R3
 	MOVD	R3, ret+16(FP)
 	RET
 
 // AES hashing not implemented for ppc64
-TEXT runtime·aeshash(SB),NOSPLIT,$-8-0
+TEXT runtime·aeshash(SB),NOSPLIT|NOFRAME,$0-0
 	MOVW	(R0), R1
-TEXT runtime·aeshash32(SB),NOSPLIT,$-8-0
+TEXT runtime·aeshash32(SB),NOSPLIT|NOFRAME,$0-0
 	MOVW	(R0), R1
-TEXT runtime·aeshash64(SB),NOSPLIT,$-8-0
+TEXT runtime·aeshash64(SB),NOSPLIT|NOFRAME,$0-0
 	MOVW	(R0), R1
-TEXT runtime·aeshashstr(SB),NOSPLIT,$-8-0
+TEXT runtime·aeshashstr(SB),NOSPLIT|NOFRAME,$0-0
 	MOVW	(R0), R1
 
-TEXT runtime·memeq(SB),NOSPLIT,$-8-25
+TEXT runtime·memeq(SB),NOSPLIT|NOFRAME,$0-25
 	MOVD	a+0(FP), R3
 	MOVD	b+8(FP), R4
 	MOVD	size+16(FP), R5
@@ -996,11 +997,11 @@
 	CMP	R3, R4
 	BEQ	eq
 	MOVD	8(R11), R5    // compiler stores size at offset 8 in the closure
-	MOVD	R3, 8(R1)
-	MOVD	R4, 16(R1)
-	MOVD	R5, 24(R1)
+	MOVD	R3, FIXED_FRAME+0(R1)
+	MOVD	R4, FIXED_FRAME+8(R1)
+	MOVD	R5, FIXED_FRAME+16(R1)
 	BL	runtime·memeq(SB)
-	MOVBZ	32(R1), R3
+	MOVBZ	FIXED_FRAME+24(R1), R3
 	MOVB	R3, ret+16(FP)
 	RET
 eq:
@@ -1115,7 +1116,7 @@
 	MOVD	R3, ret+24(FP)
 	RET
 
-TEXT runtime·cmpstring(SB),NOSPLIT,$-4-40
+TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
 	MOVD	s1_base+0(FP), R5
 	MOVD	s1_len+8(FP), R3
 	MOVD	s2_base+16(FP), R6
@@ -1123,7 +1124,7 @@
 	MOVD	$ret+32(FP), R7
 	BR	runtime·cmpbody<>(SB)
 
-TEXT bytes·Compare(SB),NOSPLIT,$-4-56
+TEXT bytes·Compare(SB),NOSPLIT|NOFRAME,$0-56
 	MOVD	s1+0(FP), R5
 	MOVD	s1+8(FP), R3
 	MOVD	s2+24(FP), R6
@@ -1140,7 +1141,7 @@
 //
 // On exit:
 // R5, R6, R8, R9 and R10 are clobbered
-TEXT runtime·cmpbody<>(SB),NOSPLIT,$-4-0
+TEXT runtime·cmpbody<>(SB),NOSPLIT|NOFRAME,$0-0
 	CMP	R5, R6
 	BEQ	samebytes // same starting pointers; compare lengths
 	SUB	$1, R5
@@ -1191,7 +1192,7 @@
 
 // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
 // Must obey the gcc calling convention.
-TEXT _cgo_topofstack(SB),NOSPLIT,$-8
+TEXT _cgo_topofstack(SB),NOSPLIT|NOFRAME,$0
 	// g (R30) and R31 are callee-save in the C ABI, so save them
 	MOVD	g, R4
 	MOVD	R31, R5
@@ -1209,7 +1210,7 @@
 
 // The top-most function running on a goroutine
 // returns to goexit+PCQuantum.
-TEXT runtime·goexit(SB),NOSPLIT,$-8-0
+TEXT runtime·goexit(SB),NOSPLIT|NOFRAME,$0-0
 	MOVD	R0, R0	// NOP
 	BL	runtime·goexit1(SB)	// does not return
 	// traceback from goexit1 must hit code range of goexit
diff --git a/src/runtime/atomic_ppc64x.s b/src/runtime/atomic_ppc64x.s
index 28c5bf3..47769e6 100644
--- a/src/runtime/atomic_ppc64x.s
+++ b/src/runtime/atomic_ppc64x.s
@@ -7,7 +7,7 @@
 #include "textflag.h"
 
 // uint32 runtime·atomicload(uint32 volatile* addr)
-TEXT ·atomicload(SB),NOSPLIT,$-8-12
+TEXT ·atomicload(SB),NOSPLIT|NOFRAME,$0-12
 	MOVD	addr+0(FP), R3
 	SYNC
 	MOVWZ	0(R3), R3
@@ -18,7 +18,7 @@
 	RET
 
 // uint64 runtime·atomicload64(uint64 volatile* addr)
-TEXT ·atomicload64(SB),NOSPLIT,$-8-16
+TEXT ·atomicload64(SB),NOSPLIT|NOFRAME,$0-16
 	MOVD	addr+0(FP), R3
 	SYNC
 	MOVD	0(R3), R3
@@ -29,7 +29,7 @@
 	RET
 
 // void *runtime·atomicloadp(void *volatile *addr)
-TEXT ·atomicloadp(SB),NOSPLIT,$-8-16
+TEXT ·atomicloadp(SB),NOSPLIT|NOFRAME,$0-16
 	MOVD	addr+0(FP), R3
 	SYNC
 	MOVD	0(R3), R3
@@ -39,7 +39,7 @@
 	MOVD	R3, ret+8(FP)
 	RET
 
-TEXT ·publicationBarrier(SB),NOSPLIT,$-8-0
+TEXT ·publicationBarrier(SB),NOSPLIT|NOFRAME,$0-0
 	// LWSYNC is the "export" barrier recommended by Power ISA
 	// v2.07 book II, appendix B.2.2.2.
 	// LWSYNC is a load/load, load/store, and store/store barrier.
diff --git a/src/runtime/cgo/asm_ppc64x.s b/src/runtime/cgo/asm_ppc64x.s
index 0c08a1d..5cdbe06 100644
--- a/src/runtime/cgo/asm_ppc64x.s
+++ b/src/runtime/cgo/asm_ppc64x.s
@@ -5,13 +5,14 @@
 // +build ppc64 ppc64le
 
 #include "textflag.h"
+#include "asm_ppc64x.h"
 
 /*
  * void crosscall2(void (*fn)(void*, int32), void*, int32)
  * Save registers and call fn with two arguments.
  * crosscall2 obeys the C ABI; fn obeys the Go ABI.
  */
-TEXT crosscall2(SB),NOSPLIT,$-8
+TEXT crosscall2(SB),NOSPLIT|NOFRAME,$0
 	// TODO(austin): ABI v1 (fn is probably a function descriptor)
 
 	// Start with standard C stack frame layout and linkage
@@ -21,18 +22,19 @@
 
 	BL	saveregs2<>(SB)
 
-	MOVDU	R1, (-288-3*8)(R1)
+	MOVDU	R1, (-288-2*8-FIXED_FRAME)(R1)
 
 	// Initialize Go ABI environment
 	BL	runtime·reginit(SB)
 	BL	runtime·load_g(SB)
 
+	MOVD	R3, R12
 	MOVD	R3, CTR
-	MOVD	R4, 8(R1)
-	MOVD	R5, 16(R1)
+	MOVD	R4, FIXED_FRAME+0(R1)
+	MOVD	R5, FIXED_FRAME+8(R1)
 	BL	(CTR)
 
-	ADD	$(288+3*8), R1
+	ADD	$(288+2*8+FIXED_FRAME), R1
 
 	BL	restoreregs2<>(SB)
 
@@ -41,7 +43,7 @@
 	MOVD	R0, LR
 	RET
 
-TEXT saveregs2<>(SB),NOSPLIT,$-8
+TEXT saveregs2<>(SB),NOSPLIT|NOFRAME,$0
 	// O=-288; for R in R{14..31}; do echo "\tMOVD\t$R, $O(R1)"|sed s/R30/g/; ((O+=8)); done; for F in F{14..31}; do echo "\tFMOVD\t$F, $O(R1)"; ((O+=8)); done
 	MOVD	R14, -288(R1)
 	MOVD	R15, -280(R1)
@@ -82,7 +84,7 @@
 
 	RET
 
-TEXT restoreregs2<>(SB),NOSPLIT,$-8
+TEXT restoreregs2<>(SB),NOSPLIT|NOFRAME,$0
 	// O=-288; for R in R{14..31}; do echo "\tMOVD\t$O(R1), $R"|sed s/R30/g/; ((O+=8)); done; for F in F{14..31}; do echo "\tFMOVD\t$O(R1), $F"; ((O+=8)); done
 	MOVD	-288(R1), R14
 	MOVD	-280(R1), R15
diff --git a/src/runtime/cgo/cgo.go b/src/runtime/cgo/cgo.go
index cb24678..8f3e66f 100644
--- a/src/runtime/cgo/cgo.go
+++ b/src/runtime/cgo/cgo.go
@@ -20,7 +20,9 @@
 #cgo !android,linux LDFLAGS: -lpthread
 #cgo netbsd LDFLAGS: -lpthread
 #cgo openbsd LDFLAGS: -lpthread
-#cgo windows LDFLAGS: -lm -mthreads
+// we must explicitly link msvcrt, because runtime needs ntdll, and ntdll
+// exports some incompatible libc functions. See golang.org/issue/12030.
+#cgo windows LDFLAGS: -lmsvcrt -lm -mthreads
 
 #cgo CFLAGS: -Wall -Werror
 
diff --git a/src/runtime/cgo/gcc_linux_amd64.c b/src/runtime/cgo/gcc_linux_amd64.c
index 275d5dd..c93cacd 100644
--- a/src/runtime/cgo/gcc_linux_amd64.c
+++ b/src/runtime/cgo/gcc_linux_amd64.c
@@ -3,8 +3,10 @@
 // license that can be found in the LICENSE file.
 
 #include <pthread.h>
+#include <errno.h>
 #include <string.h> // strerror
 #include <signal.h>
+#include <stdlib.h>
 #include "libcgo.h"
 
 static void* threadentry(void*);
@@ -13,14 +15,34 @@
 void
 x_cgo_init(G* g, void (*setg)(void*))
 {
-	pthread_attr_t attr;
+	pthread_attr_t *attr;
 	size_t size;
 
+	/* The memory sanitizer distributed with versions of clang
+	   before 3.8 has a bug: if you call mmap before malloc, mmap
+	   may return an address that is later overwritten by the msan
+	   library.  Avoid this problem by forcing a call to malloc
+	   here, before we ever call mmap.
+
+	   This is only required for the memory sanitizer, so it's
+	   unfortunate that we always run it.  It should be possible
+	   to remove this when we no longer care about versions of
+	   clang before 3.8.  The test for this is
+	   misc/cgo/testsanitizers.
+
+	   GCC works hard to eliminate a seemingly unnecessary call to
+	   malloc, so we actually use the memory we allocate.  */
+
 	setg_gcc = setg;
-	pthread_attr_init(&attr);
-	pthread_attr_getstacksize(&attr, &size);
-	g->stacklo = (uintptr)&attr - size + 4096;
-	pthread_attr_destroy(&attr);
+	attr = (pthread_attr_t*)malloc(sizeof *attr);
+	if (attr == NULL) {
+		fatalf("malloc failed: %s", strerror(errno));
+	}
+	pthread_attr_init(attr);
+	pthread_attr_getstacksize(attr, &size);
+	g->stacklo = (uintptr)&size - size + 4096;
+	pthread_attr_destroy(attr);
+	free(attr);
 }
 
 
diff --git a/src/runtime/cgo/gcc_mmap.c b/src/runtime/cgo/gcc_mmap.c
new file mode 100644
index 0000000..10d589f
--- /dev/null
+++ b/src/runtime/cgo/gcc_mmap.c
@@ -0,0 +1,21 @@
+// Copyright 2015 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build linux,amd64
+
+#include <errno.h>
+#include <stdint.h>
+#include <sys/mman.h>
+
+void *
+x_cgo_mmap(void *addr, uintptr_t length, int32_t prot, int32_t flags, int32_t fd, uint32_t offset) {
+	void *p;
+
+	p = mmap(addr, length, prot, flags, fd, offset);
+	if (p == MAP_FAILED) {
+		/* This is what the Go code expects on failure.  */
+		p = (void *) (uintptr_t) errno;
+	}
+	return p;
+}
diff --git a/src/runtime/cgo/gcc_ppc64x.S b/src/runtime/cgo/gcc_ppc64x.S
index 682349b..58f1336 100644
--- a/src/runtime/cgo/gcc_ppc64x.S
+++ b/src/runtime/cgo/gcc_ppc64x.S
@@ -36,6 +36,7 @@
 	mr	%r30, %r4
 
 	// Call fn
+	mr	%r12, %r3
 	mtctr	%r3
 	bctrl
 
diff --git a/src/runtime/cgo/mmap.go b/src/runtime/cgo/mmap.go
new file mode 100644
index 0000000..d514c38
--- /dev/null
+++ b/src/runtime/cgo/mmap.go
@@ -0,0 +1,22 @@
+// Copyright 2015 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build linux,amd64
+
+package cgo
+
+// Import "unsafe" because we use go:linkname.
+import _ "unsafe"
+
+// When using cgo, call the C library for mmap, so that we call into
+// any sanitizer interceptors.  This supports using the memory
+// sanitizer with Go programs.  The memory sanitizer only applies to
+// C/C++ code; this permits that code to see the Go code as normal
+// program addresses that have been initialized.
+
+//go:cgo_import_static x_cgo_mmap
+//go:linkname x_cgo_mmap x_cgo_mmap
+//go:linkname _cgo_mmap _cgo_mmap
+var x_cgo_mmap byte
+var _cgo_mmap = &x_cgo_mmap
diff --git a/src/runtime/cgo_mmap.go b/src/runtime/cgo_mmap.go
new file mode 100644
index 0000000..ef5501c
--- /dev/null
+++ b/src/runtime/cgo_mmap.go
@@ -0,0 +1,34 @@
+// Copyright 2015 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Support for memory sanitizer.  See runtime/cgo/mmap.go.
+
+// +build linux,amd64
+
+package runtime
+
+import "unsafe"
+
+// _cgo_mmap is filled in by runtime/cgo when it is linked into the
+// program, so it is only non-nil when using cgo.
+//go:linkname _cgo_mmap _cgo_mmap
+var _cgo_mmap unsafe.Pointer
+
+func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) (ret unsafe.Pointer) {
+	if _cgo_mmap != nil {
+		systemstack(func() {
+			ret = callCgoMmap(addr, n, prot, flags, fd, off)
+		})
+		return
+	}
+	return sysMmap(addr, n, prot, flags, fd, off)
+}
+
+// sysMmap calls the mmap system call.  It is implemented in assembly.
+func sysMmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) unsafe.Pointer
+
+// callCgoMmap calls the mmap function in the runtime/cgo package
+// using the GCC calling convention.  It is implemented in assembly.
+func callCgoMmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) unsafe.Pointer
diff --git a/src/runtime/cgocall.go b/src/runtime/cgocall.go
index f09a66a..d39e660 100644
--- a/src/runtime/cgocall.go
+++ b/src/runtime/cgocall.go
@@ -237,10 +237,22 @@
 		// On 386, stack frame is three words, plus caller PC.
 		cb = (*args)(unsafe.Pointer(sp + 4*ptrSize))
 	case "ppc64", "ppc64le":
-		// On ppc64, stack frame is two words and there's a
-		// saved LR between SP and the stack frame and between
-		// the stack frame and the arguments.
-		cb = (*args)(unsafe.Pointer(sp + 4*ptrSize))
+		// On ppc64, the callback arguments are in the arguments area of
+		// cgocallback's stack frame. The stack looks like this:
+		// +--------------------+------------------------------+
+		// |                    | ...                          |
+		// | cgoexp_$fn         +------------------------------+
+		// |                    | fixed frame area             |
+		// +--------------------+------------------------------+
+		// |                    | arguments area               |
+		// | cgocallback        +------------------------------+ <- sp + 2*minFrameSize + 2*ptrSize
+		// |                    | fixed frame area             |
+		// +--------------------+------------------------------+ <- sp + minFrameSize + 2*ptrSize
+		// |                    | local variables (2 pointers) |
+		// | cgocallback_gofunc +------------------------------+ <- sp + minFrameSize
+		// |                    | fixed frame area             |
+		// +--------------------+------------------------------+ <- sp
+		cb = (*args)(unsafe.Pointer(sp + 2*minFrameSize + 2*ptrSize))
 	}
 
 	// Invoke callback.
@@ -271,14 +283,10 @@
 	switch GOARCH {
 	default:
 		throw("unwindm not implemented")
-	case "386", "amd64":
-		sched.sp = *(*uintptr)(unsafe.Pointer(sched.sp))
-	case "arm":
-		sched.sp = *(*uintptr)(unsafe.Pointer(sched.sp + 4))
+	case "386", "amd64", "arm", "ppc64", "ppc64le":
+		sched.sp = *(*uintptr)(unsafe.Pointer(sched.sp + minFrameSize))
 	case "arm64":
 		sched.sp = *(*uintptr)(unsafe.Pointer(sched.sp + 16))
-	case "ppc64", "ppc64le":
-		sched.sp = *(*uintptr)(unsafe.Pointer(sched.sp + 8))
 	}
 	releasem(mp)
 }
diff --git a/src/runtime/crash_cgo_test.go b/src/runtime/crash_cgo_test.go
index 2e65e4c..8e23006 100644
--- a/src/runtime/crash_cgo_test.go
+++ b/src/runtime/crash_cgo_test.go
@@ -222,7 +222,10 @@
 
 static void foo() {
     pthread_t th;
-    pthread_create(&th, 0, thr, 0);
+    pthread_attr_t attr;
+    pthread_attr_init(&attr);
+    pthread_attr_setstacksize(&attr, 256 << 10);
+    pthread_create(&th, &attr, thr, 0);
     pthread_join(th, 0);
 }
 */
diff --git a/src/runtime/debug/stack.go b/src/runtime/debug/stack.go
index ab12bff..5d810af5 100644
--- a/src/runtime/debug/stack.go
+++ b/src/runtime/debug/stack.go
@@ -7,92 +7,24 @@
 package debug
 
 import (
-	"bytes"
-	"fmt"
-	"io/ioutil"
 	"os"
 	"runtime"
 )
 
-var (
-	dunno     = []byte("???")
-	centerDot = []byte("·")
-	dot       = []byte(".")
-	slash     = []byte("/")
-)
-
-// PrintStack prints to standard error the stack trace returned by Stack.
+// PrintStack prints to standard error the stack trace returned by runtime.Stack.
 func PrintStack() {
-	os.Stderr.Write(stack())
+	os.Stderr.Write(Stack())
 }
 
 // Stack returns a formatted stack trace of the goroutine that calls it.
-// For each routine, it includes the source line information and PC value,
-// then attempts to discover, for Go functions, the calling function or
-// method and the text of the line containing the invocation.
-//
-// Deprecated: Use package runtime's Stack instead.
+// It calls runtime.Stack with a large enough buffer to capture the entire trace.
 func Stack() []byte {
-	return stack()
-}
-
-// stack implements Stack, skipping 2 frames
-func stack() []byte {
-	buf := new(bytes.Buffer) // the returned data
-	// As we loop, we open files and read them. These variables record the currently
-	// loaded file.
-	var lines [][]byte
-	var lastFile string
-	for i := 2; ; i++ { // Caller we care about is the user, 2 frames up
-		pc, file, line, ok := runtime.Caller(i)
-		if !ok {
-			break
+	buf := make([]byte, 1024)
+	for {
+		n := runtime.Stack(buf, false)
+		if n < len(buf) {
+			return buf[:n]
 		}
-		// Print this much at least.  If we can't find the source, it won't show.
-		fmt.Fprintf(buf, "%s:%d (0x%x)\n", file, line, pc)
-		if file != lastFile {
-			data, err := ioutil.ReadFile(file)
-			if err != nil {
-				continue
-			}
-			lines = bytes.Split(data, []byte{'\n'})
-			lastFile = file
-		}
-		line-- // in stack trace, lines are 1-indexed but our array is 0-indexed
-		fmt.Fprintf(buf, "\t%s: %s\n", function(pc), source(lines, line))
+		buf = make([]byte, 2*len(buf))
 	}
-	return buf.Bytes()
-}
-
-// source returns a space-trimmed slice of the n'th line.
-func source(lines [][]byte, n int) []byte {
-	if n < 0 || n >= len(lines) {
-		return dunno
-	}
-	return bytes.Trim(lines[n], " \t")
-}
-
-// function returns, if possible, the name of the function containing the PC.
-func function(pc uintptr) []byte {
-	fn := runtime.FuncForPC(pc)
-	if fn == nil {
-		return dunno
-	}
-	name := []byte(fn.Name())
-	// The name includes the path name to the package, which is unnecessary
-	// since the file name is already included.  Plus, it has center dots.
-	// That is, we see
-	//	runtime/debug.*T·ptrmethod
-	// and want
-	//	*T.ptrmethod
-	// Since the package path might contains dots (e.g. code.google.com/...),
-	// we first remove the path prefix if there is one.
-	if lastslash := bytes.LastIndex(name, slash); lastslash >= 0 {
-		name = name[lastslash+1:]
-	}
-	if period := bytes.Index(name, dot); period >= 0 {
-		name = name[period+1:]
-	}
-	name = bytes.Replace(name, centerDot, dot, -1)
-	return name
 }
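
The rewritten Stack simply delegates to runtime.Stack, doubling the buffer until the whole trace fits, so its output now matches the runtime's traceback format exactly. Typical use is unchanged:

	package main

	import (
		"fmt"
		"runtime/debug"
	)

	func main() {
		defer func() {
			if r := recover(); r != nil {
				fmt.Printf("recovered: %v\n%s", r, debug.Stack())
			}
		}()
		panic("boom")
	}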
diff --git a/src/runtime/debug/stack_test.go b/src/runtime/debug/stack_test.go
index 28691ee..d2a4ea5 100644
--- a/src/runtime/debug/stack_test.go
+++ b/src/runtime/debug/stack_test.go
@@ -22,16 +22,19 @@
 	The traceback should look something like this, modulo line numbers and hex constants.
 	Don't worry much about the base levels, but check the ones in our own package.
 
-		/Users/r/go/src/runtime/debug/stack_test.go:15 (0x13878)
-			(*T).ptrmethod: return Stack()
-		/Users/r/go/src/runtime/debug/stack_test.go:18 (0x138dd)
-			T.method: return t.ptrmethod()
-		/Users/r/go/src/runtime/debug/stack_test.go:23 (0x13920)
-			TestStack: b := T(0).method()
-		/Users/r/go/src/testing/testing.go:132 (0x14a7a)
-			tRunner: test.F(t)
-		/Users/r/go/src/runtime/proc.c:145 (0xc970)
-			???: runtime·unlock(&runtime·sched);
+		goroutine 10 [running]:
+		runtime/debug.Stack(0x0, 0x0, 0x0)
+			/Users/r/go/src/runtime/debug/stack.go:28 +0x80
+		runtime/debug.(*T).ptrmethod(0xc82005ee70, 0x0, 0x0, 0x0)
+			/Users/r/go/src/runtime/debug/stack_test.go:15 +0x29
+		runtime/debug.T.method(0x0, 0x0, 0x0, 0x0)
+			/Users/r/go/src/runtime/debug/stack_test.go:18 +0x32
+		runtime/debug.TestStack(0xc8201ce000)
+			/Users/r/go/src/runtime/debug/stack_test.go:37 +0x38
+		testing.tRunner(0xc8201ce000, 0x664b58)
+			/Users/r/go/src/testing/testing.go:456 +0x98
+		created by testing.RunTests
+			/Users/r/go/src/testing/testing.go:561 +0x86d
 */
 func TestStack(t *testing.T) {
 	b := T(0).method()
@@ -41,17 +44,16 @@
 	}
 	n := 0
 	frame := func(line, code string) {
+		check(t, lines[n], code)
+		n++
 		check(t, lines[n], line)
 		n++
-		// The source might not be available while running the test.
-		if strings.HasPrefix(lines[n], "\t") {
-			check(t, lines[n], code)
-			n++
-		}
 	}
-	frame("src/runtime/debug/stack_test.go", "\t(*T).ptrmethod: return Stack()")
-	frame("src/runtime/debug/stack_test.go", "\tT.method: return t.ptrmethod()")
-	frame("src/runtime/debug/stack_test.go", "\tTestStack: b := T(0).method()")
+	n++
+	frame("src/runtime/debug/stack.go", "runtime/debug.Stack")
+	frame("src/runtime/debug/stack_test.go", "runtime/debug.(*T).ptrmethod")
+	frame("src/runtime/debug/stack_test.go", "runtime/debug.T.method")
+	frame("src/runtime/debug/stack_test.go", "runtime/debug.TestStack")
 	frame("src/testing/testing.go", "")
 }
 
diff --git a/src/runtime/debug/stubs.go b/src/runtime/debug/stubs.go
index 95b33e4..6c87ffd 100644
--- a/src/runtime/debug/stubs.go
+++ b/src/runtime/debug/stubs.go
@@ -8,12 +8,10 @@
 	"time"
 )
 
-// Uses assembly to call corresponding runtime-internal functions.
+// Implemented in package runtime.
+func readGCStats(*[]time.Duration)
+func freeOSMemory()
 func setMaxStack(int) int
 func setGCPercent(int32) int32
 func setPanicOnFault(bool) bool
 func setMaxThreads(int) int
-
-// Implemented in package runtime.
-func readGCStats(*[]time.Duration)
-func freeOSMemory()
diff --git a/src/runtime/debug/stubs.s b/src/runtime/debug/stubs.s
deleted file mode 100644
index 9dc8e54..0000000
--- a/src/runtime/debug/stubs.s
+++ /dev/null
@@ -1,30 +0,0 @@
-// Copyright 2014 The Go Authors.  All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "textflag.h"
-
-#ifdef GOARCH_arm
-#define JMP B
-#endif
-#ifdef GOARCH_arm64
-#define JMP B
-#endif
-#ifdef GOARCH_ppc64
-#define JMP BR
-#endif
-#ifdef GOARCH_ppc64le
-#define JMP BR
-#endif
-
-TEXT ·setMaxStack(SB),NOSPLIT,$0-0
-  JMP runtime·setMaxStack(SB)
-
-TEXT ·setGCPercent(SB),NOSPLIT,$0-0
-  JMP runtime·setGCPercent(SB)
-
-TEXT ·setPanicOnFault(SB),NOSPLIT,$0-0
-  JMP runtime·setPanicOnFault(SB)
-
-TEXT ·setMaxThreads(SB),NOSPLIT,$0-0
-  JMP runtime·setMaxThreads(SB)
diff --git a/src/runtime/duff_amd64.s b/src/runtime/duff_amd64.s
index 0b51228..6ed7f65 100644
--- a/src/runtime/duff_amd64.s
+++ b/src/runtime/duff_amd64.s
@@ -5,837 +5,423 @@
 #include "textflag.h"
 
 TEXT runtime·duffzero(SB), NOSPLIT, $0-0
-	MOVQ	AX,(DI)
-	MOVQ	AX,8(DI)
-	MOVQ	AX,16(DI)
-	MOVQ	AX,24(DI)
-	ADDQ	$32,DI
+	MOVUPS	X0,(DI)
+	MOVUPS	X0,16(DI)
+	MOVUPS	X0,32(DI)
+	MOVUPS	X0,48(DI)
+	ADDQ	$64,DI
 
-	MOVQ	AX,(DI)
-	MOVQ	AX,8(DI)
-	MOVQ	AX,16(DI)
-	MOVQ	AX,24(DI)
-	ADDQ	$32,DI
+	MOVUPS	X0,(DI)
+	MOVUPS	X0,16(DI)
+	MOVUPS	X0,32(DI)
+	MOVUPS	X0,48(DI)
+	ADDQ	$64,DI
 
-	MOVQ	AX,(DI)
-	MOVQ	AX,8(DI)
-	MOVQ	AX,16(DI)
-	MOVQ	AX,24(DI)
-	ADDQ	$32,DI
+	MOVUPS	X0,(DI)
+	MOVUPS	X0,16(DI)
+	MOVUPS	X0,32(DI)
+	MOVUPS	X0,48(DI)
+	ADDQ	$64,DI
 
-	MOVQ	AX,(DI)
-	MOVQ	AX,8(DI)
-	MOVQ	AX,16(DI)
-	MOVQ	AX,24(DI)
-	ADDQ	$32,DI
+	MOVUPS	X0,(DI)
+	MOVUPS	X0,16(DI)
+	MOVUPS	X0,32(DI)
+	MOVUPS	X0,48(DI)
+	ADDQ	$64,DI
 
-	MOVQ	AX,(DI)
-	MOVQ	AX,8(DI)
-	MOVQ	AX,16(DI)
-	MOVQ	AX,24(DI)
-	ADDQ	$32,DI
+	MOVUPS	X0,(DI)
+	MOVUPS	X0,16(DI)
+	MOVUPS	X0,32(DI)
+	MOVUPS	X0,48(DI)
+	ADDQ	$64,DI
 
-	MOVQ	AX,(DI)
-	MOVQ	AX,8(DI)
-	MOVQ	AX,16(DI)
-	MOVQ	AX,24(DI)
-	ADDQ	$32,DI
+	MOVUPS	X0,(DI)
+	MOVUPS	X0,16(DI)
+	MOVUPS	X0,32(DI)
+	MOVUPS	X0,48(DI)
+	ADDQ	$64,DI
 
-	MOVQ	AX,(DI)
-	MOVQ	AX,8(DI)
-	MOVQ	AX,16(DI)
-	MOVQ	AX,24(DI)
-	ADDQ	$32,DI
+	MOVUPS	X0,(DI)
+	MOVUPS	X0,16(DI)
+	MOVUPS	X0,32(DI)
+	MOVUPS	X0,48(DI)
+	ADDQ	$64,DI
 
-	MOVQ	AX,(DI)
-	MOVQ	AX,8(DI)
-	MOVQ	AX,16(DI)
-	MOVQ	AX,24(DI)
-	ADDQ	$32,DI
+	MOVUPS	X0,(DI)
+	MOVUPS	X0,16(DI)
+	MOVUPS	X0,32(DI)
+	MOVUPS	X0,48(DI)
+	ADDQ	$64,DI
 
-	MOVQ	AX,(DI)
-	MOVQ	AX,8(DI)
-	MOVQ	AX,16(DI)
-	MOVQ	AX,24(DI)
-	ADDQ	$32,DI
+	MOVUPS	X0,(DI)
+	MOVUPS	X0,16(DI)
+	MOVUPS	X0,32(DI)
+	MOVUPS	X0,48(DI)
+	ADDQ	$64,DI
 
-	MOVQ	AX,(DI)
-	MOVQ	AX,8(DI)
-	MOVQ	AX,16(DI)
-	MOVQ	AX,24(DI)
-	ADDQ	$32,DI
+	MOVUPS	X0,(DI)
+	MOVUPS	X0,16(DI)
+	MOVUPS	X0,32(DI)
+	MOVUPS	X0,48(DI)
+	ADDQ	$64,DI
 
-	MOVQ	AX,(DI)
-	MOVQ	AX,8(DI)
-	MOVQ	AX,16(DI)
-	MOVQ	AX,24(DI)
-	ADDQ	$32,DI
+	MOVUPS	X0,(DI)
+	MOVUPS	X0,16(DI)
+	MOVUPS	X0,32(DI)
+	MOVUPS	X0,48(DI)
+	ADDQ	$64,DI
 
-	MOVQ	AX,(DI)
-	MOVQ	AX,8(DI)
-	MOVQ	AX,16(DI)
-	MOVQ	AX,24(DI)
-	ADDQ	$32,DI
+	MOVUPS	X0,(DI)
+	MOVUPS	X0,16(DI)
+	MOVUPS	X0,32(DI)
+	MOVUPS	X0,48(DI)
+	ADDQ	$64,DI
 
-	MOVQ	AX,(DI)
-	MOVQ	AX,8(DI)
-	MOVQ	AX,16(DI)
-	MOVQ	AX,24(DI)
-	ADDQ	$32,DI
+	MOVUPS	X0,(DI)
+	MOVUPS	X0,16(DI)
+	MOVUPS	X0,32(DI)
+	MOVUPS	X0,48(DI)
+	ADDQ	$64,DI
 
-	MOVQ	AX,(DI)
-	MOVQ	AX,8(DI)
-	MOVQ	AX,16(DI)
-	MOVQ	AX,24(DI)
-	ADDQ	$32,DI
+	MOVUPS	X0,(DI)
+	MOVUPS	X0,16(DI)
+	MOVUPS	X0,32(DI)
+	MOVUPS	X0,48(DI)
+	ADDQ	$64,DI
 
-	MOVQ	AX,(DI)
-	MOVQ	AX,8(DI)
-	MOVQ	AX,16(DI)
-	MOVQ	AX,24(DI)
-	ADDQ	$32,DI
+	MOVUPS	X0,(DI)
+	MOVUPS	X0,16(DI)
+	MOVUPS	X0,32(DI)
+	MOVUPS	X0,48(DI)
+	ADDQ	$64,DI
 
-	MOVQ	AX,(DI)
-	MOVQ	AX,8(DI)
-	MOVQ	AX,16(DI)
-	MOVQ	AX,24(DI)
-	ADDQ	$32,DI
+	MOVUPS	X0,(DI)
+	MOVUPS	X0,16(DI)
+	MOVUPS	X0,32(DI)
+	MOVUPS	X0,48(DI)
+	ADDQ	$64,DI
 
-	MOVQ	AX,(DI)
-	MOVQ	AX,8(DI)
-	MOVQ	AX,16(DI)
-	MOVQ	AX,24(DI)
-	ADDQ	$32,DI
-
-	MOVQ	AX,(DI)
-	MOVQ	AX,8(DI)
-	MOVQ	AX,16(DI)
-	MOVQ	AX,24(DI)
-	ADDQ	$32,DI
-
-	MOVQ	AX,(DI)
-	MOVQ	AX,8(DI)
-	MOVQ	AX,16(DI)
-	MOVQ	AX,24(DI)
-	ADDQ	$32,DI
-
-	MOVQ	AX,(DI)
-	MOVQ	AX,8(DI)
-	MOVQ	AX,16(DI)
-	MOVQ	AX,24(DI)
-	ADDQ	$32,DI
-
-	MOVQ	AX,(DI)
-	MOVQ	AX,8(DI)
-	MOVQ	AX,16(DI)
-	MOVQ	AX,24(DI)
-	ADDQ	$32,DI
-
-	MOVQ	AX,(DI)
-	MOVQ	AX,8(DI)
-	MOVQ	AX,16(DI)
-	MOVQ	AX,24(DI)
-	ADDQ	$32,DI
-
-	MOVQ	AX,(DI)
-	MOVQ	AX,8(DI)
-	MOVQ	AX,16(DI)
-	MOVQ	AX,24(DI)
-	ADDQ	$32,DI
-
-	MOVQ	AX,(DI)
-	MOVQ	AX,8(DI)
-	MOVQ	AX,16(DI)
-	MOVQ	AX,24(DI)
-	ADDQ	$32,DI
-
-	MOVQ	AX,(DI)
-	MOVQ	AX,8(DI)
-	MOVQ	AX,16(DI)
-	MOVQ	AX,24(DI)
-	ADDQ	$32,DI
-
-	MOVQ	AX,(DI)
-	MOVQ	AX,8(DI)
-	MOVQ	AX,16(DI)
-	MOVQ	AX,24(DI)
-	ADDQ	$32,DI
-
-	MOVQ	AX,(DI)
-	MOVQ	AX,8(DI)
-	MOVQ	AX,16(DI)
-	MOVQ	AX,24(DI)
-	ADDQ	$32,DI
-
-	MOVQ	AX,(DI)
-	MOVQ	AX,8(DI)
-	MOVQ	AX,16(DI)
-	MOVQ	AX,24(DI)
-	ADDQ	$32,DI
-
-	MOVQ	AX,(DI)
-	MOVQ	AX,8(DI)
-	MOVQ	AX,16(DI)
-	MOVQ	AX,24(DI)
-	ADDQ	$32,DI
-
-	MOVQ	AX,(DI)
-	MOVQ	AX,8(DI)
-	MOVQ	AX,16(DI)
-	MOVQ	AX,24(DI)
-	ADDQ	$32,DI
-
-	MOVQ	AX,(DI)
-	MOVQ	AX,8(DI)
-	MOVQ	AX,16(DI)
-	MOVQ	AX,24(DI)
-	ADDQ	$32,DI
-
-	STOSQ
-	STOSQ
-	STOSQ
-	STOSQ
 	RET
 
 TEXT runtime·duffcopy(SB), NOSPLIT, $0-0
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
-
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
-	MOVQ	(SI), CX
-	ADDQ	$8, SI
-	MOVQ	CX, (DI)
-	ADDQ	$8, DI
+	MOVUPS	(SI), X0
+	ADDQ	$16, SI
+	MOVUPS	X0, (DI)
+	ADDQ	$16, DI
 
 	RET
diff --git a/src/runtime/duff_ppc64x.s b/src/runtime/duff_ppc64x.s
index 14bc33e..c8204c4 100644
--- a/src/runtime/duff_ppc64x.s
+++ b/src/runtime/duff_ppc64x.s
@@ -6,7 +6,7 @@
 
 #include "textflag.h"
 
-TEXT runtime·duffzero(SB), NOSPLIT, $-8-0
+TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0
 	MOVDU	R0, 8(R3)
 	MOVDU	R0, 8(R3)
 	MOVDU	R0, 8(R3)
diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go
index f14dc30..5c13948 100644
--- a/src/runtime/export_test.go
+++ b/src/runtime/export_test.go
@@ -26,6 +26,8 @@
 
 var FuncPC = funcPC
 
+var Fastlog2 = fastlog2
+
 type LFNode struct {
 	Next    uint64
 	Pushcnt uintptr
@@ -130,11 +132,11 @@
 	var size uintptr
 	var p unsafe.Pointer
 	switch t.kind & kindMask {
-	case _KindPtr:
+	case kindPtr:
 		t = (*ptrtype)(unsafe.Pointer(t)).elem
 		size = t.size
 		p = e.data
-	case _KindSlice:
+	case kindSlice:
 		slice := *(*struct {
 			ptr      unsafe.Pointer
 			len, cap uintptr
@@ -155,3 +157,5 @@
 
 var TestingAssertE2I2GC = &testingAssertE2I2GC
 var TestingAssertE2T2GC = &testingAssertE2T2GC
+
+var ForceGCPeriod = &forcegcperiod
diff --git a/src/runtime/export_windows_test.go b/src/runtime/export_windows_test.go
index 61fcef9..6322ee2 100644
--- a/src/runtime/export_windows_test.go
+++ b/src/runtime/export_windows_test.go
@@ -6,4 +6,7 @@
 
 package runtime
 
-var TestingWER = &testingWER
+var (
+	TestingWER              = &testingWER
+	TimeBeginPeriodRetValue = &timeBeginPeriodRetValue
+)
diff --git a/src/runtime/fastlog2.go b/src/runtime/fastlog2.go
new file mode 100644
index 0000000..b22e825
--- /dev/null
+++ b/src/runtime/fastlog2.go
@@ -0,0 +1,33 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+// fastlog2 implements a fast approximation to the base 2 log of a
+// float64. This is used to compute a geometric distribution for heap
+// sampling, without introducing dependencies into package math. This
+// uses a very rough approximation using the float64 exponent and the
+// first 25 bits of the mantissa. The top 5 bits of the mantissa are
+// used to load limits from a table of constants and the rest are used
+// to scale linearly between them.
+func fastlog2(x float64) float64 {
+	const fastlogScaleBits = 20
+	const fastlogScaleRatio = 1.0 / (1 << fastlogScaleBits)
+
+	xBits := float64bits(x)
+	// Extract the exponent from the IEEE float64, and index a constant
+	// table with the top fastlogNumBits (5) bits of the mantissa.
+	xExp := int64((xBits>>52)&0x7FF) - 1023
+	xManIndex := (xBits >> (52 - fastlogNumBits)) % (1 << fastlogNumBits)
+	xManScale := (xBits >> (52 - fastlogNumBits - fastlogScaleBits)) % (1 << fastlogScaleBits)
+
+	low, high := fastlog2Table[xManIndex], fastlog2Table[xManIndex+1]
+	return float64(xExp) + low + (high-low)*float64(xManScale)*fastlogScaleRatio
+}
+
+// float64bits returns the IEEE 754 binary representation of f.
+// Taken from math.Float64bits to avoid dependencies into package math.
+func float64bits(f float64) uint64 { return *(*uint64)(unsafe.Pointer(&f)) }
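
Concretely, fastlog2 splits the IEEE 754 bit pattern into three fields: the
11-bit biased exponent, the top fastlogNumBits (5) mantissa bits as a table
index, and the next fastlogScaleBits (20) mantissa bits as an interpolation
weight between adjacent table entries. A sketch of that decomposition using
package math; the sample value 1500.0 is arbitrary:

	package main

	import (
		"fmt"
		"math"
	)

	func main() {
		x := 1500.0
		bits := math.Float64bits(x)
		exp := int64((bits>>52)&0x7FF) - 1023       // unbiased exponent (10 for 1500.0)
		idx := (bits >> (52 - 5)) % (1 << 5)        // top 5 mantissa bits: table index
		frac := (bits >> (52 - 5 - 20)) % (1 << 20) // next 20 bits: interpolation weight
		fmt.Println(exp, idx, frac, math.Log2(x))   // exact log2 shown for comparison
	}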
diff --git a/src/runtime/fastlog2_test.go b/src/runtime/fastlog2_test.go
new file mode 100644
index 0000000..14f6c90
--- /dev/null
+++ b/src/runtime/fastlog2_test.go
@@ -0,0 +1,34 @@
+// Copyright 2015 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+	"math"
+	"runtime"
+	"testing"
+)
+
+func TestFastLog2(t *testing.T) {
+	// Compute the Euclidean distance between math.Log2 and the FastLog2
+	// implementation over the range of interest for heap sampling.
+	const randomBitCount = 26
+	var e float64
+
+	inc := 1
+	if testing.Short() {
+		// Check 1K total values, down from 64M.
+		inc = 1 << 16
+	}
+	for i := 1; i < 1<<randomBitCount; i += inc {
+		l, fl := math.Log2(float64(i)), runtime.Fastlog2(float64(i))
+		d := l - fl
+		e += d * d
+	}
+	e = math.Sqrt(e)
+
+	if e > 1.0 {
+		t.Fatalf("imprecision on fastlog2 implementation, want <=1.0, got %f", e)
+	}
+}
diff --git a/src/runtime/fastlog2table.go b/src/runtime/fastlog2table.go
new file mode 100644
index 0000000..c36d583
--- /dev/null
+++ b/src/runtime/fastlog2table.go
@@ -0,0 +1,43 @@
+// AUTO-GENERATED by mkfastlog2table.go
+// Run go generate from src/runtime to update.
+// See mkfastlog2table.go for comments.
+
+package runtime
+
+const fastlogNumBits = 5
+
+var fastlog2Table = [1<<fastlogNumBits + 1]float64{
+	0,
+	0.0443941193584535,
+	0.08746284125033943,
+	0.12928301694496647,
+	0.16992500144231248,
+	0.2094533656289499,
+	0.24792751344358555,
+	0.28540221886224837,
+	0.3219280948873623,
+	0.3575520046180837,
+	0.39231742277876036,
+	0.4262647547020979,
+	0.4594316186372973,
+	0.4918530963296748,
+	0.5235619560570128,
+	0.5545888516776374,
+	0.5849625007211563,
+	0.6147098441152082,
+	0.6438561897747247,
+	0.6724253419714956,
+	0.7004397181410922,
+	0.7279204545631992,
+	0.7548875021634686,
+	0.7813597135246596,
+	0.8073549220576042,
+	0.8328900141647417,
+	0.8579809951275721,
+	0.8826430493618412,
+	0.9068905956085185,
+	0.9307373375628862,
+	0.9541963103868752,
+	0.9772799234999164,
+	1,
+}
diff --git a/src/runtime/gc_test.go b/src/runtime/gc_test.go
index 6c9b314..61bbc14 100644
--- a/src/runtime/gc_test.go
+++ b/src/runtime/gc_test.go
@@ -199,6 +199,37 @@
 	}
 }
 
+func TestPeriodicGC(t *testing.T) {
+	// Make sure we're not in the middle of a GC.
+	runtime.GC()
+
+	var ms1, ms2 runtime.MemStats
+	runtime.ReadMemStats(&ms1)
+
+	// Make periodic GC run continuously.
+	orig := *runtime.ForceGCPeriod
+	*runtime.ForceGCPeriod = 0
+
+	// Let some periodic GCs happen. In a heavily loaded system,
+	// it's possible these will be delayed, so this is designed to
+	// succeed quickly if things are working, but to give it some
+	// slack if things are slow.
+	var numGCs uint32
+	const want = 2
+	for i := 0; i < 20 && numGCs < want; i++ {
+		time.Sleep(5 * time.Millisecond)
+
+		// Test that periodic GC actually happened.
+		runtime.ReadMemStats(&ms2)
+		numGCs = ms2.NumGC - ms1.NumGC
+	}
+	*runtime.ForceGCPeriod = orig
+
+	if numGCs < want {
+		t.Fatalf("no periodic GC: got %v GCs, want >= 2", numGCs)
+	}
+}
+
 func BenchmarkSetTypePtr(b *testing.B) {
 	benchSetType(b, new(*byte))
 }
diff --git a/src/runtime/hash32.go b/src/runtime/hash32.go
index 79fb15c..2b7c5c0 100644
--- a/src/runtime/hash32.go
+++ b/src/runtime/hash32.go
@@ -52,9 +52,9 @@
 		h = rotl_15(h*m1) * m2
 	default:
 		v1 := h
-		v2 := uint32(hashkey[1])
-		v3 := uint32(hashkey[2])
-		v4 := uint32(hashkey[3])
+		v2 := uint32(seed * hashkey[1])
+		v3 := uint32(seed * hashkey[2])
+		v4 := uint32(seed * hashkey[3])
 		for s >= 16 {
 			v1 ^= readUnaligned32(p)
 			v1 = rotl_15(v1*m1) * m2
diff --git a/src/runtime/hash64.go b/src/runtime/hash64.go
index 716db61..f339a30 100644
--- a/src/runtime/hash64.go
+++ b/src/runtime/hash64.go
@@ -53,9 +53,9 @@
 		h = rotl_31(h*m1) * m2
 	default:
 		v1 := h
-		v2 := uint64(hashkey[1])
-		v3 := uint64(hashkey[2])
-		v4 := uint64(hashkey[3])
+		v2 := uint64(seed * hashkey[1])
+		v3 := uint64(seed * hashkey[2])
+		v4 := uint64(seed * hashkey[3])
 		for s >= 32 {
 			v1 ^= readUnaligned64(p)
 			v1 = rotl_31(v1*m1) * m2
diff --git a/src/runtime/hash_test.go b/src/runtime/hash_test.go
index 579b0e3..1651485 100644
--- a/src/runtime/hash_test.go
+++ b/src/runtime/hash_test.go
@@ -561,3 +561,100 @@
 func BenchmarkHash64(b *testing.B)    { benchmarkHash(b, 64) }
 func BenchmarkHash1024(b *testing.B)  { benchmarkHash(b, 1024) }
 func BenchmarkHash65536(b *testing.B) { benchmarkHash(b, 65536) }
+
+func TestArrayHash(t *testing.T) {
+	// Make sure that "" in arrays hash correctly.  The hash
+	// should at least scramble the input seed so that, e.g.,
+	// {"","foo"} and {"foo",""} have different hashes.
+
+	// If the hash is bad, then all (8 choose 4) = 70 keys
+	// have the same hash. If so, we allocate 70/8 = 8
+	// overflow buckets.  If the hash is good we don't
+	// normally allocate any overflow buckets, and the
+	// probability of even one or two overflows goes down rapidly.
+	// (There is always 1 allocation of the bucket array.  The map
+	// header is allocated on the stack.)
+	f := func() {
+		// Make the key type at most 128 bytes.  Otherwise,
+		// we get an allocation per key.
+		type key [8]string
+		m := make(map[key]bool, 70)
+
+		// fill m with keys that have 4 "foo"s and 4 ""s.
+		for i := 0; i < 256; i++ {
+			var k key
+			cnt := 0
+			for j := uint(0); j < 8; j++ {
+				if i>>j&1 != 0 {
+					k[j] = "foo"
+					cnt++
+				}
+			}
+			if cnt == 4 {
+				m[k] = true
+			}
+		}
+		if len(m) != 70 {
+			t.Errorf("bad test: (8 choose 4) should be 70, not %d", len(m))
+		}
+	}
+	if n := testing.AllocsPerRun(10, f); n > 6 {
+		t.Errorf("too many allocs %f - hash not balanced", n)
+	}
+}
+func TestStructHash(t *testing.T) {
+	// See the comment in TestArrayHash.
+	f := func() {
+		type key struct {
+			a, b, c, d, e, f, g, h string
+		}
+		m := make(map[key]bool, 70)
+
+		// fill m with keys that have 4 "foo"s and 4 ""s.
+		for i := 0; i < 256; i++ {
+			var k key
+			cnt := 0
+			if i&1 != 0 {
+				k.a = "foo"
+				cnt++
+			}
+			if i&2 != 0 {
+				k.b = "foo"
+				cnt++
+			}
+			if i&4 != 0 {
+				k.c = "foo"
+				cnt++
+			}
+			if i&8 != 0 {
+				k.d = "foo"
+				cnt++
+			}
+			if i&16 != 0 {
+				k.e = "foo"
+				cnt++
+			}
+			if i&32 != 0 {
+				k.f = "foo"
+				cnt++
+			}
+			if i&64 != 0 {
+				k.g = "foo"
+				cnt++
+			}
+			if i&128 != 0 {
+				k.h = "foo"
+				cnt++
+			}
+			if cnt == 4 {
+				m[k] = true
+			}
+		}
+		if len(m) != 70 {
+			t.Errorf("bad test: (8 choose 4) should be 70, not %d", len(m))
+		}
+	}
+	if n := testing.AllocsPerRun(10, f); n > 6 {
+		t.Errorf("too many allocs %f - hash not balanced", n)
+	}
+}
diff --git a/src/runtime/hashmap.go b/src/runtime/hashmap.go
index 9eca9cf..2db73bc 100644
--- a/src/runtime/hashmap.go
+++ b/src/runtime/hashmap.go
@@ -460,7 +460,9 @@
 				continue
 			}
 			// already have a mapping for key.  Update it.
-			typedmemmove(t.key, k2, key)
+			if t.needkeyupdate {
+				typedmemmove(t.key, k2, key)
+			}
 			v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize))
 			v2 := v
 			if t.indirectvalue {
diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go
index 492ea92..48205ea 100644
--- a/src/runtime/heapdump.go
+++ b/src/runtime/heapdump.go
@@ -70,7 +70,7 @@
 		return
 	}
 
-	write(dumpfd, (unsafe.Pointer)(&buf), int32(nbuf))
+	write(dumpfd, unsafe.Pointer(&buf), int32(nbuf))
 	if len >= bufSize {
 		write(dumpfd, data, int32(len))
 		nbuf = 0
@@ -85,7 +85,7 @@
 }
 
 func flush() {
-	write(dumpfd, (unsafe.Pointer)(&buf), int32(nbuf))
+	write(dumpfd, unsafe.Pointer(&buf), int32(nbuf))
 	nbuf = 0
 }
 
diff --git a/src/runtime/iface.go b/src/runtime/iface.go
index 332b7d5..646f878 100644
--- a/src/runtime/iface.go
+++ b/src/runtime/iface.go
@@ -129,6 +129,9 @@
 }
 
 func convT2E(t *_type, elem unsafe.Pointer, x unsafe.Pointer) (e interface{}) {
+	if raceenabled {
+		raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&t)), funcPC(convT2E))
+	}
 	ep := (*eface)(unsafe.Pointer(&e))
 	if isDirectIface(t) {
 		ep._type = t
@@ -147,6 +150,9 @@
 }
 
 func convT2I(t *_type, inter *interfacetype, cache **itab, elem unsafe.Pointer, x unsafe.Pointer) (i fInterface) {
+	if raceenabled {
+		raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&t)), funcPC(convT2I))
+	}
 	tab := (*itab)(atomicloadp(unsafe.Pointer(cache)))
 	if tab == nil {
 		tab = getitab(inter, t, false)
diff --git a/src/runtime/lock_sema.go b/src/runtime/lock_sema.go
index d9d91c9..531f186 100644
--- a/src/runtime/lock_sema.go
+++ b/src/runtime/lock_sema.go
@@ -104,7 +104,7 @@
 		} else {
 			// Other M's are waiting for the lock.
 			// Dequeue an M.
-			mp = (*m)((unsafe.Pointer)(v &^ locked))
+			mp = (*m)(unsafe.Pointer(v &^ locked))
 			if casuintptr(&l.key, v, mp.nextwaitm) {
 				// Dequeued an M.  Wake it.
 				semawakeup(mp)
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index 353f840..4ce159c 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -397,7 +397,10 @@
 			// TODO: It would be bad if part of the arena
 			// is reserved and part is not.
 			var reserved bool
-			p := uintptr(sysReserve((unsafe.Pointer)(h.arena_end), p_size, &reserved))
+			p := uintptr(sysReserve(unsafe.Pointer(h.arena_end), p_size, &reserved))
+			if p == 0 {
+				return nil
+			}
 			if p == h.arena_end {
 				h.arena_end = new_end
 				h.arena_reserved = reserved
@@ -412,7 +415,7 @@
 				h.arena_reserved = reserved
 			} else {
 				var stat uint64
-				sysFree((unsafe.Pointer)(p), p_size, &stat)
+				sysFree(unsafe.Pointer(p), p_size, &stat)
 			}
 		}
 	}
@@ -420,18 +423,18 @@
 	if n <= uintptr(h.arena_end)-uintptr(h.arena_used) {
 		// Keep taking from our reservation.
 		p := h.arena_used
-		sysMap((unsafe.Pointer)(p), n, h.arena_reserved, &memstats.heap_sys)
+		sysMap(unsafe.Pointer(p), n, h.arena_reserved, &memstats.heap_sys)
 		mHeap_MapBits(h, p+n)
 		mHeap_MapSpans(h, p+n)
 		h.arena_used = p + n
 		if raceenabled {
-			racemapshadow((unsafe.Pointer)(p), n)
+			racemapshadow(unsafe.Pointer(p), n)
 		}
 
 		if uintptr(p)&(_PageSize-1) != 0 {
 			throw("misrounded allocation in MHeap_SysAlloc")
 		}
-		return (unsafe.Pointer)(p)
+		return unsafe.Pointer(p)
 	}
 
 	// If using 64-bit, our reservation is all we have.
@@ -450,7 +453,7 @@
 
 	if p < h.arena_start || uintptr(p)+p_size-uintptr(h.arena_start) >= _MaxArena32 {
 		print("runtime: memory allocated by OS (", p, ") not in usable range [", hex(h.arena_start), ",", hex(h.arena_start+_MaxArena32), ")\n")
-		sysFree((unsafe.Pointer)(p), p_size, &memstats.heap_sys)
+		sysFree(unsafe.Pointer(p), p_size, &memstats.heap_sys)
 		return nil
 	}
 
@@ -464,14 +467,14 @@
 			h.arena_end = p_end
 		}
 		if raceenabled {
-			racemapshadow((unsafe.Pointer)(p), n)
+			racemapshadow(unsafe.Pointer(p), n)
 		}
 	}
 
 	if uintptr(p)&(_PageSize-1) != 0 {
 		throw("misrounded allocation in MHeap_SysAlloc")
 	}
-	return (unsafe.Pointer)(p)
+	return unsafe.Pointer(p)
 }
 
 // base address for all 0-byte allocations
@@ -507,6 +510,27 @@
 		return persistentalloc(size, align, &memstats.other_sys)
 	}
 
+	// assistG is the G to charge for this allocation, or nil if
+	// GC is not currently active.
+	var assistG *g
+	if gcBlackenEnabled != 0 {
+		// Charge the current user G for this allocation.
+		assistG = getg()
+		if assistG.m.curg != nil {
+			assistG = assistG.m.curg
+		}
+		// Charge the allocation against the G. We'll account
+		// for internal fragmentation at the end of mallocgc.
+		assistG.gcAssistBytes -= int64(size)
+
+		if assistG.gcAssistBytes < 0 {
+			// This G is in debt. Assist the GC to correct
+			// this before allocating. This must happen
+			// before disabling preemption.
+			gcAssistAlloc(assistG)
+		}
+	}
+
 	// Set mp.mallocing to keep from being preempted by GC.
 	mp := acquirem()
 	if mp.mallocing != 0 {
@@ -701,15 +725,15 @@
 		}
 	}
 
+	if assistG != nil {
+		// Account for internal fragmentation in the assist
+		// debt now that we know it.
+		assistG.gcAssistBytes -= int64(size - dataSize)
+	}
+
 	if shouldhelpgc && shouldtriggergc() {
 		startGC(gcBackgroundMode, false)
-	} else if gcBlackenEnabled != 0 {
-		// Assist garbage collector. We delay this until the
-		// epilogue so that it doesn't interfere with the
-		// inner working of malloc such as mcache refills that
-		// might happen while doing the gcAssistAlloc.
-		gcAssistAlloc(size, shouldhelpgc)
-	} else if shouldhelpgc && bggc.working != 0 {
+	} else if shouldhelpgc && bggc.working != 0 && gcBlackenEnabled == 0 {
 		// The GC is starting up or shutting down, so we can't
 		// assist, but we also can't allocate unabated. Slow
 		// down this G's allocation and help the GC stay
@@ -789,26 +813,43 @@
 }
 
 func profilealloc(mp *m, x unsafe.Pointer, size uintptr) {
-	c := mp.mcache
-	rate := MemProfileRate
-	if size < uintptr(rate) {
-		// pick next profile time
-		// If you change this, also change allocmcache.
-		if rate > 0x3fffffff { // make 2*rate not overflow
-			rate = 0x3fffffff
-		}
-		next := int32(fastrand1()) % (2 * int32(rate))
-		// Subtract the "remainder" of the current allocation.
-		// Otherwise objects that are close in size to sampling rate
-		// will be under-sampled, because we consistently discard this remainder.
-		next -= (int32(size) - c.next_sample)
-		if next < 0 {
-			next = 0
-		}
-		c.next_sample = next
+	mp.mcache.next_sample = nextSample()
+	mProf_Malloc(x, size)
+}
+
+// nextSample returns the next sampling point for heap profiling.
+// It produces a random variable with a geometric distribution and
+// mean MemProfileRate. This is done by generating a uniformly
+// distributed random number and applying the cumulative distribution
+// function for an exponential.
+func nextSample() int32 {
+	period := MemProfileRate
+
+	// make nextSample not overflow. Maximum possible step is
+	// -ln(1/(1<<randomBitCount)) * period, approximately 20 * period.
+	switch {
+	case period > 0x7000000:
+		period = 0x7000000
+	case period == 0:
+		return 0
 	}
 
-	mProf_Malloc(x, size)
+	// Let m be the sample rate,
+	// the probability density function is m*exp(-mx), so the CDF is
+	// p = 1 - exp(-mx), so
+	// q = 1 - p == exp(-mx)
+	// log_e(q) = -mx
+	// -log_e(q)/m = x
+	// x = -log_e(q) * period    ; since period = 1/m
+	// x = log_2(q) * (-log_e(2)) * period    ; Using log_2 for efficiency
+	const randomBitCount = 26
+	q := uint32(fastrand1())%(1<<randomBitCount) + 1
+	qlog := fastlog2(float64(q)) - randomBitCount
+	if qlog > 0 {
+		qlog = 0
+	}
+	const minusLog2 = -0.6931471805599453 // -ln(2)
+	return int32(qlog*(minusLog2*float64(period))) + 1
 }
 
 type persistentAlloc struct {
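
The derivation in nextSample is inverse-CDF sampling: draw q uniformly from
[1, 2^26], treat q/2^26 as the exponential's tail probability, and map it
through x = -ln(q/2^26) * period, whose mean is exactly period. A sketch that
swaps fastrand1 and fastlog2 for math/rand and math.Log2 and checks the mean
empirically; the constants mirror the patch, the test harness is illustrative:

	package main

	import (
		"fmt"
		"math"
		"math/rand"
	)

	func nextSample(period float64) float64 {
		const randomBitCount = 26
		q := rand.Intn(1<<randomBitCount) + 1          // uniform in [1, 2^26]
		qlog := math.Log2(float64(q)) - randomBitCount // log2(q/2^26), always <= 0
		const minusLog2 = -0.6931471805599453          // -ln(2)
		return qlog*(minusLog2*period) + 1
	}

	func main() {
		const period = 512 * 1024 // the default MemProfileRate
		sum := 0.0
		const n = 200000
		for i := 0; i < n; i++ {
			sum += nextSample(period)
		}
		fmt.Printf("mean ~= %.0f, want ~= %d\n", sum/n, period)
	}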
diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go
index fd6b4b1..e7319c1 100644
--- a/src/runtime/mbitmap.go
+++ b/src/runtime/mbitmap.go
@@ -182,7 +182,11 @@
 // If p does not point into a heap object,
 // return base == 0
 // otherwise return the base of the object.
-func heapBitsForObject(p uintptr) (base uintptr, hbits heapBits, s *mspan) {
+//
+// refBase and refOff optionally give the base address of the object
+// in which the pointer p was found and the byte offset at which it
+// was found. These are used for error reporting.
+func heapBitsForObject(p, refBase, refOff uintptr) (base uintptr, hbits heapBits, s *mspan) {
 	arenaStart := mheap_.arena_start
 	if p < arenaStart || p >= mheap_.arena_used {
 		return
@@ -203,18 +207,28 @@
 
 		// The following ensures that we are rigorous about what data
 		// structures hold valid pointers.
-		// TODO(rsc): Check if this still happens.
 		if debug.invalidptr != 0 {
-			// Still happens sometimes. We don't know why.
+			// Typically this indicates an incorrect use
+			// of unsafe or cgo to store a bad pointer in
+			// the Go heap. It may also indicate a runtime
+			// bug.
+			//
+			// TODO(austin): We could be more aggressive
+			// and detect pointers to unallocated objects
+			// in allocated spans.
 			printlock()
-			print("runtime:objectstart Span weird: p=", hex(p), " k=", hex(k))
-			if s == nil {
-				print(" s=nil\n")
+			print("runtime: pointer ", hex(p))
+			if s.state != mSpanInUse {
+				print(" to unallocated span")
 			} else {
-				print(" s.start=", hex(s.start<<_PageShift), " s.limit=", hex(s.limit), " s.state=", s.state, "\n")
+				print(" to unused region of span")
 			}
-			printunlock()
-			throw("objectstart: bad pointer in unexpected span")
+			print("idx=", hex(idx), " span.start=", hex(s.start<<_PageShift), " span.limit=", hex(s.limit), " span.state=", s.state, "\n")
+			if refBase != 0 {
+				print("runtime: found in object at *(", hex(refBase), "+", hex(refOff), ")\n")
+				gcDumpObject("object", refBase, refOff)
+			}
+			throw("found bad pointer in Go heap (incorrect use of unsafe or cgo?)")
 		}
 		return
 	}
diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go
index 8c2a6b0..7424691 100644
--- a/src/runtime/mcache.go
+++ b/src/runtime/mcache.go
@@ -69,16 +69,7 @@
 	for i := 0; i < _NumSizeClasses; i++ {
 		c.alloc[i] = &emptymspan
 	}
-
-	// Set first allocation sample size.
-	rate := MemProfileRate
-	if rate > 0x3fffffff { // make 2*rate not overflow
-		rate = 0x3fffffff
-	}
-	if rate != 0 {
-		c.next_sample = int32(int(fastrand1()) % (2 * rate))
-	}
-
+	c.next_sample = nextSample()
 	return c
 }
 
diff --git a/src/runtime/mem_darwin.go b/src/runtime/mem_darwin.go
index 3bebd97..65b1b48 100644
--- a/src/runtime/mem_darwin.go
+++ b/src/runtime/mem_darwin.go
@@ -10,7 +10,7 @@
 // which prevents us from allocating more stack.
 //go:nosplit
 func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer {
-	v := (unsafe.Pointer)(mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0))
+	v := unsafe.Pointer(mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0))
 	if uintptr(v) < 4096 {
 		return nil
 	}
@@ -40,7 +40,7 @@
 
 func sysReserve(v unsafe.Pointer, n uintptr, reserved *bool) unsafe.Pointer {
 	*reserved = true
-	p := (unsafe.Pointer)(mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0))
+	p := unsafe.Pointer(mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0))
 	if uintptr(p) < 4096 {
 		return nil
 	}
@@ -53,7 +53,7 @@
 
 func sysMap(v unsafe.Pointer, n uintptr, reserved bool, sysStat *uint64) {
 	mSysStatInc(sysStat, n)
-	p := (unsafe.Pointer)(mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0))
+	p := unsafe.Pointer(mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0))
 	if uintptr(p) == _ENOMEM {
 		throw("runtime: out of memory")
 	}
diff --git a/src/runtime/mem_linux.go b/src/runtime/mem_linux.go
index f988e75..e8c8999 100644
--- a/src/runtime/mem_linux.go
+++ b/src/runtime/mem_linux.go
@@ -69,29 +69,89 @@
 }
 
 func sysUnused(v unsafe.Pointer, n uintptr) {
-	var s uintptr = hugePageSize // division by constant 0 is a compile-time error :(
-	if s != 0 && (uintptr(v)%s != 0 || n%s != 0) {
-		// See issue 8832
-		// Linux kernel bug: https://bugzilla.kernel.org/show_bug.cgi?id=93111
-		// Mark the region as NOHUGEPAGE so the kernel's khugepaged
-		// doesn't undo our DONTNEED request.  khugepaged likes to migrate
-		// regions which are only partially mapped to huge pages, including
-		// regions with some DONTNEED marks.  That needlessly allocates physical
-		// memory for our DONTNEED regions.
-		madvise(v, n, _MADV_NOHUGEPAGE)
+	// By default, Linux's "transparent huge page" support will
+	// merge pages into a huge page if there's even a single
+	// present regular page, undoing the effects of the DONTNEED
+	// below. On amd64, that means khugepaged can turn a single
+	// 4KB page to 2MB, bloating the process's RSS by as much as
+	// 512X. (See issue #8832 and Linux kernel bug
+	// https://bugzilla.kernel.org/show_bug.cgi?id=93111)
+	//
+	// To work around this, we explicitly disable transparent huge
+	// pages when we release pages of the heap. However, we have
+	// to do this carefully because changing this flag tends to
+	// split the VMA (memory mapping) containing v in to three
+	// VMAs in order to track the different values of the
+	// MADV_NOHUGEPAGE flag in the different regions. There's a
+	// default limit of 65530 VMAs per address space (sysctl
+	// vm.max_map_count), so we must be careful not to create too
+	// many VMAs (see issue #12233).
+	//
+	// Since huge pages are huge, there's little use in adjusting
+	// the MADV_NOHUGEPAGE flag on a fine granularity, so we avoid
+	// exploding the number of VMAs by only adjusting the
+	// MADV_NOHUGEPAGE flag on a large granularity. This still
+	// gets most of the benefit of huge pages while keeping the
+	// number of VMAs under control. With hugePageSize = 2MB, even
+	// a pessimal heap can reach 128GB before running out of VMAs.
+	if hugePageSize != 0 {
+		var s uintptr = hugePageSize // division by constant 0 is a compile-time error :(
+
+		// If it's a large allocation, we want to leave huge
+		// pages enabled. Hence, we only adjust the huge page
+		// flag on the huge pages containing v and v+n-1, and
+		// only if those aren't aligned.
+		var head, tail uintptr
+		if uintptr(v)%s != 0 {
+			// Compute huge page containing v.
+			head = uintptr(v) &^ (s - 1)
+		}
+		if (uintptr(v)+n)%s != 0 {
+			// Compute huge page containing v+n-1.
+			tail = (uintptr(v) + n - 1) &^ (s - 1)
+		}
+
+		// Note that madvise will return EINVAL if the flag is
+		// already set, which is quite likely. We ignore
+		// errors.
+		if head != 0 && head+hugePageSize == tail {
+			// head and tail are different but adjacent,
+			// so do this in one call.
+			madvise(unsafe.Pointer(head), 2*hugePageSize, _MADV_NOHUGEPAGE)
+		} else {
+			// Advise the huge pages containing v and v+n-1.
+			if head != 0 {
+				madvise(unsafe.Pointer(head), hugePageSize, _MADV_NOHUGEPAGE)
+			}
+			if tail != 0 && tail != head {
+				madvise(unsafe.Pointer(tail), hugePageSize, _MADV_NOHUGEPAGE)
+			}
+		}
 	}
+
 	madvise(v, n, _MADV_DONTNEED)
 }
 
 func sysUsed(v unsafe.Pointer, n uintptr) {
 	if hugePageSize != 0 {
-		// Undo the NOHUGEPAGE marks from sysUnused.  There is no alignment check
-		// around this call as spans may have been merged in the interim.
-		// Note that this might enable huge pages for regions which were
-		// previously disabled.  Unfortunately there is no easy way to detect
-		// what the previous state was, and in any case we probably want huge
-		// pages to back our heap if the kernel can arrange that.
-		madvise(v, n, _MADV_HUGEPAGE)
+		// Partially undo the NOHUGEPAGE marks from sysUnused
+		// for whole huge pages between v and v+n. This may
+		// leave huge pages off at the end points v and v+n
+		// even though allocations may cover these entire huge
+		// pages. We could detect this and undo NOHUGEPAGE on
+		// the end points as well, but it's probably not worth
+		// the cost because when neighboring allocations are
+		// freed sysUnused will just set NOHUGEPAGE again.
+		var s uintptr = hugePageSize
+
+		// Round v up to a huge page boundary.
+		beg := (uintptr(v) + (s - 1)) &^ (s - 1)
+		// Round v+n down to a huge page boundary.
+		end := (uintptr(v) + n) &^ (s - 1)
+
+		if beg < end {
+			madvise(unsafe.Pointer(beg), end-beg, _MADV_HUGEPAGE)
+		}
 	}
 }
 
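
The bit arithmetic above is standard power-of-two rounding: v &^ (s-1) rounds
down to a huge-page boundary and (v+s-1) &^ (s-1) rounds up. A sketch with a
2MB huge page and a made-up address and length, printing the head and tail
pages that sysUnused marks NOHUGEPAGE and the [beg, end) range that sysUsed
marks HUGEPAGE again:

	package main

	import "fmt"

	func main() {
		const s = uintptr(2 << 20) // 2MB huge page
		v, n := uintptr(0x1234567), uintptr(5<<20)

		head := v &^ (s - 1)           // huge page containing v
		tail := (v + n - 1) &^ (s - 1) // huge page containing v+n-1

		beg := (v + s - 1) &^ (s - 1) // round v up to a boundary
		end := (v + n) &^ (s - 1)     // round v+n down to a boundary
		fmt.Printf("head=%#x tail=%#x beg=%#x end=%#x\n", head, tail, beg, end)
	}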
diff --git a/src/runtime/mem_windows.go b/src/runtime/mem_windows.go
index 42aa7fb..50fca95 100644
--- a/src/runtime/mem_windows.go
+++ b/src/runtime/mem_windows.go
@@ -48,6 +48,7 @@
 			small &^= 4096 - 1
 		}
 		if small < 4096 {
+			print("runtime: VirtualFree of ", small, " bytes failed with errno=", getlasterror(), "\n")
 			throw("runtime: failed to decommit pages")
 		}
 		v = add(v, small)
@@ -58,6 +59,7 @@
 func sysUsed(v unsafe.Pointer, n uintptr) {
 	r := stdcall4(_VirtualAlloc, uintptr(v), n, _MEM_COMMIT, _PAGE_READWRITE)
 	if r != uintptr(v) {
+		print("runtime: VirtualAlloc of ", n, " bytes failed with errno=", getlasterror(), "\n")
 		throw("runtime: failed to commit pages")
 	}
 
@@ -69,7 +71,8 @@
 			small &^= 4096 - 1
 		}
 		if small < 4096 {
-			throw("runtime: failed to decommit pages")
+			print("runtime: VirtualAlloc of ", small, " bytes failed with errno=", getlasterror(), "\n")
+			throw("runtime: failed to commit pages")
 		}
 		v = add(v, small)
 		n -= small
@@ -83,6 +86,7 @@
 	mSysStatDec(sysStat, n)
 	r := stdcall3(_VirtualFree, uintptr(v), 0, _MEM_RELEASE)
 	if r == 0 {
+		print("runtime: VirtualFree of ", n, " bytes failed with errno=", getlasterror(), "\n")
 		throw("runtime: failed to release pages")
 	}
 }
@@ -109,6 +113,7 @@
 	mSysStatInc(sysStat, n)
 	p := stdcall4(_VirtualAlloc, uintptr(v), n, _MEM_COMMIT, _PAGE_READWRITE)
 	if p != uintptr(v) {
+		print("runtime: VirtualAlloc of ", n, " bytes failed with errno=", getlasterror(), "\n")
 		throw("runtime: cannot map pages in arena address space")
 	}
 }
diff --git a/src/runtime/memclr_arm64.s b/src/runtime/memclr_arm64.s
index c44c123..47c6b73 100644
--- a/src/runtime/memclr_arm64.s
+++ b/src/runtime/memclr_arm64.s
@@ -8,11 +8,30 @@
 TEXT runtime·memclr(SB),NOSPLIT,$0-16
 	MOVD	ptr+0(FP), R3
 	MOVD	n+8(FP), R4
-	CMP	$0, R4
-	BEQ	done
-	ADD	R3, R4, R4
+	// TODO(mwhudson): this is written this way to avoid tickling
+	// warnings from addpool when written as AND $7, R4, R6 (see
+	// https://golang.org/issue/12708)
+	AND	$~7, R4, R5	// R5 is N&~7
+	SUB	R5, R4, R6	// R6 is N&7
+
+	CMP	$0, R5
+	BEQ	nowords
+
+	ADD	R3, R5, R5
+
+wordloop: // TODO: Optimize for unaligned ptr.
+	MOVD.P	$0, 8(R3)
+	CMP	R3, R5
+	BNE	wordloop
+nowords:
+	CMP	$0, R6
+	BEQ	done
+
+	ADD	R3, R6, R6
+
+byteloop:
 	MOVBU.P	$0, 1(R3)
-	CMP	R3, R4
-	BNE	-2(PC)
+	CMP	R3, R6
+	BNE	byteloop
 done:
 	RET
diff --git a/src/runtime/memclr_ppc64x.s b/src/runtime/memclr_ppc64x.s
index cea42cb..442faa2 100644
--- a/src/runtime/memclr_ppc64x.s
+++ b/src/runtime/memclr_ppc64x.s
@@ -7,14 +7,25 @@
 #include "textflag.h"
 
 // void runtime·memclr(void*, uintptr)
-TEXT runtime·memclr(SB),NOSPLIT,$0-16
+TEXT runtime·memclr(SB),NOSPLIT|NOFRAME,$0-16
 	MOVD	ptr+0(FP), R3
 	MOVD	n+8(FP), R4
-	CMP	R4, $0
+	SRADCC	$3, R4, R6	// R6 is the number of words to zero
+	BEQ	bytes
+
+	SUB	$8, R3
+	MOVD	R6, CTR
+	MOVDU	R0, 8(R3)
+	BC	25, 0, -1(PC)	// bdnz+ $-4
+	ADD	$8, R3
+
+bytes:
+	ANDCC	$7, R4, R7	// R7 is the number of bytes to zero
 	BEQ	done
 	SUB	$1, R3
-	MOVD	R4, CTR
+	MOVD	R7, CTR
 	MOVBU	R0, 1(R3)
-	BC	25, 0, -1(PC) // bdnz+ $-4
+	BC	25, 0, -1(PC)	// bdnz+ $-4
+
 done:
 	RET
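
Both memclr rewrites share one shape: split n into n &^ 7 bytes cleared a
machine word at a time and an n & 7 byte tail cleared one byte at a time. A
Go rendering of that split; the 8-iteration inner loop stands in for a single
8-byte MOVD/MOVDU store:

	package main

	import "fmt"

	func memclr(b []byte) {
		n := len(b)
		words := n &^ 7 // bytes covered by whole 8-byte stores
		for i := 0; i < words; i += 8 {
			for j := i; j < i+8; j++ { // stands in for one 8-byte store
				b[j] = 0
			}
		}
		for i := words; i < n; i++ { // byte tail: the remaining n & 7 bytes
			b[i] = 0
		}
	}

	func main() {
		b := []byte("hello, gopher") // 13 bytes: one word store + 5 tail bytes
		memclr(b)
		fmt.Println(b)
	}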
diff --git a/src/runtime/memmove_arm64.s b/src/runtime/memmove_arm64.s
index 66059a7..00813d4 100644
--- a/src/runtime/memmove_arm64.s
+++ b/src/runtime/memmove_arm64.s
@@ -14,23 +14,78 @@
 	RET
 
 check:
+	AND	$~7, R5, R7	// R7 is N&~7
+	// TODO(mwhudson): this is written this way to avoid tickling
+	// warnings from addpool when written as AND $7, R5, R6 (see
+	// https://golang.org/issue/12708)
+	SUB	R7, R5, R6	// R6 is N&7
+
 	CMP	R3, R4
 	BLT	backward
 
-	ADD	R3, R5
-loop:
-	MOVBU.P	1(R4), R6
-	MOVBU.P	R6, 1(R3)
-	CMP	R3, R5
-	BNE	loop
+	// Copying forward proceeds by copying R7/8 words then copying R6 bytes.
+	// R3 and R4 are advanced as we copy.
+
+	// (There may be implementations of armv8 where copying by bytes until
+	// at least one of source or dest is word aligned is a worthwhile
+	// optimization, but on the one tested so far (xgene) it did not
+	// make a significant difference.)
+
+	CMP	$0, R7		// Do we need to do any word-by-word copying?
+	BEQ	noforwardlarge
+
+	ADD	R3, R7, R9	// R9 points just past where we copy by word
+
+forwardlargeloop:
+	MOVD.P	8(R4), R8	// R8 is just a scratch register
+	MOVD.P	R8, 8(R3)
+	CMP	R3, R9
+	BNE	forwardlargeloop
+
+noforwardlarge:
+	CMP	$0, R6		// Do we need to do any byte-by-byte copying?
+	BNE	forwardtail
+	RET
+
+forwardtail:
+	ADD	R3, R6, R9	// R9 points just past the destination memory
+
+forwardtailloop:
+	MOVBU.P 1(R4), R8
+	MOVBU.P	R8, 1(R3)
+	CMP	R3, R9
+	BNE	forwardtailloop
 	RET
 
 backward:
-	ADD	R5, R4
-	ADD	R3, R5
-loop1:
-	MOVBU.W	-1(R4), R6
-	MOVBU.W	R6, -1(R5)
-	CMP	R3, R5
-	BNE	loop1
+	// Copying backwards proceeds by copying R6 bytes then copying R7/8 words.
+	// R3 and R4 are advanced to the end of the destination/source buffers
+	// respectively and moved back as we copy.
+
+	ADD	R4, R5, R4	// R4 points just past the last source byte
+	ADD	R3, R5, R3	// R3 points just past the last destination byte
+
+	CMP	$0, R6		// Do we need to do any byte-by-byte copying?
+	BEQ	nobackwardtail
+
+	SUB	R6, R3, R9	// R9 points at the lowest destination byte that should be copied by byte.
+backwardtailloop:
+	MOVBU.W	-1(R4), R8
+	MOVBU.W	R8, -1(R3)
+	CMP	R9, R3
+	BNE	backwardtailloop
+
+nobackwardtail:
+	CMP	$0, R7		// Do we need to do any word-by-word copying?
+	BNE	backwardlarge
+	RET
+
+backwardlarge:
+	SUB	R7, R3, R9	// R9 points at the lowest destination byte
+
+backwardlargeloop:
+	MOVD.W	-8(R4), R8
+	MOVD.W	R8, -8(R3)
+	CMP	R9, R3
+	BNE	backwardlargeloop
 	RET
diff --git a/src/runtime/memmove_nacl_amd64p32.s b/src/runtime/memmove_nacl_amd64p32.s
index 373607a..be9e1e5 100644
--- a/src/runtime/memmove_nacl_amd64p32.s
+++ b/src/runtime/memmove_nacl_amd64p32.s
@@ -4,6 +4,9 @@
 
 #include "textflag.h"
 
+// This could use MOVSQ, but we use MOVSL so that if an object ends in
+// a 4 byte pointer, we copy it as a unit instead of byte by byte.
+
 TEXT runtime·memmove(SB), NOSPLIT, $0-12
 	MOVL	to+0(FP), DI
 	MOVL	from+4(FP), SI
@@ -14,9 +17,9 @@
 
 forward:
 	MOVL	BX, CX
-	SHRL	$3, CX
-	ANDL	$7, BX
-	REP; MOVSQ
+	SHRL	$2, CX
+	ANDL	$3, BX
+	REP; MOVSL
 	MOVL	BX, CX
 	REP; MOVSB
 	RET
@@ -32,13 +35,13 @@
 	STD
 	
 	MOVL	BX, CX
-	SHRL	$3, CX
-	ANDL	$7, BX
-	SUBL	$8, DI
-	SUBL	$8, SI
-	REP; MOVSQ
-	ADDL	$7, DI
-	ADDL	$7, SI
+	SHRL	$2, CX
+	ANDL	$3, BX
+	SUBL	$4, DI
+	SUBL	$4, SI
+	REP; MOVSL
+	ADDL	$3, DI
+	ADDL	$3, SI
 	MOVL	BX, CX
 	REP; MOVSB
 	CLD
diff --git a/src/runtime/memmove_ppc64x.s b/src/runtime/memmove_ppc64x.s
index 3ada63e..b6d0b85 100644
--- a/src/runtime/memmove_ppc64x.s
+++ b/src/runtime/memmove_ppc64x.s
@@ -7,7 +7,7 @@
 #include "textflag.h"
 
 // void runtime·memmove(void*, void*, uintptr)
-TEXT runtime·memmove(SB), NOSPLIT, $-8-24
+TEXT runtime·memmove(SB), NOSPLIT|NOFRAME, $0-24
 	MOVD	to+0(FP), R3
 	MOVD	from+8(FP), R4
 	MOVD	n+16(FP), R5
@@ -16,25 +16,73 @@
 	RET
 
 check:
-	CMP	R3, R4
-	BGT	backward
+	ANDCC	$7, R5, R7	// R7 is the number of bytes to copy and CR0[EQ] is set if there are none.
+	SRAD	$3, R5, R6	// R6 is the number of words to copy
+	CMP	R6, $0, CR1	// CR1[EQ] is set if there are no words to copy.
 
+	CMP	R3, R4, CR2
+	BC	12, 9, backward	// I think you should be able to write this as "BGT CR2, backward"
+
+	// Copying forward proceeds by copying R6 words then copying R7 bytes.
+	// R3 and R4 are advanced as we copy. Because PPC64 lacks post-increment
+	// load/store, R3 and R4 point before the bytes that are to be copied.
+
+	BC	12, 6, noforwardlarge	// "BEQ CR1, noforwardlarge"
+
+	MOVD	R6, CTR
+
+	SUB	$8, R3
+	SUB	$8, R4
+
+forwardlargeloop:
+	MOVDU	8(R4), R8
+	MOVDU	R8, 8(R3)
+	BC	16, 0, forwardlargeloop // "BDNZ"
+
+	ADD	$8, R3
+	ADD	$8, R4
+
+noforwardlarge:
+	BNE	forwardtail	// Tests the bit set by ANDCC above
+	RET
+
+forwardtail:
 	SUB	$1, R3
-	ADD	R3, R5
 	SUB	$1, R4
-loop:
-	MOVBU	1(R4), R6
-	MOVBU	R6, 1(R3)
-	CMP	R3, R5
-	BNE	loop
+	MOVD	R7, CTR
+
+forwardtailloop:
+	MOVBZU	1(R4), R8
+	MOVBZU	R8, 1(R3)
+	BC	16, 0, forwardtailloop
 	RET
 
 backward:
-	ADD	R5, R4
-	ADD	R3, R5
-loop1:
-	MOVBU	-1(R4), R6
-	MOVBU	R6, -1(R5)
-	CMP	R3, R5
-	BNE	loop1
+	// Copying backwards proceeds by copying R7 bytes then copying R6 words.
+	// R3 and R4 are advanced to the end of the destination/source buffers
+	// respectively and moved back as we copy.
+
+	ADD	R5, R4, R4
+	ADD	R3, R5, R3
+
+	BEQ	nobackwardtail
+
+	MOVD	R7, CTR
+
+backwardtailloop:
+	MOVBZU	-1(R4), R8
+	MOVBZU	R8, -1(R3)
+	BC	16, 0, backwardtailloop
+
+nobackwardtail:
+	BC	4, 6, backwardlarge		// "BNE CR1"
+	RET
+
+backwardlarge:
+	MOVD	R6, CTR
+
+backwardlargeloop:
+	MOVDU	-8(R4), R8
+	MOVDU	R8, -8(R3)
+	BC	16, 0, backwardlargeloop	// "BDNZ"
 	RET
diff --git a/src/runtime/memmove_test.go b/src/runtime/memmove_test.go
index 857f99b..d5a2ad8 100644
--- a/src/runtime/memmove_test.go
+++ b/src/runtime/memmove_test.go
@@ -116,6 +116,41 @@
 func BenchmarkMemmove2048(b *testing.B) { bmMemmove(b, 2048) }
 func BenchmarkMemmove4096(b *testing.B) { bmMemmove(b, 4096) }
 
+func bmMemmoveUnaligned(b *testing.B, n int) {
+	x := make([]byte, n+1)
+	y := make([]byte, n)
+	b.SetBytes(int64(n))
+	for i := 0; i < b.N; i++ {
+		copy(x[1:], y)
+	}
+}
+
+func BenchmarkMemmoveUnaligned0(b *testing.B)    { bmMemmoveUnaligned(b, 0) }
+func BenchmarkMemmoveUnaligned1(b *testing.B)    { bmMemmoveUnaligned(b, 1) }
+func BenchmarkMemmoveUnaligned2(b *testing.B)    { bmMemmoveUnaligned(b, 2) }
+func BenchmarkMemmoveUnaligned3(b *testing.B)    { bmMemmoveUnaligned(b, 3) }
+func BenchmarkMemmoveUnaligned4(b *testing.B)    { bmMemmoveUnaligned(b, 4) }
+func BenchmarkMemmoveUnaligned5(b *testing.B)    { bmMemmoveUnaligned(b, 5) }
+func BenchmarkMemmoveUnaligned6(b *testing.B)    { bmMemmoveUnaligned(b, 6) }
+func BenchmarkMemmoveUnaligned7(b *testing.B)    { bmMemmoveUnaligned(b, 7) }
+func BenchmarkMemmoveUnaligned8(b *testing.B)    { bmMemmoveUnaligned(b, 8) }
+func BenchmarkMemmoveUnaligned9(b *testing.B)    { bmMemmoveUnaligned(b, 9) }
+func BenchmarkMemmoveUnaligned10(b *testing.B)   { bmMemmoveUnaligned(b, 10) }
+func BenchmarkMemmoveUnaligned11(b *testing.B)   { bmMemmoveUnaligned(b, 11) }
+func BenchmarkMemmoveUnaligned12(b *testing.B)   { bmMemmoveUnaligned(b, 12) }
+func BenchmarkMemmoveUnaligned13(b *testing.B)   { bmMemmoveUnaligned(b, 13) }
+func BenchmarkMemmoveUnaligned14(b *testing.B)   { bmMemmoveUnaligned(b, 14) }
+func BenchmarkMemmoveUnaligned15(b *testing.B)   { bmMemmoveUnaligned(b, 15) }
+func BenchmarkMemmoveUnaligned16(b *testing.B)   { bmMemmoveUnaligned(b, 16) }
+func BenchmarkMemmoveUnaligned32(b *testing.B)   { bmMemmoveUnaligned(b, 32) }
+func BenchmarkMemmoveUnaligned64(b *testing.B)   { bmMemmoveUnaligned(b, 64) }
+func BenchmarkMemmoveUnaligned128(b *testing.B)  { bmMemmoveUnaligned(b, 128) }
+func BenchmarkMemmoveUnaligned256(b *testing.B)  { bmMemmoveUnaligned(b, 256) }
+func BenchmarkMemmoveUnaligned512(b *testing.B)  { bmMemmoveUnaligned(b, 512) }
+func BenchmarkMemmoveUnaligned1024(b *testing.B) { bmMemmoveUnaligned(b, 1024) }
+func BenchmarkMemmoveUnaligned2048(b *testing.B) { bmMemmoveUnaligned(b, 2048) }
+func BenchmarkMemmoveUnaligned4096(b *testing.B) { bmMemmoveUnaligned(b, 4096) }
+
 func TestMemclr(t *testing.T) {
 	size := 512
 	if testing.Short() {
diff --git a/src/runtime/mfixalloc.go b/src/runtime/mfixalloc.go
index bb2f4e7..57a136d 100644
--- a/src/runtime/mfixalloc.go
+++ b/src/runtime/mfixalloc.go
@@ -20,7 +20,7 @@
 // smashed by freeing and reallocating.
 type fixalloc struct {
 	size   uintptr
-	first  unsafe.Pointer // go func(unsafe.pointer, unsafe.pointer); f(arg, p) called first time p is returned
+	first  func(arg, p unsafe.Pointer) // called first time p is returned
 	arg    unsafe.Pointer
 	list   *mlink
 	chunk  *byte
@@ -40,9 +40,9 @@
 
 // Initialize f to allocate objects of the given size,
 // using the allocator to obtain chunks of memory.
-func fixAlloc_Init(f *fixalloc, size uintptr, first func(unsafe.Pointer, unsafe.Pointer), arg unsafe.Pointer, stat *uint64) {
+func fixAlloc_Init(f *fixalloc, size uintptr, first func(arg, p unsafe.Pointer), arg unsafe.Pointer, stat *uint64) {
 	f.size = size
-	f.first = *(*unsafe.Pointer)(unsafe.Pointer(&first))
+	f.first = first
 	f.arg = arg
 	f.list = nil
 	f.chunk = nil
@@ -68,10 +68,9 @@
 		f.nchunk = _FixAllocChunk
 	}
 
-	v := (unsafe.Pointer)(f.chunk)
+	v := unsafe.Pointer(f.chunk)
 	if f.first != nil {
-		fn := *(*func(unsafe.Pointer, unsafe.Pointer))(unsafe.Pointer(&f.first))
-		fn(f.arg, v)
+		f.first(f.arg, v)
 	}
 	f.chunk = (*byte)(add(unsafe.Pointer(f.chunk), f.size))
 	f.nchunk -= uint32(f.size)
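
The fixalloc change above deserves a condensed illustration: the
first-allocation hook becomes an ordinary typed func field, so the
unsafe.Pointer round-trip at both ends disappears. A minimal sketch with
invented names:

	package fixallocsketch

	import "unsafe"

	type allocator struct {
		first func(arg, p unsafe.Pointer) // typed hook; the compiler checks calls
		arg   unsafe.Pointer
	}

	func (a *allocator) notifyFirst(v unsafe.Pointer) {
		if a.first != nil {
			a.first(a.arg, v) // direct call; no *(*func(...))(unsafe.Pointer(&...)) cast
		}
	}
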
diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
index 82b12b6..54c92a9 100644
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -126,14 +126,14 @@
 	_DebugGC         = 0
 	_ConcurrentSweep = true
 	_FinBlockSize    = 4 * 1024
+
 	_RootData        = 0
 	_RootBss         = 1
 	_RootFinalizers  = 2
-	_RootSpans       = 3
-	_RootFlushCaches = 4
-	_RootCount       = 5
-
-	debugStackBarrier = false
+	_RootFlushCaches = 3
+	_RootSpans0      = 4
+	_RootSpansShards = 128
+	_RootCount       = _RootSpans0 + _RootSpansShards
 
 	// sweepMinHeapDistance is a lower bound on the heap distance
 	// (in bytes) reserved for concurrent sweeping between GC
@@ -141,18 +141,6 @@
 	sweepMinHeapDistance = 1024 * 1024
 )
 
-// firstStackBarrierOffset is the approximate byte offset at
-// which to place the first stack barrier from the current SP.
-// This is a lower bound on how much stack will have to be
-// re-scanned during mark termination. Subsequent barriers are
-// placed at firstStackBarrierOffset * 2^n offsets.
-//
-// For debugging, this can be set to 0, which will install a
-// stack barrier at every frame. If you do this, you may also
-// have to raise _StackMin, since the stack barrier
-// bookkeeping will use a large amount of each stack.
-var firstStackBarrierOffset = 1024
-
 // heapminimum is the minimum heap size at which to trigger GC.
 // For small heaps, this overrides the usual GOGC*live set rule.
 //
@@ -208,6 +196,7 @@
 	memstats.enablegc = true // now that runtime is initialized, GC is okay
 }
 
+//go:linkname setGCPercent runtime/debug.setGCPercent
 func setGCPercent(in int32) (out int32) {
 	lock(&mheap_.lock)
 	out = gcpercent
@@ -307,9 +296,9 @@
 
 type gcControllerState struct {
 	// scanWork is the total scan work performed this cycle. This
-	// is updated atomically during the cycle. Updates may be
-	// batched arbitrarily, since the value is only read at the
-	// end of the cycle.
+	// is updated atomically during the cycle. Updates occur in
+	// bounded batches, since it is both written and read
+	// throughout the cycle.
 	//
 	// Currently this is the bytes of heap scanned. For most uses,
 	// this is an opaque unit of work, but for estimation the
@@ -363,11 +352,14 @@
 	// dedicated mark workers get started.
 	dedicatedMarkWorkersNeeded int64
 
-	// assistRatio is the ratio of allocated bytes to scan work
-	// that should be performed by mutator assists. This is
+	// assistWorkPerByte is the ratio of scan work to allocated
+	// bytes that should be performed by mutator assists. This is
 	// computed at the beginning of each cycle and updated every
 	// time heap_scan is updated.
-	assistRatio float64
+	assistWorkPerByte float64
+
+	// assistBytesPerWork is 1/assistWorkPerByte.
+	assistBytesPerWork float64
 
 	// fractionalUtilizationGoal is the fraction of wall clock
 	// time that should be spent in the fractional mark worker.
@@ -420,6 +412,17 @@
 	// Compute the heap goal for this cycle
 	c.heapGoal = memstats.heap_reachable + memstats.heap_reachable*uint64(gcpercent)/100
 
+	// Ensure that the heap goal is at least a little larger than
+	// the current live heap size. This may not be the case if GC
+	// start is delayed or if the allocation that pushed heap_live
+	// over next_gc is large or if the trigger is really close to
+	// GOGC. Assist is proportional to this distance, so enforce a
+	// minimum distance, even if it means going over the GOGC goal
+	// by a tiny bit.
+	if c.heapGoal < memstats.heap_live+1024*1024 {
+		c.heapGoal = memstats.heap_live + 1024*1024
+	}
+
 	// Compute the total mark utilization goal and divide it among
 	// dedicated and fractional workers.
 	totalUtilizationGoal := float64(gomaxprocs) * gcGoalUtilization
@@ -444,7 +447,7 @@
 	c.revise()
 
 	if debug.gcpacertrace > 0 {
-		print("pacer: assist ratio=", c.assistRatio,
+		print("pacer: assist ratio=", c.assistWorkPerByte,
 			" (scan ", memstats.heap_scan>>20, " MB in ",
 			work.initialHeapLive>>20, "->",
 			c.heapGoal>>20, " MB)",
@@ -455,33 +458,55 @@
 
 // revise updates the assist ratio during the GC cycle to account for
 // improved estimates. This should be called either under STW or
-// whenever memstats.heap_scan is updated (with mheap_.lock held).
+// whenever memstats.heap_scan or memstats.heap_live is updated (with
+// mheap_.lock held).
+//
+// It should only be called when gcBlackenEnabled != 0 (because this
+// is when assists are enabled and the necessary statistics are
+// available).
 func (c *gcControllerState) revise() {
-	// Compute the expected scan work. This is a strict upper
-	// bound on the possible scan work in the current heap.
+	// Compute the expected scan work remaining.
 	//
+	// Note that the scannable heap size is likely to increase
+	// during the GC cycle. This is why it's important to revise
+	// the assist ratio throughout the cycle: if the scannable
+	// heap size increases, the assist ratio based on the initial
+	// scannable heap size may target too little scan work.
+	//
+	// This particular estimate is a strict upper bound on the
+	// possible remaining scan work for the current heap.
 	// You might consider dividing this by 2 (or by
 	// (100+GOGC)/100) to counter this over-estimation, but
 	// benchmarks show that this has almost no effect on mean
 	// mutator utilization, heap size, or assist time and it
 	// introduces the danger of under-estimating and letting the
 	// mutator outpace the garbage collector.
-	scanWorkExpected := memstats.heap_scan
+	scanWorkExpected := int64(memstats.heap_scan) - c.scanWork
+	if scanWorkExpected < 1000 {
+		// We set a somewhat arbitrary lower bound on
+		// remaining scan work since if we aim a little high,
+		// we can miss by a little.
+		//
+		// We *do* need to enforce that this is at least 1,
+		// since marking is racy and double-scanning objects
+		// may legitimately make the expected scan work
+		// negative.
+		scanWorkExpected = 1000
+	}
+
+	// Compute the heap distance remaining.
+	heapDistance := int64(c.heapGoal) - int64(memstats.heap_live)
+	if heapDistance <= 0 {
+		// This shouldn't happen, but if it does, avoid
+		// dividing by zero or setting the assist negative.
+		heapDistance = 1
+	}
 
 	// Compute the mutator assist ratio so by the time the mutator
 	// allocates the remaining heap bytes up to next_gc, it will
-	// have done (or stolen) the estimated amount of scan work.
-	heapDistance := int64(c.heapGoal) - int64(work.initialHeapLive)
-	if heapDistance <= 1024*1024 {
-		// heapDistance can be negative if GC start is delayed
-		// or if the allocation that pushed heap_live over
-		// next_gc is large or if the trigger is really close
-		// to GOGC. We don't want to set the assist negative
-		// (or divide by zero, or set it really high), so
-		// enforce a minimum on the distance.
-		heapDistance = 1024 * 1024
-	}
-	c.assistRatio = float64(scanWorkExpected) / float64(heapDistance)
+	// have done (or stolen) the remaining amount of scan work.
+	c.assistWorkPerByte = float64(scanWorkExpected) / float64(heapDistance)
+	c.assistBytesPerWork = float64(heapDistance) / float64(scanWorkExpected)
 }
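
The revised pacing arithmetic in isolation, as a sketch with illustrative
names (same floors as the code above, but not runtime code):

	package pacersketch

	// assistRatios returns remaining scan work per byte of remaining heap
	// distance, plus its reciprocal, exactly as revise computes them.
	func assistRatios(heapScan, scanWorkDone, heapGoal, heapLive int64) (workPerByte, bytesPerWork float64) {
		scanWorkExpected := heapScan - scanWorkDone
		if scanWorkExpected < 1000 {
			scanWorkExpected = 1000 // racy double-scanning can push this negative
		}
		heapDistance := heapGoal - heapLive
		if heapDistance <= 0 {
			heapDistance = 1 // avoid dividing by zero or setting the assist negative
		}
		return float64(scanWorkExpected) / float64(heapDistance),
			float64(heapDistance) / float64(scanWorkExpected)
	}
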
 
 // endCycle updates the GC controller state at the end of the
@@ -597,7 +622,7 @@
 		// else for a while, so kick everything out of its run
 		// queue.
 	} else {
-		if _p_.gcw.wbuf == 0 && work.full == 0 && work.partial == 0 {
+		if _p_.gcw.wbuf == 0 && work.full == 0 {
 			// No work to be done right now. This can
 			// happen at the end of the mark phase when
 			// there are still assists tapering off. Don't
@@ -687,17 +712,24 @@
 // marking as a fraction of GOMAXPROCS.
 const gcGoalUtilization = 0.25
 
-// gcBgCreditSlack is the amount of scan work credit background
-// scanning can accumulate locally before updating
-// gcController.bgScanCredit. Lower values give mutator assists more
-// accurate accounting of background scanning. Higher values reduce
-// memory contention.
-const gcBgCreditSlack = 2000
+// gcCreditSlack is the amount of scan work credit that can
+// accumulate locally before updating gcController.scanWork and,
+// optionally, gcController.bgScanCredit. Lower values give a more
+// accurate assist ratio and make it more likely that assists will
+// successfully steal background credit. Higher values reduce memory
+// contention.
+const gcCreditSlack = 2000
 
 // gcAssistTimeSlack is the nanoseconds of mutator assist time that
 // can accumulate on a P before updating gcController.assistTime.
 const gcAssistTimeSlack = 5000
 
+// gcOverAssistBytes determines how many extra allocation bytes of
+// assist credit a GC assist builds up when an assist happens. This
+// amortizes the cost of an assist by pre-paying for this many bytes
+// of future allocations.
+const gcOverAssistBytes = 1 << 20
+
 // Determine whether to initiate a GC.
 // If the GC is already working no need to trigger another one.
 // This should establish a feedback loop where if the GC does not
@@ -763,10 +795,8 @@
 }
 
 var work struct {
-	full  uint64 // lock-free list of full blocks workbuf
-	empty uint64 // lock-free list of empty blocks workbuf
-	// TODO(rlh): partial no longer used, remove. (issue #11922)
-	partial uint64                // lock-free list of partially filled blocks workbuf
+	full    uint64                // lock-free list of full blocks workbuf
+	empty   uint64                // lock-free list of empty blocks workbuf
 	pad0    [_CacheLineSize]uint8 // prevents false-sharing between full/empty and nproc/nwait
 	nproc   uint32
 	tstart  int64
@@ -775,6 +805,12 @@
 	alldone note
 	markfor *parfor
 
+	// finalizersDone indicates that finalizers and objects with
+	// finalizers have been scanned by markroot. During concurrent
+	// GC, this happens during the concurrent scan phase. During
+	// STW GC, this happens during mark termination.
+	finalizersDone bool
+
 	bgMarkReady note   // signal background mark worker has started
 	bgMarkDone  uint32 // cas to 1 when at a background mark completion point
 	// Background mark completion signaling
@@ -815,10 +851,13 @@
 	startGC(gcForceBlockMode, false)
 }
 
+// gcMode indicates how concurrent a GC cycle should be.
+type gcMode int
+
 const (
-	gcBackgroundMode = iota // concurrent GC
-	gcForceMode             // stop-the-world GC now
-	gcForceBlockMode        // stop-the-world GC now and wait for sweep
+	gcBackgroundMode gcMode = iota // concurrent GC and sweep
+	gcForceMode                    // stop-the-world GC now, concurrent sweep
+	gcForceBlockMode               // stop-the-world GC now and STW sweep
 )
 
 // startGC starts a GC cycle. If mode is gcBackgroundMode, this will
@@ -826,7 +865,7 @@
 // until the new GC cycle is started and finishes. If forceTrigger is
 // true, it indicates that GC should be started regardless of the
 // current heap size.
-func startGC(mode int, forceTrigger bool) {
+func startGC(mode gcMode, forceTrigger bool) {
 	// The gc is turned off (via enablegc) until the bootstrap has completed.
 	// Also, malloc gets called in the guts of a number of libraries that might be
 	// holding locks. To avoid deadlocks during stop-the-world, don't bother
@@ -901,7 +940,7 @@
 	}
 }
 
-func gc(mode int) {
+func gc(mode gcMode) {
 	// Timing/utilization tracking
 	var stwprocs, maxprocs int32
 	var tSweepTerm, tScan, tInstallWB, tMark, tMarkTerm int64
@@ -940,13 +979,18 @@
 
 	pauseStart = now
 	systemstack(stopTheWorldWithSema)
-	systemstack(finishsweep_m) // finish sweep before we start concurrent scan.
+	// Finish sweep before we start concurrent scan.
+	systemstack(func() {
+		finishsweep_m(true)
+	})
 	// clearpools before we start the GC. If we wait, the memory will not be
 	// reclaimed until the next GC cycle.
 	clearpools()
 
 	gcResetMarkState()
 
+	work.finalizersDone = false
+
 	if mode == gcBackgroundMode { // Do as much work concurrently as possible
 		gcController.startCycle()
 		heapGoal = gcController.heapGoal
@@ -979,6 +1023,10 @@
 			// boundaries where there are up-pointers.
 			setGCPhase(_GCscan)
 
+			// markrootSpans uses work.spans, so make sure
+			// it is up to date.
+			gcCopySpans()
+
 			gcBgMarkPrepare() // Must happen before assist enable.
 
 			// At this point all Ps have enabled the write
@@ -1046,6 +1094,10 @@
 		// below. The important thing is that the wb remains active until
 		// all marking is complete. This includes writes made by the GC.
 
+		// markroot is done now, so record that objects with
+		// finalizers have been scanned.
+		work.finalizersDone = true
+
 		// Flush the gcWork caches. This must be done before
 		// endCycle since endCycle depends on statistics kept
 		// in these caches.
@@ -1053,11 +1105,6 @@
 
 		gcController.endCycle()
 	} else {
-		// For non-concurrent GC (mode != gcBackgroundMode)
-		// The g stacks have not been scanned so clear g state
-		// such that mark termination scans all stacks.
-		gcResetGState()
-
 		t := nanotime()
 		tScan, tInstallWB, tMark, tMarkTerm = t, t, t, t
 		heapGoal = heap0
@@ -1102,7 +1149,6 @@
 			// Run a full stop-the-world mark using checkmark bits,
 			// to check that we didn't forget to mark anything during
 			// the concurrent mark process.
-			gcResetGState() // Rescan stacks
 			gcResetMarkState()
 			initCheckmarks()
 			gcMark(startTime)
@@ -1118,9 +1164,8 @@
 			// The g stacks have been scanned so
 			// they have gcscanvalid==true and gcworkdone==true.
 			// Reset these so that all stacks will be rescanned.
-			gcResetGState()
 			gcResetMarkState()
-			finishsweep_m()
+			finishsweep_m(true)
 
 			// Still in STW but gcphase is _GCoff, reset to _GCmarktermination
 			// At this point all objects will be found during the gcMark which
@@ -1317,7 +1362,7 @@
 		default:
 			throw("gcBgMarkWorker: unexpected gcMarkWorkerMode")
 		case gcMarkWorkerDedicatedMode:
-			gcDrain(&p.gcw, gcBgCreditSlack)
+			gcDrain(&p.gcw, gcDrainBlock|gcDrainFlushBgCredit)
 			// gcDrain did the xadd(&work.nwait +1) to
 			// match the decrement above. It only returns
 			// at a mark completion point.
@@ -1326,7 +1371,7 @@
 				throw("gcDrain returned with buffer")
 			}
 		case gcMarkWorkerFractionalMode, gcMarkWorkerIdleMode:
-			gcDrainUntilPreempt(&p.gcw, gcBgCreditSlack)
+			gcDrain(&p.gcw, gcDrainUntilPreempt|gcDrainFlushBgCredit)
 
 			// If we are nearing the end of mark, dispose
 			// of the cache promptly. We must do this
@@ -1346,7 +1391,7 @@
 					"work.nwait=", incnwait, "work.nproc=", work.nproc)
 				throw("work.nwait > work.nproc")
 			}
-			done = incnwait == work.nproc && work.full == 0 && work.partial == 0
+			done = incnwait == work.nproc && work.full == 0
 		}
 
 		// If this worker reached a background mark completion
@@ -1382,7 +1427,7 @@
 	if !p.gcw.empty() {
 		return true
 	}
-	if atomicload64(&work.full) != 0 || atomicload64(&work.partial) != 0 {
+	if atomicload64(&work.full) != 0 {
 		return true // global work available
 	}
 	return false
@@ -1437,20 +1482,21 @@
 	parfordo(work.markfor)
 
 	var gcw gcWork
-	gcDrain(&gcw, -1)
+	gcDrain(&gcw, gcDrainBlock)
 	gcw.dispose()
 
 	if work.full != 0 {
 		throw("work.full != 0")
 	}
-	if work.partial != 0 {
-		throw("work.partial != 0")
-	}
 
 	if work.nproc > 1 {
 		notesleep(&work.alldone)
 	}
 
+	// markroot is done now, so record that objects with
+	// finalizers have been scanned.
+	work.finalizersDone = true
+
 	for i := 0; i < int(gomaxprocs); i++ {
 		if allp[i].gcw.wbuf != 0 {
 			throw("P has cached GC work at end of mark termination")
@@ -1525,7 +1571,7 @@
 	}
 }
 
-func gcSweep(mode int) {
+func gcSweep(mode gcMode) {
 	if gcphase != _GCoff {
 		throw("gcSweep being done but phase is not GCoff")
 	}
@@ -1554,14 +1600,9 @@
 		return
 	}
 
-	// Account how much sweeping needs to be done before the next
-	// GC cycle and set up proportional sweep statistics.
-	var pagesToSweep uintptr
-	for _, s := range work.spans {
-		if s.state == mSpanInUse {
-			pagesToSweep += s.npages
-		}
-	}
+	// Concurrent sweep needs to sweep all of the in-use pages by
+	// the time the allocated heap reaches the GC trigger. Compute
+	// the ratio of in-use pages to sweep per byte allocated.
 	heapDistance := int64(memstats.next_gc) - int64(memstats.heap_live)
 	// Add a little margin so rounding errors and concurrent
 	// sweep are less likely to leave pages unswept when GC starts.
@@ -1571,7 +1612,7 @@
 		heapDistance = _PageSize
 	}
 	lock(&mheap_.lock)
-	mheap_.sweepPagesPerByte = float64(pagesToSweep) / float64(heapDistance)
+	mheap_.sweepPagesPerByte = float64(mheap_.pagesInUse) / float64(heapDistance)
 	mheap_.pagesSwept = 0
 	mheap_.spanBytesAlloc = 0
 	unlock(&mheap_.lock)
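
The new sweep pacing, condensed into a sketch (illustrative names; the real
code reads mheap_.pagesInUse under mheap_.lock as above):

	package sweepsketch

	// sweepRatio is pages to sweep per byte allocated: in-use pages over
	// the remaining heap distance, with the same margin and floor as gcSweep.
	func sweepRatio(pagesInUse uint64, nextGC, heapLive, pageSize int64) float64 {
		heapDistance := nextGC - heapLive
		heapDistance -= 1024 * 1024 // margin for rounding and concurrent sweep
		if heapDistance < pageSize {
			heapDistance = pageSize // avoid divide-by-zero and negative ratios
		}
		return float64(pagesInUse) / float64(heapDistance)
	}
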
@@ -1605,27 +1646,20 @@
 	unlock(&mheap_.lock)
 }
 
-// gcResetGState resets the GC state of all G's and returns the length
-// of allgs.
-func gcResetGState() (numgs int) {
+// gcResetMarkState resets global state prior to marking (concurrent
+// or STW) and resets the stack scan state of all Gs. Any Gs created
+// after this will also be in the reset state.
+func gcResetMarkState() {
 	// This may be called during a concurrent phase, so make sure
 	// allgs doesn't change.
 	lock(&allglock)
 	for _, gp := range allgs {
 		gp.gcscandone = false  // set to true in gcphasework
 		gp.gcscanvalid = false // stack has not been scanned
-		gp.gcalloc = 0
-		gp.gcscanwork = 0
+		gp.gcAssistBytes = 0
 	}
-	numgs = len(allgs)
 	unlock(&allglock)
-	return
-}
 
-// gcResetMarkState resets state prior to marking (concurrent or STW).
-//
-// TODO(austin): Merge with gcResetGState. See issue #11427.
-func gcResetMarkState() {
 	work.bytesMarked = 0
 	work.initialHeapLive = memstats.heap_live
 }
@@ -1701,7 +1735,7 @@
 	parfordo(work.markfor)
 	if gcphase != _GCscan {
 		var gcw gcWork
-		gcDrain(&gcw, -1) // blocks in getfull
+		gcDrain(&gcw, gcDrainBlock) // blocks in getfull
 		gcw.dispose()
 	}
 
diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go
index 44f9512..35bdda9 100644
--- a/src/runtime/mgcmark.go
+++ b/src/runtime/mgcmark.go
@@ -26,8 +26,12 @@
 	// runtime·restartg(mastergp) to make it Grunnable.
 	// At the bottom we will want to return this p back to the scheduler.
 
-	// Prepare flag indicating that the scan has not been completed.
-	local_allglen := gcResetGState()
+	// Snapshot of allglen. During concurrent scan, we just need
+	// to be consistent about how many markroot jobs we create and
+	// how many Gs we check. Gs may be created after this and
+	// they'll be scanned during mark termination. During mark
+	// termination, allglen isn't changing.
+	local_allglen := int(atomicloaduintptr(&allglen))
 
 	work.ndone = 0
 	useOneP := uint32(1) // For now do not do this in parallel.
@@ -74,41 +78,18 @@
 			scanblock(uintptr(unsafe.Pointer(&fb.fin[0])), uintptr(fb.cnt)*unsafe.Sizeof(fb.fin[0]), &finptrmask[0], &gcw)
 		}
 
-	case _RootSpans:
-		// mark MSpan.specials
-		sg := mheap_.sweepgen
-		for spanidx := uint32(0); spanidx < uint32(len(work.spans)); spanidx++ {
-			s := work.spans[spanidx]
-			if s.state != mSpanInUse {
-				continue
-			}
-			if !useCheckmark && s.sweepgen != sg {
-				// sweepgen was updated (+2) during non-checkmark GC pass
-				print("sweep ", s.sweepgen, " ", sg, "\n")
-				throw("gc: unswept span")
-			}
-			for sp := s.specials; sp != nil; sp = sp.next {
-				if sp.kind != _KindSpecialFinalizer {
-					continue
-				}
-				// don't mark finalized object, but scan it so we
-				// retain everything it points to.
-				spf := (*specialfinalizer)(unsafe.Pointer(sp))
-				// A finalizer can be set for an inner byte of an object, find object beginning.
-				p := uintptr(s.start<<_PageShift) + uintptr(spf.special.offset)/s.elemsize*s.elemsize
-				if gcphase != _GCscan {
-					scanobject(p, &gcw) // scanned during mark termination
-				}
-				scanblock(uintptr(unsafe.Pointer(&spf.fn)), ptrSize, &oneptrmask[0], &gcw)
-			}
-		}
-
 	case _RootFlushCaches:
 		if gcphase != _GCscan { // Do not flush mcaches during GCscan phase.
 			flushallmcaches()
 		}
 
 	default:
+		if _RootSpans0 <= i && i < _RootSpans0+_RootSpansShards {
+			// mark MSpan.specials
+			markrootSpans(&gcw, int(i)-_RootSpans0)
+			break
+		}
+
 		// the rest is scanning goroutine stacks
 		if uintptr(i-_RootCount) >= allglen {
 			throw("markroot: bad index")
@@ -136,28 +117,97 @@
 	gcw.dispose()
 }
 
-// gcAssistAlloc records and allocation of size bytes and, if
-// allowAssist is true, may assist GC scanning in proportion to the
-// allocations performed by this mutator since the last assist.
+// markrootSpans marks roots for one shard (out of _RootSpansShards)
+// of work.spans.
 //
-// It should only be called if gcAssistAlloc != 0.
-//
-// This must be called with preemption disabled.
 //go:nowritebarrier
-func gcAssistAlloc(size uintptr, allowAssist bool) {
-	// Find the G responsible for this assist.
-	gp := getg()
-	if gp.m.curg != nil {
-		gp = gp.m.curg
-	}
+func markrootSpans(gcw *gcWork, shard int) {
+	// Objects with finalizers have two GC-related invariants:
+	//
+	// 1) Everything reachable from the object must be marked.
+	// This ensures that when we pass the object to its finalizer,
+	// everything the finalizer can reach will be retained.
+	//
+	// 2) Finalizer specials (which are not in the garbage
+	// collected heap) are roots. In practice, this means the fn
+	// field must be scanned.
+	//
+	// TODO(austin): There are several ideas for making this more
+	// efficient in issue #11485.
 
-	// Record allocation.
-	gp.gcalloc += size
-
-	if !allowAssist {
+	// We process objects with finalizers only during the first
+	// markroot pass. In concurrent GC, this happens during
+	// concurrent scan and we depend on addfinalizer to ensure the
+	// above invariants for objects that get finalizers after
+	// concurrent scan. In STW GC, this will happen during mark
+	// termination.
+	if work.finalizersDone {
 		return
 	}
 
+	sg := mheap_.sweepgen
+	startSpan := shard * len(work.spans) / _RootSpansShards
+	endSpan := (shard + 1) * len(work.spans) / _RootSpansShards
+	// Note that work.spans may not include spans that were
+	// allocated between entering the scan phase and now. This is
+	// okay because any objects with finalizers in those spans
+	// must have been allocated and given finalizers after we
+	// entered the scan phase, so addfinalizer will have ensured
+	// the above invariants for them.
+	for _, s := range work.spans[startSpan:endSpan] {
+		if s.state != mSpanInUse {
+			continue
+		}
+		if !useCheckmark && s.sweepgen != sg {
+			// sweepgen was updated (+2) during non-checkmark GC pass
+			print("sweep ", s.sweepgen, " ", sg, "\n")
+			throw("gc: unswept span")
+		}
+
+		// Speculatively check if there are any specials
+		// without acquiring the span lock. This may race with
+		// adding the first special to a span, but in that
+		// case addfinalizer will observe that the GC is
+		// active (which is globally synchronized) and ensure
+		// the above invariants. We may also ensure the
+		// invariants, but it's okay to scan an object twice.
+		if s.specials == nil {
+			continue
+		}
+
+		// Lock the specials to prevent a special from being
+		// removed from the list while we're traversing it.
+		lock(&s.speciallock)
+
+		for sp := s.specials; sp != nil; sp = sp.next {
+			if sp.kind != _KindSpecialFinalizer {
+				continue
+			}
+			// don't mark finalized object, but scan it so we
+			// retain everything it points to.
+			spf := (*specialfinalizer)(unsafe.Pointer(sp))
+			// A finalizer can be set for an inner byte of an object, find object beginning.
+			p := uintptr(s.start<<_PageShift) + uintptr(spf.special.offset)/s.elemsize*s.elemsize
+
+			// Mark everything that can be reached from
+			// the object (but *not* the object itself or
+			// we'll never collect it).
+			scanobject(p, gcw)
+
+			// The special itself is a root.
+			scanblock(uintptr(unsafe.Pointer(&spf.fn)), ptrSize, &oneptrmask[0], gcw)
+		}
+
+		unlock(&s.speciallock)
+	}
+}
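
The shard bounds computation above generalizes as follows (sketch).
Multiplying before dividing spreads the remainder evenly: for example, 10
spans over 4 shards yields [0,2), [2,5), [5,7), [7,10).

	package shardsketch

	// shardBounds returns the half-open range of spans covered by one of
	// nshards shards, as markrootSpans computes it.
	func shardBounds(nspans, shard, nshards int) (start, end int) {
		return shard * nspans / nshards, (shard + 1) * nspans / nshards
	}
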
+
+// gcAssistAlloc performs GC work to make gp's assist debt positive.
+// gp must be the calling user goroutine.
+//
+// This must be called with preemption enabled.
+//go:nowritebarrier
+func gcAssistAlloc(gp *g) {
 	// Don't assist in non-preemptible contexts. These are
 	// generally fragile and won't allow the assist to block.
 	if getg() == gp.m.g0 {
@@ -167,13 +217,11 @@
 		return
 	}
 
-	// Compute the amount of assist scan work we need to do.
-	scanWork := int64(gcController.assistRatio*float64(gp.gcalloc)) - gp.gcscanwork
-	// scanWork can be negative if the last assist scanned a large
-	// object and we're still ahead of our assist goal.
-	if scanWork <= 0 {
-		return
-	}
+	// Compute the amount of scan work we need to do to make the
+	// balance positive. We over-assist to build up credit for
+	// future allocations and amortize the cost of assisting.
+	debtBytes := -gp.gcAssistBytes + gcOverAssistBytes
+	scanWork := int64(gcController.assistWorkPerByte * float64(debtBytes))
 
 retry:
 	// Steal as much credit as we can from the background GC's
@@ -187,15 +235,18 @@
 	if bgScanCredit > 0 {
 		if bgScanCredit < scanWork {
 			stolen = bgScanCredit
+			gp.gcAssistBytes += 1 + int64(gcController.assistBytesPerWork*float64(stolen))
 		} else {
 			stolen = scanWork
+			gp.gcAssistBytes += debtBytes
 		}
 		xaddint64(&gcController.bgScanCredit, -stolen)
 
 		scanWork -= stolen
-		gp.gcscanwork += stolen
 
 		if scanWork == 0 {
+			// We were able to steal all of the credit we
+			// needed.
 			return
 		}
 	}
@@ -228,17 +279,21 @@
 		// drain own cached work first in the hopes that it
 		// will be more cache friendly.
 		gcw := &getg().m.p.ptr().gcw
-		startScanWork := gcw.scanWork
-		gcDrainN(gcw, scanWork)
-		// Record that we did this much scan work.
-		workDone := gcw.scanWork - startScanWork
-		gp.gcscanwork += workDone
-		scanWork -= workDone
+		workDone := gcDrainN(gcw, scanWork)
 		// If we are near the end of the mark phase
 		// dispose of the gcw.
 		if gcBlackenPromptly {
 			gcw.dispose()
 		}
+
+		// Record that we did this much scan work.
+		scanWork -= workDone
+		// Back out the number of bytes of assist credit that
+		// this scan work counts for. The "1+" is a poor man's
+		// round-up, to ensure this adds credit even if
+		// assistBytesPerWork is very low.
+		gp.gcAssistBytes += 1 + int64(gcController.assistBytesPerWork*float64(workDone))
+
 		// If this is the last worker and we ran out of work,
 		// signal a completion point.
 		incnwait := xadd(&work.nwait, +1)
@@ -249,7 +304,7 @@
 			throw("work.nwait > work.nproc")
 		}
 
-		if incnwait == work.nproc && work.full == 0 && work.partial == 0 {
+		if incnwait == work.nproc && work.full == 0 {
 			// This has reached a background completion
 			// point.
 			if gcBlackenPromptly {
@@ -292,7 +347,12 @@
 		// more, so go around again after performing an
 		// interruptible sleep for 100 us (the same as the
 		// getfull barrier) to let other mutators run.
+
+		// timeSleep may allocate, so avoid recursive assist.
+		gcAssistBytes := gp.gcAssistBytes
+		gp.gcAssistBytes = int64(^uint64(0) >> 1)
 		timeSleep(100 * 1000)
+		gp.gcAssistBytes = gcAssistBytes
 		goto retry
 	}
 }
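
The opening arithmetic of gcAssistAlloc, isolated as a sketch (invented names;
overAssist stands in for gcOverAssistBytes): convert the byte debt plus the
over-assist prepayment into scan work, then satisfy as much as possible by
stealing background credit before scanning directly.

	package assistsketch

	const overAssist = 1 << 20 // gcOverAssistBytes in this patch

	func assistPlan(gcAssistBytes, bgScanCredit int64, workPerByte float64) (scanWork, stolen int64) {
		debtBytes := -gcAssistBytes + overAssist
		scanWork = int64(workPerByte * float64(debtBytes))
		if bgScanCredit > 0 {
			stolen = bgScanCredit
			if stolen > scanWork {
				stolen = scanWork
			}
			scanWork -= stolen
		}
		return
	}
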
@@ -440,12 +500,10 @@
 	size := frame.varp - frame.sp
 	var minsize uintptr
 	switch thechar {
-	case '6', '8':
-		minsize = 0
 	case '7':
 		minsize = spAlign
 	default:
-		minsize = ptrSize
+		minsize = minFrameSize
 	}
 	if size > minsize {
 		stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps))
@@ -487,197 +545,54 @@
 	}
 }
 
-// gcMaxStackBarriers returns the maximum number of stack barriers
-// that can be installed in a stack of stackSize bytes.
-func gcMaxStackBarriers(stackSize int) (n int) {
-	if firstStackBarrierOffset == 0 {
-		// Special debugging case for inserting stack barriers
-		// at every frame. Steal half of the stack for the
-		// []stkbar. Technically, if the stack were to consist
-		// solely of return PCs we would need two thirds of
-		// the stack, but stealing that much breaks things and
-		// this doesn't happen in practice.
-		return stackSize / 2 / int(unsafe.Sizeof(stkbar{}))
-	}
+type gcDrainFlags int
 
-	offset := firstStackBarrierOffset
-	for offset < stackSize {
-		n++
-		offset *= 2
-	}
-	return n + 1
-}
+const (
+	gcDrainUntilPreempt gcDrainFlags = 1 << iota
+	gcDrainFlushBgCredit
 
-// gcInstallStackBarrier installs a stack barrier over the return PC of frame.
-//go:nowritebarrier
-func gcInstallStackBarrier(gp *g, frame *stkframe) bool {
-	if frame.lr == 0 {
-		if debugStackBarrier {
-			print("not installing stack barrier with no LR, goid=", gp.goid, "\n")
-		}
-		return false
-	}
+	// gcDrainBlock is the opposite of gcDrainUntilPreempt. This
+	// is the default, but callers should use the constant for
+	// documentation purposes.
+	gcDrainBlock gcDrainFlags = 0
+)
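
A quick sketch of how these flags combine at the two kinds of call sites in
this patch (self-contained toy constants mirroring the block above):

	package main

	import "fmt"

	const (
		drainUntilPreempt = 1 << iota // gcDrainUntilPreempt
		drainFlushBgCredit            // gcDrainFlushBgCredit
		drainBlock        = 0         // gcDrainBlock: absence of UntilPreempt
	)

	func main() {
		flags := drainUntilPreempt | drainFlushBgCredit // fractional/idle workers
		fmt.Println(flags&drainUntilPreempt == 0)       // false: returns on preemption
		fmt.Println(flags&drainFlushBgCredit != 0)      // true: donates scan credit

		flags = drainBlock | drainFlushBgCredit   // dedicated workers
		fmt.Println(flags&drainUntilPreempt == 0) // true: blocking drain
	}
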
 
-	if frame.fn.entry == cgocallback_gofuncPC {
-		// cgocallback_gofunc doesn't return to its LR;
-		// instead, its return path puts LR in g.sched.pc and
-		// switches back to the system stack on which
-		// cgocallback_gofunc was originally called. We can't
-		// have a stack barrier in g.sched.pc, so don't
-		// install one in this frame.
-		if debugStackBarrier {
-			print("not installing stack barrier over LR of cgocallback_gofunc, goid=", gp.goid, "\n")
-		}
-		return false
-	}
-
-	// Save the return PC and overwrite it with stackBarrier.
-	var lrUintptr uintptr
-	if usesLR {
-		lrUintptr = frame.sp
-	} else {
-		lrUintptr = frame.fp - regSize
-	}
-	lrPtr := (*uintreg)(unsafe.Pointer(lrUintptr))
-	if debugStackBarrier {
-		print("install stack barrier at ", hex(lrUintptr), " over ", hex(*lrPtr), ", goid=", gp.goid, "\n")
-		if uintptr(*lrPtr) != frame.lr {
-			print("frame.lr=", hex(frame.lr))
-			throw("frame.lr differs from stack LR")
-		}
-	}
-
-	gp.stkbar = gp.stkbar[:len(gp.stkbar)+1]
-	stkbar := &gp.stkbar[len(gp.stkbar)-1]
-	stkbar.savedLRPtr = lrUintptr
-	stkbar.savedLRVal = uintptr(*lrPtr)
-	*lrPtr = uintreg(stackBarrierPC)
-	return true
-}
-
-// gcRemoveStackBarriers removes all stack barriers installed in gp's stack.
-//go:nowritebarrier
-func gcRemoveStackBarriers(gp *g) {
-	if debugStackBarrier && gp.stkbarPos != 0 {
-		print("hit ", gp.stkbarPos, " stack barriers, goid=", gp.goid, "\n")
-	}
-
-	// Remove stack barriers that we didn't hit.
-	for _, stkbar := range gp.stkbar[gp.stkbarPos:] {
-		gcRemoveStackBarrier(gp, stkbar)
-	}
-
-	// Clear recorded stack barriers so copystack doesn't try to
-	// adjust them.
-	gp.stkbarPos = 0
-	gp.stkbar = gp.stkbar[:0]
-}
-
-// gcRemoveStackBarrier removes a single stack barrier. It is the
-// inverse operation of gcInstallStackBarrier.
+// gcDrain scans objects in work buffers, blackening grey objects
+// until all work buffers have been drained.
 //
-// This is nosplit to ensure gp's stack does not move.
+// If flags&gcDrainUntilPreempt != 0, gcDrain also returns if
+// g.preempt is set. Otherwise, this will block until all dedicated
+// workers are blocked in gcDrain.
 //
+// If flags&gcDrainFlushBgCredit != 0, gcDrain flushes scan work
+// credit to gcController.bgScanCredit every gcCreditSlack units of
+// scan work.
 //go:nowritebarrier
-//go:nosplit
-func gcRemoveStackBarrier(gp *g, stkbar stkbar) {
-	if debugStackBarrier {
-		print("remove stack barrier at ", hex(stkbar.savedLRPtr), " with ", hex(stkbar.savedLRVal), ", goid=", gp.goid, "\n")
-	}
-	lrPtr := (*uintreg)(unsafe.Pointer(stkbar.savedLRPtr))
-	if val := *lrPtr; val != uintreg(stackBarrierPC) {
-		printlock()
-		print("at *", hex(stkbar.savedLRPtr), " expected stack barrier PC ", hex(stackBarrierPC), ", found ", hex(val), ", goid=", gp.goid, "\n")
-		print("gp.stkbar=")
-		gcPrintStkbars(gp.stkbar)
-		print(", gp.stkbarPos=", gp.stkbarPos, ", gp.stack=[", hex(gp.stack.lo), ",", hex(gp.stack.hi), ")\n")
-		throw("stack barrier lost")
-	}
-	*lrPtr = uintreg(stkbar.savedLRVal)
-}
-
-// gcPrintStkbars prints a []stkbar for debugging.
-func gcPrintStkbars(stkbar []stkbar) {
-	print("[")
-	for i, s := range stkbar {
-		if i > 0 {
-			print(" ")
-		}
-		print("*", hex(s.savedLRPtr), "=", hex(s.savedLRVal))
-	}
-	print("]")
-}
-
-// gcUnwindBarriers marks all stack barriers up the frame containing
-// sp as hit and removes them. This is used during stack unwinding for
-// panic/recover and by heapBitsBulkBarrier to force stack re-scanning
-// when its destination is on the stack.
-//
-// This is nosplit to ensure gp's stack does not move.
-//
-//go:nosplit
-func gcUnwindBarriers(gp *g, sp uintptr) {
-	// On LR machines, if there is a stack barrier on the return
-	// from the frame containing sp, this will mark it as hit even
-	// though it isn't, but it's okay to be conservative.
-	before := gp.stkbarPos
-	for int(gp.stkbarPos) < len(gp.stkbar) && gp.stkbar[gp.stkbarPos].savedLRPtr < sp {
-		gcRemoveStackBarrier(gp, gp.stkbar[gp.stkbarPos])
-		gp.stkbarPos++
-	}
-	if debugStackBarrier && gp.stkbarPos != before {
-		print("skip barriers below ", hex(sp), " in goid=", gp.goid, ": ")
-		gcPrintStkbars(gp.stkbar[before:gp.stkbarPos])
-		print("\n")
-	}
-}
-
-// nextBarrierPC returns the original return PC of the next stack barrier.
-// Used by getcallerpc, so it must be nosplit.
-//go:nosplit
-func nextBarrierPC() uintptr {
-	gp := getg()
-	return gp.stkbar[gp.stkbarPos].savedLRVal
-}
-
-// setNextBarrierPC sets the return PC of the next stack barrier.
-// Used by setcallerpc, so it must be nosplit.
-//go:nosplit
-func setNextBarrierPC(pc uintptr) {
-	gp := getg()
-	gp.stkbar[gp.stkbarPos].savedLRVal = pc
-}
-
-// TODO(austin): Can we consolidate the gcDrain* functions?
-
-// gcDrain scans objects in work buffers, blackening grey
-// objects until all work buffers have been drained.
-// If flushScanCredit != -1, gcDrain flushes accumulated scan work
-// credit to gcController.bgScanCredit whenever gcw's local scan work
-// credit exceeds flushScanCredit.
-//go:nowritebarrier
-func gcDrain(gcw *gcWork, flushScanCredit int64) {
+func gcDrain(gcw *gcWork, flags gcDrainFlags) {
 	if !writeBarrierEnabled {
 		throw("gcDrain phase incorrect")
 	}
 
-	var lastScanFlush, nextScanFlush int64
-	if flushScanCredit != -1 {
-		lastScanFlush = gcw.scanWork
-		nextScanFlush = lastScanFlush + flushScanCredit
-	} else {
-		nextScanFlush = int64(^uint64(0) >> 1)
-	}
+	blocking := flags&gcDrainUntilPreempt == 0
+	flushBgCredit := flags&gcDrainFlushBgCredit != 0
 
-	for {
+	initScanWork := gcw.scanWork
+
+	gp := getg()
+	for blocking || !gp.preempt {
 		// If another proc wants a pointer, give it some.
 		if work.nwait > 0 && work.full == 0 {
 			gcw.balance()
 		}
 
-		b := gcw.get()
+		var b uintptr
+		if blocking {
+			b = gcw.get()
+		} else {
+			b = gcw.tryGet()
+		}
 		if b == 0 {
-			// work barrier reached
+			// work barrier reached or tryGet failed.
 			break
 		}
 		// If the current wbuf is filled by the scan a new wbuf might be
@@ -691,68 +606,23 @@
 		// Flush background scan work credit to the global
 		// account if we've accumulated enough locally so
 		// mutator assists can draw on it.
-		if gcw.scanWork >= nextScanFlush {
-			credit := gcw.scanWork - lastScanFlush
-			xaddint64(&gcController.bgScanCredit, credit)
-			lastScanFlush = gcw.scanWork
-			nextScanFlush = lastScanFlush + flushScanCredit
+		if gcw.scanWork >= gcCreditSlack {
+			xaddint64(&gcController.scanWork, gcw.scanWork)
+			if flushBgCredit {
+				xaddint64(&gcController.bgScanCredit, gcw.scanWork-initScanWork)
+				initScanWork = 0
+			}
+			gcw.scanWork = 0
 		}
 	}
-	if flushScanCredit != -1 {
-		credit := gcw.scanWork - lastScanFlush
-		xaddint64(&gcController.bgScanCredit, credit)
-	}
-}
 
-// gcDrainUntilPreempt blackens grey objects until g.preempt is set.
-// This is best-effort, so it will return as soon as it is unable to
-// get work, even though there may be more work in the system.
-//go:nowritebarrier
-func gcDrainUntilPreempt(gcw *gcWork, flushScanCredit int64) {
-	if !writeBarrierEnabled {
-		println("gcphase =", gcphase)
-		throw("gcDrainUntilPreempt phase incorrect")
-	}
-
-	var lastScanFlush, nextScanFlush int64
-	if flushScanCredit != -1 {
-		lastScanFlush = gcw.scanWork
-		nextScanFlush = lastScanFlush + flushScanCredit
-	} else {
-		nextScanFlush = int64(^uint64(0) >> 1)
-	}
-
-	gp := getg()
-	for !gp.preempt {
-		// If the work queue is empty, balance. During
-		// concurrent mark we don't really know if anyone else
-		// can make use of this work, but even if we're the
-		// only worker, the total cost of this per cycle is
-		// only O(_WorkbufSize) pointer copies.
-		if work.full == 0 && work.partial == 0 {
-			gcw.balance()
+	// Flush remaining scan work credit.
+	if gcw.scanWork > 0 {
+		xaddint64(&gcController.scanWork, gcw.scanWork)
+		if flushBgCredit {
+			xaddint64(&gcController.bgScanCredit, gcw.scanWork-initScanWork)
 		}
-
-		b := gcw.tryGet()
-		if b == 0 {
-			// No more work
-			break
-		}
-		scanobject(b, gcw)
-
-		// Flush background scan work credit to the global
-		// account if we've accumulated enough locally so
-		// mutator assists can draw on it.
-		if gcw.scanWork >= nextScanFlush {
-			credit := gcw.scanWork - lastScanFlush
-			xaddint64(&gcController.bgScanCredit, credit)
-			lastScanFlush = gcw.scanWork
-			nextScanFlush = lastScanFlush + flushScanCredit
-		}
-	}
-	if flushScanCredit != -1 {
-		credit := gcw.scanWork - lastScanFlush
-		xaddint64(&gcController.bgScanCredit, credit)
+		gcw.scanWork = 0
 	}
 }
 
@@ -760,24 +630,42 @@
 // scanWork units of scan work. This is best-effort, so it may perform
 // less work if it fails to get a work buffer. Otherwise, it will
 // perform at least n units of work, but may perform more because
-// scanning is always done in whole object increments.
+// scanning is always done in whole object increments. It returns the
+// amount of scan work performed.
 //go:nowritebarrier
-func gcDrainN(gcw *gcWork, scanWork int64) {
+func gcDrainN(gcw *gcWork, scanWork int64) int64 {
 	if !writeBarrierEnabled {
 		throw("gcDrainN phase incorrect")
 	}
-	targetScanWork := gcw.scanWork + scanWork
-	for gcw.scanWork < targetScanWork {
+
+	// There may already be scan work on the gcw, which we don't
+	// want to claim was done by this call.
+	workFlushed := -gcw.scanWork
+
+	for workFlushed+gcw.scanWork < scanWork {
 		// This might be a good place to add prefetch code...
 		// if(wbuf.nobj > 4) {
 		//         PREFETCH(wbuf->obj[wbuf.nobj - 3]);
 		//  }
 		b := gcw.tryGet()
 		if b == 0 {
-			return
+			break
 		}
 		scanobject(b, gcw)
+
+		// Flush background scan work credit.
+		if gcw.scanWork >= gcCreditSlack {
+			xaddint64(&gcController.scanWork, gcw.scanWork)
+			workFlushed += gcw.scanWork
+			gcw.scanWork = 0
+		}
 	}
+
+	// Unlike gcDrain, there's no need to flush remaining work
+	// here because this never flushes to bgScanCredit and
+	// gcw.dispose will flush any remaining work to scanWork.
+
+	return workFlushed + gcw.scanWork
 }
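
The accounting in gcDrainN is subtle enough to merit a sketch (invented
names): work already batched on the buffer must not be claimed by this call,
so the flushed counter starts negative and the return value counts only work
done here.

	package drainsketch

	const creditSlack = 2000 // gcCreditSlack

	func drainN(local, target int64, tryScan func() (int64, bool)) int64 {
		flushed := -local // don't claim pre-existing batched work
		for flushed+local < target {
			units, ok := tryScan()
			if !ok {
				break // best-effort: no work buffer available
			}
			local += units
			if local >= creditSlack {
				flushed += local // xaddint64(&gcController.scanWork, ...) in the runtime
				local = 0
			}
		}
		return flushed + local
	}
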
 
 // scanblock scans b as scanobject would, but using an explicit
@@ -809,7 +697,7 @@
 				// Same work as in scanobject; see comments there.
 				obj := *(*uintptr)(unsafe.Pointer(b + i))
 				if obj != 0 && arena_start <= obj && obj < arena_used {
-					if obj, hbits, span := heapBitsForObject(obj); obj != 0 {
+					if obj, hbits, span := heapBitsForObject(obj, b, i); obj != 0 {
 						greyobject(obj, b, i, hbits, span, gcw)
 					}
 				}
@@ -875,7 +763,7 @@
 		// Check if it points into heap and not back at the current object.
 		if obj != 0 && arena_start <= obj && obj < arena_used && obj-b >= n {
 			// Mark the object.
-			if obj, hbits, span := heapBitsForObject(obj); obj != 0 {
+			if obj, hbits, span := heapBitsForObject(obj, b, i); obj != 0 {
 				greyobject(obj, b, i, hbits, span, gcw)
 			}
 		}
@@ -889,7 +777,7 @@
 // Preemption must be disabled.
 //go:nowritebarrier
 func shade(b uintptr) {
-	if obj, hbits, span := heapBitsForObject(b); obj != 0 {
+	if obj, hbits, span := heapBitsForObject(b, 0, 0); obj != 0 {
 		gcw := &getg().m.p.ptr().gcw
 		greyobject(obj, 0, 0, hbits, span, gcw)
 		if gcphase == _GCmarktermination || gcBlackenPromptly {
@@ -960,7 +848,7 @@
 // field at byte offset off in obj.
 func gcDumpObject(label string, obj, off uintptr) {
 	if obj < mheap_.arena_start || obj >= mheap_.arena_used {
-		print(label, "=", hex(obj), " is not a heap object\n")
+		print(label, "=", hex(obj), " is not in the Go heap\n")
 		return
 	}
 	k := obj >> _PageShift
@@ -973,13 +861,28 @@
 		return
 	}
 	print(" s.start*_PageSize=", hex(s.start*_PageSize), " s.limit=", hex(s.limit), " s.sizeclass=", s.sizeclass, " s.elemsize=", s.elemsize, "\n")
+	skipped := false
 	for i := uintptr(0); i < s.elemsize; i += ptrSize {
+		// For big objects, just print the beginning (because
+		// that usually hints at the object's type) and the
+		// fields around off.
+		if !(i < 128*ptrSize || off-16*ptrSize < i && i < off+16*ptrSize) {
+			skipped = true
+			continue
+		}
+		if skipped {
+			print(" ...\n")
+			skipped = false
+		}
 		print(" *(", label, "+", i, ") = ", hex(*(*uintptr)(unsafe.Pointer(obj + uintptr(i)))))
 		if i == off {
 			print(" <==")
 		}
 		print("\n")
 	}
+	if skipped {
+		print(" ...\n")
+	}
 }
 
 // If gcBlackenPromptly is true we are in the second mark phase, so we allocate black.
diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go
index eaa4463..9468af9 100644
--- a/src/runtime/mgcsweep.go
+++ b/src/runtime/mgcsweep.go
@@ -24,20 +24,28 @@
 }
 
 //go:nowritebarrier
-func finishsweep_m() {
-	// The world is stopped so we should be able to complete the sweeps
-	// quickly.
+func finishsweep_m(stw bool) {
+	// Sweeping must be complete before marking commences, so
+	// sweep any unswept spans. If this is a concurrent GC, there
+	// shouldn't be any spans left to sweep, so this should finish
+	// instantly. If GC was forced before the concurrent sweep
+	// finished, there may be spans to sweep.
 	for sweepone() != ^uintptr(0) {
 		sweep.npausesweep++
 	}
 
 	// There may be some other spans being swept concurrently that
 	// we need to wait for. If finishsweep_m is done with the world stopped
-	// this code is not required.
-	sg := mheap_.sweepgen
-	for _, s := range work.spans {
-		if s.sweepgen != sg && s.state == _MSpanInUse {
-			mSpan_EnsureSwept(s)
+	// this is not required because the STW must have waited for sweeps.
+	//
+	// TODO(austin): As of this writing, we always pass true for stw.
+	// Consider removing this code.
+	if !stw {
+		sg := mheap_.sweepgen
+		for _, s := range work.spans {
+			if s.sweepgen != sg && s.state == _MSpanInUse {
+				mSpan_EnsureSwept(s)
+			}
 		}
 	}
 }
@@ -235,9 +243,6 @@
 			heapBitsForSpan(p).initSpan(s.layout())
 			s.needzero = 1
 
-			// important to set sweepgen before returning it to heap
-			atomicstore(&s.sweepgen, sweepgen)
-
 			// Free the span after heapBitsSweepSpan
 			// returns, since it's not done with the span.
 			freeToHeap = true
@@ -264,10 +269,7 @@
 	// But we need to set it before we make the span available for allocation
 	// (return it to heap or mcentral), because allocation code assumes that a
 	// span is already swept if available for allocation.
-	//
-	// TODO(austin): Clean this up by consolidating atomicstore in
-	// large span path above with this.
-	if !freeToHeap && nfree == 0 {
+	if freeToHeap || nfree == 0 {
 		// The span must be in our exclusive ownership until we update sweepgen,
 		// check for potential races.
 		if s.state != mSpanInUse || s.sweepgen != sweepgen-1 {
diff --git a/src/runtime/mgcwork.go b/src/runtime/mgcwork.go
index b18eaaf..4d305e2 100644
--- a/src/runtime/mgcwork.go
+++ b/src/runtime/mgcwork.go
@@ -51,7 +51,7 @@
 //     (preemption must be disabled)
 //     gcw := &getg().m.p.ptr().gcw
 //     .. call gcw.put() to produce and gcw.get() to consume ..
-//     if gcphase == _GCmarktermination {
+//     if gcBlackenPromptly {
 //         gcw.dispose()
 //     }
 //
@@ -68,7 +68,7 @@
 	bytesMarked uint64
 
 	// Scan work performed on this gcWork. This is aggregated into
-	// gcController by dispose.
+	// gcController by dispose and may also be flushed by callers.
 	scanWork int64
 }
 
@@ -80,7 +80,7 @@
 
 	wbuf := w.wbuf.ptr()
 	if wbuf == nil {
-		wbuf = getpartialorempty(42)
+		wbuf = getempty(42)
 		w.wbuf = wbufptrOf(wbuf)
 	}
 
@@ -204,7 +204,7 @@
 type workbufhdr struct {
 	node  lfnode // must be first
 	nobj  int
-	inuse bool   // This workbuf is in use by some gorotuine and is not on the work.empty/partial/full queues.
+	inuse bool   // This workbuf is in use by some goroutine and is not on the work.empty/full queues.
 	log   [4]int // line numbers forming a history of ownership changes to workbuf
 }
 
@@ -217,7 +217,7 @@
 // workbuf factory routines. These funcs are used to manage the
 // workbufs.
 // If the GC asks for some work these are the only routines that
-// make partially full wbufs available to the GC.
+// make wbufs available to the GC.
 // Each of the gets and puts also takes a distinct integer that is used
 // to record a brief history of changes to ownership of the workbuf.
 // The convention is to use a unique line number but any encoding
@@ -314,54 +314,11 @@
 	lfstackpush(&work.full, &b.node)
 }
 
-// getpartialorempty tries to return a partially empty
-// and if none are available returns an empty one.
-// entry is used to provide a brief history of ownership
-// using entry + xxx00000 to
-// indicating that two line numbers in the call chain.
-//go:nowritebarrier
-func getpartialorempty(entry int) *workbuf {
-	b := (*workbuf)(lfstackpop(&work.partial))
-	if b != nil {
-		b.logget(entry)
-		return b
-	}
-	// Let getempty do the logget check but
-	// use the entry to encode that it passed
-	// through this routine.
-	b = getempty(entry + 80700000)
-	return b
-}
-
-// putpartial puts empty buffers on the work.empty queue,
-// full buffers on the work.full queue and
-// others on the work.partial queue.
-// entry is used to provide a brief history of ownership
-// using entry + xxx00000 to
-// indicating that two call chain line numbers.
-//go:nowritebarrier
-func putpartial(b *workbuf, entry int) {
-	if b.nobj == 0 {
-		putempty(b, entry+81500000)
-	} else if b.nobj < len(b.obj) {
-		b.logput(entry)
-		lfstackpush(&work.partial, &b.node)
-	} else if b.nobj == len(b.obj) {
-		b.logput(entry)
-		lfstackpush(&work.full, &b.node)
-	} else {
-		throw("putpartial: bad Workbuf b.nobj")
-	}
-}
-
 // trygetfull tries to get a full or partially empty workbuffer.
 // If one is not immediately available return nil
 //go:nowritebarrier
 func trygetfull(entry int) *workbuf {
 	b := (*workbuf)(lfstackpop(&work.full))
-	if b == nil {
-		b = (*workbuf)(lfstackpop(&work.partial))
-	}
 	if b != nil {
 		b.logget(entry)
 		b.checknonempty()
@@ -370,10 +327,9 @@
 	return b
 }
 
-// Get a full work buffer off the work.full or a partially
-// filled one off the work.partial list. If nothing is available
-// wait until all the other gc helpers have finished and then
-// return nil.
+// Get a full work buffer off the work.full list.
+// If nothing is available wait until all the other gc helpers have
+// finished and then return nil.
 // getfull acts as a barrier for work.nproc helpers. As long as one
 // gchelper is actively marking objects it
 // may create a workbuffer that the other helpers can work on.
@@ -390,11 +346,6 @@
 		b.checknonempty()
 		return b
 	}
-	b = (*workbuf)(lfstackpop(&work.partial))
-	if b != nil {
-		b.logget(entry)
-		return b
-	}
 
 	incnwait := xadd(&work.nwait, +1)
 	if incnwait > work.nproc {
@@ -402,16 +353,13 @@
 		throw("work.nwait > work.nproc")
 	}
 	for i := 0; ; i++ {
-		if work.full != 0 || work.partial != 0 {
+		if work.full != 0 {
 			decnwait := xadd(&work.nwait, -1)
 			if decnwait == work.nproc {
 				println("runtime: work.nwait=", decnwait, "work.nproc=", work.nproc)
 				throw("work.nwait > work.nproc")
 			}
 			b = (*workbuf)(lfstackpop(&work.full))
-			if b == nil {
-				b = (*workbuf)(lfstackpop(&work.partial))
-			}
 			if b != nil {
 				b.logget(entry)
 				b.checknonempty()
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go
index bc4e7c1..7c313de 100644
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -29,9 +29,12 @@
 	spans_mapped uintptr
 
 	// Proportional sweep
+	pagesInUse        uint64  // pages of spans in stats _MSpanInUse; R/W with mheap.lock
 	spanBytesAlloc    uint64  // bytes of spans allocated this cycle; updated atomically
 	pagesSwept        uint64  // pages swept this cycle; updated atomically
 	sweepPagesPerByte float64 // proportional sweep ratio; written with lock, read without
+	// TODO(austin): pagesInUse should be a uintptr, but the 386
+	// compiler can't 8-byte align fields.
 
 	// Malloc stats.
 	largefree  uint64                  // bytes freed for large objects (>maxsmallsize)
@@ -420,8 +423,6 @@
 	memstats.tinyallocs += uint64(_g_.m.mcache.local_tinyallocs)
 	_g_.m.mcache.local_tinyallocs = 0
 
-	gcController.revise()
-
 	s := mHeap_AllocSpanLocked(h, npage)
 	if s != nil {
 		// Record span info, because gc needs to be
@@ -447,6 +448,7 @@
 		}
 
 		// update stats, sweep lists
+		h.pagesInUse += uint64(npage)
 		if large {
 			memstats.heap_objects++
 			memstats.heap_live += uint64(npage << _PageShift)
@@ -458,6 +460,11 @@
 			}
 		}
 	}
+	// heap_scan and heap_live were updated.
+	if gcBlackenEnabled != 0 {
+		gcController.revise()
+	}
+
 	if trace.enabled {
 		traceHeapAlloc()
 	}
@@ -551,7 +558,7 @@
 		throw("still in list")
 	}
 	if s.npreleased > 0 {
-		sysUsed((unsafe.Pointer)(s.start<<_PageShift), s.npages<<_PageShift)
+		sysUsed(unsafe.Pointer(s.start<<_PageShift), s.npages<<_PageShift)
 		memstats.heap_released -= uint64(s.npreleased << _PageShift)
 		s.npreleased = 0
 	}
@@ -614,6 +621,8 @@
 
 // Try to add at least npage pages of memory to the heap,
 // returning whether it worked.
+//
+// h must be locked.
 func mHeap_Grow(h *mheap, npage uintptr) bool {
 	// Ask for a big chunk, to reduce the number of mappings
 	// the operating system needs to track; also amortizes
@@ -648,6 +657,7 @@
 	}
 	atomicstore(&s.sweepgen, h.sweepgen)
 	s.state = _MSpanInUse
+	h.pagesInUse += uint64(npage)
 	mHeap_FreeSpanLocked(h, s, false, true, 0)
 	return true
 }
@@ -696,7 +706,9 @@
 		if acct != 0 {
 			memstats.heap_objects--
 		}
-		gcController.revise()
+		if gcBlackenEnabled != 0 {
+			gcController.revise()
+		}
 		mHeap_FreeSpanLocked(h, s, true, true, 0)
 		if trace.enabled {
 			traceHeapAlloc()
@@ -728,6 +740,7 @@
 			print("MHeap_FreeSpanLocked - span ", s, " ptr ", hex(s.start<<_PageShift), " ref ", s.ref, " sweepgen ", s.sweepgen, "/", h.sweepgen, "\n")
 			throw("MHeap_FreeSpanLocked - invalid free")
 		}
+		h.pagesInUse -= uint64(s.npages)
 	default:
 		throw("MHeap_FreeSpanLocked - invalid span state")
 	}
@@ -763,7 +776,7 @@
 			h_spans[p] = s
 			mSpanList_Remove(t)
 			t.state = _MSpanDead
-			fixAlloc_Free(&h.spanalloc, (unsafe.Pointer)(t))
+			fixAlloc_Free(&h.spanalloc, unsafe.Pointer(t))
 		}
 	}
 	if (p+s.npages)*ptrSize < h.spans_mapped {
@@ -775,7 +788,7 @@
 			h_spans[p+s.npages-1] = s
 			mSpanList_Remove(t)
 			t.state = _MSpanDead
-			fixAlloc_Free(&h.spanalloc, (unsafe.Pointer)(t))
+			fixAlloc_Free(&h.spanalloc, unsafe.Pointer(t))
 		}
 	}
 
@@ -808,7 +821,7 @@
 			memstats.heap_released += uint64(released)
 			sumreleased += released
 			s.npreleased = s.npages
-			sysUnused((unsafe.Pointer)(s.start<<_PageShift), s.npages<<_PageShift)
+			sysUnused(unsafe.Pointer(s.start<<_PageShift), s.npages<<_PageShift)
 		}
 	}
 	return sumreleased
@@ -930,7 +943,8 @@
 	}
 
 	// Ensure that the span is swept.
-	// GC accesses specials list w/o locks. And it's just much safer.
+	// Sweeping accesses the specials list w/o locks, so we have
+	// to synchronize with it. And it's just much safer.
 	mp := acquirem()
 	mSpan_EnsureSwept(span)
 
@@ -977,7 +991,8 @@
 	}
 
 	// Ensure that the span is swept.
-	// GC accesses specials list w/o locks. And it's just much safer.
+	// Sweeping accesses the specials list w/o locks, so we have
+	// to synchronize with it. And it's just much safer.
 	mp := acquirem()
 	mSpan_EnsureSwept(span)
 
@@ -1025,12 +1040,31 @@
 	s.fint = fint
 	s.ot = ot
 	if addspecial(p, &s.special) {
+		// This is responsible for maintaining the same
+		// GC-related invariants as markrootSpans in any
+		// situation where it's possible that markrootSpans
+		// has already run but mark termination hasn't yet.
+		if gcphase != _GCoff {
+			_, base, _ := findObject(p)
+			mp := acquirem()
+			gcw := &mp.p.ptr().gcw
+			// Mark everything reachable from the object
+			// so it's retained for the finalizer.
+			scanobject(uintptr(base), gcw)
+			// Mark the finalizer itself, since the
+			// special isn't part of the GC'd heap.
+			scanblock(uintptr(unsafe.Pointer(&s.fn)), ptrSize, &oneptrmask[0], gcw)
+			if gcBlackenPromptly {
+				gcw.dispose()
+			}
+			releasem(mp)
+		}
 		return true
 	}
 
 	// There was an old finalizer
 	lock(&mheap_.speciallock)
-	fixAlloc_Free(&mheap_.specialfinalizeralloc, (unsafe.Pointer)(s))
+	fixAlloc_Free(&mheap_.specialfinalizeralloc, unsafe.Pointer(s))
 	unlock(&mheap_.speciallock)
 	return false
 }
@@ -1042,7 +1076,7 @@
 		return // there wasn't a finalizer to remove
 	}
 	lock(&mheap_.speciallock)
-	fixAlloc_Free(&mheap_.specialfinalizeralloc, (unsafe.Pointer)(s))
+	fixAlloc_Free(&mheap_.specialfinalizeralloc, unsafe.Pointer(s))
 	unlock(&mheap_.speciallock)
 }
 
@@ -1073,14 +1107,14 @@
 		sf := (*specialfinalizer)(unsafe.Pointer(s))
 		queuefinalizer(p, sf.fn, sf.nret, sf.fint, sf.ot)
 		lock(&mheap_.speciallock)
-		fixAlloc_Free(&mheap_.specialfinalizeralloc, (unsafe.Pointer)(sf))
+		fixAlloc_Free(&mheap_.specialfinalizeralloc, unsafe.Pointer(sf))
 		unlock(&mheap_.speciallock)
 		return false // don't free p until finalizer is done
 	case _KindSpecialProfile:
 		sp := (*specialprofile)(unsafe.Pointer(s))
 		mProf_Free(sp.b, size, freed)
 		lock(&mheap_.speciallock)
-		fixAlloc_Free(&mheap_.specialprofilealloc, (unsafe.Pointer)(sp))
+		fixAlloc_Free(&mheap_.specialprofilealloc, unsafe.Pointer(sp))
 		unlock(&mheap_.speciallock)
 		return true
 	default:
diff --git a/src/runtime/mkduff.go b/src/runtime/mkduff.go
index dc94cee..821906c 100644
--- a/src/runtime/mkduff.go
+++ b/src/runtime/mkduff.go
@@ -60,21 +60,18 @@
 func notags(w io.Writer) { fmt.Fprintln(w) }
 
 func zeroAMD64(w io.Writer) {
-	// AX: zero
+	// X0: zero
 	// DI: ptr to memory to be zeroed
 	// DI is updated as a side effect.
 	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT, $0-0")
-	for i := 0; i < 31; i++ {
-		fmt.Fprintln(w, "\tMOVQ\tAX,(DI)")
-		fmt.Fprintln(w, "\tMOVQ\tAX,8(DI)")
-		fmt.Fprintln(w, "\tMOVQ\tAX,16(DI)")
-		fmt.Fprintln(w, "\tMOVQ\tAX,24(DI)")
-		fmt.Fprintln(w, "\tADDQ\t$32,DI")
+	for i := 0; i < 16; i++ {
+		fmt.Fprintln(w, "\tMOVUPS\tX0,(DI)")
+		fmt.Fprintln(w, "\tMOVUPS\tX0,16(DI)")
+		fmt.Fprintln(w, "\tMOVUPS\tX0,32(DI)")
+		fmt.Fprintln(w, "\tMOVUPS\tX0,48(DI)")
+		fmt.Fprintln(w, "\tADDQ\t$64,DI")
 		fmt.Fprintln(w)
 	}
-	for i := 0; i < 4; i++ {
-		fmt.Fprintln(w, "\tSTOSQ")
-	}
 	fmt.Fprintln(w, "\tRET")
 }
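For reference, the regenerated AMD64 duffzero body is now 16 identical
64-byte blocks built from 16-byte SSE stores. A hand-derived sketch of
what the generator above emits (the checked-in duff_amd64.s is the
authoritative output):

	TEXT runtime·duffzero(SB), NOSPLIT, $0-0
		MOVUPS	X0,(DI)
		MOVUPS	X0,16(DI)
		MOVUPS	X0,32(DI)
		MOVUPS	X0,48(DI)
		ADDQ	$64,DI

		... 15 more identical blocks ...

		RET

As with any Duff's device, callers jump to a computed offset inside
this run of blocks to zero smaller amounts, which is why the body is a
flat sequence of blocks rather than a loop.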
 
@@ -87,11 +84,11 @@
 	// for some reason that is 3.5x slower than this code.
 	// The STOSQ in duffzero seem fine, though.
 	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT, $0-0")
-	for i := 0; i < 128; i++ {
-		fmt.Fprintln(w, "\tMOVQ\t(SI), CX")
-		fmt.Fprintln(w, "\tADDQ\t$8, SI")
-		fmt.Fprintln(w, "\tMOVQ\tCX, (DI)")
-		fmt.Fprintln(w, "\tADDQ\t$8, DI")
+	for i := 0; i < 64; i++ {
+		fmt.Fprintln(w, "\tMOVUPS\t(SI), X0")
+		fmt.Fprintln(w, "\tADDQ\t$16, SI")
+		fmt.Fprintln(w, "\tMOVUPS\tX0, (DI)")
+		fmt.Fprintln(w, "\tADDQ\t$16, DI")
 		fmt.Fprintln(w)
 	}
 	fmt.Fprintln(w, "\tRET")
@@ -176,11 +173,11 @@
 	// R0: always zero
 	// R3 (aka REGRT1): ptr to memory to be zeroed - 8
 	// On return, R3 points to the last zeroed dword.
-	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT, $-8-0")
+	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0")
 	for i := 0; i < 128; i++ {
 		fmt.Fprintln(w, "\tMOVDU\tR0, 8(R3)")
 	}
-	fmt.Fprintln(w, "\tRETURN")
+	fmt.Fprintln(w, "\tRET")
 }
 
 func copyPPC64x(w io.Writer) {
diff --git a/src/runtime/mkfastlog2table.go b/src/runtime/mkfastlog2table.go
new file mode 100644
index 0000000..587ebf4
--- /dev/null
+++ b/src/runtime/mkfastlog2table.go
@@ -0,0 +1,52 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+// fastlog2Table contains log2 approximations for 5 binary digits.
+// This is used to implement fastlog2, which is used for heap sampling.
+
+package main
+
+import (
+	"bytes"
+	"fmt"
+	"io/ioutil"
+	"log"
+	"math"
+)
+
+func main() {
+	var buf bytes.Buffer
+
+	fmt.Fprintln(&buf, "// AUTO-GENERATED by mkfastlog2table.go")
+	fmt.Fprintln(&buf, "// Run go generate from src/runtime to update.")
+	fmt.Fprintln(&buf, "// See mkfastlog2table.go for comments.")
+	fmt.Fprintln(&buf)
+	fmt.Fprintln(&buf, "package runtime")
+	fmt.Fprintln(&buf)
+	fmt.Fprintln(&buf, "const fastlogNumBits =", fastlogNumBits)
+	fmt.Fprintln(&buf)
+
+	fmt.Fprintln(&buf, "var fastlog2Table = [1<<fastlogNumBits + 1]float64{")
+	table := computeTable()
+	for _, t := range table {
+		fmt.Fprintf(&buf, "\t%v,\n", t)
+	}
+	fmt.Fprintln(&buf, "}")
+
+	if err := ioutil.WriteFile("fastlog2table.go", buf.Bytes(), 0644); err != nil {
+		log.Fatalln(err)
+	}
+}
+
+const fastlogNumBits = 5
+
+func computeTable() []float64 {
+	fastlog2Table := make([]float64, 1<<fastlogNumBits+1)
+	for i := 0; i <= (1 << fastlogNumBits); i++ {
+		fastlog2Table[i] = math.Log2(1.0 + float64(i)/(1<<fastlogNumBits))
+	}
+	return fastlog2Table
+}
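The table alone doesn't show the lookup side. Below is a minimal,
self-contained sketch of how such a table can drive a log2
approximation; approxLog2 and its interpolation details are
illustrative assumptions, not the runtime's actual fastlog2 (which is
not part of this diff), and it only handles positive normal floats:

	package main

	import (
		"fmt"
		"math"
	)

	const fastlogNumBits = 5

	// table[i] = log2(1 + i/32), matching mkfastlog2table.go's output.
	var table [1<<fastlogNumBits + 1]float64

	func init() {
		for i := range table {
			table[i] = math.Log2(1 + float64(i)/(1<<fastlogNumBits))
		}
	}

	// approxLog2 splits x into exponent and mantissa, indexes the
	// table with the top 5 mantissa bits, and interpolates linearly
	// between neighboring entries.
	func approxLog2(x float64) float64 {
		bits := math.Float64bits(x)
		exp := float64(int64(bits>>52&0x7ff) - 1023)
		frac := bits & (1<<52 - 1)
		i := frac >> (52 - fastlogNumBits) // top 5 mantissa bits
		rem := float64(frac&(1<<(52-fastlogNumBits)-1)) / (1 << (52 - fastlogNumBits))
		return exp + table[i] + rem*(table[i+1]-table[i])
	}

	func main() {
		fmt.Println(approxLog2(10), math.Log2(10)) // both ≈ 3.3219
	}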
diff --git a/src/runtime/mmap.go b/src/runtime/mmap.go
new file mode 100644
index 0000000..a076842
--- /dev/null
+++ b/src/runtime/mmap.go
@@ -0,0 +1,16 @@
+// Copyright 2015 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !plan9
+// +build !solaris
+// +build !windows
+// +build !nacl
+// +build !linux !amd64
+
+package runtime
+
+import "unsafe"
+
+// mmap calls the mmap system call.  It is implemented in assembly.
+func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) unsafe.Pointer
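A note on the build constraints above: terms within one +build line
are ORed and separate +build lines are ANDed, so this declaration is
compiled exactly when

	!plan9 && !solaris && !windows && !nacl && (!linux || !amd64)

holds, i.e. everywhere except plan9, solaris, windows, nacl, and the
single GOOS/GOARCH pair linux/amd64.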
diff --git a/src/runtime/mstkbar.go b/src/runtime/mstkbar.go
new file mode 100644
index 0000000..9a27d2a
--- /dev/null
+++ b/src/runtime/mstkbar.go
@@ -0,0 +1,283 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Garbage collector: stack barriers
+//
+// Stack barriers enable the garbage collector to determine how much
+// of a goroutine stack has changed between when a stack is scanned
+// during the concurrent scan phase and when it is re-scanned during
+// the stop-the-world mark termination phase. Mark termination only
+// needs to re-scan the changed part, so for deep stacks this can
+// significantly reduce GC pause time compared to the alternative of
+// re-scanning whole stacks. The deeper the stacks, the more stack
+// barriers help.
+//
+// When stacks are scanned during the concurrent scan phase, the stack
+// scan installs stack barriers by selecting stack frames and
+// overwriting the saved return PCs (or link registers) of these
+// frames with the PC of a "stack barrier trampoline". Later, when a
+// selected frame returns, it "returns" to this trampoline instead of
+// returning to its actual caller. The trampoline records that the
+// stack has unwound past this frame and jumps to the original return
+// PC recorded when the stack barrier was installed. Mark termination
+// re-scans only as far as the first frame that hasn't hit a stack
+// barrier and then removes any un-hit stack barriers.
+//
+// This scheme is very lightweight. No special code is required in the
+// mutator to record stack unwinding and the trampoline is only a few
+// assembly instructions.
+//
+// Book-keeping
+// ------------
+//
+// The primary cost of stack barriers is book-keeping: the runtime has
+// to record the locations of all stack barriers and the original
+// return PCs in order to return to the correct caller when a stack
+// barrier is hit and so it can remove un-hit stack barriers. In order
+// to minimize this cost, the Go runtime places stack barriers in
+// exponentially-spaced frames, starting 1K past the current frame.
+// The book-keeping structure hence grows logarithmically with the
+// size of the stack and mark termination re-scans at most twice as
+// much stack as necessary.
+//
+// The runtime reserves space for this book-keeping structure at the
+// top of the stack allocation itself (just above the outermost
+// frame). This is necessary because the regular memory allocator can
+// itself grow the stack, and hence can't be used when allocating
+// stack-related structures.
+//
+// For debugging, the runtime also supports installing stack barriers
+// at every frame. However, this requires significantly more
+// book-keeping space.
+//
+// Correctness
+// -----------
+//
+// The runtime and the compiler cooperate to ensure that all objects
+// reachable from the stack as of mark termination are marked.
+// Anything unchanged since the concurrent scan phase will be marked
+// because it is marked by the concurrent scan. After the concurrent
+// scan, there are three possible classes of stack modifications that
+// must be tracked:
+//
+// 1) Mutator writes below the lowest un-hit stack barrier. This
+// includes all writes performed by an executing function to its own
+// stack frame. This part of the stack will be re-scanned by mark
+// termination, which will mark any objects made reachable from
+// modifications to this part of the stack.
+//
+// 2) Mutator writes above the lowest un-hit stack barrier. It's
+// possible for a mutator to modify the stack above the lowest un-hit
+// stack barrier if a higher frame has passed down a pointer to a
+// stack variable in its frame. This is called an "up-pointer". The
+// compiler ensures that writes through up-pointers have an
+// accompanying write barrier (it simply doesn't distinguish between
+// writes through up-pointers and writes through heap pointers). This
+// write barrier marks any object made reachable from modifications to
+// this part of the stack.
+//
+// 3) Runtime writes to the stack. Various runtime operations such as
+// sends to unbuffered channels can write to arbitrary parts of the
+// stack, including above the lowest un-hit stack barrier. We solve
+// this in two ways. In many cases, the runtime can perform an
+// explicit write barrier operation like in case 2. However, in the
+// case of bulk memory move (typedmemmove), the runtime doesn't
+// necessarily have ready access to a pointer bitmap for the memory
+// being copied, so it simply unwinds any stack barriers below the
+// destination.
+//
+// Gotchas
+// -------
+//
+// Anything that inspects or manipulates the stack potentially needs
+// to understand stack barriers. The most obvious case is that
+// gentraceback needs to use the original return PC when it encounters
+// the stack barrier trampoline. Anything that unwinds the stack such
+// as panic/recover must unwind stack barriers in tandem with
+// unwinding the stack.
+//
+// Stack barriers require that any goroutine whose stack has been
+// scanned must execute write barriers. Go solves this by simply
+// enabling write barriers globally during the concurrent scan phase.
+// However, traditionally, write barriers are not enabled during this
+// phase.
+
+package runtime
+
+import "unsafe"
+
+const debugStackBarrier = false
+
+// firstStackBarrierOffset is the approximate byte offset at
+// which to place the first stack barrier from the current SP.
+// This is a lower bound on how much stack will have to be
+// re-scanned during mark termination. Subsequent barriers are
+// placed at firstStackBarrierOffset * 2^n offsets.
+//
+// For debugging, this can be set to 0, which will install a
+// stack barrier at every frame. If you do this, you may also
+// have to raise _StackMin, since the stack barrier
+// bookkeeping will use a large amount of each stack.
+var firstStackBarrierOffset = 1024
+
+// gcMaxStackBarriers returns the maximum number of stack barriers
+// that can be installed in a stack of stackSize bytes.
+func gcMaxStackBarriers(stackSize int) (n int) {
+	if firstStackBarrierOffset == 0 {
+		// Special debugging case for inserting stack barriers
+		// at every frame. Steal half of the stack for the
+		// []stkbar. Technically, if the stack were to consist
+		// solely of return PCs we would need two thirds of
+		// the stack, but stealing that much breaks things and
+		// this doesn't happen in practice.
+		return stackSize / 2 / int(unsafe.Sizeof(stkbar{}))
+	}
+
+	offset := firstStackBarrierOffset
+	for offset < stackSize {
+		n++
+		offset *= 2
+	}
+	return n + 1
+}
+
+// gcInstallStackBarrier installs a stack barrier over the return PC of frame.
+//go:nowritebarrier
+func gcInstallStackBarrier(gp *g, frame *stkframe) bool {
+	if frame.lr == 0 {
+		if debugStackBarrier {
+			print("not installing stack barrier with no LR, goid=", gp.goid, "\n")
+		}
+		return false
+	}
+
+	if frame.fn.entry == cgocallback_gofuncPC {
+		// cgocallback_gofunc doesn't return to its LR;
+		// instead, its return path puts LR in g.sched.pc and
+		// switches back to the system stack on which
+		// cgocallback_gofunc was originally called. We can't
+		// have a stack barrier in g.sched.pc, so don't
+		// install one in this frame.
+		if debugStackBarrier {
+			print("not installing stack barrier over LR of cgocallback_gofunc, goid=", gp.goid, "\n")
+		}
+		return false
+	}
+
+	// Save the return PC and overwrite it with stackBarrier.
+	var lrUintptr uintptr
+	if usesLR {
+		lrUintptr = frame.sp
+	} else {
+		lrUintptr = frame.fp - regSize
+	}
+	lrPtr := (*uintreg)(unsafe.Pointer(lrUintptr))
+	if debugStackBarrier {
+		print("install stack barrier at ", hex(lrUintptr), " over ", hex(*lrPtr), ", goid=", gp.goid, "\n")
+		if uintptr(*lrPtr) != frame.lr {
+			print("frame.lr=", hex(frame.lr))
+			throw("frame.lr differs from stack LR")
+		}
+	}
+
+	gp.stkbar = gp.stkbar[:len(gp.stkbar)+1]
+	stkbar := &gp.stkbar[len(gp.stkbar)-1]
+	stkbar.savedLRPtr = lrUintptr
+	stkbar.savedLRVal = uintptr(*lrPtr)
+	*lrPtr = uintreg(stackBarrierPC)
+	return true
+}
+
+// gcRemoveStackBarriers removes all stack barriers installed in gp's stack.
+//go:nowritebarrier
+func gcRemoveStackBarriers(gp *g) {
+	if debugStackBarrier && gp.stkbarPos != 0 {
+		print("hit ", gp.stkbarPos, " stack barriers, goid=", gp.goid, "\n")
+	}
+
+	// Remove stack barriers that we didn't hit.
+	for _, stkbar := range gp.stkbar[gp.stkbarPos:] {
+		gcRemoveStackBarrier(gp, stkbar)
+	}
+
+	// Clear recorded stack barriers so copystack doesn't try to
+	// adjust them.
+	gp.stkbarPos = 0
+	gp.stkbar = gp.stkbar[:0]
+}
+
+// gcRemoveStackBarrier removes a single stack barrier. It is the
+// inverse operation of gcInstallStackBarrier.
+//
+// This is nosplit to ensure gp's stack does not move.
+//
+//go:nowritebarrier
+//go:nosplit
+func gcRemoveStackBarrier(gp *g, stkbar stkbar) {
+	if debugStackBarrier {
+		print("remove stack barrier at ", hex(stkbar.savedLRPtr), " with ", hex(stkbar.savedLRVal), ", goid=", gp.goid, "\n")
+	}
+	lrPtr := (*uintreg)(unsafe.Pointer(stkbar.savedLRPtr))
+	if val := *lrPtr; val != uintreg(stackBarrierPC) {
+		printlock()
+		print("at *", hex(stkbar.savedLRPtr), " expected stack barrier PC ", hex(stackBarrierPC), ", found ", hex(val), ", goid=", gp.goid, "\n")
+		print("gp.stkbar=")
+		gcPrintStkbars(gp.stkbar)
+		print(", gp.stkbarPos=", gp.stkbarPos, ", gp.stack=[", hex(gp.stack.lo), ",", hex(gp.stack.hi), ")\n")
+		throw("stack barrier lost")
+	}
+	*lrPtr = uintreg(stkbar.savedLRVal)
+}
+
+// gcPrintStkbars prints a []stkbar for debugging.
+func gcPrintStkbars(stkbar []stkbar) {
+	print("[")
+	for i, s := range stkbar {
+		if i > 0 {
+			print(" ")
+		}
+		print("*", hex(s.savedLRPtr), "=", hex(s.savedLRVal))
+	}
+	print("]")
+}
+
+// gcUnwindBarriers marks all stack barriers up to the frame containing
+// sp as hit and removes them. This is used during stack unwinding for
+// panic/recover and by heapBitsBulkBarrier to force stack re-scanning
+// when its destination is on the stack.
+//
+// This is nosplit to ensure gp's stack does not move.
+//
+//go:nosplit
+func gcUnwindBarriers(gp *g, sp uintptr) {
+	// On LR machines, if there is a stack barrier on the return
+	// from the frame containing sp, this will mark it as hit even
+	// though it isn't, but it's okay to be conservative.
+	before := gp.stkbarPos
+	for int(gp.stkbarPos) < len(gp.stkbar) && gp.stkbar[gp.stkbarPos].savedLRPtr < sp {
+		gcRemoveStackBarrier(gp, gp.stkbar[gp.stkbarPos])
+		gp.stkbarPos++
+	}
+	if debugStackBarrier && gp.stkbarPos != before {
+		print("skip barriers below ", hex(sp), " in goid=", gp.goid, ": ")
+		gcPrintStkbars(gp.stkbar[before:gp.stkbarPos])
+		print("\n")
+	}
+}
+
+// nextBarrierPC returns the original return PC of the next stack barrier.
+// Used by getcallerpc, so it must be nosplit.
+//go:nosplit
+func nextBarrierPC() uintptr {
+	gp := getg()
+	return gp.stkbar[gp.stkbarPos].savedLRVal
+}
+
+// setNextBarrierPC sets the return PC of the next stack barrier.
+// Used by setcallerpc, so it must be nosplit.
+//go:nosplit
+func setNextBarrierPC(pc uintptr) {
+	gp := getg()
+	gp.stkbar[gp.stkbarPos].savedLRVal = pc
+}
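To make the exponential spacing concrete, here is a standalone
re-derivation of the barrier count for a 128 KB stack (an editor's
sketch mirroring the loop in gcMaxStackBarriers above, not calling
into the runtime):

	package main

	import "fmt"

	func main() {
		const stackSize = 128 << 10 // 128 KB stack
		offset, n := 1024, 0        // firstStackBarrierOffset
		for offset < stackSize {
			n++ // barrier slots at 1K, 2K, 4K, ... past SP
			offset *= 2
		}
		fmt.Println(n + 1) // 8: seven doublings plus one spare slot
	}

With only two saved words per stkbar entry, the book-keeping stays
logarithmic in the stack size, as the file comment promises.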
diff --git a/src/runtime/netpoll_epoll.go b/src/runtime/netpoll_epoll.go
index 7b4052a..e06eff8 100644
--- a/src/runtime/netpoll_epoll.go
+++ b/src/runtime/netpoll_epoll.go
@@ -19,8 +19,7 @@
 func closeonexec(fd int32)
 
 var (
-	epfd           int32 = -1 // epoll descriptor
-	netpolllasterr int32
+	epfd int32 = -1 // epoll descriptor
 )
 
 func netpollinit() {
@@ -67,9 +66,9 @@
 retry:
 	n := epollwait(epfd, &events[0], int32(len(events)), waitms)
 	if n < 0 {
-		if n != -_EINTR && n != netpolllasterr {
-			netpolllasterr = n
+		if n != -_EINTR {
 			println("runtime: epollwait on fd", epfd, "failed with", -n)
+			throw("epollwait failed")
 		}
 		goto retry
 	}
diff --git a/src/runtime/netpoll_kqueue.go b/src/runtime/netpoll_kqueue.go
index 01445dc..36956ba 100644
--- a/src/runtime/netpoll_kqueue.go
+++ b/src/runtime/netpoll_kqueue.go
@@ -17,8 +17,7 @@
 func closeonexec(fd int32)
 
 var (
-	kq             int32 = -1
-	netpolllasterr int32
+	kq int32 = -1
 )
 
 func netpollinit() {
@@ -75,9 +74,9 @@
 retry:
 	n := kevent(kq, nil, 0, &events[0], int32(len(events)), tp)
 	if n < 0 {
-		if n != -_EINTR && n != netpolllasterr {
-			netpolllasterr = n
+		if n != -_EINTR {
 			println("runtime: kevent on fd", kq, "failed with", -n)
+			throw("kevent failed")
 		}
 		goto retry
 	}
diff --git a/src/runtime/netpoll_solaris.go b/src/runtime/netpoll_solaris.go
index e4652d8..86e9b99 100644
--- a/src/runtime/netpoll_solaris.go
+++ b/src/runtime/netpoll_solaris.go
@@ -174,9 +174,6 @@
 	unlock(&pd.lock)
 }
 
-// netpolllasterr holds the last error code returned by port_getn to prevent log spamming
-var netpolllasterr int32
-
 // polls for ready network connections
 // returns list of goroutines that become runnable
 func netpoll(block bool) *g {
@@ -194,9 +191,9 @@
 retry:
 	var n uint32 = 1
 	if port_getn(portfd, &events[0], uint32(len(events)), &n, wait) < 0 {
-		if e := errno(); e != _EINTR && e != netpolllasterr {
-			netpolllasterr = e
+		if e := errno(); e != _EINTR {
 			print("runtime: port_getn on fd ", portfd, " failed with ", e, "\n")
+			throw("port_getn failed")
 		}
 		goto retry
 	}
diff --git a/src/runtime/os1_plan9.go b/src/runtime/os1_plan9.go
index 9615b6d..43ebfa3 100644
--- a/src/runtime/os1_plan9.go
+++ b/src/runtime/os1_plan9.go
@@ -164,7 +164,7 @@
 		return -1
 	}
 	len := findnull(&msg[0])
-	if write(uintptr(fd), (unsafe.Pointer)(&msg[0]), int32(len)) != int64(len) {
+	if write(uintptr(fd), unsafe.Pointer(&msg[0]), int32(len)) != int64(len) {
 		closefd(fd)
 		return -1
 	}
diff --git a/src/runtime/os1_windows.go b/src/runtime/os1_windows.go
index f608b4a..b6da4df 100644
--- a/src/runtime/os1_windows.go
+++ b/src/runtime/os1_windows.go
@@ -139,6 +139,8 @@
 // in sys_windows_386.s and sys_windows_amd64.s
 func externalthreadhandler()
 
+var timeBeginPeriodRetValue uint32
+
 func osinit() {
 	asmstdcallAddr = unsafe.Pointer(funcPC(asmstdcall))
 
@@ -154,7 +156,7 @@
 
 	stdcall2(_SetConsoleCtrlHandler, funcPC(ctrlhandler), 1)
 
-	stdcall1(_timeBeginPeriod, 1)
+	timeBeginPeriodRetValue = uint32(stdcall1(_timeBeginPeriod, 1))
 
 	ncpu = getproccount()
 
diff --git a/src/runtime/os_dragonfly.go b/src/runtime/os_dragonfly.go
index b19270a..62fc56a 100644
--- a/src/runtime/os_dragonfly.go
+++ b/src/runtime/os_dragonfly.go
@@ -13,9 +13,6 @@
 func sigaltstack(new, old *sigaltstackt)
 
 //go:noescape
-func sigfwd(fn uintptr, sig uint32, info *siginfo, ctx unsafe.Pointer)
-
-//go:noescape
 func sigaction(sig int32, new, old *sigactiont)
 
 //go:noescape
diff --git a/src/runtime/os_freebsd.go b/src/runtime/os_freebsd.go
index 8c8a106..bc3394c 100644
--- a/src/runtime/os_freebsd.go
+++ b/src/runtime/os_freebsd.go
@@ -13,9 +13,6 @@
 func sigaltstack(new, old *stackt)
 
 //go:noescape
-func sigfwd(fn uintptr, sig uint32, info *siginfo, ctx unsafe.Pointer)
-
-//go:noescape
 func sigaction(sig int32, new, old *sigactiont)
 
 //go:noescape
diff --git a/src/runtime/os_linux.go b/src/runtime/os_linux.go
index bd492f5..dc932db 100644
--- a/src/runtime/os_linux.go
+++ b/src/runtime/os_linux.go
@@ -19,9 +19,6 @@
 func sigaltstack(new, old *sigaltstackt)
 
 //go:noescape
-func sigfwd(fn uintptr, sig uint32, info *siginfo, ctx unsafe.Pointer)
-
-//go:noescape
 func setitimer(mode int32, new, old *itimerval)
 
 //go:noescape
diff --git a/src/runtime/os_nacl.go b/src/runtime/os_nacl.go
index 3b4c136..efa8fa1 100644
--- a/src/runtime/os_nacl.go
+++ b/src/runtime/os_nacl.go
@@ -50,10 +50,6 @@
 	panicmem()
 }
 
-func sigfwd(fn uintptr, sig uint32, info *siginfo, ctx unsafe.Pointer) {
-	throw("sigfwd not implemented")
-}
-
 func raiseproc(sig int32) {
 }
 
diff --git a/src/runtime/os_netbsd.go b/src/runtime/os_netbsd.go
index af52099..4fa4a41 100644
--- a/src/runtime/os_netbsd.go
+++ b/src/runtime/os_netbsd.go
@@ -15,10 +15,6 @@
 //go:noescape
 func sigaltstack(new, old *sigaltstackt)
 
-func sigfwd(fn uintptr, sig uint32, info *siginfo, ctx unsafe.Pointer) {
-	throw("sigfwd not implemented")
-}
-
 //go:noescape
 func sigprocmask(mode int32, new, old *sigset)
 
diff --git a/src/runtime/os_openbsd.go b/src/runtime/os_openbsd.go
index f94b490..8a97a73 100644
--- a/src/runtime/os_openbsd.go
+++ b/src/runtime/os_openbsd.go
@@ -4,8 +4,6 @@
 
 package runtime
 
-import "unsafe"
-
 //go:noescape
 func setitimer(mode int32, new, old *itimerval)
 
@@ -16,9 +14,6 @@
 func sigaltstack(new, old *stackt)
 
 //go:noescape
-func sigfwd(fn uintptr, sig uint32, info *siginfo, ctx unsafe.Pointer)
-
-//go:noescape
 func sigprocmask(mode int32, new uint32) uint32
 
 //go:noescape
diff --git a/src/runtime/os_solaris.go b/src/runtime/os_solaris.go
index fd20a5c..634e4cf 100644
--- a/src/runtime/os_solaris.go
+++ b/src/runtime/os_solaris.go
@@ -10,9 +10,6 @@
 
 var asmsysvicall6 libcFunc
 
-//go:noescape
-func sigfwd(fn uintptr, sig uint32, info *siginfo, ctx unsafe.Pointer)
-
 //go:nosplit
 func sysvicall0(fn *libcFunc) uintptr {
 	libcall := &getg().m.libcall
diff --git a/src/runtime/panic.go b/src/runtime/panic.go
index a166281..f603d10 100644
--- a/src/runtime/panic.go
+++ b/src/runtime/panic.go
@@ -417,7 +417,7 @@
 		// Record the panic that is running the defer.
 		// If there is a new panic during the deferred call, that panic
 		// will find d in the list and will mark d._panic (this panic) aborted.
-		d._panic = (*_panic)(noescape((unsafe.Pointer)(&p)))
+		d._panic = (*_panic)(noescape(unsafe.Pointer(&p)))
 
 		p.argp = unsafe.Pointer(getargp(0))
 		reflectcall(nil, unsafe.Pointer(d.fn), deferArgs(d), uint32(d.siz), uint32(d.siz))
@@ -527,3 +527,141 @@
 	dopanic(0)
 	*(*int)(nil) = 0 // not reached
 }
+
+//uint32 runtime·panicking;
+var paniclk mutex
+
+// Unwind the stack after a deferred function calls recover
+// after a panic.  Then arrange to continue running as though
+// the caller of the deferred function returned normally.
+func recovery(gp *g) {
+	// Info about defer passed in G struct.
+	sp := gp.sigcode0
+	pc := gp.sigcode1
+
+	// d's arguments need to be in the stack.
+	if sp != 0 && (sp < gp.stack.lo || gp.stack.hi < sp) {
+		print("recover: ", hex(sp), " not in [", hex(gp.stack.lo), ", ", hex(gp.stack.hi), "]\n")
+		throw("bad recovery")
+	}
+
+	// Make the deferproc for this d return again,
+	// this time returning 1.  The calling function will
+	// jump to the standard return epilogue.
+	gcUnwindBarriers(gp, sp)
+	gp.sched.sp = sp
+	gp.sched.pc = pc
+	gp.sched.lr = 0
+	gp.sched.ret = 1
+	gogo(&gp.sched)
+}
+
+func startpanic_m() {
+	_g_ := getg()
+	if mheap_.cachealloc.size == 0 { // very early
+		print("runtime: panic before malloc heap initialized\n")
+		_g_.m.mallocing = 1 // tell rest of panic not to try to malloc
+	} else if _g_.m.mcache == nil { // can happen if called from signal handler or throw
+		_g_.m.mcache = allocmcache()
+	}
+
+	switch _g_.m.dying {
+	case 0:
+		_g_.m.dying = 1
+		_g_.writebuf = nil
+		xadd(&panicking, 1)
+		lock(&paniclk)
+		if debug.schedtrace > 0 || debug.scheddetail > 0 {
+			schedtrace(true)
+		}
+		freezetheworld()
+		return
+	case 1:
+		// Something failed while panicking, probably the print of the
+		// argument to panic().  Just print a stack trace and exit.
+		_g_.m.dying = 2
+		print("panic during panic\n")
+		dopanic(0)
+		exit(3)
+		fallthrough
+	case 2:
+		// This is a genuine bug in the runtime, we couldn't even
+		// print the stack trace successfully.
+		_g_.m.dying = 3
+		print("stack trace unavailable\n")
+		exit(4)
+		fallthrough
+	default:
+		// Can't even print!  Just exit.
+		exit(5)
+	}
+}
+
+var didothers bool
+var deadlock mutex
+
+func dopanic_m(gp *g, pc, sp uintptr) {
+	if gp.sig != 0 {
+		print("[signal ", hex(gp.sig), " code=", hex(gp.sigcode0), " addr=", hex(gp.sigcode1), " pc=", hex(gp.sigpc), "]\n")
+	}
+
+	var docrash bool
+	_g_ := getg()
+	if t := gotraceback(&docrash); t > 0 {
+		if gp != gp.m.g0 {
+			print("\n")
+			goroutineheader(gp)
+			traceback(pc, sp, 0, gp)
+		} else if t >= 2 || _g_.m.throwing > 0 {
+			print("\nruntime stack:\n")
+			traceback(pc, sp, 0, gp)
+		}
+		if !didothers {
+			didothers = true
+			tracebackothers(gp)
+		}
+	}
+	unlock(&paniclk)
+
+	if xadd(&panicking, -1) != 0 {
+		// Some other m is panicking too.
+		// Let it print what it needs to print.
+		// Wait forever without chewing up cpu.
+		// It will exit when it's done.
+		lock(&deadlock)
+		lock(&deadlock)
+	}
+
+	if docrash {
+		crash()
+	}
+
+	exit(2)
+}
+
+//go:nosplit
+func canpanic(gp *g) bool {
+	// Note that g is m->gsignal, different from gp.
+	// Note also that g->m can change at preemption, so m can go stale
+	// if this function ever makes a function call.
+	_g_ := getg()
+	_m_ := _g_.m
+
+	// Is it okay for gp to panic instead of crashing the program?
+	// Yes, as long as it is running Go code, not runtime code,
+	// and not stuck in a system call.
+	if gp == nil || gp != _m_.curg {
+		return false
+	}
+	if _m_.locks-_m_.softfloat != 0 || _m_.mallocing != 0 || _m_.throwing != 0 || _m_.preemptoff != "" || _m_.dying != 0 {
+		return false
+	}
+	status := readgstatus(gp)
+	if status&^_Gscan != _Grunning || gp.syscallsp != 0 {
+		return false
+	}
+	if GOOS == "windows" && _m_.libcallsp != 0 {
+		return false
+	}
+	return true
+}
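A note on recovery above: it works because of a compiler-side contract
around deferproc's result. The following is paraphrased pseudocode of
that lowering, not code from this diff; the exact calling convention
is up to the compiler:

	if deferproc(siz, fn, args) != 0 {
		// deferproc normally returns 0. After recover(),
		// recovery() resumes this call site with gp.sched.ret = 1,
		// so this branch runs the ordinary return epilogue.
		return
	}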
diff --git a/src/runtime/panic1.go b/src/runtime/panic1.go
deleted file mode 100644
index 1a71d09..0000000
--- a/src/runtime/panic1.go
+++ /dev/null
@@ -1,150 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-// Code related to defer, panic and recover.
-// TODO: Merge into panic.go.
-
-//uint32 runtime·panicking;
-var paniclk mutex
-
-const hasLinkRegister = GOARCH == "arm" || GOARCH == "arm64" || GOARCH == "ppc64" || GOARCH == "ppc64le"
-
-// Unwind the stack after a deferred function calls recover
-// after a panic.  Then arrange to continue running as though
-// the caller of the deferred function returned normally.
-func recovery(gp *g) {
-	// Info about defer passed in G struct.
-	sp := gp.sigcode0
-	pc := gp.sigcode1
-
-	// d's arguments need to be in the stack.
-	if sp != 0 && (sp < gp.stack.lo || gp.stack.hi < sp) {
-		print("recover: ", hex(sp), " not in [", hex(gp.stack.lo), ", ", hex(gp.stack.hi), "]\n")
-		throw("bad recovery")
-	}
-
-	// Make the deferproc for this d return again,
-	// this time returning 1.  The calling function will
-	// jump to the standard return epilogue.
-	gcUnwindBarriers(gp, sp)
-	gp.sched.sp = sp
-	gp.sched.pc = pc
-	gp.sched.lr = 0
-	gp.sched.ret = 1
-	gogo(&gp.sched)
-}
-
-func startpanic_m() {
-	_g_ := getg()
-	if mheap_.cachealloc.size == 0 { // very early
-		print("runtime: panic before malloc heap initialized\n")
-		_g_.m.mallocing = 1 // tell rest of panic not to try to malloc
-	} else if _g_.m.mcache == nil { // can happen if called from signal handler or throw
-		_g_.m.mcache = allocmcache()
-	}
-
-	switch _g_.m.dying {
-	case 0:
-		_g_.m.dying = 1
-		if _g_ != nil {
-			_g_.writebuf = nil
-		}
-		xadd(&panicking, 1)
-		lock(&paniclk)
-		if debug.schedtrace > 0 || debug.scheddetail > 0 {
-			schedtrace(true)
-		}
-		freezetheworld()
-		return
-	case 1:
-		// Something failed while panicing, probably the print of the
-		// argument to panic().  Just print a stack trace and exit.
-		_g_.m.dying = 2
-		print("panic during panic\n")
-		dopanic(0)
-		exit(3)
-		fallthrough
-	case 2:
-		// This is a genuine bug in the runtime, we couldn't even
-		// print the stack trace successfully.
-		_g_.m.dying = 3
-		print("stack trace unavailable\n")
-		exit(4)
-		fallthrough
-	default:
-		// Can't even print!  Just exit.
-		exit(5)
-	}
-}
-
-var didothers bool
-var deadlock mutex
-
-func dopanic_m(gp *g, pc, sp uintptr) {
-	if gp.sig != 0 {
-		print("[signal ", hex(gp.sig), " code=", hex(gp.sigcode0), " addr=", hex(gp.sigcode1), " pc=", hex(gp.sigpc), "]\n")
-	}
-
-	var docrash bool
-	_g_ := getg()
-	if t := gotraceback(&docrash); t > 0 {
-		if gp != gp.m.g0 {
-			print("\n")
-			goroutineheader(gp)
-			traceback(pc, sp, 0, gp)
-		} else if t >= 2 || _g_.m.throwing > 0 {
-			print("\nruntime stack:\n")
-			traceback(pc, sp, 0, gp)
-		}
-		if !didothers {
-			didothers = true
-			tracebackothers(gp)
-		}
-	}
-	unlock(&paniclk)
-
-	if xadd(&panicking, -1) != 0 {
-		// Some other m is panicking too.
-		// Let it print what it needs to print.
-		// Wait forever without chewing up cpu.
-		// It will exit when it's done.
-		lock(&deadlock)
-		lock(&deadlock)
-	}
-
-	if docrash {
-		crash()
-	}
-
-	exit(2)
-}
-
-//go:nosplit
-func canpanic(gp *g) bool {
-	// Note that g is m->gsignal, different from gp.
-	// Note also that g->m can change at preemption, so m can go stale
-	// if this function ever makes a function call.
-	_g_ := getg()
-	_m_ := _g_.m
-
-	// Is it okay for gp to panic instead of crashing the program?
-	// Yes, as long as it is running Go code, not runtime code,
-	// and not stuck in a system call.
-	if gp == nil || gp != _m_.curg {
-		return false
-	}
-	if _m_.locks-_m_.softfloat != 0 || _m_.mallocing != 0 || _m_.throwing != 0 || _m_.preemptoff != "" || _m_.dying != 0 {
-		return false
-	}
-	status := readgstatus(gp)
-	if status&^_Gscan != _Grunning || gp.syscallsp != 0 {
-		return false
-	}
-	if GOOS == "windows" && _m_.libcallsp != 0 {
-		return false
-	}
-	return true
-}
diff --git a/src/runtime/print1.go b/src/runtime/print.go
similarity index 100%
rename from src/runtime/print1.go
rename to src/runtime/print.go
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index c5b4a8c..2477637 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -6,6 +6,23 @@
 
 import "unsafe"
 
+// Goroutine scheduler
+// The scheduler's job is to distribute ready-to-run goroutines over worker threads.
+//
+// The main concepts are:
+// G - goroutine.
+// M - worker thread, or machine.
+// P - processor, a resource that is required to execute Go code.
+//     M must have an associated P to execute Go code; however, it can be
+//     blocked or in a syscall w/o an associated P.
+//
+// Design doc at https://golang.org/s/go11sched.
+
+var (
+	m0 m
+	g0 g
+)
+
 //go:linkname runtime_init runtime.init
 func runtime_init()
 
@@ -323,3 +340,3712 @@
 	allglen = uintptr(len(allgs))
 	unlock(&allglock)
 }
+
+const (
+	// Number of goroutine ids to grab from sched.goidgen to local per-P cache at once.
+	// 16 seems to provide enough amortization, but other than that it's mostly an arbitrary number.
+	_GoidCacheBatch = 16
+)
+
+// The bootstrap sequence is:
+//
+//	call osinit
+//	call schedinit
+//	make & queue new G
+//	call runtime·mstart
+//
+// The new G calls runtime·main.
+func schedinit() {
+	// raceinit must be the first call to race detector.
+	// In particular, it must be done before mallocinit below calls racemapshadow.
+	_g_ := getg()
+	if raceenabled {
+		_g_.racectx = raceinit()
+	}
+
+	sched.maxmcount = 10000
+
+	// Cache the framepointer experiment.  This affects stack unwinding.
+	framepointer_enabled = haveexperiment("framepointer")
+
+	tracebackinit()
+	moduledataverify()
+	stackinit()
+	mallocinit()
+	mcommoninit(_g_.m)
+
+	goargs()
+	goenvs()
+	parsedebugvars()
+	gcinit()
+
+	sched.lastpoll = uint64(nanotime())
+	procs := int(ncpu)
+	if n := atoi(gogetenv("GOMAXPROCS")); n > 0 {
+		if n > _MaxGomaxprocs {
+			n = _MaxGomaxprocs
+		}
+		procs = n
+	}
+	if procresize(int32(procs)) != nil {
+		throw("unknown runnable goroutine during bootstrap")
+	}
+
+	if buildVersion == "" {
+		// Condition should never trigger.  This code just serves
+		// to ensure runtime·buildVersion is kept in the resulting binary.
+		buildVersion = "unknown"
+	}
+}
+
+func dumpgstatus(gp *g) {
+	_g_ := getg()
+	print("runtime: gp: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
+	print("runtime:  g:  g=", _g_, ", goid=", _g_.goid, ",  g->atomicstatus=", readgstatus(_g_), "\n")
+}
+
+func checkmcount() {
+	// sched lock is held
+	if sched.mcount > sched.maxmcount {
+		print("runtime: program exceeds ", sched.maxmcount, "-thread limit\n")
+		throw("thread exhaustion")
+	}
+}
+
+func mcommoninit(mp *m) {
+	_g_ := getg()
+
+	// g0 stack won't make sense for the user (and is not necessarily unwindable).
+	if _g_ != _g_.m.g0 {
+		callers(1, mp.createstack[:])
+	}
+
+	mp.fastrand = 0x49f6428a + uint32(mp.id) + uint32(cputicks())
+	if mp.fastrand == 0 {
+		mp.fastrand = 0x49f6428a
+	}
+
+	lock(&sched.lock)
+	mp.id = sched.mcount
+	sched.mcount++
+	checkmcount()
+	mpreinit(mp)
+	if mp.gsignal != nil {
+		mp.gsignal.stackguard1 = mp.gsignal.stack.lo + _StackGuard
+	}
+
+	// Add to allm so garbage collector doesn't free g->m
+	// when it is just in a register or thread-local storage.
+	mp.alllink = allm
+
+	// NumCgoCall() iterates over allm w/o schedlock,
+	// so we need to publish it safely.
+	atomicstorep(unsafe.Pointer(&allm), unsafe.Pointer(mp))
+	unlock(&sched.lock)
+}
+
+// Mark gp ready to run.
+func ready(gp *g, traceskip int) {
+	if trace.enabled {
+		traceGoUnpark(gp, traceskip)
+	}
+
+	status := readgstatus(gp)
+
+	// Mark runnable.
+	_g_ := getg()
+	_g_.m.locks++ // disable preemption because it can be holding p in a local var
+	if status&^_Gscan != _Gwaiting {
+		dumpgstatus(gp)
+		throw("bad g->status in ready")
+	}
+
+	// status is Gwaiting or Gscanwaiting, make Grunnable and put on runq
+	casgstatus(gp, _Gwaiting, _Grunnable)
+	runqput(_g_.m.p.ptr(), gp, true)
+	if atomicload(&sched.npidle) != 0 && atomicload(&sched.nmspinning) == 0 { // TODO: fast atomic
+		wakep()
+	}
+	_g_.m.locks--
+	if _g_.m.locks == 0 && _g_.preempt { // restore the preemption request in case we've cleared it in newstack
+		_g_.stackguard0 = stackPreempt
+	}
+}
+
+func gcprocs() int32 {
+	// Figure out how many CPUs to use during GC.
+	// Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
+	lock(&sched.lock)
+	n := gomaxprocs
+	if n > ncpu {
+		n = ncpu
+	}
+	if n > _MaxGcproc {
+		n = _MaxGcproc
+	}
+	if n > sched.nmidle+1 { // one M is currently running
+		n = sched.nmidle + 1
+	}
+	unlock(&sched.lock)
+	return n
+}
+
+func needaddgcproc() bool {
+	lock(&sched.lock)
+	n := gomaxprocs
+	if n > ncpu {
+		n = ncpu
+	}
+	if n > _MaxGcproc {
+		n = _MaxGcproc
+	}
+	n -= sched.nmidle + 1 // one M is currently running
+	unlock(&sched.lock)
+	return n > 0
+}
+
+func helpgc(nproc int32) {
+	_g_ := getg()
+	lock(&sched.lock)
+	pos := 0
+	for n := int32(1); n < nproc; n++ { // one M is currently running
+		if allp[pos].mcache == _g_.m.mcache {
+			pos++
+		}
+		mp := mget()
+		if mp == nil {
+			throw("gcprocs inconsistency")
+		}
+		mp.helpgc = n
+		mp.p.set(allp[pos])
+		mp.mcache = allp[pos].mcache
+		pos++
+		notewakeup(&mp.park)
+	}
+	unlock(&sched.lock)
+}
+
+// freezeStopWait is a large value that freezetheworld sets
+// sched.stopwait to in order to request that all Gs permanently stop.
+const freezeStopWait = 0x7fffffff
+
+// Similar to stopTheWorld but best-effort and can be called several times.
+// There is no reverse operation; it is used during crashing.
+// This function must not lock any mutexes.
+func freezetheworld() {
+	// stopwait and preemption requests can be lost
+	// due to races with concurrently executing threads,
+	// so try several times
+	for i := 0; i < 5; i++ {
+		// this should tell the scheduler to not start any new goroutines
+		sched.stopwait = freezeStopWait
+		atomicstore(&sched.gcwaiting, 1)
+		// this should stop running goroutines
+		if !preemptall() {
+			break // no running goroutines
+		}
+		usleep(1000)
+	}
+	// to be sure
+	usleep(1000)
+	preemptall()
+	usleep(1000)
+}
+
+func isscanstatus(status uint32) bool {
+	if status == _Gscan {
+		throw("isscanstatus: Bad status Gscan")
+	}
+	return status&_Gscan == _Gscan
+}
+
+// All reads and writes of g's status go through readgstatus, casgstatus,
+// castogscanstatus, and casfrom_Gscanstatus.
+//go:nosplit
+func readgstatus(gp *g) uint32 {
+	return atomicload(&gp.atomicstatus)
+}
+
+// Ownership of gscanvalid:
+//
+// If gp is running (meaning status == _Grunning or _Grunning|_Gscan),
+// then gp owns gp.gscanvalid, and other goroutines must not modify it.
+//
+// Otherwise, a second goroutine can lock the scan state by setting _Gscan
+// in the status bit and then modify gscanvalid, and then unlock the scan state.
+//
+// Note that the first condition implies an exception to the second:
+// if a second goroutine changes gp's status to _Grunning|_Gscan,
+// that second goroutine still does not have the right to modify gscanvalid.
+
+// The Gscanstatuses are acting like locks, and this releases them.
+// If it proves to be a performance hit, we should be able to make these
+// simple atomic stores, but for now we are going to throw if
+// we see an inconsistent state.
+func casfrom_Gscanstatus(gp *g, oldval, newval uint32) {
+	success := false
+
+	// Check that transition is valid.
+	switch oldval {
+	default:
+		print("runtime: casfrom_Gscanstatus bad oldval gp=", gp, ", oldval=", hex(oldval), ", newval=", hex(newval), "\n")
+		dumpgstatus(gp)
+		throw("casfrom_Gscanstatus:top gp->status is not in scan state")
+	case _Gscanrunnable,
+		_Gscanwaiting,
+		_Gscanrunning,
+		_Gscansyscall:
+		if newval == oldval&^_Gscan {
+			success = cas(&gp.atomicstatus, oldval, newval)
+		}
+	case _Gscanenqueue:
+		if newval == _Gwaiting {
+			success = cas(&gp.atomicstatus, oldval, newval)
+		}
+	}
+	if !success {
+		print("runtime: casfrom_Gscanstatus failed gp=", gp, ", oldval=", hex(oldval), ", newval=", hex(newval), "\n")
+		dumpgstatus(gp)
+		throw("casfrom_Gscanstatus: gp->status is not in scan state")
+	}
+	if newval == _Grunning {
+		gp.gcscanvalid = false
+	}
+}
+
+// This will return false if the gp is not in the expected status and the cas fails.
+// This acts like a lock acquire while casfrom_Gscanstatus acts like a lock release.
+func castogscanstatus(gp *g, oldval, newval uint32) bool {
+	switch oldval {
+	case _Grunnable,
+		_Gwaiting,
+		_Gsyscall:
+		if newval == oldval|_Gscan {
+			return cas(&gp.atomicstatus, oldval, newval)
+		}
+	case _Grunning:
+		if newval == _Gscanrunning || newval == _Gscanenqueue {
+			return cas(&gp.atomicstatus, oldval, newval)
+		}
+	}
+	print("runtime: castogscanstatus oldval=", hex(oldval), " newval=", hex(newval), "\n")
+	throw("castogscanstatus")
+	panic("not reached")
+}
+
+// If asked to move to or from a Gscanstatus, this will throw. Use castogscanstatus
+// and casfrom_Gscanstatus instead.
+// casgstatus will loop if gp->atomicstatus is in a Gscan status until the routine that
+// put it in the Gscan state is finished.
+//go:nosplit
+func casgstatus(gp *g, oldval, newval uint32) {
+	if (oldval&_Gscan != 0) || (newval&_Gscan != 0) || oldval == newval {
+		systemstack(func() {
+			print("runtime: casgstatus: oldval=", hex(oldval), " newval=", hex(newval), "\n")
+			throw("casgstatus: bad incoming values")
+		})
+	}
+
+	if oldval == _Grunning && gp.gcscanvalid {
+		// If oldval == _Grunning, then the actual status must be
+		// _Grunning or _Grunning|_Gscan; either way,
+		// we own gp.gcscanvalid, so it's safe to read.
+		// gp.gcscanvalid must not be true when we are running.
+		print("runtime: casgstatus ", hex(oldval), "->", hex(newval), " gp.status=", hex(gp.atomicstatus), " gp.gcscanvalid=true\n")
+		throw("casgstatus")
+	}
+
+	// loop if gp->atomicstatus is in a scan state giving
+	// GC time to finish and change the state to oldval.
+	for !cas(&gp.atomicstatus, oldval, newval) {
+		if oldval == _Gwaiting && gp.atomicstatus == _Grunnable {
+			systemstack(func() {
+				throw("casgstatus: waiting for Gwaiting but is Grunnable")
+			})
+		}
+		// Help GC if needed.
+		// if gp.preemptscan && !gp.gcworkdone && (oldval == _Grunning || oldval == _Gsyscall) {
+		// 	gp.preemptscan = false
+		// 	systemstack(func() {
+		// 		gcphasework(gp)
+		// 	})
+		// }
+	}
+	if newval == _Grunning {
+		gp.gcscanvalid = false
+	}
+}
+
+// casgstatus(gp, oldstatus, Gcopystack), assuming oldstatus is Gwaiting or Grunnable.
+// Returns old status. Cannot call casgstatus directly, because we are racing with an
+// async wakeup that might come in from netpoll. If we see Gwaiting from the readgstatus,
+// it might have become Grunnable by the time we get to the cas. If we called casgstatus,
+// it would loop waiting for the status to go back to Gwaiting, which it never will.
+//go:nosplit
+func casgcopystack(gp *g) uint32 {
+	for {
+		oldstatus := readgstatus(gp) &^ _Gscan
+		if oldstatus != _Gwaiting && oldstatus != _Grunnable {
+			throw("copystack: bad status, not Gwaiting or Grunnable")
+		}
+		if cas(&gp.atomicstatus, oldstatus, _Gcopystack) {
+			return oldstatus
+		}
+	}
+}
+
+// scang blocks until gp's stack has been scanned.
+// It might be scanned by scang or it might be scanned by the goroutine itself.
+// Either way, the stack scan has completed when scang returns.
+func scang(gp *g) {
+	// Invariant: we (the caller, markroot for a specific goroutine) own gp.gcscandone.
+	// Nothing is racing with us now, but gcscandone might be set to true left over
+	// from an earlier round of stack scanning (we scan twice per GC).
+	// We use gcscandone to record whether the scan has been done during this round.
+	// It is important that the scan happens exactly once: if called twice,
+	// the installation of stack barriers will detect the double scan and die.
+
+	gp.gcscandone = false
+
+	// Endeavor to get gcscandone set to true,
+	// either by doing the stack scan ourselves or by coercing gp to scan itself.
+	// gp.gcscandone can transition from false to true when we're not looking
+	// (if we asked for preemption), so any time we lock the status using
+	// castogscanstatus we have to double-check that the scan is still not done.
+	for !gp.gcscandone {
+		switch s := readgstatus(gp); s {
+		default:
+			dumpgstatus(gp)
+			throw("stopg: invalid status")
+
+		case _Gdead:
+			// No stack.
+			gp.gcscandone = true
+
+		case _Gcopystack:
+		// Stack being switched. Go around again.
+
+		case _Grunnable, _Gsyscall, _Gwaiting:
+			// Claim goroutine by setting scan bit.
+			// Racing with execution or readying of gp.
+			// The scan bit keeps them from running
+			// the goroutine until we're done.
+			if castogscanstatus(gp, s, s|_Gscan) {
+				if !gp.gcscandone {
+					// Coordinate with traceback
+					// in sigprof.
+					for !cas(&gp.stackLock, 0, 1) {
+						osyield()
+					}
+					scanstack(gp)
+					atomicstore(&gp.stackLock, 0)
+					gp.gcscandone = true
+				}
+				restartg(gp)
+			}
+
+		case _Gscanwaiting:
+		// newstack is doing a scan for us right now. Wait.
+
+		case _Grunning:
+			// Goroutine running. Try to preempt execution so it can scan itself.
+			// The preemption handler (in newstack) does the actual scan.
+
+			// Optimization: if there is already a pending preemption request
+			// (from the previous loop iteration), don't bother with the atomics.
+			if gp.preemptscan && gp.preempt && gp.stackguard0 == stackPreempt {
+				break
+			}
+
+			// Ask for preemption and self scan.
+			if castogscanstatus(gp, _Grunning, _Gscanrunning) {
+				if !gp.gcscandone {
+					gp.preemptscan = true
+					gp.preempt = true
+					gp.stackguard0 = stackPreempt
+				}
+				casfrom_Gscanstatus(gp, _Gscanrunning, _Grunning)
+			}
+		}
+	}
+
+	gp.preemptscan = false // cancel scan request if no longer needed
+}
+
+// The GC requests that this routine be moved from a scanmumble state to a mumble state.
+func restartg(gp *g) {
+	s := readgstatus(gp)
+	switch s {
+	default:
+		dumpgstatus(gp)
+		throw("restartg: unexpected status")
+
+	case _Gdead:
+	// ok
+
+	case _Gscanrunnable,
+		_Gscanwaiting,
+		_Gscansyscall:
+		casfrom_Gscanstatus(gp, s, s&^_Gscan)
+
+	// Scan is now completed.
+	// Goroutine now needs to be made runnable.
+	// We put it on the global run queue; ready blocks on the global scheduler lock.
+	case _Gscanenqueue:
+		casfrom_Gscanstatus(gp, _Gscanenqueue, _Gwaiting)
+		if gp != getg().m.curg {
+			throw("processing Gscanenqueue on wrong m")
+		}
+		dropg()
+		ready(gp, 0)
+	}
+}
+
+// stopTheWorld stops all P's from executing goroutines, interrupting
+// all goroutines at GC safe points and recording reason as the reason
+// for the stop. On return, only the current goroutine's P is running.
+// stopTheWorld must not be called from a system stack and the caller
+// must not hold worldsema. The caller must call startTheWorld when
+// other P's should resume execution.
+//
+// stopTheWorld is safe for multiple goroutines to call at the
+// same time. Each will execute its own stop, and the stops will
+// be serialized.
+//
+// This is also used by routines that do stack dumps. If the system is
+// in panic or being exited, this may not reliably stop all
+// goroutines.
+func stopTheWorld(reason string) {
+	semacquire(&worldsema, false)
+	getg().m.preemptoff = reason
+	systemstack(stopTheWorldWithSema)
+}
+
+// startTheWorld undoes the effects of stopTheWorld.
+func startTheWorld() {
+	systemstack(startTheWorldWithSema)
+	// worldsema must be held over startTheWorldWithSema to ensure
+	// gomaxprocs cannot change while worldsema is held.
+	semrelease(&worldsema)
+	getg().m.preemptoff = ""
+}
+
+// Holding worldsema grants an M the right to try to stop the world
+// and prevents gomaxprocs from changing concurrently.
+var worldsema uint32 = 1
+
+// stopTheWorldWithSema is the core implementation of stopTheWorld.
+// The caller is responsible for acquiring worldsema and disabling
+// preemption first, and then should call stopTheWorldWithSema on the system
+// stack:
+//
+//	semacquire(&worldsema, false)
+//	m.preemptoff = "reason"
+//	systemstack(stopTheWorldWithSema)
+//
+// When finished, the caller must either call startTheWorld or undo
+// these three operations separately:
+//
+//	m.preemptoff = ""
+//	systemstack(startTheWorldWithSema)
+//	semrelease(&worldsema)
+//
+// It is allowed to acquire worldsema once and then execute multiple
+// startTheWorldWithSema/stopTheWorldWithSema pairs.
+// Other P's are able to execute between successive calls to
+// startTheWorldWithSema and stopTheWorldWithSema.
+// Holding worldsema causes any other goroutines invoking
+// stopTheWorld to block.
+func stopTheWorldWithSema() {
+	_g_ := getg()
+
+	// If we hold a lock, then we won't be able to stop another M
+	// that is blocked trying to acquire the lock.
+	if _g_.m.locks > 0 {
+		throw("stopTheWorld: holding locks")
+	}
+
+	lock(&sched.lock)
+	sched.stopwait = gomaxprocs
+	atomicstore(&sched.gcwaiting, 1)
+	preemptall()
+	// stop current P
+	_g_.m.p.ptr().status = _Pgcstop // Pgcstop is only diagnostic.
+	sched.stopwait--
+	// try to retake all P's in Psyscall status
+	for i := 0; i < int(gomaxprocs); i++ {
+		p := allp[i]
+		s := p.status
+		if s == _Psyscall && cas(&p.status, s, _Pgcstop) {
+			if trace.enabled {
+				traceGoSysBlock(p)
+				traceProcStop(p)
+			}
+			p.syscalltick++
+			sched.stopwait--
+		}
+	}
+	// stop idle P's
+	for {
+		p := pidleget()
+		if p == nil {
+			break
+		}
+		p.status = _Pgcstop
+		sched.stopwait--
+	}
+	wait := sched.stopwait > 0
+	unlock(&sched.lock)
+
+	// wait for remaining P's to stop voluntarily
+	if wait {
+		for {
+			// wait for 100us, then try to re-preempt in case of any races
+			if notetsleep(&sched.stopnote, 100*1000) {
+				noteclear(&sched.stopnote)
+				break
+			}
+			preemptall()
+		}
+	}
+	if sched.stopwait != 0 {
+		throw("stopTheWorld: not stopped")
+	}
+	for i := 0; i < int(gomaxprocs); i++ {
+		p := allp[i]
+		if p.status != _Pgcstop {
+			throw("stopTheWorld: not stopped")
+		}
+	}
+}
+
+func mhelpgc() {
+	_g_ := getg()
+	_g_.m.helpgc = -1
+}
+
+func startTheWorldWithSema() {
+	_g_ := getg()
+
+	_g_.m.locks++        // disable preemption because it can be holding p in a local var
+	gp := netpoll(false) // non-blocking
+	injectglist(gp)
+	add := needaddgcproc()
+	lock(&sched.lock)
+
+	procs := gomaxprocs
+	if newprocs != 0 {
+		procs = newprocs
+		newprocs = 0
+	}
+	p1 := procresize(procs)
+	sched.gcwaiting = 0
+	if sched.sysmonwait != 0 {
+		sched.sysmonwait = 0
+		notewakeup(&sched.sysmonnote)
+	}
+	unlock(&sched.lock)
+
+	for p1 != nil {
+		p := p1
+		p1 = p1.link.ptr()
+		if p.m != 0 {
+			mp := p.m.ptr()
+			p.m = 0
+			if mp.nextp != 0 {
+				throw("startTheWorld: inconsistent mp->nextp")
+			}
+			mp.nextp.set(p)
+			notewakeup(&mp.park)
+		} else {
+			// Start M to run P.  Do not start another M below.
+			newm(nil, p)
+			add = false
+		}
+	}
+
+	// Wake up an additional proc in case we have excessive runnable goroutines
+	// in local queues or in the global queue. If we don't, the proc will park itself.
+	// If we have lots of excessive work, resetspinning will unpark additional procs as necessary.
+	if atomicload(&sched.npidle) != 0 && atomicload(&sched.nmspinning) == 0 {
+		wakep()
+	}
+
+	if add {
+		// If GC could have used another helper proc, start one now,
+		// in the hope that it will be available next time.
+		// It would have been even better to start it before the collection,
+		// but doing so requires allocating memory, so it's tricky to
+		// coordinate.  This lazy approach works out in practice:
+		// we don't mind if the first couple gc rounds don't have quite
+		// the maximum number of procs.
+		newm(mhelpgc, nil)
+	}
+	_g_.m.locks--
+	if _g_.m.locks == 0 && _g_.preempt { // restore the preemption request in case we've cleared it in newstack
+		_g_.stackguard0 = stackPreempt
+	}
+}
+
+// Called to start an M.
+//go:nosplit
+func mstart() {
+	_g_ := getg()
+
+	if _g_.stack.lo == 0 {
+		// Initialize stack bounds from system stack.
+		// Cgo may have left stack size in stack.hi.
+		size := _g_.stack.hi
+		if size == 0 {
+			size = 8192 * stackGuardMultiplier
+		}
+		_g_.stack.hi = uintptr(noescape(unsafe.Pointer(&size)))
+		_g_.stack.lo = _g_.stack.hi - size + 1024
+	}
+	// Initialize stack guards so that we can start calling
+	// both Go and C functions with stack growth prologues.
+	_g_.stackguard0 = _g_.stack.lo + _StackGuard
+	_g_.stackguard1 = _g_.stackguard0
+	mstart1()
+}
+
+func mstart1() {
+	_g_ := getg()
+
+	if _g_ != _g_.m.g0 {
+		throw("bad runtime·mstart")
+	}
+
+	// Record top of stack for use by mcall.
+	// Once we call schedule we're never coming back,
+	// so other calls can reuse this stack space.
+	gosave(&_g_.m.g0.sched)
+	_g_.m.g0.sched.pc = ^uintptr(0) // make sure it is never used
+	asminit()
+	minit()
+
+	// Install signal handlers; after minit so that minit can
+	// prepare the thread to be able to handle the signals.
+	if _g_.m == &m0 {
+		// Create an extra M for callbacks on threads not created by Go.
+		if iscgo && !cgoHasExtraM {
+			cgoHasExtraM = true
+			newextram()
+		}
+		initsig()
+	}
+
+	if fn := _g_.m.mstartfn; fn != nil {
+		fn()
+	}
+
+	if _g_.m.helpgc != 0 {
+		_g_.m.helpgc = 0
+		stopm()
+	} else if _g_.m != &m0 {
+		acquirep(_g_.m.nextp.ptr())
+		_g_.m.nextp = 0
+	}
+	schedule()
+}
+
+// forEachP calls fn(p) for every P p when p reaches a GC safe point.
+// If a P is currently executing code, this will bring the P to a GC
+// safe point and execute fn on that P. If the P is not executing code
+// (it is idle or in a syscall), this will call fn(p) directly while
+// preventing the P from exiting its state. This does not ensure that
+// fn will run on every CPU executing Go code, but it acts as a global
+// memory barrier. GC uses this as a "ragged barrier."
+//
+// The caller must hold worldsema.
+func forEachP(fn func(*p)) {
+	mp := acquirem()
+	_p_ := getg().m.p.ptr()
+
+	lock(&sched.lock)
+	if sched.safePointWait != 0 {
+		throw("forEachP: sched.safePointWait != 0")
+	}
+	sched.safePointWait = gomaxprocs - 1
+	sched.safePointFn = fn
+
+	// Ask all Ps to run the safe point function.
+	for _, p := range allp[:gomaxprocs] {
+		if p != _p_ {
+			atomicstore(&p.runSafePointFn, 1)
+		}
+	}
+	preemptall()
+
+	// Any P entering _Pidle or _Psyscall from now on will observe
+	// p.runSafePointFn == 1 and will call runSafePointFn when
+	// changing its status to _Pidle/_Psyscall.
+
+	// Run safe point function for all idle Ps. sched.pidle will
+	// not change because we hold sched.lock.
+	for p := sched.pidle.ptr(); p != nil; p = p.link.ptr() {
+		if cas(&p.runSafePointFn, 1, 0) {
+			fn(p)
+			sched.safePointWait--
+		}
+	}
+
+	wait := sched.safePointWait > 0
+	unlock(&sched.lock)
+
+	// Run fn for the current P.
+	fn(_p_)
+
+	// Force Ps currently in _Psyscall into _Pidle and hand them
+	// off to induce safe point function execution.
+	for i := 0; i < int(gomaxprocs); i++ {
+		p := allp[i]
+		s := p.status
+		if s == _Psyscall && p.runSafePointFn == 1 && cas(&p.status, s, _Pidle) {
+			if trace.enabled {
+				traceGoSysBlock(p)
+				traceProcStop(p)
+			}
+			p.syscalltick++
+			handoffp(p)
+		}
+	}
+
+	// Wait for remaining Ps to run fn.
+	if wait {
+		for {
+			// Wait for 100us, then try to re-preempt in
+			// case of any races.
+			if notetsleep(&sched.safePointNote, 100*1000) {
+				noteclear(&sched.safePointNote)
+				break
+			}
+			preemptall()
+		}
+	}
+	if sched.safePointWait != 0 {
+		throw("forEachP: not done")
+	}
+	for i := 0; i < int(gomaxprocs); i++ {
+		p := allp[i]
+		if p.runSafePointFn != 0 {
+			throw("forEachP: P did not run fn")
+		}
+	}
+
+	lock(&sched.lock)
+	sched.safePointFn = nil
+	unlock(&sched.lock)
+	releasem(mp)
+}
+
+// runSafePointFn runs the safe point function, if any, for this P.
+// This should be called like
+//
+//     if getg().m.p.runSafePointFn != 0 {
+//         runSafePointFn()
+//     }
+//
+// runSafePointFn must be checked on any transition in to _Pidle or
+// _Psyscall to avoid a race where forEachP sees that the P is running
+// just before the P goes into _Pidle/_Psyscall and neither forEachP
+// nor the P run the safe-point function.
+func runSafePointFn() {
+	p := getg().m.p.ptr()
+	// Resolve the race between forEachP running the safe-point
+	// function on this P's behalf and this P running the
+	// safe-point function directly.
+	if !cas(&p.runSafePointFn, 1, 0) {
+		return
+	}
+	sched.safePointFn(p)
+	lock(&sched.lock)
+	sched.safePointWait--
+	if sched.safePointWait == 0 {
+		notewakeup(&sched.safePointNote)
+	}
+	unlock(&sched.lock)
+}
+
+// When running with cgo, we call _cgo_thread_start
+// to start threads for us so that we can play nicely with
+// foreign code.
+var cgoThreadStart unsafe.Pointer
+
+type cgothreadstart struct {
+	g   guintptr
+	tls *uint64
+	fn  unsafe.Pointer
+}
+
+// Allocate a new m unassociated with any thread.
+// Can use p for allocation context if needed.
+// fn is recorded as the new m's m.mstartfn.
+func allocm(_p_ *p, fn func()) *m {
+	_g_ := getg()
+	_g_.m.locks++ // disable GC because it can be called from sysmon
+	if _g_.m.p == 0 {
+		acquirep(_p_) // temporarily borrow p for mallocs in this function
+	}
+	mp := new(m)
+	mp.mstartfn = fn
+	mcommoninit(mp)
+
+	// In case of cgo or Solaris, pthread_create will make us a stack.
+	// Windows and Plan 9 will lay out the sched stack on the OS stack.
+	if iscgo || GOOS == "solaris" || GOOS == "windows" || GOOS == "plan9" {
+		mp.g0 = malg(-1)
+	} else {
+		mp.g0 = malg(8192 * stackGuardMultiplier)
+	}
+	mp.g0.m = mp
+
+	if _p_ == _g_.m.p.ptr() {
+		releasep()
+	}
+	_g_.m.locks--
+	if _g_.m.locks == 0 && _g_.preempt { // restore the preemption request in case we've cleared it in newstack
+		_g_.stackguard0 = stackPreempt
+	}
+
+	return mp
+}
+
+// needm is called when a cgo callback happens on a
+// thread without an m (a thread not created by Go).
+// In this case, needm is expected to find an m to use
+// and return with m, g initialized correctly.
+// Since m and g are not set now (likely nil, but see below)
+// needm is limited in what routines it can call. In particular
+// it can only call nosplit functions (textflag 7) and cannot
+// do any scheduling that requires an m.
+//
+// In order to avoid needing heavy lifting here, we adopt
+// the following strategy: there is a stack of available m's
+// that can be stolen. Using compare-and-swap
+// to pop from the stack has ABA races, so we simulate
+// a lock by doing an exchange (via casp) to steal the stack
+// head and replace the top pointer with MLOCKED (1).
+// This serves as a simple spin lock that we can use even
+// without an m. The thread that locks the stack in this way
+// unlocks the stack by storing a valid stack head pointer.
+//
+// In order to make sure that there is always an m structure
+// available to be stolen, we maintain the invariant that there
+// is always one more than needed. At the beginning of the
+// program (if cgo is in use) the list is seeded with a single m.
+// If needm finds that it has taken the last m off the list, its job
+// is - once it has installed its own m so that it can do things like
+// allocate memory - to create a spare m and put it on the list.
+//
+// Each of these extra m's also has a g0 and a curg that are
+// pressed into service as the scheduling stack and current
+// goroutine for the duration of the cgo callback.
+//
+// When the callback is done with the m, it calls dropm to
+// put the m back on the list.
+//go:nosplit
+func needm(x byte) {
+	if iscgo && !cgoHasExtraM {
+		// Can happen if C/C++ code calls Go from a global ctor.
+		// Cannot throw, because the scheduler is not initialized yet.
+		write(2, unsafe.Pointer(&earlycgocallback[0]), int32(len(earlycgocallback)))
+		exit(1)
+	}
+
+	// Lock extra list, take head, unlock popped list.
+	// nilokay=false is safe here because of the invariant above,
+	// that the extra list always contains or will soon contain
+	// at least one m.
+	mp := lockextra(false)
+
+	// Set needextram when we've just emptied the list,
+	// so that the eventual call into cgocallbackg will
+	// allocate a new m for the extra list. We delay the
+	// allocation until then so that it can be done
+	// after exitsyscall makes sure it is okay to be
+	// running at all (that is, there's no garbage collection
+	// running right now).
+	mp.needextram = mp.schedlink == 0
+	unlockextra(mp.schedlink.ptr())
+
+	// Install g (= m->g0) and set the stack bounds
+	// to match the current stack. We don't actually know
+	// how big the stack is, like we don't know how big any
+	// scheduling stack is, but we assume there's at least 32 kB,
+	// which is more than enough for us.
+	setg(mp.g0)
+	_g_ := getg()
+	_g_.stack.hi = uintptr(noescape(unsafe.Pointer(&x))) + 1024
+	_g_.stack.lo = uintptr(noescape(unsafe.Pointer(&x))) - 32*1024
+	_g_.stackguard0 = _g_.stack.lo + _StackGuard
+
+	msigsave(mp)
+	// Initialize this thread to use the m.
+	asminit()
+	minit()
+}
+
+var earlycgocallback = []byte("fatal error: cgo callback before cgo call\n")
+
+// newextram allocates an m and puts it on the extra list.
+// It is called with a working local m, so that it can do things
+// like call schedlock and allocate.
+func newextram() {
+	// Create extra goroutine locked to extra m.
+	// The goroutine is the context in which the cgo callback will run.
+	// The sched.pc will never be returned to, but setting it to
+	// goexit makes clear to the traceback routines where
+	// the goroutine stack ends.
+	mp := allocm(nil, nil)
+	gp := malg(4096)
+	gp.sched.pc = funcPC(goexit) + _PCQuantum
+	gp.sched.sp = gp.stack.hi
+	gp.sched.sp -= 4 * regSize // extra space in case of reads slightly beyond frame
+	gp.sched.lr = 0
+	gp.sched.g = guintptr(unsafe.Pointer(gp))
+	gp.syscallpc = gp.sched.pc
+	gp.syscallsp = gp.sched.sp
+	gp.stktopsp = gp.sched.sp
+	// malg returns status as Gidle, change to Gsyscall before adding to allg
+	// where GC will see it.
+	casgstatus(gp, _Gidle, _Gsyscall)
+	gp.m = mp
+	mp.curg = gp
+	mp.locked = _LockInternal
+	mp.lockedg = gp
+	gp.lockedm = mp
+	gp.goid = int64(xadd64(&sched.goidgen, 1))
+	if raceenabled {
+		gp.racectx = racegostart(funcPC(newextram))
+	}
+	// put on allg for garbage collector
+	allgadd(gp)
+
+	// Add m to the extra list.
+	mnext := lockextra(true)
+	mp.schedlink.set(mnext)
+	unlockextra(mp)
+}
+
+// dropm is called when a cgo callback has called needm but is now
+// done with the callback and returning back into the non-Go thread.
+// It puts the current m back onto the extra list.
+//
+// The main expense here is the call to signalstack to release the
+// m's signal stack, and then the call to needm on the next callback
+// from this thread. It is tempting to try to save the m for next time,
+// which would eliminate both these costs, but there might not be
+// a next time: the current thread (which Go does not control) might exit.
+// If we saved the m for that thread, there would be an m leak each time
+// such a thread exited. Instead, we acquire and release an m on each
+// call. These should typically not be scheduling operations, just a few
+// atomics, so the cost should be small.
+//
+// TODO(rsc): An alternative would be to allocate a dummy pthread per-thread
+// variable using pthread_key_create. Unlike the pthread keys we already use
+// on OS X, this dummy key would never be read by Go code. It would exist
+// only so that we could register a thread-exit-time destructor.
+// That destructor would put the m back onto the extra list.
+// This is purely a performance optimization. The current version,
+// in which dropm happens on each cgo call, is still correct too.
+// We may have to keep the current version on systems with cgo
+// but without pthreads, like Windows.
+func dropm() {
+	// Undo whatever initialization minit did during needm.
+	unminit()
+
+	// Clear m and g, and return m to the extra list.
+	// After the call to setg we can only call nosplit functions
+	// with no pointer manipulation.
+	mp := getg().m
+	mnext := lockextra(true)
+	mp.schedlink.set(mnext)
+
+	setg(nil)
+	unlockextra(mp)
+}
+
+var extram uintptr
+
+// lockextra locks the extra list and returns the list head.
+// The caller must unlock the list by storing a new list head
+// to extram. If nilokay is true, then lockextra will
+// return a nil list head if that's what it finds. If nilokay is false,
+// lockextra will keep waiting until the list head is no longer nil.
+//go:nosplit
+func lockextra(nilokay bool) *m {
+	const locked = 1
+
+	for {
+		old := atomicloaduintptr(&extram)
+		if old == locked {
+			yield := osyield
+			yield()
+			continue
+		}
+		if old == 0 && !nilokay {
+			usleep(1)
+			continue
+		}
+		if casuintptr(&extram, old, locked) {
+			return (*m)(unsafe.Pointer(old))
+		}
+		yield := osyield
+		yield()
+		continue
+	}
+}
+
+//go:nosplit
+func unlockextra(mp *m) {
+	atomicstoreuintptr(&extram, uintptr(unsafe.Pointer(mp)))
+}
+
+// Create a new m.  It will start off with a call to fn, or else the scheduler.
+// fn needs to be static and not a heap allocated closure.
+// May run with m.p==nil, so write barriers are not allowed.
+//go:nowritebarrier
+func newm(fn func(), _p_ *p) {
+	mp := allocm(_p_, fn)
+	mp.nextp.set(_p_)
+	msigsave(mp)
+	if iscgo {
+		var ts cgothreadstart
+		if _cgo_thread_start == nil {
+			throw("_cgo_thread_start missing")
+		}
+		ts.g.set(mp.g0)
+		ts.tls = (*uint64)(unsafe.Pointer(&mp.tls[0]))
+		ts.fn = unsafe.Pointer(funcPC(mstart))
+		asmcgocall(_cgo_thread_start, unsafe.Pointer(&ts))
+		return
+	}
+	newosproc(mp, unsafe.Pointer(mp.g0.stack.hi))
+}
+
+// Stops execution of the current m until new work is available.
+// Returns with acquired P.
+func stopm() {
+	_g_ := getg()
+
+	if _g_.m.locks != 0 {
+		throw("stopm holding locks")
+	}
+	if _g_.m.p != 0 {
+		throw("stopm holding p")
+	}
+	if _g_.m.spinning {
+		_g_.m.spinning = false
+		xadd(&sched.nmspinning, -1)
+	}
+
+retry:
+	lock(&sched.lock)
+	mput(_g_.m)
+	unlock(&sched.lock)
+	notesleep(&_g_.m.park)
+	noteclear(&_g_.m.park)
+	if _g_.m.helpgc != 0 {
+		gchelper()
+		_g_.m.helpgc = 0
+		_g_.m.mcache = nil
+		_g_.m.p = 0
+		goto retry
+	}
+	acquirep(_g_.m.nextp.ptr())
+	_g_.m.nextp = 0
+}
+
+func mspinning() {
+	gp := getg()
+	if !runqempty(gp.m.nextp.ptr()) {
+		// Something (presumably the GC) was readied while the
+		// runtime was starting up this M, so the M is no
+		// longer spinning.
+		if int32(xadd(&sched.nmspinning, -1)) < 0 {
+			throw("mspinning: nmspinning underflowed")
+		}
+	} else {
+		gp.m.spinning = true
+	}
+}
+
+// Schedules some M to run the p (creates an M if necessary).
+// If p==nil, tries to get an idle P; if there are no idle P's, it does nothing.
+// May run with m.p==nil, so write barriers are not allowed.
+//go:nowritebarrier
+func startm(_p_ *p, spinning bool) {
+	lock(&sched.lock)
+	if _p_ == nil {
+		_p_ = pidleget()
+		if _p_ == nil {
+			unlock(&sched.lock)
+			if spinning {
+				xadd(&sched.nmspinning, -1)
+			}
+			return
+		}
+	}
+	mp := mget()
+	unlock(&sched.lock)
+	if mp == nil {
+		var fn func()
+		if spinning {
+			fn = mspinning
+		}
+		newm(fn, _p_)
+		return
+	}
+	if mp.spinning {
+		throw("startm: m is spinning")
+	}
+	if mp.nextp != 0 {
+		throw("startm: m has p")
+	}
+	if spinning && !runqempty(_p_) {
+		throw("startm: p has runnable gs")
+	}
+	mp.spinning = spinning
+	mp.nextp.set(_p_)
+	notewakeup(&mp.park)
+}
+
+// Hands off P from syscall or locked M.
+// Always runs without a P, so write barriers are not allowed.
+//go:nowritebarrier
+func handoffp(_p_ *p) {
+	// if it has local work, start it straight away
+	if !runqempty(_p_) || sched.runqsize != 0 {
+		startm(_p_, false)
+		return
+	}
+	// no local work, check that there are no spinning/idle M's,
+	// otherwise our help is not required
+	if atomicload(&sched.nmspinning)+atomicload(&sched.npidle) == 0 && cas(&sched.nmspinning, 0, 1) { // TODO: fast atomic
+		startm(_p_, true)
+		return
+	}
+	lock(&sched.lock)
+	if sched.gcwaiting != 0 {
+		_p_.status = _Pgcstop
+		sched.stopwait--
+		if sched.stopwait == 0 {
+			notewakeup(&sched.stopnote)
+		}
+		unlock(&sched.lock)
+		return
+	}
+	if _p_.runSafePointFn != 0 && cas(&_p_.runSafePointFn, 1, 0) {
+		sched.safePointFn(_p_)
+		sched.safePointWait--
+		if sched.safePointWait == 0 {
+			notewakeup(&sched.safePointNote)
+		}
+	}
+	if sched.runqsize != 0 {
+		unlock(&sched.lock)
+		startm(_p_, false)
+		return
+	}
+	// If this is the last running P and nobody is polling the network,
+	// we need to wake up another M to poll the network.
+	if sched.npidle == uint32(gomaxprocs-1) && atomicload64(&sched.lastpoll) != 0 {
+		unlock(&sched.lock)
+		startm(_p_, false)
+		return
+	}
+	pidleput(_p_)
+	unlock(&sched.lock)
+}
+
+// Tries to add one more P to execute G's.
+// Called when a G is made runnable (newproc, ready).
+func wakep() {
+	// be conservative about spinning threads
+	if !cas(&sched.nmspinning, 0, 1) {
+		return
+	}
+	startm(nil, true)
+}
+
+// Stops execution of the current m that is locked to a g until the g is runnable again.
+// Returns with acquired P.
+func stoplockedm() {
+	_g_ := getg()
+
+	if _g_.m.lockedg == nil || _g_.m.lockedg.lockedm != _g_.m {
+		throw("stoplockedm: inconsistent locking")
+	}
+	if _g_.m.p != 0 {
+		// Schedule another M to run this p.
+		_p_ := releasep()
+		handoffp(_p_)
+	}
+	incidlelocked(1)
+	// Wait until another thread schedules lockedg again.
+	notesleep(&_g_.m.park)
+	noteclear(&_g_.m.park)
+	status := readgstatus(_g_.m.lockedg)
+	if status&^_Gscan != _Grunnable {
+		print("runtime:stoplockedm: g is not Grunnable or Gscanrunnable\n")
+		dumpgstatus(_g_)
+		throw("stoplockedm: not runnable")
+	}
+	acquirep(_g_.m.nextp.ptr())
+	_g_.m.nextp = 0
+}
+
+// Schedules the locked m to run the locked gp.
+// May run during STW, so write barriers are not allowed.
+//go:nowritebarrier
+func startlockedm(gp *g) {
+	_g_ := getg()
+
+	mp := gp.lockedm
+	if mp == _g_.m {
+		throw("startlockedm: locked to me")
+	}
+	if mp.nextp != 0 {
+		throw("startlockedm: m has p")
+	}
+	// directly hand off the current P to the locked m
+	incidlelocked(-1)
+	_p_ := releasep()
+	mp.nextp.set(_p_)
+	notewakeup(&mp.park)
+	stopm()
+}
+
+// Stops the current m for stopTheWorld.
+// Returns when the world is restarted.
+func gcstopm() {
+	_g_ := getg()
+
+	if sched.gcwaiting == 0 {
+		throw("gcstopm: not waiting for gc")
+	}
+	if _g_.m.spinning {
+		_g_.m.spinning = false
+		xadd(&sched.nmspinning, -1)
+	}
+	_p_ := releasep()
+	lock(&sched.lock)
+	_p_.status = _Pgcstop
+	sched.stopwait--
+	if sched.stopwait == 0 {
+		notewakeup(&sched.stopnote)
+	}
+	unlock(&sched.lock)
+	stopm()
+}
+
+// Schedules gp to run on the current M.
+// If inheritTime is true, gp inherits the remaining time in the
+// current time slice. Otherwise, it starts a new time slice.
+// Never returns.
+func execute(gp *g, inheritTime bool) {
+	_g_ := getg()
+
+	casgstatus(gp, _Grunnable, _Grunning)
+	gp.waitsince = 0
+	gp.preempt = false
+	gp.stackguard0 = gp.stack.lo + _StackGuard
+	if !inheritTime {
+		_g_.m.p.ptr().schedtick++
+	}
+	_g_.m.curg = gp
+	gp.m = _g_.m
+
+	// Check whether the profiler needs to be turned on or off.
+	hz := sched.profilehz
+	if _g_.m.profilehz != hz {
+		resetcpuprofiler(hz)
+	}
+
+	if trace.enabled {
+		// GoSysExit has to happen when we have a P, but before GoStart.
+		// So we emit it here.
+		if gp.syscallsp != 0 && gp.sysblocktraced {
+			// Since gp.sysblocktraced is true, we must emit an event.
+			// There is a race between the code that initializes sysexitseq
+			// and sysexitticks (in exitsyscall, which runs without a P,
+			// and therefore is not stopped with the rest of the world)
+			// and the code that initializes a new trace.
+			// The recorded sysexitseq and sysexitticks must therefore
+			// be treated as "best effort". If they are valid for this trace,
+			// then great, use them for greater accuracy.
+			// But if they're not valid for this trace, assume that the
+			// trace was started after the actual syscall exit (but before
+			// we actually managed to start the goroutine, aka right now),
+			// and assign a fresh time stamp to keep the log consistent.
+			seq, ts := gp.sysexitseq, gp.sysexitticks
+			if seq == 0 || int64(seq)-int64(trace.seqStart) < 0 {
+				seq, ts = tracestamp()
+			}
+			traceGoSysExit(seq, ts)
+		}
+		traceGoStart()
+	}
+
+	gogo(&gp.sched)
+}
+
+// Finds a runnable goroutine to execute.
+// Tries to steal from other P's, get g from global queue, poll network.
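+// The search order below is: local runq, global runq, non-blocking
+// netpoll, stealing from other P's, idle-time GC mark work, and
+// finally a blocking netpoll before the M parks in stopm.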
+func findrunnable() (gp *g, inheritTime bool) {
+	_g_ := getg()
+
+top:
+	if sched.gcwaiting != 0 {
+		gcstopm()
+		goto top
+	}
+	if _g_.m.p.ptr().runSafePointFn != 0 {
+		runSafePointFn()
+	}
+	if fingwait && fingwake {
+		if gp := wakefing(); gp != nil {
+			ready(gp, 0)
+		}
+	}
+
+	// local runq
+	if gp, inheritTime := runqget(_g_.m.p.ptr()); gp != nil {
+		return gp, inheritTime
+	}
+
+	// global runq
+	if sched.runqsize != 0 {
+		lock(&sched.lock)
+		gp := globrunqget(_g_.m.p.ptr(), 0)
+		unlock(&sched.lock)
+		if gp != nil {
+			return gp, false
+		}
+	}
+
+	// Poll network.
+	// This netpoll is only an optimization before we resort to stealing.
+	// We can safely skip it if there is a thread blocked in netpoll already.
+	// If there is any kind of logical race with that blocked thread
+	// (e.g. it has already returned from netpoll, but does not set lastpoll yet),
+	// this thread will do blocking netpoll below anyway.
+	if netpollinited() && sched.lastpoll != 0 {
+		if gp := netpoll(false); gp != nil { // non-blocking
+			// netpoll returns list of goroutines linked by schedlink.
+			injectglist(gp.schedlink.ptr())
+			casgstatus(gp, _Gwaiting, _Grunnable)
+			if trace.enabled {
+				traceGoUnpark(gp, 0)
+			}
+			return gp, false
+		}
+	}
+
+	// If the number of spinning M's is at least half the number of busy P's, block.
+	// This is necessary to prevent excessive CPU consumption
+	// when GOMAXPROCS>>1 but the program parallelism is low.
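+	// For example, with gomaxprocs = 8 and no idle P's, a fifth
+	// would-be spinner sees 2*4 >= 8-0 and blocks here, so at most
+	// half of the busy P's are shadowed by spinning M's.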
+	if !_g_.m.spinning && 2*atomicload(&sched.nmspinning) >= uint32(gomaxprocs)-atomicload(&sched.npidle) { // TODO: fast atomic
+		goto stop
+	}
+	if !_g_.m.spinning {
+		_g_.m.spinning = true
+		xadd(&sched.nmspinning, 1)
+	}
+	// random steal from other P's
+	for i := 0; i < int(4*gomaxprocs); i++ {
+		if sched.gcwaiting != 0 {
+			goto top
+		}
+		_p_ := allp[fastrand1()%uint32(gomaxprocs)]
+		var gp *g
+		if _p_ == _g_.m.p.ptr() {
+			gp, _ = runqget(_p_)
+		} else {
+			stealRunNextG := i > 2*int(gomaxprocs) // first look for ready queues with more than 1 g
+			gp = runqsteal(_g_.m.p.ptr(), _p_, stealRunNextG)
+		}
+		if gp != nil {
+			return gp, false
+		}
+	}
+
+stop:
+
+	// We have nothing to do. If we're in the GC mark phase and can
+	// safely scan and blacken objects, run idle-time marking
+	// rather than give up the P.
+	if _p_ := _g_.m.p.ptr(); gcBlackenEnabled != 0 && _p_.gcBgMarkWorker != nil && gcMarkWorkAvailable(_p_) {
+		_p_.gcMarkWorkerMode = gcMarkWorkerIdleMode
+		gp := _p_.gcBgMarkWorker
+		casgstatus(gp, _Gwaiting, _Grunnable)
+		if trace.enabled {
+			traceGoUnpark(gp, 0)
+		}
+		return gp, false
+	}
+
+	// return P and block
+	lock(&sched.lock)
+	if sched.gcwaiting != 0 || _g_.m.p.ptr().runSafePointFn != 0 {
+		unlock(&sched.lock)
+		goto top
+	}
+	if sched.runqsize != 0 {
+		gp := globrunqget(_g_.m.p.ptr(), 0)
+		unlock(&sched.lock)
+		return gp, false
+	}
+	_p_ := releasep()
+	pidleput(_p_)
+	unlock(&sched.lock)
+	if _g_.m.spinning {
+		_g_.m.spinning = false
+		xadd(&sched.nmspinning, -1)
+	}
+
+	// Check all runqueues once again. We released our P above, so new
+	// work could have been submitted to another P after our earlier
+	// checks; recheck so that such a submission is not lost while
+	// every M goes to sleep.
+	for i := 0; i < int(gomaxprocs); i++ {
+		_p_ := allp[i]
+		if _p_ != nil && !runqempty(_p_) {
+			lock(&sched.lock)
+			_p_ = pidleget()
+			unlock(&sched.lock)
+			if _p_ != nil {
+				acquirep(_p_)
+				goto top
+			}
+			break
+		}
+	}
+
+	// poll network
+	if netpollinited() && xchg64(&sched.lastpoll, 0) != 0 {
+		if _g_.m.p != 0 {
+			throw("findrunnable: netpoll with p")
+		}
+		if _g_.m.spinning {
+			throw("findrunnable: netpoll with spinning")
+		}
+		gp := netpoll(true) // block until new work is available
+		atomicstore64(&sched.lastpoll, uint64(nanotime()))
+		if gp != nil {
+			lock(&sched.lock)
+			_p_ = pidleget()
+			unlock(&sched.lock)
+			if _p_ != nil {
+				acquirep(_p_)
+				injectglist(gp.schedlink.ptr())
+				casgstatus(gp, _Gwaiting, _Grunnable)
+				if trace.enabled {
+					traceGoUnpark(gp, 0)
+				}
+				return gp, false
+			}
+			injectglist(gp)
+		}
+	}
+	stopm()
+	goto top
+}
+
+func resetspinning() {
+	_g_ := getg()
+
+	var nmspinning uint32
+	if _g_.m.spinning {
+		_g_.m.spinning = false
+		nmspinning = xadd(&sched.nmspinning, -1)
+		if int32(nmspinning) < 0 {
+			throw("findrunnable: negative nmspinning")
+		}
+	} else {
+		nmspinning = atomicload(&sched.nmspinning)
+	}
+
+	// M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
+	// so see if we need to wake up another P here.
+	if nmspinning == 0 && atomicload(&sched.npidle) > 0 {
+		wakep()
+	}
+}
+
+// Injects the list of runnable G's into the scheduler.
+// Can run concurrently with GC.
+func injectglist(glist *g) {
+	if glist == nil {
+		return
+	}
+	if trace.enabled {
+		for gp := glist; gp != nil; gp = gp.schedlink.ptr() {
+			traceGoUnpark(gp, 0)
+		}
+	}
+	lock(&sched.lock)
+	var n int
+	for n = 0; glist != nil; n++ {
+		gp := glist
+		glist = gp.schedlink.ptr()
+		casgstatus(gp, _Gwaiting, _Grunnable)
+		globrunqput(gp)
+	}
+	unlock(&sched.lock)
+	for ; n != 0 && sched.npidle != 0; n-- {
+		startm(nil, false)
+	}
+}
+
+// One round of scheduler: find a runnable goroutine and execute it.
+// Never returns.
+func schedule() {
+	_g_ := getg()
+
+	if _g_.m.locks != 0 {
+		throw("schedule: holding locks")
+	}
+
+	if _g_.m.lockedg != nil {
+		stoplockedm()
+		execute(_g_.m.lockedg, false) // Never returns.
+	}
+
+top:
+	if sched.gcwaiting != 0 {
+		gcstopm()
+		goto top
+	}
+	if _g_.m.p.ptr().runSafePointFn != 0 {
+		runSafePointFn()
+	}
+
+	var gp *g
+	var inheritTime bool
+	if trace.enabled || trace.shutdown {
+		gp = traceReader()
+		if gp != nil {
+			casgstatus(gp, _Gwaiting, _Grunnable)
+			traceGoUnpark(gp, 0)
+			resetspinning()
+		}
+	}
+	if gp == nil && gcBlackenEnabled != 0 {
+		gp = gcController.findRunnableGCWorker(_g_.m.p.ptr())
+		if gp != nil {
+			resetspinning()
+		}
+	}
+	if gp == nil {
+		// Check the global runnable queue once in a while to ensure fairness.
+		// Otherwise two goroutines can completely occupy the local runqueue
+		// by constantly respawning each other.
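+		// (61 is presumably chosen as a prime so that this check
+		// does not fall into lockstep with other periodic work.)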
+		if _g_.m.p.ptr().schedtick%61 == 0 && sched.runqsize > 0 {
+			lock(&sched.lock)
+			gp = globrunqget(_g_.m.p.ptr(), 1)
+			unlock(&sched.lock)
+			if gp != nil {
+				resetspinning()
+			}
+		}
+	}
+	if gp == nil {
+		gp, inheritTime = runqget(_g_.m.p.ptr())
+		if gp != nil && _g_.m.spinning {
+			throw("schedule: spinning with local work")
+		}
+	}
+	if gp == nil {
+		gp, inheritTime = findrunnable() // blocks until work is available
+		resetspinning()
+	}
+
+	if gp.lockedm != nil {
+		// Hands off own p to the locked m,
+		// then blocks waiting for a new p.
+		startlockedm(gp)
+		goto top
+	}
+
+	execute(gp, inheritTime)
+}
+
+// dropg removes the association between m and the current goroutine m->curg (gp for short).
+// Typically a caller sets gp's status away from Grunning and then
+// immediately calls dropg to finish the job. The caller is also responsible
+// for arranging that gp will be restarted using ready at an
+// appropriate time. After calling dropg and arranging for gp to be
+// readied later, the caller can do other work but eventually should
+// call schedule to restart the scheduling of goroutines on this m.
+func dropg() {
+	_g_ := getg()
+
+	if _g_.m.lockedg == nil {
+		_g_.m.curg.m = nil
+		_g_.m.curg = nil
+	}
+}
+
+func parkunlock_c(gp *g, lock unsafe.Pointer) bool {
+	unlock((*mutex)(lock))
+	return true
+}
+
+// park continuation on g0.
+func park_m(gp *g) {
+	_g_ := getg()
+
+	if trace.enabled {
+		traceGoPark(_g_.m.waittraceev, _g_.m.waittraceskip, gp)
+	}
+
+	casgstatus(gp, _Grunning, _Gwaiting)
+	dropg()
+
+	if _g_.m.waitunlockf != nil {
+		fn := *(*func(*g, unsafe.Pointer) bool)(unsafe.Pointer(&_g_.m.waitunlockf))
+		ok := fn(gp, _g_.m.waitlock)
+		_g_.m.waitunlockf = nil
+		_g_.m.waitlock = nil
+		if !ok {
+			if trace.enabled {
+				traceGoUnpark(gp, 2)
+			}
+			casgstatus(gp, _Gwaiting, _Grunnable)
+			execute(gp, true) // Schedule it back, never returns.
+		}
+	}
+	schedule()
+}
+
+func goschedImpl(gp *g) {
+	status := readgstatus(gp)
+	if status&^_Gscan != _Grunning {
+		dumpgstatus(gp)
+		throw("bad g status")
+	}
+	casgstatus(gp, _Grunning, _Grunnable)
+	dropg()
+	lock(&sched.lock)
+	globrunqput(gp)
+	unlock(&sched.lock)
+
+	schedule()
+}
+
+// Gosched continuation on g0.
+func gosched_m(gp *g) {
+	if trace.enabled {
+		traceGoSched()
+	}
+	goschedImpl(gp)
+}
+
+func gopreempt_m(gp *g) {
+	if trace.enabled {
+		traceGoPreempt()
+	}
+	goschedImpl(gp)
+}
+
+// Finishes execution of the current goroutine.
+func goexit1() {
+	if raceenabled {
+		racegoend()
+	}
+	if trace.enabled {
+		traceGoEnd()
+	}
+	mcall(goexit0)
+}
+
+// goexit continuation on g0.
+func goexit0(gp *g) {
+	_g_ := getg()
+
+	casgstatus(gp, _Grunning, _Gdead)
+	gp.m = nil
+	gp.lockedm = nil
+	_g_.m.lockedg = nil
+	gp.paniconfault = false
+	gp._defer = nil // should be true already but just in case.
+	gp._panic = nil // non-nil for Goexit during panic. points at stack-allocated data.
+	gp.writebuf = nil
+	gp.waitreason = ""
+	gp.param = nil
+
+	dropg()
+
+	if _g_.m.locked&^_LockExternal != 0 {
+		print("invalid m->locked = ", _g_.m.locked, "\n")
+		throw("internal lockOSThread error")
+	}
+	_g_.m.locked = 0
+	gfput(_g_.m.p.ptr(), gp)
+	schedule()
+}
+
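+// save updates getg().sched to refer to pc and sp so that a following
+// gogo will restore pc/sp.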
+//go:nosplit
+//go:nowritebarrier
+func save(pc, sp uintptr) {
+	_g_ := getg()
+
+	_g_.sched.pc = pc
+	_g_.sched.sp = sp
+	_g_.sched.lr = 0
+	_g_.sched.ret = 0
+	_g_.sched.ctxt = nil
+	_g_.sched.g = guintptr(unsafe.Pointer(_g_))
+}
+
+// The goroutine g is about to enter a system call.
+// Record that it's not using the cpu anymore.
+// This is called only from the go syscall library and cgocall,
+// not from the low-level system calls used by the runtime.
+//
+// Entersyscall cannot split the stack: the gosave must
+// make g->sched refer to the caller's stack segment, because
+// entersyscall is going to return immediately after.
+//
+// Nothing entersyscall calls can split the stack either.
+// We cannot safely move the stack during an active call to syscall,
+// because we do not know which of the uintptr arguments are
+// really pointers (back into the stack).
+// In practice, this means that we make the fast path run through
+// entersyscall doing no-split things, and the slow path has to use systemstack
+// to run bigger things on the system stack.
+//
+// reentersyscall is the entry point used by cgo callbacks, where explicitly
+// saved SP and PC are restored. This is needed when exitsyscall will be called
+// from a function further up in the call stack than the parent, as g->syscallsp
+// must always point to a valid stack frame. entersyscall below is the normal
+// entry point for syscalls, which obtains the SP and PC from the caller.
+//
+// Syscall tracing:
+// At the start of a syscall we emit traceGoSysCall to capture the stack trace.
+// If the syscall does not block, that is it, we do not emit any other events.
+// If the syscall blocks (that is, P is retaken), retaker emits traceGoSysBlock;
+// when syscall returns we emit traceGoSysExit and when the goroutine starts running
+// (potentially instantly, if exitsyscallfast returns true) we emit traceGoStart.
+// To ensure that traceGoSysExit is emitted strictly after traceGoSysBlock,
+// we remember current value of syscalltick in m (_g_.m.syscalltick = _g_.m.p.ptr().syscalltick),
+// whoever emits traceGoSysBlock increments p.syscalltick afterwards;
+// and we wait for the increment before emitting traceGoSysExit.
+// Note that the increment is done even if tracing is not enabled,
+// because tracing can be enabled in the middle of a syscall. We don't want the wait to hang.
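+//
+// Schematically, for a syscall that blocks and has its P retaken:
+//
+//	entersyscall:  traceGoSysCall; m.syscalltick = p.syscalltick
+//	retaker:       traceGoSysBlock(p); p.syscalltick++
+//	exitsyscall:   spin while p.syscalltick == m.syscalltick,
+//	               then record sysexitseq/sysexitticks
+//	execute:       traceGoSysExit, then traceGoStart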
+//
+//go:nosplit
+func reentersyscall(pc, sp uintptr) {
+	_g_ := getg()
+
+	// Disable preemption: during this function g is in Gsyscall status
+	// but can have an inconsistent g->sched; do not let the GC observe it.
+	_g_.m.locks++
+
+	// Entersyscall must not call any function that might split/grow the stack.
+	// (See details in comment above.)
+	// Catch calls that might, by replacing the stack guard with something that
+	// will trip any stack check and leaving a flag to tell newstack to die.
+	_g_.stackguard0 = stackPreempt
+	_g_.throwsplit = true
+
+	// Leave SP around for GC and traceback.
+	save(pc, sp)
+	_g_.syscallsp = sp
+	_g_.syscallpc = pc
+	casgstatus(_g_, _Grunning, _Gsyscall)
+	if _g_.syscallsp < _g_.stack.lo || _g_.stack.hi < _g_.syscallsp {
+		systemstack(func() {
+			print("entersyscall inconsistent ", hex(_g_.syscallsp), " [", hex(_g_.stack.lo), ",", hex(_g_.stack.hi), "]\n")
+			throw("entersyscall")
+		})
+	}
+
+	if trace.enabled {
+		systemstack(traceGoSysCall)
+		// systemstack itself clobbers g.sched.{pc,sp} and we might
+		// need them later when the G is genuinely blocked in a
+		// syscall
+		save(pc, sp)
+	}
+
+	if atomicload(&sched.sysmonwait) != 0 { // TODO: fast atomic
+		systemstack(entersyscall_sysmon)
+		save(pc, sp)
+	}
+
+	if _g_.m.p.ptr().runSafePointFn != 0 {
+		// runSafePointFn may split the stack if run on this stack
+		systemstack(runSafePointFn)
+		save(pc, sp)
+	}
+
+	_g_.m.syscalltick = _g_.m.p.ptr().syscalltick
+	_g_.sysblocktraced = true
+	_g_.m.mcache = nil
+	_g_.m.p.ptr().m = 0
+	atomicstore(&_g_.m.p.ptr().status, _Psyscall)
+	if sched.gcwaiting != 0 {
+		systemstack(entersyscall_gcwait)
+		save(pc, sp)
+	}
+
+	// Goroutines must not split stacks in Gsyscall status (it would corrupt g->sched).
+	// We set stackguard0 to stackPreempt so that the first split-stack check calls morestack.
+	// Morestack detects this case and throws.
+	_g_.stackguard0 = stackPreempt
+	_g_.m.locks--
+}
+
+// Standard syscall entry used by the go syscall library and normal cgo calls.
+//go:nosplit
+func entersyscall(dummy int32) {
+	reentersyscall(getcallerpc(unsafe.Pointer(&dummy)), getcallersp(unsafe.Pointer(&dummy)))
+}
+
+func entersyscall_sysmon() {
+	lock(&sched.lock)
+	if atomicload(&sched.sysmonwait) != 0 {
+		atomicstore(&sched.sysmonwait, 0)
+		notewakeup(&sched.sysmonnote)
+	}
+	unlock(&sched.lock)
+}
+
+func entersyscall_gcwait() {
+	_g_ := getg()
+	_p_ := _g_.m.p.ptr()
+
+	lock(&sched.lock)
+	if sched.stopwait > 0 && cas(&_p_.status, _Psyscall, _Pgcstop) {
+		if trace.enabled {
+			traceGoSysBlock(_p_)
+			traceProcStop(_p_)
+		}
+		_p_.syscalltick++
+		if sched.stopwait--; sched.stopwait == 0 {
+			notewakeup(&sched.stopnote)
+		}
+	}
+	unlock(&sched.lock)
+}
+
+// The same as entersyscall(), but with a hint that the syscall is blocking.
+//go:nosplit
+func entersyscallblock(dummy int32) {
+	_g_ := getg()
+
+	_g_.m.locks++ // see comment in entersyscall
+	_g_.throwsplit = true
+	_g_.stackguard0 = stackPreempt // see comment in entersyscall
+	_g_.m.syscalltick = _g_.m.p.ptr().syscalltick
+	_g_.sysblocktraced = true
+	_g_.m.p.ptr().syscalltick++
+
+	// Leave SP around for GC and traceback.
+	pc := getcallerpc(unsafe.Pointer(&dummy))
+	sp := getcallersp(unsafe.Pointer(&dummy))
+	save(pc, sp)
+	_g_.syscallsp = _g_.sched.sp
+	_g_.syscallpc = _g_.sched.pc
+	if _g_.syscallsp < _g_.stack.lo || _g_.stack.hi < _g_.syscallsp {
+		sp1 := sp
+		sp2 := _g_.sched.sp
+		sp3 := _g_.syscallsp
+		systemstack(func() {
+			print("entersyscallblock inconsistent ", hex(sp1), " ", hex(sp2), " ", hex(sp3), " [", hex(_g_.stack.lo), ",", hex(_g_.stack.hi), "]\n")
+			throw("entersyscallblock")
+		})
+	}
+	casgstatus(_g_, _Grunning, _Gsyscall)
+	if _g_.syscallsp < _g_.stack.lo || _g_.stack.hi < _g_.syscallsp {
+		systemstack(func() {
+			print("entersyscallblock inconsistent ", hex(sp), " ", hex(_g_.sched.sp), " ", hex(_g_.syscallsp), " [", hex(_g_.stack.lo), ",", hex(_g_.stack.hi), "]\n")
+			throw("entersyscallblock")
+		})
+	}
+
+	systemstack(entersyscallblock_handoff)
+
+	// Resave for traceback during blocked call.
+	save(getcallerpc(unsafe.Pointer(&dummy)), getcallersp(unsafe.Pointer(&dummy)))
+
+	_g_.m.locks--
+}
+
+func entersyscallblock_handoff() {
+	if trace.enabled {
+		traceGoSysCall()
+		traceGoSysBlock(getg().m.p.ptr())
+	}
+	handoffp(releasep())
+}
+
+// The goroutine g exited its system call.
+// Arrange for it to run on a cpu again.
+// This is called only from the go syscall library, not
+// from the low-level system calls used by the runtime.
+//go:nosplit
+func exitsyscall(dummy int32) {
+	_g_ := getg()
+
+	_g_.m.locks++ // see comment in entersyscall
+	if getcallersp(unsafe.Pointer(&dummy)) > _g_.syscallsp {
+		throw("exitsyscall: syscall frame is no longer valid")
+	}
+
+	_g_.waitsince = 0
+	oldp := _g_.m.p.ptr()
+	if exitsyscallfast() {
+		if _g_.m.mcache == nil {
+			throw("lost mcache")
+		}
+		if trace.enabled {
+			if oldp != _g_.m.p.ptr() || _g_.m.syscalltick != _g_.m.p.ptr().syscalltick {
+				systemstack(traceGoStart)
+			}
+		}
+		// There's a cpu for us, so we can run.
+		_g_.m.p.ptr().syscalltick++
+		// We need to cas the status and scan before resuming...
+		casgstatus(_g_, _Gsyscall, _Grunning)
+
+		// Garbage collector isn't running (since we are),
+		// so okay to clear syscallsp.
+		_g_.syscallsp = 0
+		_g_.m.locks--
+		if _g_.preempt {
+			// restore the preemption request in case we've cleared it in newstack
+			_g_.stackguard0 = stackPreempt
+		} else {
+			// otherwise restore the real _StackGuard; we spoiled it in entersyscall/entersyscallblock
+			_g_.stackguard0 = _g_.stack.lo + _StackGuard
+		}
+		_g_.throwsplit = false
+		return
+	}
+
+	_g_.sysexitticks = 0
+	_g_.sysexitseq = 0
+	if trace.enabled {
+		// Wait till traceGoSysBlock event is emitted.
+		// This ensures consistency of the trace (the goroutine is started after it is blocked).
+		for oldp != nil && oldp.syscalltick == _g_.m.syscalltick {
+			osyield()
+		}
+		// We can't trace syscall exit right now because we don't have a P.
+		// Tracing code can invoke write barriers that cannot run without a P.
+		// So instead we remember the syscall exit time and emit the event
+		// in execute when we have a P.
+		_g_.sysexitseq, _g_.sysexitticks = tracestamp()
+	}
+
+	_g_.m.locks--
+
+	// Call the scheduler.
+	mcall(exitsyscall0)
+
+	if _g_.m.mcache == nil {
+		throw("lost mcache")
+	}
+
+	// Scheduler returned, so we're allowed to run now.
+	// Delete the syscallsp information that we left for
+	// the garbage collector during the system call.
+	// Must wait until now because until gosched returns
+	// we don't know for sure that the garbage collector
+	// is not running.
+	_g_.syscallsp = 0
+	_g_.m.p.ptr().syscalltick++
+	_g_.throwsplit = false
+}
+
+//go:nosplit
+func exitsyscallfast() bool {
+	_g_ := getg()
+
+	// Freezetheworld sets stopwait but does not retake P's.
+	if sched.stopwait == freezeStopWait {
+		_g_.m.mcache = nil
+		_g_.m.p = 0
+		return false
+	}
+
+	// Try to re-acquire the last P.
+	if _g_.m.p != 0 && _g_.m.p.ptr().status == _Psyscall && cas(&_g_.m.p.ptr().status, _Psyscall, _Prunning) {
+		// There's a cpu for us, so we can run.
+		_g_.m.mcache = _g_.m.p.ptr().mcache
+		_g_.m.p.ptr().m.set(_g_.m)
+		if _g_.m.syscalltick != _g_.m.p.ptr().syscalltick {
+			if trace.enabled {
+				// The p was retaken and then entered a syscall again (since _g_.m.syscalltick has changed).
+				// traceGoSysBlock for this syscall was already emitted,
+				// but here we effectively retake the p from the new syscall running on the same p.
+				systemstack(func() {
+					// Denote blocking of the new syscall.
+					traceGoSysBlock(_g_.m.p.ptr())
+					// Denote completion of the current syscall.
+					traceGoSysExit(tracestamp())
+				})
+			}
+			_g_.m.p.ptr().syscalltick++
+		}
+		return true
+	}
+
+	// Try to get any other idle P.
+	oldp := _g_.m.p.ptr()
+	_g_.m.mcache = nil
+	_g_.m.p = 0
+	if sched.pidle != 0 {
+		var ok bool
+		systemstack(func() {
+			ok = exitsyscallfast_pidle()
+			if ok && trace.enabled {
+				if oldp != nil {
+					// Wait till traceGoSysBlock event is emitted.
+					// This ensures consistency of the trace (the goroutine is started after it is blocked).
+					for oldp.syscalltick == _g_.m.syscalltick {
+						osyield()
+					}
+				}
+				traceGoSysExit(tracestamp())
+			}
+		})
+		if ok {
+			return true
+		}
+	}
+	return false
+}
+
+func exitsyscallfast_pidle() bool {
+	lock(&sched.lock)
+	_p_ := pidleget()
+	if _p_ != nil && atomicload(&sched.sysmonwait) != 0 {
+		atomicstore(&sched.sysmonwait, 0)
+		notewakeup(&sched.sysmonnote)
+	}
+	unlock(&sched.lock)
+	if _p_ != nil {
+		acquirep(_p_)
+		return true
+	}
+	return false
+}
+
+// exitsyscall slow path on g0.
+// Failed to acquire P, enqueue gp as runnable.
+func exitsyscall0(gp *g) {
+	_g_ := getg()
+
+	casgstatus(gp, _Gsyscall, _Grunnable)
+	dropg()
+	lock(&sched.lock)
+	_p_ := pidleget()
+	if _p_ == nil {
+		globrunqput(gp)
+	} else if atomicload(&sched.sysmonwait) != 0 {
+		atomicstore(&sched.sysmonwait, 0)
+		notewakeup(&sched.sysmonnote)
+	}
+	unlock(&sched.lock)
+	if _p_ != nil {
+		acquirep(_p_)
+		execute(gp, false) // Never returns.
+	}
+	if _g_.m.lockedg != nil {
+		// Wait until another thread schedules gp and so m again.
+		stoplockedm()
+		execute(gp, false) // Never returns.
+	}
+	stopm()
+	schedule() // Never returns.
+}
+
+func beforefork() {
+	gp := getg().m.curg
+
+	// Fork can hang if preempted with signals frequently enough (see issue 5517).
+	// Ensure that we stay on the same M where we disable profiling.
+	gp.m.locks++
+	if gp.m.profilehz != 0 {
+		resetcpuprofiler(0)
+	}
+
+	// This function is called before fork in the syscall package.
+	// Code between fork and exec must not allocate memory nor even try to grow the stack.
+	// Here we spoil g.stackguard0 to reliably detect any attempts to grow the stack.
+	// runtime_AfterFork will undo this in the parent process, but not in the child.
+	gp.stackguard0 = stackFork
+}
+
+// Called from syscall package before fork.
+//go:linkname syscall_runtime_BeforeFork syscall.runtime_BeforeFork
+//go:nosplit
+func syscall_runtime_BeforeFork() {
+	systemstack(beforefork)
+}
+
+func afterfork() {
+	gp := getg().m.curg
+
+	// See the comment in beforefork.
+	gp.stackguard0 = gp.stack.lo + _StackGuard
+
+	hz := sched.profilehz
+	if hz != 0 {
+		resetcpuprofiler(hz)
+	}
+	gp.m.locks--
+}
+
+// Called from syscall package after fork in parent.
+//go:linkname syscall_runtime_AfterFork syscall.runtime_AfterFork
+//go:nosplit
+func syscall_runtime_AfterFork() {
+	systemstack(afterfork)
+}
+
+// Allocate a new g, with a stack big enough for stacksize bytes.
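+// A negative stacksize leaves the g without a stack; the caller is
+// expected to arrange one (see the malg(-1) call in allocm).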
+func malg(stacksize int32) *g {
+	newg := new(g)
+	if stacksize >= 0 {
+		stacksize = round2(_StackSystem + stacksize)
+		systemstack(func() {
+			newg.stack, newg.stkbar = stackalloc(uint32(stacksize))
+		})
+		newg.stackguard0 = newg.stack.lo + _StackGuard
+		newg.stackguard1 = ^uintptr(0)
+		newg.stackAlloc = uintptr(stacksize)
+	}
+	return newg
+}
+
+// Create a new g running fn with siz bytes of arguments.
+// Put it on the queue of g's waiting to run.
+// The compiler turns a go statement into a call to this.
+// Cannot split the stack because it assumes that the arguments
+// are available sequentially after &fn; they would not be
+// copied if a stack split occurred.
+//go:nosplit
+func newproc(siz int32, fn *funcval) {
+	argp := add(unsafe.Pointer(&fn), ptrSize)
+	pc := getcallerpc(unsafe.Pointer(&siz))
+	systemstack(func() {
+		newproc1(fn, (*uint8)(argp), siz, 0, pc)
+	})
+}
+
+// Create a new g running fn with narg bytes of arguments starting
+// at argp and returning nret bytes of results.  callerpc is the
+// address of the go statement that created this.  The new g is put
+// on the queue of g's waiting to run.
+func newproc1(fn *funcval, argp *uint8, narg int32, nret int32, callerpc uintptr) *g {
+	_g_ := getg()
+
+	if fn == nil {
+		_g_.m.throwing = -1 // do not dump full stacks
+		throw("go of nil func value")
+	}
+	_g_.m.locks++ // disable preemption because it can be holding p in a local var
+	siz := narg + nret
+	siz = (siz + 7) &^ 7
+
+	// We could allocate a larger initial stack if necessary.
+	// Not worth it: this is almost always an error.
+	// 4*sizeof(uintreg): extra space added below
+	// sizeof(uintreg): caller's LR (arm) or return address (x86, in gostartcall).
+	if siz >= _StackMin-4*regSize-regSize {
+		throw("newproc: function arguments too large for new goroutine")
+	}
+
+	_p_ := _g_.m.p.ptr()
+	newg := gfget(_p_)
+	if newg == nil {
+		newg = malg(_StackMin)
+		casgstatus(newg, _Gidle, _Gdead)
+		allgadd(newg) // publishes with a g->status of Gdead so GC scanner doesn't look at uninitialized stack.
+	}
+	if newg.stack.hi == 0 {
+		throw("newproc1: newg missing stack")
+	}
+
+	if readgstatus(newg) != _Gdead {
+		throw("newproc1: new g is not Gdead")
+	}
+
+	totalSize := 4*regSize + uintptr(siz) + minFrameSize // extra space in case of reads slightly beyond frame
+	totalSize += -totalSize & (spAlign - 1)              // align to spAlign
+	sp := newg.stack.hi - totalSize
+	spArg := sp
+	if usesLR {
+		// caller's LR
+		*(*unsafe.Pointer)(unsafe.Pointer(sp)) = nil
+		spArg += minFrameSize
+	}
+	memmove(unsafe.Pointer(spArg), unsafe.Pointer(argp), uintptr(narg))
+
+	memclr(unsafe.Pointer(&newg.sched), unsafe.Sizeof(newg.sched))
+	newg.sched.sp = sp
+	newg.stktopsp = sp
+	newg.sched.pc = funcPC(goexit) + _PCQuantum // +PCQuantum so that previous instruction is in same function
+	newg.sched.g = guintptr(unsafe.Pointer(newg))
+	gostartcallfn(&newg.sched, fn)
+	newg.gopc = callerpc
+	newg.startpc = fn.fn
+	casgstatus(newg, _Gdead, _Grunnable)
+
+	if _p_.goidcache == _p_.goidcacheend {
+		// Sched.goidgen is the last allocated id; this batch must be
+		// [sched.goidgen+1, sched.goidgen+GoidCacheBatch].
+		// At startup sched.goidgen=0, so main goroutine receives goid=1.
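+		// For example, if _GoidCacheBatch is 16, the first xadd64
+		// moves goidgen from 0 to 16, so goidcache becomes 16-15 = 1
+		// and goidcacheend 17: this P hands out goids 1 through 16.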
+		_p_.goidcache = xadd64(&sched.goidgen, _GoidCacheBatch)
+		_p_.goidcache -= _GoidCacheBatch - 1
+		_p_.goidcacheend = _p_.goidcache + _GoidCacheBatch
+	}
+	newg.goid = int64(_p_.goidcache)
+	_p_.goidcache++
+	if raceenabled {
+		newg.racectx = racegostart(callerpc)
+	}
+	if trace.enabled {
+		traceGoCreate(newg, newg.startpc)
+	}
+	runqput(_p_, newg, true)
+
+	if atomicload(&sched.npidle) != 0 && atomicload(&sched.nmspinning) == 0 && unsafe.Pointer(fn.fn) != unsafe.Pointer(funcPC(main)) { // TODO: fast atomic
+		wakep()
+	}
+	_g_.m.locks--
+	if _g_.m.locks == 0 && _g_.preempt { // restore the preemption request in case we've cleared it in newstack
+		_g_.stackguard0 = stackPreempt
+	}
+	return newg
+}
+
+// Put on gfree list.
+// If local list is too long, transfer a batch to the global list.
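+// The thresholds below (start draining at 64, stop at 32) keep about
+// half of the cached G's local, so a P hovering around the limit does
+// not take sched.gflock on every gfput.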
+func gfput(_p_ *p, gp *g) {
+	if readgstatus(gp) != _Gdead {
+		throw("gfput: bad status (not Gdead)")
+	}
+
+	stksize := gp.stackAlloc
+
+	if stksize != _FixedStack {
+		// non-standard stack size - free it.
+		stackfree(gp.stack, gp.stackAlloc)
+		gp.stack.lo = 0
+		gp.stack.hi = 0
+		gp.stackguard0 = 0
+		gp.stkbar = nil
+		gp.stkbarPos = 0
+	} else {
+		// Reset stack barriers.
+		gp.stkbar = gp.stkbar[:0]
+		gp.stkbarPos = 0
+	}
+
+	gp.schedlink.set(_p_.gfree)
+	_p_.gfree = gp
+	_p_.gfreecnt++
+	if _p_.gfreecnt >= 64 {
+		lock(&sched.gflock)
+		for _p_.gfreecnt >= 32 {
+			_p_.gfreecnt--
+			gp = _p_.gfree
+			_p_.gfree = gp.schedlink.ptr()
+			gp.schedlink.set(sched.gfree)
+			sched.gfree = gp
+			sched.ngfree++
+		}
+		unlock(&sched.gflock)
+	}
+}
+
+// Get from gfree list.
+// If local list is empty, grab a batch from global list.
+func gfget(_p_ *p) *g {
+retry:
+	gp := _p_.gfree
+	if gp == nil && sched.gfree != nil {
+		lock(&sched.gflock)
+		for _p_.gfreecnt < 32 && sched.gfree != nil {
+			_p_.gfreecnt++
+			gp = sched.gfree
+			sched.gfree = gp.schedlink.ptr()
+			sched.ngfree--
+			gp.schedlink.set(_p_.gfree)
+			_p_.gfree = gp
+		}
+		unlock(&sched.gflock)
+		goto retry
+	}
+	if gp != nil {
+		_p_.gfree = gp.schedlink.ptr()
+		_p_.gfreecnt--
+		if gp.stack.lo == 0 {
+			// Stack was deallocated in gfput.  Allocate a new one.
+			systemstack(func() {
+				gp.stack, gp.stkbar = stackalloc(_FixedStack)
+			})
+			gp.stackguard0 = gp.stack.lo + _StackGuard
+			gp.stackAlloc = _FixedStack
+		} else {
+			if raceenabled {
+				racemalloc(unsafe.Pointer(gp.stack.lo), gp.stackAlloc)
+			}
+		}
+	}
+	return gp
+}
+
+// Purge all cached G's from gfree list to the global list.
+func gfpurge(_p_ *p) {
+	lock(&sched.gflock)
+	for _p_.gfreecnt != 0 {
+		_p_.gfreecnt--
+		gp := _p_.gfree
+		_p_.gfree = gp.schedlink.ptr()
+		gp.schedlink.set(sched.gfree)
+		sched.gfree = gp
+		sched.ngfree++
+	}
+	unlock(&sched.gflock)
+}
+
+// Breakpoint executes a breakpoint trap.
+func Breakpoint() {
+	breakpoint()
+}
+
+// dolockOSThread is called by LockOSThread and lockOSThread below
+// after they modify m.locked. Do not allow preemption during this call,
+// or else the m might be different in this function than in the caller.
+//go:nosplit
+func dolockOSThread() {
+	_g_ := getg()
+	_g_.m.lockedg = _g_
+	_g_.lockedm = _g_.m
+}
+
+//go:nosplit
+
+// LockOSThread wires the calling goroutine to its current operating system thread.
+// Until the calling goroutine exits or calls UnlockOSThread, it will always
+// execute in that thread, and no other goroutine can.
+func LockOSThread() {
+	getg().m.locked |= _LockExternal
+	dolockOSThread()
+}
+
+//go:nosplit
+func lockOSThread() {
+	getg().m.locked += _LockInternal
+	dolockOSThread()
+}
+
+// dounlockOSThread is called by UnlockOSThread and unlockOSThread below
+// after they update m->locked. Do not allow preemption during this call,
+// or else the m might be different in this function than in the caller.
+//go:nosplit
+func dounlockOSThread() {
+	_g_ := getg()
+	if _g_.m.locked != 0 {
+		return
+	}
+	_g_.m.lockedg = nil
+	_g_.lockedm = nil
+}
+
+//go:nosplit
+
+// UnlockOSThread unwires the calling goroutine from its fixed operating system thread.
+// If the calling goroutine has not called LockOSThread, UnlockOSThread is a no-op.
+func UnlockOSThread() {
+	getg().m.locked &^= _LockExternal
+	dounlockOSThread()
+}
+
+//go:nosplit
+func unlockOSThread() {
+	_g_ := getg()
+	if _g_.m.locked < _LockInternal {
+		systemstack(badunlockosthread)
+	}
+	_g_.m.locked -= _LockInternal
+	dounlockOSThread()
+}
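+
+// m.locked packs two things: _LockExternal is a single bit set by the
+// public LockOSThread, while _LockInternal counts nested runtime-internal
+// lockOSThread calls. Assuming _LockExternal = 1 and _LockInternal = 2,
+// an illustrative sequence:
+//
+//	LockOSThread()   // locked = 1
+//	lockOSThread()   // locked = 3
+//	lockOSThread()   // locked = 5
+//	unlockOSThread() // locked = 3
+//	UnlockOSThread() // locked = 2 (still locked internally)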
+
+func badunlockosthread() {
+	throw("runtime: internal error: misuse of lockOSThread/unlockOSThread")
+}
+
+func gcount() int32 {
+	n := int32(allglen) - sched.ngfree
+	for i := 0; ; i++ {
+		_p_ := allp[i]
+		if _p_ == nil {
+			break
+		}
+		n -= _p_.gfreecnt
+	}
+
+	// All these variables can be changed concurrently, so the result can be inconsistent.
+	// But at least the current goroutine is running.
+	if n < 1 {
+		n = 1
+	}
+	return n
+}
+
+func mcount() int32 {
+	return sched.mcount
+}
+
+var prof struct {
+	lock uint32
+	hz   int32
+}
+
+func _System()       { _System() }
+func _ExternalCode() { _ExternalCode() }
+func _GC()           { _GC() }
+
+// Called if we receive a SIGPROF signal.
+func sigprof(pc, sp, lr uintptr, gp *g, mp *m) {
+	if prof.hz == 0 {
+		return
+	}
+
+	// Profiling runs concurrently with GC, so it must not allocate.
+	mp.mallocing++
+
+	// Coordinate with stack barrier insertion in scanstack.
+	for !cas(&gp.stackLock, 0, 1) {
+		osyield()
+	}
+
+	// Define that a "user g" is a user-created goroutine, and a "system g"
+	// is one that is m->g0 or m->gsignal.
+	//
+	// We might be interrupted for profiling halfway through a
+	// goroutine switch. The switch involves updating three (or four) values:
+	// g, PC, SP, and (on arm) LR. The PC must be the last to be updated,
+	// because once it gets updated the new g is running.
+	//
+	// When switching from a user g to a system g, LR is not considered live,
+	// so the update only affects g, SP, and PC. Since PC must be last,
+	// the possible partial transitions in ordinary execution are (1) g alone is updated,
+	// (2) both g and SP are updated, and (3) SP alone is updated.
+	// If SP or g alone is updated, we can detect the partial transition by checking
+	// whether the SP is within g's stack bounds. (We could also require that SP
+	// be changed only after g, but the stack bounds check is needed by other
+	// cases, so there is no need to impose an additional requirement.)
+	//
+	// There is one exceptional transition to a system g, not in ordinary execution.
+	// When a signal arrives, the operating system starts the signal handler running
+	// with an updated PC and SP. The g is updated last, at the beginning of the
+	// handler. There are two reasons this is okay. First, until g is updated the
+	// g and SP do not match, so the stack bounds check detects the partial transition.
+	// Second, signal handlers currently run with signals disabled, so a profiling
+	// signal cannot arrive during the handler.
+	//
+	// When switching from a system g to a user g, there are three possibilities.
+	//
+	// First, it may be that the g switch has no PC update, because the SP
+	// either corresponds to a user g throughout (as in asmcgocall)
+	// or because it has been arranged to look like a user g frame
+	// (as in cgocallback_gofunc). In this case, since the entire
+	// transition is a g+SP update, a partial transition updating just one of
+	// those will be detected by the stack bounds check.
+	//
+	// Second, when returning from a signal handler, the PC and SP updates
+	// are performed by the operating system in an atomic update, so the g
+	// update must be done before them. The stack bounds check detects
+	// the partial transition here, and (again) signal handlers run with signals
+	// disabled, so a profiling signal cannot arrive then anyway.
+	//
+	// Third, the common case: it may be that the switch updates g, SP, and PC
+	// separately. If the PC is within any of the functions that does this,
+	// we don't ask for a traceback. Cf. the function setsSP for more about this.
+	//
+	// There is another apparently viable approach, recorded here in case
+	// the "PC within setsSP function" check turns out not to be usable.
+	// It would be possible to delay the update of either g or SP until immediately
+	// before the PC update instruction. Then, because of the stack bounds check,
+	// the only problematic interrupt point is just before that PC update instruction,
+	// and the sigprof handler can detect that instruction and simulate stepping past
+	// it in order to reach a consistent state. On ARM, the update of g must be made
+	// in two places (in R10 and also in a TLS slot), so the delayed update would
+	// need to be the SP update. The sigprof handler must read the instruction at
+	// the current PC and if it was the known instruction (for example, JMP BX or
+	// MOV R2, PC), use that other register in place of the PC value.
+	// The biggest drawback to this solution is that it requires that we can tell
+	// whether it's safe to read from the memory pointed at by PC.
+	// In a correct program, we can test PC == nil and otherwise read,
+	// but if a profiling signal happens at the instant that a program executes
+	// a bad jump (before the program manages to handle the resulting fault)
+	// the profiling handler could fault trying to read nonexistent memory.
+	//
+	// To recap, there are no constraints on the assembly being used for the
+	// transition. We simply require that g and SP match and that the PC is not
+	// in gogo.
+	traceback := true
+	if gp == nil || sp < gp.stack.lo || gp.stack.hi < sp || setsSP(pc) {
+		traceback = false
+	}
+	var stk [maxCPUProfStack]uintptr
+	n := 0
+	if mp.ncgo > 0 && mp.curg != nil && mp.curg.syscallpc != 0 && mp.curg.syscallsp != 0 {
+		// Cgo: we can't unwind and symbolize arbitrary C code,
+		// so instead collect the Go stack that leads to the cgo call.
+		// This is especially important on windows, since all syscalls are cgo calls.
+		n = gentraceback(mp.curg.syscallpc, mp.curg.syscallsp, 0, mp.curg, 0, &stk[0], len(stk), nil, nil, 0)
+	} else if traceback {
+		n = gentraceback(pc, sp, lr, gp, 0, &stk[0], len(stk), nil, nil, _TraceTrap|_TraceJumpStack)
+	}
+	if !traceback || n <= 0 {
+		// Normal traceback is impossible or has failed.
+		// See if it falls into one of several common cases.
+		n = 0
+		if GOOS == "windows" && n == 0 && mp.libcallg != 0 && mp.libcallpc != 0 && mp.libcallsp != 0 {
+			// Libcall, i.e. runtime syscall on windows.
+			// Collect Go stack that leads to the call.
+			n = gentraceback(mp.libcallpc, mp.libcallsp, 0, mp.libcallg.ptr(), 0, &stk[0], len(stk), nil, nil, 0)
+		}
+		if n == 0 {
+			// If all of the above has failed, account it against abstract "System" or "GC".
+			n = 2
+			// "ExternalCode" is better than "etext".
+			if pc > firstmoduledata.etext {
+				pc = funcPC(_ExternalCode) + _PCQuantum
+			}
+			stk[0] = pc
+			if mp.preemptoff != "" || mp.helpgc != 0 {
+				stk[1] = funcPC(_GC) + _PCQuantum
+			} else {
+				stk[1] = funcPC(_System) + _PCQuantum
+			}
+		}
+	}
+	atomicstore(&gp.stackLock, 0)
+
+	if prof.hz != 0 {
+		// Simple cas-lock to coordinate with setcpuprofilerate.
+		for !cas(&prof.lock, 0, 1) {
+			osyield()
+		}
+		if prof.hz != 0 {
+			cpuprof.add(stk[:n])
+		}
+		atomicstore(&prof.lock, 0)
+	}
+	mp.mallocing--
+}
+
+// Reports whether a function will set the SP
+// to an absolute value. It is important that
+// we don't traceback when these are at the bottom
+// of the stack since we can't be sure that we will
+// find the caller.
+//
+// If the function is not on the bottom of the stack
+// we assume that it will have set it up so that traceback will be consistent,
+// either by being a traceback terminating function
+// or putting one on the stack at the right offset.
+func setsSP(pc uintptr) bool {
+	f := findfunc(pc)
+	if f == nil {
+		// couldn't find the function for this PC,
+		// so assume the worst and stop traceback
+		return true
+	}
+	switch f.entry {
+	case gogoPC, systemstackPC, mcallPC, morestackPC:
+		return true
+	}
+	return false
+}
+
+// Arrange to call fn with a traceback hz times a second.
+func setcpuprofilerate_m(hz int32) {
+	// Force sane arguments.
+	if hz < 0 {
+		hz = 0
+	}
+
+	// Disable preemption, otherwise we can be rescheduled to another thread
+	// that has profiling enabled.
+	_g_ := getg()
+	_g_.m.locks++
+
+	// Stop profiler on this thread so that it is safe to lock prof.
+	// If a profiling signal came in while we had prof locked,
+	// it would deadlock.
+	resetcpuprofiler(0)
+
+	for !cas(&prof.lock, 0, 1) {
+		osyield()
+	}
+	prof.hz = hz
+	atomicstore(&prof.lock, 0)
+
+	lock(&sched.lock)
+	sched.profilehz = hz
+	unlock(&sched.lock)
+
+	if hz != 0 {
+		resetcpuprofiler(hz)
+	}
+
+	_g_.m.locks--
+}
+
+// Change number of processors.  The world is stopped, sched is locked.
+// gcworkbufs are not being modified by either the GC or
+// the write barrier code.
+// Returns list of Ps with local work, they need to be scheduled by the caller.
+func procresize(nprocs int32) *p {
+	old := gomaxprocs
+	if old < 0 || old > _MaxGomaxprocs || nprocs <= 0 || nprocs > _MaxGomaxprocs {
+		throw("procresize: invalid arg")
+	}
+	if trace.enabled {
+		traceGomaxprocs(nprocs)
+	}
+
+	// update statistics
+	now := nanotime()
+	if sched.procresizetime != 0 {
+		sched.totaltime += int64(old) * (now - sched.procresizetime)
+	}
+	sched.procresizetime = now
+
+	// initialize new P's
+	for i := int32(0); i < nprocs; i++ {
+		pp := allp[i]
+		if pp == nil {
+			pp = new(p)
+			pp.id = i
+			pp.status = _Pgcstop
+			pp.sudogcache = pp.sudogbuf[:0]
+			for i := range pp.deferpool {
+				pp.deferpool[i] = pp.deferpoolbuf[i][:0]
+			}
+			atomicstorep(unsafe.Pointer(&allp[i]), unsafe.Pointer(pp))
+		}
+		if pp.mcache == nil {
+			if old == 0 && i == 0 {
+				if getg().m.mcache == nil {
+					throw("missing mcache?")
+				}
+				pp.mcache = getg().m.mcache // bootstrap
+			} else {
+				pp.mcache = allocmcache()
+			}
+		}
+	}
+
+	// free unused P's
+	for i := nprocs; i < old; i++ {
+		p := allp[i]
+		if trace.enabled {
+			if p == getg().m.p.ptr() {
+				// moving to p[0], pretend that we were descheduled
+				// and then scheduled again to keep the trace sane.
+				traceGoSched()
+				traceProcStop(p)
+			}
+		}
+		// move all runnable goroutines to the global queue
+		for p.runqhead != p.runqtail {
+			// pop from tail of local queue
+			p.runqtail--
+			gp := p.runq[p.runqtail%uint32(len(p.runq))]
+			// push onto head of global queue
+			globrunqputhead(gp)
+		}
+		if p.runnext != 0 {
+			globrunqputhead(p.runnext.ptr())
+			p.runnext = 0
+		}
+		// if there's a background worker, make it runnable and put
+		// it on the global queue so it can clean itself up
+		if p.gcBgMarkWorker != nil {
+			casgstatus(p.gcBgMarkWorker, _Gwaiting, _Grunnable)
+			if trace.enabled {
+				traceGoUnpark(p.gcBgMarkWorker, 0)
+			}
+			globrunqput(p.gcBgMarkWorker)
+			p.gcBgMarkWorker = nil
+		}
+		for i := range p.sudogbuf {
+			p.sudogbuf[i] = nil
+		}
+		p.sudogcache = p.sudogbuf[:0]
+		for i := range p.deferpool {
+			for j := range p.deferpoolbuf[i] {
+				p.deferpoolbuf[i][j] = nil
+			}
+			p.deferpool[i] = p.deferpoolbuf[i][:0]
+		}
+		freemcache(p.mcache)
+		p.mcache = nil
+		gfpurge(p)
+		traceProcFree(p)
+		p.status = _Pdead
+		// can't free P itself because it can be referenced by an M in syscall
+	}
+
+	_g_ := getg()
+	if _g_.m.p != 0 && _g_.m.p.ptr().id < nprocs {
+		// continue to use the current P
+		_g_.m.p.ptr().status = _Prunning
+	} else {
+		// release the current P and acquire allp[0]
+		if _g_.m.p != 0 {
+			_g_.m.p.ptr().m = 0
+		}
+		_g_.m.p = 0
+		_g_.m.mcache = nil
+		p := allp[0]
+		p.m = 0
+		p.status = _Pidle
+		acquirep(p)
+		if trace.enabled {
+			traceGoStart()
+		}
+	}
+	var runnablePs *p
+	for i := nprocs - 1; i >= 0; i-- {
+		p := allp[i]
+		if _g_.m.p.ptr() == p {
+			continue
+		}
+		p.status = _Pidle
+		if runqempty(p) {
+			pidleput(p)
+		} else {
+			p.m.set(mget())
+			p.link.set(runnablePs)
+			runnablePs = p
+		}
+	}
+	var int32p *int32 = &gomaxprocs // make compiler check that gomaxprocs is an int32
+	atomicstore((*uint32)(unsafe.Pointer(int32p)), uint32(nprocs))
+	return runnablePs
+}
+
+// Associate p and the current m.
+func acquirep(_p_ *p) {
+	acquirep1(_p_)
+
+	// have p; write barriers now allowed
+	_g_ := getg()
+	_g_.m.mcache = _p_.mcache
+
+	if trace.enabled {
+		traceProcStart()
+	}
+}
+
+// May run during STW, so write barriers are not allowed.
+//go:nowritebarrier
+func acquirep1(_p_ *p) {
+	_g_ := getg()
+
+	if _g_.m.p != 0 || _g_.m.mcache != nil {
+		throw("acquirep: already in go")
+	}
+	if _p_.m != 0 || _p_.status != _Pidle {
+		id := int32(0)
+		if _p_.m != 0 {
+			id = _p_.m.ptr().id
+		}
+		print("acquirep: p->m=", _p_.m, "(", id, ") p->status=", _p_.status, "\n")
+		throw("acquirep: invalid p state")
+	}
+	_g_.m.p.set(_p_)
+	_p_.m.set(_g_.m)
+	_p_.status = _Prunning
+}
+
+// Disassociate p and the current m.
+func releasep() *p {
+	_g_ := getg()
+
+	if _g_.m.p == 0 || _g_.m.mcache == nil {
+		throw("releasep: invalid arg")
+	}
+	_p_ := _g_.m.p.ptr()
+	if _p_.m.ptr() != _g_.m || _p_.mcache != _g_.m.mcache || _p_.status != _Prunning {
+		print("releasep: m=", _g_.m, " m->p=", _g_.m.p.ptr(), " p->m=", _p_.m, " m->mcache=", _g_.m.mcache, " p->mcache=", _p_.mcache, " p->status=", _p_.status, "\n")
+		throw("releasep: invalid p state")
+	}
+	if trace.enabled {
+		traceProcStop(_g_.m.p.ptr())
+	}
+	_g_.m.p = 0
+	_g_.m.mcache = nil
+	_p_.m = 0
+	_p_.status = _Pidle
+	return _p_
+}
+
+func incidlelocked(v int32) {
+	lock(&sched.lock)
+	sched.nmidlelocked += v
+	if v > 0 {
+		checkdead()
+	}
+	unlock(&sched.lock)
+}
+
+// Check for deadlock situation.
+// The check is based on the number of running M's; if it is 0, we have a deadlock.
+func checkdead() {
+	// For -buildmode=c-shared or -buildmode=c-archive it's OK if
+	// there are no running goroutines.  The calling program is
+	// assumed to be running.
+	if islibrary || isarchive {
+		return
+	}
+
+	// If we are dying because of a signal caught on an already idle thread,
+	// freezetheworld will cause all running threads to block.
+	// And the runtime will essentially enter a deadlock state,
+	// except that there is a thread that will call exit soon.
+	if panicking > 0 {
+		return
+	}
+
+	// -1 for sysmon
+	run := sched.mcount - sched.nmidle - sched.nmidlelocked - 1
+	if run > 0 {
+		return
+	}
+	if run < 0 {
+		print("runtime: checkdead: nmidle=", sched.nmidle, " nmidlelocked=", sched.nmidlelocked, " mcount=", sched.mcount, "\n")
+		throw("checkdead: inconsistent counts")
+	}
+
+	grunning := 0
+	lock(&allglock)
+	for i := 0; i < len(allgs); i++ {
+		gp := allgs[i]
+		if isSystemGoroutine(gp) {
+			continue
+		}
+		s := readgstatus(gp)
+		switch s &^ _Gscan {
+		case _Gwaiting:
+			grunning++
+		case _Grunnable,
+			_Grunning,
+			_Gsyscall:
+			unlock(&allglock)
+			print("runtime: checkdead: find g ", gp.goid, " in status ", s, "\n")
+			throw("checkdead: runnable g")
+		}
+	}
+	unlock(&allglock)
+	if grunning == 0 { // possible if main goroutine calls runtime·Goexit()
+		throw("no goroutines (main called runtime.Goexit) - deadlock!")
+	}
+
+	// Maybe jump time forward for playground.
+	gp := timejump()
+	if gp != nil {
+		casgstatus(gp, _Gwaiting, _Grunnable)
+		globrunqput(gp)
+		_p_ := pidleget()
+		if _p_ == nil {
+			throw("checkdead: no p for timer")
+		}
+		mp := mget()
+		if mp == nil {
+			newm(nil, _p_)
+		} else {
+			mp.nextp.set(_p_)
+			notewakeup(&mp.park)
+		}
+		return
+	}
+
+	getg().m.throwing = -1 // do not dump full stacks
+	throw("all goroutines are asleep - deadlock!")
+}
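+
+// What checkdead looks like from the outside, as a self-contained example
+// (a hypothetical user program, not part of the runtime): once main blocks
+// with no M left to run anything, the throw above fires.
+//
+//	package main
+//
+//	func main() {
+//		ch := make(chan int)
+//		<-ch // no sender can ever exist: "all goroutines are asleep - deadlock!"
+//	}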
+
+// forcegcperiod is the maximum time in nanoseconds between garbage
+// collections. If we go this long without a garbage collection, one
+// is forced to run.
+//
+// This is a variable for testing purposes. It normally doesn't change.
+var forcegcperiod int64 = 2 * 60 * 1e9
+
+func sysmon() {
+	// If a heap span goes unused for 5 minutes after a garbage collection,
+	// we hand it back to the operating system.
+	scavengelimit := int64(5 * 60 * 1e9)
+
+	if debug.scavenge > 0 {
+		// Scavenge-a-lot for testing.
+		forcegcperiod = 10 * 1e6
+		scavengelimit = 20 * 1e6
+	}
+
+	lastscavenge := nanotime()
+	nscavenge := 0
+
+	lasttrace := int64(0)
+	idle := 0 // how many cycles in succession we have not woken anybody up
+	delay := uint32(0)
+	for {
+		if idle == 0 { // start with 20us sleep...
+			delay = 20
+		} else if idle > 50 { // start doubling the sleep after 1ms...
+			delay *= 2
+		}
+		if delay > 10*1000 { // up to 10ms
+			delay = 10 * 1000
+		}
+		usleep(delay)
+		if debug.schedtrace <= 0 && (sched.gcwaiting != 0 || atomicload(&sched.npidle) == uint32(gomaxprocs)) { // TODO: fast atomic
+			lock(&sched.lock)
+			if atomicload(&sched.gcwaiting) != 0 || atomicload(&sched.npidle) == uint32(gomaxprocs) {
+				atomicstore(&sched.sysmonwait, 1)
+				unlock(&sched.lock)
+				// Make wake-up period small enough
+				// for the sampling to be correct.
+				maxsleep := forcegcperiod / 2
+				if scavengelimit < forcegcperiod {
+					maxsleep = scavengelimit / 2
+				}
+				notetsleep(&sched.sysmonnote, maxsleep)
+				lock(&sched.lock)
+				atomicstore(&sched.sysmonwait, 0)
+				noteclear(&sched.sysmonnote)
+				idle = 0
+				delay = 20
+			}
+			unlock(&sched.lock)
+		}
+		// poll network if not polled for more than 10ms
+		lastpoll := int64(atomicload64(&sched.lastpoll))
+		now := nanotime()
+		unixnow := unixnanotime()
+		if lastpoll != 0 && lastpoll+10*1000*1000 < now {
+			cas64(&sched.lastpoll, uint64(lastpoll), uint64(now))
+			gp := netpoll(false) // non-blocking - returns list of goroutines
+			if gp != nil {
+				// Need to decrement number of idle locked M's
+				// (pretending that one more is running) before injectglist.
+				// Otherwise it can lead to the following situation:
+				// injectglist grabs all P's but before it starts M's to run the P's,
+				// another M returns from syscall, finishes running its G,
+				// observes that there is no work to do and no other running M's
+				// and reports deadlock.
+				incidlelocked(-1)
+				injectglist(gp)
+				incidlelocked(1)
+			}
+		}
+		// retake P's blocked in syscalls
+		// and preempt long running G's
+		if retake(now) != 0 {
+			idle = 0
+		} else {
+			idle++
+		}
+		// check if we need to force a GC
+		lastgc := int64(atomicload64(&memstats.last_gc))
+		if lastgc != 0 && unixnow-lastgc > forcegcperiod && atomicload(&forcegc.idle) != 0 && atomicloaduint(&bggc.working) == 0 {
+			lock(&forcegc.lock)
+			forcegc.idle = 0
+			forcegc.g.schedlink = 0
+			injectglist(forcegc.g)
+			unlock(&forcegc.lock)
+		}
+		// scavenge heap once in a while
+		if lastscavenge+scavengelimit/2 < now {
+			mHeap_Scavenge(int32(nscavenge), uint64(now), uint64(scavengelimit))
+			lastscavenge = now
+			nscavenge++
+		}
+		if debug.schedtrace > 0 && lasttrace+int64(debug.schedtrace*1000000) <= now {
+			lasttrace = now
+			schedtrace(debug.scheddetail > 0)
+		}
+	}
+}
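+
+// The sysmon backoff, worked through (numbers from the code above): the
+// first 50 idle cycles sleep 20us each (~1ms total); from then on the delay
+// doubles per idle cycle, 20us -> 40us -> ..., capped at 10ms. An idle
+// sysmon therefore settles at one wakeup per 10ms, while a busy one polls
+// every 20us.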
+
+var pdesc [_MaxGomaxprocs]struct {
+	schedtick   uint32
+	schedwhen   int64
+	syscalltick uint32
+	syscallwhen int64
+}
+
+// forcePreemptNS is the time slice given to a G before it is
+// preempted.
+const forcePreemptNS = 10 * 1000 * 1000 // 10ms
+
+func retake(now int64) uint32 {
+	n := 0
+	for i := int32(0); i < gomaxprocs; i++ {
+		_p_ := allp[i]
+		if _p_ == nil {
+			continue
+		}
+		pd := &pdesc[i]
+		s := _p_.status
+		if s == _Psyscall {
+			// Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us).
+			t := int64(_p_.syscalltick)
+			if int64(pd.syscalltick) != t {
+				pd.syscalltick = uint32(t)
+				pd.syscallwhen = now
+				continue
+			}
+			// On the one hand we don't want to retake Ps if there is no other work to do,
+			// but on the other hand we want to retake them eventually
+			// because they can prevent the sysmon thread from deep sleep.
+			if runqempty(_p_) && atomicload(&sched.nmspinning)+atomicload(&sched.npidle) > 0 && pd.syscallwhen+10*1000*1000 > now {
+				continue
+			}
+			// Need to decrement number of idle locked M's
+			// (pretending that one more is running) before the CAS.
+			// Otherwise the M from which we retake can exit the syscall,
+			// increment nmidle and report deadlock.
+			incidlelocked(-1)
+			if cas(&_p_.status, s, _Pidle) {
+				if trace.enabled {
+					traceGoSysBlock(_p_)
+					traceProcStop(_p_)
+				}
+				n++
+				_p_.syscalltick++
+				handoffp(_p_)
+			}
+			incidlelocked(1)
+		} else if s == _Prunning {
+			// Preempt G if it's running for too long.
+			t := int64(_p_.schedtick)
+			if int64(pd.schedtick) != t {
+				pd.schedtick = uint32(t)
+				pd.schedwhen = now
+				continue
+			}
+			if pd.schedwhen+forcePreemptNS > now {
+				continue
+			}
+			preemptone(_p_)
+		}
+	}
+	return uint32(n)
+}
+
+// Tell all goroutines that they have been preempted and they should stop.
+// This function is purely best-effort.  It can fail to inform a goroutine if a
+// processor just started running it.
+// No locks need to be held.
+// Returns true if preemption request was issued to at least one goroutine.
+func preemptall() bool {
+	res := false
+	for i := int32(0); i < gomaxprocs; i++ {
+		_p_ := allp[i]
+		if _p_ == nil || _p_.status != _Prunning {
+			continue
+		}
+		if preemptone(_p_) {
+			res = true
+		}
+	}
+	return res
+}
+
+// Tell the goroutine running on processor P to stop.
+// This function is purely best-effort.  It can incorrectly fail to inform the
+// goroutine.  It can inform the wrong goroutine.  Even if it informs the
+// correct goroutine, that goroutine might ignore the request if it is
+// simultaneously executing newstack.
+// No lock needs to be held.
+// Returns true if preemption request was issued.
+// The actual preemption will happen at some point in the future
+// and will be indicated by the gp->status no longer being
+// Grunning.
+func preemptone(_p_ *p) bool {
+	mp := _p_.m.ptr()
+	if mp == nil || mp == getg().m {
+		return false
+	}
+	gp := mp.curg
+	if gp == nil || gp == mp.g0 {
+		return false
+	}
+
+	gp.preempt = true
+
+	// Every call in a goroutine checks for stack overflow by
+	// comparing the current stack pointer to gp->stackguard0.
+	// Setting gp->stackguard0 to StackPreempt folds
+	// preemption into the normal stack overflow check.
+	gp.stackguard0 = stackPreempt
+	return true
+}
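+
+// A sketch of why the stackguard0 trick works (assumed prologue shape; the
+// real check is emitted by the compiler): stackPreempt is larger than any
+// valid stack pointer, so the next function call fails its stack check and
+// lands in newstack, which sees gp.preempt and yields instead of growing
+// the stack.
+//
+//	// compiler-emitted prologue, morally:
+//	if sp < gp.stackguard0 { // always true once stackguard0 == stackPreempt
+//		morestack() // reaches newstack, which honors the preempt request
+//	}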
+
+var starttime int64
+
+func schedtrace(detailed bool) {
+	now := nanotime()
+	if starttime == 0 {
+		starttime = now
+	}
+
+	lock(&sched.lock)
+	print("SCHED ", (now-starttime)/1e6, "ms: gomaxprocs=", gomaxprocs, " idleprocs=", sched.npidle, " threads=", sched.mcount, " spinningthreads=", sched.nmspinning, " idlethreads=", sched.nmidle, " runqueue=", sched.runqsize)
+	if detailed {
+		print(" gcwaiting=", sched.gcwaiting, " nmidlelocked=", sched.nmidlelocked, " stopwait=", sched.stopwait, " sysmonwait=", sched.sysmonwait, "\n")
+	}
+	// We must be careful while reading data from P's, M's and G's.
+	// Even if we hold schedlock, most data can be changed concurrently.
+	// E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil.
+	for i := int32(0); i < gomaxprocs; i++ {
+		_p_ := allp[i]
+		if _p_ == nil {
+			continue
+		}
+		mp := _p_.m.ptr()
+		h := atomicload(&_p_.runqhead)
+		t := atomicload(&_p_.runqtail)
+		if detailed {
+			id := int32(-1)
+			if mp != nil {
+				id = mp.id
+			}
+			print("  P", i, ": status=", _p_.status, " schedtick=", _p_.schedtick, " syscalltick=", _p_.syscalltick, " m=", id, " runqsize=", t-h, " gfreecnt=", _p_.gfreecnt, "\n")
+		} else {
+			// In non-detailed mode format lengths of per-P run queues as:
+			// [len1 len2 len3 len4]
+			print(" ")
+			if i == 0 {
+				print("[")
+			}
+			print(t - h)
+			if i == gomaxprocs-1 {
+				print("]\n")
+			}
+		}
+	}
+
+	if !detailed {
+		unlock(&sched.lock)
+		return
+	}
+
+	for mp := allm; mp != nil; mp = mp.alllink {
+		_p_ := mp.p.ptr()
+		gp := mp.curg
+		lockedg := mp.lockedg
+		id1 := int32(-1)
+		if _p_ != nil {
+			id1 = _p_.id
+		}
+		id2 := int64(-1)
+		if gp != nil {
+			id2 = gp.goid
+		}
+		id3 := int64(-1)
+		if lockedg != nil {
+			id3 = lockedg.goid
+		}
+		print("  M", mp.id, ": p=", id1, " curg=", id2, " mallocing=", mp.mallocing, " throwing=", mp.throwing, " preemptoff=", mp.preemptoff, ""+" locks=", mp.locks, " dying=", mp.dying, " helpgc=", mp.helpgc, " spinning=", mp.spinning, " blocked=", getg().m.blocked, " lockedg=", id3, "\n")
+	}
+
+	lock(&allglock)
+	for gi := 0; gi < len(allgs); gi++ {
+		gp := allgs[gi]
+		mp := gp.m
+		lockedm := gp.lockedm
+		id1 := int32(-1)
+		if mp != nil {
+			id1 = mp.id
+		}
+		id2 := int32(-1)
+		if lockedm != nil {
+			id2 = lockedm.id
+		}
+		print("  G", gp.goid, ": status=", readgstatus(gp), "(", gp.waitreason, ") m=", id1, " lockedm=", id2, "\n")
+	}
+	unlock(&allglock)
+	unlock(&sched.lock)
+}
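+
+// schedtrace is driven from outside via the GODEBUG environment variable;
+// the interval is in milliseconds. For example (values illustrative):
+//
+//	GODEBUG=schedtrace=1000 ./prog               // one SCHED line per second
+//	GODEBUG=schedtrace=1000,scheddetail=1 ./prog // adds per-P/M/G detail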
+
+// Put mp on midle list.
+// Sched must be locked.
+// May run during STW, so write barriers are not allowed.
+//go:nowritebarrier
+func mput(mp *m) {
+	mp.schedlink = sched.midle
+	sched.midle.set(mp)
+	sched.nmidle++
+	checkdead()
+}
+
+// Try to get an m from midle list.
+// Sched must be locked.
+// May run during STW, so write barriers are not allowed.
+//go:nowritebarrier
+func mget() *m {
+	mp := sched.midle.ptr()
+	if mp != nil {
+		sched.midle = mp.schedlink
+		sched.nmidle--
+	}
+	return mp
+}
+
+// Put gp on the global runnable queue.
+// Sched must be locked.
+// May run during STW, so write barriers are not allowed.
+//go:nowritebarrier
+func globrunqput(gp *g) {
+	gp.schedlink = 0
+	if sched.runqtail != 0 {
+		sched.runqtail.ptr().schedlink.set(gp)
+	} else {
+		sched.runqhead.set(gp)
+	}
+	sched.runqtail.set(gp)
+	sched.runqsize++
+}
+
+// Put gp at the head of the global runnable queue.
+// Sched must be locked.
+// May run during STW, so write barriers are not allowed.
+//go:nowritebarrier
+func globrunqputhead(gp *g) {
+	gp.schedlink = sched.runqhead
+	sched.runqhead.set(gp)
+	if sched.runqtail == 0 {
+		sched.runqtail.set(gp)
+	}
+	sched.runqsize++
+}
+
+// Put a batch of runnable goroutines on the global runnable queue.
+// Sched must be locked.
+func globrunqputbatch(ghead *g, gtail *g, n int32) {
+	gtail.schedlink = 0
+	if sched.runqtail != 0 {
+		sched.runqtail.ptr().schedlink.set(ghead)
+	} else {
+		sched.runqhead.set(ghead)
+	}
+	sched.runqtail.set(gtail)
+	sched.runqsize += n
+}
+
+// Try to get a batch of G's from the global runnable queue.
+// Sched must be locked.
+func globrunqget(_p_ *p, max int32) *g {
+	if sched.runqsize == 0 {
+		return nil
+	}
+
+	n := sched.runqsize/gomaxprocs + 1
+	if n > sched.runqsize {
+		n = sched.runqsize
+	}
+	if max > 0 && n > max {
+		n = max
+	}
+	if n > int32(len(_p_.runq))/2 {
+		n = int32(len(_p_.runq)) / 2
+	}
+
+	sched.runqsize -= n
+	if sched.runqsize == 0 {
+		sched.runqtail = 0
+	}
+
+	gp := sched.runqhead.ptr()
+	sched.runqhead = gp.schedlink
+	n--
+	for ; n > 0; n-- {
+		gp1 := sched.runqhead.ptr()
+		sched.runqhead = gp1.schedlink
+		runqput(_p_, gp1, false)
+	}
+	return gp
+}
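+
+// A worked example of the batch size above (numbers illustrative): with
+// sched.runqsize = 64 and gomaxprocs = 4, n = 64/4 + 1 = 17, so a P drains
+// a fair share of the queue rather than all of it; n is then clamped by max
+// and by len(_p_.runq)/2 so the local ring keeps room for Gs this P readies
+// itself.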
+
+// Put p on the _Pidle list.
+// Sched must be locked.
+// May run during STW, so write barriers are not allowed.
+//go:nowritebarrier
+func pidleput(_p_ *p) {
+	if !runqempty(_p_) {
+		throw("pidleput: P has non-empty run queue")
+	}
+	_p_.link = sched.pidle
+	sched.pidle.set(_p_)
+	xadd(&sched.npidle, 1) // TODO: fast atomic
+}
+
+// Try to get a p from the _Pidle list.
+// Sched must be locked.
+// May run during STW, so write barriers are not allowed.
+//go:nowritebarrier
+func pidleget() *p {
+	_p_ := sched.pidle.ptr()
+	if _p_ != nil {
+		sched.pidle = _p_.link
+		xadd(&sched.npidle, -1) // TODO: fast atomic
+	}
+	return _p_
+}
+
+// runqempty returns true if _p_ has no Gs on its local run queue.
+// Note that this test is generally racy.
+func runqempty(_p_ *p) bool {
+	return _p_.runqhead == _p_.runqtail && _p_.runnext == 0
+}
+
+// To shake out latent assumptions about scheduling order,
+// we introduce some randomness into scheduling decisions
+// when running with the race detector.
+// The need for this was made obvious by changing the
+// (deterministic) scheduling order in Go 1.5 and breaking
+// many poorly-written tests.
+// With the randomness here, as long as the tests pass
+// consistently with -race, they shouldn't have latent scheduling
+// assumptions.
+const randomizeScheduler = raceenabled
+
+// runqput tries to put g on the local runnable queue.
+// If next is false, runqput adds g to the tail of the runnable queue.
+// If next is true, runqput puts g in the _p_.runnext slot.
+// If the run queue is full, runqput puts g on the global queue.
+// Executed only by the owner P.
+func runqput(_p_ *p, gp *g, next bool) {
+	if randomizeScheduler && next && fastrand1()%2 == 0 {
+		next = false
+	}
+
+	if next {
+	retryNext:
+		oldnext := _p_.runnext
+		if !_p_.runnext.cas(oldnext, guintptr(unsafe.Pointer(gp))) {
+			goto retryNext
+		}
+		if oldnext == 0 {
+			return
+		}
+		// Kick the old runnext out to the regular run queue.
+		gp = oldnext.ptr()
+	}
+
+retry:
+	h := atomicload(&_p_.runqhead) // load-acquire, synchronize with consumers
+	t := _p_.runqtail
+	if t-h < uint32(len(_p_.runq)) {
+		_p_.runq[t%uint32(len(_p_.runq))] = gp
+		atomicstore(&_p_.runqtail, t+1) // store-release, makes the item available for consumption
+		return
+	}
+	if runqputslow(_p_, gp, h, t) {
+		return
+	}
+	// the queue is not full, now the put above must succeed
+	goto retry
+}
+
+// Put g and a batch of work from local runnable queue on global queue.
+// Executed only by the owner P.
+func runqputslow(_p_ *p, gp *g, h, t uint32) bool {
+	var batch [len(_p_.runq)/2 + 1]*g
+
+	// First, grab a batch from local queue.
+	n := t - h
+	n = n / 2
+	if n != uint32(len(_p_.runq)/2) {
+		throw("runqputslow: queue is not full")
+	}
+	for i := uint32(0); i < n; i++ {
+		batch[i] = _p_.runq[(h+i)%uint32(len(_p_.runq))]
+	}
+	if !cas(&_p_.runqhead, h, h+n) { // cas-release, commits consume
+		return false
+	}
+	batch[n] = gp
+
+	if randomizeScheduler {
+		for i := uint32(1); i <= n; i++ {
+			j := fastrand1() % (i + 1)
+			batch[i], batch[j] = batch[j], batch[i]
+		}
+	}
+
+	// Link the goroutines.
+	for i := uint32(0); i < n; i++ {
+		batch[i].schedlink.set(batch[i+1])
+	}
+
+	// Now put the batch on global queue.
+	lock(&sched.lock)
+	globrunqputbatch(batch[0], batch[n], int32(n+1))
+	unlock(&sched.lock)
+	return true
+}
+
+// Get g from local runnable queue.
+// If inheritTime is true, gp should inherit the remaining time in the
+// current time slice. Otherwise, it should start a new time slice.
+// Executed only by the owner P.
+func runqget(_p_ *p) (gp *g, inheritTime bool) {
+	// If there's a runnext, it's the next G to run.
+	for {
+		next := _p_.runnext
+		if next == 0 {
+			break
+		}
+		if _p_.runnext.cas(next, 0) {
+			return next.ptr(), true
+		}
+	}
+
+	for {
+		h := atomicload(&_p_.runqhead) // load-acquire, synchronize with other consumers
+		t := _p_.runqtail
+		if t == h {
+			return nil, false
+		}
+		gp := _p_.runq[h%uint32(len(_p_.runq))]
+		if cas(&_p_.runqhead, h, h+1) { // cas-release, commits consume
+			return gp, false
+		}
+	}
+}
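+
+// runqput and runqget form a single-producer, multi-consumer ring: only the
+// owner P advances runqtail (a store-release is enough, there is a single
+// producer), while runqhead is CASed because stealers race on it. A usage
+// sketch (owner P only; absent the -race randomization above):
+//
+//	runqput(_p_, gp, true)       // gp lands in the runnext slot
+//	gp2, inherit := runqget(_p_) // gp2 == gp, inherit == true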
+
+// Grabs a batch of goroutines from _p_'s runnable queue into batch.
+// Batch is a ring buffer starting at batchHead.
+// Returns number of grabbed goroutines.
+// Can be executed by any P.
+func runqgrab(_p_ *p, batch *[256]*g, batchHead uint32, stealRunNextG bool) uint32 {
+	for {
+		h := atomicload(&_p_.runqhead) // load-acquire, synchronize with other consumers
+		t := atomicload(&_p_.runqtail) // load-acquire, synchronize with the producer
+		n := t - h
+		n = n - n/2
+		if n == 0 {
+			if stealRunNextG {
+				// Try to steal from _p_.runnext.
+				if next := _p_.runnext; next != 0 {
+					// Sleep to ensure that _p_ isn't about to run the g we
+					// are about to steal.
+					// The important use case here is when the g running on _p_
+					// ready()s another g and then almost immediately blocks.
+					// Instead of stealing runnext in this window, back off
+					// to give _p_ a chance to schedule runnext. This will avoid
+					// thrashing gs between different Ps.
+					usleep(100)
+					if !_p_.runnext.cas(next, 0) {
+						continue
+					}
+					batch[batchHead%uint32(len(batch))] = next.ptr()
+					return 1
+				}
+			}
+			return 0
+		}
+		if n > uint32(len(_p_.runq)/2) { // read inconsistent h and t
+			continue
+		}
+		for i := uint32(0); i < n; i++ {
+			g := _p_.runq[(h+i)%uint32(len(_p_.runq))]
+			batch[(batchHead+i)%uint32(len(batch))] = g
+		}
+		if cas(&_p_.runqhead, h, h+n) { // cas-release, commits consume
+			return n
+		}
+	}
+}
+
+// Steal half of elements from local runnable queue of p2
+// and put them onto the local runnable queue of p.
+// Returns one of the stolen elements (or nil if failed).
+func runqsteal(_p_, p2 *p, stealRunNextG bool) *g {
+	t := _p_.runqtail
+	n := runqgrab(p2, &_p_.runq, t, stealRunNextG)
+	if n == 0 {
+		return nil
+	}
+	n--
+	gp := _p_.runq[(t+n)%uint32(len(_p_.runq))]
+	if n == 0 {
+		return gp
+	}
+	h := atomicload(&_p_.runqhead) // load-acquire, synchronize with consumers
+	if t-h+n >= uint32(len(_p_.runq)) {
+		throw("runqsteal: runq overflow")
+	}
+	atomicstore(&_p_.runqtail, t+n) // store-release, makes the item available for consumption
+	return gp
+}
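+
+// Stealing half, not everything, keeps both Ps busy: the victim retains
+// work and the thief gets enough to amortize the steal. A usage sketch
+// modeled on findrunnable (execute lives elsewhere in this file):
+//
+//	if gp := runqsteal(_p_, p2, true); gp != nil {
+//		execute(gp, false) // run the stolen G without an inherited time slice
+//	}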
+
+func testSchedLocalQueue() {
+	_p_ := new(p)
+	gs := make([]g, len(_p_.runq))
+	for i := 0; i < len(_p_.runq); i++ {
+		if g, _ := runqget(_p_); g != nil {
+			throw("runq is not empty initially")
+		}
+		for j := 0; j < i; j++ {
+			runqput(_p_, &gs[i], false)
+		}
+		for j := 0; j < i; j++ {
+			if g, _ := runqget(_p_); g != &gs[i] {
+				print("bad element at iter ", i, "/", j, "\n")
+				throw("bad element")
+			}
+		}
+		if g, _ := runqget(_p_); g != nil {
+			throw("runq is not empty afterwards")
+		}
+	}
+}
+
+func testSchedLocalQueueSteal() {
+	p1 := new(p)
+	p2 := new(p)
+	gs := make([]g, len(p1.runq))
+	for i := 0; i < len(p1.runq); i++ {
+		for j := 0; j < i; j++ {
+			gs[j].sig = 0
+			runqput(p1, &gs[j], false)
+		}
+		gp := runqsteal(p2, p1, true)
+		s := 0
+		if gp != nil {
+			s++
+			gp.sig++
+		}
+		for {
+			gp, _ = runqget(p2)
+			if gp == nil {
+				break
+			}
+			s++
+			gp.sig++
+		}
+		for {
+			gp, _ = runqget(p1)
+			if gp == nil {
+				break
+			}
+			gp.sig++
+		}
+		for j := 0; j < i; j++ {
+			if gs[j].sig != 1 {
+				print("bad element ", j, "(", gs[j].sig, ") at iter ", i, "\n")
+				throw("bad element")
+			}
+		}
+		if s != i/2 && s != i/2+1 {
+			print("bad steal ", s, ", want ", i/2, " or ", i/2+1, ", iter ", i, "\n")
+			throw("bad steal")
+		}
+	}
+}
+
+//go:linkname setMaxThreads runtime/debug.setMaxThreads
+func setMaxThreads(in int) (out int) {
+	lock(&sched.lock)
+	out = int(sched.maxmcount)
+	sched.maxmcount = int32(in)
+	checkmcount()
+	unlock(&sched.lock)
+	return
+}
+
+func haveexperiment(name string) bool {
+	x := goexperiment
+	for x != "" {
+		xname := ""
+		i := index(x, ",")
+		if i < 0 {
+			xname, x = x, ""
+		} else {
+			xname, x = x[:i], x[i+1:]
+		}
+		if xname == name {
+			return true
+		}
+	}
+	return false
+}
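+
+// haveexperiment does a simple comma-split over the baked-in goexperiment
+// string. For example (illustrative): building the toolchain with
+// GOEXPERIMENT=framepointer makes goexperiment == "framepointer", so the
+// call in schedinit
+//
+//	framepointer_enabled = haveexperiment("framepointer")
+//
+// reports true.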
+
+//go:nosplit
+func procPin() int {
+	_g_ := getg()
+	mp := _g_.m
+
+	mp.locks++
+	return int(mp.p.ptr().id)
+}
+
+//go:nosplit
+func procUnpin() {
+	_g_ := getg()
+	_g_.m.locks--
+}
+
+//go:linkname sync_runtime_procPin sync.runtime_procPin
+//go:nosplit
+func sync_runtime_procPin() int {
+	return procPin()
+}
+
+//go:linkname sync_runtime_procUnpin sync.runtime_procUnpin
+//go:nosplit
+func sync_runtime_procUnpin() {
+	procUnpin()
+}
+
+//go:linkname sync_atomic_runtime_procPin sync/atomic.runtime_procPin
+//go:nosplit
+func sync_atomic_runtime_procPin() int {
+	return procPin()
+}
+
+//go:linkname sync_atomic_runtime_procUnpin sync/atomic.runtime_procUnpin
+//go:nosplit
+func sync_atomic_runtime_procUnpin() {
+	procUnpin()
+}
+
+// Active spinning for sync.Mutex.
+//go:linkname sync_runtime_canSpin sync.runtime_canSpin
+//go:nosplit
+func sync_runtime_canSpin(i int) bool {
+	// sync.Mutex is cooperative, so we are conservative with spinning.
+	// Spin only a few times and only if running on a multicore machine and
+	// GOMAXPROCS>1 and there is at least one other running P and local runq is empty.
+	// As opposed to runtime mutex we don't do passive spinning here,
+	// because there can be work on the global runq or on other Ps.
+	if i >= active_spin || ncpu <= 1 || gomaxprocs <= int32(sched.npidle+sched.nmspinning)+1 {
+		return false
+	}
+	if p := getg().m.p.ptr(); !runqempty(p) {
+		return false
+	}
+	return true
+}
+
+//go:linkname sync_runtime_doSpin sync.runtime_doSpin
+//go:nosplit
+func sync_runtime_doSpin() {
+	procyield(active_spin_cnt)
+}
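+
+// These two hooks are how sync.Mutex spins without importing runtime
+// internals; its lock slow path is shaped roughly like this (a sketch, not
+// the actual sync source):
+//
+//	iter := 0
+//	for sync_runtime_canSpin(iter) {
+//		sync_runtime_doSpin() // procyield: CPU-friendly busy wait
+//		iter++
+//		// re-check the mutex word and try to grab it again
+//	}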
diff --git a/src/runtime/proc1.go b/src/runtime/proc1.go
deleted file mode 100644
index 35d9e86..0000000
--- a/src/runtime/proc1.go
+++ /dev/null
@@ -1,3727 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-import "unsafe"
-
-var (
-	m0 m
-	g0 g
-)
-
-// Goroutine scheduler
-// The scheduler's job is to distribute ready-to-run goroutines over worker threads.
-//
-// The main concepts are:
-// G - goroutine.
-// M - worker thread, or machine.
-// P - processor, a resource that is required to execute Go code.
-//     M must have an associated P to execute Go code, however it can be
-//     blocked or in a syscall w/o an associated P.
-//
-// Design doc at https://golang.org/s/go11sched.
-
-const (
-	// Number of goroutine ids to grab from sched.goidgen to local per-P cache at once.
-	// 16 seems to provide enough amortization, but other than that it's mostly arbitrary number.
-	_GoidCacheBatch = 16
-)
-
-// The bootstrap sequence is:
-//
-//	call osinit
-//	call schedinit
-//	make & queue new G
-//	call runtime·mstart
-//
-// The new G calls runtime·main.
-func schedinit() {
-	// raceinit must be the first call to race detector.
-	// In particular, it must be done before mallocinit below calls racemapshadow.
-	_g_ := getg()
-	if raceenabled {
-		_g_.racectx = raceinit()
-	}
-
-	sched.maxmcount = 10000
-
-	// Cache the framepointer experiment.  This affects stack unwinding.
-	framepointer_enabled = haveexperiment("framepointer")
-
-	tracebackinit()
-	moduledataverify()
-	stackinit()
-	mallocinit()
-	mcommoninit(_g_.m)
-
-	goargs()
-	goenvs()
-	parsedebugvars()
-	gcinit()
-
-	sched.lastpoll = uint64(nanotime())
-	procs := int(ncpu)
-	if n := atoi(gogetenv("GOMAXPROCS")); n > 0 {
-		if n > _MaxGomaxprocs {
-			n = _MaxGomaxprocs
-		}
-		procs = n
-	}
-	if procresize(int32(procs)) != nil {
-		throw("unknown runnable goroutine during bootstrap")
-	}
-
-	if buildVersion == "" {
-		// Condition should never trigger.  This code just serves
-		// to ensure runtime·buildVersion is kept in the resulting binary.
-		buildVersion = "unknown"
-	}
-}
-
-func dumpgstatus(gp *g) {
-	_g_ := getg()
-	print("runtime: gp: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
-	print("runtime:  g:  g=", _g_, ", goid=", _g_.goid, ",  g->atomicstatus=", readgstatus(_g_), "\n")
-}
-
-func checkmcount() {
-	// sched lock is held
-	if sched.mcount > sched.maxmcount {
-		print("runtime: program exceeds ", sched.maxmcount, "-thread limit\n")
-		throw("thread exhaustion")
-	}
-}
-
-func mcommoninit(mp *m) {
-	_g_ := getg()
-
-	// g0 stack won't make sense for user (and is not necessary unwindable).
-	if _g_ != _g_.m.g0 {
-		callers(1, mp.createstack[:])
-	}
-
-	mp.fastrand = 0x49f6428a + uint32(mp.id) + uint32(cputicks())
-	if mp.fastrand == 0 {
-		mp.fastrand = 0x49f6428a
-	}
-
-	lock(&sched.lock)
-	mp.id = sched.mcount
-	sched.mcount++
-	checkmcount()
-	mpreinit(mp)
-	if mp.gsignal != nil {
-		mp.gsignal.stackguard1 = mp.gsignal.stack.lo + _StackGuard
-	}
-
-	// Add to allm so garbage collector doesn't free g->m
-	// when it is just in a register or thread-local storage.
-	mp.alllink = allm
-
-	// NumCgoCall() iterates over allm w/o schedlock,
-	// so we need to publish it safely.
-	atomicstorep(unsafe.Pointer(&allm), unsafe.Pointer(mp))
-	unlock(&sched.lock)
-}
-
-// Mark gp ready to run.
-func ready(gp *g, traceskip int) {
-	if trace.enabled {
-		traceGoUnpark(gp, traceskip)
-	}
-
-	status := readgstatus(gp)
-
-	// Mark runnable.
-	_g_ := getg()
-	_g_.m.locks++ // disable preemption because it can be holding p in a local var
-	if status&^_Gscan != _Gwaiting {
-		dumpgstatus(gp)
-		throw("bad g->status in ready")
-	}
-
-	// status is Gwaiting or Gscanwaiting, make Grunnable and put on runq
-	casgstatus(gp, _Gwaiting, _Grunnable)
-	runqput(_g_.m.p.ptr(), gp, true)
-	if atomicload(&sched.npidle) != 0 && atomicload(&sched.nmspinning) == 0 { // TODO: fast atomic
-		wakep()
-	}
-	_g_.m.locks--
-	if _g_.m.locks == 0 && _g_.preempt { // restore the preemption request in case we've cleared it in newstack
-		_g_.stackguard0 = stackPreempt
-	}
-}
-
-func gcprocs() int32 {
-	// Figure out how many CPUs to use during GC.
-	// Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
-	lock(&sched.lock)
-	n := gomaxprocs
-	if n > ncpu {
-		n = ncpu
-	}
-	if n > _MaxGcproc {
-		n = _MaxGcproc
-	}
-	if n > sched.nmidle+1 { // one M is currently running
-		n = sched.nmidle + 1
-	}
-	unlock(&sched.lock)
-	return n
-}
-
-func needaddgcproc() bool {
-	lock(&sched.lock)
-	n := gomaxprocs
-	if n > ncpu {
-		n = ncpu
-	}
-	if n > _MaxGcproc {
-		n = _MaxGcproc
-	}
-	n -= sched.nmidle + 1 // one M is currently running
-	unlock(&sched.lock)
-	return n > 0
-}
-
-func helpgc(nproc int32) {
-	_g_ := getg()
-	lock(&sched.lock)
-	pos := 0
-	for n := int32(1); n < nproc; n++ { // one M is currently running
-		if allp[pos].mcache == _g_.m.mcache {
-			pos++
-		}
-		mp := mget()
-		if mp == nil {
-			throw("gcprocs inconsistency")
-		}
-		mp.helpgc = n
-		mp.p.set(allp[pos])
-		mp.mcache = allp[pos].mcache
-		pos++
-		notewakeup(&mp.park)
-	}
-	unlock(&sched.lock)
-}
-
-// freezeStopWait is a large value that freezetheworld sets
-// sched.stopwait to in order to request that all Gs permanently stop.
-const freezeStopWait = 0x7fffffff
-
-// Similar to stopTheWorld but best-effort and can be called several times.
-// There is no reverse operation, used during crashing.
-// This function must not lock any mutexes.
-func freezetheworld() {
-	// stopwait and preemption requests can be lost
-	// due to races with concurrently executing threads,
-	// so try several times
-	for i := 0; i < 5; i++ {
-		// this should tell the scheduler to not start any new goroutines
-		sched.stopwait = freezeStopWait
-		atomicstore(&sched.gcwaiting, 1)
-		// this should stop running goroutines
-		if !preemptall() {
-			break // no running goroutines
-		}
-		usleep(1000)
-	}
-	// to be sure
-	usleep(1000)
-	preemptall()
-	usleep(1000)
-}
-
-func isscanstatus(status uint32) bool {
-	if status == _Gscan {
-		throw("isscanstatus: Bad status Gscan")
-	}
-	return status&_Gscan == _Gscan
-}
-
-// All reads and writes of g's status go through readgstatus, casgstatus
-// castogscanstatus, casfrom_Gscanstatus.
-//go:nosplit
-func readgstatus(gp *g) uint32 {
-	return atomicload(&gp.atomicstatus)
-}
-
-// Ownership of gscanvalid:
-//
-// If gp is running (meaning status == _Grunning or _Grunning|_Gscan),
-// then gp owns gp.gscanvalid, and other goroutines must not modify it.
-//
-// Otherwise, a second goroutine can lock the scan state by setting _Gscan
-// in the status bit and then modify gscanvalid, and then unlock the scan state.
-//
-// Note that the first condition implies an exception to the second:
-// if a second goroutine changes gp's status to _Grunning|_Gscan,
-// that second goroutine still does not have the right to modify gscanvalid.
-
-// The Gscanstatuses are acting like locks and this releases them.
-// If it proves to be a performance hit we should be able to make these
-// simple atomic stores but for now we are going to throw if
-// we see an inconsistent state.
-func casfrom_Gscanstatus(gp *g, oldval, newval uint32) {
-	success := false
-
-	// Check that transition is valid.
-	switch oldval {
-	default:
-		print("runtime: casfrom_Gscanstatus bad oldval gp=", gp, ", oldval=", hex(oldval), ", newval=", hex(newval), "\n")
-		dumpgstatus(gp)
-		throw("casfrom_Gscanstatus:top gp->status is not in scan state")
-	case _Gscanrunnable,
-		_Gscanwaiting,
-		_Gscanrunning,
-		_Gscansyscall:
-		if newval == oldval&^_Gscan {
-			success = cas(&gp.atomicstatus, oldval, newval)
-		}
-	case _Gscanenqueue:
-		if newval == _Gwaiting {
-			success = cas(&gp.atomicstatus, oldval, newval)
-		}
-	}
-	if !success {
-		print("runtime: casfrom_Gscanstatus failed gp=", gp, ", oldval=", hex(oldval), ", newval=", hex(newval), "\n")
-		dumpgstatus(gp)
-		throw("casfrom_Gscanstatus: gp->status is not in scan state")
-	}
-	if newval == _Grunning {
-		gp.gcscanvalid = false
-	}
-}
-
-// This will return false if the gp is not in the expected status and the cas fails.
-// This acts like a lock acquire while the casfromgstatus acts like a lock release.
-func castogscanstatus(gp *g, oldval, newval uint32) bool {
-	switch oldval {
-	case _Grunnable,
-		_Gwaiting,
-		_Gsyscall:
-		if newval == oldval|_Gscan {
-			return cas(&gp.atomicstatus, oldval, newval)
-		}
-	case _Grunning:
-		if newval == _Gscanrunning || newval == _Gscanenqueue {
-			return cas(&gp.atomicstatus, oldval, newval)
-		}
-	}
-	print("runtime: castogscanstatus oldval=", hex(oldval), " newval=", hex(newval), "\n")
-	throw("castogscanstatus")
-	panic("not reached")
-}
-
-// If asked to move to or from a Gscanstatus this will throw. Use the castogscanstatus
-// and casfrom_Gscanstatus instead.
-// casgstatus will loop if the g->atomicstatus is in a Gscan status until the routine that
-// put it in the Gscan state is finished.
-//go:nosplit
-func casgstatus(gp *g, oldval, newval uint32) {
-	if (oldval&_Gscan != 0) || (newval&_Gscan != 0) || oldval == newval {
-		systemstack(func() {
-			print("runtime: casgstatus: oldval=", hex(oldval), " newval=", hex(newval), "\n")
-			throw("casgstatus: bad incoming values")
-		})
-	}
-
-	if oldval == _Grunning && gp.gcscanvalid {
-		// If oldvall == _Grunning, then the actual status must be
-		// _Grunning or _Grunning|_Gscan; either way,
-		// we own gp.gcscanvalid, so it's safe to read.
-		// gp.gcscanvalid must not be true when we are running.
-		print("runtime: casgstatus ", hex(oldval), "->", hex(newval), " gp.status=", hex(gp.atomicstatus), " gp.gcscanvalid=true\n")
-		throw("casgstatus")
-	}
-
-	// loop if gp->atomicstatus is in a scan state giving
-	// GC time to finish and change the state to oldval.
-	for !cas(&gp.atomicstatus, oldval, newval) {
-		if oldval == _Gwaiting && gp.atomicstatus == _Grunnable {
-			systemstack(func() {
-				throw("casgstatus: waiting for Gwaiting but is Grunnable")
-			})
-		}
-		// Help GC if needed.
-		// if gp.preemptscan && !gp.gcworkdone && (oldval == _Grunning || oldval == _Gsyscall) {
-		// 	gp.preemptscan = false
-		// 	systemstack(func() {
-		// 		gcphasework(gp)
-		// 	})
-		// }
-	}
-	if newval == _Grunning {
-		gp.gcscanvalid = false
-	}
-}
-
-// casgstatus(gp, oldstatus, Gcopystack), assuming oldstatus is Gwaiting or Grunnable.
-// Returns old status. Cannot call casgstatus directly, because we are racing with an
-// async wakeup that might come in from netpoll. If we see Gwaiting from the readgstatus,
-// it might have become Grunnable by the time we get to the cas. If we called casgstatus,
-// it would loop waiting for the status to go back to Gwaiting, which it never will.
-//go:nosplit
-func casgcopystack(gp *g) uint32 {
-	for {
-		oldstatus := readgstatus(gp) &^ _Gscan
-		if oldstatus != _Gwaiting && oldstatus != _Grunnable {
-			throw("copystack: bad status, not Gwaiting or Grunnable")
-		}
-		if cas(&gp.atomicstatus, oldstatus, _Gcopystack) {
-			return oldstatus
-		}
-	}
-}
-
-// scang blocks until gp's stack has been scanned.
-// It might be scanned by scang or it might be scanned by the goroutine itself.
-// Either way, the stack scan has completed when scang returns.
-func scang(gp *g) {
-	// Invariant; we (the caller, markroot for a specific goroutine) own gp.gcscandone.
-	// Nothing is racing with us now, but gcscandone might be set to true left over
-	// from an earlier round of stack scanning (we scan twice per GC).
-	// We use gcscandone to record whether the scan has been done during this round.
-	// It is important that the scan happens exactly once: if called twice,
-	// the installation of stack barriers will detect the double scan and die.
-
-	gp.gcscandone = false
-
-	// Endeavor to get gcscandone set to true,
-	// either by doing the stack scan ourselves or by coercing gp to scan itself.
-	// gp.gcscandone can transition from false to true when we're not looking
-	// (if we asked for preemption), so any time we lock the status using
-	// castogscanstatus we have to double-check that the scan is still not done.
-	for !gp.gcscandone {
-		switch s := readgstatus(gp); s {
-		default:
-			dumpgstatus(gp)
-			throw("stopg: invalid status")
-
-		case _Gdead:
-			// No stack.
-			gp.gcscandone = true
-
-		case _Gcopystack:
-			// Stack being switched. Go around again.
-
-		case _Grunnable, _Gsyscall, _Gwaiting:
-			// Claim goroutine by setting scan bit.
-			// Racing with execution or readying of gp.
-			// The scan bit keeps them from running
-			// the goroutine until we're done.
-			if castogscanstatus(gp, s, s|_Gscan) {
-				if !gp.gcscandone {
-					// Coordinate with traceback
-					// in sigprof.
-					for !cas(&gp.stackLock, 0, 1) {
-						osyield()
-					}
-					scanstack(gp)
-					atomicstore(&gp.stackLock, 0)
-					gp.gcscandone = true
-				}
-				restartg(gp)
-			}
-
-		case _Gscanwaiting:
-			// newstack is doing a scan for us right now. Wait.
-
-		case _Grunning:
-			// Goroutine running. Try to preempt execution so it can scan itself.
-			// The preemption handler (in newstack) does the actual scan.
-
-			// Optimization: if there is already a pending preemption request
-			// (from the previous loop iteration), don't bother with the atomics.
-			if gp.preemptscan && gp.preempt && gp.stackguard0 == stackPreempt {
-				break
-			}
-
-			// Ask for preemption and self scan.
-			if castogscanstatus(gp, _Grunning, _Gscanrunning) {
-				if !gp.gcscandone {
-					gp.preemptscan = true
-					gp.preempt = true
-					gp.stackguard0 = stackPreempt
-				}
-				casfrom_Gscanstatus(gp, _Gscanrunning, _Grunning)
-			}
-		}
-	}
-
-	gp.preemptscan = false // cancel scan request if no longer needed
-}
-
-// The GC requests that this routine be moved from a scanmumble state to a mumble state.
-func restartg(gp *g) {
-	s := readgstatus(gp)
-	switch s {
-	default:
-		dumpgstatus(gp)
-		throw("restartg: unexpected status")
-
-	case _Gdead:
-		// ok
-
-	case _Gscanrunnable,
-		_Gscanwaiting,
-		_Gscansyscall:
-		casfrom_Gscanstatus(gp, s, s&^_Gscan)
-
-	// Scan is now completed.
-	// Goroutine now needs to be made runnable.
-	// We put it on the global run queue; ready blocks on the global scheduler lock.
-	case _Gscanenqueue:
-		casfrom_Gscanstatus(gp, _Gscanenqueue, _Gwaiting)
-		if gp != getg().m.curg {
-			throw("processing Gscanenqueue on wrong m")
-		}
-		dropg()
-		ready(gp, 0)
-	}
-}
-
-// stopTheWorld stops all P's from executing goroutines, interrupting
-// all goroutines at GC safe points and records reason as the reason
-// for the stop. On return, only the current goroutine's P is running.
-// stopTheWorld must not be called from a system stack and the caller
-// must not hold worldsema. The caller must call startTheWorld when
-// other P's should resume execution.
-//
-// stopTheWorld is safe for multiple goroutines to call at the
-// same time. Each will execute its own stop, and the stops will
-// be serialized.
-//
-// This is also used by routines that do stack dumps. If the system is
-// in panic or being exited, this may not reliably stop all
-// goroutines.
-func stopTheWorld(reason string) {
-	semacquire(&worldsema, false)
-	getg().m.preemptoff = reason
-	systemstack(stopTheWorldWithSema)
-}
-
-// startTheWorld undoes the effects of stopTheWorld.
-func startTheWorld() {
-	systemstack(startTheWorldWithSema)
-	// worldsema must be held over startTheWorldWithSema to ensure
-	// gomaxprocs cannot change while worldsema is held.
-	semrelease(&worldsema)
-	getg().m.preemptoff = ""
-}
-
-// Holding worldsema grants an M the right to try to stop the world
-// and prevents gomaxprocs from changing concurrently.
-var worldsema uint32 = 1
-
-// stopTheWorldWithSema is the core implementation of stopTheWorld.
-// The caller is responsible for acquiring worldsema and disabling
-// preemption first and then should stopTheWorldWithSema on the system
-// stack:
-//
-//	semacquire(&worldsema, false)
-//	m.preemptoff = "reason"
-//	systemstack(stopTheWorldWithSema)
-//
-// When finished, the caller must either call startTheWorld or undo
-// these three operations separately:
-//
-//	m.preemptoff = ""
-//	systemstack(startTheWorldWithSema)
-//	semrelease(&worldsema)
-//
-// It is allowed to acquire worldsema once and then execute multiple
-// startTheWorldWithSema/stopTheWorldWithSema pairs.
-// Other P's are able to execute between successive calls to
-// startTheWorldWithSema and stopTheWorldWithSema.
-// Holding worldsema causes any other goroutines invoking
-// stopTheWorld to block.
-func stopTheWorldWithSema() {
-	_g_ := getg()
-
-	// If we hold a lock, then we won't be able to stop another M
-	// that is blocked trying to acquire the lock.
-	if _g_.m.locks > 0 {
-		throw("stopTheWorld: holding locks")
-	}
-
-	lock(&sched.lock)
-	sched.stopwait = gomaxprocs
-	atomicstore(&sched.gcwaiting, 1)
-	preemptall()
-	// stop current P
-	_g_.m.p.ptr().status = _Pgcstop // Pgcstop is only diagnostic.
-	sched.stopwait--
-	// try to retake all P's in Psyscall status
-	for i := 0; i < int(gomaxprocs); i++ {
-		p := allp[i]
-		s := p.status
-		if s == _Psyscall && cas(&p.status, s, _Pgcstop) {
-			if trace.enabled {
-				traceGoSysBlock(p)
-				traceProcStop(p)
-			}
-			p.syscalltick++
-			sched.stopwait--
-		}
-	}
-	// stop idle P's
-	for {
-		p := pidleget()
-		if p == nil {
-			break
-		}
-		p.status = _Pgcstop
-		sched.stopwait--
-	}
-	wait := sched.stopwait > 0
-	unlock(&sched.lock)
-
-	// wait for remaining P's to stop voluntarily
-	if wait {
-		for {
-			// wait for 100us, then try to re-preempt in case of any races
-			if notetsleep(&sched.stopnote, 100*1000) {
-				noteclear(&sched.stopnote)
-				break
-			}
-			preemptall()
-		}
-	}
-	if sched.stopwait != 0 {
-		throw("stopTheWorld: not stopped")
-	}
-	for i := 0; i < int(gomaxprocs); i++ {
-		p := allp[i]
-		if p.status != _Pgcstop {
-			throw("stopTheWorld: not stopped")
-		}
-	}
-}
-
-func mhelpgc() {
-	_g_ := getg()
-	_g_.m.helpgc = -1
-}
-
-func startTheWorldWithSema() {
-	_g_ := getg()
-
-	_g_.m.locks++        // disable preemption because it can be holding p in a local var
-	gp := netpoll(false) // non-blocking
-	injectglist(gp)
-	add := needaddgcproc()
-	lock(&sched.lock)
-
-	procs := gomaxprocs
-	if newprocs != 0 {
-		procs = newprocs
-		newprocs = 0
-	}
-	p1 := procresize(procs)
-	sched.gcwaiting = 0
-	if sched.sysmonwait != 0 {
-		sched.sysmonwait = 0
-		notewakeup(&sched.sysmonnote)
-	}
-	unlock(&sched.lock)
-
-	for p1 != nil {
-		p := p1
-		p1 = p1.link.ptr()
-		if p.m != 0 {
-			mp := p.m.ptr()
-			p.m = 0
-			if mp.nextp != 0 {
-				throw("startTheWorld: inconsistent mp->nextp")
-			}
-			mp.nextp.set(p)
-			notewakeup(&mp.park)
-		} else {
-			// Start M to run P.  Do not start another M below.
-			newm(nil, p)
-			add = false
-		}
-	}
-
-	// Wakeup an additional proc in case we have excessive runnable goroutines
-	// in local queues or in the global queue. If we don't, the proc will park itself.
-	// If we have lots of excessive work, resetspinning will unpark additional procs as necessary.
-	if atomicload(&sched.npidle) != 0 && atomicload(&sched.nmspinning) == 0 {
-		wakep()
-	}
-
-	if add {
-		// If GC could have used another helper proc, start one now,
-		// in the hope that it will be available next time.
-		// It would have been even better to start it before the collection,
-		// but doing so requires allocating memory, so it's tricky to
-		// coordinate.  This lazy approach works out in practice:
-		// we don't mind if the first couple gc rounds don't have quite
-		// the maximum number of procs.
-		newm(mhelpgc, nil)
-	}
-	_g_.m.locks--
-	if _g_.m.locks == 0 && _g_.preempt { // restore the preemption request in case we've cleared it in newstack
-		_g_.stackguard0 = stackPreempt
-	}
-}
-
-// Called to start an M.
-//go:nosplit
-func mstart() {
-	_g_ := getg()
-
-	if _g_.stack.lo == 0 {
-		// Initialize stack bounds from system stack.
-		// Cgo may have left stack size in stack.hi.
-		size := _g_.stack.hi
-		if size == 0 {
-			size = 8192 * stackGuardMultiplier
-		}
-		_g_.stack.hi = uintptr(noescape(unsafe.Pointer(&size)))
-		_g_.stack.lo = _g_.stack.hi - size + 1024
-	}
-	// Initialize stack guards so that we can start calling
-	// both Go and C functions with stack growth prologues.
-	_g_.stackguard0 = _g_.stack.lo + _StackGuard
-	_g_.stackguard1 = _g_.stackguard0
-	mstart1()
-}
-
-func mstart1() {
-	_g_ := getg()
-
-	if _g_ != _g_.m.g0 {
-		throw("bad runtime·mstart")
-	}
-
-	// Record top of stack for use by mcall.
-	// Once we call schedule we're never coming back,
-	// so other calls can reuse this stack space.
-	gosave(&_g_.m.g0.sched)
-	_g_.m.g0.sched.pc = ^uintptr(0) // make sure it is never used
-	asminit()
-	minit()
-
-	// Install signal handlers; after minit so that minit can
-	// prepare the thread to be able to handle the signals.
-	if _g_.m == &m0 {
-		// Create an extra M for callbacks on threads not created by Go.
-		if iscgo && !cgoHasExtraM {
-			cgoHasExtraM = true
-			newextram()
-		}
-		initsig()
-	}
-
-	if fn := _g_.m.mstartfn; fn != nil {
-		fn()
-	}
-
-	if _g_.m.helpgc != 0 {
-		_g_.m.helpgc = 0
-		stopm()
-	} else if _g_.m != &m0 {
-		acquirep(_g_.m.nextp.ptr())
-		_g_.m.nextp = 0
-	}
-	schedule()
-}
-
-// forEachP calls fn(p) for every P p when p reaches a GC safe point.
-// If a P is currently executing code, this will bring the P to a GC
-// safe point and execute fn on that P. If the P is not executing code
-// (it is idle or in a syscall), this will call fn(p) directly while
-// preventing the P from exiting its state. This does not ensure that
-// fn will run on every CPU executing Go code, but it acts as a global
-// memory barrier. GC uses this as a "ragged barrier."
-//
-// The caller must hold worldsema.
-func forEachP(fn func(*p)) {
-	mp := acquirem()
-	_p_ := getg().m.p.ptr()
-
-	lock(&sched.lock)
-	if sched.safePointWait != 0 {
-		throw("forEachP: sched.safePointWait != 0")
-	}
-	sched.safePointWait = gomaxprocs - 1
-	sched.safePointFn = fn
-
-	// Ask all Ps to run the safe point function.
-	for _, p := range allp[:gomaxprocs] {
-		if p != _p_ {
-			atomicstore(&p.runSafePointFn, 1)
-		}
-	}
-	preemptall()
-
-	// Any P entering _Pidle or _Psyscall from now on will observe
-	// p.runSafePointFn == 1 and will call runSafePointFn when
-	// changing its status to _Pidle/_Psyscall.
-
-	// Run safe point function for all idle Ps. sched.pidle will
-	// not change because we hold sched.lock.
-	for p := sched.pidle.ptr(); p != nil; p = p.link.ptr() {
-		if cas(&p.runSafePointFn, 1, 0) {
-			fn(p)
-			sched.safePointWait--
-		}
-	}
-
-	wait := sched.safePointWait > 0
-	unlock(&sched.lock)
-
-	// Run fn for the current P.
-	fn(_p_)
-
-	// Force Ps currently in _Psyscall into _Pidle and hand them
-	// off to induce safe point function execution.
-	for i := 0; i < int(gomaxprocs); i++ {
-		p := allp[i]
-		s := p.status
-		if s == _Psyscall && p.runSafePointFn == 1 && cas(&p.status, s, _Pidle) {
-			if trace.enabled {
-				traceGoSysBlock(p)
-				traceProcStop(p)
-			}
-			p.syscalltick++
-			handoffp(p)
-		}
-	}
-
-	// Wait for remaining Ps to run fn.
-	if wait {
-		for {
-			// Wait for 100us, then try to re-preempt in
-			// case of any races.
-			if notetsleep(&sched.safePointNote, 100*1000) {
-				noteclear(&sched.safePointNote)
-				break
-			}
-			preemptall()
-		}
-	}
-	if sched.safePointWait != 0 {
-		throw("forEachP: not done")
-	}
-	for i := 0; i < int(gomaxprocs); i++ {
-		p := allp[i]
-		if p.runSafePointFn != 0 {
-			throw("forEachP: P did not run fn")
-		}
-	}
-
-	lock(&sched.lock)
-	sched.safePointFn = nil
-	unlock(&sched.lock)
-	releasem(mp)
-}
-
-// runSafePointFn runs the safe point function, if any, for this P.
-// This should be called like
-//
-//     if getg().m.p.runSafePointFn != 0 {
-//         runSafePointFn()
-//     }
-//
-// runSafePointFn must be checked on any transition in to _Pidle or
-// _Psyscall to avoid a race where forEachP sees that the P is running
-// just before the P goes into _Pidle/_Psyscall and neither forEachP
-// nor the P run the safe-point function.
-func runSafePointFn() {
-	p := getg().m.p.ptr()
-	// Resolve the race between forEachP running the safe-point
-	// function on this P's behalf and this P running the
-	// safe-point function directly.
-	if !cas(&p.runSafePointFn, 1, 0) {
-		return
-	}
-	sched.safePointFn(p)
-	lock(&sched.lock)
-	sched.safePointWait--
-	if sched.safePointWait == 0 {
-		notewakeup(&sched.safePointNote)
-	}
-	unlock(&sched.lock)
-}
-
-// When running with cgo, we call _cgo_thread_start
-// to start threads for us so that we can play nicely with
-// foreign code.
-var cgoThreadStart unsafe.Pointer
-
-type cgothreadstart struct {
-	g   guintptr
-	tls *uint64
-	fn  unsafe.Pointer
-}
-
-// Allocate a new m unassociated with any thread.
-// Can use p for allocation context if needed.
-// fn is recorded as the new m's m.mstartfn.
-func allocm(_p_ *p, fn func()) *m {
-	_g_ := getg()
-	_g_.m.locks++ // disable GC because it can be called from sysmon
-	if _g_.m.p == 0 {
-		acquirep(_p_) // temporarily borrow p for mallocs in this function
-	}
-	mp := new(m)
-	mp.mstartfn = fn
-	mcommoninit(mp)
-
-	// In case of cgo or Solaris, pthread_create will make us a stack.
-	// Windows and Plan 9 will layout sched stack on OS stack.
-	if iscgo || GOOS == "solaris" || GOOS == "windows" || GOOS == "plan9" {
-		mp.g0 = malg(-1)
-	} else {
-		mp.g0 = malg(8192 * stackGuardMultiplier)
-	}
-	mp.g0.m = mp
-
-	if _p_ == _g_.m.p.ptr() {
-		releasep()
-	}
-	_g_.m.locks--
-	if _g_.m.locks == 0 && _g_.preempt { // restore the preemption request in case we've cleared it in newstack
-		_g_.stackguard0 = stackPreempt
-	}
-
-	return mp
-}
-
-// needm is called when a cgo callback happens on a
-// thread without an m (a thread not created by Go).
-// In this case, needm is expected to find an m to use
-// and return with m, g initialized correctly.
-// Since m and g are not set now (likely nil, but see below)
-// needm is limited in what routines it can call. In particular
-// it can only call nosplit functions (textflag 7) and cannot
-// do any scheduling that requires an m.
-//
-// In order to avoid needing heavy lifting here, we adopt
-// the following strategy: there is a stack of available m's
-// that can be stolen. Using compare-and-swap
-// to pop from the stack has ABA races, so we simulate
-// a lock by doing an exchange (via casp) to steal the stack
-// head and replace the top pointer with MLOCKED (1).
-// This serves as a simple spin lock that we can use even
-// without an m. The thread that locks the stack in this way
-// unlocks the stack by storing a valid stack head pointer.
-//
-// In order to make sure that there is always an m structure
-// available to be stolen, we maintain the invariant that there
-// is always one more than needed. At the beginning of the
-// program (if cgo is in use) the list is seeded with a single m.
-// If needm finds that it has taken the last m off the list, its job
-// is - once it has installed its own m so that it can do things like
-// allocate memory - to create a spare m and put it on the list.
-//
-// Each of these extra m's also has a g0 and a curg that are
-// pressed into service as the scheduling stack and current
-// goroutine for the duration of the cgo callback.
-//
-// When the callback is done with the m, it calls dropm to
-// put the m back on the list.
-//go:nosplit
-func needm(x byte) {
-	if iscgo && !cgoHasExtraM {
-		// Can happen if C/C++ code calls Go from a global ctor.
-		// Can not throw, because scheduler is not initialized yet.
-		write(2, unsafe.Pointer(&earlycgocallback[0]), int32(len(earlycgocallback)))
-		exit(1)
-	}
-
-	// Lock extra list, take head, unlock popped list.
-	// nilokay=false is safe here because of the invariant above,
-	// that the extra list always contains or will soon contain
-	// at least one m.
-	mp := lockextra(false)
-
-	// Set needextram when we've just emptied the list,
-	// so that the eventual call into cgocallbackg will
-	// allocate a new m for the extra list. We delay the
-	// allocation until then so that it can be done
-	// after exitsyscall makes sure it is okay to be
-	// running at all (that is, there's no garbage collection
-	// running right now).
-	mp.needextram = mp.schedlink == 0
-	unlockextra(mp.schedlink.ptr())
-
-	// Install g (= m->g0) and set the stack bounds
-	// to match the current stack. We don't actually know
-	// how big the stack is, like we don't know how big any
-	// scheduling stack is, but we assume there's at least 32 kB,
-	// which is more than enough for us.
-	setg(mp.g0)
-	_g_ := getg()
-	_g_.stack.hi = uintptr(noescape(unsafe.Pointer(&x))) + 1024
-	_g_.stack.lo = uintptr(noescape(unsafe.Pointer(&x))) - 32*1024
-	_g_.stackguard0 = _g_.stack.lo + _StackGuard
-
-	msigsave(mp)
-	// Initialize this thread to use the m.
-	asminit()
-	minit()
-}
-
-var earlycgocallback = []byte("fatal error: cgo callback before cgo call\n")
-
-// newextram allocates an m and puts it on the extra list.
-// It is called with a working local m, so that it can do things
-// like call schedlock and allocate.
-func newextram() {
-	// Create extra goroutine locked to extra m.
-	// The goroutine is the context in which the cgo callback will run.
-	// The sched.pc will never be returned to, but setting it to
-	// goexit makes clear to the traceback routines where
-	// the goroutine stack ends.
-	mp := allocm(nil, nil)
-	gp := malg(4096)
-	gp.sched.pc = funcPC(goexit) + _PCQuantum
-	gp.sched.sp = gp.stack.hi
-	gp.sched.sp -= 4 * regSize // extra space in case of reads slightly beyond frame
-	gp.sched.lr = 0
-	gp.sched.g = guintptr(unsafe.Pointer(gp))
-	gp.syscallpc = gp.sched.pc
-	gp.syscallsp = gp.sched.sp
-	gp.stktopsp = gp.sched.sp
-	// malg returns status as Gidle, change to Gsyscall before adding to allg
-	// where GC will see it.
-	casgstatus(gp, _Gidle, _Gsyscall)
-	gp.m = mp
-	mp.curg = gp
-	mp.locked = _LockInternal
-	mp.lockedg = gp
-	gp.lockedm = mp
-	gp.goid = int64(xadd64(&sched.goidgen, 1))
-	if raceenabled {
-		gp.racectx = racegostart(funcPC(newextram))
-	}
-	// put on allg for garbage collector
-	allgadd(gp)
-
-	// Add m to the extra list.
-	mnext := lockextra(true)
-	mp.schedlink.set(mnext)
-	unlockextra(mp)
-}
-
-// dropm is called when a cgo callback has called needm but is now
-// done with the callback and returning back into the non-Go thread.
-// It puts the current m back onto the extra list.
-//
-// The main expense here is the call to signalstack to release the
-// m's signal stack, and then the call to needm on the next callback
-// from this thread. It is tempting to try to save the m for next time,
-// which would eliminate both these costs, but there might not be
-// a next time: the current thread (which Go does not control) might exit.
-// If we saved the m for that thread, there would be an m leak each time
-// such a thread exited. Instead, we acquire and release an m on each
-// call. These should typically not be scheduling operations, just a few
-// atomics, so the cost should be small.
-//
-// TODO(rsc): An alternative would be to allocate a dummy pthread per-thread
-// variable using pthread_key_create. Unlike the pthread keys we already use
-// on OS X, this dummy key would never be read by Go code. It would exist
-// only so that we could register at thread-exit-time destructor.
-// That destructor would put the m back onto the extra list.
-// This is purely a performance optimization. The current version,
-// in which dropm happens on each cgo call, is still correct too.
-// We may have to keep the current version on systems with cgo
-// but without pthreads, like Windows.
-func dropm() {
-	// Undo whatever initialization minit did during needm.
-	unminit()
-
-	// Clear m and g, and return m to the extra list.
-	// After the call to setg we can only call nosplit functions
-	// with no pointer manipulation.
-	mp := getg().m
-	mnext := lockextra(true)
-	mp.schedlink.set(mnext)
-
-	setg(nil)
-	unlockextra(mp)
-}
-
-var extram uintptr
-
-// lockextra locks the extra list and returns the list head.
-// The caller must unlock the list by storing a new list head
-// to extram. If nilokay is true, then lockextra will
-// return a nil list head if that's what it finds. If nilokay is false,
-// lockextra will keep waiting until the list head is no longer nil.
-//go:nosplit
-func lockextra(nilokay bool) *m {
-	const locked = 1
-
-	for {
-		old := atomicloaduintptr(&extram)
-		if old == locked {
-			yield := osyield
-			yield()
-			continue
-		}
-		if old == 0 && !nilokay {
-			usleep(1)
-			continue
-		}
-		if casuintptr(&extram, old, locked) {
-			return (*m)(unsafe.Pointer(old))
-		}
-		yield := osyield
-		yield()
-		continue
-	}
-}
-
-//go:nosplit
-func unlockextra(mp *m) {
-	atomicstoreuintptr(&extram, uintptr(unsafe.Pointer(mp)))
-}
-
-// Create a new m.  It will start off with a call to fn, or else the scheduler.
-// fn needs to be static and not a heap allocated closure.
-// May run with m.p==nil, so write barriers are not allowed.
-//go:nowritebarrier
-func newm(fn func(), _p_ *p) {
-	mp := allocm(_p_, fn)
-	mp.nextp.set(_p_)
-	msigsave(mp)
-	if iscgo {
-		var ts cgothreadstart
-		if _cgo_thread_start == nil {
-			throw("_cgo_thread_start missing")
-		}
-		ts.g.set(mp.g0)
-		ts.tls = (*uint64)(unsafe.Pointer(&mp.tls[0]))
-		ts.fn = unsafe.Pointer(funcPC(mstart))
-		asmcgocall(_cgo_thread_start, unsafe.Pointer(&ts))
-		return
-	}
-	newosproc(mp, unsafe.Pointer(mp.g0.stack.hi))
-}
-
-// Stops execution of the current m until new work is available.
-// Returns with acquired P.
-func stopm() {
-	_g_ := getg()
-
-	if _g_.m.locks != 0 {
-		throw("stopm holding locks")
-	}
-	if _g_.m.p != 0 {
-		throw("stopm holding p")
-	}
-	if _g_.m.spinning {
-		_g_.m.spinning = false
-		xadd(&sched.nmspinning, -1)
-	}
-
-retry:
-	lock(&sched.lock)
-	mput(_g_.m)
-	unlock(&sched.lock)
-	notesleep(&_g_.m.park)
-	noteclear(&_g_.m.park)
-	if _g_.m.helpgc != 0 {
-		gchelper()
-		_g_.m.helpgc = 0
-		_g_.m.mcache = nil
-		_g_.m.p = 0
-		goto retry
-	}
-	acquirep(_g_.m.nextp.ptr())
-	_g_.m.nextp = 0
-}
-
-func mspinning() {
-	gp := getg()
-	if !runqempty(gp.m.nextp.ptr()) {
-		// Something (presumably the GC) was readied while the
-		// runtime was starting up this M, so the M is no
-		// longer spinning.
-		if int32(xadd(&sched.nmspinning, -1)) < 0 {
-			throw("mspinning: nmspinning underflowed")
-		}
-	} else {
-		gp.m.spinning = true
-	}
-}
-
-// Schedules some M to run the p (creates an M if necessary).
-// If p==nil, tries to get an idle P; if there are no idle P's, does nothing.
-// May run with m.p==nil, so write barriers are not allowed.
-//go:nowritebarrier
-func startm(_p_ *p, spinning bool) {
-	lock(&sched.lock)
-	if _p_ == nil {
-		_p_ = pidleget()
-		if _p_ == nil {
-			unlock(&sched.lock)
-			if spinning {
-				xadd(&sched.nmspinning, -1)
-			}
-			return
-		}
-	}
-	mp := mget()
-	unlock(&sched.lock)
-	if mp == nil {
-		var fn func()
-		if spinning {
-			fn = mspinning
-		}
-		newm(fn, _p_)
-		return
-	}
-	if mp.spinning {
-		throw("startm: m is spinning")
-	}
-	if mp.nextp != 0 {
-		throw("startm: m has p")
-	}
-	if spinning && !runqempty(_p_) {
-		throw("startm: p has runnable gs")
-	}
-	mp.spinning = spinning
-	mp.nextp.set(_p_)
-	notewakeup(&mp.park)
-}
-
-// Hands off P from syscall or locked M.
-// Always runs without a P, so write barriers are not allowed.
-//go:nowritebarrier
-func handoffp(_p_ *p) {
-	// if it has local work, start it straight away
-	if !runqempty(_p_) || sched.runqsize != 0 {
-		startm(_p_, false)
-		return
-	}
-	// no local work, check that there are no spinning/idle M's,
-	// otherwise our help is not required
-	if atomicload(&sched.nmspinning)+atomicload(&sched.npidle) == 0 && cas(&sched.nmspinning, 0, 1) { // TODO: fast atomic
-		startm(_p_, true)
-		return
-	}
-	lock(&sched.lock)
-	if sched.gcwaiting != 0 {
-		_p_.status = _Pgcstop
-		sched.stopwait--
-		if sched.stopwait == 0 {
-			notewakeup(&sched.stopnote)
-		}
-		unlock(&sched.lock)
-		return
-	}
-	if _p_.runSafePointFn != 0 && cas(&_p_.runSafePointFn, 1, 0) {
-		sched.safePointFn(_p_)
-		sched.safePointWait--
-		if sched.safePointWait == 0 {
-			notewakeup(&sched.safePointNote)
-		}
-	}
-	if sched.runqsize != 0 {
-		unlock(&sched.lock)
-		startm(_p_, false)
-		return
-	}
-	// If this is the last running P and nobody is polling network,
-	// need to wakeup another M to poll network.
-	if sched.npidle == uint32(gomaxprocs-1) && atomicload64(&sched.lastpoll) != 0 {
-		unlock(&sched.lock)
-		startm(_p_, false)
-		return
-	}
-	pidleput(_p_)
-	unlock(&sched.lock)
-}
-
-// Tries to add one more P to execute G's.
-// Called when a G is made runnable (newproc, ready).
-func wakep() {
-	// be conservative about spinning threads
-	if !cas(&sched.nmspinning, 0, 1) {
-		return
-	}
-	startm(nil, true)
-}
-
-// Stops execution of the current m that is locked to a g until the g is runnable again.
-// Returns with acquired P.
-func stoplockedm() {
-	_g_ := getg()
-
-	if _g_.m.lockedg == nil || _g_.m.lockedg.lockedm != _g_.m {
-		throw("stoplockedm: inconsistent locking")
-	}
-	if _g_.m.p != 0 {
-		// Schedule another M to run this p.
-		_p_ := releasep()
-		handoffp(_p_)
-	}
-	incidlelocked(1)
-	// Wait until another thread schedules lockedg again.
-	notesleep(&_g_.m.park)
-	noteclear(&_g_.m.park)
-	status := readgstatus(_g_.m.lockedg)
-	if status&^_Gscan != _Grunnable {
-		print("runtime:stoplockedm: g is not Grunnable or Gscanrunnable\n")
-		dumpgstatus(_g_)
-		throw("stoplockedm: not runnable")
-	}
-	acquirep(_g_.m.nextp.ptr())
-	_g_.m.nextp = 0
-}
-
-// Schedules the locked m to run the locked gp.
-// May run during STW, so write barriers are not allowed.
-//go:nowritebarrier
-func startlockedm(gp *g) {
-	_g_ := getg()
-
-	mp := gp.lockedm
-	if mp == _g_.m {
-		throw("startlockedm: locked to me")
-	}
-	if mp.nextp != 0 {
-		throw("startlockedm: m has p")
-	}
-	// directly handoff current P to the locked m
-	incidlelocked(-1)
-	_p_ := releasep()
-	mp.nextp.set(_p_)
-	notewakeup(&mp.park)
-	stopm()
-}
-
-// Stops the current m for stopTheWorld.
-// Returns when the world is restarted.
-func gcstopm() {
-	_g_ := getg()
-
-	if sched.gcwaiting == 0 {
-		throw("gcstopm: not waiting for gc")
-	}
-	if _g_.m.spinning {
-		_g_.m.spinning = false
-		xadd(&sched.nmspinning, -1)
-	}
-	_p_ := releasep()
-	lock(&sched.lock)
-	_p_.status = _Pgcstop
-	sched.stopwait--
-	if sched.stopwait == 0 {
-		notewakeup(&sched.stopnote)
-	}
-	unlock(&sched.lock)
-	stopm()
-}
-
-// Schedules gp to run on the current M.
-// If inheritTime is true, gp inherits the remaining time in the
-// current time slice. Otherwise, it starts a new time slice.
-// Never returns.
-func execute(gp *g, inheritTime bool) {
-	_g_ := getg()
-
-	casgstatus(gp, _Grunnable, _Grunning)
-	gp.waitsince = 0
-	gp.preempt = false
-	gp.stackguard0 = gp.stack.lo + _StackGuard
-	if !inheritTime {
-		_g_.m.p.ptr().schedtick++
-	}
-	_g_.m.curg = gp
-	gp.m = _g_.m
-
-	// Check whether the profiler needs to be turned on or off.
-	hz := sched.profilehz
-	if _g_.m.profilehz != hz {
-		resetcpuprofiler(hz)
-	}
-
-	if trace.enabled {
-		// GoSysExit has to happen when we have a P, but before GoStart.
-		// So we emit it here.
-		if gp.syscallsp != 0 && gp.sysblocktraced {
-			// Since gp.sysblocktraced is true, we must emit an event.
-			// There is a race between the code that initializes sysexitseq
-			// and sysexitticks (in exitsyscall, which runs without a P,
-			// and therefore is not stopped with the rest of the world)
-			// and the code that initializes a new trace.
-			// The recorded sysexitseq and sysexitticks must therefore
-			// be treated as "best effort". If they are valid for this trace,
-			// then great, use them for greater accuracy.
-			// But if they're not valid for this trace, assume that the
-			// trace was started after the actual syscall exit (but before
-			// we actually managed to start the goroutine, aka right now),
-			// and assign a fresh time stamp to keep the log consistent.
-			seq, ts := gp.sysexitseq, gp.sysexitticks
-			if seq == 0 || int64(seq)-int64(trace.seqStart) < 0 {
-				seq, ts = tracestamp()
-			}
-			traceGoSysExit(seq, ts)
-		}
-		traceGoStart()
-	}
-
-	gogo(&gp.sched)
-}
-
-// Finds a runnable goroutine to execute.
-// Tries to steal from other P's, get g from global queue, poll network.
-func findrunnable() (gp *g, inheritTime bool) {
-	_g_ := getg()
-
-top:
-	if sched.gcwaiting != 0 {
-		gcstopm()
-		goto top
-	}
-	if _g_.m.p.ptr().runSafePointFn != 0 {
-		runSafePointFn()
-	}
-	if fingwait && fingwake {
-		if gp := wakefing(); gp != nil {
-			ready(gp, 0)
-		}
-	}
-
-	// local runq
-	if gp, inheritTime := runqget(_g_.m.p.ptr()); gp != nil {
-		return gp, inheritTime
-	}
-
-	// global runq
-	if sched.runqsize != 0 {
-		lock(&sched.lock)
-		gp := globrunqget(_g_.m.p.ptr(), 0)
-		unlock(&sched.lock)
-		if gp != nil {
-			return gp, false
-		}
-	}
-
-	// Poll network.
-	// This netpoll is only an optimization before we resort to stealing.
-// We can safely skip it if there is a thread blocked in netpoll already.
-	// If there is any kind of logical race with that blocked thread
-	// (e.g. it has already returned from netpoll, but does not set lastpoll yet),
-	// this thread will do blocking netpoll below anyway.
-	if netpollinited() && sched.lastpoll != 0 {
-		if gp := netpoll(false); gp != nil { // non-blocking
-			// netpoll returns list of goroutines linked by schedlink.
-			injectglist(gp.schedlink.ptr())
-			casgstatus(gp, _Gwaiting, _Grunnable)
-			if trace.enabled {
-				traceGoUnpark(gp, 0)
-			}
-			return gp, false
-		}
-	}
-
-	// If number of spinning M's >= number of busy P's, block.
-	// This is necessary to prevent excessive CPU consumption
-	// when GOMAXPROCS>>1 but the program parallelism is low.
-	if !_g_.m.spinning && 2*atomicload(&sched.nmspinning) >= uint32(gomaxprocs)-atomicload(&sched.npidle) { // TODO: fast atomic
-		goto stop
-	}
-	if !_g_.m.spinning {
-		_g_.m.spinning = true
-		xadd(&sched.nmspinning, 1)
-	}
-	// random steal from other P's
-	for i := 0; i < int(4*gomaxprocs); i++ {
-		if sched.gcwaiting != 0 {
-			goto top
-		}
-		_p_ := allp[fastrand1()%uint32(gomaxprocs)]
-		var gp *g
-		if _p_ == _g_.m.p.ptr() {
-			gp, _ = runqget(_p_)
-		} else {
-			stealRunNextG := i > 2*int(gomaxprocs) // first look for ready queues with more than 1 g
-			gp = runqsteal(_g_.m.p.ptr(), _p_, stealRunNextG)
-		}
-		if gp != nil {
-			return gp, false
-		}
-	}
-
-stop:
-
-	// We have nothing to do. If we're in the GC mark phase and can
-	// safely scan and blacken objects, run idle-time marking
-	// rather than give up the P.
-	if _p_ := _g_.m.p.ptr(); gcBlackenEnabled != 0 && _p_.gcBgMarkWorker != nil && gcMarkWorkAvailable(_p_) {
-		_p_.gcMarkWorkerMode = gcMarkWorkerIdleMode
-		gp := _p_.gcBgMarkWorker
-		casgstatus(gp, _Gwaiting, _Grunnable)
-		if trace.enabled {
-			traceGoUnpark(gp, 0)
-		}
-		return gp, false
-	}
-
-	// return P and block
-	lock(&sched.lock)
-	if sched.gcwaiting != 0 || _g_.m.p.ptr().runSafePointFn != 0 {
-		unlock(&sched.lock)
-		goto top
-	}
-	if sched.runqsize != 0 {
-		gp := globrunqget(_g_.m.p.ptr(), 0)
-		unlock(&sched.lock)
-		return gp, false
-	}
-	_p_ := releasep()
-	pidleput(_p_)
-	unlock(&sched.lock)
-	if _g_.m.spinning {
-		_g_.m.spinning = false
-		xadd(&sched.nmspinning, -1)
-	}
-
-	// check all runqueues once again
-	for i := 0; i < int(gomaxprocs); i++ {
-		_p_ := allp[i]
-		if _p_ != nil && !runqempty(_p_) {
-			lock(&sched.lock)
-			_p_ = pidleget()
-			unlock(&sched.lock)
-			if _p_ != nil {
-				acquirep(_p_)
-				goto top
-			}
-			break
-		}
-	}
-
-	// poll network
-	if netpollinited() && xchg64(&sched.lastpoll, 0) != 0 {
-		if _g_.m.p != 0 {
-			throw("findrunnable: netpoll with p")
-		}
-		if _g_.m.spinning {
-			throw("findrunnable: netpoll with spinning")
-		}
-		gp := netpoll(true) // block until new work is available
-		atomicstore64(&sched.lastpoll, uint64(nanotime()))
-		if gp != nil {
-			lock(&sched.lock)
-			_p_ = pidleget()
-			unlock(&sched.lock)
-			if _p_ != nil {
-				acquirep(_p_)
-				injectglist(gp.schedlink.ptr())
-				casgstatus(gp, _Gwaiting, _Grunnable)
-				if trace.enabled {
-					traceGoUnpark(gp, 0)
-				}
-				return gp, false
-			}
-			injectglist(gp)
-		}
-	}
-	stopm()
-	goto top
-}
-
-func resetspinning() {
-	_g_ := getg()
-
-	var nmspinning uint32
-	if _g_.m.spinning {
-		_g_.m.spinning = false
-		nmspinning = xadd(&sched.nmspinning, -1)
-		if int32(nmspinning) < 0 {
-			throw("findrunnable: negative nmspinning")
-		}
-	} else {
-		nmspinning = atomicload(&sched.nmspinning)
-	}
-
-	// M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
-	// so see if we need to wakeup another P here.
-	if nmspinning == 0 && atomicload(&sched.npidle) > 0 {
-		wakep()
-	}
-}
-
-// Injects the list of runnable G's into the scheduler.
-// Can run concurrently with GC.
-func injectglist(glist *g) {
-	if glist == nil {
-		return
-	}
-	if trace.enabled {
-		for gp := glist; gp != nil; gp = gp.schedlink.ptr() {
-			traceGoUnpark(gp, 0)
-		}
-	}
-	lock(&sched.lock)
-	var n int
-	for n = 0; glist != nil; n++ {
-		gp := glist
-		glist = gp.schedlink.ptr()
-		casgstatus(gp, _Gwaiting, _Grunnable)
-		globrunqput(gp)
-	}
-	unlock(&sched.lock)
-	for ; n != 0 && sched.npidle != 0; n-- {
-		startm(nil, false)
-	}
-}
-
-// One round of scheduler: find a runnable goroutine and execute it.
-// Never returns.
-func schedule() {
-	_g_ := getg()
-
-	if _g_.m.locks != 0 {
-		throw("schedule: holding locks")
-	}
-
-	if _g_.m.lockedg != nil {
-		stoplockedm()
-		execute(_g_.m.lockedg, false) // Never returns.
-	}
-
-top:
-	if sched.gcwaiting != 0 {
-		gcstopm()
-		goto top
-	}
-	if _g_.m.p.ptr().runSafePointFn != 0 {
-		runSafePointFn()
-	}
-
-	var gp *g
-	var inheritTime bool
-	if trace.enabled || trace.shutdown {
-		gp = traceReader()
-		if gp != nil {
-			casgstatus(gp, _Gwaiting, _Grunnable)
-			traceGoUnpark(gp, 0)
-			resetspinning()
-		}
-	}
-	if gp == nil && gcBlackenEnabled != 0 {
-		gp = gcController.findRunnableGCWorker(_g_.m.p.ptr())
-		if gp != nil {
-			resetspinning()
-		}
-	}
-	if gp == nil {
-		// Check the global runnable queue once in a while to ensure fairness.
-		// Otherwise two goroutines can completely occupy the local runqueue
-		// by constantly respawning each other.
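-		// For example, G1 running `go G2()` while G2 runs `go G1()`,
-		// each exiting immediately, would otherwise keep this P's local
-		// queue full forever and starve the global queue.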
-		if _g_.m.p.ptr().schedtick%61 == 0 && sched.runqsize > 0 {
-			lock(&sched.lock)
-			gp = globrunqget(_g_.m.p.ptr(), 1)
-			unlock(&sched.lock)
-			if gp != nil {
-				resetspinning()
-			}
-		}
-	}
-	if gp == nil {
-		gp, inheritTime = runqget(_g_.m.p.ptr())
-		if gp != nil && _g_.m.spinning {
-			throw("schedule: spinning with local work")
-		}
-	}
-	if gp == nil {
-		gp, inheritTime = findrunnable() // blocks until work is available
-		resetspinning()
-	}
-
-	if gp.lockedm != nil {
-		// Hands off own p to the locked m,
-		// then blocks waiting for a new p.
-		startlockedm(gp)
-		goto top
-	}
-
-	execute(gp, inheritTime)
-}
-
-// dropg removes the association between m and the current goroutine m->curg (gp for short).
-// Typically a caller sets gp's status away from Grunning and then
-// immediately calls dropg to finish the job. The caller is also responsible
-// for arranging that gp will be restarted using ready at an
-// appropriate time. After calling dropg and arranging for gp to be
-// readied later, the caller can do other work but eventually should
-// call schedule to restart the scheduling of goroutines on this m.
-func dropg() {
-	_g_ := getg()
-
-	if _g_.m.lockedg == nil {
-		_g_.m.curg.m = nil
-		_g_.m.curg = nil
-	}
-}
-
-func parkunlock_c(gp *g, lock unsafe.Pointer) bool {
-	unlock((*mutex)(lock))
-	return true
-}
-
-// park continuation on g0.
-func park_m(gp *g) {
-	_g_ := getg()
-
-	if trace.enabled {
-		traceGoPark(_g_.m.waittraceev, _g_.m.waittraceskip, gp)
-	}
-
-	casgstatus(gp, _Grunning, _Gwaiting)
-	dropg()
-
-	if _g_.m.waitunlockf != nil {
-		fn := *(*func(*g, unsafe.Pointer) bool)(unsafe.Pointer(&_g_.m.waitunlockf))
-		ok := fn(gp, _g_.m.waitlock)
-		_g_.m.waitunlockf = nil
-		_g_.m.waitlock = nil
-		if !ok {
-			if trace.enabled {
-				traceGoUnpark(gp, 2)
-			}
-			casgstatus(gp, _Gwaiting, _Grunnable)
-			execute(gp, true) // Schedule it back, never returns.
-		}
-	}
-	schedule()
-}
-
-func goschedImpl(gp *g) {
-	status := readgstatus(gp)
-	if status&^_Gscan != _Grunning {
-		dumpgstatus(gp)
-		throw("bad g status")
-	}
-	casgstatus(gp, _Grunning, _Grunnable)
-	dropg()
-	lock(&sched.lock)
-	globrunqput(gp)
-	unlock(&sched.lock)
-
-	schedule()
-}
-
-// Gosched continuation on g0.
-func gosched_m(gp *g) {
-	if trace.enabled {
-		traceGoSched()
-	}
-	goschedImpl(gp)
-}
-
-func gopreempt_m(gp *g) {
-	if trace.enabled {
-		traceGoPreempt()
-	}
-	goschedImpl(gp)
-}
-
-// Finishes execution of the current goroutine.
-func goexit1() {
-	if raceenabled {
-		racegoend()
-	}
-	if trace.enabled {
-		traceGoEnd()
-	}
-	mcall(goexit0)
-}
-
-// goexit continuation on g0.
-func goexit0(gp *g) {
-	_g_ := getg()
-
-	casgstatus(gp, _Grunning, _Gdead)
-	gp.m = nil
-	gp.lockedm = nil
-	_g_.m.lockedg = nil
-	gp.paniconfault = false
-	gp._defer = nil // should be nil already but just in case.
-	gp._panic = nil // non-nil for Goexit during panic. points at stack-allocated data.
-	gp.writebuf = nil
-	gp.waitreason = ""
-	gp.param = nil
-
-	dropg()
-
-	if _g_.m.locked&^_LockExternal != 0 {
-		print("invalid m->locked = ", _g_.m.locked, "\n")
-		throw("internal lockOSThread error")
-	}
-	_g_.m.locked = 0
-	gfput(_g_.m.p.ptr(), gp)
-	schedule()
-}
-
-//go:nosplit
-//go:nowritebarrier
-func save(pc, sp uintptr) {
-	_g_ := getg()
-
-	_g_.sched.pc = pc
-	_g_.sched.sp = sp
-	_g_.sched.lr = 0
-	_g_.sched.ret = 0
-	_g_.sched.ctxt = nil
-	_g_.sched.g = guintptr(unsafe.Pointer(_g_))
-}
-
-// The goroutine g is about to enter a system call.
-// Record that it's not using the cpu anymore.
-// This is called only from the go syscall library and cgocall,
-// not from the low-level system calls used by the runtime.
-//
-// Entersyscall cannot split the stack: the gosave must
-// make g->sched refer to the caller's stack segment, because
-// entersyscall is going to return immediately after.
-//
-// Nothing entersyscall calls can split the stack either.
-// We cannot safely move the stack during an active call to syscall,
-// because we do not know which of the uintptr arguments are
-// really pointers (back into the stack).
-// In practice, this means that we make the fast path run through
-// entersyscall doing no-split things, and the slow path has to use systemstack
-// to run bigger things on the system stack.
-//
-// reentersyscall is the entry point used by cgo callbacks, where explicitly
-// saved SP and PC are restored. This is needed when exitsyscall will be called
-// from a function further up in the call stack than the parent, as g->syscallsp
-// must always point to a valid stack frame. entersyscall below is the normal
-// entry point for syscalls, which obtains the SP and PC from the caller.
-//
-// Syscall tracing:
-// At the start of a syscall we emit traceGoSysCall to capture the stack trace.
-// If the syscall does not block, that is it, we do not emit any other events.
-// If the syscall blocks (that is, P is retaken), retaker emits traceGoSysBlock;
-// when syscall returns we emit traceGoSysExit and when the goroutine starts running
-// (potentially instantly, if exitsyscallfast returns true) we emit traceGoStart.
-// To ensure that traceGoSysExit is emitted strictly after traceGoSysBlock,
-// we remember the current value of syscalltick in m (_g_.m.syscalltick = _g_.m.p.ptr().syscalltick),
-// whoever emits traceGoSysBlock increments p.syscalltick afterwards;
-// and we wait for the increment before emitting traceGoSysExit.
-// Note that the increment is done even if tracing is not enabled,
-// because tracing can be enabled in the middle of a syscall. We don't want the wait to hang.
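-//
-// In pseudocode, the exit side of this handshake waits roughly as follows
-// (a sketch; the real loop appears in exitsyscall below):
-//	for oldp != nil && oldp.syscalltick == _g_.m.syscalltick {
-//		osyield() // traceGoSysBlock has not been emitted yet
-//	}
-//	// only now is it safe to emit traceGoSysExit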
-//
-//go:nosplit
-func reentersyscall(pc, sp uintptr) {
-	_g_ := getg()
-
-	// Disable preemption because during this function g is in Gsyscall status,
-	// but can have inconsistent g->sched, do not let GC observe it.
-	_g_.m.locks++
-
-	if trace.enabled {
-		systemstack(traceGoSysCall)
-	}
-
-	// Entersyscall must not call any function that might split/grow the stack.
-	// (See details in comment above.)
-	// Catch calls that might, by replacing the stack guard with something that
-	// will trip any stack check and leaving a flag to tell newstack to die.
-	_g_.stackguard0 = stackPreempt
-	_g_.throwsplit = true
-
-	// Leave SP around for GC and traceback.
-	save(pc, sp)
-	_g_.syscallsp = sp
-	_g_.syscallpc = pc
-	casgstatus(_g_, _Grunning, _Gsyscall)
-	if _g_.syscallsp < _g_.stack.lo || _g_.stack.hi < _g_.syscallsp {
-		systemstack(func() {
-			print("entersyscall inconsistent ", hex(_g_.syscallsp), " [", hex(_g_.stack.lo), ",", hex(_g_.stack.hi), "]\n")
-			throw("entersyscall")
-		})
-	}
-
-	if atomicload(&sched.sysmonwait) != 0 { // TODO: fast atomic
-		systemstack(entersyscall_sysmon)
-		save(pc, sp)
-	}
-
-	if _g_.m.p.ptr().runSafePointFn != 0 {
-		// runSafePointFn may stack split if run on this stack
-		systemstack(runSafePointFn)
-		save(pc, sp)
-	}
-
-	_g_.m.syscalltick = _g_.m.p.ptr().syscalltick
-	_g_.sysblocktraced = true
-	_g_.m.mcache = nil
-	_g_.m.p.ptr().m = 0
-	atomicstore(&_g_.m.p.ptr().status, _Psyscall)
-	if sched.gcwaiting != 0 {
-		systemstack(entersyscall_gcwait)
-		save(pc, sp)
-	}
-
-	// Goroutines must not split stacks in Gsyscall status (it would corrupt g->sched).
-// We set stackguard0 to stackPreempt so that the first split stack check calls morestack.
-	// Morestack detects this case and throws.
-	_g_.stackguard0 = stackPreempt
-	_g_.m.locks--
-}
-
-// Standard syscall entry used by the go syscall library and normal cgo calls.
-//go:nosplit
-func entersyscall(dummy int32) {
-	reentersyscall(getcallerpc(unsafe.Pointer(&dummy)), getcallersp(unsafe.Pointer(&dummy)))
-}
-
-func entersyscall_sysmon() {
-	lock(&sched.lock)
-	if atomicload(&sched.sysmonwait) != 0 {
-		atomicstore(&sched.sysmonwait, 0)
-		notewakeup(&sched.sysmonnote)
-	}
-	unlock(&sched.lock)
-}
-
-func entersyscall_gcwait() {
-	_g_ := getg()
-	_p_ := _g_.m.p.ptr()
-
-	lock(&sched.lock)
-	if sched.stopwait > 0 && cas(&_p_.status, _Psyscall, _Pgcstop) {
-		if trace.enabled {
-			traceGoSysBlock(_p_)
-			traceProcStop(_p_)
-		}
-		_p_.syscalltick++
-		if sched.stopwait--; sched.stopwait == 0 {
-			notewakeup(&sched.stopnote)
-		}
-	}
-	unlock(&sched.lock)
-}
-
-// The same as entersyscall(), but with a hint that the syscall is blocking.
-//go:nosplit
-func entersyscallblock(dummy int32) {
-	_g_ := getg()
-
-	_g_.m.locks++ // see comment in entersyscall
-	_g_.throwsplit = true
-	_g_.stackguard0 = stackPreempt // see comment in entersyscall
-	_g_.m.syscalltick = _g_.m.p.ptr().syscalltick
-	_g_.sysblocktraced = true
-	_g_.m.p.ptr().syscalltick++
-
-	// Leave SP around for GC and traceback.
-	pc := getcallerpc(unsafe.Pointer(&dummy))
-	sp := getcallersp(unsafe.Pointer(&dummy))
-	save(pc, sp)
-	_g_.syscallsp = _g_.sched.sp
-	_g_.syscallpc = _g_.sched.pc
-	if _g_.syscallsp < _g_.stack.lo || _g_.stack.hi < _g_.syscallsp {
-		sp1 := sp
-		sp2 := _g_.sched.sp
-		sp3 := _g_.syscallsp
-		systemstack(func() {
-			print("entersyscallblock inconsistent ", hex(sp1), " ", hex(sp2), " ", hex(sp3), " [", hex(_g_.stack.lo), ",", hex(_g_.stack.hi), "]\n")
-			throw("entersyscallblock")
-		})
-	}
-	casgstatus(_g_, _Grunning, _Gsyscall)
-	if _g_.syscallsp < _g_.stack.lo || _g_.stack.hi < _g_.syscallsp {
-		systemstack(func() {
-			print("entersyscallblock inconsistent ", hex(sp), " ", hex(_g_.sched.sp), " ", hex(_g_.syscallsp), " [", hex(_g_.stack.lo), ",", hex(_g_.stack.hi), "]\n")
-			throw("entersyscallblock")
-		})
-	}
-
-	systemstack(entersyscallblock_handoff)
-
-	// Resave for traceback during blocked call.
-	save(getcallerpc(unsafe.Pointer(&dummy)), getcallersp(unsafe.Pointer(&dummy)))
-
-	_g_.m.locks--
-}
-
-func entersyscallblock_handoff() {
-	if trace.enabled {
-		traceGoSysCall()
-		traceGoSysBlock(getg().m.p.ptr())
-	}
-	handoffp(releasep())
-}
-
-// The goroutine g exited its system call.
-// Arrange for it to run on a cpu again.
-// This is called only from the go syscall library, not
-// from the low-level system calls used by the runtime.
-//go:nosplit
-func exitsyscall(dummy int32) {
-	_g_ := getg()
-
-	_g_.m.locks++ // see comment in entersyscall
-	if getcallersp(unsafe.Pointer(&dummy)) > _g_.syscallsp {
-		throw("exitsyscall: syscall frame is no longer valid")
-	}
-
-	_g_.waitsince = 0
-	oldp := _g_.m.p.ptr()
-	if exitsyscallfast() {
-		if _g_.m.mcache == nil {
-			throw("lost mcache")
-		}
-		if trace.enabled {
-			if oldp != _g_.m.p.ptr() || _g_.m.syscalltick != _g_.m.p.ptr().syscalltick {
-				systemstack(traceGoStart)
-			}
-		}
-		// There's a cpu for us, so we can run.
-		_g_.m.p.ptr().syscalltick++
-		// We need to cas the status and scan before resuming...
-		casgstatus(_g_, _Gsyscall, _Grunning)
-
-		// Garbage collector isn't running (since we are),
-		// so okay to clear syscallsp.
-		_g_.syscallsp = 0
-		_g_.m.locks--
-		if _g_.preempt {
-			// restore the preemption request in case we've cleared it in newstack
-			_g_.stackguard0 = stackPreempt
-		} else {
-			// otherwise restore the real _StackGuard, we've spoiled it in entersyscall/entersyscallblock
-			_g_.stackguard0 = _g_.stack.lo + _StackGuard
-		}
-		_g_.throwsplit = false
-		return
-	}
-
-	_g_.sysexitticks = 0
-	_g_.sysexitseq = 0
-	if trace.enabled {
-		// Wait till traceGoSysBlock event is emitted.
-		// This ensures consistency of the trace (the goroutine is started after it is blocked).
-		for oldp != nil && oldp.syscalltick == _g_.m.syscalltick {
-			osyield()
-		}
-		// We can't trace syscall exit right now because we don't have a P.
-		// Tracing code can invoke write barriers that cannot run without a P.
-		// So instead we remember the syscall exit time and emit the event
-		// in execute when we have a P.
-		_g_.sysexitseq, _g_.sysexitticks = tracestamp()
-	}
-
-	_g_.m.locks--
-
-	// Call the scheduler.
-	mcall(exitsyscall0)
-
-	if _g_.m.mcache == nil {
-		throw("lost mcache")
-	}
-
-	// Scheduler returned, so we're allowed to run now.
-	// Delete the syscallsp information that we left for
-	// the garbage collector during the system call.
-	// Must wait until now because until gosched returns
-	// we don't know for sure that the garbage collector
-	// is not running.
-	_g_.syscallsp = 0
-	_g_.m.p.ptr().syscalltick++
-	_g_.throwsplit = false
-}
-
-//go:nosplit
-func exitsyscallfast() bool {
-	_g_ := getg()
-
-	// Freezetheworld sets stopwait but does not retake P's.
-	if sched.stopwait == freezeStopWait {
-		_g_.m.mcache = nil
-		_g_.m.p = 0
-		return false
-	}
-
-	// Try to re-acquire the last P.
-	if _g_.m.p != 0 && _g_.m.p.ptr().status == _Psyscall && cas(&_g_.m.p.ptr().status, _Psyscall, _Prunning) {
-		// There's a cpu for us, so we can run.
-		_g_.m.mcache = _g_.m.p.ptr().mcache
-		_g_.m.p.ptr().m.set(_g_.m)
-		if _g_.m.syscalltick != _g_.m.p.ptr().syscalltick {
-			if trace.enabled {
-				// The p was retaken and then entered a syscall again (since _g_.m.syscalltick has changed).
-				// traceGoSysBlock for this syscall was already emitted,
-				// but here we effectively retake the p from the new syscall running on the same p.
-				systemstack(func() {
-					// Denote blocking of the new syscall.
-					traceGoSysBlock(_g_.m.p.ptr())
-					// Denote completion of the current syscall.
-					traceGoSysExit(tracestamp())
-				})
-			}
-			_g_.m.p.ptr().syscalltick++
-		}
-		return true
-	}
-
-	// Try to get any other idle P.
-	oldp := _g_.m.p.ptr()
-	_g_.m.mcache = nil
-	_g_.m.p = 0
-	if sched.pidle != 0 {
-		var ok bool
-		systemstack(func() {
-			ok = exitsyscallfast_pidle()
-			if ok && trace.enabled {
-				if oldp != nil {
-					// Wait till traceGoSysBlock event is emitted.
-					// This ensures consistency of the trace (the goroutine is started after it is blocked).
-					for oldp.syscalltick == _g_.m.syscalltick {
-						osyield()
-					}
-				}
-				traceGoSysExit(tracestamp())
-			}
-		})
-		if ok {
-			return true
-		}
-	}
-	return false
-}
-
-func exitsyscallfast_pidle() bool {
-	lock(&sched.lock)
-	_p_ := pidleget()
-	if _p_ != nil && atomicload(&sched.sysmonwait) != 0 {
-		atomicstore(&sched.sysmonwait, 0)
-		notewakeup(&sched.sysmonnote)
-	}
-	unlock(&sched.lock)
-	if _p_ != nil {
-		acquirep(_p_)
-		return true
-	}
-	return false
-}
-
-// exitsyscall slow path on g0.
-// Failed to acquire P, enqueue gp as runnable.
-func exitsyscall0(gp *g) {
-	_g_ := getg()
-
-	casgstatus(gp, _Gsyscall, _Grunnable)
-	dropg()
-	lock(&sched.lock)
-	_p_ := pidleget()
-	if _p_ == nil {
-		globrunqput(gp)
-	} else if atomicload(&sched.sysmonwait) != 0 {
-		atomicstore(&sched.sysmonwait, 0)
-		notewakeup(&sched.sysmonnote)
-	}
-	unlock(&sched.lock)
-	if _p_ != nil {
-		acquirep(_p_)
-		execute(gp, false) // Never returns.
-	}
-	if _g_.m.lockedg != nil {
-		// Wait until another thread schedules gp and so m again.
-		stoplockedm()
-		execute(gp, false) // Never returns.
-	}
-	stopm()
-	schedule() // Never returns.
-}
-
-func beforefork() {
-	gp := getg().m.curg
-
-	// Fork can hang if preempted with signals frequently enough (see issue 5517).
-	// Ensure that we stay on the same M where we disable profiling.
-	gp.m.locks++
-	if gp.m.profilehz != 0 {
-		resetcpuprofiler(0)
-	}
-
-	// This function is called before fork in syscall package.
-	// Code between fork and exec must not allocate memory nor even try to grow stack.
-	// Here we spoil g->_StackGuard to reliably detect any attempts to grow stack.
-	// runtime_AfterFork will undo this in parent process, but not in child.
-	gp.stackguard0 = stackFork
-}
-
-// Called from syscall package before fork.
-//go:linkname syscall_runtime_BeforeFork syscall.runtime_BeforeFork
-//go:nosplit
-func syscall_runtime_BeforeFork() {
-	systemstack(beforefork)
-}
-
-func afterfork() {
-	gp := getg().m.curg
-
-	// See the comment in beforefork.
-	gp.stackguard0 = gp.stack.lo + _StackGuard
-
-	hz := sched.profilehz
-	if hz != 0 {
-		resetcpuprofiler(hz)
-	}
-	gp.m.locks--
-}
-
-// Called from syscall package after fork in parent.
-//go:linkname syscall_runtime_AfterFork syscall.runtime_AfterFork
-//go:nosplit
-func syscall_runtime_AfterFork() {
-	systemstack(afterfork)
-}
-
-// Allocate a new g, with a stack big enough for stacksize bytes.
-func malg(stacksize int32) *g {
-	newg := new(g)
-	if stacksize >= 0 {
-		stacksize = round2(_StackSystem + stacksize)
-		systemstack(func() {
-			newg.stack, newg.stkbar = stackalloc(uint32(stacksize))
-		})
-		newg.stackguard0 = newg.stack.lo + _StackGuard
-		newg.stackguard1 = ^uintptr(0)
-		newg.stackAlloc = uintptr(stacksize)
-	}
-	return newg
-}
-
-// Create a new g running fn with siz bytes of arguments.
-// Put it on the queue of g's waiting to run.
-// The compiler turns a go statement into a call to this.
-// Cannot split the stack because it assumes that the arguments
-// are available sequentially after &fn; they would not be
-// copied if a stack split occurred.
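-//
-// Illustratively, for `go f(x, y)` the caller's stack is assumed to look
-// like this (a sketch), which is why argp below is just &fn + ptrSize:
-//	... | fn (funcval ptr) | x | y | ...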
-//go:nosplit
-func newproc(siz int32, fn *funcval) {
-	argp := add(unsafe.Pointer(&fn), ptrSize)
-	pc := getcallerpc(unsafe.Pointer(&siz))
-	systemstack(func() {
-		newproc1(fn, (*uint8)(argp), siz, 0, pc)
-	})
-}
-
-// Create a new g running fn with narg bytes of arguments starting
-// at argp and returning nret bytes of results.  callerpc is the
-// address of the go statement that created this.  The new g is put
-// on the queue of g's waiting to run.
-func newproc1(fn *funcval, argp *uint8, narg int32, nret int32, callerpc uintptr) *g {
-	_g_ := getg()
-
-	if fn == nil {
-		_g_.m.throwing = -1 // do not dump full stacks
-		throw("go of nil func value")
-	}
-	_g_.m.locks++ // disable preemption because it can be holding p in a local var
-	siz := narg + nret
-	siz = (siz + 7) &^ 7
-
-	// We could allocate a larger initial stack if necessary.
-	// Not worth it: this is almost always an error.
-	// 4*sizeof(uintreg): extra space added below
-	// sizeof(uintreg): caller's LR (arm) or return address (x86, in gostartcall).
-	if siz >= _StackMin-4*regSize-regSize {
-		throw("newproc: function arguments too large for new goroutine")
-	}
-
-	_p_ := _g_.m.p.ptr()
-	newg := gfget(_p_)
-	if newg == nil {
-		newg = malg(_StackMin)
-		casgstatus(newg, _Gidle, _Gdead)
-		allgadd(newg) // publishes with a g->status of Gdead so GC scanner doesn't look at uninitialized stack.
-	}
-	if newg.stack.hi == 0 {
-		throw("newproc1: newg missing stack")
-	}
-
-	if readgstatus(newg) != _Gdead {
-		throw("newproc1: new g is not Gdead")
-	}
-
-	totalSize := 4*regSize + uintptr(siz) // extra space in case of reads slightly beyond frame
-	if hasLinkRegister {
-		totalSize += ptrSize
-	}
-	totalSize += -totalSize & (spAlign - 1) // align to spAlign
-	sp := newg.stack.hi - totalSize
-	spArg := sp
-	if hasLinkRegister {
-		// caller's LR
-		*(*unsafe.Pointer)(unsafe.Pointer(sp)) = nil
-		spArg += ptrSize
-	}
-	memmove(unsafe.Pointer(spArg), unsafe.Pointer(argp), uintptr(narg))
-
-	memclr(unsafe.Pointer(&newg.sched), unsafe.Sizeof(newg.sched))
-	newg.sched.sp = sp
-	newg.stktopsp = sp
-	newg.sched.pc = funcPC(goexit) + _PCQuantum // +PCQuantum so that previous instruction is in same function
-	newg.sched.g = guintptr(unsafe.Pointer(newg))
-	gostartcallfn(&newg.sched, fn)
-	newg.gopc = callerpc
-	newg.startpc = fn.fn
-	casgstatus(newg, _Gdead, _Grunnable)
-
-	if _p_.goidcache == _p_.goidcacheend {
-		// Sched.goidgen is the last allocated id,
-		// this batch must be [sched.goidgen+1, sched.goidgen+GoidCacheBatch].
-		// At startup sched.goidgen=0, so main goroutine receives goid=1.
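-		// For example, assuming _GoidCacheBatch is 16, the first batch a
-		// P takes covers goids [1, 16], the next [17, 32], and so on.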
-		_p_.goidcache = xadd64(&sched.goidgen, _GoidCacheBatch)
-		_p_.goidcache -= _GoidCacheBatch - 1
-		_p_.goidcacheend = _p_.goidcache + _GoidCacheBatch
-	}
-	newg.goid = int64(_p_.goidcache)
-	_p_.goidcache++
-	if raceenabled {
-		newg.racectx = racegostart(callerpc)
-	}
-	if trace.enabled {
-		traceGoCreate(newg, newg.startpc)
-	}
-	runqput(_p_, newg, true)
-
-	if atomicload(&sched.npidle) != 0 && atomicload(&sched.nmspinning) == 0 && unsafe.Pointer(fn.fn) != unsafe.Pointer(funcPC(main)) { // TODO: fast atomic
-		wakep()
-	}
-	_g_.m.locks--
-	if _g_.m.locks == 0 && _g_.preempt { // restore the preemption request in case we've cleared it in newstack
-		_g_.stackguard0 = stackPreempt
-	}
-	return newg
-}
-
-// Put on gfree list.
-// If local list is too long, transfer a batch to the global list.
-func gfput(_p_ *p, gp *g) {
-	if readgstatus(gp) != _Gdead {
-		throw("gfput: bad status (not Gdead)")
-	}
-
-	stksize := gp.stackAlloc
-
-	if stksize != _FixedStack {
-		// non-standard stack size - free it.
-		stackfree(gp.stack, gp.stackAlloc)
-		gp.stack.lo = 0
-		gp.stack.hi = 0
-		gp.stackguard0 = 0
-		gp.stkbar = nil
-		gp.stkbarPos = 0
-	} else {
-		// Reset stack barriers.
-		gp.stkbar = gp.stkbar[:0]
-		gp.stkbarPos = 0
-	}
-
-	gp.schedlink.set(_p_.gfree)
-	_p_.gfree = gp
-	_p_.gfreecnt++
-	if _p_.gfreecnt >= 64 {
-		lock(&sched.gflock)
-		for _p_.gfreecnt >= 32 {
-			_p_.gfreecnt--
-			gp = _p_.gfree
-			_p_.gfree = gp.schedlink.ptr()
-			gp.schedlink.set(sched.gfree)
-			sched.gfree = gp
-			sched.ngfree++
-		}
-		unlock(&sched.gflock)
-	}
-}
-
-// Get from gfree list.
-// If local list is empty, grab a batch from global list.
-func gfget(_p_ *p) *g {
-retry:
-	gp := _p_.gfree
-	if gp == nil && sched.gfree != nil {
-		lock(&sched.gflock)
-		for _p_.gfreecnt < 32 && sched.gfree != nil {
-			_p_.gfreecnt++
-			gp = sched.gfree
-			sched.gfree = gp.schedlink.ptr()
-			sched.ngfree--
-			gp.schedlink.set(_p_.gfree)
-			_p_.gfree = gp
-		}
-		unlock(&sched.gflock)
-		goto retry
-	}
-	if gp != nil {
-		_p_.gfree = gp.schedlink.ptr()
-		_p_.gfreecnt--
-		if gp.stack.lo == 0 {
-			// Stack was deallocated in gfput.  Allocate a new one.
-			systemstack(func() {
-				gp.stack, gp.stkbar = stackalloc(_FixedStack)
-			})
-			gp.stackguard0 = gp.stack.lo + _StackGuard
-			gp.stackAlloc = _FixedStack
-		} else {
-			if raceenabled {
-				racemalloc(unsafe.Pointer(gp.stack.lo), gp.stackAlloc)
-			}
-		}
-	}
-	return gp
-}
-
-// Purge all cached G's from gfree list to the global list.
-func gfpurge(_p_ *p) {
-	lock(&sched.gflock)
-	for _p_.gfreecnt != 0 {
-		_p_.gfreecnt--
-		gp := _p_.gfree
-		_p_.gfree = gp.schedlink.ptr()
-		gp.schedlink.set(sched.gfree)
-		sched.gfree = gp
-		sched.ngfree++
-	}
-	unlock(&sched.gflock)
-}
-
-// Breakpoint executes a breakpoint trap.
-func Breakpoint() {
-	breakpoint()
-}
-
-// dolockOSThread is called by LockOSThread and lockOSThread below
-// after they modify m.locked. Do not allow preemption during this call,
-// or else the m might be different in this function than in the caller.
-//go:nosplit
-func dolockOSThread() {
-	_g_ := getg()
-	_g_.m.lockedg = _g_
-	_g_.lockedm = _g_.m
-}
-
-//go:nosplit
-
-// LockOSThread wires the calling goroutine to its current operating system thread.
-// Until the calling goroutine exits or calls UnlockOSThread, it will always
-// execute in that thread, and no other goroutine can.
-func LockOSThread() {
-	getg().m.locked |= _LockExternal
-	dolockOSThread()
-}
-
-//go:nosplit
-func lockOSThread() {
-	getg().m.locked += _LockInternal
-	dolockOSThread()
-}
-
-// dounlockOSThread is called by UnlockOSThread and unlockOSThread below
-// after they update m->locked. Do not allow preemption during this call,
-// or else the m might be different in this function than in the caller.
-//go:nosplit
-func dounlockOSThread() {
-	_g_ := getg()
-	if _g_.m.locked != 0 {
-		return
-	}
-	_g_.m.lockedg = nil
-	_g_.lockedm = nil
-}
-
-//go:nosplit
-
-// UnlockOSThread unwires the calling goroutine from its fixed operating system thread.
-// If the calling goroutine has not called LockOSThread, UnlockOSThread is a no-op.
-func UnlockOSThread() {
-	getg().m.locked &^= _LockExternal
-	dounlockOSThread()
-}
-
-//go:nosplit
-func unlockOSThread() {
-	_g_ := getg()
-	if _g_.m.locked < _LockInternal {
-		systemstack(badunlockosthread)
-	}
-	_g_.m.locked -= _LockInternal
-	dounlockOSThread()
-}
-
-func badunlockosthread() {
-	throw("runtime: internal error: misuse of lockOSThread/unlockOSThread")
-}
-
-func gcount() int32 {
-	n := int32(allglen) - sched.ngfree
-	for i := 0; ; i++ {
-		_p_ := allp[i]
-		if _p_ == nil {
-			break
-		}
-		n -= _p_.gfreecnt
-	}
-
-	// All these variables can be changed concurrently, so the result can be inconsistent.
-	// But at least the current goroutine is running.
-	if n < 1 {
-		n = 1
-	}
-	return n
-}
-
-func mcount() int32 {
-	return sched.mcount
-}
-
-var prof struct {
-	lock uint32
-	hz   int32
-}
-
-func _System()       { _System() }
-func _ExternalCode() { _ExternalCode() }
-func _GC()           { _GC() }
-
-// Called if we receive a SIGPROF signal.
-func sigprof(pc, sp, lr uintptr, gp *g, mp *m) {
-	if prof.hz == 0 {
-		return
-	}
-
-	// Profiling runs concurrently with GC, so it must not allocate.
-	mp.mallocing++
-
-	// Coordinate with stack barrier insertion in scanstack.
-	for !cas(&gp.stackLock, 0, 1) {
-		osyield()
-	}
-
-	// Define that a "user g" is a user-created goroutine, and a "system g"
-	// is one that is m->g0 or m->gsignal.
-	//
-	// We might be interrupted for profiling halfway through a
-	// goroutine switch. The switch involves updating three (or four) values:
-	// g, PC, SP, and (on arm) LR. The PC must be the last to be updated,
-	// because once it gets updated the new g is running.
-	//
-	// When switching from a user g to a system g, LR is not considered live,
-	// so the update only affects g, SP, and PC. Since PC must be last, there
-	// the possible partial transitions in ordinary execution are (1) g alone is updated,
-	// (2) both g and SP are updated, and (3) SP alone is updated.
-	// If SP or g alone is updated, we can detect the partial transition by checking
-	// whether the SP is within g's stack bounds. (We could also require that SP
-	// be changed only after g, but the stack bounds check is needed by other
-	// cases, so there is no need to impose an additional requirement.)
-	//
-	// There is one exceptional transition to a system g, not in ordinary execution.
-	// When a signal arrives, the operating system starts the signal handler running
-	// with an updated PC and SP. The g is updated last, at the beginning of the
-	// handler. There are two reasons this is okay. First, until g is updated the
-	// g and SP do not match, so the stack bounds check detects the partial transition.
-	// Second, signal handlers currently run with signals disabled, so a profiling
-	// signal cannot arrive during the handler.
-	//
-	// When switching from a system g to a user g, there are three possibilities.
-	//
-	// First, it may be that the g switch has no PC update, because the SP
-	// either corresponds to a user g throughout (as in asmcgocall)
-	// or because it has been arranged to look like a user g frame
-	// (as in cgocallback_gofunc). In this case, since the entire
-	// transition is a g+SP update, a partial transition updating just one of
-	// those will be detected by the stack bounds check.
-	//
-	// Second, when returning from a signal handler, the PC and SP updates
-	// are performed by the operating system in an atomic update, so the g
-	// update must be done before them. The stack bounds check detects
-	// the partial transition here, and (again) signal handlers run with signals
-	// disabled, so a profiling signal cannot arrive then anyway.
-	//
-	// Third, the common case: it may be that the switch updates g, SP, and PC
-// separately. If the PC is within any of the functions that do this,
-// we don't ask for a traceback. Cf. the function setsSP for more about this.
-	//
-	// There is another apparently viable approach, recorded here in case
-	// the "PC within setsSP function" check turns out not to be usable.
-	// It would be possible to delay the update of either g or SP until immediately
-	// before the PC update instruction. Then, because of the stack bounds check,
-	// the only problematic interrupt point is just before that PC update instruction,
-	// and the sigprof handler can detect that instruction and simulate stepping past
-	// it in order to reach a consistent state. On ARM, the update of g must be made
-	// in two places (in R10 and also in a TLS slot), so the delayed update would
-	// need to be the SP update. The sigprof handler must read the instruction at
-	// the current PC and if it was the known instruction (for example, JMP BX or
-	// MOV R2, PC), use that other register in place of the PC value.
-	// The biggest drawback to this solution is that it requires that we can tell
-	// whether it's safe to read from the memory pointed at by PC.
-	// In a correct program, we can test PC == nil and otherwise read,
-	// but if a profiling signal happens at the instant that a program executes
-	// a bad jump (before the program manages to handle the resulting fault)
-	// the profiling handler could fault trying to read nonexistent memory.
-	//
-	// To recap, there are no constraints on the assembly being used for the
-	// transition. We simply require that g and SP match and that the PC is not
-	// in gogo.
-	traceback := true
-	if gp == nil || sp < gp.stack.lo || gp.stack.hi < sp || setsSP(pc) {
-		traceback = false
-	}
-	var stk [maxCPUProfStack]uintptr
-	n := 0
-	if mp.ncgo > 0 && mp.curg != nil && mp.curg.syscallpc != 0 && mp.curg.syscallsp != 0 {
-		// Cgo, we can't unwind and symbolize arbitrary C code,
-		// so instead collect Go stack that leads to the cgo call.
-		// This is especially important on windows, since all syscalls are cgo calls.
-		n = gentraceback(mp.curg.syscallpc, mp.curg.syscallsp, 0, mp.curg, 0, &stk[0], len(stk), nil, nil, 0)
-	} else if traceback {
-		n = gentraceback(pc, sp, lr, gp, 0, &stk[0], len(stk), nil, nil, _TraceTrap|_TraceJumpStack)
-	}
-	if !traceback || n <= 0 {
-		// Normal traceback is impossible or has failed.
-		// See if it falls into several common cases.
-		n = 0
-		if GOOS == "windows" && n == 0 && mp.libcallg != 0 && mp.libcallpc != 0 && mp.libcallsp != 0 {
-			// Libcall, i.e. runtime syscall on windows.
-			// Collect Go stack that leads to the call.
-			n = gentraceback(mp.libcallpc, mp.libcallsp, 0, mp.libcallg.ptr(), 0, &stk[0], len(stk), nil, nil, 0)
-		}
-		if n == 0 {
-			// If all of the above has failed, account it against abstract "System" or "GC".
-			n = 2
-			// "ExternalCode" is better than "etext".
-			if pc > firstmoduledata.etext {
-				pc = funcPC(_ExternalCode) + _PCQuantum
-			}
-			stk[0] = pc
-			if mp.preemptoff != "" || mp.helpgc != 0 {
-				stk[1] = funcPC(_GC) + _PCQuantum
-			} else {
-				stk[1] = funcPC(_System) + _PCQuantum
-			}
-		}
-	}
-	atomicstore(&gp.stackLock, 0)
-
-	if prof.hz != 0 {
-		// Simple cas-lock to coordinate with setcpuprofilerate.
-		for !cas(&prof.lock, 0, 1) {
-			osyield()
-		}
-		if prof.hz != 0 {
-			cpuprof.add(stk[:n])
-		}
-		atomicstore(&prof.lock, 0)
-	}
-	mp.mallocing--
-}
-
-// Reports whether a function will set the SP
-// to an absolute value. Important that
-// we don't traceback when these are at the bottom
-// of the stack since we can't be sure that we will
-// find the caller.
-//
-// If the function is not on the bottom of the stack
-// we assume that it will have set it up so that traceback will be consistent,
-// either by being a traceback terminating function
-// or putting one on the stack at the right offset.
-func setsSP(pc uintptr) bool {
-	f := findfunc(pc)
-	if f == nil {
-		// couldn't find the function for this PC,
-		// so assume the worst and stop traceback
-		return true
-	}
-	switch f.entry {
-	case gogoPC, systemstackPC, mcallPC, morestackPC:
-		return true
-	}
-	return false
-}
-
-// Arrange to call fn with a traceback hz times a second.
-func setcpuprofilerate_m(hz int32) {
-	// Force sane arguments.
-	if hz < 0 {
-		hz = 0
-	}
-
-	// Disable preemption, otherwise we can be rescheduled to another thread
-	// that has profiling enabled.
-	_g_ := getg()
-	_g_.m.locks++
-
-	// Stop profiler on this thread so that it is safe to lock prof.
-	// if a profiling signal came in while we had prof locked,
-	// it would deadlock.
-	resetcpuprofiler(0)
-
-	for !cas(&prof.lock, 0, 1) {
-		osyield()
-	}
-	prof.hz = hz
-	atomicstore(&prof.lock, 0)
-
-	lock(&sched.lock)
-	sched.profilehz = hz
-	unlock(&sched.lock)
-
-	if hz != 0 {
-		resetcpuprofiler(hz)
-	}
-
-	_g_.m.locks--
-}
-
-// Change number of processors.  The world is stopped, sched is locked.
-// gcworkbufs are not being modified by either the GC or
-// the write barrier code.
-// Returns list of Ps with local work, they need to be scheduled by the caller.
-func procresize(nprocs int32) *p {
-	old := gomaxprocs
-	if old < 0 || old > _MaxGomaxprocs || nprocs <= 0 || nprocs > _MaxGomaxprocs {
-		throw("procresize: invalid arg")
-	}
-	if trace.enabled {
-		traceGomaxprocs(nprocs)
-	}
-
-	// update statistics
-	now := nanotime()
-	if sched.procresizetime != 0 {
-		sched.totaltime += int64(old) * (now - sched.procresizetime)
-	}
-	sched.procresizetime = now
-
-	// initialize new P's
-	for i := int32(0); i < nprocs; i++ {
-		pp := allp[i]
-		if pp == nil {
-			pp = new(p)
-			pp.id = i
-			pp.status = _Pgcstop
-			pp.sudogcache = pp.sudogbuf[:0]
-			for i := range pp.deferpool {
-				pp.deferpool[i] = pp.deferpoolbuf[i][:0]
-			}
-			atomicstorep(unsafe.Pointer(&allp[i]), unsafe.Pointer(pp))
-		}
-		if pp.mcache == nil {
-			if old == 0 && i == 0 {
-				if getg().m.mcache == nil {
-					throw("missing mcache?")
-				}
-				pp.mcache = getg().m.mcache // bootstrap
-			} else {
-				pp.mcache = allocmcache()
-			}
-		}
-	}
-
-	// free unused P's
-	for i := nprocs; i < old; i++ {
-		p := allp[i]
-		if trace.enabled {
-			if p == getg().m.p.ptr() {
-				// moving to p[0], pretend that we were descheduled
-				// and then scheduled again to keep the trace sane.
-				traceGoSched()
-				traceProcStop(p)
-			}
-		}
-		// move all runnable goroutines to the global queue
-		for p.runqhead != p.runqtail {
-			// pop from tail of local queue
-			p.runqtail--
-			gp := p.runq[p.runqtail%uint32(len(p.runq))]
-			// push onto head of global queue
-			globrunqputhead(gp)
-		}
-		if p.runnext != 0 {
-			globrunqputhead(p.runnext.ptr())
-			p.runnext = 0
-		}
-		// if there's a background worker, make it runnable and put
-		// it on the global queue so it can clean itself up
-		if p.gcBgMarkWorker != nil {
-			casgstatus(p.gcBgMarkWorker, _Gwaiting, _Grunnable)
-			if trace.enabled {
-				traceGoUnpark(p.gcBgMarkWorker, 0)
-			}
-			globrunqput(p.gcBgMarkWorker)
-			p.gcBgMarkWorker = nil
-		}
-		for i := range p.sudogbuf {
-			p.sudogbuf[i] = nil
-		}
-		p.sudogcache = p.sudogbuf[:0]
-		for i := range p.deferpool {
-			for j := range p.deferpoolbuf[i] {
-				p.deferpoolbuf[i][j] = nil
-			}
-			p.deferpool[i] = p.deferpoolbuf[i][:0]
-		}
-		freemcache(p.mcache)
-		p.mcache = nil
-		gfpurge(p)
-		traceProcFree(p)
-		p.status = _Pdead
-		// can't free P itself because it can be referenced by an M in syscall
-	}
-
-	_g_ := getg()
-	if _g_.m.p != 0 && _g_.m.p.ptr().id < nprocs {
-		// continue to use the current P
-		_g_.m.p.ptr().status = _Prunning
-	} else {
-		// release the current P and acquire allp[0]
-		if _g_.m.p != 0 {
-			_g_.m.p.ptr().m = 0
-		}
-		_g_.m.p = 0
-		_g_.m.mcache = nil
-		p := allp[0]
-		p.m = 0
-		p.status = _Pidle
-		acquirep(p)
-		if trace.enabled {
-			traceGoStart()
-		}
-	}
-	var runnablePs *p
-	for i := nprocs - 1; i >= 0; i-- {
-		p := allp[i]
-		if _g_.m.p.ptr() == p {
-			continue
-		}
-		p.status = _Pidle
-		if runqempty(p) {
-			pidleput(p)
-		} else {
-			p.m.set(mget())
-			p.link.set(runnablePs)
-			runnablePs = p
-		}
-	}
-	var int32p *int32 = &gomaxprocs // make compiler check that gomaxprocs is an int32
-	atomicstore((*uint32)(unsafe.Pointer(int32p)), uint32(nprocs))
-	return runnablePs
-}
-
-// Associate p and the current m.
-func acquirep(_p_ *p) {
-	acquirep1(_p_)
-
-	// have p; write barriers now allowed
-	_g_ := getg()
-	_g_.m.mcache = _p_.mcache
-
-	if trace.enabled {
-		traceProcStart()
-	}
-}
-
-// May run during STW, so write barriers are not allowed.
-//go:nowritebarrier
-func acquirep1(_p_ *p) {
-	_g_ := getg()
-
-	if _g_.m.p != 0 || _g_.m.mcache != nil {
-		throw("acquirep: already in go")
-	}
-	if _p_.m != 0 || _p_.status != _Pidle {
-		id := int32(0)
-		if _p_.m != 0 {
-			id = _p_.m.ptr().id
-		}
-		print("acquirep: p->m=", _p_.m, "(", id, ") p->status=", _p_.status, "\n")
-		throw("acquirep: invalid p state")
-	}
-	_g_.m.p.set(_p_)
-	_p_.m.set(_g_.m)
-	_p_.status = _Prunning
-}
-
-// Disassociate p and the current m.
-func releasep() *p {
-	_g_ := getg()
-
-	if _g_.m.p == 0 || _g_.m.mcache == nil {
-		throw("releasep: invalid arg")
-	}
-	_p_ := _g_.m.p.ptr()
-	if _p_.m.ptr() != _g_.m || _p_.mcache != _g_.m.mcache || _p_.status != _Prunning {
-		print("releasep: m=", _g_.m, " m->p=", _g_.m.p.ptr(), " p->m=", _p_.m, " m->mcache=", _g_.m.mcache, " p->mcache=", _p_.mcache, " p->status=", _p_.status, "\n")
-		throw("releasep: invalid p state")
-	}
-	if trace.enabled {
-		traceProcStop(_g_.m.p.ptr())
-	}
-	_g_.m.p = 0
-	_g_.m.mcache = nil
-	_p_.m = 0
-	_p_.status = _Pidle
-	return _p_
-}
-
-func incidlelocked(v int32) {
-	lock(&sched.lock)
-	sched.nmidlelocked += v
-	if v > 0 {
-		checkdead()
-	}
-	unlock(&sched.lock)
-}
-
-// Check for deadlock situation.
-// The check is based on the number of running M's; if that is 0, we have a deadlock.
-func checkdead() {
-	// For -buildmode=c-shared or -buildmode=c-archive it's OK if
-	// there are no running goroutines.  The calling program is
-	// assumed to be running.
-	if islibrary || isarchive {
-		return
-	}
-
-	// If we are dying because of a signal caught on an already idle thread,
-	// freezetheworld will cause all running threads to block.
-	// And runtime will essentially enter into deadlock state,
-	// except that there is a thread that will call exit soon.
-	if panicking > 0 {
-		return
-	}
-
-	// -1 for sysmon
-	run := sched.mcount - sched.nmidle - sched.nmidlelocked - 1
-	if run > 0 {
-		return
-	}
-	if run < 0 {
-		print("runtime: checkdead: nmidle=", sched.nmidle, " nmidlelocked=", sched.nmidlelocked, " mcount=", sched.mcount, "\n")
-		throw("checkdead: inconsistent counts")
-	}
-
-	grunning := 0
-	lock(&allglock)
-	for i := 0; i < len(allgs); i++ {
-		gp := allgs[i]
-		if isSystemGoroutine(gp) {
-			continue
-		}
-		s := readgstatus(gp)
-		switch s &^ _Gscan {
-		case _Gwaiting:
-			grunning++
-		case _Grunnable,
-			_Grunning,
-			_Gsyscall:
-			unlock(&allglock)
-			print("runtime: checkdead: find g ", gp.goid, " in status ", s, "\n")
-			throw("checkdead: runnable g")
-		}
-	}
-	unlock(&allglock)
-	if grunning == 0 { // possible if main goroutine calls runtime·Goexit()
-		throw("no goroutines (main called runtime.Goexit) - deadlock!")
-	}
-
-	// Maybe jump time forward for playground.
-	gp := timejump()
-	if gp != nil {
-		casgstatus(gp, _Gwaiting, _Grunnable)
-		globrunqput(gp)
-		_p_ := pidleget()
-		if _p_ == nil {
-			throw("checkdead: no p for timer")
-		}
-		mp := mget()
-		if mp == nil {
-			newm(nil, _p_)
-		} else {
-			mp.nextp.set(_p_)
-			notewakeup(&mp.park)
-		}
-		return
-	}
-
-	getg().m.throwing = -1 // do not dump full stacks
-	throw("all goroutines are asleep - deadlock!")
-}
-
-func sysmon() {
-	// If we go two minutes without a garbage collection, force one to run.
-	forcegcperiod := int64(2 * 60 * 1e9)
-
-	// If a heap span goes unused for 5 minutes after a garbage collection,
-	// we hand it back to the operating system.
-	scavengelimit := int64(5 * 60 * 1e9)
-
-	if debug.scavenge > 0 {
-		// Scavenge-a-lot for testing.
-		forcegcperiod = 10 * 1e6
-		scavengelimit = 20 * 1e6
-	}
-
-	lastscavenge := nanotime()
-	nscavenge := 0
-
-	// Make wake-up period small enough for the sampling to be correct.
-	maxsleep := forcegcperiod / 2
-	if scavengelimit < forcegcperiod {
-		maxsleep = scavengelimit / 2
-	}
-
-	lasttrace := int64(0)
-	idle := 0 // how many cycles in succession we have not woken anybody up
-	delay := uint32(0)
-	for {
-		if idle == 0 { // start with 20us sleep...
-			delay = 20
-		} else if idle > 50 { // start doubling the sleep after 1ms...
-			delay *= 2
-		}
-		if delay > 10*1000 { // up to 10ms
-			delay = 10 * 1000
-		}
-		usleep(delay)
-		if debug.schedtrace <= 0 && (sched.gcwaiting != 0 || atomicload(&sched.npidle) == uint32(gomaxprocs)) { // TODO: fast atomic
-			lock(&sched.lock)
-			if atomicload(&sched.gcwaiting) != 0 || atomicload(&sched.npidle) == uint32(gomaxprocs) {
-				atomicstore(&sched.sysmonwait, 1)
-				unlock(&sched.lock)
-				notetsleep(&sched.sysmonnote, maxsleep)
-				lock(&sched.lock)
-				atomicstore(&sched.sysmonwait, 0)
-				noteclear(&sched.sysmonnote)
-				idle = 0
-				delay = 20
-			}
-			unlock(&sched.lock)
-		}
-		// poll network if not polled for more than 10ms
-		lastpoll := int64(atomicload64(&sched.lastpoll))
-		now := nanotime()
-		unixnow := unixnanotime()
-		if lastpoll != 0 && lastpoll+10*1000*1000 < now {
-			cas64(&sched.lastpoll, uint64(lastpoll), uint64(now))
-			gp := netpoll(false) // non-blocking - returns list of goroutines
-			if gp != nil {
-				// Need to decrement number of idle locked M's
-				// (pretending that one more is running) before injectglist.
-				// Otherwise it can lead to the following situation:
-				// injectglist grabs all P's but before it starts M's to run the P's,
-				// another M returns from syscall, finishes running its G,
-				// observes that there is no work to do and no other running M's
-				// and reports deadlock.
-				incidlelocked(-1)
-				injectglist(gp)
-				incidlelocked(1)
-			}
-		}
-		// retake P's blocked in syscalls
-		// and preempt long running G's
-		if retake(now) != 0 {
-			idle = 0
-		} else {
-			idle++
-		}
-		// check if we need to force a GC
-		lastgc := int64(atomicload64(&memstats.last_gc))
-		if lastgc != 0 && unixnow-lastgc > forcegcperiod && atomicload(&forcegc.idle) != 0 && atomicloaduint(&bggc.working) == 0 {
-			lock(&forcegc.lock)
-			forcegc.idle = 0
-			forcegc.g.schedlink = 0
-			injectglist(forcegc.g)
-			unlock(&forcegc.lock)
-		}
-		// scavenge heap once in a while
-		if lastscavenge+scavengelimit/2 < now {
-			mHeap_Scavenge(int32(nscavenge), uint64(now), uint64(scavengelimit))
-			lastscavenge = now
-			nscavenge++
-		}
-		if debug.schedtrace > 0 && lasttrace+int64(debug.schedtrace*1000000) <= now {
-			lasttrace = now
-			schedtrace(debug.scheddetail > 0)
-		}
-	}
-}
-
-var pdesc [_MaxGomaxprocs]struct {
-	schedtick   uint32
-	schedwhen   int64
-	syscalltick uint32
-	syscallwhen int64
-}
-
-// forcePreemptNS is the time slice given to a G before it is
-// preempted.
-const forcePreemptNS = 10 * 1000 * 1000 // 10ms
-
-func retake(now int64) uint32 {
-	n := 0
-	for i := int32(0); i < gomaxprocs; i++ {
-		_p_ := allp[i]
-		if _p_ == nil {
-			continue
-		}
-		pd := &pdesc[i]
-		s := _p_.status
-		if s == _Psyscall {
-			// Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us).
-			t := int64(_p_.syscalltick)
-			if int64(pd.syscalltick) != t {
-				pd.syscalltick = uint32(t)
-				pd.syscallwhen = now
-				continue
-			}
-			// On the one hand we don't want to retake Ps if there is no other work to do,
-			// but on the other hand we want to retake them eventually
-			// because they can prevent the sysmon thread from deep sleep.
-			if runqempty(_p_) && atomicload(&sched.nmspinning)+atomicload(&sched.npidle) > 0 && pd.syscallwhen+10*1000*1000 > now {
-				continue
-			}
-			// Need to decrement number of idle locked M's
-			// (pretending that one more is running) before the CAS.
-			// Otherwise the M from which we retake can exit the syscall,
-			// increment nmidle and report deadlock.
-			incidlelocked(-1)
-			if cas(&_p_.status, s, _Pidle) {
-				if trace.enabled {
-					traceGoSysBlock(_p_)
-					traceProcStop(_p_)
-				}
-				n++
-				_p_.syscalltick++
-				handoffp(_p_)
-			}
-			incidlelocked(1)
-		} else if s == _Prunning {
-			// Preempt G if it's running for too long.
-			t := int64(_p_.schedtick)
-			if int64(pd.schedtick) != t {
-				pd.schedtick = uint32(t)
-				pd.schedwhen = now
-				continue
-			}
-			if pd.schedwhen+forcePreemptNS > now {
-				continue
-			}
-			preemptone(_p_)
-		}
-	}
-	return uint32(n)
-}
-
-// Tell all goroutines that they have been preempted and they should stop.
-// This function is purely best-effort.  It can fail to inform a goroutine if a
-// processor just started running it.
-// No locks need to be held.
-// Returns true if preemption request was issued to at least one goroutine.
-func preemptall() bool {
-	res := false
-	for i := int32(0); i < gomaxprocs; i++ {
-		_p_ := allp[i]
-		if _p_ == nil || _p_.status != _Prunning {
-			continue
-		}
-		if preemptone(_p_) {
-			res = true
-		}
-	}
-	return res
-}
-
-// Tell the goroutine running on processor P to stop.
-// This function is purely best-effort.  It can incorrectly fail to inform the
-// goroutine.  It can inform the wrong goroutine.  Even if it informs the
-// correct goroutine, that goroutine might ignore the request if it is
-// simultaneously executing newstack.
-// No lock needs to be held.
-// Returns true if preemption request was issued.
-// The actual preemption will happen at some point in the future
-// and will be indicated by the gp->status no longer being
-// Grunning.
-func preemptone(_p_ *p) bool {
-	mp := _p_.m.ptr()
-	if mp == nil || mp == getg().m {
-		return false
-	}
-	gp := mp.curg
-	if gp == nil || gp == mp.g0 {
-		return false
-	}
-
-	gp.preempt = true
-
-	// Every call in a goroutine checks for stack overflow by
-	// comparing the current stack pointer to gp->stackguard0.
-	// Setting gp->stackguard0 to StackPreempt folds
-	// preemption into the normal stack overflow check.
-	gp.stackguard0 = stackPreempt
-	return true
-}
-
-var starttime int64
-
-func schedtrace(detailed bool) {
-	now := nanotime()
-	if starttime == 0 {
-		starttime = now
-	}
-
-	lock(&sched.lock)
-	print("SCHED ", (now-starttime)/1e6, "ms: gomaxprocs=", gomaxprocs, " idleprocs=", sched.npidle, " threads=", sched.mcount, " spinningthreads=", sched.nmspinning, " idlethreads=", sched.nmidle, " runqueue=", sched.runqsize)
-	if detailed {
-		print(" gcwaiting=", sched.gcwaiting, " nmidlelocked=", sched.nmidlelocked, " stopwait=", sched.stopwait, " sysmonwait=", sched.sysmonwait, "\n")
-	}
-	// We must be careful while reading data from P's, M's and G's.
-	// Even if we hold schedlock, most data can be changed concurrently.
-	// E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil.
-	for i := int32(0); i < gomaxprocs; i++ {
-		_p_ := allp[i]
-		if _p_ == nil {
-			continue
-		}
-		mp := _p_.m.ptr()
-		h := atomicload(&_p_.runqhead)
-		t := atomicload(&_p_.runqtail)
-		if detailed {
-			id := int32(-1)
-			if mp != nil {
-				id = mp.id
-			}
-			print("  P", i, ": status=", _p_.status, " schedtick=", _p_.schedtick, " syscalltick=", _p_.syscalltick, " m=", id, " runqsize=", t-h, " gfreecnt=", _p_.gfreecnt, "\n")
-		} else {
-			// In non-detailed mode, format the lengths of per-P run queues as:
-			// [len1 len2 len3 len4]
-			print(" ")
-			if i == 0 {
-				print("[")
-			}
-			print(t - h)
-			if i == gomaxprocs-1 {
-				print("]\n")
-			}
-		}
-	}
-
-	if !detailed {
-		unlock(&sched.lock)
-		return
-	}
-
-	for mp := allm; mp != nil; mp = mp.alllink {
-		_p_ := mp.p.ptr()
-		gp := mp.curg
-		lockedg := mp.lockedg
-		id1 := int32(-1)
-		if _p_ != nil {
-			id1 = _p_.id
-		}
-		id2 := int64(-1)
-		if gp != nil {
-			id2 = gp.goid
-		}
-		id3 := int64(-1)
-		if lockedg != nil {
-			id3 = lockedg.goid
-		}
-		print("  M", mp.id, ": p=", id1, " curg=", id2, " mallocing=", mp.mallocing, " throwing=", mp.throwing, " preemptoff=", mp.preemptoff, ""+" locks=", mp.locks, " dying=", mp.dying, " helpgc=", mp.helpgc, " spinning=", mp.spinning, " blocked=", getg().m.blocked, " lockedg=", id3, "\n")
-	}
-
-	lock(&allglock)
-	for gi := 0; gi < len(allgs); gi++ {
-		gp := allgs[gi]
-		mp := gp.m
-		lockedm := gp.lockedm
-		id1 := int32(-1)
-		if mp != nil {
-			id1 = mp.id
-		}
-		id2 := int32(-1)
-		if lockedm != nil {
-			id2 = lockedm.id
-		}
-		print("  G", gp.goid, ": status=", readgstatus(gp), "(", gp.waitreason, ") m=", id1, " lockedm=", id2, "\n")
-	}
-	unlock(&allglock)
-	unlock(&sched.lock)
-}
-
-// Put mp on midle list.
-// Sched must be locked.
-// May run during STW, so write barriers are not allowed.
-//go:nowritebarrier
-func mput(mp *m) {
-	mp.schedlink = sched.midle
-	sched.midle.set(mp)
-	sched.nmidle++
-	checkdead()
-}
-
-// Try to get an m from midle list.
-// Sched must be locked.
-// May run during STW, so write barriers are not allowed.
-//go:nowritebarrier
-func mget() *m {
-	mp := sched.midle.ptr()
-	if mp != nil {
-		sched.midle = mp.schedlink
-		sched.nmidle--
-	}
-	return mp
-}
-
-// Put gp on the global runnable queue.
-// Sched must be locked.
-// May run during STW, so write barriers are not allowed.
-//go:nowritebarrier
-func globrunqput(gp *g) {
-	gp.schedlink = 0
-	if sched.runqtail != 0 {
-		sched.runqtail.ptr().schedlink.set(gp)
-	} else {
-		sched.runqhead.set(gp)
-	}
-	sched.runqtail.set(gp)
-	sched.runqsize++
-}
-
-// Put gp at the head of the global runnable queue.
-// Sched must be locked.
-// May run during STW, so write barriers are not allowed.
-//go:nowritebarrier
-func globrunqputhead(gp *g) {
-	gp.schedlink = sched.runqhead
-	sched.runqhead.set(gp)
-	if sched.runqtail == 0 {
-		sched.runqtail.set(gp)
-	}
-	sched.runqsize++
-}
-
-// Put a batch of runnable goroutines on the global runnable queue.
-// Sched must be locked.
-func globrunqputbatch(ghead *g, gtail *g, n int32) {
-	gtail.schedlink = 0
-	if sched.runqtail != 0 {
-		sched.runqtail.ptr().schedlink.set(ghead)
-	} else {
-		sched.runqhead.set(ghead)
-	}
-	sched.runqtail.set(gtail)
-	sched.runqsize += n
-}
-
-// Try to get a batch of G's from the global runnable queue.
-// Sched must be locked.
-func globrunqget(_p_ *p, max int32) *g {
-	if sched.runqsize == 0 {
-		return nil
-	}
-
-	n := sched.runqsize/gomaxprocs + 1
-	if n > sched.runqsize {
-		n = sched.runqsize
-	}
-	if max > 0 && n > max {
-		n = max
-	}
-	if n > int32(len(_p_.runq))/2 {
-		n = int32(len(_p_.runq)) / 2
-	}
-
-	sched.runqsize -= n
-	if sched.runqsize == 0 {
-		sched.runqtail = 0
-	}
-
-	gp := sched.runqhead.ptr()
-	sched.runqhead = gp.schedlink
-	n--
-	for ; n > 0; n-- {
-		gp1 := sched.runqhead.ptr()
-		sched.runqhead = gp1.schedlink
-		runqput(_p_, gp1, false)
-	}
-	return gp
-}
-
-// Put p on the _Pidle list.
-// Sched must be locked.
-// May run during STW, so write barriers are not allowed.
-//go:nowritebarrier
-func pidleput(_p_ *p) {
-	if !runqempty(_p_) {
-		throw("pidleput: P has non-empty run queue")
-	}
-	_p_.link = sched.pidle
-	sched.pidle.set(_p_)
-	xadd(&sched.npidle, 1) // TODO: fast atomic
-}
-
-// Try to get a p from the _Pidle list.
-// Sched must be locked.
-// May run during STW, so write barriers are not allowed.
-//go:nowritebarrier
-func pidleget() *p {
-	_p_ := sched.pidle.ptr()
-	if _p_ != nil {
-		sched.pidle = _p_.link
-		xadd(&sched.npidle, -1) // TODO: fast atomic
-	}
-	return _p_
-}
-
-// runqempty returns true if _p_ has no Gs on its local run queue.
-// Note that this test is generally racy.
-func runqempty(_p_ *p) bool {
-	return _p_.runqhead == _p_.runqtail && _p_.runnext == 0
-}
-
-// To shake out latent assumptions about scheduling order,
-// we introduce some randomness into scheduling decisions
-// when running with the race detector.
-// The need for this was made obvious by changing the
-// (deterministic) scheduling order in Go 1.5 and breaking
-// many poorly-written tests.
-// With the randomness here, as long as the tests pass
-// consistently with -race, they shouldn't have latent scheduling
-// assumptions.
-const randomizeScheduler = raceenabled
-
-// runqput tries to put g on the local runnable queue.
-// If next is false, runqput adds g to the tail of the runnable queue.
-// If next is true, runqput puts g in the _p_.runnext slot.
-// If the run queue is full, runqput puts g on the global queue.
-// Executed only by the owner P.
-func runqput(_p_ *p, gp *g, next bool) {
-	if randomizeScheduler && next && fastrand1()%2 == 0 {
-		next = false
-	}
-
-	if next {
-	retryNext:
-		oldnext := _p_.runnext
-		if !_p_.runnext.cas(oldnext, guintptr(unsafe.Pointer(gp))) {
-			goto retryNext
-		}
-		if oldnext == 0 {
-			return
-		}
-		// Kick the old runnext out to the regular run queue.
-		gp = oldnext.ptr()
-	}
-
-retry:
-	h := atomicload(&_p_.runqhead) // load-acquire, synchronize with consumers
-	t := _p_.runqtail
-	if t-h < uint32(len(_p_.runq)) {
-		_p_.runq[t%uint32(len(_p_.runq))] = gp
-		atomicstore(&_p_.runqtail, t+1) // store-release, makes the item available for consumption
-		return
-	}
-	if runqputslow(_p_, gp, h, t) {
-		return
-	}
-	// The queue is not full, so the put above must succeed.
-	goto retry
-}
-
-// Put g and a batch of work from local runnable queue on global queue.
-// Executed only by the owner P.
-func runqputslow(_p_ *p, gp *g, h, t uint32) bool {
-	var batch [len(_p_.runq)/2 + 1]*g
-
-	// First, grab a batch from local queue.
-	n := t - h
-	n = n / 2
-	if n != uint32(len(_p_.runq)/2) {
-		throw("runqputslow: queue is not full")
-	}
-	for i := uint32(0); i < n; i++ {
-		batch[i] = _p_.runq[(h+i)%uint32(len(_p_.runq))]
-	}
-	if !cas(&_p_.runqhead, h, h+n) { // cas-release, commits consume
-		return false
-	}
-	batch[n] = gp
-
-	if randomizeScheduler {
-		for i := uint32(1); i <= n; i++ {
-			j := fastrand1() % (i + 1)
-			batch[i], batch[j] = batch[j], batch[i]
-		}
-	}
-
-	// Link the goroutines.
-	for i := uint32(0); i < n; i++ {
-		batch[i].schedlink.set(batch[i+1])
-	}
-
-	// Now put the batch on global queue.
-	lock(&sched.lock)
-	globrunqputbatch(batch[0], batch[n], int32(n+1))
-	unlock(&sched.lock)
-	return true
-}
-
-// Get g from local runnable queue.
-// If inheritTime is true, gp should inherit the remaining time in the
-// current time slice. Otherwise, it should start a new time slice.
-// Executed only by the owner P.
-func runqget(_p_ *p) (gp *g, inheritTime bool) {
-	// If there's a runnext, it's the next G to run.
-	for {
-		next := _p_.runnext
-		if next == 0 {
-			break
-		}
-		if _p_.runnext.cas(next, 0) {
-			return next.ptr(), true
-		}
-	}
-
-	for {
-		h := atomicload(&_p_.runqhead) // load-acquire, synchronize with other consumers
-		t := _p_.runqtail
-		if t == h {
-			return nil, false
-		}
-		gp := _p_.runq[h%uint32(len(_p_.runq))]
-		if cas(&_p_.runqhead, h, h+1) { // cas-release, commits consume
-			return gp, false
-		}
-	}
-}
-
-// Grabs a batch of goroutines from _p_'s runnable queue into batch.
-// Batch is a ring buffer starting at batchHead.
-// Returns number of grabbed goroutines.
-// Can be executed by any P.
-func runqgrab(_p_ *p, batch *[256]*g, batchHead uint32, stealRunNextG bool) uint32 {
-	for {
-		h := atomicload(&_p_.runqhead) // load-acquire, synchronize with other consumers
-		t := atomicload(&_p_.runqtail) // load-acquire, synchronize with the producer
-		n := t - h
-		n = n - n/2
-		if n == 0 {
-			if stealRunNextG {
-				// Try to steal from _p_.runnext.
-				if next := _p_.runnext; next != 0 {
-					// Sleep to ensure that _p_ isn't about to run the g we
-					// are about to steal.
-					// The important use case here is when the g running on _p_
-					// ready()s another g and then almost immediately blocks.
-					// Instead of stealing runnext in this window, back off
-					// to give _p_ a chance to schedule runnext. This will avoid
-					// thrashing gs between different Ps.
-					usleep(100)
-					if !_p_.runnext.cas(next, 0) {
-						continue
-					}
-					batch[batchHead%uint32(len(batch))] = next.ptr()
-					return 1
-				}
-			}
-			return 0
-		}
-		if n > uint32(len(_p_.runq)/2) { // read inconsistent h and t
-			continue
-		}
-		for i := uint32(0); i < n; i++ {
-			g := _p_.runq[(h+i)%uint32(len(_p_.runq))]
-			batch[(batchHead+i)%uint32(len(batch))] = g
-		}
-		if cas(&_p_.runqhead, h, h+n) { // cas-release, commits consume
-			return n
-		}
-	}
-}
-
-// Steal half of the elements from the local runnable queue of p2
-// and put them onto the local runnable queue of p.
-// Returns one of the stolen elements (or nil if the steal failed).
-func runqsteal(_p_, p2 *p, stealRunNextG bool) *g {
-	t := _p_.runqtail
-	n := runqgrab(p2, &_p_.runq, t, stealRunNextG)
-	if n == 0 {
-		return nil
-	}
-	n--
-	gp := _p_.runq[(t+n)%uint32(len(_p_.runq))]
-	if n == 0 {
-		return gp
-	}
-	h := atomicload(&_p_.runqhead) // load-acquire, synchronize with consumers
-	if t-h+n >= uint32(len(_p_.runq)) {
-		throw("runqsteal: runq overflow")
-	}
-	atomicstore(&_p_.runqtail, t+n) // store-release, makes the item available for consumption
-	return gp
-}
-
-func testSchedLocalQueue() {
-	_p_ := new(p)
-	gs := make([]g, len(_p_.runq))
-	for i := 0; i < len(_p_.runq); i++ {
-		if g, _ := runqget(_p_); g != nil {
-			throw("runq is not empty initially")
-		}
-		for j := 0; j < i; j++ {
-			runqput(_p_, &gs[i], false)
-		}
-		for j := 0; j < i; j++ {
-			if g, _ := runqget(_p_); g != &gs[i] {
-				print("bad element at iter ", i, "/", j, "\n")
-				throw("bad element")
-			}
-		}
-		if g, _ := runqget(_p_); g != nil {
-			throw("runq is not empty afterwards")
-		}
-	}
-}
-
-func testSchedLocalQueueSteal() {
-	p1 := new(p)
-	p2 := new(p)
-	gs := make([]g, len(p1.runq))
-	for i := 0; i < len(p1.runq); i++ {
-		for j := 0; j < i; j++ {
-			gs[j].sig = 0
-			runqput(p1, &gs[j], false)
-		}
-		gp := runqsteal(p2, p1, true)
-		s := 0
-		if gp != nil {
-			s++
-			gp.sig++
-		}
-		for {
-			gp, _ = runqget(p2)
-			if gp == nil {
-				break
-			}
-			s++
-			gp.sig++
-		}
-		for {
-			gp, _ = runqget(p1)
-			if gp == nil {
-				break
-			}
-			gp.sig++
-		}
-		for j := 0; j < i; j++ {
-			if gs[j].sig != 1 {
-				print("bad element ", j, "(", gs[j].sig, ") at iter ", i, "\n")
-				throw("bad element")
-			}
-		}
-		if s != i/2 && s != i/2+1 {
-			print("bad steal ", s, ", want ", i/2, " or ", i/2+1, ", iter ", i, "\n")
-			throw("bad steal")
-		}
-	}
-}
-
-func setMaxThreads(in int) (out int) {
-	lock(&sched.lock)
-	out = int(sched.maxmcount)
-	sched.maxmcount = int32(in)
-	checkmcount()
-	unlock(&sched.lock)
-	return
-}
-
-func haveexperiment(name string) bool {
-	x := goexperiment
-	for x != "" {
-		xname := ""
-		i := index(x, ",")
-		if i < 0 {
-			xname, x = x, ""
-		} else {
-			xname, x = x[:i], x[i+1:]
-		}
-		if xname == name {
-			return true
-		}
-	}
-	return false
-}
-
-//go:nosplit
-func procPin() int {
-	_g_ := getg()
-	mp := _g_.m
-
-	mp.locks++
-	return int(mp.p.ptr().id)
-}
-
-//go:nosplit
-func procUnpin() {
-	_g_ := getg()
-	_g_.m.locks--
-}
-
-//go:linkname sync_runtime_procPin sync.runtime_procPin
-//go:nosplit
-func sync_runtime_procPin() int {
-	return procPin()
-}
-
-//go:linkname sync_runtime_procUnpin sync.runtime_procUnpin
-//go:nosplit
-func sync_runtime_procUnpin() {
-	procUnpin()
-}
-
-//go:linkname sync_atomic_runtime_procPin sync/atomic.runtime_procPin
-//go:nosplit
-func sync_atomic_runtime_procPin() int {
-	return procPin()
-}
-
-//go:linkname sync_atomic_runtime_procUnpin sync/atomic.runtime_procUnpin
-//go:nosplit
-func sync_atomic_runtime_procUnpin() {
-	procUnpin()
-}
-
-// Active spinning for sync.Mutex.
-//go:linkname sync_runtime_canSpin sync.runtime_canSpin
-//go:nosplit
-func sync_runtime_canSpin(i int) bool {
-	// sync.Mutex is cooperative, so we are conservative with spinning.
-	// Spin only a few times and only if running on a multicore machine,
-	// GOMAXPROCS>1, there is at least one other running P, and the local runq is empty.
-	// As opposed to runtime mutexes, we don't do passive spinning here,
-	// because there can be work on the global runq or on other Ps.
-	if i >= active_spin || ncpu <= 1 || gomaxprocs <= int32(sched.npidle+sched.nmspinning)+1 {
-		return false
-	}
-	if p := getg().m.p.ptr(); !runqempty(p) {
-		return false
-	}
-	return true
-}
-
-//go:linkname sync_runtime_doSpin sync.runtime_doSpin
-//go:nosplit
-func sync_runtime_doSpin() {
-	procyield(active_spin_cnt)
-}
diff --git a/src/runtime/race.go b/src/runtime/race.go
index 923d611..6ee1475 100644
--- a/src/runtime/race.go
+++ b/src/runtime/race.go
@@ -90,3 +90,308 @@
 	ctx.res = 1
 	return
 }
+
+// Race runtime functions called via runtime·racecall.
+//go:linkname __tsan_init __tsan_init
+var __tsan_init byte
+
+//go:linkname __tsan_fini __tsan_fini
+var __tsan_fini byte
+
+//go:linkname __tsan_map_shadow __tsan_map_shadow
+var __tsan_map_shadow byte
+
+//go:linkname __tsan_finalizer_goroutine __tsan_finalizer_goroutine
+var __tsan_finalizer_goroutine byte
+
+//go:linkname __tsan_go_start __tsan_go_start
+var __tsan_go_start byte
+
+//go:linkname __tsan_go_end __tsan_go_end
+var __tsan_go_end byte
+
+//go:linkname __tsan_malloc __tsan_malloc
+var __tsan_malloc byte
+
+//go:linkname __tsan_acquire __tsan_acquire
+var __tsan_acquire byte
+
+//go:linkname __tsan_release __tsan_release
+var __tsan_release byte
+
+//go:linkname __tsan_release_merge __tsan_release_merge
+var __tsan_release_merge byte
+
+//go:linkname __tsan_go_ignore_sync_begin __tsan_go_ignore_sync_begin
+var __tsan_go_ignore_sync_begin byte
+
+//go:linkname __tsan_go_ignore_sync_end __tsan_go_ignore_sync_end
+var __tsan_go_ignore_sync_end byte
+
+// Mimic what cmd/cgo would do.
+//go:cgo_import_static __tsan_init
+//go:cgo_import_static __tsan_fini
+//go:cgo_import_static __tsan_map_shadow
+//go:cgo_import_static __tsan_finalizer_goroutine
+//go:cgo_import_static __tsan_go_start
+//go:cgo_import_static __tsan_go_end
+//go:cgo_import_static __tsan_malloc
+//go:cgo_import_static __tsan_acquire
+//go:cgo_import_static __tsan_release
+//go:cgo_import_static __tsan_release_merge
+//go:cgo_import_static __tsan_go_ignore_sync_begin
+//go:cgo_import_static __tsan_go_ignore_sync_end
+
+// These are called from race_amd64.s.
+//go:cgo_import_static __tsan_read
+//go:cgo_import_static __tsan_read_pc
+//go:cgo_import_static __tsan_read_range
+//go:cgo_import_static __tsan_write
+//go:cgo_import_static __tsan_write_pc
+//go:cgo_import_static __tsan_write_range
+//go:cgo_import_static __tsan_func_enter
+//go:cgo_import_static __tsan_func_exit
+
+//go:cgo_import_static __tsan_go_atomic32_load
+//go:cgo_import_static __tsan_go_atomic64_load
+//go:cgo_import_static __tsan_go_atomic32_store
+//go:cgo_import_static __tsan_go_atomic64_store
+//go:cgo_import_static __tsan_go_atomic32_exchange
+//go:cgo_import_static __tsan_go_atomic64_exchange
+//go:cgo_import_static __tsan_go_atomic32_fetch_add
+//go:cgo_import_static __tsan_go_atomic64_fetch_add
+//go:cgo_import_static __tsan_go_atomic32_compare_exchange
+//go:cgo_import_static __tsan_go_atomic64_compare_exchange
+
+// start/end of global data (data+bss).
+var racedatastart uintptr
+var racedataend uintptr
+
+// start/end of heap for race_amd64.s
+var racearenastart uintptr
+var racearenaend uintptr
+
+func racefuncenter(uintptr)
+func racefuncexit()
+func racereadrangepc1(uintptr, uintptr, uintptr)
+func racewriterangepc1(uintptr, uintptr, uintptr)
+func racesymbolizethunk(uintptr)
+
+// racecall allows calling an arbitrary function f from C race runtime
+// with up to 4 uintptr arguments.
+func racecall(*byte, uintptr, uintptr, uintptr, uintptr)
+
+// checks if the address has shadow (i.e. heap or data/bss)
+//go:nosplit
+func isvalidaddr(addr unsafe.Pointer) bool {
+	return racearenastart <= uintptr(addr) && uintptr(addr) < racearenaend ||
+		racedatastart <= uintptr(addr) && uintptr(addr) < racedataend
+}
+
+//go:nosplit
+func raceinit() uintptr {
+	// cgo is required to initialize libc, which is used by race runtime
+	if !iscgo {
+		throw("raceinit: race build must use cgo")
+	}
+
+	var racectx uintptr
+	racecall(&__tsan_init, uintptr(unsafe.Pointer(&racectx)), funcPC(racesymbolizethunk), 0, 0)
+
+	// Round data segment to page boundaries, because it's used in mmap().
+	start := ^uintptr(0)
+	end := uintptr(0)
+	if start > firstmoduledata.noptrdata {
+		start = firstmoduledata.noptrdata
+	}
+	if start > firstmoduledata.data {
+		start = firstmoduledata.data
+	}
+	if start > firstmoduledata.noptrbss {
+		start = firstmoduledata.noptrbss
+	}
+	if start > firstmoduledata.bss {
+		start = firstmoduledata.bss
+	}
+	if end < firstmoduledata.enoptrdata {
+		end = firstmoduledata.enoptrdata
+	}
+	if end < firstmoduledata.edata {
+		end = firstmoduledata.edata
+	}
+	if end < firstmoduledata.enoptrbss {
+		end = firstmoduledata.enoptrbss
+	}
+	if end < firstmoduledata.ebss {
+		end = firstmoduledata.ebss
+	}
+	size := round(end-start, _PageSize)
+	racecall(&__tsan_map_shadow, start, size, 0, 0)
+	racedatastart = start
+	racedataend = start + size
+
+	return racectx
+}
+
+//go:nosplit
+func racefini() {
+	racecall(&__tsan_fini, 0, 0, 0, 0)
+}
+
+//go:nosplit
+func racemapshadow(addr unsafe.Pointer, size uintptr) {
+	if racearenastart == 0 {
+		racearenastart = uintptr(addr)
+	}
+	if racearenaend < uintptr(addr)+size {
+		racearenaend = uintptr(addr) + size
+	}
+	racecall(&__tsan_map_shadow, uintptr(addr), size, 0, 0)
+}
+
+//go:nosplit
+func racemalloc(p unsafe.Pointer, sz uintptr) {
+	racecall(&__tsan_malloc, uintptr(p), sz, 0, 0)
+}
+
+//go:nosplit
+func racegostart(pc uintptr) uintptr {
+	_g_ := getg()
+	var spawng *g
+	if _g_.m.curg != nil {
+		spawng = _g_.m.curg
+	} else {
+		spawng = _g_
+	}
+
+	var racectx uintptr
+	racecall(&__tsan_go_start, spawng.racectx, uintptr(unsafe.Pointer(&racectx)), pc, 0)
+	return racectx
+}
+
+//go:nosplit
+func racegoend() {
+	racecall(&__tsan_go_end, getg().racectx, 0, 0, 0)
+}
+
+//go:nosplit
+func racewriterangepc(addr unsafe.Pointer, sz, callpc, pc uintptr) {
+	_g_ := getg()
+	if _g_ != _g_.m.curg {
+		// The call is coming from manual instrumentation of Go code running on g0/gsignal.
+		// Not interesting.
+		return
+	}
+	if callpc != 0 {
+		racefuncenter(callpc)
+	}
+	racewriterangepc1(uintptr(addr), sz, pc)
+	if callpc != 0 {
+		racefuncexit()
+	}
+}
+
+//go:nosplit
+func racereadrangepc(addr unsafe.Pointer, sz, callpc, pc uintptr) {
+	_g_ := getg()
+	if _g_ != _g_.m.curg {
+		// The call is coming from manual instrumentation of Go code running on g0/gsignal.
+		// Not interesting.
+		return
+	}
+	if callpc != 0 {
+		racefuncenter(callpc)
+	}
+	racereadrangepc1(uintptr(addr), sz, pc)
+	if callpc != 0 {
+		racefuncexit()
+	}
+}
+
+//go:nosplit
+func raceacquire(addr unsafe.Pointer) {
+	raceacquireg(getg(), addr)
+}
+
+//go:nosplit
+func raceacquireg(gp *g, addr unsafe.Pointer) {
+	if getg().raceignore != 0 || !isvalidaddr(addr) {
+		return
+	}
+	racecall(&__tsan_acquire, gp.racectx, uintptr(addr), 0, 0)
+}
+
+//go:nosplit
+func racerelease(addr unsafe.Pointer) {
+	_g_ := getg()
+	if _g_.raceignore != 0 || !isvalidaddr(addr) {
+		return
+	}
+	racereleaseg(_g_, addr)
+}
+
+//go:nosplit
+func racereleaseg(gp *g, addr unsafe.Pointer) {
+	if getg().raceignore != 0 || !isvalidaddr(addr) {
+		return
+	}
+	racecall(&__tsan_release, gp.racectx, uintptr(addr), 0, 0)
+}
+
+//go:nosplit
+func racereleasemerge(addr unsafe.Pointer) {
+	racereleasemergeg(getg(), addr)
+}
+
+//go:nosplit
+func racereleasemergeg(gp *g, addr unsafe.Pointer) {
+	if getg().raceignore != 0 || !isvalidaddr(addr) {
+		return
+	}
+	racecall(&__tsan_release_merge, gp.racectx, uintptr(addr), 0, 0)
+}
+
+//go:nosplit
+func racefingo() {
+	racecall(&__tsan_finalizer_goroutine, getg().racectx, 0, 0, 0)
+}
+
+//go:nosplit
+
+func RaceAcquire(addr unsafe.Pointer) {
+	raceacquire(addr)
+}
+
+//go:nosplit
+
+func RaceRelease(addr unsafe.Pointer) {
+	racerelease(addr)
+}
+
+//go:nosplit
+
+func RaceReleaseMerge(addr unsafe.Pointer) {
+	racereleasemerge(addr)
+}
+
+//go:nosplit
+
+// RaceDisable disables handling of race events in the current goroutine.
+func RaceDisable() {
+	_g_ := getg()
+	if _g_.raceignore == 0 {
+		racecall(&__tsan_go_ignore_sync_begin, _g_.racectx, 0, 0, 0)
+	}
+	_g_.raceignore++
+}
+
+//go:nosplit
+
+// RaceEnable re-enables handling of race events in the current goroutine.
+func RaceEnable() {
+	_g_ := getg()
+	_g_.raceignore--
+	if _g_.raceignore == 0 {
+		racecall(&__tsan_go_ignore_sync_end, _g_.racectx, 0, 0, 0)
+	}
+}
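The exported RaceAcquire/RaceRelease/RaceReleaseMerge entry points above let a program describe synchronization the detector cannot observe, such as ordering established through the kernel or through C code. A sketch (build and run with -race; these symbols only exist in race-enabled builds, and the pipe-based handoff here is an invented example, not from this CL) — without the two annotations the detector sees no happens-before edge between the write and the read of payload and should report a race, even though the pipe genuinely orders them:

	package main

	import (
		"fmt"
		"os"
		"runtime"
		"unsafe"
	)

	var payload string

	func main() {
		r, w, err := os.Pipe()
		if err != nil {
			panic(err)
		}
		go func() {
			payload = "hello"
			// The pipe write genuinely orders the two accesses to payload,
			// but the edge runs through the kernel, invisible to tsan.
			runtime.RaceRelease(unsafe.Pointer(&payload))
			w.Write([]byte{1})
		}()
		var buf [1]byte
		r.Read(buf[:])
		runtime.RaceAcquire(unsafe.Pointer(&payload))
		fmt.Println(payload) // no report; remove the annotations and -race flags this read
	}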
diff --git a/src/runtime/race/testdata/issue12664_test.go b/src/runtime/race/testdata/issue12664_test.go
new file mode 100644
index 0000000..c9f790e
--- /dev/null
+++ b/src/runtime/race/testdata/issue12664_test.go
@@ -0,0 +1,76 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package race_test
+
+import (
+	"fmt"
+	"testing"
+)
+
+var issue12664 = "hi"
+
+func TestRaceIssue12664(t *testing.T) {
+	c := make(chan struct{})
+	go func() {
+		issue12664 = "bye"
+		close(c)
+	}()
+	fmt.Println(issue12664)
+	<-c
+}
+
+type MyI interface {
+	foo()
+}
+
+type MyT int
+
+func (MyT) foo() {
+}
+
+var issue12664_2 MyT = 0
+
+func TestRaceIssue12664_2(t *testing.T) {
+	c := make(chan struct{})
+	go func() {
+		issue12664_2 = 1
+		close(c)
+	}()
+	func(x MyI) {
+		// Never true, but prevents inlining.
+		if x.(MyT) == -1 {
+			close(c)
+		}
+	}(issue12664_2)
+	<-c
+}
+
+var issue12664_3 MyT = 0
+
+func TestRaceIssue12664_3(t *testing.T) {
+	c := make(chan struct{})
+	go func() {
+		issue12664_3 = 1
+		close(c)
+	}()
+	var r MyT
+	var i interface{} = r
+	issue12664_3 = i.(MyT)
+	<-c
+}
+
+var issue12664_4 MyT = 0
+
+func TestRaceIssue12664_4(t *testing.T) {
+	c := make(chan struct{})
+	go func() {
+		issue12664_4 = 1
+		close(c)
+	}()
+	var r MyT
+	var i MyI = r
+	issue12664_4 = i.(MyT)
+	<-c
+}
diff --git a/src/runtime/race1.go b/src/runtime/race1.go
deleted file mode 100644
index 38afca7..0000000
--- a/src/runtime/race1.go
+++ /dev/null
@@ -1,315 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Implementation of the race detector API.
-// +build race
-
-package runtime
-
-import "unsafe"
-
-// Race runtime functions called via runtime·racecall.
-//go:linkname __tsan_init __tsan_init
-var __tsan_init byte
-
-//go:linkname __tsan_fini __tsan_fini
-var __tsan_fini byte
-
-//go:linkname __tsan_map_shadow __tsan_map_shadow
-var __tsan_map_shadow byte
-
-//go:linkname __tsan_finalizer_goroutine __tsan_finalizer_goroutine
-var __tsan_finalizer_goroutine byte
-
-//go:linkname __tsan_go_start __tsan_go_start
-var __tsan_go_start byte
-
-//go:linkname __tsan_go_end __tsan_go_end
-var __tsan_go_end byte
-
-//go:linkname __tsan_malloc __tsan_malloc
-var __tsan_malloc byte
-
-//go:linkname __tsan_acquire __tsan_acquire
-var __tsan_acquire byte
-
-//go:linkname __tsan_release __tsan_release
-var __tsan_release byte
-
-//go:linkname __tsan_release_merge __tsan_release_merge
-var __tsan_release_merge byte
-
-//go:linkname __tsan_go_ignore_sync_begin __tsan_go_ignore_sync_begin
-var __tsan_go_ignore_sync_begin byte
-
-//go:linkname __tsan_go_ignore_sync_end __tsan_go_ignore_sync_end
-var __tsan_go_ignore_sync_end byte
-
-// Mimic what cmd/cgo would do.
-//go:cgo_import_static __tsan_init
-//go:cgo_import_static __tsan_fini
-//go:cgo_import_static __tsan_map_shadow
-//go:cgo_import_static __tsan_finalizer_goroutine
-//go:cgo_import_static __tsan_go_start
-//go:cgo_import_static __tsan_go_end
-//go:cgo_import_static __tsan_malloc
-//go:cgo_import_static __tsan_acquire
-//go:cgo_import_static __tsan_release
-//go:cgo_import_static __tsan_release_merge
-//go:cgo_import_static __tsan_go_ignore_sync_begin
-//go:cgo_import_static __tsan_go_ignore_sync_end
-
-// These are called from race_amd64.s.
-//go:cgo_import_static __tsan_read
-//go:cgo_import_static __tsan_read_pc
-//go:cgo_import_static __tsan_read_range
-//go:cgo_import_static __tsan_write
-//go:cgo_import_static __tsan_write_pc
-//go:cgo_import_static __tsan_write_range
-//go:cgo_import_static __tsan_func_enter
-//go:cgo_import_static __tsan_func_exit
-
-//go:cgo_import_static __tsan_go_atomic32_load
-//go:cgo_import_static __tsan_go_atomic64_load
-//go:cgo_import_static __tsan_go_atomic32_store
-//go:cgo_import_static __tsan_go_atomic64_store
-//go:cgo_import_static __tsan_go_atomic32_exchange
-//go:cgo_import_static __tsan_go_atomic64_exchange
-//go:cgo_import_static __tsan_go_atomic32_fetch_add
-//go:cgo_import_static __tsan_go_atomic64_fetch_add
-//go:cgo_import_static __tsan_go_atomic32_compare_exchange
-//go:cgo_import_static __tsan_go_atomic64_compare_exchange
-
-// start/end of global data (data+bss).
-var racedatastart uintptr
-var racedataend uintptr
-
-// start/end of heap for race_amd64.s
-var racearenastart uintptr
-var racearenaend uintptr
-
-func racefuncenter(uintptr)
-func racefuncexit()
-func racereadrangepc1(uintptr, uintptr, uintptr)
-func racewriterangepc1(uintptr, uintptr, uintptr)
-func racesymbolizethunk(uintptr)
-
-// racecall allows calling an arbitrary function f from C race runtime
-// with up to 4 uintptr arguments.
-func racecall(*byte, uintptr, uintptr, uintptr, uintptr)
-
-// checks if the address has shadow (i.e. heap or data/bss)
-//go:nosplit
-func isvalidaddr(addr unsafe.Pointer) bool {
-	return racearenastart <= uintptr(addr) && uintptr(addr) < racearenaend ||
-		racedatastart <= uintptr(addr) && uintptr(addr) < racedataend
-}
-
-//go:nosplit
-func raceinit() uintptr {
-	// cgo is required to initialize libc, which is used by race runtime
-	if !iscgo {
-		throw("raceinit: race build must use cgo")
-	}
-
-	var racectx uintptr
-	racecall(&__tsan_init, uintptr(unsafe.Pointer(&racectx)), funcPC(racesymbolizethunk), 0, 0)
-
-	// Round data segment to page boundaries, because it's used in mmap().
-	start := ^uintptr(0)
-	end := uintptr(0)
-	if start > firstmoduledata.noptrdata {
-		start = firstmoduledata.noptrdata
-	}
-	if start > firstmoduledata.data {
-		start = firstmoduledata.data
-	}
-	if start > firstmoduledata.noptrbss {
-		start = firstmoduledata.noptrbss
-	}
-	if start > firstmoduledata.bss {
-		start = firstmoduledata.bss
-	}
-	if end < firstmoduledata.enoptrdata {
-		end = firstmoduledata.enoptrdata
-	}
-	if end < firstmoduledata.edata {
-		end = firstmoduledata.edata
-	}
-	if end < firstmoduledata.enoptrbss {
-		end = firstmoduledata.enoptrbss
-	}
-	if end < firstmoduledata.ebss {
-		end = firstmoduledata.ebss
-	}
-	size := round(end-start, _PageSize)
-	racecall(&__tsan_map_shadow, start, size, 0, 0)
-	racedatastart = start
-	racedataend = start + size
-
-	return racectx
-}
-
-//go:nosplit
-func racefini() {
-	racecall(&__tsan_fini, 0, 0, 0, 0)
-}
-
-//go:nosplit
-func racemapshadow(addr unsafe.Pointer, size uintptr) {
-	if racearenastart == 0 {
-		racearenastart = uintptr(addr)
-	}
-	if racearenaend < uintptr(addr)+size {
-		racearenaend = uintptr(addr) + size
-	}
-	racecall(&__tsan_map_shadow, uintptr(addr), size, 0, 0)
-}
-
-//go:nosplit
-func racemalloc(p unsafe.Pointer, sz uintptr) {
-	racecall(&__tsan_malloc, uintptr(p), sz, 0, 0)
-}
-
-//go:nosplit
-func racegostart(pc uintptr) uintptr {
-	_g_ := getg()
-	var spawng *g
-	if _g_.m.curg != nil {
-		spawng = _g_.m.curg
-	} else {
-		spawng = _g_
-	}
-
-	var racectx uintptr
-	racecall(&__tsan_go_start, spawng.racectx, uintptr(unsafe.Pointer(&racectx)), pc, 0)
-	return racectx
-}
-
-//go:nosplit
-func racegoend() {
-	racecall(&__tsan_go_end, getg().racectx, 0, 0, 0)
-}
-
-//go:nosplit
-func racewriterangepc(addr unsafe.Pointer, sz, callpc, pc uintptr) {
-	_g_ := getg()
-	if _g_ != _g_.m.curg {
-		// The call is coming from manual instrumentation of Go code running on g0/gsignal.
-		// Not interesting.
-		return
-	}
-	if callpc != 0 {
-		racefuncenter(callpc)
-	}
-	racewriterangepc1(uintptr(addr), sz, pc)
-	if callpc != 0 {
-		racefuncexit()
-	}
-}
-
-//go:nosplit
-func racereadrangepc(addr unsafe.Pointer, sz, callpc, pc uintptr) {
-	_g_ := getg()
-	if _g_ != _g_.m.curg {
-		// The call is coming from manual instrumentation of Go code running on g0/gsignal.
-		// Not interesting.
-		return
-	}
-	if callpc != 0 {
-		racefuncenter(callpc)
-	}
-	racereadrangepc1(uintptr(addr), sz, pc)
-	if callpc != 0 {
-		racefuncexit()
-	}
-}
-
-//go:nosplit
-func raceacquire(addr unsafe.Pointer) {
-	raceacquireg(getg(), addr)
-}
-
-//go:nosplit
-func raceacquireg(gp *g, addr unsafe.Pointer) {
-	if getg().raceignore != 0 || !isvalidaddr(addr) {
-		return
-	}
-	racecall(&__tsan_acquire, gp.racectx, uintptr(addr), 0, 0)
-}
-
-//go:nosplit
-func racerelease(addr unsafe.Pointer) {
-	_g_ := getg()
-	if _g_.raceignore != 0 || !isvalidaddr(addr) {
-		return
-	}
-	racereleaseg(_g_, addr)
-}
-
-//go:nosplit
-func racereleaseg(gp *g, addr unsafe.Pointer) {
-	if getg().raceignore != 0 || !isvalidaddr(addr) {
-		return
-	}
-	racecall(&__tsan_release, gp.racectx, uintptr(addr), 0, 0)
-}
-
-//go:nosplit
-func racereleasemerge(addr unsafe.Pointer) {
-	racereleasemergeg(getg(), addr)
-}
-
-//go:nosplit
-func racereleasemergeg(gp *g, addr unsafe.Pointer) {
-	if getg().raceignore != 0 || !isvalidaddr(addr) {
-		return
-	}
-	racecall(&__tsan_release_merge, gp.racectx, uintptr(addr), 0, 0)
-}
-
-//go:nosplit
-func racefingo() {
-	racecall(&__tsan_finalizer_goroutine, getg().racectx, 0, 0, 0)
-}
-
-//go:nosplit
-
-func RaceAcquire(addr unsafe.Pointer) {
-	raceacquire(addr)
-}
-
-//go:nosplit
-
-func RaceRelease(addr unsafe.Pointer) {
-	racerelease(addr)
-}
-
-//go:nosplit
-
-func RaceReleaseMerge(addr unsafe.Pointer) {
-	racereleasemerge(addr)
-}
-
-//go:nosplit
-
-// RaceDisable disables handling of race events in the current goroutine.
-func RaceDisable() {
-	_g_ := getg()
-	if _g_.raceignore == 0 {
-		racecall(&__tsan_go_ignore_sync_begin, _g_.racectx, 0, 0, 0)
-	}
-	_g_.raceignore++
-}
-
-//go:nosplit
-
-// RaceEnable re-enables handling of race events in the current goroutine.
-func RaceEnable() {
-	_g_ := getg()
-	_g_.raceignore--
-	if _g_.raceignore == 0 {
-		racecall(&__tsan_go_ignore_sync_end, _g_.racectx, 0, 0, 0)
-	}
-}
diff --git a/src/runtime/rdebug.go b/src/runtime/rdebug.go
index f2766d7..ad7b976 100644
--- a/src/runtime/rdebug.go
+++ b/src/runtime/rdebug.go
@@ -4,12 +4,16 @@
 
 package runtime
 
+import _ "unsafe" // for go:linkname
+
+//go:linkname setMaxStack runtime/debug.setMaxStack
 func setMaxStack(in int) (out int) {
 	out = int(maxstacksize)
 	maxstacksize = uintptr(in)
 	return out
 }
 
+//go:linkname setPanicOnFault runtime/debug.setPanicOnFault
 func setPanicOnFault(new bool) (old bool) {
 	mp := acquirem()
 	old = mp.curg.paniconfault
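The //go:linkname directives above push the runtime definitions out under runtime/debug's symbol names, leaving runtime/debug with only bodyless declarations. A minimal sketch of the same pattern between two invented packages (example.com/impl and example.com/api are hypothetical names; note the consuming package also needs an empty .s file, or the compiler rejects a declaration without a body):

	// example.com/impl/impl.go — the side that owns the implementation.
	package impl

	import _ "unsafe" // for go:linkname

	var limit = 1 << 20

	// setLimit is also given the symbol name example.com/api.setLimit,
	// mirroring how runtime.setMaxStack doubles as runtime/debug.setMaxStack.
	//go:linkname setLimit example.com/api.setLimit
	func setLimit(in int) (out int) {
		out = limit
		limit = in
		return out
	}

	// example.com/api/api.go — the consuming side (plus an empty api.s,
	// which tells the compiler to accept declarations without bodies).
	package api

	import _ "example.com/impl" // ensure the implementation is linked in

	// Bodyless declaration; the linker binds it to impl's setLimit.
	func setLimit(in int) (out int)

	// SetLimit is the public wrapper, analogous to debug.SetMaxStack.
	func SetLimit(n int) int { return setLimit(n) }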
diff --git a/src/runtime/rt0_darwin_386.s b/src/runtime/rt0_darwin_386.s
index 4c8c92d..be2e564 100644
--- a/src/runtime/rt0_darwin_386.s
+++ b/src/runtime/rt0_darwin_386.s
@@ -12,5 +12,60 @@
 	CALL	main(SB)
 	INT	$3
 
+// With -buildmode=c-archive, this symbol is called from a global constructor.
+TEXT _rt0_386_darwin_lib(SB),NOSPLIT,$0
+	PUSHL	BP
+	MOVL	SP, BP
+	PUSHL	BX
+	PUSHL	SI
+	PUSHL	DI
+
+	MOVL	8(BP), AX
+	MOVL	AX, _rt0_386_darwin_lib_argc<>(SB)
+	MOVL	12(BP), AX
+	MOVL	AX, _rt0_386_darwin_lib_argv<>(SB)
+
+	SUBL	$12, SP
+
+	// Create a new thread to do the runtime initialization and return.
+	MOVL	_cgo_sys_thread_create(SB), AX
+	TESTL	AX, AX
+	JZ	nocgo
+	MOVL	$_rt0_386_darwin_lib_go(SB), BX
+	MOVL	BX, 0(SP)
+	MOVL	$0, 4(SP)
+	CALL	AX
+	JMP     restore
+
+nocgo:
+	MOVL	$0x800000, 0(SP)               // stacksize = 8192KB
+	MOVL	$_rt0_386_darwin_lib_go(SB), AX
+	MOVL	AX, 4(SP)                      // fn
+	MOVL	$0, 8(SP)                      // fnarg
+	MOVL	$runtime·newosproc0(SB), AX
+	CALL	AX
+
+restore:
+	ADDL	$12, SP
+	POPL	DI
+	POPL	SI
+	POPL	BX
+	POPL	BP
+	RET
+
+TEXT _rt0_386_darwin_lib_go(SB),NOSPLIT,$12
+	MOVL	_rt0_386_darwin_lib_argc<>(SB), AX
+	MOVL	AX, 0(SP)
+	MOVL	_rt0_386_darwin_lib_argv<>(SB), AX
+	MOVL	AX, 4(SP)
+	MOVL	$runtime·rt0_go(SB), AX
+	CALL	AX
+	RET
+
+DATA _rt0_386_darwin_lib_argc<>(SB)/4, $0
+GLOBL _rt0_386_darwin_lib_argc<>(SB),NOPTR, $4
+DATA _rt0_386_darwin_lib_argv<>(SB)/4, $0
+GLOBL _rt0_386_darwin_lib_argv<>(SB),NOPTR, $4
+
 TEXT main(SB),NOSPLIT,$0
 	JMP	runtime·rt0_go(SB)
diff --git a/src/runtime/rt0_darwin_arm.s b/src/runtime/rt0_darwin_arm.s
index 95a2b17..d609850 100644
--- a/src/runtime/rt0_darwin_arm.s
+++ b/src/runtime/rt0_darwin_arm.s
@@ -16,27 +16,34 @@
 //
 // Note that all currently shipping darwin/arm platforms require
 // cgo and do not support c-shared.
-TEXT _rt0_arm_darwin_lib(SB),NOSPLIT,$12
+TEXT _rt0_arm_darwin_lib(SB),NOSPLIT,$0
+	// R11 is REGTMP, reserved for liblink. It is used below to
+	// move R0/R1 into globals. However in the darwin ARMv7 calling
+	// convention, it is a callee-saved register. So we save it to a
+	// temporary register.
+	MOVW  R11, R2
 	MOVW  R0, _rt0_arm_darwin_lib_argc<>(SB)
 	MOVW  R1, _rt0_arm_darwin_lib_argv<>(SB)
 
 	// Create a new thread to do the runtime initialization and return.
-	MOVW  _cgo_sys_thread_create(SB), R4
-	CMP   $0, R4
+	MOVW  _cgo_sys_thread_create(SB), R3
+	CMP   $0, R3
 	B.EQ  nocgo
 	MOVW  $_rt0_arm_darwin_lib_go(SB), R0
 	MOVW  $0, R1
-	BL    (R4)
+	MOVW  R2, R11
+	BL    (R3)
 	RET
 nocgo:
 	MOVW  $0x400000, R0
-	MOVW  $_rt0_arm_darwin_lib_go(SB), R1
-	MOVW  $0, R2
-	MOVW  R0,  (R13) // stacksize
-	MOVW  R1, 4(R13) // fn
-	MOVW  R2, 8(R13) // fnarg
-	MOVW  $runtime·newosproc0(SB), R4
-	BL    (R4)
+	MOVW  R0, (R13) // stacksize
+	MOVW  $_rt0_arm_darwin_lib_go(SB), R0
+	MOVW  R0, 4(R13) // fn
+	MOVW  $0, R0
+	MOVW  R0, 8(R13) // fnarg
+	MOVW  $runtime·newosproc0(SB), R3
+	MOVW  R2, R11
+	BL    (R3)
 	RET
 
 TEXT _rt0_arm_darwin_lib_go(SB),NOSPLIT,$0
diff --git a/src/runtime/rt0_linux_ppc64.s b/src/runtime/rt0_linux_ppc64.s
index 33e973d..f814515 100644
--- a/src/runtime/rt0_linux_ppc64.s
+++ b/src/runtime/rt0_linux_ppc64.s
@@ -18,6 +18,6 @@
 	BR main(SB)
 
 TEXT main(SB),NOSPLIT,$-8
-	MOVD	$runtime·rt0_go(SB), R31
-	MOVD	R31, CTR
+	MOVD	$runtime·rt0_go(SB), R12
+	MOVD	R12, CTR
 	BR	(CTR)
diff --git a/src/runtime/rt0_linux_ppc64le.s b/src/runtime/rt0_linux_ppc64le.s
index f5c0af5..5616e1d 100644
--- a/src/runtime/rt0_linux_ppc64le.s
+++ b/src/runtime/rt0_linux_ppc64le.s
@@ -29,6 +29,6 @@
 	BR	main(SB)
 
 TEXT main(SB),NOSPLIT,$-8
-	MOVD	$runtime·rt0_go(SB), R31
-	MOVD	R31, CTR
+	MOVD	$runtime·rt0_go(SB), R12
+	MOVD	R12, CTR
 	BR	(CTR)
diff --git a/src/runtime/runtime.go b/src/runtime/runtime.go
index 2387d9a..81d3e5b 100644
--- a/src/runtime/runtime.go
+++ b/src/runtime/runtime.go
@@ -8,6 +8,7 @@
 
 //go:generate go run wincallback.go
 //go:generate go run mkduff.go
+//go:generate go run mkfastlog2table.go
 
 var ticks struct {
 	lock mutex
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
index 7d3c8f6..5d0aad0 100644
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -257,11 +257,17 @@
 	startpc        uintptr // pc of goroutine function
 	racectx        uintptr
 	waiting        *sudog // sudog structures this g is waiting on (that have a valid elem ptr)
-	readyg         *g     // scratch for readyExecute
 
 	// Per-G gcController state
-	gcalloc    uintptr // bytes allocated during this GC cycle
-	gcscanwork int64   // scan work done (or stolen) this GC cycle
+
+	// gcAssistBytes is this G's GC assist credit in terms of
+	// bytes allocated. If this is positive, then the G has credit
+	// to allocate gcAssistBytes bytes without assisting. If this
+	// is negative, then the G must correct this by performing
+	// scan work. We track this in bytes to make it fast to update
+	// and check for debt in the malloc hot path. The assist ratio
+	// determines how this corresponds to scan work debt.
+	gcAssistBytes int64
 }
 
 type mts struct {
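The gcAssistBytes comment above fixes the shape of the malloc hot path: a single subtraction, with conversion to scan work deferred to the slow path via the assist ratio. A toy model of that accounting (the ratio value, names, and printout are illustrative only, not the runtime's):

	package main

	import "fmt"

	// gState carries the per-goroutine assist state from the diff above.
	type gState struct {
		gcAssistBytes int64 // allocation credit (+) or debt (-), in bytes
	}

	// assistBytesPerWork stands in for the assist ratio: how many bytes
	// of allocation one unit of scan work pays for. The value is invented.
	const assistBytesPerWork = 2

	// mallocgc models the hot path: one subtraction, and a slow-path call
	// only when the goroutine has gone into debt.
	func mallocgc(g *gState, size int64) {
		g.gcAssistBytes -= size
		if g.gcAssistBytes < 0 {
			gcAssistAlloc(g)
		}
	}

	// gcAssistAlloc models the slow path: convert the byte debt to scan
	// work through the ratio, "do" the work, and credit the bytes back.
	func gcAssistAlloc(g *gState) {
		debt := -g.gcAssistBytes
		work := (debt + assistBytesPerWork - 1) / assistBytesPerWork
		fmt.Printf("assist: %d bytes of debt -> %d units of scan work\n", debt, work)
		g.gcAssistBytes += work * assistBytesPerWork
	}

	func main() {
		g := &gState{gcAssistBytes: 1024} // start with credit
		mallocgc(g, 512)  // paid from credit; no assist
		mallocgc(g, 1024) // goes into debt; triggers an assist
		fmt.Println("remaining credit:", g.gcAssistBytes)
	}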
diff --git a/src/runtime/signal1_unix.go b/src/runtime/signal1_unix.go
index 56d9755..8cabbc2 100644
--- a/src/runtime/signal1_unix.go
+++ b/src/runtime/signal1_unix.go
@@ -16,7 +16,7 @@
 // handle a particular signal (e.g., signal occurred on a non-Go thread).
 // See sigfwdgo() for more information on when the signals are forwarded.
 //
-// Signal forwarding is currently available only on Linux.
+// Signal forwarding is currently available only on Darwin and Linux.
 var fwdSig [_NSIG]uintptr
 
 // sigmask represents a general signal mask compatible with the GOOS
@@ -195,7 +195,7 @@
 	raise(_SIGABRT)
 }
 
-// createSigM starts one global, sleeping thread to make sure at least one thread
+// ensureSigM starts one global, sleeping thread to make sure at least one thread
 // is available to catch signals enabled for os/signal.
 func ensureSigM() {
 	if maskUpdatedChan != nil {
diff --git a/src/runtime/signal2_unix.go b/src/runtime/signal2_unix.go
new file mode 100644
index 0000000..8b0bd42
--- /dev/null
+++ b/src/runtime/signal2_unix.go
@@ -0,0 +1,46 @@
+// Copyright 2012 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build darwin linux
+
+package runtime
+
+import "unsafe"
+
+//go:noescape
+func sigfwd(fn uintptr, sig uint32, info *siginfo, ctx unsafe.Pointer)
+
+// Determines if the signal should be handled by Go and if not, forwards the
+// signal to the handler that was installed before Go's.  Returns whether the
+// signal was forwarded.
+//go:nosplit
+func sigfwdgo(sig uint32, info *siginfo, ctx unsafe.Pointer) bool {
+	g := getg()
+	c := &sigctxt{info, ctx}
+	if sig >= uint32(len(sigtable)) {
+		return false
+	}
+	fwdFn := fwdSig[sig]
+	flags := sigtable[sig].flags
+
+	// If there is no handler to forward to, no need to forward.
+	if fwdFn == _SIG_DFL {
+		return false
+	}
+	// Only forward synchronous signals.
+	if c.sigcode() == _SI_USER || flags&_SigPanic == 0 {
+		return false
+	}
+	// Determine if the signal occurred inside Go code.  We test that:
+	//   (1) we were in a goroutine (i.e., m.curg != nil), and
+	//   (2) we weren't in CGO (i.e., m.curg.syscallsp == 0).
+	if g != nil && g.m != nil && g.m.curg != nil && g.m.curg.syscallsp == 0 {
+		return false
+	}
+	// Signal not handled by Go, forward it.
+	if fwdFn != _SIG_IGN {
+		sigfwd(fwdFn, sig, info, ctx)
+	}
+	return true
+}
diff --git a/src/runtime/signal_darwin.go b/src/runtime/signal_darwin.go
index 6cd1865..e8ec162 100644
--- a/src/runtime/signal_darwin.go
+++ b/src/runtime/signal_darwin.go
@@ -47,9 +47,6 @@
 }
 
 //go:noescape
-func sigfwd(fn uintptr, sig uint32, info *siginfo, ctx unsafe.Pointer)
-
-//go:noescape
 func sigreturn(ctx unsafe.Pointer, infostyle uint32)
 
 //go:nosplit
diff --git a/src/runtime/signal_linux.go b/src/runtime/signal_linux.go
index 2f25b59..2cc76b2 100644
--- a/src/runtime/signal_linux.go
+++ b/src/runtime/signal_linux.go
@@ -44,8 +44,8 @@
 	/* 29 */ {_SigNotify, "SIGIO: i/o now possible"},
 	/* 30 */ {_SigNotify, "SIGPWR: power failure restart"},
 	/* 31 */ {_SigNotify, "SIGSYS: bad system call"},
-	/* 32 */ {_SigSetStack, "signal 32"}, /* SIGCANCEL; see issue 6997 */
-	/* 33 */ {_SigSetStack, "signal 33"}, /* SIGSETXID; see issue 3871, 9400 */
+	/* 32 */ {_SigSetStack + _SigUnblock, "signal 32"}, /* SIGCANCEL; see issue 6997 */
+	/* 33 */ {_SigSetStack + _SigUnblock, "signal 33"}, /* SIGSETXID; see issues 3871, 9400, 12498 */
 	/* 34 */ {_SigNotify, "signal 34"},
 	/* 35 */ {_SigNotify, "signal 35"},
 	/* 36 */ {_SigNotify, "signal 36"},
diff --git a/src/runtime/signal_ppc64x.go b/src/runtime/signal_ppc64x.go
index bad9fe6..71055b6 100644
--- a/src/runtime/signal_ppc64x.go
+++ b/src/runtime/signal_ppc64x.go
@@ -82,7 +82,7 @@
 		// functions are correctly handled. This smashes
 		// the stack frame but we're not going back there
 		// anyway.
-		sp := c.sp() - ptrSize
+		sp := c.sp() - minFrameSize
 		c.set_sp(sp)
 		*(*uint64)(unsafe.Pointer(uintptr(sp))) = c.link()
 
diff --git a/src/runtime/signal_solaris.go b/src/runtime/signal_solaris.go
index d8ac676..0d4fa7f 100644
--- a/src/runtime/signal_solaris.go
+++ b/src/runtime/signal_solaris.go
@@ -46,7 +46,7 @@
 	/* 33 */ {_SigNotify, "SIGLWP: reserved signal no longer used by"},
 	/* 34 */ {_SigNotify, "SIGFREEZE: special signal used by CPR"},
 	/* 35 */ {_SigNotify, "SIGTHAW: special signal used by CPR"},
-	/* 36 */ {0, "SIGCANCEL: reserved signal for thread cancellation"},
+	/* 36 */ {0, "SIGCANCEL: reserved signal for thread cancellation"}, // Oracle's spelling of cancelation.
 	/* 37 */ {_SigNotify, "SIGLOST: resource lost (eg, record-lock lost)"},
 	/* 38 */ {_SigNotify, "SIGXRES: resource control exceeded"},
 	/* 39 */ {_SigNotify, "SIGJVM1: reserved signal for Java Virtual Machine"},
diff --git a/src/runtime/signal_unix.go b/src/runtime/signal_unix.go
index ad3ab31..8834e51 100644
--- a/src/runtime/signal_unix.go
+++ b/src/runtime/signal_unix.go
@@ -6,43 +6,9 @@
 
 package runtime
 
-import "unsafe"
+import _ "unsafe" // for go:linkname
 
 //go:linkname os_sigpipe os.sigpipe
 func os_sigpipe() {
 	systemstack(sigpipe)
 }
-
-// Determines if the signal should be handled by Go and if not, forwards the
-// signal to the handler that was installed before Go's.  Returns whether the
-// signal was forwarded.
-//go:nosplit
-func sigfwdgo(sig uint32, info *siginfo, ctx unsafe.Pointer) bool {
-	g := getg()
-	c := &sigctxt{info, ctx}
-	if sig >= uint32(len(sigtable)) {
-		return false
-	}
-	fwdFn := fwdSig[sig]
-	flags := sigtable[sig].flags
-
-	// If there is no handler to forward to, no need to forward.
-	if fwdFn == _SIG_DFL {
-		return false
-	}
-	// Only forward synchronous signals.
-	if c.sigcode() == _SI_USER || flags&_SigPanic == 0 {
-		return false
-	}
-	// Determine if the signal occurred inside Go code.  We test that:
-	//   (1) we were in a goroutine (i.e., m.curg != nil), and
-	//   (2) we weren't in CGO (i.e., m.curg.syscallsp == 0).
-	if g != nil && g.m != nil && g.m.curg != nil && g.m.curg.syscallsp == 0 {
-		return false
-	}
-	// Signal not handled by Go, forward it.
-	if fwdFn != _SIG_IGN {
-		sigfwd(fwdFn, sig, info, ctx)
-	}
-	return true
-}
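The forwarding logic removed here (and re-added in signal2_unix.go above) makes its decision from four facts. Restating it as a pure predicate can make the cases easier to audit; this is a model with invented boolean names, not the runtime's types, and note that forwarding to a _SIG_IGN handler still returns true — the signal counts as forwarded and is simply dropped:

	package main

	import "fmt"

	// shouldForward mirrors sigfwdgo's decision with the runtime state
	// flattened into booleans.
	func shouldForward(haveHandler, userGenerated, panicSignal, inGoCode bool) bool {
		if !haveHandler { // fwdFn == _SIG_DFL: nothing to forward to
			return false
		}
		if userGenerated || !panicSignal { // only forward synchronous signals
			return false
		}
		if inGoCode { // m.curg != nil && m.curg.syscallsp == 0: Go handles it
			return false
		}
		return true
	}

	func main() {
		// A synchronous fault raised from C code, with a pre-Go handler installed:
		fmt.Println(shouldForward(true, false, true, false)) // true
		// The same fault inside Go code stays with Go (it becomes a panic):
		fmt.Println(shouldForward(true, false, true, true)) // false
	}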
diff --git a/src/runtime/sigqueue.go b/src/runtime/sigqueue.go
index e6e1a84..f28067f 100644
--- a/src/runtime/sigqueue.go
+++ b/src/runtime/sigqueue.go
@@ -90,6 +90,7 @@
 
 // Called to receive the next queued signal.
 // Must only be called from a single goroutine at a time.
+//go:linkname signal_recv os/signal.signal_recv
 func signal_recv() uint32 {
 	for {
 		// Serve any signals from local copy.
@@ -127,6 +128,7 @@
 }
 
 // Must only be called from a single goroutine at a time.
+//go:linkname signal_enable os/signal.signal_enable
 func signal_enable(s uint32) {
 	if !sig.inuse {
 		// The first call to signal_enable is for us
@@ -145,6 +147,7 @@
 }
 
 // Must only be called from a single goroutine at a time.
+//go:linkname signal_disable os/signal.signal_disable
 func signal_disable(s uint32) {
 	if s >= uint32(len(sig.wanted)*32) {
 		return
@@ -154,6 +157,7 @@
 }
 
 // Must only be called from a single goroutine at a time.
+//go:linkname signal_ignore os/signal.signal_ignore
 func signal_ignore(s uint32) {
 	if s >= uint32(len(sig.wanted)*32) {
 		return
diff --git a/src/runtime/sigqueue_plan9.go b/src/runtime/sigqueue_plan9.go
index f000fab..89f96be 100644
--- a/src/runtime/sigqueue_plan9.go
+++ b/src/runtime/sigqueue_plan9.go
@@ -6,6 +6,8 @@
 
 package runtime
 
+import _ "unsafe"
+
 const qsize = 64
 
 var sig struct {
@@ -92,6 +94,7 @@
 
 // Called to receive the next queued signal.
 // Must only be called from a single goroutine at a time.
+//go:linkname signal_recv os/signal.signal_recv
 func signal_recv() string {
 	for {
 		note := sig.q.pop()
@@ -108,6 +111,7 @@
 }
 
 // Must only be called from a single goroutine at a time.
+//go:linkname signal_enable os/signal.signal_enable
 func signal_enable(s uint32) {
 	if !sig.inuse {
 		// The first call to signal_enable is for us
@@ -120,9 +124,11 @@
 }
 
 // Must only be called from a single goroutine at a time.
+//go:linkname signal_disable os/signal.signal_disable
 func signal_disable(s uint32) {
 }
 
 // Must only be called from a single goroutine at a time.
+//go:linkname signal_ignore os/signal.signal_ignore
 func signal_ignore(s uint32) {
 }
diff --git a/src/runtime/stack1.go b/src/runtime/stack.go
similarity index 85%
rename from src/runtime/stack1.go
rename to src/runtime/stack.go
index 78d168b..1809a4d9a 100644
--- a/src/runtime/stack1.go
+++ b/src/runtime/stack.go
@@ -6,6 +6,107 @@
 
 import "unsafe"
 
+/*
+Stack layout parameters.
+Included both by runtime (compiled via 6c) and linkers (compiled via gcc).
+
+The per-goroutine g->stackguard is set to point StackGuard bytes
+above the bottom of the stack.  Each function compares its stack
+pointer against g->stackguard to check for overflow.  To cut one
+instruction from the check sequence for functions with tiny frames,
+the stack is allowed to protrude StackSmall bytes below the stack
+guard.  Functions with large frames don't bother with the check and
+always call morestack.  The sequences are (for amd64, others are
+similar):
+
+	guard = g->stackguard
+	frame = function's stack frame size
+	argsize = size of function arguments (call + return)
+
+	stack frame size <= StackSmall:
+		CMPQ guard, SP
+		JHI 3(PC)
+		MOVQ m->morearg, $(argsize << 32)
+		CALL morestack(SB)
+
+	stack frame size > StackSmall but < StackBig
+		LEAQ (frame-StackSmall)(SP), R0
+		CMPQ guard, R0
+		JHI 3(PC)
+		MOVQ m->morearg, $(argsize << 32)
+		CALL morestack(SB)
+
+	stack frame size >= StackBig:
+		MOVQ m->morearg, $((argsize << 32) | frame)
+		CALL morestack(SB)
+
+The bottom StackGuard - StackSmall bytes are important: there has
+to be enough room to execute functions that refuse to check for
+stack overflow, either because they need to be adjacent to the
+actual caller's frame (deferproc) or because they handle the imminent
+stack overflow (morestack).
+
+For example, deferproc might call malloc, which does one of the
+above checks (without allocating a full frame), which might trigger
+a call to morestack.  This sequence needs to fit in the bottom
+section of the stack.  On amd64, morestack's frame is 40 bytes, and
+deferproc's frame is 56 bytes.  That fits well within the
+StackGuard - StackSmall bytes at the bottom.
+The linkers explore all possible call traces involving non-splitting
+functions to make sure that this limit cannot be violated.
+*/
+
+const (
+	// StackSystem is a number of additional bytes to add
+	// to each stack below the usual guard area for OS-specific
+	// purposes like signal handling. Used on Windows, Plan 9,
+	// and Darwin/ARM because they do not use a separate stack.
+	_StackSystem = goos_windows*512*ptrSize + goos_plan9*512 + goos_darwin*goarch_arm*1024
+
+	// The minimum size of stack used by Go code
+	_StackMin = 2048
+
+	// The minimum stack size to allocate.
+	// The hackery here rounds FixedStack0 up to a power of 2.
+	_FixedStack0 = _StackMin + _StackSystem
+	_FixedStack1 = _FixedStack0 - 1
+	_FixedStack2 = _FixedStack1 | (_FixedStack1 >> 1)
+	_FixedStack3 = _FixedStack2 | (_FixedStack2 >> 2)
+	_FixedStack4 = _FixedStack3 | (_FixedStack3 >> 4)
+	_FixedStack5 = _FixedStack4 | (_FixedStack4 >> 8)
+	_FixedStack6 = _FixedStack5 | (_FixedStack5 >> 16)
+	_FixedStack  = _FixedStack6 + 1
+
+	// Functions that need frames bigger than this use an extra
+	// instruction to do the stack split check, to avoid overflow
+	// in case SP - framesize wraps below zero.
+	// This value can be no bigger than the size of the unmapped
+	// space at zero.
+	_StackBig = 4096
+
+	// The stack guard is a pointer this many bytes above the
+	// bottom of the stack.
+	_StackGuard = 640*stackGuardMultiplier + _StackSystem
+
+	// After a stack split check the SP is allowed to be this
+	// many bytes below the stack guard.  This saves an instruction
+	// in the checking sequence for tiny frames.
+	_StackSmall = 128
+
+	// The maximum number of bytes that a chain of NOSPLIT
+	// functions can use.
+	_StackLimit = _StackGuard - _StackSystem - _StackSmall
+)
+
+// Goroutine preemption request.
+// Stored into g->stackguard0 to cause split stack check failure.
+// Must be greater than any real sp.
+// 0xfffffade in hex.
+const (
+	_StackPreempt = uintptrMask & -1314
+	_StackFork    = uintptrMask & -1234
+)
+
 const (
 	// stackDebug == 0: no logging
 	//            == 1: logging of per-stack operations
@@ -100,7 +201,7 @@
 
 // Adds stack x to the free pool.  Must be called with stackpoolmu held.
 func stackpoolfree(x gclinkptr, order uint8) {
-	s := mHeap_Lookup(&mheap_, (unsafe.Pointer)(x))
+	s := mHeap_Lookup(&mheap_, unsafe.Pointer(x))
 	if s.state != _MSpanStack {
 		throw("freeing stack not in a stack span")
 	}
@@ -251,13 +352,13 @@
 			c.stackcache[order].list = x.ptr().next
 			c.stackcache[order].size -= uintptr(n)
 		}
-		v = (unsafe.Pointer)(x)
+		v = unsafe.Pointer(x)
 	} else {
 		s := mHeap_AllocStack(&mheap_, round(uintptr(n), _PageSize)>>_PageShift)
 		if s == nil {
 			throw("out of memory")
 		}
-		v = (unsafe.Pointer)(s.start << _PageShift)
+		v = unsafe.Pointer(s.start << _PageShift)
 	}
 
 	if raceenabled {
@@ -273,7 +374,7 @@
 
 func stackfree(stk stack, n uintptr) {
 	gp := getg()
-	v := (unsafe.Pointer)(stk.lo)
+	v := unsafe.Pointer(stk.lo)
 	if n&(n-1) != 0 {
 		throw("stack not a power of 2")
 	}
@@ -477,12 +578,10 @@
 	size := frame.varp - frame.sp
 	var minsize uintptr
 	switch thechar {
-	case '6', '8':
-		minsize = 0
 	case '7':
 		minsize = spAlign
 	default:
-		minsize = ptrSize
+		minsize = minFrameSize
 	}
 	if size > minsize {
 		var bv bitvector
@@ -545,7 +644,7 @@
 }
 
 func adjustctxt(gp *g, adjinfo *adjustinfo) {
-	adjustpointer(adjinfo, (unsafe.Pointer)(&gp.sched.ctxt))
+	adjustpointer(adjinfo, unsafe.Pointer(&gp.sched.ctxt))
 }
 
 func adjustdefers(gp *g, adjinfo *adjustinfo) {
@@ -555,30 +654,30 @@
 	// Adjust pointers in the Defer structs.
 	// Defer structs themselves are never on the stack.
 	for d := gp._defer; d != nil; d = d.link {
-		adjustpointer(adjinfo, (unsafe.Pointer)(&d.fn))
-		adjustpointer(adjinfo, (unsafe.Pointer)(&d.sp))
-		adjustpointer(adjinfo, (unsafe.Pointer)(&d._panic))
+		adjustpointer(adjinfo, unsafe.Pointer(&d.fn))
+		adjustpointer(adjinfo, unsafe.Pointer(&d.sp))
+		adjustpointer(adjinfo, unsafe.Pointer(&d._panic))
 	}
 }
 
 func adjustpanics(gp *g, adjinfo *adjustinfo) {
 	// Panics are on stack and already adjusted.
 	// Update pointer to head of list in G.
-	adjustpointer(adjinfo, (unsafe.Pointer)(&gp._panic))
+	adjustpointer(adjinfo, unsafe.Pointer(&gp._panic))
 }
 
 func adjustsudogs(gp *g, adjinfo *adjustinfo) {
 	// the data elements pointed to by a SudoG structure
 	// might be in the stack.
 	for s := gp.waiting; s != nil; s = s.waitlink {
-		adjustpointer(adjinfo, (unsafe.Pointer)(&s.elem))
-		adjustpointer(adjinfo, (unsafe.Pointer)(&s.selectdone))
+		adjustpointer(adjinfo, unsafe.Pointer(&s.elem))
+		adjustpointer(adjinfo, unsafe.Pointer(&s.selectdone))
 	}
 }
 
 func adjuststkbar(gp *g, adjinfo *adjustinfo) {
 	for i := int(gp.stkbarPos); i < len(gp.stkbar); i++ {
-		adjustpointer(adjinfo, (unsafe.Pointer)(&gp.stkbar[i].savedLRPtr))
+		adjustpointer(adjinfo, unsafe.Pointer(&gp.stkbar[i].savedLRPtr))
 	}
 }
 
@@ -817,11 +916,11 @@
 func gostartcallfn(gobuf *gobuf, fv *funcval) {
 	var fn unsafe.Pointer
 	if fv != nil {
-		fn = (unsafe.Pointer)(fv.fn)
+		fn = unsafe.Pointer(fv.fn)
 	} else {
 		fn = unsafe.Pointer(funcPC(nilfunc))
 	}
-	gostartcall(gobuf, fn, (unsafe.Pointer)(fv))
+	gostartcall(gobuf, fn, unsafe.Pointer(fv))
 }
 
 // Maybe shrink the stack being used by gp.
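The _FixedStack0.._FixedStack6 "hackery" in the constants above is the usual or-shift cascade for rounding up to a power of two, unrolled because Go constant expressions cannot loop. The same computation as an ordinary function, for comparison (a sketch; the runtime needs the constant form so the result is usable at compile time):

	package main

	import "fmt"

	// roundUpPow2 returns the smallest power of two >= n, using the same
	// or-shift cascade as _FixedStack0.._FixedStack6: after OR-ing in all
	// the shifted copies, every bit below the top set bit of n-1 is set,
	// so adding 1 yields the next power of two.
	func roundUpPow2(n uint32) uint32 {
		n--
		n |= n >> 1
		n |= n >> 2
		n |= n >> 4
		n |= n >> 8
		n |= n >> 16
		return n + 1
	}

	func main() {
		// _StackMin + _StackSystem is 2048 on a plain Linux build, already
		// a power of two; Darwin/ARM adds 1024, giving 3072, which rounds up.
		fmt.Println(roundUpPow2(2048)) // 2048
		fmt.Println(roundUpPow2(3072)) // 4096
	}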
diff --git a/src/runtime/stack2.go b/src/runtime/stack2.go
deleted file mode 100644
index 59d4ef6..0000000
--- a/src/runtime/stack2.go
+++ /dev/null
@@ -1,108 +0,0 @@
-// Copyright 2011 The Go Authors.  All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-/*
-Stack layout parameters.
-Included both by runtime (compiled via 6c) and linkers (compiled via gcc).
-
-The per-goroutine g->stackguard is set to point StackGuard bytes
-above the bottom of the stack.  Each function compares its stack
-pointer against g->stackguard to check for overflow.  To cut one
-instruction from the check sequence for functions with tiny frames,
-the stack is allowed to protrude StackSmall bytes below the stack
-guard.  Functions with large frames don't bother with the check and
-always call morestack.  The sequences are (for amd64, others are
-similar):
-
-	guard = g->stackguard
-	frame = function's stack frame size
-	argsize = size of function arguments (call + return)
-
-	stack frame size <= StackSmall:
-		CMPQ guard, SP
-		JHI 3(PC)
-		MOVQ m->morearg, $(argsize << 32)
-		CALL morestack(SB)
-
-	stack frame size > StackSmall but < StackBig
-		LEAQ (frame-StackSmall)(SP), R0
-		CMPQ guard, R0
-		JHI 3(PC)
-		MOVQ m->morearg, $(argsize << 32)
-		CALL morestack(SB)
-
-	stack frame size >= StackBig:
-		MOVQ m->morearg, $((argsize << 32) | frame)
-		CALL morestack(SB)
-
-The bottom StackGuard - StackSmall bytes are important: there has
-to be enough room to execute functions that refuse to check for
-stack overflow, either because they need to be adjacent to the
-actual caller's frame (deferproc) or because they handle the imminent
-stack overflow (morestack).
-
-For example, deferproc might call malloc, which does one of the
-above checks (without allocating a full frame), which might trigger
-a call to morestack.  This sequence needs to fit in the bottom
-section of the stack.  On amd64, morestack's frame is 40 bytes, and
-deferproc's frame is 56 bytes.  That fits well within the
-StackGuard - StackSmall bytes at the bottom.
-The linkers explore all possible call traces involving non-splitting
-functions to make sure that this limit cannot be violated.
-*/
-
-// Constants here match those in cmd/internal/obj/stack.go.
-
-const (
-	// StackSystem is a number of additional bytes to add
-	// to each stack below the usual guard area for OS-specific
-	// purposes like signal handling. Used on Windows, Plan 9,
-	// and Darwin/ARM because they do not use a separate stack.
-	_StackSystem = goos_windows*512*ptrSize + goos_plan9*512 + goos_darwin*goarch_arm*1024
-
-	// The minimum size of stack used by Go code
-	_StackMin = 2048
-
-	// The minimum stack size to allocate.
-	// The hackery here rounds FixedStack0 up to a power of 2.
-	_FixedStack0 = _StackMin + _StackSystem
-	_FixedStack1 = _FixedStack0 - 1
-	_FixedStack2 = _FixedStack1 | (_FixedStack1 >> 1)
-	_FixedStack3 = _FixedStack2 | (_FixedStack2 >> 2)
-	_FixedStack4 = _FixedStack3 | (_FixedStack3 >> 4)
-	_FixedStack5 = _FixedStack4 | (_FixedStack4 >> 8)
-	_FixedStack6 = _FixedStack5 | (_FixedStack5 >> 16)
-	_FixedStack  = _FixedStack6 + 1
-
-	// Functions that need frames bigger than this use an extra
-	// instruction to do the stack split check, to avoid overflow
-	// in case SP - framesize wraps below zero.
-	// This value can be no bigger than the size of the unmapped
-	// space at zero.
-	_StackBig = 4096
-
-	// The stack guard is a pointer this many bytes above the
-	// bottom of the stack.
-	_StackGuard = 640*stackGuardMultiplier + _StackSystem
-
-	// After a stack split check the SP is allowed to be this
-	// many bytes below the stack guard.  This saves an instruction
-	// in the checking sequence for tiny frames.
-	_StackSmall = 128
-
-	// The maximum number of bytes that a chain of NOSPLIT
-	// functions can use.
-	_StackLimit = _StackGuard - _StackSystem - _StackSmall
-)
-
-// Goroutine preemption request.
-// Stored into g->stackguard0 to cause split stack check failure.
-// Must be greater than any real sp.
-// 0xfffffade in hex.
-const (
-	_StackPreempt = uintptrMask & -1314
-	_StackFork    = uintptrMask & -1234
-)
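
An aside on the _FixedStack chain deleted above: it is a branch-free round-up to the next power of two, smearing the highest set bit into every lower position and then adding one. A standalone sketch of the same trick (roundUpPow2 is an illustrative name, not a runtime symbol):

	package main

	import "fmt"

	// roundUpPow2 rounds n up to the next power of two using the same
	// subtract-one, or-shift, add-one cascade as _FixedStack0.._FixedStack.
	func roundUpPow2(n uint32) uint32 {
		n--
		n |= n >> 1
		n |= n >> 2
		n |= n >> 4
		n |= n >> 8
		n |= n >> 16
		return n + 1
	}

	func main() {
		fmt.Println(roundUpPow2(2048 + 512)) // 4096
	}
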
diff --git a/src/runtime/string.go b/src/runtime/string.go
index a5851b7..0b31173 100644
--- a/src/runtime/string.go
+++ b/src/runtime/string.go
@@ -360,3 +360,63 @@
 	}
 	return n
 }
+
+//go:nosplit
+func findnull(s *byte) int {
+	if s == nil {
+		return 0
+	}
+	p := (*[_MaxMem/2 - 1]byte)(unsafe.Pointer(s))
+	l := 0
+	for p[l] != 0 {
+		l++
+	}
+	return l
+}
+
+func findnullw(s *uint16) int {
+	if s == nil {
+		return 0
+	}
+	p := (*[_MaxMem/2/2 - 1]uint16)(unsafe.Pointer(s))
+	l := 0
+	for p[l] != 0 {
+		l++
+	}
+	return l
+}
+
+var maxstring uintptr = 256 // a hint for print
+
+//go:nosplit
+func gostringnocopy(str *byte) string {
+	ss := stringStruct{str: unsafe.Pointer(str), len: findnull(str)}
+	s := *(*string)(unsafe.Pointer(&ss))
+	for {
+		ms := maxstring
+		if uintptr(len(s)) <= ms || casuintptr(&maxstring, ms, uintptr(len(s))) {
+			break
+		}
+	}
+	return s
+}
+
+func gostringw(strw *uint16) string {
+	var buf [8]byte
+	str := (*[_MaxMem/2/2 - 1]uint16)(unsafe.Pointer(strw))
+	n1 := 0
+	for i := 0; str[i] != 0; i++ {
+		n1 += runetochar(buf[:], rune(str[i]))
+	}
+	s, b := rawstring(n1 + 4)
+	n2 := 0
+	for i := 0; str[i] != 0; i++ {
+		// check for race
+		if n2 >= n1 {
+			break
+		}
+		n2 += runetochar(b[n2:], rune(str[i]))
+	}
+	b[n2] = 0 // for luck
+	return s[:n2]
+}
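
The maxstring loop carried into gostringnocopy above is a lock-free maximum update: reread the current value and retry the compare-and-swap until either it already covers the new length or the swap lands. The same shape with sync/atomic, as an illustrative sketch:

	package main

	import (
		"fmt"
		"sync/atomic"
	)

	// updateMax raises *max to at least v despite concurrent updaters,
	// mirroring the casuintptr loop in gostringnocopy.
	func updateMax(max *uint64, v uint64) {
		for {
			old := atomic.LoadUint64(max)
			if v <= old || atomic.CompareAndSwapUint64(max, old, v) {
				return
			}
		}
	}

	func main() {
		var max uint64
		updateMax(&max, 7)
		updateMax(&max, 3) // no effect: 3 <= 7
		fmt.Println(max)   // 7
	}
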
diff --git a/src/runtime/string1.go b/src/runtime/string1.go
deleted file mode 100644
index 4bfa3d9..0000000
--- a/src/runtime/string1.go
+++ /dev/null
@@ -1,67 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-import "unsafe"
-
-//go:nosplit
-func findnull(s *byte) int {
-	if s == nil {
-		return 0
-	}
-	p := (*[_MaxMem/2 - 1]byte)(unsafe.Pointer(s))
-	l := 0
-	for p[l] != 0 {
-		l++
-	}
-	return l
-}
-
-func findnullw(s *uint16) int {
-	if s == nil {
-		return 0
-	}
-	p := (*[_MaxMem/2/2 - 1]uint16)(unsafe.Pointer(s))
-	l := 0
-	for p[l] != 0 {
-		l++
-	}
-	return l
-}
-
-var maxstring uintptr = 256 // a hint for print
-
-//go:nosplit
-func gostringnocopy(str *byte) string {
-	ss := stringStruct{str: unsafe.Pointer(str), len: findnull(str)}
-	s := *(*string)(unsafe.Pointer(&ss))
-	for {
-		ms := maxstring
-		if uintptr(len(s)) <= ms || casuintptr(&maxstring, ms, uintptr(len(s))) {
-			break
-		}
-	}
-	return s
-}
-
-func gostringw(strw *uint16) string {
-	var buf [8]byte
-	str := (*[_MaxMem/2/2 - 1]uint16)(unsafe.Pointer(strw))
-	n1 := 0
-	for i := 0; str[i] != 0; i++ {
-		n1 += runetochar(buf[:], rune(str[i]))
-	}
-	s, b := rawstring(n1 + 4)
-	n2 := 0
-	for i := 0; str[i] != 0; i++ {
-		// check for race
-		if n2 >= n1 {
-			break
-		}
-		n2 += runetochar(b[n2:], rune(str[i]))
-	}
-	b[n2] = 0 // for luck
-	return s[:n2]
-}
diff --git a/src/runtime/stubs2.go b/src/runtime/stubs2.go
index 1cb6f91..95db924 100644
--- a/src/runtime/stubs2.go
+++ b/src/runtime/stubs2.go
@@ -18,7 +18,6 @@
 func nanotime() int64
 func usleep(usec uint32)
 
-func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) unsafe.Pointer
 func munmap(addr unsafe.Pointer, n uintptr)
 
 //go:noescape
diff --git a/src/runtime/symtab.go b/src/runtime/symtab.go
index fa1caaf..4668609 100644
--- a/src/runtime/symtab.go
+++ b/src/runtime/symtab.go
@@ -284,7 +284,7 @@
 	if datap == nil {
 		return nil
 	}
-	return (*byte)(unsafe.Pointer(&datap.pclntable[f.nameoff]))
+	return &datap.pclntable[f.nameoff]
 }
 
 func funcname(f *_func) string {
diff --git a/src/runtime/sys_linux_amd64.s b/src/runtime/sys_linux_amd64.s
index 59c21c5..7ad704f 100644
--- a/src/runtime/sys_linux_amd64.s
+++ b/src/runtime/sys_linux_amd64.s
@@ -239,7 +239,7 @@
 	SYSCALL
 	INT $3	// not reached
 
-TEXT runtime·mmap(SB),NOSPLIT,$0
+TEXT runtime·sysMmap(SB),NOSPLIT,$0
 	MOVQ	addr+0(FP), DI
 	MOVQ	n+8(FP), SI
 	MOVL	prot+16(FP), DX
@@ -256,6 +256,20 @@
 	MOVQ	AX, ret+32(FP)
 	RET
 
+// Call the function stored in _cgo_mmap using the GCC calling convention.
+// This must be called on the system stack.
+TEXT runtime·callCgoMmap(SB),NOSPLIT,$0
+	MOVQ	addr+0(FP), DI
+	MOVQ	n+8(FP), SI
+	MOVL	prot+16(FP), DX
+	MOVL	flags+20(FP), CX
+	MOVL	fd+24(FP), R8
+	MOVL	off+28(FP), R9
+	MOVQ	_cgo_mmap(SB), AX
+	CALL	AX
+	MOVQ	AX, ret+32(FP)
+	RET
+
 TEXT runtime·munmap(SB),NOSPLIT,$0
 	MOVQ	addr+0(FP), DI
 	MOVQ	n+8(FP), SI
diff --git a/src/runtime/sys_linux_ppc64x.s b/src/runtime/sys_linux_ppc64x.s
index 01575f8..b334a03 100644
--- a/src/runtime/sys_linux_ppc64x.s
+++ b/src/runtime/sys_linux_ppc64x.s
@@ -12,6 +12,7 @@
 #include "go_asm.h"
 #include "go_tls.h"
 #include "textflag.h"
+#include "asm_ppc64x.h"
 
 #define SYS_exit		  1
 #define SYS_read		  3
@@ -47,17 +48,17 @@
 #define SYS_clock_gettime	246
 #define SYS_epoll_create1	315
 
-TEXT runtime·exit(SB),NOSPLIT,$-8-4
+TEXT runtime·exit(SB),NOSPLIT|NOFRAME,$0-4
 	MOVW	code+0(FP), R3
 	SYSCALL	$SYS_exit_group
 	RET
 
-TEXT runtime·exit1(SB),NOSPLIT,$-8-4
+TEXT runtime·exit1(SB),NOSPLIT|NOFRAME,$0-4
 	MOVW	code+0(FP), R3
 	SYSCALL	$SYS_exit
 	RET
 
-TEXT runtime·open(SB),NOSPLIT,$-8-20
+TEXT runtime·open(SB),NOSPLIT|NOFRAME,$0-20
 	MOVD	name+0(FP), R3
 	MOVW	mode+8(FP), R4
 	MOVW	perm+12(FP), R5
@@ -67,7 +68,7 @@
 	MOVW	R3, ret+16(FP)
 	RET
 
-TEXT runtime·closefd(SB),NOSPLIT,$-8-12
+TEXT runtime·closefd(SB),NOSPLIT|NOFRAME,$0-12
 	MOVW	fd+0(FP), R3
 	SYSCALL	$SYS_close
 	BVC	2(PC)
@@ -75,7 +76,7 @@
 	MOVW	R3, ret+8(FP)
 	RET
 
-TEXT runtime·write(SB),NOSPLIT,$-8-28
+TEXT runtime·write(SB),NOSPLIT|NOFRAME,$0-28
 	MOVD	fd+0(FP), R3
 	MOVD	p+8(FP), R4
 	MOVW	n+16(FP), R5
@@ -85,7 +86,7 @@
 	MOVW	R3, ret+24(FP)
 	RET
 
-TEXT runtime·read(SB),NOSPLIT,$-8-28
+TEXT runtime·read(SB),NOSPLIT|NOFRAME,$0-28
 	MOVW	fd+0(FP), R3
 	MOVD	p+8(FP), R4
 	MOVW	n+16(FP), R5
@@ -95,7 +96,7 @@
 	MOVW	R3, ret+24(FP)
 	RET
 
-TEXT runtime·getrlimit(SB),NOSPLIT,$-8-20
+TEXT runtime·getrlimit(SB),NOSPLIT|NOFRAME,$0-20
 	MOVW	kind+0(FP), R3
 	MOVD	limit+8(FP), R4
 	SYSCALL	$SYS_ugetrlimit
@@ -126,28 +127,28 @@
 	MOVW	R3, ret+0(FP)
 	RET
 
-TEXT runtime·raise(SB),NOSPLIT,$-8
+TEXT runtime·raise(SB),NOSPLIT|NOFRAME,$0
 	SYSCALL	$SYS_gettid
 	MOVW	R3, R3	// arg 1 tid
 	MOVW	sig+0(FP), R4	// arg 2
 	SYSCALL	$SYS_tkill
 	RET
 
-TEXT runtime·raiseproc(SB),NOSPLIT,$-8
+TEXT runtime·raiseproc(SB),NOSPLIT|NOFRAME,$0
 	SYSCALL	$SYS_getpid
 	MOVW	R3, R3	// arg 1 pid
 	MOVW	sig+0(FP), R4	// arg 2
 	SYSCALL	$SYS_kill
 	RET
 
-TEXT runtime·setitimer(SB),NOSPLIT,$-8-24
+TEXT runtime·setitimer(SB),NOSPLIT|NOFRAME,$0-24
 	MOVW	mode+0(FP), R3
 	MOVD	new+8(FP), R4
 	MOVD	old+16(FP), R5
 	SYSCALL	$SYS_setitimer
 	RET
 
-TEXT runtime·mincore(SB),NOSPLIT,$-8-28
+TEXT runtime·mincore(SB),NOSPLIT|NOFRAME,$0-28
 	MOVD	addr+0(FP), R3
 	MOVD	n+8(FP), R4
 	MOVD	dst+16(FP), R5
@@ -182,7 +183,7 @@
 	MOVD	R3, ret+0(FP)
 	RET
 
-TEXT runtime·rtsigprocmask(SB),NOSPLIT,$-8-28
+TEXT runtime·rtsigprocmask(SB),NOSPLIT|NOFRAME,$0-28
 	MOVW	sig+0(FP), R3
 	MOVD	new+8(FP), R4
 	MOVD	old+16(FP), R5
@@ -192,7 +193,7 @@
 	MOVD	R0, 0xf1(R0)	// crash
 	RET
 
-TEXT runtime·rt_sigaction(SB),NOSPLIT,$-8-36
+TEXT runtime·rt_sigaction(SB),NOSPLIT|NOFRAME,$0-36
 	MOVD	sig+0(FP), R3
 	MOVD	new+8(FP), R4
 	MOVD	old+16(FP), R5
@@ -205,8 +206,8 @@
 	MOVW	sig+8(FP), R3
 	MOVD	info+16(FP), R4
 	MOVD	ctx+24(FP), R5
-	MOVD	fn+0(FP), R31
-	MOVD	R31, CTR
+	MOVD	fn+0(FP), R12
+	MOVD	R12, CTR
 	BL	(CTR)
 	RET
 
@@ -215,7 +216,7 @@
 TEXT runtime·sigtramp(SB),NOSPLIT,$64
 #else
 // function descriptor for the real sigtramp
-TEXT runtime·sigtramp(SB),NOSPLIT,$-8
+TEXT runtime·sigtramp(SB),NOSPLIT|NOFRAME,$0
 	DWORD	$runtime·_sigtramp(SB)
 	DWORD	$0
 	DWORD	$0
@@ -231,15 +232,15 @@
 	BEQ	2(PC)
 	BL	runtime·load_g(SB)
 
-	MOVW	R3, 8(R1)
-	MOVD	R4, 16(R1)
-	MOVD	R5, 24(R1)
-	MOVD	$runtime·sigtrampgo(SB), R31
-	MOVD	R31, CTR
+	MOVW	R3, FIXED_FRAME+0(R1)
+	MOVD	R4, FIXED_FRAME+8(R1)
+	MOVD	R5, FIXED_FRAME+16(R1)
+	MOVD	$runtime·sigtrampgo(SB), R12
+	MOVD	R12, CTR
 	BL	(CTR)
 	RET
 
-TEXT runtime·mmap(SB),NOSPLIT,$-8
+TEXT runtime·mmap(SB),NOSPLIT|NOFRAME,$0
 	MOVD	addr+0(FP), R3
 	MOVD	n+8(FP), R4
 	MOVW	prot+16(FP), R5
@@ -251,7 +252,7 @@
 	MOVD	R3, ret+32(FP)
 	RET
 
-TEXT runtime·munmap(SB),NOSPLIT,$-8
+TEXT runtime·munmap(SB),NOSPLIT|NOFRAME,$0
 	MOVD	addr+0(FP), R3
 	MOVD	n+8(FP), R4
 	SYSCALL	$SYS_munmap
@@ -259,7 +260,7 @@
 	MOVD	R0, 0xf3(R0)
 	RET
 
-TEXT runtime·madvise(SB),NOSPLIT,$-8
+TEXT runtime·madvise(SB),NOSPLIT|NOFRAME,$0
 	MOVD	addr+0(FP), R3
 	MOVD	n+8(FP), R4
 	MOVW	flags+16(FP), R5
@@ -269,7 +270,7 @@
 
 // int64 futex(int32 *uaddr, int32 op, int32 val,
 //	struct timespec *timeout, int32 *uaddr2, int32 val2);
-TEXT runtime·futex(SB),NOSPLIT,$-8
+TEXT runtime·futex(SB),NOSPLIT|NOFRAME,$0
 	MOVD	addr+0(FP), R3
 	MOVW	op+8(FP), R4
 	MOVW	val+12(FP), R5
@@ -281,7 +282,7 @@
 	RET
 
 // int64 clone(int32 flags, void *stk, M *mp, G *gp, void (*fn)(void));
-TEXT runtime·clone(SB),NOSPLIT,$-8
+TEXT runtime·clone(SB),NOSPLIT|NOFRAME,$0
 	MOVW	flags+0(FP), R3
 	MOVD	stk+8(FP), R4
 
@@ -344,7 +345,7 @@
 	SYSCALL	$SYS_exit
 	BR	-2(PC)	// keep exiting
 
-TEXT runtime·sigaltstack(SB),NOSPLIT,$-8
+TEXT runtime·sigaltstack(SB),NOSPLIT|NOFRAME,$0
 	MOVD	new+0(FP), R3
 	MOVD	old+8(FP), R4
 	SYSCALL	$SYS_sigaltstack
@@ -352,11 +353,11 @@
 	MOVD	R0, 0xf1(R0)  // crash
 	RET
 
-TEXT runtime·osyield(SB),NOSPLIT,$-8
+TEXT runtime·osyield(SB),NOSPLIT|NOFRAME,$0
 	SYSCALL	$SYS_sched_yield
 	RET
 
-TEXT runtime·sched_getaffinity(SB),NOSPLIT,$-8
+TEXT runtime·sched_getaffinity(SB),NOSPLIT|NOFRAME,$0
 	MOVD	pid+0(FP), R3
 	MOVD	len+8(FP), R4
 	MOVD	buf+16(FP), R5
@@ -365,21 +366,21 @@
 	RET
 
 // int32 runtime·epollcreate(int32 size);
-TEXT runtime·epollcreate(SB),NOSPLIT,$-8
+TEXT runtime·epollcreate(SB),NOSPLIT|NOFRAME,$0
 	MOVW    size+0(FP), R3
 	SYSCALL	$SYS_epoll_create
 	MOVW	R3, ret+8(FP)
 	RET
 
 // int32 runtime·epollcreate1(int32 flags);
-TEXT runtime·epollcreate1(SB),NOSPLIT,$-8
+TEXT runtime·epollcreate1(SB),NOSPLIT|NOFRAME,$0
 	MOVW	flags+0(FP), R3
 	SYSCALL	$SYS_epoll_create1
 	MOVW	R3, ret+8(FP)
 	RET
 
 // func epollctl(epfd, op, fd int32, ev *epollEvent) int
-TEXT runtime·epollctl(SB),NOSPLIT,$-8
+TEXT runtime·epollctl(SB),NOSPLIT|NOFRAME,$0
 	MOVW	epfd+0(FP), R3
 	MOVW	op+4(FP), R4
 	MOVW	fd+8(FP), R5
@@ -389,7 +390,7 @@
 	RET
 
 // int32 runtime·epollwait(int32 epfd, EpollEvent *ev, int32 nev, int32 timeout);
-TEXT runtime·epollwait(SB),NOSPLIT,$-8
+TEXT runtime·epollwait(SB),NOSPLIT|NOFRAME,$0
 	MOVW	epfd+0(FP), R3
 	MOVD	ev+8(FP), R4
 	MOVW	nev+16(FP), R5
@@ -399,7 +400,7 @@
 	RET
 
 // void runtime·closeonexec(int32 fd);
-TEXT runtime·closeonexec(SB),NOSPLIT,$-8
+TEXT runtime·closeonexec(SB),NOSPLIT|NOFRAME,$0
 	MOVW    fd+0(FP), R3  // fd
 	MOVD    $2, R4  // F_SETFD
 	MOVD    $1, R5  // FD_CLOEXEC
diff --git a/src/runtime/sys_windows_amd64.s b/src/runtime/sys_windows_amd64.s
index ea4f3e0..b15eacb 100644
--- a/src/runtime/sys_windows_amd64.s
+++ b/src/runtime/sys_windows_amd64.s
@@ -428,19 +428,21 @@
 	RET
 
 // Runs on OS stack. duration (in 100ns units) is in BX.
-TEXT runtime·usleep2(SB),NOSPLIT,$16
+// The function leaves room for 4 syscall parameters
+// (as per windows amd64 calling convention).
+TEXT runtime·usleep2(SB),NOSPLIT,$48
 	MOVQ	SP, AX
 	ANDQ	$~15, SP	// alignment as per Windows requirement
-	MOVQ	AX, 8(SP)
+	MOVQ	AX, 40(SP)
 	// Want negative 100ns units.
 	NEGQ	BX
-	MOVQ	SP, R8 // ptime
+	LEAQ	32(SP), R8  // ptime
 	MOVQ	BX, (R8)
 	MOVQ	$-1, CX // handle
 	MOVQ	$0, DX // alertable
 	MOVQ	runtime·_NtWaitForSingleObject(SB), AX
 	CALL	AX
-	MOVQ	8(SP), SP
+	MOVQ	40(SP), SP
 	RET
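
For the negation above: NtWaitForSingleObject treats a negative timeout as a relative interval counted in 100ns units, which is why BX is negated before the call. An illustrative Go-side equivalent of that conversion (to100ns is a made-up helper, not part of the runtime):

	package main

	import (
		"fmt"
		"time"
	)

	// to100ns converts a duration to the negative 100ns units that
	// NtWaitForSingleObject expects for a relative wait.
	func to100ns(d time.Duration) int64 {
		return -int64(d / 100) // a time.Duration counts nanoseconds
	}

	func main() {
		fmt.Println(to100ns(time.Millisecond)) // -10000
	}
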
 
 // func now() (sec int64, nsec int32)
diff --git a/src/runtime/syscall_windows_test.go b/src/runtime/syscall_windows_test.go
index 677eb5f..4bedd4a 100644
--- a/src/runtime/syscall_windows_test.go
+++ b/src/runtime/syscall_windows_test.go
@@ -640,3 +640,10 @@
 		t.Errorf("got %d want %d", got, want)
 	}
 }
+
+func TestTimeBeginPeriod(t *testing.T) {
+	const TIMERR_NOERROR = 0
+	if *runtime.TimeBeginPeriodRetValue != TIMERR_NOERROR {
+		t.Fatalf("timeBeginPeriod failed: it returned %d", *runtime.TimeBeginPeriodRetValue)
+	}
+}
diff --git a/src/runtime/textflag.h b/src/runtime/textflag.h
index f2690c9..dbf3d99 100644
--- a/src/runtime/textflag.h
+++ b/src/runtime/textflag.h
@@ -24,3 +24,7 @@
 // Allocate a word of thread local storage and store the offset from the
 // thread local base to the thread local storage in this variable.
 #define TLSBSS	256
+// Do not insert instructions to allocate a stack frame for this function.
+// Only valid on functions that declare a frame size of 0.
+// TODO(mwhudson): only implemented for ppc64x at present.
+#define NOFRAME 512
diff --git a/src/runtime/tls_arm.s b/src/runtime/tls_arm.s
index d37970e..00ca469 100644
--- a/src/runtime/tls_arm.s
+++ b/src/runtime/tls_arm.s
@@ -15,7 +15,7 @@
 // Note: both functions will clobber R0 and R11 and
 // can be called from 5c ABI code.
 
-// On android and darwin, runtime.tlsg is a normal variable.
+// On android and darwin, runtime.tls_g is a normal variable.
 // TLS offset is computed in x_cgo_inittls.
 #ifdef GOOS_android
 #define TLSG_IS_VARIABLE
@@ -41,14 +41,7 @@
 	// The replacement function saves LR in R11 over the call to read_tls_fallback.
 	MRC	15, 0, R0, C13, C0, 3 // fetch TLS base pointer
 	BIC $3, R0 // Darwin/ARM might return unaligned pointer
-	// $runtime.tlsg(SB) is a special linker symbol.
-	// It is the offset from the TLS base pointer to our
-	// thread-local storage for g.
-#ifdef TLSG_IS_VARIABLE
-	MOVW	runtime·tlsg(SB), R11
-#else
-	MOVW	$runtime·tlsg(SB), R11
-#endif
+	MOVW	runtime·tls_g(SB), R11
 	ADD	R11, R0
 	MOVW	g, 0(R0)
 	MOVW	g, R0 // preserve R0 across call to setg<>
@@ -65,14 +58,7 @@
 	// See save_g
 	MRC	15, 0, R0, C13, C0, 3 // fetch TLS base pointer
 	BIC $3, R0 // Darwin/ARM might return unaligned pointer
-	// $runtime.tlsg(SB) is a special linker symbol.
-	// It is the offset from the TLS base pointer to our
-	// thread-local storage for g.
-#ifdef TLSG_IS_VARIABLE
-	MOVW	runtime·tlsg(SB), R11
-#else
-	MOVW	$runtime·tlsg(SB), R11
-#endif
+	MOVW	runtime·tls_g(SB), R11
 	ADD	R11, R0
 	MOVW	0(R0), g
 	RET
@@ -95,7 +81,11 @@
 	B.EQ	nocgo
 	MRC     15, 0, R0, C13, C0, 3 	// load TLS base pointer
 	MOVW 	R0, R3 			// arg 3: TLS base pointer
-	MOVW 	$runtime·tlsg(SB), R2 	// arg 2: tlsg
+#ifdef TLSG_IS_VARIABLE
+	MOVW 	$runtime·tls_g(SB), R2 	// arg 2: &tls_g
+#else
+	MOVW	$0, R2			// arg 2: not used when using platform tls
+#endif
 	MOVW	$setg_gcc<>(SB), R1 	// arg 1: setg
 	MOVW	g, R0 			// arg 0: G
 	BL	(R4) // will clobber R0-R3
@@ -109,5 +99,7 @@
 	B		runtime·save_g(SB)
 
 #ifdef TLSG_IS_VARIABLE
-GLOBL runtime·tlsg+0(SB), NOPTR, $4
+GLOBL runtime·tls_g+0(SB), NOPTR, $4
+#else
+GLOBL runtime·tls_g+0(SB), TLSBSS, $4
 #endif
diff --git a/src/runtime/tls_arm64.s b/src/runtime/tls_arm64.s
index a5f86c4..f31a16b 100644
--- a/src/runtime/tls_arm64.s
+++ b/src/runtime/tls_arm64.s
@@ -53,8 +53,5 @@
 	RET
 
 #ifdef TLSG_IS_VARIABLE
-// The runtime.tlsg name is being handled specially in the
-// linker. As we just need a regular variable here, don't
-// use that name.
 GLOBL runtime·tls_g+0(SB), NOPTR, $8
 #endif
diff --git a/src/runtime/tls_ppc64x.s b/src/runtime/tls_ppc64x.s
index fc1718f..d930718 100644
--- a/src/runtime/tls_ppc64x.s
+++ b/src/runtime/tls_ppc64x.s
@@ -22,7 +22,7 @@
 // If !iscgo, this is a no-op.
 //
 // NOTE: setg_gcc<> assume this clobbers only R31.
-TEXT runtime·save_g(SB),NOSPLIT,$-8-0
+TEXT runtime·save_g(SB),NOSPLIT|NOFRAME,$0-0
 	MOVB	runtime·iscgo(SB), R31
 	CMP	R31, $0
 	BEQ	nocgo
@@ -30,7 +30,7 @@
 	// $runtime.tlsg(SB) is a special linker symbol.
 	// It is the offset from the start of TLS to our
 	// thread-local storage for g.
-	MOVD	$runtime·tlsg(SB), R31
+	MOVD	$runtime·tls_g(SB), R31
 	ADD	R13, R31
 	// The actual TLS base is 0x7000 below R13
 	SUB	$0x7000, R31
@@ -50,11 +50,13 @@
 // usual Go registers aren't set up.
 //
 // NOTE: _cgo_topofstack assumes this only clobbers g (R30), and R31.
-TEXT runtime·load_g(SB),NOSPLIT,$-8-0
-	MOVD	$runtime·tlsg(SB), R31
+TEXT runtime·load_g(SB),NOSPLIT|NOFRAME,$0-0
+	MOVD	$runtime·tls_g(SB), R31
 	// R13 is the C ABI TLS base pointer + 0x7000
 	ADD	R13, R31
 	SUB	$0x7000, R31
 
 	MOVD	0(R31), g
 	RET
+
+GLOBL runtime·tls_g+0(SB), TLSBSS, $8
diff --git a/src/runtime/trace.go b/src/runtime/trace.go
index c818462..6631bc2 100644
--- a/src/runtime/trace.go
+++ b/src/runtime/trace.go
@@ -624,7 +624,7 @@
 	if len(pcs) == 0 {
 		return 0
 	}
-	hash := memhash(unsafe.Pointer(&pcs[0]), uintptr(len(pcs))*unsafe.Sizeof(pcs[0]), 0)
+	hash := memhash(unsafe.Pointer(&pcs[0]), 0, uintptr(len(pcs))*unsafe.Sizeof(pcs[0]))
 	// First, search the hashtable w/o the mutex.
 	if id := tab.find(pcs, hash); id != 0 {
 		return id
@@ -828,7 +828,7 @@
 }
 
 func traceGoSysCall() {
-	traceEvent(traceEvGoSysCall, 4)
+	traceEvent(traceEvGoSysCall, 1)
 }
 
 func traceGoSysExit(seq uint64, ts int64) {
diff --git a/src/runtime/trace/trace_stack_test.go b/src/runtime/trace/trace_stack_test.go
index 3fe1747..b5fe7c3 100644
--- a/src/runtime/trace/trace_stack_test.go
+++ b/src/runtime/trace/trace_stack_test.go
@@ -144,107 +144,107 @@
 		Stk  []frame
 	}
 	want := []eventDesc{
-		eventDesc{trace.EvGCStart, []frame{
-			frame{"runtime.GC", 0},
-			frame{"runtime/trace_test.TestTraceSymbolize", 106},
-			frame{"testing.tRunner", 0},
+		{trace.EvGCStart, []frame{
+			{"runtime.GC", 0},
+			{"runtime/trace_test.TestTraceSymbolize", 106},
+			{"testing.tRunner", 0},
 		}},
-		eventDesc{trace.EvGoSched, []frame{
-			frame{"runtime/trace_test.TestTraceSymbolize", 107},
-			frame{"testing.tRunner", 0},
+		{trace.EvGoSched, []frame{
+			{"runtime/trace_test.TestTraceSymbolize", 107},
+			{"testing.tRunner", 0},
 		}},
-		eventDesc{trace.EvGoCreate, []frame{
-			frame{"runtime/trace_test.TestTraceSymbolize", 39},
-			frame{"testing.tRunner", 0},
+		{trace.EvGoCreate, []frame{
+			{"runtime/trace_test.TestTraceSymbolize", 39},
+			{"testing.tRunner", 0},
 		}},
-		eventDesc{trace.EvGoStop, []frame{
-			frame{"runtime.block", 0},
-			frame{"runtime/trace_test.TestTraceSymbolize.func1", 38},
+		{trace.EvGoStop, []frame{
+			{"runtime.block", 0},
+			{"runtime/trace_test.TestTraceSymbolize.func1", 38},
 		}},
-		eventDesc{trace.EvGoStop, []frame{
-			frame{"runtime.chansend1", 0},
-			frame{"runtime/trace_test.TestTraceSymbolize.func2", 42},
+		{trace.EvGoStop, []frame{
+			{"runtime.chansend1", 0},
+			{"runtime/trace_test.TestTraceSymbolize.func2", 42},
 		}},
-		eventDesc{trace.EvGoStop, []frame{
-			frame{"runtime.chanrecv1", 0},
-			frame{"runtime/trace_test.TestTraceSymbolize.func3", 46},
+		{trace.EvGoStop, []frame{
+			{"runtime.chanrecv1", 0},
+			{"runtime/trace_test.TestTraceSymbolize.func3", 46},
 		}},
-		eventDesc{trace.EvGoBlockRecv, []frame{
-			frame{"runtime.chanrecv1", 0},
-			frame{"runtime/trace_test.TestTraceSymbolize.func4", 50},
+		{trace.EvGoBlockRecv, []frame{
+			{"runtime.chanrecv1", 0},
+			{"runtime/trace_test.TestTraceSymbolize.func4", 50},
 		}},
-		eventDesc{trace.EvGoUnblock, []frame{
-			frame{"runtime.chansend1", 0},
-			frame{"runtime/trace_test.TestTraceSymbolize", 109},
-			frame{"testing.tRunner", 0},
+		{trace.EvGoUnblock, []frame{
+			{"runtime.chansend1", 0},
+			{"runtime/trace_test.TestTraceSymbolize", 109},
+			{"testing.tRunner", 0},
 		}},
-		eventDesc{trace.EvGoBlockSend, []frame{
-			frame{"runtime.chansend1", 0},
-			frame{"runtime/trace_test.TestTraceSymbolize.func5", 54},
+		{trace.EvGoBlockSend, []frame{
+			{"runtime.chansend1", 0},
+			{"runtime/trace_test.TestTraceSymbolize.func5", 54},
 		}},
-		eventDesc{trace.EvGoUnblock, []frame{
-			frame{"runtime.chanrecv1", 0},
-			frame{"runtime/trace_test.TestTraceSymbolize", 110},
-			frame{"testing.tRunner", 0},
+		{trace.EvGoUnblock, []frame{
+			{"runtime.chanrecv1", 0},
+			{"runtime/trace_test.TestTraceSymbolize", 110},
+			{"testing.tRunner", 0},
 		}},
-		eventDesc{trace.EvGoBlockSelect, []frame{
-			frame{"runtime.selectgo", 0},
-			frame{"runtime/trace_test.TestTraceSymbolize.func6", 59},
+		{trace.EvGoBlockSelect, []frame{
+			{"runtime.selectgo", 0},
+			{"runtime/trace_test.TestTraceSymbolize.func6", 59},
 		}},
-		eventDesc{trace.EvGoUnblock, []frame{
-			frame{"runtime.selectgo", 0},
-			frame{"runtime/trace_test.TestTraceSymbolize", 111},
-			frame{"testing.tRunner", 0},
+		{trace.EvGoUnblock, []frame{
+			{"runtime.selectgo", 0},
+			{"runtime/trace_test.TestTraceSymbolize", 111},
+			{"testing.tRunner", 0},
 		}},
-		eventDesc{trace.EvGoBlockSync, []frame{
-			frame{"sync.(*Mutex).Lock", 0},
-			frame{"runtime/trace_test.TestTraceSymbolize.func7", 67},
+		{trace.EvGoBlockSync, []frame{
+			{"sync.(*Mutex).Lock", 0},
+			{"runtime/trace_test.TestTraceSymbolize.func7", 67},
 		}},
-		eventDesc{trace.EvGoUnblock, []frame{
-			frame{"sync.(*Mutex).Unlock", 0},
-			frame{"runtime/trace_test.TestTraceSymbolize", 115},
-			frame{"testing.tRunner", 0},
+		{trace.EvGoUnblock, []frame{
+			{"sync.(*Mutex).Unlock", 0},
+			{"runtime/trace_test.TestTraceSymbolize", 115},
+			{"testing.tRunner", 0},
 		}},
-		eventDesc{trace.EvGoBlockSync, []frame{
-			frame{"sync.(*WaitGroup).Wait", 0},
-			frame{"runtime/trace_test.TestTraceSymbolize.func8", 73},
+		{trace.EvGoBlockSync, []frame{
+			{"sync.(*WaitGroup).Wait", 0},
+			{"runtime/trace_test.TestTraceSymbolize.func8", 73},
 		}},
-		eventDesc{trace.EvGoUnblock, []frame{
-			frame{"sync.(*WaitGroup).Add", 0},
-			frame{"sync.(*WaitGroup).Done", 0},
-			frame{"runtime/trace_test.TestTraceSymbolize", 116},
-			frame{"testing.tRunner", 0},
+		{trace.EvGoUnblock, []frame{
+			{"sync.(*WaitGroup).Add", 0},
+			{"sync.(*WaitGroup).Done", 0},
+			{"runtime/trace_test.TestTraceSymbolize", 116},
+			{"testing.tRunner", 0},
 		}},
-		eventDesc{trace.EvGoBlockCond, []frame{
-			frame{"sync.(*Cond).Wait", 0},
-			frame{"runtime/trace_test.TestTraceSymbolize.func9", 78},
+		{trace.EvGoBlockCond, []frame{
+			{"sync.(*Cond).Wait", 0},
+			{"runtime/trace_test.TestTraceSymbolize.func9", 78},
 		}},
-		eventDesc{trace.EvGoUnblock, []frame{
-			frame{"sync.(*Cond).Signal", 0},
-			frame{"runtime/trace_test.TestTraceSymbolize", 117},
-			frame{"testing.tRunner", 0},
+		{trace.EvGoUnblock, []frame{
+			{"sync.(*Cond).Signal", 0},
+			{"runtime/trace_test.TestTraceSymbolize", 117},
+			{"testing.tRunner", 0},
 		}},
-		eventDesc{trace.EvGoSleep, []frame{
-			frame{"time.Sleep", 0},
-			frame{"runtime/trace_test.TestTraceSymbolize", 108},
-			frame{"testing.tRunner", 0},
+		{trace.EvGoSleep, []frame{
+			{"time.Sleep", 0},
+			{"runtime/trace_test.TestTraceSymbolize", 108},
+			{"testing.tRunner", 0},
 		}},
 	}
 	// Stacks for the following events are OS-dependent due to OS-specific code in net package.
 	if runtime.GOOS != "windows" && runtime.GOOS != "plan9" {
 		want = append(want, []eventDesc{
-			eventDesc{trace.EvGoBlockNet, []frame{
-				frame{"net.(*netFD).accept", 0},
-				frame{"net.(*TCPListener).AcceptTCP", 0},
-				frame{"net.(*TCPListener).Accept", 0},
-				frame{"runtime/trace_test.TestTraceSymbolize.func10", 86},
+			{trace.EvGoBlockNet, []frame{
+				{"net.(*netFD).accept", 0},
+				{"net.(*TCPListener).AcceptTCP", 0},
+				{"net.(*TCPListener).Accept", 0},
+				{"runtime/trace_test.TestTraceSymbolize.func10", 86},
 			}},
-			eventDesc{trace.EvGoSysCall, []frame{
-				frame{"syscall.read", 0},
-				frame{"syscall.Read", 0},
-				frame{"os.(*File).read", 0},
-				frame{"os.(*File).Read", 0},
-				frame{"runtime/trace_test.TestTraceSymbolize.func11", 101},
+			{trace.EvGoSysCall, []frame{
+				{"syscall.read", 0},
+				{"syscall.Read", 0},
+				{"os.(*File).read", 0},
+				{"os.(*File).Read", 0},
+				{"runtime/trace_test.TestTraceSymbolize.func11", 101},
 			}},
 		}...)
 	}
diff --git a/src/runtime/trace/trace_test.go b/src/runtime/trace/trace_test.go
index 0a8957f..f4791c2 100644
--- a/src/runtime/trace/trace_test.go
+++ b/src/runtime/trace/trace_test.go
@@ -129,7 +129,13 @@
 
 	runtime.GC()
 	// Trigger GC from malloc.
-	for i := 0; i < 1e3; i++ {
+	n := int(1e3)
+	if runtime.GOOS == "openbsd" && runtime.GOARCH == "arm" {
+		// Reduce allocation to avoid running out of
+		// memory on the builder - see issue/12032.
+		n = 512
+	}
+	for i := 0; i < n; i++ {
 		_ = make([]byte, 1<<20)
 	}
 
@@ -260,7 +266,13 @@
 
 		runtime.GC()
 		// Trigger GC from malloc.
-		for i := 0; i < 1e3; i++ {
+		n := int(1e3)
+		if runtime.GOOS == "openbsd" && runtime.GOARCH == "arm" {
+			// Reduce allocation to avoid running out of
+			// memory on the builder - see issue/12032.
+			n = 512
+		}
+		for i := 0; i < n; i++ {
 			_ = make([]byte, 1<<20)
 		}
 
diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go
index 544ce27..2d223ce 100644
--- a/src/runtime/traceback.go
+++ b/src/runtime/traceback.go
@@ -26,9 +26,10 @@
 // stores an 8-byte return PC onto the stack. To accommodate this, we use regSize
 // as the size of the architecture-pushed return PC.
 //
-// usesLR is defined below. ptrSize and regSize are defined in stubs.go.
+// usesLR is defined below in terms of minFrameSize, which is defined in
+// arch_$GOARCH.go. ptrSize and regSize are defined in stubs.go.
 
-const usesLR = GOARCH != "amd64" && GOARCH != "amd64p32" && GOARCH != "386"
+const usesLR = minFrameSize > 0
 
 var (
 	// initialized in tracebackinit
@@ -295,10 +296,7 @@
 		// in package runtime and reflect, and for those we use call-specific
 		// metadata recorded by f's caller.
 		if callback != nil || printing {
-			frame.argp = frame.fp
-			if usesLR {
-				frame.argp += ptrSize
-			}
+			frame.argp = frame.fp + minFrameSize
 			setArgInfo(&frame, f, callback != nil)
 		}
 
@@ -396,7 +394,7 @@
 		// before faking a call to sigpanic.
 		if usesLR && waspanic {
 			x := *(*uintptr)(unsafe.Pointer(frame.sp))
-			frame.sp += ptrSize
+			frame.sp += minFrameSize
 			if GOARCH == "arm64" {
 				// arm64 needs 16-byte aligned SP, always
 				frame.sp += ptrSize
@@ -496,10 +494,7 @@
 		// Extract argument bitmaps for reflect stubs from the calls they made to reflect.
 		switch funcname(f) {
 		case "reflect.makeFuncStub", "reflect.methodValueCall":
-			arg0 := frame.sp
-			if usesLR {
-				arg0 += ptrSize
-			}
+			arg0 := frame.sp + minFrameSize
 			fn := *(**[2]uintptr)(unsafe.Pointer(arg0))
 			if fn[0] != f.entry {
 				print("runtime: confused by ", funcname(f), "\n")
diff --git a/src/runtime/type.go b/src/runtime/type.go
index 1321af8..033f12f 100644
--- a/src/runtime/type.go
+++ b/src/runtime/type.go
@@ -67,6 +67,7 @@
 	indirectvalue bool   // store ptr to value instead of value itself
 	bucketsize    uint16 // size of bucket
 	reflexivekey  bool   // true if k==k for all keys
+	needkeyupdate bool   // true if we need to update key on an overwrite
 }
 
 type chantype struct {
diff --git a/src/runtime/typekind.go b/src/runtime/typekind.go
index b64ec44..d146dca 100644
--- a/src/runtime/typekind.go
+++ b/src/runtime/typekind.go
@@ -5,37 +5,37 @@
 package runtime
 
 const (
-	kindBool          = _KindBool
-	kindInt           = _KindInt
-	kindInt8          = _KindInt8
-	kindInt16         = _KindInt16
-	kindInt32         = _KindInt32
-	kindInt64         = _KindInt64
-	kindUint          = _KindUint
-	kindUint8         = _KindUint8
-	kindUint16        = _KindUint16
-	kindUint32        = _KindUint32
-	kindUint64        = _KindUint64
-	kindUintptr       = _KindUintptr
-	kindFloat32       = _KindFloat32
-	kindFloat64       = _KindFloat64
-	kindComplex64     = _KindComplex64
-	kindComplex128    = _KindComplex128
-	kindArray         = _KindArray
-	kindChan          = _KindChan
-	kindFunc          = _KindFunc
-	kindInterface     = _KindInterface
-	kindMap           = _KindMap
-	kindPtr           = _KindPtr
-	kindSlice         = _KindSlice
-	kindString        = _KindString
-	kindStruct        = _KindStruct
-	kindUnsafePointer = _KindUnsafePointer
+	kindBool = 1 + iota
+	kindInt
+	kindInt8
+	kindInt16
+	kindInt32
+	kindInt64
+	kindUint
+	kindUint8
+	kindUint16
+	kindUint32
+	kindUint64
+	kindUintptr
+	kindFloat32
+	kindFloat64
+	kindComplex64
+	kindComplex128
+	kindArray
+	kindChan
+	kindFunc
+	kindInterface
+	kindMap
+	kindPtr
+	kindSlice
+	kindString
+	kindStruct
+	kindUnsafePointer
 
-	kindDirectIface = _KindDirectIface
-	kindGCProg      = _KindGCProg
-	kindNoPointers  = _KindNoPointers
-	kindMask        = _KindMask
+	kindDirectIface = 1 << 5
+	kindGCProg      = 1 << 6
+	kindNoPointers  = 1 << 7
+	kindMask        = (1 << 5) - 1
 )
 
 // isDirectIface reports whether t is stored directly in an interface value.
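
With the constants written out above, the layout of a type's kind byte is plain: the low five bits hold the kind enumeration and the high three bits hold flags. A sketch of decomposing such a byte (constant values copied from the block above; the sample value is arbitrary):

	package main

	import "fmt"

	const (
		kindMask       = (1 << 5) - 1 // low five bits: the kind itself
		kindNoPointers = 1 << 7       // flag: type contains no pointers
	)

	func main() {
		k := uint8(2) | kindNoPointers // kindInt (2) plus the no-pointers flag
		fmt.Println(k&kindMask, k&kindNoPointers != 0) // 2 true
	}
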
diff --git a/src/runtime/typekind1.go b/src/runtime/typekind1.go
deleted file mode 100644
index 73028d6f4..0000000
--- a/src/runtime/typekind1.go
+++ /dev/null
@@ -1,39 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-const (
-	_KindBool = 1 + iota
-	_KindInt
-	_KindInt8
-	_KindInt16
-	_KindInt32
-	_KindInt64
-	_KindUint
-	_KindUint8
-	_KindUint16
-	_KindUint32
-	_KindUint64
-	_KindUintptr
-	_KindFloat32
-	_KindFloat64
-	_KindComplex64
-	_KindComplex128
-	_KindArray
-	_KindChan
-	_KindFunc
-	_KindInterface
-	_KindMap
-	_KindPtr
-	_KindSlice
-	_KindString
-	_KindStruct
-	_KindUnsafePointer
-
-	_KindDirectIface = 1 << 5
-	_KindGCProg      = 1 << 6 // Type.gc points to GC program
-	_KindNoPointers  = 1 << 7
-	_KindMask        = (1 << 5) - 1
-)
diff --git a/src/strconv/ftoa.go b/src/strconv/ftoa.go
index 468c37f..9ff5d10 100644
--- a/src/strconv/ftoa.go
+++ b/src/strconv/ftoa.go
@@ -286,25 +286,23 @@
 	// Now we can figure out the minimum number of digits required.
 	// Walk along until d has distinguished itself from upper and lower.
 	for i := 0; i < d.nd; i++ {
-		var l, m, u byte // lower, middle, upper digits
+		l := byte('0') // lower digit
 		if i < lower.nd {
 			l = lower.d[i]
-		} else {
-			l = '0'
 		}
-		m = d.d[i]
+		m := d.d[i]    // middle digit
+		u := byte('0') // upper digit
 		if i < upper.nd {
 			u = upper.d[i]
-		} else {
-			u = '0'
 		}
 
 		// Okay to round down (truncate) if lower has a different digit
-		// or if lower is inclusive and is exactly the result of rounding down.
-		okdown := l != m || (inclusive && l == m && i+1 == lower.nd)
+		// or if lower is inclusive and is exactly the result of rounding
+		// down (i.e., and we have reached the final digit of lower).
+		okdown := l != m || inclusive && i+1 == lower.nd
 
-		// Okay to round up if upper has a different digit and
-		// either upper is inclusive or upper is bigger than the result of rounding up.
+		// Okay to round up if upper has a different digit and either upper
+		// is inclusive or upper is bigger than the result of rounding up.
 		okup := m != u && (inclusive || m+1 < u || i+1 < upper.nd)
 
 		// If it's okay to do either, then round to the nearest one.
diff --git a/src/strconv/ftoa_test.go b/src/strconv/ftoa_test.go
index 1b4dcd9..0b9f0fe 100644
--- a/src/strconv/ftoa_test.go
+++ b/src/strconv/ftoa_test.go
@@ -18,7 +18,7 @@
 	s    string
 }
 
-func fdiv(a, b float64) float64 { return a / b } // keep compiler in the dark
+func fdiv(a, b float64) float64 { return a / b }
 
 const (
 	below1e23 = 99999999999999974834176
@@ -94,8 +94,8 @@
 	{above1e23, 'f', -1, "100000000000000010000000"},
 	{above1e23, 'g', -1, "1.0000000000000001e+23"},
 
-	{fdiv(5e-304, 1e20), 'g', -1, "5e-324"},
-	{fdiv(-5e-304, 1e20), 'g', -1, "-5e-324"},
+	{fdiv(5e-304, 1e20), 'g', -1, "5e-324"},   // avoid constant arithmetic
+	{fdiv(-5e-304, 1e20), 'g', -1, "-5e-324"}, // avoid constant arithmetic
 
 	{32, 'g', -1, "32"},
 	{32, 'g', 0, "3e+01"},
diff --git a/src/strconv/itoa.go b/src/strconv/itoa.go
index e6f6303..f50d877 100644
--- a/src/strconv/itoa.go
+++ b/src/strconv/itoa.go
@@ -20,7 +20,7 @@
 	return s
 }
 
-// Itoa is shorthand for FormatInt(i, 10).
+// Itoa is shorthand for FormatInt(int64(i), 10).
 func Itoa(i int) string {
 	return FormatInt(int64(i), 10)
 }
diff --git a/src/syscall/exec_linux_test.go b/src/syscall/exec_linux_test.go
index e4ed6ad..cca3812 100644
--- a/src/syscall/exec_linux_test.go
+++ b/src/syscall/exec_linux_test.go
@@ -29,6 +29,13 @@
 			t.Skip("kernel prohibits user namespace in unprivileged process")
 		}
 	}
+	// When running under the Go continuous build, skip tests for
+	// now when under Kubernetes, where things are root but not quite.
+	// Both of these are our own environment variables.
+	// See Issue 12815.
+	if os.Getenv("GO_BUILDER_NAME") != "" && os.Getenv("IN_KUBERNETES") == "1" {
+		t.Skip("skipping test on Kubernetes-based builders; see Issue 12815")
+	}
 	cmd := exec.Command("whoami")
 	cmd.SysProcAttr = &syscall.SysProcAttr{
 		Cloneflags: syscall.CLONE_NEWUSER,
diff --git a/src/syscall/mkerrors.sh b/src/syscall/mkerrors.sh
index 438de6e..b59a46b 100755
--- a/src/syscall/mkerrors.sh
+++ b/src/syscall/mkerrors.sh
@@ -13,6 +13,11 @@
 
 CC=${CC:-gcc}
 
+if [[ "$GOOS" -eq "solaris" ]]; then
+	# Assumes GNU versions of utilities in PATH.
+	export PATH=/usr/gnu/bin:$PATH
+fi
+
 uname=$(uname)
 
 includes_Darwin='
@@ -195,6 +200,7 @@
 '
 
 includes_SunOS='
+#include <limits.h>
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
diff --git a/src/syscall/mksyscall_solaris.pl b/src/syscall/mksyscall_solaris.pl
index f5eb4b3..cd69ebc 100755
--- a/src/syscall/mksyscall_solaris.pl
+++ b/src/syscall/mksyscall_solaris.pl
@@ -38,6 +38,11 @@
 	exit 1;
 }
 
+if($ENV{'GOARCH'} eq "" || $ENV{'GOOS'} eq "") {
+	print STDERR "GOARCH or GOOS not defined in environment\n";
+	exit 1;
+}
+
 sub parseparamlist($) {
 	my ($list) = @_;
 	$list =~ s/^\s*//;
@@ -60,9 +65,9 @@
 
 my $package = "";
 my $text = "";
-my $vars = "";
 my $dynimports = "";
 my $linknames = "";
+my @vars = ();
 while(<>) {
 	chomp;
 	s/\s+/ /g;
@@ -100,20 +105,19 @@
 	}
 
 	# System call pointer variable name.
-	my $sysvarname = "libc_$sysname";
+	my $sysvarname = "libc_${sysname}";
 
 	my $strconvfunc = "BytePtrFromString";
 	my $strconvtype = "*byte";
 
-	# Library proc address variable.
 	$sysname =~ y/A-Z/a-z/; # All libc functions are lowercase.
-	if($vars eq "") {
-		$vars .= "\t$sysvarname";
-	} else {
-		$vars .= ",\n\t$sysvarname";
-	}
-	$dynimports .= "//go:cgo_import_dynamic $sysvarname $sysname \"$modname.so\"\n";
-	$linknames .= "//go:linkname $sysvarname $sysvarname\n";
+
+	# Runtime import of function to allow cross-platform builds.
+	$dynimports .= "//go:cgo_import_dynamic ${sysvarname} ${sysname} \"$modname.so\"\n";
+	# Link symbol to proc address variable.
+	$linknames .= "//go:linkname ${sysvarname} ${sysvarname}\n";
+	# Library proc address variable.
+	push @vars, $sysvarname;
 
 	# Go function header.
 	$out = join(', ', @out);
@@ -264,6 +268,8 @@
 // $cmdline
 // MACHINE GENERATED BY THE COMMAND ABOVE; DO NOT EDIT
 
+// +build $ENV{'GOARCH'},$ENV{'GOOS'}
+
 package $package
 
 import "unsafe"
@@ -271,17 +277,20 @@
 
 print "import \"syscall\"\n" if $package ne "syscall";
 
-print <<EOF;
+my $vardecls = "\t" . join(",\n\t", @vars);
+$vardecls .= " libcFunc";
+
+chomp($_=<<EOF);
 
 $dynimports
 $linknames
 type libcFunc uintptr
 
 var (
-$vars libcFunc
+$vardecls
 )
 
 $text
-
 EOF
+print $_;
 exit 0;
diff --git a/src/syscall/net_nacl.go b/src/syscall/net_nacl.go
index b5cb530..a3633ad 100644
--- a/src/syscall/net_nacl.go
+++ b/src/syscall/net_nacl.go
@@ -577,7 +577,7 @@
 		return EISCONN
 	}
 	l, ok := net.listener[netAddr{f.proto, f.sotype, sa.key()}]
-	if !ok {
+	if !ok || l.listenerClosed() {
 		net.Unlock()
 		return ECONNREFUSED
 	}
@@ -676,6 +676,12 @@
 	return nil
 }
 
+func (f *netFile) listenerClosed() bool {
+	f.listener.Lock()
+	defer f.listener.Unlock()
+	return f.listener.closed
+}
+
 func (f *netFile) close() error {
 	if f.listener != nil {
 		f.listener.close()
diff --git a/src/syscall/route_bsd.go b/src/syscall/route_bsd.go
index c62fdc3..4434a56 100644
--- a/src/syscall/route_bsd.go
+++ b/src/syscall/route_bsd.go
@@ -44,6 +44,9 @@
 
 // parseSockaddrLink parses b as a datalink socket address.
 func parseSockaddrLink(b []byte) (*SockaddrDatalink, error) {
+	if len(b) < 8 {
+		return nil, EINVAL
+	}
 	sa, _, err := parseLinkLayerAddr(b[4:])
 	if err != nil {
 		return nil, err
@@ -77,16 +80,16 @@
 		Slen byte
 	}
 	lla := (*linkLayerAddr)(unsafe.Pointer(&b[0]))
-	l := rsaAlignOf(int(4 + lla.Nlen + lla.Alen + lla.Slen))
+	l := 4 + int(lla.Nlen) + int(lla.Alen) + int(lla.Slen)
 	if len(b) < l {
 		return nil, 0, EINVAL
 	}
 	b = b[4:]
 	sa := &SockaddrDatalink{Type: lla.Type, Nlen: lla.Nlen, Alen: lla.Alen, Slen: lla.Slen}
-	for i := 0; len(sa.Data) > i && i < int(lla.Nlen+lla.Alen+lla.Slen); i++ {
+	for i := 0; len(sa.Data) > i && i < l-4; i++ {
 		sa.Data[i] = int8(b[i])
 	}
-	return sa, l, nil
+	return sa, rsaAlignOf(l), nil
 }
 
 // parseSockaddrInet parses b as an internet socket address.
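
The new length computation in parseLinkLayerAddr above widens each byte field to int before summing; the old expression summed at byte width inside int(...), so large link-layer addresses could wrap before the conversion. A small demonstration of the difference:

	package main

	import "fmt"

	func main() {
		var nlen, alen, slen byte = 200, 40, 20
		old := int(4 + nlen + alen + slen)             // byte-width sum wraps: 264 mod 256 = 8
		fixed := 4 + int(nlen) + int(alen) + int(slen) // widened first: 264
		fmt.Println(old, fixed) // 8 264
	}
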
diff --git a/src/syscall/route_bsd_test.go b/src/syscall/route_bsd_test.go
index 8617663..74d11f9 100644
--- a/src/syscall/route_bsd_test.go
+++ b/src/syscall/route_bsd_test.go
@@ -119,6 +119,41 @@
 	<-tmo
 }
 
+var parseInterfaceMessageTests = []*syscall.InterfaceMessage{
+	// with link-layer address
+	{
+		Header: syscall.IfMsghdr{Version: syscall.RTM_VERSION, Addrs: syscall.RTA_IFP},
+		Data: []uint8{
+			0x11, 0x12, 0x2, 0x0, 0x6, 0x3, 0x6, 0x0,
+			0x77, 0x6d, 0x31, 0x01, 0x23, 0x45, 0xab, 0xcd,
+			0xef, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+		},
+	},
+	// without link-layer address
+	{
+		Header: syscall.IfMsghdr{Version: syscall.RTM_VERSION, Addrs: syscall.RTA_IFP},
+		Data: []uint8{
+			0xe, 0x12, 0x4, 0x0, 0xf5, 0x6, 0x0, 0x0,
+			0x70, 0x66, 0x6c, 0x6f, 0x67, 0x30, 0x0, 0x0,
+		},
+	},
+	// no data
+	{
+		Header: syscall.IfMsghdr{Version: syscall.RTM_VERSION, Addrs: syscall.RTA_IFP},
+		Data: []uint8{
+			0x8, 0xa, 0xb, 0xc, 0xd, 0x0, 0x0, 0x0,
+		},
+	},
+}
+
+func TestParseInterfaceMessage(t *testing.T) {
+	for i, tt := range parseInterfaceMessageTests {
+		if _, err := syscall.ParseRoutingSockaddr(tt); err != nil {
+			t.Errorf("#%d: %v", i, err)
+		}
+	}
+}
+
 type addrFamily byte
 
 func (f addrFamily) String() string {
diff --git a/src/syscall/sockcmsg_unix.go b/src/syscall/sockcmsg_unix.go
index 045a012..4724275 100644
--- a/src/syscall/sockcmsg_unix.go
+++ b/src/syscall/sockcmsg_unix.go
@@ -77,10 +77,10 @@
 	h.Level = SOL_SOCKET
 	h.Type = SCM_RIGHTS
 	h.SetLen(CmsgLen(datalen))
-	data := uintptr(cmsgData(h))
+	data := cmsgData(h)
 	for _, fd := range fds {
-		*(*int32)(unsafe.Pointer(data)) = int32(fd)
-		data += 4
+		*(*int32)(data) = int32(fd)
+		data = unsafe.Pointer(uintptr(data) + 4)
 	}
 	return b
 }
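
The UnixRights rewrite above also follows the unsafe.Pointer conversion rules: an address parked in a uintptr variable is invisible to the collector, so the value stays an unsafe.Pointer and the uintptr round-trip is confined to a single expression. The pattern in isolation, as a sketch:

	package main

	import (
		"fmt"
		"unsafe"
	)

	// advance moves p forward n bytes, keeping the uintptr conversion
	// inside one expression rather than across statements.
	func advance(p unsafe.Pointer, n uintptr) unsafe.Pointer {
		return unsafe.Pointer(uintptr(p) + n)
	}

	func main() {
		a := [2]int32{10, 20}
		fmt.Println(*(*int32)(advance(unsafe.Pointer(&a[0]), 4))) // 20
	}
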
diff --git a/src/syscall/syscall_solaris.go b/src/syscall/syscall_solaris.go
index 0f60e21..2f68760 100644
--- a/src/syscall/syscall_solaris.go
+++ b/src/syscall/syscall_solaris.go
@@ -142,12 +142,23 @@
 	return anyToSockaddr(&rsa)
 }
 
-// The const provides a compile-time constant so clients
-// can adjust to whether there is a working Getwd and avoid
-// even linking this function into the binary.  See ../os/getwd.go.
-const ImplementsGetwd = false
+const ImplementsGetwd = true
 
-func Getwd() (string, error) { return "", ENOTSUP }
+//sys	Getcwd(buf []byte) (n int, err error)
+
+func Getwd() (wd string, err error) {
+	var buf [PathMax]byte
+	// Getcwd will return an error if it failed for any reason.
+	_, err = Getcwd(buf[0:])
+	if err != nil {
+		return "", err
+	}
+	n := clen(buf[:])
+	if n < 1 {
+		return "", EINVAL
+	}
+	return string(buf[:n]), nil
+}
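
Getwd above depends on clen to trim the buffer at the NUL that getcwd writes. For reference, a minimal equivalent of that unexported helper (a sketch; the real one lives elsewhere in package syscall):

	package main

	import "fmt"

	// clen returns the index of the first NUL byte in b, or len(b)
	// if none is present.
	func clen(b []byte) int {
		for i, c := range b {
			if c == 0 {
				return i
			}
		}
		return len(b)
	}

	func main() {
		fmt.Println(clen([]byte("/home/gopher\x00junk"))) // 12
	}
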
 
 /*
  * Wrapped
diff --git a/src/syscall/types_solaris.go b/src/syscall/types_solaris.go
index 53fa350..7246434 100644
--- a/src/syscall/types_solaris.go
+++ b/src/syscall/types_solaris.go
@@ -15,8 +15,14 @@
 
 /*
 #define KERNEL
+// These defines ensure that builds done on newer versions of Solaris are
+// backwards-compatible with older versions of Solaris and
+// OpenSolaris-based derivatives.
+#define __USE_SUNOS_SOCKETS__          // msghdr
+#define __USE_LEGACY_PROTOTYPES__      // iovec
 #include <dirent.h>
 #include <fcntl.h>
+#include <limits.h>
 #include <signal.h>
 #include <termios.h>
 #include <stdio.h>
@@ -69,6 +75,7 @@
 	sizeofInt      = C.sizeof_int
 	sizeofLong     = C.sizeof_long
 	sizeofLongLong = C.sizeof_longlong
+	PathMax        = C.PATH_MAX
 )
 
 // Basic types
diff --git a/src/syscall/zsyscall_solaris_amd64.go b/src/syscall/zsyscall_solaris_amd64.go
index cabab7e..ebdeb92 100644
--- a/src/syscall/zsyscall_solaris_amd64.go
+++ b/src/syscall/zsyscall_solaris_amd64.go
@@ -7,6 +7,7 @@
 
 import "unsafe"
 
+//go:cgo_import_dynamic libc_Getcwd getcwd "libc.so"
 //go:cgo_import_dynamic libc_getgroups getgroups "libc.so"
 //go:cgo_import_dynamic libc_setgroups setgroups "libc.so"
 //go:cgo_import_dynamic libc_fcntl fcntl "libc.so"
@@ -89,6 +90,7 @@
 //go:cgo_import_dynamic libc_recvfrom recvfrom "libsocket.so"
 //go:cgo_import_dynamic libc_recvmsg recvmsg "libsocket.so"
 
+//go:linkname libc_Getcwd libc_Getcwd
 //go:linkname libc_getgroups libc_getgroups
 //go:linkname libc_setgroups libc_setgroups
 //go:linkname libc_fcntl libc_fcntl
@@ -174,6 +176,7 @@
 type libcFunc uintptr
 
 var (
+	libc_Getcwd,
 	libc_getgroups,
 	libc_setgroups,
 	libc_fcntl,
@@ -257,6 +260,19 @@
 	libc_recvmsg libcFunc
 )
 
+func Getcwd(buf []byte) (n int, err error) {
+	var _p0 *byte
+	if len(buf) > 0 {
+		_p0 = &buf[0]
+	}
+	r0, _, e1 := sysvicall6(uintptr(unsafe.Pointer(&libc_Getcwd)), 2, uintptr(unsafe.Pointer(_p0)), uintptr(len(buf)), 0, 0, 0, 0)
+	n = int(r0)
+	if e1 != 0 {
+		err = errnoErr(e1)
+	}
+	return
+}
+
 func getgroups(ngid int, gid *_Gid_t) (n int, err error) {
 	r0, _, e1 := rawSysvicall6(uintptr(unsafe.Pointer(&libc_getgroups)), 2, uintptr(ngid), uintptr(unsafe.Pointer(gid)), 0, 0, 0, 0)
 	n = int(r0)
diff --git a/src/syscall/ztypes_solaris_amd64.go b/src/syscall/ztypes_solaris_amd64.go
index 2471519..4cf07ed 100644
--- a/src/syscall/ztypes_solaris_amd64.go
+++ b/src/syscall/ztypes_solaris_amd64.go
@@ -11,6 +11,7 @@
 	sizeofInt      = 0x4
 	sizeofLong     = 0x8
 	sizeofLongLong = 0x8
+	PathMax        = 0x400
 )
 
 type (
diff --git a/src/text/scanner/scanner.go b/src/text/scanner/scanner.go
index 3ab01ed..0155800 100644
--- a/src/text/scanner/scanner.go
+++ b/src/text/scanner/scanner.go
@@ -73,7 +73,7 @@
 	GoTokens       = ScanIdents | ScanFloats | ScanChars | ScanStrings | ScanRawStrings | ScanComments | SkipComments
 )
 
-// The result of Scan is one of the following tokens or a Unicode character.
+// The result of Scan is one of these tokens or a Unicode character.
 const (
 	EOF = -(iota + 1)
 	Ident
diff --git a/src/text/template/doc.go b/src/text/template/doc.go
index 0ce63f6..6c60091 100644
--- a/src/text/template/doc.go
+++ b/src/text/template/doc.go
@@ -36,6 +36,31 @@
 
 More intricate examples appear below.
 
+Text and spaces
+
+By default, all text between actions is copied verbatim when the template is
+executed. For example, the string " items are made of " in the example above appears
+on standard output when the program is run.
+
+However, to aid in formatting template source code, if an action's left delimiter
+(by default "{{") is followed immediately by a minus sign and ASCII space character
+("{{- "), all trailing white space is trimmed from the immediately preceding text.
+Similarly, if the right delimiter ("}}") is preceded by a space and minus sign
+(" -}}"), all leading white space is trimmed from the immediately following text.
+In these trim markers, the ASCII space must be present; "{{-3}}" parses as an
+action containing the number -3.
+
+For instance, when executing the template whose source is
+
+	"{{23 -}} < {{- 45}}"
+
+the generated output would be
+
+	"23<45"
+
+For this trimming, the definition of white space characters is the same as in Go:
+space, horizontal tab, carriage return, and newline.
+
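
The trim markers are easy to verify with a few lines; this program uses only the example template from the text above:

	package main

	import (
		"os"
		"text/template"
	)

	func main() {
		// "{{23 -}}" trims the space after 23 and "{{- 45}}" trims the
		// space before 45, so the output is exactly "23<45".
		t := template.Must(template.New("trim").Parse("{{23 -}} < {{- 45}}"))
		if err := t.Execute(os.Stdout, nil); err != nil {
			panic(err)
		}
	}
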
 Actions
 
 Here is the list of actions. "Arguments" and "pipelines" are evaluations of
@@ -90,6 +115,14 @@
 		The template with the specified name is executed with dot set
 		to the value of the pipeline.
 
+	{{block "name" pipeline}} T1 {{end}}
+		A block is shorthand for defining a template
+			{{define "name"}} T1 {{end}}
+		and then executing it in place
+			{{template "name" .}}
+		The typical use is to define a set of root templates that are
+		then customized by redefining the block templates within.
+
 	{{with pipeline}} T1 {{end}}
 		If the value of the pipeline is empty, no output is generated;
 		otherwise, dot is set to the value of the pipeline and T1 is
diff --git a/src/text/template/example_test.go b/src/text/template/example_test.go
index de1d518..9cab2e8 100644
--- a/src/text/template/example_test.go
+++ b/src/text/template/example_test.go
@@ -7,6 +7,7 @@
 import (
 	"log"
 	"os"
+	"strings"
 	"text/template"
 )
 
@@ -15,9 +16,12 @@
 	const letter = `
 Dear {{.Name}},
 {{if .Attended}}
-It was a pleasure to see you at the wedding.{{else}}
-It is a shame you couldn't make it to the wedding.{{end}}
-{{with .Gift}}Thank you for the lovely {{.}}.
+It was a pleasure to see you at the wedding.
+{{- else}}
+It is a shame you couldn't make it to the wedding.
+{{- end}}
+{{with .Gift -}}
+Thank you for the lovely {{.}}.
 {{end}}
 Best wishes,
 Josie
@@ -69,3 +73,38 @@
 	// Best wishes,
 	// Josie
 }
+
+// The following example is duplicated in html/template; keep them in sync.
+
+func ExampleTemplate_block() {
+	const (
+		master  = `Names:{{block "list" .}}{{"\n"}}{{range .}}{{println "-" .}}{{end}}{{end}}`
+		overlay = `{{define "list"}} {{join . ", "}}{{end}} `
+	)
+	var (
+		funcs     = template.FuncMap{"join": strings.Join}
+		guardians = []string{"Gamora", "Groot", "Nebula", "Rocket", "Star-Lord"}
+	)
+	masterTmpl, err := template.New("master").Funcs(funcs).Parse(master)
+	if err != nil {
+		log.Fatal(err)
+	}
+	overlayTmpl, err := template.Must(masterTmpl.Clone()).Parse(overlay)
+	if err != nil {
+		log.Fatal(err)
+	}
+	if err := masterTmpl.Execute(os.Stdout, guardians); err != nil {
+		log.Fatal(err)
+	}
+	if err := overlayTmpl.Execute(os.Stdout, guardians); err != nil {
+		log.Fatal(err)
+	}
+	// Output:
+	// Names:
+	// - Gamora
+	// - Groot
+	// - Nebula
+	// - Rocket
+	// - Star-Lord
+	// Names: Gamora, Groot, Nebula, Rocket, Star-Lord
+}
diff --git a/src/text/template/exec.go b/src/text/template/exec.go
index 6e46d05..233d34a 100644
--- a/src/text/template/exec.go
+++ b/src/text/template/exec.go
@@ -135,8 +135,6 @@
 			*errp = err.Err // Strip the wrapper.
 		case ExecError:
 			*errp = err // Keep the wrapper.
-		case error: // TODO: This should never happen, but it does. Understand and/or fix.
-			*errp = err
 		default:
 			panic(e)
 		}
@@ -259,8 +257,13 @@
 	}
 }
 
-// isTrue reports whether the value is 'true', in the sense of not the zero of its type,
-// and whether the value has a meaningful truth value.
+// IsTrue reports whether the value is 'true', in the sense of not the zero of its type,
+// and whether the value has a meaningful truth value. This is the definition of
+// truth used by if and other such actions.
+func IsTrue(val interface{}) (truth, ok bool) {
+	return isTrue(reflect.ValueOf(val))
+}
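
Since IsTrue is now exported, callers outside the package can apply the template definition of truth directly; a short usage sketch:

	package main

	import (
		"fmt"
		"text/template"
	)

	func main() {
		fmt.Println(template.IsTrue([]int{}))    // false true: empty slice is 'false'
		fmt.Println(template.IsTrue(struct{}{})) // true true: structs are always 'true'
	}
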
+
 func isTrue(val reflect.Value) (truth, ok bool) {
 	if !val.IsValid() {
 		// Something like var x interface{}, never set. It's a form of nil.
@@ -826,16 +829,11 @@
 }
 
 // indirect returns the item at the end of indirection, and a bool to indicate if it's nil.
-// We indirect through pointers and empty interfaces (only) because
-// non-empty interfaces have methods we might need.
 func indirect(v reflect.Value) (rv reflect.Value, isNil bool) {
 	for ; v.Kind() == reflect.Ptr || v.Kind() == reflect.Interface; v = v.Elem() {
 		if v.IsNil() {
 			return v, true
 		}
-		if v.Kind() == reflect.Interface && v.NumMethod() > 0 {
-			break
-		}
 	}
 	return v, false
 }
diff --git a/src/text/template/exec_test.go b/src/text/template/exec_test.go
index 07ebb55..e507e91 100644
--- a/src/text/template/exec_test.go
+++ b/src/text/template/exec_test.go
@@ -51,8 +51,9 @@
 	Empty2 interface{}
 	Empty3 interface{}
 	Empty4 interface{}
-	// Non-empty interface.
-	NonEmptyInterface I
+	// Non-empty interfaces.
+	NonEmptyInterface    I
+	NonEmptyInterfacePtS *I
 	// Stringer.
 	Str fmt.Stringer
 	Err error
@@ -73,6 +74,12 @@
 	unexported int
 }
 
+type S []string
+
+func (S) Method0() string {
+	return "M0"
+}
+
 type U struct {
 	V string
 }
@@ -99,6 +106,8 @@
 	return fmt.Sprintf("[%d]", w.k)
 }
 
+var siVal = I(S{"a", "b"})
+
 var tVal = &T{
 	True:   true,
 	I:      17,
@@ -119,22 +128,23 @@
 		{"one": 1, "two": 2},
 		{"eleven": 11, "twelve": 12},
 	},
-	Empty1:            3,
-	Empty2:            "empty2",
-	Empty3:            []int{7, 8},
-	Empty4:            &U{"UinEmpty"},
-	NonEmptyInterface: new(T),
-	Str:               bytes.NewBuffer([]byte("foozle")),
-	Err:               errors.New("erroozle"),
-	PI:                newInt(23),
-	PS:                newString("a string"),
-	PSI:               newIntSlice(21, 22, 23),
-	BinaryFunc:        func(a, b string) string { return fmt.Sprintf("[%s=%s]", a, b) },
-	VariadicFunc:      func(s ...string) string { return fmt.Sprint("<", strings.Join(s, "+"), ">") },
-	VariadicFuncInt:   func(a int, s ...string) string { return fmt.Sprint(a, "=<", strings.Join(s, "+"), ">") },
-	NilOKFunc:         func(s *int) bool { return s == nil },
-	ErrFunc:           func() (string, error) { return "bla", nil },
-	Tmpl:              Must(New("x").Parse("test template")), // "x" is the value of .X
+	Empty1:               3,
+	Empty2:               "empty2",
+	Empty3:               []int{7, 8},
+	Empty4:               &U{"UinEmpty"},
+	NonEmptyInterface:    &T{X: "x"},
+	NonEmptyInterfacePtS: &siVal,
+	Str:                  bytes.NewBuffer([]byte("foozle")),
+	Err:                  errors.New("erroozle"),
+	PI:                   newInt(23),
+	PS:                   newString("a string"),
+	PSI:                  newIntSlice(21, 22, 23),
+	BinaryFunc:           func(a, b string) string { return fmt.Sprintf("[%s=%s]", a, b) },
+	VariadicFunc:         func(s ...string) string { return fmt.Sprint("<", strings.Join(s, "+"), ">") },
+	VariadicFuncInt:      func(a int, s ...string) string { return fmt.Sprint(a, "=<", strings.Join(s, "+"), ">") },
+	NilOKFunc:            func(s *int) bool { return s == nil },
+	ErrFunc:              func() (string, error) { return "bla", nil },
+	Tmpl:                 Must(New("x").Parse("test template")), // "x" is the value of .X
 }
 
 // A non-empty interface.
@@ -337,6 +347,7 @@
 	{"if not .BinaryFunc call", "{{ if not .BinaryFunc}}{{call .BinaryFunc `1` `2`}}{{else}}No{{end}}", "No", tVal, true},
 	{"Interface Call", `{{stringer .S}}`, "foozle", map[string]interface{}{"S": bytes.NewBufferString("foozle")}, true},
 	{".ErrFunc", "{{call .ErrFunc}}", "bla", tVal, true},
+	{"call nil", "{{call nil}}", "", tVal, false},
 
 	// Erroneous function calls (check args).
 	{".BinaryFuncTooFew", "{{call .BinaryFunc `1`}}", "", tVal, false},
@@ -425,12 +436,15 @@
 	{"slice[1]", "{{index .SI 1}}", "4", tVal, true},
 	{"slice[HUGE]", "{{index .SI 10}}", "", tVal, false},
 	{"slice[WRONG]", "{{index .SI `hello`}}", "", tVal, false},
+	{"slice[nil]", "{{index .SI nil}}", "", tVal, false},
 	{"map[one]", "{{index .MSI `one`}}", "1", tVal, true},
 	{"map[two]", "{{index .MSI `two`}}", "2", tVal, true},
 	{"map[NO]", "{{index .MSI `XXX`}}", "0", tVal, true},
-	{"map[nil]", "{{index .MSI nil}}", "0", tVal, true},
+	{"map[nil]", "{{index .MSI nil}}", "", tVal, false},
+	{"map[``]", "{{index .MSI ``}}", "0", tVal, true},
 	{"map[WRONG]", "{{index .MSI 10}}", "", tVal, false},
 	{"double index", "{{index .SMSI 1 `eleven`}}", "11", tVal, true},
+	{"nil[1]", "{{index nil 1}}", "", tVal, false},
 
 	// Len.
 	{"slice", "{{len .SI}}", "3", tVal, true},
@@ -546,6 +560,11 @@
 	{"bug16i", "{{\"aaa\"|oneArg}}", "oneArg=aaa", tVal, true},
 	{"bug16j", "{{1+2i|printf \"%v\"}}", "(1+2i)", tVal, true},
 	{"bug16k", "{{\"aaa\"|printf }}", "aaa", tVal, true},
+	{"bug17a", "{{.NonEmptyInterface.X}}", "x", tVal, true},
+	{"bug17b", "-{{.NonEmptyInterface.Method1 1234}}-", "-1234-", tVal, true},
+	{"bug17c", "{{len .NonEmptyInterfacePtS}}", "2", tVal, true},
+	{"bug17d", "{{index .NonEmptyInterfacePtS 0}}", "a", tVal, true},
+	{"bug17e", "{{range .NonEmptyInterfacePtS}}-{{.}}-{{end}}", "-a--b-", tVal, true},
 }
 
 func zeroArgs() string {
@@ -797,18 +816,19 @@
 }
 
 // Use different delimiters to test Set.Delims.
+// Also test the trimming of leading and trailing spaces.
 const treeTemplate = `
-	(define "tree")
+	(- define "tree" -)
 	[
-		(.Val)
-		(with .Left)
-			(template "tree" .)
-		(end)
-		(with .Right)
-			(template "tree" .)
-		(end)
+		(- .Val -)
+		(- with .Left -)
+			(template "tree" . -)
+		(- end -)
+		(- with .Right -)
+			(- template "tree" . -)
+		(- end -)
 	]
-	(end)
+	(- end -)
 `
 
 func TestTree(t *testing.T) {
@@ -853,19 +873,13 @@
 		t.Fatal("parse error:", err)
 	}
 	var b bytes.Buffer
-	stripSpace := func(r rune) rune {
-		if r == '\t' || r == '\n' {
-			return -1
-		}
-		return r
-	}
 	const expect = "[1[2[3[4]][5[6]]][7[8[9]][10[11]]]]"
 	// First by looking up the template.
 	err = tmpl.Lookup("tree").Execute(&b, tree)
 	if err != nil {
 		t.Fatal("exec error:", err)
 	}
-	result := strings.Map(stripSpace, b.String())
+	result := b.String()
 	if result != expect {
 		t.Errorf("expected %q got %q", expect, result)
 	}
@@ -875,7 +889,7 @@
 	if err != nil {
 		t.Fatal("exec error:", err)
 	}
-	result = strings.Map(stripSpace, b.String())
+	result = b.String()
 	if result != expect {
 		t.Errorf("expected %q got %q", expect, result)
 	}
@@ -1184,3 +1198,85 @@
 		t.Errorf("expected %q; got %q", expect, err)
 	}
 }
+
+func funcNameTestFunc() int {
+	return 0
+}
+
+func TestGoodFuncNames(t *testing.T) {
+	names := []string{
+		"_",
+		"a",
+		"a1",
+		"a1",
+		"Ӵ",
+	}
+	for _, name := range names {
+		tmpl := New("X").Funcs(
+			FuncMap{
+				name: funcNameTestFunc,
+			},
+		)
+		if tmpl == nil {
+			t.Fatalf("nil result for %q", name)
+		}
+	}
+}
+
+func TestBadFuncNames(t *testing.T) {
+	names := []string{
+		"",
+		"2",
+		"a-b",
+	}
+	for _, name := range names {
+		testBadFuncName(name, t)
+	}
+}
+
+func testBadFuncName(name string, t *testing.T) {
+	defer func() {
+		recover()
+	}()
+	New("X").Funcs(
+		FuncMap{
+			name: funcNameTestFunc,
+		},
+	)
+	// If we get here, the name did not cause a panic, which is how Funcs
+	// reports an error.
+	t.Errorf("%q succeeded incorrectly as function name", name)
+}
+
+func TestBlock(t *testing.T) {
+	const (
+		input   = `a({{block "inner" .}}bar({{.}})baz{{end}})b`
+		want    = `a(bar(hello)baz)b`
+		overlay = `{{define "inner"}}foo({{.}})bar{{end}}`
+		want2   = `a(foo(goodbye)bar)b`
+	)
+	tmpl, err := New("outer").Parse(input)
+	if err != nil {
+		t.Fatal(err)
+	}
+	tmpl2, err := Must(tmpl.Clone()).Parse(overlay)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	var buf bytes.Buffer
+	if err := tmpl.Execute(&buf, "hello"); err != nil {
+		t.Fatal(err)
+	}
+	if got := buf.String(); got != want {
+		t.Errorf("got %q, want %q", got, want)
+	}
+
+	buf.Reset()
+	if err := tmpl2.Execute(&buf, "goodbye"); err != nil {
+		t.Fatal(err)
+	}
+	if got := buf.String(); got != want2 {
+		t.Errorf("got %q, want %q", got, want2)
+	}
+}
diff --git a/src/text/template/funcs.go b/src/text/template/funcs.go
index ccd0dfc..49e9e74 100644
--- a/src/text/template/funcs.go
+++ b/src/text/template/funcs.go
@@ -58,6 +58,9 @@
 // addValueFuncs adds to values the functions in funcs, converting them to reflect.Values.
 func addValueFuncs(out map[string]reflect.Value, in FuncMap) {
 	for name, fn := range in {
+		if !goodName(name) {
+			panic(fmt.Errorf("function name %s is not a valid identifier", name))
+		}
 		v := reflect.ValueOf(fn)
 		if v.Kind() != reflect.Func {
 			panic("value for " + name + " not a function")
@@ -77,7 +80,7 @@
 	}
 }
 
-// goodFunc checks that the function or method has the right result signature.
+// goodFunc reports whether the function or method has the right result signature.
 func goodFunc(typ reflect.Type) bool {
 	// We allow functions with 1 result or 2 results where the second is an error.
 	switch {
@@ -89,6 +92,23 @@
 	return false
 }
 
+// goodName reports whether the function name is a valid identifier.
+func goodName(name string) bool {
+	if name == "" {
+		return false
+	}
+	for i, r := range name {
+		switch {
+		case r == '_':
+		case i == 0 && !unicode.IsLetter(r):
+			return false
+		case !unicode.IsLetter(r) && !unicode.IsDigit(r):
+			return false
+		}
+	}
+	return true
+}
+
 // findFunction looks for a function in the template, and global map.
 func findFunction(name string, tmpl *Template) (reflect.Value, bool) {
 	if tmpl != nil && tmpl.common != nil {
@@ -104,6 +124,21 @@
 	return reflect.Value{}, false
 }
 
+// prepareArg checks whether value can be used as an argument of type argType,
+// and converts an invalid value to the appropriate zero value if possible.
+func prepareArg(value reflect.Value, argType reflect.Type) (reflect.Value, error) {
+	if !value.IsValid() {
+		if !canBeNil(argType) {
+			return reflect.Value{}, fmt.Errorf("value is nil; should be of type %s", argType)
+		}
+		value = reflect.Zero(argType)
+	}
+	if !value.Type().AssignableTo(argType) {
+		return reflect.Value{}, fmt.Errorf("value has type %s; should be %s", value.Type(), argType)
+	}
+	return value, nil
+}
+
 // Indexing.
 
 // index returns the result of indexing its first argument by the following
@@ -111,6 +146,9 @@
 // indexed item must be a map, slice, or array.
 func index(item interface{}, indices ...interface{}) (interface{}, error) {
 	v := reflect.ValueOf(item)
+	if !v.IsValid() {
+		return nil, fmt.Errorf("index of untyped nil")
+	}
 	for _, i := range indices {
 		index := reflect.ValueOf(i)
 		var isNil bool
@@ -125,6 +163,8 @@
 				x = index.Int()
 			case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
 				x = int64(index.Uint())
+			case reflect.Invalid:
+				return nil, fmt.Errorf("cannot index slice/array with nil")
 			default:
 				return nil, fmt.Errorf("cannot index slice/array with type %s", index.Type())
 			}
@@ -133,17 +173,18 @@
 			}
 			v = v.Index(int(x))
 		case reflect.Map:
-			if !index.IsValid() {
-				index = reflect.Zero(v.Type().Key())
-			}
-			if !index.Type().AssignableTo(v.Type().Key()) {
-				return nil, fmt.Errorf("%s is not index type for %s", index.Type(), v.Type())
+			index, err := prepareArg(index, v.Type().Key())
+			if err != nil {
+				return nil, err
 			}
 			if x := v.MapIndex(index); x.IsValid() {
 				v = x
 			} else {
 				v = reflect.Zero(v.Type().Elem())
 			}
+		case reflect.Invalid:
+			// Unreachable: the loop maintains the invariant that v is valid.
+			panic("unreachable")
 		default:
 			return nil, fmt.Errorf("can't index item of type %s", v.Type())
 		}
@@ -155,7 +196,11 @@
 
 // length returns the length of the item, with an error if it has no defined length.
 func length(item interface{}) (int, error) {
-	v, isNil := indirect(reflect.ValueOf(item))
+	v := reflect.ValueOf(item)
+	if !v.IsValid() {
+		return 0, fmt.Errorf("len of untyped nil")
+	}
+	v, isNil := indirect(v)
 	if isNil {
 		return 0, fmt.Errorf("len of nil pointer")
 	}
@@ -172,6 +217,9 @@
 // The function must return 1 result, or 2 results, the second of which is an error.
 func call(fn interface{}, args ...interface{}) (interface{}, error) {
 	v := reflect.ValueOf(fn)
+	if !v.IsValid() {
+		return nil, fmt.Errorf("call of nil")
+	}
 	typ := v.Type()
 	if typ.Kind() != reflect.Func {
 		return nil, fmt.Errorf("non-function of type %s", typ)
@@ -201,13 +249,11 @@
 		} else {
 			argType = dddType
 		}
-		if !value.IsValid() && canBeNil(argType) {
-			value = reflect.Zero(argType)
+
+		var err error
+		if argv[i], err = prepareArg(value, argType); err != nil {
+			return nil, fmt.Errorf("arg %d: %s", i, err)
 		}
-		if !value.Type().AssignableTo(argType) {
-			return nil, fmt.Errorf("arg %d has type %s; should be %s", i, value.Type(), argType)
-		}
-		argv[i] = value
 	}
 	result := v.Call(argv)
 	if len(result) == 2 && !result[1].IsNil() {
@@ -219,7 +265,7 @@
 // Boolean logic.
 
 func truth(a interface{}) bool {
-	t, _ := isTrue(reflect.ValueOf(a))
+	t, _ := IsTrue(a)
 	return t
 }
 
@@ -254,9 +300,8 @@
 }
 
 // not returns the Boolean negation of its argument.
-func not(arg interface{}) (truth bool) {
-	truth, _ = isTrue(reflect.ValueOf(arg))
-	return !truth
+func not(arg interface{}) bool {
+	return !truth(arg)
 }
 
 // Comparison.
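
As a quick illustration of the new name validation in addValueFuncs, a minimal sketch; fn is a placeholder function, and the names come from the tests above:

	package main

	import (
		"fmt"
		"text/template"
	)

	func main() {
		fn := func() int { return 0 } // placeholder, for illustration only

		// Valid identifiers (letters, digits, '_', no leading digit) are
		// accepted, including non-ASCII letters such as "Ӵ".
		template.New("t").Funcs(template.FuncMap{"_": fn, "a1": fn, "Ӵ": fn})

		// An invalid name now makes Funcs panic, which is how it reports errors.
		defer func() { fmt.Println("recovered:", recover()) }()
		template.New("t").Funcs(template.FuncMap{"a-b": fn})
	}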
diff --git a/src/text/template/multi_test.go b/src/text/template/multi_test.go
index ea01875..a8342f5 100644
--- a/src/text/template/multi_test.go
+++ b/src/text/template/multi_test.go
@@ -9,7 +9,6 @@
 import (
 	"bytes"
 	"fmt"
-	"strings"
 	"testing"
 	"text/template/parse"
 )
@@ -277,17 +276,11 @@
 	if tmpl, err = New("tmpl1").Parse(`{{define "test"}}foo{{end}}`); err != nil {
 		t.Fatalf("parse 1: %v", err)
 	}
-	if _, err = tmpl.Parse(`{{define "test"}}bar{{end}}`); err == nil {
-		t.Fatal("expected error")
+	if _, err = tmpl.Parse(`{{define "test"}}bar{{end}}`); err != nil {
+		t.Fatalf("got error %v, expected nil", err)
 	}
-	if !strings.Contains(err.Error(), "redefinition") {
-		t.Fatalf("expected redefinition error; got %v", err)
-	}
-	if _, err = tmpl.New("tmpl2").Parse(`{{define "test"}}bar{{end}}`); err == nil {
-		t.Fatal("expected error")
-	}
-	if !strings.Contains(err.Error(), "redefinition") {
-		t.Fatalf("expected redefinition error; got %v", err)
+	if _, err = tmpl.New("tmpl2").Parse(`{{define "test"}}bar{{end}}`); err != nil {
+		t.Fatalf("got error %v, expected nil", err)
 	}
 }
 
@@ -345,7 +338,6 @@
 func TestParse(t *testing.T) {
 	// In multiple calls to Parse with the same receiver template, only one call
 	// can contain text other than space, comments, and template definitions
-	var err error
 	t1 := New("test")
 	if _, err := t1.Parse(`{{define "test"}}{{end}}`); err != nil {
 		t.Fatalf("parsing test: %s", err)
@@ -356,10 +348,4 @@
 	if _, err := t1.Parse(`{{define "test"}}foo{{end}}`); err != nil {
 		t.Fatalf("parsing test: %s", err)
 	}
-	if _, err = t1.Parse(`{{define "test"}}foo{{end}}`); err == nil {
-		t.Fatal("no error from redefining a template")
-	}
-	if !strings.Contains(err.Error(), "redefinition") {
-		t.Fatalf("expected redefinition error; got %v", err)
-	}
 }
diff --git a/src/text/template/parse/lex.go b/src/text/template/parse/lex.go
index 8f9fe1d..ea93e05 100644
--- a/src/text/template/parse/lex.go
+++ b/src/text/template/parse/lex.go
@@ -58,6 +58,7 @@
 	itemVariable   // variable starting with '$', such as '$' or  '$1' or '$hello'
 	// Keywords appear after all the rest.
 	itemKeyword  // used only to delimit the keywords
+	itemBlock    // block keyword
 	itemDot      // the cursor, spelled '.'
 	itemDefine   // define keyword
 	itemElse     // else keyword
@@ -71,6 +72,7 @@
 
 var key = map[string]itemType{
 	".":        itemDot,
+	"block":    itemBlock,
 	"define":   itemDefine,
 	"else":     itemElse,
 	"end":      itemEnd,
@@ -83,6 +85,21 @@
 
 const eof = -1
 
+// Trimming spaces.
+// If the action begins "{{- " rather than "{{", then all space/tab/newlines
+// preceding the action are trimmed; conversely, if it ends " -}}", the
+// leading spaces of the following text are trimmed. This is done entirely
+// in the lexer; the parser never sees it happen. We require an ASCII
+// space to be present to avoid ambiguity with things like "{{-3}}".
+// It reads better with the space present anyway. For simplicity, only
+// ASCII space does the job.
+const (
+	spaceChars      = " \t\r\n" // These are the space characters defined by Go itself.
+	leftTrimMarker  = "- "      // Attached to left delimiter, trims trailing spaces from preceding text.
+	rightTrimMarker = " -"      // Attached to right delimiter, trims leading spaces from following text.
+	trimMarkerLen   = Pos(len(leftTrimMarker))
+)
+
 // stateFn represents the state of the scanner as a function that returns the next state.
 type stateFn func(*lexer) stateFn
 
@@ -220,10 +237,18 @@
 // lexText scans until an opening action delimiter, "{{".
 func lexText(l *lexer) stateFn {
 	for {
-		if strings.HasPrefix(l.input[l.pos:], l.leftDelim) {
+		delim, trimSpace := l.atLeftDelim()
+		if delim {
+			trimLength := Pos(0)
+			if trimSpace {
+				trimLength = rightTrimLength(l.input[l.start:l.pos])
+			}
+			l.pos -= trimLength
 			if l.pos > l.start {
 				l.emit(itemText)
 			}
+			l.pos += trimLength
+			l.ignore()
 			return lexLeftDelim
 		}
 		if l.next() == eof {
@@ -238,13 +263,56 @@
 	return nil
 }
 
-// lexLeftDelim scans the left delimiter, which is known to be present.
+// atLeftDelim reports whether the lexer is at a left delimiter, possibly followed by a trim marker.
+func (l *lexer) atLeftDelim() (delim, trimSpaces bool) {
+	if !strings.HasPrefix(l.input[l.pos:], l.leftDelim) {
+		return false, false
+	}
+	// The left delim might have the marker afterwards.
+	trimSpaces = strings.HasPrefix(l.input[l.pos+Pos(len(l.leftDelim)):], leftTrimMarker)
+	return true, trimSpaces
+}
+
+// rightTrimLength returns the length of the spaces at the end of the string.
+func rightTrimLength(s string) Pos {
+	return Pos(len(s) - len(strings.TrimRight(s, spaceChars)))
+}
+
+// atRightDelim reports whether the lexer is at a right delimiter, possibly preceded by a trim marker.
+func (l *lexer) atRightDelim() (delim, trimSpaces bool) {
+	if strings.HasPrefix(l.input[l.pos:], l.rightDelim) {
+		return true, false
+	}
+	// The right delim might have the marker before.
+	if strings.HasPrefix(l.input[l.pos:], rightTrimMarker) {
+		if strings.HasPrefix(l.input[l.pos+trimMarkerLen:], l.rightDelim) {
+			return true, true
+		}
+	}
+	return false, false
+}
+
+// leftTrimLength returns the length of the spaces at the beginning of the string.
+func leftTrimLength(s string) Pos {
+	return Pos(len(s) - len(strings.TrimLeft(s, spaceChars)))
+}
+
+// lexLeftDelim scans the left delimiter, which is known to be present, possibly with a trim marker.
 func lexLeftDelim(l *lexer) stateFn {
 	l.pos += Pos(len(l.leftDelim))
-	if strings.HasPrefix(l.input[l.pos:], leftComment) {
+	trimSpace := strings.HasPrefix(l.input[l.pos:], leftTrimMarker)
+	afterMarker := Pos(0)
+	if trimSpace {
+		afterMarker = trimMarkerLen
+	}
+	if strings.HasPrefix(l.input[l.pos+afterMarker:], leftComment) {
+		l.pos += afterMarker
+		l.ignore()
 		return lexComment
 	}
 	l.emit(itemLeftDelim)
+	l.pos += afterMarker
+	l.ignore()
 	l.parenDepth = 0
 	return lexInsideAction
 }
@@ -257,19 +325,34 @@
 		return l.errorf("unclosed comment")
 	}
 	l.pos += Pos(i + len(rightComment))
-	if !strings.HasPrefix(l.input[l.pos:], l.rightDelim) {
+	delim, trimSpace := l.atRightDelim()
+	if !delim {
 		return l.errorf("comment ends before closing delimiter")
-
+	}
+	if trimSpace {
+		l.pos += trimMarkerLen
 	}
 	l.pos += Pos(len(l.rightDelim))
+	if trimSpace {
+		l.pos += leftTrimLength(l.input[l.pos:])
+	}
 	l.ignore()
 	return lexText
 }
 
-// lexRightDelim scans the right delimiter, which is known to be present.
+// lexRightDelim scans the right delimiter, which is known to be present, possibly with a trim marker.
 func lexRightDelim(l *lexer) stateFn {
+	trimSpace := strings.HasPrefix(l.input[l.pos:], rightTrimMarker)
+	if trimSpace {
+		l.pos += trimMarkerLen
+		l.ignore()
+	}
 	l.pos += Pos(len(l.rightDelim))
 	l.emit(itemRightDelim)
+	if trimSpace {
+		l.pos += leftTrimLength(l.input[l.pos:])
+		l.ignore()
+	}
 	return lexText
 }
 
@@ -278,7 +361,8 @@
 	// Either number, quoted string, or identifier.
 	// Spaces separate arguments; runs of spaces turn into itemSpace.
 	// Pipe symbols separate and are emitted.
-	if strings.HasPrefix(l.input[l.pos:], l.rightDelim) {
+	delim, _ := l.atRightDelim()
+	if delim {
 		if l.parenDepth == 0 {
 			return lexRightDelim
 		}
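
From the template author's side, the trim markers behave as in this minimal sketch, which reuses the "hello- {{- 3 -}} -world" input from the lexer tests below:

	package main

	import (
		"os"
		"text/template"
	)

	func main() {
		// "{{- " trims the spaces, tabs, and newlines before the action and
		// " -}}" trims those after it; the ASCII space next to the dash is
		// required, so "{{-3}}" still lexes as the number -3.
		t := template.Must(template.New("trim").Parse("hello- {{- 3 -}} -world"))
		if err := t.Execute(os.Stdout, nil); err != nil { // prints "hello-3-world"
			panic(err)
		}
	}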
diff --git a/src/text/template/parse/lex_test.go b/src/text/template/parse/lex_test.go
index be551d8..e35ebf1 100644
--- a/src/text/template/parse/lex_test.go
+++ b/src/text/template/parse/lex_test.go
@@ -33,6 +33,7 @@
 
 	// keywords
 	itemDot:      ".",
+	itemBlock:    "block",
 	itemDefine:   "define",
 	itemElse:     "else",
 	itemIf:       "if",
@@ -58,6 +59,8 @@
 }
 
 var (
+	tDot        = item{itemDot, 0, "."}
+	tBlock      = item{itemBlock, 0, "block"}
 	tEOF        = item{itemEOF, 0, ""}
 	tFor        = item{itemIdentifier, 0, "for"}
 	tLeft       = item{itemLeftDelim, 0, "{{"}
@@ -104,6 +107,9 @@
 	}},
 	{"empty action", `{{}}`, []item{tLeft, tRight, tEOF}},
 	{"for", `{{for}}`, []item{tLeft, tFor, tRight, tEOF}},
+	{"block", `{{block "foo" .}}`, []item{
+		tLeft, tBlock, tSpace, {itemString, 0, `"foo"`}, tSpace, tDot, tRight, tEOF,
+	}},
 	{"quote", `{{"abc \n\t\" "}}`, []item{tLeft, tQuote, tRight, tEOF}},
 	{"raw quote", "{{" + raw + "}}", []item{tLeft, tRawQuote, tRight, tEOF}},
 	{"raw quote with newline", "{{" + rawNL + "}}", []item{tLeft, tRawQuoteNL, tRight, tEOF}},
@@ -155,7 +161,7 @@
 	}},
 	{"dot", "{{.}}", []item{
 		tLeft,
-		{itemDot, 0, "."},
+		tDot,
 		tRight,
 		tEOF,
 	}},
@@ -169,7 +175,7 @@
 		tLeft,
 		{itemField, 0, ".x"},
 		tSpace,
-		{itemDot, 0, "."},
+		tDot,
 		tSpace,
 		{itemNumber, 0, ".2"},
 		tSpace,
@@ -278,6 +284,19 @@
 		tRight,
 		tEOF,
 	}},
+	{"trimming spaces before and after", "hello- {{- 3 -}} -world", []item{
+		{itemText, 0, "hello-"},
+		tLeft,
+		{itemNumber, 0, "3"},
+		tRight,
+		{itemText, 0, "-world"},
+		tEOF,
+	}},
+	{"trimming spaces before and after comment", "hello- {{- /* hello */ -}} -world", []item{
+		{itemText, 0, "hello-"},
+		{itemText, 0, "-world"},
+		tEOF,
+	}},
 	// errors
 	{"badchar", "#{{\x01}}", []item{
 		{itemText, 0, "#"},
@@ -339,7 +358,7 @@
 		{itemText, 0, "hello-"},
 		{itemError, 0, `unclosed comment`},
 	}},
-	{"text with comment close separted from delim", "hello-{{/* */ }}-world", []item{
+	{"text with comment close separated from delim", "hello-{{/* */ }}-world", []item{
 		{itemText, 0, "hello-"},
 		{itemError, 0, `comment ends before closing delimiter`},
 	}},
@@ -488,9 +507,9 @@
 func (t *Tree) parseLexer(lex *lexer, text string) (tree *Tree, err error) {
 	defer t.recover(&err)
 	t.ParseName = t.Name
-	t.startParse(nil, lex)
-	t.parse(nil)
-	t.add(nil)
+	t.startParse(nil, lex, map[string]*Tree{})
+	t.parse()
+	t.add()
 	t.stopParse()
 	return t, nil
 }
diff --git a/src/text/template/parse/parse.go b/src/text/template/parse/parse.go
index 88aacd1..dc56cf7 100644
--- a/src/text/template/parse/parse.go
+++ b/src/text/template/parse/parse.go
@@ -28,6 +28,7 @@
 	token     [3]item // three-token lookahead for parser.
 	peekCount int
 	vars      []string // variables defined at the moment.
+	treeSet   map[string]*Tree
 }
 
 // Copy returns a copy of the Tree. Any parsing state is discarded.
@@ -205,11 +206,12 @@
 }
 
 // startParse initializes the parser, using the lexer.
-func (t *Tree) startParse(funcs []map[string]interface{}, lex *lexer) {
+func (t *Tree) startParse(funcs []map[string]interface{}, lex *lexer, treeSet map[string]*Tree) {
 	t.Root = nil
 	t.lex = lex
 	t.vars = []string{"$"}
 	t.funcs = funcs
+	t.treeSet = treeSet
 }
 
 // stopParse terminates parsing.
@@ -217,6 +219,7 @@
 	t.lex = nil
 	t.vars = nil
 	t.funcs = nil
+	t.treeSet = nil
 }
 
 // Parse parses the template definition string to construct a representation of
@@ -226,19 +229,19 @@
 func (t *Tree) Parse(text, leftDelim, rightDelim string, treeSet map[string]*Tree, funcs ...map[string]interface{}) (tree *Tree, err error) {
 	defer t.recover(&err)
 	t.ParseName = t.Name
-	t.startParse(funcs, lex(t.Name, text, leftDelim, rightDelim))
+	t.startParse(funcs, lex(t.Name, text, leftDelim, rightDelim), treeSet)
 	t.text = text
-	t.parse(treeSet)
-	t.add(treeSet)
+	t.parse()
+	t.add()
 	t.stopParse()
 	return t, nil
 }
 
-// add adds tree to the treeSet.
-func (t *Tree) add(treeSet map[string]*Tree) {
-	tree := treeSet[t.Name]
+// add adds tree to t.treeSet.
+func (t *Tree) add() {
+	tree := t.treeSet[t.Name]
 	if tree == nil || IsEmptyTree(tree.Root) {
-		treeSet[t.Name] = t
+		t.treeSet[t.Name] = t
 		return
 	}
 	if !IsEmptyTree(t.Root) {
@@ -274,7 +277,7 @@
 // parse is the top-level parser for a template, essentially the same
 // as itemList except it also parses {{define}} actions.
 // It runs to EOF.
-func (t *Tree) parse(treeSet map[string]*Tree) (next Node) {
+func (t *Tree) parse() (next Node) {
 	t.Root = t.newList(t.peek().pos)
 	for t.peek().typ != itemEOF {
 		if t.peek().typ == itemLeftDelim {
@@ -283,8 +286,8 @@
 				newT := New("definition") // name will be updated once we know it.
 				newT.text = t.text
 				newT.ParseName = t.ParseName
-				newT.startParse(t.funcs, t.lex)
-				newT.parseDefinition(treeSet)
+				newT.startParse(t.funcs, t.lex, t.treeSet)
+				newT.parseDefinition()
 				continue
 			}
 			t.backup2(delim)
@@ -300,9 +303,9 @@
 }
 
 // parseDefinition parses a {{define}} ...  {{end}} template definition and
-// installs the definition in the treeSet map.  The "define" keyword has already
+// installs the definition in t.treeSet. The "define" keyword has already
 // been scanned.
-func (t *Tree) parseDefinition(treeSet map[string]*Tree) {
+func (t *Tree) parseDefinition() {
 	const context = "define clause"
 	name := t.expectOneOf(itemString, itemRawString, context)
 	var err error
@@ -316,7 +319,7 @@
 	if end.Type() != nodeEnd {
 		t.errorf("unexpected %s in %s", end, context)
 	}
-	t.add(treeSet)
+	t.add()
 	t.stopParse()
 }
 
@@ -358,6 +361,8 @@
 // First word could be a keyword such as range.
 func (t *Tree) action() (n Node) {
 	switch token := t.nextNonSpace(); token.typ {
+	case itemBlock:
+		return t.blockControl()
 	case itemElse:
 		return t.elseControl()
 	case itemEnd:
@@ -522,13 +527,51 @@
 	return t.newElse(t.expect(itemRightDelim, "else").pos, t.lex.lineNumber())
 }
 
+// Block:
+//	{{block stringValue pipeline}}
+// Block keyword is past.
+// The name must be something that can evaluate to a string.
+// The pipeline is mandatory.
+func (t *Tree) blockControl() Node {
+	const context = "block clause"
+
+	token := t.nextNonSpace()
+	name := t.parseTemplateName(token, context)
+	pipe := t.pipeline(context)
+
+	block := New(name) // create the definition tree for the block's body.
+	block.text = t.text
+	block.ParseName = t.ParseName
+	block.startParse(t.funcs, t.lex, t.treeSet)
+	var end Node
+	block.Root, end = block.itemList()
+	if end.Type() != nodeEnd {
+		t.errorf("unexpected %s in %s", end, context)
+	}
+	block.add()
+	block.stopParse()
+
+	return t.newTemplate(token.pos, t.lex.lineNumber(), name, pipe)
+}
+
 // Template:
 //	{{template stringValue pipeline}}
 // Template keyword is past.  The name must be something that can evaluate
 // to a string.
 func (t *Tree) templateControl() Node {
-	var name string
+	const context = "template clause"
 	token := t.nextNonSpace()
+	name := t.parseTemplateName(token, context)
+	var pipe *PipeNode
+	if t.nextNonSpace().typ != itemRightDelim {
+		t.backup()
+		// Do not pop variables; they persist until "end".
+		pipe = t.pipeline(context)
+	}
+	return t.newTemplate(token.pos, t.lex.lineNumber(), name, pipe)
+}
+
+func (t *Tree) parseTemplateName(token item, context string) (name string) {
 	switch token.typ {
 	case itemString, itemRawString:
 		s, err := strconv.Unquote(token.val)
@@ -537,15 +580,9 @@
 		}
 		name = s
 	default:
-		t.unexpected(token, "template invocation")
+		t.unexpected(token, context)
 	}
-	var pipe *PipeNode
-	if t.nextNonSpace().typ != itemRightDelim {
-		t.backup()
-		// Do not pop variables; they persist until "end".
-		pipe = t.pipeline("template")
-	}
-	return t.newTemplate(token.pos, t.lex.lineNumber(), name, pipe)
+	return
 }
 
 // command:
diff --git a/src/text/template/parse/parse_test.go b/src/text/template/parse/parse_test.go
index 200d50c..b4512d3 100644
--- a/src/text/template/parse/parse_test.go
+++ b/src/text/template/parse/parse_test.go
@@ -228,6 +228,15 @@
 		`{{with .X}}"hello"{{end}}`},
 	{"with with else", "{{with .X}}hello{{else}}goodbye{{end}}", noError,
 		`{{with .X}}"hello"{{else}}"goodbye"{{end}}`},
+	// Trimming spaces.
+	{"trim left", "x \r\n\t{{- 3}}", noError, `"x"{{3}}`},
+	{"trim right", "{{3 -}}\n\n\ty", noError, `{{3}}"y"`},
+	{"trim left and right", "x \r\n\t{{- 3 -}}\n\n\ty", noError, `"x"{{3}}"y"`},
+	{"comment trim left", "x \r\n\t{{- /* hi */}}", noError, `"x"`},
+	{"comment trim right", "{{/* hi */ -}}\n\n\ty", noError, `"y"`},
+	{"comment trim left and right", "x \r\n\t{{- /* */ -}}\n\n\ty", noError, `"x""y"`},
+	{"block definition", `{{block "foo" .}}hello{{end}}`, noError,
+		`{{template "foo" .}}`},
 	// Errors.
 	{"unclosed action", "hello{{range", hasError, ""},
 	{"unmatched end", "{{end}}", hasError, ""},
@@ -277,6 +286,8 @@
 	{"wrong pipeline boolean", "{{.|true}}", hasError, ""},
 	{"wrong pipeline nil", "{{'c'|nil}}", hasError, ""},
 	{"empty pipeline", `{{printf "%d" ( ) }}`, hasError, ""},
+	// Missing pipeline in block
+	{"block definition", `{{block "foo"}}hello{{end}}`, hasError, ""},
 }
 
 var builtins = map[string]interface{}{
@@ -450,3 +461,26 @@
 		}
 	}
 }
+
+func TestBlock(t *testing.T) {
+	const (
+		input = `a{{block "inner" .}}bar{{.}}baz{{end}}b`
+		outer = `a{{template "inner" .}}b`
+		inner = `bar{{.}}baz`
+	)
+	treeSet := make(map[string]*Tree)
+	tmpl, err := New("outer").Parse(input, "", "", treeSet, nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if g, w := tmpl.Root.String(), outer; g != w {
+		t.Errorf("outer template = %q, want %q", g, w)
+	}
+	inTmpl := treeSet["inner"]
+	if inTmpl == nil {
+		t.Fatal("block did not define template")
+	}
+	if g, w := inTmpl.Root.String(), inner; g != w {
+		t.Errorf("inner template = %q, want %q", g, w)
+	}
+}
diff --git a/src/text/template/template.go b/src/text/template/template.go
index 3e80982..7a7f42a 100644
--- a/src/text/template/template.go
+++ b/src/text/template/template.go
@@ -5,7 +5,6 @@
 package template
 
 import (
-	"fmt"
 	"reflect"
 	"sync"
 	"text/template/parse"
@@ -117,11 +116,10 @@
 
 // AddParseTree adds parse tree for template with given name and associates it with t.
 // If the template does not already exist, it will create a new one.
-// It is an error to reuse a name except to overwrite an empty template.
+// If the template does exist, it will be replaced.
 func (t *Template) AddParseTree(name string, tree *parse.Tree) (*Template, error) {
 	t.init()
 	// If the name is the name of this template, overwrite this template.
-	// The associate method checks it's not a redefinition.
 	nt := t
 	if name != t.name {
 		nt = t.New(name)
@@ -162,8 +160,9 @@
 
 // Funcs adds the elements of the argument map to the template's function map.
 // It panics if a value in the map is not a function with appropriate return
-// type. However, it is legal to overwrite elements of the map. The return
-// value is the template, so calls can be chained.
+// type or if the name cannot be used syntactically as a function in a template.
+// It is legal to overwrite elements of the map. The return value is the template,
+// so calls can be chained.
 func (t *Template) Funcs(funcMap FuncMap) *Template {
 	t.init()
 	t.muFuncs.Lock()
@@ -184,11 +183,7 @@
 
 // Parse defines the template by parsing the text. Nested template definitions will be
 // associated with the top-level template t. Parse may be called multiple times
-// to parse definitions of templates to associate with t. It is an error if a
-// resulting template is non-empty (contains content other than template
-// definitions) and would replace a non-empty template with the same name.
-// (In multiple calls to Parse with the same receiver template, only one call
-// can contain text other than space, comments, and template definitions.)
+// to parse definitions of templates to associate with t.
 func (t *Template) Parse(text string) (*Template, error) {
 	t.init()
 	t.muFuncs.RLock()
@@ -207,25 +202,17 @@
 }
 
 // associate installs the new template into the group of templates associated
-// with t. It is an error to reuse a name except to overwrite an empty
-// template. The two are already known to share the common structure.
-// The boolean return value reports wither to store this tree as t.Tree.
+// with t. The two are already known to share the common structure.
+// The boolean return value reports whether to store this tree as t.Tree.
 func (t *Template) associate(new *Template, tree *parse.Tree) (bool, error) {
 	if new.common != t.common {
 		panic("internal error: associate not common")
 	}
-	name := new.name
-	if old := t.tmpl[name]; old != nil {
-		oldIsEmpty := parse.IsEmptyTree(old.Root)
-		newIsEmpty := parse.IsEmptyTree(tree.Root)
-		if newIsEmpty {
-			// Whether old is empty or not, new is empty; no reason to replace old.
-			return false, nil
-		}
-		if !oldIsEmpty {
-			return false, fmt.Errorf("template: redefinition of template %q", name)
-		}
+	if t.tmpl[new.name] != nil && parse.IsEmptyTree(tree.Root) {
+		// If a template by that name exists,
+		// don't replace it with an empty template.
+		return false, nil
 	}
-	t.tmpl[name] = new
+	t.tmpl[new.name] = new
 	return true, nil
 }
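
To make the relaxed redefinition semantics concrete, a minimal sketch (the template names and bodies are illustrative):

	package main

	import (
		"os"
		"text/template"
	)

	func main() {
		t := template.Must(template.New("page").Parse(`{{define "greet"}}foo{{end}}`))
		// Re-parsing a definition with the same name used to fail with a
		// "redefinition" error; it now simply replaces the previous body.
		template.Must(t.Parse(`{{define "greet"}}bar{{end}}`))
		if err := t.ExecuteTemplate(os.Stdout, "greet", nil); err != nil { // prints "bar"
			panic(err)
		}
	}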
diff --git a/src/time/format.go b/src/time/format.go
index 873d3ff..d727ef0 100644
--- a/src/time/format.go
+++ b/src/time/format.go
@@ -518,7 +518,7 @@
 		case stdZeroMinute:
 			b = appendInt(b, min, 2)
 		case stdSecond:
-			b = appendInt(b, sec, 2)
+			b = appendInt(b, sec, 0)
 		case stdZeroSecond:
 			b = appendInt(b, sec, 2)
 		case stdPM:
@@ -696,6 +696,11 @@
 // location and zone in the returned time. Otherwise it records the time as
 // being in a fabricated location with time fixed at the given zone offset.
 //
+// No checking is done that the day of the month is within the month's
+// valid dates; any one- or two-digit value is accepted. For example
+// February 31 and even February 99 are valid dates, specifying dates
+// in March and May. This behavior is consistent with time.Date.
+//
 // When parsing a time with a zone abbreviation like MST, if the zone abbreviation
 // has a defined offset in the current location, then that offset is used.
 // The zone abbreviation "UTC" is recognized as UTC regardless of location.
@@ -794,7 +799,8 @@
 				value = value[1:]
 			}
 			day, value, err = getnum(value, std == stdZeroDay)
-			if day < 0 || 31 < day {
+			if day < 0 {
+				// Note that we allow any one- or two-digit day here.
 				rangeErrString = "day"
 			}
 		case stdHour:
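
A small sketch of the two behavior changes in this file, using the same values as the tests that follow; the output comments reflect the post-change behavior:

	package main

	import (
		"fmt"
		"time"
	)

	func main() {
		// Single-digit reference verbs now format without zero padding (issue 12440).
		d := time.Date(2001, 2, 3, 4, 5, 6, 0, time.UTC)
		fmt.Println(d.Format("3:4:5")) // 4:5:6

		// Parse no longer range-checks the day against the month; out-of-range
		// days normalize forward, the same way time.Date treats them.
		p, err := time.Parse("Jan 2 2006", "Jan 36 2010")
		fmt.Println(p, err) // 2010-02-05 00:00:00 +0000 UTC <nil>
	}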
diff --git a/src/time/format_test.go b/src/time/format_test.go
index ecc5c8f..8ff053d 100644
--- a/src/time/format_test.go
+++ b/src/time/format_test.go
@@ -74,6 +74,16 @@
 	}
 }
 
+// issue 12440.
+func TestFormatSingleDigits(t *testing.T) {
+	time := Date(2001, 2, 3, 4, 5, 6, 700000000, UTC)
+	test := FormatTest{"single digit format", "3:4:5", "4:5:6"}
+	result := time.Format(test.format)
+	if result != test.result {
+		t.Errorf("%s expected %q got %q", test.name, test.result, result)
+	}
+}
+
 func TestFormatShortYear(t *testing.T) {
 	years := []int{
 		-100001, -100000, -99999,
@@ -154,6 +164,9 @@
 	// GMT with offset.
 	{"GMT-8", UnixDate, "Fri Feb  5 05:00:57 GMT-8 2010", true, true, 1, 0},
 
+	// Day of month can be out of range.
+	{"Jan 36", UnixDate, "Fri Jan 36 05:00:57 GMT-8 2010", true, true, 1, 0},
+
 	// Accept any number of fractional second digits (including none) for .999...
 	// In Go 1, .999... was completely ignored in the format, meaning the first two
 	// cases would succeed, but the next four would not. Go 1.1 accepts all six.
diff --git a/src/vendor/golang.org/x/net/http2/hpack/encode.go b/src/vendor/golang.org/x/net/http2/hpack/encode.go
new file mode 100644
index 0000000..80d621c
--- /dev/null
+++ b/src/vendor/golang.org/x/net/http2/hpack/encode.go
@@ -0,0 +1,251 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package hpack
+
+import (
+	"io"
+)
+
+const (
+	uint32Max              = ^uint32(0)
+	initialHeaderTableSize = 4096
+)
+
+type Encoder struct {
+	dynTab dynamicTable
+	// minSize is the minimum table size set by
+	// SetMaxDynamicTableSize after the previous Header Table Size
+	// Update.
+	minSize uint32
+	// maxSizeLimit is the maximum table size this encoder
+	// supports. This protects the encoder from an excessively
+	// large table size.
+	maxSizeLimit uint32
+	// tableSizeUpdate indicates whether "Header Table Size
+	// Update" is required.
+	tableSizeUpdate bool
+	w               io.Writer
+	buf             []byte
+}
+
+// NewEncoder returns a new Encoder which performs HPACK encoding.
+// Encoded data is written to w.
+func NewEncoder(w io.Writer) *Encoder {
+	e := &Encoder{
+		minSize:         uint32Max,
+		maxSizeLimit:    initialHeaderTableSize,
+		tableSizeUpdate: false,
+		w:               w,
+	}
+	e.dynTab.setMaxSize(initialHeaderTableSize)
+	return e
+}
+
+// WriteField encodes f into a single Write to e's underlying Writer.
+// This function may also produce bytes for a "Header Table Size Update"
+// if necessary. If produced, it is written before the encoding of f.
+func (e *Encoder) WriteField(f HeaderField) error {
+	e.buf = e.buf[:0]
+
+	if e.tableSizeUpdate {
+		e.tableSizeUpdate = false
+		if e.minSize < e.dynTab.maxSize {
+			e.buf = appendTableSize(e.buf, e.minSize)
+		}
+		e.minSize = uint32Max
+		e.buf = appendTableSize(e.buf, e.dynTab.maxSize)
+	}
+
+	idx, nameValueMatch := e.searchTable(f)
+	if nameValueMatch {
+		e.buf = appendIndexed(e.buf, idx)
+	} else {
+		indexing := e.shouldIndex(f)
+		if indexing {
+			e.dynTab.add(f)
+		}
+
+		if idx == 0 {
+			e.buf = appendNewName(e.buf, f, indexing)
+		} else {
+			e.buf = appendIndexedName(e.buf, f, idx, indexing)
+		}
+	}
+	n, err := e.w.Write(e.buf)
+	if err == nil && n != len(e.buf) {
+		err = io.ErrShortWrite
+	}
+	return err
+}
+
+// searchTable searches f in both the static and dynamic header tables.
+// The static header table is searched first. The dynamic header table
+// is searched only when there is no exact match for both name and
+// value in the static table. If there is no match, i is 0. If both
+// name and value match, i is the matched index and nameValueMatch
+// becomes true. If only the name matches, i points to that index and
+// nameValueMatch becomes false.
+func (e *Encoder) searchTable(f HeaderField) (i uint64, nameValueMatch bool) {
+	for idx, hf := range staticTable {
+		if !constantTimeStringCompare(hf.Name, f.Name) {
+			continue
+		}
+		if i == 0 {
+			i = uint64(idx + 1)
+		}
+		if f.Sensitive {
+			continue
+		}
+		if !constantTimeStringCompare(hf.Value, f.Value) {
+			continue
+		}
+		i = uint64(idx + 1)
+		nameValueMatch = true
+		return
+	}
+
+	j, nameValueMatch := e.dynTab.search(f)
+	if nameValueMatch || (i == 0 && j != 0) {
+		i = j + uint64(len(staticTable))
+	}
+	return
+}
+
+// SetMaxDynamicTableSize changes the dynamic header table size to v.
+// The actual size is bounded by the value passed to
+// SetMaxDynamicTableSizeLimit.
+func (e *Encoder) SetMaxDynamicTableSize(v uint32) {
+	if v > e.maxSizeLimit {
+		v = e.maxSizeLimit
+	}
+	if v < e.minSize {
+		e.minSize = v
+	}
+	e.tableSizeUpdate = true
+	e.dynTab.setMaxSize(v)
+}
+
+// SetMaxDynamicTableSizeLimit changes the maximum value that can be
+// specified in SetMaxDynamicTableSize to v. By default, it is set to
+// 4096, which is the same as the default dynamic header table size
+// described in the HPACK specification. If the current maximum
+// dynamic header table size is strictly greater than v, "Header Table
+// Size Update" will be done in the next WriteField call and the
+// maximum dynamic header table size is truncated to v.
+func (e *Encoder) SetMaxDynamicTableSizeLimit(v uint32) {
+	e.maxSizeLimit = v
+	if e.dynTab.maxSize > v {
+		e.tableSizeUpdate = true
+		e.dynTab.setMaxSize(v)
+	}
+}
+
+// shouldIndex reports whether f should be indexed.
+func (e *Encoder) shouldIndex(f HeaderField) bool {
+	return !f.Sensitive && f.size() <= e.dynTab.maxSize
+}
+
+// appendIndexed appends index i, as encoded in "Indexed Header Field"
+// representation, to dst and returns the extended buffer.
+func appendIndexed(dst []byte, i uint64) []byte {
+	first := len(dst)
+	dst = appendVarInt(dst, 7, i)
+	dst[first] |= 0x80
+	return dst
+}
+
+// appendNewName appends f, as encoded in one of the "Literal Header
+// Field - New Name" representation variants, to dst and returns the
+// extended buffer.
+//
+// If f.Sensitive is true, "Never Indexed" representation is used. If
+// f.Sensitive is false and indexing is true, "Inremental Indexing"
+// representation is used.
+func appendNewName(dst []byte, f HeaderField, indexing bool) []byte {
+	dst = append(dst, encodeTypeByte(indexing, f.Sensitive))
+	dst = appendHpackString(dst, f.Name)
+	return appendHpackString(dst, f.Value)
+}
+
+// appendIndexedName appends f, with index i referring to an indexed
+// name entry, as encoded in one of the "Literal Header Field - Indexed
+// Name" representation variants, to dst and returns the extended buffer.
+//
+// If f.Sensitive is true, "Never Indexed" representation is used. If
+// f.Sensitive is false and indexing is true, "Incremental Indexing"
+// representation is used.
+func appendIndexedName(dst []byte, f HeaderField, i uint64, indexing bool) []byte {
+	first := len(dst)
+	var n byte
+	if indexing {
+		n = 6
+	} else {
+		n = 4
+	}
+	dst = appendVarInt(dst, n, i)
+	dst[first] |= encodeTypeByte(indexing, f.Sensitive)
+	return appendHpackString(dst, f.Value)
+}
+
+// appendTableSize appends v, as encoded in "Header Table Size Update"
+// representation, to dst and returns the extended buffer.
+func appendTableSize(dst []byte, v uint32) []byte {
+	first := len(dst)
+	dst = appendVarInt(dst, 5, uint64(v))
+	dst[first] |= 0x20
+	return dst
+}
+
+// appendVarInt appends i, as encoded in variable integer form using n
+// bit prefix, to dst and returns the extended buffer.
+//
+// See
+// http://http2.github.io/http2-spec/compression.html#integer.representation
+func appendVarInt(dst []byte, n byte, i uint64) []byte {
+	k := uint64((1 << n) - 1)
+	if i < k {
+		return append(dst, byte(i))
+	}
+	dst = append(dst, byte(k))
+	i -= k
+	for ; i >= 128; i >>= 7 {
+		dst = append(dst, byte(0x80|(i&0x7f)))
+	}
+	return append(dst, byte(i))
+}
+
+// appendHpackString appends s, as encoded in "String Literal"
+// representation, to dst and returns the extended buffer.
+//
+// s will be encoded in Huffman codes only when that produces a
+// strictly shorter byte string.
+func appendHpackString(dst []byte, s string) []byte {
+	huffmanLength := HuffmanEncodeLength(s)
+	if huffmanLength < uint64(len(s)) {
+		first := len(dst)
+		dst = appendVarInt(dst, 7, huffmanLength)
+		dst = AppendHuffmanString(dst, s)
+		dst[first] |= 0x80
+	} else {
+		dst = appendVarInt(dst, 7, uint64(len(s)))
+		dst = append(dst, s...)
+	}
+	return dst
+}
+
+// encodeTypeByte returns type byte. If sensitive is true, type byte
+// for "Never Indexed" representation is returned. If sensitive is
+// false and indexing is true, type byte for "Incremental Indexing"
+// representation is returned. Otherwise, type byte for "Without
+// Indexing" is returned.
+func encodeTypeByte(indexing, sensitive bool) byte {
+	if sensitive {
+		return 0x10
+	}
+	if indexing {
+		return 0x40
+	}
+	return 0
+}
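
For orientation, a minimal usage sketch of the new Encoder; the header fields are illustrative, and the 0x82 byte for ":method: GET" matches the expectations in the tests below:

	package main

	import (
		"bytes"
		"fmt"
		"log"

		"golang.org/x/net/http2/hpack"
	)

	func main() {
		var buf bytes.Buffer
		enc := hpack.NewEncoder(&buf)

		// Each WriteField appends one encoded field to buf; a pending
		// "Header Table Size Update" would be emitted first if required.
		fields := []hpack.HeaderField{
			{Name: ":method", Value: "GET"},                    // static table hit: one byte, 0x82
			{Name: "cookie", Value: "secret", Sensitive: true}, // never-indexed literal
		}
		for _, f := range fields {
			if err := enc.WriteField(f); err != nil {
				log.Fatal(err)
			}
		}
		fmt.Printf("%x\n", buf.Bytes())
	}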
diff --git a/src/vendor/golang.org/x/net/http2/hpack/encode_test.go b/src/vendor/golang.org/x/net/http2/hpack/encode_test.go
new file mode 100644
index 0000000..92286f3
--- /dev/null
+++ b/src/vendor/golang.org/x/net/http2/hpack/encode_test.go
@@ -0,0 +1,330 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package hpack
+
+import (
+	"bytes"
+	"encoding/hex"
+	"reflect"
+	"strings"
+	"testing"
+)
+
+func TestEncoderTableSizeUpdate(t *testing.T) {
+	tests := []struct {
+		size1, size2 uint32
+		wantHex      string
+	}{
+		// Should emit 2 table size updates (2048 and 4096)
+		{2048, 4096, "3fe10f 3fe11f 82"},
+
+		// Should emit 1 table size update (2048)
+		{16384, 2048, "3fe10f 82"},
+	}
+	for _, tt := range tests {
+		var buf bytes.Buffer
+		e := NewEncoder(&buf)
+		e.SetMaxDynamicTableSize(tt.size1)
+		e.SetMaxDynamicTableSize(tt.size2)
+		if err := e.WriteField(pair(":method", "GET")); err != nil {
+			t.Fatal(err)
+		}
+		want := removeSpace(tt.wantHex)
+		if got := hex.EncodeToString(buf.Bytes()); got != want {
+			t.Errorf("e.SetDynamicTableSize %v, %v = %q; want %q", tt.size1, tt.size2, got, want)
+		}
+	}
+}
+
+func TestEncoderWriteField(t *testing.T) {
+	var buf bytes.Buffer
+	e := NewEncoder(&buf)
+	var got []HeaderField
+	d := NewDecoder(4<<10, func(f HeaderField) {
+		got = append(got, f)
+	})
+
+	tests := []struct {
+		hdrs []HeaderField
+	}{
+		{[]HeaderField{
+			pair(":method", "GET"),
+			pair(":scheme", "http"),
+			pair(":path", "/"),
+			pair(":authority", "www.example.com"),
+		}},
+		{[]HeaderField{
+			pair(":method", "GET"),
+			pair(":scheme", "http"),
+			pair(":path", "/"),
+			pair(":authority", "www.example.com"),
+			pair("cache-control", "no-cache"),
+		}},
+		{[]HeaderField{
+			pair(":method", "GET"),
+			pair(":scheme", "https"),
+			pair(":path", "/index.html"),
+			pair(":authority", "www.example.com"),
+			pair("custom-key", "custom-value"),
+		}},
+	}
+	for i, tt := range tests {
+		buf.Reset()
+		got = got[:0]
+		for _, hf := range tt.hdrs {
+			if err := e.WriteField(hf); err != nil {
+				t.Fatal(err)
+			}
+		}
+		_, err := d.Write(buf.Bytes())
+		if err != nil {
+			t.Errorf("%d. Decoder Write = %v", i, err)
+		}
+		if !reflect.DeepEqual(got, tt.hdrs) {
+			t.Errorf("%d. Decoded %+v; want %+v", i, got, tt.hdrs)
+		}
+	}
+}
+
+func TestEncoderSearchTable(t *testing.T) {
+	e := NewEncoder(nil)
+
+	e.dynTab.add(pair("foo", "bar"))
+	e.dynTab.add(pair("blake", "miz"))
+	e.dynTab.add(pair(":method", "GET"))
+
+	tests := []struct {
+		hf        HeaderField
+		wantI     uint64
+		wantMatch bool
+	}{
+		// Name and Value match
+		{pair("foo", "bar"), uint64(len(staticTable) + 3), true},
+		{pair("blake", "miz"), uint64(len(staticTable) + 2), true},
+		{pair(":method", "GET"), 2, true},
+
+		// Only name match because Sensitive == true
+		{HeaderField{":method", "GET", true}, 2, false},
+
+		// Only Name matches
+		{pair("foo", "..."), uint64(len(staticTable) + 3), false},
+		{pair("blake", "..."), uint64(len(staticTable) + 2), false},
+		{pair(":method", "..."), 2, false},
+
+		// None match
+		{pair("foo-", "bar"), 0, false},
+	}
+	for _, tt := range tests {
+		if gotI, gotMatch := e.searchTable(tt.hf); gotI != tt.wantI || gotMatch != tt.wantMatch {
+			t.Errorf("d.search(%+v) = %v, %v; want %v, %v", tt.hf, gotI, gotMatch, tt.wantI, tt.wantMatch)
+		}
+	}
+}
+
+func TestAppendVarInt(t *testing.T) {
+	tests := []struct {
+		n    byte
+		i    uint64
+		want []byte
+	}{
+		// Fits in a byte:
+		{1, 0, []byte{0}},
+		{2, 2, []byte{2}},
+		{3, 6, []byte{6}},
+		{4, 14, []byte{14}},
+		{5, 30, []byte{30}},
+		{6, 62, []byte{62}},
+		{7, 126, []byte{126}},
+		{8, 254, []byte{254}},
+
+		// Multiple bytes:
+		{5, 1337, []byte{31, 154, 10}},
+	}
+	for _, tt := range tests {
+		got := appendVarInt(nil, tt.n, tt.i)
+		if !bytes.Equal(got, tt.want) {
+			t.Errorf("appendVarInt(nil, %v, %v) = %v; want %v", tt.n, tt.i, got, tt.want)
+		}
+	}
+}
+
+func TestAppendHpackString(t *testing.T) {
+	tests := []struct {
+		s, wantHex string
+	}{
+		// Huffman encoded
+		{"www.example.com", "8c f1e3 c2e5 f23a 6ba0 ab90 f4ff"},
+
+		// Not Huffman encoded
+		{"a", "01 61"},
+
+		// zero length
+		{"", "00"},
+	}
+	for _, tt := range tests {
+		want := removeSpace(tt.wantHex)
+		buf := appendHpackString(nil, tt.s)
+		if got := hex.EncodeToString(buf); want != got {
+			t.Errorf("appendHpackString(nil, %q) = %q; want %q", tt.s, got, want)
+		}
+	}
+}
+
+func TestAppendIndexed(t *testing.T) {
+	tests := []struct {
+		i       uint64
+		wantHex string
+	}{
+		// 1 byte
+		{1, "81"},
+		{126, "fe"},
+
+		// 2 bytes
+		{127, "ff00"},
+		{128, "ff01"},
+	}
+	for _, tt := range tests {
+		want := removeSpace(tt.wantHex)
+		buf := appendIndexed(nil, tt.i)
+		if got := hex.EncodeToString(buf); want != got {
+			t.Errorf("appendIndex(nil, %v) = %q; want %q", tt.i, got, want)
+		}
+	}
+}
+
+func TestAppendNewName(t *testing.T) {
+	tests := []struct {
+		f        HeaderField
+		indexing bool
+		wantHex  string
+	}{
+		// Incremental indexing
+		{HeaderField{"custom-key", "custom-value", false}, true, "40 88 25a8 49e9 5ba9 7d7f 89 25a8 49e9 5bb8 e8b4 bf"},
+
+		// Without indexing
+		{HeaderField{"custom-key", "custom-value", false}, false, "00 88 25a8 49e9 5ba9 7d7f 89 25a8 49e9 5bb8 e8b4 bf"},
+
+		// Never indexed
+		{HeaderField{"custom-key", "custom-value", true}, true, "10 88 25a8 49e9 5ba9 7d7f 89 25a8 49e9 5bb8 e8b4 bf"},
+		{HeaderField{"custom-key", "custom-value", true}, false, "10 88 25a8 49e9 5ba9 7d7f 89 25a8 49e9 5bb8 e8b4 bf"},
+	}
+	for _, tt := range tests {
+		want := removeSpace(tt.wantHex)
+		buf := appendNewName(nil, tt.f, tt.indexing)
+		if got := hex.EncodeToString(buf); want != got {
+			t.Errorf("appendNewName(nil, %+v, %v) = %q; want %q", tt.f, tt.indexing, got, want)
+		}
+	}
+}
+
+func TestAppendIndexedName(t *testing.T) {
+	tests := []struct {
+		f        HeaderField
+		i        uint64
+		indexing bool
+		wantHex  string
+	}{
+		// Incremental indexing
+		{HeaderField{":status", "302", false}, 8, true, "48 82 6402"},
+
+		// Without indexing
+		{HeaderField{":status", "302", false}, 8, false, "08 82 6402"},
+
+		// Never indexed
+		{HeaderField{":status", "302", true}, 8, true, "18 82 6402"},
+		{HeaderField{":status", "302", true}, 8, false, "18 82 6402"},
+	}
+	for _, tt := range tests {
+		want := removeSpace(tt.wantHex)
+		buf := appendIndexedName(nil, tt.f, tt.i, tt.indexing)
+		if got := hex.EncodeToString(buf); want != got {
+			t.Errorf("appendIndexedName(nil, %+v, %v) = %q; want %q", tt.f, tt.indexing, got, want)
+		}
+	}
+}
+
+func TestAppendTableSize(t *testing.T) {
+	tests := []struct {
+		i       uint32
+		wantHex string
+	}{
+		// Fits into 1 byte
+		{30, "3e"},
+
+		// Extra byte
+		{31, "3f00"},
+		{32, "3f01"},
+	}
+	for _, tt := range tests {
+		want := removeSpace(tt.wantHex)
+		buf := appendTableSize(nil, tt.i)
+		if got := hex.EncodeToString(buf); want != got {
+			t.Errorf("appendTableSize(nil, %v) = %q; want %q", tt.i, got, want)
+		}
+	}
+}
+
+func TestEncoderSetMaxDynamicTableSize(t *testing.T) {
+	var buf bytes.Buffer
+	e := NewEncoder(&buf)
+	tests := []struct {
+		v           uint32
+		wantUpdate  bool
+		wantMinSize uint32
+		wantMaxSize uint32
+	}{
+		// Set new table size to 2048
+		{2048, true, 2048, 2048},
+
+		// Set new table size to 16384, but still limited to
+		// 4096
+		{16384, true, 2048, 4096},
+	}
+	for _, tt := range tests {
+		e.SetMaxDynamicTableSize(tt.v)
+		if got := e.tableSizeUpdate; tt.wantUpdate != got {
+			t.Errorf("e.tableSizeUpdate = %v; want %v", got, tt.wantUpdate)
+		}
+		if got := e.minSize; tt.wantMinSize != got {
+			t.Errorf("e.minSize = %v; want %v", got, tt.wantMinSize)
+		}
+		if got := e.dynTab.maxSize; tt.wantMaxSize != got {
+			t.Errorf("e.maxSize = %v; want %v", got, tt.wantMaxSize)
+		}
+	}
+}
+
+func TestEncoderSetMaxDynamicTableSizeLimit(t *testing.T) {
+	e := NewEncoder(nil)
+	// 4095 < initialHeaderTableSize means maxSize is truncated to
+	// 4095.
+	e.SetMaxDynamicTableSizeLimit(4095)
+	if got, want := e.dynTab.maxSize, uint32(4095); got != want {
+		t.Errorf("e.dynTab.maxSize = %v; want %v", got, want)
+	}
+	if got, want := e.maxSizeLimit, uint32(4095); got != want {
+		t.Errorf("e.maxSizeLimit = %v; want %v", got, want)
+	}
+	if got, want := e.tableSizeUpdate, true; got != want {
+		t.Errorf("e.tableSizeUpdate = %v; want %v", got, want)
+	}
+	// maxSize will be truncated to maxSizeLimit
+	e.SetMaxDynamicTableSize(16384)
+	if got, want := e.dynTab.maxSize, uint32(4095); got != want {
+		t.Errorf("e.dynTab.maxSize = %v; want %v", got, want)
+	}
+	// 8192 > current maxSizeLimit, so maxSize does not change.
+	e.SetMaxDynamicTableSizeLimit(8192)
+	if got, want := e.dynTab.maxSize, uint32(4095); got != want {
+		t.Errorf("e.dynTab.maxSize = %v; want %v", got, want)
+	}
+	if got, want := e.maxSizeLimit, uint32(8192); got != want {
+		t.Errorf("e.maxSizeLimit = %v; want %v", got, want)
+	}
+}
+
+func removeSpace(s string) string {
+	return strings.Replace(s, " ", "", -1)
+}
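
To see how appendVarInt arrives at the {5, 1337} => {31, 154, 10} vector above, here is a standalone re-derivation; encodeVarInt is a local copy for illustration, since appendVarInt is unexported:

	package main

	import "fmt"

	// encodeVarInt mirrors hpack's appendVarInt (HPACK section 5.1, n-bit prefix).
	func encodeVarInt(n byte, i uint64) []byte {
		k := uint64((1 << n) - 1) // largest value that fits in the n-bit prefix
		if i < k {
			return []byte{byte(i)}
		}
		out := []byte{byte(k)} // prefix saturated: emit k, then the remainder
		i -= k
		for ; i >= 128; i >>= 7 {
			out = append(out, byte(0x80|(i&0x7f))) // 7 payload bits plus continuation bit
		}
		return append(out, byte(i))
	}

	func main() {
		// 1337 with a 5-bit prefix: k = 31; 1337-31 = 1306; 1306 = 10<<7 | 26,
		// so the bytes are 31, 0x80|26 = 154, and finally 10.
		fmt.Println(encodeVarInt(5, 1337)) // [31 154 10]
	}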
diff --git a/src/vendor/golang.org/x/net/http2/hpack/hpack.go b/src/vendor/golang.org/x/net/http2/hpack/hpack.go
new file mode 100644
index 0000000..f5a9b84
--- /dev/null
+++ b/src/vendor/golang.org/x/net/http2/hpack/hpack.go
@@ -0,0 +1,512 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package hpack implements HPACK, a compression format for
+// efficiently representing HTTP header fields in the context of HTTP/2.
+//
+// See http://tools.ietf.org/html/draft-ietf-httpbis-header-compression-09
+package hpack
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+)
+
+// A DecodingError is something the spec defines as a decoding error.
+type DecodingError struct {
+	Err error
+}
+
+func (de DecodingError) Error() string {
+	return fmt.Sprintf("decoding error: %v", de.Err)
+}
+
+// An InvalidIndexError is returned when an encoder references a table
+// entry before the static table or after the end of the dynamic table.
+type InvalidIndexError int
+
+func (e InvalidIndexError) Error() string {
+	return fmt.Sprintf("invalid indexed representation index %d", int(e))
+}
+
+// A HeaderField is a name-value pair. Both the name and value are
+// treated as opaque sequences of octets.
+type HeaderField struct {
+	Name, Value string
+
+	// Sensitive means that this header field should never be
+	// indexed.
+	Sensitive bool
+}
+
+func (hf *HeaderField) size() uint32 {
+	// http://http2.github.io/http2-spec/compression.html#rfc.section.4.1
+	// "The size of the dynamic table is the sum of the size of
+	// its entries.  The size of an entry is the sum of its name's
+	// length in octets (as defined in Section 5.2), its value's
+	// length in octets (see Section 5.2), plus 32.  The size of
+	// an entry is calculated using the length of the name and
+	// value without any Huffman encoding applied."
+
+	// This can overflow if somebody makes a large HeaderField
+	// Name and/or Value by hand, but we don't care: that can't
+	// happen on the wire, because the encoding doesn't allow
+	// it.
+	return uint32(len(hf.Name) + len(hf.Value) + 32)
+}
+
+// A Decoder is the decoding context for incremental processing of
+// header blocks.
+type Decoder struct {
+	dynTab dynamicTable
+	emit   func(f HeaderField)
+
+	emitEnabled bool // whether calls to emit are enabled
+	maxStrLen   int  // 0 means unlimited
+
+	// buf is the unparsed buffer. It's only written to
+	// saveBuf if it was truncated in the middle of a header
+	// block. Because it's usually not owned, we can only
+	// process it under Write.
+	buf []byte // not owned; only valid during Write
+
+	// saveBuf is previous data passed to Write which we weren't able
+	// to fully parse before. Unlike buf, we own this data.
+	saveBuf bytes.Buffer
+}
+
+// NewDecoder returns a new decoder with the provided maximum dynamic
+// table size. The emitFunc will be called for each valid field
+// parsed, in the same goroutine as calls to Write, before Write returns.
+func NewDecoder(maxDynamicTableSize uint32, emitFunc func(f HeaderField)) *Decoder {
+	d := &Decoder{
+		emit:        emitFunc,
+		emitEnabled: true,
+	}
+	d.dynTab.allowedMaxSize = maxDynamicTableSize
+	d.dynTab.setMaxSize(maxDynamicTableSize)
+	return d
+}
+
+// ErrStringLength is returned by Decoder.Write when the max string length
+// (as configured by Decoder.SetMaxStringLength) would be violated.
+var ErrStringLength = errors.New("hpack: string too long")
+
+// SetMaxStringLength sets the maximum size of a HeaderField name or
+// value string. If a string exceeds this length (even after any
+// decompression), Write will return ErrStringLength.
+// A value of 0 means unlimited and is the default from NewDecoder.
+func (d *Decoder) SetMaxStringLength(n int) {
+	d.maxStrLen = n
+}
+
+// SetEmitEnabled controls whether the emitFunc provided to NewDecoder
+// should be called. The default is true.
+//
+// This facility exists to let servers enforce MAX_HEADER_LIST_SIZE
+// while still decoding and keeping in-sync with decoder state, but
+// without doing unnecessary decompression or generating unnecessary
+// garbage for header fields past the limit.
+func (d *Decoder) SetEmitEnabled(v bool) { d.emitEnabled = v }
+
+// EmitEnabled reports whether calls to the emitFunc provided to NewDecoder
+// are currently enabled. The default is true.
+func (d *Decoder) EmitEnabled() bool { return d.emitEnabled }
+
+// TODO: add method *Decoder.Reset(maxSize, emitFunc) to let callers re-use Decoders and their
+// underlying buffers for garbage reasons.
+
+func (d *Decoder) SetMaxDynamicTableSize(v uint32) {
+	d.dynTab.setMaxSize(v)
+}
+
+// SetAllowedMaxDynamicTableSize sets the upper bound that the encoded
+// stream (via dynamic table size updates) may set the maximum size
+// to.
+func (d *Decoder) SetAllowedMaxDynamicTableSize(v uint32) {
+	d.dynTab.allowedMaxSize = v
+}
+
+type dynamicTable struct {
+	// ents is the FIFO described at
+	// http://http2.github.io/http2-spec/compression.html#rfc.section.2.3.2
+	// The newest (low index) is appended at the end, and items are
+	// evicted from the front.
+	ents           []HeaderField
+	size           uint32
+	maxSize        uint32 // current maxSize
+	allowedMaxSize uint32 // maxSize may go up to this, inclusive
+}
+
+func (dt *dynamicTable) setMaxSize(v uint32) {
+	dt.maxSize = v
+	dt.evict()
+}
+
+// TODO: change dynamicTable to be a struct with a slice and a size int field,
+// per http://http2.github.io/http2-spec/compression.html#rfc.section.4.1:
+//
+//
+// Then make add increment the size. maybe the max size should move from Decoder to
+// dynamicTable and add should return an ok bool if there was enough space.
+//
+// Later we'll need a remove operation on dynamicTable.
+
+func (dt *dynamicTable) add(f HeaderField) {
+	dt.ents = append(dt.ents, f)
+	dt.size += f.size()
+	dt.evict()
+}
+
+// If we're too big, evict old stuff (front of the slice)
+func (dt *dynamicTable) evict() {
+	base := dt.ents // keep base pointer of slice
+	for dt.size > dt.maxSize {
+		dt.size -= dt.ents[0].size()
+		dt.ents = dt.ents[1:]
+	}
+
+	// Shift slice contents down if we evicted things.
+	if len(dt.ents) != len(base) {
+		copy(base, dt.ents)
+		dt.ents = base[:len(dt.ents)]
+	}
+}
+
+// constantTimeStringCompare compares string a and b in a constant
+// time manner.
+func constantTimeStringCompare(a, b string) bool {
+	if len(a) != len(b) {
+		return false
+	}
+
+	c := byte(0)
+
+	for i := 0; i < len(a); i++ {
+		c |= a[i] ^ b[i]
+	}
+
+	return c == 0
+}
+
+// search searches f in the table. The return value i is 0 if there is
+// no name match. If there is a name match or name/value match, i is the
+// index of that entry (1-based). If both name and value match,
+// nameValueMatch becomes true.
+func (dt *dynamicTable) search(f HeaderField) (i uint64, nameValueMatch bool) {
+	l := len(dt.ents)
+	for j := l - 1; j >= 0; j-- {
+		ent := dt.ents[j]
+		if !constantTimeStringCompare(ent.Name, f.Name) {
+			continue
+		}
+		if i == 0 {
+			i = uint64(l - j)
+		}
+		if f.Sensitive {
+			continue
+		}
+		if !constantTimeStringCompare(ent.Value, f.Value) {
+			continue
+		}
+		i = uint64(l - j)
+		nameValueMatch = true
+		return
+	}
+	return
+}
+
+func (d *Decoder) maxTableIndex() int {
+	return len(d.dynTab.ents) + len(staticTable)
+}
+
+func (d *Decoder) at(i uint64) (hf HeaderField, ok bool) {
+	if i < 1 {
+		return
+	}
+	if i > uint64(d.maxTableIndex()) {
+		return
+	}
+	if i <= uint64(len(staticTable)) {
+		return staticTable[i-1], true
+	}
+	dents := d.dynTab.ents
+	return dents[len(dents)-(int(i)-len(staticTable))], true
+}
+
+// DecodeFull decodes an entire block.
+//
+// TODO: remove this method and make it incremental later? This is
+// easier for debugging now.
+func (d *Decoder) DecodeFull(p []byte) ([]HeaderField, error) {
+	var hf []HeaderField
+	saveFunc := d.emit
+	defer func() { d.emit = saveFunc }()
+	d.emit = func(f HeaderField) { hf = append(hf, f) }
+	if _, err := d.Write(p); err != nil {
+		return nil, err
+	}
+	if err := d.Close(); err != nil {
+		return nil, err
+	}
+	return hf, nil
+}
+
+func (d *Decoder) Close() error {
+	if d.saveBuf.Len() > 0 {
+		d.saveBuf.Reset()
+		return DecodingError{errors.New("truncated headers")}
+	}
+	return nil
+}
+
+func (d *Decoder) Write(p []byte) (n int, err error) {
+	if len(p) == 0 {
+		// Prevent state machine CPU attacks (making us redo
+		// work up to the point of finding out we don't have
+		// enough data)
+		return
+	}
+	// Only copy the data if we have to. Optimistically assume
+	// that p will contain a complete header block.
+	if d.saveBuf.Len() == 0 {
+		d.buf = p
+	} else {
+		d.saveBuf.Write(p)
+		d.buf = d.saveBuf.Bytes()
+		d.saveBuf.Reset()
+	}
+
+	for len(d.buf) > 0 {
+		err = d.parseHeaderFieldRepr()
+		if err == errNeedMore {
+			const varIntOverhead = 8 // conservative
+			if d.maxStrLen != 0 && int64(len(d.buf)) > 2*(int64(d.maxStrLen)+varIntOverhead) {
+				return 0, ErrStringLength
+			}
+			d.saveBuf.Write(d.buf)
+			return len(p), nil
+		}
+		if err != nil {
+			break
+		}
+	}
+	return len(p), err
+}
+
+// errNeedMore is an internal sentinel error value that means the
+// buffer is truncated and we need to read more data before we can
+// continue parsing.
+var errNeedMore = errors.New("need more data")
+
+type indexType int
+
+const (
+	indexedTrue indexType = iota
+	indexedFalse
+	indexedNever
+)
+
+func (v indexType) indexed() bool   { return v == indexedTrue }
+func (v indexType) sensitive() bool { return v == indexedNever }
+
+// returns errNeedMore if there isn't enough data available.
+// any other error is fatal.
+// consumes d.buf iff it returns nil.
+// precondition: must be called with len(d.buf) > 0
+func (d *Decoder) parseHeaderFieldRepr() error {
+	b := d.buf[0]
+	switch {
+	case b&128 != 0:
+		// Indexed representation.
+		// High bit set?
+		// http://http2.github.io/http2-spec/compression.html#rfc.section.6.1
+		return d.parseFieldIndexed()
+	case b&192 == 64:
+		// 6.2.1 Literal Header Field with Incremental Indexing
+		// 0b01xxxxxx: top two bits are 01
+		// http://http2.github.io/http2-spec/compression.html#rfc.section.6.2.1
+		return d.parseFieldLiteral(6, indexedTrue)
+	case b&240 == 0:
+		// 6.2.2 Literal Header Field without Indexing
+		// 0b0000xxxx: top four bits are 0000
+		// http://http2.github.io/http2-spec/compression.html#rfc.section.6.2.2
+		return d.parseFieldLiteral(4, indexedFalse)
+	case b&240 == 16:
+		// 6.2.3 Literal Header Field never Indexed
+		// 0b0001xxxx: top four bits are 0001
+		// http://http2.github.io/http2-spec/compression.html#rfc.section.6.2.3
+		return d.parseFieldLiteral(4, indexedNever)
+	case b&224 == 32:
+		// 6.3 Dynamic Table Size Update
+		// Top three bits are '001'.
+		// http://http2.github.io/http2-spec/compression.html#rfc.section.6.3
+		return d.parseDynamicTableSizeUpdate()
+	}
+
+	return DecodingError{errors.New("invalid encoding")}
+}
+
+// (same invariants and behavior as parseHeaderFieldRepr)
+func (d *Decoder) parseFieldIndexed() error {
+	buf := d.buf
+	idx, buf, err := readVarInt(7, buf)
+	if err != nil {
+		return err
+	}
+	hf, ok := d.at(idx)
+	if !ok {
+		return DecodingError{InvalidIndexError(idx)}
+	}
+	d.buf = buf
+	return d.callEmit(HeaderField{Name: hf.Name, Value: hf.Value})
+}
+
+// (same invariants and behavior as parseHeaderFieldRepr)
+func (d *Decoder) parseFieldLiteral(n uint8, it indexType) error {
+	buf := d.buf
+	nameIdx, buf, err := readVarInt(n, buf)
+	if err != nil {
+		return err
+	}
+
+	var hf HeaderField
+	wantStr := d.emitEnabled || it.indexed()
+	if nameIdx > 0 {
+		ihf, ok := d.at(nameIdx)
+		if !ok {
+			return DecodingError{InvalidIndexError(nameIdx)}
+		}
+		hf.Name = ihf.Name
+	} else {
+		hf.Name, buf, err = d.readString(buf, wantStr)
+		if err != nil {
+			return err
+		}
+	}
+	hf.Value, buf, err = d.readString(buf, wantStr)
+	if err != nil {
+		return err
+	}
+	d.buf = buf
+	if it.indexed() {
+		d.dynTab.add(hf)
+	}
+	hf.Sensitive = it.sensitive()
+	return d.callEmit(hf)
+}
+
+func (d *Decoder) callEmit(hf HeaderField) error {
+	if d.maxStrLen != 0 {
+		if len(hf.Name) > d.maxStrLen || len(hf.Value) > d.maxStrLen {
+			return ErrStringLength
+		}
+	}
+	if d.emitEnabled {
+		d.emit(hf)
+	}
+	return nil
+}
+
+// (same invariants and behavior as parseHeaderFieldRepr)
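+//
+// For example, the single byte 0x20 (pattern 001, 5-bit value 0)
+// requests a maximum dynamic table size of zero, which evicts all
+// current entries.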
+func (d *Decoder) parseDynamicTableSizeUpdate() error {
+	buf := d.buf
+	size, buf, err := readVarInt(5, buf)
+	if err != nil {
+		return err
+	}
+	if size > uint64(d.dynTab.allowedMaxSize) {
+		return DecodingError{errors.New("dynamic table size update too large")}
+	}
+	d.dynTab.setMaxSize(uint32(size))
+	d.buf = buf
+	return nil
+}
+
+var errVarintOverflow = DecodingError{errors.New("varint integer overflow")}
+
+// readVarInt reads an unsigned variable length integer off the
+// beginning of p. n is the parameter as described in
+// http://http2.github.io/http2-spec/compression.html#rfc.section.5.1.
+//
+// n must always be between 1 and 8.
+//
+// On success, the returned remain buffer is a smaller suffix of p;
+// otherwise err is non-nil. The error is errNeedMore if p doesn't
+// contain a complete integer.
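+//
+// For example (the spec's C.1.2 example; TestReadVarInt below covers
+// the same value with dummy high bits): with n=5, the bytes
+// {0x1f, 0x9a, 0x0a} decode to 1337, consuming all three bytes.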
+func readVarInt(n byte, p []byte) (i uint64, remain []byte, err error) {
+	if n < 1 || n > 8 {
+		panic("bad n")
+	}
+	if len(p) == 0 {
+		return 0, p, errNeedMore
+	}
+	i = uint64(p[0])
+	if n < 8 {
+		i &= (1 << uint64(n)) - 1
+	}
+	if i < (1<<uint64(n))-1 {
+		return i, p[1:], nil
+	}
+
+	origP := p
+	p = p[1:]
+	var m uint64
+	for len(p) > 0 {
+		b := p[0]
+		p = p[1:]
+		i += uint64(b&127) << m
+		if b&128 == 0 {
+			return i, p, nil
+		}
+		m += 7
+		if m >= 63 { // TODO: proper overflow check. making this up.
+			return 0, origP, errVarintOverflow
+		}
+	}
+	return 0, origP, errNeedMore
+}
+
+// readString decodes an hpack string from p.
+//
+// wantStr reports whether the caller will use s. If false, the
+// Huffman decompression and the []byte->string allocation are
+// skipped, since s will be ignored anyway. This does mean that
+// Huffman decoding errors for non-indexed strings past the
+// MAX_HEADER_LIST_SIZE are ignored, but the server is returning an
+// error anyway, and because they're not indexed, the error won't
+// affect the decoding state.
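+//
+// For example (bytes built from the encoding rules above): the input
+// {0x03, 'f', 'o', 'o'} decodes to the literal "foo", while a first
+// byte of 0x82 (high bit set, 7-bit length 2) would instead announce
+// a 2-byte Huffman-coded payload.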
+func (d *Decoder) readString(p []byte, wantStr bool) (s string, remain []byte, err error) {
+	if len(p) == 0 {
+		return "", p, errNeedMore
+	}
+	isHuff := p[0]&128 != 0
+	strLen, p, err := readVarInt(7, p)
+	if err != nil {
+		return "", p, err
+	}
+	if d.maxStrLen != 0 && strLen > uint64(d.maxStrLen) {
+		return "", nil, ErrStringLength
+	}
+	if uint64(len(p)) < strLen {
+		return "", p, errNeedMore
+	}
+	if !isHuff {
+		if wantStr {
+			s = string(p[:strLen])
+		}
+		return s, p[strLen:], nil
+	}
+
+	if wantStr {
+		buf := bufPool.Get().(*bytes.Buffer)
+		buf.Reset() // don't trust others
+		defer bufPool.Put(buf)
+		if err := huffmanDecode(buf, d.maxStrLen, p[:strLen]); err != nil {
+			return "", nil, err
+		}
+		s = buf.String()
+		buf.Reset() // be nice to GC
+	}
+	return s, p[strLen:], nil
+}
diff --git a/src/vendor/golang.org/x/net/http2/hpack/hpack_test.go b/src/vendor/golang.org/x/net/http2/hpack/hpack_test.go
new file mode 100644
index 0000000..6dc69f9
--- /dev/null
+++ b/src/vendor/golang.org/x/net/http2/hpack/hpack_test.go
@@ -0,0 +1,813 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package hpack
+
+import (
+	"bufio"
+	"bytes"
+	"encoding/hex"
+	"fmt"
+	"math/rand"
+	"reflect"
+	"regexp"
+	"strconv"
+	"strings"
+	"testing"
+	"time"
+)
+
+func TestStaticTable(t *testing.T) {
+	fromSpec := `
+          +-------+-----------------------------+---------------+
+          | 1     | :authority                  |               |
+          | 2     | :method                     | GET           |
+          | 3     | :method                     | POST          |
+          | 4     | :path                       | /             |
+          | 5     | :path                       | /index.html   |
+          | 6     | :scheme                     | http          |
+          | 7     | :scheme                     | https         |
+          | 8     | :status                     | 200           |
+          | 9     | :status                     | 204           |
+          | 10    | :status                     | 206           |
+          | 11    | :status                     | 304           |
+          | 12    | :status                     | 400           |
+          | 13    | :status                     | 404           |
+          | 14    | :status                     | 500           |
+          | 15    | accept-charset              |               |
+          | 16    | accept-encoding             | gzip, deflate |
+          | 17    | accept-language             |               |
+          | 18    | accept-ranges               |               |
+          | 19    | accept                      |               |
+          | 20    | access-control-allow-origin |               |
+          | 21    | age                         |               |
+          | 22    | allow                       |               |
+          | 23    | authorization               |               |
+          | 24    | cache-control               |               |
+          | 25    | content-disposition         |               |
+          | 26    | content-encoding            |               |
+          | 27    | content-language            |               |
+          | 28    | content-length              |               |
+          | 29    | content-location            |               |
+          | 30    | content-range               |               |
+          | 31    | content-type                |               |
+          | 32    | cookie                      |               |
+          | 33    | date                        |               |
+          | 34    | etag                        |               |
+          | 35    | expect                      |               |
+          | 36    | expires                     |               |
+          | 37    | from                        |               |
+          | 38    | host                        |               |
+          | 39    | if-match                    |               |
+          | 40    | if-modified-since           |               |
+          | 41    | if-none-match               |               |
+          | 42    | if-range                    |               |
+          | 43    | if-unmodified-since         |               |
+          | 44    | last-modified               |               |
+          | 45    | link                        |               |
+          | 46    | location                    |               |
+          | 47    | max-forwards                |               |
+          | 48    | proxy-authenticate          |               |
+          | 49    | proxy-authorization         |               |
+          | 50    | range                       |               |
+          | 51    | referer                     |               |
+          | 52    | refresh                     |               |
+          | 53    | retry-after                 |               |
+          | 54    | server                      |               |
+          | 55    | set-cookie                  |               |
+          | 56    | strict-transport-security   |               |
+          | 57    | transfer-encoding           |               |
+          | 58    | user-agent                  |               |
+          | 59    | vary                        |               |
+          | 60    | via                         |               |
+          | 61    | www-authenticate            |               |
+          +-------+-----------------------------+---------------+
+`
+	bs := bufio.NewScanner(strings.NewReader(fromSpec))
+	re := regexp.MustCompile(`\| (\d+)\s+\| (\S+)\s*\| (\S(.*\S)?)?\s+\|`)
+	for bs.Scan() {
+		l := bs.Text()
+		if !strings.Contains(l, "|") {
+			continue
+		}
+		m := re.FindStringSubmatch(l)
+		if m == nil {
+			continue
+		}
+		i, err := strconv.Atoi(m[1])
+		if err != nil {
+			t.Errorf("Bogus integer on line %q", l)
+			continue
+		}
+		if i < 1 || i > len(staticTable) {
+			t.Errorf("Bogus index %d on line %q", i, l)
+			continue
+		}
+		if got, want := staticTable[i-1].Name, m[2]; got != want {
+			t.Errorf("header index %d name = %q; want %q", i, got, want)
+		}
+		if got, want := staticTable[i-1].Value, m[3]; got != want {
+			t.Errorf("header index %d value = %q; want %q", i, got, want)
+		}
+	}
+	if err := bs.Err(); err != nil {
+		t.Error(err)
+	}
+}
+
+func (d *Decoder) mustAt(idx int) HeaderField {
+	hf, ok := d.at(uint64(idx))
+	if !ok {
+		panic(fmt.Sprintf("bogus index %d", idx))
+	}
+	return hf
+}
+
+func TestDynamicTableAt(t *testing.T) {
+	d := NewDecoder(4096, nil)
+	at := d.mustAt
+	if got, want := at(2), (pair(":method", "GET")); got != want {
+		t.Errorf("at(2) = %v; want %v", got, want)
+	}
+	d.dynTab.add(pair("foo", "bar"))
+	d.dynTab.add(pair("blake", "miz"))
+	if got, want := at(len(staticTable)+1), (pair("blake", "miz")); got != want {
+		t.Errorf("at(dyn 1) = %v; want %v", got, want)
+	}
+	if got, want := at(len(staticTable)+2), (pair("foo", "bar")); got != want {
+		t.Errorf("at(dyn 2) = %v; want %v", got, want)
+	}
+	if got, want := at(3), (pair(":method", "POST")); got != want {
+		t.Errorf("at(3) = %v; want %v", got, want)
+	}
+}
+
+func TestDynamicTableSearch(t *testing.T) {
+	dt := dynamicTable{}
+	dt.setMaxSize(4096)
+
+	dt.add(pair("foo", "bar"))
+	dt.add(pair("blake", "miz"))
+	dt.add(pair(":method", "GET"))
+
+	tests := []struct {
+		hf        HeaderField
+		wantI     uint64
+		wantMatch bool
+	}{
+		// Name and Value match
+		{pair("foo", "bar"), 3, true},
+		{pair(":method", "GET"), 1, true},
+
+		// Name matches, but no value match is reported because Sensitive == true
+		{HeaderField{"blake", "miz", true}, 2, false},
+
+		// Only the name matches
+		{pair("foo", "..."), 3, false},
+		{pair("blake", "..."), 2, false},
+		{pair(":method", "..."), 1, false},
+
+		// None match
+		{pair("foo-", "bar"), 0, false},
+	}
+	for _, tt := range tests {
+		if gotI, gotMatch := dt.search(tt.hf); gotI != tt.wantI || gotMatch != tt.wantMatch {
+			t.Errorf("d.search(%+v) = %v, %v; want %v, %v", tt.hf, gotI, gotMatch, tt.wantI, tt.wantMatch)
+		}
+	}
+}
+
+func TestDynamicTableSizeEvict(t *testing.T) {
+	d := NewDecoder(4096, nil)
+	if want := uint32(0); d.dynTab.size != want {
+		t.Fatalf("size = %d; want %d", d.dynTab.size, want)
+	}
+	add := d.dynTab.add
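+	// Per the spec, each entry is charged len(name)+len(value)+32
+	// bytes; the expected sizes below follow that rule
+	// ("blake"+"eats pizza" = 15 bytes, plus 32 overhead).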
+	add(pair("blake", "eats pizza"))
+	if want := uint32(15 + 32); d.dynTab.size != want {
+		t.Fatalf("after pizza, size = %d; want %d", d.dynTab.size, want)
+	}
+	add(pair("foo", "bar"))
+	if want := uint32(15 + 32 + 6 + 32); d.dynTab.size != want {
+		t.Fatalf("after foo bar, size = %d; want %d", d.dynTab.size, want)
+	}
+	d.dynTab.setMaxSize(15 + 32 + 1 /* slop */)
+	if want := uint32(6 + 32); d.dynTab.size != want {
+		t.Fatalf("after setMaxSize, size = %d; want %d", d.dynTab.size, want)
+	}
+	if got, want := d.mustAt(len(staticTable)+1), (pair("foo", "bar")); got != want {
+		t.Errorf("at(dyn 1) = %v; want %v", got, want)
+	}
+	add(pair("long", strings.Repeat("x", 500)))
+	if want := uint32(0); d.dynTab.size != want {
+		t.Fatalf("after big one, size = %d; want %d", d.dynTab.size, want)
+	}
+}
+
+func TestDecoderDecode(t *testing.T) {
+	tests := []struct {
+		name       string
+		in         []byte
+		want       []HeaderField
+		wantDynTab []HeaderField // newest entry first
+	}{
+		// C.2.1 Literal Header Field with Indexing
+		// http://http2.github.io/http2-spec/compression.html#rfc.section.C.2.1
+		{"C.2.1", dehex("400a 6375 7374 6f6d 2d6b 6579 0d63 7573 746f 6d2d 6865 6164 6572"),
+			[]HeaderField{pair("custom-key", "custom-header")},
+			[]HeaderField{pair("custom-key", "custom-header")},
+		},
+
+		// C.2.2 Literal Header Field without Indexing
+		// http://http2.github.io/http2-spec/compression.html#rfc.section.C.2.2
+		{"C.2.2", dehex("040c 2f73 616d 706c 652f 7061 7468"),
+			[]HeaderField{pair(":path", "/sample/path")},
+			[]HeaderField{}},
+
+		// C.2.3 Literal Header Field never Indexed
+		// http://http2.github.io/http2-spec/compression.html#rfc.section.C.2.3
+		{"C.2.3", dehex("1008 7061 7373 776f 7264 0673 6563 7265 74"),
+			[]HeaderField{{"password", "secret", true}},
+			[]HeaderField{}},
+
+		// C.2.4 Indexed Header Field
+		// http://http2.github.io/http2-spec/compression.html#rfc.section.C.2.4
+		{"C.2.4", []byte("\x82"),
+			[]HeaderField{pair(":method", "GET")},
+			[]HeaderField{}},
+	}
+	for _, tt := range tests {
+		d := NewDecoder(4096, nil)
+		hf, err := d.DecodeFull(tt.in)
+		if err != nil {
+			t.Errorf("%s: %v", tt.name, err)
+			continue
+		}
+		if !reflect.DeepEqual(hf, tt.want) {
+			t.Errorf("%s: Got %v; want %v", tt.name, hf, tt.want)
+		}
+		gotDynTab := d.dynTab.reverseCopy()
+		if !reflect.DeepEqual(gotDynTab, tt.wantDynTab) {
+			t.Errorf("%s: dynamic table after = %v; want %v", tt.name, gotDynTab, tt.wantDynTab)
+		}
+	}
+}
+
+func (dt *dynamicTable) reverseCopy() (hf []HeaderField) {
+	hf = make([]HeaderField, len(dt.ents))
+	for i := range hf {
+		hf[i] = dt.ents[len(dt.ents)-1-i]
+	}
+	return
+}
+
+type encAndWant struct {
+	enc         []byte
+	want        []HeaderField
+	wantDynTab  []HeaderField
+	wantDynSize uint32
+}
+
+// C.3 Request Examples without Huffman Coding
+// http://http2.github.io/http2-spec/compression.html#rfc.section.C.3
+func TestDecodeC3_NoHuffman(t *testing.T) {
+	testDecodeSeries(t, 4096, []encAndWant{
+		{dehex("8286 8441 0f77 7777 2e65 7861 6d70 6c65 2e63 6f6d"),
+			[]HeaderField{
+				pair(":method", "GET"),
+				pair(":scheme", "http"),
+				pair(":path", "/"),
+				pair(":authority", "www.example.com"),
+			},
+			[]HeaderField{
+				pair(":authority", "www.example.com"),
+			},
+			57,
+		},
+		{dehex("8286 84be 5808 6e6f 2d63 6163 6865"),
+			[]HeaderField{
+				pair(":method", "GET"),
+				pair(":scheme", "http"),
+				pair(":path", "/"),
+				pair(":authority", "www.example.com"),
+				pair("cache-control", "no-cache"),
+			},
+			[]HeaderField{
+				pair("cache-control", "no-cache"),
+				pair(":authority", "www.example.com"),
+			},
+			110,
+		},
+		{dehex("8287 85bf 400a 6375 7374 6f6d 2d6b 6579 0c63 7573 746f 6d2d 7661 6c75 65"),
+			[]HeaderField{
+				pair(":method", "GET"),
+				pair(":scheme", "https"),
+				pair(":path", "/index.html"),
+				pair(":authority", "www.example.com"),
+				pair("custom-key", "custom-value"),
+			},
+			[]HeaderField{
+				pair("custom-key", "custom-value"),
+				pair("cache-control", "no-cache"),
+				pair(":authority", "www.example.com"),
+			},
+			164,
+		},
+	})
+}
+
+// C.4 Request Examples with Huffman Coding
+// http://http2.github.io/http2-spec/compression.html#rfc.section.C.4
+func TestDecodeC4_Huffman(t *testing.T) {
+	testDecodeSeries(t, 4096, []encAndWant{
+		{dehex("8286 8441 8cf1 e3c2 e5f2 3a6b a0ab 90f4 ff"),
+			[]HeaderField{
+				pair(":method", "GET"),
+				pair(":scheme", "http"),
+				pair(":path", "/"),
+				pair(":authority", "www.example.com"),
+			},
+			[]HeaderField{
+				pair(":authority", "www.example.com"),
+			},
+			57,
+		},
+		{dehex("8286 84be 5886 a8eb 1064 9cbf"),
+			[]HeaderField{
+				pair(":method", "GET"),
+				pair(":scheme", "http"),
+				pair(":path", "/"),
+				pair(":authority", "www.example.com"),
+				pair("cache-control", "no-cache"),
+			},
+			[]HeaderField{
+				pair("cache-control", "no-cache"),
+				pair(":authority", "www.example.com"),
+			},
+			110,
+		},
+		{dehex("8287 85bf 4088 25a8 49e9 5ba9 7d7f 8925 a849 e95b b8e8 b4bf"),
+			[]HeaderField{
+				pair(":method", "GET"),
+				pair(":scheme", "https"),
+				pair(":path", "/index.html"),
+				pair(":authority", "www.example.com"),
+				pair("custom-key", "custom-value"),
+			},
+			[]HeaderField{
+				pair("custom-key", "custom-value"),
+				pair("cache-control", "no-cache"),
+				pair(":authority", "www.example.com"),
+			},
+			164,
+		},
+	})
+}
+
+// http://http2.github.io/http2-spec/compression.html#rfc.section.C.5
+// "This section shows several consecutive header lists, corresponding
+// to HTTP responses, on the same connection. The HTTP/2 setting
+// parameter SETTINGS_HEADER_TABLE_SIZE is set to the value of 256
+// octets, causing some evictions to occur."
+func TestDecodeC5_ResponsesNoHuff(t *testing.T) {
+	testDecodeSeries(t, 256, []encAndWant{
+		{dehex(`
+4803 3330 3258 0770 7269 7661 7465 611d
+4d6f 6e2c 2032 3120 4f63 7420 3230 3133
+2032 303a 3133 3a32 3120 474d 546e 1768
+7474 7073 3a2f 2f77 7777 2e65 7861 6d70
+6c65 2e63 6f6d
+`),
+			[]HeaderField{
+				pair(":status", "302"),
+				pair("cache-control", "private"),
+				pair("date", "Mon, 21 Oct 2013 20:13:21 GMT"),
+				pair("location", "https://www.example.com"),
+			},
+			[]HeaderField{
+				pair("location", "https://www.example.com"),
+				pair("date", "Mon, 21 Oct 2013 20:13:21 GMT"),
+				pair("cache-control", "private"),
+				pair(":status", "302"),
+			},
+			222,
+		},
+		{dehex("4803 3330 37c1 c0bf"),
+			[]HeaderField{
+				pair(":status", "307"),
+				pair("cache-control", "private"),
+				pair("date", "Mon, 21 Oct 2013 20:13:21 GMT"),
+				pair("location", "https://www.example.com"),
+			},
+			[]HeaderField{
+				pair(":status", "307"),
+				pair("location", "https://www.example.com"),
+				pair("date", "Mon, 21 Oct 2013 20:13:21 GMT"),
+				pair("cache-control", "private"),
+			},
+			222,
+		},
+		{dehex(`
+88c1 611d 4d6f 6e2c 2032 3120 4f63 7420
+3230 3133 2032 303a 3133 3a32 3220 474d
+54c0 5a04 677a 6970 7738 666f 6f3d 4153
+444a 4b48 514b 425a 584f 5157 454f 5049
+5541 5851 5745 4f49 553b 206d 6178 2d61
+6765 3d33 3630 303b 2076 6572 7369 6f6e
+3d31
+`),
+			[]HeaderField{
+				pair(":status", "200"),
+				pair("cache-control", "private"),
+				pair("date", "Mon, 21 Oct 2013 20:13:22 GMT"),
+				pair("location", "https://www.example.com"),
+				pair("content-encoding", "gzip"),
+				pair("set-cookie", "foo=ASDJKHQKBZXOQWEOPIUAXQWEOIU; max-age=3600; version=1"),
+			},
+			[]HeaderField{
+				pair("set-cookie", "foo=ASDJKHQKBZXOQWEOPIUAXQWEOIU; max-age=3600; version=1"),
+				pair("content-encoding", "gzip"),
+				pair("date", "Mon, 21 Oct 2013 20:13:22 GMT"),
+			},
+			215,
+		},
+	})
+}
+
+// http://http2.github.io/http2-spec/compression.html#rfc.section.C.6
+// "This section shows the same examples as the previous section, but
+// using Huffman encoding for the literal values. The HTTP/2 setting
+// parameter SETTINGS_HEADER_TABLE_SIZE is set to the value of 256
+// octets, causing some evictions to occur. The eviction mechanism
+// uses the length of the decoded literal values, so the same
+// evictions occur as in the previous section."
+func TestDecodeC6_ResponsesHuffman(t *testing.T) {
+	testDecodeSeries(t, 256, []encAndWant{
+		{dehex(`
+4882 6402 5885 aec3 771a 4b61 96d0 7abe
+9410 54d4 44a8 2005 9504 0b81 66e0 82a6
+2d1b ff6e 919d 29ad 1718 63c7 8f0b 97c8
+e9ae 82ae 43d3
+`),
+			[]HeaderField{
+				pair(":status", "302"),
+				pair("cache-control", "private"),
+				pair("date", "Mon, 21 Oct 2013 20:13:21 GMT"),
+				pair("location", "https://www.example.com"),
+			},
+			[]HeaderField{
+				pair("location", "https://www.example.com"),
+				pair("date", "Mon, 21 Oct 2013 20:13:21 GMT"),
+				pair("cache-control", "private"),
+				pair(":status", "302"),
+			},
+			222,
+		},
+		{dehex("4883 640e ffc1 c0bf"),
+			[]HeaderField{
+				pair(":status", "307"),
+				pair("cache-control", "private"),
+				pair("date", "Mon, 21 Oct 2013 20:13:21 GMT"),
+				pair("location", "https://www.example.com"),
+			},
+			[]HeaderField{
+				pair(":status", "307"),
+				pair("location", "https://www.example.com"),
+				pair("date", "Mon, 21 Oct 2013 20:13:21 GMT"),
+				pair("cache-control", "private"),
+			},
+			222,
+		},
+		{dehex(`
+88c1 6196 d07a be94 1054 d444 a820 0595
+040b 8166 e084 a62d 1bff c05a 839b d9ab
+77ad 94e7 821d d7f2 e6c7 b335 dfdf cd5b
+3960 d5af 2708 7f36 72c1 ab27 0fb5 291f
+9587 3160 65c0 03ed 4ee5 b106 3d50 07
+`),
+			[]HeaderField{
+				pair(":status", "200"),
+				pair("cache-control", "private"),
+				pair("date", "Mon, 21 Oct 2013 20:13:22 GMT"),
+				pair("location", "https://www.example.com"),
+				pair("content-encoding", "gzip"),
+				pair("set-cookie", "foo=ASDJKHQKBZXOQWEOPIUAXQWEOIU; max-age=3600; version=1"),
+			},
+			[]HeaderField{
+				pair("set-cookie", "foo=ASDJKHQKBZXOQWEOPIUAXQWEOIU; max-age=3600; version=1"),
+				pair("content-encoding", "gzip"),
+				pair("date", "Mon, 21 Oct 2013 20:13:22 GMT"),
+			},
+			215,
+		},
+	})
+}
+
+func testDecodeSeries(t *testing.T, size uint32, steps []encAndWant) {
+	d := NewDecoder(size, nil)
+	for i, step := range steps {
+		hf, err := d.DecodeFull(step.enc)
+		if err != nil {
+			t.Fatalf("Error at step index %d: %v", i, err)
+		}
+		if !reflect.DeepEqual(hf, step.want) {
+			t.Fatalf("At step index %d: Got headers %v; want %v", i, hf, step.want)
+		}
+		gotDynTab := d.dynTab.reverseCopy()
+		if !reflect.DeepEqual(gotDynTab, step.wantDynTab) {
+			t.Errorf("After step index %d, dynamic table = %v; want %v", i, gotDynTab, step.wantDynTab)
+		}
+		if d.dynTab.size != step.wantDynSize {
+			t.Errorf("After step index %d, dynamic table size = %v; want %v", i, d.dynTab.size, step.wantDynSize)
+		}
+	}
+}
+
+func TestHuffmanDecode(t *testing.T) {
+	tests := []struct {
+		inHex, want string
+	}{
+		{"f1e3 c2e5 f23a 6ba0 ab90 f4ff", "www.example.com"},
+		{"a8eb 1064 9cbf", "no-cache"},
+		{"25a8 49e9 5ba9 7d7f", "custom-key"},
+		{"25a8 49e9 5bb8 e8b4 bf", "custom-value"},
+		{"6402", "302"},
+		{"aec3 771a 4b", "private"},
+		{"d07a be94 1054 d444 a820 0595 040b 8166 e082 a62d 1bff", "Mon, 21 Oct 2013 20:13:21 GMT"},
+		{"9d29 ad17 1863 c78f 0b97 c8e9 ae82 ae43 d3", "https://www.example.com"},
+		{"9bd9 ab", "gzip"},
+		{"94e7 821d d7f2 e6c7 b335 dfdf cd5b 3960 d5af 2708 7f36 72c1 ab27 0fb5 291f 9587 3160 65c0 03ed 4ee5 b106 3d50 07",
+			"foo=ASDJKHQKBZXOQWEOPIUAXQWEOIU; max-age=3600; version=1"},
+	}
+	for i, tt := range tests {
+		var buf bytes.Buffer
+		in, err := hex.DecodeString(strings.Replace(tt.inHex, " ", "", -1))
+		if err != nil {
+			t.Errorf("%d. hex input error: %v", i, err)
+			continue
+		}
+		if _, err := HuffmanDecode(&buf, in); err != nil {
+			t.Errorf("%d. decode error: %v", i, err)
+			continue
+		}
+		if got := buf.String(); tt.want != got {
+			t.Errorf("%d. decode = %q; want %q", i, got, tt.want)
+		}
+	}
+}
+
+func TestAppendHuffmanString(t *testing.T) {
+	tests := []struct {
+		in, want string
+	}{
+		{"www.example.com", "f1e3 c2e5 f23a 6ba0 ab90 f4ff"},
+		{"no-cache", "a8eb 1064 9cbf"},
+		{"custom-key", "25a8 49e9 5ba9 7d7f"},
+		{"custom-value", "25a8 49e9 5bb8 e8b4 bf"},
+		{"302", "6402"},
+		{"private", "aec3 771a 4b"},
+		{"Mon, 21 Oct 2013 20:13:21 GMT", "d07a be94 1054 d444 a820 0595 040b 8166 e082 a62d 1bff"},
+		{"https://www.example.com", "9d29 ad17 1863 c78f 0b97 c8e9 ae82 ae43 d3"},
+		{"gzip", "9bd9 ab"},
+		{"foo=ASDJKHQKBZXOQWEOPIUAXQWEOIU; max-age=3600; version=1",
+			"94e7 821d d7f2 e6c7 b335 dfdf cd5b 3960 d5af 2708 7f36 72c1 ab27 0fb5 291f 9587 3160 65c0 03ed 4ee5 b106 3d50 07"},
+	}
+	for i, tt := range tests {
+		buf := []byte{}
+		want := strings.Replace(tt.want, " ", "", -1)
+		buf = AppendHuffmanString(buf, tt.in)
+		if got := hex.EncodeToString(buf); want != got {
+			t.Errorf("%d. encode = %q; want %q", i, got, want)
+		}
+	}
+}
+
+func TestHuffmanMaxStrLen(t *testing.T) {
+	const msg = "Some string"
+	huff := AppendHuffmanString(nil, msg)
+
+	testGood := func(max int) {
+		var out bytes.Buffer
+		if err := huffmanDecode(&out, max, huff); err != nil {
+			t.Errorf("For maxLen=%d, unexpected error: %v", max, err)
+		}
+		if out.String() != msg {
+			t.Errorf("For maxLen=%d, out = %q; want %q", max, out.String(), msg)
+		}
+	}
+	testGood(0)
+	testGood(len(msg))
+	testGood(len(msg) + 1)
+
+	var out bytes.Buffer
+	if err := huffmanDecode(&out, len(msg)-1, huff); err != ErrStringLength {
+		t.Errorf("err = %v; want ErrStringLength", err)
+	}
+}
+
+func TestHuffmanRoundtripStress(t *testing.T) {
+	const Len = 50 // of uncompressed string
+	input := make([]byte, Len)
+	var output bytes.Buffer
+	var huff []byte
+
+	n := 5000
+	if testing.Short() {
+		n = 100
+	}
+	seed := time.Now().UnixNano()
+	t.Logf("Seed = %v", seed)
+	src := rand.New(rand.NewSource(seed))
+	var encSize int64
+	for i := 0; i < n; i++ {
+		for l := range input {
+			input[l] = byte(src.Intn(256))
+		}
+		huff = AppendHuffmanString(huff[:0], string(input))
+		encSize += int64(len(huff))
+		output.Reset()
+		if err := huffmanDecode(&output, 0, huff); err != nil {
+			t.Errorf("Failed to decode %q -> %q -> error %v", input, huff, err)
+			continue
+		}
+		if !bytes.Equal(output.Bytes(), input) {
+			t.Errorf("Roundtrip failure on %q -> %q -> %q", input, huff, output.Bytes())
+		}
+	}
+	t.Logf("Compressed size of original: %0.02f%% (%v -> %v)", 100*(float64(encSize)/(Len*float64(n))), Len*n, encSize)
+}
+
+func TestHuffmanDecodeFuzz(t *testing.T) {
+	const Len = 50 // of compressed
+	var buf, zbuf bytes.Buffer
+
+	n := 5000
+	if testing.Short() {
+		n = 100
+	}
+	seed := time.Now().UnixNano()
+	t.Logf("Seed = %v", seed)
+	src := rand.New(rand.NewSource(seed))
+	numFail := 0
+	for i := 0; i < n; i++ {
+		zbuf.Reset()
+		if i == 0 {
+			// Start with at least one invalid one.
+			zbuf.WriteString("00\x91\xff\xff\xff\xff\xc8")
+		} else {
+			for l := 0; l < Len; l++ {
+				zbuf.WriteByte(byte(src.Intn(256)))
+			}
+		}
+
+		buf.Reset()
+		if err := huffmanDecode(&buf, 0, zbuf.Bytes()); err != nil {
+			if err == ErrInvalidHuffman {
+				numFail++
+				continue
+			}
+			t.Errorf("Failed to decode %q: %v", zbuf.Bytes(), err)
+			continue
+		}
+	}
+	t.Logf("%0.02f%% are invalid (%d / %d)", 100*float64(numFail)/float64(n), numFail, n)
+	if numFail < 1 {
+		t.Error("expected at least one invalid huffman encoding (test starts with one)")
+	}
+}
+
+func TestReadVarInt(t *testing.T) {
+	type res struct {
+		i        uint64
+		consumed int
+		err      error
+	}
+	tests := []struct {
+		n    byte
+		p    []byte
+		want res
+	}{
+		// Fits in a byte:
+		{1, []byte{0}, res{0, 1, nil}},
+		{2, []byte{2}, res{2, 1, nil}},
+		{3, []byte{6}, res{6, 1, nil}},
+		{4, []byte{14}, res{14, 1, nil}},
+		{5, []byte{30}, res{30, 1, nil}},
+		{6, []byte{62}, res{62, 1, nil}},
+		{7, []byte{126}, res{126, 1, nil}},
+		{8, []byte{254}, res{254, 1, nil}},
+
+		// Doesn't fit in a byte:
+		{1, []byte{1}, res{0, 0, errNeedMore}},
+		{2, []byte{3}, res{0, 0, errNeedMore}},
+		{3, []byte{7}, res{0, 0, errNeedMore}},
+		{4, []byte{15}, res{0, 0, errNeedMore}},
+		{5, []byte{31}, res{0, 0, errNeedMore}},
+		{6, []byte{63}, res{0, 0, errNeedMore}},
+		{7, []byte{127}, res{0, 0, errNeedMore}},
+		{8, []byte{255}, res{0, 0, errNeedMore}},
+
+		// Ignoring top bits:
+		{5, []byte{255, 154, 10}, res{1337, 3, nil}}, // high dummy three bits: 111
+		{5, []byte{159, 154, 10}, res{1337, 3, nil}}, // high dummy three bits: 100
+		{5, []byte{191, 154, 10}, res{1337, 3, nil}}, // high dummy three bits: 101
+
+		// Extra byte:
+		{5, []byte{191, 154, 10, 2}, res{1337, 3, nil}}, // extra byte
+
+		// Short a byte:
+		{5, []byte{191, 154}, res{0, 0, errNeedMore}},
+
+		// integer overflow:
+		{1, []byte{255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, res{0, 0, errVarintOverflow}},
+	}
+	for _, tt := range tests {
+		i, remain, err := readVarInt(tt.n, tt.p)
+		consumed := len(tt.p) - len(remain)
+		got := res{i, consumed, err}
+		if got != tt.want {
+			t.Errorf("readVarInt(%d, %v ~ %x) = %+v; want %+v", tt.n, tt.p, tt.p, got, tt.want)
+		}
+	}
+}
+
+// Fuzz crash, originally reported at https://github.com/bradfitz/http2/issues/56
+func TestHuffmanFuzzCrash(t *testing.T) {
+	got, err := HuffmanDecodeToString([]byte("00\x91\xff\xff\xff\xff\xc8"))
+	if got != "" {
+		t.Errorf("Got %q; want empty string", got)
+	}
+	if err != ErrInvalidHuffman {
+		t.Errorf("Err = %v; want ErrInvalidHuffman", err)
+	}
+}
+
+func dehex(s string) []byte {
+	s = strings.Replace(s, " ", "", -1)
+	s = strings.Replace(s, "\n", "", -1)
+	b, err := hex.DecodeString(s)
+	if err != nil {
+		panic(err)
+	}
+	return b
+}
+
+func TestEmitEnabled(t *testing.T) {
+	var buf bytes.Buffer
+	enc := NewEncoder(&buf)
+	enc.WriteField(HeaderField{Name: "foo", Value: "bar"})
+	enc.WriteField(HeaderField{Name: "foo", Value: "bar"})
+
+	numCallback := 0
+	var dec *Decoder
+	dec = NewDecoder(8<<20, func(HeaderField) {
+		numCallback++
+		dec.SetEmitEnabled(false)
+	})
+	if !dec.EmitEnabled() {
+		t.Errorf("initial emit enabled = false; want true")
+	}
+	if _, err := dec.Write(buf.Bytes()); err != nil {
+		t.Error(err)
+	}
+	if numCallback != 1 {
+		t.Errorf("num callbacks = %d; want 1", numCallback)
+	}
+	if dec.EmitEnabled() {
+		t.Errorf("emit enabled = true; want false")
+	}
+}
+
+func TestSaveBufLimit(t *testing.T) {
+	const maxStr = 1 << 10
+	var got []HeaderField
+	dec := NewDecoder(initialHeaderTableSize, func(hf HeaderField) {
+		got = append(got, hf)
+	})
+	dec.SetMaxStringLength(maxStr)
+	var frag []byte
+	frag = append(frag[:0], encodeTypeByte(false, false))
+	frag = appendVarInt(frag, 7, 3)
+	frag = append(frag, "foo"...)
+	frag = appendVarInt(frag, 7, 3)
+	frag = append(frag, "bar"...)
+
+	if _, err := dec.Write(frag); err != nil {
+		t.Fatal(err)
+	}
+
+	want := []HeaderField{{Name: "foo", Value: "bar"}}
+	if !reflect.DeepEqual(got, want) {
+		t.Errorf("After small writes, got %v; want %v", got, want)
+	}
+
+	frag = append(frag[:0], encodeTypeByte(false, false))
+	frag = appendVarInt(frag, 7, maxStr*3)
+	frag = append(frag, make([]byte, maxStr*3)...)
+
+	_, err := dec.Write(frag)
+	if err != ErrStringLength {
+		t.Fatalf("Write error = %v; want ErrStringLength", err)
+	}
+}
diff --git a/src/vendor/golang.org/x/net/http2/hpack/huffman.go b/src/vendor/golang.org/x/net/http2/hpack/huffman.go
new file mode 100644
index 0000000..eb4b1f0
--- /dev/null
+++ b/src/vendor/golang.org/x/net/http2/hpack/huffman.go
@@ -0,0 +1,190 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package hpack
+
+import (
+	"bytes"
+	"errors"
+	"io"
+	"sync"
+)
+
+var bufPool = sync.Pool{
+	New: func() interface{} { return new(bytes.Buffer) },
+}
+
+// HuffmanDecode decodes the string in v and writes the expanded
+// result to w, returning the number of bytes written to w and the
+// Write call's return value. At most one Write call is made.
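+//
+// For example (bytes taken from the decoding tests added with this
+// CL):
+//	var out bytes.Buffer
+//	HuffmanDecode(&out, []byte{0xa8, 0xeb, 0x10, 0x64, 0x9c, 0xbf})
+//	// out now contains "no-cache"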
+func HuffmanDecode(w io.Writer, v []byte) (int, error) {
+	buf := bufPool.Get().(*bytes.Buffer)
+	buf.Reset()
+	defer bufPool.Put(buf)
+	if err := huffmanDecode(buf, 0, v); err != nil {
+		return 0, err
+	}
+	return w.Write(buf.Bytes())
+}
+
+// HuffmanDecodeToString decodes the string in v.
+func HuffmanDecodeToString(v []byte) (string, error) {
+	buf := bufPool.Get().(*bytes.Buffer)
+	buf.Reset()
+	defer bufPool.Put(buf)
+	if err := huffmanDecode(buf, 0, v); err != nil {
+		return "", err
+	}
+	return buf.String(), nil
+}
+
+// ErrInvalidHuffman is returned for errors found decoding
+// Huffman-encoded strings.
+var ErrInvalidHuffman = errors.New("hpack: invalid Huffman-encoded data")
+
+// huffmanDecode decodes v to buf.
+// If maxLen is greater than 0, attempts to write more to buf than
+// maxLen bytes will return ErrStringLength.
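+//
+// Decoding walks the 8-bit-fanout trie built by init/addDecoderNode
+// below: each whole input byte indexes into an internal node, and a
+// leaf records how many of those 8 bits its code actually consumed.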
+func huffmanDecode(buf *bytes.Buffer, maxLen int, v []byte) error {
+	n := rootHuffmanNode
+	cur, nbits := uint(0), uint8(0)
+	for _, b := range v {
+		cur = cur<<8 | uint(b)
+		nbits += 8
+		for nbits >= 8 {
+			idx := byte(cur >> (nbits - 8))
+			n = n.children[idx]
+			if n == nil {
+				return ErrInvalidHuffman
+			}
+			if n.children == nil {
+				if maxLen != 0 && buf.Len() == maxLen {
+					return ErrStringLength
+				}
+				buf.WriteByte(n.sym)
+				nbits -= n.codeLen
+				n = rootHuffmanNode
+			} else {
+				nbits -= 8
+			}
+		}
+	}
+	for nbits > 0 {
+		n = n.children[byte(cur<<(8-nbits))]
+		if n.children != nil || n.codeLen > nbits {
+			break
+		}
+		buf.WriteByte(n.sym)
+		nbits -= n.codeLen
+		n = rootHuffmanNode
+	}
+	return nil
+}
+
+type node struct {
+	// children is non-nil for internal nodes
+	children []*node
+
+	// The following are only valid if children is nil:
+	codeLen uint8 // number of bits that led to the output of sym
+	sym     byte  // output symbol
+}
+
+func newInternalNode() *node {
+	return &node{children: make([]*node, 256)}
+}
+
+var rootHuffmanNode = newInternalNode()
+
+func init() {
+	if len(huffmanCodes) != 256 {
+		panic("unexpected size")
+	}
+	for i, code := range huffmanCodes {
+		addDecoderNode(byte(i), code, huffmanCodeLen[i])
+	}
+}
+
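+// addDecoderNode inserts the code for sym into the decoding trie,
+// eight bits per level: codes longer than 8 bits descend through
+// internal nodes, and a code that ends mid-byte is copied into every
+// child slot sharing its bit prefix, so whole-byte lookups still
+// find it.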
+func addDecoderNode(sym byte, code uint32, codeLen uint8) {
+	cur := rootHuffmanNode
+	for codeLen > 8 {
+		codeLen -= 8
+		i := uint8(code >> codeLen)
+		if cur.children[i] == nil {
+			cur.children[i] = newInternalNode()
+		}
+		cur = cur.children[i]
+	}
+	shift := 8 - codeLen
+	start, end := int(uint8(code<<shift)), int(1<<shift)
+	for i := start; i < start+end; i++ {
+		cur.children[i] = &node{sym: sym, codeLen: codeLen}
+	}
+}
+
+// AppendHuffmanString appends s, as encoded in Huffman codes, to dst
+// and returns the extended buffer.
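+//
+// For example (mirroring the encoder tests added with this CL):
+//	enc := AppendHuffmanString(nil, "www.example.com")
+//	// enc == f1e3 c2e5 f23a 6ba0 ab90 f4ff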
+func AppendHuffmanString(dst []byte, s string) []byte {
+	rembits := uint8(8)
+
+	for i := 0; i < len(s); i++ {
+		if rembits == 8 {
+			dst = append(dst, 0)
+		}
+		dst, rembits = appendByteToHuffmanCode(dst, rembits, s[i])
+	}
+
+	if rembits < 8 {
+		// special EOS symbol
+		code := uint32(0x3fffffff)
+		nbits := uint8(30)
+
+		t := uint8(code >> (nbits - rembits))
+		dst[len(dst)-1] |= t
+	}
+
+	return dst
+}
+
+// HuffmanEncodeLength returns the number of bytes required to encode
+// s in Huffman codes. The result is rounded up to the nearest byte
+// boundary.
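+//
+// For example, the codes for the eight symbols of "no-cache" total
+// 43 bits, so HuffmanEncodeLength("no-cache") returns 6 (43 bits
+// rounded up to 6 bytes, matching the 6-byte encoding in the tests).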
+func HuffmanEncodeLength(s string) uint64 {
+	n := uint64(0)
+	for i := 0; i < len(s); i++ {
+		n += uint64(huffmanCodeLen[s[i]])
+	}
+	return (n + 7) / 8
+}
+
+// appendByteToHuffmanCode appends the Huffman code for c to dst and
+// returns the extended buffer and the remaining free bits in the
+// last element. The appending is not byte-aligned: the number of
+// bits still free in the last element of dst is given by rembits.
+func appendByteToHuffmanCode(dst []byte, rembits uint8, c byte) ([]byte, uint8) {
+	code := huffmanCodes[c]
+	nbits := huffmanCodeLen[c]
+
+	for {
+		if rembits > nbits {
+			t := uint8(code << (rembits - nbits))
+			dst[len(dst)-1] |= t
+			rembits -= nbits
+			break
+		}
+
+		t := uint8(code >> (nbits - rembits))
+		dst[len(dst)-1] |= t
+
+		nbits -= rembits
+		rembits = 8
+
+		if nbits == 0 {
+			break
+		}
+
+		dst = append(dst, 0)
+	}
+
+	return dst, rembits
+}
diff --git a/src/vendor/golang.org/x/net/http2/hpack/tables.go b/src/vendor/golang.org/x/net/http2/hpack/tables.go
new file mode 100644
index 0000000..b9283a0
--- /dev/null
+++ b/src/vendor/golang.org/x/net/http2/hpack/tables.go
@@ -0,0 +1,352 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package hpack
+
+func pair(name, value string) HeaderField {
+	return HeaderField{Name: name, Value: value}
+}
+
+// http://tools.ietf.org/html/draft-ietf-httpbis-header-compression-07#appendix-B
+var staticTable = [...]HeaderField{
+	pair(":authority", ""), // index 1 (1-based)
+	pair(":method", "GET"),
+	pair(":method", "POST"),
+	pair(":path", "/"),
+	pair(":path", "/index.html"),
+	pair(":scheme", "http"),
+	pair(":scheme", "https"),
+	pair(":status", "200"),
+	pair(":status", "204"),
+	pair(":status", "206"),
+	pair(":status", "304"),
+	pair(":status", "400"),
+	pair(":status", "404"),
+	pair(":status", "500"),
+	pair("accept-charset", ""),
+	pair("accept-encoding", "gzip, deflate"),
+	pair("accept-language", ""),
+	pair("accept-ranges", ""),
+	pair("accept", ""),
+	pair("access-control-allow-origin", ""),
+	pair("age", ""),
+	pair("allow", ""),
+	pair("authorization", ""),
+	pair("cache-control", ""),
+	pair("content-disposition", ""),
+	pair("content-encoding", ""),
+	pair("content-language", ""),
+	pair("content-length", ""),
+	pair("content-location", ""),
+	pair("content-range", ""),
+	pair("content-type", ""),
+	pair("cookie", ""),
+	pair("date", ""),
+	pair("etag", ""),
+	pair("expect", ""),
+	pair("expires", ""),
+	pair("from", ""),
+	pair("host", ""),
+	pair("if-match", ""),
+	pair("if-modified-since", ""),
+	pair("if-none-match", ""),
+	pair("if-range", ""),
+	pair("if-unmodified-since", ""),
+	pair("last-modified", ""),
+	pair("link", ""),
+	pair("location", ""),
+	pair("max-forwards", ""),
+	pair("proxy-authenticate", ""),
+	pair("proxy-authorization", ""),
+	pair("range", ""),
+	pair("referer", ""),
+	pair("refresh", ""),
+	pair("retry-after", ""),
+	pair("server", ""),
+	pair("set-cookie", ""),
+	pair("strict-transport-security", ""),
+	pair("transfer-encoding", ""),
+	pair("user-agent", ""),
+	pair("vary", ""),
+	pair("via", ""),
+	pair("www-authenticate", ""),
+}
+
+var huffmanCodes = [256]uint32{
+	0x1ff8,
+	0x7fffd8,
+	0xfffffe2,
+	0xfffffe3,
+	0xfffffe4,
+	0xfffffe5,
+	0xfffffe6,
+	0xfffffe7,
+	0xfffffe8,
+	0xffffea,
+	0x3ffffffc,
+	0xfffffe9,
+	0xfffffea,
+	0x3ffffffd,
+	0xfffffeb,
+	0xfffffec,
+	0xfffffed,
+	0xfffffee,
+	0xfffffef,
+	0xffffff0,
+	0xffffff1,
+	0xffffff2,
+	0x3ffffffe,
+	0xffffff3,
+	0xffffff4,
+	0xffffff5,
+	0xffffff6,
+	0xffffff7,
+	0xffffff8,
+	0xffffff9,
+	0xffffffa,
+	0xffffffb,
+	0x14,
+	0x3f8,
+	0x3f9,
+	0xffa,
+	0x1ff9,
+	0x15,
+	0xf8,
+	0x7fa,
+	0x3fa,
+	0x3fb,
+	0xf9,
+	0x7fb,
+	0xfa,
+	0x16,
+	0x17,
+	0x18,
+	0x0,
+	0x1,
+	0x2,
+	0x19,
+	0x1a,
+	0x1b,
+	0x1c,
+	0x1d,
+	0x1e,
+	0x1f,
+	0x5c,
+	0xfb,
+	0x7ffc,
+	0x20,
+	0xffb,
+	0x3fc,
+	0x1ffa,
+	0x21,
+	0x5d,
+	0x5e,
+	0x5f,
+	0x60,
+	0x61,
+	0x62,
+	0x63,
+	0x64,
+	0x65,
+	0x66,
+	0x67,
+	0x68,
+	0x69,
+	0x6a,
+	0x6b,
+	0x6c,
+	0x6d,
+	0x6e,
+	0x6f,
+	0x70,
+	0x71,
+	0x72,
+	0xfc,
+	0x73,
+	0xfd,
+	0x1ffb,
+	0x7fff0,
+	0x1ffc,
+	0x3ffc,
+	0x22,
+	0x7ffd,
+	0x3,
+	0x23,
+	0x4,
+	0x24,
+	0x5,
+	0x25,
+	0x26,
+	0x27,
+	0x6,
+	0x74,
+	0x75,
+	0x28,
+	0x29,
+	0x2a,
+	0x7,
+	0x2b,
+	0x76,
+	0x2c,
+	0x8,
+	0x9,
+	0x2d,
+	0x77,
+	0x78,
+	0x79,
+	0x7a,
+	0x7b,
+	0x7ffe,
+	0x7fc,
+	0x3ffd,
+	0x1ffd,
+	0xffffffc,
+	0xfffe6,
+	0x3fffd2,
+	0xfffe7,
+	0xfffe8,
+	0x3fffd3,
+	0x3fffd4,
+	0x3fffd5,
+	0x7fffd9,
+	0x3fffd6,
+	0x7fffda,
+	0x7fffdb,
+	0x7fffdc,
+	0x7fffdd,
+	0x7fffde,
+	0xffffeb,
+	0x7fffdf,
+	0xffffec,
+	0xffffed,
+	0x3fffd7,
+	0x7fffe0,
+	0xffffee,
+	0x7fffe1,
+	0x7fffe2,
+	0x7fffe3,
+	0x7fffe4,
+	0x1fffdc,
+	0x3fffd8,
+	0x7fffe5,
+	0x3fffd9,
+	0x7fffe6,
+	0x7fffe7,
+	0xffffef,
+	0x3fffda,
+	0x1fffdd,
+	0xfffe9,
+	0x3fffdb,
+	0x3fffdc,
+	0x7fffe8,
+	0x7fffe9,
+	0x1fffde,
+	0x7fffea,
+	0x3fffdd,
+	0x3fffde,
+	0xfffff0,
+	0x1fffdf,
+	0x3fffdf,
+	0x7fffeb,
+	0x7fffec,
+	0x1fffe0,
+	0x1fffe1,
+	0x3fffe0,
+	0x1fffe2,
+	0x7fffed,
+	0x3fffe1,
+	0x7fffee,
+	0x7fffef,
+	0xfffea,
+	0x3fffe2,
+	0x3fffe3,
+	0x3fffe4,
+	0x7ffff0,
+	0x3fffe5,
+	0x3fffe6,
+	0x7ffff1,
+	0x3ffffe0,
+	0x3ffffe1,
+	0xfffeb,
+	0x7fff1,
+	0x3fffe7,
+	0x7ffff2,
+	0x3fffe8,
+	0x1ffffec,
+	0x3ffffe2,
+	0x3ffffe3,
+	0x3ffffe4,
+	0x7ffffde,
+	0x7ffffdf,
+	0x3ffffe5,
+	0xfffff1,
+	0x1ffffed,
+	0x7fff2,
+	0x1fffe3,
+	0x3ffffe6,
+	0x7ffffe0,
+	0x7ffffe1,
+	0x3ffffe7,
+	0x7ffffe2,
+	0xfffff2,
+	0x1fffe4,
+	0x1fffe5,
+	0x3ffffe8,
+	0x3ffffe9,
+	0xffffffd,
+	0x7ffffe3,
+	0x7ffffe4,
+	0x7ffffe5,
+	0xfffec,
+	0xfffff3,
+	0xfffed,
+	0x1fffe6,
+	0x3fffe9,
+	0x1fffe7,
+	0x1fffe8,
+	0x7ffff3,
+	0x3fffea,
+	0x3fffeb,
+	0x1ffffee,
+	0x1ffffef,
+	0xfffff4,
+	0xfffff5,
+	0x3ffffea,
+	0x7ffff4,
+	0x3ffffeb,
+	0x7ffffe6,
+	0x3ffffec,
+	0x3ffffed,
+	0x7ffffe7,
+	0x7ffffe8,
+	0x7ffffe9,
+	0x7ffffea,
+	0x7ffffeb,
+	0xffffffe,
+	0x7ffffec,
+	0x7ffffed,
+	0x7ffffee,
+	0x7ffffef,
+	0x7fffff0,
+	0x3ffffee,
+}
+
+var huffmanCodeLen = [256]uint8{
+	13, 23, 28, 28, 28, 28, 28, 28, 28, 24, 30, 28, 28, 30, 28, 28,
+	28, 28, 28, 28, 28, 28, 30, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+	6, 10, 10, 12, 13, 6, 8, 11, 10, 10, 8, 11, 8, 6, 6, 6,
+	5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 7, 8, 15, 6, 12, 10,
+	13, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+	7, 7, 7, 7, 7, 7, 7, 7, 8, 7, 8, 13, 19, 13, 14, 6,
+	15, 5, 6, 5, 6, 5, 6, 6, 6, 5, 7, 7, 6, 6, 6, 5,
+	6, 7, 6, 5, 5, 6, 7, 7, 7, 7, 7, 15, 11, 14, 13, 28,
+	20, 22, 20, 20, 22, 22, 22, 23, 22, 23, 23, 23, 23, 23, 24, 23,
+	24, 24, 22, 23, 24, 23, 23, 23, 23, 21, 22, 23, 22, 23, 23, 24,
+	22, 21, 20, 22, 22, 23, 23, 21, 23, 22, 22, 24, 21, 22, 23, 23,
+	21, 21, 22, 21, 23, 22, 23, 23, 20, 22, 22, 22, 23, 22, 22, 23,
+	26, 26, 20, 19, 22, 23, 22, 25, 26, 26, 26, 27, 27, 26, 24, 25,
+	19, 21, 26, 27, 27, 26, 27, 24, 21, 21, 26, 26, 28, 27, 27, 27,
+	20, 24, 20, 21, 22, 21, 21, 23, 22, 22, 25, 25, 24, 24, 26, 23,
+	26, 27, 26, 26, 27, 27, 27, 27, 27, 28, 27, 27, 27, 27, 27, 26,
+}