all: merge master into release-branch.go1.7

Change-Id: Ifb9647fa9817ed57aa4835a35a05020aba00a24e
diff --git a/doc/asm.html b/doc/asm.html
index 392af17..3e03c54 100644
--- a/doc/asm.html
+++ b/doc/asm.html
@@ -780,6 +780,64 @@
 
 </ul>
 
+<h3 id="s390x">IBM z/Architecture, a.k.a. s390x</h3>
+
+<p>
+The registers <code>R10</code> and <code>R11</code> are reserved.
+The assembler uses them to hold temporary values when assembling some instructions.
+</p>
+
+<p>
+<code>R13</code> points to the <code>g</code> (goroutine) structure.
+This register must be referred to as <code>g</code>; the name <code>R13</code> is not recognized.
+</p>
+
+<p>
+<code>R15</code> points to the stack frame and should typically only be accessed using the
+virtual registers <code>SP</code> and <code>FP</code>.
+</p>
+
+<p>
+Load- and store-multiple instructions operate on a range of registers.
+The range of registers is specified by a start register and an end register.
+For example, <code>LMG</code> <code>(R9),</code> <code>R5,</code> <code>R7</code> would load
+<code>R5</code>, <code>R6</code> and <code>R7</code> with the 64-bit values at
+<code>0(R9)</code>, <code>8(R9)</code> and <code>16(R9)</code> respectively.
+</p>
+
+<p>
+Storage-and-storage instructions such as <code>MVC</code> and <code>XC</code> are written
+with the length as the first argument.
+For example, <code>XC</code> <code>$8,</code> <code>(R9),</code> <code>(R9)</code> would clear
+eight bytes at the address specified in <code>R9</code>.
+</p>
+
+<p>
+If a vector instruction takes a length or an index as an argument then it will be the
+first argument.
+For example, <code>VLEIF</code> <code>$1,</code> <code>$16,</code> <code>V2</code> will load
+the value sixteen into index one of <code>V2</code>.
+Care should be taken when using vector instructions to ensure that they are available at
+runtime.
+To use vector instructions a machine must have both the vector facility (bit 129 in the
+facility list) and kernel support.
+Without kernel support a vector instruction will have no effect (it will be equivalent
+to a <code>NOP</code> instruction).
+</p>
+
+<p>
+Addressing modes:
+</p>
+
+<ul>
+
+<li>
+<code>(R5)(R6*1)</code>: The location at <code>R5</code> plus <code>R6</code>.
+It is a scaled mode as on the x86, but the only scale allowed is <code>1</code>.
+</li>
+
+</ul>
+
 <h3 id="unsupported_opcodes">Unsupported opcodes</h3>
 
 <p>
diff --git a/doc/go1.7.html b/doc/go1.7.html
index cf5d8a4..7e17ff2 100644
--- a/doc/go1.7.html
+++ b/doc/go1.7.html
@@ -102,6 +102,17 @@
 The OpenBSD port now requires OpenBSD 5.6 or later, for access to the <a href="http://man.openbsd.org/getentropy.2"><i>getentropy</i>(2)</a> system call.
 </p>
 
+<h3 id="known_issues">Known Issues</h3>
+
+<p>
+There are some instabilities on FreeBSD that are known but not understood.
+These can lead to program crashes in rare cases.
+See <a href="https://golang.org/issue/16136">issue 16136</a>,
+<a href="https://golang.org/issue/15658">issue 15658</a>,
+and <a href="https://golang.org/issue/16396">issue 16396</a>.
+Any help in solving these FreeBSD-specific issues would be appreciated.
+</p>
+
 <h2 id="tools">Tools</h2>
 
 <h3 id="cmd_asm">Assembler</h3>
@@ -906,6 +917,12 @@
 </p>
 
 <p>
+The server's <a href="/pkg/net/http/#Server.Serve"><code>Serve</code></a> method
+now only enables HTTP/2 support if the <code>Server.TLSConfig</code> field is <code>nil</code>
+or includes <code>"h2"</code> in its <code>TLSConfig.NextProto</code>.
+</p>
+
+<p>
 The server implementation now
 pads response codes less than 100 to three digits
 as required by the protocol,
diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go
index 756bcec..0350c29 100644
--- a/src/cmd/compile/internal/amd64/ssa.go
+++ b/src/cmd/compile/internal/amd64/ssa.go
@@ -156,6 +156,36 @@
 	return p
 }
 
+// DUFFZERO consists of repeated blocks of 4 MOVUPSs + ADD,
+// See runtime/mkduff.go.
+func duffStart(size int64) int64 {
+	x, _ := duff(size)
+	return x
+}
+func duffAdj(size int64) int64 {
+	_, x := duff(size)
+	return x
+}
+
+// duff returns the offset (from duffzero, in bytes) and pointer adjust (in bytes)
+// required to use the duffzero mechanism for a block of the given size.
+func duff(size int64) (int64, int64) {
+	if size < 32 || size > 1024 || size%dzClearStep != 0 {
+		panic("bad duffzero size")
+	}
+	steps := size / dzClearStep
+	blocks := steps / dzBlockLen
+	steps %= dzBlockLen
+	off := dzBlockSize * (dzBlocks - blocks)
+	var adj int64
+	if steps != 0 {
+		off -= dzAddSize
+		off -= dzMovSize * steps
+		adj -= dzClearStep * (dzBlockLen - steps)
+	}
+	return off, adj
+}
+
 func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 	s.SetLineno(v.Line)
 	switch v.Op {
@@ -649,10 +679,20 @@
 		ssa.OpAMD64CVTSS2SD, ssa.OpAMD64CVTSD2SS:
 		opregreg(v.Op.Asm(), gc.SSARegNum(v), gc.SSARegNum(v.Args[0]))
 	case ssa.OpAMD64DUFFZERO:
-		p := gc.Prog(obj.ADUFFZERO)
+		off := duffStart(v.AuxInt)
+		adj := duffAdj(v.AuxInt)
+		var p *obj.Prog
+		if adj != 0 {
+			p = gc.Prog(x86.AADDQ)
+			p.From.Type = obj.TYPE_CONST
+			p.From.Offset = adj
+			p.To.Type = obj.TYPE_REG
+			p.To.Reg = x86.REG_DI
+		}
+		p = gc.Prog(obj.ADUFFZERO)
 		p.To.Type = obj.TYPE_ADDR
 		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))
-		p.To.Offset = v.AuxInt
+		p.To.Offset = off
 	case ssa.OpAMD64MOVOconst:
 		if v.AuxInt != 0 {
 			v.Unimplementedf("MOVOconst can only do constant=0")
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index db274d7..d27eff0 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -389,7 +389,7 @@
 (Zero [size] destptr mem) && size <= 1024 && size%8 == 0 && size%16 != 0 && !config.noDuffDevice ->
 	(Zero [size-8] (ADDQconst [8] destptr) (MOVQstore destptr (MOVQconst [0]) mem))
 (Zero [size] destptr mem) && size <= 1024 && size%16 == 0 && !config.noDuffDevice ->
-	(DUFFZERO [duffStart(size)] (ADDQconst [duffAdj(size)] destptr) (MOVOconst [0]) mem)
+	(DUFFZERO [size] destptr (MOVOconst [0]) mem)
 
 // Large zeroing uses REP STOSQ.
 (Zero [size] destptr mem) && (size > 1024 || (config.noDuffDevice && size > 32)) && size%8 == 0 ->
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
index b684b9c..43cc0eb 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
@@ -425,10 +425,10 @@
 		{name: "MOVQstoreconstidx1", argLength: 3, reg: gpstoreconstidx, asm: "MOVQ", aux: "SymValAndOff", typ: "Mem"}, // store 8 bytes of ... arg1 ...
 		{name: "MOVQstoreconstidx8", argLength: 3, reg: gpstoreconstidx, asm: "MOVQ", aux: "SymValAndOff", typ: "Mem"}, // store 8 bytes of ... 8*arg1 ...
 
-		// arg0 = (duff-adjusted) pointer to start of memory to zero
+		// arg0 = pointer to start of memory to zero
 		// arg1 = value to store (will always be zero)
 		// arg2 = mem
-		// auxint = offset into duffzero code to start executing
+		// auxint = # of bytes to zero
 		// returns mem
 		{
 			name:      "DUFFZERO",
diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go
index 03c3882..61d4234 100644
--- a/src/cmd/compile/internal/ssa/rewrite.go
+++ b/src/cmd/compile/internal/ssa/rewrite.go
@@ -254,52 +254,6 @@
 	return false
 }
 
-// DUFFZERO consists of repeated blocks of 4 MOVUPSs + ADD,
-// See runtime/mkduff.go.
-const (
-	dzBlocks    = 16 // number of MOV/ADD blocks
-	dzBlockLen  = 4  // number of clears per block
-	dzBlockSize = 19 // size of instructions in a single block
-	dzMovSize   = 4  // size of single MOV instruction w/ offset
-	dzAddSize   = 4  // size of single ADD instruction
-	dzClearStep = 16 // number of bytes cleared by each MOV instruction
-
-	dzTailLen  = 4 // number of final STOSQ instructions
-	dzTailSize = 2 // size of single STOSQ instruction
-
-	dzClearLen = dzClearStep * dzBlockLen // bytes cleared by one block
-	dzSize     = dzBlocks * dzBlockSize
-)
-
-func duffStart(size int64) int64 {
-	x, _ := duff(size)
-	return x
-}
-func duffAdj(size int64) int64 {
-	_, x := duff(size)
-	return x
-}
-
-// duff returns the offset (from duffzero, in bytes) and pointer adjust (in bytes)
-// required to use the duffzero mechanism for a block of the given size.
-func duff(size int64) (int64, int64) {
-	if size < 32 || size > 1024 || size%dzClearStep != 0 {
-		panic("bad duffzero size")
-	}
-	// TODO: arch-dependent
-	steps := size / dzClearStep
-	blocks := steps / dzBlockLen
-	steps %= dzBlockLen
-	off := dzBlockSize * (dzBlocks - blocks)
-	var adj int64
-	if steps != 0 {
-		off -= dzAddSize
-		off -= dzMovSize * steps
-		adj -= dzClearStep * (dzBlockLen - steps)
-	}
-	return off, adj
-}
-
 // mergePoint finds a block among a's blocks which dominates b and is itself
 // dominated by all of a's blocks. Returns nil if it can't find one.
 // Might return nil even if one does exist.
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index cefd50c..a2b9e15 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -17175,7 +17175,7 @@
 	}
 	// match: (Zero [size] destptr mem)
 	// cond: size <= 1024 && size%16 == 0 && !config.noDuffDevice
-	// result: (DUFFZERO [duffStart(size)] (ADDQconst [duffAdj(size)] destptr) (MOVOconst [0]) mem)
+	// result: (DUFFZERO [size] destptr (MOVOconst [0]) mem)
 	for {
 		size := v.AuxInt
 		destptr := v.Args[0]
@@ -17184,14 +17184,11 @@
 			break
 		}
 		v.reset(OpAMD64DUFFZERO)
-		v.AuxInt = duffStart(size)
-		v0 := b.NewValue0(v.Line, OpAMD64ADDQconst, config.fe.TypeUInt64())
-		v0.AuxInt = duffAdj(size)
-		v0.AddArg(destptr)
+		v.AuxInt = size
+		v.AddArg(destptr)
+		v0 := b.NewValue0(v.Line, OpAMD64MOVOconst, TypeInt128)
+		v0.AuxInt = 0
 		v.AddArg(v0)
-		v1 := b.NewValue0(v.Line, OpAMD64MOVOconst, TypeInt128)
-		v1.AuxInt = 0
-		v.AddArg(v1)
 		v.AddArg(mem)
 		return true
 	}
diff --git a/src/compress/flate/deflate.go b/src/compress/flate/deflate.go
index 8a085ba..3e4dc7b 100644
--- a/src/compress/flate/deflate.go
+++ b/src/compress/flate/deflate.go
@@ -15,7 +15,17 @@
 	BestSpeed          = 1
 	BestCompression    = 9
 	DefaultCompression = -1
-	HuffmanOnly        = -2 // Disables match search and only does Huffman entropy reduction.
+
+	// HuffmanOnly disables Lempel-Ziv match searching and only performs Huffman
+	// entropy encoding. This mode is useful in compressing data that has
+	// already been compressed with an LZ style algorithm (e.g. Snappy or LZ4)
+	// that lacks an entropy encoder. Compression gains are achieved when
+	// certain bytes in the input stream occur more frequently than others.
+	//
+	// Note that HuffmanOnly produces a compressed output that is
+	// RFC 1951 compliant. That is, any valid DEFLATE decompressor will
+	// continue to be able to decompress this output.
+	HuffmanOnly = -2
 )
 
 const (
@@ -644,7 +654,6 @@
 // a very fast compression for all types of input, but sacrificing considerable
 // compression efficiency.
 //
-//
 // If level is in the range [-2, 9] then the error returned will be nil.
 // Otherwise the error returned will be non-nil.
 func NewWriter(w io.Writer, level int) (*Writer, error) {
diff --git a/src/context/context_test.go b/src/context/context_test.go
index 90e78e5..cf18211 100644
--- a/src/context/context_test.go
+++ b/src/context/context_test.go
@@ -255,6 +255,12 @@
 	o = otherContext{c}
 	c, _ = WithDeadline(o, time.Now().Add(4*time.Second))
 	testDeadline(c, "WithDeadline+otherContext+WithDeadline", 2*time.Second, t)
+
+	c, _ = WithDeadline(Background(), time.Now().Add(-time.Millisecond))
+	testDeadline(c, "WithDeadline+inthepast", time.Second, t)
+
+	c, _ = WithDeadline(Background(), time.Now())
+	testDeadline(c, "WithDeadline+now", time.Second, t)
 }
 
 func TestTimeout(t *testing.T) {
diff --git a/src/crypto/x509/root_cgo_darwin.go b/src/crypto/x509/root_cgo_darwin.go
index 0e2fb35..a4b33c7 100644
--- a/src/crypto/x509/root_cgo_darwin.go
+++ b/src/crypto/x509/root_cgo_darwin.go
@@ -10,9 +10,65 @@
 #cgo CFLAGS: -mmacosx-version-min=10.6 -D__MAC_OS_X_VERSION_MAX_ALLOWED=1060
 #cgo LDFLAGS: -framework CoreFoundation -framework Security
 
+#include <errno.h>
+#include <sys/sysctl.h>
+
 #include <CoreFoundation/CoreFoundation.h>
 #include <Security/Security.h>
 
+// FetchPEMRoots_MountainLion is the version of FetchPEMRoots from Go 1.6
+// which still works on OS X 10.8 (Mountain Lion).
+// It lacks support for admin & user cert domains.
+// See golang.org/issue/16473
+int FetchPEMRoots_MountainLion(CFDataRef *pemRoots) {
+	if (pemRoots == NULL) {
+		return -1;
+	}
+	CFArrayRef certs = NULL;
+	OSStatus err = SecTrustCopyAnchorCertificates(&certs);
+	if (err != noErr) {
+		return -1;
+	}
+	CFMutableDataRef combinedData = CFDataCreateMutable(kCFAllocatorDefault, 0);
+	int i, ncerts = CFArrayGetCount(certs);
+	for (i = 0; i < ncerts; i++) {
+		CFDataRef data = NULL;
+		SecCertificateRef cert = (SecCertificateRef)CFArrayGetValueAtIndex(certs, i);
+		if (cert == NULL) {
+			continue;
+		}
+		// Note: SecKeychainItemExport is deprecated as of 10.7 in favor of SecItemExport.
+		// Once we support weak imports via cgo we should prefer that, and fall back to this
+		// for older systems.
+		err = SecKeychainItemExport(cert, kSecFormatX509Cert, kSecItemPemArmour, NULL, &data);
+		if (err != noErr) {
+			continue;
+		}
+		if (data != NULL) {
+			CFDataAppendBytes(combinedData, CFDataGetBytePtr(data), CFDataGetLength(data));
+			CFRelease(data);
+		}
+	}
+	CFRelease(certs);
+	*pemRoots = combinedData;
+	return 0;
+}
+
+// useOldCode reports whether the running machine is OS X 10.8 Mountain Lion
+// or older. We only support Mountain Lion and higher, but we'll at least try our
+// best on older machines and continue to use the old code path.
+//
+// See golang.org/issue/16473
+int useOldCode() {
+	char str[256];
+	size_t size = sizeof(str);
+	memset(str, 0, size);
+	sysctlbyname("kern.osrelease", str, &size, NULL, 0);
+	// OS X 10.8 is osrelease "12.*", 10.7 is 11.*, 10.6 is 10.*.
+	// We never supported things before that.
+	return memcmp(str, "12.", 3) == 0 || memcmp(str, "11.", 3) == 0 || memcmp(str, "10.", 3) == 0;
+}
+
 // FetchPEMRoots fetches the system's list of trusted X.509 root certificates.
 //
 // On success it returns 0 and fills pemRoots with a CFDataRef that contains the extracted root
@@ -21,6 +77,10 @@
 // Note: The CFDataRef returned in pemRoots must be released (using CFRelease) after
 // we've consumed its content.
 int FetchPEMRoots(CFDataRef *pemRoots) {
+	if (useOldCode()) {
+		return FetchPEMRoots_MountainLion(pemRoots);
+	}
+
 	// Get certificates from all domains, not just System, this lets
 	// the user add CAs to their "login" keychain, and Admins to add
 	// to the "System" keychain
diff --git a/src/net/dial_unix_test.go b/src/net/dial_unix_test.go
new file mode 100644
index 0000000..4705254
--- /dev/null
+++ b/src/net/dial_unix_test.go
@@ -0,0 +1,108 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build darwin dragonfly freebsd linux netbsd openbsd solaris
+
+package net
+
+import (
+	"context"
+	"syscall"
+	"testing"
+	"time"
+)
+
+// Issue 16523
+func TestDialContextCancelRace(t *testing.T) {
+	oldConnectFunc := connectFunc
+	oldGetsockoptIntFunc := getsockoptIntFunc
+	oldTestHookCanceledDial := testHookCanceledDial
+	defer func() {
+		connectFunc = oldConnectFunc
+		getsockoptIntFunc = oldGetsockoptIntFunc
+		testHookCanceledDial = oldTestHookCanceledDial
+	}()
+
+	ln, err := newLocalListener("tcp")
+	if err != nil {
+		t.Fatal(err)
+	}
+	listenerDone := make(chan struct{})
+	go func() {
+		defer close(listenerDone)
+		c, err := ln.Accept()
+		if err == nil {
+			c.Close()
+		}
+	}()
+	defer func() { <-listenerDone }()
+	defer ln.Close()
+
+	sawCancel := make(chan bool, 1)
+	testHookCanceledDial = func() {
+		sawCancel <- true
+	}
+
+	ctx, cancelCtx := context.WithCancel(context.Background())
+
+	connectFunc = func(fd int, addr syscall.Sockaddr) error {
+		err := oldConnectFunc(fd, addr)
+		t.Logf("connect(%d, addr) = %v", fd, err)
+		if err == nil {
+			// On some operating systems, localhost
+			// connects _sometimes_ succeed immediately.
+			// Prevent that, so we exercise the code path
+			// we're interested in testing. This seems
+			// harmless. It makes FreeBSD 10.10 work when
+			// run with many iterations. It failed about
+			// half the time previously.
+			return syscall.EINPROGRESS
+		}
+		return err
+	}
+
+	getsockoptIntFunc = func(fd, level, opt int) (val int, err error) {
+		val, err = oldGetsockoptIntFunc(fd, level, opt)
+		t.Logf("getsockoptIntFunc(%d, %d, %d) = (%v, %v)", fd, level, opt, val, err)
+		if level == syscall.SOL_SOCKET && opt == syscall.SO_ERROR && err == nil && val == 0 {
+			t.Logf("canceling context")
+
+			// Cancel the context at just the moment which
+			// caused the race in issue 16523.
+			cancelCtx()
+
+			// And wait for the "interrupter" goroutine to
+			// cancel the dial by messing with its write
+			// timeout before returning.
+			select {
+			case <-sawCancel:
+				t.Logf("saw cancel")
+			case <-time.After(5 * time.Second):
+				t.Errorf("didn't see cancel after 5 seconds")
+			}
+		}
+		return
+	}
+
+	var d Dialer
+	c, err := d.DialContext(ctx, "tcp", ln.Addr().String())
+	if err == nil {
+		c.Close()
+		t.Fatal("unexpected successful dial; want context canceled error")
+	}
+
+	select {
+	case <-ctx.Done():
+	case <-time.After(5 * time.Second):
+		t.Fatal("expected context to be canceled")
+	}
+
+	oe, ok := err.(*OpError)
+	if !ok || oe.Op != "dial" {
+		t.Fatalf("Dial error = %#v; want dial *OpError", err)
+	}
+	if oe.Err != ctx.Err() {
+		t.Errorf("DialContext = (%v, %v); want OpError with error %v", c, err, ctx.Err())
+	}
+}
diff --git a/src/net/fd_unix.go b/src/net/fd_unix.go
index 0f80bc7..11dde76 100644
--- a/src/net/fd_unix.go
+++ b/src/net/fd_unix.go
@@ -64,7 +64,7 @@
 	return fd.net + ":" + ls + "->" + rs
 }
 
-func (fd *netFD) connect(ctx context.Context, la, ra syscall.Sockaddr) error {
+func (fd *netFD) connect(ctx context.Context, la, ra syscall.Sockaddr) (ret error) {
 	// Do not need to call fd.writeLock here,
 	// because fd is not yet accessible to user,
 	// so no concurrent operations are possible.
@@ -101,21 +101,44 @@
 		defer fd.setWriteDeadline(noDeadline)
 	}
 
-	// Wait for the goroutine converting context.Done into a write timeout
-	// to exist, otherwise our caller might cancel the context and
-	// cause fd.setWriteDeadline(aLongTimeAgo) to cancel a successful dial.
-	done := make(chan bool) // must be unbuffered
-	defer func() { done <- true }()
-	go func() {
-		select {
-		case <-ctx.Done():
-			// Force the runtime's poller to immediately give
-			// up waiting for writability.
-			fd.setWriteDeadline(aLongTimeAgo)
-			<-done
-		case <-done:
-		}
-	}()
+	// Start the "interrupter" goroutine, if this context might be canceled.
+	// (The background context cannot)
+	//
+	// The interrupter goroutine waits for the context to be done and
+	// interrupts the dial (by altering the fd's write deadline, which
+	// wakes up waitWrite).
+	if ctx != context.Background() {
+		// Wait for the interrupter goroutine to exit before returning
+		// from connect.
+		done := make(chan struct{})
+		interruptRes := make(chan error)
+		defer func() {
+			close(done)
+			if ctxErr := <-interruptRes; ctxErr != nil && ret == nil {
+				// The interrupter goroutine called setWriteDeadline,
+				// but the connect code below had returned from
+				// waitWrite already and did a successful connect (ret
+				// == nil). Because we've now poisoned the connection
+				// by making it unwritable, don't return a successful
+				// dial. This was issue 16523.
+				ret = ctxErr
+				fd.Close() // prevent a leak
+			}
+		}()
+		go func() {
+			select {
+			case <-ctx.Done():
+				// Force the runtime's poller to immediately give up
+				// waiting for writability, unblocking waitWrite
+				// below.
+				fd.setWriteDeadline(aLongTimeAgo)
+				testHookCanceledDial()
+				interruptRes <- ctx.Err()
+			case <-done:
+				interruptRes <- nil
+			}
+		}()
+	}
 
 	for {
 		// Performing multiple connect system calls on a
diff --git a/src/net/hook_unix.go b/src/net/hook_unix.go
index 361ca59..cf52567 100644
--- a/src/net/hook_unix.go
+++ b/src/net/hook_unix.go
@@ -9,7 +9,8 @@
 import "syscall"
 
 var (
-	testHookDialChannel = func() {} // see golang.org/issue/5349
+	testHookDialChannel  = func() {} // for golang.org/issue/5349
+	testHookCanceledDial = func() {} // for golang.org/issue/16523
 
 	// Placeholders for socket system calls.
 	socketFunc        func(int, int, int) (int, error)         = syscall.Socket
diff --git a/src/net/http/h2_bundle.go b/src/net/http/h2_bundle.go
index 47e5f57..cd66c09 100644
--- a/src/net/http/h2_bundle.go
+++ b/src/net/http/h2_bundle.go
@@ -85,7 +85,16 @@
 	http2noDialOnMiss = false
 )
 
-func (p *http2clientConnPool) getClientConn(_ *Request, addr string, dialOnMiss bool) (*http2ClientConn, error) {
+func (p *http2clientConnPool) getClientConn(req *Request, addr string, dialOnMiss bool) (*http2ClientConn, error) {
+	if http2isConnectionCloseRequest(req) && dialOnMiss {
+		// It gets its own connection.
+		const singleUse = true
+		cc, err := p.t.dialClientConn(addr, singleUse)
+		if err != nil {
+			return nil, err
+		}
+		return cc, nil
+	}
 	p.mu.Lock()
 	for _, cc := range p.conns[addr] {
 		if cc.CanTakeNewRequest() {
@@ -128,7 +137,8 @@
 
 // run in its own goroutine.
 func (c *http2dialCall) dial(addr string) {
-	c.res, c.err = c.p.t.dialClientConn(addr)
+	const singleUse = false // shared conn
+	c.res, c.err = c.p.t.dialClientConn(addr, singleUse)
 	close(c.done)
 
 	c.p.mu.Lock()
@@ -1105,6 +1115,7 @@
 var (
 	http2errStreamID    = errors.New("invalid stream ID")
 	http2errDepStreamID = errors.New("invalid dependent stream ID")
+	http2errPadLength   = errors.New("pad length too large")
 )
 
 func http2validStreamIDOrZero(streamID uint32) bool {
@@ -1118,18 +1129,40 @@
 // WriteData writes a DATA frame.
 //
 // It will perform exactly one Write to the underlying Writer.
-// It is the caller's responsibility to not call other Write methods concurrently.
+// It is the caller's responsibility not to violate the maximum frame size
+// and to not call other Write methods concurrently.
 func (f *http2Framer) WriteData(streamID uint32, endStream bool, data []byte) error {
+	return f.WriteDataPadded(streamID, endStream, data, nil)
+}
 
+// WriteData writes a DATA frame with optional padding.
+//
+// If pad is nil, the padding bit is not sent.
+// The length of pad must not exceed 255 bytes.
+//
+// It will perform exactly one Write to the underlying Writer.
+// It is the caller's responsibility not to violate the maximum frame size
+// and to not call other Write methods concurrently.
+func (f *http2Framer) WriteDataPadded(streamID uint32, endStream bool, data, pad []byte) error {
 	if !http2validStreamID(streamID) && !f.AllowIllegalWrites {
 		return http2errStreamID
 	}
+	if len(pad) > 255 {
+		return http2errPadLength
+	}
 	var flags http2Flags
 	if endStream {
 		flags |= http2FlagDataEndStream
 	}
+	if pad != nil {
+		flags |= http2FlagDataPadded
+	}
 	f.startWrite(http2FrameData, flags, streamID)
+	if pad != nil {
+		f.wbuf = append(f.wbuf, byte(len(pad)))
+	}
 	f.wbuf = append(f.wbuf, data...)
+	f.wbuf = append(f.wbuf, pad...)
 	return f.endWrite()
 }
 
@@ -3803,6 +3836,9 @@
 	}
 	delete(sc.streams, st.id)
 	if p := st.body; p != nil {
+
+		sc.sendWindowUpdate(nil, p.Len())
+
 		p.CloseWithError(err)
 	}
 	st.cw.Close()
@@ -3879,36 +3915,51 @@
 
 func (sc *http2serverConn) processData(f *http2DataFrame) error {
 	sc.serveG.check()
+	data := f.Data()
 
 	id := f.Header().StreamID
 	st, ok := sc.streams[id]
 	if !ok || st.state != http2stateOpen || st.gotTrailerHeader {
 
+		if sc.inflow.available() < int32(f.Length) {
+			return http2StreamError{id, http2ErrCodeFlowControl}
+		}
+
+		sc.inflow.take(int32(f.Length))
+		sc.sendWindowUpdate(nil, int(f.Length))
+
 		return http2StreamError{id, http2ErrCodeStreamClosed}
 	}
 	if st.body == nil {
 		panic("internal error: should have a body in this state")
 	}
-	data := f.Data()
 
 	if st.declBodyBytes != -1 && st.bodyBytes+int64(len(data)) > st.declBodyBytes {
 		st.body.CloseWithError(fmt.Errorf("sender tried to send more than declared Content-Length of %d bytes", st.declBodyBytes))
 		return http2StreamError{id, http2ErrCodeStreamClosed}
 	}
-	if len(data) > 0 {
+	if f.Length > 0 {
 
-		if int(st.inflow.available()) < len(data) {
+		if st.inflow.available() < int32(f.Length) {
 			return http2StreamError{id, http2ErrCodeFlowControl}
 		}
-		st.inflow.take(int32(len(data)))
-		wrote, err := st.body.Write(data)
-		if err != nil {
-			return http2StreamError{id, http2ErrCodeStreamClosed}
+		st.inflow.take(int32(f.Length))
+
+		if len(data) > 0 {
+			wrote, err := st.body.Write(data)
+			if err != nil {
+				return http2StreamError{id, http2ErrCodeStreamClosed}
+			}
+			if wrote != len(data) {
+				panic("internal error: bad Writer")
+			}
+			st.bodyBytes += int64(len(data))
 		}
-		if wrote != len(data) {
-			panic("internal error: bad Writer")
+
+		if pad := int32(f.Length) - int32(len(data)); pad > 0 {
+			sc.sendWindowUpdate32(nil, pad)
+			sc.sendWindowUpdate32(st, pad)
 		}
-		st.bodyBytes += int64(len(data))
 	}
 	if f.StreamEnded() {
 		st.endStream()
@@ -4919,27 +4970,29 @@
 // ClientConn is the state of a single HTTP/2 client connection to an
 // HTTP/2 server.
 type http2ClientConn struct {
-	t        *http2Transport
-	tconn    net.Conn             // usually *tls.Conn, except specialized impls
-	tlsState *tls.ConnectionState // nil only for specialized impls
+	t         *http2Transport
+	tconn     net.Conn             // usually *tls.Conn, except specialized impls
+	tlsState  *tls.ConnectionState // nil only for specialized impls
+	singleUse bool                 // whether being used for a single http.Request
 
 	// readLoop goroutine fields:
 	readerDone chan struct{} // closed on error
 	readerErr  error         // set before readerDone is closed
 
-	mu           sync.Mutex // guards following
-	cond         *sync.Cond // hold mu; broadcast on flow/closed changes
-	flow         http2flow  // our conn-level flow control quota (cs.flow is per stream)
-	inflow       http2flow  // peer's conn-level flow control
-	closed       bool
-	goAway       *http2GoAwayFrame             // if non-nil, the GoAwayFrame we received
-	goAwayDebug  string                        // goAway frame's debug data, retained as a string
-	streams      map[uint32]*http2clientStream // client-initiated
-	nextStreamID uint32
-	bw           *bufio.Writer
-	br           *bufio.Reader
-	fr           *http2Framer
-	lastActive   time.Time
+	mu              sync.Mutex // guards following
+	cond            *sync.Cond // hold mu; broadcast on flow/closed changes
+	flow            http2flow  // our conn-level flow control quota (cs.flow is per stream)
+	inflow          http2flow  // peer's conn-level flow control
+	closed          bool
+	wantSettingsAck bool                          // we sent a SETTINGS frame and haven't heard back
+	goAway          *http2GoAwayFrame             // if non-nil, the GoAwayFrame we received
+	goAwayDebug     string                        // goAway frame's debug data, retained as a string
+	streams         map[uint32]*http2clientStream // client-initiated
+	nextStreamID    uint32
+	bw              *bufio.Writer
+	br              *bufio.Reader
+	fr              *http2Framer
+	lastActive      time.Time
 
 	// Settings from peer:
 	maxFrameSize         uint32
@@ -5117,7 +5170,7 @@
 	return err == http2errClientConnUnusable
 }
 
-func (t *http2Transport) dialClientConn(addr string) (*http2ClientConn, error) {
+func (t *http2Transport) dialClientConn(addr string, singleUse bool) (*http2ClientConn, error) {
 	host, _, err := net.SplitHostPort(addr)
 	if err != nil {
 		return nil, err
@@ -5126,7 +5179,7 @@
 	if err != nil {
 		return nil, err
 	}
-	return t.NewClientConn(tconn)
+	return t.newClientConn(tconn, singleUse)
 }
 
 func (t *http2Transport) newTLSConfig(host string) *tls.Config {
@@ -5187,13 +5240,13 @@
 }
 
 func (t *http2Transport) NewClientConn(c net.Conn) (*http2ClientConn, error) {
+	return t.newClientConn(c, false)
+}
+
+func (t *http2Transport) newClientConn(c net.Conn, singleUse bool) (*http2ClientConn, error) {
 	if http2VerboseLogs {
 		t.vlogf("http2: Transport creating client conn to %v", c.RemoteAddr())
 	}
-	if _, err := c.Write(http2clientPreface); err != nil {
-		t.vlogf("client preface write error: %v", err)
-		return nil, err
-	}
 
 	cc := &http2ClientConn{
 		t:                    t,
@@ -5204,6 +5257,8 @@
 		initialWindowSize:    65535,
 		maxConcurrentStreams: 1000,
 		streams:              make(map[uint32]*http2clientStream),
+		singleUse:            singleUse,
+		wantSettingsAck:      true,
 	}
 	cc.cond = sync.NewCond(&cc.mu)
 	cc.flow.add(int32(http2initialWindowSize))
@@ -5228,6 +5283,8 @@
 	if max := t.maxHeaderListSize(); max != 0 {
 		initialSettings = append(initialSettings, http2Setting{ID: http2SettingMaxHeaderListSize, Val: max})
 	}
+
+	cc.bw.Write(http2clientPreface)
 	cc.fr.WriteSettings(initialSettings...)
 	cc.fr.WriteWindowUpdate(0, http2transportDefaultConnFlow)
 	cc.inflow.add(http2transportDefaultConnFlow + http2initialWindowSize)
@@ -5236,32 +5293,6 @@
 		return nil, cc.werr
 	}
 
-	f, err := cc.fr.ReadFrame()
-	if err != nil {
-		return nil, err
-	}
-	sf, ok := f.(*http2SettingsFrame)
-	if !ok {
-		return nil, fmt.Errorf("expected settings frame, got: %T", f)
-	}
-	cc.fr.WriteSettingsAck()
-	cc.bw.Flush()
-
-	sf.ForeachSetting(func(s http2Setting) error {
-		switch s.ID {
-		case http2SettingMaxFrameSize:
-			cc.maxFrameSize = s.Val
-		case http2SettingMaxConcurrentStreams:
-			cc.maxConcurrentStreams = s.Val
-		case http2SettingInitialWindowSize:
-			cc.initialWindowSize = s.Val
-		default:
-
-			t.vlogf("Unhandled Setting: %v", s)
-		}
-		return nil
-	})
-
 	go cc.readLoop()
 	return cc, nil
 }
@@ -5288,6 +5319,9 @@
 }
 
 func (cc *http2ClientConn) canTakeNewRequestLocked() bool {
+	if cc.singleUse && cc.nextStreamID > 1 {
+		return false
+	}
 	return cc.goAway == nil && !cc.closed &&
 		int64(len(cc.streams)+1) < int64(cc.maxConcurrentStreams) &&
 		cc.nextStreamID < 2147483647
@@ -5494,22 +5528,26 @@
 	bodyWritten := false
 	ctx := http2reqContext(req)
 
+	handleReadLoopResponse := func(re http2resAndError) (*Response, error) {
+		res := re.res
+		if re.err != nil || res.StatusCode > 299 {
+
+			bodyWriter.cancel()
+			cs.abortRequestBodyWrite(http2errStopReqBodyWrite)
+		}
+		if re.err != nil {
+			cc.forgetStreamID(cs.ID)
+			return nil, re.err
+		}
+		res.Request = req
+		res.TLS = cc.tlsState
+		return res, nil
+	}
+
 	for {
 		select {
 		case re := <-readLoopResCh:
-			res := re.res
-			if re.err != nil || res.StatusCode > 299 {
-
-				bodyWriter.cancel()
-				cs.abortRequestBodyWrite(http2errStopReqBodyWrite)
-			}
-			if re.err != nil {
-				cc.forgetStreamID(cs.ID)
-				return nil, re.err
-			}
-			res.Request = req
-			res.TLS = cc.tlsState
-			return res, nil
+			return handleReadLoopResponse(re)
 		case <-respHeaderTimer:
 			cc.forgetStreamID(cs.ID)
 			if !hasBody || bodyWritten {
@@ -5541,6 +5579,12 @@
 
 			return nil, cs.resetErr
 		case err := <-bodyWriter.resc:
+
+			select {
+			case re := <-readLoopResCh:
+				return handleReadLoopResponse(re)
+			default:
+			}
 			if err != nil {
 				return nil, err
 			}
@@ -5648,26 +5692,24 @@
 		}
 	}
 
-	cc.wmu.Lock()
-	if !sentEnd {
-		var trls []byte
-		if hasTrailers {
-			cc.mu.Lock()
-			trls = cc.encodeTrailers(req)
-			cc.mu.Unlock()
-		}
+	var trls []byte
+	if !sentEnd && hasTrailers {
+		cc.mu.Lock()
+		defer cc.mu.Unlock()
+		trls = cc.encodeTrailers(req)
+	}
 
-		if len(trls) > 0 {
-			err = cc.writeHeaders(cs.ID, true, trls)
-		} else {
-			err = cc.fr.WriteData(cs.ID, true, nil)
-		}
+	cc.wmu.Lock()
+	defer cc.wmu.Unlock()
+
+	if len(trls) > 0 {
+		err = cc.writeHeaders(cs.ID, true, trls)
+	} else {
+		err = cc.fr.WriteData(cs.ID, true, nil)
 	}
 	if ferr := cc.bw.Flush(); ferr != nil && err == nil {
 		err = ferr
 	}
-	cc.wmu.Unlock()
-
 	return err
 }
 
@@ -5896,6 +5938,14 @@
 		e.LastStreamID, e.ErrCode, e.DebugData)
 }
 
+func http2isEOFOrNetReadError(err error) bool {
+	if err == io.EOF {
+		return true
+	}
+	ne, ok := err.(*net.OpError)
+	return ok && ne.Op == "read"
+}
+
 func (rl *http2clientConnReadLoop) cleanup() {
 	cc := rl.cc
 	defer cc.tconn.Close()
@@ -5904,16 +5954,14 @@
 
 	err := cc.readerErr
 	cc.mu.Lock()
-	if err == io.EOF {
-		if cc.goAway != nil {
-			err = http2GoAwayError{
-				LastStreamID: cc.goAway.LastStreamID,
-				ErrCode:      cc.goAway.ErrCode,
-				DebugData:    cc.goAwayDebug,
-			}
-		} else {
-			err = io.ErrUnexpectedEOF
+	if cc.goAway != nil && http2isEOFOrNetReadError(err) {
+		err = http2GoAwayError{
+			LastStreamID: cc.goAway.LastStreamID,
+			ErrCode:      cc.goAway.ErrCode,
+			DebugData:    cc.goAwayDebug,
 		}
+	} else if err == io.EOF {
+		err = io.ErrUnexpectedEOF
 	}
 	for _, cs := range rl.activeRes {
 		cs.bufPipe.CloseWithError(err)
@@ -5932,8 +5980,9 @@
 
 func (rl *http2clientConnReadLoop) run() error {
 	cc := rl.cc
-	rl.closeWhenIdle = cc.t.disableKeepAlives()
+	rl.closeWhenIdle = cc.t.disableKeepAlives() || cc.singleUse
 	gotReply := false
+	gotSettings := false
 	for {
 		f, err := cc.fr.ReadFrame()
 		if err != nil {
@@ -5950,6 +5999,13 @@
 		if http2VerboseLogs {
 			cc.vlogf("http2: Transport received %s", http2summarizeFrame(f))
 		}
+		if !gotSettings {
+			if _, ok := f.(*http2SettingsFrame); !ok {
+				cc.logf("protocol error: received %T before a SETTINGS frame", f)
+				return http2ConnectionError(http2ErrCodeProtocol)
+			}
+			gotSettings = true
+		}
 		maybeIdle := false
 
 		switch f := f.(type) {
@@ -6216,10 +6272,27 @@
 
 func (b http2transportResponseBody) Close() error {
 	cs := b.cs
-	if cs.bufPipe.Err() != io.EOF {
+	cc := cs.cc
 
-		cs.cc.writeStreamReset(cs.ID, http2ErrCodeCancel, nil)
+	serverSentStreamEnd := cs.bufPipe.Err() == io.EOF
+	unread := cs.bufPipe.Len()
+
+	if unread > 0 || !serverSentStreamEnd {
+		cc.mu.Lock()
+		cc.wmu.Lock()
+		if !serverSentStreamEnd {
+			cc.fr.WriteRSTStream(cs.ID, http2ErrCodeCancel)
+		}
+
+		if unread > 0 {
+			cc.inflow.add(int32(unread))
+			cc.fr.WriteWindowUpdate(0, uint32(unread))
+		}
+		cc.bw.Flush()
+		cc.wmu.Unlock()
+		cc.mu.Unlock()
 	}
+
 	cs.bufPipe.BreakWithError(http2errClosedResponseBody)
 	return nil
 }
@@ -6227,6 +6300,7 @@
 func (rl *http2clientConnReadLoop) processData(f *http2DataFrame) error {
 	cc := rl.cc
 	cs := cc.streamByID(f.StreamID, f.StreamEnded())
+	data := f.Data()
 	if cs == nil {
 		cc.mu.Lock()
 		neverSent := cc.nextStreamID
@@ -6237,27 +6311,49 @@
 			return http2ConnectionError(http2ErrCodeProtocol)
 		}
 
+		if f.Length > 0 {
+			cc.mu.Lock()
+			cc.inflow.add(int32(f.Length))
+			cc.mu.Unlock()
+
+			cc.wmu.Lock()
+			cc.fr.WriteWindowUpdate(0, uint32(f.Length))
+			cc.bw.Flush()
+			cc.wmu.Unlock()
+		}
 		return nil
 	}
-	if data := f.Data(); len(data) > 0 {
-		if cs.bufPipe.b == nil {
+	if f.Length > 0 {
+		if len(data) > 0 && cs.bufPipe.b == nil {
 
 			cc.logf("http2: Transport received DATA frame for closed stream; closing connection")
 			return http2ConnectionError(http2ErrCodeProtocol)
 		}
 
 		cc.mu.Lock()
-		if cs.inflow.available() >= int32(len(data)) {
-			cs.inflow.take(int32(len(data)))
+		if cs.inflow.available() >= int32(f.Length) {
+			cs.inflow.take(int32(f.Length))
 		} else {
 			cc.mu.Unlock()
 			return http2ConnectionError(http2ErrCodeFlowControl)
 		}
+
+		if pad := int32(f.Length) - int32(len(data)); pad > 0 {
+			cs.inflow.add(pad)
+			cc.inflow.add(pad)
+			cc.wmu.Lock()
+			cc.fr.WriteWindowUpdate(0, uint32(pad))
+			cc.fr.WriteWindowUpdate(cs.ID, uint32(pad))
+			cc.bw.Flush()
+			cc.wmu.Unlock()
+		}
 		cc.mu.Unlock()
 
-		if _, err := cs.bufPipe.Write(data); err != nil {
-			rl.endStreamError(cs, err)
-			return err
+		if len(data) > 0 {
+			if _, err := cs.bufPipe.Write(data); err != nil {
+				rl.endStreamError(cs, err)
+				return err
+			}
 		}
 	}
 
@@ -6282,7 +6378,7 @@
 	}
 	cs.bufPipe.closeWithErrorAndCode(err, code)
 	delete(rl.activeRes, cs.ID)
-	if cs.req.Close || cs.req.Header.Get("Connection") == "close" {
+	if http2isConnectionCloseRequest(cs.req) {
 		rl.closeWhenIdle = true
 	}
 }
@@ -6312,7 +6408,16 @@
 	cc := rl.cc
 	cc.mu.Lock()
 	defer cc.mu.Unlock()
-	return f.ForeachSetting(func(s http2Setting) error {
+
+	if f.IsAck() {
+		if cc.wantSettingsAck {
+			cc.wantSettingsAck = false
+			return nil
+		}
+		return http2ConnectionError(http2ErrCodeProtocol)
+	}
+
+	err := f.ForeachSetting(func(s http2Setting) error {
 		switch s.ID {
 		case http2SettingMaxFrameSize:
 			cc.maxFrameSize = s.Val
@@ -6327,6 +6432,16 @@
 		}
 		return nil
 	})
+	if err != nil {
+		return err
+	}
+
+	cc.wmu.Lock()
+	defer cc.wmu.Unlock()
+
+	cc.fr.WriteSettingsAck()
+	cc.bw.Flush()
+	return cc.werr
 }
 
 func (rl *http2clientConnReadLoop) processWindowUpdate(f *http2WindowUpdateFrame) error {
@@ -6538,6 +6653,12 @@
 	}
 }
 
+// isConnectionCloseRequest reports whether req should use its own
+// connection for a single request and then close the connection.
+func http2isConnectionCloseRequest(req *Request) bool {
+	return req.Close || httplex.HeaderValuesContainsToken(req.Header["Connection"], "close")
+}
+
 // writeFramer is implemented by any type that is used to write frames.
 type http2writeFramer interface {
 	writeFrame(http2writeContext) error
diff --git a/src/net/http/serve_test.go b/src/net/http/serve_test.go
index 139ce3e..13e5f28 100644
--- a/src/net/http/serve_test.go
+++ b/src/net/http/serve_test.go
@@ -4716,3 +4716,14 @@
 	}
 	b.StopTimer()
 }
+
+// Verify this doesn't race (Issue 16505)
+func TestConcurrentServerServe(t *testing.T) {
+	for i := 0; i < 100; i++ {
+		ln1 := &oneConnListener{conn: nil}
+		ln2 := &oneConnListener{conn: nil}
+		srv := Server{}
+		go func() { srv.Serve(ln1) }()
+		go func() { srv.Serve(ln2) }()
+	}
+}
diff --git a/src/net/http/server.go b/src/net/http/server.go
index 7b2b4b2..89574a8 100644
--- a/src/net/http/server.go
+++ b/src/net/http/server.go
@@ -2129,8 +2129,8 @@
 	ErrorLog *log.Logger
 
 	disableKeepAlives int32     // accessed atomically.
-	nextProtoOnce     sync.Once // guards initialization of TLSNextProto in Serve
-	nextProtoErr      error
+	nextProtoOnce     sync.Once // guards setupHTTP2_* init
+	nextProtoErr      error     // result of http2.ConfigureServer if used
 }
 
 // A ConnState represents the state of a client connection to a server.
@@ -2260,10 +2260,8 @@
 	}
 	var tempDelay time.Duration // how long to sleep on accept failure
 
-	if srv.shouldConfigureHTTP2ForServe() {
-		if err := srv.setupHTTP2(); err != nil {
-			return err
-		}
+	if err := srv.setupHTTP2_Serve(); err != nil {
+		return err
 	}
 
 	// TODO: allow changing base context? can't imagine concrete
@@ -2408,7 +2406,7 @@
 
 	// Setup HTTP/2 before srv.Serve, to initialize srv.TLSConfig
 	// before we clone it and create the TLS Listener.
-	if err := srv.setupHTTP2(); err != nil {
+	if err := srv.setupHTTP2_ListenAndServeTLS(); err != nil {
 		return err
 	}
 
@@ -2436,14 +2434,36 @@
 	return srv.Serve(tlsListener)
 }
 
-func (srv *Server) setupHTTP2() error {
+// setupHTTP2_ListenAndServeTLS conditionally configures HTTP/2 on
+// srv and returns whether there was an error setting it up. If it is
+// not configured for policy reasons, nil is returned.
+func (srv *Server) setupHTTP2_ListenAndServeTLS() error {
 	srv.nextProtoOnce.Do(srv.onceSetNextProtoDefaults)
 	return srv.nextProtoErr
 }
 
+// setupHTTP2_Serve is called from (*Server).Serve and conditionally
+// configures HTTP/2 on srv using a more conservative policy than
+// setupHTTP2_ListenAndServeTLS because Serve may be called
+// concurrently.
+//
+// The tests named TestTransportAutomaticHTTP2* and
+// TestConcurrentServerServe in server_test.go demonstrate some
+// of the supported use cases and motivations.
+func (srv *Server) setupHTTP2_Serve() error {
+	srv.nextProtoOnce.Do(srv.onceSetNextProtoDefaults_Serve)
+	return srv.nextProtoErr
+}
+
+func (srv *Server) onceSetNextProtoDefaults_Serve() {
+	if srv.shouldConfigureHTTP2ForServe() {
+		srv.onceSetNextProtoDefaults()
+	}
+}
+
 // onceSetNextProtoDefaults configures HTTP/2, if the user hasn't
 // configured otherwise. (by setting srv.TLSNextProto non-nil)
-// It must only be called via srv.nextProtoOnce (use srv.setupHTTP2).
+// It must only be called via srv.nextProtoOnce (use srv.setupHTTP2_*).
 func (srv *Server) onceSetNextProtoDefaults() {
 	if strings.Contains(os.Getenv("GODEBUG"), "http2server=0") {
 		return
diff --git a/src/net/http/transport.go b/src/net/http/transport.go
index 9164d0d..009f3c5 100644
--- a/src/net/http/transport.go
+++ b/src/net/http/transport.go
@@ -383,6 +383,11 @@
 			return resp, nil
 		}
 		if !pconn.shouldRetryRequest(req, err) {
+			// Issue 16465: return underlying net.Conn.Read error from peek,
+			// as we've historically done.
+			if e, ok := err.(transportReadFromServerError); ok {
+				err = e.err
+			}
 			return nil, err
 		}
 		testHookRoundTripRetried()
@@ -415,11 +420,19 @@
 		// first, per golang.org/issue/15723
 		return false
 	}
-	if _, ok := err.(nothingWrittenError); ok {
+	switch err.(type) {
+	case nothingWrittenError:
 		// We never wrote anything, so it's safe to retry.
 		return true
+	case transportReadFromServerError:
+		// We got some non-EOF net.Conn.Read failure reading
+		// the 1st response byte from the server.
+		return true
 	}
-	if err == errServerClosedIdle || err == errServerClosedConn {
+	if err == errServerClosedIdle {
+		// The server replied with io.EOF while we were trying to
+		// read the response. Probably an unfortunately keep-alive
+		// timeout, just as the client was writing a request.
 		return true
 	}
 	return false // conservatively
@@ -566,10 +579,25 @@
 	errCloseIdleConns     = errors.New("http: CloseIdleConnections called")
 	errReadLoopExiting    = errors.New("http: persistConn.readLoop exiting")
 	errServerClosedIdle   = errors.New("http: server closed idle connection")
-	errServerClosedConn   = errors.New("http: server closed connection")
 	errIdleConnTimeout    = errors.New("http: idle connection timeout")
 )
 
+// transportReadFromServerError is used by Transport.readLoop when the
+// 1 byte peek read fails and we're actually anticipating a response.
+// Usually this is just due to the inherent keep-alive shut down race,
+// where the server closed the connection at the same time the client
+// wrote. The underlying err field is usually io.EOF or some
+// ECONNRESET sort of thing which varies by platform. But it might be
+// the user's custom net.Conn.Read error too, so we carry it along for
+// them to return from Transport.RoundTrip.
+type transportReadFromServerError struct {
+	err error
+}
+
+func (e transportReadFromServerError) Error() string {
+	return fmt.Sprintf("net/http: Transport failed to read from server: %v", e.err)
+}
+
 func (t *Transport) putOrCloseIdleConn(pconn *persistConn) {
 	if err := t.tryPutIdleConn(pconn); err != nil {
 		pconn.close(err)
@@ -1293,7 +1321,10 @@
 	if pc.isCanceled() {
 		return errRequestCanceled
 	}
-	if err == errServerClosedIdle || err == errServerClosedConn {
+	if err == errServerClosedIdle {
+		return err
+	}
+	if _, ok := err.(transportReadFromServerError); ok {
 		return err
 	}
 	if pc.isBroken() {
@@ -1314,7 +1345,11 @@
 		return errRequestCanceled
 	}
 	err := pc.closed
-	if err == errServerClosedIdle || err == errServerClosedConn {
+	if err == errServerClosedIdle {
+		// Don't decorate
+		return err
+	}
+	if _, ok := err.(transportReadFromServerError); ok {
 		// Don't decorate
 		return err
 	}
@@ -1383,7 +1418,7 @@
 		if err == nil {
 			resp, err = pc.readResponse(rc, trace)
 		} else {
-			err = errServerClosedConn
+			err = transportReadFromServerError{err}
 			closeErr = err
 		}
 
@@ -1784,6 +1819,7 @@
 	var re responseAndError
 	var respHeaderTimer <-chan time.Time
 	cancelChan := req.Request.Cancel
+	ctxDoneChan := req.Context().Done()
 WaitResponse:
 	for {
 		testHookWaitResLoop()
@@ -1815,9 +1851,11 @@
 		case <-cancelChan:
 			pc.t.CancelRequest(req.Request)
 			cancelChan = nil
-		case <-req.Context().Done():
+			ctxDoneChan = nil
+		case <-ctxDoneChan:
 			pc.t.CancelRequest(req.Request)
 			cancelChan = nil
+			ctxDoneChan = nil
 		}
 	}
 
diff --git a/src/net/http/transport_internal_test.go b/src/net/http/transport_internal_test.go
index a157d90..a05ca6e 100644
--- a/src/net/http/transport_internal_test.go
+++ b/src/net/http/transport_internal_test.go
@@ -46,17 +46,22 @@
 	conn.Close() // simulate the server hanging up on the client
 
 	_, err = pc.roundTrip(treq)
-	if err != errServerClosedConn && err != errServerClosedIdle {
+	if !isTransportReadFromServerError(err) && err != errServerClosedIdle {
 		t.Fatalf("roundTrip = %#v, %v; want errServerClosedConn or errServerClosedIdle", err, err)
 	}
 
 	<-pc.closech
 	err = pc.closed
-	if err != errServerClosedConn && err != errServerClosedIdle {
+	if !isTransportReadFromServerError(err) && err != errServerClosedIdle {
 		t.Fatalf("pc.closed = %#v, %v; want errServerClosedConn or errServerClosedIdle", err, err)
 	}
 }
 
+func isTransportReadFromServerError(err error) bool {
+	_, ok := err.(transportReadFromServerError)
+	return ok
+}
+
 func newLocalListener(t *testing.T) net.Listener {
 	ln, err := net.Listen("tcp", "127.0.0.1:0")
 	if err != nil {
diff --git a/src/net/http/transport_test.go b/src/net/http/transport_test.go
index 72b98f1..749d453 100644
--- a/src/net/http/transport_test.go
+++ b/src/net/http/transport_test.go
@@ -3511,6 +3511,45 @@
 	}
 }
 
+type funcConn struct {
+	net.Conn
+	read  func([]byte) (int, error)
+	write func([]byte) (int, error)
+}
+
+func (c funcConn) Read(p []byte) (int, error)  { return c.read(p) }
+func (c funcConn) Write(p []byte) (int, error) { return c.write(p) }
+func (c funcConn) Close() error                { return nil }
+
+// Issue 16465: Transport.RoundTrip should return the raw net.Conn.Read error from Peek
+// back to the caller.
+func TestTransportReturnsPeekError(t *testing.T) {
+	errValue := errors.New("specific error value")
+
+	wrote := make(chan struct{})
+	var wroteOnce sync.Once
+
+	tr := &Transport{
+		Dial: func(network, addr string) (net.Conn, error) {
+			c := funcConn{
+				read: func([]byte) (int, error) {
+					<-wrote
+					return 0, errValue
+				},
+				write: func(p []byte) (int, error) {
+					wroteOnce.Do(func() { close(wrote) })
+					return len(p), nil
+				},
+			}
+			return c, nil
+		},
+	}
+	_, err := tr.RoundTrip(httptest.NewRequest("GET", "http://fake.tld/", nil))
+	if err != errValue {
+		t.Errorf("error = %#v; want %v", err, errValue)
+	}
+}
+
 var errFakeRoundTrip = errors.New("fake roundtrip")
 
 type funcRoundTripper func()
diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
index 1eabf43..3b238cb 100644
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -741,11 +741,10 @@
 // can accumulate on a P before updating gcController.assistTime.
 const gcAssistTimeSlack = 5000
 
-// gcOverAssistBytes determines how many extra allocation bytes of
-// assist credit a GC assist builds up when an assist happens. This
-// amortizes the cost of an assist by pre-paying for this many bytes
-// of future allocations.
-const gcOverAssistBytes = 1 << 20
+// gcOverAssistWork determines how many extra units of scan work a GC
+// assist does when an assist happens. This amortizes the cost of an
+// assist by pre-paying for this many bytes of future allocations.
+const gcOverAssistWork = 64 << 10
 
 var work struct {
 	full  uint64                   // lock-free list of full blocks workbuf
diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go
index 00b96fd..aa7f7a7 100644
--- a/src/runtime/mgcmark.go
+++ b/src/runtime/mgcmark.go
@@ -393,10 +393,15 @@
 	}
 
 	// Compute the amount of scan work we need to do to make the
-	// balance positive. We over-assist to build up credit for
-	// future allocations and amortize the cost of assisting.
-	debtBytes := -gp.gcAssistBytes + gcOverAssistBytes
+	// balance positive. When the required amount of work is low,
+	// we over-assist to build up credit for future allocations
+	// and amortize the cost of assisting.
+	debtBytes := -gp.gcAssistBytes
 	scanWork := int64(gcController.assistWorkPerByte * float64(debtBytes))
+	if scanWork < gcOverAssistWork {
+		scanWork = gcOverAssistWork
+		debtBytes = int64(gcController.assistBytesPerWork * float64(scanWork))
+	}
 
 retry:
 	// Steal as much credit as we can from the background GC's
diff --git a/src/runtime/pprof/pprof.go b/src/runtime/pprof/pprof.go
index b7c41f1..25f7ed6 100644
--- a/src/runtime/pprof/pprof.go
+++ b/src/runtime/pprof/pprof.go
@@ -4,8 +4,69 @@
 
 // Package pprof writes runtime profiling data in the format expected
 // by the pprof visualization tool.
+//
+// Profiling a Go program
+//
+// The first step to profiling a Go program is to enable profiling.
+// Support for profiling benchmarks built with the standard testing
+// package is built into go test. For example, the following command
+// runs benchmarks in the current directory and writes the CPU and
+// memory profiles to cpu.prof and mem.prof:
+//
+//     go test -cpuprofile cpu.prof -memprofile mem.prof -bench .
+//
+// To add equivalent profiling support to a standalone program, add
+// code like the following to your main function:
+//
+//    var cpuprofile = flag.String("cpuprofile", "", "write cpu profile `file`")
+//    var memprofile = flag.String("memprofile", "", "write memory profile to `file`")
+//
+//    func main() {
+//        flag.Parse()
+//        if *cpuprofile != "" {
+//            f, err := os.Create(*cpuprofile)
+//            if err != nil {
+//                log.Fatal("could not create CPU profile: ", err)
+//            }
+//            if err := pprof.StartCPUProfile(f); err != nil {
+//                log.Fatal("could not start CPU profile: ", err)
+//            }
+//            defer pprof.StopCPUProfile()
+//        }
+//        ...
+//        if *memprofile != "" {
+//            f, err := os.Create(*memprofile)
+//            if err != nil {
+//                log.Fatal("could not create memory profile: ", err)
+//            }
+//            runtime.GC() // get up-to-date statistics
+//            if err := pprof.WriteHeapProfile(f); err != nil {
+//                log.Fatal("could not write memory profile: ", err)
+//            }
+//            f.Close()
+//        }
+//    }
+//
+// There is also a standard HTTP interface to profiling data. Adding
+// the following line will install handlers under the /debug/pprof/
+// URL to download live profiles:
+//
+//    import _ "net/http/pprof"
+//
+// See the net/http/pprof package for more details.
+//
+// Profiles can then be visualized with the pprof tool:
+//
+//    go tool pprof cpu.prof
+//
+// There are many commands available from the pprof command line.
+// Commonly used commands include "top", which prints a summary of the
+// top program hot-spots, and "web", which opens an interactive graph
+// of hot-spots and their call graphs. Use "help" for information on
+// all pprof commands.
+//
 // For more information about pprof, see
-// http://github.com/google/pprof/.
+// https://github.com/google/pprof/blob/master/doc/pprof.md.
 package pprof
 
 import (
diff --git a/test/fixedbugs/issue16515.go b/test/fixedbugs/issue16515.go
new file mode 100644
index 0000000..6b67436
--- /dev/null
+++ b/test/fixedbugs/issue16515.go
@@ -0,0 +1,53 @@
+// run
+
+// Copyright 2016 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// issue 16515: spilled Duff-adjusted address may be invalid
+
+package main
+
+import "runtime"
+
+type T [62]int // DUFFZERO with non-zero adjustment on AMD64
+
+var sink interface{}
+
+//go:noinline
+func zero(x *T) {
+	// Two DUFFZEROs on the same address with a function call in between.
+	// Duff-adjusted address will be spilled and loaded
+
+	*x = T{} // DUFFZERO
+	runtime.GC()
+	(*x)[0] = 1
+	g()      // call a function with large frame, trigger a stack move
+	*x = T{} // DUFFZERO again
+}
+
+//go:noinline
+// a function with large frame
+func g() {
+	var x [1000]int
+	_ = x
+}
+
+func main() {
+	var s struct { a T; b [8192-62]int } // allocate 64K, hopefully it's in a new span and a few bytes before it is garbage
+	sink = &s // force heap allocation
+	s.a[0] = 2
+	zero(&s.a)
+	if s.a[0] != 0 {
+		println("s.a[0] =", s.a[0])
+		panic("zeroing failed")
+	}
+
+	var a T // on stack
+	a[0] = 2
+	zero(&a)
+	if a[0] != 0 {
+		println("a[0] =", a[0])
+		panic("zeroing failed")
+	}
+}