[release-branch.go1.18] crypto/tls: avoid extra allocations in steady-state Handshake calls

The Read and Write methods of *tls.Conn call Handshake
unconditionally, every time they are called, expecting it to only
perform a new handshake if required.

However in go 1.17 handshakeContext was extended to set up a
cancelable Context, and importantly did so prior to checking if a
handshake is required. This thus causes it to allocate on every call,
even in those that are no-ops when called in a Read or Write on an
established connection, sometimes leading to very large numbers of
allocations during reads.

This change adds an early return, prior to setting up the context or
proceeding into the handshakeMutex and checking the handshake error, if
the handshake status atomic indicates handshake is already complete.

name                                     old allocs/op  new allocs/op  delta
Throughput/MaxPacket/1MB/TLSv12-10          1.07k ± 0%     0.62k ± 0%  -42.16%  (p=0.000 n=4+5)
Throughput/MaxPacket/1MB/TLSv13-10          1.70k ± 0%     1.25k ± 0%     ~     (p=0.079 n=4+5)
Throughput/MaxPacket/2MB/TLSv12-10          1.62k ± 0%     0.73k ± 0%  -55.18%  (p=0.008 n=5+5)
Throughput/MaxPacket/2MB/TLSv13-10          2.26k ± 0%     1.36k ± 0%  -39.64%  (p=0.008 n=5+5)
Throughput/MaxPacket/4MB/TLSv12-10          2.74k ± 0%     0.95k ± 0%  -65.35%  (p=0.008 n=5+5)
Throughput/MaxPacket/4MB/TLSv13-10          3.37k ± 0%     1.58k ± 0%  -53.15%  (p=0.008 n=5+5)
Throughput/MaxPacket/8MB/TLSv12-10          4.96k ± 0%     1.39k ± 0%  -72.06%  (p=0.016 n=4+5)
Throughput/MaxPacket/8MB/TLSv13-10          5.60k ± 0%     2.01k ± 0%  -64.05%  (p=0.008 n=5+5)
Throughput/MaxPacket/16MB/TLSv12-10         9.42k ± 0%     2.27k ± 1%  -75.92%  (p=0.016 n=4+5)
Throughput/MaxPacket/16MB/TLSv13-10         10.0k ± 0%      2.9k ± 0%  -71.39%  (p=0.008 n=5+5)
Throughput/MaxPacket/32MB/TLSv12-10         18.3k ± 0%      4.0k ± 0%  -77.97%  (p=0.008 n=5+5)
Throughput/MaxPacket/32MB/TLSv13-10         18.9k ± 0%      4.6k ± 0%  -75.62%  (p=0.008 n=5+5)
Throughput/MaxPacket/64MB/TLSv12-10         36.2k ± 0%      7.5k ± 0%  -79.15%  (p=0.008 n=5+5)
Throughput/MaxPacket/64MB/TLSv13-10         36.7k ± 0%      8.1k ± 0%  -78.06%  (p=0.008 n=5+5)
Throughput/DynamicPacket/1MB/TLSv12-10      1.12k ± 0%     0.63k ± 0%  -44.20%  (p=0.008 n=5+5)
Throughput/DynamicPacket/1MB/TLSv13-10      1.76k ± 0%     1.26k ± 0%  -28.22%  (p=0.016 n=5+4)
Throughput/DynamicPacket/2MB/TLSv12-10      1.68k ± 0%     0.74k ± 0%  -56.11%  (p=0.008 n=5+5)
Throughput/DynamicPacket/2MB/TLSv13-10      2.32k ± 0%     1.37k ± 0%  -40.80%  (p=0.008 n=5+5)
Throughput/DynamicPacket/4MB/TLSv12-10      2.80k ± 0%     0.96k ± 0%  -65.81%  (p=0.008 n=5+5)
Throughput/DynamicPacket/4MB/TLSv13-10      3.43k ± 0%     1.59k ± 0%  -53.57%  (p=0.008 n=5+5)
Throughput/DynamicPacket/8MB/TLSv12-10      5.03k ± 0%     1.39k ± 0%  -72.27%  (p=0.008 n=5+5)
Throughput/DynamicPacket/8MB/TLSv13-10      5.66k ± 0%     2.02k ± 0%  -64.27%  (p=0.008 n=5+5)
Throughput/DynamicPacket/16MB/TLSv12-10     9.48k ± 0%     2.28k ± 1%  -75.98%  (p=0.008 n=5+5)
Throughput/DynamicPacket/16MB/TLSv13-10     10.1k ± 0%      2.9k ± 0%  -71.34%  (p=0.008 n=5+5)
Throughput/DynamicPacket/32MB/TLSv12-10     18.4k ± 0%      4.0k ± 0%  -78.13%  (p=0.008 n=5+5)
Throughput/DynamicPacket/32MB/TLSv13-10     19.0k ± 0%      4.6k ± 0%  -75.54%  (p=0.008 n=5+5)
Throughput/DynamicPacket/64MB/TLSv12-10     36.2k ± 0%      7.6k ± 1%  -79.02%  (p=0.008 n=5+5)
Throughput/DynamicPacket/64MB/TLSv13-10     36.8k ± 0%      8.2k ± 1%  -77.76%  (p=0.008 n=5+5)

Fixes #52791

Change-Id: Iacb1f9bf7802022960d9dbce141b8c0587a614d4
Reviewed-on: https://go-review.googlesource.com/c/go/+/379034
Reviewed-by: David Chase <drchase@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Filippo Valsorda <valsorda@google.com>
Auto-Submit: Filippo Valsorda <filippo@golang.org>
Run-TryBot: Filippo Valsorda <filippo@golang.org>
(cherry picked from commit a4af35607536b2b0d73be94df188b9f5a157480c)
Reviewed-on: https://go-review.googlesource.com/c/go/+/405545
Reviewed-by: Roland Shoemaker <roland@golang.org>
Reviewed-by: Alex Rakoczy <alex@golang.org>
diff --git a/src/crypto/tls/conn.go b/src/crypto/tls/conn.go
index 28ab063..0dae8e3 100644
--- a/src/crypto/tls/conn.go
+++ b/src/crypto/tls/conn.go
@@ -32,6 +32,7 @@
 
 	// handshakeStatus is 1 if the connection is currently transferring
 	// application data (i.e. is not currently processing a handshake).
+	// handshakeStatus == 1 implies handshakeErr == nil.
 	// This field is only to be accessed with sync/atomic.
 	handshakeStatus uint32
 	// constant after handshake; protected by handshakeMutex
@@ -1403,6 +1404,13 @@
 }
 
 func (c *Conn) handshakeContext(ctx context.Context) (ret error) {
+	// Fast sync/atomic-based exit if there is no handshake in flight and the
+	// last one succeeded without an error. Avoids the expensive context setup
+	// and mutex for most Read and Write calls.
+	if c.handshakeComplete() {
+		return nil
+	}
+
 	handshakeCtx, cancel := context.WithCancel(ctx)
 	// Note: defer this before starting the "interrupter" goroutine
 	// so that we can tell the difference between the input being canceled and
@@ -1461,6 +1469,9 @@
 	if c.handshakeErr == nil && !c.handshakeComplete() {
 		c.handshakeErr = errors.New("tls: internal error: handshake should have had a result")
 	}
+	if c.handshakeErr != nil && c.handshakeComplete() {
+		panic("tls: internal error: handshake returned an error but is marked successful")
+	}
 
 	return c.handshakeErr
 }