[release-branch.go1.20] net/http: validate Host header before sending

Verify that the Host header we send is valid.
Avoids surprising behavior such as a Host of "go.dev\r\nX-Evil:oops"
adding an X-Evil header to HTTP/1 requests.

Add a test, skip the test for HTTP/2. HTTP/2 is not vulnerable to
header injection in the way HTTP/1 is, but x/net/http2 doesn't validate
the header and will go into a retry loop when the server rejects it.
CL 506995 adds the necessary validation to x/net/http2.

For #60374
Fixes #61076
For CVE-2023-29406

Change-Id: I05cb6866a9bead043101954dfded199258c6dd04
Reviewed-on: https://go-review.googlesource.com/c/go/+/506996
Reviewed-by: Tatiana Bradley <tatianabradley@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Damien Neil <dneil@google.com>
(cherry picked from commit 499458f7ca04087958987a33c2703c3ef03e27e2)
Reviewed-on: https://go-review.googlesource.com/c/go/+/507357
Reviewed-by: Damien Neil <dneil@google.com>
Run-TryBot: Tatiana Bradley <tatianabradley@google.com>
Reviewed-by: Roland Shoemaker <roland@golang.org>
diff --git a/src/net/http/http_test.go b/src/net/http/http_test.go
index 0d92fe5..f03272a 100644
--- a/src/net/http/http_test.go
+++ b/src/net/http/http_test.go
@@ -48,35 +48,6 @@
 	}
 }
 
-func TestCleanHost(t *testing.T) {
-	tests := []struct {
-		in, want string
-	}{
-		{"www.google.com", "www.google.com"},
-		{"www.google.com foo", "www.google.com"},
-		{"www.google.com/foo", "www.google.com"},
-		{" first character is a space", ""},
-		{"[1::6]:8080", "[1::6]:8080"},
-
-		// Punycode:
-		{"гофер.рф/foo", "xn--c1ae0ajs.xn--p1ai"},
-		{"bücher.de", "xn--bcher-kva.de"},
-		{"bücher.de:8080", "xn--bcher-kva.de:8080"},
-		// Verify we convert to lowercase before punycode:
-		{"BÜCHER.de", "xn--bcher-kva.de"},
-		{"BÜCHER.de:8080", "xn--bcher-kva.de:8080"},
-		// Verify we normalize to NFC before punycode:
-		{"gophér.nfc", "xn--gophr-esa.nfc"},            // NFC input; no work needed
-		{"goph\u0065\u0301r.nfd", "xn--gophr-esa.nfd"}, // NFD input
-	}
-	for _, tt := range tests {
-		got := cleanHost(tt.in)
-		if tt.want != got {
-			t.Errorf("cleanHost(%q) = %q, want %q", tt.in, got, tt.want)
-		}
-	}
-}
-
 // Test that cmd/go doesn't link in the HTTP server.
 //
 // This catches accidental dependencies between the HTTP transport and
diff --git a/src/net/http/request.go b/src/net/http/request.go
index a45c9e3..9c888b3 100644
--- a/src/net/http/request.go
+++ b/src/net/http/request.go
@@ -17,7 +17,6 @@
 	"io"
 	"mime"
 	"mime/multipart"
-	"net"
 	"net/http/httptrace"
 	"net/http/internal/ascii"
 	"net/textproto"
@@ -27,6 +26,7 @@
 	"strings"
 	"sync"
 
+	"golang.org/x/net/http/httpguts"
 	"golang.org/x/net/idna"
 )
 
@@ -575,12 +575,19 @@
 	// is not given, use the host from the request URL.
 	//
 	// Clean the host, in case it arrives with unexpected stuff in it.
-	host := cleanHost(r.Host)
+	host := r.Host
 	if host == "" {
 		if r.URL == nil {
 			return errMissingHost
 		}
-		host = cleanHost(r.URL.Host)
+		host = r.URL.Host
+	}
+	host, err = httpguts.PunycodeHostPort(host)
+	if err != nil {
+		return err
+	}
+	if !httpguts.ValidHostHeader(host) {
+		return errors.New("http: invalid Host header")
 	}
 
 	// According to RFC 6874, an HTTP client, proxy, or other
@@ -737,40 +744,6 @@
 	return idna.Lookup.ToASCII(v)
 }
 
-// cleanHost cleans up the host sent in request's Host header.
-//
-// It both strips anything after '/' or ' ', and puts the value
-// into Punycode form, if necessary.
-//
-// Ideally we'd clean the Host header according to the spec:
-//
-//	https://tools.ietf.org/html/rfc7230#section-5.4 (Host = uri-host [ ":" port ]")
-//	https://tools.ietf.org/html/rfc7230#section-2.7 (uri-host -> rfc3986's host)
-//	https://tools.ietf.org/html/rfc3986#section-3.2.2 (definition of host)
-//
-// But practically, what we are trying to avoid is the situation in
-// issue 11206, where a malformed Host header used in the proxy context
-// would create a bad request. So it is enough to just truncate at the
-// first offending character.
-func cleanHost(in string) string {
-	if i := strings.IndexAny(in, " /"); i != -1 {
-		in = in[:i]
-	}
-	host, port, err := net.SplitHostPort(in)
-	if err != nil { // input was just a host
-		a, err := idnaASCII(in)
-		if err != nil {
-			return in // garbage in, garbage out
-		}
-		return a
-	}
-	a, err := idnaASCII(host)
-	if err != nil {
-		return in // garbage in, garbage out
-	}
-	return net.JoinHostPort(a, port)
-}
-
 // removeZone removes IPv6 zone identifier from host.
 // E.g., "[fe80::1%en0]:8080" to "[fe80::1]:8080"
 func removeZone(host string) string {
diff --git a/src/net/http/request_test.go b/src/net/http/request_test.go
index 23e49d6..86c68e4 100644
--- a/src/net/http/request_test.go
+++ b/src/net/http/request_test.go
@@ -774,15 +774,8 @@
 	}
 	req.Host = "foo.com with spaces"
 	req.URL.Host = "foo.com with spaces"
-	req.Write(logWrites{t, &got})
-	want := []string{
-		"GET /after HTTP/1.1\r\n",
-		"Host: foo.com\r\n",
-		"User-Agent: " + DefaultUserAgent + "\r\n",
-		"\r\n",
-	}
-	if !reflect.DeepEqual(got, want) {
-		t.Errorf("Writes = %q\n  Want = %q", got, want)
+	if err := req.Write(logWrites{t, &got}); err == nil {
+		t.Errorf("Writing request with invalid Host: succeded, want error")
 	}
 }
 
diff --git a/src/net/http/transport_test.go b/src/net/http/transport_test.go
index 245f73b..f4896c5 100644
--- a/src/net/http/transport_test.go
+++ b/src/net/http/transport_test.go
@@ -6654,3 +6654,22 @@
 	}
 	wg.Wait()
 }
+
+func TestRequestSanitization(t *testing.T) { run(t, testRequestSanitization) }
+func testRequestSanitization(t *testing.T, mode testMode) {
+	if mode == http2Mode {
+		// Remove this after updating x/net.
+		t.Skip("https://go.dev/issue/60374 test fails when run with HTTP/2")
+	}
+	ts := newClientServerTest(t, mode, HandlerFunc(func(rw ResponseWriter, req *Request) {
+		if h, ok := req.Header["X-Evil"]; ok {
+			t.Errorf("request has X-Evil header: %q", h)
+		}
+	})).ts
+	req, _ := NewRequest("GET", ts.URL, nil)
+	req.Host = "go.dev\r\nX-Evil:evil"
+	resp, _ := ts.Client().Do(req)
+	if resp != nil {
+		resp.Body.Close()
+	}
+}