[release-branch.go1.19] net/http/httputil: avoid query parameter smuggling

Query parameter smuggling occurs when a proxy's interpretation
of query parameters differs from that of a downstream server.
Change ReverseProxy to avoid forwarding ignored query parameters.

Remove unparsable query parameters from the outbound request

   * if req.Form != nil after calling ReverseProxy.Director; and
   * before calling ReverseProxy.Rewrite.

This change preserves the existing behavior of forwarding the
raw query untouched if a Director hook does not parse the query
by calling Request.ParseForm (possibly indirectly).

Fixes #55843
For #54663
For CVE-2022-2880

Change-Id: If1621f6b0e73a49d79059dae9e6b256e0ff18ca9
Reviewed-on: https://go-review.googlesource.com/c/go/+/432976
Reviewed-by: Roland Shoemaker <roland@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Damien Neil <dneil@google.com>
Reviewed-on: https://go-review.googlesource.com/c/go/+/433735
Reviewed-by: Dmitri Shuralyov <dmitshur@golang.org>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
diff --git a/src/net/http/httputil/reverseproxy.go b/src/net/http/httputil/reverseproxy.go
index b5d3ce7..cf39222 100644
--- a/src/net/http/httputil/reverseproxy.go
+++ b/src/net/http/httputil/reverseproxy.go
@@ -261,6 +261,9 @@
 	}
 
 	p.Director(outreq)
+	if outreq.Form != nil {
+		outreq.URL.RawQuery = cleanQueryParams(outreq.URL.RawQuery)
+	}
 	outreq.Close = false
 
 	reqUpType := upgradeType(outreq.Header)
@@ -639,3 +642,36 @@
 	_, err := io.Copy(c.backend, c.user)
 	errc <- err
 }
+
+func cleanQueryParams(s string) string {
+	reencode := func(s string) string {
+		v, _ := url.ParseQuery(s)
+		return v.Encode()
+	}
+	for i := 0; i < len(s); {
+		switch s[i] {
+		case ';':
+			return reencode(s)
+		case '%':
+			if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
+				return reencode(s)
+			}
+			i += 3
+		default:
+			i++
+		}
+	}
+	return s
+}
+
+func ishex(c byte) bool {
+	switch {
+	case '0' <= c && c <= '9':
+		return true
+	case 'a' <= c && c <= 'f':
+		return true
+	case 'A' <= c && c <= 'F':
+		return true
+	}
+	return false
+}
diff --git a/src/net/http/httputil/reverseproxy_test.go b/src/net/http/httputil/reverseproxy_test.go
index 90e8903..33b1ade 100644
--- a/src/net/http/httputil/reverseproxy_test.go
+++ b/src/net/http/httputil/reverseproxy_test.go
@@ -1537,3 +1537,77 @@
 		}
 	}
 }
+
+const (
+	testWantsCleanQuery = true
+	testWantsRawQuery   = false
+)
+
+func TestReverseProxyQueryParameterSmugglingDirectorDoesNotParseForm(t *testing.T) {
+	testReverseProxyQueryParameterSmuggling(t, testWantsRawQuery, func(u *url.URL) *ReverseProxy {
+		proxyHandler := NewSingleHostReverseProxy(u)
+		oldDirector := proxyHandler.Director
+		proxyHandler.Director = func(r *http.Request) {
+			oldDirector(r)
+		}
+		return proxyHandler
+	})
+}
+
+func TestReverseProxyQueryParameterSmugglingDirectorParsesForm(t *testing.T) {
+	testReverseProxyQueryParameterSmuggling(t, testWantsCleanQuery, func(u *url.URL) *ReverseProxy {
+		proxyHandler := NewSingleHostReverseProxy(u)
+		oldDirector := proxyHandler.Director
+		proxyHandler.Director = func(r *http.Request) {
+			// Parsing the form causes ReverseProxy to remove unparsable
+			// query parameters before forwarding.
+			r.FormValue("a")
+			oldDirector(r)
+		}
+		return proxyHandler
+	})
+}
+
+func testReverseProxyQueryParameterSmuggling(t *testing.T, wantCleanQuery bool, newProxy func(*url.URL) *ReverseProxy) {
+	const content = "response_content"
+	backend := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Write([]byte(r.URL.RawQuery))
+	}))
+	defer backend.Close()
+	backendURL, err := url.Parse(backend.URL)
+	if err != nil {
+		t.Fatal(err)
+	}
+	proxyHandler := newProxy(backendURL)
+	frontend := httptest.NewServer(proxyHandler)
+	defer frontend.Close()
+
+	// Don't spam output with logs of queries containing semicolons.
+	backend.Config.ErrorLog = log.New(io.Discard, "", 0)
+	frontend.Config.ErrorLog = log.New(io.Discard, "", 0)
+
+	for _, test := range []struct {
+		rawQuery   string
+		cleanQuery string
+	}{{
+		rawQuery:   "a=1&a=2;b=3",
+		cleanQuery: "a=1",
+	}, {
+		rawQuery:   "a=1&a=%zz&b=3",
+		cleanQuery: "a=1&b=3",
+	}} {
+		res, err := frontend.Client().Get(frontend.URL + "?" + test.rawQuery)
+		if err != nil {
+			t.Fatalf("Get: %v", err)
+		}
+		defer res.Body.Close()
+		body, _ := io.ReadAll(res.Body)
+		wantQuery := test.rawQuery
+		if wantCleanQuery {
+			wantQuery = test.cleanQuery
+		}
+		if got, want := string(body), wantQuery; got != want {
+			t.Errorf("proxy forwarded raw query %q as %q, want %q", test.rawQuery, got, want)
+		}
+	}
+}