net/url, net/http: reject control characters in URLs

This is a more conservative version of the reverted CL 99135 (which
was reverted in CL 137716)

The net/url part rejects URLs with ASCII CTLs from being parsed and
the net/http part rejects writing them if a bogus url.URL is
constructed otherwise.

Updates #27302
Updates #22907

Change-Id: I09a2212eb74c63db575223277aec363c55421ed8
Reviewed-on: https://go-review.googlesource.com/c/159157
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Filippo Valsorda <filippo@golang.org>
diff --git a/src/net/http/fs_test.go b/src/net/http/fs_test.go
index 255d215..762e88b 100644
--- a/src/net/http/fs_test.go
+++ b/src/net/http/fs_test.go
@@ -583,16 +583,23 @@
 	ts := httptest.NewServer(FileServer(Dir(".")))
 	defer ts.Close()
 
-	res, err := Get(ts.URL + "/..\x00")
+	c, err := net.Dial("tcp", ts.Listener.Addr().String())
 	if err != nil {
 		t.Fatal(err)
 	}
-	b, err := ioutil.ReadAll(res.Body)
+	defer c.Close()
+	_, err = fmt.Fprintf(c, "GET /..\x00 HTTP/1.0\r\n\r\n")
 	if err != nil {
-		t.Fatal("reading Body:", err)
+		t.Fatal(err)
+	}
+	var got bytes.Buffer
+	bufr := bufio.NewReader(io.TeeReader(c, &got))
+	res, err := ReadResponse(bufr, nil)
+	if err != nil {
+		t.Fatal("ReadResponse: ", err)
 	}
 	if res.StatusCode == 200 {
-		t.Errorf("got status 200; want an error. Body is:\n%s", string(b))
+		t.Errorf("got status 200; want an error. Body is:\n%s", got.Bytes())
 	}
 }
 
diff --git a/src/net/http/http.go b/src/net/http/http.go
index 624b2cf..5c03c16 100644
--- a/src/net/http/http.go
+++ b/src/net/http/http.go
@@ -59,6 +59,12 @@
 	return true
 }
 
+// isCTL reports whether r is an ASCII control character, including
+// the Extended ASCII control characters included in Unicode.
+func isCTL(r rune) bool {
+	return r < ' ' || 0x7f <= r && r <= 0x9f
+}
+
 func hexEscapeNonASCII(s string) string {
 	newLen := 0
 	for i := 0; i < len(s); i++ {
diff --git a/src/net/http/request.go b/src/net/http/request.go
index fb058f9..01ba1dc 100644
--- a/src/net/http/request.go
+++ b/src/net/http/request.go
@@ -550,7 +550,12 @@
 			ruri = r.URL.Opaque
 		}
 	}
-	// TODO(bradfitz): escape at least newlines in ruri?
+	if strings.IndexFunc(ruri, isCTL) != -1 {
+		return errors.New("net/http: can't write control character in Request.URL")
+	}
+	// TODO: validate r.Method too? At least it's less likely to
+	// come from an attacker (more likely to be a constant in
+	// code).
 
 	// Wrap the writer in a bufio Writer if it's not already buffered.
 	// Don't always call NewWriter, as that forces a bytes.Buffer
diff --git a/src/net/http/requestwrite_test.go b/src/net/http/requestwrite_test.go
index 7dbf0d4..b110b57 100644
--- a/src/net/http/requestwrite_test.go
+++ b/src/net/http/requestwrite_test.go
@@ -576,6 +576,17 @@
 			"User-Agent: Go-http-client/1.1\r\n" +
 			"X-Foo: X-Bar\r\n\r\n",
 	},
+
+	25: {
+		Req: Request{
+			Method: "GET",
+			URL: &url.URL{
+				Host:     "www.example.com",
+				RawQuery: "new\nline", // or any CTL
+			},
+		},
+		WantError: errors.New("net/http: can't write control character in Request.URL"),
+	},
 }
 
 func TestRequestWrite(t *testing.T) {
diff --git a/src/net/url/url.go b/src/net/url/url.go
index d84c95a..77078ad 100644
--- a/src/net/url/url.go
+++ b/src/net/url/url.go
@@ -513,6 +513,10 @@
 	var rest string
 	var err error
 
+	if strings.IndexFunc(rawurl, isCTL) != -1 {
+		return nil, errors.New("net/url: invalid control character in URL")
+	}
+
 	if rawurl == "" && viaRequest {
 		return nil, errors.New("empty url")
 	}
@@ -1134,3 +1138,9 @@
 	}
 	return true
 }
+
+// isCTL reports whether r is an ASCII control character, including
+// the Extended ASCII control characters included in Unicode.
+func isCTL(r rune) bool {
+	return r < ' ' || 0x7f <= r && r <= 0x9f
+}
diff --git a/src/net/url/url_test.go b/src/net/url/url_test.go
index 7c4ada2..43d77f0 100644
--- a/src/net/url/url_test.go
+++ b/src/net/url/url_test.go
@@ -1738,12 +1738,27 @@
 }
 
 func TestInvalidUserPassword(t *testing.T) {
-	_, err := Parse("http://us\ner:pass\nword@foo.com/")
+	_, err := Parse("http://user^:passwo^rd@foo.com/")
 	if got, wantsub := fmt.Sprint(err), "net/url: invalid userinfo"; !strings.Contains(got, wantsub) {
 		t.Errorf("error = %q; want substring %q", got, wantsub)
 	}
 }
 
+func TestRejectControlCharacters(t *testing.T) {
+	tests := []string{
+		"http://foo.com/?foo\nbar",
+		"http\r://foo.com/",
+		"http://foo\x7f.com/",
+	}
+	for _, s := range tests {
+		_, err := Parse(s)
+		const wantSub = "net/url: invalid control character in URL"
+		if got := fmt.Sprint(err); !strings.Contains(got, wantSub) {
+			t.Errorf("Parse(%q) error = %q; want substring %q", s, got, wantSub)
+		}
+	}
+}
+
 var escapeBenchmarks = []struct {
 	unescaped string
 	query     string