net/url: reject space in host; do not escape < > " in host

Host names in URLs must not use %-escaping for ASCII bytes, per RFC 3986.

url.Parse has historically allowed spaces and < > " in the URL host.
In Go 1.5, URL's String method started escaping those,
but then Parse would rejects the escaped form.
This CL is an attempt at some consistency between Parse and String
as far as the accepted host characters and the encoding of host characters,
so that if Parse succeeds, then Parse -> String -> Parse also succeeds.

Allowing space seems like a mistake, so reject that in Parse.
(Similarly, reject \t, \x01, and so on, all of which were being allowed.)

Allowing < > " doesn't seem awful, so continue to do that,
and go back to the Go 1.4 behavior of not escaping them in String.

Fixes #11302.

Change-Id: I0bf65b874cd936598f20694574364352a5abbe5f
Reviewed-on: https://go-review.googlesource.com/17387
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
diff --git a/src/net/http/request_test.go b/src/net/http/request_test.go
index ddbf841..0ecdf85 100644
--- a/src/net/http/request_test.go
+++ b/src/net/http/request_test.go
@@ -539,10 +539,12 @@
 
 func TestRequestBadHost(t *testing.T) {
 	got := []string{}
-	req, err := NewRequest("GET", "http://foo.com with spaces/after", nil)
+	req, err := NewRequest("GET", "http://foo/after", nil)
 	if err != nil {
 		t.Fatal(err)
 	}
+	req.Host = "foo.com with spaces"
+	req.URL.Host = "foo.com with spaces"
 	req.Write(logWrites{t, &got})
 	want := []string{
 		"GET /after HTTP/1.1\r\n",
diff --git a/src/net/url/url.go b/src/net/url/url.go
index 510ac77..3ea7563 100644
--- a/src/net/url/url.go
+++ b/src/net/url/url.go
@@ -83,6 +83,12 @@
 	return "invalid URL escape " + strconv.Quote(string(e))
 }
 
+type InvalidHostError string
+
+func (e InvalidHostError) Error() string {
+	return "invalid character " + strconv.Quote(string(e)) + " in host name"
+}
+
 // Return true if the specified character should be escaped when
 // appearing in a URL string, according to RFC 3986.
 //
@@ -99,9 +105,13 @@
 		//	sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
 		// as part of reg-name.
 		// We add : because we include :port as part of host.
-		// We add [ ] because we include [ipv6]:port as part of host
+		// We add [ ] because we include [ipv6]:port as part of host.
+		// We add < > because they're the only characters left that
+		// we could possibly allow, and Parse will reject them if we
+		// escape them (because hosts can't use %-encoding for
+		// ASCII bytes).
 		switch c {
-		case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '[', ']':
+		case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '[', ']', '<', '>', '"':
 			return false
 		}
 	}
@@ -193,6 +203,9 @@
 			hasPlus = mode == encodeQueryComponent
 			i++
 		default:
+			if (mode == encodeHost || mode == encodeZone) && s[i] < 0x80 && shouldEscape(s[i], mode) {
+				return "", InvalidHostError(s[i : i+1])
+			}
 			i++
 		}
 	}
diff --git a/src/net/url/url_test.go b/src/net/url/url_test.go
index b1c3ceb..643905d 100644
--- a/src/net/url/url_test.go
+++ b/src/net/url/url_test.go
@@ -521,6 +521,16 @@
 		},
 		"",
 	},
+	// test that we can reparse the host names we accept.
+	{
+		"myscheme://authority<\"hi\">/foo",
+		&URL{
+			Scheme: "myscheme",
+			Host:   "authority<\"hi\">",
+			Path:   "/foo",
+		},
+		"",
+	},
 }
 
 // more useful string for debugging than fmt's struct printer
@@ -1239,6 +1249,7 @@
 		{"mysql://x@y(1.2.3.4:123)/foo", false},
 		{"mysql://x@y([2001:db8::1]:123)/foo", false},
 		{"http://[]%20%48%54%54%50%2f%31%2e%31%0a%4d%79%48%65%61%64%65%72%3a%20%31%32%33%0a%0a/", true}, // golang.org/issue/11208
+		{"http://a b.com/", true},                                                                       // no space in host name please
 	}
 	for _, tt := range tests {
 		u, err := Parse(tt.in)