http/httpproxy: support CIDR notation and ports with NO_PROXY

NO_PROXY includes support for CIDR, and notations can also
match exactly on port information if provided.
When specifying a port with IPv6, the address must be enclosed with
square brackets, [IPv6 address]:port.

Updates golang/go#16704 (fixes after vendor into std)

Change-Id: Ideb61a9ec60a6b1908f5a2c885cd6d9dd10c37cf
Reviewed-on: https://go-review.googlesource.com/115255
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
diff --git a/http/httpproxy/export_test.go b/http/httpproxy/export_test.go
index 36b29d2..5d30018 100644
--- a/http/httpproxy/export_test.go
+++ b/http/httpproxy/export_test.go
@@ -4,4 +4,10 @@
 
 package httpproxy
 
-var ExportUseProxy = (*Config).useProxy
+func ExportUseProxy(cfg *Config, host string) bool {
+	cfg1 := &config{
+		Config: *cfg,
+	}
+	cfg1.init()
+	return cfg1.useProxy(host)
+}
diff --git a/http/httpproxy/proxy.go b/http/httpproxy/proxy.go
index cbe1d2a..163645b 100644
--- a/http/httpproxy/proxy.go
+++ b/http/httpproxy/proxy.go
@@ -37,13 +37,18 @@
 	HTTPSProxy string
 
 	// NoProxy represents the NO_PROXY or no_proxy environment
-	// variable. It specifies URLs that should be excluded from
-	// proxying as a comma-separated list of domain names or a
-	// single asterisk (*) to indicate that no proxying should be
-	// done. A domain name matches that name and all subdomains. A
-	// domain name with a leading "." matches subdomains only. For
-	// example "foo.com" matches "foo.com" and "bar.foo.com";
-	// ".y.com" matches "x.y.com" but not "y.com".
+	// variable. It specifies a string that contains comma-separated values
+	// specifying hosts that should be excluded from proxying. Each value is
+	// represented by an IP address prefix (1.2.3.4), an IP address prefix in
+	// CIDR notation (1.2.3.4/8), a domain name, or a special DNS label (*).
+	// An IP address prefix and domain name can also include a literal port
+	// number (1.2.3.4:80).
+	// A domain name matches that name and all subdomains. A domain name with
+	// a leading "." matches subdomains only. For example "foo.com" matches
+	// "foo.com" and "bar.foo.com"; ".y.com" matches "x.y.com" but not "y.com".
+	// A single asterisk (*) indicates that no proxying should be done.
+	// A best effort is made to parse the string and errors are
+	// ignored.
 	NoProxy string
 
 	// CGI holds whether the current process is running
@@ -55,6 +60,26 @@
 	CGI bool
 }
 
+// config holds the parsed configuration for HTTP proxy settings.
+type config struct {
+	// Config represents the original configuration as defined above.
+	Config
+
+	// httpsProxy is the parsed URL of the HTTPSProxy if defined.
+	httpsProxy *url.URL
+
+	// httpProxy is the parsed URL of the HTTPProxy if defined.
+	httpProxy *url.URL
+
+	// ipMatchers represent all values in the NoProxy that are IP address
+	// prefixes or an IP address in CIDR notation.
+	ipMatchers []matcher
+
+	// domainMatchers represent all values in the NoProxy that are a domain
+	// name or hostname & domain name
+	domainMatchers []matcher
+}
+
 // FromEnvironment returns a Config instance populated from the
 // environment variables HTTP_PROXY, HTTPS_PROXY and NO_PROXY (or the
 // lowercase versions thereof). HTTPS_PROXY takes precedence over
@@ -92,29 +117,40 @@
 // As a special case, if req.URL.Host is "localhost" (with or without a
 // port number), then a nil URL and nil error will be returned.
 func (cfg *Config) ProxyFunc() func(reqURL *url.URL) (*url.URL, error) {
-	// Prevent Config changes from affecting the function calculation.
-	// TODO Preprocess proxy settings for more efficient evaluation.
-	cfg1 := *cfg
+	// Preprocess the Config settings for more efficient evaluation.
+	cfg1 := &config{
+		Config: *cfg,
+	}
+	cfg1.init()
 	return cfg1.proxyForURL
 }
 
-func (cfg *Config) proxyForURL(reqURL *url.URL) (*url.URL, error) {
-	var proxy string
+func (cfg *config) proxyForURL(reqURL *url.URL) (*url.URL, error) {
+	var proxy *url.URL
 	if reqURL.Scheme == "https" {
-		proxy = cfg.HTTPSProxy
+		proxy = cfg.httpsProxy
 	}
-	if proxy == "" {
-		proxy = cfg.HTTPProxy
-		if proxy != "" && cfg.CGI {
+	if proxy == nil {
+		proxy = cfg.httpProxy
+		if proxy != nil && cfg.CGI {
 			return nil, errors.New("refusing to use HTTP_PROXY value in CGI environment; see golang.org/s/cgihttpproxy")
 		}
 	}
-	if proxy == "" {
+	if proxy == nil {
 		return nil, nil
 	}
 	if !cfg.useProxy(canonicalAddr(reqURL)) {
 		return nil, nil
 	}
+
+	return proxy, nil
+}
+
+func parseProxy(proxy string) (*url.URL, error) {
+	if proxy == "" {
+		return nil, nil
+	}
+
 	proxyURL, err := url.Parse(proxy)
 	if err != nil ||
 		(proxyURL.Scheme != "http" &&
@@ -136,60 +172,107 @@
 // useProxy reports whether requests to addr should use a proxy,
 // according to the NO_PROXY or no_proxy environment variable.
 // addr is always a canonicalAddr with a host and port.
-func (cfg *Config) useProxy(addr string) bool {
+func (cfg *config) useProxy(addr string) bool {
 	if len(addr) == 0 {
 		return true
 	}
-	host, _, err := net.SplitHostPort(addr)
+	host, port, err := net.SplitHostPort(addr)
 	if err != nil {
 		return false
 	}
 	if host == "localhost" {
 		return false
 	}
-	if ip := net.ParseIP(host); ip != nil {
+	ip := net.ParseIP(host)
+	if ip != nil {
 		if ip.IsLoopback() {
 			return false
 		}
 	}
 
-	noProxy := cfg.NoProxy
-	if noProxy == "*" {
-		return false
-	}
+	addr = strings.ToLower(strings.TrimSpace(host))
 
-	addr = strings.ToLower(strings.TrimSpace(addr))
-	if hasPort(addr) {
-		addr = addr[:strings.LastIndex(addr, ":")]
+	if ip != nil {
+		for _, m := range cfg.ipMatchers {
+			if m.match(addr, port, ip) {
+				return false
+			}
+		}
 	}
-
-	for _, p := range strings.Split(noProxy, ",") {
-		p = strings.ToLower(strings.TrimSpace(p))
-		if len(p) == 0 {
-			continue
-		}
-		if hasPort(p) {
-			p = p[:strings.LastIndex(p, ":")]
-		}
-		if addr == p {
-			return false
-		}
-		if len(p) == 0 {
-			// There is no host part, likely the entry is malformed; ignore.
-			continue
-		}
-		if p[0] == '.' && (strings.HasSuffix(addr, p) || addr == p[1:]) {
-			// no_proxy ".foo.com" matches "bar.foo.com" or "foo.com"
-			return false
-		}
-		if p[0] != '.' && strings.HasSuffix(addr, p) && addr[len(addr)-len(p)-1] == '.' {
-			// no_proxy "foo.com" matches "bar.foo.com"
+	for _, m := range cfg.domainMatchers {
+		if m.match(addr, port, ip) {
 			return false
 		}
 	}
 	return true
 }
 
+func (c *config) init() {
+	if parsed, err := parseProxy(c.HTTPProxy); err == nil {
+		c.httpProxy = parsed
+	}
+	if parsed, err := parseProxy(c.HTTPSProxy); err == nil {
+		c.httpsProxy = parsed
+	}
+
+	for _, p := range strings.Split(c.NoProxy, ",") {
+		p = strings.ToLower(strings.TrimSpace(p))
+		if len(p) == 0 {
+			continue
+		}
+
+		if p == "*" {
+			c.ipMatchers = []matcher{allMatch{}}
+			c.domainMatchers = []matcher{allMatch{}}
+			return
+		}
+
+		// IPv4/CIDR, IPv6/CIDR
+		if _, pnet, err := net.ParseCIDR(p); err == nil {
+			c.ipMatchers = append(c.ipMatchers, cidrMatch{cidr: pnet})
+			continue
+		}
+
+		// IPv4:port, [IPv6]:port
+		phost, pport, err := net.SplitHostPort(p)
+		if err == nil {
+			if len(phost) == 0 {
+				// There is no host part, likely the entry is malformed; ignore.
+				continue
+			}
+			if phost[0] == '[' && phost[len(phost)-1] == ']' {
+				phost = phost[1 : len(phost)-1]
+			}
+		} else {
+			phost = p
+		}
+		// IPv4, IPv6
+		if pip := net.ParseIP(phost); pip != nil {
+			c.ipMatchers = append(c.ipMatchers, ipMatch{ip: pip, port: pport})
+			continue
+		}
+
+		if len(phost) == 0 {
+			// There is no host part, likely the entry is malformed; ignore.
+			continue
+		}
+
+		// domain.com or domain.com:80
+		// foo.com matches bar.foo.com
+		// .domain.com or .domain.com:port
+		// *.domain.com or *.domain.com:port
+		if strings.HasPrefix(phost, "*.") {
+			phost = phost[1:]
+		}
+		matchHost := false
+		if phost[0] != '.' {
+			matchHost = true
+			phost = "." + phost
+		}
+		c.domainMatchers = append(c.domainMatchers, domainMatch{host: phost, port: pport, matchHost: matchHost})
+	}
+}
+
 var portMap = map[string]string{
 	"http":   "80",
 	"https":  "443",
@@ -237,3 +320,51 @@
 	}
 	return true
 }
+
+// matcher represents the matching rule for a given value in the NO_PROXY list
+type matcher interface {
+	// match returns true if the host and optional port or ip and optional port
+	// are allowed
+	match(host, port string, ip net.IP) bool
+}
+
+// allMatch matches on all possible inputs
+type allMatch struct{}
+
+func (a allMatch) match(host, port string, ip net.IP) bool {
+	return true
+}
+
+type cidrMatch struct {
+	cidr *net.IPNet
+}
+
+func (m cidrMatch) match(host, port string, ip net.IP) bool {
+	return m.cidr.Contains(ip)
+}
+
+type ipMatch struct {
+	ip   net.IP
+	port string
+}
+
+func (m ipMatch) match(host, port string, ip net.IP) bool {
+	if m.ip.Equal(ip) {
+		return m.port == "" || m.port == port
+	}
+	return false
+}
+
+type domainMatch struct {
+	host string
+	port string
+
+	matchHost bool
+}
+
+func (m domainMatch) match(host, port string, ip net.IP) bool {
+	if strings.HasSuffix(host, m.host) || (m.matchHost && host == m.host[1:]) {
+		return m.port == "" || m.port == port
+	}
+	return false
+}
diff --git a/http/httpproxy/proxy_test.go b/http/httpproxy/proxy_test.go
index e4af199..9951246 100644
--- a/http/httpproxy/proxy_test.go
+++ b/http/httpproxy/proxy_test.go
@@ -144,7 +144,7 @@
 		HTTPProxy: "proxy",
 	},
 	req:  "http://example.com/",
-	want: "<nil>",
+	want: "http://proxy",
 }, {
 	cfg: httpproxy.Config{
 		NoProxy:   "ample.com",
@@ -269,19 +269,36 @@
 	{"[::1]", false},
 	{"[::2]", true}, // not a loopback address
 
-	{"barbaz.net", false},     // match as .barbaz.net
-	{"foobar.com", false},     // have a port but match
-	{"foofoobar.com", true},   // not match as a part of foobar.com
-	{"baz.com", true},         // not match as a part of barbaz.com
-	{"localhost.net", true},   // not match as suffix of address
-	{"local.localhost", true}, // not match as prefix as address
-	{"barbarbaz.net", true},   // not match because NO_PROXY have a '.'
-	{"www.foobar.com", false}, // match because NO_PROXY includes "foobar.com"
+	{"192.168.1.1", false},                // matches exact IPv4
+	{"192.168.1.2", true},                 // ports do not match
+	{"192.168.1.3", false},                // matches exact IPv4:port
+	{"192.168.1.4", true},                 // no match
+	{"10.0.0.2", false},                   // matches IPv4/CIDR
+	{"[2001:db8::52:0:1]", false},         // matches exact IPv6
+	{"[2001:db8::52:0:2]", true},          // no match
+	{"[2001:db8::52:0:3]", false},         // matches exact [IPv6]:port
+	{"[2002:db8:a::123]", false},          // matches IPv6/CIDR
+	{"[fe80::424b:c8be:1643:a1b6]", true}, // no match
+
+	{"barbaz.net", true},          // does not match as .barbaz.net
+	{"www.barbaz.net", false},     // does match as .barbaz.net
+	{"foobar.com", false},         // does match as foobar.com
+	{"www.foobar.com", false},     // match because NO_PROXY includes "foobar.com"
+	{"foofoobar.com", true},       // not match as a part of foobar.com
+	{"baz.com", true},             // not match as a part of barbaz.com
+	{"localhost.net", true},       // not match as suffix of address
+	{"local.localhost", true},     // not match as prefix as address
+	{"barbarbaz.net", true},       // not match, wrong domain
+	{"wildcard.io", true},         // does not match as *.wildcard.io
+	{"nested.wildcard.io", false}, // match as *.wildcard.io
+	{"awildcard.io", true},        // not a match because of '*'
 }
 
+var noProxy = "foobar.com, .barbaz.net, *.wildcard.io, 192.168.1.1, 192.168.1.2:81, 192.168.1.3:80, 10.0.0.0/30, 2001:db8::52:0:1, [2001:db8::52:0:2]:443, [2001:db8::52:0:3]:80, 2002:db8:a::45/64"
+
 func TestUseProxy(t *testing.T) {
 	cfg := &httpproxy.Config{
-		NoProxy: "foobar.com, .barbaz.net",
+		NoProxy: noProxy,
 	}
 	for _, test := range UseProxyTests {
 		if httpproxy.ExportUseProxy(cfg, test.host+":80") != test.match {
@@ -299,3 +316,36 @@
 		t.Errorf("useProxy unexpected return; got false; want true")
 	}
 }
+
+func TestAllNoProxy(t *testing.T) {
+	cfg := &httpproxy.Config{
+		NoProxy: "*",
+	}
+	for _, test := range UseProxyTests {
+		if httpproxy.ExportUseProxy(cfg, test.host+":80") != false {
+			t.Errorf("useProxy(%v) = true, want false", test.host)
+		}
+	}
+}
+
+func BenchmarkProxyForURL(b *testing.B) {
+	cfg := &httpproxy.Config{
+		HTTPProxy:  "http://proxy.example.org",
+		HTTPSProxy: "https://proxy.example.org",
+		NoProxy:    noProxy,
+	}
+	for _, test := range UseProxyTests {
+		u, err := url.Parse("https://" + test.host + ":80")
+		if err != nil {
+			b.Fatalf("parsed failed: %s", test.host)
+		}
+		proxyFunc := cfg.ProxyFunc()
+		b.Run(test.host, func(b *testing.B) {
+			for n := 0; n < b.N; n++ {
+				if au, e := proxyFunc(u); e != nil && test.match == (au != nil) {
+					b.Errorf("useProxy(%v) = %v, want %v", test.host, !test.match, test.match)
+				}
+			}
+		})
+	}
+}