http/httpguts/httplex.go - net - Git at Google

 // Copyright 2016 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 package httpguts

 import (
 	"net"
 	"strings"
 	"unicode/utf8"

 	"golang.org/x/net/idna"
 )

 // isTokenTable is indexed by ASCII code point. An entry is true when
 // the character is one IsTokenRune accepts as part of an HTTP token;
 // every code point not listed below is false.
 var isTokenTable = [127]bool{
 	// Punctuation.
 	'!': true, '#': true, '$': true, '%': true, '&': true,
 	'\'': true, '*': true, '+': true, '-': true, '.': true,
 	'^': true, '_': true, '`': true, '|': true, '~': true,

 	// Decimal digits.
 	'0': true, '1': true, '2': true, '3': true, '4': true,
 	'5': true, '6': true, '7': true, '8': true, '9': true,

 	// Upper-case letters.
 	'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true,
 	'H': true, 'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true,
 	'O': true, 'P': true, 'Q': true, 'R': true, 'S': true, 'T': true, 'U': true,
 	'V': true, 'W': true, 'X': true, 'Y': true, 'Z': true,

 	// Lower-case letters.
 	'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true,
 	'h': true, 'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true,
 	'o': true, 'p': true, 'q': true, 'r': true, 's': true, 't': true, 'u': true,
 	'v': true, 'w': true, 'x': true, 'y': true, 'z': true,
 }

 // IsTokenRune reports whether r is a character permitted in an HTTP
 // token, i.e. whether it is an ASCII code point marked true in
 // isTokenTable. Runes outside the table's index range, including
 // negative values, are reported as non-token characters.
 func IsTokenRune(r rune) bool {
 	// A negative rune would satisfy an upper-bound-only check and then
 	// panic when used as an index, so both bounds are tested.
 	if r < 0 || int(r) >= len(isTokenTable) {
 		return false
 	}
 	return isTokenTable[r]
 }

 // isNotToken is the negation of IsTokenRune: it reports whether r is
 // a rune that IsTokenRune rejects.
 func isNotToken(r rune) bool {
 	isToken := IsTokenRune(r)
 	return !isToken
 }

 // HeaderValuesContainsToken reports whether at least one of the header
 // values in values contains token, as decided by
 // headerValueContainsToken (ASCII case-insensitive comparison).
 // It returns false when values is empty.
 func HeaderValuesContainsToken(values []string, token string) bool {
 	found := false
 	// Stop at the first matching value; later values are not examined.
 	for i := 0; !found && i < len(values); i++ {
 		found = headerValueContainsToken(values[i], token)
 	}
 	return found
 }

 // isOWS reports whether b is an optional whitespace (OWS) byte as
 // defined by RFC 7230 section 3.2.3, namely a space or a horizontal tab.
 func isOWS(b byte) bool {
 	switch b {
 	case ' ', '\t':
 		return true
 	}
 	return false
 }

 // trimOWS returns x with all optional whitespace (see isOWS) removed
 // from the beginning and the end. Whitespace in the interior of x is
 // left untouched.
 func trimOWS(x string) string {
 	// TODO: consider strings.Trim(x, " \t") instead, if and when it is
 	// fast enough (see issue 10292). This ASCII-only scan will probably
 	// always beat UTF-8 aware code, though.
 	lo, hi := 0, len(x)
 	for lo < hi && isOWS(x[lo]) {
 		lo++
 	}
 	for hi > lo && isOWS(x[hi-1]) {
 		hi--
 	}
 	return x[lo:hi]
 }

 // headerValueContainsToken reports whether v contains token among its
 // comma-separated elements. v is assumed to be a 0#element in the ABNF
 // extension described in RFC 7230 section 7. Each element has its
 // surrounding optional whitespace trimmed and is then compared with
 // token ASCII case-insensitively.
 func headerValueContainsToken(v string, token string) bool {
 	for {
 		v = trimOWS(v)
 		comma := strings.IndexByte(v, ',')
 		if comma < 0 {
 			// Last (or only) element.
 			return tokenEqual(v, token)
 		}
 		if tokenEqual(trimOWS(v[:comma]), token) {
 			return true
 		}
 		// Continue with whatever follows the comma.
 		v = v[comma+1:]
 	}
 }

 // lowerASCII maps the ASCII upper-case letters 'A'..'Z' to their
 // lower-case counterparts and returns every other byte unchanged.
 func lowerASCII(b byte) byte {
 	const caseOffset = 'a' - 'A'
 	if b < 'A' || b > 'Z' {
 		return b
 	}
 	return b + caseOffset
 }

 // tokenEqual reports whether t1 and t2 are equal when compared ASCII
 // case-insensitively. Strings of different length are never equal, and
 // any non-ASCII byte in t1 makes the comparison fail, because tokens
 // may not contain UTF-8 or other non-ASCII data.
 func tokenEqual(t1, t2 string) bool {
 	if len(t1) != len(t2) {
 		return false
 	}
 	for i := 0; i < len(t1); i++ {
 		c := t1[i]
 		switch {
 		case c >= utf8.RuneSelf:
 			// No UTF-8 or non-ASCII allowed in tokens.
 			return false
 		case lowerASCII(c) != lowerASCII(t2[i]):
 			return false
 		}
 	}
 	return true
 }

 // isLWS reports whether b is a linear white space byte (space or
 // horizontal tab), following
 // http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
 //      LWS            = [CRLF] 1*( SP | HT )
 func isLWS(b byte) bool {
 	const (
 		sp = ' '
 		ht = '\t'
 	)
 	return b == sp || b == ht
 }

 // isCTL reports whether b is a control byte, following
 // http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
 //      CTL            = <any US-ASCII control character
 //                       (octets 0 - 31) and DEL (127)>
 func isCTL(b byte) bool {
 	switch {
 	case b <= 0x1f: // octets 0 - 31
 		return true
 	case b == 0x7f: // DEL
 		return true
 	}
 	return false
 }

 // ValidHeaderFieldName reports whether v is a valid HTTP/1.x header
 // field name: a non-empty string made up solely of token characters.
 // HTTP/2 additionally forbids uppercase ASCII letters; that extra
 // restriction is not checked here.
 //
 //  RFC 7230 says:
 //   header-field   = field-name ":" OWS field-value OWS
 //   field-name     = token
 //   token          = 1*tchar
 //   tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
 //           "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
 func ValidHeaderFieldName(v string) bool {
 	if v == "" {
 		return false
 	}
 	// Token characters are all ASCII, so a byte-wise scan suffices: any
 	// byte of a multi-byte sequence is >= 0x80 and is rejected.
 	for i := 0; i < len(v); i++ {
 		if !IsTokenRune(rune(v[i])) {
 			return false
 		}
 	}
 	return true
 }

 // ValidHostHeader reports whether h is acceptable as a Host header
 // value. An empty h is accepted.
 func ValidHostHeader(h string) bool {
 	// The latest spec is actually this:
 	//
 	// http://tools.ietf.org/html/rfc7230#section-5.4
 	//     Host = uri-host [ ":" port ]
 	//
 	// Where uri-host is:
 	//     http://tools.ietf.org/html/rfc3986#section-3.2.2
 	//
 	// For now the check is deliberately lenient: it only rejects h if
 	// it contains a byte that is not valid in any of those expressions,
 	// without verifying the overall structure.
 	for _, b := range []byte(h) {
 		if !validHostByte[b] {
 			return false
 		}
 	}
 	return true
 }

 // validHostByte is the lookup table used by ValidHostHeader: indexed
 // by a byte of the Host header, it tells whether that byte is accepted.
 // Bytes not listed below are rejected. See the comment inside
 // ValidHostHeader for the rationale.
 var validHostByte = [256]bool{
 	// Letters and digits.
 	'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true,
 	'H': true, 'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true,
 	'O': true, 'P': true, 'Q': true, 'R': true, 'S': true, 'T': true, 'U': true,
 	'V': true, 'W': true, 'X': true, 'Y': true, 'Z': true,

 	'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true,
 	'h': true, 'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true,
 	'o': true, 'p': true, 'q': true, 'r': true, 's': true, 't': true, 'u': true,
 	'v': true, 'w': true, 'x': true, 'y': true, 'z': true,

 	'0': true, '1': true, '2': true, '3': true, '4': true,
 	'5': true, '6': true, '7': true, '8': true, '9': true,

 	// unreserved punctuation
 	'-': true, '.': true, '_': true, '~': true,

 	// sub-delims
 	'!': true, '$': true, '&': true, '\'': true, '(': true, ')': true,
 	'*': true, '+': true, ',': true, ';': true, '=': true,

 	'%': true, // pct-encoded (and used in IPv6 zones)
 	':': true, // IPv6address + Host expression's optional port
 	'[': true,
 	']': true,
 }

 // ValidHeaderFieldValue reports whether v is a valid "field-value".
 // In practice it rejects v only if it contains a control byte (see
 // isCTL) that is not linear white space (see isLWS); every other byte,
 // including bytes >= 0x80, is accepted.
 //
 // http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 says:
 //
 //        message-header = field-name ":" [ field-value ]
 //        field-value    = *( field-content | LWS )
 //        field-content  = <the OCTETs making up the field-value
 //                         and consisting of either *TEXT or combinations
 //                         of token, separators, and quoted-string>
 //
 // http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2 says:
 //
 //        TEXT           = <any OCTET except CTLs,
 //                          but including LWS>
 //        LWS            = [CRLF] 1*( SP | HT )
 //        CTL            = <any US-ASCII control character
 //                         (octets 0 - 31) and DEL (127)>
 //
 // RFC 7230 says:
 //  field-value    = *( field-content / obs-fold )
 //  obs-fold       =  N/A to http2, and deprecated
 //  field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
 //  field-vchar    = VCHAR / obs-text
 //  obs-text       = %x80-FF
 //  VCHAR          = "any visible [USASCII] character"
 //
 // http2 further says: "Similarly, HTTP/2 allows header field values
 // that are not valid. While most of the values that can be encoded
 // will not alter header field parsing, carriage return (CR, ASCII
 // 0xd), line feed (LF, ASCII 0xa), and the zero character (NUL, ASCII
 // 0x0) might be exploited by an attacker if they are translated
 // verbatim. Any request or response that contains a character not
 // permitted in a header field value MUST be treated as malformed
 // (Section 8.1.2.6). Valid characters are defined by the
 // field-content ABNF rule in Section 3.2 of [RFC7230]."
 //
 // Known limitation: strings that begin or end with SP or HTAB are not
 // (yet?) rejected.
 func ValidHeaderFieldValue(v string) bool {
 	for _, b := range []byte(v) {
 		if !isCTL(b) || isLWS(b) {
 			continue
 		}
 		// A control byte other than SP/HT.
 		return false
 	}
 	return true
 }

 // isASCII reports whether every byte of s is below utf8.RuneSelf,
 // i.e. whether s consists solely of ASCII. The empty string is ASCII.
 func isASCII(s string) bool {
 	for _, b := range []byte(s) {
 		if b >= utf8.RuneSelf {
 			return false
 		}
 	}
 	return true
 }

 // PunycodeHostPort returns the IDNA Punycode version of the provided
 // "host" or "host:port" string. An all-ASCII v is returned unchanged.
 // Otherwise the host part is converted with idna.ToASCII and, if a
 // port was present, re-joined with it. A conversion failure is
 // returned as the error, together with an empty string.
 func PunycodeHostPort(v string) (string, error) {
 	if isASCII(v) {
 		return v, nil
 	}

 	// Start from the assumption that v is a bare host. A split failure
 	// just means there was no port; that error is deliberately not
 	// reported to the caller.
 	host, port := v, ""
 	if h, p, splitErr := net.SplitHostPort(v); splitErr == nil {
 		host, port = h, p
 	}

 	ascii, err := idna.ToASCII(host)
 	if err != nil {
 		// Non-UTF-8? Not representable in Punycode, in any case.
 		return "", err
 	}
 	if port != "" {
 		return net.JoinHostPort(ascii, port), nil
 	}
 	return ascii, nil
 }
	// Copyright 2016 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	package httpguts

	import (
	"net"
	"strings"
	"unicode/utf8"

	"golang.org/x/net/idna"
	)

	// isTokenTable is indexed by ASCII code point. An entry is true when
	// the character is one IsTokenRune accepts as part of an HTTP token;
	// every code point not listed below is false.
	var isTokenTable = [127]bool{
		// Punctuation. The vertical bar is written as a plain '|'; a
		// backslash-escaped pipe is not a valid Go rune literal.
		'!': true, '#': true, '$': true, '%': true, '&': true,
		'\'': true, '*': true, '+': true, '-': true, '.': true,
		'^': true, '_': true, '`': true, '|': true, '~': true,

		// Decimal digits.
		'0': true, '1': true, '2': true, '3': true, '4': true,
		'5': true, '6': true, '7': true, '8': true, '9': true,

		// Upper-case letters.
		'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true,
		'H': true, 'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true,
		'O': true, 'P': true, 'Q': true, 'R': true, 'S': true, 'T': true, 'U': true,
		'V': true, 'W': true, 'X': true, 'Y': true, 'Z': true,

		// Lower-case letters.
		'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true,
		'h': true, 'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true,
		'o': true, 'p': true, 'q': true, 'r': true, 's': true, 't': true, 'u': true,
		'v': true, 'w': true, 'x': true, 'y': true, 'z': true,
	}

	// IsTokenRune reports whether r is a character permitted in an HTTP
	// token, i.e. whether it is an ASCII code point marked true in
	// isTokenTable. Runes outside the table's index range, including
	// negative values, are reported as non-token characters.
	func IsTokenRune(r rune) bool {
		// A negative rune would satisfy an upper-bound-only check and then
		// panic when used as an index, so both bounds are tested.
		if r < 0 || int(r) >= len(isTokenTable) {
			return false
		}
		return isTokenTable[r]
	}

	// isNotToken is the negation of IsTokenRune: it reports whether r is
	// a rune that IsTokenRune rejects.
	func isNotToken(r rune) bool {
		isToken := IsTokenRune(r)
		return !isToken
	}

	// HeaderValuesContainsToken reports whether at least one of the header
	// values in values contains token, as decided by
	// headerValueContainsToken (ASCII case-insensitive comparison).
	// It returns false when values is empty.
	func HeaderValuesContainsToken(values []string, token string) bool {
		found := false
		// Stop at the first matching value; later values are not examined.
		for i := 0; !found && i < len(values); i++ {
			found = headerValueContainsToken(values[i], token)
		}
		return found
	}

	// isOWS reports whether b is an optional whitespace (OWS) byte as
	// defined by RFC 7230 section 3.2.3, namely a space or a horizontal tab.
	func isOWS(b byte) bool {
		// The logical OR must be spelled "||"; a backslash-escaped form
		// does not compile.
		return b == ' ' || b == '\t'
	}

	// trimOWS returns x with all optional whitespace (see isOWS) removed
	// from the beginning and the end. Whitespace in the interior of x is
	// left untouched.
	func trimOWS(x string) string {
		// TODO: consider strings.Trim(x, " \t") instead, if and when it is
		// fast enough (see issue 10292). This ASCII-only scan will probably
		// always beat UTF-8 aware code, though.
		lo, hi := 0, len(x)
		for lo < hi && isOWS(x[lo]) {
			lo++
		}
		for hi > lo && isOWS(x[hi-1]) {
			hi--
		}
		return x[lo:hi]
	}

	// headerValueContainsToken reports whether v (assumed to be a
	// 0#element, in the ABNF extension described in RFC 7230 section 7)
	// contains token amongst its comma-separated tokens, ASCII
	// case-insensitively.
	func headerValueContainsToken(v string, token string) bool {
	v = trimOWS(v)
	if comma := strings.IndexByte(v, ','); comma != -1 {
	return tokenEqual(trimOWS(v[:comma]), token) \|\| headerValueContainsToken(v[comma+1:], token)
	}
	return tokenEqual(v, token)
	}

	// lowerASCII maps the ASCII upper-case letters 'A'..'Z' to their
	// lower-case counterparts and returns every other byte unchanged.
	func lowerASCII(b byte) byte {
		const caseOffset = 'a' - 'A'
		if b < 'A' || b > 'Z' {
			return b
		}
		return b + caseOffset
	}

	// tokenEqual reports whether t1 and t2 are equal when compared ASCII
	// case-insensitively. Strings of different length are never equal, and
	// any non-ASCII byte in t1 makes the comparison fail, because tokens
	// may not contain UTF-8 or other non-ASCII data.
	func tokenEqual(t1, t2 string) bool {
		if len(t1) != len(t2) {
			return false
		}
		for i := 0; i < len(t1); i++ {
			c := t1[i]
			switch {
			case c >= utf8.RuneSelf:
				// No UTF-8 or non-ASCII allowed in tokens.
				return false
			case lowerASCII(c) != lowerASCII(t2[i]):
				return false
			}
		}
		return true
	}

	// isLWS reports whether b is a linear white space byte (space or
	// horizontal tab), following
	// http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
	//      LWS            = [CRLF] 1*( SP | HT )
	func isLWS(b byte) bool {
		// The logical OR must be spelled "||"; a backslash-escaped form
		// does not compile.
		return b == ' ' || b == '\t'
	}

	// isCTL reports whether b is a control byte, following
	// http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
	//      CTL            = <any US-ASCII control character
	//                       (octets 0 - 31) and DEL (127)>
	func isCTL(b byte) bool {
		const del = 0x7f // a CTL
		// Everything below SP (0x20) is a control character, as is DEL.
		return b < ' ' || b == del
	}

	// ValidHeaderFieldName reports whether v is a valid HTTP/1.x header
	// field name: a non-empty string made up solely of token characters.
	// HTTP/2 additionally forbids uppercase ASCII letters; that extra
	// restriction is not checked here.
	//
	//  RFC 7230 says:
	//   header-field   = field-name ":" OWS field-value OWS
	//   field-name     = token
	//   token          = 1*tchar
	//   tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
	//           "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
	func ValidHeaderFieldName(v string) bool {
		if v == "" {
			return false
		}
		// Token characters are all ASCII, so a byte-wise scan suffices: any
		// byte of a multi-byte sequence is >= 0x80 and is rejected.
		for i := 0; i < len(v); i++ {
			if !IsTokenRune(rune(v[i])) {
				return false
			}
		}
		return true
	}

	// ValidHostHeader reports whether h is acceptable as a Host header
	// value. An empty h is accepted.
	func ValidHostHeader(h string) bool {
		// The latest spec is actually this:
		//
		// http://tools.ietf.org/html/rfc7230#section-5.4
		//     Host = uri-host [ ":" port ]
		//
		// Where uri-host is:
		//     http://tools.ietf.org/html/rfc3986#section-3.2.2
		//
		// For now the check is deliberately lenient: it only rejects h if
		// it contains a byte that is not valid in any of those expressions,
		// without verifying the overall structure.
		for _, b := range []byte(h) {
			if !validHostByte[b] {
				return false
			}
		}
		return true
	}

	// validHostByte is the lookup table used by ValidHostHeader: indexed
	// by a byte of the Host header, it tells whether that byte is accepted.
	// Bytes not listed below are rejected. See the comment inside
	// ValidHostHeader for the rationale.
	var validHostByte = [256]bool{
		// Letters and digits.
		'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true,
		'H': true, 'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true,
		'O': true, 'P': true, 'Q': true, 'R': true, 'S': true, 'T': true, 'U': true,
		'V': true, 'W': true, 'X': true, 'Y': true, 'Z': true,

		'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true,
		'h': true, 'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true,
		'o': true, 'p': true, 'q': true, 'r': true, 's': true, 't': true, 'u': true,
		'v': true, 'w': true, 'x': true, 'y': true, 'z': true,

		'0': true, '1': true, '2': true, '3': true, '4': true,
		'5': true, '6': true, '7': true, '8': true, '9': true,

		// unreserved punctuation
		'-': true, '.': true, '_': true, '~': true,

		// sub-delims
		'!': true, '$': true, '&': true, '\'': true, '(': true, ')': true,
		'*': true, '+': true, ',': true, ';': true, '=': true,

		'%': true, // pct-encoded (and used in IPv6 zones)
		':': true, // IPv6address + Host expression's optional port
		'[': true,
		']': true,
	}

	// ValidHeaderFieldValue reports whether v is a valid "field-value".
	// In practice it rejects v only if it contains a control byte (see
	// isCTL) that is not linear white space (see isLWS); every other byte,
	// including bytes >= 0x80, is accepted.
	//
	// http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 says:
	//
	//        message-header = field-name ":" [ field-value ]
	//        field-value    = *( field-content | LWS )
	//        field-content  = <the OCTETs making up the field-value
	//                         and consisting of either *TEXT or combinations
	//                         of token, separators, and quoted-string>
	//
	// http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2 says:
	//
	//        TEXT           = <any OCTET except CTLs,
	//                          but including LWS>
	//        LWS            = [CRLF] 1*( SP | HT )
	//        CTL            = <any US-ASCII control character
	//                         (octets 0 - 31) and DEL (127)>
	//
	// RFC 7230 says:
	//  field-value    = *( field-content / obs-fold )
	//  obs-fold       =  N/A to http2, and deprecated
	//  field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
	//  field-vchar    = VCHAR / obs-text
	//  obs-text       = %x80-FF
	//  VCHAR          = "any visible [USASCII] character"
	//
	// http2 further says: "Similarly, HTTP/2 allows header field values
	// that are not valid. While most of the values that can be encoded
	// will not alter header field parsing, carriage return (CR, ASCII
	// 0xd), line feed (LF, ASCII 0xa), and the zero character (NUL, ASCII
	// 0x0) might be exploited by an attacker if they are translated
	// verbatim. Any request or response that contains a character not
	// permitted in a header field value MUST be treated as malformed
	// (Section 8.1.2.6). Valid characters are defined by the
	// field-content ABNF rule in Section 3.2 of [RFC7230]."
	//
	// Known limitation: strings that begin or end with SP or HTAB are not
	// (yet?) rejected.
	func ValidHeaderFieldValue(v string) bool {
		for _, b := range []byte(v) {
			if !isCTL(b) || isLWS(b) {
				continue
			}
			// A control byte other than SP/HT.
			return false
		}
		return true
	}

	// isASCII reports whether every byte of s is below utf8.RuneSelf,
	// i.e. whether s consists solely of ASCII. The empty string is ASCII.
	func isASCII(s string) bool {
		for _, b := range []byte(s) {
			if b >= utf8.RuneSelf {
				return false
			}
		}
		return true
	}

	// PunycodeHostPort returns the IDNA Punycode version of the provided
	// "host" or "host:port" string. An all-ASCII v is returned unchanged.
	// Otherwise the host part is converted with idna.ToASCII and, if a
	// port was present, re-joined with it. A conversion failure is
	// returned as the error, together with an empty string.
	func PunycodeHostPort(v string) (string, error) {
		if isASCII(v) {
			return v, nil
		}

		// Start from the assumption that v is a bare host. A split failure
		// just means there was no port; that error is deliberately not
		// reported to the caller.
		host, port := v, ""
		if h, p, splitErr := net.SplitHostPort(v); splitErr == nil {
			host, port = h, p
		}

		ascii, err := idna.ToASCII(host)
		if err != nil {
			// Non-UTF-8? Not representable in Punycode, in any case.
			return "", err
		}
		if port != "" {
			return net.JoinHostPort(ascii, port), nil
		}
		return ascii, nil
	}