| // Copyright 2016 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package httpguts |
| |
| import ( |
| "net" |
| "strings" |
| "unicode/utf8" |
| |
| "golang.org/x/net/idna" |
| ) |
| |
// isTokenTable is indexed by ASCII code point and holds true for every
// character that may appear in a token (the RFC 7230 "tchar" set:
// a fixed list of punctuation plus DIGIT and ALPHA). The array has 127
// entries, so valid indices are 0 through 126.
var isTokenTable = [127]bool{
	// Punctuation.
	'!': true, '#': true, '$': true, '%': true, '&': true, '\'': true,
	'*': true, '+': true, '-': true, '.': true, '^': true, '_': true,
	'`': true, '|': true, '~': true,

	// DIGIT.
	'0': true, '1': true, '2': true, '3': true, '4': true,
	'5': true, '6': true, '7': true, '8': true, '9': true,

	// Uppercase ALPHA.
	'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true,
	'H': true, 'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true,
	'O': true, 'P': true, 'Q': true, 'R': true, 'S': true, 'T': true, 'U': true,
	'V': true, 'W': true, 'X': true, 'Y': true, 'Z': true,

	// Lowercase ALPHA.
	'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true,
	'h': true, 'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true,
	'o': true, 'p': true, 'q': true, 'r': true, 's': true, 't': true, 'u': true,
	'v': true, 'w': true, 'x': true, 'y': true, 'z': true,
}
| |
| func IsTokenRune(r rune) bool { |
| i := int(r) |
| return i < len(isTokenTable) && isTokenTable[i] |
| } |
| |
| func isNotToken(r rune) bool { |
| return !IsTokenRune(r) |
| } |
| |
| // HeaderValuesContainsToken reports whether any string in values |
| // contains the provided token, ASCII case-insensitively. |
| func HeaderValuesContainsToken(values []string, token string) bool { |
| for _, v := range values { |
| if headerValueContainsToken(v, token) { |
| return true |
| } |
| } |
| return false |
| } |
| |
// isOWS reports whether b is an optional whitespace byte as defined by
// RFC 7230 section 3.2.3, i.e. a space or a horizontal tab.
func isOWS(b byte) bool {
	switch b {
	case ' ', '\t':
		return true
	}
	return false
}
| |
| // trimOWS returns x with all optional whitespace removes from the |
| // beginning and end. |
| func trimOWS(x string) string { |
| // TODO: consider using strings.Trim(x, " \t") instead, |
| // if and when it's fast enough. See issue 10292. |
| // But this ASCII-only code will probably always beat UTF-8 |
| // aware code. |
| for len(x) > 0 && isOWS(x[0]) { |
| x = x[1:] |
| } |
| for len(x) > 0 && isOWS(x[len(x)-1]) { |
| x = x[:len(x)-1] |
| } |
| return x |
| } |
| |
| // headerValueContainsToken reports whether v (assumed to be a |
| // 0#element, in the ABNF extension described in RFC 7230 section 7) |
| // contains token amongst its comma-separated tokens, ASCII |
| // case-insensitively. |
| func headerValueContainsToken(v string, token string) bool { |
| v = trimOWS(v) |
| if comma := strings.IndexByte(v, ','); comma != -1 { |
| return tokenEqual(trimOWS(v[:comma]), token) || headerValueContainsToken(v[comma+1:], token) |
| } |
| return tokenEqual(v, token) |
| } |
| |
// lowerASCII returns the ASCII lowercase version of b. Bytes outside
// the range 'A' through 'Z' are returned unchanged.
func lowerASCII(b byte) byte {
	if b < 'A' || b > 'Z' {
		return b
	}
	// Distance between an uppercase ASCII letter and its lowercase form.
	const caseOffset = 'a' - 'A'
	return b + caseOffset
}
| |
| // tokenEqual reports whether t1 and t2 are equal, ASCII case-insensitively. |
| func tokenEqual(t1, t2 string) bool { |
| if len(t1) != len(t2) { |
| return false |
| } |
| for i, b := range t1 { |
| if b >= utf8.RuneSelf { |
| // No UTF-8 or non-ASCII allowed in tokens. |
| return false |
| } |
| if lowerASCII(byte(b)) != lowerASCII(t2[i]) { |
| return false |
| } |
| } |
| return true |
| } |
| |
// isLWS reports whether b is linear white space, according
// to http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
//
//	LWS = [CRLF] 1*( SP | HT )
//
// Only the SP and HT bytes themselves are matched; CR and LF are not.
func isLWS(b byte) bool {
	const (
		sp = ' '
		ht = '\t'
	)
	return b == sp || b == ht
}
| |
// isCTL reports whether b is a control byte, according
// to http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
//
//	CTL = <any US-ASCII control character
//	      (octets 0 - 31) and DEL (127)>
func isCTL(b byte) bool {
	switch {
	case b <= 0x1f: // octets 0 - 31
		return true
	case b == 0x7f: // DEL
		return true
	}
	return false
}
| |
| // ValidHeaderFieldName reports whether v is a valid HTTP/1.x header name. |
| // HTTP/2 imposes the additional restriction that uppercase ASCII |
| // letters are not allowed. |
| // |
| // RFC 7230 says: |
| // header-field = field-name ":" OWS field-value OWS |
| // field-name = token |
| // token = 1*tchar |
| // tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / |
| // "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA |
| func ValidHeaderFieldName(v string) bool { |
| if len(v) == 0 { |
| return false |
| } |
| for _, r := range v { |
| if !IsTokenRune(r) { |
| return false |
| } |
| } |
| return true |
| } |
| |
| // ValidHostHeader reports whether h is a valid host header. |
| func ValidHostHeader(h string) bool { |
| // The latest spec is actually this: |
| // |
| // http://tools.ietf.org/html/rfc7230#section-5.4 |
| // Host = uri-host [ ":" port ] |
| // |
| // Where uri-host is: |
| // http://tools.ietf.org/html/rfc3986#section-3.2.2 |
| // |
| // But we're going to be much more lenient for now and just |
| // search for any byte that's not a valid byte in any of those |
| // expressions. |
| for i := 0; i < len(h); i++ { |
| if !validHostByte[h[i]] { |
| return false |
| } |
| } |
| return true |
| } |
| |
// validHostByte is indexed by byte value and holds true for each byte
// that may appear in a Host header. See the ValidHostHeader comment.
var validHostByte = [256]bool{
	// DIGIT (unreserved)
	'0': true, '1': true, '2': true, '3': true, '4': true,
	'5': true, '6': true, '7': true, '8': true, '9': true,

	// Uppercase ALPHA (unreserved)
	'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true,
	'H': true, 'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true,
	'O': true, 'P': true, 'Q': true, 'R': true, 'S': true, 'T': true, 'U': true,
	'V': true, 'W': true, 'X': true, 'Y': true, 'Z': true,

	// Lowercase ALPHA (unreserved)
	'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true,
	'h': true, 'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true,
	'o': true, 'p': true, 'q': true, 'r': true, 's': true, 't': true, 'u': true,
	'v': true, 'w': true, 'x': true, 'y': true, 'z': true,

	// Remaining unreserved characters
	'-': true, '.': true, '_': true, '~': true,

	// sub-delims
	'!': true, '$': true, '&': true, '\'': true, '(': true, ')': true,
	'*': true, '+': true, ',': true, ';': true, '=': true,

	// pct-encoded (and used in IPv6 zones)
	'%': true,

	// IPv6address + Host expression's optional port
	':': true,

	// Brackets enclosing an IP-literal (RFC 3986 section 3.2.2)
	'[': true, ']': true,
}
| |
| // ValidHeaderFieldValue reports whether v is a valid "field-value" according to |
| // http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 : |
| // |
| // message-header = field-name ":" [ field-value ] |
| // field-value = *( field-content | LWS ) |
| // field-content = <the OCTETs making up the field-value |
| // and consisting of either *TEXT or combinations |
| // of token, separators, and quoted-string> |
| // |
| // http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2 : |
| // |
| // TEXT = <any OCTET except CTLs, |
| // but including LWS> |
| // LWS = [CRLF] 1*( SP | HT ) |
| // CTL = <any US-ASCII control character |
| // (octets 0 - 31) and DEL (127)> |
| // |
| // RFC 7230 says: |
| // field-value = *( field-content / obs-fold ) |
| // obj-fold = N/A to http2, and deprecated |
| // field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ] |
| // field-vchar = VCHAR / obs-text |
| // obs-text = %x80-FF |
| // VCHAR = "any visible [USASCII] character" |
| // |
| // http2 further says: "Similarly, HTTP/2 allows header field values |
| // that are not valid. While most of the values that can be encoded |
| // will not alter header field parsing, carriage return (CR, ASCII |
| // 0xd), line feed (LF, ASCII 0xa), and the zero character (NUL, ASCII |
| // 0x0) might be exploited by an attacker if they are translated |
| // verbatim. Any request or response that contains a character not |
| // permitted in a header field value MUST be treated as malformed |
| // (Section 8.1.2.6). Valid characters are defined by the |
| // field-content ABNF rule in Section 3.2 of [RFC7230]." |
| // |
| // This function does not (yet?) properly handle the rejection of |
| // strings that begin or end with SP or HTAB. |
| func ValidHeaderFieldValue(v string) bool { |
| for i := 0; i < len(v); i++ { |
| b := v[i] |
| if isCTL(b) && !isLWS(b) { |
| return false |
| } |
| } |
| return true |
| } |
| |
// isASCII reports whether s consists solely of ASCII bytes, i.e. bytes
// below utf8.RuneSelf. The empty string is ASCII.
func isASCII(s string) bool {
	// Ranging by rune is equivalent to scanning bytes here: every
	// multi-byte sequence and every invalid byte decodes to a rune that
	// is at least utf8.RuneSelf.
	for _, r := range s {
		if r >= utf8.RuneSelf {
			return false
		}
	}
	return true
}
| |
| // PunycodeHostPort returns the IDNA Punycode version |
| // of the provided "host" or "host:port" string. |
| func PunycodeHostPort(v string) (string, error) { |
| if isASCII(v) { |
| return v, nil |
| } |
| |
| host, port, err := net.SplitHostPort(v) |
| if err != nil { |
| // The input 'v' argument was just a "host" argument, |
| // without a port. This error should not be returned |
| // to the caller. |
| host = v |
| port = "" |
| } |
| host, err = idna.ToASCII(host) |
| if err != nil { |
| // Non-UTF-8? Not representable in Punycode, in any |
| // case. |
| return "", err |
| } |
| if port == "" { |
| return host, nil |
| } |
| return net.JoinHostPort(host, port), nil |
| } |