|  | // Copyright 2011 The Go Authors. All rights reserved. | 
|  | // Use of this source code is governed by a BSD-style | 
|  | // license that can be found in the LICENSE file. | 
|  |  | 
|  | package template | 
|  |  | 
|  | import ( | 
|  | "bytes" | 
|  | "strings" | 
|  | ) | 
|  |  | 
|  | // transitionFunc is the array of context transition functions for text nodes. | 
|  | // A transition function takes a context and template text input, and returns | 
|  | // the updated context and the number of bytes consumed from the front of the | 
|  | // input. | 
|  | var transitionFunc = [...]func(context, []byte) (context, int){ | 
|  | stateText:        tText, | 
|  | stateTag:         tTag, | 
|  | stateAttrName:    tAttrName, | 
|  | stateAfterName:   tAfterName, | 
|  | stateBeforeValue: tBeforeValue, | 
|  | stateHTMLCmt:     tHTMLCmt, | 
|  | stateRCDATA:      tSpecialTagEnd, | 
|  | stateAttr:        tAttr, | 
|  | stateURL:         tURL, | 
|  | stateSrcset:      tURL, | 
|  | stateJS:          tJS, | 
|  | stateJSDqStr:     tJSDelimited, | 
|  | stateJSSqStr:     tJSDelimited, | 
|  | stateJSRegexp:    tJSDelimited, | 
|  | stateJSBlockCmt:  tBlockCmt, | 
|  | stateJSLineCmt:   tLineCmt, | 
|  | stateCSS:         tCSS, | 
|  | stateCSSDqStr:    tCSSStr, | 
|  | stateCSSSqStr:    tCSSStr, | 
|  | stateCSSDqURL:    tCSSStr, | 
|  | stateCSSSqURL:    tCSSStr, | 
|  | stateCSSURL:      tCSSStr, | 
|  | stateCSSBlockCmt: tBlockCmt, | 
|  | stateCSSLineCmt:  tLineCmt, | 
|  | stateError:       tError, | 
|  | } | 
|  |  | 
// Delimiters of an HTML comment.
var (
	// commentStart is the byte sequence that opens an HTML comment.
	commentStart = []byte("<!--")
	// commentEnd is the byte sequence that closes an HTML comment.
	commentEnd = []byte("-->")
)
|  |  | 
|  | // tText is the context transition function for the text state. | 
|  | func tText(c context, s []byte) (context, int) { | 
|  | k := 0 | 
|  | for { | 
|  | i := k + bytes.IndexByte(s[k:], '<') | 
|  | if i < k || i+1 == len(s) { | 
|  | return c, len(s) | 
|  | } else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) { | 
|  | return context{state: stateHTMLCmt}, i + 4 | 
|  | } | 
|  | i++ | 
|  | end := false | 
|  | if s[i] == '/' { | 
|  | if i+1 == len(s) { | 
|  | return c, len(s) | 
|  | } | 
|  | end, i = true, i+1 | 
|  | } | 
|  | j, e := eatTagName(s, i) | 
|  | if j != i { | 
|  | if end { | 
|  | e = elementNone | 
|  | } | 
|  | // We've found an HTML tag. | 
|  | return context{state: stateTag, element: e}, j | 
|  | } | 
|  | k = j | 
|  | } | 
|  | } | 
|  |  | 
|  | var elementContentType = [...]state{ | 
|  | elementNone:     stateText, | 
|  | elementScript:   stateJS, | 
|  | elementStyle:    stateCSS, | 
|  | elementTextarea: stateRCDATA, | 
|  | elementTitle:    stateRCDATA, | 
|  | } | 
|  |  | 
|  | // tTag is the context transition function for the tag state. | 
|  | func tTag(c context, s []byte) (context, int) { | 
|  | // Find the attribute name. | 
|  | i := eatWhiteSpace(s, 0) | 
|  | if i == len(s) { | 
|  | return c, len(s) | 
|  | } | 
|  | if s[i] == '>' { | 
|  | return context{ | 
|  | state:   elementContentType[c.element], | 
|  | element: c.element, | 
|  | }, i + 1 | 
|  | } | 
|  | j, err := eatAttrName(s, i) | 
|  | if err != nil { | 
|  | return context{state: stateError, err: err}, len(s) | 
|  | } | 
|  | state, attr := stateTag, attrNone | 
|  | if i == j { | 
|  | return context{ | 
|  | state: stateError, | 
|  | err:   errorf(ErrBadHTML, nil, 0, "expected space, attr name, or end of tag, but got %q", s[i:]), | 
|  | }, len(s) | 
|  | } | 
|  |  | 
|  | attrName := strings.ToLower(string(s[i:j])) | 
|  | if c.element == elementScript && attrName == "type" { | 
|  | attr = attrScriptType | 
|  | } else { | 
|  | switch attrType(attrName) { | 
|  | case contentTypeURL: | 
|  | attr = attrURL | 
|  | case contentTypeCSS: | 
|  | attr = attrStyle | 
|  | case contentTypeJS: | 
|  | attr = attrScript | 
|  | case contentTypeSrcset: | 
|  | attr = attrSrcset | 
|  | } | 
|  | } | 
|  |  | 
|  | if j == len(s) { | 
|  | state = stateAttrName | 
|  | } else { | 
|  | state = stateAfterName | 
|  | } | 
|  | return context{state: state, element: c.element, attr: attr}, j | 
|  | } | 
|  |  | 
|  | // tAttrName is the context transition function for stateAttrName. | 
|  | func tAttrName(c context, s []byte) (context, int) { | 
|  | i, err := eatAttrName(s, 0) | 
|  | if err != nil { | 
|  | return context{state: stateError, err: err}, len(s) | 
|  | } else if i != len(s) { | 
|  | c.state = stateAfterName | 
|  | } | 
|  | return c, i | 
|  | } | 
|  |  | 
|  | // tAfterName is the context transition function for stateAfterName. | 
|  | func tAfterName(c context, s []byte) (context, int) { | 
|  | // Look for the start of the value. | 
|  | i := eatWhiteSpace(s, 0) | 
|  | if i == len(s) { | 
|  | return c, len(s) | 
|  | } else if s[i] != '=' { | 
|  | // Occurs due to tag ending '>', and valueless attribute. | 
|  | c.state = stateTag | 
|  | return c, i | 
|  | } | 
|  | c.state = stateBeforeValue | 
|  | // Consume the "=". | 
|  | return c, i + 1 | 
|  | } | 
|  |  | 
|  | var attrStartStates = [...]state{ | 
|  | attrNone:       stateAttr, | 
|  | attrScript:     stateJS, | 
|  | attrScriptType: stateAttr, | 
|  | attrStyle:      stateCSS, | 
|  | attrURL:        stateURL, | 
|  | attrSrcset:     stateSrcset, | 
|  | } | 
|  |  | 
|  | // tBeforeValue is the context transition function for stateBeforeValue. | 
|  | func tBeforeValue(c context, s []byte) (context, int) { | 
|  | i := eatWhiteSpace(s, 0) | 
|  | if i == len(s) { | 
|  | return c, len(s) | 
|  | } | 
|  | // Find the attribute delimiter. | 
|  | delim := delimSpaceOrTagEnd | 
|  | switch s[i] { | 
|  | case '\'': | 
|  | delim, i = delimSingleQuote, i+1 | 
|  | case '"': | 
|  | delim, i = delimDoubleQuote, i+1 | 
|  | } | 
|  | c.state, c.delim = attrStartStates[c.attr], delim | 
|  | return c, i | 
|  | } | 
|  |  | 
|  | // tHTMLCmt is the context transition function for stateHTMLCmt. | 
|  | func tHTMLCmt(c context, s []byte) (context, int) { | 
|  | if i := bytes.Index(s, commentEnd); i != -1 { | 
|  | return context{}, i + 3 | 
|  | } | 
|  | return c, len(s) | 
|  | } | 
|  |  | 
|  | // specialTagEndMarkers maps element types to the character sequence that | 
|  | // case-insensitively signals the end of the special tag body. | 
|  | var specialTagEndMarkers = [...][]byte{ | 
|  | elementScript:   []byte("script"), | 
|  | elementStyle:    []byte("style"), | 
|  | elementTextarea: []byte("textarea"), | 
|  | elementTitle:    []byte("title"), | 
|  | } | 
|  |  | 
// specialTagEndPrefix is the byte sequence that introduces an end tag.
var specialTagEndPrefix = []byte("</")

// tagEndSeparators lists the bytes that must follow the tag name for
// indexTagEnd to accept a special end tag.
var tagEndSeparators = []byte("> \t\n\f/")
|  |  | 
|  | // tSpecialTagEnd is the context transition function for raw text and RCDATA | 
|  | // element states. | 
|  | func tSpecialTagEnd(c context, s []byte) (context, int) { | 
|  | if c.element != elementNone { | 
|  | if i := indexTagEnd(s, specialTagEndMarkers[c.element]); i != -1 { | 
|  | return context{}, i | 
|  | } | 
|  | } | 
|  | return c, len(s) | 
|  | } | 
|  |  | 
|  | // indexTagEnd finds the index of a special tag end in a case insensitive way, or returns -1 | 
|  | func indexTagEnd(s []byte, tag []byte) int { | 
|  | res := 0 | 
|  | plen := len(specialTagEndPrefix) | 
|  | for len(s) > 0 { | 
|  | // Try to find the tag end prefix first | 
|  | i := bytes.Index(s, specialTagEndPrefix) | 
|  | if i == -1 { | 
|  | return i | 
|  | } | 
|  | s = s[i+plen:] | 
|  | // Try to match the actual tag if there is still space for it | 
|  | if len(tag) <= len(s) && bytes.EqualFold(tag, s[:len(tag)]) { | 
|  | s = s[len(tag):] | 
|  | // Check the tag is followed by a proper separator | 
|  | if len(s) > 0 && bytes.IndexByte(tagEndSeparators, s[0]) != -1 { | 
|  | return res + i | 
|  | } | 
|  | res += len(tag) | 
|  | } | 
|  | res += i + plen | 
|  | } | 
|  | return -1 | 
|  | } | 
|  |  | 
|  | // tAttr is the context transition function for the attribute state. | 
|  | func tAttr(c context, s []byte) (context, int) { | 
|  | return c, len(s) | 
|  | } | 
|  |  | 
|  | // tURL is the context transition function for the URL state. | 
|  | func tURL(c context, s []byte) (context, int) { | 
|  | if bytes.ContainsAny(s, "#?") { | 
|  | c.urlPart = urlPartQueryOrFrag | 
|  | } else if len(s) != eatWhiteSpace(s, 0) && c.urlPart == urlPartNone { | 
|  | // HTML5 uses "Valid URL potentially surrounded by spaces" for | 
|  | // attrs: https://www.w3.org/TR/html5/index.html#attributes-1 | 
|  | c.urlPart = urlPartPreQuery | 
|  | } | 
|  | return c, len(s) | 
|  | } | 
|  |  | 
|  | // tJS is the context transition function for the JS state. | 
|  | func tJS(c context, s []byte) (context, int) { | 
|  | i := bytes.IndexAny(s, `"'/`) | 
|  | if i == -1 { | 
|  | // Entire input is non string, comment, regexp tokens. | 
|  | c.jsCtx = nextJSCtx(s, c.jsCtx) | 
|  | return c, len(s) | 
|  | } | 
|  | c.jsCtx = nextJSCtx(s[:i], c.jsCtx) | 
|  | switch s[i] { | 
|  | case '"': | 
|  | c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp | 
|  | case '\'': | 
|  | c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp | 
|  | case '/': | 
|  | switch { | 
|  | case i+1 < len(s) && s[i+1] == '/': | 
|  | c.state, i = stateJSLineCmt, i+1 | 
|  | case i+1 < len(s) && s[i+1] == '*': | 
|  | c.state, i = stateJSBlockCmt, i+1 | 
|  | case c.jsCtx == jsCtxRegexp: | 
|  | c.state = stateJSRegexp | 
|  | case c.jsCtx == jsCtxDivOp: | 
|  | c.jsCtx = jsCtxRegexp | 
|  | default: | 
|  | return context{ | 
|  | state: stateError, | 
|  | err:   errorf(ErrSlashAmbig, nil, 0, "'/' could start a division or regexp: %.32q", s[i:]), | 
|  | }, len(s) | 
|  | } | 
|  | default: | 
|  | panic("unreachable") | 
|  | } | 
|  | return c, i + 1 | 
|  | } | 
|  |  | 
|  | // tJSDelimited is the context transition function for the JS string and regexp | 
|  | // states. | 
|  | func tJSDelimited(c context, s []byte) (context, int) { | 
|  | specials := `\"` | 
|  | switch c.state { | 
|  | case stateJSSqStr: | 
|  | specials = `\'` | 
|  | case stateJSRegexp: | 
|  | specials = `\/[]` | 
|  | } | 
|  |  | 
|  | k, inCharset := 0, false | 
|  | for { | 
|  | i := k + bytes.IndexAny(s[k:], specials) | 
|  | if i < k { | 
|  | break | 
|  | } | 
|  | switch s[i] { | 
|  | case '\\': | 
|  | i++ | 
|  | if i == len(s) { | 
|  | return context{ | 
|  | state: stateError, | 
|  | err:   errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in JS string: %q", s), | 
|  | }, len(s) | 
|  | } | 
|  | case '[': | 
|  | inCharset = true | 
|  | case ']': | 
|  | inCharset = false | 
|  | default: | 
|  | // end delimiter | 
|  | if !inCharset { | 
|  | c.state, c.jsCtx = stateJS, jsCtxDivOp | 
|  | return c, i + 1 | 
|  | } | 
|  | } | 
|  | k = i + 1 | 
|  | } | 
|  |  | 
|  | if inCharset { | 
|  | // This can be fixed by making context richer if interpolation | 
|  | // into charsets is desired. | 
|  | return context{ | 
|  | state: stateError, | 
|  | err:   errorf(ErrPartialCharset, nil, 0, "unfinished JS regexp charset: %q", s), | 
|  | }, len(s) | 
|  | } | 
|  |  | 
|  | return c, len(s) | 
|  | } | 
|  |  | 
// blockCommentEnd is the byte sequence that terminates a /*...*/ comment.
var blockCommentEnd = []byte("*/")
|  |  | 
|  | // tBlockCmt is the context transition function for /*comment*/ states. | 
|  | func tBlockCmt(c context, s []byte) (context, int) { | 
|  | i := bytes.Index(s, blockCommentEnd) | 
|  | if i == -1 { | 
|  | return c, len(s) | 
|  | } | 
|  | switch c.state { | 
|  | case stateJSBlockCmt: | 
|  | c.state = stateJS | 
|  | case stateCSSBlockCmt: | 
|  | c.state = stateCSS | 
|  | default: | 
|  | panic(c.state.String()) | 
|  | } | 
|  | return c, i + 2 | 
|  | } | 
|  |  | 
|  | // tLineCmt is the context transition function for //comment states. | 
|  | func tLineCmt(c context, s []byte) (context, int) { | 
|  | var lineTerminators string | 
|  | var endState state | 
|  | switch c.state { | 
|  | case stateJSLineCmt: | 
|  | lineTerminators, endState = "\n\r\u2028\u2029", stateJS | 
|  | case stateCSSLineCmt: | 
|  | lineTerminators, endState = "\n\f\r", stateCSS | 
|  | // Line comments are not part of any published CSS standard but | 
|  | // are supported by the 4 major browsers. | 
|  | // This defines line comments as | 
|  | //     LINECOMMENT ::= "//" [^\n\f\d]* | 
|  | // since https://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines | 
|  | // newlines: | 
|  | //     nl ::= #xA | #xD #xA | #xD | #xC | 
|  | default: | 
|  | panic(c.state.String()) | 
|  | } | 
|  |  | 
|  | i := bytes.IndexAny(s, lineTerminators) | 
|  | if i == -1 { | 
|  | return c, len(s) | 
|  | } | 
|  | c.state = endState | 
|  | // Per section 7.4 of EcmaScript 5 : https://es5.github.com/#x7.4 | 
|  | // "However, the LineTerminator at the end of the line is not | 
|  | // considered to be part of the single-line comment; it is | 
|  | // recognized separately by the lexical grammar and becomes part | 
|  | // of the stream of input elements for the syntactic grammar." | 
|  | return c, i | 
|  | } | 
|  |  | 
|  | // tCSS is the context transition function for the CSS state. | 
|  | func tCSS(c context, s []byte) (context, int) { | 
|  | // CSS quoted strings are almost never used except for: | 
|  | // (1) URLs as in background: "/foo.png" | 
|  | // (2) Multiword font-names as in font-family: "Times New Roman" | 
|  | // (3) List separators in content values as in inline-lists: | 
|  | //    <style> | 
|  | //    ul.inlineList { list-style: none; padding:0 } | 
|  | //    ul.inlineList > li { display: inline } | 
|  | //    ul.inlineList > li:before { content: ", " } | 
|  | //    ul.inlineList > li:first-child:before { content: "" } | 
|  | //    </style> | 
|  | //    <ul class=inlineList><li>One<li>Two<li>Three</ul> | 
|  | // (4) Attribute value selectors as in a[href="http://example.com/"] | 
|  | // | 
|  | // We conservatively treat all strings as URLs, but make some | 
|  | // allowances to avoid confusion. | 
|  | // | 
|  | // In (1), our conservative assumption is justified. | 
|  | // In (2), valid font names do not contain ':', '?', or '#', so our | 
|  | // conservative assumption is fine since we will never transition past | 
|  | // urlPartPreQuery. | 
|  | // In (3), our protocol heuristic should not be tripped, and there | 
|  | // should not be non-space content after a '?' or '#', so as long as | 
|  | // we only %-encode RFC 3986 reserved characters we are ok. | 
|  | // In (4), we should URL escape for URL attributes, and for others we | 
|  | // have the attribute name available if our conservative assumption | 
|  | // proves problematic for real code. | 
|  |  | 
|  | k := 0 | 
|  | for { | 
|  | i := k + bytes.IndexAny(s[k:], `("'/`) | 
|  | if i < k { | 
|  | return c, len(s) | 
|  | } | 
|  | switch s[i] { | 
|  | case '(': | 
|  | // Look for url to the left. | 
|  | p := bytes.TrimRight(s[:i], "\t\n\f\r ") | 
|  | if endsWithCSSKeyword(p, "url") { | 
|  | j := len(s) - len(bytes.TrimLeft(s[i+1:], "\t\n\f\r ")) | 
|  | switch { | 
|  | case j != len(s) && s[j] == '"': | 
|  | c.state, j = stateCSSDqURL, j+1 | 
|  | case j != len(s) && s[j] == '\'': | 
|  | c.state, j = stateCSSSqURL, j+1 | 
|  | default: | 
|  | c.state = stateCSSURL | 
|  | } | 
|  | return c, j | 
|  | } | 
|  | case '/': | 
|  | if i+1 < len(s) { | 
|  | switch s[i+1] { | 
|  | case '/': | 
|  | c.state = stateCSSLineCmt | 
|  | return c, i + 2 | 
|  | case '*': | 
|  | c.state = stateCSSBlockCmt | 
|  | return c, i + 2 | 
|  | } | 
|  | } | 
|  | case '"': | 
|  | c.state = stateCSSDqStr | 
|  | return c, i + 1 | 
|  | case '\'': | 
|  | c.state = stateCSSSqStr | 
|  | return c, i + 1 | 
|  | } | 
|  | k = i + 1 | 
|  | } | 
|  | } | 
|  |  | 
|  | // tCSSStr is the context transition function for the CSS string and URL states. | 
|  | func tCSSStr(c context, s []byte) (context, int) { | 
|  | var endAndEsc string | 
|  | switch c.state { | 
|  | case stateCSSDqStr, stateCSSDqURL: | 
|  | endAndEsc = `\"` | 
|  | case stateCSSSqStr, stateCSSSqURL: | 
|  | endAndEsc = `\'` | 
|  | case stateCSSURL: | 
|  | // Unquoted URLs end with a newline or close parenthesis. | 
|  | // The below includes the wc (whitespace character) and nl. | 
|  | endAndEsc = "\\\t\n\f\r )" | 
|  | default: | 
|  | panic(c.state.String()) | 
|  | } | 
|  |  | 
|  | k := 0 | 
|  | for { | 
|  | i := k + bytes.IndexAny(s[k:], endAndEsc) | 
|  | if i < k { | 
|  | c, nread := tURL(c, decodeCSS(s[k:])) | 
|  | return c, k + nread | 
|  | } | 
|  | if s[i] == '\\' { | 
|  | i++ | 
|  | if i == len(s) { | 
|  | return context{ | 
|  | state: stateError, | 
|  | err:   errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in CSS string: %q", s), | 
|  | }, len(s) | 
|  | } | 
|  | } else { | 
|  | c.state = stateCSS | 
|  | return c, i + 1 | 
|  | } | 
|  | c, _ = tURL(c, decodeCSS(s[:i+1])) | 
|  | k = i + 1 | 
|  | } | 
|  | } | 
|  |  | 
|  | // tError is the context transition function for the error state. | 
|  | func tError(c context, s []byte) (context, int) { | 
|  | return c, len(s) | 
|  | } | 
|  |  | 
|  | // eatAttrName returns the largest j such that s[i:j] is an attribute name. | 
|  | // It returns an error if s[i:] does not look like it begins with an | 
|  | // attribute name, such as encountering a quote mark without a preceding | 
|  | // equals sign. | 
|  | func eatAttrName(s []byte, i int) (int, *Error) { | 
|  | for j := i; j < len(s); j++ { | 
|  | switch s[j] { | 
|  | case ' ', '\t', '\n', '\f', '\r', '=', '>': | 
|  | return j, nil | 
|  | case '\'', '"', '<': | 
|  | // These result in a parse warning in HTML5 and are | 
|  | // indicative of serious problems if seen in an attr | 
|  | // name in a template. | 
|  | return -1, errorf(ErrBadHTML, nil, 0, "%q in attribute name: %.32q", s[j:j+1], s) | 
|  | default: | 
|  | // No-op. | 
|  | } | 
|  | } | 
|  | return len(s), nil | 
|  | } | 
|  |  | 
|  | var elementNameMap = map[string]element{ | 
|  | "script":   elementScript, | 
|  | "style":    elementStyle, | 
|  | "textarea": elementTextarea, | 
|  | "title":    elementTitle, | 
|  | } | 
|  |  | 
// asciiAlpha reports whether c is an ASCII letter (A-Z or a-z).
func asciiAlpha(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	}
	return false
}
|  |  | 
|  | // asciiAlphaNum reports whether c is an ASCII letter or digit. | 
|  | func asciiAlphaNum(c byte) bool { | 
|  | return asciiAlpha(c) || '0' <= c && c <= '9' | 
|  | } | 
|  |  | 
|  | // eatTagName returns the largest j such that s[i:j] is a tag name and the tag type. | 
|  | func eatTagName(s []byte, i int) (int, element) { | 
|  | if i == len(s) || !asciiAlpha(s[i]) { | 
|  | return i, elementNone | 
|  | } | 
|  | j := i + 1 | 
|  | for j < len(s) { | 
|  | x := s[j] | 
|  | if asciiAlphaNum(x) { | 
|  | j++ | 
|  | continue | 
|  | } | 
|  | // Allow "x-y" or "x:y" but not "x-", "-y", or "x--y". | 
|  | if (x == ':' || x == '-') && j+1 < len(s) && asciiAlphaNum(s[j+1]) { | 
|  | j += 2 | 
|  | continue | 
|  | } | 
|  | break | 
|  | } | 
|  | return j, elementNameMap[strings.ToLower(string(s[i:j]))] | 
|  | } | 
|  |  | 
// eatWhiteSpace returns the largest j such that s[i:j] is white space.
//
// White space is any of space, tab, line feed, form feed, and carriage
// return. If no other byte is found from index i onward, len(s) is returned.
func eatWhiteSpace(s []byte, i int) int {
	for j := i; j < len(s); j++ {
		if b := s[j]; b != ' ' && b != '\t' && b != '\n' && b != '\f' && b != '\r' {
			return j
		}
	}
	return len(s)
}