| // Copyright 2011 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package template |
| |
| import ( |
| "bytes" |
| "strings" |
| ) |
| |
| // transitionFunc is the array of context transition functions for text nodes. |
| // A transition function takes a context and template text input, and returns |
| // the updated context and the number of bytes consumed from the front of the |
| // input. |
| var transitionFunc = [...]func(context, []byte) (context, int){ |
| stateText: tText, |
| stateTag: tTag, |
| stateAttrName: tAttrName, |
| stateAfterName: tAfterName, |
| stateBeforeValue: tBeforeValue, |
| stateHTMLCmt: tHTMLCmt, |
| stateRCDATA: tSpecialTagEnd, |
| stateAttr: tAttr, |
| stateURL: tURL, |
| stateJS: tJS, |
| stateJSDqStr: tJSDelimited, |
| stateJSSqStr: tJSDelimited, |
| stateJSRegexp: tJSDelimited, |
| stateJSBlockCmt: tBlockCmt, |
| stateJSLineCmt: tLineCmt, |
| stateCSS: tCSS, |
| stateCSSDqStr: tCSSStr, |
| stateCSSSqStr: tCSSStr, |
| stateCSSDqURL: tCSSStr, |
| stateCSSSqURL: tCSSStr, |
| stateCSSURL: tCSSStr, |
| stateCSSBlockCmt: tBlockCmt, |
| stateCSSLineCmt: tLineCmt, |
| stateError: tError, |
| } |
| |
// Byte sequences that open and close an HTML comment.
var (
	commentStart = []byte("<!--")
	commentEnd   = []byte("-->")
)
| |
| // tText is the context transition function for the text state. |
| func tText(c context, s []byte) (context, int) { |
| k := 0 |
| for { |
| i := k + bytes.IndexByte(s[k:], '<') |
| if i < k || i+1 == len(s) { |
| return c, len(s) |
| } else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) { |
| return context{state: stateHTMLCmt}, i + 4 |
| } |
| i++ |
| end := false |
| if s[i] == '/' { |
| if i+1 == len(s) { |
| return c, len(s) |
| } |
| end, i = true, i+1 |
| } |
| j, e := eatTagName(s, i) |
| if j != i { |
| if end { |
| e = elementNone |
| } |
| // We've found an HTML tag. |
| return context{state: stateTag, element: e}, j |
| } |
| k = j |
| } |
| } |
| |
| var elementContentType = [...]state{ |
| elementNone: stateText, |
| elementScript: stateJS, |
| elementStyle: stateCSS, |
| elementTextarea: stateRCDATA, |
| elementTitle: stateRCDATA, |
| } |
| |
| // tTag is the context transition function for the tag state. |
| func tTag(c context, s []byte) (context, int) { |
| // Find the attribute name. |
| i := eatWhiteSpace(s, 0) |
| if i == len(s) { |
| return c, len(s) |
| } |
| if s[i] == '>' { |
| return context{ |
| state: elementContentType[c.element], |
| element: c.element, |
| }, i + 1 |
| } |
| j, err := eatAttrName(s, i) |
| if err != nil { |
| return context{state: stateError, err: err}, len(s) |
| } |
| state, attr := stateTag, attrNone |
| if i == j { |
| return context{ |
| state: stateError, |
| err: errorf(ErrBadHTML, nil, 0, "expected space, attr name, or end of tag, but got %q", s[i:]), |
| }, len(s) |
| } |
| switch attrType(string(s[i:j])) { |
| case contentTypeURL: |
| attr = attrURL |
| case contentTypeCSS: |
| attr = attrStyle |
| case contentTypeJS: |
| attr = attrScript |
| } |
| if j == len(s) { |
| state = stateAttrName |
| } else { |
| state = stateAfterName |
| } |
| return context{state: state, element: c.element, attr: attr}, j |
| } |
| |
| // tAttrName is the context transition function for stateAttrName. |
| func tAttrName(c context, s []byte) (context, int) { |
| i, err := eatAttrName(s, 0) |
| if err != nil { |
| return context{state: stateError, err: err}, len(s) |
| } else if i != len(s) { |
| c.state = stateAfterName |
| } |
| return c, i |
| } |
| |
| // tAfterName is the context transition function for stateAfterName. |
| func tAfterName(c context, s []byte) (context, int) { |
| // Look for the start of the value. |
| i := eatWhiteSpace(s, 0) |
| if i == len(s) { |
| return c, len(s) |
| } else if s[i] != '=' { |
| // Occurs due to tag ending '>', and valueless attribute. |
| c.state = stateTag |
| return c, i |
| } |
| c.state = stateBeforeValue |
| // Consume the "=". |
| return c, i + 1 |
| } |
| |
| var attrStartStates = [...]state{ |
| attrNone: stateAttr, |
| attrScript: stateJS, |
| attrStyle: stateCSS, |
| attrURL: stateURL, |
| } |
| |
| // tBeforeValue is the context transition function for stateBeforeValue. |
| func tBeforeValue(c context, s []byte) (context, int) { |
| i := eatWhiteSpace(s, 0) |
| if i == len(s) { |
| return c, len(s) |
| } |
| // Find the attribute delimiter. |
| delim := delimSpaceOrTagEnd |
| switch s[i] { |
| case '\'': |
| delim, i = delimSingleQuote, i+1 |
| case '"': |
| delim, i = delimDoubleQuote, i+1 |
| } |
| c.state, c.delim = attrStartStates[c.attr], delim |
| return c, i |
| } |
| |
| // tHTMLCmt is the context transition function for stateHTMLCmt. |
| func tHTMLCmt(c context, s []byte) (context, int) { |
| if i := bytes.Index(s, commentEnd); i != -1 { |
| return context{}, i + 3 |
| } |
| return c, len(s) |
| } |
| |
| // specialTagEndMarkers maps element types to the character sequence that |
| // case-insensitively signals the end of the special tag body. |
| var specialTagEndMarkers = [...][]byte{ |
| elementScript: []byte("script"), |
| elementStyle: []byte("style"), |
| elementTextarea: []byte("textarea"), |
| elementTitle: []byte("title"), |
| } |
| |
// specialTagEndPrefix is the byte sequence that opens an end tag.
var specialTagEndPrefix = []byte("</")

// tagEndSeparators lists the bytes that may directly follow the tag name in
// an end tag for it to be recognized by indexTagEnd.
var tagEndSeparators = []byte("> \t\n\f/")
| |
| // tSpecialTagEnd is the context transition function for raw text and RCDATA |
| // element states. |
| func tSpecialTagEnd(c context, s []byte) (context, int) { |
| if c.element != elementNone { |
| if i := indexTagEnd(s, specialTagEndMarkers[c.element]); i != -1 { |
| return context{}, i |
| } |
| } |
| return c, len(s) |
| } |
| |
| // indexTagEnd finds the index of a special tag end in a case insensitive way, or returns -1 |
| func indexTagEnd(s []byte, tag []byte) int { |
| res := 0 |
| plen := len(specialTagEndPrefix) |
| for len(s) > 0 { |
| // Try to find the tag end prefix first |
| i := bytes.Index(s, specialTagEndPrefix) |
| if i == -1 { |
| return i |
| } |
| s = s[i+plen:] |
| // Try to match the actual tag if there is still space for it |
| if len(tag) <= len(s) && bytes.EqualFold(tag, s[:len(tag)]) { |
| s = s[len(tag):] |
| // Check the tag is followed by a proper separator |
| if len(s) > 0 && bytes.IndexByte(tagEndSeparators, s[0]) != -1 { |
| return res + i |
| } |
| res += len(tag) |
| } |
| res += i + plen |
| } |
| return -1 |
| } |
| |
| // tAttr is the context transition function for the attribute state. |
| func tAttr(c context, s []byte) (context, int) { |
| return c, len(s) |
| } |
| |
| // tURL is the context transition function for the URL state. |
| func tURL(c context, s []byte) (context, int) { |
| if bytes.IndexAny(s, "#?") >= 0 { |
| c.urlPart = urlPartQueryOrFrag |
| } else if len(s) != eatWhiteSpace(s, 0) && c.urlPart == urlPartNone { |
| // HTML5 uses "Valid URL potentially surrounded by spaces" for |
| // attrs: http://www.w3.org/TR/html5/index.html#attributes-1 |
| c.urlPart = urlPartPreQuery |
| } |
| return c, len(s) |
| } |
| |
| // tJS is the context transition function for the JS state. |
| func tJS(c context, s []byte) (context, int) { |
| i := bytes.IndexAny(s, `"'/`) |
| if i == -1 { |
| // Entire input is non string, comment, regexp tokens. |
| c.jsCtx = nextJSCtx(s, c.jsCtx) |
| return c, len(s) |
| } |
| c.jsCtx = nextJSCtx(s[:i], c.jsCtx) |
| switch s[i] { |
| case '"': |
| c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp |
| case '\'': |
| c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp |
| case '/': |
| switch { |
| case i+1 < len(s) && s[i+1] == '/': |
| c.state, i = stateJSLineCmt, i+1 |
| case i+1 < len(s) && s[i+1] == '*': |
| c.state, i = stateJSBlockCmt, i+1 |
| case c.jsCtx == jsCtxRegexp: |
| c.state = stateJSRegexp |
| case c.jsCtx == jsCtxDivOp: |
| c.jsCtx = jsCtxRegexp |
| default: |
| return context{ |
| state: stateError, |
| err: errorf(ErrSlashAmbig, nil, 0, "'/' could start a division or regexp: %.32q", s[i:]), |
| }, len(s) |
| } |
| default: |
| panic("unreachable") |
| } |
| return c, i + 1 |
| } |
| |
| // tJSDelimited is the context transition function for the JS string and regexp |
| // states. |
| func tJSDelimited(c context, s []byte) (context, int) { |
| specials := `\"` |
| switch c.state { |
| case stateJSSqStr: |
| specials = `\'` |
| case stateJSRegexp: |
| specials = `\/[]` |
| } |
| |
| k, inCharset := 0, false |
| for { |
| i := k + bytes.IndexAny(s[k:], specials) |
| if i < k { |
| break |
| } |
| switch s[i] { |
| case '\\': |
| i++ |
| if i == len(s) { |
| return context{ |
| state: stateError, |
| err: errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in JS string: %q", s), |
| }, len(s) |
| } |
| case '[': |
| inCharset = true |
| case ']': |
| inCharset = false |
| default: |
| // end delimiter |
| if !inCharset { |
| c.state, c.jsCtx = stateJS, jsCtxDivOp |
| return c, i + 1 |
| } |
| } |
| k = i + 1 |
| } |
| |
| if inCharset { |
| // This can be fixed by making context richer if interpolation |
| // into charsets is desired. |
| return context{ |
| state: stateError, |
| err: errorf(ErrPartialCharset, nil, 0, "unfinished JS regexp charset: %q", s), |
| }, len(s) |
| } |
| |
| return c, len(s) |
| } |
| |
// blockCommentEnd is the byte sequence that terminates a /*comment*/.
var blockCommentEnd = []byte{'*', '/'}
| |
| // tBlockCmt is the context transition function for /*comment*/ states. |
| func tBlockCmt(c context, s []byte) (context, int) { |
| i := bytes.Index(s, blockCommentEnd) |
| if i == -1 { |
| return c, len(s) |
| } |
| switch c.state { |
| case stateJSBlockCmt: |
| c.state = stateJS |
| case stateCSSBlockCmt: |
| c.state = stateCSS |
| default: |
| panic(c.state.String()) |
| } |
| return c, i + 2 |
| } |
| |
| // tLineCmt is the context transition function for //comment states. |
| func tLineCmt(c context, s []byte) (context, int) { |
| var lineTerminators string |
| var endState state |
| switch c.state { |
| case stateJSLineCmt: |
| lineTerminators, endState = "\n\r\u2028\u2029", stateJS |
| case stateCSSLineCmt: |
| lineTerminators, endState = "\n\f\r", stateCSS |
| // Line comments are not part of any published CSS standard but |
| // are supported by the 4 major browsers. |
| // This defines line comments as |
| // LINECOMMENT ::= "//" [^\n\f\d]* |
| // since http://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines |
| // newlines: |
| // nl ::= #xA | #xD #xA | #xD | #xC |
| default: |
| panic(c.state.String()) |
| } |
| |
| i := bytes.IndexAny(s, lineTerminators) |
| if i == -1 { |
| return c, len(s) |
| } |
| c.state = endState |
| // Per section 7.4 of EcmaScript 5 : http://es5.github.com/#x7.4 |
| // "However, the LineTerminator at the end of the line is not |
| // considered to be part of the single-line comment; it is |
| // recognized separately by the lexical grammar and becomes part |
| // of the stream of input elements for the syntactic grammar." |
| return c, i |
| } |
| |
| // tCSS is the context transition function for the CSS state. |
| func tCSS(c context, s []byte) (context, int) { |
| // CSS quoted strings are almost never used except for: |
| // (1) URLs as in background: "/foo.png" |
| // (2) Multiword font-names as in font-family: "Times New Roman" |
| // (3) List separators in content values as in inline-lists: |
| // <style> |
| // ul.inlineList { list-style: none; padding:0 } |
| // ul.inlineList > li { display: inline } |
| // ul.inlineList > li:before { content: ", " } |
| // ul.inlineList > li:first-child:before { content: "" } |
| // </style> |
| // <ul class=inlineList><li>One<li>Two<li>Three</ul> |
| // (4) Attribute value selectors as in a[href="http://example.com/"] |
| // |
| // We conservatively treat all strings as URLs, but make some |
| // allowances to avoid confusion. |
| // |
| // In (1), our conservative assumption is justified. |
| // In (2), valid font names do not contain ':', '?', or '#', so our |
| // conservative assumption is fine since we will never transition past |
| // urlPartPreQuery. |
| // In (3), our protocol heuristic should not be tripped, and there |
| // should not be non-space content after a '?' or '#', so as long as |
| // we only %-encode RFC 3986 reserved characters we are ok. |
| // In (4), we should URL escape for URL attributes, and for others we |
| // have the attribute name available if our conservative assumption |
| // proves problematic for real code. |
| |
| k := 0 |
| for { |
| i := k + bytes.IndexAny(s[k:], `("'/`) |
| if i < k { |
| return c, len(s) |
| } |
| switch s[i] { |
| case '(': |
| // Look for url to the left. |
| p := bytes.TrimRight(s[:i], "\t\n\f\r ") |
| if endsWithCSSKeyword(p, "url") { |
| j := len(s) - len(bytes.TrimLeft(s[i+1:], "\t\n\f\r ")) |
| switch { |
| case j != len(s) && s[j] == '"': |
| c.state, j = stateCSSDqURL, j+1 |
| case j != len(s) && s[j] == '\'': |
| c.state, j = stateCSSSqURL, j+1 |
| default: |
| c.state = stateCSSURL |
| } |
| return c, j |
| } |
| case '/': |
| if i+1 < len(s) { |
| switch s[i+1] { |
| case '/': |
| c.state = stateCSSLineCmt |
| return c, i + 2 |
| case '*': |
| c.state = stateCSSBlockCmt |
| return c, i + 2 |
| } |
| } |
| case '"': |
| c.state = stateCSSDqStr |
| return c, i + 1 |
| case '\'': |
| c.state = stateCSSSqStr |
| return c, i + 1 |
| } |
| k = i + 1 |
| } |
| } |
| |
| // tCSSStr is the context transition function for the CSS string and URL states. |
| func tCSSStr(c context, s []byte) (context, int) { |
| var endAndEsc string |
| switch c.state { |
| case stateCSSDqStr, stateCSSDqURL: |
| endAndEsc = `\"` |
| case stateCSSSqStr, stateCSSSqURL: |
| endAndEsc = `\'` |
| case stateCSSURL: |
| // Unquoted URLs end with a newline or close parenthesis. |
| // The below includes the wc (whitespace character) and nl. |
| endAndEsc = "\\\t\n\f\r )" |
| default: |
| panic(c.state.String()) |
| } |
| |
| k := 0 |
| for { |
| i := k + bytes.IndexAny(s[k:], endAndEsc) |
| if i < k { |
| c, nread := tURL(c, decodeCSS(s[k:])) |
| return c, k + nread |
| } |
| if s[i] == '\\' { |
| i++ |
| if i == len(s) { |
| return context{ |
| state: stateError, |
| err: errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in CSS string: %q", s), |
| }, len(s) |
| } |
| } else { |
| c.state = stateCSS |
| return c, i + 1 |
| } |
| c, _ = tURL(c, decodeCSS(s[:i+1])) |
| k = i + 1 |
| } |
| } |
| |
| // tError is the context transition function for the error state. |
| func tError(c context, s []byte) (context, int) { |
| return c, len(s) |
| } |
| |
| // eatAttrName returns the largest j such that s[i:j] is an attribute name. |
| // It returns an error if s[i:] does not look like it begins with an |
| // attribute name, such as encountering a quote mark without a preceding |
| // equals sign. |
| func eatAttrName(s []byte, i int) (int, *Error) { |
| for j := i; j < len(s); j++ { |
| switch s[j] { |
| case ' ', '\t', '\n', '\f', '\r', '=', '>': |
| return j, nil |
| case '\'', '"', '<': |
| // These result in a parse warning in HTML5 and are |
| // indicative of serious problems if seen in an attr |
| // name in a template. |
| return -1, errorf(ErrBadHTML, nil, 0, "%q in attribute name: %.32q", s[j:j+1], s) |
| default: |
| // No-op. |
| } |
| } |
| return len(s), nil |
| } |
| |
| var elementNameMap = map[string]element{ |
| "script": elementScript, |
| "style": elementStyle, |
| "textarea": elementTextarea, |
| "title": elementTitle, |
| } |
| |
// asciiAlpha reports whether c is an ASCII letter.
func asciiAlpha(c byte) bool {
	// Setting bit 0x20 maps 'A'-'Z' onto 'a'-'z' and maps no other byte
	// into that range, so a single range check covers both cases.
	lower := c | 0x20
	return 'a' <= lower && lower <= 'z'
}
| |
| // asciiAlphaNum reports whether c is an ASCII letter or digit. |
| func asciiAlphaNum(c byte) bool { |
| return asciiAlpha(c) || '0' <= c && c <= '9' |
| } |
| |
| // eatTagName returns the largest j such that s[i:j] is a tag name and the tag type. |
| func eatTagName(s []byte, i int) (int, element) { |
| if i == len(s) || !asciiAlpha(s[i]) { |
| return i, elementNone |
| } |
| j := i + 1 |
| for j < len(s) { |
| x := s[j] |
| if asciiAlphaNum(x) { |
| j++ |
| continue |
| } |
| // Allow "x-y" or "x:y" but not "x-", "-y", or "x--y". |
| if (x == ':' || x == '-') && j+1 < len(s) && asciiAlphaNum(s[j+1]) { |
| j += 2 |
| continue |
| } |
| break |
| } |
| return j, elementNameMap[strings.ToLower(string(s[i:j]))] |
| } |
| |
// eatWhiteSpace returns the largest j such that s[i:j] is white space.
// White space here means space, tab, newline, form feed and carriage
// return. If only white space remains from i on, it returns len(s).
func eatWhiteSpace(s []byte, i int) int {
	for ; i < len(s); i++ {
		if b := s[i]; b != ' ' && b != '\t' && b != '\n' && b != '\f' && b != '\r' {
			return i
		}
	}
	return len(s)
}