src/html/template/transition.go - go - Git at Google

 // Copyright 2011 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 package template

 import (
 	"bytes"
 	"strings"
 )

 // transitionFunc is the array of context transition functions for text nodes.
 // A transition function takes a context and template text input, and returns
 // the updated context and the number of bytes consumed from the front of the
 // input.
 //
 // The array is keyed by state constant. Several states share one function;
 // those functions switch on c.state to tell the sharing states apart.
 var transitionFunc = [...]func(context, []byte) (context, int){
 	// HTML text, tag, and attribute states.
 	stateText:        tText,
 	stateTag:         tTag,
 	stateAttrName:    tAttrName,
 	stateAfterName:   tAfterName,
 	stateBeforeValue: tBeforeValue,
 	stateHTMLCmt:     tHTMLCmt,
 	stateRCDATA:      tSpecialTagEnd,
 	stateAttr:        tAttr,
 	stateURL:         tURL,
 	// JavaScript states.
 	stateJS:          tJS,
 	stateJSDqStr:     tJSDelimited,
 	stateJSSqStr:     tJSDelimited,
 	stateJSRegexp:    tJSDelimited,
 	stateJSBlockCmt:  tBlockCmt,
 	stateJSLineCmt:   tLineCmt,
 	// CSS states.
 	stateCSS:         tCSS,
 	stateCSSDqStr:    tCSSStr,
 	stateCSSSqStr:    tCSSStr,
 	stateCSSDqURL:    tCSSStr,
 	stateCSSSqURL:    tCSSStr,
 	stateCSSURL:      tCSSStr,
 	stateCSSBlockCmt: tBlockCmt,
 	stateCSSLineCmt:  tLineCmt,
 	// Error state.
 	stateError:       tError,
 }

 // commentStart and commentEnd are the byte sequences that open and close an
 // HTML comment.
 var (
 	commentStart = []byte("<!--")
 	commentEnd   = []byte("-->")
 )

 // tText is the context transition function for the text state.
 // It scans s for the next '<' that begins either an HTML comment or a tag.
 // On "<!--" it switches to stateHTMLCmt; on "<name" or "</name" it switches
 // to stateTag, consuming everything up to the end of the tag name. If neither
 // is found, all of s is consumed and c is returned unchanged.
 func tText(c context, s []byte) (context, int) {
 	from := 0
 	for {
 		off := bytes.IndexByte(s[from:], '<')
 		if off == -1 {
 			return c, len(s)
 		}
 		lt := from + off
 		if lt+1 == len(s) {
 			// '<' is the last byte: consume everything, keep the context.
 			return c, len(s)
 		}
 		if bytes.HasPrefix(s[lt:], commentStart) {
 			return context{state: stateHTMLCmt}, lt + len(commentStart)
 		}

 		nameStart := lt + 1
 		closing := s[nameStart] == '/'
 		if closing {
 			nameStart++
 			if nameStart == len(s) {
 				return c, len(s)
 			}
 		}

 		nameEnd, elem := eatTagName(s, nameStart)
 		if nameEnd == nameStart {
 			// No tag name here; resume the search from this offset.
 			from = nameEnd
 			continue
 		}
 		if closing {
 			// An end tag is always recorded as elementNone.
 			elem = elementNone
 		}
 		// We've found an HTML tag.
 		return context{state: stateTag, element: elem}, nameEnd
 	}
 }

 // elementContentType is keyed by element constant and gives the state that
 // tTag enters after consuming the '>' that ends a tag for that element.
 var elementContentType = [...]state{
 	elementNone:     stateText,
 	elementScript:   stateJS,
 	elementStyle:    stateCSS,
 	elementTextarea: stateRCDATA,
 	elementTitle:    stateRCDATA,
 }

 // tTag is the context transition function for the tag state.
 // After skipping white space it either consumes the closing '>' and enters
 // the content state of the current element, or consumes one attribute name
 // and records in the returned context which kind of value that attribute
 // holds. A position that is neither yields an ErrBadHTML error context.
 func tTag(c context, s []byte) (context, int) {
 	start := eatWhiteSpace(s, 0)
 	switch {
 	case start == len(s):
 		return c, len(s)
 	case s[start] == '>':
 		return context{
 			state:   elementContentType[c.element],
 			element: c.element,
 		}, start + 1
 	}

 	end, err := eatAttrName(s, start)
 	if err != nil {
 		return context{state: stateError, err: err}, len(s)
 	}
 	if end == start {
 		return context{
 			state: stateError,
 			err:   errorf(ErrBadHTML, nil, 0, "expected space, attr name, or end of tag, but got %q", s[start:]),
 		}, len(s)
 	}

 	next := context{state: stateAfterName, element: c.element, attr: attrNone}
 	switch attrType(string(s[start:end])) {
 	case contentTypeURL:
 		next.attr = attrURL
 	case contentTypeCSS:
 		next.attr = attrStyle
 	case contentTypeJS:
 		next.attr = attrScript
 	}
 	if end == len(s) {
 		// The name runs to the end of s, so stay in the attribute-name state.
 		next.state = stateAttrName
 	}
 	return next, end
 }

 // tAttrName is the context transition function for stateAttrName.
 // It consumes the attribute name at the front of s and moves to
 // stateAfterName unless the name extends to the end of s.
 func tAttrName(c context, s []byte) (context, int) {
 	end, err := eatAttrName(s, 0)
 	if err != nil {
 		return context{state: stateError, err: err}, len(s)
 	}
 	if end != len(s) {
 		c.state = stateAfterName
 	}
 	return c, end
 }

 // tAfterName is the context transition function for stateAfterName.
 // It skips white space and then consumes an '=' if present, moving to
 // stateBeforeValue; any other byte is left unconsumed and the context goes
 // back to stateTag.
 func tAfterName(c context, s []byte) (context, int) {
 	pos := eatWhiteSpace(s, 0)
 	if pos == len(s) {
 		return c, len(s)
 	}
 	if s[pos] == '=' {
 		// Consume the "=" and look for the start of the value next.
 		c.state = stateBeforeValue
 		return c, pos + 1
 	}
 	// Occurs due to tag ending '>', and valueless attribute.
 	c.state = stateTag
 	return c, pos
 }

 // attrStartStates is keyed by attr constant and gives the state that
 // tBeforeValue enters at the start of an attribute value of that kind.
 var attrStartStates = [...]state{
 	attrNone:   stateAttr,
 	attrScript: stateJS,
 	attrStyle:  stateCSS,
 	attrURL:    stateURL,
 }

 // tBeforeValue is the context transition function for stateBeforeValue.
 // It skips white space, consumes an opening quote if there is one, records
 // the value's delimiter, and enters the start state for the pending
 // attribute kind.
 func tBeforeValue(c context, s []byte) (context, int) {
 	pos := eatWhiteSpace(s, 0)
 	if pos == len(s) {
 		return c, len(s)
 	}
 	// Find the attribute delimiter; an unquoted value keeps the default.
 	quote := delimSpaceOrTagEnd
 	if s[pos] == '\'' {
 		quote = delimSingleQuote
 		pos++
 	} else if s[pos] == '"' {
 		quote = delimDoubleQuote
 		pos++
 	}
 	// c.attr selects the start state, so read it before clearing it.
 	c.state = attrStartStates[c.attr]
 	c.delim = quote
 	c.attr = attrNone
 	return c, pos
 }

 // tHTMLCmt is the context transition function for stateHTMLCmt.
 // It consumes through the first "-->" and returns the zero context; if the
 // terminator is absent, it consumes all of s and stays in the comment.
 func tHTMLCmt(c context, s []byte) (context, int) {
 	end := bytes.Index(s, commentEnd)
 	if end == -1 {
 		return c, len(s)
 	}
 	return context{}, end + len(commentEnd)
 }

 // specialTagEndMarkers maps element types to the character sequence that
 // case-insensitively signals the end of the special tag body.
 // No entry is listed for elementNone; tSpecialTagEnd skips the lookup for it.
 var specialTagEndMarkers = [...][]byte{
 	elementScript:   []byte("script"),
 	elementStyle:    []byte("style"),
 	elementTextarea: []byte("textarea"),
 	elementTitle:    []byte("title"),
 }

 var (
 	// specialTagEndPrefix is the byte sequence indexTagEnd searches for as
 	// the opening of an end tag.
 	specialTagEndPrefix = []byte("</")
 	// tagEndSeparators lists the bytes that must directly follow the tag
 	// name for indexTagEnd to accept it as an end tag.
 	tagEndSeparators    = []byte("> \t\n\f/")
 )

 // tSpecialTagEnd is the context transition function for raw text and RCDATA
 // element states.
 // It looks for the end tag of c.element; when found it returns the zero
 // context and consumes only the bytes before that end tag. Otherwise it
 // consumes all of s and keeps c.
 func tSpecialTagEnd(c context, s []byte) (context, int) {
 	if c.element == elementNone {
 		return c, len(s)
 	}
 	end := indexTagEnd(s, specialTagEndMarkers[c.element])
 	if end == -1 {
 		return c, len(s)
 	}
 	return context{}, end
 }

 // indexTagEnd finds the index of a special tag end in a case insensitive way, or returns -1.
 // A match is "</" followed by tag (compared with case folding) followed by
 // one of tagEndSeparators; the returned index is that of the '<'.
 func indexTagEnd(s []byte, tag []byte) int {
 	pos := 0
 	for pos < len(s) {
 		// Try to find the tag end prefix first.
 		off := bytes.Index(s[pos:], specialTagEndPrefix)
 		if off == -1 {
 			return -1
 		}
 		start := pos + off
 		nameStart := start + len(specialTagEndPrefix)
 		nameEnd := nameStart + len(tag)
 		// Try to match the actual tag if there is still space for it.
 		if nameEnd > len(s) || !bytes.EqualFold(tag, s[nameStart:nameEnd]) {
 			pos = nameStart
 			continue
 		}
 		// Check the tag is followed by a proper separator.
 		if nameEnd < len(s) && bytes.IndexByte(tagEndSeparators, s[nameEnd]) != -1 {
 			return start
 		}
 		pos = nameEnd
 	}
 	return -1
 }

 // tAttr is the context transition function for the attribute state.
 // It consumes all of s and returns c unchanged.
 func tAttr(c context, s []byte) (context, int) {
 	return c, len(s)
 }

 // tURL is the context transition function for the URL state.
 // It always consumes all of s and updates c.urlPart: a '#' or '?' anywhere
 // in s moves to urlPartQueryOrFrag; otherwise any non-white-space content
 // moves urlPartNone to urlPartPreQuery.
 func tURL(c context, s []byte) (context, int) {
 	switch {
 	case bytes.IndexAny(s, "#?") >= 0:
 		c.urlPart = urlPartQueryOrFrag
 	case c.urlPart == urlPartNone && eatWhiteSpace(s, 0) != len(s):
 		// HTML5 uses "Valid URL potentially surrounded by spaces" for
 		// attrs: http://www.w3.org/TR/html5/index.html#attributes-1
 		c.urlPart = urlPartPreQuery
 	}
 	return c, len(s)
 }

 // tJS is the context transition function for the JS state.
 // It advances to the first quote or slash in s, updating c.jsCtx from the
 // text skipped, and then enters the matching string, comment, or regexp
 // state. A slash that is neither a comment opener nor resolvable as division
 // or regexp from c.jsCtx yields an ErrSlashAmbig error context.
 func tJS(c context, s []byte) (context, int) {
 	pos := bytes.IndexAny(s, `"'/`)
 	if pos == -1 {
 		// Entire input is non string, comment, regexp tokens.
 		c.jsCtx = nextJSCtx(s, c.jsCtx)
 		return c, len(s)
 	}
 	c.jsCtx = nextJSCtx(s[:pos], c.jsCtx)

 	switch s[pos] {
 	case '"':
 		c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp
 		return c, pos + 1
 	case '\'':
 		c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp
 		return c, pos + 1
 	case '/':
 		// Resolved below.
 	default:
 		panic("unreachable")
 	}

 	// A slash: first rule out the two comment openers.
 	if pos+1 < len(s) {
 		switch s[pos+1] {
 		case '/':
 			c.state = stateJSLineCmt
 			return c, pos + 2
 		case '*':
 			c.state = stateJSBlockCmt
 			return c, pos + 2
 		}
 	}
 	switch c.jsCtx {
 	case jsCtxRegexp:
 		c.state = stateJSRegexp
 	case jsCtxDivOp:
 		c.jsCtx = jsCtxRegexp
 	default:
 		return context{
 			state: stateError,
 			err:   errorf(ErrSlashAmbig, nil, 0, "'/' could start a division or regexp: %.32q", s[pos:]),
 		}, len(s)
 	}
 	return c, pos + 1
 }

 // tJSDelimited is the context transition function for the JS string and regexp
 // states.
 // It scans for the closing delimiter of the current literal, skipping the
 // byte after each backslash and, for regexps, ignoring slashes inside [...].
 // On the closing delimiter it returns to stateJS with jsCtxDivOp. Input that
 // ends right after a backslash, or inside a regexp charset, yields an error
 // context.
 func tJSDelimited(c context, s []byte) (context, int) {
 	var stops string
 	switch c.state {
 	case stateJSSqStr:
 		stops = `\'`
 	case stateJSRegexp:
 		stops = `\/[]`
 	default:
 		stops = `\"`
 	}

 	inClass := false
 	for pos := 0; ; pos++ {
 		off := bytes.IndexAny(s[pos:], stops)
 		if off == -1 {
 			break
 		}
 		pos += off
 		switch s[pos] {
 		case '\\':
 			// Step over the escaped byte, which must be present.
 			pos++
 			if pos == len(s) {
 				return context{
 					state: stateError,
 					err:   errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in JS string: %q", s),
 				}, len(s)
 			}
 		case '[':
 			inClass = true
 		case ']':
 			inClass = false
 		default:
 			// end delimiter, unless it sits inside a charset
 			if !inClass {
 				c.state, c.jsCtx = stateJS, jsCtxDivOp
 				return c, pos + 1
 			}
 		}
 	}

 	if inClass {
 		// This can be fixed by making context richer if interpolation
 		// into charsets is desired.
 		return context{
 			state: stateError,
 			err:   errorf(ErrPartialCharset, nil, 0, "unfinished JS regexp charset: %q", s),
 		}, len(s)
 	}

 	return c, len(s)
 }

 // blockCommentEnd is the terminator tBlockCmt searches for in both JS and
 // CSS block comments.
 var blockCommentEnd = []byte("*/")

 // tBlockCmt is the context transition function for /*comment*/ states.
 // It consumes through the first "*/" and returns to stateJS or stateCSS,
 // depending on which comment state it was in. Without a terminator it
 // consumes all of s and stays in the comment.
 func tBlockCmt(c context, s []byte) (context, int) {
 	end := bytes.Index(s, blockCommentEnd)
 	if end == -1 {
 		return c, len(s)
 	}
 	var next state
 	switch c.state {
 	case stateJSBlockCmt:
 		next = stateJS
 	case stateCSSBlockCmt:
 		next = stateCSS
 	default:
 		panic(c.state.String())
 	}
 	c.state = next
 	return c, end + len(blockCommentEnd)
 }

 // tLineCmt is the context transition function for //comment states.
 // It consumes up to, but not including, the first line terminator and
 // returns to stateJS or stateCSS. Without a terminator it consumes all of s
 // and stays in the comment.
 func tLineCmt(c context, s []byte) (context, int) {
 	var (
 		terminators string
 		next        state
 	)
 	switch c.state {
 	case stateJSLineCmt:
 		terminators, next = "\n\r\u2028\u2029", stateJS
 	case stateCSSLineCmt:
 		// Line comments are not part of any published CSS standard but
 		// are supported by the 4 major browsers.
 		// This defines line comments as
 		//     LINECOMMENT ::= "//" [^\n\f\r]*
 		// since http://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines
 		// newlines:
 		//     nl ::= #xA | #xD #xA | #xD | #xC
 		terminators, next = "\n\f\r", stateCSS
 	default:
 		panic(c.state.String())
 	}

 	end := bytes.IndexAny(s, terminators)
 	if end == -1 {
 		return c, len(s)
 	}
 	// Per section 7.4 of EcmaScript 5 : http://es5.github.com/#x7.4
 	// "However, the LineTerminator at the end of the line is not
 	// considered to be part of the single-line comment; it is
 	// recognized separately by the lexical grammar and becomes part
 	// of the stream of input elements for the syntactic grammar."
 	// Hence the terminator itself is left unconsumed.
 	c.state = next
 	return c, end
 }

 // tCSS is the context transition function for the CSS state.
 // It scans for the next byte that opens a string, a url(...) value, or a
 // comment and enters the matching state; if there is none, it consumes all
 // of s.
 func tCSS(c context, s []byte) (context, int) {
 	// CSS quoted strings are almost never used except for:
 	// (1) URLs as in background: "/foo.png"
 	// (2) Multiword font-names as in font-family: "Times New Roman"
 	// (3) List separators in content values as in inline-lists:
 	//    <style>
 	//    ul.inlineList { list-style: none; padding:0 }
 	//    ul.inlineList > li { display: inline }
 	//    ul.inlineList > li:before { content: ", " }
 	//    ul.inlineList > li:first-child:before { content: "" }
 	//    </style>
 	//    <ul class=inlineList><li>One<li>Two<li>Three</ul>
 	// (4) Attribute value selectors as in a[href="http://example.com/"]
 	//
 	// We conservatively treat all strings as URLs, but make some
 	// allowances to avoid confusion.
 	//
 	// In (1), our conservative assumption is justified.
 	// In (2), valid font names do not contain ':', '?', or '#', so our
 	// conservative assumption is fine since we will never transition past
 	// urlPartPreQuery.
 	// In (3), our protocol heuristic should not be tripped, and there
 	// should not be non-space content after a '?' or '#', so as long as
 	// we only %-encode RFC 3986 reserved characters we are ok.
 	// In (4), we should URL escape for URL attributes, and for others we
 	// have the attribute name available if our conservative assumption
 	// proves problematic for real code.

 	const space = "\t\n\f\r "

 	for pos := 0; ; pos++ {
 		off := bytes.IndexAny(s[pos:], `("'/`)
 		if off == -1 {
 			return c, len(s)
 		}
 		pos += off
 		switch s[pos] {
 		case '"':
 			c.state = stateCSSDqStr
 			return c, pos + 1
 		case '\'':
 			c.state = stateCSSSqStr
 			return c, pos + 1
 		case '/':
 			if pos+1 < len(s) {
 				switch s[pos+1] {
 				case '/':
 					c.state = stateCSSLineCmt
 					return c, pos + 2
 				case '*':
 					c.state = stateCSSBlockCmt
 					return c, pos + 2
 				}
 			}
 		case '(':
 			// Look for url to the left; any other '(' is skipped.
 			if !endsWithCSSKeyword(bytes.TrimRight(s[:pos], space), "url") {
 				continue
 			}
 			// val is the index of the first non-space byte after '('.
 			val := len(s) - len(bytes.TrimLeft(s[pos+1:], space))
 			c.state = stateCSSURL
 			if val != len(s) {
 				switch s[val] {
 				case '"':
 					c.state, val = stateCSSDqURL, val+1
 				case '\'':
 					c.state, val = stateCSSSqURL, val+1
 				}
 			}
 			return c, val
 		}
 	}
 }

 // tCSSStr is the context transition function for the CSS string and URL states.
 // It scans for the byte that ends the current string or URL, stepping over
 // the byte after each backslash, and returns to stateCSS once the end is
 // consumed. The text scanned is also passed, CSS-decoded, through tURL. An
 // input that ends right after a backslash yields an ErrPartialEscape error
 // context.
 func tCSSStr(c context, s []byte) (context, int) {
 	var stops string
 	switch c.state {
 	case stateCSSDqStr, stateCSSDqURL:
 		stops = `\"`
 	case stateCSSSqStr, stateCSSSqURL:
 		stops = `\'`
 	case stateCSSURL:
 		// Unquoted URLs end with a newline or close parenthesis.
 		// The below includes the wc (whitespace character) and nl.
 		stops = "\\\t\n\f\r )"
 	default:
 		panic(c.state.String())
 	}

 	for pos := 0; ; pos++ {
 		off := bytes.IndexAny(s[pos:], stops)
 		if off == -1 {
 			// No terminator or escape in the remainder: hand it to tURL.
 			next, n := tURL(c, decodeCSS(s[pos:]))
 			return next, pos + n
 		}
 		pos += off
 		if s[pos] != '\\' {
 			c.state = stateCSS
 			return c, pos + 1
 		}
 		// Step over the escaped byte, which must be present.
 		pos++
 		if pos == len(s) {
 			return context{
 				state: stateError,
 				err:   errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in CSS string: %q", s),
 			}, len(s)
 		}
 		// Only the updated context matters here; the byte count tURL
 		// reports refers to the decoded text and is discarded.
 		c, _ = tURL(c, decodeCSS(s[:pos+1]))
 	}
 }

 // tError is the context transition function for the error state.
 // It consumes all of s and returns c unchanged.
 func tError(c context, s []byte) (context, int) {
 	return c, len(s)
 }

 // eatAttrName returns the largest j such that s[i:j] is an attribute name.
 // It returns an error if s[i:] does not look like it begins with an
 // attribute name, such as encountering a quote mark without a preceding
 // equals sign. On error the returned index is -1.
 func eatAttrName(s []byte, i int) (int, *Error) {
 	for end := i; end < len(s); end++ {
 		switch s[end] {
 		case '\'', '"', '<':
 			// These result in a parse warning in HTML5 and are
 			// indicative of serious problems if seen in an attr
 			// name in a template.
 			return -1, errorf(ErrBadHTML, nil, 0, "%q in attribute name: %.32q", s[end:end+1], s)
 		case ' ', '\t', '\n', '\f', '\r', '=', '>':
 			// White space, '=' and '>' end the name.
 			return end, nil
 		}
 	}
 	return len(s), nil
 }

 // elementNameMap maps lower-case tag names to element constants.
 // eatTagName lower-cases a name before looking it up; names not listed here
 // yield the map's zero value.
 var elementNameMap = map[string]element{
 	"script":   elementScript,
 	"style":    elementStyle,
 	"textarea": elementTextarea,
 	"title":    elementTitle,
 }

 // asciiAlpha reports whether c is an ASCII letter.
 func asciiAlpha(c byte) bool {
 	switch {
 	case 'a' <= c && c <= 'z':
 		return true
 	case 'A' <= c && c <= 'Z':
 		return true
 	}
 	return false
 }

 // asciiAlphaNum reports whether c is an ASCII letter or digit.
 func asciiAlphaNum(c byte) bool {
 	if asciiAlpha(c) {
 		return true
 	}
 	return '0' <= c && c <= '9'
 }

 // eatTagName returns the largest j such that s[i:j] is a tag name and the tag type.
 // A name starts with an ASCII letter and continues with letters and digits;
 // a single ':' or '-' is accepted only between two such characters. If no
 // name starts at i, it returns i and elementNone.
 func eatTagName(s []byte, i int) (int, element) {
 	if i == len(s) || !asciiAlpha(s[i]) {
 		return i, elementNone
 	}
 	end := i + 1
 	for end < len(s) {
 		ch := s[end]
 		step := 0
 		if asciiAlphaNum(ch) {
 			step = 1
 		} else if (ch == ':' || ch == '-') && end+1 < len(s) && asciiAlphaNum(s[end+1]) {
 			// Allow "x-y" or "x:y" but not "x-", "-y", or "x--y".
 			step = 2
 		}
 		if step == 0 {
 			break
 		}
 		end += step
 	}
 	name := strings.ToLower(string(s[i:end]))
 	return end, elementNameMap[name]
 }

 // eatWhiteSpace returns the largest j such that s[i:j] is white space.
 // White space is space, tab, line feed, form feed, or carriage return.
 func eatWhiteSpace(s []byte, i int) int {
 	for ; i < len(s); i++ {
 		if ch := s[i]; ch != ' ' && ch != '\t' && ch != '\n' && ch != '\f' && ch != '\r' {
 			return i
 		}
 	}
 	return len(s)
 }
	// Copyright 2011 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	package template

	import (
	"bytes"
	"strings"
	)

	// transitionFunc is the array of context transition functions for text nodes.
	// A transition function takes a context and template text input, and returns
	// the updated context and the number of bytes consumed from the front of the
	// input.
	//
	// The array is keyed by state constant. Several states share one function;
	// those functions switch on c.state to tell the sharing states apart.
	var transitionFunc = [...]func(context, []byte) (context, int){
	// HTML text, tag, and attribute states.
	stateText: tText,
	stateTag: tTag,
	stateAttrName: tAttrName,
	stateAfterName: tAfterName,
	stateBeforeValue: tBeforeValue,
	stateHTMLCmt: tHTMLCmt,
	stateRCDATA: tSpecialTagEnd,
	stateAttr: tAttr,
	stateURL: tURL,
	// JavaScript states.
	stateJS: tJS,
	stateJSDqStr: tJSDelimited,
	stateJSSqStr: tJSDelimited,
	stateJSRegexp: tJSDelimited,
	stateJSBlockCmt: tBlockCmt,
	stateJSLineCmt: tLineCmt,
	// CSS states.
	stateCSS: tCSS,
	stateCSSDqStr: tCSSStr,
	stateCSSSqStr: tCSSStr,
	stateCSSDqURL: tCSSStr,
	stateCSSSqURL: tCSSStr,
	stateCSSURL: tCSSStr,
	stateCSSBlockCmt: tBlockCmt,
	stateCSSLineCmt: tLineCmt,
	// Error state.
	stateError: tError,
	}

	// commentStart and commentEnd are the byte sequences that open and close an
	// HTML comment.
	var (
		commentStart = []byte("<!--")
		commentEnd   = []byte("-->")
	)

	// tText is the context transition function for the text state.
	func tText(c context, s []byte) (context, int) {
	k := 0
	for {
	i := k + bytes.IndexByte(s[k:], '<')
	if i < k \|\| i+1 == len(s) {
	return c, len(s)
	} else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) {
	return context{state: stateHTMLCmt}, i + 4
	}
	i++
	end := false
	if s[i] == '/' {
	if i+1 == len(s) {
	return c, len(s)
	}
	end, i = true, i+1
	}
	j, e := eatTagName(s, i)
	if j != i {
	if end {
	e = elementNone
	}
	// We've found an HTML tag.
	return context{state: stateTag, element: e}, j
	}
	k = j
	}
	}

	// elementContentType is keyed by element constant and gives the state that
	// tTag enters after consuming the '>' that ends a tag for that element.
	var elementContentType = [...]state{
	elementNone: stateText,
	elementScript: stateJS,
	elementStyle: stateCSS,
	elementTextarea: stateRCDATA,
	elementTitle: stateRCDATA,
	}

	// tTag is the context transition function for the tag state.
	// After skipping white space it either consumes the closing '>' and enters
	// the content state of the current element, or consumes one attribute name
	// and records in the returned context which kind of value that attribute
	// holds. A position that is neither yields an ErrBadHTML error context.
	func tTag(c context, s []byte) (context, int) {
		start := eatWhiteSpace(s, 0)
		switch {
		case start == len(s):
			return c, len(s)
		case s[start] == '>':
			return context{
				state:   elementContentType[c.element],
				element: c.element,
			}, start + 1
		}

		end, err := eatAttrName(s, start)
		if err != nil {
			return context{state: stateError, err: err}, len(s)
		}
		if end == start {
			return context{
				state: stateError,
				err:   errorf(ErrBadHTML, nil, 0, "expected space, attr name, or end of tag, but got %q", s[start:]),
			}, len(s)
		}

		next := context{state: stateAfterName, element: c.element, attr: attrNone}
		switch attrType(string(s[start:end])) {
		case contentTypeURL:
			next.attr = attrURL
		case contentTypeCSS:
			next.attr = attrStyle
		case contentTypeJS:
			next.attr = attrScript
		}
		if end == len(s) {
			// The name runs to the end of s, so stay in the attribute-name state.
			next.state = stateAttrName
		}
		return next, end
	}

	// tAttrName is the context transition function for stateAttrName.
	// It consumes the attribute name at the front of s and moves to
	// stateAfterName unless the name extends to the end of s.
	func tAttrName(c context, s []byte) (context, int) {
		end, err := eatAttrName(s, 0)
		if err != nil {
			return context{state: stateError, err: err}, len(s)
		}
		if end != len(s) {
			c.state = stateAfterName
		}
		return c, end
	}

	// tAfterName is the context transition function for stateAfterName.
	// It skips white space and then consumes an '=' if present, moving to
	// stateBeforeValue; any other byte is left unconsumed and the context goes
	// back to stateTag.
	func tAfterName(c context, s []byte) (context, int) {
		pos := eatWhiteSpace(s, 0)
		if pos == len(s) {
			return c, len(s)
		}
		if s[pos] == '=' {
			// Consume the "=" and look for the start of the value next.
			c.state = stateBeforeValue
			return c, pos + 1
		}
		// Occurs due to tag ending '>', and valueless attribute.
		c.state = stateTag
		return c, pos
	}

	// attrStartStates is keyed by attr constant and gives the state that
	// tBeforeValue enters at the start of an attribute value of that kind.
	var attrStartStates = [...]state{
	attrNone: stateAttr,
	attrScript: stateJS,
	attrStyle: stateCSS,
	attrURL: stateURL,
	}

	// tBeforeValue is the context transition function for stateBeforeValue.
	// It skips white space, consumes an opening quote if there is one, records
	// the value's delimiter, and enters the start state for the pending
	// attribute kind.
	func tBeforeValue(c context, s []byte) (context, int) {
		pos := eatWhiteSpace(s, 0)
		if pos == len(s) {
			return c, len(s)
		}
		// Find the attribute delimiter; an unquoted value keeps the default.
		quote := delimSpaceOrTagEnd
		if s[pos] == '\'' {
			quote = delimSingleQuote
			pos++
		} else if s[pos] == '"' {
			quote = delimDoubleQuote
			pos++
		}
		// c.attr selects the start state, so read it before clearing it.
		c.state = attrStartStates[c.attr]
		c.delim = quote
		c.attr = attrNone
		return c, pos
	}

	// tHTMLCmt is the context transition function for stateHTMLCmt.
	// It consumes through the first "-->" and returns the zero context; if the
	// terminator is absent, it consumes all of s and stays in the comment.
	func tHTMLCmt(c context, s []byte) (context, int) {
		end := bytes.Index(s, commentEnd)
		if end == -1 {
			return c, len(s)
		}
		return context{}, end + len(commentEnd)
	}

	// specialTagEndMarkers maps element types to the character sequence that
	// case-insensitively signals the end of the special tag body.
	// No entry is listed for elementNone; tSpecialTagEnd skips the lookup for it.
	var specialTagEndMarkers = [...][]byte{
	elementScript: []byte("script"),
	elementStyle: []byte("style"),
	elementTextarea: []byte("textarea"),
	elementTitle: []byte("title"),
	}

	var (
	// specialTagEndPrefix is the byte sequence indexTagEnd searches for as
	// the opening of an end tag.
	specialTagEndPrefix = []byte("</")
	// tagEndSeparators lists the bytes that must directly follow the tag
	// name for indexTagEnd to accept it as an end tag.
	tagEndSeparators = []byte("> \t\n\f/")
	)

	// tSpecialTagEnd is the context transition function for raw text and RCDATA
	// element states.
	// It looks for the end tag of c.element; when found it returns the zero
	// context and consumes only the bytes before that end tag. Otherwise it
	// consumes all of s and keeps c.
	func tSpecialTagEnd(c context, s []byte) (context, int) {
		if c.element == elementNone {
			return c, len(s)
		}
		end := indexTagEnd(s, specialTagEndMarkers[c.element])
		if end == -1 {
			return c, len(s)
		}
		return context{}, end
	}

	// indexTagEnd finds the index of a special tag end in a case insensitive way, or returns -1.
	// A match is "</" followed by tag (compared with case folding) followed by
	// one of tagEndSeparators; the returned index is that of the '<'.
	func indexTagEnd(s []byte, tag []byte) int {
		pos := 0
		for pos < len(s) {
			// Try to find the tag end prefix first.
			off := bytes.Index(s[pos:], specialTagEndPrefix)
			if off == -1 {
				return -1
			}
			start := pos + off
			nameStart := start + len(specialTagEndPrefix)
			nameEnd := nameStart + len(tag)
			// Try to match the actual tag if there is still space for it.
			if nameEnd > len(s) || !bytes.EqualFold(tag, s[nameStart:nameEnd]) {
				pos = nameStart
				continue
			}
			// Check the tag is followed by a proper separator.
			if nameEnd < len(s) && bytes.IndexByte(tagEndSeparators, s[nameEnd]) != -1 {
				return start
			}
			pos = nameEnd
		}
		return -1
	}

	// tAttr is the context transition function for the attribute state.
	// It consumes all of s and returns c unchanged.
	func tAttr(c context, s []byte) (context, int) {
	return c, len(s)
	}

	// tURL is the context transition function for the URL state.
	// It always consumes all of s and updates c.urlPart: a '#' or '?' anywhere
	// in s moves to urlPartQueryOrFrag; otherwise any non-white-space content
	// moves urlPartNone to urlPartPreQuery.
	func tURL(c context, s []byte) (context, int) {
		switch {
		case bytes.IndexAny(s, "#?") >= 0:
			c.urlPart = urlPartQueryOrFrag
		case c.urlPart == urlPartNone && eatWhiteSpace(s, 0) != len(s):
			// HTML5 uses "Valid URL potentially surrounded by spaces" for
			// attrs: http://www.w3.org/TR/html5/index.html#attributes-1
			c.urlPart = urlPartPreQuery
		}
		return c, len(s)
	}

	// tJS is the context transition function for the JS state.
	// It advances to the first quote or slash in s, updating c.jsCtx from the
	// text skipped, and then enters the matching string, comment, or regexp
	// state. A slash that is neither a comment opener nor resolvable as division
	// or regexp from c.jsCtx yields an ErrSlashAmbig error context.
	func tJS(c context, s []byte) (context, int) {
		pos := bytes.IndexAny(s, `"'/`)
		if pos == -1 {
			// Entire input is non string, comment, regexp tokens.
			c.jsCtx = nextJSCtx(s, c.jsCtx)
			return c, len(s)
		}
		c.jsCtx = nextJSCtx(s[:pos], c.jsCtx)

		switch s[pos] {
		case '"':
			c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp
			return c, pos + 1
		case '\'':
			c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp
			return c, pos + 1
		case '/':
			// Resolved below.
		default:
			panic("unreachable")
		}

		// A slash: first rule out the two comment openers.
		if pos+1 < len(s) {
			switch s[pos+1] {
			case '/':
				c.state = stateJSLineCmt
				return c, pos + 2
			case '*':
				c.state = stateJSBlockCmt
				return c, pos + 2
			}
		}
		switch c.jsCtx {
		case jsCtxRegexp:
			c.state = stateJSRegexp
		case jsCtxDivOp:
			c.jsCtx = jsCtxRegexp
		default:
			return context{
				state: stateError,
				err:   errorf(ErrSlashAmbig, nil, 0, "'/' could start a division or regexp: %.32q", s[pos:]),
			}, len(s)
		}
		return c, pos + 1
	}

	// tJSDelimited is the context transition function for the JS string and regexp
	// states.
	// It scans for the closing delimiter of the current literal, skipping the
	// byte after each backslash and, for regexps, ignoring slashes inside [...].
	// On the closing delimiter it returns to stateJS with jsCtxDivOp. Input that
	// ends right after a backslash, or inside a regexp charset, yields an error
	// context.
	func tJSDelimited(c context, s []byte) (context, int) {
		var stops string
		switch c.state {
		case stateJSSqStr:
			stops = `\'`
		case stateJSRegexp:
			stops = `\/[]`
		default:
			stops = `\"`
		}

		inClass := false
		for pos := 0; ; pos++ {
			off := bytes.IndexAny(s[pos:], stops)
			if off == -1 {
				break
			}
			pos += off
			switch s[pos] {
			case '\\':
				// Step over the escaped byte, which must be present.
				pos++
				if pos == len(s) {
					return context{
						state: stateError,
						err:   errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in JS string: %q", s),
					}, len(s)
				}
			case '[':
				inClass = true
			case ']':
				inClass = false
			default:
				// end delimiter, unless it sits inside a charset
				if !inClass {
					c.state, c.jsCtx = stateJS, jsCtxDivOp
					return c, pos + 1
				}
			}
		}

		if inClass {
			// This can be fixed by making context richer if interpolation
			// into charsets is desired.
			return context{
				state: stateError,
				err:   errorf(ErrPartialCharset, nil, 0, "unfinished JS regexp charset: %q", s),
			}, len(s)
		}

		return c, len(s)
	}

	// blockCommentEnd is the terminator tBlockCmt searches for in both JS and
	// CSS block comments.
	var blockCommentEnd = []byte("*/")

	// tBlockCmt is the context transition function for /*comment*/ states.
	// It consumes through the first "*/" and returns to stateJS or stateCSS,
	// depending on which comment state it was in. Without a terminator it
	// consumes all of s and stays in the comment.
	func tBlockCmt(c context, s []byte) (context, int) {
		end := bytes.Index(s, blockCommentEnd)
		if end == -1 {
			return c, len(s)
		}
		var next state
		switch c.state {
		case stateJSBlockCmt:
			next = stateJS
		case stateCSSBlockCmt:
			next = stateCSS
		default:
			panic(c.state.String())
		}
		c.state = next
		return c, end + len(blockCommentEnd)
	}

	// tLineCmt is the context transition function for //comment states.
	// It consumes up to, but not including, the first line terminator and
	// returns to stateJS or stateCSS. Without a terminator it consumes all of s
	// and stays in the comment.
	func tLineCmt(c context, s []byte) (context, int) {
		var (
			terminators string
			next        state
		)
		switch c.state {
		case stateJSLineCmt:
			terminators, next = "\n\r\u2028\u2029", stateJS
		case stateCSSLineCmt:
			// Line comments are not part of any published CSS standard but
			// are supported by the 4 major browsers.
			// This defines line comments as
			//     LINECOMMENT ::= "//" [^\n\f\r]*
			// since http://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines
			// newlines:
			//     nl ::= #xA | #xD #xA | #xD | #xC
			terminators, next = "\n\f\r", stateCSS
		default:
			panic(c.state.String())
		}

		end := bytes.IndexAny(s, terminators)
		if end == -1 {
			return c, len(s)
		}
		// Per section 7.4 of EcmaScript 5 : http://es5.github.com/#x7.4
		// "However, the LineTerminator at the end of the line is not
		// considered to be part of the single-line comment; it is
		// recognized separately by the lexical grammar and becomes part
		// of the stream of input elements for the syntactic grammar."
		// Hence the terminator itself is left unconsumed.
		c.state = next
		return c, end
	}

	// tCSS is the context transition function for the CSS state.
	// It scans for the next byte that opens a string, a url(...) value, or a
	// comment and enters the matching state; if there is none, it consumes all
	// of s.
	func tCSS(c context, s []byte) (context, int) {
		// CSS quoted strings are almost never used except for:
		// (1) URLs as in background: "/foo.png"
		// (2) Multiword font-names as in font-family: "Times New Roman"
		// (3) List separators in content values as in inline-lists:
		//    <style>
		//    ul.inlineList { list-style: none; padding:0 }
		//    ul.inlineList > li { display: inline }
		//    ul.inlineList > li:before { content: ", " }
		//    ul.inlineList > li:first-child:before { content: "" }
		//    </style>
		//    <ul class=inlineList><li>One<li>Two<li>Three</ul>
		// (4) Attribute value selectors as in a[href="http://example.com/"]
		//
		// We conservatively treat all strings as URLs, but make some
		// allowances to avoid confusion.
		//
		// In (1), our conservative assumption is justified.
		// In (2), valid font names do not contain ':', '?', or '#', so our
		// conservative assumption is fine since we will never transition past
		// urlPartPreQuery.
		// In (3), our protocol heuristic should not be tripped, and there
		// should not be non-space content after a '?' or '#', so as long as
		// we only %-encode RFC 3986 reserved characters we are ok.
		// In (4), we should URL escape for URL attributes, and for others we
		// have the attribute name available if our conservative assumption
		// proves problematic for real code.

		const space = "\t\n\f\r "

		for pos := 0; ; pos++ {
			off := bytes.IndexAny(s[pos:], `("'/`)
			if off == -1 {
				return c, len(s)
			}
			pos += off
			switch s[pos] {
			case '"':
				c.state = stateCSSDqStr
				return c, pos + 1
			case '\'':
				c.state = stateCSSSqStr
				return c, pos + 1
			case '/':
				if pos+1 < len(s) {
					switch s[pos+1] {
					case '/':
						c.state = stateCSSLineCmt
						return c, pos + 2
					case '*':
						c.state = stateCSSBlockCmt
						return c, pos + 2
					}
				}
			case '(':
				// Look for url to the left; any other '(' is skipped.
				if !endsWithCSSKeyword(bytes.TrimRight(s[:pos], space), "url") {
					continue
				}
				// val is the index of the first non-space byte after '('.
				val := len(s) - len(bytes.TrimLeft(s[pos+1:], space))
				c.state = stateCSSURL
				if val != len(s) {
					switch s[val] {
					case '"':
						c.state, val = stateCSSDqURL, val+1
					case '\'':
						c.state, val = stateCSSSqURL, val+1
					}
				}
				return c, val
			}
		}
	}

	// tCSSStr is the context transition function for the CSS string and URL states.
	// It scans for the byte that ends the current string or URL, stepping over
	// the byte after each backslash, and returns to stateCSS once the end is
	// consumed. The text scanned is also passed, CSS-decoded, through tURL. An
	// input that ends right after a backslash yields an ErrPartialEscape error
	// context.
	func tCSSStr(c context, s []byte) (context, int) {
		var stops string
		switch c.state {
		case stateCSSDqStr, stateCSSDqURL:
			stops = `\"`
		case stateCSSSqStr, stateCSSSqURL:
			stops = `\'`
		case stateCSSURL:
			// Unquoted URLs end with a newline or close parenthesis.
			// The below includes the wc (whitespace character) and nl.
			stops = "\\\t\n\f\r )"
		default:
			panic(c.state.String())
		}

		for pos := 0; ; pos++ {
			off := bytes.IndexAny(s[pos:], stops)
			if off == -1 {
				// No terminator or escape in the remainder: hand it to tURL.
				next, n := tURL(c, decodeCSS(s[pos:]))
				return next, pos + n
			}
			pos += off
			if s[pos] != '\\' {
				c.state = stateCSS
				return c, pos + 1
			}
			// Step over the escaped byte, which must be present.
			pos++
			if pos == len(s) {
				return context{
					state: stateError,
					err:   errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in CSS string: %q", s),
				}, len(s)
			}
			// Only the updated context matters here; the byte count tURL
			// reports refers to the decoded text and is discarded.
			c, _ = tURL(c, decodeCSS(s[:pos+1]))
		}
	}

	// tError is the context transition function for the error state.
	// It consumes all of s and returns c unchanged.
	func tError(c context, s []byte) (context, int) {
	return c, len(s)
	}

	// eatAttrName returns the largest j such that s[i:j] is an attribute name.
	// It returns an error if s[i:] does not look like it begins with an
	// attribute name, such as encountering a quote mark without a preceding
	// equals sign. On error the returned index is -1.
	func eatAttrName(s []byte, i int) (int, *Error) {
		for end := i; end < len(s); end++ {
			switch s[end] {
			case '\'', '"', '<':
				// These result in a parse warning in HTML5 and are
				// indicative of serious problems if seen in an attr
				// name in a template.
				return -1, errorf(ErrBadHTML, nil, 0, "%q in attribute name: %.32q", s[end:end+1], s)
			case ' ', '\t', '\n', '\f', '\r', '=', '>':
				// White space, '=' and '>' end the name.
				return end, nil
			}
		}
		return len(s), nil
	}

	// elementNameMap maps lower-case tag names to element constants.
	// eatTagName lower-cases a name before looking it up; names not listed here
	// yield the map's zero value.
	var elementNameMap = map[string]element{
	"script": elementScript,
	"style": elementStyle,
	"textarea": elementTextarea,
	"title": elementTitle,
	}

	// asciiAlpha reports whether c is an ASCII letter.
	func asciiAlpha(c byte) bool {
		return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
	}

	// asciiAlphaNum reports whether c is an ASCII letter or digit.
	func asciiAlphaNum(c byte) bool {
	return asciiAlpha(c) \|\| '0' <= c && c <= '9'
	}

	// eatTagName returns the largest j such that s[i:j] is a tag name and the tag type.
	func eatTagName(s []byte, i int) (int, element) {
	if i == len(s) \|\| !asciiAlpha(s[i]) {
	return i, elementNone
	}
	j := i + 1
	for j < len(s) {
	x := s[j]
	if asciiAlphaNum(x) {
	j++
	continue
	}
	// Allow "x-y" or "x:y" but not "x-", "-y", or "x--y".
	if (x == ':' \|\| x == '-') && j+1 < len(s) && asciiAlphaNum(s[j+1]) {
	j += 2
	continue
	}
	break
	}
	return j, elementNameMap[strings.ToLower(string(s[i:j]))]
	}

	// eatWhiteSpace returns the largest j such that s[i:j] is white space.
	// White space is space, tab, line feed, form feed, or carriage return.
	func eatWhiteSpace(s []byte, i int) int {
		for ; i < len(s); i++ {
			if ch := s[i]; ch != ' ' && ch != '\t' && ch != '\n' && ch != '\f' && ch != '\r' {
				return i
			}
		}
		return len(s)
	}