src/pkg/exp/template/html/transition.go - go - Git at Google

 // Copyright 2011 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 package html

 import (
 	"bytes"
 	"fmt"
 	"os"
 	"strings"
 )

 // transitionFunc is the array of context transition functions for text nodes.
 // A transition function takes a context and template text input, and returns
 // the updated context and any unconsumed text.
 //
 // The array is indexed by state constant. Several states share one
 // function: both JS string states use tJSStr, all CSS string and URL states
 // use tCSSStr, and the JS and CSS comment states share tBlockCmt/tLineCmt.
 var transitionFunc = [...]func(context, []byte) (context, []byte){
 	stateText:        tText,
 	stateTag:         tTag,
 	stateComment:     tComment,
 	stateRCDATA:      tSpecialTagEnd,
 	stateAttr:        tAttr,
 	stateURL:         tURL,
 	stateJS:          tJS,
 	stateJSDqStr:     tJSStr,
 	stateJSSqStr:     tJSStr,
 	stateJSRegexp:    tJSRegexp,
 	stateJSBlockCmt:  tBlockCmt,
 	stateJSLineCmt:   tLineCmt,
 	stateCSS:         tCSS,
 	stateCSSDqStr:    tCSSStr,
 	stateCSSSqStr:    tCSSStr,
 	stateCSSDqURL:    tCSSStr,
 	stateCSSSqURL:    tCSSStr,
 	stateCSSURL:      tCSSStr,
 	stateCSSBlockCmt: tBlockCmt,
 	stateCSSLineCmt:  tLineCmt,
 	stateError:       tError,
 }

 // Delimiters of an HTML comment.
 var (
 	// commentStart opens an HTML comment; tText looks for it.
 	commentStart = []byte("<!--")
 	// commentEnd closes an HTML comment; tComment looks for it.
 	commentEnd = []byte("-->")
 )

 // tText is the context transition function for the text state.
 // It looks for the next '<' that opens either an HTML comment or a start/end
 // tag. On a comment it switches to stateComment; on a tag it switches to
 // stateTag, recording the element type reported by eatTagName. In both cases
 // the text after the opener is returned as unconsumed. If no opener is
 // found, the context is returned unchanged with nil.
 func tText(c context, s []byte) (context, []byte) {
 	for {
 		lt := bytes.IndexByte(s, '<')
 		if lt < 0 || lt == len(s)-1 {
 			// No '<' at all, or a lone '<' as the very last byte.
 			return c, nil
 		}
 		if rest := s[lt:]; len(rest) >= 4 && bytes.Equal(commentStart, rest[:4]) {
 			return context{state: stateComment}, rest[4:]
 		}
 		nameStart := lt + 1
 		if s[nameStart] == '/' {
 			// Possible end tag: the name starts after the slash.
 			nameStart++
 			if nameStart == len(s) {
 				return c, nil
 			}
 		}
 		nameEnd, elem := eatTagName(s, nameStart)
 		if nameEnd != nameStart {
 			// A non-empty tag name follows, so this is an HTML tag.
 			return context{state: stateTag, element: elem}, s[nameEnd:]
 		}
 		// Not a tag after all; keep scanning from where the name would be.
 		s = s[nameEnd:]
 	}
 	panic("unreachable")
 }

 // elementContentType maps an element type to the state that tTag enters
 // once the tag's closing '>' has been seen, i.e. the state used for the
 // element's body.
 var elementContentType = [...]state{
 	elementNone:     stateText,
 	elementScript:   stateJS,
 	elementStyle:    stateCSS,
 	elementTextarea: stateRCDATA,
 	elementTitle:    stateRCDATA,
 }

 // tTag is the context transition function for the tag state.
 // It reads one attribute name from s and decides what comes next:
 //   - a malformed name yields stateError;
 //   - '>' ends the tag and enters the element's content state;
 //   - '=' starts an attribute value, whose state depends on the attribute
 //     name (URL, JS for "on*" handlers, CSS for "style", plain otherwise)
 //     and whose delimiter is a quote or space/tag-end;
 //   - anything else (valueless attribute, '/') stays in the tag state.
 func tTag(c context, s []byte) (context, []byte) {
 	nameStart := eatWhiteSpace(s, 0)
 	nameEnd, err := eatAttrName(s, nameStart)
 	if err != nil {
 		return context{state: stateError, errStr: err.String()}, nil
 	}
 	if nameEnd == len(s) {
 		return c, nil
 	}

 	// Classify the value by the case-folded attribute name.
 	name := strings.ToLower(string(s[nameStart:nameEnd]))
 	var valueState state
 	switch {
 	case urlAttr[name]:
 		valueState = stateURL
 	case strings.HasPrefix(name, "on"):
 		valueState = stateJS
 	case name == "style":
 		valueState = stateCSS
 	default:
 		valueState = stateAttr
 	}

 	// See what follows the name.
 	pos := eatWhiteSpace(s, nameEnd)
 	if pos == len(s) {
 		return c, s[pos:]
 	}
 	switch s[pos] {
 	case '>':
 		return context{state: elementContentType[c.element], element: c.element}, s[pos+1:]
 	case '=':
 		// An attribute value follows; handled below.
 	default:
 		// Possible due to a valueless attribute or '/' in "<input />".
 		return c, s[pos:]
 	}

 	// Step over the "=" and any white space, then pick the delimiter.
 	pos = eatWhiteSpace(s, pos+1)
 	delim := delimSpaceOrTagEnd
 	if pos < len(s) {
 		if s[pos] == '\'' {
 			delim = delimSingleQuote
 			pos++
 		} else if s[pos] == '"' {
 			delim = delimDoubleQuote
 			pos++
 		}
 	}
 	return context{state: valueState, delim: delim, element: c.element}, s[pos:]
 }

 // tComment is the context transition function for stateComment.
 // When s contains the comment terminator it returns a zero context and the
 // text following the terminator; otherwise the comment continues and the
 // context is returned unchanged with nil.
 func tComment(c context, s []byte) (context, []byte) {
 	end := bytes.Index(s, commentEnd)
 	if end == -1 {
 		return c, nil
 	}
 	return context{}, s[end+len(commentEnd):]
 }

 // specialTagEndMarkers maps element types to the character sequence that
 // case-insensitively signals the end of the special tag body.
 // Every marker is written in lower case and omits the closing '>';
 // tSpecialTagEnd searches for these markers.
 var specialTagEndMarkers = [...]string{
 	elementScript:   "</script",
 	elementStyle:    "</style",
 	elementTextarea: "</textarea",
 	elementTitle:    "</title",
 }

 // tSpecialTagEnd is the context transition function for raw text and RCDATA
 // element states.
 //
 // If c is inside a special element and s contains that element's end marker
 // (see specialTagEndMarkers), it returns a fresh stateTag context and the
 // text following the marker. Otherwise it returns c unchanged and nil.
 //
 // The marker is matched directly against the bytes of s, folding only the
 // ASCII letters 'A'-'Z'. Searching a strings.ToLower copy is unsafe here:
 // Unicode lowering can change the byte length of the text (invalid UTF-8
 // and some non-ASCII letters), so an index found in the copy need not be a
 // valid index into s, and non-ASCII letters that lower to ASCII could be
 // mistaken for part of an end tag.
 func tSpecialTagEnd(c context, s []byte) (context, []byte) {
 	if c.element == elementNone {
 		return c, nil
 	}
 	end := specialTagEndMarkers[c.element]
 	for i := 0; i+len(end) <= len(s); i++ {
 		j := 0
 		for ; j < len(end); j++ {
 			b := s[i+j]
 			if 'A' <= b && b <= 'Z' {
 				b += 'a' - 'A'
 			}
 			// The markers are all lower-case ASCII, so folding s suffices.
 			if b != end[j] {
 				break
 			}
 		}
 		if j == len(end) {
 			return context{state: stateTag}, s[i+len(end):]
 		}
 	}
 	return c, nil
 }

 // tAttr is the context transition function for the attribute state.
 // It leaves the context unchanged and returns nil, i.e. it reports no
 // unconsumed text regardless of the contents of s.
 func tAttr(c context, s []byte) (context, []byte) {
 	return c, nil
 }

 // tURL is the context transition function for the URL state.
 // A '#' or '?' anywhere in s moves the URL part to urlPartQueryOrFrag.
 // Otherwise, non-empty text moves it from urlPartNone to urlPartPreQuery.
 // All of s is consumed.
 func tURL(c context, s []byte) (context, []byte) {
 	switch {
 	case bytes.IndexAny(s, "#?") >= 0:
 		c.urlPart = urlPartQueryOrFrag
 	case c.urlPart == urlPartNone && len(s) != 0:
 		c.urlPart = urlPartPreQuery
 	}
 	return c, nil
 }

 // tJS is the context transition function for the JS state.
 // It first checks for the end of the enclosing special element. Otherwise
 // it advances to the next quote or slash, updating jsCtx from the tokens
 // skipped over, and switches to the string, comment, or regexp state that
 // the character introduces. A '/' whose role (division or regexp) cannot
 // be determined produces stateError.
 func tJS(c context, s []byte) (context, []byte) {
 	if end, rest := tSpecialTagEnd(c, s); rest != nil {
 		return end, rest
 	}

 	pos := bytes.IndexAny(s, `"'/`)
 	if pos == -1 {
 		// Entire input is non string, comment, regexp tokens.
 		c.jsCtx = nextJSCtx(s, c.jsCtx)
 		return c, nil
 	}
 	c.jsCtx = nextJSCtx(s[:pos], c.jsCtx)

 	switch s[pos] {
 	case '"':
 		c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp
 		return c, s[pos+1:]
 	case '\'':
 		c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp
 		return c, s[pos+1:]
 	case '/':
 		// Handled below.
 	default:
 		panic("unreachable")
 	}

 	// s[pos] is '/': a comment opener takes precedence over div/regexp.
 	if pos+1 < len(s) {
 		switch s[pos+1] {
 		case '/':
 			c.state = stateJSLineCmt
 			return c, s[pos+2:]
 		case '*':
 			c.state = stateJSBlockCmt
 			return c, s[pos+2:]
 		}
 	}
 	switch c.jsCtx {
 	case jsCtxRegexp:
 		c.state = stateJSRegexp
 	case jsCtxDivOp:
 		// A division operator; a regexp may follow it.
 		c.jsCtx = jsCtxRegexp
 	default:
 		return context{
 			state:  stateError,
 			errStr: fmt.Sprintf("'/' could start div or regexp: %.32q", s[pos:]),
 		}, nil
 	}
 	return c, s[pos+1:]
 }

 // tJSStr is the context transition function for the JS string states.
 // It skips backslash escapes and, on reaching the closing quote that
 // matches the current state, returns to stateJS with a division-operator
 // context. A backslash as the last byte of the input is an error.
 func tJSStr(c context, s []byte) (context, []byte) {
 	if end, rest := tSpecialTagEnd(c, s); rest != nil {
 		return end, rest
 	}

 	special := `\"`
 	if c.state == stateJSSqStr {
 		special = `\'`
 	}

 	rest := s
 	for {
 		k := bytes.IndexAny(rest, special)
 		if k == -1 {
 			// The string continues past this chunk of text.
 			return c, nil
 		}
 		if rest[k] != '\\' {
 			// Closing quote.
 			c.state, c.jsCtx = stateJS, jsCtxDivOp
 			return c, rest[k+1:]
 		}
 		if k+1 == len(rest) {
 			return context{
 				state:  stateError,
 				errStr: fmt.Sprintf("unfinished escape sequence in JS string: %q", s),
 			}, nil
 		}
 		// Skip the backslash and the byte it escapes.
 		rest = rest[k+2:]
 	}
 	panic("unreachable")
 }

 // tJSRegexp is the context transition function for the /RegExp/ literal state.
 // It walks the regexp body, tracking whether it is inside a [charset] and
 // skipping backslash escapes. A '/' outside a charset ends the literal and
 // returns to stateJS. Ending the input on a backslash or inside a charset
 // is reported as stateError.
 func tJSRegexp(c context, s []byte) (context, []byte) {
 	if end, rest := tSpecialTagEnd(c, s); rest != nil {
 		return end, rest
 	}

 	const meta = `/[\]`
 	rest, inCharset := s, false
 	for k := bytes.IndexAny(rest, meta); k != -1; k = bytes.IndexAny(rest, meta) {
 		skip := 1 // bytes to step over, starting at rest[k]
 		switch rest[k] {
 		case '[':
 			inCharset = true
 		case ']':
 			inCharset = false
 		case '\\':
 			if k+1 == len(rest) {
 				return context{
 					state:  stateError,
 					errStr: fmt.Sprintf("unfinished escape sequence in JS regexp: %q", s),
 				}, nil
 			}
 			skip = 2
 		case '/':
 			if !inCharset {
 				c.state, c.jsCtx = stateJS, jsCtxDivOp
 				return c, rest[k+1:]
 			}
 		default:
 			panic("unreachable")
 		}
 		rest = rest[k+skip:]
 	}

 	if inCharset {
 		// This can be fixed by making context richer if interpolation
 		// into charsets is desired.
 		return context{
 			state:  stateError,
 			errStr: fmt.Sprintf("unfinished JS regexp charset: %q", s),
 		}, nil
 	}

 	return c, nil
 }

 // blockCommentEnd is the terminator that tBlockCmt searches for.
 var blockCommentEnd = []byte{'*', '/'}

 // tBlockCmt is the context transition function for /*comment*/ states.
 // Once the comment terminator is found it returns to stateJS or stateCSS,
 // whichever the comment was opened from, with the text after the terminator
 // unconsumed. It panics if called in any other state.
 func tBlockCmt(c context, s []byte) (context, []byte) {
 	if end, rest := tSpecialTagEnd(c, s); rest != nil {
 		return end, rest
 	}
 	closeAt := bytes.Index(s, blockCommentEnd)
 	if closeAt == -1 {
 		return c, nil
 	}
 	if c.state == stateJSBlockCmt {
 		c.state = stateJS
 	} else if c.state == stateCSSBlockCmt {
 		c.state = stateCSS
 	} else {
 		panic(c.state.String())
 	}
 	return c, s[closeAt+2:]
 }

 // tLineCmt is the context transition function for //comment states.
 // The comment runs up to the first line terminator in s. That terminator is
 // left unconsumed and the context returns to stateJS or stateCSS. It panics
 // if called in any other state.
 func tLineCmt(c context, s []byte) (context, []byte) {
 	if end, rest := tSpecialTagEnd(c, s); rest != nil {
 		return end, rest
 	}

 	var terminators string
 	var resume state
 	if c.state == stateJSLineCmt {
 		terminators, resume = "\n\r\u2028\u2029", stateJS
 	} else if c.state == stateCSSLineCmt {
 		// Line comments are not part of any published CSS standard but
 		// are supported by the 4 major browsers.
 		// This defines line comments as
 		//     LINECOMMENT ::= "//" [^\n\f\r]*
 		// since http://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines
 		// newlines:
 		//     nl ::= #xA | #xD #xA | #xD | #xC
 		terminators, resume = "\n\f\r", stateCSS
 	} else {
 		panic(c.state.String())
 	}

 	eol := bytes.IndexAny(s, terminators)
 	if eol == -1 {
 		return c, nil
 	}
 	// Per section 7.4 of EcmaScript 5 : http://es5.github.com/#x7.4
 	// "However, the LineTerminator at the end of the line is not
 	// considered to be part of the single-line comment; it is recognised
 	// separately by the lexical grammar and becomes part of the stream of
 	// input elements for the syntactic grammar."
 	// Hence the terminator itself stays in the unconsumed text.
 	c.state = resume
 	return c, s[eol:]
 }

 // tCSS is the context transition function for the CSS state.
 // It scans for the next construct that changes state: a quoted string, a
 // url( token (quoted or unquoted), or a line/block comment opener.
 func tCSS(c context, s []byte) (context, []byte) {
 	if end, rest := tSpecialTagEnd(c, s); rest != nil {
 		return end, rest
 	}

 	// Quoted strings are rare in CSS. They mainly appear as:
 	// (1) URLs, as in background: "/foo.png"
 	// (2) Multiword font-names, as in font-family: "Times New Roman"
 	// (3) List separators in content values, as in inline-lists:
 	//    <style>
 	//    ul.inlineList { list-style: none; padding:0 }
 	//    ul.inlineList > li { display: inline }
 	//    ul.inlineList > li:before { content: ", " }
 	//    ul.inlineList > li:first-child:before { content: "" }
 	//    </style>
 	//    <ul class=inlineList><li>One<li>Two<li>Three</ul>
 	// (4) Attribute value selectors, as in a[href="http://example.com/"]
 	//
 	// All strings are conservatively treated as URLs, with some
 	// allowances to avoid confusion:
 	//
 	// For (1) the assumption is simply correct.
 	// For (2), valid font names contain no ':', '?', or '#', so the
 	// context never moves past urlPartPreQuery.
 	// For (3), the protocol heuristic should not be tripped and no
 	// non-space content should follow a '?' or '#', so it is fine as long
 	// as only RFC 3986 reserved characters are %-encoded.
 	// For (4), URL attributes should be URL escaped, and for the others
 	// the attribute name is available should the assumption prove
 	// problematic for real code.

 	const space = "\t\n\f\r "
 	for {
 		k := bytes.IndexAny(s, `("'/`)
 		if k == -1 {
 			return c, nil
 		}
 		switch s[k] {
 		case '"':
 			c.state = stateCSSDqStr
 			return c, s[k+1:]
 		case '\'':
 			c.state = stateCSSSqStr
 			return c, s[k+1:]
 		case '/':
 			if k+1 < len(s) {
 				if s[k+1] == '/' {
 					c.state = stateCSSLineCmt
 					return c, s[k+2:]
 				} else if s[k+1] == '*' {
 					c.state = stateCSSBlockCmt
 					return c, s[k+2:]
 				}
 			}
 		case '(':
 			// Only a '(' preceded by the keyword url opens a URL.
 			before := bytes.TrimRight(s[:k], space)
 			if endsWithCSSKeyword(before, "url") {
 				after := bytes.TrimLeft(s[k+1:], space)
 				if len(after) != 0 {
 					switch after[0] {
 					case '"':
 						c.state = stateCSSDqURL
 						return c, after[1:]
 					case '\'':
 						c.state = stateCSSSqURL
 						return c, after[1:]
 					}
 				}
 				c.state = stateCSSURL
 				return c, after
 			}
 		}
 		// Nothing of interest at k; resume just past it.
 		s = s[k+1:]
 	}
 	panic("unreachable")
 }

 // tCSSStr is the context transition function for the CSS string and URL states.
 // It scans to the terminator appropriate for the current state, skipping
 // backslash escapes, and then returns to stateCSS. Text skipped over is
 // decoded and fed through tURL so that the URL part is tracked. A trailing
 // backslash is reported as stateError. It panics in any other state.
 func tCSSStr(c context, s []byte) (context, []byte) {
 	if end, rest := tSpecialTagEnd(c, s); rest != nil {
 		return end, rest
 	}

 	var special string
 	switch c.state {
 	case stateCSSDqStr, stateCSSDqURL:
 		special = `\"`
 	case stateCSSSqStr, stateCSSSqURL:
 		special = `\'`
 	case stateCSSURL:
 		// Unquoted URLs end with a newline or close parenthesis.
 		// The below includes the wc (whitespace character) and nl.
 		special = "\\\t\n\f\r )"
 	default:
 		panic(c.state.String())
 	}

 	rest := s
 	for {
 		k := bytes.IndexAny(rest, special)
 		if k == -1 {
 			// No terminator yet: everything left is string content.
 			return tURL(c, decodeCSS(rest))
 		}
 		if rest[k] != '\\' {
 			// Terminator found.
 			c.state = stateCSS
 			return c, rest[k+1:]
 		}
 		k++ // now the index of the escaped byte
 		if k == len(rest) {
 			return context{
 				state:  stateError,
 				errStr: fmt.Sprintf("unfinished escape sequence in CSS string: %q", s),
 			}, nil
 		}
 		// Account for the content up to and including the escaped byte.
 		c, _ = tURL(c, decodeCSS(rest[:k+1]))
 		rest = rest[k+1:]
 	}
 	panic("unreachable")
 }

 // tError is the context transition function for the error state.
 // The context is returned unchanged and nil is returned for the remaining
 // text, so no input moves the context out of the error state.
 func tError(c context, s []byte) (context, []byte) {
 	return c, nil
 }

 // eatAttrName returns the largest j such that s[i:j] is an attribute name.
 // The name ends at white space, '=' or '>', or at the end of s. A quote mark
 // or '<' met before any of those (for example a quote without a preceding
 // equals sign) is reported as an error, with 0 as the index.
 func eatAttrName(s []byte, i int) (int, os.Error) {
 	for ; i < len(s); i++ {
 		ch := s[i]
 		if ch == '\'' || ch == '"' || ch == '<' {
 			// These result in a parse warning in HTML5 and are
 			// indicative of serious problems if seen in an attr
 			// name in a template.
 			return 0, fmt.Errorf("%q in attribute name: %.32q", s[i:i+1], s)
 		}
 		if ch == ' ' || ch == '\t' || ch == '\n' || ch == '\f' || ch == '\r' || ch == '=' || ch == '>' {
 			return i, nil
 		}
 	}
 	return len(s), nil
 }

 // elementNameMap maps a lower-case tag name to its element type.
 // eatTagName looks names up here after lower-casing them; a name that is
 // not listed yields the zero element value.
 var elementNameMap = map[string]element{
 	"script":   elementScript,
 	"style":    elementStyle,
 	"textarea": elementTextarea,
 	"title":    elementTitle,
 }

 // eatTagName returns the largest j such that s[i:j] is a tag name and the tag type.
 // A tag name is a run of ASCII letters and digits that does not begin with
 // a digit. The tag type comes from looking the lower-cased name up in
 // elementNameMap.
 func eatTagName(s []byte, i int) (int, element) {
 	end := i
 	for end < len(s) {
 		ch := s[end]
 		isLetter := ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z')
 		// A digit is accepted anywhere except as the first character.
 		isInnerDigit := '0' <= ch && ch <= '9' && end != i
 		if !isLetter && !isInnerDigit {
 			break
 		}
 		end++
 	}
 	return end, elementNameMap[strings.ToLower(string(s[i:end]))]
 }

 // eatWhiteSpace returns the largest j such that s[i:j] is white space.
 // White space is a space, tab, newline, form feed or carriage return.
 // If everything from i onward is white space the result is len(s).
 func eatWhiteSpace(s []byte, i int) int {
 	for ; i < len(s); i++ {
 		if ch := s[i]; ch != ' ' && ch != '\t' && ch != '\n' && ch != '\f' && ch != '\r' {
 			return i
 		}
 	}
 	return len(s)
 }

 // urlAttr is the set of attribute names whose values are URLs.
 // It consists of all "%URI"-typed attributes from
 // http://www.w3.org/TR/html4/index/attributes.html
 // as well as those attributes defined at
 // http://dev.w3.org/html5/spec/index.html#attributes-1
 // whose Value column in that table matches
 // "Valid [non-empty] URL potentially surrounded by spaces".
 //
 // Keys are lower case; tTag lower-cases an attribute name before
 // consulting this set.
 var urlAttr = map[string]bool{
 	"action":     true,
 	"archive":    true,
 	"background": true,
 	"cite":       true,
 	"classid":    true,
 	"codebase":   true,
 	"data":       true,
 	"formaction": true,
 	"href":       true,
 	"icon":       true,
 	"longdesc":   true,
 	"manifest":   true,
 	"poster":     true,
 	"profile":    true,
 	"src":        true,
 	"usemap":     true,
 }
	// Copyright 2011 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	package html

	import (
	"bytes"
	"fmt"
	"os"
	"strings"
	)

	// transitionFunc is the array of context transition functions for text nodes.
	// A transition function takes a context and template text input, and returns
	// the updated context and any unconsumed text.
	//
	// The array is indexed by state constant. Several states share one
	// function: both JS string states use tJSStr, all CSS string and URL states
	// use tCSSStr, and the JS and CSS comment states share tBlockCmt/tLineCmt.
	var transitionFunc = [...]func(context, []byte) (context, []byte){
	stateText: tText,
	stateTag: tTag,
	stateComment: tComment,
	stateRCDATA: tSpecialTagEnd,
	stateAttr: tAttr,
	stateURL: tURL,
	stateJS: tJS,
	stateJSDqStr: tJSStr,
	stateJSSqStr: tJSStr,
	stateJSRegexp: tJSRegexp,
	stateJSBlockCmt: tBlockCmt,
	stateJSLineCmt: tLineCmt,
	stateCSS: tCSS,
	stateCSSDqStr: tCSSStr,
	stateCSSSqStr: tCSSStr,
	stateCSSDqURL: tCSSStr,
	stateCSSSqURL: tCSSStr,
	stateCSSURL: tCSSStr,
	stateCSSBlockCmt: tBlockCmt,
	stateCSSLineCmt: tLineCmt,
	stateError: tError,
	}

	// Delimiters of an HTML comment.
	var (
		// commentStart opens an HTML comment; tText looks for it.
		commentStart = []byte("<!--")
		// commentEnd closes an HTML comment; tComment looks for it.
		commentEnd = []byte("-->")
	)

	// tText is the context transition function for the text state.
	func tText(c context, s []byte) (context, []byte) {
	for {
	i := bytes.IndexByte(s, '<')
	if i == -1 \|\| i+1 == len(s) {
	return c, nil
	} else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) {
	return context{state: stateComment}, s[i+4:]
	}
	i++
	if s[i] == '/' {
	if i+1 == len(s) {
	return c, nil
	}
	i++
	}
	j, e := eatTagName(s, i)
	if j != i {
	// We've found an HTML tag.
	return context{state: stateTag, element: e}, s[j:]
	}
	s = s[j:]
	}
	panic("unreachable")
	}

	// elementContentType maps an element type to the state that tTag enters
	// once the tag's closing '>' has been seen, i.e. the state used for the
	// element's body.
	var elementContentType = [...]state{
	elementNone: stateText,
	elementScript: stateJS,
	elementStyle: stateCSS,
	elementTextarea: stateRCDATA,
	elementTitle: stateRCDATA,
	}

	// tTag is the context transition function for the tag state.
	// It reads one attribute name from s and decides what comes next:
	//   - a malformed name yields stateError;
	//   - '>' ends the tag and enters the element's content state;
	//   - '=' starts an attribute value, whose state depends on the attribute
	//     name (URL, JS for "on*" handlers, CSS for "style", plain otherwise)
	//     and whose delimiter is a quote or space/tag-end;
	//   - anything else (valueless attribute, '/') stays in the tag state.
	func tTag(c context, s []byte) (context, []byte) {
		nameStart := eatWhiteSpace(s, 0)
		nameEnd, err := eatAttrName(s, nameStart)
		if err != nil {
			return context{state: stateError, errStr: err.String()}, nil
		}
		if nameEnd == len(s) {
			return c, nil
		}

		// Classify the value by the case-folded attribute name.
		name := strings.ToLower(string(s[nameStart:nameEnd]))
		var valueState state
		switch {
		case urlAttr[name]:
			valueState = stateURL
		case strings.HasPrefix(name, "on"):
			valueState = stateJS
		case name == "style":
			valueState = stateCSS
		default:
			valueState = stateAttr
		}

		// See what follows the name.
		pos := eatWhiteSpace(s, nameEnd)
		if pos == len(s) {
			return c, s[pos:]
		}
		switch s[pos] {
		case '>':
			return context{state: elementContentType[c.element], element: c.element}, s[pos+1:]
		case '=':
			// An attribute value follows; handled below.
		default:
			// Possible due to a valueless attribute or '/' in "<input />".
			return c, s[pos:]
		}

		// Step over the "=" and any white space, then pick the delimiter.
		pos = eatWhiteSpace(s, pos+1)
		delim := delimSpaceOrTagEnd
		if pos < len(s) {
			if s[pos] == '\'' {
				delim = delimSingleQuote
				pos++
			} else if s[pos] == '"' {
				delim = delimDoubleQuote
				pos++
			}
		}
		return context{state: valueState, delim: delim, element: c.element}, s[pos:]
	}

	// tComment is the context transition function for stateComment.
	// When s contains the comment terminator it returns a zero context and the
	// text following the terminator; otherwise the comment continues and the
	// context is returned unchanged with nil.
	func tComment(c context, s []byte) (context, []byte) {
		end := bytes.Index(s, commentEnd)
		if end == -1 {
			return c, nil
		}
		return context{}, s[end+len(commentEnd):]
	}

	// specialTagEndMarkers maps element types to the character sequence that
	// case-insensitively signals the end of the special tag body.
	// Every marker is written in lower case and omits the closing '>';
	// tSpecialTagEnd searches for these markers.
	var specialTagEndMarkers = [...]string{
	elementScript: "</script",
	elementStyle: "</style",
	elementTextarea: "</textarea",
	elementTitle: "</title",
	}

	// tSpecialTagEnd is the context transition function for raw text and RCDATA
	// element states.
	//
	// If c is inside a special element and s contains that element's end marker
	// (see specialTagEndMarkers), it returns a fresh stateTag context and the
	// text following the marker. Otherwise it returns c unchanged and nil.
	//
	// The marker is matched directly against the bytes of s, folding only the
	// ASCII letters 'A'-'Z'. Searching a strings.ToLower copy is unsafe here:
	// Unicode lowering can change the byte length of the text (invalid UTF-8
	// and some non-ASCII letters), so an index found in the copy need not be a
	// valid index into s, and non-ASCII letters that lower to ASCII could be
	// mistaken for part of an end tag.
	func tSpecialTagEnd(c context, s []byte) (context, []byte) {
		if c.element == elementNone {
			return c, nil
		}
		end := specialTagEndMarkers[c.element]
		for i := 0; i+len(end) <= len(s); i++ {
			j := 0
			for ; j < len(end); j++ {
				b := s[i+j]
				if 'A' <= b && b <= 'Z' {
					b += 'a' - 'A'
				}
				// The markers are all lower-case ASCII, so folding s suffices.
				if b != end[j] {
					break
				}
			}
			if j == len(end) {
				return context{state: stateTag}, s[i+len(end):]
			}
		}
		return c, nil
	}

	// tAttr is the context transition function for the attribute state.
	// It leaves the context unchanged and returns nil, i.e. it reports no
	// unconsumed text regardless of the contents of s.
	func tAttr(c context, s []byte) (context, []byte) {
	return c, nil
	}

	// tURL is the context transition function for the URL state.
	// A '#' or '?' anywhere in s moves the URL part to urlPartQueryOrFrag.
	// Otherwise, non-empty text moves it from urlPartNone to urlPartPreQuery.
	// All of s is consumed.
	func tURL(c context, s []byte) (context, []byte) {
		switch {
		case bytes.IndexAny(s, "#?") >= 0:
			c.urlPart = urlPartQueryOrFrag
		case c.urlPart == urlPartNone && len(s) != 0:
			c.urlPart = urlPartPreQuery
		}
		return c, nil
	}

	// tJS is the context transition function for the JS state.
	// It first checks for the end of the enclosing special element. Otherwise
	// it advances to the next quote or slash, updating jsCtx from the tokens
	// skipped over, and switches to the string, comment, or regexp state that
	// the character introduces. A '/' whose role (division or regexp) cannot
	// be determined produces stateError.
	func tJS(c context, s []byte) (context, []byte) {
		if end, rest := tSpecialTagEnd(c, s); rest != nil {
			return end, rest
		}

		pos := bytes.IndexAny(s, `"'/`)
		if pos == -1 {
			// Entire input is non string, comment, regexp tokens.
			c.jsCtx = nextJSCtx(s, c.jsCtx)
			return c, nil
		}
		c.jsCtx = nextJSCtx(s[:pos], c.jsCtx)

		switch s[pos] {
		case '"':
			c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp
			return c, s[pos+1:]
		case '\'':
			c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp
			return c, s[pos+1:]
		case '/':
			// Handled below.
		default:
			panic("unreachable")
		}

		// s[pos] is '/': a comment opener takes precedence over div/regexp.
		if pos+1 < len(s) {
			switch s[pos+1] {
			case '/':
				c.state = stateJSLineCmt
				return c, s[pos+2:]
			case '*':
				c.state = stateJSBlockCmt
				return c, s[pos+2:]
			}
		}
		switch c.jsCtx {
		case jsCtxRegexp:
			c.state = stateJSRegexp
		case jsCtxDivOp:
			// A division operator; a regexp may follow it.
			c.jsCtx = jsCtxRegexp
		default:
			return context{
				state:  stateError,
				errStr: fmt.Sprintf("'/' could start div or regexp: %.32q", s[pos:]),
			}, nil
		}
		return c, s[pos+1:]
	}

	// tJSStr is the context transition function for the JS string states.
	// It skips backslash escapes and, on reaching the closing quote that
	// matches the current state, returns to stateJS with a division-operator
	// context. A backslash as the last byte of the input is an error.
	func tJSStr(c context, s []byte) (context, []byte) {
		if end, rest := tSpecialTagEnd(c, s); rest != nil {
			return end, rest
		}

		special := `\"`
		if c.state == stateJSSqStr {
			special = `\'`
		}

		rest := s
		for {
			k := bytes.IndexAny(rest, special)
			if k == -1 {
				// The string continues past this chunk of text.
				return c, nil
			}
			if rest[k] != '\\' {
				// Closing quote.
				c.state, c.jsCtx = stateJS, jsCtxDivOp
				return c, rest[k+1:]
			}
			if k+1 == len(rest) {
				return context{
					state:  stateError,
					errStr: fmt.Sprintf("unfinished escape sequence in JS string: %q", s),
				}, nil
			}
			// Skip the backslash and the byte it escapes.
			rest = rest[k+2:]
		}
		panic("unreachable")
	}

	// tJSRegexp is the context transition function for the /RegExp/ literal state.
	// It walks the regexp body, tracking whether it is inside a [charset] and
	// skipping backslash escapes. A '/' outside a charset ends the literal and
	// returns to stateJS. Ending the input on a backslash or inside a charset
	// is reported as stateError.
	func tJSRegexp(c context, s []byte) (context, []byte) {
		if end, rest := tSpecialTagEnd(c, s); rest != nil {
			return end, rest
		}

		const meta = `/[\]`
		rest, inCharset := s, false
		for k := bytes.IndexAny(rest, meta); k != -1; k = bytes.IndexAny(rest, meta) {
			skip := 1 // bytes to step over, starting at rest[k]
			switch rest[k] {
			case '[':
				inCharset = true
			case ']':
				inCharset = false
			case '\\':
				if k+1 == len(rest) {
					return context{
						state:  stateError,
						errStr: fmt.Sprintf("unfinished escape sequence in JS regexp: %q", s),
					}, nil
				}
				skip = 2
			case '/':
				if !inCharset {
					c.state, c.jsCtx = stateJS, jsCtxDivOp
					return c, rest[k+1:]
				}
			default:
				panic("unreachable")
			}
			rest = rest[k+skip:]
		}

		if inCharset {
			// This can be fixed by making context richer if interpolation
			// into charsets is desired.
			return context{
				state:  stateError,
				errStr: fmt.Sprintf("unfinished JS regexp charset: %q", s),
			}, nil
		}

		return c, nil
	}

	// blockCommentEnd is the terminator that tBlockCmt searches for.
	var blockCommentEnd = []byte{'*', '/'}

	// tBlockCmt is the context transition function for /*comment*/ states.
	// Once the comment terminator is found it returns to stateJS or stateCSS,
	// whichever the comment was opened from, with the text after the terminator
	// unconsumed. It panics if called in any other state.
	func tBlockCmt(c context, s []byte) (context, []byte) {
		if end, rest := tSpecialTagEnd(c, s); rest != nil {
			return end, rest
		}
		closeAt := bytes.Index(s, blockCommentEnd)
		if closeAt == -1 {
			return c, nil
		}
		if c.state == stateJSBlockCmt {
			c.state = stateJS
		} else if c.state == stateCSSBlockCmt {
			c.state = stateCSS
		} else {
			panic(c.state.String())
		}
		return c, s[closeAt+2:]
	}

	// tLineCmt is the context transition function for //comment states.
	// The comment runs up to the first line terminator in s. That terminator is
	// left unconsumed and the context returns to stateJS or stateCSS. It panics
	// if called in any other state.
	func tLineCmt(c context, s []byte) (context, []byte) {
		if end, rest := tSpecialTagEnd(c, s); rest != nil {
			return end, rest
		}

		var terminators string
		var resume state
		if c.state == stateJSLineCmt {
			terminators, resume = "\n\r\u2028\u2029", stateJS
		} else if c.state == stateCSSLineCmt {
			// Line comments are not part of any published CSS standard but
			// are supported by the 4 major browsers.
			// This defines line comments as
			//     LINECOMMENT ::= "//" [^\n\f\r]*
			// since http://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines
			// newlines:
			//     nl ::= #xA | #xD #xA | #xD | #xC
			terminators, resume = "\n\f\r", stateCSS
		} else {
			panic(c.state.String())
		}

		eol := bytes.IndexAny(s, terminators)
		if eol == -1 {
			return c, nil
		}
		// Per section 7.4 of EcmaScript 5 : http://es5.github.com/#x7.4
		// "However, the LineTerminator at the end of the line is not
		// considered to be part of the single-line comment; it is recognised
		// separately by the lexical grammar and becomes part of the stream of
		// input elements for the syntactic grammar."
		// Hence the terminator itself stays in the unconsumed text.
		c.state = resume
		return c, s[eol:]
	}

	// tCSS is the context transition function for the CSS state.
	// It scans for the next construct that changes state: a quoted string, a
	// url( token (quoted or unquoted), or a line/block comment opener.
	func tCSS(c context, s []byte) (context, []byte) {
		if end, rest := tSpecialTagEnd(c, s); rest != nil {
			return end, rest
		}

		// Quoted strings are rare in CSS. They mainly appear as:
		// (1) URLs, as in background: "/foo.png"
		// (2) Multiword font-names, as in font-family: "Times New Roman"
		// (3) List separators in content values, as in inline-lists:
		//    <style>
		//    ul.inlineList { list-style: none; padding:0 }
		//    ul.inlineList > li { display: inline }
		//    ul.inlineList > li:before { content: ", " }
		//    ul.inlineList > li:first-child:before { content: "" }
		//    </style>
		//    <ul class=inlineList><li>One<li>Two<li>Three</ul>
		// (4) Attribute value selectors, as in a[href="http://example.com/"]
		//
		// All strings are conservatively treated as URLs, with some
		// allowances to avoid confusion:
		//
		// For (1) the assumption is simply correct.
		// For (2), valid font names contain no ':', '?', or '#', so the
		// context never moves past urlPartPreQuery.
		// For (3), the protocol heuristic should not be tripped and no
		// non-space content should follow a '?' or '#', so it is fine as long
		// as only RFC 3986 reserved characters are %-encoded.
		// For (4), URL attributes should be URL escaped, and for the others
		// the attribute name is available should the assumption prove
		// problematic for real code.

		const space = "\t\n\f\r "
		for {
			k := bytes.IndexAny(s, `("'/`)
			if k == -1 {
				return c, nil
			}
			switch s[k] {
			case '"':
				c.state = stateCSSDqStr
				return c, s[k+1:]
			case '\'':
				c.state = stateCSSSqStr
				return c, s[k+1:]
			case '/':
				if k+1 < len(s) {
					if s[k+1] == '/' {
						c.state = stateCSSLineCmt
						return c, s[k+2:]
					} else if s[k+1] == '*' {
						c.state = stateCSSBlockCmt
						return c, s[k+2:]
					}
				}
			case '(':
				// Only a '(' preceded by the keyword url opens a URL.
				before := bytes.TrimRight(s[:k], space)
				if endsWithCSSKeyword(before, "url") {
					after := bytes.TrimLeft(s[k+1:], space)
					if len(after) != 0 {
						switch after[0] {
						case '"':
							c.state = stateCSSDqURL
							return c, after[1:]
						case '\'':
							c.state = stateCSSSqURL
							return c, after[1:]
						}
					}
					c.state = stateCSSURL
					return c, after
				}
			}
			// Nothing of interest at k; resume just past it.
			s = s[k+1:]
		}
		panic("unreachable")
	}

	// tCSSStr is the context transition function for the CSS string and URL states.
	// It scans to the terminator appropriate for the current state, skipping
	// backslash escapes, and then returns to stateCSS. Text skipped over is
	// decoded and fed through tURL so that the URL part is tracked. A trailing
	// backslash is reported as stateError. It panics in any other state.
	func tCSSStr(c context, s []byte) (context, []byte) {
		if end, rest := tSpecialTagEnd(c, s); rest != nil {
			return end, rest
		}

		var special string
		switch c.state {
		case stateCSSDqStr, stateCSSDqURL:
			special = `\"`
		case stateCSSSqStr, stateCSSSqURL:
			special = `\'`
		case stateCSSURL:
			// Unquoted URLs end with a newline or close parenthesis.
			// The below includes the wc (whitespace character) and nl.
			special = "\\\t\n\f\r )"
		default:
			panic(c.state.String())
		}

		rest := s
		for {
			k := bytes.IndexAny(rest, special)
			if k == -1 {
				// No terminator yet: everything left is string content.
				return tURL(c, decodeCSS(rest))
			}
			if rest[k] != '\\' {
				// Terminator found.
				c.state = stateCSS
				return c, rest[k+1:]
			}
			k++ // now the index of the escaped byte
			if k == len(rest) {
				return context{
					state:  stateError,
					errStr: fmt.Sprintf("unfinished escape sequence in CSS string: %q", s),
				}, nil
			}
			// Account for the content up to and including the escaped byte.
			c, _ = tURL(c, decodeCSS(rest[:k+1]))
			rest = rest[k+1:]
		}
		panic("unreachable")
	}

	// tError is the context transition function for the error state.
	// The context is returned unchanged and nil is returned for the remaining
	// text, so no input moves the context out of the error state.
	func tError(c context, s []byte) (context, []byte) {
	return c, nil
	}

	// eatAttrName returns the largest j such that s[i:j] is an attribute name.
	// The name ends at white space, '=' or '>', or at the end of s. A quote mark
	// or '<' met before any of those (for example a quote without a preceding
	// equals sign) is reported as an error, with 0 as the index.
	func eatAttrName(s []byte, i int) (int, os.Error) {
		for ; i < len(s); i++ {
			ch := s[i]
			if ch == '\'' || ch == '"' || ch == '<' {
				// These result in a parse warning in HTML5 and are
				// indicative of serious problems if seen in an attr
				// name in a template.
				return 0, fmt.Errorf("%q in attribute name: %.32q", s[i:i+1], s)
			}
			if ch == ' ' || ch == '\t' || ch == '\n' || ch == '\f' || ch == '\r' || ch == '=' || ch == '>' {
				return i, nil
			}
		}
		return len(s), nil
	}

	// elementNameMap maps a lower-case tag name to its element type.
	// eatTagName looks names up here after lower-casing them; a name that is
	// not listed yields the zero element value.
	var elementNameMap = map[string]element{
	"script": elementScript,
	"style": elementStyle,
	"textarea": elementTextarea,
	"title": elementTitle,
	}

	// eatTagName returns the largest j such that s[i:j] is a tag name and the tag type.
	func eatTagName(s []byte, i int) (int, element) {
	j := i
	for ; j < len(s); j++ {
	x := s[j]
	if !(('a' <= x && x <= 'z') \|\|
	('A' <= x && x <= 'Z') \|\|
	('0' <= x && x <= '9' && i != j)) {
	break
	}
	}
	return j, elementNameMap[strings.ToLower(string(s[i:j]))]
	}

	// eatWhiteSpace returns the largest j such that s[i:j] is white space.
	// White space is a space, tab, newline, form feed or carriage return.
	// If everything from i onward is white space the result is len(s).
	func eatWhiteSpace(s []byte, i int) int {
		for ; i < len(s); i++ {
			if ch := s[i]; ch != ' ' && ch != '\t' && ch != '\n' && ch != '\f' && ch != '\r' {
				return i
			}
		}
		return len(s)
	}

	// urlAttr is the set of attribute names whose values are URLs.
	// It consists of all "%URI"-typed attributes from
	// http://www.w3.org/TR/html4/index/attributes.html
	// as well as those attributes defined at
	// http://dev.w3.org/html5/spec/index.html#attributes-1
	// whose Value column in that table matches
	// "Valid [non-empty] URL potentially surrounded by spaces".
	//
	// Keys are lower case; tTag lower-cases an attribute name before
	// consulting this set.
	var urlAttr = map[string]bool{
	"action": true,
	"archive": true,
	"background": true,
	"cite": true,
	"classid": true,
	"codebase": true,
	"data": true,
	"formaction": true,
	"href": true,
	"icon": true,
	"longdesc": true,
	"manifest": true,
	"poster": true,
	"profile": true,
	"src": true,
	"usemap": true,
	}