src/pkg/exp/template/html/js.go - go - Git at Google

 // Copyright 2011 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 package html

 import (
 	"bytes"
 	"fmt"
 	"json"
 	"strings"
 	"utf8"
 )

 // nextJSCtx reports whether a slash that follows the token run s would begin
 // a regular expression literal or a division operator (/ or /=).
 //
 // The run is assumed to contain no string tokens, comment tokens, regular
 // expression literal tokens, or division operators. If nothing is left of s
 // once trailing whitespace is trimmed, the preceding context is returned.
 //
 // The decision is a heuristic needing one token of lookbehind, based on the
 // draft JavaScript 2.0 lexical grammar:
 // http://www.mozilla.org/js/language/js20-2000-07/rationale/syntax.html
 // It misjudges some valid but nonsensical programs such as "x = ++/foo/i"
 // (which is quite different from "x++/foo/i"), but is not known to fail on
 // any useful program.
 func nextJSCtx(s []byte, preceding jsCtx) jsCtx {
 	s = bytes.TrimRight(s, "\t\n\f\r \u2028\u2029")
 	n := len(s)
 	if n == 0 {
 		return preceding
 	}

 	// Every byte examined below belongs to the single-byte UTF-8 group.
 	last := s[n-1]
 	switch last {
 	case '+', '-':
 		// A lone + or - (infix or prefix) precedes a regexp, whereas
 		// ++ and -- do not. Measure the run of identical signs that
 		// ends the input.
 		run := 1
 		for run < n && s[n-1-run] == last {
 			run++
 		}
 		if run%2 == 1 {
 			// An odd run ends in a lone sign: "---" reads as "-- -".
 			return jsCtxRegexp
 		}
 		return jsCtxDivOp
 	case '.':
 		// A dot directly after a digit, as in "42.", ends a number.
 		if n > 1 {
 			if prev := s[n-2]; '0' <= prev && prev <= '9' {
 				return jsCtxDivOp
 			}
 		}
 		return jsCtxRegexp
 	// Punctuators from section 7.7 of the language spec that are not
 	// handled above.
 	//
 	// CAVEAT: of the close punctuators, ']' and ')' are left to the
 	// default branch and so precede a division operator, while '}' is
 	// treated as preceding a regexp. A '}' can precede a division as in
 	//    ({ valueOf: function () { return 42 } } / 2
 	// which is valid, but in practice developers don't divide object
 	// literals, so the heuristic works well for code like
 	//    function () { ... }  /foo/.test(x) && sideEffect();
 	// Likewise ')' can precede a regular expression as in
 	//     if (b) /foo/.test(x) && ...
 	// but that is much less likely than
 	//     (a + b) / c
 	case ',', '<', '>', '=', '*', '%', '&', '|', '^', '?', // suffixes that only end binary operators
 		'!', '~', // suffixes of prefix operators
 		'(', '[', // open brackets
 		':', ';', '{', // punctuators that precede expression starts
 		'}': // close brace, see CAVEAT
 		return jsCtxRegexp
 	default:
 		// Walk back over a trailing IdentifierName and check whether
 		// it is a keyword that can precede a regular expression.
 		begin := n
 		for begin > 0 && isJSIdentPart(int(s[begin-1])) {
 			begin--
 		}
 		if regexpPrecederKeywords[string(s[begin:])] {
 			return jsCtxRegexp
 		}
 	}
 	// What remains is a punctuator not listed above, a string, or an
 	// identifier, each of which precedes a division operator.
 	return jsCtxDivOp
 }

 // regexpPrecederKeywords is the set of reserved JS keywords after which a
 // slash starts a regular expression rather than a division operator.
 var regexpPrecederKeywords = map[string]bool{
 	// Keywords that introduce or continue a statement.
 	"break":    true,
 	"case":     true,
 	"continue": true,
 	"do":       true,
 	"else":     true,
 	"finally":  true,
 	"return":   true,
 	"throw":    true,
 	"try":      true,

 	// Keyword operators.
 	"delete":     true,
 	"in":         true,
 	"instanceof": true,
 	"typeof":     true,
 	"void":       true,
 }

 // jsValEscaper renders its inputs as a JS Expression (section 11.14) that has
 // neither side-effects nor free variables outside (NaN, Infinity).
 //
 // A single argument is marshalled to JSON directly; any other argument list
 // is first joined with fmt.Sprint and the resulting string is marshalled.
 // When marshalling fails the result is a JS comment carrying the error text
 // followed by null.
 func jsValEscaper(args ...interface{}) string {
 	var val interface{}
 	switch len(args) {
 	case 1:
 		val = args[0]
 	default:
 		val = fmt.Sprint(args...)
 	}
 	// TODO: detect cycles before calling Marshal, which loops infinitely
 	// on cyclic data. This may be an unacceptable DoS risk.

 	// TODO: make sure that json.Marshal escapes codepoints U+2028 & U+2029
 	// so the output stays within the subset of JSON that is valid JS, and
 	// maybe post-process to prevent it from containing
 	// "<!--", "-->", "<![CDATA[", "]]>", or "</script"
 	// in case custom marshallers produce output containing those.

 	// TODO: Maybe abbreviate \u00ab to \xab to produce more compact output.

 	// TODO: JSON allows arbitrary unicode codepoints, but EcmaScript
 	// defines a SourceCharacter as either a UTF-16 or UCS-2 code-unit.
 	// Determine whether supplemental codepoints in UTF-8 encoded JS inside
 	// string literals are properly interpreted by major interpreters.

 	encoded, err := json.Marshal(val)
 	if err != nil {
 		// Neutralize any comment terminator inside the error text.
 		msg := strings.Replace(err.String(), "*/", "* /", -1)
 		// The leading space keeps a comment that is flush against a
 		// division operator from becoming a line comment, as in
 		//     x/{{y}}
 		// turning into
 		//     x//* error marshalling y:
 		//          second line of error message */null
 		return fmt.Sprintf(" /* %s */null ", msg)
 	}
 	if len(encoded) == 0 {
 		return string(encoded)
 	}
 	head, _ := utf8.DecodeRune(encoded)
 	tail, _ := utf8.DecodeLastRune(encoded)
 	if !isJSIdentPart(head) && !isJSIdentPart(tail) {
 		return string(encoded)
 	}
 	// The value starts or ends with an identifier part, so it is padded
 	// with a space on each side.
 	return " " + string(encoded) + " "
 }

 // jsStrEscaper escapes its input so the result can be placed between quotes
 // in JavaScript source, in JavaScript embedded in an HTML5 <script> element,
 // or in an HTML5 event handler attribute such as onclick.
 //
 // A single string argument is escaped as is; any other argument list is
 // first rendered with fmt.Sprint.
 func jsStrEscaper(args ...interface{}) string {
 	var text string
 	isString := false
 	if len(args) == 1 {
 		text, isString = args[0].(string)
 	}
 	if !isString {
 		text = fmt.Sprint(args...)
 	}

 	var out bytes.Buffer
 	// copied is the offset in text up to which input has been emitted.
 	copied := 0
 	for pos, r := range text {
 		esc := ""
 		switch r {
 		case 0:
 			esc = `\0`
 		case '\t':
 			esc = `\t`
 		case '\n':
 			esc = `\n`
 		case '\v':
 			// "\v" == "v" on IE 6, so use the hex form instead.
 			esc = `\x0b`
 		case '\f':
 			esc = `\f`
 		case '\r':
 			esc = `\r`
 		// HTML specials are hex-encoded so the output can be embedded
 		// in HTML attributes without further encoding.
 		case '"':
 			esc = `\x22`
 		case '&':
 			esc = `\x26`
 		case '\'':
 			esc = `\x27`
 		case '+':
 			esc = `\x2b`
 		case '/':
 			esc = `\/`
 		case '<':
 			esc = `\x3c`
 		case '>':
 			esc = `\x3e`
 		case '\\':
 			esc = `\\`
 		case '\u2028':
 			esc = `\u2028`
 		case '\u2029':
 			esc = `\u2029`
 		}
 		if esc == "" {
 			// Not a special rune; it is copied verbatim with the
 			// next flush.
 			continue
 		}
 		out.WriteString(text[copied:pos])
 		out.WriteString(esc)
 		copied = pos + utf8.RuneLen(r)
 	}
 	if out.Len() == 0 {
 		// Nothing needed escaping, so the input is returned as is.
 		return text
 	}
 	out.WriteString(text[copied:])
 	return out.String()
 }

 // jsRegexpEscaper escapes its input like jsStrEscaper and additionally
 // escapes regular expression specials, so that the result is treated
 // literally when included in a regular expression literal.
 // /foo{{.X}}bar/ matches the string "foo" followed by the literal text of
 // {{.X}} followed by the string "bar".
 //
 // A single string argument is escaped as is; any other argument list is
 // first rendered with fmt.Sprint.
 func jsRegexpEscaper(args ...interface{}) string {
 	var text string
 	isString := false
 	if len(args) == 1 {
 		text, isString = args[0].(string)
 	}
 	if !isString {
 		text = fmt.Sprint(args...)
 	}

 	var out bytes.Buffer
 	// copied is the offset in text up to which input has been emitted.
 	copied := 0
 	for pos, r := range text {
 		esc := ""
 		switch r {
 		case 0:
 			esc = `\0`
 		case '\t':
 			esc = `\t`
 		case '\n':
 			esc = `\n`
 		case '\v':
 			// "\v" == "v" on IE 6, so use the hex form instead.
 			esc = `\x0b`
 		case '\f':
 			esc = `\f`
 		case '\r':
 			esc = `\r`
 		// HTML specials are hex-encoded so the output can be embedded
 		// in HTML attributes without further encoding.
 		case '"':
 			esc = `\x22`
 		case '&':
 			esc = `\x26`
 		case '\'':
 			esc = `\x27`
 		case '+':
 			esc = `\x2b`
 		case '<':
 			esc = `\x3c`
 		case '>':
 			esc = `\x3e`
 		// The remaining specials are escaped by a single backslash
 		// placed in front of the character itself.
 		case '$', '(', ')', '*', '-', '.', '/', '?',
 			'[', '\\', ']', '^', '{', '|', '}':
 			esc = `\` + string(r)
 		case '\u2028':
 			esc = `\u2028`
 		case '\u2029':
 			esc = `\u2029`
 		}
 		if esc == "" {
 			// Not a special rune; it is copied verbatim with the
 			// next flush.
 			continue
 		}
 		out.WriteString(text[copied:pos])
 		out.WriteString(esc)
 		copied = pos + utf8.RuneLen(r)
 	}
 	if out.Len() == 0 {
 		// Nothing needed escaping, so the input is returned as is.
 		return text
 	}
 	out.WriteString(text[copied:])
 	return out.String()
 }

 // isJSIdentPart reports whether the given rune is a JS identifier part.
 // Only '$', '_', the ASCII digits and the ASCII letters are recognized:
 // non-Latin letters, joiners, and combining marks are not handled, but every
 // codepoint that can occur in a numeric literal or a keyword is.
 func isJSIdentPart(rune int) bool {
 	if rune == '$' || rune == '_' {
 		return true
 	}
 	isDigit := '0' <= rune && rune <= '9'
 	isUpper := 'A' <= rune && rune <= 'Z'
 	isLower := 'a' <= rune && rune <= 'z'
 	return isDigit || isUpper || isLower
 }
	// Copyright 2011 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	package html

	import (
	"bytes"
	"fmt"
	"json"
	"strings"
	"utf8"
	)

	// nextJSCtx returns the context that determines whether a slash after the
	// given run of tokens tokens starts a regular expression instead of a division
	// operator: / or /=.
	//
	// This assumes that the token run does not include any string tokens, comment
	// tokens, regular expression literal tokens, or division operators.
	//
	// This fails on some valid but nonsensical JavaScript programs like
	// "x = ++/foo/i" which is quite different than "x++/foo/i", but is not known to
	// fail on any known useful programs. It is based on the draft
	// JavaScript 2.0 lexical grammar and requires one token of lookbehind:
	// http://www.mozilla.org/js/language/js20-2000-07/rationale/syntax.html
	func nextJSCtx(s []byte, preceding jsCtx) jsCtx {
	s = bytes.TrimRight(s, "\t\n\f\r \u2028\u2029")
	if len(s) == 0 {
	return preceding
	}

	// All cases below are in the single-byte UTF-8 group.
	switch c, n := s[len(s)-1], len(s); c {
	case '+', '-':
	// ++ and -- are not regexp preceders, but + and - are whether
	// they are used as infix or prefix operators.
	start := n - 1
	// Count the number of adjacent dashes or pluses.
	for start > 0 && s[start-1] == c {
	start--
	}
	if (n-start)&1 == 1 {
	// Reached for trailing minus signs since "---" is the
	// same as "-- -".
	return jsCtxRegexp
	}
	return jsCtxDivOp
	case '.':
	// Handle "42."
	if n != 1 && '0' <= s[n-2] && s[n-2] <= '9' {
	return jsCtxDivOp
	}
	return jsCtxRegexp
	// Suffixes for all punctuators from section 7.7 of the language spec
	// that only end binary operators not handled above.
	case ',', '<', '>', '=', '*', '%', '&', '\|', '^', '?':
	return jsCtxRegexp
	// Suffixes for all punctuators from section 7.7 of the language spec
	// that are prefix operators not handled above.
	case '!', '~':
	return jsCtxRegexp
	// Matches all the punctuators from section 7.7 of the language spec
	// that are open brackets not handled above.
	case '(', '[':
	return jsCtxRegexp
	// Matches all the punctuators from section 7.7 of the language spec
	// that precede expression starts.
	case ':', ';', '{':
	return jsCtxRegexp
	// CAVEAT: the close punctuators ('}', ']', ')') precede div ops and
	// are handled in the default except for '}' which can precede a
	// division op as in
	// ({ valueOf: function () { return 42 } } / 2
	// which is valid, but, in practice, developers don't divide object
	// literals, so our heuristic works well for code like
	// function () { ... } /foo/.test(x) && sideEffect();
	// The ')' punctuator can precede a regular expression as in
	// if (b) /foo/.test(x) && ...
	// but this is much less likely than
	// (a + b) / c
	case '}':
	return jsCtxRegexp
	default:
	// Look for an IdentifierName and see if it is a keyword that
	// can precede a regular expression.
	j := n
	for j > 0 && isJSIdentPart(int(s[j-1])) {
	j--
	}
	if regexpPrecederKeywords[string(s[j:])] {
	return jsCtxRegexp
	}
	}
	// Otherwise is a punctuator not listed above, or
	// a string which precedes a div op, or an identifier
	// which precedes a div op.
	return jsCtxDivOp
	}

	// regexpPrecederKeywords is the set of reserved JS keywords that can
	// precede a regular expression in JS source. Every listed keyword maps to
	// true; any other key yields false.
	var regexpPrecederKeywords = func() map[string]bool {
		set := make(map[string]bool)
		for _, keyword := range []string{
			"break", "case", "continue", "delete", "do", "else", "finally",
			"in", "instanceof", "return", "throw", "try", "typeof", "void",
		} {
			set[keyword] = true
		}
		return set
	}()

	// jsValEscaper escapes its inputs to a JS Expression (section 11.14) that has
	// nether side-effects nor free variables outside (NaN, Infinity).
	func jsValEscaper(args ...interface{}) string {
	var a interface{}
	if len(args) == 1 {
	a = args[0]
	} else {
	a = fmt.Sprint(args...)
	}
	// TODO: detect cycles before calling Marshal which loops infinitely on
	// cyclic data. This may be an unnacceptable DoS risk.

	// TODO: make sure that json.Marshal escapes codepoints U+2028 & U+2029
	// so it falls within the subset of JSON which is valid JS and maybe
	// post-process to prevent it from containing
	// "<!--", "-->", "<![CDATA[", "]]>", or "</script"
	// in case custom marshallers produce output containing those.

	// TODO: Maybe abbreviate \u00ab to \xab to produce more compact output.

	// TODO: JSON allows arbitrary unicode codepoints, but EcmaScript
	// defines a SourceCharacter as either a UTF-16 or UCS-2 code-unit.
	// Determine whether supplemental codepoints in UTF-8 encoded JS inside
	// string literals are properly interpreted by major interpreters.

	b, err := json.Marshal(a)
	if err != nil {
	// Put a space before comment so that if it is flush against
	// a division operator it is not turned into a line comment:
	// x/{{y}}
	// turning into
	// x//* error marshalling y:
	// second line of error message */null
	return fmt.Sprintf(" /* %s /null ", strings.Replace(err.String(), "/", "* /", -1))
	}
	if len(b) != 0 {
	first, _ := utf8.DecodeRune(b)
	last, _ := utf8.DecodeLastRune(b)
	if isJSIdentPart(first) \|\| isJSIdentPart(last) {
	return " " + string(b) + " "
	}
	}
	return string(b)
	}

	// jsStrEscaper returns text that is safe to include between quotes in
	// JavaScript source, in JavaScript embedded in an HTML5 <script> element,
	// or in an HTML5 event handler attribute such as onclick.
	//
	// When called with exactly one string argument that string is escaped;
	// otherwise the arguments are formatted with fmt.Sprint first.
	func jsStrEscaper(args ...interface{}) string {
		input, isStr := "", false
		if len(args) == 1 {
			input, isStr = args[0].(string)
		}
		if !isStr {
			input = fmt.Sprint(args...)
		}

		var buf bytes.Buffer
		// done marks how much of input has already been written to buf.
		done := 0
		for at, r := range input {
			var esc string
			switch r {
			case 0:
				esc = `\0`
			case '\t':
				esc = `\t`
			case '\n':
				esc = `\n`
			case '\v':
				// IE 6 treats "\v" as "v", hence the hex escape.
				esc = `\x0b`
			case '\f':
				esc = `\f`
			case '\r':
				esc = `\r`
			// HTML specials become hex escapes so that the output can
			// be embedded in HTML attributes without further encoding.
			case '"':
				esc = `\x22`
			case '&':
				esc = `\x26`
			case '\'':
				esc = `\x27`
			case '+':
				esc = `\x2b`
			case '<':
				esc = `\x3c`
			case '>':
				esc = `\x3e`
			// Slash and backslash are prefixed with a backslash.
			case '/', '\\':
				esc = `\` + string(r)
			case '\u2028':
				esc = `\u2028`
			case '\u2029':
				esc = `\u2029`
			default:
				// Ordinary rune: left for the next flush.
				continue
			}
			buf.WriteString(input[done:at])
			buf.WriteString(esc)
			done = at + utf8.RuneLen(r)
		}
		if buf.Len() == 0 {
			// No rune was replaced.
			return input
		}
		buf.WriteString(input[done:])
		return buf.String()
	}

	// jsRegexpEscaper behaves like jsStrEscaper but escapes regular expression
	// specials so the result is treated literally when included in a regular
	// expression literal. /foo{{.X}}bar/ matches the string "foo" followed by
	// the literal text of {{.X}} followed by the string "bar".
	//
	// When called with exactly one string argument that string is escaped;
	// otherwise the arguments are formatted with fmt.Sprint first.
	func jsRegexpEscaper(args ...interface{}) string {
		ok := false
		var s string
		if len(args) == 1 {
			s, ok = args[0].(string)
		}
		if !ok {
			s = fmt.Sprint(args...)
		}
		var b bytes.Buffer
		// written is the offset in s up to which input has been copied to b.
		written := 0
		for i, r := range s {
			var repl string
			switch r {
			case 0:
				repl = `\0`
			case '\t':
				repl = `\t`
			case '\n':
				repl = `\n`
			case '\v':
				// "\v" == "v" on IE 6.
				repl = `\x0b`
			case '\f':
				repl = `\f`
			case '\r':
				repl = `\r`
			// Encode HTML specials as hex so the output can be embedded
			// in HTML attributes without further encoding.
			case '"':
				repl = `\x22`
			case '$':
				repl = `\$`
			case '&':
				repl = `\x26`
			case '\'':
				repl = `\x27`
			case '(':
				repl = `\(`
			case ')':
				repl = `\)`
			case '*':
				repl = `\*`
			case '+':
				repl = `\x2b`
			case '-':
				repl = `\-`
			case '.':
				repl = `\.`
			case '/':
				repl = `\/`
			case '<':
				repl = `\x3c`
			case '>':
				repl = `\x3e`
			case '?':
				repl = `\?`
			case '[':
				repl = `\[`
			case '\\':
				repl = `\\`
			case ']':
				repl = `\]`
			case '^':
				repl = `\^`
			case '{':
				repl = `\{`
			case '|':
				// A single backslash makes the pipe literal; a doubled
				// one would emit an escaped backslash followed by a
				// live alternation operator.
				repl = `\|`
			case '}':
				repl = `\}`
			case '\u2028':
				repl = `\u2028`
			case '\u2029':
				repl = `\u2029`
			default:
				// Not a special rune; it is copied with the next flush.
				continue
			}
			b.WriteString(s[written:i])
			b.WriteString(repl)
			written = i + utf8.RuneLen(r)
		}
		if b.Len() == 0 {
			// Nothing was replaced, so the input is returned unchanged.
			return s
		}
		b.WriteString(s[written:])
		return b.String()
	}

	// isJSIdentPart reports whether the given rune is a JS identifier part.
	// It recognizes '$', '_', ASCII digits and ASCII letters only. Non-Latin
	// letters, joiners, and combining marks are not handled, but every
	// codepoint that can occur in a numeric literal or a keyword is.
	func isJSIdentPart(rune int) bool {
		return rune == '$' ||
			rune == '_' ||
			('0' <= rune && rune <= '9') ||
			('A' <= rune && rune <= 'Z') ||
			('a' <= rune && rune <= 'z')
	}