src/pkg/exp/template/html/css.go - go - Git at Google

 // Copyright 2011 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 package html

 import (
 	"bytes"
 	"fmt"
 	"unicode"
 	"utf8"
 )

 // endsWithCSSKeyword reports whether the tail of b is an identifier equal,
 // ignoring case, to kw. kw must already be lower-case.
 //
 // Escaped spellings are deliberately not recognized: many CSS keywords, such
 // as "!important", may have characters encoded, but the URI production does
 // not allow that according to
 // http://www.w3.org/TR/css3-syntax/#TOK-URI
 // so, for example, given "\75\72\6c" and "url" this returns false.
 func endsWithCSSKeyword(b []byte, kw string) bool {
 	start := len(b) - len(kw)
 	switch {
 	case start < 0:
 		// b is shorter than kw.
 		return false
 	case start > 0:
 		// The candidate must not be the tail of a longer identifier.
 		if prev, _ := utf8.DecodeLastRune(b[:start]); isCSSNmchar(prev) {
 			return false
 		}
 	}
 	tail := bytes.ToLower(b[start:])
 	return string(tail) == kw
 }

 // isCSSNmchar reports whether rune may appear anywhere in a CSS identifier.
 //
 // It follows the CSS3 nmchar production but ignores multi-rune escape
 // sequences.
 // http://www.w3.org/TR/css3-syntax/#SUBTOK-nmchar
 func isCSSNmchar(rune int) bool {
 	switch {
 	case 'a' <= rune && rune <= 'z',
 		'A' <= rune && rune <= 'Z',
 		'0' <= rune && rune <= '9':
 		// ASCII letters and digits.
 		return true
 	case rune == '-', rune == '_':
 		return true
 	case 0x80 <= rune && rune <= 0xd7ff,
 		0xe000 <= rune && rune <= 0xfffd,
 		0x10000 <= rune && rune <= 0x10ffff:
 		// Non-ASCII ranges; 0xd800-0xdfff, 0xfffe and 0xffff are excluded.
 		return true
 	}
 	return false
 }

 // decodeCSS resolves CSS3 escape sequences in s, which is taken to be a
 // sequence of stringchars as defined by
 // http://www.w3.org/TR/css3-syntax/#SUBTOK-stringchar
 // When s contains no backslash it is returned unchanged; otherwise the result
 // is a slice backed by a newly allocated array.
 func decodeCSS(s []byte) []byte {
 	if bytes.IndexByte(s, '\\') == -1 {
 		return s
 	}
 	// The UTF-8 encoding of a code point is never longer than 1 + the
 	// number of hex digits needed to write that code point, so len(s) is
 	// an upper bound on the output length.
 	out := make([]byte, 0, len(s))
 	rest := s
 	for len(rest) != 0 {
 		// Copy the literal run that precedes the next backslash.
 		esc := bytes.IndexByte(rest, '\\')
 		if esc == -1 {
 			esc = len(rest)
 		}
 		out = append(out, rest[:esc]...)
 		rest = rest[esc:]
 		if len(rest) < 2 {
 			// Input exhausted, or a lone trailing backslash, which is
 			// not copied to the output.
 			break
 		}
 		// http://www.w3.org/TR/css3-syntax/#SUBTOK-escape
 		// escape ::= unicode | '\' [#x20-#x7E#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF]
 		if !isHex(rest[1]) {
 			// `\\` decodes to `\` and `\"` to `"`.
 			_, size := utf8.DecodeRune(rest[1:])
 			out = append(out, rest[1:1+size]...)
 			rest = rest[1+size:]
 			continue
 		}
 		// http://www.w3.org/TR/css3-syntax/#SUBTOK-unicode
 		//   unicode ::= '\' [0-9a-fA-F]{1,6} wc?
 		end := 2
 		for end < len(rest) && end < 7 && isHex(rest[end]) {
 			end++
 		}
 		cp := hexDecode(rest[1:end])
 		if cp > unicode.MaxRune {
 			// Too large: give the last hex digit back to the input.
 			cp /= 16
 			end--
 		}
 		// Encode directly into the spare capacity of out.
 		size := utf8.EncodeRune(out[len(out):cap(out)], cp)
 		out = out[:len(out)+size]
 		// The optional space at the end allows a hex sequence to be
 		// followed by a literal hex digit.
 		// string(decodeCSS([]byte(`\A B`))) == "\nB"
 		rest = skipCSSSpace(rest[end:])
 	}
 	return out
 }

 // isHex reports whether c is an ASCII hexadecimal digit (0-9, a-f or A-F).
 func isHex(c byte) bool {
 	switch {
 	case '0' <= c && c <= '9':
 		return true
 	case 'a' <= c && c <= 'f':
 		return true
 	case 'A' <= c && c <= 'F':
 		return true
 	}
 	return false
 }

 // hexDecode returns the value of a short sequence of hex digits, most
 // significant digit first: "10" -> 16. An empty sequence yields 0.
 // It panics if s contains a byte that is not a hex digit.
 func hexDecode(s []byte) int {
 	n := 0
 	for _, c := range s {
 		var digit int
 		switch {
 		case '0' <= c && c <= '9':
 			digit = int(c - '0')
 		case 'a' <= c && c <= 'f':
 			digit = int(c-'a') + 10
 		case 'A' <= c && c <= 'F':
 			digit = int(c-'A') + 10
 		default:
 			panic(fmt.Sprintf("Bad hex digit in %q", s))
 		}
 		// Make room for one more nibble and drop the digit into it.
 		n = n<<4 | digit
 	}
 	return n
 }

 // skipCSSSpace returns the suffix of c that remains after skipping at most
 // one leading CSS whitespace token. A CR LF pair counts as a single token.
 func skipCSSSpace(c []byte) []byte {
 	skip := 0
 	if len(c) != 0 {
 		// wc ::= #x9 | #xA | #xC | #xD | #x20
 		switch c[0] {
 		case ' ', '\t', '\n', '\f':
 			skip = 1
 		case '\r':
 			// This differs from CSS3's wc production because it
 			// contains a probable spec error whereby wc contains all
 			// the single byte sequences in nl (newline) but not CRLF.
 			skip = 1
 			if len(c) > 1 && c[1] == '\n' {
 				skip = 2
 			}
 		}
 	}
 	return c[skip:]
 }

 // cssEscaper escapes HTML and CSS special characters using \<hex>+ escapes.
 //
 // The arguments are turned into one string by stringify (its second result
 // is ignored). If nothing needs escaping that string is returned as is.
 //
 // A hex escape is followed by a single space whenever the text after it
 // could otherwise be read as part of the escape: at the end of the output,
 // before a hex digit, and before a literal space. A CSS parser consumes one
 // whitespace character after a hex escape, so without the terminator a
 // following literal space would be silently dropped.
 func cssEscaper(args ...interface{}) string {
 	s, _ := stringify(args...)
 	var b bytes.Buffer
 	// written is the index in s up to which input has been copied to b.
 	written := 0
 	for i, r := range s {
 		var repl string
 		switch r {
 		case 0:
 			repl = `\0`
 		case '\t':
 			repl = `\9`
 		case '\n':
 			repl = `\a`
 		case '\f':
 			repl = `\c`
 		case '\r':
 			repl = `\d`
 		// Encode HTML specials as hex so the output can be embedded
 		// in HTML attributes without further encoding.
 		case '"':
 			repl = `\22`
 		case '&':
 			repl = `\26`
 		case '\'':
 			repl = `\27`
 		case '(':
 			repl = `\28`
 		case ')':
 			repl = `\29`
 		case '+':
 			repl = `\2b`
 		case '/':
 			repl = `\2f`
 		case ':':
 			repl = `\3a`
 		case ';':
 			repl = `\3b`
 		case '<':
 			repl = `\3c`
 		case '>':
 			repl = `\3e`
 		case '\\':
 			repl = `\\`
 		case '{':
 			repl = `\7b`
 		case '}':
 			repl = `\7d`
 		default:
 			continue
 		}
 		b.WriteString(s[written:i])
 		b.WriteString(repl)
 		written = i + utf8.RuneLen(r)
 		// `\\` is not a hex escape and needs no terminator. The other
 		// whitespace characters are themselves escaped, so only a
 		// literal space can be swallowed by the preceding escape.
 		if repl != `\\` && (written == len(s) || isHex(s[written]) || s[written] == ' ') {
 			b.WriteByte(' ')
 		}
 	}
 	if written == 0 {
 		return s
 	}
 	b.WriteString(s[written:])
 	return b.String()
 }

 // Lower-case identifier fragments that cssValueFilter searches for in the
 // identifier characters it collects from a value. The filter does not
 // collect '-', which is why the second entry is spelled without hyphens.
 var (
 	expressionBytes = []byte("expression")
 	mozBindingBytes = []byte("mozbinding")
 )

 // cssValueFilter allows innocuous CSS values in the output including CSS
 // quantities (10px or 25%), ID or class literals (#foo, .bar), keyword values
 // (inherit, blue), and colors (#888).
 // It filters out unsafe values, such as those that affect token boundaries,
 // and anything that might execute scripts.
 //
 // Input that stringify reports as contentTypeCSS is returned unchanged.
 // Anything else is CSS-decoded first and then checked; the decoded text is
 // returned when it passes and filterFailsafe when it does not.
 func cssValueFilter(args ...interface{}) string {
 	s, t := stringify(args...)
 	if t == contentTypeCSS {
 		return s
 	}
 	// id collects the ASCII identifier characters of the decoded value so
 	// that dangerous keywords can be found even when split by other bytes.
 	b, id := decodeCSS([]byte(s)), make([]byte, 0, 64)

 	// CSS3 error handling is specified as honoring string boundaries per
 	// http://www.w3.org/TR/css3-syntax/#error-handling :
 	//     Malformed declarations. User agents must handle unexpected
 	//     tokens encountered while parsing a declaration by reading until
 	//     the end of the declaration, while observing the rules for
 	//     matching pairs of (), [], {}, "", and '', and correctly handling
 	//     escapes. For example, a malformed declaration may be missing a
 	//     property, colon (:) or value.
 	// So we need to make sure that values do not have mismatched bracket
 	// or quote characters to prevent the browser from restarting parsing
 	// inside a string that might embed JavaScript source.
 	for i, c := range b {
 		switch c {
 		// '<' and '>' are rejected as well: angle brackets can change
 		// HTML token boundaries (for example by closing the enclosing
 		// <style> element) and have no place in the values allowed here.
 		case 0, '"', '\'', '(', ')', '/', ';', '@', '[', '\\', ']', '`', '{', '}', '<', '>':
 			return filterFailsafe
 		case '-':
 			// Disallow <!-- or -->.
 			// -- should not appear in valid identifiers.
 			if i != 0 && '-' == b[i-1] {
 				return filterFailsafe
 			}
 		default:
 			if c < 0x80 && isCSSNmchar(int(c)) {
 				id = append(id, c)
 			}
 		}
 	}
 	id = bytes.ToLower(id)
 	if bytes.Index(id, expressionBytes) != -1 || bytes.Index(id, mozBindingBytes) != -1 {
 		return filterFailsafe
 	}
 	return string(b)
 }
	// Copyright 2011 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	package html

	import (
	"bytes"
	"fmt"
	"unicode"
	"utf8"
	)

	// endsWithCSSKeyword reports whether the tail of b is an identifier equal,
	// ignoring case, to kw. kw must already be lower-case.
	//
	// Escaped spellings are deliberately not recognized: many CSS keywords, such
	// as "!important", may have characters encoded, but the URI production does
	// not allow that according to
	// http://www.w3.org/TR/css3-syntax/#TOK-URI
	// so, for example, given "\75\72\6c" and "url" this returns false.
	func endsWithCSSKeyword(b []byte, kw string) bool {
		start := len(b) - len(kw)
		switch {
		case start < 0:
			// b is shorter than kw.
			return false
		case start > 0:
			// The candidate must not be the tail of a longer identifier.
			if prev, _ := utf8.DecodeLastRune(b[:start]); isCSSNmchar(prev) {
				return false
			}
		}
		tail := bytes.ToLower(b[start:])
		return string(tail) == kw
	}

	// isCSSNmchar reports whether rune may appear anywhere in a CSS identifier.
	//
	// It follows the CSS3 nmchar production but ignores multi-rune escape
	// sequences.
	// http://www.w3.org/TR/css3-syntax/#SUBTOK-nmchar
	func isCSSNmchar(rune int) bool {
		switch {
		case 'a' <= rune && rune <= 'z',
			'A' <= rune && rune <= 'Z',
			'0' <= rune && rune <= '9':
			// ASCII letters and digits.
			return true
		case rune == '-', rune == '_':
			return true
		case 0x80 <= rune && rune <= 0xd7ff,
			0xe000 <= rune && rune <= 0xfffd,
			0x10000 <= rune && rune <= 0x10ffff:
			// Non-ASCII ranges; 0xd800-0xdfff, 0xfffe and 0xffff are excluded.
			return true
		}
		return false
	}

	// decodeCSS resolves CSS3 escape sequences in s, which is taken to be a
	// sequence of stringchars as defined by
	// http://www.w3.org/TR/css3-syntax/#SUBTOK-stringchar
	// When s contains no backslash it is returned unchanged; otherwise the result
	// is a slice backed by a newly allocated array.
	func decodeCSS(s []byte) []byte {
		if bytes.IndexByte(s, '\\') == -1 {
			return s
		}
		// The UTF-8 encoding of a code point is never longer than 1 + the
		// number of hex digits needed to write that code point, so len(s) is
		// an upper bound on the output length.
		out := make([]byte, 0, len(s))
		rest := s
		for len(rest) != 0 {
			// Copy the literal run that precedes the next backslash.
			esc := bytes.IndexByte(rest, '\\')
			if esc == -1 {
				esc = len(rest)
			}
			out = append(out, rest[:esc]...)
			rest = rest[esc:]
			if len(rest) < 2 {
				// Input exhausted, or a lone trailing backslash, which is
				// not copied to the output.
				break
			}
			// http://www.w3.org/TR/css3-syntax/#SUBTOK-escape
			// escape ::= unicode | '\' [#x20-#x7E#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF]
			if !isHex(rest[1]) {
				// `\\` decodes to `\` and `\"` to `"`.
				_, size := utf8.DecodeRune(rest[1:])
				out = append(out, rest[1:1+size]...)
				rest = rest[1+size:]
				continue
			}
			// http://www.w3.org/TR/css3-syntax/#SUBTOK-unicode
			//   unicode ::= '\' [0-9a-fA-F]{1,6} wc?
			end := 2
			for end < len(rest) && end < 7 && isHex(rest[end]) {
				end++
			}
			cp := hexDecode(rest[1:end])
			if cp > unicode.MaxRune {
				// Too large: give the last hex digit back to the input.
				cp /= 16
				end--
			}
			// Encode directly into the spare capacity of out.
			size := utf8.EncodeRune(out[len(out):cap(out)], cp)
			out = out[:len(out)+size]
			// The optional space at the end allows a hex sequence to be
			// followed by a literal hex digit.
			// string(decodeCSS([]byte(`\A B`))) == "\nB"
			rest = skipCSSSpace(rest[end:])
		}
		return out
	}

	// isHex reports whether c is an ASCII hexadecimal digit (0-9, a-f or A-F).
	func isHex(c byte) bool {
		switch {
		case '0' <= c && c <= '9':
			return true
		case 'a' <= c && c <= 'f':
			return true
		case 'A' <= c && c <= 'F':
			return true
		}
		return false
	}

	// hexDecode returns the value of a short sequence of hex digits, most
	// significant digit first: "10" -> 16. An empty sequence yields 0.
	// It panics if s contains a byte that is not a hex digit.
	func hexDecode(s []byte) int {
		n := 0
		for _, c := range s {
			var digit int
			switch {
			case '0' <= c && c <= '9':
				digit = int(c - '0')
			case 'a' <= c && c <= 'f':
				digit = int(c-'a') + 10
			case 'A' <= c && c <= 'F':
				digit = int(c-'A') + 10
			default:
				panic(fmt.Sprintf("Bad hex digit in %q", s))
			}
			// Make room for one more nibble and drop the digit into it.
			n = n<<4 | digit
		}
		return n
	}

	// skipCSSSpace returns the suffix of c that remains after skipping at most
	// one leading CSS whitespace token. A CR LF pair counts as a single token.
	func skipCSSSpace(c []byte) []byte {
		skip := 0
		if len(c) != 0 {
			// wc ::= #x9 | #xA | #xC | #xD | #x20
			switch c[0] {
			case ' ', '\t', '\n', '\f':
				skip = 1
			case '\r':
				// This differs from CSS3's wc production because it
				// contains a probable spec error whereby wc contains all
				// the single byte sequences in nl (newline) but not CRLF.
				skip = 1
				if len(c) > 1 && c[1] == '\n' {
					skip = 2
				}
			}
		}
		return c[skip:]
	}

	// cssEscaper escapes HTML and CSS special characters using \<hex>+ escapes.
	func cssEscaper(args ...interface{}) string {
	s, _ := stringify(args...)
	var b bytes.Buffer
	written := 0
	for i, r := range s {
	var repl string
	switch r {
	case 0:
	repl = `\0`
	case '\t':
	repl = `\9`
	case '\n':
	repl = `\a`
	case '\f':
	repl = `\c`
	case '\r':
	repl = `\d`
	// Encode HTML specials as hex so the output can be embedded
	// in HTML attributes without further encoding.
	case '"':
	repl = `\22`
	case '&':
	repl = `\26`
	case '\'':
	repl = `\27`
	case '(':
	repl = `\28`
	case ')':
	repl = `\29`
	case '+':
	repl = `\2b`
	case '/':
	repl = `\2f`
	case ':':
	repl = `\3a`
	case ';':
	repl = `\3b`
	case '<':
	repl = `\3c`
	case '>':
	repl = `\3e`
	case '\\':
	repl = `\\`
	case '{':
	repl = `\7b`
	case '}':
	repl = `\7d`
	default:
	continue
	}
	b.WriteString(s[written:i])
	b.WriteString(repl)
	written = i + utf8.RuneLen(r)
	if repl != `\\` && (written == len(s) \|\| isHex(s[written])) {
	b.WriteByte(' ')
	}
	}
	if written == 0 {
	return s
	}
	b.WriteString(s[written:])
	return b.String()
	}

	// Lower-case identifier fragments that cssValueFilter searches for in the
	// identifier characters it collects from a value. The filter does not
	// collect '-', which is why the second entry is spelled without hyphens.
	var (
		expressionBytes = []byte("expression")
		mozBindingBytes = []byte("mozbinding")
	)

	// cssValueFilter allows innocuous CSS values in the output including CSS
	// quantities (10px or 25%), ID or class literals (#foo, .bar), keyword values
	// (inherit, blue), and colors (#888).
	// It filters out unsafe values, such as those that affect token boundaries,
	// and anything that might execute scripts.
	func cssValueFilter(args ...interface{}) string {
	s, t := stringify(args...)
	if t == contentTypeCSS {
	return s
	}
	b, id := decodeCSS([]byte(s)), make([]byte, 0, 64)

	// CSS3 error handling is specified as honoring string boundaries per
	// http://www.w3.org/TR/css3-syntax/#error-handling :
	// Malformed declarations. User agents must handle unexpected
	// tokens encountered while parsing a declaration by reading until
	// the end of the declaration, while observing the rules for
	// matching pairs of (), [], {}, "", and '', and correctly handling
	// escapes. For example, a malformed declaration may be missing a
	// property, colon (:) or value.
	// So we need to make sure that values do not have mismatched bracket
	// or quote characters to prevent the browser from restarting parsing
	// inside a string that might embed JavaScript source.
	for i, c := range b {
	switch c {
	case 0, '"', '\'', '(', ')', '/', ';', '@', '[', '\\', ']', '`', '{', '}':
	return filterFailsafe
	case '-':
	// Disallow <!-- or -->.
	// -- should not appear in valid identifiers.
	if i != 0 && '-' == b[i-1] {
	return filterFailsafe
	}
	default:
	if c < 0x80 && isCSSNmchar(int(c)) {
	id = append(id, c)
	}
	}
	}
	id = bytes.ToLower(id)
	if bytes.Index(id, expressionBytes) != -1 \|\| bytes.Index(id, mozBindingBytes) != -1 {
	return filterFailsafe
	}
	return string(b)
	}