src/html/template/url.go - go - Git at Google

 // Copyright 2011 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 package template

 import (
 	"fmt"
 	"strings"
 )

 // urlFilter passes its input through untouched when the URL is considered
 // safe, and otherwise replaces the whole URL with "#" followed by
 // filterFailsafe.
 //
 // A scheme is treated as unsafe when following the link can have side effects
 // that cannot be undone without further user interaction. A "javascript:"
 // link, for instance, can run script the moment it is clicked.
 //
 // The filter is deliberately conservative: every scheme is rejected except
 //   - http:   Navigating to another site (possibly in a new window or tab)
 //     can be undone by going back or closing the window or tab, and
 //     nothing irreversible happens until the user interacts with the
 //     new site.
 //   - https:  Same reasoning as http.
 //   - mailto: Opens a mail program with a new draft. Nothing is sent until
 //     the user explicitly clicks send, and closing the mail program
 //     undoes it.
 //
 // A developer who needs another scheme must state that the URL is expected and
 // safe by wrapping it in a template.URL value; such values skip the check.
 func urlFilter(args ...any) string {
 	s, t := stringify(args...)
 	// Content already typed as a URL is trusted; everything else must pass
 	// the scheme check.
 	if t == contentTypeURL || isSafeURL(s) {
 		return s
 	}
 	return "#" + filterFailsafe
 }

 // isSafeURL reports whether s is a relative URL or a URL whose protocol is
 // one of http, https or mailto (compared case-insensitively).
 //
 // The text before the first ':' is taken as the protocol only if it contains
 // no '/'; otherwise the colon belongs to a later part of a relative URL.
 func isSafeURL(s string) bool {
 	scheme, _, found := strings.Cut(s, ":")
 	if !found || strings.Contains(scheme, "/") {
 		// No protocol present: treat as a relative URL.
 		return true
 	}
 	switch {
 	case strings.EqualFold(scheme, "http"),
 		strings.EqualFold(scheme, "https"),
 		strings.EqualFold(scheme, "mailto"):
 		return true
 	}
 	return false
 }

 // urlEscaper returns text that is suitable for embedding in a URL query.
 // The result may also be placed in an HTML attribute with no further escaping.
 // It runs urlProcessor in escaping (non-normalizing) mode.
 func urlEscaper(args ...any) string {
 	const normalizeOnly = false
 	return urlProcessor(normalizeOnly, args...)
 }

 // urlNormalizer normalizes URL content so that it is safe to place inside a
 // quote-delimited string or a parenthesis-delimited url(...).
 // Not every HTML special is encoded: in particular '&' is left alone, so
 // embedding the result in an HTML attribute still requires '&' to be escaped
 // to '&amp;'.
 // It runs urlProcessor in normalizing mode.
 func urlNormalizer(args ...any) string {
 	const normalizeOnly = true
 	return urlProcessor(normalizeOnly, args...)
 }

 // urlProcessor turns its input into a valid hierarchical or opaque URL part,
 // normalizing it when norm is true and escaping it otherwise.
 // Input whose content type is already contentTypeURL is always normalized,
 // whatever norm says.
 func urlProcessor(norm bool, args ...any) string {
 	s, t := stringify(args...)
 	norm = norm || t == contentTypeURL
 	var out strings.Builder
 	if !processURLOnto(s, norm, &out) {
 		// Nothing needed encoding, so return the input string itself.
 		return s
 	}
 	return out.String()
 }

 // processURLOnto writes a normalized (norm true) or escaped (norm false)
 // form of s to b and reports whether what it wrote differs from s, that is
 // whether at least one byte was percent-encoded.
 //
 // The input is processed byte by byte on the assumption that URLs are UTF-8
 // encoded. This is similar to the URI to IRI encoding scheme defined in
 // section 3.1 of RFC 3987, and behaves the same as the EcmaScript builtin
 // encodeURIComponent. It should not cause any misencoding of URLs in pages
 // with Content-type: text/html;charset=UTF-8.
 func processURLOnto(s string, norm bool, b *strings.Builder) bool {
 	b.Grow(len(s) + 16)
 	// s[:flushed] has already been copied or percent-encoded into b.
 	flushed := 0
 	for i := 0; i < len(s); i++ {
 		c := s[i]
 		var keep bool
 		switch {
 		// Unreserved according to RFC 3986 sec 2.3: ALPHA, DIGIT, hyphen,
 		// period, underscore and tilde. Percent-encoded forms of these
 		// should not be created by URI producers.
 		case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z', '0' <= c && c <= '9',
 			c == '-', c == '.', c == '_', c == '~':
 			keep = true
 		// When normalizing, an existing valid escape is not re-encoded.
 		case c == '%':
 			keep = norm && i+2 < len(s) && isHex(s[i+1]) && isHex(s[i+2])
 		// Reserved characters are kept only when normalizing. Single quote
 		// and parens are sub-delims in RFC 3986 too, but they are left out
 		// of this set on purpose: encoding them lets the output sit inside
 		// single-quoted attributes and unquoted CSS url(...) constructs.
 		// Single quotes are only used in the obsolete "mark" rule in an
 		// appendix of RFC 3986, so encoding them is safe.
 		case strings.IndexByte("!#$&*+,/:;=?@[]", c) >= 0:
 			keep = norm
 		}
 		if keep {
 			continue
 		}
 		// Copy the pending run of kept bytes, then the encoded byte.
 		b.WriteString(s[flushed:i])
 		fmt.Fprintf(b, "%%%02x", c)
 		flushed = i + 1
 	}
 	b.WriteString(s[flushed:])
 	return flushed != 0
 }

 // srcsetFilterAndEscaper filters and normalizes a srcset value, which is a
 // comma-separated list of image URLs, each optionally followed by metadata.
 //
 // Content typed as contentTypeSrcset is returned as is. Content typed as
 // contentTypeURL is handled as a single URL. Anything else is split on commas
 // and every element is passed through filterSrcsetElement.
 func srcsetFilterAndEscaper(args ...any) string {
 	s, t := stringify(args...)
 	if t == contentTypeSrcset {
 		return s
 	}
 	if t == contentTypeURL {
 		// Normalizing percent-encodes all HTML whitespace, which is what
 		// would otherwise separate the image URL from its metadata.
 		var norm strings.Builder
 		if processURLOnto(s, true, &norm) {
 			s = norm.String()
 		}
 		// Commas separate one source from the next, so encode them too.
 		return strings.ReplaceAll(s, ",", "%2c")
 	}

 	var out strings.Builder
 	elemStart := 0
 	for {
 		comma := strings.IndexByte(s[elemStart:], ',')
 		if comma < 0 {
 			break
 		}
 		elemEnd := elemStart + comma
 		filterSrcsetElement(s, elemStart, elemEnd, &out)
 		out.WriteString(",")
 		elemStart = elemEnd + 1
 	}
 	// The last element (or the only one) has no trailing comma.
 	filterSrcsetElement(s, elemStart, len(s), &out)
 	return out.String()
 }

 // htmlSpaceAndASCIIAlnumBytes is a 128-bit bitmap indexed by byte value:
 // bit c&7 of byte c>>3 is set when c is an HTML space character or an ASCII
 // letter or digit.
 // Derived from https://play.golang.org/p/Dhmj7FORT5
 const htmlSpaceAndASCIIAlnumBytes = "\x00\x36\x00\x00\x01\x00\xff\x03\xfe\xff\xff\x07\xfe\xff\xff\x07"

 // isHTMLSpace is true iff c is a whitespace character per
 // https://infra.spec.whatwg.org/#ascii-whitespace
 func isHTMLSpace(c byte) bool {
 	// Every entry at or below 0x20 in the bitmap is a space character.
 	if c > 0x20 {
 		return false
 	}
 	return htmlSpaceAndASCIIAlnumBytes[c>>3]&(1<<(c&7)) != 0
 }

 // isHTMLSpaceOrASCIIAlnum is true iff c is an HTML space character or an
 // ASCII letter or digit.
 func isHTMLSpaceOrASCIIAlnum(c byte) bool {
 	// The bitmap only covers the ASCII range; anything above would index
 	// past its end.
 	if c >= 0x80 {
 		return false
 	}
 	return htmlSpaceAndASCIIAlnumBytes[c>>3]&(1<<(c&7)) != 0
 }

 // filterSrcsetElement handles one srcset element, s[left:right], and appends
 // the result to b.
 //
 // The element is read as optional leading HTML whitespace, a URL running up
 // to the next HTML whitespace, and trailing metadata. If the URL passes
 // isSafeURL and the metadata consists only of HTML spaces and ASCII
 // alphanumerics, the element is written with its URL normalized and the
 // surrounding text copied verbatim. Otherwise "#" followed by filterFailsafe
 // is written in its place.
 func filterSrcsetElement(s string, left int, right int, b *strings.Builder) {
 	// Skip leading whitespace to find where the URL begins.
 	urlStart := left
 	for urlStart < right && isHTMLSpace(s[urlStart]) {
 		urlStart++
 	}
 	// The URL extends to the first whitespace, or to the end of the element.
 	urlEnd := urlStart
 	for urlEnd < right && !isHTMLSpace(s[urlEnd]) {
 		urlEnd++
 	}
 	candidate := s[urlStart:urlEnd]
 	// Metadata made only of spaces and alnums needs no URL normalization;
 	// anything else causes the whole element to be rejected.
 	ok := isSafeURL(candidate)
 	for i := urlEnd; ok && i < right; i++ {
 		ok = isHTMLSpaceOrASCIIAlnum(s[i])
 	}
 	if !ok {
 		b.WriteString("#")
 		b.WriteString(filterFailsafe)
 		return
 	}
 	b.WriteString(s[left:urlStart])
 	processURLOnto(candidate, true, b)
 	b.WriteString(s[urlEnd:right])
 }
	// Copyright 2011 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	package template

	import (
	"fmt"
	"strings"
	)

	// urlFilter passes its input through untouched when the URL is considered
	// safe, and otherwise replaces the whole URL with "#" followed by
	// filterFailsafe.
	//
	// A scheme is treated as unsafe when following the link can have side effects
	// that cannot be undone without further user interaction. A "javascript:"
	// link, for instance, can run script the moment it is clicked.
	//
	// The filter is deliberately conservative: every scheme is rejected except
	//   - http:   Navigating to another site (possibly in a new window or tab)
	//     can be undone by going back or closing the window or tab, and
	//     nothing irreversible happens until the user interacts with the
	//     new site.
	//   - https:  Same reasoning as http.
	//   - mailto: Opens a mail program with a new draft. Nothing is sent until
	//     the user explicitly clicks send, and closing the mail program
	//     undoes it.
	//
	// A developer who needs another scheme must state that the URL is expected and
	// safe by wrapping it in a template.URL value; such values skip the check.
	func urlFilter(args ...any) string {
		s, t := stringify(args...)
		// Content already typed as a URL is trusted; everything else must pass
		// the scheme check.
		if t == contentTypeURL || isSafeURL(s) {
			return s
		}
		return "#" + filterFailsafe
	}

	// isSafeURL reports whether s is a relative URL or a URL whose protocol is
	// one of http, https or mailto (compared case-insensitively).
	//
	// The text before the first ':' is taken as the protocol only if it contains
	// no '/'; otherwise the colon belongs to a later part of a relative URL.
	func isSafeURL(s string) bool {
		scheme, _, found := strings.Cut(s, ":")
		if !found || strings.Contains(scheme, "/") {
			// No protocol present: treat as a relative URL.
			return true
		}
		switch {
		case strings.EqualFold(scheme, "http"),
			strings.EqualFold(scheme, "https"),
			strings.EqualFold(scheme, "mailto"):
			return true
		}
		return false
	}

	// urlEscaper returns text that is suitable for embedding in a URL query.
	// The result may also be placed in an HTML attribute with no further escaping.
	// It runs urlProcessor in escaping (non-normalizing) mode.
	func urlEscaper(args ...any) string {
		const normalizeOnly = false
		return urlProcessor(normalizeOnly, args...)
	}

	// urlNormalizer normalizes URL content so that it is safe to place inside a
	// quote-delimited string or a parenthesis-delimited url(...).
	// Not every HTML special is encoded: in particular '&' is left alone, so
	// embedding the result in an HTML attribute still requires '&' to be escaped
	// to '&amp;'.
	// It runs urlProcessor in normalizing mode.
	func urlNormalizer(args ...any) string {
		const normalizeOnly = true
		return urlProcessor(normalizeOnly, args...)
	}

	// urlProcessor turns its input into a valid hierarchical or opaque URL part,
	// normalizing it when norm is true and escaping it otherwise.
	// Input whose content type is already contentTypeURL is always normalized,
	// whatever norm says.
	func urlProcessor(norm bool, args ...any) string {
		s, t := stringify(args...)
		norm = norm || t == contentTypeURL
		var out strings.Builder
		if !processURLOnto(s, norm, &out) {
			// Nothing needed encoding, so return the input string itself.
			return s
		}
		return out.String()
	}

	// processURLOnto writes a normalized (norm true) or escaped (norm false)
	// form of s to b and reports whether what it wrote differs from s, that is
	// whether at least one byte was percent-encoded.
	//
	// The input is processed byte by byte on the assumption that URLs are UTF-8
	// encoded. This is similar to the URI to IRI encoding scheme defined in
	// section 3.1 of RFC 3987, and behaves the same as the EcmaScript builtin
	// encodeURIComponent. It should not cause any misencoding of URLs in pages
	// with Content-type: text/html;charset=UTF-8.
	func processURLOnto(s string, norm bool, b *strings.Builder) bool {
		b.Grow(len(s) + 16)
		// s[:flushed] has already been copied or percent-encoded into b.
		flushed := 0
		for i := 0; i < len(s); i++ {
			c := s[i]
			var keep bool
			switch {
			// Unreserved according to RFC 3986 sec 2.3: ALPHA, DIGIT, hyphen,
			// period, underscore and tilde. Percent-encoded forms of these
			// should not be created by URI producers.
			case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z', '0' <= c && c <= '9',
				c == '-', c == '.', c == '_', c == '~':
				keep = true
			// When normalizing, an existing valid escape is not re-encoded.
			case c == '%':
				keep = norm && i+2 < len(s) && isHex(s[i+1]) && isHex(s[i+2])
			// Reserved characters are kept only when normalizing. Single quote
			// and parens are sub-delims in RFC 3986 too, but they are left out
			// of this set on purpose: encoding them lets the output sit inside
			// single-quoted attributes and unquoted CSS url(...) constructs.
			// Single quotes are only used in the obsolete "mark" rule in an
			// appendix of RFC 3986, so encoding them is safe.
			case strings.IndexByte("!#$&*+,/:;=?@[]", c) >= 0:
				keep = norm
			}
			if keep {
				continue
			}
			// Copy the pending run of kept bytes, then the encoded byte.
			b.WriteString(s[flushed:i])
			fmt.Fprintf(b, "%%%02x", c)
			flushed = i + 1
		}
		b.WriteString(s[flushed:])
		return flushed != 0
	}

	// srcsetFilterAndEscaper filters and normalizes a srcset value, which is a
	// comma-separated list of image URLs, each optionally followed by metadata.
	//
	// Content typed as contentTypeSrcset is returned as is. Content typed as
	// contentTypeURL is handled as a single URL. Anything else is split on commas
	// and every element is passed through filterSrcsetElement.
	func srcsetFilterAndEscaper(args ...any) string {
		s, t := stringify(args...)
		if t == contentTypeSrcset {
			return s
		}
		if t == contentTypeURL {
			// Normalizing percent-encodes all HTML whitespace, which is what
			// would otherwise separate the image URL from its metadata.
			var norm strings.Builder
			if processURLOnto(s, true, &norm) {
				s = norm.String()
			}
			// Commas separate one source from the next, so encode them too.
			return strings.ReplaceAll(s, ",", "%2c")
		}

		var out strings.Builder
		elemStart := 0
		for {
			comma := strings.IndexByte(s[elemStart:], ',')
			if comma < 0 {
				break
			}
			elemEnd := elemStart + comma
			filterSrcsetElement(s, elemStart, elemEnd, &out)
			out.WriteString(",")
			elemStart = elemEnd + 1
		}
		// The last element (or the only one) has no trailing comma.
		filterSrcsetElement(s, elemStart, len(s), &out)
		return out.String()
	}

	// htmlSpaceAndASCIIAlnumBytes is a 128-bit bitmap indexed by byte value:
	// bit c&7 of byte c>>3 is set when c is an HTML space character or an ASCII
	// letter or digit.
	// Derived from https://play.golang.org/p/Dhmj7FORT5
	const htmlSpaceAndASCIIAlnumBytes = "\x00\x36\x00\x00\x01\x00\xff\x03\xfe\xff\xff\x07\xfe\xff\xff\x07"

	// isHTMLSpace is true iff c is a whitespace character per
	// https://infra.spec.whatwg.org/#ascii-whitespace
	func isHTMLSpace(c byte) bool {
		// Every entry at or below 0x20 in the bitmap is a space character.
		if c > 0x20 {
			return false
		}
		return htmlSpaceAndASCIIAlnumBytes[c>>3]&(1<<(c&7)) != 0
	}

	// isHTMLSpaceOrASCIIAlnum is true iff c is an HTML space character or an
	// ASCII letter or digit.
	func isHTMLSpaceOrASCIIAlnum(c byte) bool {
		// The bitmap only covers the ASCII range; anything above would index
		// past its end.
		if c >= 0x80 {
			return false
		}
		return htmlSpaceAndASCIIAlnumBytes[c>>3]&(1<<(c&7)) != 0
	}

	// filterSrcsetElement handles one srcset element, s[left:right], and appends
	// the result to b.
	//
	// The element is read as optional leading HTML whitespace, a URL running up
	// to the next HTML whitespace, and trailing metadata. If the URL passes
	// isSafeURL and the metadata consists only of HTML spaces and ASCII
	// alphanumerics, the element is written with its URL normalized and the
	// surrounding text copied verbatim. Otherwise "#" followed by filterFailsafe
	// is written in its place.
	func filterSrcsetElement(s string, left int, right int, b *strings.Builder) {
		// Skip leading whitespace to find where the URL begins.
		urlStart := left
		for urlStart < right && isHTMLSpace(s[urlStart]) {
			urlStart++
		}
		// The URL extends to the first whitespace, or to the end of the element.
		urlEnd := urlStart
		for urlEnd < right && !isHTMLSpace(s[urlEnd]) {
			urlEnd++
		}
		candidate := s[urlStart:urlEnd]
		// Metadata made only of spaces and alnums needs no URL normalization;
		// anything else causes the whole element to be rejected.
		ok := isSafeURL(candidate)
		for i := urlEnd; ok && i < right; i++ {
			ok = isHTMLSpaceOrASCIIAlnum(s[i])
		}
		if !ok {
			b.WriteString("#")
			b.WriteString(filterFailsafe)
			return
		}
		b.WriteString(s[left:urlStart])
		processURLOnto(candidate, true, b)
		b.WriteString(s[urlEnd:right])
	}