internal/sanitizer/sanitizer.go - pkgsite - Git at Google

 // Copyright 2023 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 // Package sanitizer provides a simple html sanitizer to remove
 // tags and attributes that could potentially cause security issues.
 package sanitizer

 import (
 	"bytes"
 	"net/url"
 	"regexp"
 	"strings"

 	"golang.org/x/net/html"
 )

 // SanitizeBytes returns a sanitized version of the input.
 // It throws out any attributes or tags that are not explicitly
 // allowed in allowElems or allowAttributes below, including
 // any child nodes of elements that are not allowed. It returns
 // the empty string if there was an error parsing the input.
 func SanitizeBytes(b []byte) []byte {
 	// TODO(matloob): We want to sanitize a fragment that would
 	// appear in the body. Can we call ParseFragment without
 	// creating the body node here?
 	document, err := html.Parse(strings.NewReader("<html><head></head><body></body></html>"))
 	if err != nil {
 		return nil
 	}
 	body := document.FirstChild.LastChild // document.FirstChild is the <html> node

 	nodes, err := html.ParseFragment(bytes.NewReader(b), body)
 	if err != nil {
 		return []byte{}
 	}
 	keepNodes := sanitizeNodes(nodes)
 	var buf bytes.Buffer
 	for _, n := range keepNodes {
 		html.Render(&buf, n)
 	}
 	return buf.Bytes()
 }

 // sanitize sanitizes the attributes and children of n.
 // It returns false if the node should be cut out, and a list
 // of parent-less nodes the node should be replaced with.
 func sanitize(n *html.Node) ([]*html.Node, bool) {
 	switch n.Type {
 	case html.TextNode:
 		return nil, true // Assume text nodes are safe
 	case html.ElementNode:
 		if n.Namespace != "" {
 			return nil, false
 		}
 		n.Data = strings.ToLower(n.Data)
 		if !allowElemsMap[n.Data] {
 			switch n.Data {
 			case "frame", "frameset", "iframe", "noembed", "noframes", "noscript", "nostyle", "object", "script", "style", "title":
 				return nil, false
 			default:
 				return extractSanitizedChildren(n), false
 			}
 		}
 		keepAttr := []html.Attribute{}
 		for _, attr := range n.Attr {
 			if attr.Namespace != "" {
 				continue
 			}
 			if allow, ok := allowAttrMap[""][attr.Key]; ok {
 				// This is an attribute that can be present on
 				// any tag.
 				if !allow(attr.Val) {
 					continue
 				}
 			} else if allow, ok := allowAttrMap[n.Data][attr.Key]; ok {
 				if !allow(attr.Val) {
 					continue
 				}
 				if roundtripAttrs[attr.Key][n.Data] {
 					attr.Val = roundtripURL(attr.Val)
 					if attr.Val == "" {
 						continue
 					}
 				}
 			} else {
 				// There is no allow function. It's not allowed.
 				continue
 			}
 			keepAttr = append(keepAttr, attr)
 		}
 		if n.Data == "a" {
 			if len(keepAttr) == 0 {
 				return extractSanitizedChildren(n), false
 			}
 			keepAttr = addRelNoFollow(keepAttr)
 		}
 		if n.Data == "img" {
 			if len(keepAttr) == 0 {
 				return nil, false
 			}
 		}
 		n.Attr = keepAttr
 		replaceChildren := make(map[*html.Node][]*html.Node)
 		for child := n.FirstChild; child != nil; child = child.NextSibling {
 			if replace, ok := sanitize(child); !ok {
 				replaceChildren[child] = replace
 			}
 		}
 		for child, replace := range replaceChildren {
 			for _, r := range replace {
 				n.InsertBefore(r, child)
 			}
 			n.RemoveChild(child)
 		}
 		return nil, true
 	default:
 		return nil, false
 	}
 }

 func extractSanitizedChildren(node *html.Node) []*html.Node {
 	var children []*html.Node
 	for child := node.FirstChild; child != nil; child = child.NextSibling {
 		children = append(children, child)
 	}
 	for _, child := range children {
 		node.RemoveChild(child)
 	}
 	return sanitizeNodes(children)
 }

 func sanitizeNodes(nodes []*html.Node) []*html.Node {
 	var keepNodes []*html.Node
 	for _, n := range nodes {
 		if replace, ok := sanitize(n); ok {
 			keepNodes = append(keepNodes, n)
 		} else {
 			keepNodes = append(keepNodes, replace...)
 		}
 	}
 	return keepNodes
 }

 // addRelNoFollow adds a rel="nofollow" attribute to the attributes
 // if the href attribute is present. If there's already a rel
 // attribute present its value is replaced with "nofollow".
 func addRelNoFollow(attrs []html.Attribute) []html.Attribute {
 	hasHref := false
 	for _, attr := range attrs {
 		if attr.Namespace == "" && attr.Key == "href" {
 			hasHref = true
 		}
 	}
 	if !hasHref {
 		return attrs
 	}
 	hasRel := false
 	for i := range attrs {
 		if attrs[i].Namespace == "" && attrs[i].Key == "rel" {
 			hasRel = true
 			attrs[i].Val = "nofollow"
 		}
 	}
 	if !hasRel {
 		attrs = append(attrs, html.Attribute{Key: "rel", Val: "nofollow"})
 	}
 	return attrs
 }

 var allowAttrMap map[string]map[string]func(string) bool
 var allowElemsMap map[string]bool

 func init() {
 	allowAttrMap = make(map[string]map[string]func(string) bool)
 	for _, aa := range allowAttrs {
 		if allowAttrMap[aa.elem] == nil {
 			allowAttrMap[aa.elem] = make(map[string]func(string) bool)
 		}
 		allowAttrMap[aa.elem][aa.attr] = aa.allow
 	}
 	allowElemsMap = make(map[string]bool)
 	for _, ae := range allowElems {
 		allowElemsMap[ae] = true
 	}
 }

 // allowElems is the list of elements that are allowed in the
 // sanitized html.
 var allowElems = []string{
 	"a",
 	"abbr",
 	"article",
 	"aside",
 	"b",
 	"bdi",
 	"bdo",
 	"blockquote",
 	"br",
 	"caption",
 	"cite",
 	"code",
 	"col",
 	"colgroup",
 	"details",
 	"dd",
 	"del",
 	"div",
 	"dl",
 	"dt",
 	"em",
 	"figcaption",
 	"figure",
 	"h1",
 	"h2",
 	"h3",
 	"h4",
 	"h5",
 	"h6",
 	"hr",
 	"i",
 	"img",
 	"ins",
 	"li",
 	"mark",
 	"ol",
 	"p",
 	"pre",
 	"q",
 	"rp",
 	"rt",
 	"ruby",
 	"s",
 	"samp",
 	"section",
 	"small",
 	"span",
 	"strike",
 	"strong",
 	"sub",
 	"sup",
 	"summary",
 	"table",
 	"tbody",
 	"tfoot",
 	"thead",
 	"time",
 	"td",
 	"th",
 	"tr",
 	"tt",
 	"u",
 	"ul",
 	"var",
 	"wbr",
 }

 type allowAttr struct {
 	elem  string // "" to apply to all elements
 	attr  string
 	allow func(string) bool
 }

 var allowAttrs = []allowAttr{
 	// bluemonday AllowStandardAttributes
 	{"", "dir", re(`^(?i)(rtl|ltr)$`)},
 	{"", "lang", re(`^[a-zA-Z]{2,20}$`)},
 	{"", "id", re(`^[a-zA-Z0-9\:\-_\.]+$`)},
 	{"", "title", para},

 	{"details", "open", re(`(?i)^(|open)$`)},
 	{"blockquote", "cite", validURL},
 	{"a", "href", validURL},
 	{"bdi", "dir", re(`(?i)^(rtl|ltr)$`)},
 	{"bdo", "dir", re(`(?i)^(rtl|ltr)$`)},
 	{"map", "name", re(`([\p{L}\p{N}_-]+)`)},
 	{"img", "usemap", re(`(?i)^#[\p{L}\p{N}_-]+$`)},
 	{"img", "src", validURL},
 	{"img", "align", re(`(?i)^(left|right|top|texttop|middle|absmiddle|baseline|bottom|absbottom)$`)},
 	{"img", "alt", para},
 	{"img", "height", numOrPercent},
 	{"img", "width", re(`^[0-9]+([%]|[a-z]+)?;?/?$`)}, // a hacky regexp to allow most commonly appearing width errors through
 	{"div", "align", align},
 	{"div", "width", numOrPercent},
 	{"div", "role", re(`^[a-z]+$`)},
 	{"div", "aria-level", integer},
 	{"del", "cite", para},
 	{"del", "datetime", iso8601},
 	{"ins", "cite", para},
 	{"ins", "datetime", iso8601},
 	{"p", "align", align},        // pkgsite allows all values
 	{"p", "width", numOrPercent}, // pkgsite allows all values
 	{"q", "cite", validURL},
 	{"time", "datetime", iso8601},
 	{"ol", "type", re(`(?i)^(circle|disc|square|a|A|i|I|1)$`)},
 	{"ul", "type", re(`(?i)^(circle|disc|square|a|A|i|I|1)$`)},
 	{"li", "type", re(`(?i)^(circle|disc|square|a|A|i|I|1)$`)},
 	{"li", "value", integer},

 	{"table", "height", numOrPercent},
 	{"table", "width", numOrPercent},
 	{"table", "summary", para},
 	{"col", "align", align},
 	{"col", "height", numOrPercent},
 	{"col", "width", numOrPercent},
 	{"col", "span", integer},
 	{"col", "valign", valign},
 	{"colgroup", "align", align},
 	{"colgroup", "height", numOrPercent},
 	{"colgroup", "width", numOrPercent},
 	{"colgroup", "span", integer},
 	{"colgroup", "valign", valign},
 	{"thead", "align", align},
 	{"thead", "valign", valign},
 	{"tr", "align", align},
 	{"tr", "valign", valign},
 	{"td", "abbr", para},
 	{"td", "align", align},
 	{"td", "colspan", integer},
 	{"td", "rowspan", integer},
 	{"td", "headers", spaceSepTokens},
 	{"td", "height", numOrPercent},
 	{"td", "width", numOrPercent},
 	{"td", "scope", re(`(?i)(?:row|col)(?:group)?`)},
 	{"td", "valign", valign},
 	{"td", "nowrap", re(`(?i)|nowrap`)},
 	{"th", "abbr", para},
 	{"th", "align", align},
 	{"th", "colspan", integer},
 	{"th", "rowspan", integer},
 	{"th", "headers", spaceSepTokens},
 	{"th", "height", numOrPercent},
 	{"th", "width", numOrPercent},
 	{"th", "scope", re(`(?i)(?:row|col)(?:group)?`)},
 	{"th", "valign", valign},
 	{"th", "nowrap", re(`(?i)|nowrap`)},
 	{"tbody", "align", align},
 	{"tbody", "valign", valign},
 	{"tfoot", "align", align},
 	{"tfoot", "valign", valign},

 	// Needed to preserve github styles heading font-sizes
 	// Can we be stricter than spaceSepTokens?
 	{"h1", "class", spaceSepTokens},
 	{"h2", "class", spaceSepTokens},
 	{"h3", "class", spaceSepTokens},
 	{"h4", "class", spaceSepTokens},
 	{"h5", "class", spaceSepTokens},
 	{"h6", "class", spaceSepTokens},
 }

 // roundtripAttrs is a map from attribute keys which should be checked
 // against roundtripURL to maps of tags which are allowed to have them.
 var roundtripAttrs = map[string]map[string]bool{
 	"src":  {"img": true},
 	"href": {"a": true},
 	"cite": {
 		"blockquote": true,
 		"del":        true,
 		"ins":        true,
 		"q":          true,
 	},
 }

 var align = re(`(?i)^(center|justify|left|right)$`)

 var valign = re(`(?i)^(baseline|bottom|middle|top)$`)

 var para = re(`^[\p{L}\p{N}\s\-_',\[\]!\./\\\(\)]*$`)

 var spaceSepTokens = re(`^([\s\p{L}\p{N}_-]+)$`)

 var numOrPercent = re(`^[0-9]+[%]?$`)

 var integer = re(`^[0-9]+$`)

 var iso8601 = re(`^[0-9]{4}(-[0-9]{2}(-[0-9]{2}([ T][0-9]{2}(:[0-9]{2}){1,2}(.[0-9]{1,6})` +
 	`?Z?([\+-][0-9]{2}:[0-9]{2})?)?)?)?$`)

 func re(rx string) func(string) bool {
 	return regexp.MustCompile(rx).MatchString
 }

 // validURL returns true if the URL is a valid url according to the
 // following rules: it must be url.Parsable when the spaces are trimmed,
 // it can not contain interior newlines, tabs, or spaces, and its scheme,
 // if present must be mailto, http, or https.
 func validURL(rawurl string) bool {
 	rawurl = strings.TrimSpace(rawurl)

 	if strings.ContainsAny(rawurl, " \t") {
 		return false
 	}

 	u, err := url.Parse(rawurl)
 	if err != nil {
 		return false
 	}

 	switch u.Scheme {
 	case "", "mailto", "http", "https":
 		break
 	default:
 		return false
 	}

 	return true
 }

 func roundtripURL(rawurl string) string {
 	u, err := url.Parse(strings.TrimSpace(rawurl))
 	if err != nil {
 		return ""
 	}
 	return u.String()
 }
	// Copyright 2023 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	// Package sanitizer provides a simple html sanitizer to remove
	// tags and attributes that could potentially cause security issues.
	package sanitizer

	import (
	"bytes"
	"net/url"
	"regexp"
	"strings"

	"golang.org/x/net/html"
	)

	// SanitizeBytes returns a sanitized version of the input.
	// It throws out any attributes or tags that are not explicitly
	// allowed in allowElems or allowAttributes below, including
	// any child nodes of elements that are not allowed. It returns
	// the empty string if there was an error parsing the input.
	func SanitizeBytes(b []byte) []byte {
	// TODO(matloob): We want to sanitize a fragment that would
	// appear in the body. Can we call ParseFragment without
	// creating the body node here?
	document, err := html.Parse(strings.NewReader("<html><head></head><body></body></html>"))
	if err != nil {
	return nil
	}
	body := document.FirstChild.LastChild // document.FirstChild is the <html> node

	nodes, err := html.ParseFragment(bytes.NewReader(b), body)
	if err != nil {
	return []byte{}
	}
	keepNodes := sanitizeNodes(nodes)
	var buf bytes.Buffer
	for _, n := range keepNodes {
	html.Render(&buf, n)
	}
	return buf.Bytes()
	}

	// sanitize sanitizes the attributes and children of n.
	// It returns false if the node should be cut out, and a list
	// of parent-less nodes the node should be replaced with.
	func sanitize(n html.Node) ([]html.Node, bool) {
	switch n.Type {
	case html.TextNode:
	return nil, true // Assume text nodes are safe
	case html.ElementNode:
	if n.Namespace != "" {
	return nil, false
	}
	n.Data = strings.ToLower(n.Data)
	if !allowElemsMap[n.Data] {
	switch n.Data {
	case "frame", "frameset", "iframe", "noembed", "noframes", "noscript", "nostyle", "object", "script", "style", "title":
	return nil, false
	default:
	return extractSanitizedChildren(n), false
	}
	}
	keepAttr := []html.Attribute{}
	for _, attr := range n.Attr {
	if attr.Namespace != "" {
	continue
	}
	if allow, ok := allowAttrMap[""][attr.Key]; ok {
	// This is an attribute that can be present on
	// any tag.
	if !allow(attr.Val) {
	continue
	}
	} else if allow, ok := allowAttrMap[n.Data][attr.Key]; ok {
	if !allow(attr.Val) {
	continue
	}
	if roundtripAttrs[attr.Key][n.Data] {
	attr.Val = roundtripURL(attr.Val)
	if attr.Val == "" {
	continue
	}
	}
	} else {
	// There is no allow function. It's not allowed.
	continue
	}
	keepAttr = append(keepAttr, attr)
	}
	if n.Data == "a" {
	if len(keepAttr) == 0 {
	return extractSanitizedChildren(n), false
	}
	keepAttr = addRelNoFollow(keepAttr)
	}
	if n.Data == "img" {
	if len(keepAttr) == 0 {
	return nil, false
	}
	}
	n.Attr = keepAttr
	replaceChildren := make(map[html.Node][]html.Node)
	for child := n.FirstChild; child != nil; child = child.NextSibling {
	if replace, ok := sanitize(child); !ok {
	replaceChildren[child] = replace
	}
	}
	for child, replace := range replaceChildren {
	for _, r := range replace {
	n.InsertBefore(r, child)
	}
	n.RemoveChild(child)
	}
	return nil, true
	default:
	return nil, false
	}
	}

	func extractSanitizedChildren(node html.Node) []html.Node {
	var children []*html.Node
	for child := node.FirstChild; child != nil; child = child.NextSibling {
	children = append(children, child)
	}
	for _, child := range children {
	node.RemoveChild(child)
	}
	return sanitizeNodes(children)
	}

	func sanitizeNodes(nodes []html.Node) []html.Node {
	var keepNodes []*html.Node
	for _, n := range nodes {
	if replace, ok := sanitize(n); ok {
	keepNodes = append(keepNodes, n)
	} else {
	keepNodes = append(keepNodes, replace...)
	}
	}
	return keepNodes
	}

	// addRelNoFollow adds a rel="nofollow" attribute to the attributes
	// if the href attribute is present. If there's already a rel
	// attribute present its value is replaced with "nofollow".
	func addRelNoFollow(attrs []html.Attribute) []html.Attribute {
	hasHref := false
	for _, attr := range attrs {
	if attr.Namespace == "" && attr.Key == "href" {
	hasHref = true
	}
	}
	if !hasHref {
	return attrs
	}
	hasRel := false
	for i := range attrs {
	if attrs[i].Namespace == "" && attrs[i].Key == "rel" {
	hasRel = true
	attrs[i].Val = "nofollow"
	}
	}
	if !hasRel {
	attrs = append(attrs, html.Attribute{Key: "rel", Val: "nofollow"})
	}
	return attrs
	}

	var allowAttrMap map[string]map[string]func(string) bool
	var allowElemsMap map[string]bool

	func init() {
	allowAttrMap = make(map[string]map[string]func(string) bool)
	for _, aa := range allowAttrs {
	if allowAttrMap[aa.elem] == nil {
	allowAttrMap[aa.elem] = make(map[string]func(string) bool)
	}
	allowAttrMap[aa.elem][aa.attr] = aa.allow
	}
	allowElemsMap = make(map[string]bool)
	for _, ae := range allowElems {
	allowElemsMap[ae] = true
	}
	}

	// allowElems is the list of elements that are allowed in the
	// sanitized html.
	var allowElems = []string{
	"a",
	"abbr",
	"article",
	"aside",
	"b",
	"bdi",
	"bdo",
	"blockquote",
	"br",
	"caption",
	"cite",
	"code",
	"col",
	"colgroup",
	"details",
	"dd",
	"del",
	"div",
	"dl",
	"dt",
	"em",
	"figcaption",
	"figure",
	"h1",
	"h2",
	"h3",
	"h4",
	"h5",
	"h6",
	"hr",
	"i",
	"img",
	"ins",
	"li",
	"mark",
	"ol",
	"p",
	"pre",
	"q",
	"rp",
	"rt",
	"ruby",
	"s",
	"samp",
	"section",
	"small",
	"span",
	"strike",
	"strong",
	"sub",
	"sup",
	"summary",
	"table",
	"tbody",
	"tfoot",
	"thead",
	"time",
	"td",
	"th",
	"tr",
	"tt",
	"u",
	"ul",
	"var",
	"wbr",
	}

	type allowAttr struct {
	elem string // "" to apply to all elements
	attr string
	allow func(string) bool
	}

	var allowAttrs = []allowAttr{
	// bluemonday AllowStandardAttributes
	{"", "dir", re(`^(?i)(rtl\|ltr)$`)},
	{"", "lang", re(`^[a-zA-Z]{2,20}$`)},
	{"", "id", re(`^[a-zA-Z0-9\:\-_\.]+$`)},
	{"", "title", para},

	{"details", "open", re(`(?i)^(\|open)$`)},
	{"blockquote", "cite", validURL},
	{"a", "href", validURL},
	{"bdi", "dir", re(`(?i)^(rtl\|ltr)$`)},
	{"bdo", "dir", re(`(?i)^(rtl\|ltr)$`)},
	{"map", "name", re(`([\p{L}\p{N}_-]+)`)},
	{"img", "usemap", re(`(?i)^#[\p{L}\p{N}_-]+$`)},
	{"img", "src", validURL},
	{"img", "align", re(`(?i)^(left\|right\|top\|texttop\|middle\|absmiddle\|baseline\|bottom\|absbottom)$`)},
	{"img", "alt", para},
	{"img", "height", numOrPercent},
	{"img", "width", re(`^[0-9]+([%]\|[a-z]+)?;?/?$`)}, // a hacky regexp to allow most commonly appearing width errors through
	{"div", "align", align},
	{"div", "width", numOrPercent},
	{"div", "role", re(`^[a-z]+$`)},
	{"div", "aria-level", integer},
	{"del", "cite", para},
	{"del", "datetime", iso8601},
	{"ins", "cite", para},
	{"ins", "datetime", iso8601},
	{"p", "align", align}, // pkgsite allows all values
	{"p", "width", numOrPercent}, // pkgsite allows all values
	{"q", "cite", validURL},
	{"time", "datetime", iso8601},
	{"ol", "type", re(`(?i)^(circle\|disc\|square\|a\|A\|i\|I\|1)$`)},
	{"ul", "type", re(`(?i)^(circle\|disc\|square\|a\|A\|i\|I\|1)$`)},
	{"li", "type", re(`(?i)^(circle\|disc\|square\|a\|A\|i\|I\|1)$`)},
	{"li", "value", integer},

	{"table", "height", numOrPercent},
	{"table", "width", numOrPercent},
	{"table", "summary", para},
	{"col", "align", align},
	{"col", "height", numOrPercent},
	{"col", "width", numOrPercent},
	{"col", "span", integer},
	{"col", "valign", valign},
	{"colgroup", "align", align},
	{"colgroup", "height", numOrPercent},
	{"colgroup", "width", numOrPercent},
	{"colgroup", "span", integer},
	{"colgroup", "valign", valign},
	{"thead", "align", align},
	{"thead", "valign", valign},
	{"tr", "align", align},
	{"tr", "valign", valign},
	{"td", "abbr", para},
	{"td", "align", align},
	{"td", "colspan", integer},
	{"td", "rowspan", integer},
	{"td", "headers", spaceSepTokens},
	{"td", "height", numOrPercent},
	{"td", "width", numOrPercent},
	{"td", "scope", re(`(?i)(?:row\|col)(?:group)?`)},
	{"td", "valign", valign},
	{"td", "nowrap", re(`(?i)\|nowrap`)},
	{"th", "abbr", para},
	{"th", "align", align},
	{"th", "colspan", integer},
	{"th", "rowspan", integer},
	{"th", "headers", spaceSepTokens},
	{"th", "height", numOrPercent},
	{"th", "width", numOrPercent},
	{"th", "scope", re(`(?i)(?:row\|col)(?:group)?`)},
	{"th", "valign", valign},
	{"th", "nowrap", re(`(?i)\|nowrap`)},
	{"tbody", "align", align},
	{"tbody", "valign", valign},
	{"tfoot", "align", align},
	{"tfoot", "valign", valign},

	// Needed to preserve github styles heading font-sizes
	// Can we be stricter than spaceSepTokens?
	{"h1", "class", spaceSepTokens},
	{"h2", "class", spaceSepTokens},
	{"h3", "class", spaceSepTokens},
	{"h4", "class", spaceSepTokens},
	{"h5", "class", spaceSepTokens},
	{"h6", "class", spaceSepTokens},
	}

	// roundtripAttrs is a map from attribute keys which should be checked
	// against roundtripURL to maps of tags which are allowed to have them.
	var roundtripAttrs = map[string]map[string]bool{
	"src": {"img": true},
	"href": {"a": true},
	"cite": {
	"blockquote": true,
	"del": true,
	"ins": true,
	"q": true,
	},
	}

	var align = re(`(?i)^(center\|justify\|left\|right)$`)

	var valign = re(`(?i)^(baseline\|bottom\|middle\|top)$`)

	var para = re(`^[\p{L}\p{N}\s\-_',\[\]!\./\\\(\)]*$`)

	var spaceSepTokens = re(`^([\s\p{L}\p{N}_-]+)$`)

	var numOrPercent = re(`^[0-9]+[%]?$`)

	var integer = re(`^[0-9]+$`)

	var iso8601 = re(`^[0-9]{4}(-[0-9]{2}(-[0-9]{2}([ T][0-9]{2}(:[0-9]{2}){1,2}(.[0-9]{1,6})` +
	`?Z?([\+-][0-9]{2}:[0-9]{2})?)?)?)?$`)

	func re(rx string) func(string) bool {
	return regexp.MustCompile(rx).MatchString
	}

	// validURL returns true if the URL is a valid url according to the
	// following rules: it must be url.Parsable when the spaces are trimmed,
	// it can not contain interior newlines, tabs, or spaces, and its scheme,
	// if present must be mailto, http, or https.
	func validURL(rawurl string) bool {
	rawurl = strings.TrimSpace(rawurl)

	if strings.ContainsAny(rawurl, " \t") {
	return false
	}

	u, err := url.Parse(rawurl)
	if err != nil {
	return false
	}

	switch u.Scheme {
	case "", "mailto", "http", "https":
	break
	default:
	return false
	}

	return true
	}

	func roundtripURL(rawurl string) string {
	u, err := url.Parse(strings.TrimSpace(rawurl))
	if err != nil {
	return ""
	}
	return u.String()
	}