go.net/html: move exp/html and exp/html/atom here to the go.net
sub-repo.

It's a straight copy, except for these modifications:
* "exp/html" and "exp/html/atom" imports were renamed, and
* the "TODO... When this package moves out of exp" comment was
  deleted from atom/atom.go.

The matching change is at https://golang.org/cl/7317043

The rationale was discussed at
https://groups.google.com/d/topic/golang-nuts/Qq5hTQyPuLg/discussion

R=adg, remyoudompheng, dave
CC=golang-dev
https://golang.org/cl/7310063
diff --git a/html/atom/atom.go b/html/atom/atom.go
new file mode 100644
index 0000000..227404b
--- /dev/null
+++ b/html/atom/atom.go
@@ -0,0 +1,78 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package atom provides integer codes (also known as atoms) for a fixed set of
+// frequently occurring HTML strings: tag names and attribute keys such as "p"
+// and "id".
+//
+// Sharing an atom's name between all elements with the same tag can result in
+// fewer string allocations when tokenizing and parsing HTML. Integer
+// comparisons are also generally faster than string comparisons.
+//
+// The value of an atom's particular code is not guaranteed to stay the same
+// between versions of this package. Neither is any ordering guaranteed:
+// whether atom.H1 < atom.H2 may also change. The codes are not guaranteed to
+// be dense. The only guarantees are that e.g. looking up "div" will yield
+// atom.Div, calling atom.Div.String will return "div", and atom.Div != 0.
+package atom
+
+// Atom is an integer code for a string. The zero value maps to "".
+type Atom uint32
+
+// String returns the atom's name.
+func (a Atom) String() string {
+	start := uint32(a >> 8)
+	n := uint32(a & 0xff)
+	if start+n > uint32(len(atomText)) {
+		return ""
+	}
+	return atomText[start : start+n]
+}
+
+func (a Atom) string() string {
+	return atomText[a>>8 : a>>8+a&0xff]
+}
+
+// fnv computes the FNV hash with an arbitrary starting value h.
+func fnv(h uint32, s []byte) uint32 {
+	for i := range s {
+		h ^= uint32(s[i])
+		h *= 16777619
+	}
+	return h
+}
+
+func match(s string, t []byte) bool {
+	for i, c := range t {
+		if s[i] != c {
+			return false
+		}
+	}
+	return true
+}
+
+// Lookup returns the atom whose name is s. It returns zero if there is no
+// such atom. The lookup is case sensitive.
+func Lookup(s []byte) Atom {
+	if len(s) == 0 || len(s) > maxAtomLen {
+		return 0
+	}
+	h := fnv(hash0, s)
+	if a := table[h&uint32(len(table)-1)]; int(a&0xff) == len(s) && match(a.string(), s) {
+		return a
+	}
+	if a := table[(h>>16)&uint32(len(table)-1)]; int(a&0xff) == len(s) && match(a.string(), s) {
+		return a
+	}
+	return 0
+}
+
+// String returns a string whose contents are equal to s. In that sense, it is
+// equivalent to string(s) but may be more efficient.
+func String(s []byte) string {
+	if a := Lookup(s); a != 0 {
+		return a.String()
+	}
+	return string(s)
+}
diff --git a/html/atom/atom_test.go b/html/atom/atom_test.go
new file mode 100644
index 0000000..6e33704
--- /dev/null
+++ b/html/atom/atom_test.go
@@ -0,0 +1,109 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package atom
+
+import (
+	"sort"
+	"testing"
+)
+
+func TestKnown(t *testing.T) {
+	for _, s := range testAtomList {
+		if atom := Lookup([]byte(s)); atom.String() != s {
+			t.Errorf("Lookup(%q) = %#x (%q)", s, uint32(atom), atom.String())
+		}
+	}
+}
+
+func TestHits(t *testing.T) {
+	for _, a := range table {
+		if a == 0 {
+			continue
+		}
+		got := Lookup([]byte(a.String()))
+		if got != a {
+			t.Errorf("Lookup(%q) = %#x, want %#x", a.String(), uint32(got), uint32(a))
+		}
+	}
+}
+
+func TestMisses(t *testing.T) {
+	testCases := []string{
+		"",
+		"\x00",
+		"\xff",
+		"A",
+		"DIV",
+		"Div",
+		"dIV",
+		"aa",
+		"a\x00",
+		"ab",
+		"abb",
+		"abbr0",
+		"abbr ",
+		" abbr",
+		" a",
+		"acceptcharset",
+		"acceptCharset",
+		"accept_charset",
+		"h0",
+		"h1h2",
+		"h7",
+		"onClick",
+		"λ",
+		// The following string has the same hash (0xa1d7fab7) as "onmouseover".
+		"\x00\x00\x00\x00\x00\x50\x18\xae\x38\xd0\xb7",
+	}
+	for _, tc := range testCases {
+		got := Lookup([]byte(tc))
+		if got != 0 {
+			t.Errorf("Lookup(%q): got %d, want 0", tc, got)
+		}
+	}
+}
+
+func TestForeignObject(t *testing.T) {
+	const (
+		afo = Foreignobject
+		afO = ForeignObject
+		sfo = "foreignobject"
+		sfO = "foreignObject"
+	)
+	if got := Lookup([]byte(sfo)); got != afo {
+		t.Errorf("Lookup(%q): got %#v, want %#v", sfo, got, afo)
+	}
+	if got := Lookup([]byte(sfO)); got != afO {
+		t.Errorf("Lookup(%q): got %#v, want %#v", sfO, got, afO)
+	}
+	if got := afo.String(); got != sfo {
+		t.Errorf("Atom(%#v).String(): got %q, want %q", afo, got, sfo)
+	}
+	if got := afO.String(); got != sfO {
+		t.Errorf("Atom(%#v).String(): got %q, want %q", afO, got, sfO)
+	}
+}
+
+func BenchmarkLookup(b *testing.B) {
+	sortedTable := make([]string, 0, len(table))
+	for _, a := range table {
+		if a != 0 {
+			sortedTable = append(sortedTable, a.String())
+		}
+	}
+	sort.Strings(sortedTable)
+
+	x := make([][]byte, 1000)
+	for i := range x {
+		x[i] = []byte(sortedTable[i%len(sortedTable)])
+	}
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		for _, s := range x {
+			Lookup(s)
+		}
+	}
+}
diff --git a/html/atom/gen.go b/html/atom/gen.go
new file mode 100644
index 0000000..9958a71
--- /dev/null
+++ b/html/atom/gen.go
@@ -0,0 +1,636 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+package main
+
+// This program generates table.go and table_test.go.
+// Invoke as
+//
+//	go run gen.go |gofmt >table.go
+//	go run gen.go -test |gofmt >table_test.go
+
+import (
+	"flag"
+	"fmt"
+	"math/rand"
+	"os"
+	"sort"
+	"strings"
+)
+
+// identifier converts s to a Go exported identifier.
+// It converts "div" to "Div" and "accept-charset" to "AcceptCharset".
+func identifier(s string) string {
+	b := make([]byte, 0, len(s))
+	cap := true
+	for _, c := range s {
+		if c == '-' {
+			cap = true
+			continue
+		}
+		if cap && 'a' <= c && c <= 'z' {
+			c -= 'a' - 'A'
+		}
+		cap = false
+		b = append(b, byte(c))
+	}
+	return string(b)
+}
+
+var test = flag.Bool("test", false, "generate table_test.go")
+
+func main() {
+	flag.Parse()
+
+	var all []string
+	all = append(all, elements...)
+	all = append(all, attributes...)
+	all = append(all, eventHandlers...)
+	all = append(all, extra...)
+	sort.Strings(all)
+
+	if *test {
+		fmt.Printf("// generated by go run gen.go -test; DO NOT EDIT\n\n")
+		fmt.Printf("package atom\n\n")
+		fmt.Printf("var testAtomList = []string{\n")
+		for _, s := range all {
+			fmt.Printf("\t%q,\n", s)
+		}
+		fmt.Printf("}\n")
+		return
+	}
+
+	// uniq - lists have dups
+	// compute max len too
+	maxLen := 0
+	w := 0
+	for _, s := range all {
+		if w == 0 || all[w-1] != s {
+			if maxLen < len(s) {
+				maxLen = len(s)
+			}
+			all[w] = s
+			w++
+		}
+	}
+	all = all[:w]
+
+	// Find hash that minimizes table size.
+	var best *table
+	for i := 0; i < 1000000; i++ {
+		if best != nil && 1<<(best.k-1) < len(all) {
+			break
+		}
+		h := rand.Uint32()
+		for k := uint(0); k <= 16; k++ {
+			if best != nil && k >= best.k {
+				break
+			}
+			var t table
+			if t.init(h, k, all) {
+				best = &t
+				break
+			}
+		}
+	}
+	if best == nil {
+		fmt.Fprintf(os.Stderr, "failed to construct string table\n")
+		os.Exit(1)
+	}
+
+	// Lay out strings, using overlaps when possible.
+	layout := append([]string{}, all...)
+
+	// Remove strings that are substrings of other strings
+	for changed := true; changed; {
+		changed = false
+		for i, s := range layout {
+			if s == "" {
+				continue
+			}
+			for j, t := range layout {
+				if i != j && t != "" && strings.Contains(s, t) {
+					changed = true
+					layout[j] = ""
+				}
+			}
+		}
+	}
+
+	// Join strings where one suffix matches another prefix.
+	for {
+		// Find best i, j, k such that layout[i][len-k:] == layout[j][:k],
+		// maximizing overlap length k.
+		besti := -1
+		bestj := -1
+		bestk := 0
+		for i, s := range layout {
+			if s == "" {
+				continue
+			}
+			for j, t := range layout {
+				if i == j {
+					continue
+				}
+				for k := bestk + 1; k <= len(s) && k <= len(t); k++ {
+					if s[len(s)-k:] == t[:k] {
+						besti = i
+						bestj = j
+						bestk = k
+					}
+				}
+			}
+		}
+		if bestk > 0 {
+			layout[besti] += layout[bestj][bestk:]
+			layout[bestj] = ""
+			continue
+		}
+		break
+	}
+
+	text := strings.Join(layout, "")
+
+	atom := map[string]uint32{}
+	for _, s := range all {
+		off := strings.Index(text, s)
+		if off < 0 {
+			panic("lost string " + s)
+		}
+		atom[s] = uint32(off<<8 | len(s))
+	}
+
+	// Generate the Go code.
+	fmt.Printf("// generated by go run gen.go; DO NOT EDIT\n\n")
+	fmt.Printf("package atom\n\nconst (\n")
+	for _, s := range all {
+		fmt.Printf("\t%s Atom = %#x\n", identifier(s), atom[s])
+	}
+	fmt.Printf(")\n\n")
+
+	fmt.Printf("const hash0 = %#x\n\n", best.h0)
+	fmt.Printf("const maxAtomLen = %d\n\n", maxLen)
+
+	fmt.Printf("var table = [1<<%d]Atom{\n", best.k)
+	for i, s := range best.tab {
+		if s == "" {
+			continue
+		}
+		fmt.Printf("\t%#x: %#x, // %s\n", i, atom[s], s)
+	}
+	fmt.Printf("}\n")
+	datasize := (1 << best.k) * 4
+
+	fmt.Printf("const atomText =\n")
+	textsize := len(text)
+	for len(text) > 60 {
+		fmt.Printf("\t%q +\n", text[:60])
+		text = text[60:]
+	}
+	fmt.Printf("\t%q\n\n", text)
+
+	fmt.Fprintf(os.Stderr, "%d atoms; %d string bytes + %d tables = %d total data\n", len(all), textsize, datasize, textsize+datasize)
+}
+
+type byLen []string
+
+func (x byLen) Less(i, j int) bool { return len(x[i]) > len(x[j]) }
+func (x byLen) Swap(i, j int)      { x[i], x[j] = x[j], x[i] }
+func (x byLen) Len() int           { return len(x) }
+
+// fnv computes the FNV hash with an arbitrary starting value h.
+func fnv(h uint32, s string) uint32 {
+	for i := 0; i < len(s); i++ {
+		h ^= uint32(s[i])
+		h *= 16777619
+	}
+	return h
+}
+
+// A table represents an attempt at constructing the lookup table.
+// The lookup table uses cuckoo hashing, meaning that each string
+// can be found in one of two positions.
+type table struct {
+	h0   uint32
+	k    uint
+	mask uint32
+	tab  []string
+}
+
+// hash returns the two hashes for s.
+func (t *table) hash(s string) (h1, h2 uint32) {
+	h := fnv(t.h0, s)
+	h1 = h & t.mask
+	h2 = (h >> 16) & t.mask
+	return
+}
+
+// init initializes the table with the given parameters.
+// h0 is the initial hash value,
+// k is the number of bits of hash value to use, and
+// x is the list of strings to store in the table.
+// init returns false if the table cannot be constructed.
+func (t *table) init(h0 uint32, k uint, x []string) bool {
+	t.h0 = h0
+	t.k = k
+	t.tab = make([]string, 1<<k)
+	t.mask = 1<<k - 1
+	for _, s := range x {
+		if !t.insert(s) {
+			return false
+		}
+	}
+	return true
+}
+
+// insert inserts s in the table.
+func (t *table) insert(s string) bool {
+	h1, h2 := t.hash(s)
+	if t.tab[h1] == "" {
+		t.tab[h1] = s
+		return true
+	}
+	if t.tab[h2] == "" {
+		t.tab[h2] = s
+		return true
+	}
+	if t.push(h1, 0) {
+		t.tab[h1] = s
+		return true
+	}
+	if t.push(h2, 0) {
+		t.tab[h2] = s
+		return true
+	}
+	return false
+}
+
+// push attempts to push aside the entry in slot i.
+func (t *table) push(i uint32, depth int) bool {
+	if depth > len(t.tab) {
+		return false
+	}
+	s := t.tab[i]
+	h1, h2 := t.hash(s)
+	j := h1 + h2 - i
+	if t.tab[j] != "" && !t.push(j, depth+1) {
+		return false
+	}
+	t.tab[j] = s
+	return true
+}
+
+// The lists of element names and attribute keys were taken from
+// http://www.whatwg.org/specs/web-apps/current-work/multipage/section-index.html
+// as of the "HTML Living Standard - Last Updated 30 May 2012" version.
+
+var elements = []string{
+	"a",
+	"abbr",
+	"address",
+	"area",
+	"article",
+	"aside",
+	"audio",
+	"b",
+	"base",
+	"bdi",
+	"bdo",
+	"blockquote",
+	"body",
+	"br",
+	"button",
+	"canvas",
+	"caption",
+	"cite",
+	"code",
+	"col",
+	"colgroup",
+	"command",
+	"data",
+	"datalist",
+	"dd",
+	"del",
+	"details",
+	"dfn",
+	"dialog",
+	"div",
+	"dl",
+	"dt",
+	"em",
+	"embed",
+	"fieldset",
+	"figcaption",
+	"figure",
+	"footer",
+	"form",
+	"h1",
+	"h2",
+	"h3",
+	"h4",
+	"h5",
+	"h6",
+	"head",
+	"header",
+	"hgroup",
+	"hr",
+	"html",
+	"i",
+	"iframe",
+	"img",
+	"input",
+	"ins",
+	"kbd",
+	"keygen",
+	"label",
+	"legend",
+	"li",
+	"link",
+	"map",
+	"mark",
+	"menu",
+	"meta",
+	"meter",
+	"nav",
+	"noscript",
+	"object",
+	"ol",
+	"optgroup",
+	"option",
+	"output",
+	"p",
+	"param",
+	"pre",
+	"progress",
+	"q",
+	"rp",
+	"rt",
+	"ruby",
+	"s",
+	"samp",
+	"script",
+	"section",
+	"select",
+	"small",
+	"source",
+	"span",
+	"strong",
+	"style",
+	"sub",
+	"summary",
+	"sup",
+	"table",
+	"tbody",
+	"td",
+	"textarea",
+	"tfoot",
+	"th",
+	"thead",
+	"time",
+	"title",
+	"tr",
+	"track",
+	"u",
+	"ul",
+	"var",
+	"video",
+	"wbr",
+}
+
+var attributes = []string{
+	"accept",
+	"accept-charset",
+	"accesskey",
+	"action",
+	"alt",
+	"async",
+	"autocomplete",
+	"autofocus",
+	"autoplay",
+	"border",
+	"challenge",
+	"charset",
+	"checked",
+	"cite",
+	"class",
+	"cols",
+	"colspan",
+	"command",
+	"content",
+	"contenteditable",
+	"contextmenu",
+	"controls",
+	"coords",
+	"crossorigin",
+	"data",
+	"datetime",
+	"default",
+	"defer",
+	"dir",
+	"dirname",
+	"disabled",
+	"download",
+	"draggable",
+	"dropzone",
+	"enctype",
+	"for",
+	"form",
+	"formaction",
+	"formenctype",
+	"formmethod",
+	"formnovalidate",
+	"formtarget",
+	"headers",
+	"height",
+	"hidden",
+	"high",
+	"href",
+	"hreflang",
+	"http-equiv",
+	"icon",
+	"id",
+	"inert",
+	"ismap",
+	"itemid",
+	"itemprop",
+	"itemref",
+	"itemscope",
+	"itemtype",
+	"keytype",
+	"kind",
+	"label",
+	"lang",
+	"list",
+	"loop",
+	"low",
+	"manifest",
+	"max",
+	"maxlength",
+	"media",
+	"mediagroup",
+	"method",
+	"min",
+	"multiple",
+	"muted",
+	"name",
+	"novalidate",
+	"open",
+	"optimum",
+	"pattern",
+	"ping",
+	"placeholder",
+	"poster",
+	"preload",
+	"radiogroup",
+	"readonly",
+	"rel",
+	"required",
+	"reversed",
+	"rows",
+	"rowspan",
+	"sandbox",
+	"spellcheck",
+	"scope",
+	"scoped",
+	"seamless",
+	"selected",
+	"shape",
+	"size",
+	"sizes",
+	"span",
+	"src",
+	"srcdoc",
+	"srclang",
+	"start",
+	"step",
+	"style",
+	"tabindex",
+	"target",
+	"title",
+	"translate",
+	"type",
+	"typemustmatch",
+	"usemap",
+	"value",
+	"width",
+	"wrap",
+}
+
+var eventHandlers = []string{
+	"onabort",
+	"onafterprint",
+	"onbeforeprint",
+	"onbeforeunload",
+	"onblur",
+	"oncancel",
+	"oncanplay",
+	"oncanplaythrough",
+	"onchange",
+	"onclick",
+	"onclose",
+	"oncontextmenu",
+	"oncuechange",
+	"ondblclick",
+	"ondrag",
+	"ondragend",
+	"ondragenter",
+	"ondragleave",
+	"ondragover",
+	"ondragstart",
+	"ondrop",
+	"ondurationchange",
+	"onemptied",
+	"onended",
+	"onerror",
+	"onfocus",
+	"onhashchange",
+	"oninput",
+	"oninvalid",
+	"onkeydown",
+	"onkeypress",
+	"onkeyup",
+	"onload",
+	"onloadeddata",
+	"onloadedmetadata",
+	"onloadstart",
+	"onmessage",
+	"onmousedown",
+	"onmousemove",
+	"onmouseout",
+	"onmouseover",
+	"onmouseup",
+	"onmousewheel",
+	"onoffline",
+	"ononline",
+	"onpagehide",
+	"onpageshow",
+	"onpause",
+	"onplay",
+	"onplaying",
+	"onpopstate",
+	"onprogress",
+	"onratechange",
+	"onreset",
+	"onresize",
+	"onscroll",
+	"onseeked",
+	"onseeking",
+	"onselect",
+	"onshow",
+	"onstalled",
+	"onstorage",
+	"onsubmit",
+	"onsuspend",
+	"ontimeupdate",
+	"onunload",
+	"onvolumechange",
+	"onwaiting",
+}
+
+// extra are ad-hoc values not covered by any of the lists above.
+var extra = []string{
+	"align",
+	"annotation",
+	"annotation-xml",
+	"applet",
+	"basefont",
+	"bgsound",
+	"big",
+	"blink",
+	"center",
+	"color",
+	"desc",
+	"face",
+	"font",
+	"foreignObject", // HTML is case-insensitive, but SVG-embedded-in-HTML is case-sensitive.
+	"foreignobject",
+	"frame",
+	"frameset",
+	"image",
+	"isindex",
+	"listing",
+	"malignmark",
+	"marquee",
+	"math",
+	"mglyph",
+	"mi",
+	"mn",
+	"mo",
+	"ms",
+	"mtext",
+	"nobr",
+	"noembed",
+	"noframes",
+	"plaintext",
+	"prompt",
+	"public",
+	"spacer",
+	"strike",
+	"svg",
+	"system",
+	"tt",
+	"xmp",
+}
diff --git a/html/atom/table.go b/html/atom/table.go
new file mode 100644
index 0000000..20b8b8a
--- /dev/null
+++ b/html/atom/table.go
@@ -0,0 +1,694 @@
+// generated by go run gen.go; DO NOT EDIT
+
+package atom
+
+const (
+	A                Atom = 0x1
+	Abbr             Atom = 0x4
+	Accept           Atom = 0x2106
+	AcceptCharset    Atom = 0x210e
+	Accesskey        Atom = 0x3309
+	Action           Atom = 0x21b06
+	Address          Atom = 0x5d507
+	Align            Atom = 0x1105
+	Alt              Atom = 0x4503
+	Annotation       Atom = 0x18d0a
+	AnnotationXml    Atom = 0x18d0e
+	Applet           Atom = 0x2d106
+	Area             Atom = 0x31804
+	Article          Atom = 0x39907
+	Aside            Atom = 0x4f05
+	Async            Atom = 0x9305
+	Audio            Atom = 0xaf05
+	Autocomplete     Atom = 0xd50c
+	Autofocus        Atom = 0xe109
+	Autoplay         Atom = 0x10c08
+	B                Atom = 0x101
+	Base             Atom = 0x11404
+	Basefont         Atom = 0x11408
+	Bdi              Atom = 0x1a03
+	Bdo              Atom = 0x12503
+	Bgsound          Atom = 0x13807
+	Big              Atom = 0x14403
+	Blink            Atom = 0x14705
+	Blockquote       Atom = 0x14c0a
+	Body             Atom = 0x2f04
+	Border           Atom = 0x15606
+	Br               Atom = 0x202
+	Button           Atom = 0x15c06
+	Canvas           Atom = 0x4b06
+	Caption          Atom = 0x1e007
+	Center           Atom = 0x2df06
+	Challenge        Atom = 0x23e09
+	Charset          Atom = 0x2807
+	Checked          Atom = 0x33f07
+	Cite             Atom = 0x9704
+	Class            Atom = 0x3d905
+	Code             Atom = 0x16f04
+	Col              Atom = 0x17603
+	Colgroup         Atom = 0x17608
+	Color            Atom = 0x18305
+	Cols             Atom = 0x18804
+	Colspan          Atom = 0x18807
+	Command          Atom = 0x19b07
+	Content          Atom = 0x42c07
+	Contenteditable  Atom = 0x42c0f
+	Contextmenu      Atom = 0x3480b
+	Controls         Atom = 0x1ae08
+	Coords           Atom = 0x1ba06
+	Crossorigin      Atom = 0x1c40b
+	Data             Atom = 0x44304
+	Datalist         Atom = 0x44308
+	Datetime         Atom = 0x25b08
+	Dd               Atom = 0x28802
+	Default          Atom = 0x5207
+	Defer            Atom = 0x17105
+	Del              Atom = 0x4d603
+	Desc             Atom = 0x4804
+	Details          Atom = 0x6507
+	Dfn              Atom = 0x8303
+	Dialog           Atom = 0x1b06
+	Dir              Atom = 0x9d03
+	Dirname          Atom = 0x9d07
+	Disabled         Atom = 0x10008
+	Div              Atom = 0x10703
+	Dl               Atom = 0x13e02
+	Download         Atom = 0x40908
+	Draggable        Atom = 0x1a109
+	Dropzone         Atom = 0x3a208
+	Dt               Atom = 0x4e402
+	Em               Atom = 0x7f02
+	Embed            Atom = 0x7f05
+	Enctype          Atom = 0x23007
+	Face             Atom = 0x2dd04
+	Fieldset         Atom = 0x1d508
+	Figcaption       Atom = 0x1dd0a
+	Figure           Atom = 0x1f106
+	Font             Atom = 0x11804
+	Footer           Atom = 0x5906
+	For              Atom = 0x1fd03
+	ForeignObject    Atom = 0x1fd0d
+	Foreignobject    Atom = 0x20a0d
+	Form             Atom = 0x21704
+	Formaction       Atom = 0x2170a
+	Formenctype      Atom = 0x22c0b
+	Formmethod       Atom = 0x2470a
+	Formnovalidate   Atom = 0x2510e
+	Formtarget       Atom = 0x2660a
+	Frame            Atom = 0x8705
+	Frameset         Atom = 0x8708
+	H1               Atom = 0x13602
+	H2               Atom = 0x29602
+	H3               Atom = 0x2c502
+	H4               Atom = 0x30e02
+	H5               Atom = 0x4e602
+	H6               Atom = 0x27002
+	Head             Atom = 0x2fa04
+	Header           Atom = 0x2fa06
+	Headers          Atom = 0x2fa07
+	Height           Atom = 0x27206
+	Hgroup           Atom = 0x27a06
+	Hidden           Atom = 0x28606
+	High             Atom = 0x29304
+	Hr               Atom = 0x13102
+	Href             Atom = 0x29804
+	Hreflang         Atom = 0x29808
+	Html             Atom = 0x27604
+	HttpEquiv        Atom = 0x2a00a
+	I                Atom = 0x601
+	Icon             Atom = 0x42b04
+	Id               Atom = 0x5102
+	Iframe           Atom = 0x2b406
+	Image            Atom = 0x2ba05
+	Img              Atom = 0x2bf03
+	Inert            Atom = 0x4c105
+	Input            Atom = 0x3f605
+	Ins              Atom = 0x1cd03
+	Isindex          Atom = 0x2c707
+	Ismap            Atom = 0x2ce05
+	Itemid           Atom = 0x9806
+	Itemprop         Atom = 0x57e08
+	Itemref          Atom = 0x2d707
+	Itemscope        Atom = 0x2e509
+	Itemtype         Atom = 0x2ef08
+	Kbd              Atom = 0x1903
+	Keygen           Atom = 0x3906
+	Keytype          Atom = 0x51207
+	Kind             Atom = 0xfd04
+	Label            Atom = 0xba05
+	Lang             Atom = 0x29c04
+	Legend           Atom = 0x1a806
+	Li               Atom = 0x1202
+	Link             Atom = 0x14804
+	List             Atom = 0x44704
+	Listing          Atom = 0x44707
+	Loop             Atom = 0xbe04
+	Low              Atom = 0x13f03
+	Malignmark       Atom = 0x100a
+	Manifest         Atom = 0x5b608
+	Map              Atom = 0x2d003
+	Mark             Atom = 0x1604
+	Marquee          Atom = 0x5f207
+	Math             Atom = 0x2f704
+	Max              Atom = 0x30603
+	Maxlength        Atom = 0x30609
+	Media            Atom = 0xa205
+	Mediagroup       Atom = 0xa20a
+	Menu             Atom = 0x34f04
+	Meta             Atom = 0x45604
+	Meter            Atom = 0x26105
+	Method           Atom = 0x24b06
+	Mglyph           Atom = 0x2c006
+	Mi               Atom = 0x9b02
+	Min              Atom = 0x31003
+	Mn               Atom = 0x25402
+	Mo               Atom = 0x47a02
+	Ms               Atom = 0x2e802
+	Mtext            Atom = 0x31305
+	Multiple         Atom = 0x32108
+	Muted            Atom = 0x32905
+	Name             Atom = 0xa004
+	Nav              Atom = 0x3e03
+	Nobr             Atom = 0x7404
+	Noembed          Atom = 0x7d07
+	Noframes         Atom = 0x8508
+	Noscript         Atom = 0x28b08
+	Novalidate       Atom = 0x2550a
+	Object           Atom = 0x21106
+	Ol               Atom = 0xcd02
+	Onabort          Atom = 0x16007
+	Onafterprint     Atom = 0x1e50c
+	Onbeforeprint    Atom = 0x21f0d
+	Onbeforeunload   Atom = 0x5c90e
+	Onblur           Atom = 0x3e206
+	Oncancel         Atom = 0xb308
+	Oncanplay        Atom = 0x12709
+	Oncanplaythrough Atom = 0x12710
+	Onchange         Atom = 0x3b808
+	Onclick          Atom = 0x2ad07
+	Onclose          Atom = 0x32e07
+	Oncontextmenu    Atom = 0x3460d
+	Oncuechange      Atom = 0x3530b
+	Ondblclick       Atom = 0x35e0a
+	Ondrag           Atom = 0x36806
+	Ondragend        Atom = 0x36809
+	Ondragenter      Atom = 0x3710b
+	Ondragleave      Atom = 0x37c0b
+	Ondragover       Atom = 0x3870a
+	Ondragstart      Atom = 0x3910b
+	Ondrop           Atom = 0x3a006
+	Ondurationchange Atom = 0x3b010
+	Onemptied        Atom = 0x3a709
+	Onended          Atom = 0x3c007
+	Onerror          Atom = 0x3c707
+	Onfocus          Atom = 0x3ce07
+	Onhashchange     Atom = 0x3e80c
+	Oninput          Atom = 0x3f407
+	Oninvalid        Atom = 0x3fb09
+	Onkeydown        Atom = 0x40409
+	Onkeypress       Atom = 0x4110a
+	Onkeyup          Atom = 0x42107
+	Onload           Atom = 0x43b06
+	Onloadeddata     Atom = 0x43b0c
+	Onloadedmetadata Atom = 0x44e10
+	Onloadstart      Atom = 0x4640b
+	Onmessage        Atom = 0x46f09
+	Onmousedown      Atom = 0x4780b
+	Onmousemove      Atom = 0x4830b
+	Onmouseout       Atom = 0x48e0a
+	Onmouseover      Atom = 0x49b0b
+	Onmouseup        Atom = 0x4a609
+	Onmousewheel     Atom = 0x4af0c
+	Onoffline        Atom = 0x4bb09
+	Ononline         Atom = 0x4c608
+	Onpagehide       Atom = 0x4ce0a
+	Onpageshow       Atom = 0x4d90a
+	Onpause          Atom = 0x4e807
+	Onplay           Atom = 0x4f206
+	Onplaying        Atom = 0x4f209
+	Onpopstate       Atom = 0x4fb0a
+	Onprogress       Atom = 0x5050a
+	Onratechange     Atom = 0x5190c
+	Onreset          Atom = 0x52507
+	Onresize         Atom = 0x52c08
+	Onscroll         Atom = 0x53a08
+	Onseeked         Atom = 0x54208
+	Onseeking        Atom = 0x54a09
+	Onselect         Atom = 0x55308
+	Onshow           Atom = 0x55d06
+	Onstalled        Atom = 0x56609
+	Onstorage        Atom = 0x56f09
+	Onsubmit         Atom = 0x57808
+	Onsuspend        Atom = 0x58809
+	Ontimeupdate     Atom = 0x1190c
+	Onunload         Atom = 0x59108
+	Onvolumechange   Atom = 0x5990e
+	Onwaiting        Atom = 0x5a709
+	Open             Atom = 0x58404
+	Optgroup         Atom = 0xc008
+	Optimum          Atom = 0x5b007
+	Option           Atom = 0x5c506
+	Output           Atom = 0x49506
+	P                Atom = 0xc01
+	Param            Atom = 0xc05
+	Pattern          Atom = 0x6e07
+	Ping             Atom = 0xab04
+	Placeholder      Atom = 0xc70b
+	Plaintext        Atom = 0xf109
+	Poster           Atom = 0x17d06
+	Pre              Atom = 0x27f03
+	Preload          Atom = 0x27f07
+	Progress         Atom = 0x50708
+	Prompt           Atom = 0x5bf06
+	Public           Atom = 0x42706
+	Q                Atom = 0x15101
+	Radiogroup       Atom = 0x30a
+	Readonly         Atom = 0x31908
+	Rel              Atom = 0x28003
+	Required         Atom = 0x1f508
+	Reversed         Atom = 0x5e08
+	Rows             Atom = 0x7704
+	Rowspan          Atom = 0x7707
+	Rp               Atom = 0x1eb02
+	Rt               Atom = 0x16502
+	Ruby             Atom = 0xd104
+	S                Atom = 0x2c01
+	Samp             Atom = 0x6b04
+	Sandbox          Atom = 0xe907
+	Scope            Atom = 0x2e905
+	Scoped           Atom = 0x2e906
+	Script           Atom = 0x28d06
+	Seamless         Atom = 0x33308
+	Section          Atom = 0x3dd07
+	Select           Atom = 0x55506
+	Selected         Atom = 0x55508
+	Shape            Atom = 0x1b505
+	Size             Atom = 0x53004
+	Sizes            Atom = 0x53005
+	Small            Atom = 0x1bf05
+	Source           Atom = 0x1cf06
+	Spacer           Atom = 0x30006
+	Span             Atom = 0x7a04
+	Spellcheck       Atom = 0x33a0a
+	Src              Atom = 0x3d403
+	Srcdoc           Atom = 0x3d406
+	Srclang          Atom = 0x41a07
+	Start            Atom = 0x39705
+	Step             Atom = 0x5bc04
+	Strike           Atom = 0x50e06
+	Strong           Atom = 0x53406
+	Style            Atom = 0x5db05
+	Sub              Atom = 0x57a03
+	Summary          Atom = 0x5e007
+	Sup              Atom = 0x5e703
+	Svg              Atom = 0x5ea03
+	System           Atom = 0x5ed06
+	Tabindex         Atom = 0x45c08
+	Table            Atom = 0x43605
+	Target           Atom = 0x26a06
+	Tbody            Atom = 0x2e05
+	Td               Atom = 0x4702
+	Textarea         Atom = 0x31408
+	Tfoot            Atom = 0x5805
+	Th               Atom = 0x13002
+	Thead            Atom = 0x2f905
+	Time             Atom = 0x11b04
+	Title            Atom = 0x8e05
+	Tr               Atom = 0xf902
+	Track            Atom = 0xf905
+	Translate        Atom = 0x16609
+	Tt               Atom = 0x7002
+	Type             Atom = 0x23304
+	Typemustmatch    Atom = 0x2330d
+	U                Atom = 0xb01
+	Ul               Atom = 0x5602
+	Usemap           Atom = 0x4ec06
+	Value            Atom = 0x4005
+	Var              Atom = 0x10903
+	Video            Atom = 0x2a905
+	Wbr              Atom = 0x14103
+	Width            Atom = 0x4e205
+	Wrap             Atom = 0x56204
+	Xmp              Atom = 0xef03
+)
+
+const hash0 = 0xc17da63e
+
+const maxAtomLen = 16
+
+var table = [1 << 9]Atom{
+	0x1:   0x4830b, // onmousemove
+	0x2:   0x5a709, // onwaiting
+	0x4:   0x5bf06, // prompt
+	0x7:   0x5b007, // optimum
+	0x8:   0x1604,  // mark
+	0xa:   0x2d707, // itemref
+	0xb:   0x4d90a, // onpageshow
+	0xc:   0x55506, // select
+	0xd:   0x1a109, // draggable
+	0xe:   0x3e03,  // nav
+	0xf:   0x19b07, // command
+	0x11:  0xb01,   // u
+	0x14:  0x2fa07, // headers
+	0x15:  0x44308, // datalist
+	0x17:  0x6b04,  // samp
+	0x1a:  0x40409, // onkeydown
+	0x1b:  0x53a08, // onscroll
+	0x1c:  0x17603, // col
+	0x20:  0x57e08, // itemprop
+	0x21:  0x2a00a, // http-equiv
+	0x22:  0x5e703, // sup
+	0x24:  0x1f508, // required
+	0x2b:  0x27f07, // preload
+	0x2c:  0x21f0d, // onbeforeprint
+	0x2d:  0x3710b, // ondragenter
+	0x2e:  0x4e402, // dt
+	0x2f:  0x57808, // onsubmit
+	0x30:  0x13102, // hr
+	0x31:  0x3460d, // oncontextmenu
+	0x33:  0x2ba05, // image
+	0x34:  0x4e807, // onpause
+	0x35:  0x27a06, // hgroup
+	0x36:  0xab04,  // ping
+	0x37:  0x55308, // onselect
+	0x3a:  0x10703, // div
+	0x40:  0x9b02,  // mi
+	0x41:  0x33308, // seamless
+	0x42:  0x2807,  // charset
+	0x43:  0x5102,  // id
+	0x44:  0x4fb0a, // onpopstate
+	0x45:  0x4d603, // del
+	0x46:  0x5f207, // marquee
+	0x47:  0x3309,  // accesskey
+	0x49:  0x5906,  // footer
+	0x4a:  0x2d106, // applet
+	0x4b:  0x2ce05, // ismap
+	0x51:  0x34f04, // menu
+	0x52:  0x2f04,  // body
+	0x55:  0x8708,  // frameset
+	0x56:  0x52507, // onreset
+	0x57:  0x14705, // blink
+	0x58:  0x8e05,  // title
+	0x59:  0x39907, // article
+	0x5b:  0x13002, // th
+	0x5d:  0x15101, // q
+	0x5e:  0x58404, // open
+	0x5f:  0x31804, // area
+	0x61:  0x43b06, // onload
+	0x62:  0x3f605, // input
+	0x63:  0x11404, // base
+	0x64:  0x18807, // colspan
+	0x65:  0x51207, // keytype
+	0x66:  0x13e02, // dl
+	0x68:  0x1d508, // fieldset
+	0x6a:  0x31003, // min
+	0x6b:  0x10903, // var
+	0x6f:  0x2fa06, // header
+	0x70:  0x16502, // rt
+	0x71:  0x17608, // colgroup
+	0x72:  0x25402, // mn
+	0x74:  0x16007, // onabort
+	0x75:  0x3906,  // keygen
+	0x76:  0x4bb09, // onoffline
+	0x77:  0x23e09, // challenge
+	0x78:  0x2d003, // map
+	0x7a:  0x30e02, // h4
+	0x7b:  0x3c707, // onerror
+	0x7c:  0x30609, // maxlength
+	0x7d:  0x31305, // mtext
+	0x7e:  0x5805,  // tfoot
+	0x7f:  0x11804, // font
+	0x80:  0x100a,  // malignmark
+	0x81:  0x45604, // meta
+	0x82:  0x9305,  // async
+	0x83:  0x2c502, // h3
+	0x84:  0x28802, // dd
+	0x85:  0x29804, // href
+	0x86:  0xa20a,  // mediagroup
+	0x87:  0x1ba06, // coords
+	0x88:  0x41a07, // srclang
+	0x89:  0x35e0a, // ondblclick
+	0x8a:  0x4005,  // value
+	0x8c:  0xb308,  // oncancel
+	0x8e:  0x33a0a, // spellcheck
+	0x8f:  0x8705,  // frame
+	0x91:  0x14403, // big
+	0x94:  0x21b06, // action
+	0x95:  0x9d03,  // dir
+	0x97:  0x31908, // readonly
+	0x99:  0x43605, // table
+	0x9a:  0x5e007, // summary
+	0x9b:  0x14103, // wbr
+	0x9c:  0x30a,   // radiogroup
+	0x9d:  0xa004,  // name
+	0x9f:  0x5ed06, // system
+	0xa1:  0x18305, // color
+	0xa2:  0x4b06,  // canvas
+	0xa3:  0x27604, // html
+	0xa5:  0x54a09, // onseeking
+	0xac:  0x1b505, // shape
+	0xad:  0x28003, // rel
+	0xae:  0x12710, // oncanplaythrough
+	0xaf:  0x3870a, // ondragover
+	0xb1:  0x1fd0d, // foreignObject
+	0xb3:  0x7704,  // rows
+	0xb6:  0x44707, // listing
+	0xb7:  0x49506, // output
+	0xb9:  0x3480b, // contextmenu
+	0xbb:  0x13f03, // low
+	0xbc:  0x1eb02, // rp
+	0xbd:  0x58809, // onsuspend
+	0xbe:  0x15c06, // button
+	0xbf:  0x4804,  // desc
+	0xc1:  0x3dd07, // section
+	0xc2:  0x5050a, // onprogress
+	0xc3:  0x56f09, // onstorage
+	0xc4:  0x2f704, // math
+	0xc5:  0x4f206, // onplay
+	0xc7:  0x5602,  // ul
+	0xc8:  0x6e07,  // pattern
+	0xc9:  0x4af0c, // onmousewheel
+	0xca:  0x36809, // ondragend
+	0xcb:  0xd104,  // ruby
+	0xcc:  0xc01,   // p
+	0xcd:  0x32e07, // onclose
+	0xce:  0x26105, // meter
+	0xcf:  0x13807, // bgsound
+	0xd2:  0x27206, // height
+	0xd4:  0x101,   // b
+	0xd5:  0x2ef08, // itemtype
+	0xd8:  0x1e007, // caption
+	0xd9:  0x10008, // disabled
+	0xdc:  0x5ea03, // svg
+	0xdd:  0x1bf05, // small
+	0xde:  0x44304, // data
+	0xe0:  0x4c608, // ononline
+	0xe1:  0x2c006, // mglyph
+	0xe3:  0x7f05,  // embed
+	0xe4:  0xf902,  // tr
+	0xe5:  0x4640b, // onloadstart
+	0xe7:  0x3b010, // ondurationchange
+	0xed:  0x12503, // bdo
+	0xee:  0x4702,  // td
+	0xef:  0x4f05,  // aside
+	0xf0:  0x29602, // h2
+	0xf1:  0x50708, // progress
+	0xf2:  0x14c0a, // blockquote
+	0xf4:  0xba05,  // label
+	0xf5:  0x601,   // i
+	0xf7:  0x7707,  // rowspan
+	0xfb:  0x4f209, // onplaying
+	0xfd:  0x2bf03, // img
+	0xfe:  0xc008,  // optgroup
+	0xff:  0x42c07, // content
+	0x101: 0x5190c, // onratechange
+	0x103: 0x3e80c, // onhashchange
+	0x104: 0x6507,  // details
+	0x106: 0x40908, // download
+	0x109: 0xe907,  // sandbox
+	0x10b: 0x42c0f, // contenteditable
+	0x10d: 0x37c0b, // ondragleave
+	0x10e: 0x2106,  // accept
+	0x10f: 0x55508, // selected
+	0x112: 0x2170a, // formaction
+	0x113: 0x2df06, // center
+	0x115: 0x44e10, // onloadedmetadata
+	0x116: 0x14804, // link
+	0x117: 0x11b04, // time
+	0x118: 0x1c40b, // crossorigin
+	0x119: 0x3ce07, // onfocus
+	0x11a: 0x56204, // wrap
+	0x11b: 0x42b04, // icon
+	0x11d: 0x2a905, // video
+	0x11e: 0x3d905, // class
+	0x121: 0x5990e, // onvolumechange
+	0x122: 0x3e206, // onblur
+	0x123: 0x2e509, // itemscope
+	0x124: 0x5db05, // style
+	0x127: 0x42706, // public
+	0x129: 0x2510e, // formnovalidate
+	0x12a: 0x55d06, // onshow
+	0x12c: 0x16609, // translate
+	0x12d: 0x9704,  // cite
+	0x12e: 0x2e802, // ms
+	0x12f: 0x1190c, // ontimeupdate
+	0x130: 0xfd04,  // kind
+	0x131: 0x2660a, // formtarget
+	0x135: 0x3c007, // onended
+	0x136: 0x28606, // hidden
+	0x137: 0x2c01,  // s
+	0x139: 0x2470a, // formmethod
+	0x13a: 0x44704, // list
+	0x13c: 0x27002, // h6
+	0x13d: 0xcd02,  // ol
+	0x13e: 0x3530b, // oncuechange
+	0x13f: 0x20a0d, // foreignobject
+	0x143: 0x5c90e, // onbeforeunload
+	0x145: 0x3a709, // onemptied
+	0x146: 0x17105, // defer
+	0x147: 0xef03,  // xmp
+	0x148: 0xaf05,  // audio
+	0x149: 0x1903,  // kbd
+	0x14c: 0x46f09, // onmessage
+	0x14d: 0x5c506, // option
+	0x14e: 0x4503,  // alt
+	0x14f: 0x33f07, // checked
+	0x150: 0x10c08, // autoplay
+	0x152: 0x202,   // br
+	0x153: 0x2550a, // novalidate
+	0x156: 0x7d07,  // noembed
+	0x159: 0x2ad07, // onclick
+	0x15a: 0x4780b, // onmousedown
+	0x15b: 0x3b808, // onchange
+	0x15e: 0x3fb09, // oninvalid
+	0x15f: 0x2e906, // scoped
+	0x160: 0x1ae08, // controls
+	0x161: 0x32905, // muted
+	0x163: 0x4ec06, // usemap
+	0x164: 0x1dd0a, // figcaption
+	0x165: 0x36806, // ondrag
+	0x166: 0x29304, // high
+	0x168: 0x3d403, // src
+	0x169: 0x17d06, // poster
+	0x16b: 0x18d0e, // annotation-xml
+	0x16c: 0x5bc04, // step
+	0x16d: 0x4,     // abbr
+	0x16e: 0x1b06,  // dialog
+	0x170: 0x1202,  // li
+	0x172: 0x47a02, // mo
+	0x175: 0x1fd03, // for
+	0x176: 0x1cd03, // ins
+	0x178: 0x53004, // size
+	0x17a: 0x5207,  // default
+	0x17b: 0x1a03,  // bdi
+	0x17c: 0x4ce0a, // onpagehide
+	0x17d: 0x9d07,  // dirname
+	0x17e: 0x23304, // type
+	0x17f: 0x21704, // form
+	0x180: 0x4c105, // inert
+	0x181: 0x12709, // oncanplay
+	0x182: 0x8303,  // dfn
+	0x183: 0x45c08, // tabindex
+	0x186: 0x7f02,  // em
+	0x187: 0x29c04, // lang
+	0x189: 0x3a208, // dropzone
+	0x18a: 0x4110a, // onkeypress
+	0x18b: 0x25b08, // datetime
+	0x18c: 0x18804, // cols
+	0x18d: 0x1,     // a
+	0x18e: 0x43b0c, // onloadeddata
+	0x191: 0x15606, // border
+	0x192: 0x2e05,  // tbody
+	0x193: 0x24b06, // method
+	0x195: 0xbe04,  // loop
+	0x196: 0x2b406, // iframe
+	0x198: 0x2fa04, // head
+	0x19e: 0x5b608, // manifest
+	0x19f: 0xe109,  // autofocus
+	0x1a0: 0x16f04, // code
+	0x1a1: 0x53406, // strong
+	0x1a2: 0x32108, // multiple
+	0x1a3: 0xc05,   // param
+	0x1a6: 0x23007, // enctype
+	0x1a7: 0x2dd04, // face
+	0x1a8: 0xf109,  // plaintext
+	0x1a9: 0x13602, // h1
+	0x1aa: 0x56609, // onstalled
+	0x1ad: 0x28d06, // script
+	0x1ae: 0x30006, // spacer
+	0x1af: 0x52c08, // onresize
+	0x1b0: 0x49b0b, // onmouseover
+	0x1b1: 0x59108, // onunload
+	0x1b2: 0x54208, // onseeked
+	0x1b4: 0x2330d, // typemustmatch
+	0x1b5: 0x1f106, // figure
+	0x1b6: 0x48e0a, // onmouseout
+	0x1b7: 0x27f03, // pre
+	0x1b8: 0x4e205, // width
+	0x1bb: 0x7404,  // nobr
+	0x1be: 0x7002,  // tt
+	0x1bf: 0x1105,  // align
+	0x1c0: 0x3f407, // oninput
+	0x1c3: 0x42107, // onkeyup
+	0x1c6: 0x1e50c, // onafterprint
+	0x1c7: 0x210e,  // accept-charset
+	0x1c8: 0x9806,  // itemid
+	0x1cb: 0x50e06, // strike
+	0x1cc: 0x57a03, // sub
+	0x1cd: 0xf905,  // track
+	0x1ce: 0x39705, // start
+	0x1d0: 0x11408, // basefont
+	0x1d6: 0x1cf06, // source
+	0x1d7: 0x1a806, // legend
+	0x1d8: 0x2f905, // thead
+	0x1da: 0x2e905, // scope
+	0x1dd: 0x21106, // object
+	0x1de: 0xa205,  // media
+	0x1df: 0x18d0a, // annotation
+	0x1e0: 0x22c0b, // formenctype
+	0x1e2: 0x28b08, // noscript
+	0x1e4: 0x53005, // sizes
+	0x1e5: 0xd50c,  // autocomplete
+	0x1e6: 0x7a04,  // span
+	0x1e7: 0x8508,  // noframes
+	0x1e8: 0x26a06, // target
+	0x1e9: 0x3a006, // ondrop
+	0x1ea: 0x3d406, // srcdoc
+	0x1ec: 0x5e08,  // reversed
+	0x1f0: 0x2c707, // isindex
+	0x1f3: 0x29808, // hreflang
+	0x1f5: 0x4e602, // h5
+	0x1f6: 0x5d507, // address
+	0x1fa: 0x30603, // max
+	0x1fb: 0xc70b,  // placeholder
+	0x1fc: 0x31408, // textarea
+	0x1fe: 0x4a609, // onmouseup
+	0x1ff: 0x3910b, // ondragstart
+}
+
+const atomText = "abbradiogrouparamalignmarkbdialogaccept-charsetbodyaccesskey" +
+	"genavaluealtdescanvasidefaultfootereversedetailsampatternobr" +
+	"owspanoembedfnoframesetitleasyncitemidirnamediagroupingaudio" +
+	"ncancelabelooptgrouplaceholderubyautocompleteautofocusandbox" +
+	"mplaintextrackindisabledivarautoplaybasefontimeupdatebdoncan" +
+	"playthrough1bgsoundlowbrbigblinkblockquoteborderbuttonabortr" +
+	"anslatecodefercolgroupostercolorcolspannotation-xmlcommandra" +
+	"ggablegendcontrolshapecoordsmallcrossoriginsourcefieldsetfig" +
+	"captionafterprintfigurequiredforeignObjectforeignobjectforma" +
+	"ctionbeforeprintformenctypemustmatchallengeformmethodformnov" +
+	"alidatetimeterformtargeth6heightmlhgroupreloadhiddenoscripth" +
+	"igh2hreflanghttp-equivideonclickiframeimageimglyph3isindexis" +
+	"mappletitemrefacenteritemscopeditemtypematheaderspacermaxlen" +
+	"gth4minmtextareadonlymultiplemutedoncloseamlesspellcheckedon" +
+	"contextmenuoncuechangeondblclickondragendondragenterondragle" +
+	"aveondragoverondragstarticleondropzonemptiedondurationchange" +
+	"onendedonerroronfocusrcdoclassectionbluronhashchangeoninputo" +
+	"ninvalidonkeydownloadonkeypressrclangonkeyupublicontentedita" +
+	"bleonloadeddatalistingonloadedmetadatabindexonloadstartonmes" +
+	"sageonmousedownonmousemoveonmouseoutputonmouseoveronmouseupo" +
+	"nmousewheelonofflinertononlineonpagehidelonpageshowidth5onpa" +
+	"usemaponplayingonpopstateonprogresstrikeytypeonratechangeonr" +
+	"esetonresizestrongonscrollonseekedonseekingonselectedonshowr" +
+	"aponstalledonstorageonsubmitempropenonsuspendonunloadonvolum" +
+	"echangeonwaitingoptimumanifestepromptoptionbeforeunloaddress" +
+	"tylesummarysupsvgsystemarquee"
diff --git a/html/atom/table_test.go b/html/atom/table_test.go
new file mode 100644
index 0000000..db016a1
--- /dev/null
+++ b/html/atom/table_test.go
@@ -0,0 +1,341 @@
+// generated by go run gen.go -test; DO NOT EDIT
+
+package atom
+
+var testAtomList = []string{
+	"a",
+	"abbr",
+	"accept",
+	"accept-charset",
+	"accesskey",
+	"action",
+	"address",
+	"align",
+	"alt",
+	"annotation",
+	"annotation-xml",
+	"applet",
+	"area",
+	"article",
+	"aside",
+	"async",
+	"audio",
+	"autocomplete",
+	"autofocus",
+	"autoplay",
+	"b",
+	"base",
+	"basefont",
+	"bdi",
+	"bdo",
+	"bgsound",
+	"big",
+	"blink",
+	"blockquote",
+	"body",
+	"border",
+	"br",
+	"button",
+	"canvas",
+	"caption",
+	"center",
+	"challenge",
+	"charset",
+	"checked",
+	"cite",
+	"cite",
+	"class",
+	"code",
+	"col",
+	"colgroup",
+	"color",
+	"cols",
+	"colspan",
+	"command",
+	"command",
+	"content",
+	"contenteditable",
+	"contextmenu",
+	"controls",
+	"coords",
+	"crossorigin",
+	"data",
+	"data",
+	"datalist",
+	"datetime",
+	"dd",
+	"default",
+	"defer",
+	"del",
+	"desc",
+	"details",
+	"dfn",
+	"dialog",
+	"dir",
+	"dirname",
+	"disabled",
+	"div",
+	"dl",
+	"download",
+	"draggable",
+	"dropzone",
+	"dt",
+	"em",
+	"embed",
+	"enctype",
+	"face",
+	"fieldset",
+	"figcaption",
+	"figure",
+	"font",
+	"footer",
+	"for",
+	"foreignObject",
+	"foreignobject",
+	"form",
+	"form",
+	"formaction",
+	"formenctype",
+	"formmethod",
+	"formnovalidate",
+	"formtarget",
+	"frame",
+	"frameset",
+	"h1",
+	"h2",
+	"h3",
+	"h4",
+	"h5",
+	"h6",
+	"head",
+	"header",
+	"headers",
+	"height",
+	"hgroup",
+	"hidden",
+	"high",
+	"hr",
+	"href",
+	"hreflang",
+	"html",
+	"http-equiv",
+	"i",
+	"icon",
+	"id",
+	"iframe",
+	"image",
+	"img",
+	"inert",
+	"input",
+	"ins",
+	"isindex",
+	"ismap",
+	"itemid",
+	"itemprop",
+	"itemref",
+	"itemscope",
+	"itemtype",
+	"kbd",
+	"keygen",
+	"keytype",
+	"kind",
+	"label",
+	"label",
+	"lang",
+	"legend",
+	"li",
+	"link",
+	"list",
+	"listing",
+	"loop",
+	"low",
+	"malignmark",
+	"manifest",
+	"map",
+	"mark",
+	"marquee",
+	"math",
+	"max",
+	"maxlength",
+	"media",
+	"mediagroup",
+	"menu",
+	"meta",
+	"meter",
+	"method",
+	"mglyph",
+	"mi",
+	"min",
+	"mn",
+	"mo",
+	"ms",
+	"mtext",
+	"multiple",
+	"muted",
+	"name",
+	"nav",
+	"nobr",
+	"noembed",
+	"noframes",
+	"noscript",
+	"novalidate",
+	"object",
+	"ol",
+	"onabort",
+	"onafterprint",
+	"onbeforeprint",
+	"onbeforeunload",
+	"onblur",
+	"oncancel",
+	"oncanplay",
+	"oncanplaythrough",
+	"onchange",
+	"onclick",
+	"onclose",
+	"oncontextmenu",
+	"oncuechange",
+	"ondblclick",
+	"ondrag",
+	"ondragend",
+	"ondragenter",
+	"ondragleave",
+	"ondragover",
+	"ondragstart",
+	"ondrop",
+	"ondurationchange",
+	"onemptied",
+	"onended",
+	"onerror",
+	"onfocus",
+	"onhashchange",
+	"oninput",
+	"oninvalid",
+	"onkeydown",
+	"onkeypress",
+	"onkeyup",
+	"onload",
+	"onloadeddata",
+	"onloadedmetadata",
+	"onloadstart",
+	"onmessage",
+	"onmousedown",
+	"onmousemove",
+	"onmouseout",
+	"onmouseover",
+	"onmouseup",
+	"onmousewheel",
+	"onoffline",
+	"ononline",
+	"onpagehide",
+	"onpageshow",
+	"onpause",
+	"onplay",
+	"onplaying",
+	"onpopstate",
+	"onprogress",
+	"onratechange",
+	"onreset",
+	"onresize",
+	"onscroll",
+	"onseeked",
+	"onseeking",
+	"onselect",
+	"onshow",
+	"onstalled",
+	"onstorage",
+	"onsubmit",
+	"onsuspend",
+	"ontimeupdate",
+	"onunload",
+	"onvolumechange",
+	"onwaiting",
+	"open",
+	"optgroup",
+	"optimum",
+	"option",
+	"output",
+	"p",
+	"param",
+	"pattern",
+	"ping",
+	"placeholder",
+	"plaintext",
+	"poster",
+	"pre",
+	"preload",
+	"progress",
+	"prompt",
+	"public",
+	"q",
+	"radiogroup",
+	"readonly",
+	"rel",
+	"required",
+	"reversed",
+	"rows",
+	"rowspan",
+	"rp",
+	"rt",
+	"ruby",
+	"s",
+	"samp",
+	"sandbox",
+	"scope",
+	"scoped",
+	"script",
+	"seamless",
+	"section",
+	"select",
+	"selected",
+	"shape",
+	"size",
+	"sizes",
+	"small",
+	"source",
+	"spacer",
+	"span",
+	"span",
+	"spellcheck",
+	"src",
+	"srcdoc",
+	"srclang",
+	"start",
+	"step",
+	"strike",
+	"strong",
+	"style",
+	"style",
+	"sub",
+	"summary",
+	"sup",
+	"svg",
+	"system",
+	"tabindex",
+	"table",
+	"target",
+	"tbody",
+	"td",
+	"textarea",
+	"tfoot",
+	"th",
+	"thead",
+	"time",
+	"title",
+	"title",
+	"tr",
+	"track",
+	"translate",
+	"tt",
+	"type",
+	"typemustmatch",
+	"u",
+	"ul",
+	"usemap",
+	"value",
+	"var",
+	"video",
+	"wbr",
+	"width",
+	"wrap",
+	"xmp",
+}
diff --git a/html/const.go b/html/const.go
new file mode 100644
index 0000000..d7cc8bb
--- /dev/null
+++ b/html/const.go
@@ -0,0 +1,100 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+// Section 12.2.3.2 of the HTML5 specification says "The following elements
+// have varying levels of special parsing rules".
+// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#the-stack-of-open-elements
+var isSpecialElementMap = map[string]bool{
+	"address":    true,
+	"applet":     true,
+	"area":       true,
+	"article":    true,
+	"aside":      true,
+	"base":       true,
+	"basefont":   true,
+	"bgsound":    true,
+	"blockquote": true,
+	"body":       true,
+	"br":         true,
+	"button":     true,
+	"caption":    true,
+	"center":     true,
+	"col":        true,
+	"colgroup":   true,
+	"command":    true,
+	"dd":         true,
+	"details":    true,
+	"dir":        true,
+	"div":        true,
+	"dl":         true,
+	"dt":         true,
+	"embed":      true,
+	"fieldset":   true,
+	"figcaption": true,
+	"figure":     true,
+	"footer":     true,
+	"form":       true,
+	"frame":      true,
+	"frameset":   true,
+	"h1":         true,
+	"h2":         true,
+	"h3":         true,
+	"h4":         true,
+	"h5":         true,
+	"h6":         true,
+	"head":       true,
+	"header":     true,
+	"hgroup":     true,
+	"hr":         true,
+	"html":       true,
+	"iframe":     true,
+	"img":        true,
+	"input":      true,
+	"isindex":    true,
+	"li":         true,
+	"link":       true,
+	"listing":    true,
+	"marquee":    true,
+	"menu":       true,
+	"meta":       true,
+	"nav":        true,
+	"noembed":    true,
+	"noframes":   true,
+	"noscript":   true,
+	"object":     true,
+	"ol":         true,
+	"p":          true,
+	"param":      true,
+	"plaintext":  true,
+	"pre":        true,
+	"script":     true,
+	"section":    true,
+	"select":     true,
+	"style":      true,
+	"summary":    true,
+	"table":      true,
+	"tbody":      true,
+	"td":         true,
+	"textarea":   true,
+	"tfoot":      true,
+	"th":         true,
+	"thead":      true,
+	"title":      true,
+	"tr":         true,
+	"ul":         true,
+	"wbr":        true,
+	"xmp":        true,
+}
+
+func isSpecialElement(element *Node) bool {
+	switch element.Namespace {
+	case "", "html":
+		return isSpecialElementMap[element.Data]
+	case "svg":
+		return element.Data == "foreignObject"
+	}
+	return false
+}
diff --git a/html/doc.go b/html/doc.go
new file mode 100644
index 0000000..fac0f54
--- /dev/null
+++ b/html/doc.go
@@ -0,0 +1,106 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+Package html implements an HTML5-compliant tokenizer and parser.
+
+Tokenization is done by creating a Tokenizer for an io.Reader r. It is the
+caller's responsibility to ensure that r provides UTF-8 encoded HTML.
+
+	z := html.NewTokenizer(r)
+
+Given a Tokenizer z, the HTML is tokenized by repeatedly calling z.Next(),
+which parses the next token and returns its type, or an error:
+
+	for {
+		tt := z.Next()
+		if tt == html.ErrorToken {
+			// ...
+			return ...
+		}
+		// Process the current token.
+	}
+
+There are two APIs for retrieving the current token. The high-level API is to
+call Token; the low-level API is to call Text or TagName / TagAttr. Both APIs
+allow optionally calling Raw after Next but before Token, Text, TagName, or
+TagAttr. In EBNF notation, the valid call sequence per token is:
+
+	Next {Raw} [ Token | Text | TagName {TagAttr} ]
+
+Token returns an independent data structure that completely describes a token.
+Entities (such as "&lt;") are unescaped, tag names and attribute keys are
+lower-cased, and attributes are collected into a []Attribute. For example:
+
+	for {
+		if z.Next() == html.ErrorToken {
+			// Returning io.EOF indicates success.
+			return z.Err()
+		}
+		emitToken(z.Token())
+	}
+
+The low-level API performs fewer allocations and copies, but the contents of
+the []byte values returned by Text, TagName and TagAttr may change on the next
+call to Next. For example, to extract an HTML page's anchor text:
+
+	depth := 0
+	for {
+		tt := z.Next()
+		switch tt {
+		case ErrorToken:
+			return z.Err()
+		case TextToken:
+			if depth > 0 {
+				// emitBytes should copy the []byte it receives,
+				// if it doesn't process it immediately.
+				emitBytes(z.Text())
+			}
+		case StartTagToken, EndTagToken:
+			tn, _ := z.TagName()
+			if len(tn) == 1 && tn[0] == 'a' {
+				if tt == StartTagToken {
+					depth++
+				} else {
+					depth--
+				}
+			}
+		}
+	}
+
+Parsing is done by calling Parse with an io.Reader, which returns the root of
+the parse tree (the document element) as a *Node. It is the caller's
+responsibility to ensure that the Reader provides UTF-8 encoded HTML. For
+example, to process each anchor node in depth-first order:
+
+	doc, err := html.Parse(r)
+	if err != nil {
+		// ...
+	}
+	var f func(*html.Node)
+	f = func(n *html.Node) {
+		if n.Type == html.ElementNode && n.Data == "a" {
+			// Do something with n...
+		}
+		for c := n.FirstChild; c != nil; c = c.NextSibling {
+			f(c)
+		}
+	}
+	f(doc)
+
+The relevant specifications include:
+http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html and
+http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html
+*/
+package html
+
+// The tokenization algorithm implemented by this package is not a line-by-line
+// transliteration of the relatively verbose state-machine in the WHATWG
+// specification. A more direct approach is used instead, where the program
+// counter implies the state, such as whether it is tokenizing a tag or a text
+// node. Specification compliance is verified by checking expected and actual
+// outputs over a test suite rather than aiming for algorithmic fidelity.
+
+// TODO(nigeltao): Does a DOM API belong in this package or a separate one?
+// TODO(nigeltao): How does parsing interact with a JavaScript engine?
diff --git a/html/doctype.go b/html/doctype.go
new file mode 100644
index 0000000..c484e5a
--- /dev/null
+++ b/html/doctype.go
@@ -0,0 +1,156 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+	"strings"
+)
+
+// parseDoctype parses the data from a DoctypeToken into a name,
+// public identifier, and system identifier. It returns a Node whose Type
+// is DoctypeNode, whose Data is the name, and which has attributes
+// named "system" and "public" for the two identifiers if they were present.
+// quirks is whether the document should be parsed in "quirks mode".
+func parseDoctype(s string) (n *Node, quirks bool) {
+	n = &Node{Type: DoctypeNode}
+
+	// Find the name.
+	space := strings.IndexAny(s, whitespace)
+	if space == -1 {
+		space = len(s)
+	}
+	n.Data = s[:space]
+	// The comparison to "html" is case-sensitive.
+	if n.Data != "html" {
+		quirks = true
+	}
+	n.Data = strings.ToLower(n.Data)
+	s = strings.TrimLeft(s[space:], whitespace)
+
+	if len(s) < 6 {
+		// It can't start with "PUBLIC" or "SYSTEM".
+		// Ignore the rest of the string.
+		return n, quirks || s != ""
+	}
+
+	key := strings.ToLower(s[:6])
+	s = s[6:]
+	for key == "public" || key == "system" {
+		s = strings.TrimLeft(s, whitespace)
+		if s == "" {
+			break
+		}
+		quote := s[0]
+		if quote != '"' && quote != '\'' {
+			break
+		}
+		s = s[1:]
+		q := strings.IndexRune(s, rune(quote))
+		var id string
+		if q == -1 {
+			id = s
+			s = ""
+		} else {
+			id = s[:q]
+			s = s[q+1:]
+		}
+		n.Attr = append(n.Attr, Attribute{Key: key, Val: id})
+		if key == "public" {
+			key = "system"
+		} else {
+			key = ""
+		}
+	}
+
+	if key != "" || s != "" {
+		quirks = true
+	} else if len(n.Attr) > 0 {
+		if n.Attr[0].Key == "public" {
+			public := strings.ToLower(n.Attr[0].Val)
+			switch public {
+			case "-//w3o//dtd w3 html strict 3.0//en//", "-/w3d/dtd html 4.0 transitional/en", "html":
+				quirks = true
+			default:
+				for _, q := range quirkyIDs {
+					if strings.HasPrefix(public, q) {
+						quirks = true
+						break
+					}
+				}
+			}
+			// The following two public IDs only cause quirks mode if there is no system ID.
+			if len(n.Attr) == 1 && (strings.HasPrefix(public, "-//w3c//dtd html 4.01 frameset//") ||
+				strings.HasPrefix(public, "-//w3c//dtd html 4.01 transitional//")) {
+				quirks = true
+			}
+		}
+		if lastAttr := n.Attr[len(n.Attr)-1]; lastAttr.Key == "system" &&
+			strings.ToLower(lastAttr.Val) == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd" {
+			quirks = true
+		}
+	}
+
+	return n, quirks
+}
+
+// quirkyIDs is a list of public doctype identifiers that cause a document
+// to be interpreted in quirks mode. The identifiers should be in lower case.
+var quirkyIDs = []string{
+	"+//silmaril//dtd html pro v0r11 19970101//",
+	"-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
+	"-//as//dtd html 3.0 aswedit + extensions//",
+	"-//ietf//dtd html 2.0 level 1//",
+	"-//ietf//dtd html 2.0 level 2//",
+	"-//ietf//dtd html 2.0 strict level 1//",
+	"-//ietf//dtd html 2.0 strict level 2//",
+	"-//ietf//dtd html 2.0 strict//",
+	"-//ietf//dtd html 2.0//",
+	"-//ietf//dtd html 2.1e//",
+	"-//ietf//dtd html 3.0//",
+	"-//ietf//dtd html 3.2 final//",
+	"-//ietf//dtd html 3.2//",
+	"-//ietf//dtd html 3//",
+	"-//ietf//dtd html level 0//",
+	"-//ietf//dtd html level 1//",
+	"-//ietf//dtd html level 2//",
+	"-//ietf//dtd html level 3//",
+	"-//ietf//dtd html strict level 0//",
+	"-//ietf//dtd html strict level 1//",
+	"-//ietf//dtd html strict level 2//",
+	"-//ietf//dtd html strict level 3//",
+	"-//ietf//dtd html strict//",
+	"-//ietf//dtd html//",
+	"-//metrius//dtd metrius presentational//",
+	"-//microsoft//dtd internet explorer 2.0 html strict//",
+	"-//microsoft//dtd internet explorer 2.0 html//",
+	"-//microsoft//dtd internet explorer 2.0 tables//",
+	"-//microsoft//dtd internet explorer 3.0 html strict//",
+	"-//microsoft//dtd internet explorer 3.0 html//",
+	"-//microsoft//dtd internet explorer 3.0 tables//",
+	"-//netscape comm. corp.//dtd html//",
+	"-//netscape comm. corp.//dtd strict html//",
+	"-//o'reilly and associates//dtd html 2.0//",
+	"-//o'reilly and associates//dtd html extended 1.0//",
+	"-//o'reilly and associates//dtd html extended relaxed 1.0//",
+	"-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
+	"-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
+	"-//spyglass//dtd html 2.0 extended//",
+	"-//sq//dtd html 2.0 hotmetal + extensions//",
+	"-//sun microsystems corp.//dtd hotjava html//",
+	"-//sun microsystems corp.//dtd hotjava strict html//",
+	"-//w3c//dtd html 3 1995-03-24//",
+	"-//w3c//dtd html 3.2 draft//",
+	"-//w3c//dtd html 3.2 final//",
+	"-//w3c//dtd html 3.2//",
+	"-//w3c//dtd html 3.2s draft//",
+	"-//w3c//dtd html 4.0 frameset//",
+	"-//w3c//dtd html 4.0 transitional//",
+	"-//w3c//dtd html experimental 19960712//",
+	"-//w3c//dtd html experimental 970421//",
+	"-//w3c//dtd w3 html//",
+	"-//w3o//dtd w3 html 3.0//",
+	"-//webtechs//dtd mozilla html 2.0//",
+	"-//webtechs//dtd mozilla html//",
+}
diff --git a/html/entity.go b/html/entity.go
new file mode 100644
index 0000000..af8a007
--- /dev/null
+++ b/html/entity.go
@@ -0,0 +1,2253 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+// All entities that do not end with ';' are 6 or fewer bytes long.
+const longestEntityWithoutSemicolon = 6
+
+// entity is a map from HTML entity names to their values. The semicolon matters:
+// http://www.whatwg.org/specs/web-apps/current-work/multipage/named-character-references.html
+// lists both "amp" and "amp;" as two separate entries.
+//
+// Note that the HTML5 list is larger than the HTML4 list at
+// http://www.w3.org/TR/html4/sgml/entities.html
+var entity = map[string]rune{
+	"AElig;":                           '\U000000C6',
+	"AMP;":                             '\U00000026',
+	"Aacute;":                          '\U000000C1',
+	"Abreve;":                          '\U00000102',
+	"Acirc;":                           '\U000000C2',
+	"Acy;":                             '\U00000410',
+	"Afr;":                             '\U0001D504',
+	"Agrave;":                          '\U000000C0',
+	"Alpha;":                           '\U00000391',
+	"Amacr;":                           '\U00000100',
+	"And;":                             '\U00002A53',
+	"Aogon;":                           '\U00000104',
+	"Aopf;":                            '\U0001D538',
+	"ApplyFunction;":                   '\U00002061',
+	"Aring;":                           '\U000000C5',
+	"Ascr;":                            '\U0001D49C',
+	"Assign;":                          '\U00002254',
+	"Atilde;":                          '\U000000C3',
+	"Auml;":                            '\U000000C4',
+	"Backslash;":                       '\U00002216',
+	"Barv;":                            '\U00002AE7',
+	"Barwed;":                          '\U00002306',
+	"Bcy;":                             '\U00000411',
+	"Because;":                         '\U00002235',
+	"Bernoullis;":                      '\U0000212C',
+	"Beta;":                            '\U00000392',
+	"Bfr;":                             '\U0001D505',
+	"Bopf;":                            '\U0001D539',
+	"Breve;":                           '\U000002D8',
+	"Bscr;":                            '\U0000212C',
+	"Bumpeq;":                          '\U0000224E',
+	"CHcy;":                            '\U00000427',
+	"COPY;":                            '\U000000A9',
+	"Cacute;":                          '\U00000106',
+	"Cap;":                             '\U000022D2',
+	"CapitalDifferentialD;":            '\U00002145',
+	"Cayleys;":                         '\U0000212D',
+	"Ccaron;":                          '\U0000010C',
+	"Ccedil;":                          '\U000000C7',
+	"Ccirc;":                           '\U00000108',
+	"Cconint;":                         '\U00002230',
+	"Cdot;":                            '\U0000010A',
+	"Cedilla;":                         '\U000000B8',
+	"CenterDot;":                       '\U000000B7',
+	"Cfr;":                             '\U0000212D',
+	"Chi;":                             '\U000003A7',
+	"CircleDot;":                       '\U00002299',
+	"CircleMinus;":                     '\U00002296',
+	"CirclePlus;":                      '\U00002295',
+	"CircleTimes;":                     '\U00002297',
+	"ClockwiseContourIntegral;":        '\U00002232',
+	"CloseCurlyDoubleQuote;":           '\U0000201D',
+	"CloseCurlyQuote;":                 '\U00002019',
+	"Colon;":                           '\U00002237',
+	"Colone;":                          '\U00002A74',
+	"Congruent;":                       '\U00002261',
+	"Conint;":                          '\U0000222F',
+	"ContourIntegral;":                 '\U0000222E',
+	"Copf;":                            '\U00002102',
+	"Coproduct;":                       '\U00002210',
+	"CounterClockwiseContourIntegral;": '\U00002233',
+	"Cross;":                    '\U00002A2F',
+	"Cscr;":                     '\U0001D49E',
+	"Cup;":                      '\U000022D3',
+	"CupCap;":                   '\U0000224D',
+	"DD;":                       '\U00002145',
+	"DDotrahd;":                 '\U00002911',
+	"DJcy;":                     '\U00000402',
+	"DScy;":                     '\U00000405',
+	"DZcy;":                     '\U0000040F',
+	"Dagger;":                   '\U00002021',
+	"Darr;":                     '\U000021A1',
+	"Dashv;":                    '\U00002AE4',
+	"Dcaron;":                   '\U0000010E',
+	"Dcy;":                      '\U00000414',
+	"Del;":                      '\U00002207',
+	"Delta;":                    '\U00000394',
+	"Dfr;":                      '\U0001D507',
+	"DiacriticalAcute;":         '\U000000B4',
+	"DiacriticalDot;":           '\U000002D9',
+	"DiacriticalDoubleAcute;":   '\U000002DD',
+	"DiacriticalGrave;":         '\U00000060',
+	"DiacriticalTilde;":         '\U000002DC',
+	"Diamond;":                  '\U000022C4',
+	"DifferentialD;":            '\U00002146',
+	"Dopf;":                     '\U0001D53B',
+	"Dot;":                      '\U000000A8',
+	"DotDot;":                   '\U000020DC',
+	"DotEqual;":                 '\U00002250',
+	"DoubleContourIntegral;":    '\U0000222F',
+	"DoubleDot;":                '\U000000A8',
+	"DoubleDownArrow;":          '\U000021D3',
+	"DoubleLeftArrow;":          '\U000021D0',
+	"DoubleLeftRightArrow;":     '\U000021D4',
+	"DoubleLeftTee;":            '\U00002AE4',
+	"DoubleLongLeftArrow;":      '\U000027F8',
+	"DoubleLongLeftRightArrow;": '\U000027FA',
+	"DoubleLongRightArrow;":     '\U000027F9',
+	"DoubleRightArrow;":         '\U000021D2',
+	"DoubleRightTee;":           '\U000022A8',
+	"DoubleUpArrow;":            '\U000021D1',
+	"DoubleUpDownArrow;":        '\U000021D5',
+	"DoubleVerticalBar;":        '\U00002225',
+	"DownArrow;":                '\U00002193',
+	"DownArrowBar;":             '\U00002913',
+	"DownArrowUpArrow;":         '\U000021F5',
+	"DownBreve;":                '\U00000311',
+	"DownLeftRightVector;":      '\U00002950',
+	"DownLeftTeeVector;":        '\U0000295E',
+	"DownLeftVector;":           '\U000021BD',
+	"DownLeftVectorBar;":        '\U00002956',
+	"DownRightTeeVector;":       '\U0000295F',
+	"DownRightVector;":          '\U000021C1',
+	"DownRightVectorBar;":       '\U00002957',
+	"DownTee;":                  '\U000022A4',
+	"DownTeeArrow;":             '\U000021A7',
+	"Downarrow;":                '\U000021D3',
+	"Dscr;":                     '\U0001D49F',
+	"Dstrok;":                   '\U00000110',
+	"ENG;":                      '\U0000014A',
+	"ETH;":                      '\U000000D0',
+	"Eacute;":                   '\U000000C9',
+	"Ecaron;":                   '\U0000011A',
+	"Ecirc;":                    '\U000000CA',
+	"Ecy;":                      '\U0000042D',
+	"Edot;":                     '\U00000116',
+	"Efr;":                      '\U0001D508',
+	"Egrave;":                   '\U000000C8',
+	"Element;":                  '\U00002208',
+	"Emacr;":                    '\U00000112',
+	"EmptySmallSquare;":         '\U000025FB',
+	"EmptyVerySmallSquare;":     '\U000025AB',
+	"Eogon;":                    '\U00000118',
+	"Eopf;":                     '\U0001D53C',
+	"Epsilon;":                  '\U00000395',
+	"Equal;":                    '\U00002A75',
+	"EqualTilde;":               '\U00002242',
+	"Equilibrium;":              '\U000021CC',
+	"Escr;":                     '\U00002130',
+	"Esim;":                     '\U00002A73',
+	"Eta;":                      '\U00000397',
+	"Euml;":                     '\U000000CB',
+	"Exists;":                   '\U00002203',
+	"ExponentialE;":             '\U00002147',
+	"Fcy;":                      '\U00000424',
+	"Ffr;":                      '\U0001D509',
+	"FilledSmallSquare;":        '\U000025FC',
+	"FilledVerySmallSquare;":    '\U000025AA',
+	"Fopf;":                     '\U0001D53D',
+	"ForAll;":                   '\U00002200',
+	"Fouriertrf;":               '\U00002131',
+	"Fscr;":                     '\U00002131',
+	"GJcy;":                     '\U00000403',
+	"GT;":                       '\U0000003E',
+	"Gamma;":                    '\U00000393',
+	"Gammad;":                   '\U000003DC',
+	"Gbreve;":                   '\U0000011E',
+	"Gcedil;":                   '\U00000122',
+	"Gcirc;":                    '\U0000011C',
+	"Gcy;":                      '\U00000413',
+	"Gdot;":                     '\U00000120',
+	"Gfr;":                      '\U0001D50A',
+	"Gg;":                       '\U000022D9',
+	"Gopf;":                     '\U0001D53E',
+	"GreaterEqual;":             '\U00002265',
+	"GreaterEqualLess;":         '\U000022DB',
+	"GreaterFullEqual;":         '\U00002267',
+	"GreaterGreater;":           '\U00002AA2',
+	"GreaterLess;":              '\U00002277',
+	"GreaterSlantEqual;":        '\U00002A7E',
+	"GreaterTilde;":             '\U00002273',
+	"Gscr;":                     '\U0001D4A2',
+	"Gt;":                       '\U0000226B',
+	"HARDcy;":                   '\U0000042A',
+	"Hacek;":                    '\U000002C7',
+	"Hat;":                      '\U0000005E',
+	"Hcirc;":                    '\U00000124',
+	"Hfr;":                      '\U0000210C',
+	"HilbertSpace;":             '\U0000210B',
+	"Hopf;":                     '\U0000210D',
+	"HorizontalLine;":           '\U00002500',
+	"Hscr;":                     '\U0000210B',
+	"Hstrok;":                   '\U00000126',
+	"HumpDownHump;":             '\U0000224E',
+	"HumpEqual;":                '\U0000224F',
+	"IEcy;":                     '\U00000415',
+	"IJlig;":                    '\U00000132',
+	"IOcy;":                     '\U00000401',
+	"Iacute;":                   '\U000000CD',
+	"Icirc;":                    '\U000000CE',
+	"Icy;":                      '\U00000418',
+	"Idot;":                     '\U00000130',
+	"Ifr;":                      '\U00002111',
+	"Igrave;":                   '\U000000CC',
+	"Im;":                       '\U00002111',
+	"Imacr;":                    '\U0000012A',
+	"ImaginaryI;":               '\U00002148',
+	"Implies;":                  '\U000021D2',
+	"Int;":                      '\U0000222C',
+	"Integral;":                 '\U0000222B',
+	"Intersection;":             '\U000022C2',
+	"InvisibleComma;":           '\U00002063',
+	"InvisibleTimes;":           '\U00002062',
+	"Iogon;":                    '\U0000012E',
+	"Iopf;":                     '\U0001D540',
+	"Iota;":                     '\U00000399',
+	"Iscr;":                     '\U00002110',
+	"Itilde;":                   '\U00000128',
+	"Iukcy;":                    '\U00000406',
+	"Iuml;":                     '\U000000CF',
+	"Jcirc;":                    '\U00000134',
+	"Jcy;":                      '\U00000419',
+	"Jfr;":                      '\U0001D50D',
+	"Jopf;":                     '\U0001D541',
+	"Jscr;":                     '\U0001D4A5',
+	"Jsercy;":                   '\U00000408',
+	"Jukcy;":                    '\U00000404',
+	"KHcy;":                     '\U00000425',
+	"KJcy;":                     '\U0000040C',
+	"Kappa;":                    '\U0000039A',
+	"Kcedil;":                   '\U00000136',
+	"Kcy;":                      '\U0000041A',
+	"Kfr;":                      '\U0001D50E',
+	"Kopf;":                     '\U0001D542',
+	"Kscr;":                     '\U0001D4A6',
+	"LJcy;":                     '\U00000409',
+	"LT;":                       '\U0000003C',
+	"Lacute;":                   '\U00000139',
+	"Lambda;":                   '\U0000039B',
+	"Lang;":                     '\U000027EA',
+	"Laplacetrf;":               '\U00002112',
+	"Larr;":                     '\U0000219E',
+	"Lcaron;":                   '\U0000013D',
+	"Lcedil;":                   '\U0000013B',
+	"Lcy;":                      '\U0000041B',
+	"LeftAngleBracket;":         '\U000027E8',
+	"LeftArrow;":                '\U00002190',
+	"LeftArrowBar;":             '\U000021E4',
+	"LeftArrowRightArrow;":      '\U000021C6',
+	"LeftCeiling;":              '\U00002308',
+	"LeftDoubleBracket;":        '\U000027E6',
+	"LeftDownTeeVector;":        '\U00002961',
+	"LeftDownVector;":           '\U000021C3',
+	"LeftDownVectorBar;":        '\U00002959',
+	"LeftFloor;":                '\U0000230A',
+	"LeftRightArrow;":           '\U00002194',
+	"LeftRightVector;":          '\U0000294E',
+	"LeftTee;":                  '\U000022A3',
+	"LeftTeeArrow;":             '\U000021A4',
+	"LeftTeeVector;":            '\U0000295A',
+	"LeftTriangle;":             '\U000022B2',
+	"LeftTriangleBar;":          '\U000029CF',
+	"LeftTriangleEqual;":        '\U000022B4',
+	"LeftUpDownVector;":         '\U00002951',
+	"LeftUpTeeVector;":          '\U00002960',
+	"LeftUpVector;":             '\U000021BF',
+	"LeftUpVectorBar;":          '\U00002958',
+	"LeftVector;":               '\U000021BC',
+	"LeftVectorBar;":            '\U00002952',
+	"Leftarrow;":                '\U000021D0',
+	"Leftrightarrow;":           '\U000021D4',
+	"LessEqualGreater;":         '\U000022DA',
+	"LessFullEqual;":            '\U00002266',
+	"LessGreater;":              '\U00002276',
+	"LessLess;":                 '\U00002AA1',
+	"LessSlantEqual;":           '\U00002A7D',
+	"LessTilde;":                '\U00002272',
+	"Lfr;":                      '\U0001D50F',
+	"Ll;":                       '\U000022D8',
+	"Lleftarrow;":               '\U000021DA',
+	"Lmidot;":                   '\U0000013F',
+	"LongLeftArrow;":            '\U000027F5',
+	"LongLeftRightArrow;":       '\U000027F7',
+	"LongRightArrow;":           '\U000027F6',
+	"Longleftarrow;":            '\U000027F8',
+	"Longleftrightarrow;":       '\U000027FA',
+	"Longrightarrow;":           '\U000027F9',
+	"Lopf;":                     '\U0001D543',
+	"LowerLeftArrow;":           '\U00002199',
+	"LowerRightArrow;":          '\U00002198',
+	"Lscr;":                     '\U00002112',
+	"Lsh;":                      '\U000021B0',
+	"Lstrok;":                   '\U00000141',
+	"Lt;":                       '\U0000226A',
+	"Map;":                      '\U00002905',
+	"Mcy;":                      '\U0000041C',
+	"MediumSpace;":              '\U0000205F',
+	"Mellintrf;":                '\U00002133',
+	"Mfr;":                      '\U0001D510',
+	"MinusPlus;":                '\U00002213',
+	"Mopf;":                     '\U0001D544',
+	"Mscr;":                     '\U00002133',
+	"Mu;":                       '\U0000039C',
+	"NJcy;":                     '\U0000040A',
+	"Nacute;":                   '\U00000143',
+	"Ncaron;":                   '\U00000147',
+	"Ncedil;":                   '\U00000145',
+	"Ncy;":                      '\U0000041D',
+	"NegativeMediumSpace;":      '\U0000200B',
+	"NegativeThickSpace;":       '\U0000200B',
+	"NegativeThinSpace;":        '\U0000200B',
+	"NegativeVeryThinSpace;":    '\U0000200B',
+	"NestedGreaterGreater;":     '\U0000226B',
+	"NestedLessLess;":           '\U0000226A',
+	"NewLine;":                  '\U0000000A',
+	"Nfr;":                      '\U0001D511',
+	"NoBreak;":                  '\U00002060',
+	"NonBreakingSpace;":         '\U000000A0',
+	"Nopf;":                     '\U00002115',
+	"Not;":                      '\U00002AEC',
+	"NotCongruent;":             '\U00002262',
+	"NotCupCap;":                '\U0000226D',
+	"NotDoubleVerticalBar;":     '\U00002226',
+	"NotElement;":               '\U00002209',
+	"NotEqual;":                 '\U00002260',
+	"NotExists;":                '\U00002204',
+	"NotGreater;":               '\U0000226F',
+	"NotGreaterEqual;":          '\U00002271',
+	"NotGreaterLess;":           '\U00002279',
+	"NotGreaterTilde;":          '\U00002275',
+	"NotLeftTriangle;":          '\U000022EA',
+	"NotLeftTriangleEqual;":     '\U000022EC',
+	"NotLess;":                  '\U0000226E',
+	"NotLessEqual;":             '\U00002270',
+	"NotLessGreater;":           '\U00002278',
+	"NotLessTilde;":             '\U00002274',
+	"NotPrecedes;":              '\U00002280',
+	"NotPrecedesSlantEqual;":    '\U000022E0',
+	"NotReverseElement;":        '\U0000220C',
+	"NotRightTriangle;":         '\U000022EB',
+	"NotRightTriangleEqual;":    '\U000022ED',
+	"NotSquareSubsetEqual;":     '\U000022E2',
+	"NotSquareSupersetEqual;":   '\U000022E3',
+	"NotSubsetEqual;":           '\U00002288',
+	"NotSucceeds;":              '\U00002281',
+	"NotSucceedsSlantEqual;":    '\U000022E1',
+	"NotSupersetEqual;":         '\U00002289',
+	"NotTilde;":                 '\U00002241',
+	"NotTildeEqual;":            '\U00002244',
+	"NotTildeFullEqual;":        '\U00002247',
+	"NotTildeTilde;":            '\U00002249',
+	"NotVerticalBar;":           '\U00002224',
+	"Nscr;":                     '\U0001D4A9',
+	"Ntilde;":                   '\U000000D1',
+	"Nu;":                       '\U0000039D',
+	"OElig;":                    '\U00000152',
+	"Oacute;":                   '\U000000D3',
+	"Ocirc;":                    '\U000000D4',
+	"Ocy;":                      '\U0000041E',
+	"Odblac;":                   '\U00000150',
+	"Ofr;":                      '\U0001D512',
+	"Ograve;":                   '\U000000D2',
+	"Omacr;":                    '\U0000014C',
+	"Omega;":                    '\U000003A9',
+	"Omicron;":                  '\U0000039F',
+	"Oopf;":                     '\U0001D546',
+	"OpenCurlyDoubleQuote;":     '\U0000201C',
+	"OpenCurlyQuote;":           '\U00002018',
+	"Or;":                       '\U00002A54',
+	"Oscr;":                     '\U0001D4AA',
+	"Oslash;":                   '\U000000D8',
+	"Otilde;":                   '\U000000D5',
+	"Otimes;":                   '\U00002A37',
+	"Ouml;":                     '\U000000D6',
+	"OverBar;":                  '\U0000203E',
+	"OverBrace;":                '\U000023DE',
+	"OverBracket;":              '\U000023B4',
+	"OverParenthesis;":          '\U000023DC',
+	"PartialD;":                 '\U00002202',
+	"Pcy;":                      '\U0000041F',
+	"Pfr;":                      '\U0001D513',
+	"Phi;":                      '\U000003A6',
+	"Pi;":                       '\U000003A0',
+	"PlusMinus;":                '\U000000B1',
+	"Poincareplane;":            '\U0000210C',
+	"Popf;":                     '\U00002119',
+	"Pr;":                       '\U00002ABB',
+	"Precedes;":                 '\U0000227A',
+	"PrecedesEqual;":            '\U00002AAF',
+	"PrecedesSlantEqual;":       '\U0000227C',
+	"PrecedesTilde;":            '\U0000227E',
+	"Prime;":                    '\U00002033',
+	"Product;":                  '\U0000220F',
+	"Proportion;":               '\U00002237',
+	"Proportional;":             '\U0000221D',
+	"Pscr;":                     '\U0001D4AB',
+	"Psi;":                      '\U000003A8',
+	"QUOT;":                     '\U00000022',
+	"Qfr;":                      '\U0001D514',
+	"Qopf;":                     '\U0000211A',
+	"Qscr;":                     '\U0001D4AC',
+	"RBarr;":                    '\U00002910',
+	"REG;":                      '\U000000AE',
+	"Racute;":                   '\U00000154',
+	"Rang;":                     '\U000027EB',
+	"Rarr;":                     '\U000021A0',
+	"Rarrtl;":                   '\U00002916',
+	"Rcaron;":                   '\U00000158',
+	"Rcedil;":                   '\U00000156',
+	"Rcy;":                      '\U00000420',
+	"Re;":                       '\U0000211C',
+	"ReverseElement;":           '\U0000220B',
+	"ReverseEquilibrium;":       '\U000021CB',
+	"ReverseUpEquilibrium;":     '\U0000296F',
+	"Rfr;":                      '\U0000211C',
+	"Rho;":                      '\U000003A1',
+	"RightAngleBracket;":        '\U000027E9',
+	"RightArrow;":               '\U00002192',
+	"RightArrowBar;":            '\U000021E5',
+	"RightArrowLeftArrow;":      '\U000021C4',
+	"RightCeiling;":             '\U00002309',
+	"RightDoubleBracket;":       '\U000027E7',
+	"RightDownTeeVector;":       '\U0000295D',
+	"RightDownVector;":          '\U000021C2',
+	"RightDownVectorBar;":       '\U00002955',
+	"RightFloor;":               '\U0000230B',
+	"RightTee;":                 '\U000022A2',
+	"RightTeeArrow;":            '\U000021A6',
+	"RightTeeVector;":           '\U0000295B',
+	"RightTriangle;":            '\U000022B3',
+	"RightTriangleBar;":         '\U000029D0',
+	"RightTriangleEqual;":       '\U000022B5',
+	"RightUpDownVector;":        '\U0000294F',
+	"RightUpTeeVector;":         '\U0000295C',
+	"RightUpVector;":            '\U000021BE',
+	"RightUpVectorBar;":         '\U00002954',
+	"RightVector;":              '\U000021C0',
+	"RightVectorBar;":           '\U00002953',
+	"Rightarrow;":               '\U000021D2',
+	"Ropf;":                     '\U0000211D',
+	"RoundImplies;":             '\U00002970',
+	"Rrightarrow;":              '\U000021DB',
+	"Rscr;":                     '\U0000211B',
+	"Rsh;":                      '\U000021B1',
+	"RuleDelayed;":              '\U000029F4',
+	"SHCHcy;":                   '\U00000429',
+	"SHcy;":                     '\U00000428',
+	"SOFTcy;":                   '\U0000042C',
+	"Sacute;":                   '\U0000015A',
+	"Sc;":                       '\U00002ABC',
+	"Scaron;":                   '\U00000160',
+	"Scedil;":                   '\U0000015E',
+	"Scirc;":                    '\U0000015C',
+	"Scy;":                      '\U00000421',
+	"Sfr;":                      '\U0001D516',
+	"ShortDownArrow;":           '\U00002193',
+	"ShortLeftArrow;":           '\U00002190',
+	"ShortRightArrow;":          '\U00002192',
+	"ShortUpArrow;":             '\U00002191',
+	"Sigma;":                    '\U000003A3',
+	"SmallCircle;":              '\U00002218',
+	"Sopf;":                     '\U0001D54A',
+	"Sqrt;":                     '\U0000221A',
+	"Square;":                   '\U000025A1',
+	"SquareIntersection;":       '\U00002293',
+	"SquareSubset;":             '\U0000228F',
+	"SquareSubsetEqual;":        '\U00002291',
+	"SquareSuperset;":           '\U00002290',
+	"SquareSupersetEqual;":      '\U00002292',
+	"SquareUnion;":              '\U00002294',
+	"Sscr;":                     '\U0001D4AE',
+	"Star;":                     '\U000022C6',
+	"Sub;":                      '\U000022D0',
+	"Subset;":                   '\U000022D0',
+	"SubsetEqual;":              '\U00002286',
+	"Succeeds;":                 '\U0000227B',
+	"SucceedsEqual;":            '\U00002AB0',
+	"SucceedsSlantEqual;":       '\U0000227D',
+	"SucceedsTilde;":            '\U0000227F',
+	"SuchThat;":                 '\U0000220B',
+	"Sum;":                      '\U00002211',
+	"Sup;":                      '\U000022D1',
+	"Superset;":                 '\U00002283',
+	"SupersetEqual;":            '\U00002287',
+	"Supset;":                   '\U000022D1',
+	"THORN;":                    '\U000000DE',
+	"TRADE;":                    '\U00002122',
+	"TSHcy;":                    '\U0000040B',
+	"TScy;":                     '\U00000426',
+	"Tab;":                      '\U00000009',
+	"Tau;":                      '\U000003A4',
+	"Tcaron;":                   '\U00000164',
+	"Tcedil;":                   '\U00000162',
+	"Tcy;":                      '\U00000422',
+	"Tfr;":                      '\U0001D517',
+	"Therefore;":                '\U00002234',
+	"Theta;":                    '\U00000398',
+	"ThinSpace;":                '\U00002009',
+	"Tilde;":                    '\U0000223C',
+	"TildeEqual;":               '\U00002243',
+	"TildeFullEqual;":           '\U00002245',
+	"TildeTilde;":               '\U00002248',
+	"Topf;":                     '\U0001D54B',
+	"TripleDot;":                '\U000020DB',
+	"Tscr;":                     '\U0001D4AF',
+	"Tstrok;":                   '\U00000166',
+	"Uacute;":                   '\U000000DA',
+	"Uarr;":                     '\U0000219F',
+	"Uarrocir;":                 '\U00002949',
+	"Ubrcy;":                    '\U0000040E',
+	"Ubreve;":                   '\U0000016C',
+	"Ucirc;":                    '\U000000DB',
+	"Ucy;":                      '\U00000423',
+	"Udblac;":                   '\U00000170',
+	"Ufr;":                      '\U0001D518',
+	"Ugrave;":                   '\U000000D9',
+	"Umacr;":                    '\U0000016A',
+	"UnderBar;":                 '\U0000005F',
+	"UnderBrace;":               '\U000023DF',
+	"UnderBracket;":             '\U000023B5',
+	"UnderParenthesis;":         '\U000023DD',
+	"Union;":                    '\U000022C3',
+	"UnionPlus;":                '\U0000228E',
+	"Uogon;":                    '\U00000172',
+	"Uopf;":                     '\U0001D54C',
+	"UpArrow;":                  '\U00002191',
+	"UpArrowBar;":               '\U00002912',
+	"UpArrowDownArrow;":         '\U000021C5',
+	"UpDownArrow;":              '\U00002195',
+	"UpEquilibrium;":            '\U0000296E',
+	"UpTee;":                    '\U000022A5',
+	"UpTeeArrow;":               '\U000021A5',
+	"Uparrow;":                  '\U000021D1',
+	"Updownarrow;":              '\U000021D5',
+	"UpperLeftArrow;":           '\U00002196',
+	"UpperRightArrow;":          '\U00002197',
+	"Upsi;":                     '\U000003D2',
+	"Upsilon;":                  '\U000003A5',
+	"Uring;":                    '\U0000016E',
+	"Uscr;":                     '\U0001D4B0',
+	"Utilde;":                   '\U00000168',
+	"Uuml;":                     '\U000000DC',
+	"VDash;":                    '\U000022AB',
+	"Vbar;":                     '\U00002AEB',
+	"Vcy;":                      '\U00000412',
+	"Vdash;":                    '\U000022A9',
+	"Vdashl;":                   '\U00002AE6',
+	"Vee;":                      '\U000022C1',
+	"Verbar;":                   '\U00002016',
+	"Vert;":                     '\U00002016',
+	"VerticalBar;":              '\U00002223',
+	"VerticalLine;":             '\U0000007C',
+	"VerticalSeparator;":        '\U00002758',
+	"VerticalTilde;":            '\U00002240',
+	"VeryThinSpace;":            '\U0000200A',
+	"Vfr;":                      '\U0001D519',
+	"Vopf;":                     '\U0001D54D',
+	"Vscr;":                     '\U0001D4B1',
+	"Vvdash;":                   '\U000022AA',
+	"Wcirc;":                    '\U00000174',
+	"Wedge;":                    '\U000022C0',
+	"Wfr;":                      '\U0001D51A',
+	"Wopf;":                     '\U0001D54E',
+	"Wscr;":                     '\U0001D4B2',
+	"Xfr;":                      '\U0001D51B',
+	"Xi;":                       '\U0000039E',
+	"Xopf;":                     '\U0001D54F',
+	"Xscr;":                     '\U0001D4B3',
+	"YAcy;":                     '\U0000042F',
+	"YIcy;":                     '\U00000407',
+	"YUcy;":                     '\U0000042E',
+	"Yacute;":                   '\U000000DD',
+	"Ycirc;":                    '\U00000176',
+	"Ycy;":                      '\U0000042B',
+	"Yfr;":                      '\U0001D51C',
+	"Yopf;":                     '\U0001D550',
+	"Yscr;":                     '\U0001D4B4',
+	"Yuml;":                     '\U00000178',
+	"ZHcy;":                     '\U00000416',
+	"Zacute;":                   '\U00000179',
+	"Zcaron;":                   '\U0000017D',
+	"Zcy;":                      '\U00000417',
+	"Zdot;":                     '\U0000017B',
+	"ZeroWidthSpace;":           '\U0000200B',
+	"Zeta;":                     '\U00000396',
+	"Zfr;":                      '\U00002128',
+	"Zopf;":                     '\U00002124',
+	"Zscr;":                     '\U0001D4B5',
+	"aacute;":                   '\U000000E1',
+	"abreve;":                   '\U00000103',
+	"ac;":                       '\U0000223E',
+	"acd;":                      '\U0000223F',
+	"acirc;":                    '\U000000E2',
+	"acute;":                    '\U000000B4',
+	"acy;":                      '\U00000430',
+	"aelig;":                    '\U000000E6',
+	"af;":                       '\U00002061',
+	"afr;":                      '\U0001D51E',
+	"agrave;":                   '\U000000E0',
+	"alefsym;":                  '\U00002135',
+	"aleph;":                    '\U00002135',
+	"alpha;":                    '\U000003B1',
+	"amacr;":                    '\U00000101',
+	"amalg;":                    '\U00002A3F',
+	"amp;":                      '\U00000026',
+	"and;":                      '\U00002227',
+	"andand;":                   '\U00002A55',
+	"andd;":                     '\U00002A5C',
+	"andslope;":                 '\U00002A58',
+	"andv;":                     '\U00002A5A',
+	"ang;":                      '\U00002220',
+	"ange;":                     '\U000029A4',
+	"angle;":                    '\U00002220',
+	"angmsd;":                   '\U00002221',
+	"angmsdaa;":                 '\U000029A8',
+	"angmsdab;":                 '\U000029A9',
+	"angmsdac;":                 '\U000029AA',
+	"angmsdad;":                 '\U000029AB',
+	"angmsdae;":                 '\U000029AC',
+	"angmsdaf;":                 '\U000029AD',
+	"angmsdag;":                 '\U000029AE',
+	"angmsdah;":                 '\U000029AF',
+	"angrt;":                    '\U0000221F',
+	"angrtvb;":                  '\U000022BE',
+	"angrtvbd;":                 '\U0000299D',
+	"angsph;":                   '\U00002222',
+	"angst;":                    '\U000000C5',
+	"angzarr;":                  '\U0000237C',
+	"aogon;":                    '\U00000105',
+	"aopf;":                     '\U0001D552',
+	"ap;":                       '\U00002248',
+	"apE;":                      '\U00002A70',
+	"apacir;":                   '\U00002A6F',
+	"ape;":                      '\U0000224A',
+	"apid;":                     '\U0000224B',
+	"apos;":                     '\U00000027',
+	"approx;":                   '\U00002248',
+	"approxeq;":                 '\U0000224A',
+	"aring;":                    '\U000000E5',
+	"ascr;":                     '\U0001D4B6',
+	"ast;":                      '\U0000002A',
+	"asymp;":                    '\U00002248',
+	"asympeq;":                  '\U0000224D',
+	"atilde;":                   '\U000000E3',
+	"auml;":                     '\U000000E4',
+	"awconint;":                 '\U00002233',
+	"awint;":                    '\U00002A11',
+	"bNot;":                     '\U00002AED',
+	"backcong;":                 '\U0000224C',
+	"backepsilon;":              '\U000003F6',
+	"backprime;":                '\U00002035',
+	"backsim;":                  '\U0000223D',
+	"backsimeq;":                '\U000022CD',
+	"barvee;":                   '\U000022BD',
+	"barwed;":                   '\U00002305',
+	"barwedge;":                 '\U00002305',
+	"bbrk;":                     '\U000023B5',
+	"bbrktbrk;":                 '\U000023B6',
+	"bcong;":                    '\U0000224C',
+	"bcy;":                      '\U00000431',
+	"bdquo;":                    '\U0000201E',
+	"becaus;":                   '\U00002235',
+	"because;":                  '\U00002235',
+	"bemptyv;":                  '\U000029B0',
+	"bepsi;":                    '\U000003F6',
+	"bernou;":                   '\U0000212C',
+	"beta;":                     '\U000003B2',
+	"beth;":                     '\U00002136',
+	"between;":                  '\U0000226C',
+	"bfr;":                      '\U0001D51F',
+	"bigcap;":                   '\U000022C2',
+	"bigcirc;":                  '\U000025EF',
+	"bigcup;":                   '\U000022C3',
+	"bigodot;":                  '\U00002A00',
+	"bigoplus;":                 '\U00002A01',
+	"bigotimes;":                '\U00002A02',
+	"bigsqcup;":                 '\U00002A06',
+	"bigstar;":                  '\U00002605',
+	"bigtriangledown;":          '\U000025BD',
+	"bigtriangleup;":            '\U000025B3',
+	"biguplus;":                 '\U00002A04',
+	"bigvee;":                   '\U000022C1',
+	"bigwedge;":                 '\U000022C0',
+	"bkarow;":                   '\U0000290D',
+	"blacklozenge;":             '\U000029EB',
+	"blacksquare;":              '\U000025AA',
+	"blacktriangle;":            '\U000025B4',
+	"blacktriangledown;":        '\U000025BE',
+	"blacktriangleleft;":        '\U000025C2',
+	"blacktriangleright;":       '\U000025B8',
+	"blank;":                    '\U00002423',
+	"blk12;":                    '\U00002592',
+	"blk14;":                    '\U00002591',
+	"blk34;":                    '\U00002593',
+	"block;":                    '\U00002588',
+	"bnot;":                     '\U00002310',
+	"bopf;":                     '\U0001D553',
+	"bot;":                      '\U000022A5',
+	"bottom;":                   '\U000022A5',
+	"bowtie;":                   '\U000022C8',
+	"boxDL;":                    '\U00002557',
+	"boxDR;":                    '\U00002554',
+	"boxDl;":                    '\U00002556',
+	"boxDr;":                    '\U00002553',
+	"boxH;":                     '\U00002550',
+	"boxHD;":                    '\U00002566',
+	"boxHU;":                    '\U00002569',
+	"boxHd;":                    '\U00002564',
+	"boxHu;":                    '\U00002567',
+	"boxUL;":                    '\U0000255D',
+	"boxUR;":                    '\U0000255A',
+	"boxUl;":                    '\U0000255C',
+	"boxUr;":                    '\U00002559',
+	"boxV;":                     '\U00002551',
+	"boxVH;":                    '\U0000256C',
+	"boxVL;":                    '\U00002563',
+	"boxVR;":                    '\U00002560',
+	"boxVh;":                    '\U0000256B',
+	"boxVl;":                    '\U00002562',
+	"boxVr;":                    '\U0000255F',
+	"boxbox;":                   '\U000029C9',
+	"boxdL;":                    '\U00002555',
+	"boxdR;":                    '\U00002552',
+	"boxdl;":                    '\U00002510',
+	"boxdr;":                    '\U0000250C',
+	"boxh;":                     '\U00002500',
+	"boxhD;":                    '\U00002565',
+	"boxhU;":                    '\U00002568',
+	"boxhd;":                    '\U0000252C',
+	"boxhu;":                    '\U00002534',
+	"boxminus;":                 '\U0000229F',
+	"boxplus;":                  '\U0000229E',
+	"boxtimes;":                 '\U000022A0',
+	"boxuL;":                    '\U0000255B',
+	"boxuR;":                    '\U00002558',
+	"boxul;":                    '\U00002518',
+	"boxur;":                    '\U00002514',
+	"boxv;":                     '\U00002502',
+	"boxvH;":                    '\U0000256A',
+	"boxvL;":                    '\U00002561',
+	"boxvR;":                    '\U0000255E',
+	"boxvh;":                    '\U0000253C',
+	"boxvl;":                    '\U00002524',
+	"boxvr;":                    '\U0000251C',
+	"bprime;":                   '\U00002035',
+	"breve;":                    '\U000002D8',
+	"brvbar;":                   '\U000000A6',
+	"bscr;":                     '\U0001D4B7',
+	"bsemi;":                    '\U0000204F',
+	"bsim;":                     '\U0000223D',
+	"bsime;":                    '\U000022CD',
+	"bsol;":                     '\U0000005C',
+	"bsolb;":                    '\U000029C5',
+	"bsolhsub;":                 '\U000027C8',
+	"bull;":                     '\U00002022',
+	"bullet;":                   '\U00002022',
+	"bump;":                     '\U0000224E',
+	"bumpE;":                    '\U00002AAE',
+	"bumpe;":                    '\U0000224F',
+	"bumpeq;":                   '\U0000224F',
+	"cacute;":                   '\U00000107',
+	"cap;":                      '\U00002229',
+	"capand;":                   '\U00002A44',
+	"capbrcup;":                 '\U00002A49',
+	"capcap;":                   '\U00002A4B',
+	"capcup;":                   '\U00002A47',
+	"capdot;":                   '\U00002A40',
+	"caret;":                    '\U00002041',
+	"caron;":                    '\U000002C7',
+	"ccaps;":                    '\U00002A4D',
+	"ccaron;":                   '\U0000010D',
+	"ccedil;":                   '\U000000E7',
+	"ccirc;":                    '\U00000109',
+	"ccups;":                    '\U00002A4C',
+	"ccupssm;":                  '\U00002A50',
+	"cdot;":                     '\U0000010B',
+	"cedil;":                    '\U000000B8',
+	"cemptyv;":                  '\U000029B2',
+	"cent;":                     '\U000000A2',
+	"centerdot;":                '\U000000B7',
+	"cfr;":                      '\U0001D520',
+	"chcy;":                     '\U00000447',
+	"check;":                    '\U00002713',
+	"checkmark;":                '\U00002713',
+	"chi;":                      '\U000003C7',
+	"cir;":                      '\U000025CB',
+	"cirE;":                     '\U000029C3',
+	"circ;":                     '\U000002C6',
+	"circeq;":                   '\U00002257',
+	"circlearrowleft;":          '\U000021BA',
+	"circlearrowright;":         '\U000021BB',
+	"circledR;":                 '\U000000AE',
+	"circledS;":                 '\U000024C8',
+	"circledast;":               '\U0000229B',
+	"circledcirc;":              '\U0000229A',
+	"circleddash;":              '\U0000229D',
+	"cire;":                     '\U00002257',
+	"cirfnint;":                 '\U00002A10',
+	"cirmid;":                   '\U00002AEF',
+	"cirscir;":                  '\U000029C2',
+	"clubs;":                    '\U00002663',
+	"clubsuit;":                 '\U00002663',
+	"colon;":                    '\U0000003A',
+	"colone;":                   '\U00002254',
+	"coloneq;":                  '\U00002254',
+	"comma;":                    '\U0000002C',
+	"commat;":                   '\U00000040',
+	"comp;":                     '\U00002201',
+	"compfn;":                   '\U00002218',
+	"complement;":               '\U00002201',
+	"complexes;":                '\U00002102',
+	"cong;":                     '\U00002245',
+	"congdot;":                  '\U00002A6D',
+	"conint;":                   '\U0000222E',
+	"copf;":                     '\U0001D554',
+	"coprod;":                   '\U00002210',
+	"copy;":                     '\U000000A9',
+	"copysr;":                   '\U00002117',
+	"crarr;":                    '\U000021B5',
+	"cross;":                    '\U00002717',
+	"cscr;":                     '\U0001D4B8',
+	"csub;":                     '\U00002ACF',
+	"csube;":                    '\U00002AD1',
+	"csup;":                     '\U00002AD0',
+	"csupe;":                    '\U00002AD2',
+	"ctdot;":                    '\U000022EF',
+	"cudarrl;":                  '\U00002938',
+	"cudarrr;":                  '\U00002935',
+	"cuepr;":                    '\U000022DE',
+	"cuesc;":                    '\U000022DF',
+	"cularr;":                   '\U000021B6',
+	"cularrp;":                  '\U0000293D',
+	"cup;":                      '\U0000222A',
+	"cupbrcap;":                 '\U00002A48',
+	"cupcap;":                   '\U00002A46',
+	"cupcup;":                   '\U00002A4A',
+	"cupdot;":                   '\U0000228D',
+	"cupor;":                    '\U00002A45',
+	"curarr;":                   '\U000021B7',
+	"curarrm;":                  '\U0000293C',
+	"curlyeqprec;":              '\U000022DE',
+	"curlyeqsucc;":              '\U000022DF',
+	"curlyvee;":                 '\U000022CE',
+	"curlywedge;":               '\U000022CF',
+	"curren;":                   '\U000000A4',
+	"curvearrowleft;":           '\U000021B6',
+	"curvearrowright;":          '\U000021B7',
+	"cuvee;":                    '\U000022CE',
+	"cuwed;":                    '\U000022CF',
+	"cwconint;":                 '\U00002232',
+	"cwint;":                    '\U00002231',
+	"cylcty;":                   '\U0000232D',
+	"dArr;":                     '\U000021D3',
+	"dHar;":                     '\U00002965',
+	"dagger;":                   '\U00002020',
+	"daleth;":                   '\U00002138',
+	"darr;":                     '\U00002193',
+	"dash;":                     '\U00002010',
+	"dashv;":                    '\U000022A3',
+	"dbkarow;":                  '\U0000290F',
+	"dblac;":                    '\U000002DD',
+	"dcaron;":                   '\U0000010F',
+	"dcy;":                      '\U00000434',
+	"dd;":                       '\U00002146',
+	"ddagger;":                  '\U00002021',
+	"ddarr;":                    '\U000021CA',
+	"ddotseq;":                  '\U00002A77',
+	"deg;":                      '\U000000B0',
+	"delta;":                    '\U000003B4',
+	"demptyv;":                  '\U000029B1',
+	"dfisht;":                   '\U0000297F',
+	"dfr;":                      '\U0001D521',
+	"dharl;":                    '\U000021C3',
+	"dharr;":                    '\U000021C2',
+	"diam;":                     '\U000022C4',
+	"diamond;":                  '\U000022C4',
+	"diamondsuit;":              '\U00002666',
+	"diams;":                    '\U00002666',
+	"die;":                      '\U000000A8',
+	"digamma;":                  '\U000003DD',
+	"disin;":                    '\U000022F2',
+	"div;":                      '\U000000F7',
+	"divide;":                   '\U000000F7',
+	"divideontimes;":            '\U000022C7',
+	"divonx;":                   '\U000022C7',
+	"djcy;":                     '\U00000452',
+	"dlcorn;":                   '\U0000231E',
+	"dlcrop;":                   '\U0000230D',
+	"dollar;":                   '\U00000024',
+	"dopf;":                     '\U0001D555',
+	"dot;":                      '\U000002D9',
+	"doteq;":                    '\U00002250',
+	"doteqdot;":                 '\U00002251',
+	"dotminus;":                 '\U00002238',
+	"dotplus;":                  '\U00002214',
+	"dotsquare;":                '\U000022A1',
+	"doublebarwedge;":           '\U00002306',
+	"downarrow;":                '\U00002193',
+	"downdownarrows;":           '\U000021CA',
+	"downharpoonleft;":          '\U000021C3',
+	"downharpoonright;":         '\U000021C2',
+	"drbkarow;":                 '\U00002910',
+	"drcorn;":                   '\U0000231F',
+	"drcrop;":                   '\U0000230C',
+	"dscr;":                     '\U0001D4B9',
+	"dscy;":                     '\U00000455',
+	"dsol;":                     '\U000029F6',
+	"dstrok;":                   '\U00000111',
+	"dtdot;":                    '\U000022F1',
+	"dtri;":                     '\U000025BF',
+	"dtrif;":                    '\U000025BE',
+	"duarr;":                    '\U000021F5',
+	"duhar;":                    '\U0000296F',
+	"dwangle;":                  '\U000029A6',
+	"dzcy;":                     '\U0000045F',
+	"dzigrarr;":                 '\U000027FF',
+	"eDDot;":                    '\U00002A77',
+	"eDot;":                     '\U00002251',
+	"eacute;":                   '\U000000E9',
+	"easter;":                   '\U00002A6E',
+	"ecaron;":                   '\U0000011B',
+	"ecir;":                     '\U00002256',
+	"ecirc;":                    '\U000000EA',
+	"ecolon;":                   '\U00002255',
+	"ecy;":                      '\U0000044D',
+	"edot;":                     '\U00000117',
+	"ee;":                       '\U00002147',
+	"efDot;":                    '\U00002252',
+	"efr;":                      '\U0001D522',
+	"eg;":                       '\U00002A9A',
+	"egrave;":                   '\U000000E8',
+	"egs;":                      '\U00002A96',
+	"egsdot;":                   '\U00002A98',
+	"el;":                       '\U00002A99',
+	"elinters;":                 '\U000023E7',
+	"ell;":                      '\U00002113',
+	"els;":                      '\U00002A95',
+	"elsdot;":                   '\U00002A97',
+	"emacr;":                    '\U00000113',
+	"empty;":                    '\U00002205',
+	"emptyset;":                 '\U00002205',
+	"emptyv;":                   '\U00002205',
+	"emsp;":                     '\U00002003',
+	"emsp13;":                   '\U00002004',
+	"emsp14;":                   '\U00002005',
+	"eng;":                      '\U0000014B',
+	"ensp;":                     '\U00002002',
+	"eogon;":                    '\U00000119',
+	"eopf;":                     '\U0001D556',
+	"epar;":                     '\U000022D5',
+	"eparsl;":                   '\U000029E3',
+	"eplus;":                    '\U00002A71',
+	"epsi;":                     '\U000003B5',
+	"epsilon;":                  '\U000003B5',
+	"epsiv;":                    '\U000003F5',
+	"eqcirc;":                   '\U00002256',
+	"eqcolon;":                  '\U00002255',
+	"eqsim;":                    '\U00002242',
+	"eqslantgtr;":               '\U00002A96',
+	"eqslantless;":              '\U00002A95',
+	"equals;":                   '\U0000003D',
+	"equest;":                   '\U0000225F',
+	"equiv;":                    '\U00002261',
+	"equivDD;":                  '\U00002A78',
+	"eqvparsl;":                 '\U000029E5',
+	"erDot;":                    '\U00002253',
+	"erarr;":                    '\U00002971',
+	"escr;":                     '\U0000212F',
+	"esdot;":                    '\U00002250',
+	"esim;":                     '\U00002242',
+	"eta;":                      '\U000003B7',
+	"eth;":                      '\U000000F0',
+	"euml;":                     '\U000000EB',
+	"euro;":                     '\U000020AC',
+	"excl;":                     '\U00000021',
+	"exist;":                    '\U00002203',
+	"expectation;":              '\U00002130',
+	"exponentiale;":             '\U00002147',
+	"fallingdotseq;":            '\U00002252',
+	"fcy;":                      '\U00000444',
+	"female;":                   '\U00002640',
+	"ffilig;":                   '\U0000FB03',
+	"fflig;":                    '\U0000FB00',
+	"ffllig;":                   '\U0000FB04',
+	"ffr;":                      '\U0001D523',
+	"filig;":                    '\U0000FB01',
+	"flat;":                     '\U0000266D',
+	"fllig;":                    '\U0000FB02',
+	"fltns;":                    '\U000025B1',
+	"fnof;":                     '\U00000192',
+	"fopf;":                     '\U0001D557',
+	"forall;":                   '\U00002200',
+	"fork;":                     '\U000022D4',
+	"forkv;":                    '\U00002AD9',
+	"fpartint;":                 '\U00002A0D',
+	"frac12;":                   '\U000000BD',
+	"frac13;":                   '\U00002153',
+	"frac14;":                   '\U000000BC',
+	"frac15;":                   '\U00002155',
+	"frac16;":                   '\U00002159',
+	"frac18;":                   '\U0000215B',
+	"frac23;":                   '\U00002154',
+	"frac25;":                   '\U00002156',
+	"frac34;":                   '\U000000BE',
+	"frac35;":                   '\U00002157',
+	"frac38;":                   '\U0000215C',
+	"frac45;":                   '\U00002158',
+	"frac56;":                   '\U0000215A',
+	"frac58;":                   '\U0000215D',
+	"frac78;":                   '\U0000215E',
+	"frasl;":                    '\U00002044',
+	"frown;":                    '\U00002322',
+	"fscr;":                     '\U0001D4BB',
+	"gE;":                       '\U00002267',
+	"gEl;":                      '\U00002A8C',
+	"gacute;":                   '\U000001F5',
+	"gamma;":                    '\U000003B3',
+	"gammad;":                   '\U000003DD',
+	"gap;":                      '\U00002A86',
+	"gbreve;":                   '\U0000011F',
+	"gcirc;":                    '\U0000011D',
+	"gcy;":                      '\U00000433',
+	"gdot;":                     '\U00000121',
+	"ge;":                       '\U00002265',
+	"gel;":                      '\U000022DB',
+	"geq;":                      '\U00002265',
+	"geqq;":                     '\U00002267',
+	"geqslant;":                 '\U00002A7E',
+	"ges;":                      '\U00002A7E',
+	"gescc;":                    '\U00002AA9',
+	"gesdot;":                   '\U00002A80',
+	"gesdoto;":                  '\U00002A82',
+	"gesdotol;":                 '\U00002A84',
+	"gesles;":                   '\U00002A94',
+	"gfr;":                      '\U0001D524',
+	"gg;":                       '\U0000226B',
+	"ggg;":                      '\U000022D9',
+	"gimel;":                    '\U00002137',
+	"gjcy;":                     '\U00000453',
+	"gl;":                       '\U00002277',
+	"glE;":                      '\U00002A92',
+	"gla;":                      '\U00002AA5',
+	"glj;":                      '\U00002AA4',
+	"gnE;":                      '\U00002269',
+	"gnap;":                     '\U00002A8A',
+	"gnapprox;":                 '\U00002A8A',
+	"gne;":                      '\U00002A88',
+	"gneq;":                     '\U00002A88',
+	"gneqq;":                    '\U00002269',
+	"gnsim;":                    '\U000022E7',
+	"gopf;":                     '\U0001D558',
+	"grave;":                    '\U00000060',
+	"gscr;":                     '\U0000210A',
+	"gsim;":                     '\U00002273',
+	"gsime;":                    '\U00002A8E',
+	"gsiml;":                    '\U00002A90',
+	"gt;":                       '\U0000003E',
+	"gtcc;":                     '\U00002AA7',
+	"gtcir;":                    '\U00002A7A',
+	"gtdot;":                    '\U000022D7',
+	"gtlPar;":                   '\U00002995',
+	"gtquest;":                  '\U00002A7C',
+	"gtrapprox;":                '\U00002A86',
+	"gtrarr;":                   '\U00002978',
+	"gtrdot;":                   '\U000022D7',
+	"gtreqless;":                '\U000022DB',
+	"gtreqqless;":               '\U00002A8C',
+	"gtrless;":                  '\U00002277',
+	"gtrsim;":                   '\U00002273',
+	"hArr;":                     '\U000021D4',
+	"hairsp;":                   '\U0000200A',
+	"half;":                     '\U000000BD',
+	"hamilt;":                   '\U0000210B',
+	"hardcy;":                   '\U0000044A',
+	"harr;":                     '\U00002194',
+	"harrcir;":                  '\U00002948',
+	"harrw;":                    '\U000021AD',
+	"hbar;":                     '\U0000210F',
+	"hcirc;":                    '\U00000125',
+	"hearts;":                   '\U00002665',
+	"heartsuit;":                '\U00002665',
+	"hellip;":                   '\U00002026',
+	"hercon;":                   '\U000022B9',
+	"hfr;":                      '\U0001D525',
+	"hksearow;":                 '\U00002925',
+	"hkswarow;":                 '\U00002926',
+	"hoarr;":                    '\U000021FF',
+	"homtht;":                   '\U0000223B',
+	"hookleftarrow;":            '\U000021A9',
+	"hookrightarrow;":           '\U000021AA',
+	"hopf;":                     '\U0001D559',
+	"horbar;":                   '\U00002015',
+	"hscr;":                     '\U0001D4BD',
+	"hslash;":                   '\U0000210F',
+	"hstrok;":                   '\U00000127',
+	"hybull;":                   '\U00002043',
+	"hyphen;":                   '\U00002010',
+	"iacute;":                   '\U000000ED',
+	"ic;":                       '\U00002063',
+	"icirc;":                    '\U000000EE',
+	"icy;":                      '\U00000438',
+	"iecy;":                     '\U00000435',
+	"iexcl;":                    '\U000000A1',
+	"iff;":                      '\U000021D4',
+	"ifr;":                      '\U0001D526',
+	"igrave;":                   '\U000000EC',
+	"ii;":                       '\U00002148',
+	"iiiint;":                   '\U00002A0C',
+	"iiint;":                    '\U0000222D',
+	"iinfin;":                   '\U000029DC',
+	"iiota;":                    '\U00002129',
+	"ijlig;":                    '\U00000133',
+	"imacr;":                    '\U0000012B',
+	"image;":                    '\U00002111',
+	"imagline;":                 '\U00002110',
+	"imagpart;":                 '\U00002111',
+	"imath;":                    '\U00000131',
+	"imof;":                     '\U000022B7',
+	"imped;":                    '\U000001B5',
+	"in;":                       '\U00002208',
+	"incare;":                   '\U00002105',
+	"infin;":                    '\U0000221E',
+	"infintie;":                 '\U000029DD',
+	"inodot;":                   '\U00000131',
+	"int;":                      '\U0000222B',
+	"intcal;":                   '\U000022BA',
+	"integers;":                 '\U00002124',
+	"intercal;":                 '\U000022BA',
+	"intlarhk;":                 '\U00002A17',
+	"intprod;":                  '\U00002A3C',
+	"iocy;":                     '\U00000451',
+	"iogon;":                    '\U0000012F',
+	"iopf;":                     '\U0001D55A',
+	"iota;":                     '\U000003B9',
+	"iprod;":                    '\U00002A3C',
+	"iquest;":                   '\U000000BF',
+	"iscr;":                     '\U0001D4BE',
+	"isin;":                     '\U00002208',
+	"isinE;":                    '\U000022F9',
+	"isindot;":                  '\U000022F5',
+	"isins;":                    '\U000022F4',
+	"isinsv;":                   '\U000022F3',
+	"isinv;":                    '\U00002208',
+	"it;":                       '\U00002062',
+	"itilde;":                   '\U00000129',
+	"iukcy;":                    '\U00000456',
+	"iuml;":                     '\U000000EF',
+	"jcirc;":                    '\U00000135',
+	"jcy;":                      '\U00000439',
+	"jfr;":                      '\U0001D527',
+	"jmath;":                    '\U00000237',
+	"jopf;":                     '\U0001D55B',
+	"jscr;":                     '\U0001D4BF',
+	"jsercy;":                   '\U00000458',
+	"jukcy;":                    '\U00000454',
+	"kappa;":                    '\U000003BA',
+	"kappav;":                   '\U000003F0',
+	"kcedil;":                   '\U00000137',
+	"kcy;":                      '\U0000043A',
+	"kfr;":                      '\U0001D528',
+	"kgreen;":                   '\U00000138',
+	"khcy;":                     '\U00000445',
+	"kjcy;":                     '\U0000045C',
+	"kopf;":                     '\U0001D55C',
+	"kscr;":                     '\U0001D4C0',
+	"lAarr;":                    '\U000021DA',
+	"lArr;":                     '\U000021D0',
+	"lAtail;":                   '\U0000291B',
+	"lBarr;":                    '\U0000290E',
+	"lE;":                       '\U00002266',
+	"lEg;":                      '\U00002A8B',
+	"lHar;":                     '\U00002962',
+	"lacute;":                   '\U0000013A',
+	"laemptyv;":                 '\U000029B4',
+	"lagran;":                   '\U00002112',
+	"lambda;":                   '\U000003BB',
+	"lang;":                     '\U000027E8',
+	"langd;":                    '\U00002991',
+	"langle;":                   '\U000027E8',
+	"lap;":                      '\U00002A85',
+	"laquo;":                    '\U000000AB',
+	"larr;":                     '\U00002190',
+	"larrb;":                    '\U000021E4',
+	"larrbfs;":                  '\U0000291F',
+	"larrfs;":                   '\U0000291D',
+	"larrhk;":                   '\U000021A9',
+	"larrlp;":                   '\U000021AB',
+	"larrpl;":                   '\U00002939',
+	"larrsim;":                  '\U00002973',
+	"larrtl;":                   '\U000021A2',
+	"lat;":                      '\U00002AAB',
+	"latail;":                   '\U00002919',
+	"late;":                     '\U00002AAD',
+	"lbarr;":                    '\U0000290C',
+	"lbbrk;":                    '\U00002772',
+	"lbrace;":                   '\U0000007B',
+	"lbrack;":                   '\U0000005B',
+	"lbrke;":                    '\U0000298B',
+	"lbrksld;":                  '\U0000298F',
+	"lbrkslu;":                  '\U0000298D',
+	"lcaron;":                   '\U0000013E',
+	"lcedil;":                   '\U0000013C',
+	"lceil;":                    '\U00002308',
+	"lcub;":                     '\U0000007B',
+	"lcy;":                      '\U0000043B',
+	"ldca;":                     '\U00002936',
+	"ldquo;":                    '\U0000201C',
+	"ldquor;":                   '\U0000201E',
+	"ldrdhar;":                  '\U00002967',
+	"ldrushar;":                 '\U0000294B',
+	"ldsh;":                     '\U000021B2',
+	"le;":                       '\U00002264',
+	"leftarrow;":                '\U00002190',
+	"leftarrowtail;":            '\U000021A2',
+	"leftharpoondown;":          '\U000021BD',
+	"leftharpoonup;":            '\U000021BC',
+	"leftleftarrows;":           '\U000021C7',
+	"leftrightarrow;":           '\U00002194',
+	"leftrightarrows;":          '\U000021C6',
+	"leftrightharpoons;":        '\U000021CB',
+	"leftrightsquigarrow;":      '\U000021AD',
+	"leftthreetimes;":           '\U000022CB',
+	"leg;":                      '\U000022DA',
+	"leq;":                      '\U00002264',
+	"leqq;":                     '\U00002266',
+	"leqslant;":                 '\U00002A7D',
+	"les;":                      '\U00002A7D',
+	"lescc;":                    '\U00002AA8',
+	"lesdot;":                   '\U00002A7F',
+	"lesdoto;":                  '\U00002A81',
+	"lesdotor;":                 '\U00002A83',
+	"lesges;":                   '\U00002A93',
+	"lessapprox;":               '\U00002A85',
+	"lessdot;":                  '\U000022D6',
+	"lesseqgtr;":                '\U000022DA',
+	"lesseqqgtr;":               '\U00002A8B',
+	"lessgtr;":                  '\U00002276',
+	"lesssim;":                  '\U00002272',
+	"lfisht;":                   '\U0000297C',
+	"lfloor;":                   '\U0000230A',
+	"lfr;":                      '\U0001D529',
+	"lg;":                       '\U00002276',
+	"lgE;":                      '\U00002A91',
+	"lhard;":                    '\U000021BD',
+	"lharu;":                    '\U000021BC',
+	"lharul;":                   '\U0000296A',
+	"lhblk;":                    '\U00002584',
+	"ljcy;":                     '\U00000459',
+	"ll;":                       '\U0000226A',
+	"llarr;":                    '\U000021C7',
+	"llcorner;":                 '\U0000231E',
+	"llhard;":                   '\U0000296B',
+	"lltri;":                    '\U000025FA',
+	"lmidot;":                   '\U00000140',
+	"lmoust;":                   '\U000023B0',
+	"lmoustache;":               '\U000023B0',
+	"lnE;":                      '\U00002268',
+	"lnap;":                     '\U00002A89',
+	"lnapprox;":                 '\U00002A89',
+	"lne;":                      '\U00002A87',
+	"lneq;":                     '\U00002A87',
+	"lneqq;":                    '\U00002268',
+	"lnsim;":                    '\U000022E6',
+	"loang;":                    '\U000027EC',
+	"loarr;":                    '\U000021FD',
+	"lobrk;":                    '\U000027E6',
+	"longleftarrow;":            '\U000027F5',
+	"longleftrightarrow;":       '\U000027F7',
+	"longmapsto;":               '\U000027FC',
+	"longrightarrow;":           '\U000027F6',
+	"looparrowleft;":            '\U000021AB',
+	"looparrowright;":           '\U000021AC',
+	"lopar;":                    '\U00002985',
+	"lopf;":                     '\U0001D55D',
+	"loplus;":                   '\U00002A2D',
+	"lotimes;":                  '\U00002A34',
+	"lowast;":                   '\U00002217',
+	"lowbar;":                   '\U0000005F',
+	"loz;":                      '\U000025CA',
+	"lozenge;":                  '\U000025CA',
+	"lozf;":                     '\U000029EB',
+	"lpar;":                     '\U00000028',
+	"lparlt;":                   '\U00002993',
+	"lrarr;":                    '\U000021C6',
+	"lrcorner;":                 '\U0000231F',
+	"lrhar;":                    '\U000021CB',
+	"lrhard;":                   '\U0000296D',
+	"lrm;":                      '\U0000200E',
+	"lrtri;":                    '\U000022BF',
+	"lsaquo;":                   '\U00002039',
+	"lscr;":                     '\U0001D4C1',
+	"lsh;":                      '\U000021B0',
+	"lsim;":                     '\U00002272',
+	"lsime;":                    '\U00002A8D',
+	"lsimg;":                    '\U00002A8F',
+	"lsqb;":                     '\U0000005B',
+	"lsquo;":                    '\U00002018',
+	"lsquor;":                   '\U0000201A',
+	"lstrok;":                   '\U00000142',
+	"lt;":                       '\U0000003C',
+	"ltcc;":                     '\U00002AA6',
+	"ltcir;":                    '\U00002A79',
+	"ltdot;":                    '\U000022D6',
+	"lthree;":                   '\U000022CB',
+	"ltimes;":                   '\U000022C9',
+	"ltlarr;":                   '\U00002976',
+	"ltquest;":                  '\U00002A7B',
+	"ltrPar;":                   '\U00002996',
+	"ltri;":                     '\U000025C3',
+	"ltrie;":                    '\U000022B4',
+	"ltrif;":                    '\U000025C2',
+	"lurdshar;":                 '\U0000294A',
+	"luruhar;":                  '\U00002966',
+	"mDDot;":                    '\U0000223A',
+	"macr;":                     '\U000000AF',
+	"male;":                     '\U00002642',
+	"malt;":                     '\U00002720',
+	"maltese;":                  '\U00002720',
+	"map;":                      '\U000021A6',
+	"mapsto;":                   '\U000021A6',
+	"mapstodown;":               '\U000021A7',
+	"mapstoleft;":               '\U000021A4',
+	"mapstoup;":                 '\U000021A5',
+	"marker;":                   '\U000025AE',
+	"mcomma;":                   '\U00002A29',
+	"mcy;":                      '\U0000043C',
+	"mdash;":                    '\U00002014',
+	"measuredangle;":            '\U00002221',
+	"mfr;":                      '\U0001D52A',
+	"mho;":                      '\U00002127',
+	"micro;":                    '\U000000B5',
+	"mid;":                      '\U00002223',
+	"midast;":                   '\U0000002A',
+	"midcir;":                   '\U00002AF0',
+	"middot;":                   '\U000000B7',
+	"minus;":                    '\U00002212',
+	"minusb;":                   '\U0000229F',
+	"minusd;":                   '\U00002238',
+	"minusdu;":                  '\U00002A2A',
+	"mlcp;":                     '\U00002ADB',
+	"mldr;":                     '\U00002026',
+	"mnplus;":                   '\U00002213',
+	"models;":                   '\U000022A7',
+	"mopf;":                     '\U0001D55E',
+	"mp;":                       '\U00002213',
+	"mscr;":                     '\U0001D4C2',
+	"mstpos;":                   '\U0000223E',
+	"mu;":                       '\U000003BC',
+	"multimap;":                 '\U000022B8',
+	"mumap;":                    '\U000022B8',
+	"nLeftarrow;":               '\U000021CD',
+	"nLeftrightarrow;":          '\U000021CE',
+	"nRightarrow;":              '\U000021CF',
+	"nVDash;":                   '\U000022AF',
+	"nVdash;":                   '\U000022AE',
+	"nabla;":                    '\U00002207',
+	"nacute;":                   '\U00000144',
+	"nap;":                      '\U00002249',
+	"napos;":                    '\U00000149',
+	"napprox;":                  '\U00002249',
+	"natur;":                    '\U0000266E',
+	"natural;":                  '\U0000266E',
+	"naturals;":                 '\U00002115',
+	"nbsp;":                     '\U000000A0',
+	"ncap;":                     '\U00002A43',
+	"ncaron;":                   '\U00000148',
+	"ncedil;":                   '\U00000146',
+	"ncong;":                    '\U00002247',
+	"ncup;":                     '\U00002A42',
+	"ncy;":                      '\U0000043D',
+	"ndash;":                    '\U00002013',
+	"ne;":                       '\U00002260',
+	"neArr;":                    '\U000021D7',
+	"nearhk;":                   '\U00002924',
+	"nearr;":                    '\U00002197',
+	"nearrow;":                  '\U00002197',
+	"nequiv;":                   '\U00002262',
+	"nesear;":                   '\U00002928',
+	"nexist;":                   '\U00002204',
+	"nexists;":                  '\U00002204',
+	"nfr;":                      '\U0001D52B',
+	"nge;":                      '\U00002271',
+	"ngeq;":                     '\U00002271',
+	"ngsim;":                    '\U00002275',
+	"ngt;":                      '\U0000226F',
+	"ngtr;":                     '\U0000226F',
+	"nhArr;":                    '\U000021CE',
+	"nharr;":                    '\U000021AE',
+	"nhpar;":                    '\U00002AF2',
+	"ni;":                       '\U0000220B',
+	"nis;":                      '\U000022FC',
+	"nisd;":                     '\U000022FA',
+	"niv;":                      '\U0000220B',
+	"njcy;":                     '\U0000045A',
+	"nlArr;":                    '\U000021CD',
+	"nlarr;":                    '\U0000219A',
+	"nldr;":                     '\U00002025',
+	"nle;":                      '\U00002270',
+	"nleftarrow;":               '\U0000219A',
+	"nleftrightarrow;":          '\U000021AE',
+	"nleq;":                     '\U00002270',
+	"nless;":                    '\U0000226E',
+	"nlsim;":                    '\U00002274',
+	"nlt;":                      '\U0000226E',
+	"nltri;":                    '\U000022EA',
+	"nltrie;":                   '\U000022EC',
+	"nmid;":                     '\U00002224',
+	"nopf;":                     '\U0001D55F',
+	"not;":                      '\U000000AC',
+	"notin;":                    '\U00002209',
+	"notinva;":                  '\U00002209',
+	"notinvb;":                  '\U000022F7',
+	"notinvc;":                  '\U000022F6',
+	"notni;":                    '\U0000220C',
+	"notniva;":                  '\U0000220C',
+	"notnivb;":                  '\U000022FE',
+	"notnivc;":                  '\U000022FD',
+	"npar;":                     '\U00002226',
+	"nparallel;":                '\U00002226',
+	"npolint;":                  '\U00002A14',
+	"npr;":                      '\U00002280',
+	"nprcue;":                   '\U000022E0',
+	"nprec;":                    '\U00002280',
+	"nrArr;":                    '\U000021CF',
+	"nrarr;":                    '\U0000219B',
+	"nrightarrow;":              '\U0000219B',
+	"nrtri;":                    '\U000022EB',
+	"nrtrie;":                   '\U000022ED',
+	"nsc;":                      '\U00002281',
+	"nsccue;":                   '\U000022E1',
+	"nscr;":                     '\U0001D4C3',
+	"nshortmid;":                '\U00002224',
+	"nshortparallel;":           '\U00002226',
+	"nsim;":                     '\U00002241',
+	"nsime;":                    '\U00002244',
+	"nsimeq;":                   '\U00002244',
+	"nsmid;":                    '\U00002224',
+	"nspar;":                    '\U00002226',
+	"nsqsube;":                  '\U000022E2',
+	"nsqsupe;":                  '\U000022E3',
+	"nsub;":                     '\U00002284',
+	"nsube;":                    '\U00002288',
+	"nsubseteq;":                '\U00002288',
+	"nsucc;":                    '\U00002281',
+	"nsup;":                     '\U00002285',
+	"nsupe;":                    '\U00002289',
+	"nsupseteq;":                '\U00002289',
+	"ntgl;":                     '\U00002279',
+	"ntilde;":                   '\U000000F1',
+	"ntlg;":                     '\U00002278',
+	"ntriangleleft;":            '\U000022EA',
+	"ntrianglelefteq;":          '\U000022EC',
+	"ntriangleright;":           '\U000022EB',
+	"ntrianglerighteq;":         '\U000022ED',
+	"nu;":                       '\U000003BD',
+	"num;":                      '\U00000023',
+	"numero;":                   '\U00002116',
+	"numsp;":                    '\U00002007',
+	"nvDash;":                   '\U000022AD',
+	"nvHarr;":                   '\U00002904',
+	"nvdash;":                   '\U000022AC',
+	"nvinfin;":                  '\U000029DE',
+	"nvlArr;":                   '\U00002902',
+	"nvrArr;":                   '\U00002903',
+	"nwArr;":                    '\U000021D6',
+	"nwarhk;":                   '\U00002923',
+	"nwarr;":                    '\U00002196',
+	"nwarrow;":                  '\U00002196',
+	"nwnear;":                   '\U00002927',
+	"oS;":                       '\U000024C8',
+	"oacute;":                   '\U000000F3',
+	"oast;":                     '\U0000229B',
+	"ocir;":                     '\U0000229A',
+	"ocirc;":                    '\U000000F4',
+	"ocy;":                      '\U0000043E',
+	"odash;":                    '\U0000229D',
+	"odblac;":                   '\U00000151',
+	"odiv;":                     '\U00002A38',
+	"odot;":                     '\U00002299',
+	"odsold;":                   '\U000029BC',
+	"oelig;":                    '\U00000153',
+	"ofcir;":                    '\U000029BF',
+	"ofr;":                      '\U0001D52C',
+	"ogon;":                     '\U000002DB',
+	"ograve;":                   '\U000000F2',
+	"ogt;":                      '\U000029C1',
+	"ohbar;":                    '\U000029B5',
+	"ohm;":                      '\U000003A9',
+	"oint;":                     '\U0000222E',
+	"olarr;":                    '\U000021BA',
+	"olcir;":                    '\U000029BE',
+	"olcross;":                  '\U000029BB',
+	"oline;":                    '\U0000203E',
+	"olt;":                      '\U000029C0',
+	"omacr;":                    '\U0000014D',
+	"omega;":                    '\U000003C9',
+	"omicron;":                  '\U000003BF',
+	"omid;":                     '\U000029B6',
+	"ominus;":                   '\U00002296',
+	"oopf;":                     '\U0001D560',
+	"opar;":                     '\U000029B7',
+	"operp;":                    '\U000029B9',
+	"oplus;":                    '\U00002295',
+	"or;":                       '\U00002228',
+	"orarr;":                    '\U000021BB',
+	"ord;":                      '\U00002A5D',
+	"order;":                    '\U00002134',
+	"orderof;":                  '\U00002134',
+	"ordf;":                     '\U000000AA',
+	"ordm;":                     '\U000000BA',
+	"origof;":                   '\U000022B6',
+	"oror;":                     '\U00002A56',
+	"orslope;":                  '\U00002A57',
+	"orv;":                      '\U00002A5B',
+	"oscr;":                     '\U00002134',
+	"oslash;":                   '\U000000F8',
+	"osol;":                     '\U00002298',
+	"otilde;":                   '\U000000F5',
+	"otimes;":                   '\U00002297',
+	"otimesas;":                 '\U00002A36',
+	"ouml;":                     '\U000000F6',
+	"ovbar;":                    '\U0000233D',
+	"par;":                      '\U00002225',
+	"para;":                     '\U000000B6',
+	"parallel;":                 '\U00002225',
+	"parsim;":                   '\U00002AF3',
+	"parsl;":                    '\U00002AFD',
+	"part;":                     '\U00002202',
+	"pcy;":                      '\U0000043F',
+	"percnt;":                   '\U00000025',
+	"period;":                   '\U0000002E',
+	"permil;":                   '\U00002030',
+	"perp;":                     '\U000022A5',
+	"pertenk;":                  '\U00002031',
+	"pfr;":                      '\U0001D52D',
+	"phi;":                      '\U000003C6',
+	"phiv;":                     '\U000003D5',
+	"phmmat;":                   '\U00002133',
+	"phone;":                    '\U0000260E',
+	"pi;":                       '\U000003C0',
+	"pitchfork;":                '\U000022D4',
+	"piv;":                      '\U000003D6',
+	"planck;":                   '\U0000210F',
+	"planckh;":                  '\U0000210E',
+	"plankv;":                   '\U0000210F',
+	"plus;":                     '\U0000002B',
+	"plusacir;":                 '\U00002A23',
+	"plusb;":                    '\U0000229E',
+	"pluscir;":                  '\U00002A22',
+	"plusdo;":                   '\U00002214',
+	"plusdu;":                   '\U00002A25',
+	"pluse;":                    '\U00002A72',
+	"plusmn;":                   '\U000000B1',
+	"plussim;":                  '\U00002A26',
+	"plustwo;":                  '\U00002A27',
+	"pm;":                       '\U000000B1',
+	"pointint;":                 '\U00002A15',
+	"popf;":                     '\U0001D561',
+	"pound;":                    '\U000000A3',
+	"pr;":                       '\U0000227A',
+	"prE;":                      '\U00002AB3',
+	"prap;":                     '\U00002AB7',
+	"prcue;":                    '\U0000227C',
+	"pre;":                      '\U00002AAF',
+	"prec;":                     '\U0000227A',
+	"precapprox;":               '\U00002AB7',
+	"preccurlyeq;":              '\U0000227C',
+	"preceq;":                   '\U00002AAF',
+	"precnapprox;":              '\U00002AB9',
+	"precneqq;":                 '\U00002AB5',
+	"precnsim;":                 '\U000022E8',
+	"precsim;":                  '\U0000227E',
+	"prime;":                    '\U00002032',
+	"primes;":                   '\U00002119',
+	"prnE;":                     '\U00002AB5',
+	"prnap;":                    '\U00002AB9',
+	"prnsim;":                   '\U000022E8',
+	"prod;":                     '\U0000220F',
+	"profalar;":                 '\U0000232E',
+	"profline;":                 '\U00002312',
+	"profsurf;":                 '\U00002313',
+	"prop;":                     '\U0000221D',
+	"propto;":                   '\U0000221D',
+	"prsim;":                    '\U0000227E',
+	"prurel;":                   '\U000022B0',
+	"pscr;":                     '\U0001D4C5',
+	"psi;":                      '\U000003C8',
+	"puncsp;":                   '\U00002008',
+	"qfr;":                      '\U0001D52E',
+	"qint;":                     '\U00002A0C',
+	"qopf;":                     '\U0001D562',
+	"qprime;":                   '\U00002057',
+	"qscr;":                     '\U0001D4C6',
+	"quaternions;":              '\U0000210D',
+	"quatint;":                  '\U00002A16',
+	"quest;":                    '\U0000003F',
+	"questeq;":                  '\U0000225F',
+	"quot;":                     '\U00000022',
+	"rAarr;":                    '\U000021DB',
+	"rArr;":                     '\U000021D2',
+	"rAtail;":                   '\U0000291C',
+	"rBarr;":                    '\U0000290F',
+	"rHar;":                     '\U00002964',
+	"racute;":                   '\U00000155',
+	"radic;":                    '\U0000221A',
+	"raemptyv;":                 '\U000029B3',
+	"rang;":                     '\U000027E9',
+	"rangd;":                    '\U00002992',
+	"range;":                    '\U000029A5',
+	"rangle;":                   '\U000027E9',
+	"raquo;":                    '\U000000BB',
+	"rarr;":                     '\U00002192',
+	"rarrap;":                   '\U00002975',
+	"rarrb;":                    '\U000021E5',
+	"rarrbfs;":                  '\U00002920',
+	"rarrc;":                    '\U00002933',
+	"rarrfs;":                   '\U0000291E',
+	"rarrhk;":                   '\U000021AA',
+	"rarrlp;":                   '\U000021AC',
+	"rarrpl;":                   '\U00002945',
+	"rarrsim;":                  '\U00002974',
+	"rarrtl;":                   '\U000021A3',
+	"rarrw;":                    '\U0000219D',
+	"ratail;":                   '\U0000291A',
+	"ratio;":                    '\U00002236',
+	"rationals;":                '\U0000211A',
+	"rbarr;":                    '\U0000290D',
+	"rbbrk;":                    '\U00002773',
+	"rbrace;":                   '\U0000007D',
+	"rbrack;":                   '\U0000005D',
+	"rbrke;":                    '\U0000298C',
+	"rbrksld;":                  '\U0000298E',
+	"rbrkslu;":                  '\U00002990',
+	"rcaron;":                   '\U00000159',
+	"rcedil;":                   '\U00000157',
+	"rceil;":                    '\U00002309',
+	"rcub;":                     '\U0000007D',
+	"rcy;":                      '\U00000440',
+	"rdca;":                     '\U00002937',
+	"rdldhar;":                  '\U00002969',
+	"rdquo;":                    '\U0000201D',
+	"rdquor;":                   '\U0000201D',
+	"rdsh;":                     '\U000021B3',
+	"real;":                     '\U0000211C',
+	"realine;":                  '\U0000211B',
+	"realpart;":                 '\U0000211C',
+	"reals;":                    '\U0000211D',
+	"rect;":                     '\U000025AD',
+	"reg;":                      '\U000000AE',
+	"rfisht;":                   '\U0000297D',
+	"rfloor;":                   '\U0000230B',
+	"rfr;":                      '\U0001D52F',
+	"rhard;":                    '\U000021C1',
+	"rharu;":                    '\U000021C0',
+	"rharul;":                   '\U0000296C',
+	"rho;":                      '\U000003C1',
+	"rhov;":                     '\U000003F1',
+	"rightarrow;":               '\U00002192',
+	"rightarrowtail;":           '\U000021A3',
+	"rightharpoondown;":         '\U000021C1',
+	"rightharpoonup;":           '\U000021C0',
+	"rightleftarrows;":          '\U000021C4',
+	"rightleftharpoons;":        '\U000021CC',
+	"rightrightarrows;":         '\U000021C9',
+	"rightsquigarrow;":          '\U0000219D',
+	"rightthreetimes;":          '\U000022CC',
+	"ring;":                     '\U000002DA',
+	"risingdotseq;":             '\U00002253',
+	"rlarr;":                    '\U000021C4',
+	"rlhar;":                    '\U000021CC',
+	"rlm;":                      '\U0000200F',
+	"rmoust;":                   '\U000023B1',
+	"rmoustache;":               '\U000023B1',
+	"rnmid;":                    '\U00002AEE',
+	"roang;":                    '\U000027ED',
+	"roarr;":                    '\U000021FE',
+	"robrk;":                    '\U000027E7',
+	"ropar;":                    '\U00002986',
+	"ropf;":                     '\U0001D563',
+	"roplus;":                   '\U00002A2E',
+	"rotimes;":                  '\U00002A35',
+	"rpar;":                     '\U00000029',
+	"rpargt;":                   '\U00002994',
+	"rppolint;":                 '\U00002A12',
+	"rrarr;":                    '\U000021C9',
+	"rsaquo;":                   '\U0000203A',
+	"rscr;":                     '\U0001D4C7',
+	"rsh;":                      '\U000021B1',
+	"rsqb;":                     '\U0000005D',
+	"rsquo;":                    '\U00002019',
+	"rsquor;":                   '\U00002019',
+	"rthree;":                   '\U000022CC',
+	"rtimes;":                   '\U000022CA',
+	"rtri;":                     '\U000025B9',
+	"rtrie;":                    '\U000022B5',
+	"rtrif;":                    '\U000025B8',
+	"rtriltri;":                 '\U000029CE',
+	"ruluhar;":                  '\U00002968',
+	"rx;":                       '\U0000211E',
+	"sacute;":                   '\U0000015B',
+	"sbquo;":                    '\U0000201A',
+	"sc;":                       '\U0000227B',
+	"scE;":                      '\U00002AB4',
+	"scap;":                     '\U00002AB8',
+	"scaron;":                   '\U00000161',
+	"sccue;":                    '\U0000227D',
+	"sce;":                      '\U00002AB0',
+	"scedil;":                   '\U0000015F',
+	"scirc;":                    '\U0000015D',
+	"scnE;":                     '\U00002AB6',
+	"scnap;":                    '\U00002ABA',
+	"scnsim;":                   '\U000022E9',
+	"scpolint;":                 '\U00002A13',
+	"scsim;":                    '\U0000227F',
+	"scy;":                      '\U00000441',
+	"sdot;":                     '\U000022C5',
+	"sdotb;":                    '\U000022A1',
+	"sdote;":                    '\U00002A66',
+	"seArr;":                    '\U000021D8',
+	"searhk;":                   '\U00002925',
+	"searr;":                    '\U00002198',
+	"searrow;":                  '\U00002198',
+	"sect;":                     '\U000000A7',
+	"semi;":                     '\U0000003B',
+	"seswar;":                   '\U00002929',
+	"setminus;":                 '\U00002216',
+	"setmn;":                    '\U00002216',
+	"sext;":                     '\U00002736',
+	"sfr;":                      '\U0001D530',
+	"sfrown;":                   '\U00002322',
+	"sharp;":                    '\U0000266F',
+	"shchcy;":                   '\U00000449',
+	"shcy;":                     '\U00000448',
+	"shortmid;":                 '\U00002223',
+	"shortparallel;":            '\U00002225',
+	"shy;":                      '\U000000AD',
+	"sigma;":                    '\U000003C3',
+	"sigmaf;":                   '\U000003C2',
+	"sigmav;":                   '\U000003C2',
+	"sim;":                      '\U0000223C',
+	"simdot;":                   '\U00002A6A',
+	"sime;":                     '\U00002243',
+	"simeq;":                    '\U00002243',
+	"simg;":                     '\U00002A9E',
+	"simgE;":                    '\U00002AA0',
+	"siml;":                     '\U00002A9D',
+	"simlE;":                    '\U00002A9F',
+	"simne;":                    '\U00002246',
+	"simplus;":                  '\U00002A24',
+	"simrarr;":                  '\U00002972',
+	"slarr;":                    '\U00002190',
+	"smallsetminus;":            '\U00002216',
+	"smashp;":                   '\U00002A33',
+	"smeparsl;":                 '\U000029E4',
+	"smid;":                     '\U00002223',
+	"smile;":                    '\U00002323',
+	"smt;":                      '\U00002AAA',
+	"smte;":                     '\U00002AAC',
+	"softcy;":                   '\U0000044C',
+	"sol;":                      '\U0000002F',
+	"solb;":                     '\U000029C4',
+	"solbar;":                   '\U0000233F',
+	"sopf;":                     '\U0001D564',
+	"spades;":                   '\U00002660',
+	"spadesuit;":                '\U00002660',
+	"spar;":                     '\U00002225',
+	"sqcap;":                    '\U00002293',
+	"sqcup;":                    '\U00002294',
+	"sqsub;":                    '\U0000228F',
+	"sqsube;":                   '\U00002291',
+	"sqsubset;":                 '\U0000228F',
+	"sqsubseteq;":               '\U00002291',
+	"sqsup;":                    '\U00002290',
+	"sqsupe;":                   '\U00002292',
+	"sqsupset;":                 '\U00002290',
+	"sqsupseteq;":               '\U00002292',
+	"squ;":                      '\U000025A1',
+	"square;":                   '\U000025A1',
+	"squarf;":                   '\U000025AA',
+	"squf;":                     '\U000025AA',
+	"srarr;":                    '\U00002192',
+	"sscr;":                     '\U0001D4C8',
+	"ssetmn;":                   '\U00002216',
+	"ssmile;":                   '\U00002323',
+	"sstarf;":                   '\U000022C6',
+	"star;":                     '\U00002606',
+	"starf;":                    '\U00002605',
+	"straightepsilon;":          '\U000003F5',
+	"straightphi;":              '\U000003D5',
+	"strns;":                    '\U000000AF',
+	"sub;":                      '\U00002282',
+	"subE;":                     '\U00002AC5',
+	"subdot;":                   '\U00002ABD',
+	"sube;":                     '\U00002286',
+	"subedot;":                  '\U00002AC3',
+	"submult;":                  '\U00002AC1',
+	"subnE;":                    '\U00002ACB',
+	"subne;":                    '\U0000228A',
+	"subplus;":                  '\U00002ABF',
+	"subrarr;":                  '\U00002979',
+	"subset;":                   '\U00002282',
+	"subseteq;":                 '\U00002286',
+	"subseteqq;":                '\U00002AC5',
+	"subsetneq;":                '\U0000228A',
+	"subsetneqq;":               '\U00002ACB',
+	"subsim;":                   '\U00002AC7',
+	"subsub;":                   '\U00002AD5',
+	"subsup;":                   '\U00002AD3',
+	"succ;":                     '\U0000227B',
+	"succapprox;":               '\U00002AB8',
+	"succcurlyeq;":              '\U0000227D',
+	"succeq;":                   '\U00002AB0',
+	"succnapprox;":              '\U00002ABA',
+	"succneqq;":                 '\U00002AB6',
+	"succnsim;":                 '\U000022E9',
+	"succsim;":                  '\U0000227F',
+	"sum;":                      '\U00002211',
+	"sung;":                     '\U0000266A',
+	"sup;":                      '\U00002283',
+	"sup1;":                     '\U000000B9',
+	"sup2;":                     '\U000000B2',
+	"sup3;":                     '\U000000B3',
+	"supE;":                     '\U00002AC6',
+	"supdot;":                   '\U00002ABE',
+	"supdsub;":                  '\U00002AD8',
+	"supe;":                     '\U00002287',
+	"supedot;":                  '\U00002AC4',
+	"suphsol;":                  '\U000027C9',
+	"suphsub;":                  '\U00002AD7',
+	"suplarr;":                  '\U0000297B',
+	"supmult;":                  '\U00002AC2',
+	"supnE;":                    '\U00002ACC',
+	"supne;":                    '\U0000228B',
+	"supplus;":                  '\U00002AC0',
+	"supset;":                   '\U00002283',
+	"supseteq;":                 '\U00002287',
+	"supseteqq;":                '\U00002AC6',
+	"supsetneq;":                '\U0000228B',
+	"supsetneqq;":               '\U00002ACC',
+	"supsim;":                   '\U00002AC8',
+	"supsub;":                   '\U00002AD4',
+	"supsup;":                   '\U00002AD6',
+	"swArr;":                    '\U000021D9',
+	"swarhk;":                   '\U00002926',
+	"swarr;":                    '\U00002199',
+	"swarrow;":                  '\U00002199',
+	"swnwar;":                   '\U0000292A',
+	"szlig;":                    '\U000000DF',
+	"target;":                   '\U00002316',
+	"tau;":                      '\U000003C4',
+	"tbrk;":                     '\U000023B4',
+	"tcaron;":                   '\U00000165',
+	"tcedil;":                   '\U00000163',
+	"tcy;":                      '\U00000442',
+	"tdot;":                     '\U000020DB',
+	"telrec;":                   '\U00002315',
+	"tfr;":                      '\U0001D531',
+	"there4;":                   '\U00002234',
+	"therefore;":                '\U00002234',
+	"theta;":                    '\U000003B8',
+	"thetasym;":                 '\U000003D1',
+	"thetav;":                   '\U000003D1',
+	"thickapprox;":              '\U00002248',
+	"thicksim;":                 '\U0000223C',
+	"thinsp;":                   '\U00002009',
+	"thkap;":                    '\U00002248',
+	"thksim;":                   '\U0000223C',
+	"thorn;":                    '\U000000FE',
+	"tilde;":                    '\U000002DC',
+	"times;":                    '\U000000D7',
+	"timesb;":                   '\U000022A0',
+	"timesbar;":                 '\U00002A31',
+	"timesd;":                   '\U00002A30',
+	"tint;":                     '\U0000222D',
+	"toea;":                     '\U00002928',
+	"top;":                      '\U000022A4',
+	"topbot;":                   '\U00002336',
+	"topcir;":                   '\U00002AF1',
+	"topf;":                     '\U0001D565',
+	"topfork;":                  '\U00002ADA',
+	"tosa;":                     '\U00002929',
+	"tprime;":                   '\U00002034',
+	"trade;":                    '\U00002122',
+	"triangle;":                 '\U000025B5',
+	"triangledown;":             '\U000025BF',
+	"triangleleft;":             '\U000025C3',
+	"trianglelefteq;":           '\U000022B4',
+	"triangleq;":                '\U0000225C',
+	"triangleright;":            '\U000025B9',
+	"trianglerighteq;":          '\U000022B5',
+	"tridot;":                   '\U000025EC',
+	"trie;":                     '\U0000225C',
+	"triminus;":                 '\U00002A3A',
+	"triplus;":                  '\U00002A39',
+	"trisb;":                    '\U000029CD',
+	"tritime;":                  '\U00002A3B',
+	"trpezium;":                 '\U000023E2',
+	"tscr;":                     '\U0001D4C9',
+	"tscy;":                     '\U00000446',
+	"tshcy;":                    '\U0000045B',
+	"tstrok;":                   '\U00000167',
+	"twixt;":                    '\U0000226C',
+	"twoheadleftarrow;":         '\U0000219E',
+	"twoheadrightarrow;":        '\U000021A0',
+	"uArr;":                     '\U000021D1',
+	"uHar;":                     '\U00002963',
+	"uacute;":                   '\U000000FA',
+	"uarr;":                     '\U00002191',
+	"ubrcy;":                    '\U0000045E',
+	"ubreve;":                   '\U0000016D',
+	"ucirc;":                    '\U000000FB',
+	"ucy;":                      '\U00000443',
+	"udarr;":                    '\U000021C5',
+	"udblac;":                   '\U00000171',
+	"udhar;":                    '\U0000296E',
+	"ufisht;":                   '\U0000297E',
+	"ufr;":                      '\U0001D532',
+	"ugrave;":                   '\U000000F9',
+	"uharl;":                    '\U000021BF',
+	"uharr;":                    '\U000021BE',
+	"uhblk;":                    '\U00002580',
+	"ulcorn;":                   '\U0000231C',
+	"ulcorner;":                 '\U0000231C',
+	"ulcrop;":                   '\U0000230F',
+	"ultri;":                    '\U000025F8',
+	"umacr;":                    '\U0000016B',
+	"uml;":                      '\U000000A8',
+	"uogon;":                    '\U00000173',
+	"uopf;":                     '\U0001D566',
+	"uparrow;":                  '\U00002191',
+	"updownarrow;":              '\U00002195',
+	"upharpoonleft;":            '\U000021BF',
+	"upharpoonright;":           '\U000021BE',
+	"uplus;":                    '\U0000228E',
+	"upsi;":                     '\U000003C5',
+	"upsih;":                    '\U000003D2',
+	"upsilon;":                  '\U000003C5',
+	"upuparrows;":               '\U000021C8',
+	"urcorn;":                   '\U0000231D',
+	"urcorner;":                 '\U0000231D',
+	"urcrop;":                   '\U0000230E',
+	"uring;":                    '\U0000016F',
+	"urtri;":                    '\U000025F9',
+	"uscr;":                     '\U0001D4CA',
+	"utdot;":                    '\U000022F0',
+	"utilde;":                   '\U00000169',
+	"utri;":                     '\U000025B5',
+	"utrif;":                    '\U000025B4',
+	"uuarr;":                    '\U000021C8',
+	"uuml;":                     '\U000000FC',
+	"uwangle;":                  '\U000029A7',
+	"vArr;":                     '\U000021D5',
+	"vBar;":                     '\U00002AE8',
+	"vBarv;":                    '\U00002AE9',
+	"vDash;":                    '\U000022A8',
+	"vangrt;":                   '\U0000299C',
+	"varepsilon;":               '\U000003F5',
+	"varkappa;":                 '\U000003F0',
+	"varnothing;":               '\U00002205',
+	"varphi;":                   '\U000003D5',
+	"varpi;":                    '\U000003D6',
+	"varpropto;":                '\U0000221D',
+	"varr;":                     '\U00002195',
+	"varrho;":                   '\U000003F1',
+	"varsigma;":                 '\U000003C2',
+	"vartheta;":                 '\U000003D1',
+	"vartriangleleft;":          '\U000022B2',
+	"vartriangleright;":         '\U000022B3',
+	"vcy;":                      '\U00000432',
+	"vdash;":                    '\U000022A2',
+	"vee;":                      '\U00002228',
+	"veebar;":                   '\U000022BB',
+	"veeeq;":                    '\U0000225A',
+	"vellip;":                   '\U000022EE',
+	"verbar;":                   '\U0000007C',
+	"vert;":                     '\U0000007C',
+	"vfr;":                      '\U0001D533',
+	"vltri;":                    '\U000022B2',
+	"vopf;":                     '\U0001D567',
+	"vprop;":                    '\U0000221D',
+	"vrtri;":                    '\U000022B3',
+	"vscr;":                     '\U0001D4CB',
+	"vzigzag;":                  '\U0000299A',
+	"wcirc;":                    '\U00000175',
+	"wedbar;":                   '\U00002A5F',
+	"wedge;":                    '\U00002227',
+	"wedgeq;":                   '\U00002259',
+	"weierp;":                   '\U00002118',
+	"wfr;":                      '\U0001D534',
+	"wopf;":                     '\U0001D568',
+	"wp;":                       '\U00002118',
+	"wr;":                       '\U00002240',
+	"wreath;":                   '\U00002240',
+	"wscr;":                     '\U0001D4CC',
+	"xcap;":                     '\U000022C2',
+	"xcirc;":                    '\U000025EF',
+	"xcup;":                     '\U000022C3',
+	"xdtri;":                    '\U000025BD',
+	"xfr;":                      '\U0001D535',
+	"xhArr;":                    '\U000027FA',
+	"xharr;":                    '\U000027F7',
+	"xi;":                       '\U000003BE',
+	"xlArr;":                    '\U000027F8',
+	"xlarr;":                    '\U000027F5',
+	"xmap;":                     '\U000027FC',
+	"xnis;":                     '\U000022FB',
+	"xodot;":                    '\U00002A00',
+	"xopf;":                     '\U0001D569',
+	"xoplus;":                   '\U00002A01',
+	"xotime;":                   '\U00002A02',
+	"xrArr;":                    '\U000027F9',
+	"xrarr;":                    '\U000027F6',
+	"xscr;":                     '\U0001D4CD',
+	"xsqcup;":                   '\U00002A06',
+	"xuplus;":                   '\U00002A04',
+	"xutri;":                    '\U000025B3',
+	"xvee;":                     '\U000022C1',
+	"xwedge;":                   '\U000022C0',
+	"yacute;":                   '\U000000FD',
+	"yacy;":                     '\U0000044F',
+	"ycirc;":                    '\U00000177',
+	"ycy;":                      '\U0000044B',
+	"yen;":                      '\U000000A5',
+	"yfr;":                      '\U0001D536',
+	"yicy;":                     '\U00000457',
+	"yopf;":                     '\U0001D56A',
+	"yscr;":                     '\U0001D4CE',
+	"yucy;":                     '\U0000044E',
+	"yuml;":                     '\U000000FF',
+	"zacute;":                   '\U0000017A',
+	"zcaron;":                   '\U0000017E',
+	"zcy;":                      '\U00000437',
+	"zdot;":                     '\U0000017C',
+	"zeetrf;":                   '\U00002128',
+	"zeta;":                     '\U000003B6',
+	"zfr;":                      '\U0001D537',
+	"zhcy;":                     '\U00000436',
+	"zigrarr;":                  '\U000021DD',
+	"zopf;":                     '\U0001D56B',
+	"zscr;":                     '\U0001D4CF',
+	"zwj;":                      '\U0000200D',
+	"zwnj;":                     '\U0000200C',
+	"AElig":                     '\U000000C6',
+	"AMP":                       '\U00000026',
+	"Aacute":                    '\U000000C1',
+	"Acirc":                     '\U000000C2',
+	"Agrave":                    '\U000000C0',
+	"Aring":                     '\U000000C5',
+	"Atilde":                    '\U000000C3',
+	"Auml":                      '\U000000C4',
+	"COPY":                      '\U000000A9',
+	"Ccedil":                    '\U000000C7',
+	"ETH":                       '\U000000D0',
+	"Eacute":                    '\U000000C9',
+	"Ecirc":                     '\U000000CA',
+	"Egrave":                    '\U000000C8',
+	"Euml":                      '\U000000CB',
+	"GT":                        '\U0000003E',
+	"Iacute":                    '\U000000CD',
+	"Icirc":                     '\U000000CE',
+	"Igrave":                    '\U000000CC',
+	"Iuml":                      '\U000000CF',
+	"LT":                        '\U0000003C',
+	"Ntilde":                    '\U000000D1',
+	"Oacute":                    '\U000000D3',
+	"Ocirc":                     '\U000000D4',
+	"Ograve":                    '\U000000D2',
+	"Oslash":                    '\U000000D8',
+	"Otilde":                    '\U000000D5',
+	"Ouml":                      '\U000000D6',
+	"QUOT":                      '\U00000022',
+	"REG":                       '\U000000AE',
+	"THORN":                     '\U000000DE',
+	"Uacute":                    '\U000000DA',
+	"Ucirc":                     '\U000000DB',
+	"Ugrave":                    '\U000000D9',
+	"Uuml":                      '\U000000DC',
+	"Yacute":                    '\U000000DD',
+	"aacute":                    '\U000000E1',
+	"acirc":                     '\U000000E2',
+	"acute":                     '\U000000B4',
+	"aelig":                     '\U000000E6',
+	"agrave":                    '\U000000E0',
+	"amp":                       '\U00000026',
+	"aring":                     '\U000000E5',
+	"atilde":                    '\U000000E3',
+	"auml":                      '\U000000E4',
+	"brvbar":                    '\U000000A6',
+	"ccedil":                    '\U000000E7',
+	"cedil":                     '\U000000B8',
+	"cent":                      '\U000000A2',
+	"copy":                      '\U000000A9',
+	"curren":                    '\U000000A4',
+	"deg":                       '\U000000B0',
+	"divide":                    '\U000000F7',
+	"eacute":                    '\U000000E9',
+	"ecirc":                     '\U000000EA',
+	"egrave":                    '\U000000E8',
+	"eth":                       '\U000000F0',
+	"euml":                      '\U000000EB',
+	"frac12":                    '\U000000BD',
+	"frac14":                    '\U000000BC',
+	"frac34":                    '\U000000BE',
+	"gt":                        '\U0000003E',
+	"iacute":                    '\U000000ED',
+	"icirc":                     '\U000000EE',
+	"iexcl":                     '\U000000A1',
+	"igrave":                    '\U000000EC',
+	"iquest":                    '\U000000BF',
+	"iuml":                      '\U000000EF',
+	"laquo":                     '\U000000AB',
+	"lt":                        '\U0000003C',
+	"macr":                      '\U000000AF',
+	"micro":                     '\U000000B5',
+	"middot":                    '\U000000B7',
+	"nbsp":                      '\U000000A0',
+	"not":                       '\U000000AC',
+	"ntilde":                    '\U000000F1',
+	"oacute":                    '\U000000F3',
+	"ocirc":                     '\U000000F4',
+	"ograve":                    '\U000000F2',
+	"ordf":                      '\U000000AA',
+	"ordm":                      '\U000000BA',
+	"oslash":                    '\U000000F8',
+	"otilde":                    '\U000000F5',
+	"ouml":                      '\U000000F6',
+	"para":                      '\U000000B6',
+	"plusmn":                    '\U000000B1',
+	"pound":                     '\U000000A3',
+	"quot":                      '\U00000022',
+	"raquo":                     '\U000000BB',
+	"reg":                       '\U000000AE',
+	"sect":                      '\U000000A7',
+	"shy":                       '\U000000AD',
+	"sup1":                      '\U000000B9',
+	"sup2":                      '\U000000B2',
+	"sup3":                      '\U000000B3',
+	"szlig":                     '\U000000DF',
+	"thorn":                     '\U000000FE',
+	"times":                     '\U000000D7',
+	"uacute":                    '\U000000FA',
+	"ucirc":                     '\U000000FB',
+	"ugrave":                    '\U000000F9',
+	"uml":                       '\U000000A8',
+	"uuml":                      '\U000000FC',
+	"yacute":                    '\U000000FD',
+	"yen":                       '\U000000A5',
+	"yuml":                      '\U000000FF',
+}
+
+// HTML entities that are two unicode codepoints.
+var entity2 = map[string][2]rune{
+	// TODO(nigeltao): Handle replacements that are wider than their names.
+	// "nLt;":                     {'\u226A', '\u20D2'},
+	// "nGt;":                     {'\u226B', '\u20D2'},
+	"NotEqualTilde;":           {'\u2242', '\u0338'},
+	"NotGreaterFullEqual;":     {'\u2267', '\u0338'},
+	"NotGreaterGreater;":       {'\u226B', '\u0338'},
+	"NotGreaterSlantEqual;":    {'\u2A7E', '\u0338'},
+	"NotHumpDownHump;":         {'\u224E', '\u0338'},
+	"NotHumpEqual;":            {'\u224F', '\u0338'},
+	"NotLeftTriangleBar;":      {'\u29CF', '\u0338'},
+	"NotLessLess;":             {'\u226A', '\u0338'},
+	"NotLessSlantEqual;":       {'\u2A7D', '\u0338'},
+	"NotNestedGreaterGreater;": {'\u2AA2', '\u0338'},
+	"NotNestedLessLess;":       {'\u2AA1', '\u0338'},
+	"NotPrecedesEqual;":        {'\u2AAF', '\u0338'},
+	"NotRightTriangleBar;":     {'\u29D0', '\u0338'},
+	"NotSquareSubset;":         {'\u228F', '\u0338'},
+	"NotSquareSuperset;":       {'\u2290', '\u0338'},
+	"NotSubset;":               {'\u2282', '\u20D2'},
+	"NotSucceedsEqual;":        {'\u2AB0', '\u0338'},
+	"NotSucceedsTilde;":        {'\u227F', '\u0338'},
+	"NotSuperset;":             {'\u2283', '\u20D2'},
+	"ThickSpace;":              {'\u205F', '\u200A'},
+	"acE;":                     {'\u223E', '\u0333'},
+	"bne;":                     {'\u003D', '\u20E5'},
+	"bnequiv;":                 {'\u2261', '\u20E5'},
+	"caps;":                    {'\u2229', '\uFE00'},
+	"cups;":                    {'\u222A', '\uFE00'},
+	"fjlig;":                   {'\u0066', '\u006A'},
+	"gesl;":                    {'\u22DB', '\uFE00'},
+	"gvertneqq;":               {'\u2269', '\uFE00'},
+	"gvnE;":                    {'\u2269', '\uFE00'},
+	"lates;":                   {'\u2AAD', '\uFE00'},
+	"lesg;":                    {'\u22DA', '\uFE00'},
+	"lvertneqq;":               {'\u2268', '\uFE00'},
+	"lvnE;":                    {'\u2268', '\uFE00'},
+	"nGg;":                     {'\u22D9', '\u0338'},
+	"nGtv;":                    {'\u226B', '\u0338'},
+	"nLl;":                     {'\u22D8', '\u0338'},
+	"nLtv;":                    {'\u226A', '\u0338'},
+	"nang;":                    {'\u2220', '\u20D2'},
+	"napE;":                    {'\u2A70', '\u0338'},
+	"napid;":                   {'\u224B', '\u0338'},
+	"nbump;":                   {'\u224E', '\u0338'},
+	"nbumpe;":                  {'\u224F', '\u0338'},
+	"ncongdot;":                {'\u2A6D', '\u0338'},
+	"nedot;":                   {'\u2250', '\u0338'},
+	"nesim;":                   {'\u2242', '\u0338'},
+	"ngE;":                     {'\u2267', '\u0338'},
+	"ngeqq;":                   {'\u2267', '\u0338'},
+	"ngeqslant;":               {'\u2A7E', '\u0338'},
+	"nges;":                    {'\u2A7E', '\u0338'},
+	"nlE;":                     {'\u2266', '\u0338'},
+	"nleqq;":                   {'\u2266', '\u0338'},
+	"nleqslant;":               {'\u2A7D', '\u0338'},
+	"nles;":                    {'\u2A7D', '\u0338'},
+	"notinE;":                  {'\u22F9', '\u0338'},
+	"notindot;":                {'\u22F5', '\u0338'},
+	"nparsl;":                  {'\u2AFD', '\u20E5'},
+	"npart;":                   {'\u2202', '\u0338'},
+	"npre;":                    {'\u2AAF', '\u0338'},
+	"npreceq;":                 {'\u2AAF', '\u0338'},
+	"nrarrc;":                  {'\u2933', '\u0338'},
+	"nrarrw;":                  {'\u219D', '\u0338'},
+	"nsce;":                    {'\u2AB0', '\u0338'},
+	"nsubE;":                   {'\u2AC5', '\u0338'},
+	"nsubset;":                 {'\u2282', '\u20D2'},
+	"nsubseteqq;":              {'\u2AC5', '\u0338'},
+	"nsucceq;":                 {'\u2AB0', '\u0338'},
+	"nsupE;":                   {'\u2AC6', '\u0338'},
+	"nsupset;":                 {'\u2283', '\u20D2'},
+	"nsupseteqq;":              {'\u2AC6', '\u0338'},
+	"nvap;":                    {'\u224D', '\u20D2'},
+	"nvge;":                    {'\u2265', '\u20D2'},
+	"nvgt;":                    {'\u003E', '\u20D2'},
+	"nvle;":                    {'\u2264', '\u20D2'},
+	"nvlt;":                    {'\u003C', '\u20D2'},
+	"nvltrie;":                 {'\u22B4', '\u20D2'},
+	"nvrtrie;":                 {'\u22B5', '\u20D2'},
+	"nvsim;":                   {'\u223C', '\u20D2'},
+	"race;":                    {'\u223D', '\u0331'},
+	"smtes;":                   {'\u2AAC', '\uFE00'},
+	"sqcaps;":                  {'\u2293', '\uFE00'},
+	"sqcups;":                  {'\u2294', '\uFE00'},
+	"varsubsetneq;":            {'\u228A', '\uFE00'},
+	"varsubsetneqq;":           {'\u2ACB', '\uFE00'},
+	"varsupsetneq;":            {'\u228B', '\uFE00'},
+	"varsupsetneqq;":           {'\u2ACC', '\uFE00'},
+	"vnsub;":                   {'\u2282', '\u20D2'},
+	"vnsup;":                   {'\u2283', '\u20D2'},
+	"vsubnE;":                  {'\u2ACB', '\uFE00'},
+	"vsubne;":                  {'\u228A', '\uFE00'},
+	"vsupnE;":                  {'\u2ACC', '\uFE00'},
+	"vsupne;":                  {'\u228B', '\uFE00'},
+}
diff --git a/html/entity_test.go b/html/entity_test.go
new file mode 100644
index 0000000..b53f866
--- /dev/null
+++ b/html/entity_test.go
@@ -0,0 +1,29 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+	"testing"
+	"unicode/utf8"
+)
+
+func TestEntityLength(t *testing.T) {
+	// We verify that the length of UTF-8 encoding of each value is <= 1 + len(key).
+	// The +1 comes from the leading "&". This property implies that the length of
+	// unescaped text is <= the length of escaped text.
+	for k, v := range entity {
+		if 1+len(k) < utf8.RuneLen(v) {
+			t.Error("escaped entity &" + k + " is shorter than its UTF-8 encoding " + string(v))
+		}
+		if len(k) > longestEntityWithoutSemicolon && k[len(k)-1] != ';' {
+			t.Errorf("entity name %s is %d characters, but longestEntityWithoutSemicolon=%d", k, len(k), longestEntityWithoutSemicolon)
+		}
+	}
+	for k, v := range entity2 {
+		if 1+len(k) < utf8.RuneLen(v[0])+utf8.RuneLen(v[1]) {
+			t.Error("escaped entity &" + k + " is shorter than its UTF-8 encoding " + string(v[0]) + string(v[1]))
+		}
+	}
+}
diff --git a/html/escape.go b/html/escape.go
new file mode 100644
index 0000000..75bddff
--- /dev/null
+++ b/html/escape.go
@@ -0,0 +1,258 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+	"bytes"
+	"strings"
+	"unicode/utf8"
+)
+
+// These replacements permit compatibility with old numeric entities that
+// assumed Windows-1252 encoding.
+// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#consume-a-character-reference
+var replacementTable = [...]rune{
+	'\u20AC', // First entry is what 0x80 should be replaced with.
+	'\u0081',
+	'\u201A',
+	'\u0192',
+	'\u201E',
+	'\u2026',
+	'\u2020',
+	'\u2021',
+	'\u02C6',
+	'\u2030',
+	'\u0160',
+	'\u2039',
+	'\u0152',
+	'\u008D',
+	'\u017D',
+	'\u008F',
+	'\u0090',
+	'\u2018',
+	'\u2019',
+	'\u201C',
+	'\u201D',
+	'\u2022',
+	'\u2013',
+	'\u2014',
+	'\u02DC',
+	'\u2122',
+	'\u0161',
+	'\u203A',
+	'\u0153',
+	'\u009D',
+	'\u017E',
+	'\u0178', // Last entry is 0x9F.
+	// 0x00->'\uFFFD' is handled programmatically.
+	// 0x0D->'\u000D' is a no-op.
+}
+
+// unescapeEntity reads an entity like "&lt;" from b[src:] and writes the
+// corresponding "<" to b[dst:], returning the incremented dst and src cursors.
+// Precondition: b[src] == '&' && dst <= src.
+// attribute should be true if parsing an attribute value.
+func unescapeEntity(b []byte, dst, src int, attribute bool) (dst1, src1 int) {
+	// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#consume-a-character-reference
+
+	// i starts at 1 because we already know that s[0] == '&'.
+	i, s := 1, b[src:]
+
+	if len(s) <= 1 {
+		b[dst] = b[src]
+		return dst + 1, src + 1
+	}
+
+	if s[i] == '#' {
+		if len(s) <= 3 { // We need to have at least "&#.".
+			b[dst] = b[src]
+			return dst + 1, src + 1
+		}
+		i++
+		c := s[i]
+		hex := false
+		if c == 'x' || c == 'X' {
+			hex = true
+			i++
+		}
+
+		x := '\x00'
+		for i < len(s) {
+			c = s[i]
+			i++
+			if hex {
+				if '0' <= c && c <= '9' {
+					x = 16*x + rune(c) - '0'
+					continue
+				} else if 'a' <= c && c <= 'f' {
+					x = 16*x + rune(c) - 'a' + 10
+					continue
+				} else if 'A' <= c && c <= 'F' {
+					x = 16*x + rune(c) - 'A' + 10
+					continue
+				}
+			} else if '0' <= c && c <= '9' {
+				x = 10*x + rune(c) - '0'
+				continue
+			}
+			if c != ';' {
+				i--
+			}
+			break
+		}
+
+		if i <= 3 { // No characters matched.
+			b[dst] = b[src]
+			return dst + 1, src + 1
+		}
+
+		if 0x80 <= x && x <= 0x9F {
+			// Replace characters from Windows-1252 with UTF-8 equivalents.
+			x = replacementTable[x-0x80]
+		} else if x == 0 || (0xD800 <= x && x <= 0xDFFF) || x > 0x10FFFF {
+			// Replace invalid characters with the replacement character.
+			x = '\uFFFD'
+		}
+
+		return dst + utf8.EncodeRune(b[dst:], x), src + i
+	}
+
+	// Consume the maximum number of characters possible, with the
+	// consumed characters matching one of the named references.
+
+	for i < len(s) {
+		c := s[i]
+		i++
+		// Lower-cased characters are more common in entities, so we check for them first.
+		if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' {
+			continue
+		}
+		if c != ';' {
+			i--
+		}
+		break
+	}
+
+	entityName := string(s[1:i])
+	if entityName == "" {
+		// No-op.
+	} else if attribute && entityName[len(entityName)-1] != ';' && len(s) > i && s[i] == '=' {
+		// No-op.
+	} else if x := entity[entityName]; x != 0 {
+		return dst + utf8.EncodeRune(b[dst:], x), src + i
+	} else if x := entity2[entityName]; x[0] != 0 {
+		dst1 := dst + utf8.EncodeRune(b[dst:], x[0])
+		return dst1 + utf8.EncodeRune(b[dst1:], x[1]), src + i
+	} else if !attribute {
+		maxLen := len(entityName) - 1
+		if maxLen > longestEntityWithoutSemicolon {
+			maxLen = longestEntityWithoutSemicolon
+		}
+		for j := maxLen; j > 1; j-- {
+			if x := entity[entityName[:j]]; x != 0 {
+				return dst + utf8.EncodeRune(b[dst:], x), src + j + 1
+			}
+		}
+	}
+
+	dst1, src1 = dst+i, src+i
+	copy(b[dst:dst1], b[src:src1])
+	return dst1, src1
+}
+
+// unescape unescapes b's entities in-place, so that "a&lt;b" becomes "a<b".
+// attribute should be true if parsing an attribute value.
+func unescape(b []byte, attribute bool) []byte {
+	for i, c := range b {
+		if c == '&' {
+			dst, src := unescapeEntity(b, i, i, attribute)
+			for src < len(b) {
+				c := b[src]
+				if c == '&' {
+					dst, src = unescapeEntity(b, dst, src, attribute)
+				} else {
+					b[dst] = c
+					dst, src = dst+1, src+1
+				}
+			}
+			return b[0:dst]
+		}
+	}
+	return b
+}
+
+// lower lower-cases the A-Z bytes in b in-place, so that "aBc" becomes "abc".
+func lower(b []byte) []byte {
+	for i, c := range b {
+		if 'A' <= c && c <= 'Z' {
+			b[i] = c + 'a' - 'A'
+		}
+	}
+	return b
+}
+
+const escapedChars = "&'<>\"\r"
+
+func escape(w writer, s string) error {
+	i := strings.IndexAny(s, escapedChars)
+	for i != -1 {
+		if _, err := w.WriteString(s[:i]); err != nil {
+			return err
+		}
+		var esc string
+		switch s[i] {
+		case '&':
+			esc = "&amp;"
+		case '\'':
+			// "&#39;" is shorter than "&apos;" and apos was not in HTML until HTML5.
+			esc = "&#39;"
+		case '<':
+			esc = "&lt;"
+		case '>':
+			esc = "&gt;"
+		case '"':
+			// "&#34;" is shorter than "&quot;".
+			esc = "&#34;"
+		case '\r':
+			esc = "&#13;"
+		default:
+			panic("unrecognized escape character")
+		}
+		s = s[i+1:]
+		if _, err := w.WriteString(esc); err != nil {
+			return err
+		}
+		i = strings.IndexAny(s, escapedChars)
+	}
+	_, err := w.WriteString(s)
+	return err
+}
+
+// EscapeString escapes special characters like "<" to become "&lt;". It
+// escapes only five such characters: <, >, &, ' and ".
+// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't
+// always true.
+func EscapeString(s string) string {
+	if strings.IndexAny(s, escapedChars) == -1 {
+		return s
+	}
+	var buf bytes.Buffer
+	escape(&buf, s)
+	return buf.String()
+}
+
+// UnescapeString unescapes entities like "&lt;" to become "<". It unescapes a
+// larger range of entities than EscapeString escapes. For example, "&aacute;"
+// unescapes to "á", as does "&#225;" and "&xE1;".
+// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't
+// always true.
+func UnescapeString(s string) string {
+	for _, c := range s {
+		if c == '&' {
+			return string(unescape([]byte(s), false))
+		}
+	}
+	return s
+}
diff --git a/html/example_test.go b/html/example_test.go
new file mode 100644
index 0000000..47341f0
--- /dev/null
+++ b/html/example_test.go
@@ -0,0 +1,40 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This example demonstrates parsing HTML data and walking the resulting tree.
+package html_test
+
+import (
+	"fmt"
+	"log"
+	"strings"
+
+	"code.google.com/p/go.net/html"
+)
+
+func ExampleParse() {
+	s := `<p>Links:</p><ul><li><a href="foo">Foo</a><li><a href="/bar/baz">BarBaz</a></ul>`
+	doc, err := html.Parse(strings.NewReader(s))
+	if err != nil {
+		log.Fatal(err)
+	}
+	var f func(*html.Node)
+	f = func(n *html.Node) {
+		if n.Type == html.ElementNode && n.Data == "a" {
+			for _, a := range n.Attr {
+				if a.Key == "href" {
+					fmt.Println(a.Val)
+					break
+				}
+			}
+		}
+		for c := n.FirstChild; c != nil; c = c.NextSibling {
+			f(c)
+		}
+	}
+	f(doc)
+	// Output:
+	// foo
+	// /bar/baz
+}
diff --git a/html/foreign.go b/html/foreign.go
new file mode 100644
index 0000000..d3b3844
--- /dev/null
+++ b/html/foreign.go
@@ -0,0 +1,226 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+	"strings"
+)
+
+func adjustAttributeNames(aa []Attribute, nameMap map[string]string) {
+	for i := range aa {
+		if newName, ok := nameMap[aa[i].Key]; ok {
+			aa[i].Key = newName
+		}
+	}
+}
+
+func adjustForeignAttributes(aa []Attribute) {
+	for i, a := range aa {
+		if a.Key == "" || a.Key[0] != 'x' {
+			continue
+		}
+		switch a.Key {
+		case "xlink:actuate", "xlink:arcrole", "xlink:href", "xlink:role", "xlink:show",
+			"xlink:title", "xlink:type", "xml:base", "xml:lang", "xml:space", "xmlns:xlink":
+			j := strings.Index(a.Key, ":")
+			aa[i].Namespace = a.Key[:j]
+			aa[i].Key = a.Key[j+1:]
+		}
+	}
+}
+
+func htmlIntegrationPoint(n *Node) bool {
+	if n.Type != ElementNode {
+		return false
+	}
+	switch n.Namespace {
+	case "math":
+		if n.Data == "annotation-xml" {
+			for _, a := range n.Attr {
+				if a.Key == "encoding" {
+					val := strings.ToLower(a.Val)
+					if val == "text/html" || val == "application/xhtml+xml" {
+						return true
+					}
+				}
+			}
+		}
+	case "svg":
+		switch n.Data {
+		case "desc", "foreignObject", "title":
+			return true
+		}
+	}
+	return false
+}
+
+func mathMLTextIntegrationPoint(n *Node) bool {
+	if n.Namespace != "math" {
+		return false
+	}
+	switch n.Data {
+	case "mi", "mo", "mn", "ms", "mtext":
+		return true
+	}
+	return false
+}
+
+// Section 12.2.5.5.
+var breakout = map[string]bool{
+	"b":          true,
+	"big":        true,
+	"blockquote": true,
+	"body":       true,
+	"br":         true,
+	"center":     true,
+	"code":       true,
+	"dd":         true,
+	"div":        true,
+	"dl":         true,
+	"dt":         true,
+	"em":         true,
+	"embed":      true,
+	"h1":         true,
+	"h2":         true,
+	"h3":         true,
+	"h4":         true,
+	"h5":         true,
+	"h6":         true,
+	"head":       true,
+	"hr":         true,
+	"i":          true,
+	"img":        true,
+	"li":         true,
+	"listing":    true,
+	"menu":       true,
+	"meta":       true,
+	"nobr":       true,
+	"ol":         true,
+	"p":          true,
+	"pre":        true,
+	"ruby":       true,
+	"s":          true,
+	"small":      true,
+	"span":       true,
+	"strong":     true,
+	"strike":     true,
+	"sub":        true,
+	"sup":        true,
+	"table":      true,
+	"tt":         true,
+	"u":          true,
+	"ul":         true,
+	"var":        true,
+}
+
+// Section 12.2.5.5.
+var svgTagNameAdjustments = map[string]string{
+	"altglyph":            "altGlyph",
+	"altglyphdef":         "altGlyphDef",
+	"altglyphitem":        "altGlyphItem",
+	"animatecolor":        "animateColor",
+	"animatemotion":       "animateMotion",
+	"animatetransform":    "animateTransform",
+	"clippath":            "clipPath",
+	"feblend":             "feBlend",
+	"fecolormatrix":       "feColorMatrix",
+	"fecomponenttransfer": "feComponentTransfer",
+	"fecomposite":         "feComposite",
+	"feconvolvematrix":    "feConvolveMatrix",
+	"fediffuselighting":   "feDiffuseLighting",
+	"fedisplacementmap":   "feDisplacementMap",
+	"fedistantlight":      "feDistantLight",
+	"feflood":             "feFlood",
+	"fefunca":             "feFuncA",
+	"fefuncb":             "feFuncB",
+	"fefuncg":             "feFuncG",
+	"fefuncr":             "feFuncR",
+	"fegaussianblur":      "feGaussianBlur",
+	"feimage":             "feImage",
+	"femerge":             "feMerge",
+	"femergenode":         "feMergeNode",
+	"femorphology":        "feMorphology",
+	"feoffset":            "feOffset",
+	"fepointlight":        "fePointLight",
+	"fespecularlighting":  "feSpecularLighting",
+	"fespotlight":         "feSpotLight",
+	"fetile":              "feTile",
+	"feturbulence":        "feTurbulence",
+	"foreignobject":       "foreignObject",
+	"glyphref":            "glyphRef",
+	"lineargradient":      "linearGradient",
+	"radialgradient":      "radialGradient",
+	"textpath":            "textPath",
+}
+
+// Section 12.2.5.1
+var mathMLAttributeAdjustments = map[string]string{
+	"definitionurl": "definitionURL",
+}
+
+var svgAttributeAdjustments = map[string]string{
+	"attributename":             "attributeName",
+	"attributetype":             "attributeType",
+	"basefrequency":             "baseFrequency",
+	"baseprofile":               "baseProfile",
+	"calcmode":                  "calcMode",
+	"clippathunits":             "clipPathUnits",
+	"contentscripttype":         "contentScriptType",
+	"contentstyletype":          "contentStyleType",
+	"diffuseconstant":           "diffuseConstant",
+	"edgemode":                  "edgeMode",
+	"externalresourcesrequired": "externalResourcesRequired",
+	"filterres":                 "filterRes",
+	"filterunits":               "filterUnits",
+	"glyphref":                  "glyphRef",
+	"gradienttransform":         "gradientTransform",
+	"gradientunits":             "gradientUnits",
+	"kernelmatrix":              "kernelMatrix",
+	"kernelunitlength":          "kernelUnitLength",
+	"keypoints":                 "keyPoints",
+	"keysplines":                "keySplines",
+	"keytimes":                  "keyTimes",
+	"lengthadjust":              "lengthAdjust",
+	"limitingconeangle":         "limitingConeAngle",
+	"markerheight":              "markerHeight",
+	"markerunits":               "markerUnits",
+	"markerwidth":               "markerWidth",
+	"maskcontentunits":          "maskContentUnits",
+	"maskunits":                 "maskUnits",
+	"numoctaves":                "numOctaves",
+	"pathlength":                "pathLength",
+	"patterncontentunits":       "patternContentUnits",
+	"patterntransform":          "patternTransform",
+	"patternunits":              "patternUnits",
+	"pointsatx":                 "pointsAtX",
+	"pointsaty":                 "pointsAtY",
+	"pointsatz":                 "pointsAtZ",
+	"preservealpha":             "preserveAlpha",
+	"preserveaspectratio":       "preserveAspectRatio",
+	"primitiveunits":            "primitiveUnits",
+	"refx":                      "refX",
+	"refy":                      "refY",
+	"repeatcount":               "repeatCount",
+	"repeatdur":                 "repeatDur",
+	"requiredextensions":        "requiredExtensions",
+	"requiredfeatures":          "requiredFeatures",
+	"specularconstant":          "specularConstant",
+	"specularexponent":          "specularExponent",
+	"spreadmethod":              "spreadMethod",
+	"startoffset":               "startOffset",
+	"stddeviation":              "stdDeviation",
+	"stitchtiles":               "stitchTiles",
+	"surfacescale":              "surfaceScale",
+	"systemlanguage":            "systemLanguage",
+	"tablevalues":               "tableValues",
+	"targetx":                   "targetX",
+	"targety":                   "targetY",
+	"textlength":                "textLength",
+	"viewbox":                   "viewBox",
+	"viewtarget":                "viewTarget",
+	"xchannelselector":          "xChannelSelector",
+	"ychannelselector":          "yChannelSelector",
+	"zoomandpan":                "zoomAndPan",
+}
diff --git a/html/node.go b/html/node.go
new file mode 100644
index 0000000..e7b4e50
--- /dev/null
+++ b/html/node.go
@@ -0,0 +1,193 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+	"code.google.com/p/go.net/html/atom"
+)
+
+// A NodeType is the type of a Node.
+type NodeType uint32
+
+const (
+	ErrorNode NodeType = iota
+	TextNode
+	DocumentNode
+	ElementNode
+	CommentNode
+	DoctypeNode
+	scopeMarkerNode
+)
+
+// Section 12.2.3.3 says "scope markers are inserted when entering applet
+// elements, buttons, object elements, marquees, table cells, and table
+// captions, and are used to prevent formatting from 'leaking'".
+var scopeMarker = Node{Type: scopeMarkerNode}
+
+// A Node consists of a NodeType and some Data (tag name for element nodes,
+// content for text) and are part of a tree of Nodes. Element nodes may also
+// have a Namespace and contain a slice of Attributes. Data is unescaped, so
+// that it looks like "a<b" rather than "a&lt;b". For element nodes, DataAtom
+// is the atom for Data, or zero if Data is not a known tag name.
+//
+// An empty Namespace implies a "http://www.w3.org/1999/xhtml" namespace.
+// Similarly, "math" is short for "http://www.w3.org/1998/Math/MathML", and
+// "svg" is short for "http://www.w3.org/2000/svg".
+type Node struct {
+	Parent, FirstChild, LastChild, PrevSibling, NextSibling *Node
+
+	Type      NodeType
+	DataAtom  atom.Atom
+	Data      string
+	Namespace string
+	Attr      []Attribute
+}
+
+// InsertBefore inserts newChild as a child of n, immediately before oldChild
+// in the sequence of n's children. oldChild may be nil, in which case newChild
+// is appended to the end of n's children.
+//
+// It will panic if newChild already has a parent or siblings.
+func (n *Node) InsertBefore(newChild, oldChild *Node) {
+	if newChild.Parent != nil || newChild.PrevSibling != nil || newChild.NextSibling != nil {
+		panic("html: InsertBefore called for an attached child Node")
+	}
+	var prev, next *Node
+	if oldChild != nil {
+		prev, next = oldChild.PrevSibling, oldChild
+	} else {
+		prev = n.LastChild
+	}
+	if prev != nil {
+		prev.NextSibling = newChild
+	} else {
+		n.FirstChild = newChild
+	}
+	if next != nil {
+		next.PrevSibling = newChild
+	} else {
+		n.LastChild = newChild
+	}
+	newChild.Parent = n
+	newChild.PrevSibling = prev
+	newChild.NextSibling = next
+}
+
+// AppendChild adds a node c as a child of n.
+//
+// It will panic if c already has a parent or siblings.
+func (n *Node) AppendChild(c *Node) {
+	if c.Parent != nil || c.PrevSibling != nil || c.NextSibling != nil {
+		panic("html: AppendChild called for an attached child Node")
+	}
+	last := n.LastChild
+	if last != nil {
+		last.NextSibling = c
+	} else {
+		n.FirstChild = c
+	}
+	n.LastChild = c
+	c.Parent = n
+	c.PrevSibling = last
+}
+
+// RemoveChild removes a node c that is a child of n. Afterwards, c will have
+// no parent and no siblings.
+//
+// It will panic if c's parent is not n.
+func (n *Node) RemoveChild(c *Node) {
+	if c.Parent != n {
+		panic("html: RemoveChild called for a non-child Node")
+	}
+	if n.FirstChild == c {
+		n.FirstChild = c.NextSibling
+	}
+	if c.NextSibling != nil {
+		c.NextSibling.PrevSibling = c.PrevSibling
+	}
+	if n.LastChild == c {
+		n.LastChild = c.PrevSibling
+	}
+	if c.PrevSibling != nil {
+		c.PrevSibling.NextSibling = c.NextSibling
+	}
+	c.Parent = nil
+	c.PrevSibling = nil
+	c.NextSibling = nil
+}
+
+// reparentChildren reparents all of src's child nodes to dst.
+func reparentChildren(dst, src *Node) {
+	for {
+		child := src.FirstChild
+		if child == nil {
+			break
+		}
+		src.RemoveChild(child)
+		dst.AppendChild(child)
+	}
+}
+
+// clone returns a new node with the same type, data and attributes.
+// The clone has no parent, no siblings and no children.
+func (n *Node) clone() *Node {
+	m := &Node{
+		Type:     n.Type,
+		DataAtom: n.DataAtom,
+		Data:     n.Data,
+		Attr:     make([]Attribute, len(n.Attr)),
+	}
+	copy(m.Attr, n.Attr)
+	return m
+}
+
+// nodeStack is a stack of nodes.
+type nodeStack []*Node
+
+// pop pops the stack. It will panic if s is empty.
+func (s *nodeStack) pop() *Node {
+	i := len(*s)
+	n := (*s)[i-1]
+	*s = (*s)[:i-1]
+	return n
+}
+
+// top returns the most recently pushed node, or nil if s is empty.
+func (s *nodeStack) top() *Node {
+	if i := len(*s); i > 0 {
+		return (*s)[i-1]
+	}
+	return nil
+}
+
+// index returns the index of the top-most occurrence of n in the stack, or -1
+// if n is not present.
+func (s *nodeStack) index(n *Node) int {
+	for i := len(*s) - 1; i >= 0; i-- {
+		if (*s)[i] == n {
+			return i
+		}
+	}
+	return -1
+}
+
+// insert inserts a node at the given index.
+func (s *nodeStack) insert(i int, n *Node) {
+	(*s) = append(*s, nil)
+	copy((*s)[i+1:], (*s)[i:])
+	(*s)[i] = n
+}
+
+// remove removes a node from the stack. It is a no-op if n is not present.
+func (s *nodeStack) remove(n *Node) {
+	i := s.index(n)
+	if i == -1 {
+		return
+	}
+	copy((*s)[i:], (*s)[i+1:])
+	j := len(*s) - 1
+	(*s)[j] = nil
+	*s = (*s)[:j]
+}
diff --git a/html/node_test.go b/html/node_test.go
new file mode 100644
index 0000000..471102f
--- /dev/null
+++ b/html/node_test.go
@@ -0,0 +1,146 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+	"fmt"
+)
+
+// checkTreeConsistency checks that a node and its descendants are all
+// consistent in their parent/child/sibling relationships.
+func checkTreeConsistency(n *Node) error {
+	return checkTreeConsistency1(n, 0)
+}
+
+func checkTreeConsistency1(n *Node, depth int) error {
+	if depth == 1e4 {
+		return fmt.Errorf("html: tree looks like it contains a cycle")
+	}
+	if err := checkNodeConsistency(n); err != nil {
+		return err
+	}
+	for c := n.FirstChild; c != nil; c = c.NextSibling {
+		if err := checkTreeConsistency1(c, depth+1); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// checkNodeConsistency checks that a node's parent/child/sibling relationships
+// are consistent.
+func checkNodeConsistency(n *Node) error {
+	if n == nil {
+		return nil
+	}
+
+	nParent := 0
+	for p := n.Parent; p != nil; p = p.Parent {
+		nParent++
+		if nParent == 1e4 {
+			return fmt.Errorf("html: parent list looks like an infinite loop")
+		}
+	}
+
+	nForward := 0
+	for c := n.FirstChild; c != nil; c = c.NextSibling {
+		nForward++
+		if nForward == 1e6 {
+			return fmt.Errorf("html: forward list of children looks like an infinite loop")
+		}
+		if c.Parent != n {
+			return fmt.Errorf("html: inconsistent child/parent relationship")
+		}
+	}
+
+	nBackward := 0
+	for c := n.LastChild; c != nil; c = c.PrevSibling {
+		nBackward++
+		if nBackward == 1e6 {
+			return fmt.Errorf("html: backward list of children looks like an infinite loop")
+		}
+		if c.Parent != n {
+			return fmt.Errorf("html: inconsistent child/parent relationship")
+		}
+	}
+
+	if n.Parent != nil {
+		if n.Parent == n {
+			return fmt.Errorf("html: inconsistent parent relationship")
+		}
+		if n.Parent == n.FirstChild {
+			return fmt.Errorf("html: inconsistent parent/first relationship")
+		}
+		if n.Parent == n.LastChild {
+			return fmt.Errorf("html: inconsistent parent/last relationship")
+		}
+		if n.Parent == n.PrevSibling {
+			return fmt.Errorf("html: inconsistent parent/prev relationship")
+		}
+		if n.Parent == n.NextSibling {
+			return fmt.Errorf("html: inconsistent parent/next relationship")
+		}
+
+		parentHasNAsAChild := false
+		for c := n.Parent.FirstChild; c != nil; c = c.NextSibling {
+			if c == n {
+				parentHasNAsAChild = true
+				break
+			}
+		}
+		if !parentHasNAsAChild {
+			return fmt.Errorf("html: inconsistent parent/child relationship")
+		}
+	}
+
+	if n.PrevSibling != nil && n.PrevSibling.NextSibling != n {
+		return fmt.Errorf("html: inconsistent prev/next relationship")
+	}
+	if n.NextSibling != nil && n.NextSibling.PrevSibling != n {
+		return fmt.Errorf("html: inconsistent next/prev relationship")
+	}
+
+	if (n.FirstChild == nil) != (n.LastChild == nil) {
+		return fmt.Errorf("html: inconsistent first/last relationship")
+	}
+	if n.FirstChild != nil && n.FirstChild == n.LastChild {
+		// We have a sole child.
+		if n.FirstChild.PrevSibling != nil || n.FirstChild.NextSibling != nil {
+			return fmt.Errorf("html: inconsistent sole child's sibling relationship")
+		}
+	}
+
+	seen := map[*Node]bool{}
+
+	var last *Node
+	for c := n.FirstChild; c != nil; c = c.NextSibling {
+		if seen[c] {
+			return fmt.Errorf("html: inconsistent repeated child")
+		}
+		seen[c] = true
+		last = c
+	}
+	if last != n.LastChild {
+		return fmt.Errorf("html: inconsistent last relationship")
+	}
+
+	var first *Node
+	for c := n.LastChild; c != nil; c = c.PrevSibling {
+		if !seen[c] {
+			return fmt.Errorf("html: inconsistent missing child")
+		}
+		delete(seen, c)
+		first = c
+	}
+	if first != n.FirstChild {
+		return fmt.Errorf("html: inconsistent first relationship")
+	}
+
+	if len(seen) != 0 {
+		return fmt.Errorf("html: inconsistent forwards/backwards child list")
+	}
+
+	return nil
+}
diff --git a/html/parse.go b/html/parse.go
new file mode 100644
index 0000000..bf99ec6
--- /dev/null
+++ b/html/parse.go
@@ -0,0 +1,2092 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+	"errors"
+	"fmt"
+	"io"
+	"strings"
+
+	a "code.google.com/p/go.net/html/atom"
+)
+
+// A parser implements the HTML5 parsing algorithm:
+// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#tree-construction
+type parser struct {
+	// tokenizer provides the tokens for the parser.
+	tokenizer *Tokenizer
+	// tok is the most recently read token.
+	tok Token
+	// Self-closing tags like <hr/> are treated as start tags, except that
+	// hasSelfClosingToken is set while they are being processed.
+	hasSelfClosingToken bool
+	// doc is the document root element.
+	doc *Node
+	// The stack of open elements (section 12.2.3.2) and active formatting
+	// elements (section 12.2.3.3).
+	oe, afe nodeStack
+	// Element pointers (section 12.2.3.4).
+	head, form *Node
+	// Other parsing state flags (section 12.2.3.5).
+	scripting, framesetOK bool
+	// im is the current insertion mode.
+	im insertionMode
+	// originalIM is the insertion mode to go back to after completing a text
+	// or inTableText insertion mode.
+	originalIM insertionMode
+	// fosterParenting is whether new elements should be inserted according to
+	// the foster parenting rules (section 12.2.5.3).
+	fosterParenting bool
+	// quirks is whether the parser is operating in "quirks mode."
+	quirks bool
+	// fragment is whether the parser is parsing an HTML fragment.
+	fragment bool
+	// context is the context element when parsing an HTML fragment
+	// (section 12.4).
+	context *Node
+}
+
+func (p *parser) top() *Node {
+	if n := p.oe.top(); n != nil {
+		return n
+	}
+	return p.doc
+}
+
+// Stop tags for use in popUntil. These come from section 12.2.3.2.
+var (
+	defaultScopeStopTags = map[string][]a.Atom{
+		"":     {a.Applet, a.Caption, a.Html, a.Table, a.Td, a.Th, a.Marquee, a.Object},
+		"math": {a.AnnotationXml, a.Mi, a.Mn, a.Mo, a.Ms, a.Mtext},
+		"svg":  {a.Desc, a.ForeignObject, a.Title},
+	}
+)
+
+type scope int
+
+const (
+	defaultScope scope = iota
+	listItemScope
+	buttonScope
+	tableScope
+	tableRowScope
+	tableBodyScope
+	selectScope
+)
+
+// popUntil pops the stack of open elements at the highest element whose tag
+// is in matchTags, provided there is no higher element in the scope's stop
+// tags (as defined in section 12.2.3.2). It returns whether or not there was
+// such an element. If there was not, popUntil leaves the stack unchanged.
+//
+// For example, the set of stop tags for table scope is: "html", "table". If
+// the stack was:
+// ["html", "body", "font", "table", "b", "i", "u"]
+// then popUntil(tableScope, "font") would return false, but
+// popUntil(tableScope, "i") would return true and the stack would become:
+// ["html", "body", "font", "table", "b"]
+//
+// If an element's tag is in both the stop tags and matchTags, then the stack
+// will be popped and the function returns true (provided, of course, there was
+// no higher element in the stack that was also in the stop tags). For example,
+// popUntil(tableScope, "table") returns true and leaves:
+// ["html", "body", "font"]
+func (p *parser) popUntil(s scope, matchTags ...a.Atom) bool {
+	if i := p.indexOfElementInScope(s, matchTags...); i != -1 {
+		p.oe = p.oe[:i]
+		return true
+	}
+	return false
+}
+
+// indexOfElementInScope returns the index in p.oe of the highest element whose
+// tag is in matchTags that is in scope. If no matching element is in scope, it
+// returns -1.
+func (p *parser) indexOfElementInScope(s scope, matchTags ...a.Atom) int {
+	for i := len(p.oe) - 1; i >= 0; i-- {
+		tagAtom := p.oe[i].DataAtom
+		if p.oe[i].Namespace == "" {
+			for _, t := range matchTags {
+				if t == tagAtom {
+					return i
+				}
+			}
+			switch s {
+			case defaultScope:
+				// No-op.
+			case listItemScope:
+				if tagAtom == a.Ol || tagAtom == a.Ul {
+					return -1
+				}
+			case buttonScope:
+				if tagAtom == a.Button {
+					return -1
+				}
+			case tableScope:
+				if tagAtom == a.Html || tagAtom == a.Table {
+					return -1
+				}
+			case selectScope:
+				if tagAtom != a.Optgroup && tagAtom != a.Option {
+					return -1
+				}
+			default:
+				panic("unreachable")
+			}
+		}
+		switch s {
+		case defaultScope, listItemScope, buttonScope:
+			for _, t := range defaultScopeStopTags[p.oe[i].Namespace] {
+				if t == tagAtom {
+					return -1
+				}
+			}
+		}
+	}
+	return -1
+}
+
+// elementInScope is like popUntil, except that it doesn't modify the stack of
+// open elements.
+func (p *parser) elementInScope(s scope, matchTags ...a.Atom) bool {
+	return p.indexOfElementInScope(s, matchTags...) != -1
+}
+
+// clearStackToContext pops elements off the stack of open elements until a
+// scope-defined element is found.
+func (p *parser) clearStackToContext(s scope) {
+	for i := len(p.oe) - 1; i >= 0; i-- {
+		tagAtom := p.oe[i].DataAtom
+		switch s {
+		case tableScope:
+			if tagAtom == a.Html || tagAtom == a.Table {
+				p.oe = p.oe[:i+1]
+				return
+			}
+		case tableRowScope:
+			if tagAtom == a.Html || tagAtom == a.Tr {
+				p.oe = p.oe[:i+1]
+				return
+			}
+		case tableBodyScope:
+			if tagAtom == a.Html || tagAtom == a.Tbody || tagAtom == a.Tfoot || tagAtom == a.Thead {
+				p.oe = p.oe[:i+1]
+				return
+			}
+		default:
+			panic("unreachable")
+		}
+	}
+}
+
+// generateImpliedEndTags pops nodes off the stack of open elements as long as
+// the top node has a tag name of dd, dt, li, option, optgroup, p, rp, or rt.
+// If exceptions are specified, nodes with that name will not be popped off.
+func (p *parser) generateImpliedEndTags(exceptions ...string) {
+	var i int
+loop:
+	for i = len(p.oe) - 1; i >= 0; i-- {
+		n := p.oe[i]
+		if n.Type == ElementNode {
+			switch n.DataAtom {
+			case a.Dd, a.Dt, a.Li, a.Option, a.Optgroup, a.P, a.Rp, a.Rt:
+				for _, except := range exceptions {
+					if n.Data == except {
+						break loop
+					}
+				}
+				continue
+			}
+		}
+		break
+	}
+
+	p.oe = p.oe[:i+1]
+}
+
+// addChild adds a child node n to the top element, and pushes n onto the stack
+// of open elements if it is an element node.
+func (p *parser) addChild(n *Node) {
+	if p.shouldFosterParent() {
+		p.fosterParent(n)
+	} else {
+		p.top().AppendChild(n)
+	}
+
+	if n.Type == ElementNode {
+		p.oe = append(p.oe, n)
+	}
+}
+
+// shouldFosterParent returns whether the next node to be added should be
+// foster parented.
+func (p *parser) shouldFosterParent() bool {
+	if p.fosterParenting {
+		switch p.top().DataAtom {
+		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
+			return true
+		}
+	}
+	return false
+}
+
+// fosterParent adds a child node according to the foster parenting rules.
+// Section 12.2.5.3, "foster parenting".
+func (p *parser) fosterParent(n *Node) {
+	var table, parent, prev *Node
+	var i int
+	for i = len(p.oe) - 1; i >= 0; i-- {
+		if p.oe[i].DataAtom == a.Table {
+			table = p.oe[i]
+			break
+		}
+	}
+
+	if table == nil {
+		// The foster parent is the html element.
+		parent = p.oe[0]
+	} else {
+		parent = table.Parent
+	}
+	if parent == nil {
+		parent = p.oe[i-1]
+	}
+
+	if table != nil {
+		prev = table.PrevSibling
+	} else {
+		prev = parent.LastChild
+	}
+	if prev != nil && prev.Type == TextNode && n.Type == TextNode {
+		prev.Data += n.Data
+		return
+	}
+
+	parent.InsertBefore(n, table)
+}
+
+// addText adds text to the preceding node if it is a text node, or else it
+// calls addChild with a new text node.
+func (p *parser) addText(text string) {
+	if text == "" {
+		return
+	}
+
+	if p.shouldFosterParent() {
+		p.fosterParent(&Node{
+			Type: TextNode,
+			Data: text,
+		})
+		return
+	}
+
+	t := p.top()
+	if n := t.LastChild; n != nil && n.Type == TextNode {
+		n.Data += text
+		return
+	}
+	p.addChild(&Node{
+		Type: TextNode,
+		Data: text,
+	})
+}
+
+// addElement adds a child element based on the current token.
+func (p *parser) addElement() {
+	p.addChild(&Node{
+		Type:     ElementNode,
+		DataAtom: p.tok.DataAtom,
+		Data:     p.tok.Data,
+		Attr:     p.tok.Attr,
+	})
+}
+
+// Section 12.2.3.3.
+func (p *parser) addFormattingElement() {
+	tagAtom, attr := p.tok.DataAtom, p.tok.Attr
+	p.addElement()
+
+	// Implement the Noah's Ark clause, but with three per family instead of two.
+	identicalElements := 0
+findIdenticalElements:
+	for i := len(p.afe) - 1; i >= 0; i-- {
+		n := p.afe[i]
+		if n.Type == scopeMarkerNode {
+			break
+		}
+		if n.Type != ElementNode {
+			continue
+		}
+		if n.Namespace != "" {
+			continue
+		}
+		if n.DataAtom != tagAtom {
+			continue
+		}
+		if len(n.Attr) != len(attr) {
+			continue
+		}
+	compareAttributes:
+		for _, t0 := range n.Attr {
+			for _, t1 := range attr {
+				if t0.Key == t1.Key && t0.Namespace == t1.Namespace && t0.Val == t1.Val {
+					// Found a match for this attribute, continue with the next attribute.
+					continue compareAttributes
+				}
+			}
+			// If we get here, there is no attribute that matches a.
+			// Therefore the element is not identical to the new one.
+			continue findIdenticalElements
+		}
+
+		identicalElements++
+		if identicalElements >= 3 {
+			p.afe.remove(n)
+		}
+	}
+
+	p.afe = append(p.afe, p.top())
+}
+
+// Section 12.2.3.3.
+func (p *parser) clearActiveFormattingElements() {
+	for {
+		n := p.afe.pop()
+		if len(p.afe) == 0 || n.Type == scopeMarkerNode {
+			return
+		}
+	}
+}
+
+// Section 12.2.3.3.
+func (p *parser) reconstructActiveFormattingElements() {
+	n := p.afe.top()
+	if n == nil {
+		return
+	}
+	if n.Type == scopeMarkerNode || p.oe.index(n) != -1 {
+		return
+	}
+	i := len(p.afe) - 1
+	for n.Type != scopeMarkerNode && p.oe.index(n) == -1 {
+		if i == 0 {
+			i = -1
+			break
+		}
+		i--
+		n = p.afe[i]
+	}
+	for {
+		i++
+		clone := p.afe[i].clone()
+		p.addChild(clone)
+		p.afe[i] = clone
+		if i == len(p.afe)-1 {
+			break
+		}
+	}
+}
+
+// Section 12.2.4.
+func (p *parser) acknowledgeSelfClosingTag() {
+	p.hasSelfClosingToken = false
+}
+
+// An insertion mode (section 12.2.3.1) is the state transition function from
+// a particular state in the HTML5 parser's state machine. It updates the
+// parser's fields depending on parser.tok (where ErrorToken means EOF).
+// It returns whether the token was consumed.
+type insertionMode func(*parser) bool
+
+// setOriginalIM sets the insertion mode to return to after completing a text or
+// inTableText insertion mode.
+// Section 12.2.3.1, "using the rules for".
+func (p *parser) setOriginalIM() {
+	if p.originalIM != nil {
+		panic("html: bad parser state: originalIM was set twice")
+	}
+	p.originalIM = p.im
+}
+
+// Section 12.2.3.1, "reset the insertion mode".
+func (p *parser) resetInsertionMode() {
+	for i := len(p.oe) - 1; i >= 0; i-- {
+		n := p.oe[i]
+		if i == 0 && p.context != nil {
+			n = p.context
+		}
+
+		switch n.DataAtom {
+		case a.Select:
+			p.im = inSelectIM
+		case a.Td, a.Th:
+			p.im = inCellIM
+		case a.Tr:
+			p.im = inRowIM
+		case a.Tbody, a.Thead, a.Tfoot:
+			p.im = inTableBodyIM
+		case a.Caption:
+			p.im = inCaptionIM
+		case a.Colgroup:
+			p.im = inColumnGroupIM
+		case a.Table:
+			p.im = inTableIM
+		case a.Head:
+			p.im = inBodyIM
+		case a.Body:
+			p.im = inBodyIM
+		case a.Frameset:
+			p.im = inFramesetIM
+		case a.Html:
+			p.im = beforeHeadIM
+		default:
+			continue
+		}
+		return
+	}
+	p.im = inBodyIM
+}
+
+const whitespace = " \t\r\n\f"
+
+// Section 12.2.5.4.1.
+func initialIM(p *parser) bool {
+	switch p.tok.Type {
+	case TextToken:
+		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
+		if len(p.tok.Data) == 0 {
+			// It was all whitespace, so ignore it.
+			return true
+		}
+	case CommentToken:
+		p.doc.AppendChild(&Node{
+			Type: CommentNode,
+			Data: p.tok.Data,
+		})
+		return true
+	case DoctypeToken:
+		n, quirks := parseDoctype(p.tok.Data)
+		p.doc.AppendChild(n)
+		p.quirks = quirks
+		p.im = beforeHTMLIM
+		return true
+	}
+	p.quirks = true
+	p.im = beforeHTMLIM
+	return false
+}
+
+// Section 12.2.5.4.2.
+func beforeHTMLIM(p *parser) bool {
+	switch p.tok.Type {
+	case DoctypeToken:
+		// Ignore the token.
+		return true
+	case TextToken:
+		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
+		if len(p.tok.Data) == 0 {
+			// It was all whitespace, so ignore it.
+			return true
+		}
+	case StartTagToken:
+		if p.tok.DataAtom == a.Html {
+			p.addElement()
+			p.im = beforeHeadIM
+			return true
+		}
+	case EndTagToken:
+		switch p.tok.DataAtom {
+		case a.Head, a.Body, a.Html, a.Br:
+			p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
+			return false
+		default:
+			// Ignore the token.
+			return true
+		}
+	case CommentToken:
+		p.doc.AppendChild(&Node{
+			Type: CommentNode,
+			Data: p.tok.Data,
+		})
+		return true
+	}
+	p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
+	return false
+}
+
+// Section 12.2.5.4.3.
+func beforeHeadIM(p *parser) bool {
+	switch p.tok.Type {
+	case TextToken:
+		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
+		if len(p.tok.Data) == 0 {
+			// It was all whitespace, so ignore it.
+			return true
+		}
+	case StartTagToken:
+		switch p.tok.DataAtom {
+		case a.Head:
+			p.addElement()
+			p.head = p.top()
+			p.im = inHeadIM
+			return true
+		case a.Html:
+			return inBodyIM(p)
+		}
+	case EndTagToken:
+		switch p.tok.DataAtom {
+		case a.Head, a.Body, a.Html, a.Br:
+			p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
+			return false
+		default:
+			// Ignore the token.
+			return true
+		}
+	case CommentToken:
+		p.addChild(&Node{
+			Type: CommentNode,
+			Data: p.tok.Data,
+		})
+		return true
+	case DoctypeToken:
+		// Ignore the token.
+		return true
+	}
+
+	p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
+	return false
+}
+
+// Section 12.2.5.4.4.
+func inHeadIM(p *parser) bool {
+	switch p.tok.Type {
+	case TextToken:
+		s := strings.TrimLeft(p.tok.Data, whitespace)
+		if len(s) < len(p.tok.Data) {
+			// Add the initial whitespace to the current node.
+			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
+			if s == "" {
+				return true
+			}
+			p.tok.Data = s
+		}
+	case StartTagToken:
+		switch p.tok.DataAtom {
+		case a.Html:
+			return inBodyIM(p)
+		case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta:
+			p.addElement()
+			p.oe.pop()
+			p.acknowledgeSelfClosingTag()
+			return true
+		case a.Script, a.Title, a.Noscript, a.Noframes, a.Style:
+			p.addElement()
+			p.setOriginalIM()
+			p.im = textIM
+			return true
+		case a.Head:
+			// Ignore the token.
+			return true
+		}
+	case EndTagToken:
+		switch p.tok.DataAtom {
+		case a.Head:
+			n := p.oe.pop()
+			if n.DataAtom != a.Head {
+				panic("html: bad parser state: <head> element not found, in the in-head insertion mode")
+			}
+			p.im = afterHeadIM
+			return true
+		case a.Body, a.Html, a.Br:
+			p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
+			return false
+		default:
+			// Ignore the token.
+			return true
+		}
+	case CommentToken:
+		p.addChild(&Node{
+			Type: CommentNode,
+			Data: p.tok.Data,
+		})
+		return true
+	case DoctypeToken:
+		// Ignore the token.
+		return true
+	}
+
+	p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
+	return false
+}
+
+// Section 12.2.5.4.6.
+func afterHeadIM(p *parser) bool {
+	switch p.tok.Type {
+	case TextToken:
+		s := strings.TrimLeft(p.tok.Data, whitespace)
+		if len(s) < len(p.tok.Data) {
+			// Add the initial whitespace to the current node.
+			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
+			if s == "" {
+				return true
+			}
+			p.tok.Data = s
+		}
+	case StartTagToken:
+		switch p.tok.DataAtom {
+		case a.Html:
+			return inBodyIM(p)
+		case a.Body:
+			p.addElement()
+			p.framesetOK = false
+			p.im = inBodyIM
+			return true
+		case a.Frameset:
+			p.addElement()
+			p.im = inFramesetIM
+			return true
+		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Title:
+			p.oe = append(p.oe, p.head)
+			defer p.oe.remove(p.head)
+			return inHeadIM(p)
+		case a.Head:
+			// Ignore the token.
+			return true
+		}
+	case EndTagToken:
+		switch p.tok.DataAtom {
+		case a.Body, a.Html, a.Br:
+			// Drop down to creating an implied <body> tag.
+		default:
+			// Ignore the token.
+			return true
+		}
+	case CommentToken:
+		p.addChild(&Node{
+			Type: CommentNode,
+			Data: p.tok.Data,
+		})
+		return true
+	case DoctypeToken:
+		// Ignore the token.
+		return true
+	}
+
+	p.parseImpliedToken(StartTagToken, a.Body, a.Body.String())
+	p.framesetOK = true
+	return false
+}
+
+// copyAttributes copies attributes of src not found on dst to dst.
+func copyAttributes(dst *Node, src Token) {
+	if len(src.Attr) == 0 {
+		return
+	}
+	attr := map[string]string{}
+	for _, t := range dst.Attr {
+		attr[t.Key] = t.Val
+	}
+	for _, t := range src.Attr {
+		if _, ok := attr[t.Key]; !ok {
+			dst.Attr = append(dst.Attr, t)
+			attr[t.Key] = t.Val
+		}
+	}
+}
+
+// Section 12.2.5.4.7.
+func inBodyIM(p *parser) bool {
+	switch p.tok.Type {
+	case TextToken:
+		d := p.tok.Data
+		switch n := p.oe.top(); n.DataAtom {
+		case a.Pre, a.Listing:
+			if n.FirstChild == nil {
+				// Ignore a newline at the start of a <pre> block.
+				if d != "" && d[0] == '\r' {
+					d = d[1:]
+				}
+				if d != "" && d[0] == '\n' {
+					d = d[1:]
+				}
+			}
+		}
+		d = strings.Replace(d, "\x00", "", -1)
+		if d == "" {
+			return true
+		}
+		p.reconstructActiveFormattingElements()
+		p.addText(d)
+		if p.framesetOK && strings.TrimLeft(d, whitespace) != "" {
+			// There were non-whitespace characters inserted.
+			p.framesetOK = false
+		}
+	case StartTagToken:
+		switch p.tok.DataAtom {
+		case a.Html:
+			copyAttributes(p.oe[0], p.tok)
+		case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Title:
+			return inHeadIM(p)
+		case a.Body:
+			if len(p.oe) >= 2 {
+				body := p.oe[1]
+				if body.Type == ElementNode && body.DataAtom == a.Body {
+					p.framesetOK = false
+					copyAttributes(body, p.tok)
+				}
+			}
+		case a.Frameset:
+			if !p.framesetOK || len(p.oe) < 2 || p.oe[1].DataAtom != a.Body {
+				// Ignore the token.
+				return true
+			}
+			body := p.oe[1]
+			if body.Parent != nil {
+				body.Parent.RemoveChild(body)
+			}
+			p.oe = p.oe[:1]
+			p.addElement()
+			p.im = inFramesetIM
+			return true
+		case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul:
+			p.popUntil(buttonScope, a.P)
+			p.addElement()
+		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
+			p.popUntil(buttonScope, a.P)
+			switch n := p.top(); n.DataAtom {
+			case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
+				p.oe.pop()
+			}
+			p.addElement()
+		case a.Pre, a.Listing:
+			p.popUntil(buttonScope, a.P)
+			p.addElement()
+			// The newline, if any, will be dealt with by the TextToken case.
+			p.framesetOK = false
+		case a.Form:
+			if p.form == nil {
+				p.popUntil(buttonScope, a.P)
+				p.addElement()
+				p.form = p.top()
+			}
+		case a.Li:
+			p.framesetOK = false
+			for i := len(p.oe) - 1; i >= 0; i-- {
+				node := p.oe[i]
+				switch node.DataAtom {
+				case a.Li:
+					p.oe = p.oe[:i]
+				case a.Address, a.Div, a.P:
+					continue
+				default:
+					if !isSpecialElement(node) {
+						continue
+					}
+				}
+				break
+			}
+			p.popUntil(buttonScope, a.P)
+			p.addElement()
+		case a.Dd, a.Dt:
+			p.framesetOK = false
+			for i := len(p.oe) - 1; i >= 0; i-- {
+				node := p.oe[i]
+				switch node.DataAtom {
+				case a.Dd, a.Dt:
+					p.oe = p.oe[:i]
+				case a.Address, a.Div, a.P:
+					continue
+				default:
+					if !isSpecialElement(node) {
+						continue
+					}
+				}
+				break
+			}
+			p.popUntil(buttonScope, a.P)
+			p.addElement()
+		case a.Plaintext:
+			p.popUntil(buttonScope, a.P)
+			p.addElement()
+		case a.Button:
+			p.popUntil(defaultScope, a.Button)
+			p.reconstructActiveFormattingElements()
+			p.addElement()
+			p.framesetOK = false
+		case a.A:
+			for i := len(p.afe) - 1; i >= 0 && p.afe[i].Type != scopeMarkerNode; i-- {
+				if n := p.afe[i]; n.Type == ElementNode && n.DataAtom == a.A {
+					p.inBodyEndTagFormatting(a.A)
+					p.oe.remove(n)
+					p.afe.remove(n)
+					break
+				}
+			}
+			p.reconstructActiveFormattingElements()
+			p.addFormattingElement()
+		case a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
+			p.reconstructActiveFormattingElements()
+			p.addFormattingElement()
+		case a.Nobr:
+			p.reconstructActiveFormattingElements()
+			if p.elementInScope(defaultScope, a.Nobr) {
+				p.inBodyEndTagFormatting(a.Nobr)
+				p.reconstructActiveFormattingElements()
+			}
+			p.addFormattingElement()
+		case a.Applet, a.Marquee, a.Object:
+			p.reconstructActiveFormattingElements()
+			p.addElement()
+			p.afe = append(p.afe, &scopeMarker)
+			p.framesetOK = false
+		case a.Table:
+			if !p.quirks {
+				p.popUntil(buttonScope, a.P)
+			}
+			p.addElement()
+			p.framesetOK = false
+			p.im = inTableIM
+			return true
+		case a.Area, a.Br, a.Embed, a.Img, a.Input, a.Keygen, a.Wbr:
+			p.reconstructActiveFormattingElements()
+			p.addElement()
+			p.oe.pop()
+			p.acknowledgeSelfClosingTag()
+			if p.tok.DataAtom == a.Input {
+				for _, t := range p.tok.Attr {
+					if t.Key == "type" {
+						if strings.ToLower(t.Val) == "hidden" {
+							// Skip setting framesetOK = false
+							return true
+						}
+					}
+				}
+			}
+			p.framesetOK = false
+		case a.Param, a.Source, a.Track:
+			p.addElement()
+			p.oe.pop()
+			p.acknowledgeSelfClosingTag()
+		case a.Hr:
+			p.popUntil(buttonScope, a.P)
+			p.addElement()
+			p.oe.pop()
+			p.acknowledgeSelfClosingTag()
+			p.framesetOK = false
+		case a.Image:
+			p.tok.DataAtom = a.Img
+			p.tok.Data = a.Img.String()
+			return false
+		case a.Isindex:
+			if p.form != nil {
+				// Ignore the token.
+				return true
+			}
+			action := ""
+			prompt := "This is a searchable index. Enter search keywords: "
+			attr := []Attribute{{Key: "name", Val: "isindex"}}
+			for _, t := range p.tok.Attr {
+				switch t.Key {
+				case "action":
+					action = t.Val
+				case "name":
+					// Ignore the attribute.
+				case "prompt":
+					prompt = t.Val
+				default:
+					attr = append(attr, t)
+				}
+			}
+			p.acknowledgeSelfClosingTag()
+			p.popUntil(buttonScope, a.P)
+			p.parseImpliedToken(StartTagToken, a.Form, a.Form.String())
+			if action != "" {
+				p.form.Attr = []Attribute{{Key: "action", Val: action}}
+			}
+			p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String())
+			p.parseImpliedToken(StartTagToken, a.Label, a.Label.String())
+			p.addText(prompt)
+			p.addChild(&Node{
+				Type:     ElementNode,
+				DataAtom: a.Input,
+				Data:     a.Input.String(),
+				Attr:     attr,
+			})
+			p.oe.pop()
+			p.parseImpliedToken(EndTagToken, a.Label, a.Label.String())
+			p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String())
+			p.parseImpliedToken(EndTagToken, a.Form, a.Form.String())
+		case a.Textarea:
+			p.addElement()
+			p.setOriginalIM()
+			p.framesetOK = false
+			p.im = textIM
+		case a.Xmp:
+			p.popUntil(buttonScope, a.P)
+			p.reconstructActiveFormattingElements()
+			p.framesetOK = false
+			p.addElement()
+			p.setOriginalIM()
+			p.im = textIM
+		case a.Iframe:
+			p.framesetOK = false
+			p.addElement()
+			p.setOriginalIM()
+			p.im = textIM
+		case a.Noembed, a.Noscript:
+			p.addElement()
+			p.setOriginalIM()
+			p.im = textIM
+		case a.Select:
+			p.reconstructActiveFormattingElements()
+			p.addElement()
+			p.framesetOK = false
+			p.im = inSelectIM
+			return true
+		case a.Optgroup, a.Option:
+			if p.top().DataAtom == a.Option {
+				p.oe.pop()
+			}
+			p.reconstructActiveFormattingElements()
+			p.addElement()
+		case a.Rp, a.Rt:
+			if p.elementInScope(defaultScope, a.Ruby) {
+				p.generateImpliedEndTags()
+			}
+			p.addElement()
+		case a.Math, a.Svg:
+			p.reconstructActiveFormattingElements()
+			if p.tok.DataAtom == a.Math {
+				adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
+			} else {
+				adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
+			}
+			adjustForeignAttributes(p.tok.Attr)
+			p.addElement()
+			p.top().Namespace = p.tok.Data
+			if p.hasSelfClosingToken {
+				p.oe.pop()
+				p.acknowledgeSelfClosingTag()
+			}
+			return true
+		case a.Caption, a.Col, a.Colgroup, a.Frame, a.Head, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
+			// Ignore the token.
+		default:
+			p.reconstructActiveFormattingElements()
+			p.addElement()
+		}
+	case EndTagToken:
+		switch p.tok.DataAtom {
+		case a.Body:
+			if p.elementInScope(defaultScope, a.Body) {
+				p.im = afterBodyIM
+			}
+		case a.Html:
+			if p.elementInScope(defaultScope, a.Body) {
+				p.parseImpliedToken(EndTagToken, a.Body, a.Body.String())
+				return false
+			}
+			return true
+		case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul:
+			p.popUntil(defaultScope, p.tok.DataAtom)
+		case a.Form:
+			node := p.form
+			p.form = nil
+			i := p.indexOfElementInScope(defaultScope, a.Form)
+			if node == nil || i == -1 || p.oe[i] != node {
+				// Ignore the token.
+				return true
+			}
+			p.generateImpliedEndTags()
+			p.oe.remove(node)
+		case a.P:
+			if !p.elementInScope(buttonScope, a.P) {
+				p.parseImpliedToken(StartTagToken, a.P, a.P.String())
+			}
+			p.popUntil(buttonScope, a.P)
+		case a.Li:
+			p.popUntil(listItemScope, a.Li)
+		case a.Dd, a.Dt:
+			p.popUntil(defaultScope, p.tok.DataAtom)
+		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
+			p.popUntil(defaultScope, a.H1, a.H2, a.H3, a.H4, a.H5, a.H6)
+		case a.A, a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.Nobr, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
+			p.inBodyEndTagFormatting(p.tok.DataAtom)
+		case a.Applet, a.Marquee, a.Object:
+			if p.popUntil(defaultScope, p.tok.DataAtom) {
+				p.clearActiveFormattingElements()
+			}
+		case a.Br:
+			p.tok.Type = StartTagToken
+			return false
+		default:
+			p.inBodyEndTagOther(p.tok.DataAtom)
+		}
+	case CommentToken:
+		p.addChild(&Node{
+			Type: CommentNode,
+			Data: p.tok.Data,
+		})
+	}
+
+	return true
+}
+
+func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom) {
+	// This is the "adoption agency" algorithm, described at
+	// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#adoptionAgency
+
+	// TODO: this is a fairly literal line-by-line translation of that algorithm.
+	// Once the code successfully parses the comprehensive test suite, we should
+	// refactor this code to be more idiomatic.
+
+	// Steps 1-3. The outer loop.
+	for i := 0; i < 8; i++ {
+		// Step 4. Find the formatting element.
+		var formattingElement *Node
+		for j := len(p.afe) - 1; j >= 0; j-- {
+			if p.afe[j].Type == scopeMarkerNode {
+				break
+			}
+			if p.afe[j].DataAtom == tagAtom {
+				formattingElement = p.afe[j]
+				break
+			}
+		}
+		if formattingElement == nil {
+			p.inBodyEndTagOther(tagAtom)
+			return
+		}
+		feIndex := p.oe.index(formattingElement)
+		if feIndex == -1 {
+			p.afe.remove(formattingElement)
+			return
+		}
+		if !p.elementInScope(defaultScope, tagAtom) {
+			// Ignore the tag.
+			return
+		}
+
+		// Steps 5-6. Find the furthest block.
+		var furthestBlock *Node
+		for _, e := range p.oe[feIndex:] {
+			if isSpecialElement(e) {
+				furthestBlock = e
+				break
+			}
+		}
+		if furthestBlock == nil {
+			e := p.oe.pop()
+			for e != formattingElement {
+				e = p.oe.pop()
+			}
+			p.afe.remove(e)
+			return
+		}
+
+		// Steps 7-8. Find the common ancestor and bookmark node.
+		commonAncestor := p.oe[feIndex-1]
+		bookmark := p.afe.index(formattingElement)
+
+		// Step 9. The inner loop. Find the lastNode to reparent.
+		lastNode := furthestBlock
+		node := furthestBlock
+		x := p.oe.index(node)
+		// Steps 9.1-9.3.
+		for j := 0; j < 3; j++ {
+			// Step 9.4.
+			x--
+			node = p.oe[x]
+			// Step 9.5.
+			if p.afe.index(node) == -1 {
+				p.oe.remove(node)
+				continue
+			}
+			// Step 9.6.
+			if node == formattingElement {
+				break
+			}
+			// Step 9.7.
+			clone := node.clone()
+			p.afe[p.afe.index(node)] = clone
+			p.oe[p.oe.index(node)] = clone
+			node = clone
+			// Step 9.8.
+			if lastNode == furthestBlock {
+				bookmark = p.afe.index(node) + 1
+			}
+			// Step 9.9.
+			if lastNode.Parent != nil {
+				lastNode.Parent.RemoveChild(lastNode)
+			}
+			node.AppendChild(lastNode)
+			// Step 9.10.
+			lastNode = node
+		}
+
+		// Step 10. Reparent lastNode to the common ancestor,
+		// or for misnested table nodes, to the foster parent.
+		if lastNode.Parent != nil {
+			lastNode.Parent.RemoveChild(lastNode)
+		}
+		switch commonAncestor.DataAtom {
+		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
+			p.fosterParent(lastNode)
+		default:
+			commonAncestor.AppendChild(lastNode)
+		}
+
+		// Steps 11-13. Reparent nodes from the furthest block's children
+		// to a clone of the formatting element.
+		clone := formattingElement.clone()
+		reparentChildren(clone, furthestBlock)
+		furthestBlock.AppendChild(clone)
+
+		// Step 14. Fix up the list of active formatting elements.
+		if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark {
+			// Move the bookmark with the rest of the list.
+			bookmark--
+		}
+		p.afe.remove(formattingElement)
+		p.afe.insert(bookmark, clone)
+
+		// Step 15. Fix up the stack of open elements.
+		p.oe.remove(formattingElement)
+		p.oe.insert(p.oe.index(furthestBlock)+1, clone)
+	}
+}
+
+// inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM.
+func (p *parser) inBodyEndTagOther(tagAtom a.Atom) {
+	for i := len(p.oe) - 1; i >= 0; i-- {
+		if p.oe[i].DataAtom == tagAtom {
+			p.oe = p.oe[:i]
+			break
+		}
+		if isSpecialElement(p.oe[i]) {
+			break
+		}
+	}
+}
+
+// Section 12.2.5.4.8.
+func textIM(p *parser) bool {
+	switch p.tok.Type {
+	case ErrorToken:
+		p.oe.pop()
+	case TextToken:
+		d := p.tok.Data
+		if n := p.oe.top(); n.DataAtom == a.Textarea && n.FirstChild == nil {
+			// Ignore a newline at the start of a <textarea> block.
+			if d != "" && d[0] == '\r' {
+				d = d[1:]
+			}
+			if d != "" && d[0] == '\n' {
+				d = d[1:]
+			}
+		}
+		if d == "" {
+			return true
+		}
+		p.addText(d)
+		return true
+	case EndTagToken:
+		p.oe.pop()
+	}
+	p.im = p.originalIM
+	p.originalIM = nil
+	return p.tok.Type == EndTagToken
+}
+
+// Section 12.2.5.4.9.
+func inTableIM(p *parser) bool {
+	switch p.tok.Type {
+	case ErrorToken:
+		// Stop parsing.
+		return true
+	case TextToken:
+		p.tok.Data = strings.Replace(p.tok.Data, "\x00", "", -1)
+		switch p.oe.top().DataAtom {
+		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
+			if strings.Trim(p.tok.Data, whitespace) == "" {
+				p.addText(p.tok.Data)
+				return true
+			}
+		}
+	case StartTagToken:
+		switch p.tok.DataAtom {
+		case a.Caption:
+			p.clearStackToContext(tableScope)
+			p.afe = append(p.afe, &scopeMarker)
+			p.addElement()
+			p.im = inCaptionIM
+			return true
+		case a.Colgroup:
+			p.clearStackToContext(tableScope)
+			p.addElement()
+			p.im = inColumnGroupIM
+			return true
+		case a.Col:
+			p.parseImpliedToken(StartTagToken, a.Colgroup, a.Colgroup.String())
+			return false
+		case a.Tbody, a.Tfoot, a.Thead:
+			p.clearStackToContext(tableScope)
+			p.addElement()
+			p.im = inTableBodyIM
+			return true
+		case a.Td, a.Th, a.Tr:
+			p.parseImpliedToken(StartTagToken, a.Tbody, a.Tbody.String())
+			return false
+		case a.Table:
+			if p.popUntil(tableScope, a.Table) {
+				p.resetInsertionMode()
+				return false
+			}
+			// Ignore the token.
+			return true
+		case a.Style, a.Script:
+			return inHeadIM(p)
+		case a.Input:
+			for _, t := range p.tok.Attr {
+				if t.Key == "type" && strings.ToLower(t.Val) == "hidden" {
+					p.addElement()
+					p.oe.pop()
+					return true
+				}
+			}
+			// Otherwise drop down to the default action.
+		case a.Form:
+			if p.form != nil {
+				// Ignore the token.
+				return true
+			}
+			p.addElement()
+			p.form = p.oe.pop()
+		case a.Select:
+			p.reconstructActiveFormattingElements()
+			switch p.top().DataAtom {
+			case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
+				p.fosterParenting = true
+			}
+			p.addElement()
+			p.fosterParenting = false
+			p.framesetOK = false
+			p.im = inSelectInTableIM
+			return true
+		}
+	case EndTagToken:
+		switch p.tok.DataAtom {
+		case a.Table:
+			if p.popUntil(tableScope, a.Table) {
+				p.resetInsertionMode()
+				return true
+			}
+			// Ignore the token.
+			return true
+		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
+			// Ignore the token.
+			return true
+		}
+	case CommentToken:
+		p.addChild(&Node{
+			Type: CommentNode,
+			Data: p.tok.Data,
+		})
+		return true
+	case DoctypeToken:
+		// Ignore the token.
+		return true
+	}
+
+	p.fosterParenting = true
+	defer func() { p.fosterParenting = false }()
+
+	return inBodyIM(p)
+}
+
+// Section 12.2.5.4.11.
+func inCaptionIM(p *parser) bool {
+	switch p.tok.Type {
+	case StartTagToken:
+		switch p.tok.DataAtom {
+		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Thead, a.Tr:
+			if p.popUntil(tableScope, a.Caption) {
+				p.clearActiveFormattingElements()
+				p.im = inTableIM
+				return false
+			} else {
+				// Ignore the token.
+				return true
+			}
+		case a.Select:
+			p.reconstructActiveFormattingElements()
+			p.addElement()
+			p.framesetOK = false
+			p.im = inSelectInTableIM
+			return true
+		}
+	case EndTagToken:
+		switch p.tok.DataAtom {
+		case a.Caption:
+			if p.popUntil(tableScope, a.Caption) {
+				p.clearActiveFormattingElements()
+				p.im = inTableIM
+			}
+			return true
+		case a.Table:
+			if p.popUntil(tableScope, a.Caption) {
+				p.clearActiveFormattingElements()
+				p.im = inTableIM
+				return false
+			} else {
+				// Ignore the token.
+				return true
+			}
+		case a.Body, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
+			// Ignore the token.
+			return true
+		}
+	}
+	return inBodyIM(p)
+}
+
+// Section 12.2.5.4.12.
+func inColumnGroupIM(p *parser) bool {
+	switch p.tok.Type {
+	case TextToken:
+		s := strings.TrimLeft(p.tok.Data, whitespace)
+		if len(s) < len(p.tok.Data) {
+			// Add the initial whitespace to the current node.
+			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
+			if s == "" {
+				return true
+			}
+			p.tok.Data = s
+		}
+	case CommentToken:
+		p.addChild(&Node{
+			Type: CommentNode,
+			Data: p.tok.Data,
+		})
+		return true
+	case DoctypeToken:
+		// Ignore the token.
+		return true
+	case StartTagToken:
+		switch p.tok.DataAtom {
+		case a.Html:
+			return inBodyIM(p)
+		case a.Col:
+			p.addElement()
+			p.oe.pop()
+			p.acknowledgeSelfClosingTag()
+			return true
+		}
+	case EndTagToken:
+		switch p.tok.DataAtom {
+		case a.Colgroup:
+			if p.oe.top().DataAtom != a.Html {
+				p.oe.pop()
+				p.im = inTableIM
+			}
+			return true
+		case a.Col:
+			// Ignore the token.
+			return true
+		}
+	}
+	if p.oe.top().DataAtom != a.Html {
+		p.oe.pop()
+		p.im = inTableIM
+		return false
+	}
+	return true
+}
+
+// Section 12.2.5.4.13.
+func inTableBodyIM(p *parser) bool {
+	switch p.tok.Type {
+	case StartTagToken:
+		switch p.tok.DataAtom {
+		case a.Tr:
+			p.clearStackToContext(tableBodyScope)
+			p.addElement()
+			p.im = inRowIM
+			return true
+		case a.Td, a.Th:
+			p.parseImpliedToken(StartTagToken, a.Tr, a.Tr.String())
+			return false
+		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
+			if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
+				p.im = inTableIM
+				return false
+			}
+			// Ignore the token.
+			return true
+		}
+	case EndTagToken:
+		switch p.tok.DataAtom {
+		case a.Tbody, a.Tfoot, a.Thead:
+			if p.elementInScope(tableScope, p.tok.DataAtom) {
+				p.clearStackToContext(tableBodyScope)
+				p.oe.pop()
+				p.im = inTableIM
+			}
+			return true
+		case a.Table:
+			if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
+				p.im = inTableIM
+				return false
+			}
+			// Ignore the token.
+			return true
+		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th, a.Tr:
+			// Ignore the token.
+			return true
+		}
+	case CommentToken:
+		p.addChild(&Node{
+			Type: CommentNode,
+			Data: p.tok.Data,
+		})
+		return true
+	}
+
+	return inTableIM(p)
+}
+
+// Section 12.2.5.4.14.
+func inRowIM(p *parser) bool {
+	switch p.tok.Type {
+	case StartTagToken:
+		switch p.tok.DataAtom {
+		case a.Td, a.Th:
+			p.clearStackToContext(tableRowScope)
+			p.addElement()
+			p.afe = append(p.afe, &scopeMarker)
+			p.im = inCellIM
+			return true
+		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead, a.Tr:
+			if p.popUntil(tableScope, a.Tr) {
+				p.im = inTableBodyIM
+				return false
+			}
+			// Ignore the token.
+			return true
+		}
+	case EndTagToken:
+		switch p.tok.DataAtom {
+		case a.Tr:
+			if p.popUntil(tableScope, a.Tr) {
+				p.im = inTableBodyIM
+				return true
+			}
+			// Ignore the token.
+			return true
+		case a.Table:
+			if p.popUntil(tableScope, a.Tr) {
+				p.im = inTableBodyIM
+				return false
+			}
+			// Ignore the token.
+			return true
+		case a.Tbody, a.Tfoot, a.Thead:
+			if p.elementInScope(tableScope, p.tok.DataAtom) {
+				p.parseImpliedToken(EndTagToken, a.Tr, a.Tr.String())
+				return false
+			}
+			// Ignore the token.
+			return true
+		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th:
+			// Ignore the token.
+			return true
+		}
+	}
+
+	return inTableIM(p)
+}
+
+// Section 12.2.5.4.15.
+func inCellIM(p *parser) bool {
+	switch p.tok.Type {
+	case StartTagToken:
+		switch p.tok.DataAtom {
+		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
+			if p.popUntil(tableScope, a.Td, a.Th) {
+				// Close the cell and reprocess.
+				p.clearActiveFormattingElements()
+				p.im = inRowIM
+				return false
+			}
+			// Ignore the token.
+			return true
+		case a.Select:
+			p.reconstructActiveFormattingElements()
+			p.addElement()
+			p.framesetOK = false
+			p.im = inSelectInTableIM
+			return true
+		}
+	case EndTagToken:
+		switch p.tok.DataAtom {
+		case a.Td, a.Th:
+			if !p.popUntil(tableScope, p.tok.DataAtom) {
+				// Ignore the token.
+				return true
+			}
+			p.clearActiveFormattingElements()
+			p.im = inRowIM
+			return true
+		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html:
+			// Ignore the token.
+			return true
+		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
+			if !p.elementInScope(tableScope, p.tok.DataAtom) {
+				// Ignore the token.
+				return true
+			}
+			// Close the cell and reprocess.
+			p.popUntil(tableScope, a.Td, a.Th)
+			p.clearActiveFormattingElements()
+			p.im = inRowIM
+			return false
+		}
+	}
+	return inBodyIM(p)
+}
+
+// Section 12.2.5.4.16.
+func inSelectIM(p *parser) bool {
+	switch p.tok.Type {
+	case ErrorToken:
+		// Stop parsing.
+		return true
+	case TextToken:
+		p.addText(strings.Replace(p.tok.Data, "\x00", "", -1))
+	case StartTagToken:
+		switch p.tok.DataAtom {
+		case a.Html:
+			return inBodyIM(p)
+		case a.Option:
+			if p.top().DataAtom == a.Option {
+				p.oe.pop()
+			}
+			p.addElement()
+		case a.Optgroup:
+			if p.top().DataAtom == a.Option {
+				p.oe.pop()
+			}
+			if p.top().DataAtom == a.Optgroup {
+				p.oe.pop()
+			}
+			p.addElement()
+		case a.Select:
+			p.tok.Type = EndTagToken
+			return false
+		case a.Input, a.Keygen, a.Textarea:
+			if p.elementInScope(selectScope, a.Select) {
+				p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
+				return false
+			}
+			// In order to properly ignore <textarea>, we need to change the tokenizer mode.
+			p.tokenizer.NextIsNotRawText()
+			// Ignore the token.
+			return true
+		case a.Script:
+			return inHeadIM(p)
+		}
+	case EndTagToken:
+		switch p.tok.DataAtom {
+		case a.Option:
+			if p.top().DataAtom == a.Option {
+				p.oe.pop()
+			}
+		case a.Optgroup:
+			i := len(p.oe) - 1
+			if p.oe[i].DataAtom == a.Option {
+				i--
+			}
+			if p.oe[i].DataAtom == a.Optgroup {
+				p.oe = p.oe[:i]
+			}
+		case a.Select:
+			if p.popUntil(selectScope, a.Select) {
+				p.resetInsertionMode()
+			}
+		}
+	case CommentToken:
+		p.doc.AppendChild(&Node{
+			Type: CommentNode,
+			Data: p.tok.Data,
+		})
+	case DoctypeToken:
+		// Ignore the token.
+		return true
+	}
+
+	return true
+}
+
+// Section 12.2.5.4.17.
+func inSelectInTableIM(p *parser) bool {
+	switch p.tok.Type {
+	case StartTagToken, EndTagToken:
+		switch p.tok.DataAtom {
+		case a.Caption, a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr, a.Td, a.Th:
+			if p.tok.Type == StartTagToken || p.elementInScope(tableScope, p.tok.DataAtom) {
+				p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
+				return false
+			} else {
+				// Ignore the token.
+				return true
+			}
+		}
+	}
+	return inSelectIM(p)
+}
+
+// Section 12.2.5.4.18.
+func afterBodyIM(p *parser) bool {
+	switch p.tok.Type {
+	case ErrorToken:
+		// Stop parsing.
+		return true
+	case TextToken:
+		s := strings.TrimLeft(p.tok.Data, whitespace)
+		if len(s) == 0 {
+			// It was all whitespace.
+			return inBodyIM(p)
+		}
+	case StartTagToken:
+		if p.tok.DataAtom == a.Html {
+			return inBodyIM(p)
+		}
+	case EndTagToken:
+		if p.tok.DataAtom == a.Html {
+			if !p.fragment {
+				p.im = afterAfterBodyIM
+			}
+			return true
+		}
+	case CommentToken:
+		// The comment is attached to the <html> element.
+		if len(p.oe) < 1 || p.oe[0].DataAtom != a.Html {
+			panic("html: bad parser state: <html> element not found, in the after-body insertion mode")
+		}
+		p.oe[0].AppendChild(&Node{
+			Type: CommentNode,
+			Data: p.tok.Data,
+		})
+		return true
+	}
+	p.im = inBodyIM
+	return false
+}
+
+// Section 12.2.5.4.19.
+func inFramesetIM(p *parser) bool {
+	switch p.tok.Type {
+	case CommentToken:
+		p.addChild(&Node{
+			Type: CommentNode,
+			Data: p.tok.Data,
+		})
+	case TextToken:
+		// Ignore all text but whitespace.
+		s := strings.Map(func(c rune) rune {
+			switch c {
+			case ' ', '\t', '\n', '\f', '\r':
+				return c
+			}
+			return -1
+		}, p.tok.Data)
+		if s != "" {
+			p.addText(s)
+		}
+	case StartTagToken:
+		switch p.tok.DataAtom {
+		case a.Html:
+			return inBodyIM(p)
+		case a.Frameset:
+			p.addElement()
+		case a.Frame:
+			p.addElement()
+			p.oe.pop()
+			p.acknowledgeSelfClosingTag()
+		case a.Noframes:
+			return inHeadIM(p)
+		}
+	case EndTagToken:
+		switch p.tok.DataAtom {
+		case a.Frameset:
+			if p.oe.top().DataAtom != a.Html {
+				p.oe.pop()
+				if p.oe.top().DataAtom != a.Frameset {
+					p.im = afterFramesetIM
+					return true
+				}
+			}
+		}
+	default:
+		// Ignore the token.
+	}
+	return true
+}
+
+// Section 12.2.5.4.20.
+func afterFramesetIM(p *parser) bool {
+	switch p.tok.Type {
+	case CommentToken:
+		p.addChild(&Node{
+			Type: CommentNode,
+			Data: p.tok.Data,
+		})
+	case TextToken:
+		// Ignore all text but whitespace.
+		s := strings.Map(func(c rune) rune {
+			switch c {
+			case ' ', '\t', '\n', '\f', '\r':
+				return c
+			}
+			return -1
+		}, p.tok.Data)
+		if s != "" {
+			p.addText(s)
+		}
+	case StartTagToken:
+		switch p.tok.DataAtom {
+		case a.Html:
+			return inBodyIM(p)
+		case a.Noframes:
+			return inHeadIM(p)
+		}
+	case EndTagToken:
+		switch p.tok.DataAtom {
+		case a.Html:
+			p.im = afterAfterFramesetIM
+			return true
+		}
+	default:
+		// Ignore the token.
+	}
+	return true
+}
+
+// Section 12.2.5.4.21.
+func afterAfterBodyIM(p *parser) bool {
+	switch p.tok.Type {
+	case ErrorToken:
+		// Stop parsing.
+		return true
+	case TextToken:
+		s := strings.TrimLeft(p.tok.Data, whitespace)
+		if len(s) == 0 {
+			// It was all whitespace.
+			return inBodyIM(p)
+		}
+	case StartTagToken:
+		if p.tok.DataAtom == a.Html {
+			return inBodyIM(p)
+		}
+	case CommentToken:
+		p.doc.AppendChild(&Node{
+			Type: CommentNode,
+			Data: p.tok.Data,
+		})
+		return true
+	case DoctypeToken:
+		return inBodyIM(p)
+	}
+	p.im = inBodyIM
+	return false
+}
+
+// Section 12.2.5.4.22.
+func afterAfterFramesetIM(p *parser) bool {
+	switch p.tok.Type {
+	case CommentToken:
+		p.doc.AppendChild(&Node{
+			Type: CommentNode,
+			Data: p.tok.Data,
+		})
+	case TextToken:
+		// Ignore all text but whitespace.
+		s := strings.Map(func(c rune) rune {
+			switch c {
+			case ' ', '\t', '\n', '\f', '\r':
+				return c
+			}
+			return -1
+		}, p.tok.Data)
+		if s != "" {
+			p.tok.Data = s
+			return inBodyIM(p)
+		}
+	case StartTagToken:
+		switch p.tok.DataAtom {
+		case a.Html:
+			return inBodyIM(p)
+		case a.Noframes:
+			return inHeadIM(p)
+		}
+	case DoctypeToken:
+		return inBodyIM(p)
+	default:
+		// Ignore the token.
+	}
+	return true
+}
+
+const whitespaceOrNUL = whitespace + "\x00"
+
+// Section 12.2.5.5.
+func parseForeignContent(p *parser) bool {
+	switch p.tok.Type {
+	case TextToken:
+		if p.framesetOK {
+			p.framesetOK = strings.TrimLeft(p.tok.Data, whitespaceOrNUL) == ""
+		}
+		p.tok.Data = strings.Replace(p.tok.Data, "\x00", "\ufffd", -1)
+		p.addText(p.tok.Data)
+	case CommentToken:
+		p.addChild(&Node{
+			Type: CommentNode,
+			Data: p.tok.Data,
+		})
+	case StartTagToken:
+		b := breakout[p.tok.Data]
+		if p.tok.DataAtom == a.Font {
+		loop:
+			for _, attr := range p.tok.Attr {
+				switch attr.Key {
+				case "color", "face", "size":
+					b = true
+					break loop
+				}
+			}
+		}
+		if b {
+			for i := len(p.oe) - 1; i >= 0; i-- {
+				n := p.oe[i]
+				if n.Namespace == "" || htmlIntegrationPoint(n) || mathMLTextIntegrationPoint(n) {
+					p.oe = p.oe[:i+1]
+					break
+				}
+			}
+			return false
+		}
+		switch p.top().Namespace {
+		case "math":
+			adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
+		case "svg":
+			// Adjust SVG tag names. The tokenizer lower-cases tag names, but
+			// SVG wants e.g. "foreignObject" with a capital second "O".
+			if x := svgTagNameAdjustments[p.tok.Data]; x != "" {
+				p.tok.DataAtom = a.Lookup([]byte(x))
+				p.tok.Data = x
+			}
+			adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
+		default:
+			panic("html: bad parser state: unexpected namespace")
+		}
+		adjustForeignAttributes(p.tok.Attr)
+		namespace := p.top().Namespace
+		p.addElement()
+		p.top().Namespace = namespace
+		if namespace != "" {
+			// Don't let the tokenizer go into raw text mode in foreign content
+			// (e.g. in an SVG <title> tag).
+			p.tokenizer.NextIsNotRawText()
+		}
+		if p.hasSelfClosingToken {
+			p.oe.pop()
+			p.acknowledgeSelfClosingTag()
+		}
+	case EndTagToken:
+		for i := len(p.oe) - 1; i >= 0; i-- {
+			if p.oe[i].Namespace == "" {
+				return p.im(p)
+			}
+			if strings.EqualFold(p.oe[i].Data, p.tok.Data) {
+				p.oe = p.oe[:i]
+				break
+			}
+		}
+		return true
+	default:
+		// Ignore the token.
+	}
+	return true
+}
+
+// Section 12.2.5.
+func (p *parser) inForeignContent() bool {
+	if len(p.oe) == 0 {
+		return false
+	}
+	n := p.oe[len(p.oe)-1]
+	if n.Namespace == "" {
+		return false
+	}
+	if mathMLTextIntegrationPoint(n) {
+		if p.tok.Type == StartTagToken && p.tok.DataAtom != a.Mglyph && p.tok.DataAtom != a.Malignmark {
+			return false
+		}
+		if p.tok.Type == TextToken {
+			return false
+		}
+	}
+	if n.Namespace == "math" && n.DataAtom == a.AnnotationXml && p.tok.Type == StartTagToken && p.tok.DataAtom == a.Svg {
+		return false
+	}
+	if htmlIntegrationPoint(n) && (p.tok.Type == StartTagToken || p.tok.Type == TextToken) {
+		return false
+	}
+	if p.tok.Type == ErrorToken {
+		return false
+	}
+	return true
+}
+
+// parseImpliedToken parses a token as though it had appeared in the parser's
+// input.
+func (p *parser) parseImpliedToken(t TokenType, dataAtom a.Atom, data string) {
+	realToken, selfClosing := p.tok, p.hasSelfClosingToken
+	p.tok = Token{
+		Type:     t,
+		DataAtom: dataAtom,
+		Data:     data,
+	}
+	p.hasSelfClosingToken = false
+	p.parseCurrentToken()
+	p.tok, p.hasSelfClosingToken = realToken, selfClosing
+}
+
+// parseCurrentToken runs the current token through the parsing routines
+// until it is consumed.
+func (p *parser) parseCurrentToken() {
+	if p.tok.Type == SelfClosingTagToken {
+		p.hasSelfClosingToken = true
+		p.tok.Type = StartTagToken
+	}
+
+	consumed := false
+	for !consumed {
+		if p.inForeignContent() {
+			consumed = parseForeignContent(p)
+		} else {
+			consumed = p.im(p)
+		}
+	}
+
+	if p.hasSelfClosingToken {
+		// This is a parse error, but ignore it.
+		p.hasSelfClosingToken = false
+	}
+}
+
+func (p *parser) parse() error {
+	// Iterate until EOF. Any other error will cause an early return.
+	var err error
+	for err != io.EOF {
+		// CDATA sections are allowed only in foreign content.
+		n := p.oe.top()
+		p.tokenizer.AllowCDATA(n != nil && n.Namespace != "")
+		// Read and parse the next token.
+		p.tokenizer.Next()
+		p.tok = p.tokenizer.Token()
+		if p.tok.Type == ErrorToken {
+			err = p.tokenizer.Err()
+			if err != nil && err != io.EOF {
+				return err
+			}
+		}
+		p.parseCurrentToken()
+	}
+	return nil
+}
+
+// Parse returns the parse tree for the HTML from the given Reader.
+// The input is assumed to be UTF-8 encoded.
+func Parse(r io.Reader) (*Node, error) {
+	p := &parser{
+		tokenizer: NewTokenizer(r),
+		doc: &Node{
+			Type: DocumentNode,
+		},
+		scripting:  true,
+		framesetOK: true,
+		im:         initialIM,
+	}
+	err := p.parse()
+	if err != nil {
+		return nil, err
+	}
+	return p.doc, nil
+}
+
+// ParseFragment parses a fragment of HTML and returns the nodes that were
+// found. If the fragment is the InnerHTML for an existing element, pass that
+// element in context.
+func ParseFragment(r io.Reader, context *Node) ([]*Node, error) {
+	contextTag := ""
+	if context != nil {
+		if context.Type != ElementNode {
+			return nil, errors.New("html: ParseFragment of non-element Node")
+		}
+		// The next check isn't just context.DataAtom.String() == context.Data because
+		// it is valid to pass an element whose tag isn't a known atom. For example,
+		// DataAtom == 0 and Data = "tagfromthefuture" is perfectly consistent.
+		if context.DataAtom != a.Lookup([]byte(context.Data)) {
+			return nil, fmt.Errorf("html: inconsistent Node: DataAtom=%q, Data=%q", context.DataAtom, context.Data)
+		}
+		contextTag = context.DataAtom.String()
+	}
+	p := &parser{
+		tokenizer: NewTokenizerFragment(r, contextTag),
+		doc: &Node{
+			Type: DocumentNode,
+		},
+		scripting: true,
+		fragment:  true,
+		context:   context,
+	}
+
+	root := &Node{
+		Type:     ElementNode,
+		DataAtom: a.Html,
+		Data:     a.Html.String(),
+	}
+	p.doc.AppendChild(root)
+	p.oe = nodeStack{root}
+	p.resetInsertionMode()
+
+	for n := context; n != nil; n = n.Parent {
+		if n.Type == ElementNode && n.DataAtom == a.Form {
+			p.form = n
+			break
+		}
+	}
+
+	err := p.parse()
+	if err != nil {
+		return nil, err
+	}
+
+	parent := p.doc
+	if context != nil {
+		parent = root
+	}
+
+	var result []*Node
+	for c := parent.FirstChild; c != nil; {
+		next := c.NextSibling
+		parent.RemoveChild(c)
+		result = append(result, c)
+		c = next
+	}
+	return result, nil
+}
diff --git a/html/parse_test.go b/html/parse_test.go
new file mode 100644
index 0000000..253b04d
--- /dev/null
+++ b/html/parse_test.go
@@ -0,0 +1,388 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+	"bufio"
+	"bytes"
+	"errors"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"os"
+	"path/filepath"
+	"runtime"
+	"sort"
+	"strings"
+	"testing"
+
+	"code.google.com/p/go.net/html/atom"
+)
+
+// readParseTest reads a single test case from r.
+func readParseTest(r *bufio.Reader) (text, want, context string, err error) {
+	line, err := r.ReadSlice('\n')
+	if err != nil {
+		return "", "", "", err
+	}
+	var b []byte
+
+	// Read the HTML.
+	if string(line) != "#data\n" {
+		return "", "", "", fmt.Errorf(`got %q want "#data\n"`, line)
+	}
+	for {
+		line, err = r.ReadSlice('\n')
+		if err != nil {
+			return "", "", "", err
+		}
+		if line[0] == '#' {
+			break
+		}
+		b = append(b, line...)
+	}
+	text = strings.TrimSuffix(string(b), "\n")
+	b = b[:0]
+
+	// Skip the error list.
+	if string(line) != "#errors\n" {
+		return "", "", "", fmt.Errorf(`got %q want "#errors\n"`, line)
+	}
+	for {
+		line, err = r.ReadSlice('\n')
+		if err != nil {
+			return "", "", "", err
+		}
+		if line[0] == '#' {
+			break
+		}
+	}
+
+	if string(line) == "#document-fragment\n" {
+		line, err = r.ReadSlice('\n')
+		if err != nil {
+			return "", "", "", err
+		}
+		context = strings.TrimSpace(string(line))
+		line, err = r.ReadSlice('\n')
+		if err != nil {
+			return "", "", "", err
+		}
+	}
+
+	// Read the dump of what the parse tree should be.
+	if string(line) != "#document\n" {
+		return "", "", "", fmt.Errorf(`got %q want "#document\n"`, line)
+	}
+	inQuote := false
+	for {
+		line, err = r.ReadSlice('\n')
+		if err != nil && err != io.EOF {
+			return "", "", "", err
+		}
+		trimmed := bytes.Trim(line, "| \n")
+		if len(trimmed) > 0 {
+			if line[0] == '|' && trimmed[0] == '"' {
+				inQuote = true
+			}
+			if trimmed[len(trimmed)-1] == '"' && !(line[0] == '|' && len(trimmed) == 1) {
+				inQuote = false
+			}
+		}
+		if len(line) == 0 || len(line) == 1 && line[0] == '\n' && !inQuote {
+			break
+		}
+		b = append(b, line...)
+	}
+	return text, string(b), context, nil
+}
+
+func dumpIndent(w io.Writer, level int) {
+	io.WriteString(w, "| ")
+	for i := 0; i < level; i++ {
+		io.WriteString(w, "  ")
+	}
+}
+
+type sortedAttributes []Attribute
+
+func (a sortedAttributes) Len() int {
+	return len(a)
+}
+
+func (a sortedAttributes) Less(i, j int) bool {
+	if a[i].Namespace != a[j].Namespace {
+		return a[i].Namespace < a[j].Namespace
+	}
+	return a[i].Key < a[j].Key
+}
+
+func (a sortedAttributes) Swap(i, j int) {
+	a[i], a[j] = a[j], a[i]
+}
+
+func dumpLevel(w io.Writer, n *Node, level int) error {
+	dumpIndent(w, level)
+	switch n.Type {
+	case ErrorNode:
+		return errors.New("unexpected ErrorNode")
+	case DocumentNode:
+		return errors.New("unexpected DocumentNode")
+	case ElementNode:
+		if n.Namespace != "" {
+			fmt.Fprintf(w, "<%s %s>", n.Namespace, n.Data)
+		} else {
+			fmt.Fprintf(w, "<%s>", n.Data)
+		}
+		attr := sortedAttributes(n.Attr)
+		sort.Sort(attr)
+		for _, a := range attr {
+			io.WriteString(w, "\n")
+			dumpIndent(w, level+1)
+			if a.Namespace != "" {
+				fmt.Fprintf(w, `%s %s="%s"`, a.Namespace, a.Key, a.Val)
+			} else {
+				fmt.Fprintf(w, `%s="%s"`, a.Key, a.Val)
+			}
+		}
+	case TextNode:
+		fmt.Fprintf(w, `"%s"`, n.Data)
+	case CommentNode:
+		fmt.Fprintf(w, "<!-- %s -->", n.Data)
+	case DoctypeNode:
+		fmt.Fprintf(w, "<!DOCTYPE %s", n.Data)
+		if n.Attr != nil {
+			var p, s string
+			for _, a := range n.Attr {
+				switch a.Key {
+				case "public":
+					p = a.Val
+				case "system":
+					s = a.Val
+				}
+			}
+			if p != "" || s != "" {
+				fmt.Fprintf(w, ` "%s"`, p)
+				fmt.Fprintf(w, ` "%s"`, s)
+			}
+		}
+		io.WriteString(w, ">")
+	case scopeMarkerNode:
+		return errors.New("unexpected scopeMarkerNode")
+	default:
+		return errors.New("unknown node type")
+	}
+	io.WriteString(w, "\n")
+	for c := n.FirstChild; c != nil; c = c.NextSibling {
+		if err := dumpLevel(w, c, level+1); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func dump(n *Node) (string, error) {
+	if n == nil || n.FirstChild == nil {
+		return "", nil
+	}
+	var b bytes.Buffer
+	for c := n.FirstChild; c != nil; c = c.NextSibling {
+		if err := dumpLevel(&b, c, 0); err != nil {
+			return "", err
+		}
+	}
+	return b.String(), nil
+}
+
+const testDataDir = "testdata/webkit/"
+
+func TestParser(t *testing.T) {
+	testFiles, err := filepath.Glob(testDataDir + "*.dat")
+	if err != nil {
+		t.Fatal(err)
+	}
+	for _, tf := range testFiles {
+		f, err := os.Open(tf)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer f.Close()
+		r := bufio.NewReader(f)
+
+		for i := 0; ; i++ {
+			text, want, context, err := readParseTest(r)
+			if err == io.EOF {
+				break
+			}
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			err = testParseCase(text, want, context)
+
+			if err != nil {
+				t.Errorf("%s test #%d %q, %s", tf, i, text, err)
+			}
+		}
+	}
+}
+
+// testParseCase tests one test case from the test files. If the test does not
+// pass, it returns an error that explains the failure.
+// text is the HTML to be parsed, want is a dump of the correct parse tree,
+// and context is the name of the context node, if any.
+func testParseCase(text, want, context string) (err error) {
+	defer func() {
+		if x := recover(); x != nil {
+			switch e := x.(type) {
+			case error:
+				err = e
+			default:
+				err = fmt.Errorf("%v", e)
+			}
+		}
+	}()
+
+	var doc *Node
+	if context == "" {
+		doc, err = Parse(strings.NewReader(text))
+		if err != nil {
+			return err
+		}
+	} else {
+		contextNode := &Node{
+			Type:     ElementNode,
+			DataAtom: atom.Lookup([]byte(context)),
+			Data:     context,
+		}
+		nodes, err := ParseFragment(strings.NewReader(text), contextNode)
+		if err != nil {
+			return err
+		}
+		doc = &Node{
+			Type: DocumentNode,
+		}
+		for _, n := range nodes {
+			doc.AppendChild(n)
+		}
+	}
+
+	if err := checkTreeConsistency(doc); err != nil {
+		return err
+	}
+
+	got, err := dump(doc)
+	if err != nil {
+		return err
+	}
+	// Compare the parsed tree to the #document section.
+	if got != want {
+		return fmt.Errorf("got vs want:\n----\n%s----\n%s----", got, want)
+	}
+
+	if renderTestBlacklist[text] || context != "" {
+		return nil
+	}
+
+	// Check that rendering and re-parsing results in an identical tree.
+	pr, pw := io.Pipe()
+	go func() {
+		pw.CloseWithError(Render(pw, doc))
+	}()
+	doc1, err := Parse(pr)
+	if err != nil {
+		return err
+	}
+	got1, err := dump(doc1)
+	if err != nil {
+		return err
+	}
+	if got != got1 {
+		return fmt.Errorf("got vs got1:\n----\n%s----\n%s----", got, got1)
+	}
+
+	return nil
+}
+
+// Some test input result in parse trees are not 'well-formed' despite
+// following the HTML5 recovery algorithms. Rendering and re-parsing such a
+// tree will not result in an exact clone of that tree. We blacklist such
+// inputs from the render test.
+var renderTestBlacklist = map[string]bool{
+	// The second <a> will be reparented to the first <table>'s parent. This
+	// results in an <a> whose parent is an <a>, which is not 'well-formed'.
+	`<a><table><td><a><table></table><a></tr><a></table><b>X</b>C<a>Y`: true,
+	// The same thing with a <p>:
+	`<p><table></p>`: true,
+	// More cases of <a> being reparented:
+	`<a href="blah">aba<table><a href="foo">br<tr><td></td></tr>x</table>aoe`: true,
+	`<a><table><a></table><p><a><div><a>`:                                     true,
+	`<a><table><td><a><table></table><a></tr><a></table><a>`:                  true,
+	// A similar reparenting situation involving <nobr>:
+	`<!DOCTYPE html><body><b><nobr>1<table><nobr></b><i><nobr>2<nobr></i>3`: true,
+	// A <plaintext> element is reparented, putting it before a table.
+	// A <plaintext> element can't have anything after it in HTML.
+	`<table><plaintext><td>`:                                   true,
+	`<!doctype html><table><plaintext></plaintext>`:            true,
+	`<!doctype html><table><tbody><plaintext></plaintext>`:     true,
+	`<!doctype html><table><tbody><tr><plaintext></plaintext>`: true,
+	// A form inside a table inside a form doesn't work either.
+	`<!doctype html><form><table></form><form></table></form>`: true,
+	// A script that ends at EOF may escape its own closing tag when rendered.
+	`<!doctype html><script><!--<script `:          true,
+	`<!doctype html><script><!--<script <`:         true,
+	`<!doctype html><script><!--<script <a`:        true,
+	`<!doctype html><script><!--<script </`:        true,
+	`<!doctype html><script><!--<script </s`:       true,
+	`<!doctype html><script><!--<script </script`:  true,
+	`<!doctype html><script><!--<script </scripta`: true,
+	`<!doctype html><script><!--<script -`:         true,
+	`<!doctype html><script><!--<script -a`:        true,
+	`<!doctype html><script><!--<script -<`:        true,
+	`<!doctype html><script><!--<script --`:        true,
+	`<!doctype html><script><!--<script --a`:       true,
+	`<!doctype html><script><!--<script --<`:       true,
+	`<script><!--<script `:                         true,
+	`<script><!--<script <a`:                       true,
+	`<script><!--<script </script`:                 true,
+	`<script><!--<script </scripta`:                true,
+	`<script><!--<script -`:                        true,
+	`<script><!--<script -a`:                       true,
+	`<script><!--<script --`:                       true,
+	`<script><!--<script --a`:                      true,
+	`<script><!--<script <`:                        true,
+	`<script><!--<script </`:                       true,
+	`<script><!--<script </s`:                      true,
+	// Reconstructing the active formatting elements results in a <plaintext>
+	// element that contains an <a> element.
+	`<!doctype html><p><a><plaintext>b`: true,
+}
+
+func TestNodeConsistency(t *testing.T) {
+	// inconsistentNode is a Node whose DataAtom and Data do not agree.
+	inconsistentNode := &Node{
+		Type:     ElementNode,
+		DataAtom: atom.Frameset,
+		Data:     "table",
+	}
+	_, err := ParseFragment(strings.NewReader("<p>hello</p>"), inconsistentNode)
+	if err == nil {
+		t.Errorf("got nil error, want non-nil")
+	}
+}
+
+func BenchmarkParser(b *testing.B) {
+	buf, err := ioutil.ReadFile("testdata/go1.html")
+	if err != nil {
+		b.Fatalf("could not read testdata/go1.html: %v", err)
+	}
+	b.SetBytes(int64(len(buf)))
+	runtime.GC()
+	b.ReportAllocs()
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		Parse(bytes.NewBuffer(buf))
+	}
+}
diff --git a/html/render.go b/html/render.go
new file mode 100644
index 0000000..d34564f
--- /dev/null
+++ b/html/render.go
@@ -0,0 +1,271 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+	"bufio"
+	"errors"
+	"fmt"
+	"io"
+	"strings"
+)
+
+type writer interface {
+	io.Writer
+	io.ByteWriter
+	WriteString(string) (int, error)
+}
+
+// Render renders the parse tree n to the given writer.
+//
+// Rendering is done on a 'best effort' basis: calling Parse on the output of
+// Render will always result in something similar to the original tree, but it
+// is not necessarily an exact clone unless the original tree was 'well-formed'.
+// 'Well-formed' is not easily specified; the HTML5 specification is
+// complicated.
+//
+// Calling Parse on arbitrary input typically results in a 'well-formed' parse
+// tree. However, it is possible for Parse to yield a 'badly-formed' parse tree.
+// For example, in a 'well-formed' parse tree, no <a> element is a child of
+// another <a> element: parsing "<a><a>" results in two sibling elements.
+// Similarly, in a 'well-formed' parse tree, no <a> element is a child of a
+// <table> element: parsing "<p><table><a>" results in a <p> with two sibling
+// children; the <a> is reparented to the <table>'s parent. However, calling
+// Parse on "<a><table><a>" does not return an error, but the result has an <a>
+// element with an <a> child, and is therefore not 'well-formed'.
+//
+// Programmatically constructed trees are typically also 'well-formed', but it
+// is possible to construct a tree that looks innocuous but, when rendered and
+// re-parsed, results in a different tree. A simple example is that a solitary
+// text node would become a tree containing <html>, <head> and <body> elements.
+// Another example is that the programmatic equivalent of "a<head>b</head>c"
+// becomes "<html><head><head/><body>abc</body></html>".
+func Render(w io.Writer, n *Node) error {
+	if x, ok := w.(writer); ok {
+		return render(x, n)
+	}
+	buf := bufio.NewWriter(w)
+	if err := render(buf, n); err != nil {
+		return err
+	}
+	return buf.Flush()
+}
+
+// plaintextAbort is returned from render1 when a <plaintext> element
+// has been rendered. No more end tags should be rendered after that.
+var plaintextAbort = errors.New("html: internal error (plaintext abort)")
+
+func render(w writer, n *Node) error {
+	err := render1(w, n)
+	if err == plaintextAbort {
+		err = nil
+	}
+	return err
+}
+
+func render1(w writer, n *Node) error {
+	// Render non-element nodes; these are the easy cases.
+	switch n.Type {
+	case ErrorNode:
+		return errors.New("html: cannot render an ErrorNode node")
+	case TextNode:
+		return escape(w, n.Data)
+	case DocumentNode:
+		for c := n.FirstChild; c != nil; c = c.NextSibling {
+			if err := render1(w, c); err != nil {
+				return err
+			}
+		}
+		return nil
+	case ElementNode:
+		// No-op.
+	case CommentNode:
+		if _, err := w.WriteString("<!--"); err != nil {
+			return err
+		}
+		if _, err := w.WriteString(n.Data); err != nil {
+			return err
+		}
+		if _, err := w.WriteString("-->"); err != nil {
+			return err
+		}
+		return nil
+	case DoctypeNode:
+		if _, err := w.WriteString("<!DOCTYPE "); err != nil {
+			return err
+		}
+		if _, err := w.WriteString(n.Data); err != nil {
+			return err
+		}
+		if n.Attr != nil {
+			var p, s string
+			for _, a := range n.Attr {
+				switch a.Key {
+				case "public":
+					p = a.Val
+				case "system":
+					s = a.Val
+				}
+			}
+			if p != "" {
+				if _, err := w.WriteString(" PUBLIC "); err != nil {
+					return err
+				}
+				if err := writeQuoted(w, p); err != nil {
+					return err
+				}
+				if s != "" {
+					if err := w.WriteByte(' '); err != nil {
+						return err
+					}
+					if err := writeQuoted(w, s); err != nil {
+						return err
+					}
+				}
+			} else if s != "" {
+				if _, err := w.WriteString(" SYSTEM "); err != nil {
+					return err
+				}
+				if err := writeQuoted(w, s); err != nil {
+					return err
+				}
+			}
+		}
+		return w.WriteByte('>')
+	default:
+		return errors.New("html: unknown node type")
+	}
+
+	// Render the <xxx> opening tag.
+	if err := w.WriteByte('<'); err != nil {
+		return err
+	}
+	if _, err := w.WriteString(n.Data); err != nil {
+		return err
+	}
+	for _, a := range n.Attr {
+		if err := w.WriteByte(' '); err != nil {
+			return err
+		}
+		if a.Namespace != "" {
+			if _, err := w.WriteString(a.Namespace); err != nil {
+				return err
+			}
+			if err := w.WriteByte(':'); err != nil {
+				return err
+			}
+		}
+		if _, err := w.WriteString(a.Key); err != nil {
+			return err
+		}
+		if _, err := w.WriteString(`="`); err != nil {
+			return err
+		}
+		if err := escape(w, a.Val); err != nil {
+			return err
+		}
+		if err := w.WriteByte('"'); err != nil {
+			return err
+		}
+	}
+	if voidElements[n.Data] {
+		if n.FirstChild != nil {
+			return fmt.Errorf("html: void element <%s> has child nodes", n.Data)
+		}
+		_, err := w.WriteString("/>")
+		return err
+	}
+	if err := w.WriteByte('>'); err != nil {
+		return err
+	}
+
+	// Add initial newline where there is danger of a newline beging ignored.
+	if c := n.FirstChild; c != nil && c.Type == TextNode && strings.HasPrefix(c.Data, "\n") {
+		switch n.Data {
+		case "pre", "listing", "textarea":
+			if err := w.WriteByte('\n'); err != nil {
+				return err
+			}
+		}
+	}
+
+	// Render any child nodes.
+	switch n.Data {
+	case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
+		for c := n.FirstChild; c != nil; c = c.NextSibling {
+			if c.Type == TextNode {
+				if _, err := w.WriteString(c.Data); err != nil {
+					return err
+				}
+			} else {
+				if err := render1(w, c); err != nil {
+					return err
+				}
+			}
+		}
+		if n.Data == "plaintext" {
+			// Don't render anything else. <plaintext> must be the
+			// last element in the file, with no closing tag.
+			return plaintextAbort
+		}
+	default:
+		for c := n.FirstChild; c != nil; c = c.NextSibling {
+			if err := render1(w, c); err != nil {
+				return err
+			}
+		}
+	}
+
+	// Render the </xxx> closing tag.
+	if _, err := w.WriteString("</"); err != nil {
+		return err
+	}
+	if _, err := w.WriteString(n.Data); err != nil {
+		return err
+	}
+	return w.WriteByte('>')
+}
+
+// writeQuoted writes s to w surrounded by quotes. Normally it will use double
+// quotes, but if s contains a double quote, it will use single quotes.
+// It is used for writing the identifiers in a doctype declaration.
+// In valid HTML, they can't contain both types of quotes.
+func writeQuoted(w writer, s string) error {
+	var q byte = '"'
+	if strings.Contains(s, `"`) {
+		q = '\''
+	}
+	if err := w.WriteByte(q); err != nil {
+		return err
+	}
+	if _, err := w.WriteString(s); err != nil {
+		return err
+	}
+	if err := w.WriteByte(q); err != nil {
+		return err
+	}
+	return nil
+}
+
+// Section 12.1.2, "Elements", gives this list of void elements. Void elements
+// are those that can't have any contents.
+var voidElements = map[string]bool{
+	"area":    true,
+	"base":    true,
+	"br":      true,
+	"col":     true,
+	"command": true,
+	"embed":   true,
+	"hr":      true,
+	"img":     true,
+	"input":   true,
+	"keygen":  true,
+	"link":    true,
+	"meta":    true,
+	"param":   true,
+	"source":  true,
+	"track":   true,
+	"wbr":     true,
+}
diff --git a/html/render_test.go b/html/render_test.go
new file mode 100644
index 0000000..11da54b
--- /dev/null
+++ b/html/render_test.go
@@ -0,0 +1,156 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+	"bytes"
+	"testing"
+)
+
+func TestRenderer(t *testing.T) {
+	nodes := [...]*Node{
+		0: {
+			Type: ElementNode,
+			Data: "html",
+		},
+		1: {
+			Type: ElementNode,
+			Data: "head",
+		},
+		2: {
+			Type: ElementNode,
+			Data: "body",
+		},
+		3: {
+			Type: TextNode,
+			Data: "0<1",
+		},
+		4: {
+			Type: ElementNode,
+			Data: "p",
+			Attr: []Attribute{
+				{
+					Key: "id",
+					Val: "A",
+				},
+				{
+					Key: "foo",
+					Val: `abc"def`,
+				},
+			},
+		},
+		5: {
+			Type: TextNode,
+			Data: "2",
+		},
+		6: {
+			Type: ElementNode,
+			Data: "b",
+			Attr: []Attribute{
+				{
+					Key: "empty",
+					Val: "",
+				},
+			},
+		},
+		7: {
+			Type: TextNode,
+			Data: "3",
+		},
+		8: {
+			Type: ElementNode,
+			Data: "i",
+			Attr: []Attribute{
+				{
+					Key: "backslash",
+					Val: `\`,
+				},
+			},
+		},
+		9: {
+			Type: TextNode,
+			Data: "&4",
+		},
+		10: {
+			Type: TextNode,
+			Data: "5",
+		},
+		11: {
+			Type: ElementNode,
+			Data: "blockquote",
+		},
+		12: {
+			Type: ElementNode,
+			Data: "br",
+		},
+		13: {
+			Type: TextNode,
+			Data: "6",
+		},
+	}
+
+	// Build a tree out of those nodes, based on a textual representation.
+	// Only the ".\t"s are significant. The trailing HTML-like text is
+	// just commentary. The "0:" prefixes are for easy cross-reference with
+	// the nodes array.
+	treeAsText := [...]string{
+		0: `<html>`,
+		1: `.	<head>`,
+		2: `.	<body>`,
+		3: `.	.	"0&lt;1"`,
+		4: `.	.	<p id="A" foo="abc&#34;def">`,
+		5: `.	.	.	"2"`,
+		6: `.	.	.	<b empty="">`,
+		7: `.	.	.	.	"3"`,
+		8: `.	.	.	<i backslash="\">`,
+		9: `.	.	.	.	"&amp;4"`,
+		10: `.	.	"5"`,
+		11: `.	.	<blockquote>`,
+		12: `.	.	<br>`,
+		13: `.	.	"6"`,
+	}
+	if len(nodes) != len(treeAsText) {
+		t.Fatal("len(nodes) != len(treeAsText)")
+	}
+	var stack [8]*Node
+	for i, line := range treeAsText {
+		level := 0
+		for line[0] == '.' {
+			// Strip a leading ".\t".
+			line = line[2:]
+			level++
+		}
+		n := nodes[i]
+		if level == 0 {
+			if stack[0] != nil {
+				t.Fatal("multiple root nodes")
+			}
+			stack[0] = n
+		} else {
+			stack[level-1].AppendChild(n)
+			stack[level] = n
+			for i := level + 1; i < len(stack); i++ {
+				stack[i] = nil
+			}
+		}
+		// At each stage of tree construction, we check all nodes for consistency.
+		for j, m := range nodes {
+			if err := checkNodeConsistency(m); err != nil {
+				t.Fatalf("i=%d, j=%d: %v", i, j, err)
+			}
+		}
+	}
+
+	want := `<html><head></head><body>0&lt;1<p id="A" foo="abc&#34;def">` +
+		`2<b empty="">3</b><i backslash="\">&amp;4</i></p>` +
+		`5<blockquote></blockquote><br/>6</body></html>`
+	b := new(bytes.Buffer)
+	if err := Render(b, nodes[0]); err != nil {
+		t.Fatal(err)
+	}
+	if got := b.String(); got != want {
+		t.Errorf("got vs want:\n%s\n%s\n", got, want)
+	}
+}
diff --git a/html/testdata/go1.html b/html/testdata/go1.html
new file mode 100644
index 0000000..a782cc7
--- /dev/null
+++ b/html/testdata/go1.html
@@ -0,0 +1,2237 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+
+  <title>Go 1 Release Notes - The Go Programming Language</title>
+
+<link type="text/css" rel="stylesheet" href="/doc/style.css">
+<script type="text/javascript" src="/doc/godocs.js"></script>
+
+<link rel="search" type="application/opensearchdescription+xml" title="godoc" href="/opensearch.xml" />
+
+<script type="text/javascript">
+var _gaq = _gaq || [];
+_gaq.push(["_setAccount", "UA-11222381-2"]);
+_gaq.push(["_trackPageview"]);
+</script>
+</head>
+<body>
+
+<div id="topbar"><div class="container wide">
+
+<form method="GET" action="/search">
+<div id="menu">
+<a href="/doc/">Documents</a>
+<a href="/ref/">References</a>
+<a href="/pkg/">Packages</a>
+<a href="/project/">The Project</a>
+<a href="/help/">Help</a>
+<input type="text" id="search" name="q" class="inactive" value="Search">
+</div>
+<div id="heading"><a href="/">The Go Programming Language</a></div>
+</form>
+
+</div></div>
+
+<div id="page" class="wide">
+
+
+  <div id="plusone"><g:plusone size="small" annotation="none"></g:plusone></div>
+  <h1>Go 1 Release Notes</h1>
+
+
+
+
+<div id="nav"></div>
+
+
+
+
+<h2 id="introduction">Introduction to Go 1</h2>
+
+<p>
+Go version 1, Go 1 for short, defines a language and a set of core libraries
+that provide a stable foundation for creating reliable products, projects, and
+publications.
+</p>
+
+<p>
+The driving motivation for Go 1 is stability for its users. People should be able to
+write Go programs and expect that they will continue to compile and run without
+change, on a time scale of years, including in production environments such as
+Google App Engine. Similarly, people should be able to write books about Go, be
+able to say which version of Go the book is describing, and have that version
+number still be meaningful much later.
+</p>
+
+<p>
+Code that compiles in Go 1 should, with few exceptions, continue to compile and
+run throughout the lifetime of that version, even as we issue updates and bug
+fixes such as Go version 1.1, 1.2, and so on. Other than critical fixes, changes
+made to the language and library for subsequent releases of Go 1 may
+add functionality but will not break existing Go 1 programs.
+<a href="go1compat.html">The Go 1 compatibility document</a>
+explains the compatibility guidelines in more detail.
+</p>
+
+<p>
+Go 1 is a representation of Go as it used today, not a wholesale rethinking of
+the language. We avoided designing new features and instead focused on cleaning
+up problems and inconsistencies and improving portability. There are a number
+changes to the Go language and packages that we had considered for some time and
+prototyped but not released primarily because they are significant and
+backwards-incompatible. Go 1 was an opportunity to get them out, which is
+helpful for the long term, but also means that Go 1 introduces incompatibilities
+for old programs. Fortunately, the <code>go</code> <code>fix</code> tool can
+automate much of the work needed to bring programs up to the Go 1 standard.
+</p>
+
+<p>
+This document outlines the major changes in Go 1 that will affect programmers
+updating existing code; its reference point is the prior release, r60 (tagged as
+r60.3). It also explains how to update code from r60 to run under Go 1.
+</p>
+
+<h2 id="language">Changes to the language</h2>
+
+<h3 id="append">Append</h3>
+
+<p>
+The <code>append</code> predeclared variadic function makes it easy to grow a slice
+by adding elements to the end.
+A common use is to add bytes to the end of a byte slice when generating output.
+However, <code>append</code> did not provide a way to append a string to a <code>[]byte</code>,
+which is another common case.
+</p>
+
+<pre><!--{{code "/doc/progs/go1.go" `/greeting := ..byte/` `/append.*hello/`}}
+-->    greeting := []byte{}
+    greeting = append(greeting, []byte(&#34;hello &#34;)...)</pre>
+
+<p>
+By analogy with the similar property of <code>copy</code>, Go 1
+permits a string to be appended (byte-wise) directly to a byte
+slice, reducing the friction between strings and byte slices.
+The conversion is no longer necessary:
+</p>
+
+<pre><!--{{code "/doc/progs/go1.go" `/append.*world/`}}
+-->    greeting = append(greeting, &#34;world&#34;...)</pre>
+
+<p>
+<em>Updating</em>:
+This is a new feature, so existing code needs no changes.
+</p>
+
+<h3 id="close">Close</h3>
+
+<p>
+The <code>close</code> predeclared function provides a mechanism
+for a sender to signal that no more values will be sent.
+It is important to the implementation of <code>for</code> <code>range</code>
+loops over channels and is helpful in other situations.
+Partly by design and partly because of race conditions that can occur otherwise,
+it is intended for use only by the goroutine sending on the channel,
+not by the goroutine receiving data.
+However, before Go 1 there was no compile-time checking that <code>close</code>
+was being used correctly.
+</p>
+
+<p>
+To close this gap, at least in part, Go 1 disallows <code>close</code> on receive-only channels.
+Attempting to close such a channel is a compile-time error.
+</p>
+
+<pre>
+    var c chan int
+    var csend chan&lt;- int = c
+    var crecv &lt;-chan int = c
+    close(c)     // legal
+    close(csend) // legal
+    close(crecv) // illegal
+</pre>
+
+<p>
+<em>Updating</em>:
+Existing code that attempts to close a receive-only channel was
+erroneous even before Go 1 and should be fixed.  The compiler will
+now reject such code.
+</p>
+
+<h3 id="literals">Composite literals</h3>
+
+<p>
+In Go 1, a composite literal of array, slice, or map type can elide the
+type specification for the elements' initializers if they are of pointer type.
+All four of the initializations in this example are legal; the last one was illegal before Go 1.
+</p>
+
+<pre><!--{{code "/doc/progs/go1.go" `/type Date struct/` `/STOP/`}}
+-->    type Date struct {
+        month string
+        day   int
+    }
+    <span class="comment">// Struct values, fully qualified; always legal.</span>
+    holiday1 := []Date{
+        Date{&#34;Feb&#34;, 14},
+        Date{&#34;Nov&#34;, 11},
+        Date{&#34;Dec&#34;, 25},
+    }
+    <span class="comment">// Struct values, type name elided; always legal.</span>
+    holiday2 := []Date{
+        {&#34;Feb&#34;, 14},
+        {&#34;Nov&#34;, 11},
+        {&#34;Dec&#34;, 25},
+    }
+    <span class="comment">// Pointers, fully qualified, always legal.</span>
+    holiday3 := []*Date{
+        &amp;Date{&#34;Feb&#34;, 14},
+        &amp;Date{&#34;Nov&#34;, 11},
+        &amp;Date{&#34;Dec&#34;, 25},
+    }
+    <span class="comment">// Pointers, type name elided; legal in Go 1.</span>
+    holiday4 := []*Date{
+        {&#34;Feb&#34;, 14},
+        {&#34;Nov&#34;, 11},
+        {&#34;Dec&#34;, 25},
+    }</pre>
+
+<p>
+<em>Updating</em>:
+This change has no effect on existing code, but the command
+<code>gofmt</code> <code>-s</code> applied to existing source
+will, among other things, elide explicit element types wherever permitted.
+</p>
+
+
+<h3 id="init">Goroutines during init</h3>
+
+<p>
+The old language defined that <code>go</code> statements executed during initialization created goroutines but that they did not begin to run until initialization of the entire program was complete.
+This introduced clumsiness in many places and, in effect, limited the utility
+of the <code>init</code> construct:
+if it was possible for another package to use the library during initialization, the library
+was forced to avoid goroutines.
+This design was done for reasons of simplicity and safety but,
+as our confidence in the language grew, it seemed unnecessary.
+Running goroutines during initialization is no more complex or unsafe than running them during normal execution.
+</p>
+
+<p>
+In Go 1, code that uses goroutines can be called from
+<code>init</code> routines and global initialization expressions
+without introducing a deadlock.
+</p>
+
+<pre><!--{{code "/doc/progs/go1.go" `/PackageGlobal/` `/^}/`}}
+-->var PackageGlobal int
+
+func init() {
+    c := make(chan int)
+    go initializationFunction(c)
+    PackageGlobal = &lt;-c
+}</pre>
+
+<p>
+<em>Updating</em>:
+This is a new feature, so existing code needs no changes,
+although it's possible that code that depends on goroutines not starting before <code>main</code> will break.
+There was no such code in the standard repository.
+</p>
+
+<h3 id="rune">The rune type</h3>
+
+<p>
+The language spec allows the <code>int</code> type to be 32 or 64 bits wide, but current implementations set <code>int</code> to 32 bits even on 64-bit platforms.
+It would be preferable to have <code>int</code> be 64 bits on 64-bit platforms.
+(There are important consequences for indexing large slices.)
+However, this change would waste space when processing Unicode characters with
+the old language because the <code>int</code> type was also used to hold Unicode code points: each code point would waste an extra 32 bits of storage if <code>int</code> grew from 32 bits to 64.
+</p>
+
+<p>
+To make changing to 64-bit <code>int</code> feasible,
+Go 1 introduces a new basic type, <code>rune</code>, to represent
+individual Unicode code points.
+It is an alias for <code>int32</code>, analogous to <code>byte</code>
+as an alias for <code>uint8</code>.
+</p>
+
+<p>
+Character literals such as <code>'a'</code>, <code>'語'</code>, and <code>'\u0345'</code>
+now have default type <code>rune</code>,
+analogous to <code>1.0</code> having default type <code>float64</code>.
+A variable initialized to a character constant will therefore
+have type <code>rune</code> unless otherwise specified.
+</p>
+
+<p>
+Libraries have been updated to use <code>rune</code> rather than <code>int</code>
+when appropriate. For instance, the functions <code>unicode.ToLower</code> and
+relatives now take and return a <code>rune</code>.
+</p>
+
+<pre><!--{{code "/doc/progs/go1.go" `/STARTRUNE/` `/ENDRUNE/`}}
+-->    delta := &#39;δ&#39; <span class="comment">// delta has type rune.</span>
+    var DELTA rune
+    DELTA = unicode.ToUpper(delta)
+    epsilon := unicode.ToLower(DELTA + 1)
+    if epsilon != &#39;δ&#39;+1 {
+        log.Fatal(&#34;inconsistent casing for Greek&#34;)
+    }</pre>
+
+<p>
+<em>Updating</em>:
+Most source code will be unaffected by this because the type inference from
+<code>:=</code> initializers introduces the new type silently, and it propagates
+from there.
+Some code may get type errors that a trivial conversion will resolve.
+</p>
+
+<h3 id="error">The error type</h3>
+
+<p>
+Go 1 introduces a new built-in type, <code>error</code>, which has the following definition:
+</p>
+
+<pre>
+    type error interface {
+        Error() string
+    }
+</pre>
+
+<p>
+Since the consequences of this type are all in the package library,
+it is discussed <a href="#errors">below</a>.
+</p>
+
+<h3 id="delete">Deleting from maps</h3>
+
+<p>
+In the old language, to delete the entry with key <code>k</code> from map <code>m</code>, one wrote the statement,
+</p>
+
+<pre>
+    m[k] = value, false
+</pre>
+
+<p>
+This syntax was a peculiar special case, the only two-to-one assignment.
+It required passing a value (usually ignored) that is evaluated but discarded,
+plus a boolean that was nearly always the constant <code>false</code>.
+It did the job but was odd and a point of contention.
+</p>
+
+<p>
+In Go 1, that syntax has gone; instead there is a new built-in
+function, <code>delete</code>.  The call
+</p>
+
+<pre><!--{{code "/doc/progs/go1.go" `/delete\(m, k\)/`}}
+-->    delete(m, k)</pre>
+
+<p>
+will delete the map entry retrieved by the expression <code>m[k]</code>.
+There is no return value. Deleting a non-existent entry is a no-op.
+</p>
+
+<p>
+<em>Updating</em>:
+Running <code>go</code> <code>fix</code> will convert expressions of the form <code>m[k] = value,
+false</code> into <code>delete(m, k)</code> when it is clear that
+the ignored value can be safely discarded from the program and
+<code>false</code> refers to the predefined boolean constant.
+The fix tool
+will flag other uses of the syntax for inspection by the programmer.
+</p>
+
+<h3 id="iteration">Iterating in maps</h3>
+
+<p>
+The old language specification did not define the order of iteration for maps,
+and in practice it differed across hardware platforms.
+This caused tests that iterated over maps to be fragile and non-portable, with the
+unpleasant property that a test might always pass on one machine but break on another.
+</p>
+
+<p>
+In Go 1, the order in which elements are visited when iterating
+over a map using a <code>for</code> <code>range</code> statement
+is defined to be unpredictable, even if the same loop is run multiple
+times with the same map.
+Code should not assume that the elements are visited in any particular order.
+</p>
+
+<p>
+This change means that code that depends on iteration order is very likely to break early and be fixed long before it becomes a problem.
+Just as important, it allows the map implementation to ensure better map balancing even when programs are using range loops to select an element from a map.
+</p>
+
+<pre><!--{{code "/doc/progs/go1.go" `/Sunday/` `/^	}/`}}
+-->    m := map[string]int{&#34;Sunday&#34;: 0, &#34;Monday&#34;: 1}
+    for name, value := range m {
+        <span class="comment">// This loop should not assume Sunday will be visited first.</span>
+        f(name, value)
+    }</pre>
+
+<p>
+<em>Updating</em>:
+This is one change where tools cannot help.  Most existing code
+will be unaffected, but some programs may break or misbehave; we
+recommend manual checking of all range statements over maps to
+verify they do not depend on iteration order. There were a few such
+examples in the standard repository; they have been fixed.
+Note that it was already incorrect to depend on the iteration order, which
+was unspecified. This change codifies the unpredictability.
+</p>
+
+<h3 id="multiple_assignment">Multiple assignment</h3>
+
+<p>
+The language specification has long guaranteed that in assignments
+the right-hand-side expressions are all evaluated before any left-hand-side expressions are assigned.
+To guarantee predictable behavior,
+Go 1 refines the specification further.
+</p>
+
+<p>
+If the left-hand side of the assignment
+statement contains expressions that require evaluation, such as
+function calls or array indexing operations, these will all be done
+using the usual left-to-right rule before any variables are assigned
+their value.  Once everything is evaluated, the actual assignments
+proceed in left-to-right order.
+</p>
+
+<p>
+These examples illustrate the behavior.
+</p>
+
+<pre><!--{{code "/doc/progs/go1.go" `/sa :=/` `/then sc.0. = 2/`}}
+-->    sa := []int{1, 2, 3}
+    i := 0
+    i, sa[i] = 1, 2 <span class="comment">// sets i = 1, sa[0] = 2</span>
+
+    sb := []int{1, 2, 3}
+    j := 0
+    sb[j], j = 2, 1 <span class="comment">// sets sb[0] = 2, j = 1</span>
+
+    sc := []int{1, 2, 3}
+    sc[0], sc[0] = 1, 2 <span class="comment">// sets sc[0] = 1, then sc[0] = 2 (so sc[0] = 2 at end)</span></pre>
+
+<p>
+<em>Updating</em>:
+This is one change where tools cannot help, but breakage is unlikely.
+No code in the standard repository was broken by this change, and code
+that depended on the previous unspecified behavior was already incorrect.
+</p>
+
+<h3 id="shadowing">Returns and shadowed variables</h3>
+
+<p>
+A common mistake is to use <code>return</code> (without arguments) after an assignment to a variable that has the same name as a result variable but is not the same variable.
+This situation is called <em>shadowing</em>: the result variable has been shadowed by another variable with the same name declared in an inner scope.
+</p>
+
+<p>
+In functions with named return values,
+the Go 1 compilers disallow return statements without arguments if any of the named return values is shadowed at the point of the return statement.
+(It isn't part of the specification, because this is one area we are still exploring;
+the situation is analogous to the compilers rejecting functions that do not end with an explicit return statement.)
+</p>
+
+<p>
+This function implicitly returns a shadowed return value and will be rejected by the compiler:
+</p>
+
+<pre>
+    func Bug() (i, j, k int) {
+        for i = 0; i &lt; 5; i++ {
+            for j := 0; j &lt; 5; j++ { // Redeclares j.
+                k += i*j
+                if k > 100 {
+                    return // Rejected: j is shadowed here.
+                }
+            }
+        }
+        return // OK: j is not shadowed here.
+    }
+</pre>
+
+<p>
+<em>Updating</em>:
+Code that shadows return values in this way will be rejected by the compiler and will need to be fixed by hand.
+The few cases that arose in the standard repository were mostly bugs.
+</p>
+
+<h3 id="unexported">Copying structs with unexported fields</h3>
+
+<p>
+The old language did not allow a package to make a copy of a struct value containing unexported fields belonging to a different package.
+There was, however, a required exception for a method receiver;
+also, the implementations of <code>copy</code> and <code>append</code> have never honored the restriction.
+</p>
+
+<p>
+Go 1 will allow packages to copy struct values containing unexported fields from other packages.
+Besides resolving the inconsistency,
+this change admits a new kind of API: a package can return an opaque value without resorting to a pointer or interface.
+The new implementations of <code>time.Time</code> and
+<code>reflect.Value</code> are examples of types taking advantage of this new property.
+</p>
+
+<p>
+As an example, if package <code>p</code> includes the definitions,
+</p>
+
+<pre>
+    type Struct struct {
+        Public int
+        secret int
+    }
+    func NewStruct(a int) Struct {  // Note: not a pointer.
+        return Struct{a, f(a)}
+    }
+    func (s Struct) String() string {
+        return fmt.Sprintf("{%d (secret %d)}", s.Public, s.secret)
+    }
+</pre>
+
+<p>
+a package that imports <code>p</code> can assign and copy values of type
+<code>p.Struct</code> at will.
+Behind the scenes the unexported fields will be assigned and copied just
+as if they were exported,
+but the client code will never be aware of them. The code
+</p>
+
+<pre>
+    import "p"
+
+    myStruct := p.NewStruct(23)
+    copyOfMyStruct := myStruct
+    fmt.Println(myStruct, copyOfMyStruct)
+</pre>
+
+<p>
+will show that the secret field of the struct has been copied to the new value.
+</p>
+
+<p>
+<em>Updating</em>:
+This is a new feature, so existing code needs no changes.
+</p>
+
+<h3 id="equality">Equality</h3>
+
+<p>
+Before Go 1, the language did not define equality on struct and array values.
+This meant,
+among other things, that structs and arrays could not be used as map keys.
+On the other hand, Go did define equality on function and map values.
+Function equality was problematic in the presence of closures
+(when are two closures equal?)
+while map equality compared pointers, not the maps' content, which was usually
+not what the user would want.
+</p>
+
+<p>
+Go 1 addressed these issues.
+First, structs and arrays can be compared for equality and inequality
+(<code>==</code> and <code>!=</code>),
+and therefore be used as map keys,
+provided they are composed from elements for which equality is also defined,
+using element-wise comparison.
+</p>
+
+<pre><!--{{code "/doc/progs/go1.go" `/type Day struct/` `/Printf/`}}
+-->    type Day struct {
+        long  string
+        short string
+    }
+    Christmas := Day{&#34;Christmas&#34;, &#34;XMas&#34;}
+    Thanksgiving := Day{&#34;Thanksgiving&#34;, &#34;Turkey&#34;}
+    holiday := map[Day]bool{
+        Christmas:    true,
+        Thanksgiving: true,
+    }
+    fmt.Printf(&#34;Christmas is a holiday: %t\n&#34;, holiday[Christmas])</pre>
+
+<p>
+Second, Go 1 removes the definition of equality for function values,
+except for comparison with <code>nil</code>.
+Finally, map equality is gone too, also except for comparison with <code>nil</code>.
+</p>
+
+<p>
+Note that equality is still undefined for slices, for which the
+calculation is in general infeasible.  Also note that the ordered
+comparison operators (<code>&lt;</code> <code>&lt;=</code>
+<code>&gt;</code> <code>&gt;=</code>) are still undefined for
+structs and arrays.
+
+<p>
+<em>Updating</em>:
+Struct and array equality is a new feature, so existing code needs no changes.
+Existing code that depends on function or map equality will be
+rejected by the compiler and will need to be fixed by hand.
+Few programs will be affected, but the fix may require some
+redesign.
+</p>
+
+<h2 id="packages">The package hierarchy</h2>
+
+<p>
+Go 1 addresses many deficiencies in the old standard library and
+cleans up a number of packages, making them more internally consistent
+and portable.
+</p>
+
+<p>
+This section describes how the packages have been rearranged in Go 1.
+Some have moved, some have been renamed, some have been deleted.
+New packages are described in later sections.
+</p>
+
+<h3 id="hierarchy">The package hierarchy</h3>
+
+<p>
+Go 1 has a rearranged package hierarchy that groups related items
+into subdirectories. For instance, <code>utf8</code> and
+<code>utf16</code> now occupy subdirectories of <code>unicode</code>.
+Also, <a href="#subrepo">some packages</a> have moved into
+subrepositories of
+<a href="http://code.google.com/p/go"><code>code.google.com/p/go</code></a>
+while <a href="#deleted">others</a> have been deleted outright.
+</p>
+
+<table class="codetable" frame="border" summary="Moved packages">
+<colgroup align="left" width="60%"></colgroup>
+<colgroup align="left" width="40%"></colgroup>
+<tr>
+<th align="left">Old path</th>
+<th align="left">New path</th>
+</tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>asn1</td> <td>encoding/asn1</td></tr>
+<tr><td>csv</td> <td>encoding/csv</td></tr>
+<tr><td>gob</td> <td>encoding/gob</td></tr>
+<tr><td>json</td> <td>encoding/json</td></tr>
+<tr><td>xml</td> <td>encoding/xml</td></tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>exp/template/html</td> <td>html/template</td></tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>big</td> <td>math/big</td></tr>
+<tr><td>cmath</td> <td>math/cmplx</td></tr>
+<tr><td>rand</td> <td>math/rand</td></tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>http</td> <td>net/http</td></tr>
+<tr><td>http/cgi</td> <td>net/http/cgi</td></tr>
+<tr><td>http/fcgi</td> <td>net/http/fcgi</td></tr>
+<tr><td>http/httptest</td> <td>net/http/httptest</td></tr>
+<tr><td>http/pprof</td> <td>net/http/pprof</td></tr>
+<tr><td>mail</td> <td>net/mail</td></tr>
+<tr><td>rpc</td> <td>net/rpc</td></tr>
+<tr><td>rpc/jsonrpc</td> <td>net/rpc/jsonrpc</td></tr>
+<tr><td>smtp</td> <td>net/smtp</td></tr>
+<tr><td>url</td> <td>net/url</td></tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>exec</td> <td>os/exec</td></tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>scanner</td> <td>text/scanner</td></tr>
+<tr><td>tabwriter</td> <td>text/tabwriter</td></tr>
+<tr><td>template</td> <td>text/template</td></tr>
+<tr><td>template/parse</td> <td>text/template/parse</td></tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>utf8</td> <td>unicode/utf8</td></tr>
+<tr><td>utf16</td> <td>unicode/utf16</td></tr>
+</table>
+
+<p>
+Note that the package names for the old <code>cmath</code> and
+<code>exp/template/html</code> packages have changed to <code>cmplx</code>
+and <code>template</code>.
+</p>
+
+<p>
+<em>Updating</em>:
+Running <code>go</code> <code>fix</code> will update all imports and package renames for packages that
+remain inside the standard repository.  Programs that import packages
+that are no longer in the standard repository will need to be edited
+by hand.
+</p>
+
+<h3 id="exp">The package tree exp</h3>
+
+<p>
+Because they are not standardized, the packages under the <code>exp</code> directory will not be available in the
+standard Go 1 release distributions, although they will be available in source code form
+in <a href="http://code.google.com/p/go/">the repository</a> for
+developers who wish to use them.
+</p>
+
+<p>
+Several packages have moved under <code>exp</code> at the time of Go 1's release:
+</p>
+
+<ul>
+<li><code>ebnf</code></li>
+<li><code>html</code><sup>&#8224;</sup></li>
+<li><code>go/types</code></li>
+</ul>
+
+<p>
+(<sup>&#8224;</sup>The <code>EscapeString</code> and <code>UnescapeString</code> types remain
+in package <code>html</code>.)
+</p>
+
+<p>
+All these packages are available under the same names, with the prefix <code>exp/</code>: <code>exp/ebnf</code> etc.
+</p>
+
+<p>
+Also, the <code>utf8.String</code> type has been moved to its own package, <code>exp/utf8string</code>.
+</p>
+
+<p>
+Finally, the <code>gotype</code> command now resides in <code>exp/gotype</code>, while
+<code>ebnflint</code> is now in <code>exp/ebnflint</code>.
+If they are installed, they now reside in <code>$GOROOT/bin/tool</code>.
+</p>
+
+<p>
+<em>Updating</em>:
+Code that uses packages in <code>exp</code> will need to be updated by hand,
+or else compiled from an installation that has <code>exp</code> available.
+The <code>go</code> <code>fix</code> tool or the compiler will complain about such uses.
+</p>
+
+<h3 id="old">The package tree old</h3>
+
+<p>
+Because they are deprecated, the packages under the <code>old</code> directory will not be available in the
+standard Go 1 release distributions, although they will be available in source code form for
+developers who wish to use them.
+</p>
+
+<p>
+The packages in their new locations are:
+</p>
+
+<ul>
+<li><code>old/netchan</code></li>
+<li><code>old/regexp</code></li>
+<li><code>old/template</code></li>
+</ul>
+
+<p>
+<em>Updating</em>:
+Code that uses packages now in <code>old</code> will need to be updated by hand,
+or else compiled from an installation that has <code>old</code> available.
+The <code>go</code> <code>fix</code> tool will warn about such uses.
+</p>
+
+<h3 id="deleted">Deleted packages</h3>
+
+<p>
+Go 1 deletes several packages outright:
+</p>
+
+<ul>
+<li><code>container/vector</code></li>
+<li><code>exp/datafmt</code></li>
+<li><code>go/typechecker</code></li>
+<li><code>try</code></li>
+</ul>
+
+<p>
+and also the command <code>gotry</code>.
+</p>
+
+<p>
+<em>Updating</em>:
+Code that uses <code>container/vector</code> should be updated to use
+slices directly.  See
+<a href="http://code.google.com/p/go-wiki/wiki/SliceTricks">the Go
+Language Community Wiki</a> for some suggestions.
+Code that uses the other packages (there should be almost zero) will need to be rethought.
+</p>
+
+<h3 id="subrepo">Packages moving to subrepositories</h3>
+
+<p>
+Go 1 has moved a number of packages into other repositories, usually sub-repositories of
+<a href="http://code.google.com/p/go/">the main Go repository</a>.
+This table lists the old and new import paths:
+
+<table class="codetable" frame="border" summary="Sub-repositories">
+<colgroup align="left" width="40%"></colgroup>
+<colgroup align="left" width="60%"></colgroup>
+<tr>
+<th align="left">Old</th>
+<th align="left">New</th>
+</tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>crypto/bcrypt</td> <td>code.google.com/p/go.crypto/bcrypt</tr>
+<tr><td>crypto/blowfish</td> <td>code.google.com/p/go.crypto/blowfish</tr>
+<tr><td>crypto/cast5</td> <td>code.google.com/p/go.crypto/cast5</tr>
+<tr><td>crypto/md4</td> <td>code.google.com/p/go.crypto/md4</tr>
+<tr><td>crypto/ocsp</td> <td>code.google.com/p/go.crypto/ocsp</tr>
+<tr><td>crypto/openpgp</td> <td>code.google.com/p/go.crypto/openpgp</tr>
+<tr><td>crypto/openpgp/armor</td> <td>code.google.com/p/go.crypto/openpgp/armor</tr>
+<tr><td>crypto/openpgp/elgamal</td> <td>code.google.com/p/go.crypto/openpgp/elgamal</tr>
+<tr><td>crypto/openpgp/errors</td> <td>code.google.com/p/go.crypto/openpgp/errors</tr>
+<tr><td>crypto/openpgp/packet</td> <td>code.google.com/p/go.crypto/openpgp/packet</tr>
+<tr><td>crypto/openpgp/s2k</td> <td>code.google.com/p/go.crypto/openpgp/s2k</tr>
+<tr><td>crypto/ripemd160</td> <td>code.google.com/p/go.crypto/ripemd160</tr>
+<tr><td>crypto/twofish</td> <td>code.google.com/p/go.crypto/twofish</tr>
+<tr><td>crypto/xtea</td> <td>code.google.com/p/go.crypto/xtea</tr>
+<tr><td>exp/ssh</td> <td>code.google.com/p/go.crypto/ssh</tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>image/bmp</td> <td>code.google.com/p/go.image/bmp</tr>
+<tr><td>image/tiff</td> <td>code.google.com/p/go.image/tiff</tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>net/dict</td> <td>code.google.com/p/go.net/dict</tr>
+<tr><td>net/websocket</td> <td>code.google.com/p/go.net/websocket</tr>
+<tr><td>exp/spdy</td> <td>code.google.com/p/go.net/spdy</tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>encoding/git85</td> <td>code.google.com/p/go.codereview/git85</tr>
+<tr><td>patch</td> <td>code.google.com/p/go.codereview/patch</tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>exp/wingui</td> <td>code.google.com/p/gowingui</tr>
+</table>
+
+<p>
+<em>Updating</em>:
+Running <code>go</code> <code>fix</code> will update imports of these packages to use the new import paths.
+Installations that depend on these packages will need to install them using
+a <code>go get</code> command.
+</p>
+
+<h2 id="major">Major changes to the library</h2>
+
+<p>
+This section describes significant changes to the core libraries, the ones that
+affect the most programs.
+</p>
+
+<h3 id="errors">The error type and errors package</h3>
+
+<p>
+The placement of <code>os.Error</code> in package <code>os</code> is mostly historical: errors first came up when implementing package <code>os</code>, and they seemed system-related at the time.
+Since then it has become clear that errors are more fundamental than the operating system.  For example, it would be nice to use <code>Errors</code> in packages that <code>os</code> depends on, like <code>syscall</code>.
+Also, having <code>Error</code> in <code>os</code> introduces many dependencies on <code>os</code> that would otherwise not exist.
+</p>
+
+<p>
+Go 1 solves these problems by introducing a built-in <code>error</code> interface type and a separate <code>errors</code> package (analogous to <code>bytes</code> and <code>strings</code>) that contains utility functions.
+It replaces <code>os.NewError</code> with
+<a href="/pkg/errors/#New"><code>errors.New</code></a>,
+giving errors a more central place in the environment.
+</p>
+
+<p>
+So the widely-used <code>String</code> method does not cause accidental satisfaction
+of the <code>error</code> interface, the <code>error</code> interface uses instead
+the name <code>Error</code> for that method:
+</p>
+
+<pre>
+    type error interface {
+        Error() string
+    }
+</pre>
+
+<p>
+The <code>fmt</code> library automatically invokes <code>Error</code>, as it already
+does for <code>String</code>, for easy printing of error values.
+</p>
+
+<pre><!--{{code "/doc/progs/go1.go" `/START ERROR EXAMPLE/` `/END ERROR EXAMPLE/`}}
+-->type SyntaxError struct {
+    File    string
+    Line    int
+    Message string
+}
+
+func (se *SyntaxError) Error() string {
+    return fmt.Sprintf(&#34;%s:%d: %s&#34;, se.File, se.Line, se.Message)
+}</pre>
+
+<p>
+All standard packages have been updated to use the new interface; the old <code>os.Error</code> is gone.
+</p>
+
+<p>
+A new package, <a href="/pkg/errors/"><code>errors</code></a>, contains the function
+</p>
+
+<pre>
+func New(text string) error
+</pre>
+
+<p>
+to turn a string into an error. It replaces the old <code>os.NewError</code>.
+</p>
+
+<pre><!--{{code "/doc/progs/go1.go" `/ErrSyntax/`}}
+-->    var ErrSyntax = errors.New(&#34;syntax error&#34;)</pre>
+		
+<p>
+<em>Updating</em>:
+Running <code>go</code> <code>fix</code> will update almost all code affected by the change.
+Code that defines error types with a <code>String</code> method will need to be updated
+by hand to rename the methods to <code>Error</code>.
+</p>
+
+<h3 id="errno">System call errors</h3>
+
+<p>
+The old <code>syscall</code> package, which predated <code>os.Error</code>
+(and just about everything else),
+returned errors as <code>int</code> values.
+In turn, the <code>os</code> package forwarded many of these errors, such
+as <code>EINVAL</code>, but using a different set of errors on each platform.
+This behavior was unpleasant and unportable.
+</p>
+
+<p>
+In Go 1, the
+<a href="/pkg/syscall/"><code>syscall</code></a>
+package instead returns an <code>error</code> for system call errors.
+On Unix, the implementation is done by a
+<a href="/pkg/syscall/#Errno"><code>syscall.Errno</code></a> type
+that satisfies <code>error</code> and replaces the old <code>os.Errno</code>.
+</p>
+
+<p>
+The changes affecting <code>os.EINVAL</code> and relatives are
+described <a href="#os">elsewhere</a>.
+
+<p>
+<em>Updating</em>:
+Running <code>go</code> <code>fix</code> will update almost all code affected by the change.
+Regardless, most code should use the <code>os</code> package
+rather than <code>syscall</code> and so will be unaffected.
+</p>
+
+<h3 id="time">Time</h3>
+
+<p>
+Time is always a challenge to support well in a programming language.
+The old Go <code>time</code> package had <code>int64</code> units, no
+real type safety,
+and no distinction between absolute times and durations.
+</p>
+
+<p>
+One of the most sweeping changes in the Go 1 library is therefore a
+complete redesign of the
+<a href="/pkg/time/"><code>time</code></a> package.
+Instead of an integer number of nanoseconds as an <code>int64</code>,
+and a separate <code>*time.Time</code> type to deal with human
+units such as hours and years,
+there are now two fundamental types:
+<a href="/pkg/time/#Time"><code>time.Time</code></a>
+(a value, so the <code>*</code> is gone), which represents a moment in time;
+and <a href="/pkg/time/#Duration"><code>time.Duration</code></a>,
+which represents an interval.
+Both have nanosecond resolution.
+A <code>Time</code> can represent any time into the ancient
+past and remote future, while a <code>Duration</code> can
+span plus or minus only about 290 years.
+There are methods on these types, plus a number of helpful
+predefined constant durations such as <code>time.Second</code>.
+</p>
+
+<p>
+Among the new methods are things like
+<a href="/pkg/time/#Time.Add"><code>Time.Add</code></a>,
+which adds a <code>Duration</code> to a <code>Time</code>, and
+<a href="/pkg/time/#Time.Sub"><code>Time.Sub</code></a>,
+which subtracts two <code>Times</code> to yield a <code>Duration</code>.
+</p>
+
+<p>
+The most important semantic change is that the Unix epoch (Jan 1, 1970) is now
+relevant only for those functions and methods that mention Unix:
+<a href="/pkg/time/#Unix"><code>time.Unix</code></a>
+and the <a href="/pkg/time/#Time.Unix"><code>Unix</code></a>
+and <a href="/pkg/time/#Time.UnixNano"><code>UnixNano</code></a> methods
+of the <code>Time</code> type.
+In particular,
+<a href="/pkg/time/#Now"><code>time.Now</code></a>
+returns a <code>time.Time</code> value rather than, in the old
+API, an integer nanosecond count since the Unix epoch.
+</p>
+
+<pre><!--{{code "/doc/progs/go1.go" `/sleepUntil/` `/^}/`}}
+--><span class="comment">// sleepUntil sleeps until the specified time. It returns immediately if it&#39;s too late.</span>
+func sleepUntil(wakeup time.Time) {
+    now := time.Now() <span class="comment">// A Time.</span>
+    if !wakeup.After(now) {
+        return
+    }
+    delta := wakeup.Sub(now) <span class="comment">// A Duration.</span>
+    fmt.Printf(&#34;Sleeping for %.3fs\n&#34;, delta.Seconds())
+    time.Sleep(delta)
+}</pre>
+
+<p>
+The new types, methods, and constants have been propagated through
+all the standard packages that use time, such as <code>os</code> and
+its representation of file time stamps.
+</p>
+
+<p>
+<em>Updating</em>:
+The <code>go</code> <code>fix</code> tool will update many uses of the old <code>time</code> package to use the new
+types and methods, although it does not replace values such as <code>1e9</code>
+representing nanoseconds per second.
+Also, because of type changes in some of the values that arise,
+some of the expressions rewritten by the fix tool may require
+further hand editing; in such cases the rewrite will include
+the correct function or method for the old functionality, but
+may have the wrong type or require further analysis.
+</p>
+
+<h2 id="minor">Minor changes to the library</h2>
+
+<p>
+This section describes smaller changes, such as those to less commonly
+used packages or that affect
+few programs beyond the need to run <code>go</code> <code>fix</code>.
+This category includes packages that are new in Go 1.
+Collectively they improve portability, regularize behavior, and
+make the interfaces more modern and Go-like.
+</p>
+
+<h3 id="archive_zip">The archive/zip package</h3>
+
+<p>
+In Go 1, <a href="/pkg/archive/zip/#Writer"><code>*zip.Writer</code></a> no
+longer has a <code>Write</code> method. Its presence was a mistake.
+</p>
+
+<p>
+<em>Updating</em>:
+What little code is affected will be caught by the compiler and must be updated by hand.
+</p>
+
+<h3 id="bufio">The bufio package</h3>
+
+<p>
+In Go 1, <a href="/pkg/bufio/#NewReaderSize"><code>bufio.NewReaderSize</code></a>
+and
+<a href="/pkg/bufio/#NewWriterSize"><code>bufio.NewWriterSize</code></a>
+functions no longer return an error for invalid sizes.
+If the argument size is too small or invalid, it is adjusted.
+</p>
+
+<p>
+<em>Updating</em>:
+Running <code>go</code> <code>fix</code> will update calls that assign the error to _.
+Calls that aren't fixed will be caught by the compiler and must be updated by hand.
+</p>
+
+<h3 id="compress">The compress/flate, compress/gzip and compress/zlib packages</h3>
+
+<p>
+In Go 1, the <code>NewWriterXxx</code> functions in
+<a href="/pkg/compress/flate"><code>compress/flate</code></a>,
+<a href="/pkg/compress/gzip"><code>compress/gzip</code></a> and
+<a href="/pkg/compress/zlib"><code>compress/zlib</code></a>
+all return <code>(*Writer, error)</code> if they take a compression level,
+and <code>*Writer</code> otherwise. Package <code>gzip</code>'s
+<code>Compressor</code> and <code>Decompressor</code> types have been renamed
+to <code>Writer</code> and <code>Reader</code>. Package <code>flate</code>'s
+<code>WrongValueError</code> type has been removed.
+</p>
+
+<p>
+<em>Updating</em>
+Running <code>go</code> <code>fix</code> will update old names and calls that assign the error to _.
+Calls that aren't fixed will be caught by the compiler and must be updated by hand.
+</p>
+
+<h3 id="crypto_aes_des">The crypto/aes and crypto/des packages</h3>
+
+<p>
+In Go 1, the <code>Reset</code> method has been removed. Go does not guarantee
+that memory is not copied and therefore this method was misleading.
+</p>
+
+<p>
+The cipher-specific types <code>*aes.Cipher</code>, <code>*des.Cipher</code>,
+and <code>*des.TripleDESCipher</code> have been removed in favor of
+<code>cipher.Block</code>.
+</p>
+
+<p>
+<em>Updating</em>:
+Remove the calls to Reset. Replace uses of the specific cipher types with
+cipher.Block.
+</p>
+
+<h3 id="crypto_elliptic">The crypto/elliptic package</h3>
+
+<p>
+In Go 1, <a href="/pkg/crypto/elliptic/#Curve"><code>elliptic.Curve</code></a>
+has been made an interface to permit alternative implementations. The curve
+parameters have been moved to the
+<a href="/pkg/crypto/elliptic/#CurveParams"><code>elliptic.CurveParams</code></a>
+structure.
+</p>
+
+<p>
+<em>Updating</em>:
+Existing users of <code>*elliptic.Curve</code> will need to change to
+simply <code>elliptic.Curve</code>. Calls to <code>Marshal</code>,
+<code>Unmarshal</code> and <code>GenerateKey</code> are now functions
+in <code>crypto/elliptic</code> that take an <code>elliptic.Curve</code>
+as their first argument.
+</p>
+
+<h3 id="crypto_hmac">The crypto/hmac package</h3>
+
+<p>
+In Go 1, the hash-specific functions, such as <code>hmac.NewMD5</code>, have
+been removed from <code>crypto/hmac</code>. Instead, <code>hmac.New</code> takes
+a function that returns a <code>hash.Hash</code>, such as <code>md5.New</code>.
+</p>
+
+<p>
+<em>Updating</em>:
+Running <code>go</code> <code>fix</code> will perform the needed changes.
+</p>
+
+<h3 id="crypto_x509">The crypto/x509 package</h3>
+
+<p>
+In Go 1, the
+<a href="/pkg/crypto/x509/#CreateCertificate"><code>CreateCertificate</code></a>
+and
+<a href="/pkg/crypto/x509/#CreateCRL"><code>CreateCRL</code></a>
+functions in <code>crypto/x509</code> have been altered to take an
+<code>interface{}</code> where they previously took a <code>*rsa.PublicKey</code>
+or <code>*rsa.PrivateKey</code>. This will allow other public key algorithms
+to be implemented in the future.
+</p>
+
+<p>
+<em>Updating</em>:
+No changes will be needed.
+</p>
+
+<h3 id="encoding_binary">The encoding/binary package</h3>
+
+<p>
+In Go 1, the <code>binary.TotalSize</code> function has been replaced by
+<a href="/pkg/encoding/binary/#Size"><code>Size</code></a>,
+which takes an <code>interface{}</code> argument rather than
+a <code>reflect.Value</code>.
+</p>
+
+<p>
+<em>Updating</em>:
+What little code is affected will be caught by the compiler and must be updated by hand.
+</p>
+
+<h3 id="encoding_xml">The encoding/xml package</h3>
+
+<p>
+In Go 1, the <a href="/pkg/encoding/xml/"><code>xml</code></a> package
+has been brought closer in design to the other marshaling packages such
+as <a href="/pkg/encoding/gob/"><code>encoding/gob</code></a>.
+</p>
+
+<p>
+The old <code>Parser</code> type is renamed
+<a href="/pkg/encoding/xml/#Decoder"><code>Decoder</code></a> and has a new
+<a href="/pkg/encoding/xml/#Decoder.Decode"><code>Decode</code></a> method. An
+<a href="/pkg/encoding/xml/#Encoder"><code>Encoder</code></a> type was also introduced.
+</p>
+
+<p>
+The functions <a href="/pkg/encoding/xml/#Marshal"><code>Marshal</code></a>
+and <a href="/pkg/encoding/xml/#Unmarshal"><code>Unmarshal</code></a>
+work with <code>[]byte</code> values now. To work with streams,
+use the new <a href="/pkg/encoding/xml/#Encoder"><code>Encoder</code></a>
+and <a href="/pkg/encoding/xml/#Decoder"><code>Decoder</code></a> types.
+</p>
+
+<p>
+When marshaling or unmarshaling values, the format of supported flags in
+field tags has changed to be closer to the
+<a href="/pkg/encoding/json"><code>json</code></a> package
+(<code>`xml:"name,flag"`</code>). The matching done between field tags, field
+names, and the XML attribute and element names is now case-sensitive.
+The <code>XMLName</code> field tag, if present, must also match the name
+of the XML element being marshaled.
+</p>
+
+<p>
+<em>Updating</em>:
+Running <code>go</code> <code>fix</code> will update most uses of the package except for some calls to
+<code>Unmarshal</code>. Special care must be taken with field tags,
+since the fix tool will not update them and if not fixed by hand they will
+misbehave silently in some cases. For example, the old
+<code>"attr"</code> is now written <code>",attr"</code> while plain
+<code>"attr"</code> remains valid but with a different meaning.
+</p>
+
+<h3 id="expvar">The expvar package</h3>
+
+<p>
+In Go 1, the <code>RemoveAll</code> function has been removed.
+The <code>Iter</code> function and Iter method on <code>*Map</code> have
+been replaced by
+<a href="/pkg/expvar/#Do"><code>Do</code></a>
+and
+<a href="/pkg/expvar/#Map.Do"><code>(*Map).Do</code></a>.
+</p>
+
+<p>
+<em>Updating</em>:
+Most code using <code>expvar</code> will not need changing. The rare code that used
+<code>Iter</code> can be updated to pass a closure to <code>Do</code> to achieve the same effect.
+</p>
+
+<h3 id="flag">The flag package</h3>
+
+<p>
+In Go 1, the interface <a href="/pkg/flag/#Value"><code>flag.Value</code></a> has changed slightly.
+The <code>Set</code> method now returns an <code>error</code> instead of
+a <code>bool</code> to indicate success or failure.
+</p>
+
+<p>
+There is also a new kind of flag, <code>Duration</code>, to support argument
+values specifying time intervals.
+Values for such flags must be given units, just as <code>time.Duration</code>
+formats them: <code>10s</code>, <code>1h30m</code>, etc.
+</p>
+
+<pre><!--{{code "/doc/progs/go1.go" `/timeout/`}}
+-->var timeout = flag.Duration(&#34;timeout&#34;, 30*time.Second, &#34;how long to wait for completion&#34;)</pre>
+
+<p>
+<em>Updating</em>:
+Programs that implement their own flags will need minor manual fixes to update their
+<code>Set</code> methods.
+The <code>Duration</code> flag is new and affects no existing code.
+</p>
+
+
+<h3 id="go">The go/* packages</h3>
+
+<p>
+Several packages under <code>go</code> have slightly revised APIs.
+</p>
+
+<p>
+A concrete <code>Mode</code> type was introduced for configuration mode flags
+in the packages
+<a href="/pkg/go/scanner/"><code>go/scanner</code></a>,
+<a href="/pkg/go/parser/"><code>go/parser</code></a>,
+<a href="/pkg/go/printer/"><code>go/printer</code></a>, and
+<a href="/pkg/go/doc/"><code>go/doc</code></a>.
+</p>
+
+<p>
+The modes <code>AllowIllegalChars</code> and <code>InsertSemis</code> have been removed
+from the <a href="/pkg/go/scanner/"><code>go/scanner</code></a> package. They were mostly
+useful for scanning text other then Go source files. Instead, the
+<a href="/pkg/text/scanner/"><code>text/scanner</code></a> package should be used
+for that purpose.
+</p>
+
+<p>
+The <a href="/pkg/go/scanner/#ErrorHandler"><code>ErrorHandler</code></a> provided
+to the scanner's <a href="/pkg/go/scanner/#Scanner.Init"><code>Init</code></a> method is
+now simply a function rather than an interface. The <code>ErrorVector</code> type has
+been removed in favor of the (existing) <a href="/pkg/go/scanner/#ErrorList"><code>ErrorList</code></a>
+type, and the <code>ErrorVector</code> methods have been migrated. Instead of embedding
+an <code>ErrorVector</code> in a client of the scanner, now a client should maintain
+an <code>ErrorList</code>.
+</p>
+
+<p>
+The set of parse functions provided by the <a href="/pkg/go/parser/"><code>go/parser</code></a>
+package has been reduced to the primary parse function
+<a href="/pkg/go/parser/#ParseFile"><code>ParseFile</code></a>, and a couple of
+convenience functions <a href="/pkg/go/parser/#ParseDir"><code>ParseDir</code></a>
+and <a href="/pkg/go/parser/#ParseExpr"><code>ParseExpr</code></a>.
+</p>
+
+<p>
+The <a href="/pkg/go/printer/"><code>go/printer</code></a> package supports an additional
+configuration mode <a href="/pkg/go/printer/#Mode"><code>SourcePos</code></a>;
+if set, the printer will emit <code>//line</code> comments such that the generated
+output contains the original source code position information. The new type
+<a href="/pkg/go/printer/#CommentedNode"><code>CommentedNode</code></a> can be
+used to provide comments associated with an arbitrary
+<a href="/pkg/go/ast/#Node"><code>ast.Node</code></a> (until now only
+<a href="/pkg/go/ast/#File"><code>ast.File</code></a> carried comment information).
+</p>
+
+<p>
+The type names of the <a href="/pkg/go/doc/"><code>go/doc</code></a> package have been
+streamlined by removing the <code>Doc</code> suffix: <code>PackageDoc</code>
+is now <code>Package</code>, <code>ValueDoc</code> is <code>Value</code>, etc.
+Also, all types now consistently have a <code>Name</code> field (or <code>Names</code>,
+in the case of type <code>Value</code>) and <code>Type.Factories</code> has become
+<code>Type.Funcs</code>.
+Instead of calling <code>doc.NewPackageDoc(pkg, importpath)</code>,
+documentation for a package is created with:
+</p>
+
+<pre>
+    doc.New(pkg, importpath, mode)
+</pre>
+
+<p>
+where the new <code>mode</code> parameter specifies the operation mode:
+if set to <a href="/pkg/go/doc/#AllDecls"><code>AllDecls</code></a>, all declarations
+(not just exported ones) are considered.
+The function <code>NewFileDoc</code> was removed, and the function
+<code>CommentText</code> has become the method
+<a href="/pkg/go/ast/#Text"><code>Text</code></a> of
+<a href="/pkg/go/ast/#CommentGroup"><code>ast.CommentGroup</code></a>.
+</p>
+
+<p>
+In package <a href="/pkg/go/token/"><code>go/token</code></a>, the
+<a href="/pkg/go/token/#FileSet"><code>token.FileSet</code></a> method <code>Files</code>
+(which originally returned a channel of <code>*token.File</code>s) has been replaced
+with the iterator <a href="/pkg/go/token/#FileSet.Iterate"><code>Iterate</code></a> that
+accepts a function argument instead.
+</p>
+
+<p>
+In package <a href="/pkg/go/build/"><code>go/build</code></a>, the API
+has been nearly completely replaced.
+The package still computes Go package information
+but it does not run the build: the <code>Cmd</code> and <code>Script</code>
+types are gone.
+(To build code, use the new
+<a href="/cmd/go/"><code>go</code></a> command instead.)
+The <code>DirInfo</code> type is now named
+<a href="/pkg/go/build/#Package"><code>Package</code></a>.
+<code>FindTree</code> and <code>ScanDir</code> are replaced by
+<a href="/pkg/go/build/#Import"><code>Import</code></a>
+and
+<a href="/pkg/go/build/#ImportDir"><code>ImportDir</code></a>.
+</p>
+
+<p>
+<em>Updating</em>:
+Code that uses packages in <code>go</code> will have to be updated by hand; the
+compiler will reject incorrect uses. Templates used in conjunction with any of the
+<code>go/doc</code> types may need manual fixes; the renamed fields will lead
+to run-time errors.
+</p>
+
+<h3 id="hash">The hash package</h3>
+
+<p>
+In Go 1, the definition of <a href="/pkg/hash/#Hash"><code>hash.Hash</code></a> includes
+a new method, <code>BlockSize</code>.  This new method is used primarily in the
+cryptographic libraries.
+</p>
+
+<p>
+The <code>Sum</code> method of the
+<a href="/pkg/hash/#Hash"><code>hash.Hash</code></a> interface now takes a
+<code>[]byte</code> argument, to which the hash value will be appended.
+The previous behavior can be recreated by adding a <code>nil</code> argument to the call.
+</p>
+
+<p>
+<em>Updating</em>:
+Existing implementations of <code>hash.Hash</code> will need to add a
+<code>BlockSize</code> method.  Hashes that process the input one byte at
+a time can implement <code>BlockSize</code> to return 1.
+Running <code>go</code> <code>fix</code> will update calls to the <code>Sum</code> methods of the various
+implementations of <code>hash.Hash</code>.
+</p>
+
+<p>
+<em>Updating</em>:
+Since the package's functionality is new, no updating is necessary.
+</p>
+
+<h3 id="http">The http package</h3>
+
+<p>
+In Go 1 the <a href="/pkg/net/http/"><code>http</code></a> package is refactored,
+putting some of the utilities into a
+<a href="/pkg/net/http/httputil/"><code>httputil</code></a> subdirectory.
+These pieces are only rarely needed by HTTP clients.
+The affected items are:
+</p>
+
+<ul>
+<li>ClientConn</li>
+<li>DumpRequest</li>
+<li>DumpRequestOut</li>
+<li>DumpResponse</li>
+<li>NewChunkedReader</li>
+<li>NewChunkedWriter</li>
+<li>NewClientConn</li>
+<li>NewProxyClientConn</li>
+<li>NewServerConn</li>
+<li>NewSingleHostReverseProxy</li>
+<li>ReverseProxy</li>
+<li>ServerConn</li>
+</ul>
+
+<p>
+The <code>Request.RawURL</code> field has been removed; it was a
+historical artifact.
+</p>
+
+<p>
+The <code>Handle</code> and <code>HandleFunc</code>
+functions, and the similarly-named methods of <code>ServeMux</code>,
+now panic if an attempt is made to register the same pattern twice.
+</p>
+
+<p>
+<em>Updating</em>:
+Running <code>go</code> <code>fix</code> will update the few programs that are affected except for
+uses of <code>RawURL</code>, which must be fixed by hand.
+</p>
+
+<h3 id="image">The image package</h3>
+
+<p>
+The <a href="/pkg/image/"><code>image</code></a> package has had a number of
+minor changes, rearrangements and renamings.
+</p>
+
+<p>
+Most of the color handling code has been moved into its own package,
+<a href="/pkg/image/color/"><code>image/color</code></a>.
+For the elements that moved, a symmetry arises; for instance,
+each pixel of an
+<a href="/pkg/image/#RGBA"><code>image.RGBA</code></a>
+is a
+<a href="/pkg/image/color/#RGBA"><code>color.RGBA</code></a>.
+</p>
+
+<p>
+The old <code>image/ycbcr</code> package has been folded, with some
+renamings, into the
+<a href="/pkg/image/"><code>image</code></a>
+and
+<a href="/pkg/image/color/"><code>image/color</code></a>
+packages.
+</p>
+
+<p>
+The old <code>image.ColorImage</code> type is still in the <code>image</code>
+package but has been renamed
+<a href="/pkg/image/#Uniform"><code>image.Uniform</code></a>,
+while <code>image.Tiled</code> has been removed.
+</p>
+
+<p>
+This table lists the renamings.
+</p>
+
+<table class="codetable" frame="border" summary="image renames">
+<colgroup align="left" width="50%"></colgroup>
+<colgroup align="left" width="50%"></colgroup>
+<tr>
+<th align="left">Old</th>
+<th align="left">New</th>
+</tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>image.Color</td> <td>color.Color</td></tr>
+<tr><td>image.ColorModel</td> <td>color.Model</td></tr>
+<tr><td>image.ColorModelFunc</td> <td>color.ModelFunc</td></tr>
+<tr><td>image.PalettedColorModel</td> <td>color.Palette</td></tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>image.RGBAColor</td> <td>color.RGBA</td></tr>
+<tr><td>image.RGBA64Color</td> <td>color.RGBA64</td></tr>
+<tr><td>image.NRGBAColor</td> <td>color.NRGBA</td></tr>
+<tr><td>image.NRGBA64Color</td> <td>color.NRGBA64</td></tr>
+<tr><td>image.AlphaColor</td> <td>color.Alpha</td></tr>
+<tr><td>image.Alpha16Color</td> <td>color.Alpha16</td></tr>
+<tr><td>image.GrayColor</td> <td>color.Gray</td></tr>
+<tr><td>image.Gray16Color</td> <td>color.Gray16</td></tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>image.RGBAColorModel</td> <td>color.RGBAModel</td></tr>
+<tr><td>image.RGBA64ColorModel</td> <td>color.RGBA64Model</td></tr>
+<tr><td>image.NRGBAColorModel</td> <td>color.NRGBAModel</td></tr>
+<tr><td>image.NRGBA64ColorModel</td> <td>color.NRGBA64Model</td></tr>
+<tr><td>image.AlphaColorModel</td> <td>color.AlphaModel</td></tr>
+<tr><td>image.Alpha16ColorModel</td> <td>color.Alpha16Model</td></tr>
+<tr><td>image.GrayColorModel</td> <td>color.GrayModel</td></tr>
+<tr><td>image.Gray16ColorModel</td> <td>color.Gray16Model</td></tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>ycbcr.RGBToYCbCr</td> <td>color.RGBToYCbCr</td></tr>
+<tr><td>ycbcr.YCbCrToRGB</td> <td>color.YCbCrToRGB</td></tr>
+<tr><td>ycbcr.YCbCrColorModel</td> <td>color.YCbCrModel</td></tr>
+<tr><td>ycbcr.YCbCrColor</td> <td>color.YCbCr</td></tr>
+<tr><td>ycbcr.YCbCr</td> <td>image.YCbCr</td></tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>ycbcr.SubsampleRatio444</td> <td>image.YCbCrSubsampleRatio444</td></tr>
+<tr><td>ycbcr.SubsampleRatio422</td> <td>image.YCbCrSubsampleRatio422</td></tr>
+<tr><td>ycbcr.SubsampleRatio420</td> <td>image.YCbCrSubsampleRatio420</td></tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>image.ColorImage</td> <td>image.Uniform</td></tr>
+</table>
+
+<p>
+The image package's <code>New</code> functions
+(<a href="/pkg/image/#NewRGBA"><code>NewRGBA</code></a>,
+<a href="/pkg/image/#NewRGBA64"><code>NewRGBA64</code></a>, etc.)
+take an <a href="/pkg/image/#Rectangle"><code>image.Rectangle</code></a> as an argument
+instead of four integers.
+</p>
+
+<p>
+Finally, there are new predefined <code>color.Color</code> variables
+<a href="/pkg/image/color/#Black"><code>color.Black</code></a>,
+<a href="/pkg/image/color/#White"><code>color.White</code></a>,
+<a href="/pkg/image/color/#Opaque"><code>color.Opaque</code></a>
+and
+<a href="/pkg/image/color/#Transparent"><code>color.Transparent</code></a>.
+</p>
+
+<p>
+<em>Updating</em>:
+Running <code>go</code> <code>fix</code> will update almost all code affected by the change.
+</p>
+
+<h3 id="log_syslog">The log/syslog package</h3>
+
+<p>
+In Go 1, the <a href="/pkg/log/syslog/#NewLogger"><code>syslog.NewLogger</code></a>
+function returns an error as well as a <code>log.Logger</code>.
+</p>
+
+<p>
+<em>Updating</em>:
+What little code is affected will be caught by the compiler and must be updated by hand.
+</p>
+
+<h3 id="mime">The mime package</h3>
+
+<p>
+In Go 1, the <a href="/pkg/mime/#FormatMediaType"><code>FormatMediaType</code></a> function
+of the <code>mime</code> package has  been simplified to make it
+consistent with
+<a href="/pkg/mime/#ParseMediaType"><code>ParseMediaType</code></a>.
+It now takes <code>"text/html"</code> rather than <code>"text"</code> and <code>"html"</code>.
+</p>
+
+<p>
+<em>Updating</em>:
+What little code is affected will be caught by the compiler and must be updated by hand.
+</p>
+
+<h3 id="net">The net package</h3>
+
+<p>
+In Go 1, the various <code>SetTimeout</code>,
+<code>SetReadTimeout</code>, and <code>SetWriteTimeout</code> methods
+have been replaced with
+<a href="/pkg/net/#IPConn.SetDeadline"><code>SetDeadline</code></a>,
+<a href="/pkg/net/#IPConn.SetReadDeadline"><code>SetReadDeadline</code></a>, and
+<a href="/pkg/net/#IPConn.SetWriteDeadline"><code>SetWriteDeadline</code></a>,
+respectively.  Rather than taking a timeout value in nanoseconds that
+apply to any activity on the connection, the new methods set an
+absolute deadline (as a <code>time.Time</code> value) after which
+reads and writes will time out and no longer block.
+</p>
+
+<p>
+There are also new functions
+<a href="/pkg/net/#DialTimeout"><code>net.DialTimeout</code></a>
+to simplify timing out dialing a network address and
+<a href="/pkg/net/#ListenMulticastUDP"><code>net.ListenMulticastUDP</code></a>
+to allow multicast UDP to listen concurrently across multiple listeners.
+The <code>net.ListenMulticastUDP</code> function replaces the old
+<code>JoinGroup</code> and <code>LeaveGroup</code> methods.
+</p>
+
+<p>
+<em>Updating</em>:
+Code that uses the old methods will fail to compile and must be updated by hand.
+The semantic change makes it difficult for the fix tool to update automatically.
+</p>
+
+<h3 id="os">The os package</h3>
+
+<p>
+The <code>Time</code> function has been removed; callers should use
+the <a href="/pkg/time/#Time"><code>Time</code></a> type from the
+<code>time</code> package.
+</p>
+
+<p>
+The <code>Exec</code> function has been removed; callers should use
+<code>Exec</code> from the <code>syscall</code> package, where available.
+</p>
+
+<p>
+The <code>ShellExpand</code> function has been renamed to <a
+href="/pkg/os/#ExpandEnv"><code>ExpandEnv</code></a>.
+</p>
+
+<p>
+The <a href="/pkg/os/#NewFile"><code>NewFile</code></a> function
+now takes a <code>uintptr</code> fd, instead of an <code>int</code>.
+The <a href="/pkg/os/#File.Fd"><code>Fd</code></a> method on files now
+also returns a <code>uintptr</code>.
+</p>
+
+<p>
+There are no longer error constants such as <code>EINVAL</code>
+in the <code>os</code> package, since the set of values varied with
+the underlying operating system. There are new portable functions like
+<a href="/pkg/os/#IsPermission"><code>IsPermission</code></a>
+to test common error properties, plus a few new error values
+with more Go-like names, such as
+<a href="/pkg/os/#ErrPermission"><code>ErrPermission</code></a>
+and
+<a href="/pkg/os/#ErrNoEnv"><code>ErrNoEnv</code></a>.
+</p>
+
+<p>
+The <code>Getenverror</code> function has been removed. To distinguish
+between a non-existent environment variable and an empty string,
+use <a href="/pkg/os/#Environ"><code>os.Environ</code></a> or
+<a href="/pkg/syscall/#Getenv"><code>syscall.Getenv</code></a>.
+</p>
+
+
+<p>
+The <a href="/pkg/os/#Process.Wait"><code>Process.Wait</code></a> method has
+dropped its option argument and the associated constants are gone
+from the package.
+Also, the function <code>Wait</code> is gone; only the method of
+the <code>Process</code> type persists.
+</p>
+
+<p>
+The <code>Waitmsg</code> type returned by
+<a href="/pkg/os/#Process.Wait"><code>Process.Wait</code></a>
+has been replaced with a more portable
+<a href="/pkg/os/#ProcessState"><code>ProcessState</code></a>
+type with accessor methods to recover information about the
+process.
+Because of changes to <code>Wait</code>, the <code>ProcessState</code>
+value always describes an exited process.
+Portability concerns simplified the interface in other ways, but the values returned by the
+<a href="/pkg/os/#ProcessState.Sys"><code>ProcessState.Sys</code></a> and
+<a href="/pkg/os/#ProcessState.SysUsage"><code>ProcessState.SysUsage</code></a>
+methods can be type-asserted to underlying system-specific data structures such as
+<a href="/pkg/syscall/#WaitStatus"><code>syscall.WaitStatus</code></a> and
+<a href="/pkg/syscall/#Rusage"><code>syscall.Rusage</code></a> on Unix.
+</p>
+
+<p>
+<em>Updating</em>:
+Running <code>go</code> <code>fix</code> will drop a zero argument to <code>Process.Wait</code>.
+All other changes will be caught by the compiler and must be updated by hand.
+</p>
+
+<h4 id="os_fileinfo">The os.FileInfo type</h4>
+
+<p>
+Go 1 redefines the <a href="/pkg/os/#FileInfo"><code>os.FileInfo</code></a> type,
+changing it from a struct to an interface:
+</p>
+
+<pre>
+    type FileInfo interface {
+        Name() string       // base name of the file
+        Size() int64        // length in bytes
+        Mode() FileMode     // file mode bits
+        ModTime() time.Time // modification time
+        IsDir() bool        // abbreviation for Mode().IsDir()
+        Sys() interface{}   // underlying data source (can return nil)
+    }
+</pre>
+
+<p>
+The file mode information has been moved into a subtype called
+<a href="/pkg/os/#FileMode"><code>os.FileMode</code></a>,
+a simple integer type with <code>IsDir</code>, <code>Perm</code>, and <code>String</code>
+methods.
+</p>
+
+<p>
+The system-specific details of file modes and properties such as (on Unix)
+i-number have been removed from <code>FileInfo</code> altogether.
+Instead, each operating system's <code>os</code> package provides an
+implementation of the <code>FileInfo</code> interface, which
+has a <code>Sys</code> method that returns the
+system-specific representation of file metadata.
+For instance, to discover the i-number of a file on a Unix system, unpack
+the <code>FileInfo</code> like this:
+</p>
+
+<pre>
+    fi, err := os.Stat("hello.go")
+    if err != nil {
+        log.Fatal(err)
+    }
+    // Check that it's a Unix file.
+    unixStat, ok := fi.Sys().(*syscall.Stat_t)
+    if !ok {
+        log.Fatal("hello.go: not a Unix file")
+    }
+    fmt.Printf("file i-number: %d\n", unixStat.Ino)
+</pre>
+
+<p>
+Assuming (which is unwise) that <code>"hello.go"</code> is a Unix file,
+the i-number expression could be contracted to
+</p>
+
+<pre>
+    fi.Sys().(*syscall.Stat_t).Ino
+</pre>
+
+<p>
+The vast majority of uses of <code>FileInfo</code> need only the methods
+of the standard interface.
+</p>
+
+<p>
+The <code>os</code> package no longer contains wrappers for the POSIX errors
+such as <code>ENOENT</code>.
+For the few programs that need to verify particular error conditions, there are
+now the boolean functions
+<a href="/pkg/os/#IsExist"><code>IsExist</code></a>,
+<a href="/pkg/os/#IsNotExist"><code>IsNotExist</code></a>
+and
+<a href="/pkg/os/#IsPermission"><code>IsPermission</code></a>.
+</p>
+
+<pre><!--{{code "/doc/progs/go1.go" `/os\.Open/` `/}/`}}
+-->    f, err := os.OpenFile(name, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0600)
+    if os.IsExist(err) {
+        log.Printf(&#34;%s already exists&#34;, name)
+    }</pre>
+
+<p>
+<em>Updating</em>:
+Running <code>go</code> <code>fix</code> will update code that uses the old equivalent of the current <code>os.FileInfo</code>
+and <code>os.FileMode</code> API.
+Code that needs system-specific file details will need to be updated by hand.
+Code that uses the old POSIX error values from the <code>os</code> package
+will fail to compile and will also need to be updated by hand.
+</p>
+
+<h3 id="os_signal">The os/signal package</h3>
+
+<p>
+The <code>os/signal</code> package in Go 1 replaces the
+<code>Incoming</code> function, which returned a channel
+that received all incoming signals,
+with the selective <code>Notify</code> function, which asks
+for delivery of specific signals on an existing channel.
+</p>
+
+<p>
+<em>Updating</em>:
+Code must be updated by hand.
+A literal translation of
+</p>
+<pre>
+c := signal.Incoming()
+</pre>
+<p>
+is
+</p>
+<pre>
+c := make(chan os.Signal)
+signal.Notify(c) // ask for all signals
+</pre>
+<p>
+but most code should list the specific signals it wants to handle instead:
+</p>
+<pre>
+c := make(chan os.Signal)
+signal.Notify(c, syscall.SIGHUP, syscall.SIGQUIT)
+</pre>
+
+<h3 id="path_filepath">The path/filepath package</h3>
+
+<p>
+In Go 1, the <a href="/pkg/path/filepath/#Walk"><code>Walk</code></a> function of the
+<code>path/filepath</code> package
+has been changed to take a function value of type
+<a href="/pkg/path/filepath/#WalkFunc"><code>WalkFunc</code></a>
+instead of a <code>Visitor</code> interface value.
+<code>WalkFunc</code> unifies the handling of both files and directories.
+</p>
+
+<pre>
+    type WalkFunc func(path string, info os.FileInfo, err error) error
+</pre>
+
+<p>
+The <code>WalkFunc</code> function will be called even for files or directories that could not be opened;
+in such cases the error argument will describe the failure.
+If a directory's contents are to be skipped,
+the function should return the value <a href="/pkg/path/filepath/#variables"><code>filepath.SkipDir</code></a>
+</p>
+
+<pre><!--{{code "/doc/progs/go1.go" `/STARTWALK/` `/ENDWALK/`}}
+-->    markFn := func(path string, info os.FileInfo, err error) error {
+        if path == &#34;pictures&#34; { <span class="comment">// Will skip walking of directory pictures and its contents.</span>
+            return filepath.SkipDir
+        }
+        if err != nil {
+            return err
+        }
+        log.Println(path)
+        return nil
+    }
+    err := filepath.Walk(&#34;.&#34;, markFn)
+    if err != nil {
+        log.Fatal(err)
+    }</pre>
+
+<p>
+<em>Updating</em>:
+The change simplifies most code but has subtle consequences, so affected programs
+will need to be updated by hand.
+The compiler will catch code using the old interface.
+</p>
+
+<h3 id="regexp">The regexp package</h3>
+
+<p>
+The <a href="/pkg/regexp/"><code>regexp</code></a> package has been rewritten.
+It has the same interface but the specification of the regular expressions
+it supports has changed from the old "egrep" form to that of
+<a href="http://code.google.com/p/re2/">RE2</a>.
+</p>
+
+<p>
+<em>Updating</em>:
+Code that uses the package should have its regular expressions checked by hand.
+</p>
+
+<h3 id="runtime">The runtime package</h3>
+
+<p>
+In Go 1, much of the API exported by package
+<code>runtime</code> has been removed in favor of
+functionality provided by other packages.
+Code using the <code>runtime.Type</code> interface
+or its specific concrete type implementations should
+now use package <a href="/pkg/reflect/"><code>reflect</code></a>.
+Code using <code>runtime.Semacquire</code> or <code>runtime.Semrelease</code>
+should use channels or the abstractions in package <a href="/pkg/sync/"><code>sync</code></a>.
+The <code>runtime.Alloc</code>, <code>runtime.Free</code>,
+and <code>runtime.Lookup</code> functions, an unsafe API created for
+debugging the memory allocator, have no replacement.
+</p>
+
+<p>
+Before, <code>runtime.MemStats</code> was a global variable holding
+statistics about memory allocation, and calls to <code>runtime.UpdateMemStats</code>
+ensured that it was up to date.
+In Go 1, <code>runtime.MemStats</code> is a struct type, and code should use
+<a href="/pkg/runtime/#ReadMemStats"><code>runtime.ReadMemStats</code></a>
+to obtain the current statistics.
+</p>
+
+<p>
+The package adds a new function,
+<a href="/pkg/runtime/#NumCPU"><code>runtime.NumCPU</code></a>, that returns the number of CPUs available
+for parallel execution, as reported by the operating system kernel.
+Its value can inform the setting of <code>GOMAXPROCS</code>.
+The <code>runtime.Cgocalls</code> and <code>runtime.Goroutines</code> functions
+have been renamed to <code>runtime.NumCgoCall</code> and <code>runtime.NumGoroutine</code>.
+</p>
+
+<p>
+<em>Updating</em>:
+Running <code>go</code> <code>fix</code> will update code for the function renamings.
+Other code will need to be updated by hand.
+</p>
+
+<h3 id="strconv">The strconv package</h3>
+
+<p>
+In Go 1, the
+<a href="/pkg/strconv/"><code>strconv</code></a>
+package has been significantly reworked to make it more Go-like and less C-like,
+although <code>Atoi</code> lives on (it's similar to
+<code>int(ParseInt(x, 10, 0))</code>, as does
+<code>Itoa(x)</code> (<code>FormatInt(int64(x), 10)</code>).
+There are also new variants of some of the functions that append to byte slices rather than
+return strings, to allow control over allocation.
+</p>
+
+<p>
+This table summarizes the renamings; see the
+<a href="/pkg/strconv/">package documentation</a>
+for full details.
+</p>
+
+<table class="codetable" frame="border" summary="strconv renames">
+<colgroup align="left" width="50%"></colgroup>
+<colgroup align="left" width="50%"></colgroup>
+<tr>
+<th align="left">Old call</th>
+<th align="left">New call</th>
+</tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>Atob(x)</td> <td>ParseBool(x)</td></tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>Atof32(x)</td> <td>ParseFloat(x, 32)§</td></tr>
+<tr><td>Atof64(x)</td> <td>ParseFloat(x, 64)</td></tr>
+<tr><td>AtofN(x, n)</td> <td>ParseFloat(x, n)</td></tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>Atoi(x)</td> <td>Atoi(x)</td></tr>
+<tr><td>Atoi(x)</td> <td>ParseInt(x, 10, 0)§</td></tr>
+<tr><td>Atoi64(x)</td> <td>ParseInt(x, 10, 64)</td></tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>Atoui(x)</td> <td>ParseUint(x, 10, 0)§</td></tr>
+<tr><td>Atoui64(x)</td> <td>ParseUint(x, 10, 64)</td></tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>Btoi64(x, b)</td> <td>ParseInt(x, b, 64)</td></tr>
+<tr><td>Btoui64(x, b)</td> <td>ParseUint(x, b, 64)</td></tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>Btoa(x)</td> <td>FormatBool(x)</td></tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>Ftoa32(x, f, p)</td> <td>FormatFloat(float64(x), f, p, 32)</td></tr>
+<tr><td>Ftoa64(x, f, p)</td> <td>FormatFloat(x, f, p, 64)</td></tr>
+<tr><td>FtoaN(x, f, p, n)</td> <td>FormatFloat(x, f, p, n)</td></tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>Itoa(x)</td> <td>Itoa(x)</td></tr>
+<tr><td>Itoa(x)</td> <td>FormatInt(int64(x), 10)</td></tr>
+<tr><td>Itoa64(x)</td> <td>FormatInt(x, 10)</td></tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>Itob(x, b)</td> <td>FormatInt(int64(x), b)</td></tr>
+<tr><td>Itob64(x, b)</td> <td>FormatInt(x, b)</td></tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>Uitoa(x)</td> <td>FormatUint(uint64(x), 10)</td></tr>
+<tr><td>Uitoa64(x)</td> <td>FormatUint(x, 10)</td></tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>Uitob(x, b)</td> <td>FormatUint(uint64(x), b)</td></tr>
+<tr><td>Uitob64(x, b)</td> <td>FormatUint(x, b)</td></tr>
+</table>
+		
+<p>
+<em>Updating</em>:
+Running <code>go</code> <code>fix</code> will update almost all code affected by the change.
+<br>
+§ <code>Atoi</code> persists but <code>Atoui</code> and <code>Atof32</code> do not, so
+they may require
+a cast that must be added by hand; the <code>go</code> <code>fix</code> tool will warn about it.
+</p>
+
+
+<h3 id="templates">The template packages</h3>
+
+<p>
+The <code>template</code> and <code>exp/template/html</code> packages have moved to 
+<a href="/pkg/text/template/"><code>text/template</code></a> and
+<a href="/pkg/html/template/"><code>html/template</code></a>.
+More significant, the interface to these packages has been simplified.
+The template language is the same, but the concept of "template set" is gone
+and the functions and methods of the packages have changed accordingly,
+often by elimination.
+</p>
+
+<p>
+Instead of sets, a <code>Template</code> object
+may contain multiple named template definitions,
+in effect constructing
+name spaces for template invocation.
+A template can invoke any other template associated with it, but only those
+templates associated with it.
+The simplest way to associate templates is to parse them together, something
+made easier with the new structure of the packages.
+</p>
+
+<p>
+<em>Updating</em>:
+The imports will be updated by fix tool.
+Single-template uses will be otherwise be largely unaffected.
+Code that uses multiple templates in concert will need to be updated by hand.
+The <a href="/pkg/text/template/#examples">examples</a> in
+the documentation for <code>text/template</code> can provide guidance.
+</p>
+
+<h3 id="testing">The testing package</h3>
+
+<p>
+The testing package has a type, <code>B</code>, passed as an argument to benchmark functions.
+In Go 1, <code>B</code> has new methods, analogous to those of <code>T</code>, enabling
+logging and failure reporting.
+</p>
+
+<pre><!--{{code "/doc/progs/go1.go" `/func.*Benchmark/` `/^}/`}}
+-->func BenchmarkSprintf(b *testing.B) {
+    <span class="comment">// Verify correctness before running benchmark.</span>
+    b.StopTimer()
+    got := fmt.Sprintf(&#34;%x&#34;, 23)
+    const expect = &#34;17&#34;
+    if expect != got {
+        b.Fatalf(&#34;expected %q; got %q&#34;, expect, got)
+    }
+    b.StartTimer()
+    for i := 0; i &lt; b.N; i++ {
+        fmt.Sprintf(&#34;%x&#34;, 23)
+    }
+}</pre>
+
+<p>
+<em>Updating</em>:
+Existing code is unaffected, although benchmarks that use <code>println</code>
+or <code>panic</code> should be updated to use the new methods.
+</p>
+
+<h3 id="testing_script">The testing/script package</h3>
+
+<p>
+The testing/script package has been deleted. It was a dreg.
+</p>
+
+<p>
+<em>Updating</em>:
+No code is likely to be affected.
+</p>
+
+<h3 id="unsafe">The unsafe package</h3>
+
+<p>
+In Go 1, the functions
+<code>unsafe.Typeof</code>, <code>unsafe.Reflect</code>,
+<code>unsafe.Unreflect</code>, <code>unsafe.New</code>, and
+<code>unsafe.NewArray</code> have been removed;
+they duplicated safer functionality provided by
+package <a href="/pkg/reflect/"><code>reflect</code></a>.
+</p>
+
+<p>
+<em>Updating</em>:
+Code using these functions must be rewritten to use
+package <a href="/pkg/reflect/"><code>reflect</code></a>.
+The changes to <a href="http://code.google.com/p/go/source/detail?r=2646dc956207">encoding/gob</a> and the <a href="http://code.google.com/p/goprotobuf/source/detail?r=5340ad310031">protocol buffer library</a>
+may be helpful as examples.
+</p>
+
+<h3 id="url">The url package</h3>
+
+<p>
+In Go 1 several fields from the <a href="/pkg/net/url/#URL"><code>url.URL</code></a> type
+were removed or replaced.
+</p>
+
+<p>
+The <a href="/pkg/net/url/#URL.String"><code>String</code></a> method now
+predictably rebuilds an encoded URL string using all of <code>URL</code>'s
+fields as necessary. The resulting string will also no longer have
+passwords escaped.
+</p>
+
+<p>
+The <code>Raw</code> field has been removed. In most cases the <code>String</code>
+method may be used in its place.
+</p>
+
+<p>
+The old <code>RawUserinfo</code> field is replaced by the <code>User</code>
+field, of type <a href="/pkg/net/url/#Userinfo"><code>*net.Userinfo</code></a>.
+Values of this type may be created using the new <a href="/pkg/net/url/#User"><code>net.User</code></a>
+and <a href="/pkg/net/url/#UserPassword"><code>net.UserPassword</code></a>
+functions. The <code>EscapeUserinfo</code> and <code>UnescapeUserinfo</code>
+functions are also gone.
+</p>
+
+<p>
+The <code>RawAuthority</code> field has been removed. The same information is
+available in the <code>Host</code> and <code>User</code> fields.
+</p>
+
+<p>
+The <code>RawPath</code> field and the <code>EncodedPath</code> method have
+been removed. The path information in rooted URLs (with a slash following the
+schema) is now available only in decoded form in the <code>Path</code> field.
+Occasionally, the encoded data may be required to obtain information that
+was lost in the decoding process. These cases must be handled by accessing
+the data the URL was built from.
+</p>
+
+<p>
+URLs with non-rooted paths, such as <code>"mailto:dev@golang.org?subject=Hi"</code>,
+are also handled differently. The <code>OpaquePath</code> boolean field has been
+removed and a new <code>Opaque</code> string field introduced to hold the encoded
+path for such URLs. In Go 1, the cited URL parses as:
+</p>
+
+<pre>
+    URL{
+        Scheme: "mailto",
+        Opaque: "dev@golang.org",
+        RawQuery: "subject=Hi",
+    }
+</pre>
+
+<p>
+A new <a href="/pkg/net/url/#URL.RequestURI"><code>RequestURI</code></a> method was
+added to <code>URL</code>.
+</p>
+
+<p>
+The <code>ParseWithReference</code> function has been renamed to <code>ParseWithFragment</code>.
+</p>
+
+<p>
+<em>Updating</em>:
+Code that uses the old fields will fail to compile and must be updated by hand.
+The semantic changes make it difficult for the fix tool to update automatically.
+</p>
+
+<h2 id="cmd_go">The go command</h2>
+
+<p>
+Go 1 introduces the <a href="/cmd/go/">go command</a>, a tool for fetching,
+building, and installing Go packages and commands. The <code>go</code> command
+does away with makefiles, instead using Go source code to find dependencies and
+determine build conditions. Most existing Go programs will no longer require
+makefiles to be built.
+</p>
+
+<p>
+See <a href="/doc/code.html">How to Write Go Code</a> for a primer on the
+<code>go</code> command and the <a href="/cmd/go/">go command documentation</a>
+for the full details.
+</p>
+
+<p>
+<em>Updating</em>:
+Projects that depend on the Go project's old makefile-based build
+infrastructure (<code>Make.pkg</code>, <code>Make.cmd</code>, and so on) should
+switch to using the <code>go</code> command for building Go code and, if
+necessary, rewrite their makefiles to perform any auxiliary build tasks.
+</p>
+
+<h2 id="cmd_cgo">The cgo command</h2>
+
+<p>
+In Go 1, the <a href="/cmd/cgo">cgo command</a>
+uses a different <code>_cgo_export.h</code>
+file, which is generated for packages containing <code>//export</code> lines.
+The <code>_cgo_export.h</code> file now begins with the C preamble comment,
+so that exported function definitions can use types defined there.
+This has the effect of compiling the preamble multiple times, so a
+package using <code>//export</code> must not put function definitions
+or variable initializations in the C preamble.
+</p>
+
+<h2 id="releases">Packaged releases</h2>
+
+<p>
+One of the most significant changes associated with Go 1 is the availability
+of prepackaged, downloadable distributions.
+They are available for many combinations of architecture and operating system
+(including Windows) and the list will grow.
+Installation details are described on the
+<a href="/doc/install">Getting Started</a> page, while
+the distributions themselves are listed on the
+<a href="http://code.google.com/p/go/downloads/list">downloads page</a>.
+
+
+</div>
+
+<div id="footer">
+Build version go1.0.1.<br>
+Except as <a href="http://code.google.com/policies.html#restrictions">noted</a>,
+the content of this page is licensed under the
+Creative Commons Attribution 3.0 License,
+and code is licensed under a <a href="/LICENSE">BSD license</a>.<br>
+<a href="/doc/tos.html">Terms of Service</a> | 
+<a href="http://www.google.com/intl/en/privacy/privacy-policy.html">Privacy Policy</a>
+</div>
+
+<script type="text/javascript">
+(function() {
+  var ga = document.createElement("script"); ga.type = "text/javascript"; ga.async = true;
+  ga.src = ("https:" == document.location.protocol ? "https://ssl" : "http://www") + ".google-analytics.com/ga.js";
+  var s = document.getElementsByTagName("script")[0]; s.parentNode.insertBefore(ga, s);
+})();
+</script>
+</body>
+<script type="text/javascript">
+  (function() {
+    var po = document.createElement('script'); po.type = 'text/javascript'; po.async = true;
+    po.src = 'https://apis.google.com/js/plusone.js';
+    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(po, s);
+  })();
+</script>
+</html>
+
diff --git a/html/testdata/webkit/README b/html/testdata/webkit/README
new file mode 100644
index 0000000..9b4c2d8
--- /dev/null
+++ b/html/testdata/webkit/README
@@ -0,0 +1,28 @@
+The *.dat files in this directory are copied from The WebKit Open Source
+Project, specifically $WEBKITROOT/LayoutTests/html5lib/resources.
+WebKit is licensed under a BSD style license.
+http://webkit.org/coding/bsd-license.html says:
+
+Copyright (C) 2009 Apple Inc. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS "AS IS" AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/html/testdata/webkit/adoption01.dat b/html/testdata/webkit/adoption01.dat
new file mode 100644
index 0000000..787e1b0
--- /dev/null
+++ b/html/testdata/webkit/adoption01.dat
@@ -0,0 +1,194 @@
+#data
+<a><p></a></p>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|     <p>
+|       <a>
+
+#data
+<a>1<p>2</a>3</p>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "1"
+|     <p>
+|       <a>
+|         "2"
+|       "3"
+
+#data
+<a>1<button>2</a>3</button>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "1"
+|     <button>
+|       <a>
+|         "2"
+|       "3"
+
+#data
+<a>1<b>2</a>3</b>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "1"
+|       <b>
+|         "2"
+|     <b>
+|       "3"
+
+#data
+<a>1<div>2<div>3</a>4</div>5</div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "1"
+|     <div>
+|       <a>
+|         "2"
+|       <div>
+|         <a>
+|           "3"
+|         "4"
+|       "5"
+
+#data
+<table><a>1<p>2</a>3</p>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "1"
+|     <p>
+|       <a>
+|         "2"
+|       "3"
+|     <table>
+
+#data
+<b><b><a><p></a>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <b>
+|         <a>
+|         <p>
+|           <a>
+
+#data
+<b><a><b><p></a>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <a>
+|         <b>
+|       <b>
+|         <p>
+|           <a>
+
+#data
+<a><b><b><p></a>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <b>
+|         <b>
+|     <b>
+|       <b>
+|         <p>
+|           <a>
+
+#data
+<p>1<s id="A">2<b id="B">3</p>4</s>5</b>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "1"
+|       <s>
+|         id="A"
+|         "2"
+|         <b>
+|           id="B"
+|           "3"
+|     <s>
+|       id="A"
+|       <b>
+|         id="B"
+|         "4"
+|     <b>
+|       id="B"
+|       "5"
+
+#data
+<table><a>1<td>2</td>3</table>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "1"
+|     <a>
+|       "3"
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             "2"
+
+#data
+<table>A<td>B</td>C</table>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "AC"
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             "B"
+
+#data
+<a><svg><tr><input></a>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <svg svg>
+|         <svg tr>
+|           <svg input>
diff --git a/html/testdata/webkit/adoption02.dat b/html/testdata/webkit/adoption02.dat
new file mode 100644
index 0000000..d18151b
--- /dev/null
+++ b/html/testdata/webkit/adoption02.dat
@@ -0,0 +1,31 @@
+#data
+<b>1<i>2<p>3</b>4
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       "1"
+|       <i>
+|         "2"
+|     <i>
+|       <p>
+|         <b>
+|           "3"
+|         "4"
+
+#data
+<a><div><style></style><address><a>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|     <div>
+|       <a>
+|         <style>
+|       <address>
+|         <a>
+|         <a>
diff --git a/html/testdata/webkit/comments01.dat b/html/testdata/webkit/comments01.dat
new file mode 100644
index 0000000..44f1876
--- /dev/null
+++ b/html/testdata/webkit/comments01.dat
@@ -0,0 +1,135 @@
+#data
+FOO<!-- BAR -->BAZ
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!--  BAR  -->
+|     "BAZ"
+
+#data
+FOO<!-- BAR --!>BAZ
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!--  BAR  -->
+|     "BAZ"
+
+#data
+FOO<!-- BAR --   >BAZ
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!--  BAR --   >BAZ -->
+
+#data
+FOO<!-- BAR -- <QUX> -- MUX -->BAZ
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!--  BAR -- <QUX> -- MUX  -->
+|     "BAZ"
+
+#data
+FOO<!-- BAR -- <QUX> -- MUX --!>BAZ
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!--  BAR -- <QUX> -- MUX  -->
+|     "BAZ"
+
+#data
+FOO<!-- BAR -- <QUX> -- MUX -- >BAZ
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!--  BAR -- <QUX> -- MUX -- >BAZ -->
+
+#data
+FOO<!---->BAZ
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!--  -->
+|     "BAZ"
+
+#data
+FOO<!--->BAZ
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!--  -->
+|     "BAZ"
+
+#data
+FOO<!-->BAZ
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!--  -->
+|     "BAZ"
+
+#data
+<?xml version="1.0">Hi
+#errors
+#document
+| <!-- ?xml version="1.0" -->
+| <html>
+|   <head>
+|   <body>
+|     "Hi"
+
+#data
+<?xml version="1.0">
+#errors
+#document
+| <!-- ?xml version="1.0" -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<?xml version
+#errors
+#document
+| <!-- ?xml version -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+FOO<!----->BAZ
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!-- - -->
+|     "BAZ"
diff --git a/html/testdata/webkit/doctype01.dat b/html/testdata/webkit/doctype01.dat
new file mode 100644
index 0000000..ae45732
--- /dev/null
+++ b/html/testdata/webkit/doctype01.dat
@@ -0,0 +1,370 @@
+#data
+<!DOCTYPE html>Hello
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!dOctYpE HtMl>Hello
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPEhtml>Hello
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE>Hello
+#errors
+#document
+| <!DOCTYPE >
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE >Hello
+#errors
+#document
+| <!DOCTYPE >
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato>Hello
+#errors
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato >Hello
+#errors
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato taco>Hello
+#errors
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato taco "ddd>Hello
+#errors
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato sYstEM>Hello
+#errors
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato sYstEM    >Hello
+#errors
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE   potato       sYstEM  ggg>Hello
+#errors
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato SYSTEM taco  >Hello
+#errors
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato SYSTEM 'taco"'>Hello
+#errors
+#document
+| <!DOCTYPE potato "" "taco"">
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato SYSTEM "taco">Hello
+#errors
+#document
+| <!DOCTYPE potato "" "taco">
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato SYSTEM "tai'co">Hello
+#errors
+#document
+| <!DOCTYPE potato "" "tai'co">
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato SYSTEMtaco "ddd">Hello
+#errors
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato grass SYSTEM taco>Hello
+#errors
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato pUbLIc>Hello
+#errors
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato pUbLIc >Hello
+#errors
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato pUbLIcgoof>Hello
+#errors
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato PUBLIC goof>Hello
+#errors
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato PUBLIC "go'of">Hello
+#errors
+#document
+| <!DOCTYPE potato "go'of" "">
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato PUBLIC 'go'of'>Hello
+#errors
+#document
+| <!DOCTYPE potato "go" "">
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato PUBLIC 'go:hh   of' >Hello
+#errors
+#document
+| <!DOCTYPE potato "go:hh   of" "">
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato PUBLIC "W3C-//dfdf" SYSTEM ggg>Hello
+#errors
+#document
+| <!DOCTYPE potato "W3C-//dfdf" "">
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+   "http://www.w3.org/TR/html4/strict.dtd">Hello
+#errors
+#document
+| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE ...>Hello
+#errors
+#document
+| <!DOCTYPE ...>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+#errors
+#document
+| <!DOCTYPE html "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
+#errors
+#document
+| <!DOCTYPE html "-//W3C//DTD XHTML 1.0 Frameset//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE root-element [SYSTEM OR PUBLIC FPI] "uri" [ 
+<!-- internal declarations -->
+]>
+#errors
+#document
+| <!DOCTYPE root-element>
+| <html>
+|   <head>
+|   <body>
+|     "]>"
+
+#data
+<!DOCTYPE html PUBLIC
+  "-//WAPFORUM//DTD XHTML Mobile 1.0//EN"
+    "http://www.wapforum.org/DTD/xhtml-mobile10.dtd">
+#errors
+#document
+| <!DOCTYPE html "-//WAPFORUM//DTD XHTML Mobile 1.0//EN" "http://www.wapforum.org/DTD/xhtml-mobile10.dtd">
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE HTML SYSTEM "http://www.w3.org/DTD/HTML4-strict.dtd"><body><b>Mine!</b></body>
+#errors
+#document
+| <!DOCTYPE html "" "http://www.w3.org/DTD/HTML4-strict.dtd">
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       "Mine!"
+
+#data
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN""http://www.w3.org/TR/html4/strict.dtd">
+#errors
+#document
+| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"'http://www.w3.org/TR/html4/strict.dtd'>
+#errors
+#document
+| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE HTML PUBLIC"-//W3C//DTD HTML 4.01//EN"'http://www.w3.org/TR/html4/strict.dtd'>
+#errors
+#document
+| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE HTML PUBLIC'-//W3C//DTD HTML 4.01//EN''http://www.w3.org/TR/html4/strict.dtd'>
+#errors
+#document
+| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+| <html>
+|   <head>
+|   <body>
diff --git a/html/testdata/webkit/entities01.dat b/html/testdata/webkit/entities01.dat
new file mode 100644
index 0000000..c8073b7
--- /dev/null
+++ b/html/testdata/webkit/entities01.dat
@@ -0,0 +1,603 @@
+#data
+FOO&gt;BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO>BAR"
+
+#data
+FOO&gtBAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO>BAR"
+
+#data
+FOO&gt BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO> BAR"
+
+#data
+FOO&gt;;;BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO>;;BAR"
+
+#data
+I'm &notit; I tell you
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "I'm ¬it; I tell you"
+
+#data
+I'm &notin; I tell you
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "I'm ∉ I tell you"
+
+#data
+FOO& BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO& BAR"
+
+#data
+FOO&<BAR>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO&"
+|     <bar>
+
+#data
+FOO&&&&gt;BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO&&&>BAR"
+
+#data
+FOO&#41;BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO)BAR"
+
+#data
+FOO&#x41;BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOABAR"
+
+#data
+FOO&#X41;BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOABAR"
+
+#data
+FOO&#BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO&#BAR"
+
+#data
+FOO&#ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO&#ZOO"
+
+#data
+FOO&#xBAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOºR"
+
+#data
+FOO&#xZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO&#xZOO"
+
+#data
+FOO&#XZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO&#XZOO"
+
+#data
+FOO&#41BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO)BAR"
+
+#data
+FOO&#x41BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO䆺R"
+
+#data
+FOO&#x41ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOAZOO"
+
+#data
+FOO&#x0000;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO�ZOO"
+
+#data
+FOO&#x0078;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOxZOO"
+
+#data
+FOO&#x0079;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOyZOO"
+
+#data
+FOO&#x0080;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO€ZOO"
+
+#data
+FOO&#x0081;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOZOO"
+
+#data
+FOO&#x0082;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO‚ZOO"
+
+#data
+FOO&#x0083;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOƒZOO"
+
+#data
+FOO&#x0084;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO„ZOO"
+
+#data
+FOO&#x0085;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO…ZOO"
+
+#data
+FOO&#x0086;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO†ZOO"
+
+#data
+FOO&#x0087;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO‡ZOO"
+
+#data
+FOO&#x0088;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOˆZOO"
+
+#data
+FOO&#x0089;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO‰ZOO"
+
+#data
+FOO&#x008A;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOŠZOO"
+
+#data
+FOO&#x008B;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO‹ZOO"
+
+#data
+FOO&#x008C;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOŒZOO"
+
+#data
+FOO&#x008D;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOZOO"
+
+#data
+FOO&#x008E;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOŽZOO"
+
+#data
+FOO&#x008F;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOZOO"
+
+#data
+FOO&#x0090;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOZOO"
+
+#data
+FOO&#x0091;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO‘ZOO"
+
+#data
+FOO&#x0092;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO’ZOO"
+
+#data
+FOO&#x0093;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO“ZOO"
+
+#data
+FOO&#x0094;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO”ZOO"
+
+#data
+FOO&#x0095;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO•ZOO"
+
+#data
+FOO&#x0096;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO–ZOO"
+
+#data
+FOO&#x0097;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO—ZOO"
+
+#data
+FOO&#x0098;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO˜ZOO"
+
+#data
+FOO&#x0099;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO™ZOO"
+
+#data
+FOO&#x009A;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOšZOO"
+
+#data
+FOO&#x009B;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO›ZOO"
+
+#data
+FOO&#x009C;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOœZOO"
+
+#data
+FOO&#x009D;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOZOO"
+
+#data
+FOO&#x009E;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOžZOO"
+
+#data
+FOO&#x009F;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOŸZOO"
+
+#data
+FOO&#x00A0;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO ZOO"
+
+#data
+FOO&#xD7FF;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO퟿ZOO"
+
+#data
+FOO&#xD800;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO�ZOO"
+
+#data
+FOO&#xD801;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO�ZOO"
+
+#data
+FOO&#xDFFE;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO�ZOO"
+
+#data
+FOO&#xDFFF;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO�ZOO"
+
+#data
+FOO&#xE000;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOZOO"
+
+#data
+FOO&#x10FFFE;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO􏿾ZOO"
+
+#data
+FOO&#x1087D4;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO􈟔ZOO"
+
+#data
+FOO&#x10FFFF;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO􏿿ZOO"
+
+#data
+FOO&#x110000;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO�ZOO"
+
+#data
+FOO&#xFFFFFF;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO�ZOO"
diff --git a/html/testdata/webkit/entities02.dat b/html/testdata/webkit/entities02.dat
new file mode 100644
index 0000000..e2fb42a
--- /dev/null
+++ b/html/testdata/webkit/entities02.dat
@@ -0,0 +1,249 @@
+#data
+<div bar="ZZ&gt;YY"></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ>YY"
+
+#data
+<div bar="ZZ&"></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&"
+
+#data
+<div bar='ZZ&'></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&"
+
+#data
+<div bar=ZZ&></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&"
+
+#data
+<div bar="ZZ&gt=YY"></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&gt=YY"
+
+#data
+<div bar="ZZ&gt0YY"></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&gt0YY"
+
+#data
+<div bar="ZZ&gt9YY"></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&gt9YY"
+
+#data
+<div bar="ZZ&gtaYY"></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&gtaYY"
+
+#data
+<div bar="ZZ&gtZYY"></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&gtZYY"
+
+#data
+<div bar="ZZ&gt YY"></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ> YY"
+
+#data
+<div bar="ZZ&gt"></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ>"
+
+#data
+<div bar='ZZ&gt'></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ>"
+
+#data
+<div bar=ZZ&gt></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ>"
+
+#data
+<div bar="ZZ&pound_id=23"></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ£_id=23"
+
+#data
+<div bar="ZZ&prod_id=23"></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&prod_id=23"
+
+#data
+<div bar="ZZ&pound;_id=23"></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ£_id=23"
+
+#data
+<div bar="ZZ&prod;_id=23"></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ∏_id=23"
+
+#data
+<div bar="ZZ&pound=23"></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&pound=23"
+
+#data
+<div bar="ZZ&prod=23"></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&prod=23"
+
+#data
+<div>ZZ&pound_id=23</div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "ZZ£_id=23"
+
+#data
+<div>ZZ&prod_id=23</div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "ZZ&prod_id=23"
+
+#data
+<div>ZZ&pound;_id=23</div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "ZZ£_id=23"
+
+#data
+<div>ZZ&prod;_id=23</div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "ZZ∏_id=23"
+
+#data
+<div>ZZ&pound=23</div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "ZZ£=23"
+
+#data
+<div>ZZ&prod=23</div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "ZZ&prod=23"
diff --git a/html/testdata/webkit/html5test-com.dat b/html/testdata/webkit/html5test-com.dat
new file mode 100644
index 0000000..d7cb71d
--- /dev/null
+++ b/html/testdata/webkit/html5test-com.dat
@@ -0,0 +1,246 @@
+#data
+<div<div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div<div>
+
+#data
+<div foo<bar=''>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       foo<bar=""
+
+#data
+<div foo=`bar`>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       foo="`bar`"
+
+#data
+<div \"foo=''>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       \"foo=""
+
+#data
+<a href='\nbar'></a>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       href="\nbar"
+
+#data
+<!DOCTYPE html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+
+#data
+&lang;&rang;
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "⟨⟩"
+
+#data
+&apos;
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "'"
+
+#data
+&ImaginaryI;
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "ⅈ"
+
+#data
+&Kopf;
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "𝕂"
+
+#data
+&notinva;
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "∉"
+
+#data
+<?import namespace="foo" implementation="#bar">
+#errors
+#document
+| <!-- ?import namespace="foo" implementation="#bar" -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!--foo--bar-->
+#errors
+#document
+| <!-- foo--bar -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<![CDATA[x]]>
+#errors
+#document
+| <!-- [CDATA[x]] -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<textarea><!--</textarea>--></textarea>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "<!--"
+|     "-->"
+
+#data
+<textarea><!--</textarea>-->
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "<!--"
+|     "-->"
+
+#data
+<style><!--</style>--></style>
+#errors
+#document
+| <html>
+|   <head>
+|     <style>
+|       "<!--"
+|   <body>
+|     "-->"
+
+#data
+<style><!--</style>-->
+#errors
+#document
+| <html>
+|   <head>
+|     <style>
+|       "<!--"
+|   <body>
+|     "-->"
+
+#data
+<ul><li>A </li> <li>B</li></ul>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ul>
+|       <li>
+|         "A "
+|       " "
+|       <li>
+|         "B"
+
+#data
+<table><form><input type=hidden><input></form><div></div></table>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <input>
+|     <div>
+|     <table>
+|       <form>
+|       <input>
+|         type="hidden"
+
+#data
+<i>A<b>B<p></i>C</b>D
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <i>
+|       "A"
+|       <b>
+|         "B"
+|     <b>
+|     <p>
+|       <b>
+|         <i>
+|         "C"
+|       "D"
+
+#data
+<div></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+
+#data
+<svg></svg>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+
+#data
+<math></math>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
diff --git a/html/testdata/webkit/inbody01.dat b/html/testdata/webkit/inbody01.dat
new file mode 100644
index 0000000..3f2bd37
--- /dev/null
+++ b/html/testdata/webkit/inbody01.dat
@@ -0,0 +1,43 @@
+#data
+<button>1</foo>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <button>
+|       "1"
+
+#data
+<foo>1<p>2</foo>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <foo>
+|       "1"
+|       <p>
+|         "2"
+
+#data
+<dd>1</foo>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <dd>
+|       "1"
+
+#data
+<foo>1<dd>2</foo>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <foo>
+|       "1"
+|       <dd>
+|         "2"
diff --git a/html/testdata/webkit/isindex.dat b/html/testdata/webkit/isindex.dat
new file mode 100644
index 0000000..88325ff
--- /dev/null
+++ b/html/testdata/webkit/isindex.dat
@@ -0,0 +1,40 @@
+#data
+<isindex>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <form>
+|       <hr>
+|       <label>
+|         "This is a searchable index. Enter search keywords: "
+|         <input>
+|           name="isindex"
+|       <hr>
+
+#data
+<isindex name="A" action="B" prompt="C" foo="D">
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <form>
+|       action="B"
+|       <hr>
+|       <label>
+|         "C"
+|         <input>
+|           foo="D"
+|           name="isindex"
+|       <hr>
+
+#data
+<form><isindex>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <form>
diff --git a/html/testdata/webkit/pending-spec-changes-plain-text-unsafe.dat b/html/testdata/webkit/pending-spec-changes-plain-text-unsafe.dat
new file mode 100644
index 0000000..a5ebb1e
--- /dev/null
+++ b/html/testdata/webkit/pending-spec-changes-plain-text-unsafe.dat
Binary files differ
diff --git a/html/testdata/webkit/pending-spec-changes.dat b/html/testdata/webkit/pending-spec-changes.dat
new file mode 100644
index 0000000..5a92084
--- /dev/null
+++ b/html/testdata/webkit/pending-spec-changes.dat
@@ -0,0 +1,52 @@
+#data
+<input type="hidden"><frameset>
+#errors
+21: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
+31: “frameset” start tag seen.
+31: End of file seen and there were open elements.
+#document
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!DOCTYPE html><table><caption><svg>foo</table>bar
+#errors
+47: End tag “table” did not match the name of the current open element (“svg”).
+47: “table” closed but “caption” was still open.
+47: End tag “table” seen, but there were open elements.
+36: Unclosed element “svg”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <svg svg>
+|           "foo"
+|     "bar"
+
+#data
+<table><tr><td><svg><desc><td></desc><circle>
+#errors
+7: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
+30: A table cell was implicitly closed, but there were open elements.
+26: Unclosed element “desc”.
+20: Unclosed element “svg”.
+37: Stray end tag “desc”.
+45: End of file seen and there were open elements.
+45: Unclosed element “circle”.
+7: Unclosed element “table”.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <svg svg>
+|               <svg desc>
+|           <td>
+|             <circle>
diff --git a/html/testdata/webkit/plain-text-unsafe.dat b/html/testdata/webkit/plain-text-unsafe.dat
new file mode 100644
index 0000000..04cc11f
--- /dev/null
+++ b/html/testdata/webkit/plain-text-unsafe.dat
Binary files differ
diff --git a/html/testdata/webkit/scriptdata01.dat b/html/testdata/webkit/scriptdata01.dat
new file mode 100644
index 0000000..76b67f4
--- /dev/null
+++ b/html/testdata/webkit/scriptdata01.dat
@@ -0,0 +1,308 @@
+#data
+FOO<script>'Hello'</script>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'Hello'"
+|     "BAR"
+
+#data
+FOO<script></script>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|     "BAR"
+
+#data
+FOO<script></script >BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|     "BAR"
+
+#data
+FOO<script></script/>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|     "BAR"
+
+#data
+FOO<script></script/ >BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|     "BAR"
+
+#data
+FOO<script type="text/plain"></scriptx>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       type="text/plain"
+|       "</scriptx>BAR"
+
+#data
+FOO<script></script foo=">" dd>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|     "BAR"
+
+#data
+FOO<script>'<'</script>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'<'"
+|     "BAR"
+
+#data
+FOO<script>'<!'</script>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'<!'"
+|     "BAR"
+
+#data
+FOO<script>'<!-'</script>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'<!-'"
+|     "BAR"
+
+#data
+FOO<script>'<!--'</script>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'<!--'"
+|     "BAR"
+
+#data
+FOO<script>'<!---'</script>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'<!---'"
+|     "BAR"
+
+#data
+FOO<script>'<!-->'</script>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'<!-->'"
+|     "BAR"
+
+#data
+FOO<script>'<!-->'</script>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'<!-->'"
+|     "BAR"
+
+#data
+FOO<script>'<!-- potato'</script>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'<!-- potato'"
+|     "BAR"
+
+#data
+FOO<script>'<!-- <sCrIpt'</script>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'<!-- <sCrIpt'"
+|     "BAR"
+
+#data
+FOO<script type="text/plain">'<!-- <sCrIpt>'</script>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       type="text/plain"
+|       "'<!-- <sCrIpt>'</script>BAR"
+
+#data
+FOO<script type="text/plain">'<!-- <sCrIpt> -'</script>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       type="text/plain"
+|       "'<!-- <sCrIpt> -'</script>BAR"
+
+#data
+FOO<script type="text/plain">'<!-- <sCrIpt> --'</script>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       type="text/plain"
+|       "'<!-- <sCrIpt> --'</script>BAR"
+
+#data
+FOO<script>'<!-- <sCrIpt> -->'</script>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'<!-- <sCrIpt> -->'"
+|     "BAR"
+
+#data
+FOO<script type="text/plain">'<!-- <sCrIpt> --!>'</script>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       type="text/plain"
+|       "'<!-- <sCrIpt> --!>'</script>BAR"
+
+#data
+FOO<script type="text/plain">'<!-- <sCrIpt> -- >'</script>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       type="text/plain"
+|       "'<!-- <sCrIpt> -- >'</script>BAR"
+
+#data
+FOO<script type="text/plain">'<!-- <sCrIpt '</script>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       type="text/plain"
+|       "'<!-- <sCrIpt '</script>BAR"
+
+#data
+FOO<script type="text/plain">'<!-- <sCrIpt/'</script>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       type="text/plain"
+|       "'<!-- <sCrIpt/'</script>BAR"
+
+#data
+FOO<script type="text/plain">'<!-- <sCrIpt\'</script>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       type="text/plain"
+|       "'<!-- <sCrIpt\'"
+|     "BAR"
+
+#data
+FOO<script type="text/plain">'<!-- <sCrIpt/'</script>BAR</script>QUX
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       type="text/plain"
+|       "'<!-- <sCrIpt/'</script>BAR"
+|     "QUX"
diff --git a/html/testdata/webkit/scripted/adoption01.dat b/html/testdata/webkit/scripted/adoption01.dat
new file mode 100644
index 0000000..4e08d0e
--- /dev/null
+++ b/html/testdata/webkit/scripted/adoption01.dat
@@ -0,0 +1,15 @@
+#data
+<p><b id="A"><script>document.getElementById("A").id = "B"</script></p>TEXT</b>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <b>
+|         id="B"
+|         <script>
+|           "document.getElementById("A").id = "B""
+|     <b>
+|       id="A"
+|       "TEXT"
diff --git a/html/testdata/webkit/scripted/webkit01.dat b/html/testdata/webkit/scripted/webkit01.dat
new file mode 100644
index 0000000..ef4a41c
--- /dev/null
+++ b/html/testdata/webkit/scripted/webkit01.dat
@@ -0,0 +1,28 @@
+#data
+1<script>document.write("2")</script>3
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "1"
+|     <script>
+|       "document.write("2")"
+|     "23"
+
+#data
+1<script>document.write("<script>document.write('2')</scr"+ "ipt><script>document.write('3')</scr" + "ipt>")</script>4
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "1"
+|     <script>
+|       "document.write("<script>document.write('2')</scr"+ "ipt><script>document.write('3')</scr" + "ipt>")"
+|     <script>
+|       "document.write('2')"
+|     "2"
+|     <script>
+|       "document.write('3')"
+|     "34"
diff --git a/html/testdata/webkit/tables01.dat b/html/testdata/webkit/tables01.dat
new file mode 100644
index 0000000..c4b47e4
--- /dev/null
+++ b/html/testdata/webkit/tables01.dat
@@ -0,0 +1,212 @@
+#data
+<table><th>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <th>
+
+#data
+<table><td>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+
+#data
+<table><col foo='bar'>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <colgroup>
+|         <col>
+|           foo="bar"
+
+#data
+<table><colgroup></html>foo
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "foo"
+|     <table>
+|       <colgroup>
+
+#data
+<table></table><p>foo
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|     <p>
+|       "foo"
+
+#data
+<table></body></caption></col></colgroup></html></tbody></td></tfoot></th></thead></tr><td>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+
+#data
+<table><select><option>3</select></table>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+|         "3"
+|     <table>
+
+#data
+<table><select><table></table></select></table>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|     <table>
+|     <table>
+
+#data
+<table><select></table>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|     <table>
+
+#data
+<table><select><option>A<tr><td>B</td></tr></table>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+|         "A"
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             "B"
+
+#data
+<table><td></body></caption></col></colgroup></html>foo
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             "foo"
+
+#data
+<table><td>A</table>B
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             "A"
+|     "B"
+
+#data
+<table><tr><caption>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|       <caption>
+
+#data
+<table><tr></body></caption></col></colgroup></html></td></th><td>foo
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             "foo"
+
+#data
+<table><td><tr>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|         <tr>
+
+#data
+<table><td><button><td>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <button>
+|           <td>
+
+#data
+<table><tr><td><svg><desc><td>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <svg svg>
+|               <svg desc>
+|           <td>
diff --git a/html/testdata/webkit/tests1.dat b/html/testdata/webkit/tests1.dat
new file mode 100644
index 0000000..cbf8bdd
--- /dev/null
+++ b/html/testdata/webkit/tests1.dat
@@ -0,0 +1,1952 @@
+#data
+Test
+#errors
+Line: 1 Col: 4 Unexpected non-space characters. Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     "Test"
+
+#data
+<p>One<p>Two
+#errors
+Line: 1 Col: 3 Unexpected start tag (p). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "One"
+|     <p>
+|       "Two"
+
+#data
+Line1<br>Line2<br>Line3<br>Line4
+#errors
+Line: 1 Col: 5 Unexpected non-space characters. Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     "Line1"
+|     <br>
+|     "Line2"
+|     <br>
+|     "Line3"
+|     <br>
+|     "Line4"
+
+#data
+<html>
+#errors
+Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<head>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<body>
+#errors
+Line: 1 Col: 6 Unexpected start tag (body). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<html><head>
+#errors
+Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<html><head></head>
+#errors
+Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<html><head></head><body>
+#errors
+Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<html><head></head><body></body>
+#errors
+Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<html><head><body></body></html>
+#errors
+Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<html><head></body></html>
+#errors
+Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
+Line: 1 Col: 19 Unexpected end tag (body).
+Line: 1 Col: 26 Unexpected end tag (html).
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<html><head><body></html>
+#errors
+Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<html><body></html>
+#errors
+Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<body></html>
+#errors
+Line: 1 Col: 6 Unexpected start tag (body). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<head></html>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+Line: 1 Col: 13 Unexpected end tag (html). Ignored.
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+</head>
+#errors
+Line: 1 Col: 7 Unexpected end tag (head). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+</body>
+#errors
+Line: 1 Col: 7 Unexpected end tag (body). Expected DOCTYPE.
+Line: 1 Col: 7 Unexpected end tag (body) after the (implied) root element.
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+</html>
+#errors
+Line: 1 Col: 7 Unexpected end tag (html). Expected DOCTYPE.
+Line: 1 Col: 7 Unexpected end tag (html) after the (implied) root element.
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<b><table><td><i></table>
+#errors
+Line: 1 Col: 3 Unexpected start tag (b). Expected DOCTYPE.
+Line: 1 Col: 14 Unexpected table cell start tag (td) in the table body phase.
+Line: 1 Col: 25 Got table cell end tag (td) while required end tags are missing.
+Line: 1 Col: 25 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <table>
+|         <tbody>
+|           <tr>
+|             <td>
+|               <i>
+
+#data
+<b><table><td></b><i></table>X
+#errors
+Line: 1 Col: 3 Unexpected start tag (b). Expected DOCTYPE.
+Line: 1 Col: 14 Unexpected table cell start tag (td) in the table body phase.
+Line: 1 Col: 18 End tag (b) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 29 Got table cell end tag (td) while required end tags are missing.
+Line: 1 Col: 30 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <table>
+|         <tbody>
+|           <tr>
+|             <td>
+|               <i>
+|       "X"
+
+#data
+<h1>Hello<h2>World
+#errors
+4: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
+13: Heading cannot be a child of another heading.
+18: End of file seen and there were open elements.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <h1>
+|       "Hello"
+|     <h2>
+|       "World"
+
+#data
+<a><p>X<a>Y</a>Z</p></a>
+#errors
+Line: 1 Col: 3 Unexpected start tag (a). Expected DOCTYPE.
+Line: 1 Col: 10 Unexpected start tag (a) implies end tag (a).
+Line: 1 Col: 10 End tag (a) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 24 End tag (a) violates step 1, paragraph 1 of the adoption agency algorithm.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|     <p>
+|       <a>
+|         "X"
+|       <a>
+|         "Y"
+|       "Z"
+
+#data
+<b><button>foo</b>bar
+#errors
+Line: 1 Col: 3 Unexpected start tag (b). Expected DOCTYPE.
+Line: 1 Col: 15 End tag (b) violates step 1, paragraph 1 of the adoption agency algorithm.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|     <button>
+|       <b>
+|         "foo"
+|       "bar"
+
+#data
+<!DOCTYPE html><span><button>foo</span>bar
+#errors
+39: End tag “span” seen but there were unclosed elements.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <span>
+|       <button>
+|         "foobar"
+
+#data
+<p><b><div><marquee></p></b></div>X
+#errors
+Line: 1 Col: 3 Unexpected start tag (p). Expected DOCTYPE.
+Line: 1 Col: 11 Unexpected end tag (p). Ignored.
+Line: 1 Col: 24 Unexpected end tag (p). Ignored.
+Line: 1 Col: 28 End tag (b) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 34 End tag (div) seen too early. Expected other end tag.
+Line: 1 Col: 35 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <b>
+|     <div>
+|       <b>
+|         <marquee>
+|           <p>
+|           "X"
+
+#data
+<script><div></script></div><title><p></title><p><p>
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 28 Unexpected end tag (div). Ignored.
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<div>"
+|     <title>
+|       "<p>"
+|   <body>
+|     <p>
+|     <p>
+
+#data
+<!--><div>--<!-->
+#errors
+Line: 1 Col: 5 Incorrect comment.
+Line: 1 Col: 10 Unexpected start tag (div). Expected DOCTYPE.
+Line: 1 Col: 17 Incorrect comment.
+Line: 1 Col: 17 Expected closing tag. Unexpected end of file.
+#document
+| <!--  -->
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "--"
+|       <!--  -->
+
+#data
+<p><hr></p>
+#errors
+Line: 1 Col: 3 Unexpected start tag (p). Expected DOCTYPE.
+Line: 1 Col: 11 Unexpected end tag (p). Ignored.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|     <hr>
+|     <p>
+
+#data
+<select><b><option><select><option></b></select>X
+#errors
+Line: 1 Col: 8 Unexpected start tag (select). Expected DOCTYPE.
+Line: 1 Col: 11 Unexpected start tag token (b) in the select phase. Ignored.
+Line: 1 Col: 27 Unexpected select start tag in the select phase treated as select end tag.
+Line: 1 Col: 39 End tag (b) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 48 Unexpected end tag (select). Ignored.
+Line: 1 Col: 49 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+|     <option>
+|       "X"
+
+#data
+<a><table><td><a><table></table><a></tr><a></table><b>X</b>C<a>Y
+#errors
+Line: 1 Col: 3 Unexpected start tag (a). Expected DOCTYPE.
+Line: 1 Col: 14 Unexpected table cell start tag (td) in the table body phase.
+Line: 1 Col: 35 Unexpected start tag (a) implies end tag (a).
+Line: 1 Col: 40 Got table cell end tag (td) while required end tags are missing.
+Line: 1 Col: 43 Unexpected start tag (a) in table context caused voodoo mode.
+Line: 1 Col: 43 Unexpected start tag (a) implies end tag (a).
+Line: 1 Col: 43 End tag (a) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 51 Unexpected implied end tag (a) in the table phase.
+Line: 1 Col: 63 Unexpected start tag (a) implies end tag (a).
+Line: 1 Col: 64 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <a>
+|       <table>
+|         <tbody>
+|           <tr>
+|             <td>
+|               <a>
+|                 <table>
+|               <a>
+|     <a>
+|       <b>
+|         "X"
+|       "C"
+|     <a>
+|       "Y"
+
+#data
+<a X>0<b>1<a Y>2
+#errors
+Line: 1 Col: 5 Unexpected start tag (a). Expected DOCTYPE.
+Line: 1 Col: 15 Unexpected start tag (a) implies end tag (a).
+Line: 1 Col: 15 End tag (a) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 16 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       x=""
+|       "0"
+|       <b>
+|         "1"
+|     <b>
+|       <a>
+|         y=""
+|         "2"
+
+#data
+<!-----><font><div>hello<table>excite!<b>me!<th><i>please!</tr><!--X-->
+#errors
+Line: 1 Col: 7 Unexpected '-' after '--' found in comment.
+Line: 1 Col: 14 Unexpected start tag (font). Expected DOCTYPE.
+Line: 1 Col: 38 Unexpected non-space characters in table context caused voodoo mode.
+Line: 1 Col: 41 Unexpected start tag (b) in table context caused voodoo mode.
+Line: 1 Col: 48 Unexpected implied end tag (b) in the table phase.
+Line: 1 Col: 48 Unexpected table cell start tag (th) in the table body phase.
+Line: 1 Col: 63 Got table cell end tag (th) while required end tags are missing.
+Line: 1 Col: 71 Unexpected end of file. Expected table content.
+#document
+| <!-- - -->
+| <html>
+|   <head>
+|   <body>
+|     <font>
+|       <div>
+|         "helloexcite!"
+|         <b>
+|           "me!"
+|         <table>
+|           <tbody>
+|             <tr>
+|               <th>
+|                 <i>
+|                   "please!"
+|             <!-- X -->
+
+#data
+<!DOCTYPE html><li>hello<li>world<ul>how<li>do</ul>you</body><!--do-->
+#errors
+Line: 1 Col: 61 Unexpected end tag (li). Missing end tag (body).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <li>
+|       "hello"
+|     <li>
+|       "world"
+|       <ul>
+|         "how"
+|         <li>
+|           "do"
+|       "you"
+|   <!-- do -->
+
+#data
+<!DOCTYPE html>A<option>B<optgroup>C<select>D</option>E
+#errors
+Line: 1 Col: 54 Unexpected end tag (option) in the select phase. Ignored.
+Line: 1 Col: 55 Expected closing tag. Unexpected end of file.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "A"
+|     <option>
+|       "B"
+|     <optgroup>
+|       "C"
+|       <select>
+|         "DE"
+
+#data
+<
+#errors
+Line: 1 Col: 1 Expected tag name. Got something else instead
+Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     "<"
+
+#data
+<#
+#errors
+Line: 1 Col: 1 Expected tag name. Got something else instead
+Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     "<#"
+
+#data
+</
+#errors
+Line: 1 Col: 2 Expected closing tag. Unexpected end of file.
+Line: 1 Col: 2 Unexpected non-space characters. Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     "</"
+
+#data
+</#
+#errors
+Line: 1 Col: 2 Expected closing tag. Unexpected character '#' found.
+Line: 1 Col: 3 Unexpected End of file. Expected DOCTYPE.
+#document
+| <!-- # -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<?
+#errors
+Line: 1 Col: 1 Expected tag name. Got '?' instead. (HTML doesn't support processing instructions.)
+Line: 1 Col: 2 Unexpected End of file. Expected DOCTYPE.
+#document
+| <!-- ? -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<?#
+#errors
+Line: 1 Col: 1 Expected tag name. Got '?' instead. (HTML doesn't support processing instructions.)
+Line: 1 Col: 3 Unexpected End of file. Expected DOCTYPE.
+#document
+| <!-- ?# -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!
+#errors
+Line: 1 Col: 2 Expected '--' or 'DOCTYPE'. Not found.
+Line: 1 Col: 2 Unexpected End of file. Expected DOCTYPE.
+#document
+| <!--  -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!#
+#errors
+Line: 1 Col: 3 Expected '--' or 'DOCTYPE'. Not found.
+Line: 1 Col: 3 Unexpected End of file. Expected DOCTYPE.
+#document
+| <!-- # -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<?COMMENT?>
+#errors
+Line: 1 Col: 1 Expected tag name. Got '?' instead. (HTML doesn't support processing instructions.)
+Line: 1 Col: 11 Unexpected End of file. Expected DOCTYPE.
+#document
+| <!-- ?COMMENT? -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!COMMENT>
+#errors
+Line: 1 Col: 2 Expected '--' or 'DOCTYPE'. Not found.
+Line: 1 Col: 10 Unexpected End of file. Expected DOCTYPE.
+#document
+| <!-- COMMENT -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+</ COMMENT >
+#errors
+Line: 1 Col: 2 Expected closing tag. Unexpected character ' ' found.
+Line: 1 Col: 12 Unexpected End of file. Expected DOCTYPE.
+#document
+| <!--  COMMENT  -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<?COM--MENT?>
+#errors
+Line: 1 Col: 1 Expected tag name. Got '?' instead. (HTML doesn't support processing instructions.)
+Line: 1 Col: 13 Unexpected End of file. Expected DOCTYPE.
+#document
+| <!-- ?COM--MENT? -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!COM--MENT>
+#errors
+Line: 1 Col: 2 Expected '--' or 'DOCTYPE'. Not found.
+Line: 1 Col: 12 Unexpected End of file. Expected DOCTYPE.
+#document
+| <!-- COM--MENT -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+</ COM--MENT >
+#errors
+Line: 1 Col: 2 Expected closing tag. Unexpected character ' ' found.
+Line: 1 Col: 14 Unexpected End of file. Expected DOCTYPE.
+#document
+| <!--  COM--MENT  -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><style> EOF
+#errors
+Line: 1 Col: 26 Unexpected end of file. Expected end tag (style).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <style>
+|       " EOF"
+|   <body>
+
+#data
+<!DOCTYPE html><script> <!-- </script> --> </script> EOF
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       " <!-- "
+|     " "
+|   <body>
+|     "-->  EOF"
+
+#data
+<b><p></b>TEST
+#errors
+Line: 1 Col: 3 Unexpected start tag (b). Expected DOCTYPE.
+Line: 1 Col: 10 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|     <p>
+|       <b>
+|       "TEST"
+
+#data
+<p id=a><b><p id=b></b>TEST
+#errors
+Line: 1 Col: 8 Unexpected start tag (p). Expected DOCTYPE.
+Line: 1 Col: 19 Unexpected end tag (p). Ignored.
+Line: 1 Col: 23 End tag (b) violates step 1, paragraph 2 of the adoption agency algorithm.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       id="a"
+|       <b>
+|     <p>
+|       id="b"
+|       "TEST"
+
+#data
+<b id=a><p><b id=b></p></b>TEST
+#errors
+Line: 1 Col: 8 Unexpected start tag (b). Expected DOCTYPE.
+Line: 1 Col: 23 Unexpected end tag (p). Ignored.
+Line: 1 Col: 27 End tag (b) violates step 1, paragraph 2 of the adoption agency algorithm.
+Line: 1 Col: 31 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       id="a"
+|       <p>
+|         <b>
+|           id="b"
+|       "TEST"
+
+#data
+<!DOCTYPE html><title>U-test</title><body><div><p>Test<u></p></div></body>
+#errors
+Line: 1 Col: 61 Unexpected end tag (p). Ignored.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <title>
+|       "U-test"
+|   <body>
+|     <div>
+|       <p>
+|         "Test"
+|         <u>
+
+#data
+<!DOCTYPE html><font><table></font></table></font>
+#errors
+Line: 1 Col: 35 Unexpected end tag (font) in table context caused voodoo mode.
+Line: 1 Col: 35 End tag (font) violates step 1, paragraph 1 of the adoption agency algorithm.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <font>
+|       <table>
+
+#data
+<font><p>hello<b>cruel</font>world
+#errors
+Line: 1 Col: 6 Unexpected start tag (font). Expected DOCTYPE.
+Line: 1 Col: 29 End tag (font) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 29 End tag (font) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 34 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <font>
+|     <p>
+|       <font>
+|         "hello"
+|         <b>
+|           "cruel"
+|       <b>
+|         "world"
+
+#data
+<b>Test</i>Test
+#errors
+Line: 1 Col: 3 Unexpected start tag (b). Expected DOCTYPE.
+Line: 1 Col: 11 End tag (i) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 15 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       "TestTest"
+
+#data
+<b>A<cite>B<div>C
+#errors
+Line: 1 Col: 3 Unexpected start tag (b). Expected DOCTYPE.
+Line: 1 Col: 17 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       "A"
+|       <cite>
+|         "B"
+|         <div>
+|           "C"
+
+#data
+<b>A<cite>B<div>C</cite>D
+#errors
+Line: 1 Col: 3 Unexpected start tag (b). Expected DOCTYPE.
+Line: 1 Col: 24 Unexpected end tag (cite). Ignored.
+Line: 1 Col: 25 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       "A"
+|       <cite>
+|         "B"
+|         <div>
+|           "CD"
+
+#data
+<b>A<cite>B<div>C</b>D
+#errors
+Line: 1 Col: 3 Unexpected start tag (b). Expected DOCTYPE.
+Line: 1 Col: 21 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 22 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       "A"
+|       <cite>
+|         "B"
+|     <div>
+|       <b>
+|         "C"
+|       "D"
+
+#data
+
+#errors
+Line: 1 Col: 0 Unexpected End of file. Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<DIV>
+#errors
+Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
+Line: 1 Col: 5 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+
+#data
+<DIV> abc
+#errors
+Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
+Line: 1 Col: 9 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc"
+
+#data
+<DIV> abc <B>
+#errors
+Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
+Line: 1 Col: 13 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+
+#data
+<DIV> abc <B> def
+#errors
+Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
+Line: 1 Col: 17 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def"
+
+#data
+<DIV> abc <B> def <I>
+#errors
+Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
+Line: 1 Col: 21 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def "
+|         <i>
+
+#data
+<DIV> abc <B> def <I> ghi
+#errors
+Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
+Line: 1 Col: 25 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def "
+|         <i>
+|           " ghi"
+
+#data
+<DIV> abc <B> def <I> ghi <P>
+#errors
+Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
+Line: 1 Col: 29 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def "
+|         <i>
+|           " ghi "
+|           <p>
+
+#data
+<DIV> abc <B> def <I> ghi <P> jkl
+#errors
+Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
+Line: 1 Col: 33 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def "
+|         <i>
+|           " ghi "
+|           <p>
+|             " jkl"
+
+#data
+<DIV> abc <B> def <I> ghi <P> jkl </B>
+#errors
+Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
+Line: 1 Col: 38 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 38 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def "
+|         <i>
+|           " ghi "
+|       <i>
+|         <p>
+|           <b>
+|             " jkl "
+
+#data
+<DIV> abc <B> def <I> ghi <P> jkl </B> mno
+#errors
+Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
+Line: 1 Col: 38 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 42 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def "
+|         <i>
+|           " ghi "
+|       <i>
+|         <p>
+|           <b>
+|             " jkl "
+|           " mno"
+
+#data
+<DIV> abc <B> def <I> ghi <P> jkl </B> mno </I>
+#errors
+Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
+Line: 1 Col: 38 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 47 End tag (i) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 47 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def "
+|         <i>
+|           " ghi "
+|       <i>
+|       <p>
+|         <i>
+|           <b>
+|             " jkl "
+|           " mno "
+
+#data
+<DIV> abc <B> def <I> ghi <P> jkl </B> mno </I> pqr
+#errors
+Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
+Line: 1 Col: 38 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 47 End tag (i) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 51 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def "
+|         <i>
+|           " ghi "
+|       <i>
+|       <p>
+|         <i>
+|           <b>
+|             " jkl "
+|           " mno "
+|         " pqr"
+
+#data
+<DIV> abc <B> def <I> ghi <P> jkl </B> mno </I> pqr </P>
+#errors
+Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
+Line: 1 Col: 38 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 47 End tag (i) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 56 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def "
+|         <i>
+|           " ghi "
+|       <i>
+|       <p>
+|         <i>
+|           <b>
+|             " jkl "
+|           " mno "
+|         " pqr "
+
+#data
+<DIV> abc <B> def <I> ghi <P> jkl </B> mno </I> pqr </P> stu
+#errors
+Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
+Line: 1 Col: 38 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 47 End tag (i) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 60 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def "
+|         <i>
+|           " ghi "
+|       <i>
+|       <p>
+|         <i>
+|           <b>
+|             " jkl "
+|           " mno "
+|         " pqr "
+|       " stu"
+
+#data
+<test attribute---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------->
+#errors
+Line: 1 Col: 1040 Unexpected start tag (test). Expected DOCTYPE.
+Line: 1 Col: 1040 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <test>
+|       attribute----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------=""
+
+#data
+<a href="blah">aba<table><a href="foo">br<tr><td></td></tr>x</table>aoe
+#errors
+Line: 1 Col: 15 Unexpected start tag (a). Expected DOCTYPE.
+Line: 1 Col: 39 Unexpected start tag (a) in table context caused voodoo mode.
+Line: 1 Col: 39 Unexpected start tag (a) implies end tag (a).
+Line: 1 Col: 39 End tag (a) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 45 Unexpected implied end tag (a) in the table phase.
+Line: 1 Col: 68 Unexpected implied end tag (a) in the table phase.
+Line: 1 Col: 71 Expected closing tag. Unexpected end of file.
+
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       href="blah"
+|       "aba"
+|       <a>
+|         href="foo"
+|         "br"
+|       <a>
+|         href="foo"
+|         "x"
+|       <table>
+|         <tbody>
+|           <tr>
+|             <td>
+|     <a>
+|       href="foo"
+|       "aoe"
+
+#data
+<a href="blah">aba<table><tr><td><a href="foo">br</td></tr>x</table>aoe
+#errors
+Line: 1 Col: 15 Unexpected start tag (a). Expected DOCTYPE.
+Line: 1 Col: 54 Got table cell end tag (td) while required end tags are missing.
+Line: 1 Col: 60 Unexpected non-space characters in table context caused voodoo mode.
+Line: 1 Col: 71 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       href="blah"
+|       "abax"
+|       <table>
+|         <tbody>
+|           <tr>
+|             <td>
+|               <a>
+|                 href="foo"
+|                 "br"
+|       "aoe"
+
+#data
+<table><a href="blah">aba<tr><td><a href="foo">br</td></tr>x</table>aoe
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 22 Unexpected start tag (a) in table context caused voodoo mode.
+Line: 1 Col: 29 Unexpected implied end tag (a) in the table phase.
+Line: 1 Col: 54 Got table cell end tag (td) while required end tags are missing.
+Line: 1 Col: 68 Unexpected implied end tag (a) in the table phase.
+Line: 1 Col: 71 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       href="blah"
+|       "aba"
+|     <a>
+|       href="blah"
+|       "x"
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <a>
+|               href="foo"
+|               "br"
+|     <a>
+|       href="blah"
+|       "aoe"
+
+#data
+<a href=a>aa<marquee>aa<a href=b>bb</marquee>aa
+#errors
+Line: 1 Col: 10 Unexpected start tag (a). Expected DOCTYPE.
+Line: 1 Col: 45 End tag (marquee) seen too early. Expected other end tag.
+Line: 1 Col: 47 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       href="a"
+|       "aa"
+|       <marquee>
+|         "aa"
+|         <a>
+|           href="b"
+|           "bb"
+|       "aa"
+
+#data
+<wbr><strike><code></strike><code><strike></code>
+#errors
+Line: 1 Col: 5 Unexpected start tag (wbr). Expected DOCTYPE.
+Line: 1 Col: 28 End tag (strike) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 49 Unexpected end tag (code). Ignored.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <wbr>
+|     <strike>
+|       <code>
+|     <code>
+|       <code>
+|         <strike>
+
+#data
+<!DOCTYPE html><spacer>foo
+#errors
+26: End of file seen and there were open elements.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <spacer>
+|       "foo"
+
+#data
+<title><meta></title><link><title><meta></title>
+#errors
+Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <title>
+|       "<meta>"
+|     <link>
+|     <title>
+|       "<meta>"
+|   <body>
+
+#data
+<style><!--</style><meta><script>--><link></script>
+#errors
+Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
+Line: 1 Col: 51 Unexpected end of file. Expected end tag (style).
+#document
+| <html>
+|   <head>
+|     <style>
+|       "<!--"
+|     <meta>
+|     <script>
+|       "--><link>"
+|   <body>
+
+#data
+<head><meta></head><link>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+Line: 1 Col: 25 Unexpected start tag (link) that can be in head. Moved.
+#document
+| <html>
+|   <head>
+|     <meta>
+|     <link>
+|   <body>
+
+#data
+<table><tr><tr><td><td><span><th><span>X</table>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 33 Got table cell end tag (td) while required end tags are missing.
+Line: 1 Col: 48 Got table cell end tag (th) while required end tags are missing.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|         <tr>
+|           <td>
+|           <td>
+|             <span>
+|           <th>
+|             <span>
+|               "X"
+
+#data
+<body><body><base><link><meta><title><p></title><body><p></body>
+#errors
+Line: 1 Col: 6 Unexpected start tag (body). Expected DOCTYPE.
+Line: 1 Col: 12 Unexpected start tag (body).
+Line: 1 Col: 54 Unexpected start tag (body).
+Line: 1 Col: 64 Unexpected end tag (p). Missing end tag (body).
+#document
+| <html>
+|   <head>
+|   <body>
+|     <base>
+|     <link>
+|     <meta>
+|     <title>
+|       "<p>"
+|     <p>
+
+#data
+<textarea><p></textarea>
+#errors
+Line: 1 Col: 10 Unexpected start tag (textarea). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "<p>"
+
+#data
+<p><image></p>
+#errors
+Line: 1 Col: 3 Unexpected start tag (p). Expected DOCTYPE.
+Line: 1 Col: 10 Unexpected start tag (image). Treated as img.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <img>
+
+#data
+<a><table><a></table><p><a><div><a>
+#errors
+Line: 1 Col: 3 Unexpected start tag (a). Expected DOCTYPE.
+Line: 1 Col: 13 Unexpected start tag (a) in table context caused voodoo mode.
+Line: 1 Col: 13 Unexpected start tag (a) implies end tag (a).
+Line: 1 Col: 13 End tag (a) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 21 Unexpected end tag (table). Expected end tag (a).
+Line: 1 Col: 27 Unexpected start tag (a) implies end tag (a).
+Line: 1 Col: 27 End tag (a) violates step 1, paragraph 2 of the adoption agency algorithm.
+Line: 1 Col: 32 Unexpected end tag (p). Ignored.
+Line: 1 Col: 35 Unexpected start tag (a) implies end tag (a).
+Line: 1 Col: 35 End tag (a) violates step 1, paragraph 2 of the adoption agency algorithm.
+Line: 1 Col: 35 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <a>
+|       <table>
+|     <p>
+|       <a>
+|     <div>
+|       <a>
+
+#data
+<head></p><meta><p>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+Line: 1 Col: 10 Unexpected end tag (p). Ignored.
+#document
+| <html>
+|   <head>
+|     <meta>
+|   <body>
+|     <p>
+
+#data
+<head></html><meta><p>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+Line: 1 Col: 19 Unexpected start tag (meta).
+#document
+| <html>
+|   <head>
+|   <body>
+|     <meta>
+|     <p>
+
+#data
+<b><table><td><i></table>
+#errors
+Line: 1 Col: 3 Unexpected start tag (b). Expected DOCTYPE.
+Line: 1 Col: 14 Unexpected table cell start tag (td) in the table body phase.
+Line: 1 Col: 25 Got table cell end tag (td) while required end tags are missing.
+Line: 1 Col: 25 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <table>
+|         <tbody>
+|           <tr>
+|             <td>
+|               <i>
+
+#data
+<b><table><td></b><i></table>
+#errors
+Line: 1 Col: 3 Unexpected start tag (b). Expected DOCTYPE.
+Line: 1 Col: 14 Unexpected table cell start tag (td) in the table body phase.
+Line: 1 Col: 18 End tag (b) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 29 Got table cell end tag (td) while required end tags are missing.
+Line: 1 Col: 29 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <table>
+|         <tbody>
+|           <tr>
+|             <td>
+|               <i>
+
+#data
+<h1><h2>
+#errors
+4: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
+8: Heading cannot be a child of another heading.
+8: End of file seen and there were open elements.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <h1>
+|     <h2>
+
+#data
+<a><p><a></a></p></a>
+#errors
+Line: 1 Col: 3 Unexpected start tag (a). Expected DOCTYPE.
+Line: 1 Col: 9 Unexpected start tag (a) implies end tag (a).
+Line: 1 Col: 9 End tag (a) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 21 End tag (a) violates step 1, paragraph 1 of the adoption agency algorithm.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|     <p>
+|       <a>
+|       <a>
+
+#data
+<b><button></b></button></b>
+#errors
+Line: 1 Col: 3 Unexpected start tag (b). Expected DOCTYPE.
+Line: 1 Col: 15 End tag (b) violates step 1, paragraph 1 of the adoption agency algorithm.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|     <button>
+|       <b>
+
+#data
+<p><b><div><marquee></p></b></div>
+#errors
+Line: 1 Col: 3 Unexpected start tag (p). Expected DOCTYPE.
+Line: 1 Col: 11 Unexpected end tag (p). Ignored.
+Line: 1 Col: 24 Unexpected end tag (p). Ignored.
+Line: 1 Col: 28 End tag (b) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 34 End tag (div) seen too early. Expected other end tag.
+Line: 1 Col: 34 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <b>
+|     <div>
+|       <b>
+|         <marquee>
+|           <p>
+
+#data
+<script></script></div><title></title><p><p>
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 23 Unexpected end tag (div). Ignored.
+#document
+| <html>
+|   <head>
+|     <script>
+|     <title>
+|   <body>
+|     <p>
+|     <p>
+
+#data
+<p><hr></p>
+#errors
+Line: 1 Col: 3 Unexpected start tag (p). Expected DOCTYPE.
+Line: 1 Col: 11 Unexpected end tag (p). Ignored.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|     <hr>
+|     <p>
+
+#data
+<select><b><option><select><option></b></select>
+#errors
+Line: 1 Col: 8 Unexpected start tag (select). Expected DOCTYPE.
+Line: 1 Col: 11 Unexpected start tag token (b) in the select phase. Ignored.
+Line: 1 Col: 27 Unexpected select start tag in the select phase treated as select end tag.
+Line: 1 Col: 39 End tag (b) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 48 Unexpected end tag (select). Ignored.
+Line: 1 Col: 48 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+|     <option>
+
+#data
+<html><head><title></title><body></body></html>
+#errors
+Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <title>
+|   <body>
+
+#data
+<a><table><td><a><table></table><a></tr><a></table><a>
+#errors
+Line: 1 Col: 3 Unexpected start tag (a). Expected DOCTYPE.
+Line: 1 Col: 14 Unexpected table cell start tag (td) in the table body phase.
+Line: 1 Col: 35 Unexpected start tag (a) implies end tag (a).
+Line: 1 Col: 40 Got table cell end tag (td) while required end tags are missing.
+Line: 1 Col: 43 Unexpected start tag (a) in table context caused voodoo mode.
+Line: 1 Col: 43 Unexpected start tag (a) implies end tag (a).
+Line: 1 Col: 43 End tag (a) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 51 Unexpected implied end tag (a) in the table phase.
+Line: 1 Col: 54 Unexpected start tag (a) implies end tag (a).
+Line: 1 Col: 54 End tag (a) violates step 1, paragraph 2 of the adoption agency algorithm.
+Line: 1 Col: 54 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <a>
+|       <table>
+|         <tbody>
+|           <tr>
+|             <td>
+|               <a>
+|                 <table>
+|               <a>
+|     <a>
+
+#data
+<ul><li></li><div><li></div><li><li><div><li><address><li><b><em></b><li></ul>
+#errors
+Line: 1 Col: 4 Unexpected start tag (ul). Expected DOCTYPE.
+Line: 1 Col: 45 Missing end tag (div, li).
+Line: 1 Col: 58 Missing end tag (address, li).
+Line: 1 Col: 69 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ul>
+|       <li>
+|       <div>
+|         <li>
+|       <li>
+|       <li>
+|         <div>
+|       <li>
+|         <address>
+|       <li>
+|         <b>
+|           <em>
+|       <li>
+
+#data
+<ul><li><ul></li><li>a</li></ul></li></ul>
+#errors
+XXX: fix me
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ul>
+|       <li>
+|         <ul>
+|           <li>
+|             "a"
+
+#data
+<frameset><frame><frameset><frame></frameset><noframes></noframes></frameset>
+#errors
+Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <frameset>
+|     <frame>
+|     <frameset>
+|       <frame>
+|     <noframes>
+
+#data
+<h1><table><td><h3></table><h3></h1>
+#errors
+4: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
+15: “td” start tag in table body.
+27: Unclosed elements.
+31: Heading cannot be a child of another heading.
+36: End tag “h1” seen but there were unclosed elements.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <h1>
+|       <table>
+|         <tbody>
+|           <tr>
+|             <td>
+|               <h3>
+|     <h3>
+
+#data
+<table><colgroup><col><colgroup><col><col><col><colgroup><col><col><thead><tr><td></table>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <colgroup>
+|         <col>
+|       <colgroup>
+|         <col>
+|         <col>
+|         <col>
+|       <colgroup>
+|         <col>
+|         <col>
+|       <thead>
+|         <tr>
+|           <td>
+
+#data
+<table><col><tbody><col><tr><col><td><col></table><col>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 37 Unexpected table cell start tag (td) in the table body phase.
+Line: 1 Col: 55 Unexpected start tag col. Ignored.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <colgroup>
+|         <col>
+|       <tbody>
+|       <colgroup>
+|         <col>
+|       <tbody>
+|         <tr>
+|       <colgroup>
+|         <col>
+|       <tbody>
+|         <tr>
+|           <td>
+|       <colgroup>
+|         <col>
+
+#data
+<table><colgroup><tbody><colgroup><tr><colgroup><td><colgroup></table><colgroup>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 52 Unexpected table cell start tag (td) in the table body phase.
+Line: 1 Col: 80 Unexpected start tag colgroup. Ignored.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <colgroup>
+|       <tbody>
+|       <colgroup>
+|       <tbody>
+|         <tr>
+|       <colgroup>
+|       <tbody>
+|         <tr>
+|           <td>
+|       <colgroup>
+
+#data
+</strong></b></em></i></u></strike></s></blink></tt></pre></big></small></font></select></h1></h2></h3></h4></h5></h6></body></br></a></img></title></span></style></script></table></th></td></tr></frame></area></link></param></hr></input></col></base></meta></basefont></bgsound></embed></spacer></p></dd></dt></caption></colgroup></tbody></tfoot></thead></address></blockquote></center></dir></div></dl></fieldset></listing></menu></ol></ul></li></nobr></wbr></form></button></marquee></object></html></frameset></head></iframe></image></isindex></noembed></noframes></noscript></optgroup></option></plaintext></textarea>
+#errors
+Line: 1 Col: 9 Unexpected end tag (strong). Expected DOCTYPE.
+Line: 1 Col: 9 Unexpected end tag (strong) after the (implied) root element.
+Line: 1 Col: 13 Unexpected end tag (b) after the (implied) root element.
+Line: 1 Col: 18 Unexpected end tag (em) after the (implied) root element.
+Line: 1 Col: 22 Unexpected end tag (i) after the (implied) root element.
+Line: 1 Col: 26 Unexpected end tag (u) after the (implied) root element.
+Line: 1 Col: 35 Unexpected end tag (strike) after the (implied) root element.
+Line: 1 Col: 39 Unexpected end tag (s) after the (implied) root element.
+Line: 1 Col: 47 Unexpected end tag (blink) after the (implied) root element.
+Line: 1 Col: 52 Unexpected end tag (tt) after the (implied) root element.
+Line: 1 Col: 58 Unexpected end tag (pre) after the (implied) root element.
+Line: 1 Col: 64 Unexpected end tag (big) after the (implied) root element.
+Line: 1 Col: 72 Unexpected end tag (small) after the (implied) root element.
+Line: 1 Col: 79 Unexpected end tag (font) after the (implied) root element.
+Line: 1 Col: 88 Unexpected end tag (select) after the (implied) root element.
+Line: 1 Col: 93 Unexpected end tag (h1) after the (implied) root element.
+Line: 1 Col: 98 Unexpected end tag (h2) after the (implied) root element.
+Line: 1 Col: 103 Unexpected end tag (h3) after the (implied) root element.
+Line: 1 Col: 108 Unexpected end tag (h4) after the (implied) root element.
+Line: 1 Col: 113 Unexpected end tag (h5) after the (implied) root element.
+Line: 1 Col: 118 Unexpected end tag (h6) after the (implied) root element.
+Line: 1 Col: 125 Unexpected end tag (body) after the (implied) root element.
+Line: 1 Col: 130 Unexpected end tag (br). Treated as br element.
+Line: 1 Col: 134 End tag (a) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 140 This element (img) has no end tag.
+Line: 1 Col: 148 Unexpected end tag (title). Ignored.
+Line: 1 Col: 155 Unexpected end tag (span). Ignored.
+Line: 1 Col: 163 Unexpected end tag (style). Ignored.
+Line: 1 Col: 172 Unexpected end tag (script). Ignored.
+Line: 1 Col: 180 Unexpected end tag (table). Ignored.
+Line: 1 Col: 185 Unexpected end tag (th). Ignored.
+Line: 1 Col: 190 Unexpected end tag (td). Ignored.
+Line: 1 Col: 195 Unexpected end tag (tr). Ignored.
+Line: 1 Col: 203 This element (frame) has no end tag.
+Line: 1 Col: 210 This element (area) has no end tag.
+Line: 1 Col: 217 Unexpected end tag (link). Ignored.
+Line: 1 Col: 225 This element (param) has no end tag.
+Line: 1 Col: 230 This element (hr) has no end tag.
+Line: 1 Col: 238 This element (input) has no end tag.
+Line: 1 Col: 244 Unexpected end tag (col). Ignored.
+Line: 1 Col: 251 Unexpected end tag (base). Ignored.
+Line: 1 Col: 258 Unexpected end tag (meta). Ignored.
+Line: 1 Col: 269 This element (basefont) has no end tag.
+Line: 1 Col: 279 This element (bgsound) has no end tag.
+Line: 1 Col: 287 This element (embed) has no end tag.
+Line: 1 Col: 296 This element (spacer) has no end tag.
+Line: 1 Col: 300 Unexpected end tag (p). Ignored.
+Line: 1 Col: 305 End tag (dd) seen too early. Expected other end tag.
+Line: 1 Col: 310 End tag (dt) seen too early. Expected other end tag.
+Line: 1 Col: 320 Unexpected end tag (caption). Ignored.
+Line: 1 Col: 331 Unexpected end tag (colgroup). Ignored.
+Line: 1 Col: 339 Unexpected end tag (tbody). Ignored.
+Line: 1 Col: 347 Unexpected end tag (tfoot). Ignored.
+Line: 1 Col: 355 Unexpected end tag (thead). Ignored.
+Line: 1 Col: 365 End tag (address) seen too early. Expected other end tag.
+Line: 1 Col: 378 End tag (blockquote) seen too early. Expected other end tag.
+Line: 1 Col: 387 End tag (center) seen too early. Expected other end tag.
+Line: 1 Col: 393 Unexpected end tag (dir). Ignored.
+Line: 1 Col: 399 End tag (div) seen too early. Expected other end tag.
+Line: 1 Col: 404 End tag (dl) seen too early. Expected other end tag.
+Line: 1 Col: 415 End tag (fieldset) seen too early. Expected other end tag.
+Line: 1 Col: 425 End tag (listing) seen too early. Expected other end tag.
+Line: 1 Col: 432 End tag (menu) seen too early. Expected other end tag.
+Line: 1 Col: 437 End tag (ol) seen too early. Expected other end tag.
+Line: 1 Col: 442 End tag (ul) seen too early. Expected other end tag.
+Line: 1 Col: 447 End tag (li) seen too early. Expected other end tag.
+Line: 1 Col: 454 End tag (nobr) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 460 This element (wbr) has no end tag.
+Line: 1 Col: 476 End tag (button) seen too early. Expected other end tag.
+Line: 1 Col: 486 End tag (marquee) seen too early. Expected other end tag.
+Line: 1 Col: 495 End tag (object) seen too early. Expected other end tag.
+Line: 1 Col: 513 Unexpected end tag (html). Ignored.
+Line: 1 Col: 513 Unexpected end tag (frameset). Ignored.
+Line: 1 Col: 520 Unexpected end tag (head). Ignored.
+Line: 1 Col: 529 Unexpected end tag (iframe). Ignored.
+Line: 1 Col: 537 This element (image) has no end tag.
+Line: 1 Col: 547 This element (isindex) has no end tag.
+Line: 1 Col: 557 Unexpected end tag (noembed). Ignored.
+Line: 1 Col: 568 Unexpected end tag (noframes). Ignored.
+Line: 1 Col: 579 Unexpected end tag (noscript). Ignored.
+Line: 1 Col: 590 Unexpected end tag (optgroup). Ignored.
+Line: 1 Col: 599 Unexpected end tag (option). Ignored.
+Line: 1 Col: 611 Unexpected end tag (plaintext). Ignored.
+Line: 1 Col: 622 Unexpected end tag (textarea). Ignored.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <br>
+|     <p>
+
+#data
+<table><tr></strong></b></em></i></u></strike></s></blink></tt></pre></big></small></font></select></h1></h2></h3></h4></h5></h6></body></br></a></img></title></span></style></script></table></th></td></tr></frame></area></link></param></hr></input></col></base></meta></basefont></bgsound></embed></spacer></p></dd></dt></caption></colgroup></tbody></tfoot></thead></address></blockquote></center></dir></div></dl></fieldset></listing></menu></ol></ul></li></nobr></wbr></form></button></marquee></object></html></frameset></head></iframe></image></isindex></noembed></noframes></noscript></optgroup></option></plaintext></textarea>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 20 Unexpected end tag (strong) in table context caused voodoo mode.
+Line: 1 Col: 20 End tag (strong) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 24 Unexpected end tag (b) in table context caused voodoo mode.
+Line: 1 Col: 24 End tag (b) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 29 Unexpected end tag (em) in table context caused voodoo mode.
+Line: 1 Col: 29 End tag (em) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 33 Unexpected end tag (i) in table context caused voodoo mode.
+Line: 1 Col: 33 End tag (i) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 37 Unexpected end tag (u) in table context caused voodoo mode.
+Line: 1 Col: 37 End tag (u) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 46 Unexpected end tag (strike) in table context caused voodoo mode.
+Line: 1 Col: 46 End tag (strike) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 50 Unexpected end tag (s) in table context caused voodoo mode.
+Line: 1 Col: 50 End tag (s) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 58 Unexpected end tag (blink) in table context caused voodoo mode.
+Line: 1 Col: 58 Unexpected end tag (blink). Ignored.
+Line: 1 Col: 63 Unexpected end tag (tt) in table context caused voodoo mode.
+Line: 1 Col: 63 End tag (tt) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 69 Unexpected end tag (pre) in table context caused voodoo mode.
+Line: 1 Col: 69 End tag (pre) seen too early. Expected other end tag.
+Line: 1 Col: 75 Unexpected end tag (big) in table context caused voodoo mode.
+Line: 1 Col: 75 End tag (big) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 83 Unexpected end tag (small) in table context caused voodoo mode.
+Line: 1 Col: 83 End tag (small) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 90 Unexpected end tag (font) in table context caused voodoo mode.
+Line: 1 Col: 90 End tag (font) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 99 Unexpected end tag (select) in table context caused voodoo mode.
+Line: 1 Col: 99 Unexpected end tag (select). Ignored.
+Line: 1 Col: 104 Unexpected end tag (h1) in table context caused voodoo mode.
+Line: 1 Col: 104 End tag (h1) seen too early. Expected other end tag.
+Line: 1 Col: 109 Unexpected end tag (h2) in table context caused voodoo mode.
+Line: 1 Col: 109 End tag (h2) seen too early. Expected other end tag.
+Line: 1 Col: 114 Unexpected end tag (h3) in table context caused voodoo mode.
+Line: 1 Col: 114 End tag (h3) seen too early. Expected other end tag.
+Line: 1 Col: 119 Unexpected end tag (h4) in table context caused voodoo mode.
+Line: 1 Col: 119 End tag (h4) seen too early. Expected other end tag.
+Line: 1 Col: 124 Unexpected end tag (h5) in table context caused voodoo mode.
+Line: 1 Col: 124 End tag (h5) seen too early. Expected other end tag.
+Line: 1 Col: 129 Unexpected end tag (h6) in table context caused voodoo mode.
+Line: 1 Col: 129 End tag (h6) seen too early. Expected other end tag.
+Line: 1 Col: 136 Unexpected end tag (body) in the table row phase. Ignored.
+Line: 1 Col: 141 Unexpected end tag (br) in table context caused voodoo mode.
+Line: 1 Col: 141 Unexpected end tag (br). Treated as br element.
+Line: 1 Col: 145 Unexpected end tag (a) in table context caused voodoo mode.
+Line: 1 Col: 145 End tag (a) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 151 Unexpected end tag (img) in table context caused voodoo mode.
+Line: 1 Col: 151 This element (img) has no end tag.
+Line: 1 Col: 159 Unexpected end tag (title) in table context caused voodoo mode.
+Line: 1 Col: 159 Unexpected end tag (title). Ignored.
+Line: 1 Col: 166 Unexpected end tag (span) in table context caused voodoo mode.
+Line: 1 Col: 166 Unexpected end tag (span). Ignored.
+Line: 1 Col: 174 Unexpected end tag (style) in table context caused voodoo mode.
+Line: 1 Col: 174 Unexpected end tag (style). Ignored.
+Line: 1 Col: 183 Unexpected end tag (script) in table context caused voodoo mode.
+Line: 1 Col: 183 Unexpected end tag (script). Ignored.
+Line: 1 Col: 196 Unexpected end tag (th). Ignored.
+Line: 1 Col: 201 Unexpected end tag (td). Ignored.
+Line: 1 Col: 206 Unexpected end tag (tr). Ignored.
+Line: 1 Col: 214 This element (frame) has no end tag.
+Line: 1 Col: 221 This element (area) has no end tag.
+Line: 1 Col: 228 Unexpected end tag (link). Ignored.
+Line: 1 Col: 236 This element (param) has no end tag.
+Line: 1 Col: 241 This element (hr) has no end tag.
+Line: 1 Col: 249 This element (input) has no end tag.
+Line: 1 Col: 255 Unexpected end tag (col). Ignored.
+Line: 1 Col: 262 Unexpected end tag (base). Ignored.
+Line: 1 Col: 269 Unexpected end tag (meta). Ignored.
+Line: 1 Col: 280 This element (basefont) has no end tag.
+Line: 1 Col: 290 This element (bgsound) has no end tag.
+Line: 1 Col: 298 This element (embed) has no end tag.
+Line: 1 Col: 307 This element (spacer) has no end tag.
+Line: 1 Col: 311 Unexpected end tag (p). Ignored.
+Line: 1 Col: 316 End tag (dd) seen too early. Expected other end tag.
+Line: 1 Col: 321 End tag (dt) seen too early. Expected other end tag.
+Line: 1 Col: 331 Unexpected end tag (caption). Ignored.
+Line: 1 Col: 342 Unexpected end tag (colgroup). Ignored.
+Line: 1 Col: 350 Unexpected end tag (tbody). Ignored.
+Line: 1 Col: 358 Unexpected end tag (tfoot). Ignored.
+Line: 1 Col: 366 Unexpected end tag (thead). Ignored.
+Line: 1 Col: 376 End tag (address) seen too early. Expected other end tag.
+Line: 1 Col: 389 End tag (blockquote) seen too early. Expected other end tag.
+Line: 1 Col: 398 End tag (center) seen too early. Expected other end tag.
+Line: 1 Col: 404 Unexpected end tag (dir). Ignored.
+Line: 1 Col: 410 End tag (div) seen too early. Expected other end tag.
+Line: 1 Col: 415 End tag (dl) seen too early. Expected other end tag.
+Line: 1 Col: 426 End tag (fieldset) seen too early. Expected other end tag.
+Line: 1 Col: 436 End tag (listing) seen too early. Expected other end tag.
+Line: 1 Col: 443 End tag (menu) seen too early. Expected other end tag.
+Line: 1 Col: 448 End tag (ol) seen too early. Expected other end tag.
+Line: 1 Col: 453 End tag (ul) seen too early. Expected other end tag.
+Line: 1 Col: 458 End tag (li) seen too early. Expected other end tag.
+Line: 1 Col: 465 End tag (nobr) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 471 This element (wbr) has no end tag.
+Line: 1 Col: 487 End tag (button) seen too early. Expected other end tag.
+Line: 1 Col: 497 End tag (marquee) seen too early. Expected other end tag.
+Line: 1 Col: 506 End tag (object) seen too early. Expected other end tag.
+Line: 1 Col: 524 Unexpected end tag (html). Ignored.
+Line: 1 Col: 524 Unexpected end tag (frameset). Ignored.
+Line: 1 Col: 531 Unexpected end tag (head). Ignored.
+Line: 1 Col: 540 Unexpected end tag (iframe). Ignored.
+Line: 1 Col: 548 This element (image) has no end tag.
+Line: 1 Col: 558 This element (isindex) has no end tag.
+Line: 1 Col: 568 Unexpected end tag (noembed). Ignored.
+Line: 1 Col: 579 Unexpected end tag (noframes). Ignored.
+Line: 1 Col: 590 Unexpected end tag (noscript). Ignored.
+Line: 1 Col: 601 Unexpected end tag (optgroup). Ignored.
+Line: 1 Col: 610 Unexpected end tag (option). Ignored.
+Line: 1 Col: 622 Unexpected end tag (plaintext). Ignored.
+Line: 1 Col: 633 Unexpected end tag (textarea). Ignored.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <br>
+|     <table>
+|       <tbody>
+|         <tr>
+|     <p>
+
+#data
+<frameset>
+#errors
+Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
+Line: 1 Col: 10 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <frameset>
diff --git a/html/testdata/webkit/tests10.dat b/html/testdata/webkit/tests10.dat
new file mode 100644
index 0000000..4f8df86
--- /dev/null
+++ b/html/testdata/webkit/tests10.dat
@@ -0,0 +1,799 @@
+#data
+<!DOCTYPE html><svg></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+
+#data
+<!DOCTYPE html><svg></svg><![CDATA[a]]>
+#errors
+29: Bogus comment
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|     <!-- [CDATA[a]] -->
+
+#data
+<!DOCTYPE html><body><svg></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+
+#data
+<!DOCTYPE html><body><select><svg></svg></select>
+#errors
+35: Stray “svg” start tag.
+42: Stray end tag “svg”
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+
+#data
+<!DOCTYPE html><body><select><option><svg></svg></option></select>
+#errors
+43: Stray “svg” start tag.
+50: Stray end tag “svg”
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+
+#data
+<!DOCTYPE html><body><table><svg></svg></table>
+#errors
+34: Start tag “svg” seen in “table”.
+41: Stray end tag “svg”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|     <table>
+
+#data
+<!DOCTYPE html><body><table><svg><g>foo</g></svg></table>
+#errors
+34: Start tag “svg” seen in “table”.
+46: Stray end tag “g”.
+53: Stray end tag “svg”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg g>
+|         "foo"
+|     <table>
+
+#data
+<!DOCTYPE html><body><table><svg><g>foo</g><g>bar</g></svg></table>
+#errors
+34: Start tag “svg” seen in “table”.
+46: Stray end tag “g”.
+58: Stray end tag “g”.
+65: Stray end tag “svg”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg g>
+|         "foo"
+|       <svg g>
+|         "bar"
+|     <table>
+
+#data
+<!DOCTYPE html><body><table><tbody><svg><g>foo</g><g>bar</g></svg></tbody></table>
+#errors
+41: Start tag “svg” seen in “table”.
+53: Stray end tag “g”.
+65: Stray end tag “g”.
+72: Stray end tag “svg”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg g>
+|         "foo"
+|       <svg g>
+|         "bar"
+|     <table>
+|       <tbody>
+
+#data
+<!DOCTYPE html><body><table><tbody><tr><svg><g>foo</g><g>bar</g></svg></tr></tbody></table>
+#errors
+45: Start tag “svg” seen in “table”.
+57: Stray end tag “g”.
+69: Stray end tag “g”.
+76: Stray end tag “svg”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg g>
+|         "foo"
+|       <svg g>
+|         "bar"
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<!DOCTYPE html><body><table><tbody><tr><td><svg><g>foo</g><g>bar</g></svg></td></tr></tbody></table>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <svg svg>
+|               <svg g>
+|                 "foo"
+|               <svg g>
+|                 "bar"
+
+#data
+<!DOCTYPE html><body><table><tbody><tr><td><svg><g>foo</g><g>bar</g></svg><p>baz</td></tr></tbody></table>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <svg svg>
+|               <svg g>
+|                 "foo"
+|               <svg g>
+|                 "bar"
+|             <p>
+|               "baz"
+
+#data
+<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g></svg><p>baz</caption></table>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <svg svg>
+|           <svg g>
+|             "foo"
+|           <svg g>
+|             "bar"
+|         <p>
+|           "baz"
+
+#data
+<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
+#errors
+70: HTML start tag “p” in a foreign namespace context.
+81: “table” closed but “caption” was still open.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <svg svg>
+|           <svg g>
+|             "foo"
+|           <svg g>
+|             "bar"
+|         <p>
+|           "baz"
+|     <p>
+|       "quux"
+
+#data
+<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g>baz</table><p>quux
+#errors
+78: “table” closed but “caption” was still open.
+78: Unclosed elements on stack.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <svg svg>
+|           <svg g>
+|             "foo"
+|           <svg g>
+|             "bar"
+|           "baz"
+|     <p>
+|       "quux"
+
+#data
+<!DOCTYPE html><body><table><colgroup><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
+#errors
+44: Start tag “svg” seen in “table”.
+56: Stray end tag “g”.
+68: Stray end tag “g”.
+71: HTML start tag “p” in a foreign namespace context.
+71: Start tag “p” seen in “table”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg g>
+|         "foo"
+|       <svg g>
+|         "bar"
+|     <p>
+|       "baz"
+|     <table>
+|       <colgroup>
+|     <p>
+|       "quux"
+
+#data
+<!DOCTYPE html><body><table><tr><td><select><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
+#errors
+50: Stray “svg” start tag.
+54: Stray “g” start tag.
+62: Stray end tag “g”
+66: Stray “g” start tag.
+74: Stray end tag “g”
+77: Stray “p” start tag.
+88: “table” end tag with “select” open.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <select>
+|               "foobarbaz"
+|     <p>
+|       "quux"
+
+#data
+<!DOCTYPE html><body><table><select><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
+#errors
+36: Start tag “select” seen in “table”.
+42: Stray “svg” start tag.
+46: Stray “g” start tag.
+54: Stray end tag “g”
+58: Stray “g” start tag.
+66: Stray end tag “g”
+69: Stray “p” start tag.
+80: “table” end tag with “select” open.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       "foobarbaz"
+|     <table>
+|     <p>
+|       "quux"
+
+#data
+<!DOCTYPE html><body></body></html><svg><g>foo</g><g>bar</g><p>baz
+#errors
+41: Stray “svg” start tag.
+68: HTML start tag “p” in a foreign namespace context.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg g>
+|         "foo"
+|       <svg g>
+|         "bar"
+|     <p>
+|       "baz"
+
+#data
+<!DOCTYPE html><body></body><svg><g>foo</g><g>bar</g><p>baz
+#errors
+34: Stray “svg” start tag.
+61: HTML start tag “p” in a foreign namespace context.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg g>
+|         "foo"
+|       <svg g>
+|         "bar"
+|     <p>
+|       "baz"
+
+#data
+<!DOCTYPE html><frameset><svg><g></g><g></g><p><span>
+#errors
+31: Stray “svg” start tag.
+35: Stray “g” start tag.
+40: Stray end tag “g”
+44: Stray “g” start tag.
+49: Stray end tag “g”
+52: Stray “p” start tag.
+58: Stray “span” start tag.
+58: End of file seen and there were open elements.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!DOCTYPE html><frameset></frameset><svg><g></g><g></g><p><span>
+#errors
+42: Stray “svg” start tag.
+46: Stray “g” start tag.
+51: Stray end tag “g”
+55: Stray “g” start tag.
+60: Stray end tag “g”
+63: Stray “p” start tag.
+69: Stray “span” start tag.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!DOCTYPE html><body xlink:href=foo><svg xlink:href=foo></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     xlink:href="foo"
+|     <svg svg>
+|       xlink href="foo"
+
+#data
+<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo></g></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     xlink:href="foo"
+|     xml:lang="en"
+|     <svg svg>
+|       <svg g>
+|         xlink href="foo"
+|         xml lang="en"
+
+#data
+<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo /></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     xlink:href="foo"
+|     xml:lang="en"
+|     <svg svg>
+|       <svg g>
+|         xlink href="foo"
+|         xml lang="en"
+
+#data
+<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo />bar</svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     xlink:href="foo"
+|     xml:lang="en"
+|     <svg svg>
+|       <svg g>
+|         xlink href="foo"
+|         xml lang="en"
+|       "bar"
+
+#data
+<svg></path>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+
+#data
+<div><svg></div>a
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <svg svg>
+|     "a"
+
+#data
+<div><svg><path></div>a
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <svg svg>
+|         <svg path>
+|     "a"
+
+#data
+<div><svg><path></svg><path>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <svg svg>
+|         <svg path>
+|       <path>
+
+#data
+<div><svg><path><foreignObject><math></div>a
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <svg svg>
+|         <svg path>
+|           <svg foreignObject>
+|             <math math>
+|               "a"
+
+#data
+<div><svg><path><foreignObject><p></div>a
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <svg svg>
+|         <svg path>
+|           <svg foreignObject>
+|             <p>
+|               "a"
+
+#data
+<!DOCTYPE html><svg><desc><div><svg><ul>a
+#errors
+40: HTML start tag “ul” in a foreign namespace context.
+41: End of file in a foreign namespace context.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg desc>
+|         <div>
+|           <svg svg>
+|           <ul>
+|             "a"
+
+#data
+<!DOCTYPE html><svg><desc><svg><ul>a
+#errors
+35: HTML start tag “ul” in a foreign namespace context.
+36: End of file in a foreign namespace context.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg desc>
+|         <svg svg>
+|         <ul>
+|           "a"
+
+#data
+<!DOCTYPE html><p><svg><desc><p>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <svg svg>
+|         <svg desc>
+|           <p>
+
+#data
+<!DOCTYPE html><p><svg><title><p>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <svg svg>
+|         <svg title>
+|           <p>
+
+#data
+<div><svg><path><foreignObject><p></foreignObject><p>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <svg svg>
+|         <svg path>
+|           <svg foreignObject>
+|             <p>
+|             <p>
+
+#data
+<math><mi><div><object><div><span></span></div></object></div></mi><mi>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         <div>
+|           <object>
+|             <div>
+|               <span>
+|       <math mi>
+
+#data
+<math><mi><svg><foreignObject><div><div></div></div></foreignObject></svg></mi><mi>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         <svg svg>
+|           <svg foreignObject>
+|             <div>
+|               <div>
+|       <math mi>
+
+#data
+<svg><script></script><path>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg script>
+|       <svg path>
+
+#data
+<table><svg></svg><tr>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<math><mi><mglyph>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         <math mglyph>
+
+#data
+<math><mi><malignmark>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         <math malignmark>
+
+#data
+<math><mo><mglyph>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mo>
+|         <math mglyph>
+
+#data
+<math><mo><malignmark>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mo>
+|         <math malignmark>
+
+#data
+<math><mn><mglyph>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mn>
+|         <math mglyph>
+
+#data
+<math><mn><malignmark>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mn>
+|         <math malignmark>
+
+#data
+<math><ms><mglyph>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math ms>
+|         <math mglyph>
+
+#data
+<math><ms><malignmark>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math ms>
+|         <math malignmark>
+
+#data
+<math><mtext><mglyph>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mtext>
+|         <math mglyph>
+
+#data
+<math><mtext><malignmark>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mtext>
+|         <math malignmark>
+
+#data
+<math><annotation-xml><svg></svg></annotation-xml><mi>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         <svg svg>
+|       <math mi>
+
+#data
+<math><annotation-xml><svg><foreignObject><div><math><mi></mi></math><span></span></div></foreignObject><path></path></svg></annotation-xml><mi>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         <svg svg>
+|           <svg foreignObject>
+|             <div>
+|               <math math>
+|                 <math mi>
+|               <span>
+|           <svg path>
+|       <math mi>
+
+#data
+<math><annotation-xml><svg><foreignObject><math><mi><svg></svg></mi><mo></mo></math><span></span></foreignObject><path></path></svg></annotation-xml><mi>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         <svg svg>
+|           <svg foreignObject>
+|             <math math>
+|               <math mi>
+|                 <svg svg>
+|               <math mo>
+|             <span>
+|           <svg path>
+|       <math mi>
diff --git a/html/testdata/webkit/tests11.dat b/html/testdata/webkit/tests11.dat
new file mode 100644
index 0000000..638cde4
--- /dev/null
+++ b/html/testdata/webkit/tests11.dat
@@ -0,0 +1,482 @@
+#data
+<!DOCTYPE html><body><svg attributeName='' attributeType='' baseFrequency='' baseProfile='' calcMode='' clipPathUnits='' contentScriptType='' contentStyleType='' diffuseConstant='' edgeMode='' externalResourcesRequired='' filterRes='' filterUnits='' glyphRef='' gradientTransform='' gradientUnits='' kernelMatrix='' kernelUnitLength='' keyPoints='' keySplines='' keyTimes='' lengthAdjust='' limitingConeAngle='' markerHeight='' markerUnits='' markerWidth='' maskContentUnits='' maskUnits='' numOctaves='' pathLength='' patternContentUnits='' patternTransform='' patternUnits='' pointsAtX='' pointsAtY='' pointsAtZ='' preserveAlpha='' preserveAspectRatio='' primitiveUnits='' refX='' refY='' repeatCount='' repeatDur='' requiredExtensions='' requiredFeatures='' specularConstant='' specularExponent='' spreadMethod='' startOffset='' stdDeviation='' stitchTiles='' surfaceScale='' systemLanguage='' tableValues='' targetX='' targetY='' textLength='' viewBox='' viewTarget='' xChannelSelector='' yChannelSelector='' zoomAndPan=''></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       attributeName=""
+|       attributeType=""
+|       baseFrequency=""
+|       baseProfile=""
+|       calcMode=""
+|       clipPathUnits=""
+|       contentScriptType=""
+|       contentStyleType=""
+|       diffuseConstant=""
+|       edgeMode=""
+|       externalResourcesRequired=""
+|       filterRes=""
+|       filterUnits=""
+|       glyphRef=""
+|       gradientTransform=""
+|       gradientUnits=""
+|       kernelMatrix=""
+|       kernelUnitLength=""
+|       keyPoints=""
+|       keySplines=""
+|       keyTimes=""
+|       lengthAdjust=""
+|       limitingConeAngle=""
+|       markerHeight=""
+|       markerUnits=""
+|       markerWidth=""
+|       maskContentUnits=""
+|       maskUnits=""
+|       numOctaves=""
+|       pathLength=""
+|       patternContentUnits=""
+|       patternTransform=""
+|       patternUnits=""
+|       pointsAtX=""
+|       pointsAtY=""
+|       pointsAtZ=""
+|       preserveAlpha=""
+|       preserveAspectRatio=""
+|       primitiveUnits=""
+|       refX=""
+|       refY=""
+|       repeatCount=""
+|       repeatDur=""
+|       requiredExtensions=""
+|       requiredFeatures=""
+|       specularConstant=""
+|       specularExponent=""
+|       spreadMethod=""
+|       startOffset=""
+|       stdDeviation=""
+|       stitchTiles=""
+|       surfaceScale=""
+|       systemLanguage=""
+|       tableValues=""
+|       targetX=""
+|       targetY=""
+|       textLength=""
+|       viewBox=""
+|       viewTarget=""
+|       xChannelSelector=""
+|       yChannelSelector=""
+|       zoomAndPan=""
+
+#data
+<!DOCTYPE html><BODY><SVG ATTRIBUTENAME='' ATTRIBUTETYPE='' BASEFREQUENCY='' BASEPROFILE='' CALCMODE='' CLIPPATHUNITS='' CONTENTSCRIPTTYPE='' CONTENTSTYLETYPE='' DIFFUSECONSTANT='' EDGEMODE='' EXTERNALRESOURCESREQUIRED='' FILTERRES='' FILTERUNITS='' GLYPHREF='' GRADIENTTRANSFORM='' GRADIENTUNITS='' KERNELMATRIX='' KERNELUNITLENGTH='' KEYPOINTS='' KEYSPLINES='' KEYTIMES='' LENGTHADJUST='' LIMITINGCONEANGLE='' MARKERHEIGHT='' MARKERUNITS='' MARKERWIDTH='' MASKCONTENTUNITS='' MASKUNITS='' NUMOCTAVES='' PATHLENGTH='' PATTERNCONTENTUNITS='' PATTERNTRANSFORM='' PATTERNUNITS='' POINTSATX='' POINTSATY='' POINTSATZ='' PRESERVEALPHA='' PRESERVEASPECTRATIO='' PRIMITIVEUNITS='' REFX='' REFY='' REPEATCOUNT='' REPEATDUR='' REQUIREDEXTENSIONS='' REQUIREDFEATURES='' SPECULARCONSTANT='' SPECULAREXPONENT='' SPREADMETHOD='' STARTOFFSET='' STDDEVIATION='' STITCHTILES='' SURFACESCALE='' SYSTEMLANGUAGE='' TABLEVALUES='' TARGETX='' TARGETY='' TEXTLENGTH='' VIEWBOX='' VIEWTARGET='' XCHANNELSELECTOR='' YCHANNELSELECTOR='' ZOOMANDPAN=''></SVG>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       attributeName=""
+|       attributeType=""
+|       baseFrequency=""
+|       baseProfile=""
+|       calcMode=""
+|       clipPathUnits=""
+|       contentScriptType=""
+|       contentStyleType=""
+|       diffuseConstant=""
+|       edgeMode=""
+|       externalResourcesRequired=""
+|       filterRes=""
+|       filterUnits=""
+|       glyphRef=""
+|       gradientTransform=""
+|       gradientUnits=""
+|       kernelMatrix=""
+|       kernelUnitLength=""
+|       keyPoints=""
+|       keySplines=""
+|       keyTimes=""
+|       lengthAdjust=""
+|       limitingConeAngle=""
+|       markerHeight=""
+|       markerUnits=""
+|       markerWidth=""
+|       maskContentUnits=""
+|       maskUnits=""
+|       numOctaves=""
+|       pathLength=""
+|       patternContentUnits=""
+|       patternTransform=""
+|       patternUnits=""
+|       pointsAtX=""
+|       pointsAtY=""
+|       pointsAtZ=""
+|       preserveAlpha=""
+|       preserveAspectRatio=""
+|       primitiveUnits=""
+|       refX=""
+|       refY=""
+|       repeatCount=""
+|       repeatDur=""
+|       requiredExtensions=""
+|       requiredFeatures=""
+|       specularConstant=""
+|       specularExponent=""
+|       spreadMethod=""
+|       startOffset=""
+|       stdDeviation=""
+|       stitchTiles=""
+|       surfaceScale=""
+|       systemLanguage=""
+|       tableValues=""
+|       targetX=""
+|       targetY=""
+|       textLength=""
+|       viewBox=""
+|       viewTarget=""
+|       xChannelSelector=""
+|       yChannelSelector=""
+|       zoomAndPan=""
+
+#data
+<!DOCTYPE html><body><svg attributename='' attributetype='' basefrequency='' baseprofile='' calcmode='' clippathunits='' contentscripttype='' contentstyletype='' diffuseconstant='' edgemode='' externalresourcesrequired='' filterres='' filterunits='' glyphref='' gradienttransform='' gradientunits='' kernelmatrix='' kernelunitlength='' keypoints='' keysplines='' keytimes='' lengthadjust='' limitingconeangle='' markerheight='' markerunits='' markerwidth='' maskcontentunits='' maskunits='' numoctaves='' pathlength='' patterncontentunits='' patterntransform='' patternunits='' pointsatx='' pointsaty='' pointsatz='' preservealpha='' preserveaspectratio='' primitiveunits='' refx='' refy='' repeatcount='' repeatdur='' requiredextensions='' requiredfeatures='' specularconstant='' specularexponent='' spreadmethod='' startoffset='' stddeviation='' stitchtiles='' surfacescale='' systemlanguage='' tablevalues='' targetx='' targety='' textlength='' viewbox='' viewtarget='' xchannelselector='' ychannelselector='' zoomandpan=''></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       attributeName=""
+|       attributeType=""
+|       baseFrequency=""
+|       baseProfile=""
+|       calcMode=""
+|       clipPathUnits=""
+|       contentScriptType=""
+|       contentStyleType=""
+|       diffuseConstant=""
+|       edgeMode=""
+|       externalResourcesRequired=""
+|       filterRes=""
+|       filterUnits=""
+|       glyphRef=""
+|       gradientTransform=""
+|       gradientUnits=""
+|       kernelMatrix=""
+|       kernelUnitLength=""
+|       keyPoints=""
+|       keySplines=""
+|       keyTimes=""
+|       lengthAdjust=""
+|       limitingConeAngle=""
+|       markerHeight=""
+|       markerUnits=""
+|       markerWidth=""
+|       maskContentUnits=""
+|       maskUnits=""
+|       numOctaves=""
+|       pathLength=""
+|       patternContentUnits=""
+|       patternTransform=""
+|       patternUnits=""
+|       pointsAtX=""
+|       pointsAtY=""
+|       pointsAtZ=""
+|       preserveAlpha=""
+|       preserveAspectRatio=""
+|       primitiveUnits=""
+|       refX=""
+|       refY=""
+|       repeatCount=""
+|       repeatDur=""
+|       requiredExtensions=""
+|       requiredFeatures=""
+|       specularConstant=""
+|       specularExponent=""
+|       spreadMethod=""
+|       startOffset=""
+|       stdDeviation=""
+|       stitchTiles=""
+|       surfaceScale=""
+|       systemLanguage=""
+|       tableValues=""
+|       targetX=""
+|       targetY=""
+|       textLength=""
+|       viewBox=""
+|       viewTarget=""
+|       xChannelSelector=""
+|       yChannelSelector=""
+|       zoomAndPan=""
+
+#data
+<!DOCTYPE html><body><math attributeName='' attributeType='' baseFrequency='' baseProfile='' calcMode='' clipPathUnits='' contentScriptType='' contentStyleType='' diffuseConstant='' edgeMode='' externalResourcesRequired='' filterRes='' filterUnits='' glyphRef='' gradientTransform='' gradientUnits='' kernelMatrix='' kernelUnitLength='' keyPoints='' keySplines='' keyTimes='' lengthAdjust='' limitingConeAngle='' markerHeight='' markerUnits='' markerWidth='' maskContentUnits='' maskUnits='' numOctaves='' pathLength='' patternContentUnits='' patternTransform='' patternUnits='' pointsAtX='' pointsAtY='' pointsAtZ='' preserveAlpha='' preserveAspectRatio='' primitiveUnits='' refX='' refY='' repeatCount='' repeatDur='' requiredExtensions='' requiredFeatures='' specularConstant='' specularExponent='' spreadMethod='' startOffset='' stdDeviation='' stitchTiles='' surfaceScale='' systemLanguage='' tableValues='' targetX='' targetY='' textLength='' viewBox='' viewTarget='' xChannelSelector='' yChannelSelector='' zoomAndPan=''></math>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       attributename=""
+|       attributetype=""
+|       basefrequency=""
+|       baseprofile=""
+|       calcmode=""
+|       clippathunits=""
+|       contentscripttype=""
+|       contentstyletype=""
+|       diffuseconstant=""
+|       edgemode=""
+|       externalresourcesrequired=""
+|       filterres=""
+|       filterunits=""
+|       glyphref=""
+|       gradienttransform=""
+|       gradientunits=""
+|       kernelmatrix=""
+|       kernelunitlength=""
+|       keypoints=""
+|       keysplines=""
+|       keytimes=""
+|       lengthadjust=""
+|       limitingconeangle=""
+|       markerheight=""
+|       markerunits=""
+|       markerwidth=""
+|       maskcontentunits=""
+|       maskunits=""
+|       numoctaves=""
+|       pathlength=""
+|       patterncontentunits=""
+|       patterntransform=""
+|       patternunits=""
+|       pointsatx=""
+|       pointsaty=""
+|       pointsatz=""
+|       preservealpha=""
+|       preserveaspectratio=""
+|       primitiveunits=""
+|       refx=""
+|       refy=""
+|       repeatcount=""
+|       repeatdur=""
+|       requiredextensions=""
+|       requiredfeatures=""
+|       specularconstant=""
+|       specularexponent=""
+|       spreadmethod=""
+|       startoffset=""
+|       stddeviation=""
+|       stitchtiles=""
+|       surfacescale=""
+|       systemlanguage=""
+|       tablevalues=""
+|       targetx=""
+|       targety=""
+|       textlength=""
+|       viewbox=""
+|       viewtarget=""
+|       xchannelselector=""
+|       ychannelselector=""
+|       zoomandpan=""
+
+#data
+<!DOCTYPE html><body><svg><altGlyph /><altGlyphDef /><altGlyphItem /><animateColor /><animateMotion /><animateTransform /><clipPath /><feBlend /><feColorMatrix /><feComponentTransfer /><feComposite /><feConvolveMatrix /><feDiffuseLighting /><feDisplacementMap /><feDistantLight /><feFlood /><feFuncA /><feFuncB /><feFuncG /><feFuncR /><feGaussianBlur /><feImage /><feMerge /><feMergeNode /><feMorphology /><feOffset /><fePointLight /><feSpecularLighting /><feSpotLight /><feTile /><feTurbulence /><foreignObject /><glyphRef /><linearGradient /><radialGradient /><textPath /></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg altGlyph>
+|       <svg altGlyphDef>
+|       <svg altGlyphItem>
+|       <svg animateColor>
+|       <svg animateMotion>
+|       <svg animateTransform>
+|       <svg clipPath>
+|       <svg feBlend>
+|       <svg feColorMatrix>
+|       <svg feComponentTransfer>
+|       <svg feComposite>
+|       <svg feConvolveMatrix>
+|       <svg feDiffuseLighting>
+|       <svg feDisplacementMap>
+|       <svg feDistantLight>
+|       <svg feFlood>
+|       <svg feFuncA>
+|       <svg feFuncB>
+|       <svg feFuncG>
+|       <svg feFuncR>
+|       <svg feGaussianBlur>
+|       <svg feImage>
+|       <svg feMerge>
+|       <svg feMergeNode>
+|       <svg feMorphology>
+|       <svg feOffset>
+|       <svg fePointLight>
+|       <svg feSpecularLighting>
+|       <svg feSpotLight>
+|       <svg feTile>
+|       <svg feTurbulence>
+|       <svg foreignObject>
+|       <svg glyphRef>
+|       <svg linearGradient>
+|       <svg radialGradient>
+|       <svg textPath>
+
+#data
+<!DOCTYPE html><body><svg><altglyph /><altglyphdef /><altglyphitem /><animatecolor /><animatemotion /><animatetransform /><clippath /><feblend /><fecolormatrix /><fecomponenttransfer /><fecomposite /><feconvolvematrix /><fediffuselighting /><fedisplacementmap /><fedistantlight /><feflood /><fefunca /><fefuncb /><fefuncg /><fefuncr /><fegaussianblur /><feimage /><femerge /><femergenode /><femorphology /><feoffset /><fepointlight /><fespecularlighting /><fespotlight /><fetile /><feturbulence /><foreignobject /><glyphref /><lineargradient /><radialgradient /><textpath /></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg altGlyph>
+|       <svg altGlyphDef>
+|       <svg altGlyphItem>
+|       <svg animateColor>
+|       <svg animateMotion>
+|       <svg animateTransform>
+|       <svg clipPath>
+|       <svg feBlend>
+|       <svg feColorMatrix>
+|       <svg feComponentTransfer>
+|       <svg feComposite>
+|       <svg feConvolveMatrix>
+|       <svg feDiffuseLighting>
+|       <svg feDisplacementMap>
+|       <svg feDistantLight>
+|       <svg feFlood>
+|       <svg feFuncA>
+|       <svg feFuncB>
+|       <svg feFuncG>
+|       <svg feFuncR>
+|       <svg feGaussianBlur>
+|       <svg feImage>
+|       <svg feMerge>
+|       <svg feMergeNode>
+|       <svg feMorphology>
+|       <svg feOffset>
+|       <svg fePointLight>
+|       <svg feSpecularLighting>
+|       <svg feSpotLight>
+|       <svg feTile>
+|       <svg feTurbulence>
+|       <svg foreignObject>
+|       <svg glyphRef>
+|       <svg linearGradient>
+|       <svg radialGradient>
+|       <svg textPath>
+
+#data
+<!DOCTYPE html><BODY><SVG><ALTGLYPH /><ALTGLYPHDEF /><ALTGLYPHITEM /><ANIMATECOLOR /><ANIMATEMOTION /><ANIMATETRANSFORM /><CLIPPATH /><FEBLEND /><FECOLORMATRIX /><FECOMPONENTTRANSFER /><FECOMPOSITE /><FECONVOLVEMATRIX /><FEDIFFUSELIGHTING /><FEDISPLACEMENTMAP /><FEDISTANTLIGHT /><FEFLOOD /><FEFUNCA /><FEFUNCB /><FEFUNCG /><FEFUNCR /><FEGAUSSIANBLUR /><FEIMAGE /><FEMERGE /><FEMERGENODE /><FEMORPHOLOGY /><FEOFFSET /><FEPOINTLIGHT /><FESPECULARLIGHTING /><FESPOTLIGHT /><FETILE /><FETURBULENCE /><FOREIGNOBJECT /><GLYPHREF /><LINEARGRADIENT /><RADIALGRADIENT /><TEXTPATH /></SVG>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg altGlyph>
+|       <svg altGlyphDef>
+|       <svg altGlyphItem>
+|       <svg animateColor>
+|       <svg animateMotion>
+|       <svg animateTransform>
+|       <svg clipPath>
+|       <svg feBlend>
+|       <svg feColorMatrix>
+|       <svg feComponentTransfer>
+|       <svg feComposite>
+|       <svg feConvolveMatrix>
+|       <svg feDiffuseLighting>
+|       <svg feDisplacementMap>
+|       <svg feDistantLight>
+|       <svg feFlood>
+|       <svg feFuncA>
+|       <svg feFuncB>
+|       <svg feFuncG>
+|       <svg feFuncR>
+|       <svg feGaussianBlur>
+|       <svg feImage>
+|       <svg feMerge>
+|       <svg feMergeNode>
+|       <svg feMorphology>
+|       <svg feOffset>
+|       <svg fePointLight>
+|       <svg feSpecularLighting>
+|       <svg feSpotLight>
+|       <svg feTile>
+|       <svg feTurbulence>
+|       <svg foreignObject>
+|       <svg glyphRef>
+|       <svg linearGradient>
+|       <svg radialGradient>
+|       <svg textPath>
+
+#data
+<!DOCTYPE html><body><math><altGlyph /><altGlyphDef /><altGlyphItem /><animateColor /><animateMotion /><animateTransform /><clipPath /><feBlend /><feColorMatrix /><feComponentTransfer /><feComposite /><feConvolveMatrix /><feDiffuseLighting /><feDisplacementMap /><feDistantLight /><feFlood /><feFuncA /><feFuncB /><feFuncG /><feFuncR /><feGaussianBlur /><feImage /><feMerge /><feMergeNode /><feMorphology /><feOffset /><fePointLight /><feSpecularLighting /><feSpotLight /><feTile /><feTurbulence /><foreignObject /><glyphRef /><linearGradient /><radialGradient /><textPath /></math>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math altglyph>
+|       <math altglyphdef>
+|       <math altglyphitem>
+|       <math animatecolor>
+|       <math animatemotion>
+|       <math animatetransform>
+|       <math clippath>
+|       <math feblend>
+|       <math fecolormatrix>
+|       <math fecomponenttransfer>
+|       <math fecomposite>
+|       <math feconvolvematrix>
+|       <math fediffuselighting>
+|       <math fedisplacementmap>
+|       <math fedistantlight>
+|       <math feflood>
+|       <math fefunca>
+|       <math fefuncb>
+|       <math fefuncg>
+|       <math fefuncr>
+|       <math fegaussianblur>
+|       <math feimage>
+|       <math femerge>
+|       <math femergenode>
+|       <math femorphology>
+|       <math feoffset>
+|       <math fepointlight>
+|       <math fespecularlighting>
+|       <math fespotlight>
+|       <math fetile>
+|       <math feturbulence>
+|       <math foreignobject>
+|       <math glyphref>
+|       <math lineargradient>
+|       <math radialgradient>
+|       <math textpath>
+
+#data
+<!DOCTYPE html><body><svg><solidColor /></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg solidcolor>
diff --git a/html/testdata/webkit/tests12.dat b/html/testdata/webkit/tests12.dat
new file mode 100644
index 0000000..63107d2
--- /dev/null
+++ b/html/testdata/webkit/tests12.dat
@@ -0,0 +1,62 @@
+#data
+<!DOCTYPE html><body><p>foo<math><mtext><i>baz</i></mtext><annotation-xml><svg><desc><b>eggs</b></desc><g><foreignObject><P>spam<TABLE><tr><td><img></td></table></foreignObject></g><g>quux</g></svg></annotation-xml></math>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|       <math math>
+|         <math mtext>
+|           <i>
+|             "baz"
+|         <math annotation-xml>
+|           <svg svg>
+|             <svg desc>
+|               <b>
+|                 "eggs"
+|             <svg g>
+|               <svg foreignObject>
+|                 <p>
+|                   "spam"
+|                 <table>
+|                   <tbody>
+|                     <tr>
+|                       <td>
+|                         <img>
+|             <svg g>
+|               "quux"
+|       "bar"
+
+#data
+<!DOCTYPE html><body>foo<math><mtext><i>baz</i></mtext><annotation-xml><svg><desc><b>eggs</b></desc><g><foreignObject><P>spam<TABLE><tr><td><img></td></table></foreignObject></g><g>quux</g></svg></annotation-xml></math>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "foo"
+|     <math math>
+|       <math mtext>
+|         <i>
+|           "baz"
+|       <math annotation-xml>
+|         <svg svg>
+|           <svg desc>
+|             <b>
+|               "eggs"
+|           <svg g>
+|             <svg foreignObject>
+|               <p>
+|                 "spam"
+|               <table>
+|                 <tbody>
+|                   <tr>
+|                     <td>
+|                       <img>
+|           <svg g>
+|             "quux"
+|     "bar"
diff --git a/html/testdata/webkit/tests14.dat b/html/testdata/webkit/tests14.dat
new file mode 100644
index 0000000..b8713f8
--- /dev/null
+++ b/html/testdata/webkit/tests14.dat
@@ -0,0 +1,74 @@
+#data
+<!DOCTYPE html><html><body><xyz:abc></xyz:abc>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <xyz:abc>
+
+#data
+<!DOCTYPE html><html><body><xyz:abc></xyz:abc><span></span>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <xyz:abc>
+|     <span>
+
+#data
+<!DOCTYPE html><html><html abc:def=gh><xyz:abc></xyz:abc>
+#errors
+15: Unexpected start tag html
+#document
+| <!DOCTYPE html>
+| <html>
+|   abc:def="gh"
+|   <head>
+|   <body>
+|     <xyz:abc>
+
+#data
+<!DOCTYPE html><html xml:lang=bar><html xml:lang=foo>
+#errors
+15: Unexpected start tag html
+#document
+| <!DOCTYPE html>
+| <html>
+|   xml:lang="bar"
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><html 123=456>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   123="456"
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><html 123=456><html 789=012>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   123="456"
+|   789="012"
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><html><body 789=012>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     789="012"
diff --git a/html/testdata/webkit/tests15.dat b/html/testdata/webkit/tests15.dat
new file mode 100644
index 0000000..6ce1c0d
--- /dev/null
+++ b/html/testdata/webkit/tests15.dat
@@ -0,0 +1,208 @@
+#data
+<!DOCTYPE html><p><b><i><u></p> <p>X
+#errors
+Line: 1 Col: 31 Unexpected end tag (p). Ignored.
+Line: 1 Col: 36 Expected closing tag. Unexpected end of file.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <b>
+|         <i>
+|           <u>
+|     <b>
+|       <i>
+|         <u>
+|           " "
+|           <p>
+|             "X"
+
+#data
+<p><b><i><u></p>
+<p>X
+#errors
+Line: 1 Col: 3 Unexpected start tag (p). Expected DOCTYPE.
+Line: 1 Col: 16 Unexpected end tag (p). Ignored.
+Line: 2 Col: 4 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <b>
+|         <i>
+|           <u>
+|     <b>
+|       <i>
+|         <u>
+|           "
+"
+|           <p>
+|             "X"
+
+#data
+<!doctype html></html> <head>
+#errors
+Line: 1 Col: 22 Unexpected end tag (html) after the (implied) root element.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     " "
+
+#data
+<!doctype html></body><meta>
+#errors
+Line: 1 Col: 22 Unexpected end tag (body) after the (implied) root element.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <meta>
+
+#data
+<html></html><!-- foo -->
+#errors
+Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
+Line: 1 Col: 13 Unexpected end tag (html) after the (implied) root element.
+#document
+| <html>
+|   <head>
+|   <body>
+| <!--  foo  -->
+
+#data
+<!doctype html></body><title>X</title>
+#errors
+Line: 1 Col: 22 Unexpected end tag (body) after the (implied) root element.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <title>
+|       "X"
+
+#data
+<!doctype html><table> X<meta></table>
+#errors
+Line: 1 Col: 24 Unexpected non-space characters in table context caused voodoo mode.
+Line: 1 Col: 30 Unexpected start tag (meta) in table context caused voodoo mode.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     " X"
+|     <meta>
+|     <table>
+
+#data
+<!doctype html><table> x</table>
+#errors
+Line: 1 Col: 24 Unexpected non-space characters in table context caused voodoo mode.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     " x"
+|     <table>
+
+#data
+<!doctype html><table> x </table>
+#errors
+Line: 1 Col: 25 Unexpected non-space characters in table context caused voodoo mode.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     " x "
+|     <table>
+
+#data
+<!doctype html><table><tr> x</table>
+#errors
+Line: 1 Col: 28 Unexpected non-space characters in table context caused voodoo mode.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     " x"
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<!doctype html><table>X<style> <tr>x </style> </table>
+#errors
+Line: 1 Col: 23 Unexpected non-space characters in table context caused voodoo mode.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "X"
+|     <table>
+|       <style>
+|         " <tr>x "
+|       " "
+
+#data
+<!doctype html><div><table><a>foo</a> <tr><td>bar</td> </tr></table></div>
+#errors
+Line: 1 Col: 30 Unexpected start tag (a) in table context caused voodoo mode.
+Line: 1 Col: 37 Unexpected end tag (a) in table context caused voodoo mode.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <a>
+|         "foo"
+|       <table>
+|         " "
+|         <tbody>
+|           <tr>
+|             <td>
+|               "bar"
+|             " "
+
+#data
+<frame></frame></frame><frameset><frame><frameset><frame></frameset><noframes></frameset><noframes>
+#errors
+6: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
+13: Stray start tag “frame”.
+21: Stray end tag “frame”.
+29: Stray end tag “frame”.
+39: “frameset” start tag after “body” already open.
+105: End of file seen inside an [R]CDATA element.
+105: End of file seen and there were open elements.
+XXX: These errors are wrong, please fix me!
+#document
+| <html>
+|   <head>
+|   <frameset>
+|     <frame>
+|     <frameset>
+|       <frame>
+|     <noframes>
+|       "</frameset><noframes>"
+
+#data
+<!DOCTYPE html><object></html>
+#errors
+1: Expected closing tag. Unexpected end of file
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <object>
diff --git a/html/testdata/webkit/tests16.dat b/html/testdata/webkit/tests16.dat
new file mode 100644
index 0000000..c8ef66f
--- /dev/null
+++ b/html/testdata/webkit/tests16.dat
@@ -0,0 +1,2299 @@
+#data
+<!doctype html><script>
+#errors
+Line: 1 Col: 23 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|   <body>
+
+#data
+<!doctype html><script>a
+#errors
+Line: 1 Col: 24 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "a"
+|   <body>
+
+#data
+<!doctype html><script><
+#errors
+Line: 1 Col: 24 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<"
+|   <body>
+
+#data
+<!doctype html><script></
+#errors
+Line: 1 Col: 25 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</"
+|   <body>
+
+#data
+<!doctype html><script></S
+#errors
+Line: 1 Col: 26 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</S"
+|   <body>
+
+#data
+<!doctype html><script></SC
+#errors
+Line: 1 Col: 27 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</SC"
+|   <body>
+
+#data
+<!doctype html><script></SCR
+#errors
+Line: 1 Col: 28 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</SCR"
+|   <body>
+
+#data
+<!doctype html><script></SCRI
+#errors
+Line: 1 Col: 29 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</SCRI"
+|   <body>
+
+#data
+<!doctype html><script></SCRIP
+#errors
+Line: 1 Col: 30 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</SCRIP"
+|   <body>
+
+#data
+<!doctype html><script></SCRIPT
+#errors
+Line: 1 Col: 31 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</SCRIPT"
+|   <body>
+
+#data
+<!doctype html><script></SCRIPT 
+#errors
+Line: 1 Col: 32 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|   <body>
+
+#data
+<!doctype html><script></s
+#errors
+Line: 1 Col: 26 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</s"
+|   <body>
+
+#data
+<!doctype html><script></sc
+#errors
+Line: 1 Col: 27 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</sc"
+|   <body>
+
+#data
+<!doctype html><script></scr
+#errors
+Line: 1 Col: 28 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</scr"
+|   <body>
+
+#data
+<!doctype html><script></scri
+#errors
+Line: 1 Col: 29 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</scri"
+|   <body>
+
+#data
+<!doctype html><script></scrip
+#errors
+Line: 1 Col: 30 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</scrip"
+|   <body>
+
+#data
+<!doctype html><script></script
+#errors
+Line: 1 Col: 31 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</script"
+|   <body>
+
+#data
+<!doctype html><script></script 
+#errors
+Line: 1 Col: 32 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|   <body>
+
+#data
+<!doctype html><script><!
+#errors
+Line: 1 Col: 25 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!"
+|   <body>
+
+#data
+<!doctype html><script><!a
+#errors
+Line: 1 Col: 26 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!a"
+|   <body>
+
+#data
+<!doctype html><script><!-
+#errors
+Line: 1 Col: 26 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!-"
+|   <body>
+
+#data
+<!doctype html><script><!-a
+#errors
+Line: 1 Col: 27 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!-a"
+|   <body>
+
+#data
+<!doctype html><script><!--
+#errors
+Line: 1 Col: 27 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--"
+|   <body>
+
+#data
+<!doctype html><script><!--a
+#errors
+Line: 1 Col: 28 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--a"
+|   <body>
+
+#data
+<!doctype html><script><!--<
+#errors
+Line: 1 Col: 28 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<"
+|   <body>
+
+#data
+<!doctype html><script><!--<a
+#errors
+Line: 1 Col: 29 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<a"
+|   <body>
+
+#data
+<!doctype html><script><!--</
+#errors
+Line: 1 Col: 27 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--</"
+|   <body>
+
+#data
+<!doctype html><script><!--</script
+#errors
+Line: 1 Col: 35 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--</script"
+|   <body>
+
+#data
+<!doctype html><script><!--</script 
+#errors
+Line: 1 Col: 36 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--"
+|   <body>
+
+#data
+<!doctype html><script><!--<s
+#errors
+Line: 1 Col: 29 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<s"
+|   <body>
+
+#data
+<!doctype html><script><!--<script
+#errors
+Line: 1 Col: 34 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script"
+|   <body>
+
+#data
+<!doctype html><script><!--<script 
+#errors
+Line: 1 Col: 35 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script "
+|   <body>
+
+#data
+<!doctype html><script><!--<script <
+#errors
+Line: 1 Col: 36 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script <"
+|   <body>
+
+#data
+<!doctype html><script><!--<script <a
+#errors
+Line: 1 Col: 37 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script <a"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </
+#errors
+Line: 1 Col: 37 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </s
+#errors
+Line: 1 Col: 38 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </s"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script
+#errors
+Line: 1 Col: 43 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </scripta
+#errors
+Line: 1 Col: 44 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </scripta"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script 
+#errors
+Line: 1 Col: 44 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script "
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script>
+#errors
+Line: 1 Col: 44 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script>"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script/
+#errors
+Line: 1 Col: 44 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script/"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script <
+#errors
+Line: 1 Col: 45 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script <"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script <a
+#errors
+Line: 1 Col: 46 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script <a"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script </
+#errors
+Line: 1 Col: 46 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script </"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script </script
+#errors
+Line: 1 Col: 52 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script </script"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script </script 
+#errors
+Line: 1 Col: 53 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script "
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script </script/
+#errors
+Line: 1 Col: 53 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script "
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script </script>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script "
+|   <body>
+
+#data
+<!doctype html><script><!--<script -
+#errors
+Line: 1 Col: 36 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -"
+|   <body>
+
+#data
+<!doctype html><script><!--<script -a
+#errors
+Line: 1 Col: 37 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -a"
+|   <body>
+
+#data
+<!doctype html><script><!--<script -<
+#errors
+Line: 1 Col: 37 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -<"
+|   <body>
+
+#data
+<!doctype html><script><!--<script --
+#errors
+Line: 1 Col: 37 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --"
+|   <body>
+
+#data
+<!doctype html><script><!--<script --a
+#errors
+Line: 1 Col: 38 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --a"
+|   <body>
+
+#data
+<!doctype html><script><!--<script --<
+#errors
+Line: 1 Col: 38 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --<"
+|   <body>
+
+#data
+<!doctype html><script><!--<script -->
+#errors
+Line: 1 Col: 38 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -->"
+|   <body>
+
+#data
+<!doctype html><script><!--<script --><
+#errors
+Line: 1 Col: 39 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --><"
+|   <body>
+
+#data
+<!doctype html><script><!--<script --></
+#errors
+Line: 1 Col: 40 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --></"
+|   <body>
+
+#data
+<!doctype html><script><!--<script --></script
+#errors
+Line: 1 Col: 46 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --></script"
+|   <body>
+
+#data
+<!doctype html><script><!--<script --></script 
+#errors
+Line: 1 Col: 47 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -->"
+|   <body>
+
+#data
+<!doctype html><script><!--<script --></script/
+#errors
+Line: 1 Col: 47 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -->"
+|   <body>
+
+#data
+<!doctype html><script><!--<script --></script>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -->"
+|   <body>
+
+#data
+<!doctype html><script><!--<script><\/script>--></script>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script><\/script>-->"
+|   <body>
+
+#data
+<!doctype html><script><!--<script></scr'+'ipt>--></script>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></scr'+'ipt>-->"
+|   <body>
+
+#data
+<!doctype html><script><!--<script></script><script></script></script>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>"
+|   <body>
+
+#data
+<!doctype html><script><!--<script></script><script></script>--><!--</script>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>--><!--"
+|   <body>
+
+#data
+<!doctype html><script><!--<script></script><script></script>-- ></script>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>-- >"
+|   <body>
+
+#data
+<!doctype html><script><!--<script></script><script></script>- -></script>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>- ->"
+|   <body>
+
+#data
+<!doctype html><script><!--<script></script><script></script>- - ></script>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>- - >"
+|   <body>
+
+#data
+<!doctype html><script><!--<script></script><script></script>-></script>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>->"
+|   <body>
+
+#data
+<!doctype html><script><!--<script>--!></script>X
+#errors
+Line: 1 Col: 49 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script>--!></script>X"
+|   <body>
+
+#data
+<!doctype html><script><!--<scr'+'ipt></script>--></script>
+#errors
+Line: 1 Col: 59 Unexpected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<scr'+'ipt>"
+|   <body>
+|     "-->"
+
+#data
+<!doctype html><script><!--<script></scr'+'ipt></script>X
+#errors
+Line: 1 Col: 57 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></scr'+'ipt></script>X"
+|   <body>
+
+#data
+<!doctype html><style><!--<style></style>--></style>
+#errors
+Line: 1 Col: 52 Unexpected end tag (style).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <style>
+|       "<!--<style>"
+|   <body>
+|     "-->"
+
+#data
+<!doctype html><style><!--</style>X
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <style>
+|       "<!--"
+|   <body>
+|     "X"
+
+#data
+<!doctype html><style><!--...</style>...--></style>
+#errors
+Line: 1 Col: 51 Unexpected end tag (style).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <style>
+|       "<!--..."
+|   <body>
+|     "...-->"
+
+#data
+<!doctype html><style><!--<br><html xmlns:v="urn:schemas-microsoft-com:vml"><!--[if !mso]><style></style>X
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <style>
+|       "<!--<br><html xmlns:v="urn:schemas-microsoft-com:vml"><!--[if !mso]><style>"
+|   <body>
+|     "X"
+
+#data
+<!doctype html><style><!--...<style><!--...--!></style>--></style>
+#errors
+Line: 1 Col: 66 Unexpected end tag (style).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <style>
+|       "<!--...<style><!--...--!>"
+|   <body>
+|     "-->"
+
+#data
+<!doctype html><style><!--...</style><!-- --><style>@import ...</style>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <style>
+|       "<!--..."
+|     <!--   -->
+|     <style>
+|       "@import ..."
+|   <body>
+
+#data
+<!doctype html><style>...<style><!--...</style><!-- --></style>
+#errors
+Line: 1 Col: 63 Unexpected end tag (style).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <style>
+|       "...<style><!--..."
+|     <!--   -->
+|   <body>
+
+#data
+<!doctype html><style>...<!--[if IE]><style>...</style>X
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <style>
+|       "...<!--[if IE]><style>..."
+|   <body>
+|     "X"
+
+#data
+<!doctype html><title><!--<title></title>--></title>
+#errors
+Line: 1 Col: 52 Unexpected end tag (title).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <title>
+|       "<!--<title>"
+|   <body>
+|     "-->"
+
+#data
+<!doctype html><title>&lt;/title></title>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <title>
+|       "</title>"
+|   <body>
+
+#data
+<!doctype html><title>foo/title><link></head><body>X
+#errors
+Line: 1 Col: 52 Unexpected end of file. Expected end tag (title).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <title>
+|       "foo/title><link></head><body>X"
+|   <body>
+
+#data
+<!doctype html><noscript><!--<noscript></noscript>--></noscript>
+#errors
+Line: 1 Col: 64 Unexpected end tag (noscript).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <noscript>
+|       "<!--<noscript>"
+|   <body>
+|     "-->"
+
+#data
+<!doctype html><noscript><!--</noscript>X<noscript>--></noscript>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <noscript>
+|       "<!--"
+|   <body>
+|     "X"
+|     <noscript>
+|       "-->"
+
+#data
+<!doctype html><noscript><iframe></noscript>X
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <noscript>
+|       "<iframe>"
+|   <body>
+|     "X"
+
+#data
+<!doctype html><noframes><!--<noframes></noframes>--></noframes>
+#errors
+Line: 1 Col: 64 Unexpected end tag (noframes).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <noframes>
+|       "<!--<noframes>"
+|   <body>
+|     "-->"
+
+#data
+<!doctype html><noframes><body><script><!--...</script></body></noframes></html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <noframes>
+|       "<body><script><!--...</script></body>"
+|   <body>
+
+#data
+<!doctype html><textarea><!--<textarea></textarea>--></textarea>
+#errors
+Line: 1 Col: 64 Unexpected end tag (textarea).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "<!--<textarea>"
+|     "-->"
+
+#data
+<!doctype html><textarea>&lt;/textarea></textarea>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "</textarea>"
+
+#data
+<!doctype html><textarea>&lt;</textarea>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "<"
+
+#data
+<!doctype html><textarea>a&lt;b</textarea>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "a<b"
+
+#data
+<!doctype html><iframe><!--<iframe></iframe>--></iframe>
+#errors
+Line: 1 Col: 56 Unexpected end tag (iframe).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <iframe>
+|       "<!--<iframe>"
+|     "-->"
+
+#data
+<!doctype html><iframe>...<!--X->...<!--/X->...</iframe>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <iframe>
+|       "...<!--X->...<!--/X->..."
+
+#data
+<!doctype html><xmp><!--<xmp></xmp>--></xmp>
+#errors
+Line: 1 Col: 44 Unexpected end tag (xmp).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <xmp>
+|       "<!--<xmp>"
+|     "-->"
+
+#data
+<!doctype html><noembed><!--<noembed></noembed>--></noembed>
+#errors
+Line: 1 Col: 60 Unexpected end tag (noembed).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <noembed>
+|       "<!--<noembed>"
+|     "-->"
+
+#data
+<script>
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 8 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|   <body>
+
+#data
+<script>a
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 9 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "a"
+|   <body>
+
+#data
+<script><
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 9 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<"
+|   <body>
+
+#data
+<script></
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 10 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</"
+|   <body>
+
+#data
+<script></S
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 11 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</S"
+|   <body>
+
+#data
+<script></SC
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 12 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</SC"
+|   <body>
+
+#data
+<script></SCR
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 13 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</SCR"
+|   <body>
+
+#data
+<script></SCRI
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 14 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</SCRI"
+|   <body>
+
+#data
+<script></SCRIP
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 15 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</SCRIP"
+|   <body>
+
+#data
+<script></SCRIPT
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 16 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</SCRIPT"
+|   <body>
+
+#data
+<script></SCRIPT 
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 17 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|   <body>
+
+#data
+<script></s
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 11 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</s"
+|   <body>
+
+#data
+<script></sc
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 12 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</sc"
+|   <body>
+
+#data
+<script></scr
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 13 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</scr"
+|   <body>
+
+#data
+<script></scri
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 14 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</scri"
+|   <body>
+
+#data
+<script></scrip
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 15 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</scrip"
+|   <body>
+
+#data
+<script></script
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 16 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</script"
+|   <body>
+
+#data
+<script></script 
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 17 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|   <body>
+
+#data
+<script><!
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 10 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!"
+|   <body>
+
+#data
+<script><!a
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 11 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!a"
+|   <body>
+
+#data
+<script><!-
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 11 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!-"
+|   <body>
+
+#data
+<script><!-a
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 12 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!-a"
+|   <body>
+
+#data
+<script><!--
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 12 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--"
+|   <body>
+
+#data
+<script><!--a
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 13 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--a"
+|   <body>
+
+#data
+<script><!--<
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 13 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<"
+|   <body>
+
+#data
+<script><!--<a
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 14 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<a"
+|   <body>
+
+#data
+<script><!--</
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 14 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--</"
+|   <body>
+
+#data
+<script><!--</script
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 20 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--</script"
+|   <body>
+
+#data
+<script><!--</script 
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 21 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--"
+|   <body>
+
+#data
+<script><!--<s
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 14 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<s"
+|   <body>
+
+#data
+<script><!--<script
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 19 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script"
+|   <body>
+
+#data
+<script><!--<script 
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 20 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script "
+|   <body>
+
+#data
+<script><!--<script <
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 21 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script <"
+|   <body>
+
+#data
+<script><!--<script <a
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 22 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script <a"
+|   <body>
+
+#data
+<script><!--<script </
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 22 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </"
+|   <body>
+
+#data
+<script><!--<script </s
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 23 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </s"
+|   <body>
+
+#data
+<script><!--<script </script
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 28 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script"
+|   <body>
+
+#data
+<script><!--<script </scripta
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 29 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </scripta"
+|   <body>
+
+#data
+<script><!--<script </script 
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 29 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script "
+|   <body>
+
+#data
+<script><!--<script </script>
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 29 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script>"
+|   <body>
+
+#data
+<script><!--<script </script/
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 29 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script/"
+|   <body>
+
+#data
+<script><!--<script </script <
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 30 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script <"
+|   <body>
+
+#data
+<script><!--<script </script <a
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 31 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script <a"
+|   <body>
+
+#data
+<script><!--<script </script </
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 31 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script </"
+|   <body>
+
+#data
+<script><!--<script </script </script
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 38 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script </script"
+|   <body>
+
+#data
+<script><!--<script </script </script 
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 38 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script "
+|   <body>
+
+#data
+<script><!--<script </script </script/
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 38 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script "
+|   <body>
+
+#data
+<script><!--<script </script </script>
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script "
+|   <body>
+
+#data
+<script><!--<script -
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 21 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -"
+|   <body>
+
+#data
+<script><!--<script -a
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 22 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -a"
+|   <body>
+
+#data
+<script><!--<script --
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 22 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --"
+|   <body>
+
+#data
+<script><!--<script --a
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 23 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --a"
+|   <body>
+
+#data
+<script><!--<script -->
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 23 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -->"
+|   <body>
+
+#data
+<script><!--<script --><
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 24 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --><"
+|   <body>
+
+#data
+<script><!--<script --></
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 25 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --></"
+|   <body>
+
+#data
+<script><!--<script --></script
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 31 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --></script"
+|   <body>
+
+#data
+<script><!--<script --></script 
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 32 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -->"
+|   <body>
+
+#data
+<script><!--<script --></script/
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 32 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -->"
+|   <body>
+
+#data
+<script><!--<script --></script>
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -->"
+|   <body>
+
+#data
+<script><!--<script><\/script>--></script>
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script><\/script>-->"
+|   <body>
+
+#data
+<script><!--<script></scr'+'ipt>--></script>
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></scr'+'ipt>-->"
+|   <body>
+
+#data
+<script><!--<script></script><script></script></script>
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>"
+|   <body>
+
+#data
+<script><!--<script></script><script></script>--><!--</script>
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>--><!--"
+|   <body>
+
+#data
+<script><!--<script></script><script></script>-- ></script>
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>-- >"
+|   <body>
+
+#data
+<script><!--<script></script><script></script>- -></script>
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>- ->"
+|   <body>
+
+#data
+<script><!--<script></script><script></script>- - ></script>
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>- - >"
+|   <body>
+
+#data
+<script><!--<script></script><script></script>-></script>
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>->"
+|   <body>
+
+#data
+<script><!--<script>--!></script>X
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 34 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script>--!></script>X"
+|   <body>
+
+#data
+<script><!--<scr'+'ipt></script>--></script>
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 44 Unexpected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<scr'+'ipt>"
+|   <body>
+|     "-->"
+
+#data
+<script><!--<script></scr'+'ipt></script>X
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 42 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></scr'+'ipt></script>X"
+|   <body>
+
+#data
+<style><!--<style></style>--></style>
+#errors
+Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
+Line: 1 Col: 37 Unexpected end tag (style).
+#document
+| <html>
+|   <head>
+|     <style>
+|       "<!--<style>"
+|   <body>
+|     "-->"
+
+#data
+<style><!--</style>X
+#errors
+Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <style>
+|       "<!--"
+|   <body>
+|     "X"
+
+#data
+<style><!--...</style>...--></style>
+#errors
+Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
+Line: 1 Col: 36 Unexpected end tag (style).
+#document
+| <html>
+|   <head>
+|     <style>
+|       "<!--..."
+|   <body>
+|     "...-->"
+
+#data
+<style><!--<br><html xmlns:v="urn:schemas-microsoft-com:vml"><!--[if !mso]><style></style>X
+#errors
+Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <style>
+|       "<!--<br><html xmlns:v="urn:schemas-microsoft-com:vml"><!--[if !mso]><style>"
+|   <body>
+|     "X"
+
+#data
+<style><!--...<style><!--...--!></style>--></style>
+#errors
+Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
+Line: 1 Col: 51 Unexpected end tag (style).
+#document
+| <html>
+|   <head>
+|     <style>
+|       "<!--...<style><!--...--!>"
+|   <body>
+|     "-->"
+
+#data
+<style><!--...</style><!-- --><style>@import ...</style>
+#errors
+Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <style>
+|       "<!--..."
+|     <!--   -->
+|     <style>
+|       "@import ..."
+|   <body>
+
+#data
+<style>...<style><!--...</style><!-- --></style>
+#errors
+Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
+Line: 1 Col: 48 Unexpected end tag (style).
+#document
+| <html>
+|   <head>
+|     <style>
+|       "...<style><!--..."
+|     <!--   -->
+|   <body>
+
+#data
+<style>...<!--[if IE]><style>...</style>X
+#errors
+Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <style>
+|       "...<!--[if IE]><style>..."
+|   <body>
+|     "X"
+
+#data
+<title><!--<title></title>--></title>
+#errors
+Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
+Line: 1 Col: 37 Unexpected end tag (title).
+#document
+| <html>
+|   <head>
+|     <title>
+|       "<!--<title>"
+|   <body>
+|     "-->"
+
+#data
+<title>&lt;/title></title>
+#errors
+Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <title>
+|       "</title>"
+|   <body>
+
+#data
+<title>foo/title><link></head><body>X
+#errors
+Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
+Line: 1 Col: 37 Unexpected end of file. Expected end tag (title).
+#document
+| <html>
+|   <head>
+|     <title>
+|       "foo/title><link></head><body>X"
+|   <body>
+
+#data
+<noscript><!--<noscript></noscript>--></noscript>
+#errors
+Line: 1 Col: 10 Unexpected start tag (noscript). Expected DOCTYPE.
+Line: 1 Col: 49 Unexpected end tag (noscript).
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       "<!--<noscript>"
+|   <body>
+|     "-->"
+
+#data
+<noscript><!--</noscript>X<noscript>--></noscript>
+#errors
+Line: 1 Col: 10 Unexpected start tag (noscript). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       "<!--"
+|   <body>
+|     "X"
+|     <noscript>
+|       "-->"
+
+#data
+<noscript><iframe></noscript>X
+#errors
+Line: 1 Col: 10 Unexpected start tag (noscript). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       "<iframe>"
+|   <body>
+|     "X"
+
+#data
+<noframes><!--<noframes></noframes>--></noframes>
+#errors
+Line: 1 Col: 10 Unexpected start tag (noframes). Expected DOCTYPE.
+Line: 1 Col: 49 Unexpected end tag (noframes).
+#document
+| <html>
+|   <head>
+|     <noframes>
+|       "<!--<noframes>"
+|   <body>
+|     "-->"
+
+#data
+<noframes><body><script><!--...</script></body></noframes></html>
+#errors
+Line: 1 Col: 10 Unexpected start tag (noframes). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <noframes>
+|       "<body><script><!--...</script></body>"
+|   <body>
+
+#data
+<textarea><!--<textarea></textarea>--></textarea>
+#errors
+Line: 1 Col: 10 Unexpected start tag (textarea). Expected DOCTYPE.
+Line: 1 Col: 49 Unexpected end tag (textarea).
+#document
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "<!--<textarea>"
+|     "-->"
+
+#data
+<textarea>&lt;/textarea></textarea>
+#errors
+Line: 1 Col: 10 Unexpected start tag (textarea). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "</textarea>"
+
+#data
+<iframe><!--<iframe></iframe>--></iframe>
+#errors
+Line: 1 Col: 8 Unexpected start tag (iframe). Expected DOCTYPE.
+Line: 1 Col: 41 Unexpected end tag (iframe).
+#document
+| <html>
+|   <head>
+|   <body>
+|     <iframe>
+|       "<!--<iframe>"
+|     "-->"
+
+#data
+<iframe>...<!--X->...<!--/X->...</iframe>
+#errors
+Line: 1 Col: 8 Unexpected start tag (iframe). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <iframe>
+|       "...<!--X->...<!--/X->..."
+
+#data
+<xmp><!--<xmp></xmp>--></xmp>
+#errors
+Line: 1 Col: 5 Unexpected start tag (xmp). Expected DOCTYPE.
+Line: 1 Col: 29 Unexpected end tag (xmp).
+#document
+| <html>
+|   <head>
+|   <body>
+|     <xmp>
+|       "<!--<xmp>"
+|     "-->"
+
+#data
+<noembed><!--<noembed></noembed>--></noembed>
+#errors
+Line: 1 Col: 9 Unexpected start tag (noembed). Expected DOCTYPE.
+Line: 1 Col: 45 Unexpected end tag (noembed).
+#document
+| <html>
+|   <head>
+|   <body>
+|     <noembed>
+|       "<!--<noembed>"
+|     "-->"
+
+#data
+<!doctype html><table>
+
+#errors
+Line 2 Col 0 Unexpected end of file. Expected table content.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       "
+"
+
+#data
+<!doctype html><table><td><span><font></span><span>
+#errors
+Line 1 Col 26 Unexpected table cell start tag (td) in the table body phase.
+Line 1 Col 45 Unexpected end tag (span).
+Line 1 Col 51 Expected closing tag. Unexpected end of file.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <span>
+|               <font>
+|             <font>
+|               <span>
+
+#data
+<!doctype html><form><table></form><form></table></form>
+#errors
+35: Stray end tag “form”.
+41: Start tag “form” seen in “table”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <form>
+|       <table>
+|         <form>
diff --git a/html/testdata/webkit/tests17.dat b/html/testdata/webkit/tests17.dat
new file mode 100644
index 0000000..7b555f8
--- /dev/null
+++ b/html/testdata/webkit/tests17.dat
@@ -0,0 +1,153 @@
+#data
+<!doctype html><table><tbody><select><tr>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<!doctype html><table><tr><select><td>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+
+#data
+<!doctype html><table><tr><td><select><td>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <select>
+|           <td>
+
+#data
+<!doctype html><table><tr><th><select><td>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <th>
+|             <select>
+|           <td>
+
+#data
+<!doctype html><table><caption><select><tr>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <select>
+|       <tbody>
+|         <tr>
+
+#data
+<!doctype html><select><tr>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+
+#data
+<!doctype html><select><td>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+
+#data
+<!doctype html><select><th>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+
+#data
+<!doctype html><select><tbody>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+
+#data
+<!doctype html><select><thead>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+
+#data
+<!doctype html><select><tfoot>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+
+#data
+<!doctype html><select><caption>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+
+#data
+<!doctype html><table><tr></table>a
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|     "a"
diff --git a/html/testdata/webkit/tests18.dat b/html/testdata/webkit/tests18.dat
new file mode 100644
index 0000000..680e1f0
--- /dev/null
+++ b/html/testdata/webkit/tests18.dat
@@ -0,0 +1,269 @@
+#data
+<!doctype html><plaintext></plaintext>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+
+#data
+<!doctype html><table><plaintext></plaintext>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+|     <table>
+
+#data
+<!doctype html><table><tbody><plaintext></plaintext>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+|     <table>
+|       <tbody>
+
+#data
+<!doctype html><table><tbody><tr><plaintext></plaintext>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<!doctype html><table><tbody><tr><plaintext></plaintext>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<!doctype html><table><td><plaintext></plaintext>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <plaintext>
+|               "</plaintext>"
+
+#data
+<!doctype html><table><caption><plaintext></plaintext>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <plaintext>
+|           "</plaintext>"
+
+#data
+<!doctype html><table><tr><style></script></style>abc
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "abc"
+|     <table>
+|       <tbody>
+|         <tr>
+|           <style>
+|             "</script>"
+
+#data
+<!doctype html><table><tr><script></style></script>abc
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "abc"
+|     <table>
+|       <tbody>
+|         <tr>
+|           <script>
+|             "</style>"
+
+#data
+<!doctype html><table><caption><style></script></style>abc
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <style>
+|           "</script>"
+|         "abc"
+
+#data
+<!doctype html><table><td><style></script></style>abc
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <style>
+|               "</script>"
+|             "abc"
+
+#data
+<!doctype html><select><script></style></script>abc
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <script>
+|         "</style>"
+|       "abc"
+
+#data
+<!doctype html><table><select><script></style></script>abc
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <script>
+|         "</style>"
+|       "abc"
+|     <table>
+
+#data
+<!doctype html><table><tr><select><script></style></script>abc
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <script>
+|         "</style>"
+|       "abc"
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<!doctype html><frameset></frameset><noframes>abc
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|   <noframes>
+|     "abc"
+
+#data
+<!doctype html><frameset></frameset><noframes>abc</noframes><!--abc-->
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|   <noframes>
+|     "abc"
+|   <!-- abc -->
+
+#data
+<!doctype html><frameset></frameset></html><noframes>abc
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|   <noframes>
+|     "abc"
+
+#data
+<!doctype html><frameset></frameset></html><noframes>abc</noframes><!--abc-->
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|   <noframes>
+|     "abc"
+| <!-- abc -->
+
+#data
+<!doctype html><table><tr></tbody><tfoot>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|       <tfoot>
+
+#data
+<!doctype html><table><td><svg></svg>abc<td>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <svg svg>
+|             "abc"
+|           <td>
diff --git a/html/testdata/webkit/tests19.dat b/html/testdata/webkit/tests19.dat
new file mode 100644
index 0000000..0d62f5a
--- /dev/null
+++ b/html/testdata/webkit/tests19.dat
@@ -0,0 +1,1237 @@
+#data
+<!doctype html><math><mn DefinitionUrl="foo">
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mn>
+|         definitionURL="foo"
+
+#data
+<!doctype html><html></p><!--foo-->
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <!-- foo -->
+|   <head>
+|   <body>
+
+#data
+<!doctype html><head></head></p><!--foo-->
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <!-- foo -->
+|   <body>
+
+#data
+<!doctype html><body><p><pre>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|     <pre>
+
+#data
+<!doctype html><body><p><listing>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|     <listing>
+
+#data
+<!doctype html><p><plaintext>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|     <plaintext>
+
+#data
+<!doctype html><p><h1>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|     <h1>
+
+#data
+<!doctype html><form><isindex>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <form>
+
+#data
+<!doctype html><isindex action="POST">
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <form>
+|       action="POST"
+|       <hr>
+|       <label>
+|         "This is a searchable index. Enter search keywords: "
+|         <input>
+|           name="isindex"
+|       <hr>
+
+#data
+<!doctype html><isindex prompt="this is isindex">
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <form>
+|       <hr>
+|       <label>
+|         "this is isindex"
+|         <input>
+|           name="isindex"
+|       <hr>
+
+#data
+<!doctype html><isindex type="hidden">
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <form>
+|       <hr>
+|       <label>
+|         "This is a searchable index. Enter search keywords: "
+|         <input>
+|           name="isindex"
+|           type="hidden"
+|       <hr>
+
+#data
+<!doctype html><isindex name="foo">
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <form>
+|       <hr>
+|       <label>
+|         "This is a searchable index. Enter search keywords: "
+|         <input>
+|           name="isindex"
+|       <hr>
+
+#data
+<!doctype html><ruby><p><rp>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       <p>
+|       <rp>
+
+#data
+<!doctype html><ruby><div><span><rp>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       <div>
+|         <span>
+|           <rp>
+
+#data
+<!doctype html><ruby><div><p><rp>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       <div>
+|         <p>
+|         <rp>
+
+#data
+<!doctype html><ruby><p><rt>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       <p>
+|       <rt>
+
+#data
+<!doctype html><ruby><div><span><rt>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       <div>
+|         <span>
+|           <rt>
+
+#data
+<!doctype html><ruby><div><p><rt>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       <div>
+|         <p>
+|         <rt>
+
+#data
+<!doctype html><math/><foo>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|     <foo>
+
+#data
+<!doctype html><svg/><foo>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|     <foo>
+
+#data
+<!doctype html><div></body><!--foo-->
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|   <!-- foo -->
+
+#data
+<!doctype html><h1><div><h3><span></h1>foo
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <h1>
+|       <div>
+|         <h3>
+|           <span>
+|         "foo"
+
+#data
+<!doctype html><p></h3>foo
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+
+#data
+<!doctype html><h3><li>abc</h2>foo
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <h3>
+|       <li>
+|         "abc"
+|     "foo"
+
+#data
+<!doctype html><table>abc<!--foo-->
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "abc"
+|     <table>
+|       <!-- foo -->
+
+#data
+<!doctype html><table>  <!--foo-->
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       "  "
+|       <!-- foo -->
+
+#data
+<!doctype html><table> b <!--foo-->
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     " b "
+|     <table>
+|       <!-- foo -->
+
+#data
+<!doctype html><select><option><option>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+|       <option>
+
+#data
+<!doctype html><select><option></optgroup>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+
+#data
+<!doctype html><select><option></optgroup>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+
+#data
+<!doctype html><p><math><mi><p><h1>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <math math>
+|         <math mi>
+|           <p>
+|           <h1>
+
+#data
+<!doctype html><p><math><mo><p><h1>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <math math>
+|         <math mo>
+|           <p>
+|           <h1>
+
+#data
+<!doctype html><p><math><mn><p><h1>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <math math>
+|         <math mn>
+|           <p>
+|           <h1>
+
+#data
+<!doctype html><p><math><ms><p><h1>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <math math>
+|         <math ms>
+|           <p>
+|           <h1>
+
+#data
+<!doctype html><p><math><mtext><p><h1>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <math math>
+|         <math mtext>
+|           <p>
+|           <h1>
+
+#data
+<!doctype html><frameset></noframes>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!doctype html><html c=d><body></html><html a=b>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   a="b"
+|   c="d"
+|   <head>
+|   <body>
+
+#data
+<!doctype html><html c=d><frameset></frameset></html><html a=b>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   a="b"
+|   c="d"
+|   <head>
+|   <frameset>
+
+#data
+<!doctype html><html><frameset></frameset></html><!--foo-->
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+| <!-- foo -->
+
+#data
+<!doctype html><html><frameset></frameset></html>  
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|   "  "
+
+#data
+<!doctype html><html><frameset></frameset></html>abc
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!doctype html><html><frameset></frameset></html><p>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!doctype html><html><frameset></frameset></html></p>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<html><frameset></frameset></html><!doctype html>
+#errors
+#document
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!doctype html><body><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!doctype html><p><frameset><frame>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|     <frame>
+
+#data
+<!doctype html><p>a<frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "a"
+
+#data
+<!doctype html><p> <frameset><frame>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|     <frame>
+
+#data
+<!doctype html><pre><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <pre>
+
+#data
+<!doctype html><listing><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <listing>
+
+#data
+<!doctype html><li><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <li>
+
+#data
+<!doctype html><dd><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <dd>
+
+#data
+<!doctype html><dt><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <dt>
+
+#data
+<!doctype html><button><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <button>
+
+#data
+<!doctype html><applet><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <applet>
+
+#data
+<!doctype html><marquee><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <marquee>
+
+#data
+<!doctype html><object><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <object>
+
+#data
+<!doctype html><table><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+
+#data
+<!doctype html><area><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <area>
+
+#data
+<!doctype html><basefont><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <basefont>
+|   <frameset>
+
+#data
+<!doctype html><bgsound><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <bgsound>
+|   <frameset>
+
+#data
+<!doctype html><br><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <br>
+
+#data
+<!doctype html><embed><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <embed>
+
+#data
+<!doctype html><img><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <img>
+
+#data
+<!doctype html><input><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <input>
+
+#data
+<!doctype html><keygen><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <keygen>
+
+#data
+<!doctype html><wbr><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <wbr>
+
+#data
+<!doctype html><hr><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <hr>
+
+#data
+<!doctype html><textarea></textarea><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+
+#data
+<!doctype html><xmp></xmp><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <xmp>
+
+#data
+<!doctype html><iframe></iframe><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <iframe>
+
+#data
+<!doctype html><select></select><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+
+#data
+<!doctype html><svg></svg><frameset><frame>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|     <frame>
+
+#data
+<!doctype html><math></math><frameset><frame>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|     <frame>
+
+#data
+<!doctype html><svg><foreignObject><div> <frameset><frame>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|     <frame>
+
+#data
+<!doctype html><svg>a</svg><frameset><frame>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "a"
+
+#data
+<!doctype html><svg> </svg><frameset><frame>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|     <frame>
+
+#data
+<html>aaa<frameset></frameset>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "aaa"
+
+#data
+<html> a <frameset></frameset>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "a "
+
+#data
+<!doctype html><div><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!doctype html><div><body><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <div>
+
+#data
+<!doctype html><p><math></p>a
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <math math>
+|     "a"
+
+#data
+<!doctype html><p><math><mn><span></p>a
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <math math>
+|         <math mn>
+|           <span>
+|             <p>
+|             "a"
+
+#data
+<!doctype html><math></html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+
+#data
+<!doctype html><meta charset="ascii">
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <meta>
+|       charset="ascii"
+|   <body>
+
+#data
+<!doctype html><meta http-equiv="content-type" content="text/html;charset=ascii">
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <meta>
+|       content="text/html;charset=ascii"
+|       http-equiv="content-type"
+|   <body>
+
+#data
+<!doctype html><head><!--aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa--><meta charset="utf8">
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <!-- aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa -->
+|     <meta>
+|       charset="utf8"
+|   <body>
+
+#data
+<!doctype html><html a=b><head></head><html c=d>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   a="b"
+|   c="d"
+|   <head>
+|   <body>
+
+#data
+<!doctype html><image/>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <img>
+
+#data
+<!doctype html>a<i>b<table>c<b>d</i>e</b>f
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "a"
+|     <i>
+|       "bc"
+|       <b>
+|         "de"
+|       "f"
+|       <table>
+
+#data
+<!doctype html><table><i>a<b>b<div>c<a>d</i>e</b>f
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <i>
+|       "a"
+|       <b>
+|         "b"
+|     <b>
+|     <div>
+|       <b>
+|         <i>
+|           "c"
+|           <a>
+|             "d"
+|         <a>
+|           "e"
+|       <a>
+|         "f"
+|     <table>
+
+#data
+<!doctype html><i>a<b>b<div>c<a>d</i>e</b>f
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <i>
+|       "a"
+|       <b>
+|         "b"
+|     <b>
+|     <div>
+|       <b>
+|         <i>
+|           "c"
+|           <a>
+|             "d"
+|         <a>
+|           "e"
+|       <a>
+|         "f"
+
+#data
+<!doctype html><table><i>a<b>b<div>c</i>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <i>
+|       "a"
+|       <b>
+|         "b"
+|     <b>
+|       <div>
+|         <i>
+|           "c"
+|     <table>
+
+#data
+<!doctype html><table><i>a<b>b<div>c<a>d</i>e</b>f
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <i>
+|       "a"
+|       <b>
+|         "b"
+|     <b>
+|     <div>
+|       <b>
+|         <i>
+|           "c"
+|           <a>
+|             "d"
+|         <a>
+|           "e"
+|       <a>
+|         "f"
+|     <table>
+
+#data
+<!doctype html><table><i>a<div>b<tr>c<b>d</i>e
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <i>
+|       "a"
+|       <div>
+|         "b"
+|     <i>
+|       "c"
+|       <b>
+|         "d"
+|     <b>
+|       "e"
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<!doctype html><table><td><table><i>a<div>b<b>c</i>d
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <i>
+|               "a"
+|             <div>
+|               <i>
+|                 "b"
+|                 <b>
+|                   "c"
+|               <b>
+|                 "d"
+|             <table>
+
+#data
+<!doctype html><body><bgsound>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <bgsound>
+
+#data
+<!doctype html><body><basefont>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <basefont>
+
+#data
+<!doctype html><a><b></a><basefont>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <b>
+|     <basefont>
+
+#data
+<!doctype html><a><b></a><bgsound>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <b>
+|     <bgsound>
+
+#data
+<!doctype html><figcaption><article></figcaption>a
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <figcaption>
+|       <article>
+|     "a"
+
+#data
+<!doctype html><summary><article></summary>a
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <summary>
+|       <article>
+|     "a"
+
+#data
+<!doctype html><p><a><plaintext>b
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <a>
+|     <plaintext>
+|       <a>
+|         "b"
+
+#data
+<!DOCTYPE html><div>a<a></div>b<p>c</p>d
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "a"
+|       <a>
+|     <a>
+|       "b"
+|       <p>
+|         "c"
+|       "d"
diff --git a/html/testdata/webkit/tests2.dat b/html/testdata/webkit/tests2.dat
new file mode 100644
index 0000000..60d8592
--- /dev/null
+++ b/html/testdata/webkit/tests2.dat
@@ -0,0 +1,763 @@
+#data
+<!DOCTYPE html>Test
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "Test"
+
+#data
+<textarea>test</div>test
+#errors
+Line: 1 Col: 10 Unexpected start tag (textarea). Expected DOCTYPE.
+Line: 1 Col: 24 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "test</div>test"
+
+#data
+<table><td>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 11 Unexpected table cell start tag (td) in the table body phase.
+Line: 1 Col: 11 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+
+#data
+<table><td>test</tbody></table>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 11 Unexpected table cell start tag (td) in the table body phase.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             "test"
+
+#data
+<frame>test
+#errors
+Line: 1 Col: 7 Unexpected start tag (frame). Expected DOCTYPE.
+Line: 1 Col: 7 Unexpected start tag frame. Ignored.
+#document
+| <html>
+|   <head>
+|   <body>
+|     "test"
+
+#data
+<!DOCTYPE html><frameset>test
+#errors
+Line: 1 Col: 29 Unepxected characters in the frameset phase. Characters ignored.
+Line: 1 Col: 29 Expected closing tag. Unexpected end of file.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!DOCTYPE html><frameset><!DOCTYPE html>
+#errors
+Line: 1 Col: 40 Unexpected DOCTYPE. Ignored.
+Line: 1 Col: 40 Expected closing tag. Unexpected end of file.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!DOCTYPE html><font><p><b>test</font>
+#errors
+Line: 1 Col: 38 End tag (font) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 38 End tag (font) violates step 1, paragraph 3 of the adoption agency algorithm.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <font>
+|     <p>
+|       <font>
+|         <b>
+|           "test"
+
+#data
+<!DOCTYPE html><dt><div><dd>
+#errors
+Line: 1 Col: 28 Missing end tag (div, dt).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <dt>
+|       <div>
+|     <dd>
+
+#data
+<script></x
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 11 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</x"
+|   <body>
+
+#data
+<table><plaintext><td>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 18 Unexpected start tag (plaintext) in table context caused voodoo mode.
+Line: 1 Col: 22 Unexpected end of file. Expected table content.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "<td>"
+|     <table>
+
+#data
+<plaintext></plaintext>
+#errors
+Line: 1 Col: 11 Unexpected start tag (plaintext). Expected DOCTYPE.
+Line: 1 Col: 23 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+
+#data
+<!DOCTYPE html><table><tr>TEST
+#errors
+Line: 1 Col: 30 Unexpected non-space characters in table context caused voodoo mode.
+Line: 1 Col: 30 Unexpected end of file. Expected table content.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "TEST"
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<!DOCTYPE html><body t1=1><body t2=2><body t3=3 t4=4>
+#errors
+Line: 1 Col: 37 Unexpected start tag (body).
+Line: 1 Col: 53 Unexpected start tag (body).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     t1="1"
+|     t2="2"
+|     t3="3"
+|     t4="4"
+
+#data
+</b test
+#errors
+Line: 1 Col: 8 Unexpected end of file in attribute name.
+Line: 1 Col: 8 End tag contains unexpected attributes.
+Line: 1 Col: 8 Unexpected end tag (b). Expected DOCTYPE.
+Line: 1 Col: 8 Unexpected end tag (b) after the (implied) root element.
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html></b test<b &=&amp>X
+#errors
+Line: 1 Col: 32 Named entity didn't end with ';'.
+Line: 1 Col: 33 End tag contains unexpected attributes.
+Line: 1 Col: 33 Unexpected end tag (b) after the (implied) root element.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "X"
+
+#data
+<!doctypehtml><scrIPt type=text/x-foobar;baz>X</SCRipt
+#errors
+Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
+Line: 1 Col: 54 Unexpected end of file in the tag name.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       type="text/x-foobar;baz"
+|       "X</SCRipt"
+|   <body>
+
+#data
+&
+#errors
+Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     "&"
+
+#data
+&#
+#errors
+Line: 1 Col: 1 Numeric entity expected. Got end of file instead.
+Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     "&#"
+
+#data
+&#X
+#errors
+Line: 1 Col: 3 Numeric entity expected but none found.
+Line: 1 Col: 3 Unexpected non-space characters. Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     "&#X"
+
+#data
+&#x
+#errors
+Line: 1 Col: 3 Numeric entity expected but none found.
+Line: 1 Col: 3 Unexpected non-space characters. Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     "&#x"
+
+#data
+&#45
+#errors
+Line: 1 Col: 4 Numeric entity didn't end with ';'.
+Line: 1 Col: 4 Unexpected non-space characters. Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     "-"
+
+#data
+&x-test
+#errors
+Line: 1 Col: 1 Named entity expected. Got none.
+Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     "&x-test"
+
+#data
+<!doctypehtml><p><li>
+#errors
+Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|     <li>
+
+#data
+<!doctypehtml><p><dt>
+#errors
+Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|     <dt>
+
+#data
+<!doctypehtml><p><dd>
+#errors
+Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|     <dd>
+
+#data
+<!doctypehtml><p><form>
+#errors
+Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
+Line: 1 Col: 23 Expected closing tag. Unexpected end of file.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|     <form>
+
+#data
+<!DOCTYPE html><p></P>X
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|     "X"
+
+#data
+&AMP
+#errors
+Line: 1 Col: 4 Named entity didn't end with ';'.
+Line: 1 Col: 4 Unexpected non-space characters. Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     "&"
+
+#data
+&AMp;
+#errors
+Line: 1 Col: 1 Named entity expected. Got none.
+Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     "&AMp;"
+
+#data
+<!DOCTYPE html><html><head></head><body><thisISasillyTESTelementNameToMakeSureCrazyTagNamesArePARSEDcorrectLY>
+#errors
+Line: 1 Col: 110 Expected closing tag. Unexpected end of file.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <thisisasillytestelementnametomakesurecrazytagnamesareparsedcorrectly>
+
+#data
+<!DOCTYPE html>X</body>X
+#errors
+Line: 1 Col: 24 Unexpected non-space characters in the after body phase.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "XX"
+
+#data
+<!DOCTYPE html><!-- X
+#errors
+Line: 1 Col: 21 Unexpected end of file in comment.
+#document
+| <!DOCTYPE html>
+| <!--  X -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><table><caption>test TEST</caption><td>test
+#errors
+Line: 1 Col: 54 Unexpected table cell start tag (td) in the table body phase.
+Line: 1 Col: 58 Expected closing tag. Unexpected end of file.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         "test TEST"
+|       <tbody>
+|         <tr>
+|           <td>
+|             "test"
+
+#data
+<!DOCTYPE html><select><option><optgroup>
+#errors
+Line: 1 Col: 41 Expected closing tag. Unexpected end of file.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+|       <optgroup>
+
+#data
+<!DOCTYPE html><select><optgroup><option></optgroup><option><select><option>
+#errors
+Line: 1 Col: 68 Unexpected select start tag in the select phase treated as select end tag.
+Line: 1 Col: 76 Expected closing tag. Unexpected end of file.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <optgroup>
+|         <option>
+|       <option>
+|     <option>
+
+#data
+<!DOCTYPE html><select><optgroup><option><optgroup>
+#errors
+Line: 1 Col: 51 Expected closing tag. Unexpected end of file.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <optgroup>
+|         <option>
+|       <optgroup>
+
+#data
+<!DOCTYPE html><datalist><option>foo</datalist>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <datalist>
+|       <option>
+|         "foo"
+|     "bar"
+
+#data
+<!DOCTYPE html><font><input><input></font>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <font>
+|       <input>
+|       <input>
+
+#data
+<!DOCTYPE html><!-- XXX - XXX -->
+#errors
+#document
+| <!DOCTYPE html>
+| <!--  XXX - XXX  -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><!-- XXX - XXX
+#errors
+Line: 1 Col: 29 Unexpected end of file in comment (-)
+#document
+| <!DOCTYPE html>
+| <!--  XXX - XXX -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><!-- XXX - XXX - XXX -->
+#errors
+#document
+| <!DOCTYPE html>
+| <!--  XXX - XXX - XXX  -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<isindex test=x name=x>
+#errors
+Line: 1 Col: 23 Unexpected start tag (isindex). Expected DOCTYPE.
+Line: 1 Col: 23 Unexpected start tag isindex. Don't use it!
+#document
+| <html>
+|   <head>
+|   <body>
+|     <form>
+|       <hr>
+|       <label>
+|         "This is a searchable index. Enter search keywords: "
+|         <input>
+|           name="isindex"
+|           test="x"
+|       <hr>
+
+#data
+test
+test
+#errors
+Line: 2 Col: 4 Unexpected non-space characters. Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     "test
+test"
+
+#data
+<!DOCTYPE html><body><title>test</body></title>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <title>
+|       "test</body>"
+
+#data
+<!DOCTYPE html><body><title>X</title><meta name=z><link rel=foo><style>
+x { content:"</style" } </style>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <title>
+|       "X"
+|     <meta>
+|       name="z"
+|     <link>
+|       rel="foo"
+|     <style>
+|       "
+x { content:"</style" } "
+
+#data
+<!DOCTYPE html><select><optgroup></optgroup></select>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <optgroup>
+
+#data
+ 
+ 
+#errors
+Line: 2 Col: 1 Unexpected End of file. Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html>  <html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><script>
+</script>  <title>x</title>  </head>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "
+"
+|     "  "
+|     <title>
+|       "x"
+|     "  "
+|   <body>
+
+#data
+<!DOCTYPE html><html><body><html id=x>
+#errors
+Line: 1 Col: 38 html needs to be the first start tag.
+#document
+| <!DOCTYPE html>
+| <html>
+|   id="x"
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html>X</body><html id="x">
+#errors
+Line: 1 Col: 36 Unexpected start tag token (html) in the after body phase.
+Line: 1 Col: 36 html needs to be the first start tag.
+#document
+| <!DOCTYPE html>
+| <html>
+|   id="x"
+|   <head>
+|   <body>
+|     "X"
+
+#data
+<!DOCTYPE html><head><html id=x>
+#errors
+Line: 1 Col: 32 html needs to be the first start tag.
+#document
+| <!DOCTYPE html>
+| <html>
+|   id="x"
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html>X</html>X
+#errors
+Line: 1 Col: 24 Unexpected non-space characters in the after body phase.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "XX"
+
+#data
+<!DOCTYPE html>X</html> 
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "X "
+
+#data
+<!DOCTYPE html>X</html><p>X
+#errors
+Line: 1 Col: 26 Unexpected start tag (p).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "X"
+|     <p>
+|       "X"
+
+#data
+<!DOCTYPE html>X<p/x/y/z>
+#errors
+Line: 1 Col: 19 Expected a > after the /.
+Line: 1 Col: 21 Solidus (/) incorrectly placed in tag.
+Line: 1 Col: 23 Solidus (/) incorrectly placed in tag.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "X"
+|     <p>
+|       x=""
+|       y=""
+|       z=""
+
+#data
+<!DOCTYPE html><!--x--
+#errors
+Line: 1 Col: 22 Unexpected end of file in comment (--).
+#document
+| <!DOCTYPE html>
+| <!-- x -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><table><tr><td></p></table>
+#errors
+Line: 1 Col: 34 Unexpected end tag (p). Ignored.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <p>
+
+#data
+<!DOCTYPE <!DOCTYPE HTML>><!--<!--x-->-->
+#errors
+Line: 1 Col: 20 Expected space or '>'. Got ''
+Line: 1 Col: 25 Erroneous DOCTYPE.
+Line: 1 Col: 35 Unexpected character in comment found.
+#document
+| <!DOCTYPE <!doctype>
+| <html>
+|   <head>
+|   <body>
+|     ">"
+|     <!-- <!--x -->
+|     "-->"
+
+#data
+<!doctype html><div><form></form><div></div></div>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <form>
+|       <div>
diff --git a/html/testdata/webkit/tests20.dat b/html/testdata/webkit/tests20.dat
new file mode 100644
index 0000000..6bd8256
--- /dev/null
+++ b/html/testdata/webkit/tests20.dat
@@ -0,0 +1,455 @@
+#data
+<!doctype html><p><button><button>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|       <button>
+
+#data
+<!doctype html><p><button><address>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <address>
+
+#data
+<!doctype html><p><button><blockquote>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <blockquote>
+
+#data
+<!doctype html><p><button><menu>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <menu>
+
+#data
+<!doctype html><p><button><p>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <p>
+
+#data
+<!doctype html><p><button><ul>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <ul>
+
+#data
+<!doctype html><p><button><h1>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <h1>
+
+#data
+<!doctype html><p><button><h6>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <h6>
+
+#data
+<!doctype html><p><button><listing>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <listing>
+
+#data
+<!doctype html><p><button><pre>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <pre>
+
+#data
+<!doctype html><p><button><form>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <form>
+
+#data
+<!doctype html><p><button><li>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <li>
+
+#data
+<!doctype html><p><button><dd>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <dd>
+
+#data
+<!doctype html><p><button><dt>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <dt>
+
+#data
+<!doctype html><p><button><plaintext>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <plaintext>
+
+#data
+<!doctype html><p><button><table>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <table>
+
+#data
+<!doctype html><p><button><hr>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <hr>
+
+#data
+<!doctype html><p><button><xmp>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <xmp>
+
+#data
+<!doctype html><p><button></p>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <p>
+
+#data
+<!doctype html><address><button></address>a
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <address>
+|       <button>
+|     "a"
+
+#data
+<!doctype html><address><button></address>a
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <address>
+|       <button>
+|     "a"
+
+#data
+<p><table></p>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <p>
+|       <table>
+
+#data
+<!doctype html><svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+
+#data
+<!doctype html><p><figcaption>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|     <figcaption>
+
+#data
+<!doctype html><p><summary>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|     <summary>
+
+#data
+<!doctype html><form><table><form>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <form>
+|       <table>
+
+#data
+<!doctype html><table><form><form>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <form>
+
+#data
+<!doctype html><table><form></table><form>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <form>
+
+#data
+<!doctype html><svg><foreignObject><p>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg foreignObject>
+|         <p>
+
+#data
+<!doctype html><svg><title>abc
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg title>
+|         "abc"
+
+#data
+<option><span><option>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <option>
+|       <span>
+|         <option>
+
+#data
+<option><option>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <option>
+|     <option>
+
+#data
+<math><annotation-xml><div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|     <div>
+
+#data
+<math><annotation-xml encoding="application/svg+xml"><div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         encoding="application/svg+xml"
+|     <div>
+
+#data
+<math><annotation-xml encoding="application/xhtml+xml"><div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         encoding="application/xhtml+xml"
+|         <div>
+
+#data
+<math><annotation-xml encoding="aPPlication/xhtmL+xMl"><div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         encoding="aPPlication/xhtmL+xMl"
+|         <div>
+
+#data
+<math><annotation-xml encoding="text/html"><div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         encoding="text/html"
+|         <div>
+
+#data
+<math><annotation-xml encoding="Text/htmL"><div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         encoding="Text/htmL"
+|         <div>
+
+#data
+<math><annotation-xml encoding=" text/html "><div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         encoding=" text/html "
+|     <div>
diff --git a/html/testdata/webkit/tests21.dat b/html/testdata/webkit/tests21.dat
new file mode 100644
index 0000000..1260ec0
--- /dev/null
+++ b/html/testdata/webkit/tests21.dat
@@ -0,0 +1,221 @@
+#data
+<svg><![CDATA[foo]]>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "foo"
+
+#data
+<math><![CDATA[foo]]>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       "foo"
+
+#data
+<div><![CDATA[foo]]>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <!-- [CDATA[foo]] -->
+
+#data
+<svg><![CDATA[foo
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "foo"
+
+#data
+<svg><![CDATA[foo
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "foo"
+
+#data
+<svg><![CDATA[
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+
+#data
+<svg><![CDATA[]]>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+
+#data
+<svg><![CDATA[]] >]]>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "]] >"
+
+#data
+<svg><![CDATA[]] >]]>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "]] >"
+
+#data
+<svg><![CDATA[]]
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "]]"
+
+#data
+<svg><![CDATA[]
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "]"
+
+#data
+<svg><![CDATA[]>a
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "]>a"
+
+#data
+<svg><foreignObject><div><![CDATA[foo]]>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg foreignObject>
+|         <div>
+|           <!-- [CDATA[foo]] -->
+
+#data
+<svg><![CDATA[<svg>]]>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "<svg>"
+
+#data
+<svg><![CDATA[</svg>a]]>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "</svg>a"
+
+#data
+<svg><![CDATA[<svg>a
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "<svg>a"
+
+#data
+<svg><![CDATA[</svg>a
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "</svg>a"
+
+#data
+<svg><![CDATA[<svg>]]><path>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "<svg>"
+|       <svg path>
+
+#data
+<svg><![CDATA[<svg>]]></path>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "<svg>"
+
+#data
+<svg><![CDATA[<svg>]]><!--path-->
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "<svg>"
+|       <!-- path -->
+
+#data
+<svg><![CDATA[<svg>]]>path
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "<svg>path"
+
+#data
+<svg><![CDATA[<!--svg-->]]>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "<!--svg-->"
diff --git a/html/testdata/webkit/tests22.dat b/html/testdata/webkit/tests22.dat
new file mode 100644
index 0000000..aab27b2
--- /dev/null
+++ b/html/testdata/webkit/tests22.dat
@@ -0,0 +1,157 @@
+#data
+<a><b><big><em><strong><div>X</a>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <b>
+|         <big>
+|           <em>
+|             <strong>
+|     <big>
+|       <em>
+|         <strong>
+|           <div>
+|             <a>
+|               "X"
+
+#data
+<a><b><div id=1><div id=2><div id=3><div id=4><div id=5><div id=6><div id=7><div id=8>A</a>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <b>
+|     <b>
+|       <div>
+|         id="1"
+|         <a>
+|         <div>
+|           id="2"
+|           <a>
+|           <div>
+|             id="3"
+|             <a>
+|             <div>
+|               id="4"
+|               <a>
+|               <div>
+|                 id="5"
+|                 <a>
+|                 <div>
+|                   id="6"
+|                   <a>
+|                   <div>
+|                     id="7"
+|                     <a>
+|                     <div>
+|                       id="8"
+|                       <a>
+|                         "A"
+
+#data
+<a><b><div id=1><div id=2><div id=3><div id=4><div id=5><div id=6><div id=7><div id=8><div id=9>A</a>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <b>
+|     <b>
+|       <div>
+|         id="1"
+|         <a>
+|         <div>
+|           id="2"
+|           <a>
+|           <div>
+|             id="3"
+|             <a>
+|             <div>
+|               id="4"
+|               <a>
+|               <div>
+|                 id="5"
+|                 <a>
+|                 <div>
+|                   id="6"
+|                   <a>
+|                   <div>
+|                     id="7"
+|                     <a>
+|                     <div>
+|                       id="8"
+|                       <a>
+|                         <div>
+|                           id="9"
+|                           "A"
+
+#data
+<a><b><div id=1><div id=2><div id=3><div id=4><div id=5><div id=6><div id=7><div id=8><div id=9><div id=10>A</a>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <b>
+|     <b>
+|       <div>
+|         id="1"
+|         <a>
+|         <div>
+|           id="2"
+|           <a>
+|           <div>
+|             id="3"
+|             <a>
+|             <div>
+|               id="4"
+|               <a>
+|               <div>
+|                 id="5"
+|                 <a>
+|                 <div>
+|                   id="6"
+|                   <a>
+|                   <div>
+|                     id="7"
+|                     <a>
+|                     <div>
+|                       id="8"
+|                       <a>
+|                         <div>
+|                           id="9"
+|                           <div>
+|                             id="10"
+|                             "A"
+
+#data
+<cite><b><cite><i><cite><i><cite><i><div>X</b>TEST
+#errors
+Line: 1 Col: 6 Unexpected start tag (cite). Expected DOCTYPE.
+Line: 1 Col: 46 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 50 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <cite>
+|       <b>
+|         <cite>
+|           <i>
+|             <cite>
+|               <i>
+|                 <cite>
+|                   <i>
+|       <i>
+|         <i>
+|           <div>
+|             <b>
+|               "X"
+|             "TEST"
diff --git a/html/testdata/webkit/tests23.dat b/html/testdata/webkit/tests23.dat
new file mode 100644
index 0000000..34d2a73
--- /dev/null
+++ b/html/testdata/webkit/tests23.dat
@@ -0,0 +1,155 @@
+#data
+<p><font size=4><font color=red><font size=4><font size=4><font size=4><font size=4><font size=4><font color=red><p>X
+#errors
+3: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
+116: Unclosed elements.
+117: End of file seen and there were open elements.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <font>
+|         size="4"
+|         <font>
+|           color="red"
+|           <font>
+|             size="4"
+|             <font>
+|               size="4"
+|               <font>
+|                 size="4"
+|                 <font>
+|                   size="4"
+|                   <font>
+|                     size="4"
+|                     <font>
+|                       color="red"
+|     <p>
+|       <font>
+|         color="red"
+|         <font>
+|           size="4"
+|           <font>
+|             size="4"
+|             <font>
+|               size="4"
+|               <font>
+|                 color="red"
+|                 "X"
+
+#data
+<p><font size=4><font size=4><font size=4><font size=4><p>X
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <font>
+|         size="4"
+|         <font>
+|           size="4"
+|           <font>
+|             size="4"
+|             <font>
+|               size="4"
+|     <p>
+|       <font>
+|         size="4"
+|         <font>
+|           size="4"
+|           <font>
+|             size="4"
+|             "X"
+
+#data
+<p><font size=4><font size=4><font size=4><font size="5"><font size=4><p>X
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <font>
+|         size="4"
+|         <font>
+|           size="4"
+|           <font>
+|             size="4"
+|             <font>
+|               size="5"
+|               <font>
+|                 size="4"
+|     <p>
+|       <font>
+|         size="4"
+|         <font>
+|           size="4"
+|           <font>
+|             size="5"
+|             <font>
+|               size="4"
+|               "X"
+
+#data
+<p><font size=4 id=a><font size=4 id=b><font size=4><font size=4><p>X
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <font>
+|         id="a"
+|         size="4"
+|         <font>
+|           id="b"
+|           size="4"
+|           <font>
+|             size="4"
+|             <font>
+|               size="4"
+|     <p>
+|       <font>
+|         id="a"
+|         size="4"
+|         <font>
+|           id="b"
+|           size="4"
+|           <font>
+|             size="4"
+|             <font>
+|               size="4"
+|               "X"
+
+#data
+<p><b id=a><b id=a><b id=a><b><object><b id=a><b id=a>X</object><p>Y
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <b>
+|         id="a"
+|         <b>
+|           id="a"
+|           <b>
+|             id="a"
+|             <b>
+|               <object>
+|                 <b>
+|                   id="a"
+|                   <b>
+|                     id="a"
+|                     "X"
+|     <p>
+|       <b>
+|         id="a"
+|         <b>
+|           id="a"
+|           <b>
+|             id="a"
+|             <b>
+|               "Y"
diff --git a/html/testdata/webkit/tests24.dat b/html/testdata/webkit/tests24.dat
new file mode 100644
index 0000000..f6dc7eb
--- /dev/null
+++ b/html/testdata/webkit/tests24.dat
@@ -0,0 +1,79 @@
+#data
+<!DOCTYPE html>&NotEqualTilde;
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "≂̸"
+
+#data
+<!DOCTYPE html>&NotEqualTilde;A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "≂̸A"
+
+#data
+<!DOCTYPE html>&ThickSpace;
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "  "
+
+#data
+<!DOCTYPE html>&ThickSpace;A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "  A"
+
+#data
+<!DOCTYPE html>&NotSubset;
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "⊂⃒"
+
+#data
+<!DOCTYPE html>&NotSubset;A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "⊂⃒A"
+
+#data
+<!DOCTYPE html>&Gopf;
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "𝔾"
+
+#data
+<!DOCTYPE html>&Gopf;A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "𝔾A"
diff --git a/html/testdata/webkit/tests25.dat b/html/testdata/webkit/tests25.dat
new file mode 100644
index 0000000..00de729
--- /dev/null
+++ b/html/testdata/webkit/tests25.dat
@@ -0,0 +1,219 @@
+#data
+<!DOCTYPE html><body><foo>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <foo>
+|       "A"
+
+#data
+<!DOCTYPE html><body><area>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <area>
+|     "A"
+
+#data
+<!DOCTYPE html><body><base>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <base>
+|     "A"
+
+#data
+<!DOCTYPE html><body><basefont>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <basefont>
+|     "A"
+
+#data
+<!DOCTYPE html><body><bgsound>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <bgsound>
+|     "A"
+
+#data
+<!DOCTYPE html><body><br>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <br>
+|     "A"
+
+#data
+<!DOCTYPE html><body><col>A
+#errors
+26: Stray start tag “col”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "A"
+
+#data
+<!DOCTYPE html><body><command>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <command>
+|     "A"
+
+#data
+<!DOCTYPE html><body><embed>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <embed>
+|     "A"
+
+#data
+<!DOCTYPE html><body><frame>A
+#errors
+26: Stray start tag “frame”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "A"
+
+#data
+<!DOCTYPE html><body><hr>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <hr>
+|     "A"
+
+#data
+<!DOCTYPE html><body><img>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <img>
+|     "A"
+
+#data
+<!DOCTYPE html><body><input>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <input>
+|     "A"
+
+#data
+<!DOCTYPE html><body><keygen>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <keygen>
+|     "A"
+
+#data
+<!DOCTYPE html><body><link>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <link>
+|     "A"
+
+#data
+<!DOCTYPE html><body><meta>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <meta>
+|     "A"
+
+#data
+<!DOCTYPE html><body><param>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <param>
+|     "A"
+
+#data
+<!DOCTYPE html><body><source>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <source>
+|     "A"
+
+#data
+<!DOCTYPE html><body><track>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <track>
+|     "A"
+
+#data
+<!DOCTYPE html><body><wbr>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <wbr>
+|     "A"
diff --git a/html/testdata/webkit/tests26.dat b/html/testdata/webkit/tests26.dat
new file mode 100644
index 0000000..fae11ff
--- /dev/null
+++ b/html/testdata/webkit/tests26.dat
@@ -0,0 +1,313 @@
+#data
+<!DOCTYPE html><body><a href='#1'><nobr>1<nobr></a><br><a href='#2'><nobr>2<nobr></a><br><a href='#3'><nobr>3<nobr></a>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       href="#1"
+|       <nobr>
+|         "1"
+|       <nobr>
+|     <nobr>
+|       <br>
+|       <a>
+|         href="#2"
+|     <a>
+|       href="#2"
+|       <nobr>
+|         "2"
+|       <nobr>
+|     <nobr>
+|       <br>
+|       <a>
+|         href="#3"
+|     <a>
+|       href="#3"
+|       <nobr>
+|         "3"
+|       <nobr>
+
+#data
+<!DOCTYPE html><body><b><nobr>1<nobr></b><i><nobr>2<nobr></i>3
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <nobr>
+|         "1"
+|       <nobr>
+|     <nobr>
+|       <i>
+|     <i>
+|       <nobr>
+|         "2"
+|       <nobr>
+|     <nobr>
+|       "3"
+
+#data
+<!DOCTYPE html><body><b><nobr>1<table><nobr></b><i><nobr>2<nobr></i>3
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <nobr>
+|         "1"
+|         <nobr>
+|           <i>
+|         <i>
+|           <nobr>
+|             "2"
+|           <nobr>
+|         <nobr>
+|           "3"
+|         <table>
+
+#data
+<!DOCTYPE html><body><b><nobr>1<table><tr><td><nobr></b><i><nobr>2<nobr></i>3
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <nobr>
+|         "1"
+|         <table>
+|           <tbody>
+|             <tr>
+|               <td>
+|                 <nobr>
+|                   <i>
+|                 <i>
+|                   <nobr>
+|                     "2"
+|                   <nobr>
+|                 <nobr>
+|                   "3"
+
+#data
+<!DOCTYPE html><body><b><nobr>1<div><nobr></b><i><nobr>2<nobr></i>3
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <nobr>
+|         "1"
+|     <div>
+|       <b>
+|         <nobr>
+|         <nobr>
+|       <nobr>
+|         <i>
+|       <i>
+|         <nobr>
+|           "2"
+|         <nobr>
+|       <nobr>
+|         "3"
+
+#data
+<!DOCTYPE html><body><b><nobr>1<nobr></b><div><i><nobr>2<nobr></i>3
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <nobr>
+|         "1"
+|       <nobr>
+|     <div>
+|       <nobr>
+|         <i>
+|       <i>
+|         <nobr>
+|           "2"
+|         <nobr>
+|       <nobr>
+|         "3"
+
+#data
+<!DOCTYPE html><body><b><nobr>1<nobr><ins></b><i><nobr>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <nobr>
+|         "1"
+|       <nobr>
+|         <ins>
+|     <nobr>
+|       <i>
+|     <i>
+|       <nobr>
+
+#data
+<!DOCTYPE html><body><b><nobr>1<ins><nobr></b><i>2
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <nobr>
+|         "1"
+|         <ins>
+|       <nobr>
+|     <nobr>
+|       <i>
+|         "2"
+
+#data
+<!DOCTYPE html><body><b>1<nobr></b><i><nobr>2</i>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       "1"
+|       <nobr>
+|     <nobr>
+|       <i>
+|     <i>
+|       <nobr>
+|         "2"
+
+#data
+<p><code x</code></p>
+
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <code>
+|         code=""
+|         x<=""
+|     <code>
+|       code=""
+|       x<=""
+|       "
+"
+
+#data
+<!DOCTYPE html><svg><foreignObject><p><i></p>a
+#errors
+45: End tag “p” seen, but there were open elements.
+41: Unclosed element “i”.
+46: End of file seen and there were open elements.
+35: Unclosed element “foreignObject”.
+20: Unclosed element “svg”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg foreignObject>
+|         <p>
+|           <i>
+|         <i>
+|           "a"
+
+#data
+<!DOCTYPE html><table><tr><td><svg><foreignObject><p><i></p>a
+#errors
+56: End tag “p” seen, but there were open elements.
+52: Unclosed element “i”.
+57: End of file seen and there were open elements.
+46: Unclosed element “foreignObject”.
+31: Unclosed element “svg”.
+22: Unclosed element “table”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <svg svg>
+|               <svg foreignObject>
+|                 <p>
+|                   <i>
+|                 <i>
+|                   "a"
+
+#data
+<!DOCTYPE html><math><mtext><p><i></p>a
+#errors
+38: End tag “p” seen, but there were open elements.
+34: Unclosed element “i”.
+39: End of file in a foreign namespace context.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mtext>
+|         <p>
+|           <i>
+|         <i>
+|           "a"
+
+#data
+<!DOCTYPE html><table><tr><td><math><mtext><p><i></p>a
+#errors
+53: End tag “p” seen, but there were open elements.
+49: Unclosed element “i”.
+54: End of file in a foreign namespace context.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <math math>
+|               <math mtext>
+|                 <p>
+|                   <i>
+|                 <i>
+|                   "a"
+
+#data
+<!DOCTYPE html><body><div><!/div>a
+#errors
+29: Bogus comment.
+34: End of file seen and there were open elements.
+26: Unclosed element “div”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <!-- /div -->
+|       "a"
diff --git a/html/testdata/webkit/tests3.dat b/html/testdata/webkit/tests3.dat
new file mode 100644
index 0000000..38dc501
--- /dev/null
+++ b/html/testdata/webkit/tests3.dat
@@ -0,0 +1,305 @@
+#data
+<head></head><style></style>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+Line: 1 Col: 20 Unexpected start tag (style) that can be in head. Moved.
+#document
+| <html>
+|   <head>
+|     <style>
+|   <body>
+
+#data
+<head></head><script></script>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+Line: 1 Col: 21 Unexpected start tag (script) that can be in head. Moved.
+#document
+| <html>
+|   <head>
+|     <script>
+|   <body>
+
+#data
+<head></head><!-- --><style></style><!-- --><script></script>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+Line: 1 Col: 28 Unexpected start tag (style) that can be in head. Moved.
+#document
+| <html>
+|   <head>
+|     <style>
+|     <script>
+|   <!--   -->
+|   <!--   -->
+|   <body>
+
+#data
+<head></head><!-- -->x<style></style><!-- --><script></script>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <!--   -->
+|   <body>
+|     "x"
+|     <style>
+|     <!--   -->
+|     <script>
+
+#data
+<!DOCTYPE html><html><head></head><body><pre>
+</pre></body></html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <pre>
+
+#data
+<!DOCTYPE html><html><head></head><body><pre>
+foo</pre></body></html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <pre>
+|       "foo"
+
+#data
+<!DOCTYPE html><html><head></head><body><pre>
+
+foo</pre></body></html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <pre>
+|       "
+foo"
+
+#data
+<!DOCTYPE html><html><head></head><body><pre>
+foo
+</pre></body></html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <pre>
+|       "foo
+"
+
+#data
+<!DOCTYPE html><html><head></head><body><pre>x</pre><span>
+</span></body></html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <pre>
+|       "x"
+|     <span>
+|       "
+"
+
+#data
+<!DOCTYPE html><html><head></head><body><pre>x
+y</pre></body></html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <pre>
+|       "x
+y"
+
+#data
+<!DOCTYPE html><html><head></head><body><pre>x<div>
+y</pre></body></html>
+#errors
+Line: 2 Col: 7 End tag (pre) seen too early. Expected other end tag.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <pre>
+|       "x"
+|       <div>
+|         "
+y"
+
+#data
+<!DOCTYPE html><pre>&#x0a;&#x0a;A</pre>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <pre>
+|       "
+A"
+
+#data
+<!DOCTYPE html><HTML><META><HEAD></HEAD></HTML>
+#errors
+Line: 1 Col: 33 Unexpected start tag head in existing head. Ignored.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <meta>
+|   <body>
+
+#data
+<!DOCTYPE html><HTML><HEAD><head></HEAD></HTML>
+#errors
+Line: 1 Col: 33 Unexpected start tag head in existing head. Ignored.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+
+#data
+<textarea>foo<span>bar</span><i>baz
+#errors
+Line: 1 Col: 10 Unexpected start tag (textarea). Expected DOCTYPE.
+Line: 1 Col: 35 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "foo<span>bar</span><i>baz"
+
+#data
+<title>foo<span>bar</em><i>baz
+#errors
+Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
+Line: 1 Col: 30 Unexpected end of file. Expected end tag (title).
+#document
+| <html>
+|   <head>
+|     <title>
+|       "foo<span>bar</em><i>baz"
+|   <body>
+
+#data
+<!DOCTYPE html><textarea>
+</textarea>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+
+#data
+<!DOCTYPE html><textarea>
+foo</textarea>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "foo"
+
+#data
+<!DOCTYPE html><textarea>
+
+foo</textarea>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "
+foo"
+
+#data
+<!DOCTYPE html><html><head></head><body><ul><li><div><p><li></ul></body></html>
+#errors
+Line: 1 Col: 60 Missing end tag (div, li).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <ul>
+|       <li>
+|         <div>
+|           <p>
+|       <li>
+
+#data
+<!doctype html><nobr><nobr><nobr>
+#errors
+Line: 1 Col: 27 Unexpected start tag (nobr) implies end tag (nobr).
+Line: 1 Col: 33 Unexpected start tag (nobr) implies end tag (nobr).
+Line: 1 Col: 33 Expected closing tag. Unexpected end of file.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <nobr>
+|     <nobr>
+|     <nobr>
+
+#data
+<!doctype html><nobr><nobr></nobr><nobr>
+#errors
+Line: 1 Col: 27 Unexpected start tag (nobr) implies end tag (nobr).
+Line: 1 Col: 40 Expected closing tag. Unexpected end of file.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <nobr>
+|     <nobr>
+|     <nobr>
+
+#data
+<!doctype html><html><body><p><table></table></body></html>
+#errors
+Not known
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|     <table>
+
+#data
+<p><table></table>
+#errors
+Not known
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <table>
diff --git a/html/testdata/webkit/tests4.dat b/html/testdata/webkit/tests4.dat
new file mode 100644
index 0000000..3c50632
--- /dev/null
+++ b/html/testdata/webkit/tests4.dat
@@ -0,0 +1,59 @@
+#data
+direct div content
+#errors
+#document-fragment
+div
+#document
+| "direct div content"
+
+#data
+direct textarea content
+#errors
+#document-fragment
+textarea
+#document
+| "direct textarea content"
+
+#data
+textarea content with <em>pseudo</em> <foo>markup
+#errors
+#document-fragment
+textarea
+#document
+| "textarea content with <em>pseudo</em> <foo>markup"
+
+#data
+this is &#x0043;DATA inside a <style> element
+#errors
+#document-fragment
+style
+#document
+| "this is &#x0043;DATA inside a <style> element"
+
+#data
+</plaintext>
+#errors
+#document-fragment
+plaintext
+#document
+| "</plaintext>"
+
+#data
+setting html's innerHTML
+#errors
+Line: 1 Col: 24 Unexpected EOF in inner html mode.
+#document-fragment
+html
+#document
+| <head>
+| <body>
+|   "setting html's innerHTML"
+
+#data
+<title>setting head's innerHTML</title>
+#errors
+#document-fragment
+head
+#document
+| <title>
+|   "setting head's innerHTML"
diff --git a/html/testdata/webkit/tests5.dat b/html/testdata/webkit/tests5.dat
new file mode 100644
index 0000000..d7b5128
--- /dev/null
+++ b/html/testdata/webkit/tests5.dat
@@ -0,0 +1,191 @@
+#data
+<style> <!-- </style>x
+#errors
+Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
+Line: 1 Col: 22 Unexpected end of file. Expected end tag (style).
+#document
+| <html>
+|   <head>
+|     <style>
+|       " <!-- "
+|   <body>
+|     "x"
+
+#data
+<style> <!-- </style> --> </style>x
+#errors
+Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <style>
+|       " <!-- "
+|     " "
+|   <body>
+|     "--> x"
+
+#data
+<style> <!--> </style>x
+#errors
+Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <style>
+|       " <!--> "
+|   <body>
+|     "x"
+
+#data
+<style> <!---> </style>x
+#errors
+Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <style>
+|       " <!---> "
+|   <body>
+|     "x"
+
+#data
+<iframe> <!---> </iframe>x
+#errors
+Line: 1 Col: 8 Unexpected start tag (iframe). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <iframe>
+|       " <!---> "
+|     "x"
+
+#data
+<iframe> <!--- </iframe>->x</iframe> --> </iframe>x
+#errors
+Line: 1 Col: 8 Unexpected start tag (iframe). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <iframe>
+|       " <!--- "
+|     "->x --> x"
+
+#data
+<script> <!-- </script> --> </script>x
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <script>
+|       " <!-- "
+|     " "
+|   <body>
+|     "--> x"
+
+#data
+<title> <!-- </title> --> </title>x
+#errors
+Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <title>
+|       " <!-- "
+|     " "
+|   <body>
+|     "--> x"
+
+#data
+<textarea> <!--- </textarea>->x</textarea> --> </textarea>x
+#errors
+Line: 1 Col: 10 Unexpected start tag (textarea). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       " <!--- "
+|     "->x --> x"
+
+#data
+<style> <!</-- </style>x
+#errors
+Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <style>
+|       " <!</-- "
+|   <body>
+|     "x"
+
+#data
+<p><xmp></xmp>
+#errors
+XXX: Unknown
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|     <xmp>
+
+#data
+<xmp> <!-- > --> </xmp>
+#errors
+Line: 1 Col: 5 Unexpected start tag (xmp). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <xmp>
+|       " <!-- > --> "
+
+#data
+<title>&amp;</title>
+#errors
+Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <title>
+|       "&"
+|   <body>
+
+#data
+<title><!--&amp;--></title>
+#errors
+Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <title>
+|       "<!--&-->"
+|   <body>
+
+#data
+<title><!--</title>
+#errors
+Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
+Line: 1 Col: 19 Unexpected end of file. Expected end tag (title).
+#document
+| <html>
+|   <head>
+|     <title>
+|       "<!--"
+|   <body>
+
+#data
+<noscript><!--</noscript>--></noscript>
+#errors
+Line: 1 Col: 10 Unexpected start tag (noscript). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       "<!--"
+|   <body>
+|     "-->"
diff --git a/html/testdata/webkit/tests6.dat b/html/testdata/webkit/tests6.dat
new file mode 100644
index 0000000..f28ece4
--- /dev/null
+++ b/html/testdata/webkit/tests6.dat
@@ -0,0 +1,663 @@
+#data
+<!doctype html></head> <head>
+#errors
+Line: 1 Col: 29 Unexpected start tag head. Ignored.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   " "
+|   <body>
+
+#data
+<!doctype html><form><div></form><div>
+#errors
+33: End tag "form" seen but there were unclosed elements.
+38: End of file seen and there were open elements.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <form>
+|       <div>
+|         <div>
+
+#data
+<!doctype html><title>&amp;</title>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <title>
+|       "&"
+|   <body>
+
+#data
+<!doctype html><title><!--&amp;--></title>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <title>
+|       "<!--&-->"
+|   <body>
+
+#data
+<!doctype>
+#errors
+Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
+Line: 1 Col: 10 Unexpected > character. Expected DOCTYPE name.
+Line: 1 Col: 10 Erroneous DOCTYPE.
+#document
+| <!DOCTYPE >
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!---x
+#errors
+Line: 1 Col: 6 Unexpected end of file in comment.
+Line: 1 Col: 6 Unexpected End of file. Expected DOCTYPE.
+#document
+| <!-- -x -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<body>
+<div>
+#errors
+Line: 1 Col: 6 Unexpected start tag (body).
+Line: 2 Col: 5 Expected closing tag. Unexpected end of file.
+#document-fragment
+div
+#document
+| "
+"
+| <div>
+
+#data
+<frameset></frameset>
+foo
+#errors
+Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
+Line: 2 Col: 3 Unexpected non-space characters in the after frameset phase. Ignored.
+#document
+| <html>
+|   <head>
+|   <frameset>
+|   "
+"
+
+#data
+<frameset></frameset>
+<noframes>
+#errors
+Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
+Line: 2 Col: 10 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <frameset>
+|   "
+"
+|   <noframes>
+
+#data
+<frameset></frameset>
+<div>
+#errors
+Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
+Line: 2 Col: 5 Unexpected start tag (div) in the after frameset phase. Ignored.
+#document
+| <html>
+|   <head>
+|   <frameset>
+|   "
+"
+
+#data
+<frameset></frameset>
+</html>
+#errors
+Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <frameset>
+|   "
+"
+
+#data
+<frameset></frameset>
+</div>
+#errors
+Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
+Line: 2 Col: 6 Unexpected end tag (div) in the after frameset phase. Ignored.
+#document
+| <html>
+|   <head>
+|   <frameset>
+|   "
+"
+
+#data
+<form><form>
+#errors
+Line: 1 Col: 6 Unexpected start tag (form). Expected DOCTYPE.
+Line: 1 Col: 12 Unexpected start tag (form).
+Line: 1 Col: 12 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <form>
+
+#data
+<button><button>
+#errors
+Line: 1 Col: 8 Unexpected start tag (button). Expected DOCTYPE.
+Line: 1 Col: 16 Unexpected start tag (button) implies end tag (button).
+Line: 1 Col: 16 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <button>
+|     <button>
+
+#data
+<table><tr><td></th>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 20 Unexpected end tag (th). Ignored.
+Line: 1 Col: 20 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+
+#data
+<table><caption><td>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 20 Unexpected end tag (td). Ignored.
+Line: 1 Col: 20 Unexpected table cell start tag (td) in the table body phase.
+Line: 1 Col: 20 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|       <tbody>
+|         <tr>
+|           <td>
+
+#data
+<table><caption><div>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 21 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <div>
+
+#data
+</caption><div>
+#errors
+Line: 1 Col: 10 Unexpected end tag (caption). Ignored.
+Line: 1 Col: 15 Expected closing tag. Unexpected end of file.
+#document-fragment
+caption
+#document
+| <div>
+
+#data
+<table><caption><div></caption>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 31 Unexpected end tag (caption). Missing end tag (div).
+Line: 1 Col: 31 Unexpected end of file. Expected table content.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <div>
+
+#data
+<table><caption></table>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 24 Unexpected end table tag in caption. Generates implied end caption.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+
+#data
+</table><div>
+#errors
+Line: 1 Col: 8 Unexpected end table tag in caption. Generates implied end caption.
+Line: 1 Col: 8 Unexpected end tag (caption). Ignored.
+Line: 1 Col: 13 Expected closing tag. Unexpected end of file.
+#document-fragment
+caption
+#document
+| <div>
+
+#data
+<table><caption></body></col></colgroup></html></tbody></td></tfoot></th></thead></tr>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 23 Unexpected end tag (body). Ignored.
+Line: 1 Col: 29 Unexpected end tag (col). Ignored.
+Line: 1 Col: 40 Unexpected end tag (colgroup). Ignored.
+Line: 1 Col: 47 Unexpected end tag (html). Ignored.
+Line: 1 Col: 55 Unexpected end tag (tbody). Ignored.
+Line: 1 Col: 60 Unexpected end tag (td). Ignored.
+Line: 1 Col: 68 Unexpected end tag (tfoot). Ignored.
+Line: 1 Col: 73 Unexpected end tag (th). Ignored.
+Line: 1 Col: 81 Unexpected end tag (thead). Ignored.
+Line: 1 Col: 86 Unexpected end tag (tr). Ignored.
+Line: 1 Col: 86 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+
+#data
+<table><caption><div></div>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 27 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <div>
+
+#data
+<table><tr><td></body></caption></col></colgroup></html>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 22 Unexpected end tag (body). Ignored.
+Line: 1 Col: 32 Unexpected end tag (caption). Ignored.
+Line: 1 Col: 38 Unexpected end tag (col). Ignored.
+Line: 1 Col: 49 Unexpected end tag (colgroup). Ignored.
+Line: 1 Col: 56 Unexpected end tag (html). Ignored.
+Line: 1 Col: 56 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+
+#data
+</table></tbody></tfoot></thead></tr><div>
+#errors
+Line: 1 Col: 8 Unexpected end tag (table). Ignored.
+Line: 1 Col: 16 Unexpected end tag (tbody). Ignored.
+Line: 1 Col: 24 Unexpected end tag (tfoot). Ignored.
+Line: 1 Col: 32 Unexpected end tag (thead). Ignored.
+Line: 1 Col: 37 Unexpected end tag (tr). Ignored.
+Line: 1 Col: 42 Expected closing tag. Unexpected end of file.
+#document-fragment
+td
+#document
+| <div>
+
+#data
+<table><colgroup>foo
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 20 Unexpected non-space characters in table context caused voodoo mode.
+Line: 1 Col: 20 Unexpected end of file. Expected table content.
+#document
+| <html>
+|   <head>
+|   <body>
+|     "foo"
+|     <table>
+|       <colgroup>
+
+#data
+foo<col>
+#errors
+Line: 1 Col: 3 Unexpected end tag (colgroup). Ignored.
+#document-fragment
+colgroup
+#document
+| <col>
+
+#data
+<table><colgroup></col>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 23 This element (col) has no end tag.
+Line: 1 Col: 23 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <colgroup>
+
+#data
+<frameset><div>
+#errors
+Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
+Line: 1 Col: 15 Unexpected start tag token (div) in the frameset phase. Ignored.
+Line: 1 Col: 15 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+</frameset><frame>
+#errors
+Line: 1 Col: 11 Unexpected end tag token (frameset) in the frameset phase (innerHTML).
+#document-fragment
+frameset
+#document
+| <frame>
+
+#data
+<frameset></div>
+#errors
+Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
+Line: 1 Col: 16 Unexpected end tag token (div) in the frameset phase. Ignored.
+Line: 1 Col: 16 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+</body><div>
+#errors
+Line: 1 Col: 7 Unexpected end tag (body). Ignored.
+Line: 1 Col: 12 Expected closing tag. Unexpected end of file.
+#document-fragment
+body
+#document
+| <div>
+
+#data
+<table><tr><div>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 16 Unexpected start tag (div) in table context caused voodoo mode.
+Line: 1 Col: 16 Unexpected end of file. Expected table content.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+</tr><td>
+#errors
+Line: 1 Col: 5 Unexpected end tag (tr). Ignored.
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+</tbody></tfoot></thead><td>
+#errors
+Line: 1 Col: 8 Unexpected end tag (tbody). Ignored.
+Line: 1 Col: 16 Unexpected end tag (tfoot). Ignored.
+Line: 1 Col: 24 Unexpected end tag (thead). Ignored.
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+<table><tr><div><td>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 16 Unexpected start tag (div) in table context caused voodoo mode.
+Line: 1 Col: 20 Unexpected implied end tag (div) in the table row phase.
+Line: 1 Col: 20 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+
+#data
+<caption><col><colgroup><tbody><tfoot><thead><tr>
+#errors
+Line: 1 Col: 9 Unexpected start tag (caption).
+Line: 1 Col: 14 Unexpected start tag (col).
+Line: 1 Col: 24 Unexpected start tag (colgroup).
+Line: 1 Col: 31 Unexpected start tag (tbody).
+Line: 1 Col: 38 Unexpected start tag (tfoot).
+Line: 1 Col: 45 Unexpected start tag (thead).
+Line: 1 Col: 49 Unexpected end of file. Expected table content.
+#document-fragment
+tbody
+#document
+| <tr>
+
+#data
+<table><tbody></thead>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 22 Unexpected end tag (thead) in the table body phase. Ignored.
+Line: 1 Col: 22 Unexpected end of file. Expected table content.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+
+#data
+</table><tr>
+#errors
+Line: 1 Col: 8 Unexpected end tag (table). Ignored.
+Line: 1 Col: 12 Unexpected end of file. Expected table content.
+#document-fragment
+tbody
+#document
+| <tr>
+
+#data
+<table><tbody></body></caption></col></colgroup></html></td></th></tr>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 21 Unexpected end tag (body) in the table body phase. Ignored.
+Line: 1 Col: 31 Unexpected end tag (caption) in the table body phase. Ignored.
+Line: 1 Col: 37 Unexpected end tag (col) in the table body phase. Ignored.
+Line: 1 Col: 48 Unexpected end tag (colgroup) in the table body phase. Ignored.
+Line: 1 Col: 55 Unexpected end tag (html) in the table body phase. Ignored.
+Line: 1 Col: 60 Unexpected end tag (td) in the table body phase. Ignored.
+Line: 1 Col: 65 Unexpected end tag (th) in the table body phase. Ignored.
+Line: 1 Col: 70 Unexpected end tag (tr) in the table body phase. Ignored.
+Line: 1 Col: 70 Unexpected end of file. Expected table content.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+
+#data
+<table><tbody></div>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 20 Unexpected end tag (div) in table context caused voodoo mode.
+Line: 1 Col: 20 End tag (div) seen too early. Expected other end tag.
+Line: 1 Col: 20 Unexpected end of file. Expected table content.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+
+#data
+<table><table>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 14 Unexpected start tag (table) implies end tag (table).
+Line: 1 Col: 14 Unexpected end of file. Expected table content.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|     <table>
+
+#data
+<table></body></caption></col></colgroup></html></tbody></td></tfoot></th></thead></tr>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 14 Unexpected end tag (body). Ignored.
+Line: 1 Col: 24 Unexpected end tag (caption). Ignored.
+Line: 1 Col: 30 Unexpected end tag (col). Ignored.
+Line: 1 Col: 41 Unexpected end tag (colgroup). Ignored.
+Line: 1 Col: 48 Unexpected end tag (html). Ignored.
+Line: 1 Col: 56 Unexpected end tag (tbody). Ignored.
+Line: 1 Col: 61 Unexpected end tag (td). Ignored.
+Line: 1 Col: 69 Unexpected end tag (tfoot). Ignored.
+Line: 1 Col: 74 Unexpected end tag (th). Ignored.
+Line: 1 Col: 82 Unexpected end tag (thead). Ignored.
+Line: 1 Col: 87 Unexpected end tag (tr). Ignored.
+Line: 1 Col: 87 Unexpected end of file. Expected table content.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+
+#data
+</table><tr>
+#errors
+Line: 1 Col: 8 Unexpected end tag (table). Ignored.
+Line: 1 Col: 12 Unexpected end of file. Expected table content.
+#document-fragment
+table
+#document
+| <tbody>
+|   <tr>
+
+#data
+<body></body></html>
+#errors
+Line: 1 Col: 20 Unexpected html end tag in inner html mode.
+Line: 1 Col: 20 Unexpected EOF in inner html mode.
+#document-fragment
+html
+#document
+| <head>
+| <body>
+
+#data
+<html><frameset></frameset></html> 
+#errors
+Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <frameset>
+|   " "
+
+#data
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html></html>
+#errors
+Line: 1 Col: 50 Erroneous DOCTYPE.
+Line: 1 Col: 63 Unexpected end tag (html) after the (implied) root element.
+#document
+| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "">
+| <html>
+|   <head>
+|   <body>
+
+#data
+<param><frameset></frameset>
+#errors
+Line: 1 Col: 7 Unexpected start tag (param). Expected DOCTYPE.
+Line: 1 Col: 17 Unexpected start tag (frameset).
+#document
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<source><frameset></frameset>
+#errors
+Line: 1 Col: 7 Unexpected start tag (source). Expected DOCTYPE.
+Line: 1 Col: 17 Unexpected start tag (frameset).
+#document
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<track><frameset></frameset>
+#errors
+Line: 1 Col: 7 Unexpected start tag (track). Expected DOCTYPE.
+Line: 1 Col: 17 Unexpected start tag (frameset).
+#document
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+</html><frameset></frameset>
+#errors
+7: End tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
+17: Stray “frameset” start tag.
+17: “frameset” start tag seen.
+#document
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+</body><frameset></frameset>
+#errors
+7: End tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
+17: Stray “frameset” start tag.
+17: “frameset” start tag seen.
+#document
+| <html>
+|   <head>
+|   <frameset>
diff --git a/html/testdata/webkit/tests7.dat b/html/testdata/webkit/tests7.dat
new file mode 100644
index 0000000..f5193c6
--- /dev/null
+++ b/html/testdata/webkit/tests7.dat
@@ -0,0 +1,390 @@
+#data
+<!doctype html><body><title>X</title>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <title>
+|       "X"
+
+#data
+<!doctype html><table><title>X</title></table>
+#errors
+Line: 1 Col: 29 Unexpected start tag (title) in table context caused voodoo mode.
+Line: 1 Col: 38 Unexpected end tag (title) in table context caused voodoo mode.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <title>
+|       "X"
+|     <table>
+
+#data
+<!doctype html><head></head><title>X</title>
+#errors
+Line: 1 Col: 35 Unexpected start tag (title) that can be in head. Moved.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <title>
+|       "X"
+|   <body>
+
+#data
+<!doctype html></head><title>X</title>
+#errors
+Line: 1 Col: 29 Unexpected start tag (title) that can be in head. Moved.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <title>
+|       "X"
+|   <body>
+
+#data
+<!doctype html><table><meta></table>
+#errors
+Line: 1 Col: 28 Unexpected start tag (meta) in table context caused voodoo mode.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <meta>
+|     <table>
+
+#data
+<!doctype html><table>X<tr><td><table> <meta></table></table>
+#errors
+Line: 1 Col: 23 Unexpected non-space characters in table context caused voodoo mode.
+Line: 1 Col: 45 Unexpected start tag (meta) in table context caused voodoo mode.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "X"
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <meta>
+|             <table>
+|               " "
+
+#data
+<!doctype html><html> <head>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!doctype html> <head>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!doctype html><table><style> <tr>x </style> </table>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <style>
+|         " <tr>x "
+|       " "
+
+#data
+<!doctype html><table><TBODY><script> <tr>x </script> </table>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <script>
+|           " <tr>x "
+|         " "
+
+#data
+<!doctype html><p><applet><p>X</p></applet>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <applet>
+|         <p>
+|           "X"
+
+#data
+<!doctype html><listing>
+X</listing>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <listing>
+|       "X"
+
+#data
+<!doctype html><select><input>X
+#errors
+Line: 1 Col: 30 Unexpected input start tag in the select phase.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|     <input>
+|     "X"
+
+#data
+<!doctype html><select><select>X
+#errors
+Line: 1 Col: 31 Unexpected select start tag in the select phase treated as select end tag.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|     "X"
+
+#data
+<!doctype html><table><input type=hidDEN></table>
+#errors
+Line: 1 Col: 41 Unexpected input with type hidden in table context.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <input>
+|         type="hidDEN"
+
+#data
+<!doctype html><table>X<input type=hidDEN></table>
+#errors
+Line: 1 Col: 23 Unexpected non-space characters in table context caused voodoo mode.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "X"
+|     <table>
+|       <input>
+|         type="hidDEN"
+
+#data
+<!doctype html><table>  <input type=hidDEN></table>
+#errors
+Line: 1 Col: 43 Unexpected input with type hidden in table context.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       "  "
+|       <input>
+|         type="hidDEN"
+
+#data
+<!doctype html><table>  <input type='hidDEN'></table>
+#errors
+Line: 1 Col: 45 Unexpected input with type hidden in table context.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       "  "
+|       <input>
+|         type="hidDEN"
+
+#data
+<!doctype html><table><input type=" hidden"><input type=hidDEN></table>
+#errors
+Line: 1 Col: 44 Unexpected start tag (input) in table context caused voodoo mode.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <input>
+|       type=" hidden"
+|     <table>
+|       <input>
+|         type="hidDEN"
+
+#data
+<!doctype html><table><select>X<tr>
+#errors
+Line: 1 Col: 30 Unexpected start tag (select) in table context caused voodoo mode.
+Line: 1 Col: 35 Unexpected table element start tag (trs) in the select in table phase.
+Line: 1 Col: 35 Unexpected end of file. Expected table content.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       "X"
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<!doctype html><select>X</select>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       "X"
+
+#data
+<!DOCTYPE hTmL><html></html>
+#errors
+Line: 1 Col: 28 Unexpected end tag (html) after the (implied) root element.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE HTML><html></html>
+#errors
+Line: 1 Col: 28 Unexpected end tag (html) after the (implied) root element.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+
+#data
+<body>X</body></body>
+#errors
+Line: 1 Col: 21 Unexpected end tag token (body) in the after body phase.
+Line: 1 Col: 21 Unexpected EOF in inner html mode.
+#document-fragment
+html
+#document
+| <head>
+| <body>
+|   "X"
+
+#data
+<div><p>a</x> b
+#errors
+Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
+Line: 1 Col: 13 Unexpected end tag (x). Ignored.
+Line: 1 Col: 15 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <p>
+|         "a b"
+
+#data
+<table><tr><td><code></code> </table>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <code>
+|             " "
+
+#data
+<table><b><tr><td>aaa</td></tr>bbb</table>ccc
+#errors
+XXX: Fix me
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|     <b>
+|       "bbb"
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             "aaa"
+|     <b>
+|       "ccc"
+
+#data
+A<table><tr> B</tr> B</table>
+#errors
+XXX: Fix me
+#document
+| <html>
+|   <head>
+|   <body>
+|     "A B B"
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+A<table><tr> B</tr> </em>C</table>
+#errors
+XXX: Fix me
+#document
+| <html>
+|   <head>
+|   <body>
+|     "A BC"
+|     <table>
+|       <tbody>
+|         <tr>
+|         " "
+
+#data
+<select><keygen>
+#errors
+Not known
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|     <keygen>
diff --git a/html/testdata/webkit/tests8.dat b/html/testdata/webkit/tests8.dat
new file mode 100644
index 0000000..90e6c91
--- /dev/null
+++ b/html/testdata/webkit/tests8.dat
@@ -0,0 +1,148 @@
+#data
+<div>
+<div></div>
+</span>x
+#errors
+Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
+Line: 3 Col: 7 Unexpected end tag (span). Ignored.
+Line: 3 Col: 8 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "
+"
+|       <div>
+|       "
+x"
+
+#data
+<div>x<div></div>
+</span>x
+#errors
+Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
+Line: 2 Col: 7 Unexpected end tag (span). Ignored.
+Line: 2 Col: 8 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "x"
+|       <div>
+|       "
+x"
+
+#data
+<div>x<div></div>x</span>x
+#errors
+Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
+Line: 1 Col: 25 Unexpected end tag (span). Ignored.
+Line: 1 Col: 26 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "x"
+|       <div>
+|       "xx"
+
+#data
+<div>x<div></div>y</span>z
+#errors
+Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
+Line: 1 Col: 25 Unexpected end tag (span). Ignored.
+Line: 1 Col: 26 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "x"
+|       <div>
+|       "yz"
+
+#data
+<table><div>x<div></div>x</span>x
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 12 Unexpected start tag (div) in table context caused voodoo mode.
+Line: 1 Col: 18 Unexpected start tag (div) in table context caused voodoo mode.
+Line: 1 Col: 24 Unexpected end tag (div) in table context caused voodoo mode.
+Line: 1 Col: 32 Unexpected end tag (span) in table context caused voodoo mode.
+Line: 1 Col: 32 Unexpected end tag (span). Ignored.
+Line: 1 Col: 33 Unexpected end of file. Expected table content.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "x"
+|       <div>
+|       "xx"
+|     <table>
+
+#data
+x<table>x
+#errors
+Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
+Line: 1 Col: 9 Unexpected non-space characters in table context caused voodoo mode.
+Line: 1 Col: 9 Unexpected end of file. Expected table content.
+#document
+| <html>
+|   <head>
+|   <body>
+|     "xx"
+|     <table>
+
+#data
+x<table><table>x
+#errors
+Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
+Line: 1 Col: 15 Unexpected start tag (table) implies end tag (table).
+Line: 1 Col: 16 Unexpected non-space characters in table context caused voodoo mode.
+Line: 1 Col: 16 Unexpected end of file. Expected table content.
+#document
+| <html>
+|   <head>
+|   <body>
+|     "x"
+|     <table>
+|     "x"
+|     <table>
+
+#data
+<b>a<div></div><div></b>y
+#errors
+Line: 1 Col: 3 Unexpected start tag (b). Expected DOCTYPE.
+Line: 1 Col: 24 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 25 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       "a"
+|       <div>
+|     <div>
+|       <b>
+|       "y"
+
+#data
+<a><div><p></a>
+#errors
+Line: 1 Col: 3 Unexpected start tag (a). Expected DOCTYPE.
+Line: 1 Col: 15 End tag (a) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 15 End tag (a) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 15 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|     <div>
+|       <a>
+|       <p>
+|         <a>
diff --git a/html/testdata/webkit/tests9.dat b/html/testdata/webkit/tests9.dat
new file mode 100644
index 0000000..554e27a
--- /dev/null
+++ b/html/testdata/webkit/tests9.dat
@@ -0,0 +1,457 @@
+#data
+<!DOCTYPE html><math></math>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+
+#data
+<!DOCTYPE html><body><math></math>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+
+#data
+<!DOCTYPE html><math><mi>
+#errors
+25: End of file in a foreign namespace context.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+
+#data
+<!DOCTYPE html><math><annotation-xml><svg><u>
+#errors
+45: HTML start tag “u” in a foreign namespace context.
+45: End of file seen and there were open elements.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         <svg svg>
+|     <u>
+
+#data
+<!DOCTYPE html><body><select><math></math></select>
+#errors
+Line: 1 Col: 35 Unexpected start tag token (math) in the select phase. Ignored.
+Line: 1 Col: 42 Unexpected end tag (math) in the select phase. Ignored.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+
+#data
+<!DOCTYPE html><body><select><option><math></math></option></select>
+#errors
+Line: 1 Col: 43 Unexpected start tag token (math) in the select phase. Ignored.
+Line: 1 Col: 50 Unexpected end tag (math) in the select phase. Ignored.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+
+#data
+<!DOCTYPE html><body><table><math></math></table>
+#errors
+Line: 1 Col: 34 Unexpected start tag (math) in table context caused voodoo mode.
+Line: 1 Col: 41 Unexpected end tag (math) in table context caused voodoo mode.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|     <table>
+
+#data
+<!DOCTYPE html><body><table><math><mi>foo</mi></math></table>
+#errors
+Line: 1 Col: 34 Unexpected start tag (math) in table context caused voodoo mode.
+Line: 1 Col: 46 Unexpected end tag (mi) in table context caused voodoo mode.
+Line: 1 Col: 53 Unexpected end tag (math) in table context caused voodoo mode.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         "foo"
+|     <table>
+
+#data
+<!DOCTYPE html><body><table><math><mi>foo</mi><mi>bar</mi></math></table>
+#errors
+Line: 1 Col: 34 Unexpected start tag (math) in table context caused voodoo mode.
+Line: 1 Col: 46 Unexpected end tag (mi) in table context caused voodoo mode.
+Line: 1 Col: 58 Unexpected end tag (mi) in table context caused voodoo mode.
+Line: 1 Col: 65 Unexpected end tag (math) in table context caused voodoo mode.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         "foo"
+|       <math mi>
+|         "bar"
+|     <table>
+
+#data
+<!DOCTYPE html><body><table><tbody><math><mi>foo</mi><mi>bar</mi></math></tbody></table>
+#errors
+Line: 1 Col: 41 Unexpected start tag (math) in table context caused voodoo mode.
+Line: 1 Col: 53 Unexpected end tag (mi) in table context caused voodoo mode.
+Line: 1 Col: 65 Unexpected end tag (mi) in table context caused voodoo mode.
+Line: 1 Col: 72 Unexpected end tag (math) in table context caused voodoo mode.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         "foo"
+|       <math mi>
+|         "bar"
+|     <table>
+|       <tbody>
+
+#data
+<!DOCTYPE html><body><table><tbody><tr><math><mi>foo</mi><mi>bar</mi></math></tr></tbody></table>
+#errors
+Line: 1 Col: 45 Unexpected start tag (math) in table context caused voodoo mode.
+Line: 1 Col: 57 Unexpected end tag (mi) in table context caused voodoo mode.
+Line: 1 Col: 69 Unexpected end tag (mi) in table context caused voodoo mode.
+Line: 1 Col: 76 Unexpected end tag (math) in table context caused voodoo mode.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         "foo"
+|       <math mi>
+|         "bar"
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<!DOCTYPE html><body><table><tbody><tr><td><math><mi>foo</mi><mi>bar</mi></math></td></tr></tbody></table>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <math math>
+|               <math mi>
+|                 "foo"
+|               <math mi>
+|                 "bar"
+
+#data
+<!DOCTYPE html><body><table><tbody><tr><td><math><mi>foo</mi><mi>bar</mi></math><p>baz</td></tr></tbody></table>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <math math>
+|               <math mi>
+|                 "foo"
+|               <math mi>
+|                 "bar"
+|             <p>
+|               "baz"
+
+#data
+<!DOCTYPE html><body><table><caption><math><mi>foo</mi><mi>bar</mi></math><p>baz</caption></table>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <math math>
+|           <math mi>
+|             "foo"
+|           <math mi>
+|             "bar"
+|         <p>
+|           "baz"
+
+#data
+<!DOCTYPE html><body><table><caption><math><mi>foo</mi><mi>bar</mi><p>baz</table><p>quux
+#errors
+Line: 1 Col: 70 HTML start tag "p" in a foreign namespace context.
+Line: 1 Col: 81 Unexpected end table tag in caption. Generates implied end caption.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <math math>
+|           <math mi>
+|             "foo"
+|           <math mi>
+|             "bar"
+|         <p>
+|           "baz"
+|     <p>
+|       "quux"
+
+#data
+<!DOCTYPE html><body><table><caption><math><mi>foo</mi><mi>bar</mi>baz</table><p>quux
+#errors
+Line: 1 Col: 78 Unexpected end table tag in caption. Generates implied end caption.
+Line: 1 Col: 78 Unexpected end tag (caption). Missing end tag (math).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <math math>
+|           <math mi>
+|             "foo"
+|           <math mi>
+|             "bar"
+|           "baz"
+|     <p>
+|       "quux"
+
+#data
+<!DOCTYPE html><body><table><colgroup><math><mi>foo</mi><mi>bar</mi><p>baz</table><p>quux
+#errors
+Line: 1 Col: 44 Unexpected start tag (math) in table context caused voodoo mode.
+Line: 1 Col: 56 Unexpected end tag (mi) in table context caused voodoo mode.
+Line: 1 Col: 68 Unexpected end tag (mi) in table context caused voodoo mode.
+Line: 1 Col: 71 HTML start tag "p" in a foreign namespace context.
+Line: 1 Col: 71 Unexpected start tag (p) in table context caused voodoo mode.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         "foo"
+|       <math mi>
+|         "bar"
+|     <p>
+|       "baz"
+|     <table>
+|       <colgroup>
+|     <p>
+|       "quux"
+
+#data
+<!DOCTYPE html><body><table><tr><td><select><math><mi>foo</mi><mi>bar</mi><p>baz</table><p>quux
+#errors
+Line: 1 Col: 50 Unexpected start tag token (math) in the select phase. Ignored.
+Line: 1 Col: 54 Unexpected start tag token (mi) in the select phase. Ignored.
+Line: 1 Col: 62 Unexpected end tag (mi) in the select phase. Ignored.
+Line: 1 Col: 66 Unexpected start tag token (mi) in the select phase. Ignored.
+Line: 1 Col: 74 Unexpected end tag (mi) in the select phase. Ignored.
+Line: 1 Col: 77 Unexpected start tag token (p) in the select phase. Ignored.
+Line: 1 Col: 88 Unexpected table element end tag (tables) in the select in table phase.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <select>
+|               "foobarbaz"
+|     <p>
+|       "quux"
+
+#data
+<!DOCTYPE html><body><table><select><math><mi>foo</mi><mi>bar</mi><p>baz</table><p>quux
+#errors
+Line: 1 Col: 36 Unexpected start tag (select) in table context caused voodoo mode.
+Line: 1 Col: 42 Unexpected start tag token (math) in the select phase. Ignored.
+Line: 1 Col: 46 Unexpected start tag token (mi) in the select phase. Ignored.
+Line: 1 Col: 54 Unexpected end tag (mi) in the select phase. Ignored.
+Line: 1 Col: 58 Unexpected start tag token (mi) in the select phase. Ignored.
+Line: 1 Col: 66 Unexpected end tag (mi) in the select phase. Ignored.
+Line: 1 Col: 69 Unexpected start tag token (p) in the select phase. Ignored.
+Line: 1 Col: 80 Unexpected table element end tag (tables) in the select in table phase.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       "foobarbaz"
+|     <table>
+|     <p>
+|       "quux"
+
+#data
+<!DOCTYPE html><body></body></html><math><mi>foo</mi><mi>bar</mi><p>baz
+#errors
+Line: 1 Col: 41 Unexpected start tag (math).
+Line: 1 Col: 68 HTML start tag "p" in a foreign namespace context.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         "foo"
+|       <math mi>
+|         "bar"
+|     <p>
+|       "baz"
+
+#data
+<!DOCTYPE html><body></body><math><mi>foo</mi><mi>bar</mi><p>baz
+#errors
+Line: 1 Col: 34 Unexpected start tag token (math) in the after body phase.
+Line: 1 Col: 61 HTML start tag "p" in a foreign namespace context.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         "foo"
+|       <math mi>
+|         "bar"
+|     <p>
+|       "baz"
+
+#data
+<!DOCTYPE html><frameset><math><mi></mi><mi></mi><p><span>
+#errors
+Line: 1 Col: 31 Unexpected start tag token (math) in the frameset phase. Ignored.
+Line: 1 Col: 35 Unexpected start tag token (mi) in the frameset phase. Ignored.
+Line: 1 Col: 40 Unexpected end tag token (mi) in the frameset phase. Ignored.
+Line: 1 Col: 44 Unexpected start tag token (mi) in the frameset phase. Ignored.
+Line: 1 Col: 49 Unexpected end tag token (mi) in the frameset phase. Ignored.
+Line: 1 Col: 52 Unexpected start tag token (p) in the frameset phase. Ignored.
+Line: 1 Col: 58 Unexpected start tag token (span) in the frameset phase. Ignored.
+Line: 1 Col: 58 Expected closing tag. Unexpected end of file.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!DOCTYPE html><frameset></frameset><math><mi></mi><mi></mi><p><span>
+#errors
+Line: 1 Col: 42 Unexpected start tag (math) in the after frameset phase. Ignored.
+Line: 1 Col: 46 Unexpected start tag (mi) in the after frameset phase. Ignored.
+Line: 1 Col: 51 Unexpected end tag (mi) in the after frameset phase. Ignored.
+Line: 1 Col: 55 Unexpected start tag (mi) in the after frameset phase. Ignored.
+Line: 1 Col: 60 Unexpected end tag (mi) in the after frameset phase. Ignored.
+Line: 1 Col: 63 Unexpected start tag (p) in the after frameset phase. Ignored.
+Line: 1 Col: 69 Unexpected start tag (span) in the after frameset phase. Ignored.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!DOCTYPE html><body xlink:href=foo><math xlink:href=foo></math>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     xlink:href="foo"
+|     <math math>
+|       xlink href="foo"
+
+#data
+<!DOCTYPE html><body xlink:href=foo xml:lang=en><math><mi xml:lang=en xlink:href=foo></mi></math>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     xlink:href="foo"
+|     xml:lang="en"
+|     <math math>
+|       <math mi>
+|         xlink href="foo"
+|         xml lang="en"
+
+#data
+<!DOCTYPE html><body xlink:href=foo xml:lang=en><math><mi xml:lang=en xlink:href=foo /></math>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     xlink:href="foo"
+|     xml:lang="en"
+|     <math math>
+|       <math mi>
+|         xlink href="foo"
+|         xml lang="en"
+
+#data
+<!DOCTYPE html><body xlink:href=foo xml:lang=en><math><mi xml:lang=en xlink:href=foo />bar</math>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     xlink:href="foo"
+|     xml:lang="en"
+|     <math math>
+|       <math mi>
+|         xlink href="foo"
+|         xml lang="en"
+|       "bar"
diff --git a/html/testdata/webkit/tests_innerHTML_1.dat b/html/testdata/webkit/tests_innerHTML_1.dat
new file mode 100644
index 0000000..6c78661
--- /dev/null
+++ b/html/testdata/webkit/tests_innerHTML_1.dat
@@ -0,0 +1,741 @@
+#data
+<body><span>
+#errors
+#document-fragment
+body
+#document
+| <span>
+
+#data
+<span><body>
+#errors
+#document-fragment
+body
+#document
+| <span>
+
+#data
+<span><body>
+#errors
+#document-fragment
+div
+#document
+| <span>
+
+#data
+<body><span>
+#errors
+#document-fragment
+html
+#document
+| <head>
+| <body>
+|   <span>
+
+#data
+<frameset><span>
+#errors
+#document-fragment
+body
+#document
+| <span>
+
+#data
+<span><frameset>
+#errors
+#document-fragment
+body
+#document
+| <span>
+
+#data
+<span><frameset>
+#errors
+#document-fragment
+div
+#document
+| <span>
+
+#data
+<frameset><span>
+#errors
+#document-fragment
+html
+#document
+| <head>
+| <frameset>
+
+#data
+<table><tr>
+#errors
+#document-fragment
+table
+#document
+| <tbody>
+|   <tr>
+
+#data
+</table><tr>
+#errors
+#document-fragment
+table
+#document
+| <tbody>
+|   <tr>
+
+#data
+<a>
+#errors
+#document-fragment
+table
+#document
+| <a>
+
+#data
+<a>
+#errors
+#document-fragment
+table
+#document
+| <a>
+
+#data
+<a><caption>a
+#errors
+#document-fragment
+table
+#document
+| <a>
+| <caption>
+|   "a"
+
+#data
+<a><colgroup><col>
+#errors
+#document-fragment
+table
+#document
+| <a>
+| <colgroup>
+|   <col>
+
+#data
+<a><tbody><tr>
+#errors
+#document-fragment
+table
+#document
+| <a>
+| <tbody>
+|   <tr>
+
+#data
+<a><tfoot><tr>
+#errors
+#document-fragment
+table
+#document
+| <a>
+| <tfoot>
+|   <tr>
+
+#data
+<a><thead><tr>
+#errors
+#document-fragment
+table
+#document
+| <a>
+| <thead>
+|   <tr>
+
+#data
+<a><tr>
+#errors
+#document-fragment
+table
+#document
+| <a>
+| <tbody>
+|   <tr>
+
+#data
+<a><th>
+#errors
+#document-fragment
+table
+#document
+| <a>
+| <tbody>
+|   <tr>
+|     <th>
+
+#data
+<a><td>
+#errors
+#document-fragment
+table
+#document
+| <a>
+| <tbody>
+|   <tr>
+|     <td>
+
+#data
+<table></table><tbody>
+#errors
+#document-fragment
+caption
+#document
+| <table>
+
+#data
+</table><span>
+#errors
+#document-fragment
+caption
+#document
+| <span>
+
+#data
+<span></table>
+#errors
+#document-fragment
+caption
+#document
+| <span>
+
+#data
+</caption><span>
+#errors
+#document-fragment
+caption
+#document
+| <span>
+
+#data
+<span></caption><span>
+#errors
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span><caption><span>
+#errors
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span><col><span>
+#errors
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span><colgroup><span>
+#errors
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span><html><span>
+#errors
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span><tbody><span>
+#errors
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span><td><span>
+#errors
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span><tfoot><span>
+#errors
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span><thead><span>
+#errors
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span><th><span>
+#errors
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span><tr><span>
+#errors
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span></table><span>
+#errors
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+</colgroup><col>
+#errors
+#document-fragment
+colgroup
+#document
+| <col>
+
+#data
+<a><col>
+#errors
+#document-fragment
+colgroup
+#document
+| <col>
+
+#data
+<caption><a>
+#errors
+#document-fragment
+tbody
+#document
+| <a>
+
+#data
+<col><a>
+#errors
+#document-fragment
+tbody
+#document
+| <a>
+
+#data
+<colgroup><a>
+#errors
+#document-fragment
+tbody
+#document
+| <a>
+
+#data
+<tbody><a>
+#errors
+#document-fragment
+tbody
+#document
+| <a>
+
+#data
+<tfoot><a>
+#errors
+#document-fragment
+tbody
+#document
+| <a>
+
+#data
+<thead><a>
+#errors
+#document-fragment
+tbody
+#document
+| <a>
+
+#data
+</table><a>
+#errors
+#document-fragment
+tbody
+#document
+| <a>
+
+#data
+<a><tr>
+#errors
+#document-fragment
+tbody
+#document
+| <a>
+| <tr>
+
+#data
+<a><td>
+#errors
+#document-fragment
+tbody
+#document
+| <a>
+| <tr>
+|   <td>
+
+#data
+<a><td>
+#errors
+#document-fragment
+tbody
+#document
+| <a>
+| <tr>
+|   <td>
+
+#data
+<a><td>
+#errors
+#document-fragment
+tbody
+#document
+| <a>
+| <tr>
+|   <td>
+
+#data
+<td><table><tbody><a><tr>
+#errors
+#document-fragment
+tbody
+#document
+| <tr>
+|   <td>
+|     <a>
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+</tr><td>
+#errors
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+<td><table><a><tr></tr><tr>
+#errors
+#document-fragment
+tr
+#document
+| <td>
+|   <a>
+|   <table>
+|     <tbody>
+|       <tr>
+|       <tr>
+
+#data
+<caption><td>
+#errors
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+<col><td>
+#errors
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+<colgroup><td>
+#errors
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+<tbody><td>
+#errors
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+<tfoot><td>
+#errors
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+<thead><td>
+#errors
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+<tr><td>
+#errors
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+</table><td>
+#errors
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+<td><table></table><td>
+#errors
+#document-fragment
+tr
+#document
+| <td>
+|   <table>
+| <td>
+
+#data
+<td><table></table><td>
+#errors
+#document-fragment
+tr
+#document
+| <td>
+|   <table>
+| <td>
+
+#data
+<caption><a>
+#errors
+#document-fragment
+td
+#document
+| <a>
+
+#data
+<col><a>
+#errors
+#document-fragment
+td
+#document
+| <a>
+
+#data
+<colgroup><a>
+#errors
+#document-fragment
+td
+#document
+| <a>
+
+#data
+<tbody><a>
+#errors
+#document-fragment
+td
+#document
+| <a>
+
+#data
+<tfoot><a>
+#errors
+#document-fragment
+td
+#document
+| <a>
+
+#data
+<th><a>
+#errors
+#document-fragment
+td
+#document
+| <a>
+
+#data
+<thead><a>
+#errors
+#document-fragment
+td
+#document
+| <a>
+
+#data
+<tr><a>
+#errors
+#document-fragment
+td
+#document
+| <a>
+
+#data
+</table><a>
+#errors
+#document-fragment
+td
+#document
+| <a>
+
+#data
+</tbody><a>
+#errors
+#document-fragment
+td
+#document
+| <a>
+
+#data
+</td><a>
+#errors
+#document-fragment
+td
+#document
+| <a>
+
+#data
+</tfoot><a>
+#errors
+#document-fragment
+td
+#document
+| <a>
+
+#data
+</thead><a>
+#errors
+#document-fragment
+td
+#document
+| <a>
+
+#data
+</th><a>
+#errors
+#document-fragment
+td
+#document
+| <a>
+
+#data
+</tr><a>
+#errors
+#document-fragment
+td
+#document
+| <a>
+
+#data
+<table><td><td>
+#errors
+#document-fragment
+td
+#document
+| <table>
+|   <tbody>
+|     <tr>
+|       <td>
+|       <td>
+
+#data
+</select><option>
+#errors
+#document-fragment
+select
+#document
+| <option>
+
+#data
+<input><option>
+#errors
+#document-fragment
+select
+#document
+| <option>
+
+#data
+<keygen><option>
+#errors
+#document-fragment
+select
+#document
+| <option>
+
+#data
+<textarea><option>
+#errors
+#document-fragment
+select
+#document
+| <option>
+
+#data
+</html><!--abc-->
+#errors
+#document-fragment
+html
+#document
+| <head>
+| <body>
+| <!-- abc -->
+
+#data
+</frameset><frame>
+#errors
+#document-fragment
+frameset
+#document
+| <frame>
+
+#data
+#errors
+#document-fragment
+html
+#document
+| <head>
+| <body>
diff --git a/html/testdata/webkit/tricky01.dat b/html/testdata/webkit/tricky01.dat
new file mode 100644
index 0000000..0841992
--- /dev/null
+++ b/html/testdata/webkit/tricky01.dat
@@ -0,0 +1,261 @@
+#data
+<b><p>Bold </b> Not bold</p>
+Also not bold.
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|     <p>
+|       <b>
+|         "Bold "
+|       " Not bold"
+|     "
+Also not bold."
+
+#data
+<html>
+<font color=red><i>Italic and Red<p>Italic and Red </font> Just italic.</p> Italic only.</i> Plain
+<p>I should not be red. <font color=red>Red. <i>Italic and red.</p>
+<p>Italic and red. </i> Red.</font> I should not be red.</p>
+<b>Bold <i>Bold and italic</b> Only Italic </i> Plain
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <font>
+|       color="red"
+|       <i>
+|         "Italic and Red"
+|     <i>
+|       <p>
+|         <font>
+|           color="red"
+|           "Italic and Red "
+|         " Just italic."
+|       " Italic only."
+|     " Plain
+"
+|     <p>
+|       "I should not be red. "
+|       <font>
+|         color="red"
+|         "Red. "
+|         <i>
+|           "Italic and red."
+|     <font>
+|       color="red"
+|       <i>
+|         "
+"
+|     <p>
+|       <font>
+|         color="red"
+|         <i>
+|           "Italic and red. "
+|         " Red."
+|       " I should not be red."
+|     "
+"
+|     <b>
+|       "Bold "
+|       <i>
+|         "Bold and italic"
+|     <i>
+|       " Only Italic "
+|     " Plain"
+
+#data
+<html><body>
+<p><font size="7">First paragraph.</p>
+<p>Second paragraph.</p></font>
+<b><p><i>Bold and Italic</b> Italic</p>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "
+"
+|     <p>
+|       <font>
+|         size="7"
+|         "First paragraph."
+|     <font>
+|       size="7"
+|       "
+"
+|       <p>
+|         "Second paragraph."
+|     "
+"
+|     <b>
+|     <p>
+|       <b>
+|         <i>
+|           "Bold and Italic"
+|       <i>
+|         " Italic"
+
+#data
+<html>
+<dl>
+<dt><b>Boo
+<dd>Goo?
+</dl>
+</html>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <dl>
+|       "
+"
+|       <dt>
+|         <b>
+|           "Boo
+"
+|       <dd>
+|         <b>
+|           "Goo?
+"
+|     <b>
+|       "
+"
+
+#data
+<html><body>
+<label><a><div>Hello<div>World</div></a></label>  
+</body></html>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "
+"
+|     <label>
+|       <a>
+|       <div>
+|         <a>
+|           "Hello"
+|           <div>
+|             "World"
+|         "  
+"
+
+#data
+<table><center> <font>a</center> <img> <tr><td> </td> </tr> </table>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <center>
+|       " "
+|       <font>
+|         "a"
+|     <font>
+|       <img>
+|       " "
+|     <table>
+|       " "
+|       <tbody>
+|         <tr>
+|           <td>
+|             " "
+|           " "
+|         " "
+
+#data
+<table><tr><p><a><p>You should see this text.
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <a>
+|     <p>
+|       <a>
+|         "You should see this text."
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<TABLE>
+<TR>
+<CENTER><CENTER><TD></TD></TR><TR>
+<FONT>
+<TABLE><tr></tr></TABLE>
+</P>
+<a></font><font></a>
+This page contains an insanely badly-nested tag sequence.
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <center>
+|       <center>
+|     <font>
+|       "
+"
+|     <table>
+|       "
+"
+|       <tbody>
+|         <tr>
+|           "
+"
+|           <td>
+|         <tr>
+|           "
+"
+|     <table>
+|       <tbody>
+|         <tr>
+|     <font>
+|       "
+"
+|       <p>
+|       "
+"
+|       <a>
+|     <a>
+|       <font>
+|     <font>
+|       "
+This page contains an insanely badly-nested tag sequence."
+
+#data
+<html>
+<body>
+<b><nobr><div>This text is in a div inside a nobr</nobr>More text that should not be in the nobr, i.e., the
+nobr should have closed the div inside it implicitly. </b><pre>A pre tag outside everything else.</pre>
+</body>
+</html>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "
+"
+|     <b>
+|       <nobr>
+|     <div>
+|       <b>
+|         <nobr>
+|           "This text is in a div inside a nobr"
+|         "More text that should not be in the nobr, i.e., the
+nobr should have closed the div inside it implicitly. "
+|       <pre>
+|         "A pre tag outside everything else."
+|       "
+
+"
diff --git a/html/testdata/webkit/webkit01.dat b/html/testdata/webkit/webkit01.dat
new file mode 100644
index 0000000..9d425e9
--- /dev/null
+++ b/html/testdata/webkit/webkit01.dat
@@ -0,0 +1,610 @@
+#data
+Test
+#errors
+Line: 1 Col: 4 Unexpected non-space characters. Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     "Test"
+
+#data
+<div></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+
+#data
+<div>Test</div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "Test"
+
+#data
+<di
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<div>Hello</div>
+<script>
+console.log("PASS");
+</script>
+<div>Bye</div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "Hello"
+|     "
+"
+|     <script>
+|       "
+console.log("PASS");
+"
+|     "
+"
+|     <div>
+|       "Bye"
+
+#data
+<div foo="bar">Hello</div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       foo="bar"
+|       "Hello"
+
+#data
+<div>Hello</div>
+<script>
+console.log("FOO<span>BAR</span>BAZ");
+</script>
+<div>Bye</div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "Hello"
+|     "
+"
+|     <script>
+|       "
+console.log("FOO<span>BAR</span>BAZ");
+"
+|     "
+"
+|     <div>
+|       "Bye"
+
+#data
+<foo bar="baz"></foo><potato quack="duck"></potato>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <foo>
+|       bar="baz"
+|     <potato>
+|       quack="duck"
+
+#data
+<foo bar="baz"><potato quack="duck"></potato></foo>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <foo>
+|       bar="baz"
+|       <potato>
+|         quack="duck"
+
+#data
+<foo></foo bar="baz"><potato></potato quack="duck">
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <foo>
+|     <potato>
+
+#data
+</ tttt>
+#errors
+#document
+| <!--  tttt -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<div FOO ><img><img></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       foo=""
+|       <img>
+|       <img>
+
+#data
+<p>Test</p<p>Test2</p>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "TestTest2"
+
+#data
+<rdar://problem/6869687>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <rdar:>
+|       6869687=""
+|       problem=""
+
+#data
+<A>test< /A>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "test< /A>"
+
+#data
+&lt;
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "<"
+
+#data
+<body foo='bar'><body foo='baz' yo='mama'>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     foo="bar"
+|     yo="mama"
+
+#data
+<body></br foo="bar"></body>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <br>
+
+#data
+<bdy><br foo="bar"></body>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <bdy>
+|       <br>
+|         foo="bar"
+
+#data
+<body></body></br foo="bar">
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <br>
+
+#data
+<bdy></body><br foo="bar">
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <bdy>
+|       <br>
+|         foo="bar"
+
+#data
+<html><body></body></html><!-- Hi there -->
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+| <!--  Hi there  -->
+
+#data
+<html><body></body></html>x<!-- Hi there -->
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "x"
+|     <!--  Hi there  -->
+
+#data
+<html><body></body></html>x<!-- Hi there --></html><!-- Again -->
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "x"
+|     <!--  Hi there  -->
+| <!--  Again  -->
+
+#data
+<html><body></body></html>x<!-- Hi there --></body></html><!-- Again -->
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "x"
+|     <!--  Hi there  -->
+| <!--  Again  -->
+
+#data
+<html><body><ruby><div><rp>xx</rp></div></ruby></body></html>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       <div>
+|         <rp>
+|           "xx"
+
+#data
+<html><body><ruby><div><rt>xx</rt></div></ruby></body></html>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       <div>
+|         <rt>
+|           "xx"
+
+#data
+<html><frameset><!--1--><noframes>A</noframes><!--2--></frameset><!--3--><noframes>B</noframes><!--4--></html><!--5--><noframes>C</noframes><!--6-->
+#errors
+#document
+| <html>
+|   <head>
+|   <frameset>
+|     <!-- 1 -->
+|     <noframes>
+|       "A"
+|     <!-- 2 -->
+|   <!-- 3 -->
+|   <noframes>
+|     "B"
+|   <!-- 4 -->
+|   <noframes>
+|     "C"
+| <!-- 5 -->
+| <!-- 6 -->
+
+#data
+<select><option>A<select><option>B<select><option>C<select><option>D<select><option>E<select><option>F<select><option>G<select>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+|         "A"
+|     <option>
+|       "B"
+|       <select>
+|         <option>
+|           "C"
+|     <option>
+|       "D"
+|       <select>
+|         <option>
+|           "E"
+|     <option>
+|       "F"
+|       <select>
+|         <option>
+|           "G"
+
+#data
+<dd><dd><dt><dt><dd><li><li>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <dd>
+|     <dd>
+|     <dt>
+|     <dt>
+|     <dd>
+|       <li>
+|       <li>
+
+#data
+<div><b></div><div><nobr>a<nobr>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <b>
+|     <div>
+|       <b>
+|         <nobr>
+|           "a"
+|         <nobr>
+
+#data
+<head></head>
+<body></body>
+#errors
+#document
+| <html>
+|   <head>
+|   "
+"
+|   <body>
+
+#data
+<head></head> <style></style>ddd
+#errors
+#document
+| <html>
+|   <head>
+|     <style>
+|   " "
+|   <body>
+|     "ddd"
+
+#data
+<kbd><table></kbd><col><select><tr>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <kbd>
+|       <select>
+|       <table>
+|         <colgroup>
+|           <col>
+|         <tbody>
+|           <tr>
+
+#data
+<kbd><table></kbd><col><select><tr></table><div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <kbd>
+|       <select>
+|       <table>
+|         <colgroup>
+|           <col>
+|         <tbody>
+|           <tr>
+|       <div>
+
+#data
+<a><li><style></style><title></title></a>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|     <li>
+|       <a>
+|         <style>
+|         <title>
+
+#data
+<font></p><p><meta><title></title></font>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <font>
+|       <p>
+|     <p>
+|       <font>
+|         <meta>
+|         <title>
+
+#data
+<a><center><title></title><a>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|     <center>
+|       <a>
+|         <title>
+|       <a>
+
+#data
+<svg><title><div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg title>
+|         <div>
+
+#data
+<svg><title><rect><div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg title>
+|         <rect>
+|           <div>
+
+#data
+<svg><title><svg><div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg title>
+|         <svg svg>
+|         <div>
+
+#data
+<img <="" FAIL>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <img>
+|       <=""
+|       fail=""
+
+#data
+<ul><li><div id='foo'/>A</li><li>B<div>C</div></li></ul>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ul>
+|       <li>
+|         <div>
+|           id="foo"
+|           "A"
+|       <li>
+|         "B"
+|         <div>
+|           "C"
+
+#data
+<svg><em><desc></em>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|     <em>
+|       <desc>
+
+#data
+<table><tr><td><svg><desc><td></desc><circle>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <svg svg>
+|               <svg desc>
+|           <td>
+|             <circle>
+
+#data
+<svg><tfoot></mi><td>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg tfoot>
+|         <svg td>
+
+#data
+<math><mrow><mrow><mn>1</mn></mrow><mi>a</mi></mrow></math>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mrow>
+|         <math mrow>
+|           <math mn>
+|             "1"
+|         <math mi>
+|           "a"
+
+#data
+<!doctype html><input type="hidden"><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!doctype html><input type="button"><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <input>
+|       type="button"
diff --git a/html/testdata/webkit/webkit02.dat b/html/testdata/webkit/webkit02.dat
new file mode 100644
index 0000000..905783d
--- /dev/null
+++ b/html/testdata/webkit/webkit02.dat
@@ -0,0 +1,159 @@
+#data
+<foo bar=qux/>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <foo>
+|       bar="qux/"
+
+#data
+<p id="status"><noscript><strong>A</strong></noscript><span>B</span></p>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       id="status"
+|       <noscript>
+|         "<strong>A</strong>"
+|       <span>
+|         "B"
+
+#data
+<div><sarcasm><div></div></sarcasm></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <sarcasm>
+|         <div>
+
+#data
+<html><body><img src="" border="0" alt="><div>A</div></body></html>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<table><td></tbody>A
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "A"
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+
+#data
+<table><td></thead>A
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             "A"
+
+#data
+<table><td></tfoot>A
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             "A"
+
+#data
+<table><thead><td></tbody>A
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <thead>
+|         <tr>
+|           <td>
+|             "A"
+
+#data
+<legend>test</legend>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <legend>
+|       "test"
+
+#data
+<table><input>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <input>
+|     <table>
+
+#data
+<b><em><dcell><postfield><postfield><postfield><postfield><missing_glyph><missing_glyph><missing_glyph><missing_glyph><hkern><aside></b></em>
+#errors
+#document-fragment
+div
+#document
+| <b>
+|   <em>
+|     <dcell>
+|       <postfield>
+|         <postfield>
+|           <postfield>
+|             <postfield>
+|               <missing_glyph>
+|                 <missing_glyph>
+|                   <missing_glyph>
+|                     <missing_glyph>
+|                       <hkern>
+| <aside>
+|   <em>
+|     <b>
+
+#data
+<isindex action="x">
+#errors
+#document-fragment
+table
+#document
+| <form>
+|   action="x"
+| <hr>
+| <label>
+|   "This is a searchable index. Enter search keywords: "
+|   <input>
+|     name="isindex"
+| <hr>
+
+#data
+<option><XH<optgroup></optgroup>
+#errors
+#document-fragment
+select
+#document
+| <option>
diff --git a/html/token.go b/html/token.go
new file mode 100644
index 0000000..a1f4389
--- /dev/null
+++ b/html/token.go
@@ -0,0 +1,1174 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+	"bytes"
+	"io"
+	"strconv"
+	"strings"
+
+	"code.google.com/p/go.net/html/atom"
+)
+
+// A TokenType is the type of a Token.
+type TokenType uint32
+
+const (
+	// ErrorToken means that an error occurred during tokenization.
+	ErrorToken TokenType = iota
+	// TextToken means a text node.
+	TextToken
+	// A StartTagToken looks like <a>.
+	StartTagToken
+	// An EndTagToken looks like </a>.
+	EndTagToken
+	// A SelfClosingTagToken tag looks like <br/>.
+	SelfClosingTagToken
+	// A CommentToken looks like <!--x-->.
+	CommentToken
+	// A DoctypeToken looks like <!DOCTYPE x>
+	DoctypeToken
+)
+
+// String returns a string representation of the TokenType.
+func (t TokenType) String() string {
+	switch t {
+	case ErrorToken:
+		return "Error"
+	case TextToken:
+		return "Text"
+	case StartTagToken:
+		return "StartTag"
+	case EndTagToken:
+		return "EndTag"
+	case SelfClosingTagToken:
+		return "SelfClosingTag"
+	case CommentToken:
+		return "Comment"
+	case DoctypeToken:
+		return "Doctype"
+	}
+	return "Invalid(" + strconv.Itoa(int(t)) + ")"
+}
+
+// An Attribute is an attribute namespace-key-value triple. Namespace is
+// non-empty for foreign attributes like xlink, Key is alphabetic (and hence
+// does not contain escapable characters like '&', '<' or '>'), and Val is
+// unescaped (it looks like "a<b" rather than "a&lt;b").
+//
+// Namespace is only used by the parser, not the tokenizer.
+type Attribute struct {
+	Namespace, Key, Val string
+}
+
+// A Token consists of a TokenType and some Data (tag name for start and end
+// tags, content for text, comments and doctypes). A tag Token may also contain
+// a slice of Attributes. Data is unescaped for all Tokens (it looks like "a<b"
+// rather than "a&lt;b"). For tag Tokens, DataAtom is the atom for Data, or
+// zero if Data is not a known tag name.
+type Token struct {
+	Type     TokenType
+	DataAtom atom.Atom
+	Data     string
+	Attr     []Attribute
+}
+
+// tagString returns a string representation of a tag Token's Data and Attr.
+func (t Token) tagString() string {
+	if len(t.Attr) == 0 {
+		return t.Data
+	}
+	buf := bytes.NewBufferString(t.Data)
+	for _, a := range t.Attr {
+		buf.WriteByte(' ')
+		buf.WriteString(a.Key)
+		buf.WriteString(`="`)
+		escape(buf, a.Val)
+		buf.WriteByte('"')
+	}
+	return buf.String()
+}
+
+// String returns a string representation of the Token.
+func (t Token) String() string {
+	switch t.Type {
+	case ErrorToken:
+		return ""
+	case TextToken:
+		return EscapeString(t.Data)
+	case StartTagToken:
+		return "<" + t.tagString() + ">"
+	case EndTagToken:
+		return "</" + t.tagString() + ">"
+	case SelfClosingTagToken:
+		return "<" + t.tagString() + "/>"
+	case CommentToken:
+		return "<!--" + t.Data + "-->"
+	case DoctypeToken:
+		return "<!DOCTYPE " + t.Data + ">"
+	}
+	return "Invalid(" + strconv.Itoa(int(t.Type)) + ")"
+}
+
+// span is a range of bytes in a Tokenizer's buffer. The start is inclusive,
+// the end is exclusive.
+type span struct {
+	start, end int
+}
+
+// A Tokenizer returns a stream of HTML Tokens.
+type Tokenizer struct {
+	// r is the source of the HTML text.
+	r io.Reader
+	// tt is the TokenType of the current token.
+	tt TokenType
+	// err is the first error encountered during tokenization. It is possible
+	// for tt != Error && err != nil to hold: this means that Next returned a
+	// valid token but the subsequent Next call will return an error token.
+	// For example, if the HTML text input was just "plain", then the first
+	// Next call would set z.err to io.EOF but return a TextToken, and all
+	// subsequent Next calls would return an ErrorToken.
+	// err is never reset. Once it becomes non-nil, it stays non-nil.
+	err error
+	// buf[raw.start:raw.end] holds the raw bytes of the current token.
+	// buf[raw.end:] is buffered input that will yield future tokens.
+	raw span
+	buf []byte
+	// buf[data.start:data.end] holds the raw bytes of the current token's data:
+	// a text token's text, a tag token's tag name, etc.
+	data span
+	// pendingAttr is the attribute key and value currently being tokenized.
+	// When complete, pendingAttr is pushed onto attr. nAttrReturned is
+	// incremented on each call to TagAttr.
+	pendingAttr   [2]span
+	attr          [][2]span
+	nAttrReturned int
+	// rawTag is the "script" in "</script>" that closes the next token. If
+	// non-empty, the subsequent call to Next will return a raw or RCDATA text
+	// token: one that treats "<p>" as text instead of an element.
+	// rawTag's contents are lower-cased.
+	rawTag string
+	// textIsRaw is whether the current text token's data is not escaped.
+	textIsRaw bool
+	// convertNUL is whether NUL bytes in the current token's data should
+	// be converted into \ufffd replacement characters.
+	convertNUL bool
+	// allowCDATA is whether CDATA sections are allowed in the current context.
+	allowCDATA bool
+}
+
+// AllowCDATA sets whether or not the tokenizer recognizes <![CDATA[foo]]> as
+// the text "foo". The default value is false, which means to recognize it as
+// a bogus comment "<!-- [CDATA[foo]] -->" instead.
+//
+// Strictly speaking, an HTML5 compliant tokenizer should allow CDATA if and
+// only if tokenizing foreign content, such as MathML and SVG. However,
+// tracking foreign-contentness is difficult to do purely in the tokenizer,
+// as opposed to the parser, due to HTML integration points: an <svg> element
+// can contain a <foreignObject> that is foreign-to-SVG but not foreign-to-
+// HTML. For strict compliance with the HTML5 tokenization algorithm, it is the
+// responsibility of the user of a tokenizer to call AllowCDATA as appropriate.
+// In practice, if using the tokenizer without caring whether MathML or SVG
+// CDATA is text or comments, such as tokenizing HTML to find all the anchor
+// text, it is acceptable to ignore this responsibility.
+func (z *Tokenizer) AllowCDATA(allowCDATA bool) {
+	z.allowCDATA = allowCDATA
+}
+
+// NextIsNotRawText instructs the tokenizer that the next token should not be
+// considered as 'raw text'. Some elements, such as script and title elements,
+// normally require the next token after the opening tag to be 'raw text' that
+// has no child elements. For example, tokenizing "<title>a<b>c</b>d</title>"
+// yields a start tag token for "<title>", a text token for "a<b>c</b>d", and
+// an end tag token for "</title>". There are no distinct start tag or end tag
+// tokens for the "<b>" and "</b>".
+//
+// This tokenizer implementation will generally look for raw text at the right
+// times. Strictly speaking, an HTML5 compliant tokenizer should not look for
+// raw text if in foreign content: <title> generally needs raw text, but a
+// <title> inside an <svg> does not. Another example is that a <textarea>
+// generally needs raw text, but a <textarea> is not allowed as an immediate
+// child of a <select>; in normal parsing, a <textarea> implies </select>, but
+// one cannot close the implicit element when parsing a <select>'s InnerHTML.
+// Similarly to AllowCDATA, tracking the correct moment to override raw-text-
+// ness is difficult to do purely in the tokenizer, as opposed to the parser.
+// For strict compliance with the HTML5 tokenization algorithm, it is the
+// responsibility of the user of a tokenizer to call NextIsNotRawText as
+// appropriate. In practice, like AllowCDATA, it is acceptable to ignore this
+// responsibility for basic usage.
+//
+// Note that this 'raw text' concept is different from the one offered by the
+// Tokenizer.Raw method.
+func (z *Tokenizer) NextIsNotRawText() {
+	z.rawTag = ""
+}
+
+// Err returns the error associated with the most recent ErrorToken token.
+// This is typically io.EOF, meaning the end of tokenization.
+func (z *Tokenizer) Err() error {
+	if z.tt != ErrorToken {
+		return nil
+	}
+	return z.err
+}
+
+// readByte returns the next byte from the input stream, doing a buffered read
+// from z.r into z.buf if necessary. z.buf[z.raw.start:z.raw.end] remains a contiguous byte
+// slice that holds all the bytes read so far for the current token.
+// It sets z.err if the underlying reader returns an error.
+// Pre-condition: z.err == nil.
+func (z *Tokenizer) readByte() byte {
+	if z.raw.end >= len(z.buf) {
+		// Our buffer is exhausted and we have to read from z.r.
+		// We copy z.buf[z.raw.start:z.raw.end] to the beginning of z.buf. If the length
+		// z.raw.end - z.raw.start is more than half the capacity of z.buf, then we
+		// allocate a new buffer before the copy.
+		c := cap(z.buf)
+		d := z.raw.end - z.raw.start
+		var buf1 []byte
+		if 2*d > c {
+			buf1 = make([]byte, d, 2*c)
+		} else {
+			buf1 = z.buf[:d]
+		}
+		copy(buf1, z.buf[z.raw.start:z.raw.end])
+		if x := z.raw.start; x != 0 {
+			// Adjust the data/attr spans to refer to the same contents after the copy.
+			z.data.start -= x
+			z.data.end -= x
+			z.pendingAttr[0].start -= x
+			z.pendingAttr[0].end -= x
+			z.pendingAttr[1].start -= x
+			z.pendingAttr[1].end -= x
+			for i := range z.attr {
+				z.attr[i][0].start -= x
+				z.attr[i][0].end -= x
+				z.attr[i][1].start -= x
+				z.attr[i][1].end -= x
+			}
+		}
+		z.raw.start, z.raw.end, z.buf = 0, d, buf1[:d]
+		// Now that we have copied the live bytes to the start of the buffer,
+		// we read from z.r into the remainder.
+		n, err := z.r.Read(buf1[d:cap(buf1)])
+		if err != nil {
+			z.err = err
+			return 0
+		}
+		z.buf = buf1[:d+n]
+	}
+	x := z.buf[z.raw.end]
+	z.raw.end++
+	return x
+}
+
+// skipWhiteSpace skips past any white space.
+func (z *Tokenizer) skipWhiteSpace() {
+	if z.err != nil {
+		return
+	}
+	for {
+		c := z.readByte()
+		if z.err != nil {
+			return
+		}
+		switch c {
+		case ' ', '\n', '\r', '\t', '\f':
+			// No-op.
+		default:
+			z.raw.end--
+			return
+		}
+	}
+}
+
+// readRawOrRCDATA reads until the next "</foo>", where "foo" is z.rawTag and
+// is typically something like "script" or "textarea".
+func (z *Tokenizer) readRawOrRCDATA() {
+	if z.rawTag == "script" {
+		z.readScript()
+		z.textIsRaw = true
+		z.rawTag = ""
+		return
+	}
+loop:
+	for {
+		c := z.readByte()
+		if z.err != nil {
+			break loop
+		}
+		if c != '<' {
+			continue loop
+		}
+		c = z.readByte()
+		if z.err != nil {
+			break loop
+		}
+		if c != '/' {
+			continue loop
+		}
+		if z.readRawEndTag() || z.err != nil {
+			break loop
+		}
+	}
+	z.data.end = z.raw.end
+	// A textarea's or title's RCDATA can contain escaped entities.
+	z.textIsRaw = z.rawTag != "textarea" && z.rawTag != "title"
+	z.rawTag = ""
+}
+
+// readRawEndTag attempts to read a tag like "</foo>", where "foo" is z.rawTag.
+// If it succeeds, it backs up the input position to reconsume the tag and
+// returns true. Otherwise it returns false. The opening "</" has already been
+// consumed.
+func (z *Tokenizer) readRawEndTag() bool {
+	for i := 0; i < len(z.rawTag); i++ {
+		c := z.readByte()
+		if z.err != nil {
+			return false
+		}
+		if c != z.rawTag[i] && c != z.rawTag[i]-('a'-'A') {
+			z.raw.end--
+			return false
+		}
+	}
+	c := z.readByte()
+	if z.err != nil {
+		return false
+	}
+	switch c {
+	case ' ', '\n', '\r', '\t', '\f', '/', '>':
+		// The 3 is 2 for the leading "</" plus 1 for the trailing character c.
+		z.raw.end -= 3 + len(z.rawTag)
+		return true
+	}
+	z.raw.end--
+	return false
+}
+
+// readScript reads until the next </script> tag, following the byzantine
+// rules for escaping/hiding the closing tag.
+func (z *Tokenizer) readScript() {
+	defer func() {
+		z.data.end = z.raw.end
+	}()
+	var c byte
+
+scriptData:
+	c = z.readByte()
+	if z.err != nil {
+		return
+	}
+	if c == '<' {
+		goto scriptDataLessThanSign
+	}
+	goto scriptData
+
+scriptDataLessThanSign:
+	c = z.readByte()
+	if z.err != nil {
+		return
+	}
+	switch c {
+	case '/':
+		goto scriptDataEndTagOpen
+	case '!':
+		goto scriptDataEscapeStart
+	}
+	z.raw.end--
+	goto scriptData
+
+scriptDataEndTagOpen:
+	if z.readRawEndTag() || z.err != nil {
+		return
+	}
+	goto scriptData
+
+scriptDataEscapeStart:
+	c = z.readByte()
+	if z.err != nil {
+		return
+	}
+	if c == '-' {
+		goto scriptDataEscapeStartDash
+	}
+	z.raw.end--
+	goto scriptData
+
+scriptDataEscapeStartDash:
+	c = z.readByte()
+	if z.err != nil {
+		return
+	}
+	if c == '-' {
+		goto scriptDataEscapedDashDash
+	}
+	z.raw.end--
+	goto scriptData
+
+scriptDataEscaped:
+	c = z.readByte()
+	if z.err != nil {
+		return
+	}
+	switch c {
+	case '-':
+		goto scriptDataEscapedDash
+	case '<':
+		goto scriptDataEscapedLessThanSign
+	}
+	goto scriptDataEscaped
+
+scriptDataEscapedDash:
+	c = z.readByte()
+	if z.err != nil {
+		return
+	}
+	switch c {
+	case '-':
+		goto scriptDataEscapedDashDash
+	case '<':
+		goto scriptDataEscapedLessThanSign
+	}
+	goto scriptDataEscaped
+
+scriptDataEscapedDashDash:
+	c = z.readByte()
+	if z.err != nil {
+		return
+	}
+	switch c {
+	case '-':
+		goto scriptDataEscapedDashDash
+	case '<':
+		goto scriptDataEscapedLessThanSign
+	case '>':
+		goto scriptData
+	}
+	goto scriptDataEscaped
+
+scriptDataEscapedLessThanSign:
+	c = z.readByte()
+	if z.err != nil {
+		return
+	}
+	if c == '/' {
+		goto scriptDataEscapedEndTagOpen
+	}
+	if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' {
+		goto scriptDataDoubleEscapeStart
+	}
+	z.raw.end--
+	goto scriptData
+
+scriptDataEscapedEndTagOpen:
+	if z.readRawEndTag() || z.err != nil {
+		return
+	}
+	goto scriptDataEscaped
+
+scriptDataDoubleEscapeStart:
+	z.raw.end--
+	for i := 0; i < len("script"); i++ {
+		c = z.readByte()
+		if z.err != nil {
+			return
+		}
+		if c != "script"[i] && c != "SCRIPT"[i] {
+			z.raw.end--
+			goto scriptDataEscaped
+		}
+	}
+	c = z.readByte()
+	if z.err != nil {
+		return
+	}
+	switch c {
+	case ' ', '\n', '\r', '\t', '\f', '/', '>':
+		goto scriptDataDoubleEscaped
+	}
+	z.raw.end--
+	goto scriptDataEscaped
+
+scriptDataDoubleEscaped:
+	c = z.readByte()
+	if z.err != nil {
+		return
+	}
+	switch c {
+	case '-':
+		goto scriptDataDoubleEscapedDash
+	case '<':
+		goto scriptDataDoubleEscapedLessThanSign
+	}
+	goto scriptDataDoubleEscaped
+
+scriptDataDoubleEscapedDash:
+	c = z.readByte()
+	if z.err != nil {
+		return
+	}
+	switch c {
+	case '-':
+		goto scriptDataDoubleEscapedDashDash
+	case '<':
+		goto scriptDataDoubleEscapedLessThanSign
+	}
+	goto scriptDataDoubleEscaped
+
+scriptDataDoubleEscapedDashDash:
+	c = z.readByte()
+	if z.err != nil {
+		return
+	}
+	switch c {
+	case '-':
+		goto scriptDataDoubleEscapedDashDash
+	case '<':
+		goto scriptDataDoubleEscapedLessThanSign
+	case '>':
+		goto scriptData
+	}
+	goto scriptDataDoubleEscaped
+
+scriptDataDoubleEscapedLessThanSign:
+	c = z.readByte()
+	if z.err != nil {
+		return
+	}
+	if c == '/' {
+		goto scriptDataDoubleEscapeEnd
+	}
+	z.raw.end--
+	goto scriptDataDoubleEscaped
+
+scriptDataDoubleEscapeEnd:
+	if z.readRawEndTag() {
+		z.raw.end += len("</script>")
+		goto scriptDataEscaped
+	}
+	if z.err != nil {
+		return
+	}
+	goto scriptDataDoubleEscaped
+}
+
+// readComment reads the next comment token starting with "<!--". The opening
+// "<!--" has already been consumed.
+func (z *Tokenizer) readComment() {
+	z.data.start = z.raw.end
+	defer func() {
+		if z.data.end < z.data.start {
+			// It's a comment with no data, like <!-->.
+			z.data.end = z.data.start
+		}
+	}()
+	for dashCount := 2; ; {
+		c := z.readByte()
+		if z.err != nil {
+			// Ignore up to two dashes at EOF.
+			if dashCount > 2 {
+				dashCount = 2
+			}
+			z.data.end = z.raw.end - dashCount
+			return
+		}
+		switch c {
+		case '-':
+			dashCount++
+			continue
+		case '>':
+			if dashCount >= 2 {
+				z.data.end = z.raw.end - len("-->")
+				return
+			}
+		case '!':
+			if dashCount >= 2 {
+				c = z.readByte()
+				if z.err != nil {
+					z.data.end = z.raw.end
+					return
+				}
+				if c == '>' {
+					z.data.end = z.raw.end - len("--!>")
+					return
+				}
+			}
+		}
+		dashCount = 0
+	}
+}
+
+// readUntilCloseAngle reads until the next ">".
+func (z *Tokenizer) readUntilCloseAngle() {
+	z.data.start = z.raw.end
+	for {
+		c := z.readByte()
+		if z.err != nil {
+			z.data.end = z.raw.end
+			return
+		}
+		if c == '>' {
+			z.data.end = z.raw.end - len(">")
+			return
+		}
+	}
+}
+
+// readMarkupDeclaration reads the next token starting with "<!". It might be
+// a "<!--comment-->", a "<!DOCTYPE foo>", a "<![CDATA[section]]>" or
+// "<!a bogus comment". The opening "<!" has already been consumed.
+func (z *Tokenizer) readMarkupDeclaration() TokenType {
+	z.data.start = z.raw.end
+	var c [2]byte
+	for i := 0; i < 2; i++ {
+		c[i] = z.readByte()
+		if z.err != nil {
+			z.data.end = z.raw.end
+			return CommentToken
+		}
+	}
+	if c[0] == '-' && c[1] == '-' {
+		z.readComment()
+		return CommentToken
+	}
+	z.raw.end -= 2
+	if z.readDoctype() {
+		return DoctypeToken
+	}
+	if z.allowCDATA && z.readCDATA() {
+		z.convertNUL = true
+		return TextToken
+	}
+	// It's a bogus comment.
+	z.readUntilCloseAngle()
+	return CommentToken
+}
+
+// readDoctype attempts to read a doctype declaration and returns true if
+// successful. The opening "<!" has already been consumed.
+func (z *Tokenizer) readDoctype() bool {
+	const s = "DOCTYPE"
+	for i := 0; i < len(s); i++ {
+		c := z.readByte()
+		if z.err != nil {
+			z.data.end = z.raw.end
+			return false
+		}
+		if c != s[i] && c != s[i]+('a'-'A') {
+			// Back up to read the fragment of "DOCTYPE" again.
+			z.raw.end = z.data.start
+			return false
+		}
+	}
+	if z.skipWhiteSpace(); z.err != nil {
+		z.data.start = z.raw.end
+		z.data.end = z.raw.end
+		return true
+	}
+	z.readUntilCloseAngle()
+	return true
+}
+
+// readCDATA attempts to read a CDATA section and returns true if
+// successful. The opening "<!" has already been consumed.
+func (z *Tokenizer) readCDATA() bool {
+	const s = "[CDATA["
+	for i := 0; i < len(s); i++ {
+		c := z.readByte()
+		if z.err != nil {
+			z.data.end = z.raw.end
+			return false
+		}
+		if c != s[i] {
+			// Back up to read the fragment of "[CDATA[" again.
+			z.raw.end = z.data.start
+			return false
+		}
+	}
+	z.data.start = z.raw.end
+	brackets := 0
+	for {
+		c := z.readByte()
+		if z.err != nil {
+			z.data.end = z.raw.end
+			return true
+		}
+		switch c {
+		case ']':
+			brackets++
+		case '>':
+			if brackets >= 2 {
+				z.data.end = z.raw.end - len("]]>")
+				return true
+			}
+			brackets = 0
+		default:
+			brackets = 0
+		}
+	}
+	panic("unreachable")
+}
+
+// startTagIn returns whether the start tag in z.buf[z.data.start:z.data.end]
+// case-insensitively matches any element of ss.
+func (z *Tokenizer) startTagIn(ss ...string) bool {
+loop:
+	for _, s := range ss {
+		if z.data.end-z.data.start != len(s) {
+			continue loop
+		}
+		for i := 0; i < len(s); i++ {
+			c := z.buf[z.data.start+i]
+			if 'A' <= c && c <= 'Z' {
+				c += 'a' - 'A'
+			}
+			if c != s[i] {
+				continue loop
+			}
+		}
+		return true
+	}
+	return false
+}
+
+// readStartTag reads the next start tag token. The opening "<a" has already
+// been consumed, where 'a' means anything in [A-Za-z].
+func (z *Tokenizer) readStartTag() TokenType {
+	z.readTag(true)
+	if z.err != nil {
+		return ErrorToken
+	}
+	// Several tags flag the tokenizer's next token as raw.
+	c, raw := z.buf[z.data.start], false
+	if 'A' <= c && c <= 'Z' {
+		c += 'a' - 'A'
+	}
+	switch c {
+	case 'i':
+		raw = z.startTagIn("iframe")
+	case 'n':
+		raw = z.startTagIn("noembed", "noframes", "noscript")
+	case 'p':
+		raw = z.startTagIn("plaintext")
+	case 's':
+		raw = z.startTagIn("script", "style")
+	case 't':
+		raw = z.startTagIn("textarea", "title")
+	case 'x':
+		raw = z.startTagIn("xmp")
+	}
+	if raw {
+		z.rawTag = strings.ToLower(string(z.buf[z.data.start:z.data.end]))
+	}
+	// Look for a self-closing token like "<br/>".
+	if z.err == nil && z.buf[z.raw.end-2] == '/' {
+		return SelfClosingTagToken
+	}
+	return StartTagToken
+}
+
+// readTag reads the next tag token and its attributes. If saveAttr, those
+// attributes are saved in z.attr, otherwise z.attr is set to an empty slice.
+// The opening "<a" or "</a" has already been consumed, where 'a' means anything
+// in [A-Za-z].
+func (z *Tokenizer) readTag(saveAttr bool) {
+	z.attr = z.attr[:0]
+	z.nAttrReturned = 0
+	// Read the tag name and attribute key/value pairs.
+	z.readTagName()
+	if z.skipWhiteSpace(); z.err != nil {
+		return
+	}
+	for {
+		c := z.readByte()
+		if z.err != nil || c == '>' {
+			break
+		}
+		z.raw.end--
+		z.readTagAttrKey()
+		z.readTagAttrVal()
+		// Save pendingAttr if saveAttr and that attribute has a non-empty key.
+		if saveAttr && z.pendingAttr[0].start != z.pendingAttr[0].end {
+			z.attr = append(z.attr, z.pendingAttr)
+		}
+		if z.skipWhiteSpace(); z.err != nil {
+			break
+		}
+	}
+}
+
+// readTagName sets z.data to the "div" in "<div k=v>". The reader (z.raw.end)
+// is positioned such that the first byte of the tag name (the "d" in "<div")
+// has already been consumed.
+func (z *Tokenizer) readTagName() {
+	z.data.start = z.raw.end - 1
+	for {
+		c := z.readByte()
+		if z.err != nil {
+			z.data.end = z.raw.end
+			return
+		}
+		switch c {
+		case ' ', '\n', '\r', '\t', '\f':
+			z.data.end = z.raw.end - 1
+			return
+		case '/', '>':
+			z.raw.end--
+			z.data.end = z.raw.end
+			return
+		}
+	}
+}
+
+// readTagAttrKey sets z.pendingAttr[0] to the "k" in "<div k=v>".
+// Precondition: z.err == nil.
+func (z *Tokenizer) readTagAttrKey() {
+	z.pendingAttr[0].start = z.raw.end
+	for {
+		c := z.readByte()
+		if z.err != nil {
+			z.pendingAttr[0].end = z.raw.end
+			return
+		}
+		switch c {
+		case ' ', '\n', '\r', '\t', '\f', '/':
+			z.pendingAttr[0].end = z.raw.end - 1
+			return
+		case '=', '>':
+			z.raw.end--
+			z.pendingAttr[0].end = z.raw.end
+			return
+		}
+	}
+}
+
+// readTagAttrVal sets z.pendingAttr[1] to the "v" in "<div k=v>".
+func (z *Tokenizer) readTagAttrVal() {
+	z.pendingAttr[1].start = z.raw.end
+	z.pendingAttr[1].end = z.raw.end
+	if z.skipWhiteSpace(); z.err != nil {
+		return
+	}
+	c := z.readByte()
+	if z.err != nil {
+		return
+	}
+	if c != '=' {
+		z.raw.end--
+		return
+	}
+	if z.skipWhiteSpace(); z.err != nil {
+		return
+	}
+	quote := z.readByte()
+	if z.err != nil {
+		return
+	}
+	switch quote {
+	case '>':
+		z.raw.end--
+		return
+
+	case '\'', '"':
+		z.pendingAttr[1].start = z.raw.end
+		for {
+			c := z.readByte()
+			if z.err != nil {
+				z.pendingAttr[1].end = z.raw.end
+				return
+			}
+			if c == quote {
+				z.pendingAttr[1].end = z.raw.end - 1
+				return
+			}
+		}
+
+	default:
+		z.pendingAttr[1].start = z.raw.end - 1
+		for {
+			c := z.readByte()
+			if z.err != nil {
+				z.pendingAttr[1].end = z.raw.end
+				return
+			}
+			switch c {
+			case ' ', '\n', '\r', '\t', '\f':
+				z.pendingAttr[1].end = z.raw.end - 1
+				return
+			case '>':
+				z.raw.end--
+				z.pendingAttr[1].end = z.raw.end
+				return
+			}
+		}
+	}
+}
+
+// Next scans the next token and returns its type.
+func (z *Tokenizer) Next() TokenType {
+	if z.err != nil {
+		z.tt = ErrorToken
+		return z.tt
+	}
+	z.raw.start = z.raw.end
+	z.data.start = z.raw.end
+	z.data.end = z.raw.end
+	if z.rawTag != "" {
+		if z.rawTag == "plaintext" {
+			// Read everything up to EOF.
+			for z.err == nil {
+				z.readByte()
+			}
+			z.data.end = z.raw.end
+			z.textIsRaw = true
+		} else {
+			z.readRawOrRCDATA()
+		}
+		if z.data.end > z.data.start {
+			z.tt = TextToken
+			z.convertNUL = true
+			return z.tt
+		}
+	}
+	z.textIsRaw = false
+	z.convertNUL = false
+
+loop:
+	for {
+		c := z.readByte()
+		if z.err != nil {
+			break loop
+		}
+		if c != '<' {
+			continue loop
+		}
+
+		// Check if the '<' we have just read is part of a tag, comment
+		// or doctype. If not, it's part of the accumulated text token.
+		c = z.readByte()
+		if z.err != nil {
+			break loop
+		}
+		var tokenType TokenType
+		switch {
+		case 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z':
+			tokenType = StartTagToken
+		case c == '/':
+			tokenType = EndTagToken
+		case c == '!' || c == '?':
+			// We use CommentToken to mean any of "<!--actual comments-->",
+			// "<!DOCTYPE declarations>" and "<?xml processing instructions?>".
+			tokenType = CommentToken
+		default:
+			continue
+		}
+
+		// We have a non-text token, but we might have accumulated some text
+		// before that. If so, we return the text first, and return the non-
+		// text token on the subsequent call to Next.
+		if x := z.raw.end - len("<a"); z.raw.start < x {
+			z.raw.end = x
+			z.data.end = x
+			z.tt = TextToken
+			return z.tt
+		}
+		switch tokenType {
+		case StartTagToken:
+			z.tt = z.readStartTag()
+			return z.tt
+		case EndTagToken:
+			c = z.readByte()
+			if z.err != nil {
+				break loop
+			}
+			if c == '>' {
+				// "</>" does not generate a token at all.
+				// Reset the tokenizer state and start again.
+				z.raw.start = z.raw.end
+				z.data.start = z.raw.end
+				z.data.end = z.raw.end
+				continue loop
+			}
+			if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' {
+				z.readTag(false)
+				if z.err != nil {
+					z.tt = ErrorToken
+				} else {
+					z.tt = EndTagToken
+				}
+				return z.tt
+			}
+			z.raw.end--
+			z.readUntilCloseAngle()
+			z.tt = CommentToken
+			return z.tt
+		case CommentToken:
+			if c == '!' {
+				z.tt = z.readMarkupDeclaration()
+				return z.tt
+			}
+			z.raw.end--
+			z.readUntilCloseAngle()
+			z.tt = CommentToken
+			return z.tt
+		}
+	}
+	if z.raw.start < z.raw.end {
+		z.data.end = z.raw.end
+		z.tt = TextToken
+		return z.tt
+	}
+	z.tt = ErrorToken
+	return z.tt
+}
+
+// Raw returns the unmodified text of the current token. Calling Next, Token,
+// Text, TagName or TagAttr may change the contents of the returned slice.
+func (z *Tokenizer) Raw() []byte {
+	return z.buf[z.raw.start:z.raw.end]
+}
+
+// convertNewlines converts "\r" and "\r\n" in s to "\n".
+// The conversion happens in place, but the resulting slice may be shorter.
+func convertNewlines(s []byte) []byte {
+	for i, c := range s {
+		if c != '\r' {
+			continue
+		}
+
+		src := i + 1
+		if src >= len(s) || s[src] != '\n' {
+			s[i] = '\n'
+			continue
+		}
+
+		dst := i
+		for src < len(s) {
+			if s[src] == '\r' {
+				if src+1 < len(s) && s[src+1] == '\n' {
+					src++
+				}
+				s[dst] = '\n'
+			} else {
+				s[dst] = s[src]
+			}
+			src++
+			dst++
+		}
+		return s[:dst]
+	}
+	return s
+}
+
+var (
+	nul         = []byte("\x00")
+	replacement = []byte("\ufffd")
+)
+
+// Text returns the unescaped text of a text, comment or doctype token. The
+// contents of the returned slice may change on the next call to Next.
+func (z *Tokenizer) Text() []byte {
+	switch z.tt {
+	case TextToken, CommentToken, DoctypeToken:
+		s := z.buf[z.data.start:z.data.end]
+		z.data.start = z.raw.end
+		z.data.end = z.raw.end
+		s = convertNewlines(s)
+		if (z.convertNUL || z.tt == CommentToken) && bytes.Contains(s, nul) {
+			s = bytes.Replace(s, nul, replacement, -1)
+		}
+		if !z.textIsRaw {
+			s = unescape(s, false)
+		}
+		return s
+	}
+	return nil
+}
+
+// TagName returns the lower-cased name of a tag token (the `img` out of
+// `<IMG SRC="foo">`) and whether the tag has attributes.
+// The contents of the returned slice may change on the next call to Next.
+func (z *Tokenizer) TagName() (name []byte, hasAttr bool) {
+	if z.data.start < z.data.end {
+		switch z.tt {
+		case StartTagToken, EndTagToken, SelfClosingTagToken:
+			s := z.buf[z.data.start:z.data.end]
+			z.data.start = z.raw.end
+			z.data.end = z.raw.end
+			return lower(s), z.nAttrReturned < len(z.attr)
+		}
+	}
+	return nil, false
+}
+
+// TagAttr returns the lower-cased key and unescaped value of the next unparsed
+// attribute for the current tag token and whether there are more attributes.
+// The contents of the returned slices may change on the next call to Next.
+func (z *Tokenizer) TagAttr() (key, val []byte, moreAttr bool) {
+	if z.nAttrReturned < len(z.attr) {
+		switch z.tt {
+		case StartTagToken, SelfClosingTagToken:
+			x := z.attr[z.nAttrReturned]
+			z.nAttrReturned++
+			key = z.buf[x[0].start:x[0].end]
+			val = z.buf[x[1].start:x[1].end]
+			return lower(key), unescape(convertNewlines(val), true), z.nAttrReturned < len(z.attr)
+		}
+	}
+	return nil, nil, false
+}
+
+// Token returns the next Token. The result's Data and Attr values remain valid
+// after subsequent Next calls.
+func (z *Tokenizer) Token() Token {
+	t := Token{Type: z.tt}
+	switch z.tt {
+	case TextToken, CommentToken, DoctypeToken:
+		t.Data = string(z.Text())
+	case StartTagToken, SelfClosingTagToken, EndTagToken:
+		name, moreAttr := z.TagName()
+		for moreAttr {
+			var key, val []byte
+			key, val, moreAttr = z.TagAttr()
+			t.Attr = append(t.Attr, Attribute{"", atom.String(key), string(val)})
+		}
+		if a := atom.Lookup(name); a != 0 {
+			t.DataAtom, t.Data = a, a.String()
+		} else {
+			t.DataAtom, t.Data = 0, string(name)
+		}
+	}
+	return t
+}
+
+// NewTokenizer returns a new HTML Tokenizer for the given Reader.
+// The input is assumed to be UTF-8 encoded.
+func NewTokenizer(r io.Reader) *Tokenizer {
+	return NewTokenizerFragment(r, "")
+}
+
+// NewTokenizerFragment returns a new HTML Tokenizer for the given Reader, for
+// tokenizing an exisitng element's InnerHTML fragment. contextTag is that
+// element's tag, such as "div" or "iframe".
+//
+// For example, how the InnerHTML "a<b" is tokenized depends on whether it is
+// for a <p> tag or a <script> tag.
+//
+// The input is assumed to be UTF-8 encoded.
+func NewTokenizerFragment(r io.Reader, contextTag string) *Tokenizer {
+	z := &Tokenizer{
+		r:   r,
+		buf: make([]byte, 0, 4096),
+	}
+	if contextTag != "" {
+		switch s := strings.ToLower(contextTag); s {
+		case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "title", "textarea", "xmp":
+			z.rawTag = s
+		}
+	}
+	return z
+}
diff --git a/html/token_test.go b/html/token_test.go
new file mode 100644
index 0000000..14e2346
--- /dev/null
+++ b/html/token_test.go
@@ -0,0 +1,679 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+	"bytes"
+	"io"
+	"io/ioutil"
+	"runtime"
+	"strings"
+	"testing"
+)
+
+type tokenTest struct {
+	// A short description of the test case.
+	desc string
+	// The HTML to parse.
+	html string
+	// The string representations of the expected tokens, joined by '$'.
+	golden string
+}
+
+var tokenTests = []tokenTest{
+	{
+		"empty",
+		"",
+		"",
+	},
+	// A single text node. The tokenizer should not break text nodes on whitespace,
+	// nor should it normalize whitespace within a text node.
+	{
+		"text",
+		"foo  bar",
+		"foo  bar",
+	},
+	// An entity.
+	{
+		"entity",
+		"one &lt; two",
+		"one &lt; two",
+	},
+	// A start, self-closing and end tag. The tokenizer does not care if the start
+	// and end tokens don't match; that is the job of the parser.
+	{
+		"tags",
+		"<a>b<c/>d</e>",
+		"<a>$b$<c/>$d$</e>",
+	},
+	// Angle brackets that aren't a tag.
+	{
+		"not a tag #0",
+		"<",
+		"&lt;",
+	},
+	{
+		"not a tag #1",
+		"</",
+		"&lt;/",
+	},
+	{
+		"not a tag #2",
+		"</>",
+		"",
+	},
+	{
+		"not a tag #3",
+		"a</>b",
+		"a$b",
+	},
+	{
+		"not a tag #4",
+		"</ >",
+		"<!-- -->",
+	},
+	{
+		"not a tag #5",
+		"</.",
+		"<!--.-->",
+	},
+	{
+		"not a tag #6",
+		"</.>",
+		"<!--.-->",
+	},
+	{
+		"not a tag #7",
+		"a < b",
+		"a &lt; b",
+	},
+	{
+		"not a tag #8",
+		"<.>",
+		"&lt;.&gt;",
+	},
+	{
+		"not a tag #9",
+		"a<<<b>>>c",
+		"a&lt;&lt;$<b>$&gt;&gt;c",
+	},
+	{
+		"not a tag #10",
+		"if x<0 and y < 0 then x*y>0",
+		"if x&lt;0 and y &lt; 0 then x*y&gt;0",
+	},
+	// EOF in a tag name.
+	{
+		"tag name eof #0",
+		"<a",
+		"",
+	},
+	{
+		"tag name eof #1",
+		"<a ",
+		"",
+	},
+	{
+		"tag name eof #2",
+		"a<b",
+		"a",
+	},
+	{
+		"tag name eof #3",
+		"<a><b",
+		"<a>",
+	},
+	{
+		"tag name eof #4",
+		`<a x`,
+		``,
+	},
+	// Some malformed tags that are missing a '>'.
+	{
+		"malformed tag #0",
+		`<p</p>`,
+		`<p< p="">`,
+	},
+	{
+		"malformed tag #1",
+		`<p </p>`,
+		`<p <="" p="">`,
+	},
+	{
+		"malformed tag #2",
+		`<p id`,
+		``,
+	},
+	{
+		"malformed tag #3",
+		`<p id=`,
+		``,
+	},
+	{
+		"malformed tag #4",
+		`<p id=>`,
+		`<p id="">`,
+	},
+	{
+		"malformed tag #5",
+		`<p id=0`,
+		``,
+	},
+	{
+		"malformed tag #6",
+		`<p id=0</p>`,
+		`<p id="0&lt;/p">`,
+	},
+	{
+		"malformed tag #7",
+		`<p id="0</p>`,
+		``,
+	},
+	{
+		"malformed tag #8",
+		`<p id="0"</p>`,
+		`<p id="0" <="" p="">`,
+	},
+	{
+		"malformed tag #9",
+		`<p></p id`,
+		`<p>`,
+	},
+	// Raw text and RCDATA.
+	{
+		"basic raw text",
+		"<script><a></b></script>",
+		"<script>$&lt;a&gt;&lt;/b&gt;$</script>",
+	},
+	{
+		"unfinished script end tag",
+		"<SCRIPT>a</SCR",
+		"<script>$a&lt;/SCR",
+	},
+	{
+		"broken script end tag",
+		"<SCRIPT>a</SCR ipt>",
+		"<script>$a&lt;/SCR ipt&gt;",
+	},
+	{
+		"EOF in script end tag",
+		"<SCRIPT>a</SCRipt",
+		"<script>$a&lt;/SCRipt",
+	},
+	{
+		"scriptx end tag",
+		"<SCRIPT>a</SCRiptx",
+		"<script>$a&lt;/SCRiptx",
+	},
+	{
+		"' ' completes script end tag",
+		"<SCRIPT>a</SCRipt ",
+		"<script>$a",
+	},
+	{
+		"'>' completes script end tag",
+		"<SCRIPT>a</SCRipt>",
+		"<script>$a$</script>",
+	},
+	{
+		"self-closing script end tag",
+		"<SCRIPT>a</SCRipt/>",
+		"<script>$a$</script>",
+	},
+	{
+		"nested script tag",
+		"<SCRIPT>a</SCRipt<script>",
+		"<script>$a&lt;/SCRipt&lt;script&gt;",
+	},
+	{
+		"script end tag after unfinished",
+		"<SCRIPT>a</SCRipt</script>",
+		"<script>$a&lt;/SCRipt$</script>",
+	},
+	{
+		"script/style mismatched tags",
+		"<script>a</style>",
+		"<script>$a&lt;/style&gt;",
+	},
+	{
+		"style element with entity",
+		"<style>&apos;",
+		"<style>$&amp;apos;",
+	},
+	{
+		"textarea with tag",
+		"<textarea><div></textarea>",
+		"<textarea>$&lt;div&gt;$</textarea>",
+	},
+	{
+		"title with tag and entity",
+		"<title><b>K&amp;R C</b></title>",
+		"<title>$&lt;b&gt;K&amp;R C&lt;/b&gt;$</title>",
+	},
+	// DOCTYPE tests.
+	{
+		"Proper DOCTYPE",
+		"<!DOCTYPE html>",
+		"<!DOCTYPE html>",
+	},
+	{
+		"DOCTYPE with no space",
+		"<!doctypehtml>",
+		"<!DOCTYPE html>",
+	},
+	{
+		"DOCTYPE with two spaces",
+		"<!doctype  html>",
+		"<!DOCTYPE html>",
+	},
+	{
+		"looks like DOCTYPE but isn't",
+		"<!DOCUMENT html>",
+		"<!--DOCUMENT html-->",
+	},
+	{
+		"DOCTYPE at EOF",
+		"<!DOCtype",
+		"<!DOCTYPE >",
+	},
+	// XML processing instructions.
+	{
+		"XML processing instruction",
+		"<?xml?>",
+		"<!--?xml?-->",
+	},
+	// Comments.
+	{
+		"comment0",
+		"abc<b><!-- skipme --></b>def",
+		"abc$<b>$<!-- skipme -->$</b>$def",
+	},
+	{
+		"comment1",
+		"a<!-->z",
+		"a$<!---->$z",
+	},
+	{
+		"comment2",
+		"a<!--->z",
+		"a$<!---->$z",
+	},
+	{
+		"comment3",
+		"a<!--x>-->z",
+		"a$<!--x>-->$z",
+	},
+	{
+		"comment4",
+		"a<!--x->-->z",
+		"a$<!--x->-->$z",
+	},
+	{
+		"comment5",
+		"a<!>z",
+		"a$<!---->$z",
+	},
+	{
+		"comment6",
+		"a<!->z",
+		"a$<!----->$z",
+	},
+	{
+		"comment7",
+		"a<!---<>z",
+		"a$<!---<>z-->",
+	},
+	{
+		"comment8",
+		"a<!--z",
+		"a$<!--z-->",
+	},
+	{
+		"comment9",
+		"a<!--z-",
+		"a$<!--z-->",
+	},
+	{
+		"comment10",
+		"a<!--z--",
+		"a$<!--z-->",
+	},
+	{
+		"comment11",
+		"a<!--z---",
+		"a$<!--z--->",
+	},
+	{
+		"comment12",
+		"a<!--z----",
+		"a$<!--z---->",
+	},
+	{
+		"comment13",
+		"a<!--x--!>z",
+		"a$<!--x-->$z",
+	},
+	// An attribute with a backslash.
+	{
+		"backslash",
+		`<p id="a\"b">`,
+		`<p id="a\" b"="">`,
+	},
+	// Entities, tag name and attribute key lower-casing, and whitespace
+	// normalization within a tag.
+	{
+		"tricky",
+		"<p \t\n iD=\"a&quot;B\"  foo=\"bar\"><EM>te&lt;&amp;;xt</em></p>",
+		`<p id="a&#34;B" foo="bar">$<em>$te&lt;&amp;;xt$</em>$</p>`,
+	},
+	// A nonexistent entity. Tokenizing and converting back to a string should
+	// escape the "&" to become "&amp;".
+	{
+		"noSuchEntity",
+		`<a b="c&noSuchEntity;d">&lt;&alsoDoesntExist;&`,
+		`<a b="c&amp;noSuchEntity;d">$&lt;&amp;alsoDoesntExist;&amp;`,
+	},
+	{
+		"entity without semicolon",
+		`&notit;&notin;<a b="q=z&amp=5&notice=hello&not;=world">`,
+		`¬it;∉$<a b="q=z&amp;amp=5&amp;notice=hello¬=world">`,
+	},
+	{
+		"entity with digits",
+		"&frac12;",
+		"½",
+	},
+	// Attribute tests:
+	// http://dev.w3.org/html5/spec/Overview.html#attributes-0
+	{
+		"Empty attribute",
+		`<input disabled FOO>`,
+		`<input disabled="" foo="">`,
+	},
+	{
+		"Empty attribute, whitespace",
+		`<input disabled FOO >`,
+		`<input disabled="" foo="">`,
+	},
+	{
+		"Unquoted attribute value",
+		`<input value=yes FOO=BAR>`,
+		`<input value="yes" foo="BAR">`,
+	},
+	{
+		"Unquoted attribute value, spaces",
+		`<input value = yes FOO = BAR>`,
+		`<input value="yes" foo="BAR">`,
+	},
+	{
+		"Unquoted attribute value, trailing space",
+		`<input value=yes FOO=BAR >`,
+		`<input value="yes" foo="BAR">`,
+	},
+	{
+		"Single-quoted attribute value",
+		`<input value='yes' FOO='BAR'>`,
+		`<input value="yes" foo="BAR">`,
+	},
+	{
+		"Single-quoted attribute value, trailing space",
+		`<input value='yes' FOO='BAR' >`,
+		`<input value="yes" foo="BAR">`,
+	},
+	{
+		"Double-quoted attribute value",
+		`<input value="I'm an attribute" FOO="BAR">`,
+		`<input value="I&#39;m an attribute" foo="BAR">`,
+	},
+	{
+		"Attribute name characters",
+		`<meta http-equiv="content-type">`,
+		`<meta http-equiv="content-type">`,
+	},
+	{
+		"Mixed attributes",
+		`a<P V="0 1" w='2' X=3 y>z`,
+		`a$<p v="0 1" w="2" x="3" y="">$z`,
+	},
+	{
+		"Attributes with a solitary single quote",
+		`<p id=can't><p id=won't>`,
+		`<p id="can&#39;t">$<p id="won&#39;t">`,
+	},
+}
+
+func TestTokenizer(t *testing.T) {
+loop:
+	for _, tt := range tokenTests {
+		z := NewTokenizer(strings.NewReader(tt.html))
+		if tt.golden != "" {
+			for i, s := range strings.Split(tt.golden, "$") {
+				if z.Next() == ErrorToken {
+					t.Errorf("%s token %d: want %q got error %v", tt.desc, i, s, z.Err())
+					continue loop
+				}
+				actual := z.Token().String()
+				if s != actual {
+					t.Errorf("%s token %d: want %q got %q", tt.desc, i, s, actual)
+					continue loop
+				}
+			}
+		}
+		z.Next()
+		if z.Err() != io.EOF {
+			t.Errorf("%s: want EOF got %q", tt.desc, z.Err())
+		}
+	}
+}
+
+type unescapeTest struct {
+	// A short description of the test case.
+	desc string
+	// The HTML text.
+	html string
+	// The unescaped text.
+	unescaped string
+}
+
+var unescapeTests = []unescapeTest{
+	// Handle no entities.
+	{
+		"copy",
+		"A\ttext\nstring",
+		"A\ttext\nstring",
+	},
+	// Handle simple named entities.
+	{
+		"simple",
+		"&amp; &gt; &lt;",
+		"& > <",
+	},
+	// Handle hitting the end of the string.
+	{
+		"stringEnd",
+		"&amp &amp",
+		"& &",
+	},
+	// Handle entities with two codepoints.
+	{
+		"multiCodepoint",
+		"text &gesl; blah",
+		"text \u22db\ufe00 blah",
+	},
+	// Handle decimal numeric entities.
+	{
+		"decimalEntity",
+		"Delta = &#916; ",
+		"Delta = Δ ",
+	},
+	// Handle hexadecimal numeric entities.
+	{
+		"hexadecimalEntity",
+		"Lambda = &#x3bb; = &#X3Bb ",
+		"Lambda = λ = λ ",
+	},
+	// Handle numeric early termination.
+	{
+		"numericEnds",
+		"&# &#x &#128;43 &copy = &#169f = &#xa9",
+		"&# &#x €43 © = ©f = ©",
+	},
+	// Handle numeric ISO-8859-1 entity replacements.
+	{
+		"numericReplacements",
+		"Footnote&#x87;",
+		"Footnote‡",
+	},
+}
+
+func TestUnescape(t *testing.T) {
+	for _, tt := range unescapeTests {
+		unescaped := UnescapeString(tt.html)
+		if unescaped != tt.unescaped {
+			t.Errorf("TestUnescape %s: want %q, got %q", tt.desc, tt.unescaped, unescaped)
+		}
+	}
+}
+
+func TestUnescapeEscape(t *testing.T) {
+	ss := []string{
+		``,
+		`abc def`,
+		`a & b`,
+		`a&amp;b`,
+		`a &amp b`,
+		`&quot;`,
+		`"`,
+		`"<&>"`,
+		`&quot;&lt;&amp;&gt;&quot;`,
+		`3&5==1 && 0<1, "0&lt;1", a+acute=&aacute;`,
+		`The special characters are: <, >, &, ' and "`,
+	}
+	for _, s := range ss {
+		if got := UnescapeString(EscapeString(s)); got != s {
+			t.Errorf("got %q want %q", got, s)
+		}
+	}
+}
+
+func TestBufAPI(t *testing.T) {
+	s := "0<a>1</a>2<b>3<a>4<a>5</a>6</b>7</a>8<a/>9"
+	z := NewTokenizer(bytes.NewBufferString(s))
+	var result bytes.Buffer
+	depth := 0
+loop:
+	for {
+		tt := z.Next()
+		switch tt {
+		case ErrorToken:
+			if z.Err() != io.EOF {
+				t.Error(z.Err())
+			}
+			break loop
+		case TextToken:
+			if depth > 0 {
+				result.Write(z.Text())
+			}
+		case StartTagToken, EndTagToken:
+			tn, _ := z.TagName()
+			if len(tn) == 1 && tn[0] == 'a' {
+				if tt == StartTagToken {
+					depth++
+				} else {
+					depth--
+				}
+			}
+		}
+	}
+	u := "14567"
+	v := string(result.Bytes())
+	if u != v {
+		t.Errorf("TestBufAPI: want %q got %q", u, v)
+	}
+}
+
+func TestConvertNewlines(t *testing.T) {
+	testCases := map[string]string{
+		"Mac\rDOS\r\nUnix\n":    "Mac\nDOS\nUnix\n",
+		"Unix\nMac\rDOS\r\n":    "Unix\nMac\nDOS\n",
+		"DOS\r\nDOS\r\nDOS\r\n": "DOS\nDOS\nDOS\n",
+		"":         "",
+		"\n":       "\n",
+		"\n\r":     "\n\n",
+		"\r":       "\n",
+		"\r\n":     "\n",
+		"\r\n\n":   "\n\n",
+		"\r\n\r":   "\n\n",
+		"\r\n\r\n": "\n\n",
+		"\r\r":     "\n\n",
+		"\r\r\n":   "\n\n",
+		"\r\r\n\n": "\n\n\n",
+		"\r\r\r\n": "\n\n\n",
+		"\r \n":    "\n \n",
+		"xyz":      "xyz",
+	}
+	for in, want := range testCases {
+		if got := string(convertNewlines([]byte(in))); got != want {
+			t.Errorf("input %q: got %q, want %q", in, got, want)
+		}
+	}
+}
+
+const (
+	rawLevel = iota
+	lowLevel
+	highLevel
+)
+
+func benchmarkTokenizer(b *testing.B, level int) {
+	buf, err := ioutil.ReadFile("testdata/go1.html")
+	if err != nil {
+		b.Fatalf("could not read testdata/go1.html: %v", err)
+	}
+	b.SetBytes(int64(len(buf)))
+	runtime.GC()
+	b.ReportAllocs()
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		z := NewTokenizer(bytes.NewBuffer(buf))
+		for {
+			tt := z.Next()
+			if tt == ErrorToken {
+				if err := z.Err(); err != nil && err != io.EOF {
+					b.Fatalf("tokenizer error: %v", err)
+				}
+				break
+			}
+			switch level {
+			case rawLevel:
+				// Calling z.Raw just returns the raw bytes of the token. It does
+				// not unescape &lt; to <, or lower-case tag names and attribute keys.
+				z.Raw()
+			case lowLevel:
+				// Caling z.Text, z.TagName and z.TagAttr returns []byte values
+				// whose contents may change on the next call to z.Next.
+				switch tt {
+				case TextToken, CommentToken, DoctypeToken:
+					z.Text()
+				case StartTagToken, SelfClosingTagToken:
+					_, more := z.TagName()
+					for more {
+						_, _, more = z.TagAttr()
+					}
+				case EndTagToken:
+					z.TagName()
+				}
+			case highLevel:
+				// Calling z.Token converts []byte values to strings whose validity
+				// extend beyond the next call to z.Next.
+				z.Token()
+			}
+		}
+	}
+}
+
+func BenchmarkRawLevelTokenizer(b *testing.B)  { benchmarkTokenizer(b, rawLevel) }
+func BenchmarkLowLevelTokenizer(b *testing.B)  { benchmarkTokenizer(b, lowLevel) }
+func BenchmarkHighLevelTokenizer(b *testing.B) { benchmarkTokenizer(b, highLevel) }