unicode: add case folding tables

R=r, r
CC=golang-dev
https://golang.org/cl/4571074
diff --git a/src/pkg/unicode/letter.go b/src/pkg/unicode/letter.go
index a0c55bb..dbd8638 100644
--- a/src/pkg/unicode/letter.go
+++ b/src/pkg/unicode/letter.go
@@ -275,3 +275,52 @@
 	}
 	return r
 }
+
+// caseOrbit is defined in tables.go as []foldPair.  Right now all the
+// entries fit in uint16, so use uint16.  If that changes, compilation
+// will fail (the constants in the composite literal will not fit in uint16)
+// and the types here can change to uint32.
+type foldPair struct {
+	From uint16
+	To   uint16
+}
+
+// SimpleFold iterates over Unicode code points equivalent under
+// the Unicode-defined simple case folding.  Among the code points
+// equivalent to rune (including rune itself), SimpleFold returns the
+// smallest r >= rune if one exists, or else the smallest r >= 0. 
+//
+// For example:
+//	SimpleFold('A') = 'a'
+//	SimpleFold('a') = 'A'
+//
+//	SimpleFold('K') = 'k'
+//	SimpleFold('k') = '\u212A' (Kelvin symbol, K)
+//	SimpleFold('\u212A') = 'K'
+//
+//	SimpleFold('1') = '1'
+//
+func SimpleFold(rune int) int {
+	// Consult caseOrbit table for special cases.
+	lo := 0
+	hi := len(caseOrbit)
+	for lo < hi {
+		m := lo + (hi-lo)/2
+		if int(caseOrbit[m].From) < rune {
+			lo = m + 1
+		} else {
+			hi = m
+		}
+	}
+	if lo < len(caseOrbit) && int(caseOrbit[lo].From) == rune {
+		return int(caseOrbit[lo].To)
+	}
+
+	// No folding specified.  This is a one- or two-element
+	// equivalence class containing rune and ToLower(rune)
+	// and ToUpper(rune) if they are different from rune.
+	if l := ToLower(rune); l != rune {
+		return l
+	}
+	return ToUpper(rune)
+}
diff --git a/src/pkg/unicode/letter_test.go b/src/pkg/unicode/letter_test.go
index 4c24ffc..c4e26df 100644
--- a/src/pkg/unicode/letter_test.go
+++ b/src/pkg/unicode/letter_test.go
@@ -376,3 +376,49 @@
 		}
 	}
 }
+
+var simpleFoldTests = []string{
+	// SimpleFold could order its returned slices in any order it wants,
+	// but we know it orders them in increasing order starting at in
+	// and looping around from MaxRune to 0.
+
+	// Easy cases.
+	"Aa",
+	"aA",
+	"δΔ",
+	"Δδ",
+
+	// ASCII special cases.
+	"KkK",
+	"kKK",
+	"KKk",
+	"Ssſ",
+	"sſS",
+	"ſSs",
+
+	// Non-ASCII special cases.
+	"ρϱΡ",
+	"ϱΡρ",
+	"Ρρϱ",
+	"ͅΙιι",
+	"Ιιιͅ",
+	"ιιͅΙ",
+	"ιͅΙι",
+
+	// Extra special cases: has lower/upper but no case fold.
+	"İ",
+	"ı",
+}
+
+func TestSimpleFold(t *testing.T) {
+	for _, tt := range simpleFoldTests {
+		cycle := []int(tt)
+		rune := cycle[len(cycle)-1]
+		for _, out := range cycle {
+			if r := SimpleFold(rune); r != out {
+				t.Errorf("SimpleFold(%#U) = %#U, want %#U", rune, r, out)
+			}
+			rune = out
+		}
+	}
+}
diff --git a/src/pkg/unicode/maketables.go b/src/pkg/unicode/maketables.go
index 39c7121..421d294 100644
--- a/src/pkg/unicode/maketables.go
+++ b/src/pkg/unicode/maketables.go
@@ -24,15 +24,18 @@
 func main() {
 	flag.Parse()
 	loadChars() // always needed
+	loadCasefold()
 	printCategories()
 	printScriptOrProperty(false)
 	printScriptOrProperty(true)
 	printCases()
 	printLatinProperties()
+	printCasefold()
 	printSizes()
 }
 
 var dataURL = flag.String("data", "", "full URL for UnicodeData.txt; defaults to --url/UnicodeData.txt")
+var casefoldingURL = flag.String("casefolding", "", "full URL for CaseFolding.txt; defaults to --url/CaseFolding.txt")
 var url = flag.String("url",
 	"http://www.unicode.org/Public/6.0.0/ucd/",
 	"URL of Unicode database directory")
@@ -119,6 +122,8 @@
 	upperCase int
 	lowerCase int
 	titleCase int
+	foldCase  int // simple case folding
+	caseOrbit int // next in simple case folding orbit
 }
 
 // Scripts.txt has form:
@@ -308,8 +313,53 @@
 	resp.Body.Close()
 }
 
+func loadCasefold() {
+	if *casefoldingURL == "" {
+		flag.Set("casefolding", *url+"CaseFolding.txt")
+	}
+	resp, err := http.Get(*casefoldingURL)
+	if err != nil {
+		logger.Fatal(err)
+	}
+	if resp.StatusCode != 200 {
+		logger.Fatal("bad GET status for CaseFolding.txt", resp.Status)
+	}
+	input := bufio.NewReader(resp.Body)
+	for {
+		line, err := input.ReadString('\n')
+		if err != nil {
+			if err == os.EOF {
+				break
+			}
+			logger.Fatal(err)
+		}
+		if line[0] == '#' {
+			continue
+		}
+		field := strings.Split(line, "; ", -1)
+		if len(field) != 4 {
+			logger.Fatalf("CaseFolding.txt %.5s...: %d fields (expected %d)\n", line, len(field), 4)
+		}
+		kind := field[1]
+		if kind != "C" && kind != "S" {
+			// Only care about 'common' and 'simple' foldings.
+			continue
+		}
+		p1, err := strconv.Btoui64(field[0], 16)
+		if err != nil {
+			logger.Fatalf("CaseFolding.txt %.5s...: %s", line, err)
+		}
+		p2, err := strconv.Btoui64(field[2], 16)
+		if err != nil {
+			logger.Fatalf("CaseFolding.txt %.5s...: %s", line, err)
+		}
+		chars[p1].foldCase = int(p2)
+	}
+	resp.Body.Close()
+}
+
 const progHeader = `// Generated by running
-//	maketables --tables=%s --data=%s
+//	maketables --tables=%s --data=%s --casefolding=%s
 // DO NOT EDIT
 
 package unicode
@@ -330,7 +380,7 @@
 		fullCategoryTest(list)
 		return
 	}
-	fmt.Printf(progHeader, *tablelist, *dataURL)
+	fmt.Printf(progHeader, *tablelist, *dataURL, *casefoldingURL)
 
 	fmt.Println("// Version is the Unicode edition from which the tables are derived.")
 	fmt.Printf("const Version = %q\n\n", version())
@@ -837,13 +887,13 @@
 	}
 	fmt.Printf(
 		"// Generated by running\n"+
-			"//	maketables --data=%s\n"+
+			"//	maketables --data=%s --casefolding=%s\n"+
 			"// DO NOT EDIT\n\n"+
 			"// CaseRanges is the table describing case mappings for all letters with\n"+
 			"// non-self mappings.\n"+
 			"var CaseRanges = _CaseRanges\n"+
 			"var _CaseRanges = []CaseRange {\n",
-		*dataURL)
+		*dataURL, *casefoldingURL)
 
 	var startState *caseState    // the start of a run; nil for not active
 	var prevState = &caseState{} // the state of the previous character
@@ -946,13 +996,246 @@
 		if code == ' ' {
 			property = "pZ | pp"
 		}
-		fmt.Printf("\t0x%.2X: %s, // %q\n", code, property, code)
+		fmt.Printf("\t0x%02X: %s, // %q\n", code, property, code)
 	}
-	fmt.Println("}")
+	fmt.Printf("}\n\n")
 }
 
-var range16Count = 0 // Number of entries in the 16-bit range tables.
-var range32Count = 0 // Number of entries in the 32-bit range tables.
+func printCasefold() {
+	// Build list of case-folding groups attached to each canonical folded char (typically lower case).
+	var caseOrbit = make([][]int, MaxChar+1)
+	for i := range chars {
+		c := &chars[i]
+		if c.foldCase == 0 {
+			continue
+		}
+		orb := caseOrbit[c.foldCase]
+		if orb == nil {
+			orb = append(orb, c.foldCase)
+		}
+		caseOrbit[c.foldCase] = append(orb, i)
+	}
+
+	// Insert explicit 1-element groups when assuming [lower, upper] would be wrong.
+	for i := range chars {
+		c := &chars[i]
+		f := c.foldCase
+		if f == 0 {
+			f = i
+		}
+		orb := caseOrbit[f]
+		if orb == nil && (c.upperCase != 0 && c.upperCase != i || c.lowerCase != 0 && c.lowerCase != i) {
+			// Default assumption of [upper, lower] is wrong.
+			caseOrbit[i] = []int{i}
+		}
+	}
+
+	// Delete the groups for which assuming [lower, upper] is right.
+	for i, orb := range caseOrbit {
+		if len(orb) == 2 && chars[orb[0]].upperCase == orb[1] && chars[orb[1]].lowerCase == orb[0] {
+			caseOrbit[i] = nil
+		}
+	}
+
+	// Record orbit information in chars.
+	for _, orb := range caseOrbit {
+		if orb == nil {
+			continue
+		}
+		sort.SortInts(orb)
+		c := orb[len(orb)-1]
+		for _, d := range orb {
+			chars[c].caseOrbit = d
+			c = d
+		}
+	}
+
+	printCaseOrbit()
+
+	// Tables of category and script folding exceptions: code points
+	// that must be added when interpreting a particular category/script
+	// in a case-folding context.
+	cat := make(map[string]map[int]bool)
+	for name := range category {
+		if x := foldExceptions(inCategory(name)); len(x) > 0 {
+			cat[name] = x
+		}
+	}
+
+	scr := make(map[string]map[int]bool)
+	for name := range scripts {
+		if x := foldExceptions(inScript(name)); len(x) > 0 {
+			cat[name] = x
+		}
+	}
+
+	printCatFold("FoldCategory", cat)
+	printCatFold("FoldScript", scr)
+}
+
+// inCategory returns a list of all the runes in the category.
+func inCategory(name string) []int {
+	var x []int
+	for i := range chars {
+		c := &chars[i]
+		if c.category == name || len(name) == 1 && len(c.category) > 1 && c.category[0] == name[0] {
+			x = append(x, i)
+		}
+	}
+	return x
+}
+
+// inScript returns a list of all the runes in the script.
+func inScript(name string) []int {
+	var x []int
+	for _, s := range scripts[name] {
+		for c := s.lo; c <= s.hi; c++ {
+			x = append(x, int(c))
+		}
+	}
+	return x
+}
+
+// foldExceptions returns a list of all the runes fold-equivalent
+// to runes in class but not in class themselves.
+func foldExceptions(class []int) map[int]bool {
+	// Create map containing class and all fold-equivalent chars.
+	m := make(map[int]bool)
+	for _, r := range class {
+		c := &chars[r]
+		if c.caseOrbit == 0 {
+			// Just upper and lower.
+			if u := c.upperCase; u != 0 {
+				m[u] = true
+			}
+			if l := c.lowerCase; l != 0 {
+				m[l] = true
+			}
+			m[r] = true
+			continue
+		}
+		// Otherwise walk orbit.
+		r0 := r
+		for {
+			m[r] = true
+			r = chars[r].caseOrbit
+			if r == r0 {
+				break
+			}
+		}
+	}
+
+	// Remove class itself.
+	for _, r := range class {
+		m[r] = false, false
+	}
+
+	// What's left is the exceptions.
+	return m
+}
+
+var comment = map[string]string{
+	"FoldCategory": "// FoldCategory maps a category name to a table of\n" +
+		"// code points outside the category that are equivalent under\n" +
+		"// simple case folding to code points inside the category.\n" +
+		"// If there is no entry for a category name, there are no such points.\n",
+
+	"FoldScript": "// FoldScript maps a script name to a table of\n" +
+		"// code points outside the script that are equivalent under\n" +
+		"// simple case folding to code points inside the script.\n" +
+		"// If there is no entry for a script name, there are no such points.\n",
+}
+
+func printCaseOrbit() {
+	if *test {
+		for i := range chars {
+			c := &chars[i]
+			f := c.caseOrbit
+			if f == 0 {
+				if c.lowerCase != i && c.lowerCase != 0 {
+					f = c.lowerCase
+				} else if c.upperCase != i && c.upperCase != 0 {
+					f = c.upperCase
+				} else {
+					f = i
+				}
+			}
+			if g := unicode.SimpleFold(i); g != f {
+				fmt.Fprintf(os.Stderr, "unicode.SimpleFold(%#U) = %#U, want %#U\n", i, g, f)
+			}
+		}
+		return
+	}
+
+	fmt.Printf("var caseOrbit = []foldPair{\n")
+	for i := range chars {
+		c := &chars[i]
+		if c.caseOrbit != 0 {
+			fmt.Printf("\t{0x%04X, 0x%04X},\n", i, c.caseOrbit)
+			foldPairCount++
+		}
+	}
+	fmt.Printf("}\n\n")
+}
+
+func printCatFold(name string, m map[string]map[int]bool) {
+	if *test {
+		var pkgMap map[string]*unicode.RangeTable
+		if name == "FoldCategory" {
+			pkgMap = unicode.FoldCategory
+		} else {
+			pkgMap = unicode.FoldScript
+		}
+		if len(pkgMap) != len(m) {
+			fmt.Fprintf(os.Stderr, "unicode.%s has %d elements, want %d\n", name, len(pkgMap), len(m))
+			return
+		}
+		for k, v := range m {
+			t, ok := pkgMap[k]
+			if !ok {
+				fmt.Fprintf(os.Stderr, "unicode.%s[%q] missing\n", name, k)
+				continue
+			}
+			n := 0
+			for _, r := range t.R16 {
+				for c := int(r.Lo); c <= int(r.Hi); c += int(r.Stride) {
+					if !v[c] {
+						fmt.Fprintf(os.Stderr, "unicode.%s[%q] contains %#U, should not\n", name, k, c)
+					}
+					n++
+				}
+			}
+			for _, r := range t.R32 {
+				for c := int(r.Lo); c <= int(r.Hi); c += int(r.Stride) {
+					if !v[c] {
+						fmt.Fprintf(os.Stderr, "unicode.%s[%q] contains %#U, should not\n", name, k, c)
+					}
+					n++
+				}
+			}
+			if n != len(v) {
+				fmt.Fprintf(os.Stderr, "unicode.%s[%q] has %d code points, want %d\n", name, k, n, len(v))
+			}
+		}
+		return
+	}
+
+	fmt.Print(comment[name])
+	fmt.Printf("var %s = map[string]*RangeTable{\n", name)
+	for name := range m {
+		fmt.Printf("\t%q: fold%s,\n", name, name)
+	}
+	fmt.Printf("}\n\n")
+	for name, class := range m {
+		dumpRange(
+			fmt.Sprintf("var fold%s = &RangeTable{\n", name),
+			func(code int) bool { return class[code] })
+	}
+}
+
+var range16Count = 0  // Number of entries in the 16-bit range tables.
+var range32Count = 0  // Number of entries in the 32-bit range tables.
+var foldPairCount = 0 // Number of fold pairs in the exception tables.
 
 func printSizes() {
 	if *test {
@@ -963,4 +1246,6 @@
 	range16Bytes := range16Count * 3 * 2
 	range32Bytes := range32Count * 3 * 4
 	fmt.Printf("// Range bytes: %d 16-bit, %d 32-bit, %d total.\n", range16Bytes, range32Bytes, range16Bytes+range32Bytes)
+	fmt.Println()
+	fmt.Printf("// Fold orbit bytes: %d pairs, %d bytes\n", foldPairCount, foldPairCount*2*2)
 }
diff --git a/src/pkg/unicode/tables.go b/src/pkg/unicode/tables.go
index 32681a8..a75011a 100644
--- a/src/pkg/unicode/tables.go
+++ b/src/pkg/unicode/tables.go
@@ -1,5 +1,5 @@
 // Generated by running
-//	maketables --tables=all --data=http://www.unicode.org/Public/6.0.0/ucd/UnicodeData.txt
+//	maketables --tables=all --data=http://www.unicode.org/Public/6.0.0/ucd/UnicodeData.txt --casefolding=http://www.unicode.org/Public/6.0.0/ucd/CaseFolding.txt
 // DO NOT EDIT
 
 package unicode
@@ -5150,7 +5150,7 @@
 )
 
 // Generated by running
-//	maketables --data=http://www.unicode.org/Public/6.0.0/ucd/UnicodeData.txt
+//	maketables --data=http://www.unicode.org/Public/6.0.0/ucd/UnicodeData.txt --casefolding=http://www.unicode.org/Public/6.0.0/ucd/CaseFolding.txt
 // DO NOT EDIT
 
 // CaseRanges is the table describing case mappings for all letters with
@@ -5539,7 +5539,7 @@
 	0x7C: pS | pp,  // '|'
 	0x7D: pP | pp,  // '}'
 	0x7E: pS | pp,  // '~'
-	0x7F: pC,       // '\x7f'
+	0x7F: pC,       // '\u007f'
 	0x80: pC,       // '\u0080'
 	0x81: pC,       // '\u0081'
 	0x82: pC,       // '\u0082'
@@ -5573,102 +5573,534 @@
 	0x9E: pC,       // '\u009e'
 	0x9F: pC,       // '\u009f'
 	0xA0: pZ,       // '\u00a0'
-	0xA1: pP | pp,  // '\u00a1'
-	0xA2: pS | pp,  // '\u00a2'
-	0xA3: pS | pp,  // '\u00a3'
-	0xA4: pS | pp,  // '\u00a4'
-	0xA5: pS | pp,  // '\u00a5'
-	0xA6: pS | pp,  // '\u00a6'
-	0xA7: pS | pp,  // '\u00a7'
-	0xA8: pS | pp,  // '\u00a8'
-	0xA9: pS | pp,  // '\u00a9'
-	0xAA: pLl | pp, // '\u00aa'
-	0xAB: pP | pp,  // '\u00ab'
-	0xAC: pS | pp,  // '\u00ac'
+	0xA1: pP | pp,  // '¡'
+	0xA2: pS | pp,  // '¢'
+	0xA3: pS | pp,  // '£'
+	0xA4: pS | pp,  // '¤'
+	0xA5: pS | pp,  // '¥'
+	0xA6: pS | pp,  // '¦'
+	0xA7: pS | pp,  // '§'
+	0xA8: pS | pp,  // '¨'
+	0xA9: pS | pp,  // '©'
+	0xAA: pLl | pp, // 'ª'
+	0xAB: pP | pp,  // '«'
+	0xAC: pS | pp,  // '¬'
 	0xAD: 0,        // '\u00ad'
-	0xAE: pS | pp,  // '\u00ae'
-	0xAF: pS | pp,  // '\u00af'
-	0xB0: pS | pp,  // '\u00b0'
-	0xB1: pS | pp,  // '\u00b1'
-	0xB2: pN | pp,  // '\u00b2'
-	0xB3: pN | pp,  // '\u00b3'
-	0xB4: pS | pp,  // '\u00b4'
-	0xB5: pLl | pp, // '\u00b5'
-	0xB6: pS | pp,  // '\u00b6'
-	0xB7: pP | pp,  // '\u00b7'
-	0xB8: pS | pp,  // '\u00b8'
-	0xB9: pN | pp,  // '\u00b9'
-	0xBA: pLl | pp, // '\u00ba'
-	0xBB: pP | pp,  // '\u00bb'
-	0xBC: pN | pp,  // '\u00bc'
-	0xBD: pN | pp,  // '\u00bd'
-	0xBE: pN | pp,  // '\u00be'
-	0xBF: pP | pp,  // '\u00bf'
-	0xC0: pLu | pp, // '\u00c0'
-	0xC1: pLu | pp, // '\u00c1'
-	0xC2: pLu | pp, // '\u00c2'
-	0xC3: pLu | pp, // '\u00c3'
-	0xC4: pLu | pp, // '\u00c4'
-	0xC5: pLu | pp, // '\u00c5'
-	0xC6: pLu | pp, // '\u00c6'
-	0xC7: pLu | pp, // '\u00c7'
-	0xC8: pLu | pp, // '\u00c8'
-	0xC9: pLu | pp, // '\u00c9'
-	0xCA: pLu | pp, // '\u00ca'
-	0xCB: pLu | pp, // '\u00cb'
-	0xCC: pLu | pp, // '\u00cc'
-	0xCD: pLu | pp, // '\u00cd'
-	0xCE: pLu | pp, // '\u00ce'
-	0xCF: pLu | pp, // '\u00cf'
-	0xD0: pLu | pp, // '\u00d0'
-	0xD1: pLu | pp, // '\u00d1'
-	0xD2: pLu | pp, // '\u00d2'
-	0xD3: pLu | pp, // '\u00d3'
-	0xD4: pLu | pp, // '\u00d4'
-	0xD5: pLu | pp, // '\u00d5'
-	0xD6: pLu | pp, // '\u00d6'
-	0xD7: pS | pp,  // '\u00d7'
-	0xD8: pLu | pp, // '\u00d8'
-	0xD9: pLu | pp, // '\u00d9'
-	0xDA: pLu | pp, // '\u00da'
-	0xDB: pLu | pp, // '\u00db'
-	0xDC: pLu | pp, // '\u00dc'
-	0xDD: pLu | pp, // '\u00dd'
-	0xDE: pLu | pp, // '\u00de'
-	0xDF: pLl | pp, // '\u00df'
-	0xE0: pLl | pp, // '\u00e0'
-	0xE1: pLl | pp, // '\u00e1'
-	0xE2: pLl | pp, // '\u00e2'
-	0xE3: pLl | pp, // '\u00e3'
-	0xE4: pLl | pp, // '\u00e4'
-	0xE5: pLl | pp, // '\u00e5'
-	0xE6: pLl | pp, // '\u00e6'
-	0xE7: pLl | pp, // '\u00e7'
-	0xE8: pLl | pp, // '\u00e8'
-	0xE9: pLl | pp, // '\u00e9'
-	0xEA: pLl | pp, // '\u00ea'
-	0xEB: pLl | pp, // '\u00eb'
-	0xEC: pLl | pp, // '\u00ec'
-	0xED: pLl | pp, // '\u00ed'
-	0xEE: pLl | pp, // '\u00ee'
-	0xEF: pLl | pp, // '\u00ef'
-	0xF0: pLl | pp, // '\u00f0'
-	0xF1: pLl | pp, // '\u00f1'
-	0xF2: pLl | pp, // '\u00f2'
-	0xF3: pLl | pp, // '\u00f3'
-	0xF4: pLl | pp, // '\u00f4'
-	0xF5: pLl | pp, // '\u00f5'
-	0xF6: pLl | pp, // '\u00f6'
-	0xF7: pS | pp,  // '\u00f7'
-	0xF8: pLl | pp, // '\u00f8'
-	0xF9: pLl | pp, // '\u00f9'
-	0xFA: pLl | pp, // '\u00fa'
-	0xFB: pLl | pp, // '\u00fb'
-	0xFC: pLl | pp, // '\u00fc'
-	0xFD: pLl | pp, // '\u00fd'
-	0xFE: pLl | pp, // '\u00fe'
-	0xFF: pLl | pp, // '\u00ff'
+	0xAE: pS | pp,  // '®'
+	0xAF: pS | pp,  // '¯'
+	0xB0: pS | pp,  // '°'
+	0xB1: pS | pp,  // '±'
+	0xB2: pN | pp,  // '²'
+	0xB3: pN | pp,  // '³'
+	0xB4: pS | pp,  // '´'
+	0xB5: pLl | pp, // 'µ'
+	0xB6: pS | pp,  // '¶'
+	0xB7: pP | pp,  // '·'
+	0xB8: pS | pp,  // '¸'
+	0xB9: pN | pp,  // '¹'
+	0xBA: pLl | pp, // 'º'
+	0xBB: pP | pp,  // '»'
+	0xBC: pN | pp,  // '¼'
+	0xBD: pN | pp,  // '½'
+	0xBE: pN | pp,  // '¾'
+	0xBF: pP | pp,  // '¿'
+	0xC0: pLu | pp, // 'À'
+	0xC1: pLu | pp, // 'Á'
+	0xC2: pLu | pp, // 'Â'
+	0xC3: pLu | pp, // 'Ã'
+	0xC4: pLu | pp, // 'Ä'
+	0xC5: pLu | pp, // 'Å'
+	0xC6: pLu | pp, // 'Æ'
+	0xC7: pLu | pp, // 'Ç'
+	0xC8: pLu | pp, // 'È'
+	0xC9: pLu | pp, // 'É'
+	0xCA: pLu | pp, // 'Ê'
+	0xCB: pLu | pp, // 'Ë'
+	0xCC: pLu | pp, // 'Ì'
+	0xCD: pLu | pp, // 'Í'
+	0xCE: pLu | pp, // 'Î'
+	0xCF: pLu | pp, // 'Ï'
+	0xD0: pLu | pp, // 'Ð'
+	0xD1: pLu | pp, // 'Ñ'
+	0xD2: pLu | pp, // 'Ò'
+	0xD3: pLu | pp, // 'Ó'
+	0xD4: pLu | pp, // 'Ô'
+	0xD5: pLu | pp, // 'Õ'
+	0xD6: pLu | pp, // 'Ö'
+	0xD7: pS | pp,  // '×'
+	0xD8: pLu | pp, // 'Ø'
+	0xD9: pLu | pp, // 'Ù'
+	0xDA: pLu | pp, // 'Ú'
+	0xDB: pLu | pp, // 'Û'
+	0xDC: pLu | pp, // 'Ü'
+	0xDD: pLu | pp, // 'Ý'
+	0xDE: pLu | pp, // 'Þ'
+	0xDF: pLl | pp, // 'ß'
+	0xE0: pLl | pp, // 'à'
+	0xE1: pLl | pp, // 'á'
+	0xE2: pLl | pp, // 'â'
+	0xE3: pLl | pp, // 'ã'
+	0xE4: pLl | pp, // 'ä'
+	0xE5: pLl | pp, // 'å'
+	0xE6: pLl | pp, // 'æ'
+	0xE7: pLl | pp, // 'ç'
+	0xE8: pLl | pp, // 'è'
+	0xE9: pLl | pp, // 'é'
+	0xEA: pLl | pp, // 'ê'
+	0xEB: pLl | pp, // 'ë'
+	0xEC: pLl | pp, // 'ì'
+	0xED: pLl | pp, // 'í'
+	0xEE: pLl | pp, // 'î'
+	0xEF: pLl | pp, // 'ï'
+	0xF0: pLl | pp, // 'ð'
+	0xF1: pLl | pp, // 'ñ'
+	0xF2: pLl | pp, // 'ò'
+	0xF3: pLl | pp, // 'ó'
+	0xF4: pLl | pp, // 'ô'
+	0xF5: pLl | pp, // 'õ'
+	0xF6: pLl | pp, // 'ö'
+	0xF7: pS | pp,  // '÷'
+	0xF8: pLl | pp, // 'ø'
+	0xF9: pLl | pp, // 'ù'
+	0xFA: pLl | pp, // 'ú'
+	0xFB: pLl | pp, // 'û'
+	0xFC: pLl | pp, // 'ü'
+	0xFD: pLl | pp, // 'ý'
+	0xFE: pLl | pp, // 'þ'
+	0xFF: pLl | pp, // 'ÿ'
 }
 
-// Range entries: 3190 16-bit, 657 32-bit, 3847 total.
-// Range bytes: 19140 16-bit, 7884 32-bit, 27024 total.
+var caseOrbit = []foldPair{
+	{0x004B, 0x006B},
+	{0x0053, 0x0073},
+	{0x006B, 0x212A},
+	{0x0073, 0x017F},
+	{0x00B5, 0x039C},
+	{0x00C5, 0x00E5},
+	{0x00DF, 0x1E9E},
+	{0x00E5, 0x212B},
+	{0x0130, 0x0130},
+	{0x0131, 0x0131},
+	{0x017F, 0x0053},
+	{0x01C4, 0x01C5},
+	{0x01C5, 0x01C6},
+	{0x01C6, 0x01C4},
+	{0x01C7, 0x01C8},
+	{0x01C8, 0x01C9},
+	{0x01C9, 0x01C7},
+	{0x01CA, 0x01CB},
+	{0x01CB, 0x01CC},
+	{0x01CC, 0x01CA},
+	{0x01F1, 0x01F2},
+	{0x01F2, 0x01F3},
+	{0x01F3, 0x01F1},
+	{0x0345, 0x0399},
+	{0x0392, 0x03B2},
+	{0x0395, 0x03B5},
+	{0x0398, 0x03B8},
+	{0x0399, 0x03B9},
+	{0x039A, 0x03BA},
+	{0x039C, 0x03BC},
+	{0x03A0, 0x03C0},
+	{0x03A1, 0x03C1},
+	{0x03A3, 0x03C2},
+	{0x03A6, 0x03C6},
+	{0x03A9, 0x03C9},
+	{0x03B2, 0x03D0},
+	{0x03B5, 0x03F5},
+	{0x03B8, 0x03D1},
+	{0x03B9, 0x1FBE},
+	{0x03BA, 0x03F0},
+	{0x03BC, 0x00B5},
+	{0x03C0, 0x03D6},
+	{0x03C1, 0x03F1},
+	{0x03C2, 0x03C3},
+	{0x03C3, 0x03A3},
+	{0x03C6, 0x03D5},
+	{0x03C9, 0x2126},
+	{0x03D0, 0x0392},
+	{0x03D1, 0x03F4},
+	{0x03D5, 0x03A6},
+	{0x03D6, 0x03A0},
+	{0x03F0, 0x039A},
+	{0x03F1, 0x03A1},
+	{0x03F4, 0x0398},
+	{0x03F5, 0x0395},
+	{0x1E60, 0x1E61},
+	{0x1E61, 0x1E9B},
+	{0x1E9B, 0x1E60},
+	{0x1E9E, 0x00DF},
+	{0x1FBE, 0x0345},
+	{0x2126, 0x03A9},
+	{0x212A, 0x004B},
+	{0x212B, 0x00C5},
+	{0x2160, 0x2170},
+	{0x2161, 0x2171},
+	{0x2162, 0x2172},
+	{0x2163, 0x2173},
+	{0x2164, 0x2174},
+	{0x2165, 0x2175},
+	{0x2166, 0x2176},
+	{0x2167, 0x2177},
+	{0x2168, 0x2178},
+	{0x2169, 0x2179},
+	{0x216A, 0x217A},
+	{0x216B, 0x217B},
+	{0x216C, 0x217C},
+	{0x216D, 0x217D},
+	{0x216E, 0x217E},
+	{0x216F, 0x217F},
+	{0x2170, 0x2160},
+	{0x2171, 0x2161},
+	{0x2172, 0x2162},
+	{0x2173, 0x2163},
+	{0x2174, 0x2164},
+	{0x2175, 0x2165},
+	{0x2176, 0x2166},
+	{0x2177, 0x2167},
+	{0x2178, 0x2168},
+	{0x2179, 0x2169},
+	{0x217A, 0x216A},
+	{0x217B, 0x216B},
+	{0x217C, 0x216C},
+	{0x217D, 0x216D},
+	{0x217E, 0x216E},
+	{0x217F, 0x216F},
+	{0x24B6, 0x24D0},
+	{0x24B7, 0x24D1},
+	{0x24B8, 0x24D2},
+	{0x24B9, 0x24D3},
+	{0x24BA, 0x24D4},
+	{0x24BB, 0x24D5},
+	{0x24BC, 0x24D6},
+	{0x24BD, 0x24D7},
+	{0x24BE, 0x24D8},
+	{0x24BF, 0x24D9},
+	{0x24C0, 0x24DA},
+	{0x24C1, 0x24DB},
+	{0x24C2, 0x24DC},
+	{0x24C3, 0x24DD},
+	{0x24C4, 0x24DE},
+	{0x24C5, 0x24DF},
+	{0x24C6, 0x24E0},
+	{0x24C7, 0x24E1},
+	{0x24C8, 0x24E2},
+	{0x24C9, 0x24E3},
+	{0x24CA, 0x24E4},
+	{0x24CB, 0x24E5},
+	{0x24CC, 0x24E6},
+	{0x24CD, 0x24E7},
+	{0x24CE, 0x24E8},
+	{0x24CF, 0x24E9},
+	{0x24D0, 0x24B6},
+	{0x24D1, 0x24B7},
+	{0x24D2, 0x24B8},
+	{0x24D3, 0x24B9},
+	{0x24D4, 0x24BA},
+	{0x24D5, 0x24BB},
+	{0x24D6, 0x24BC},
+	{0x24D7, 0x24BD},
+	{0x24D8, 0x24BE},
+	{0x24D9, 0x24BF},
+	{0x24DA, 0x24C0},
+	{0x24DB, 0x24C1},
+	{0x24DC, 0x24C2},
+	{0x24DD, 0x24C3},
+	{0x24DE, 0x24C4},
+	{0x24DF, 0x24C5},
+	{0x24E0, 0x24C6},
+	{0x24E1, 0x24C7},
+	{0x24E2, 0x24C8},
+	{0x24E3, 0x24C9},
+	{0x24E4, 0x24CA},
+	{0x24E5, 0x24CB},
+	{0x24E6, 0x24CC},
+	{0x24E7, 0x24CD},
+	{0x24E8, 0x24CE},
+	{0x24E9, 0x24CF},
+}
+
+// FoldCategory maps a category name to a table of
+// code points outside the category that are equivalent under
+// simple case folding to code points inside the category.
+// If there is no entry for a category name, there are no such points.
+var FoldCategory = map[string]*RangeTable{
+	"Ll":        foldLl,
+	"Inherited": foldInherited,
+	"M":         foldM,
+	"L":         foldL,
+	"Mn":        foldMn,
+	"Common":    foldCommon,
+	"Greek":     foldGreek,
+	"Lu":        foldLu,
+	"Lt":        foldLt,
+}
+
+var foldLl = &RangeTable{
+	R16: []Range16{
+		{0x0041, 0x005a, 1},
+		{0x00c0, 0x00d6, 1},
+		{0x00d8, 0x00de, 1},
+		{0x0100, 0x012e, 2},
+		{0x0132, 0x0136, 2},
+		{0x0139, 0x0147, 2},
+		{0x014a, 0x0178, 2},
+		{0x0179, 0x017d, 2},
+		{0x0181, 0x0182, 1},
+		{0x0184, 0x0186, 2},
+		{0x0187, 0x0189, 2},
+		{0x018a, 0x018b, 1},
+		{0x018e, 0x0191, 1},
+		{0x0193, 0x0194, 1},
+		{0x0196, 0x0198, 1},
+		{0x019c, 0x019d, 1},
+		{0x019f, 0x01a0, 1},
+		{0x01a2, 0x01a6, 2},
+		{0x01a7, 0x01a9, 2},
+		{0x01ac, 0x01ae, 2},
+		{0x01af, 0x01b1, 2},
+		{0x01b2, 0x01b3, 1},
+		{0x01b5, 0x01b7, 2},
+		{0x01b8, 0x01bc, 4},
+		{0x01c4, 0x01c5, 1},
+		{0x01c7, 0x01c8, 1},
+		{0x01ca, 0x01cb, 1},
+		{0x01cd, 0x01db, 2},
+		{0x01de, 0x01ee, 2},
+		{0x01f1, 0x01f2, 1},
+		{0x01f4, 0x01f6, 2},
+		{0x01f7, 0x01f8, 1},
+		{0x01fa, 0x0232, 2},
+		{0x023a, 0x023b, 1},
+		{0x023d, 0x023e, 1},
+		{0x0241, 0x0243, 2},
+		{0x0244, 0x0246, 1},
+		{0x0248, 0x024e, 2},
+		{0x0345, 0x0370, 43},
+		{0x0372, 0x0376, 4},
+		{0x0386, 0x0388, 2},
+		{0x0389, 0x038a, 1},
+		{0x038c, 0x038e, 2},
+		{0x038f, 0x0391, 2},
+		{0x0392, 0x03a1, 1},
+		{0x03a3, 0x03ab, 1},
+		{0x03cf, 0x03d8, 9},
+		{0x03da, 0x03ee, 2},
+		{0x03f4, 0x03f7, 3},
+		{0x03f9, 0x03fa, 1},
+		{0x03fd, 0x042f, 1},
+		{0x0460, 0x0480, 2},
+		{0x048a, 0x04c0, 2},
+		{0x04c1, 0x04cd, 2},
+		{0x04d0, 0x0526, 2},
+		{0x0531, 0x0556, 1},
+		{0x10a0, 0x10c5, 1},
+		{0x1e00, 0x1e94, 2},
+		{0x1e9e, 0x1efe, 2},
+		{0x1f08, 0x1f0f, 1},
+		{0x1f18, 0x1f1d, 1},
+		{0x1f28, 0x1f2f, 1},
+		{0x1f38, 0x1f3f, 1},
+		{0x1f48, 0x1f4d, 1},
+		{0x1f59, 0x1f5f, 2},
+		{0x1f68, 0x1f6f, 1},
+		{0x1f88, 0x1f8f, 1},
+		{0x1f98, 0x1f9f, 1},
+		{0x1fa8, 0x1faf, 1},
+		{0x1fb8, 0x1fbc, 1},
+		{0x1fc8, 0x1fcc, 1},
+		{0x1fd8, 0x1fdb, 1},
+		{0x1fe8, 0x1fec, 1},
+		{0x1ff8, 0x1ffc, 1},
+		{0x2126, 0x212a, 4},
+		{0x212b, 0x2132, 7},
+		{0x2183, 0x2c00, 2685},
+		{0x2c01, 0x2c2e, 1},
+		{0x2c60, 0x2c62, 2},
+		{0x2c63, 0x2c64, 1},
+		{0x2c67, 0x2c6d, 2},
+		{0x2c6e, 0x2c70, 1},
+		{0x2c72, 0x2c75, 3},
+		{0x2c7e, 0x2c80, 1},
+		{0x2c82, 0x2ce2, 2},
+		{0x2ceb, 0x2ced, 2},
+		{0xa640, 0xa66c, 2},
+		{0xa680, 0xa696, 2},
+		{0xa722, 0xa72e, 2},
+		{0xa732, 0xa76e, 2},
+		{0xa779, 0xa77d, 2},
+		{0xa77e, 0xa786, 2},
+		{0xa78b, 0xa78d, 2},
+		{0xa790, 0xa7a0, 16},
+		{0xa7a2, 0xa7a8, 2},
+		{0xff21, 0xff3a, 1},
+	},
+	R32: []Range32{
+		{0x10400, 0x10427, 1},
+	},
+}
+
+var foldInherited = &RangeTable{
+	R16: []Range16{
+		{0x0399, 0x03b9, 32},
+		{0x1fbe, 0x1fbe, 1},
+	},
+}
+
+var foldM = &RangeTable{
+	R16: []Range16{
+		{0x0399, 0x03b9, 32},
+		{0x1fbe, 0x1fbe, 1},
+	},
+}
+
+var foldL = &RangeTable{
+	R16: []Range16{
+		{0x0345, 0x0345, 1},
+	},
+}
+
+var foldMn = &RangeTable{
+	R16: []Range16{
+		{0x0399, 0x03b9, 32},
+		{0x1fbe, 0x1fbe, 1},
+	},
+}
+
+var foldCommon = &RangeTable{
+	R16: []Range16{
+		{0x039c, 0x03bc, 32},
+	},
+}
+
+var foldGreek = &RangeTable{
+	R16: []Range16{
+		{0x00b5, 0x0345, 656},
+	},
+}
+
+var foldLu = &RangeTable{
+	R16: []Range16{
+		{0x0061, 0x007a, 1},
+		{0x00b5, 0x00df, 42},
+		{0x00e0, 0x00f6, 1},
+		{0x00f8, 0x00ff, 1},
+		{0x0101, 0x012f, 2},
+		{0x0133, 0x0137, 2},
+		{0x013a, 0x0148, 2},
+		{0x014b, 0x0177, 2},
+		{0x017a, 0x017e, 2},
+		{0x017f, 0x0180, 1},
+		{0x0183, 0x0185, 2},
+		{0x0188, 0x018c, 4},
+		{0x0192, 0x0195, 3},
+		{0x0199, 0x019a, 1},
+		{0x019e, 0x01a1, 3},
+		{0x01a3, 0x01a5, 2},
+		{0x01a8, 0x01ad, 5},
+		{0x01b0, 0x01b4, 4},
+		{0x01b6, 0x01b9, 3},
+		{0x01bd, 0x01bf, 2},
+		{0x01c5, 0x01c6, 1},
+		{0x01c8, 0x01c9, 1},
+		{0x01cb, 0x01cc, 1},
+		{0x01ce, 0x01dc, 2},
+		{0x01dd, 0x01ef, 2},
+		{0x01f2, 0x01f3, 1},
+		{0x01f5, 0x01f9, 4},
+		{0x01fb, 0x021f, 2},
+		{0x0223, 0x0233, 2},
+		{0x023c, 0x023f, 3},
+		{0x0240, 0x0242, 2},
+		{0x0247, 0x024f, 2},
+		{0x0250, 0x0254, 1},
+		{0x0256, 0x0257, 1},
+		{0x0259, 0x025b, 2},
+		{0x0260, 0x0263, 3},
+		{0x0265, 0x0268, 3},
+		{0x0269, 0x026b, 2},
+		{0x026f, 0x0271, 2},
+		{0x0272, 0x0275, 3},
+		{0x027d, 0x0283, 3},
+		{0x0288, 0x028c, 1},
+		{0x0292, 0x0345, 179},
+		{0x0371, 0x0373, 2},
+		{0x0377, 0x037b, 4},
+		{0x037c, 0x037d, 1},
+		{0x03ac, 0x03af, 1},
+		{0x03b1, 0x03ce, 1},
+		{0x03d0, 0x03d1, 1},
+		{0x03d5, 0x03d7, 1},
+		{0x03d9, 0x03ef, 2},
+		{0x03f0, 0x03f2, 1},
+		{0x03f5, 0x03fb, 3},
+		{0x0430, 0x045f, 1},
+		{0x0461, 0x0481, 2},
+		{0x048b, 0x04bf, 2},
+		{0x04c2, 0x04ce, 2},
+		{0x04cf, 0x0527, 2},
+		{0x0561, 0x0586, 1},
+		{0x1d79, 0x1d7d, 4},
+		{0x1e01, 0x1e95, 2},
+		{0x1e9b, 0x1ea1, 6},
+		{0x1ea3, 0x1eff, 2},
+		{0x1f00, 0x1f07, 1},
+		{0x1f10, 0x1f15, 1},
+		{0x1f20, 0x1f27, 1},
+		{0x1f30, 0x1f37, 1},
+		{0x1f40, 0x1f45, 1},
+		{0x1f51, 0x1f57, 2},
+		{0x1f60, 0x1f67, 1},
+		{0x1f70, 0x1f7d, 1},
+		{0x1fb0, 0x1fb1, 1},
+		{0x1fbe, 0x1fd0, 18},
+		{0x1fd1, 0x1fe0, 15},
+		{0x1fe1, 0x1fe5, 4},
+		{0x214e, 0x2184, 54},
+		{0x2c30, 0x2c5e, 1},
+		{0x2c61, 0x2c65, 4},
+		{0x2c66, 0x2c6c, 2},
+		{0x2c73, 0x2c76, 3},
+		{0x2c81, 0x2ce3, 2},
+		{0x2cec, 0x2cee, 2},
+		{0x2d00, 0x2d25, 1},
+		{0xa641, 0xa66d, 2},
+		{0xa681, 0xa697, 2},
+		{0xa723, 0xa72f, 2},
+		{0xa733, 0xa76f, 2},
+		{0xa77a, 0xa77c, 2},
+		{0xa77f, 0xa787, 2},
+		{0xa78c, 0xa791, 5},
+		{0xa7a1, 0xa7a9, 2},
+		{0xff41, 0xff5a, 1},
+	},
+	R32: []Range32{
+		{0x10428, 0x1044f, 1},
+	},
+}
+
+var foldLt = &RangeTable{
+	R16: []Range16{
+		{0x01c4, 0x01c6, 2},
+		{0x01c7, 0x01c9, 2},
+		{0x01ca, 0x01cc, 2},
+		{0x01f1, 0x01f3, 2},
+		{0x1f80, 0x1f87, 1},
+		{0x1f90, 0x1f97, 1},
+		{0x1fa0, 0x1fa7, 1},
+		{0x1fb3, 0x1fc3, 16},
+		{0x1ff3, 0x1ff3, 1},
+	},
+}
+
+// FoldScript maps a script name to a table of
+// code points outside the script that are equivalent under
+// simple case folding to code points inside the script.
+// If there is no entry for a script name, there are no such points.
+var FoldScript = map[string]*RangeTable{}
+
+
+// Range entries: 3391 16-bit, 659 32-bit, 4050 total.
+// Range bytes: 20346 16-bit, 7908 32-bit, 28254 total.
+
+// Fold orbit bytes: 147 pairs, 588 bytes