cases/gen.go - text - Git at Google

 // Copyright 2014 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 //go:build ignore
 // +build ignore

 // This program generates the trie for casing operations. The Unicode casing
 // algorithm requires the lookup of various properties and mappings for each
 // rune. The table generated by this generator combines several of the most
 // frequently used of these into a single trie so that they can be accessed
 // with a single lookup.
 package main

 import (
 	"bytes"
 	"fmt"
 	"io"
 	"log"
 	"reflect"
 	"strconv"
 	"strings"
 	"unicode"

 	"golang.org/x/text/internal/gen"
 	"golang.org/x/text/internal/triegen"
 	"golang.org/x/text/internal/ucd"
 	"golang.org/x/text/unicode/norm"
 )

 // main is the generator entry point. After gen.Init it writes the casing
 // tables (genTables) and the accompanying test data (genTablesTest), and then
 // calls gen.Repackage with gen_trieval.go as input, trieval.go as output and
 // "cases" as the package name.
 func main() {
 	gen.Init()
 	genTables()
 	genTablesTest()
 	gen.Repackage("gen_trieval.go", "trieval.go", "cases")
 }

 // runeInfo contains all information for a rune that we care about for casing
 // operations. parseUCD fills in one runeInfo per code point from the UCD
 // files; makeEntry and makeException then derive entry from the other fields.
 type runeInfo struct {
 	// Rune is the code point this record describes. It is set by the get
 	// helper in parseUCD.
 	Rune rune

 	entry info // trie value for this rune.

 	// CaseMode is set to cTitle for general category Lt (UnicodeData.txt) and
 	// to cLower or cUpper for the Lowercase or Uppercase property
 	// (DerivedCoreProperties.txt, which is parsed later and so wins if both
 	// apply).
 	CaseMode info

 	// Simple case mappings.
 	// Indexed by cLower, cUpper and cTitle; read from UnicodeData.txt.
 	Simple [1 + maxCaseMode][]rune

 	// Special casing
 	// HasSpecial and Special are set for SpecialCasing.txt lines without a
 	// condition; Conditional is set for lines that do have a condition, whose
 	// mappings are not stored here.
 	HasSpecial  bool
 	Conditional bool
 	Special     [1 + maxCaseMode][]rune

 	// Folding
 	// Read from CaseFolding.txt: type C sets FoldSimple and FoldFull, S sets
 	// FoldSimple, T sets FoldSpecial and F sets FoldFull.
 	FoldSimple  rune
 	FoldSpecial rune
 	FoldFull    []rune

 	// TODO: FC_NFKC, or equivalent data.

 	// Properties
 	// SoftDotted comes from PropList.txt, CaseIgnorable and Cased from
 	// DerivedCoreProperties.txt. BreakType is the raw WordBreakProperty.txt
 	// value and BreakCat its collapsed form.
 	// NOTE(review): DecomposeGreek is not assigned anywhere in the visible
 	// part of this file; confirm whether it is still needed.
 	SoftDotted     bool
 	CaseIgnorable  bool
 	Cased          bool
 	DecomposeGreek bool
 	BreakType      string
 	BreakCat       breakCategory

 	// We care mostly about 0, Above, and IotaSubscript.
 	CCC byte
 }

 // breakCategory is the coarse word-break class onto which parseUCD collapses
 // the values found in auxiliary/WordBreakProperty.txt.
 type breakCategory int

 const (
 	// breakBreak is the zero value; it is what a rune keeps when parseUCD
 	// assigns it neither of the other two categories.
 	breakBreak breakCategory = iota
 	// breakLetter is assigned for ALetter, Hebrew_Letter, Numeric, Extend,
 	// ExtendNumLet, Format and ZWJ.
 	breakLetter
 	// breakMid is assigned for MidLetter, MidNumLet and Single_Quote.
 	breakMid
 )

 // mapping reports the string the rune maps to for case type c. An
 // unconditional special-casing entry takes precedence over the simple
 // mapping, and a rune that has neither maps to itself.
 func (r *runeInfo) mapping(c info) string {
 	switch {
 	case r.HasSpecial:
 		return string(r.Special[c])
 	case len(r.Simple[c]) > 0:
 		return string(r.Simple[c])
 	default:
 		return string(r.Rune)
 	}
 }

 // parse opens the named UCD file through gen.OpenUCDFile and hands it to
 // ucd.Parse together with the callback f.
 func parse(file string, f func(p *ucd.Parser)) {
 	ucd.Parse(gen.OpenUCDFile(file), f)
 }

 // parseUCD reads the Unicode Character Database files needed for casing and
 // returns one runeInfo per code point, indexed by rune value.
 //
 // The files are parsed in a fixed order because a later pass relies on a
 // field set by an earlier one: the WordBreakProperty.txt pass checks
 // CaseIgnorable, which is set by the DerivedCoreProperties.txt pass.
 func parseUCD() []runeInfo {
 	// NOTE(review): the slice has unicode.MaxRune elements, so the valid
 	// indices are 0..unicode.MaxRune-1 and U+10FFFF itself has no slot.
 	chars := make([]runeInfo, unicode.MaxRune)

 	// get returns the record for r and makes sure its Rune field is set.
 	get := func(r rune) *runeInfo {
 		c := &chars[r]
 		c.Rune = r
 		return c
 	}

 	parse("UnicodeData.txt", func(p *ucd.Parser) {
 		ri := get(p.Rune(0))
 		ri.CCC = byte(p.Int(ucd.CanonicalCombiningClass))
 		ri.Simple[cLower] = p.Runes(ucd.SimpleLowercaseMapping)
 		ri.Simple[cUpper] = p.Runes(ucd.SimpleUppercaseMapping)
 		ri.Simple[cTitle] = p.Runes(ucd.SimpleTitlecaseMapping)
 		if p.String(ucd.GeneralCategory) == "Lt" {
 			ri.CaseMode = cTitle
 		}
 	})

 	// <code>; <property>
 	parse("PropList.txt", func(p *ucd.Parser) {
 		if p.String(1) == "Soft_Dotted" {
 			chars[p.Rune(0)].SoftDotted = true
 		}
 	})

 	// <code>; <property>
 	parse("DerivedCoreProperties.txt", func(p *ucd.Parser) {
 		ri := get(p.Rune(0))
 		switch p.String(1) {
 		case "Case_Ignorable":
 			ri.CaseIgnorable = true
 		case "Cased":
 			ri.Cased = true
 		case "Lowercase":
 			ri.CaseMode = cLower
 		case "Uppercase":
 			ri.CaseMode = cUpper
 		}
 	})

 	// <code>; <lower> ; <title> ; <upper> ; (<condition_list> ;)?
 	parse("SpecialCasing.txt", func(p *ucd.Parser) {
 		// We drop all conditional special casing and deal with them manually in
 		// the language-specific case mappers. Rune 0x03A3 is the only one with
 		// a conditional formatting that is not language-specific. However,
 		// dealing with this letter is tricky, especially in a streaming
 		// context, so we deal with it in the Caser for Greek specifically.
 		ri := get(p.Rune(0))
 		if p.String(4) == "" {
 			ri.HasSpecial = true
 			ri.Special[cLower] = p.Runes(1)
 			ri.Special[cTitle] = p.Runes(2)
 			ri.Special[cUpper] = p.Runes(3)
 		} else {
 			ri.Conditional = true
 		}
 	})

 	// TODO: Use text breaking according to UAX #29.
 	// <code>; <word break type>
 	parse("auxiliary/WordBreakProperty.txt", func(p *ucd.Parser) {
 		ri := get(p.Rune(0))
 		ri.BreakType = p.String(1)

 		// We collapse the word breaking properties onto the categories we need.
 		switch p.String(1) { // TODO: officially we need to canonicalize.
 		case "MidLetter", "MidNumLet", "Single_Quote":
 			ri.BreakCat = breakMid
 			if !ri.CaseIgnorable {
 				// finalSigma relies on the fact that all breakMid runes are
 				// also a Case_Ignorable. Revisit this code when this changes.
 				// Pass the rune itself: %U formats an integer code point,
 				// not a *runeInfo.
 				log.Fatalf("Rune %U, which has a break category mid, is not a case ignorable", ri.Rune)
 			}
 		case "ALetter", "Hebrew_Letter", "Numeric", "Extend", "ExtendNumLet", "Format", "ZWJ":
 			ri.BreakCat = breakLetter
 		}
 	})

 	// <code>; <type>; <mapping>
 	parse("CaseFolding.txt", func(p *ucd.Parser) {
 		ri := get(p.Rune(0))
 		switch p.String(1) {
 		case "C":
 			// Common folding: serves as both the simple and the full folding.
 			ri.FoldSimple = p.Rune(2)
 			ri.FoldFull = p.Runes(2)
 		case "S":
 			ri.FoldSimple = p.Rune(2)
 		case "T":
 			ri.FoldSpecial = p.Rune(2)
 		case "F":
 			ri.FoldFull = p.Runes(2)
 		default:
 			log.Fatalf("%U: unknown type: %s", p.Rune(0), p.String(1))
 		}
 	})

 	return chars
 }

 // genTables parses the UCD, computes the trie value for every rune and writes
 // the result, together with the xorData and exceptions strings, to tables.go
 // in package cases.
 func genTables() {
 	chars := parseUCD()
 	// Note that this runs before makeEntry, so every runeInfo.entry is still
 	// zero while the properties are verified.
 	verifyProperties(chars)

 	t := triegen.NewTrie("case")
 	for i := range chars {
 		c := &chars[i]
 		// makeEntry also appends to the package-level xorData and
 		// exceptionData, so this loop must finish before they are written out.
 		makeEntry(c)
 		t.Insert(rune(i), uint64(c.entry))
 	}

 	w := gen.NewCodeWriter()
 	// log.Fatal below exits the process without running deferred calls, so
 	// the file is only written when trie generation succeeds.
 	defer w.WriteVersionedGoFile("tables.go", "cases")

 	gen.WriteUnicodeVersion(w)

 	// TODO: write CLDR version after adding a mechanism to detect that the
 	// tables on which the manually created locale-sensitive casing code is
 	// based hasn't changed.

 	w.WriteVar("xorData", string(xorData))
 	w.WriteVar("exceptions", string(exceptionData))

 	sz, err := t.Gen(w, triegen.Compact(&sparseCompacter{}))
 	if err != nil {
 		log.Fatal(err)
 	}
 	w.Size += sz
 }

 // makeEntry computes the trie value ri.entry from the parsed properties of ri.
 //
 // Every rune gets a case type and a ccc/break field. For a cased rune whose
 // opposite-case mapping has the same UTF-8 length, the byte-wise XOR between
 // the two encodings is stored either inline in the entry or as an index into
 // xorData. All other cased runes are handed to makeException.
 func makeEntry(ri *runeInfo) {
 	if ri.CaseIgnorable {
 		if ri.Cased {
 			ri.entry = cIgnorableCased
 		} else {
 			ri.entry = cIgnorableUncased
 		}
 	} else {
 		ri.entry = ri.CaseMode
 	}

 	// TODO: handle soft-dotted.

 	// Collapse the combining class onto zero, Above or "other".
 	ccc := cccOther
 	switch ri.CCC {
 	case 0: // Not_Reordered
 		ccc = cccZero
 	case above: // Above
 		ccc = cccAbove
 	}
 	// The break category shares the field: a rune with category breakBreak
 	// overrides whatever ccc value was chosen above.
 	switch ri.BreakCat {
 	case breakBreak:
 		ccc = cccBreak
 	case breakMid:
 		ri.entry |= isMidBit
 	}

 	ri.entry |= ccc

 	// Runes without a case mode have no mapping information to encode.
 	if ri.CaseMode == cUncased {
 		return
 	}

 	// Need to do something special.
 	if ri.CaseMode == cTitle || ri.HasSpecial || ri.mapping(cTitle) != ri.mapping(cUpper) {
 		makeException(ri)
 		return
 	}
 	// A full folding that is neither the upper nor the lower mapping cannot
 	// be derived from the XOR pattern, so it needs an exception as well.
 	if f := string(ri.FoldFull); len(f) > 0 && f != ri.mapping(cUpper) && f != ri.mapping(cLower) {
 		makeException(ri)
 		return
 	}

 	// Rune is either lowercase or uppercase.

 	orig := string(ri.Rune)
 	mapped := ""
 	if ri.CaseMode == cUpper {
 		mapped = ri.mapping(cLower)
 	} else {
 		mapped = ri.mapping(cUpper)
 	}

 	// The XOR encoding below works byte by byte, so both UTF-8 encodings
 	// must have the same length.
 	if len(orig) != len(mapped) {
 		makeException(ri)
 		return
 	}

 	if string(ri.FoldFull) == ri.mapping(cUpper) {
 		ri.entry |= inverseFoldBit
 	}

 	n := len(orig)

 	// Create per-byte XOR mask.
 	var b []byte
 	for i := 0; i < n; i++ {
 		b = append(b, orig[i]^mapped[i])
 	}

 	// Remove leading 0 bytes, but keep at least one byte.
 	for ; len(b) > 1 && b[0] == 0; b = b[1:] {
 	}

 	// A single byte whose top two bits are clear is stored inline.
 	if len(b) == 1 && b[0]&0xc0 == 0 {
 		ri.entry |= info(b[0]) << xorShift
 		return
 	}

 	// Otherwise store the pattern in xorData, reusing an earlier identical
 	// pattern when there is one.
 	key := string(b)
 	x, ok := xorCache[key]
 	if !ok {
 		xorData = append(xorData, 0) // for detecting start of sequence
 		xorData = append(xorData, b...)

 		// The index refers to the last byte of the pattern just appended.
 		x = len(xorData) - 1
 		xorCache[key] = x
 	}
 	ri.entry |= info(x<<xorShift) | xorIndexBit
 }

 // xorCache maps an XOR pattern (as a string) to the index in xorData at which
 // makeEntry stored it, so that identical patterns are stored only once.
 var xorCache = map[string]int{}

 // xorData contains byte-wise XOR data for the least significant bytes of a
 // UTF-8 encoded rune. An index points to the last byte. The sequence starts
 // with a zero terminator.
 var xorData = []byte{}

 // See the comments in gen_trieval.go re "the exceptions slice".
 // The slice starts out with a single zero byte; makeException appends one
 // record per exceptional rune.
 var exceptionData = []byte{0}

 // makeException encodes case mappings that cannot be expressed in a simple
 // XOR diff.
 //
 // It rewrites ri.entry to hold the case type, the exception bit and the
 // offset of a new record appended to exceptionData. The record consists of:
 //
 //	byte 0: the ccc bits of the old entry or-ed with the length of the full fold
 //	byte 1: 3-bit lengths of the stored mappings, shifted in one after another
 //	the full fold string (possibly empty)
 //	the stored mapping strings
 //
 // Mappings are considered in the order lower, upper, title, skipping the one
 // that equals the rune's own case type.
 func makeException(ri *runeInfo) {
 	ccc := ri.entry & cccMask
 	// Set exception bit and retain case type.
 	ri.entry &= 0x0007
 	ri.entry |= exceptionBit

 	if len(exceptionData) >= 1<<numExceptionBits {
 		log.Fatalf("%U:exceptionData too large %#x > %d bits", ri.Rune, len(exceptionData), numExceptionBits)
 	}

 	// Set the offset in the exceptionData array.
 	ri.entry |= info(len(exceptionData) << exceptionShift)

 	orig := string(ri.Rune)
 	tc := ri.mapping(cTitle)
 	uc := ri.mapping(cUpper)
 	lc := ri.mapping(cLower)
 	ff := string(ri.FoldFull)

 	// addString sets the length of a string and adds it to the expansions array.
 	// b points at the length byte of the current record. A mapping equal to
 	// the rune itself is recorded with length 0 and no bytes, except when the
 	// rune's case mode is cLower.
 	addString := func(s string, b *byte) {
 		if len(s) == 0 {
 			// Zero-length mappings exist, but only for conditional casing,
 			// which we are representing outside of this table.
 			log.Fatalf("%U: has zero-length mapping.", ri.Rune)
 		}
 		*b <<= 3
 		if s != orig || ri.CaseMode == cLower {
 			n := len(s)
 			if n > 7 {
 				log.Fatalf("%U: mapping larger than 7 (%d)", ri.Rune, n)
 			}
 			// Update *b before appending: append may move exceptionData to
 			// a new backing array, after which b would point at stale memory.
 			*b |= byte(n)
 			exceptionData = append(exceptionData, s...)
 		}
 	}

 	// byte 0:
 	exceptionData = append(exceptionData, byte(ccc)|byte(len(ff)))

 	// byte 1:
 	// Remember its position; the lengths are filled in by addString below.
 	p := len(exceptionData)
 	exceptionData = append(exceptionData, 0)

 	if len(ff) > 7 { // May be zero-length.
 		log.Fatalf("%U: fold string larger than 7 (%d)", ri.Rune, len(ff))
 	}
 	exceptionData = append(exceptionData, ff...)
 	ct := ri.CaseMode
 	// The pointer is taken afresh for every call because a previous call may
 	// have reallocated exceptionData.
 	if ct != cLower {
 		addString(lc, &exceptionData[p])
 	}
 	if ct != cUpper {
 		addString(uc, &exceptionData[p])
 	}
 	if ct != cTitle {
 		addString(tc, &exceptionData[p])
 	}
 }

 // sparseCompacter is a trie value block Compacter. There are many cases where
 // successive runes alternate between lower- and upper-case. This Compacter
 // exploits this by adding a special case type where the case value is obtained
 // from or-ing it with the least-significant bit of the rune, creating large
 // ranges of equal case values that compress well.
 type sparseCompacter struct {
 	// sparseBlocks holds, per stored block, the values returned by makeSparse.
 	sparseBlocks [][]uint16
 	// sparseOffsets holds, per stored block, the value of sparseCount at the
 	// time the block was stored. Print appends one final entry holding the
 	// total.
 	sparseOffsets []uint16
 	// sparseCount is the running total of the range counts returned by
 	// makeSparse for all stored blocks.
 	sparseCount int
 }

 // makeSparse returns the number of elements that compact block would contain
 // as well as the modified values.
 //
 // The first result is a uint16 copy of vals in which some cLower/cUpper
 // values have been replaced by their alternating (XOR-case) form so that
 // neighbouring entries become equal. The second result is the number of runs
 // of equal non-zero values in that copy. vals itself is not modified.
 func makeSparse(vals []uint64) ([]uint16, int) {
 	// Copy the values.
 	values := make([]uint16, len(vals))
 	for i, v := range vals {
 		values[i] = uint16(v)
 	}

 	// alt returns the alternating form of v for position i, or v unchanged if
 	// its case bits are neither cUpper nor cLower.
 	alt := func(i int, v uint16) uint16 {
 		if cm := info(v & fullCasedMask); cm == cUpper || cm == cLower {
 			// Convert cLower or cUpper to cXORCase value, which has the form 11x.
 			xor := v
 			xor &^= 1
 			// The low bit becomes the parity of the position XOR the
 			// original low bit.
 			xor |= uint16(i&1) ^ (v & 1)
 			xor |= 0x4
 			return xor
 		}
 		return v
 	}

 	var count int
 	var previous uint16
 	for i, v := range values {
 		if v != 0 {
 			// Try if the unmodified value is equal to the previous.
 			if v == previous {
 				continue
 			}

 			// Try if the xor-ed value is equal to the previous value.
 			a := alt(i, v)
 			if a == previous {
 				values[i] = a
 				continue
 			}

 			// This is a new value.
 			count++

 			// Use the xor-ed value if it will be identical to the next value.
 			if p := i + 1; p < len(values) && alt(p, values[p]) == a {
 				values[i] = a
 				v = a
 			}
 		}
 		// A zero value ends the current run: previous becomes 0, so the next
 		// non-zero value always starts a new run.
 		previous = v
 	}
 	return values, count
 }

 // Size reports how many bytes the block v would occupy in sparse form and
 // whether this compacter accepts it. Blocks that need more than 16 value
 // ranges are rejected. The size is 2 bytes plus the size of one valueRange
 // per range.
 func (s *sparseCompacter) Size(v []uint64) (int, bool) {
 	// We limit using this method to having 16 entries.
 	const maxRanges = 16

 	_, numRanges := makeSparse(v)
 	if numRanges <= maxRanges {
 		perRange := int(reflect.TypeOf(valueRange{}).Size())
 		return 2 + perRange*numRanges, true
 	}
 	return 0, false
 }

 // Store records the sparse form of block v and returns its handle, which is
 // the number of blocks stored before it. The offset recorded for the block
 // is the number of value ranges stored so far.
 func (s *sparseCompacter) Store(v []uint64) uint32 {
 	block, numRanges := makeSparse(v)

 	handle := uint32(len(s.sparseOffsets))
 	s.sparseOffsets = append(s.sparseOffsets, uint16(s.sparseCount))
 	s.sparseBlocks = append(s.sparseBlocks, block)
 	s.sparseCount += numRanges
 	return handle
 }

 // Handler returns "sparse.lookup", the name of the lookup function for blocks
 // stored by this compacter.
 // NOTE(review): presumably triegen emits a call to this name in the
 // generated trie code; confirm against the triegen package.
 func (s *sparseCompacter) Handler() string {
 	// The sparse global variable and its lookup method is defined in gen_trieval.go.
 	return "sparse.lookup"
 }

 // Print writes the Go declarations of sparseOffsets and sparseValues for all
 // stored blocks to w. It returns the first write error encountered, if any;
 // writing continues after an error.
 //
 // Each block is emitted as a list of valueRange literals, one per run of
 // equal non-zero values, with lo and hi computed as 0x80 plus the first and
 // last position of the run within the block.
 func (s *sparseCompacter) Print(w io.Writer) (retErr error) {
 	// emit writes formatted output and remembers only the first error.
 	emit := func(format string, args ...interface{}) {
 		if _, err := fmt.Fprintf(w, format, args...); err != nil && retErr == nil {
 			retErr = err
 		}
 	}

 	numBlocks := len(s.sparseBlocks)
 	if len(s.sparseOffsets) == numBlocks {
 		// Add the closing offset exactly once, even if Print is called again.
 		s.sparseOffsets = append(s.sparseOffsets, uint16(s.sparseCount))
 	}
 	emit("// sparseOffsets: %d entries, %d bytes\n", numBlocks+1, (numBlocks+1)*2)
 	emit("var sparseOffsets = %#v\n\n", s.sparseOffsets)

 	total := s.sparseCount
 	emit("// sparseValues: %d entries, %d bytes\n", total, total*4)
 	emit("var sparseValues = [%d]valueRange {", total)
 	for blockIdx, block := range s.sparseBlocks {
 		emit("\n// Block %#x, offset %#x", blockIdx, s.sparseOffsets[blockIdx])

 		var cur uint16 // value of the run currently open; 0 means none.
 		for pos, next := range block {
 			if next != cur {
 				if cur != 0 {
 					// Close the run that ended at the previous position.
 					emit(",hi:%#02x},", 0x80+pos-1)
 				}
 				if next != 0 {
 					// Open a new run at this position.
 					emit("\n{value:%#04x,lo:%#02x", next, 0x80+pos)
 				}
 			}
 			cur = next
 		}
 		if cur != 0 {
 			// Close a run that extends to the end of the block.
 			emit(",hi:%#02x},", 0x80+len(block)-1)
 		}
 	}
 	emit("\n}\n\n")
 	return retErr
 }

 // verifyProperties checks the properties of the runes that are relied upon in
 // the implementation. Each property is marked with an identifier that is
 // referred to in the places where it is used. The program is terminated with
 // log.Fatalf as soon as one property does not hold.
 //
 // chars must be indexed by rune value, as returned by parseUCD.
 func verifyProperties(chars []runeInfo) {
 	for i := range chars {
 		// Use a pointer to avoid copying a runeInfo for every code point.
 		c := &chars[i]
 		r := rune(i)

 		// Rune properties.

 		// A.1: modifier never changes on lowercase. [ltLower]
 		if c.CCC > 0 && unicode.ToLower(r) != r {
 			log.Fatalf("%U: non-starter changes when lowercased", r)
 		}

 		// A.2: properties of decompositions starting with I or J. [ltLower]
 		d := norm.NFD.PropertiesString(string(r)).Decomposition()
 		if len(d) > 0 {
 			if d[0] == 'I' || d[0] == 'J' {
 				// A.2.1: we expect at least an ASCII character and a modifier.
 				if len(d) < 3 {
 					log.Fatalf("%U: length of decomposition was %d; want >= 3", r, len(d))
 				}

 				// All subsequent runes are modifiers and all have the same CCC.
 				runes := []rune(string(d[1:]))
 				ccc := chars[runes[0]].CCC

 				// Every follow-up modifier is checked. An earlier version
 				// added a byte length to the outer rune index and compared
 				// the sum with len(d), which ended this loop after its first
 				// iteration for almost every rune.
 				for _, mr := range runes[1:] {
 					mc := &chars[mr]

 					// A.2.2: all modifiers have a CCC of Above or less.
 					if ccc == 0 || ccc > above {
 						log.Fatalf("%U: CCC of successive rune (%U) was %d; want (0,230]", r, mr, ccc)
 					}

 					// A.2.3: a sequence of modifiers all have the same CCC.
 					if mc.CCC != ccc {
 						log.Fatalf("%U: CCC of follow-up modifier (%U) was %d; want %d", r, mr, mc.CCC, ccc)
 					}

 					// A.2.4: for each trailing r, r in [0x300, 0x311] <=> CCC == Above.
 					if (ccc == above) != (0x300 <= mr && mr <= 0x311) {
 						log.Fatalf("%U: modifier %U in [U+0300, U+0311] != ccc(%U) == 230", r, mr, mr)
 					}
 				}
 			}
 		}

 		// A.3: no U+0307 in decomposition of Soft-Dotted rune. [ltUpper]
 		if unicode.Is(unicode.Soft_Dotted, r) && strings.Contains(string(d), "\u0307") {
 			log.Fatalf("%U: decomposition of soft-dotted rune may not contain U+0307", r)
 		}

 		// A.4: only rune U+0345 may be of CCC Iota_Subscript. [elUpper]
 		if c.CCC == iotaSubscript && r != 0x0345 {
 			log.Fatalf("%U: only rune U+0345 may have CCC Iota_Subscript", r)
 		}

 		// A.5: soft-dotted runes do not have exceptions.
 		// NOTE(review): genTables calls verifyProperties before makeEntry, so
 		// c.entry is still zero at that point and this check cannot fire in
 		// that call order; confirm whether that is intended.
 		if c.SoftDotted && c.entry&exceptionBit != 0 {
 			log.Fatalf("%U: soft-dotted has exception", r)
 		}

 		// A.6: Greek decomposition. [elUpper]
 		if unicode.Is(unicode.Greek, r) {
 			if b := norm.NFD.PropertiesString(string(r)).Decomposition(); b != nil {
 				runes := []rune(string(b))
 				// A.6.1: If a Greek rune decomposes and the first rune of the
 				// decomposition is greater than U+00FF, the rune is always
 				// Greek and not a modifier.
 				if f := runes[0]; unicode.IsMark(f) || f > 0xFF && !unicode.Is(unicode.Greek, f) {
 					log.Fatalf("%U: expected first rune of Greek decomposition to be letter, found %U", r, f)
 				}
 				// A.6.2: Any follow-up rune in a Greek decomposition is a
 				// modifier of which the first should be gobbled in
 				// decomposition.
 				for _, m := range runes[1:] {
 					switch m {
 					case 0x0313, 0x0314, 0x0301, 0x0300, 0x0306, 0x0342, 0x0308, 0x0304, 0x345:
 					default:
 						log.Fatalf("%U: modifier %U is outside of expected Greek modifier set", r, m)
 					}
 				}
 			}
 		}

 		// Breaking properties.

 		// B.1: all runes with CCC > 0 are of break type Extend.
 		if c.CCC > 0 && c.BreakType != "Extend" {
 			log.Fatalf("%U: CCC == %d, but got break type %s; want Extend", r, c.CCC, c.BreakType)
 		}

 		// B.2: all cased runes with c.CCC == 0 are of break type ALetter.
 		if c.CCC == 0 && c.Cased && c.BreakType != "ALetter" {
 			log.Fatalf("%U: cased, but got break type %s; want ALetter", r, c.BreakType)
 		}

 		// B.3: letter category.
 		if c.CCC == 0 && c.BreakCat != breakBreak && !c.CaseIgnorable {
 			if c.BreakCat != breakLetter {
 				log.Fatalf("%U: check for letter break type gave %d; want %d", r, c.BreakCat, breakLetter)
 			}
 		}
 	}
 }

 // genTablesTest generates tables_test.go for package cases. The file consists
 // of a single var block with the following tables, written in this order:
 // caseIgnorable, special, foldMap, breakProp and breakTest.
 func genTablesTest() {
 	w := &bytes.Buffer{}

 	fmt.Fprintln(w, "var (")
 	// Only runes for which verifyIgnore is false end up in the table.
 	printProperties(w, "DerivedCoreProperties.txt", "Case_Ignorable", verifyIgnore)

 	// We discard the output as we know we have perfect functions. We run them
 	// just to verify the properties are correct.
 	// n counts the runes that have a property but are not matched by its
 	// verify function; it has to be zero for the functions to be exact.
 	n := printProperties(io.Discard, "DerivedCoreProperties.txt", "Cased", verifyCased)
 	n += printProperties(io.Discard, "DerivedCoreProperties.txt", "Lowercase", verifyLower)
 	n += printProperties(io.Discard, "DerivedCoreProperties.txt", "Uppercase", verifyUpper)
 	if n > 0 {
 		log.Fatalf("One of the discarded properties does not have a perfect filter.")
 	}

 	// <code>; <lower> ; <title> ; <upper> ; (<condition_list> ;)?
 	fmt.Fprintln(w, "\tspecial = map[rune]struct{ toLower, toTitle, toUpper string }{")
 	parse("SpecialCasing.txt", func(p *ucd.Parser) {
 		// Skip conditional entries.
 		if p.String(4) != "" {
 			return
 		}
 		r := p.Rune(0)
 		fmt.Fprintf(w, "\t\t0x%04x: {%q, %q, %q},\n",
 			r, string(p.Runes(1)), string(p.Runes(2)), string(p.Runes(3)))
 	})
 	fmt.Fprint(w, "\t}\n\n")

 	// <code>; <type>; <runes>
 	table := map[rune]struct{ simple, full, special string }{}
 	parse("CaseFolding.txt", func(p *ucd.Parser) {
 		r := p.Rune(0)
 		t := p.String(1)
 		v := string(p.Runes(2))
 		// Leave out foldings that equal unicode.ToLower, except for type T.
 		if t != "T" && v == string(unicode.ToLower(r)) {
 			return
 		}
 		x := table[r]
 		switch t {
 		case "C":
 			x.full = v
 			x.simple = v
 		case "S":
 			x.simple = v
 		case "F":
 			x.full = v
 		case "T":
 			x.special = v
 		}
 		table[r] = x
 	})
 	fmt.Fprintln(w, "\tfoldMap = map[rune]struct{ simple, full, special string }{")
 	// Walk the code points in increasing order rather than ranging over the
 	// map, whose iteration order is not deterministic.
 	for r := rune(0); r < 0x10FFFF; r++ {
 		x, ok := table[r]
 		if !ok {
 			continue
 		}
 		fmt.Fprintf(w, "\t\t0x%04x: {%q, %q, %q},\n", r, x.simple, x.full, x.special)
 	}
 	fmt.Fprint(w, "\t}\n\n")

 	// Break property
 	notBreak := map[rune]bool{}
 	parse("auxiliary/WordBreakProperty.txt", func(p *ucd.Parser) {
 		switch p.String(1) {
 		case "Extend", "Format", "MidLetter", "MidNumLet", "Single_Quote",
 			"ALetter", "Hebrew_Letter", "Numeric", "ExtendNumLet", "ZWJ":
 			notBreak[p.Rune(0)] = true
 		}
 	})

 	// Emit the inclusive ranges of runes that are not in notBreak.
 	fmt.Fprintln(w, "\tbreakProp = []struct{ lo, hi rune }{")
 	inBreak := false
 	for r := rune(0); r <= lastRuneForTesting; r++ {
 		if isBreak := !notBreak[r]; isBreak != inBreak {
 			if isBreak {
 				fmt.Fprintf(w, "\t\t{0x%x, ", r)
 			} else {
 				fmt.Fprintf(w, "0x%x},\n", r-1)
 			}
 			inBreak = isBreak
 		}
 	}
 	// Close a range that is still open at the last rune.
 	if inBreak {
 		fmt.Fprintf(w, "0x%x},\n", lastRuneForTesting)
 	}
 	fmt.Fprint(w, "\t}\n\n")

 	// Word break test
 	// Filter out all samples that do not contain cased characters.
 	cased := map[rune]bool{}
 	parse("DerivedCoreProperties.txt", func(p *ucd.Parser) {
 		if p.String(1) == "Cased" {
 			cased[p.Rune(0)] = true
 		}
 	})

 	fmt.Fprintln(w, "\tbreakTest = []string{")
 	parse("auxiliary/WordBreakTest.txt", func(p *ucd.Parser) {
 		// The first field is split on spaces and consumed two tokens at a
 		// time: a break marker followed by a hexadecimal code point.
 		c := strings.Split(p.String(0), " ")

 		const sep = '|'
 		numCased := 0
 		test := ""
 		for ; len(c) >= 2; c = c[2:] {
 			// A "÷" marker becomes a separator, except at the very start.
 			if c[0] == "÷" && test != "" {
 				test += string(sep)
 			}
 			i, err := strconv.ParseUint(c[1], 16, 32)
 			r := rune(i)
 			if err != nil {
 				log.Fatalf("Invalid rune %q.", c[1])
 			}
 			if r == sep {
 				log.Fatalf("Separator %q not allowed in test data. Pick another one.", sep)
 			}
 			if cased[r] {
 				numCased++
 			}
 			test += string(r)
 		}
 		// NOTE(review): the comment above speaks of samples without cased
 		// characters, but this condition also drops samples with exactly one
 		// cased rune; confirm which is intended.
 		if numCased > 1 {
 			fmt.Fprintf(w, "\t\t%q,\n", test)
 		}
 	})
 	fmt.Fprintln(w, "\t}")

 	fmt.Fprintln(w, ")")

 	gen.WriteVersionedGoFile("tables_test.go", "cases", w.Bytes())
 }

 // These functions are used only to verify that the definitions of the
 // corresponding properties have not changed in the Unicode Standard.

 // verifyCased reports whether r is lowercase or uppercase according to
 // verifyLower or verifyUpper, or is a title-case letter.
 func verifyCased(r rune) bool {
 	switch {
 	case verifyLower(r), verifyUpper(r), unicode.IsTitle(r):
 		return true
 	}
 	return false
 }

 // verifyLower reports whether r is a lowercase letter or has the
 // Other_Lowercase property.
 func verifyLower(r rune) bool {
 	if unicode.IsLower(r) {
 		return true
 	}
 	return unicode.Is(unicode.Other_Lowercase, r)
 }

 // verifyUpper reports whether r is an uppercase letter or has the
 // Other_Uppercase property.
 func verifyUpper(r rune) bool {
 	if unicode.IsUpper(r) {
 		return true
 	}
 	return unicode.Is(unicode.Other_Uppercase, r)
 }

 // verifyIgnore is an approximation of the Case_Ignorable property using the
 // core unicode package. It is used to reduce the size of the test data.
 // It reports whether r is in one of the general categories Mn, Me, Cf, Lm
 // or Sk.
 func verifyIgnore(r rune) bool {
 	return unicode.In(r,
 		unicode.Mn,
 		unicode.Me,
 		unicode.Cf,
 		unicode.Lm,
 		unicode.Sk,
 	)
 }

 // printProperties writes to w a map[rune]bool table for the given property of
 // the given UCD file. The table lists only those runes with the property for
 // which the filter f returns false, so a rune r has the property if it is in
 // the table or if f(r). The return value is the number of runes listed.
 //
 // The variable name of the table is the property name with its underscores
 // removed and its first part lowercased. The program is terminated if f
 // accepts a rune up to lastRuneForTesting that does not have the property.
 func printProperties(w io.Writer, file, property string, f func(r rune) bool) int {
 	parts := strings.Split(property, "_")
 	parts[0] = strings.ToLower(parts[0])
 	varName := strings.Join(parts, "")

 	members := map[rune]bool{} // all runes that have the property
 	listed := 0                // runes written to the table

 	fmt.Fprintf(w, "\t%s = map[rune]bool{\n", varName)
 	parse(file, func(p *ucd.Parser) {
 		if p.String(1) != property {
 			return
 		}
 		r := p.Rune(0)
 		members[r] = true
 		if f(r) {
 			// Covered by the filter; no need to list it.
 			return
 		}
 		listed++
 		fmt.Fprintf(w, "\t\t0x%.4x: true,\n", r)
 	})
 	fmt.Fprint(w, "\t}\n\n")

 	// Verify that f is correct, that is, it represents a subset of the property.
 	for r := rune(0); r <= lastRuneForTesting; r++ {
 		if !members[r] && f(r) {
 			log.Fatalf("Incorrect filter func for property %q.", property)
 		}
 	}
 	return listed
 }

 // The newCaseTrie, sparseValues and sparseOffsets definitions below are
 // placeholders referred to by gen_trieval.go. The real definitions are
 // generated by this program and written to tables.go.

 // newCaseTrie is a stub that ignores its argument and returns 0.
 func newCaseTrie(int) int { return 0 }

 // Zero-length stand-ins for the generated tables.
 var (
 	sparseValues  [0]valueRange
 	sparseOffsets [0]uint16
 )
	// Copyright 2014 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	//go:build ignore
	// +build ignore

	// This program generates the trie for casing operations. The Unicode casing
	// algorithm requires the lookup of various properties and mappings for each
	// rune. The table generated by this generator combines several of the most
	// frequently used of these into a single trie so that they can be accessed
	// with a single lookup.
	package main

	import (
	"bytes"
	"fmt"
	"io"
	"log"
	"reflect"
	"strconv"
	"strings"
	"unicode"

	"golang.org/x/text/internal/gen"
	"golang.org/x/text/internal/triegen"
	"golang.org/x/text/internal/ucd"
	"golang.org/x/text/unicode/norm"
	)

	func main() {
	gen.Init()
	genTables()
	genTablesTest()
	gen.Repackage("gen_trieval.go", "trieval.go", "cases")
	}

	// runeInfo contains all information for a rune that we care about for casing
	// operations.
	type runeInfo struct {
	Rune rune

	entry info // trie value for this rune.

	CaseMode info

	// Simple case mappings.
	Simple [1 + maxCaseMode][]rune

	// Special casing
	HasSpecial bool
	Conditional bool
	Special [1 + maxCaseMode][]rune

	// Folding
	FoldSimple rune
	FoldSpecial rune
	FoldFull []rune

	// TODO: FC_NFKC, or equivalent data.

	// Properties
	SoftDotted bool
	CaseIgnorable bool
	Cased bool
	DecomposeGreek bool
	BreakType string
	BreakCat breakCategory

	// We care mostly about 0, Above, and IotaSubscript.
	CCC byte
	}

	type breakCategory int

	const (
	breakBreak breakCategory = iota
	breakLetter
	breakMid
	)

	// mapping returns the case mapping for the given case type.
	func (r *runeInfo) mapping(c info) string {
	if r.HasSpecial {
	return string(r.Special[c])
	}
	if len(r.Simple[c]) != 0 {
	return string(r.Simple[c])
	}
	return string(r.Rune)
	}

	func parse(file string, f func(p *ucd.Parser)) {
	ucd.Parse(gen.OpenUCDFile(file), f)
	}

	func parseUCD() []runeInfo {
	chars := make([]runeInfo, unicode.MaxRune)

	get := func(r rune) *runeInfo {
	c := &chars[r]
	c.Rune = r
	return c
	}

	parse("UnicodeData.txt", func(p *ucd.Parser) {
	ri := get(p.Rune(0))
	ri.CCC = byte(p.Int(ucd.CanonicalCombiningClass))
	ri.Simple[cLower] = p.Runes(ucd.SimpleLowercaseMapping)
	ri.Simple[cUpper] = p.Runes(ucd.SimpleUppercaseMapping)
	ri.Simple[cTitle] = p.Runes(ucd.SimpleTitlecaseMapping)
	if p.String(ucd.GeneralCategory) == "Lt" {
	ri.CaseMode = cTitle
	}
	})

	// <code>; <property>
	parse("PropList.txt", func(p *ucd.Parser) {
	if p.String(1) == "Soft_Dotted" {
	chars[p.Rune(0)].SoftDotted = true
	}
	})

	// <code>; <word break type>
	parse("DerivedCoreProperties.txt", func(p *ucd.Parser) {
	ri := get(p.Rune(0))
	switch p.String(1) {
	case "Case_Ignorable":
	ri.CaseIgnorable = true
	case "Cased":
	ri.Cased = true
	case "Lowercase":
	ri.CaseMode = cLower
	case "Uppercase":
	ri.CaseMode = cUpper
	}
	})

	// <code>; <lower> ; <title> ; <upper> ; (<condition_list> ;)?
	parse("SpecialCasing.txt", func(p *ucd.Parser) {
	// We drop all conditional special casing and deal with them manually in
	// the language-specific case mappers. Rune 0x03A3 is the only one with
	// a conditional formatting that is not language-specific. However,
	// dealing with this letter is tricky, especially in a streaming
	// context, so we deal with it in the Caser for Greek specifically.
	ri := get(p.Rune(0))
	if p.String(4) == "" {
	ri.HasSpecial = true
	ri.Special[cLower] = p.Runes(1)
	ri.Special[cTitle] = p.Runes(2)
	ri.Special[cUpper] = p.Runes(3)
	} else {
	ri.Conditional = true
	}
	})

	// TODO: Use text breaking according to UAX #29.
	// <code>; <word break type>
	parse("auxiliary/WordBreakProperty.txt", func(p *ucd.Parser) {
	ri := get(p.Rune(0))
	ri.BreakType = p.String(1)

	// We collapse the word breaking properties onto the categories we need.
	switch p.String(1) { // TODO: officially we need to canonicalize.
	case "MidLetter", "MidNumLet", "Single_Quote":
	ri.BreakCat = breakMid
	if !ri.CaseIgnorable {
	// finalSigma relies on the fact that all breakMid runes are
	// also a Case_Ignorable. Revisit this code when this changes.
	log.Fatalf("Rune %U, which has a break category mid, is not a case ignorable", ri)
	}
	case "ALetter", "Hebrew_Letter", "Numeric", "Extend", "ExtendNumLet", "Format", "ZWJ":
	ri.BreakCat = breakLetter
	}
	})

	// <code>; <type>; <mapping>
	parse("CaseFolding.txt", func(p *ucd.Parser) {
	ri := get(p.Rune(0))
	switch p.String(1) {
	case "C":
	ri.FoldSimple = p.Rune(2)
	ri.FoldFull = p.Runes(2)
	case "S":
	ri.FoldSimple = p.Rune(2)
	case "T":
	ri.FoldSpecial = p.Rune(2)
	case "F":
	ri.FoldFull = p.Runes(2)
	default:
	log.Fatalf("%U: unknown type: %s", p.Rune(0), p.String(1))
	}
	})

	return chars
	}

	func genTables() {
	chars := parseUCD()
	verifyProperties(chars)

	t := triegen.NewTrie("case")
	for i := range chars {
	c := &chars[i]
	makeEntry(c)
	t.Insert(rune(i), uint64(c.entry))
	}

	w := gen.NewCodeWriter()
	defer w.WriteVersionedGoFile("tables.go", "cases")

	gen.WriteUnicodeVersion(w)

	// TODO: write CLDR version after adding a mechanism to detect that the
	// tables on which the manually created locale-sensitive casing code is
	// based hasn't changed.

	w.WriteVar("xorData", string(xorData))
	w.WriteVar("exceptions", string(exceptionData))

	sz, err := t.Gen(w, triegen.Compact(&sparseCompacter{}))
	if err != nil {
	log.Fatal(err)
	}
	w.Size += sz
	}

	func makeEntry(ri *runeInfo) {
	if ri.CaseIgnorable {
	if ri.Cased {
	ri.entry = cIgnorableCased
	} else {
	ri.entry = cIgnorableUncased
	}
	} else {
	ri.entry = ri.CaseMode
	}

	// TODO: handle soft-dotted.

	ccc := cccOther
	switch ri.CCC {
	case 0: // Not_Reordered
	ccc = cccZero
	case above: // Above
	ccc = cccAbove
	}
	switch ri.BreakCat {
	case breakBreak:
	ccc = cccBreak
	case breakMid:
	ri.entry \|= isMidBit
	}

	ri.entry \|= ccc

	if ri.CaseMode == cUncased {
	return
	}

	// Need to do something special.
	if ri.CaseMode == cTitle \|\| ri.HasSpecial \|\| ri.mapping(cTitle) != ri.mapping(cUpper) {
	makeException(ri)
	return
	}
	if f := string(ri.FoldFull); len(f) > 0 && f != ri.mapping(cUpper) && f != ri.mapping(cLower) {
	makeException(ri)
	return
	}

	// Rune is either lowercase or uppercase.

	orig := string(ri.Rune)
	mapped := ""
	if ri.CaseMode == cUpper {
	mapped = ri.mapping(cLower)
	} else {
	mapped = ri.mapping(cUpper)
	}

	if len(orig) != len(mapped) {
	makeException(ri)
	return
	}

	if string(ri.FoldFull) == ri.mapping(cUpper) {
	ri.entry \|= inverseFoldBit
	}

	n := len(orig)

	// Create per-byte XOR mask.
	var b []byte
	for i := 0; i < n; i++ {
	b = append(b, orig[i]^mapped[i])
	}

	// Remove leading 0 bytes, but keep at least one byte.
	for ; len(b) > 1 && b[0] == 0; b = b[1:] {
	}

	if len(b) == 1 && b[0]&0xc0 == 0 {
	ri.entry \|= info(b[0]) << xorShift
	return
	}

	key := string(b)
	x, ok := xorCache[key]
	if !ok {
	xorData = append(xorData, 0) // for detecting start of sequence
	xorData = append(xorData, b...)

	x = len(xorData) - 1
	xorCache[key] = x
	}
	ri.entry \|= info(x<<xorShift) \| xorIndexBit
	}

	var xorCache = map[string]int{}

	// xorData contains byte-wise XOR data for the least significant bytes of a
	// UTF-8 encoded rune. An index points to the last byte. The sequence starts
	// with a zero terminator.
	var xorData = []byte{}

	// See the comments in gen_trieval.go re "the exceptions slice".
	var exceptionData = []byte{0}

	// makeException encodes case mappings that cannot be expressed in a simple
	// XOR diff.
	func makeException(ri *runeInfo) {
	ccc := ri.entry & cccMask
	// Set exception bit and retain case type.
	ri.entry &= 0x0007
	ri.entry \|= exceptionBit

	if len(exceptionData) >= 1<<numExceptionBits {
	log.Fatalf("%U:exceptionData too large %#x > %d bits", ri.Rune, len(exceptionData), numExceptionBits)
	}

	// Set the offset in the exceptionData array.
	ri.entry \|= info(len(exceptionData) << exceptionShift)

	orig := string(ri.Rune)
	tc := ri.mapping(cTitle)
	uc := ri.mapping(cUpper)
	lc := ri.mapping(cLower)
	ff := string(ri.FoldFull)

	// addString sets the length of a string and adds it to the expansions array.
	addString := func(s string, b *byte) {
	if len(s) == 0 {
	// Zero-length mappings exist, but only for conditional casing,
	// which we are representing outside of this table.
	log.Fatalf("%U: has zero-length mapping.", ri.Rune)
	}
	*b <<= 3
	if s != orig \|\| ri.CaseMode == cLower {
	n := len(s)
	if n > 7 {
	log.Fatalf("%U: mapping larger than 7 (%d)", ri.Rune, n)
	}
	*b \|= byte(n)
	exceptionData = append(exceptionData, s...)
	}
	}

	// byte 0:
	exceptionData = append(exceptionData, byte(ccc)\|byte(len(ff)))

	// byte 1:
	p := len(exceptionData)
	exceptionData = append(exceptionData, 0)

	if len(ff) > 7 { // May be zero-length.
	log.Fatalf("%U: fold string larger than 7 (%d)", ri.Rune, len(ff))
	}
	exceptionData = append(exceptionData, ff...)
	ct := ri.CaseMode
	if ct != cLower {
	addString(lc, &exceptionData[p])
	}
	if ct != cUpper {
	addString(uc, &exceptionData[p])
	}
	if ct != cTitle {
	addString(tc, &exceptionData[p])
	}
	}

	// sparseCompacter is a trie value block Compacter. There are many cases where
	// successive runes alternate between lower- and upper-case. This Compacter
	// exploits this by adding a special case type where the case value is obtained
	// from or-ing it with the least-significant bit of the rune, creating large
	// ranges of equal case values that compress well.
	type sparseCompacter struct {
	// sparseBlocks holds, per stored block, the values returned by makeSparse.
	sparseBlocks [][]uint16
	// sparseOffsets holds, per stored block, the value of sparseCount at the
	// time the block was stored. Print appends one final entry with the total.
	sparseOffsets []uint16
	// sparseCount is the running total of the entry counts reported by
	// makeSparse for all stored blocks.
	sparseCount int
	}

	// makeSparse returns the number of elements that compact block would contain
	// as well as the modified values.
	func makeSparse(vals []uint64) ([]uint16, int) {
	// Copy the values.
	values := make([]uint16, len(vals))
	for i, v := range vals {
	values[i] = uint16(v)
	}

	alt := func(i int, v uint16) uint16 {
	if cm := info(v & fullCasedMask); cm == cUpper \|\| cm == cLower {
	// Convert cLower or cUpper to cXORCase value, which has the form 11x.
	xor := v
	xor &^= 1
	xor \|= uint16(i&1) ^ (v & 1)
	xor \|= 0x4
	return xor
	}
	return v
	}

	var count int
	var previous uint16
	for i, v := range values {
	if v != 0 {
	// Try if the unmodified value is equal to the previous.
	if v == previous {
	continue
	}

	// Try if the xor-ed value is equal to the previous value.
	a := alt(i, v)
	if a == previous {
	values[i] = a
	continue
	}

	// This is a new value.
	count++

	// Use the xor-ed value if it will be identical to the next value.
	if p := i + 1; p < len(values) && alt(p, values[p]) == a {
	values[i] = a
	v = a
	}
	}
	previous = v
	}
	return values, count
	}

	// Size reports the number of bytes block v would occupy when stored by this
	// Compacter, and whether the Compacter can be used for the block at all. A
	// block is only accepted if makeSparse reports at most 16 entries for it.
	func (s *sparseCompacter) Size(v []uint64) (int, bool) {
		// We limit using this method to having 16 entries.
		const maxEntries = 16

		_, numEntries := makeSparse(v)
		if numEntries <= maxEntries {
			// 2 bytes of fixed cost (presumably the block's uint16 offset entry)
			// plus one valueRange per entry.
			entrySize := int(reflect.TypeOf(valueRange{}).Size())
			return 2 + entrySize*numEntries, true
		}
		return 0, false
	}

	// Store records the sparse form of block v and returns its handle, which is
	// the index of the block's entry in sparseOffsets. The entry itself is the
	// number of sparse values stored before this block.
	//
	// Offsets are kept as uint16. The generator is aborted if the running total
	// of sparse values no longer fits, as the conversion would otherwise silently
	// truncate the offset and produce a corrupt table.
	func (s *sparseCompacter) Store(v []uint64) uint32 {
		const maxOffset = 1<<16 - 1

		h := uint32(len(s.sparseOffsets))
		values, sz := makeSparse(v)
		if end := s.sparseCount + sz; end > maxOffset {
			log.Fatalf("sparse value count %d does not fit in a uint16 offset", end)
		}
		s.sparseBlocks = append(s.sparseBlocks, values)
		s.sparseOffsets = append(s.sparseOffsets, uint16(s.sparseCount))
		s.sparseCount += sz
		return h
	}

	// Handler returns the name of the lookup function to be used for blocks
	// stored by this Compacter.
	func (s *sparseCompacter) Handler() string {
		// Both the sparse global variable and its lookup method are defined in
		// gen_trieval.go.
		const lookupFunc = "sparse.lookup"
		return lookupFunc
	}

	// Print writes the Go source of the sparseOffsets and sparseValues tables to
	// w. Every run of equal non-zero values in a block is written as a single
	// valueRange whose lo and hi are the first and last index of the run within
	// the block, offset by 0x80. The first error encountered while writing, if
	// any, is returned; writing continues regardless.
	func (s *sparseCompacter) Print(w io.Writer) (retErr error) {
		// emit formats to w and remembers only the first write error.
		emit := func(format string, args ...interface{}) {
			if _, err := fmt.Fprintf(w, format, args...); err != nil && retErr == nil {
				retErr = err
			}
		}

		numBlocks := len(s.sparseBlocks)
		// Append the closing offset, unless an earlier call already did so.
		if len(s.sparseOffsets) == numBlocks {
			s.sparseOffsets = append(s.sparseOffsets, uint16(s.sparseCount))
		}
		numOffsets := numBlocks + 1
		emit("// sparseOffsets: %d entries, %d bytes\n", numOffsets, numOffsets*2)
		emit("var sparseOffsets = %#v\n\n", s.sparseOffsets)

		emit("// sparseValues: %d entries, %d bytes\n", s.sparseCount, s.sparseCount*4)
		emit("var sparseValues = [%d]valueRange {", s.sparseCount)
		for blockIdx, block := range s.sparseBlocks {
			emit("\n// Block %#x, offset %#x", blockIdx, s.sparseOffsets[blockIdx])

			// open is the value of the range currently being written; zero means
			// that no range is open.
			var open uint16
			for pos, cur := range block {
				if cur == open {
					continue
				}
				if open != 0 {
					emit(",hi:%#02x},", 0x80+pos-1)
				}
				if cur != 0 {
					emit("\n{value:%#04x,lo:%#02x", cur, 0x80+pos)
				}
				open = cur
			}
			// Close a range that runs up to the end of the block.
			if open != 0 {
				emit(",hi:%#02x},", 0x80+len(block)-1)
			}
		}
		emit("\n}\n\n")
		return retErr
	}

	// verifyProperties that properties of the runes that are relied upon in the
	// implementation. Each property is marked with an identifier that is referred
	// to in the places where it is used.
	func verifyProperties(chars []runeInfo) {
	for i, c := range chars {
	r := rune(i)

	// Rune properties.

	// A.1: modifier never changes on lowercase. [ltLower]
	if c.CCC > 0 && unicode.ToLower(r) != r {
	log.Fatalf("%U: non-starter changes when lowercased", r)
	}

	// A.2: properties of decompositions starting with I or J. [ltLower]
	d := norm.NFD.PropertiesString(string(r)).Decomposition()
	if len(d) > 0 {
	if d[0] == 'I' \|\| d[0] == 'J' {
	// A.2.1: we expect at least an ASCII character and a modifier.
	if len(d) < 3 {
	log.Fatalf("%U: length of decomposition was %d; want >= 3", r, len(d))
	}

	// All subsequent runes are modifiers and all have the same CCC.
	runes := []rune(string(d[1:]))
	ccc := chars[runes[0]].CCC

	for _, mr := range runes[1:] {
	mc := chars[mr]

	// A.2.2: all modifiers have a CCC of Above or less.
	if ccc == 0 \|\| ccc > above {
	log.Fatalf("%U: CCC of successive rune (%U) was %d; want (0,230]", r, mr, ccc)
	}

	// A.2.3: a sequence of modifiers all have the same CCC.
	if mc.CCC != ccc {
	log.Fatalf("%U: CCC of follow-up modifier (%U) was %d; want %d", r, mr, mc.CCC, ccc)
	}

	// A.2.4: for each trailing r, r in [0x300, 0x311] <=> CCC == Above.
	if (ccc == above) != (0x300 <= mr && mr <= 0x311) {
	log.Fatalf("%U: modifier %U in [U+0300, U+0311] != ccc(%U) == 230", r, mr, mr)
	}

	if i += len(string(mr)); i >= len(d) {
	break
	}
	}
	}
	}

	// A.3: no U+0307 in decomposition of Soft-Dotted rune. [ltUpper]
	if unicode.Is(unicode.Soft_Dotted, r) && strings.Contains(string(d), "\u0307") {
	log.Fatalf("%U: decomposition of soft-dotted rune may not contain U+0307", r)
	}

	// A.4: only rune U+0345 may be of CCC Iota_Subscript. [elUpper]
	if c.CCC == iotaSubscript && r != 0x0345 {
	log.Fatalf("%U: only rune U+0345 may have CCC Iota_Subscript", r)
	}

	// A.5: soft-dotted runes do not have exceptions.
	if c.SoftDotted && c.entry&exceptionBit != 0 {
	log.Fatalf("%U: soft-dotted has exception", r)
	}

	// A.6: Greek decomposition. [elUpper]
	if unicode.Is(unicode.Greek, r) {
	if b := norm.NFD.PropertiesString(string(r)).Decomposition(); b != nil {
	runes := []rune(string(b))
	// A.6.1: If a Greek rune decomposes and the first rune of the
	// decomposition is greater than U+00FF, the rune is always
	// great and not a modifier.
	if f := runes[0]; unicode.IsMark(f) \|\| f > 0xFF && !unicode.Is(unicode.Greek, f) {
	log.Fatalf("%U: expected first rune of Greek decomposition to be letter, found %U", r, f)
	}
	// A.6.2: Any follow-up rune in a Greek decomposition is a
	// modifier of which the first should be gobbled in
	// decomposition.
	for _, m := range runes[1:] {
	switch m {
	case 0x0313, 0x0314, 0x0301, 0x0300, 0x0306, 0x0342, 0x0308, 0x0304, 0x345:
	default:
	log.Fatalf("%U: modifier %U is outside of expected Greek modifier set", r, m)
	}
	}
	}
	}

	// Breaking properties.

	// B.1: all runes with CCC > 0 are of break type Extend.
	if c.CCC > 0 && c.BreakType != "Extend" {
	log.Fatalf("%U: CCC == %d, but got break type %s; want Extend", r, c.CCC, c.BreakType)
	}

	// B.2: all cased runes with c.CCC == 0 are of break type ALetter.
	if c.CCC == 0 && c.Cased && c.BreakType != "ALetter" {
	log.Fatalf("%U: cased, but got break type %s; want ALetter", r, c.BreakType)
	}

	// B.3: letter category.
	if c.CCC == 0 && c.BreakCat != breakBreak && !c.CaseIgnorable {
	if c.BreakCat != breakLetter {
	log.Fatalf("%U: check for letter break type gave %d; want %d", r, c.BreakCat, breakLetter)
	}
	}
	}
	}

	func genTablesTest() {
	w := &bytes.Buffer{}

	fmt.Fprintln(w, "var (")
	printProperties(w, "DerivedCoreProperties.txt", "Case_Ignorable", verifyIgnore)

	// We discard the output as we know we have perfect functions. We run them
	// just to verify the properties are correct.
	n := printProperties(io.Discard, "DerivedCoreProperties.txt", "Cased", verifyCased)
	n += printProperties(io.Discard, "DerivedCoreProperties.txt", "Lowercase", verifyLower)
	n += printProperties(io.Discard, "DerivedCoreProperties.txt", "Uppercase", verifyUpper)
	if n > 0 {
	log.Fatalf("One of the discarded properties does not have a perfect filter.")
	}

	// <code>; <lower> ; <title> ; <upper> ; (<condition_list> ;)?
	fmt.Fprintln(w, "\tspecial = map[rune]struct{ toLower, toTitle, toUpper string }{")
	parse("SpecialCasing.txt", func(p *ucd.Parser) {
	// Skip conditional entries.
	if p.String(4) != "" {
	return
	}
	r := p.Rune(0)
	fmt.Fprintf(w, "\t\t0x%04x: {%q, %q, %q},\n",
	r, string(p.Runes(1)), string(p.Runes(2)), string(p.Runes(3)))
	})
	fmt.Fprint(w, "\t}\n\n")

	// <code>; <type>; <runes>
	table := map[rune]struct{ simple, full, special string }{}
	parse("CaseFolding.txt", func(p *ucd.Parser) {
	r := p.Rune(0)
	t := p.String(1)
	v := string(p.Runes(2))
	if t != "T" && v == string(unicode.ToLower(r)) {
	return
	}
	x := table[r]
	switch t {
	case "C":
	x.full = v
	x.simple = v
	case "S":
	x.simple = v
	case "F":
	x.full = v
	case "T":
	x.special = v
	}
	table[r] = x
	})
	fmt.Fprintln(w, "\tfoldMap = map[rune]struct{ simple, full, special string }{")
	for r := rune(0); r < 0x10FFFF; r++ {
	x, ok := table[r]
	if !ok {
	continue
	}
	fmt.Fprintf(w, "\t\t0x%04x: {%q, %q, %q},\n", r, x.simple, x.full, x.special)
	}
	fmt.Fprint(w, "\t}\n\n")

	// Break property
	notBreak := map[rune]bool{}
	parse("auxiliary/WordBreakProperty.txt", func(p *ucd.Parser) {
	switch p.String(1) {
	case "Extend", "Format", "MidLetter", "MidNumLet", "Single_Quote",
	"ALetter", "Hebrew_Letter", "Numeric", "ExtendNumLet", "ZWJ":
	notBreak[p.Rune(0)] = true
	}
	})

	fmt.Fprintln(w, "\tbreakProp = []struct{ lo, hi rune }{")
	inBreak := false
	for r := rune(0); r <= lastRuneForTesting; r++ {
	if isBreak := !notBreak[r]; isBreak != inBreak {
	if isBreak {
	fmt.Fprintf(w, "\t\t{0x%x, ", r)
	} else {
	fmt.Fprintf(w, "0x%x},\n", r-1)
	}
	inBreak = isBreak
	}
	}
	if inBreak {
	fmt.Fprintf(w, "0x%x},\n", lastRuneForTesting)
	}
	fmt.Fprint(w, "\t}\n\n")

	// Word break test
	// Filter out all samples that do not contain cased characters.
	cased := map[rune]bool{}
	parse("DerivedCoreProperties.txt", func(p *ucd.Parser) {
	if p.String(1) == "Cased" {
	cased[p.Rune(0)] = true
	}
	})

	fmt.Fprintln(w, "\tbreakTest = []string{")
	parse("auxiliary/WordBreakTest.txt", func(p *ucd.Parser) {
	c := strings.Split(p.String(0), " ")

	const sep = '\|'
	numCased := 0
	test := ""
	for ; len(c) >= 2; c = c[2:] {
	if c[0] == "÷" && test != "" {
	test += string(sep)
	}
	i, err := strconv.ParseUint(c[1], 16, 32)
	r := rune(i)
	if err != nil {
	log.Fatalf("Invalid rune %q.", c[1])
	}
	if r == sep {
	log.Fatalf("Separator %q not allowed in test data. Pick another one.", sep)
	}
	if cased[r] {
	numCased++
	}
	test += string(r)
	}
	if numCased > 1 {
	fmt.Fprintf(w, "\t\t%q,\n", test)
	}
	})
	fmt.Fprintln(w, "\t}")

	fmt.Fprintln(w, ")")

	gen.WriteVersionedGoFile("tables_test.go", "cases", w.Bytes())
	}

	// These functions are only used to verify that the definitions of the
	// corresponding properties have not changed in the Unicode Standard.

	func verifyCased(r rune) bool {
	return verifyLower(r) \|\| verifyUpper(r) \|\| unicode.IsTitle(r)
	}

	// verifyLower reports whether r is lowercase according to the core unicode
	// package: unicode.IsLower holds or r has the Other_Lowercase property.
	func verifyLower(r rune) bool {
		return unicode.IsLower(r) || unicode.Is(unicode.Other_Lowercase, r)
	}

	// verifyUpper reports whether r is uppercase according to the core unicode
	// package: unicode.IsUpper holds or r has the Other_Uppercase property.
	func verifyUpper(r rune) bool {
		return unicode.IsUpper(r) || unicode.Is(unicode.Other_Uppercase, r)
	}

	// verifyIgnore approximates the Case_Ignorable property with the tables of
	// the core unicode package: it reports whether r is in one of the categories
	// Mn, Me, Cf, Lm or Sk. It is used to reduce the size of the test data.
	func verifyIgnore(r rune) bool {
		return unicode.In(r, unicode.Mn, unicode.Me, unicode.Cf, unicode.Lm, unicode.Sk)
	}

	// printProperties writes to w a map[rune]bool table for the given property as
	// read from the UCD file. Runes for which the filter f already reports true
	// are left out of the table, so that a rune r has the property if it is in
	// the table or if f(r). The table is named after the property, with the
	// underscores removed and the first part lowercased.
	//
	// It returns the number of runes written to the table. The generator is
	// aborted if f reports true for a rune up to lastRuneForTesting that does not
	// have the property.
	func printProperties(w io.Writer, file, property string, f func(r rune) bool) int {
		parts := strings.Split(property, "_")
		tableName := strings.ToLower(parts[0]) + strings.Join(parts[1:], "")
		fmt.Fprintf(w, "\t%s = map[rune]bool{\n", tableName)

		hasProperty := map[rune]bool{}
		listed := 0
		parse(file, func(p *ucd.Parser) {
			if p.String(1) != property {
				return
			}
			r := p.Rune(0)
			hasProperty[r] = true
			if f(r) {
				// Covered by the filter; no table entry needed.
				return
			}
			listed++
			fmt.Fprintf(w, "\t\t0x%.4x: true,\n", r)
		})
		fmt.Fprint(w, "\t}\n\n")

		// Verify that f is correct, that is, it represents a subset of the property.
		for r := rune(0); r <= lastRuneForTesting; r++ {
			if !hasProperty[r] && f(r) {
				log.Fatalf("Incorrect filter func for property %q.", property)
			}
		}
		return listed
	}

	// The newCaseTrie, sparseValues and sparseOffsets definitions below are
	// placeholders referred to by gen_trieval.go. The real definitions are
	// generated by this program and written to tables.go.

	// newCaseTrie is a placeholder that ignores its argument and returns 0.
	func newCaseTrie(_ int) int {
		return 0
	}

	var (
	// sparseValues is the empty placeholder for the generated table of
	// valueRange entries.
	sparseValues [0]valueRange
	// sparseOffsets is the empty placeholder for the generated table of block
	// offsets.
	sparseOffsets [0]uint16
	)