| // Copyright 2014 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package cases |
| |
| // This file contains the definitions of case mappings for all supported |
| // languages. The rules for the language-specific tailorings were taken and |
| // modified from the CLDR transform definitions in common/transforms. |
| |
| import ( |
| "strings" |
| "unicode" |
| "unicode/utf8" |
| |
| "golang.org/x/text/language" |
| "golang.org/x/text/transform" |
| "golang.org/x/text/unicode/norm" |
| ) |
| |
| // A mapFunc takes a context set to the current rune and writes the mapped |
| // version to the same context. It may advance the context to the next rune. It |
| // returns whether a checkpoint is possible: whether the pDst bytes written to |
| // dst so far won't need changing as we see more source bytes. |
| type mapFunc func(*context) bool |
| |
| // maxIgnorable defines the maximum number of ignorables to consider for |
| // lookahead operations. |
| const maxIgnorable = 30 |
| |
| // supported lists the language tags for which we have tailorings. |
| const supported = "und af az el lt nl tr" |
| |
| func init() { |
| tags := []language.Tag{} |
| for _, s := range strings.Split(supported, " ") { |
| tags = append(tags, language.MustParse(s)) |
| } |
| matcher = language.NewMatcher(tags) |
| Supported = language.NewCoverage(tags) |
| } |
| |
| var ( |
| matcher language.Matcher |
| |
| Supported language.Coverage |
| |
| // We keep the following lists separate, instead of having a single per- |
| // language struct, to give the compiler a chance to remove unused code. |
| |
| // Some uppercase mappers are stateless, so we can precompute the |
| // Transformers and save a bit on runtime allocations. |
| upperFunc = []mapFunc{ |
| nil, // und |
| nil, // af |
| aztrUpper(upper), // az |
| elUpper, // el |
| ltUpper(upper), // lt |
| nil, // nl |
| aztrUpper(upper), // tr |
| } |
| |
| undUpper transform.Transformer = &undUpperCaser{} |
| |
| lowerFunc = []mapFunc{ |
| lower, // und |
| lower, // af |
| aztrLower, // az |
| lower, // el |
| ltLower, // lt |
| lower, // nl |
| aztrLower, // tr |
| } |
| |
| titleInfos = []struct { |
| title, lower mapFunc |
| rewrite func(*context) |
| }{ |
| {title, lower, nil}, // und |
| {title, lower, afnlRewrite}, // af |
| {aztrUpper(title), aztrLower, nil}, // az |
| {title, lower, nil}, // el |
| {ltUpper(title), ltLower, nil}, // lt |
| {nlTitle, lower, afnlRewrite}, // nl |
| {aztrUpper(title), aztrLower, nil}, // tr |
| } |
| ) |
| |
| func makeUpper(t language.Tag, o options) transform.Transformer { |
| _, i, _ := matcher.Match(t) |
| f := upperFunc[i] |
| if f == nil { |
| return undUpper |
| } |
| return &simpleCaser{f: f} |
| } |
| |
| func makeLower(t language.Tag, o options) transform.Transformer { |
| _, i, _ := matcher.Match(t) |
| f := lowerFunc[i] |
| if o.noFinalSigma { |
| return &simpleCaser{f: f} |
| } |
| return &lowerCaser{ |
| first: f, |
| midWord: finalSigma(f), |
| } |
| } |
| |
| func makeTitle(t language.Tag, o options) transform.Transformer { |
| _, i, _ := matcher.Match(t) |
| x := &titleInfos[i] |
| lower := x.lower |
| if o.noLower { |
| lower = (*context).copy |
| } else if !o.noFinalSigma { |
| lower = finalSigma(lower) |
| } |
| return &titleCaser{ |
| title: x.title, |
| lower: lower, |
| rewrite: x.rewrite, |
| } |
| } |
| |
// TODO: consider a similar special case for the fast majority lower case. This
// is a bit more involved, so it will require some more precise benchmarking to
// justify it.
| |
| type undUpperCaser struct{ transform.NopResetter } |
| |
| // undUpperCaser implements the Transformer interface for doing an upper case |
| // mapping for the root locale (und). It eliminates the need for an allocation |
| // as it prevents escaping by not using function pointers. |
| func (t *undUpperCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { |
| c := context{dst: dst, src: src, atEOF: atEOF} |
| for c.next() { |
| upper(&c) |
| c.checkpoint() |
| } |
| return c.ret() |
| } |
| |
| type simpleCaser struct { |
| context |
| f mapFunc |
| } |
| |
| // simpleCaser implements the Transformer interface for doing a case operation |
| // on a rune-by-rune basis. |
| func (t *simpleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { |
| t.context = context{dst: dst, src: src, atEOF: atEOF} |
| c := &t.context |
| for c.next() && t.f(c) { |
| c.checkpoint() |
| } |
| return c.ret() |
| } |
| |
| // lowerCaser implements the Transformer interface. The default Unicode lower |
| // casing requires different treatment for the first and subsequent characters |
| // of a word, most notably to handle the Greek final Sigma. |
| type lowerCaser struct { |
| context |
| |
| first, midWord mapFunc |
| } |
| |
| func (t *lowerCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { |
| t.context = context{dst: dst, src: src, atEOF: atEOF} |
| c := &t.context |
| |
| for isInterWord := true; c.next(); { |
| if isInterWord { |
| if c.info.isCased() { |
| if !t.first(c) { |
| break |
| } |
| isInterWord = false |
| } else if !c.copy() { |
| break |
| } |
| } else { |
| if c.info.isNotCasedAndNotCaseIgnorable() { |
| if !c.copy() { |
| break |
| } |
| isInterWord = true |
| } else if !t.midWord(c) { |
| break |
| } |
| } |
| c.checkpoint() |
| } |
| return c.ret() |
| } |
| |
| // titleCaser implements the Transformer interface. Title casing algorithms |
| // distinguish between the first letter of a word and subsequent letters of the |
| // same word. It uses state to avoid requiring a potentially infinite lookahead. |
| type titleCaser struct { |
| context |
| |
| // rune mappings used by the actual casing algorithms. |
| title, lower mapFunc |
| |
| rewrite func(*context) |
| } |
| |
| // Transform implements the standard Unicode title case algorithm as defined in |
| // Chapter 3 of The Unicode Standard: |
| // toTitlecase(X): Find the word boundaries in X according to Unicode Standard |
| // Annex #29, "Unicode Text Segmentation." For each word boundary, find the |
| // first cased character F following the word boundary. If F exists, map F to |
| // Titlecase_Mapping(F); then map all characters C between F and the following |
| // word boundary to Lowercase_Mapping(C). |
| func (t *titleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { |
| t.context = context{dst: dst, src: src, atEOF: atEOF, isMidWord: t.isMidWord} |
| c := &t.context |
| |
| if !c.next() { |
| return c.ret() |
| } |
| |
| for { |
| p := c.info |
| if t.rewrite != nil { |
| t.rewrite(c) |
| } |
| |
| wasMid := p.isCaseIgnorableAndNonBreakStarter() |
| // Break out of this loop on failure to ensure we do not modify the |
| // state incorrectly. |
| if p.isCased() && !p.isCaseIgnorableAndNotCased() { |
| if !c.isMidWord { |
| if !t.title(c) { |
| break |
| } |
| c.isMidWord = true |
| } else if !t.lower(c) { |
| break |
| } |
| } else if !c.copy() { |
| break |
| } |
| |
| // TODO: make this an "else if" if we can prove that no rune that does |
| // not match the first condition of the if statement can be a break. |
| if p.isBreak() { |
| c.isMidWord = false |
| } |
| |
| // As we save the state of the transformer, it is safe to call |
| // checkpoint after any successful write. |
| c.checkpoint() |
| |
| if !c.next() { |
| break |
| } |
| if wasMid && c.info.isCaseIgnorableAndNonBreakStarter() { |
| c.isMidWord = false |
| } |
| } |
| return c.ret() |
| } |
| |
| // lower writes the lowercase version of the current rune to dst. |
| func lower(c *context) bool { |
| if c.info&hasMappingMask == 0 || c.caseType() == cLower { |
| return c.copy() |
| } |
| if c.info&exceptionBit == 0 { |
| return c.copyXOR() |
| } |
| e := exceptions[c.info>>exceptionShift+1:] |
| if nLower := (e[0] >> lengthBits) & lengthMask; nLower != noChange { |
| return c.writeString(e[1 : 1+nLower]) |
| } |
| return c.copy() |
| } |
| |
| // upper writes the uppercase version of the current rune to dst. |
| func upper(c *context) bool { |
| ct := c.caseType() |
| if c.info&hasMappingMask == 0 || ct == cUpper { |
| return c.copy() |
| } |
| if c.info&exceptionBit == 0 { |
| return c.copyXOR() |
| } |
| e := exceptions[c.info>>exceptionShift+1:] |
| // Get length of first special case mapping. |
| n := (e[0] >> lengthBits) & lengthMask |
| if ct == cTitle { |
| // The first special case mapping is for lower. Set n to the second. |
| if n == noChange { |
| n = 0 |
| } |
| n, e = e[0]&lengthMask, e[n:] |
| } |
| if n != noChange { |
| return c.writeString(e[1 : 1+n]) |
| } |
| return c.copy() |
| } |
| |
| // title writes the title case version of the current rune to dst. |
| func title(c *context) bool { |
| ct := c.caseType() |
| if c.info&hasMappingMask == 0 || ct == cTitle { |
| return c.copy() |
| } |
| if c.info&exceptionBit == 0 { |
| if ct == cLower { |
| return c.copyXOR() |
| } |
| return c.copy() |
| } |
| // Get the exception data. |
| e := exceptions[c.info>>exceptionShift+1:] |
| |
| nFirst := (e[0] >> lengthBits) & lengthMask |
| if nTitle := e[0] & lengthMask; nTitle != noChange { |
| if nFirst != noChange { |
| e = e[nFirst:] |
| } |
| return c.writeString(e[1 : 1+nTitle]) |
| } |
| if ct == cLower && nFirst != noChange { |
| // Use the uppercase version instead. |
| return c.writeString(e[1 : 1+nFirst]) |
| } |
| // Already in correct case. |
| return c.copy() |
| } |
| |
// finalSigma adds Greek final Sigma handling to another casing function. It
// determines whether a lowercased sigma should be σ or ς by looking ahead for
// case-ignorables and a cased letter.
| func finalSigma(f mapFunc) mapFunc { |
| return func(c *context) bool { |
| // ::NFD(); |
| // # 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA |
| // Σ } [:case-ignorable:]* [:cased:] → σ; |
| // [:cased:] [:case-ignorable:]* { Σ → ς; |
| // ::Any-Lower; |
| // ::NFC(); |
| |
| if !c.hasPrefix("Σ") { |
| return f(c) |
| } |
| |
| p := c.pDst |
| c.writeString("ς") |
| // We need to do one more iteration after maxIgnorable, as a cased |
| // letter is not an ignorable and may modify the result. |
| for i := 0; i < maxIgnorable+1; i++ { |
| if !c.next() { |
| return false |
| } |
| if !c.info.isCaseIgnorable() { |
| if c.info.isCased() { |
| // p+1 is guaranteed to be in bounds: if writing ς was |
| // successful, p+1 will contain the second byte of ς. If not, |
| // this function will have returned after c.next returned false. |
| c.dst[p+1]++ // ς → σ |
| } |
| c.unreadRune() |
| return true |
| } |
| // A case ignorable may also introduce a word break, so we may need |
| // to continue searching even after detecting a break. |
| c.isMidWord = c.isMidWord && !c.info.isBreak() |
| c.copy() |
| } |
| return true |
| } |
| } |
| |
| // elUpper implements Greek upper casing, which entails removing a predefined |
| // set of non-blocked modifiers. Note that these accents should not be removed |
| // for title casing! |
| // Example: "Οδός" -> "ΟΔΟΣ". |
| func elUpper(c *context) bool { |
| // From CLDR: |
| // [:Greek:] [^[:ccc=Not_Reordered:][:ccc=Above:]]*? { [\u0313\u0314\u0301\u0300\u0306\u0342\u0308\u0304] → ; |
| // [:Greek:] [^[:ccc=Not_Reordered:][:ccc=Iota_Subscript:]]*? { \u0345 → ; |
| |
| r, _ := utf8.DecodeRune(c.src[c.pSrc:]) |
| oldPDst := c.pDst |
| if !upper(c) { |
| return false |
| } |
| if !unicode.Is(unicode.Greek, r) { |
| return true |
| } |
| i := 0 |
| // Take the properties of the uppercased rune that is already written to the |
| // destination. This saves us the trouble of having to uppercase the |
| // decomposed rune again. |
| if b := norm.NFD.Properties(c.dst[oldPDst:]).Decomposition(); b != nil { |
| // Restore the destination position and process the decomposed rune. |
| r, sz := utf8.DecodeRune(b) |
| if r <= 0xFF { // See A.6.1 |
| return true |
| } |
| c.pDst = oldPDst |
| // Insert the first rune and ignore the modifiers. See A.6.2. |
| c.writeBytes(b[:sz]) |
| i = len(b[sz:]) / 2 // Greek modifiers are always of length 2. |
| } |
| |
| for ; i < maxIgnorable && c.next(); i++ { |
| switch r, _ := utf8.DecodeRune(c.src[c.pSrc:]); r { |
| // Above and Iota Subscript |
| case 0x0300, // U+0300 COMBINING GRAVE ACCENT |
| 0x0301, // U+0301 COMBINING ACUTE ACCENT |
| 0x0304, // U+0304 COMBINING MACRON |
| 0x0306, // U+0306 COMBINING BREVE |
| 0x0308, // U+0308 COMBINING DIAERESIS |
| 0x0313, // U+0313 COMBINING COMMA ABOVE |
| 0x0314, // U+0314 COMBINING REVERSED COMMA ABOVE |
| 0x0342, // U+0342 COMBINING GREEK PERISPOMENI |
| 0x0345: // U+0345 COMBINING GREEK YPOGEGRAMMENI |
| // No-op. Gobble the modifier. |
| |
| default: |
| switch v, _ := trie.lookup(c.src[c.pSrc:]); info(v).cccType() { |
| case cccZero: |
| c.unreadRune() |
| return true |
| |
| // We don't need to test for IotaSubscript as the only rune that |
| // qualifies (U+0345) was already excluded in the switch statement |
| // above. See A.4. |
| |
| case cccAbove: |
| return c.copy() |
| default: |
| // Some other modifier. We're still allowed to gobble Greek |
| // modifiers after this. |
| c.copy() |
| } |
| } |
| } |
| return i == maxIgnorable |
| } |
| |
| func ltLower(c *context) bool { |
| // From CLDR: |
| // # Introduce an explicit dot above when lowercasing capital I's and J's |
| // # whenever there are more accents above. |
| // # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek) |
| // # 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I |
| // # 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J |
| // # 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK |
| // # 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE |
| // # 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE |
| // # 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE |
| // ::NFD(); |
| // I } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0307; |
| // J } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → j \u0307; |
| // Į } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → į \u0307; |
| // Ì → i \u0307 \u0300; |
| // Í → i \u0307 \u0301; |
| // Ĩ → i \u0307 \u0303; |
| // ::Any-Lower(); |
| // ::NFC(); |
| |
| i := 0 |
| if r := c.src[c.pSrc]; r < utf8.RuneSelf { |
| lower(c) |
| if r != 'I' && r != 'J' { |
| return true |
| } |
| } else { |
| p := norm.NFD.Properties(c.src[c.pSrc:]) |
| if d := p.Decomposition(); len(d) >= 3 && (d[0] == 'I' || d[0] == 'J') { |
| // UTF-8 optimization: the decomposition will only have an above |
| // modifier if the last rune of the decomposition is in [U+300-U+311]. |
| // In all other cases, a decomposition starting with I is always |
| // an I followed by modifiers that are not cased themselves. See A.2. |
| if d[1] == 0xCC && d[2] <= 0x91 { // A.2.4. |
| if !c.writeBytes(d[:1]) { |
| return false |
| } |
| c.dst[c.pDst-1] += 'a' - 'A' // lower |
| |
| // Assumption: modifier never changes on lowercase. See A.1. |
| // Assumption: all modifiers added have CCC = Above. See A.2.3. |
| return c.writeString("\u0307") && c.writeBytes(d[1:]) |
| } |
| // In all other cases the additional modifiers will have a CCC |
| // that is less than 230 (Above). We will insert the U+0307, if |
| // needed, after these modifiers so that a string in FCD form |
| // will remain so. See A.2.2. |
| lower(c) |
| i = 1 |
| } else { |
| return lower(c) |
| } |
| } |
| |
| for ; i < maxIgnorable && c.next(); i++ { |
| switch c.info.cccType() { |
| case cccZero: |
| c.unreadRune() |
| return true |
| case cccAbove: |
| return c.writeString("\u0307") && c.copy() // See A.1. |
| default: |
| c.copy() // See A.1. |
| } |
| } |
| return i == maxIgnorable |
| } |
| |
| func ltUpper(f mapFunc) mapFunc { |
| return func(c *context) bool { |
| // From CLDR: |
| // ::NFD(); |
| // [:Soft_Dotted:] [^[:ccc=Not_Reordered:][:ccc=Above:]]* { \u0307 → ; |
| // ::Any-Upper(); |
| // ::NFC(); |
| |
| // TODO: See A.5. A soft-dotted rune never has an exception. This would |
| // allow us to overload the exception bit and encode this property in |
| // info. Need to measure performance impact of this. |
| r, _ := utf8.DecodeRune(c.src[c.pSrc:]) |
| oldPDst := c.pDst |
| if !f(c) { |
| return false |
| } |
| if !unicode.Is(unicode.Soft_Dotted, r) { |
| return true |
| } |
| |
| // We don't need to do an NFD normalization, as a soft-dotted rune never |
| // contains U+0307. See A.3. |
| |
| i := 0 |
| for ; i < maxIgnorable && c.next(); i++ { |
| switch c.info.cccType() { |
| case cccZero: |
| c.unreadRune() |
| return true |
| case cccAbove: |
| if c.hasPrefix("\u0307") { |
| // We don't do a full NFC, but rather combine runes for |
| // some of the common cases. (Returning NFC or |
| // preserving normal form is neither a requirement nor |
| // a possibility anyway). |
| if !c.next() { |
| return false |
| } |
| if c.dst[oldPDst] == 'I' && c.pDst == oldPDst+1 && c.src[c.pSrc] == 0xcc { |
| s := "" |
| switch c.src[c.pSrc+1] { |
| case 0x80: // U+0300 COMBINING GRAVE ACCENT |
| s = "\u00cc" // U+00CC LATIN CAPITAL LETTER I WITH GRAVE |
| case 0x81: // U+0301 COMBINING ACUTE ACCENT |
| s = "\u00cd" // U+00CD LATIN CAPITAL LETTER I WITH ACUTE |
| case 0x83: // U+0303 COMBINING TILDE |
| s = "\u0128" // U+0128 LATIN CAPITAL LETTER I WITH TILDE |
| case 0x88: // U+0308 COMBINING DIAERESIS |
| s = "\u00cf" // U+00CF LATIN CAPITAL LETTER I WITH DIAERESIS |
| default: |
| } |
| if s != "" { |
| c.pDst = oldPDst |
| return c.writeString(s) |
| } |
| } |
| } |
| return c.copy() |
| default: |
| c.copy() |
| } |
| } |
| return i == maxIgnorable |
| } |
| } |
| |
| func aztrUpper(f mapFunc) mapFunc { |
| return func(c *context) bool { |
| // i→İ; |
| if c.src[c.pSrc] == 'i' { |
| return c.writeString("İ") |
| } |
| return f(c) |
| } |
| } |
| |
| func aztrLower(c *context) (done bool) { |
| // From CLDR: |
| // # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri |
| // # 0130; 0069; 0130; 0130; tr; # LATIN CAPITAL LETTER I WITH DOT ABOVE |
| // İ→i; |
| // # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i. |
| // # This matches the behavior of the canonically equivalent I-dot_above |
| // # 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE |
| // # When lowercasing, unless an I is before a dot_above, it turns into a dotless i. |
| // # 0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I |
| // I([^[:ccc=Not_Reordered:][:ccc=Above:]]*)\u0307 → i$1 ; |
| // I→ı ; |
| // ::Any-Lower(); |
| if c.hasPrefix("\u0130") { // İ |
| return c.writeString("i") |
| } |
| if c.src[c.pSrc] != 'I' { |
| return lower(c) |
| } |
| |
| // We ignore the lower-case I for now, but insert it later when we know |
| // which form we need. |
| start := c.pSrc + c.sz |
| |
| i := 0 |
| Loop: |
| // We check for up to n ignorables before \u0307. As \u0307 is an |
| // ignorable as well, n is maxIgnorable-1. |
| for ; i < maxIgnorable && c.next(); i++ { |
| switch c.info.cccType() { |
| case cccAbove: |
| if c.hasPrefix("\u0307") { |
| return c.writeString("i") && c.writeBytes(c.src[start:c.pSrc]) // ignore U+0307 |
| } |
| done = true |
| break Loop |
| case cccZero: |
| c.unreadRune() |
| done = true |
| break Loop |
| default: |
| // We'll write this rune after we know which starter to use. |
| } |
| } |
| if i == maxIgnorable { |
| done = true |
| } |
| return c.writeString("ı") && c.writeBytes(c.src[start:c.pSrc+c.sz]) && done |
| } |
| |
| func nlTitle(c *context) bool { |
| // From CLDR: |
| // # Special titlecasing for Dutch initial "ij". |
| // ::Any-Title(); |
	// # Fix up Ij at the beginning of a "word" (per Any-Title, not UAX #29)
| // [:^WB=ALetter:] [:WB=Extend:]* [[:WB=MidLetter:][:WB=MidNumLet:]]? { Ij } → IJ ; |
| if c.src[c.pSrc] != 'I' && c.src[c.pSrc] != 'i' { |
| return title(c) |
| } |
| |
| if !c.writeString("I") || !c.next() { |
| return false |
| } |
| if c.src[c.pSrc] == 'j' || c.src[c.pSrc] == 'J' { |
| return c.writeString("J") |
| } |
| c.unreadRune() |
| return true |
| } |
| |
| // Not part of CLDR, but see http://unicode.org/cldr/trac/ticket/7078. |
| func afnlRewrite(c *context) { |
| if c.hasPrefix("'") || c.hasPrefix("’") { |
| c.isMidWord = true |
| } |
| } |