collate/collate.go - text - Git at Google

 // Copyright 2012 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 // Package collate contains types for comparing and sorting Unicode strings
 // according to a given collation order.  Package locale provides a high-level
 // interface to collation. Users should typically use that package instead.
 package collate

 import (
 	"bytes"
 	"strings"

 	"code.google.com/p/go.text/collate/colltab"
 	"code.google.com/p/go.text/language"
 	"code.google.com/p/go.text/unicode/norm"
 )

 // AlternateHandling identifies the various ways in which variables are handled.
 // A rune with a primary weight lower than the variable top is considered a
 // variable.
 // See http://www.unicode.org/reports/tr10/#Variable_Weighting for details.
 type AlternateHandling int

 const (
 	// AltNonIgnorable turns off special handling of variables.
 	AltNonIgnorable AlternateHandling = iota

 	// AltBlanked sets variables and all subsequent primary ignorables to be
 	// ignorable at all levels. This is identical to removing all variables
 	// and subsequent primary ignorables from the input.
 	AltBlanked

 	// AltShifted sets variables to be ignorable for levels one through three and
 	// adds a fourth level based on the values of the ignored levels.
 	AltShifted

 	// AltShiftTrimmed is a slight variant of AltShifted that is used to
 	// emulate POSIX.
 	AltShiftTrimmed
 )

 // Collator provides functionality for comparing strings for a given
 // collation order.
 type Collator struct {
 	// TODO: hide most of these options. Low-level options are set through the locale
 	// identifier (as defined by LDML) while high-level options are set through SetOptions.
 	// Using high-level options allows us to be more flexible (such as not ignoring
 	// Thai vowels for IgnoreDiacriticals) and more user-friendly (such as allowing
 	// diacritical marks to be ignored but not case without having to fiddle with levels).

 	// Strength sets the maximum level to use in comparison.
 	Strength colltab.Level

 	// Alternate specifies an alternative handling of variables.
 	Alternate AlternateHandling

 	// Backwards specifies the order of sorting at the secondary level.
 	// This option exists predominantly to support reverse sorting of accents in French.
 	Backwards bool

 	// TODO: implement:
 	// With HiraganaQuaternary enabled, Hiragana codepoints will get lower values
 	// than all the other non-variable code points. Strength must be greater or
 	// equal to Quaternary for this to take effect.
 	HiraganaQuaternary bool

 	// If CaseLevel is true, a level consisting only of case characteristics will
 	// be inserted in front of the tertiary level.  To ignore accents but take
 	// cases into account, set Strength to Primary and CaseLevel to true.
 	CaseLevel bool

 	// If Numeric is true, any sequence of decimal digits (category is Nd) is sorted
 	// at a primary level with its numeric value.  For example, "A-21" < "A-123".
 	Numeric bool

 	// The largest primary value that is considered to be variable.
 	variableTop uint32

 	f norm.Form

 	t colltab.Weigher

 	sorter sorter

 	_iter [2]iter
 }

 // An Option is used to change the behavior of Collator.  They override the
 // settings passed through the locale identifier.
 type Option int

 const (
 	Numeric          Option = 1 << iota // Sort numbers numerically ("2" < "12").
 	IgnoreCase                          // Case-insensitive search.
 	IgnoreDiacritics                    // Ignore diacritical marks. ("o" == "ö").
 	IgnoreWidth                         // Ignore full versus normal width.
 	UpperFirst                          // Sort upper case before lower case.
 	LowerFirst                          // Sort lower case before upper case.
 	Force                               // Force ordering if strings are equivalent but not equal.

 	Loose = IgnoreDiacritics | IgnoreWidth | IgnoreCase
 )

 // SetOptions accepts a Options or-ed together.  All previous calls to SetOptions are ignored.
 func (c *Collator) SetOptions(o Option) {
 	// TODO: implement
 }

 func (c *Collator) iter(i int) *iter {
 	// TODO: evaluate performance for making the second iterator optional.
 	return &c._iter[i]
 }

 // Supported returns the list of languages for which collating differs from its parent.
 func Supported() []language.Tag {
 	ids := strings.Split(availableLocales, ",")
 	tags := make([]language.Tag, len(ids))
 	for i, s := range ids {
 		tags[i] = language.Make(s)
 	}
 	return tags
 }

 var matcher = language.NewMatcher(Supported())

 // New returns a new Collator initialized for the given locale.
 func New(t language.Tag) *Collator {
 	_, index, _ := matcher.Match(t)
 	return NewFromTable(colltab.Init(locales[index]))
 }

 func NewFromTable(t colltab.Weigher) *Collator {
 	c := &Collator{
 		Strength: colltab.Tertiary,
 		f:        norm.NFD,
 		t:        t,
 	}
 	c._iter[0].init(c)
 	c._iter[1].init(c)
 	c.variableTop = t.Top()
 	return c
 }

 // Buffer holds keys generated by Key and KeyString.
 type Buffer struct {
 	buf [4096]byte
 	key []byte
 }

 func (b *Buffer) init() {
 	if b.key == nil {
 		b.key = b.buf[:0]
 	}
 }

 // Reset clears the buffer from previous results generated by Key and KeyString.
 func (b *Buffer) Reset() {
 	b.key = b.key[:0]
 }

 // Compare returns an integer comparing the two byte slices.
 // The result will be 0 if a==b, -1 if a < b, and +1 if a > b.
 func (c *Collator) Compare(a, b []byte) int {
 	// TODO: skip identical prefixes once we have a fast way to detect if a rune is
 	// part of a contraction. This would lead to roughly a 10% speedup for the colcmp regtest.
 	c.iter(0).setInput(a)
 	c.iter(1).setInput(b)
 	if res := c.compare(); res != 0 {
 		return res
 	}
 	if colltab.Identity == c.Strength {
 		return bytes.Compare(a, b)
 	}
 	return 0
 }

 // CompareString returns an integer comparing the two strings.
 // The result will be 0 if a==b, -1 if a < b, and +1 if a > b.
 func (c *Collator) CompareString(a, b string) int {
 	// TODO: skip identical prefixes once we have a fast way to detect if a rune is
 	// part of a contraction. This would lead to roughly a 10% speedup for the colcmp regtest.
 	c.iter(0).setInputString(a)
 	c.iter(1).setInputString(b)
 	if res := c.compare(); res != 0 {
 		return res
 	}
 	if colltab.Identity == c.Strength {
 		if a < b {
 			return -1
 		} else if a > b {
 			return 1
 		}
 	}
 	return 0
 }

 func compareLevel(f func(i *iter) int, a, b *iter) int {
 	a.pce = 0
 	b.pce = 0
 	for {
 		va := f(a)
 		vb := f(b)
 		if va != vb {
 			if va < vb {
 				return -1
 			}
 			return 1
 		} else if va == 0 {
 			break
 		}
 	}
 	return 0
 }

 func (c *Collator) compare() int {
 	ia, ib := c.iter(0), c.iter(1)
 	// Process primary level
 	if c.Alternate != AltShifted {
 		// TODO: implement script reordering
 		// TODO: special hiragana handling
 		if res := compareLevel((*iter).nextPrimary, ia, ib); res != 0 {
 			return res
 		}
 	} else {
 		// TODO: handle shifted
 	}
 	if colltab.Secondary <= c.Strength {
 		f := (*iter).nextSecondary
 		if c.Backwards {
 			f = (*iter).prevSecondary
 		}
 		if res := compareLevel(f, ia, ib); res != 0 {
 			return res
 		}
 	}
 	// TODO: special case handling (Danish?)
 	if colltab.Tertiary <= c.Strength || c.CaseLevel {
 		if res := compareLevel((*iter).nextTertiary, ia, ib); res != 0 {
 			return res
 		}
 		// TODO: Not needed for the default value of AltNonIgnorable?
 		if colltab.Quaternary <= c.Strength {
 			if res := compareLevel((*iter).nextQuaternary, ia, ib); res != 0 {
 				return res
 			}
 		}
 	}
 	return 0
 }

 // Key returns the collation key for str.
 // Passing the buffer buf may avoid memory allocations.
 // The returned slice will point to an allocation in Buffer and will remain
 // valid until the next call to buf.Reset().
 func (c *Collator) Key(buf *Buffer, str []byte) []byte {
 	// See http://www.unicode.org/reports/tr10/#Main_Algorithm for more details.
 	buf.init()
 	return c.key(buf, c.getColElems(str))
 }

 // KeyFromString returns the collation key for str.
 // Passing the buffer buf may avoid memory allocations.
 // The returned slice will point to an allocation in Buffer and will retain
 // valid until the next call to buf.ResetKeys().
 func (c *Collator) KeyFromString(buf *Buffer, str string) []byte {
 	// See http://www.unicode.org/reports/tr10/#Main_Algorithm for more details.
 	buf.init()
 	return c.key(buf, c.getColElemsString(str))
 }

 func (c *Collator) key(buf *Buffer, w []colltab.Elem) []byte {
 	processWeights(c.Alternate, c.t.Top(), w)
 	kn := len(buf.key)
 	c.keyFromElems(buf, w)
 	return buf.key[kn:]
 }

 func (c *Collator) getColElems(str []byte) []colltab.Elem {
 	i := c.iter(0)
 	i.setInput(str)
 	for i.next() {
 	}
 	return i.ce
 }

 func (c *Collator) getColElemsString(str string) []colltab.Elem {
 	i := c.iter(0)
 	i.setInputString(str)
 	for i.next() {
 	}
 	return i.ce
 }

 type iter struct {
 	bytes []byte
 	str   string

 	wa  [512]colltab.Elem
 	ce  []colltab.Elem
 	pce int
 	nce int // nce <= len(nce)

 	prevCCC  uint8
 	pStarter int

 	t colltab.Weigher
 }

 func (i *iter) init(c *Collator) {
 	i.t = c.t
 	i.ce = i.wa[:0]
 }

 func (i *iter) reset() {
 	i.ce = i.ce[:0]
 	i.nce = 0
 	i.prevCCC = 0
 	i.pStarter = 0
 }

 func (i *iter) setInput(s []byte) *iter {
 	i.bytes = s
 	i.str = ""
 	i.reset()
 	return i
 }

 func (i *iter) setInputString(s string) *iter {
 	i.str = s
 	i.bytes = nil
 	i.reset()
 	return i
 }

 func (i *iter) done() bool {
 	return len(i.str) == 0 && len(i.bytes) == 0
 }

 func (i *iter) tail(n int) {
 	if i.bytes == nil {
 		i.str = i.str[n:]
 	} else {
 		i.bytes = i.bytes[n:]
 	}
 }

 func (i *iter) appendNext() int {
 	var sz int
 	if i.bytes == nil {
 		i.ce, sz = i.t.AppendNextString(i.ce, i.str)
 	} else {
 		i.ce, sz = i.t.AppendNext(i.ce, i.bytes)
 	}
 	return sz
 }

 // next appends Elems to the internal array until it adds an element with CCC=0.
 // In the majority of cases, a Elem with a primary value > 0 will have
 // a CCC of 0. The CCC values of colation elements are also used to detect if the
 // input string was not normalized and to adjust the result accordingly.
 func (i *iter) next() bool {
 	for !i.done() {
 		p0 := len(i.ce)
 		sz := i.appendNext()
 		i.tail(sz)
 		last := len(i.ce) - 1
 		if ccc := i.ce[last].CCC(); ccc == 0 {
 			i.nce = len(i.ce)
 			i.pStarter = last
 			i.prevCCC = 0
 			return true
 		} else if p0 < last && i.ce[p0].CCC() == 0 {
 			// set i.nce to only cover part of i.ce for which ccc == 0 and
 			// use rest the next call to next.
 			for p0++; p0 < last && i.ce[p0].CCC() == 0; p0++ {
 			}
 			i.nce = p0
 			i.pStarter = p0 - 1
 			i.prevCCC = ccc
 			return true
 		} else if ccc < i.prevCCC {
 			i.doNorm(p0, ccc) // should be rare for most common cases
 		} else {
 			i.prevCCC = ccc
 		}
 	}
 	if len(i.ce) != i.nce {
 		i.nce = len(i.ce)
 		return true
 	}
 	return false
 }

 // nextPlain is the same as next, but does not "normalize" the collation
 // elements.
 // TODO: remove this function. Using this instead of next does not seem
 // to improve performance in any significant way. We retain this until
 // later for evaluation purposes.
 func (i *iter) nextPlain() bool {
 	if i.done() {
 		return false
 	}
 	sz := i.appendNext()
 	i.tail(sz)
 	i.nce = len(i.ce)
 	return true
 }

 const maxCombiningCharacters = 30

 // doNorm reorders the collation elements in i.ce.
 // It assumes that blocks of collation elements added with appendNext
 // either start and end with the same CCC or start with CCC == 0.
 // This allows for a single insertion point for the entire block.
 // The correctness of this assumption is verified in builder.go.
 func (i *iter) doNorm(p int, ccc uint8) {
 	if p-i.pStarter > maxCombiningCharacters {
 		i.prevCCC = i.ce[len(i.ce)-1].CCC()
 		i.pStarter = len(i.ce) - 1
 		return
 	}
 	n := len(i.ce)
 	k := p
 	for p--; p > i.pStarter && ccc < i.ce[p-1].CCC(); p-- {
 	}
 	i.ce = append(i.ce, i.ce[p:k]...)
 	copy(i.ce[p:], i.ce[k:])
 	i.ce = i.ce[:n]
 }

 func (i *iter) nextPrimary() int {
 	for {
 		for ; i.pce < i.nce; i.pce++ {
 			if v := i.ce[i.pce].Primary(); v != 0 {
 				i.pce++
 				return v
 			}
 		}
 		if !i.next() {
 			return 0
 		}
 	}
 	panic("should not reach here")
 }

 func (i *iter) nextSecondary() int {
 	for ; i.pce < len(i.ce); i.pce++ {
 		if v := i.ce[i.pce].Secondary(); v != 0 {
 			i.pce++
 			return v
 		}
 	}
 	return 0
 }

 func (i *iter) prevSecondary() int {
 	for ; i.pce < len(i.ce); i.pce++ {
 		if v := i.ce[len(i.ce)-i.pce-1].Secondary(); v != 0 {
 			i.pce++
 			return v
 		}
 	}
 	return 0
 }

 func (i *iter) nextTertiary() int {
 	for ; i.pce < len(i.ce); i.pce++ {
 		if v := i.ce[i.pce].Tertiary(); v != 0 {
 			i.pce++
 			return int(v)
 		}
 	}
 	return 0
 }

 func (i *iter) nextQuaternary() int {
 	for ; i.pce < len(i.ce); i.pce++ {
 		if v := i.ce[i.pce].Quaternary(); v != 0 {
 			i.pce++
 			return v
 		}
 	}
 	return 0
 }

 func appendPrimary(key []byte, p int) []byte {
 	// Convert to variable length encoding; supports up to 23 bits.
 	if p <= 0x7FFF {
 		key = append(key, uint8(p>>8), uint8(p))
 	} else {
 		key = append(key, uint8(p>>16)|0x80, uint8(p>>8), uint8(p))
 	}
 	return key
 }

 // keyFromElems converts the weights ws to a compact sequence of bytes.
 // The result will be appended to the byte buffer in buf.
 func (c *Collator) keyFromElems(buf *Buffer, ws []colltab.Elem) {
 	for _, v := range ws {
 		if w := v.Primary(); w > 0 {
 			buf.key = appendPrimary(buf.key, w)
 		}
 	}
 	if colltab.Secondary <= c.Strength {
 		buf.key = append(buf.key, 0, 0)
 		// TODO: we can use one 0 if we can guarantee that all non-zero weights are > 0xFF.
 		if !c.Backwards {
 			for _, v := range ws {
 				if w := v.Secondary(); w > 0 {
 					buf.key = append(buf.key, uint8(w>>8), uint8(w))
 				}
 			}
 		} else {
 			for i := len(ws) - 1; i >= 0; i-- {
 				if w := ws[i].Secondary(); w > 0 {
 					buf.key = append(buf.key, uint8(w>>8), uint8(w))
 				}
 			}
 		}
 	} else if c.CaseLevel {
 		buf.key = append(buf.key, 0, 0)
 	}
 	if colltab.Tertiary <= c.Strength || c.CaseLevel {
 		buf.key = append(buf.key, 0, 0)
 		for _, v := range ws {
 			if w := v.Tertiary(); w > 0 {
 				buf.key = append(buf.key, uint8(w))
 			}
 		}
 		// Derive the quaternary weights from the options and other levels.
 		// Note that we represent MaxQuaternary as 0xFF. The first byte of the
 		// representation of a primary weight is always smaller than 0xFF,
 		// so using this single byte value will compare correctly.
 		if colltab.Quaternary <= c.Strength && c.Alternate >= AltShifted {
 			if c.Alternate == AltShiftTrimmed {
 				lastNonFFFF := len(buf.key)
 				buf.key = append(buf.key, 0)
 				for _, v := range ws {
 					if w := v.Quaternary(); w == colltab.MaxQuaternary {
 						buf.key = append(buf.key, 0xFF)
 					} else if w > 0 {
 						buf.key = appendPrimary(buf.key, w)
 						lastNonFFFF = len(buf.key)
 					}
 				}
 				buf.key = buf.key[:lastNonFFFF]
 			} else {
 				buf.key = append(buf.key, 0)
 				for _, v := range ws {
 					if w := v.Quaternary(); w == colltab.MaxQuaternary {
 						buf.key = append(buf.key, 0xFF)
 					} else if w > 0 {
 						buf.key = appendPrimary(buf.key, w)
 					}
 				}
 			}
 		}
 	}
 }

 func processWeights(vw AlternateHandling, top uint32, wa []colltab.Elem) {
 	ignore := false
 	vtop := int(top)
 	switch vw {
 	case AltShifted, AltShiftTrimmed:
 		for i := range wa {
 			if p := wa[i].Primary(); p <= vtop && p != 0 {
 				wa[i] = colltab.MakeQuaternary(p)
 				ignore = true
 			} else if p == 0 {
 				if ignore {
 					wa[i] = colltab.Ignore
 				}
 			} else {
 				ignore = false
 			}
 		}
 	case AltBlanked:
 		for i := range wa {
 			if p := wa[i].Primary(); p <= vtop && (ignore || p != 0) {
 				wa[i] = colltab.Ignore
 				ignore = true
 			} else {
 				ignore = false
 			}
 		}
 	}
 }
	// Copyright 2012 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	// Package collate contains types for comparing and sorting Unicode strings
	// according to a given collation order. Package locale provides a high-level
	// interface to collation. Users should typically use that package instead.
	package collate

	import (
	"bytes"
	"strings"

	"code.google.com/p/go.text/collate/colltab"
	"code.google.com/p/go.text/language"
	"code.google.com/p/go.text/unicode/norm"
	)

	// AlternateHandling identifies the various ways in which variables are handled.
	// A rune with a primary weight lower than the variable top is considered a
	// variable.
	// See http://www.unicode.org/reports/tr10/#Variable_Weighting for details.
	type AlternateHandling int

	const (
	// AltNonIgnorable turns off special handling of variables.
	AltNonIgnorable AlternateHandling = iota

	// AltBlanked sets variables and all subsequent primary ignorables to be
	// ignorable at all levels. This is identical to removing all variables
	// and subsequent primary ignorables from the input.
	AltBlanked

	// AltShifted sets variables to be ignorable for levels one through three and
	// adds a fourth level based on the values of the ignored levels.
	AltShifted

	// AltShiftTrimmed is a slight variant of AltShifted that is used to
	// emulate POSIX.
	AltShiftTrimmed
	)

	// Collator provides functionality for comparing strings for a given
	// collation order.
	type Collator struct {
	// TODO: hide most of these options. Low-level options are set through the locale
	// identifier (as defined by LDML) while high-level options are set through SetOptions.
	// Using high-level options allows us to be more flexible (such as not ignoring
	// Thai vowels for IgnoreDiacriticals) and more user-friendly (such as allowing
	// diacritical marks to be ignored but not case without having to fiddle with levels).

	// Strength sets the maximum level to use in comparison.
	Strength colltab.Level

	// Alternate specifies an alternative handling of variables.
	Alternate AlternateHandling

	// Backwards specifies the order of sorting at the secondary level.
	// This option exists predominantly to support reverse sorting of accents in French.
	Backwards bool

	// TODO: implement:
	// With HiraganaQuaternary enabled, Hiragana codepoints will get lower values
	// than all the other non-variable code points. Strength must be greater or
	// equal to Quaternary for this to take effect.
	HiraganaQuaternary bool

	// If CaseLevel is true, a level consisting only of case characteristics will
	// be inserted in front of the tertiary level. To ignore accents but take
	// cases into account, set Strength to Primary and CaseLevel to true.
	CaseLevel bool

	// If Numeric is true, any sequence of decimal digits (category is Nd) is sorted
	// at a primary level with its numeric value. For example, "A-21" < "A-123".
	Numeric bool

	// The largest primary value that is considered to be variable.
	variableTop uint32

	f norm.Form

	t colltab.Weigher

	sorter sorter

	_iter [2]iter
	}

	// An Option is used to change the behavior of Collator. They override the
	// settings passed through the locale identifier.
	type Option int

	const (
	Numeric Option = 1 << iota // Sort numbers numerically ("2" < "12").
	IgnoreCase // Case-insensitive search.
	IgnoreDiacritics // Ignore diacritical marks. ("o" == "ö").
	IgnoreWidth // Ignore full versus normal width.
	UpperFirst // Sort upper case before lower case.
	LowerFirst // Sort lower case before upper case.
	Force // Force ordering if strings are equivalent but not equal.

	Loose = IgnoreDiacritics \| IgnoreWidth \| IgnoreCase
	)

	// SetOptions accepts a Options or-ed together. All previous calls to SetOptions are ignored.
	func (c *Collator) SetOptions(o Option) {
	// TODO: implement
	}

	func (c Collator) iter(i int) iter {
	// TODO: evaluate performance for making the second iterator optional.
	return &c._iter[i]
	}

	// Supported returns the list of languages for which collating differs from its parent.
	func Supported() []language.Tag {
	ids := strings.Split(availableLocales, ",")
	tags := make([]language.Tag, len(ids))
	for i, s := range ids {
	tags[i] = language.Make(s)
	}
	return tags
	}

	var matcher = language.NewMatcher(Supported())

	// New returns a new Collator initialized for the given locale.
	func New(t language.Tag) *Collator {
	_, index, _ := matcher.Match(t)
	return NewFromTable(colltab.Init(locales[index]))
	}

	func NewFromTable(t colltab.Weigher) *Collator {
	c := &Collator{
	Strength: colltab.Tertiary,
	f: norm.NFD,
	t: t,
	}
	c._iter[0].init(c)
	c._iter[1].init(c)
	c.variableTop = t.Top()
	return c
	}

	// Buffer holds keys generated by Key and KeyString.
	type Buffer struct {
	buf [4096]byte
	key []byte
	}

	func (b *Buffer) init() {
	if b.key == nil {
	b.key = b.buf[:0]
	}
	}

	// Reset clears the buffer from previous results generated by Key and KeyString.
	func (b *Buffer) Reset() {
	b.key = b.key[:0]
	}

	// Compare returns an integer comparing the two byte slices.
	// The result will be 0 if a==b, -1 if a < b, and +1 if a > b.
	func (c *Collator) Compare(a, b []byte) int {
	// TODO: skip identical prefixes once we have a fast way to detect if a rune is
	// part of a contraction. This would lead to roughly a 10% speedup for the colcmp regtest.
	c.iter(0).setInput(a)
	c.iter(1).setInput(b)
	if res := c.compare(); res != 0 {
	return res
	}
	if colltab.Identity == c.Strength {
	return bytes.Compare(a, b)
	}
	return 0
	}

	// CompareString returns an integer comparing the two strings.
	// The result will be 0 if a==b, -1 if a < b, and +1 if a > b.
	func (c *Collator) CompareString(a, b string) int {
	// TODO: skip identical prefixes once we have a fast way to detect if a rune is
	// part of a contraction. This would lead to roughly a 10% speedup for the colcmp regtest.
	c.iter(0).setInputString(a)
	c.iter(1).setInputString(b)
	if res := c.compare(); res != 0 {
	return res
	}
	if colltab.Identity == c.Strength {
	if a < b {
	return -1
	} else if a > b {
	return 1
	}
	}
	return 0
	}

	func compareLevel(f func(i iter) int, a, b iter) int {
	a.pce = 0
	b.pce = 0
	for {
	va := f(a)
	vb := f(b)
	if va != vb {
	if va < vb {
	return -1
	}
	return 1
	} else if va == 0 {
	break
	}
	}
	return 0
	}

	func (c *Collator) compare() int {
	ia, ib := c.iter(0), c.iter(1)
	// Process primary level
	if c.Alternate != AltShifted {
	// TODO: implement script reordering
	// TODO: special hiragana handling
	if res := compareLevel((*iter).nextPrimary, ia, ib); res != 0 {
	return res
	}
	} else {
	// TODO: handle shifted
	}
	if colltab.Secondary <= c.Strength {
	f := (*iter).nextSecondary
	if c.Backwards {
	f = (*iter).prevSecondary
	}
	if res := compareLevel(f, ia, ib); res != 0 {
	return res
	}
	}
	// TODO: special case handling (Danish?)
	if colltab.Tertiary <= c.Strength \|\| c.CaseLevel {
	if res := compareLevel((*iter).nextTertiary, ia, ib); res != 0 {
	return res
	}
	// TODO: Not needed for the default value of AltNonIgnorable?
	if colltab.Quaternary <= c.Strength {
	if res := compareLevel((*iter).nextQuaternary, ia, ib); res != 0 {
	return res
	}
	}
	}
	return 0
	}

	// Key returns the collation key for str.
	// Passing the buffer buf may avoid memory allocations.
	// The returned slice will point to an allocation in Buffer and will remain
	// valid until the next call to buf.Reset().
	func (c Collator) Key(buf Buffer, str []byte) []byte {
	// See http://www.unicode.org/reports/tr10/#Main_Algorithm for more details.
	buf.init()
	return c.key(buf, c.getColElems(str))
	}

	// KeyFromString returns the collation key for str.
	// Passing the buffer buf may avoid memory allocations.
	// The returned slice will point to an allocation in Buffer and will retain
	// valid until the next call to buf.ResetKeys().
	func (c Collator) KeyFromString(buf Buffer, str string) []byte {
	// See http://www.unicode.org/reports/tr10/#Main_Algorithm for more details.
	buf.init()
	return c.key(buf, c.getColElemsString(str))
	}

	func (c Collator) key(buf Buffer, w []colltab.Elem) []byte {
	processWeights(c.Alternate, c.t.Top(), w)
	kn := len(buf.key)
	c.keyFromElems(buf, w)
	return buf.key[kn:]
	}

	func (c *Collator) getColElems(str []byte) []colltab.Elem {
	i := c.iter(0)
	i.setInput(str)
	for i.next() {
	}
	return i.ce
	}

	func (c *Collator) getColElemsString(str string) []colltab.Elem {
	i := c.iter(0)
	i.setInputString(str)
	for i.next() {
	}
	return i.ce
	}

	type iter struct {
	bytes []byte
	str string

	wa [512]colltab.Elem
	ce []colltab.Elem
	pce int
	nce int // nce <= len(nce)

	prevCCC uint8
	pStarter int

	t colltab.Weigher
	}

	func (i iter) init(c Collator) {
	i.t = c.t
	i.ce = i.wa[:0]
	}

	func (i *iter) reset() {
	i.ce = i.ce[:0]
	i.nce = 0
	i.prevCCC = 0
	i.pStarter = 0
	}

	func (i iter) setInput(s []byte) iter {
	i.bytes = s
	i.str = ""
	i.reset()
	return i
	}

	func (i iter) setInputString(s string) iter {
	i.str = s
	i.bytes = nil
	i.reset()
	return i
	}

	func (i *iter) done() bool {
	return len(i.str) == 0 && len(i.bytes) == 0
	}

	func (i *iter) tail(n int) {
	if i.bytes == nil {
	i.str = i.str[n:]
	} else {
	i.bytes = i.bytes[n:]
	}
	}

	func (i *iter) appendNext() int {
	var sz int
	if i.bytes == nil {
	i.ce, sz = i.t.AppendNextString(i.ce, i.str)
	} else {
	i.ce, sz = i.t.AppendNext(i.ce, i.bytes)
	}
	return sz
	}

	// next appends Elems to the internal array until it adds an element with CCC=0.
	// In the majority of cases, a Elem with a primary value > 0 will have
	// a CCC of 0. The CCC values of colation elements are also used to detect if the
	// input string was not normalized and to adjust the result accordingly.
	func (i *iter) next() bool {
	for !i.done() {
	p0 := len(i.ce)
	sz := i.appendNext()
	i.tail(sz)
	last := len(i.ce) - 1
	if ccc := i.ce[last].CCC(); ccc == 0 {
	i.nce = len(i.ce)
	i.pStarter = last
	i.prevCCC = 0
	return true
	} else if p0 < last && i.ce[p0].CCC() == 0 {
	// set i.nce to only cover part of i.ce for which ccc == 0 and
	// use rest the next call to next.
	for p0++; p0 < last && i.ce[p0].CCC() == 0; p0++ {
	}
	i.nce = p0
	i.pStarter = p0 - 1
	i.prevCCC = ccc
	return true
	} else if ccc < i.prevCCC {
	i.doNorm(p0, ccc) // should be rare for most common cases
	} else {
	i.prevCCC = ccc
	}
	}
	if len(i.ce) != i.nce {
	i.nce = len(i.ce)
	return true
	}
	return false
	}

	// nextPlain is the same as next, but does not "normalize" the collation
	// elements.
	// TODO: remove this function. Using this instead of next does not seem
	// to improve performance in any significant way. We retain this until
	// later for evaluation purposes.
	func (i *iter) nextPlain() bool {
	if i.done() {
	return false
	}
	sz := i.appendNext()
	i.tail(sz)
	i.nce = len(i.ce)
	return true
	}

	const maxCombiningCharacters = 30

	// doNorm reorders the collation elements in i.ce.
	// It assumes that blocks of collation elements added with appendNext
	// either start and end with the same CCC or start with CCC == 0.
	// This allows for a single insertion point for the entire block.
	// The correctness of this assumption is verified in builder.go.
	func (i *iter) doNorm(p int, ccc uint8) {
	if p-i.pStarter > maxCombiningCharacters {
	i.prevCCC = i.ce[len(i.ce)-1].CCC()
	i.pStarter = len(i.ce) - 1
	return
	}
	n := len(i.ce)
	k := p
	for p--; p > i.pStarter && ccc < i.ce[p-1].CCC(); p-- {
	}
	i.ce = append(i.ce, i.ce[p:k]...)
	copy(i.ce[p:], i.ce[k:])
	i.ce = i.ce[:n]
	}

	func (i *iter) nextPrimary() int {
	for {
	for ; i.pce < i.nce; i.pce++ {
	if v := i.ce[i.pce].Primary(); v != 0 {
	i.pce++
	return v
	}
	}
	if !i.next() {
	return 0
	}
	}
	panic("should not reach here")
	}

	func (i *iter) nextSecondary() int {
	for ; i.pce < len(i.ce); i.pce++ {
	if v := i.ce[i.pce].Secondary(); v != 0 {
	i.pce++
	return v
	}
	}
	return 0
	}

	func (i *iter) prevSecondary() int {
	for ; i.pce < len(i.ce); i.pce++ {
	if v := i.ce[len(i.ce)-i.pce-1].Secondary(); v != 0 {
	i.pce++
	return v
	}
	}
	return 0
	}

	func (i *iter) nextTertiary() int {
	for ; i.pce < len(i.ce); i.pce++ {
	if v := i.ce[i.pce].Tertiary(); v != 0 {
	i.pce++
	return int(v)
	}
	}
	return 0
	}

	func (i *iter) nextQuaternary() int {
	for ; i.pce < len(i.ce); i.pce++ {
	if v := i.ce[i.pce].Quaternary(); v != 0 {
	i.pce++
	return v
	}
	}
	return 0
	}

	func appendPrimary(key []byte, p int) []byte {
	// Convert to variable length encoding; supports up to 23 bits.
	if p <= 0x7FFF {
	key = append(key, uint8(p>>8), uint8(p))
	} else {
	key = append(key, uint8(p>>16)\|0x80, uint8(p>>8), uint8(p))
	}
	return key
	}

	// keyFromElems converts the weights ws to a compact sequence of bytes.
	// The result will be appended to the byte buffer in buf.
	func (c Collator) keyFromElems(buf Buffer, ws []colltab.Elem) {
	for _, v := range ws {
	if w := v.Primary(); w > 0 {
	buf.key = appendPrimary(buf.key, w)
	}
	}
	if colltab.Secondary <= c.Strength {
	buf.key = append(buf.key, 0, 0)
	// TODO: we can use one 0 if we can guarantee that all non-zero weights are > 0xFF.
	if !c.Backwards {
	for _, v := range ws {
	if w := v.Secondary(); w > 0 {
	buf.key = append(buf.key, uint8(w>>8), uint8(w))
	}
	}
	} else {
	for i := len(ws) - 1; i >= 0; i-- {
	if w := ws[i].Secondary(); w > 0 {
	buf.key = append(buf.key, uint8(w>>8), uint8(w))
	}
	}
	}
	} else if c.CaseLevel {
	buf.key = append(buf.key, 0, 0)
	}
	if colltab.Tertiary <= c.Strength \|\| c.CaseLevel {
	buf.key = append(buf.key, 0, 0)
	for _, v := range ws {
	if w := v.Tertiary(); w > 0 {
	buf.key = append(buf.key, uint8(w))
	}
	}
	// Derive the quaternary weights from the options and other levels.
	// Note that we represent MaxQuaternary as 0xFF. The first byte of the
	// representation of a primary weight is always smaller than 0xFF,
	// so using this single byte value will compare correctly.
	if colltab.Quaternary <= c.Strength && c.Alternate >= AltShifted {
	if c.Alternate == AltShiftTrimmed {
	lastNonFFFF := len(buf.key)
	buf.key = append(buf.key, 0)
	for _, v := range ws {
	if w := v.Quaternary(); w == colltab.MaxQuaternary {
	buf.key = append(buf.key, 0xFF)
	} else if w > 0 {
	buf.key = appendPrimary(buf.key, w)
	lastNonFFFF = len(buf.key)
	}
	}
	buf.key = buf.key[:lastNonFFFF]
	} else {
	buf.key = append(buf.key, 0)
	for _, v := range ws {
	if w := v.Quaternary(); w == colltab.MaxQuaternary {
	buf.key = append(buf.key, 0xFF)
	} else if w > 0 {
	buf.key = appendPrimary(buf.key, w)
	}
	}
	}
	}
	}
	}

	func processWeights(vw AlternateHandling, top uint32, wa []colltab.Elem) {
	ignore := false
	vtop := int(top)
	switch vw {
	case AltShifted, AltShiftTrimmed:
	for i := range wa {
	if p := wa[i].Primary(); p <= vtop && p != 0 {
	wa[i] = colltab.MakeQuaternary(p)
	ignore = true
	} else if p == 0 {
	if ignore {
	wa[i] = colltab.Ignore
	}
	} else {
	ignore = false
	}
	}
	case AltBlanked:
	for i := range wa {
	if p := wa[i].Primary(); p <= vtop && (ignore \|\| p != 0) {
	wa[i] = colltab.Ignore
	ignore = true
	} else {
	ignore = false
	}
	}
	}
	}