internal/colltab/collelem.go - text - Git at Google

 // Copyright 2012 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 package colltab

 import (
 	"fmt"
 	"unicode"
 )

 // Level identifies the collation comparison level.
 // The primary level corresponds to the basic sorting of text.
 // The secondary level corresponds to accents and related linguistic elements.
 // The tertiary level corresponds to casing and related concepts.
 // The quaternary level is derived from the other levels by the
 // various algorithms for handling variable elements.
 type Level int

 const (
 	Primary Level = iota
 	Secondary
 	Tertiary
 	Quaternary
 	Identity

 	NumLevels
 )

 const (
 	defaultSecondary = 0x20
 	defaultTertiary  = 0x2
 	maxTertiary      = 0x1F
 	MaxQuaternary    = 0x1FFFFF // 21 bits.
 )

 // Elem is a representation of a collation element. This API provides ways to encode
 // and decode Elems. Implementations of collation tables may use values greater
 // or equal to PrivateUse for their own purposes.  However, these should never be
 // returned by AppendNext.
 type Elem uint32

 const (
 	maxCE       Elem = 0xAFFFFFFF
 	PrivateUse       = minContract
 	minContract      = 0xC0000000
 	maxContract      = 0xDFFFFFFF
 	minExpand        = 0xE0000000
 	maxExpand        = 0xEFFFFFFF
 	minDecomp        = 0xF0000000
 )

 type ceType int

 const (
 	ceNormal           ceType = iota // ceNormal includes implicits (ce == 0)
 	ceContractionIndex               // rune can be a start of a contraction
 	ceExpansionIndex                 // rune expands into a sequence of collation elements
 	ceDecompose                      // rune expands using NFKC decomposition
 )

 func (ce Elem) ctype() ceType {
 	if ce <= maxCE {
 		return ceNormal
 	}
 	if ce <= maxContract {
 		return ceContractionIndex
 	} else {
 		if ce <= maxExpand {
 			return ceExpansionIndex
 		}
 		return ceDecompose
 	}
 	panic("should not reach here")
 	return ceType(-1)
 }

 // For normal collation elements, we assume that a collation element either has
 // a primary or non-default secondary value, not both.
 // Collation elements with a primary value are of the form
 // 01pppppp pppppppp ppppppp0 ssssssss
 //   - p* is primary collation value
 //   - s* is the secondary collation value
 // 00pppppp pppppppp ppppppps sssttttt, where
 //   - p* is primary collation value
 //   - s* offset of secondary from default value.
 //   - t* is the tertiary collation value
 // 100ttttt cccccccc pppppppp pppppppp
 //   - t* is the tertiar collation value
 //   - c* is the canonical combining class
 //   - p* is the primary collation value
 // Collation elements with a secondary value are of the form
 // 1010cccc ccccssss ssssssss tttttttt, where
 //   - c* is the canonical combining class
 //   - s* is the secondary collation value
 //   - t* is the tertiary collation value
 // 11qqqqqq qqqqqqqq qqqqqqq0 00000000
 //   - q* quaternary value
 const (
 	ceTypeMask              = 0xC0000000
 	ceTypeMaskExt           = 0xE0000000
 	ceIgnoreMask            = 0xF00FFFFF
 	ceType1                 = 0x40000000
 	ceType2                 = 0x00000000
 	ceType3or4              = 0x80000000
 	ceType4                 = 0xA0000000
 	ceTypeQ                 = 0xC0000000
 	Ignore                  = ceType4
 	firstNonPrimary         = 0x80000000
 	lastSpecialPrimary      = 0xA0000000
 	secondaryMask           = 0x80000000
 	hasTertiaryMask         = 0x40000000
 	primaryValueMask        = 0x3FFFFE00
 	maxPrimaryBits          = 21
 	compactPrimaryBits      = 16
 	maxSecondaryBits        = 12
 	maxTertiaryBits         = 8
 	maxCCCBits              = 8
 	maxSecondaryCompactBits = 8
 	maxSecondaryDiffBits    = 4
 	maxTertiaryCompactBits  = 5
 	primaryShift            = 9
 	compactSecondaryShift   = 5
 	minCompactSecondary     = defaultSecondary - 4
 )

 func makeImplicitCE(primary int) Elem {
 	return ceType1 | Elem(primary<<primaryShift) | defaultSecondary
 }

 // MakeElem returns an Elem for the given values.  It will return an error
 // if the given combination of values is invalid.
 func MakeElem(primary, secondary, tertiary int, ccc uint8) (Elem, error) {
 	if w := primary; w >= 1<<maxPrimaryBits || w < 0 {
 		return 0, fmt.Errorf("makeCE: primary weight out of bounds: %x >= %x", w, 1<<maxPrimaryBits)
 	}
 	if w := secondary; w >= 1<<maxSecondaryBits || w < 0 {
 		return 0, fmt.Errorf("makeCE: secondary weight out of bounds: %x >= %x", w, 1<<maxSecondaryBits)
 	}
 	if w := tertiary; w >= 1<<maxTertiaryBits || w < 0 {
 		return 0, fmt.Errorf("makeCE: tertiary weight out of bounds: %x >= %x", w, 1<<maxTertiaryBits)
 	}
 	ce := Elem(0)
 	if primary != 0 {
 		if ccc != 0 {
 			if primary >= 1<<compactPrimaryBits {
 				return 0, fmt.Errorf("makeCE: primary weight with non-zero CCC out of bounds: %x >= %x", primary, 1<<compactPrimaryBits)
 			}
 			if secondary != defaultSecondary {
 				return 0, fmt.Errorf("makeCE: cannot combine non-default secondary value (%x) with non-zero CCC (%x)", secondary, ccc)
 			}
 			ce = Elem(tertiary << (compactPrimaryBits + maxCCCBits))
 			ce |= Elem(ccc) << compactPrimaryBits
 			ce |= Elem(primary)
 			ce |= ceType3or4
 		} else if tertiary == defaultTertiary {
 			if secondary >= 1<<maxSecondaryCompactBits {
 				return 0, fmt.Errorf("makeCE: secondary weight with non-zero primary out of bounds: %x >= %x", secondary, 1<<maxSecondaryCompactBits)
 			}
 			ce = Elem(primary<<(maxSecondaryCompactBits+1) + secondary)
 			ce |= ceType1
 		} else {
 			d := secondary - defaultSecondary + maxSecondaryDiffBits
 			if d >= 1<<maxSecondaryDiffBits || d < 0 {
 				return 0, fmt.Errorf("makeCE: secondary weight diff out of bounds: %x < 0 || %x > %x", d, d, 1<<maxSecondaryDiffBits)
 			}
 			if tertiary >= 1<<maxTertiaryCompactBits {
 				return 0, fmt.Errorf("makeCE: tertiary weight with non-zero primary out of bounds: %x > %x", tertiary, 1<<maxTertiaryCompactBits)
 			}
 			ce = Elem(primary<<maxSecondaryDiffBits + d)
 			ce = ce<<maxTertiaryCompactBits + Elem(tertiary)
 		}
 	} else {
 		ce = Elem(secondary<<maxTertiaryBits + tertiary)
 		ce += Elem(ccc) << (maxSecondaryBits + maxTertiaryBits)
 		ce |= ceType4
 	}
 	return ce, nil
 }

 // MakeQuaternary returns an Elem with the given quaternary value.
 func MakeQuaternary(v int) Elem {
 	return ceTypeQ | Elem(v<<primaryShift)
 }

 // Mask sets weights for any level smaller than l to 0.
 // The resulting Elem can be used to test for equality with
 // other Elems to which the same mask has been applied.
 func (ce Elem) Mask(l Level) uint32 {
 	return 0
 }

 // CCC returns the canonical combining class associated with the underlying character,
 // if applicable, or 0 otherwise.
 func (ce Elem) CCC() uint8 {
 	if ce&ceType3or4 != 0 {
 		if ce&ceType4 == ceType3or4 {
 			return uint8(ce >> 16)
 		}
 		return uint8(ce >> 20)
 	}
 	return 0
 }

 // Primary returns the primary collation weight for ce.
 func (ce Elem) Primary() int {
 	if ce >= firstNonPrimary {
 		if ce > lastSpecialPrimary {
 			return 0
 		}
 		return int(uint16(ce))
 	}
 	return int(ce&primaryValueMask) >> primaryShift
 }

 // Secondary returns the secondary collation weight for ce.
 func (ce Elem) Secondary() int {
 	switch ce & ceTypeMask {
 	case ceType1:
 		return int(uint8(ce))
 	case ceType2:
 		return minCompactSecondary + int((ce>>compactSecondaryShift)&0xF)
 	case ceType3or4:
 		if ce < ceType4 {
 			return defaultSecondary
 		}
 		return int(ce>>8) & 0xFFF
 	case ceTypeQ:
 		return 0
 	}
 	panic("should not reach here")
 }

 // Tertiary returns the tertiary collation weight for ce.
 func (ce Elem) Tertiary() uint8 {
 	if ce&hasTertiaryMask == 0 {
 		if ce&ceType3or4 == 0 {
 			return uint8(ce & 0x1F)
 		}
 		if ce&ceType4 == ceType4 {
 			return uint8(ce)
 		}
 		return uint8(ce>>24) & 0x1F // type 2
 	} else if ce&ceTypeMask == ceType1 {
 		return defaultTertiary
 	}
 	// ce is a quaternary value.
 	return 0
 }

 func (ce Elem) updateTertiary(t uint8) Elem {
 	if ce&ceTypeMask == ceType1 {
 		// convert to type 4
 		nce := ce & primaryValueMask
 		nce |= Elem(uint8(ce)-minCompactSecondary) << compactSecondaryShift
 		ce = nce
 	} else if ce&ceTypeMaskExt == ceType3or4 {
 		ce &= ^Elem(maxTertiary << 24)
 		return ce | (Elem(t) << 24)
 	} else {
 		// type 2 or 4
 		ce &= ^Elem(maxTertiary)
 	}
 	return ce | Elem(t)
 }

 // Quaternary returns the quaternary value if explicitly specified,
 // 0 if ce == Ignore, or MaxQuaternary otherwise.
 // Quaternary values are used only for shifted variants.
 func (ce Elem) Quaternary() int {
 	if ce&ceTypeMask == ceTypeQ {
 		return int(ce&primaryValueMask) >> primaryShift
 	} else if ce&ceIgnoreMask == Ignore {
 		return 0
 	}
 	return MaxQuaternary
 }

 // Weight returns the collation weight for the given level.
 func (ce Elem) Weight(l Level) int {
 	switch l {
 	case Primary:
 		return ce.Primary()
 	case Secondary:
 		return ce.Secondary()
 	case Tertiary:
 		return int(ce.Tertiary())
 	case Quaternary:
 		return ce.Quaternary()
 	}
 	return 0 // return 0 (ignore) for undefined levels.
 }

 // For contractions, collation elements are of the form
 // 110bbbbb bbbbbbbb iiiiiiii iiiinnnn, where
 //   - n* is the size of the first node in the contraction trie.
 //   - i* is the index of the first node in the contraction trie.
 //   - b* is the offset into the contraction collation element table.
 // See contract.go for details on the contraction trie.
 const (
 	maxNBits              = 4
 	maxTrieIndexBits      = 12
 	maxContractOffsetBits = 13
 )

 func splitContractIndex(ce Elem) (index, n, offset int) {
 	n = int(ce & (1<<maxNBits - 1))
 	ce >>= maxNBits
 	index = int(ce & (1<<maxTrieIndexBits - 1))
 	ce >>= maxTrieIndexBits
 	offset = int(ce & (1<<maxContractOffsetBits - 1))
 	return
 }

 // For expansions, Elems are of the form 11100000 00000000 bbbbbbbb bbbbbbbb,
 // where b* is the index into the expansion sequence table.
 const maxExpandIndexBits = 16

 func splitExpandIndex(ce Elem) (index int) {
 	return int(uint16(ce))
 }

 // Some runes can be expanded using NFKD decomposition. Instead of storing the full
 // sequence of collation elements, we decompose the rune and lookup the collation
 // elements for each rune in the decomposition and modify the tertiary weights.
 // The Elem, in this case, is of the form 11110000 00000000 wwwwwwww vvvvvvvv, where
 //   - v* is the replacement tertiary weight for the first rune,
 //   - w* is the replacement tertiary weight for the second rune,
 // Tertiary weights of subsequent runes should be replaced with maxTertiary.
 // See http://www.unicode.org/reports/tr10/#Compatibility_Decompositions for more details.
 func splitDecompose(ce Elem) (t1, t2 uint8) {
 	return uint8(ce), uint8(ce >> 8)
 }

 const (
 	// These constants were taken from http://www.unicode.org/versions/Unicode6.0.0/ch12.pdf.
 	minUnified       rune = 0x4E00
 	maxUnified            = 0x9FFF
 	minCompatibility      = 0xF900
 	maxCompatibility      = 0xFAFF
 	minRare               = 0x3400
 	maxRare               = 0x4DBF
 )
 const (
 	commonUnifiedOffset = 0x10000
 	rareUnifiedOffset   = 0x20000 // largest rune in common is U+FAFF
 	otherOffset         = 0x50000 // largest rune in rare is U+2FA1D
 	illegalOffset       = otherOffset + int(unicode.MaxRune)
 	maxPrimary          = illegalOffset + 1
 )

 // implicitPrimary returns the primary weight for the a rune
 // for which there is no entry for the rune in the collation table.
 // We take a different approach from the one specified in
 // http://unicode.org/reports/tr10/#Implicit_Weights,
 // but preserve the resulting relative ordering of the runes.
 func implicitPrimary(r rune) int {
 	if unicode.Is(unicode.Ideographic, r) {
 		if r >= minUnified && r <= maxUnified {
 			// The most common case for CJK.
 			return int(r) + commonUnifiedOffset
 		}
 		if r >= minCompatibility && r <= maxCompatibility {
 			// This will typically not hit. The DUCET explicitly specifies mappings
 			// for all characters that do not decompose.
 			return int(r) + commonUnifiedOffset
 		}
 		return int(r) + rareUnifiedOffset
 	}
 	return int(r) + otherOffset
 }
	// Copyright 2012 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	package colltab

	import (
	"fmt"
	"unicode"
	)

	// Level identifies the collation comparison level.
	// The primary level corresponds to the basic sorting of text.
	// The secondary level corresponds to accents and related linguistic elements.
	// The tertiary level corresponds to casing and related concepts.
	// The quaternary level is derived from the other levels by the
	// various algorithms for handling variable elements.
	type Level int

	const (
	Primary Level = iota
	Secondary
	Tertiary
	Quaternary
	Identity

	NumLevels
	)

	const (
	defaultSecondary = 0x20
	defaultTertiary = 0x2
	maxTertiary = 0x1F
	MaxQuaternary = 0x1FFFFF // 21 bits.
	)

	// Elem is a representation of a collation element. This API provides ways to encode
	// and decode Elems. Implementations of collation tables may use values greater
	// or equal to PrivateUse for their own purposes. However, these should never be
	// returned by AppendNext.
	type Elem uint32

	const (
	maxCE Elem = 0xAFFFFFFF
	PrivateUse = minContract
	minContract = 0xC0000000
	maxContract = 0xDFFFFFFF
	minExpand = 0xE0000000
	maxExpand = 0xEFFFFFFF
	minDecomp = 0xF0000000
	)

	type ceType int

	const (
	ceNormal ceType = iota // ceNormal includes implicits (ce == 0)
	ceContractionIndex // rune can be a start of a contraction
	ceExpansionIndex // rune expands into a sequence of collation elements
	ceDecompose // rune expands using NFKC decomposition
	)

	func (ce Elem) ctype() ceType {
	if ce <= maxCE {
	return ceNormal
	}
	if ce <= maxContract {
	return ceContractionIndex
	} else {
	if ce <= maxExpand {
	return ceExpansionIndex
	}
	return ceDecompose
	}
	panic("should not reach here")
	return ceType(-1)
	}

	// For normal collation elements, we assume that a collation element either has
	// a primary or non-default secondary value, not both.
	// Collation elements with a primary value are of the form
	// 01pppppp pppppppp ppppppp0 ssssssss
	// - p* is primary collation value
	// - s* is the secondary collation value
	// 00pppppp pppppppp ppppppps sssttttt, where
	// - p* is primary collation value
	// - s* offset of secondary from default value.
	// - t* is the tertiary collation value
	// 100ttttt cccccccc pppppppp pppppppp
	// - t* is the tertiar collation value
	// - c* is the canonical combining class
	// - p* is the primary collation value
	// Collation elements with a secondary value are of the form
	// 1010cccc ccccssss ssssssss tttttttt, where
	// - c* is the canonical combining class
	// - s* is the secondary collation value
	// - t* is the tertiary collation value
	// 11qqqqqq qqqqqqqq qqqqqqq0 00000000
	// - q* quaternary value
	const (
	ceTypeMask = 0xC0000000
	ceTypeMaskExt = 0xE0000000
	ceIgnoreMask = 0xF00FFFFF
	ceType1 = 0x40000000
	ceType2 = 0x00000000
	ceType3or4 = 0x80000000
	ceType4 = 0xA0000000
	ceTypeQ = 0xC0000000
	Ignore = ceType4
	firstNonPrimary = 0x80000000
	lastSpecialPrimary = 0xA0000000
	secondaryMask = 0x80000000
	hasTertiaryMask = 0x40000000
	primaryValueMask = 0x3FFFFE00
	maxPrimaryBits = 21
	compactPrimaryBits = 16
	maxSecondaryBits = 12
	maxTertiaryBits = 8
	maxCCCBits = 8
	maxSecondaryCompactBits = 8
	maxSecondaryDiffBits = 4
	maxTertiaryCompactBits = 5
	primaryShift = 9
	compactSecondaryShift = 5
	minCompactSecondary = defaultSecondary - 4
	)

	func makeImplicitCE(primary int) Elem {
	return ceType1 \| Elem(primary<<primaryShift) \| defaultSecondary
	}

	// MakeElem returns an Elem for the given values. It will return an error
	// if the given combination of values is invalid.
	func MakeElem(primary, secondary, tertiary int, ccc uint8) (Elem, error) {
	if w := primary; w >= 1<<maxPrimaryBits \|\| w < 0 {
	return 0, fmt.Errorf("makeCE: primary weight out of bounds: %x >= %x", w, 1<<maxPrimaryBits)
	}
	if w := secondary; w >= 1<<maxSecondaryBits \|\| w < 0 {
	return 0, fmt.Errorf("makeCE: secondary weight out of bounds: %x >= %x", w, 1<<maxSecondaryBits)
	}
	if w := tertiary; w >= 1<<maxTertiaryBits \|\| w < 0 {
	return 0, fmt.Errorf("makeCE: tertiary weight out of bounds: %x >= %x", w, 1<<maxTertiaryBits)
	}
	ce := Elem(0)
	if primary != 0 {
	if ccc != 0 {
	if primary >= 1<<compactPrimaryBits {
	return 0, fmt.Errorf("makeCE: primary weight with non-zero CCC out of bounds: %x >= %x", primary, 1<<compactPrimaryBits)
	}
	if secondary != defaultSecondary {
	return 0, fmt.Errorf("makeCE: cannot combine non-default secondary value (%x) with non-zero CCC (%x)", secondary, ccc)
	}
	ce = Elem(tertiary << (compactPrimaryBits + maxCCCBits))
	ce \|= Elem(ccc) << compactPrimaryBits
	ce \|= Elem(primary)
	ce \|= ceType3or4
	} else if tertiary == defaultTertiary {
	if secondary >= 1<<maxSecondaryCompactBits {
	return 0, fmt.Errorf("makeCE: secondary weight with non-zero primary out of bounds: %x >= %x", secondary, 1<<maxSecondaryCompactBits)
	}
	ce = Elem(primary<<(maxSecondaryCompactBits+1) + secondary)
	ce \|= ceType1
	} else {
	d := secondary - defaultSecondary + maxSecondaryDiffBits
	if d >= 1<<maxSecondaryDiffBits \|\| d < 0 {
	return 0, fmt.Errorf("makeCE: secondary weight diff out of bounds: %x < 0 \|\| %x > %x", d, d, 1<<maxSecondaryDiffBits)
	}
	if tertiary >= 1<<maxTertiaryCompactBits {
	return 0, fmt.Errorf("makeCE: tertiary weight with non-zero primary out of bounds: %x > %x", tertiary, 1<<maxTertiaryCompactBits)
	}
	ce = Elem(primary<<maxSecondaryDiffBits + d)
	ce = ce<<maxTertiaryCompactBits + Elem(tertiary)
	}
	} else {
	ce = Elem(secondary<<maxTertiaryBits + tertiary)
	ce += Elem(ccc) << (maxSecondaryBits + maxTertiaryBits)
	ce \|= ceType4
	}
	return ce, nil
	}

	// MakeQuaternary returns an Elem with the given quaternary value.
	func MakeQuaternary(v int) Elem {
	return ceTypeQ \| Elem(v<<primaryShift)
	}

	// Mask sets weights for any level smaller than l to 0.
	// The resulting Elem can be used to test for equality with
	// other Elems to which the same mask has been applied.
	func (ce Elem) Mask(l Level) uint32 {
	return 0
	}

	// CCC returns the canonical combining class associated with the underlying character,
	// if applicable, or 0 otherwise.
	func (ce Elem) CCC() uint8 {
	if ce&ceType3or4 != 0 {
	if ce&ceType4 == ceType3or4 {
	return uint8(ce >> 16)
	}
	return uint8(ce >> 20)
	}
	return 0
	}

	// Primary returns the primary collation weight for ce.
	func (ce Elem) Primary() int {
	if ce >= firstNonPrimary {
	if ce > lastSpecialPrimary {
	return 0
	}
	return int(uint16(ce))
	}
	return int(ce&primaryValueMask) >> primaryShift
	}

	// Secondary returns the secondary collation weight for ce.
	func (ce Elem) Secondary() int {
	switch ce & ceTypeMask {
	case ceType1:
	return int(uint8(ce))
	case ceType2:
	return minCompactSecondary + int((ce>>compactSecondaryShift)&0xF)
	case ceType3or4:
	if ce < ceType4 {
	return defaultSecondary
	}
	return int(ce>>8) & 0xFFF
	case ceTypeQ:
	return 0
	}
	panic("should not reach here")
	}

	// Tertiary returns the tertiary collation weight for ce.
	func (ce Elem) Tertiary() uint8 {
	if ce&hasTertiaryMask == 0 {
	if ce&ceType3or4 == 0 {
	return uint8(ce & 0x1F)
	}
	if ce&ceType4 == ceType4 {
	return uint8(ce)
	}
	return uint8(ce>>24) & 0x1F // type 2
	} else if ce&ceTypeMask == ceType1 {
	return defaultTertiary
	}
	// ce is a quaternary value.
	return 0
	}

	func (ce Elem) updateTertiary(t uint8) Elem {
	if ce&ceTypeMask == ceType1 {
	// convert to type 4
	nce := ce & primaryValueMask
	nce \|= Elem(uint8(ce)-minCompactSecondary) << compactSecondaryShift
	ce = nce
	} else if ce&ceTypeMaskExt == ceType3or4 {
	ce &= ^Elem(maxTertiary << 24)
	return ce \| (Elem(t) << 24)
	} else {
	// type 2 or 4
	ce &= ^Elem(maxTertiary)
	}
	return ce \| Elem(t)
	}

	// Quaternary returns the quaternary value if explicitly specified,
	// 0 if ce == Ignore, or MaxQuaternary otherwise.
	// Quaternary values are used only for shifted variants.
	func (ce Elem) Quaternary() int {
	if ce&ceTypeMask == ceTypeQ {
	return int(ce&primaryValueMask) >> primaryShift
	} else if ce&ceIgnoreMask == Ignore {
	return 0
	}
	return MaxQuaternary
	}

	// Weight returns the collation weight for the given level.
	func (ce Elem) Weight(l Level) int {
	switch l {
	case Primary:
	return ce.Primary()
	case Secondary:
	return ce.Secondary()
	case Tertiary:
	return int(ce.Tertiary())
	case Quaternary:
	return ce.Quaternary()
	}
	return 0 // return 0 (ignore) for undefined levels.
	}

	// For contractions, collation elements are of the form
	// 110bbbbb bbbbbbbb iiiiiiii iiiinnnn, where
	// - n* is the size of the first node in the contraction trie.
	// - i* is the index of the first node in the contraction trie.
	// - b* is the offset into the contraction collation element table.
	// See contract.go for details on the contraction trie.
	const (
	maxNBits = 4
	maxTrieIndexBits = 12
	maxContractOffsetBits = 13
	)

	func splitContractIndex(ce Elem) (index, n, offset int) {
	n = int(ce & (1<<maxNBits - 1))
	ce >>= maxNBits
	index = int(ce & (1<<maxTrieIndexBits - 1))
	ce >>= maxTrieIndexBits
	offset = int(ce & (1<<maxContractOffsetBits - 1))
	return
	}

	// For expansions, Elems are of the form 11100000 00000000 bbbbbbbb bbbbbbbb,
	// where b* is the index into the expansion sequence table.
	const maxExpandIndexBits = 16

	func splitExpandIndex(ce Elem) (index int) {
	return int(uint16(ce))
	}

	// Some runes can be expanded using NFKD decomposition. Instead of storing the full
	// sequence of collation elements, we decompose the rune and lookup the collation
	// elements for each rune in the decomposition and modify the tertiary weights.
	// The Elem, in this case, is of the form 11110000 00000000 wwwwwwww vvvvvvvv, where
	// - v* is the replacement tertiary weight for the first rune,
	// - w* is the replacement tertiary weight for the second rune,
	// Tertiary weights of subsequent runes should be replaced with maxTertiary.
	// See http://www.unicode.org/reports/tr10/#Compatibility_Decompositions for more details.
	func splitDecompose(ce Elem) (t1, t2 uint8) {
	return uint8(ce), uint8(ce >> 8)
	}

	const (
	// These constants were taken from http://www.unicode.org/versions/Unicode6.0.0/ch12.pdf.
	minUnified rune = 0x4E00
	maxUnified = 0x9FFF
	minCompatibility = 0xF900
	maxCompatibility = 0xFAFF
	minRare = 0x3400
	maxRare = 0x4DBF
	)
	const (
	commonUnifiedOffset = 0x10000
	rareUnifiedOffset = 0x20000 // largest rune in common is U+FAFF
	otherOffset = 0x50000 // largest rune in rare is U+2FA1D
	illegalOffset = otherOffset + int(unicode.MaxRune)
	maxPrimary = illegalOffset + 1
	)

	// implicitPrimary returns the primary weight for the a rune
	// for which there is no entry for the rune in the collation table.
	// We take a different approach from the one specified in
	// http://unicode.org/reports/tr10/#Implicit_Weights,
	// but preserve the resulting relative ordering of the runes.
	func implicitPrimary(r rune) int {
	if unicode.Is(unicode.Ideographic, r) {
	if r >= minUnified && r <= maxUnified {
	// The most common case for CJK.
	return int(r) + commonUnifiedOffset
	}
	if r >= minCompatibility && r <= maxCompatibility {
	// This will typically not hit. The DUCET explicitly specifies mappings
	// for all characters that do not decompose.
	return int(r) + commonUnifiedOffset
	}
	return int(r) + rareUnifiedOffset
	}
	return int(r) + otherOffset
	}