| // Copyright 2012 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package build |
| |
| import ( |
| "fmt" |
| "log" |
| "sort" |
| "strings" |
| "unicode" |
| |
| "golang.org/x/text/internal/colltab" |
| "golang.org/x/text/unicode/norm" |
| ) |
| |
// logicalAnchor classifies an entry as a logical reset position. entryLess
// uses it to break ties between entries whose weights compare equal.
type logicalAnchor int

// Anchor kinds. Only firstAnchor is declared with the logicalAnchor type;
// noAnchor and lastAnchor are untyped integer constants that convert
// implicitly wherever a logicalAnchor is expected.
const (
	// firstAnchor marks an anchor that sorts before entries of equal weight.
	firstAnchor logicalAnchor = -1
	// noAnchor is the zero value and marks an ordinary, non-anchor entry.
	noAnchor = 0
	// lastAnchor marks an anchor that sorts after entries of equal weight.
	lastAnchor = 1
)
| |
| // entry is used to keep track of a single entry in the collation element table |
| // during building. Examples of entries can be found in the Default Unicode |
| // Collation Element Table. |
| // See https://www.unicode.org/Public/UCA/6.0.0/allkeys.txt. |
| type entry struct { |
| str string // same as string(runes) |
| runes []rune |
| elems []rawCE // the collation elements |
| extend string // weights of extend to be appended to elems |
| before bool // weights relative to next instead of previous. |
| lock bool // entry is used in extension and can no longer be moved. |
| |
| // prev, next, and level are used to keep track of tailorings. |
| prev, next *entry |
| level colltab.Level // next differs at this level |
| skipRemove bool // do not unlink when removed |
| |
| decompose bool // can use NFKD decomposition to generate elems |
| exclude bool // do not include in table |
| implicit bool // derived, is not included in the list |
| modified bool // entry was modified in tailoring |
| logical logicalAnchor |
| |
| expansionIndex int // used to store index into expansion table |
| contractionHandle ctHandle |
| contractionIndex int // index into contraction elements |
| } |
| |
| func (e *entry) String() string { |
| return fmt.Sprintf("%X (%q) -> %X (ch:%x; ci:%d, ei:%d)", |
| e.runes, e.str, e.elems, e.contractionHandle, e.contractionIndex, e.expansionIndex) |
| } |
| |
| func (e *entry) skip() bool { |
| return e.contraction() |
| } |
| |
| func (e *entry) expansion() bool { |
| return !e.decompose && len(e.elems) > 1 |
| } |
| |
| func (e *entry) contraction() bool { |
| return len(e.runes) > 1 |
| } |
| |
| func (e *entry) contractionStarter() bool { |
| return e.contractionHandle.n != 0 |
| } |
| |
| // nextIndexed gets the next entry that needs to be stored in the table. |
| // It returns the entry and the collation level at which the next entry differs |
| // from the current entry. |
| // Entries that can be explicitly derived and logical reset positions are |
| // examples of entries that will not be indexed. |
| func (e *entry) nextIndexed() (*entry, colltab.Level) { |
| level := e.level |
| for e = e.next; e != nil && (e.exclude || len(e.elems) == 0); e = e.next { |
| if e.level < level { |
| level = e.level |
| } |
| } |
| return e, level |
| } |
| |
| // remove unlinks entry e from the sorted chain and clears the collation |
| // elements. e may not be at the front or end of the list. This should always |
| // be the case, as the front and end of the list are always logical anchors, |
| // which may not be removed. |
| func (e *entry) remove() { |
| if e.logical != noAnchor { |
| log.Fatalf("may not remove anchor %q", e.str) |
| } |
| // TODO: need to set e.prev.level to e.level if e.level is smaller? |
| e.elems = nil |
| if !e.skipRemove { |
| if e.prev != nil { |
| e.prev.next = e.next |
| } |
| if e.next != nil { |
| e.next.prev = e.prev |
| } |
| } |
| e.skipRemove = false |
| } |
| |
| // insertAfter inserts n after e. |
| func (e *entry) insertAfter(n *entry) { |
| if e == n { |
| panic("e == anchor") |
| } |
| if e == nil { |
| panic("unexpected nil anchor") |
| } |
| n.remove() |
| n.decompose = false // redo decomposition test |
| |
| n.next = e.next |
| n.prev = e |
| if e.next != nil { |
| e.next.prev = n |
| } |
| e.next = n |
| } |
| |
| // insertBefore inserts n before e. |
| func (e *entry) insertBefore(n *entry) { |
| if e == n { |
| panic("e == anchor") |
| } |
| if e == nil { |
| panic("unexpected nil anchor") |
| } |
| n.remove() |
| n.decompose = false // redo decomposition test |
| |
| n.prev = e.prev |
| n.next = e |
| if e.prev != nil { |
| e.prev.next = n |
| } |
| e.prev = n |
| } |
| |
| func (e *entry) encodeBase() (ce uint32, err error) { |
| switch { |
| case e.expansion(): |
| ce, err = makeExpandIndex(e.expansionIndex) |
| default: |
| if e.decompose { |
| log.Fatal("decompose should be handled elsewhere") |
| } |
| ce, err = makeCE(e.elems[0]) |
| } |
| return |
| } |
| |
| func (e *entry) encode() (ce uint32, err error) { |
| if e.skip() { |
| log.Fatal("cannot build colElem for entry that should be skipped") |
| } |
| switch { |
| case e.decompose: |
| t1 := e.elems[0].w[2] |
| t2 := 0 |
| if len(e.elems) > 1 { |
| t2 = e.elems[1].w[2] |
| } |
| ce, err = makeDecompose(t1, t2) |
| case e.contractionStarter(): |
| ce, err = makeContractIndex(e.contractionHandle, e.contractionIndex) |
| default: |
| if len(e.runes) > 1 { |
| log.Fatal("colElem: contractions are handled in contraction trie") |
| } |
| ce, err = e.encodeBase() |
| } |
| return |
| } |
| |
| // entryLess returns true if a sorts before b and false otherwise. |
| func entryLess(a, b *entry) bool { |
| if res, _ := compareWeights(a.elems, b.elems); res != 0 { |
| return res == -1 |
| } |
| if a.logical != noAnchor { |
| return a.logical == firstAnchor |
| } |
| if b.logical != noAnchor { |
| return b.logical == lastAnchor |
| } |
| return a.str < b.str |
| } |
| |
| type sortedEntries []*entry |
| |
| func (s sortedEntries) Len() int { |
| return len(s) |
| } |
| |
| func (s sortedEntries) Swap(i, j int) { |
| s[i], s[j] = s[j], s[i] |
| } |
| |
| func (s sortedEntries) Less(i, j int) bool { |
| return entryLess(s[i], s[j]) |
| } |
| |
| type ordering struct { |
| id string |
| entryMap map[string]*entry |
| ordered []*entry |
| handle *trieHandle |
| } |
| |
| // insert inserts e into both entryMap and ordered. |
| // Note that insert simply appends e to ordered. To reattain a sorted |
| // order, o.sort() should be called. |
| func (o *ordering) insert(e *entry) { |
| if e.logical == noAnchor { |
| o.entryMap[e.str] = e |
| } else { |
| // Use key format as used in UCA rules. |
| o.entryMap[fmt.Sprintf("[%s]", e.str)] = e |
| // Also add index entry for XML format. |
| o.entryMap[fmt.Sprintf("<%s/>", strings.Replace(e.str, " ", "_", -1))] = e |
| } |
| o.ordered = append(o.ordered, e) |
| } |
| |
| // newEntry creates a new entry for the given info and inserts it into |
| // the index. |
| func (o *ordering) newEntry(s string, ces []rawCE) *entry { |
| e := &entry{ |
| runes: []rune(s), |
| elems: ces, |
| str: s, |
| } |
| o.insert(e) |
| return e |
| } |
| |
| // find looks up and returns the entry for the given string. |
| // It returns nil if str is not in the index and if an implicit value |
| // cannot be derived, that is, if str represents more than one rune. |
| func (o *ordering) find(str string) *entry { |
| e := o.entryMap[str] |
| if e == nil { |
| r := []rune(str) |
| if len(r) == 1 { |
| const ( |
| firstHangul = 0xAC00 |
| lastHangul = 0xD7A3 |
| ) |
| if r[0] >= firstHangul && r[0] <= lastHangul { |
| ce := []rawCE{} |
| nfd := norm.NFD.String(str) |
| for _, r := range nfd { |
| ce = append(ce, o.find(string(r)).elems...) |
| } |
| e = o.newEntry(nfd, ce) |
| } else { |
| e = o.newEntry(string(r[0]), []rawCE{ |
| {w: []int{ |
| implicitPrimary(r[0]), |
| defaultSecondary, |
| defaultTertiary, |
| int(r[0]), |
| }, |
| }, |
| }) |
| e.modified = true |
| } |
| e.exclude = true // do not index implicits |
| } |
| } |
| return e |
| } |
| |
| // makeRootOrdering returns a newly initialized ordering value and populates |
| // it with a set of logical reset points that can be used as anchors. |
| // The anchors first_tertiary_ignorable and __END__ will always sort at |
| // the beginning and end, respectively. This means that prev and next are non-nil |
| // for any indexed entry. |
| func makeRootOrdering() ordering { |
| const max = unicode.MaxRune |
| o := ordering{ |
| entryMap: make(map[string]*entry), |
| } |
| insert := func(typ logicalAnchor, s string, ce []int) { |
| e := &entry{ |
| elems: []rawCE{{w: ce}}, |
| str: s, |
| exclude: true, |
| logical: typ, |
| } |
| o.insert(e) |
| } |
| insert(firstAnchor, "first tertiary ignorable", []int{0, 0, 0, 0}) |
| insert(lastAnchor, "last tertiary ignorable", []int{0, 0, 0, max}) |
| insert(lastAnchor, "last primary ignorable", []int{0, defaultSecondary, defaultTertiary, max}) |
| insert(lastAnchor, "last non ignorable", []int{maxPrimary, defaultSecondary, defaultTertiary, max}) |
| insert(lastAnchor, "__END__", []int{1 << maxPrimaryBits, defaultSecondary, defaultTertiary, max}) |
| return o |
| } |
| |
| // patchForInsert eliminates entries from the list with more than one collation element. |
| // The next and prev fields of the eliminated entries still point to appropriate |
| // values in the newly created list. |
| // It requires that sort has been called. |
| func (o *ordering) patchForInsert() { |
| for i := 0; i < len(o.ordered)-1; { |
| e := o.ordered[i] |
| lev := e.level |
| n := e.next |
| for ; n != nil && len(n.elems) > 1; n = n.next { |
| if n.level < lev { |
| lev = n.level |
| } |
| n.skipRemove = true |
| } |
| for ; o.ordered[i] != n; i++ { |
| o.ordered[i].level = lev |
| o.ordered[i].next = n |
| o.ordered[i+1].prev = e |
| } |
| } |
| } |
| |
| // clone copies all ordering of es into a new ordering value. |
| func (o *ordering) clone() *ordering { |
| o.sort() |
| oo := ordering{ |
| entryMap: make(map[string]*entry), |
| } |
| for _, e := range o.ordered { |
| ne := &entry{ |
| runes: e.runes, |
| elems: e.elems, |
| str: e.str, |
| decompose: e.decompose, |
| exclude: e.exclude, |
| logical: e.logical, |
| } |
| oo.insert(ne) |
| } |
| oo.sort() // link all ordering. |
| oo.patchForInsert() |
| return &oo |
| } |
| |
| // front returns the first entry to be indexed. |
| // It assumes that sort() has been called. |
| func (o *ordering) front() *entry { |
| e := o.ordered[0] |
| if e.prev != nil { |
| log.Panicf("unexpected first entry: %v", e) |
| } |
| // The first entry is always a logical position, which should not be indexed. |
| e, _ = e.nextIndexed() |
| return e |
| } |
| |
| // sort sorts all ordering based on their collation elements and initializes |
| // the prev, next, and level fields accordingly. |
| func (o *ordering) sort() { |
| sort.Sort(sortedEntries(o.ordered)) |
| l := o.ordered |
| for i := 1; i < len(l); i++ { |
| k := i - 1 |
| l[k].next = l[i] |
| _, l[k].level = compareWeights(l[k].elems, l[i].elems) |
| l[i].prev = l[k] |
| } |
| } |
| |
| // genColElems generates a collation element array from the runes in str. This |
| // assumes that all collation elements have already been added to the Builder. |
| func (o *ordering) genColElems(str string) []rawCE { |
| elems := []rawCE{} |
| for _, r := range []rune(str) { |
| for _, ce := range o.find(string(r)).elems { |
| if ce.w[0] != 0 || ce.w[1] != 0 || ce.w[2] != 0 { |
| elems = append(elems, ce) |
| } |
| } |
| } |
| return elems |
| } |