blob: 6b6540187578086888d9b8982feb34a24705c690 [file] [log] [blame]
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
type trie struct {
index []uint8
values []uint16
}
const (
t1 = 0x00 // 0000 0000
tx = 0x80 // 1000 0000
t2 = 0xC0 // 1100 0000
t3 = 0xE0 // 1110 0000
t4 = 0xF0 // 1111 0000
t5 = 0xF8 // 1111 1000
t6 = 0xFC // 1111 1100
te = 0xFE // 1111 1110
maskx = 0x3F // 0011 1111
mask2 = 0x1F // 0001 1111
mask3 = 0x0F // 0000 1111
mask4 = 0x07 // 0000 0111
)
// lookup returns the trie value for the first UTF-8 encoding in s and
// the width in bytes of this encoding. The size will be 0 if s does not
// hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *trie) lookup(s []byte) (v uint16, sz int) {
c0 := s[0]
switch {
case c0 < tx:
return t.values[c0], 1
case c0 < t2:
return 0, 1
case c0 < t3:
if len(s) < 2 {
return 0, 0
}
i := t.index[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
o := uint16(i)<<6 + uint16(c1)&maskx
return t.values[o], 2
case c0 < t4:
if len(s) < 3 {
return 0, 0
}
i := t.index[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
o := uint16(i)<<6 + uint16(c1)&maskx
i = t.index[o]
c2 := s[2]
if c2 < tx || t2 <= c2 {
return 0, 2
}
o = uint16(i)<<6 + uint16(c2)&maskx
return t.values[o], 3
case c0 < t5:
if len(s) < 4 {
return 0, 0
}
i := t.index[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
o := uint16(i)<<6 + uint16(c1)&maskx
i = t.index[o]
c2 := s[2]
if c2 < tx || t2 <= c2 {
return 0, 2
}
o = uint16(i)<<6 + uint16(c2)&maskx
i = t.index[o]
c3 := s[3]
if c3 < tx || t2 <= c3 {
return 0, 3
}
o = uint16(i)<<6 + uint16(c3)&maskx
return t.values[o], 4
case c0 < t6:
if len(s) < 5 {
return 0, 0
}
return 0, 5
case c0 < te:
if len(s) < 6 {
return 0, 0
}
return 0, 6
}
// Illegal rune
return 0, 1
}
// lookupString returns the trie value for the first UTF-8 encoding in s and
// the width in bytes of this encoding. The size will be 0 if s does not
// hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *trie) lookupString(s string) (v uint16, sz int) {
c0 := s[0]
switch {
case c0 < tx:
return t.values[c0], 1
case c0 < t2:
return 0, 1
case c0 < t3:
if len(s) < 2 {
return 0, 0
}
i := t.index[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
o := uint16(i)<<6 + uint16(c1)&maskx
return t.values[o], 2
case c0 < t4:
if len(s) < 3 {
return 0, 0
}
i := t.index[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
o := uint16(i)<<6 + uint16(c1)&maskx
i = t.index[o]
c2 := s[2]
if c2 < tx || t2 <= c2 {
return 0, 2
}
o = uint16(i)<<6 + uint16(c2)&maskx
return t.values[o], 3
case c0 < t5:
if len(s) < 4 {
return 0, 0
}
i := t.index[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
o := uint16(i)<<6 + uint16(c1)&maskx
i = t.index[o]
c2 := s[2]
if c2 < tx || t2 <= c2 {
return 0, 2
}
o = uint16(i)<<6 + uint16(c2)&maskx
i = t.index[o]
c3 := s[3]
if c3 < tx || t2 <= c3 {
return 0, 3
}
o = uint16(i)<<6 + uint16(c3)&maskx
return t.values[o], 4
case c0 < t6:
if len(s) < 5 {
return 0, 0
}
return 0, 5
case c0 < te:
if len(s) < 6 {
return 0, 0
}
return 0, 6
}
// Illegal rune
return 0, 1
}
// lookupUnsafe returns the trie value for the first UTF-8 encoding in s.
// s must hold a full encoding.
func (t *trie) lookupUnsafe(s []byte) uint16 {
c0 := s[0]
if c0 < tx {
return t.values[c0]
}
if c0 < t2 {
return 0
}
i := t.index[c0]
o := uint16(i)<<6 + uint16(s[1])&maskx
if c0 < t3 {
return t.values[o]
}
i = t.index[o]
o = uint16(i)<<6 + uint16(s[2])&maskx
if c0 < t4 {
return t.values[o]
}
i = t.index[o]
o = uint16(i)<<6 + uint16(s[3])&maskx
if c0 < t5 {
return t.values[o]
}
return 0
}
// lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s.
// s must hold a full encoding.
func (t *trie) lookupStringUnsafe(s string) uint16 {
c0 := s[0]
if c0 < tx {
return t.values[c0]
}
if c0 < t2 {
return 0
}
i := t.index[c0]
o := uint16(i)<<6 + uint16(s[1])&maskx
if c0 < t3 {
return t.values[o]
}
i = t.index[o]
o = uint16(i)<<6 + uint16(s[2])&maskx
if c0 < t4 {
return t.values[o]
}
i = t.index[o]
o = uint16(i)<<6 + uint16(s[3])&maskx
if c0 < t5 {
return t.values[o]
}
return 0
}