exp/norm: use rune
Nothing terribly interesting here. (!)
Since the public APIs are all in terms of UTF-8,
the changes are all internal only.
R=mpvl, gri, r
CC=golang-dev
https://golang.org/cl/5309042
diff --git a/src/pkg/exp/norm/composition.go b/src/pkg/exp/norm/composition.go
index 1d72223..7965ffc 100644
--- a/src/pkg/exp/norm/composition.go
+++ b/src/pkg/exp/norm/composition.go
@@ -126,26 +126,26 @@
}
// appendRune inserts a rune at the end of the buffer. It is used for Hangul.
-func (rb *reorderBuffer) appendRune(rune uint32) {
+func (rb *reorderBuffer) appendRune(r uint32) {
bn := rb.nbyte
- sz := utf8.EncodeRune(rb.byte[bn:], int(rune))
+ sz := utf8.EncodeRune(rb.byte[bn:], rune(r))
rb.nbyte += utf8.UTFMax
rb.rune[rb.nrune] = runeInfo{bn, uint8(sz), 0, 0}
rb.nrune++
}
// assignRune sets a rune at position pos. It is used for Hangul and recomposition.
-func (rb *reorderBuffer) assignRune(pos int, rune uint32) {
+func (rb *reorderBuffer) assignRune(pos int, r uint32) {
bn := rb.rune[pos].pos
- sz := utf8.EncodeRune(rb.byte[bn:], int(rune))
+ sz := utf8.EncodeRune(rb.byte[bn:], rune(r))
rb.rune[pos] = runeInfo{bn, uint8(sz), 0, 0}
}
// runeAt returns the rune at position n. It is used for Hangul and recomposition.
func (rb *reorderBuffer) runeAt(n int) uint32 {
inf := rb.rune[n]
- rune, _ := utf8.DecodeRune(rb.byte[inf.pos : inf.pos+inf.size])
- return uint32(rune)
+ r, _ := utf8.DecodeRune(rb.byte[inf.pos : inf.pos+inf.size])
+ return uint32(r)
}
// bytesAt returns the UTF-8 encoding of the rune at position n.
@@ -237,17 +237,17 @@
// decomposeHangul algorithmically decomposes a Hangul rune into
// its Jamo components.
// See http://unicode.org/reports/tr15/#Hangul for details on decomposing Hangul.
-func (rb *reorderBuffer) decomposeHangul(rune uint32) bool {
+func (rb *reorderBuffer) decomposeHangul(r uint32) bool {
b := rb.rune[:]
n := rb.nrune
if n+3 > len(b) {
return false
}
- rune -= hangulBase
- x := rune % jamoTCount
- rune /= jamoTCount
- rb.appendRune(jamoLBase + rune/jamoVCount)
- rb.appendRune(jamoVBase + rune%jamoVCount)
+ r -= hangulBase
+ x := r % jamoTCount
+ r /= jamoTCount
+ rb.appendRune(jamoLBase + r/jamoVCount)
+ rb.appendRune(jamoVBase + r%jamoVCount)
if x != 0 {
rb.appendRune(jamoTBase + x)
}
diff --git a/src/pkg/exp/norm/composition_test.go b/src/pkg/exp/norm/composition_test.go
index ce9caaf..e32380d 100644
--- a/src/pkg/exp/norm/composition_test.go
+++ b/src/pkg/exp/norm/composition_test.go
@@ -8,14 +8,14 @@
// TestCase is used for most tests.
type TestCase struct {
- in []int
- out []int
+ in []rune
+ out []rune
}
-type insertFunc func(rb *reorderBuffer, rune int) bool
+type insertFunc func(rb *reorderBuffer, r rune) bool
-func insert(rb *reorderBuffer, rune int) bool {
- src := inputString(string(rune))
+func insert(rb *reorderBuffer, r rune) bool {
+ src := inputString(string(r))
return rb.insert(src, 0, rb.f.info(src, 0))
}
@@ -39,7 +39,7 @@
continue
}
for j, want := range test.out {
- found := int(rb.runeAt(j))
+ found := rune(rb.runeAt(j))
if found != want {
t.Errorf("%s:%d: runeAt(%d) = %U; want %U", name, i, j, found, want)
}
@@ -57,7 +57,7 @@
t.Errorf("wrote bytes on flush of empty buffer. (len(out) = %d)", len(out))
}
- for _, r := range []int("world!") {
+ for _, r := range []rune("world!") {
insert(&rb, r)
}
@@ -76,14 +76,14 @@
}
var insertTests = []TestCase{
- {[]int{'a'}, []int{'a'}},
- {[]int{0x300}, []int{0x300}},
- {[]int{0x300, 0x316}, []int{0x316, 0x300}}, // CCC(0x300)==230; CCC(0x316)==220
- {[]int{0x316, 0x300}, []int{0x316, 0x300}},
- {[]int{0x41, 0x316, 0x300}, []int{0x41, 0x316, 0x300}},
- {[]int{0x41, 0x300, 0x316}, []int{0x41, 0x316, 0x300}},
- {[]int{0x300, 0x316, 0x41}, []int{0x316, 0x300, 0x41}},
- {[]int{0x41, 0x300, 0x40, 0x316}, []int{0x41, 0x300, 0x40, 0x316}},
+ {[]rune{'a'}, []rune{'a'}},
+ {[]rune{0x300}, []rune{0x300}},
+ {[]rune{0x300, 0x316}, []rune{0x316, 0x300}}, // CCC(0x300)==230; CCC(0x316)==220
+ {[]rune{0x316, 0x300}, []rune{0x316, 0x300}},
+ {[]rune{0x41, 0x316, 0x300}, []rune{0x41, 0x316, 0x300}},
+ {[]rune{0x41, 0x300, 0x316}, []rune{0x41, 0x316, 0x300}},
+ {[]rune{0x300, 0x316, 0x41}, []rune{0x316, 0x300, 0x41}},
+ {[]rune{0x41, 0x300, 0x40, 0x316}, []rune{0x41, 0x300, 0x40, 0x316}},
}
func TestInsert(t *testing.T) {
@@ -91,18 +91,18 @@
}
var decompositionNFDTest = []TestCase{
- {[]int{0xC0}, []int{0x41, 0x300}},
- {[]int{0xAC00}, []int{0x1100, 0x1161}},
- {[]int{0x01C4}, []int{0x01C4}},
- {[]int{0x320E}, []int{0x320E}},
- {[]int("음ẻ과"), []int{0x110B, 0x1173, 0x11B7, 0x65, 0x309, 0x1100, 0x116A}},
+ {[]rune{0xC0}, []rune{0x41, 0x300}},
+ {[]rune{0xAC00}, []rune{0x1100, 0x1161}},
+ {[]rune{0x01C4}, []rune{0x01C4}},
+ {[]rune{0x320E}, []rune{0x320E}},
+ {[]rune("음ẻ과"), []rune{0x110B, 0x1173, 0x11B7, 0x65, 0x309, 0x1100, 0x116A}},
}
var decompositionNFKDTest = []TestCase{
- {[]int{0xC0}, []int{0x41, 0x300}},
- {[]int{0xAC00}, []int{0x1100, 0x1161}},
- {[]int{0x01C4}, []int{0x44, 0x5A, 0x030C}},
- {[]int{0x320E}, []int{0x28, 0x1100, 0x1161, 0x29}},
+ {[]rune{0xC0}, []rune{0x41, 0x300}},
+ {[]rune{0xAC00}, []rune{0x1100, 0x1161}},
+ {[]rune{0x01C4}, []rune{0x44, 0x5A, 0x030C}},
+ {[]rune{0x320E}, []rune{0x28, 0x1100, 0x1161, 0x29}},
}
func TestDecomposition(t *testing.T) {
@@ -111,15 +111,15 @@
}
var compositionTest = []TestCase{
- {[]int{0x41, 0x300}, []int{0xC0}},
- {[]int{0x41, 0x316}, []int{0x41, 0x316}},
- {[]int{0x41, 0x300, 0x35D}, []int{0xC0, 0x35D}},
- {[]int{0x41, 0x316, 0x300}, []int{0xC0, 0x316}},
+ {[]rune{0x41, 0x300}, []rune{0xC0}},
+ {[]rune{0x41, 0x316}, []rune{0x41, 0x316}},
+ {[]rune{0x41, 0x300, 0x35D}, []rune{0xC0, 0x35D}},
+ {[]rune{0x41, 0x316, 0x300}, []rune{0xC0, 0x316}},
// blocking starter
- {[]int{0x41, 0x316, 0x40, 0x300}, []int{0x41, 0x316, 0x40, 0x300}},
- {[]int{0x1100, 0x1161}, []int{0xAC00}},
+ {[]rune{0x41, 0x316, 0x40, 0x300}, []rune{0x41, 0x316, 0x40, 0x300}},
+ {[]rune{0x1100, 0x1161}, []rune{0xAC00}},
// parenthesized Hangul, alternate between ASCII and Hangul.
- {[]int{0x28, 0x1100, 0x1161, 0x29}, []int{0x28, 0xAC00, 0x29}},
+ {[]rune{0x28, 0x1100, 0x1161, 0x29}, []rune{0x28, 0xAC00, 0x29}},
}
func TestComposition(t *testing.T) {
diff --git a/src/pkg/exp/norm/maketables.go b/src/pkg/exp/norm/maketables.go
index 14718c5..626f324 100644
--- a/src/pkg/exp/norm/maketables.go
+++ b/src/pkg/exp/norm/maketables.go
@@ -119,7 +119,7 @@
// This contains only the properties we're interested in.
type Char struct {
name string
- codePoint int // if zero, this index is not a valid code point.
+ codePoint rune // if zero, this index is not a valid code point.
ccc uint8 // canonical combining class
excludeInComp bool // from CompositionExclusions.txt
compatDecomp bool // it has a compatibility expansion
@@ -160,7 +160,7 @@
SMissing
)
-var lastChar int = 0
+var lastChar = rune('\u0000')
func (c Char) isValid() bool {
return c.codePoint != 0 && c.state != SMissing
@@ -193,7 +193,7 @@
return buf.String()
}
-type Decomposition []int
+type Decomposition []rune
func (d Decomposition) String() string {
return fmt.Sprintf("%.4X", d)
@@ -220,7 +220,7 @@
return
}
-func parseDecomposition(s string, skipfirst bool) (a []int, e os.Error) {
+func parseDecomposition(s string, skipfirst bool) (a []rune, e os.Error) {
decomp := strings.Split(s, " ")
if len(decomp) > 0 && skipfirst {
decomp = decomp[1:]
@@ -230,7 +230,7 @@
if err != nil {
return a, err
}
- a = append(a, int(point))
+ a = append(a, rune(point))
}
return a, nil
}
@@ -260,7 +260,7 @@
state = SLast
}
firstChar := lastChar + 1
- lastChar = int(point)
+ lastChar = rune(point)
if state != SLast {
firstChar = lastChar
}
@@ -370,8 +370,8 @@
// hasCompatDecomp returns true if any of the recursive
// decompositions contains a compatibility expansion.
// In this case, the character may not occur in NFK*.
-func hasCompatDecomp(rune int) bool {
- c := &chars[rune]
+func hasCompatDecomp(r rune) bool {
+ c := &chars[r]
if c.compatDecomp {
return true
}
@@ -396,19 +396,19 @@
JamoTEnd = 0x11C3
)
-func isHangul(rune int) bool {
- return HangulBase <= rune && rune < HangulEnd
+func isHangul(r rune) bool {
+ return HangulBase <= r && r < HangulEnd
}
-func ccc(rune int) uint8 {
- return chars[rune].ccc
+func ccc(r rune) uint8 {
+ return chars[r].ccc
}
// Insert a rune in a buffer, ordered by Canonical Combining Class.
-func insertOrdered(b Decomposition, rune int) Decomposition {
+func insertOrdered(b Decomposition, r rune) Decomposition {
n := len(b)
b = append(b, 0)
- cc := ccc(rune)
+ cc := ccc(r)
if cc > 0 {
// Use bubble sort.
for ; n > 0; n-- {
@@ -418,18 +418,18 @@
b[n] = b[n-1]
}
}
- b[n] = rune
+ b[n] = r
return b
}
// Recursively decompose.
-func decomposeRecursive(form int, rune int, d Decomposition) Decomposition {
- if isHangul(rune) {
+func decomposeRecursive(form int, r rune, d Decomposition) Decomposition {
+ if isHangul(r) {
return d
}
- dcomp := chars[rune].forms[form].decomp
+ dcomp := chars[r].forms[form].decomp
if len(dcomp) == 0 {
- return insertOrdered(d, rune)
+ return insertOrdered(d, r)
}
for _, c := range dcomp {
d = decomposeRecursive(form, c, d)
@@ -475,8 +475,8 @@
f.isOneWay = f.isOneWay || hasCompatDecomp(c.codePoint)
}
- for _, rune := range f.decomp {
- chars[rune].forms[form].inDecomp = true
+ for _, r := range f.decomp {
+ chars[r].forms[form].inDecomp = true
}
}
@@ -505,7 +505,7 @@
switch {
case len(f.decomp) > 0:
f.quickCheck[MDecomposed] = QCNo
- case isHangul(i):
+ case isHangul(rune(i)):
f.quickCheck[MDecomposed] = QCNo
default:
f.quickCheck[MDecomposed] = QCYes
@@ -588,7 +588,7 @@
for i, char := range chars {
v := makeCharInfo(char)
if v != 0 {
- t.insert(i, v)
+ t.insert(rune(i), v)
}
}
return t.printTables("charInfo")
@@ -606,7 +606,7 @@
for _, c := range chars {
for f := 0; f < 2; f++ {
d := c.forms[f].expandedDecomp
- s := string([]int(d))
+ s := string([]rune(d))
if _, ok := positionMap[s]; !ok {
p := decompositions.Len()
decompositions.WriteByte(uint8(len(s)))
@@ -624,7 +624,7 @@
for i, c := range chars {
d := c.forms[FCanonical].expandedDecomp
if len(d) != 0 {
- nfcT.insert(i, positionMap[string([]int(d))])
+ nfcT.insert(rune(i), positionMap[string([]rune(d))])
if ccc(c.codePoint) != ccc(d[0]) {
// We assume the lead ccc of a decomposition is !=0 in this case.
if ccc(d[0]) == 0 {
@@ -634,7 +634,7 @@
}
d = c.forms[FCompatibility].expandedDecomp
if len(d) != 0 {
- nfkcT.insert(i, positionMap[string([]int(d))])
+ nfkcT.insert(rune(i), positionMap[string([]rune(d))])
if ccc(c.codePoint) != ccc(d[0]) {
// We assume the lead ccc of a decomposition is !=0 in this case.
if ccc(d[0]) == 0 {
@@ -752,7 +752,7 @@
for i, c := range chars {
for _, f := range c.forms {
isNo := (f.quickCheck[MDecomposed] == QCNo)
- if (len(f.decomp) > 0) != isNo && !isHangul(i) {
+ if (len(f.decomp) > 0) != isNo && !isHangul(rune(i)) {
log.Fatalf("%U: NF*D must be no if rune decomposes", i)
}
diff --git a/src/pkg/exp/norm/maketesttables.go b/src/pkg/exp/norm/maketesttables.go
index fdcc114..20eb889 100644
--- a/src/pkg/exp/norm/maketesttables.go
+++ b/src/pkg/exp/norm/maketesttables.go
@@ -16,7 +16,7 @@
// We take the smallest, largest and an arbitrary value for each
// of the UTF-8 sequence lengths.
-var testRunes = []int{
+var testRunes = []rune{
0x01, 0x0C, 0x7F, // 1-byte sequences
0x80, 0x100, 0x7FF, // 2-byte sequences
0x800, 0x999, 0xFFFF, // 3-byte sequences
diff --git a/src/pkg/exp/norm/normalize_test.go b/src/pkg/exp/norm/normalize_test.go
index e374edf..6bd5292 100644
--- a/src/pkg/exp/norm/normalize_test.go
+++ b/src/pkg/exp/norm/normalize_test.go
@@ -28,13 +28,13 @@
if pos != test.pos {
t.Errorf("%s:%d: position is %d; want %d", name, i, pos, test.pos)
}
- runes := []int(test.buffer)
+ runes := []rune(test.buffer)
if rb.nrune != len(runes) {
t.Errorf("%s:%d: reorder buffer lenght is %d; want %d", name, i, rb.nrune, len(runes))
continue
}
for j, want := range runes {
- found := int(rb.runeAt(j))
+ found := rune(rb.runeAt(j))
if found != want {
t.Errorf("%s:%d: rune at %d is %U; want %U", name, i, j, found, want)
}
@@ -385,8 +385,8 @@
}
if outs != test.out {
// Find first rune that differs and show context.
- ir := []int(outs)
- ig := []int(test.out)
+ ir := []rune(outs)
+ ig := []rune(test.out)
for j := 0; j < len(ir) && j < len(ig); j++ {
if ir[j] == ig[j] {
continue
diff --git a/src/pkg/exp/norm/normregtest.go b/src/pkg/exp/norm/normregtest.go
index cbd73ff..cf3b340 100644
--- a/src/pkg/exp/norm/normregtest.go
+++ b/src/pkg/exp/norm/normregtest.go
@@ -103,7 +103,7 @@
name string
partnr int
number int
- rune int // used for character by character test
+ r rune // used for character by character test
cols [cMaxColumns]string // Each has 5 entries, see below.
}
@@ -174,12 +174,12 @@
if err != nil {
logger.Fatal(err)
}
- if test.rune == 0 {
+ if test.r == 0 {
// save for CharacterByCharacterTests
- test.rune = int(r)
+ test.r = int(r)
}
var buf [utf8.UTFMax]byte
- sz := utf8.EncodeRune(buf[:], int(r))
+ sz := utf8.EncodeRune(buf[:], rune(r))
test.cols[j-1] += string(buf[:sz])
}
}
@@ -198,7 +198,7 @@
if errorCount > 20 {
return
}
- st, sr, sg := []int(test), []int(result), []int(gold)
+ st, sr, sg := []rune(test), []rune(result), []rune(gold)
logger.Printf("%s:%s: %s(%X)=%X; want:%X: %s",
t.Name(), name, fstr[f], st, sr, sg, t.name)
}
@@ -210,7 +210,7 @@
if errorCount > 20 {
return
}
- logger.Printf("%s:%s: %s(%X)=%v; want: %v", t.Name(), name, fstr[f], []int(test), result, want)
+ logger.Printf("%s:%s: %s(%X)=%v; want: %v", t.Name(), name, fstr[f], []rune(test), result, want)
}
}
@@ -243,13 +243,13 @@
tests := part[1].tests
last := 0
for i := 0; i <= len(tests); i++ { // last one is special case
- var rune int
+ var r int
if i == len(tests) {
- rune = 0x2FA1E // Don't have to go to 0x10FFFF
+ r = 0x2FA1E // Don't have to go to 0x10FFFF
} else {
- rune = tests[i].rune
+ r = tests[i].r
}
- for last++; last < rune; last++ {
+ for last++; last < r; last++ {
// Check all characters that were not explicitly listed in the test.
t := &Test{partnr: 1, number: -1}
char := string(last)
diff --git a/src/pkg/exp/norm/trie_test.go b/src/pkg/exp/norm/trie_test.go
index 5649fb7..bbd5c03 100644
--- a/src/pkg/exp/norm/trie_test.go
+++ b/src/pkg/exp/norm/trie_test.go
@@ -73,15 +73,15 @@
{1, []byte{t6, tx, tx, tx, tx, tx}},
}
-func mkUtf8(rune int) ([]byte, int) {
+func mkUTF8(r rune) ([]byte, int) {
var b [utf8.UTFMax]byte
- sz := utf8.EncodeRune(b[:], rune)
+ sz := utf8.EncodeRune(b[:], r)
return b[:sz], sz
}
func TestLookup(t *testing.T) {
for i, tt := range testRunes {
- b, szg := mkUtf8(tt)
+ b, szg := mkUTF8(tt)
v, szt := testdata.lookup(b)
if int(v) != i {
t.Errorf("lookup(%U): found value %#x, expected %#x", tt, v, i)
@@ -103,7 +103,7 @@
func TestLookupUnsafe(t *testing.T) {
for i, tt := range testRunes {
- b, _ := mkUtf8(tt)
+ b, _ := mkUTF8(tt)
v := testdata.lookupUnsafe(b)
if int(v) != i {
t.Errorf("lookupUnsafe(%U): found value %#x, expected %#x", i, v, i)
@@ -113,7 +113,7 @@
func TestLookupString(t *testing.T) {
for i, tt := range testRunes {
- b, szg := mkUtf8(tt)
+ b, szg := mkUTF8(tt)
v, szt := testdata.lookupString(string(b))
if int(v) != i {
t.Errorf("lookup(%U): found value %#x, expected %#x", i, v, i)
@@ -135,7 +135,7 @@
func TestLookupStringUnsafe(t *testing.T) {
for i, tt := range testRunes {
- b, _ := mkUtf8(tt)
+ b, _ := mkUTF8(tt)
v := testdata.lookupStringUnsafe(string(b))
if int(v) != i {
t.Errorf("lookupUnsafe(%U): found value %#x, expected %#x", i, v, i)
diff --git a/src/pkg/exp/norm/triedata_test.go b/src/pkg/exp/norm/triedata_test.go
index e8898e5..7f62760 100644
--- a/src/pkg/exp/norm/triedata_test.go
+++ b/src/pkg/exp/norm/triedata_test.go
@@ -4,7 +4,7 @@
package norm
-var testRunes = []int{1, 12, 127, 128, 256, 2047, 2048, 2457, 65535, 65536, 65793, 1114111, 512, 513, 514, 528, 533}
+var testRunes = []rune{1, 12, 127, 128, 256, 2047, 2048, 2457, 65535, 65536, 65793, 1114111, 512, 513, 514, 528, 533}
// testdataValues: 192 entries, 384 bytes
// Block 2 is the null block.
diff --git a/src/pkg/exp/norm/triegen.go b/src/pkg/exp/norm/triegen.go
index 515e1c7..56cba32 100644
--- a/src/pkg/exp/norm/triegen.go
+++ b/src/pkg/exp/norm/triegen.go
@@ -94,9 +94,9 @@
return count
}
-func (n *trieNode) insert(rune int, value uint16) {
+func (n *trieNode) insert(r rune, value uint16) {
var p [utf8.UTFMax]byte
- sz := utf8.EncodeRune(p[:], rune)
+ sz := utf8.EncodeRune(p[:], r)
for i := 0; i < sz; i++ {
if n.leaf {