blob: 4ee11fb364f58dab8ea41e237651992473977fe1 [file] [log] [blame]
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4
Rob Pike149e3d32009-08-31 13:01:25 -07005package unicode_test
Russ Cox5ea8ac72009-01-14 14:05:00 -08006
Rob Pike149e3d32009-08-31 13:01:25 -07007import (
Russ Cox4591cd62012-09-21 00:35:25 -04008 "flag"
9 "fmt"
10 "runtime"
11 "sort"
Robert Griesemer45ca9f72009-12-15 15:41:46 -080012 "testing"
13 . "unicode"
Rob Pike149e3d32009-08-31 13:01:25 -070014)
Russ Cox5ea8ac72009-01-14 14:05:00 -080015
// upperTest lists runes that TestIsUpper expects IsUpper to accept.
// TestIsLetter also expects IsLetter to accept every one of them.
var upperTest = []rune{
	0x0041, 0x00C0, 0x00D8,
	0x0100, 0x0139, 0x014A, 0x0178, 0x0181,
	0x0376, 0x03CF,
	0x1F2A,
	0x2102, 0x2C00, 0x2C10, 0x2C20,
	0xA650, 0xA722,
	0xFF3A,
	0x10400, 0x1D400, 0x1D7CA,
}
Russ Cox5ea8ac72009-01-14 14:05:00 -080039
// notupperTest lists runes that TestIsUpper expects IsUpper to reject.
var notupperTest = []rune{
	0x0040, 0x005B, 0x0061,
	0x0185, 0x01B0,
	0x0377, 0x0387,
	0x2150,
	0xFFFF,
	0x10000,
}
Russ Cox5ea8ac72009-01-14 14:05:00 -080052
// letterTest lists runes that TestIsLetter expects IsLetter to accept.
// TestIsSpace reuses the list as runes that IsSpace must reject.
var letterTest = []rune{
	0x0041, 0x0061, 0x00AA, 0x00BA, 0x00C8, 0x00DB, 0x00F9,
	0x02EC,
	0x0535,
	0x0620, 0x06E6,
	0x093D,
	0x0A15, 0x0B99, 0x0DC0, 0x0EDD,
	0x1000, 0x1200, 0x1312, 0x1401, 0x1885,
	0x2C00,
	0xA800,
	0xF900, 0xFA30, 0xFFDA, 0xFFDC,
	0x10000, 0x10300, 0x10400,
	0x20000, 0x2F800, 0x2FA1D,
}
Russ Cox5ea8ac72009-01-14 14:05:00 -080088
// notletterTest lists runes that TestIsLetter expects IsLetter to reject.
// TestIsUpper also expects IsUpper to reject every one of them.
var notletterTest = []rune{
	0x0020, 0x0035,
	0x0375,
	0x0619, 0x0700,
	0xFFFE,
	0x1FFFF,
	0x10FFFF,
}
Russ Cox5ea8ac72009-01-14 14:05:00 -080099
// spaceTest lists runes that TestIsSpace expects IsSpace to accept.
// It contains all the special-cased Latin-1 characters, followed by
// two runes above the Latin-1 range.
var spaceTest = []rune{
	'\t', '\n', '\v', '\f', '\r', ' ',
	0x85, 0xA0,
	0x2000, 0x3000,
}
113
// caseT is one row of the case-mapping table: mapping rune in to the
// case selected by cas is expected to produce rune out.
type caseT struct {
	cas int  // case selector handed to To (UpperCase, LowerCase, TitleCase, or an invalid value)
	in  rune // rune to map
	out rune // expected result
}
118
Russ Cox91549432009-10-07 11:55:06 -0700119var caseTest = []caseT{
Rob Pike22c2b472009-08-28 23:05:16 -0700120 // errors
Robert Griesemer34788912010-10-22 10:06:33 -0700121 {-1, '\n', 0xFFFD},
122 {UpperCase, -1, -1},
123 {UpperCase, 1 << 30, 1 << 30},
Rob Pike22c2b472009-08-28 23:05:16 -0700124
125 // ASCII (special-cased so test carefully)
Robert Griesemer34788912010-10-22 10:06:33 -0700126 {UpperCase, '\n', '\n'},
127 {UpperCase, 'a', 'A'},
128 {UpperCase, 'A', 'A'},
129 {UpperCase, '7', '7'},
130 {LowerCase, '\n', '\n'},
131 {LowerCase, 'a', 'a'},
132 {LowerCase, 'A', 'a'},
133 {LowerCase, '7', '7'},
134 {TitleCase, '\n', '\n'},
135 {TitleCase, 'a', 'A'},
136 {TitleCase, 'A', 'A'},
137 {TitleCase, '7', '7'},
Rob Pike22c2b472009-08-28 23:05:16 -0700138
139 // Latin-1: easy to read the tests!
Robert Griesemer34788912010-10-22 10:06:33 -0700140 {UpperCase, 0x80, 0x80},
141 {UpperCase, 'Å', 'Å'},
142 {UpperCase, 'å', 'Å'},
143 {LowerCase, 0x80, 0x80},
144 {LowerCase, 'Å', 'å'},
145 {LowerCase, 'å', 'å'},
146 {TitleCase, 0x80, 0x80},
147 {TitleCase, 'Å', 'Å'},
148 {TitleCase, 'å', 'Å'},
Rob Pike22c2b472009-08-28 23:05:16 -0700149
150 // 0131;LATIN SMALL LETTER DOTLESS I;Ll;0;L;;;;;N;;;0049;;0049
Robert Griesemer34788912010-10-22 10:06:33 -0700151 {UpperCase, 0x0131, 'I'},
152 {LowerCase, 0x0131, 0x0131},
153 {TitleCase, 0x0131, 'I'},
Rob Pike22c2b472009-08-28 23:05:16 -0700154
155 // 0133;LATIN SMALL LIGATURE IJ;Ll;0;L;<compat> 0069 006A;;;;N;LATIN SMALL LETTER I J;;0132;;0132
Robert Griesemer34788912010-10-22 10:06:33 -0700156 {UpperCase, 0x0133, 0x0132},
157 {LowerCase, 0x0133, 0x0133},
158 {TitleCase, 0x0133, 0x0132},
Rob Pike22c2b472009-08-28 23:05:16 -0700159
160 // 212A;KELVIN SIGN;Lu;0;L;004B;;;;N;DEGREES KELVIN;;;006B;
Robert Griesemer34788912010-10-22 10:06:33 -0700161 {UpperCase, 0x212A, 0x212A},
162 {LowerCase, 0x212A, 'k'},
163 {TitleCase, 0x212A, 0x212A},
Rob Pike22c2b472009-08-28 23:05:16 -0700164
165 // From an UpperLower sequence
166 // A640;CYRILLIC CAPITAL LETTER ZEMLYA;Lu;0;L;;;;;N;;;;A641;
Robert Griesemer34788912010-10-22 10:06:33 -0700167 {UpperCase, 0xA640, 0xA640},
168 {LowerCase, 0xA640, 0xA641},
169 {TitleCase, 0xA640, 0xA640},
Rob Pike22c2b472009-08-28 23:05:16 -0700170 // A641;CYRILLIC SMALL LETTER ZEMLYA;Ll;0;L;;;;;N;;;A640;;A640
Robert Griesemer34788912010-10-22 10:06:33 -0700171 {UpperCase, 0xA641, 0xA640},
172 {LowerCase, 0xA641, 0xA641},
173 {TitleCase, 0xA641, 0xA640},
Rob Pike22c2b472009-08-28 23:05:16 -0700174 // A64E;CYRILLIC CAPITAL LETTER NEUTRAL YER;Lu;0;L;;;;;N;;;;A64F;
Robert Griesemer34788912010-10-22 10:06:33 -0700175 {UpperCase, 0xA64E, 0xA64E},
176 {LowerCase, 0xA64E, 0xA64F},
177 {TitleCase, 0xA64E, 0xA64E},
Rob Pike22c2b472009-08-28 23:05:16 -0700178 // A65F;CYRILLIC SMALL LETTER YN;Ll;0;L;;;;;N;;;A65E;;A65E
Robert Griesemer34788912010-10-22 10:06:33 -0700179 {UpperCase, 0xA65F, 0xA65E},
180 {LowerCase, 0xA65F, 0xA65F},
181 {TitleCase, 0xA65F, 0xA65E},
Rob Pike22c2b472009-08-28 23:05:16 -0700182
Rob Pike3c098e22009-08-30 14:02:42 -0700183 // From another UpperLower sequence
Rob Pike22c2b472009-08-28 23:05:16 -0700184 // 0139;LATIN CAPITAL LETTER L WITH ACUTE;Lu;0;L;004C 0301;;;;N;LATIN CAPITAL LETTER L ACUTE;;;013A;
Robert Griesemer34788912010-10-22 10:06:33 -0700185 {UpperCase, 0x0139, 0x0139},
186 {LowerCase, 0x0139, 0x013A},
187 {TitleCase, 0x0139, 0x0139},
Rob Pike22c2b472009-08-28 23:05:16 -0700188 // 013F;LATIN CAPITAL LETTER L WITH MIDDLE DOT;Lu;0;L;<compat> 004C 00B7;;;;N;;;;0140;
Robert Griesemer34788912010-10-22 10:06:33 -0700189 {UpperCase, 0x013f, 0x013f},
190 {LowerCase, 0x013f, 0x0140},
191 {TitleCase, 0x013f, 0x013f},
Rob Pike22c2b472009-08-28 23:05:16 -0700192 // 0148;LATIN SMALL LETTER N WITH CARON;Ll;0;L;006E 030C;;;;N;LATIN SMALL LETTER N HACEK;;0147;;0147
Robert Griesemer34788912010-10-22 10:06:33 -0700193 {UpperCase, 0x0148, 0x0147},
194 {LowerCase, 0x0148, 0x0148},
195 {TitleCase, 0x0148, 0x0147},
Rob Pike22c2b472009-08-28 23:05:16 -0700196
197 // Last block in the 5.1.0 table
198 // 10400;DESERET CAPITAL LETTER LONG I;Lu;0;L;;;;;N;;;;10428;
Robert Griesemer34788912010-10-22 10:06:33 -0700199 {UpperCase, 0x10400, 0x10400},
200 {LowerCase, 0x10400, 0x10428},
201 {TitleCase, 0x10400, 0x10400},
Rob Pike22c2b472009-08-28 23:05:16 -0700202 // 10427;DESERET CAPITAL LETTER EW;Lu;0;L;;;;;N;;;;1044F;
Robert Griesemer34788912010-10-22 10:06:33 -0700203 {UpperCase, 0x10427, 0x10427},
204 {LowerCase, 0x10427, 0x1044F},
205 {TitleCase, 0x10427, 0x10427},
Rob Pike22c2b472009-08-28 23:05:16 -0700206 // 10428;DESERET SMALL LETTER LONG I;Ll;0;L;;;;;N;;;10400;;10400
Robert Griesemer34788912010-10-22 10:06:33 -0700207 {UpperCase, 0x10428, 0x10400},
208 {LowerCase, 0x10428, 0x10428},
209 {TitleCase, 0x10428, 0x10400},
Rob Pike22c2b472009-08-28 23:05:16 -0700210 // 1044F;DESERET SMALL LETTER EW;Ll;0;L;;;;;N;;;10427;;10427
Robert Griesemer34788912010-10-22 10:06:33 -0700211 {UpperCase, 0x1044F, 0x10427},
212 {LowerCase, 0x1044F, 0x1044F},
213 {TitleCase, 0x1044F, 0x10427},
Rob Pike22c2b472009-08-28 23:05:16 -0700214
215 // First one not in the 5.1.0 table
216 // 10450;SHAVIAN LETTER PEEP;Lo;0;L;;;;;N;;;;;
Robert Griesemer34788912010-10-22 10:06:33 -0700217 {UpperCase, 0x10450, 0x10450},
218 {LowerCase, 0x10450, 0x10450},
219 {TitleCase, 0x10450, 0x10450},
Rob Pike5ea413e2011-07-27 15:54:23 -0700220
221 // Non-letters with case.
222 {LowerCase, 0x2161, 0x2171},
223 {UpperCase, 0x0345, 0x0399},
Rob Pike22c2b472009-08-28 23:05:16 -0700224}
225
Russ Cox839a6842009-01-20 14:40:40 -0800226func TestIsLetter(t *testing.T) {
Russ Coxca6a0fe2009-09-15 09:41:59 -0700227 for _, r := range upperTest {
Russ Cox5ea8ac72009-01-14 14:05:00 -0800228 if !IsLetter(r) {
Rob Pike1959c3a2010-09-23 13:48:56 +1000229 t.Errorf("IsLetter(U+%04X) = false, want true", r)
Russ Cox5ea8ac72009-01-14 14:05:00 -0800230 }
231 }
Russ Coxca6a0fe2009-09-15 09:41:59 -0700232 for _, r := range letterTest {
Russ Cox5ea8ac72009-01-14 14:05:00 -0800233 if !IsLetter(r) {
Rob Pike1959c3a2010-09-23 13:48:56 +1000234 t.Errorf("IsLetter(U+%04X) = false, want true", r)
Russ Cox5ea8ac72009-01-14 14:05:00 -0800235 }
236 }
Russ Coxca6a0fe2009-09-15 09:41:59 -0700237 for _, r := range notletterTest {
Russ Cox5ea8ac72009-01-14 14:05:00 -0800238 if IsLetter(r) {
Rob Pike1959c3a2010-09-23 13:48:56 +1000239 t.Errorf("IsLetter(U+%04X) = true, want false", r)
Russ Cox5ea8ac72009-01-14 14:05:00 -0800240 }
241 }
242}
243
Russ Cox839a6842009-01-20 14:40:40 -0800244func TestIsUpper(t *testing.T) {
Russ Coxca6a0fe2009-09-15 09:41:59 -0700245 for _, r := range upperTest {
Russ Cox5ea8ac72009-01-14 14:05:00 -0800246 if !IsUpper(r) {
Rob Pike1959c3a2010-09-23 13:48:56 +1000247 t.Errorf("IsUpper(U+%04X) = false, want true", r)
Russ Cox5ea8ac72009-01-14 14:05:00 -0800248 }
249 }
Russ Coxca6a0fe2009-09-15 09:41:59 -0700250 for _, r := range notupperTest {
Russ Cox5ea8ac72009-01-14 14:05:00 -0800251 if IsUpper(r) {
Rob Pike1959c3a2010-09-23 13:48:56 +1000252 t.Errorf("IsUpper(U+%04X) = true, want false", r)
Russ Cox5ea8ac72009-01-14 14:05:00 -0800253 }
254 }
Russ Coxca6a0fe2009-09-15 09:41:59 -0700255 for _, r := range notletterTest {
Russ Cox5ea8ac72009-01-14 14:05:00 -0800256 if IsUpper(r) {
Rob Pike1959c3a2010-09-23 13:48:56 +1000257 t.Errorf("IsUpper(U+%04X) = true, want false", r)
Rob Pike22c2b472009-08-28 23:05:16 -0700258 }
259 }
260}
261
// caseString returns the name of the case constant c for use in test
// failure messages. Any value other than UpperCase, LowerCase or
// TitleCase is reported as "ErrorCase".
func caseString(c int) string {
	name := "ErrorCase"
	switch c {
	case UpperCase:
		name = "UpperCase"
	case LowerCase:
		name = "LowerCase"
	case TitleCase:
		name = "TitleCase"
	}
	return name
}
273
274func TestTo(t *testing.T) {
Russ Coxca6a0fe2009-09-15 09:41:59 -0700275 for _, c := range caseTest {
Robert Griesemer45ca9f72009-12-15 15:41:46 -0800276 r := To(c.cas, c.in)
Rob Pike22c2b472009-08-28 23:05:16 -0700277 if c.out != r {
Rob Pike1959c3a2010-09-23 13:48:56 +1000278 t.Errorf("To(U+%04X, %s) = U+%04X want U+%04X", c.in, caseString(c.cas), r, c.out)
Rob Pike22c2b472009-08-28 23:05:16 -0700279 }
280 }
281}
282
283func TestToUpperCase(t *testing.T) {
Russ Coxca6a0fe2009-09-15 09:41:59 -0700284 for _, c := range caseTest {
Rob Pike22c2b472009-08-28 23:05:16 -0700285 if c.cas != UpperCase {
Robert Griesemer40621d52009-11-09 12:07:39 -0800286 continue
Rob Pike22c2b472009-08-28 23:05:16 -0700287 }
Robert Griesemer45ca9f72009-12-15 15:41:46 -0800288 r := ToUpper(c.in)
Rob Pike22c2b472009-08-28 23:05:16 -0700289 if c.out != r {
Rob Pike1959c3a2010-09-23 13:48:56 +1000290 t.Errorf("ToUpper(U+%04X) = U+%04X want U+%04X", c.in, r, c.out)
Rob Pike22c2b472009-08-28 23:05:16 -0700291 }
292 }
293}
294
295func TestToLowerCase(t *testing.T) {
Russ Coxca6a0fe2009-09-15 09:41:59 -0700296 for _, c := range caseTest {
Rob Pike22c2b472009-08-28 23:05:16 -0700297 if c.cas != LowerCase {
Robert Griesemer40621d52009-11-09 12:07:39 -0800298 continue
Rob Pike22c2b472009-08-28 23:05:16 -0700299 }
Robert Griesemer45ca9f72009-12-15 15:41:46 -0800300 r := ToLower(c.in)
Rob Pike22c2b472009-08-28 23:05:16 -0700301 if c.out != r {
Rob Pike1959c3a2010-09-23 13:48:56 +1000302 t.Errorf("ToLower(U+%04X) = U+%04X want U+%04X", c.in, r, c.out)
Rob Pike22c2b472009-08-28 23:05:16 -0700303 }
304 }
305}
306
307func TestToTitleCase(t *testing.T) {
Russ Coxca6a0fe2009-09-15 09:41:59 -0700308 for _, c := range caseTest {
Rob Pike22c2b472009-08-28 23:05:16 -0700309 if c.cas != TitleCase {
Robert Griesemer40621d52009-11-09 12:07:39 -0800310 continue
Rob Pike22c2b472009-08-28 23:05:16 -0700311 }
Robert Griesemer45ca9f72009-12-15 15:41:46 -0800312 r := ToTitle(c.in)
Rob Pike22c2b472009-08-28 23:05:16 -0700313 if c.out != r {
Rob Pike1959c3a2010-09-23 13:48:56 +1000314 t.Errorf("ToTitle(U+%04X) = U+%04X want U+%04X", c.in, r, c.out)
Russ Cox5ea8ac72009-01-14 14:05:00 -0800315 }
316 }
317}
Rob Pikef9e4f392009-08-31 18:12:40 -0700318
319func TestIsSpace(t *testing.T) {
320 for _, c := range spaceTest {
321 if !IsSpace(c) {
Robert Griesemer40621d52009-11-09 12:07:39 -0800322 t.Errorf("IsSpace(U+%04X) = false; want true", c)
Rob Pikef9e4f392009-08-31 18:12:40 -0700323 }
324 }
325 for _, c := range letterTest {
326 if IsSpace(c) {
Robert Griesemer40621d52009-11-09 12:07:39 -0800327 t.Errorf("IsSpace(U+%04X) = true; want false", c)
Rob Pikef9e4f392009-08-31 18:12:40 -0700328 }
329 }
330}
Rob Pike932def92009-08-31 21:18:40 -0700331
// TestLetterOptimizations checks that the optimized entry points
// (IsLetter, IsUpper, IsLower, IsTitle, IsSpace, ToUpper, ToLower, ToTitle)
// agree with the table-driven Is and To for every rune up to MaxLatin1.
// Only the Latin-1 range needs checking.
func TestLetterOptimizations(t *testing.T) {
	// Predicate shortcuts paired with the range table they must match.
	predicates := []struct {
		table  *RangeTable
		direct func(rune) bool
		format string
	}{
		{Letter, IsLetter, "IsLetter(U+%04X) disagrees with Is(Letter)"},
		{Upper, IsUpper, "IsUpper(U+%04X) disagrees with Is(Upper)"},
		{Lower, IsLower, "IsLower(U+%04X) disagrees with Is(Lower)"},
		{Title, IsTitle, "IsTitle(U+%04X) disagrees with Is(Title)"},
		{White_Space, IsSpace, "IsSpace(U+%04X) disagrees with Is(White_Space)"},
	}
	// Mapping shortcuts paired with the case selector they must match.
	mappings := []struct {
		cas    int
		direct func(rune) rune
		format string
	}{
		{UpperCase, ToUpper, "ToUpper(U+%04X) disagrees with To(Upper)"},
		{LowerCase, ToLower, "ToLower(U+%04X) disagrees with To(Lower)"},
		{TitleCase, ToTitle, "ToTitle(U+%04X) disagrees with To(Title)"},
	}

	for r := rune(0); r <= MaxLatin1; r++ {
		for _, p := range predicates {
			if Is(p.table, r) != p.direct(r) {
				t.Errorf(p.format, r)
			}
		}
		for _, m := range mappings {
			if To(m.cas, r) != m.direct(r) {
				t.Errorf(m.format, r)
			}
		}
	}
}
Rob Pike4e2b7f82010-03-30 17:51:03 -0700362
// TestTurkishCase checks the TurkishCase special-case mapping against the
// Turkish alphabet. lower and upper hold the same letters in the same
// order, so the rune at index i of one is the expected mapping of the
// rune at index i of the other.
func TestTurkishCase(t *testing.T) {
	lower := []rune("abcçdefgğhıijklmnoöprsştuüvyz")
	upper := []rune("ABCÇDEFGĞHIİJKLMNOÖPRSŞTUÜVYZ")
	for i, l := range lower {
		u := upper[i]

		// Each result is captured once so the failure message always
		// reports the value that was actually compared.
		if got := TurkishCase.ToLower(l); got != l {
			t.Errorf("lower(U+%04X) is U+%04X not U+%04X", l, got, l)
		}
		if got := TurkishCase.ToUpper(u); got != u {
			t.Errorf("upper(U+%04X) is U+%04X not U+%04X", u, got, u)
		}
		if got := TurkishCase.ToUpper(l); got != u {
			t.Errorf("upper(U+%04X) is U+%04X not U+%04X", l, got, u)
		}
		if got := TurkishCase.ToLower(u); got != l {
			t.Errorf("lower(U+%04X) is U+%04X not U+%04X", u, got, l)
		}
		if got := TurkishCase.ToTitle(u); got != u {
			t.Errorf("title(U+%04X) is U+%04X not U+%04X", u, got, u)
		}
		if got := TurkishCase.ToTitle(l); got != u {
			t.Errorf("title(U+%04X) is U+%04X not U+%04X", l, got, u)
		}
	}
}
Russ Coxfc77e822011-06-16 17:56:25 -0400388
389var simpleFoldTests = []string{
Rui Ueyama79b3daa2014-03-19 10:14:04 -0700390 // SimpleFold(x) returns the next equivalent rune > x or wraps
391 // around to smaller values.
Russ Coxfc77e822011-06-16 17:56:25 -0400392
393 // Easy cases.
394 "Aa",
Russ Coxfc77e822011-06-16 17:56:25 -0400395 "δΔ",
Russ Coxfc77e822011-06-16 17:56:25 -0400396
397 // ASCII special cases.
398 "KkK",
Russ Coxfc77e822011-06-16 17:56:25 -0400399 "Ssſ",
Russ Coxfc77e822011-06-16 17:56:25 -0400400
401 // Non-ASCII special cases.
402 "ρϱΡ",
Russ Coxfc77e822011-06-16 17:56:25 -0400403 "ͅΙιι",
Russ Coxfc77e822011-06-16 17:56:25 -0400404
405 // Extra special cases: has lower/upper but no case fold.
406 "İ",
407 "ı",
408}
409
410func TestSimpleFold(t *testing.T) {
411 for _, tt := range simpleFoldTests {
Russ Cox7630a102011-10-25 22:23:15 -0700412 cycle := []rune(tt)
413 r := cycle[len(cycle)-1]
Russ Coxfc77e822011-06-16 17:56:25 -0400414 for _, out := range cycle {
Russ Cox7630a102011-10-25 22:23:15 -0700415 if r := SimpleFold(r); r != out {
416 t.Errorf("SimpleFold(%#U) = %#U, want %#U", r, r, out)
Russ Coxfc77e822011-06-16 17:56:25 -0400417 }
Russ Cox7630a102011-10-25 22:23:15 -0700418 r = out
Russ Coxfc77e822011-06-16 17:56:25 -0400419 }
420 }
421}
Russ Cox4591cd62012-09-21 00:35:25 -0400422
// Running 'go test -calibrate' runs the calibration to find a plausible
// cutoff point for linear search of a range list vs. binary search.
// We create a fake table and then time how long it takes to do a
// sequence of searches within that table, for all possible inputs
// relative to the ranges (something before all, in each, between each, after all).
// This assumes that all possible runes are equally likely.
// In practice most runes are ASCII, so this is a conservative estimate
// of an effective cutoff value, and we could probably set the cutoff
// higher than what this function recommends.
432
// calibrate is the -calibrate command-line flag. It defaults to false;
// TestCalibrate returns immediately unless it is set.
var calibrate = flag.Bool(
	"calibrate",
	false,
	"compute crossover for linear vs. binary search",
)
434
435func TestCalibrate(t *testing.T) {
436 if !*calibrate {
437 return
438 }
439
440 if runtime.GOARCH == "amd64" {
441 fmt.Printf("warning: running calibration on %s\n", runtime.GOARCH)
442 }
443
444 // Find the point where binary search wins by more than 10%.
445 // The 10% bias gives linear search an edge when they're close,
446 // because on predominantly ASCII inputs linear search is even
447 // better than our benchmarks measure.
448 n := sort.Search(64, func(n int) bool {
449 tab := fakeTable(n)
450 blinear := func(b *testing.B) {
451 tab := tab
452 max := n*5 + 20
453 for i := 0; i < b.N; i++ {
454 for j := 0; j <= max; j++ {
455 linear(tab, uint16(j))
456 }
457 }
458 }
459 bbinary := func(b *testing.B) {
460 tab := tab
461 max := n*5 + 20
462 for i := 0; i < b.N; i++ {
463 for j := 0; j <= max; j++ {
464 binary(tab, uint16(j))
465 }
466 }
467 }
468 bmlinear := testing.Benchmark(blinear)
469 bmbinary := testing.Benchmark(bbinary)
470 fmt.Printf("n=%d: linear=%d binary=%d\n", n, bmlinear.NsPerOp(), bmbinary.NsPerOp())
471 return bmlinear.NsPerOp()*100 > bmbinary.NsPerOp()*110
472 })
473 fmt.Printf("calibration: linear cutoff = %d\n", n)
474}
475
// fakeTable builds a synthetic table of n ranges for calibration.
// Range i covers [i*5+10, i*5+12] with stride 1, so consecutive ranges
// are separated by a two-value gap. It returns nil when n <= 0.
func fakeTable(n int) []Range16 {
	var table []Range16
	for lo := 10; len(table) < n; lo += 5 {
		table = append(table, Range16{Lo: uint16(lo), Hi: uint16(lo + 2), Stride: 1})
	}
	return table
}
483
// linear reports whether r is a member of ranges, scanning the ranges in
// order. It stops at the first range that starts above r or contains r;
// within a containing range, r is a member only if it lies on the stride.
func linear(ranges []Range16, r uint16) bool {
	for i := range ranges {
		cur := &ranges[i]
		switch {
		case r < cur.Lo:
			// Every later range starts higher still, so r cannot match.
			return false
		case r <= cur.Hi:
			return (r-cur.Lo)%cur.Stride == 0
		}
	}
	return false
}
496
// binary reports whether r is a member of ranges, using binary search over
// the ranges. Within a containing range, r is a member only if it lies on
// the stride.
func binary(ranges []Range16, r uint16) bool {
	lo, hi := 0, len(ranges)
	for lo < hi {
		mid := lo + (hi-lo)/2
		cur := &ranges[mid]
		switch {
		case r < cur.Lo:
			hi = mid
		case r > cur.Hi:
			lo = mid + 1
		default:
			// cur.Lo <= r <= cur.Hi: this is the only candidate range.
			return (r-cur.Lo)%cur.Stride == 0
		}
	}
	return false
}
515
// TestLatinOffset checks, for every table in Categories, FoldCategory,
// FoldScript, Properties and Scripts, that LatinOffset equals the number
// of leading R16 entries whose Hi is at most MaxLatin1.
func TestLatinOffset(t *testing.T) {
	tableSets := []map[string]*RangeTable{
		Categories,
		FoldCategory,
		FoldScript,
		Properties,
		Scripts,
	}
	for _, set := range tableSets {
		for name, tab := range set {
			want := 0
			for _, r16 := range tab.R16 {
				if r16.Hi > MaxLatin1 {
					break
				}
				want++
			}
			if tab.LatinOffset != want {
				t.Errorf("%s: LatinOffset=%d, want %d", name, tab.LatinOffset, want)
			}
		}
	}
}