blob: e07eb7d5c05059c3ba0ee74477a3179ebccdf416 [file] [log] [blame]
Rob Pike6610d792010-08-12 14:41:52 +10001// Copyright 2010 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package regexp
6
7import (
8 "fmt"
Rob Pike7db904c2011-02-03 13:58:40 -08009 "strings"
Rob Pike6610d792010-08-12 14:41:52 +100010 "testing"
11)
12
13// For each pattern/text pair, what is the expected output of each function?
14// We can derive the textual results from the indexed results, the non-submatch
15// results from the submatched results, the single results from the 'all' results,
16// and the byte results from the string results. Therefore the table includes
17// only the FindAllStringSubmatchIndex result.
18type FindTest struct {
19 pat string
20 text string
21 matches [][]int
22}
23
24func (t FindTest) String() string {
25 return fmt.Sprintf("pat: %#q text: %#q", t.pat, t.text)
26}
27
28var findTests = []FindTest{
Robert Griesemer34788912010-10-22 10:06:33 -070029 {``, ``, build(1, 0, 0)},
30 {`^abcdefg`, "abcdefg", build(1, 0, 7)},
31 {`a+`, "baaab", build(1, 1, 4)},
32 {"abcd..", "abcdef", build(1, 0, 6)},
33 {`a`, "a", build(1, 0, 1)},
34 {`x`, "y", nil},
35 {`b`, "abc", build(1, 1, 2)},
36 {`.`, "a", build(1, 0, 1)},
37 {`.*`, "abcdef", build(1, 0, 6)},
38 {`^`, "abcde", build(1, 0, 0)},
39 {`$`, "abcde", build(1, 5, 5)},
40 {`^abcd$`, "abcd", build(1, 0, 4)},
41 {`^bcd'`, "abcdef", nil},
42 {`^abcd$`, "abcde", nil},
43 {`a+`, "baaab", build(1, 1, 4)},
44 {`a*`, "baaab", build(3, 0, 0, 1, 4, 5, 5)},
45 {`[a-z]+`, "abcd", build(1, 0, 4)},
46 {`[^a-z]+`, "ab1234cd", build(1, 2, 6)},
47 {`[a\-\]z]+`, "az]-bcz", build(2, 0, 4, 6, 7)},
48 {`[^\n]+`, "abcd\n", build(1, 0, 4)},
49 {`[日本語]+`, "日本語日本語", build(1, 0, 18)},
50 {`日本語+`, "日本語", build(1, 0, 9)},
51 {`日本語+`, "日本語語語語", build(1, 0, 18)},
52 {`()`, "", build(1, 0, 0, 0, 0)},
53 {`(a)`, "a", build(1, 0, 1, 0, 1)},
54 {`(.)(.)`, "日a", build(1, 0, 4, 0, 3, 3, 4)},
55 {`(.*)`, "", build(1, 0, 0, 0, 0)},
56 {`(.*)`, "abcd", build(1, 0, 4, 0, 4)},
57 {`(..)(..)`, "abcd", build(1, 0, 4, 0, 2, 2, 4)},
58 {`(([^xyz]*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 3, 4)},
59 {`((a|b|c)*(d))`, "abcd", build(1, 0, 4, 0, 4, 2, 3, 3, 4)},
60 {`(((a|b|c)*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 2, 3, 3, 4)},
Russ Cox6c230fb2011-09-26 18:33:13 -040061 {`\a\f\n\r\t\v`, "\a\f\n\r\t\v", build(1, 0, 6)},
62 {`[\a\f\n\r\t\v]+`, "\a\f\n\r\t\v", build(1, 0, 6)},
Rob Pike6610d792010-08-12 14:41:52 +100063
Robert Griesemer34788912010-10-22 10:06:33 -070064 {`a*(|(b))c*`, "aacc", build(1, 0, 4, 2, 2, -1, -1)},
65 {`(.*).*`, "ab", build(1, 0, 2, 0, 2)},
66 {`[.]`, ".", build(1, 0, 1)},
67 {`/$`, "/abc/", build(1, 4, 5)},
68 {`/$`, "/abc", nil},
Rob Pike6610d792010-08-12 14:41:52 +100069
70 // multiple matches
Robert Griesemer34788912010-10-22 10:06:33 -070071 {`.`, "abc", build(3, 0, 1, 1, 2, 2, 3)},
72 {`(.)`, "abc", build(3, 0, 1, 0, 1, 1, 2, 1, 2, 2, 3, 2, 3)},
73 {`.(.)`, "abcd", build(2, 0, 2, 1, 2, 2, 4, 3, 4)},
74 {`ab*`, "abbaab", build(3, 0, 3, 3, 4, 4, 6)},
75 {`a(b*)`, "abbaab", build(3, 0, 3, 1, 3, 3, 4, 4, 4, 4, 6, 5, 6)},
Rob Pike6610d792010-08-12 14:41:52 +100076
77 // fixed bugs
Robert Griesemer34788912010-10-22 10:06:33 -070078 {`ab$`, "cab", build(1, 1, 3)},
79 {`axxb$`, "axxcb", nil},
80 {`data`, "daXY data", build(1, 5, 9)},
81 {`da(.)a$`, "daXY data", build(1, 5, 9, 7, 8)},
Rob Pike15cb7ed2011-01-03 11:35:34 -080082 {`zx+`, "zzx", build(1, 1, 3)},
Russ Cox6c230fb2011-09-26 18:33:13 -040083 {`ab$`, "abcab", build(1, 3, 5)},
84 {`(aa)*$`, "a", build(1, 1, 1, -1, -1)},
85 {`(?:.|(?:.a))`, "", nil},
86 {`(?:A(?:A|a))`, "Aa", build(1, 0, 2)},
87 {`(?:A|(?:A|a))`, "a", build(1, 0, 1)},
88 {`(a){0}`, "", build(1, 0, 0, -1, -1)},
89 {`(?-s)(?:(?:^).)`, "\n", nil},
90 {`(?s)(?:(?:^).)`, "\n", build(1, 0, 1)},
91 {`(?:(?:^).)`, "\n", nil},
92 {`\b`, "x", build(2, 0, 0, 1, 1)},
93 {`\b`, "xx", build(2, 0, 0, 2, 2)},
94 {`\b`, "x y", build(4, 0, 0, 1, 1, 2, 2, 3, 3)},
95 {`\b`, "xx yy", build(4, 0, 0, 2, 2, 3, 3, 5, 5)},
96 {`\B`, "x", nil},
97 {`\B`, "xx", build(1, 1, 1)},
98 {`\B`, "x y", nil},
99 {`\B`, "xx yy", build(2, 1, 1, 4, 4)},
100
101 // RE2 tests
102 {`[^\S\s]`, "abcd", nil},
103 {`[^\S[:space:]]`, "abcd", nil},
104 {`[^\D\d]`, "abcd", nil},
105 {`[^\D[:digit:]]`, "abcd", nil},
106 {`(?i)\W`, "x", nil},
107 {`(?i)\W`, "k", nil},
108 {`(?i)\W`, "s", nil},
Rob Pike6610d792010-08-12 14:41:52 +1000109
110 // can backslash-escape any punctuation
Robert Griesemer34788912010-10-22 10:06:33 -0700111 {`\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~`,
Rob Pike6610d792010-08-12 14:41:52 +1000112 `!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)},
Robert Griesemer34788912010-10-22 10:06:33 -0700113 {`[\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~]+`,
Rob Pike6610d792010-08-12 14:41:52 +1000114 `!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)},
Robert Griesemer34788912010-10-22 10:06:33 -0700115 {"\\`", "`", build(1, 0, 1)},
116 {"[\\`]+", "`", build(1, 0, 1)},
Rob Pike69fe3dd2010-08-16 15:17:34 -0700117
118 // long set of matches (longer than startSize)
Robert Griesemer34788912010-10-22 10:06:33 -0700119 {
Rob Pike69fe3dd2010-08-16 15:17:34 -0700120 ".",
121 "qwertyuiopasdfghjklzxcvbnm1234567890",
122 build(36, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10,
123 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20,
124 20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30,
125 30, 31, 31, 32, 32, 33, 33, 34, 34, 35, 35, 36),
126 },
Rob Pike6610d792010-08-12 14:41:52 +1000127}
128
129// build is a helper to construct a [][]int by extracting n sequences from x.
130// This represents n matches with len(x)/n submatches each.
131func build(n int, x ...int) [][]int {
132 ret := make([][]int, n)
133 runLength := len(x) / n
134 j := 0
135 for i := range ret {
136 ret[i] = make([]int, runLength)
137 copy(ret[i], x[j:])
138 j += runLength
139 if j > len(x) {
140 panic("invalid build entry")
141 }
142 }
143 return ret
144}
145
146// First the simple cases.
147
148func TestFind(t *testing.T) {
149 for _, test := range findTests {
Rob Pike5bd40942010-12-16 16:55:26 -0800150 re := MustCompile(test.pat)
Rob Pikea9e7c932010-12-17 10:23:46 -0800151 if re.String() != test.pat {
152 t.Errorf("String() = `%s`; should be `%s`", re.String(), test.pat)
Rob Pike5bd40942010-12-16 16:55:26 -0800153 }
154 result := re.Find([]byte(test.text))
Rob Pike6610d792010-08-12 14:41:52 +1000155 switch {
156 case len(test.matches) == 0 && len(result) == 0:
157 // ok
158 case test.matches == nil && result != nil:
159 t.Errorf("expected no match; got one: %s", test)
160 case test.matches != nil && result == nil:
161 t.Errorf("expected match; got none: %s", test)
162 case test.matches != nil && result != nil:
163 expect := test.text[test.matches[0][0]:test.matches[0][1]]
164 if expect != string(result) {
165 t.Errorf("expected %q got %q: %s", expect, result, test)
166 }
167 }
168 }
169}
170
171func TestFindString(t *testing.T) {
172 for _, test := range findTests {
173 result := MustCompile(test.pat).FindString(test.text)
174 switch {
175 case len(test.matches) == 0 && len(result) == 0:
176 // ok
177 case test.matches == nil && result != "":
178 t.Errorf("expected no match; got one: %s", test)
179 case test.matches != nil && result == "":
180 // Tricky because an empty result has two meanings: no match or empty match.
181 if test.matches[0][0] != test.matches[0][1] {
182 t.Errorf("expected match; got none: %s", test)
183 }
184 case test.matches != nil && result != "":
185 expect := test.text[test.matches[0][0]:test.matches[0][1]]
186 if expect != result {
187 t.Errorf("expected %q got %q: %s", expect, result, test)
188 }
189 }
190 }
191}
192
193func testFindIndex(test *FindTest, result []int, t *testing.T) {
194 switch {
195 case len(test.matches) == 0 && len(result) == 0:
196 // ok
197 case test.matches == nil && result != nil:
198 t.Errorf("expected no match; got one: %s", test)
199 case test.matches != nil && result == nil:
200 t.Errorf("expected match; got none: %s", test)
201 case test.matches != nil && result != nil:
202 expect := test.matches[0]
203 if expect[0] != result[0] || expect[1] != result[1] {
204 t.Errorf("expected %v got %v: %s", expect, result, test)
205 }
206 }
207}
208
209func TestFindIndex(t *testing.T) {
210 for _, test := range findTests {
211 testFindIndex(&test, MustCompile(test.pat).FindIndex([]byte(test.text)), t)
212 }
213}
214
215func TestFindStringIndex(t *testing.T) {
216 for _, test := range findTests {
217 testFindIndex(&test, MustCompile(test.pat).FindStringIndex(test.text), t)
218 }
219}
220
Rob Pike7db904c2011-02-03 13:58:40 -0800221func TestFindReaderIndex(t *testing.T) {
222 for _, test := range findTests {
223 testFindIndex(&test, MustCompile(test.pat).FindReaderIndex(strings.NewReader(test.text)), t)
224 }
225}
226
Rob Pike6610d792010-08-12 14:41:52 +1000227// Now come the simple All cases.
228
229func TestFindAll(t *testing.T) {
230 for _, test := range findTests {
231 result := MustCompile(test.pat).FindAll([]byte(test.text), -1)
232 switch {
233 case test.matches == nil && result == nil:
234 // ok
235 case test.matches == nil && result != nil:
236 t.Errorf("expected no match; got one: %s", test)
237 case test.matches != nil && result == nil:
Russ Cox6c230fb2011-09-26 18:33:13 -0400238 t.Fatalf("expected match; got none: %s", test)
Rob Pike6610d792010-08-12 14:41:52 +1000239 case test.matches != nil && result != nil:
240 if len(test.matches) != len(result) {
241 t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
242 continue
243 }
244 for k, e := range test.matches {
245 expect := test.text[e[0]:e[1]]
246 if expect != string(result[k]) {
247 t.Errorf("match %d: expected %q got %q: %s", k, expect, result[k], test)
248 }
249 }
250 }
251 }
252}
253
254func TestFindAllString(t *testing.T) {
255 for _, test := range findTests {
256 result := MustCompile(test.pat).FindAllString(test.text, -1)
257 switch {
258 case test.matches == nil && result == nil:
259 // ok
260 case test.matches == nil && result != nil:
261 t.Errorf("expected no match; got one: %s", test)
262 case test.matches != nil && result == nil:
263 t.Errorf("expected match; got none: %s", test)
264 case test.matches != nil && result != nil:
265 if len(test.matches) != len(result) {
266 t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
267 continue
268 }
269 for k, e := range test.matches {
270 expect := test.text[e[0]:e[1]]
271 if expect != result[k] {
272 t.Errorf("expected %q got %q: %s", expect, result, test)
273 }
274 }
275 }
276 }
277}
278
279func testFindAllIndex(test *FindTest, result [][]int, t *testing.T) {
280 switch {
281 case test.matches == nil && result == nil:
282 // ok
283 case test.matches == nil && result != nil:
284 t.Errorf("expected no match; got one: %s", test)
285 case test.matches != nil && result == nil:
286 t.Errorf("expected match; got none: %s", test)
287 case test.matches != nil && result != nil:
288 if len(test.matches) != len(result) {
289 t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
290 return
291 }
292 for k, e := range test.matches {
293 if e[0] != result[k][0] || e[1] != result[k][1] {
294 t.Errorf("match %d: expected %v got %v: %s", k, e, result[k], test)
295 }
296 }
297 }
298}
299
300func TestFindAllIndex(t *testing.T) {
301 for _, test := range findTests {
302 testFindAllIndex(&test, MustCompile(test.pat).FindAllIndex([]byte(test.text), -1), t)
303 }
304}
305
306func TestFindAllStringIndex(t *testing.T) {
307 for _, test := range findTests {
308 testFindAllIndex(&test, MustCompile(test.pat).FindAllStringIndex(test.text, -1), t)
309 }
310}
311
312// Now come the Submatch cases.
313
314func testSubmatchBytes(test *FindTest, n int, submatches []int, result [][]byte, t *testing.T) {
315 if len(submatches) != len(result)*2 {
316 t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test)
317 return
318 }
319 for k := 0; k < len(submatches); k += 2 {
320 if submatches[k] == -1 {
321 if result[k/2] != nil {
322 t.Errorf("match %d: expected nil got %q: %s", n, result, test)
323 }
324 continue
325 }
326 expect := test.text[submatches[k]:submatches[k+1]]
327 if expect != string(result[k/2]) {
328 t.Errorf("match %d: expected %q got %q: %s", n, expect, result, test)
329 return
330 }
331 }
332}
333
334func TestFindSubmatch(t *testing.T) {
335 for _, test := range findTests {
336 result := MustCompile(test.pat).FindSubmatch([]byte(test.text))
337 switch {
338 case test.matches == nil && result == nil:
339 // ok
340 case test.matches == nil && result != nil:
341 t.Errorf("expected no match; got one: %s", test)
342 case test.matches != nil && result == nil:
343 t.Errorf("expected match; got none: %s", test)
344 case test.matches != nil && result != nil:
345 testSubmatchBytes(&test, 0, test.matches[0], result, t)
346 }
347 }
348}
349
350func testSubmatchString(test *FindTest, n int, submatches []int, result []string, t *testing.T) {
351 if len(submatches) != len(result)*2 {
352 t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test)
353 return
354 }
355 for k := 0; k < len(submatches); k += 2 {
356 if submatches[k] == -1 {
357 if result[k/2] != "" {
358 t.Errorf("match %d: expected nil got %q: %s", n, result, test)
359 }
360 continue
361 }
362 expect := test.text[submatches[k]:submatches[k+1]]
363 if expect != result[k/2] {
364 t.Errorf("match %d: expected %q got %q: %s", n, expect, result, test)
365 return
366 }
367 }
368}
369
370func TestFindStringSubmatch(t *testing.T) {
371 for _, test := range findTests {
372 result := MustCompile(test.pat).FindStringSubmatch(test.text)
373 switch {
374 case test.matches == nil && result == nil:
375 // ok
376 case test.matches == nil && result != nil:
377 t.Errorf("expected no match; got one: %s", test)
378 case test.matches != nil && result == nil:
379 t.Errorf("expected match; got none: %s", test)
380 case test.matches != nil && result != nil:
381 testSubmatchString(&test, 0, test.matches[0], result, t)
382 }
383 }
384}
385
386func testSubmatchIndices(test *FindTest, n int, expect, result []int, t *testing.T) {
387 if len(expect) != len(result) {
388 t.Errorf("match %d: expected %d matches; got %d: %s", n, len(expect)/2, len(result)/2, test)
389 return
390 }
391 for k, e := range expect {
392 if e != result[k] {
393 t.Errorf("match %d: submatch error: expected %v got %v: %s", n, expect, result, test)
394 }
395 }
396}
397
398func testFindSubmatchIndex(test *FindTest, result []int, t *testing.T) {
399 switch {
400 case test.matches == nil && result == nil:
401 // ok
402 case test.matches == nil && result != nil:
403 t.Errorf("expected no match; got one: %s", test)
404 case test.matches != nil && result == nil:
405 t.Errorf("expected match; got none: %s", test)
406 case test.matches != nil && result != nil:
407 testSubmatchIndices(test, 0, test.matches[0], result, t)
408 }
409}
410
411func TestFindSubmatchIndex(t *testing.T) {
412 for _, test := range findTests {
413 testFindSubmatchIndex(&test, MustCompile(test.pat).FindSubmatchIndex([]byte(test.text)), t)
414 }
415}
416
Rob Pike8a2d7062011-02-01 17:48:42 -0800417func TestFindStringSubmatchIndex(t *testing.T) {
Rob Pike6610d792010-08-12 14:41:52 +1000418 for _, test := range findTests {
419 testFindSubmatchIndex(&test, MustCompile(test.pat).FindStringSubmatchIndex(test.text), t)
420 }
421}
422
Rob Pike7db904c2011-02-03 13:58:40 -0800423func TestFindReaderSubmatchIndex(t *testing.T) {
424 for _, test := range findTests {
425 testFindSubmatchIndex(&test, MustCompile(test.pat).FindReaderSubmatchIndex(strings.NewReader(test.text)), t)
426 }
427}
428
Rob Pike6610d792010-08-12 14:41:52 +1000429// Now come the monster AllSubmatch cases.
430
431func TestFindAllSubmatch(t *testing.T) {
432 for _, test := range findTests {
433 result := MustCompile(test.pat).FindAllSubmatch([]byte(test.text), -1)
434 switch {
435 case test.matches == nil && result == nil:
436 // ok
437 case test.matches == nil && result != nil:
438 t.Errorf("expected no match; got one: %s", test)
439 case test.matches != nil && result == nil:
440 t.Errorf("expected match; got none: %s", test)
441 case len(test.matches) != len(result):
442 t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
443 case test.matches != nil && result != nil:
444 for k, match := range test.matches {
445 testSubmatchBytes(&test, k, match, result[k], t)
446 }
447 }
448 }
449}
450
451func TestFindAllStringSubmatch(t *testing.T) {
452 for _, test := range findTests {
453 result := MustCompile(test.pat).FindAllStringSubmatch(test.text, -1)
454 switch {
455 case test.matches == nil && result == nil:
456 // ok
457 case test.matches == nil && result != nil:
458 t.Errorf("expected no match; got one: %s", test)
459 case test.matches != nil && result == nil:
460 t.Errorf("expected match; got none: %s", test)
461 case len(test.matches) != len(result):
462 t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
463 case test.matches != nil && result != nil:
464 for k, match := range test.matches {
465 testSubmatchString(&test, k, match, result[k], t)
466 }
467 }
468 }
469}
470
471func testFindAllSubmatchIndex(test *FindTest, result [][]int, t *testing.T) {
472 switch {
473 case test.matches == nil && result == nil:
474 // ok
475 case test.matches == nil && result != nil:
476 t.Errorf("expected no match; got one: %s", test)
477 case test.matches != nil && result == nil:
478 t.Errorf("expected match; got none: %s", test)
479 case len(test.matches) != len(result):
480 t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
481 case test.matches != nil && result != nil:
482 for k, match := range test.matches {
483 testSubmatchIndices(test, k, match, result[k], t)
484 }
485 }
486}
487
488func TestFindAllSubmatchIndex(t *testing.T) {
489 for _, test := range findTests {
490 testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllSubmatchIndex([]byte(test.text), -1), t)
491 }
492}
493
Rob Pike8a2d7062011-02-01 17:48:42 -0800494func TestFindAllStringSubmatchIndex(t *testing.T) {
Rob Pike6610d792010-08-12 14:41:52 +1000495 for _, test := range findTests {
496 testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllStringSubmatchIndex(test.text, -1), t)
497 }
498}