blob: 23c22003eef27c096707cd83788e3d8fa7ad336d [file] [log] [blame]
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4
Rob Pikebe7e0f82008-11-19 15:38:46 -08005package regexp
Rob Pike8f5b2772008-10-09 19:40:53 -07006
7import (
8 "os";
9 "regexp";
Rob Pikebe7e0f82008-11-19 15:38:46 -080010 "testing";
Rob Pike8f5b2772008-10-09 19:40:53 -070011)
12
// good_re lists patterns that are expected to compile cleanly;
// TestGoodCompile feeds each one to compileTest with a nil expected error.
var good_re = []string{
	// Empty pattern, single characters and anchors.
	``,
	`.`,
	`^.$`,
	`a`,

	// Closures and alternation.
	`a*`,
	`a+`,
	`a?`,
	`a|b`,
	`a*|b*`,
	`(a*|b)(c*|d)`,

	// Character classes.
	`[a-z]`,
	`[a-abc-c\-\]\[]`,
	`[a-z]+`,
	`[]`,
	`[abc]`,
	`[^1234]`,
}
Rob Pike75df21c2008-10-14 16:32:43 -070031
Rob Pike1da03aa2009-01-06 13:54:53 -080032// TODO: nice to do this with a map
Rob Pike74a60ed2009-01-15 17:22:15 -080033type stringError struct {
Rob Pike75df21c2008-10-14 16:32:43 -070034 re string;
Rob Pikeaaf63f82009-04-17 00:08:24 -070035 err os.Error;
Rob Pike75df21c2008-10-14 16:32:43 -070036}
Russ Coxbe2edb52009-03-03 08:39:12 -080037var bad_re = []stringError{
38 stringError{ `*`, regexp.ErrBareClosure },
39 stringError{ `(abc`, regexp.ErrUnmatchedLpar },
40 stringError{ `abc)`, regexp.ErrUnmatchedRpar },
41 stringError{ `x[a-z`, regexp.ErrUnmatchedLbkt },
42 stringError{ `abc]`, regexp.ErrUnmatchedRbkt },
43 stringError{ `[z-a]`, regexp.ErrBadRange },
44 stringError{ `abc\`, regexp.ErrExtraneousBackslash },
45 stringError{ `a**`, regexp.ErrBadClosure },
46 stringError{ `a*+`, regexp.ErrBadClosure },
47 stringError{ `a??`, regexp.ErrBadClosure },
48 stringError{ `*`, regexp.ErrBareClosure },
49 stringError{ `\x`, regexp.ErrBadBackslash },
50}
Rob Pike75df21c2008-10-14 16:32:43 -070051
// vec holds match offsets as consecutive (start, end) pairs of byte
// indexes into the searched text.  In the table below the first pair
// spans the whole match and each further pair corresponds to one
// parenthesized group; an empty vec stands for "no match".
type vec []int;

// tester is one matching test case: the pattern, the text to search,
// and the offsets the match is expected to report.
type tester struct {
	re	string;
	text	string;
	match	vec;
}

// matches is the table shared by TestExecute, TestMatch,
// TestMatchStrings and TestMatchFunction.
var matches = []tester{
	// Patterns without parenthesized groups.
	tester{``, "", vec{0, 0}},
	tester{`a`, "a", vec{0, 1}},
	tester{`x`, "y", vec{}},
	tester{`b`, "abc", vec{1, 2}},
	tester{`.`, "a", vec{0, 1}},
	tester{`.*`, "abcdef", vec{0, 6}},
	tester{`^abcd$`, "abcd", vec{0, 4}},
	tester{`^bcd'`, "abcdef", vec{}},
	tester{`^abcd$`, "abcde", vec{}},
	tester{`a+`, "baaab", vec{1, 4}},
	tester{`a*`, "baaab", vec{0, 0}},
	tester{`[a-z]+`, "abcd", vec{0, 4}},
	tester{`[^a-z]+`, "ab1234cd", vec{2, 6}},
	tester{`[a\-\]z]+`, "az]-bcz", vec{0, 4}},
	// Offsets count bytes: six 3-byte characters end at 18.
	tester{`[日本語]+`, "日本語日本語", vec{0, 18}},

	// Patterns with parenthesized groups.
	tester{`()`, "", vec{0, 0, 0, 0}},
	tester{`(a)`, "a", vec{0, 1, 0, 1}},
	tester{`(.)(.)`, "日a", vec{0, 4, 0, 3, 3, 4}},
	tester{`(.*)`, "", vec{0, 0, 0, 0}},
	tester{`(.*)`, "abcd", vec{0, 4, 0, 4}},
	tester{`(..)(..)`, "abcd", vec{0, 4, 0, 2, 2, 4}},
	tester{`(([^xyz]*)(d))`, "abcd", vec{0, 4, 0, 4, 0, 3, 3, 4}},
	tester{`((a|b|c)*(d))`, "abcd", vec{0, 4, 0, 4, 2, 3, 3, 4}},
	tester{`(((a|b|c)*)(d))`, "abcd", vec{0, 4, 0, 4, 0, 3, 2, 3, 3, 4}},
	// NOTE(review): -1,-1 presumably marks a group that took no part in
	// the match -- confirm against the regexp implementation.
	tester{`a*(|(b))c*`, "aacc", vec{0, 4, 2, 2, -1, -1}},
}
Rob Pike75df21c2008-10-14 16:32:43 -070087
Rob Pikeaaf63f82009-04-17 00:08:24 -070088func compileTest(t *testing.T, expr string, error os.Error) *regexp.Regexp {
Rob Pike75df21c2008-10-14 16:32:43 -070089 re, err := regexp.Compile(expr);
90 if err != error {
Rob Pikebe7e0f82008-11-19 15:38:46 -080091 t.Error("compiling `", expr, "`; unexpected error: ", err.String());
Rob Pike8f5b2772008-10-09 19:40:53 -070092 }
Rob Pike75df21c2008-10-14 16:32:43 -070093 return re
94}
95
// printVec logs the offsets in m, one (start, end) pair per line, or a
// "<no match>" marker when m is empty.
func printVec(t *testing.T, m []int) {
	if len(m) == 0 {
		t.Log("\t<no match>");
		return
	}
	// m is walked two entries at a time: m[lo] and m[lo+1] form one pair.
	for lo := 0; lo < len(m); lo += 2 {
		t.Log("\t", m[lo], ",", m[lo+1])
	}
}
106
// printStrings logs every string in m, quoted, one per line, or a
// "<no match>" marker when m is empty.
func printStrings(t *testing.T, m []string) {
	if len(m) == 0 {
		t.Log("\t<no match>");
		return
	}
	// Unlike an offset vector, m has one entry per (sub)match, so the
	// index advances by one; a step of two would skip every other string.
	for i := 0; i < len(m); i++ {
		t.Logf("\t%q", m[i])
	}
}
117
// equal reports whether m1 and m2 have the same length and hold the
// same values at every index.
func equal(m1, m2 []int) bool {
	if len(m1) != len(m2) {
		return false
	}
	for i, v := range m1 {
		if v != m2[i] {
			return false
		}
	}
	return true
}
130
// equalStrings reports whether m1 and m2 have the same length and hold
// the same strings at every index.
func equalStrings(m1, m2 []string) bool {
	n := len(m2);
	if len(m1) != n {
		return false
	}
	for k := 0; k != n; k++ {
		if m2[k] != m1[k] {
			return false
		}
	}
	return true
}
143
Rob Pike794efd72009-01-16 10:34:36 -0800144func executeTest(t *testing.T, expr string, str string, match []int) {
145 re := compileTest(t, expr, nil);
Rob Pikebe7e0f82008-11-19 15:38:46 -0800146 if re == nil {
147 return
148 }
Rob Pike75df21c2008-10-14 16:32:43 -0700149 m := re.Execute(str);
Rob Pike794efd72009-01-16 10:34:36 -0800150 if !equal(m, match) {
Rob Pike1da03aa2009-01-06 13:54:53 -0800151 t.Error("Execute failure on `", expr, "` matching `", str, "`:");
Rob Pike794efd72009-01-16 10:34:36 -0800152 printVec(t, m);
Rob Pikebe7e0f82008-11-19 15:38:46 -0800153 t.Log("should be:");
Rob Pike794efd72009-01-16 10:34:36 -0800154 printVec(t, match);
Rob Pike75df21c2008-10-14 16:32:43 -0700155 }
156}
157
Russ Cox839a6842009-01-20 14:40:40 -0800158func TestGoodCompile(t *testing.T) {
Rob Pike75df21c2008-10-14 16:32:43 -0700159 for i := 0; i < len(good_re); i++ {
Rob Pike794efd72009-01-16 10:34:36 -0800160 compileTest(t, good_re[i], nil);
Rob Pike75df21c2008-10-14 16:32:43 -0700161 }
Rob Pikebe7e0f82008-11-19 15:38:46 -0800162}
163
Russ Cox839a6842009-01-20 14:40:40 -0800164func TestBadCompile(t *testing.T) {
Rob Pike75df21c2008-10-14 16:32:43 -0700165 for i := 0; i < len(bad_re); i++ {
Rob Pike794efd72009-01-16 10:34:36 -0800166 compileTest(t, bad_re[i].re, bad_re[i].err)
Rob Pike75df21c2008-10-14 16:32:43 -0700167 }
Rob Pikebe7e0f82008-11-19 15:38:46 -0800168}
169
Russ Cox839a6842009-01-20 14:40:40 -0800170func TestExecute(t *testing.T) {
Rob Pike1da03aa2009-01-06 13:54:53 -0800171 for i := 0; i < len(matches); i++ {
172 test := &matches[i];
Rob Pike794efd72009-01-16 10:34:36 -0800173 executeTest(t, test.re, test.text, test.match)
Rob Pike1da03aa2009-01-06 13:54:53 -0800174 }
175}
176
Rob Pike794efd72009-01-16 10:34:36 -0800177func matchTest(t *testing.T, expr string, str string, match []int) {
178 re := compileTest(t, expr, nil);
Rob Pike1da03aa2009-01-06 13:54:53 -0800179 if re == nil {
180 return
181 }
182 m := re.Match(str);
183 if m != (len(match) > 0) {
184 t.Error("Match failure on `", expr, "` matching `", str, "`:", m, "should be", len(match) > 0);
185 }
186}
187
Russ Cox839a6842009-01-20 14:40:40 -0800188func TestMatch(t *testing.T) {
Rob Pike75df21c2008-10-14 16:32:43 -0700189 for i := 0; i < len(matches); i++ {
Rob Pikebe7e0f82008-11-19 15:38:46 -0800190 test := &matches[i];
Rob Pike794efd72009-01-16 10:34:36 -0800191 matchTest(t, test.re, test.text, test.match)
Rob Pike75df21c2008-10-14 16:32:43 -0700192 }
Rob Pike8f5b2772008-10-09 19:40:53 -0700193}
Rob Pike1da03aa2009-01-06 13:54:53 -0800194
Rob Pike794efd72009-01-16 10:34:36 -0800195func matchStringsTest(t *testing.T, expr string, str string, match []int) {
196 re := compileTest(t, expr, nil);
Rob Pike1da03aa2009-01-06 13:54:53 -0800197 if re == nil {
198 return
199 }
Russ Cox55645042009-01-06 15:19:02 -0800200 strs := make([]string, len(match)/2);
Rob Pike1da03aa2009-01-06 13:54:53 -0800201 for i := 0; i < len(match); i++ {
202 strs[i/2] = str[match[i] : match[i+1]]
203 }
204 m := re.MatchStrings(str);
Rob Pike794efd72009-01-16 10:34:36 -0800205 if !equalStrings(m, strs) {
Rob Pike1da03aa2009-01-06 13:54:53 -0800206 t.Error("MatchStrings failure on `", expr, "` matching `", str, "`:");
Rob Pike794efd72009-01-16 10:34:36 -0800207 printStrings(t, m);
Rob Pike1da03aa2009-01-06 13:54:53 -0800208 t.Log("should be:");
Rob Pike794efd72009-01-16 10:34:36 -0800209 printStrings(t, strs);
Rob Pike1da03aa2009-01-06 13:54:53 -0800210 }
211}
212
Russ Cox839a6842009-01-20 14:40:40 -0800213func TestMatchStrings(t *testing.T) {
Rob Pike1da03aa2009-01-06 13:54:53 -0800214 for i := 0; i < len(matches); i++ {
215 test := &matches[i];
Rob Pike794efd72009-01-16 10:34:36 -0800216 matchTest(t, test.re, test.text, test.match)
Rob Pike1da03aa2009-01-06 13:54:53 -0800217 }
218}
219
Rob Pike794efd72009-01-16 10:34:36 -0800220func matchFunctionTest(t *testing.T, expr string, str string, match []int) {
Rob Pike1da03aa2009-01-06 13:54:53 -0800221 m, err := Match(expr, str);
222 if err == nil {
223 return
224 }
225 if m != (len(match) > 0) {
226 t.Error("function Match failure on `", expr, "` matching `", str, "`:", m, "should be", len(match) > 0);
227 }
228}
229
Russ Cox839a6842009-01-20 14:40:40 -0800230func TestMatchFunction(t *testing.T) {
Rob Pike1da03aa2009-01-06 13:54:53 -0800231 for i := 0; i < len(matches); i++ {
232 test := &matches[i];
Rob Pike794efd72009-01-16 10:34:36 -0800233 matchFunctionTest(t, test.re, test.text, test.match)
Rob Pike1da03aa2009-01-06 13:54:53 -0800234 }
235}
Steve Newmana6c7a802009-06-18 17:55:47 -0700236
// ReplaceTest is one ReplaceAll test case: replacing every match of
// pattern in input with replacement is expected to give output.
type ReplaceTest struct {
	pattern, replacement, input, output string;
}

// replaceTests is the table driven by TestReplaceAll.
var replaceTests = []ReplaceTest{
	// Test empty input and/or replacement, with pattern that matches the empty string.
	ReplaceTest{"", "", "", ""},
	ReplaceTest{"", "x", "", "x"},
	ReplaceTest{"", "", "abc", "abc"},
	ReplaceTest{"", "x", "abc", "xaxbxcx"},

	// Test empty input and/or replacement, with pattern that does not match the empty string.
	ReplaceTest{"b", "", "", ""},
	ReplaceTest{"b", "x", "", ""},
	ReplaceTest{"b", "", "abc", "ac"},
	ReplaceTest{"b", "x", "abc", "axc"},
	ReplaceTest{"y", "", "", ""},
	ReplaceTest{"y", "x", "", ""},
	ReplaceTest{"y", "", "abc", "abc"},
	ReplaceTest{"y", "x", "abc", "abc"},

	// Multibyte characters -- verify that we don't try to match in the middle
	// of a character.
	ReplaceTest{"[a-c]*", "x", "\u65e5", "x\u65e5x"},
	ReplaceTest{"[^\u65e5]", "x", "abc\u65e5def", "xxx\u65e5xxx"},

	// Start and end of a string, with a closure that may match nothing.
	ReplaceTest{"^[a-c]*", "x", "abcdabc", "xdabc"},
	ReplaceTest{"[a-c]*$", "x", "abcdabc", "abcdx"},
	ReplaceTest{"^[a-c]*$", "x", "abcdabc", "abcdabc"},
	ReplaceTest{"^[a-c]*", "x", "abc", "x"},
	ReplaceTest{"[a-c]*$", "x", "abc", "x"},
	ReplaceTest{"^[a-c]*$", "x", "abc", "x"},
	ReplaceTest{"^[a-c]*", "x", "dabce", "xdabce"},
	ReplaceTest{"[a-c]*$", "x", "dabce", "dabcex"},
	ReplaceTest{"^[a-c]*$", "x", "dabce", "dabce"},
	ReplaceTest{"^[a-c]*", "x", "", "x"},
	ReplaceTest{"[a-c]*$", "x", "", "x"},
	ReplaceTest{"^[a-c]*$", "x", "", "x"},

	// Start and end of a string, with a closure that needs at least one character.
	ReplaceTest{"^[a-c]+", "x", "abcdabc", "xdabc"},
	ReplaceTest{"[a-c]+$", "x", "abcdabc", "abcdx"},
	ReplaceTest{"^[a-c]+$", "x", "abcdabc", "abcdabc"},
	ReplaceTest{"^[a-c]+", "x", "abc", "x"},
	ReplaceTest{"[a-c]+$", "x", "abc", "x"},
	ReplaceTest{"^[a-c]+$", "x", "abc", "x"},
	ReplaceTest{"^[a-c]+", "x", "dabce", "dabce"},
	ReplaceTest{"[a-c]+$", "x", "dabce", "dabce"},
	ReplaceTest{"^[a-c]+$", "x", "dabce", "dabce"},
	ReplaceTest{"^[a-c]+", "x", "", ""},
	ReplaceTest{"[a-c]+$", "x", "", ""},
	ReplaceTest{"^[a-c]+$", "x", "", ""},

	// Other cases.
	ReplaceTest{"abc", "def", "abcdefg", "defdefg"},
	ReplaceTest{"bc", "BC", "abcbcdcdedef", "aBCBCdcdedef"},
	ReplaceTest{"abc", "", "abcdabc", "d"},
	ReplaceTest{"x", "xXx", "xxxXxxx", "xXxxXxxXxXxXxxXxxXx"},
	ReplaceTest{"abc", "d", "", ""},
	ReplaceTest{"abc", "d", "abc", "d"},
	ReplaceTest{".+", "x", "abc", "x"},
	ReplaceTest{"[a-c]*", "x", "def", "xdxexfx"},
	ReplaceTest{"[a-c]+", "x", "abcbcdcdedef", "xdxdedef"},
	ReplaceTest{"[a-c]*", "x", "abcbcdcdedef", "xdxdxexdxexfx"},
}
302
303func TestReplaceAll(t *testing.T) {
304 for i, tc := range replaceTests {
305 re, err := Compile(tc.pattern);
306 if err != nil {
307 t.Errorf("Unexpected error compiling %q: %v", tc.pattern, err);
308 continue;
309 }
310 actual := re.ReplaceAll(tc.input, tc.replacement);
311 if actual != tc.output {
312 t.Errorf("%q.Replace(%q,%q) = %q; want %q",
313 tc.pattern, tc.input, tc.replacement, actual, tc.output);
314 }
315 }
316}
317
// QuoteMetaTest is one QuoteMeta test case: the raw text and the
// escaped form QuoteMeta is expected to return for it.
type QuoteMetaTest struct {
	pattern, output string;
}

// quoteMetaTests is the table driven by TestQuoteMeta.
var quoteMetaTests = []QuoteMetaTest{
	// Nothing to escape.
	QuoteMetaTest{``, ``},
	QuoteMetaTest{`foo`, `foo`},

	// Punctuation, some of which is expected to gain a backslash.
	QuoteMetaTest{`!@#$%^&*()_+-=[{]}\|,<.>/?~`, `!@#\$%\^&\*\(\)_\+-=\[{\]}\\\|,<\.>/\?~`},
}
327
328func TestQuoteMeta(t *testing.T) {
329 for i, tc := range quoteMetaTests {
330 // Verify that QuoteMeta returns the expected string.
331 quoted := QuoteMeta(tc.pattern);
332 if quoted != tc.output {
333 t.Errorf("QuoteMeta(`%s`) = `%s`; want `%s`",
334 tc.pattern, quoted, tc.output);
335 continue;
336 }
337
338 // Verify that the quoted string is in fact treated as expected
339 // by Compile -- i.e. that it matches the original, unquoted string.
340 if tc.pattern != "" {
341 re, err := Compile(quoted);
342 if err != nil {
343 t.Errorf("Unexpected error compiling QuoteMeta(`%s`): %v", tc.pattern, err);
344 continue;
345 }
346 src := "abc" + tc.pattern + "def";
347 repl := "xyz";
348 replaced := re.ReplaceAll(src, repl);
349 expected := "abcxyzdef";
350 if replaced != expected {
351 t.Errorf("QuoteMeta(`%s`).Replace(`%s`,`%s`) = `%s`; want `%s`",
352 tc.pattern, src, repl, replaced, expected);
353 }
354 }
355 }
356}
357