// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
| |
| package syntax |
| |
| import "testing" |
| |
// simplifyTests is the table driving TestSimplify. Each entry pairs an
// input pattern with the string form expected after the parsed pattern
// has been simplified.
var simplifyTests = []struct {
	Regexp string // pattern handed to Parse
	Simple string // expected re.Simplify().String() output
}{
	// Already-simple constructs
	{`a`, `a`},
	{`ab`, `ab`},
	{`a|b`, `[a-b]`},
	{`ab|cd`, `ab|cd`},
	{`(ab)*`, `(ab)*`},
	{`(ab)+`, `(ab)+`},
	{`(ab)?`, `(ab)?`},
	{`.`, `(?s:.)`},
	{`^`, `(?m:^)`},
	{`$`, `(?m:$)`},
	{`[ac]`, `[ac]`},
	{`[^ac]`, `[^ac]`},

	// Posix character classes
	{`[[:alnum:]]`, `[0-9A-Za-z]`},
	{`[[:alpha:]]`, `[A-Za-z]`},
	{`[[:blank:]]`, `[\t ]`},
	{`[[:cntrl:]]`, `[\x00-\x1f\x7f]`},
	{`[[:digit:]]`, `[0-9]`},
	{`[[:graph:]]`, `[!-~]`},
	{`[[:lower:]]`, `[a-z]`},
	{`[[:print:]]`, `[ -~]`},
	{`[[:punct:]]`, "[!-/:-@\\[-`\\{-~]"},
	{`[[:space:]]`, `[\t-\r ]`},
	{`[[:upper:]]`, `[A-Z]`},
	{`[[:xdigit:]]`, `[0-9A-Fa-f]`},

	// Perl character classes
	{`\d`, `[0-9]`},
	{`\s`, `[\t-\n\f-\r ]`},
	{`\w`, `[0-9A-Z_a-z]`},
	{`\D`, `[^0-9]`},
	{`\S`, `[^\t-\n\f-\r ]`},
	{`\W`, `[^0-9A-Z_a-z]`},
	{`[\d]`, `[0-9]`},
	{`[\s]`, `[\t-\n\f-\r ]`},
	{`[\w]`, `[0-9A-Z_a-z]`},
	{`[\D]`, `[^0-9]`},
	{`[\S]`, `[^\t-\n\f-\r ]`},
	{`[\W]`, `[^0-9A-Z_a-z]`},

	// Posix repetitions
	{`a{1}`, `a`},
	{`a{2}`, `aa`},
	{`a{5}`, `aaaaa`},
	{`a{0,1}`, `a?`},
	// The next six are illegible because Simplify inserts (?:)
	// parens instead of () parens to avoid creating extra
	// captured subexpressions. The comments show a version with fewer parens.
	{`(a){0,2}`, `(?:(a)(a)?)?`},                       // (aa?)?
	{`(a){0,4}`, `(?:(a)(?:(a)(?:(a)(a)?)?)?)?`},       // (a(a(aa?)?)?)?
	{`(a){2,6}`, `(a)(a)(?:(a)(?:(a)(?:(a)(a)?)?)?)?`}, // aa(a(a(aa?)?)?)?
	{`a{0,2}`, `(?:aa?)?`},                             // (aa?)?
	{`a{0,4}`, `(?:a(?:a(?:aa?)?)?)?`},                 // (a(a(aa?)?)?)?
	{`a{2,6}`, `aa(?:a(?:a(?:aa?)?)?)?`},               // aa(a(a(aa?)?)?)?
	{`a{0,}`, `a*`},
	{`a{1,}`, `a+`},
	{`a{2,}`, `aa+`},
	{`a{5,}`, `aaaaa+`},

	// Test that operators simplify their arguments.
	{`(?:a{1,}){1,}`, `a+`},
	{`(a{1,}b{1,})`, `(a+b+)`},
	{`a{1,}|b{1,}`, `a+|b+`},
	{`(?:a{1,})*`, `(?:a+)*`},
	{`(?:a{1,})+`, `a+`},
	{`(?:a{1,})?`, `(?:a+)?`},
	{``, `(?:)`},
	{`a{0}`, `(?:)`},

	// Character class simplification
	{`[ab]`, `[a-b]`},
	{`[a-za-za-z]`, `[a-z]`},
	{`[A-Za-zA-Za-z]`, `[A-Za-z]`},
	{`[ABCDEFGH]`, `[A-H]`},
	{`[AB-CD-EF-GH]`, `[A-H]`},
	{`[W-ZP-XE-R]`, `[E-Z]`},
	{`[a-ee-gg-m]`, `[a-m]`},
	{`[a-ea-ha-m]`, `[a-m]`},
	{`[a-ma-ha-e]`, `[a-m]`},
	{`[a-zA-Z0-9 -~]`, `[ -~]`},

	// Empty character classes
	{`[^[:cntrl:][:^cntrl:]]`, `[^\x00-\x{10FFFF}]`},

	// Full character classes
	{`[[:cntrl:][:^cntrl:]]`, `(?s:.)`},

	// Unicode case folding.
	{`(?i)A`, `(?i:A)`},
	{`(?i)a`, `(?i:A)`},
	{`(?i)[A]`, `(?i:A)`},
	{`(?i)[a]`, `(?i:A)`},
	{`(?i)K`, `(?i:K)`},
	{`(?i)k`, `(?i:K)`},
	{`(?i)\x{212a}`, "(?i:K)"},
	{`(?i)[K]`, "[Kk\u212A]"},
	{`(?i)[k]`, "[Kk\u212A]"},
	{`(?i)[\x{212a}]`, "[Kk\u212A]"},
	{`(?i)[a-z]`, "[A-Za-z\u017F\u212A]"},
	{`(?i)[\x00-\x{FFFD}]`, "[\\x00-\uFFFD]"},
	{`(?i)[\x00-\x{10FFFF}]`, `(?s:.)`},

	// Empty string as a regular expression.
	// The empty string must be preserved inside parens in order
	// to make submatches work right, so these tests are less
	// interesting than they might otherwise be. String inserts
	// explicit (?:) in place of non-parenthesized empty strings,
	// to make them easier to spot for other parsers.
	{`(a|b|)`, `([a-b]|(?:))`},
	{`(|)`, `()`},
	{`a()`, `a()`},
	{`(()|())`, `(()|())`},
	{`(a|)`, `(a|(?:))`},
	{`ab()cd()`, `ab()cd()`},
	{`()`, `()`},
	{`()*`, `()*`},
	{`()+`, `()+`},
	{`()?`, `()?`},
	{`(){0}`, `(?:)`},
	{`(){1}`, `()`},
	{`(){1,}`, `()+`},
	{`(){0,2}`, `(?:()()?)?`},
}
| |
| func TestSimplify(t *testing.T) { |
| for _, tt := range simplifyTests { |
| re, err := Parse(tt.Regexp, MatchNL|Perl&^OneLine) |
| if err != nil { |
| t.Errorf("Parse(%#q) = error %v", tt.Regexp, err) |
| continue |
| } |
| s := re.Simplify().String() |
| if s != tt.Simple { |
| t.Errorf("Simplify(%#q) = %#q, want %#q", tt.Regexp, s, tt.Simple) |
| } |
| } |
| } |