Gustavo Niemeyer | 04ca4f8 | 2011-03-06 17:33:23 -0500 | [diff] [blame] | 1 | // Copyright 2010 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
Russ Cox | 78961ed | 2010-02-24 16:11:14 -0800 | [diff] [blame] | 5 | package path |
| 6 | |
| 7 | import ( |
Russ Cox | eb69292 | 2011-11-01 22:05:34 -0400 | [diff] [blame] | 8 | "errors" |
Russ Cox | 78961ed | 2010-02-24 16:11:14 -0800 | [diff] [blame] | 9 | "strings" |
Rob Pike | 45e3bcb | 2011-11-08 15:41:54 -0800 | [diff] [blame] | 10 | "unicode/utf8" |
Russ Cox | 78961ed | 2010-02-24 16:11:14 -0800 | [diff] [blame] | 11 | ) |
| 12 | |
Rémy Oudompheng | 3e7d804 | 2012-02-16 20:05:39 +0100 | [diff] [blame] | 13 | // ErrBadPattern indicates a globbing pattern was malformed. |
Russ Cox | eb69292 | 2011-11-01 22:05:34 -0400 | [diff] [blame] | 14 | var ErrBadPattern = errors.New("syntax error in pattern") |
Russ Cox | 78961ed | 2010-02-24 16:11:14 -0800 | [diff] [blame] | 15 | |
| 16 | // Match returns true if name matches the shell file name pattern. |
Gustavo Niemeyer | 04ca4f8 | 2011-03-06 17:33:23 -0500 | [diff] [blame] | 17 | // The pattern syntax is: |
Russ Cox | 78961ed | 2010-02-24 16:11:14 -0800 | [diff] [blame] | 18 | // |
| 19 | // pattern: |
| 20 | // { term } |
| 21 | // term: |
| 22 | // '*' matches any sequence of non-/ characters |
| 23 | // '?' matches any single non-/ character |
| 24 | // '[' [ '^' ] { character-range } ']' |
| 25 | // character class (must be non-empty) |
| 26 | // c matches character c (c != '*', '?', '\\', '[') |
| 27 | // '\\' c matches character c |
| 28 | // |
| 29 | // character-range: |
| 30 | // c matches character c (c != '\\', '-', ']') |
| 31 | // '\\' c matches character c |
| 32 | // lo '-' hi matches character c for lo <= c <= hi |
| 33 | // |
| 34 | // Match requires pattern to match all of name, not just a substring. |
Rémy Oudompheng | 3e7d804 | 2012-02-16 20:05:39 +0100 | [diff] [blame] | 35 | // The only possible returned error is ErrBadPattern, when pattern |
| 36 | // is malformed. |
Russ Cox | 78961ed | 2010-02-24 16:11:14 -0800 | [diff] [blame] | 37 | // |
Russ Cox | eb69292 | 2011-11-01 22:05:34 -0400 | [diff] [blame] | 38 | func Match(pattern, name string) (matched bool, err error) { |
Russ Cox | 78961ed | 2010-02-24 16:11:14 -0800 | [diff] [blame] | 39 | Pattern: |
| 40 | for len(pattern) > 0 { |
| 41 | var star bool |
| 42 | var chunk string |
| 43 | star, chunk, pattern = scanChunk(pattern) |
| 44 | if star && chunk == "" { |
| 45 | // Trailing * matches rest of string unless it has a /. |
Brad Fitzpatrick | d8e27db | 2013-08-05 16:27:24 -0700 | [diff] [blame] | 46 | return strings.Index(name, "/") < 0, nil |
Russ Cox | 78961ed | 2010-02-24 16:11:14 -0800 | [diff] [blame] | 47 | } |
| 48 | // Look for match at current position. |
| 49 | t, ok, err := matchChunk(chunk, name) |
Kevin Ballard | 20834d6 | 2010-02-25 09:15:52 -0800 | [diff] [blame] | 50 | // if we're the last chunk, make sure we've exhausted the name |
| 51 | // otherwise we'll give a false result even if we could still match |
| 52 | // using the star |
| 53 | if ok && (len(t) == 0 || len(pattern) > 0) { |
Russ Cox | 78961ed | 2010-02-24 16:11:14 -0800 | [diff] [blame] | 54 | name = t |
| 55 | continue |
| 56 | } |
| 57 | if err != nil { |
| 58 | return false, err |
| 59 | } |
| 60 | if star { |
| 61 | // Look for match skipping i+1 bytes. |
| 62 | // Cannot skip /. |
| 63 | for i := 0; i < len(name) && name[i] != '/'; i++ { |
| 64 | t, ok, err := matchChunk(chunk, name[i+1:]) |
| 65 | if ok { |
Kevin Ballard | 20834d6 | 2010-02-25 09:15:52 -0800 | [diff] [blame] | 66 | // if we're the last chunk, make sure we exhausted the name |
| 67 | if len(pattern) == 0 && len(t) > 0 { |
| 68 | continue |
| 69 | } |
Russ Cox | 78961ed | 2010-02-24 16:11:14 -0800 | [diff] [blame] | 70 | name = t |
| 71 | continue Pattern |
| 72 | } |
| 73 | if err != nil { |
| 74 | return false, err |
| 75 | } |
| 76 | } |
| 77 | } |
| 78 | return false, nil |
| 79 | } |
| 80 | return len(name) == 0, nil |
| 81 | } |
| 82 | |
Gustavo Niemeyer | 04ca4f8 | 2011-03-06 17:33:23 -0500 | [diff] [blame] | 83 | // scanChunk gets the next segment of pattern, which is a non-star string |
Russ Cox | 78961ed | 2010-02-24 16:11:14 -0800 | [diff] [blame] | 84 | // possibly preceded by a star. |
| 85 | func scanChunk(pattern string) (star bool, chunk, rest string) { |
| 86 | for len(pattern) > 0 && pattern[0] == '*' { |
| 87 | pattern = pattern[1:] |
| 88 | star = true |
| 89 | } |
| 90 | inrange := false |
| 91 | var i int |
| 92 | Scan: |
| 93 | for i = 0; i < len(pattern); i++ { |
| 94 | switch pattern[i] { |
| 95 | case '\\': |
| 96 | // error check handled in matchChunk: bad pattern. |
| 97 | if i+1 < len(pattern) { |
| 98 | i++ |
| 99 | } |
Russ Cox | 78961ed | 2010-02-24 16:11:14 -0800 | [diff] [blame] | 100 | case '[': |
| 101 | inrange = true |
| 102 | case ']': |
| 103 | inrange = false |
| 104 | case '*': |
| 105 | if !inrange { |
| 106 | break Scan |
| 107 | } |
| 108 | } |
| 109 | } |
| 110 | return star, pattern[0:i], pattern[i:] |
| 111 | } |
| 112 | |
| 113 | // matchChunk checks whether chunk matches the beginning of s. |
| 114 | // If so, it returns the remainder of s (after the match). |
| 115 | // Chunk is all single-character operators: literals, char classes, and ?. |
Russ Cox | eb69292 | 2011-11-01 22:05:34 -0400 | [diff] [blame] | 116 | func matchChunk(chunk, s string) (rest string, ok bool, err error) { |
Russ Cox | 78961ed | 2010-02-24 16:11:14 -0800 | [diff] [blame] | 117 | for len(chunk) > 0 { |
| 118 | if len(s) == 0 { |
| 119 | return |
| 120 | } |
| 121 | switch chunk[0] { |
| 122 | case '[': |
| 123 | // character class |
| 124 | r, n := utf8.DecodeRuneInString(s) |
| 125 | s = s[n:] |
| 126 | chunk = chunk[1:] |
| 127 | // possibly negated |
| 128 | notNegated := true |
| 129 | if len(chunk) > 0 && chunk[0] == '^' { |
| 130 | notNegated = false |
| 131 | chunk = chunk[1:] |
| 132 | } |
| 133 | // parse all ranges |
| 134 | match := false |
| 135 | nrange := 0 |
| 136 | for { |
| 137 | if len(chunk) > 0 && chunk[0] == ']' && nrange > 0 { |
| 138 | chunk = chunk[1:] |
| 139 | break |
| 140 | } |
Russ Cox | db33959 | 2011-10-25 22:20:02 -0700 | [diff] [blame] | 141 | var lo, hi rune |
Russ Cox | 78961ed | 2010-02-24 16:11:14 -0800 | [diff] [blame] | 142 | if lo, chunk, err = getEsc(chunk); err != nil { |
| 143 | return |
| 144 | } |
| 145 | hi = lo |
| 146 | if chunk[0] == '-' { |
| 147 | if hi, chunk, err = getEsc(chunk[1:]); err != nil { |
| 148 | return |
| 149 | } |
| 150 | } |
| 151 | if lo <= r && r <= hi { |
| 152 | match = true |
| 153 | } |
| 154 | nrange++ |
| 155 | } |
| 156 | if match != notNegated { |
| 157 | return |
| 158 | } |
| 159 | |
| 160 | case '?': |
| 161 | if s[0] == '/' { |
| 162 | return |
| 163 | } |
| 164 | _, n := utf8.DecodeRuneInString(s) |
| 165 | s = s[n:] |
| 166 | chunk = chunk[1:] |
| 167 | |
| 168 | case '\\': |
| 169 | chunk = chunk[1:] |
| 170 | if len(chunk) == 0 { |
| 171 | err = ErrBadPattern |
| 172 | return |
| 173 | } |
| 174 | fallthrough |
| 175 | |
| 176 | default: |
| 177 | if chunk[0] != s[0] { |
| 178 | return |
| 179 | } |
| 180 | s = s[1:] |
| 181 | chunk = chunk[1:] |
| 182 | } |
| 183 | } |
| 184 | return s, true, nil |
| 185 | } |
| 186 | |
| 187 | // getEsc gets a possibly-escaped character from chunk, for a character class. |
Russ Cox | eb69292 | 2011-11-01 22:05:34 -0400 | [diff] [blame] | 188 | func getEsc(chunk string) (r rune, nchunk string, err error) { |
Russ Cox | 78961ed | 2010-02-24 16:11:14 -0800 | [diff] [blame] | 189 | if len(chunk) == 0 || chunk[0] == '-' || chunk[0] == ']' { |
| 190 | err = ErrBadPattern |
| 191 | return |
| 192 | } |
| 193 | if chunk[0] == '\\' { |
| 194 | chunk = chunk[1:] |
| 195 | if len(chunk) == 0 { |
| 196 | err = ErrBadPattern |
| 197 | return |
| 198 | } |
| 199 | } |
| 200 | r, n := utf8.DecodeRuneInString(chunk) |
| 201 | if r == utf8.RuneError && n == 1 { |
| 202 | err = ErrBadPattern |
| 203 | } |
| 204 | nchunk = chunk[n:] |
| 205 | if len(nchunk) == 0 { |
| 206 | err = ErrBadPattern |
| 207 | } |
| 208 | return |
| 209 | } |