Andrew Gerrand | 9fc5164 | 2013-09-19 10:55:46 +1000 | [diff] [blame] | 1 | // Copyright 2012 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | package present |
| 6 | |
| 7 | import ( |
| 8 | "errors" |
| 9 | "regexp" |
| 10 | "strconv" |
| 11 | "unicode/utf8" |
| 12 | ) |
| 13 | |
| 14 | // This file is stolen from go/src/cmd/godoc/codewalk.go. |
| 15 | // It's an evaluator for the file address syntax implemented by acme and sam, |
| 16 | // but using Go-native regular expressions. |
| 17 | // To keep things reasonably close, this version uses (?m:re) for all user-provided |
| 18 | // regular expressions. That is the only change to the code from codewalk.go. |
Tobias Klauser | 95c3470 | 2019-08-22 09:30:48 +0200 | [diff] [blame] | 19 | // See http://9p.io/sys/doc/sam/sam.html Table II for details on the syntax. |
Andrew Gerrand | 9fc5164 | 2013-09-19 10:55:46 +1000 | [diff] [blame] | 20 | |
cui fliter | f90d8ad | 2022-10-10 20:58:54 +0800 | [diff] [blame] | 21 | // addrToByteRange evaluates the given address starting at offset start in data. |
Andrew Gerrand | 9fc5164 | 2013-09-19 10:55:46 +1000 | [diff] [blame] | 22 | // It returns the lo and hi byte offset of the matched region within data. |
| 23 | func addrToByteRange(addr string, start int, data []byte) (lo, hi int, err error) { |
| 24 | if addr == "" { |
| 25 | lo, hi = start, len(data) |
| 26 | return |
| 27 | } |
| 28 | var ( |
| 29 | dir byte |
| 30 | prevc byte |
| 31 | charOffset bool |
| 32 | ) |
| 33 | lo = start |
| 34 | hi = start |
| 35 | for addr != "" && err == nil { |
| 36 | c := addr[0] |
| 37 | switch c { |
| 38 | default: |
| 39 | err = errors.New("invalid address syntax near " + string(c)) |
| 40 | case ',': |
| 41 | if len(addr) == 1 { |
| 42 | hi = len(data) |
| 43 | } else { |
| 44 | _, hi, err = addrToByteRange(addr[1:], hi, data) |
| 45 | } |
| 46 | return |
| 47 | |
| 48 | case '+', '-': |
| 49 | if prevc == '+' || prevc == '-' { |
| 50 | lo, hi, err = addrNumber(data, lo, hi, prevc, 1, charOffset) |
| 51 | } |
| 52 | dir = c |
| 53 | |
| 54 | case '$': |
| 55 | lo = len(data) |
| 56 | hi = len(data) |
| 57 | if len(addr) > 1 { |
| 58 | dir = '+' |
| 59 | } |
| 60 | |
| 61 | case '#': |
| 62 | charOffset = true |
| 63 | |
| 64 | case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': |
| 65 | var i int |
| 66 | for i = 1; i < len(addr); i++ { |
| 67 | if addr[i] < '0' || addr[i] > '9' { |
| 68 | break |
| 69 | } |
| 70 | } |
| 71 | var n int |
| 72 | n, err = strconv.Atoi(addr[0:i]) |
| 73 | if err != nil { |
| 74 | break |
| 75 | } |
| 76 | lo, hi, err = addrNumber(data, lo, hi, dir, n, charOffset) |
| 77 | dir = 0 |
| 78 | charOffset = false |
| 79 | prevc = c |
| 80 | addr = addr[i:] |
| 81 | continue |
| 82 | |
| 83 | case '/': |
| 84 | var i, j int |
| 85 | Regexp: |
| 86 | for i = 1; i < len(addr); i++ { |
| 87 | switch addr[i] { |
| 88 | case '\\': |
| 89 | i++ |
| 90 | case '/': |
| 91 | j = i + 1 |
| 92 | break Regexp |
| 93 | } |
| 94 | } |
| 95 | if j == 0 { |
| 96 | j = i |
| 97 | } |
| 98 | pattern := addr[1:i] |
| 99 | lo, hi, err = addrRegexp(data, lo, hi, dir, pattern) |
| 100 | prevc = c |
| 101 | addr = addr[j:] |
| 102 | continue |
| 103 | } |
| 104 | prevc = c |
| 105 | addr = addr[1:] |
| 106 | } |
| 107 | |
| 108 | if err == nil && dir != 0 { |
| 109 | lo, hi, err = addrNumber(data, lo, hi, dir, 1, charOffset) |
| 110 | } |
| 111 | if err != nil { |
| 112 | return 0, 0, err |
| 113 | } |
| 114 | return lo, hi, nil |
| 115 | } |
| 116 | |
// addrNumber applies the given dir, n, and charOffset to the address lo, hi.
// dir is '+' or '-', n is the count, and charOffset is true if the syntax
// used was #n. Applying +n (or +#n) means to advance n lines
// (or characters) after hi. Applying -n (or -#n) means to back up n lines
// (or characters) before lo.
// A dir of 0 resets lo and hi to the start of data and then behaves like '+'.
// The return value is the new lo, hi.
// If data is exhausted before n lines or characters have been counted,
// the error "address out of range" is returned with both offsets zero.
func addrNumber(data []byte, lo, hi int, dir byte, n int, charOffset bool) (int, int, error) {
	switch dir {
	case 0:
		lo = 0
		hi = 0
		fallthrough

	case '+':
		if charOffset {
			// Step forward one rune at a time starting from hi.
			pos := hi
			for ; n > 0 && pos < len(data); n-- {
				_, size := utf8.DecodeRune(data[pos:])
				pos += size
			}
			if n == 0 {
				return pos, pos, nil
			}
			// Ran off the end of data: leave the switch and report the error.
			break
		}
		// find next beginning of line
		if hi > 0 {
			for hi < len(data) && data[hi-1] != '\n' {
				hi++
			}
		}
		lo = hi
		if n == 0 {
			return lo, hi, nil
		}
		// Count newlines. When exactly one more is needed, the byte after
		// the current newline starts the selected line; the newline that
		// brings the count to zero ends it.
		for ; hi < len(data); hi++ {
			if data[hi] != '\n' {
				continue
			}
			switch n--; n {
			case 1:
				lo = hi + 1
			case 0:
				return lo, hi + 1, nil
			}
		}

	case '-':
		if charOffset {
			// Step backward one rune at a time starting from lo. Decoding the
			// rune that ends just before pos never reads data[pos], so
			// lo == len(data) is safe and multi-byte runes are skipped whole.
			pos := lo
			for ; n > 0 && pos > 0; n-- {
				_, size := utf8.DecodeLastRune(data[:pos])
				pos -= size
			}
			if n == 0 {
				return pos, pos, nil
			}
			// Ran off the start of data: leave the switch and report the error.
			break
		}
		// find earlier beginning of line
		for lo > 0 && data[lo-1] != '\n' {
			lo--
		}
		hi = lo
		if n == 0 {
			return lo, hi, nil
		}
		// Walk backward over line starts (offset 0 or any offset preceded by
		// a newline). When exactly one more is needed, the current line start
		// becomes the end of the range; the line start that brings the count
		// to zero becomes its beginning.
		for ; lo >= 0; lo-- {
			if lo > 0 && data[lo-1] != '\n' {
				continue
			}
			switch n--; n {
			case 1:
				hi = lo
			case 0:
				return lo, hi, nil
			}
		}
	}

	return 0, 0, errors.New("address out of range")
}
| 201 | |
// addrRegexp looks for pattern in data and returns the byte offsets of the
// first match. The search begins at hi; if nothing matches from there and
// hi is past the start of data, it wraps around and searches data from the
// beginning. A dir of '-' requests a backward search (from lo), which is
// not implemented and yields an error. An invalid pattern or the absence
// of any match is also reported as an error, with both offsets zero.
func addrRegexp(data []byte, lo, hi int, dir byte, pattern string) (int, int, error) {
	// Wrap the pattern in (?m:...) so that ^ and $ match at line
	// boundaries, as they do in sam and acme.
	multiline := "(?m:" + pattern + ")"
	re, compileErr := regexp.Compile(multiline)
	switch {
	case compileErr != nil:
		return 0, 0, compileErr
	case dir == '-':
		// A reverse search could be built on top of repeated forward
		// searches, but that seems like overkill.
		return 0, 0, errors.New("reverse search not implemented")
	}

	// First attempt: from hi to the end of data.
	if loc := re.FindIndex(data[hi:]); loc != nil {
		return hi + loc[0], hi + loc[1], nil
	}
	// Second attempt: wrap to the beginning of data.
	if hi > 0 {
		if loc := re.FindIndex(data); loc != nil {
			return loc[0], loc[1], nil
		}
	}
	return 0, 0, errors.New("no match for " + pattern)
}