blob: b4f7503b6da8543cd09783da6f1827d2c5792df0 [file] [log] [blame]
Andrew Gerrand9fc51642013-09-19 10:55:46 +10001// Copyright 2012 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package present
6
7import (
8 "errors"
9 "regexp"
10 "strconv"
11 "unicode/utf8"
12)
13
14// This file is stolen from go/src/cmd/godoc/codewalk.go.
15// It's an evaluator for the file address syntax implemented by acme and sam,
16// but using Go-native regular expressions.
17// To keep things reasonably close, this version uses (?m:re) for all user-provided
18// regular expressions. That is the only change to the code from codewalk.go.
Tobias Klauser95c34702019-08-22 09:30:48 +020019// See http://9p.io/sys/doc/sam/sam.html Table II for details on the syntax.
Andrew Gerrand9fc51642013-09-19 10:55:46 +100020
cui fliterf90d8ad2022-10-10 20:58:54 +080021// addrToByteRange evaluates the given address starting at offset start in data.
Andrew Gerrand9fc51642013-09-19 10:55:46 +100022// It returns the lo and hi byte offset of the matched region within data.
23func addrToByteRange(addr string, start int, data []byte) (lo, hi int, err error) {
24 if addr == "" {
25 lo, hi = start, len(data)
26 return
27 }
28 var (
29 dir byte
30 prevc byte
31 charOffset bool
32 )
33 lo = start
34 hi = start
35 for addr != "" && err == nil {
36 c := addr[0]
37 switch c {
38 default:
39 err = errors.New("invalid address syntax near " + string(c))
40 case ',':
41 if len(addr) == 1 {
42 hi = len(data)
43 } else {
44 _, hi, err = addrToByteRange(addr[1:], hi, data)
45 }
46 return
47
48 case '+', '-':
49 if prevc == '+' || prevc == '-' {
50 lo, hi, err = addrNumber(data, lo, hi, prevc, 1, charOffset)
51 }
52 dir = c
53
54 case '$':
55 lo = len(data)
56 hi = len(data)
57 if len(addr) > 1 {
58 dir = '+'
59 }
60
61 case '#':
62 charOffset = true
63
64 case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
65 var i int
66 for i = 1; i < len(addr); i++ {
67 if addr[i] < '0' || addr[i] > '9' {
68 break
69 }
70 }
71 var n int
72 n, err = strconv.Atoi(addr[0:i])
73 if err != nil {
74 break
75 }
76 lo, hi, err = addrNumber(data, lo, hi, dir, n, charOffset)
77 dir = 0
78 charOffset = false
79 prevc = c
80 addr = addr[i:]
81 continue
82
83 case '/':
84 var i, j int
85 Regexp:
86 for i = 1; i < len(addr); i++ {
87 switch addr[i] {
88 case '\\':
89 i++
90 case '/':
91 j = i + 1
92 break Regexp
93 }
94 }
95 if j == 0 {
96 j = i
97 }
98 pattern := addr[1:i]
99 lo, hi, err = addrRegexp(data, lo, hi, dir, pattern)
100 prevc = c
101 addr = addr[j:]
102 continue
103 }
104 prevc = c
105 addr = addr[1:]
106 }
107
108 if err == nil && dir != 0 {
109 lo, hi, err = addrNumber(data, lo, hi, dir, 1, charOffset)
110 }
111 if err != nil {
112 return 0, 0, err
113 }
114 return lo, hi, nil
115}
116
117// addrNumber applies the given dir, n, and charOffset to the address lo, hi.
118// dir is '+' or '-', n is the count, and charOffset is true if the syntax
119// used was #n. Applying +n (or +#n) means to advance n lines
120// (or characters) after hi. Applying -n (or -#n) means to back up n lines
121// (or characters) before lo.
122// The return value is the new lo, hi.
123func addrNumber(data []byte, lo, hi int, dir byte, n int, charOffset bool) (int, int, error) {
124 switch dir {
125 case 0:
126 lo = 0
127 hi = 0
128 fallthrough
129
130 case '+':
131 if charOffset {
132 pos := hi
133 for ; n > 0 && pos < len(data); n-- {
134 _, size := utf8.DecodeRune(data[pos:])
135 pos += size
136 }
137 if n == 0 {
138 return pos, pos, nil
139 }
140 break
141 }
142 // find next beginning of line
143 if hi > 0 {
144 for hi < len(data) && data[hi-1] != '\n' {
145 hi++
146 }
147 }
148 lo = hi
149 if n == 0 {
150 return lo, hi, nil
151 }
152 for ; hi < len(data); hi++ {
153 if data[hi] != '\n' {
154 continue
155 }
156 switch n--; n {
157 case 1:
158 lo = hi + 1
159 case 0:
160 return lo, hi + 1, nil
161 }
162 }
163
164 case '-':
165 if charOffset {
166 // Scan backward for bytes that are not UTF-8 continuation bytes.
167 pos := lo
168 for ; pos > 0 && n > 0; pos-- {
169 if data[pos]&0xc0 != 0x80 {
170 n--
171 }
172 }
173 if n == 0 {
174 return pos, pos, nil
175 }
176 break
177 }
178 // find earlier beginning of line
179 for lo > 0 && data[lo-1] != '\n' {
180 lo--
181 }
182 hi = lo
183 if n == 0 {
184 return lo, hi, nil
185 }
186 for ; lo >= 0; lo-- {
187 if lo > 0 && data[lo-1] != '\n' {
188 continue
189 }
190 switch n--; n {
191 case 1:
192 hi = lo
193 case 0:
194 return lo, hi, nil
195 }
196 }
197 }
198
199 return 0, 0, errors.New("address out of range")
200}
201
// addrRegexp locates pattern in data and returns the byte offsets of the match.
// With dir '+' (or 0) the search runs forward from hi; if nothing matches
// there and hi is past the start, it wraps around and searches all of data.
// A dir of '-' requests a backward search from lo, which is not implemented
// and always fails. The pattern is compiled in multi-line mode on every call;
// a compile failure is returned as the error.
func addrRegexp(data []byte, lo, hi int, dir byte, pattern string) (int, int, error) {
	// Multi-line mode makes ^ and $ anchor at line boundaries, as in sam/acme.
	expr, compileErr := regexp.Compile("(?m:" + pattern + ")")
	switch {
	case compileErr != nil:
		return 0, 0, compileErr
	case dir == '-':
		// A reverse search could be built from a binary search through
		// the file, but that seems like overkill.
		return 0, 0, errors.New("reverse search not implemented")
	}

	// First attempt: from hi to the end of data, translating the offsets
	// of the sub-slice match back into offsets within data.
	if loc := expr.FindIndex(data[hi:]); len(loc) > 0 {
		return loc[0] + hi, loc[1] + hi, nil
	}

	// Second attempt: wrap to the beginning. Pointless when hi is 0, since
	// the first attempt already covered everything.
	if hi > 0 {
		if loc := expr.FindIndex(data); len(loc) > 0 {
			return loc[0], loc[1], nil
		}
	}

	return 0, 0, errors.New("no match for " + pattern)
}