| // Copyright 2012 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package present |
| |
| import ( |
| "errors" |
| "regexp" |
| "strconv" |
| "unicode/utf8" |
| ) |
| |
| // This file is stolen from go/src/cmd/godoc/codewalk.go. |
| // It's an evaluator for the file address syntax implemented by acme and sam, |
| // but using Go-native regular expressions. |
| // To keep things reasonably close, this version uses (?m:re) for all user-provided |
| // regular expressions. That is the only change to the code from codewalk.go. |
| // See http://9p.io/sys/doc/sam/sam.html Table II for details on the syntax. |
| |
| // addrToByte evaluates the given address starting at offset start in data. |
| // It returns the lo and hi byte offset of the matched region within data. |
| func addrToByteRange(addr string, start int, data []byte) (lo, hi int, err error) { |
| if addr == "" { |
| lo, hi = start, len(data) |
| return |
| } |
| var ( |
| dir byte |
| prevc byte |
| charOffset bool |
| ) |
| lo = start |
| hi = start |
| for addr != "" && err == nil { |
| c := addr[0] |
| switch c { |
| default: |
| err = errors.New("invalid address syntax near " + string(c)) |
| case ',': |
| if len(addr) == 1 { |
| hi = len(data) |
| } else { |
| _, hi, err = addrToByteRange(addr[1:], hi, data) |
| } |
| return |
| |
| case '+', '-': |
| if prevc == '+' || prevc == '-' { |
| lo, hi, err = addrNumber(data, lo, hi, prevc, 1, charOffset) |
| } |
| dir = c |
| |
| case '$': |
| lo = len(data) |
| hi = len(data) |
| if len(addr) > 1 { |
| dir = '+' |
| } |
| |
| case '#': |
| charOffset = true |
| |
| case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': |
| var i int |
| for i = 1; i < len(addr); i++ { |
| if addr[i] < '0' || addr[i] > '9' { |
| break |
| } |
| } |
| var n int |
| n, err = strconv.Atoi(addr[0:i]) |
| if err != nil { |
| break |
| } |
| lo, hi, err = addrNumber(data, lo, hi, dir, n, charOffset) |
| dir = 0 |
| charOffset = false |
| prevc = c |
| addr = addr[i:] |
| continue |
| |
| case '/': |
| var i, j int |
| Regexp: |
| for i = 1; i < len(addr); i++ { |
| switch addr[i] { |
| case '\\': |
| i++ |
| case '/': |
| j = i + 1 |
| break Regexp |
| } |
| } |
| if j == 0 { |
| j = i |
| } |
| pattern := addr[1:i] |
| lo, hi, err = addrRegexp(data, lo, hi, dir, pattern) |
| prevc = c |
| addr = addr[j:] |
| continue |
| } |
| prevc = c |
| addr = addr[1:] |
| } |
| |
| if err == nil && dir != 0 { |
| lo, hi, err = addrNumber(data, lo, hi, dir, 1, charOffset) |
| } |
| if err != nil { |
| return 0, 0, err |
| } |
| return lo, hi, nil |
| } |
| |
// addrNumber applies the given dir, n, and charOffset to the address lo, hi.
// dir is '+' or '-', n is the count, and charOffset is true if the syntax
// used was #n. Applying +n (or +#n) means to advance n lines
// (or characters) after hi. Applying -n (or -#n) means to back up n lines
// (or characters) before lo.
// A dir of 0 is treated as '+' measured from the beginning of data.
// Characters are counted as UTF-8 encoded runes; invalid bytes count as one
// character each.
// The return value is the new lo, hi. If data does not contain enough lines
// or characters in the requested direction, the result is 0, 0 and an
// "address out of range" error.
func addrNumber(data []byte, lo, hi int, dir byte, n int, charOffset bool) (int, int, error) {
	switch dir {
	case 0:
		// Absolute address: count forward from the start of data.
		lo = 0
		hi = 0
		fallthrough

	case '+':
		if charOffset {
			pos := hi
			for ; n > 0 && pos < len(data); n-- {
				_, size := utf8.DecodeRune(data[pos:])
				pos += size
			}
			if n == 0 {
				return pos, pos, nil
			}
			break
		}
		// find next beginning of line
		if hi > 0 {
			for hi < len(data) && data[hi-1] != '\n' {
				hi++
			}
		}
		lo = hi
		if n == 0 {
			return lo, hi, nil
		}
		// Count newlines. lo is moved to the start of the final line when one
		// line remains; the range ends just past that line's newline.
		for ; hi < len(data); hi++ {
			if data[hi] != '\n' {
				continue
			}
			switch n--; n {
			case 1:
				lo = hi + 1
			case 0:
				return lo, hi + 1, nil
			}
		}

	case '-':
		if charOffset {
			// Step backward one whole rune at a time. Decoding the rune that
			// ends at pos never reads data[pos] itself, so lo == len(data) is
			// safe, and the scan stops at the first byte of a rune rather
			// than on a continuation byte.
			pos := lo
			for ; n > 0 && pos > 0; n-- {
				_, size := utf8.DecodeLastRune(data[:pos])
				pos -= size
			}
			if n == 0 {
				return pos, pos, nil
			}
			break
		}
		// find earlier beginning of line
		for lo > 0 && data[lo-1] != '\n' {
			lo--
		}
		hi = lo
		if n == 0 {
			return lo, hi, nil
		}
		// Walk backward over beginnings of lines; each one visited (including
		// the one lo currently sits on) uses up one count.
		for ; lo >= 0; lo-- {
			if lo > 0 && data[lo-1] != '\n' {
				continue
			}
			switch n--; n {
			case 1:
				hi = lo
			case 0:
				return lo, hi, nil
			}
		}
	}

	return 0, 0, errors.New("address out of range")
}
| |
// addrRegexp looks for the first match of pattern in data and returns the
// byte offsets of its start and end.
// The search begins at hi; when nothing matches from there and hi is past
// the start of data, it wraps around and searches data from the beginning.
// A dir of '-' requests a backward search from lo, which is not implemented
// and yields an error. The pattern is compiled in multi-line mode.
func addrRegexp(data []byte, lo, hi int, dir byte, pattern string) (int, int, error) {
	// Multi-line mode makes ^ and $ anchor at line boundaries, as in sam/acme.
	compiled, err := regexp.Compile("(?m:" + pattern + ")")
	if err != nil {
		return 0, 0, err
	}
	if dir == '-' {
		// A reverse search could be done by binary search through the file,
		// but that seems like overkill.
		return 0, 0, errors.New("reverse search not implemented")
	}

	// Forward search from hi; offsets are relative to the sub-slice.
	if loc := compiled.FindIndex(data[hi:]); loc != nil {
		return hi + loc[0], hi + loc[1], nil
	}
	// Nothing ahead: wrap to the beginning of data.
	if hi > 0 {
		if loc := compiled.FindIndex(data); loc != nil {
			return loc[0], loc[1], nil
		}
	}
	return 0, 0, errors.New("no match for " + pattern)
}