present/args.go - tools - Git at Google

 // Copyright 2012 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 package present

 import (
 	"errors"
 	"regexp"
 	"strconv"
 	"unicode/utf8"
 )

 // This file is stolen from go/src/cmd/godoc/codewalk.go.
 // It's an evaluator for the file address syntax implemented by acme and sam,
 // but using Go-native regular expressions.
 // To keep things reasonably close, this version uses (?m:re) for all user-provided
 // regular expressions. That is the only change to the code from codewalk.go.
 // See http://9p.io/sys/doc/sam/sam.html Table II for details on the syntax.

 // addrToByteRange evaluates the given address starting at offset start in data.
 // It returns the lo and hi byte offsets of the matched region within data.
 //
 // An empty addr selects everything from start to the end of data. Otherwise
 // addr is consumed left to right: '+' and '-' set the direction for the next
 // term, '#' makes the next number a character count instead of a line count,
 // '$' is the end of data, a run of digits is a number, /re/ is a regular
 // expression, and ',' ends the low address and evaluates the remainder as the
 // high address. On error the returned offsets are both zero.
 func addrToByteRange(addr string, start int, data []byte) (lo, hi int, err error) {
 	if addr == "" {
 		return start, len(data), nil
 	}
 	var (
 		dir        byte // pending direction: '+', '-', or 0 for none
 		prevc      byte // first byte of the previously consumed term
 		charOffset bool // set by '#': the next number counts characters
 	)
 	lo = start
 	hi = start
 	for addr != "" && err == nil {
 		c := addr[0]
 		switch c {
 		default:
 			err = errors.New("invalid address syntax near " + string(c))
 		case ',':
 			// The text after the comma is the high address, evaluated
 			// starting at the current hi. A bare trailing comma means
 			// "through the end of data".
 			if len(addr) == 1 {
 				return lo, len(data), nil
 			}
 			_, hi, err = addrToByteRange(addr[1:], hi, data)
 			if err != nil {
 				return 0, 0, err
 			}
 			return lo, hi, nil

 		case '+', '-':
 			// Two signs in a row: the earlier one had no number, so it
 			// is applied with a count of 1.
 			if prevc == '+' || prevc == '-' {
 				lo, hi, err = addrNumber(data, lo, hi, prevc, 1, charOffset)
 			}
 			dir = c

 		case '$':
 			lo = len(data)
 			hi = len(data)
 			// Anything following '$' is applied in the forward direction.
 			if len(addr) > 1 {
 				dir = '+'
 			}

 		case '#':
 			charOffset = true

 		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
 			// Consume the whole run of digits.
 			var i int
 			for i = 1; i < len(addr); i++ {
 				if addr[i] < '0' || addr[i] > '9' {
 					break
 				}
 			}
 			var n int
 			n, err = strconv.Atoi(addr[0:i])
 			if err != nil {
 				break
 			}
 			lo, hi, err = addrNumber(data, lo, hi, dir, n, charOffset)
 			dir = 0
 			charOffset = false
 			prevc = c
 			addr = addr[i:]
 			continue

 		case '/':
 			// Find the closing '/', skipping over backslash-escaped bytes.
 			// i ends at the end of the pattern, j at the start of the
 			// remaining address.
 			var i, j int
 		Regexp:
 			for i = 1; i < len(addr); i++ {
 				switch addr[i] {
 				case '\\':
 					i++
 				case '/':
 					j = i + 1
 					break Regexp
 				}
 			}
 			// A trailing backslash skips past the end of addr, leaving
 			// i == len(addr)+1. Clamp it so the slices below stay in
 			// range; the dangling backslash is then reported as an
 			// error by the regexp compiler instead of causing a panic.
 			if i > len(addr) {
 				i = len(addr)
 			}
 			if j == 0 {
 				// No closing '/': the pattern runs to the end of addr.
 				j = i
 			}
 			pattern := addr[1:i]
 			lo, hi, err = addrRegexp(data, lo, hi, dir, pattern)
 			prevc = c
 			addr = addr[j:]
 			continue
 		}
 		prevc = c
 		addr = addr[1:]
 	}

 	// A trailing '+' or '-' (or anything after '$') with no number is
 	// applied with a count of 1.
 	if err == nil && dir != 0 {
 		lo, hi, err = addrNumber(data, lo, hi, dir, 1, charOffset)
 	}
 	if err != nil {
 		return 0, 0, err
 	}
 	return lo, hi, nil
 }

 // addrNumber applies the given dir, n, and charOffset to the address lo, hi.
 // dir is '+' or '-' (or 0, which is handled like '+' starting from offset 0),
 // n is the count, and charOffset is true if the syntax used was #n.
 // Applying +n (or +#n) means to advance n lines (or characters) after hi.
 // Applying -n (or -#n) means to back up n lines (or characters) before lo.
 // The return value is the new lo, hi, or an "address out of range" error
 // when data does not hold enough lines or characters in that direction.
 func addrNumber(data []byte, lo, hi int, dir byte, n int, charOffset bool) (int, int, error) {
 	switch dir {
 	case 0:
 		// No direction: count from the beginning of data.
 		lo = 0
 		hi = 0
 		fallthrough

 	case '+':
 		if charOffset {
 			// Step forward one UTF-8 encoded character at a time.
 			pos := hi
 			for ; n > 0 && pos < len(data); n-- {
 				_, size := utf8.DecodeRune(data[pos:])
 				pos += size
 			}
 			if n == 0 {
 				return pos, pos, nil
 			}
 			break
 		}
 		// find next beginning of line
 		if hi > 0 {
 			for hi < len(data) && data[hi-1] != '\n' {
 				hi++
 			}
 		}
 		lo = hi
 		if n == 0 {
 			return lo, hi, nil
 		}
 		// Count newlines; lo is moved to the start of the final line
 		// once only one more newline is needed.
 		for ; hi < len(data); hi++ {
 			if data[hi] != '\n' {
 				continue
 			}
 			switch n--; n {
 			case 1:
 				lo = hi + 1
 			case 0:
 				return lo, hi + 1, nil
 			}
 		}

 	case '-':
 		if charOffset {
 			// Step backward one UTF-8 encoded character at a time.
 			// Decoding the rune that ends at pos never reads data[pos],
 			// so this is safe when lo == len(data), and it always
 			// lands on the first byte of a character.
 			pos := lo
 			for ; n > 0 && pos > 0; n-- {
 				_, size := utf8.DecodeLastRune(data[:pos])
 				pos -= size
 			}
 			if n == 0 {
 				return pos, pos, nil
 			}
 			break
 		}
 		// find earlier beginning of line
 		for lo > 0 && data[lo-1] != '\n' {
 			lo--
 		}
 		hi = lo
 		if n == 0 {
 			return lo, hi, nil
 		}
 		// Walk back over line starts; hi is pinned to the line start
 		// reached when only one more is needed.
 		for ; lo >= 0; lo-- {
 			if lo > 0 && data[lo-1] != '\n' {
 				continue
 			}
 			switch n--; n {
 			case 1:
 				hi = lo
 			case 0:
 				return lo, hi, nil
 			}
 		}
 	}

 	return 0, 0, errors.New("address out of range")
 }

 // addrRegexp finds the first match of pattern in data at or after hi,
 // wrapping around to the start of data when nothing matches there.
 // A dir of '-' requests a backward search from lo, which is not
 // implemented and yields an error; any other dir searches forward.
 // It returns the byte offsets of the start and end of the match.
 func addrRegexp(data []byte, lo, hi int, dir byte, pattern string) (int, int, error) {
 	// Multi-line mode makes ^ and $ behave as they do in sam/acme.
 	expr, compileErr := regexp.Compile("(?m:" + pattern + ")")
 	switch {
 	case compileErr != nil:
 		return 0, 0, compileErr
 	case dir == '-':
 		// A reverse search could be built by bisecting the file,
 		// but that seems like overkill.
 		return 0, 0, errors.New("reverse search not implemented")
 	}

 	// First try from hi to the end; offsets are relative to the subslice.
 	if loc := expr.FindIndex(data[hi:]); loc != nil {
 		return loc[0] + hi, loc[1] + hi, nil
 	}
 	// Nothing after hi: wrap around and search from the beginning.
 	if hi > 0 {
 		if loc := expr.FindIndex(data); loc != nil {
 			return loc[0], loc[1], nil
 		}
 	}
 	return 0, 0, errors.New("no match for " + pattern)
 }
	// Copyright 2012 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	package present

	import (
	"errors"
	"regexp"
	"strconv"
	"unicode/utf8"
	)

	// This file is stolen from go/src/cmd/godoc/codewalk.go.
	// It's an evaluator for the file address syntax implemented by acme and sam,
	// but using Go-native regular expressions.
	// To keep things reasonably close, this version uses (?m:re) for all user-provided
	// regular expressions. That is the only change to the code from codewalk.go.
	// See http://9p.io/sys/doc/sam/sam.html Table II for details on the syntax.

	// addrToByte evaluates the given address starting at offset start in data.
	// It returns the lo and hi byte offset of the matched region within data.
	func addrToByteRange(addr string, start int, data []byte) (lo, hi int, err error) {
	if addr == "" {
	lo, hi = start, len(data)
	return
	}
	var (
	dir byte
	prevc byte
	charOffset bool
	)
	lo = start
	hi = start
	for addr != "" && err == nil {
	c := addr[0]
	switch c {
	default:
	err = errors.New("invalid address syntax near " + string(c))
	case ',':
	if len(addr) == 1 {
	hi = len(data)
	} else {
	_, hi, err = addrToByteRange(addr[1:], hi, data)
	}
	return

	case '+', '-':
	if prevc == '+' \|\| prevc == '-' {
	lo, hi, err = addrNumber(data, lo, hi, prevc, 1, charOffset)
	}
	dir = c

	case '$':
	lo = len(data)
	hi = len(data)
	if len(addr) > 1 {
	dir = '+'
	}

	case '#':
	charOffset = true

	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
	var i int
	for i = 1; i < len(addr); i++ {
	if addr[i] < '0' \|\| addr[i] > '9' {
	break
	}
	}
	var n int
	n, err = strconv.Atoi(addr[0:i])
	if err != nil {
	break
	}
	lo, hi, err = addrNumber(data, lo, hi, dir, n, charOffset)
	dir = 0
	charOffset = false
	prevc = c
	addr = addr[i:]
	continue

	case '/':
	var i, j int
	Regexp:
	for i = 1; i < len(addr); i++ {
	switch addr[i] {
	case '\\':
	i++
	case '/':
	j = i + 1
	break Regexp
	}
	}
	if j == 0 {
	j = i
	}
	pattern := addr[1:i]
	lo, hi, err = addrRegexp(data, lo, hi, dir, pattern)
	prevc = c
	addr = addr[j:]
	continue
	}
	prevc = c
	addr = addr[1:]
	}

	if err == nil && dir != 0 {
	lo, hi, err = addrNumber(data, lo, hi, dir, 1, charOffset)
	}
	if err != nil {
	return 0, 0, err
	}
	return lo, hi, nil
	}

	// addrNumber applies the given dir, n, and charOffset to the address lo, hi.
	// dir is '+' or '-' (or 0, which is handled like '+' starting from offset 0),
	// n is the count, and charOffset is true if the syntax used was #n.
	// Applying +n (or +#n) means to advance n lines (or characters) after hi.
	// Applying -n (or -#n) means to back up n lines (or characters) before lo.
	// The return value is the new lo, hi, or an "address out of range" error
	// when data does not hold enough lines or characters in that direction.
	func addrNumber(data []byte, lo, hi int, dir byte, n int, charOffset bool) (int, int, error) {
		switch dir {
		case 0:
			// No direction: count from the beginning of data.
			lo = 0
			hi = 0
			fallthrough

		case '+':
			if charOffset {
				// Step forward one UTF-8 encoded character at a time.
				pos := hi
				for ; n > 0 && pos < len(data); n-- {
					_, size := utf8.DecodeRune(data[pos:])
					pos += size
				}
				if n == 0 {
					return pos, pos, nil
				}
				break
			}
			// find next beginning of line
			if hi > 0 {
				for hi < len(data) && data[hi-1] != '\n' {
					hi++
				}
			}
			lo = hi
			if n == 0 {
				return lo, hi, nil
			}
			// Count newlines; lo is moved to the start of the final line
			// once only one more newline is needed.
			for ; hi < len(data); hi++ {
				if data[hi] != '\n' {
					continue
				}
				switch n--; n {
				case 1:
					lo = hi + 1
				case 0:
					return lo, hi + 1, nil
				}
			}

		case '-':
			if charOffset {
				// Step backward one UTF-8 encoded character at a time.
				// Decoding the rune that ends at pos never reads data[pos],
				// so this is safe when lo == len(data), and it always
				// lands on the first byte of a character.
				pos := lo
				for ; n > 0 && pos > 0; n-- {
					_, size := utf8.DecodeLastRune(data[:pos])
					pos -= size
				}
				if n == 0 {
					return pos, pos, nil
				}
				break
			}
			// find earlier beginning of line
			for lo > 0 && data[lo-1] != '\n' {
				lo--
			}
			hi = lo
			if n == 0 {
				return lo, hi, nil
			}
			// Walk back over line starts; hi is pinned to the line start
			// reached when only one more is needed.
			for ; lo >= 0; lo-- {
				if lo > 0 && data[lo-1] != '\n' {
					continue
				}
				switch n--; n {
				case 1:
					hi = lo
				case 0:
					return lo, hi, nil
				}
			}
		}

		return 0, 0, errors.New("address out of range")
	}

	// addrRegexp finds the first match of pattern in data at or after hi,
	// wrapping around to the start of data when nothing matches there.
	// A dir of '-' requests a backward search from lo, which is not
	// implemented and yields an error; any other dir searches forward.
	// It returns the byte offsets of the start and end of the match.
	func addrRegexp(data []byte, lo, hi int, dir byte, pattern string) (int, int, error) {
		// Multi-line mode makes ^ and $ behave as they do in sam/acme.
		expr, compileErr := regexp.Compile("(?m:" + pattern + ")")
		switch {
		case compileErr != nil:
			return 0, 0, compileErr
		case dir == '-':
			// A reverse search could be built by bisecting the file,
			// but that seems like overkill.
			return 0, 0, errors.New("reverse search not implemented")
		}

		// First try from hi to the end; offsets are relative to the subslice.
		if loc := expr.FindIndex(data[hi:]); loc != nil {
			return loc[0] + hi, loc[1] + hi, nil
		}
		// Nothing after hi: wrap around and search from the beginning.
		if hi > 0 {
			if loc := expr.FindIndex(data); loc != nil {
				return loc[0], loc[1], nil
			}
		}
		return 0, 0, errors.New("no match for " + pattern)
	}