blob: e07cc2b0dbc6161eca7f9580f40601eba4b1a08f [file] [log] [blame]
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This file contains the infrastructure to create an
// identifier and full-text index for a set of Go files.
//
// Algorithm for identifier index:
// - traverse all .go files of the file tree specified by root
// - for each identifier (word) encountered, collect all occurrences (spots)
// into a list; this produces a list of spots for each word
// - reduce the lists: from a list of spots to a list of FileRuns,
// and from a list of FileRuns into a list of PakRuns
// - make a HitList from the PakRuns
//
// Details:
// - keep two lists per word: one containing package-level declarations
// that have snippets, and one containing all other spots
// - keep the snippets in a separate table indexed by snippet index
// and store the snippet index in place of the line number in a SpotInfo
// (the line number for spots with snippets is stored in the snippet)
// - at the end, create lists of alternative spellings for a given
// word
//
// Algorithm for full text index:
// - concatenate all source code in a byte buffer (in memory)
// - add the files to a file set in lockstep as they are added to the byte
// buffer such that a byte buffer offset corresponds to the Pos value for
// that file location
// - create a suffix array from the concatenated sources
//
// String lookup in full text index:
// - use the suffix array to lookup a string's offsets - the offsets
// correspond to the Pos values relative to the file set
// - translate the Pos values back into file and line information and
// sort the result
package main
import (
"bufio"
"bytes"
"encoding/gob"
"errors"
"go/ast"
"go/parser"
"go/scanner"
"go/token"
"index/suffixarray"
"io"
"os"
"path/filepath"
"regexp"
"sort"
"strings"
)
// ----------------------------------------------------------------------------
// InterfaceSlice is a helper type for sorting interface
// slices according to some slice-specific sort criteria.
type Comparer func(x, y interface{}) bool
type InterfaceSlice struct {
slice []interface{}
less Comparer
}
func (p *InterfaceSlice) Len() int { return len(p.slice) }
func (p *InterfaceSlice) Less(i, j int) bool { return p.less(p.slice[i], p.slice[j]) }
func (p *InterfaceSlice) Swap(i, j int) { p.slice[i], p.slice[j] = p.slice[j], p.slice[i] }
// ----------------------------------------------------------------------------
// RunList
// A RunList is a list of entries that can be sorted according to some
// criteria. A RunList may be compressed by grouping "runs" of entries
// which are equal (according to the sort critera) into a new RunList of
// runs. For instance, a RunList containing pairs (x, y) may be compressed
// into a RunList containing pair runs (x, {y}) where each run consists of
// a list of y's with the same x.
type RunList []interface{}
func (h RunList) sort(less Comparer) {
sort.Sort(&InterfaceSlice{h, less})
}
// Compress entries which are the same according to a sort criteria
// (specified by less) into "runs".
func (h RunList) reduce(less Comparer, newRun func(h RunList) interface{}) RunList {
if len(h) == 0 {
return nil
}
// len(h) > 0
// create runs of entries with equal values
h.sort(less)
// for each run, make a new run object and collect them in a new RunList
var hh RunList
i, x := 0, h[0]
for j, y := range h {
if less(x, y) {
hh = append(hh, newRun(h[i:j]))
i, x = j, h[j] // start a new run
}
}
// add final run, if any
if i < len(h) {
hh = append(hh, newRun(h[i:]))
}
return hh
}
// ----------------------------------------------------------------------------
// SpotInfo
// A SpotInfo value describes a particular identifier spot in a given file;
// It encodes three values: the SpotKind (declaration or use), a line or
// snippet index "lori", and whether it's a line or index.
//
// The following encoding is used:
//
// bits 32 4 1 0
// value [lori|kind|isIndex]
//
type SpotInfo uint32
// SpotKind describes whether an identifier is declared (and what kind of
// declaration) or used.
type SpotKind uint32
const (
PackageClause SpotKind = iota
ImportDecl
ConstDecl
TypeDecl
VarDecl
FuncDecl
MethodDecl
Use
nKinds
)
func init() {
// sanity check: if nKinds is too large, the SpotInfo
// accessor functions may need to be updated
if nKinds > 8 {
panic("nKinds > 8")
}
}
// makeSpotInfo makes a SpotInfo.
func makeSpotInfo(kind SpotKind, lori int, isIndex bool) SpotInfo {
// encode lori: bits [4..32)
x := SpotInfo(lori) << 4
if int(x>>4) != lori {
// lori value doesn't fit - since snippet indices are
// most certainly always smaller then 1<<28, this can
// only happen for line numbers; give it no line number (= 0)
x = 0
}
// encode kind: bits [1..4)
x |= SpotInfo(kind) << 1
// encode isIndex: bit 0
if isIndex {
x |= 1
}
return x
}
func (x SpotInfo) Kind() SpotKind { return SpotKind(x >> 1 & 7) }
func (x SpotInfo) Lori() int { return int(x >> 4) }
func (x SpotInfo) IsIndex() bool { return x&1 != 0 }
// ----------------------------------------------------------------------------
// KindRun
// Debugging support. Disable to see multiple entries per line.
const removeDuplicates = true
// A KindRun is a run of SpotInfos of the same kind in a given file.
// The kind (3 bits) is stored in each SpotInfo element; to find the
// kind of a KindRun, look at any of it's elements.
type KindRun []SpotInfo
// KindRuns are sorted by line number or index. Since the isIndex bit
// is always the same for all infos in one list we can compare lori's.
func (k KindRun) Len() int { return len(k) }
func (k KindRun) Less(i, j int) bool { return k[i].Lori() < k[j].Lori() }
func (k KindRun) Swap(i, j int) { k[i], k[j] = k[j], k[i] }
// FileRun contents are sorted by Kind for the reduction into KindRuns.
func lessKind(x, y interface{}) bool { return x.(SpotInfo).Kind() < y.(SpotInfo).Kind() }
// newKindRun allocates a new KindRun from the SpotInfo run h.
func newKindRun(h RunList) interface{} {
run := make(KindRun, len(h))
for i, x := range h {
run[i] = x.(SpotInfo)
}
// Spots were sorted by file and kind to create this run.
// Within this run, sort them by line number or index.
sort.Sort(run)
if removeDuplicates {
// Since both the lori and kind field must be
// same for duplicates, and since the isIndex
// bit is always the same for all infos in one
// list we can simply compare the entire info.
k := 0
prev := SpotInfo(1<<32 - 1) // an unlikely value
for _, x := range run {
if x != prev {
run[k] = x
k++
prev = x
}
}
run = run[0:k]
}
return run
}
// ----------------------------------------------------------------------------
// FileRun
// A Pak describes a Go package.
type Pak struct {
Path string // path of directory containing the package
Name string // package name as declared by package clause
}
// Paks are sorted by name (primary key) and by import path (secondary key).
func (p *Pak) less(q *Pak) bool {
return p.Name < q.Name || p.Name == q.Name && p.Path < q.Path
}
// A File describes a Go file.
type File struct {
Name string // directory-local file name
Pak *Pak // the package to which the file belongs
}
// Path returns the file path of f.
func (f *File) Path() string {
return filepath.Join(f.Pak.Path, f.Name)
}
// A Spot describes a single occurrence of a word.
type Spot struct {
File *File
Info SpotInfo
}
// A FileRun is a list of KindRuns belonging to the same file.
type FileRun struct {
File *File
Groups []KindRun
}
// Spots are sorted by file path for the reduction into FileRuns.
func lessSpot(x, y interface{}) bool {
fx := x.(Spot).File
fy := y.(Spot).File
// same as "return fx.Path() < fy.Path()" but w/o computing the file path first
px := fx.Pak.Path
py := fy.Pak.Path
return px < py || px == py && fx.Name < fy.Name
}
// newFileRun allocates a new FileRun from the Spot run h.
func newFileRun(h RunList) interface{} {
file := h[0].(Spot).File
// reduce the list of Spots into a list of KindRuns
h1 := make(RunList, len(h))
for i, x := range h {
h1[i] = x.(Spot).Info
}
h2 := h1.reduce(lessKind, newKindRun)
// create the FileRun
groups := make([]KindRun, len(h2))
for i, x := range h2 {
groups[i] = x.(KindRun)
}
return &FileRun{file, groups}
}
// ----------------------------------------------------------------------------
// PakRun
// A PakRun describes a run of *FileRuns of a package.
type PakRun struct {
Pak *Pak
Files []*FileRun
}
// Sorting support for files within a PakRun.
func (p *PakRun) Len() int { return len(p.Files) }
func (p *PakRun) Less(i, j int) bool { return p.Files[i].File.Name < p.Files[j].File.Name }
func (p *PakRun) Swap(i, j int) { p.Files[i], p.Files[j] = p.Files[j], p.Files[i] }
// FileRuns are sorted by package for the reduction into PakRuns.
func lessFileRun(x, y interface{}) bool {
return x.(*FileRun).File.Pak.less(y.(*FileRun).File.Pak)
}
// newPakRun allocates a new PakRun from the *FileRun run h.
func newPakRun(h RunList) interface{} {
pak := h[0].(*FileRun).File.Pak
files := make([]*FileRun, len(h))
for i, x := range h {
files[i] = x.(*FileRun)
}
run := &PakRun{pak, files}
sort.Sort(run) // files were sorted by package; sort them by file now
return run
}
// ----------------------------------------------------------------------------
// HitList
// A HitList describes a list of PakRuns.
type HitList []*PakRun
// PakRuns are sorted by package.
func lessPakRun(x, y interface{}) bool { return x.(*PakRun).Pak.less(y.(*PakRun).Pak) }
func reduce(h0 RunList) HitList {
// reduce a list of Spots into a list of FileRuns
h1 := h0.reduce(lessSpot, newFileRun)
// reduce a list of FileRuns into a list of PakRuns
h2 := h1.reduce(lessFileRun, newPakRun)
// sort the list of PakRuns by package
h2.sort(lessPakRun)
// create a HitList
h := make(HitList, len(h2))
for i, p := range h2 {
h[i] = p.(*PakRun)
}
return h
}
// filter returns a new HitList created by filtering
// all PakRuns from h that have a matching pakname.
func (h HitList) filter(pakname string) HitList {
var hh HitList
for _, p := range h {
if p.Pak.Name == pakname {
hh = append(hh, p)
}
}
return hh
}
// ----------------------------------------------------------------------------
// AltWords
type wordPair struct {
canon string // canonical word spelling (all lowercase)
alt string // alternative spelling
}
// An AltWords describes a list of alternative spellings for a
// canonical (all lowercase) spelling of a word.
type AltWords struct {
Canon string // canonical word spelling (all lowercase)
Alts []string // alternative spelling for the same word
}
// wordPairs are sorted by their canonical spelling.
func lessWordPair(x, y interface{}) bool { return x.(*wordPair).canon < y.(*wordPair).canon }
// newAltWords allocates a new AltWords from the *wordPair run h.
func newAltWords(h RunList) interface{} {
canon := h[0].(*wordPair).canon
alts := make([]string, len(h))
for i, x := range h {
alts[i] = x.(*wordPair).alt
}
return &AltWords{canon, alts}
}
func (a *AltWords) filter(s string) *AltWords {
var alts []string
for _, w := range a.Alts {
if w != s {
alts = append(alts, w)
}
}
if len(alts) > 0 {
return &AltWords{a.Canon, alts}
}
return nil
}
// ----------------------------------------------------------------------------
// Indexer
// Adjust these flags as seems best.
const includeMainPackages = true
const includeTestFiles = true
type IndexResult struct {
Decls RunList // package-level declarations (with snippets)
Others RunList // all other occurrences
}
// Statistics provides statistics information for an index.
type Statistics struct {
Bytes int // total size of indexed source files
Files int // number of indexed source files
Lines int // number of lines (all files)
Words int // number of different identifiers
Spots int // number of identifier occurrences
}
// An Indexer maintains the data structures and provides the machinery
// for indexing .go files under a file tree. It implements the path.Visitor
// interface for walking file trees, and the ast.Visitor interface for
// walking Go ASTs.
type Indexer struct {
fset *token.FileSet // file set for all indexed files
sources bytes.Buffer // concatenated sources
packages map[string]*Pak // map of canonicalized *Paks
words map[string]*IndexResult // RunLists of Spots
snippets []*Snippet // indices are stored in SpotInfos
current *token.File // last file added to file set
file *File // AST for current file
decl ast.Decl // AST for current decl
stats Statistics
}
func (x *Indexer) lookupPackage(path, name string) *Pak {
// In the source directory tree, more than one package may
// live in the same directory. For the packages map, construct
// a key that includes both the directory path and the package
// name.
key := path + ":" + name
pak := x.packages[key]
if pak == nil {
pak = &Pak{path, name}
x.packages[key] = pak
}
return pak
}
func (x *Indexer) addSnippet(s *Snippet) int {
index := len(x.snippets)
x.snippets = append(x.snippets, s)
return index
}
func (x *Indexer) visitComment(c *ast.CommentGroup) {
if c != nil {
ast.Walk(x, c)
}
}
func (x *Indexer) visitIdent(kind SpotKind, id *ast.Ident) {
if id != nil {
lists, found := x.words[id.Name]
if !found {
lists = new(IndexResult)
x.words[id.Name] = lists
}
if kind == Use || x.decl == nil {
// not a declaration or no snippet required
info := makeSpotInfo(kind, x.current.Line(id.Pos()), false)
lists.Others = append(lists.Others, Spot{x.file, info})
} else {
// a declaration with snippet
index := x.addSnippet(NewSnippet(x.fset, x.decl, id))
info := makeSpotInfo(kind, index, true)
lists.Decls = append(lists.Decls, Spot{x.file, info})
}
x.stats.Spots++
}
}
func (x *Indexer) visitSpec(spec ast.Spec, isVarDecl bool) {
switch n := spec.(type) {
case *ast.ImportSpec:
x.visitComment(n.Doc)
x.visitIdent(ImportDecl, n.Name)
ast.Walk(x, n.Path)
x.visitComment(n.Comment)
case *ast.ValueSpec:
x.visitComment(n.Doc)
kind := ConstDecl
if isVarDecl {
kind = VarDecl
}
for _, n := range n.Names {
x.visitIdent(kind, n)
}
ast.Walk(x, n.Type)
for _, v := range n.Values {
ast.Walk(x, v)
}
x.visitComment(n.Comment)
case *ast.TypeSpec:
x.visitComment(n.Doc)
x.visitIdent(TypeDecl, n.Name)
ast.Walk(x, n.Type)
x.visitComment(n.Comment)
}
}
func (x *Indexer) Visit(node ast.Node) ast.Visitor {
// TODO(gri): methods in interface types are categorized as VarDecl
switch n := node.(type) {
case nil:
return nil
case *ast.Ident:
x.visitIdent(Use, n)
case *ast.Field:
x.decl = nil // no snippets for fields
x.visitComment(n.Doc)
for _, m := range n.Names {
x.visitIdent(VarDecl, m)
}
ast.Walk(x, n.Type)
ast.Walk(x, n.Tag)
x.visitComment(n.Comment)
case *ast.DeclStmt:
if decl, ok := n.Decl.(*ast.GenDecl); ok {
// local declarations can only be *ast.GenDecls
x.decl = nil // no snippets for local declarations
x.visitComment(decl.Doc)
for _, s := range decl.Specs {
x.visitSpec(s, decl.Tok == token.VAR)
}
} else {
// handle error case gracefully
ast.Walk(x, n.Decl)
}
case *ast.GenDecl:
x.decl = n
x.visitComment(n.Doc)
for _, s := range n.Specs {
x.visitSpec(s, n.Tok == token.VAR)
}
case *ast.FuncDecl:
x.visitComment(n.Doc)
kind := FuncDecl
if n.Recv != nil {
kind = MethodDecl
ast.Walk(x, n.Recv)
}
x.decl = n
x.visitIdent(kind, n.Name)
ast.Walk(x, n.Type)
if n.Body != nil {
ast.Walk(x, n.Body)
}
case *ast.File:
x.visitComment(n.Doc)
x.decl = nil
x.visitIdent(PackageClause, n.Name)
for _, d := range n.Decls {
ast.Walk(x, d)
}
// don't visit package level comments for now
// to avoid duplicate visiting from individual
// nodes
default:
return x
}
return nil
}
func pkgName(filename string) string {
// use a new file set each time in order to not pollute the indexer's
// file set (which must stay in sync with the concatenated source code)
file, err := parser.ParseFile(token.NewFileSet(), filename, nil, parser.PackageClauseOnly)
if err != nil || file == nil {
return ""
}
return file.Name.Name
}
// addFile adds a file to the index if possible and returns the file set file
// and the file's AST if it was successfully parsed as a Go file. If addFile
// failed (that is, if the file was not added), it returns file == nil.
func (x *Indexer) addFile(filename string, goFile bool) (file *token.File, ast *ast.File) {
// open file
f, err := fs.Open(filename)
if err != nil {
return
}
defer f.Close()
// The file set's base offset and x.sources size must be in lock-step;
// this permits the direct mapping of suffix array lookup results to
// to corresponding Pos values.
//
// When a file is added to the file set, its offset base increases by
// the size of the file + 1; and the initial base offset is 1. Add an
// extra byte to the sources here.
x.sources.WriteByte(0)
// If the sources length doesn't match the file set base at this point
// the file set implementation changed or we have another error.
base := x.fset.Base()
if x.sources.Len() != base {
panic("internal error - file base incorrect")
}
// append file contents (src) to x.sources
if _, err := x.sources.ReadFrom(f); err == nil {
src := x.sources.Bytes()[base:]
if goFile {
// parse the file and in the process add it to the file set
if ast, err = parser.ParseFile(x.fset, filename, src, parser.ParseComments); err == nil {
file = x.fset.File(ast.Pos()) // ast.Pos() is inside the file
return
}
// file has parse errors, and the AST may be incorrect -
// set lines information explicitly and index as ordinary
// text file (cannot fall through to the text case below
// because the file has already been added to the file set
// by the parser)
file = x.fset.File(token.Pos(base)) // token.Pos(base) is inside the file
file.SetLinesForContent(src)
ast = nil
return
}
if isText(src) {
// only add the file to the file set (for the full text index)
file = x.fset.AddFile(filename, x.fset.Base(), len(src))
file.SetLinesForContent(src)
return
}
}
// discard possibly added data
x.sources.Truncate(base - 1) // -1 to remove added byte 0 since no file was added
return
}
// Design note: Using an explicit white list of permitted files for indexing
// makes sure that the important files are included and massively reduces the
// number of files to index. The advantage over a blacklist is that unexpected
// (non-blacklisted) files won't suddenly explode the index.
//
// TODO(gri): We may want to make this list customizable, perhaps via a flag.
// Files are whitelisted if they have a file name or extension
// present as key in whitelisted.
var whitelisted = map[string]bool{
".bash": true,
".c": true,
".css": true,
".go": true,
".goc": true,
".h": true,
".html": true,
".js": true,
".out": true,
".py": true,
".s": true,
".sh": true,
".txt": true,
".xml": true,
"AUTHORS": true,
"CONTRIBUTORS": true,
"LICENSE": true,
"Makefile": true,
"PATENTS": true,
"README": true,
}
// isWhitelisted returns true if a file is on the list
// of "permitted" files for indexing. The filename must
// be the directory-local name of the file.
func isWhitelisted(filename string) bool {
key := filepath.Ext(filename)
if key == "" {
// file has no extension - use entire filename
key = filename
}
return whitelisted[key]
}
func (x *Indexer) visitFile(dirname string, f os.FileInfo, fulltextIndex bool) {
if f.IsDir() {
return
}
filename := filepath.Join(dirname, f.Name())
goFile := false
switch {
case isGoFile(f):
if !includeTestFiles && (!isPkgFile(f) || strings.HasPrefix(filename, "test/")) {
return
}
if !includeMainPackages && pkgName(filename) == "main" {
return
}
goFile = true
case !fulltextIndex || !isWhitelisted(f.Name()):
return
}
file, fast := x.addFile(filename, goFile)
if file == nil {
return // addFile failed
}
if fast != nil {
// we've got a Go file to index
x.current = file
pak := x.lookupPackage(dirname, fast.Name.Name)
x.file = &File{f.Name(), pak}
ast.Walk(x, fast)
}
// update statistics
x.stats.Bytes += file.Size()
x.stats.Files++
x.stats.Lines += file.LineCount()
}
// ----------------------------------------------------------------------------
// Index
type LookupResult struct {
Decls HitList // package-level declarations (with snippets)
Others HitList // all other occurrences
}
type Index struct {
fset *token.FileSet // file set used during indexing; nil if no textindex
suffixes *suffixarray.Index // suffixes for concatenated sources; nil if no textindex
words map[string]*LookupResult // maps words to hit lists
alts map[string]*AltWords // maps canonical(words) to lists of alternative spellings
snippets []*Snippet // all snippets, indexed by snippet index
stats Statistics
}
func canonical(w string) string { return strings.ToLower(w) }
// NewIndex creates a new index for the .go files
// in the directories given by dirnames.
//
func NewIndex(dirnames <-chan string, fulltextIndex bool, throttle float64) *Index {
var x Indexer
th := NewThrottle(throttle, 0.1e9) // run at least 0.1s at a time
// initialize Indexer
// (use some reasonably sized maps to start)
x.fset = token.NewFileSet()
x.packages = make(map[string]*Pak, 256)
x.words = make(map[string]*IndexResult, 8192)
// index all files in the directories given by dirnames
for dirname := range dirnames {
list, err := fs.ReadDir(dirname)
if err != nil {
continue // ignore this directory
}
for _, f := range list {
if !f.IsDir() {
x.visitFile(dirname, f, fulltextIndex)
}
th.Throttle()
}
}
if !fulltextIndex {
// the file set, the current file, and the sources are
// not needed after indexing if no text index is built -
// help GC and clear them
x.fset = nil
x.sources.Reset()
x.current = nil // contains reference to fset!
}
// for each word, reduce the RunLists into a LookupResult;
// also collect the word with its canonical spelling in a
// word list for later computation of alternative spellings
words := make(map[string]*LookupResult)
var wlist RunList
for w, h := range x.words {
decls := reduce(h.Decls)
others := reduce(h.Others)
words[w] = &LookupResult{
Decls: decls,
Others: others,
}
wlist = append(wlist, &wordPair{canonical(w), w})
th.Throttle()
}
x.stats.Words = len(words)
// reduce the word list {canonical(w), w} into
// a list of AltWords runs {canonical(w), {w}}
alist := wlist.reduce(lessWordPair, newAltWords)
// convert alist into a map of alternative spellings
alts := make(map[string]*AltWords)
for i := 0; i < len(alist); i++ {
a := alist[i].(*AltWords)
alts[a.Canon] = a
}
// create text index
var suffixes *suffixarray.Index
if fulltextIndex {
suffixes = suffixarray.New(x.sources.Bytes())
}
return &Index{x.fset, suffixes, words, alts, x.snippets, x.stats}
}
type fileIndex struct {
Words map[string]*LookupResult
Alts map[string]*AltWords
Snippets []*Snippet
Fulltext bool
}
func (x *fileIndex) Write(w io.Writer) error {
return gob.NewEncoder(w).Encode(x)
}
func (x *fileIndex) Read(r io.Reader) error {
return gob.NewDecoder(r).Decode(x)
}
// Write writes the index x to w.
func (x *Index) Write(w io.Writer) error {
fulltext := false
if x.suffixes != nil {
fulltext = true
}
fx := fileIndex{
x.words,
x.alts,
x.snippets,
fulltext,
}
if err := fx.Write(w); err != nil {
return err
}
if fulltext {
if err := x.fset.Write(w); err != nil {
return err
}
if err := x.suffixes.Write(w); err != nil {
return err
}
}
return nil
}
// Read reads the index from r into x; x must not be nil.
// If r does not also implement io.ByteReader, it will be wrapped in a bufio.Reader.
func (x *Index) Read(r io.Reader) error {
// We use the ability to read bytes as a plausible surrogate for buffering.
if _, ok := r.(io.ByteReader); !ok {
r = bufio.NewReader(r)
}
var fx fileIndex
if err := fx.Read(r); err != nil {
return err
}
x.words = fx.Words
x.alts = fx.Alts
x.snippets = fx.Snippets
if fx.Fulltext {
x.fset = token.NewFileSet()
if err := x.fset.Read(r); err != nil {
return err
}
x.suffixes = new(suffixarray.Index)
if err := x.suffixes.Read(r); err != nil {
return err
}
}
return nil
}
// Stats() returns index statistics.
func (x *Index) Stats() Statistics {
return x.stats
}
func (x *Index) lookupWord(w string) (match *LookupResult, alt *AltWords) {
match = x.words[w]
alt = x.alts[canonical(w)]
// remove current spelling from alternatives
// (if there is no match, the alternatives do
// not contain the current spelling)
if match != nil && alt != nil {
alt = alt.filter(w)
}
return
}
func isIdentifier(s string) bool {
var S scanner.Scanner
fset := token.NewFileSet()
S.Init(fset.AddFile("", fset.Base(), len(s)), []byte(s), nil, 0)
if _, tok, _ := S.Scan(); tok == token.IDENT {
_, tok, _ := S.Scan()
return tok == token.EOF
}
return false
}
// For a given query, which is either a single identifier or a qualified
// identifier, Lookup returns a list of packages, a LookupResult, and a
// list of alternative spellings, if any. Any and all results may be nil.
// If the query syntax is wrong, an error is reported.
func (x *Index) Lookup(query string) (paks HitList, match *LookupResult, alt *AltWords, err error) {
ss := strings.Split(query, ".")
// check query syntax
for _, s := range ss {
if !isIdentifier(s) {
err = errors.New("all query parts must be identifiers")
return
}
}
// handle simple and qualified identifiers
switch len(ss) {
case 1:
ident := ss[0]
match, alt = x.lookupWord(ident)
if match != nil {
// found a match - filter packages with same name
// for the list of packages called ident, if any
paks = match.Others.filter(ident)
}
case 2:
pakname, ident := ss[0], ss[1]
match, alt = x.lookupWord(ident)
if match != nil {
// found a match - filter by package name
// (no paks - package names are not qualified)
decls := match.Decls.filter(pakname)
others := match.Others.filter(pakname)
match = &LookupResult{decls, others}
}
default:
err = errors.New("query is not a (qualified) identifier")
}
return
}
func (x *Index) Snippet(i int) *Snippet {
// handle illegal snippet indices gracefully
if 0 <= i && i < len(x.snippets) {
return x.snippets[i]
}
return nil
}
type positionList []struct {
filename string
line int
}
func (list positionList) Len() int { return len(list) }
func (list positionList) Less(i, j int) bool { return list[i].filename < list[j].filename }
func (list positionList) Swap(i, j int) { list[i], list[j] = list[j], list[i] }
// unique returns the list sorted and with duplicate entries removed
func unique(list []int) []int {
sort.Ints(list)
var last int
i := 0
for _, x := range list {
if i == 0 || x != last {
last = x
list[i] = x
i++
}
}
return list[0:i]
}
// A FileLines value specifies a file and line numbers within that file.
type FileLines struct {
Filename string
Lines []int
}
// LookupRegexp returns the number of matches and the matches where a regular
// expression r is found in the full text index. At most n matches are
// returned (thus found <= n).
//
func (x *Index) LookupRegexp(r *regexp.Regexp, n int) (found int, result []FileLines) {
if x.suffixes == nil || n <= 0 {
return
}
// n > 0
var list positionList
// FindAllIndex may returns matches that span across file boundaries.
// Such matches are unlikely, buf after eliminating them we may end up
// with fewer than n matches. If we don't have enough at the end, redo
// the search with an increased value n1, but only if FindAllIndex
// returned all the requested matches in the first place (if it
// returned fewer than that there cannot be more).
for n1 := n; found < n; n1 += n - found {
found = 0
matches := x.suffixes.FindAllIndex(r, n1)
// compute files, exclude matches that span file boundaries,
// and map offsets to file-local offsets
list = make(positionList, len(matches))
for _, m := range matches {
// by construction, an offset corresponds to the Pos value
// for the file set - use it to get the file and line
p := token.Pos(m[0])
if file := x.fset.File(p); file != nil {
if base := file.Base(); base <= m[1] && m[1] <= base+file.Size() {
// match [m[0], m[1]) is within the file boundaries
list[found].filename = file.Name()
list[found].line = file.Line(p)
found++
}
}
}
if found == n || len(matches) < n1 {
// found all matches or there's no chance to find more
break
}
}
list = list[0:found]
sort.Sort(list) // sort by filename
// collect matches belonging to the same file
var last string
var lines []int
addLines := func() {
if len(lines) > 0 {
// remove duplicate lines
result = append(result, FileLines{last, unique(lines)})
lines = nil
}
}
for _, m := range list {
if m.filename != last {
addLines()
last = m.filename
}
lines = append(lines, m.line)
}
addLines()
return
}