src/cmd/godoc/index.go - go - Git at Google

 // Copyright 2009 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 // This file contains the infrastructure to create an
 // identifier and full-text index for a set of Go files.
 //
 // Algorithm for identifier index:
 // - traverse all .go files of the file tree specified by root
 // - for each identifier (word) encountered, collect all occurrences (spots)
 //   into a list; this produces a list of spots for each word
 // - reduce the lists: from a list of spots to a list of FileRuns,
 //   and from a list of FileRuns into a list of PakRuns
 // - make a HitList from the PakRuns
 //
 // Details:
 // - keep two lists per word: one containing package-level declarations
 //   that have snippets, and one containing all other spots
 // - keep the snippets in a separate table indexed by snippet index
 //   and store the snippet index in place of the line number in a SpotInfo
 //   (the line number for spots with snippets is stored in the snippet)
 // - at the end, create lists of alternative spellings for a given
 //   word
 //
 // Algorithm for full text index:
 // - concatenate all source code in a byte buffer (in memory)
 // - add the files to a file set in lockstep as they are added to the byte
 //   buffer such that a byte buffer offset corresponds to the Pos value for
 //   that file location
 // - create a suffix array from the concatenated sources
 //
 // String lookup in full text index:
 // - use the suffix array to lookup a string's offsets - the offsets
 //   correspond to the Pos values relative to the file set
 // - translate the Pos values back into file and line information and
 //   sort the result

 package main

 import (
 	"bufio"
 	"bytes"
 	"encoding/gob"
 	"errors"
 	"go/ast"
 	"go/parser"
 	"go/scanner"
 	"go/token"
 	"index/suffixarray"
 	"io"
 	"os"
 	"path/filepath"
 	"regexp"
 	"sort"
 	"strings"
 )

 // ----------------------------------------------------------------------------
 // InterfaceSlice is a helper type for sorting interface
 // slices according to some slice-specific sort criteria.

 type Comparer func(x, y interface{}) bool

 type InterfaceSlice struct {
 	slice []interface{}
 	less  Comparer
 }

 func (p *InterfaceSlice) Len() int           { return len(p.slice) }
 func (p *InterfaceSlice) Less(i, j int) bool { return p.less(p.slice[i], p.slice[j]) }
 func (p *InterfaceSlice) Swap(i, j int)      { p.slice[i], p.slice[j] = p.slice[j], p.slice[i] }

 // ----------------------------------------------------------------------------
 // RunList

 // A RunList is a list of entries that can be sorted according to some
 // criteria. A RunList may be compressed by grouping "runs" of entries
 // which are equal (according to the sort critera) into a new RunList of
 // runs. For instance, a RunList containing pairs (x, y) may be compressed
 // into a RunList containing pair runs (x, {y}) where each run consists of
 // a list of y's with the same x.
 type RunList []interface{}

 func (h RunList) sort(less Comparer) {
 	sort.Sort(&InterfaceSlice{h, less})
 }

 // Compress entries which are the same according to a sort criteria
 // (specified by less) into "runs".
 func (h RunList) reduce(less Comparer, newRun func(h RunList) interface{}) RunList {
 	if len(h) == 0 {
 		return nil
 	}
 	// len(h) > 0

 	// create runs of entries with equal values
 	h.sort(less)

 	// for each run, make a new run object and collect them in a new RunList
 	var hh RunList
 	i, x := 0, h[0]
 	for j, y := range h {
 		if less(x, y) {
 			hh = append(hh, newRun(h[i:j]))
 			i, x = j, h[j] // start a new run
 		}
 	}
 	// add final run, if any
 	if i < len(h) {
 		hh = append(hh, newRun(h[i:]))
 	}

 	return hh
 }

 // ----------------------------------------------------------------------------
 // SpotInfo

 // A SpotInfo value describes a particular identifier spot in a given file;
 // It encodes three values: the SpotKind (declaration or use), a line or
 // snippet index "lori", and whether it's a line or index.
 //
 // The following encoding is used:
 //
 //   bits    32   4    1       0
 //   value    [lori|kind|isIndex]
 //
 type SpotInfo uint32

 // SpotKind describes whether an identifier is declared (and what kind of
 // declaration) or used.
 type SpotKind uint32

 const (
 	PackageClause SpotKind = iota
 	ImportDecl
 	ConstDecl
 	TypeDecl
 	VarDecl
 	FuncDecl
 	MethodDecl
 	Use
 	nKinds
 )

 func init() {
 	// sanity check: if nKinds is too large, the SpotInfo
 	// accessor functions may need to be updated
 	if nKinds > 8 {
 		panic("nKinds > 8")
 	}
 }

 // makeSpotInfo makes a SpotInfo.
 func makeSpotInfo(kind SpotKind, lori int, isIndex bool) SpotInfo {
 	// encode lori: bits [4..32)
 	x := SpotInfo(lori) << 4
 	if int(x>>4) != lori {
 		// lori value doesn't fit - since snippet indices are
 		// most certainly always smaller then 1<<28, this can
 		// only happen for line numbers; give it no line number (= 0)
 		x = 0
 	}
 	// encode kind: bits [1..4)
 	x |= SpotInfo(kind) << 1
 	// encode isIndex: bit 0
 	if isIndex {
 		x |= 1
 	}
 	return x
 }

 func (x SpotInfo) Kind() SpotKind { return SpotKind(x >> 1 & 7) }
 func (x SpotInfo) Lori() int      { return int(x >> 4) }
 func (x SpotInfo) IsIndex() bool  { return x&1 != 0 }

 // ----------------------------------------------------------------------------
 // KindRun

 // Debugging support. Disable to see multiple entries per line.
 const removeDuplicates = true

 // A KindRun is a run of SpotInfos of the same kind in a given file.
 // The kind (3 bits) is stored in each SpotInfo element; to find the
 // kind of a KindRun, look at any of it's elements.
 type KindRun []SpotInfo

 // KindRuns are sorted by line number or index. Since the isIndex bit
 // is always the same for all infos in one list we can compare lori's.
 func (k KindRun) Len() int           { return len(k) }
 func (k KindRun) Less(i, j int) bool { return k[i].Lori() < k[j].Lori() }
 func (k KindRun) Swap(i, j int)      { k[i], k[j] = k[j], k[i] }

 // FileRun contents are sorted by Kind for the reduction into KindRuns.
 func lessKind(x, y interface{}) bool { return x.(SpotInfo).Kind() < y.(SpotInfo).Kind() }

 // newKindRun allocates a new KindRun from the SpotInfo run h.
 func newKindRun(h RunList) interface{} {
 	run := make(KindRun, len(h))
 	for i, x := range h {
 		run[i] = x.(SpotInfo)
 	}

 	// Spots were sorted by file and kind to create this run.
 	// Within this run, sort them by line number or index.
 	sort.Sort(run)

 	if removeDuplicates {
 		// Since both the lori and kind field must be
 		// same for duplicates, and since the isIndex
 		// bit is always the same for all infos in one
 		// list we can simply compare the entire info.
 		k := 0
 		prev := SpotInfo(1<<32 - 1) // an unlikely value
 		for _, x := range run {
 			if x != prev {
 				run[k] = x
 				k++
 				prev = x
 			}
 		}
 		run = run[0:k]
 	}

 	return run
 }

 // ----------------------------------------------------------------------------
 // FileRun

 // A Pak describes a Go package.
 type Pak struct {
 	Path string // path of directory containing the package
 	Name string // package name as declared by package clause
 }

 // Paks are sorted by name (primary key) and by import path (secondary key).
 func (p *Pak) less(q *Pak) bool {
 	return p.Name < q.Name || p.Name == q.Name && p.Path < q.Path
 }

 // A File describes a Go file.
 type File struct {
 	Name string // directory-local file name
 	Pak  *Pak   // the package to which the file belongs
 }

 // Path returns the file path of f.
 func (f *File) Path() string {
 	return filepath.Join(f.Pak.Path, f.Name)
 }

 // A Spot describes a single occurrence of a word.
 type Spot struct {
 	File *File
 	Info SpotInfo
 }

 // A FileRun is a list of KindRuns belonging to the same file.
 type FileRun struct {
 	File   *File
 	Groups []KindRun
 }

 // Spots are sorted by file path for the reduction into FileRuns.
 func lessSpot(x, y interface{}) bool {
 	fx := x.(Spot).File
 	fy := y.(Spot).File
 	// same as "return fx.Path() < fy.Path()" but w/o computing the file path first
 	px := fx.Pak.Path
 	py := fy.Pak.Path
 	return px < py || px == py && fx.Name < fy.Name
 }

 // newFileRun allocates a new FileRun from the Spot run h.
 func newFileRun(h RunList) interface{} {
 	file := h[0].(Spot).File

 	// reduce the list of Spots into a list of KindRuns
 	h1 := make(RunList, len(h))
 	for i, x := range h {
 		h1[i] = x.(Spot).Info
 	}
 	h2 := h1.reduce(lessKind, newKindRun)

 	// create the FileRun
 	groups := make([]KindRun, len(h2))
 	for i, x := range h2 {
 		groups[i] = x.(KindRun)
 	}
 	return &FileRun{file, groups}
 }

 // ----------------------------------------------------------------------------
 // PakRun

 // A PakRun describes a run of *FileRuns of a package.
 type PakRun struct {
 	Pak   *Pak
 	Files []*FileRun
 }

 // Sorting support for files within a PakRun.
 func (p *PakRun) Len() int           { return len(p.Files) }
 func (p *PakRun) Less(i, j int) bool { return p.Files[i].File.Name < p.Files[j].File.Name }
 func (p *PakRun) Swap(i, j int)      { p.Files[i], p.Files[j] = p.Files[j], p.Files[i] }

 // FileRuns are sorted by package for the reduction into PakRuns.
 func lessFileRun(x, y interface{}) bool {
 	return x.(*FileRun).File.Pak.less(y.(*FileRun).File.Pak)
 }

 // newPakRun allocates a new PakRun from the *FileRun run h.
 func newPakRun(h RunList) interface{} {
 	pak := h[0].(*FileRun).File.Pak
 	files := make([]*FileRun, len(h))
 	for i, x := range h {
 		files[i] = x.(*FileRun)
 	}
 	run := &PakRun{pak, files}
 	sort.Sort(run) // files were sorted by package; sort them by file now
 	return run
 }

 // ----------------------------------------------------------------------------
 // HitList

 // A HitList describes a list of PakRuns.
 type HitList []*PakRun

 // PakRuns are sorted by package.
 func lessPakRun(x, y interface{}) bool { return x.(*PakRun).Pak.less(y.(*PakRun).Pak) }

 func reduce(h0 RunList) HitList {
 	// reduce a list of Spots into a list of FileRuns
 	h1 := h0.reduce(lessSpot, newFileRun)
 	// reduce a list of FileRuns into a list of PakRuns
 	h2 := h1.reduce(lessFileRun, newPakRun)
 	// sort the list of PakRuns by package
 	h2.sort(lessPakRun)
 	// create a HitList
 	h := make(HitList, len(h2))
 	for i, p := range h2 {
 		h[i] = p.(*PakRun)
 	}
 	return h
 }

 // filter returns a new HitList created by filtering
 // all PakRuns from h that have a matching pakname.
 func (h HitList) filter(pakname string) HitList {
 	var hh HitList
 	for _, p := range h {
 		if p.Pak.Name == pakname {
 			hh = append(hh, p)
 		}
 	}
 	return hh
 }

 // ----------------------------------------------------------------------------
 // AltWords

 type wordPair struct {
 	canon string // canonical word spelling (all lowercase)
 	alt   string // alternative spelling
 }

 // An AltWords describes a list of alternative spellings for a
 // canonical (all lowercase) spelling of a word.
 type AltWords struct {
 	Canon string   // canonical word spelling (all lowercase)
 	Alts  []string // alternative spelling for the same word
 }

 // wordPairs are sorted by their canonical spelling.
 func lessWordPair(x, y interface{}) bool { return x.(*wordPair).canon < y.(*wordPair).canon }

 // newAltWords allocates a new AltWords from the *wordPair run h.
 func newAltWords(h RunList) interface{} {
 	canon := h[0].(*wordPair).canon
 	alts := make([]string, len(h))
 	for i, x := range h {
 		alts[i] = x.(*wordPair).alt
 	}
 	return &AltWords{canon, alts}
 }

 func (a *AltWords) filter(s string) *AltWords {
 	var alts []string
 	for _, w := range a.Alts {
 		if w != s {
 			alts = append(alts, w)
 		}
 	}
 	if len(alts) > 0 {
 		return &AltWords{a.Canon, alts}
 	}
 	return nil
 }

 // ----------------------------------------------------------------------------
 // Indexer

 // Adjust these flags as seems best.
 const includeMainPackages = true
 const includeTestFiles = true

 type IndexResult struct {
 	Decls  RunList // package-level declarations (with snippets)
 	Others RunList // all other occurrences
 }

 // Statistics provides statistics information for an index.
 type Statistics struct {
 	Bytes int // total size of indexed source files
 	Files int // number of indexed source files
 	Lines int // number of lines (all files)
 	Words int // number of different identifiers
 	Spots int // number of identifier occurrences
 }

 // An Indexer maintains the data structures and provides the machinery
 // for indexing .go files under a file tree. It implements the path.Visitor
 // interface for walking file trees, and the ast.Visitor interface for
 // walking Go ASTs.
 type Indexer struct {
 	fset     *token.FileSet          // file set for all indexed files
 	sources  bytes.Buffer            // concatenated sources
 	packages map[string]*Pak         // map of canonicalized *Paks
 	words    map[string]*IndexResult // RunLists of Spots
 	snippets []*Snippet              // indices are stored in SpotInfos
 	current  *token.File             // last file added to file set
 	file     *File                   // AST for current file
 	decl     ast.Decl                // AST for current decl
 	stats    Statistics
 }

 func (x *Indexer) lookupPackage(path, name string) *Pak {
 	// In the source directory tree, more than one package may
 	// live in the same directory. For the packages map, construct
 	// a key that includes both the directory path and the package
 	// name.
 	key := path + ":" + name
 	pak := x.packages[key]
 	if pak == nil {
 		pak = &Pak{path, name}
 		x.packages[key] = pak
 	}
 	return pak
 }

 func (x *Indexer) addSnippet(s *Snippet) int {
 	index := len(x.snippets)
 	x.snippets = append(x.snippets, s)
 	return index
 }

 func (x *Indexer) visitComment(c *ast.CommentGroup) {
 	if c != nil {
 		ast.Walk(x, c)
 	}
 }

 func (x *Indexer) visitIdent(kind SpotKind, id *ast.Ident) {
 	if id != nil {
 		lists, found := x.words[id.Name]
 		if !found {
 			lists = new(IndexResult)
 			x.words[id.Name] = lists
 		}

 		if kind == Use || x.decl == nil {
 			// not a declaration or no snippet required
 			info := makeSpotInfo(kind, x.current.Line(id.Pos()), false)
 			lists.Others = append(lists.Others, Spot{x.file, info})
 		} else {
 			// a declaration with snippet
 			index := x.addSnippet(NewSnippet(x.fset, x.decl, id))
 			info := makeSpotInfo(kind, index, true)
 			lists.Decls = append(lists.Decls, Spot{x.file, info})
 		}

 		x.stats.Spots++
 	}
 }

 func (x *Indexer) visitSpec(spec ast.Spec, isVarDecl bool) {
 	switch n := spec.(type) {
 	case *ast.ImportSpec:
 		x.visitComment(n.Doc)
 		x.visitIdent(ImportDecl, n.Name)
 		ast.Walk(x, n.Path)
 		x.visitComment(n.Comment)

 	case *ast.ValueSpec:
 		x.visitComment(n.Doc)
 		kind := ConstDecl
 		if isVarDecl {
 			kind = VarDecl
 		}
 		for _, n := range n.Names {
 			x.visitIdent(kind, n)
 		}
 		ast.Walk(x, n.Type)
 		for _, v := range n.Values {
 			ast.Walk(x, v)
 		}
 		x.visitComment(n.Comment)

 	case *ast.TypeSpec:
 		x.visitComment(n.Doc)
 		x.visitIdent(TypeDecl, n.Name)
 		ast.Walk(x, n.Type)
 		x.visitComment(n.Comment)
 	}
 }

 func (x *Indexer) Visit(node ast.Node) ast.Visitor {
 	// TODO(gri): methods in interface types are categorized as VarDecl
 	switch n := node.(type) {
 	case nil:
 		return nil

 	case *ast.Ident:
 		x.visitIdent(Use, n)

 	case *ast.Field:
 		x.decl = nil // no snippets for fields
 		x.visitComment(n.Doc)
 		for _, m := range n.Names {
 			x.visitIdent(VarDecl, m)
 		}
 		ast.Walk(x, n.Type)
 		ast.Walk(x, n.Tag)
 		x.visitComment(n.Comment)

 	case *ast.DeclStmt:
 		if decl, ok := n.Decl.(*ast.GenDecl); ok {
 			// local declarations can only be *ast.GenDecls
 			x.decl = nil // no snippets for local declarations
 			x.visitComment(decl.Doc)
 			for _, s := range decl.Specs {
 				x.visitSpec(s, decl.Tok == token.VAR)
 			}
 		} else {
 			// handle error case gracefully
 			ast.Walk(x, n.Decl)
 		}

 	case *ast.GenDecl:
 		x.decl = n
 		x.visitComment(n.Doc)
 		for _, s := range n.Specs {
 			x.visitSpec(s, n.Tok == token.VAR)
 		}

 	case *ast.FuncDecl:
 		x.visitComment(n.Doc)
 		kind := FuncDecl
 		if n.Recv != nil {
 			kind = MethodDecl
 			ast.Walk(x, n.Recv)
 		}
 		x.decl = n
 		x.visitIdent(kind, n.Name)
 		ast.Walk(x, n.Type)
 		if n.Body != nil {
 			ast.Walk(x, n.Body)
 		}

 	case *ast.File:
 		x.visitComment(n.Doc)
 		x.decl = nil
 		x.visitIdent(PackageClause, n.Name)
 		for _, d := range n.Decls {
 			ast.Walk(x, d)
 		}
 		// don't visit package level comments for now
 		// to avoid duplicate visiting from individual
 		// nodes

 	default:
 		return x
 	}

 	return nil
 }

 func pkgName(filename string) string {
 	// use a new file set each time in order to not pollute the indexer's
 	// file set (which must stay in sync with the concatenated source code)
 	file, err := parser.ParseFile(token.NewFileSet(), filename, nil, parser.PackageClauseOnly)
 	if err != nil || file == nil {
 		return ""
 	}
 	return file.Name.Name
 }

 // addFile adds a file to the index if possible and returns the file set file
 // and the file's AST if it was successfully parsed as a Go file. If addFile
 // failed (that is, if the file was not added), it returns file == nil.
 func (x *Indexer) addFile(filename string, goFile bool) (file *token.File, ast *ast.File) {
 	// open file
 	f, err := fs.Open(filename)
 	if err != nil {
 		return
 	}
 	defer f.Close()

 	// The file set's base offset and x.sources size must be in lock-step;
 	// this permits the direct mapping of suffix array lookup results to
 	// to corresponding Pos values.
 	//
 	// When a file is added to the file set, its offset base increases by
 	// the size of the file + 1; and the initial base offset is 1. Add an
 	// extra byte to the sources here.
 	x.sources.WriteByte(0)

 	// If the sources length doesn't match the file set base at this point
 	// the file set implementation changed or we have another error.
 	base := x.fset.Base()
 	if x.sources.Len() != base {
 		panic("internal error - file base incorrect")
 	}

 	// append file contents (src) to x.sources
 	if _, err := x.sources.ReadFrom(f); err == nil {
 		src := x.sources.Bytes()[base:]

 		if goFile {
 			// parse the file and in the process add it to the file set
 			if ast, err = parser.ParseFile(x.fset, filename, src, parser.ParseComments); err == nil {
 				file = x.fset.File(ast.Pos()) // ast.Pos() is inside the file
 				return
 			}
 			// file has parse errors, and the AST may be incorrect -
 			// set lines information explicitly and index as ordinary
 			// text file (cannot fall through to the text case below
 			// because the file has already been added to the file set
 			// by the parser)
 			file = x.fset.File(token.Pos(base)) // token.Pos(base) is inside the file
 			file.SetLinesForContent(src)
 			ast = nil
 			return
 		}

 		if isText(src) {
 			// only add the file to the file set (for the full text index)
 			file = x.fset.AddFile(filename, x.fset.Base(), len(src))
 			file.SetLinesForContent(src)
 			return
 		}
 	}

 	// discard possibly added data
 	x.sources.Truncate(base - 1) // -1 to remove added byte 0 since no file was added
 	return
 }

 // Design note: Using an explicit white list of permitted files for indexing
 // makes sure that the important files are included and massively reduces the
 // number of files to index. The advantage over a blacklist is that unexpected
 // (non-blacklisted) files won't suddenly explode the index.
 //
 // TODO(gri): We may want to make this list customizable, perhaps via a flag.

 // Files are whitelisted if they have a file name or extension
 // present as key in whitelisted.
 var whitelisted = map[string]bool{
 	".bash":        true,
 	".c":           true,
 	".css":         true,
 	".go":          true,
 	".goc":         true,
 	".h":           true,
 	".html":        true,
 	".js":          true,
 	".out":         true,
 	".py":          true,
 	".s":           true,
 	".sh":          true,
 	".txt":         true,
 	".xml":         true,
 	"AUTHORS":      true,
 	"CONTRIBUTORS": true,
 	"LICENSE":      true,
 	"Makefile":     true,
 	"PATENTS":      true,
 	"README":       true,
 }

 // isWhitelisted returns true if a file is on the list
 // of "permitted" files for indexing. The filename must
 // be the directory-local name of the file.
 func isWhitelisted(filename string) bool {
 	key := filepath.Ext(filename)
 	if key == "" {
 		// file has no extension - use entire filename
 		key = filename
 	}
 	return whitelisted[key]
 }

 func (x *Indexer) visitFile(dirname string, f os.FileInfo, fulltextIndex bool) {
 	if f.IsDir() {
 		return
 	}

 	filename := filepath.Join(dirname, f.Name())
 	goFile := false

 	switch {
 	case isGoFile(f):
 		if !includeTestFiles && (!isPkgFile(f) || strings.HasPrefix(filename, "test/")) {
 			return
 		}
 		if !includeMainPackages && pkgName(filename) == "main" {
 			return
 		}
 		goFile = true

 	case !fulltextIndex || !isWhitelisted(f.Name()):
 		return
 	}

 	file, fast := x.addFile(filename, goFile)
 	if file == nil {
 		return // addFile failed
 	}

 	if fast != nil {
 		// we've got a Go file to index
 		x.current = file
 		pak := x.lookupPackage(dirname, fast.Name.Name)
 		x.file = &File{f.Name(), pak}
 		ast.Walk(x, fast)
 	}

 	// update statistics
 	x.stats.Bytes += file.Size()
 	x.stats.Files++
 	x.stats.Lines += file.LineCount()
 }

 // ----------------------------------------------------------------------------
 // Index

 type LookupResult struct {
 	Decls  HitList // package-level declarations (with snippets)
 	Others HitList // all other occurrences
 }

 type Index struct {
 	fset     *token.FileSet           // file set used during indexing; nil if no textindex
 	suffixes *suffixarray.Index       // suffixes for concatenated sources; nil if no textindex
 	words    map[string]*LookupResult // maps words to hit lists
 	alts     map[string]*AltWords     // maps canonical(words) to lists of alternative spellings
 	snippets []*Snippet               // all snippets, indexed by snippet index
 	stats    Statistics
 }

 func canonical(w string) string { return strings.ToLower(w) }

 // NewIndex creates a new index for the .go files
 // in the directories given by dirnames.
 //
 func NewIndex(dirnames <-chan string, fulltextIndex bool, throttle float64) *Index {
 	var x Indexer
 	th := NewThrottle(throttle, 0.1e9) // run at least 0.1s at a time

 	// initialize Indexer
 	// (use some reasonably sized maps to start)
 	x.fset = token.NewFileSet()
 	x.packages = make(map[string]*Pak, 256)
 	x.words = make(map[string]*IndexResult, 8192)

 	// index all files in the directories given by dirnames
 	for dirname := range dirnames {
 		list, err := fs.ReadDir(dirname)
 		if err != nil {
 			continue // ignore this directory
 		}
 		for _, f := range list {
 			if !f.IsDir() {
 				x.visitFile(dirname, f, fulltextIndex)
 			}
 			th.Throttle()
 		}
 	}

 	if !fulltextIndex {
 		// the file set, the current file, and the sources are
 		// not needed after indexing if no text index is built -
 		// help GC and clear them
 		x.fset = nil
 		x.sources.Reset()
 		x.current = nil // contains reference to fset!
 	}

 	// for each word, reduce the RunLists into a LookupResult;
 	// also collect the word with its canonical spelling in a
 	// word list for later computation of alternative spellings
 	words := make(map[string]*LookupResult)
 	var wlist RunList
 	for w, h := range x.words {
 		decls := reduce(h.Decls)
 		others := reduce(h.Others)
 		words[w] = &LookupResult{
 			Decls:  decls,
 			Others: others,
 		}
 		wlist = append(wlist, &wordPair{canonical(w), w})
 		th.Throttle()
 	}
 	x.stats.Words = len(words)

 	// reduce the word list {canonical(w), w} into
 	// a list of AltWords runs {canonical(w), {w}}
 	alist := wlist.reduce(lessWordPair, newAltWords)

 	// convert alist into a map of alternative spellings
 	alts := make(map[string]*AltWords)
 	for i := 0; i < len(alist); i++ {
 		a := alist[i].(*AltWords)
 		alts[a.Canon] = a
 	}

 	// create text index
 	var suffixes *suffixarray.Index
 	if fulltextIndex {
 		suffixes = suffixarray.New(x.sources.Bytes())
 	}

 	return &Index{x.fset, suffixes, words, alts, x.snippets, x.stats}
 }

 type fileIndex struct {
 	Words    map[string]*LookupResult
 	Alts     map[string]*AltWords
 	Snippets []*Snippet
 	Fulltext bool
 }

 func (x *fileIndex) Write(w io.Writer) error {
 	return gob.NewEncoder(w).Encode(x)
 }

 func (x *fileIndex) Read(r io.Reader) error {
 	return gob.NewDecoder(r).Decode(x)
 }

 // Write writes the index x to w.
 func (x *Index) Write(w io.Writer) error {
 	fulltext := false
 	if x.suffixes != nil {
 		fulltext = true
 	}
 	fx := fileIndex{
 		x.words,
 		x.alts,
 		x.snippets,
 		fulltext,
 	}
 	if err := fx.Write(w); err != nil {
 		return err
 	}
 	if fulltext {
 		if err := x.fset.Write(w); err != nil {
 			return err
 		}
 		if err := x.suffixes.Write(w); err != nil {
 			return err
 		}
 	}
 	return nil
 }

 // Read reads the index from r into x; x must not be nil.
 // If r does not also implement io.ByteReader, it will be wrapped in a bufio.Reader.
 func (x *Index) Read(r io.Reader) error {
 	// We use the ability to read bytes as a plausible surrogate for buffering.
 	if _, ok := r.(io.ByteReader); !ok {
 		r = bufio.NewReader(r)
 	}
 	var fx fileIndex
 	if err := fx.Read(r); err != nil {
 		return err
 	}
 	x.words = fx.Words
 	x.alts = fx.Alts
 	x.snippets = fx.Snippets
 	if fx.Fulltext {
 		x.fset = token.NewFileSet()
 		if err := x.fset.Read(r); err != nil {
 			return err
 		}
 		x.suffixes = new(suffixarray.Index)
 		if err := x.suffixes.Read(r); err != nil {
 			return err
 		}
 	}
 	return nil
 }

 // Stats() returns index statistics.
 func (x *Index) Stats() Statistics {
 	return x.stats
 }

 func (x *Index) lookupWord(w string) (match *LookupResult, alt *AltWords) {
 	match = x.words[w]
 	alt = x.alts[canonical(w)]
 	// remove current spelling from alternatives
 	// (if there is no match, the alternatives do
 	// not contain the current spelling)
 	if match != nil && alt != nil {
 		alt = alt.filter(w)
 	}
 	return
 }

 func isIdentifier(s string) bool {
 	var S scanner.Scanner
 	fset := token.NewFileSet()
 	S.Init(fset.AddFile("", fset.Base(), len(s)), []byte(s), nil, 0)
 	if _, tok, _ := S.Scan(); tok == token.IDENT {
 		_, tok, _ := S.Scan()
 		return tok == token.EOF
 	}
 	return false
 }

 // For a given query, which is either a single identifier or a qualified
 // identifier, Lookup returns a list of packages, a LookupResult, and a
 // list of alternative spellings, if any. Any and all results may be nil.
 // If the query syntax is wrong, an error is reported.
 func (x *Index) Lookup(query string) (paks HitList, match *LookupResult, alt *AltWords, err error) {
 	ss := strings.Split(query, ".")

 	// check query syntax
 	for _, s := range ss {
 		if !isIdentifier(s) {
 			err = errors.New("all query parts must be identifiers")
 			return
 		}
 	}

 	// handle simple and qualified identifiers
 	switch len(ss) {
 	case 1:
 		ident := ss[0]
 		match, alt = x.lookupWord(ident)
 		if match != nil {
 			// found a match - filter packages with same name
 			// for the list of packages called ident, if any
 			paks = match.Others.filter(ident)
 		}

 	case 2:
 		pakname, ident := ss[0], ss[1]
 		match, alt = x.lookupWord(ident)
 		if match != nil {
 			// found a match - filter by package name
 			// (no paks - package names are not qualified)
 			decls := match.Decls.filter(pakname)
 			others := match.Others.filter(pakname)
 			match = &LookupResult{decls, others}
 		}

 	default:
 		err = errors.New("query is not a (qualified) identifier")
 	}

 	return
 }

 func (x *Index) Snippet(i int) *Snippet {
 	// handle illegal snippet indices gracefully
 	if 0 <= i && i < len(x.snippets) {
 		return x.snippets[i]
 	}
 	return nil
 }

 type positionList []struct {
 	filename string
 	line     int
 }

 func (list positionList) Len() int           { return len(list) }
 func (list positionList) Less(i, j int) bool { return list[i].filename < list[j].filename }
 func (list positionList) Swap(i, j int)      { list[i], list[j] = list[j], list[i] }

 // unique returns the list sorted and with duplicate entries removed
 func unique(list []int) []int {
 	sort.Ints(list)
 	var last int
 	i := 0
 	for _, x := range list {
 		if i == 0 || x != last {
 			last = x
 			list[i] = x
 			i++
 		}
 	}
 	return list[0:i]
 }

 // A FileLines value specifies a file and line numbers within that file.
 type FileLines struct {
 	Filename string
 	Lines    []int
 }

 // LookupRegexp returns the number of matches and the matches where a regular
 // expression r is found in the full text index. At most n matches are
 // returned (thus found <= n).
 //
 func (x *Index) LookupRegexp(r *regexp.Regexp, n int) (found int, result []FileLines) {
 	if x.suffixes == nil || n <= 0 {
 		return
 	}
 	// n > 0

 	var list positionList
 	// FindAllIndex may returns matches that span across file boundaries.
 	// Such matches are unlikely, buf after eliminating them we may end up
 	// with fewer than n matches. If we don't have enough at the end, redo
 	// the search with an increased value n1, but only if FindAllIndex
 	// returned all the requested matches in the first place (if it
 	// returned fewer than that there cannot be more).
 	for n1 := n; found < n; n1 += n - found {
 		found = 0
 		matches := x.suffixes.FindAllIndex(r, n1)
 		// compute files, exclude matches that span file boundaries,
 		// and map offsets to file-local offsets
 		list = make(positionList, len(matches))
 		for _, m := range matches {
 			// by construction, an offset corresponds to the Pos value
 			// for the file set - use it to get the file and line
 			p := token.Pos(m[0])
 			if file := x.fset.File(p); file != nil {
 				if base := file.Base(); base <= m[1] && m[1] <= base+file.Size() {
 					// match [m[0], m[1]) is within the file boundaries
 					list[found].filename = file.Name()
 					list[found].line = file.Line(p)
 					found++
 				}
 			}
 		}
 		if found == n || len(matches) < n1 {
 			// found all matches or there's no chance to find more
 			break
 		}
 	}
 	list = list[0:found]
 	sort.Sort(list) // sort by filename

 	// collect matches belonging to the same file
 	var last string
 	var lines []int
 	addLines := func() {
 		if len(lines) > 0 {
 			// remove duplicate lines
 			result = append(result, FileLines{last, unique(lines)})
 			lines = nil
 		}
 	}
 	for _, m := range list {
 		if m.filename != last {
 			addLines()
 			last = m.filename
 		}
 		lines = append(lines, m.line)
 	}
 	addLines()

 	return
 }