| // Copyright 2009 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| // This file contains the infrastructure to create an |
| // identifier and full-text index for a set of Go files. |
| // |
| // Algorithm for identifier index: |
| // - traverse all .go files of the file tree specified by root |
| // - for each identifier (word) encountered, collect all occurrences (spots) |
| // into a list; this produces a list of spots for each word |
| // - reduce the lists: from a list of spots to a list of FileRuns, |
| // and from a list of FileRuns into a list of PakRuns |
| // - make a HitList from the PakRuns |
| // |
| // Details: |
| // - keep two lists per word: one containing package-level declarations |
| // that have snippets, and one containing all other spots |
| // - keep the snippets in a separate table indexed by snippet index |
| // and store the snippet index in place of the line number in a SpotInfo |
| // (the line number for spots with snippets is stored in the snippet) |
| // - at the end, create lists of alternative spellings for a given |
| // word |
| // |
| // Algorithm for full text index: |
| // - concatenate all source code in a byte buffer (in memory) |
| // - add the files to a file set in lockstep as they are added to the byte |
| // buffer such that a byte buffer offset corresponds to the Pos value for |
| // that file location |
| // - create a suffix array from the concatenated sources |
| // |
| // String lookup in full text index: |
| // - use the suffix array to lookup a string's offsets - the offsets |
| // correspond to the Pos values relative to the file set |
| // - translate the Pos values back into file and line information and |
| // sort the result |
| |
| package godoc |
| |
| import ( |
| "bufio" |
| "bytes" |
| "encoding/gob" |
| "errors" |
| "fmt" |
| "go/ast" |
| "go/doc" |
| "go/parser" |
| "go/token" |
| "index/suffixarray" |
| "io" |
| "log" |
| "os" |
| pathpkg "path" |
| "path/filepath" |
| "regexp" |
| "runtime" |
| "sort" |
| "strconv" |
| "strings" |
| "sync" |
| "time" |
| "unicode" |
| |
| "golang.org/x/tools/godoc/util" |
| "golang.org/x/tools/godoc/vfs" |
| ) |
| |
| // ---------------------------------------------------------------------------- |
// A comparer reports whether x sorts strictly before y.
type comparer func(x, y interface{}) bool

// interfaceSlice is a helper type for sorting interface slices
// according to some slice-specific sort criteria. It pairs the
// elements with the comparer that orders them and implements
// sort.Interface.
type interfaceSlice struct {
	slice []interface{} // elements to be sorted
	less  comparer      // ordering applied to the elements
}

// ----------------------------------------------------------------------------
// RunList

// A RunList is a list of entries that can be sorted according to some
// criteria. A RunList may be compressed by grouping "runs" of entries
// which are equal (according to the sort criteria) into a new RunList of
// runs. For instance, a RunList containing pairs (x, y) may be compressed
// into a RunList containing pair runs (x, {y}) where each run consists of
// a list of y's with the same x.
type RunList []interface{}

// sort orders the entries of h in place according to less.
func (h RunList) sort(less comparer) {
	sort.Sort(&interfaceSlice{slice: h, less: less})
}

// Len returns the number of elements (sort.Interface).
func (p *interfaceSlice) Len() int {
	return len(p.slice)
}

// Less delegates the comparison of elements i and j to p.less (sort.Interface).
func (p *interfaceSlice) Less(i, j int) bool {
	return p.less(p.slice[i], p.slice[j])
}

// Swap exchanges elements i and j (sort.Interface).
func (p *interfaceSlice) Swap(i, j int) {
	p.slice[j], p.slice[i] = p.slice[i], p.slice[j]
}

// reduce compresses entries which are the same according to a sort
// criteria (specified by less) into "runs". It sorts h in place, cuts
// the sorted list wherever less reports a strict increase, and hands
// each resulting sub-slice of h to newRun. The values returned by
// newRun are collected, in order, in the resulting RunList. An empty h
// yields nil.
func (h RunList) reduce(less comparer, newRun func(h RunList) interface{}) RunList {
	if len(h) == 0 {
		return nil
	}

	// sorting makes equal entries adjacent
	h.sort(less)

	var runs RunList
	start, head := 0, h[0] // first index and first entry of the current run
	for j := range h {
		if !less(head, h[j]) {
			continue // h[j] still belongs to the current run
		}
		runs = append(runs, newRun(h[start:j]))
		start, head = j, h[j] // h[j] opens the next run
	}

	// the loop never closes the last run; start < len(h) always holds here
	return append(runs, newRun(h[start:]))
}
| |
| // ---------------------------------------------------------------------------- |
| // KindRun |
| |
| // Debugging support. Disable to see multiple entries per line. |
| const removeDuplicates = true |
| |
| // A KindRun is a run of SpotInfos of the same kind in a given file. |
| // The kind (3 bits) is stored in each SpotInfo element; to find the |
| // kind of a KindRun, look at any of its elements. |
| type KindRun []SpotInfo |
| |
| // KindRuns are sorted by line number or index. Since the isIndex bit |
| // is always the same for all infos in one list we can compare lori's. |
| func (k KindRun) Len() int { return len(k) } |
| func (k KindRun) Less(i, j int) bool { return k[i].Lori() < k[j].Lori() } |
| func (k KindRun) Swap(i, j int) { k[i], k[j] = k[j], k[i] } |
| |
| // FileRun contents are sorted by Kind for the reduction into KindRuns. |
| func lessKind(x, y interface{}) bool { return x.(SpotInfo).Kind() < y.(SpotInfo).Kind() } |
| |
| // newKindRun allocates a new KindRun from the SpotInfo run h. |
| func newKindRun(h RunList) interface{} { |
| run := make(KindRun, len(h)) |
| for i, x := range h { |
| run[i] = x.(SpotInfo) |
| } |
| |
| // Spots were sorted by file and kind to create this run. |
| // Within this run, sort them by line number or index. |
| sort.Sort(run) |
| |
| if removeDuplicates { |
| // Since both the lori and kind field must be |
| // same for duplicates, and since the isIndex |
| // bit is always the same for all infos in one |
| // list we can simply compare the entire info. |
| k := 0 |
| prev := SpotInfo(1<<32 - 1) // an unlikely value |
| for _, x := range run { |
| if x != prev { |
| run[k] = x |
| k++ |
| prev = x |
| } |
| } |
| run = run[0:k] |
| } |
| |
| return run |
| } |
| |
| // ---------------------------------------------------------------------------- |
| // FileRun |
| |
// A Pak describes a Go package by its directory and its declared name.
type Pak struct {
	Path string // path of directory containing the package
	Name string // package name as declared by package clause
}

// less reports whether p sorts before q. Paks are sorted by name
// (primary key) and by import path (secondary key).
func (p *Pak) less(q *Pak) bool {
	if p.Name != q.Name {
		return p.Name < q.Name
	}
	return p.Path < q.Path
}
| |
| // A File describes a Go file. |
| type File struct { |
| Name string // directory-local file name |
| Pak *Pak // the package to which the file belongs |
| } |
| |
| // Path returns the file path of f. |
| func (f *File) Path() string { |
| return pathpkg.Join(f.Pak.Path, f.Name) |
| } |
| |
// A Spot describes a single occurrence of a word.
type Spot struct {
	File *File    // file in which the word occurs
	Info SpotInfo // kind of occurrence plus its line number or snippet index
}

// A FileRun is a list of KindRuns belonging to the same file.
// It is produced by newFileRun from a run of Spots that share a File.
type FileRun struct {
	File   *File     // the file all groups belong to
	Groups []KindRun // the file's spots, grouped into one KindRun per kind
}
| |
| // Spots are sorted by file path for the reduction into FileRuns. |
| func lessSpot(x, y interface{}) bool { |
| fx := x.(Spot).File |
| fy := y.(Spot).File |
| // same as "return fx.Path() < fy.Path()" but w/o computing the file path first |
| px := fx.Pak.Path |
| py := fy.Pak.Path |
| return px < py || px == py && fx.Name < fy.Name |
| } |
| |
| // newFileRun allocates a new FileRun from the Spot run h. |
| func newFileRun(h RunList) interface{} { |
| file := h[0].(Spot).File |
| |
| // reduce the list of Spots into a list of KindRuns |
| h1 := make(RunList, len(h)) |
| for i, x := range h { |
| h1[i] = x.(Spot).Info |
| } |
| h2 := h1.reduce(lessKind, newKindRun) |
| |
| // create the FileRun |
| groups := make([]KindRun, len(h2)) |
| for i, x := range h2 { |
| groups[i] = x.(KindRun) |
| } |
| return &FileRun{file, groups} |
| } |
| |
| // ---------------------------------------------------------------------------- |
| // PakRun |
| |
| // A PakRun describes a run of *FileRuns of a package. |
| type PakRun struct { |
| Pak *Pak |
| Files []*FileRun |
| } |
| |
| // Sorting support for files within a PakRun. |
| func (p *PakRun) Len() int { return len(p.Files) } |
| func (p *PakRun) Less(i, j int) bool { return p.Files[i].File.Name < p.Files[j].File.Name } |
| func (p *PakRun) Swap(i, j int) { p.Files[i], p.Files[j] = p.Files[j], p.Files[i] } |
| |
| // FileRuns are sorted by package for the reduction into PakRuns. |
| func lessFileRun(x, y interface{}) bool { |
| return x.(*FileRun).File.Pak.less(y.(*FileRun).File.Pak) |
| } |
| |
| // newPakRun allocates a new PakRun from the *FileRun run h. |
| func newPakRun(h RunList) interface{} { |
| pak := h[0].(*FileRun).File.Pak |
| files := make([]*FileRun, len(h)) |
| for i, x := range h { |
| files[i] = x.(*FileRun) |
| } |
| run := &PakRun{pak, files} |
| sort.Sort(run) // files were sorted by package; sort them by file now |
| return run |
| } |
| |
| // ---------------------------------------------------------------------------- |
| // HitList |
| |
| // A HitList describes a list of PakRuns. |
| type HitList []*PakRun |
| |
| // PakRuns are sorted by package. |
| func lessPakRun(x, y interface{}) bool { return x.(*PakRun).Pak.less(y.(*PakRun).Pak) } |
| |
| func reduce(h0 RunList) HitList { |
| // reduce a list of Spots into a list of FileRuns |
| h1 := h0.reduce(lessSpot, newFileRun) |
| // reduce a list of FileRuns into a list of PakRuns |
| h2 := h1.reduce(lessFileRun, newPakRun) |
| // sort the list of PakRuns by package |
| h2.sort(lessPakRun) |
| // create a HitList |
| h := make(HitList, len(h2)) |
| for i, p := range h2 { |
| h[i] = p.(*PakRun) |
| } |
| return h |
| } |
| |
| // filter returns a new HitList created by filtering |
| // all PakRuns from h that have a matching pakname. |
| func (h HitList) filter(pakname string) HitList { |
| var hh HitList |
| for _, p := range h { |
| if p.Pak.Name == pakname { |
| hh = append(hh, p) |
| } |
| } |
| return hh |
| } |
| |
| // ---------------------------------------------------------------------------- |
| // AltWords |
| |
// A wordPair associates one spelling of a word with its canonical
// spelling. wordPairs are the input for the reduction into AltWords.
type wordPair struct {
	canon string // canonical word spelling (all lowercase)
	alt   string // alternative spelling
}

// An AltWords describes a list of alternative spellings for a
// canonical (all lowercase) spelling of a word.
type AltWords struct {
	Canon string   // canonical word spelling (all lowercase)
	Alts  []string // alternative spelling for the same word
}
| |
| // wordPairs are sorted by their canonical spelling. |
| func lessWordPair(x, y interface{}) bool { return x.(*wordPair).canon < y.(*wordPair).canon } |
| |
| // newAltWords allocates a new AltWords from the *wordPair run h. |
| func newAltWords(h RunList) interface{} { |
| canon := h[0].(*wordPair).canon |
| alts := make([]string, len(h)) |
| for i, x := range h { |
| alts[i] = x.(*wordPair).alt |
| } |
| return &AltWords{canon, alts} |
| } |
| |
| func (a *AltWords) filter(s string) *AltWords { |
| var alts []string |
| for _, w := range a.Alts { |
| if w != s { |
| alts = append(alts, w) |
| } |
| } |
| if len(alts) > 0 { |
| return &AltWords{a.Canon, alts} |
| } |
| return nil |
| } |
| |
// Ident stores information about external identifiers in order to create
// links to package documentation.
type Ident struct {
	Path    string // e.g. "net/http"
	Package string // e.g. "http"
	Name    string // e.g. "NewRequest"
	Doc     string // e.g. "NewRequest returns a new Request..."
}

// byImportCount sorts the given slice of Idents by the import
// counts of the packages to which they belong. Idents of packages
// that are imported more often sort first; ties are broken by the
// package path in increasing order.
type byImportCount struct {
	Idents      []Ident
	ImportCount map[string]int // package path => number of imports
}

// Len returns the number of Idents (sort.Interface).
func (ic byImportCount) Len() int {
	return len(ic.Idents)
}

// Less reports whether Ident i sorts before Ident j (sort.Interface).
func (ic byImportCount) Less(i, j int) bool {
	ri := ic.ImportCount[ic.Idents[i].Path]
	rj := ic.ImportCount[ic.Idents[j].Path]
	if ri == rj {
		return ic.Idents[i].Path < ic.Idents[j].Path
	}
	return ri > rj // higher import count first
}

// Swap exchanges Idents i and j (sort.Interface).
func (ic byImportCount) Swap(i, j int) {
	ic.Idents[i], ic.Idents[j] = ic.Idents[j], ic.Idents[i]
}

// String returns a multi-line listing of the Idents in their current
// order, one "path, name (import count)" entry per line.
func (ic byImportCount) String() string {
	var buf strings.Builder
	buf.WriteString("[")
	for _, v := range ic.Idents {
		// format straight into the builder; no intermediate string needed
		fmt.Fprintf(&buf, "\n\t%s, %s (%d)", v.Path, v.Name, ic.ImportCount[v.Path])
	}
	buf.WriteString("\n]")
	return buf.String()
}

// filter creates a new Ident list where the results match the given
// package name. The result is nil if no Ident matches.
func (ic byImportCount) filter(pakname string) []Ident {
	var res []Ident
	for _, id := range ic.Idents {
		if id.Package == pakname {
			res = append(res, id)
		}
	}
	return res
}

// top returns the top n identifiers, or all of them if there are
// fewer than n. The result shares its storage with ic.Idents.
// A negative n is treated as 0 rather than causing a slice-bounds panic.
func (ic byImportCount) top(n int) []Ident {
	if n < 0 {
		n = 0
	}
	if len(ic.Idents) > n {
		return ic.Idents[:n]
	}
	return ic.Idents
}
| |
| // ---------------------------------------------------------------------------- |
| // Indexer |
| |
// An IndexResult collects, for a single word, the Spots recorded while
// indexing. NewIndex reduces both RunLists into the HitLists of a
// LookupResult.
type IndexResult struct {
	Decls  RunList // package-level declarations (with snippets)
	Others RunList // all other occurrences
}

// Statistics provides statistics information for an index.
type Statistics struct {
	Bytes int // total size of indexed source files
	Files int // number of indexed source files
	Lines int // number of lines (all files)
	Words int // number of different identifiers
	Spots int // number of identifier occurrences
}
| |
// An Indexer maintains the data structures and provides the machinery
// for indexing .go files under a file tree. Its visitFile method is
// called for every directory entry found while NewIndex walks the file
// tree, and its Visit method implements the ast.Visitor interface for
// walking Go ASTs.
type Indexer struct {
	c          *Corpus
	fset       *token.FileSet // file set for all indexed files
	fsOpenGate chan bool      // bounds concurrently open files: send pre fs.Open; receive when visitFile returns

	mu            sync.Mutex                        // guards all the following
	sources       bytes.Buffer                      // concatenated sources
	strings       map[string]string                 // interned string
	packages      map[Pak]*Pak                      // interned *Paks
	words         map[string]*IndexResult           // RunLists of Spots
	snippets      []*Snippet                        // indices are stored in SpotInfos
	current       *token.File                       // file set file of the Go file being indexed
	file          *File                             // File (name and package) of the Go file being indexed
	decl          ast.Decl                          // AST for current decl; nil if no snippet is to be created
	stats         Statistics
	throttle      *util.Throttle
	importCount   map[string]int                    // package path ("net/http") => count
	packagePath   map[string]map[string]bool        // "template" => "text/template" => true
	exports       map[string]map[string]SpotKind    // "net/http" => "ListenAndServe" => FuncDecl
	curPkgExports map[string]SpotKind               // declarations seen in the file being indexed; reset per file
	idents        map[SpotKind]map[string][]Ident   // kind => name => list of Idents
}
| |
| func (x *Indexer) intern(s string) string { |
| if s, ok := x.strings[s]; ok { |
| return s |
| } |
| x.strings[s] = s |
| return s |
| } |
| |
| func (x *Indexer) lookupPackage(path, name string) *Pak { |
| // In the source directory tree, more than one package may |
| // live in the same directory. For the packages map, construct |
| // a key that includes both the directory path and the package |
| // name. |
| key := Pak{Path: x.intern(path), Name: x.intern(name)} |
| pak := x.packages[key] |
| if pak == nil { |
| pak = &key |
| x.packages[key] = pak |
| } |
| return pak |
| } |
| |
| func (x *Indexer) addSnippet(s *Snippet) int { |
| index := len(x.snippets) |
| x.snippets = append(x.snippets, s) |
| return index |
| } |
| |
| func (x *Indexer) visitIdent(kind SpotKind, id *ast.Ident) { |
| if id == nil { |
| return |
| } |
| name := x.intern(id.Name) |
| |
| switch kind { |
| case TypeDecl, FuncDecl, ConstDecl, VarDecl: |
| x.curPkgExports[name] = kind |
| } |
| |
| lists, found := x.words[name] |
| if !found { |
| lists = new(IndexResult) |
| x.words[name] = lists |
| } |
| |
| if kind == Use || x.decl == nil { |
| if x.c.IndexGoCode { |
| // not a declaration or no snippet required |
| info := makeSpotInfo(kind, x.current.Line(id.Pos()), false) |
| lists.Others = append(lists.Others, Spot{x.file, info}) |
| } |
| } else { |
| // a declaration with snippet |
| index := x.addSnippet(NewSnippet(x.fset, x.decl, id)) |
| info := makeSpotInfo(kind, index, true) |
| lists.Decls = append(lists.Decls, Spot{x.file, info}) |
| } |
| |
| x.stats.Spots++ |
| } |
| |
| func (x *Indexer) visitFieldList(kind SpotKind, flist *ast.FieldList) { |
| for _, f := range flist.List { |
| x.decl = nil // no snippets for fields |
| for _, name := range f.Names { |
| x.visitIdent(kind, name) |
| } |
| ast.Walk(x, f.Type) |
| // ignore tag - not indexed at the moment |
| } |
| } |
| |
| func (x *Indexer) visitSpec(kind SpotKind, spec ast.Spec) { |
| switch n := spec.(type) { |
| case *ast.ImportSpec: |
| x.visitIdent(ImportDecl, n.Name) |
| if n.Path != nil { |
| if imp, err := strconv.Unquote(n.Path.Value); err == nil { |
| x.importCount[x.intern(imp)]++ |
| } |
| } |
| |
| case *ast.ValueSpec: |
| for _, n := range n.Names { |
| x.visitIdent(kind, n) |
| } |
| ast.Walk(x, n.Type) |
| for _, v := range n.Values { |
| ast.Walk(x, v) |
| } |
| |
| case *ast.TypeSpec: |
| x.visitIdent(TypeDecl, n.Name) |
| ast.Walk(x, n.Type) |
| } |
| } |
| |
| func (x *Indexer) visitGenDecl(decl *ast.GenDecl) { |
| kind := VarDecl |
| if decl.Tok == token.CONST { |
| kind = ConstDecl |
| } |
| x.decl = decl |
| for _, s := range decl.Specs { |
| x.visitSpec(kind, s) |
| } |
| } |
| |
| func (x *Indexer) Visit(node ast.Node) ast.Visitor { |
| switch n := node.(type) { |
| case nil: |
| // nothing to do |
| |
| case *ast.Ident: |
| x.visitIdent(Use, n) |
| |
| case *ast.FieldList: |
| x.visitFieldList(VarDecl, n) |
| |
| case *ast.InterfaceType: |
| x.visitFieldList(MethodDecl, n.Methods) |
| |
| case *ast.DeclStmt: |
| // local declarations should only be *ast.GenDecls; |
| // ignore incorrect ASTs |
| if decl, ok := n.Decl.(*ast.GenDecl); ok { |
| x.decl = nil // no snippets for local declarations |
| x.visitGenDecl(decl) |
| } |
| |
| case *ast.GenDecl: |
| x.decl = n |
| x.visitGenDecl(n) |
| |
| case *ast.FuncDecl: |
| kind := FuncDecl |
| if n.Recv != nil { |
| kind = MethodDecl |
| ast.Walk(x, n.Recv) |
| } |
| x.decl = n |
| x.visitIdent(kind, n.Name) |
| ast.Walk(x, n.Type) |
| if n.Body != nil { |
| ast.Walk(x, n.Body) |
| } |
| |
| case *ast.File: |
| x.decl = nil |
| x.visitIdent(PackageClause, n.Name) |
| for _, d := range n.Decls { |
| ast.Walk(x, d) |
| } |
| |
| default: |
| return x |
| } |
| |
| return nil |
| } |
| |
| // addFile adds a file to the index if possible and returns the file set file |
| // and the file's AST if it was successfully parsed as a Go file. If addFile |
| // failed (that is, if the file was not added), it returns file == nil. |
| func (x *Indexer) addFile(f vfs.ReadSeekCloser, filename string, goFile bool) (file *token.File, ast *ast.File) { |
| defer f.Close() |
| |
| // The file set's base offset and x.sources size must be in lock-step; |
| // this permits the direct mapping of suffix array lookup results to |
| // to corresponding Pos values. |
| // |
| // When a file is added to the file set, its offset base increases by |
| // the size of the file + 1; and the initial base offset is 1. Add an |
| // extra byte to the sources here. |
| x.sources.WriteByte(0) |
| |
| // If the sources length doesn't match the file set base at this point |
| // the file set implementation changed or we have another error. |
| base := x.fset.Base() |
| if x.sources.Len() != base { |
| panic("internal error: file base incorrect") |
| } |
| |
| // append file contents (src) to x.sources |
| if _, err := x.sources.ReadFrom(f); err == nil { |
| src := x.sources.Bytes()[base:] |
| |
| if goFile { |
| // parse the file and in the process add it to the file set |
| if ast, err = parser.ParseFile(x.fset, filename, src, parser.ParseComments); err == nil { |
| file = x.fset.File(ast.Pos()) // ast.Pos() is inside the file |
| return |
| } |
| // file has parse errors, and the AST may be incorrect - |
| // set lines information explicitly and index as ordinary |
| // text file (cannot fall through to the text case below |
| // because the file has already been added to the file set |
| // by the parser) |
| file = x.fset.File(token.Pos(base)) // token.Pos(base) is inside the file |
| file.SetLinesForContent(src) |
| ast = nil |
| return |
| } |
| |
| if util.IsText(src) { |
| // only add the file to the file set (for the full text index) |
| file = x.fset.AddFile(filename, x.fset.Base(), len(src)) |
| file.SetLinesForContent(src) |
| return |
| } |
| } |
| |
| // discard possibly added data |
| x.sources.Truncate(base - 1) // -1 to remove added byte 0 since no file was added |
| return |
| } |
| |
| // Design note: Using an explicit white list of permitted files for indexing |
| // makes sure that the important files are included and massively reduces the |
| // number of files to index. The advantage over a blacklist is that unexpected |
| // (non-blacklisted) files won't suddenly explode the index. |
| |
// whitelisted holds the file extensions (with leading dot) and the
// complete names of extension-less files that are permitted for
// indexing.
var whitelisted = map[string]bool{
	// file extensions
	".bash": true,
	".c":    true,
	".cc":   true,
	".cpp":  true,
	".cxx":  true,
	".css":  true,
	".go":   true,
	".goc":  true,
	".h":    true,
	".hh":   true,
	".hpp":  true,
	".hxx":  true,
	".html": true,
	".js":   true,
	".out":  true,
	".py":   true,
	".s":    true,
	".sh":   true,
	".txt":  true,
	".xml":  true,

	// complete file names
	"AUTHORS":      true,
	"CONTRIBUTORS": true,
	"LICENSE":      true,
	"Makefile":     true,
	"PATENTS":      true,
	"README":       true,
}

// isWhitelisted returns true if a file is on the list
// of "permitted" files for indexing. The filename must
// be the directory-local name of the file. Files with an
// extension are looked up by extension, all others by
// their entire name.
func isWhitelisted(filename string) bool {
	if ext := pathpkg.Ext(filename); ext != "" {
		return whitelisted[ext]
	}
	return whitelisted[filename]
}
| |
| func (x *Indexer) indexDocs(dirname string, filename string, astFile *ast.File) { |
| pkgName := x.intern(astFile.Name.Name) |
| if pkgName == "main" { |
| return |
| } |
| pkgPath := x.intern(strings.TrimPrefix(strings.TrimPrefix(dirname, "/src/"), "pkg/")) |
| astPkg := ast.Package{ |
| Name: pkgName, |
| Files: map[string]*ast.File{ |
| filename: astFile, |
| }, |
| } |
| var m doc.Mode |
| docPkg := doc.New(&astPkg, dirname, m) |
| addIdent := func(sk SpotKind, name string, docstr string) { |
| if x.idents[sk] == nil { |
| x.idents[sk] = make(map[string][]Ident) |
| } |
| name = x.intern(name) |
| x.idents[sk][name] = append(x.idents[sk][name], Ident{ |
| Path: pkgPath, |
| Package: pkgName, |
| Name: name, |
| Doc: doc.Synopsis(docstr), |
| }) |
| } |
| |
| if x.idents[PackageClause] == nil { |
| x.idents[PackageClause] = make(map[string][]Ident) |
| } |
| // List of words under which the package identifier will be stored. |
| // This includes the package name and the components of the directory |
| // in which it resides. |
| words := strings.Split(pathpkg.Dir(pkgPath), "/") |
| if words[0] == "." { |
| words = []string{} |
| } |
| name := x.intern(docPkg.Name) |
| synopsis := doc.Synopsis(docPkg.Doc) |
| words = append(words, name) |
| pkgIdent := Ident{ |
| Path: pkgPath, |
| Package: pkgName, |
| Name: name, |
| Doc: synopsis, |
| } |
| for _, word := range words { |
| word = x.intern(word) |
| found := false |
| pkgs := x.idents[PackageClause][word] |
| for i, p := range pkgs { |
| if p.Path == pkgPath { |
| if docPkg.Doc != "" { |
| p.Doc = synopsis |
| pkgs[i] = p |
| } |
| found = true |
| break |
| } |
| } |
| if !found { |
| x.idents[PackageClause][word] = append(x.idents[PackageClause][word], pkgIdent) |
| } |
| } |
| |
| for _, c := range docPkg.Consts { |
| for _, name := range c.Names { |
| addIdent(ConstDecl, name, c.Doc) |
| } |
| } |
| for _, t := range docPkg.Types { |
| addIdent(TypeDecl, t.Name, t.Doc) |
| for _, c := range t.Consts { |
| for _, name := range c.Names { |
| addIdent(ConstDecl, name, c.Doc) |
| } |
| } |
| for _, v := range t.Vars { |
| for _, name := range v.Names { |
| addIdent(VarDecl, name, v.Doc) |
| } |
| } |
| for _, f := range t.Funcs { |
| addIdent(FuncDecl, f.Name, f.Doc) |
| } |
| for _, f := range t.Methods { |
| addIdent(MethodDecl, f.Name, f.Doc) |
| // Change the name of methods to be "<typename>.<methodname>". |
| // They will still be indexed as <methodname>. |
| idents := x.idents[MethodDecl][f.Name] |
| idents[len(idents)-1].Name = x.intern(t.Name + "." + f.Name) |
| } |
| } |
| for _, v := range docPkg.Vars { |
| for _, name := range v.Names { |
| addIdent(VarDecl, name, v.Doc) |
| } |
| } |
| for _, f := range docPkg.Funcs { |
| addIdent(FuncDecl, f.Name, f.Doc) |
| } |
| } |
| |
| func (x *Indexer) indexGoFile(dirname string, filename string, file *token.File, astFile *ast.File) { |
| pkgName := astFile.Name.Name |
| |
| if x.c.IndexGoCode { |
| x.current = file |
| pak := x.lookupPackage(dirname, pkgName) |
| x.file = &File{filename, pak} |
| ast.Walk(x, astFile) |
| } |
| |
| if x.c.IndexDocs { |
| // Test files are already filtered out in visitFile if IndexGoCode and |
| // IndexFullText are false. Otherwise, check here. |
| isTestFile := (x.c.IndexGoCode || x.c.IndexFullText) && |
| (strings.HasSuffix(filename, "_test.go") || strings.HasPrefix(dirname, "/test/")) |
| if !isTestFile { |
| x.indexDocs(dirname, filename, astFile) |
| } |
| } |
| |
| ppKey := x.intern(pkgName) |
| if _, ok := x.packagePath[ppKey]; !ok { |
| x.packagePath[ppKey] = make(map[string]bool) |
| } |
| pkgPath := x.intern(strings.TrimPrefix(strings.TrimPrefix(dirname, "/src/"), "pkg/")) |
| x.packagePath[ppKey][pkgPath] = true |
| |
| // Merge in exported symbols found walking this file into |
| // the map for that package. |
| if len(x.curPkgExports) > 0 { |
| dest, ok := x.exports[pkgPath] |
| if !ok { |
| dest = make(map[string]SpotKind) |
| x.exports[pkgPath] = dest |
| } |
| for k, v := range x.curPkgExports { |
| dest[k] = v |
| } |
| } |
| } |
| |
| func (x *Indexer) visitFile(dirname string, fi os.FileInfo) { |
| if fi.IsDir() || !x.c.IndexEnabled { |
| return |
| } |
| |
| filename := pathpkg.Join(dirname, fi.Name()) |
| goFile := isGoFile(fi) |
| |
| switch { |
| case x.c.IndexFullText: |
| if !isWhitelisted(fi.Name()) { |
| return |
| } |
| case x.c.IndexGoCode: |
| if !goFile { |
| return |
| } |
| case x.c.IndexDocs: |
| if !goFile || |
| strings.HasSuffix(fi.Name(), "_test.go") || |
| strings.HasPrefix(dirname, "/test/") { |
| return |
| } |
| default: |
| // No indexing turned on. |
| return |
| } |
| |
| x.fsOpenGate <- true |
| defer func() { <-x.fsOpenGate }() |
| |
| // open file |
| f, err := x.c.fs.Open(filename) |
| if err != nil { |
| return |
| } |
| |
| x.mu.Lock() |
| defer x.mu.Unlock() |
| |
| x.throttle.Throttle() |
| |
| x.curPkgExports = make(map[string]SpotKind) |
| file, fast := x.addFile(f, filename, goFile) |
| if file == nil { |
| return // addFile failed |
| } |
| |
| if fast != nil { |
| x.indexGoFile(dirname, fi.Name(), file, fast) |
| } |
| |
| // update statistics |
| x.stats.Bytes += file.Size() |
| x.stats.Files++ |
| x.stats.Lines += file.LineCount() |
| } |
| |
// indexOptions contains information that affects the contents of an index.
// NewIndex fills it in from the corresponding Corpus settings
// (IndexDocs, IndexGoCode, IndexFullText and MaxResults).
type indexOptions struct {
	// Docs provides documentation search results.
	// It is only consulted if IndexEnabled is true.
	// The default value is true.
	Docs bool

	// GoCode provides Go source code search results.
	// It is only consulted if IndexEnabled is true.
	// The default value is true.
	GoCode bool

	// FullText provides search results from all files.
	// It is only consulted if IndexEnabled is true.
	// The default value is true.
	FullText bool

	// MaxResults optionally specifies the maximum results for indexing.
	// The default is 1000.
	MaxResults int
}
| |
| // ---------------------------------------------------------------------------- |
| // Index |
| |
// A LookupResult holds the hit lists for a single word. It is the
// reduced form of the IndexResult collected for that word during indexing.
type LookupResult struct {
	Decls  HitList // package-level declarations (with snippets)
	Others HitList // all other occurrences
}

// An Index holds the search data produced by Corpus.NewIndex.
type Index struct {
	fset        *token.FileSet                  // file set used during indexing; nil if no textindex
	suffixes    *suffixarray.Index              // suffixes for concatenated sources; nil if no textindex
	words       map[string]*LookupResult        // maps words to hit lists
	alts        map[string]*AltWords            // maps canonical(words) to lists of alternative spellings
	snippets    []*Snippet                      // all snippets, indexed by snippet index
	stats       Statistics                      // statistics gathered while indexing
	importCount map[string]int                  // package path ("net/http") => count
	packagePath map[string]map[string]bool      // "template" => "text/template" => true
	exports     map[string]map[string]SpotKind  // "net/http" => "ListenAndServe" => FuncDecl
	idents      map[SpotKind]map[string][]Ident // kind => name => list of Idents, sorted by import count
	opts        indexOptions                    // corpus settings the index was built with
}
| |
// canonical returns the canonical spelling of the word w, which is w
// converted to lower case.
func canonical(w string) string {
	lower := strings.ToLower(w)
	return lower
}
| |
// Limits on the number of files and directories that NewIndex keeps
// open at the same time.
//
// Somewhat arbitrary, but I figure low enough to not hurt disk-based filesystems
// consuming file descriptors, where some systems have low 256 or 512 limits.
// Go should have a built-in way to cap fd usage under the ulimit.
const (
	maxOpenFiles = 200 // capacity of the Indexer's fsOpenGate
	maxOpenDirs  = 50  // capacity of the directory gate used by NewIndex
)
| |
| func (c *Corpus) throttle() float64 { |
| if c.IndexThrottle <= 0 { |
| return 0.9 |
| } |
| if c.IndexThrottle > 1.0 { |
| return 1.0 |
| } |
| return c.IndexThrottle |
| } |
| |
| // NewIndex creates a new index for the .go files provided by the corpus. |
| func (c *Corpus) NewIndex() *Index { |
| // initialize Indexer |
| // (use some reasonably sized maps to start) |
| x := &Indexer{ |
| c: c, |
| fset: token.NewFileSet(), |
| fsOpenGate: make(chan bool, maxOpenFiles), |
| strings: make(map[string]string), |
| packages: make(map[Pak]*Pak, 256), |
| words: make(map[string]*IndexResult, 8192), |
| throttle: util.NewThrottle(c.throttle(), 100*time.Millisecond), // run at least 0.1s at a time |
| importCount: make(map[string]int), |
| packagePath: make(map[string]map[string]bool), |
| exports: make(map[string]map[string]SpotKind), |
| idents: make(map[SpotKind]map[string][]Ident, 4), |
| } |
| |
| // index all files in the directories given by dirnames |
| var wg sync.WaitGroup // outstanding ReadDir + visitFile |
| dirGate := make(chan bool, maxOpenDirs) |
| for dirname := range c.fsDirnames() { |
| if c.IndexDirectory != nil && !c.IndexDirectory(dirname) { |
| continue |
| } |
| dirGate <- true |
| wg.Add(1) |
| go func(dirname string) { |
| defer func() { <-dirGate }() |
| defer wg.Done() |
| |
| list, err := c.fs.ReadDir(dirname) |
| if err != nil { |
| log.Printf("ReadDir(%q): %v; skipping directory", dirname, err) |
| return // ignore this directory |
| } |
| for _, fi := range list { |
| wg.Add(1) |
| go func(fi os.FileInfo) { |
| defer wg.Done() |
| x.visitFile(dirname, fi) |
| }(fi) |
| } |
| }(dirname) |
| } |
| wg.Wait() |
| |
| if !c.IndexFullText { |
| // the file set, the current file, and the sources are |
| // not needed after indexing if no text index is built - |
| // help GC and clear them |
| x.fset = nil |
| x.sources.Reset() |
| x.current = nil // contains reference to fset! |
| } |
| |
| // for each word, reduce the RunLists into a LookupResult; |
| // also collect the word with its canonical spelling in a |
| // word list for later computation of alternative spellings |
| words := make(map[string]*LookupResult) |
| var wlist RunList |
| for w, h := range x.words { |
| decls := reduce(h.Decls) |
| others := reduce(h.Others) |
| words[w] = &LookupResult{ |
| Decls: decls, |
| Others: others, |
| } |
| wlist = append(wlist, &wordPair{canonical(w), w}) |
| x.throttle.Throttle() |
| } |
| x.stats.Words = len(words) |
| |
| // reduce the word list {canonical(w), w} into |
| // a list of AltWords runs {canonical(w), {w}} |
| alist := wlist.reduce(lessWordPair, newAltWords) |
| |
| // convert alist into a map of alternative spellings |
| alts := make(map[string]*AltWords) |
| for i := 0; i < len(alist); i++ { |
| a := alist[i].(*AltWords) |
| alts[a.Canon] = a |
| } |
| |
| // create text index |
| var suffixes *suffixarray.Index |
| if c.IndexFullText { |
| suffixes = suffixarray.New(x.sources.Bytes()) |
| } |
| |
| // sort idents by the number of imports of their respective packages |
| for _, idMap := range x.idents { |
| for _, ir := range idMap { |
| sort.Sort(byImportCount{ir, x.importCount}) |
| } |
| } |
| |
| return &Index{ |
| fset: x.fset, |
| suffixes: suffixes, |
| words: words, |
| alts: alts, |
| snippets: x.snippets, |
| stats: x.stats, |
| importCount: x.importCount, |
| packagePath: x.packagePath, |
| exports: x.exports, |
| idents: x.idents, |
| opts: indexOptions{ |
| Docs: x.c.IndexDocs, |
| GoCode: x.c.IndexGoCode, |
| FullText: x.c.IndexFullText, |
| MaxResults: x.c.MaxResults, |
| }, |
| } |
| } |
| |
// fileIndexVersion is the format version stamped into every serialized
// index. Index.ReadFrom refuses input carrying any other value.
const fileIndexVersion = 3

// ErrFileIndexVersion is returned by Index.ReadFrom when the serialized
// index does not carry the current fileIndexVersion.
var ErrFileIndexVersion = errors.New("file index version out of date")
| |
| // fileIndex is the subset of Index that's gob-encoded for use by |
| // Index.Write and Index.Read. |
| type fileIndex struct { |
| Version int |
| Words map[string]*LookupResult |
| Alts map[string]*AltWords |
| Snippets []*Snippet |
| Fulltext bool |
| Stats Statistics |
| ImportCount map[string]int |
| PackagePath map[string]map[string]bool |
| Exports map[string]map[string]SpotKind |
| Idents map[SpotKind]map[string][]Ident |
| Opts indexOptions |
| } |
| |
| func (x *fileIndex) Write(w io.Writer) error { |
| return gob.NewEncoder(w).Encode(x) |
| } |
| |
| func (x *fileIndex) Read(r io.Reader) error { |
| return gob.NewDecoder(r).Decode(x) |
| } |
| |
| // WriteTo writes the index x to w. |
| func (x *Index) WriteTo(w io.Writer) (n int64, err error) { |
| w = countingWriter{&n, w} |
| fulltext := false |
| if x.suffixes != nil { |
| fulltext = true |
| } |
| fx := fileIndex{ |
| Version: fileIndexVersion, |
| Words: x.words, |
| Alts: x.alts, |
| Snippets: x.snippets, |
| Fulltext: fulltext, |
| Stats: x.stats, |
| ImportCount: x.importCount, |
| PackagePath: x.packagePath, |
| Exports: x.exports, |
| Idents: x.idents, |
| Opts: x.opts, |
| } |
| if err := fx.Write(w); err != nil { |
| return 0, err |
| } |
| if fulltext { |
| encode := func(x interface{}) error { |
| return gob.NewEncoder(w).Encode(x) |
| } |
| if err := x.fset.Write(encode); err != nil { |
| return 0, err |
| } |
| if err := x.suffixes.Write(w); err != nil { |
| return 0, err |
| } |
| } |
| return n, nil |
| } |
| |
| // ReadFrom reads the index from r into x; x must not be nil. |
| // If r does not also implement io.ByteReader, it will be wrapped in a bufio.Reader. |
| // If the index is from an old version, the error is ErrFileIndexVersion. |
| func (x *Index) ReadFrom(r io.Reader) (n int64, err error) { |
| // We use the ability to read bytes as a plausible surrogate for buffering. |
| if _, ok := r.(io.ByteReader); !ok { |
| r = bufio.NewReader(r) |
| } |
| r = countingReader{&n, r.(byteReader)} |
| var fx fileIndex |
| if err := fx.Read(r); err != nil { |
| return n, err |
| } |
| if fx.Version != fileIndexVersion { |
| return 0, ErrFileIndexVersion |
| } |
| x.words = fx.Words |
| x.alts = fx.Alts |
| x.snippets = fx.Snippets |
| x.stats = fx.Stats |
| x.importCount = fx.ImportCount |
| x.packagePath = fx.PackagePath |
| x.exports = fx.Exports |
| x.idents = fx.Idents |
| x.opts = fx.Opts |
| if fx.Fulltext { |
| x.fset = token.NewFileSet() |
| decode := func(x interface{}) error { |
| return gob.NewDecoder(r).Decode(x) |
| } |
| if err := x.fset.Read(decode); err != nil { |
| return n, err |
| } |
| x.suffixes = new(suffixarray.Index) |
| if err := x.suffixes.Read(r); err != nil { |
| return n, err |
| } |
| } |
| return n, nil |
| } |
| |
| // Stats returns index statistics. |
| func (x *Index) Stats() Statistics { |
| return x.stats |
| } |
| |
| // ImportCount returns a map from import paths to how many times they were seen. |
| func (x *Index) ImportCount() map[string]int { |
| return x.importCount |
| } |
| |
| // PackagePath returns a map from short package name to a set |
| // of full package path names that use that short package name. |
| func (x *Index) PackagePath() map[string]map[string]bool { |
| return x.packagePath |
| } |
| |
| // Exports returns a map from full package path to exported |
| // symbol name to its type. |
| func (x *Index) Exports() map[string]map[string]SpotKind { |
| return x.exports |
| } |
| |
| // Idents returns a map from identifier type to exported |
| // symbol name to the list of identifiers matching that name. |
| func (x *Index) Idents() map[SpotKind]map[string][]Ident { |
| return x.idents |
| } |
| |
| func (x *Index) lookupWord(w string) (match *LookupResult, alt *AltWords) { |
| match = x.words[w] |
| alt = x.alts[canonical(w)] |
| // remove current spelling from alternatives |
| // (if there is no match, the alternatives do |
| // not contain the current spelling) |
| if match != nil && alt != nil { |
| alt = alt.filter(w) |
| } |
| return |
| } |
| |
// isIdentifier reports whether s is a Go identifier: a non-empty string
// of letters, underscores and digits that does not start with a digit.
func isIdentifier(s string) bool {
	if s == "" {
		return false
	}
	for pos, r := range s {
		switch {
		case r == '_', unicode.IsLetter(r):
			// allowed anywhere
		case pos > 0 && unicode.IsDigit(r):
			// digits are allowed after the first character
		default:
			return false
		}
	}
	return true
}
| |
| // For a given query, which is either a single identifier or a qualified |
| // identifier, Lookup returns a SearchResult containing packages, a LookupResult, a |
| // list of alternative spellings, and identifiers, if any. Any and all results |
| // may be nil. If the query syntax is wrong, an error is reported. |
| func (x *Index) Lookup(query string) (*SearchResult, error) { |
| ss := strings.Split(query, ".") |
| |
| // check query syntax |
| for _, s := range ss { |
| if !isIdentifier(s) { |
| return nil, errors.New("all query parts must be identifiers") |
| } |
| } |
| rslt := &SearchResult{ |
| Query: query, |
| Idents: make(map[SpotKind][]Ident, 5), |
| } |
| // handle simple and qualified identifiers |
| switch len(ss) { |
| case 1: |
| ident := ss[0] |
| rslt.Hit, rslt.Alt = x.lookupWord(ident) |
| if rslt.Hit != nil { |
| // found a match - filter packages with same name |
| // for the list of packages called ident, if any |
| rslt.Pak = rslt.Hit.Others.filter(ident) |
| } |
| for k, v := range x.idents { |
| const rsltLimit = 50 |
| ids := byImportCount{v[ident], x.importCount} |
| rslt.Idents[k] = ids.top(rsltLimit) |
| } |
| |
| case 2: |
| pakname, ident := ss[0], ss[1] |
| rslt.Hit, rslt.Alt = x.lookupWord(ident) |
| if rslt.Hit != nil { |
| // found a match - filter by package name |
| // (no paks - package names are not qualified) |
| decls := rslt.Hit.Decls.filter(pakname) |
| others := rslt.Hit.Others.filter(pakname) |
| rslt.Hit = &LookupResult{decls, others} |
| } |
| for k, v := range x.idents { |
| ids := byImportCount{v[ident], x.importCount} |
| rslt.Idents[k] = ids.filter(pakname) |
| } |
| |
| default: |
| return nil, errors.New("query is not a (qualified) identifier") |
| } |
| |
| return rslt, nil |
| } |
| |
| func (x *Index) Snippet(i int) *Snippet { |
| // handle illegal snippet indices gracefully |
| if 0 <= i && i < len(x.snippets) { |
| return x.snippets[i] |
| } |
| return nil |
| } |
| |
// positionList is a list of (filename, line) pairs. Its Len, Less and
// Swap methods order the entries by filename only; line numbers do not
// take part in the comparison.
type positionList []struct {
	filename string
	line     int
}

// Len returns the number of entries in the list.
func (list positionList) Len() int {
	return len(list)
}

// Less reports whether the filename of entry i sorts before that of entry j.
func (list positionList) Less(i, j int) bool {
	a, b := list[i].filename, list[j].filename
	return a < b
}

// Swap exchanges entries i and j.
func (list positionList) Swap(i, j int) {
	tmp := list[i]
	list[i] = list[j]
	list[j] = tmp
}
| |
// unique sorts list in place and returns it with duplicate entries
// removed. The result shares its backing array with list.
func unique(list []int) []int {
	sort.Ints(list)
	kept := 0
	for i, v := range list {
		if i > 0 && v == list[kept-1] {
			// same as the value kept last - skip it
			continue
		}
		list[kept] = v
		kept++
	}
	return list[:kept]
}
| |
// FileLines pairs a file name with a list of line numbers within that
// file.
type FileLines struct {
	Filename string // name of the file
	Lines    []int  // line numbers within Filename
}
| |
| // LookupRegexp returns the number of matches and the matches where a regular |
| // expression r is found in the full text index. At most n matches are |
| // returned (thus found <= n). |
| // |
| func (x *Index) LookupRegexp(r *regexp.Regexp, n int) (found int, result []FileLines) { |
| if x.suffixes == nil || n <= 0 { |
| return |
| } |
| // n > 0 |
| |
| var list positionList |
| // FindAllIndex may returns matches that span across file boundaries. |
| // Such matches are unlikely, buf after eliminating them we may end up |
| // with fewer than n matches. If we don't have enough at the end, redo |
| // the search with an increased value n1, but only if FindAllIndex |
| // returned all the requested matches in the first place (if it |
| // returned fewer than that there cannot be more). |
| for n1 := n; found < n; n1 += n - found { |
| found = 0 |
| matches := x.suffixes.FindAllIndex(r, n1) |
| // compute files, exclude matches that span file boundaries, |
| // and map offsets to file-local offsets |
| list = make(positionList, len(matches)) |
| for _, m := range matches { |
| // by construction, an offset corresponds to the Pos value |
| // for the file set - use it to get the file and line |
| p := token.Pos(m[0]) |
| if file := x.fset.File(p); file != nil { |
| if base := file.Base(); base <= m[1] && m[1] <= base+file.Size() { |
| // match [m[0], m[1]) is within the file boundaries |
| list[found].filename = file.Name() |
| list[found].line = file.Line(p) |
| found++ |
| } |
| } |
| } |
| if found == n || len(matches) < n1 { |
| // found all matches or there's no chance to find more |
| break |
| } |
| } |
| list = list[0:found] |
| sort.Sort(list) // sort by filename |
| |
| // collect matches belonging to the same file |
| var last string |
| var lines []int |
| addLines := func() { |
| if len(lines) > 0 { |
| // remove duplicate lines |
| result = append(result, FileLines{last, unique(lines)}) |
| lines = nil |
| } |
| } |
| for _, m := range list { |
| if m.filename != last { |
| addLines() |
| last = m.filename |
| } |
| lines = append(lines, m.line) |
| } |
| addLines() |
| |
| return |
| } |
| |
| // InvalidateIndex should be called whenever any of the file systems |
| // under godoc's observation change so that the indexer is kicked on. |
| func (c *Corpus) invalidateIndex() { |
| c.fsModified.Set(nil) |
| c.refreshMetadata() |
| } |
| |
| // indexUpToDate() returns true if the search index is not older |
| // than any of the file systems under godoc's observation. |
| // |
| func (c *Corpus) indexUpToDate() bool { |
| _, fsTime := c.fsModified.Get() |
| _, siTime := c.searchIndex.Get() |
| return !fsTime.After(siTime) |
| } |
| |
| // feedDirnames feeds the directory names of all directories |
| // under the file system given by root to channel c. |
| // |
| func (c *Corpus) feedDirnames(ch chan<- string) { |
| if dir, _ := c.fsTree.Get(); dir != nil { |
| for d := range dir.(*Directory).iter(false) { |
| ch <- d.Path |
| } |
| } |
| } |
| |
| // fsDirnames() returns a channel sending all directory names |
| // of all the file systems under godoc's observation. |
| // |
| func (c *Corpus) fsDirnames() <-chan string { |
| ch := make(chan string, 256) // buffered for fewer context switches |
| go func() { |
| c.feedDirnames(ch) |
| close(ch) |
| }() |
| return ch |
| } |
| |
| // CompatibleWith reports whether the Index x is compatible with the corpus |
| // indexing options set in c. |
| func (x *Index) CompatibleWith(c *Corpus) bool { |
| return x.opts.Docs == c.IndexDocs && |
| x.opts.GoCode == c.IndexGoCode && |
| x.opts.FullText == c.IndexFullText && |
| x.opts.MaxResults == c.MaxResults |
| } |
| |
| func (c *Corpus) readIndex(filenames string) error { |
| matches, err := filepath.Glob(filenames) |
| if err != nil { |
| return err |
| } else if matches == nil { |
| return fmt.Errorf("no index files match %q", filenames) |
| } |
| sort.Strings(matches) // make sure files are in the right order |
| files := make([]io.Reader, 0, len(matches)) |
| for _, filename := range matches { |
| f, err := os.Open(filename) |
| if err != nil { |
| return err |
| } |
| defer f.Close() |
| files = append(files, f) |
| } |
| return c.ReadIndexFrom(io.MultiReader(files...)) |
| } |
| |
| // ReadIndexFrom sets the current index from the serialized version found in r. |
| func (c *Corpus) ReadIndexFrom(r io.Reader) error { |
| x := new(Index) |
| if _, err := x.ReadFrom(r); err != nil { |
| return err |
| } |
| if !x.CompatibleWith(c) { |
| return fmt.Errorf("index file options are incompatible: %v", x.opts) |
| } |
| c.searchIndex.Set(x) |
| return nil |
| } |
| |
| func (c *Corpus) UpdateIndex() { |
| if c.Verbose { |
| log.Printf("updating index...") |
| } |
| start := time.Now() |
| index := c.NewIndex() |
| stop := time.Now() |
| c.searchIndex.Set(index) |
| if c.Verbose { |
| secs := stop.Sub(start).Seconds() |
| stats := index.Stats() |
| log.Printf("index updated (%gs, %d bytes of source, %d files, %d lines, %d unique words, %d spots)", |
| secs, stats.Bytes, stats.Files, stats.Lines, stats.Words, stats.Spots) |
| } |
| memstats := new(runtime.MemStats) |
| runtime.ReadMemStats(memstats) |
| if c.Verbose { |
| log.Printf("before GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys) |
| } |
| runtime.GC() |
| runtime.ReadMemStats(memstats) |
| if c.Verbose { |
| log.Printf("after GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys) |
| } |
| } |
| |
| // RunIndexer runs forever, indexing. |
| func (c *Corpus) RunIndexer() { |
| // initialize the index from disk if possible |
| if c.IndexFiles != "" { |
| c.initFSTree() |
| if err := c.readIndex(c.IndexFiles); err != nil { |
| log.Printf("error reading index from file %s: %v", c.IndexFiles, err) |
| } |
| return |
| } |
| |
| // Repeatedly update the package directory tree and index. |
| // TODO(bgarcia): Use fsnotify to only update when notified of a filesystem change. |
| for { |
| c.initFSTree() |
| c.UpdateIndex() |
| if c.IndexInterval < 0 { |
| return |
| } |
| delay := 5 * time.Minute // by default, reindex every 5 minutes |
| if c.IndexInterval > 0 { |
| delay = c.IndexInterval |
| } |
| time.Sleep(delay) |
| } |
| } |
| |
// countingWriter forwards writes to w and adds the number of bytes that
// w reports as written to *n.
type countingWriter struct {
	n *int64
	w io.Writer
}

// Write passes p to the wrapped writer and returns its result unchanged
// after adding the written byte count to the running total.
func (c countingWriter) Write(p []byte) (n int, err error) {
	written, werr := c.w.Write(p)
	*c.n += int64(written)
	return written, werr
}
| |
// byteReader is a reader that can also deliver its input one byte at a
// time.
type byteReader interface {
	io.Reader
	io.ByteReader
}

// countingReader forwards reads to r and adds the number of bytes
// actually obtained from r to *n.
type countingReader struct {
	n *int64
	r byteReader
}

// Read reads from the wrapped reader into p and returns its result
// unchanged after adding the byte count to the running total.
func (c countingReader) Read(p []byte) (n int, err error) {
	n, err = c.r.Read(p)
	*c.n += int64(n)
	return n, err
}

// ReadByte reads a single byte from the wrapped reader. The running
// total is advanced only when a byte was actually delivered: per the
// io.ByteReader contract a failed ReadByte consumes no input, so counting
// it (for instance at end of input) would overstate the bytes read.
func (c countingReader) ReadByte() (b byte, err error) {
	b, err = c.r.ReadByte()
	if err == nil {
		*c.n++
	}
	return b, err
}