|  | // Copyright 2016 The Go Authors. All rights reserved. | 
|  | // Use of this source code is governed by a BSD-style | 
|  | // license that can be found in the LICENSE file. | 
|  |  | 
|  | // Package fastwalk provides a faster version of [filepath.Walk] for file system | 
|  | // scanning tools. | 
|  | package fastwalk | 
|  |  | 
|  | import ( | 
|  | "errors" | 
|  | "os" | 
|  | "path/filepath" | 
|  | "runtime" | 
|  | "sync" | 
|  | ) | 
|  |  | 
// Sentinel errors that a walkFn may return to steer the traversal.
var (
	// ErrTraverseLink is used as a return value from WalkFuncs to indicate
	// that the symlink named in the call may be traversed.
	ErrTraverseLink = errors.New("fastwalk: traverse symlink, assuming target is a directory")

	// ErrSkipFiles is used as a return value from WalkFuncs to indicate that
	// the callback should not be called for any other files in the current
	// directory. Child directories will still be traversed.
	ErrSkipFiles = errors.New("fastwalk: skip remaining files in directory")
)
|  |  | 
|  | // Walk is a faster implementation of [filepath.Walk]. | 
|  | // | 
|  | // [filepath.Walk]'s design necessarily calls [os.Lstat] on each file, | 
|  | // even if the caller needs less info. | 
|  | // Many tools need only the type of each file. | 
|  | // On some platforms, this information is provided directly by the readdir | 
|  | // system call, avoiding the need to stat each file individually. | 
|  | // fastwalk_unix.go contains a fork of the syscall routines. | 
|  | // | 
|  | // See golang.org/issue/16399. | 
|  | // | 
|  | // Walk walks the file tree rooted at root, calling walkFn for | 
|  | // each file or directory in the tree, including root. | 
|  | // | 
|  | // If Walk returns [filepath.SkipDir], the directory is skipped. | 
|  | // | 
|  | // Unlike [filepath.Walk]: | 
|  | //   - file stat calls must be done by the user. | 
|  | //     The only provided metadata is the file type, which does not include | 
|  | //     any permission bits. | 
|  | //   - multiple goroutines stat the filesystem concurrently. The provided | 
|  | //     walkFn must be safe for concurrent use. | 
|  | //   - Walk can follow symlinks if walkFn returns the TraverseLink | 
|  | //     sentinel error. It is the walkFn's responsibility to prevent | 
|  | //     Walk from going into symlink cycles. | 
|  | func Walk(root string, walkFn func(path string, typ os.FileMode) error) error { | 
|  | // TODO(bradfitz): make numWorkers configurable? We used a | 
|  | // minimum of 4 to give the kernel more info about multiple | 
|  | // things we want, in hopes its I/O scheduling can take | 
|  | // advantage of that. Hopefully most are in cache. Maybe 4 is | 
|  | // even too low of a minimum. Profile more. | 
|  | numWorkers := 4 | 
|  | if n := runtime.NumCPU(); n > numWorkers { | 
|  | numWorkers = n | 
|  | } | 
|  |  | 
|  | // Make sure to wait for all workers to finish, otherwise | 
|  | // walkFn could still be called after returning. This Wait call | 
|  | // runs after close(e.donec) below. | 
|  | var wg sync.WaitGroup | 
|  | defer wg.Wait() | 
|  |  | 
|  | w := &walker{ | 
|  | fn:       walkFn, | 
|  | enqueuec: make(chan walkItem, numWorkers), // buffered for performance | 
|  | workc:    make(chan walkItem, numWorkers), // buffered for performance | 
|  | donec:    make(chan struct{}), | 
|  |  | 
|  | // buffered for correctness & not leaking goroutines: | 
|  | resc: make(chan error, numWorkers), | 
|  | } | 
|  | defer close(w.donec) | 
|  |  | 
|  | for i := 0; i < numWorkers; i++ { | 
|  | wg.Add(1) | 
|  | go w.doWork(&wg) | 
|  | } | 
|  | todo := []walkItem{{dir: root}} | 
|  | out := 0 | 
|  | for { | 
|  | workc := w.workc | 
|  | var workItem walkItem | 
|  | if len(todo) == 0 { | 
|  | workc = nil | 
|  | } else { | 
|  | workItem = todo[len(todo)-1] | 
|  | } | 
|  | select { | 
|  | case workc <- workItem: | 
|  | todo = todo[:len(todo)-1] | 
|  | out++ | 
|  | case it := <-w.enqueuec: | 
|  | todo = append(todo, it) | 
|  | case err := <-w.resc: | 
|  | out-- | 
|  | if err != nil { | 
|  | return err | 
|  | } | 
|  | if out == 0 && len(todo) == 0 { | 
|  | // It's safe to quit here, as long as the buffered | 
|  | // enqueue channel isn't also readable, which might | 
|  | // happen if the worker sends both another unit of | 
|  | // work and its result before the other select was | 
|  | // scheduled and both w.resc and w.enqueuec were | 
|  | // readable. | 
|  | select { | 
|  | case it := <-w.enqueuec: | 
|  | todo = append(todo, it) | 
|  | default: | 
|  | return nil | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | // doWork reads directories as instructed (via workc) and runs the | 
|  | // user's callback function. | 
|  | func (w *walker) doWork(wg *sync.WaitGroup) { | 
|  | defer wg.Done() | 
|  | for { | 
|  | select { | 
|  | case <-w.donec: | 
|  | return | 
|  | case it := <-w.workc: | 
|  | select { | 
|  | case <-w.donec: | 
|  | return | 
|  | case w.resc <- w.walk(it.dir, !it.callbackDone): | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | type walker struct { | 
|  | fn func(path string, typ os.FileMode) error | 
|  |  | 
|  | donec    chan struct{} // closed on fastWalk's return | 
|  | workc    chan walkItem // to workers | 
|  | enqueuec chan walkItem // from workers | 
|  | resc     chan error    // from workers | 
|  | } | 
|  |  | 
// walkItem is one unit of work for a worker: a directory to be read.
type walkItem struct {
	// dir is the path of the directory to read.
	dir string

	// callbackDone reports that the user callback has already been called
	// for dir, so it must not be called a second time.
	callbackDone bool
}
|  |  | 
|  | func (w *walker) enqueue(it walkItem) { | 
|  | select { | 
|  | case w.enqueuec <- it: | 
|  | case <-w.donec: | 
|  | } | 
|  | } | 
|  |  | 
|  | func (w *walker) onDirEnt(dirName, baseName string, typ os.FileMode) error { | 
|  | joined := dirName + string(os.PathSeparator) + baseName | 
|  | if typ == os.ModeDir { | 
|  | w.enqueue(walkItem{dir: joined}) | 
|  | return nil | 
|  | } | 
|  |  | 
|  | err := w.fn(joined, typ) | 
|  | if typ == os.ModeSymlink { | 
|  | if err == ErrTraverseLink { | 
|  | // Set callbackDone so we don't call it twice for both the | 
|  | // symlink-as-symlink and the symlink-as-directory later: | 
|  | w.enqueue(walkItem{dir: joined, callbackDone: true}) | 
|  | return nil | 
|  | } | 
|  | if err == filepath.SkipDir { | 
|  | // Permit SkipDir on symlinks too. | 
|  | return nil | 
|  | } | 
|  | } | 
|  | return err | 
|  | } | 
|  |  | 
|  | func (w *walker) walk(root string, runUserCallback bool) error { | 
|  | if runUserCallback { | 
|  | err := w.fn(root, os.ModeDir) | 
|  | if err == filepath.SkipDir { | 
|  | return nil | 
|  | } | 
|  | if err != nil { | 
|  | return err | 
|  | } | 
|  | } | 
|  |  | 
|  | return readDir(root, w.onDirEnt) | 
|  | } |