| // Copyright 2020 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| // Package fuzz provides common fuzzing functionality for tests built with |
| // "go test" and for programs that use fuzzing functionality in the testing |
| // package. |
| package fuzz |
| |
| import ( |
| "bytes" |
| "context" |
| "crypto/sha256" |
| "errors" |
| "fmt" |
| "internal/godebug" |
| "io" |
| "math/bits" |
| "os" |
| "path/filepath" |
| "reflect" |
| "runtime" |
| "strings" |
| "time" |
| ) |
| |
// CoordinateFuzzingOpts is a set of arguments for CoordinateFuzzing.
// The zero value is valid for each field unless specified otherwise.
type CoordinateFuzzingOpts struct {
	// Log is a writer for logging progress messages and warnings.
	// If nil, io.Discard will be used instead.
	Log io.Writer

	// Timeout is the amount of wall clock time to spend fuzzing after the corpus
	// has loaded. If zero, there will be no time limit.
	Timeout time.Duration

	// Limit is the number of random values to generate and test. If zero,
	// there will be no limit on the number of generated values.
	Limit int64

	// MinimizeTimeout is the amount of wall clock time to spend minimizing
	// after discovering a crasher. If zero, there will be no time limit. If
	// MinimizeTimeout and MinimizeLimit are both zero, then minimization will
	// be disabled.
	MinimizeTimeout time.Duration

	// MinimizeLimit is the maximum number of calls to the fuzz function to be
	// made while minimizing after finding a crash. If zero, there will be no
	// limit. Calls to the fuzz function made when minimizing also count toward
	// Limit. If MinimizeTimeout and MinimizeLimit are both zero, then
	// minimization will be disabled.
	MinimizeLimit int64

	// Parallel is the number of worker processes to run in parallel. If zero,
	// CoordinateFuzzing will run GOMAXPROCS workers. When Limit is positive
	// and smaller than Parallel, CoordinateFuzzing starts only Limit workers.
	Parallel int

	// Seed is a list of seed values added by the fuzz target with testing.F.Add
	// and in testdata.
	Seed []CorpusEntry

	// Types is the list of types which make up a corpus entry.
	// Types must be set and must match values in Seed.
	Types []reflect.Type

	// CorpusDir is a directory where files containing values that crash the
	// code being tested may be written. CorpusDir must be set.
	CorpusDir string

	// CacheDir is a directory containing additional "interesting" values.
	// The fuzzer may derive new values from these, and may write new values here.
	CacheDir string
}
| |
| // CoordinateFuzzing creates several worker processes and communicates with |
| // them to test random inputs that could trigger crashes and expose bugs. |
| // The worker processes run the same binary in the same directory with the |
| // same environment variables as the coordinator process. Workers also run |
| // with the same arguments as the coordinator, except with the -test.fuzzworker |
| // flag prepended to the argument list. |
| // |
| // If a crash occurs, the function will return an error containing information |
| // about the crash, which can be reported to the user. |
| func CoordinateFuzzing(ctx context.Context, opts CoordinateFuzzingOpts) (err error) { |
| if err := ctx.Err(); err != nil { |
| return err |
| } |
| if opts.Log == nil { |
| opts.Log = io.Discard |
| } |
| if opts.Parallel == 0 { |
| opts.Parallel = runtime.GOMAXPROCS(0) |
| } |
| if opts.Limit > 0 && int64(opts.Parallel) > opts.Limit { |
| // Don't start more workers than we need. |
| opts.Parallel = int(opts.Limit) |
| } |
| |
| c, err := newCoordinator(opts) |
| if err != nil { |
| return err |
| } |
| |
| if opts.Timeout > 0 { |
| var cancel func() |
| ctx, cancel = context.WithTimeout(ctx, opts.Timeout) |
| defer cancel() |
| } |
| |
| // fuzzCtx is used to stop workers, for example, after finding a crasher. |
| fuzzCtx, cancelWorkers := context.WithCancel(ctx) |
| defer cancelWorkers() |
| doneC := ctx.Done() |
| |
| // stop is called when a worker encounters a fatal error. |
| var fuzzErr error |
| stopping := false |
| stop := func(err error) { |
| if err == fuzzCtx.Err() || isInterruptError(err) { |
| // Suppress cancellation errors and terminations due to SIGINT. |
| // The messages are not helpful since either the user triggered the error |
| // (with ^C) or another more helpful message will be printed (a crasher). |
| err = nil |
| } |
| if err != nil && (fuzzErr == nil || fuzzErr == ctx.Err()) { |
| fuzzErr = err |
| } |
| if stopping { |
| return |
| } |
| stopping = true |
| cancelWorkers() |
| doneC = nil |
| } |
| |
| // Ensure that any crash we find is written to the corpus, even if an error |
| // or interruption occurs while minimizing it. |
| crashWritten := false |
| defer func() { |
| if c.crashMinimizing == nil || crashWritten { |
| return |
| } |
| werr := writeToCorpus(&c.crashMinimizing.entry, opts.CorpusDir) |
| if werr != nil { |
| err = fmt.Errorf("%w\n%v", err, werr) |
| return |
| } |
| if err == nil { |
| err = &crashError{ |
| path: c.crashMinimizing.entry.Path, |
| err: errors.New(c.crashMinimizing.crasherMsg), |
| } |
| } |
| }() |
| |
| // Start workers. |
| // TODO(jayconrod): do we want to support fuzzing different binaries? |
| dir := "" // same as self |
| binPath := os.Args[0] |
| args := append([]string{"-test.fuzzworker"}, os.Args[1:]...) |
| env := os.Environ() // same as self |
| |
| errC := make(chan error) |
| workers := make([]*worker, opts.Parallel) |
| for i := range workers { |
| var err error |
| workers[i], err = newWorker(c, dir, binPath, args, env) |
| if err != nil { |
| return err |
| } |
| } |
| for i := range workers { |
| w := workers[i] |
| go func() { |
| err := w.coordinate(fuzzCtx) |
| if fuzzCtx.Err() != nil || isInterruptError(err) { |
| err = nil |
| } |
| cleanErr := w.cleanup() |
| if err == nil { |
| err = cleanErr |
| } |
| errC <- err |
| }() |
| } |
| |
| // Main event loop. |
| // Do not return until all workers have terminated. We avoid a deadlock by |
| // receiving messages from workers even after ctx is cancelled. |
| activeWorkers := len(workers) |
| statTicker := time.NewTicker(3 * time.Second) |
| defer statTicker.Stop() |
| defer c.logStats() |
| |
| c.logStats() |
| for { |
| var inputC chan fuzzInput |
| input, ok := c.peekInput() |
| if ok && c.crashMinimizing == nil && !stopping { |
| inputC = c.inputC |
| } |
| |
| var minimizeC chan fuzzMinimizeInput |
| minimizeInput, ok := c.peekMinimizeInput() |
| if ok && !stopping { |
| minimizeC = c.minimizeC |
| } |
| |
| select { |
| case <-doneC: |
| // Interrupted, cancelled, or timed out. |
| // stop sets doneC to nil so we don't busy wait here. |
| stop(ctx.Err()) |
| |
| case err := <-errC: |
| // A worker terminated, possibly after encountering a fatal error. |
| stop(err) |
| activeWorkers-- |
| if activeWorkers == 0 { |
| return fuzzErr |
| } |
| |
| case result := <-c.resultC: |
| // Received response from worker. |
| if stopping { |
| break |
| } |
| c.updateStats(result) |
| |
| if result.crasherMsg != "" { |
| if c.warmupRun() && result.entry.IsSeed { |
| target := filepath.Base(c.opts.CorpusDir) |
| fmt.Fprintf(c.opts.Log, "failure while testing seed corpus entry: %s/%s\n", target, testName(result.entry.Parent)) |
| stop(errors.New(result.crasherMsg)) |
| break |
| } |
| if c.canMinimize() && result.canMinimize { |
| if c.crashMinimizing != nil { |
| // This crash is not minimized, and another crash is being minimized. |
| // Ignore this one and wait for the other one to finish. |
| break |
| } |
| // Found a crasher but haven't yet attempted to minimize it. |
| // Send it back to a worker for minimization. Disable inputC so |
| // other workers don't continue fuzzing. |
| c.crashMinimizing = &result |
| fmt.Fprintf(c.opts.Log, "fuzz: minimizing %d-byte failing input file\n", len(result.entry.Data)) |
| c.queueForMinimization(result, nil) |
| } else if !crashWritten { |
| // Found a crasher that's either minimized or not minimizable. |
| // Write to corpus and stop. |
| err := writeToCorpus(&result.entry, opts.CorpusDir) |
| if err == nil { |
| crashWritten = true |
| err = &crashError{ |
| path: result.entry.Path, |
| err: errors.New(result.crasherMsg), |
| } |
| } |
| if shouldPrintDebugInfo() { |
| fmt.Fprintf( |
| c.opts.Log, |
| "DEBUG new crasher, elapsed: %s, id: %s, parent: %s, gen: %d, size: %d, exec time: %s\n", |
| c.elapsed(), |
| result.entry.Path, |
| result.entry.Parent, |
| result.entry.Generation, |
| len(result.entry.Data), |
| result.entryDuration, |
| ) |
| } |
| stop(err) |
| } |
| } else if result.coverageData != nil { |
| if c.warmupRun() { |
| if shouldPrintDebugInfo() { |
| fmt.Fprintf( |
| c.opts.Log, |
| "DEBUG processed an initial input, elapsed: %s, id: %s, new bits: %d, size: %d, exec time: %s\n", |
| c.elapsed(), |
| result.entry.Parent, |
| countBits(diffCoverage(c.coverageMask, result.coverageData)), |
| len(result.entry.Data), |
| result.entryDuration, |
| ) |
| } |
| c.updateCoverage(result.coverageData) |
| c.warmupInputLeft-- |
| if c.warmupInputLeft == 0 { |
| fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, gathering baseline coverage: %d/%d completed, now fuzzing with %d workers\n", c.elapsed(), c.warmupInputCount, c.warmupInputCount, c.opts.Parallel) |
| if shouldPrintDebugInfo() { |
| fmt.Fprintf( |
| c.opts.Log, |
| "DEBUG finished processing input corpus, elapsed: %s, entries: %d, initial coverage bits: %d\n", |
| c.elapsed(), |
| len(c.corpus.entries), |
| countBits(c.coverageMask), |
| ) |
| } |
| } |
| } else if keepCoverage := diffCoverage(c.coverageMask, result.coverageData); keepCoverage != nil { |
| // Found a value that expanded coverage. |
| // It's not a crasher, but we may want to add it to the on-disk |
| // corpus and prioritize it for future fuzzing. |
| // TODO(jayconrod, katiehockman): Prioritize fuzzing these |
| // values which expanded coverage, perhaps based on the |
| // number of new edges that this result expanded. |
| // TODO(jayconrod, katiehockman): Don't write a value that's already |
| // in the corpus. |
| if c.canMinimize() && result.canMinimize && c.crashMinimizing == nil { |
| // Send back to workers to find a smaller value that preserves |
| // at least one new coverage bit. |
| c.queueForMinimization(result, keepCoverage) |
| } else { |
| // Update the coordinator's coverage mask and save the value. |
| inputSize := len(result.entry.Data) |
| entryNew, err := c.addCorpusEntries(true, result.entry) |
| if err != nil { |
| stop(err) |
| break |
| } |
| if !entryNew { |
| continue |
| } |
| c.updateCoverage(keepCoverage) |
| c.inputQueue.enqueue(result.entry) |
| c.interestingCount++ |
| if shouldPrintDebugInfo() { |
| fmt.Fprintf( |
| c.opts.Log, |
| "DEBUG new interesting input, elapsed: %s, id: %s, parent: %s, gen: %d, new bits: %d, total bits: %d, size: %d, exec time: %s\n", |
| c.elapsed(), |
| result.entry.Path, |
| result.entry.Parent, |
| result.entry.Generation, |
| countBits(keepCoverage), |
| countBits(c.coverageMask), |
| inputSize, |
| result.entryDuration, |
| ) |
| } |
| } |
| } else { |
| if shouldPrintDebugInfo() { |
| fmt.Fprintf( |
| c.opts.Log, |
| "DEBUG worker reported interesting input that doesn't expand coverage, elapsed: %s, id: %s, parent: %s, canMinimize: %t\n", |
| c.elapsed(), |
| result.entry.Path, |
| result.entry.Parent, |
| result.canMinimize, |
| ) |
| } |
| } |
| } else if c.warmupRun() { |
| // No error or coverage data was reported for this input during |
| // warmup, so continue processing results. |
| c.warmupInputLeft-- |
| if c.warmupInputLeft == 0 { |
| fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, testing seed corpus: %d/%d completed, now fuzzing with %d workers\n", c.elapsed(), c.warmupInputCount, c.warmupInputCount, c.opts.Parallel) |
| if shouldPrintDebugInfo() { |
| fmt.Fprintf( |
| c.opts.Log, |
| "DEBUG finished testing-only phase, elapsed: %s, entries: %d\n", |
| time.Since(c.startTime), |
| len(c.corpus.entries), |
| ) |
| } |
| } |
| } |
| |
| // Once the result has been processed, stop the worker if we |
| // have reached the fuzzing limit. |
| if c.opts.Limit > 0 && c.count >= c.opts.Limit { |
| stop(nil) |
| } |
| |
| case inputC <- input: |
| // Sent the next input to a worker. |
| c.sentInput(input) |
| |
| case minimizeC <- minimizeInput: |
| // Sent the next input for minimization to a worker. |
| c.sentMinimizeInput(minimizeInput) |
| |
| case <-statTicker.C: |
| c.logStats() |
| } |
| } |
| |
| // TODO(jayconrod,katiehockman): if a crasher can't be written to the corpus, |
| // write to the cache instead. |
| } |
| |
// crashError describes a crasher that was written to the seed corpus. Besides
// the underlying failure, it records the name of the file where the input
// causing the crasher was saved. The testing framework uses this to report a
// command to re-run that specific input.
type crashError struct {
	path string
	err  error
}

// Error reports the message of the underlying crash error.
func (ce *crashError) Error() string {
	msg := ce.err.Error()
	return msg
}

// Unwrap returns the underlying crash error.
func (ce *crashError) Unwrap() error {
	return ce.err
}

// CrashPath returns the name of the file in which the crashing input was saved.
func (ce *crashError) CrashPath() string {
	return ce.path
}
| |
// corpus is the in-memory collection of corpus entries kept by the
// coordinator.
type corpus struct {
	// entries holds the corpus entries, in the order they were appended by
	// addCorpusEntries.
	entries []CorpusEntry
	// hashes records the SHA-256 digest of the data of each entry that has
	// been added. addCorpusEntries consults it to skip duplicates.
	hashes map[[sha256.Size]byte]bool
}
| |
| // addCorpusEntries adds entries to the corpus, and optionally writes the entries |
| // to the cache directory. If an entry is already in the corpus it is skipped. If |
| // all of the entries are unique, addCorpusEntries returns true and a nil error, |
| // if at least one of the entries was a duplicate, it returns false and a nil error. |
| func (c *coordinator) addCorpusEntries(addToCache bool, entries ...CorpusEntry) (bool, error) { |
| noDupes := true |
| for _, e := range entries { |
| data, err := corpusEntryData(e) |
| if err != nil { |
| return false, err |
| } |
| h := sha256.Sum256(data) |
| if c.corpus.hashes[h] { |
| noDupes = false |
| continue |
| } |
| if addToCache { |
| if err := writeToCorpus(&e, c.opts.CacheDir); err != nil { |
| return false, err |
| } |
| // For entries written to disk, we don't hold onto the bytes, |
| // since the corpus would consume a significant amount of |
| // memory. |
| e.Data = nil |
| } |
| c.corpus.hashes[h] = true |
| c.corpus.entries = append(c.corpus.entries, e) |
| } |
| return noDupes, nil |
| } |
| |
// CorpusEntry represents an individual input for fuzzing.
//
// We must use an equivalent type in the testing and testing/internal/testdeps
// packages, but testing can't import this package directly, and we don't want
// to export this type from testing. Instead, we use the same struct type and
// use a type alias (not a defined type) for convenience.
type CorpusEntry = struct {
	// NOTE(review): Parent is printed as "parent" in the coordinator's debug
	// output; presumably it names the entry this one was derived from —
	// confirm against the worker code.
	Parent string

	// Path is the path of the corpus file, if the entry was loaded from disk.
	// For other entries, including seed values provided by f.Add, Path is the
	// name of the test, e.g. seed#0 or its hash.
	Path string

	// Data is the raw input data. Data should only be populated for seed
	// values. For on-disk corpus files, Data will be nil, as it will be loaded
	// from disk using Path.
	Data []byte

	// Values is the unmarshaled values from a corpus file.
	Values []any

	// NOTE(review): Generation is printed as "gen" in the coordinator's debug
	// output; presumably it counts how many derivation steps separate this
	// entry from its original ancestor — confirm against the worker code.
	Generation int

	// IsSeed indicates whether this entry is part of the seed corpus.
	IsSeed bool
}
| |
| // corpusEntryData returns the raw input bytes, either from the data struct |
| // field, or from disk. |
| func corpusEntryData(ce CorpusEntry) ([]byte, error) { |
| if ce.Data != nil { |
| return ce.Data, nil |
| } |
| |
| return os.ReadFile(ce.Path) |
| } |
| |
// fuzzInput is the message the coordinator sends on its inputC channel to
// hand a worker a value to fuzz.
type fuzzInput struct {
	// entry is the value to test initially. The worker will randomly mutate
	// values from this starting point.
	entry CorpusEntry

	// timeout is the time to spend fuzzing variations of this input,
	// not including starting or cleaning up.
	timeout time.Duration

	// limit is the maximum number of calls to the fuzz function the worker may
	// make. The worker may make fewer calls, for example, if it finds an
	// error early. If limit is zero, there is no limit on calls to the
	// fuzz function.
	limit int64

	// warmup indicates whether this is a warmup input before fuzzing begins. If
	// true, the input should not be fuzzed.
	warmup bool

	// coverageData reflects the coordinator's current coverageMask.
	coverageData []byte
}
| |
// fuzzResult is the message a worker sends back to the coordinator on the
// resultC channel after testing one or more values.
type fuzzResult struct {
	// entry is an interesting value or a crasher.
	entry CorpusEntry

	// crasherMsg is an error message from a crash. It's "" if no crash was found.
	crasherMsg string

	// canMinimize is true if the worker should attempt to minimize this result.
	// It may be false because an attempt has already been made.
	canMinimize bool

	// coverageData is set if the worker found new coverage.
	coverageData []byte

	// limit is the number of values the coordinator asked the worker
	// to test. 0 if there was no limit.
	limit int64

	// count is the number of values the worker actually tested.
	count int64

	// totalDuration is the time the worker spent testing inputs.
	totalDuration time.Duration

	// entryDuration is the time the worker spent executing an interesting result.
	entryDuration time.Duration
}
| |
// fuzzMinimizeInput is the message the coordinator sends on its minimizeC
// channel to ask a worker to minimize a value.
type fuzzMinimizeInput struct {
	// entry is an interesting value or crasher to minimize.
	entry CorpusEntry

	// crasherMsg is an error message from a crash. It's "" if no crash was found.
	// If set, the worker will attempt to find a smaller input that also produces
	// an error, though not necessarily the same error.
	crasherMsg string

	// limit is the maximum number of calls to the fuzz function the worker may
	// make. The worker may make fewer calls, for example, if it can't reproduce
	// an error. If limit is zero, there is no limit on calls to the fuzz function.
	limit int64

	// timeout is the time to spend minimizing this input.
	// A zero timeout means no limit.
	timeout time.Duration

	// keepCoverage is a set of coverage bits that entry found that were not in
	// the coordinator's combined set. When minimizing, the worker should find an
	// input that preserves at least one of these bits. keepCoverage is nil for
	// crashing inputs.
	keepCoverage []byte
}
| |
// coordinator holds channels that workers can use to communicate with
// the coordinator, together with the coordinator's bookkeeping: execution
// counters, the corpus, the input and minimization queues, and the combined
// coverage mask.
type coordinator struct {
	// opts is the set of options the coordinator was created with.
	opts CoordinateFuzzingOpts

	// startTime is the time we started the workers after loading the corpus.
	// Used for logging.
	startTime time.Time

	// inputC is sent values to fuzz by the coordinator. Any worker may receive
	// values from this channel. Workers send results to resultC.
	inputC chan fuzzInput

	// minimizeC is sent values to minimize by the coordinator. Any worker may
	// receive values from this channel. Workers send results to resultC.
	minimizeC chan fuzzMinimizeInput

	// resultC is sent results of fuzzing by workers. The coordinator
	// receives these. Multiple types of messages are allowed.
	resultC chan fuzzResult

	// count is the number of values fuzzed so far.
	count int64

	// countLastLog is the number of values fuzzed when the output was last
	// logged.
	countLastLog int64

	// timeLastLog is the time at which the output was last logged.
	timeLastLog time.Time

	// interestingCount is the number of unique interesting values which have
	// been found this execution.
	interestingCount int

	// warmupInputCount is the count of all entries in the corpus which will
	// need to be received from workers to run once during warmup, but not fuzz.
	// This could be for coverage data, or only for the purposes of verifying
	// that the seed corpus doesn't have any crashers. See warmupRun.
	warmupInputCount int

	// warmupInputLeft is the number of entries in the corpus which still need
	// to be received from workers to run once during warmup, but not fuzz.
	// See warmupRun.
	warmupInputLeft int

	// duration is the time spent fuzzing inside workers, not counting time
	// starting up or tearing down.
	duration time.Duration

	// countWaiting is the number of fuzzing executions the coordinator is
	// waiting on workers to complete.
	countWaiting int64

	// corpus is a set of interesting values, including the seed corpus and
	// generated values that workers reported as interesting.
	corpus corpus

	// minimizationAllowed is true if one or more of the types of fuzz
	// function's parameters can be minimized.
	minimizationAllowed bool

	// inputQueue is a queue of inputs that workers should try fuzzing. This is
	// initially populated from the seed corpus and cached inputs. More inputs
	// may be added as new coverage is discovered.
	inputQueue queue

	// minimizeQueue is a queue of inputs that caused errors or exposed new
	// coverage. Workers should attempt to find smaller inputs that do the
	// same thing.
	minimizeQueue queue

	// crashMinimizing is the crash that is currently being minimized.
	crashMinimizing *fuzzResult

	// coverageMask aggregates coverage that was found for all inputs in the
	// corpus. Each byte represents a single basic execution block. Each set bit
	// within the byte indicates that an input has triggered that block at least
	// 1 << n times, where n is the position of the bit in the byte. For example, a
	// value of 12 indicates that separate inputs have triggered this block
	// between 4-7 times and 8-15 times.
	coverageMask []byte
}
| |
| func newCoordinator(opts CoordinateFuzzingOpts) (*coordinator, error) { |
| // Make sure all of the seed corpus has marshalled data. |
| for i := range opts.Seed { |
| if opts.Seed[i].Data == nil && opts.Seed[i].Values != nil { |
| opts.Seed[i].Data = marshalCorpusFile(opts.Seed[i].Values...) |
| } |
| } |
| c := &coordinator{ |
| opts: opts, |
| startTime: time.Now(), |
| inputC: make(chan fuzzInput), |
| minimizeC: make(chan fuzzMinimizeInput), |
| resultC: make(chan fuzzResult), |
| timeLastLog: time.Now(), |
| corpus: corpus{hashes: make(map[[sha256.Size]byte]bool)}, |
| } |
| if err := c.readCache(); err != nil { |
| return nil, err |
| } |
| if opts.MinimizeLimit > 0 || opts.MinimizeTimeout > 0 { |
| for _, t := range opts.Types { |
| if isMinimizable(t) { |
| c.minimizationAllowed = true |
| break |
| } |
| } |
| } |
| |
| covSize := len(coverage()) |
| if covSize == 0 { |
| fmt.Fprintf(c.opts.Log, "warning: the test binary was not built with coverage instrumentation, so fuzzing will run without coverage guidance and may be inefficient\n") |
| // Even though a coverage-only run won't occur, we should still run all |
| // of the seed corpus to make sure there are no existing failures before |
| // we start fuzzing. |
| c.warmupInputCount = len(c.opts.Seed) |
| for _, e := range c.opts.Seed { |
| c.inputQueue.enqueue(e) |
| } |
| } else { |
| c.warmupInputCount = len(c.corpus.entries) |
| for _, e := range c.corpus.entries { |
| c.inputQueue.enqueue(e) |
| } |
| // Set c.coverageMask to a clean []byte full of zeros. |
| c.coverageMask = make([]byte, covSize) |
| } |
| c.warmupInputLeft = c.warmupInputCount |
| |
| if len(c.corpus.entries) == 0 { |
| fmt.Fprintf(c.opts.Log, "warning: starting with empty corpus\n") |
| var vals []any |
| for _, t := range opts.Types { |
| vals = append(vals, zeroValue(t)) |
| } |
| data := marshalCorpusFile(vals...) |
| h := sha256.Sum256(data) |
| name := fmt.Sprintf("%x", h[:4]) |
| c.addCorpusEntries(false, CorpusEntry{Path: name, Data: data}) |
| } |
| |
| return c, nil |
| } |
| |
| func (c *coordinator) updateStats(result fuzzResult) { |
| c.count += result.count |
| c.countWaiting -= result.limit |
| c.duration += result.totalDuration |
| } |
| |
| func (c *coordinator) logStats() { |
| now := time.Now() |
| if c.warmupRun() { |
| runSoFar := c.warmupInputCount - c.warmupInputLeft |
| if coverageEnabled { |
| fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, gathering baseline coverage: %d/%d completed\n", c.elapsed(), runSoFar, c.warmupInputCount) |
| } else { |
| fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, testing seed corpus: %d/%d completed\n", c.elapsed(), runSoFar, c.warmupInputCount) |
| } |
| } else if c.crashMinimizing != nil { |
| fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, minimizing\n", c.elapsed()) |
| } else { |
| rate := float64(c.count-c.countLastLog) / now.Sub(c.timeLastLog).Seconds() |
| if coverageEnabled { |
| total := c.warmupInputCount + c.interestingCount |
| fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, execs: %d (%.0f/sec), new interesting: %d (total: %d)\n", c.elapsed(), c.count, rate, c.interestingCount, total) |
| } else { |
| fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, execs: %d (%.0f/sec)\n", c.elapsed(), c.count, rate) |
| } |
| } |
| c.countLastLog = c.count |
| c.timeLastLog = now |
| } |
| |
| // peekInput returns the next value that should be sent to workers. |
| // If the number of executions is limited, the returned value includes |
| // a limit for one worker. If there are no executions left, peekInput returns |
| // a zero value and false. |
| // |
| // peekInput doesn't actually remove the input from the queue. The caller |
| // must call sentInput after sending the input. |
| // |
| // If the input queue is empty and the coverage/testing-only run has completed, |
| // queue refills it from the corpus. |
| func (c *coordinator) peekInput() (fuzzInput, bool) { |
| if c.opts.Limit > 0 && c.count+c.countWaiting >= c.opts.Limit { |
| // Already making the maximum number of calls to the fuzz function. |
| // Don't send more inputs right now. |
| return fuzzInput{}, false |
| } |
| if c.inputQueue.len == 0 { |
| if c.warmupRun() { |
| // Wait for coverage/testing-only run to finish before sending more |
| // inputs. |
| return fuzzInput{}, false |
| } |
| c.refillInputQueue() |
| } |
| |
| entry, ok := c.inputQueue.peek() |
| if !ok { |
| panic("input queue empty after refill") |
| } |
| input := fuzzInput{ |
| entry: entry.(CorpusEntry), |
| timeout: workerFuzzDuration, |
| warmup: c.warmupRun(), |
| } |
| if c.coverageMask != nil { |
| input.coverageData = bytes.Clone(c.coverageMask) |
| } |
| if input.warmup { |
| // No fuzzing will occur, but it should count toward the limit set by |
| // -fuzztime. |
| input.limit = 1 |
| return input, true |
| } |
| |
| if c.opts.Limit > 0 { |
| input.limit = c.opts.Limit / int64(c.opts.Parallel) |
| if c.opts.Limit%int64(c.opts.Parallel) > 0 { |
| input.limit++ |
| } |
| remaining := c.opts.Limit - c.count - c.countWaiting |
| if input.limit > remaining { |
| input.limit = remaining |
| } |
| } |
| return input, true |
| } |
| |
| // sentInput updates internal counters after an input is sent to c.inputC. |
| func (c *coordinator) sentInput(input fuzzInput) { |
| c.inputQueue.dequeue() |
| c.countWaiting += input.limit |
| } |
| |
| // refillInputQueue refills the input queue from the corpus after it becomes |
| // empty. |
| func (c *coordinator) refillInputQueue() { |
| for _, e := range c.corpus.entries { |
| c.inputQueue.enqueue(e) |
| } |
| } |
| |
| // queueForMinimization creates a fuzzMinimizeInput from result and adds it |
| // to the minimization queue to be sent to workers. |
| func (c *coordinator) queueForMinimization(result fuzzResult, keepCoverage []byte) { |
| if result.crasherMsg != "" { |
| c.minimizeQueue.clear() |
| } |
| |
| input := fuzzMinimizeInput{ |
| entry: result.entry, |
| crasherMsg: result.crasherMsg, |
| keepCoverage: keepCoverage, |
| } |
| c.minimizeQueue.enqueue(input) |
| } |
| |
| // peekMinimizeInput returns the next input that should be sent to workers for |
| // minimization. |
| func (c *coordinator) peekMinimizeInput() (fuzzMinimizeInput, bool) { |
| if !c.canMinimize() { |
| // Already making the maximum number of calls to the fuzz function. |
| // Don't send more inputs right now. |
| return fuzzMinimizeInput{}, false |
| } |
| v, ok := c.minimizeQueue.peek() |
| if !ok { |
| return fuzzMinimizeInput{}, false |
| } |
| input := v.(fuzzMinimizeInput) |
| |
| if c.opts.MinimizeTimeout > 0 { |
| input.timeout = c.opts.MinimizeTimeout |
| } |
| if c.opts.MinimizeLimit > 0 { |
| input.limit = c.opts.MinimizeLimit |
| } else if c.opts.Limit > 0 { |
| if input.crasherMsg != "" { |
| input.limit = c.opts.Limit |
| } else { |
| input.limit = c.opts.Limit / int64(c.opts.Parallel) |
| if c.opts.Limit%int64(c.opts.Parallel) > 0 { |
| input.limit++ |
| } |
| } |
| } |
| if c.opts.Limit > 0 { |
| remaining := c.opts.Limit - c.count - c.countWaiting |
| if input.limit > remaining { |
| input.limit = remaining |
| } |
| } |
| return input, true |
| } |
| |
| // sentMinimizeInput removes an input from the minimization queue after it's |
| // sent to minimizeC. |
| func (c *coordinator) sentMinimizeInput(input fuzzMinimizeInput) { |
| c.minimizeQueue.dequeue() |
| c.countWaiting += input.limit |
| } |
| |
| // warmupRun returns true while the coordinator is running inputs without |
| // mutating them as a warmup before fuzzing. This could be to gather baseline |
| // coverage data for entries in the corpus, or to test all of the seed corpus |
| // for errors before fuzzing begins. |
| // |
| // The coordinator doesn't store coverage data in the cache with each input |
| // because that data would be invalid when counter offsets in the test binary |
| // change. |
| // |
| // When gathering coverage, the coordinator sends each entry to a worker to |
| // gather coverage for that entry only, without fuzzing or minimizing. This |
| // phase ends when all workers have finished, and the coordinator has a combined |
| // coverage map. |
| func (c *coordinator) warmupRun() bool { |
| return c.warmupInputLeft > 0 |
| } |
| |
| // updateCoverage sets bits in c.coverageMask that are set in newCoverage. |
| // updateCoverage returns the number of newly set bits. See the comment on |
| // coverageMask for the format. |
| func (c *coordinator) updateCoverage(newCoverage []byte) int { |
| if len(newCoverage) != len(c.coverageMask) { |
| panic(fmt.Sprintf("number of coverage counters changed at runtime: %d, expected %d", len(newCoverage), len(c.coverageMask))) |
| } |
| newBitCount := 0 |
| for i := range newCoverage { |
| diff := newCoverage[i] &^ c.coverageMask[i] |
| newBitCount += bits.OnesCount8(diff) |
| c.coverageMask[i] |= newCoverage[i] |
| } |
| return newBitCount |
| } |
| |
| // canMinimize returns whether the coordinator should attempt to find smaller |
| // inputs that reproduce a crash or new coverage. |
| func (c *coordinator) canMinimize() bool { |
| return c.minimizationAllowed && |
| (c.opts.Limit == 0 || c.count+c.countWaiting < c.opts.Limit) |
| } |
| |
| func (c *coordinator) elapsed() time.Duration { |
| return time.Since(c.startTime).Round(1 * time.Second) |
| } |
| |
| // readCache creates a combined corpus from seed values and values in the cache |
| // (in GOCACHE/fuzz). |
| // |
| // TODO(fuzzing): need a mechanism that can remove values that |
| // aren't useful anymore, for example, because they have the wrong type. |
| func (c *coordinator) readCache() error { |
| if _, err := c.addCorpusEntries(false, c.opts.Seed...); err != nil { |
| return err |
| } |
| entries, err := ReadCorpus(c.opts.CacheDir, c.opts.Types) |
| if err != nil { |
| if _, ok := err.(*MalformedCorpusError); !ok { |
| // It's okay if some files in the cache directory are malformed and |
| // are not included in the corpus, but fail if it's an I/O error. |
| return err |
| } |
| // TODO(jayconrod,katiehockman): consider printing some kind of warning |
| // indicating the number of files which were skipped because they are |
| // malformed. |
| } |
| if _, err := c.addCorpusEntries(false, entries...); err != nil { |
| return err |
| } |
| return nil |
| } |
| |
// MalformedCorpusError reports the problems found while reading a corpus from
// the filesystem. Each individual failure is kept in errs. The testing
// framework uses this to report malformed files in testdata.
type MalformedCorpusError struct {
	errs []error
}

// Error returns the messages of all collected errors, one per line, in the
// order in which they were recorded.
func (e *MalformedCorpusError) Error() string {
	lines := make([]string, len(e.errs))
	for i, err := range e.errs {
		lines[i] = err.Error()
	}
	return strings.Join(lines, "\n")
}
| |
| // ReadCorpus reads the corpus from the provided dir. The returned corpus |
| // entries are guaranteed to match the given types. Any malformed files will |
| // be saved in a MalformedCorpusError and returned, along with the most recent |
| // error. |
| func ReadCorpus(dir string, types []reflect.Type) ([]CorpusEntry, error) { |
| files, err := os.ReadDir(dir) |
| if os.IsNotExist(err) { |
| return nil, nil // No corpus to read |
| } else if err != nil { |
| return nil, fmt.Errorf("reading seed corpus from testdata: %v", err) |
| } |
| var corpus []CorpusEntry |
| var errs []error |
| for _, file := range files { |
| // TODO(jayconrod,katiehockman): determine when a file is a fuzzing input |
| // based on its name. We should only read files created by writeToCorpus. |
| // If we read ALL files, we won't be able to change the file format by |
| // changing the extension. We also won't be able to add files like |
| // README.txt explaining why the directory exists. |
| if file.IsDir() { |
| continue |
| } |
| filename := filepath.Join(dir, file.Name()) |
| data, err := os.ReadFile(filename) |
| if err != nil { |
| return nil, fmt.Errorf("failed to read corpus file: %v", err) |
| } |
| var vals []any |
| vals, err = readCorpusData(data, types) |
| if err != nil { |
| errs = append(errs, fmt.Errorf("%q: %v", filename, err)) |
| continue |
| } |
| corpus = append(corpus, CorpusEntry{Path: filename, Values: vals}) |
| } |
| if len(errs) > 0 { |
| return corpus, &MalformedCorpusError{errs: errs} |
| } |
| return corpus, nil |
| } |
| |
| func readCorpusData(data []byte, types []reflect.Type) ([]any, error) { |
| vals, err := unmarshalCorpusFile(data) |
| if err != nil { |
| return nil, fmt.Errorf("unmarshal: %v", err) |
| } |
| if err = CheckCorpus(vals, types); err != nil { |
| return nil, err |
| } |
| return vals, nil |
| } |
| |
// CheckCorpus verifies that vals has exactly one value per entry in types and
// that the dynamic type of each value is identical to the expected type at the
// same position. It returns a descriptive error on the first mismatch and nil
// otherwise.
func CheckCorpus(vals []any, types []reflect.Type) error {
	if got, want := len(vals), len(types); got != want {
		return fmt.Errorf("wrong number of values in corpus entry: %d, want %d", got, want)
	}
	for i, want := range types {
		if reflect.TypeOf(vals[i]) == want {
			continue
		}
		// The error message lists the types of all values, so collect them
		// only once a mismatch has actually been found.
		actual := make([]reflect.Type, 0, len(vals))
		for _, v := range vals {
			actual = append(actual, reflect.TypeOf(v))
		}
		return fmt.Errorf("mismatched types in corpus entry: %v, want %v", actual, types)
	}
	return nil
}
| |
| // writeToCorpus atomically writes the given bytes to a new file in testdata. If |
| // the directory does not exist, it will create one. If the file already exists, |
| // writeToCorpus will not rewrite it. writeToCorpus sets entry.Path to the new |
| // file that was just written or an error if it failed. |
| func writeToCorpus(entry *CorpusEntry, dir string) (err error) { |
| sum := fmt.Sprintf("%x", sha256.Sum256(entry.Data))[:16] |
| entry.Path = filepath.Join(dir, sum) |
| if err := os.MkdirAll(dir, 0777); err != nil { |
| return err |
| } |
| if err := os.WriteFile(entry.Path, entry.Data, 0666); err != nil { |
| os.Remove(entry.Path) // remove partially written file |
| return err |
| } |
| return nil |
| } |
| |
// testName returns the last element of path, as computed by filepath.Base.
func testName(path string) string {
	base := filepath.Base(path)
	return base
}
| |
// zeroValue returns the entry of zeroVals whose dynamic type is identical to
// t. It panics if zeroVals has no entry of that type.
func zeroValue(t reflect.Type) any {
	for i := range zeroVals {
		if zero := zeroVals[i]; reflect.TypeOf(zero) == t {
			return zero
		}
	}
	panic(fmt.Sprintf("unsupported type: %v", t))
}

// zeroVals holds one zero value for each type that zeroValue can be asked
// for. zeroValue scans it in order and returns the first entry whose type
// matches.
var zeroVals = []any{
	[]byte(""),
	string(""),
	false,
	byte(0),
	rune(0),
	float32(0),
	float64(0),
	int(0),
	int8(0),
	int16(0),
	int32(0),
	int64(0),
	uint(0),
	uint8(0),
	uint16(0),
	uint32(0),
	uint64(0),
}
| |
| var debugInfo = godebug.New("fuzzdebug").Value() == "1" |
| |
| func shouldPrintDebugInfo() bool { |
| return debugInfo |
| } |