// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package fuzz provides common fuzzing functionality for tests built with
// "go test" and for programs that use fuzzing functionality in the testing
// package.
package fuzz

import (
	"context"
	"crypto/sha256"
	"errors"
	"fmt"
	"io"
	"io/ioutil"
	"os"
	"path/filepath"
	"reflect"
	"runtime"
	"strings"
	"time"
)

// CoordinateFuzzingOpts is a set of arguments for CoordinateFuzzing.
// The zero value is valid for each field unless specified otherwise.
type CoordinateFuzzingOpts struct {
	// Log is a writer for logging progress messages and warnings.
	// If nil, io.Discard will be used instead.
	Log io.Writer

	// Timeout is the amount of wall clock time to spend fuzzing after the corpus
	// has loaded. If zero, there will be no time limit.
	Timeout time.Duration

	// Count is the number of random values to generate and test. If zero,
	// there will be no limit on the number of generated values.
	Count int64

	// Parallel is the number of worker processes to run in parallel. If zero,
	// CoordinateFuzzing will run GOMAXPROCS workers.
	Parallel int

	// Seed is a list of seed values added by the fuzz target with testing.F.Add
	// and in testdata.
	Seed []CorpusEntry

	// Types is the list of types which make up a corpus entry.
	// Types must be set and must match values in Seed.
	Types []reflect.Type

	// CorpusDir is a directory where files containing values that crash the
	// code being tested may be written. CorpusDir must be set.
	CorpusDir string

	// CacheDir is a directory containing additional "interesting" values.
	// The fuzzer may derive new values from these, and may write new values here.
	CacheDir string
}

// CoordinateFuzzing creates several worker processes and communicates with
// them to test random inputs that could trigger crashes and expose bugs.
// The worker processes run the same binary in the same directory with the
// same environment variables as the coordinator process. Workers also run
// with the same arguments as the coordinator, except with the -test.fuzzworker
// flag prepended to the argument list.
//
// If a crash occurs, the function will return an error containing information
// about the crash, which can be reported to the user.
func CoordinateFuzzing(ctx context.Context, opts CoordinateFuzzingOpts) (err error) {
	if err := ctx.Err(); err != nil {
		return err
	}
	if opts.Log == nil {
		opts.Log = io.Discard
	}
	if opts.Parallel == 0 {
		opts.Parallel = runtime.GOMAXPROCS(0)
	}
	if opts.Count > 0 && int64(opts.Parallel) > opts.Count {
		// Don't start more workers than we need.
		opts.Parallel = int(opts.Count)
	}

	c, err := newCoordinator(opts)
	if err != nil {
		return err
	}

	if opts.Timeout > 0 {
		var cancel func()
		ctx, cancel = context.WithTimeout(ctx, opts.Timeout)
		defer cancel()
	}

	// TODO(jayconrod): do we want to support fuzzing different binaries?
	dir := "" // same as self
	binPath := os.Args[0]
	args := append([]string{"-test.fuzzworker"}, os.Args[1:]...)
	env := os.Environ() // same as self

	// newWorker creates a worker but doesn't start it yet.
	newWorker := func() (*worker, error) {
		mem, err := sharedMemTempFile(workerSharedMemSize)
		if err != nil {
			return nil, err
		}
		memMu := make(chan *sharedMem, 1)
		memMu <- mem
		return &worker{
			dir:         dir,
			binPath:     binPath,
			args:        args,
			env:         env[:len(env):len(env)], // copy on append to ensure workers don't overwrite each other.
			coordinator: c,
			memMu:       memMu,
		}, nil
	}

	// fuzzCtx is used to stop workers, for example, after finding a crasher.
	fuzzCtx, cancelWorkers := context.WithCancel(ctx)
	defer cancelWorkers()
	doneC := ctx.Done()
	inputC := c.inputC

	// stop is called when a worker encounters a fatal error.
	var fuzzErr error
	stopping := false
	stop := func(err error) {
		if err == fuzzCtx.Err() || isInterruptError(err) {
			// Suppress cancellation errors and terminations due to SIGINT.
			// The messages are not helpful since either the user triggered the error
			// (with ^C) or another more helpful message will be printed (a crasher).
			err = nil
		}
		if err != nil && (fuzzErr == nil || fuzzErr == ctx.Err()) {
			fuzzErr = err
		}
		if stopping {
			return
		}
		stopping = true
		cancelWorkers()
		doneC = nil
		inputC = nil
	}

	// Start workers.
	errC := make(chan error)
	workers := make([]*worker, opts.Parallel)
	for i := range workers {
		var err error
		workers[i], err = newWorker()
		if err != nil {
			return err
		}
	}
	for i := range workers {
		w := workers[i]
		go func() {
			err := w.coordinate(fuzzCtx)
			cleanErr := w.cleanup()
			if err == nil {
				err = cleanErr
			}
			errC <- err
		}()
	}

	// Main event loop.
	// Do not return until all workers have terminated. We avoid a deadlock by
	// receiving messages from workers even after ctx is cancelled.
	activeWorkers := len(workers)
	input, ok := c.nextInput()
	if !ok {
		panic("no input")
	}
	statTicker := time.NewTicker(3 * time.Second)
	defer statTicker.Stop()
	defer c.logStats()

	crashWritten := false

	for {
		select {
		case <-doneC:
			// Interrupted, cancelled, or timed out.
			// stop sets doneC to nil so we don't busy wait here.
			stop(ctx.Err())

		case result := <-c.resultC:
			// Received response from worker.
			c.updateStats(result)
			if c.opts.Count > 0 && c.count >= c.opts.Count {
				stop(nil)
			}

			if result.crasherMsg != "" {
				// Found a crasher. Write it to testdata and return it.
				if crashWritten {
					break
				}
				fileName, err := writeToCorpus(result.entry.Data, opts.CorpusDir)
				if err == nil {
					crashWritten = true
					err = &crashError{
						name: filepath.Base(fileName),
						err:  errors.New(result.crasherMsg),
					}
				}
				// TODO(jayconrod,katiehockman): if -keepfuzzing, report the error to
				// the user and restart the crashed worker.
				stop(err)
			} else if result.coverageData != nil {
				foundNew := c.updateCoverage(result.coverageData)
				if foundNew && !c.coverageOnlyRun() {
					// Found an interesting value that expanded coverage.
					// This is not a crasher, but we should add it to the
					// on-disk corpus, and prioritize it for future fuzzing.
					// TODO(jayconrod, katiehockman): Prioritize fuzzing these
					// values which expanded coverage, perhaps based on the
					// number of new edges that this result expanded.
					// TODO(jayconrod, katiehockman): Don't write a value that's already
					// in the corpus.
					c.interestingCount++
					c.corpus.entries = append(c.corpus.entries, result.entry)
					if opts.CacheDir != "" {
						if _, err := writeToCorpus(result.entry.Data, opts.CacheDir); err != nil {
							stop(err)
						}
					}
				} else if c.coverageOnlyRun() {
					c.covOnlyInputs--
					if c.covOnlyInputs == 0 {
						// The coordinator has finished getting a baseline for
						// coverage. Tell all of the workers to initialize their
						// baseline coverage data (by setting interestingCount
						// to 0).
						c.interestingCount = 0
					}
				}
			}

			if inputC == nil && !stopping && !c.coverageOnlyRun() {
				// inputC was disabled earlier because we hit the limit on the number
				// of inputs to fuzz (nextInput returned false).
				// Workers can do less work than requested though, so we might be
				// below the limit now. Call nextInput again and re-enable inputC if so.
				if input, ok = c.nextInput(); ok {
					inputC = c.inputC
				}
			}

		case err := <-errC:
			// A worker terminated, possibly after encountering a fatal error.
			stop(err)
			activeWorkers--
			if activeWorkers == 0 {
				return fuzzErr
			}

		case inputC <- input:
			// Send the next input to any worker.
			if c.corpusIndex == 0 && c.coverageOnlyRun() {
				// The coordinator is currently trying to run all of the corpus
				// entries to gather baseline coverage data, and all of the
				// inputs have been passed to inputC. Block any more inputs from
				// being passed to the workers for now.
				inputC = nil
			} else if input, ok = c.nextInput(); !ok {
				inputC = nil
			}

		case <-statTicker.C:
			c.logStats()
		}
	}

	// TODO(jayconrod,katiehockman): if a crasher can't be written to the corpus,
	// write to the cache instead.
}

// crashError wraps a crasher written to the seed corpus. It saves the name
// of the file where the input causing the crasher was saved. The testing
// framework uses this to report a command to re-run that specific input.
type crashError struct {
	name string
	err  error
}

func (e *crashError) Error() string {
	return e.err.Error()
}

func (e *crashError) Unwrap() error {
	return e.err
}

func (e *crashError) CrashName() string {
	return e.name
}

type corpus struct {
	entries []CorpusEntry
}

// CorpusEntry represents an individual input for fuzzing.
//
// We must use an equivalent type in the testing and testing/internal/testdeps
// packages, but testing can't import this package directly, and we don't want
// to export this type from testing. Instead, we use the same struct type and
// use a type alias (not a defined type) for convenience.
type CorpusEntry = struct {
	// Name is the name of the corpus file, if the entry was loaded from the
	// seed corpus. It can be used with -run. For entries added with f.Add and
	// entries generated by the mutator, Name is empty.
	Name string

	// Data is the raw data loaded from a corpus file.
	Data []byte

	// Values is the unmarshaled values from a corpus file.
	Values []interface{}
}

type fuzzInput struct {
	// entry is the value to test initially. The worker will randomly mutate
	// values from this starting point.
	entry CorpusEntry

	// countRequested is the number of values to test. If non-zero, the worker
	// will stop after testing this many values, if it hasn't already stopped.
	countRequested int64

	// coverageOnly indicates whether this input is for a coverage-only run. If
	// true, the input should not be fuzzed.
	coverageOnly bool

	// interestingCount reflects the coordinator's current interestingCount
	// value.
	interestingCount int64

	// coverageData reflects the coordinator's current coverageData.
	coverageData []byte
}

type fuzzResult struct {
	// entry is an interesting value or a crasher.
	entry CorpusEntry

	// crasherMsg is an error message from a crash. It's "" if no crash was found.
	crasherMsg string

	// coverageData is set if the worker found new coverage.
	coverageData []byte

	// countRequested is the number of values the coordinator asked the worker
	// to test. 0 if there was no limit.
	countRequested int64

	// count is the number of values the worker actually tested.
	count int64

	// duration is the time the worker spent testing inputs.
	duration time.Duration
}

// coordinator holds channels that workers can use to communicate with
// the coordinator.
type coordinator struct {
	opts CoordinateFuzzingOpts

	// startTime is the time we started the workers after loading the corpus.
	// Used for logging.
	startTime time.Time

	// inputC is sent values to fuzz by the coordinator. Any worker may receive
	// values from this channel.
	inputC chan fuzzInput

	// resultC is sent results of fuzzing by workers. The coordinator
	// receives these. Multiple types of messages are allowed.
	resultC chan fuzzResult

	// count is the number of values fuzzed so far.
	count int64

	// interestingCount is the number of unique interesting values which have
	// been found this execution.
	interestingCount int64

	// covOnlyInputs is the number of entries in the corpus which still need to
	// be sent to a worker to gather baseline coverage data.
	covOnlyInputs int

	// duration is the time spent fuzzing inside workers, not counting time
	// starting up or tearing down.
	duration time.Duration

	// countWaiting is the number of values the coordinator is currently waiting
	// for workers to fuzz.
	countWaiting int64

	// corpus is a set of interesting values, including the seed corpus and
	// generated values that workers reported as interesting.
	corpus corpus

	// corpusIndex is the next value to send to workers.
	// TODO(jayconrod,katiehockman): need a scheduling algorithm that chooses
	// which corpus value to send next (or generates something new).
	corpusIndex int

	coverageData []byte
}

func newCoordinator(opts CoordinateFuzzingOpts) (*coordinator, error) {
	// Make sure all of the seed corpus has marshalled data.
	for i := range opts.Seed {
		if opts.Seed[i].Data == nil {
			opts.Seed[i].Data = marshalCorpusFile(opts.Seed[i].Values...)
		}
	}
	corpus, err := readCache(opts.Seed, opts.Types, opts.CacheDir)
	if err != nil {
		return nil, err
	}
	covOnlyInputs := len(corpus.entries)
	if len(corpus.entries) == 0 {
		var vals []interface{}
		for _, t := range opts.Types {
			vals = append(vals, zeroValue(t))
		}
		corpus.entries = append(corpus.entries, CorpusEntry{Data: marshalCorpusFile(vals...), Values: vals})
	}
	c := &coordinator{
		opts:          opts,
		startTime:     time.Now(),
		inputC:        make(chan fuzzInput),
		resultC:       make(chan fuzzResult),
		corpus:        corpus,
		covOnlyInputs: covOnlyInputs,
	}

	cov := coverageCopy()
	if len(cov) == 0 {
		fmt.Fprintf(c.opts.Log, "warning: coverage-guided fuzzing is not supported on this platform\n")
		c.covOnlyInputs = 0
	} else {
		// Set c.coverageData to a clean []byte full of zeros.
		c.coverageData = make([]byte, len(cov))
	}

	if c.covOnlyInputs > 0 {
		// Set c.interestingCount to -1 so the workers know when the coverage
		// run is finished and can update their local coverage data.
		c.interestingCount = -1
	}

	return c, nil
}

func (c *coordinator) updateStats(result fuzzResult) {
	// Adjust total stats.
	c.count += result.count
	c.countWaiting -= result.countRequested
	c.duration += result.duration
}

func (c *coordinator) logStats() {
	// TODO(jayconrod,katiehockman): consider printing the amount of coverage
	// that has been reached so far (perhaps a percentage of edges?)
	elapsed := time.Since(c.startTime)
	if c.coverageOnlyRun() {
		fmt.Fprintf(c.opts.Log, "gathering baseline coverage, elapsed: %.1fs, workers: %d, left: %d\n", elapsed.Seconds(), c.opts.Parallel, c.covOnlyInputs)
	} else {
		rate := float64(c.count) / elapsed.Seconds()
		fmt.Fprintf(c.opts.Log, "fuzzing, elapsed: %.1fs, execs: %d (%.0f/sec), workers: %d, interesting: %d\n", elapsed.Seconds(), c.count, rate, c.opts.Parallel, c.interestingCount)
	}
}
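
// As an illustration of the execution budgeting done in nextInput below (the
// numbers are hypothetical, not taken from this package): with opts.Count = 100
// and opts.Parallel = 8, each input asks a worker for 100/8 rounded up, that is
// 13 executions, and the request is then capped by however many executions
// remain unassigned (opts.Count - count - countWaiting).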

// nextInput returns the next value that should be sent to workers.
// If the number of executions is limited, the returned value includes
// a limit for one worker. If there are no executions left, nextInput returns
// a zero value and false.
func (c *coordinator) nextInput() (fuzzInput, bool) {
	if c.opts.Count > 0 && c.count+c.countWaiting >= c.opts.Count {
		// Workers are already testing all requested inputs.
		return fuzzInput{}, false
	}
	input := fuzzInput{
		entry:            c.corpus.entries[c.corpusIndex],
		interestingCount: c.interestingCount,
		coverageData:     c.coverageData,
	}
	c.corpusIndex = (c.corpusIndex + 1) % (len(c.corpus.entries))

	if c.coverageOnlyRun() {
		// This is a coverage-only run, so this input shouldn't be fuzzed,
		// and shouldn't be included in the count of generated values.
		input.coverageOnly = true
		return input, true
	}

	if c.opts.Count > 0 {
		input.countRequested = c.opts.Count / int64(c.opts.Parallel)
		if c.opts.Count%int64(c.opts.Parallel) > 0 {
			input.countRequested++
		}
		remaining := c.opts.Count - c.count - c.countWaiting
		if input.countRequested > remaining {
			input.countRequested = remaining
		}
		c.countWaiting += input.countRequested
	}
	return input, true
}

func (c *coordinator) coverageOnlyRun() bool {
	return c.covOnlyInputs > 0
}

// updateCoverage updates c.coverageData for all edges that have a higher
// counter value in newCoverage. It returns true if a new edge was hit.
func (c *coordinator) updateCoverage(newCoverage []byte) bool {
	if len(newCoverage) != len(c.coverageData) {
		panic(fmt.Sprintf("num edges changed at runtime: %d, expected %d", len(newCoverage), len(c.coverageData)))
	}
	newEdge := false
	for i := range newCoverage {
		if newCoverage[i] > c.coverageData[i] {
			if c.coverageData[i] == 0 {
				newEdge = true
			}
			c.coverageData[i] = newCoverage[i]
		}
	}
	return newEdge
}

// readCache creates a combined corpus from seed values and values in the cache
// (in GOCACHE/fuzz).
//
// TODO(jayconrod,katiehockman): need a mechanism that can remove values that
// aren't useful anymore, for example, because they have the wrong type.
func readCache(seed []CorpusEntry, types []reflect.Type, cacheDir string) (corpus, error) {
	var c corpus
	c.entries = append(c.entries, seed...)
	entries, err := ReadCorpus(cacheDir, types)
	if err != nil {
		if _, ok := err.(*MalformedCorpusError); !ok {
			// It's okay if some files in the cache directory are malformed and
			// are not included in the corpus, but fail if it's an I/O error.
			return corpus{}, err
		}
		// TODO(jayconrod,katiehockman): consider printing some kind of warning
		// indicating the number of files which were skipped because they are
		// malformed.
	}
	c.entries = append(c.entries, entries...)
	return c, nil
}

// MalformedCorpusError is an error found while reading the corpus from the
// filesystem. All of the errors are stored in the errs list. The testing
// framework uses this to report malformed files in testdata.
type MalformedCorpusError struct {
	errs []error
}

func (e *MalformedCorpusError) Error() string {
	var msgs []string
	for _, s := range e.errs {
		msgs = append(msgs, s.Error())
	}
	return strings.Join(msgs, "\n")
}

// ReadCorpus reads the corpus from the provided dir. The returned corpus
// entries are guaranteed to match the given types. Any malformed files will
// be saved in a MalformedCorpusError and returned, along with the most recent
// error.
func ReadCorpus(dir string, types []reflect.Type) ([]CorpusEntry, error) {
	files, err := ioutil.ReadDir(dir)
	if os.IsNotExist(err) {
		return nil, nil // No corpus to read
	} else if err != nil {
		return nil, fmt.Errorf("reading seed corpus from testdata: %v", err)
	}
	var corpus []CorpusEntry
	var errs []error
	for _, file := range files {
		// TODO(jayconrod,katiehockman): determine when a file is a fuzzing input
		// based on its name. We should only read files created by writeToCorpus.
		// If we read ALL files, we won't be able to change the file format by
		// changing the extension. We also won't be able to add files like
		// README.txt explaining why the directory exists.
		if file.IsDir() {
			continue
		}
		filename := filepath.Join(dir, file.Name())
		data, err := ioutil.ReadFile(filename)
		if err != nil {
			return nil, fmt.Errorf("failed to read corpus file: %v", err)
		}
		var vals []interface{}
		vals, err = readCorpusData(data, types)
		if err != nil {
			errs = append(errs, fmt.Errorf("%q: %v", filename, err))
			continue
		}
		corpus = append(corpus, CorpusEntry{Name: filename, Data: data, Values: vals})
	}
	if len(errs) > 0 {
		return corpus, &MalformedCorpusError{errs: errs}
	}
	return corpus, nil
}

func readCorpusData(data []byte, types []reflect.Type) ([]interface{}, error) {
	vals, err := unmarshalCorpusFile(data)
	if err != nil {
		return nil, fmt.Errorf("unmarshal: %v", err)
	}
	if len(vals) != len(types) {
		return nil, fmt.Errorf("wrong number of values in corpus file: %d, want %d", len(vals), len(types))
	}
	for i := range types {
		if reflect.TypeOf(vals[i]) != types[i] {
			return nil, fmt.Errorf("mismatched types in corpus file: %v, want %v", vals, types)
		}
	}
	return vals, nil
}

// writeToCorpus atomically writes the given bytes to a new file in testdata.
// If the directory does not exist, it will create one. If the file already
// exists, writeToCorpus will not rewrite it. writeToCorpus returns the
// file's name, or an error if it failed.
func writeToCorpus(b []byte, dir string) (name string, err error) {
	sum := fmt.Sprintf("%x", sha256.Sum256(b))
	name = filepath.Join(dir, sum)
	if err := os.MkdirAll(dir, 0777); err != nil {
		return "", err
	}
	if err := ioutil.WriteFile(name, b, 0666); err != nil {
		os.Remove(name) // remove partially written file
		return "", err
	}
	return name, nil
}

func zeroValue(t reflect.Type) interface{} {
	for _, v := range zeroVals {
		if reflect.TypeOf(v) == t {
			return v
		}
	}
	panic(fmt.Sprintf("unsupported type: %v", t))
}

var zeroVals []interface{} = []interface{}{
	[]byte(""),
	string(""),
	false,
	byte(0),
	rune(0),
	float32(0),
	float64(0),
	int(0),
	int8(0),
	int16(0),
	int32(0),
	int64(0),
	uint(0),
	uint8(0),
	uint16(0),
	uint32(0),
	uint64(0),
}
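
// The sketch below illustrates roughly how a caller (for example, the fuzz
// driver in the testing package) might invoke CoordinateFuzzing. It is an
// illustrative example only, not part of this package's API: the function
// name coordinateExample and all field values are hypothetical.
//
//	func coordinateExample(ctx context.Context, log io.Writer) error {
//		opts := CoordinateFuzzingOpts{
//			Log:       log,
//			Timeout:   10 * time.Second, // stop fuzzing after 10s of wall clock time
//			Parallel:  0,                // 0 means one worker per GOMAXPROCS
//			Types:     []reflect.Type{reflect.TypeOf([]byte(nil))}, // fuzz target takes one []byte
//			CorpusDir: "testdata/fuzz/FuzzExample",                 // crashers are written here
//			CacheDir:  filepath.Join(os.TempDir(), "fuzzcache"),    // interesting values may be cached here
//		}
//		return CoordinateFuzzing(ctx, opts)
//	}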