go/src/internal/fuzz/fuzz.go

678 lines
20 KiB
Go

// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package fuzz provides common fuzzing functionality for tests built with
// "go test" and for programs that use fuzzing functionality in the testing
// package.
package fuzz
import (
"context"
"crypto/sha256"
"errors"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"reflect"
"runtime"
"strings"
"time"
)
// CoordinateFuzzingOpts is a set of arguments for CoordinateFuzzing.
// The zero value is valid for each field unless specified otherwise.
type CoordinateFuzzingOpts struct {
// Log is a writer for logging progress messages and warnings.
// If nil, io.Discard will be used instead.
Log io.Writer
// Timeout is the amount of wall clock time to spend fuzzing after the corpus
// has loaded. If zero, there will be no time limit.
Timeout time.Duration
// Count is the number of random values to generate and test. If zero,
// there will be no limit on the number of generated values.
Count int64
// Parallel is the number of worker processes to run in parallel. If zero,
// CoordinateFuzzing will run GOMAXPROCS workers. If Count is non-zero and
// smaller than Parallel, CoordinateFuzzing starts only Count workers.
Parallel int
// Seed is a list of seed values added by the fuzz target with testing.F.Add
// and in testdata.
Seed []CorpusEntry
// Types is the list of types which make up a corpus entry.
// Types must be set and must match values in Seed.
Types []reflect.Type
// CorpusDir is a directory where files containing values that crash the
// code being tested may be written. CorpusDir must be set.
CorpusDir string
// CacheDir is a directory containing additional "interesting" values.
// The fuzzer may derive new values from these, and may write new values here.
// If empty, interesting values found while fuzzing are not written to disk.
CacheDir string
}
// CoordinateFuzzing creates several worker processes and communicates with
// them to test random inputs that could trigger crashes and expose bugs.
// The worker processes run the same binary in the same directory with the
// same environment variables as the coordinator process. Workers also run
// with the same arguments as the coordinator, except with the -test.fuzzworker
// flag prepended to the argument list.
//
// If a crash occurs, the function will return an error containing information
// about the crash, which can be reported to the user.
//
// If ctx is already done on entry, its error is returned immediately. Once the
// worker goroutines have been started, CoordinateFuzzing does not return until
// every one of them has reported termination; it then returns the error
// recorded by the event loop, or nil if none was recorded.
func CoordinateFuzzing(ctx context.Context, opts CoordinateFuzzingOpts) (err error) {
if err := ctx.Err(); err != nil {
return err
}
if opts.Log == nil {
opts.Log = io.Discard
}
if opts.Parallel == 0 {
opts.Parallel = runtime.GOMAXPROCS(0)
}
if opts.Count > 0 && int64(opts.Parallel) > opts.Count {
// Don't start more workers than we need.
opts.Parallel = int(opts.Count)
}
c, err := newCoordinator(opts)
if err != nil {
return err
}
// The timeout is applied only after the coordinator (and so the corpus) has
// been set up, so loading time does not count against opts.Timeout.
if opts.Timeout > 0 {
var cancel func()
ctx, cancel = context.WithTimeout(ctx, opts.Timeout)
defer cancel()
}
// TODO(jayconrod): do we want to support fuzzing different binaries?
dir := "" // same as self
binPath := os.Args[0]
args := append([]string{"-test.fuzzworker"}, os.Args[1:]...)
env := os.Environ() // same as self
// newWorker creates a worker but doesn't start it yet.
newWorker := func() (*worker, error) {
mem, err := sharedMemTempFile(workerSharedMemSize)
if err != nil {
return nil, err
}
// memMu is a one-element buffered channel that initially holds mem.
memMu := make(chan *sharedMem, 1)
memMu <- mem
return &worker{
dir: dir,
binPath: binPath,
args: args,
env: env[:len(env):len(env)], // copy on append to ensure workers don't overwrite each other.
coordinator: c,
memMu: memMu,
}, nil
}
// fuzzCtx is used to stop workers, for example, after finding a crasher.
fuzzCtx, cancelWorkers := context.WithCancel(ctx)
defer cancelWorkers()
// doneC and inputC are local copies so that stop can set them to nil, which
// disables the corresponding cases in the select statement below.
doneC := ctx.Done()
inputC := c.inputC
// stop is called when a worker encounters a fatal error.
// It records err in fuzzErr unless an error other than ctx.Err() is already
// recorded, and on its first call cancels the workers and disables doneC
// and inputC. Later calls can still update fuzzErr but do nothing else.
var fuzzErr error
stopping := false
stop := func(err error) {
if err == fuzzCtx.Err() || isInterruptError(err) {
// Suppress cancellation errors and terminations due to SIGINT.
// The messages are not helpful since either the user triggered the error
// (with ^C) or another more helpful message will be printed (a crasher).
err = nil
}
if err != nil && (fuzzErr == nil || fuzzErr == ctx.Err()) {
fuzzErr = err
}
if stopping {
return
}
stopping = true
cancelWorkers()
doneC = nil
inputC = nil
}
// Start workers.
errC := make(chan error)
workers := make([]*worker, opts.Parallel)
for i := range workers {
var err error
workers[i], err = newWorker()
if err != nil {
// NOTE(review): workers created in earlier iterations are not cleaned up
// on this early return; confirm whether their shared memory files leak.
return err
}
}
for i := range workers {
w := workers[i]
go func() {
err := w.coordinate(fuzzCtx)
cleanErr := w.cleanup()
// Report the coordinate error if there is one, otherwise the cleanup error.
if err == nil {
err = cleanErr
}
errC <- err
}()
}
// Main event loop.
// Do not return until all workers have terminated. We avoid a deadlock by
// receiving messages from workers even after ctx is cancelled.
activeWorkers := len(workers)
input, ok := c.nextInput()
if !ok {
panic("no input")
}
statTicker := time.NewTicker(3 * time.Second)
defer statTicker.Stop()
defer c.logStats()
crashWritten := false
for {
select {
case <-doneC:
// Interrupted, cancelled, or timed out.
// stop sets doneC to nil so we don't busy wait here.
stop(ctx.Err())
case result := <-c.resultC:
// Received response from worker.
c.updateStats(result)
if c.opts.Count > 0 && c.count >= c.opts.Count {
stop(nil)
}
if result.crasherMsg != "" {
// Found a crasher. Write it to testdata and return it.
if crashWritten {
// Only the first crasher is written. This break leaves the select
// statement, not the enclosing for loop.
break
}
fileName, err := writeToCorpus(result.entry.Data, opts.CorpusDir)
if err == nil {
crashWritten = true
err = &crashError{
name: filepath.Base(fileName),
err: errors.New(result.crasherMsg),
}
}
// TODO(jayconrod,katiehockman): if -keepfuzzing, report the error to
// the user and restart the crashed worker.
stop(err)
} else if result.coverageData != nil {
foundNew := c.updateCoverage(result.coverageData)
if foundNew && !c.coverageOnlyRun() {
// Found an interesting value that expanded coverage.
// This is not a crasher, but we should add it to the
// on-disk corpus, and prioritize it for future fuzzing.
// TODO(jayconrod, katiehockman): Prioritize fuzzing these
// values which expanded coverage, perhaps based on the
// number of new edges that this result expanded.
// TODO(jayconrod, katiehockman): Don't write a value that's already
// in the corpus.
c.interestingCount++
c.corpus.entries = append(c.corpus.entries, result.entry)
if opts.CacheDir != "" {
if _, err := writeToCorpus(result.entry.Data, opts.CacheDir); err != nil {
stop(err)
}
}
} else if c.coverageOnlyRun() {
c.covOnlyInputs--
if c.covOnlyInputs == 0 {
// The coordinator has finished getting a baseline for
// coverage. Tell all of the workers to initialize their
// baseline coverage data (by setting interestingCount
// to 0).
c.interestingCount = 0
}
}
}
if inputC == nil && !stopping && !c.coverageOnlyRun() {
// inputC was disabled earlier because we hit the limit on the number
// of inputs to fuzz (nextInput returned false).
// Workers can do less work than requested though, so we might be
// below the limit now. Call nextInput again and re-enable inputC if so.
if input, ok = c.nextInput(); ok {
inputC = c.inputC
}
}
case err := <-errC:
// A worker terminated, possibly after encountering a fatal error.
stop(err)
activeWorkers--
if activeWorkers == 0 {
return fuzzErr
}
case inputC <- input:
// Send the next input to any worker.
if c.corpusIndex == 0 && c.coverageOnlyRun() {
// The coordinator is currently trying to run all of the corpus
// entries to gather baseline coverage data, and all of the
// inputs have been passed to inputC. Block any more inputs from
// being passed to the workers for now.
inputC = nil
} else if input, ok = c.nextInput(); !ok {
inputC = nil
}
case <-statTicker.C:
c.logStats()
}
}
// TODO(jayconrod,katiehockman): if a crasher can't be written to the corpus,
// write to the cache instead.
}
// crashError describes a crasher whose input has been written to the seed
// corpus. Alongside the underlying error it remembers the name of the corpus
// file holding the crashing input, which the testing framework uses to report
// a command that re-runs that specific input.
type crashError struct {
	name string // name of the corpus file holding the crashing input
	err  error  // underlying error describing the crash
}

// Error returns the message of the wrapped error.
func (ce *crashError) Error() string {
	msg := ce.err.Error()
	return msg
}

// Unwrap returns the wrapped error so callers can inspect it with the
// errors package.
func (ce *crashError) Unwrap() error {
	return ce.err
}

// CrashName returns the name of the corpus file that holds the crashing input.
func (ce *crashError) CrashName() string {
	return ce.name
}
// corpus is the in-memory collection of corpus entries used by the
// coordinator.
type corpus struct {
// entries lists the corpus entries; new entries are appended to the end.
entries []CorpusEntry
}
// CorpusEntry represents an individual input for fuzzing.
//
// We must use an equivalent type in the testing and testing/internal/testdeps
// packages, but testing can't import this package directly, and we don't want
// to export this type from testing. Instead, we use the same struct type and
// use a type alias (not a defined type) for convenience.
//
// Because this is an alias for an unnamed struct type, the field names, types
// and order below must not be changed independently of those packages.
type CorpusEntry = struct {
// Name is the name of the corpus file, if the entry was loaded from the
// seed corpus. It can be used with -run. For entries added with f.Add and
// entries generated by the mutator, Name is empty.
Name string
// Data is the raw data loaded from a corpus file.
Data []byte
// Values is the unmarshaled values from a corpus file.
Values []interface{}
}
// fuzzInput is the message type carried by coordinator.inputC: a unit of work
// that the coordinator sends and that any worker may receive.
type fuzzInput struct {
// entry is the value to test initially. The worker will randomly mutate
// values from this starting point.
entry CorpusEntry
// countRequested is the number of values to test. If non-zero, the worker
// will stop after testing this many values, if it hasn't already stopped.
countRequested int64
// coverageOnly indicates whether this input is for a coverage-only run. If
// true, the input should not be fuzzed.
coverageOnly bool
// interestingCount reflects the coordinator's current interestingCount
// value.
interestingCount int64
// coverageData reflects the coordinator's current coverageData.
coverageData []byte
}
// fuzzResult is the message type carried by coordinator.resultC: a report
// that a worker sends and the coordinator receives.
type fuzzResult struct {
// entry is an interesting value or a crasher.
entry CorpusEntry
// crasherMsg is an error message from a crash. It's "" if no crash was found.
crasherMsg string
// coverageData is set if the worker found new coverage.
coverageData []byte
// countRequested is the number of values the coordinator asked the worker
// to test. 0 if there was no limit.
countRequested int64
// count is the number of values the worker actually tested.
count int64
// duration is the time the worker spent testing inputs.
duration time.Duration
}
// coordinator holds channels that workers can use to communicate with
// the coordinator, together with the statistics and corpus state that the
// coordinator tracks while fuzzing.
type coordinator struct {
opts CoordinateFuzzingOpts
// startTime is the time we started the workers after loading the corpus.
// Used for logging.
startTime time.Time
// inputC is sent values to fuzz by the coordinator. Any worker may receive
// values from this channel.
inputC chan fuzzInput
// resultC is sent results of fuzzing by workers. The coordinator
// receives these. Multiple types of messages are allowed.
resultC chan fuzzResult
// count is the number of values fuzzed so far.
count int64
// interestingCount is the number of unique interesting values which have
// been found this execution.
interestingCount int64
// covOnlyInputs is the number of entries in the corpus which still need to
// be sent to a worker to gather baseline coverage data.
covOnlyInputs int
// duration is the time spent fuzzing inside workers, not counting time
// starting up or tearing down.
duration time.Duration
// countWaiting is the number of values the coordinator is currently waiting
// for workers to fuzz.
countWaiting int64
// corpus is a set of interesting values, including the seed corpus and
// generated values that workers reported as interesting.
corpus corpus
// corpusIndex is the next value to send to workers.
// TODO(jayconrod,katiehockman): need a scheduling algorithm that chooses
// which corpus value to send next (or generates something new).
corpusIndex int
// coverageData holds, for each edge, the highest counter value reported so
// far; it is maintained by updateCoverage. It is left nil when coverage
// data is unavailable.
coverageData []byte
}
// newCoordinator builds the coordinator state for a fuzzing run described by
// opts. It loads the combined corpus (seed values plus cached values), falls
// back to a single all-zero-values entry when that corpus is empty, and
// prepares the coverage bookkeeping. It returns an error if the cache cannot
// be read.
func newCoordinator(opts CoordinateFuzzingOpts) (*coordinator, error) {
	// Seed entries may carry only values; give each of them marshalled data.
	for i := range opts.Seed {
		seed := &opts.Seed[i]
		if seed.Data == nil {
			seed.Data = marshalCorpusFile(seed.Values...)
		}
	}

	loaded, err := readCache(opts.Seed, opts.Types, opts.CacheDir)
	if err != nil {
		return nil, err
	}

	// The baseline coverage run covers only entries that were really loaded,
	// so the count is taken before the placeholder entry below is added.
	baseline := len(loaded.entries)
	if baseline == 0 {
		var zeros []interface{}
		for _, typ := range opts.Types {
			zeros = append(zeros, zeroValue(typ))
		}
		placeholder := CorpusEntry{Data: marshalCorpusFile(zeros...), Values: zeros}
		loaded.entries = append(loaded.entries, placeholder)
	}

	c := &coordinator{
		opts:          opts,
		startTime:     time.Now(),
		inputC:        make(chan fuzzInput),
		resultC:       make(chan fuzzResult),
		corpus:        loaded,
		covOnlyInputs: baseline,
	}

	if edges := len(coverageCopy()); edges == 0 {
		fmt.Fprintf(c.opts.Log, "warning: coverage-guided fuzzing is not supported on this platform\n")
		c.covOnlyInputs = 0
	} else {
		// Start from a clean []byte full of zeros, one counter per edge.
		c.coverageData = make([]byte, edges)
	}

	if c.covOnlyInputs > 0 {
		// -1 lets the workers know when the coverage run is finished so they
		// can update their local coverage data.
		c.interestingCount = -1
	}
	return c, nil
}
// updateStats folds one worker result into the coordinator's running totals:
// the values tested, the time spent testing, and the number of values still
// outstanding.
func (c *coordinator) updateStats(result fuzzResult) {
	// Everything requested from this worker is no longer outstanding, even if
	// the worker tested fewer values than it was asked to.
	c.countWaiting -= result.countRequested
	c.count += result.count
	c.duration += result.duration
}
// logStats writes a one-line progress report to c.opts.Log. During the
// baseline coverage run it reports how many corpus entries are left;
// otherwise it reports the number of executions, the execution rate, and the
// number of interesting values found.
func (c *coordinator) logStats() {
	// TODO(jayconrod,katiehockman): consider printing the amount of coverage
	// that has been reached so far (perhaps a percentage of edges?)
	secs := time.Since(c.startTime).Seconds()
	out := c.opts.Log
	if c.coverageOnlyRun() {
		fmt.Fprintf(out, "gathering baseline coverage, elapsed: %.1fs, workers: %d, left: %d\n", secs, c.opts.Parallel, c.covOnlyInputs)
		return
	}
	execsPerSec := float64(c.count) / secs
	fmt.Fprintf(out, "fuzzing, elapsed: %.1fs, execs: %d (%.0f/sec), workers: %d, interesting: %d\n", secs, c.count, execsPerSec, c.opts.Parallel, c.interestingCount)
}
// nextInput picks the next corpus entry to hand to a worker and advances the
// round-robin corpus index.
//
// When the number of executions is limited (opts.Count > 0), the returned
// input carries a per-worker share of that limit, capped at what is still
// unassigned, and the share is added to countWaiting. When every requested
// execution is already done or assigned, nextInput returns a zero fuzzInput
// and false. During the baseline coverage run the input is marked
// coverageOnly and does not count against the limit.
func (c *coordinator) nextInput() (fuzzInput, bool) {
	limit := c.opts.Count
	limited := limit > 0
	if limited && c.count+c.countWaiting >= limit {
		// Workers already testing all requested inputs.
		return fuzzInput{}, false
	}

	entries := c.corpus.entries
	next := fuzzInput{
		entry:            entries[c.corpusIndex],
		interestingCount: c.interestingCount,
		coverageData:     c.coverageData,
	}
	c.corpusIndex = (c.corpusIndex + 1) % len(entries)

	switch {
	case c.coverageOnlyRun():
		// Coverage-only inputs are not fuzzed and are not included in the
		// count of generated values.
		next.coverageOnly = true
	case limited:
		// Split the limit evenly across workers, rounding up, but never ask
		// for more than is still unassigned.
		workers := int64(c.opts.Parallel)
		share := limit / workers
		if limit%workers > 0 {
			share++
		}
		if left := limit - c.count - c.countWaiting; share > left {
			share = left
		}
		next.countRequested = share
		c.countWaiting += share
	}
	return next, true
}
// coverageOnlyRun reports whether there are still corpus entries for which
// baseline coverage has to be gathered, i.e. whether covOnlyInputs is
// positive.
func (c *coordinator) coverageOnlyRun() bool {
	pending := c.covOnlyInputs
	return pending > 0
}
// updateCoverage merges newCoverage into c.coverageData, raising every edge
// counter for which newCoverage holds a higher value. It returns true if at
// least one edge went from zero to non-zero, i.e. a new edge was hit.
//
// It panics if newCoverage does not have the same number of edges as
// c.coverageData.
func (c *coordinator) updateCoverage(newCoverage []byte) bool {
	known := c.coverageData // shares the backing array, so writes update c
	if len(newCoverage) != len(known) {
		panic(fmt.Sprintf("num edges changed at runtime: %d, expected %d", len(newCoverage), len(known)))
	}
	hitNewEdge := false
	for i, counter := range newCoverage {
		if counter <= known[i] {
			continue
		}
		if known[i] == 0 {
			hitNewEdge = true
		}
		known[i] = counter
	}
	return hitNewEdge
}
// readCache builds a combined corpus consisting of the seed values followed by
// the values read from the cache directory (in GOCACHE/fuzz).
//
// Malformed cache files are tolerated: they are left out of the corpus and
// do not cause an error. Any other error from reading the cache is returned
// together with an empty corpus.
//
// TODO(jayconrod,katiehockman): need a mechanism that can remove values that
// aren't useful anymore, for example, because they have the wrong type.
func readCache(seed []CorpusEntry, types []reflect.Type, cacheDir string) (corpus, error) {
	cached, err := ReadCorpus(cacheDir, types)
	switch err.(type) {
	case nil, *MalformedCorpusError:
		// It's okay if some files in the cache directory are malformed and
		// are not included in the corpus.
		// TODO(jayconrod,katiehockman): consider printing some kind of warning
		// indicating the number of files which were skipped because they are
		// malformed.
	default:
		// Anything else (for example an I/O error) is fatal.
		return corpus{}, err
	}

	var combined corpus
	combined.entries = append(combined.entries, seed...)
	combined.entries = append(combined.entries, cached...)
	return combined, nil
}
// MalformedCorpusError reports problems found while reading the corpus from
// the filesystem. Every individual problem is kept in errs. The testing
// framework uses this to report malformed files in testdata.
type MalformedCorpusError struct {
	errs []error
}

// Error returns the messages of all collected errors, one per line.
func (e *MalformedCorpusError) Error() string {
	lines := make([]string, len(e.errs))
	for i := range e.errs {
		lines[i] = e.errs[i].Error()
	}
	return strings.Join(lines, "\n")
}
// ReadCorpus reads every regular file directly inside dir as a corpus file and
// returns the entries whose values match the given types. Subdirectories are
// skipped, and a directory that does not exist yields no entries and no error.
//
// Files that cannot be decoded, or whose values do not match types, are left
// out of the result; in that case the well-formed entries are returned
// together with a *MalformedCorpusError describing each rejected file. Failing
// to list the directory or to read a file aborts with an error and no entries.
func ReadCorpus(dir string, types []reflect.Type) ([]CorpusEntry, error) {
	infos, err := ioutil.ReadDir(dir)
	switch {
	case os.IsNotExist(err):
		return nil, nil // No corpus to read
	case err != nil:
		return nil, fmt.Errorf("reading seed corpus from testdata: %v", err)
	}

	var (
		entries   []CorpusEntry
		malformed []error
	)
	for _, info := range infos {
		// TODO(jayconrod,katiehockman): determine when a file is a fuzzing input
		// based on its name. We should only read files created by writeToCorpus.
		// If we read ALL files, we won't be able to change the file format by
		// changing the extension. We also won't be able to add files like
		// README.txt explaining why the directory exists.
		if info.IsDir() {
			continue
		}
		full := filepath.Join(dir, info.Name())
		raw, err := ioutil.ReadFile(full)
		if err != nil {
			return nil, fmt.Errorf("failed to read corpus file: %v", err)
		}
		vals, err := readCorpusData(raw, types)
		if err != nil {
			malformed = append(malformed, fmt.Errorf("%q: %v", full, err))
			continue
		}
		entries = append(entries, CorpusEntry{Name: full, Data: raw, Values: vals})
	}

	if len(malformed) == 0 {
		return entries, nil
	}
	return entries, &MalformedCorpusError{errs: malformed}
}
// readCorpusData unmarshals the contents of a corpus file and checks that the
// decoded values agree with types, both in number and in dynamic type.
//
// It returns the decoded values on success. It returns an error if data cannot
// be unmarshaled (the underlying error is wrapped), if the number of values
// differs from len(types), or if any value's type differs from the
// corresponding entry of types.
func readCorpusData(data []byte, types []reflect.Type) ([]interface{}, error) {
	vals, err := unmarshalCorpusFile(data)
	if err != nil {
		return nil, fmt.Errorf("unmarshal: %w", err)
	}
	if len(vals) != len(types) {
		return nil, fmt.Errorf("wrong number of values in corpus file: %d, want %d", len(vals), len(types))
	}
	// Collect the dynamic types up front so that a mismatch can be reported as
	// "got types, want types" rather than dumping the decoded values.
	valTypes := make([]reflect.Type, len(vals))
	for i, v := range vals {
		valTypes[i] = reflect.TypeOf(v)
	}
	for i := range types {
		if valTypes[i] != types[i] {
			return nil, fmt.Errorf("mismatched types in corpus file: %v, want %v", valTypes, types)
		}
	}
	return vals, nil
}
// writeToCorpus writes the given bytes to a file in dir whose name is the
// hex-encoded SHA-256 hash of the bytes, creating dir if it does not exist.
// If a file with that name already holds exactly these bytes, writeToCorpus
// leaves it untouched. writeToCorpus returns the file's name (dir joined with
// the hash), or an error if it failed.
//
// NOTE: the write itself is not atomic; if it fails part-way, the partially
// written file is removed.
func writeToCorpus(b []byte, dir string) (name string, err error) {
	sum := fmt.Sprintf("%x", sha256.Sum256(b))
	name = filepath.Join(dir, sum)
	if err := os.MkdirAll(dir, 0777); err != nil {
		return "", err
	}
	// The name is derived from the content, so an existing file normally holds
	// the same bytes. Skipping the rewrite in that case means a failed write
	// (for example to a read-only file) can no longer destroy a valid entry.
	// A file whose content differs (such as a leftover from an interrupted
	// write) is still overwritten below.
	if existing, err := ioutil.ReadFile(name); err == nil && string(existing) == string(b) {
		return name, nil
	}
	if err := ioutil.WriteFile(name, b, 0666); err != nil {
		os.Remove(name) // remove partially written file
		return "", err
	}
	return name, nil
}
// zeroValue returns the entry of zeroVals whose dynamic type is t. It panics
// if zeroVals has no entry of that type.
func zeroValue(t reflect.Type) interface{} {
	for i := range zeroVals {
		if candidate := zeroVals[i]; reflect.TypeOf(candidate) == t {
			return candidate
		}
	}
	panic(fmt.Sprintf("unsupported type: %v", t))
}
// zeroVals holds one zero value for each type that zeroValue supports.
// zeroValue returns the first entry whose dynamic type matches. byte and rune
// are aliases of uint8 and int32, so those types appear twice.
var zeroVals = []interface{}{
	// Byte slices, strings and booleans.
	[]byte(""),
	string(""),
	false,
	// Aliases of uint8 and int32.
	byte(0),
	rune(0),
	// Floating point.
	float32(0),
	float64(0),
	// Signed integers.
	int(0),
	int8(0),
	int16(0),
	int32(0),
	int64(0),
	// Unsigned integers.
	uint(0),
	uint8(0),
	uint16(0),
	uint32(0),
	uint64(0),
}