internal/fuzz: centralize corpus entry addition

Adds an addCorpusEntries method to coordinator which manages checking
for duplicate entries, writing entries to the cache directory, and
adding entries to the corpus. Also moves readCache to be a method on
the coordinator.

Fixes #50606

Change-Id: Id6721384a2ad1cfb4c5471cf0cd0a7510d250a6c
Reviewed-on: https://go-review.googlesource.com/c/go/+/360394
Trust: Katie Hockman <katie@golang.org>
Reviewed-by: Katie Hockman <katie@golang.org>
Trust: Roland Shoemaker <roland@golang.org>
Run-TryBot: Roland Shoemaker <roland@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
This commit is contained in:
Roland Shoemaker 2021-11-01 10:03:36 -07:00
parent a991d9dc27
commit b2dc66c64d
1 changed files with 44 additions and 37 deletions

View File

@ -316,32 +316,15 @@ func CoordinateFuzzing(ctx context.Context, opts CoordinateFuzzingOpts) (err err
} else { } else {
// Update the coordinator's coverage mask and save the value. // Update the coordinator's coverage mask and save the value.
inputSize := len(result.entry.Data) inputSize := len(result.entry.Data)
if opts.CacheDir != "" { duplicate, err := c.addCorpusEntries(true, result.entry)
// It is possible that the input that was discovered is already if err != nil {
// present in the corpus, but the worker produced a coverage map stop(err)
// that still expanded our total coverage (this may happen due to break
// flakiness in the coverage counters). In order to prevent adding }
// duplicate entries to the corpus (and re-writing the file on if duplicate {
// disk), skip it if the on disk file already exists. continue
// TODO(roland): this check is limited in that it will only be
// applied if we are using the CacheDir. Another option would be
// to iterate through the corpus and check if it is already present,
// which would catch cases where we are not caching entries.
// A slightly faster approach would be to keep some kind of map of
// entry hashes, which would allow us to avoid iterating through
// all entries.
_, err = os.Stat(result.entry.Path)
if err == nil {
continue
}
err := writeToCorpus(&result.entry, opts.CacheDir)
if err != nil {
stop(err)
}
result.entry.Data = nil
} }
c.updateCoverage(keepCoverage) c.updateCoverage(keepCoverage)
c.corpus.entries = append(c.corpus.entries, result.entry)
c.inputQueue.enqueue(result.entry) c.inputQueue.enqueue(result.entry)
c.interestingCount++ c.interestingCount++
if shouldPrintDebugInfo() { if shouldPrintDebugInfo() {
@ -433,6 +416,28 @@ func (e *crashError) CrashPath() string {
type corpus struct { type corpus struct {
entries []CorpusEntry entries []CorpusEntry
hashes map[[sha256.Size]byte]bool
}
func (c *coordinator) addCorpusEntries(addToCache bool, entries ...CorpusEntry) (bool, error) {
for _, e := range entries {
h := sha256.Sum256(e.Data)
if c.corpus.hashes[h] {
return true, nil
}
if addToCache {
if err := writeToCorpus(&e, c.opts.CacheDir); err != nil {
return false, err
}
// For entries written to disk, we don't hold onto the bytes,
// since the corpus would consume a significant amount of
// memory.
e.Data = nil
}
c.corpus.hashes[h] = true
c.corpus.entries = append(c.corpus.entries, e)
}
return false, nil
} }
// CorpusEntry represents an individual input for fuzzing. // CorpusEntry represents an individual input for fuzzing.
@ -640,18 +645,17 @@ func newCoordinator(opts CoordinateFuzzingOpts) (*coordinator, error) {
opts.Seed[i].Data = marshalCorpusFile(opts.Seed[i].Values...) opts.Seed[i].Data = marshalCorpusFile(opts.Seed[i].Values...)
} }
} }
corpus, err := readCache(opts.Seed, opts.Types, opts.CacheDir)
if err != nil {
return nil, err
}
c := &coordinator{ c := &coordinator{
opts: opts, opts: opts,
startTime: time.Now(), startTime: time.Now(),
inputC: make(chan fuzzInput), inputC: make(chan fuzzInput),
minimizeC: make(chan fuzzMinimizeInput), minimizeC: make(chan fuzzMinimizeInput),
resultC: make(chan fuzzResult), resultC: make(chan fuzzResult),
corpus: corpus,
timeLastLog: time.Now(), timeLastLog: time.Now(),
corpus: corpus{hashes: make(map[[sha256.Size]byte]bool)},
}
if err := c.readCache(); err != nil {
return nil, err
} }
if opts.MinimizeLimit > 0 || opts.MinimizeTimeout > 0 { if opts.MinimizeLimit > 0 || opts.MinimizeTimeout > 0 {
for _, t := range opts.Types { for _, t := range opts.Types {
@ -691,7 +695,7 @@ func newCoordinator(opts CoordinateFuzzingOpts) (*coordinator, error) {
data := marshalCorpusFile(vals...) data := marshalCorpusFile(vals...)
h := sha256.Sum256(data) h := sha256.Sum256(data)
name := fmt.Sprintf("%x", h[:4]) name := fmt.Sprintf("%x", h[:4])
c.corpus.entries = append(c.corpus.entries, CorpusEntry{Path: name, Data: data}) c.addCorpusEntries(false, CorpusEntry{Path: name, Data: data})
} }
return c, nil return c, nil
@ -908,22 +912,25 @@ func (c *coordinator) elapsed() time.Duration {
// //
// TODO(fuzzing): need a mechanism that can remove values that // TODO(fuzzing): need a mechanism that can remove values that
// aren't useful anymore, for example, because they have the wrong type. // aren't useful anymore, for example, because they have the wrong type.
func readCache(seed []CorpusEntry, types []reflect.Type, cacheDir string) (corpus, error) { func (c *coordinator) readCache() error {
var c corpus if _, err := c.addCorpusEntries(false, c.opts.Seed...); err != nil {
c.entries = append(c.entries, seed...) return err
entries, err := ReadCorpus(cacheDir, types) }
entries, err := ReadCorpus(c.opts.CacheDir, c.opts.Types)
if err != nil { if err != nil {
if _, ok := err.(*MalformedCorpusError); !ok { if _, ok := err.(*MalformedCorpusError); !ok {
// It's okay if some files in the cache directory are malformed and // It's okay if some files in the cache directory are malformed and
// are not included in the corpus, but fail if it's an I/O error. // are not included in the corpus, but fail if it's an I/O error.
return corpus{}, err return err
} }
// TODO(jayconrod,katiehockman): consider printing some kind of warning // TODO(jayconrod,katiehockman): consider printing some kind of warning
// indicating the number of files which were skipped because they are // indicating the number of files which were skipped because they are
// malformed. // malformed.
} }
c.entries = append(c.entries, entries...) if _, err := c.addCorpusEntries(false, entries...); err != nil {
return c, nil return err
}
return nil
} }
// MalformedCorpusError is an error found while reading the corpus from the // MalformedCorpusError is an error found while reading the corpus from the