[dev.fuzz] internal/fuzz: implement a more robust mutator

This change also allocates a larger capacity (100 MB) for the shared
memory at the start, rather than beginning as small as possible and
immediately needing to grow while mutating. This means that 100 MB is
the maximum size of a corpus entry currently, since growing the shared
memory is not yet supported.

The code in internal/fuzz/mutator.go and internal/fuzz/pcg.go are copied
from, or heavily inspired by, code originally authored by Dmitry Vyukov
and Josh Bleecher Snyder as part of the go-fuzz project. Thanks to them
for their contributions. See https://github.com/dvyukov/go-fuzz.

Change-Id: I0d51d53976e23933072e760ff78e6c4ad9dcd862
Reviewed-on: https://go-review.googlesource.com/c/go/+/281972
Run-TryBot: Katie Hockman <katie@golang.org>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Jay Conrod <jayconrod@google.com>
Trust: Katie Hockman <katie@golang.org>
This commit is contained in:
Katie Hockman 2021-01-05 16:01:49 -05:00
parent a1646595e6
commit 2f072cf8a9
7 changed files with 456 additions and 36 deletions

View File

@ -13,23 +13,23 @@ go test -parallel=1
# Running the fuzzer should find a crashing input quickly.
! go test -fuzz=FuzzWithBug -fuzztime=5s -parallel=1
stdout 'testdata[/\\]corpus[/\\]FuzzWithBug[/\\]fb8e20fc2e4c3f248c60c39bd652f3c1347298bb977b8b4d5903b85055620603'
stdout 'testdata[/\\]corpus[/\\]FuzzWithBug[/\\]'
stdout 'this input caused a crash!'
grep '\Aab\z' testdata/corpus/FuzzWithBug/fb8e20fc2e4c3f248c60c39bd652f3c1347298bb977b8b4d5903b85055620603
go run check_testdata.go FuzzWithBug
# Now, the failing bytes should have been added to the seed corpus for
# the target, and should fail when run without fuzzing.
! go test -parallel=1
! go test -run=FuzzWithNilPanic -fuzz=FuzzWithNilPanic -fuzztime=5s -parallel=1
stdout 'testdata[/\\]corpus[/\\]FuzzWithNilPanic[/\\]f45de51cdef30991551e41e882dd7b5404799648a0a00753f44fc966e6153fc1'
stdout 'testdata[/\\]corpus[/\\]FuzzWithNilPanic[/\\]'
stdout 'runtime.Goexit'
grep '\Aac\z' testdata/corpus/FuzzWithNilPanic/f45de51cdef30991551e41e882dd7b5404799648a0a00753f44fc966e6153fc1
go run check_testdata.go FuzzWithNilPanic
! go test -run=FuzzWithBadExit -fuzz=FuzzWithBadExit -fuzztime=5s -parallel=1
stdout 'testdata[/\\]corpus[/\\]FuzzWithBadExit[/\\]70ba33708cbfb103f1a8e34afef333ba7dc021022b2d9aaa583aabb8058d8d67'
stdout 'testdata[/\\]corpus[/\\]FuzzWithBadExit[/\\]'
stdout 'unexpectedly'
grep '\Aad\z' testdata/corpus/FuzzWithBadExit/70ba33708cbfb103f1a8e34afef333ba7dc021022b2d9aaa583aabb8058d8d67
go run check_testdata.go FuzzWithBadExit
-- go.mod --
module m
@ -39,7 +39,6 @@ go 1.16
package fuzz_crash
import (
"bytes"
"os"
"testing"
)
@ -47,7 +46,7 @@ import (
func FuzzWithBug(f *testing.F) {
f.Add([]byte("aa"))
f.Fuzz(func(t *testing.T, b []byte) {
if bytes.Equal(b, []byte("ab")) {
if string(b) != "aa" {
panic("this input caused a crash!")
}
})
@ -56,7 +55,7 @@ func FuzzWithBug(f *testing.F) {
func FuzzWithNilPanic(f *testing.F) {
f.Add([]byte("aa"))
f.Fuzz(func(t *testing.T, b []byte) {
if bytes.Equal(b, []byte("ac")) {
if string(b) != "aa" {
panic(nil)
}
})
@ -65,8 +64,55 @@ func FuzzWithNilPanic(f *testing.F) {
func FuzzWithBadExit(f *testing.F) {
f.Add([]byte("aa"))
f.Fuzz(func(t *testing.T, b []byte) {
if bytes.Equal(b, []byte("ad")) {
if string(b) != "aa" {
os.Exit(1)
}
})
}
-- check_testdata.go --
// +build ignore
package main
import (
"bytes"
"crypto/sha256"
"fmt"
"io/ioutil"
"os"
"path/filepath"
)
func main() {
target := os.Args[1]
dir := filepath.Join("testdata/corpus", target)
files, err := ioutil.ReadDir(dir)
if err != nil {
fmt.Fprintln(os.Stderr, err)
os.Exit(1)
}
if len(files) != 1 {
fmt.Fprintln(os.Stderr, fmt.Errorf("expect only one new mutation to be written to testdata", len(files)))
os.Exit(1)
}
fname := files[0].Name()
contents, err := ioutil.ReadFile(filepath.Join(dir, fname))
if err != nil {
fmt.Fprintln(os.Stderr, err)
os.Exit(1)
}
if bytes.Equal(contents, []byte("aa")) {
fmt.Fprintln(os.Stderr, fmt.Errorf("newly written testdata entry was not mutated"))
os.Exit(1)
}
// The hash of the bytes in the file should match the filename.
h := []byte(fmt.Sprintf("%x", sha256.Sum256(contents)))
if !bytes.Equal([]byte(fname), h) {
fmt.Fprintln(os.Stderr, fmt.Errorf("hash of bytes %q does not match filename %q", h, fname))
os.Exit(1)
}
}

View File

@ -10,6 +10,12 @@
go test -fuzz=FuzzA -fuzztime=5s -parallel=1 -log=fuzz
go run check_logs.go fuzz fuzz.worker
# Test that the mutator is good enough to find several unique mutations.
! go test -v -fuzz=Fuzz -parallel=1 -fuzztime=30s mutator_test.go
! stdout ok
stdout FAIL
stdout 'mutator found enough edge cases'
-- go.mod --
module m
@ -143,7 +149,7 @@ func checkWorkerLog(r io.Reader) error {
sawAMutant = true
}
}
if err := scan.Err(); err != nil {
if err := scan.Err(); err != nil && err != bufio.ErrTooLong {
return err
}
if !sawAMutant {
@ -151,3 +157,40 @@ func checkWorkerLog(r io.Reader) error {
}
return nil
}
-- mutator_test.go --
package fuzz_test
import (
"strings"
"testing"
)
// TODO(katiehockman): re-work this test once we have a better fuzzing engine
// (ie. more mutations, and compiler instrumentation)
func Fuzz(f *testing.F) {
// TODO(katiehockman): simplify this once we can dedupe crashes (e.g.
// replace map with calls to panic, and simply count the number of crashes
// that were added to testdata)
crashes := make(map[string]bool)
// No seed corpus initiated
f.Fuzz(func(t *testing.T, b []byte) {
if len(crashes) >= 150 {
panic("mutator found enough edge cases")
}
if len(b) < 5 {
return // continue
}
for i := 0; i < 256; i++ {
s := string(byte(i))
if strings.HasPrefix(string(b), s) {
crashes["pre-" + s] = true
}
if strings.HasSuffix(string(b), s) {
crashes["suffix-" + s] = true
}
}
})
}

View File

@ -47,20 +47,13 @@ func CoordinateFuzzing(ctx context.Context, parallel int, seed [][]byte, corpusD
parallel = runtime.GOMAXPROCS(0)
}
sharedMemSize := 100 << 20 // 100 MB
corpus, err := readCorpusAndCache(seed, corpusDir, cacheDir)
if err != nil {
return err
}
var maxEntryLen int
if len(corpus.entries) == 0 {
corpus.entries = []corpusEntry{{b: []byte{}}}
maxEntryLen = 0
} else {
for _, e := range corpus.entries {
if len(e.b) > maxEntryLen {
maxEntryLen = len(e.b)
}
}
}
// TODO(jayconrod): do we want to support fuzzing different binaries?
@ -78,7 +71,7 @@ func CoordinateFuzzing(ctx context.Context, parallel int, seed [][]byte, corpusD
}
newWorker := func() (*worker, error) {
mem, err := sharedMemTempFile(maxEntryLen)
mem, err := sharedMemTempFile(sharedMemSize)
if err != nil {
return nil, err
}

View File

@ -47,10 +47,9 @@ func sharedMemSize(valueSize int) int {
return int(unsafe.Sizeof(sharedMemHeader{})) + valueSize
}
// sharedMemTempFile creates a new temporary file large enough to hold a value
// of the given size, then maps it into memory. The file will be removed when
// the Close method is called.
func sharedMemTempFile(valueSize int) (m *sharedMem, err error) {
// sharedMemTempFile creates a new temporary file of the given size, then maps
// it into memory. The file will be removed when the Close method is called.
func sharedMemTempFile(size int) (m *sharedMem, err error) {
// Create a temporary file.
f, err := ioutil.TempFile("", "fuzz-*")
if err != nil {
@ -64,7 +63,7 @@ func sharedMemTempFile(valueSize int) (m *sharedMem, err error) {
}()
// Resize it to the correct size.
totalSize := sharedMemSize(valueSize)
totalSize := sharedMemSize(size)
if err := f.Truncate(int64(totalSize)); err != nil {
return nil, err
}

View File

@ -4,14 +4,244 @@
package fuzz
import "math/rand"
import (
"encoding/binary"
"reflect"
"unsafe"
)
func mutate(b []byte) {
if len(b) == 0 {
return
}
// Mutate a byte in a random position.
pos := rand.Intn(len(b))
b[pos] = byte(rand.Intn(256))
type mutator struct {
r *pcgRand
}
func newMutator() *mutator {
return &mutator{r: newPcgRand()}
}
func (m *mutator) rand(n int) int {
return m.r.intn(n)
}
func (m *mutator) randByteOrder() binary.ByteOrder {
if m.r.bool() {
return binary.LittleEndian
}
return binary.BigEndian
}
// chooseLen chooses length of range mutation in range [0,n]. It gives
// preference to shorter ranges.
func (m *mutator) chooseLen(n int) int {
switch x := m.rand(100); {
case x < 90:
return m.rand(min(8, n)) + 1
case x < 99:
return m.rand(min(32, n)) + 1
default:
return m.rand(n) + 1
}
}
func min(a, b int) int {
if a < b {
return a
}
return b
}
// mutate performs several mutations directly onto the provided byte slice.
func (m *mutator) mutate(ptrB *[]byte) {
// TODO(jayconrod,katiehockman): make this use zero allocations
// TODO(katiehockman): pull some of these functions into helper methods
// and test that each case is working as expected.
// TODO(katiehockman): perform more types of mutations.
b := *ptrB
defer func() {
oldHdr := (*reflect.SliceHeader)(unsafe.Pointer(ptrB))
newHdr := (*reflect.SliceHeader)(unsafe.Pointer(&b))
if oldHdr.Data != newHdr.Data {
panic("data moved to new address")
}
*ptrB = b
}()
numIters := 1 + m.r.exp2()
for iter := 0; iter < numIters; iter++ {
switch m.rand(10) {
case 0:
// Remove a range of bytes.
if len(b) <= 1 {
iter--
continue
}
pos0 := m.rand(len(b))
pos1 := pos0 + m.chooseLen(len(b)-pos0)
copy(b[pos0:], b[pos1:])
b = b[:len(b)-(pos1-pos0)]
case 1:
// Insert a range of random bytes.
pos := m.rand(len(b) + 1)
n := m.chooseLen(10)
if len(b)+n >= cap(b) {
iter--
continue
}
b = b[:len(b)+n]
copy(b[pos+n:], b[pos:])
for i := 0; i < n; i++ {
b[pos+i] = byte(m.rand(256))
}
case 2:
// Duplicate a range of bytes.
if len(b) <= 1 {
iter--
continue
}
src := m.rand(len(b))
dst := m.rand(len(b))
for dst == src {
dst = m.rand(len(b))
}
n := m.chooseLen(len(b) - src)
tmp := make([]byte, n)
copy(tmp, b[src:])
b = b[:len(b)+n]
copy(b[dst+n:], b[dst:])
copy(b[dst:], tmp)
case 3:
// Copy a range of bytes.
if len(b) <= 1 {
iter--
continue
}
src := m.rand(len(b))
dst := m.rand(len(b))
for dst == src {
dst = m.rand(len(b))
}
n := m.chooseLen(len(b) - src)
copy(b[dst:], b[src:src+n])
case 4:
// Bit flip.
if len(b) == 0 {
iter--
continue
}
pos := m.rand(len(b))
b[pos] ^= 1 << uint(m.rand(8))
case 5:
// Set a byte to a random value.
if len(b) == 0 {
iter--
continue
}
pos := m.rand(len(b))
b[pos] = byte(m.rand(256))
case 6:
// Swap 2 bytes.
if len(b) <= 1 {
iter--
continue
}
src := m.rand(len(b))
dst := m.rand(len(b))
for dst == src {
dst = m.rand(len(b))
}
b[src], b[dst] = b[dst], b[src]
case 7:
// Add/subtract from a byte.
if len(b) == 0 {
iter--
continue
}
pos := m.rand(len(b))
v := byte(m.rand(35) + 1)
if m.r.bool() {
b[pos] += v
} else {
b[pos] -= v
}
case 8:
// Add/subtract from a uint16.
if len(b) < 2 {
iter--
continue
}
v := uint16(m.rand(35) + 1)
if m.r.bool() {
v = 0 - v
}
pos := m.rand(len(b) - 1)
enc := m.randByteOrder()
enc.PutUint16(b[pos:], enc.Uint16(b[pos:])+v)
case 9:
// Add/subtract from a uint32.
if len(b) < 4 {
iter--
continue
}
v := uint32(m.rand(35) + 1)
if m.r.bool() {
v = 0 - v
}
pos := m.rand(len(b) - 3)
enc := m.randByteOrder()
enc.PutUint32(b[pos:], enc.Uint32(b[pos:])+v)
case 10:
// Add/subtract from a uint64.
if len(b) < 8 {
iter--
continue
}
v := uint64(m.rand(35) + 1)
if m.r.bool() {
v = 0 - v
}
pos := m.rand(len(b) - 7)
enc := m.randByteOrder()
enc.PutUint64(b[pos:], enc.Uint64(b[pos:])+v)
case 11:
// Replace a byte with an interesting value.
if len(b) == 0 {
iter--
continue
}
pos := m.rand(len(b))
b[pos] = byte(interesting8[m.rand(len(interesting8))])
case 12:
// Replace a uint16 with an interesting value.
if len(b) < 2 {
iter--
continue
}
pos := m.rand(len(b) - 1)
v := uint16(interesting16[m.rand(len(interesting16))])
m.randByteOrder().PutUint16(b[pos:], v)
case 13:
// Replace a uint32 with an interesting value.
if len(b) < 4 {
iter--
continue
}
pos := m.rand(len(b) - 3)
v := uint32(interesting32[m.rand(len(interesting32))])
m.randByteOrder().PutUint32(b[pos:], v)
}
}
}
var (
interesting8 = []int8{-128, -1, 0, 1, 16, 32, 64, 100, 127}
interesting16 = []int16{-32768, -129, 128, 255, 256, 512, 1000, 1024, 4096, 32767}
interesting32 = []int32{-2147483648, -100663046, -32769, 32768, 65535, 65536, 100663045, 2147483647}
)
func init() {
for _, v := range interesting8 {
interesting16 = append(interesting16, int16(v))
}
for _, v := range interesting16 {
interesting32 = append(interesting32, int32(v))
}
}

104
src/internal/fuzz/pcg.go Normal file
View File

@ -0,0 +1,104 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package fuzz
import (
"math/bits"
"sync/atomic"
"time"
)
// The functions in pcg implement a 32 bit PRNG with a 64 bit period: pcg xsh rr
// 64 32. See https://www.pcg-random.org/ for more information. This
// implementation is geared specifically towards the needs of fuzzing: Simple
// creation and use, no reproducibility, no concurrency safety, just the
// necessary methods, optimized for speed.
var globalInc uint64 // PCG stream
const multiplier uint64 = 6364136223846793005
// pcgRand is a PRNG. It should not be copied or shared. No Rand methods are
// concurrency safe.
type pcgRand struct {
noCopy noCopy // help avoid mistakes: ask vet to ensure that we don't make a copy
state uint64
inc uint64
}
// newPcgRand generates a new, seeded Rand, ready for use.
func newPcgRand() *pcgRand {
r := new(pcgRand)
now := uint64(time.Now().UnixNano())
inc := atomic.AddUint64(&globalInc, 1)
r.state = now
r.inc = (inc << 1) | 1
r.step()
r.state += now
r.step()
return r
}
func (r *pcgRand) step() {
r.state *= multiplier
r.state += r.inc
}
// uint32 returns a pseudo-random uint32.
func (r *pcgRand) uint32() uint32 {
x := r.state
r.step()
return bits.RotateLeft32(uint32(((x>>18)^x)>>27), -int(x>>59))
}
// intn returns a pseudo-random number in [0, n).
// n must fit in a uint32.
func (r *pcgRand) intn(n int) int {
if int(uint32(n)) != n {
panic("large Intn")
}
return int(r.uint32n(uint32(n)))
}
// uint32n returns a pseudo-random number in [0, n).
//
// For implementation details, see:
// https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction
// https://lemire.me/blog/2016/06/30/fast-random-shuffling
func (r *pcgRand) uint32n(n uint32) uint32 {
v := r.uint32()
prod := uint64(v) * uint64(n)
low := uint32(prod)
if low < n {
thresh := uint32(-int32(n)) % n
for low < thresh {
v = r.uint32()
prod = uint64(v) * uint64(n)
low = uint32(prod)
}
}
return uint32(prod >> 32)
}
// exp2 generates n with probability 1/2^(n+1).
func (r *pcgRand) exp2() int {
return bits.TrailingZeros32(r.uint32())
}
// bool generates a random bool.
func (r *pcgRand) bool() bool {
return r.uint32()&1 == 0
}
// noCopy may be embedded into structs which must not be copied
// after the first use.
//
// See https://golang.org/issues/8005#issuecomment-190753527
// for details.
type noCopy struct{}
// lock is a no-op used by -copylocks checker from `go vet`.
func (*noCopy) lock() {}
func (*noCopy) unlock() {}

View File

@ -318,7 +318,7 @@ func RunFuzzWorker(ctx context.Context, fn func([]byte) error) error {
if err != nil {
return err
}
srv := &workerServer{workerComm: comm, fuzzFn: fn}
srv := &workerServer{workerComm: comm, fuzzFn: fn, m: newMutator()}
return srv.serve(ctx)
}
@ -366,6 +366,7 @@ type workerComm struct {
// memory after a worker process terminates unexpectedly.
type workerServer struct {
workerComm
m *mutator
// fuzzFn runs the worker's fuzz function on the given input and returns
// an error if it finds a crasher (the process may also exit or crash).
@ -441,7 +442,11 @@ func (ws *workerServer) fuzz(ctx context.Context, args fuzzArgs) fuzzResponse {
// real heuristic once we have one.
return fuzzResponse{Interesting: true}
default:
mutate(ws.mem.valueRef())
b := ws.mem.valueRef()
ws.m.mutate(&b)
// TODO(jayconrod): consider making ws.m.header() contain the whole
// slice header, so the length can be updated when the slice changes
ws.mem.header().length = len(b)
if err := ws.fuzzFn(ws.mem.valueRef()); err != nil {
return fuzzResponse{Err: err.Error()}
}