[dev.fuzz] internal/fuzz: implement a more robust mutator

This change also allocates a larger capacity (100 MB) for the shared memory at the start, rather than beginning as small as possible and immediately needing to grow while mutating. This means that 100 MB is the maximum size of a corpus entry currently, since growing the shared memory is not yet supported. The code in internal/fuzz/mutator.go and internal/fuzz/pcg.go are copied from, or heavily inspired by, code originally authored by Dmitry Vyukov and Josh Bleecher Snyder as part of the go-fuzz project. Thanks to them for their contributions. See https://github.com/dvyukov/go-fuzz. Change-Id: I0d51d53976e23933072e760ff78e6c4ad9dcd862 Reviewed-on: https://go-review.googlesource.com/c/go/+/281972 Run-TryBot: Katie Hockman <katie@golang.org> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Jay Conrod <jayconrod@google.com> Trust: Katie Hockman <katie@golang.org>
2021-01-05 16:01:49 -05:00 · 2021-01-05 16:01:49 -05:00 · 2f072cf8a9
parent a1646595e6
commit 2f072cf8a9
7 changed files with 456 additions and 36 deletions
--- a/src/cmd/go/testdata/script/test_fuzz_mutate_crash.txt
+++ b/src/cmd/go/testdata/script/test_fuzz_mutate_crash.txt
@ -13,23 +13,23 @@ go test -parallel=1

 # Running the fuzzer should find a crashing input quickly.
 ! go test -fuzz=FuzzWithBug -fuzztime=5s -parallel=1
-stdout 'testdata[/\\]corpus[/\\]FuzzWithBug[/\\]fb8e20fc2e4c3f248c60c39bd652f3c1347298bb977b8b4d5903b85055620603'
+stdout 'testdata[/\\]corpus[/\\]FuzzWithBug[/\\]'
 stdout 'this input caused a crash!'
-grep '\Aab\z' testdata/corpus/FuzzWithBug/fb8e20fc2e4c3f248c60c39bd652f3c1347298bb977b8b4d5903b85055620603
+go run check_testdata.go FuzzWithBug

 # Now, the failing bytes should have been added to the seed corpus for
 # the target, and should fail when run without fuzzing.
 ! go test -parallel=1

 ! go test -run=FuzzWithNilPanic -fuzz=FuzzWithNilPanic -fuzztime=5s -parallel=1
-stdout 'testdata[/\\]corpus[/\\]FuzzWithNilPanic[/\\]f45de51cdef30991551e41e882dd7b5404799648a0a00753f44fc966e6153fc1'
+stdout 'testdata[/\\]corpus[/\\]FuzzWithNilPanic[/\\]'
 stdout 'runtime.Goexit'
-grep '\Aac\z' testdata/corpus/FuzzWithNilPanic/f45de51cdef30991551e41e882dd7b5404799648a0a00753f44fc966e6153fc1
+go run check_testdata.go FuzzWithNilPanic

 ! go test -run=FuzzWithBadExit -fuzz=FuzzWithBadExit -fuzztime=5s -parallel=1
-stdout 'testdata[/\\]corpus[/\\]FuzzWithBadExit[/\\]70ba33708cbfb103f1a8e34afef333ba7dc021022b2d9aaa583aabb8058d8d67'
+stdout 'testdata[/\\]corpus[/\\]FuzzWithBadExit[/\\]'
 stdout 'unexpectedly'
-grep '\Aad\z' testdata/corpus/FuzzWithBadExit/70ba33708cbfb103f1a8e34afef333ba7dc021022b2d9aaa583aabb8058d8d67
+go run check_testdata.go FuzzWithBadExit

 -- go.mod --
 module m
@ -39,7 +39,6 @@ go 1.16
 package fuzz_crash

 import (
-	"bytes"
    "os"
 	"testing"
 )
@ -47,7 +46,7 @@ import (
 func FuzzWithBug(f *testing.F) {
 	f.Add([]byte("aa"))
 	f.Fuzz(func(t *testing.T, b []byte) {
-		if bytes.Equal(b, []byte("ab")) {
+		if string(b) != "aa" {
 			panic("this input caused a crash!")
 		}
 	})
@ -56,7 +55,7 @@ func FuzzWithBug(f *testing.F) {
 func FuzzWithNilPanic(f *testing.F) {
 	f.Add([]byte("aa"))
 	f.Fuzz(func(t *testing.T, b []byte) {
-		if bytes.Equal(b, []byte("ac")) {
+		if string(b) != "aa" {
 			panic(nil)
 		}
 	})
@ -65,8 +64,55 @@ func FuzzWithNilPanic(f *testing.F) {
 func FuzzWithBadExit(f *testing.F) {
 	f.Add([]byte("aa"))
 	f.Fuzz(func(t *testing.T, b []byte) {
-		if bytes.Equal(b, []byte("ad")) {
+		if string(b) != "aa" {
 			os.Exit(1)
 		}
 	})
+}
+
+-- check_testdata.go --
+// +build ignore
+
+package main
+
+import (
+	"bytes"
+	"crypto/sha256"
+	"fmt"
+	"io/ioutil"
+	"os"
+	"path/filepath"
+)
+
+func main() {
+	target := os.Args[1]
+	dir := filepath.Join("testdata/corpus", target)
+
+	files, err := ioutil.ReadDir(dir)
+	if err != nil {
+		fmt.Fprintln(os.Stderr, err)
+		os.Exit(1)
+	}
+
+	if len(files) != 1 {
+		fmt.Fprintln(os.Stderr, fmt.Errorf("expect only one new mutation to be written to testdata", len(files)))
+		os.Exit(1)
+	}
+
+	fname := files[0].Name()
+	contents, err := ioutil.ReadFile(filepath.Join(dir, fname))
+	if err != nil {
+		fmt.Fprintln(os.Stderr, err)
+		os.Exit(1)
+	}
+	if bytes.Equal(contents, []byte("aa")) {
+		fmt.Fprintln(os.Stderr, fmt.Errorf("newly written testdata entry was not mutated"))
+		os.Exit(1)
+	}
+	// The hash of the bytes in the file should match the filename.
+	h := []byte(fmt.Sprintf("%x", sha256.Sum256(contents)))
+	if !bytes.Equal([]byte(fname), h) {
+		fmt.Fprintln(os.Stderr, fmt.Errorf("hash of bytes %q does not match filename %q", h, fname))
+		os.Exit(1)
+	}
 }
--- a/src/cmd/go/testdata/script/test_fuzz_mutator.txt
+++ b/src/cmd/go/testdata/script/test_fuzz_mutator.txt
@ -10,6 +10,12 @@
 go test -fuzz=FuzzA -fuzztime=5s -parallel=1 -log=fuzz
 go run check_logs.go fuzz fuzz.worker

+# Test that the mutator is good enough to find several unique mutations.
+! go test -v -fuzz=Fuzz -parallel=1 -fuzztime=30s mutator_test.go
+! stdout ok
+stdout FAIL
+stdout 'mutator found enough edge cases'
+
 -- go.mod --
 module m

@ -143,7 +149,7 @@ func checkWorkerLog(r io.Reader) error {
 			sawAMutant = true
 		}
 	}
-	if err := scan.Err(); err != nil {
+	if err := scan.Err(); err != nil && err != bufio.ErrTooLong {
 		return err
 	}
 	if !sawAMutant {
@ -151,3 +157,40 @@ func checkWorkerLog(r io.Reader) error {
 	}
 	return nil
 }
+
+-- mutator_test.go --
+package fuzz_test
+
+import (
+	"strings"
+	"testing"
+)
+
+// TODO(katiehockman): re-work this test once we have a better fuzzing engine
+// (ie. more mutations, and compiler instrumentation)
+func Fuzz(f *testing.F) {
+	// TODO(katiehockman): simplify this once we can dedupe crashes (e.g.
+	// replace map with calls to panic, and simply count the number of crashes
+	// that were added to testdata)
+	crashes := make(map[string]bool)
+	// No seed corpus initiated
+	f.Fuzz(func(t *testing.T, b []byte) {
+		if len(crashes) >= 150 {
+			panic("mutator found enough edge cases")
+		}
+
+		if len(b) < 5 {
+			return // continue
+		}
+
+		for i := 0; i < 256; i++ {
+			s := string(byte(i))
+			if strings.HasPrefix(string(b), s) {
+				crashes["pre-" + s] = true
+			}
+			if strings.HasSuffix(string(b), s) {
+				crashes["suffix-" + s] = true
+			}
+		}
+	})
+}
--- a/src/internal/fuzz/fuzz.go
+++ b/src/internal/fuzz/fuzz.go
@ -47,20 +47,13 @@ func CoordinateFuzzing(ctx context.Context, parallel int, seed [][]byte, corpusD
 		parallel = runtime.GOMAXPROCS(0)
 	}

+	sharedMemSize := 100 << 20 // 100 MB
 	corpus, err := readCorpusAndCache(seed, corpusDir, cacheDir)
 	if err != nil {
 		return err
 	}
-	var maxEntryLen int
 	if len(corpus.entries) == 0 {
 		corpus.entries = []corpusEntry{{b: []byte{}}}
-		maxEntryLen = 0
-	} else {
-		for _, e := range corpus.entries {
-			if len(e.b) > maxEntryLen {
-				maxEntryLen = len(e.b)
-			}
-		}
 	}

 	// TODO(jayconrod): do we want to support fuzzing different binaries?
@ -78,7 +71,7 @@ func CoordinateFuzzing(ctx context.Context, parallel int, seed [][]byte, corpusD
 	}

 	newWorker := func() (*worker, error) {
-		mem, err := sharedMemTempFile(maxEntryLen)
+		mem, err := sharedMemTempFile(sharedMemSize)
 		if err != nil {
 			return nil, err
 		}
--- a/src/internal/fuzz/mem.go
+++ b/src/internal/fuzz/mem.go
@ -47,10 +47,9 @@ func sharedMemSize(valueSize int) int {
 	return int(unsafe.Sizeof(sharedMemHeader{})) + valueSize
 }

-// sharedMemTempFile creates a new temporary file large enough to hold a value
-// of the given size, then maps it into memory. The file will be removed when
-// the Close method is called.
-func sharedMemTempFile(valueSize int) (m *sharedMem, err error) {
+// sharedMemTempFile creates a new temporary file of the given size, then maps
+// it into memory. The file will be removed when the Close method is called.
+func sharedMemTempFile(size int) (m *sharedMem, err error) {
 	// Create a temporary file.
 	f, err := ioutil.TempFile("", "fuzz-*")
 	if err != nil {
@ -64,7 +63,7 @@ func sharedMemTempFile(valueSize int) (m *sharedMem, err error) {
 	}()

 	// Resize it to the correct size.
-	totalSize := sharedMemSize(valueSize)
+	totalSize := sharedMemSize(size)
 	if err := f.Truncate(int64(totalSize)); err != nil {
 		return nil, err
 	}
--- a/src/internal/fuzz/mutator.go
+++ b/src/internal/fuzz/mutator.go
@ -4,14 +4,244 @@

 package fuzz

-import "math/rand"
+import (
+	"encoding/binary"
+	"reflect"
+	"unsafe"
+)

-func mutate(b []byte) {
-	if len(b) == 0 {
-		return
-	}
-
-	// Mutate a byte in a random position.
-	pos := rand.Intn(len(b))
-	b[pos] = byte(rand.Intn(256))
+type mutator struct {
+	r *pcgRand
+}
+
+func newMutator() *mutator {
+	return &mutator{r: newPcgRand()}
+}
+
+func (m *mutator) rand(n int) int {
+	return m.r.intn(n)
+}
+
+func (m *mutator) randByteOrder() binary.ByteOrder {
+	if m.r.bool() {
+		return binary.LittleEndian
+	}
+	return binary.BigEndian
+}
+
+// chooseLen chooses length of range mutation in range [0,n]. It gives
+// preference to shorter ranges.
+func (m *mutator) chooseLen(n int) int {
+	switch x := m.rand(100); {
+	case x < 90:
+		return m.rand(min(8, n)) + 1
+	case x < 99:
+		return m.rand(min(32, n)) + 1
+	default:
+		return m.rand(n) + 1
+	}
+}
+
+func min(a, b int) int {
+	if a < b {
+		return a
+	}
+	return b
+}
+
+// mutate performs several mutations directly onto the provided byte slice.
+func (m *mutator) mutate(ptrB *[]byte) {
+	// TODO(jayconrod,katiehockman): make this use zero allocations
+	// TODO(katiehockman): pull some of these functions into helper methods
+	// and test that each case is working as expected.
+	// TODO(katiehockman): perform more types of mutations.
+	b := *ptrB
+	defer func() {
+		oldHdr := (*reflect.SliceHeader)(unsafe.Pointer(ptrB))
+		newHdr := (*reflect.SliceHeader)(unsafe.Pointer(&b))
+		if oldHdr.Data != newHdr.Data {
+			panic("data moved to new address")
+		}
+		*ptrB = b
+	}()
+
+	numIters := 1 + m.r.exp2()
+	for iter := 0; iter < numIters; iter++ {
+		switch m.rand(10) {
+		case 0:
+			// Remove a range of bytes.
+			if len(b) <= 1 {
+				iter--
+				continue
+			}
+			pos0 := m.rand(len(b))
+			pos1 := pos0 + m.chooseLen(len(b)-pos0)
+			copy(b[pos0:], b[pos1:])
+			b = b[:len(b)-(pos1-pos0)]
+		case 1:
+			// Insert a range of random bytes.
+			pos := m.rand(len(b) + 1)
+			n := m.chooseLen(10)
+			if len(b)+n >= cap(b) {
+				iter--
+				continue
+			}
+			b = b[:len(b)+n]
+			copy(b[pos+n:], b[pos:])
+			for i := 0; i < n; i++ {
+				b[pos+i] = byte(m.rand(256))
+			}
+		case 2:
+			// Duplicate a range of bytes.
+			if len(b) <= 1 {
+				iter--
+				continue
+			}
+			src := m.rand(len(b))
+			dst := m.rand(len(b))
+			for dst == src {
+				dst = m.rand(len(b))
+			}
+			n := m.chooseLen(len(b) - src)
+			tmp := make([]byte, n)
+			copy(tmp, b[src:])
+			b = b[:len(b)+n]
+			copy(b[dst+n:], b[dst:])
+			copy(b[dst:], tmp)
+		case 3:
+			// Copy a range of bytes.
+			if len(b) <= 1 {
+				iter--
+				continue
+			}
+			src := m.rand(len(b))
+			dst := m.rand(len(b))
+			for dst == src {
+				dst = m.rand(len(b))
+			}
+			n := m.chooseLen(len(b) - src)
+			copy(b[dst:], b[src:src+n])
+		case 4:
+			// Bit flip.
+			if len(b) == 0 {
+				iter--
+				continue
+			}
+			pos := m.rand(len(b))
+			b[pos] ^= 1 << uint(m.rand(8))
+		case 5:
+			// Set a byte to a random value.
+			if len(b) == 0 {
+				iter--
+				continue
+			}
+			pos := m.rand(len(b))
+			b[pos] = byte(m.rand(256))
+		case 6:
+			// Swap 2 bytes.
+			if len(b) <= 1 {
+				iter--
+				continue
+			}
+			src := m.rand(len(b))
+			dst := m.rand(len(b))
+			for dst == src {
+				dst = m.rand(len(b))
+			}
+			b[src], b[dst] = b[dst], b[src]
+		case 7:
+			// Add/subtract from a byte.
+			if len(b) == 0 {
+				iter--
+				continue
+			}
+			pos := m.rand(len(b))
+			v := byte(m.rand(35) + 1)
+			if m.r.bool() {
+				b[pos] += v
+			} else {
+				b[pos] -= v
+			}
+		case 8:
+			// Add/subtract from a uint16.
+			if len(b) < 2 {
+				iter--
+				continue
+			}
+			v := uint16(m.rand(35) + 1)
+			if m.r.bool() {
+				v = 0 - v
+			}
+			pos := m.rand(len(b) - 1)
+			enc := m.randByteOrder()
+			enc.PutUint16(b[pos:], enc.Uint16(b[pos:])+v)
+		case 9:
+			// Add/subtract from a uint32.
+			if len(b) < 4 {
+				iter--
+				continue
+			}
+			v := uint32(m.rand(35) + 1)
+			if m.r.bool() {
+				v = 0 - v
+			}
+			pos := m.rand(len(b) - 3)
+			enc := m.randByteOrder()
+			enc.PutUint32(b[pos:], enc.Uint32(b[pos:])+v)
+		case 10:
+			// Add/subtract from a uint64.
+			if len(b) < 8 {
+				iter--
+				continue
+			}
+			v := uint64(m.rand(35) + 1)
+			if m.r.bool() {
+				v = 0 - v
+			}
+			pos := m.rand(len(b) - 7)
+			enc := m.randByteOrder()
+			enc.PutUint64(b[pos:], enc.Uint64(b[pos:])+v)
+		case 11:
+			// Replace a byte with an interesting value.
+			if len(b) == 0 {
+				iter--
+				continue
+			}
+			pos := m.rand(len(b))
+			b[pos] = byte(interesting8[m.rand(len(interesting8))])
+		case 12:
+			// Replace a uint16 with an interesting value.
+			if len(b) < 2 {
+				iter--
+				continue
+			}
+			pos := m.rand(len(b) - 1)
+			v := uint16(interesting16[m.rand(len(interesting16))])
+			m.randByteOrder().PutUint16(b[pos:], v)
+		case 13:
+			// Replace a uint32 with an interesting value.
+			if len(b) < 4 {
+				iter--
+				continue
+			}
+			pos := m.rand(len(b) - 3)
+			v := uint32(interesting32[m.rand(len(interesting32))])
+			m.randByteOrder().PutUint32(b[pos:], v)
+		}
+	}
+}
+
+var (
+	interesting8  = []int8{-128, -1, 0, 1, 16, 32, 64, 100, 127}
+	interesting16 = []int16{-32768, -129, 128, 255, 256, 512, 1000, 1024, 4096, 32767}
+	interesting32 = []int32{-2147483648, -100663046, -32769, 32768, 65535, 65536, 100663045, 2147483647}
+)
+
+func init() {
+	for _, v := range interesting8 {
+		interesting16 = append(interesting16, int16(v))
+	}
+	for _, v := range interesting16 {
+		interesting32 = append(interesting32, int32(v))
+	}
 }
--- a/src/internal/fuzz/pcg.go
+++ b/src/internal/fuzz/pcg.go
@ -0,0 +1,104 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package fuzz
+
+import (
+	"math/bits"
+	"sync/atomic"
+	"time"
+)
+
+// The functions in pcg implement a 32 bit PRNG with a 64 bit period: pcg xsh rr
+// 64 32. See https://www.pcg-random.org/ for more information. This
+// implementation is geared specifically towards the needs of fuzzing: Simple
+// creation and use, no reproducibility, no concurrency safety, just the
+// necessary methods, optimized for speed.
+
+var globalInc uint64 // PCG stream
+
+const multiplier uint64 = 6364136223846793005
+
+// pcgRand is a PRNG. It should not be copied or shared. No Rand methods are
+// concurrency safe.
+type pcgRand struct {
+	noCopy noCopy // help avoid mistakes: ask vet to ensure that we don't make a copy
+	state  uint64
+	inc    uint64
+}
+
+// newPcgRand generates a new, seeded Rand, ready for use.
+func newPcgRand() *pcgRand {
+	r := new(pcgRand)
+	now := uint64(time.Now().UnixNano())
+	inc := atomic.AddUint64(&globalInc, 1)
+	r.state = now
+	r.inc = (inc << 1) | 1
+	r.step()
+	r.state += now
+	r.step()
+	return r
+}
+
+func (r *pcgRand) step() {
+	r.state *= multiplier
+	r.state += r.inc
+}
+
+// uint32 returns a pseudo-random uint32.
+func (r *pcgRand) uint32() uint32 {
+	x := r.state
+	r.step()
+	return bits.RotateLeft32(uint32(((x>>18)^x)>>27), -int(x>>59))
+}
+
+// intn returns a pseudo-random number in [0, n).
+// n must fit in a uint32.
+func (r *pcgRand) intn(n int) int {
+	if int(uint32(n)) != n {
+		panic("large Intn")
+	}
+	return int(r.uint32n(uint32(n)))
+}
+
+// uint32n returns a pseudo-random number in [0, n).
+//
+// For implementation details, see:
+// https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction
+// https://lemire.me/blog/2016/06/30/fast-random-shuffling
+func (r *pcgRand) uint32n(n uint32) uint32 {
+	v := r.uint32()
+	prod := uint64(v) * uint64(n)
+	low := uint32(prod)
+	if low < n {
+		thresh := uint32(-int32(n)) % n
+		for low < thresh {
+			v = r.uint32()
+			prod = uint64(v) * uint64(n)
+			low = uint32(prod)
+		}
+	}
+	return uint32(prod >> 32)
+}
+
+// exp2 generates n with probability 1/2^(n+1).
+func (r *pcgRand) exp2() int {
+	return bits.TrailingZeros32(r.uint32())
+}
+
+// bool generates a random bool.
+func (r *pcgRand) bool() bool {
+	return r.uint32()&1 == 0
+}
+
+// noCopy may be embedded into structs which must not be copied
+// after the first use.
+//
+// See https://golang.org/issues/8005#issuecomment-190753527
+// for details.
+type noCopy struct{}
+
+// lock is a no-op used by -copylocks checker from `go vet`.
+func (*noCopy) lock()   {}
+func (*noCopy) unlock() {}
--- a/src/internal/fuzz/worker.go
+++ b/src/internal/fuzz/worker.go
@ -318,7 +318,7 @@ func RunFuzzWorker(ctx context.Context, fn func([]byte) error) error {
 	if err != nil {
 		return err
 	}
-	srv := &workerServer{workerComm: comm, fuzzFn: fn}
+	srv := &workerServer{workerComm: comm, fuzzFn: fn, m: newMutator()}
 	return srv.serve(ctx)
 }

@ -366,6 +366,7 @@ type workerComm struct {
 // memory after a worker process terminates unexpectedly.
 type workerServer struct {
 	workerComm
+	m *mutator

 	// fuzzFn runs the worker's fuzz function on the given input and returns
 	// an error if it finds a crasher (the process may also exit or crash).
@ -441,7 +442,11 @@ func (ws *workerServer) fuzz(ctx context.Context, args fuzzArgs) fuzzResponse {
 			// real heuristic once we have one.
 			return fuzzResponse{Interesting: true}
 		default:
-			mutate(ws.mem.valueRef())
+			b := ws.mem.valueRef()
+			ws.m.mutate(&b)
+			// TODO(jayconrod): consider making ws.m.header() contain the whole
+			// slice header, so the length can be updated when the slice changes
+			ws.mem.header().length = len(b)
 			if err := ws.fuzzFn(ws.mem.valueRef()); err != nil {
 				return fuzzResponse{Err: err.Error()}
 			}