runtime: help the race detector detect possible concurrent cleanups

This change makes it so that, in race mode, the cleanup goroutine creates
a fresh fake race context for each cleanup and switches to it, emulating
each cleanup running on its own goroutine. This helps catch races between
cleanups that might run concurrently, even when they happen to execute on
the same goroutine.
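
For illustration only (a sketch, not code from this change): two cleanups
that write the same variable without synchronization can now be reported
as racing even if both run on the same cleanup goroutine.

	package main

	import (
		"runtime"
		"time"
	)

	func main() {
		shared := new(int)
		a, b := new(string), new(string)
		// Two cleanups writing *shared without synchronization.
		runtime.AddCleanup(a, func(x int) { *shared = x }, 1)
		runtime.AddCleanup(b, func(x int) { *shared = x }, 2)
		a, b = nil, nil
		runtime.GC()
		// Give the cleanups a chance to run; under -race, the two
		// writes to *shared may now be reported as a race.
		time.Sleep(100 * time.Millisecond)
	}

The new TestRaceBetweenCleanups below exercises exactly this pattern.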

Change-Id: I4c4e33054313798d4ac4e5d91ff2487ea3eb4b16
Reviewed-on: https://go-review.googlesource.com/c/go/+/652635
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: Michael Knyszek <mknyszek@google.com>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
commit 3ea94ae446 (parent b30fa1bcc4)
Michael Anthony Knyszek, 2025-02-25 22:50:10 +00:00; committed by Gopher Robot
4 changed files with 125 additions and 0 deletions


@@ -575,15 +575,41 @@ func runCleanups() {
	for {
		b := gcCleanups.dequeue()
		if raceenabled {
			// Approximately: adds a happens-before edge between the cleanup
			// argument being mutated and the call to the cleanup below.
			racefingo()
		}
		gcCleanups.beginRunningCleanups()
		for i := 0; i < int(b.n); i++ {
			fn := b.cleanups[i]
			var racectx uintptr
			if raceenabled {
				// Enter a new race context so the race detector can catch
				// potential races between cleanups, even if they execute on
				// the same goroutine.
				//
				// Synchronize on fn. This would fail to find races on the
				// closed-over values in fn (suppose fn is passed to multiple
				// AddCleanup calls) if fn was not unique, but it is. Update
				// the synchronization on fn if you intend to optimize it
				// and store the cleanup function and cleanup argument on the
				// queue directly.
				racerelease(unsafe.Pointer(fn))
				racectx = raceEnterNewCtx()
				raceacquire(unsafe.Pointer(fn))
			}
			// Execute the next cleanup.
			cleanup := *(*func())(unsafe.Pointer(&fn))
			cleanup()
			b.cleanups[i] = nil
			if raceenabled {
				// Restore the old context.
				raceRestoreCtx(racectx)
			}
		}
		gcCleanups.endRunningCleanups()
@@ -621,3 +647,53 @@ func unique_runtime_blockUntilEmptyCleanupQueue(timeout int64) bool {
func sync_test_runtime_blockUntilEmptyCleanupQueue(timeout int64) bool {
	return gcCleanups.blockUntilEmpty(timeout)
}

// raceEnterNewCtx creates a new racectx and switches the current
// goroutine to it. Returns the old racectx.
//
// Must be running on a user goroutine. nosplit to match other race
// instrumentation.
//
//go:nosplit
func raceEnterNewCtx() uintptr {
	// We use the existing ctx as the spawn context, but gp.gopc
	// as the spawn PC to make the error output a little nicer
	// (pointing to AddCleanup, where the goroutines are created).
	//
	// We also need to carefully indicate to the race detector
	// that the goroutine stack will only be accessed by the new
	// race context, to avoid false positives on stack locations.
	// We do this by marking the stack as free in the first context
	// and then re-marking it as allocated in the second. Crucially,
	// there must be (1) no race operations and (2) no stack changes
	// in between. (1) is easy to avoid because we're in the runtime
	// so there's no implicit race instrumentation. To avoid (2) we
	// defensively become non-preemptible so the GC can't stop us,
	// and rely on the fact that racemalloc, racefree, and racectxstart
	// are nosplit.
	mp := acquirem()
	gp := getg()
	ctx := getg().racectx
	racefree(unsafe.Pointer(gp.stack.lo), gp.stack.hi-gp.stack.lo)
	getg().racectx = racectxstart(gp.gopc, ctx)
	racemalloc(unsafe.Pointer(gp.stack.lo), gp.stack.hi-gp.stack.lo)
	releasem(mp)
	return ctx
}

// raceRestoreCtx restores ctx on the goroutine. It is the inverse of
// raceEnterNewCtx and must be called with its result.
//
// Must be running on a user goroutine. nosplit to match other race
// instrumentation.
//
//go:nosplit
func raceRestoreCtx(ctx uintptr) {
	mp := acquirem()
	gp := getg()
	racefree(unsafe.Pointer(gp.stack.lo), gp.stack.hi-gp.stack.lo)
	racectxend(getg().racectx)
	racemalloc(unsafe.Pointer(gp.stack.lo), gp.stack.hi-gp.stack.lo)
	getg().racectx = ctx
	releasem(mp)
}


@@ -566,6 +566,13 @@ func racegoend() {
	racecall(&__tsan_go_end, getg().racectx, 0, 0, 0)
}
//go:nosplit
func racectxstart(pc, spawnctx uintptr) uintptr {
	var racectx uintptr
	racecall(&__tsan_go_start, spawnctx, uintptr(unsafe.Pointer(&racectx)), pc, 0)
	return racectx
}
//go:nosplit
func racectxend(racectx uintptr) {
	racecall(&__tsan_go_end, racectx, 0, 0, 0)


@@ -9,6 +9,7 @@ import (
	"sync"
	"testing"
	"time"
	"unsafe"
)

func TestNoRaceFin(t *testing.T) {
@@ -66,3 +67,43 @@ func TestRaceFin(t *testing.T) {
	time.Sleep(100 * time.Millisecond)
	y = 66
}

func TestNoRaceCleanup(t *testing.T) {
	c := make(chan bool)
	go func() {
		x := new(string)
		y := new(string)
		runtime.AddCleanup(x, func(y *string) {
			*y = "foo"
		}, y)
		*y = "bar"
		runtime.KeepAlive(x)
		c <- true
	}()
	<-c
	runtime.GC()
	time.Sleep(100 * time.Millisecond)
}

func TestRaceBetweenCleanups(t *testing.T) {
	// Allocate struct with pointer to avoid hitting tinyalloc.
	// Otherwise we can't be sure when the allocation will
	// be freed.
	type T struct {
		v int
		p unsafe.Pointer
	}
	sharedVar := new(int)
	v0 := new(T)
	v1 := new(T)
	cleanup := func(x int) {
		*sharedVar = x
	}
	runtime.AddCleanup(v0, cleanup, 0)
	runtime.AddCleanup(v1, cleanup, 0)
	v0 = nil
	v1 = nil
	runtime.GC()
	time.Sleep(100 * time.Millisecond)
}
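
For contrast, a sketch (not part of this change, assuming the setup of
TestRaceBetweenCleanups above): guarding the shared write with a mutex gives
the race detector a happens-before edge between the two cleanups, so no race
would be reported. The "sync" import is already present in this file.

	var mu sync.Mutex
	cleanup := func(x int) {
		mu.Lock()
		*sharedVar = x // serialized writes; not flagged under -race
		mu.Unlock()
	}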


@@ -41,4 +41,5 @@ func racemalloc(p unsafe.Pointer, sz uintptr) { throw("race") }
func racefree(p unsafe.Pointer, sz uintptr) { throw("race") }
func racegostart(pc uintptr) uintptr { throw("race"); return 0 }
func racegoend() { throw("race") }
func racectxstart(pc, spawnctx uintptr) uintptr { throw("race"); return 0 }
func racectxend(racectx uintptr) { throw("race") }