testing: simplify concurrency and cleanup logic

While investigating #60083, I found a couple of bugs (notably #61034)
that had slipped through code review in part because the concurrency
patterns used in the testing package were too complex for me to fully
reason about. This change adjusts those patterns to be more in line
with current idioms, and to reduce the number of special cases that
depend on details that should be orthogonal. (For example: the details
of how we invoke the Cleanup functions should not depend on whether
the test happened to run any parallel subtests.)

In the process, this change fixes a handful of bugs, each illustrated
with a short sketch after this list:

- Concurrent calls to Run (explicitly allowed, and exercised by
  TestParallelSub) could previously drive the testContext.running count
  negative, allowing the number of concurrently running parallel tests
  to exceed the limit set by the -parallel flag.

- The -failfast flag now takes effect immediately on failure. It no
  longer delays until the test finishes, and no longer misses failures
  during cleanup (fixing #61034).

- If a Cleanup function calls runtime.Goexit (typically via t.FailNow)
  during a panic, Cleanup functions from its parent tests are no
  longer skipped and buffered logs from its parent tests are now
  flushed.

- The time reported for a test with subtests now includes the time spent
  running those subtests, regardless of whether they are parallel.
  (Previously, non-parallel subtests were included but parallel subtests
  were not.)

- Calls to (*B).Run in iterations after the first are now diagnosed
  with a panic. (This catches badly-behaved benchmarks: if Run is
  called during the first iteration, no subsequent iterations are
  supposed to occur.)
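
A minimal sketch of the counting-semaphore pattern that replaces the
hand-rolled running/numWaiting accounting (the parallelLimiter type
here is illustrative; the real code keeps the channel in testContext):

    package main

    import "fmt"

    // token is a semaphore token; the channel's capacity plays the
    // role of the -parallel limit.
    type token struct{}

    type parallelLimiter struct {
        parallelSem chan token
    }

    func newParallelLimiter(maxParallel int) *parallelLimiter {
        return &parallelLimiter{parallelSem: make(chan token, maxParallel)}
    }

    // acquire blocks until a token is available.
    func (l *parallelLimiter) acquire() { l.parallelSem <- token{} }

    // release returns a token. Releasing more tokens than were acquired
    // is an internal error, so it panics rather than silently driving
    // the count negative.
    func (l *parallelLimiter) release() {
        select {
        case <-l.parallelSem:
        default:
            panic("no parallel token to release")
        }
    }

    // running reports the number of tokens outstanding.
    func (l *parallelLimiter) running() int { return len(l.parallelSem) }

    func main() {
        l := newParallelLimiter(2)
        l.acquire()
        l.acquire()
        fmt.Println(l.running()) // 2
        l.release()
        fmt.Println(l.running()) // 1
    }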
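
The -failfast fix boils down to consulting a flag that is set inside
Fail itself rather than a counter updated when the test's runner
goroutine exits. A standalone sketch of that difference (the fail and
shouldFailFast helpers here are simplified stand-ins for the real ones):

    package main

    import (
        "fmt"
        "sync/atomic"
    )

    // anyFailed is set the moment any test calls Fail, including from a
    // Cleanup function, rather than when the test finishes running.
    var anyFailed atomic.Bool

    func fail() { anyFailed.Store(true) }

    func shouldFailFast(failFast bool) bool {
        return failFast && anyFailed.Load()
    }

    func main() {
        fmt.Println(shouldFailFast(true)) // false: nothing has failed yet
        fail()
        fmt.Println(shouldFailFast(true)) // true: takes effect immediately
    }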
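
The Cleanup/Goexit fix is easiest to see as a test. This mirrors the
issue-48502 case in the updated panic tests below (the package and test
names here are illustrative); under this change the process still dies
with "panic: die [recovered]", but Cleanup functions on parent tests run
and the buffered t.Log output is flushed before the panic is re-raised:

    package sketch

    import (
        "runtime"
        "testing"
    )

    func TestGoexitInCleanupAfterPanic(t *testing.T) {
        t.Cleanup(func() {
            t.Log("Goexiting in cleanup") // now flushed before the re-panic
            runtime.Goexit()
        })
        panic("die") // deliberately crashes the test binary
    }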
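
For the timing fix, a sketch of the case that used to be undercounted
(durations are illustrative):

    package sketch

    import (
        "testing"
        "time"
    )

    func TestParent(t *testing.T) {
        t.Run("sub", func(t *testing.T) {
            t.Parallel()
            time.Sleep(100 * time.Millisecond)
        })
        // Previously the time reported for TestParent excluded the
        // parallel subtest; it now includes the full ~0.10s.
    }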
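
And a sketch of a badly-behaved benchmark that the new (*B).Run check
diagnoses (previously such a benchmark went undiagnosed):

    package sketch

    import "testing"

    func BenchmarkBad(b *testing.B) {
        for i := 0; i < b.N; i++ {
            // The framework first probes with b.N == 1. Because Run is
            // not called in that first pass, it assumes a leaf benchmark
            // and retries with a larger b.N; calling Run in one of those
            // later iterations now panics with
            // "testing: unexpected call to B.Run during iteration".
            if b.N > 1 && i == 0 {
                b.Run("sub", func(b *testing.B) {})
            }
        }
    }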

Fixes #61034.

Change-Id: I3797f6ef5210a3d2d5d6c2710d3f35c0219b02ea
Cq-Include-Trybots: luci.golang.try:gotip-linux-amd64-longtest,gotip-linux-amd64-longtest-race,gotip-windows-amd64-longtest
Reviewed-on: https://go-review.googlesource.com/c/go/+/506755
Auto-Submit: Bryan Mills <bcmills@google.com>
Reviewed-by: Alan Donovan <adonovan@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Bryan C. Mills 2023-06-28 09:37:47 -04:00 committed by Gopher Robot
parent f5bf9fb278
commit e6b76bfc46
6 changed files with 436 additions and 499 deletions

View File

@@ -29,7 +29,7 @@ USHORT backtrace(ULONG FramesToCapture, PVOID *BackTrace) {
}
ControlPc = context.Rip;
// Check if we left the user range.
if (ControlPc < 0x10000) {
break;
}
@@ -89,7 +89,7 @@ func testCallbackCallersSEH(t *testing.T) {
"test.testCallbackCallersSEH",
"test.TestCallbackCallersSEH",
"testing.tRunner",
"testing.(*T).Run.gowrap1",
"testing.(*T).Run.gowrap3",
"runtime.goexit",
}
pc := make([]uintptr, 100)

View File

@@ -176,6 +176,10 @@ func (b *B) ReportAllocs() {
// runN runs a single benchmark for the specified number of iterations.
func (b *B) runN(n int) {
if b.done {
panic("testing: internal error: runN when benchmark is already done")
}
benchmarkLock.Lock()
defer benchmarkLock.Unlock()
defer func() {
@@ -196,46 +200,46 @@ func (b *B) runN(n int) {
b.previousDuration = b.duration
}
// run1 runs the first iteration of benchFunc. It reports whether more
// iterations of this benchmark should be run.
func (b *B) run1() bool {
// run1 runs the first iteration of benchFunc.
//
// If no more iterations of this benchmark should be done, run1 sets b.done.
func (b *B) run1() {
if ctx := b.context; ctx != nil {
// Extend maxLen, if needed.
if n := len(b.name) + ctx.extLen + 1; n > ctx.maxLen {
ctx.maxLen = n + 8 // Add additional slack to avoid too many jumps in size.
}
}
runOneDone := make(chan struct{})
go func() {
// Signal that we're done whether we return normally
// or by FailNow's runtime.Goexit.
defer func() {
b.signal <- true
}()
defer close(runOneDone)
b.runN(1)
}()
<-b.signal
<-runOneDone
if b.failed {
fmt.Fprintf(b.w, "%s--- FAIL: %s\n%s", b.chatty.prefix(), b.name, b.output)
return false
b.done = true
close(b.doneOrParallel)
return
}
// Only print the output if we know we are not going to proceed.
// Otherwise it is printed in processBench.
b.mu.RLock()
finished := b.finished
b.mu.RUnlock()
if b.hasSub.Load() || finished {
if b.hasSub.Load() || b.skipped {
tag := "BENCH"
if b.skipped {
tag = "SKIP"
}
if b.chatty != nil && (len(b.output) > 0 || finished) {
if b.chatty != nil && (len(b.output) > 0 || b.skipped) {
b.trimOutput()
fmt.Fprintf(b.w, "%s--- %s: %s\n%s", b.chatty.prefix(), tag, b.name, b.output)
}
return false
b.done = true
close(b.doneOrParallel)
}
return true
}
var labelsOnce sync.Once
@@ -262,9 +266,10 @@ func (b *B) run() {
}
}
// doBench calls b.launch in a separate goroutine and waits for it to complete.
func (b *B) doBench() BenchmarkResult {
go b.launch()
<-b.signal
<-b.doneOrParallel
return b.result
}
@@ -276,7 +281,10 @@ func (b *B) launch() {
// Signal that we're done whether we return normally
// or by FailNow's runtime.Goexit.
defer func() {
b.signal <- true
b.result = BenchmarkResult{b.N, b.duration, b.bytes, b.netAllocs, b.netBytes, b.extra}
b.setRanLeaf()
b.done = true
close(b.doneOrParallel)
}()
// Run the benchmark for at least the specified amount of time.
@@ -316,7 +324,6 @@ func (b *B) launch() {
b.runN(int(n))
}
}
b.result = BenchmarkResult{b.N, b.duration, b.bytes, b.netAllocs, b.netBytes, b.extra}
}
// Elapsed returns the measured elapsed time of the benchmark.
@@ -550,6 +557,7 @@ func runBenchmarks(importPath string, matchString func(pat, str string) (bool, e
main.chatty = newChattyPrinter(main.w)
}
main.runN(1)
main.done = true
return !main.failed
}
@@ -568,18 +576,21 @@ func (ctx *benchContext) processBench(b *B) {
if i > 0 || j > 0 {
b = &B{
common: common{
signal: make(chan bool),
name: b.name,
w: b.w,
chatty: b.chatty,
bench: true,
doneOrParallel: make(chan struct{}),
name: b.name,
w: b.w,
chatty: b.chatty,
bench: true,
},
benchFunc: b.benchFunc,
benchTime: b.benchTime,
}
b.run1()
}
r := b.doBench()
var r BenchmarkResult
if !b.done {
r = b.doBench()
}
if b.failed {
// The output could be very long here, but probably isn't.
// We print it all, regardless, because we don't want to trim the reason
@@ -622,6 +633,13 @@ var hideStdoutForTesting = false
// A subbenchmark is like any other benchmark. A benchmark that calls Run at
// least once will not be measured itself and will be called once with N=1.
func (b *B) Run(name string, f func(b *B)) bool {
if b.previousN > 0 {
// If the benchmark calls Run we will only call it once with N=1.
// If it doesn't call Run the first time we try it, it must not
// call run on subsequent invocations either.
panic("testing: unexpected call to B.Run during iteration")
}
// Since b has subbenchmarks, we will no longer run it as a benchmark itself.
// Release the lock and acquire it on exit to ensure locks stay paired.
b.hasSub.Store(true)
@@ -639,14 +657,14 @@ func (b *B) Run(name string, f func(b *B)) bool {
n := runtime.Callers(2, pc[:])
sub := &B{
common: common{
signal: make(chan bool),
name: benchName,
parent: &b.common,
level: b.level + 1,
creator: pc[:n],
w: b.w,
chatty: b.chatty,
bench: true,
doneOrParallel: make(chan struct{}),
name: benchName,
parent: &b.common,
level: b.level + 1,
creator: pc[:n],
w: b.w,
chatty: b.chatty,
bench: true,
},
importPath: b.importPath,
benchFunc: f,
@@ -679,7 +697,8 @@ func (b *B) Run(name string, f func(b *B)) bool {
}
}
if sub.run1() {
sub.run1()
if !sub.done {
sub.run()
}
b.add(sub.result)
@@ -823,13 +842,14 @@ func (b *B) SetParallelism(p int) {
func Benchmark(f func(b *B)) BenchmarkResult {
b := &B{
common: common{
signal: make(chan bool),
w: discard{},
doneOrParallel: make(chan struct{}),
w: discard{},
},
benchFunc: f,
benchTime: benchTime,
}
if b.run1() {
b.run1()
if !b.done {
b.run()
}
return b.result

View File

@@ -70,10 +70,6 @@ type F struct {
fuzzContext *fuzzContext
testContext *testContext
// inFuzzFn is true when the fuzz function is running. Most F methods cannot
// be called when inFuzzFn is true.
inFuzzFn bool
// corpus is a set of seed corpus entries, added with F.Add and loaded
// from testdata.
corpus []corpusEntry
@@ -300,13 +296,13 @@ func (f *F) Fuzz(ff any) {
n := runtime.Callers(2, pc[:])
t := &T{
common: common{
barrier: make(chan bool),
signal: make(chan bool),
name: testName,
parent: &f.common,
level: f.level + 1,
creator: pc[:n],
chatty: f.chatty,
runParallel: make(chan struct{}),
doneOrParallel: make(chan struct{}),
name: testName,
parent: &f.common,
level: f.level + 1,
creator: pc[:n],
chatty: f.chatty,
},
context: f.testContext,
}
@@ -318,7 +314,7 @@ func (f *F) Fuzz(ff any) {
if t.chatty != nil {
t.chatty.Updatef(t.name, "=== RUN %s\n", t.name)
}
f.common.inFuzzFn, f.inFuzzFn = true, true
f.inFuzzFn = true
go tRunner(t, func(t *T) {
args := []reflect.Value{reflect.ValueOf(t)}
for _, v := range e.Values {
@@ -334,11 +330,11 @@ func (f *F) Fuzz(ff any) {
}
fn.Call(args)
})
<-t.signal
<-t.doneOrParallel
if t.chatty != nil && t.chatty.json {
t.chatty.Updatef(t.parent.name, "=== NAME %s\n", t.parent.name)
}
f.common.inFuzzFn, f.inFuzzFn = false, false
f.inFuzzFn = false
return !t.Failed()
}
@@ -510,12 +506,12 @@ func runFuzzTests(deps testDeps, fuzzTests []InternalFuzzTarget, deadline time.T
}
f := &F{
common: common{
signal: make(chan bool),
barrier: make(chan bool),
name: testName,
parent: &root,
level: root.level + 1,
chatty: root.chatty,
doneOrParallel: make(chan struct{}),
runParallel: make(chan struct{}),
name: testName,
parent: &root,
level: root.level + 1,
chatty: root.chatty,
},
testContext: tctx,
fuzzContext: fctx,
@@ -525,12 +521,12 @@ func runFuzzTests(deps testDeps, fuzzTests []InternalFuzzTarget, deadline time.T
f.chatty.Updatef(f.name, "=== RUN %s\n", f.name)
}
go fRunner(f, ft.Fn)
<-f.signal
<-f.doneOrParallel
if f.chatty != nil && f.chatty.json {
f.chatty.Updatef(f.parent.name, "=== NAME %s\n", f.parent.name)
}
ok = ok && !f.Failed()
ran = ran || f.ran
ran = ran || f.ranAnyLeaf
}
if !ran {
// There were no tests to run on this iteration.
@@ -592,12 +588,12 @@ func runFuzzing(deps testDeps, fuzzTests []InternalFuzzTarget) (ok bool) {
f := &F{
common: common{
signal: make(chan bool),
barrier: nil, // T.Parallel has no effect when fuzzing.
name: testName,
parent: &root,
level: root.level + 1,
chatty: root.chatty,
doneOrParallel: make(chan struct{}),
runParallel: nil, // T.Parallel has no effect when fuzzing.
name: testName,
parent: &root,
level: root.level + 1,
chatty: root.chatty,
},
fuzzContext: fctx,
testContext: tctx,
@@ -607,7 +603,7 @@ func runFuzzing(deps testDeps, fuzzTests []InternalFuzzTarget) (ok bool) {
f.chatty.Updatef(f.name, "=== RUN %s\n", f.name)
}
go fRunner(f, fuzzTest.Fn)
<-f.signal
<-f.doneOrParallel
if f.chatty != nil {
f.chatty.Updatef(f.parent.name, "=== NAME %s\n", f.parent.name)
}
@@ -625,6 +621,12 @@ func runFuzzing(deps testDeps, fuzzTests []InternalFuzzTarget) (ok bool) {
// simplifications are made. We also require that F.Fuzz, F.Skip, or F.Fail is
// called.
func fRunner(f *F, fn func(*F)) {
// TODO(bcmills): This function has a lot of code and structure in common with
// tRunner. At some point it would probably be good to factor out the common
// parts to make the differences easier to spot.
returned := false
// When this goroutine is done, either because runtime.Goexit was called, a
// panic started, or fn returned normally, record the duration and send
// t.signal, indicating the fuzz test is done.
@@ -636,96 +638,93 @@ func fRunner(f *F, fn func(*F)) {
// Unfortunately, recovering here adds stack frames, but the location of
// the original panic should still be
// clear.
f.checkRaces()
if f.Failed() {
numFailed.Add(1)
}
err := recover()
if err == nil {
f.mu.RLock()
fuzzNotCalled := !f.fuzzCalled && !f.skipped && !f.failed
if !f.finished && !f.skipped && !f.failed {
err = errNilPanicOrGoexit
}
f.mu.RUnlock()
if fuzzNotCalled && err == nil {
panicVal := recover()
if panicVal == nil && !f.skipped && !f.failed {
if !returned {
panicVal = errNilPanicOrGoexit
} else if !f.fuzzCalled {
f.Error("returned without calling F.Fuzz, F.Fail, or F.Skip")
}
}
// Use a deferred call to ensure that we report that the test is
// complete even if a cleanup function calls F.FailNow. See issue 41355.
didPanic := false
defer func() {
if !didPanic {
// Only report that the test is complete if it doesn't panic,
// as otherwise the test binary can exit before the panic is
// reported to the user. See issue 41479.
f.signal <- true
}
}()
// If we recovered a panic or inappropriate runtime.Goexit, fail the test,
// flush the output log up to the root, then panic.
doPanic := func(err any) {
if panicVal != nil {
// Mark the test as failed so that Cleanup functions can see its correct status.
f.Fail()
if r := f.runCleanup(recoverAndReturnPanic); r != nil {
f.Logf("cleanup panicked with %v", r)
}
for root := &f.common; root.parent != nil; root = root.parent {
root.mu.Lock()
root.duration += time.Since(root.start)
d := root.duration
root.mu.Unlock()
root.flushToParent(root.name, "--- FAIL: %s (%s)\n", root.name, fmtDuration(d))
}
didPanic = true
panic(err)
}
if err != nil {
doPanic(err)
}
// No panic or inappropriate Goexit.
f.duration += time.Since(f.start)
if len(f.sub) > 0 {
} else if f.runParallel != nil {
// Unblock inputs that called T.Parallel while running the seed corpus.
// This only affects fuzz tests run as normal tests.
// While fuzzing, T.Parallel has no effect, so f.sub is empty, and this
// branch is not taken. f.barrier is nil in that case.
f.testContext.release()
close(f.barrier)
// Wait for the subtests to complete.
for _, sub := range f.sub {
<-sub.signal
}
cleanupStart := time.Now()
err := f.runCleanup(recoverAndReturnPanic)
f.duration += time.Since(cleanupStart)
if err != nil {
doPanic(err)
}
// While fuzzing, T.Parallel has no effect, so f.parallelSubtests is empty
// and this is a no-op.
// Check for races before starting parallel subtests, so that if a
// parallel subtest *also* triggers a data race we will report the two
// races to the two tests and not attribute all of them to the subtest.
f.checkRaces()
close(f.runParallel)
f.parallelSubtests.Wait()
}
// Report after all subtests have finished.
f.report()
f.done = true
f.setRan()
}()
defer func() {
if len(f.sub) == 0 {
f.runCleanup(normalPanic)
}
// Use a deferred call to ensure that we report that the test is
// complete even if a cleanup function calls t.FailNow. See issue 41355.
defer func() {
cleanupPanic := recover()
if panicVal == nil {
panicVal = cleanupPanic
}
// Only report that the test is complete if it doesn't panic,
// as otherwise the test binary can exit before the panic is
// reported to the user. See issue 41479.
if panicVal != nil {
// Flush the output log up to the root before dying.
for root := &f.common; root.parent != nil; root = root.parent {
root.mu.Lock()
root.duration += time.Since(root.start)
d := root.duration
root.mu.Unlock()
root.flushToParent(root.name, "--- FAIL: %s (%s)\n", root.name, fmtDuration(d))
// Since the parent will never finish running, do its cleanup now.
// Run the cleanup in a fresh goroutine in case it calls runtime.Goexit,
// which we cannot recover.
cleanupDone := make(chan struct{})
go func() {
defer close(cleanupDone)
if r := root.parent.runCleanup(recoverAndReturnPanic); r != nil {
fmt.Fprintf(root.parent.w, "cleanup panicked with %v", r)
}
}()
<-cleanupDone
}
panic(panicVal)
}
f.checkRaces()
f.duration += time.Since(f.start)
f.report()
// Do not lock f.done to allow race detector to detect race in case
// the user does not appropriately synchronize a goroutine.
f.done = true
if f.parent != nil && !f.hasSub.Load() {
f.setRanLeaf()
}
running.Delete(f.name)
close(f.doneOrParallel)
}()
f.runCleanup(normalPanic)
}()
// Run the actual fuzz function.
f.start = time.Now()
f.resetRaces()
fn(f)
// Code beyond this point will not be executed when FailNow or SkipNow
// is invoked.
f.mu.Lock()
f.finished = true
f.mu.Unlock()
returned = true
}

View File

@@ -211,57 +211,68 @@ func TestPanicHelper(t *testing.T) {
}
func TestMorePanic(t *testing.T) {
testenv.MustHaveExec(t)
subprocess := false
if os.Getenv("GO_WANT_HELPER_PROCESS") == "1" {
subprocess = true
} else {
testenv.MustHaveExec(t)
t.Parallel()
}
testCases := []struct {
issue int
desc string
flags []string
f func(*testing.T)
want string
}{
{
desc: "Issue 48502: call runtime.Goexit in t.Cleanup after panic",
flags: []string{"-test.run=^TestGoexitInCleanupAfterPanicHelper$"},
want: `panic: die
panic: test executed panic(nil) or runtime.Goexit`,
issue: 48502,
desc: "runtime.Goexit in t.Cleanup after panic",
f: func(t *testing.T) {
t.Cleanup(func() {
t.Log("Goexiting in cleanup")
runtime.Goexit()
})
t.Parallel()
panic("die")
},
want: `panic: die [recovered]
panic: die`,
},
{
desc: "Issue 48515: call t.Run in t.Cleanup should trigger panic",
flags: []string{"-test.run=^TestCallRunInCleanupHelper$"},
want: `panic: testing: t.Run called during t.Cleanup`,
issue: 48515,
desc: "t.Run in t.Cleanup should trigger panic",
f: func(t *testing.T) {
t.Cleanup(func() {
t.Run("in-cleanup", func(t *testing.T) {
t.Log("must not be executed")
})
})
},
want: `panic: testing: t.Run called during t.Cleanup`,
},
}
for _, tc := range testCases {
cmd := exec.Command(os.Args[0], tc.flags...)
cmd.Env = append(os.Environ(), "GO_WANT_HELPER_PROCESS=1")
b, _ := cmd.CombinedOutput()
got := string(b)
want := tc.want
re := makeRegexp(want)
if ok, err := regexp.MatchString(re, got); !ok || err != nil {
t.Errorf("output:\ngot:\n%s\nwant:\n%s", got, want)
}
}
}
tc := tc
t.Run(fmt.Sprintf("issue%v", tc.issue), func(t *testing.T) {
if subprocess {
tc.f(t)
return
}
func TestCallRunInCleanupHelper(t *testing.T) {
if os.Getenv("GO_WANT_HELPER_PROCESS") != "1" {
return
}
t.Parallel()
cmd := testenv.Command(t, os.Args[0], "-test.run="+t.Name())
cmd.Env = append(cmd.Environ(), "GO_WANT_HELPER_PROCESS=1")
b, _ := cmd.CombinedOutput()
got := string(b)
t.Logf("%v:\n%s", tc.desc, got)
t.Cleanup(func() {
t.Run("in-cleanup", func(t *testing.T) {
t.Log("must not be executed")
want := tc.want
re := makeRegexp(want)
if ok, err := regexp.MatchString(re, got); !ok || err != nil {
t.Errorf("wanted:\n%s", want)
}
})
})
}
func TestGoexitInCleanupAfterPanicHelper(t *testing.T) {
if os.Getenv("GO_WANT_HELPER_PROCESS") != "1" {
return
}
t.Cleanup(func() { runtime.Goexit() })
t.Parallel()
panic("die")
}

View File

@@ -21,102 +21,6 @@ func init() {
benchTime.d = 100 * time.Millisecond
}
func TestTestContext(t *T) {
const (
add1 = 0
done = 1
)
// After each of the calls are applied to the context, the
type call struct {
typ int // run or done
// result from applying the call
running int
waiting int
started bool
}
testCases := []struct {
max int
run []call
}{{
max: 1,
run: []call{
{typ: add1, running: 1, waiting: 0, started: true},
{typ: done, running: 0, waiting: 0, started: false},
},
}, {
max: 1,
run: []call{
{typ: add1, running: 1, waiting: 0, started: true},
{typ: add1, running: 1, waiting: 1, started: false},
{typ: done, running: 1, waiting: 0, started: true},
{typ: done, running: 0, waiting: 0, started: false},
{typ: add1, running: 1, waiting: 0, started: true},
},
}, {
max: 3,
run: []call{
{typ: add1, running: 1, waiting: 0, started: true},
{typ: add1, running: 2, waiting: 0, started: true},
{typ: add1, running: 3, waiting: 0, started: true},
{typ: add1, running: 3, waiting: 1, started: false},
{typ: add1, running: 3, waiting: 2, started: false},
{typ: add1, running: 3, waiting: 3, started: false},
{typ: done, running: 3, waiting: 2, started: true},
{typ: add1, running: 3, waiting: 3, started: false},
{typ: done, running: 3, waiting: 2, started: true},
{typ: done, running: 3, waiting: 1, started: true},
{typ: done, running: 3, waiting: 0, started: true},
{typ: done, running: 2, waiting: 0, started: false},
{typ: done, running: 1, waiting: 0, started: false},
{typ: done, running: 0, waiting: 0, started: false},
},
}}
for i, tc := range testCases {
ctx := &testContext{
startParallel: make(chan bool),
maxParallel: tc.max,
}
for j, call := range tc.run {
doCall := func(f func()) chan bool {
done := make(chan bool)
go func() {
f()
done <- true
}()
return done
}
started := false
switch call.typ {
case add1:
signal := doCall(ctx.waitParallel)
select {
case <-signal:
started = true
case ctx.startParallel <- true:
<-signal
}
case done:
signal := doCall(ctx.release)
select {
case <-signal:
case <-ctx.startParallel:
started = true
<-signal
}
}
if started != call.started {
t.Errorf("%d:%d:started: got %v; want %v", i, j, started, call.started)
}
if ctx.running != call.running {
t.Errorf("%d:%d:running: got %v; want %v", i, j, ctx.running, call.running)
}
if ctx.numWaiting != call.waiting {
t.Errorf("%d:%d:waiting: got %v; want %v", i, j, ctx.numWaiting, call.waiting)
}
}
}
}
func TestTRun(t *T) {
realTest := t
testCases := []struct {
@@ -511,10 +415,10 @@ func TestTRun(t *T) {
buf := &strings.Builder{}
root := &T{
common: common{
signal: make(chan bool),
barrier: make(chan bool),
name: "",
w: buf,
doneOrParallel: make(chan struct{}),
runParallel: make(chan struct{}),
name: "",
w: buf,
},
context: ctx,
}
@@ -523,7 +427,6 @@ func TestTRun(t *T) {
root.chatty.json = tc.json
}
ok := root.Run(tc.desc, tc.f)
ctx.release()
if ok != tc.ok {
t.Errorf("%s:ok: got %v; want %v", tc.desc, ok, tc.ok)
@@ -531,8 +434,8 @@ func TestTRun(t *T) {
if ok != !root.Failed() {
t.Errorf("%s:root failed: got %v; want %v", tc.desc, !ok, root.Failed())
}
if ctx.running != 0 || ctx.numWaiting != 0 {
t.Errorf("%s:running and waiting non-zero: got %d and %d", tc.desc, ctx.running, ctx.numWaiting)
if n := ctx.running(); n != 0 {
t.Errorf("%s:running non-zero: got %d", tc.desc, n)
}
got := strings.TrimSpace(buf.String())
want := strings.TrimSpace(tc.output)
@@ -701,9 +604,9 @@ func TestBRun(t *T) {
// the benchtime and catch the failure result of the subbenchmark.
root := &B{
common: common{
signal: make(chan bool),
name: "root",
w: buf,
doneOrParallel: make(chan struct{}),
name: "root",
w: buf,
},
benchFunc: func(b *B) { ok = b.Run("test", tc.f) }, // Use Run to catch failure.
benchTime: durationOrCountFlag{d: 1 * time.Microsecond},
@@ -837,8 +740,8 @@ func TestLogAfterComplete(t *T) {
common: common{
// Use a buffered channel so that tRunner can write
// to it although nothing is reading from it.
signal: make(chan bool, 1),
w: &buf,
doneOrParallel: make(chan struct{}),
w: &buf,
},
context: ctx,
}
@@ -910,18 +813,22 @@ func TestCleanup(t *T) {
func TestConcurrentCleanup(t *T) {
cleanups := 0
t.Run("test", func(t *T) {
done := make(chan struct{})
var wg sync.WaitGroup
wg.Add(2)
for i := 0; i < 2; i++ {
i := i
go func() {
t.Cleanup(func() {
// Although the calls to Cleanup are concurrent, the functions passed
// to Cleanup should be called sequentially, in some nondeterministic
// order based on when the Cleanup calls happened to be scheduled.
// So these assignments to the cleanups variable should not race.
cleanups |= 1 << i
})
done <- struct{}{}
wg.Done()
}()
}
<-done
<-done
wg.Wait()
})
if cleanups != 1|2 {
t.Errorf("unexpected cleanup; got %d want 3", cleanups)

View File

@@ -480,7 +480,7 @@ var (
cpuList []int
testlogFile *os.File
numFailed atomic.Uint32 // number of test failures
anyFailed atomic.Bool
running sync.Map // map[string]time.Time of running, unpaused tests
)
@@ -593,38 +593,40 @@ const maxStackLen = 50
// captures common methods such as Errorf.
type common struct {
mu sync.RWMutex // guards this group of fields
output []byte // Output generated by test or benchmark.
w io.Writer // For flushToParent.
ran bool // Test or benchmark (or one of its subtests) was executed.
output []byte // output generated by test or benchmark
w io.Writer // output to which flushToParent should write
ranAnyLeaf bool // Test or benchmark ran to completion without calling Run itself, or had a subtest that did so.
failed bool // Test or benchmark has failed.
skipped bool // Test or benchmark has been skipped.
goexiting bool // Test function is attempting to abort by goexit (implies failed || skipped).
done bool // Test is finished and all subtests have completed.
helperPCs map[uintptr]struct{} // functions to be skipped when writing file/line info
helperNames map[string]struct{} // helperPCs converted to function names
cleanups []func() // optional functions to be called at the end of the test
cleanupName string // Name of the cleanup function.
cleanupPc []uintptr // The stack trace at the point where Cleanup was called.
finished bool // Test function has completed.
inFuzzFn bool // Whether the fuzz target, if this is one, is running.
cleanupName string // name of the cleanup function currently running
cleanupPc []uintptr // stack trace at the point where Cleanup was called
chatty *chattyPrinter // A copy of chattyPrinter, if the chatty flag is set.
bench bool // Whether the current test is a benchmark.
hasSub atomic.Bool // whether there are sub-benchmarks.
cleanupStarted atomic.Bool // Registered cleanup callbacks have started to execute
runner string // Function name of tRunner running the test.
isParallel bool // Whether the test is parallel.
chatty *chattyPrinter // copy of chattyPrinter, if the chatty flag is set
bench bool // Current test is a benchmark.
runner string // function name of tRunner running the test
isParallel bool // Test is running in parallel.
inFuzzFn bool // Fuzz target, if this is one, is running.
inCleanup bool // Cleanup callbacks, if any, are running.
parent *common
level int // Nesting depth of test or benchmark.
creator []uintptr // If level > 0, the stack trace at the point where the parent called t.Run.
name string // Name of test or benchmark.
start time.Time // Time test or benchmark started
duration time.Duration
barrier chan bool // To signal parallel subtests they may start. Nil when T.Parallel is not present (B) or not usable (when fuzzing).
signal chan bool // To signal a test is done.
sub []*T // Queue of subtests to be run in parallel.
parent *common
level int // nesting depth of test or benchmark
creator []uintptr // if level > 0, the stack trace at the point where the parent called t.Run
name string // name of test or benchmark
start time.Time // time test or benchmark started or resumed
duration time.Duration // time in the test function, excluding time blocked in t.Parallel
runParallel chan struct{} // Closed when parallel subtests may start. Nil when T.Parallel is not present (B) or not usable (when fuzzing).
doneOrParallel chan struct{} // Closed when the test is either blocked in Parallel or done running.
lastRaceErrors atomic.Int64 // Max value of race.Errors seen during the test or its subtests.
hasSub atomic.Bool // Test or benchmark has subtests or sub-benchmarks.
parallelSubtests sync.WaitGroup
runMu sync.Mutex // Held during calls to Run to prevent the total number of active subtests from exceeding the parallelism limit.
lastRaceErrors atomic.Int64 // max value of race.Errors seen during the test or its subtests
raceErrorLogged atomic.Bool
tempDirMu sync.Mutex
@@ -931,13 +933,13 @@ func (c *common) Name() string {
return c.name
}
func (c *common) setRan() {
func (c *common) setRanLeaf() {
if c.parent != nil {
c.parent.setRan()
c.parent.setRanLeaf()
}
c.mu.Lock()
defer c.mu.Unlock()
c.ran = true
c.ranAnyLeaf = true
}
// Fail marks the function as having failed but continues execution.
@@ -952,6 +954,7 @@ func (c *common) Fail() {
panic("Fail in goroutine after " + c.name + " has completed")
}
c.failed = true
anyFailed.Store(true)
}
// Failed reports whether the function has failed.
@@ -1000,7 +1003,7 @@ func (c *common) FailNow() {
// a top-of-stack deferred function now, we know that the send
// only happens after any other stacked defers have completed.
c.mu.Lock()
c.finished = true
c.goexiting = true
c.mu.Unlock()
runtime.Goexit()
}
@@ -1115,7 +1118,7 @@ func (c *common) SkipNow() {
c.checkFuzzFn("SkipNow")
c.mu.Lock()
c.skipped = true
c.finished = true
c.goexiting = true
c.mu.Unlock()
runtime.Goexit()
}
@@ -1318,8 +1321,8 @@ const (
// If ph is recoverAndReturnPanic, it will catch panics, and return the
// recovered value if any.
func (c *common) runCleanup(ph panicHandling) (panicVal any) {
c.cleanupStarted.Store(true)
defer c.cleanupStarted.Store(false)
c.inCleanup = true
defer func() { c.inCleanup = false }()
if ph == recoverAndReturnPanic {
defer func() {
@@ -1446,8 +1449,7 @@ func (t *T) Parallel() {
if t.isEnvSet {
panic("testing: t.Parallel called after t.Setenv; cannot set environment variables in parallel tests")
}
t.isParallel = true
if t.parent.barrier == nil {
if t.parent.runParallel == nil {
// T.Parallel has no effect when fuzzing.
// Multiple processes may run in parallel, but only one input can run at a
// time per process so we can attribute crashes to specific inputs.
@@ -1460,7 +1462,7 @@ func (t *T) Parallel() {
t.duration += time.Since(t.start)
// Add to the list of tests to be released by the parent.
t.parent.sub = append(t.parent.sub, t)
t.parent.parallelSubtests.Add(1)
// Report any races during execution of this test up to this point.
//
@@ -1479,9 +1481,19 @@ func (t *T) Parallel() {
}
running.Delete(t.name)
t.signal <- true // Release calling test.
<-t.parent.barrier // Wait for the parent test to complete.
t.context.waitParallel()
t.isParallel = true
// Release the parent test to run. We can't just use parallelSem tokens for
// this because some callers (notably TestParallelSub) expect to be able to
// call Run from multiple goroutines and have those calls succeed.
//
// Instead, we close a channel to unblock the parent's call to Run, allowing
// it to resume. Then, we wait for it to finish and unblock its parallel
// subtests, and acquire a parallel run token so that we don't run too many of
// the subtests together all at once.
close(t.doneOrParallel)
<-t.parent.runParallel
t.context.acquireParallel()
if t.chatty != nil {
t.chatty.Updatef(t.name, "=== CONT %s\n", t.name)
@@ -1538,19 +1550,13 @@ var errNilPanicOrGoexit = errors.New("test executed panic(nil) or runtime.Goexit
func tRunner(t *T, fn func(t *T)) {
t.runner = callerName(0)
returned := false
// When this goroutine is done, either because fn(t)
// returned normally or because a test failure triggered
// a call to runtime.Goexit, record the duration and send
// a signal saying that the test is done.
defer func() {
t.checkRaces()
// TODO(#61034): This is the wrong place for this check.
if t.Failed() {
numFailed.Add(1)
}
// Check if the test panicked or Goexited inappropriately.
//
// If this happens in a normal test, print output but continue panicking.
@@ -1559,132 +1565,120 @@ func tRunner(t *T, fn func(t *T)) {
// If this happens while fuzzing, recover from the panic and treat it like a
// normal failure. It's important that the process keeps running in order to
// find short inputs that cause panics.
err := recover()
signal := true
t.mu.RLock()
finished := t.finished
t.mu.RUnlock()
if !finished && err == nil {
err = errNilPanicOrGoexit
panicVal := recover()
if !returned && !t.goexiting && panicVal == nil {
panicVal = errNilPanicOrGoexit
for p := t.parent; p != nil; p = p.parent {
p.mu.RLock()
finished = p.finished
pGoexiting := p.goexiting
p.mu.RUnlock()
if finished {
if !t.isParallel {
t.Errorf("%v: subtest may have called FailNow on a parent test", err)
err = nil
}
signal = false
if pGoexiting {
t.Errorf("%v: subtest may have called FailNow on a parent test", panicVal)
panicVal = nil
break
}
}
}
if err != nil && t.context.isFuzzing {
if panicVal != nil && t.context.isFuzzing {
prefix := "panic: "
if err == errNilPanicOrGoexit {
if panicVal == errNilPanicOrGoexit {
prefix = ""
}
t.Errorf("%s%s\n%s\n", prefix, err, string(debug.Stack()))
t.mu.Lock()
t.finished = true
t.mu.Unlock()
err = nil
t.Errorf("%s%s\n%s\n", prefix, panicVal, string(debug.Stack()))
panicVal = nil
}
if panicVal != nil {
// Mark the test as failed so that Cleanup functions can see its correct status.
t.Fail()
} else if t.runParallel != nil {
// Run parallel subtests.
// Check for races before starting parallel subtests, so that if a
// parallel subtest *also* triggers a data race we will report the two
// races to the two tests and not attribute all of them to the subtest.
t.checkRaces()
if t.isParallel {
// Release our own parallel token first, so that it is available for
// subtests to acquire.
t.context.releaseParallel()
}
close(t.runParallel)
t.parallelSubtests.Wait()
if t.isParallel {
// Re-acquire a parallel token to limit concurrent cleanup.
t.context.acquireParallel()
}
}
// Use a deferred call to ensure that we report that the test is
// complete even if a cleanup function calls t.FailNow. See issue 41355.
didPanic := false
defer func() {
cleanupPanic := recover()
if panicVal == nil {
panicVal = cleanupPanic
}
// Only report that the test is complete if it doesn't panic,
// as otherwise the test binary can exit before the panic is
// reported to the user. See issue 41479.
if didPanic {
return
if panicVal != nil {
// Flush the output log up to the root before dying.
for root := &t.common; root.parent != nil; root = root.parent {
root.mu.Lock()
root.duration += time.Since(root.start)
d := root.duration
root.mu.Unlock()
root.flushToParent(root.name, "--- FAIL: %s (%s)\n", root.name, fmtDuration(d))
// Since the parent will never finish running, do its cleanup now.
// Run the cleanup in a fresh goroutine in case it calls runtime.Goexit,
// which we cannot recover.
cleanupDone := make(chan struct{})
go func() {
defer close(cleanupDone)
if r := root.parent.runCleanup(recoverAndReturnPanic); r != nil {
fmt.Fprintf(root.parent.w, "cleanup panicked with %v", r)
}
}()
<-cleanupDone
}
panic(panicVal)
}
if err != nil {
panic(err)
t.checkRaces()
t.duration += time.Since(t.start)
t.report()
// Do not lock t.done to allow race detector to detect race in case
// the user does not appropriately synchronize a goroutine.
t.done = true
if t.parent != nil && !t.hasSub.Load() {
t.setRanLeaf()
}
running.Delete(t.name)
t.signal <- signal
if t.isParallel {
t.context.releaseParallel()
t.parent.parallelSubtests.Done()
} else {
close(t.doneOrParallel)
}
}()
doPanic := func(err any) {
t.Fail()
if r := t.runCleanup(recoverAndReturnPanic); r != nil {
t.Logf("cleanup panicked with %v", r)
}
// Flush the output log up to the root before dying.
for root := &t.common; root.parent != nil; root = root.parent {
root.mu.Lock()
root.duration += time.Since(root.start)
d := root.duration
root.mu.Unlock()
root.flushToParent(root.name, "--- FAIL: %s (%s)\n", root.name, fmtDuration(d))
if r := root.parent.runCleanup(recoverAndReturnPanic); r != nil {
fmt.Fprintf(root.parent.w, "cleanup panicked with %v", r)
}
}
didPanic = true
panic(err)
}
if err != nil {
doPanic(err)
}
t.duration += time.Since(t.start)
if len(t.sub) > 0 {
// Run parallel subtests.
// Decrease the running count for this test.
t.context.release()
// Release the parallel subtests.
close(t.barrier)
// Wait for subtests to complete.
for _, sub := range t.sub {
<-sub.signal
}
cleanupStart := time.Now()
err := t.runCleanup(recoverAndReturnPanic)
t.duration += time.Since(cleanupStart)
if err != nil {
doPanic(err)
}
t.checkRaces()
if !t.isParallel {
// Reacquire the count for sequential tests. See comment in Run.
t.context.waitParallel()
}
} else if t.isParallel {
// Only release the count for this test if it was run as a parallel
// test. See comment in Run method.
t.context.release()
}
t.report() // Report after all subtests have finished.
// Do not lock t.done to allow race detector to detect race in case
// the user does not appropriately synchronize a goroutine.
t.done = true
if t.parent != nil && !t.hasSub.Load() {
t.setRan()
}
}()
defer func() {
if len(t.sub) == 0 {
t.runCleanup(normalPanic)
}
t.runCleanup(normalPanic)
}()
// Run the actual test function.
t.start = time.Now()
t.resetRaces()
fn(t)
// code beyond here will not be executed when FailNow is invoked
t.mu.Lock()
t.finished = true
t.mu.Unlock()
// Code beyond this point will not be executed when FailNow or SkipNow
// is invoked.
returned = true
}
// Run runs f as a subtest of t called name. It runs f in a separate goroutine
@@ -1694,7 +1688,7 @@ func tRunner(t *T, fn func(t *T)) {
// Run may be called simultaneously from multiple goroutines, but all such calls
// must return before the outer test function for t returns.
func (t *T) Run(name string, f func(t *T)) bool {
if t.cleanupStarted.Load() {
if t.inCleanup {
panic("testing: t.Run called during t.Cleanup")
}
@@ -1708,40 +1702,56 @@ func (t *T) Run(name string, f func(t *T)) bool {
// continue walking the stack into the parent test.
var pc [maxStackLen]uintptr
n := runtime.Callers(2, pc[:])
t = &T{
sub := &T{
common: common{
barrier: make(chan bool),
signal: make(chan bool, 1),
name: testName,
parent: &t.common,
level: t.level + 1,
creator: pc[:n],
chatty: t.chatty,
runParallel: make(chan struct{}),
doneOrParallel: make(chan struct{}),
name: testName,
parent: &t.common,
level: t.level + 1,
creator: pc[:n],
chatty: t.chatty,
},
context: t.context,
}
t.w = indenter{&t.common}
sub.w = indenter{&sub.common}
if t.chatty != nil {
t.chatty.Updatef(t.name, "=== RUN %s\n", t.name)
// Ensure that only one non-parallel subtest runs at a time so that we don't
// exceed the -parallel setting due to concurrent calls.
// (Run may be called concurrently even if the test is not marked parallel —
// see TestParallelSub.)
t.runMu.Lock()
defer t.runMu.Unlock()
if t.isParallel {
// Release our parallelism token for the subtest to use
// for its own parallel subtests.
t.context.releaseParallel()
defer t.context.acquireParallel()
}
running.Store(t.name, time.Now())
if sub.chatty != nil {
sub.chatty.Updatef(sub.name, "=== RUN %s\n", sub.name)
}
running.Store(sub.name, time.Now())
// Instead of reducing the running count of this test before calling the
// tRunner and increasing it afterwards, we rely on tRunner keeping the
// count correct. This ensures that a sequence of sequential tests runs
// without being preempted, even when their parent is a parallel test. This
// may especially reduce surprises if *parallel == 1.
go tRunner(t, f)
if !<-t.signal {
// At this point, it is likely that FailNow was called on one of the
// parent tests by one of the subtests. Continue aborting up the chain.
go tRunner(sub, f)
<-sub.doneOrParallel
if t.goexiting {
// The parent test (t) thinks it called runtime.Goexit, but here we are
// still running. It is likely that this subtest called FailNow or SkipNow
// on the t instead of sub, so propagate the Goexit to the parent goroutine.
runtime.Goexit()
}
if t.chatty != nil && t.chatty.json {
t.chatty.Updatef(t.parent.name, "=== NAME %s\n", t.parent.name)
t.chatty.Updatef(t.name, "=== NAME %s\n", t.name)
}
return !t.failed
return !sub.failed
}
// Deadline reports the time at which the test binary will have
@@ -1765,53 +1775,43 @@ type testContext struct {
// does not match).
isFuzzing bool
mu sync.Mutex
// Channel used to signal tests that are ready to be run in parallel.
startParallel chan bool
// running is the number of tests currently running in parallel.
// This does not include tests that are waiting for subtests to complete.
running int
// numWaiting is the number tests waiting to be run in parallel.
numWaiting int
// maxParallel is a copy of the parallel flag.
maxParallel int
// parallelSem is a counting semaphore to limit concurrency of Parallel tests.
// It has a capacity equal to the parallel flag.
// Send a token to acquire; receive to release.
// Non-parallel tests do not require a token.
parallelSem chan token
}
// A token is a semaphore token.
type token struct{}
// newTestContext returns a new testContext with the given parallelism and matcher.
func newTestContext(maxParallel int, m *matcher) *testContext {
return &testContext{
match: m,
startParallel: make(chan bool),
maxParallel: maxParallel,
running: 1, // Set the count to 1 for the main (sequential) test.
tc := &testContext{
match: m,
parallelSem: make(chan token, maxParallel),
}
return tc
}
// acquireParallel blocks until it can obtain a semaphore token for running a
// parallel test.
func (c *testContext) acquireParallel() {
c.parallelSem <- token{}
}
// releaseParallel returns a semaphore token obtained by acquireParallel.
func (c *testContext) releaseParallel() {
select {
case <-c.parallelSem:
default:
panic("testing: internal error: no parallel token to release")
}
}
func (c *testContext) waitParallel() {
c.mu.Lock()
if c.running < c.maxParallel {
c.running++
c.mu.Unlock()
return
}
c.numWaiting++
c.mu.Unlock()
<-c.startParallel
}
func (c *testContext) release() {
c.mu.Lock()
if c.numWaiting == 0 {
c.running--
c.mu.Unlock()
return
}
c.numWaiting--
c.mu.Unlock()
c.startParallel <- true // Pick a waiting test to be run.
// running returns the number of semaphore tokens outstanding.
func (c *testContext) running() int {
return len(c.parallelSem)
}
// No one should be using func Main anymore.
@@ -2132,9 +2132,9 @@ func runTests(matchString func(pat, str string) (bool, error), tests []InternalT
ctx.deadline = deadline
t := &T{
common: common{
signal: make(chan bool, 1),
barrier: make(chan bool),
w: os.Stdout,
doneOrParallel: make(chan struct{}),
runParallel: make(chan struct{}),
w: os.Stdout,
},
context: ctx,
}
@@ -2147,12 +2147,12 @@ func runTests(matchString func(pat, str string) (bool, error), tests []InternalT
}
})
select {
case <-t.signal:
case <-t.doneOrParallel:
default:
panic("internal error: tRunner exited without sending on t.signal")
panic("internal error: tRunner exited without closing t.doneOrParallel")
}
ok = ok && !t.Failed()
ran = ran || t.ran
ran = ran || t.ranAnyLeaf
}
}
return ran, ok
@@ -2390,5 +2390,5 @@ func parseCpuList() {
}
func shouldFailFast() bool {
return *failFast && numFailed.Load() > 0
return *failFast && anyFailed.Load()
}