diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index 5c7328aacc..37c051634c 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -2841,45 +2841,47 @@ top:
 		if int32(atomic.Xadd(&sched.nmspinning, -1)) < 0 {
 			throw("findrunnable: negative nmspinning")
 		}
-	}
 
-	// Check all runqueues once again.
-	_p_ = checkRunqsNoP(allpSnapshot, idlepMaskSnapshot)
-	if _p_ != nil {
-		acquirep(_p_)
-		if wasSpinning {
+		// Note that for correctness, only the last M transitioning from
+		// spinning to non-spinning must perform these rechecks to
+		// ensure no missed work. We are performing it on every M that
+		// transitions as a conservative change to monitor effects on
+		// latency. See golang.org/issue/43997.
+
+		// Check all runqueues once again.
+		_p_ = checkRunqsNoP(allpSnapshot, idlepMaskSnapshot)
+		if _p_ != nil {
+			acquirep(_p_)
 			_g_.m.spinning = true
 			atomic.Xadd(&sched.nmspinning, 1)
+			goto top
 		}
-		goto top
-	}
 
-	// Check for idle-priority GC work again.
-	_p_, gp = checkIdleGCNoP()
-	if _p_ != nil {
-		acquirep(_p_)
-		if wasSpinning {
+		// Check for idle-priority GC work again.
+		_p_, gp = checkIdleGCNoP()
+		if _p_ != nil {
+			acquirep(_p_)
 			_g_.m.spinning = true
 			atomic.Xadd(&sched.nmspinning, 1)
+
+			// Run the idle worker.
+			_p_.gcMarkWorkerMode = gcMarkWorkerIdleMode
+			casgstatus(gp, _Gwaiting, _Grunnable)
+			if trace.enabled {
+				traceGoUnpark(gp, 0)
+			}
+			return gp, false
 		}
 
-		// Run the idle worker.
-		_p_.gcMarkWorkerMode = gcMarkWorkerIdleMode
-		casgstatus(gp, _Gwaiting, _Grunnable)
-		if trace.enabled {
-			traceGoUnpark(gp, 0)
-		}
-		return gp, false
+		// Finally, check for timer creation or expiry concurrently with
+		// transitioning from spinning to non-spinning.
+		//
+		// Note that we cannot use checkTimers here because it calls
+		// adjusttimers which may need to allocate memory, and that isn't
+		// allowed when we don't have an active P.
+		pollUntil = checkTimersNoP(allpSnapshot, timerpMaskSnapshot, pollUntil)
 	}
 
-	// Finally, check for timer creation or expiry concurrently with
-	// transitioning from spinning to non-spinning.
-	//
-	// Note that we cannot use checkTimers here because it calls
-	// adjusttimers which may need to allocate memory, and that isn't
-	// allowed when we don't have an active P.
-	pollUntil = checkTimersNoP(allpSnapshot, timerpMaskSnapshot, pollUntil)
-
 	// Poll network until next timer.
 	if netpollinited() && (atomic.Load(&netpollWaiters) > 0 || pollUntil != 0) && atomic.Xchg64(&sched.lastpoll, 0) != 0 {
 		atomic.Store64(&sched.pollUntil, uint64(pollUntil))
diff --git a/src/runtime/proc_test.go b/src/runtime/proc_test.go
index 767bde15b4..01152dff76 100644
--- a/src/runtime/proc_test.go
+++ b/src/runtime/proc_test.go
@@ -692,6 +692,55 @@ func BenchmarkCreateGoroutinesCapture(b *testing.B) {
 	}
 }
 
+// warmupScheduler ensures the scheduler has at least targetThreadCount threads
+// in its thread pool.
+func warmupScheduler(targetThreadCount int) {
+	var wg sync.WaitGroup
+	var count int32
+	for i := 0; i < targetThreadCount; i++ {
+		wg.Add(1)
+		go func() {
+			atomic.AddInt32(&count, 1)
+			for atomic.LoadInt32(&count) < int32(targetThreadCount) {
+				// spin until all threads started
+			}
+
+			// spin a bit more to ensure they are all running on separate CPUs.
+			doWork(time.Millisecond)
+			wg.Done()
+		}()
+	}
+	wg.Wait()
+}
+
+func doWork(dur time.Duration) {
+	start := time.Now()
+	for time.Since(start) < dur {
+	}
+}
+
+// BenchmarkCreateGoroutinesSingle creates many goroutines, all from a single
+// producer (the main benchmark goroutine).
+//
+// Compared to BenchmarkCreateGoroutines, this causes different behavior in the
+// scheduler because Ms are much more likely to need to steal work from the
+// main P rather than having work in the local run queue.
+func BenchmarkCreateGoroutinesSingle(b *testing.B) {
+	// Since we are interested in stealing behavior, warm the scheduler to
+	// get all the Ps running first.
+	warmupScheduler(runtime.GOMAXPROCS(0))
+	b.ResetTimer()
+
+	var wg sync.WaitGroup
+	wg.Add(b.N)
+	for i := 0; i < b.N; i++ {
+		go func() {
+			wg.Done()
+		}()
+	}
+	wg.Wait()
+}
+
 func BenchmarkClosureCall(b *testing.B) {
 	sum := 0
 	off1 := 1
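The rationale in the new proc.go comment follows the scheduler's usual wakeup protocol: a producer that observes nmspinning > 0 may skip waking a thread, because some spinning M is obligated to find the queued work before it parks. The M that decrements nmspinning therefore has to recheck every work source, or work published in that window could be stranded. Below is a minimal standalone sketch of that pattern, not runtime code: the names (pool, submit, stopSpinning) are hypothetical, and a single locked slice stands in for the per-P run queues, idle GC work, and timers.

// Sketch only: illustrates the spinning -> non-spinning recheck, under the
// assumptions stated above.
package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

type pool struct {
	mu       sync.Mutex
	queue    []func()
	spinners int32 // counterpart of sched.nmspinning
}

// submit enqueues work, then checks the spinner count. If it observes a
// spinner, it may skip the expensive wakeup: that spinner must find the
// work before parking.
func (p *pool) submit(f func()) {
	p.mu.Lock()
	p.queue = append(p.queue, f)
	p.mu.Unlock()
	if atomic.LoadInt32(&p.spinners) == 0 {
		// No spinner to rely on: wake a parked worker (elided).
	}
}

// stopSpinning decrements the spinner count and then rechecks the queue,
// mirroring findrunnable: work submitted around the decrement might
// otherwise be missed, since submit may have seen spinners > 0 and
// skipped the wakeup.
func (p *pool) stopSpinning() (func(), bool) {
	atomic.AddInt32(&p.spinners, -1)

	p.mu.Lock()
	defer p.mu.Unlock()
	if len(p.queue) > 0 {
		f := p.queue[0]
		p.queue = p.queue[1:]
		atomic.AddInt32(&p.spinners, 1) // found work; become spinning again
		return f, true
	}
	return nil, false // safe to park: nothing was missed
}

func main() {
	p := &pool{spinners: 1}
	p.submit(func() { fmt.Println("rechecked work ran") })
	if f, ok := p.stopSpinning(); ok {
		f()
	}
}

As the patch comment notes, only the last M leaving the spinning state strictly needs the recheck for correctness; the change performs it on every transition as a conservative measure while its latency effect is monitored (golang.org/issue/43997).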
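The new benchmark runs with the standard test flags; an illustrative invocation (not part of the patch) would be:

	go test runtime -run '^$' -bench CreateGoroutinesSingle -count 10

warmupScheduler exists because the benchmark is meant to measure stealing by Ms that are already running, not thread startup: spinning on the shared counter forces the scheduler to back all GOMAXPROCS goroutines with threads at once, and the extra spin in doWork keeps those threads alive past the warmup so they are in the pool before the timer starts.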