diff --git a/src/runtime/mgcpacer.go b/src/runtime/mgcpacer.go index 230e78b000..5b699cb298 100644 --- a/src/runtime/mgcpacer.go +++ b/src/runtime/mgcpacer.go @@ -349,9 +349,6 @@ func (c *gcControllerState) init(gcPercent int32) { kp: 0.9, ti: 4.0, - // An update is done once per GC cycle. - period: 1, - // Set a high reset time in GC cycles. // This is inversely proportional to the rate at which we // accumulate error from clipping. By making this very high @@ -677,8 +674,9 @@ func (c *gcControllerState) endCycle(now int64, procs int, userForced bool) floa (float64(scanWork) * (1 - utilization)) // Update cons/mark controller. + // Period for this is 1 GC cycle. oldConsMark := c.consMark - c.consMark = c.consMarkController.next(c.consMark, currentConsMark) + c.consMark = c.consMarkController.next(c.consMark, currentConsMark, 1.0) if debug.gcpacertrace > 0 { printlock() @@ -1259,10 +1257,7 @@ func readGOGC() int32 { type piController struct { kp float64 // Proportional constant. ti float64 // Integral time constant. - tt float64 // Reset time in GC cyles. - - // Period in GC cycles between updates. - period float64 + tt float64 // Reset time. min, max float64 // Output boundaries. @@ -1271,7 +1266,7 @@ type piController struct { errIntegral float64 // Integral of the error from t=0 to now. } -func (c *piController) next(input, setpoint float64) float64 { +func (c *piController) next(input, setpoint, period float64) float64 { // Compute the raw output value. prop := c.kp * (setpoint - input) rawOutput := prop + c.errIntegral @@ -1286,7 +1281,7 @@ func (c *piController) next(input, setpoint float64) float64 { // Update the controller's state. 
if c.ti != 0 && c.tt != 0 { - c.errIntegral += (c.kp*c.period/c.ti)*(setpoint-input) + (c.period/c.tt)*(output-rawOutput) + c.errIntegral += (c.kp*period/c.ti)*(setpoint-input) + (period/c.tt)*(output-rawOutput) } return output } diff --git a/src/runtime/mgcscavenge.go b/src/runtime/mgcscavenge.go index 72ec81e5e3..a2a88e94d2 100644 --- a/src/runtime/mgcscavenge.go +++ b/src/runtime/mgcscavenge.go @@ -270,35 +270,85 @@ func bgscavenge(c chan int) { c <- 1 goparkunlock(&scavenge.lock, waitReasonGCScavengeWait, traceEvGoBlock, 1) - // Exponentially-weighted moving average of the fraction of time this - // goroutine spends scavenging (that is, percent of a single CPU). - // It represents a measure of scheduling overheads which might extend - // the sleep or the critical time beyond what's expected. Assume no - // overhead to begin with. + // idealFraction is the ideal % of overall application CPU time that we + // spend scavenging. // - // TODO(mknyszek): Consider making this based on total CPU time of the - // application (i.e. scavengePercent * GOMAXPROCS). This isn't really - // feasible now because the scavenger acquires the heap lock over the - // scavenging operation, which means scavenging effectively blocks - // allocators and isn't scalable. However, given a scalable allocator, - // it makes sense to also make the scavenger scale with it; if you're - // allocating more frequently, then presumably you're also generating - // more work for the scavenger. - const idealFraction = scavengePercent / 100.0 - scavengeEWMA := float64(idealFraction) + // TODO(mknyszek): Currently this is percent of one CPU (hence the division + // by gomaxprocs), but ideally this should be 1% of overall CPU time. + // Given a scalable memory allocator, it makes sense that the scavenger + // should scale with it; if you're allocating more frequently, then presumably + // you're also generating more work for the scavenger. 
+ idealFraction := float64(scavengePercent) / 100.0 / float64(gomaxprocs) + // Input: fraction of CPU time used. + // Setpoint: idealFraction. + // Output: ratio of critical time to sleep time (determines sleep time). + // + // The output of this controller is somewhat indirect to what we actually + // want to achieve: how much time to sleep for. The reason for this definition + // is to ensure that the controller's outputs have a direct relationship with + // its inputs (as opposed to an inverse relationship), making it somewhat + // easier to reason about for tuning purposes. + critSleepController := piController{ + // Tuned loosely via Ziegler-Nichols process. + kp: 0.3375, + ti: 3.2e6, + tt: 1e9, // 1 second reset time. + + // These ranges seem wide, but we want to give the controller plenty of + // room to hunt for the optimal value. + min: 0.001, // 1:1000 + max: 1000.0, // 1000:1 + } + // It doesn't really matter what value we start at, but we can't be zero, because + // that'll cause divide-by-zero issues. + critSleepRatio := 0.001 for { released := uintptr(0) crit := float64(0) - // If background scavenging is disabled or if there's no work to do just park. - retained, goal := heapRetained(), atomic.Load64(&mheap_.scavengeGoal) - if retained > goal { - // Scavenge one page, and measure the amount of time spent scavenging. + // Spend at least 1 ms scavenging, otherwise the corresponding + // sleep time to maintain our desired utilization is too low to + // be reliable. + const minCritTime = 1e6 + for crit < minCritTime { + // If background scavenging is disabled or if there's no work to do just park. + retained, goal := heapRetained(), atomic.Load64(&mheap_.scavengeGoal) + if retained <= goal { + break + } + + // scavengeQuantum is the amount of memory we try to scavenge + // in one go. A smaller value means the scavenger is more responsive + // to the scheduler in case of e.g. preemption. 
A larger value means + // that the overheads of scavenging are better amortized, so better + // scavenging throughput. + // + // The current value is chosen assuming a cost of ~10µs/physical page + // (this is somewhat pessimistic), which implies a worst-case latency of + // about 160µs for 4 KiB physical pages. The current value is biased + // toward latency over throughput. + const scavengeQuantum = 64 << 10 + + // Accumulate the amount of time spent scavenging. start := nanotime() - released = mheap_.pages.scavenge(physPageSize) + released = mheap_.pages.scavenge(scavengeQuantum) atomic.Xadduintptr(&mheap_.pages.scav.released, released) - crit = float64(nanotime() - start) + end := nanotime() + + // On some platforms we may see end <= start if the time it takes to scavenge + // memory is less than the minimum granularity of its clock (e.g. Windows) or + // due to clock bugs. + // + // In this case, just assume scavenging takes 10 µs per regular physical page + // (determined empirically), and conservatively ignore the impact of huge pages + // on timing. + const approxCritNSPerPhysicalPage = 10e3 + if end <= start { + crit += approxCritNSPerPhysicalPage * float64(released/physPageSize) + } else { + crit += float64(end - start) + } } if released == 0 { @@ -316,18 +366,13 @@ func bgscavenge(c chan int) { throw("released less than one physical page of memory") } - // On some platforms we may see crit as zero if the time it takes to scavenge - // memory is less than the minimum granularity of its clock (e.g. Windows). - // In this case, just assume scavenging takes 10 µs per regular physical page - // (determined empirically), and conservatively ignore the impact of huge pages - // on timing. - // - // We shouldn't ever see a crit value less than zero unless there's a bug of - // some kind, either on our side or in the platform we're running on, but be - // defensive in that case as well. 
- const approxCritNSPerPhysicalPage = 10e3 - if crit <= 0 { - crit = approxCritNSPerPhysicalPage * float64(released/physPageSize) + if crit < minCritTime { + // This means there wasn't enough work to actually fill up minCritTime. + // That's fine; we shouldn't try to do anything with this information + // because it's going to result in a short enough sleep request that things + // will get messy. Just assume we did at least this much work. + // All this means is that we'll sleep longer than we otherwise would have. + crit = minCritTime } // Multiply the critical time by 1 + the ratio of the costs of using @@ -338,41 +383,19 @@ func bgscavenge(c chan int) { // because of the additional overheads of using scavenged memory. crit *= 1 + scavengeCostRatio - // If we spent more than 10 ms (for example, if the OS scheduled us away, or someone - // put their machine to sleep) in the critical section, bound the time we use to - // calculate at 10 ms to avoid letting the sleep time get arbitrarily high. - const maxCrit = 10e6 - if crit > maxCrit { - crit = maxCrit - } + // Go to sleep for our current sleepNS. + slept := scavengeSleep(int64(crit / critSleepRatio)) - // Compute the amount of time to sleep, assuming we want to use at most - // scavengePercent of CPU time. Take into account scheduling overheads - // that may extend the length of our sleep by multiplying by how far - // off we are from the ideal ratio. For example, if we're sleeping too - // much, then scavengeEMWA < idealFraction, so we'll adjust the sleep time - // down. - adjust := scavengeEWMA / idealFraction - sleepTime := int64(adjust * crit / (scavengePercent / 100.0)) + // Calculate the CPU time spent. + // + // This may be slightly inaccurate with respect to GOMAXPROCS, but we're + // recomputing this often enough relative to GOMAXPROCS changes in general + // (it only changes when the world is stopped, and not during a GC) that + // that small inaccuracy is in the noise. 
+ cpuFraction := float64(crit) / ((float64(slept) + crit) * float64(gomaxprocs)) - // Go to sleep. - slept := scavengeSleep(sleepTime) - - // Compute the new ratio. - fraction := crit / (crit + float64(slept)) - - // Set a lower bound on the fraction. - // Due to OS-related anomalies we may "sleep" for an inordinate amount - // of time. Let's avoid letting the ratio get out of hand by bounding - // the sleep time we use in our EWMA. - const minFraction = 1.0 / 1000.0 - if fraction < minFraction { - fraction = minFraction - } - - // Update scavengeEWMA by merging in the new crit/slept ratio. - const alpha = 0.5 - scavengeEWMA = alpha*fraction + (1-alpha)*scavengeEWMA + // Update the critSleepRatio, adjusting until we reach our ideal fraction. + critSleepRatio = critSleepController.next(cpuFraction, idealFraction, float64(slept)+crit) } }