diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 92581ef94d..8f2f9278ec 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -1515,7 +1515,7 @@ func gcMark(start_time int64) { work.finalizersDone = true for i := 0; i < int(gomaxprocs); i++ { - if allp[i].gcw.wbuf != 0 { + if !allp[i].gcw.empty() { throw("P has cached GC work at end of mark termination") } } diff --git a/src/runtime/mgcwork.go b/src/runtime/mgcwork.go index b05b06a1de..43d5db2fab 100644 --- a/src/runtime/mgcwork.go +++ b/src/runtime/mgcwork.go @@ -8,7 +8,7 @@ import "unsafe" const ( _Debugwbufs = false // if true check wbufs consistency - _WorkbufSize = 4096 // in bytes; larger values result in less contention + _WorkbufSize = 2048 // in bytes; larger values result in less contention ) // Garbage collector work pool abstraction. @@ -60,8 +60,25 @@ func (wp wbufptr) ptr() *workbuf { // gcWork may locally hold GC work buffers. This can be done by // disabling preemption (systemstack or acquirem). type gcWork struct { - // Invariant: wbuf is never full or empty - wbuf wbufptr + // wbuf1 and wbuf2 are the primary and secondary work buffers. + // + // This can be thought of as a stack of both work buffers' + // pointers concatenated. When we pop the last pointer, we + // shift the stack up by one work buffer by bringing in a new + // full buffer and discarding an empty one. When we fill both + // buffers, we shift the stack down by one work buffer by + // bringing in a new empty buffer and discarding a full one. + // This way we have one buffer's worth of hysteresis, which + // amortizes the cost of getting or putting a work buffer over + // at least one buffer of work and reduces contention on the + // global work lists. + // + // wbuf1 is always the buffer we're currently pushing to and + // popping from and wbuf2 is the buffer that will be discarded + // next. + // + // Invariant: Both wbuf1 and wbuf2 are nil or neither are. + wbuf1, wbuf2 wbufptr // Bytes marked (blackened) on this gcWork. This is aggregated // into work.bytesMarked by dispose. @@ -72,25 +89,38 @@ type gcWork struct { scanWork int64 } +func (w *gcWork) init() { + w.wbuf1 = wbufptrOf(getempty(101)) + wbuf2 := trygetfull(102) + if wbuf2 == nil { + wbuf2 = getempty(103) + } + w.wbuf2 = wbufptrOf(wbuf2) +} + // put enqueues a pointer for the garbage collector to trace. // obj must point to the beginning of a heap object. //go:nowritebarrier func (ww *gcWork) put(obj uintptr) { w := (*gcWork)(noescape(unsafe.Pointer(ww))) // TODO: remove when escape analysis is fixed - wbuf := w.wbuf.ptr() + wbuf := w.wbuf1.ptr() if wbuf == nil { - wbuf = getempty(42) - w.wbuf = wbufptrOf(wbuf) + w.init() + wbuf = w.wbuf1.ptr() + // wbuf is empty at this point. + } else if wbuf.nobj == len(wbuf.obj) { + w.wbuf1, w.wbuf2 = w.wbuf2, w.wbuf1 + wbuf = w.wbuf1.ptr() + if wbuf.nobj == len(wbuf.obj) { + putfull(wbuf, 132) + wbuf = getempty(133) + w.wbuf1 = wbufptrOf(wbuf) + } } wbuf.obj[wbuf.nobj] = obj wbuf.nobj++ - - if wbuf.nobj == len(wbuf.obj) { - putfull(wbuf, 50) - w.wbuf = 0 - } } // tryGet dequeues a pointer for the garbage collector to trace. @@ -102,24 +132,28 @@ func (ww *gcWork) put(obj uintptr) { func (ww *gcWork) tryGet() uintptr { w := (*gcWork)(noescape(unsafe.Pointer(ww))) // TODO: remove when escape analysis is fixed - wbuf := w.wbuf.ptr() + wbuf := w.wbuf1.ptr() if wbuf == nil { - wbuf = trygetfull(74) - if wbuf == nil { - return 0 + w.init() + wbuf = w.wbuf1.ptr() + // wbuf is empty at this point. + } + if wbuf.nobj == 0 { + w.wbuf1, w.wbuf2 = w.wbuf2, w.wbuf1 + wbuf = w.wbuf1.ptr() + if wbuf.nobj == 0 { + owbuf := wbuf + wbuf = trygetfull(167) + if wbuf == nil { + return 0 + } + putempty(owbuf, 166) + w.wbuf1 = wbufptrOf(wbuf) } - w.wbuf = wbufptrOf(wbuf) } wbuf.nobj-- - obj := wbuf.obj[wbuf.nobj] - - if wbuf.nobj == 0 { - putempty(wbuf, 86) - w.wbuf = 0 - } - - return obj + return wbuf.obj[wbuf.nobj] } // get dequeues a pointer for the garbage collector to trace, blocking @@ -129,27 +163,30 @@ func (ww *gcWork) tryGet() uintptr { func (ww *gcWork) get() uintptr { w := (*gcWork)(noescape(unsafe.Pointer(ww))) // TODO: remove when escape analysis is fixed - wbuf := w.wbuf.ptr() + wbuf := w.wbuf1.ptr() if wbuf == nil { - wbuf = getfull(103) - if wbuf == nil { - return 0 + w.init() + wbuf = w.wbuf1.ptr() + // wbuf is empty at this point. + } + if wbuf.nobj == 0 { + w.wbuf1, w.wbuf2 = w.wbuf2, w.wbuf1 + wbuf = w.wbuf1.ptr() + if wbuf.nobj == 0 { + owbuf := wbuf + wbuf = getfull(185) + if wbuf == nil { + return 0 + } + putempty(owbuf, 184) + w.wbuf1 = wbufptrOf(wbuf) } - wbuf.checknonempty() - w.wbuf = wbufptrOf(wbuf) } // TODO: This might be a good place to add prefetch code wbuf.nobj-- - obj := wbuf.obj[wbuf.nobj] - - if wbuf.nobj == 0 { - putempty(wbuf, 115) - w.wbuf = 0 - } - - return obj + return wbuf.obj[wbuf.nobj] } // dispose returns any cached pointers to the global queue. @@ -160,12 +197,21 @@ func (ww *gcWork) get() uintptr { // //go:nowritebarrier func (w *gcWork) dispose() { - if wbuf := w.wbuf; wbuf != 0 { - if wbuf.ptr().nobj == 0 { - throw("dispose: workbuf is empty") + if wbuf := w.wbuf1.ptr(); wbuf != nil { + if wbuf.nobj == 0 { + putempty(wbuf, 212) + } else { + putfull(wbuf, 214) } - putfull(wbuf.ptr(), 166) - w.wbuf = 0 + w.wbuf1 = 0 + + wbuf = w.wbuf2.ptr() + if wbuf.nobj == 0 { + putempty(wbuf, 218) + } else { + putfull(wbuf, 220) + } + w.wbuf2 = 0 } if w.bytesMarked != 0 { // dispose happens relatively infrequently. If this @@ -185,16 +231,21 @@ func (w *gcWork) dispose() { // global queue. //go:nowritebarrier func (w *gcWork) balance() { - if wbuf := w.wbuf; wbuf != 0 && wbuf.ptr().nobj > 4 { - w.wbuf = wbufptrOf(handoff(wbuf.ptr())) + if w.wbuf1 == 0 { + return + } + if wbuf := w.wbuf2.ptr(); wbuf.nobj != 0 { + putfull(wbuf, 246) + w.wbuf2 = wbufptrOf(getempty(247)) + } else if wbuf := w.wbuf1.ptr(); wbuf.nobj > 4 { + w.wbuf1 = wbufptrOf(handoff(wbuf)) } } // empty returns true if w has no mark work available. //go:nowritebarrier func (w *gcWork) empty() bool { - wbuf := w.wbuf - return wbuf == 0 || wbuf.ptr().nobj == 0 + return w.wbuf1 == 0 || (w.wbuf1.ptr().nobj == 0 && w.wbuf2.ptr().nobj == 0) } // Internally, the GC work pool is kept in arrays in work buffers.