mirror of https://github.com/golang/go.git
runtime: faster GC sweep phase
benchmark old ns/op new ns/op delta garbage.BenchmarkParser 3731065750 3715543750 -0.41% garbage.BenchmarkParser-2 3631299750 3495248500 -3.75% garbage.BenchmarkParser-4 3386486000 3339353000 -1.39% garbage.BenchmarkParser-8 3267632000 3286422500 +0.58% garbage.BenchmarkParser-16 3299203000 3316081750 +0.51% garbage.BenchmarkTree 977532888 919453833 -5.94% garbage.BenchmarkTree-2 919948555 853478000 -7.23% garbage.BenchmarkTree-4 841329000 790207000 -6.08% garbage.BenchmarkTree-8 787792777 740380666 -6.01% garbage.BenchmarkTree-16 899257166 846594555 -5.86% garbage.BenchmarkTree2 574876300 571885800 -0.52% garbage.BenchmarkTree2-2 348162700 345888900 -0.65% garbage.BenchmarkTree2-4 184912500 179137000 -3.22% garbage.BenchmarkTree2-8 104243900 103485600 -0.73% garbage.BenchmarkTree2-16 97269500 85137100 -14.25% garbage.BenchmarkParserPause 141101976 157746974 +11.80% garbage.BenchmarkParserPause-2 103096051 83043048 -19.45% garbage.BenchmarkParserPause-4 52153133 45951111 -11.89% garbage.BenchmarkParserPause-8 36730190 38901024 +5.91% garbage.BenchmarkParserPause-16 32678875 29578585 -9.49% garbage.BenchmarkTreePause 29487065 29648439 +0.55% garbage.BenchmarkTreePause-2 22443494 21306159 -5.07% garbage.BenchmarkTreePause-4 15799691 14985647 -5.15% garbage.BenchmarkTreePause-8 10768112 9531420 -12.97% garbage.BenchmarkTreePause-16 16329891 15205158 -6.89% garbage.BenchmarkTree2Pause 2586957240 2577533200 -0.36% garbage.BenchmarkTree2Pause-2 1683383760 1673923800 -0.56% garbage.BenchmarkTree2Pause-4 1102860320 1074040280 -2.68% garbage.BenchmarkTree2Pause-8 902627920 886122400 -1.86% garbage.BenchmarkTree2Pause-16 856470920 804152320 -6.50% garbage.BenchmarkParserLastPause 277316000 280839000 +1.25% garbage.BenchmarkParserLastPause-2 179446000 163687000 -8.78% garbage.BenchmarkParserLastPause-4 106752000 94144000 -11.81% garbage.BenchmarkParserLastPause-8 57758000 61640000 +6.72% garbage.BenchmarkParserLastPause-16 51235000 42552000 -16.95% garbage.BenchmarkTreeLastPause 45244000 50786000 +12.25% garbage.BenchmarkTreeLastPause-2 37163000 34654000 -6.75% garbage.BenchmarkTreeLastPause-4 24178000 21967000 -9.14% garbage.BenchmarkTreeLastPause-8 20390000 15648000 -30.30% garbage.BenchmarkTreeLastPause-16 22398000 20180000 -9.90% garbage.BenchmarkTree2LastPause 5748706000 5718809000 -0.52% garbage.BenchmarkTree2LastPause-2 3481570000 3458844000 -0.65% garbage.BenchmarkTree2LastPause-4 1849073000 1791330000 -3.22% garbage.BenchmarkTree2LastPause-8 1042375000 1034811000 -0.73% garbage.BenchmarkTree2LastPause-16 972637000 851323000 -14.25% There is also visible improvement in consumed CPU time: tree2 -heapsize=8000000000 -cpus=12 before: 248.74user 6.36system 0:52.74elapsed 483%CPU after: 229.86user 6.33system 0:51.08elapsed 462%CPU -1.66s of real time, but -18.91s of consumed CPU time R=golang-dev CC=golang-dev https://golang.org/cl/6215065
This commit is contained in:
parent
581e7c2a78
commit
845aa1fc2c
|
|
@ -120,10 +120,9 @@ static struct {
|
|||
uint32 nproc;
|
||||
volatile uint32 nwait;
|
||||
volatile uint32 ndone;
|
||||
volatile uint32 debugmarkdone;
|
||||
Note alldone;
|
||||
Lock markgate;
|
||||
Lock sweepgate;
|
||||
uint32 spanidx;
|
||||
ParFor *sweepfor;
|
||||
|
||||
Lock;
|
||||
byte *chunk;
|
||||
|
|
@ -720,40 +719,10 @@ handlespecial(byte *p, uintptr size)
|
|||
return true;
|
||||
}
|
||||
|
||||
static void sweepspan(MSpan *s);
|
||||
|
||||
// Sweep frees or collects finalizers for blocks not marked in the mark phase.
|
||||
// It clears the mark bits in preparation for the next GC round.
|
||||
static void
|
||||
sweep(void)
|
||||
{
|
||||
MSpan *s, **allspans;
|
||||
int64 now;
|
||||
uint32 spanidx, nspan;
|
||||
|
||||
now = runtime·nanotime();
|
||||
nspan = runtime·mheap.nspan;
|
||||
allspans = runtime·mheap.allspans;
|
||||
for(;;) {
|
||||
spanidx = runtime·xadd(&work.spanidx, 1) - 1;
|
||||
if(spanidx >= nspan)
|
||||
break;
|
||||
s = allspans[spanidx];
|
||||
|
||||
// Stamp newly unused spans. The scavenger will use that
|
||||
// info to potentially give back some pages to the OS.
|
||||
if(s->state == MSpanFree && s->unusedsince == 0)
|
||||
s->unusedsince = now;
|
||||
|
||||
if(s->state != MSpanInUse)
|
||||
continue;
|
||||
|
||||
sweepspan(s);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
sweepspan(MSpan *s)
|
||||
sweepspan(ParFor *desc, uint32 idx)
|
||||
{
|
||||
int32 cl, n, npages;
|
||||
uintptr size;
|
||||
|
|
@ -762,7 +731,16 @@ sweepspan(MSpan *s)
|
|||
byte *arena_start;
|
||||
MLink *start, *end;
|
||||
int32 nfree;
|
||||
MSpan *s;
|
||||
|
||||
USED(&desc);
|
||||
s = runtime·mheap.allspans[idx];
|
||||
// Stamp newly unused spans. The scavenger will use that
|
||||
// info to potentially give back some pages to the OS.
|
||||
if(s->state == MSpanFree && s->unusedsince == 0)
|
||||
s->unusedsince = runtime·nanotime();
|
||||
if(s->state != MSpanInUse)
|
||||
return;
|
||||
arena_start = runtime·mheap.arena_start;
|
||||
p = (byte*)(s->start << PageShift);
|
||||
cl = s->sizeclass;
|
||||
|
|
@ -847,16 +825,15 @@ sweepspan(MSpan *s)
|
|||
void
|
||||
runtime·gchelper(void)
|
||||
{
|
||||
// Wait until main proc is ready for mark help.
|
||||
runtime·lock(&work.markgate);
|
||||
runtime·unlock(&work.markgate);
|
||||
scanblock(nil, 0);
|
||||
|
||||
// Wait until main proc is ready for sweep help.
|
||||
runtime·lock(&work.sweepgate);
|
||||
runtime·unlock(&work.sweepgate);
|
||||
sweep();
|
||||
if(DebugMark) {
|
||||
// wait while the main thread executes mark(debug_scanblock)
|
||||
while(runtime·atomicload(&work.debugmarkdone) == 0)
|
||||
runtime·usleep(10);
|
||||
}
|
||||
|
||||
runtime·parfordo(work.sweepfor);
|
||||
if(runtime·xadd(&work.ndone, +1) == work.nproc-1)
|
||||
runtime·notewakeup(&work.alldone);
|
||||
}
|
||||
|
|
@ -972,33 +949,38 @@ runtime·gc(int32 force)
|
|||
obj0 = mstats.nmalloc - mstats.nfree;
|
||||
}
|
||||
|
||||
runtime·lock(&work.markgate);
|
||||
runtime·lock(&work.sweepgate);
|
||||
|
||||
work.nwait = 0;
|
||||
work.ndone = 0;
|
||||
work.debugmarkdone = 0;
|
||||
work.nproc = runtime·gcprocs();
|
||||
if(work.sweepfor == nil)
|
||||
work.sweepfor = runtime·parforalloc(MaxGcproc);
|
||||
runtime·parforsetup(work.sweepfor, work.nproc, runtime·mheap.nspan, nil, true, sweepspan);
|
||||
if(work.nproc > 1) {
|
||||
runtime·noteclear(&work.alldone);
|
||||
runtime·helpgc(work.nproc);
|
||||
}
|
||||
work.nwait = 0;
|
||||
work.ndone = 0;
|
||||
|
||||
runtime·unlock(&work.markgate); // let the helpers in
|
||||
mark(scanblock);
|
||||
if(DebugMark)
|
||||
if(DebugMark) {
|
||||
mark(debug_scanblock);
|
||||
runtime·atomicstore(&work.debugmarkdone, 1);
|
||||
}
|
||||
t1 = runtime·nanotime();
|
||||
|
||||
work.spanidx = 0;
|
||||
runtime·unlock(&work.sweepgate); // let the helpers in
|
||||
sweep();
|
||||
if(work.nproc > 1)
|
||||
runtime·notesleep(&work.alldone);
|
||||
runtime·parfordo(work.sweepfor);
|
||||
t2 = runtime·nanotime();
|
||||
|
||||
stealcache();
|
||||
cachestats(&stats);
|
||||
|
||||
if(work.nproc > 1)
|
||||
runtime·notesleep(&work.alldone);
|
||||
|
||||
stats.nprocyield += work.sweepfor->nprocyield;
|
||||
stats.nosyield += work.sweepfor->nosyield;
|
||||
stats.nsleep += work.sweepfor->nsleep;
|
||||
|
||||
mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100;
|
||||
m->gcing = 0;
|
||||
|
||||
|
|
@ -1027,20 +1009,21 @@ runtime·gc(int32 force)
|
|||
|
||||
if(gctrace) {
|
||||
runtime·printf("gc%d(%d): %D+%D+%D ms, %D -> %D MB %D -> %D (%D-%D) objects,"
|
||||
" %D(%D) handoff, %D/%D/%D yields\n",
|
||||
" %D(%D) handoff, %D(%D) steal, %D/%D/%D yields\n",
|
||||
mstats.numgc, work.nproc, (t1-t0)/1000000, (t2-t1)/1000000, (t3-t2)/1000000,
|
||||
heap0>>20, heap1>>20, obj0, obj1,
|
||||
mstats.nmalloc, mstats.nfree,
|
||||
stats.nhandoff, stats.nhandoffcnt,
|
||||
work.sweepfor->nsteal, work.sweepfor->nstealcnt,
|
||||
stats.nprocyield, stats.nosyield, stats.nsleep);
|
||||
}
|
||||
|
||||
|
||||
runtime·MProf_GC();
|
||||
runtime·semrelease(&runtime·worldsema);
|
||||
runtime·starttheworld();
|
||||
|
||||
// give the queued finalizers, if any, a chance to run
|
||||
if(finq != nil)
|
||||
// give the queued finalizers, if any, a chance to run
|
||||
if(finq != nil)
|
||||
runtime·gosched();
|
||||
|
||||
if(gctrace > 1 && !force)
|
||||
|
|
|
|||
Loading…
Reference in New Issue