mirror of https://github.com/golang/go.git
runtime: use only treaps for tracking spans
Currently, mheap tracks spans in both mSpanLists and mTreaps, but mSpanLists,
while they tend to be smaller, complicate the implementation. Here we simplify
the implementation by removing free and busy from mheap and renaming
freelarge -> free and busylarge -> busy.

This change also slightly changes the reclamation policy. Previously, for
allocations under 1MB we would attempt to find a small span of the right size.
Now, we just try to find any number of spans totaling the right size. This may
increase heap fragmentation, but that will be dealt with using virtual memory
tricks in follow-up CLs.

For #14045.

Garbage-heavy benchmarks show very little change, except what appears to be a
decrease in STW times and peak RSS.

name                      old STW-ns/GC  new STW-ns/GC  delta
Garbage/benchmem-MB=64-8  263k ±64%      217k ±24%      -17.66% (p=0.028 n=25+23)

name                      old STW-ns/op  new STW-ns/op  delta
Garbage/benchmem-MB=64-8  9.39k ±65%     7.80k ±24%     -16.88% (p=0.037 n=25+23)

name                      old peak-RSS-bytes  new peak-RSS-bytes  delta
Garbage/benchmem-MB=64-8  281M ± 0%           249M ± 4%           -11.40% (p=0.000 n=19+18)

https://perf.golang.org/search?q=upload:20181005.1

Go1 benchmarks perform roughly the same, the most notable regression being the
JSON encode/decode benchmark, which worsens by ~2%.

name                      old time/op    new time/op    delta
BinaryTree17-8            3.02s ± 2%     2.99s ± 2%     -1.18% (p=0.000 n=25+24)
Fannkuch11-8              3.05s ± 1%     3.02s ± 2%     -1.20% (p=0.000 n=25+25)
FmtFprintfEmpty-8         43.6ns ± 5%    43.4ns ± 3%    ~      (p=0.528 n=25+25)
FmtFprintfString-8        74.9ns ± 3%    73.4ns ± 1%    -2.03% (p=0.001 n=25+24)
FmtFprintfInt-8           79.3ns ± 3%    77.9ns ± 1%    -1.73% (p=0.003 n=25+25)
FmtFprintfIntInt-8        119ns ± 6%     116ns ± 0%     -2.68% (p=0.000 n=25+18)
FmtFprintfPrefixedInt-8   134ns ± 4%     132ns ± 1%     -1.52% (p=0.004 n=25+25)
FmtFprintfFloat-8         240ns ± 1%     241ns ± 1%     ~      (p=0.403 n=24+23)
FmtManyArgs-8             543ns ± 1%     537ns ± 1%     -1.00% (p=0.000 n=25+25)
GobDecode-8               6.88ms ± 1%    6.92ms ± 4%    ~      (p=0.088 n=24+22)
GobEncode-8               5.92ms ± 1%    5.93ms ± 1%    ~      (p=0.898 n=25+24)
Gzip-8                    267ms ± 2%     266ms ± 2%     ~      (p=0.213 n=25+24)
Gunzip-8                  35.4ms ± 1%    35.6ms ± 1%    +0.70% (p=0.000 n=25+25)
HTTPClientServer-8        104µs ± 2%     104µs ± 2%     ~      (p=0.686 n=25+25)
JSONEncode-8              9.67ms ± 1%    9.80ms ± 4%    +1.32% (p=0.000 n=25+25)
JSONDecode-8              47.7ms ± 1%    48.8ms ± 5%    +2.33% (p=0.000 n=25+25)
Mandelbrot200-8           4.87ms ± 1%    4.91ms ± 1%    +0.79% (p=0.000 n=25+25)
GoParse-8                 3.59ms ± 4%    3.55ms ± 1%    ~      (p=0.199 n=25+24)
RegexpMatchEasy0_32-8     90.3ns ± 1%    89.9ns ± 1%    -0.47% (p=0.000 n=25+21)
RegexpMatchEasy0_1K-8     204ns ± 1%     204ns ± 1%     ~      (p=0.914 n=25+24)
RegexpMatchEasy1_32-8     84.9ns ± 0%    84.6ns ± 1%    -0.36% (p=0.000 n=24+25)
RegexpMatchEasy1_1K-8     350ns ± 1%     348ns ± 3%     -0.59% (p=0.007 n=25+25)
RegexpMatchMedium_32-8    122ns ± 1%     121ns ± 0%     -1.08% (p=0.000 n=25+18)
RegexpMatchMedium_1K-8    36.1µs ± 1%    34.6µs ± 1%    -4.02% (p=0.000 n=25+25)
RegexpMatchHard_32-8      1.69µs ± 2%    1.65µs ± 1%    -2.38% (p=0.000 n=25+25)
RegexpMatchHard_1K-8      50.8µs ± 1%    49.4µs ± 1%    -2.69% (p=0.000 n=25+24)
Revcomp-8                 453ms ± 2%     449ms ± 3%     -0.74% (p=0.022 n=25+24)
Template-8                63.2ms ± 2%    63.4ms ± 1%    ~      (p=0.127 n=25+24)
TimeParse-8               313ns ± 1%     315ns ± 3%     ~      (p=0.924 n=24+25)
TimeFormat-8              294ns ± 1%     292ns ± 2%     -0.65% (p=0.004 n=23+24)
[Geo mean]                49.9µs         49.6µs         -0.65%

name                      old speed      new speed      delta
GobDecode-8               112MB/s ± 1%   110MB/s ± 4%   -1.00% (p=0.036 n=24+24)
GobEncode-8               130MB/s ± 1%   129MB/s ± 1%   ~      (p=0.894 n=25+24)
Gzip-8                    72.7MB/s ± 2%  73.0MB/s ± 2%  ~      (p=0.208 n=25+24)
Gunzip-8                  549MB/s ± 1%   545MB/s ± 1%   -0.70% (p=0.000 n=25+25)
JSONEncode-8              201MB/s ± 1%   198MB/s ± 3%   -1.29% (p=0.000 n=25+25)
JSONDecode-8              40.7MB/s ± 1%  39.8MB/s ± 5%  -2.23% (p=0.000 n=25+25)
GoParse-8                 16.2MB/s ± 4%  16.3MB/s ± 1%  ~      (p=0.211 n=25+24)
RegexpMatchEasy0_32-8     354MB/s ± 1%   356MB/s ± 1%   +0.47% (p=0.000 n=25+21)
RegexpMatchEasy0_1K-8     5.00GB/s ± 0%  4.99GB/s ± 1%  ~      (p=0.588 n=24+24)
RegexpMatchEasy1_32-8     377MB/s ± 1%   378MB/s ± 1%   +0.39% (p=0.000 n=25+25)
RegexpMatchEasy1_1K-8     2.92GB/s ± 1%  2.94GB/s ± 3%  +0.65% (p=0.008 n=25+25)
RegexpMatchMedium_32-8    8.14MB/s ± 1%  8.22MB/s ± 1%  +0.98% (p=0.000 n=25+24)
RegexpMatchMedium_1K-8    28.4MB/s ± 1%  29.6MB/s ± 1%  +4.19% (p=0.000 n=25+25)
RegexpMatchHard_32-8      18.9MB/s ± 2%  19.4MB/s ± 1%  +2.43% (p=0.000 n=25+25)
RegexpMatchHard_1K-8      20.2MB/s ± 1%  20.7MB/s ± 1%  +2.76% (p=0.000 n=25+24)
Revcomp-8                 561MB/s ± 2%   566MB/s ± 3%   +0.75% (p=0.021 n=25+24)
Template-8                30.7MB/s ± 2%  30.6MB/s ± 1%  ~      (p=0.131 n=25+24)
[Geo mean]                120MB/s        121MB/s        +0.48%

https://perf.golang.org/search?q=upload:20181004.6

Change-Id: I97f9fee34577961a116a8ddd445c6272253f0f95
Reviewed-on: https://go-review.googlesource.com/c/139837
Run-TryBot: Michael Knyszek <mknyszek@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
parent e508a5f072
commit 07e738ec32
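For orientation before the diff: after this change mheap keeps a single size-ordered free structure (the mTreap) that answers best-fit queries, plus a single busy list, instead of per-size free/busy arrays and separate freelarge/busylarge. The sketch below is only a toy model of that shape; the span and freeSet types and the sorted-slice backing are invented for illustration and are not the runtime's implementation.

package main

import (
	"fmt"
	"sort"
)

// span is a stand-in for the runtime's mspan: just a base address and a length in pages.
type span struct {
	base   uintptr
	npages uintptr
}

// freeSet is a toy stand-in for mheap.free (an mTreap in the real runtime).
// It keeps free spans ordered by size so that remove(npage) can return the
// smallest span with at least npage pages (a best-fit query).
type freeSet struct {
	spans []*span // kept sorted by npages
}

func (f *freeSet) insert(s *span) {
	i := sort.Search(len(f.spans), func(i int) bool { return f.spans[i].npages >= s.npages })
	f.spans = append(f.spans, nil)
	copy(f.spans[i+1:], f.spans[i:])
	f.spans[i] = s
}

// remove returns and removes the smallest free span with >= npage pages, or nil.
func (f *freeSet) remove(npage uintptr) *span {
	i := sort.Search(len(f.spans), func(i int) bool { return f.spans[i].npages >= npage })
	if i == len(f.spans) {
		return nil
	}
	s := f.spans[i]
	f.spans = append(f.spans[:i], f.spans[i+1:]...)
	return s
}

func main() {
	var free freeSet
	free.insert(&span{base: 0x1000, npages: 4})
	free.insert(&span{base: 0x9000, npages: 64})
	free.insert(&span{base: 0x5000, npages: 16})

	// Best fit: a 10-page request is served from the 16-page span.
	s := free.remove(10)
	fmt.Printf("got %d-page span at %#x\n", s.npages, s.base)
}

With only one structure to query, a request for npage pages is always served by the smallest free span holding at least npage pages, whether or not the request would have fallen under the old 1MB _MaxMHeapList cutoff.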
src/runtime/malloc.go
@@ -136,8 +136,7 @@ const (
 	_TinySize      = 16
 	_TinySizeClass = int8(2)
 
-	_FixAllocChunk  = 16 << 10               // Chunk size for FixAlloc
-	_MaxMHeapList   = 1 << (20 - _PageShift) // Maximum page length for fixed-size list in MHeap.
+	_FixAllocChunk = 16 << 10 // Chunk size for FixAlloc
 
 	// Per-P, per order stack segment cache size.
 	_StackCacheSize = 32 * 1024
src/runtime/mheap.go
@@ -30,13 +30,11 @@ const minPhysPageSize = 4096
 //go:notinheap
 type mheap struct {
 	lock      mutex
-	free      [_MaxMHeapList]mSpanList // free lists of given length up to _MaxMHeapList
-	freelarge mTreap                   // free treap of length >= _MaxMHeapList
-	busy      [_MaxMHeapList]mSpanList // busy lists of large spans of given length
-	busylarge mSpanList                // busy lists of large spans length >= _MaxMHeapList
-	sweepgen  uint32                   // sweep generation, see comment in mspan
-	sweepdone uint32                   // all spans are swept
-	sweepers  uint32                   // number of active sweepone calls
+	free      mTreap    // free treap of spans
+	busy      mSpanList // busy list of spans
+	sweepgen  uint32    // sweep generation, see comment in mspan
+	sweepdone uint32    // all spans are swept
+	sweepers  uint32    // number of active sweepone calls
 
 	// allspans is a slice of all mspans ever created. Each mspan
 	// appears exactly once.
@@ -599,12 +597,7 @@ func (h *mheap) init() {
 	h.spanalloc.zero = false
 
 	// h->mapcache needs no init
-	for i := range h.free {
-		h.free[i].init()
-		h.busy[i].init()
-	}
-
-	h.busylarge.init()
+	h.busy.init()
 	for i := range h.central {
 		h.central[i].mcentral.init(spanClass(i))
 	}
@@ -647,30 +640,12 @@ retry:
 // Sweeps and reclaims at least npage pages into heap.
 // Called before allocating npage pages.
 func (h *mheap) reclaim(npage uintptr) {
-	// First try to sweep busy spans with large objects of size >= npage,
-	// this has good chances of reclaiming the necessary space.
-	for i := int(npage); i < len(h.busy); i++ {
-		if h.reclaimList(&h.busy[i], npage) != 0 {
-			return // Bingo!
-		}
-	}
-
-	// Then -- even larger objects.
-	if h.reclaimList(&h.busylarge, npage) != 0 {
+	if h.reclaimList(&h.busy, npage) != 0 {
 		return // Bingo!
 	}
 
-	// Now try smaller objects.
-	// One such object is not enough, so we need to reclaim several of them.
-	reclaimed := uintptr(0)
-	for i := 0; i < int(npage) && i < len(h.busy); i++ {
-		reclaimed += h.reclaimList(&h.busy[i], npage-reclaimed)
-		if reclaimed >= npage {
-			return
-		}
-	}
-
 	// Now sweep everything that is not yet swept.
+	var reclaimed uintptr
 	unlock(&h.lock)
 	for {
 		n := sweepone()
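This hunk carries the reclamation-policy change described in the commit message: instead of probing per-size busy lists for a single span that is large enough, reclaim now makes one pass over the single busy list and otherwise keeps sweeping until enough pages accumulate. The following is a rough, self-contained sketch of that control flow; spanList, the stub reclaimList/sweepone, and the sample data are invented, and the real function's locking and its exact short-circuit test are simplified.

package main

import "fmt"

// Each entry stands in for a sweepable busy span: the pages it would free.
type spanList []uintptr

// reclaimList sweeps spans on list until at least npage pages are reclaimed
// or the list is exhausted, returning the pages reclaimed (a stub, not the
// runtime's mheap.reclaimList).
func reclaimList(list spanList, npage uintptr) uintptr {
	var n uintptr
	for _, pages := range list {
		n += pages
		if n >= npage {
			break
		}
	}
	return n
}

// unswept simulates the rest of the heap; sweepone sweeps one span from it
// and returns the pages freed, or ^uintptr(0) when nothing is left.
var unswept = []uintptr{2, 8, 1}

func sweepone() uintptr {
	if len(unswept) == 0 {
		return ^uintptr(0)
	}
	n := unswept[0]
	unswept = unswept[1:]
	return n
}

// reclaim mirrors the shape of the new mheap.reclaim: one pass over the single
// busy list, then a sweep-everything fallback until npage pages are reclaimed.
func reclaim(busy spanList, npage uintptr) uintptr {
	if n := reclaimList(busy, npage); n >= npage {
		return n
	}
	var reclaimed uintptr
	for reclaimed < npage {
		n := sweepone()
		if n == ^uintptr(0) {
			break // nothing left to sweep
		}
		reclaimed += n
	}
	return reclaimed
}

func main() {
	fmt.Println("reclaimed pages:", reclaim(spanList{1, 2}, 8))
}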
@@ -752,11 +727,7 @@ func (h *mheap) alloc_m(npage uintptr, spanclass spanClass, large bool) *mspan {
 			mheap_.nlargealloc++
 			atomic.Xadd64(&memstats.heap_live, int64(npage<<_PageShift))
 			// Swept spans are at the end of lists.
-			if s.npages < uintptr(len(h.busy)) {
-				h.busy[s.npages].insertBack(s)
-			} else {
-				h.busylarge.insertBack(s)
-			}
+			h.busy.insertBack(s)
 		}
 	}
 	// heap_scan and heap_live were updated.
@@ -867,31 +838,20 @@ func (h *mheap) setSpans(base, npage uintptr, s *mspan) {
 // The returned span has been removed from the
 // free list, but its state is still mSpanFree.
 func (h *mheap) allocSpanLocked(npage uintptr, stat *uint64) *mspan {
-	var list *mSpanList
 	var s *mspan
 
-	// Try in fixed-size lists up to max.
-	for i := int(npage); i < len(h.free); i++ {
-		list = &h.free[i]
-		if !list.isEmpty() {
-			s = list.first
-			list.remove(s)
-			goto HaveSpan
-		}
-	}
-	// Best fit in list of large spans.
-	s = h.allocLarge(npage) // allocLarge removed s from h.freelarge for us
+	// Best fit in the treap of spans.
+	s = h.free.remove(npage)
 	if s == nil {
 		if !h.grow(npage) {
 			return nil
 		}
-		s = h.allocLarge(npage)
+		s = h.free.remove(npage)
 		if s == nil {
 			return nil
 		}
 	}
 
-HaveSpan:
 	// Mark span in use.
 	if s.state != mSpanFree {
 		throw("MHeap_AllocLocked - MSpan not free")
@@ -933,21 +893,6 @@ HaveSpan:
 	return s
 }
 
-// Large spans have a minimum size of 1MByte. The maximum number of large spans to support
-// 1TBytes is 1 million, experimentation using random sizes indicates that the depth of
-// the tree is less that 2x that of a perfectly balanced tree. For 1TByte can be referenced
-// by a perfectly balanced tree with a depth of 20. Twice that is an acceptable 40.
-func (h *mheap) isLargeSpan(npages uintptr) bool {
-	return npages >= uintptr(len(h.free))
-}
-
-// allocLarge allocates a span of at least npage pages from the treap of large spans.
-// Returns nil if no such span currently exists.
-func (h *mheap) allocLarge(npage uintptr) *mspan {
-	// Search treap for smallest span with >= npage pages.
-	return h.freelarge.remove(npage)
-}
-
 // Try to add at least npage pages of memory to the heap,
 // returning whether it worked.
 //
@@ -1023,7 +968,7 @@ func (h *mheap) freeManual(s *mspan, stat *uint64) {
 	unlock(&h.lock)
 }
 
-// s must be on a busy list (h.busy or h.busylarge) or unlinked.
+// s must be on the busy list or unlinked.
 func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool, unusedsince int64) {
 	switch s.state {
 	case mSpanManual:
@@ -1048,7 +993,7 @@ func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool, unusedsince i
 	}
 	s.state = mSpanFree
 	if s.inList() {
-		h.busyList(s.npages).remove(s)
+		h.busy.remove(s)
 	}
 
 	// Stamp newly unused spans. The scavenger will use that
@@ -1069,12 +1014,7 @@ func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool, unusedsince i
 		h.setSpan(before.base(), s)
 		// The size is potentially changing so the treap needs to delete adjacent nodes and
 		// insert back as a combined node.
-		if h.isLargeSpan(before.npages) {
-			// We have a t, it is large so it has to be in the treap so we can remove it.
-			h.freelarge.removeSpan(before)
-		} else {
-			h.freeList(before.npages).remove(before)
-		}
+		h.free.removeSpan(before)
 		before.state = mSpanDead
 		h.spanalloc.free(unsafe.Pointer(before))
 	}
@@ -1085,32 +1025,13 @@ func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool, unusedsince i
 		s.npreleased += after.npreleased
 		s.needzero |= after.needzero
 		h.setSpan(s.base()+s.npages*pageSize-1, s)
-		if h.isLargeSpan(after.npages) {
-			h.freelarge.removeSpan(after)
-		} else {
-			h.freeList(after.npages).remove(after)
-		}
+		h.free.removeSpan(after)
 		after.state = mSpanDead
 		h.spanalloc.free(unsafe.Pointer(after))
 	}
 
-	// Insert s into appropriate list or treap.
-	if h.isLargeSpan(s.npages) {
-		h.freelarge.insert(s)
-	} else {
-		h.freeList(s.npages).insert(s)
-	}
-}
-
-func (h *mheap) freeList(npages uintptr) *mSpanList {
-	return &h.free[npages]
-}
-
-func (h *mheap) busyList(npages uintptr) *mSpanList {
-	if npages < uintptr(len(h.busy)) {
-		return &h.busy[npages]
-	}
-	return &h.busylarge
-}
+	// Insert s into the free treap.
+	h.free.insert(s)
+}
 
 func scavengeTreapNode(t *treapNode, now, limit uint64) uintptr {
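The before/after blocks in this hunk do span coalescing: a physically adjacent span that is already free is pulled out of the treap (previously: out of either a size-indexed free list or freelarge), merged into the span being freed, and the combined span is inserted once. A toy illustration of the merge step follows; the span type, the map standing in for the by-address lookup, and the coalesce helper are all invented for illustration, not the runtime's code.

package main

import "fmt"

// span is a stand-in for mspan: a run of pages starting at base.
type span struct {
	base   uintptr
	npages uintptr
}

const pageSize = 8192

// free maps a span's base address to the span, standing in for the treap plus
// the spans-by-address lookup (h.spanOf / h.setSpan) in the real heap.
var free = map[uintptr]*span{}

// coalesce merges s with any free neighbors and records the result as free.
func coalesce(s *span) {
	// Merge with a free span that ends exactly where s begins.
	for base, before := range free {
		if base+before.npages*pageSize == s.base {
			delete(free, base)
			s.base = before.base
			s.npages += before.npages
			break
		}
	}
	// Merge with a free span that begins exactly where s ends.
	if after, ok := free[s.base+s.npages*pageSize]; ok {
		delete(free, after.base)
		s.npages += after.npages
	}
	free[s.base] = s
}

func main() {
	free[0x10000] = &span{base: 0x10000, npages: 4} // ends at 0x18000
	coalesce(&span{base: 0x18000, npages: 2})       // adjacent: merges into one 6-page span
	for _, s := range free {
		fmt.Printf("free span: base=%#x npages=%d\n", s.base, s.npages)
	}
}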
@@ -1123,21 +1044,6 @@ func scavengeTreapNode(t *treapNode, now, limit uint64) uintptr {
 	return 0
 }
 
-func scavengelist(list *mSpanList, now, limit uint64) uintptr {
-	if list.isEmpty() {
-		return 0
-	}
-
-	var sumreleased uintptr
-	for s := list.first; s != nil; s = s.next {
-		if (now-uint64(s.unusedsince)) <= limit || s.npreleased == s.npages {
-			continue
-		}
-		sumreleased += s.scavenge()
-	}
-	return sumreleased
-}
-
 func (h *mheap) scavenge(k int32, now, limit uint64) {
 	// Disallow malloc or panic while holding the heap lock. We do
 	// this here because this is an non-mallocgc entry-point to
@@ -1145,11 +1051,7 @@ func (h *mheap) scavenge(k int32, now, limit uint64) {
 	gp := getg()
 	gp.m.mallocing++
 	lock(&h.lock)
-	var sumreleased uintptr
-	for i := 0; i < len(h.free); i++ {
-		sumreleased += scavengelist(&h.free[i], now, limit)
-	}
-	sumreleased += scavengetreap(h.freelarge.treap, now, limit)
+	sumreleased := scavengetreap(h.free.treap, now, limit)
 	unlock(&h.lock)
 	gp.m.mallocing--
 
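With the per-size free lists gone, scavengelist is deleted and a single scavengetreap walk over h.free covers every free span. The policy itself is unchanged from the removed code: release the physical pages of free spans that have sat unused for longer than the limit. Below is a hedged sketch of that policy over a plain slice; the freeSpan type is invented and the actual sysUnused release is only simulated by a counter.

package main

import "fmt"

// freeSpan is a stand-in for a free mspan tracked by the heap.
type freeSpan struct {
	npages      uintptr
	unusedsince uint64  // when the span last became unused
	npreleased  uintptr // pages already returned to the OS
}

// scavenge releases the pages of every free span that has been unused for
// longer than limit, mirroring the test in the deleted scavengelist.
func scavenge(spans []*freeSpan, now, limit uint64) uintptr {
	var sumreleased uintptr
	for _, s := range spans {
		if now-s.unusedsince <= limit || s.npreleased == s.npages {
			continue // recently used, or already fully released
		}
		released := s.npages - s.npreleased
		s.npreleased = s.npages // pretend we released the span's pages to the OS
		sumreleased += released
	}
	return sumreleased
}

func main() {
	spans := []*freeSpan{
		{npages: 8, unusedsince: 100},
		{npages: 4, unusedsince: 900}, // too recently unused to release
	}
	fmt.Println("released pages:", scavenge(spans, 1000, 500))
}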