runtime: redo heap bitmap
[this is a retry of CL 407035 + its revert CL 422395. The content is unchanged]

Use just 1 bit per word to record the ptr/nonptr bitmap. Use word-sized
operations to manipulate the bitmap, so we can operate on up to 64
ptr/nonptr bits at a time.

Use a separate bitmap, one bit per word of the ptr/nonptr bitmap, to
encode a no-more-pointers signal. Since we can check 64 ptr/nonptr bits
at once, knowing the exact last pointer location is not necessary.

As a followon CL, we should make the gcdata bitmap an array of uintptr
instead of an array of byte, so we can load 64 bits of it at once.
Similarly for the processing of gc programs.

Change-Id: Ica5eb622f5b87e647be64f471d67b02732ef8be6
Reviewed-on: https://go-review.googlesource.com/c/go/+/422634
Reviewed-by: Michael Knyszek <mknyszek@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Run-TryBot: Keith Randall <khr@golang.org>
This commit is contained in:
parent a55793835f
commit 6a9c674a09
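In outline, the scheme works like the standalone sketch below (illustrative only, not the runtime's code: the arena type, the nextPointer helper, and the slice-backed storage are hypothetical; only the bitmap/noMorePtrs idea comes from the CL). One ptr/nonptr bit per heap word, bitmap words examined 64 bits at a time with count-trailing-zeros, and a per-bitmap-word noMorePtrs bit that lets a scan stop early without knowing where the last pointer is.

package main

import (
	"fmt"
	"math/bits"
)

const ptrBits = 64 // ptr/nonptr bits per bitmap word on a 64-bit system

type arena struct {
	bitmap     []uint64 // 1 bit per heap word: 1 = pointer, 0 = scalar
	noMorePtrs []uint8  // 1 bit per bitmap word: set = no pointers past that word
}

// nextPointer returns the index of the first heap word at or after word
// that holds a pointer, or -1 if the rest of the object is pointer-free.
func (a *arena) nextPointer(word int) int {
	for i := word / ptrBits; i < len(a.bitmap); i++ {
		w := a.bitmap[i]
		if i == word/ptrBits {
			w &^= 1<<(uint(word)%ptrBits) - 1 // drop bits before word
		}
		if w != 0 {
			return i*ptrBits + bits.TrailingZeros64(w) // inspect 64 words at once
		}
		if a.noMorePtrs[i/8]&(1<<(uint(i)%8)) != 0 {
			return -1 // no-more-pointers signal: stop early
		}
	}
	return -1
}

func main() {
	a := &arena{bitmap: make([]uint64, 4), noMorePtrs: make([]uint8, 1)}
	a.bitmap[0] = 1<<3 | 1<<40 // pointers at heap words 3 and 40
	a.noMorePtrs[0] |= 1 << 0  // nothing after bitmap word 0
	for w := a.nextPointer(0); w >= 0; w = a.nextPointer(w + 1) {
		fmt.Println("pointer at heap word", w)
	}
}

The runtime stores these two arrays per heap arena (see the heapArena hunk below) and wraps the traversal in the heapBits iterator used throughout this diff.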
@@ -72,11 +72,7 @@ func TestIntendedInlining(t *testing.T) {
 			"cgoInRange",
 			"gclinkptr.ptr",
 			"guintptr.ptr",
-			"heapBits.bits",
-			"heapBits.isPointer",
-			"heapBits.morePointers",
-			"heapBits.next",
-			"heapBitsForAddr",
+			"writeHeapBitsForAddr",
 			"markBits.isMarked",
 			"muintptr.ptr",
 			"puintptr.ptr",

@@ -224,6 +220,8 @@ func TestIntendedInlining(t *testing.T) {
 		// On loong64, mips64x and riscv64, Ctz64 is not intrinsified and causes nextFreeFast too expensive
 		// to inline (Issue 22239).
 		want["runtime"] = append(want["runtime"], "nextFreeFast")
+		// Same behavior for heapBits.nextFast.
+		want["runtime"] = append(want["runtime"], "heapBits.nextFast")
 	}
 	if runtime.GOARCH != "386" {
 		// As explained above, Ctz64 and Ctz32 are not Go code on 386.

@@ -6989,8 +6989,21 @@ func TestFuncLayout(t *testing.T) {
 	}
 }
 
+// trimBitmap removes trailing 0 elements from b and returns the result.
+func trimBitmap(b []byte) []byte {
+	for len(b) > 0 && b[len(b)-1] == 0 {
+		b = b[:len(b)-1]
+	}
+	return b
+}
+
 func verifyGCBits(t *testing.T, typ Type, bits []byte) {
 	heapBits := GCBits(New(typ).Interface())
+
+	// Trim scalars at the end, as bits might end in zero,
+	// e.g. with rep(2, lit(1, 0)).
+	bits = trimBitmap(bits)
+
 	if !bytes.Equal(heapBits, bits) {
 		_, _, line, _ := runtime.Caller(1)
 		t.Errorf("line %d: heapBits incorrect for %v\nhave %v\nwant %v", line, typ, heapBits, bits)

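As a quick standalone illustration of the new helper (the input bitmap below is hand-written, not produced by the test's rep/lit helpers):

package main

import "fmt"

// Same trimming logic as the test helper above.
func trimBitmap(b []byte) []byte {
	for len(b) > 0 && b[len(b)-1] == 0 {
		b = b[:len(b)-1]
	}
	return b
}

func main() {
	// A bitmap like the one rep(2, lit(1, 0)) would describe: ptr, scalar, ptr, scalar.
	fmt.Println(trimBitmap([]byte{1, 0, 1, 0})) // prints [1 0 1]: the trailing scalar is dropped
}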
@@ -7007,12 +7020,10 @@ func verifyGCBitsSlice(t *testing.T, typ Type, cap int, bits []byte) {
 	heapBits := GCBits(data.Interface())
 	// Repeat the bitmap for the slice size, trimming scalars in
 	// the last element.
-	bits = rep(cap, bits)
-	for len(bits) > 0 && bits[len(bits)-1] == 0 {
-		bits = bits[:len(bits)-1]
-	}
+	bits = trimBitmap(rep(cap, bits))
 	if !bytes.Equal(heapBits, bits) {
-		t.Errorf("heapBits incorrect for make(%v, 0, %v)\nhave %v\nwant %v", typ, cap, heapBits, bits)
+		_, _, line, _ := runtime.Caller(1)
+		t.Errorf("line %d: heapBits incorrect for make(%v, 0, %v)\nhave %v\nwant %v", line, typ, cap, heapBits, bits)
 	}
 }
 
@@ -568,17 +568,16 @@ func cgoCheckUnknownPointer(p unsafe.Pointer, msg string) (base, i uintptr) {
 	if base == 0 {
 		return
 	}
-	hbits := heapBitsForAddr(base)
 	n := span.elemsize
-	for i = uintptr(0); i < n; i += goarch.PtrSize {
-		if !hbits.morePointers() {
-			// No more possible pointers.
+	hbits := heapBitsForAddr(base, n)
+	for {
+		var addr uintptr
+		if hbits, addr = hbits.next(); addr == 0 {
 			break
 		}
-		if hbits.isPointer() && cgoIsGoPointer(*(*unsafe.Pointer)(unsafe.Pointer(base + i))) {
+		if cgoIsGoPointer(*(*unsafe.Pointer)(unsafe.Pointer(addr))) {
 			panic(errorString(msg))
 		}
-		hbits = hbits.next()
 	}
 
 	return

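This loop shape recurs in the hunks below (cgoCheckTypedBlock, makeheapobjbv, scanobject): instead of stepping word by word and asking morePointers/isPointer, callers repeatedly ask the iterator for the address of the next pointer slot and stop at zero. A minimal sketch of that consumer pattern, using a hypothetical ptrIter rather than the real heapBits type:

package main

import "fmt"

// ptrIter is a stand-in for the runtime's heapBits iterator: next returns
// the updated iterator plus the address of the next pointer slot, or 0
// when the object has no more pointers.
type ptrIter struct {
	addrs []uintptr // pointer-slot addresses, precomputed for the sketch
}

func (it ptrIter) next() (ptrIter, uintptr) {
	if len(it.addrs) == 0 {
		return it, 0
	}
	return ptrIter{it.addrs[1:]}, it.addrs[0]
}

func main() {
	hbits := ptrIter{addrs: []uintptr{0x1000, 0x1018, 0x1040}}
	for {
		var addr uintptr
		if hbits, addr = hbits.next(); addr == 0 {
			break
		}
		fmt.Printf("would check pointer slot at %#x\n", addr)
	}
}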
@@ -153,16 +153,16 @@ func cgoCheckTypedBlock(typ *_type, src unsafe.Pointer, off, size uintptr) {
 
 	// src must be in the regular heap.
 
-	hbits := heapBitsForAddr(uintptr(src))
-	for i := uintptr(0); i < off+size; i += goarch.PtrSize {
-		bits := hbits.bits()
-		if i >= off && bits&bitPointer != 0 {
-			v := *(*unsafe.Pointer)(add(src, i))
-			if cgoIsGoPointer(v) {
-				throw(cgoWriteBarrierFail)
-			}
-		}
-		hbits = hbits.next()
+	hbits := heapBitsForAddr(uintptr(src), size)
+	for {
+		var addr uintptr
+		if hbits, addr = hbits.next(); addr == 0 {
+			break
+		}
+		v := *(*unsafe.Pointer)(unsafe.Pointer(addr))
+		if cgoIsGoPointer(v) {
+			throw(cgoWriteBarrierFail)
+		}
 	}
 }
 
@@ -737,16 +737,16 @@ func makeheapobjbv(p uintptr, size uintptr) bitvector {
 	for i := uintptr(0); i < nptr/8+1; i++ {
 		tmpbuf[i] = 0
 	}
-	i := uintptr(0)
-	hbits := heapBitsForAddr(p)
-	for ; i < nptr; i++ {
-		if !hbits.morePointers() {
-			break // end of object
+	hbits := heapBitsForAddr(p, size)
+	for {
+		var addr uintptr
+		hbits, addr = hbits.next()
+		if addr == 0 {
+			break
 		}
-		if hbits.isPointer() {
-			tmpbuf[i/8] |= 1 << (i % 8)
-		}
-		hbits = hbits.next()
+		i := (addr - p) / goarch.PtrSize
+		tmpbuf[i/8] |= 1 << (i % 8)
 	}
-	return bitvector{int32(i), &tmpbuf[0]}
+	return bitvector{int32(nptr), &tmpbuf[0]}
 }

@@ -247,13 +247,15 @@ const (
 	// memory.
 	heapArenaBytes = 1 << logHeapArenaBytes
 
+	heapArenaWords = heapArenaBytes / goarch.PtrSize
+
 	// logHeapArenaBytes is log_2 of heapArenaBytes. For clarity,
 	// prefer using heapArenaBytes where possible (we need the
 	// constant to compute some other constants).
 	logHeapArenaBytes = (6+20)*(_64bit*(1-goos.IsWindows)*(1-goarch.IsWasm)*(1-goos.IsIos*goarch.IsArm64)) + (2+20)*(_64bit*goos.IsWindows) + (2+20)*(1-_64bit) + (2+20)*goarch.IsWasm + (2+20)*goos.IsIos*goarch.IsArm64
 
-	// heapArenaBitmapBytes is the size of each heap arena's bitmap.
-	heapArenaBitmapBytes = heapArenaBytes / (goarch.PtrSize * 8 / 2)
+	// heapArenaBitmapWords is the size of each heap arena's bitmap in uintptrs.
+	heapArenaBitmapWords = heapArenaWords / (8 * goarch.PtrSize)
 
 	pagesPerArena = heapArenaBytes / pageSize

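To make the new constants concrete, a small standalone calculation (a sketch for the 64-bit non-Windows, non-wasm, non-iOS/arm64 case, where the formula above gives logHeapArenaBytes = 26, i.e. a 64 MiB arena; the names mirror the constants but the program is not runtime code):

package main

import "fmt"

func main() {
	const ptrSize = 8                                           // goarch.PtrSize on 64-bit
	const heapArenaBytes = 1 << 26                              // 64 MiB arena (assumed case)
	const heapArenaWords = heapArenaBytes / ptrSize             // 8,388,608 heap words
	const heapArenaBitmapWords = heapArenaWords / (8 * ptrSize) // bitmap stored as uintptrs

	fmt.Println(heapArenaBitmapWords)           // 131072 uintptrs
	fmt.Println(heapArenaBitmapWords * ptrSize) // 1048576 bytes: 1 MiB of bitmap per arena
	fmt.Printf("%.2f%%\n", 100*float64(heapArenaBitmapWords*ptrSize)/heapArenaBytes)
}

That 1 MiB of bitmap per 64 MiB arena (about 1.56%) is the "1.6% of the heap size" figure quoted in the heapArena comment further down.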
@@ -353,10 +355,10 @@ func mallocinit() {
 		throw("bad TinySizeClass")
 	}
 
-	if heapArenaBitmapBytes&(heapArenaBitmapBytes-1) != 0 {
+	if heapArenaBitmapWords&(heapArenaBitmapWords-1) != 0 {
 		// heapBits expects modular arithmetic on bitmap
 		// addresses to work.
-		throw("heapArenaBitmapBytes not a power of 2")
+		throw("heapArenaBitmapWords not a power of 2")
 	}
 
 	// Check physPageSize.

(One file's diff is suppressed because it is too large.)

@@ -251,7 +251,7 @@ func (c *mcache) allocLarge(size uintptr, noscan bool) *mspan {
 	// visible to the background sweeper.
 	mheap_.central[spc].mcentral.fullSwept(mheap_.sweepgen).push(s)
 	s.limit = s.base() + size
-	heapBitsForAddr(s.base()).initSpan(s)
+	s.initHeapBits()
 	return s
 }

@@ -250,6 +250,6 @@ func (c *mcentral) grow() *mspan {
 	// n := (npages << _PageShift) / size
 	n := s.divideByElemSize(npages << _PageShift)
 	s.limit = s.base() + size*n
-	heapBitsForAddr(s.base()).initSpan(s)
+	s.initHeapBits()
 	return s
 }

@@ -1267,7 +1267,6 @@ func scanobject(b uintptr, gcw *gcWork) {
 	// b is either the beginning of an object, in which case this
 	// is the size of the object to scan, or it points to an
 	// oblet, in which case we compute the size to scan below.
-	hbits := heapBitsForAddr(b)
 	s := spanOfUnchecked(b)
 	n := s.elemsize
 	if n == 0 {

@@ -1302,20 +1301,24 @@
 		}
 	}
 
-	var i uintptr
-	for i = 0; i < n; i, hbits = i+goarch.PtrSize, hbits.next() {
-		// Load bits once. See CL 22712 and issue 16973 for discussion.
-		bits := hbits.bits()
-		if bits&bitScan == 0 {
-			break // no more pointers in this object
-		}
-		if bits&bitPointer == 0 {
-			continue // not a pointer
+	hbits := heapBitsForAddr(b, n)
+	var scanSize uintptr
+	for {
+		var addr uintptr
+		if hbits, addr = hbits.nextFast(); addr == 0 {
+			if hbits, addr = hbits.next(); addr == 0 {
+				break
+			}
 		}
 
+		// Keep track of farthest pointer we found, so we can
+		// update heapScanWork. TODO: is there a better metric,
+		// now that we can skip scalar portions pretty efficiently?
+		scanSize = addr - b + goarch.PtrSize
+
 		// Work here is duplicated in scanblock and above.
 		// If you make changes here, make changes there too.
-		obj := *(*uintptr)(unsafe.Pointer(b + i))
+		obj := *(*uintptr)(unsafe.Pointer(addr))
 
 		// At this point we have extracted the next potential pointer.
 		// Quickly filter out nil and pointers back to the current object.

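The nested call in the hunk above (try nextFast, fall back to next) is a fast-path/slow-path split: the cheap, inlinable step consumes pointer bits it already has, and the out-of-line step refills state. The sketch below mimics that shape with a hypothetical iter type that caches one 64-bit chunk of ptr bits (constants assume 8-byte words; none of this is the runtime's code), and charges scanSize up to the farthest pointer found, as scanobject now does.

package main

import (
	"fmt"
	"math/bits"
)

// iter caches one 64-bit chunk of ptr/nonptr bits; nextFast consumes bits
// from the cache (the inlinable fast path), next refills it (slow path).
type iter struct {
	cached uint64   // remaining ptr bits in the current chunk
	base   uintptr  // address of heap word 0 of the current chunk
	rest   []uint64 // chunks not yet loaded
}

func (it iter) nextFast() (iter, uintptr) {
	if it.cached == 0 {
		return it, 0 // fast path failed; caller falls back to next
	}
	i := bits.TrailingZeros64(it.cached)
	it.cached &= it.cached - 1 // clear lowest set bit
	return it, it.base + uintptr(i)*8
}

func (it iter) next() (iter, uintptr) {
	for len(it.rest) > 0 {
		it.cached, it.rest = it.rest[0], it.rest[1:]
		it.base += 64 * 8 // next chunk covers the following 64 heap words
		if it.cached != 0 {
			return it.nextFast()
		}
	}
	return it, 0
}

func main() {
	b := uintptr(0x10000)
	it := iter{cached: 1<<2 | 1<<5, base: b, rest: []uint64{0, 1 << 7}}
	var scanSize uintptr
	for {
		var addr uintptr
		if it, addr = it.nextFast(); addr == 0 {
			if it, addr = it.next(); addr == 0 {
				break
			}
		}
		scanSize = addr - b + 8 // farthest pointer seen, plus one word
	}
	fmt.Println(scanSize) // 1088: an all-scalar middle chunk was skipped without per-word work
}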
@@ -1329,13 +1332,13 @@
 			// heap. In this case, we know the object was
 			// just allocated and hence will be marked by
 			// allocation itself.
-			if obj, span, objIndex := findObject(obj, b, i); obj != 0 {
-				greyobject(obj, b, i, span, gcw, objIndex)
+			if obj, span, objIndex := findObject(obj, b, addr-b); obj != 0 {
+				greyobject(obj, b, addr-b, span, gcw, objIndex)
 			}
 		}
 	}
 	gcw.bytesMarked += uint64(n)
-	gcw.heapScanWork += int64(i)
+	gcw.heapScanWork += int64(scanSize)
 }
 
 // scanConservative scans block [b, b+n) conservatively, treating any

@@ -221,9 +221,22 @@ var mheap_ mheap
 //go:notinheap
 type heapArena struct {
 	// bitmap stores the pointer/scalar bitmap for the words in
-	// this arena. See mbitmap.go for a description. Use the
-	// heapBits type to access this.
-	bitmap [heapArenaBitmapBytes]byte
+	// this arena. See mbitmap.go for a description.
+	// This array uses 1 bit per word of heap, or 1.6% of the heap size (for 64-bit).
+	bitmap [heapArenaBitmapWords]uintptr
+
+	// If the ith bit of noMorePtrs is true, then there are no more
+	// pointers for the object containing the word described by the
+	// high bit of bitmap[i].
+	// In that case, bitmap[i+1], ... must be zero until the start
+	// of the next object.
+	// We never operate on these entries using bit-parallel techniques,
+	// so it is ok if they are small. Also, they can't be bigger than
+	// uint16 because at that size a single noMorePtrs entry
+	// represents 8K of memory, the minimum size of a span. Any larger
+	// and we'd have to worry about concurrent updates.
+	// This array uses 1 bit per word of bitmap, or .024% of the heap size (for 64-bit).
+	noMorePtrs [heapArenaBitmapWords / 8]uint8
 
 	// spans maps from virtual address page ID within this arena to *mspan.
 	// For allocated spans, their pages map to the span itself.

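A quick check of the size constraint in the new comment (a standalone sketch; the constants assume a 64-bit platform with 8-byte words and are not taken from the runtime):

package main

import "fmt"

func main() {
	const ptrSize = 8                     // bytes per heap word (64-bit, assumed)
	const bitsPerBitmapWord = 8 * ptrSize // one bitmap uintptr covers 64 heap words
	// Heap bytes covered by a single noMorePtrs entry of a given bit width:
	fmt.Println(8 * bitsPerBitmapWord * ptrSize)  // uint8 entry: 4096 bytes
	fmt.Println(16 * bitsPerBitmapWord * ptrSize) // uint16 entry: 8192 bytes, one minimum-size span
}

A uint8 entry covers 4 KiB of heap, comfortably inside a single 8 KiB span, which is why entries no wider than uint16 avoid the concurrent-update problem the comment describes.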
@@ -260,12 +260,14 @@ func growslice(et *_type, old slice, cap int) slice {
 		capmem = roundupsize(uintptr(newcap) << shift)
 		overflow = uintptr(newcap) > (maxAlloc >> shift)
 		newcap = int(capmem >> shift)
+		capmem = uintptr(newcap) << shift
 	default:
 		lenmem = uintptr(old.len) * et.size
 		newlenmem = uintptr(cap) * et.size
 		capmem, overflow = math.MulUintptr(et.size, uintptr(newcap))
 		capmem = roundupsize(capmem)
 		newcap = int(capmem / et.size)
+		capmem = uintptr(newcap) * et.size
 	}
 
 	// The check of overflow in addition to capmem > maxAlloc is needed