runtime: redo heap bitmap

Use just 1 bit per word to record the ptr/nonptr bitmap.
Use word-sized operations to manipulate the bitmap, so we can operate
on up to 64 ptr/nonptr bits at a time.

Use a separate bitmap, one bit per word of the ptr/nonptr bitmap,
to encode a no-more-pointers signal. Since we can check 64 ptr/nonptr
bits at once, knowing the exact last pointer location is not necessary.
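
For illustration, a minimal self-contained sketch (invented names; not the
runtime's actual code) of why word-sized bitmap operations pay off: one
64-bit ptr/nonptr word can be consumed with math/bits, so runs of scalar
words are skipped without being visited individually.

	package main

	import (
		"fmt"
		"math/bits"
	)

	// pointerOffsets reports the byte offsets of the pointer-holding
	// words described by one 64-bit ptr/nonptr bitmap word. Set bit i
	// means word i of the object holds a pointer.
	func pointerOffsets(bitmapWord uint64) []uintptr {
		var offs []uintptr
		for bitmapWord != 0 {
			i := bits.TrailingZeros64(bitmapWord) // lowest set bit
			offs = append(offs, uintptr(i)*8)     // 8-byte words on 64-bit
			bitmapWord &= bitmapWord - 1          // clear that bit
		}
		return offs
	}

	func main() {
		// Words 0, 3, and 63 hold pointers; the 60 scalar words in
		// between are never touched.
		fmt.Println(pointerOffsets(1<<0 | 1<<3 | 1<<63)) // [0 24 504]
	}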

This cleans up the bitmap implementation significantly, which will
hopefully make it faster. TODO: measure

As a follow-on CL, we should make the gcdata bitmap an array of
uintptr instead of an array of byte, so we can load 64 bits of it at once.
Similarly for the processing of gc programs.
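
For a rough illustration of the follow-on (hypothetical helpers; assuming a
64-bit platform): assembling 64 bitmap bits from a byte array takes eight
loads plus shifts, while a uintptr array yields them in a single aligned load.

	package main

	import "fmt"

	// load64FromBytes assembles 64 bitmap bits from a byte-based
	// bitmap: eight single-byte loads plus shifts.
	func load64FromBytes(b []byte, word int) uint64 {
		var w uint64
		for k := 0; k < 8; k++ {
			w |= uint64(b[word*8+k]) << (8 * k)
		}
		return w
	}

	// load64FromWords reads the same 64 bits in a single load.
	func load64FromWords(b []uintptr, word int) uint64 {
		return uint64(b[word])
	}

	func main() {
		bytes := []byte{0x01, 0, 0, 0, 0, 0, 0, 0x80}
		words := []uintptr{0x8000000000000001}
		fmt.Println(load64FromBytes(bytes, 0) == load64FromWords(words, 0)) // true
	}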

Change-Id: I18151b1876d9543599800dec51e2a1b19df97d49
Reviewed-on: https://go-review.googlesource.com/c/go/+/407035
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Keith Randall <khr@golang.org>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Keith Randall <khr@google.com>
Author: Keith Randall, 2022-04-29 13:21:44 -07:00 (committed by Keith Randall)
Parent: e7307034cc
Commit: b589208c8c
12 changed files with 537 additions and 1133 deletions


@@ -72,11 +72,7 @@ func TestIntendedInlining(t *testing.T) {
 		"cgoInRange",
 		"gclinkptr.ptr",
 		"guintptr.ptr",
-		"heapBits.bits",
-		"heapBits.isPointer",
-		"heapBits.morePointers",
-		"heapBits.next",
-		"heapBitsForAddr",
+		"writeHeapBitsForAddr",
 		"markBits.isMarked",
 		"muintptr.ptr",
 		"puintptr.ptr",
@@ -224,6 +220,8 @@ func TestIntendedInlining(t *testing.T) {
 		// On loong64, mips64x and riscv64, Ctz64 is not intrinsified and causes nextFreeFast too expensive
 		// to inline (Issue 22239).
 		want["runtime"] = append(want["runtime"], "nextFreeFast")
+		// Same behavior for heapBits.nextFast.
+		want["runtime"] = append(want["runtime"], "heapBits.nextFast")
 	}
 	if runtime.GOARCH != "386" {
 		// As explained above, Ctz64 and Ctz32 are not Go code on 386.


@@ -6989,8 +6989,21 @@ func TestFuncLayout(t *testing.T) {
 	}
 }
 
+// trimBitmap removes trailing 0 elements from b and returns the result.
+func trimBitmap(b []byte) []byte {
+	for len(b) > 0 && b[len(b)-1] == 0 {
+		b = b[:len(b)-1]
+	}
+	return b
+}
+
 func verifyGCBits(t *testing.T, typ Type, bits []byte) {
 	heapBits := GCBits(New(typ).Interface())
+
+	// Trim scalars at the end, as bits might end in zero,
+	// e.g. with rep(2, lit(1, 0)).
+	bits = trimBitmap(bits)
+
 	if !bytes.Equal(heapBits, bits) {
 		_, _, line, _ := runtime.Caller(1)
 		t.Errorf("line %d: heapBits incorrect for %v\nhave %v\nwant %v", line, typ, heapBits, bits)
@@ -7007,12 +7020,10 @@ func verifyGCBitsSlice(t *testing.T, typ Type, cap int, bits []byte) {
 	heapBits := GCBits(data.Interface())
 	// Repeat the bitmap for the slice size, trimming scalars in
 	// the last element.
-	bits = rep(cap, bits)
-	for len(bits) > 0 && bits[len(bits)-1] == 0 {
-		bits = bits[:len(bits)-1]
-	}
+	bits = trimBitmap(rep(cap, bits))
 	if !bytes.Equal(heapBits, bits) {
-		t.Errorf("heapBits incorrect for make(%v, 0, %v)\nhave %v\nwant %v", typ, cap, heapBits, bits)
+		_, _, line, _ := runtime.Caller(1)
+		t.Errorf("line %d: heapBits incorrect for make(%v, 0, %v)\nhave %v\nwant %v", line, typ, cap, heapBits, bits)
 	}
 }
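
For reference, a standalone sketch of the new helper's effect, using the
function exactly as added above; only trailing scalar entries are trimmed,
matching what GCBits reports:

	package main

	import "fmt"

	// trimBitmap, copied from the test change above.
	func trimBitmap(b []byte) []byte {
		for len(b) > 0 && b[len(b)-1] == 0 {
			b = b[:len(b)-1]
		}
		return b
	}

	func main() {
		// rep(2, lit(1, 0)) in the test repeats the pattern {1, 0} twice.
		fmt.Println(trimBitmap([]byte{1, 0, 1, 0})) // [1 0 1]
	}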


@@ -568,17 +568,16 @@ func cgoCheckUnknownPointer(p unsafe.Pointer, msg string) (base, i uintptr) {
 		if base == 0 {
 			return
 		}
-		hbits := heapBitsForAddr(base)
 		n := span.elemsize
-		for i = uintptr(0); i < n; i += goarch.PtrSize {
-			if !hbits.morePointers() {
-				// No more possible pointers.
+		hbits := heapBitsForAddr(base, n)
+		for {
+			var addr uintptr
+			if hbits, addr = hbits.next(); addr == 0 {
 				break
 			}
-			if hbits.isPointer() && cgoIsGoPointer(*(*unsafe.Pointer)(unsafe.Pointer(base + i))) {
+			if cgoIsGoPointer(*(*unsafe.Pointer)(unsafe.Pointer(addr))) {
 				panic(errorString(msg))
 			}
-			hbits = hbits.next()
 		}
 
 		return


@@ -153,16 +153,16 @@ func cgoCheckTypedBlock(typ *_type, src unsafe.Pointer, off, size uintptr) {
 	// src must be in the regular heap.
-	hbits := heapBitsForAddr(uintptr(src))
-	for i := uintptr(0); i < off+size; i += goarch.PtrSize {
-		bits := hbits.bits()
-		if i >= off && bits&bitPointer != 0 {
-			v := *(*unsafe.Pointer)(add(src, i))
-			if cgoIsGoPointer(v) {
-				throw(cgoWriteBarrierFail)
-			}
+	hbits := heapBitsForAddr(uintptr(src), size)
+	for {
+		var addr uintptr
+		if hbits, addr = hbits.next(); addr == 0 {
+			break
 		}
+		v := *(*unsafe.Pointer)(unsafe.Pointer(addr))
+		if cgoIsGoPointer(v) {
+			throw(cgoWriteBarrierFail)
+		}
-		hbits = hbits.next()
 	}
 }


@@ -737,16 +737,16 @@ func makeheapobjbv(p uintptr, size uintptr) bitvector {
 	for i := uintptr(0); i < nptr/8+1; i++ {
 		tmpbuf[i] = 0
 	}
-	i := uintptr(0)
-	hbits := heapBitsForAddr(p)
-	for ; i < nptr; i++ {
-		if !hbits.morePointers() {
-			break // end of object
+	hbits := heapBitsForAddr(p, size)
+	for {
+		var addr uintptr
+		hbits, addr = hbits.next()
+		if addr == 0 {
+			break
 		}
-		if hbits.isPointer() {
-			tmpbuf[i/8] |= 1 << (i % 8)
-		}
-		hbits = hbits.next()
+		i := (addr - p) / goarch.PtrSize
+		tmpbuf[i/8] |= 1 << (i % 8)
 	}
-	return bitvector{int32(i), &tmpbuf[0]}
+	return bitvector{int32(nptr), &tmpbuf[0]}
 }

View File

@@ -247,13 +247,15 @@ const (
 	// memory.
 	heapArenaBytes = 1 << logHeapArenaBytes
 
+	heapArenaWords = heapArenaBytes / goarch.PtrSize
+
 	// logHeapArenaBytes is log_2 of heapArenaBytes. For clarity,
 	// prefer using heapArenaBytes where possible (we need the
 	// constant to compute some other constants).
 	logHeapArenaBytes = (6+20)*(_64bit*(1-goos.IsWindows)*(1-goarch.IsWasm)*(1-goos.IsIos*goarch.IsArm64)) + (2+20)*(_64bit*goos.IsWindows) + (2+20)*(1-_64bit) + (2+20)*goarch.IsWasm + (2+20)*goos.IsIos*goarch.IsArm64
 
-	// heapArenaBitmapBytes is the size of each heap arena's bitmap.
-	heapArenaBitmapBytes = heapArenaBytes / (goarch.PtrSize * 8 / 2)
+	// heapArenaBitmapWords is the size of each heap arena's bitmap in uintptrs.
+	heapArenaBitmapWords = heapArenaWords / (8 * goarch.PtrSize)
 
 	pagesPerArena = heapArenaBytes / pageSize
@@ -353,10 +355,10 @@ func mallocinit() {
 		throw("bad TinySizeClass")
 	}
 
-	if heapArenaBitmapBytes&(heapArenaBitmapBytes-1) != 0 {
+	if heapArenaBitmapWords&(heapArenaBitmapWords-1) != 0 {
 		// heapBits expects modular arithmetic on bitmap
 		// addresses to work.
-		throw("heapArenaBitmapBytes not a power of 2")
+		throw("heapArenaBitmapWords not a power of 2")
 	}
 
 	// Check physPageSize.
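
The power-of-2 requirement exists because heapBits reduces addresses modulo
the bitmap size, which only compiles down to cheap masking when the modulus
is a power of two. A simplified sketch of the kind of index computation
involved (not the runtime's exact formula; constants are the common 64-bit
values):

	package main

	import "fmt"

	const (
		ptrSize        = 8       // goarch.PtrSize on 64-bit
		heapArenaWords = 1 << 23 // 64 MiB arena / 8-byte words
	)

	// bitmapIndex locates the bitmap word and bit describing addr.
	// heapArenaWords being a power of 2 lets the compiler turn the
	// modulo operations into AND instructions.
	func bitmapIndex(addr uintptr) (word, bit uintptr) {
		w := (addr / ptrSize) % heapArenaWords // heap word index within its arena
		return w / (8 * ptrSize), w % (8 * ptrSize)
	}

	func main() {
		fmt.Println(bitmapIndex(0x1234560))
	}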

(File diff suppressed because it is too large.)


@@ -251,7 +251,7 @@ func (c *mcache) allocLarge(size uintptr, noscan bool) *mspan {
 	// visible to the background sweeper.
 	mheap_.central[spc].mcentral.fullSwept(mheap_.sweepgen).push(s)
 	s.limit = s.base() + size
-	heapBitsForAddr(s.base()).initSpan(s)
+	s.initHeapBits()
 	return s
 }


@@ -250,6 +250,6 @@ func (c *mcentral) grow() *mspan {
 	// n := (npages << _PageShift) / size
 	n := s.divideByElemSize(npages << _PageShift)
 	s.limit = s.base() + size*n
-	heapBitsForAddr(s.base()).initSpan(s)
+	s.initHeapBits()
 	return s
 }


@@ -1265,7 +1265,6 @@ func scanobject(b uintptr, gcw *gcWork) {
 	// b is either the beginning of an object, in which case this
 	// is the size of the object to scan, or it points to an
 	// oblet, in which case we compute the size to scan below.
-	hbits := heapBitsForAddr(b)
 	s := spanOfUnchecked(b)
 	n := s.elemsize
 	if n == 0 {
@@ -1308,20 +1307,24 @@ func scanobject(b uintptr, gcw *gcWork) {
 		}
 	}
 
-	var i uintptr
-	for i = 0; i < n; i, hbits = i+goarch.PtrSize, hbits.next() {
-		// Load bits once. See CL 22712 and issue 16973 for discussion.
-		bits := hbits.bits()
-		if bits&bitScan == 0 {
-			break // no more pointers in this object
-		}
-		if bits&bitPointer == 0 {
-			continue // not a pointer
+	hbits := heapBitsForAddr(b, n)
+	var scanSize uintptr
+	for {
+		var addr uintptr
+		if hbits, addr = hbits.nextFast(); addr == 0 {
+			if hbits, addr = hbits.next(); addr == 0 {
+				break
+			}
 		}
 
+		// Keep track of farthest pointer we found, so we can
+		// update heapScanWork. TODO: is there a better metric,
+		// now that we can skip scalar portions pretty efficiently?
+		scanSize = addr - b + goarch.PtrSize
+
 		// Work here is duplicated in scanblock and above.
 		// If you make changes here, make changes there too.
-		obj := *(*uintptr)(unsafe.Pointer(b + i))
+		obj := *(*uintptr)(unsafe.Pointer(addr))
 
 		// At this point we have extracted the next potential pointer.
 		// Quickly filter out nil and pointers back to the current object.
@@ -1335,13 +1338,13 @@ func scanobject(b uintptr, gcw *gcWork) {
 		// heap. In this case, we know the object was
 		// just allocated and hence will be marked by
 		// allocation itself.
-		if obj, span, objIndex := findObject(obj, b, i); obj != 0 {
-			greyobject(obj, b, i, span, gcw, objIndex)
+		if obj, span, objIndex := findObject(obj, b, addr-b); obj != 0 {
+			greyobject(obj, b, addr-b, span, gcw, objIndex)
 		}
 	}
 
 	gcw.bytesMarked += uint64(n)
-	gcw.heapScanWork += int64(i)
+	gcw.heapScanWork += int64(scanSize)
 }
 
 // scanConservative scans block [b, b+n) conservatively, treating any
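
The nextFast/next pair used above is the usual inlinable fast-path split:
the common case (another set bit in the bitmap word already in hand) stays
under the inlining budget, while the slow path refills. A generic sketch of
the same shape (hypothetical bitIter type; not the runtime's heapBits):

	package main

	import (
		"fmt"
		"math/bits"
	)

	// bitIter yields the positions of set bits across a slice of words.
	type bitIter struct {
		cur   uint64   // unconsumed bits of the current word
		base  int      // bit position of cur's bit 0
		words []uint64 // words not yet loaded
	}

	func newBitIter(words []uint64) bitIter {
		return bitIter{base: -64, words: words}
	}

	// nextFast handles the common case, another set bit in the word
	// already in hand. It is tiny so the compiler can inline it.
	func (it bitIter) nextFast() (bitIter, int) {
		if it.cur == 0 {
			return it, -1 // caller falls back to next
		}
		i := bits.TrailingZeros64(it.cur)
		it.cur &= it.cur - 1 // consume that bit
		return it, it.base + i
	}

	// next is the slow path: refill from the backing words, then
	// retry the fast path.
	func (it bitIter) next() (bitIter, int) {
		for it.cur == 0 {
			if len(it.words) == 0 {
				return it, -1 // done
			}
			it.cur, it.base, it.words = it.words[0], it.base+64, it.words[1:]
		}
		return it.nextFast()
	}

	func main() {
		it := newBitIter([]uint64{1<<5 | 1<<40, 1 << 3})
		for {
			var pos int
			if it, pos = it.nextFast(); pos < 0 {
				if it, pos = it.next(); pos < 0 {
					break
				}
			}
			fmt.Println(pos) // prints 5, 40, 67
		}
	}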


@@ -221,9 +221,22 @@ var mheap_ mheap
 //go:notinheap
 type heapArena struct {
 	// bitmap stores the pointer/scalar bitmap for the words in
-	// this arena. See mbitmap.go for a description. Use the
-	// heapBits type to access this.
-	bitmap [heapArenaBitmapBytes]byte
+	// this arena. See mbitmap.go for a description.
+	// This array uses 1 bit per word of heap, or 1.6% of the heap size (for 64-bit).
+	bitmap [heapArenaBitmapWords]uintptr
+
+	// If the ith bit of noMorePtrs is true, then there are no more
+	// pointers for the object containing the word described by the
+	// high bit of bitmap[i].
+	// In that case, bitmap[i+1], ... must be zero until the start
+	// of the next object.
+	// We never operate on these entries using bit-parallel techniques,
+	// so it is ok if they are small. Also, they can't be bigger than
+	// uint16 because at that size a single noMorePtrs entry
+	// represents 8K of memory, the minimum size of a span. Any larger
+	// and we'd have to worry about concurrent updates.
+	// This array uses 1 bit per word of bitmap, or .024% of the heap size (for 64-bit).
+	noMorePtrs [heapArenaBitmapWords / 8]uint8
 
 	// spans maps from virtual address page ID within this arena to *mspan.
 	// For allocated spans, their pages map to the span itself.
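
Putting the two arrays together, the termination logic amounts to: after
consuming a word of the ptr/nonptr bitmap, consult the matching noMorePtrs
bit and stop early if it is set. A much-simplified sketch (invented names;
the real code also handles objects spanning bitmap words and arena
boundaries):

	package main

	import "fmt"

	// arena mirrors the two parallel arrays above: one ptr/nonptr bit
	// per heap word, one no-more-pointers bit per bitmap word.
	type arena struct {
		bitmap     []uintptr
		noMorePtrs []uint8
	}

	// countPtrsFrom counts pointer words starting at bitmap word i,
	// stopping as soon as noMorePtrs says the rest is pointer-free.
	func (a *arena) countPtrsFrom(i int) int {
		n := 0
		for ; i < len(a.bitmap); i++ {
			n += popcount(a.bitmap[i])
			if a.noMorePtrs[i/8]&(1<<(i%8)) != 0 {
				break // no pointers past bitmap word i
			}
		}
		return n
	}

	func popcount(x uintptr) int {
		c := 0
		for ; x != 0; x &= x - 1 {
			c++
		}
		return c
	}

	func main() {
		a := &arena{
			bitmap:     []uintptr{0b1011, 0, 0, 0}, // three pointer words, then none
			noMorePtrs: []uint8{0b0001},            // object's pointers end in bitmap word 0
		}
		fmt.Println(a.countPtrsFrom(0)) // 3; bitmap[1..3] are never inspected
	}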


@@ -260,12 +260,14 @@ func growslice(et *_type, old slice, cap int) slice {
 		capmem = roundupsize(uintptr(newcap) << shift)
 		overflow = uintptr(newcap) > (maxAlloc >> shift)
 		newcap = int(capmem >> shift)
+		capmem = uintptr(newcap) << shift
 	default:
 		lenmem = uintptr(old.len) * et.size
 		newlenmem = uintptr(cap) * et.size
 		capmem, overflow = math.MulUintptr(et.size, uintptr(newcap))
 		capmem = roundupsize(capmem)
 		newcap = int(capmem / et.size)
+		capmem = uintptr(newcap) * et.size
 	}
 
 	// The check of overflow in addition to capmem > maxAlloc is needed
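
The two added capmem assignments keep newcap and capmem mutually consistent
after rounding: roundupsize can land on a size class that is not a multiple
of the element size, so newcap is recomputed from the rounded allocation and
capmem is then snapped back to the bytes those newcap elements actually
occupy. A worked illustration with a stand-in for roundupsize (example
numbers; the real size classes differ):

	package main

	import "fmt"

	func main() {
		const etSize = 3 // element size in bytes (example)
		newcap := 300    // grown capacity before rounding (example)

		capmem := uintptr(newcap) * etSize // 900 bytes requested
		capmem = roundupsize(capmem)       // rounds to 1024 here
		newcap = int(capmem / etSize)      // 341 elements fit
		capmem = uintptr(newcap) * etSize  // 1023: what 341 elements occupy

		fmt.Println(newcap, capmem) // 341 1023
	}

	// roundupsize stands in for the runtime's size-class rounding;
	// here it just rounds up to a power of two for illustration.
	func roundupsize(n uintptr) uintptr {
		s := uintptr(1)
		for s < n {
			s <<= 1
		}
		return s
	}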