runtime: don't call span.heapBits in writeHeapBitsSmall
For whatever reason, span.heapBits is surprisingly slow: it accounts
for about a quarter of the cost of writeHeapBitsSmall. We get a nice
speed improvement for small allocations by eliminating this call.
                     │    before    │                after                │
                     │    sec/op    │    sec/op     vs base               │
MallocTypeInfo16-4     29.47n ± 1%    27.02n ± 1%   -8.31% (p=0.002 n=6)
Change-Id: I6270e26902e5a9254cf1503fac81c3c799c59d6a
Reviewed-on: https://go-review.googlesource.com/c/go/+/614255
Reviewed-by: Keith Randall <khr@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@google.com>
Auto-Submit: Michael Knyszek <mknyszek@google.com>
commit 56fb8350c8
parent d94b7a1876
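The call this change eliminates only computes where a span's pointer bitmap lives. For a span that is exactly one page, that address can be derived directly from the span base, which is what the rewritten code in the diff below does inline. A minimal standalone sketch of that arithmetic, assuming 8 KiB pages and 8-byte pointer words (the constants and names here are illustrative, not the runtime's):

package main

import "fmt"

// Illustrative constants for a typical 64-bit platform; the runtime
// gets the real values from its own configuration and goarch.
const (
    pageSize = 8192 // assumed span page size in bytes
    ptrSize  = 8    // assumed pointer word size in bytes
)

// heapBitsStart mirrors the expression the change inlines in place of
// span.heapBits():
//
//     span.base() + pageSize - pageSize/goarch.PtrSize/8
//
// A one-page span needs one bitmap bit per pointer-sized word, i.e.
// pageSize/ptrSize bits = pageSize/ptrSize/8 bytes, stored at the very
// end of the page.
func heapBitsStart(spanBase uintptr) uintptr {
    return spanBase + pageSize - pageSize/ptrSize/8
}

func main() {
    base := uintptr(0x1000000) // hypothetical span base address
    // With these constants the bitmap is 8192/8/8 = 128 bytes, so it
    // starts 128 bytes before the end of the span's single page.
    fmt.Printf("bitmap starts at %#x (last %d bytes of the page)\n",
        heapBitsStart(base), pageSize/ptrSize/8)
}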
@@ -427,8 +427,15 @@ func mallocinit() {
 	// Check that the minimum size (exclusive) for a malloc header is also
 	// a size class boundary. This is important to making sure checks align
 	// across different parts of the runtime.
+	//
+	// While we're here, also check to make sure all these size classes'
+	// span sizes are one page. Some code relies on this.
 	minSizeForMallocHeaderIsSizeClass := false
+	sizeClassesUpToMinSizeForMallocHeaderAreOnePage := true
 	for i := 0; i < len(class_to_size); i++ {
+		if class_to_allocnpages[i] > 1 {
+			sizeClassesUpToMinSizeForMallocHeaderAreOnePage = false
+		}
 		if minSizeForMallocHeader == uintptr(class_to_size[i]) {
 			minSizeForMallocHeaderIsSizeClass = true
 			break
@@ -437,6 +444,9 @@ func mallocinit() {
 	if !minSizeForMallocHeaderIsSizeClass {
 		throw("min size of malloc header is not a size class boundary")
 	}
+	if !sizeClassesUpToMinSizeForMallocHeaderAreOnePage {
+		throw("expected all size classes up to min size for malloc header to fit in one-page spans")
+	}
 	// Check that the pointer bitmap for all small sizes without a malloc header
 	// fits in a word.
 	if minSizeForMallocHeader/goarch.PtrSize > 8*goarch.PtrSize {
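The one-page check added above guards the assumption the rewritten writeHeapBitsSmall relies on: for every size class below the malloc-header threshold, the span is a single page, so the bitmap address can be computed from span.base() alone. A self-contained sketch of the same check shape, using a made-up, abbreviated size-class table rather than the runtime's generated class_to_size/class_to_allocnpages tables:

package main

import "fmt"

// Made-up, abbreviated stand-ins for the runtime's generated size-class
// tables (class_to_size, class_to_allocnpages); real values differ.
var (
    classToSize       = []uintptr{0, 8, 16, 32, 48, 64, 128, 256, 512, 1024}
    classToAllocPages = []int{1, 1, 1, 1, 1, 1, 1, 1, 1, 2}
)

// minSizeForHeader stands in for minSizeForMallocHeader, which is 512
// bytes on typical 64-bit platforms; treated as an assumption here.
const minSizeForHeader uintptr = 512

func main() {
    // Same shape as the checks added to mallocinit: the threshold must
    // land exactly on a size class boundary, and every class up to it
    // must fit in a one-page span.
    isSizeClass := false
    allOnePage := true
    for i := 0; i < len(classToSize); i++ {
        if classToAllocPages[i] > 1 {
            allOnePage = false
        }
        if minSizeForHeader == classToSize[i] {
            isSizeClass = true
            break
        }
    }
    fmt.Println("threshold is a size class boundary:", isSizeClass)    // true
    fmt.Println("classes up to threshold fit in one page:", allOnePage) // true
}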
@@ -642,8 +642,7 @@ func (span *mspan) writeHeapBitsSmall(x, dataSize uintptr, typ *_type) (scanSize
 	// The objects here are always really small, so a single load is sufficient.
 	src0 := readUintptr(typ.GCData)
 
-	// Create repetitions of the bitmap if we have a small array.
-	bits := span.elemsize / goarch.PtrSize
+	// Create repetitions of the bitmap if we have a small slice backing store.
 	scanSize = typ.PtrBytes
 	src := src0
 	switch typ.Size_ {
@@ -658,19 +657,23 @@ func (span *mspan) writeHeapBitsSmall(x, dataSize uintptr, typ *_type) (scanSize
 
 	// Since we're never writing more than one uintptr's worth of bits, we're either going
 	// to do one or two writes.
-	dst := span.heapBits()
+	dst := unsafe.Pointer(span.base() + pageSize - pageSize/goarch.PtrSize/8)
 	o := (x - span.base()) / goarch.PtrSize
 	i := o / ptrBits
 	j := o % ptrBits
+	bits := span.elemsize / goarch.PtrSize
 	if j+bits > ptrBits {
 		// Two writes.
 		bits0 := ptrBits - j
 		bits1 := bits - bits0
-		dst[i+0] = dst[i+0]&(^uintptr(0)>>bits0) | (src << j)
-		dst[i+1] = dst[i+1]&^((1<<bits1)-1) | (src >> bits0)
+		dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize))
+		dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize))
+		*dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j)
+		*dst1 = (*dst1)&^((1<<bits1)-1) | (src >> bits0)
 	} else {
 		// One write.
-		dst[i] = (dst[i] &^ (((1 << bits) - 1) << j)) | (src << j)
+		dst := (*uintptr)(add(dst, i*goarch.PtrSize))
+		*dst = (*dst)&^(((1<<bits)-1)<<j) | (src << j)
 	}
 
 	const doubleCheck = false
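The masked update in the hunk above touches one word of the bitmap, or two when the object's bits straddle a word boundary. A self-contained sketch of that pattern with hypothetical names (writeBits is not a runtime function), storing the low n bits of src at bit offset off in a plain []uintptr:

package main

import "fmt"

const ptrBits = 64 // bits per uintptr, assuming a 64-bit platform

// writeBits stores the low n bits of src into dst starting at bit
// offset off, using the same one-or-two-write scheme as the diff above:
// two masked writes when the run crosses a word boundary, one otherwise.
func writeBits(dst []uintptr, off, n uint, src uintptr) {
    i := off / ptrBits
    j := off % ptrBits
    if j+n > ptrBits {
        // Two writes: split src at the word boundary.
        n0 := ptrBits - j // bits that fit in the first word
        n1 := n - n0      // bits that spill into the second word
        dst[i+0] = dst[i+0]&(^uintptr(0)>>n0) | (src << j)
        dst[i+1] = dst[i+1]&^((1<<n1)-1) | (src >> n0)
    } else {
        // One write: clear an n-bit window at offset j, then fill it.
        dst[i] = dst[i]&^(((1<<n)-1)<<j) | (src << j)
    }
}

func main() {
    dst := make([]uintptr, 2)
    // Write 8 bits starting at bit offset 60: the low 4 bits of src land
    // at the top of dst[0], the high 4 bits at the bottom of dst[1].
    writeBits(dst, 60, 8, 0xb5)
    fmt.Printf("dst[0] = %#x\ndst[1] = %#x\n", dst[0], dst[1])
    // Output:
    // dst[0] = 0x5000000000000000
    // dst[1] = 0xb
}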