diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go index 4592044363..6b46ad18cb 100644 --- a/src/runtime/mbitmap.go +++ b/src/runtime/mbitmap.go @@ -202,7 +202,19 @@ func heapBitsForObject(p uintptr) (base uintptr, hbits heapBits) { } base = s.base() if p-base >= s.elemsize { - base += (p - base) / s.elemsize * s.elemsize + // n := (p - base) / s.elemsize, using division by multiplication + n := uintptr(uint64(p-base) >> s.divShift * uint64(s.divMul) >> s.divShift2) + + const debugMagic = false + if debugMagic { + n2 := (p - base) / s.elemsize + if n != n2 { + println("runtime: bad div magic", (p - base), s.elemsize, s.divShift, s.divMul, s.divShift2) + throw("bad div magic") + } + } + + base += n * s.elemsize } if base == p { print("runtime: failed to find block beginning for ", hex(p), " s=", hex(s.start*_PageSize), " s.limit=", hex(s.limit), "\n") diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index 94ef4de56a..fc4dfeea97 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -101,11 +101,14 @@ type mspan struct { // if sweepgen == h->sweepgen, the span is swept and ready to use // h->sweepgen is incremented by 2 after every GC sweepgen uint32 + divMul uint32 // for divide by elemsize - divMagic.mul ref uint16 // capacity - number of objects in freelist sizeclass uint8 // size class incache bool // being used by an mcache state uint8 // mspaninuse etc needzero uint8 // needs to be zeroed before allocation + divShift uint8 // for divide by elemsize - divMagic.shift + divShift2 uint8 // for divide by elemsize - divMagic.shift2 elemsize uintptr // computed from sizeclass or from npages unusedsince int64 // first time spotted by gc in mspanfree state npreleased uintptr // number of pages released to the os @@ -385,8 +388,15 @@ func mHeap_Alloc_m(h *mheap, npage uintptr, sizeclass int32, large bool) *mspan s.sizeclass = uint8(sizeclass) if sizeclass == 0 { s.elemsize = s.npages << _PageShift + s.divShift = 0 + s.divMul = 0 + s.divShift2 = 0 } else { s.elemsize = uintptr(class_to_size[sizeclass]) + m := &class_to_divmagic[sizeclass] + s.divShift = m.shift + s.divMul = m.mul + s.divShift2 = m.shift2 } // update stats, sweep lists diff --git a/src/runtime/msize.go b/src/runtime/msize.go index 370cae629e..f2a7cb9ddd 100644 --- a/src/runtime/msize.go +++ b/src/runtime/msize.go @@ -48,6 +48,8 @@ package runtime var class_to_size [_NumSizeClasses]int32 var class_to_allocnpages [_NumSizeClasses]int32 +var class_to_divmagic [_NumSizeClasses]divMagic + var size_to_class8 [1024/8 + 1]int8 var size_to_class128 [(_MaxSmallSize-1024)/128 + 1]int8 @@ -144,6 +146,11 @@ func initSizes() { for i := 0; i < len(class_to_size); i++ { memstats.by_size[i].size = uint32(class_to_size[i]) } + + for i := 1; i < len(class_to_size); i++ { + class_to_divmagic[i] = computeDivMagic(uint32(class_to_size[i])) + } + return dump: @@ -182,3 +189,55 @@ func roundupsize(size uintptr) uintptr { } return round(size, _PageSize) } + +// divMagic holds magic constants to implement division +// by a particular constant as a shift, multiply, and shift. +// That is, given +// m = computeMagic(d) +// then +// n/d == ((n>>m.shift) * m.mul) >> m.shift2 +// +// The magic computation picks m such that +// d = d₁*d₂ +// d₂= 2^m.shift +// m.mul = ⌈2^m.shift2 / d₁⌉ +// +// The magic computation here is tailored for malloc block sizes +// and does not handle arbitrary d correctly. Malloc block sizes d are +// always even, so the first shift implements the factors of 2 in d +// and then the mul and second shift implement the odd factor +// that remains. Because the first shift divides n by at least 2 (actually 8) +// before the multiply gets involved, the huge corner cases that +// require additional adjustment are impossible, so the usual +// fixup is not needed. +// +// For more details see Hacker's Delight, Chapter 10, and +// http://ridiculousfish.com/blog/posts/labor-of-division-episode-i.html +// http://ridiculousfish.com/blog/posts/labor-of-division-episode-iii.html +type divMagic struct { + shift uint8 + mul uint32 + shift2 uint8 +} + +func computeDivMagic(d uint32) divMagic { + var m divMagic + + // Compute pre-shift by factoring power of 2 out of d. + for d&1 == 0 { + m.shift++ + d >>= 1 + } + + // Compute largest k such that ⌈2^k / d⌉ fits in a 32-bit int. + // This is always a good enough approximation. + // We could use smaller k for some divisors but there's no point. + k := uint8(63) + d64 := uint64(d) + for ((1<= 1<<32 { + k-- + } + m.mul = uint32(((1 << k) + d64 - 1) / d64) // ⌈2^k / d⌉ + m.shift2 = k + return m +}