diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go index dea6879adc..b866d7f732 100644 --- a/src/runtime/mbitmap.go +++ b/src/runtime/mbitmap.go @@ -476,12 +476,33 @@ func heapBitsSweepSpan(base, size, n uintptr, f func(uintptr)) { // (The number of values is given by dataSize / typ.size.) // If dataSize < size, the fragment [x+dataSize, x+size) is // recorded as non-pointer data. +// It is known that the type has pointers somewhere; +// malloc does not call heapBitsSetType when there are no pointers, +// because all free objects are marked as noscan during +// heapBitsSweepSpan. +// There can only be one allocation from a given span active at a time, +// so this code is not racing with other instances of itself, +// and we don't allocate from a span until it has been swept, +// so this code is not racing with heapBitsSweepSpan. +// It is, however, racing with the concurrent GC mark phase, +// which can be setting the mark bit in the leading 2-bit entry +// of an allocated block. The block we are modifying is not quite +// allocated yet, so the GC marker is not racing with updates to x's bits, +// but if the start or end of x shares a bitmap byte with an adjacent +// object, the GC marker is racing with updates to those object's mark bits. func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { // From here till marked label marking the object as allocated // and storing type info in the GC bitmap. h := heapBitsForAddr(x) - var ptrmask *uint8 + // dataSize is always size rounded up to the next malloc size class, + // except in the case of allocating a defer block, in which case + // size is sizeof(_defer{}) (at least 6 words) and dataSize may be + // arbitrarily larger. + // + // The checks for size == ptrSize and size == 2*ptrSize can therefore + // assume that dataSize == size without checking it explicitly. + if size == ptrSize { // It's one word and it has pointers, it must be a pointer. // The bitmap byte is shared with the one-word object @@ -494,6 +515,8 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { atomicor8(h.bitp, bitPointer<>(j%8))&1 != 0 { - atomicor8(h.bitp, bitPointer<= 2 { - atomicor8(h.bitp, bitMarked<= nw { + goto Phase3 + } + *hbitp = uint8(hb) + hbitp = subtractb(hbitp, 1) + b >>= 4 + nb -= 4 + + case 4: + // Ptrmask and heap bitmap are misaligned. + // The bits for the first two words are in a byte shared with another object + // and must be updated atomically. + // NOTE(rsc): The atomic here may not be necessary. + // We took care of 1-word and 2-word objects above, + // so this is at least a 6-word object, so our start bits + // are shared only with the type bits of another object, + // not with its mark bit. Since there is only one allocation + // from a given span at a time, we should be able to set + // these bits non-atomically. Not worth the risk right now. + hb = (b&1)<<4 | (b&2)<<(4+heapBitsWidth-1) // bits being prepared for *h.bitp + b >>= 2 + nb -= 2 + // Note: no bitMarker in hb because the first two words don't get markers from us. + atomicor8(hbitp, uint8(hb)) + hbitp = subtractb(hbitp, 1) + + // Expand 8-bit chunks of ptrmask into pairs of heap bitmap bytes. + // We know the object size is a multiple of 2 words but not 4, so the + // object size minus the 2 words we just handled is a multiple of 4, + // so we can use non-atomic writes to the heap bitmap for the + // rest of this code, even for the final fragment or a trailing dead marker byte. + + // Loop prepares bits for final byte but stops before writing them, + // so that in the case where we need to write only part of a byte, + // the code below the loop can truncate the bitMarked. + w += 2 + } + + // Phase 2: Full bytes in bitmap, up to but not including write to last byte (full or partial) in bitmap. + // The loop computes the bits for that last write but does not execute the write; + // it leaves the bits in hb for processing by phase 3. + // To avoid repeated adjustment of nb, we subtract out the 4 bits we're going to + // use in the first half of the loop right now, and then we only adjust nb explicitly + // if the 8 bits used by each iteration isn't balanced by 8 bits loaded mid-loop. + nb -= 4 + for { + // Emit bitmap byte. + // b has at least nb+4 bits, with one exception: + // if w+4 >= nw, then b has only nw-w bits, + // but we'll stop at the break and then truncate + // appropriately in Phase 3. + hb = b&1 | (b&2)<<(heapBitsWidth-1) | (b&4)<<(2*heapBitsWidth-2) | (b&8)<<(3*heapBitsWidth-3) + hb |= bitMarked | bitMarked<= nw { + break + } + *hbitp = uint8(hb) + hbitp = subtractb(hbitp, 1) + b >>= 4 + + // Load more bits. b has nb right now. + if p != endp { + // Fast path: keep reading from ptrmask. + // nb unmodified: we just loaded 8 bits, + // and the next iteration will consume 8 bits, + // leaving us with the same nb the next time we're here. + b |= uintptr(*p) << nb + p = addb(p, 1) + } else if p == nil { + // Almost as fast path: track bit count and refill from pbits. + // For short repetitions. + if nb < 8 { + b |= pbits << nb + nb += endnb + } + nb -= 8 // for next iteration + } else { + // Slow path: reached end of ptrmask. + // Process final partial byte and rewind to start. + b |= uintptr(*p) << nb + nb += endnb + if nb < 8 { + b |= uintptr(*ptrmask) << nb + p = addb(ptrmask, 1) + } else { + nb -= 8 + p = ptrmask + } + } + + // Emit bitmap byte. + hb = b&1 | (b&2)<<(heapBitsWidth-1) | (b&4)<<(2*heapBitsWidth-2) | (b&8)<<(3*heapBitsWidth-3) + hb |= bitMarked | bitMarked<= nw { + break + } + *hbitp = uint8(hb) + hbitp = subtractb(hbitp, 1) + b >>= 4 + } + +Phase3: + // Phase 3: Special case for final byte or half-byte describing final fragment of data. + // If there are not four data words for this final fragment, we must clear the mark bits + // in the 2-bit entries for the missing words. Clearing them creates a ``dead'' entry + // to tell the GC scan to stop scanning this object early. + // If there are four words in the final fragment but there is more data, + // then we must write a ``dead'' entry to the next bitmap byte. + if frag := (nw - w) % 4; frag != 0 { + // Data ends at least one word early. + hb &= 1<<(heapBitsWidth*frag) - 1 + if w*ptrSize <= size { + // We own the whole byte and get the dead marker for free. + *hbitp = uint8(hb) + } else { + // We only own the bottom half of the byte. + // If frag == 1, we get a dead marker for free. + // If frag == 2, no dead marker needed (we've reached the end of the object). + atomicand8(hbitp, 0xf0) + atomicor8(hbitp, uint8(hb)) + } + } else { + // Data ends with a full bitmap byte. + *hbitp = uint8(hb) + if w*ptrSize < size { + // There's more data in the allocated object. + // Write a dead marker in the next byte. + hbitp = subtractb(hbitp, 1) + if (w+4)*ptrSize <= size { + // We own the whole byte. + *hbitp = 0 + } else { + // We only own the bottom half of the byte. + atomicand8(hbitp, 0xf0) + } + } + } + + const test = false // slow but helpful + if test { + // Double-check that bits to be written were written correctly. + // Does not check that other bits were not written, unfortunately. + h := heapBitsForAddr(x) + nptr := typ.size / ptrSize + for i := uintptr(0); i <= dataSize/ptrSize; i++ { + j := i % nptr + var have, want uint8 + if i == dataSize/ptrSize { + if dataSize >= size { + break + } + have = (*h.bitp >> h.shift) & 3 + want = 0 // dead bits + } else { + have = (*h.bitp >> h.shift) & 3 + if (*addb(ptrmask, j/8)>>(j%8))&1 != 0 { + want |= bitPointer + } + if i >= 2 { + want |= bitMarked + } else { + have &^= bitMarked + } + } + if have != want { + println("mismatch writing bits for", *typ._string, "x", dataSize/typ.size) + print("typ.size=", typ.size, " dataSize=", dataSize, " size=", size, "\n") + h = heapBitsForAddr(x) + print("initial bits h.bitp=", h.bitp, " h.shift=", h.shift, "\n") + print("p=", p, " endp=", endp, " endnb=", endnb, " pbits=", hex(pbits), " b=", hex(b), " nb=", nb, "\n") + println("at word", i, "offset", i*ptrSize, "have", have, "want", want) + throw("bad heapBitsSetType") + } + h = h.next() + } } } diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go index 917299c9df..bf21e47d83 100644 --- a/src/runtime/mgcmark.go +++ b/src/runtime/mgcmark.go @@ -601,18 +601,16 @@ func scanobject(b uintptr, gcw *gcWork) { // in the type bit for the one word. The only one-word objects // are pointers, or else they'd be merged with other non-pointer // data into larger allocations. - if n != 1 { - b := hbits.bits() - if i >= 2*ptrSize && b&bitMarked == 0 { - break // no more pointers in this object - } - if b&bitPointer == 0 { - continue // not a pointer - } + bits := hbits.bits() + if i >= 2*ptrSize && bits&bitMarked == 0 { + break // no more pointers in this object + } + if bits&bitPointer == 0 { + continue // not a pointer } - // Work here is duplicated in scanblock. - // If you make changes here, make changes there too. + // Work here is duplicated in scanblock and above. + // If you make changes here, make changes there too. obj := *(*uintptr)(unsafe.Pointer(b + i)) // At this point we have extracted the next potential pointer.