diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index f1bdf93f46..0d17ddfe30 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -968,7 +968,6 @@ func NewPageAlloc(chunks, scav map[ChunkIdx][]BitRange) *PageAlloc { p.init(new(mutex), testSysStat) lockInit(p.mheapLock, lockRankMheap) p.test = true - for i, init := range chunks { addr := chunkBase(chunkIdx(i)) @@ -1007,6 +1006,18 @@ func NewPageAlloc(chunks, scav map[ChunkIdx][]BitRange) *PageAlloc { } } + // Make sure the scavenge index is updated. + // + // This is an inefficient way to do it, but it's also the simplest way. + minPages := physPageSize / pageSize + if minPages < 1 { + minPages = 1 + } + _, npages := chunk.findScavengeCandidate(pallocChunkPages-1, minPages, minPages) + if npages != 0 { + p.scav.index.mark(addr, addr+pallocChunkBytes) + } + // Update heap metadata for the allocRange calls above. systemstack(func() { lock(p.mheapLock) @@ -1015,12 +1026,6 @@ func NewPageAlloc(chunks, scav map[ChunkIdx][]BitRange) *PageAlloc { }) } - systemstack(func() { - lock(p.mheapLock) - p.scavengeStartGen() - unlock(p.mheapLock) - }) - return (*PageAlloc)(p) } @@ -1035,6 +1040,8 @@ func FreePageAlloc(pp *PageAlloc) { for l := 0; l < summaryLevels; l++ { sysFreeOS(unsafe.Pointer(&p.summary[l][0]), uintptr(cap(p.summary[l]))*pallocSumBytes) } + // Only necessary on 64-bit. This is a global on 32-bit. + sysFreeOS(unsafe.Pointer(&p.scav.index.chunks[0]), uintptr(cap(p.scav.index.chunks))) } else { resSize := uintptr(0) for _, s := range p.summary { @@ -1042,6 +1049,7 @@ func FreePageAlloc(pp *PageAlloc) { } sysFreeOS(unsafe.Pointer(&p.summary[0][0]), alignUp(resSize, physPageSize)) } + // Subtract back out whatever we mapped for the summaries. // sysUsed adds to p.sysStat and memstats.mappedReady no matter what // (and in anger should actually be accounted for), and there's no other @@ -1550,3 +1558,28 @@ func (s *Scavenger) Stop() { s.Wake() <-s.done } + +type ScavengeIndex struct { + i scavengeIndex +} + +func NewScavengeIndex(min, max ChunkIdx) *ScavengeIndex { + s := new(ScavengeIndex) + s.i.chunks = make([]atomic.Uint8, uintptr(1<>10, " KiB work, ", gcController.heapReleased.load()>>10, " KiB total, ", (gcController.heapInUse.load()*100)/heapRetained(), "% util", @@ -691,130 +676,20 @@ func printScavTrace(gen uint32, released uintptr, forced bool) { printunlock() } -// scavengeStartGen starts a new scavenge generation, resetting -// the scavenger's search space to the full in-use address space. -// -// p.mheapLock must be held. -// -// Must run on the system stack because p.mheapLock must be held. -// -//go:systemstack -func (p *pageAlloc) scavengeStartGen() { - assertLockHeld(p.mheapLock) - - lock(&p.scav.lock) - if debug.scavtrace > 0 { - printScavTrace(p.scav.gen, atomic.Loaduintptr(&p.scav.released), false) - } - p.inUse.cloneInto(&p.scav.inUse) - - // Pick the new starting address for the scavenger cycle. - var startAddr offAddr - if p.scav.scavLWM.lessThan(p.scav.freeHWM) { - // The "free" high watermark exceeds the "scavenged" low watermark, - // so there are free scavengable pages in parts of the address space - // that the scavenger already searched, the high watermark being the - // highest one. Pick that as our new starting point to ensure we - // see those pages. - startAddr = p.scav.freeHWM - } else { - // The "free" high watermark does not exceed the "scavenged" low - // watermark. 
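The watermark bookkeeping being removed here is what the new scavenge index replaces. Throughout the rest of the patch, "marking" a chunk means setting a single bit in that index: byte ci/8, bit ci%8 of the chunks array, one bit per palloc chunk. A minimal sketch of the mapping, using the scavengeIndex and chunkIdx types defined later in this diff (the helper name is hypothetical, not part of the change):

    // chunkIsMarked reports whether chunk ci is flagged as possibly containing
    // free, unscavenged pages. It mirrors the bit layout used by mark and find.
    func (s *scavengeIndex) chunkIsMarked(ci chunkIdx) bool {
        mask := uint8(1 << (ci % 8))
        return s.chunks[ci/8].Load()&mask != 0
    }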
This means the allocator didn't free any memory in - // the range we scavenged last cycle, so we might as well continue - // scavenging from where we were. - startAddr = p.scav.scavLWM - } - p.scav.inUse.removeGreaterEqual(startAddr.addr()) - - // reservationBytes may be zero if p.inUse.totalBytes is small, or if - // scavengeReservationShards is large. This case is fine as the scavenger - // will simply be turned off, but it does mean that scavengeReservationShards, - // in concert with pallocChunkBytes, dictates the minimum heap size at which - // the scavenger triggers. In practice this minimum is generally less than an - // arena in size, so virtually every heap has the scavenger on. - p.scav.reservationBytes = alignUp(p.inUse.totalBytes, pallocChunkBytes) / scavengeReservationShards - p.scav.gen++ - atomic.Storeuintptr(&p.scav.released, 0) - p.scav.freeHWM = minOffAddr - p.scav.scavLWM = maxOffAddr - unlock(&p.scav.lock) -} - -// scavengeReserve reserves a contiguous range of the address space -// for scavenging. The maximum amount of space it reserves is proportional -// to the size of the heap. The ranges are reserved from the high addresses -// first. -// -// Returns the reserved range and the scavenge generation number for it. -func (p *pageAlloc) scavengeReserve() (addrRange, uint32) { - lock(&p.scav.lock) - gen := p.scav.gen - - // Start by reserving the minimum. - r := p.scav.inUse.removeLast(p.scav.reservationBytes) - - // Return early if the size is zero; we don't want to use - // the bogus address below. - if r.size() == 0 { - unlock(&p.scav.lock) - return r, gen - } - - // The scavenger requires that base be aligned to a - // palloc chunk because that's the unit of operation for - // the scavenger, so align down, potentially extending - // the range. - newBase := alignDown(r.base.addr(), pallocChunkBytes) - - // Remove from inUse however much extra we just pulled out. - p.scav.inUse.removeGreaterEqual(newBase) - unlock(&p.scav.lock) - - r.base = offAddr{newBase} - return r, gen -} - -// scavengeUnreserve returns an unscavenged portion of a range that was -// previously reserved with scavengeReserve. -func (p *pageAlloc) scavengeUnreserve(r addrRange, gen uint32) { - if r.size() == 0 { - return - } - if r.base.addr()%pallocChunkBytes != 0 { - throw("unreserving unaligned region") - } - lock(&p.scav.lock) - if gen == p.scav.gen { - p.scav.inUse.add(r) - } - unlock(&p.scav.lock) -} - -// scavengeOne walks over address range work until it finds +// scavengeOne walks over the chunk at chunk index ci and searches for // a contiguous run of pages to scavenge. It will try to scavenge // at most max bytes at once, but may scavenge more to avoid // breaking huge pages. Once it scavenges some memory it returns // how much it scavenged in bytes. // -// Returns the number of bytes scavenged and the part of work -// which was not yet searched. +// searchIdx is the page index to start searching from in ci. // -// work's base address must be aligned to pallocChunkBytes. +// Returns the number of bytes scavenged. // // Must run on the systemstack because it acquires p.mheapLock. // //go:systemstack -func (p *pageAlloc) scavengeOne(work addrRange, max uintptr) (uintptr, addrRange) { - // Defensively check if we've received an empty address range. - // If so, just return. - if work.size() == 0 { - // Nothing to do. - return 0, work - } - // Check the prerequisites of work. 
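With the reservation and generation machinery above deleted, the control flow becomes: ask the index for the highest chunk that may still have work, then scavenge that one chunk. The caller is not fully visible in this hunk; a plausible driver loop, assuming the find and scavengeOne signatures introduced by this patch (a sketch, not the patch's verbatim code), is:

    // scavenge releases up to nbytes of memory back to the OS, driven by the index.
    func (p *pageAlloc) scavenge(nbytes uintptr) uintptr {
        released := uintptr(0)
        for released < nbytes {
            // find returns the highest chunk that may still contain free,
            // unscavenged pages plus a page index to begin searching at;
            // (0, 0) means there is nothing left to scavenge.
            ci, pageIdx := p.scav.index.find()
            if ci == 0 {
                break
            }
            systemstack(func() {
                released += p.scavengeOne(ci, pageIdx, nbytes-released)
            })
        }
        return released
    }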
- if work.base.addr()%pallocChunkBytes != 0 { - throw("scavengeOne called with unaligned work region") - } +func (p *pageAlloc) scavengeOne(ci chunkIdx, searchIdx uint, max uintptr) uintptr { // Calculate the maximum number of pages to scavenge. // // This should be alignUp(max, pageSize) / pageSize but max can and will @@ -836,168 +711,61 @@ func (p *pageAlloc) scavengeOne(work addrRange, max uintptr) (uintptr, addrRange minPages = 1 } - // Fast path: check the chunk containing the top-most address in work. - if r, w := p.scavengeOneFast(work, minPages, maxPages); r != 0 { - return r, w - } else { - work = w - } - - // findCandidate finds the next scavenge candidate in work optimistically. - // - // Returns the candidate chunk index and true on success, and false on failure. - // - // The heap need not be locked. - findCandidate := func(work addrRange) (chunkIdx, bool) { - // Iterate over this work's chunks. - for i := chunkIndex(work.limit.addr() - 1); i >= chunkIndex(work.base.addr()); i-- { - // If this chunk is totally in-use or has no unscavenged pages, don't bother - // doing a more sophisticated check. - // - // Note we're accessing the summary and the chunks without a lock, but - // that's fine. We're being optimistic anyway. - - // Check quickly if there are enough free pages at all. - if p.summary[len(p.summary)-1][i].max() < uint(minPages) { - continue - } - - // Run over the chunk looking harder for a candidate. Again, we could - // race with a lot of different pieces of code, but we're just being - // optimistic. Make sure we load the l2 pointer atomically though, to - // avoid races with heap growth. It may or may not be possible to also - // see a nil pointer in this case if we do race with heap growth, but - // just defensively ignore the nils. This operation is optimistic anyway. - l2 := (*[1 << pallocChunksL2Bits]pallocData)(atomic.Loadp(unsafe.Pointer(&p.chunks[i.l1()]))) - if l2 != nil && l2[i.l2()].hasScavengeCandidate(minPages) { - return i, true - } - } - return 0, false - } - - // Slow path: iterate optimistically over the in-use address space - // looking for any free and unscavenged page. If we think we see something, - // lock and verify it! - for work.size() != 0 { - - // Search for the candidate. - candidateChunkIdx, ok := findCandidate(work) - if !ok { - // We didn't find a candidate, so we're done. - work.limit = work.base - break - } - - // Lock, so we can verify what we found. - lock(p.mheapLock) - - // Find, verify, and scavenge if we can. - chunk := p.chunkOf(candidateChunkIdx) - base, npages := chunk.findScavengeCandidate(pallocChunkPages-1, minPages, maxPages) - if npages > 0 { - work.limit = offAddr{p.scavengeRangeLocked(candidateChunkIdx, base, npages)} - unlock(p.mheapLock) - return uintptr(npages) * pageSize, work - } - unlock(p.mheapLock) - - // We were fooled, so let's continue from where we left off. - work.limit = offAddr{chunkBase(candidateChunkIdx)} - } - return 0, work -} - -// scavengeOneFast is the fast path for scavengeOne, which just checks the top -// chunk of work for some pages to scavenge. -// -// Must run on the system stack because it acquires the heap lock. 
-// -//go:systemstack -func (p *pageAlloc) scavengeOneFast(work addrRange, minPages, maxPages uintptr) (uintptr, addrRange) { - maxAddr := work.limit.addr() - 1 - maxChunk := chunkIndex(maxAddr) - lock(p.mheapLock) - if p.summary[len(p.summary)-1][maxChunk].max() >= uint(minPages) { + if p.summary[len(p.summary)-1][ci].max() >= uint(minPages) { // We only bother looking for a candidate if there at least // minPages free pages at all. - base, npages := p.chunkOf(maxChunk).findScavengeCandidate(chunkPageIndex(maxAddr), minPages, maxPages) + base, npages := p.chunkOf(ci).findScavengeCandidate(pallocChunkPages-1, minPages, maxPages) // If we found something, scavenge it and return! if npages != 0 { - work.limit = offAddr{p.scavengeRangeLocked(maxChunk, base, npages)} + // Compute the full address for the start of the range. + addr := chunkBase(ci) + uintptr(base)*pageSize + + // Mark the range we're about to scavenge as allocated, because + // we don't want any allocating goroutines to grab it while + // the scavenging is in progress. + if scav := p.allocRange(addr, uintptr(npages)); scav != 0 { + throw("double scavenge") + } + + // With that done, it's safe to unlock. unlock(p.mheapLock) - return uintptr(npages) * pageSize, work + + if !p.test { + // Only perform the actual scavenging if we're not in a test. + // It's dangerous to do so otherwise. + sysUnused(unsafe.Pointer(addr), uintptr(npages)*pageSize) + + // Update global accounting only when not in test, otherwise + // the runtime's accounting will be wrong. + nbytes := int64(npages) * pageSize + gcController.heapReleased.add(nbytes) + gcController.heapFree.add(-nbytes) + + stats := memstats.heapStats.acquire() + atomic.Xaddint64(&stats.committed, -nbytes) + atomic.Xaddint64(&stats.released, nbytes) + memstats.heapStats.release() + } + + // Relock the heap, because now we need to make these pages + // available allocation. Free them back to the page allocator. + lock(p.mheapLock) + p.free(addr, uintptr(npages), true) + + // Mark the range as scavenged. + p.chunkOf(ci).scavenged.setRange(base, npages) + unlock(p.mheapLock) + + return uintptr(npages) * pageSize } } + // Mark this chunk as having no free pages. + p.scav.index.clear(ci) unlock(p.mheapLock) - // Update the limit to reflect the fact that we checked maxChunk already. - work.limit = offAddr{chunkBase(maxChunk)} - return 0, work -} - -// scavengeRangeLocked scavenges the given region of memory. -// The region of memory is described by its chunk index (ci), -// the starting page index of the region relative to that -// chunk (base), and the length of the region in pages (npages). -// -// Returns the base address of the scavenged region. -// -// p.mheapLock must be held. Unlocks p.mheapLock but reacquires -// it before returning. Must be run on the systemstack as a result. -// -//go:systemstack -func (p *pageAlloc) scavengeRangeLocked(ci chunkIdx, base, npages uint) uintptr { - assertLockHeld(p.mheapLock) - - // Compute the full address for the start of the range. - addr := chunkBase(ci) + uintptr(base)*pageSize - - // Mark the range we're about to scavenge as allocated, because - // we don't want any allocating goroutines to grab it while - // the scavenging is in progress. - if scav := p.allocRange(addr, uintptr(npages)); scav != 0 { - throw("double scavenge") - } - - // With that done, it's safe to unlock. - unlock(p.mheapLock) - - // Update the scavenge low watermark. 
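Stripped of the statistics updates and the p.test guard, the essential ordering in the new scavengeOne body above is roughly:

    lock(p.mheapLock)
    // Claim the run so allocators cannot hand out pages mid-scavenge.
    if scav := p.allocRange(addr, uintptr(npages)); scav != 0 {
        throw("double scavenge")
    }
    unlock(p.mheapLock)
    sysUnused(unsafe.Pointer(addr), uintptr(npages)*pageSize) // return the pages to the OS
    lock(p.mheapLock)
    p.free(addr, uintptr(npages), true)            // make the run allocatable again
    p.chunkOf(ci).scavenged.setRange(base, npages) // and remember it is scavenged
    unlock(p.mheapLock)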
- lock(&p.scav.lock) - if oAddr := (offAddr{addr}); oAddr.lessThan(p.scav.scavLWM) { - p.scav.scavLWM = oAddr - } - unlock(&p.scav.lock) - - if !p.test { - // Only perform the actual scavenging if we're not in a test. - // It's dangerous to do so otherwise. - sysUnused(unsafe.Pointer(addr), uintptr(npages)*pageSize) - - // Update global accounting only when not in test, otherwise - // the runtime's accounting will be wrong. - nbytes := int64(npages) * pageSize - gcController.heapReleased.add(nbytes) - gcController.heapFree.add(-nbytes) - - // Update consistent accounting too. - stats := memstats.heapStats.acquire() - atomic.Xaddint64(&stats.committed, -nbytes) - atomic.Xaddint64(&stats.released, nbytes) - memstats.heapStats.release() - } - - // Relock the heap, because now we need to make these pages - // available allocation. Free them back to the page allocator. - lock(p.mheapLock) - p.free(addr, uintptr(npages), true) - - // Mark the range as scavenged. - p.chunkOf(ci).scavenged.setRange(base, npages) - return addr + return 0 } // fillAligned returns x but with all zeroes in m-aligned @@ -1059,38 +827,6 @@ func fillAligned(x uint64, m uint) uint64 { return ^((x - (x >> (m - 1))) | x) } -// hasScavengeCandidate returns true if there's any min-page-aligned groups of -// min pages of free-and-unscavenged memory in the region represented by this -// pallocData. -// -// min must be a non-zero power of 2 <= maxPagesPerPhysPage. -func (m *pallocData) hasScavengeCandidate(min uintptr) bool { - if min&(min-1) != 0 || min == 0 { - print("runtime: min = ", min, "\n") - throw("min must be a non-zero power of 2") - } else if min > maxPagesPerPhysPage { - print("runtime: min = ", min, "\n") - throw("min too large") - } - - // The goal of this search is to see if the chunk contains any free and unscavenged memory. - for i := len(m.scavenged) - 1; i >= 0; i-- { - // 1s are scavenged OR non-free => 0s are unscavenged AND free - // - // TODO(mknyszek): Consider splitting up fillAligned into two - // functions, since here we technically could get by with just - // the first half of its computation. It'll save a few instructions - // but adds some additional code complexity. - x := fillAligned(m.scavenged[i]|m.pallocBits[i], uint(min)) - - // Quickly skip over chunks of non-free or scavenged pages. - if x != ^uint64(0) { - return true - } - } - return false -} - // findScavengeCandidate returns a start index and a size for this pallocData // segment which represents a contiguous region of free and unscavenged memory. // @@ -1210,3 +946,157 @@ func (m *pallocData) findScavengeCandidate(searchIdx uint, min, max uintptr) (ui } return start, size } + +// scavengeIndex is a structure for efficiently managing which pageAlloc chunks have +// memory available to scavenge. +type scavengeIndex struct { + // chunks is a bitmap representing the entire address space. Each bit represents + // a single chunk, and a 1 value indicates the presence of pages available for + // scavenging. Updates to the bitmap are serialized by the pageAlloc lock. + // + // The underlying storage of chunks is platform dependent and may not even be + // totally mapped read/write. min and max reflect the extent that is safe to access. + // min is inclusive, max is exclusive. + // + // searchAddr is the maximum address (in the offset address space, so we have a linear + // view of the address space; see mranges.go:offAddr) containing memory available to + // scavenge. 
It is a hint to the find operation to avoid O(n^2) behavior in repeated lookups. + // + // searchAddr is always inclusive and should be the base address of the highest runtime + // page available for scavenging. + // + // searchAddr is managed by both find and mark. + // + // Normally, find monotonically decreases searchAddr as it finds no more free pages to + // scavenge. However, mark, when marking a new chunk at an index greater than the current + // searchAddr, sets searchAddr to the *negative* index into chunks of that page. The trick here + // is that concurrent calls to find will fail to monotonically decrease searchAddr, and so they + // won't barge over new memory becoming available to scavenge. Furthermore, this ensures + // that some future caller of find *must* observe the new high index. That caller + // (or any other racing with it), then makes searchAddr positive before continuing, bringing + // us back to our monotonically decreasing steady-state. + // + // A pageAlloc lock serializes updates between min, max, and searchAddr, so abs(searchAddr) + // is always guaranteed to be >= min and < max (converted to heap addresses). + // + // TODO(mknyszek): Ideally we would use something bigger than a uint8 for faster + // iteration like uint32, but we lack the bit twiddling intrinsics. We'd need to either + // copy them from math/bits or fix the fact that we can't import math/bits' code from + // the runtime due to compiler instrumentation. + searchAddr atomicOffAddr + chunks []atomic.Uint8 + minHeapIdx atomic.Int32 + min, max atomic.Int32 +} + +// find returns the highest chunk index that may contain pages available to scavenge. +// It also returns an offset to start searching in the highest chunk. +func (s *scavengeIndex) find() (chunkIdx, uint) { + searchAddr, marked := s.searchAddr.Load() + if searchAddr == minOffAddr.addr() { + // We got a cleared search addr. + return 0, 0 + } + + // Starting from searchAddr's chunk, and moving down to minHeapIdx, + // iterate until we find a chunk with pages to scavenge. + min := s.minHeapIdx.Load() + searchChunk := chunkIndex(uintptr(searchAddr)) + start := int32(searchChunk / 8) + for i := start; i >= min; i-- { + // Skip over irrelevant address space. + chunks := s.chunks[i].Load() + if chunks == 0 { + continue + } + // Note that we can't have 8 leading zeroes here because + // we necessarily skipped that case. So, what's left is + // an index. If there are no zeroes, we want the 7th + // index, if 1 zero, the 6th, and so on. + n := 7 - sys.LeadingZeros8(chunks) + ci := chunkIdx(uint(i)*8 + uint(n)) + if searchChunk == ci { + return ci, chunkPageIndex(uintptr(searchAddr)) + } + // Try to reduce searchAddr to newSearchAddr. + newSearchAddr := chunkBase(ci) + pallocChunkBytes - pageSize + if marked { + // Attempt to be the first one to decrease the searchAddr + // after an increase. If we fail, that means there was another + // increase, or somebody else got to it before us. Either way, + // it doesn't matter. We may lose some performance having an + // incorrect search address, but it's far more important that + // we don't miss updates. + s.searchAddr.StoreUnmark(searchAddr, newSearchAddr) + } else { + // Decrease searchAddr. + s.searchAddr.StoreMin(newSearchAddr) + } + return ci, pallocChunkPages - 1 + } + // Clear searchAddr, because we've exhausted the heap. + s.searchAddr.Clear() + return 0, 0 +} + +// mark sets the inclusive range of chunks between indices start and end as +// containing pages available to scavenge. 
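To make the leading-zeros step in find above concrete: suppose the index byte covering chunks i*8 through i*8+7 currently reads 0b00101000, i.e. chunks i*8+3 and i*8+5 are marked. Then:

    chunks := uint8(0b00101000)
    n := 7 - sys.LeadingZeros8(chunks)  // two leading zeros, so n == 5
    ci := chunkIdx(uint(i)*8 + uint(n)) // the highest marked chunk in this byte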
+// +// Must be serialized with other mark, markRange, and clear calls. +func (s *scavengeIndex) mark(base, limit uintptr) { + start, end := chunkIndex(base), chunkIndex(limit-pageSize) + if start == end { + // Within a chunk. + mask := uint8(1 << (start % 8)) + s.chunks[start/8].Or(mask) + } else if start/8 == end/8 { + // Within the same byte in the index. + mask := uint8(uint16(1<<(end-start+1))-1) << (start % 8) + s.chunks[start/8].Or(mask) + } else { + // Crosses multiple bytes in the index. + startAligned := chunkIdx(alignUp(uintptr(start), 8)) + endAligned := chunkIdx(alignDown(uintptr(end), 8)) + + // Do the end of the first byte first. + if width := startAligned - start; width > 0 { + mask := uint8(uint16(1< 0 { + mask := uint8(uint16(1<= BaseChunkIdx+6; i-- { + find(i, PallocChunkPages-1) + } + find(0, 0) + }) + t.Run("ThirtyTwoChunks", func(t *testing.T) { + find, mark := setup(t) + mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+32, 0)) + for i := BaseChunkIdx + 31; i >= BaseChunkIdx; i-- { + find(i, PallocChunkPages-1) + } + find(0, 0) + }) + t.Run("ThirtyTwoChunksOffset", func(t *testing.T) { + find, mark := setup(t) + mark(PageBase(BaseChunkIdx+3, 0), PageBase(BaseChunkIdx+35, 0)) + for i := BaseChunkIdx + 34; i >= BaseChunkIdx+3; i-- { + find(i, PallocChunkPages-1) + } + find(0, 0) + }) + t.Run("Mark", func(t *testing.T) { + find, mark := setup(t) + for i := BaseChunkIdx; i < BaseChunkIdx+32; i++ { + mark(PageBase(i, 0), PageBase(i+1, 0)) + } + for i := BaseChunkIdx + 31; i >= BaseChunkIdx; i-- { + find(i, PallocChunkPages-1) + } + find(0, 0) + }) + t.Run("MarkInterleaved", func(t *testing.T) { + find, mark := setup(t) + for i := BaseChunkIdx; i < BaseChunkIdx+32; i++ { + mark(PageBase(i, 0), PageBase(i+1, 0)) + find(i, PallocChunkPages-1) + } + find(0, 0) + }) + t.Run("MarkIdempotentOneChunk", func(t *testing.T) { + find, mark := setup(t) + mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0)) + mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0)) + find(BaseChunkIdx, PallocChunkPages-1) + find(0, 0) + }) + t.Run("MarkIdempotentThirtyTwoChunks", func(t *testing.T) { + find, mark := setup(t) + mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+32, 0)) + mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+32, 0)) + for i := BaseChunkIdx + 31; i >= BaseChunkIdx; i-- { + find(i, PallocChunkPages-1) + } + find(0, 0) + }) + t.Run("MarkIdempotentThirtyTwoChunksOffset", func(t *testing.T) { + find, mark := setup(t) + mark(PageBase(BaseChunkIdx+4, 0), PageBase(BaseChunkIdx+31, 0)) + mark(PageBase(BaseChunkIdx+5, 0), PageBase(BaseChunkIdx+36, 0)) + for i := BaseChunkIdx + 35; i >= BaseChunkIdx+4; i-- { + find(i, PallocChunkPages-1) + } + find(0, 0) + }) +} diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go index 0a53cd451b..698b7bff31 100644 --- a/src/runtime/mgcsweep.go +++ b/src/runtime/mgcsweep.go @@ -398,11 +398,15 @@ func sweepone() uintptr { // between sweep done and sweep termination (e.g. not enough // allocations to trigger a GC) which would be nice to fill in // with scavenging work. 
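A worked example of the mask arithmetic in mark above: marking chunks 2 through 5, which land in the same index byte (start/8 == end/8), builds a run of four one-bits and shifts it into place:

    start, end := chunkIdx(2), chunkIdx(5)
    // (1<<(end-start+1))-1 == 0b1111; shifting by start%8 gives 0b00111100.
    mask := uint8(uint16(1<<(end-start+1))-1) << (start % 8)
    s.chunks[start/8].Or(mask) // sets bits 2..5 of byte 0, leaving the rest untouched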
- systemstack(func() { - lock(&mheap_.lock) - mheap_.pages.scavengeStartGen() - unlock(&mheap_.lock) - }) + if debug.scavtrace > 0 { + systemstack(func() { + lock(&mheap_.lock) + released := atomic.Loaduintptr(&mheap_.pages.scav.released) + printScavTrace(released, false) + atomic.Storeuintptr(&mheap_.pages.scav.released, 0) + unlock(&mheap_.lock) + }) + } scavenger.ready() } diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index ac4f99b57d..ff681a19cd 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -62,7 +62,10 @@ const ( type mheap struct { // lock must only be acquired on the system stack, otherwise a g // could self-deadlock if its stack grows with the lock held. - lock mutex + lock mutex + + _ uint32 // 8-byte align pages so its alignment is consistent with tests. + pages pageAlloc // page allocation data structure sweepgen uint32 // sweep generation, see comment in mspan; written during STW @@ -1548,22 +1551,12 @@ func (h *mheap) scavengeAll() { gp := getg() gp.m.mallocing++ - lock(&h.lock) - // Start a new scavenge generation so we have a chance to walk - // over the whole heap. - h.pages.scavengeStartGen() - unlock(&h.lock) - released := h.pages.scavenge(^uintptr(0)) - lock(&h.pages.scav.lock) - gen := h.pages.scav.gen - unlock(&h.pages.scav.lock) - gp.m.mallocing-- if debug.scavtrace > 0 { - printScavTrace(gen, released, true) + printScavTrace(released, true) } } diff --git a/src/runtime/mpagealloc.go b/src/runtime/mpagealloc.go index 3881974742..c85da15ff2 100644 --- a/src/runtime/mpagealloc.go +++ b/src/runtime/mpagealloc.go @@ -262,45 +262,20 @@ type pageAlloc struct { // All access is protected by the mheapLock. inUse addrRanges + _ uint32 // Align scav so it's easier to reason about alignment within scav. + // scav stores the scavenger state. scav struct { - lock mutex - - // inUse is a slice of ranges of address space which have not - // yet been looked at by the scavenger. - // - // Protected by lock. - inUse addrRanges - - // gen is the scavenge generation number. - // - // Protected by lock. - gen uint32 - - // reservationBytes is how large of a reservation should be made - // in bytes of address space for each scavenge iteration. - // - // Protected by lock. - reservationBytes uintptr + // index is an efficient index of chunks that have pages available to + // scavenge. + index scavengeIndex // released is the amount of memory released this generation. // // Updated atomically. released uintptr - // scavLWM is the lowest (offset) address that the scavenger reached this - // scavenge generation. - // - // Protected by lock. - scavLWM offAddr - - // freeHWM is the highest (offset) address of a page that was freed to - // the page allocator this scavenge generation. - // - // Protected by mheapLock. - freeHWM offAddr - - _ uint32 // Align assistTime for atomics. + _ uint32 // Align assistTime for atomics on 32-bit platforms. // scavengeAssistTime is the time spent scavenging in the last GC cycle. // @@ -348,12 +323,6 @@ func (p *pageAlloc) init(mheapLock *mutex, sysStat *sysMemStat) { // Set the mheapLock. p.mheapLock = mheapLock - - // Initialize p.scav.inUse. - p.scav.inUse.init(sysStat) - - // Initialize scavenge tracking state. - p.scav.scavLWM = maxSearchAddr } // tryChunkOf returns the bitmap data for the given chunk. @@ -903,10 +872,7 @@ func (p *pageAlloc) free(base, npages uintptr, scavenged bool) { } limit := base + npages*pageSize - 1 if !scavenged { - // Update the free high watermark for the scavenger. 
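In this free-path hunk, the single freeHWM watermark is replaced by marking every chunk that the freed range overlaps, which is what re-arms the scavenger for that part of the heap. The limit arithmetic is worth spelling out; for example, freeing exactly one full chunk at index c (identifiers from this hunk and from mark above; c is illustrative):

    base := chunkBase(c)
    limit := base + pallocChunkBytes - 1 // free() computes an inclusive last byte
    // free passes limit+1 because mark takes an exclusive limit. Inside mark:
    start := chunkIndex(base)                 // == c
    end := chunkIndex((limit + 1) - pageSize) // == c as well, so only chunk c is marked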
- if offLimit := (offAddr{limit}); p.scav.freeHWM.lessThan(offLimit) { - p.scav.freeHWM = offLimit - } + p.scav.index.mark(base, limit+1) } if npages == 1 { // Fast path: we're clearing a single bit, and we know exactly diff --git a/src/runtime/mpagealloc_32bit.go b/src/runtime/mpagealloc_32bit.go index e072f70cd7..859c61d8a5 100644 --- a/src/runtime/mpagealloc_32bit.go +++ b/src/runtime/mpagealloc_32bit.go @@ -11,7 +11,10 @@ package runtime -import "unsafe" +import ( + "runtime/internal/atomic" + "unsafe" +) const ( // The number of levels in the radix tree. @@ -53,6 +56,10 @@ var levelLogPages = [summaryLevels]uint{ logPallocChunkPages, } +// scavengeIndexArray is the backing store for p.scav.index.chunks. +// On 32-bit platforms, it's small enough to just be a global. +var scavengeIndexArray [((1 << heapAddrBits) / pallocChunkBytes) / 8]atomic.Uint8 + // See mpagealloc_64bit.go for details. func (p *pageAlloc) sysInit() { // Calculate how much memory all our entries will take up. @@ -87,6 +94,9 @@ func (p *pageAlloc) sysInit() { reservation = add(reservation, uintptr(entries)*pallocSumBytes) } + + // Set up the scavenge index. + p.scav.index.chunks = scavengeIndexArray[:] } // See mpagealloc_64bit.go for details. diff --git a/src/runtime/mpagealloc_64bit.go b/src/runtime/mpagealloc_64bit.go index 0b99209d99..bfc3e0ad90 100644 --- a/src/runtime/mpagealloc_64bit.go +++ b/src/runtime/mpagealloc_64bit.go @@ -6,7 +6,10 @@ package runtime -import "unsafe" +import ( + "runtime/internal/atomic" + "unsafe" +) const ( // The number of levels in the radix tree. @@ -83,6 +86,12 @@ func (p *pageAlloc) sysInit() { sl := notInHeapSlice{(*notInHeap)(r), 0, entries} p.summary[l] = *(*[]pallocSum)(unsafe.Pointer(&sl)) } + + // Set up the scavenge index. + nbytes := uintptr(1< haveMax { + needMin = haveMax + } + have := makeAddrRange( + // Avoid a panic from indexing one past the last element. + uintptr(unsafe.Pointer(&s.chunks[0]))+uintptr(haveMin), + uintptr(unsafe.Pointer(&s.chunks[0]))+uintptr(haveMax), + ) + need := makeAddrRange( + // Avoid a panic from indexing one past the last element. + uintptr(unsafe.Pointer(&s.chunks[0]))+uintptr(needMin), + uintptr(unsafe.Pointer(&s.chunks[0]))+uintptr(needMax), + ) + // Subtract any overlap from rounding. We can't re-map memory because + // it'll be zeroed. + need = need.subtract(have) + + // If we've got something to map, map it, and update the slice bounds. + if need.size() != 0 { + sysMap(unsafe.Pointer(need.base.addr()), need.size(), sysStat) + sysUsed(unsafe.Pointer(need.base.addr()), need.size(), need.size()) + // Update the indices only after the new memory is valid. + if haveMin == 0 || needMin < haveMin { + s.min.Store(needMin) + } + if haveMax == 0 || needMax > haveMax { + s.max.Store(needMax) + } + } + // Update minHeapIdx. Note that even if there's no mapping work to do, + // we may still have a new, lower minimum heap address. + minHeapIdx := s.minHeapIdx.Load() + if baseIdx := int32(chunkIndex(base) / 8); minHeapIdx == 0 || baseIdx < minHeapIdx { + s.minHeapIdx.Store(baseIdx) + } + return need.size() } diff --git a/src/runtime/mranges.go b/src/runtime/mranges.go index e0be1e134e..9cf83cc613 100644 --- a/src/runtime/mranges.go +++ b/src/runtime/mranges.go @@ -11,6 +11,7 @@ package runtime import ( "internal/goarch" + "runtime/internal/atomic" "unsafe" ) @@ -141,6 +142,69 @@ func (l offAddr) addr() uintptr { return l.a } +// atomicOffAddr is like offAddr, but operations on it are atomic. 
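The virtual footprint of the index is small. Assuming heapAddrBits == 48 and pallocChunkBytes == 4 MiB (512 pages of 8 KiB) -- typical values for 64-bit platforms, stated here only for illustration -- the reservation made in sysInit works out to:

    chunksTotal := (uintptr(1) << 48) / (4 << 20) // 1<<26 chunks over the whole address space
    idxBytes := chunksTotal / 8                   // one bit per chunk: 8 MiB reserved
    // grow then maps only the pieces covering the real heap, aligning the needed
    // byte range out to physPageSize before calling sysMap/sysUsed.
    _ = idxBytes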
+// It also contains operations to be able to store marked addresses +// to ensure that they're not overridden until they've been seen. +type atomicOffAddr struct { + // a contains the offset address, unlike offAddr. + a atomic.Int64 +} + +// Clear attempts to store minOffAddr in atomicOffAddr. It may fail +// if a marked value is placed in the box in the meanwhile. +func (b *atomicOffAddr) Clear() { + for { + old := b.a.Load() + if old < 0 { + return + } + if b.a.CompareAndSwap(old, int64(minOffAddr.addr()-arenaBaseOffset)) { + return + } + } +} + +// StoreMin stores addr if it's less than the current value in the +// offset address space if the current value is not marked. +func (b *atomicOffAddr) StoreMin(addr uintptr) { + new := int64(addr - arenaBaseOffset) + for { + old := b.a.Load() + if old < new { + return + } + if b.a.CompareAndSwap(old, new) { + return + } + } +} + +// StoreUnmark attempts to unmark the value in atomicOffAddr and +// replace it with newAddr. markedAddr must be a marked address +// returned by Load. This function will not store newAddr if the +// box no longer contains markedAddr. +func (b *atomicOffAddr) StoreUnmark(markedAddr, newAddr uintptr) { + b.a.CompareAndSwap(-int64(markedAddr-arenaBaseOffset), int64(newAddr-arenaBaseOffset)) +} + +// StoreMarked stores addr but first converted to the offset address +// space and then negated. +func (b *atomicOffAddr) StoreMarked(addr uintptr) { + b.a.Store(-int64(addr - arenaBaseOffset)) +} + +// Load returns the address in the box as a virtual address. It also +// returns if the value was marked or not. +func (b *atomicOffAddr) Load() (uintptr, bool) { + v := b.a.Load() + wasMarked := false + if v < 0 { + wasMarked = true + v = -v + } + return uintptr(v) + arenaBaseOffset, wasMarked +} + // addrRanges is a data structure holding a collection of ranges of // address space. //
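A short worked example of the atomicOffAddr encoding above, taking arenaBaseOffset == 0 purely for illustration: addresses are stored in the offset space, and a negative value means "marked".

    var b atomicOffAddr
    b.StoreMarked(0x1234000)       // stored internally as -0x1234000
    addr, marked := b.Load()       // addr == 0x1234000, marked == true
    b.StoreUnmark(addr, 0x1230000) // succeeds only while the box still holds -0x1234000
    addr, marked = b.Load()        // addr == 0x1230000, marked == false
    b.StoreMin(0x1238000)          // no effect: 0x1238000 is not smaller than 0x1230000
    _, _ = addr, marked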