diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index f1bdf93f46..0d17ddfe30 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -968,7 +968,6 @@ func NewPageAlloc(chunks, scav map[ChunkIdx][]BitRange) *PageAlloc { p.init(new(mutex), testSysStat) lockInit(p.mheapLock, lockRankMheap) p.test = true - for i, init := range chunks { addr := chunkBase(chunkIdx(i)) @@ -1007,6 +1006,18 @@ func NewPageAlloc(chunks, scav map[ChunkIdx][]BitRange) *PageAlloc { } } + // Make sure the scavenge index is updated. + // + // This is an inefficient way to do it, but it's also the simplest way. + minPages := physPageSize / pageSize + if minPages < 1 { + minPages = 1 + } + _, npages := chunk.findScavengeCandidate(pallocChunkPages-1, minPages, minPages) + if npages != 0 { + p.scav.index.mark(addr, addr+pallocChunkBytes) + } + // Update heap metadata for the allocRange calls above. systemstack(func() { lock(p.mheapLock) @@ -1015,12 +1026,6 @@ func NewPageAlloc(chunks, scav map[ChunkIdx][]BitRange) *PageAlloc { }) } - systemstack(func() { - lock(p.mheapLock) - p.scavengeStartGen() - unlock(p.mheapLock) - }) - return (*PageAlloc)(p) } @@ -1035,6 +1040,8 @@ func FreePageAlloc(pp *PageAlloc) { for l := 0; l < summaryLevels; l++ { sysFreeOS(unsafe.Pointer(&p.summary[l][0]), uintptr(cap(p.summary[l]))*pallocSumBytes) } + // Only necessary on 64-bit. This is a global on 32-bit. + sysFreeOS(unsafe.Pointer(&p.scav.index.chunks[0]), uintptr(cap(p.scav.index.chunks))) } else { resSize := uintptr(0) for _, s := range p.summary { @@ -1042,6 +1049,7 @@ func FreePageAlloc(pp *PageAlloc) { } sysFreeOS(unsafe.Pointer(&p.summary[0][0]), alignUp(resSize, physPageSize)) } + // Subtract back out whatever we mapped for the summaries. // sysUsed adds to p.sysStat and memstats.mappedReady no matter what // (and in anger should actually be accounted for), and there's no other @@ -1550,3 +1558,28 @@ func (s *Scavenger) Stop() { s.Wake() <-s.done } + +type ScavengeIndex struct { + i scavengeIndex +} + +func NewScavengeIndex(min, max ChunkIdx) *ScavengeIndex { + s := new(ScavengeIndex) + s.i.chunks = make([]atomic.Uint8, uintptr(1<>10, " KiB work, ", gcController.heapReleased.load()>>10, " KiB total, ", (gcController.heapInUse.load()*100)/heapRetained(), "% util", @@ -691,130 +676,20 @@ func printScavTrace(gen uint32, released uintptr, forced bool) { printunlock() } -// scavengeStartGen starts a new scavenge generation, resetting -// the scavenger's search space to the full in-use address space. -// -// p.mheapLock must be held. -// -// Must run on the system stack because p.mheapLock must be held. -// -//go:systemstack -func (p *pageAlloc) scavengeStartGen() { - assertLockHeld(p.mheapLock) - - lock(&p.scav.lock) - if debug.scavtrace > 0 { - printScavTrace(p.scav.gen, atomic.Loaduintptr(&p.scav.released), false) - } - p.inUse.cloneInto(&p.scav.inUse) - - // Pick the new starting address for the scavenger cycle. - var startAddr offAddr - if p.scav.scavLWM.lessThan(p.scav.freeHWM) { - // The "free" high watermark exceeds the "scavenged" low watermark, - // so there are free scavengable pages in parts of the address space - // that the scavenger already searched, the high watermark being the - // highest one. Pick that as our new starting point to ensure we - // see those pages. - startAddr = p.scav.freeHWM - } else { - // The "free" high watermark does not exceed the "scavenged" low - // watermark. 
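The watermark bookkeeping being removed here is what the new scavenge index replaces. Throughout the rest of the patch, "marking" a chunk means setting a single bit in that index: byte ci/8, bit ci%8 of the chunks array, one bit per palloc chunk. A minimal sketch of the mapping, using the scavengeIndex and chunkIdx types defined later in this diff (the helper name is hypothetical, not part of the change):

    // chunkIsMarked reports whether chunk ci is flagged as possibly containing
    // free, unscavenged pages. It mirrors the bit layout used by mark and find.
    func (s *scavengeIndex) chunkIsMarked(ci chunkIdx) bool {
        mask := uint8(1 << (ci % 8))
        return s.chunks[ci/8].Load()&mask != 0
    }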
This means the allocator didn't free any memory in - // the range we scavenged last cycle, so we might as well continue - // scavenging from where we were. - startAddr = p.scav.scavLWM - } - p.scav.inUse.removeGreaterEqual(startAddr.addr()) - - // reservationBytes may be zero if p.inUse.totalBytes is small, or if - // scavengeReservationShards is large. This case is fine as the scavenger - // will simply be turned off, but it does mean that scavengeReservationShards, - // in concert with pallocChunkBytes, dictates the minimum heap size at which - // the scavenger triggers. In practice this minimum is generally less than an - // arena in size, so virtually every heap has the scavenger on. - p.scav.reservationBytes = alignUp(p.inUse.totalBytes, pallocChunkBytes) / scavengeReservationShards - p.scav.gen++ - atomic.Storeuintptr(&p.scav.released, 0) - p.scav.freeHWM = minOffAddr - p.scav.scavLWM = maxOffAddr - unlock(&p.scav.lock) -} - -// scavengeReserve reserves a contiguous range of the address space -// for scavenging. The maximum amount of space it reserves is proportional -// to the size of the heap. The ranges are reserved from the high addresses -// first. -// -// Returns the reserved range and the scavenge generation number for it. -func (p *pageAlloc) scavengeReserve() (addrRange, uint32) { - lock(&p.scav.lock) - gen := p.scav.gen - - // Start by reserving the minimum. - r := p.scav.inUse.removeLast(p.scav.reservationBytes) - - // Return early if the size is zero; we don't want to use - // the bogus address below. - if r.size() == 0 { - unlock(&p.scav.lock) - return r, gen - } - - // The scavenger requires that base be aligned to a - // palloc chunk because that's the unit of operation for - // the scavenger, so align down, potentially extending - // the range. - newBase := alignDown(r.base.addr(), pallocChunkBytes) - - // Remove from inUse however much extra we just pulled out. - p.scav.inUse.removeGreaterEqual(newBase) - unlock(&p.scav.lock) - - r.base = offAddr{newBase} - return r, gen -} - -// scavengeUnreserve returns an unscavenged portion of a range that was -// previously reserved with scavengeReserve. -func (p *pageAlloc) scavengeUnreserve(r addrRange, gen uint32) { - if r.size() == 0 { - return - } - if r.base.addr()%pallocChunkBytes != 0 { - throw("unreserving unaligned region") - } - lock(&p.scav.lock) - if gen == p.scav.gen { - p.scav.inUse.add(r) - } - unlock(&p.scav.lock) -} - -// scavengeOne walks over address range work until it finds +// scavengeOne walks over the chunk at chunk index ci and searches for // a contiguous run of pages to scavenge. It will try to scavenge // at most max bytes at once, but may scavenge more to avoid // breaking huge pages. Once it scavenges some memory it returns // how much it scavenged in bytes. // -// Returns the number of bytes scavenged and the part of work -// which was not yet searched. +// searchIdx is the page index to start searching from in ci. // -// work's base address must be aligned to pallocChunkBytes. +// Returns the number of bytes scavenged. // // Must run on the systemstack because it acquires p.mheapLock. // //go:systemstack -func (p *pageAlloc) scavengeOne(work addrRange, max uintptr) (uintptr, addrRange) { - // Defensively check if we've received an empty address range. - // If so, just return. - if work.size() == 0 { - // Nothing to do. - return 0, work - } - // Check the prerequisites of work. 
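With the reservation and generation machinery above deleted, the control flow becomes: ask the index for the highest chunk that may still have work, then scavenge that one chunk. The caller is not fully visible in this hunk; a plausible driver loop, assuming the find and scavengeOne signatures introduced by this patch (a sketch, not the patch's verbatim code), is:

    // scavenge releases up to nbytes of memory back to the OS, driven by the index.
    func (p *pageAlloc) scavenge(nbytes uintptr) uintptr {
        released := uintptr(0)
        for released < nbytes {
            // find returns the highest chunk that may still contain free,
            // unscavenged pages plus a page index to begin searching at;
            // (0, 0) means there is nothing left to scavenge.
            ci, pageIdx := p.scav.index.find()
            if ci == 0 {
                break
            }
            systemstack(func() {
                released += p.scavengeOne(ci, pageIdx, nbytes-released)
            })
        }
        return released
    }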
- if work.base.addr()%pallocChunkBytes != 0 { - throw("scavengeOne called with unaligned work region") - } +func (p *pageAlloc) scavengeOne(ci chunkIdx, searchIdx uint, max uintptr) uintptr { // Calculate the maximum number of pages to scavenge. // // This should be alignUp(max, pageSize) / pageSize but max can and will @@ -836,168 +711,61 @@ func (p *pageAlloc) scavengeOne(work addrRange, max uintptr) (uintptr, addrRange minPages = 1 } - // Fast path: check the chunk containing the top-most address in work. - if r, w := p.scavengeOneFast(work, minPages, maxPages); r != 0 { - return r, w - } else { - work = w - } - - // findCandidate finds the next scavenge candidate in work optimistically. - // - // Returns the candidate chunk index and true on success, and false on failure. - // - // The heap need not be locked. - findCandidate := func(work addrRange) (chunkIdx, bool) { - // Iterate over this work's chunks. - for i := chunkIndex(work.limit.addr() - 1); i >= chunkIndex(work.base.addr()); i-- { - // If this chunk is totally in-use or has no unscavenged pages, don't bother - // doing a more sophisticated check. - // - // Note we're accessing the summary and the chunks without a lock, but - // that's fine. We're being optimistic anyway. - - // Check quickly if there are enough free pages at all. - if p.summary[len(p.summary)-1][i].max() < uint(minPages) { - continue - } - - // Run over the chunk looking harder for a candidate. Again, we could - // race with a lot of different pieces of code, but we're just being - // optimistic. Make sure we load the l2 pointer atomically though, to - // avoid races with heap growth. It may or may not be possible to also - // see a nil pointer in this case if we do race with heap growth, but - // just defensively ignore the nils. This operation is optimistic anyway. - l2 := (*[1 << pallocChunksL2Bits]pallocData)(atomic.Loadp(unsafe.Pointer(&p.chunks[i.l1()]))) - if l2 != nil && l2[i.l2()].hasScavengeCandidate(minPages) { - return i, true - } - } - return 0, false - } - - // Slow path: iterate optimistically over the in-use address space - // looking for any free and unscavenged page. If we think we see something, - // lock and verify it! - for work.size() != 0 { - - // Search for the candidate. - candidateChunkIdx, ok := findCandidate(work) - if !ok { - // We didn't find a candidate, so we're done. - work.limit = work.base - break - } - - // Lock, so we can verify what we found. - lock(p.mheapLock) - - // Find, verify, and scavenge if we can. - chunk := p.chunkOf(candidateChunkIdx) - base, npages := chunk.findScavengeCandidate(pallocChunkPages-1, minPages, maxPages) - if npages > 0 { - work.limit = offAddr{p.scavengeRangeLocked(candidateChunkIdx, base, npages)} - unlock(p.mheapLock) - return uintptr(npages) * pageSize, work - } - unlock(p.mheapLock) - - // We were fooled, so let's continue from where we left off. - work.limit = offAddr{chunkBase(candidateChunkIdx)} - } - return 0, work -} - -// scavengeOneFast is the fast path for scavengeOne, which just checks the top -// chunk of work for some pages to scavenge. -// -// Must run on the system stack because it acquires the heap lock. 
-// -//go:systemstack -func (p *pageAlloc) scavengeOneFast(work addrRange, minPages, maxPages uintptr) (uintptr, addrRange) { - maxAddr := work.limit.addr() - 1 - maxChunk := chunkIndex(maxAddr) - lock(p.mheapLock) - if p.summary[len(p.summary)-1][maxChunk].max() >= uint(minPages) { + if p.summary[len(p.summary)-1][ci].max() >= uint(minPages) { // We only bother looking for a candidate if there at least // minPages free pages at all. - base, npages := p.chunkOf(maxChunk).findScavengeCandidate(chunkPageIndex(maxAddr), minPages, maxPages) + base, npages := p.chunkOf(ci).findScavengeCandidate(pallocChunkPages-1, minPages, maxPages) // If we found something, scavenge it and return! if npages != 0 { - work.limit = offAddr{p.scavengeRangeLocked(maxChunk, base, npages)} + // Compute the full address for the start of the range. + addr := chunkBase(ci) + uintptr(base)*pageSize + + // Mark the range we're about to scavenge as allocated, because + // we don't want any allocating goroutines to grab it while + // the scavenging is in progress. + if scav := p.allocRange(addr, uintptr(npages)); scav != 0 { + throw("double scavenge") + } + + // With that done, it's safe to unlock. unlock(p.mheapLock) - return uintptr(npages) * pageSize, work + + if !p.test { + // Only perform the actual scavenging if we're not in a test. + // It's dangerous to do so otherwise. + sysUnused(unsafe.Pointer(addr), uintptr(npages)*pageSize) + + // Update global accounting only when not in test, otherwise + // the runtime's accounting will be wrong. + nbytes := int64(npages) * pageSize + gcController.heapReleased.add(nbytes) + gcController.heapFree.add(-nbytes) + + stats := memstats.heapStats.acquire() + atomic.Xaddint64(&stats.committed, -nbytes) + atomic.Xaddint64(&stats.released, nbytes) + memstats.heapStats.release() + } + + // Relock the heap, because now we need to make these pages + // available allocation. Free them back to the page allocator. + lock(p.mheapLock) + p.free(addr, uintptr(npages), true) + + // Mark the range as scavenged. + p.chunkOf(ci).scavenged.setRange(base, npages) + unlock(p.mheapLock) + + return uintptr(npages) * pageSize } } + // Mark this chunk as having no free pages. + p.scav.index.clear(ci) unlock(p.mheapLock) - // Update the limit to reflect the fact that we checked maxChunk already. - work.limit = offAddr{chunkBase(maxChunk)} - return 0, work -} - -// scavengeRangeLocked scavenges the given region of memory. -// The region of memory is described by its chunk index (ci), -// the starting page index of the region relative to that -// chunk (base), and the length of the region in pages (npages). -// -// Returns the base address of the scavenged region. -// -// p.mheapLock must be held. Unlocks p.mheapLock but reacquires -// it before returning. Must be run on the systemstack as a result. -// -//go:systemstack -func (p *pageAlloc) scavengeRangeLocked(ci chunkIdx, base, npages uint) uintptr { - assertLockHeld(p.mheapLock) - - // Compute the full address for the start of the range. - addr := chunkBase(ci) + uintptr(base)*pageSize - - // Mark the range we're about to scavenge as allocated, because - // we don't want any allocating goroutines to grab it while - // the scavenging is in progress. - if scav := p.allocRange(addr, uintptr(npages)); scav != 0 { - throw("double scavenge") - } - - // With that done, it's safe to unlock. - unlock(p.mheapLock) - - // Update the scavenge low watermark. 
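Stripped of the statistics updates and the p.test guard, the essential ordering in the new scavengeOne body above is roughly:

    lock(p.mheapLock)
    // Claim the run so allocators cannot hand out pages mid-scavenge.
    if scav := p.allocRange(addr, uintptr(npages)); scav != 0 {
        throw("double scavenge")
    }
    unlock(p.mheapLock)
    sysUnused(unsafe.Pointer(addr), uintptr(npages)*pageSize) // return the pages to the OS
    lock(p.mheapLock)
    p.free(addr, uintptr(npages), true)            // make the run allocatable again
    p.chunkOf(ci).scavenged.setRange(base, npages) // and remember it is scavenged
    unlock(p.mheapLock)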
- lock(&p.scav.lock) - if oAddr := (offAddr{addr}); oAddr.lessThan(p.scav.scavLWM) { - p.scav.scavLWM = oAddr - } - unlock(&p.scav.lock) - - if !p.test { - // Only perform the actual scavenging if we're not in a test. - // It's dangerous to do so otherwise. - sysUnused(unsafe.Pointer(addr), uintptr(npages)*pageSize) - - // Update global accounting only when not in test, otherwise - // the runtime's accounting will be wrong. - nbytes := int64(npages) * pageSize - gcController.heapReleased.add(nbytes) - gcController.heapFree.add(-nbytes) - - // Update consistent accounting too. - stats := memstats.heapStats.acquire() - atomic.Xaddint64(&stats.committed, -nbytes) - atomic.Xaddint64(&stats.released, nbytes) - memstats.heapStats.release() - } - - // Relock the heap, because now we need to make these pages - // available allocation. Free them back to the page allocator. - lock(p.mheapLock) - p.free(addr, uintptr(npages), true) - - // Mark the range as scavenged. - p.chunkOf(ci).scavenged.setRange(base, npages) - return addr + return 0 } // fillAligned returns x but with all zeroes in m-aligned @@ -1059,38 +827,6 @@ func fillAligned(x uint64, m uint) uint64 { return ^((x - (x >> (m - 1))) | x) } -// hasScavengeCandidate returns true if there's any min-page-aligned groups of -// min pages of free-and-unscavenged memory in the region represented by this -// pallocData. -// -// min must be a non-zero power of 2 <= maxPagesPerPhysPage. -func (m *pallocData) hasScavengeCandidate(min uintptr) bool { - if min&(min-1) != 0 || min == 0 { - print("runtime: min = ", min, "\n") - throw("min must be a non-zero power of 2") - } else if min > maxPagesPerPhysPage { - print("runtime: min = ", min, "\n") - throw("min too large") - } - - // The goal of this search is to see if the chunk contains any free and unscavenged memory. - for i := len(m.scavenged) - 1; i >= 0; i-- { - // 1s are scavenged OR non-free => 0s are unscavenged AND free - // - // TODO(mknyszek): Consider splitting up fillAligned into two - // functions, since here we technically could get by with just - // the first half of its computation. It'll save a few instructions - // but adds some additional code complexity. - x := fillAligned(m.scavenged[i]|m.pallocBits[i], uint(min)) - - // Quickly skip over chunks of non-free or scavenged pages. - if x != ^uint64(0) { - return true - } - } - return false -} - // findScavengeCandidate returns a start index and a size for this pallocData // segment which represents a contiguous region of free and unscavenged memory. // @@ -1210,3 +946,157 @@ func (m *pallocData) findScavengeCandidate(searchIdx uint, min, max uintptr) (ui } return start, size } + +// scavengeIndex is a structure for efficiently managing which pageAlloc chunks have +// memory available to scavenge. +type scavengeIndex struct { + // chunks is a bitmap representing the entire address space. Each bit represents + // a single chunk, and a 1 value indicates the presence of pages available for + // scavenging. Updates to the bitmap are serialized by the pageAlloc lock. + // + // The underlying storage of chunks is platform dependent and may not even be + // totally mapped read/write. min and max reflect the extent that is safe to access. + // min is inclusive, max is exclusive. + // + // searchAddr is the maximum address (in the offset address space, so we have a linear + // view of the address space; see mranges.go:offAddr) containing memory available to + // scavenge. 
It is a hint to the find operation to avoid O(n^2) behavior in repeated lookups. + // + // searchAddr is always inclusive and should be the base address of the highest runtime + // page available for scavenging. + // + // searchAddr is managed by both find and mark. + // + // Normally, find monotonically decreases searchAddr as it finds no more free pages to + // scavenge. However, mark, when marking a new chunk at an index greater than the current + // searchAddr, sets searchAddr to the *negative* index into chunks of that page. The trick here + // is that concurrent calls to find will fail to monotonically decrease searchAddr, and so they + // won't barge over new memory becoming available to scavenge. Furthermore, this ensures + // that some future caller of find *must* observe the new high index. That caller + // (or any other racing with it), then makes searchAddr positive before continuing, bringing + // us back to our monotonically decreasing steady-state. + // + // A pageAlloc lock serializes updates between min, max, and searchAddr, so abs(searchAddr) + // is always guaranteed to be >= min and < max (converted to heap addresses). + // + // TODO(mknyszek): Ideally we would use something bigger than a uint8 for faster + // iteration like uint32, but we lack the bit twiddling intrinsics. We'd need to either + // copy them from math/bits or fix the fact that we can't import math/bits' code from + // the runtime due to compiler instrumentation. + searchAddr atomicOffAddr + chunks []atomic.Uint8 + minHeapIdx atomic.Int32 + min, max atomic.Int32 +} + +// find returns the highest chunk index that may contain pages available to scavenge. +// It also returns an offset to start searching in the highest chunk. +func (s *scavengeIndex) find() (chunkIdx, uint) { + searchAddr, marked := s.searchAddr.Load() + if searchAddr == minOffAddr.addr() { + // We got a cleared search addr. + return 0, 0 + } + + // Starting from searchAddr's chunk, and moving down to minHeapIdx, + // iterate until we find a chunk with pages to scavenge. + min := s.minHeapIdx.Load() + searchChunk := chunkIndex(uintptr(searchAddr)) + start := int32(searchChunk / 8) + for i := start; i >= min; i-- { + // Skip over irrelevant address space. + chunks := s.chunks[i].Load() + if chunks == 0 { + continue + } + // Note that we can't have 8 leading zeroes here because + // we necessarily skipped that case. So, what's left is + // an index. If there are no zeroes, we want the 7th + // index, if 1 zero, the 6th, and so on. + n := 7 - sys.LeadingZeros8(chunks) + ci := chunkIdx(uint(i)*8 + uint(n)) + if searchChunk == ci { + return ci, chunkPageIndex(uintptr(searchAddr)) + } + // Try to reduce searchAddr to newSearchAddr. + newSearchAddr := chunkBase(ci) + pallocChunkBytes - pageSize + if marked { + // Attempt to be the first one to decrease the searchAddr + // after an increase. If we fail, that means there was another + // increase, or somebody else got to it before us. Either way, + // it doesn't matter. We may lose some performance having an + // incorrect search address, but it's far more important that + // we don't miss updates. + s.searchAddr.StoreUnmark(searchAddr, newSearchAddr) + } else { + // Decrease searchAddr. + s.searchAddr.StoreMin(newSearchAddr) + } + return ci, pallocChunkPages - 1 + } + // Clear searchAddr, because we've exhausted the heap. + s.searchAddr.Clear() + return 0, 0 +} + +// mark sets the inclusive range of chunks between indices start and end as +// containing pages available to scavenge. 
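To make the leading-zeros step in find above concrete: suppose the index byte covering chunks i*8 through i*8+7 currently reads 0b00101000, i.e. chunks i*8+3 and i*8+5 are marked. Then:

    chunks := uint8(0b00101000)
    n := 7 - sys.LeadingZeros8(chunks)  // two leading zeros, so n == 5
    ci := chunkIdx(uint(i)*8 + uint(n)) // the highest marked chunk in this byte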
+// +// Must be serialized with other mark, markRange, and clear calls. +func (s *scavengeIndex) mark(base, limit uintptr) { + start, end := chunkIndex(base), chunkIndex(limit-pageSize) + if start == end { + // Within a chunk. + mask := uint8(1 << (start % 8)) + s.chunks[start/8].Or(mask) + } else if start/8 == end/8 { + // Within the same byte in the index. + mask := uint8(uint16(1<<(end-start+1))-1) << (start % 8) + s.chunks[start/8].Or(mask) + } else { + // Crosses multiple bytes in the index. + startAligned := chunkIdx(alignUp(uintptr(start), 8)) + endAligned := chunkIdx(alignDown(uintptr(end), 8)) + + // Do the end of the first byte first. + if width := startAligned - start; width > 0 { + mask := uint8(uint16(1< 0 { + mask := uint8(uint16(1<= BaseChunkIdx+6; i-- { + find(i, PallocChunkPages-1) + } + find(0, 0) + }) + t.Run("ThirtyTwoChunks", func(t *testing.T) { + find, mark := setup(t) + mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+32, 0)) + for i := BaseChunkIdx + 31; i >= BaseChunkIdx; i-- { + find(i, PallocChunkPages-1) + } + find(0, 0) + }) + t.Run("ThirtyTwoChunksOffset", func(t *testing.T) { + find, mark := setup(t) + mark(PageBase(BaseChunkIdx+3, 0), PageBase(BaseChunkIdx+35, 0)) + for i := BaseChunkIdx + 34; i >= BaseChunkIdx+3; i-- { + find(i, PallocChunkPages-1) + } + find(0, 0) + }) + t.Run("Mark", func(t *testing.T) { + find, mark := setup(t) + for i := BaseChunkIdx; i < BaseChunkIdx+32; i++ { + mark(PageBase(i, 0), PageBase(i+1, 0)) + } + for i := BaseChunkIdx + 31; i >= BaseChunkIdx; i-- { + find(i, PallocChunkPages-1) + } + find(0, 0) + }) + t.Run("MarkInterleaved", func(t *testing.T) { + find, mark := setup(t) + for i := BaseChunkIdx; i < BaseChunkIdx+32; i++ { + mark(PageBase(i, 0), PageBase(i+1, 0)) + find(i, PallocChunkPages-1) + } + find(0, 0) + }) + t.Run("MarkIdempotentOneChunk", func(t *testing.T) { + find, mark := setup(t) + mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0)) + mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0)) + find(BaseChunkIdx, PallocChunkPages-1) + find(0, 0) + }) + t.Run("MarkIdempotentThirtyTwoChunks", func(t *testing.T) { + find, mark := setup(t) + mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+32, 0)) + mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+32, 0)) + for i := BaseChunkIdx + 31; i >= BaseChunkIdx; i-- { + find(i, PallocChunkPages-1) + } + find(0, 0) + }) + t.Run("MarkIdempotentThirtyTwoChunksOffset", func(t *testing.T) { + find, mark := setup(t) + mark(PageBase(BaseChunkIdx+4, 0), PageBase(BaseChunkIdx+31, 0)) + mark(PageBase(BaseChunkIdx+5, 0), PageBase(BaseChunkIdx+36, 0)) + for i := BaseChunkIdx + 35; i >= BaseChunkIdx+4; i-- { + find(i, PallocChunkPages-1) + } + find(0, 0) + }) +} diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go index 0a53cd451b..698b7bff31 100644 --- a/src/runtime/mgcsweep.go +++ b/src/runtime/mgcsweep.go @@ -398,11 +398,15 @@ func sweepone() uintptr { // between sweep done and sweep termination (e.g. not enough // allocations to trigger a GC) which would be nice to fill in // with scavenging work. 
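A worked example of the mask arithmetic in mark above: marking chunks 2 through 5, which land in the same index byte (start/8 == end/8), builds a run of four one-bits and shifts it into place:

    start, end := chunkIdx(2), chunkIdx(5)
    // (1<<(end-start+1))-1 == 0b1111; shifting by start%8 gives 0b00111100.
    mask := uint8(uint16(1<<(end-start+1))-1) << (start % 8)
    s.chunks[start/8].Or(mask) // sets bits 2..5 of byte 0, leaving the rest untouched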
- systemstack(func() { - lock(&mheap_.lock) - mheap_.pages.scavengeStartGen() - unlock(&mheap_.lock) - }) + if debug.scavtrace > 0 { + systemstack(func() { + lock(&mheap_.lock) + released := atomic.Loaduintptr(&mheap_.pages.scav.released) + printScavTrace(released, false) + atomic.Storeuintptr(&mheap_.pages.scav.released, 0) + unlock(&mheap_.lock) + }) + } scavenger.ready() } diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index ac4f99b57d..ff681a19cd 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -62,7 +62,10 @@ const ( type mheap struct { // lock must only be acquired on the system stack, otherwise a g // could self-deadlock if its stack grows with the lock held. - lock mutex + lock mutex + + _ uint32 // 8-byte align pages so its alignment is consistent with tests. + pages pageAlloc // page allocation data structure sweepgen uint32 // sweep generation, see comment in mspan; written during STW @@ -1548,22 +1551,12 @@ func (h *mheap) scavengeAll() { gp := getg() gp.m.mallocing++ - lock(&h.lock) - // Start a new scavenge generation so we have a chance to walk - // over the whole heap. - h.pages.scavengeStartGen() - unlock(&h.lock) - released := h.pages.scavenge(^uintptr(0)) - lock(&h.pages.scav.lock) - gen := h.pages.scav.gen - unlock(&h.pages.scav.lock) - gp.m.mallocing-- if debug.scavtrace > 0 { - printScavTrace(gen, released, true) + printScavTrace(released, true) } } diff --git a/src/runtime/mpagealloc.go b/src/runtime/mpagealloc.go index 3881974742..c85da15ff2 100644 --- a/src/runtime/mpagealloc.go +++ b/src/runtime/mpagealloc.go @@ -262,45 +262,20 @@ type pageAlloc struct { // All access is protected by the mheapLock. inUse addrRanges + _ uint32 // Align scav so it's easier to reason about alignment within scav. + // scav stores the scavenger state. scav struct { - lock mutex - - // inUse is a slice of ranges of address space which have not - // yet been looked at by the scavenger. - // - // Protected by lock. - inUse addrRanges - - // gen is the scavenge generation number. - // - // Protected by lock. - gen uint32 - - // reservationBytes is how large of a reservation should be made - // in bytes of address space for each scavenge iteration. - // - // Protected by lock. - reservationBytes uintptr + // index is an efficient index of chunks that have pages available to + // scavenge. + index scavengeIndex // released is the amount of memory released this generation. // // Updated atomically. released uintptr - // scavLWM is the lowest (offset) address that the scavenger reached this - // scavenge generation. - // - // Protected by lock. - scavLWM offAddr - - // freeHWM is the highest (offset) address of a page that was freed to - // the page allocator this scavenge generation. - // - // Protected by mheapLock. - freeHWM offAddr - - _ uint32 // Align assistTime for atomics. + _ uint32 // Align assistTime for atomics on 32-bit platforms. // scavengeAssistTime is the time spent scavenging in the last GC cycle. // @@ -348,12 +323,6 @@ func (p *pageAlloc) init(mheapLock *mutex, sysStat *sysMemStat) { // Set the mheapLock. p.mheapLock = mheapLock - - // Initialize p.scav.inUse. - p.scav.inUse.init(sysStat) - - // Initialize scavenge tracking state. - p.scav.scavLWM = maxSearchAddr } // tryChunkOf returns the bitmap data for the given chunk. @@ -903,10 +872,7 @@ func (p *pageAlloc) free(base, npages uintptr, scavenged bool) { } limit := base + npages*pageSize - 1 if !scavenged { - // Update the free high watermark for the scavenger. 
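In this free-path hunk, the single freeHWM watermark is replaced by marking every chunk that the freed range overlaps, which is what re-arms the scavenger for that part of the heap. The limit arithmetic is worth spelling out; for example, freeing exactly one full chunk at index c (identifiers from this hunk and from mark above; c is illustrative):

    base := chunkBase(c)
    limit := base + pallocChunkBytes - 1 // free() computes an inclusive last byte
    // free passes limit+1 because mark takes an exclusive limit. Inside mark:
    start := chunkIndex(base)                 // == c
    end := chunkIndex((limit + 1) - pageSize) // == c as well, so only chunk c is marked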
- if offLimit := (offAddr{limit}); p.scav.freeHWM.lessThan(offLimit) { - p.scav.freeHWM = offLimit - } + p.scav.index.mark(base, limit+1) } if npages == 1 { // Fast path: we're clearing a single bit, and we know exactly diff --git a/src/runtime/mpagealloc_32bit.go b/src/runtime/mpagealloc_32bit.go index e072f70cd7..859c61d8a5 100644 --- a/src/runtime/mpagealloc_32bit.go +++ b/src/runtime/mpagealloc_32bit.go @@ -11,7 +11,10 @@ package runtime -import "unsafe" +import ( + "runtime/internal/atomic" + "unsafe" +) const ( // The number of levels in the radix tree. @@ -53,6 +56,10 @@ var levelLogPages = [summaryLevels]uint{ logPallocChunkPages, } +// scavengeIndexArray is the backing store for p.scav.index.chunks. +// On 32-bit platforms, it's small enough to just be a global. +var scavengeIndexArray [((1 << heapAddrBits) / pallocChunkBytes) / 8]atomic.Uint8 + // See mpagealloc_64bit.go for details. func (p *pageAlloc) sysInit() { // Calculate how much memory all our entries will take up. @@ -87,6 +94,9 @@ func (p *pageAlloc) sysInit() { reservation = add(reservation, uintptr(entries)*pallocSumBytes) } + + // Set up the scavenge index. + p.scav.index.chunks = scavengeIndexArray[:] } // See mpagealloc_64bit.go for details. diff --git a/src/runtime/mpagealloc_64bit.go b/src/runtime/mpagealloc_64bit.go index 0b99209d99..bfc3e0ad90 100644 --- a/src/runtime/mpagealloc_64bit.go +++ b/src/runtime/mpagealloc_64bit.go @@ -6,7 +6,10 @@ package runtime -import "unsafe" +import ( + "runtime/internal/atomic" + "unsafe" +) const ( // The number of levels in the radix tree. @@ -83,6 +86,12 @@ func (p *pageAlloc) sysInit() { sl := notInHeapSlice{(*notInHeap)(r), 0, entries} p.summary[l] = *(*[]pallocSum)(unsafe.Pointer(&sl)) } + + // Set up the scavenge index. + nbytes := uintptr(1< haveMax { + needMin = haveMax + } + have := makeAddrRange( + // Avoid a panic from indexing one past the last element. + uintptr(unsafe.Pointer(&s.chunks[0]))+uintptr(haveMin), + uintptr(unsafe.Pointer(&s.chunks[0]))+uintptr(haveMax), + ) + need := makeAddrRange( + // Avoid a panic from indexing one past the last element. + uintptr(unsafe.Pointer(&s.chunks[0]))+uintptr(needMin), + uintptr(unsafe.Pointer(&s.chunks[0]))+uintptr(needMax), + ) + // Subtract any overlap from rounding. We can't re-map memory because + // it'll be zeroed. + need = need.subtract(have) + + // If we've got something to map, map it, and update the slice bounds. + if need.size() != 0 { + sysMap(unsafe.Pointer(need.base.addr()), need.size(), sysStat) + sysUsed(unsafe.Pointer(need.base.addr()), need.size(), need.size()) + // Update the indices only after the new memory is valid. + if haveMin == 0 || needMin < haveMin { + s.min.Store(needMin) + } + if haveMax == 0 || needMax > haveMax { + s.max.Store(needMax) + } + } + // Update minHeapIdx. Note that even if there's no mapping work to do, + // we may still have a new, lower minimum heap address. + minHeapIdx := s.minHeapIdx.Load() + if baseIdx := int32(chunkIndex(base) / 8); minHeapIdx == 0 || baseIdx < minHeapIdx { + s.minHeapIdx.Store(baseIdx) + } + return need.size() } diff --git a/src/runtime/mranges.go b/src/runtime/mranges.go index e0be1e134e..9cf83cc613 100644 --- a/src/runtime/mranges.go +++ b/src/runtime/mranges.go @@ -11,6 +11,7 @@ package runtime import ( "internal/goarch" + "runtime/internal/atomic" "unsafe" ) @@ -141,6 +142,69 @@ func (l offAddr) addr() uintptr { return l.a } +// atomicOffAddr is like offAddr, but operations on it are atomic. 
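The virtual footprint of the index is small. Assuming heapAddrBits == 48 and pallocChunkBytes == 4 MiB (512 pages of 8 KiB) -- typical values for 64-bit platforms, stated here only for illustration -- the reservation made in sysInit works out to:

    chunksTotal := (uintptr(1) << 48) / (4 << 20) // 1<<26 chunks over the whole address space
    idxBytes := chunksTotal / 8                   // one bit per chunk: 8 MiB reserved
    // grow then maps only the pieces covering the real heap, aligning the needed
    // byte range out to physPageSize before calling sysMap/sysUsed.
    _ = idxBytes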
+// It also contains operations to be able to store marked addresses +// to ensure that they're not overridden until they've been seen. +type atomicOffAddr struct { + // a contains the offset address, unlike offAddr. + a atomic.Int64 +} + +// Clear attempts to store minOffAddr in atomicOffAddr. It may fail +// if a marked value is placed in the box in the meanwhile. +func (b *atomicOffAddr) Clear() { + for { + old := b.a.Load() + if old < 0 { + return + } + if b.a.CompareAndSwap(old, int64(minOffAddr.addr()-arenaBaseOffset)) { + return + } + } +} + +// StoreMin stores addr if it's less than the current value in the +// offset address space if the current value is not marked. +func (b *atomicOffAddr) StoreMin(addr uintptr) { + new := int64(addr - arenaBaseOffset) + for { + old := b.a.Load() + if old < new { + return + } + if b.a.CompareAndSwap(old, new) { + return + } + } +} + +// StoreUnmark attempts to unmark the value in atomicOffAddr and +// replace it with newAddr. markedAddr must be a marked address +// returned by Load. This function will not store newAddr if the +// box no longer contains markedAddr. +func (b *atomicOffAddr) StoreUnmark(markedAddr, newAddr uintptr) { + b.a.CompareAndSwap(-int64(markedAddr-arenaBaseOffset), int64(newAddr-arenaBaseOffset)) +} + +// StoreMarked stores addr but first converted to the offset address +// space and then negated. +func (b *atomicOffAddr) StoreMarked(addr uintptr) { + b.a.Store(-int64(addr - arenaBaseOffset)) +} + +// Load returns the address in the box as a virtual address. It also +// returns if the value was marked or not. +func (b *atomicOffAddr) Load() (uintptr, bool) { + v := b.a.Load() + wasMarked := false + if v < 0 { + wasMarked = true + v = -v + } + return uintptr(v) + arenaBaseOffset, wasMarked +} + // addrRanges is a data structure holding a collection of ranges of // address space. //
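A short worked example of the atomicOffAddr encoding above, taking arenaBaseOffset == 0 purely for illustration: addresses are stored in the offset space, and a negative value means "marked".

    var b atomicOffAddr
    b.StoreMarked(0x1234000)       // stored internally as -0x1234000
    addr, marked := b.Load()       // addr == 0x1234000, marked == true
    b.StoreUnmark(addr, 0x1230000) // succeeds only while the box still holds -0x1234000
    addr, marked = b.Load()        // addr == 0x1230000, marked == false
    b.StoreMin(0x1238000)          // no effect: 0x1238000 is not smaller than 0x1230000
    _, _ = addr, marked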