diff --git a/src/cmd/compile/internal/gc/inl_test.go b/src/cmd/compile/internal/gc/inl_test.go
index ac86cda2b8..5a8c19e2cb 100644
--- a/src/cmd/compile/internal/gc/inl_test.go
+++ b/src/cmd/compile/internal/gc/inl_test.go
@@ -55,6 +55,7 @@ func TestIntendedInlining(t *testing.T) {
 			"isDirectIface",
 			"itabHashFunc",
 			"noescape",
+			"pcvalueCacheKey",
 			"readUnaligned32",
 			"readUnaligned64",
 			"releasem",
diff --git a/src/runtime/symtab.go b/src/runtime/symtab.go
index 1dc7ab740e..edda45c669 100644
--- a/src/runtime/symtab.go
+++ b/src/runtime/symtab.go
@@ -698,7 +698,7 @@ func findfunc(pc uintptr) funcInfo {
 }
 
 type pcvalueCache struct {
-	entries [16]pcvalueCacheEnt
+	entries [2][8]pcvalueCacheEnt
 }
 
 type pcvalueCacheEnt struct {
@@ -709,6 +709,14 @@ type pcvalueCacheEnt struct {
 	val int32
 }
 
+// pcvalueCacheKey returns the outermost index in a pcvalueCache to use for targetpc.
+// It must be very cheap to calculate.
+// For now, align to sys.PtrSize and reduce mod the number of entries.
+// In practice, this appears to be fairly randomly and evenly distributed.
+func pcvalueCacheKey(targetpc uintptr) uintptr {
+	return (targetpc / sys.PtrSize) % uintptr(len(pcvalueCache{}.entries))
+}
+
 func pcvalue(f funcInfo, off int32, targetpc uintptr, cache *pcvalueCache, strict bool) int32 {
 	if off == 0 {
 		return -1
@@ -721,13 +729,14 @@ func pcvalue(f funcInfo, off int32, targetpc uintptr, cache *pcvalueCache, stric
 	// cheaper than doing the hashing for a less associative
 	// cache.
 	if cache != nil {
-		for i := range cache.entries {
+		x := pcvalueCacheKey(targetpc)
+		for i := range cache.entries[x] {
 			// We check off first because we're more
 			// likely to have multiple entries with
 			// different offsets for the same targetpc
 			// than the other way around, so we'll usually
 			// fail in the first clause.
-			ent := &cache.entries[i]
+			ent := &cache.entries[x][i]
 			if ent.off == off && ent.targetpc == targetpc {
 				return ent.val
 			}
@@ -756,9 +765,14 @@ func pcvalue(f funcInfo, off int32, targetpc uintptr, cache *pcvalueCache, stric
 			// replacement prevents a performance cliff if
 			// a recursive stack's cycle is slightly
 			// larger than the cache.
+			// Put the new element at the beginning,
+			// since it is the most likely to be newly used.
 			if cache != nil {
-				ci := fastrandn(uint32(len(cache.entries)))
-				cache.entries[ci] = pcvalueCacheEnt{
+				x := pcvalueCacheKey(targetpc)
+				e := &cache.entries[x]
+				ci := fastrand() % uint32(len(cache.entries[x]))
+				e[ci] = e[0]
+				e[0] = pcvalueCacheEnt{
 					targetpc: targetpc,
 					off:      off,
 					val:      val,
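
For context, the following is a minimal, self-contained sketch (not the runtime code itself) of the cache scheme the symtab.go hunks implement: a small set-associative cache whose set is chosen by a cheap key function, with replacement that evicts a random slot in the set and places the new entry first. The names demoCache, cacheKey, lookup, and store are illustrative only; the real code uses pcvalueCache, pcvalueCacheKey, pcvalue, and fastrand, and ptrSize here is an assumed 8-byte stand-in for sys.PtrSize.

	package main

	import (
		"fmt"
		"math/rand"
	)

	const ptrSize = 8 // stand-in for sys.PtrSize on 64-bit targets

	type cacheEnt struct {
		targetpc uintptr
		off      int32
		val      int32
	}

	// demoCache mirrors the [2][8] layout of the patched pcvalueCache:
	// 2 sets, 8 ways each.
	type demoCache struct {
		entries [2][8]cacheEnt
	}

	// cacheKey mirrors pcvalueCacheKey: align to the pointer size and
	// reduce mod the number of sets. It must be very cheap.
	func cacheKey(targetpc uintptr) uintptr {
		return (targetpc / ptrSize) % uintptr(len(demoCache{}.entries))
	}

	// lookup scans only the set selected by cacheKey, as the new loop in
	// pcvalue does, checking off before targetpc.
	func (c *demoCache) lookup(targetpc uintptr, off int32) (int32, bool) {
		x := cacheKey(targetpc)
		for i := range c.entries[x] {
			ent := &c.entries[x][i]
			if ent.off == off && ent.targetpc == targetpc {
				return ent.val, true
			}
		}
		return 0, false
	}

	// store mirrors the replacement policy in the diff: pick a random victim
	// within the set, move the current first entry into the victim slot, and
	// put the new entry at index 0 so the most recently inserted entry is
	// found first on the next lookup.
	func (c *demoCache) store(targetpc uintptr, off, val int32) {
		x := cacheKey(targetpc)
		e := &c.entries[x]
		ci := rand.Intn(len(e)) // the runtime uses fastrand() % len instead
		e[ci] = e[0]
		e[0] = cacheEnt{targetpc: targetpc, off: off, val: val}
	}

	func main() {
		var c demoCache
		c.store(0x4010a0, 12, 7)
		if v, ok := c.lookup(0x4010a0, 12); ok {
			fmt.Println("hit:", v) // hit: 7
		}
		if _, ok := c.lookup(0x4010a8, 12); !ok {
			fmt.Println("miss: that pc maps to the other set and was never stored")
		}
	}

The point of the two-set split is that a lookup only has to scan 8 entries instead of 16, while the random-victim, insert-at-front replacement keeps the behavior described in the original comment: it avoids a performance cliff when a recursive stack's cycle is slightly larger than the cache.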