cmd/compile/internal: merge stack slots for selected local auto vars

Preliminary compiler support for merging/overlapping stack
slots of local variables whose access patterns are disjoint.

This patch includes changes in AllocFrame to do the actual
merging/overlapping based on information returned from a new
liveness.MergeLocals helper. The MergeLocals helper identifies
candidates by looking for sets of AUTO variables that either A) have
the same size and GC shape (if types contain pointers), or B) have the
same size (but potentially different types as long as those types have
no pointers). Variables must be greater than (3*types.PtrSize) in size
to be considered for merging.

After forming candidates, MergeLocals collects variables into "can be
overlapped" equivalence classes or partitions; this process is driven
by an additional liveness analysis pass. Ideally it would be nice to
move the existing stackmap liveness pass up before AllocFrame
and "widen" it to include merge candidates so that we can do just a
single liveness as opposed to two passes, however this may be difficult
given that the merge-locals liveness has to take into account
writes corresponding to dead stores.

This patch also required a change to the way ssa.OpVarDef pseudo-ops
are generated; prior to this point they would only be created for
variables whose type included pointers; if stack slot merging is
enabled then the ssagen code creates OpVarDef ops for all auto vars
that are merge candidates.

Note that some temporaries created late in the compilation process
(e.g. during ssa backend) are difficult to reason about, especially in
cases where we take the address of a temp and pass it to the runtime.
For the time being we mark most of the vars created post-ssagen as
"not a merge candidate".

Stack slot merging for locals/autos is enabled by default if "-N" is
not in effect, and can be disabled via "-gcflags=-d=mergelocals=0".

Fixmes/todos/restrictions:
- try lowering size restrictions
- re-evaluate the various skips that happen in SSA-created autotmps

Fixes #62737.
Updates #65532.
Updates #65495.

Cq-Include-Trybots: luci.golang.try:gotip-linux-amd64-longtest
Change-Id: Ibc22e8a76c87e47bc9fafe4959804d9ea923623d
Reviewed-on: https://go-review.googlesource.com/c/go/+/553055
Reviewed-by: Cherry Mui <cherryyz@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
Than McIntosh 2023-12-28 14:26:34 +00:00
parent 754f870381
commit 89f7805c2e
14 changed files with 1512 additions and 222 deletions

View File

@ -41,6 +41,10 @@ type DebugFlags struct {
LoopVarHash string `help:"for debugging changes in loop behavior. Overrides experiment and loopvar flag."` LoopVarHash string `help:"for debugging changes in loop behavior. Overrides experiment and loopvar flag."`
LocationLists int `help:"print information about DWARF location list creation"` LocationLists int `help:"print information about DWARF location list creation"`
MaxShapeLen int `help:"hash shape names longer than this threshold (default 500)" concurrent:"ok"` MaxShapeLen int `help:"hash shape names longer than this threshold (default 500)" concurrent:"ok"`
MergeLocals int `help:"merge together non-interfering local stack slots" concurrent:"ok"`
MergeLocalsDumpFunc string `help:"dump specified func in merge locals"`
MergeLocalsHash string `help:"hash value for debugging stack slot merging of local variables" concurrent:"ok"`
MergeLocalsTrace int `help:"trace debug output for locals merging"`
Nil int `help:"print information about nil checks"` Nil int `help:"print information about nil checks"`
NoOpenDefer int `help:"disable open-coded defers" concurrent:"ok"` NoOpenDefer int `help:"disable open-coded defers" concurrent:"ok"`
NoRefName int `help:"do not include referenced symbol names in object file" concurrent:"ok"` NoRefName int `help:"do not include referenced symbol names in object file" concurrent:"ok"`

View File

@ -184,6 +184,7 @@ func ParseFlags() {
Debug.SyncFrames = -1 // disable sync markers by default Debug.SyncFrames = -1 // disable sync markers by default
Debug.ZeroCopy = 1 Debug.ZeroCopy = 1
Debug.RangeFuncCheck = 1 Debug.RangeFuncCheck = 1
Debug.MergeLocals = 1
Debug.Checkptr = -1 // so we can tell whether it is set explicitly Debug.Checkptr = -1 // so we can tell whether it is set explicitly
@ -260,6 +261,9 @@ func ParseFlags() {
if Debug.PGOHash != "" { if Debug.PGOHash != "" {
PGOHash = NewHashDebug("pgohash", Debug.PGOHash, nil) PGOHash = NewHashDebug("pgohash", Debug.PGOHash, nil)
} }
if Debug.MergeLocalsHash != "" {
MergeLocalsHash = NewHashDebug("mergelocals", Debug.MergeLocalsHash, nil)
}
if Flag.MSan && !platform.MSanSupported(buildcfg.GOOS, buildcfg.GOARCH) { if Flag.MSan && !platform.MSanSupported(buildcfg.GOOS, buildcfg.GOARCH) {
log.Fatalf("%s/%s does not support -msan", buildcfg.GOOS, buildcfg.GOARCH) log.Fatalf("%s/%s does not support -msan", buildcfg.GOOS, buildcfg.GOARCH)

View File

@ -53,9 +53,10 @@ func (d *HashDebug) SetInlineSuffixOnly(b bool) *HashDebug {
// The default compiler-debugging HashDebug, for "-d=gossahash=..." // The default compiler-debugging HashDebug, for "-d=gossahash=..."
var hashDebug *HashDebug var hashDebug *HashDebug
var FmaHash *HashDebug // for debugging fused-multiply-add floating point changes var FmaHash *HashDebug // for debugging fused-multiply-add floating point changes
var LoopVarHash *HashDebug // for debugging shared/private loop variable changes var LoopVarHash *HashDebug // for debugging shared/private loop variable changes
var PGOHash *HashDebug // for debugging PGO optimization decisions var PGOHash *HashDebug // for debugging PGO optimization decisions
var MergeLocalsHash *HashDebug // for debugging local stack slot merging changes
// DebugHashMatchPkgFunc reports whether debug variable Gossahash // DebugHashMatchPkgFunc reports whether debug variable Gossahash
// //

View File

@ -194,6 +194,7 @@ const (
nameLibfuzzer8BitCounter // if PEXTERN should be assigned to __sancov_cntrs section nameLibfuzzer8BitCounter // if PEXTERN should be assigned to __sancov_cntrs section
nameCoverageAuxVar // instrumentation counter var or pkg ID for cmd/cover nameCoverageAuxVar // instrumentation counter var or pkg ID for cmd/cover
nameAlias // is type name an alias nameAlias // is type name an alias
nameNonMergeable // not a candidate for stack slot merging
) )
func (n *Name) Readonly() bool { return n.flags&nameReadonly != 0 } func (n *Name) Readonly() bool { return n.flags&nameReadonly != 0 }
@ -209,6 +210,7 @@ func (n *Name) InlLocal() bool { return n.flags&nameInlLocal !=
func (n *Name) OpenDeferSlot() bool { return n.flags&nameOpenDeferSlot != 0 } func (n *Name) OpenDeferSlot() bool { return n.flags&nameOpenDeferSlot != 0 }
func (n *Name) Libfuzzer8BitCounter() bool { return n.flags&nameLibfuzzer8BitCounter != 0 } func (n *Name) Libfuzzer8BitCounter() bool { return n.flags&nameLibfuzzer8BitCounter != 0 }
func (n *Name) CoverageAuxVar() bool { return n.flags&nameCoverageAuxVar != 0 } func (n *Name) CoverageAuxVar() bool { return n.flags&nameCoverageAuxVar != 0 }
func (n *Name) NonMergeable() bool { return n.flags&nameNonMergeable != 0 }
func (n *Name) setReadonly(b bool) { n.flags.set(nameReadonly, b) } func (n *Name) setReadonly(b bool) { n.flags.set(nameReadonly, b) }
func (n *Name) SetNeedzero(b bool) { n.flags.set(nameNeedzero, b) } func (n *Name) SetNeedzero(b bool) { n.flags.set(nameNeedzero, b) }
@ -223,6 +225,7 @@ func (n *Name) SetInlLocal(b bool) { n.flags.set(nameInlLocal, b
func (n *Name) SetOpenDeferSlot(b bool) { n.flags.set(nameOpenDeferSlot, b) } func (n *Name) SetOpenDeferSlot(b bool) { n.flags.set(nameOpenDeferSlot, b) }
func (n *Name) SetLibfuzzer8BitCounter(b bool) { n.flags.set(nameLibfuzzer8BitCounter, b) } func (n *Name) SetLibfuzzer8BitCounter(b bool) { n.flags.set(nameLibfuzzer8BitCounter, b) }
func (n *Name) SetCoverageAuxVar(b bool) { n.flags.set(nameCoverageAuxVar, b) } func (n *Name) SetCoverageAuxVar(b bool) { n.flags.set(nameCoverageAuxVar, b) }
func (n *Name) SetNonMergeable(b bool) { n.flags.set(nameNonMergeable, b) }
// OnStack reports whether variable n may reside on the stack. // OnStack reports whether variable n may reside on the stack.
func (n *Name) OnStack() bool { func (n *Name) OnStack() bool {

View File

@ -0,0 +1,691 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package liveness
import (
"cmd/compile/internal/base"
"cmd/compile/internal/bitvec"
"cmd/compile/internal/ir"
"cmd/compile/internal/reflectdata"
"cmd/compile/internal/ssa"
"cmd/internal/obj"
"cmd/internal/src"
"fmt"
"os"
"path/filepath"
"sort"
"strings"
)
// MergeLocalsState encapsulates information about which AUTO
// (stack-allocated) variables within a function can be safely
// merged/overlapped, e.g. made to share a stack slot with some other
// auto. An instance of MergeLocalsState is produced by MergeLocals()
// below and then consumed in ssagen.AllocFrame. The map 'partition'
// contains entries of the form <N,SL> where N is an *ir.Name and SL is
// a slice holding the indices (within 'vars') of other variables that
// share the same slot. For example, if a function contains five
// variables where v1/v2/v3 are safe to overlap and v4/v5 are safe to
// overlap, the MergeLocalsState content might look like
//
// vars: [v1, v2, v3, v4, v5]
// partition: v1 -> [1, 0, 2], v2 -> [1, 0, 2], v3 -> [1, 0, 2]
// v4 -> [3, 4], v5 -> [3, 4]
//
// A nil MergeLocalsState indicates that no local variables meet the
// necessary criteria for overlap.
type MergeLocalsState struct {
// contains auto vars that participate in overlapping; within a
// partition the var at the slice's first index is the "leader"
// whose stack slot the others reuse (see IsLeader/Subsumed).
vars []*ir.Name
// maps each participating auto variable to its overlap partition,
// a slice of indices into 'vars'.
partition map[*ir.Name][]int
}
// candRegion is a sub-range (start, end) corresponding to an interval
// [st,en] within the list of candidate variables; both endpoints are
// inclusive indices. Variables inside a region have already been
// found compatible (see nextRegion) and may be overlapped provided
// their lifetimes are disjoint.
type candRegion struct {
st, en int
}
// MergeLocals analyzes the specified ssa function f to determine which
// of its auto variables can safely share the same stack slot, returning
// a state object that describes how the overlap should be done. It
// returns nil if no suitable candidates were found. On invalid results
// it aborts the compilation via base.FatalfAt.
func MergeLocals(fn *ir.Func, f *ssa.Func) *MergeLocalsState {
cands, idx, regions := collectMergeCandidates(fn)
if len(regions) == 0 {
return nil
}
// Run a dedicated liveness pass over just the candidate vars.
lv := newliveness(fn, f, cands, idx, 0)
// If we have a local variable such as "r2" below that's written
// but then not read, something like:
//
// vardef r1
// r1.x = ...
// vardef r2
// r2.x = 0
// r2.y = ...
// <call foo>
// // no subsequent use of r2
// ... = r1.x
//
// then for the purpose of calculating stack maps at the call, we
// can ignore "r2" completely during liveness analysis for stack
// maps, however for stack slot merging we most definitely want
// to treat the writes as "uses".
lv.conservativeWrites = true
lv.prologue()
lv.solve()
// Build per-candidate lifetime intervals from the liveness result.
cs := &cstate{
fn: fn,
ibuilders: make([]IntervalsBuilder, len(cands)),
}
computeIntervals(lv, cs)
rv := performMerging(lv, cs, regions)
// Sanity-check the computed state before handing it to AllocFrame.
if err := rv.check(); err != nil {
base.FatalfAt(fn.Pos(), "invalid mergelocals state: %v", err)
}
return rv
}
// Subsumed reports whether variable n appears in an overlap
// partition without being that partition's leader (i.e. n's stack
// slot is folded into some other variable's slot).
func (mls *MergeLocalsState) Subsumed(n *ir.Name) bool {
	sl, ok := mls.partition[n]
	return ok && mls.vars[sl[0]] != n
}
// IsLeader reports whether variable n is the leader (first element)
// of a sharing partition, meaning its stack slot is the one retained.
func (mls *MergeLocalsState) IsLeader(n *ir.Name) bool {
	sl, ok := mls.partition[n]
	return ok && mls.vars[sl[0]] == n
}
// Leader returns the leader variable for subsumed var n. It panics
// if n is not a merge candidate or if n is itself a leader.
func (mls *MergeLocalsState) Leader(n *ir.Name) *ir.Name {
	sl, ok := mls.partition[n]
	if !ok {
		panic("not a merge candidate")
	}
	leader := mls.vars[sl[0]]
	if leader == n {
		panic("variable is not subsumed")
	}
	return leader
}
// Followers writes the list of followers for leader n into the slice
// tmp (reusing tmp's storage), sorted by symbol name. It panics if n
// has no partition entry or if n is a subsumed var rather than a
// leader.
func (mls *MergeLocalsState) Followers(n *ir.Name, tmp []*ir.Name) []*ir.Name {
	tmp = tmp[:0]
	sl, ok := mls.partition[n]
	if !ok {
		panic("no entry for leader")
	}
	if mls.vars[sl[0]] != n {
		panic("followers invoked on subsumed var")
	}
	// Skip slot 0 (the leader itself); collect everything after it.
	for i := 1; i < len(sl); i++ {
		tmp = append(tmp, mls.vars[sl[i]])
	}
	sort.SliceStable(tmp, func(i, j int) bool {
		return tmp[i].Sym().Name < tmp[j].Sym().Name
	})
	return tmp
}
// EstSavings returns the estimated reduction in stack size (in bytes)
// for the given merge locals state: the sum of the sizes of all
// subsumed (non-leader) variables.
func (mls *MergeLocalsState) EstSavings() int {
	total := 0
	for n := range mls.partition {
		if !mls.Subsumed(n) {
			continue
		}
		total += int(n.Type().Size())
	}
	return total
}
// check tests for various inconsistencies and problems in mls,
// returning an error if any problems are found: duplicate vars,
// degenerate or out-of-range partitions, leader/subsumed mismatches,
// and vars appearing in partitions they don't key. A nil receiver is
// valid and trivially passes.
func (mls *MergeLocalsState) check() error {
if mls == nil {
return nil
}
used := make(map[int]bool)
seenv := make(map[*ir.Name]int)
// Each variable may appear at most once in the vars slice.
for ii, v := range mls.vars {
if prev, ok := seenv[v]; ok {
return fmt.Errorf("duplicate var %q in vslots: %d and %d\n",
v.Sym().Name, ii, prev)
}
seenv[v] = ii
}
for k, sl := range mls.partition {
// length of slice value needs to be more than 1
if len(sl) < 2 {
return fmt.Errorf("k=%q v=%+v slice len %d invalid",
k.Sym().Name, sl, len(sl))
}
// values in the slice need to be var indices
for i, v := range sl {
if v < 0 || v > len(mls.vars)-1 {
return fmt.Errorf("k=%q v=+%v slpos %d vslot %d out of range of m.v", k.Sym().Name, sl, i, v)
}
}
}
for k, sl := range mls.partition {
foundk := false
// Slot 0 of each partition must be its leader; every later slot
// must be a subsumed var whose Leader() is that same leader.
for i, v := range sl {
vv := mls.vars[v]
if i == 0 {
if !mls.IsLeader(vv) {
return fmt.Errorf("k=%s v=+%v slpos 0 vslot %d IsLeader(%q) is false should be true", k.Sym().Name, sl, v, vv.Sym().Name)
}
} else {
if !mls.Subsumed(vv) {
return fmt.Errorf("k=%s v=+%v slpos %d vslot %d Subsumed(%q) is false should be true", k.Sym().Name, sl, i, v, vv.Sym().Name)
}
if mls.Leader(vv) != mls.vars[sl[0]] {
return fmt.Errorf("k=%s v=+%v slpos %d vslot %d Leader(%q) got %v want %v", k.Sym().Name, sl, i, v, vv.Sym().Name, mls.Leader(vv), mls.vars[sl[0]])
}
}
// The partition key itself must appear in its value slice,
// and no var slot may be claimed by two different keys.
if vv == k {
foundk = true
if used[v] {
return fmt.Errorf("k=%s v=+%v val slice used violation at slpos %d vslot %d", k.Sym().Name, sl, i, v)
}
used[v] = true
}
}
if !foundk {
return fmt.Errorf("k=%s v=+%v slice value missing k", k.Sym().Name, sl)
}
}
// NOTE(review): entries in 'used' are only ever set to true above,
// so this loop appears unable to fire as written; it guards against
// future changes that might record false values — confirm intent.
for i := range used {
if !used[i] {
return fmt.Errorf("pos %d var %q unused", i, mls.vars[i])
}
}
return nil
}
// String returns a human-readable dump of mls: one line per
// partition, of the form "<leader>: <follower> <follower>...", with
// leaders ordered by symbol name.
func (mls *MergeLocalsState) String() string {
	// Gather the leader of each partition.
	var leaders []*ir.Name
	for n, sl := range mls.partition {
		if mls.vars[sl[0]] == n {
			leaders = append(leaders, n)
		}
	}
	sort.Slice(leaders, func(i, j int) bool {
		return leaders[i].Sym().Name < leaders[j].Sym().Name
	})
	var sb strings.Builder
	for _, leader := range leaders {
		sb.WriteString(leader.Sym().Name)
		sb.WriteString(":")
		// Append each follower (slots after the leader's own).
		for _, idx := range mls.partition[leader][1:] {
			sb.WriteString(" ")
			sb.WriteString(mls.vars[idx].Sym().Name)
		}
		sb.WriteString("\n")
	}
	return sb.String()
}
// collectMergeCandidates visits all of the AUTO vars declared in
// function fn and returns a list of candidate variables for merging /
// overlapping. Return values are: 1) a slice of ir.Name's
// corresponding to the candidates, 2) a map that maps ir.Name to slot
// in the slice, and 3) a slice containing regions (start/end pairs)
// corresponding to variables that could be overlapped provided that
// their lifetimes are disjoint. All three results are nil if fewer
// than two overlappable candidates exist.
func collectMergeCandidates(fn *ir.Func) ([]*ir.Name, map[*ir.Name]int32, []candRegion) {
	m := make(map[*ir.Name]int32)
	var cands []*ir.Name
	var regions []candRegion
	// Collect up the available set of appropriate AUTOs in the
	// function as a first step.
	for _, n := range fn.Dcl {
		if !n.Used() {
			continue
		}
		if !ssa.IsMergeCandidate(n) {
			continue
		}
		cands = append(cands, n)
	}
	if len(cands) < 2 {
		return nil, nil, nil
	}
	// Sort by pointerness, size, GC shape, and then name/position so
	// that compatible variables wind up adjacent and the ordering is
	// deterministic from build to build.
	sort.SliceStable(cands, func(i, j int) bool {
		ci, cj := cands[i], cands[j]
		ihp, jhp := 0, 0
		var ilsym, jlsym *obj.LSym
		if ci.Type().HasPointers() {
			ihp = 1
			ilsym, _, _ = reflectdata.GCSym(ci.Type())
		}
		if cj.Type().HasPointers() {
			jhp = 1
			jlsym, _, _ = reflectdata.GCSym(cj.Type())
		}
		if ihp != jhp {
			return ihp < jhp
		}
		if ci.Type().Size() != cj.Type().Size() {
			return ci.Type().Size() < cj.Type().Size()
		}
		if ihp != 0 && jhp != 0 && ilsym != jlsym {
			// FIXME: find less clunky way to do this
			return fmt.Sprintf("%v", ilsym) < fmt.Sprintf("%v", jlsym)
		}
		if ci.Sym().Name != cj.Sym().Name {
			return ci.Sym().Name < cj.Sym().Name
		}
		// Final tie-break on source position. NB: previously this
		// compared ci.Pos() against itself (always false), making the
		// tie-break a no-op; compare against cj.Pos() instead.
		return fmt.Sprintf("%v", ci.Pos()) < fmt.Sprintf("%v", cj.Pos())
	})
	if base.Debug.MergeLocalsTrace > 1 {
		fmt.Fprintf(os.Stderr, "=-= raw cand list for func %v:\n", fn)
		for i := range cands {
			dumpCand(cands[i], i)
		}
	}
	// Now generate a pruned candidate list-- we only want to return a
	// non-empty list if there is some possibility of overlapping two
	// vars.
	var pruned []*ir.Name
	st := 0
	for {
		en := nextRegion(cands, st)
		if en == -1 {
			break
		}
		if st == en {
			// region has just one element, we can skip it
			st++
			continue
		}
		pst := len(pruned)
		pen := pst + (en - st)
		if base.Debug.MergeLocalsTrace > 1 {
			fmt.Fprintf(os.Stderr, "=-= add part %d -> %d\n", pst, pen)
		}
		// non-empty region, add to pruned
		pruned = append(pruned, cands[st:en+1]...)
		regions = append(regions, candRegion{st: pst, en: pen})
		st = en + 1
	}
	if len(pruned) < 2 {
		return nil, nil, nil
	}
	// Build the name -> slot index map over the pruned list.
	for i, n := range pruned {
		m[n] = int32(i)
	}
	if base.Debug.MergeLocalsTrace > 1 {
		fmt.Fprintf(os.Stderr, "=-= pruned candidate list for func %v:\n", fn)
		for i := range pruned {
			dumpCand(pruned[i], i)
		}
	}
	return pruned, m, regions
}
// nextRegion starts at location idx and walks forward in the cands
// slice looking for variables that are "compatible" (overlappable)
// with the variable at position idx; it returns the index of the last
// compatible variable, i.e. the inclusive end of the new region.
// Returns -1 if idx is past the end of the slice. Compatibility is:
// identical size plus identical GC shape when the first variable's
// type has pointers, or identical size and pointer-free when it does
// not. (Removed a dead "ok := true" local whose value never changed
// and whose "!ok" test could never fire.)
func nextRegion(cands []*ir.Name, idx int) int {
	n := len(cands)
	if idx >= n {
		return -1
	}
	c0 := cands[idx]
	hp0 := c0.Type().HasPointers()
	for j := idx + 1; j < n; j++ {
		cj := cands[j]
		hpj := cj.Type().HasPointers()
		if hp0 {
			if !hpj || c0.Type().Size() != cj.Type().Size() {
				return j - 1
			}
			// GC shape must match if both types have pointers.
			gcsym0, _, _ := reflectdata.GCSym(c0.Type())
			gcsymj, _, _ := reflectdata.GCSym(cj.Type())
			if gcsym0 != gcsymj {
				return j - 1
			}
		} else {
			// If no pointers, match size only (and cj must also be
			// pointer-free).
			if hpj || c0.Type().Size() != cj.Type().Size() {
				return j - 1
			}
		}
	}
	return n - 1
}
// cstate carries the working state for interval construction: the
// function being compiled plus one IntervalsBuilder per merge
// candidate (indexed the same way as the candidate list).
type cstate struct {
fn *ir.Func
ibuilders []IntervalsBuilder
}
// mergeVisitRegion tries to perform overlapping of variables with a
// given subrange of cands described by st and en (indices into our
// candidate var list), where the variables within this range have
// already been determined to be compatible with respect to type,
// size, etc. Overlapping is done in a greedy fashion: we select the
// first element in the st->en range, then walk the rest of the
// elements adding in vars whose lifetimes don't overlap with the
// first element, then repeat the process until we run out of work to do.
// Results are recorded in mls.vars and mls.partition.
func (mls *MergeLocalsState) mergeVisitRegion(lv *liveness, ivs []Intervals, st, en int) {
if base.Debug.MergeLocalsTrace > 1 {
fmt.Fprintf(os.Stderr, "=-= mergeVisitRegion(st=%d, en=%d)\n", st, en)
}
n := en - st + 1
// 'used' tracks region-relative slots already assigned to a partition.
used := bitvec.New(int32(n))
// nxt returns the first unconsumed candidate slot at or after 'slot'
// (in absolute candidate-list coordinates), or -1 if none remain.
nxt := func(slot int) int {
for c := slot - st; c < n; c++ {
if used.Get(int32(c)) {
continue
}
return c + st
}
return -1
}
navail := n
cands := lv.vars
if base.Debug.MergeLocalsTrace > 1 {
fmt.Fprintf(os.Stderr, " =-= navail = %d\n", navail)
}
// Repeat until fewer than two unassigned vars remain (a single
// leftover var can't be overlapped with anything).
for navail >= 2 {
leader := nxt(st)
used.Set(int32(leader - st))
navail--
if base.Debug.MergeLocalsTrace > 1 {
fmt.Fprintf(os.Stderr, " =-= begin leader %d used=%s\n", leader,
used.String())
}
elems := []int{leader}
// lints accumulates the union of lifetimes of everything merged
// into the leader so far.
lints := ivs[leader]
for succ := nxt(leader + 1); succ != -1; succ = nxt(succ + 1) {
// Skip if de-selected by merge locals hash.
if base.Debug.MergeLocalsHash != "" {
if !base.MergeLocalsHash.MatchPosWithInfo(cands[succ].Pos(), "mergelocals", nil) {
continue
}
}
// Skip if already used.
if used.Get(int32(succ - st)) {
continue
}
if base.Debug.MergeLocalsTrace > 1 {
fmt.Fprintf(os.Stderr, " =-= overlap of %d[%v] {%s} with %d[%v] {%s} is: %v\n", leader, cands[leader], lints.String(), succ, cands[succ], ivs[succ].String(), lints.Overlaps(ivs[succ]))
}
// Can we overlap leader with this var?
if lints.Overlaps(ivs[succ]) {
continue
} else {
// Add to overlap set.
elems = append(elems, succ)
lints = lints.Merge(ivs[succ])
}
}
if len(elems) > 1 {
// We found some things to overlap with leader. Add the
// candidate elements to "vars" and update "partition".
off := len(mls.vars)
sl := make([]int, len(elems))
for i, candslot := range elems {
sl[i] = off + i
mls.vars = append(mls.vars, cands[candslot])
mls.partition[cands[candslot]] = sl
}
navail -= (len(elems) - 1)
for i := range elems {
used.Set(int32(elems[i] - st))
}
if base.Debug.MergeLocalsTrace > 1 {
fmt.Fprintf(os.Stderr, "=-= overlapping %+v:\n", sl)
for i := range sl {
dumpCand(mls.vars[sl[i]], sl[i])
}
for i, v := range elems {
fmt.Fprintf(os.Stderr, "=-= %d: sl=%d %s\n", i, v, ivs[v])
}
}
}
}
}
// performMerging carries out variable merging within each of the
// candidate ranges in regions, returning a state object
// that describes the variable overlaps, or nil if no overlaps were
// actually performed.
func performMerging(lv *liveness, cs *cstate, regions []candRegion) *MergeLocalsState {
cands := lv.vars
mls := &MergeLocalsState{
partition: make(map[*ir.Name][]int),
}
// Finish intervals construction.
ivs := make([]Intervals, len(cands))
for i := range cands {
var err error
ivs[i], err = cs.ibuilders[i].Finish()
if err != nil {
// On failure, optionally dump the function's instructions
// before aborting compilation.
// NOTE(review): ninstr is only counted when tracing is on,
// so the fatal message reports 0 instrs otherwise — confirm
// whether that's intended.
ninstr := 0
if base.Debug.MergeLocalsTrace != 0 {
iidx := 0
for k := 0; k < len(lv.f.Blocks); k++ {
b := lv.f.Blocks[k]
fmt.Fprintf(os.Stderr, "\n")
for _, v := range b.Values {
fmt.Fprintf(os.Stderr, " b%d %d: %s\n", k, iidx, v.LongString())
iidx++
ninstr++
}
}
}
base.FatalfAt(cands[i].Pos(), "interval construct error for var %q in func %q (%d instrs): %v", cands[i].Sym().Name, ir.FuncName(cs.fn), ninstr, err)
return nil
}
}
// Dump state before attempting overlap.
if base.Debug.MergeLocalsTrace > 1 {
fmt.Fprintf(os.Stderr, "=-= cands live before overlap:\n")
for i := range cands {
c := cands[i]
fmt.Fprintf(os.Stderr, "%d: %v sz=%d ivs=%s\n",
i, c.Sym().Name, c.Type().Size(), ivs[i].String())
}
fmt.Fprintf(os.Stderr, "=-= regions (%d): ", len(regions))
for _, cr := range regions {
fmt.Fprintf(os.Stderr, " [%d,%d]", cr.st, cr.en)
}
fmt.Fprintf(os.Stderr, "\n")
}
if base.Debug.MergeLocalsTrace > 1 {
fmt.Fprintf(os.Stderr, "=-= len(regions) = %d\n", len(regions))
}
// Apply a greedy merge/overlap strategy within each region
// of compatible variables.
for _, cr := range regions {
mls.mergeVisitRegion(lv, ivs, cr.st, cr.en)
}
if len(mls.vars) == 0 {
return nil
}
return mls
}
// computeIntervals performs a backwards sweep over the instructions
// of the function we're compiling, building up an Intervals object
// for each candidate variable by looking for upwards exposed uses
// and kills. Instructions are numbered 0..ninstr-1 in forward block
// order; the walk visits them in reverse, feeding Live/Kill events to
// the per-variable builders in cs.ibuilders.
func computeIntervals(lv *liveness, cs *cstate) {
nvars := int32(len(lv.vars))
liveout := bitvec.New(nvars)
// Optional full-function instruction dump for debugging, keyed by
// the -d=mergelocalsdumpfunc flag (suffix match on function name).
if base.Debug.MergeLocalsDumpFunc != "" &&
strings.HasSuffix(fmt.Sprintf("%v", cs.fn), base.Debug.MergeLocalsDumpFunc) {
fmt.Fprintf(os.Stderr, "=-= mergelocalsdumpfunc %v:\n", cs.fn)
ii := 0
for k, b := range lv.f.Blocks {
fmt.Fprintf(os.Stderr, "b%d:\n", k)
for _, v := range b.Values {
pos := base.Ctxt.PosTable.Pos(v.Pos)
fmt.Fprintf(os.Stderr, "=-= %d L%d|C%d %s\n", ii, pos.RelLine(), pos.RelCol(), v.LongString())
ii++
}
}
}
// Count instructions.
ninstr := 0
for _, b := range lv.f.Blocks {
ninstr += len(b.Values)
}
// current instruction index during backwards walk
iidx := ninstr - 1
// Make a backwards pass over all blocks
for k := len(lv.f.Blocks) - 1; k >= 0; k-- {
b := lv.f.Blocks[k]
be := lv.blockEffects(b)
if base.Debug.MergeLocalsTrace > 2 {
fmt.Fprintf(os.Stderr, "=-= liveout from tail of b%d: ", k)
for j := range lv.vars {
if be.liveout.Get(int32(j)) {
fmt.Fprintf(os.Stderr, " %q", lv.vars[j].Sym().Name)
}
}
fmt.Fprintf(os.Stderr, "\n")
}
// Take into account effects taking place at end of this basic
// block by comparing our current live set with liveout for
// the block. If a given var was not live before and is now
// becoming live we need to mark this transition with a
// builder "Live" call; similarly if a var was live before and
// is now no longer live, we need a "Kill" call.
for j := range lv.vars {
isLive := liveout.Get(int32(j))
blockLiveOut := be.liveout.Get(int32(j))
if isLive {
if !blockLiveOut {
if base.Debug.MergeLocalsTrace > 2 {
fmt.Fprintf(os.Stderr, "=+= at instr %d block boundary kill of %v\n", iidx, lv.vars[j])
}
cs.ibuilders[j].Kill(iidx)
}
} else if blockLiveOut {
if base.Debug.MergeLocalsTrace > 2 {
fmt.Fprintf(os.Stderr, "=+= at block-end instr %d %v becomes live\n",
iidx, lv.vars[j])
}
cs.ibuilders[j].Live(iidx)
}
}
// Set our working "currently live" set to the previously
// computed live out set for the block.
liveout.Copy(be.liveout)
// Now walk backwards through this block.
for i := len(b.Values) - 1; i >= 0; i-- {
v := b.Values[i]
if base.Debug.MergeLocalsTrace > 2 {
fmt.Fprintf(os.Stderr, "=-= b%d instr %d: %s\n", k, iidx, v.LongString())
}
// Update liveness based on what we see happening in this
// instruction.
pos, e := lv.valueEffects(v)
becomeslive := e&uevar != 0
iskilled := e&varkill != 0
if becomeslive && iskilled {
// we do not ever expect to see both a kill and an
// upwards exposed use given our size constraints.
panic("should never happen")
}
if iskilled && liveout.Get(pos) {
cs.ibuilders[pos].Kill(iidx)
liveout.Unset(pos)
if base.Debug.MergeLocalsTrace > 2 {
fmt.Fprintf(os.Stderr, "=+= at instr %d kill of %v\n",
iidx, lv.vars[pos])
}
} else if becomeslive && !liveout.Get(pos) {
cs.ibuilders[pos].Live(iidx)
liveout.Set(pos)
if base.Debug.MergeLocalsTrace > 2 {
fmt.Fprintf(os.Stderr, "=+= at instr %d upwards-exposed use of %v\n",
iidx, lv.vars[pos])
}
}
iidx--
}
// At the entry block, nothing should still be live on entry;
// a live-on-entry candidate indicates an analysis bug.
if b == lv.f.Entry {
for j, v := range lv.vars {
if liveout.Get(int32(j)) {
lv.f.Fatalf("%v %L recorded as live on entry",
lv.fn.Nname, v)
}
}
}
}
// After visiting every instruction the index must have walked
// exactly back to -1.
if iidx != -1 {
panic("iidx underflow")
}
}
// dumpCand writes a one-line debug description of merge candidate c
// (at slot i in the candidate list) to stderr: its full inlining
// position chain, symbol name, size, pointerness, and type.
func dumpCand(c *ir.Name, i int) {
	// fmtFullPos renders every position in the inlining chain of p,
	// separated by '|'.
	fmtFullPos := func(p src.XPos) string {
		var sb strings.Builder
		sep := ""
		base.Ctxt.AllPos(p, func(pos src.Pos) {
			// Write the separator literally. The previous code passed
			// sep as a Fprintf format string (a go vet printf
			// violation); WriteString avoids any % interpretation.
			sb.WriteString(sep)
			sep = "|"
			file := filepath.Base(pos.Filename())
			fmt.Fprintf(&sb, "%s:%d:%d", file, pos.Line(), pos.Col())
		})
		return sb.String()
	}
	fmt.Fprintf(os.Stderr, " %d: %s %q sz=%d hp=%v t=%v\n",
		i, fmtFullPos(c.Pos()), c.Sym().Name, c.Type().Size(),
		c.Type().HasPointers(), c.Type())
}
// MakeMergeLocalsState constructs a MergeLocalsState from the given
// partition map and vars slice, validating it via check(). Intended
// for unit testing only.
func MakeMergeLocalsState(partition map[*ir.Name][]int, vars []*ir.Name) (*MergeLocalsState, error) {
	mls := &MergeLocalsState{
		partition: partition,
		vars:      vars,
	}
	err := mls.check()
	if err != nil {
		return nil, err
	}
	return mls, nil
}

View File

@ -143,6 +143,11 @@ type liveness struct {
doClobber bool // Whether to clobber dead stack slots in this function. doClobber bool // Whether to clobber dead stack slots in this function.
noClobberArgs bool // Do not clobber function arguments noClobberArgs bool // Do not clobber function arguments
// treat "dead" writes as equivalent to reads during the analysis;
// used only during liveness analysis for stack slot merging (doesn't
// make sense for stackmap analysis).
conservativeWrites bool
} }
// Map maps from *ssa.Value to StackMapIndex. // Map maps from *ssa.Value to StackMapIndex.
@ -312,8 +317,12 @@ func (lv *liveness) valueEffects(v *ssa.Value) (int32, liveEffect) {
if e&(ssa.SymRead|ssa.SymAddr) != 0 { if e&(ssa.SymRead|ssa.SymAddr) != 0 {
effect |= uevar effect |= uevar
} }
if e&ssa.SymWrite != 0 && (!isfat(n.Type()) || v.Op == ssa.OpVarDef) { if e&ssa.SymWrite != 0 {
effect |= varkill if !isfat(n.Type()) || v.Op == ssa.OpVarDef {
effect |= varkill
} else if lv.conservativeWrites {
effect |= uevar
}
} }
if effect == 0 { if effect == 0 {
@ -450,6 +459,11 @@ func (lv *liveness) blockEffects(b *ssa.Block) *blockEffects {
// this argument and the in arguments are always assumed live. The vars // this argument and the in arguments are always assumed live. The vars
// argument is a slice of *Nodes. // argument is a slice of *Nodes.
func (lv *liveness) pointerMap(liveout bitvec.BitVec, vars []*ir.Name, args, locals bitvec.BitVec) { func (lv *liveness) pointerMap(liveout bitvec.BitVec, vars []*ir.Name, args, locals bitvec.BitVec) {
var slotsSeen map[int64]*ir.Name
checkForDuplicateSlots := base.Debug.MergeLocals != 0
if checkForDuplicateSlots {
slotsSeen = make(map[int64]*ir.Name)
}
for i := int32(0); ; i++ { for i := int32(0); ; i++ {
i = liveout.Next(i) i = liveout.Next(i)
if i < 0 { if i < 0 {
@ -468,6 +482,12 @@ func (lv *liveness) pointerMap(liveout bitvec.BitVec, vars []*ir.Name, args, loc
fallthrough // PPARAMOUT in registers acts memory-allocates like an AUTO fallthrough // PPARAMOUT in registers acts memory-allocates like an AUTO
case ir.PAUTO: case ir.PAUTO:
typebits.Set(node.Type(), node.FrameOffset()+lv.stkptrsize, locals) typebits.Set(node.Type(), node.FrameOffset()+lv.stkptrsize, locals)
if checkForDuplicateSlots {
if prev, ok := slotsSeen[node.FrameOffset()]; ok {
base.FatalfAt(node.Pos(), "two vars live at pointerMap generation: %q and %q", prev.Sym().Name, node.Sym().Name)
}
slotsSeen[node.FrameOffset()] = node
}
} }
} }
} }

View File

@ -314,8 +314,9 @@ func checkFunc(f *Func) {
f.Fatalf("bad arg 1 type to %s: want integer, have %s", v.Op, v.Args[1].LongString()) f.Fatalf("bad arg 1 type to %s: want integer, have %s", v.Op, v.Args[1].LongString())
} }
case OpVarDef: case OpVarDef:
if !v.Aux.(*ir.Name).Type().HasPointers() { n := v.Aux.(*ir.Name)
f.Fatalf("vardef must have pointer type %s", v.Aux.(*ir.Name).Type().String()) if !n.Type().HasPointers() && !IsMergeCandidate(n) {
f.Fatalf("vardef must be merge candidate or have pointer type %s", v.Aux.(*ir.Name).Type().String())
} }
case OpNilCheck: case OpNilCheck:
// nil checks have pointer type before scheduling, and // nil checks have pointer type before scheduling, and

View File

@ -838,5 +838,25 @@ func (f *Func) useFMA(v *Value) bool {
// NewLocal returns a new anonymous local variable of the given type. // NewLocal returns a new anonymous local variable of the given type.
func (f *Func) NewLocal(pos src.XPos, typ *types.Type) *ir.Name { func (f *Func) NewLocal(pos src.XPos, typ *types.Type) *ir.Name {
return typecheck.TempAt(pos, f.fe.Func(), typ) // Note: adds new auto to fn.Dcl list nn := typecheck.TempAt(pos, f.fe.Func(), typ) // Note: adds new auto to fn.Dcl list
nn.SetNonMergeable(true)
return nn
}
// IsMergeCandidate returns true if variable n could participate in
// stack slot merging. For now we're restricting the set to items
// larger than what CanSSA would allow (approximately); we also
// disallow things marked as open defer slots so as to avoid
// complicating liveness analysis.
func IsMergeCandidate(n *ir.Name) bool {
if base.Debug.MergeLocals == 0 ||
base.Flag.N != 0 ||
n.Class != ir.PAUTO ||
n.Type().Size() <= int64(3*types.PtrSize) ||
n.Addrtaken() ||
n.NonMergeable() ||
n.OpenDeferSlot() {
return false
}
return true
} }

View File

@ -13,6 +13,7 @@ import (
"cmd/compile/internal/base" "cmd/compile/internal/base"
"cmd/compile/internal/ir" "cmd/compile/internal/ir"
"cmd/compile/internal/liveness"
"cmd/compile/internal/objw" "cmd/compile/internal/objw"
"cmd/compile/internal/ssa" "cmd/compile/internal/ssa"
"cmd/compile/internal/types" "cmd/compile/internal/types"
@ -151,6 +152,18 @@ func (s *ssafn) AllocFrame(f *ssa.Func) {
} }
} }
var mls *liveness.MergeLocalsState
if base.Debug.MergeLocals != 0 {
mls = liveness.MergeLocals(fn, f)
if base.Debug.MergeLocalsTrace == 1 && mls != nil {
fmt.Fprintf(os.Stderr, "%s: %d bytes of stack space saved via stack slot merging\n", ir.FuncName(fn), mls.EstSavings())
if base.Debug.MergeLocalsTrace > 1 {
fmt.Fprintf(os.Stderr, "=-= merge locals state for %v:\n%v",
fn, mls)
}
}
}
// Use sort.SliceStable instead of sort.Slice so stack layout (and thus // Use sort.SliceStable instead of sort.Slice so stack layout (and thus
// compiler output) is less sensitive to frontend changes that // compiler output) is less sensitive to frontend changes that
// introduce or remove unused variables. // introduce or remove unused variables.
@ -158,6 +171,22 @@ func (s *ssafn) AllocFrame(f *ssa.Func) {
return cmpstackvarlt(fn.Dcl[i], fn.Dcl[j]) return cmpstackvarlt(fn.Dcl[i], fn.Dcl[j])
}) })
if base.Debug.MergeLocalsTrace > 1 && mls != nil {
fmt.Fprintf(os.Stderr, "=-= sorted DCL for %v:\n", fn)
for i, v := range fn.Dcl {
if !ssa.IsMergeCandidate(v) {
continue
}
fmt.Fprintf(os.Stderr, " %d: %q isleader=%v subsumed=%v used=%v\n", i, v.Sym().Name, mls.IsLeader(v), mls.Subsumed(v), v.Used())
}
}
var leaders map[*ir.Name]int64
if mls != nil {
leaders = make(map[*ir.Name]int64)
}
// Reassign stack offsets of the locals that are used. // Reassign stack offsets of the locals that are used.
lastHasPtr := false lastHasPtr := false
for i, n := range fn.Dcl { for i, n := range fn.Dcl {
@ -165,12 +194,14 @@ func (s *ssafn) AllocFrame(f *ssa.Func) {
// i.e., stack assign if AUTO, or if PARAMOUT in registers (which has no predefined spill locations) // i.e., stack assign if AUTO, or if PARAMOUT in registers (which has no predefined spill locations)
continue continue
} }
if mls != nil && mls.Subsumed(n) {
continue
}
if !n.Used() { if !n.Used() {
fn.DebugInfo.(*ssa.FuncDebug).OptDcl = fn.Dcl[i:] fn.DebugInfo.(*ssa.FuncDebug).OptDcl = fn.Dcl[i:]
fn.Dcl = fn.Dcl[:i] fn.Dcl = fn.Dcl[:i]
break break
} }
types.CalcSize(n.Type()) types.CalcSize(n.Type())
w := n.Type().Size() w := n.Type().Size()
if w >= types.MaxWidth || w < 0 { if w >= types.MaxWidth || w < 0 {
@ -195,6 +226,42 @@ func (s *ssafn) AllocFrame(f *ssa.Func) {
lastHasPtr = false lastHasPtr = false
} }
n.SetFrameOffset(-s.stksize) n.SetFrameOffset(-s.stksize)
if mls != nil && mls.IsLeader(n) {
leaders[n] = -s.stksize
}
}
if mls != nil {
followers := []*ir.Name{}
newdcl := make([]*ir.Name, 0, len(fn.Dcl))
for i := 0; i < len(fn.Dcl); i++ {
n := fn.Dcl[i]
if mls.Subsumed(n) {
continue
}
newdcl = append(newdcl, n)
if off, ok := leaders[n]; ok {
followers = mls.Followers(n, followers)
for _, f := range followers {
// Set the stack offset for each follower to be
// the same as the leader.
f.SetFrameOffset(off)
}
// position followers immediately after leader
newdcl = append(newdcl, followers...)
}
}
fn.Dcl = newdcl
}
if base.Debug.MergeLocalsTrace > 1 {
fmt.Fprintf(os.Stderr, "=-= stack layout for %v:\n", fn)
for i, v := range fn.Dcl {
if v.Op() != ir.ONAME || (v.Class != ir.PAUTO && !(v.Class == ir.PPARAMOUT && v.IsOutputParamInRegisters())) {
continue
}
fmt.Fprintf(os.Stderr, " %d: %q frameoff %d used=%v\n", i, v.Sym().Name, v.FrameOffset(), v.Used())
}
} }
s.stksize = types.RoundUp(s.stksize, s.stkalign) s.stksize = types.RoundUp(s.stksize, s.stkalign)

View File

@ -633,7 +633,7 @@ func (s *state) zeroResults() {
if typ := n.Type(); ssa.CanSSA(typ) { if typ := n.Type(); ssa.CanSSA(typ) {
s.assign(n, s.zeroVal(typ), false, 0) s.assign(n, s.zeroVal(typ), false, 0)
} else { } else {
if typ.HasPointers() { if typ.HasPointers() || ssa.IsMergeCandidate(n) {
s.vars[memVar] = s.newValue1A(ssa.OpVarDef, types.TypeMem, n, s.mem()) s.vars[memVar] = s.newValue1A(ssa.OpVarDef, types.TypeMem, n, s.mem())
} }
s.zero(n.Type(), s.decladdrs[n]) s.zero(n.Type(), s.decladdrs[n])
@ -3942,7 +3942,7 @@ func (s *state) assignWhichMayOverlap(left ir.Node, right *ssa.Value, deref bool
// If this assignment clobbers an entire local variable, then emit // If this assignment clobbers an entire local variable, then emit
// OpVarDef so liveness analysis knows the variable is redefined. // OpVarDef so liveness analysis knows the variable is redefined.
if base, ok := clobberBase(left).(*ir.Name); ok && base.OnStack() && skip == 0 && t.HasPointers() { if base, ok := clobberBase(left).(*ir.Name); ok && base.OnStack() && skip == 0 && (t.HasPointers() || ssa.IsMergeCandidate(base)) {
s.vars[memVar] = s.newValue1Apos(ssa.OpVarDef, types.TypeMem, base, s.mem(), !ir.IsAutoTmp(base)) s.vars[memVar] = s.newValue1Apos(ssa.OpVarDef, types.TypeMem, base, s.mem(), !ir.IsAutoTmp(base))
} }
@ -5382,7 +5382,8 @@ func (s *state) call(n *ir.CallExpr, k callKind, returnResultAddr bool, deferExt
} }
// Make a defer struct on the stack. // Make a defer struct on the stack.
t := deferstruct() t := deferstruct()
_, addr := s.temp(n.Pos(), t) n, addr := s.temp(n.Pos(), t)
n.SetNonMergeable(true)
s.store(closure.Type, s.store(closure.Type,
s.newValue1I(ssa.OpOffPtr, closure.Type.PtrTo(), t.FieldOff(deferStructFnField), addr), s.newValue1I(ssa.OpOffPtr, closure.Type.PtrTo(), t.FieldOff(deferStructFnField), addr),
closure) closure)
@ -6886,7 +6887,7 @@ func (s *state) dottype1(pos src.XPos, src, dst *types.Type, iface, source, targ
// temp allocates a temp of type t at position pos // temp allocates a temp of type t at position pos
func (s *state) temp(pos src.XPos, t *types.Type) (*ir.Name, *ssa.Value) { func (s *state) temp(pos src.XPos, t *types.Type) (*ir.Name, *ssa.Value) {
tmp := typecheck.TempAt(pos, s.curfn, t) tmp := typecheck.TempAt(pos, s.curfn, t)
if t.HasPointers() { if t.HasPointers() || (ssa.IsMergeCandidate(tmp) && t != deferstruct()) {
s.vars[memVar] = s.newValue1A(ssa.OpVarDef, types.TypeMem, tmp, s.mem()) s.vars[memVar] = s.newValue1A(ssa.OpVarDef, types.TypeMem, tmp, s.mem())
} }
addr := s.addr(tmp) addr := s.addr(tmp)

View File

@ -0,0 +1,184 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package test
import (
"cmd/compile/internal/ir"
"cmd/compile/internal/liveness"
"cmd/compile/internal/typecheck"
"cmd/compile/internal/types"
"cmd/internal/src"
"internal/testenv"
"path/filepath"
"slices"
"sort"
"strings"
"testing"
)
// TestMergeLocalState exercises the validation logic in
// liveness.MakeMergeLocalsState: a well-formed partition is accepted,
// while malformed inputs (negative slot index, duplicate variable,
// singleton partition, missing slot, disagreeing partitions) must
// yield an error.
func TestMergeLocalState(t *testing.T) {
	newInt32Var := func(name string) *ir.Name {
		return ir.NewNameAt(src.NoXPos, typecheck.Lookup(name), types.Types[types.TINT32])
	}
	v1, v2, v3 := newInt32Var("v1"), newInt32Var("v2"), newInt32Var("v3")

	cases := []struct {
		vars      []*ir.Name
		partition map[*ir.Name][]int
		experr    bool
	}{
		{
			vars: []*ir.Name{v1, v2, v3},
			partition: map[*ir.Name][]int{
				v1: {0, 1, 2},
				v2: {0, 1, 2},
				v3: {0, 1, 2},
			},
			experr: false,
		},
		{
			// invalid mls.v slot -1
			vars: []*ir.Name{v1, v2, v3},
			partition: map[*ir.Name][]int{
				v1: {-1, 0},
				v2: {0, 1, 2},
				v3: {0, 1, 2},
			},
			experr: true,
		},
		{
			// duplicate var in v
			vars: []*ir.Name{v1, v2, v2},
			partition: map[*ir.Name][]int{
				v1: {0, 1, 2},
				v2: {0, 1, 2},
				v3: {0, 1, 2},
			},
			experr: true,
		},
		{
			// single element in partition
			vars: []*ir.Name{v1, v2, v3},
			partition: map[*ir.Name][]int{
				v1: {0},
				v2: {0, 1, 2},
				v3: {0, 1, 2},
			},
			experr: true,
		},
		{
			// missing element 2
			vars: []*ir.Name{v1, v2, v3},
			partition: map[*ir.Name][]int{
				v1: {0, 1},
				v2: {0, 1},
				v3: {0, 1},
			},
			experr: true,
		},
		{
			// partitions disagree for v1 vs v2
			vars: []*ir.Name{v1, v2, v3},
			partition: map[*ir.Name][]int{
				v1: {0, 1, 2},
				v2: {1, 0, 2},
				v3: {0, 1, 2},
			},
			experr: true,
		},
	}

	for k, tc := range cases {
		mls, err := liveness.MakeMergeLocalsState(tc.partition, tc.vars)
		t.Logf("tc %d err is %v\n", k, err)
		switch {
		case tc.experr && err == nil:
			t.Fatalf("tc:%d missing error mls %v", k, mls)
		case !tc.experr && err != nil:
			t.Fatalf("tc:%d unexpected error mls %v", k, err)
		}
		if mls != nil {
			t.Logf("tc %d: mls: %v\n", k, mls.String())
		}
	}
}
// TestMergeLocalsIntegration compiles a specific canned package with
// merge-locals tracing enabled and inspects the reported frame
// offsets to confirm that stack slot merging took place.
//
// Stack slot merging is a greedy algorithm, and there can be many
// possible (all legal) ways to overlap a given set of candidate
// variables. Rather than locking down specific overlappings or frame
// offsets, this test only requires one clump of 3 variables sharing a
// frame offset, one clump of 2, and the remainder as singletons.
//
// Representative trace output of interest:
//
//	=-= stack layout for ABC:
//	 2: "p1" frameoff -8200 used=true
//	 3: "xp3" frameoff -8200 used=true
//	 4: "xp4" frameoff -8200 used=true
//	 5: "p2" frameoff -16400 used=true
//	 6: "s" frameoff -24592 used=true
//	 7: "v1" frameoff -32792 used=true
//	 8: "v3" frameoff -32792 used=true
//	 9: "v2" frameoff -40992 used=true
func TestMergeLocalsIntegration(t *testing.T) {
	testenv.MustHaveGoBuild(t)

	tmpdir := t.TempDir()
	src := filepath.Join("testdata", "mergelocals", "integration.go")
	obj := filepath.Join(tmpdir, "p.a")
	out, err := testenv.Command(t, testenv.GoToolPath(t), "tool", "compile",
		"-p=p", "-c", "1", "-o", obj,
		"-d=mergelocalstrace=2,mergelocals=1", src).CombinedOutput()
	if err != nil {
		t.Fatalf("failed to compile: %v\n%s", err, out)
	}

	// Scan the trace output, tallying how many variables landed at
	// each frame offset. Everything before the layout header is
	// skipped; blank lines are ignored.
	vars := make(map[string]string)
	varsAtFrameOffset := make(map[string]int)
	inLayout := false
	for _, line := range strings.Split(string(out), "\n") {
		if line == "=-= stack layout for ABC:" {
			inLayout = true
			continue
		}
		if !inLayout || line == "" {
			continue
		}
		fields := strings.Fields(line)
		if len(fields) != 5 {
			t.Fatalf("bad trace output line: %s", line)
		}
		vname, frameoff := fields[1], fields[3]
		varsAtFrameOffset[frameoff]++
		vars[vname] = frameoff
	}
	if wantvnum, gotvnum := 8, len(vars); wantvnum != gotvnum {
		t.Fatalf("expected trace output on %d vars got %d\n", wantvnum, gotvnum)
	}

	// We expect one clump of 3, another clump of 2, and the rest singletons.
	expected := []int{1, 1, 1, 2, 3}
	got := make([]int, 0, len(varsAtFrameOffset))
	for _, n := range varsAtFrameOffset {
		got = append(got, n)
	}
	sort.Ints(got)
	if !slices.Equal(got, expected) {
		t.Fatalf("expected variable clumps %+v not equal to what we got: %+v", expected, got)
	}
}

View File

@ -0,0 +1,83 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package p
// This type and the following one will share the same GC shape and
// size (one pointer word followed by 1024 ints), making the two
// eligible to be overlapped with each other.
type Pointery struct {
	p *Pointery
	x [1024]int
}

type Pointery2 struct {
	p *Pointery2
	x [1024]int
}

// This type and the following one will have the same size; since
// neither contains pointers, they can be overlapped even though
// their field layouts differ.
type Vanilla struct {
	np uintptr
	x  [1024]int
}

type Vanilla2 struct {
	np uintptr
	x  [1023]int
	y  int
}

// Single has the same shape as Vanilla2 minus the trailing int, so it
// does not share a size with any of the types above.
type Single struct {
	np uintptr
	x  [1023]int
}
// ABC is canned input for TestMergeLocalsIntegration: it declares
// several local variables with deliberately disjoint (or deliberately
// overlapping) live ranges, and the test checks the frame offsets the
// compiler reports for them. Every variable is both written and read
// via i/j-indexed accesses so that it counts as "used" and survives
// to frame layout. NOTE(review): do not restructure this function —
// the integration test's expected slot "clumps" depend on these exact
// lifetimes.
func ABC(i, j int) int {
	r := 0
	// here v1 interferes with v2 but could be overlapped with v3.
	// we can also overlap v1 with v3.
	var v1 Vanilla
	if i < 101 {
		var v2 Vanilla
		v1.x[i] = j
		r += v1.x[j]
		v2.x[i] = j
		r += v2.x[j]
	}
	{
		// v3's lifetime is confined to this inner block.
		var v3 Vanilla2
		v3.x[i] = j
		r += v3.x[j]
	}
	var s Single
	s.x[i] = j
	r += s.x[j]
	// Here p1 and p2 interfere, but p1 could be overlapped with xp3.
	var p1, p2 Pointery
	p1.x[i] = j
	r += p1.x[j]
	p2.x[i] = j
	r += p2.x[j]
	{
		// xp3's lifetime is confined to this inner block.
		var xp3 Pointery2
		xp3.x[i] = j
		r += xp3.x[j]
	}
	if i == j*2 {
		// p2 live on this path
		p2.x[i] += j
		r += p2.x[j]
	} else {
		// p2 not live on this path
		var xp4 Pointery2
		xp4.x[i] = j
		r += xp4.x[j]
	}
	return r
}

View File

@ -25,7 +25,9 @@ func initStackTemp(init *ir.Nodes, tmp *ir.Name, val ir.Node) *ir.AddrExpr {
// allocated temporary variable of the given type. Statements to // allocated temporary variable of the given type. Statements to
// zero-initialize tmp are appended to init. // zero-initialize tmp are appended to init.
func stackTempAddr(init *ir.Nodes, typ *types.Type) *ir.AddrExpr { func stackTempAddr(init *ir.Nodes, typ *types.Type) *ir.AddrExpr {
return initStackTemp(init, typecheck.TempAt(base.Pos, ir.CurFunc, typ), nil) n := typecheck.TempAt(base.Pos, ir.CurFunc, typ)
n.SetNonMergeable(true)
return initStackTemp(init, n, nil)
} }
// stackBufAddr returns the expression &tmp, where tmp is a newly // stackBufAddr returns the expression &tmp, where tmp is a newly

View File

@ -11,214 +11,423 @@
package main package main
var z [10<<20]byte var z [10 << 20]byte
func main() { // GC_ERROR "stack frame too large" func main() { // GC_ERROR "stack frame too large"
// seq 1 206 | sed 's/.*/ var x& [10<<20]byte; z = x&/' // seq 1 206 | sed 's/.*/ var x& [10<<20]byte/'
var x1 [10<<20]byte; z = x1 // seq 1 206 | sed 's/.*/ z = x&/'
var x2 [10<<20]byte; z = x2 var x1 [10<<20]byte
var x3 [10<<20]byte; z = x3 var x2 [10<<20]byte
var x4 [10<<20]byte; z = x4 var x3 [10<<20]byte
var x5 [10<<20]byte; z = x5 var x4 [10<<20]byte
var x6 [10<<20]byte; z = x6 var x5 [10<<20]byte
var x7 [10<<20]byte; z = x7 var x6 [10<<20]byte
var x8 [10<<20]byte; z = x8 var x7 [10<<20]byte
var x9 [10<<20]byte; z = x9 var x8 [10<<20]byte
var x10 [10<<20]byte; z = x10 var x9 [10<<20]byte
var x11 [10<<20]byte; z = x11 var x10 [10<<20]byte
var x12 [10<<20]byte; z = x12 var x11 [10<<20]byte
var x13 [10<<20]byte; z = x13 var x12 [10<<20]byte
var x14 [10<<20]byte; z = x14 var x13 [10<<20]byte
var x15 [10<<20]byte; z = x15 var x14 [10<<20]byte
var x16 [10<<20]byte; z = x16 var x15 [10<<20]byte
var x17 [10<<20]byte; z = x17 var x16 [10<<20]byte
var x18 [10<<20]byte; z = x18 var x17 [10<<20]byte
var x19 [10<<20]byte; z = x19 var x18 [10<<20]byte
var x20 [10<<20]byte; z = x20 var x19 [10<<20]byte
var x21 [10<<20]byte; z = x21 var x20 [10<<20]byte
var x22 [10<<20]byte; z = x22 var x21 [10<<20]byte
var x23 [10<<20]byte; z = x23 var x22 [10<<20]byte
var x24 [10<<20]byte; z = x24 var x23 [10<<20]byte
var x25 [10<<20]byte; z = x25 var x24 [10<<20]byte
var x26 [10<<20]byte; z = x26 var x25 [10<<20]byte
var x27 [10<<20]byte; z = x27 var x26 [10<<20]byte
var x28 [10<<20]byte; z = x28 var x27 [10<<20]byte
var x29 [10<<20]byte; z = x29 var x28 [10<<20]byte
var x30 [10<<20]byte; z = x30 var x29 [10<<20]byte
var x31 [10<<20]byte; z = x31 var x30 [10<<20]byte
var x32 [10<<20]byte; z = x32 var x31 [10<<20]byte
var x33 [10<<20]byte; z = x33 var x32 [10<<20]byte
var x34 [10<<20]byte; z = x34 var x33 [10<<20]byte
var x35 [10<<20]byte; z = x35 var x34 [10<<20]byte
var x36 [10<<20]byte; z = x36 var x35 [10<<20]byte
var x37 [10<<20]byte; z = x37 var x36 [10<<20]byte
var x38 [10<<20]byte; z = x38 var x37 [10<<20]byte
var x39 [10<<20]byte; z = x39 var x38 [10<<20]byte
var x40 [10<<20]byte; z = x40 var x39 [10<<20]byte
var x41 [10<<20]byte; z = x41 var x40 [10<<20]byte
var x42 [10<<20]byte; z = x42 var x41 [10<<20]byte
var x43 [10<<20]byte; z = x43 var x42 [10<<20]byte
var x44 [10<<20]byte; z = x44 var x43 [10<<20]byte
var x45 [10<<20]byte; z = x45 var x44 [10<<20]byte
var x46 [10<<20]byte; z = x46 var x45 [10<<20]byte
var x47 [10<<20]byte; z = x47 var x46 [10<<20]byte
var x48 [10<<20]byte; z = x48 var x47 [10<<20]byte
var x49 [10<<20]byte; z = x49 var x48 [10<<20]byte
var x50 [10<<20]byte; z = x50 var x49 [10<<20]byte
var x51 [10<<20]byte; z = x51 var x50 [10<<20]byte
var x52 [10<<20]byte; z = x52 var x51 [10<<20]byte
var x53 [10<<20]byte; z = x53 var x52 [10<<20]byte
var x54 [10<<20]byte; z = x54 var x53 [10<<20]byte
var x55 [10<<20]byte; z = x55 var x54 [10<<20]byte
var x56 [10<<20]byte; z = x56 var x55 [10<<20]byte
var x57 [10<<20]byte; z = x57 var x56 [10<<20]byte
var x58 [10<<20]byte; z = x58 var x57 [10<<20]byte
var x59 [10<<20]byte; z = x59 var x58 [10<<20]byte
var x60 [10<<20]byte; z = x60 var x59 [10<<20]byte
var x61 [10<<20]byte; z = x61 var x60 [10<<20]byte
var x62 [10<<20]byte; z = x62 var x61 [10<<20]byte
var x63 [10<<20]byte; z = x63 var x62 [10<<20]byte
var x64 [10<<20]byte; z = x64 var x63 [10<<20]byte
var x65 [10<<20]byte; z = x65 var x64 [10<<20]byte
var x66 [10<<20]byte; z = x66 var x65 [10<<20]byte
var x67 [10<<20]byte; z = x67 var x66 [10<<20]byte
var x68 [10<<20]byte; z = x68 var x67 [10<<20]byte
var x69 [10<<20]byte; z = x69 var x68 [10<<20]byte
var x70 [10<<20]byte; z = x70 var x69 [10<<20]byte
var x71 [10<<20]byte; z = x71 var x70 [10<<20]byte
var x72 [10<<20]byte; z = x72 var x71 [10<<20]byte
var x73 [10<<20]byte; z = x73 var x72 [10<<20]byte
var x74 [10<<20]byte; z = x74 var x73 [10<<20]byte
var x75 [10<<20]byte; z = x75 var x74 [10<<20]byte
var x76 [10<<20]byte; z = x76 var x75 [10<<20]byte
var x77 [10<<20]byte; z = x77 var x76 [10<<20]byte
var x78 [10<<20]byte; z = x78 var x77 [10<<20]byte
var x79 [10<<20]byte; z = x79 var x78 [10<<20]byte
var x80 [10<<20]byte; z = x80 var x79 [10<<20]byte
var x81 [10<<20]byte; z = x81 var x80 [10<<20]byte
var x82 [10<<20]byte; z = x82 var x81 [10<<20]byte
var x83 [10<<20]byte; z = x83 var x82 [10<<20]byte
var x84 [10<<20]byte; z = x84 var x83 [10<<20]byte
var x85 [10<<20]byte; z = x85 var x84 [10<<20]byte
var x86 [10<<20]byte; z = x86 var x85 [10<<20]byte
var x87 [10<<20]byte; z = x87 var x86 [10<<20]byte
var x88 [10<<20]byte; z = x88 var x87 [10<<20]byte
var x89 [10<<20]byte; z = x89 var x88 [10<<20]byte
var x90 [10<<20]byte; z = x90 var x89 [10<<20]byte
var x91 [10<<20]byte; z = x91 var x90 [10<<20]byte
var x92 [10<<20]byte; z = x92 var x91 [10<<20]byte
var x93 [10<<20]byte; z = x93 var x92 [10<<20]byte
var x94 [10<<20]byte; z = x94 var x93 [10<<20]byte
var x95 [10<<20]byte; z = x95 var x94 [10<<20]byte
var x96 [10<<20]byte; z = x96 var x95 [10<<20]byte
var x97 [10<<20]byte; z = x97 var x96 [10<<20]byte
var x98 [10<<20]byte; z = x98 var x97 [10<<20]byte
var x99 [10<<20]byte; z = x99 var x98 [10<<20]byte
var x100 [10<<20]byte; z = x100 var x99 [10<<20]byte
var x101 [10<<20]byte; z = x101 var x100 [10<<20]byte
var x102 [10<<20]byte; z = x102 var x101 [10<<20]byte
var x103 [10<<20]byte; z = x103 var x102 [10<<20]byte
var x104 [10<<20]byte; z = x104 var x103 [10<<20]byte
var x105 [10<<20]byte; z = x105 var x104 [10<<20]byte
var x106 [10<<20]byte; z = x106 var x105 [10<<20]byte
var x107 [10<<20]byte; z = x107 var x106 [10<<20]byte
var x108 [10<<20]byte; z = x108 var x107 [10<<20]byte
var x109 [10<<20]byte; z = x109 var x108 [10<<20]byte
var x110 [10<<20]byte; z = x110 var x109 [10<<20]byte
var x111 [10<<20]byte; z = x111 var x110 [10<<20]byte
var x112 [10<<20]byte; z = x112 var x111 [10<<20]byte
var x113 [10<<20]byte; z = x113 var x112 [10<<20]byte
var x114 [10<<20]byte; z = x114 var x113 [10<<20]byte
var x115 [10<<20]byte; z = x115 var x114 [10<<20]byte
var x116 [10<<20]byte; z = x116 var x115 [10<<20]byte
var x117 [10<<20]byte; z = x117 var x116 [10<<20]byte
var x118 [10<<20]byte; z = x118 var x117 [10<<20]byte
var x119 [10<<20]byte; z = x119 var x118 [10<<20]byte
var x120 [10<<20]byte; z = x120 var x119 [10<<20]byte
var x121 [10<<20]byte; z = x121 var x120 [10<<20]byte
var x122 [10<<20]byte; z = x122 var x121 [10<<20]byte
var x123 [10<<20]byte; z = x123 var x122 [10<<20]byte
var x124 [10<<20]byte; z = x124 var x123 [10<<20]byte
var x125 [10<<20]byte; z = x125 var x124 [10<<20]byte
var x126 [10<<20]byte; z = x126 var x125 [10<<20]byte
var x127 [10<<20]byte; z = x127 var x126 [10<<20]byte
var x128 [10<<20]byte; z = x128 var x127 [10<<20]byte
var x129 [10<<20]byte; z = x129 var x128 [10<<20]byte
var x130 [10<<20]byte; z = x130 var x129 [10<<20]byte
var x131 [10<<20]byte; z = x131 var x130 [10<<20]byte
var x132 [10<<20]byte; z = x132 var x131 [10<<20]byte
var x133 [10<<20]byte; z = x133 var x132 [10<<20]byte
var x134 [10<<20]byte; z = x134 var x133 [10<<20]byte
var x135 [10<<20]byte; z = x135 var x134 [10<<20]byte
var x136 [10<<20]byte; z = x136 var x135 [10<<20]byte
var x137 [10<<20]byte; z = x137 var x136 [10<<20]byte
var x138 [10<<20]byte; z = x138 var x137 [10<<20]byte
var x139 [10<<20]byte; z = x139 var x138 [10<<20]byte
var x140 [10<<20]byte; z = x140 var x139 [10<<20]byte
var x141 [10<<20]byte; z = x141 var x140 [10<<20]byte
var x142 [10<<20]byte; z = x142 var x141 [10<<20]byte
var x143 [10<<20]byte; z = x143 var x142 [10<<20]byte
var x144 [10<<20]byte; z = x144 var x143 [10<<20]byte
var x145 [10<<20]byte; z = x145 var x144 [10<<20]byte
var x146 [10<<20]byte; z = x146 var x145 [10<<20]byte
var x147 [10<<20]byte; z = x147 var x146 [10<<20]byte
var x148 [10<<20]byte; z = x148 var x147 [10<<20]byte
var x149 [10<<20]byte; z = x149 var x148 [10<<20]byte
var x150 [10<<20]byte; z = x150 var x149 [10<<20]byte
var x151 [10<<20]byte; z = x151 var x150 [10<<20]byte
var x152 [10<<20]byte; z = x152 var x151 [10<<20]byte
var x153 [10<<20]byte; z = x153 var x152 [10<<20]byte
var x154 [10<<20]byte; z = x154 var x153 [10<<20]byte
var x155 [10<<20]byte; z = x155 var x154 [10<<20]byte
var x156 [10<<20]byte; z = x156 var x155 [10<<20]byte
var x157 [10<<20]byte; z = x157 var x156 [10<<20]byte
var x158 [10<<20]byte; z = x158 var x157 [10<<20]byte
var x159 [10<<20]byte; z = x159 var x158 [10<<20]byte
var x160 [10<<20]byte; z = x160 var x159 [10<<20]byte
var x161 [10<<20]byte; z = x161 var x160 [10<<20]byte
var x162 [10<<20]byte; z = x162 var x161 [10<<20]byte
var x163 [10<<20]byte; z = x163 var x162 [10<<20]byte
var x164 [10<<20]byte; z = x164 var x163 [10<<20]byte
var x165 [10<<20]byte; z = x165 var x164 [10<<20]byte
var x166 [10<<20]byte; z = x166 var x165 [10<<20]byte
var x167 [10<<20]byte; z = x167 var x166 [10<<20]byte
var x168 [10<<20]byte; z = x168 var x167 [10<<20]byte
var x169 [10<<20]byte; z = x169 var x168 [10<<20]byte
var x170 [10<<20]byte; z = x170 var x169 [10<<20]byte
var x171 [10<<20]byte; z = x171 var x170 [10<<20]byte
var x172 [10<<20]byte; z = x172 var x171 [10<<20]byte
var x173 [10<<20]byte; z = x173 var x172 [10<<20]byte
var x174 [10<<20]byte; z = x174 var x173 [10<<20]byte
var x175 [10<<20]byte; z = x175 var x174 [10<<20]byte
var x176 [10<<20]byte; z = x176 var x175 [10<<20]byte
var x177 [10<<20]byte; z = x177 var x176 [10<<20]byte
var x178 [10<<20]byte; z = x178 var x177 [10<<20]byte
var x179 [10<<20]byte; z = x179 var x178 [10<<20]byte
var x180 [10<<20]byte; z = x180 var x179 [10<<20]byte
var x181 [10<<20]byte; z = x181 var x180 [10<<20]byte
var x182 [10<<20]byte; z = x182 var x181 [10<<20]byte
var x183 [10<<20]byte; z = x183 var x182 [10<<20]byte
var x184 [10<<20]byte; z = x184 var x183 [10<<20]byte
var x185 [10<<20]byte; z = x185 var x184 [10<<20]byte
var x186 [10<<20]byte; z = x186 var x185 [10<<20]byte
var x187 [10<<20]byte; z = x187 var x186 [10<<20]byte
var x188 [10<<20]byte; z = x188 var x187 [10<<20]byte
var x189 [10<<20]byte; z = x189 var x188 [10<<20]byte
var x190 [10<<20]byte; z = x190 var x189 [10<<20]byte
var x191 [10<<20]byte; z = x191 var x190 [10<<20]byte
var x192 [10<<20]byte; z = x192 var x191 [10<<20]byte
var x193 [10<<20]byte; z = x193 var x192 [10<<20]byte
var x194 [10<<20]byte; z = x194 var x193 [10<<20]byte
var x195 [10<<20]byte; z = x195 var x194 [10<<20]byte
var x196 [10<<20]byte; z = x196 var x195 [10<<20]byte
var x197 [10<<20]byte; z = x197 var x196 [10<<20]byte
var x198 [10<<20]byte; z = x198 var x197 [10<<20]byte
var x199 [10<<20]byte; z = x199 var x198 [10<<20]byte
var x200 [10<<20]byte; z = x200 var x199 [10<<20]byte
var x201 [10<<20]byte; z = x201 var x200 [10<<20]byte
var x202 [10<<20]byte; z = x202 var x201 [10<<20]byte
var x203 [10<<20]byte; z = x203 var x202 [10<<20]byte
var x204 [10<<20]byte; z = x204 var x203 [10<<20]byte
var x205 [10<<20]byte; z = x205 var x204 [10<<20]byte
var x206 [10<<20]byte; z = x206 var x205 [10<<20]byte
var x206 [10<<20]byte
var x207 [10<<20]byte
z = x1
z = x2
z = x3
z = x4
z = x5
z = x6
z = x7
z = x8
z = x9
z = x10
z = x11
z = x12
z = x13
z = x14
z = x15
z = x16
z = x17
z = x18
z = x19
z = x20
z = x21
z = x22
z = x23
z = x24
z = x25
z = x26
z = x27
z = x28
z = x29
z = x30
z = x31
z = x32
z = x33
z = x34
z = x35
z = x36
z = x37
z = x38
z = x39
z = x40
z = x41
z = x42
z = x43
z = x44
z = x45
z = x46
z = x47
z = x48
z = x49
z = x50
z = x51
z = x52
z = x53
z = x54
z = x55
z = x56
z = x57
z = x58
z = x59
z = x60
z = x61
z = x62
z = x63
z = x64
z = x65
z = x66
z = x67
z = x68
z = x69
z = x70
z = x71
z = x72
z = x73
z = x74
z = x75
z = x76
z = x77
z = x78
z = x79
z = x80
z = x81
z = x82
z = x83
z = x84
z = x85
z = x86
z = x87
z = x88
z = x89
z = x90
z = x91
z = x92
z = x93
z = x94
z = x95
z = x96
z = x97
z = x98
z = x99
z = x100
z = x101
z = x102
z = x103
z = x104
z = x105
z = x106
z = x107
z = x108
z = x109
z = x110
z = x111
z = x112
z = x113
z = x114
z = x115
z = x116
z = x117
z = x118
z = x119
z = x120
z = x121
z = x122
z = x123
z = x124
z = x125
z = x126
z = x127
z = x128
z = x129
z = x130
z = x131
z = x132
z = x133
z = x134
z = x135
z = x136
z = x137
z = x138
z = x139
z = x140
z = x141
z = x142
z = x143
z = x144
z = x145
z = x146
z = x147
z = x148
z = x149
z = x150
z = x151
z = x152
z = x153
z = x154
z = x155
z = x156
z = x157
z = x158
z = x159
z = x160
z = x161
z = x162
z = x163
z = x164
z = x165
z = x166
z = x167
z = x168
z = x169
z = x170
z = x171
z = x172
z = x173
z = x174
z = x175
z = x176
z = x177
z = x178
z = x179
z = x180
z = x181
z = x182
z = x183
z = x184
z = x185
z = x186
z = x187
z = x188
z = x189
z = x190
z = x191
z = x192
z = x193
z = x194
z = x195
z = x196
z = x197
z = x198
z = x199
z = x200
z = x201
z = x202
z = x203
z = x204
z = x205
z = x206
z = x207
} }