cmd/compile/internal: merge stack slots for selected local auto vars

Preliminary compiler support for merging/overlapping stack
slots of local variables whose access patterns are disjoint.

This patch includes changes in AllocFrame to do the actual
merging/overlapping based on information returned from a new
liveness.MergeLocals helper. The MergeLocals helper identifies
candidates by looking for sets of AUTO variables that either A) have
the same size and GC shape (if types contain pointers), or B) have the
same size (but potentially different types as long as those types have
no pointers). Variables must be greater than (3*types.PtrSize) in size
to be considered for merging.
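For illustration only (not part of the patch), a hedged sketch of the
kind of source-level pattern this targets: two pointer-free locals of
identical size, well above the 3*types.PtrSize threshold, whose
lifetimes are disjoint and whose stack slots could therefore be shared:

    package p

    func sum(i, j int) int {
        // a and b are both 8192-byte pointer-free arrays; b only becomes
        // live after the last use of a, so (subject to the liveness
        // analysis described below) their slots could be overlapped.
        var a [1024]int
        a[i] = j
        r := a[j]

        var b [1024]int
        b[i] = r
        return b[j]
    }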

After forming candidates, MergeLocals collects variables into "can be
overlapped" equivalence classes or partitions; this process is driven
by an additional liveness analysis pass. Ideally we would move the
existing stackmap liveness pass up before AllocFrame and "widen" it to
include merge candidates, so that a single liveness pass suffices
instead of two; however, this may be difficult given that the
merge-locals liveness has to take into account writes corresponding
to dead stores.
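As a concrete (hypothetical) illustration of the dead-store issue: in
the function below, r2 is written but never read, so stackmap liveness
may treat r2 as dead at the call, but the merge-locals liveness must
still count those writes as uses; otherwise r2 could be overlapped
with r1, which is live across the same region, and the stores to r2
would clobber it:

    package p

    type T struct{ x, y [256]int }

    //go:noinline
    func foo() {}

    func f(i int) int {
        var r1 T
        r1.x[i] = 1

        var r2 T
        r2.x[i] = 0 // dead stores: r2 is never read after this point
        r2.y[i] = 2
        foo() // the stack map at this call may ignore r2 entirely

        return r1.x[i]
    }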

This patch also required a change to the way ssa.OpVarDef pseudo-ops
are generated. Previously they were created only for variables whose
types contain pointers; when stack slot merging is enabled, the ssagen
code also creates OpVarDef ops for all auto vars that are merge
candidates.

Note that some temporaries created late in the compilation process
(e.g. during ssa backend) are difficult to reason about, especially in
cases where we take the address of a temp and pass it to the runtime.
For the time being we mark most of the vars created post-ssagen as
"not a merge candidate".

Stack slot merging for locals/autos is enabled by default if "-N" is
not in effect, and can be disabled via "-gcflags=-d=mergelocals=0".

Fixmes/todos/restrictions:
- try lowering size restrictions
- re-evaluate the various skips that happen in SSA-created autotmps

Fixes #62737.
Updates #65532.
Updates #65495.

Cq-Include-Trybots: luci.golang.try:gotip-linux-amd64-longtest
Change-Id: Ibc22e8a76c87e47bc9fafe4959804d9ea923623d
Reviewed-on: https://go-review.googlesource.com/c/go/+/553055
Reviewed-by: Cherry Mui <cherryyz@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Than McIntosh 2023-12-28 14:26:34 +00:00
parent 754f870381
commit 89f7805c2e
14 changed files with 1512 additions and 222 deletions

View File

@ -41,6 +41,10 @@ type DebugFlags struct {
LoopVarHash string `help:"for debugging changes in loop behavior. Overrides experiment and loopvar flag."`
LocationLists int `help:"print information about DWARF location list creation"`
MaxShapeLen int `help:"hash shape names longer than this threshold (default 500)" concurrent:"ok"`
MergeLocals int `help:"merge together non-interfering local stack slots" concurrent:"ok"`
MergeLocalsDumpFunc string `help:"dump specified func in merge locals"`
MergeLocalsHash string `help:"hash value for debugging stack slot merging of local variables" concurrent:"ok"`
MergeLocalsTrace int `help:"trace debug output for locals merging"`
Nil int `help:"print information about nil checks"`
NoOpenDefer int `help:"disable open-coded defers" concurrent:"ok"`
NoRefName int `help:"do not include referenced symbol names in object file" concurrent:"ok"`

View File

@ -184,6 +184,7 @@ func ParseFlags() {
Debug.SyncFrames = -1 // disable sync markers by default
Debug.ZeroCopy = 1
Debug.RangeFuncCheck = 1
Debug.MergeLocals = 1
Debug.Checkptr = -1 // so we can tell whether it is set explicitly
@ -260,6 +261,9 @@ func ParseFlags() {
if Debug.PGOHash != "" {
PGOHash = NewHashDebug("pgohash", Debug.PGOHash, nil)
}
if Debug.MergeLocalsHash != "" {
MergeLocalsHash = NewHashDebug("mergelocals", Debug.MergeLocalsHash, nil)
}
if Flag.MSan && !platform.MSanSupported(buildcfg.GOOS, buildcfg.GOARCH) {
log.Fatalf("%s/%s does not support -msan", buildcfg.GOOS, buildcfg.GOARCH)

View File

@ -56,6 +56,7 @@ var hashDebug *HashDebug
var FmaHash *HashDebug // for debugging fused-multiply-add floating point changes
var LoopVarHash *HashDebug // for debugging shared/private loop variable changes
var PGOHash *HashDebug // for debugging PGO optimization decisions
var MergeLocalsHash *HashDebug // for debugging local stack slot merging changes
// DebugHashMatchPkgFunc reports whether debug variable Gossahash
//

View File

@ -194,6 +194,7 @@ const (
nameLibfuzzer8BitCounter // if PEXTERN should be assigned to __sancov_cntrs section
nameCoverageAuxVar // instrumentation counter var or pkg ID for cmd/cover
nameAlias // is type name an alias
nameNonMergeable // not a candidate for stack slot merging
)
func (n *Name) Readonly() bool { return n.flags&nameReadonly != 0 }
@ -209,6 +210,7 @@ func (n *Name) InlLocal() bool { return n.flags&nameInlLocal !=
func (n *Name) OpenDeferSlot() bool { return n.flags&nameOpenDeferSlot != 0 }
func (n *Name) Libfuzzer8BitCounter() bool { return n.flags&nameLibfuzzer8BitCounter != 0 }
func (n *Name) CoverageAuxVar() bool { return n.flags&nameCoverageAuxVar != 0 }
func (n *Name) NonMergeable() bool { return n.flags&nameNonMergeable != 0 }
func (n *Name) setReadonly(b bool) { n.flags.set(nameReadonly, b) }
func (n *Name) SetNeedzero(b bool) { n.flags.set(nameNeedzero, b) }
@ -223,6 +225,7 @@ func (n *Name) SetInlLocal(b bool) { n.flags.set(nameInlLocal, b
func (n *Name) SetOpenDeferSlot(b bool) { n.flags.set(nameOpenDeferSlot, b) }
func (n *Name) SetLibfuzzer8BitCounter(b bool) { n.flags.set(nameLibfuzzer8BitCounter, b) }
func (n *Name) SetCoverageAuxVar(b bool) { n.flags.set(nameCoverageAuxVar, b) }
func (n *Name) SetNonMergeable(b bool) { n.flags.set(nameNonMergeable, b) }
// OnStack reports whether variable n may reside on the stack.
func (n *Name) OnStack() bool {

View File

@ -0,0 +1,691 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package liveness
import (
"cmd/compile/internal/base"
"cmd/compile/internal/bitvec"
"cmd/compile/internal/ir"
"cmd/compile/internal/reflectdata"
"cmd/compile/internal/ssa"
"cmd/internal/obj"
"cmd/internal/src"
"fmt"
"os"
"path/filepath"
"sort"
"strings"
)
// MergeLocalsState encapsulates information about which AUTO
// (stack-allocated) variables within a function can be safely
// merged/overlapped (e.g. share a stack slot with some other auto).
// An instance of MergeLocalsState is produced by MergeLocals() below
// and then consumed in ssagen.AllocFrame. The map 'partition' contains
// entries of the form <N,SL> where N is an *ir.Name and SL is a slice
// holding the indices (within 'vars') of other variables that share the
// same slot. For example, if a function contains five variables where
// v1/v2/v3 are safe to overlap and v4/v5 are safe to overlap, the
// MergeLocalsState content might look like
//
// vars: [v1, v2, v3, v4, v5]
// partition: v1 -> [1, 0, 2], v2 -> [1, 0, 2], v3 -> [1, 0, 2]
// v4 -> [3, 4], v5 -> [3, 4]
//
// A nil MergeLocalsState indicates that no local variables meet the
// necessary criteria for overlap.
type MergeLocalsState struct {
// contains auto vars that participate in overlapping
vars []*ir.Name
// maps auto variable to overlap partition
partition map[*ir.Name][]int
}
// candRegion is a sub-range (start, end) corresponding to an interval
// [st,en] within the list of candidate variables.
type candRegion struct {
st, en int
}
// MergeLocals analyzes the specified ssa function f to determine which
// of its auto variables can safely share the same stack slot, returning
// a state object that describes how the overlap should be done.
func MergeLocals(fn *ir.Func, f *ssa.Func) *MergeLocalsState {
cands, idx, regions := collectMergeCandidates(fn)
if len(regions) == 0 {
return nil
}
lv := newliveness(fn, f, cands, idx, 0)
// If we have a local variable such as "r2" below that's written
// but then not read, something like:
//
// vardef r1
// r1.x = ...
// vardef r2
// r2.x = 0
// r2.y = ...
// <call foo>
// // no subsequent use of r2
// ... = r1.x
//
// then for the purpose of calculating stack maps at the call, we
// can ignore "r2" completely during liveness analysis for stack
// maps; however, for stack slot merging we most definitely want
// to treat the writes as "uses".
lv.conservativeWrites = true
lv.prologue()
lv.solve()
cs := &cstate{
fn: fn,
ibuilders: make([]IntervalsBuilder, len(cands)),
}
computeIntervals(lv, cs)
rv := performMerging(lv, cs, regions)
if err := rv.check(); err != nil {
base.FatalfAt(fn.Pos(), "invalid mergelocals state: %v", err)
}
return rv
}
// Subsumed returns whether variable n is subsumed, i.e. whether it
// appears in an overlap position but is not the leader in that partition.
func (mls *MergeLocalsState) Subsumed(n *ir.Name) bool {
if sl, ok := mls.partition[n]; ok && mls.vars[sl[0]] != n {
return true
}
return false
}
// IsLeader returns whether a variable n is the leader (first element)
// in a sharing partition.
func (mls *MergeLocalsState) IsLeader(n *ir.Name) bool {
if sl, ok := mls.partition[n]; ok && mls.vars[sl[0]] == n {
return true
}
return false
}
// Leader returns the leader variable for subsumed var n.
func (mls *MergeLocalsState) Leader(n *ir.Name) *ir.Name {
if sl, ok := mls.partition[n]; ok {
if mls.vars[sl[0]] == n {
panic("variable is not subsumed")
}
return mls.vars[sl[0]]
}
panic("not a merge candidate")
}
// Followers writes a list of the followers for leader n into the slice tmp.
func (mls *MergeLocalsState) Followers(n *ir.Name, tmp []*ir.Name) []*ir.Name {
tmp = tmp[:0]
sl, ok := mls.partition[n]
if !ok {
panic("no entry for leader")
}
if mls.vars[sl[0]] != n {
panic("followers invoked on subsumed var")
}
for _, k := range sl[1:] {
tmp = append(tmp, mls.vars[k])
}
sort.SliceStable(tmp, func(i, j int) bool {
return tmp[i].Sym().Name < tmp[j].Sym().Name
})
return tmp
}
// EstSavings returns the estimated reduction in stack size for
// the given merge locals state.
func (mls *MergeLocalsState) EstSavings() int {
tot := 0
for n := range mls.partition {
if mls.Subsumed(n) {
tot += int(n.Type().Size())
}
}
return tot
}
// check tests for various inconsistencies and problems in mls,
// returning an error if any problems are found.
func (mls *MergeLocalsState) check() error {
if mls == nil {
return nil
}
used := make(map[int]bool)
seenv := make(map[*ir.Name]int)
for ii, v := range mls.vars {
if prev, ok := seenv[v]; ok {
return fmt.Errorf("duplicate var %q in vslots: %d and %d\n",
v.Sym().Name, ii, prev)
}
seenv[v] = ii
}
for k, sl := range mls.partition {
// length of slice value needs to be more than 1
if len(sl) < 2 {
return fmt.Errorf("k=%q v=%+v slice len %d invalid",
k.Sym().Name, sl, len(sl))
}
// values in the slice need to be var indices
for i, v := range sl {
if v < 0 || v > len(mls.vars)-1 {
return fmt.Errorf("k=%q v=+%v slpos %d vslot %d out of range of m.v", k.Sym().Name, sl, i, v)
}
}
}
for k, sl := range mls.partition {
foundk := false
for i, v := range sl {
vv := mls.vars[v]
if i == 0 {
if !mls.IsLeader(vv) {
return fmt.Errorf("k=%s v=+%v slpos 0 vslot %d IsLeader(%q) is false should be true", k.Sym().Name, sl, v, vv.Sym().Name)
}
} else {
if !mls.Subsumed(vv) {
return fmt.Errorf("k=%s v=+%v slpos %d vslot %d Subsumed(%q) is false should be true", k.Sym().Name, sl, i, v, vv.Sym().Name)
}
if mls.Leader(vv) != mls.vars[sl[0]] {
return fmt.Errorf("k=%s v=+%v slpos %d vslot %d Leader(%q) got %v want %v", k.Sym().Name, sl, i, v, vv.Sym().Name, mls.Leader(vv), mls.vars[sl[0]])
}
}
if vv == k {
foundk = true
if used[v] {
return fmt.Errorf("k=%s v=+%v val slice used violation at slpos %d vslot %d", k.Sym().Name, sl, i, v)
}
used[v] = true
}
}
if !foundk {
return fmt.Errorf("k=%s v=+%v slice value missing k", k.Sym().Name, sl)
}
}
for i := range used {
if !used[i] {
return fmt.Errorf("pos %d var %q unused", i, mls.vars[i])
}
}
return nil
}
func (mls *MergeLocalsState) String() string {
var leaders []*ir.Name
for n, sl := range mls.partition {
if n == mls.vars[sl[0]] {
leaders = append(leaders, n)
}
}
sort.Slice(leaders, func(i, j int) bool {
return leaders[i].Sym().Name < leaders[j].Sym().Name
})
var sb strings.Builder
for _, n := range leaders {
sb.WriteString(n.Sym().Name + ":")
sl := mls.partition[n]
for _, k := range sl[1:] {
n := mls.vars[k]
sb.WriteString(" " + n.Sym().Name)
}
sb.WriteString("\n")
}
return sb.String()
}
// collectMergeCandidates visits all of the AUTO vars declared in
// function fn and returns a list of candidate variables for merging /
// overlapping. Return values are: 1) a slice of ir.Name's
// corresponding to the candidates, 2) a map that maps ir.Name to slot
// in the slice, and 3) a slice containing regions (start/end pairs)
// corresponding to variables that could be overlapped provided that
// their lifetimes are disjoint.
func collectMergeCandidates(fn *ir.Func) ([]*ir.Name, map[*ir.Name]int32, []candRegion) {
m := make(map[*ir.Name]int32)
var cands []*ir.Name
var regions []candRegion
// Collect up the available set of appropriate AUTOs in the
// function as a first step.
for _, n := range fn.Dcl {
if !n.Used() {
continue
}
if !ssa.IsMergeCandidate(n) {
continue
}
cands = append(cands, n)
}
if len(cands) < 2 {
return nil, nil, nil
}
// Sort by pointerness, size, and then name.
sort.SliceStable(cands, func(i, j int) bool {
ci, cj := cands[i], cands[j]
ihp, jhp := 0, 0
var ilsym, jlsym *obj.LSym
if ci.Type().HasPointers() {
ihp = 1
ilsym, _, _ = reflectdata.GCSym(ci.Type())
}
if cj.Type().HasPointers() {
jhp = 1
jlsym, _, _ = reflectdata.GCSym(cj.Type())
}
if ihp != jhp {
return ihp < jhp
}
if ci.Type().Size() != cj.Type().Size() {
return ci.Type().Size() < cj.Type().Size()
}
if ihp != 0 && jhp != 0 && ilsym != jlsym {
// FIXME: find less clunky way to do this
return fmt.Sprintf("%v", ilsym) < fmt.Sprintf("%v", jlsym)
}
if ci.Sym().Name != cj.Sym().Name {
return ci.Sym().Name < cj.Sym().Name
}
return fmt.Sprintf("%v", ci.Pos()) < fmt.Sprintf("%v", cj.Pos())
})
if base.Debug.MergeLocalsTrace > 1 {
fmt.Fprintf(os.Stderr, "=-= raw cand list for func %v:\n", fn)
for i := range cands {
dumpCand(cands[i], i)
}
}
// Now generate a pruned candidate list-- we only want to return a
// non-empty list if there is some possibility of overlapping two
// vars.
var pruned []*ir.Name
st := 0
for {
en := nextRegion(cands, st)
if en == -1 {
break
}
if st == en {
// region has just one element, we can skip it
st++
continue
}
pst := len(pruned)
pen := pst + (en - st)
if base.Debug.MergeLocalsTrace > 1 {
fmt.Fprintf(os.Stderr, "=-= add part %d -> %d\n", pst, pen)
}
// non-empty region, add to pruned
pruned = append(pruned, cands[st:en+1]...)
regions = append(regions, candRegion{st: pst, en: pen})
st = en + 1
}
if len(pruned) < 2 {
return nil, nil, nil
}
for i, n := range pruned {
m[n] = int32(i)
}
if base.Debug.MergeLocalsTrace > 1 {
fmt.Fprintf(os.Stderr, "=-= pruned candidate list for func %v:\n", fn)
for i := range pruned {
dumpCand(pruned[i], i)
}
}
return pruned, m, regions
}
// nextRegion starts at location idx and walks forward in the cands
// slice looking for variables that are "compatible" (overlappable)
// with the variable at position idx; it returns the end of the new
// region (range of compatible variables starting at idx).
func nextRegion(cands []*ir.Name, idx int) int {
n := len(cands)
if idx >= n {
return -1
}
c0 := cands[idx]
hp0 := c0.Type().HasPointers()
for j := idx + 1; j < n; j++ {
cj := cands[j]
hpj := cj.Type().HasPointers()
ok := true
if hp0 {
if !hpj || c0.Type().Size() != cj.Type().Size() {
return j - 1
}
// GC shape must match if both types have pointers.
gcsym0, _, _ := reflectdata.GCSym(c0.Type())
gcsymj, _, _ := reflectdata.GCSym(cj.Type())
if gcsym0 != gcsymj {
return j - 1
}
} else {
// If no pointers, match size only.
if !ok || hp0 != hpj || c0.Type().Size() != cj.Type().Size() {
return j - 1
}
}
}
return n - 1
}
type cstate struct {
fn *ir.Func
ibuilders []IntervalsBuilder
}
// mergeVisitRegion tries to perform overlapping of variables within a
// given subrange of cands described by st and en (indices into our
// candidate var list), where the variables within this range have
// already been determined to be compatible with respect to type,
// size, etc. Overlapping is done in a greedy fashion: we select the
// first element in the st->en range, then walk the rest of the
// elements adding in vars whose lifetimes don't overlap with the
// first element, then repeat the process until we run out of work to do.
func (mls *MergeLocalsState) mergeVisitRegion(lv *liveness, ivs []Intervals, st, en int) {
if base.Debug.MergeLocalsTrace > 1 {
fmt.Fprintf(os.Stderr, "=-= mergeVisitRegion(st=%d, en=%d)\n", st, en)
}
n := en - st + 1
used := bitvec.New(int32(n))
nxt := func(slot int) int {
for c := slot - st; c < n; c++ {
if used.Get(int32(c)) {
continue
}
return c + st
}
return -1
}
navail := n
cands := lv.vars
if base.Debug.MergeLocalsTrace > 1 {
fmt.Fprintf(os.Stderr, " =-= navail = %d\n", navail)
}
for navail >= 2 {
leader := nxt(st)
used.Set(int32(leader - st))
navail--
if base.Debug.MergeLocalsTrace > 1 {
fmt.Fprintf(os.Stderr, " =-= begin leader %d used=%s\n", leader,
used.String())
}
elems := []int{leader}
lints := ivs[leader]
for succ := nxt(leader + 1); succ != -1; succ = nxt(succ + 1) {
// Skip if de-selected by merge locals hash.
if base.Debug.MergeLocalsHash != "" {
if !base.MergeLocalsHash.MatchPosWithInfo(cands[succ].Pos(), "mergelocals", nil) {
continue
}
}
// Skip if already used.
if used.Get(int32(succ - st)) {
continue
}
if base.Debug.MergeLocalsTrace > 1 {
fmt.Fprintf(os.Stderr, " =-= overlap of %d[%v] {%s} with %d[%v] {%s} is: %v\n", leader, cands[leader], lints.String(), succ, cands[succ], ivs[succ].String(), lints.Overlaps(ivs[succ]))
}
// Can we overlap leader with this var?
if lints.Overlaps(ivs[succ]) {
continue
} else {
// Add to overlap set.
elems = append(elems, succ)
lints = lints.Merge(ivs[succ])
}
}
if len(elems) > 1 {
// We found some things to overlap with leader. Add the
// candidate elements to "vars" and update "partition".
off := len(mls.vars)
sl := make([]int, len(elems))
for i, candslot := range elems {
sl[i] = off + i
mls.vars = append(mls.vars, cands[candslot])
mls.partition[cands[candslot]] = sl
}
navail -= (len(elems) - 1)
for i := range elems {
used.Set(int32(elems[i] - st))
}
if base.Debug.MergeLocalsTrace > 1 {
fmt.Fprintf(os.Stderr, "=-= overlapping %+v:\n", sl)
for i := range sl {
dumpCand(mls.vars[sl[i]], sl[i])
}
for i, v := range elems {
fmt.Fprintf(os.Stderr, "=-= %d: sl=%d %s\n", i, v, ivs[v])
}
}
}
}
}
// performMerging carries out variable merging within each of the
// candidate ranges in regions, returning a state object
// that describes the variable overlaps.
func performMerging(lv *liveness, cs *cstate, regions []candRegion) *MergeLocalsState {
cands := lv.vars
mls := &MergeLocalsState{
partition: make(map[*ir.Name][]int),
}
// Finish intervals construction.
ivs := make([]Intervals, len(cands))
for i := range cands {
var err error
ivs[i], err = cs.ibuilders[i].Finish()
if err != nil {
ninstr := 0
if base.Debug.MergeLocalsTrace != 0 {
iidx := 0
for k := 0; k < len(lv.f.Blocks); k++ {
b := lv.f.Blocks[k]
fmt.Fprintf(os.Stderr, "\n")
for _, v := range b.Values {
fmt.Fprintf(os.Stderr, " b%d %d: %s\n", k, iidx, v.LongString())
iidx++
ninstr++
}
}
}
base.FatalfAt(cands[i].Pos(), "interval construct error for var %q in func %q (%d instrs): %v", cands[i].Sym().Name, ir.FuncName(cs.fn), ninstr, err)
return nil
}
}
// Dump state before attempting overlap.
if base.Debug.MergeLocalsTrace > 1 {
fmt.Fprintf(os.Stderr, "=-= cands live before overlap:\n")
for i := range cands {
c := cands[i]
fmt.Fprintf(os.Stderr, "%d: %v sz=%d ivs=%s\n",
i, c.Sym().Name, c.Type().Size(), ivs[i].String())
}
fmt.Fprintf(os.Stderr, "=-= regions (%d): ", len(regions))
for _, cr := range regions {
fmt.Fprintf(os.Stderr, " [%d,%d]", cr.st, cr.en)
}
fmt.Fprintf(os.Stderr, "\n")
}
if base.Debug.MergeLocalsTrace > 1 {
fmt.Fprintf(os.Stderr, "=-= len(regions) = %d\n", len(regions))
}
// Apply a greedy merge/overlap strategy within each region
// of compatible variables.
for _, cr := range regions {
mls.mergeVisitRegion(lv, ivs, cr.st, cr.en)
}
if len(mls.vars) == 0 {
return nil
}
return mls
}
// computeIntervals performs a backwards sweep over the instructions
// of the function we're compiling, building up an Intervals object
// for each candidate variable by looking for upwards exposed uses
// and kills.
func computeIntervals(lv *liveness, cs *cstate) {
nvars := int32(len(lv.vars))
liveout := bitvec.New(nvars)
if base.Debug.MergeLocalsDumpFunc != "" &&
strings.HasSuffix(fmt.Sprintf("%v", cs.fn), base.Debug.MergeLocalsDumpFunc) {
fmt.Fprintf(os.Stderr, "=-= mergelocalsdumpfunc %v:\n", cs.fn)
ii := 0
for k, b := range lv.f.Blocks {
fmt.Fprintf(os.Stderr, "b%d:\n", k)
for _, v := range b.Values {
pos := base.Ctxt.PosTable.Pos(v.Pos)
fmt.Fprintf(os.Stderr, "=-= %d L%d|C%d %s\n", ii, pos.RelLine(), pos.RelCol(), v.LongString())
ii++
}
}
}
// Count instructions.
ninstr := 0
for _, b := range lv.f.Blocks {
ninstr += len(b.Values)
}
// current instruction index during backwards walk
iidx := ninstr - 1
// Make a backwards pass over all blocks
for k := len(lv.f.Blocks) - 1; k >= 0; k-- {
b := lv.f.Blocks[k]
be := lv.blockEffects(b)
if base.Debug.MergeLocalsTrace > 2 {
fmt.Fprintf(os.Stderr, "=-= liveout from tail of b%d: ", k)
for j := range lv.vars {
if be.liveout.Get(int32(j)) {
fmt.Fprintf(os.Stderr, " %q", lv.vars[j].Sym().Name)
}
}
fmt.Fprintf(os.Stderr, "\n")
}
// Take into account effects taking place at end of this basic
// block by comparing our current live set with liveout for
// the block. If a given var was not live before and is now
// becoming live we need to mark this transition with a
// builder "Live" call; similarly if a var was live before and
// is now no longer live, we need a "Kill" call.
for j := range lv.vars {
isLive := liveout.Get(int32(j))
blockLiveOut := be.liveout.Get(int32(j))
if isLive {
if !blockLiveOut {
if base.Debug.MergeLocalsTrace > 2 {
fmt.Fprintf(os.Stderr, "=+= at instr %d block boundary kill of %v\n", iidx, lv.vars[j])
}
cs.ibuilders[j].Kill(iidx)
}
} else if blockLiveOut {
if base.Debug.MergeLocalsTrace > 2 {
fmt.Fprintf(os.Stderr, "=+= at block-end instr %d %v becomes live\n",
iidx, lv.vars[j])
}
cs.ibuilders[j].Live(iidx)
}
}
// Set our working "currently live" set to the previously
// computed live out set for the block.
liveout.Copy(be.liveout)
// Now walk backwards through this block.
for i := len(b.Values) - 1; i >= 0; i-- {
v := b.Values[i]
if base.Debug.MergeLocalsTrace > 2 {
fmt.Fprintf(os.Stderr, "=-= b%d instr %d: %s\n", k, iidx, v.LongString())
}
// Update liveness based on what we see happening in this
// instruction.
pos, e := lv.valueEffects(v)
becomeslive := e&uevar != 0
iskilled := e&varkill != 0
if becomeslive && iskilled {
// we do not ever expect to see both a kill and an
// upwards exposed use given our size constraints.
panic("should never happen")
}
if iskilled && liveout.Get(pos) {
cs.ibuilders[pos].Kill(iidx)
liveout.Unset(pos)
if base.Debug.MergeLocalsTrace > 2 {
fmt.Fprintf(os.Stderr, "=+= at instr %d kill of %v\n",
iidx, lv.vars[pos])
}
} else if becomeslive && !liveout.Get(pos) {
cs.ibuilders[pos].Live(iidx)
liveout.Set(pos)
if base.Debug.MergeLocalsTrace > 2 {
fmt.Fprintf(os.Stderr, "=+= at instr %d upwards-exposed use of %v\n",
iidx, lv.vars[pos])
}
}
iidx--
}
if b == lv.f.Entry {
for j, v := range lv.vars {
if liveout.Get(int32(j)) {
lv.f.Fatalf("%v %L recorded as live on entry",
lv.fn.Nname, v)
}
}
}
}
if iidx != -1 {
panic("iidx underflow")
}
}
func dumpCand(c *ir.Name, i int) {
fmtFullPos := func(p src.XPos) string {
var sb strings.Builder
sep := ""
base.Ctxt.AllPos(p, func(pos src.Pos) {
fmt.Fprintf(&sb, sep)
sep = "|"
file := filepath.Base(pos.Filename())
fmt.Fprintf(&sb, "%s:%d:%d", file, pos.Line(), pos.Col())
})
return sb.String()
}
fmt.Fprintf(os.Stderr, " %d: %s %q sz=%d hp=%v t=%v\n",
i, fmtFullPos(c.Pos()), c.Sym().Name, c.Type().Size(),
c.Type().HasPointers(), c.Type())
}
// for unit testing only.
func MakeMergeLocalsState(partition map[*ir.Name][]int, vars []*ir.Name) (*MergeLocalsState, error) {
mls := &MergeLocalsState{partition: partition, vars: vars}
if err := mls.check(); err != nil {
return nil, err
}
return mls, nil
}
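As a hedged usage sketch (mirroring the doc comment above and the
MakeMergeLocalsState test hook), a state in which v1/v2/v3 share one
slot and v4/v5 share another could be built and queried roughly as
follows; the surrounding function and the mkiv helper are illustrative
only:

    package test

    import (
        "cmd/compile/internal/ir"
        "cmd/compile/internal/liveness"
        "cmd/compile/internal/typecheck"
        "cmd/compile/internal/types"
        "cmd/internal/src"
        "fmt"
    )

    func exampleMergeLocalsState() {
        mkiv := func(name string) *ir.Name {
            return ir.NewNameAt(src.NoXPos, typecheck.Lookup(name), types.Types[types.TINT32])
        }
        v1, v2, v3 := mkiv("v1"), mkiv("v2"), mkiv("v3")
        v4, v5 := mkiv("v4"), mkiv("v5")
        vars := []*ir.Name{v1, v2, v3, v4, v5}
        partition := map[*ir.Name][]int{
            v1: {0, 1, 2}, v2: {0, 1, 2}, v3: {0, 1, 2},
            v4: {3, 4}, v5: {3, 4},
        }
        mls, err := liveness.MakeMergeLocalsState(partition, vars)
        if err != nil {
            panic(err)
        }
        fmt.Println(mls.IsLeader(v1)) // true: v1 is first in its partition
        fmt.Println(mls.Subsumed(v2)) // true: shares v1's slot, not the leader
        fmt.Println(mls.Leader(v3))   // v1
    }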

View File

@ -143,6 +143,11 @@ type liveness struct {
doClobber bool // Whether to clobber dead stack slots in this function.
noClobberArgs bool // Do not clobber function arguments
// treat "dead" writes as equivalent to reads during the analysis;
// used only during liveness analysis for stack slot merging (doesn't
// make sense for stackmap analysis).
conservativeWrites bool
}
// Map maps from *ssa.Value to StackMapIndex.
@ -312,8 +317,12 @@ func (lv *liveness) valueEffects(v *ssa.Value) (int32, liveEffect) {
if e&(ssa.SymRead|ssa.SymAddr) != 0 {
effect |= uevar
}
if e&ssa.SymWrite != 0 && (!isfat(n.Type()) || v.Op == ssa.OpVarDef) {
if e&ssa.SymWrite != 0 {
if !isfat(n.Type()) || v.Op == ssa.OpVarDef {
effect |= varkill
} else if lv.conservativeWrites {
effect |= uevar
}
}
if effect == 0 {
@ -450,6 +459,11 @@ func (lv *liveness) blockEffects(b *ssa.Block) *blockEffects {
// this argument and the in arguments are always assumed live. The vars
// argument is a slice of *Nodes.
func (lv *liveness) pointerMap(liveout bitvec.BitVec, vars []*ir.Name, args, locals bitvec.BitVec) {
var slotsSeen map[int64]*ir.Name
checkForDuplicateSlots := base.Debug.MergeLocals != 0
if checkForDuplicateSlots {
slotsSeen = make(map[int64]*ir.Name)
}
for i := int32(0); ; i++ {
i = liveout.Next(i)
if i < 0 {
@ -468,6 +482,12 @@ func (lv *liveness) pointerMap(liveout bitvec.BitVec, vars []*ir.Name, args, loc
fallthrough // PPARAMOUT in registers acts memory-allocates like an AUTO
case ir.PAUTO:
typebits.Set(node.Type(), node.FrameOffset()+lv.stkptrsize, locals)
if checkForDuplicateSlots {
if prev, ok := slotsSeen[node.FrameOffset()]; ok {
base.FatalfAt(node.Pos(), "two vars live at pointerMap generation: %q and %q", prev.Sym().Name, node.Sym().Name)
}
slotsSeen[node.FrameOffset()] = node
}
}
}
}

View File

@ -314,8 +314,9 @@ func checkFunc(f *Func) {
f.Fatalf("bad arg 1 type to %s: want integer, have %s", v.Op, v.Args[1].LongString())
}
case OpVarDef:
if !v.Aux.(*ir.Name).Type().HasPointers() {
f.Fatalf("vardef must have pointer type %s", v.Aux.(*ir.Name).Type().String())
n := v.Aux.(*ir.Name)
if !n.Type().HasPointers() && !IsMergeCandidate(n) {
f.Fatalf("vardef must be merge candidate or have pointer type %s", v.Aux.(*ir.Name).Type().String())
}
case OpNilCheck:
// nil checks have pointer type before scheduling, and

View File

@ -838,5 +838,25 @@ func (f *Func) useFMA(v *Value) bool {
// NewLocal returns a new anonymous local variable of the given type.
func (f *Func) NewLocal(pos src.XPos, typ *types.Type) *ir.Name {
return typecheck.TempAt(pos, f.fe.Func(), typ) // Note: adds new auto to fn.Dcl list
nn := typecheck.TempAt(pos, f.fe.Func(), typ) // Note: adds new auto to fn.Dcl list
nn.SetNonMergeable(true)
return nn
}
// IsMergeCandidate returns true if variable n could participate in
// stack slot merging. For now we're restricting the set to items
// larger than what CanSSA would allow (approximately); we disallow
// things marked as open defer slots so as to avoid complicating
// liveness analysis.
func IsMergeCandidate(n *ir.Name) bool {
if base.Debug.MergeLocals == 0 ||
base.Flag.N != 0 ||
n.Class != ir.PAUTO ||
n.Type().Size() <= int64(3*types.PtrSize) ||
n.Addrtaken() ||
n.NonMergeable() ||
n.OpenDeferSlot() {
return false
}
return true
}
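For a concrete (hedged) reading of these criteria on a 64-bit target,
where types.PtrSize is 8 and the size threshold is therefore 24 bytes
(assuming -N is not in effect and mergelocals is enabled):

    package p

    //go:noinline
    func escape(p *[4]int64) { p[1] = 7 }

    func crit(i int) int {
        var small [3]int64 // 24 bytes: not greater than 3*PtrSize, fails the size test
        var big [4]int64   // 32 bytes: passes the size test
        var taken [4]int64 // large enough, but taking its address disqualifies it
        escape(&taken)
        small[0] = int64(i)
        big[1] = int64(i)
        return int(small[0] + big[1] + taken[1])
    }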

View File

@ -13,6 +13,7 @@ import (
"cmd/compile/internal/base"
"cmd/compile/internal/ir"
"cmd/compile/internal/liveness"
"cmd/compile/internal/objw"
"cmd/compile/internal/ssa"
"cmd/compile/internal/types"
@ -151,6 +152,18 @@ func (s *ssafn) AllocFrame(f *ssa.Func) {
}
}
var mls *liveness.MergeLocalsState
if base.Debug.MergeLocals != 0 {
mls = liveness.MergeLocals(fn, f)
if base.Debug.MergeLocalsTrace > 0 && mls != nil {
fmt.Fprintf(os.Stderr, "%s: %d bytes of stack space saved via stack slot merging\n", ir.FuncName(fn), mls.EstSavings())
if base.Debug.MergeLocalsTrace > 1 {
fmt.Fprintf(os.Stderr, "=-= merge locals state for %v:\n%v",
fn, mls)
}
}
}
// Use sort.SliceStable instead of sort.Slice so stack layout (and thus
// compiler output) is less sensitive to frontend changes that
// introduce or remove unused variables.
@ -158,6 +171,22 @@ func (s *ssafn) AllocFrame(f *ssa.Func) {
return cmpstackvarlt(fn.Dcl[i], fn.Dcl[j])
})
if base.Debug.MergeLocalsTrace > 1 && mls != nil {
fmt.Fprintf(os.Stderr, "=-= sorted DCL for %v:\n", fn)
for i, v := range fn.Dcl {
if !ssa.IsMergeCandidate(v) {
continue
}
fmt.Fprintf(os.Stderr, " %d: %q isleader=%v subsumed=%v used=%v\n", i, v.Sym().Name, mls.IsLeader(v), mls.Subsumed(v), v.Used())
}
}
var leaders map[*ir.Name]int64
if mls != nil {
leaders = make(map[*ir.Name]int64)
}
// Reassign stack offsets of the locals that are used.
lastHasPtr := false
for i, n := range fn.Dcl {
@ -165,12 +194,14 @@ func (s *ssafn) AllocFrame(f *ssa.Func) {
// i.e., stack assign if AUTO, or if PARAMOUT in registers (which has no predefined spill locations)
continue
}
if mls != nil && mls.Subsumed(n) {
continue
}
if !n.Used() {
fn.DebugInfo.(*ssa.FuncDebug).OptDcl = fn.Dcl[i:]
fn.Dcl = fn.Dcl[:i]
break
}
types.CalcSize(n.Type())
w := n.Type().Size()
if w >= types.MaxWidth || w < 0 {
@ -195,6 +226,42 @@ func (s *ssafn) AllocFrame(f *ssa.Func) {
lastHasPtr = false
}
n.SetFrameOffset(-s.stksize)
if mls != nil && mls.IsLeader(n) {
leaders[n] = -s.stksize
}
}
if mls != nil {
followers := []*ir.Name{}
newdcl := make([]*ir.Name, 0, len(fn.Dcl))
for i := 0; i < len(fn.Dcl); i++ {
n := fn.Dcl[i]
if mls.Subsumed(n) {
continue
}
newdcl = append(newdcl, n)
if off, ok := leaders[n]; ok {
followers = mls.Followers(n, followers)
for _, f := range followers {
// Set the stack offset for each follower to be
// the same as the leader.
f.SetFrameOffset(off)
}
// position followers immediately after leader
newdcl = append(newdcl, followers...)
}
}
fn.Dcl = newdcl
}
if base.Debug.MergeLocalsTrace > 1 {
fmt.Fprintf(os.Stderr, "=-= stack layout for %v:\n", fn)
for i, v := range fn.Dcl {
if v.Op() != ir.ONAME || (v.Class != ir.PAUTO && !(v.Class == ir.PPARAMOUT && v.IsOutputParamInRegisters())) {
continue
}
fmt.Fprintf(os.Stderr, " %d: %q frameoff %d used=%v\n", i, v.Sym().Name, v.FrameOffset(), v.Used())
}
}
s.stksize = types.RoundUp(s.stksize, s.stkalign)

View File

@ -633,7 +633,7 @@ func (s *state) zeroResults() {
if typ := n.Type(); ssa.CanSSA(typ) {
s.assign(n, s.zeroVal(typ), false, 0)
} else {
if typ.HasPointers() {
if typ.HasPointers() || ssa.IsMergeCandidate(n) {
s.vars[memVar] = s.newValue1A(ssa.OpVarDef, types.TypeMem, n, s.mem())
}
s.zero(n.Type(), s.decladdrs[n])
@ -3942,7 +3942,7 @@ func (s *state) assignWhichMayOverlap(left ir.Node, right *ssa.Value, deref bool
// If this assignment clobbers an entire local variable, then emit
// OpVarDef so liveness analysis knows the variable is redefined.
if base, ok := clobberBase(left).(*ir.Name); ok && base.OnStack() && skip == 0 && t.HasPointers() {
if base, ok := clobberBase(left).(*ir.Name); ok && base.OnStack() && skip == 0 && (t.HasPointers() || ssa.IsMergeCandidate(base)) {
s.vars[memVar] = s.newValue1Apos(ssa.OpVarDef, types.TypeMem, base, s.mem(), !ir.IsAutoTmp(base))
}
@ -5382,7 +5382,8 @@ func (s *state) call(n *ir.CallExpr, k callKind, returnResultAddr bool, deferExt
}
// Make a defer struct on the stack.
t := deferstruct()
_, addr := s.temp(n.Pos(), t)
n, addr := s.temp(n.Pos(), t)
n.SetNonMergeable(true)
s.store(closure.Type,
s.newValue1I(ssa.OpOffPtr, closure.Type.PtrTo(), t.FieldOff(deferStructFnField), addr),
closure)
@ -6886,7 +6887,7 @@ func (s *state) dottype1(pos src.XPos, src, dst *types.Type, iface, source, targ
// temp allocates a temp of type t at position pos
func (s *state) temp(pos src.XPos, t *types.Type) (*ir.Name, *ssa.Value) {
tmp := typecheck.TempAt(pos, s.curfn, t)
if t.HasPointers() {
if t.HasPointers() || (ssa.IsMergeCandidate(tmp) && t != deferstruct()) {
s.vars[memVar] = s.newValue1A(ssa.OpVarDef, types.TypeMem, tmp, s.mem())
}
addr := s.addr(tmp)

View File

@ -0,0 +1,184 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package test
import (
"cmd/compile/internal/ir"
"cmd/compile/internal/liveness"
"cmd/compile/internal/typecheck"
"cmd/compile/internal/types"
"cmd/internal/src"
"internal/testenv"
"path/filepath"
"slices"
"sort"
"strings"
"testing"
)
func TestMergeLocalState(t *testing.T) {
mkiv := func(name string) *ir.Name {
i32 := types.Types[types.TINT32]
s := typecheck.Lookup(name)
v := ir.NewNameAt(src.NoXPos, s, i32)
return v
}
v1 := mkiv("v1")
v2 := mkiv("v2")
v3 := mkiv("v3")
testcases := []struct {
vars []*ir.Name
partition map[*ir.Name][]int
experr bool
}{
{
vars: []*ir.Name{v1, v2, v3},
partition: map[*ir.Name][]int{
v1: []int{0, 1, 2},
v2: []int{0, 1, 2},
v3: []int{0, 1, 2},
},
experr: false,
},
{
// invalid mls.v slot -1
vars: []*ir.Name{v1, v2, v3},
partition: map[*ir.Name][]int{
v1: []int{-1, 0},
v2: []int{0, 1, 2},
v3: []int{0, 1, 2},
},
experr: true,
},
{
// duplicate var in v
vars: []*ir.Name{v1, v2, v2},
partition: map[*ir.Name][]int{
v1: []int{0, 1, 2},
v2: []int{0, 1, 2},
v3: []int{0, 1, 2},
},
experr: true,
},
{
// single element in partition
vars: []*ir.Name{v1, v2, v3},
partition: map[*ir.Name][]int{
v1: []int{0},
v2: []int{0, 1, 2},
v3: []int{0, 1, 2},
},
experr: true,
},
{
// missing element 2
vars: []*ir.Name{v1, v2, v3},
partition: map[*ir.Name][]int{
v1: []int{0, 1},
v2: []int{0, 1},
v3: []int{0, 1},
},
experr: true,
},
{
// partitions disagree for v1 vs v2
vars: []*ir.Name{v1, v2, v3},
partition: map[*ir.Name][]int{
v1: []int{0, 1, 2},
v2: []int{1, 0, 2},
v3: []int{0, 1, 2},
},
experr: true,
},
}
for k, testcase := range testcases {
mls, err := liveness.MakeMergeLocalsState(testcase.partition, testcase.vars)
t.Logf("tc %d err is %v\n", k, err)
if testcase.experr && err == nil {
t.Fatalf("tc:%d missing error mls %v", k, mls)
} else if !testcase.experr && err != nil {
t.Fatalf("tc:%d unexpected error mls %v", k, err)
}
if mls != nil {
t.Logf("tc %d: mls: %v\n", k, mls.String())
}
}
}
func TestMergeLocalsIntegration(t *testing.T) {
testenv.MustHaveGoBuild(t)
// This test does a build of a specific canned package to
// check whether merging of stack slots is taking place.
// The idea is to do the compile with a trace option turned
// on and then pick up on the frame offsets of specific
// variables.
//
// Stack slot merging is a greedy algorithm, and there can
// be many possible ways to overlap a given set of candidate
// variables, all of them legal. Rather than locking down
// a specific set of overlappings or frame offsets, this
// tests just verifies that there is one clump of 3 vars that
// get overlapped, then another clump of 2 that share the same
// frame offset.
//
// The expected output blob we're interested in looks like this:
//
// =-= stack layout for ABC:
// 2: "p1" frameoff -8200 used=true
// 3: "xp3" frameoff -8200 used=true
// 4: "xp4" frameoff -8200 used=true
// 5: "p2" frameoff -16400 used=true
// 6: "s" frameoff -24592 used=true
// 7: "v1" frameoff -32792 used=true
// 8: "v3" frameoff -32792 used=true
// 9: "v2" frameoff -40992 used=true
//
tmpdir := t.TempDir()
src := filepath.Join("testdata", "mergelocals", "integration.go")
obj := filepath.Join(tmpdir, "p.a")
out, err := testenv.Command(t, testenv.GoToolPath(t), "tool", "compile", "-p=p", "-c", "1", "-o", obj, "-d=mergelocalstrace=2,mergelocals=1", src).CombinedOutput()
if err != nil {
t.Fatalf("failed to compile: %v\n%s", err, out)
}
vars := make(map[string]string)
lines := strings.Split(string(out), "\n")
prolog := true
varsAtFrameOffset := make(map[string]int)
for _, line := range lines {
if line == "=-= stack layout for ABC:" {
prolog = false
continue
} else if prolog || line == "" {
continue
}
fields := strings.Fields(line)
if len(fields) != 5 {
t.Fatalf("bad trace output line: %s", line)
}
vname := fields[1]
frameoff := fields[3]
varsAtFrameOffset[frameoff] = varsAtFrameOffset[frameoff] + 1
vars[vname] = frameoff
}
wantvnum := 8
gotvnum := len(vars)
if wantvnum != gotvnum {
t.Fatalf("expected trace output on %d vars got %d\n", wantvnum, gotvnum)
}
// We expect one clump of 3, another clump of 2, and the rest singletons.
expected := []int{1, 1, 1, 2, 3}
got := []int{}
for _, v := range varsAtFrameOffset {
got = append(got, v)
}
sort.Ints(got)
if !slices.Equal(got, expected) {
t.Fatalf("expected variable clumps %+v not equal to what we got: %+v", expected, got)
}
}

View File

@ -0,0 +1,83 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package p
// This type and the following one will share the same GC shape and size.
type Pointery struct {
p *Pointery
x [1024]int
}
type Pointery2 struct {
p *Pointery2
x [1024]int
}
// This type and the following one will have the same size.
type Vanilla struct {
np uintptr
x [1024]int
}
type Vanilla2 struct {
np uintptr
x [1023]int
y int
}
type Single struct {
np uintptr
x [1023]int
}
func ABC(i, j int) int {
r := 0
// here v1 interferes with v2 but could be overlapped with v3.
// we can also overlap v1 with v3.
var v1 Vanilla
if i < 101 {
var v2 Vanilla
v1.x[i] = j
r += v1.x[j]
v2.x[i] = j
r += v2.x[j]
}
{
var v3 Vanilla2
v3.x[i] = j
r += v3.x[j]
}
var s Single
s.x[i] = j
r += s.x[j]
// Here p1 and p2 interfere, but p1 could be overlapped with xp3.
var p1, p2 Pointery
p1.x[i] = j
r += p1.x[j]
p2.x[i] = j
r += p2.x[j]
{
var xp3 Pointery2
xp3.x[i] = j
r += xp3.x[j]
}
if i == j*2 {
// p2 live on this path
p2.x[i] += j
r += p2.x[j]
} else {
// p2 not live on this path
var xp4 Pointery2
xp4.x[i] = j
r += xp4.x[j]
}
return r
}

View File

@ -25,7 +25,9 @@ func initStackTemp(init *ir.Nodes, tmp *ir.Name, val ir.Node) *ir.AddrExpr {
// allocated temporary variable of the given type. Statements to
// zero-initialize tmp are appended to init.
func stackTempAddr(init *ir.Nodes, typ *types.Type) *ir.AddrExpr {
return initStackTemp(init, typecheck.TempAt(base.Pos, ir.CurFunc, typ), nil)
n := typecheck.TempAt(base.Pos, ir.CurFunc, typ)
n.SetNonMergeable(true)
return initStackTemp(init, n, nil)
}
// stackBufAddr returns the expression &tmp, where tmp is a newly

View File

@ -11,214 +11,423 @@
package main
var z [10<<20]byte
var z [10 << 20]byte
func main() { // GC_ERROR "stack frame too large"
// seq 1 206 | sed 's/.*/ var x& [10<<20]byte; z = x&/'
var x1 [10<<20]byte; z = x1
var x2 [10<<20]byte; z = x2
var x3 [10<<20]byte; z = x3
var x4 [10<<20]byte; z = x4
var x5 [10<<20]byte; z = x5
var x6 [10<<20]byte; z = x6
var x7 [10<<20]byte; z = x7
var x8 [10<<20]byte; z = x8
var x9 [10<<20]byte; z = x9
var x10 [10<<20]byte; z = x10
var x11 [10<<20]byte; z = x11
var x12 [10<<20]byte; z = x12
var x13 [10<<20]byte; z = x13
var x14 [10<<20]byte; z = x14
var x15 [10<<20]byte; z = x15
var x16 [10<<20]byte; z = x16
var x17 [10<<20]byte; z = x17
var x18 [10<<20]byte; z = x18
var x19 [10<<20]byte; z = x19
var x20 [10<<20]byte; z = x20
var x21 [10<<20]byte; z = x21
var x22 [10<<20]byte; z = x22
var x23 [10<<20]byte; z = x23
var x24 [10<<20]byte; z = x24
var x25 [10<<20]byte; z = x25
var x26 [10<<20]byte; z = x26
var x27 [10<<20]byte; z = x27
var x28 [10<<20]byte; z = x28
var x29 [10<<20]byte; z = x29
var x30 [10<<20]byte; z = x30
var x31 [10<<20]byte; z = x31
var x32 [10<<20]byte; z = x32
var x33 [10<<20]byte; z = x33
var x34 [10<<20]byte; z = x34
var x35 [10<<20]byte; z = x35
var x36 [10<<20]byte; z = x36
var x37 [10<<20]byte; z = x37
var x38 [10<<20]byte; z = x38
var x39 [10<<20]byte; z = x39
var x40 [10<<20]byte; z = x40
var x41 [10<<20]byte; z = x41
var x42 [10<<20]byte; z = x42
var x43 [10<<20]byte; z = x43
var x44 [10<<20]byte; z = x44
var x45 [10<<20]byte; z = x45
var x46 [10<<20]byte; z = x46
var x47 [10<<20]byte; z = x47
var x48 [10<<20]byte; z = x48
var x49 [10<<20]byte; z = x49
var x50 [10<<20]byte; z = x50
var x51 [10<<20]byte; z = x51
var x52 [10<<20]byte; z = x52
var x53 [10<<20]byte; z = x53
var x54 [10<<20]byte; z = x54
var x55 [10<<20]byte; z = x55
var x56 [10<<20]byte; z = x56
var x57 [10<<20]byte; z = x57
var x58 [10<<20]byte; z = x58
var x59 [10<<20]byte; z = x59
var x60 [10<<20]byte; z = x60
var x61 [10<<20]byte; z = x61
var x62 [10<<20]byte; z = x62
var x63 [10<<20]byte; z = x63
var x64 [10<<20]byte; z = x64
var x65 [10<<20]byte; z = x65
var x66 [10<<20]byte; z = x66
var x67 [10<<20]byte; z = x67
var x68 [10<<20]byte; z = x68
var x69 [10<<20]byte; z = x69
var x70 [10<<20]byte; z = x70
var x71 [10<<20]byte; z = x71
var x72 [10<<20]byte; z = x72
var x73 [10<<20]byte; z = x73
var x74 [10<<20]byte; z = x74
var x75 [10<<20]byte; z = x75
var x76 [10<<20]byte; z = x76
var x77 [10<<20]byte; z = x77
var x78 [10<<20]byte; z = x78
var x79 [10<<20]byte; z = x79
var x80 [10<<20]byte; z = x80
var x81 [10<<20]byte; z = x81
var x82 [10<<20]byte; z = x82
var x83 [10<<20]byte; z = x83
var x84 [10<<20]byte; z = x84
var x85 [10<<20]byte; z = x85
var x86 [10<<20]byte; z = x86
var x87 [10<<20]byte; z = x87
var x88 [10<<20]byte; z = x88
var x89 [10<<20]byte; z = x89
var x90 [10<<20]byte; z = x90
var x91 [10<<20]byte; z = x91
var x92 [10<<20]byte; z = x92
var x93 [10<<20]byte; z = x93
var x94 [10<<20]byte; z = x94
var x95 [10<<20]byte; z = x95
var x96 [10<<20]byte; z = x96
var x97 [10<<20]byte; z = x97
var x98 [10<<20]byte; z = x98
var x99 [10<<20]byte; z = x99
var x100 [10<<20]byte; z = x100
var x101 [10<<20]byte; z = x101
var x102 [10<<20]byte; z = x102
var x103 [10<<20]byte; z = x103
var x104 [10<<20]byte; z = x104
var x105 [10<<20]byte; z = x105
var x106 [10<<20]byte; z = x106
var x107 [10<<20]byte; z = x107
var x108 [10<<20]byte; z = x108
var x109 [10<<20]byte; z = x109
var x110 [10<<20]byte; z = x110
var x111 [10<<20]byte; z = x111
var x112 [10<<20]byte; z = x112
var x113 [10<<20]byte; z = x113
var x114 [10<<20]byte; z = x114
var x115 [10<<20]byte; z = x115
var x116 [10<<20]byte; z = x116
var x117 [10<<20]byte; z = x117
var x118 [10<<20]byte; z = x118
var x119 [10<<20]byte; z = x119
var x120 [10<<20]byte; z = x120
var x121 [10<<20]byte; z = x121
var x122 [10<<20]byte; z = x122
var x123 [10<<20]byte; z = x123
var x124 [10<<20]byte; z = x124
var x125 [10<<20]byte; z = x125
var x126 [10<<20]byte; z = x126
var x127 [10<<20]byte; z = x127
var x128 [10<<20]byte; z = x128
var x129 [10<<20]byte; z = x129
var x130 [10<<20]byte; z = x130
var x131 [10<<20]byte; z = x131
var x132 [10<<20]byte; z = x132
var x133 [10<<20]byte; z = x133
var x134 [10<<20]byte; z = x134
var x135 [10<<20]byte; z = x135
var x136 [10<<20]byte; z = x136
var x137 [10<<20]byte; z = x137
var x138 [10<<20]byte; z = x138
var x139 [10<<20]byte; z = x139
var x140 [10<<20]byte; z = x140
var x141 [10<<20]byte; z = x141
var x142 [10<<20]byte; z = x142
var x143 [10<<20]byte; z = x143
var x144 [10<<20]byte; z = x144
var x145 [10<<20]byte; z = x145
var x146 [10<<20]byte; z = x146
var x147 [10<<20]byte; z = x147
var x148 [10<<20]byte; z = x148
var x149 [10<<20]byte; z = x149
var x150 [10<<20]byte; z = x150
var x151 [10<<20]byte; z = x151
var x152 [10<<20]byte; z = x152
var x153 [10<<20]byte; z = x153
var x154 [10<<20]byte; z = x154
var x155 [10<<20]byte; z = x155
var x156 [10<<20]byte; z = x156
var x157 [10<<20]byte; z = x157
var x158 [10<<20]byte; z = x158
var x159 [10<<20]byte; z = x159
var x160 [10<<20]byte; z = x160
var x161 [10<<20]byte; z = x161
var x162 [10<<20]byte; z = x162
var x163 [10<<20]byte; z = x163
var x164 [10<<20]byte; z = x164
var x165 [10<<20]byte; z = x165
var x166 [10<<20]byte; z = x166
var x167 [10<<20]byte; z = x167
var x168 [10<<20]byte; z = x168
var x169 [10<<20]byte; z = x169
var x170 [10<<20]byte; z = x170
var x171 [10<<20]byte; z = x171
var x172 [10<<20]byte; z = x172
var x173 [10<<20]byte; z = x173
var x174 [10<<20]byte; z = x174
var x175 [10<<20]byte; z = x175
var x176 [10<<20]byte; z = x176
var x177 [10<<20]byte; z = x177
var x178 [10<<20]byte; z = x178
var x179 [10<<20]byte; z = x179
var x180 [10<<20]byte; z = x180
var x181 [10<<20]byte; z = x181
var x182 [10<<20]byte; z = x182
var x183 [10<<20]byte; z = x183
var x184 [10<<20]byte; z = x184
var x185 [10<<20]byte; z = x185
var x186 [10<<20]byte; z = x186
var x187 [10<<20]byte; z = x187
var x188 [10<<20]byte; z = x188
var x189 [10<<20]byte; z = x189
var x190 [10<<20]byte; z = x190
var x191 [10<<20]byte; z = x191
var x192 [10<<20]byte; z = x192
var x193 [10<<20]byte; z = x193
var x194 [10<<20]byte; z = x194
var x195 [10<<20]byte; z = x195
var x196 [10<<20]byte; z = x196
var x197 [10<<20]byte; z = x197
var x198 [10<<20]byte; z = x198
var x199 [10<<20]byte; z = x199
var x200 [10<<20]byte; z = x200
var x201 [10<<20]byte; z = x201
var x202 [10<<20]byte; z = x202
var x203 [10<<20]byte; z = x203
var x204 [10<<20]byte; z = x204
var x205 [10<<20]byte; z = x205
var x206 [10<<20]byte; z = x206
// seq 1 206 | sed 's/.*/ var x& [10<<20]byte/'
// seq 1 206 | sed 's/.*/ z = x&/'
var x1 [10<<20]byte
var x2 [10<<20]byte
var x3 [10<<20]byte
var x4 [10<<20]byte
var x5 [10<<20]byte
var x6 [10<<20]byte
var x7 [10<<20]byte
var x8 [10<<20]byte
var x9 [10<<20]byte
var x10 [10<<20]byte
var x11 [10<<20]byte
var x12 [10<<20]byte
var x13 [10<<20]byte
var x14 [10<<20]byte
var x15 [10<<20]byte
var x16 [10<<20]byte
var x17 [10<<20]byte
var x18 [10<<20]byte
var x19 [10<<20]byte
var x20 [10<<20]byte
var x21 [10<<20]byte
var x22 [10<<20]byte
var x23 [10<<20]byte
var x24 [10<<20]byte
var x25 [10<<20]byte
var x26 [10<<20]byte
var x27 [10<<20]byte
var x28 [10<<20]byte
var x29 [10<<20]byte
var x30 [10<<20]byte
var x31 [10<<20]byte
var x32 [10<<20]byte
var x33 [10<<20]byte
var x34 [10<<20]byte
var x35 [10<<20]byte
var x36 [10<<20]byte
var x37 [10<<20]byte
var x38 [10<<20]byte
var x39 [10<<20]byte
var x40 [10<<20]byte
var x41 [10<<20]byte
var x42 [10<<20]byte
var x43 [10<<20]byte
var x44 [10<<20]byte
var x45 [10<<20]byte
var x46 [10<<20]byte
var x47 [10<<20]byte
var x48 [10<<20]byte
var x49 [10<<20]byte
var x50 [10<<20]byte
var x51 [10<<20]byte
var x52 [10<<20]byte
var x53 [10<<20]byte
var x54 [10<<20]byte
var x55 [10<<20]byte
var x56 [10<<20]byte
var x57 [10<<20]byte
var x58 [10<<20]byte
var x59 [10<<20]byte
var x60 [10<<20]byte
var x61 [10<<20]byte
var x62 [10<<20]byte
var x63 [10<<20]byte
var x64 [10<<20]byte
var x65 [10<<20]byte
var x66 [10<<20]byte
var x67 [10<<20]byte
var x68 [10<<20]byte
var x69 [10<<20]byte
var x70 [10<<20]byte
var x71 [10<<20]byte
var x72 [10<<20]byte
var x73 [10<<20]byte
var x74 [10<<20]byte
var x75 [10<<20]byte
var x76 [10<<20]byte
var x77 [10<<20]byte
var x78 [10<<20]byte
var x79 [10<<20]byte
var x80 [10<<20]byte
var x81 [10<<20]byte
var x82 [10<<20]byte
var x83 [10<<20]byte
var x84 [10<<20]byte
var x85 [10<<20]byte
var x86 [10<<20]byte
var x87 [10<<20]byte
var x88 [10<<20]byte
var x89 [10<<20]byte
var x90 [10<<20]byte
var x91 [10<<20]byte
var x92 [10<<20]byte
var x93 [10<<20]byte
var x94 [10<<20]byte
var x95 [10<<20]byte
var x96 [10<<20]byte
var x97 [10<<20]byte
var x98 [10<<20]byte
var x99 [10<<20]byte
var x100 [10<<20]byte
var x101 [10<<20]byte
var x102 [10<<20]byte
var x103 [10<<20]byte
var x104 [10<<20]byte
var x105 [10<<20]byte
var x106 [10<<20]byte
var x107 [10<<20]byte
var x108 [10<<20]byte
var x109 [10<<20]byte
var x110 [10<<20]byte
var x111 [10<<20]byte
var x112 [10<<20]byte
var x113 [10<<20]byte
var x114 [10<<20]byte
var x115 [10<<20]byte
var x116 [10<<20]byte
var x117 [10<<20]byte
var x118 [10<<20]byte
var x119 [10<<20]byte
var x120 [10<<20]byte
var x121 [10<<20]byte
var x122 [10<<20]byte
var x123 [10<<20]byte
var x124 [10<<20]byte
var x125 [10<<20]byte
var x126 [10<<20]byte
var x127 [10<<20]byte
var x128 [10<<20]byte
var x129 [10<<20]byte
var x130 [10<<20]byte
var x131 [10<<20]byte
var x132 [10<<20]byte
var x133 [10<<20]byte
var x134 [10<<20]byte
var x135 [10<<20]byte
var x136 [10<<20]byte
var x137 [10<<20]byte
var x138 [10<<20]byte
var x139 [10<<20]byte
var x140 [10<<20]byte
var x141 [10<<20]byte
var x142 [10<<20]byte
var x143 [10<<20]byte
var x144 [10<<20]byte
var x145 [10<<20]byte
var x146 [10<<20]byte
var x147 [10<<20]byte
var x148 [10<<20]byte
var x149 [10<<20]byte
var x150 [10<<20]byte
var x151 [10<<20]byte
var x152 [10<<20]byte
var x153 [10<<20]byte
var x154 [10<<20]byte
var x155 [10<<20]byte
var x156 [10<<20]byte
var x157 [10<<20]byte
var x158 [10<<20]byte
var x159 [10<<20]byte
var x160 [10<<20]byte
var x161 [10<<20]byte
var x162 [10<<20]byte
var x163 [10<<20]byte
var x164 [10<<20]byte
var x165 [10<<20]byte
var x166 [10<<20]byte
var x167 [10<<20]byte
var x168 [10<<20]byte
var x169 [10<<20]byte
var x170 [10<<20]byte
var x171 [10<<20]byte
var x172 [10<<20]byte
var x173 [10<<20]byte
var x174 [10<<20]byte
var x175 [10<<20]byte
var x176 [10<<20]byte
var x177 [10<<20]byte
var x178 [10<<20]byte
var x179 [10<<20]byte
var x180 [10<<20]byte
var x181 [10<<20]byte
var x182 [10<<20]byte
var x183 [10<<20]byte
var x184 [10<<20]byte
var x185 [10<<20]byte
var x186 [10<<20]byte
var x187 [10<<20]byte
var x188 [10<<20]byte
var x189 [10<<20]byte
var x190 [10<<20]byte
var x191 [10<<20]byte
var x192 [10<<20]byte
var x193 [10<<20]byte
var x194 [10<<20]byte
var x195 [10<<20]byte
var x196 [10<<20]byte
var x197 [10<<20]byte
var x198 [10<<20]byte
var x199 [10<<20]byte
var x200 [10<<20]byte
var x201 [10<<20]byte
var x202 [10<<20]byte
var x203 [10<<20]byte
var x204 [10<<20]byte
var x205 [10<<20]byte
var x206 [10<<20]byte
var x207 [10<<20]byte
z = x1
z = x2
z = x3
z = x4
z = x5
z = x6
z = x7
z = x8
z = x9
z = x10
z = x11
z = x12
z = x13
z = x14
z = x15
z = x16
z = x17
z = x18
z = x19
z = x20
z = x21
z = x22
z = x23
z = x24
z = x25
z = x26
z = x27
z = x28
z = x29
z = x30
z = x31
z = x32
z = x33
z = x34
z = x35
z = x36
z = x37
z = x38
z = x39
z = x40
z = x41
z = x42
z = x43
z = x44
z = x45
z = x46
z = x47
z = x48
z = x49
z = x50
z = x51
z = x52
z = x53
z = x54
z = x55
z = x56
z = x57
z = x58
z = x59
z = x60
z = x61
z = x62
z = x63
z = x64
z = x65
z = x66
z = x67
z = x68
z = x69
z = x70
z = x71
z = x72
z = x73
z = x74
z = x75
z = x76
z = x77
z = x78
z = x79
z = x80
z = x81
z = x82
z = x83
z = x84
z = x85
z = x86
z = x87
z = x88
z = x89
z = x90
z = x91
z = x92
z = x93
z = x94
z = x95
z = x96
z = x97
z = x98
z = x99
z = x100
z = x101
z = x102
z = x103
z = x104
z = x105
z = x106
z = x107
z = x108
z = x109
z = x110
z = x111
z = x112
z = x113
z = x114
z = x115
z = x116
z = x117
z = x118
z = x119
z = x120
z = x121
z = x122
z = x123
z = x124
z = x125
z = x126
z = x127
z = x128
z = x129
z = x130
z = x131
z = x132
z = x133
z = x134
z = x135
z = x136
z = x137
z = x138
z = x139
z = x140
z = x141
z = x142
z = x143
z = x144
z = x145
z = x146
z = x147
z = x148
z = x149
z = x150
z = x151
z = x152
z = x153
z = x154
z = x155
z = x156
z = x157
z = x158
z = x159
z = x160
z = x161
z = x162
z = x163
z = x164
z = x165
z = x166
z = x167
z = x168
z = x169
z = x170
z = x171
z = x172
z = x173
z = x174
z = x175
z = x176
z = x177
z = x178
z = x179
z = x180
z = x181
z = x182
z = x183
z = x184
z = x185
z = x186
z = x187
z = x188
z = x189
z = x190
z = x191
z = x192
z = x193
z = x194
z = x195
z = x196
z = x197
z = x198
z = x199
z = x200
z = x201
z = x202
z = x203
z = x204
z = x205
z = x206
z = x207
}