diff --git a/src/cmd/compile/internal/ssa/dom.go b/src/cmd/compile/internal/ssa/dom.go index 4790e3383a..3dae5fbf07 100644 --- a/src/cmd/compile/internal/ssa/dom.go +++ b/src/cmd/compile/internal/ssa/dom.go @@ -296,7 +296,8 @@ func dominatorsSimple(f *Func) []*Block { // intersect finds the closest dominator of both b and c. // It requires a postorder numbering of all the blocks. func intersect(b, c *Block, postnum []int, idom []*Block) *Block { - // TODO: This loop is O(n^2). See BenchmarkNilCheckDeep*. + // TODO: This loop is O(n^2). It used to be used in nilcheck, + // see BenchmarkNilCheckDeep*. for b != c { if postnum[b.ID] < postnum[c.ID] { b = idom[b.ID] diff --git a/src/cmd/compile/internal/ssa/func.go b/src/cmd/compile/internal/ssa/func.go index b005876096..5dc352e991 100644 --- a/src/cmd/compile/internal/ssa/func.go +++ b/src/cmd/compile/internal/ssa/func.go @@ -104,6 +104,31 @@ func (f *Func) newValue(op Op, t Type, b *Block, pos src.XPos) *Value { return v } +// newValueNoBlock allocates a new Value with the given fields. +// The returned value is not placed in any block. Once the caller +// decides on a block b, it must set b.Block and append +// the returned value to b.Values. +func (f *Func) newValueNoBlock(op Op, t Type, pos src.XPos) *Value { + var v *Value + if f.freeValues != nil { + v = f.freeValues + f.freeValues = v.argstorage[0] + v.argstorage[0] = nil + } else { + ID := f.vid.get() + if int(ID) < len(f.Config.values) { + v = &f.Config.values[ID] + } else { + v = &Value{ID: ID} + } + } + v.Op = op + v.Type = t + v.Block = nil // caller must fix this. + v.Pos = pos + return v +} + // logPassStat writes a string key and int value as a warning in a // tab-separated format easily handled by spreadsheets or awk. // file names, lines, and function names are included to provide enough (?) diff --git a/src/cmd/compile/internal/ssa/regalloc.go b/src/cmd/compile/internal/ssa/regalloc.go index e40fe17ad4..95f2f7c91a 100644 --- a/src/cmd/compile/internal/ssa/regalloc.go +++ b/src/cmd/compile/internal/ssa/regalloc.go @@ -26,28 +26,48 @@ // Spilling // -// For every value, we generate a spill immediately after the value itself. -// x = Op y z : AX -// x2 = StoreReg x -// While AX still holds x, any uses of x will use that value. When AX is needed -// for another value, we simply reuse AX. Spill code has already been generated -// so there is no code generated at "spill" time. When x is referenced -// subsequently, we issue a load to restore x to a register using x2 as -// its argument: -// x3 = Restore x2 : CX -// x3 can then be used wherever x is referenced again. -// If the spill (x2) is never used, it will be removed at the end of regalloc. +// During the normal course of the allocator, we might throw a still-live +// value out of all registers. When that value is subsequently used, we must +// load it from a slot on the stack. We must also issue an instruction to +// initialize that stack location with a copy of v. +// +// pre-regalloc: +// (1) v = Op ... +// (2) x = Op ... +// (3) ... = Op v ... +// +// post-regalloc: +// (1) v = Op ... : AX // computes v, store result in AX +// s = StoreReg v // spill v to a stack slot +// (2) x = Op ... : AX // some other op uses AX +// c = LoadReg s : CX // restore v from stack slot +// (3) ... = Op c ... // use the restored value +// +// Allocation occurs normally until we reach (3) and we realize we have +// a use of v and it isn't in any register. At that point, we allocate +// a spill (a StoreReg) for v. We can't determine the correct place for +// the spill at this point, so we allocate the spill as blockless initially. +// The restore is then generated to load v back into a register so it can +// be used. Subsequent uses of v will use the restored value c instead. +// +// What remains is the question of where to schedule the spill. +// During allocation, we keep track of the dominator of all restores of v. +// The spill of v must dominate that block. The spill must also be issued at +// a point where v is still in a register. +// +// To find the right place, start at b, the block which dominates all restores. +// - If b is v.Block, then issue the spill right after v. +// It is known to be in a register at that point, and dominates any restores. +// - Otherwise, if v is in a register at the start of b, +// put the spill of v at the start of b. +// - Otherwise, set b = immediate dominator of b, and repeat. // // Phi values are special, as always. We define two kinds of phis, those // where the merge happens in a register (a "register" phi) and those where // the merge happens in a stack location (a "stack" phi). // // A register phi must have the phi and all of its inputs allocated to the -// same register. Register phis are spilled similarly to regular ops: -// b1: y = ... : AX b2: z = ... : AX -// goto b3 goto b3 -// b3: x = phi(y, z) : AX -// x2 = StoreReg x +// same register. Register phis are spilled similarly to regular ops. // // A stack phi must have the phi and all of its inputs allocated to the same // stack location. Stack phis start out life already spilled - each phi @@ -91,18 +111,6 @@ // will have no use (so don't run deadcode after regalloc!). // TODO: maybe we should introduce these extra phis? -// Additional not-quite-SSA output occurs when spills are sunk out -// of loops to the targets of exit edges from the loop. Before sinking, -// there is one spill site (one StoreReg) targeting stack slot X, after -// sinking there may be multiple spill sites targeting stack slot X, -// with no phi functions at any join points reachable by the multiple -// spill sites. In addition, uses of the spill from copies of the original -// will not name the copy in their reference; instead they will name -// the original, though both will have the same spill location. The -// first sunk spill will be the original, but moved, to an exit block, -// thus ensuring that there is a definition somewhere corresponding to -// the original spill's uses. - package ssa import ( @@ -186,14 +194,15 @@ type use struct { next *use // linked list of uses of a value in nondecreasing dist order } +// A valState records the register allocation state for a (pre-regalloc) value. type valState struct { regs regMask // the set of registers holding a Value (usually just one) uses *use // list of uses in this block - spill *Value // spilled copy of the Value - spillUsed bool - spillUsedShuffle bool // true if used in shuffling, after ordinary uses - needReg bool // cached value of !v.Type.IsMemory() && !v.Type.IsVoid() && !.v.Type.IsFlags() - rematerializeable bool // cached value of v.rematerializeable() + spill *Value // spilled copy of the Value (if any) + restoreMin int32 // minimum of all restores' blocks' sdom.entry + restoreMax int32 // maximum of all restores' blocks' sdom.exit + needReg bool // cached value of !v.Type.IsMemory() && !v.Type.IsVoid() && !.v.Type.IsFlags() + rematerializeable bool // cached value of v.rematerializeable() } type regState struct { @@ -205,6 +214,7 @@ type regState struct { type regAllocState struct { f *Func + sdom SparseTree registers []Register numRegs register SPReg register @@ -271,15 +281,6 @@ type regAllocState struct { loopnest *loopnest } -type spillToSink struct { - spill *Value // Spill instruction to move (a StoreReg) - dests int32 // Bitmask indicating exit blocks from loop in which spill/val is defined. 1< logSpills { - s.f.Config.Warnl(vi.spill.Pos, "load spill for %v from %v", v, vi.spill) - } - c = s.curBlock.NewValue1(pos, OpLoadReg, v.Type, vi.spill) - vi.spillUsed = true - default: - s.f.Fatalf("attempt to load unspilled value %v", v.LongString()) + spill := s.makeSpill(v, s.curBlock) + if s.f.pass.debug > logSpills { + s.f.Config.Warnl(vi.spill.Pos, "load spill for %v from %v", v, spill) } + c = s.curBlock.NewValue1(pos, OpLoadReg, v.Type, spill) } s.setOrig(c, v) s.assignReg(r, v, c) @@ -615,6 +634,7 @@ func (s *regAllocState) init(f *Func) { s.endRegs = make([][]endReg, f.NumBlocks()) s.startRegs = make([][]startReg, f.NumBlocks()) s.spillLive = make([][]ID, f.NumBlocks()) + s.sdom = f.sdom() } // Adds a use record for id at distance dist from the start of the block. @@ -688,22 +708,6 @@ func (s *regAllocState) compatRegs(t Type) regMask { return m & s.allocatable } -// loopForBlock returns the loop containing block b, -// provided that the loop is "interesting" for purposes -// of improving register allocation (= is inner, and does -// not contain a call) -func (s *regAllocState) loopForBlock(b *Block) *loop { - loop := s.loopnest.b2l[b.ID] - - // Minor for-the-time-being optimization: nothing happens - // unless a loop is both inner and call-free, therefore - // don't bother with other loops. - if loop != nil && (loop.containsCall || !loop.isInner) { - loop = nil - } - return loop -} - func (s *regAllocState) regalloc(f *Func) { liveSet := f.newSparseSet(f.NumValues()) defer f.retSparseSet(liveSet) @@ -712,14 +716,6 @@ func (s *regAllocState) regalloc(f *Func) { var phiRegs []register var args []*Value - // statistics - var nSpills int // # of spills remaining - var nSpillsInner int // # of spills remaining in inner loops - var nSpillsSunk int // # of sunk spills remaining - var nSpillsChanged int // # of sunk spills lost because of register use change - var nSpillsSunkUnused int // # of spills not sunk because they were removed completely - var nSpillsNotSunkLateUse int // # of spills not sunk because of very late use (in shuffle) - // Data structure used for computing desired registers. var desired desiredState @@ -734,24 +730,8 @@ func (s *regAllocState) regalloc(f *Func) { f.Fatalf("entry block must be first") } - // Get loop nest so that spills in inner loops can be - // tracked. When the last block of a loop is processed, - // attempt to move spills out of the loop. - s.loopnest.findExits() - - // Spills are moved from one block's slice of values to another's. - // This confuses register allocation if it occurs before it is - // complete, so candidates are recorded, then rechecked and - // moved after all allocation (register and stack) is complete. - // Because movement is only within a stack slot's lifetime, it - // is safe to do this. - var toSink []spillToSink - // Will be used to figure out live inputs to exit blocks of inner loops. - entryCandidates := newSparseMap(f.NumValues()) - for _, b := range f.Blocks { s.curBlock = b - loop := s.loopForBlock(b) // Initialize liveSet and uses fields for this block. // Walk backwards through the block doing liveness analysis. @@ -945,22 +925,11 @@ func (s *regAllocState) regalloc(f *Func) { if r == noRegister { // stack-based phi // Spills will be inserted in all the predecessors below. - s.values[v.ID].spill = v // v starts life spilled - s.values[v.ID].spillUsed = true // use is guaranteed + s.values[v.ID].spill = v // v starts life spilled continue } // register-based phi s.assignReg(r, v, v) - // Spill the phi in case we need to restore it later. - spill := b.NewValue1(v.Pos, OpStoreReg, v.Type, v) - s.setOrig(spill, v) - s.values[v.ID].spill = spill - s.values[v.ID].spillUsed = false - if loop != nil { - loop.spills = append(loop.spills, v) - nSpillsInner++ - } - nSpills++ } // Save the starting state for use by merge edges. @@ -975,14 +944,14 @@ func (s *regAllocState) regalloc(f *Func) { // specially during merge edge processing. continue } - regList = append(regList, startReg{r, v.ID, s.values[v.ID].uses.pos}) + regList = append(regList, startReg{r, v, s.regs[r].c, s.values[v.ID].uses.pos}) } s.startRegs[b.ID] = regList if s.f.pass.debug > regDebug { fmt.Printf("after phis\n") for _, x := range s.startRegs[b.ID] { - fmt.Printf(" %s: v%d\n", s.registers[x.r].Name(), x.vid) + fmt.Printf(" %s: v%d\n", s.registers[x.r].Name(), x.v.ID) } } } @@ -1005,7 +974,7 @@ func (s *regAllocState) regalloc(f *Func) { succ := e.b // TODO: prioritize likely successor? for _, x := range s.startRegs[succ.ID] { - desired.add(x.vid, x.r) + desired.add(x.v.ID, x.r) } // Process phi ops in succ. pidx := e.i @@ -1100,7 +1069,6 @@ func (s *regAllocState) regalloc(f *Func) { // any register here. We just set up the spill pointer to // point at itself and any later user will restore it to use it. s.values[v.ID].spill = v - s.values[v.ID].spillUsed = true // use is guaranteed b.Values = append(b.Values, v) s.advanceUses(v) continue @@ -1109,7 +1077,7 @@ func (s *regAllocState) regalloc(f *Func) { // Make sure the argument to v is still live here. s.advanceUses(v) vi := &s.values[v.Args[0].ID] - if vi.spillUsed { + if vi.spill != nil { // Use the spill location. v.SetArg(0, vi.spill) } else { @@ -1373,28 +1341,7 @@ func (s *regAllocState) regalloc(f *Func) { } b.Values = append(b.Values, v) - // Issue a spill for this value. We issue spills unconditionally, - // then at the end of regalloc delete the ones we never use. - // TODO: schedule the spill at a point that dominates all restores. - // The restore may be off in an unlikely branch somewhere and it - // would be better to have the spill in that unlikely branch as well. - // v := ... - // if unlikely { - // f() - // } - // It would be good to have both spill and restore inside the IF. issueSpill: - if s.values[v.ID].needReg { - spill := b.NewValue1(v.Pos, OpStoreReg, v.Type, v) - s.setOrig(spill, v) - s.values[v.ID].spill = spill - s.values[v.ID].spillUsed = false - if loop != nil { - loop.spills = append(loop.spills, v) - nSpillsInner++ - } - nSpills++ - } } // Load control value into reg. @@ -1504,91 +1451,24 @@ func (s *regAllocState) regalloc(f *Func) { } // If a value is live at the end of the block and - // isn't in a register, remember that its spill location - // is live. We need to remember this information so that + // isn't in a register, generate a use for the spill location. + // We need to remember this information so that // the liveness analysis in stackalloc is correct. for _, e := range s.live[b.ID] { - if s.values[e.ID].regs != 0 { + vi := &s.values[e.ID] + if vi.regs != 0 { // in a register, we'll use that source for the merge. continue } - spill := s.values[e.ID].spill - if spill == nil { - // rematerializeable values will have spill==nil. + if vi.rematerializeable { + // we'll rematerialize during the merge. continue } + //fmt.Printf("live-at-end spill for %s at %s\n", s.orig[e.ID], b) + spill := s.makeSpill(s.orig[e.ID], b) s.spillLive[b.ID] = append(s.spillLive[b.ID], spill.ID) - s.values[e.ID].spillUsed = true } - // Keep track of values that are spilled in the loop, but whose spill - // is not used in the loop. It may be possible to move ("sink") the - // spill out of the loop into one or more exit blocks. - if loop != nil { - loop.scratch++ // increment count of blocks in this loop that have been processed - if loop.scratch == loop.nBlocks { // just processed last block of loop, if it is an inner loop. - // This check is redundant with code at the top of the loop. - // This is definitive; the one at the top of the loop is an optimization. - if loop.isInner && // Common case, easier, most likely to be profitable - !loop.containsCall && // Calls force spills, also lead to puzzling spill info. - len(loop.exits) <= 32 { // Almost no inner loops have more than 32 exits, - // and this allows use of a bitvector and a sparseMap. - - // TODO: exit calculation is messed up for non-inner loops - // because of multilevel exits that are not part of the "exit" - // count. - - // Compute the set of spill-movement candidates live at entry to exit blocks. - // isLoopSpillCandidate filters for - // (1) defined in appropriate loop - // (2) needs a register - // (3) spill not already used (in the loop) - // Condition (3) === "in a register at all loop exits" - - entryCandidates.clear() - - for whichExit, ss := range loop.exits { - // Start with live at end. - for _, li := range s.live[ss.ID] { - if s.isLoopSpillCandidate(loop, s.orig[li.ID]) { - // s.live contains original IDs, use s.orig above to map back to *Value - entryCandidates.setBit(li.ID, uint(whichExit)) - } - } - // Control can also be live. - if ss.Control != nil && s.orig[ss.Control.ID] != nil && s.isLoopSpillCandidate(loop, s.orig[ss.Control.ID]) { - entryCandidates.setBit(s.orig[ss.Control.ID].ID, uint(whichExit)) - } - // Walk backwards, filling in locally live values, removing those defined. - for i := len(ss.Values) - 1; i >= 0; i-- { - v := ss.Values[i] - vorig := s.orig[v.ID] - if vorig != nil { - entryCandidates.remove(vorig.ID) // Cannot be an issue, only keeps the sets smaller. - } - for _, a := range v.Args { - aorig := s.orig[a.ID] - if aorig != nil && s.isLoopSpillCandidate(loop, aorig) { - entryCandidates.setBit(aorig.ID, uint(whichExit)) - } - } - } - } - - for _, e := range loop.spills { - whichblocks := entryCandidates.get(e.ID) - oldSpill := s.values[e.ID].spill - if whichblocks != 0 && whichblocks != -1 { // -1 = not in map. - toSink = append(toSink, spillToSink{spill: oldSpill, dests: whichblocks}) - } - } - - } // loop is inner etc - loop.scratch = 0 // Don't leave a mess, just in case. - loop.spills = nil - } // if scratch == nBlocks - } // if loop is not nil - // Clear any final uses. // All that is left should be the pseudo-uses added for values which // are live at the end of b. @@ -1606,52 +1486,8 @@ func (s *regAllocState) regalloc(f *Func) { } } - // Erase any spills we never used - for i := range s.values { - vi := s.values[i] - if vi.spillUsed { - if s.f.pass.debug > logSpills && vi.spill.Op != OpArg { - s.f.Config.Warnl(vi.spill.Pos, "spilled value at %v remains", vi.spill) - } - continue - } - spill := vi.spill - if spill == nil { - // Constants, SP, SB, ... - continue - } - loop := s.loopForBlock(spill.Block) - if loop != nil { - nSpillsInner-- - } - - spill.Args[0].Uses-- - f.freeValue(spill) - nSpills-- - } - - for _, b := range f.Blocks { - i := 0 - for _, v := range b.Values { - if v.Op == OpInvalid { - continue - } - b.Values[i] = v - i++ - } - b.Values = b.Values[:i] - // TODO: zero b.Values[i:], recycle Values - // Not important now because this is the last phase that manipulates Values - } - - // Must clear these out before any potential recycling, though that's - // not currently implemented. - for i, ts := range toSink { - vsp := ts.spill - if vsp.Op == OpInvalid { // This spill was completely eliminated - toSink[i].spill = nil - } - } + // Decide where the spills we generated will go. + s.placeSpills() // Anything that didn't get a register gets a stack location here. // (StoreReg, stack-based phis, inputs, ...) @@ -1660,114 +1496,6 @@ func (s *regAllocState) regalloc(f *Func) { // Fix up all merge edges. s.shuffle(stacklive) - // Insert moved spills (that have not been marked invalid above) - // at start of appropriate block and remove the originals from their - // location within loops. Notice that this can break SSA form; - // if a spill is sunk to multiple exits, there will be no phi for that - // spill at a join point downstream of those two exits, though the - // two spills will target the same stack slot. Notice also that this - // takes place after stack allocation, so the stack allocator does - // not need to process these malformed flow graphs. -sinking: - for _, ts := range toSink { - vsp := ts.spill - if vsp == nil { // This spill was completely eliminated - nSpillsSunkUnused++ - continue sinking - } - e := ts.spilledValue() - if s.values[e.ID].spillUsedShuffle { - nSpillsNotSunkLateUse++ - continue sinking - } - - // move spills to a better (outside of loop) block. - // This would be costly if it occurred very often, but it doesn't. - b := vsp.Block - loop := s.loopnest.b2l[b.ID] - dests := ts.dests - - // Pre-check to be sure that spilled value is still in expected register on all exits where live. - check_val_still_in_reg: - for i := uint(0); i < 32 && dests != 0; i++ { - - if dests&(1< 1 { - panic("Should be impossible given critical edges removed") - } - p := d.Preds[0].b // block in loop exiting to d. - - endregs := s.endRegs[p.ID] - for _, regrec := range endregs { - if regrec.v == e && regrec.r != noRegister && regrec.c == e { // TODO: regrec.c != e implies different spill possible. - continue check_val_still_in_reg - } - } - // If here, the register assignment was lost down at least one exit and it can't be sunk - if s.f.pass.debug > moveSpills { - s.f.Config.Warnl(e.Pos, "lost register assignment for spill %v in %v at exit %v to %v", - vsp, b, p, d) - } - nSpillsChanged++ - continue sinking - } - - nSpillsSunk++ - nSpillsInner-- - // don't update nSpills, since spill is only moved, and if it is duplicated, the spills-on-a-path is not increased. - - dests = ts.dests - - // remove vsp from b.Values - i := 0 - for _, w := range b.Values { - if vsp == w { - continue - } - b.Values[i] = w - i++ - } - b.Values = b.Values[:i] - - first := true - for i := uint(0); i < 32 && dests != 0; i++ { - - if dests&(1< moveSpills { - s.f.Config.Warnl(e.Pos, "copied spill %v in %v for %v to %v in %v", - vsp, b, e, vspnew, d) - } - } else { - first = false - vspnew.Block = d - d.Values = append(d.Values, vspnew) - if s.f.pass.debug > moveSpills { - s.f.Config.Warnl(e.Pos, "moved spill %v in %v for %v to %v in %v", - vsp, b, e, vspnew, d) - } - } - - // shuffle vspnew to the beginning of its block - copy(d.Values[1:], d.Values[0:len(d.Values)-1]) - d.Values[0] = vspnew - - } - } - // Erase any copies we never used. // Also, an unused copy might be the only use of another copy, // so continue erasing until we reach a fixed point. @@ -1800,38 +1528,143 @@ sinking: } b.Values = b.Values[:i] } +} - if f.pass.stats > 0 { - f.LogStat("spills_info", - nSpills, "spills", nSpillsInner, "inner_spills_remaining", nSpillsSunk, "inner_spills_sunk", nSpillsSunkUnused, "inner_spills_unused", nSpillsNotSunkLateUse, "inner_spills_shuffled", nSpillsChanged, "inner_spills_changed") +func (s *regAllocState) placeSpills() { + f := s.f + + // Precompute some useful info. + phiRegs := make([]regMask, f.NumBlocks()) + for _, b := range f.Blocks { + var m regMask + for _, v := range b.Values { + if v.Op != OpPhi { + break + } + if r, ok := f.getHome(v.ID).(*Register); ok { + m |= regMask(1) << uint(r.num) + } + } + phiRegs[b.ID] = m } -} -// isLoopSpillCandidate indicates whether the spill for v satisfies preliminary -// spill-sinking conditions just after the last block of loop has been processed. -// In particular: -// v needs a register. -// v's spill is not (YET) used. -// v's definition is within loop. -// The spill may be used in the future, either by an outright use -// in the code, or by shuffling code inserted after stack allocation. -// Outright uses cause sinking; shuffling (within the loop) inhibits it. -func (s *regAllocState) isLoopSpillCandidate(loop *loop, v *Value) bool { - return s.values[v.ID].needReg && !s.values[v.ID].spillUsed && s.loopnest.b2l[v.Block.ID] == loop -} + // Start maps block IDs to the list of spills + // that go at the start of the block (but after any phis). + start := map[ID][]*Value{} + // After maps value IDs to the list of spills + // that go immediately after that value ID. + after := map[ID][]*Value{} -// lateSpillUse notes a late (after stack allocation) use of the spill of value with ID vid. -// This will inhibit spill sinking. -func (s *regAllocState) lateSpillUse(vid ID) { - // TODO investigate why this is necessary. - // It appears that an outside-the-loop use of - // an otherwise sinkable spill makes the spill - // a candidate for shuffling, when it would not - // otherwise have been the case (spillUsed was not - // true when isLoopSpillCandidate was called, yet - // it was shuffled). Such shuffling cuts the amount - // of spill sinking by more than half (in make.bash) - s.values[vid].spillUsedShuffle = true + for i := range s.values { + vi := s.values[i] + spill := vi.spill + if spill == nil { + continue + } + if spill.Block != nil { + // Some spills are already fully set up, + // like OpArgs and stack-based phis. + continue + } + v := s.orig[i] + + // Walk down the dominator tree looking for a good place to + // put the spill of v. At the start "best" is the best place + // we have found so far. + // TODO: find a way to make this O(1) without arbitrary cutoffs. + best := v.Block + bestArg := v + var bestDepth int16 + if l := s.loopnest.b2l[best.ID]; l != nil { + bestDepth = l.depth + } + b := best + const maxSpillSearch = 100 + for i := 0; i < maxSpillSearch; i++ { + // Find the child of b in the dominator tree which + // dominates all restores. + p := b + b = nil + for c := s.sdom.Child(p); c != nil && i < maxSpillSearch; c, i = s.sdom.Sibling(c), i+1 { + if s.sdom[c.ID].entry <= vi.restoreMin && s.sdom[c.ID].exit >= vi.restoreMax { + // c also dominates all restores. Walk down into c. + b = c + break + } + } + if b == nil { + // Ran out of blocks which dominate all restores. + break + } + + var depth int16 + if l := s.loopnest.b2l[b.ID]; l != nil { + depth = l.depth + } + if depth > bestDepth { + // Don't push the spill into a deeper loop. + continue + } + + // If v is in a register at the start of b, we can + // place the spill here (after the phis). + if len(b.Preds) == 1 { + for _, e := range s.endRegs[b.Preds[0].b.ID] { + if e.v == v { + // Found a better spot for the spill. + best = b + bestArg = e.c + bestDepth = depth + break + } + } + } else { + for _, e := range s.startRegs[b.ID] { + if e.v == v { + // Found a better spot for the spill. + best = b + bestArg = e.c + bestDepth = depth + break + } + } + } + } + + // Put the spill in the best block we found. + spill.Block = best + spill.AddArg(bestArg) + if best == v.Block && v.Op != OpPhi { + // Place immediately after v. + after[v.ID] = append(after[v.ID], spill) + } else { + // Place at the start of best block. + start[best.ID] = append(start[best.ID], spill) + } + } + + // Insert spill instructions into the block schedules. + var oldSched []*Value + for _, b := range f.Blocks { + nphi := 0 + for _, v := range b.Values { + if v.Op != OpPhi { + break + } + nphi++ + } + oldSched = append(oldSched[:0], b.Values[nphi:]...) + b.Values = b.Values[:nphi] + for _, v := range start[b.ID] { + b.Values = append(b.Values, v) + } + for _, v := range oldSched { + b.Values = append(b.Values, v) + for _, w := range after[v.ID] { + b.Values = append(b.Values, w) + } + } + } } // shuffle fixes up all the merge edges (those going into blocks of indegree > 1). @@ -1919,13 +1752,24 @@ func (e *edgeState) setup(idx int, srcReg []endReg, dstReg []startReg, stacklive for _, spillID := range stacklive { v := e.s.orig[spillID] spill := e.s.values[v.ID].spill + if !e.s.sdom.isAncestorEq(spill.Block, e.p) { + // Spills were placed that only dominate the uses found + // during the first regalloc pass. The edge fixup code + // can't use a spill location if the spill doesn't dominate + // the edge. + // We are guaranteed that if the spill doesn't dominate this edge, + // then the value is available in a register (because we called + // makeSpill for every value not in a register at the start + // of an edge). + continue + } e.set(e.s.f.getHome(spillID), v.ID, spill, false, src.NoXPos) // don't care the position of the source } // Figure out all the destinations we need. dsts := e.destinations[:0] for _, x := range dstReg { - dsts = append(dsts, dstRecord{&e.s.registers[x.r], x.vid, nil, x.pos}) + dsts = append(dsts, dstRecord{&e.s.registers[x.r], x.v.ID, nil, x.pos}) } // Phis need their args to end up in a specific location. for _, v := range e.b.Values { @@ -2010,7 +1854,6 @@ func (e *edgeState) process() { if _, isReg := loc.(*Register); isReg { c = e.p.NewValue1(d.pos, OpCopy, c.Type, c) } else { - e.s.lateSpillUse(vid) c = e.p.NewValue1(d.pos, OpLoadReg, c.Type, c) } e.set(r, vid, c, false, d.pos) @@ -2028,9 +1871,6 @@ func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XP (*splice).Uses-- *splice = occupant.c occupant.c.Uses++ - if occupant.c.Op == OpStoreReg { - e.s.lateSpillUse(vid) - } } // Note: if splice==nil then c will appear dead. This is // non-SSA formed code, so be careful after this pass not to run @@ -2106,7 +1946,6 @@ func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XP } } else { if dstReg { - e.s.lateSpillUse(vid) x = e.p.NewValue1(pos, OpLoadReg, c.Type, c) } else { // mem->mem. Use temp register. @@ -2124,7 +1963,6 @@ func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XP e.erase(loc) r := e.findRegFor(c.Type) - e.s.lateSpillUse(vid) t := e.p.NewValue1(pos, OpLoadReg, c.Type, c) e.set(r, vid, t, false, pos) x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, t) @@ -2630,3 +2468,16 @@ func (d *desiredState) merge(x *desiredState) { d.addList(e.ID, e.regs) } } + +func min32(x, y int32) int32 { + if x < y { + return x + } + return y +} +func max32(x, y int32) int32 { + if x > y { + return x + } + return y +}