diff --git a/src/cmd/compile/internal/ssa/gen/generic.rules b/src/cmd/compile/internal/ssa/gen/generic.rules index 14a67846dc..5f1ed83ad1 100644 --- a/src/cmd/compile/internal/ssa/gen/generic.rules +++ b/src/cmd/compile/internal/ssa/gen/generic.rules @@ -1804,11 +1804,16 @@ // Later passes (deadstore, elim unread auto) will remove the A -> B move, if possible. // This happens most commonly when B is an autotmp inserted earlier // during compilation to ensure correctness. -(Move {t1} [s1] dst tmp1 midmem:(Move {t2} [s2] tmp2 src _)) - && s1 == s2 +// Take care that overlapping moves are preserved. +// Restrict this optimization to the stack, to avoid duplicating loads from the heap; +// see CL 145208 for discussion. +(Move {t1} [s] dst tmp1 midmem:(Move {t2} [s] tmp2 src _)) && t1.(*types.Type).Compare(t2.(*types.Type)) == types.CMPeq && isSamePtr(tmp1, tmp2) - -> (Move {t1} [s1] dst src midmem) + && isStackPtr(src) + && disjoint(src, s, tmp2, s) + && (disjoint(src, s, dst, s) || isInlinableMemmove(dst, src, s, config)) + -> (Move {t1} [s] dst src midmem) // Elide self-moves. This only happens rarely (e.g test/fixedbugs/bug277.go). // However, this rule is needed to prevent the previous rule from looping forever in such cases. diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go index 17d7cb3414..7ddf215478 100644 --- a/src/cmd/compile/internal/ssa/rewrite.go +++ b/src/cmd/compile/internal/ssa/rewrite.go @@ -530,6 +530,13 @@ func isSamePtr(p1, p2 *Value) bool { return false } +func isStackPtr(v *Value) bool { + for v.Op == OpOffPtr || v.Op == OpAddPtr { + v = v.Args[0] + } + return v.Op == OpSP || v.Op == OpLocalAddr +} + // disjoint reports whether the memory region specified by [p1:p1+n1) // does not overlap with [p2:p2+n2). // A return value of false does not imply the regions overlap. diff --git a/src/cmd/compile/internal/ssa/rewritegeneric.go b/src/cmd/compile/internal/ssa/rewritegeneric.go index 7869fec21f..22e28bed54 100644 --- a/src/cmd/compile/internal/ssa/rewritegeneric.go +++ b/src/cmd/compile/internal/ssa/rewritegeneric.go @@ -17234,6 +17234,8 @@ func rewriteValuegeneric_OpMove_10(v *Value) bool { func rewriteValuegeneric_OpMove_20(v *Value) bool { b := v.Block _ = b + config := b.Func.Config + _ = config // match: (Move {t1} [n] dst p1 mem:(VarDef (Store {t2} (OffPtr [o2] p2) d1 (Store {t3} (OffPtr [o3] p3) d2 (Store {t4} (OffPtr [o4] p4) d3 (Store {t5} (OffPtr [o5] p5) d4 (Zero {t6} [n] p6 _))))))) // cond: isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4) && isSamePtr(p4, p5) && isSamePtr(p5, p6) && alignof(t2) <= alignof(t1) && alignof(t3) <= alignof(t1) && alignof(t4) <= alignof(t1) && alignof(t5) <= alignof(t1) && alignof(t6) <= alignof(t1) && registerizable(b, t2) && registerizable(b, t3) && registerizable(b, t4) && registerizable(b, t5) && n >= o2 + sizeof(t2) && n >= o3 + sizeof(t3) && n >= o4 + sizeof(t4) && n >= o5 + sizeof(t5) // result: (Store {t2} (OffPtr [o2] dst) d1 (Store {t3} (OffPtr [o3] dst) d2 (Store {t4} (OffPtr [o4] dst) d3 (Store {t5} (OffPtr [o5] dst) d4 (Zero {t1} [n] dst mem))))) @@ -17355,11 +17357,11 @@ func rewriteValuegeneric_OpMove_20(v *Value) bool { v.AddArg(v1) return true } - // match: (Move {t1} [s1] dst tmp1 midmem:(Move {t2} [s2] tmp2 src _)) - // cond: s1 == s2 && t1.(*types.Type).Compare(t2.(*types.Type)) == types.CMPeq && isSamePtr(tmp1, tmp2) - // result: (Move {t1} [s1] dst src midmem) + // match: (Move {t1} [s] dst tmp1 midmem:(Move {t2} [s] tmp2 src _)) + // cond: t1.(*types.Type).Compare(t2.(*types.Type)) == types.CMPeq && isSamePtr(tmp1, tmp2) && isStackPtr(src) && disjoint(src, s, tmp2, s) && (disjoint(src, s, dst, s) || isInlinableMemmove(dst, src, s, config)) + // result: (Move {t1} [s] dst src midmem) for { - s1 := v.AuxInt + s := v.AuxInt t1 := v.Aux _ = v.Args[2] dst := v.Args[0] @@ -17368,16 +17370,18 @@ func rewriteValuegeneric_OpMove_20(v *Value) bool { if midmem.Op != OpMove { break } - s2 := midmem.AuxInt + if midmem.AuxInt != s { + break + } t2 := midmem.Aux _ = midmem.Args[2] tmp2 := midmem.Args[0] src := midmem.Args[1] - if !(s1 == s2 && t1.(*types.Type).Compare(t2.(*types.Type)) == types.CMPeq && isSamePtr(tmp1, tmp2)) { + if !(t1.(*types.Type).Compare(t2.(*types.Type)) == types.CMPeq && isSamePtr(tmp1, tmp2) && isStackPtr(src) && disjoint(src, s, tmp2, s) && (disjoint(src, s, dst, s) || isInlinableMemmove(dst, src, s, config))) { break } v.reset(OpMove) - v.AuxInt = s1 + v.AuxInt = s v.Aux = t1 v.AddArg(dst) v.AddArg(src)