diff --git a/src/cmd/compile/internal/arm64/ssa.go b/src/cmd/compile/internal/arm64/ssa.go index 11e7002df4..e194f9c403 100644 --- a/src/cmd/compile/internal/arm64/ssa.go +++ b/src/cmd/compile/internal/arm64/ssa.go @@ -341,6 +341,20 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() + case ssa.OpARM64MOVBloadidx, + ssa.OpARM64MOVBUloadidx, + ssa.OpARM64MOVHloadidx, + ssa.OpARM64MOVHUloadidx, + ssa.OpARM64MOVWloadidx, + ssa.OpARM64MOVWUloadidx, + ssa.OpARM64MOVDloadidx: + p := s.Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_MEM + p.From.Name = obj.NAME_NONE + p.From.Reg = v.Args[0].Reg() + p.From.Index = v.Args[1].Reg() + p.To.Type = obj.TYPE_REG + p.To.Reg = v.Reg() case ssa.OpARM64LDAR, ssa.OpARM64LDARW: p := s.Prog(v.Op.Asm()) @@ -363,6 +377,17 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { p.To.Type = obj.TYPE_MEM p.To.Reg = v.Args[0].Reg() gc.AddAux(&p.To, v) + case ssa.OpARM64MOVBstoreidx, + ssa.OpARM64MOVHstoreidx, + ssa.OpARM64MOVWstoreidx, + ssa.OpARM64MOVDstoreidx: + p := s.Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_REG + p.From.Reg = v.Args[2].Reg() + p.To.Type = obj.TYPE_MEM + p.To.Name = obj.NAME_NONE + p.To.Reg = v.Args[0].Reg() + p.To.Index = v.Args[1].Reg() case ssa.OpARM64STP: p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REGREG @@ -381,6 +406,17 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { p.To.Type = obj.TYPE_MEM p.To.Reg = v.Args[0].Reg() gc.AddAux(&p.To, v) + case ssa.OpARM64MOVBstorezeroidx, + ssa.OpARM64MOVHstorezeroidx, + ssa.OpARM64MOVWstorezeroidx, + ssa.OpARM64MOVDstorezeroidx: + p := s.Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_REG + p.From.Reg = arm64.REGZERO + p.To.Type = obj.TYPE_MEM + p.To.Name = obj.NAME_NONE + p.To.Reg = v.Args[0].Reg() + p.To.Index = v.Args[1].Reg() case ssa.OpARM64MOVQstorezero: p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REGREG diff --git a/src/cmd/compile/internal/ssa/gen/ARM64.rules 
b/src/cmd/compile/internal/ssa/gen/ARM64.rules index edeadfd1d2..41417482e8 100644 --- a/src/cmd/compile/internal/ssa/gen/ARM64.rules +++ b/src/cmd/compile/internal/ssa/gen/ARM64.rules @@ -606,6 +606,29 @@ && (ptr.Op != OpSB || !config.ctxt.Flag_shared) -> (FMOVDload [off1+off2] {sym} ptr mem) +// register indexed load +(MOVDload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVDloadidx ptr idx mem) +(MOVWUload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVWUloadidx ptr idx mem) +(MOVWload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVWloadidx ptr idx mem) +(MOVHUload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVHUloadidx ptr idx mem) +(MOVHload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVHloadidx ptr idx mem) +(MOVBUload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVBUloadidx ptr idx mem) +(MOVBload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVBloadidx ptr idx mem) +(MOVDloadidx ptr (MOVDconst [c]) mem) -> (MOVDload [c] ptr mem) +(MOVDloadidx (MOVDconst [c]) ptr mem) -> (MOVDload [c] ptr mem) +(MOVWUloadidx ptr (MOVDconst [c]) mem) -> (MOVWUload [c] ptr mem) +(MOVWUloadidx (MOVDconst [c]) ptr mem) -> (MOVWUload [c] ptr mem) +(MOVWloadidx ptr (MOVDconst [c]) mem) -> (MOVWload [c] ptr mem) +(MOVWloadidx (MOVDconst [c]) ptr mem) -> (MOVWload [c] ptr mem) +(MOVHUloadidx ptr (MOVDconst [c]) mem) -> (MOVHUload [c] ptr mem) +(MOVHUloadidx (MOVDconst [c]) ptr mem) -> (MOVHUload [c] ptr mem) +(MOVHloadidx ptr (MOVDconst [c]) mem) -> (MOVHload [c] ptr mem) +(MOVHloadidx (MOVDconst [c]) ptr mem) -> (MOVHload [c] ptr mem) +(MOVBUloadidx ptr (MOVDconst [c]) mem) -> (MOVBUload [c] ptr mem) +(MOVBUloadidx (MOVDconst [c]) ptr mem) -> (MOVBUload [c] ptr mem) +(MOVBloadidx ptr (MOVDconst [c]) mem) -> (MOVBload [c] ptr mem) +(MOVBloadidx (MOVDconst [c]) ptr mem) -> (MOVBload [c] ptr mem) + (MOVBstore [off1] {sym} (ADDconst [off2] ptr) val mem) && 
is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) -> (MOVBstore [off1+off2] {sym} ptr val mem) @@ -643,6 +666,20 @@ && (ptr.Op != OpSB || !config.ctxt.Flag_shared) -> (MOVQstorezero [off1+off2] {sym} ptr mem) +// register indexed store +(MOVDstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil -> (MOVDstoreidx ptr idx val mem) +(MOVWstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil -> (MOVWstoreidx ptr idx val mem) +(MOVHstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil -> (MOVHstoreidx ptr idx val mem) +(MOVBstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil -> (MOVBstoreidx ptr idx val mem) +(MOVDstoreidx ptr (MOVDconst [c]) val mem) -> (MOVDstore [c] ptr val mem) +(MOVDstoreidx (MOVDconst [c]) idx val mem) -> (MOVDstore [c] idx val mem) +(MOVWstoreidx ptr (MOVDconst [c]) val mem) -> (MOVWstore [c] ptr val mem) +(MOVWstoreidx (MOVDconst [c]) idx val mem) -> (MOVWstore [c] idx val mem) +(MOVHstoreidx ptr (MOVDconst [c]) val mem) -> (MOVHstore [c] ptr val mem) +(MOVHstoreidx (MOVDconst [c]) idx val mem) -> (MOVHstore [c] idx val mem) +(MOVBstoreidx ptr (MOVDconst [c]) val mem) -> (MOVBstore [c] ptr val mem) +(MOVBstoreidx (MOVDconst [c]) idx val mem) -> (MOVBstore [c] idx val mem) + (MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) -> @@ -736,6 +773,24 @@ (MOVDstore [off] {sym} ptr (MOVDconst [0]) mem) -> (MOVDstorezero [off] {sym} ptr mem) (STP [off] {sym} ptr (MOVDconst [0]) (MOVDconst [0]) mem) -> (MOVQstorezero [off] {sym} ptr mem) +// register indexed store zero +(MOVDstorezero [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVDstorezeroidx ptr idx mem) +(MOVWstorezero [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVWstorezeroidx ptr idx mem) +(MOVHstorezero [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVHstorezeroidx ptr idx mem) 
+(MOVBstorezero [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVBstorezeroidx ptr idx mem) +(MOVDstoreidx ptr idx (MOVDconst [0]) mem) -> (MOVDstorezeroidx ptr idx mem) +(MOVWstoreidx ptr idx (MOVDconst [0]) mem) -> (MOVWstorezeroidx ptr idx mem) +(MOVHstoreidx ptr idx (MOVDconst [0]) mem) -> (MOVHstorezeroidx ptr idx mem) +(MOVBstoreidx ptr idx (MOVDconst [0]) mem) -> (MOVBstorezeroidx ptr idx mem) +(MOVDstorezeroidx ptr (MOVDconst [c]) mem) -> (MOVDstorezero [c] ptr mem) +(MOVDstorezeroidx (MOVDconst [c]) idx mem) -> (MOVDstorezero [c] idx mem) +(MOVWstorezeroidx ptr (MOVDconst [c]) mem) -> (MOVWstorezero [c] ptr mem) +(MOVWstorezeroidx (MOVDconst [c]) idx mem) -> (MOVWstorezero [c] idx mem) +(MOVHstorezeroidx ptr (MOVDconst [c]) mem) -> (MOVHstorezero [c] ptr mem) +(MOVHstorezeroidx (MOVDconst [c]) idx mem) -> (MOVHstorezero [c] idx mem) +(MOVBstorezeroidx ptr (MOVDconst [c]) mem) -> (MOVBstorezero [c] ptr mem) +(MOVBstorezeroidx (MOVDconst [c]) idx mem) -> (MOVBstorezero [c] idx mem) + // replace load from same location as preceding store with zero/sign extension (or copy in case of full width) // these seem to have bad interaction with other rules, resulting in slower code //(MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBreg x) @@ -756,6 +811,21 @@ (MOVWUload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0]) (MOVDload [off] {sym} ptr (MOVDstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0]) +(MOVBloadidx ptr idx (MOVBstorezeroidx ptr2 idx2 _)) + && (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) -> (MOVDconst [0]) +(MOVBUloadidx ptr idx (MOVBstorezeroidx ptr2 idx2 _)) + && (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) -> (MOVDconst [0]) 
+(MOVHloadidx ptr idx (MOVHstorezeroidx ptr2 idx2 _)) + && (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) -> (MOVDconst [0]) +(MOVHUloadidx ptr idx (MOVHstorezeroidx ptr2 idx2 _)) + && (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) -> (MOVDconst [0]) +(MOVWloadidx ptr idx (MOVWstorezeroidx ptr2 idx2 _)) + && (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) -> (MOVDconst [0]) +(MOVWUloadidx ptr idx (MOVWstorezeroidx ptr2 idx2 _)) + && (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) -> (MOVDconst [0]) +(MOVDloadidx ptr idx (MOVDstorezeroidx ptr2 idx2 _)) + && (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) -> (MOVDconst [0]) + // don't extend after proper load (MOVBreg x:(MOVBload _ _)) -> (MOVDreg x) (MOVBUreg x:(MOVBUload _ _)) -> (MOVDreg x) @@ -772,6 +842,21 @@ (MOVWUreg x:(MOVBUload _ _)) -> (MOVDreg x) (MOVWUreg x:(MOVHUload _ _)) -> (MOVDreg x) (MOVWUreg x:(MOVWUload _ _)) -> (MOVDreg x) +(MOVBreg x:(MOVBloadidx _ _ _)) -> (MOVDreg x) +(MOVBUreg x:(MOVBUloadidx _ _ _)) -> (MOVDreg x) +(MOVHreg x:(MOVBloadidx _ _ _)) -> (MOVDreg x) +(MOVHreg x:(MOVBUloadidx _ _ _)) -> (MOVDreg x) +(MOVHreg x:(MOVHloadidx _ _ _)) -> (MOVDreg x) +(MOVHUreg x:(MOVBUloadidx _ _ _)) -> (MOVDreg x) +(MOVHUreg x:(MOVHUloadidx _ _ _)) -> (MOVDreg x) +(MOVWreg x:(MOVBloadidx _ _ _)) -> (MOVDreg x) +(MOVWreg x:(MOVBUloadidx _ _ _)) -> (MOVDreg x) +(MOVWreg x:(MOVHloadidx _ _ _)) -> (MOVDreg x) +(MOVWreg x:(MOVHUloadidx _ _ _)) -> (MOVDreg x) +(MOVWreg x:(MOVWloadidx _ _ _)) -> (MOVDreg x) +(MOVWUreg x:(MOVBUloadidx _ _ _)) -> (MOVDreg x) +(MOVWUreg x:(MOVHUloadidx _ _ _)) -> (MOVDreg x) +(MOVWUreg x:(MOVWUloadidx _ _ _)) -> (MOVDreg x) // fold double extensions (MOVBreg x:(MOVBreg _)) -> (MOVDreg x) @@ -803,6 +888,18 @@ (MOVHstore [off] {sym} ptr (MOVWUreg 
x) mem) -> (MOVHstore [off] {sym} ptr x mem) (MOVWstore [off] {sym} ptr (MOVWreg x) mem) -> (MOVWstore [off] {sym} ptr x mem) (MOVWstore [off] {sym} ptr (MOVWUreg x) mem) -> (MOVWstore [off] {sym} ptr x mem) +(MOVBstoreidx ptr idx (MOVBreg x) mem) -> (MOVBstoreidx ptr idx x mem) +(MOVBstoreidx ptr idx (MOVBUreg x) mem) -> (MOVBstoreidx ptr idx x mem) +(MOVBstoreidx ptr idx (MOVHreg x) mem) -> (MOVBstoreidx ptr idx x mem) +(MOVBstoreidx ptr idx (MOVHUreg x) mem) -> (MOVBstoreidx ptr idx x mem) +(MOVBstoreidx ptr idx (MOVWreg x) mem) -> (MOVBstoreidx ptr idx x mem) +(MOVBstoreidx ptr idx (MOVWUreg x) mem) -> (MOVBstoreidx ptr idx x mem) +(MOVHstoreidx ptr idx (MOVHreg x) mem) -> (MOVHstoreidx ptr idx x mem) +(MOVHstoreidx ptr idx (MOVHUreg x) mem) -> (MOVHstoreidx ptr idx x mem) +(MOVHstoreidx ptr idx (MOVWreg x) mem) -> (MOVHstoreidx ptr idx x mem) +(MOVHstoreidx ptr idx (MOVWUreg x) mem) -> (MOVHstoreidx ptr idx x mem) +(MOVWstoreidx ptr idx (MOVWreg x) mem) -> (MOVWstoreidx ptr idx x mem) +(MOVWstoreidx ptr idx (MOVWUreg x) mem) -> (MOVWstoreidx ptr idx x mem) // if a register move has only 1 use, just use the same register without emitting instruction // MOVDnop doesn't emit instruction, only for ensuring the type. 
@@ -1410,6 +1507,17 @@ && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1) -> @mergePoint(b,x0,x1) (MOVHUload {s} (OffPtr [i0] p) mem) +(ORshiftLL [8] + y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem)) + y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) + && s == nil + && x0.Uses == 1 && x1.Uses == 1 + && y0.Uses == 1 && y1.Uses == 1 + && mergePoint(b,x0,x1) != nil + && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) + && clobber(x0) && clobber(x1) + && clobber(y0) && clobber(y1) + -> @mergePoint(b,x0,x1) (MOVHUloadidx ptr0 idx0 mem) // b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 -> load 32-bit (ORshiftLL [24] o0:(ORshiftLL [16] @@ -1426,6 +1534,21 @@ && clobber(y1) && clobber(y2) && clobber(o0) -> @mergePoint(b,x0,x1,x2) (MOVWUload {s} (OffPtr [i0] p) mem) +(ORshiftLL [24] o0:(ORshiftLL [16] + x0:(MOVHUloadidx ptr0 idx0 mem) + y1:(MOVDnop x1:(MOVBUload [2] {s} p1:(ADD ptr1 idx1) mem))) + y2:(MOVDnop x2:(MOVBUload [3] {s} p mem))) + && s == nil + && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 + && y1.Uses == 1 && y2.Uses == 1 + && o0.Uses == 1 + && mergePoint(b,x0,x1,x2) != nil + && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) + && isSamePtr(p1, p) + && clobber(x0) && clobber(x1) && clobber(x2) + && clobber(y1) && clobber(y2) + && clobber(o0) + -> @mergePoint(b,x0,x1,x2) (MOVWUloadidx ptr0 idx0 mem) // b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 | b[4]<<32 | b[5]<<40 | b[6]<<48 | b[7]<<56 -> load 64-bit (ORshiftLL [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] @@ -1446,6 +1569,23 @@ && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2) -> @mergePoint(b,x0,x1,x2,x3,x4) (MOVDload {s} (OffPtr [i0] p) mem) +(ORshiftLL [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] + x0:(MOVWUloadidx ptr0 idx0 mem) + y1:(MOVDnop x1:(MOVBUload [4] {s} p1:(ADD ptr1 idx1) mem))) + y2:(MOVDnop x2:(MOVBUload 
[5] {s} p mem))) + y3:(MOVDnop x3:(MOVBUload [6] {s} p mem))) + y4:(MOVDnop x4:(MOVBUload [7] {s} p mem))) + && s == nil + && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 + && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 + && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 + && mergePoint(b,x0,x1,x2,x3,x4) != nil + && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) + && isSamePtr(p1, p) + && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) + && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) + && clobber(o0) && clobber(o1) && clobber(o2) + -> @mergePoint(b,x0,x1,x2,x3,x4) (MOVDloadidx ptr0 idx0 mem) // b[3]<<24 | b[2]<<16 | b[1]<<8 | b[0] -> load 32-bit (OR o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] @@ -1464,8 +1604,24 @@ && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0) -> @mergePoint(b,x0,x1,x2,x3) (MOVWUload {s} (OffPtr [i0] p) mem) +(OR o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] + y0:(MOVDnop x0:(MOVBUload [3] {s} p mem))) + y1:(MOVDnop x1:(MOVBUload [2] {s} p mem))) + y2:(MOVDnop x2:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) + y3:(MOVDnop x3:(MOVBUloadidx ptr0 idx0 mem))) + && s == nil + && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 + && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 + && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 + && mergePoint(b,x0,x1,x2,x3) != nil + && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) + && isSamePtr(p1, p) + && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) + && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) + && clobber(o0) && clobber(o1) && clobber(s0) + -> @mergePoint(b,x0,x1,x2,x3) (MOVWUloadidx ptr0 idx0 mem) -// b[7]<<56 | b[6]<<48 | b[5]<<40 | b[4]<<32 | b[3]<<24 | b[2]<<16 | b[1]<<8 | b[0] -> load 64-bit, 
reverse +// b[7]<<56 | b[6]<<48 | b[5]<<40 | b[4]<<32 | b[3]<<24 | b[2]<<16 | b[1]<<8 | b[0] -> load 64-bit (OR o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [i7] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i6] {s} p mem))) @@ -1495,7 +1651,33 @@ && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0) - -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV (MOVDload {s} (OffPtr [i0] p) mem)) + -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDload {s} (OffPtr [i0] p) mem) +(OR o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] + y0:(MOVDnop x0:(MOVBUload [7] {s} p mem))) + y1:(MOVDnop x1:(MOVBUload [6] {s} p mem))) + y2:(MOVDnop x2:(MOVBUload [5] {s} p mem))) + y3:(MOVDnop x3:(MOVBUload [4] {s} p mem))) + y4:(MOVDnop x4:(MOVBUload [3] {s} p mem))) + y5:(MOVDnop x5:(MOVBUload [2] {s} p mem))) + y6:(MOVDnop x6:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) + y7:(MOVDnop x7:(MOVBUloadidx ptr0 idx0 mem))) + && s == nil + && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 + && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 + && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 + && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 + && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 + && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 + && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil + && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) + && isSamePtr(p1, p) + && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) + && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) + && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) + && clobber(y4) && clobber(y5) && 
clobber(y6) && clobber(y7) + && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) + && clobber(o4) && clobber(o5) && clobber(s0) + -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDloadidx ptr0 idx0 mem) // big endian loads // b[1] | b[0]<<8 -> load 16-bit, reverse @@ -1509,6 +1691,17 @@ && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1) -> @mergePoint(b,x0,x1) (REV16W (MOVHUload [i0] {s} p mem)) +(ORshiftLL [8] + y0:(MOVDnop x0:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)) + y1:(MOVDnop x1:(MOVBUloadidx ptr0 idx0 mem))) + && s == nil + && x0.Uses == 1 && x1.Uses == 1 + && y0.Uses == 1 && y1.Uses == 1 + && mergePoint(b,x0,x1) != nil + && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) + && clobber(x0) && clobber(x1) + && clobber(y0) && clobber(y1) + -> @mergePoint(b,x0,x1) (REV16W (MOVHUloadidx ptr0 idx0 mem)) // b[3] | b[2]<<8 | b[1]<<16 | b[0]<<24 -> load 32-bit, reverse (ORshiftLL [24] o0:(ORshiftLL [16] @@ -1525,6 +1718,21 @@ && clobber(y0) && clobber(y1) && clobber(y2) && clobber(o0) -> @mergePoint(b,x0,x1,x2) (REVW (MOVWUload {s} (OffPtr [i0] p) mem)) +(ORshiftLL [24] o0:(ORshiftLL [16] + y0:(REV16W x0:(MOVHUload [2] {s} p mem)) + y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) + y2:(MOVDnop x2:(MOVBUloadidx ptr0 idx0 mem))) + && s == nil + && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 + && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 + && o0.Uses == 1 + && mergePoint(b,x0,x1,x2) != nil + && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) + && isSamePtr(p1, p) + && clobber(x0) && clobber(x1) && clobber(x2) + && clobber(y0) && clobber(y1) && clobber(y2) + && clobber(o0) + -> @mergePoint(b,x0,x1,x2) (REVW (MOVWUloadidx ptr0 idx0 mem)) // b[7] | b[6]<<8 | b[5]<<16 | b[4]<<24 | b[3]<<32 | b[2]<<40 | b[1]<<48 | b[0]<<56 -> load 64-bit, reverse (ORshiftLL [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] @@ -1545,6 +1753,23 @@ 
&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2) -> @mergePoint(b,x0,x1,x2,x3,x4) (REV (MOVDload {s} (OffPtr [i0] p) mem)) +(ORshiftLL [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] + y0:(REVW x0:(MOVWUload [4] {s} p mem)) + y1:(MOVDnop x1:(MOVBUload [3] {s} p mem))) + y2:(MOVDnop x2:(MOVBUload [2] {s} p mem))) + y3:(MOVDnop x3:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) + y4:(MOVDnop x4:(MOVBUloadidx ptr0 idx0 mem))) + && s == nil + && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 + && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 + && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 + && mergePoint(b,x0,x1,x2,x3,x4) != nil + && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) + && isSamePtr(p1, p) + && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) + && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) + && clobber(o0) && clobber(o1) && clobber(o2) + -> @mergePoint(b,x0,x1,x2,x3,x4) (REV (MOVDloadidx ptr0 idx0 mem)) // b[0]<<24 | b[1]<<16 | b[2]<<8 | b[3] -> load 32-bit, reverse (OR o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] @@ -1563,6 +1788,22 @@ && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0) -> @mergePoint(b,x0,x1,x2,x3) (REVW (MOVWUload {s} (OffPtr [i0] p) mem)) +(OR o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] + y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem))) + y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) + y2:(MOVDnop x2:(MOVBUload [2] {s} p mem))) + y3:(MOVDnop x3:(MOVBUload [3] {s} p mem))) + && s == nil + && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 + && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 + && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 + && mergePoint(b,x0,x1,x2,x3) != nil + && 
(isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) + && isSamePtr(p1, p) + && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) + && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) + && clobber(o0) && clobber(o1) && clobber(s0) + -> @mergePoint(b,x0,x1,x2,x3) (REVW (MOVWUloadidx ptr0 idx0 mem)) // b[0]<<56 | b[1]<<48 | b[2]<<40 | b[3]<<32 | b[4]<<24 | b[5]<<16 | b[6]<<8 | b[7] -> load 64-bit, reverse (OR o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] @@ -1595,6 +1836,32 @@ && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0) -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV (MOVDload {s} (OffPtr [i0] p) mem)) +(OR o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] + y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem))) + y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) + y2:(MOVDnop x2:(MOVBUload [2] {s} p mem))) + y3:(MOVDnop x3:(MOVBUload [3] {s} p mem))) + y4:(MOVDnop x4:(MOVBUload [4] {s} p mem))) + y5:(MOVDnop x5:(MOVBUload [5] {s} p mem))) + y6:(MOVDnop x6:(MOVBUload [6] {s} p mem))) + y7:(MOVDnop x7:(MOVBUload [7] {s} p mem))) + && s == nil + && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 + && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 + && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 + && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 + && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 + && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 + && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil + && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) + && isSamePtr(p1, p) + && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) + && clobber(x4) && clobber(x5) && 
clobber(x6) && clobber(x7) + && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) + && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) + && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) + && clobber(o4) && clobber(o5) && clobber(s0) + -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV (MOVDloadidx ptr0 idx0 mem)) // Combine zero stores into larger (unaligned) stores. (MOVBstorezero [i] {s} ptr0 x:(MOVBstorezero [j] {s} ptr1 mem)) @@ -1604,6 +1871,12 @@ && isSamePtr(ptr0, ptr1) && clobber(x) -> (MOVHstorezero [min(i,j)] {s} ptr0 mem) +(MOVBstorezero [1] {s} (ADD ptr0 idx0) x:(MOVBstorezeroidx ptr1 idx1 mem)) + && x.Uses == 1 + && s == nil + && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) + && clobber(x) + -> (MOVHstorezeroidx ptr1 idx1 mem) (MOVHstorezero [i] {s} ptr0 x:(MOVHstorezero [j] {s} ptr1 mem)) && x.Uses == 1 && areAdjacentOffsets(i,j,2) @@ -1611,6 +1884,12 @@ && isSamePtr(ptr0, ptr1) && clobber(x) -> (MOVWstorezero [min(i,j)] {s} ptr0 mem) +(MOVHstorezero [2] {s} (ADD ptr0 idx0) x:(MOVHstorezeroidx ptr1 idx1 mem)) + && x.Uses == 1 + && s == nil + && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) + && clobber(x) + -> (MOVWstorezeroidx ptr1 idx1 mem) (MOVWstorezero [i] {s} ptr0 x:(MOVWstorezero [j] {s} ptr1 mem)) && x.Uses == 1 && areAdjacentOffsets(i,j,4) @@ -1618,6 +1897,12 @@ && isSamePtr(ptr0, ptr1) && clobber(x) -> (MOVDstorezero [min(i,j)] {s} ptr0 mem) +(MOVWstorezero [4] {s} (ADD ptr0 idx0) x:(MOVWstorezeroidx ptr1 idx1 mem)) + && x.Uses == 1 + && s == nil + && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) + && clobber(x) + -> (MOVDstorezeroidx ptr1 idx1 mem) (MOVDstorezero [i] {s} ptr0 x:(MOVDstorezero [j] {s} ptr1 mem)) && x.Uses == 1 && areAdjacentOffsets(i,j,8) @@ -1625,6 +1910,12 @@ && isSamePtr(ptr0, ptr1) && clobber(x) -> (MOVQstorezero [min(i,j)] {s} ptr0 mem) 
+(MOVDstorezero [8] {s} p0:(ADD ptr0 idx0) x:(MOVDstorezeroidx ptr1 idx1 mem)) + && x.Uses == 1 + && s == nil + && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) + && clobber(x) + -> (MOVQstorezero [0] {s} p0 mem) // Combine stores into larger (unaligned) stores. (MOVBstore [i] {s} ptr0 (SRLconst [8] w) x:(MOVBstore [i-1] {s} ptr1 w mem)) @@ -1632,69 +1923,150 @@ && isSamePtr(ptr0, ptr1) && clobber(x) -> (MOVHstore [i-1] {s} ptr0 w mem) +(MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [8] w) x:(MOVBstoreidx ptr1 idx1 w mem)) + && x.Uses == 1 + && s == nil + && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) + && clobber(x) + -> (MOVHstoreidx ptr1 idx1 w mem) (MOVBstore [i] {s} ptr0 (UBFX [arm64BFAuxInt(8, 8)] w) x:(MOVBstore [i-1] {s} ptr1 w mem)) && x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x) -> (MOVHstore [i-1] {s} ptr0 w mem) +(MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [arm64BFAuxInt(8, 8)] w) x:(MOVBstoreidx ptr1 idx1 w mem)) + && x.Uses == 1 + && s == nil + && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) + && clobber(x) + -> (MOVHstoreidx ptr1 idx1 w mem) (MOVBstore [i] {s} ptr0 (UBFX [arm64BFAuxInt(8, 24)] w) x:(MOVBstore [i-1] {s} ptr1 w mem)) && x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x) -> (MOVHstore [i-1] {s} ptr0 w mem) +(MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [arm64BFAuxInt(8, 24)] w) x:(MOVBstoreidx ptr1 idx1 w mem)) + && x.Uses == 1 + && s == nil + && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) + && clobber(x) + -> (MOVHstoreidx ptr1 idx1 w mem) (MOVBstore [i] {s} ptr0 (SRLconst [8] (MOVDreg w)) x:(MOVBstore [i-1] {s} ptr1 w mem)) && x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x) -> (MOVHstore [i-1] {s} ptr0 w mem) +(MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [8] (MOVDreg w)) x:(MOVBstoreidx ptr1 idx1 w mem)) + && x.Uses == 1 + 
&& s == nil + && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) + && clobber(x) + -> (MOVHstoreidx ptr1 idx1 w mem) (MOVBstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVBstore [i-1] {s} ptr1 w0:(SRLconst [j-8] w) mem)) && x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x) -> (MOVHstore [i-1] {s} ptr0 w0 mem) +(MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVBstoreidx ptr1 idx1 w0:(SRLconst [j-8] w) mem)) + && x.Uses == 1 + && s == nil + && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) + && clobber(x) + -> (MOVHstoreidx ptr1 idx1 w0 mem) (MOVBstore [i] {s} ptr0 (UBFX [bfc] w) x:(MOVBstore [i-1] {s} ptr1 w0:(UBFX [bfc2] w) mem)) && x.Uses == 1 && isSamePtr(ptr0, ptr1) - && getARM64BFwidth(bfc) == 32 - getARM64BFlsb(bfc) - && getARM64BFwidth(bfc2) == 32 - getARM64BFlsb(bfc2) - && getARM64BFlsb(bfc2) == getARM64BFlsb(bfc) - 8 + && getARM64BFwidth(bfc) == 32 - getARM64BFlsb(bfc) + && getARM64BFwidth(bfc2) == 32 - getARM64BFlsb(bfc2) + && getARM64BFlsb(bfc2) == getARM64BFlsb(bfc) - 8 && clobber(x) -> (MOVHstore [i-1] {s} ptr0 w0 mem) +(MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [bfc] w) x:(MOVBstoreidx ptr1 idx1 w0:(UBFX [bfc2] w) mem)) + && x.Uses == 1 + && s == nil + && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) + && getARM64BFwidth(bfc) == 32 - getARM64BFlsb(bfc) + && getARM64BFwidth(bfc2) == 32 - getARM64BFlsb(bfc2) + && getARM64BFlsb(bfc2) == getARM64BFlsb(bfc) - 8 + && clobber(x) + -> (MOVHstoreidx ptr1 idx1 w0 mem) (MOVBstore [i] {s} ptr0 (SRLconst [j] (MOVDreg w)) x:(MOVBstore [i-1] {s} ptr1 w0:(SRLconst [j-8] (MOVDreg w)) mem)) && x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x) -> (MOVHstore [i-1] {s} ptr0 w0 mem) +(MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [j] (MOVDreg w)) x:(MOVBstoreidx ptr1 idx1 w0:(SRLconst [j-8] (MOVDreg w)) mem)) + && x.Uses == 1 + && s == nil + && (isSamePtr(ptr0, ptr1) && 
isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) + && clobber(x) + -> (MOVHstoreidx ptr1 idx1 w0 mem) (MOVHstore [i] {s} ptr0 (SRLconst [16] w) x:(MOVHstore [i-2] {s} ptr1 w mem)) && x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x) -> (MOVWstore [i-2] {s} ptr0 w mem) +(MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [16] w) x:(MOVHstoreidx ptr1 idx1 w mem)) + && x.Uses == 1 + && s == nil + && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) + && clobber(x) + -> (MOVWstoreidx ptr1 idx1 w mem) (MOVHstore [i] {s} ptr0 (UBFX [arm64BFAuxInt(16, 16)] w) x:(MOVHstore [i-2] {s} ptr1 w mem)) && x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x) -> (MOVWstore [i-2] {s} ptr0 w mem) +(MOVHstore [2] {s} (ADD ptr0 idx0) (UBFX [arm64BFAuxInt(16, 16)] w) x:(MOVHstoreidx ptr1 idx1 w mem)) + && x.Uses == 1 + && s == nil + && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) + && clobber(x) + -> (MOVWstoreidx ptr1 idx1 w mem) (MOVHstore [i] {s} ptr0 (SRLconst [16] (MOVDreg w)) x:(MOVHstore [i-2] {s} ptr1 w mem)) && x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x) -> (MOVWstore [i-2] {s} ptr0 w mem) +(MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [16] (MOVDreg w)) x:(MOVHstoreidx ptr1 idx1 w mem)) + && x.Uses == 1 + && s == nil + && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) + && clobber(x) + -> (MOVWstoreidx ptr1 idx1 w mem) (MOVHstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVHstore [i-2] {s} ptr1 w0:(SRLconst [j-16] w) mem)) && x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x) -> (MOVWstore [i-2] {s} ptr0 w0 mem) +(MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVHstoreidx ptr1 idx1 w0:(SRLconst [j-16] w) mem)) + && x.Uses == 1 + && s == nil + && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) + && clobber(x) + -> (MOVWstoreidx ptr1 idx1 w0 mem) 
(MOVWstore [i] {s} ptr0 (SRLconst [32] w) x:(MOVWstore [i-4] {s} ptr1 w mem)) && x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x) -> (MOVDstore [i-4] {s} ptr0 w mem) +(MOVWstore [4] {s} (ADD ptr0 idx0) (SRLconst [32] w) x:(MOVWstoreidx ptr1 idx1 w mem)) + && x.Uses == 1 + && s == nil + && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) + && clobber(x) + -> (MOVDstoreidx ptr1 idx1 w mem) (MOVWstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVWstore [i-4] {s} ptr1 w0:(SRLconst [j-32] w) mem)) && x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x) -> (MOVDstore [i-4] {s} ptr0 w0 mem) +(MOVWstore [4] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVWstoreidx ptr1 idx1 w0:(SRLconst [j-32] w) mem)) + && x.Uses == 1 + && s == nil + && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) + && clobber(x) + -> (MOVDstoreidx ptr1 idx1 w0 mem) (MOVBstore [i] {s} ptr w x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w) @@ -1718,6 +2090,32 @@ && clobber(x5) && clobber(x6) -> (MOVDstore [i-7] {s} ptr (REV w) mem) +(MOVBstore [7] {s} p w + x0:(MOVBstore [6] {s} p (SRLconst [8] w) + x1:(MOVBstore [5] {s} p (SRLconst [16] w) + x2:(MOVBstore [4] {s} p (SRLconst [24] w) + x3:(MOVBstore [3] {s} p (SRLconst [32] w) + x4:(MOVBstore [2] {s} p (SRLconst [40] w) + x5:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (SRLconst [48] w) + x6:(MOVBstoreidx ptr0 idx0 (SRLconst [56] w) mem)))))))) + && x0.Uses == 1 + && x1.Uses == 1 + && x2.Uses == 1 + && x3.Uses == 1 + && x4.Uses == 1 + && x5.Uses == 1 + && x6.Uses == 1 + && s == nil + && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) + && isSamePtr(p1, p) + && clobber(x0) + && clobber(x1) + && clobber(x2) + && clobber(x3) + && clobber(x4) + && clobber(x5) + && clobber(x6) + -> (MOVDstoreidx ptr0 idx0 (REV w) mem) (MOVBstore [i] {s} ptr w x0:(MOVBstore [i-1] {s} ptr (UBFX 
[arm64BFAuxInt(8, 24)] w) x1:(MOVBstore [i-2] {s} ptr (UBFX [arm64BFAuxInt(16, 16)] w) @@ -1729,6 +2127,20 @@ && clobber(x1) && clobber(x2) -> (MOVWstore [i-3] {s} ptr (REVW w) mem) +(MOVBstore [3] {s} p w + x0:(MOVBstore [2] {s} p (UBFX [arm64BFAuxInt(8, 24)] w) + x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (UBFX [arm64BFAuxInt(16, 16)] w) + x2:(MOVBstoreidx ptr0 idx0 (UBFX [arm64BFAuxInt(24, 8)] w) mem)))) + && x0.Uses == 1 + && x1.Uses == 1 + && x2.Uses == 1 + && s == nil + && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) + && isSamePtr(p1, p) + && clobber(x0) + && clobber(x1) + && clobber(x2) + -> (MOVWstoreidx ptr0 idx0 (REVW w) mem) (MOVBstore [i] {s} ptr w x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] (MOVDreg w)) @@ -1740,6 +2152,20 @@ && clobber(x1) && clobber(x2) -> (MOVWstore [i-3] {s} ptr (REVW w) mem) +(MOVBstore [3] {s} p w + x0:(MOVBstore [2] {s} p (SRLconst [8] (MOVDreg w)) + x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (SRLconst [16] (MOVDreg w)) + x2:(MOVBstoreidx ptr0 idx0 (SRLconst [24] (MOVDreg w)) mem)))) + && x0.Uses == 1 + && x1.Uses == 1 + && x2.Uses == 1 + && s == nil + && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) + && isSamePtr(p1, p) + && clobber(x0) + && clobber(x1) + && clobber(x2) + -> (MOVWstoreidx ptr0 idx0 (REVW w) mem) (MOVBstore [i] {s} ptr w x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w) @@ -1751,26 +2177,70 @@ && clobber(x1) && clobber(x2) -> (MOVWstore [i-3] {s} ptr (REVW w) mem) +(MOVBstore [3] {s} p w + x0:(MOVBstore [2] {s} p (SRLconst [8] w) + x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (SRLconst [16] w) + x2:(MOVBstoreidx ptr0 idx0 (SRLconst [24] w) mem)))) + && x0.Uses == 1 + && x1.Uses == 1 + && x2.Uses == 1 + && s == nil + && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) + 
&& isSamePtr(p1, p) + && clobber(x0) + && clobber(x1) + && clobber(x2) + -> (MOVWstoreidx ptr0 idx0 (REVW w) mem) (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) mem)) && x.Uses == 1 && clobber(x) -> (MOVHstore [i-1] {s} ptr (REV16W w) mem) +(MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (SRLconst [8] w) mem)) + && x.Uses == 1 + && s == nil + && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) + && clobber(x) + -> (MOVHstoreidx ptr0 idx0 (REV16W w) mem) (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (UBFX [arm64BFAuxInt(8, 8)] w) mem)) && x.Uses == 1 && clobber(x) -> (MOVHstore [i-1] {s} ptr (REV16W w) mem) +(MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (UBFX [arm64BFAuxInt(8, 8)] w) mem)) + && x.Uses == 1 + && s == nil + && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) + && clobber(x) + -> (MOVHstoreidx ptr0 idx0 (REV16W w) mem) (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) mem)) && x.Uses == 1 && clobber(x) -> (MOVHstore [i-1] {s} ptr (REV16W w) mem) +(MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (SRLconst [8] (MOVDreg w)) mem)) + && x.Uses == 1 + && s == nil + && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) + && clobber(x) + -> (MOVHstoreidx ptr0 idx0 (REV16W w) mem) (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (UBFX [arm64BFAuxInt(8, 24)] w) mem)) && x.Uses == 1 && clobber(x) -> (MOVHstore [i-1] {s} ptr (REV16W w) mem) +(MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (UBFX [arm64BFAuxInt(8, 24)] w) mem)) + && x.Uses == 1 + && s == nil + && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) + && clobber(x) + -> (MOVHstoreidx ptr0 idx0 (REV16W w) mem) (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) 
mem)) && x.Uses == 1 && clobber(x) -> (MOVHstore [i-1] {s} ptr (REV16W w) mem) +(MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (SRLconst [8] (MOVDreg w)) mem)) + && x.Uses == 1 + && s == nil + && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) + && clobber(x) + -> (MOVHstoreidx ptr0 idx0 (REV16W w) mem) // FP simplification (FNEGS (FMULS x y)) -> (FNMULS x y) diff --git a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go index ec75ca38c6..184e22717e 100644 --- a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go @@ -144,6 +144,7 @@ func init() { gp2flags1 = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}} gp22 = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, gp}} gpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}} + gp2load = regInfo{inputs: []regMask{gpspsbg, gpg}, outputs: []regMask{gp}} gpstore = regInfo{inputs: []regMask{gpspsbg, gpg}} gpstore0 = regInfo{inputs: []regMask{gpspsbg}} gpstore2 = regInfo{inputs: []regMask{gpspsbg, gpg, gpg}} @@ -318,6 +319,15 @@ func init() { {name: "FMOVSload", argLength: 2, reg: fpload, aux: "SymOff", asm: "FMOVS", typ: "Float32", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "FMOVDload", argLength: 2, reg: fpload, aux: "SymOff", asm: "FMOVD", typ: "Float64", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. + // register indexed load + {name: "MOVDloadidx", argLength: 3, reg: gp2load, asm: "MOVD", faultOnNilArg0: true}, // load 64-bit dword from arg0 + arg1, arg2 = mem. + {name: "MOVWloadidx", argLength: 3, reg: gp2load, asm: "MOVW", faultOnNilArg0: true}, // load 32-bit word from arg0 + arg1, sign-extended to 64-bit, arg2=mem. 
+ {name: "MOVWUloadidx", argLength: 3, reg: gp2load, asm: "MOVWU", faultOnNilArg0: true}, // load 32-bit word from arg0 + arg1, zero-extended to 64-bit, arg2=mem. + {name: "MOVHloadidx", argLength: 3, reg: gp2load, asm: "MOVH", faultOnNilArg0: true}, // load 16-bit halfword from arg0 + arg1, sign-extended to 64-bit, arg2=mem. + {name: "MOVHUloadidx", argLength: 3, reg: gp2load, asm: "MOVHU", faultOnNilArg0: true}, // load 16-bit halfword from arg0 + arg1, zero-extended to 64-bit, arg2=mem. + {name: "MOVBloadidx", argLength: 3, reg: gp2load, asm: "MOVB", faultOnNilArg0: true}, // load 8-bit byte from arg0 + arg1, sign-extended to 64-bit, arg2=mem. + {name: "MOVBUloadidx", argLength: 3, reg: gp2load, asm: "MOVBU", faultOnNilArg0: true}, // load 8-bit byte from arg0 + arg1, zero-extended to 64-bit, arg2=mem. + {name: "MOVBstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 1 byte of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "MOVHstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "MOVWstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVW", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. @@ -326,12 +336,24 @@ func init() { {name: "FMOVSstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVS", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "FMOVDstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. + // register indexed store: the address is base register arg0 plus index register arg1; these ops carry no auxInt/aux offset. NOTE(review): faultOnNilArg0 is set although the arg1 offset is not a bounded constant; confirm nil-check removal stays sound for these ops. + {name: "MOVBstoreidx", argLength: 4, reg: gpstore2, asm: "MOVB", typ: "Mem", faultOnNilArg0: true}, // store 1 byte of arg2 to arg0 + arg1, arg3 = mem.
+ {name: "MOVHstoreidx", argLength: 4, reg: gpstore2, asm: "MOVH", typ: "Mem", faultOnNilArg0: true}, // store 2 bytes of arg2 to arg0 + arg1, arg3 = mem. + {name: "MOVWstoreidx", argLength: 4, reg: gpstore2, asm: "MOVW", typ: "Mem", faultOnNilArg0: true}, // store 4 bytes of arg2 to arg0 + arg1, arg3 = mem. + {name: "MOVDstoreidx", argLength: 4, reg: gpstore2, asm: "MOVD", typ: "Mem", faultOnNilArg0: true}, // store 8 bytes of arg2 to arg0 + arg1, arg3 = mem. + {name: "MOVBstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 1 byte of zero to arg0 + auxInt + aux. arg1=mem. {name: "MOVHstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes of zero to arg0 + auxInt + aux. arg1=mem. {name: "MOVWstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVW", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of zero to arg0 + auxInt + aux. arg1=mem. {name: "MOVDstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of zero to arg0 + auxInt + aux. arg1=mem. {name: "MOVQstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "STP", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 16 bytes of zero to arg0 + auxInt + aux. arg1=mem. + // register indexed store zero: same addressing as the indexed stores (base arg0 plus index arg1, no auxInt/aux); the stored value is zero, so no value argument is taken. + {name: "MOVBstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVB", typ: "Mem", faultOnNilArg0: true}, // store 1 byte of zero to arg0 + arg1, arg2 = mem. + {name: "MOVHstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVH", typ: "Mem", faultOnNilArg0: true}, // store 2 bytes of zero to arg0 + arg1, arg2 = mem. + {name: "MOVWstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVW", typ: "Mem", faultOnNilArg0: true}, // store 4 bytes of zero to arg0 + arg1, arg2 = mem.
+ {name: "MOVDstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVD", typ: "Mem", faultOnNilArg0: true}, // store 8 bytes of zero to arg0 + arg1, arg2 = mem. + {name: "FMOVDgpfp", argLength: 1, reg: gpfp, asm: "FMOVD"}, // move int64 to float64 (no conversion) {name: "FMOVDfpgp", argLength: 1, reg: fpgp, asm: "FMOVD"}, // move float64 to int64 (no conversion) diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 4b782acfa6..0de1ccfdde 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1152,6 +1152,13 @@ const ( OpARM64MOVDload OpARM64FMOVSload OpARM64FMOVDload + OpARM64MOVDloadidx + OpARM64MOVWloadidx + OpARM64MOVWUloadidx + OpARM64MOVHloadidx + OpARM64MOVHUloadidx + OpARM64MOVBloadidx + OpARM64MOVBUloadidx OpARM64MOVBstore OpARM64MOVHstore OpARM64MOVWstore @@ -1159,11 +1166,19 @@ const ( OpARM64STP OpARM64FMOVSstore OpARM64FMOVDstore + OpARM64MOVBstoreidx + OpARM64MOVHstoreidx + OpARM64MOVWstoreidx + OpARM64MOVDstoreidx OpARM64MOVBstorezero OpARM64MOVHstorezero OpARM64MOVWstorezero OpARM64MOVDstorezero OpARM64MOVQstorezero + OpARM64MOVBstorezeroidx + OpARM64MOVHstorezeroidx + OpARM64MOVWstorezeroidx + OpARM64MOVDstorezeroidx OpARM64FMOVDgpfp OpARM64FMOVDfpgp OpARM64MOVBreg @@ -15070,6 +15085,111 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "MOVDloadidx", + argLen: 3, + faultOnNilArg0: true, + asm: arm64.AMOVD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 + {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB + }, + outputs: []outputInfo{ + {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30 + }, + }, + }, + { + name: "MOVWloadidx", + argLen: 3, + faultOnNilArg0: true, + asm: arm64.AMOVW, + reg: regInfo{ + 
inputs: []inputInfo{ + {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 + {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB + }, + outputs: []outputInfo{ + {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30 + }, + }, + }, + { + name: "MOVWUloadidx", + argLen: 3, + faultOnNilArg0: true, + asm: arm64.AMOVWU, + reg: regInfo{ + inputs: []inputInfo{ + {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 + {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB + }, + outputs: []outputInfo{ + {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30 + }, + }, + }, + { + name: "MOVHloadidx", + argLen: 3, + faultOnNilArg0: true, + asm: arm64.AMOVH, + reg: regInfo{ + inputs: []inputInfo{ + {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 + {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB + }, + outputs: []outputInfo{ + {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30 + }, + }, + }, + { + name: "MOVHUloadidx", + argLen: 3, + faultOnNilArg0: true, + asm: arm64.AMOVHU, + reg: regInfo{ + inputs: []inputInfo{ + {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 + {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB + }, + outputs: []outputInfo{ + {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 
R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30 + }, + }, + }, + { + name: "MOVBloadidx", + argLen: 3, + faultOnNilArg0: true, + asm: arm64.AMOVB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 + {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB + }, + outputs: []outputInfo{ + {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30 + }, + }, + }, + { + name: "MOVBUloadidx", + argLen: 3, + faultOnNilArg0: true, + asm: arm64.AMOVBU, + reg: regInfo{ + inputs: []inputInfo{ + {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 + {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB + }, + outputs: []outputInfo{ + {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30 + }, + }, + }, { name: "MOVBstore", auxType: auxSymOff, @@ -15169,6 +15289,58 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "MOVBstoreidx", + argLen: 4, + faultOnNilArg0: true, + asm: arm64.AMOVB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 + {2, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 + {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB + }, + }, + }, + { + name: "MOVHstoreidx", + argLen: 4, + faultOnNilArg0: true, + asm: arm64.AMOVH, + reg: regInfo{ + inputs: []inputInfo{ + {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 
R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 + {2, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 + {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB + }, + }, + }, + { + name: "MOVWstoreidx", + argLen: 4, + faultOnNilArg0: true, + asm: arm64.AMOVW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 + {2, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 + {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB + }, + }, + }, + { + name: "MOVDstoreidx", + argLen: 4, + faultOnNilArg0: true, + asm: arm64.AMOVD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 + {2, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 + {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB + }, + }, + }, { name: "MOVBstorezero", auxType: auxSymOff, @@ -15234,6 +15406,54 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "MOVBstorezeroidx", + argLen: 3, + faultOnNilArg0: true, + asm: arm64.AMOVB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 + {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB + }, + }, + }, + { + name: "MOVHstorezeroidx", + argLen: 3, + faultOnNilArg0: true, + asm: arm64.AMOVH, + reg: regInfo{ + inputs: []inputInfo{ + {1, 
805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 + {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB + }, + }, + }, + { + name: "MOVWstorezeroidx", + argLen: 3, + faultOnNilArg0: true, + asm: arm64.AMOVW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 + {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB + }, + }, + }, + { + name: "MOVDstorezeroidx", + argLen: 3, + faultOnNilArg0: true, + asm: arm64.AMOVD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 + {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB + }, + }, + }, { name: "FMOVDgpfp", argLen: 1, diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go index 407719e744..90cbff3a59 100644 --- a/src/cmd/compile/internal/ssa/rewriteARM64.go +++ b/src/cmd/compile/internal/ssa/rewriteARM64.go @@ -131,50 +131,80 @@ func rewriteValueARM64(v *Value) bool { return rewriteValueARM64_OpARM64MODW_0(v) case OpARM64MOVBUload: return rewriteValueARM64_OpARM64MOVBUload_0(v) + case OpARM64MOVBUloadidx: + return rewriteValueARM64_OpARM64MOVBUloadidx_0(v) case OpARM64MOVBUreg: return rewriteValueARM64_OpARM64MOVBUreg_0(v) case OpARM64MOVBload: return rewriteValueARM64_OpARM64MOVBload_0(v) + case OpARM64MOVBloadidx: + return rewriteValueARM64_OpARM64MOVBloadidx_0(v) case OpARM64MOVBreg: return rewriteValueARM64_OpARM64MOVBreg_0(v) case OpARM64MOVBstore: - return rewriteValueARM64_OpARM64MOVBstore_0(v) || rewriteValueARM64_OpARM64MOVBstore_10(v) || 
rewriteValueARM64_OpARM64MOVBstore_20(v) + return rewriteValueARM64_OpARM64MOVBstore_0(v) || rewriteValueARM64_OpARM64MOVBstore_10(v) || rewriteValueARM64_OpARM64MOVBstore_20(v) || rewriteValueARM64_OpARM64MOVBstore_30(v) || rewriteValueARM64_OpARM64MOVBstore_40(v) + case OpARM64MOVBstoreidx: + return rewriteValueARM64_OpARM64MOVBstoreidx_0(v) case OpARM64MOVBstorezero: return rewriteValueARM64_OpARM64MOVBstorezero_0(v) + case OpARM64MOVBstorezeroidx: + return rewriteValueARM64_OpARM64MOVBstorezeroidx_0(v) case OpARM64MOVDload: return rewriteValueARM64_OpARM64MOVDload_0(v) + case OpARM64MOVDloadidx: + return rewriteValueARM64_OpARM64MOVDloadidx_0(v) case OpARM64MOVDreg: return rewriteValueARM64_OpARM64MOVDreg_0(v) case OpARM64MOVDstore: return rewriteValueARM64_OpARM64MOVDstore_0(v) + case OpARM64MOVDstoreidx: + return rewriteValueARM64_OpARM64MOVDstoreidx_0(v) case OpARM64MOVDstorezero: return rewriteValueARM64_OpARM64MOVDstorezero_0(v) + case OpARM64MOVDstorezeroidx: + return rewriteValueARM64_OpARM64MOVDstorezeroidx_0(v) case OpARM64MOVHUload: return rewriteValueARM64_OpARM64MOVHUload_0(v) + case OpARM64MOVHUloadidx: + return rewriteValueARM64_OpARM64MOVHUloadidx_0(v) case OpARM64MOVHUreg: return rewriteValueARM64_OpARM64MOVHUreg_0(v) case OpARM64MOVHload: return rewriteValueARM64_OpARM64MOVHload_0(v) + case OpARM64MOVHloadidx: + return rewriteValueARM64_OpARM64MOVHloadidx_0(v) case OpARM64MOVHreg: - return rewriteValueARM64_OpARM64MOVHreg_0(v) + return rewriteValueARM64_OpARM64MOVHreg_0(v) || rewriteValueARM64_OpARM64MOVHreg_10(v) case OpARM64MOVHstore: return rewriteValueARM64_OpARM64MOVHstore_0(v) || rewriteValueARM64_OpARM64MOVHstore_10(v) + case OpARM64MOVHstoreidx: + return rewriteValueARM64_OpARM64MOVHstoreidx_0(v) case OpARM64MOVHstorezero: return rewriteValueARM64_OpARM64MOVHstorezero_0(v) + case OpARM64MOVHstorezeroidx: + return rewriteValueARM64_OpARM64MOVHstorezeroidx_0(v) case OpARM64MOVQstorezero: return rewriteValueARM64_OpARM64MOVQstorezero_0(v) 
case OpARM64MOVWUload: return rewriteValueARM64_OpARM64MOVWUload_0(v) + case OpARM64MOVWUloadidx: + return rewriteValueARM64_OpARM64MOVWUloadidx_0(v) case OpARM64MOVWUreg: - return rewriteValueARM64_OpARM64MOVWUreg_0(v) + return rewriteValueARM64_OpARM64MOVWUreg_0(v) || rewriteValueARM64_OpARM64MOVWUreg_10(v) case OpARM64MOVWload: return rewriteValueARM64_OpARM64MOVWload_0(v) + case OpARM64MOVWloadidx: + return rewriteValueARM64_OpARM64MOVWloadidx_0(v) case OpARM64MOVWreg: return rewriteValueARM64_OpARM64MOVWreg_0(v) || rewriteValueARM64_OpARM64MOVWreg_10(v) case OpARM64MOVWstore: return rewriteValueARM64_OpARM64MOVWstore_0(v) + case OpARM64MOVWstoreidx: + return rewriteValueARM64_OpARM64MOVWstoreidx_0(v) case OpARM64MOVWstorezero: return rewriteValueARM64_OpARM64MOVWstorezero_0(v) + case OpARM64MOVWstorezeroidx: + return rewriteValueARM64_OpARM64MOVWstorezeroidx_0(v) case OpARM64MUL: return rewriteValueARM64_OpARM64MUL_0(v) || rewriteValueARM64_OpARM64MUL_10(v) || rewriteValueARM64_OpARM64MUL_20(v) case OpARM64MULW: @@ -186,7 +216,7 @@ func rewriteValueARM64(v *Value) bool { case OpARM64NotEqual: return rewriteValueARM64_OpARM64NotEqual_0(v) case OpARM64OR: - return rewriteValueARM64_OpARM64OR_0(v) || rewriteValueARM64_OpARM64OR_10(v) || rewriteValueARM64_OpARM64OR_20(v) + return rewriteValueARM64_OpARM64OR_0(v) || rewriteValueARM64_OpARM64OR_10(v) || rewriteValueARM64_OpARM64OR_20(v) || rewriteValueARM64_OpARM64OR_30(v) case OpARM64ORN: return rewriteValueARM64_OpARM64ORN_0(v) case OpARM64ORNshiftLL: @@ -5754,6 +5784,30 @@ func rewriteValueARM64_OpARM64MOVBUload_0(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVBUload [off] {sym} (ADD ptr idx) mem) + // cond: off == 0 && sym == nil + // result: (MOVBUloadidx ptr idx mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + ptr := v_0.Args[0] + idx := v_0.Args[1] + mem := v.Args[1] + if !(off == 0 && sym == nil) { + 
break + } + v.reset(OpARM64MOVBUloadidx) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(mem) + return true + } // match: (MOVBUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) // result: (MOVBUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) @@ -5804,6 +5858,66 @@ func rewriteValueARM64_OpARM64MOVBUload_0(v *Value) bool { } return false } +func rewriteValueARM64_OpARM64MOVBUloadidx_0(v *Value) bool { + // match: (MOVBUloadidx ptr (MOVDconst [c]) mem) + // cond: + // result: (MOVBUload [c] ptr mem) + for { + _ = v.Args[2] + ptr := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { + break + } + c := v_1.AuxInt + mem := v.Args[2] + v.reset(OpARM64MOVBUload) + v.AuxInt = c + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MOVBUloadidx (MOVDconst [c]) ptr mem) + // cond: + // result: (MOVBUload [c] ptr mem) + for { + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDconst { + break + } + c := v_0.AuxInt + ptr := v.Args[1] + mem := v.Args[2] + v.reset(OpARM64MOVBUload) + v.AuxInt = c + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MOVBUloadidx ptr idx (MOVBstorezeroidx ptr2 idx2 _)) + // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) + // result: (MOVDconst [0]) + for { + _ = v.Args[2] + ptr := v.Args[0] + idx := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVBstorezeroidx { + break + } + _ = v_2.Args[2] + ptr2 := v_2.Args[0] + idx2 := v_2.Args[1] + if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) { + break + } + v.reset(OpARM64MOVDconst) + v.AuxInt = 0 + return true + } + return false +} func rewriteValueARM64_OpARM64MOVBUreg_0(v *Value) bool { // match: (MOVBUreg x:(MOVBUload _ _)) // cond: @@ -5818,6 +5932,19 @@ func rewriteValueARM64_OpARM64MOVBUreg_0(v *Value) bool { v.AddArg(x) return true } + // match: 
(MOVBUreg x:(MOVBUloadidx _ _ _)) + // cond: + // result: (MOVDreg x) + for { + x := v.Args[0] + if x.Op != OpARM64MOVBUloadidx { + break + } + _ = x.Args[2] + v.reset(OpARM64MOVDreg) + v.AddArg(x) + return true + } // match: (MOVBUreg x:(MOVBUreg _)) // cond: // result: (MOVDreg x) @@ -5937,6 +6064,30 @@ func rewriteValueARM64_OpARM64MOVBload_0(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVBload [off] {sym} (ADD ptr idx) mem) + // cond: off == 0 && sym == nil + // result: (MOVBloadidx ptr idx mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + ptr := v_0.Args[0] + idx := v_0.Args[1] + mem := v.Args[1] + if !(off == 0 && sym == nil) { + break + } + v.reset(OpARM64MOVBloadidx) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(mem) + return true + } // match: (MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) // result: (MOVBload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) @@ -5987,6 +6138,66 @@ func rewriteValueARM64_OpARM64MOVBload_0(v *Value) bool { } return false } +func rewriteValueARM64_OpARM64MOVBloadidx_0(v *Value) bool { + // match: (MOVBloadidx ptr (MOVDconst [c]) mem) + // cond: + // result: (MOVBload [c] ptr mem) + for { + _ = v.Args[2] + ptr := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { + break + } + c := v_1.AuxInt + mem := v.Args[2] + v.reset(OpARM64MOVBload) + v.AuxInt = c + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MOVBloadidx (MOVDconst [c]) ptr mem) + // cond: + // result: (MOVBload [c] ptr mem) + for { + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDconst { + break + } + c := v_0.AuxInt + ptr := v.Args[1] + mem := v.Args[2] + v.reset(OpARM64MOVBload) + v.AuxInt = c + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MOVBloadidx ptr idx (MOVBstorezeroidx ptr2 idx2 _)) + // cond: 
(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) + // result: (MOVDconst [0]) + for { + _ = v.Args[2] + ptr := v.Args[0] + idx := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVBstorezeroidx { + break + } + _ = v_2.Args[2] + ptr2 := v_2.Args[0] + idx2 := v_2.Args[1] + if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) { + break + } + v.reset(OpARM64MOVDconst) + v.AuxInt = 0 + return true + } + return false +} func rewriteValueARM64_OpARM64MOVBreg_0(v *Value) bool { // match: (MOVBreg x:(MOVBload _ _)) // cond: @@ -6001,6 +6212,19 @@ func rewriteValueARM64_OpARM64MOVBreg_0(v *Value) bool { v.AddArg(x) return true } + // match: (MOVBreg x:(MOVBloadidx _ _ _)) + // cond: + // result: (MOVDreg x) + for { + x := v.Args[0] + if x.Op != OpARM64MOVBloadidx { + break + } + _ = x.Args[2] + v.reset(OpARM64MOVDreg) + v.AddArg(x) + return true + } // match: (MOVBreg x:(MOVBreg _)) // cond: // result: (MOVDreg x) @@ -6077,6 +6301,32 @@ func rewriteValueARM64_OpARM64MOVBstore_0(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVBstore [off] {sym} (ADD ptr idx) val mem) + // cond: off == 0 && sym == nil + // result: (MOVBstoreidx ptr idx val mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + ptr := v_0.Args[0] + idx := v_0.Args[1] + val := v.Args[1] + mem := v.Args[2] + if !(off == 0 && sym == nil) { + break + } + v.reset(OpARM64MOVBstoreidx) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(val) + v.AddArg(mem) + return true + } // match: (MOVBstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem) // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) // result: (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem) @@ -6259,6 +6509,9 @@ func rewriteValueARM64_OpARM64MOVBstore_0(v *Value) bool { v.AddArg(mem) return true } + return false 
+} +func rewriteValueARM64_OpARM64MOVBstore_10(v *Value) bool { // match: (MOVBstore [i] {s} ptr0 (SRLconst [8] w) x:(MOVBstore [i-1] {s} ptr1 w mem)) // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x) // result: (MOVHstore [i-1] {s} ptr0 w mem) @@ -6302,11 +6555,51 @@ func rewriteValueARM64_OpARM64MOVBstore_0(v *Value) bool { v.AddArg(mem) return true } - return false -} -func rewriteValueARM64_OpARM64MOVBstore_10(v *Value) bool { - b := v.Block - _ = b + // match: (MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [8] w) x:(MOVBstoreidx ptr1 idx1 w mem)) + // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x) + // result: (MOVHstoreidx ptr1 idx1 w mem) + for { + if v.AuxInt != 1 { + break + } + s := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + ptr0 := v_0.Args[0] + idx0 := v_0.Args[1] + v_1 := v.Args[1] + if v_1.Op != OpARM64SRLconst { + break + } + if v_1.AuxInt != 8 { + break + } + w := v_1.Args[0] + x := v.Args[2] + if x.Op != OpARM64MOVBstoreidx { + break + } + _ = x.Args[3] + ptr1 := x.Args[0] + idx1 := x.Args[1] + if w != x.Args[2] { + break + } + mem := x.Args[3] + if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) { + break + } + v.reset(OpARM64MOVHstoreidx) + v.AddArg(ptr1) + v.AddArg(idx1) + v.AddArg(w) + v.AddArg(mem) + return true + } // match: (MOVBstore [i] {s} ptr0 (UBFX [arm64BFAuxInt(8, 8)] w) x:(MOVBstore [i-1] {s} ptr1 w mem)) // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x) // result: (MOVHstore [i-1] {s} ptr0 w mem) @@ -6350,6 +6643,51 @@ func rewriteValueARM64_OpARM64MOVBstore_10(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [arm64BFAuxInt(8, 8)] w) x:(MOVBstoreidx ptr1 idx1 w mem)) + // cond: x.Uses == 1 && s == nil && 
(isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x) + // result: (MOVHstoreidx ptr1 idx1 w mem) + for { + if v.AuxInt != 1 { + break + } + s := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + ptr0 := v_0.Args[0] + idx0 := v_0.Args[1] + v_1 := v.Args[1] + if v_1.Op != OpARM64UBFX { + break + } + if v_1.AuxInt != arm64BFAuxInt(8, 8) { + break + } + w := v_1.Args[0] + x := v.Args[2] + if x.Op != OpARM64MOVBstoreidx { + break + } + _ = x.Args[3] + ptr1 := x.Args[0] + idx1 := x.Args[1] + if w != x.Args[2] { + break + } + mem := x.Args[3] + if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) { + break + } + v.reset(OpARM64MOVHstoreidx) + v.AddArg(ptr1) + v.AddArg(idx1) + v.AddArg(w) + v.AddArg(mem) + return true + } // match: (MOVBstore [i] {s} ptr0 (UBFX [arm64BFAuxInt(8, 24)] w) x:(MOVBstore [i-1] {s} ptr1 w mem)) // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x) // result: (MOVHstore [i-1] {s} ptr0 w mem) @@ -6393,6 +6731,51 @@ func rewriteValueARM64_OpARM64MOVBstore_10(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [arm64BFAuxInt(8, 24)] w) x:(MOVBstoreidx ptr1 idx1 w mem)) + // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x) + // result: (MOVHstoreidx ptr1 idx1 w mem) + for { + if v.AuxInt != 1 { + break + } + s := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + ptr0 := v_0.Args[0] + idx0 := v_0.Args[1] + v_1 := v.Args[1] + if v_1.Op != OpARM64UBFX { + break + } + if v_1.AuxInt != arm64BFAuxInt(8, 24) { + break + } + w := v_1.Args[0] + x := v.Args[2] + if x.Op != OpARM64MOVBstoreidx { + break + } + _ = x.Args[3] + ptr1 := x.Args[0] + idx1 := x.Args[1] + 
if w != x.Args[2] { + break + } + mem := x.Args[3] + if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) { + break + } + v.reset(OpARM64MOVHstoreidx) + v.AddArg(ptr1) + v.AddArg(idx1) + v.AddArg(w) + v.AddArg(mem) + return true + } // match: (MOVBstore [i] {s} ptr0 (SRLconst [8] (MOVDreg w)) x:(MOVBstore [i-1] {s} ptr1 w mem)) // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x) // result: (MOVHstore [i-1] {s} ptr0 w mem) @@ -6440,6 +6823,55 @@ func rewriteValueARM64_OpARM64MOVBstore_10(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [8] (MOVDreg w)) x:(MOVBstoreidx ptr1 idx1 w mem)) + // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x) + // result: (MOVHstoreidx ptr1 idx1 w mem) + for { + if v.AuxInt != 1 { + break + } + s := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + ptr0 := v_0.Args[0] + idx0 := v_0.Args[1] + v_1 := v.Args[1] + if v_1.Op != OpARM64SRLconst { + break + } + if v_1.AuxInt != 8 { + break + } + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpARM64MOVDreg { + break + } + w := v_1_0.Args[0] + x := v.Args[2] + if x.Op != OpARM64MOVBstoreidx { + break + } + _ = x.Args[3] + ptr1 := x.Args[0] + idx1 := x.Args[1] + if w != x.Args[2] { + break + } + mem := x.Args[3] + if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) { + break + } + v.reset(OpARM64MOVHstoreidx) + v.AddArg(ptr1) + v.AddArg(idx1) + v.AddArg(w) + v.AddArg(mem) + return true + } // match: (MOVBstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVBstore [i-1] {s} ptr1 w0:(SRLconst [j-8] w) mem)) // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x) // result: (MOVHstore [i-1] {s} ptr0 w0 mem) @@ -6488,6 
+6920,61 @@ func rewriteValueARM64_OpARM64MOVBstore_10(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVBstoreidx ptr1 idx1 w0:(SRLconst [j-8] w) mem)) + // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x) + // result: (MOVHstoreidx ptr1 idx1 w0 mem) + for { + if v.AuxInt != 1 { + break + } + s := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + ptr0 := v_0.Args[0] + idx0 := v_0.Args[1] + v_1 := v.Args[1] + if v_1.Op != OpARM64SRLconst { + break + } + j := v_1.AuxInt + w := v_1.Args[0] + x := v.Args[2] + if x.Op != OpARM64MOVBstoreidx { + break + } + _ = x.Args[3] + ptr1 := x.Args[0] + idx1 := x.Args[1] + w0 := x.Args[2] + if w0.Op != OpARM64SRLconst { + break + } + if w0.AuxInt != j-8 { + break + } + if w != w0.Args[0] { + break + } + mem := x.Args[3] + if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) { + break + } + v.reset(OpARM64MOVHstoreidx) + v.AddArg(ptr1) + v.AddArg(idx1) + v.AddArg(w0) + v.AddArg(mem) + return true + } + return false +} +func rewriteValueARM64_OpARM64MOVBstore_20(v *Value) bool { + b := v.Block + _ = b // match: (MOVBstore [i] {s} ptr0 (UBFX [bfc] w) x:(MOVBstore [i-1] {s} ptr1 w0:(UBFX [bfc2] w) mem)) // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && getARM64BFwidth(bfc) == 32 - getARM64BFlsb(bfc) && getARM64BFwidth(bfc2) == 32 - getARM64BFlsb(bfc2) && getARM64BFlsb(bfc2) == getARM64BFlsb(bfc) - 8 && clobber(x) // result: (MOVHstore [i-1] {s} ptr0 w0 mem) @@ -6534,6 +7021,54 @@ func rewriteValueARM64_OpARM64MOVBstore_10(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [bfc] w) x:(MOVBstoreidx ptr1 idx1 w0:(UBFX [bfc2] w) mem)) + // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, 
ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && getARM64BFwidth(bfc) == 32 - getARM64BFlsb(bfc) && getARM64BFwidth(bfc2) == 32 - getARM64BFlsb(bfc2) && getARM64BFlsb(bfc2) == getARM64BFlsb(bfc) - 8 && clobber(x) + // result: (MOVHstoreidx ptr1 idx1 w0 mem) + for { + if v.AuxInt != 1 { + break + } + s := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + ptr0 := v_0.Args[0] + idx0 := v_0.Args[1] + v_1 := v.Args[1] + if v_1.Op != OpARM64UBFX { + break + } + bfc := v_1.AuxInt + w := v_1.Args[0] + x := v.Args[2] + if x.Op != OpARM64MOVBstoreidx { + break + } + _ = x.Args[3] + ptr1 := x.Args[0] + idx1 := x.Args[1] + w0 := x.Args[2] + if w0.Op != OpARM64UBFX { + break + } + bfc2 := w0.AuxInt + if w != w0.Args[0] { + break + } + mem := x.Args[3] + if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && getARM64BFwidth(bfc) == 32-getARM64BFlsb(bfc) && getARM64BFwidth(bfc2) == 32-getARM64BFlsb(bfc2) && getARM64BFlsb(bfc2) == getARM64BFlsb(bfc)-8 && clobber(x)) { + break + } + v.reset(OpARM64MOVHstoreidx) + v.AddArg(ptr1) + v.AddArg(idx1) + v.AddArg(w0) + v.AddArg(mem) + return true + } // match: (MOVBstore [i] {s} ptr0 (SRLconst [j] (MOVDreg w)) x:(MOVBstore [i-1] {s} ptr1 w0:(SRLconst [j-8] (MOVDreg w)) mem)) // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x) // result: (MOVHstore [i-1] {s} ptr0 w0 mem) @@ -6590,6 +7125,64 @@ func rewriteValueARM64_OpARM64MOVBstore_10(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [j] (MOVDreg w)) x:(MOVBstoreidx ptr1 idx1 w0:(SRLconst [j-8] (MOVDreg w)) mem)) + // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x) + // result: (MOVHstoreidx ptr1 idx1 w0 mem) + for { + if v.AuxInt != 1 { + break + } + s := v.Aux + _ = 
v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + ptr0 := v_0.Args[0] + idx0 := v_0.Args[1] + v_1 := v.Args[1] + if v_1.Op != OpARM64SRLconst { + break + } + j := v_1.AuxInt + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpARM64MOVDreg { + break + } + w := v_1_0.Args[0] + x := v.Args[2] + if x.Op != OpARM64MOVBstoreidx { + break + } + _ = x.Args[3] + ptr1 := x.Args[0] + idx1 := x.Args[1] + w0 := x.Args[2] + if w0.Op != OpARM64SRLconst { + break + } + if w0.AuxInt != j-8 { + break + } + w0_0 := w0.Args[0] + if w0_0.Op != OpARM64MOVDreg { + break + } + if w != w0_0.Args[0] { + break + } + mem := x.Args[3] + if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) { + break + } + v.reset(OpARM64MOVHstoreidx) + v.AddArg(ptr1) + v.AddArg(idx1) + v.AddArg(w0) + v.AddArg(mem) + return true + } // match: (MOVBstore [i] {s} ptr w x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w) x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] w) x3:(MOVBstore [i-4] {s} ptr (SRLconst [32] w) x4:(MOVBstore [i-5] {s} ptr (SRLconst [40] w) x5:(MOVBstore [i-6] {s} ptr (SRLconst [48] w) x6:(MOVBstore [i-7] {s} ptr (SRLconst [56] w) mem)))))))) // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) // result: (MOVDstore [i-7] {s} ptr (REV w) mem) @@ -6781,6 +7374,195 @@ func rewriteValueARM64_OpARM64MOVBstore_10(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVBstore [7] {s} p w x0:(MOVBstore [6] {s} p (SRLconst [8] w) x1:(MOVBstore [5] {s} p (SRLconst [16] w) x2:(MOVBstore [4] {s} p (SRLconst [24] w) x3:(MOVBstore [3] {s} p (SRLconst [32] w) x4:(MOVBstore [2] {s} p (SRLconst [40] w) x5:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (SRLconst [48] w) x6:(MOVBstoreidx ptr0 
idx0 (SRLconst [56] w) mem)))))))) + // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) + // result: (MOVDstoreidx ptr0 idx0 (REV w) mem) + for { + if v.AuxInt != 7 { + break + } + s := v.Aux + _ = v.Args[2] + p := v.Args[0] + w := v.Args[1] + x0 := v.Args[2] + if x0.Op != OpARM64MOVBstore { + break + } + if x0.AuxInt != 6 { + break + } + if x0.Aux != s { + break + } + _ = x0.Args[2] + if p != x0.Args[0] { + break + } + x0_1 := x0.Args[1] + if x0_1.Op != OpARM64SRLconst { + break + } + if x0_1.AuxInt != 8 { + break + } + if w != x0_1.Args[0] { + break + } + x1 := x0.Args[2] + if x1.Op != OpARM64MOVBstore { + break + } + if x1.AuxInt != 5 { + break + } + if x1.Aux != s { + break + } + _ = x1.Args[2] + if p != x1.Args[0] { + break + } + x1_1 := x1.Args[1] + if x1_1.Op != OpARM64SRLconst { + break + } + if x1_1.AuxInt != 16 { + break + } + if w != x1_1.Args[0] { + break + } + x2 := x1.Args[2] + if x2.Op != OpARM64MOVBstore { + break + } + if x2.AuxInt != 4 { + break + } + if x2.Aux != s { + break + } + _ = x2.Args[2] + if p != x2.Args[0] { + break + } + x2_1 := x2.Args[1] + if x2_1.Op != OpARM64SRLconst { + break + } + if x2_1.AuxInt != 24 { + break + } + if w != x2_1.Args[0] { + break + } + x3 := x2.Args[2] + if x3.Op != OpARM64MOVBstore { + break + } + if x3.AuxInt != 3 { + break + } + if x3.Aux != s { + break + } + _ = x3.Args[2] + if p != x3.Args[0] { + break + } + x3_1 := x3.Args[1] + if x3_1.Op != OpARM64SRLconst { + break + } + if x3_1.AuxInt != 32 { + break + } + if w != x3_1.Args[0] { + break + } + x4 := x3.Args[2] + if x4.Op != OpARM64MOVBstore { + break + } + if x4.AuxInt != 2 { + break + } + if x4.Aux != s { + break + } + _ = x4.Args[2] + if p != 
x4.Args[0] { + break + } + x4_1 := x4.Args[1] + if x4_1.Op != OpARM64SRLconst { + break + } + if x4_1.AuxInt != 40 { + break + } + if w != x4_1.Args[0] { + break + } + x5 := x4.Args[2] + if x5.Op != OpARM64MOVBstore { + break + } + if x5.AuxInt != 1 { + break + } + if x5.Aux != s { + break + } + _ = x5.Args[2] + p1 := x5.Args[0] + if p1.Op != OpARM64ADD { + break + } + _ = p1.Args[1] + ptr1 := p1.Args[0] + idx1 := p1.Args[1] + x5_1 := x5.Args[1] + if x5_1.Op != OpARM64SRLconst { + break + } + if x5_1.AuxInt != 48 { + break + } + if w != x5_1.Args[0] { + break + } + x6 := x5.Args[2] + if x6.Op != OpARM64MOVBstoreidx { + break + } + _ = x6.Args[3] + ptr0 := x6.Args[0] + idx0 := x6.Args[1] + x6_2 := x6.Args[2] + if x6_2.Op != OpARM64SRLconst { + break + } + if x6_2.AuxInt != 56 { + break + } + if w != x6_2.Args[0] { + break + } + mem := x6.Args[3] + if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) { + break + } + v.reset(OpARM64MOVDstoreidx) + v.AddArg(ptr0) + v.AddArg(idx0) + v0 := b.NewValue0(v.Pos, OpARM64REV, w.Type) + v0.AddArg(w) + v.AddArg(v0) + v.AddArg(mem) + return true + } // match: (MOVBstore [i] {s} ptr w x0:(MOVBstore [i-1] {s} ptr (UBFX [arm64BFAuxInt(8, 24)] w) x1:(MOVBstore [i-2] {s} ptr (UBFX [arm64BFAuxInt(16, 16)] w) x2:(MOVBstore [i-3] {s} ptr (UBFX [arm64BFAuxInt(24, 8)] w) mem)))) // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) // result: (MOVWstore [i-3] {s} ptr (REVW w) mem) @@ -6876,6 +7658,99 @@ func rewriteValueARM64_OpARM64MOVBstore_10(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVBstore [3] {s} p w x0:(MOVBstore [2] {s} p (UBFX [arm64BFAuxInt(8, 24)] w) x1:(MOVBstore [1] {s} 
p1:(ADD ptr1 idx1) (UBFX [arm64BFAuxInt(16, 16)] w) x2:(MOVBstoreidx ptr0 idx0 (UBFX [arm64BFAuxInt(24, 8)] w) mem)))) + // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) + // result: (MOVWstoreidx ptr0 idx0 (REVW w) mem) + for { + if v.AuxInt != 3 { + break + } + s := v.Aux + _ = v.Args[2] + p := v.Args[0] + w := v.Args[1] + x0 := v.Args[2] + if x0.Op != OpARM64MOVBstore { + break + } + if x0.AuxInt != 2 { + break + } + if x0.Aux != s { + break + } + _ = x0.Args[2] + if p != x0.Args[0] { + break + } + x0_1 := x0.Args[1] + if x0_1.Op != OpARM64UBFX { + break + } + if x0_1.AuxInt != arm64BFAuxInt(8, 24) { + break + } + if w != x0_1.Args[0] { + break + } + x1 := x0.Args[2] + if x1.Op != OpARM64MOVBstore { + break + } + if x1.AuxInt != 1 { + break + } + if x1.Aux != s { + break + } + _ = x1.Args[2] + p1 := x1.Args[0] + if p1.Op != OpARM64ADD { + break + } + _ = p1.Args[1] + ptr1 := p1.Args[0] + idx1 := p1.Args[1] + x1_1 := x1.Args[1] + if x1_1.Op != OpARM64UBFX { + break + } + if x1_1.AuxInt != arm64BFAuxInt(16, 16) { + break + } + if w != x1_1.Args[0] { + break + } + x2 := x1.Args[2] + if x2.Op != OpARM64MOVBstoreidx { + break + } + _ = x2.Args[3] + ptr0 := x2.Args[0] + idx0 := x2.Args[1] + x2_2 := x2.Args[2] + if x2_2.Op != OpARM64UBFX { + break + } + if x2_2.AuxInt != arm64BFAuxInt(24, 8) { + break + } + if w != x2_2.Args[0] { + break + } + mem := x2.Args[3] + if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2)) { + break + } + v.reset(OpARM64MOVWstoreidx) + v.AddArg(ptr0) + v.AddArg(idx0) + v0 := b.NewValue0(v.Pos, OpARM64REVW, w.Type) + v0.AddArg(w) + v.AddArg(v0) + v.AddArg(mem) + return true + } // match: 
(MOVBstore [i] {s} ptr w x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] (MOVDreg w)) x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] (MOVDreg w)) mem)))) // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) // result: (MOVWstore [i-3] {s} ptr (REVW w) mem) @@ -6983,6 +7858,116 @@ func rewriteValueARM64_OpARM64MOVBstore_10(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVBstore [3] {s} p w x0:(MOVBstore [2] {s} p (SRLconst [8] (MOVDreg w)) x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (SRLconst [16] (MOVDreg w)) x2:(MOVBstoreidx ptr0 idx0 (SRLconst [24] (MOVDreg w)) mem)))) + // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) + // result: (MOVWstoreidx ptr0 idx0 (REVW w) mem) + for { + if v.AuxInt != 3 { + break + } + s := v.Aux + _ = v.Args[2] + p := v.Args[0] + w := v.Args[1] + x0 := v.Args[2] + if x0.Op != OpARM64MOVBstore { + break + } + if x0.AuxInt != 2 { + break + } + if x0.Aux != s { + break + } + _ = x0.Args[2] + if p != x0.Args[0] { + break + } + x0_1 := x0.Args[1] + if x0_1.Op != OpARM64SRLconst { + break + } + if x0_1.AuxInt != 8 { + break + } + x0_1_0 := x0_1.Args[0] + if x0_1_0.Op != OpARM64MOVDreg { + break + } + if w != x0_1_0.Args[0] { + break + } + x1 := x0.Args[2] + if x1.Op != OpARM64MOVBstore { + break + } + if x1.AuxInt != 1 { + break + } + if x1.Aux != s { + break + } + _ = x1.Args[2] + p1 := x1.Args[0] + if p1.Op != OpARM64ADD { + break + } + _ = p1.Args[1] + ptr1 := p1.Args[0] + idx1 := p1.Args[1] + x1_1 := x1.Args[1] + if x1_1.Op != OpARM64SRLconst { + break + } + if x1_1.AuxInt != 16 { + break + } + x1_1_0 := x1_1.Args[0] + if x1_1_0.Op != OpARM64MOVDreg { + break + } + if w != x1_1_0.Args[0] { + break + } + x2 := x1.Args[2] + if x2.Op != OpARM64MOVBstoreidx { + 
break + } + _ = x2.Args[3] + ptr0 := x2.Args[0] + idx0 := x2.Args[1] + x2_2 := x2.Args[2] + if x2_2.Op != OpARM64SRLconst { + break + } + if x2_2.AuxInt != 24 { + break + } + x2_2_0 := x2_2.Args[0] + if x2_2_0.Op != OpARM64MOVDreg { + break + } + if w != x2_2_0.Args[0] { + break + } + mem := x2.Args[3] + if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2)) { + break + } + v.reset(OpARM64MOVWstoreidx) + v.AddArg(ptr0) + v.AddArg(idx0) + v0 := b.NewValue0(v.Pos, OpARM64REVW, w.Type) + v0.AddArg(w) + v.AddArg(v0) + v.AddArg(mem) + return true + } + return false +} +func rewriteValueARM64_OpARM64MOVBstore_30(v *Value) bool { + b := v.Block + _ = b // match: (MOVBstore [i] {s} ptr w x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w) x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] w) mem)))) // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) // result: (MOVWstore [i-3] {s} ptr (REVW w) mem) @@ -7078,11 +8063,99 @@ func rewriteValueARM64_OpARM64MOVBstore_10(v *Value) bool { v.AddArg(mem) return true } - return false -} -func rewriteValueARM64_OpARM64MOVBstore_20(v *Value) bool { - b := v.Block - _ = b + // match: (MOVBstore [3] {s} p w x0:(MOVBstore [2] {s} p (SRLconst [8] w) x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (SRLconst [16] w) x2:(MOVBstoreidx ptr0 idx0 (SRLconst [24] w) mem)))) + // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) + // result: (MOVWstoreidx ptr0 idx0 (REVW w) mem) + for { + if v.AuxInt != 3 { + break + } + s := v.Aux + _ = v.Args[2] + p := v.Args[0] + w := v.Args[1] + x0 := v.Args[2] + if x0.Op != OpARM64MOVBstore 
{ + break + } + if x0.AuxInt != 2 { + break + } + if x0.Aux != s { + break + } + _ = x0.Args[2] + if p != x0.Args[0] { + break + } + x0_1 := x0.Args[1] + if x0_1.Op != OpARM64SRLconst { + break + } + if x0_1.AuxInt != 8 { + break + } + if w != x0_1.Args[0] { + break + } + x1 := x0.Args[2] + if x1.Op != OpARM64MOVBstore { + break + } + if x1.AuxInt != 1 { + break + } + if x1.Aux != s { + break + } + _ = x1.Args[2] + p1 := x1.Args[0] + if p1.Op != OpARM64ADD { + break + } + _ = p1.Args[1] + ptr1 := p1.Args[0] + idx1 := p1.Args[1] + x1_1 := x1.Args[1] + if x1_1.Op != OpARM64SRLconst { + break + } + if x1_1.AuxInt != 16 { + break + } + if w != x1_1.Args[0] { + break + } + x2 := x1.Args[2] + if x2.Op != OpARM64MOVBstoreidx { + break + } + _ = x2.Args[3] + ptr0 := x2.Args[0] + idx0 := x2.Args[1] + x2_2 := x2.Args[2] + if x2_2.Op != OpARM64SRLconst { + break + } + if x2_2.AuxInt != 24 { + break + } + if w != x2_2.Args[0] { + break + } + mem := x2.Args[3] + if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2)) { + break + } + v.reset(OpARM64MOVWstoreidx) + v.AddArg(ptr0) + v.AddArg(idx0) + v0 := b.NewValue0(v.Pos, OpARM64REVW, w.Type) + v0.AddArg(w) + v.AddArg(v0) + v.AddArg(mem) + return true + } // match: (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) mem)) // cond: x.Uses == 1 && clobber(x) // result: (MOVHstore [i-1] {s} ptr (REV16W w) mem) @@ -7130,6 +8203,53 @@ func rewriteValueARM64_OpARM64MOVBstore_20(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (SRLconst [8] w) mem)) + // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x) + // result: (MOVHstoreidx ptr0 idx0 (REV16W w) mem) + for { + if v.AuxInt != 1 { 
+ break + } + s := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + ptr1 := v_0.Args[0] + idx1 := v_0.Args[1] + w := v.Args[1] + x := v.Args[2] + if x.Op != OpARM64MOVBstoreidx { + break + } + _ = x.Args[3] + ptr0 := x.Args[0] + idx0 := x.Args[1] + x_2 := x.Args[2] + if x_2.Op != OpARM64SRLconst { + break + } + if x_2.AuxInt != 8 { + break + } + if w != x_2.Args[0] { + break + } + mem := x.Args[3] + if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) { + break + } + v.reset(OpARM64MOVHstoreidx) + v.AddArg(ptr0) + v.AddArg(idx0) + v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type) + v0.AddArg(w) + v.AddArg(v0) + v.AddArg(mem) + return true + } // match: (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (UBFX [arm64BFAuxInt(8, 8)] w) mem)) // cond: x.Uses == 1 && clobber(x) // result: (MOVHstore [i-1] {s} ptr (REV16W w) mem) @@ -7177,6 +8297,53 @@ func rewriteValueARM64_OpARM64MOVBstore_20(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (UBFX [arm64BFAuxInt(8, 8)] w) mem)) + // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x) + // result: (MOVHstoreidx ptr0 idx0 (REV16W w) mem) + for { + if v.AuxInt != 1 { + break + } + s := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + ptr1 := v_0.Args[0] + idx1 := v_0.Args[1] + w := v.Args[1] + x := v.Args[2] + if x.Op != OpARM64MOVBstoreidx { + break + } + _ = x.Args[3] + ptr0 := x.Args[0] + idx0 := x.Args[1] + x_2 := x.Args[2] + if x_2.Op != OpARM64UBFX { + break + } + if x_2.AuxInt != arm64BFAuxInt(8, 8) { + break + } + if w != x_2.Args[0] { + break + } + mem := x.Args[3] + if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || 
isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) { + break + } + v.reset(OpARM64MOVHstoreidx) + v.AddArg(ptr0) + v.AddArg(idx0) + v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type) + v0.AddArg(w) + v.AddArg(v0) + v.AddArg(mem) + return true + } // match: (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) mem)) // cond: x.Uses == 1 && clobber(x) // result: (MOVHstore [i-1] {s} ptr (REV16W w) mem) @@ -7228,6 +8395,57 @@ func rewriteValueARM64_OpARM64MOVBstore_20(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (SRLconst [8] (MOVDreg w)) mem)) + // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x) + // result: (MOVHstoreidx ptr0 idx0 (REV16W w) mem) + for { + if v.AuxInt != 1 { + break + } + s := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + ptr1 := v_0.Args[0] + idx1 := v_0.Args[1] + w := v.Args[1] + x := v.Args[2] + if x.Op != OpARM64MOVBstoreidx { + break + } + _ = x.Args[3] + ptr0 := x.Args[0] + idx0 := x.Args[1] + x_2 := x.Args[2] + if x_2.Op != OpARM64SRLconst { + break + } + if x_2.AuxInt != 8 { + break + } + x_2_0 := x_2.Args[0] + if x_2_0.Op != OpARM64MOVDreg { + break + } + if w != x_2_0.Args[0] { + break + } + mem := x.Args[3] + if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) { + break + } + v.reset(OpARM64MOVHstoreidx) + v.AddArg(ptr0) + v.AddArg(idx0) + v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type) + v0.AddArg(w) + v.AddArg(v0) + v.AddArg(mem) + return true + } // match: (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (UBFX [arm64BFAuxInt(8, 24)] w) mem)) // cond: x.Uses == 1 && clobber(x) // result: (MOVHstore [i-1] {s} ptr (REV16W w) mem) @@ -7275,6 +8493,58 @@ func 
rewriteValueARM64_OpARM64MOVBstore_20(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (UBFX [arm64BFAuxInt(8, 24)] w) mem)) + // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x) + // result: (MOVHstoreidx ptr0 idx0 (REV16W w) mem) + for { + if v.AuxInt != 1 { + break + } + s := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + ptr1 := v_0.Args[0] + idx1 := v_0.Args[1] + w := v.Args[1] + x := v.Args[2] + if x.Op != OpARM64MOVBstoreidx { + break + } + _ = x.Args[3] + ptr0 := x.Args[0] + idx0 := x.Args[1] + x_2 := x.Args[2] + if x_2.Op != OpARM64UBFX { + break + } + if x_2.AuxInt != arm64BFAuxInt(8, 24) { + break + } + if w != x_2.Args[0] { + break + } + mem := x.Args[3] + if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) { + break + } + v.reset(OpARM64MOVHstoreidx) + v.AddArg(ptr0) + v.AddArg(idx0) + v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type) + v0.AddArg(w) + v.AddArg(v0) + v.AddArg(mem) + return true + } + return false +} +func rewriteValueARM64_OpARM64MOVBstore_40(v *Value) bool { + b := v.Block + _ = b // match: (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) mem)) // cond: x.Uses == 1 && clobber(x) // result: (MOVHstore [i-1] {s} ptr (REV16W w) mem) @@ -7326,6 +8596,241 @@ func rewriteValueARM64_OpARM64MOVBstore_20(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (SRLconst [8] (MOVDreg w)) mem)) + // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x) + // result: (MOVHstoreidx ptr0 idx0 (REV16W w) mem) + for { + if v.AuxInt != 1 { + break + } + s := v.Aux 
+ _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + ptr1 := v_0.Args[0] + idx1 := v_0.Args[1] + w := v.Args[1] + x := v.Args[2] + if x.Op != OpARM64MOVBstoreidx { + break + } + _ = x.Args[3] + ptr0 := x.Args[0] + idx0 := x.Args[1] + x_2 := x.Args[2] + if x_2.Op != OpARM64SRLconst { + break + } + if x_2.AuxInt != 8 { + break + } + x_2_0 := x_2.Args[0] + if x_2_0.Op != OpARM64MOVDreg { + break + } + if w != x_2_0.Args[0] { + break + } + mem := x.Args[3] + if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) { + break + } + v.reset(OpARM64MOVHstoreidx) + v.AddArg(ptr0) + v.AddArg(idx0) + v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type) + v0.AddArg(w) + v.AddArg(v0) + v.AddArg(mem) + return true + } + return false +} +func rewriteValueARM64_OpARM64MOVBstoreidx_0(v *Value) bool { + // match: (MOVBstoreidx ptr (MOVDconst [c]) val mem) + // cond: + // result: (MOVBstore [c] ptr val mem) + for { + _ = v.Args[3] + ptr := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { + break + } + c := v_1.AuxInt + val := v.Args[2] + mem := v.Args[3] + v.reset(OpARM64MOVBstore) + v.AuxInt = c + v.AddArg(ptr) + v.AddArg(val) + v.AddArg(mem) + return true + } + // match: (MOVBstoreidx (MOVDconst [c]) idx val mem) + // cond: + // result: (MOVBstore [c] idx val mem) + for { + _ = v.Args[3] + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDconst { + break + } + c := v_0.AuxInt + idx := v.Args[1] + val := v.Args[2] + mem := v.Args[3] + v.reset(OpARM64MOVBstore) + v.AuxInt = c + v.AddArg(idx) + v.AddArg(val) + v.AddArg(mem) + return true + } + // match: (MOVBstoreidx ptr idx (MOVDconst [0]) mem) + // cond: + // result: (MOVBstorezeroidx ptr idx mem) + for { + _ = v.Args[3] + ptr := v.Args[0] + idx := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { + break + } + if v_2.AuxInt != 0 { + break + } + mem := v.Args[3] + 
v.reset(OpARM64MOVBstorezeroidx) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(mem) + return true + } + // match: (MOVBstoreidx ptr idx (MOVBreg x) mem) + // cond: + // result: (MOVBstoreidx ptr idx x mem) + for { + _ = v.Args[3] + ptr := v.Args[0] + idx := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVBreg { + break + } + x := v_2.Args[0] + mem := v.Args[3] + v.reset(OpARM64MOVBstoreidx) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(x) + v.AddArg(mem) + return true + } + // match: (MOVBstoreidx ptr idx (MOVBUreg x) mem) + // cond: + // result: (MOVBstoreidx ptr idx x mem) + for { + _ = v.Args[3] + ptr := v.Args[0] + idx := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVBUreg { + break + } + x := v_2.Args[0] + mem := v.Args[3] + v.reset(OpARM64MOVBstoreidx) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(x) + v.AddArg(mem) + return true + } + // match: (MOVBstoreidx ptr idx (MOVHreg x) mem) + // cond: + // result: (MOVBstoreidx ptr idx x mem) + for { + _ = v.Args[3] + ptr := v.Args[0] + idx := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVHreg { + break + } + x := v_2.Args[0] + mem := v.Args[3] + v.reset(OpARM64MOVBstoreidx) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(x) + v.AddArg(mem) + return true + } + // match: (MOVBstoreidx ptr idx (MOVHUreg x) mem) + // cond: + // result: (MOVBstoreidx ptr idx x mem) + for { + _ = v.Args[3] + ptr := v.Args[0] + idx := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVHUreg { + break + } + x := v_2.Args[0] + mem := v.Args[3] + v.reset(OpARM64MOVBstoreidx) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(x) + v.AddArg(mem) + return true + } + // match: (MOVBstoreidx ptr idx (MOVWreg x) mem) + // cond: + // result: (MOVBstoreidx ptr idx x mem) + for { + _ = v.Args[3] + ptr := v.Args[0] + idx := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVWreg { + break + } + x := v_2.Args[0] + mem := v.Args[3] + v.reset(OpARM64MOVBstoreidx) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(x) + v.AddArg(mem) + return true + } + // 
match: (MOVBstoreidx ptr idx (MOVWUreg x) mem) + // cond: + // result: (MOVBstoreidx ptr idx x mem) + for { + _ = v.Args[3] + ptr := v.Args[0] + idx := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVWUreg { + break + } + x := v_2.Args[0] + mem := v.Args[3] + v.reset(OpARM64MOVBstoreidx) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(x) + v.AddArg(mem) + return true + } return false } func rewriteValueARM64_OpARM64MOVBstorezero_0(v *Value) bool { @@ -7382,6 +8887,30 @@ func rewriteValueARM64_OpARM64MOVBstorezero_0(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVBstorezero [off] {sym} (ADD ptr idx) mem) + // cond: off == 0 && sym == nil + // result: (MOVBstorezeroidx ptr idx mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + ptr := v_0.Args[0] + idx := v_0.Args[1] + mem := v.Args[1] + if !(off == 0 && sym == nil) { + break + } + v.reset(OpARM64MOVBstorezeroidx) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(mem) + return true + } // match: (MOVBstorezero [i] {s} ptr0 x:(MOVBstorezero [j] {s} ptr1 mem)) // cond: x.Uses == 1 && areAdjacentOffsets(i,j,1) && is32Bit(min(i,j)) && isSamePtr(ptr0, ptr1) && clobber(x) // result: (MOVHstorezero [min(i,j)] {s} ptr0 mem) @@ -7411,6 +8940,78 @@ func rewriteValueARM64_OpARM64MOVBstorezero_0(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVBstorezero [1] {s} (ADD ptr0 idx0) x:(MOVBstorezeroidx ptr1 idx1 mem)) + // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x) + // result: (MOVHstorezeroidx ptr1 idx1 mem) + for { + if v.AuxInt != 1 { + break + } + s := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + ptr0 := v_0.Args[0] + idx0 := v_0.Args[1] + x := v.Args[1] + if x.Op != OpARM64MOVBstorezeroidx { + break + } + _ = x.Args[2] + ptr1 := x.Args[0] + idx1 := x.Args[1] + mem 
:= x.Args[2] + if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) { + break + } + v.reset(OpARM64MOVHstorezeroidx) + v.AddArg(ptr1) + v.AddArg(idx1) + v.AddArg(mem) + return true + } + return false +} +func rewriteValueARM64_OpARM64MOVBstorezeroidx_0(v *Value) bool { + // match: (MOVBstorezeroidx ptr (MOVDconst [c]) mem) + // cond: + // result: (MOVBstorezero [c] ptr mem) + for { + _ = v.Args[2] + ptr := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { + break + } + c := v_1.AuxInt + mem := v.Args[2] + v.reset(OpARM64MOVBstorezero) + v.AuxInt = c + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MOVBstorezeroidx (MOVDconst [c]) idx mem) + // cond: + // result: (MOVBstorezero [c] idx mem) + for { + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDconst { + break + } + c := v_0.AuxInt + idx := v.Args[1] + mem := v.Args[2] + v.reset(OpARM64MOVBstorezero) + v.AuxInt = c + v.AddArg(idx) + v.AddArg(mem) + return true + } return false } func rewriteValueARM64_OpARM64MOVDload_0(v *Value) bool { @@ -7442,6 +9043,30 @@ func rewriteValueARM64_OpARM64MOVDload_0(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVDload [off] {sym} (ADD ptr idx) mem) + // cond: off == 0 && sym == nil + // result: (MOVDloadidx ptr idx mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + ptr := v_0.Args[0] + idx := v_0.Args[1] + mem := v.Args[1] + if !(off == 0 && sym == nil) { + break + } + v.reset(OpARM64MOVDloadidx) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(mem) + return true + } // match: (MOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) // result: (MOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) @@ -7492,6 +9117,66 @@ func 
rewriteValueARM64_OpARM64MOVDload_0(v *Value) bool { } return false } +func rewriteValueARM64_OpARM64MOVDloadidx_0(v *Value) bool { + // match: (MOVDloadidx ptr (MOVDconst [c]) mem) + // cond: + // result: (MOVDload [c] ptr mem) + for { + _ = v.Args[2] + ptr := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { + break + } + c := v_1.AuxInt + mem := v.Args[2] + v.reset(OpARM64MOVDload) + v.AuxInt = c + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MOVDloadidx (MOVDconst [c]) ptr mem) + // cond: + // result: (MOVDload [c] ptr mem) + for { + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDconst { + break + } + c := v_0.AuxInt + ptr := v.Args[1] + mem := v.Args[2] + v.reset(OpARM64MOVDload) + v.AuxInt = c + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MOVDloadidx ptr idx (MOVDstorezeroidx ptr2 idx2 _)) + // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) + // result: (MOVDconst [0]) + for { + _ = v.Args[2] + ptr := v.Args[0] + idx := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDstorezeroidx { + break + } + _ = v_2.Args[2] + ptr2 := v_2.Args[0] + idx2 := v_2.Args[1] + if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) { + break + } + v.reset(OpARM64MOVDconst) + v.AuxInt = 0 + return true + } + return false +} func rewriteValueARM64_OpARM64MOVDreg_0(v *Value) bool { // match: (MOVDreg x) // cond: x.Uses == 1 @@ -7569,6 +9254,32 @@ func rewriteValueARM64_OpARM64MOVDstore_0(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVDstore [off] {sym} (ADD ptr idx) val mem) + // cond: off == 0 && sym == nil + // result: (MOVDstoreidx ptr idx val mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + ptr := v_0.Args[0] + idx := v_0.Args[1] + val := v.Args[1] + mem := v.Args[2] + if !(off == 0 && sym == nil) { + break + } + 
v.reset(OpARM64MOVDstoreidx) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(val) + v.AddArg(mem) + return true + } // match: (MOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem) // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) // result: (MOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem) @@ -7621,6 +9332,70 @@ func rewriteValueARM64_OpARM64MOVDstore_0(v *Value) bool { } return false } +func rewriteValueARM64_OpARM64MOVDstoreidx_0(v *Value) bool { + // match: (MOVDstoreidx ptr (MOVDconst [c]) val mem) + // cond: + // result: (MOVDstore [c] ptr val mem) + for { + _ = v.Args[3] + ptr := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { + break + } + c := v_1.AuxInt + val := v.Args[2] + mem := v.Args[3] + v.reset(OpARM64MOVDstore) + v.AuxInt = c + v.AddArg(ptr) + v.AddArg(val) + v.AddArg(mem) + return true + } + // match: (MOVDstoreidx (MOVDconst [c]) idx val mem) + // cond: + // result: (MOVDstore [c] idx val mem) + for { + _ = v.Args[3] + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDconst { + break + } + c := v_0.AuxInt + idx := v.Args[1] + val := v.Args[2] + mem := v.Args[3] + v.reset(OpARM64MOVDstore) + v.AuxInt = c + v.AddArg(idx) + v.AddArg(val) + v.AddArg(mem) + return true + } + // match: (MOVDstoreidx ptr idx (MOVDconst [0]) mem) + // cond: + // result: (MOVDstorezeroidx ptr idx mem) + for { + _ = v.Args[3] + ptr := v.Args[0] + idx := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { + break + } + if v_2.AuxInt != 0 { + break + } + mem := v.Args[3] + v.reset(OpARM64MOVDstorezeroidx) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(mem) + return true + } + return false +} func rewriteValueARM64_OpARM64MOVDstorezero_0(v *Value) bool { b := v.Block _ = b @@ -7675,6 +9450,30 @@ func rewriteValueARM64_OpARM64MOVDstorezero_0(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVDstorezero [off] {sym} (ADD ptr idx) mem) + // cond: off == 0 && sym == nil + // result: 
(MOVDstorezeroidx ptr idx mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + ptr := v_0.Args[0] + idx := v_0.Args[1] + mem := v.Args[1] + if !(off == 0 && sym == nil) { + break + } + v.reset(OpARM64MOVDstorezeroidx) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(mem) + return true + } // match: (MOVDstorezero [i] {s} ptr0 x:(MOVDstorezero [j] {s} ptr1 mem)) // cond: x.Uses == 1 && areAdjacentOffsets(i,j,8) && is32Bit(min(i,j)) && isSamePtr(ptr0, ptr1) && clobber(x) // result: (MOVQstorezero [min(i,j)] {s} ptr0 mem) @@ -7704,6 +9503,79 @@ func rewriteValueARM64_OpARM64MOVDstorezero_0(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVDstorezero [8] {s} p0:(ADD ptr0 idx0) x:(MOVDstorezeroidx ptr1 idx1 mem)) + // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x) + // result: (MOVQstorezero [0] {s} p0 mem) + for { + if v.AuxInt != 8 { + break + } + s := v.Aux + _ = v.Args[1] + p0 := v.Args[0] + if p0.Op != OpARM64ADD { + break + } + _ = p0.Args[1] + ptr0 := p0.Args[0] + idx0 := p0.Args[1] + x := v.Args[1] + if x.Op != OpARM64MOVDstorezeroidx { + break + } + _ = x.Args[2] + ptr1 := x.Args[0] + idx1 := x.Args[1] + mem := x.Args[2] + if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) { + break + } + v.reset(OpARM64MOVQstorezero) + v.AuxInt = 0 + v.Aux = s + v.AddArg(p0) + v.AddArg(mem) + return true + } + return false +} +func rewriteValueARM64_OpARM64MOVDstorezeroidx_0(v *Value) bool { + // match: (MOVDstorezeroidx ptr (MOVDconst [c]) mem) + // cond: + // result: (MOVDstorezero [c] ptr mem) + for { + _ = v.Args[2] + ptr := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { + break + } + c := v_1.AuxInt + mem := v.Args[2] + v.reset(OpARM64MOVDstorezero) + v.AuxInt = c + 
v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MOVDstorezeroidx (MOVDconst [c]) idx mem) + // cond: + // result: (MOVDstorezero [c] idx mem) + for { + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDconst { + break + } + c := v_0.AuxInt + idx := v.Args[1] + mem := v.Args[2] + v.reset(OpARM64MOVDstorezero) + v.AuxInt = c + v.AddArg(idx) + v.AddArg(mem) + return true + } return false } func rewriteValueARM64_OpARM64MOVHUload_0(v *Value) bool { @@ -7735,6 +9607,30 @@ func rewriteValueARM64_OpARM64MOVHUload_0(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVHUload [off] {sym} (ADD ptr idx) mem) + // cond: off == 0 && sym == nil + // result: (MOVHUloadidx ptr idx mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + ptr := v_0.Args[0] + idx := v_0.Args[1] + mem := v.Args[1] + if !(off == 0 && sym == nil) { + break + } + v.reset(OpARM64MOVHUloadidx) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(mem) + return true + } // match: (MOVHUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) // result: (MOVHUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) @@ -7785,6 +9681,66 @@ func rewriteValueARM64_OpARM64MOVHUload_0(v *Value) bool { } return false } +func rewriteValueARM64_OpARM64MOVHUloadidx_0(v *Value) bool { + // match: (MOVHUloadidx ptr (MOVDconst [c]) mem) + // cond: + // result: (MOVHUload [c] ptr mem) + for { + _ = v.Args[2] + ptr := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { + break + } + c := v_1.AuxInt + mem := v.Args[2] + v.reset(OpARM64MOVHUload) + v.AuxInt = c + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MOVHUloadidx (MOVDconst [c]) ptr mem) + // cond: + // result: (MOVHUload [c] ptr mem) + for { + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDconst { + break + } + c := v_0.AuxInt + ptr := 
v.Args[1] + mem := v.Args[2] + v.reset(OpARM64MOVHUload) + v.AuxInt = c + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MOVHUloadidx ptr idx (MOVHstorezeroidx ptr2 idx2 _)) + // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) + // result: (MOVDconst [0]) + for { + _ = v.Args[2] + ptr := v.Args[0] + idx := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVHstorezeroidx { + break + } + _ = v_2.Args[2] + ptr2 := v_2.Args[0] + idx2 := v_2.Args[1] + if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) { + break + } + v.reset(OpARM64MOVDconst) + v.AuxInt = 0 + return true + } + return false +} func rewriteValueARM64_OpARM64MOVHUreg_0(v *Value) bool { // match: (MOVHUreg x:(MOVBUload _ _)) // cond: @@ -7812,6 +9768,32 @@ func rewriteValueARM64_OpARM64MOVHUreg_0(v *Value) bool { v.AddArg(x) return true } + // match: (MOVHUreg x:(MOVBUloadidx _ _ _)) + // cond: + // result: (MOVDreg x) + for { + x := v.Args[0] + if x.Op != OpARM64MOVBUloadidx { + break + } + _ = x.Args[2] + v.reset(OpARM64MOVDreg) + v.AddArg(x) + return true + } + // match: (MOVHUreg x:(MOVHUloadidx _ _ _)) + // cond: + // result: (MOVDreg x) + for { + x := v.Args[0] + if x.Op != OpARM64MOVHUloadidx { + break + } + _ = x.Args[2] + v.reset(OpARM64MOVDreg) + v.AddArg(x) + return true + } // match: (MOVHUreg x:(MOVBUreg _)) // cond: // result: (MOVDreg x) @@ -7931,6 +9913,30 @@ func rewriteValueARM64_OpARM64MOVHload_0(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVHload [off] {sym} (ADD ptr idx) mem) + // cond: off == 0 && sym == nil + // result: (MOVHloadidx ptr idx mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + ptr := v_0.Args[0] + idx := v_0.Args[1] + mem := v.Args[1] + if !(off == 0 && sym == nil) { + break + } + v.reset(OpARM64MOVHloadidx) + v.AddArg(ptr) + v.AddArg(idx) + 
v.AddArg(mem) + return true + } // match: (MOVHload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) // result: (MOVHload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) @@ -7981,6 +9987,66 @@ func rewriteValueARM64_OpARM64MOVHload_0(v *Value) bool { } return false } +func rewriteValueARM64_OpARM64MOVHloadidx_0(v *Value) bool { + // match: (MOVHloadidx ptr (MOVDconst [c]) mem) + // cond: + // result: (MOVHload [c] ptr mem) + for { + _ = v.Args[2] + ptr := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { + break + } + c := v_1.AuxInt + mem := v.Args[2] + v.reset(OpARM64MOVHload) + v.AuxInt = c + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MOVHloadidx (MOVDconst [c]) ptr mem) + // cond: + // result: (MOVHload [c] ptr mem) + for { + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDconst { + break + } + c := v_0.AuxInt + ptr := v.Args[1] + mem := v.Args[2] + v.reset(OpARM64MOVHload) + v.AuxInt = c + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MOVHloadidx ptr idx (MOVHstorezeroidx ptr2 idx2 _)) + // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) + // result: (MOVDconst [0]) + for { + _ = v.Args[2] + ptr := v.Args[0] + idx := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVHstorezeroidx { + break + } + _ = v_2.Args[2] + ptr2 := v_2.Args[0] + idx2 := v_2.Args[1] + if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) { + break + } + v.reset(OpARM64MOVDconst) + v.AuxInt = 0 + return true + } + return false +} func rewriteValueARM64_OpARM64MOVHreg_0(v *Value) bool { // match: (MOVHreg x:(MOVBload _ _)) // cond: @@ -8021,6 +10087,45 @@ func rewriteValueARM64_OpARM64MOVHreg_0(v *Value) bool { v.AddArg(x) return true } + // match: (MOVHreg x:(MOVBloadidx _ _ _)) + // cond: + // result: (MOVDreg x) + for { + x := v.Args[0] 
+ if x.Op != OpARM64MOVBloadidx { + break + } + _ = x.Args[2] + v.reset(OpARM64MOVDreg) + v.AddArg(x) + return true + } + // match: (MOVHreg x:(MOVBUloadidx _ _ _)) + // cond: + // result: (MOVDreg x) + for { + x := v.Args[0] + if x.Op != OpARM64MOVBUloadidx { + break + } + _ = x.Args[2] + v.reset(OpARM64MOVDreg) + v.AddArg(x) + return true + } + // match: (MOVHreg x:(MOVHloadidx _ _ _)) + // cond: + // result: (MOVDreg x) + for { + x := v.Args[0] + if x.Op != OpARM64MOVHloadidx { + break + } + _ = x.Args[2] + v.reset(OpARM64MOVDreg) + v.AddArg(x) + return true + } // match: (MOVHreg x:(MOVBreg _)) // cond: // result: (MOVDreg x) @@ -8070,6 +10175,9 @@ func rewriteValueARM64_OpARM64MOVHreg_0(v *Value) bool { v.AuxInt = int64(int16(c)) return true } + return false +} +func rewriteValueARM64_OpARM64MOVHreg_10(v *Value) bool { // match: (MOVHreg (SLLconst [lc] x)) // cond: lc < 16 // result: (SBFIZ [arm64BFAuxInt(lc, 16-lc)] x) @@ -8121,6 +10229,32 @@ func rewriteValueARM64_OpARM64MOVHstore_0(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVHstore [off] {sym} (ADD ptr idx) val mem) + // cond: off == 0 && sym == nil + // result: (MOVHstoreidx ptr idx val mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + ptr := v_0.Args[0] + idx := v_0.Args[1] + val := v.Args[1] + mem := v.Args[2] + if !(off == 0 && sym == nil) { + break + } + v.reset(OpARM64MOVHstoreidx) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(val) + v.AddArg(mem) + return true + } // match: (MOVHstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem) // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) // result: (MOVHstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem) @@ -8302,6 +10436,54 @@ func rewriteValueARM64_OpARM64MOVHstore_0(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [16] w) x:(MOVHstoreidx ptr1 
idx1 w mem)) + // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x) + // result: (MOVWstoreidx ptr1 idx1 w mem) + for { + if v.AuxInt != 2 { + break + } + s := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + ptr0 := v_0.Args[0] + idx0 := v_0.Args[1] + v_1 := v.Args[1] + if v_1.Op != OpARM64SRLconst { + break + } + if v_1.AuxInt != 16 { + break + } + w := v_1.Args[0] + x := v.Args[2] + if x.Op != OpARM64MOVHstoreidx { + break + } + _ = x.Args[3] + ptr1 := x.Args[0] + idx1 := x.Args[1] + if w != x.Args[2] { + break + } + mem := x.Args[3] + if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) { + break + } + v.reset(OpARM64MOVWstoreidx) + v.AddArg(ptr1) + v.AddArg(idx1) + v.AddArg(w) + v.AddArg(mem) + return true + } + return false +} +func rewriteValueARM64_OpARM64MOVHstore_10(v *Value) bool { // match: (MOVHstore [i] {s} ptr0 (UBFX [arm64BFAuxInt(16, 16)] w) x:(MOVHstore [i-2] {s} ptr1 w mem)) // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x) // result: (MOVWstore [i-2] {s} ptr0 w mem) @@ -8345,6 +10527,51 @@ func rewriteValueARM64_OpARM64MOVHstore_0(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVHstore [2] {s} (ADD ptr0 idx0) (UBFX [arm64BFAuxInt(16, 16)] w) x:(MOVHstoreidx ptr1 idx1 w mem)) + // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x) + // result: (MOVWstoreidx ptr1 idx1 w mem) + for { + if v.AuxInt != 2 { + break + } + s := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + ptr0 := v_0.Args[0] + idx0 := v_0.Args[1] + v_1 := v.Args[1] + if v_1.Op != OpARM64UBFX { + break + } + if v_1.AuxInt != arm64BFAuxInt(16, 16) { + break + } + w := v_1.Args[0] + 
x := v.Args[2] + if x.Op != OpARM64MOVHstoreidx { + break + } + _ = x.Args[3] + ptr1 := x.Args[0] + idx1 := x.Args[1] + if w != x.Args[2] { + break + } + mem := x.Args[3] + if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) { + break + } + v.reset(OpARM64MOVWstoreidx) + v.AddArg(ptr1) + v.AddArg(idx1) + v.AddArg(w) + v.AddArg(mem) + return true + } // match: (MOVHstore [i] {s} ptr0 (SRLconst [16] (MOVDreg w)) x:(MOVHstore [i-2] {s} ptr1 w mem)) // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x) // result: (MOVWstore [i-2] {s} ptr0 w mem) @@ -8392,9 +10619,55 @@ func rewriteValueARM64_OpARM64MOVHstore_0(v *Value) bool { v.AddArg(mem) return true } - return false -} -func rewriteValueARM64_OpARM64MOVHstore_10(v *Value) bool { + // match: (MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [16] (MOVDreg w)) x:(MOVHstoreidx ptr1 idx1 w mem)) + // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x) + // result: (MOVWstoreidx ptr1 idx1 w mem) + for { + if v.AuxInt != 2 { + break + } + s := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + ptr0 := v_0.Args[0] + idx0 := v_0.Args[1] + v_1 := v.Args[1] + if v_1.Op != OpARM64SRLconst { + break + } + if v_1.AuxInt != 16 { + break + } + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpARM64MOVDreg { + break + } + w := v_1_0.Args[0] + x := v.Args[2] + if x.Op != OpARM64MOVHstoreidx { + break + } + _ = x.Args[3] + ptr1 := x.Args[0] + idx1 := x.Args[1] + if w != x.Args[2] { + break + } + mem := x.Args[3] + if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) { + break + } + v.reset(OpARM64MOVWstoreidx) + v.AddArg(ptr1) + v.AddArg(idx1) + v.AddArg(w) + v.AddArg(mem) + return true + } // match: (MOVHstore [i] {s} 
ptr0 (SRLconst [j] w) x:(MOVHstore [i-2] {s} ptr1 w0:(SRLconst [j-16] w) mem)) // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x) // result: (MOVWstore [i-2] {s} ptr0 w0 mem) @@ -8443,6 +10716,200 @@ func rewriteValueARM64_OpARM64MOVHstore_10(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVHstoreidx ptr1 idx1 w0:(SRLconst [j-16] w) mem)) + // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x) + // result: (MOVWstoreidx ptr1 idx1 w0 mem) + for { + if v.AuxInt != 2 { + break + } + s := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + ptr0 := v_0.Args[0] + idx0 := v_0.Args[1] + v_1 := v.Args[1] + if v_1.Op != OpARM64SRLconst { + break + } + j := v_1.AuxInt + w := v_1.Args[0] + x := v.Args[2] + if x.Op != OpARM64MOVHstoreidx { + break + } + _ = x.Args[3] + ptr1 := x.Args[0] + idx1 := x.Args[1] + w0 := x.Args[2] + if w0.Op != OpARM64SRLconst { + break + } + if w0.AuxInt != j-16 { + break + } + if w != w0.Args[0] { + break + } + mem := x.Args[3] + if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) { + break + } + v.reset(OpARM64MOVWstoreidx) + v.AddArg(ptr1) + v.AddArg(idx1) + v.AddArg(w0) + v.AddArg(mem) + return true + } + return false +} +func rewriteValueARM64_OpARM64MOVHstoreidx_0(v *Value) bool { + // match: (MOVHstoreidx ptr (MOVDconst [c]) val mem) + // cond: + // result: (MOVHstore [c] ptr val mem) + for { + _ = v.Args[3] + ptr := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { + break + } + c := v_1.AuxInt + val := v.Args[2] + mem := v.Args[3] + v.reset(OpARM64MOVHstore) + v.AuxInt = c + v.AddArg(ptr) + v.AddArg(val) + v.AddArg(mem) + return true + } + // match: (MOVHstoreidx (MOVDconst [c]) idx val mem) + // cond: + // result: 
(MOVHstore [c] idx val mem) + for { + _ = v.Args[3] + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDconst { + break + } + c := v_0.AuxInt + idx := v.Args[1] + val := v.Args[2] + mem := v.Args[3] + v.reset(OpARM64MOVHstore) + v.AuxInt = c + v.AddArg(idx) + v.AddArg(val) + v.AddArg(mem) + return true + } + // match: (MOVHstoreidx ptr idx (MOVDconst [0]) mem) + // cond: + // result: (MOVHstorezeroidx ptr idx mem) + for { + _ = v.Args[3] + ptr := v.Args[0] + idx := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { + break + } + if v_2.AuxInt != 0 { + break + } + mem := v.Args[3] + v.reset(OpARM64MOVHstorezeroidx) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(mem) + return true + } + // match: (MOVHstoreidx ptr idx (MOVHreg x) mem) + // cond: + // result: (MOVHstoreidx ptr idx x mem) + for { + _ = v.Args[3] + ptr := v.Args[0] + idx := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVHreg { + break + } + x := v_2.Args[0] + mem := v.Args[3] + v.reset(OpARM64MOVHstoreidx) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(x) + v.AddArg(mem) + return true + } + // match: (MOVHstoreidx ptr idx (MOVHUreg x) mem) + // cond: + // result: (MOVHstoreidx ptr idx x mem) + for { + _ = v.Args[3] + ptr := v.Args[0] + idx := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVHUreg { + break + } + x := v_2.Args[0] + mem := v.Args[3] + v.reset(OpARM64MOVHstoreidx) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(x) + v.AddArg(mem) + return true + } + // match: (MOVHstoreidx ptr idx (MOVWreg x) mem) + // cond: + // result: (MOVHstoreidx ptr idx x mem) + for { + _ = v.Args[3] + ptr := v.Args[0] + idx := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVWreg { + break + } + x := v_2.Args[0] + mem := v.Args[3] + v.reset(OpARM64MOVHstoreidx) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(x) + v.AddArg(mem) + return true + } + // match: (MOVHstoreidx ptr idx (MOVWUreg x) mem) + // cond: + // result: (MOVHstoreidx ptr idx x mem) + for { + _ = v.Args[3] + ptr := v.Args[0] + idx := v.Args[1] + 
v_2 := v.Args[2] + if v_2.Op != OpARM64MOVWUreg { + break + } + x := v_2.Args[0] + mem := v.Args[3] + v.reset(OpARM64MOVHstoreidx) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(x) + v.AddArg(mem) + return true + } return false } func rewriteValueARM64_OpARM64MOVHstorezero_0(v *Value) bool { @@ -8499,6 +10966,30 @@ func rewriteValueARM64_OpARM64MOVHstorezero_0(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVHstorezero [off] {sym} (ADD ptr idx) mem) + // cond: off == 0 && sym == nil + // result: (MOVHstorezeroidx ptr idx mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + ptr := v_0.Args[0] + idx := v_0.Args[1] + mem := v.Args[1] + if !(off == 0 && sym == nil) { + break + } + v.reset(OpARM64MOVHstorezeroidx) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(mem) + return true + } // match: (MOVHstorezero [i] {s} ptr0 x:(MOVHstorezero [j] {s} ptr1 mem)) // cond: x.Uses == 1 && areAdjacentOffsets(i,j,2) && is32Bit(min(i,j)) && isSamePtr(ptr0, ptr1) && clobber(x) // result: (MOVWstorezero [min(i,j)] {s} ptr0 mem) @@ -8528,6 +11019,78 @@ func rewriteValueARM64_OpARM64MOVHstorezero_0(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVHstorezero [2] {s} (ADD ptr0 idx0) x:(MOVHstorezeroidx ptr1 idx1 mem)) + // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x) + // result: (MOVWstorezeroidx ptr1 idx1 mem) + for { + if v.AuxInt != 2 { + break + } + s := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + ptr0 := v_0.Args[0] + idx0 := v_0.Args[1] + x := v.Args[1] + if x.Op != OpARM64MOVHstorezeroidx { + break + } + _ = x.Args[2] + ptr1 := x.Args[0] + idx1 := x.Args[1] + mem := x.Args[2] + if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) { 
+ break + } + v.reset(OpARM64MOVWstorezeroidx) + v.AddArg(ptr1) + v.AddArg(idx1) + v.AddArg(mem) + return true + } + return false +} +func rewriteValueARM64_OpARM64MOVHstorezeroidx_0(v *Value) bool { + // match: (MOVHstorezeroidx ptr (MOVDconst [c]) mem) + // cond: + // result: (MOVHstorezero [c] ptr mem) + for { + _ = v.Args[2] + ptr := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { + break + } + c := v_1.AuxInt + mem := v.Args[2] + v.reset(OpARM64MOVHstorezero) + v.AuxInt = c + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MOVHstorezeroidx (MOVDconst [c]) idx mem) + // cond: + // result: (MOVHstorezero [c] idx mem) + for { + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDconst { + break + } + c := v_0.AuxInt + idx := v.Args[1] + mem := v.Args[2] + v.reset(OpARM64MOVHstorezero) + v.AuxInt = c + v.AddArg(idx) + v.AddArg(mem) + return true + } return false } func rewriteValueARM64_OpARM64MOVQstorezero_0(v *Value) bool { @@ -8615,6 +11178,30 @@ func rewriteValueARM64_OpARM64MOVWUload_0(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVWUload [off] {sym} (ADD ptr idx) mem) + // cond: off == 0 && sym == nil + // result: (MOVWUloadidx ptr idx mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + ptr := v_0.Args[0] + idx := v_0.Args[1] + mem := v.Args[1] + if !(off == 0 && sym == nil) { + break + } + v.reset(OpARM64MOVWUloadidx) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(mem) + return true + } // match: (MOVWUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) // result: (MOVWUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) @@ -8665,6 +11252,66 @@ func rewriteValueARM64_OpARM64MOVWUload_0(v *Value) bool { } return false } +func rewriteValueARM64_OpARM64MOVWUloadidx_0(v *Value) bool { + // match: (MOVWUloadidx ptr (MOVDconst [c]) 
mem) + // cond: + // result: (MOVWUload [c] ptr mem) + for { + _ = v.Args[2] + ptr := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { + break + } + c := v_1.AuxInt + mem := v.Args[2] + v.reset(OpARM64MOVWUload) + v.AuxInt = c + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MOVWUloadidx (MOVDconst [c]) ptr mem) + // cond: + // result: (MOVWUload [c] ptr mem) + for { + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDconst { + break + } + c := v_0.AuxInt + ptr := v.Args[1] + mem := v.Args[2] + v.reset(OpARM64MOVWUload) + v.AuxInt = c + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MOVWUloadidx ptr idx (MOVWstorezeroidx ptr2 idx2 _)) + // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) + // result: (MOVDconst [0]) + for { + _ = v.Args[2] + ptr := v.Args[0] + idx := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVWstorezeroidx { + break + } + _ = v_2.Args[2] + ptr2 := v_2.Args[0] + idx2 := v_2.Args[1] + if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) { + break + } + v.reset(OpARM64MOVDconst) + v.AuxInt = 0 + return true + } + return false +} func rewriteValueARM64_OpARM64MOVWUreg_0(v *Value) bool { // match: (MOVWUreg x:(MOVBUload _ _)) // cond: @@ -8705,6 +11352,45 @@ func rewriteValueARM64_OpARM64MOVWUreg_0(v *Value) bool { v.AddArg(x) return true } + // match: (MOVWUreg x:(MOVBUloadidx _ _ _)) + // cond: + // result: (MOVDreg x) + for { + x := v.Args[0] + if x.Op != OpARM64MOVBUloadidx { + break + } + _ = x.Args[2] + v.reset(OpARM64MOVDreg) + v.AddArg(x) + return true + } + // match: (MOVWUreg x:(MOVHUloadidx _ _ _)) + // cond: + // result: (MOVDreg x) + for { + x := v.Args[0] + if x.Op != OpARM64MOVHUloadidx { + break + } + _ = x.Args[2] + v.reset(OpARM64MOVDreg) + v.AddArg(x) + return true + } + // match: (MOVWUreg x:(MOVWUloadidx _ _ _)) + // cond: + // result: (MOVDreg x) + for { + x := v.Args[0] + 
if x.Op != OpARM64MOVWUloadidx { + break + } + _ = x.Args[2] + v.reset(OpARM64MOVDreg) + v.AddArg(x) + return true + } // match: (MOVWUreg x:(MOVBUreg _)) // cond: // result: (MOVDreg x) @@ -8756,6 +11442,9 @@ func rewriteValueARM64_OpARM64MOVWUreg_0(v *Value) bool { v.AddArg(x) return true } + return false +} +func rewriteValueARM64_OpARM64MOVWUreg_10(v *Value) bool { // match: (MOVWUreg (MOVDconst [c])) // cond: // result: (MOVDconst [int64(uint32(c))]) @@ -8836,6 +11525,30 @@ func rewriteValueARM64_OpARM64MOVWload_0(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVWload [off] {sym} (ADD ptr idx) mem) + // cond: off == 0 && sym == nil + // result: (MOVWloadidx ptr idx mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + ptr := v_0.Args[0] + idx := v_0.Args[1] + mem := v.Args[1] + if !(off == 0 && sym == nil) { + break + } + v.reset(OpARM64MOVWloadidx) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(mem) + return true + } // match: (MOVWload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) // result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) @@ -8886,6 +11599,66 @@ func rewriteValueARM64_OpARM64MOVWload_0(v *Value) bool { } return false } +func rewriteValueARM64_OpARM64MOVWloadidx_0(v *Value) bool { + // match: (MOVWloadidx ptr (MOVDconst [c]) mem) + // cond: + // result: (MOVWload [c] ptr mem) + for { + _ = v.Args[2] + ptr := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { + break + } + c := v_1.AuxInt + mem := v.Args[2] + v.reset(OpARM64MOVWload) + v.AuxInt = c + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MOVWloadidx (MOVDconst [c]) ptr mem) + // cond: + // result: (MOVWload [c] ptr mem) + for { + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDconst { + break + } + c := v_0.AuxInt + ptr := v.Args[1] + mem := 
v.Args[2] + v.reset(OpARM64MOVWload) + v.AuxInt = c + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MOVWloadidx ptr idx (MOVWstorezeroidx ptr2 idx2 _)) + // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) + // result: (MOVDconst [0]) + for { + _ = v.Args[2] + ptr := v.Args[0] + idx := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVWstorezeroidx { + break + } + _ = v_2.Args[2] + ptr2 := v_2.Args[0] + idx2 := v_2.Args[1] + if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) { + break + } + v.reset(OpARM64MOVDconst) + v.AuxInt = 0 + return true + } + return false +} func rewriteValueARM64_OpARM64MOVWreg_0(v *Value) bool { // match: (MOVWreg x:(MOVBload _ _)) // cond: @@ -8952,6 +11725,74 @@ func rewriteValueARM64_OpARM64MOVWreg_0(v *Value) bool { v.AddArg(x) return true } + // match: (MOVWreg x:(MOVBloadidx _ _ _)) + // cond: + // result: (MOVDreg x) + for { + x := v.Args[0] + if x.Op != OpARM64MOVBloadidx { + break + } + _ = x.Args[2] + v.reset(OpARM64MOVDreg) + v.AddArg(x) + return true + } + // match: (MOVWreg x:(MOVBUloadidx _ _ _)) + // cond: + // result: (MOVDreg x) + for { + x := v.Args[0] + if x.Op != OpARM64MOVBUloadidx { + break + } + _ = x.Args[2] + v.reset(OpARM64MOVDreg) + v.AddArg(x) + return true + } + // match: (MOVWreg x:(MOVHloadidx _ _ _)) + // cond: + // result: (MOVDreg x) + for { + x := v.Args[0] + if x.Op != OpARM64MOVHloadidx { + break + } + _ = x.Args[2] + v.reset(OpARM64MOVDreg) + v.AddArg(x) + return true + } + // match: (MOVWreg x:(MOVHUloadidx _ _ _)) + // cond: + // result: (MOVDreg x) + for { + x := v.Args[0] + if x.Op != OpARM64MOVHUloadidx { + break + } + _ = x.Args[2] + v.reset(OpARM64MOVDreg) + v.AddArg(x) + return true + } + // match: (MOVWreg x:(MOVWloadidx _ _ _)) + // cond: + // result: (MOVDreg x) + for { + x := v.Args[0] + if x.Op != OpARM64MOVWloadidx { + break + } + _ = x.Args[2] + v.reset(OpARM64MOVDreg) 
+ v.AddArg(x) + return true + } + return false +} +func rewriteValueARM64_OpARM64MOVWreg_10(v *Value) bool { // match: (MOVWreg x:(MOVBreg _)) // cond: // result: (MOVDreg x) @@ -9012,9 +11853,6 @@ func rewriteValueARM64_OpARM64MOVWreg_0(v *Value) bool { v.AddArg(x) return true } - return false -} -func rewriteValueARM64_OpARM64MOVWreg_10(v *Value) bool { // match: (MOVWreg (MOVDconst [c])) // cond: // result: (MOVDconst [int64(int32(c))]) @@ -9079,6 +11917,32 @@ func rewriteValueARM64_OpARM64MOVWstore_0(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVWstore [off] {sym} (ADD ptr idx) val mem) + // cond: off == 0 && sym == nil + // result: (MOVWstoreidx ptr idx val mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + ptr := v_0.Args[0] + idx := v_0.Args[1] + val := v.Args[1] + mem := v.Args[2] + if !(off == 0 && sym == nil) { + break + } + v.reset(OpARM64MOVWstoreidx) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(val) + v.AddArg(mem) + return true + } // match: (MOVWstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem) // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) // result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem) @@ -9216,6 +12080,51 @@ func rewriteValueARM64_OpARM64MOVWstore_0(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVWstore [4] {s} (ADD ptr0 idx0) (SRLconst [32] w) x:(MOVWstoreidx ptr1 idx1 w mem)) + // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x) + // result: (MOVDstoreidx ptr1 idx1 w mem) + for { + if v.AuxInt != 4 { + break + } + s := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + ptr0 := v_0.Args[0] + idx0 := v_0.Args[1] + v_1 := v.Args[1] + if v_1.Op != OpARM64SRLconst { + break + } + if v_1.AuxInt != 32 { + break 
+ } + w := v_1.Args[0] + x := v.Args[2] + if x.Op != OpARM64MOVWstoreidx { + break + } + _ = x.Args[3] + ptr1 := x.Args[0] + idx1 := x.Args[1] + if w != x.Args[2] { + break + } + mem := x.Args[3] + if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) { + break + } + v.reset(OpARM64MOVDstoreidx) + v.AddArg(ptr1) + v.AddArg(idx1) + v.AddArg(w) + v.AddArg(mem) + return true + } // match: (MOVWstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVWstore [i-4] {s} ptr1 w0:(SRLconst [j-32] w) mem)) // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x) // result: (MOVDstore [i-4] {s} ptr0 w0 mem) @@ -9264,6 +12173,160 @@ func rewriteValueARM64_OpARM64MOVWstore_0(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVWstore [4] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVWstoreidx ptr1 idx1 w0:(SRLconst [j-32] w) mem)) + // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x) + // result: (MOVDstoreidx ptr1 idx1 w0 mem) + for { + if v.AuxInt != 4 { + break + } + s := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + ptr0 := v_0.Args[0] + idx0 := v_0.Args[1] + v_1 := v.Args[1] + if v_1.Op != OpARM64SRLconst { + break + } + j := v_1.AuxInt + w := v_1.Args[0] + x := v.Args[2] + if x.Op != OpARM64MOVWstoreidx { + break + } + _ = x.Args[3] + ptr1 := x.Args[0] + idx1 := x.Args[1] + w0 := x.Args[2] + if w0.Op != OpARM64SRLconst { + break + } + if w0.AuxInt != j-32 { + break + } + if w != w0.Args[0] { + break + } + mem := x.Args[3] + if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) { + break + } + v.reset(OpARM64MOVDstoreidx) + v.AddArg(ptr1) + v.AddArg(idx1) + v.AddArg(w0) + v.AddArg(mem) + return true + } + return false +} +func 
rewriteValueARM64_OpARM64MOVWstoreidx_0(v *Value) bool { + // match: (MOVWstoreidx ptr (MOVDconst [c]) val mem) + // cond: + // result: (MOVWstore [c] ptr val mem) + for { + _ = v.Args[3] + ptr := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { + break + } + c := v_1.AuxInt + val := v.Args[2] + mem := v.Args[3] + v.reset(OpARM64MOVWstore) + v.AuxInt = c + v.AddArg(ptr) + v.AddArg(val) + v.AddArg(mem) + return true + } + // match: (MOVWstoreidx (MOVDconst [c]) idx val mem) + // cond: + // result: (MOVWstore [c] idx val mem) + for { + _ = v.Args[3] + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDconst { + break + } + c := v_0.AuxInt + idx := v.Args[1] + val := v.Args[2] + mem := v.Args[3] + v.reset(OpARM64MOVWstore) + v.AuxInt = c + v.AddArg(idx) + v.AddArg(val) + v.AddArg(mem) + return true + } + // match: (MOVWstoreidx ptr idx (MOVDconst [0]) mem) + // cond: + // result: (MOVWstorezeroidx ptr idx mem) + for { + _ = v.Args[3] + ptr := v.Args[0] + idx := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { + break + } + if v_2.AuxInt != 0 { + break + } + mem := v.Args[3] + v.reset(OpARM64MOVWstorezeroidx) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(mem) + return true + } + // match: (MOVWstoreidx ptr idx (MOVWreg x) mem) + // cond: + // result: (MOVWstoreidx ptr idx x mem) + for { + _ = v.Args[3] + ptr := v.Args[0] + idx := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVWreg { + break + } + x := v_2.Args[0] + mem := v.Args[3] + v.reset(OpARM64MOVWstoreidx) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(x) + v.AddArg(mem) + return true + } + // match: (MOVWstoreidx ptr idx (MOVWUreg x) mem) + // cond: + // result: (MOVWstoreidx ptr idx x mem) + for { + _ = v.Args[3] + ptr := v.Args[0] + idx := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVWUreg { + break + } + x := v_2.Args[0] + mem := v.Args[3] + v.reset(OpARM64MOVWstoreidx) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(x) + v.AddArg(mem) + return true + } return false } func 
rewriteValueARM64_OpARM64MOVWstorezero_0(v *Value) bool { @@ -9320,6 +12383,30 @@ func rewriteValueARM64_OpARM64MOVWstorezero_0(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVWstorezero [off] {sym} (ADD ptr idx) mem) + // cond: off == 0 && sym == nil + // result: (MOVWstorezeroidx ptr idx mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + ptr := v_0.Args[0] + idx := v_0.Args[1] + mem := v.Args[1] + if !(off == 0 && sym == nil) { + break + } + v.reset(OpARM64MOVWstorezeroidx) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(mem) + return true + } // match: (MOVWstorezero [i] {s} ptr0 x:(MOVWstorezero [j] {s} ptr1 mem)) // cond: x.Uses == 1 && areAdjacentOffsets(i,j,4) && is32Bit(min(i,j)) && isSamePtr(ptr0, ptr1) && clobber(x) // result: (MOVDstorezero [min(i,j)] {s} ptr0 mem) @@ -9349,6 +12436,78 @@ func rewriteValueARM64_OpARM64MOVWstorezero_0(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVWstorezero [4] {s} (ADD ptr0 idx0) x:(MOVWstorezeroidx ptr1 idx1 mem)) + // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x) + // result: (MOVDstorezeroidx ptr1 idx1 mem) + for { + if v.AuxInt != 4 { + break + } + s := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + ptr0 := v_0.Args[0] + idx0 := v_0.Args[1] + x := v.Args[1] + if x.Op != OpARM64MOVWstorezeroidx { + break + } + _ = x.Args[2] + ptr1 := x.Args[0] + idx1 := x.Args[1] + mem := x.Args[2] + if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) { + break + } + v.reset(OpARM64MOVDstorezeroidx) + v.AddArg(ptr1) + v.AddArg(idx1) + v.AddArg(mem) + return true + } + return false +} +func rewriteValueARM64_OpARM64MOVWstorezeroidx_0(v *Value) bool { + // match: (MOVWstorezeroidx 
ptr (MOVDconst [c]) mem) + // cond: + // result: (MOVWstorezero [c] ptr mem) + for { + _ = v.Args[2] + ptr := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { + break + } + c := v_1.AuxInt + mem := v.Args[2] + v.reset(OpARM64MOVWstorezero) + v.AuxInt = c + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MOVWstorezeroidx (MOVDconst [c]) idx mem) + // cond: + // result: (MOVWstorezero [c] idx mem) + for { + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDconst { + break + } + c := v_0.AuxInt + idx := v.Args[1] + mem := v.Args[2] + v.reset(OpARM64MOVWstorezero) + v.AuxInt = c + v.AddArg(idx) + v.AddArg(mem) + return true + } return false } func rewriteValueARM64_OpARM64MUL_0(v *Value) bool { @@ -11029,9 +14188,241 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool { v0.AddArg(mem) return true } + // match: (OR o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [3] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [2] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y3:(MOVDnop x3:(MOVBUloadidx ptr0 idx0 mem))) + // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0) + // result: @mergePoint(b,x0,x1,x2,x3) (MOVWUloadidx ptr0 idx0 mem) + for { + t := v.Type + _ = v.Args[1] + o0 := v.Args[0] + if o0.Op != OpARM64ORshiftLL { + break + } + if o0.AuxInt != 8 { + break + } + _ = o0.Args[1] + o1 := o0.Args[0] + if o1.Op != OpARM64ORshiftLL { + break + } + if o1.AuxInt != 16 { + break + } + _ = o1.Args[1] + s0 := o1.Args[0] + if s0.Op != 
OpARM64SLLconst { + break + } + if s0.AuxInt != 24 { + break + } + y0 := s0.Args[0] + if y0.Op != OpARM64MOVDnop { + break + } + x0 := y0.Args[0] + if x0.Op != OpARM64MOVBUload { + break + } + if x0.AuxInt != 3 { + break + } + s := x0.Aux + _ = x0.Args[1] + p := x0.Args[0] + mem := x0.Args[1] + y1 := o1.Args[1] + if y1.Op != OpARM64MOVDnop { + break + } + x1 := y1.Args[0] + if x1.Op != OpARM64MOVBUload { + break + } + if x1.AuxInt != 2 { + break + } + if x1.Aux != s { + break + } + _ = x1.Args[1] + if p != x1.Args[0] { + break + } + if mem != x1.Args[1] { + break + } + y2 := o0.Args[1] + if y2.Op != OpARM64MOVDnop { + break + } + x2 := y2.Args[0] + if x2.Op != OpARM64MOVBUload { + break + } + if x2.AuxInt != 1 { + break + } + if x2.Aux != s { + break + } + _ = x2.Args[1] + p1 := x2.Args[0] + if p1.Op != OpARM64ADD { + break + } + _ = p1.Args[1] + ptr1 := p1.Args[0] + idx1 := p1.Args[1] + if mem != x2.Args[1] { + break + } + y3 := v.Args[1] + if y3.Op != OpARM64MOVDnop { + break + } + x3 := y3.Args[0] + if x3.Op != OpARM64MOVBUloadidx { + break + } + _ = x3.Args[2] + ptr0 := x3.Args[0] + idx0 := x3.Args[1] + if mem != x3.Args[2] { + break + } + if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) { + break + } + b = mergePoint(b, x0, x1, x2, x3) + v0 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t) + v.reset(OpCopy) + v.AddArg(v0) + v0.AddArg(ptr0) + v0.AddArg(idx0) + v0.AddArg(mem) + return true + } + // match: (OR y3:(MOVDnop x3:(MOVBUloadidx ptr0 idx0 mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst 
[24] y0:(MOVDnop x0:(MOVBUload [3] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [2] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))) + // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0) + // result: @mergePoint(b,x0,x1,x2,x3) (MOVWUloadidx ptr0 idx0 mem) + for { + t := v.Type + _ = v.Args[1] + y3 := v.Args[0] + if y3.Op != OpARM64MOVDnop { + break + } + x3 := y3.Args[0] + if x3.Op != OpARM64MOVBUloadidx { + break + } + _ = x3.Args[2] + ptr0 := x3.Args[0] + idx0 := x3.Args[1] + mem := x3.Args[2] + o0 := v.Args[1] + if o0.Op != OpARM64ORshiftLL { + break + } + if o0.AuxInt != 8 { + break + } + _ = o0.Args[1] + o1 := o0.Args[0] + if o1.Op != OpARM64ORshiftLL { + break + } + if o1.AuxInt != 16 { + break + } + _ = o1.Args[1] + s0 := o1.Args[0] + if s0.Op != OpARM64SLLconst { + break + } + if s0.AuxInt != 24 { + break + } + y0 := s0.Args[0] + if y0.Op != OpARM64MOVDnop { + break + } + x0 := y0.Args[0] + if x0.Op != OpARM64MOVBUload { + break + } + if x0.AuxInt != 3 { + break + } + s := x0.Aux + _ = x0.Args[1] + p := x0.Args[0] + if mem != x0.Args[1] { + break + } + y1 := o1.Args[1] + if y1.Op != OpARM64MOVDnop { + break + } + x1 := y1.Args[0] + if x1.Op != OpARM64MOVBUload { + break + } + if x1.AuxInt != 2 { + break + } + if x1.Aux != s { + break + } + _ = x1.Args[1] + if p != x1.Args[0] { + break + } + if mem != x1.Args[1] { + break + } + y2 := o0.Args[1] + if y2.Op != OpARM64MOVDnop { + break + } + x2 := y2.Args[0] + if x2.Op != OpARM64MOVBUload { + break + } + if x2.AuxInt != 1 { + break + } 
+ if x2.Aux != s { + break + } + _ = x2.Args[1] + p1 := x2.Args[0] + if p1.Op != OpARM64ADD { + break + } + _ = p1.Args[1] + ptr1 := p1.Args[0] + idx1 := p1.Args[1] + if mem != x2.Args[1] { + break + } + if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) { + break + } + b = mergePoint(b, x0, x1, x2, x3) + v0 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t) + v.reset(OpCopy) + v.AddArg(v0) + v0.AddArg(ptr0) + v0.AddArg(idx0) + v0.AddArg(mem) + return true + } // match: (OR o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [i7] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i6] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i4] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i3] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [i2] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [i1] {s} p mem))) y7:(MOVDnop x7:(MOVBUload [i0] {s} p mem))) // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && 
clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0) - // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV (MOVDload {s} (OffPtr [i0] p) mem)) + // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDload {s} (OffPtr [i0] p) mem) for { t := v.Type _ = v.Args[1] @@ -11240,22 +14631,25 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool { break } b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) - v0 := b.NewValue0(v.Pos, OpARM64REV, t) + v0 := b.NewValue0(v.Pos, OpARM64MOVDload, t) v.reset(OpCopy) v.AddArg(v0) - v1 := b.NewValue0(v.Pos, OpARM64MOVDload, t) - v1.Aux = s - v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type) - v2.AuxInt = i0 - v2.AddArg(p) - v1.AddArg(v2) - v1.AddArg(mem) + v0.Aux = s + v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type) + v1.AuxInt = i0 + v1.AddArg(p) v0.AddArg(v1) + v0.AddArg(mem) return true } + return false +} +func rewriteValueARM64_OpARM64OR_20(v *Value) bool { + b := v.Block + _ = b // match: (OR y7:(MOVDnop x7:(MOVBUload [i0] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [i7] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i6] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i4] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i3] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [i2] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [i1] {s} p mem)))) // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses 
== 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0) - // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV (MOVDload {s} (OffPtr [i0] p) mem)) + // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDload {s} (OffPtr [i0] p) mem) for { t := v.Type _ = v.Args[1] @@ -11464,17 +14858,479 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool { break } b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) - v0 := b.NewValue0(v.Pos, OpARM64REV, t) + v0 := b.NewValue0(v.Pos, OpARM64MOVDload, t) v.reset(OpCopy) v.AddArg(v0) - v1 := b.NewValue0(v.Pos, OpARM64MOVDload, t) - v1.Aux = s - v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type) - v2.AuxInt = i0 - v2.AddArg(p) - v1.AddArg(v2) - v1.AddArg(mem) + v0.Aux = s + v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type) + v1.AuxInt = i0 + v1.AddArg(p) v0.AddArg(v1) + v0.AddArg(mem) + return true + } + // match: (OR o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [7] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [6] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [4] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [3] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [2] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y7:(MOVDnop x7:(MOVBUloadidx ptr0 idx0 mem))) + // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && 
y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0) + // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDloadidx ptr0 idx0 mem) + for { + t := v.Type + _ = v.Args[1] + o0 := v.Args[0] + if o0.Op != OpARM64ORshiftLL { + break + } + if o0.AuxInt != 8 { + break + } + _ = o0.Args[1] + o1 := o0.Args[0] + if o1.Op != OpARM64ORshiftLL { + break + } + if o1.AuxInt != 16 { + break + } + _ = o1.Args[1] + o2 := o1.Args[0] + if o2.Op != OpARM64ORshiftLL { + break + } + if o2.AuxInt != 24 { + break + } + _ = o2.Args[1] + o3 := o2.Args[0] + if o3.Op != OpARM64ORshiftLL { + break + } + if o3.AuxInt != 32 { + break + } + _ = o3.Args[1] + o4 := o3.Args[0] + if o4.Op != OpARM64ORshiftLL { + break + } + if o4.AuxInt != 40 { + break + } + _ = o4.Args[1] + o5 := o4.Args[0] + if o5.Op != OpARM64ORshiftLL { + break + } + if o5.AuxInt != 48 { + break + } + _ = o5.Args[1] + s0 := o5.Args[0] + if s0.Op != OpARM64SLLconst { + break + } + if s0.AuxInt != 56 { + break + } + y0 := s0.Args[0] + if y0.Op != OpARM64MOVDnop { + break + } + x0 := y0.Args[0] + if x0.Op != OpARM64MOVBUload { + break + } + if x0.AuxInt != 7 { + break + } + s := x0.Aux + _ = x0.Args[1] + p := x0.Args[0] + mem := x0.Args[1] + y1 := o5.Args[1] + if y1.Op != OpARM64MOVDnop { + break + } + x1 := y1.Args[0] + if x1.Op != OpARM64MOVBUload { + break + } + if x1.AuxInt 
!= 6 { + break + } + if x1.Aux != s { + break + } + _ = x1.Args[1] + if p != x1.Args[0] { + break + } + if mem != x1.Args[1] { + break + } + y2 := o4.Args[1] + if y2.Op != OpARM64MOVDnop { + break + } + x2 := y2.Args[0] + if x2.Op != OpARM64MOVBUload { + break + } + if x2.AuxInt != 5 { + break + } + if x2.Aux != s { + break + } + _ = x2.Args[1] + if p != x2.Args[0] { + break + } + if mem != x2.Args[1] { + break + } + y3 := o3.Args[1] + if y3.Op != OpARM64MOVDnop { + break + } + x3 := y3.Args[0] + if x3.Op != OpARM64MOVBUload { + break + } + if x3.AuxInt != 4 { + break + } + if x3.Aux != s { + break + } + _ = x3.Args[1] + if p != x3.Args[0] { + break + } + if mem != x3.Args[1] { + break + } + y4 := o2.Args[1] + if y4.Op != OpARM64MOVDnop { + break + } + x4 := y4.Args[0] + if x4.Op != OpARM64MOVBUload { + break + } + if x4.AuxInt != 3 { + break + } + if x4.Aux != s { + break + } + _ = x4.Args[1] + if p != x4.Args[0] { + break + } + if mem != x4.Args[1] { + break + } + y5 := o1.Args[1] + if y5.Op != OpARM64MOVDnop { + break + } + x5 := y5.Args[0] + if x5.Op != OpARM64MOVBUload { + break + } + if x5.AuxInt != 2 { + break + } + if x5.Aux != s { + break + } + _ = x5.Args[1] + if p != x5.Args[0] { + break + } + if mem != x5.Args[1] { + break + } + y6 := o0.Args[1] + if y6.Op != OpARM64MOVDnop { + break + } + x6 := y6.Args[0] + if x6.Op != OpARM64MOVBUload { + break + } + if x6.AuxInt != 1 { + break + } + if x6.Aux != s { + break + } + _ = x6.Args[1] + p1 := x6.Args[0] + if p1.Op != OpARM64ADD { + break + } + _ = p1.Args[1] + ptr1 := p1.Args[0] + idx1 := p1.Args[1] + if mem != x6.Args[1] { + break + } + y7 := v.Args[1] + if y7.Op != OpARM64MOVDnop { + break + } + x7 := y7.Args[0] + if x7.Op != OpARM64MOVBUloadidx { + break + } + _ = x7.Args[2] + ptr0 := x7.Args[0] + idx0 := x7.Args[1] + if mem != x7.Args[2] { + break + } + if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses 
== 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) { + break + } + b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) + v0 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t) + v.reset(OpCopy) + v.AddArg(v0) + v0.AddArg(ptr0) + v0.AddArg(idx0) + v0.AddArg(mem) + return true + } + // match: (OR y7:(MOVDnop x7:(MOVBUloadidx ptr0 idx0 mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [7] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [6] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [4] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [3] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [2] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))) + // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && 
isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0) + // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDloadidx ptr0 idx0 mem) + for { + t := v.Type + _ = v.Args[1] + y7 := v.Args[0] + if y7.Op != OpARM64MOVDnop { + break + } + x7 := y7.Args[0] + if x7.Op != OpARM64MOVBUloadidx { + break + } + _ = x7.Args[2] + ptr0 := x7.Args[0] + idx0 := x7.Args[1] + mem := x7.Args[2] + o0 := v.Args[1] + if o0.Op != OpARM64ORshiftLL { + break + } + if o0.AuxInt != 8 { + break + } + _ = o0.Args[1] + o1 := o0.Args[0] + if o1.Op != OpARM64ORshiftLL { + break + } + if o1.AuxInt != 16 { + break + } + _ = o1.Args[1] + o2 := o1.Args[0] + if o2.Op != OpARM64ORshiftLL { + break + } + if o2.AuxInt != 24 { + break + } + _ = o2.Args[1] + o3 := o2.Args[0] + if o3.Op != OpARM64ORshiftLL { + break + } + if o3.AuxInt != 32 { + break + } + _ = o3.Args[1] + o4 := o3.Args[0] + if o4.Op != OpARM64ORshiftLL { + break + } + if o4.AuxInt != 40 { + break + } + _ = o4.Args[1] + o5 := o4.Args[0] + if o5.Op != OpARM64ORshiftLL { + break + } + if o5.AuxInt != 48 { + break + } + _ = o5.Args[1] + s0 := o5.Args[0] + if s0.Op != OpARM64SLLconst { + break + } + if s0.AuxInt != 56 { + break + } + y0 := s0.Args[0] + if y0.Op != OpARM64MOVDnop { + break + } + x0 := y0.Args[0] + if x0.Op != OpARM64MOVBUload { + break + } + if x0.AuxInt != 7 { + break + } + s := x0.Aux + _ = x0.Args[1] + p := x0.Args[0] + if mem != x0.Args[1] { + break + } + y1 := o5.Args[1] + if y1.Op != OpARM64MOVDnop { + break + } + x1 := y1.Args[0] + if x1.Op != OpARM64MOVBUload { + break + } + if x1.AuxInt != 6 { + break + } + if x1.Aux != s { + break + } + _ = x1.Args[1] + if p != x1.Args[0] { + break + } + if 
mem != x1.Args[1] { + break + } + y2 := o4.Args[1] + if y2.Op != OpARM64MOVDnop { + break + } + x2 := y2.Args[0] + if x2.Op != OpARM64MOVBUload { + break + } + if x2.AuxInt != 5 { + break + } + if x2.Aux != s { + break + } + _ = x2.Args[1] + if p != x2.Args[0] { + break + } + if mem != x2.Args[1] { + break + } + y3 := o3.Args[1] + if y3.Op != OpARM64MOVDnop { + break + } + x3 := y3.Args[0] + if x3.Op != OpARM64MOVBUload { + break + } + if x3.AuxInt != 4 { + break + } + if x3.Aux != s { + break + } + _ = x3.Args[1] + if p != x3.Args[0] { + break + } + if mem != x3.Args[1] { + break + } + y4 := o2.Args[1] + if y4.Op != OpARM64MOVDnop { + break + } + x4 := y4.Args[0] + if x4.Op != OpARM64MOVBUload { + break + } + if x4.AuxInt != 3 { + break + } + if x4.Aux != s { + break + } + _ = x4.Args[1] + if p != x4.Args[0] { + break + } + if mem != x4.Args[1] { + break + } + y5 := o1.Args[1] + if y5.Op != OpARM64MOVDnop { + break + } + x5 := y5.Args[0] + if x5.Op != OpARM64MOVBUload { + break + } + if x5.AuxInt != 2 { + break + } + if x5.Aux != s { + break + } + _ = x5.Args[1] + if p != x5.Args[0] { + break + } + if mem != x5.Args[1] { + break + } + y6 := o0.Args[1] + if y6.Op != OpARM64MOVDnop { + break + } + x6 := y6.Args[0] + if x6.Op != OpARM64MOVBUload { + break + } + if x6.AuxInt != 1 { + break + } + if x6.Aux != s { + break + } + _ = x6.Args[1] + p1 := x6.Args[0] + if p1.Op != OpARM64ADD { + break + } + _ = p1.Args[1] + ptr1 := p1.Args[0] + idx1 := p1.Args[1] + if mem != x6.Args[1] { + break + } + if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, 
idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) { + break + } + b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) + v0 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t) + v.reset(OpCopy) + v.AddArg(v0) + v0.AddArg(ptr0) + v0.AddArg(idx0) + v0.AddArg(mem) return true } // match: (OR o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem))) @@ -11593,11 +15449,6 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool { v0.AddArg(v1) return true } - return false -} -func rewriteValueARM64_OpARM64OR_20(v *Value) bool { - b := v.Block - _ = b // match: (OR y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem)))) // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0) // result: @mergePoint(b,x0,x1,x2,x3) (REVW (MOVWUload {s} (OffPtr [i0] p) mem)) @@ -11714,6 +15565,242 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool { v0.AddArg(v1) return true } + // match: (OR o0:(ORshiftLL [8] o1:(ORshiftLL [16] 
s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem))) y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [3] {s} p mem))) + // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0) + // result: @mergePoint(b,x0,x1,x2,x3) (REVW (MOVWUloadidx ptr0 idx0 mem)) + for { + t := v.Type + _ = v.Args[1] + o0 := v.Args[0] + if o0.Op != OpARM64ORshiftLL { + break + } + if o0.AuxInt != 8 { + break + } + _ = o0.Args[1] + o1 := o0.Args[0] + if o1.Op != OpARM64ORshiftLL { + break + } + if o1.AuxInt != 16 { + break + } + _ = o1.Args[1] + s0 := o1.Args[0] + if s0.Op != OpARM64SLLconst { + break + } + if s0.AuxInt != 24 { + break + } + y0 := s0.Args[0] + if y0.Op != OpARM64MOVDnop { + break + } + x0 := y0.Args[0] + if x0.Op != OpARM64MOVBUloadidx { + break + } + _ = x0.Args[2] + ptr0 := x0.Args[0] + idx0 := x0.Args[1] + mem := x0.Args[2] + y1 := o1.Args[1] + if y1.Op != OpARM64MOVDnop { + break + } + x1 := y1.Args[0] + if x1.Op != OpARM64MOVBUload { + break + } + if x1.AuxInt != 1 { + break + } + s := x1.Aux + _ = x1.Args[1] + p1 := x1.Args[0] + if p1.Op != OpARM64ADD { + break + } + _ = p1.Args[1] + ptr1 := p1.Args[0] + idx1 := p1.Args[1] + if mem != x1.Args[1] { + break + } + y2 := o0.Args[1] + if y2.Op != OpARM64MOVDnop { + break + } + x2 := y2.Args[0] + if x2.Op != OpARM64MOVBUload { + break + } + if x2.AuxInt != 2 { + break + } + if x2.Aux != s { + break + } + _ = x2.Args[1] + p := x2.Args[0] + if mem != x2.Args[1] { + break + } + y3 := 
v.Args[1] + if y3.Op != OpARM64MOVDnop { + break + } + x3 := y3.Args[0] + if x3.Op != OpARM64MOVBUload { + break + } + if x3.AuxInt != 3 { + break + } + if x3.Aux != s { + break + } + _ = x3.Args[1] + if p != x3.Args[0] { + break + } + if mem != x3.Args[1] { + break + } + if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) { + break + } + b = mergePoint(b, x0, x1, x2, x3) + v0 := b.NewValue0(v.Pos, OpARM64REVW, t) + v.reset(OpCopy) + v.AddArg(v0) + v1 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t) + v1.AddArg(ptr0) + v1.AddArg(idx0) + v1.AddArg(mem) + v0.AddArg(v1) + return true + } + // match: (OR y3:(MOVDnop x3:(MOVBUload [3] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem))) y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [2] {s} p mem)))) + // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0) + // result: @mergePoint(b,x0,x1,x2,x3) (REVW (MOVWUloadidx ptr0 idx0 mem)) + for { + t := v.Type + _ = v.Args[1] + y3 := v.Args[0] + if y3.Op != OpARM64MOVDnop { 
+ break + } + x3 := y3.Args[0] + if x3.Op != OpARM64MOVBUload { + break + } + if x3.AuxInt != 3 { + break + } + s := x3.Aux + _ = x3.Args[1] + p := x3.Args[0] + mem := x3.Args[1] + o0 := v.Args[1] + if o0.Op != OpARM64ORshiftLL { + break + } + if o0.AuxInt != 8 { + break + } + _ = o0.Args[1] + o1 := o0.Args[0] + if o1.Op != OpARM64ORshiftLL { + break + } + if o1.AuxInt != 16 { + break + } + _ = o1.Args[1] + s0 := o1.Args[0] + if s0.Op != OpARM64SLLconst { + break + } + if s0.AuxInt != 24 { + break + } + y0 := s0.Args[0] + if y0.Op != OpARM64MOVDnop { + break + } + x0 := y0.Args[0] + if x0.Op != OpARM64MOVBUloadidx { + break + } + _ = x0.Args[2] + ptr0 := x0.Args[0] + idx0 := x0.Args[1] + if mem != x0.Args[2] { + break + } + y1 := o1.Args[1] + if y1.Op != OpARM64MOVDnop { + break + } + x1 := y1.Args[0] + if x1.Op != OpARM64MOVBUload { + break + } + if x1.AuxInt != 1 { + break + } + if x1.Aux != s { + break + } + _ = x1.Args[1] + p1 := x1.Args[0] + if p1.Op != OpARM64ADD { + break + } + _ = p1.Args[1] + ptr1 := p1.Args[0] + idx1 := p1.Args[1] + if mem != x1.Args[1] { + break + } + y2 := o0.Args[1] + if y2.Op != OpARM64MOVDnop { + break + } + x2 := y2.Args[0] + if x2.Op != OpARM64MOVBUload { + break + } + if x2.AuxInt != 2 { + break + } + if x2.Aux != s { + break + } + _ = x2.Args[1] + if p != x2.Args[0] { + break + } + if mem != x2.Args[1] { + break + } + if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) { + break + } + b = mergePoint(b, x0, x1, x2, x3) + v0 := b.NewValue0(v.Pos, OpARM64REVW, t) + 
v.reset(OpCopy) + v.AddArg(v0) + v1 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t) + v1.AddArg(ptr0) + v1.AddArg(idx0) + v1.AddArg(mem) + v0.AddArg(v1) + return true + } // match: (OR o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i4] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [i5] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [i6] {s} p mem))) y7:(MOVDnop x7:(MOVBUload [i7] {s} p mem))) // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0) // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV (MOVDload {s} (OffPtr [i0] p) mem)) @@ -12162,6 +16249,479 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool { v0.AddArg(v1) return true } + // match: (OR o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem))) y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop 
x2:(MOVBUload [2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [3] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [4] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [5] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [6] {s} p mem))) y7:(MOVDnop x7:(MOVBUload [7] {s} p mem))) + // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0) + // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV (MOVDloadidx ptr0 idx0 mem)) + for { + t := v.Type + _ = v.Args[1] + o0 := v.Args[0] + if o0.Op != OpARM64ORshiftLL { + break + } + if o0.AuxInt != 8 { + break + } + _ = o0.Args[1] + o1 := o0.Args[0] + if o1.Op != OpARM64ORshiftLL { + break + } + if o1.AuxInt != 16 { + break + } + _ = o1.Args[1] + o2 := o1.Args[0] + if o2.Op != OpARM64ORshiftLL { + break + } + if o2.AuxInt != 24 { + break + } + _ = o2.Args[1] + o3 := o2.Args[0] + if o3.Op != OpARM64ORshiftLL { + break + } + if o3.AuxInt != 32 { + break + } + _ = o3.Args[1] + o4 := o3.Args[0] + if o4.Op != OpARM64ORshiftLL { + break + } + if o4.AuxInt != 40 { + break + } + _ = o4.Args[1] + o5 := o4.Args[0] + if o5.Op != OpARM64ORshiftLL { + break + } + if o5.AuxInt != 48 { + break + } + _ = o5.Args[1] + s0 := o5.Args[0] + if s0.Op != 
OpARM64SLLconst { + break + } + if s0.AuxInt != 56 { + break + } + y0 := s0.Args[0] + if y0.Op != OpARM64MOVDnop { + break + } + x0 := y0.Args[0] + if x0.Op != OpARM64MOVBUloadidx { + break + } + _ = x0.Args[2] + ptr0 := x0.Args[0] + idx0 := x0.Args[1] + mem := x0.Args[2] + y1 := o5.Args[1] + if y1.Op != OpARM64MOVDnop { + break + } + x1 := y1.Args[0] + if x1.Op != OpARM64MOVBUload { + break + } + if x1.AuxInt != 1 { + break + } + s := x1.Aux + _ = x1.Args[1] + p1 := x1.Args[0] + if p1.Op != OpARM64ADD { + break + } + _ = p1.Args[1] + ptr1 := p1.Args[0] + idx1 := p1.Args[1] + if mem != x1.Args[1] { + break + } + y2 := o4.Args[1] + if y2.Op != OpARM64MOVDnop { + break + } + x2 := y2.Args[0] + if x2.Op != OpARM64MOVBUload { + break + } + if x2.AuxInt != 2 { + break + } + if x2.Aux != s { + break + } + _ = x2.Args[1] + p := x2.Args[0] + if mem != x2.Args[1] { + break + } + y3 := o3.Args[1] + if y3.Op != OpARM64MOVDnop { + break + } + x3 := y3.Args[0] + if x3.Op != OpARM64MOVBUload { + break + } + if x3.AuxInt != 3 { + break + } + if x3.Aux != s { + break + } + _ = x3.Args[1] + if p != x3.Args[0] { + break + } + if mem != x3.Args[1] { + break + } + y4 := o2.Args[1] + if y4.Op != OpARM64MOVDnop { + break + } + x4 := y4.Args[0] + if x4.Op != OpARM64MOVBUload { + break + } + if x4.AuxInt != 4 { + break + } + if x4.Aux != s { + break + } + _ = x4.Args[1] + if p != x4.Args[0] { + break + } + if mem != x4.Args[1] { + break + } + y5 := o1.Args[1] + if y5.Op != OpARM64MOVDnop { + break + } + x5 := y5.Args[0] + if x5.Op != OpARM64MOVBUload { + break + } + if x5.AuxInt != 5 { + break + } + if x5.Aux != s { + break + } + _ = x5.Args[1] + if p != x5.Args[0] { + break + } + if mem != x5.Args[1] { + break + } + y6 := o0.Args[1] + if y6.Op != OpARM64MOVDnop { + break + } + x6 := y6.Args[0] + if x6.Op != OpARM64MOVBUload { + break + } + if x6.AuxInt != 6 { + break + } + if x6.Aux != s { + break + } + _ = x6.Args[1] + if p != x6.Args[0] { + break + } + if mem != x6.Args[1] { + break + 
} + y7 := v.Args[1] + if y7.Op != OpARM64MOVDnop { + break + } + x7 := y7.Args[0] + if x7.Op != OpARM64MOVBUload { + break + } + if x7.AuxInt != 7 { + break + } + if x7.Aux != s { + break + } + _ = x7.Args[1] + if p != x7.Args[0] { + break + } + if mem != x7.Args[1] { + break + } + if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) { + break + } + b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) + v0 := b.NewValue0(v.Pos, OpARM64REV, t) + v.reset(OpCopy) + v.AddArg(v0) + v1 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t) + v1.AddArg(ptr0) + v1.AddArg(idx0) + v1.AddArg(mem) + v0.AddArg(v1) + return true + } + return false +} +func rewriteValueARM64_OpARM64OR_30(v *Value) bool { + b := v.Block + _ = b + // match: (OR y7:(MOVDnop x7:(MOVBUload [7] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem))) y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [3] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [4] {s} p mem))) y5:(MOVDnop 
x5:(MOVBUload [5] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [6] {s} p mem)))) + // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0) + // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV (MOVDloadidx ptr0 idx0 mem)) + for { + t := v.Type + _ = v.Args[1] + y7 := v.Args[0] + if y7.Op != OpARM64MOVDnop { + break + } + x7 := y7.Args[0] + if x7.Op != OpARM64MOVBUload { + break + } + if x7.AuxInt != 7 { + break + } + s := x7.Aux + _ = x7.Args[1] + p := x7.Args[0] + mem := x7.Args[1] + o0 := v.Args[1] + if o0.Op != OpARM64ORshiftLL { + break + } + if o0.AuxInt != 8 { + break + } + _ = o0.Args[1] + o1 := o0.Args[0] + if o1.Op != OpARM64ORshiftLL { + break + } + if o1.AuxInt != 16 { + break + } + _ = o1.Args[1] + o2 := o1.Args[0] + if o2.Op != OpARM64ORshiftLL { + break + } + if o2.AuxInt != 24 { + break + } + _ = o2.Args[1] + o3 := o2.Args[0] + if o3.Op != OpARM64ORshiftLL { + break + } + if o3.AuxInt != 32 { + break + } + _ = o3.Args[1] + o4 := o3.Args[0] + if o4.Op != OpARM64ORshiftLL { + break + } + if o4.AuxInt != 40 { + break + } + _ = o4.Args[1] + o5 := o4.Args[0] + if o5.Op != OpARM64ORshiftLL { + break + } + if o5.AuxInt != 48 { + 
break + } + _ = o5.Args[1] + s0 := o5.Args[0] + if s0.Op != OpARM64SLLconst { + break + } + if s0.AuxInt != 56 { + break + } + y0 := s0.Args[0] + if y0.Op != OpARM64MOVDnop { + break + } + x0 := y0.Args[0] + if x0.Op != OpARM64MOVBUloadidx { + break + } + _ = x0.Args[2] + ptr0 := x0.Args[0] + idx0 := x0.Args[1] + if mem != x0.Args[2] { + break + } + y1 := o5.Args[1] + if y1.Op != OpARM64MOVDnop { + break + } + x1 := y1.Args[0] + if x1.Op != OpARM64MOVBUload { + break + } + if x1.AuxInt != 1 { + break + } + if x1.Aux != s { + break + } + _ = x1.Args[1] + p1 := x1.Args[0] + if p1.Op != OpARM64ADD { + break + } + _ = p1.Args[1] + ptr1 := p1.Args[0] + idx1 := p1.Args[1] + if mem != x1.Args[1] { + break + } + y2 := o4.Args[1] + if y2.Op != OpARM64MOVDnop { + break + } + x2 := y2.Args[0] + if x2.Op != OpARM64MOVBUload { + break + } + if x2.AuxInt != 2 { + break + } + if x2.Aux != s { + break + } + _ = x2.Args[1] + if p != x2.Args[0] { + break + } + if mem != x2.Args[1] { + break + } + y3 := o3.Args[1] + if y3.Op != OpARM64MOVDnop { + break + } + x3 := y3.Args[0] + if x3.Op != OpARM64MOVBUload { + break + } + if x3.AuxInt != 3 { + break + } + if x3.Aux != s { + break + } + _ = x3.Args[1] + if p != x3.Args[0] { + break + } + if mem != x3.Args[1] { + break + } + y4 := o2.Args[1] + if y4.Op != OpARM64MOVDnop { + break + } + x4 := y4.Args[0] + if x4.Op != OpARM64MOVBUload { + break + } + if x4.AuxInt != 4 { + break + } + if x4.Aux != s { + break + } + _ = x4.Args[1] + if p != x4.Args[0] { + break + } + if mem != x4.Args[1] { + break + } + y5 := o1.Args[1] + if y5.Op != OpARM64MOVDnop { + break + } + x5 := y5.Args[0] + if x5.Op != OpARM64MOVBUload { + break + } + if x5.AuxInt != 5 { + break + } + if x5.Aux != s { + break + } + _ = x5.Args[1] + if p != x5.Args[0] { + break + } + if mem != x5.Args[1] { + break + } + y6 := o0.Args[1] + if y6.Op != OpARM64MOVDnop { + break + } + x6 := y6.Args[0] + if x6.Op != OpARM64MOVBUload { + break + } + if x6.AuxInt != 6 { + break + } + if 
x6.Aux != s { + break + } + _ = x6.Args[1] + if p != x6.Args[0] { + break + } + if mem != x6.Args[1] { + break + } + if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) { + break + } + b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) + v0 := b.NewValue0(v.Pos, OpARM64REV, t) + v.reset(OpCopy) + v.AddArg(v0) + v1 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t) + v1.AddArg(ptr0) + v1.AddArg(idx0) + v1.AddArg(mem) + v0.AddArg(v1) + return true + } return false } func rewriteValueARM64_OpARM64ORN_0(v *Value) bool { @@ -12680,6 +17240,67 @@ func rewriteValueARM64_OpARM64ORshiftLL_0(v *Value) bool { v0.AddArg(mem) return true } + // match: (ORshiftLL [8] y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem)) y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) + // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b,x0,x1) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1) + // result: @mergePoint(b,x0,x1) (MOVHUloadidx ptr0 idx0 mem) + for { + t := v.Type + if v.AuxInt != 8 { + 
break + } + _ = v.Args[1] + y0 := v.Args[0] + if y0.Op != OpARM64MOVDnop { + break + } + x0 := y0.Args[0] + if x0.Op != OpARM64MOVBUloadidx { + break + } + _ = x0.Args[2] + ptr0 := x0.Args[0] + idx0 := x0.Args[1] + mem := x0.Args[2] + y1 := v.Args[1] + if y1.Op != OpARM64MOVDnop { + break + } + x1 := y1.Args[0] + if x1.Op != OpARM64MOVBUload { + break + } + if x1.AuxInt != 1 { + break + } + s := x1.Aux + _ = x1.Args[1] + p1 := x1.Args[0] + if p1.Op != OpARM64ADD { + break + } + _ = p1.Args[1] + ptr1 := p1.Args[0] + idx1 := p1.Args[1] + if mem != x1.Args[1] { + break + } + if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b, x0, x1) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)) { + break + } + b = mergePoint(b, x0, x1) + v0 := b.NewValue0(v.Pos, OpARM64MOVHUloadidx, t) + v.reset(OpCopy) + v.AddArg(v0) + v0.AddArg(ptr0) + v0.AddArg(idx0) + v0.AddArg(mem) + return true + } + return false +} +func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool { + b := v.Block + _ = b // match: (ORshiftLL [24] o0:(ORshiftLL [16] x0:(MOVHUload [i0] {s} p mem) y1:(MOVDnop x1:(MOVBUload [i2] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i3] {s} p mem))) // cond: i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y1) && clobber(y2) && clobber(o0) // result: @mergePoint(b,x0,x1,x2) (MOVWUload {s} (OffPtr [i0] p) mem) @@ -12759,11 +17380,85 @@ func rewriteValueARM64_OpARM64ORshiftLL_0(v *Value) bool { v0.AddArg(mem) return true } - return false -} -func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool { - b := v.Block - _ = b + // match: (ORshiftLL [24] o0:(ORshiftLL [16] x0:(MOVHUloadidx ptr0 idx0 mem) y1:(MOVDnop x1:(MOVBUload [2] {s} p1:(ADD ptr1 idx1) mem))) 
y2:(MOVDnop x2:(MOVBUload [3] {s} p mem))) + // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y1) && clobber(y2) && clobber(o0) + // result: @mergePoint(b,x0,x1,x2) (MOVWUloadidx ptr0 idx0 mem) + for { + t := v.Type + if v.AuxInt != 24 { + break + } + _ = v.Args[1] + o0 := v.Args[0] + if o0.Op != OpARM64ORshiftLL { + break + } + if o0.AuxInt != 16 { + break + } + _ = o0.Args[1] + x0 := o0.Args[0] + if x0.Op != OpARM64MOVHUloadidx { + break + } + _ = x0.Args[2] + ptr0 := x0.Args[0] + idx0 := x0.Args[1] + mem := x0.Args[2] + y1 := o0.Args[1] + if y1.Op != OpARM64MOVDnop { + break + } + x1 := y1.Args[0] + if x1.Op != OpARM64MOVBUload { + break + } + if x1.AuxInt != 2 { + break + } + s := x1.Aux + _ = x1.Args[1] + p1 := x1.Args[0] + if p1.Op != OpARM64ADD { + break + } + _ = p1.Args[1] + ptr1 := p1.Args[0] + idx1 := p1.Args[1] + if mem != x1.Args[1] { + break + } + y2 := v.Args[1] + if y2.Op != OpARM64MOVDnop { + break + } + x2 := y2.Args[0] + if x2.Op != OpARM64MOVBUload { + break + } + if x2.AuxInt != 3 { + break + } + if x2.Aux != s { + break + } + _ = x2.Args[1] + p := x2.Args[0] + if mem != x2.Args[1] { + break + } + if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y1) && clobber(y2) && clobber(o0)) { + break + } + b = mergePoint(b, x0, x1, x2) + v0 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t) + v.reset(OpCopy) + v.AddArg(v0) + v0.AddArg(ptr0) + v0.AddArg(idx0) + v0.AddArg(mem) + return true + } // match: (ORshiftLL [56] 
o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] x0:(MOVWUload [i0] {s} p mem) y1:(MOVDnop x1:(MOVBUload [i4] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i6] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i7] {s} p mem))) // cond: i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2) // result: @mergePoint(b,x0,x1,x2,x3,x4) (MOVDload {s} (OffPtr [i0] p) mem) @@ -12897,6 +17592,143 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool { v0.AddArg(mem) return true } + // match: (ORshiftLL [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] x0:(MOVWUloadidx ptr0 idx0 mem) y1:(MOVDnop x1:(MOVBUload [4] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [6] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [7] {s} p mem))) + // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2) + // result: @mergePoint(b,x0,x1,x2,x3,x4) (MOVDloadidx ptr0 idx0 mem) + for { + t := v.Type + if v.AuxInt != 56 { + break + } + _ = v.Args[1] + o0 := v.Args[0] + if o0.Op != OpARM64ORshiftLL { + break + } + if 
o0.AuxInt != 48 { + break + } + _ = o0.Args[1] + o1 := o0.Args[0] + if o1.Op != OpARM64ORshiftLL { + break + } + if o1.AuxInt != 40 { + break + } + _ = o1.Args[1] + o2 := o1.Args[0] + if o2.Op != OpARM64ORshiftLL { + break + } + if o2.AuxInt != 32 { + break + } + _ = o2.Args[1] + x0 := o2.Args[0] + if x0.Op != OpARM64MOVWUloadidx { + break + } + _ = x0.Args[2] + ptr0 := x0.Args[0] + idx0 := x0.Args[1] + mem := x0.Args[2] + y1 := o2.Args[1] + if y1.Op != OpARM64MOVDnop { + break + } + x1 := y1.Args[0] + if x1.Op != OpARM64MOVBUload { + break + } + if x1.AuxInt != 4 { + break + } + s := x1.Aux + _ = x1.Args[1] + p1 := x1.Args[0] + if p1.Op != OpARM64ADD { + break + } + _ = p1.Args[1] + ptr1 := p1.Args[0] + idx1 := p1.Args[1] + if mem != x1.Args[1] { + break + } + y2 := o1.Args[1] + if y2.Op != OpARM64MOVDnop { + break + } + x2 := y2.Args[0] + if x2.Op != OpARM64MOVBUload { + break + } + if x2.AuxInt != 5 { + break + } + if x2.Aux != s { + break + } + _ = x2.Args[1] + p := x2.Args[0] + if mem != x2.Args[1] { + break + } + y3 := o0.Args[1] + if y3.Op != OpARM64MOVDnop { + break + } + x3 := y3.Args[0] + if x3.Op != OpARM64MOVBUload { + break + } + if x3.AuxInt != 6 { + break + } + if x3.Aux != s { + break + } + _ = x3.Args[1] + if p != x3.Args[0] { + break + } + if mem != x3.Args[1] { + break + } + y4 := v.Args[1] + if y4.Op != OpARM64MOVDnop { + break + } + x4 := y4.Args[0] + if x4.Op != OpARM64MOVBUload { + break + } + if x4.AuxInt != 7 { + break + } + if x4.Aux != s { + break + } + _ = x4.Args[1] + if p != x4.Args[0] { + break + } + if mem != x4.Args[1] { + break + } + if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && 
clobber(x2) && clobber(x3) && clobber(x4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)) { + break + } + b = mergePoint(b, x0, x1, x2, x3, x4) + v0 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t) + v.reset(OpCopy) + v.AddArg(v0) + v0.AddArg(ptr0) + v0.AddArg(idx0) + v0.AddArg(mem) + return true + } // match: (ORshiftLL [8] y0:(MOVDnop x0:(MOVBUload [i1] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [i0] {s} p mem))) // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1) // result: @mergePoint(b,x0,x1) (REV16W (MOVHUload [i0] {s} p mem)) @@ -12953,6 +17785,64 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool { v0.AddArg(v1) return true } + // match: (ORshiftLL [8] y0:(MOVDnop x0:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)) y1:(MOVDnop x1:(MOVBUloadidx ptr0 idx0 mem))) + // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b,x0,x1) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1) + // result: @mergePoint(b,x0,x1) (REV16W (MOVHUloadidx ptr0 idx0 mem)) + for { + t := v.Type + if v.AuxInt != 8 { + break + } + _ = v.Args[1] + y0 := v.Args[0] + if y0.Op != OpARM64MOVDnop { + break + } + x0 := y0.Args[0] + if x0.Op != OpARM64MOVBUload { + break + } + if x0.AuxInt != 1 { + break + } + s := x0.Aux + _ = x0.Args[1] + p1 := x0.Args[0] + if p1.Op != OpARM64ADD { + break + } + _ = p1.Args[1] + ptr1 := p1.Args[0] + idx1 := p1.Args[1] + mem := x0.Args[1] + y1 := v.Args[1] + if y1.Op != OpARM64MOVDnop { + break + } + x1 := y1.Args[0] + if x1.Op != OpARM64MOVBUloadidx { + break + } + _ = x1.Args[2] + ptr0 := x1.Args[0] + idx0 := x1.Args[1] + if mem != x1.Args[2] { + break + } + if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses 
== 1 && mergePoint(b, x0, x1) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)) { + break + } + b = mergePoint(b, x0, x1) + v0 := b.NewValue0(v.Pos, OpARM64REV16W, t) + v.reset(OpCopy) + v.AddArg(v0) + v1 := b.NewValue0(v.Pos, OpARM64MOVHUloadidx, t) + v1.AddArg(ptr0) + v1.AddArg(idx0) + v1.AddArg(mem) + v0.AddArg(v1) + return true + } // match: (ORshiftLL [24] o0:(ORshiftLL [16] y0:(REV16W x0:(MOVHUload [i2] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i0] {s} p mem))) // cond: i1 == i0+1 && i2 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(o0) // result: @mergePoint(b,x0,x1,x2) (REVW (MOVWUload {s} (OffPtr [i0] p) mem)) @@ -13038,6 +17928,91 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool { v0.AddArg(v1) return true } + // match: (ORshiftLL [24] o0:(ORshiftLL [16] y0:(REV16W x0:(MOVHUload [2] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr0 idx0 mem))) + // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(o0) + // result: @mergePoint(b,x0,x1,x2) (REVW (MOVWUloadidx ptr0 idx0 mem)) + for { + t := v.Type + if v.AuxInt != 24 { + break + } + _ = v.Args[1] + o0 := v.Args[0] + if o0.Op != OpARM64ORshiftLL { + break + } + if o0.AuxInt != 16 { + break + } + _ = o0.Args[1] + y0 := o0.Args[0] + if y0.Op != 
OpARM64REV16W { + break + } + x0 := y0.Args[0] + if x0.Op != OpARM64MOVHUload { + break + } + if x0.AuxInt != 2 { + break + } + s := x0.Aux + _ = x0.Args[1] + p := x0.Args[0] + mem := x0.Args[1] + y1 := o0.Args[1] + if y1.Op != OpARM64MOVDnop { + break + } + x1 := y1.Args[0] + if x1.Op != OpARM64MOVBUload { + break + } + if x1.AuxInt != 1 { + break + } + if x1.Aux != s { + break + } + _ = x1.Args[1] + p1 := x1.Args[0] + if p1.Op != OpARM64ADD { + break + } + _ = p1.Args[1] + ptr1 := p1.Args[0] + idx1 := p1.Args[1] + if mem != x1.Args[1] { + break + } + y2 := v.Args[1] + if y2.Op != OpARM64MOVDnop { + break + } + x2 := y2.Args[0] + if x2.Op != OpARM64MOVBUloadidx { + break + } + _ = x2.Args[2] + ptr0 := x2.Args[0] + idx0 := x2.Args[1] + if mem != x2.Args[2] { + break + } + if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(o0)) { + break + } + b = mergePoint(b, x0, x1, x2) + v0 := b.NewValue0(v.Pos, OpARM64REVW, t) + v.reset(OpCopy) + v.AddArg(v0) + v1 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t) + v1.AddArg(ptr0) + v1.AddArg(idx0) + v1.AddArg(mem) + v0.AddArg(v1) + return true + } // match: (ORshiftLL [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] y0:(REVW x0:(MOVWUload [i4] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [i3] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i1] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i0] {s} p mem))) // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 
1 && mergePoint(b,x0,x1,x2,x3,x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2) // result: @mergePoint(b,x0,x1,x2,x3,x4) (REV (MOVDload {s} (OffPtr [i0] p) mem)) @@ -13177,6 +18152,149 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool { v0.AddArg(v1) return true } + // match: (ORshiftLL [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] y0:(REVW x0:(MOVWUload [4] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [3] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y4:(MOVDnop x4:(MOVBUloadidx ptr0 idx0 mem))) + // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2) + // result: @mergePoint(b,x0,x1,x2,x3,x4) (REV (MOVDloadidx ptr0 idx0 mem)) + for { + t := v.Type + if v.AuxInt != 56 { + break + } + _ = v.Args[1] + o0 := v.Args[0] + if o0.Op != OpARM64ORshiftLL { + break + } + if o0.AuxInt != 48 { + break + } + _ = o0.Args[1] + o1 := o0.Args[0] + if o1.Op != OpARM64ORshiftLL { + break + } + if o1.AuxInt != 40 { + break + } + _ = o1.Args[1] + o2 := o1.Args[0] + if o2.Op != OpARM64ORshiftLL { + break + } + if o2.AuxInt != 32 { + break + } + _ = o2.Args[1] + y0 := o2.Args[0] + if y0.Op != OpARM64REVW { + break + } + x0 := y0.Args[0] + if x0.Op != OpARM64MOVWUload { + break + } + if x0.AuxInt != 4 { + break + } + s := x0.Aux 
+ _ = x0.Args[1] + p := x0.Args[0] + mem := x0.Args[1] + y1 := o2.Args[1] + if y1.Op != OpARM64MOVDnop { + break + } + x1 := y1.Args[0] + if x1.Op != OpARM64MOVBUload { + break + } + if x1.AuxInt != 3 { + break + } + if x1.Aux != s { + break + } + _ = x1.Args[1] + if p != x1.Args[0] { + break + } + if mem != x1.Args[1] { + break + } + y2 := o1.Args[1] + if y2.Op != OpARM64MOVDnop { + break + } + x2 := y2.Args[0] + if x2.Op != OpARM64MOVBUload { + break + } + if x2.AuxInt != 2 { + break + } + if x2.Aux != s { + break + } + _ = x2.Args[1] + if p != x2.Args[0] { + break + } + if mem != x2.Args[1] { + break + } + y3 := o0.Args[1] + if y3.Op != OpARM64MOVDnop { + break + } + x3 := y3.Args[0] + if x3.Op != OpARM64MOVBUload { + break + } + if x3.AuxInt != 1 { + break + } + if x3.Aux != s { + break + } + _ = x3.Args[1] + p1 := x3.Args[0] + if p1.Op != OpARM64ADD { + break + } + _ = p1.Args[1] + ptr1 := p1.Args[0] + idx1 := p1.Args[1] + if mem != x3.Args[1] { + break + } + y4 := v.Args[1] + if y4.Op != OpARM64MOVDnop { + break + } + x4 := y4.Args[0] + if x4.Op != OpARM64MOVBUloadidx { + break + } + _ = x4.Args[2] + ptr0 := x4.Args[0] + idx0 := x4.Args[1] + if mem != x4.Args[2] { + break + } + if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)) { + break + } + b = mergePoint(b, x0, x1, x2, x3, x4) + v0 := b.NewValue0(v.Pos, OpARM64REV, t) + v.reset(OpCopy) + v.AddArg(v0) + v1 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t) + v1.AddArg(ptr0) + 
v1.AddArg(idx0) + v1.AddArg(mem) + v0.AddArg(v1) + return true + } return false } func rewriteValueARM64_OpARM64ORshiftRA_0(v *Value) bool { diff --git a/test/codegen/memcombine.go b/test/codegen/memcombine.go index 17323bd2ab..631c8e6879 100644 --- a/test/codegen/memcombine.go +++ b/test/codegen/memcombine.go @@ -22,7 +22,7 @@ var sink16 uint16 func load_le64(b []byte) { // amd64:`MOVQ\s\(.*\),` // s390x:`MOVDBR\s\(.*\),` - // arm64:`MOVD\s\(R[0-9]+\),` + // arm64:`MOVD\s\(R[0-9]+\),`,-`MOV[BHW]` // ppc64le:`MOVD\s`,-`MOV[BHW]Z` sink64 = binary.LittleEndian.Uint64(b) } @@ -30,7 +30,7 @@ func load_le64(b []byte) { func load_le64_idx(b []byte, idx int) { // amd64:`MOVQ\s\(.*\)\(.*\*1\),` // s390x:`MOVDBR\s\(.*\)\(.*\*1\),` - // arm64:`MOVD\s\(R[0-9]+\),` + // arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[BHW]` // ppc64le:`MOVD\s`,-`MOV[BHW]Z\s` sink64 = binary.LittleEndian.Uint64(b[idx:]) } @@ -38,7 +38,7 @@ func load_le64_idx(b []byte, idx int) { func load_le32(b []byte) { // amd64:`MOVL\s\(.*\),` 386:`MOVL\s\(.*\),` // s390x:`MOVWBR\s\(.*\),` - // arm64:`MOVWU\s\(R[0-9]+\),` + // arm64:`MOVWU\s\(R[0-9]+\),`,-`MOV[BH]` // ppc64le:`MOVWZ\s` sink32 = binary.LittleEndian.Uint32(b) } @@ -46,7 +46,7 @@ func load_le32(b []byte) { func load_le32_idx(b []byte, idx int) { // amd64:`MOVL\s\(.*\)\(.*\*1\),` 386:`MOVL\s\(.*\)\(.*\*1\),` // s390x:`MOVWBR\s\(.*\)\(.*\*1\),` - // arm64:`MOVWU\s\(R[0-9]+\),` + // arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[BH]` // ppc64le:`MOVWZ\s` sink32 = binary.LittleEndian.Uint32(b[idx:]) } @@ -54,50 +54,54 @@ func load_le32_idx(b []byte, idx int) { func load_le16(b []byte) { // amd64:`MOVWLZX\s\(.*\),` // ppc64le:`MOVHZ\s` + // arm64:`MOVHU\s\(R[0-9]+\),`,-`MOVB` sink16 = binary.LittleEndian.Uint16(b) } func load_le16_idx(b []byte, idx int) { // amd64:`MOVWLZX\s\(.*\),` // ppc64le:`MOVHZ\s` + // arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOVB` sink16 = binary.LittleEndian.Uint16(b[idx:]) } func load_be64(b []byte) { // amd64:`BSWAPQ` // 
s390x:`MOVD\s\(.*\),` - // arm64:`REV` + // arm64:`REV`,`MOVD\s\(R[0-9]+\),`,-`MOV[BHW]`,-`REVW`,-`REV16W` sink64 = binary.BigEndian.Uint64(b) } func load_be64_idx(b []byte, idx int) { // amd64:`BSWAPQ` // s390x:`MOVD\s\(.*\)\(.*\*1\),` - // arm64:`REV` + // arm64:`REV`,`MOVD\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[WHB]`,-`REVW`,-`REV16W` sink64 = binary.BigEndian.Uint64(b[idx:]) } func load_be32(b []byte) { // amd64:`BSWAPL` // s390x:`MOVWZ\s\(.*\),` - // arm64:`REVW` + // arm64:`REVW`,`MOVWU\s\(R[0-9]+\),`,-`MOV[BH]`,-`REV16W` sink32 = binary.BigEndian.Uint32(b) } func load_be32_idx(b []byte, idx int) { // amd64:`BSWAPL` // s390x:`MOVWZ\s\(.*\)\(.*\*1\),` - // arm64:`REVW` + // arm64:`REVW`,`MOVWU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[HB]`,-`REV16W` sink32 = binary.BigEndian.Uint32(b[idx:]) } func load_be16(b []byte) { // amd64:`ROLW\s\$8` + // arm64:`REV16W`,`MOVHU\s\(R[0-9]+\),`,-`MOVB` sink16 = binary.BigEndian.Uint16(b) } func load_be16_idx(b []byte, idx int) { // amd64:`ROLW\s\$8` + // arm64:`REV16W`,`MOVHU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOVB` sink16 = binary.BigEndian.Uint16(b[idx:]) } @@ -162,7 +166,7 @@ func store_le64(b []byte) { func store_le64_idx(b []byte, idx int) { // amd64:`MOVQ\s.*\(.*\)\(.*\*1\)$`,-`SHR.` - // arm64:`MOVD`,-`MOV[WBH]` + // arm64:`MOVD\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BHW]` // ppc64le:`MOVD\s`,-`MOV[BHW]\s` binary.LittleEndian.PutUint64(b[idx:], sink64) } @@ -176,7 +180,7 @@ func store_le32(b []byte) { func store_le32_idx(b []byte, idx int) { // amd64:`MOVL\s` - // arm64:`MOVW`,-`MOV[BH]` + // arm64:`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BH]` // ppc64le:`MOVW\s` binary.LittleEndian.PutUint32(b[idx:], sink32) } @@ -190,32 +194,32 @@ func store_le16(b []byte) { func store_le16_idx(b []byte, idx int) { // amd64:`MOVW\s` - // arm64:`MOVH`,-`MOVB` + // arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB` // ppc64le(DISABLED):`MOVH\s` binary.LittleEndian.PutUint16(b[idx:], sink16) } func store_be64(b []byte) { //
amd64:`BSWAPQ`,-`SHR.` - // arm64:`MOVD`,`REV`,-`MOV[WBH]` + // arm64:`MOVD`,`REV`,-`MOV[WBH]`,-`REVW`,-`REV16W` binary.BigEndian.PutUint64(b, sink64) } func store_be64_idx(b []byte, idx int) { // amd64:`BSWAPQ`,-`SHR.` - // arm64:`MOVD`,`REV`,-`MOV[WBH]` + // arm64:`REV`,`MOVD\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BHW]`,-`REV16W`,-`REVW` binary.BigEndian.PutUint64(b[idx:], sink64) } func store_be32(b []byte) { // amd64:`BSWAPL`,-`SHR.` - // arm64:`MOVW`,`REVW`,-`MOV[BH]` + // arm64:`MOVW`,`REVW`,-`MOV[BH]`,-`REV16W` binary.BigEndian.PutUint32(b, sink32) } func store_be32_idx(b []byte, idx int) { // amd64:`BSWAPL`,-`SHR.` - // arm64:`MOVW`,`REVW`,-`MOV[BH]` + // arm64:`REVW`,`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BH]`,-`REV16W` binary.BigEndian.PutUint32(b[idx:], sink32) } @@ -227,7 +231,7 @@ func store_be16(b []byte) { func store_be16_idx(b []byte, idx int) { // amd64:`ROLW\s\$8`,-`SHR.` - // arm64:`MOVH`,`REV16W`,-`MOVB` + // arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,`REV16W`,-`MOVB` binary.BigEndian.PutUint16(b[idx:], sink16) }