diff --git a/src/cmd/compile/internal/ssa/_gen/ARM64.rules b/src/cmd/compile/internal/ssa/_gen/ARM64.rules index fdfd7858fb..747bd020f1 100644 --- a/src/cmd/compile/internal/ssa/_gen/ARM64.rules +++ b/src/cmd/compile/internal/ssa/_gen/ARM64.rules @@ -3,70 +3,68 @@ // license that can be found in the LICENSE file. (Add(Ptr|64|32|16|8) ...) => (ADD ...) -(Add(32F|64F) ...) => (FADD(S|D) ...) +(Add(32|64)F ...) => (FADD(S|D) ...) (Sub(Ptr|64|32|16|8) ...) => (SUB ...) -(Sub(32F|64F) ...) => (FSUB(S|D) ...) +(Sub(32|64)F ...) => (FSUB(S|D) ...) (Mul64 ...) => (MUL ...) (Mul(32|16|8) ...) => (MULW ...) -(Mul(32F|64F) ...) => (FMUL(S|D) ...) +(Mul(32|64)F ...) => (FMUL(S|D) ...) -(Hmul64 ...) => (MULH ...) +(Hmul64 ...) => (MULH ...) (Hmul64u ...) => (UMULH ...) -(Hmul32 x y) => (SRAconst (MULL x y) [32]) +(Hmul32 x y) => (SRAconst (MULL x y) [32]) (Hmul32u x y) => (SRAconst (UMULL x y) [32]) (Select0 (Mul64uhilo x y)) => (UMULH x y) (Select1 (Mul64uhilo x y)) => (MUL x y) -(Div64 [false] x y) => (DIV x y) -(Div64u ...) => (UDIV ...) +(Div64 [false] x y) => (DIV x y) (Div32 [false] x y) => (DIVW x y) -(Div32u ...) => (UDIVW ...) (Div16 [false] x y) => (DIVW (SignExt16to32 x) (SignExt16to32 y)) (Div16u x y) => (UDIVW (ZeroExt16to32 x) (ZeroExt16to32 y)) -(Div8 x y) => (DIVW (SignExt8to32 x) (SignExt8to32 y)) -(Div8u x y) => (UDIVW (ZeroExt8to32 x) (ZeroExt8to32 y)) +(Div8 x y) => (DIVW (SignExt8to32 x) (SignExt8to32 y)) +(Div8u x y) => (UDIVW (ZeroExt8to32 x) (ZeroExt8to32 y)) +(Div64u ...) => (UDIV ...) +(Div32u ...) => (UDIVW ...) (Div32F ...) => (FDIVS ...) (Div64F ...) => (FDIVD ...) (Mod64 x y) => (MOD x y) -(Mod64u ...) => (UMOD ...) (Mod32 x y) => (MODW x y) +(Mod64u ...) => (UMOD ...) (Mod32u ...) => (UMODW ...) -(Mod16 x y) => (MODW (SignExt16to32 x) (SignExt16to32 y)) -(Mod16u x y) => (UMODW (ZeroExt16to32 x) (ZeroExt16to32 y)) -(Mod8 x y) => (MODW (SignExt8to32 x) (SignExt8to32 y)) -(Mod8u x y) => (UMODW (ZeroExt8to32 x) (ZeroExt8to32 y)) +(Mod(16|8) x y) => (MODW (SignExt(16|8)to32 x) (SignExt(16|8)to32 y)) +(Mod(16|8)u x y) => (UMODW (ZeroExt(16|8)to32 x) (ZeroExt(16|8)to32 y)) // (x + y) / 2 with x>=y => (x - y) / 2 + y (Avg64u x y) => (ADD (SRLconst (SUB x y) [1]) y) (And(64|32|16|8) ...) => (AND ...) -(Or(64|32|16|8) ...) => (OR ...) +(Or(64|32|16|8) ...) => (OR ...) (Xor(64|32|16|8) ...) => (XOR ...) // unary ops (Neg(64|32|16|8) ...) => (NEG ...) -(Neg(32F|64F) ...) => (FNEG(S|D) ...) +(Neg(32|64)F ...) => (FNEG(S|D) ...) (Com(64|32|16|8) ...) => (MVN ...) // math package intrinsics -(Abs ...) => (FABSD ...) -(Sqrt ...) => (FSQRTD ...) -(Ceil ...) => (FRINTPD ...) -(Floor ...) => (FRINTMD ...) -(Round ...) => (FRINTAD ...) +(Abs ...) => (FABSD ...) +(Sqrt ...) => (FSQRTD ...) +(Ceil ...) => (FRINTPD ...) +(Floor ...) => (FRINTMD ...) +(Round ...) => (FRINTAD ...) (RoundToEven ...) => (FRINTND ...) -(Trunc ...) => (FRINTZD ...) -(FMA x y z) => (FMADDD z x y) +(Trunc ...) => (FRINTZD ...) +(FMA x y z) => (FMADDD z x y) (Sqrt32 ...) => (FSQRTS ...) // lowering rotates // we do rotate detection in generic rules, if the following rules need to be changed, check generic rules first.
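// As an illustrative identity rather than a rule: rotating an 8-bit value left by c
// computes x<<(c&7) | x>>(-c&7) on the zero-extended value. That is why the constant
// case below masks the rotate amount with c&7 and -c&7, and the variable case masks
// the shift count with (ANDconst [7] ...) before shifting.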
-(RotateLeft8 x (MOVDconst [c])) => (Or8 (Lsh8x64 x (MOVDconst [c&7])) (Rsh8Ux64 x (MOVDconst [-c&7]))) -(RotateLeft8 x y) => (OR (SLL x (ANDconst [7] y)) (SRL (ZeroExt8to64 x) (ANDconst [7] (NEG y)))) +(RotateLeft8 x (MOVDconst [c])) => (Or8 (Lsh8x64 x (MOVDconst [c&7])) (Rsh8Ux64 x (MOVDconst [-c&7]))) +(RotateLeft8 x y) => (OR (SLL x (ANDconst [7] y)) (SRL (ZeroExt8to64 x) (ANDconst [7] (NEG y)))) (RotateLeft16 x (MOVDconst [c])) => (Or16 (Lsh16x64 x (MOVDconst [c&15])) (Rsh16Ux64 x (MOVDconst [-c&15]))) (RotateLeft16 x y) => (RORW (ORshiftLL (ZeroExt16to32 x) (ZeroExt16to32 x) [16]) (NEG y)) (RotateLeft32 x y) => (RORW x (NEG y)) @@ -74,10 +72,10 @@ (Ctz(64|32|16|8)NonZero ...) => (Ctz(64|32|32|32) ...) -(Ctz64 x) => (CLZ (RBIT x)) +(Ctz64 x) => (CLZ (RBIT x)) (Ctz32 x) => (CLZW (RBITW x)) (Ctz16 x) => (CLZW (RBITW (ORconst [0x10000] x))) -(Ctz8 x) => (CLZW (RBITW (ORconst [0x100] x))) +(Ctz8 x) => (CLZW (RBITW (ORconst [0x100] x))) (PopCount64 x) => (FMOVDfpgp (VUADDLV (VCNT (FMOVDgpfp x)))) (PopCount32 x) => (FMOVDfpgp (VUADDLV (VCNT (FMOVDgpfp (ZeroExt32to64 x))))) @@ -88,17 +86,17 @@ (FMOVDfpgp (Arg [off] {sym})) => @b.Func.Entry (Arg [off] {sym}) // Similarly for stores, if we see a store after FPR <=> GPR move, then redirect store to use the other register set. -(MOVDstore [off] {sym} ptr (FMOVDfpgp val) mem) => (FMOVDstore [off] {sym} ptr val mem) +(MOVDstore [off] {sym} ptr (FMOVDfpgp val) mem) => (FMOVDstore [off] {sym} ptr val mem) (FMOVDstore [off] {sym} ptr (FMOVDgpfp val) mem) => (MOVDstore [off] {sym} ptr val mem) -(MOVWstore [off] {sym} ptr (FMOVSfpgp val) mem) => (FMOVSstore [off] {sym} ptr val mem) +(MOVWstore [off] {sym} ptr (FMOVSfpgp val) mem) => (FMOVSstore [off] {sym} ptr val mem) (FMOVSstore [off] {sym} ptr (FMOVSgpfp val) mem) => (MOVWstore [off] {sym} ptr val mem) // float <=> int register moves, with no conversion. // These come up when compiling math.{Float64bits, Float64frombits, Float32bits, Float32frombits}. -(MOVDload [off] {sym} ptr (FMOVDstore [off] {sym} ptr val _)) => (FMOVDfpgp val) -(FMOVDload [off] {sym} ptr (MOVDstore [off] {sym} ptr val _)) => (FMOVDgpfp val) +(MOVDload [off] {sym} ptr (FMOVDstore [off] {sym} ptr val _)) => (FMOVDfpgp val) +(FMOVDload [off] {sym} ptr (MOVDstore [off] {sym} ptr val _)) => (FMOVDgpfp val) (MOVWUload [off] {sym} ptr (FMOVSstore [off] {sym} ptr val _)) => (FMOVSfpgp val) -(FMOVSload [off] {sym} ptr (MOVWstore [off] {sym} ptr val _)) => (FMOVSgpfp val) +(FMOVSload [off] {sym} ptr (MOVWstore [off] {sym} ptr val _)) => (FMOVSgpfp val) (BitLen64 x) => (SUB (MOVDconst [64]) (CLZ x)) (BitLen32 x) => (SUB (MOVDconst [32]) (CLZW x)) @@ -108,14 +106,14 @@ (BitRev64 ...) => (RBIT ...) (BitRev32 ...) => (RBITW ...) -(BitRev16 x) => (SRLconst [48] (RBIT x)) -(BitRev8 x) => (SRLconst [56] (RBIT x)) +(BitRev16 x) => (SRLconst [48] (RBIT x)) +(BitRev8 x) => (SRLconst [56] (RBIT x)) // In fact, UMOD will be translated into UREM instruction, and UREM is originally translated into // UDIV and MSUB instructions. But if there is already an identical UDIV instruction just before or // after UREM (case like quo, rem := z/y, z%y), then the second UDIV instruction becomes redundant. // The purpose of this rule is to have this extra UDIV instruction removed in CSE pass. -(UMOD x y) => (MSUB x y (UDIV x y)) +(UMOD x y) => (MSUB x y (UDIV x y)) (UMODW x y) => (MSUBW x y (UDIVW x y)) // 64-bit addition with carry. @@ -128,10 +126,10 @@ // boolean ops -- booleans are represented with 0=false, 1=true (AndB ...) => (AND ...) -(OrB ...) => (OR ...) 
-(EqB x y) => (XOR (MOVDconst [1]) (XOR x y)) +(OrB ...) => (OR ...) +(EqB x y) => (XOR (MOVDconst [1]) (XOR x y)) (NeqB ...) => (XOR ...) -(Not x) => (XOR (MOVDconst [1]) x) +(Not x) => (XOR (MOVDconst [1]) x) // shifts // hardware instruction uses only the low 6 bits of the shift @@ -145,7 +143,7 @@ (Lsh(64|32|16|8)x64 x y) && shiftIsBounded(v) => (SLL x y) (Lsh(64|32|16|8)x32 x y) && shiftIsBounded(v) => (SLL x y) (Lsh(64|32|16|8)x16 x y) && shiftIsBounded(v) => (SLL x y) -(Lsh(64|32|16|8)x8 x y) && shiftIsBounded(v) => (SLL x y) +(Lsh(64|32|16|8)x8 x y) && shiftIsBounded(v) => (SLL x y) // signed right shift (Rsh64x(64|32|16|8) x y) && shiftIsBounded(v) => (SRA x y) @@ -160,69 +158,45 @@ (Rsh8Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SRL (ZeroExt8to64 x) y) // shift value may be out of range, use CMP + CSEL instead -(Lsh64x64 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] y)) -(Lsh64x32 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] (ZeroExt32to64 y))) -(Lsh64x16 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] (ZeroExt16to64 y))) -(Lsh64x8 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] (ZeroExt8to64 y))) +(Lsh64x64 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] y)) +(Lsh64x(32|16|8) x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y))) -(Lsh32x64 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] y)) -(Lsh32x32 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] (ZeroExt32to64 y))) -(Lsh32x16 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] (ZeroExt16to64 y))) -(Lsh32x8 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] (ZeroExt8to64 y))) +(Lsh32x64 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] y)) +(Lsh32x(32|16|8) x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y))) -(Lsh16x64 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] y)) -(Lsh16x32 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] (ZeroExt32to64 y))) -(Lsh16x16 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] (ZeroExt16to64 y))) -(Lsh16x8 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] (ZeroExt8to64 y))) +(Lsh16x64 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] y)) +(Lsh16x(32|16|8) x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y))) -(Lsh8x64 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] y)) -(Lsh8x32 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] (ZeroExt32to64 y))) -(Lsh8x16 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] (ZeroExt16to64 y))) -(Lsh8x8 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] 
(ZeroExt8to64 y))) +(Lsh8x64 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] y)) +(Lsh8x(32|16|8) x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y))) -(Rsh64Ux64 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL x y) (Const64 [0]) (CMPconst [64] y)) -(Rsh64Ux32 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL x y) (Const64 [0]) (CMPconst [64] (ZeroExt32to64 y))) -(Rsh64Ux16 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL x y) (Const64 [0]) (CMPconst [64] (ZeroExt16to64 y))) -(Rsh64Ux8 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL x y) (Const64 [0]) (CMPconst [64] (ZeroExt8to64 y))) +(Rsh64Ux64 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL x y) (Const64 [0]) (CMPconst [64] y)) +(Rsh64Ux(32|16|8) x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL x y) (Const64 [0]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y))) -(Rsh32Ux64 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL (ZeroExt32to64 x) y) (Const64 [0]) (CMPconst [64] y)) -(Rsh32Ux32 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL (ZeroExt32to64 x) y) (Const64 [0]) (CMPconst [64] (ZeroExt32to64 y))) -(Rsh32Ux16 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL (ZeroExt32to64 x) y) (Const64 [0]) (CMPconst [64] (ZeroExt16to64 y))) -(Rsh32Ux8 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL (ZeroExt32to64 x) y) (Const64 [0]) (CMPconst [64] (ZeroExt8to64 y))) +(Rsh32Ux64 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL (ZeroExt32to64 x) y) (Const64 [0]) (CMPconst [64] y)) +(Rsh32Ux(32|16|8) x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL (ZeroExt32to64 x) y) (Const64 [0]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y))) -(Rsh16Ux64 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL (ZeroExt16to64 x) y) (Const64 [0]) (CMPconst [64] y)) -(Rsh16Ux32 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL (ZeroExt16to64 x) y) (Const64 [0]) (CMPconst [64] (ZeroExt32to64 y))) -(Rsh16Ux16 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL (ZeroExt16to64 x) y) (Const64 [0]) (CMPconst [64] (ZeroExt16to64 y))) -(Rsh16Ux8 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL (ZeroExt16to64 x) y) (Const64 [0]) (CMPconst [64] (ZeroExt8to64 y))) +(Rsh16Ux64 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL (ZeroExt16to64 x) y) (Const64 [0]) (CMPconst [64] y)) +(Rsh16Ux(32|16|8) x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL (ZeroExt16to64 x) y) (Const64 [0]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y))) -(Rsh8Ux64 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL (ZeroExt8to64 x) y) (Const64 [0]) (CMPconst [64] y)) -(Rsh8Ux32 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL (ZeroExt8to64 x) y) (Const64 [0]) (CMPconst [64] (ZeroExt32to64 y))) -(Rsh8Ux16 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL (ZeroExt8to64 x) y) (Const64 [0]) (CMPconst [64] (ZeroExt16to64 y))) -(Rsh8Ux8 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL (ZeroExt8to64 x) y) (Const64 [0]) (CMPconst [64] (ZeroExt8to64 y))) +(Rsh8Ux64 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL (ZeroExt8to64 x) y) (Const64 [0]) (CMPconst [64] y)) +(Rsh8Ux(32|16|8) x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL (ZeroExt8to64 x) y) (Const64 [0]) (CMPconst [64] 
((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y))) -(Rsh64x64 x y) && !shiftIsBounded(v) => (SRA x (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] y))) -(Rsh64x32 x y) && !shiftIsBounded(v) => (SRA x (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] (ZeroExt32to64 y)))) -(Rsh64x16 x y) && !shiftIsBounded(v) => (SRA x (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] (ZeroExt16to64 y)))) -(Rsh64x8 x y) && !shiftIsBounded(v) => (SRA x (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] (ZeroExt8to64 y)))) +(Rsh64x64 x y) && !shiftIsBounded(v) => (SRA x (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] y))) +(Rsh64x(32|16|8) x y) && !shiftIsBounded(v) => (SRA x (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y)))) -(Rsh32x64 x y) && !shiftIsBounded(v) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] y))) -(Rsh32x32 x y) && !shiftIsBounded(v) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] (ZeroExt32to64 y)))) -(Rsh32x16 x y) && !shiftIsBounded(v) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] (ZeroExt16to64 y)))) -(Rsh32x8 x y) && !shiftIsBounded(v) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] (ZeroExt8to64 y)))) +(Rsh32x64 x y) && !shiftIsBounded(v) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] y))) +(Rsh32x(32|16|8) x y) && !shiftIsBounded(v) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y)))) -(Rsh16x64 x y) && !shiftIsBounded(v) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] y))) -(Rsh16x32 x y) && !shiftIsBounded(v) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] (ZeroExt32to64 y)))) -(Rsh16x16 x y) && !shiftIsBounded(v) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] (ZeroExt16to64 y)))) -(Rsh16x8 x y) && !shiftIsBounded(v) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] (ZeroExt8to64 y)))) +(Rsh16x64 x y) && !shiftIsBounded(v) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] y))) +(Rsh16x(32|16|8) x y) && !shiftIsBounded(v) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y)))) -(Rsh8x64 x y) && !shiftIsBounded(v) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] y))) -(Rsh8x32 x y) && !shiftIsBounded(v) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] (ZeroExt32to64 y)))) -(Rsh8x16 x y) && !shiftIsBounded(v) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] (ZeroExt16to64 y)))) -(Rsh8x8 x y) && !shiftIsBounded(v) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] (ZeroExt8to64 y)))) +(Rsh8x64 x y) && !shiftIsBounded(v) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] y))) +(Rsh8x(32|16|8) x y) && !shiftIsBounded(v) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y)))) // constants (Const(64|32|16|8) [val]) => (MOVDconst [int64(val)]) -(Const(32F|64F) [val]) => (FMOV(S|D)const [float64(val)]) +(Const(32|64)F [val]) => (FMOV(S|D)const [float64(val)]) (ConstNil) => 
(MOVDconst [0]) (ConstBool [t]) => (MOVDconst [b2i(t)]) @@ -230,41 +204,41 @@ // truncations // Because we ignore high parts of registers, truncates are just copies. -(Trunc16to8 ...) => (Copy ...) -(Trunc32to8 ...) => (Copy ...) +(Trunc16to8 ...) => (Copy ...) +(Trunc32to8 ...) => (Copy ...) (Trunc32to16 ...) => (Copy ...) -(Trunc64to8 ...) => (Copy ...) +(Trunc64to8 ...) => (Copy ...) (Trunc64to16 ...) => (Copy ...) (Trunc64to32 ...) => (Copy ...) // Zero-/Sign-extensions -(ZeroExt8to16 ...) => (MOVBUreg ...) -(ZeroExt8to32 ...) => (MOVBUreg ...) +(ZeroExt8to16 ...) => (MOVBUreg ...) +(ZeroExt8to32 ...) => (MOVBUreg ...) (ZeroExt16to32 ...) => (MOVHUreg ...) -(ZeroExt8to64 ...) => (MOVBUreg ...) +(ZeroExt8to64 ...) => (MOVBUreg ...) (ZeroExt16to64 ...) => (MOVHUreg ...) (ZeroExt32to64 ...) => (MOVWUreg ...) -(SignExt8to16 ...) => (MOVBreg ...) -(SignExt8to32 ...) => (MOVBreg ...) +(SignExt8to16 ...) => (MOVBreg ...) +(SignExt8to32 ...) => (MOVBreg ...) (SignExt16to32 ...) => (MOVHreg ...) -(SignExt8to64 ...) => (MOVBreg ...) +(SignExt8to64 ...) => (MOVBreg ...) (SignExt16to64 ...) => (MOVHreg ...) (SignExt32to64 ...) => (MOVWreg ...) // float <=> int conversion -(Cvt32to32F ...) => (SCVTFWS ...) -(Cvt32to64F ...) => (SCVTFWD ...) -(Cvt64to32F ...) => (SCVTFS ...) -(Cvt64to64F ...) => (SCVTFD ...) +(Cvt32to32F ...) => (SCVTFWS ...) +(Cvt32to64F ...) => (SCVTFWD ...) +(Cvt64to32F ...) => (SCVTFS ...) +(Cvt64to64F ...) => (SCVTFD ...) (Cvt32Uto32F ...) => (UCVTFWS ...) (Cvt32Uto64F ...) => (UCVTFWD ...) (Cvt64Uto32F ...) => (UCVTFS ...) (Cvt64Uto64F ...) => (UCVTFD ...) -(Cvt32Fto32 ...) => (FCVTZSSW ...) -(Cvt64Fto32 ...) => (FCVTZSDW ...) -(Cvt32Fto64 ...) => (FCVTZSS ...) -(Cvt64Fto64 ...) => (FCVTZSD ...) +(Cvt32Fto32 ...) => (FCVTZSSW ...) +(Cvt64Fto32 ...) => (FCVTZSDW ...) +(Cvt32Fto64 ...) => (FCVTZSS ...) +(Cvt64Fto64 ...) => (FCVTZSD ...) (Cvt32Fto32U ...) => (FCVTZUSW ...) (Cvt64Fto32U ...) => (FCVTZUDW ...) (Cvt32Fto64U ...) => (FCVTZUS ...) @@ -278,26 +252,24 @@ (Round64F ...) => (LoweredRound64F ...) 
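// To make the truncation/extension lowerings above concrete, a minimal Go sketch
// (illustrative only, not part of the rule set):
//
//	func trunc(x int64) int8   { return int8(x) }    // Trunc64to8   => Copy: the low byte is already in place
//	func sext(x int8) int64    { return int64(x) }   // SignExt8to64 => MOVBreg (sign-extend)
//	func zext(x uint8) uint64  { return uint64(x) }  // ZeroExt8to64 => MOVBUreg (zero-extend)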
// comparisons -(Eq8 x y) => (Equal (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y))) -(Eq16 x y) => (Equal (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y))) -(Eq32 x y) => (Equal (CMPW x y)) -(Eq64 x y) => (Equal (CMP x y)) -(EqPtr x y) => (Equal (CMP x y)) +(Eq8 x y) => (Equal (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y))) +(Eq16 x y) => (Equal (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y))) +(Eq32 x y) => (Equal (CMPW x y)) +(Eq64 x y) => (Equal (CMP x y)) +(EqPtr x y) => (Equal (CMP x y)) (Eq32F x y) => (Equal (FCMPS x y)) (Eq64F x y) => (Equal (FCMPD x y)) -(Neq8 x y) => (NotEqual (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y))) -(Neq16 x y) => (NotEqual (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y))) -(Neq32 x y) => (NotEqual (CMPW x y)) -(Neq64 x y) => (NotEqual (CMP x y)) -(NeqPtr x y) => (NotEqual (CMP x y)) -(Neq32F x y) => (NotEqual (FCMPS x y)) -(Neq64F x y) => (NotEqual (FCMPD x y)) +(Neq8 x y) => (NotEqual (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y))) +(Neq16 x y) => (NotEqual (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y))) +(Neq32 x y) => (NotEqual (CMPW x y)) +(Neq64 x y) => (NotEqual (CMP x y)) +(NeqPtr x y) => (NotEqual (CMP x y)) +(Neq(32|64)F x y) => (NotEqual (FCMP(S|D) x y)) -(Less8 x y) => (LessThan (CMPW (SignExt8to32 x) (SignExt8to32 y))) -(Less16 x y) => (LessThan (CMPW (SignExt16to32 x) (SignExt16to32 y))) +(Less(8|16) x y) => (LessThan (CMPW (SignExt(8|16)to32 x) (SignExt(8|16)to32 y))) (Less32 x y) => (LessThan (CMPW x y)) -(Less64 x y) => (LessThan (CMP x y)) +(Less64 x y) => (LessThan (CMP x y)) // Set condition flags for floating-point comparisons "x < y" // and "x <= y". Because if either or both of the operands are @@ -313,16 +285,16 @@ // x < 1 => x == 0 // 1 <= x => x != 0 (Less(8U|16U|32U|64U) zero:(MOVDconst [0]) x) => (Neq(8|16|32|64) zero x) -(Leq(8U|16U|32U|64U) x zero:(MOVDconst [0])) => (Eq(8|16|32|64) x zero) -(Less(8U|16U|32U|64U) x (MOVDconst [1])) => (Eq(8|16|32|64) x (MOVDconst [0])) -(Leq(8U|16U|32U|64U) (MOVDconst [1]) x) => (Neq(8|16|32|64) (MOVDconst [0]) x) +(Leq(8U|16U|32U|64U) x zero:(MOVDconst [0])) => (Eq(8|16|32|64) x zero) +(Less(8U|16U|32U|64U) x (MOVDconst [1])) => (Eq(8|16|32|64) x (MOVDconst [0])) +(Leq(8U|16U|32U|64U) (MOVDconst [1]) x) => (Neq(8|16|32|64) (MOVDconst [0]) x) -(Less8U x y) => (LessThanU (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y))) +(Less8U x y) => (LessThanU (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y))) (Less16U x y) => (LessThanU (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y))) (Less32U x y) => (LessThanU (CMPW x y)) (Less64U x y) => (LessThanU (CMP x y)) -(Leq8 x y) => (LessEqual (CMPW (SignExt8to32 x) (SignExt8to32 y))) +(Leq8 x y) => (LessEqual (CMPW (SignExt8to32 x) (SignExt8to32 y))) (Leq16 x y) => (LessEqual (CMPW (SignExt16to32 x) (SignExt16to32 y))) (Leq32 x y) => (LessEqual (CMPW x y)) (Leq64 x y) => (LessEqual (CMP x y)) @@ -331,7 +303,7 @@ (Leq32F x y) => (LessEqualF (FCMPS x y)) (Leq64F x y) => (LessEqualF (FCMPD x y)) -(Leq8U x y) => (LessEqualU (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y))) +(Leq8U x y) => (LessEqualU (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y))) (Leq16U x y) => (LessEqualU (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y))) (Leq32U x y) => (LessEqualU (CMPW x y)) (Leq64U x y) => (LessEqualU (CMP x y)) @@ -374,7 +346,7 @@ (Store {t} ptr val mem) && t.Size() == 8 && is64BitFloat(val.Type) => (FMOVDstore ptr val mem) // zeroing -(Zero [0] _ mem) => mem +(Zero [0] _ mem) => mem (Zero [1] ptr mem) => (MOVBstore ptr (MOVDconst [0]) mem) (Zero [2] ptr mem) => (MOVHstore ptr (MOVDconst [0]) mem) (Zero [4] ptr mem) => (MOVWstore ptr (MOVDconst [0]) mem) @@ 
-456,7 +428,7 @@ mem) // moves -(Move [0] _ _ mem) => mem +(Move [0] _ _ mem) => mem (Move [1] dst src mem) => (MOVBstore dst (MOVBUload src mem) mem) (Move [2] dst src mem) => (MOVHstore dst (MOVHUload src mem) mem) (Move [3] dst src mem) => @@ -544,36 +516,36 @@ mem) // calls -(StaticCall ...) => (CALLstatic ...) +(StaticCall ...) => (CALLstatic ...) (ClosureCall ...) => (CALLclosure ...) -(InterCall ...) => (CALLinter ...) -(TailCall ...) => (CALLtail ...) +(InterCall ...) => (CALLinter ...) +(TailCall ...) => (CALLtail ...) // checks (NilCheck ...) => (LoweredNilCheck ...) (IsNonNil ptr) => (NotEqual (CMPconst [0] ptr)) -(IsInBounds idx len) => (LessThanU (CMP idx len)) +(IsInBounds idx len) => (LessThanU (CMP idx len)) (IsSliceInBounds idx len) => (LessEqualU (CMP idx len)) // pseudo-ops (GetClosurePtr ...) => (LoweredGetClosurePtr ...) -(GetCallerSP ...) => (LoweredGetCallerSP ...) -(GetCallerPC ...) => (LoweredGetCallerPC ...) +(GetCallerSP ...) => (LoweredGetCallerSP ...) +(GetCallerPC ...) => (LoweredGetCallerPC ...) // Absorb pseudo-ops into blocks. -(If (Equal cc) yes no) => (EQ cc yes no) -(If (NotEqual cc) yes no) => (NE cc yes no) -(If (LessThan cc) yes no) => (LT cc yes no) -(If (LessThanU cc) yes no) => (ULT cc yes no) -(If (LessEqual cc) yes no) => (LE cc yes no) -(If (LessEqualU cc) yes no) => (ULE cc yes no) -(If (GreaterThan cc) yes no) => (GT cc yes no) -(If (GreaterThanU cc) yes no) => (UGT cc yes no) -(If (GreaterEqual cc) yes no) => (GE cc yes no) +(If (Equal cc) yes no) => (EQ cc yes no) +(If (NotEqual cc) yes no) => (NE cc yes no) +(If (LessThan cc) yes no) => (LT cc yes no) +(If (LessThanU cc) yes no) => (ULT cc yes no) +(If (LessEqual cc) yes no) => (LE cc yes no) +(If (LessEqualU cc) yes no) => (ULE cc yes no) +(If (GreaterThan cc) yes no) => (GT cc yes no) +(If (GreaterThanU cc) yes no) => (UGT cc yes no) +(If (GreaterEqual cc) yes no) => (GE cc yes no) (If (GreaterEqualU cc) yes no) => (UGE cc yes no) -(If (LessThanF cc) yes no) => (FLT cc yes no) -(If (LessEqualF cc) yes no) => (FLE cc yes no) -(If (GreaterThanF cc) yes no) => (FGT cc yes no) +(If (LessThanF cc) yes no) => (FLT cc yes no) +(If (LessEqualF cc) yes no) => (FLE cc yes no) +(If (GreaterThanF cc) yes no) => (FGT cc yes no) (If (GreaterEqualF cc) yes no) => (FGE cc yes no) (If cond yes no) => (TBNZ [0] cond yes no) @@ -593,23 +565,18 @@ (AtomicStorePtrNoWB ...) => (STLR ...) (AtomicExchange(32|64) ...) => (LoweredAtomicExchange(32|64) ...) -(AtomicAdd(32|64) ...) => (LoweredAtomicAdd(32|64) ...) -(AtomicCompareAndSwap(32|64) ...) => (LoweredAtomicCas(32|64) ...) +(AtomicAdd(32|64) ...) => (LoweredAtomicAdd(32|64) ...) +(AtomicCompareAndSwap(32|64) ...) => (LoweredAtomicCas(32|64) ...) (AtomicAdd(32|64)Variant ...) => (LoweredAtomicAdd(32|64)Variant ...) (AtomicExchange(32|64)Variant ...) => (LoweredAtomicExchange(32|64)Variant ...) (AtomicCompareAndSwap(32|64)Variant ...) => (LoweredAtomicCas(32|64)Variant ...) // Currently the updated value is not used, but we need a register to temporarily hold it. 
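// Concretely (as the rules below show), each LoweredAtomicAnd/LoweredAtomicOr
// yields a <value, memory> tuple; the generic AtomicAnd/AtomicOr ops being
// lowered produce only a memory result, so Select1 extracts the memory half
// and the value half is simply dropped.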
-(AtomicAnd8 ptr val mem) => (Select1 (LoweredAtomicAnd8 ptr val mem)) -(AtomicAnd32 ptr val mem) => (Select1 (LoweredAtomicAnd32 ptr val mem)) -(AtomicOr8 ptr val mem) => (Select1 (LoweredAtomicOr8 ptr val mem)) -(AtomicOr32 ptr val mem) => (Select1 (LoweredAtomicOr32 ptr val mem)) - -(AtomicAnd8Variant ptr val mem) => (Select1 (LoweredAtomicAnd8Variant ptr val mem)) -(AtomicAnd32Variant ptr val mem) => (Select1 (LoweredAtomicAnd32Variant ptr val mem)) -(AtomicOr8Variant ptr val mem) => (Select1 (LoweredAtomicOr8Variant ptr val mem)) -(AtomicOr32Variant ptr val mem) => (Select1 (LoweredAtomicOr32Variant ptr val mem)) +(AtomicAnd(8|32) ptr val mem) => (Select1 (LoweredAtomicAnd(8|32) ptr val mem)) +(AtomicOr(8|32) ptr val mem) => (Select1 (LoweredAtomicOr(8|32) ptr val mem)) +(AtomicAnd(8|32)Variant ptr val mem) => (Select1 (LoweredAtomicAnd(8|32)Variant ptr val mem)) +(AtomicOr(8|32)Variant ptr val mem) => (Select1 (LoweredAtomicOr(8|32)Variant ptr val mem)) // Write barrier. (WB ...) => (LoweredWB ...) @@ -624,214 +591,101 @@ // Optimizations // Absorb boolean tests into block -(NZ (Equal cc) yes no) => (EQ cc yes no) -(NZ (NotEqual cc) yes no) => (NE cc yes no) -(NZ (LessThan cc) yes no) => (LT cc yes no) -(NZ (LessThanU cc) yes no) => (ULT cc yes no) -(NZ (LessEqual cc) yes no) => (LE cc yes no) -(NZ (LessEqualU cc) yes no) => (ULE cc yes no) -(NZ (GreaterThan cc) yes no) => (GT cc yes no) -(NZ (GreaterThanU cc) yes no) => (UGT cc yes no) -(NZ (GreaterEqual cc) yes no) => (GE cc yes no) +(NZ (Equal cc) yes no) => (EQ cc yes no) +(NZ (NotEqual cc) yes no) => (NE cc yes no) +(NZ (LessThan cc) yes no) => (LT cc yes no) +(NZ (LessThanU cc) yes no) => (ULT cc yes no) +(NZ (LessEqual cc) yes no) => (LE cc yes no) +(NZ (LessEqualU cc) yes no) => (ULE cc yes no) +(NZ (GreaterThan cc) yes no) => (GT cc yes no) +(NZ (GreaterThanU cc) yes no) => (UGT cc yes no) +(NZ (GreaterEqual cc) yes no) => (GE cc yes no) (NZ (GreaterEqualU cc) yes no) => (UGE cc yes no) -(NZ (LessThanF cc) yes no) => (FLT cc yes no) -(NZ (LessEqualF cc) yes no) => (FLE cc yes no) -(NZ (GreaterThanF cc) yes no) => (FGT cc yes no) +(NZ (LessThanF cc) yes no) => (FLT cc yes no) +(NZ (LessEqualF cc) yes no) => (FLE cc yes no) +(NZ (GreaterThanF cc) yes no) => (FGT cc yes no) (NZ (GreaterEqualF cc) yes no) => (FGE cc yes no) -(TBNZ [0] (Equal cc) yes no) => (EQ cc yes no) -(TBNZ [0] (NotEqual cc) yes no) => (NE cc yes no) -(TBNZ [0] (LessThan cc) yes no) => (LT cc yes no) -(TBNZ [0] (LessThanU cc) yes no) => (ULT cc yes no) -(TBNZ [0] (LessEqual cc) yes no) => (LE cc yes no) -(TBNZ [0] (LessEqualU cc) yes no) => (ULE cc yes no) -(TBNZ [0] (GreaterThan cc) yes no) => (GT cc yes no) -(TBNZ [0] (GreaterThanU cc) yes no) => (UGT cc yes no) -(TBNZ [0] (GreaterEqual cc) yes no) => (GE cc yes no) +(TBNZ [0] (Equal cc) yes no) => (EQ cc yes no) +(TBNZ [0] (NotEqual cc) yes no) => (NE cc yes no) +(TBNZ [0] (LessThan cc) yes no) => (LT cc yes no) +(TBNZ [0] (LessThanU cc) yes no) => (ULT cc yes no) +(TBNZ [0] (LessEqual cc) yes no) => (LE cc yes no) +(TBNZ [0] (LessEqualU cc) yes no) => (ULE cc yes no) +(TBNZ [0] (GreaterThan cc) yes no) => (GT cc yes no) +(TBNZ [0] (GreaterThanU cc) yes no) => (UGT cc yes no) +(TBNZ [0] (GreaterEqual cc) yes no) => (GE cc yes no) (TBNZ [0] (GreaterEqualU cc) yes no) => (UGE cc yes no) -(TBNZ [0] (LessThanF cc) yes no) => (FLT cc yes no) -(TBNZ [0] (LessEqualF cc) yes no) => (FLE cc yes no) -(TBNZ [0] (GreaterThanF cc) yes no) => (FGT cc yes no) +(TBNZ [0] (LessThanF cc) yes no) => (FLT cc yes no) +(TBNZ 
[0] (LessEqualF cc) yes no) => (FLE cc yes no) +(TBNZ [0] (GreaterThanF cc) yes no) => (FGT cc yes no) (TBNZ [0] (GreaterEqualF cc) yes no) => (FGE cc yes no) -(EQ (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (EQ (TST x y) yes no) -(NE (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (NE (TST x y) yes no) -(LT (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (LT (TST x y) yes no) -(LE (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (LE (TST x y) yes no) -(GT (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (GT (TST x y) yes no) -(GE (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (GE (TST x y) yes no) - -(EQ (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (EQ (TSTconst [c] y) yes no) -(NE (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (NE (TSTconst [c] y) yes no) -(LT (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (LT (TSTconst [c] y) yes no) -(LE (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (LE (TSTconst [c] y) yes no) -(GT (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (GT (TSTconst [c] y) yes no) -(GE (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (GE (TSTconst [c] y) yes no) - -(EQ (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (EQ (TSTW x y) yes no) -(NE (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (NE (TSTW x y) yes no) -(LT (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (LT (TSTW x y) yes no) -(LE (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (LE (TSTW x y) yes no) -(GT (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (GT (TSTW x y) yes no) -(GE (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (GE (TSTW x y) yes no) - -(EQ (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (EQ (TSTWconst [int32(c)] y) yes no) -(NE (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (NE (TSTWconst [int32(c)] y) yes no) -(LT (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (LT (TSTWconst [int32(c)] y) yes no) -(LE (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (LE (TSTWconst [int32(c)] y) yes no) -(GT (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (GT (TSTWconst [int32(c)] y) yes no) -(GE (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (GE (TSTWconst [int32(c)] y) yes no) +((EQ|NE|LT|LE|GT|GE) (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (TST x y) yes no) +((EQ|NE|LT|LE|GT|GE) (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (TSTconst [c] y) yes no) +((EQ|NE|LT|LE|GT|GE) (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (TSTW x y) yes no) +((EQ|NE|LT|LE|GT|GE) (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (TSTWconst [int32(c)] y) yes no) // For conditional instructions such as CSET, CSEL. 
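// In Go terms: a test like
//
//	if x&y == 0 { ... }
//
// needs only the condition flags, so TST (an ANDS whose result is discarded)
// can stand in for the AND+CMP pair whenever the AND has no other use; that is
// what the z.Uses == 1 / x.Uses == 1 conditions guarantee.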
-(Equal (CMPconst [0] z:(AND x y))) && z.Uses == 1 => (Equal (TST x y)) -(NotEqual (CMPconst [0] z:(AND x y))) && z.Uses == 1 => (NotEqual (TST x y)) -(LessThan (CMPconst [0] z:(AND x y))) && z.Uses == 1 => (LessThan (TST x y)) -(LessEqual (CMPconst [0] z:(AND x y))) && z.Uses == 1 => (LessEqual (TST x y)) -(GreaterThan (CMPconst [0] z:(AND x y))) && z.Uses == 1 => (GreaterThan (TST x y)) -(GreaterEqual (CMPconst [0] z:(AND x y))) && z.Uses == 1 => (GreaterEqual (TST x y)) +((Equal|NotEqual|LessThan|LessEqual|GreaterThan|GreaterEqual) (CMPconst [0] z:(AND x y))) && z.Uses == 1 => + ((Equal|NotEqual|LessThan|LessEqual|GreaterThan|GreaterEqual) (TST x y)) +((Equal|NotEqual|LessThan|LessEqual|GreaterThan|GreaterEqual) (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => + ((Equal|NotEqual|LessThan|LessEqual|GreaterThan|GreaterEqual) (TSTWconst [int32(c)] y)) +((Equal|NotEqual|LessThan|LessEqual|GreaterThan|GreaterEqual) (CMPWconst [0] z:(AND x y))) && z.Uses == 1 => + ((Equal|NotEqual|LessThan|LessEqual|GreaterThan|GreaterEqual) (TSTW x y)) +((Equal|NotEqual|LessThan|LessEqual|GreaterThan|GreaterEqual) (CMPconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => + ((Equal|NotEqual|LessThan|LessEqual|GreaterThan|GreaterEqual) (TSTconst [c] y)) -(Equal (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (Equal (TSTWconst [int32(c)] y)) -(NotEqual (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (NotEqual (TSTWconst [int32(c)] y)) -(LessThan (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (LessThan (TSTWconst [int32(c)] y)) -(LessEqual (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (LessEqual (TSTWconst [int32(c)] y)) -(GreaterThan (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (GreaterThan (TSTWconst [int32(c)] y)) -(GreaterEqual (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (GreaterEqual (TSTWconst [int32(c)] y)) - -(Equal (CMPWconst [0] z:(AND x y))) && z.Uses == 1 => (Equal (TSTW x y)) -(NotEqual (CMPWconst [0] z:(AND x y))) && z.Uses == 1 => (NotEqual (TSTW x y)) -(LessThan (CMPWconst [0] z:(AND x y))) && z.Uses == 1 => (LessThan (TSTW x y)) -(LessEqual (CMPWconst [0] z:(AND x y))) && z.Uses == 1 => (LessEqual (TSTW x y)) -(GreaterThan (CMPWconst [0] z:(AND x y))) && z.Uses == 1 => (GreaterThan (TSTW x y)) -(GreaterEqual (CMPWconst [0] z:(AND x y))) && z.Uses == 1 => (GreaterEqual (TSTW x y)) - -(Equal (CMPconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (Equal (TSTconst [c] y)) -(NotEqual (CMPconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (NotEqual (TSTconst [c] y)) -(LessThan (CMPconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (LessThan (TSTconst [c] y)) -(LessEqual (CMPconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (LessEqual (TSTconst [c] y)) -(GreaterThan (CMPconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (GreaterThan (TSTconst [c] y)) -(GreaterEqual (CMPconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (GreaterEqual (TSTconst [c] y)) - -(EQ (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (EQ (CMNconst [c] y) yes no) -(NE (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (NE (CMNconst [c] y) yes no) -(LT (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (LTnoov (CMNconst [c] y) yes no) -(LE (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (LEnoov (CMNconst [c] y) yes no) -(GT (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (GTnoov (CMNconst [c] y) yes no) -(GE (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (GEnoov (CMNconst [c] y) yes no) - -(EQ (CMPWconst [0] x:(ADDconst [c] y)) yes 
no) && x.Uses == 1 => (EQ (CMNWconst [int32(c)] y) yes no) -(NE (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (NE (CMNWconst [int32(c)] y) yes no) -(LT (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (LTnoov (CMNWconst [int32(c)] y) yes no) -(LE (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (LEnoov (CMNWconst [int32(c)] y) yes no) -(GT (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (GTnoov (CMNWconst [int32(c)] y) yes no) -(GE (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (GEnoov (CMNWconst [int32(c)] y) yes no) - -(EQ (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (EQ (CMN x y) yes no) -(NE (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (NE (CMN x y) yes no) -(LT (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (LTnoov (CMN x y) yes no) -(LE (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (LEnoov (CMN x y) yes no) -(GT (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (GTnoov (CMN x y) yes no) -(GE (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (GEnoov (CMN x y) yes no) - -(EQ (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (EQ (CMNW x y) yes no) -(NE (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (NE (CMNW x y) yes no) -(LT (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (LTnoov (CMNW x y) yes no) -(LE (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (LEnoov (CMNW x y) yes no) -(GT (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (GTnoov (CMNW x y) yes no) -(GE (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (GEnoov (CMNW x y) yes no) +((EQ|NE|LT|LE|GT|GE) (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMNconst [c] y) yes no) +((EQ|NE|LT|LE|GT|GE) (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMNWconst [int32(c)] y) yes no) +((EQ|NE|LT|LE|GT|GE) (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMN x y) yes no) +((EQ|NE|LT|LE|GT|GE) (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMNW x y) yes no) // CMP(x,-y) -> CMN(x,y) is only valid for unordered comparison, if y can be -1<<63 -(EQ (CMP x z:(NEG y)) yes no) && z.Uses == 1 => (EQ (CMN x y) yes no) -(NE (CMP x z:(NEG y)) yes no) && z.Uses == 1 => (NE (CMN x y) yes no) - -(Equal (CMP x z:(NEG y))) && z.Uses == 1 => (Equal (CMN x y)) -(NotEqual (CMP x z:(NEG y))) && z.Uses == 1 => (NotEqual (CMN x y)) +((EQ|NE) (CMP x z:(NEG y)) yes no) && z.Uses == 1 => ((EQ|NE) (CMN x y) yes no) +((Equal|NotEqual) (CMP x z:(NEG y))) && z.Uses == 1 => ((Equal|NotEqual) (CMN x y)) // CMPW(x,-y) -> CMNW(x,y) is only valid for unordered comparison, if y can be -1<<31 -(EQ (CMPW x z:(NEG y)) yes no) && z.Uses == 1 => (EQ (CMNW x y) yes no) -(NE (CMPW x z:(NEG y)) yes no) && z.Uses == 1 => (NE (CMNW x y) yes no) - -(Equal (CMPW x z:(NEG y))) && z.Uses == 1 => (Equal (CMNW x y)) -(NotEqual (CMPW x z:(NEG y))) && z.Uses == 1 => (NotEqual (CMNW x y)) +((EQ|NE) (CMPW x z:(NEG y)) yes no) && z.Uses == 1 => ((EQ|NE) (CMNW x y) yes no) +((Equal|NotEqual) (CMPW x z:(NEG y))) && z.Uses == 1 => ((Equal|NotEqual) (CMNW x y)) // For conditional instructions such as CSET, CSEL. // TODO: add support for LT, LE, GT, GE, overflow needs to be considered. 
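// (EQ and NE are safe because they read only the Z flag, and x+y == 0 is
// determined by the result bits alone; the signed orderings also consult the
// N and V flags, where CMN and CMP can disagree on overflow, so those cannot
// be converted without extra care.)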
-(Equal (CMPconst [0] x:(ADDconst [c] y))) && x.Uses == 1 => (Equal (CMNconst [c] y)) -(NotEqual (CMPconst [0] x:(ADDconst [c] y))) && x.Uses == 1 => (NotEqual (CMNconst [c] y)) +((Equal|NotEqual) (CMPconst [0] x:(ADDconst [c] y))) && x.Uses == 1 => ((Equal|NotEqual) (CMNconst [c] y)) +((Equal|NotEqual) (CMPWconst [0] x:(ADDconst [c] y))) && x.Uses == 1 => ((Equal|NotEqual) (CMNWconst [int32(c)] y)) +((Equal|NotEqual) (CMPconst [0] z:(ADD x y))) && z.Uses == 1 => ((Equal|NotEqual) (CMN x y)) +((Equal|NotEqual) (CMPWconst [0] z:(ADD x y))) && z.Uses == 1 => ((Equal|NotEqual) (CMNW x y)) +((Equal|NotEqual) (CMPconst [0] z:(MADD a x y))) && z.Uses == 1 => ((Equal|NotEqual) (CMN a (MUL x y))) +((Equal|NotEqual) (CMPconst [0] z:(MSUB a x y))) && z.Uses == 1 => ((Equal|NotEqual) (CMP a (MUL x y))) +((Equal|NotEqual) (CMPWconst [0] z:(MADDW a x y))) && z.Uses == 1 => ((Equal|NotEqual) (CMNW a (MULW x y))) +((Equal|NotEqual) (CMPWconst [0] z:(MSUBW a x y))) && z.Uses == 1 => ((Equal|NotEqual) (CMPW a (MULW x y))) -(Equal (CMPWconst [0] x:(ADDconst [c] y))) && x.Uses == 1 => (Equal (CMNWconst [int32(c)] y)) -(NotEqual (CMPWconst [0] x:(ADDconst [c] y))) && x.Uses == 1 => (NotEqual (CMNWconst [int32(c)] y)) +((CMPconst|CMNconst) [c] y) && c < 0 && c != -1<<63 => ((CMNconst|CMPconst) [-c] y) +((CMPWconst|CMNWconst) [c] y) && c < 0 && c != -1<<31 => ((CMNWconst|CMPWconst) [-c] y) -(Equal (CMPconst [0] z:(ADD x y))) && z.Uses == 1 => (Equal (CMN x y)) -(NotEqual (CMPconst [0] z:(ADD x y))) && z.Uses == 1 => (NotEqual (CMN x y)) +((EQ|NE) (CMPconst [0] x) yes no) => ((Z|NZ) x yes no) +((EQ|NE) (CMPWconst [0] x) yes no) => ((ZW|NZW) x yes no) -(Equal (CMPWconst [0] z:(ADD x y))) && z.Uses == 1 => (Equal (CMNW x y)) -(NotEqual (CMPWconst [0] z:(ADD x y))) && z.Uses == 1 => (NotEqual (CMNW x y)) - -(Equal (CMPconst [0] z:(MADD a x y))) && z.Uses==1 => (Equal (CMN a (MUL x y))) -(NotEqual (CMPconst [0] z:(MADD a x y))) && z.Uses==1 => (NotEqual (CMN a (MUL x y))) - -(Equal (CMPconst [0] z:(MSUB a x y))) && z.Uses==1 => (Equal (CMP a (MUL x y))) -(NotEqual (CMPconst [0] z:(MSUB a x y))) && z.Uses==1 => (NotEqual (CMP a (MUL x y))) - -(Equal (CMPWconst [0] z:(MADDW a x y))) && z.Uses==1 => (Equal (CMNW a (MULW x y))) -(NotEqual (CMPWconst [0] z:(MADDW a x y))) && z.Uses==1 => (NotEqual (CMNW a (MULW x y))) - -(Equal (CMPWconst [0] z:(MSUBW a x y))) && z.Uses==1 => (Equal (CMPW a (MULW x y))) -(NotEqual (CMPWconst [0] z:(MSUBW a x y))) && z.Uses==1 => (NotEqual (CMPW a (MULW x y))) - -(CMPconst [c] y) && c < 0 && c != -1<<63 => (CMNconst [-c] y) -(CMPWconst [c] y) && c < 0 && c != -1<<31 => (CMNWconst [-c] y) -(CMNconst [c] y) && c < 0 && c != -1<<63 => (CMPconst [-c] y) -(CMNWconst [c] y) && c < 0 && c != -1<<31 => (CMPWconst [-c] y) - -(EQ (CMPconst [0] x) yes no) => (Z x yes no) -(NE (CMPconst [0] x) yes no) => (NZ x yes no) -(EQ (CMPWconst [0] x) yes no) => (ZW x yes no) -(NE (CMPWconst [0] x) yes no) => (NZW x yes no) - -(EQ (CMPconst [0] z:(MADD a x y)) yes no) && z.Uses==1 => (EQ (CMN a (MUL x y)) yes no) -(NE (CMPconst [0] z:(MADD a x y)) yes no) && z.Uses==1 => (NE (CMN a (MUL x y)) yes no) -(LT (CMPconst [0] z:(MADD a x y)) yes no) && z.Uses==1 => (LTnoov (CMN a (MUL x y)) yes no) -(LE (CMPconst [0] z:(MADD a x y)) yes no) && z.Uses==1 => (LEnoov (CMN a (MUL x y)) yes no) -(GT (CMPconst [0] z:(MADD a x y)) yes no) && z.Uses==1 => (GTnoov (CMN a (MUL x y)) yes no) -(GE (CMPconst [0] z:(MADD a x y)) yes no) && z.Uses==1 => (GEnoov (CMN a (MUL x y)) yes no) - -(EQ (CMPconst [0] z:(MSUB a x y)) yes no) 
&& z.Uses==1 => (EQ (CMP a (MUL x y)) yes no) -(NE (CMPconst [0] z:(MSUB a x y)) yes no) && z.Uses==1 => (NE (CMP a (MUL x y)) yes no) -(LE (CMPconst [0] z:(MSUB a x y)) yes no) && z.Uses==1 => (LEnoov (CMP a (MUL x y)) yes no) -(LT (CMPconst [0] z:(MSUB a x y)) yes no) && z.Uses==1 => (LTnoov (CMP a (MUL x y)) yes no) -(GE (CMPconst [0] z:(MSUB a x y)) yes no) && z.Uses==1 => (GEnoov (CMP a (MUL x y)) yes no) -(GT (CMPconst [0] z:(MSUB a x y)) yes no) && z.Uses==1 => (GTnoov (CMP a (MUL x y)) yes no) - -(EQ (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => (EQ (CMNW a (MULW x y)) yes no) -(NE (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => (NE (CMNW a (MULW x y)) yes no) -(LE (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => (LEnoov (CMNW a (MULW x y)) yes no) -(LT (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => (LTnoov (CMNW a (MULW x y)) yes no) -(GE (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => (GEnoov (CMNW a (MULW x y)) yes no) -(GT (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => (GTnoov (CMNW a (MULW x y)) yes no) - -(EQ (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => (EQ (CMPW a (MULW x y)) yes no) -(NE (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => (NE (CMPW a (MULW x y)) yes no) -(LE (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => (LEnoov (CMPW a (MULW x y)) yes no) -(LT (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => (LTnoov (CMPW a (MULW x y)) yes no) -(GE (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => (GEnoov (CMPW a (MULW x y)) yes no) -(GT (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => (GTnoov (CMPW a (MULW x y)) yes no) +((EQ|NE|LT|LE|GT|GE) (CMPconst [0] z:(MADD a x y)) yes no) && z.Uses==1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMN a (MUL x y)) yes no) +((EQ|NE|LT|LE|GT|GE) (CMPconst [0] z:(MSUB a x y)) yes no) && z.Uses==1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMP a (MUL x y)) yes no) +((EQ|NE|LT|LE|GT|GE) (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMNW a (MULW x y)) yes no) +((EQ|NE|LT|LE|GT|GE) (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMPW a (MULW x y)) yes no) // Absorb bit-tests into block -(Z (ANDconst [c] x) yes no) && oneBit(c) => (TBZ [int64(ntz64(c))] x yes no) -(NZ (ANDconst [c] x) yes no) && oneBit(c) => (TBNZ [int64(ntz64(c))] x yes no) -(ZW (ANDconst [c] x) yes no) && oneBit(int64(uint32(c))) => (TBZ [int64(ntz64(int64(uint32(c))))] x yes no) -(NZW (ANDconst [c] x) yes no) && oneBit(int64(uint32(c))) => (TBNZ [int64(ntz64(int64(uint32(c))))] x yes no) -(EQ (TSTconst [c] x) yes no) && oneBit(c) => (TBZ [int64(ntz64(c))] x yes no) -(NE (TSTconst [c] x) yes no) && oneBit(c) => (TBNZ [int64(ntz64(c))] x yes no) -(EQ (TSTWconst [c] x) yes no) && oneBit(int64(uint32(c))) => (TBZ [int64(ntz64(int64(uint32(c))))] x yes no) -(NE (TSTWconst [c] x) yes no) && oneBit(int64(uint32(c))) => (TBNZ [int64(ntz64(int64(uint32(c))))] x yes no) +(Z (ANDconst [c] x) yes no) && oneBit(c) => (TBZ [int64(ntz64(c))] x yes no) +(NZ (ANDconst [c] x) yes no) && oneBit(c) => (TBNZ [int64(ntz64(c))] x yes no) +(ZW (ANDconst [c] x) yes no) && oneBit(int64(uint32(c))) => (TBZ [int64(ntz64(int64(uint32(c))))] x yes no) +(NZW (ANDconst [c] x) yes no) && oneBit(int64(uint32(c))) => (TBNZ [int64(ntz64(int64(uint32(c))))] x yes no) +(EQ (TSTconst [c] x) yes no) && oneBit(c) => (TBZ [int64(ntz64(c))] x yes no) +(NE (TSTconst [c] x) yes no) && oneBit(c) => (TBNZ [int64(ntz64(c))] x yes no) +(EQ 
(TSTWconst [c] x) yes no) && oneBit(int64(uint32(c))) => (TBZ [int64(ntz64(int64(uint32(c))))] x yes no) +(NE (TSTWconst [c] x) yes no) && oneBit(int64(uint32(c))) => (TBNZ [int64(ntz64(int64(uint32(c))))] x yes no) // Test sign-bit for signed comparisons against zero (GE (CMPWconst [0] x) yes no) => (TBZ [31] x yes no) -(GE (CMPconst [0] x) yes no) => (TBZ [63] x yes no) -(LT (CMPWconst [0] x) yes no) => (TBNZ [31] x yes no) -(LT (CMPconst [0] x) yes no) => (TBNZ [63] x yes no) +(GE (CMPconst [0] x) yes no) => (TBZ [63] x yes no) +(LT (CMPWconst [0] x) yes no) => (TBNZ [31] x yes no) +(LT (CMPconst [0] x) yes no) => (TBNZ [63] x yes no) // fold offset into address (ADDconst [off1] (MOVDaddr [off2] {sym} ptr)) && is32Bit(off1+int64(off2)) => @@ -882,6 +736,7 @@ (MOVBload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVBloadidx ptr idx mem) (FMOVSload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (FMOVSloadidx ptr idx mem) (FMOVDload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (FMOVDloadidx ptr idx mem) + (MOVDloadidx ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVDload [int32(c)] ptr mem) (MOVDloadidx (MOVDconst [c]) ptr mem) && is32Bit(c) => (MOVDload [int32(c)] ptr mem) (MOVWUloadidx ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVWUload [int32(c)] ptr mem) @@ -972,20 +827,20 @@ (MOVQstorezero [off1+int32(off2)] {sym} ptr mem) // register indexed store -(MOVDstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (MOVDstoreidx ptr idx val mem) -(MOVWstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (MOVWstoreidx ptr idx val mem) -(MOVHstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (MOVHstoreidx ptr idx val mem) -(MOVBstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (MOVBstoreidx ptr idx val mem) +(MOVDstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (MOVDstoreidx ptr idx val mem) +(MOVWstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (MOVWstoreidx ptr idx val mem) +(MOVHstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (MOVHstoreidx ptr idx val mem) +(MOVBstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (MOVBstoreidx ptr idx val mem) (FMOVDstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (FMOVDstoreidx ptr idx val mem) (FMOVSstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (FMOVSstoreidx ptr idx val mem) -(MOVDstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (MOVDstore [int32(c)] ptr val mem) -(MOVDstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) => (MOVDstore [int32(c)] idx val mem) -(MOVWstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (MOVWstore [int32(c)] ptr val mem) -(MOVWstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) => (MOVWstore [int32(c)] idx val mem) -(MOVHstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (MOVHstore [int32(c)] ptr val mem) -(MOVHstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) => (MOVHstore [int32(c)] idx val mem) -(MOVBstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (MOVBstore [int32(c)] ptr val mem) -(MOVBstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) => (MOVBstore [int32(c)] idx val mem) +(MOVDstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (MOVDstore [int32(c)] ptr val mem) +(MOVDstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) => (MOVDstore [int32(c)] idx val mem) +(MOVWstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (MOVWstore [int32(c)] ptr val mem) 
+(MOVWstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) => (MOVWstore [int32(c)] idx val mem) +(MOVHstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (MOVHstore [int32(c)] ptr val mem) +(MOVHstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) => (MOVHstore [int32(c)] idx val mem) +(MOVBstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (MOVBstore [int32(c)] ptr val mem) +(MOVBstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) => (MOVBstore [int32(c)] idx val mem) (FMOVDstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (FMOVDstore [int32(c)] ptr val mem) (FMOVDstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) => (FMOVDstore [int32(c)] idx val mem) (FMOVSstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (FMOVSstore [int32(c)] ptr val mem) @@ -995,14 +850,14 @@ (MOVDstore [off] {sym} (ADDshiftLL [3] ptr idx) val mem) && off == 0 && sym == nil => (MOVDstoreidx8 ptr idx val mem) (MOVWstore [off] {sym} (ADDshiftLL [2] ptr idx) val mem) && off == 0 && sym == nil => (MOVWstoreidx4 ptr idx val mem) (MOVHstore [off] {sym} (ADDshiftLL [1] ptr idx) val mem) && off == 0 && sym == nil => (MOVHstoreidx2 ptr idx val mem) -(MOVDstoreidx ptr (SLLconst [3] idx) val mem) => (MOVDstoreidx8 ptr idx val mem) -(MOVWstoreidx ptr (SLLconst [2] idx) val mem) => (MOVWstoreidx4 ptr idx val mem) -(MOVHstoreidx ptr (SLLconst [1] idx) val mem) => (MOVHstoreidx2 ptr idx val mem) -(MOVHstoreidx ptr (ADD idx idx) val mem) => (MOVHstoreidx2 ptr idx val mem) -(MOVDstoreidx (SLLconst [3] idx) ptr val mem) => (MOVDstoreidx8 ptr idx val mem) -(MOVWstoreidx (SLLconst [2] idx) ptr val mem) => (MOVWstoreidx4 ptr idx val mem) -(MOVHstoreidx (SLLconst [1] idx) ptr val mem) => (MOVHstoreidx2 ptr idx val mem) -(MOVHstoreidx (ADD idx idx) ptr val mem) => (MOVHstoreidx2 ptr idx val mem) +(MOVDstoreidx ptr (SLLconst [3] idx) val mem) => (MOVDstoreidx8 ptr idx val mem) +(MOVWstoreidx ptr (SLLconst [2] idx) val mem) => (MOVWstoreidx4 ptr idx val mem) +(MOVHstoreidx ptr (SLLconst [1] idx) val mem) => (MOVHstoreidx2 ptr idx val mem) +(MOVHstoreidx ptr (ADD idx idx) val mem) => (MOVHstoreidx2 ptr idx val mem) +(MOVDstoreidx (SLLconst [3] idx) ptr val mem) => (MOVDstoreidx8 ptr idx val mem) +(MOVWstoreidx (SLLconst [2] idx) ptr val mem) => (MOVWstoreidx4 ptr idx val mem) +(MOVHstoreidx (SLLconst [1] idx) ptr val mem) => (MOVHstoreidx2 ptr idx val mem) +(MOVHstoreidx (ADD idx idx) ptr val mem) => (MOVHstoreidx2 ptr idx val mem) (MOVDstoreidx8 ptr (MOVDconst [c]) val mem) && is32Bit(c<<3) => (MOVDstore [int32(c)<<3] ptr val mem) (MOVWstoreidx4 ptr (MOVDconst [c]) val mem) && is32Bit(c<<2) => (MOVWstore [int32(c)<<2] ptr val mem) (MOVHstoreidx2 ptr (MOVDconst [c]) val mem) && is32Bit(c<<1) => (MOVHstore [int32(c)<<1] ptr val mem) @@ -1138,11 +993,11 @@ (MOVDstorezeroidx ptr (SLLconst [3] idx) mem) => (MOVDstorezeroidx8 ptr idx mem) (MOVWstorezeroidx ptr (SLLconst [2] idx) mem) => (MOVWstorezeroidx4 ptr idx mem) (MOVHstorezeroidx ptr (SLLconst [1] idx) mem) => (MOVHstorezeroidx2 ptr idx mem) -(MOVHstorezeroidx ptr (ADD idx idx) mem) => (MOVHstorezeroidx2 ptr idx mem) +(MOVHstorezeroidx ptr (ADD idx idx) mem) => (MOVHstorezeroidx2 ptr idx mem) (MOVDstorezeroidx (SLLconst [3] idx) ptr mem) => (MOVDstorezeroidx8 ptr idx mem) (MOVWstorezeroidx (SLLconst [2] idx) ptr mem) => (MOVWstorezeroidx4 ptr idx mem) (MOVHstorezeroidx (SLLconst [1] idx) ptr mem) => (MOVHstorezeroidx2 ptr idx mem) -(MOVHstorezeroidx (ADD idx idx) ptr mem) => (MOVHstorezeroidx2 ptr idx mem) +(MOVHstorezeroidx (ADD idx idx) ptr mem) => (MOVHstorezeroidx2 ptr 
idx mem) (MOVDstoreidx8 ptr idx (MOVDconst [0]) mem) => (MOVDstorezeroidx8 ptr idx mem) (MOVWstoreidx4 ptr idx (MOVDconst [0]) mem) => (MOVWstorezeroidx4 ptr idx mem) (MOVHstoreidx2 ptr idx (MOVDconst [0]) mem) => (MOVHstorezeroidx2 ptr idx mem) @@ -1152,24 +1007,24 @@ // replace load from same location as preceding store with zero/sign extension (or copy in case of full width) // these seem to have bad interaction with other rules, resulting in slower code -//(MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBreg x) -//(MOVBUload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBUreg x) -//(MOVHload [off] {sym} ptr (MOVHstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVHreg x) -//(MOVHUload [off] {sym} ptr (MOVHstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVHUreg x) -//(MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWreg x) -//(MOVWUload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWUreg x) -//(MOVDload [off] {sym} ptr (MOVDstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x -//(LDP [off] {sym} ptr (STP [off2] {sym2} ptr2 x y _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x y +//(MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBreg x) +//(MOVBUload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBUreg x) +//(MOVHload [off] {sym} ptr (MOVHstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVHreg x) +//(MOVHUload [off] {sym} ptr (MOVHstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVHUreg x) +//(MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWreg x) +//(MOVWUload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWUreg x) +//(MOVDload [off] {sym} ptr (MOVDstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x //(FMOVSload [off] {sym} ptr (FMOVSstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x //(FMOVDload [off] {sym} ptr (FMOVDstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x +//(LDP [off] {sym} ptr (STP [off2] {sym2} ptr2 x y _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x y -(MOVBload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0]) +(MOVBload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0]) (MOVBUload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0]) -(MOVHload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0]) +(MOVHload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0]) (MOVHUload [off] {sym} ptr (MOVHstorezero [off2] 
{sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0]) -(MOVWload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0]) +(MOVWload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0]) (MOVWUload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0]) -(MOVDload [off] {sym} ptr (MOVDstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0]) +(MOVDload [off] {sym} ptr (MOVDstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0]) (MOVBloadidx ptr idx (MOVBstorezeroidx ptr2 idx2 _)) && (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) => (MOVDconst [0]) @@ -1186,97 +1041,97 @@ (MOVDloadidx ptr idx (MOVDstorezeroidx ptr2 idx2 _)) && (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) => (MOVDconst [0]) -(MOVHloadidx2 ptr idx (MOVHstorezeroidx2 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0]) +(MOVHloadidx2 ptr idx (MOVHstorezeroidx2 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0]) (MOVHUloadidx2 ptr idx (MOVHstorezeroidx2 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0]) -(MOVWloadidx4 ptr idx (MOVWstorezeroidx4 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0]) +(MOVWloadidx4 ptr idx (MOVWstorezeroidx4 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0]) (MOVWUloadidx4 ptr idx (MOVWstorezeroidx4 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0]) -(MOVDloadidx8 ptr idx (MOVDstorezeroidx8 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0]) +(MOVDloadidx8 ptr idx (MOVDstorezeroidx8 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0]) // don't extend after proper load -(MOVBreg x:(MOVBload _ _)) => (MOVDreg x) +(MOVBreg x:(MOVBload _ _)) => (MOVDreg x) (MOVBUreg x:(MOVBUload _ _)) => (MOVDreg x) -(MOVHreg x:(MOVBload _ _)) => (MOVDreg x) -(MOVHreg x:(MOVBUload _ _)) => (MOVDreg x) -(MOVHreg x:(MOVHload _ _)) => (MOVDreg x) +(MOVHreg x:(MOVBload _ _)) => (MOVDreg x) +(MOVHreg x:(MOVBUload _ _)) => (MOVDreg x) +(MOVHreg x:(MOVHload _ _)) => (MOVDreg x) (MOVHUreg x:(MOVBUload _ _)) => (MOVDreg x) (MOVHUreg x:(MOVHUload _ _)) => (MOVDreg x) -(MOVWreg x:(MOVBload _ _)) => (MOVDreg x) -(MOVWreg x:(MOVBUload _ _)) => (MOVDreg x) -(MOVWreg x:(MOVHload _ _)) => (MOVDreg x) -(MOVWreg x:(MOVHUload _ _)) => (MOVDreg x) -(MOVWreg x:(MOVWload _ _)) => (MOVDreg x) +(MOVWreg x:(MOVBload _ _)) => (MOVDreg x) +(MOVWreg x:(MOVBUload _ _)) => (MOVDreg x) +(MOVWreg x:(MOVHload _ _)) => (MOVDreg x) +(MOVWreg x:(MOVHUload _ _)) => (MOVDreg x) +(MOVWreg x:(MOVWload _ _)) => (MOVDreg x) (MOVWUreg x:(MOVBUload _ _)) => (MOVDreg x) (MOVWUreg x:(MOVHUload _ _)) => (MOVDreg x) (MOVWUreg x:(MOVWUload _ _)) => (MOVDreg x) -(MOVBreg x:(MOVBloadidx _ _ _)) => (MOVDreg x) -(MOVBUreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x) -(MOVHreg x:(MOVBloadidx _ _ _)) => (MOVDreg x) -(MOVHreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x) -(MOVHreg x:(MOVHloadidx _ _ _)) => (MOVDreg x) -(MOVHUreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x) -(MOVHUreg x:(MOVHUloadidx _ _ _)) => (MOVDreg x) 
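// For illustration (not part of the original diff): the extension-folding rules in this block exist because ARM64 sized loads already extend into the full 64-bit register, so a separate sign/zero extension after the load is redundant. Assuming a typical arm64 build (function and variable names below are hypothetical), a load plus extension such as
//
//	func loadByte(p *int8) int64 {
//		return int64(*p) // MOVBload already sign-extends to 64 bits
//	}
//
// should need no extra MOVBreg: the extension is rewritten to MOVDreg, a plain register copy that the register allocator can drop.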
-(MOVWreg x:(MOVBloadidx _ _ _)) => (MOVDreg x) -(MOVWreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x) -(MOVWreg x:(MOVHloadidx _ _ _)) => (MOVDreg x) -(MOVWreg x:(MOVHUloadidx _ _ _)) => (MOVDreg x) -(MOVWreg x:(MOVWloadidx _ _ _)) => (MOVDreg x) -(MOVWUreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x) -(MOVWUreg x:(MOVHUloadidx _ _ _)) => (MOVDreg x) -(MOVWUreg x:(MOVWUloadidx _ _ _)) => (MOVDreg x) -(MOVHreg x:(MOVHloadidx2 _ _ _)) => (MOVDreg x) +(MOVBreg x:(MOVBloadidx _ _ _)) => (MOVDreg x) +(MOVBUreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x) +(MOVHreg x:(MOVBloadidx _ _ _)) => (MOVDreg x) +(MOVHreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x) +(MOVHreg x:(MOVHloadidx _ _ _)) => (MOVDreg x) +(MOVHUreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x) +(MOVHUreg x:(MOVHUloadidx _ _ _)) => (MOVDreg x) +(MOVWreg x:(MOVBloadidx _ _ _)) => (MOVDreg x) +(MOVWreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x) +(MOVWreg x:(MOVHloadidx _ _ _)) => (MOVDreg x) +(MOVWreg x:(MOVHUloadidx _ _ _)) => (MOVDreg x) +(MOVWreg x:(MOVWloadidx _ _ _)) => (MOVDreg x) +(MOVWUreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x) +(MOVWUreg x:(MOVHUloadidx _ _ _)) => (MOVDreg x) +(MOVWUreg x:(MOVWUloadidx _ _ _)) => (MOVDreg x) +(MOVHreg x:(MOVHloadidx2 _ _ _)) => (MOVDreg x) (MOVHUreg x:(MOVHUloadidx2 _ _ _)) => (MOVDreg x) -(MOVWreg x:(MOVHloadidx2 _ _ _)) => (MOVDreg x) -(MOVWreg x:(MOVHUloadidx2 _ _ _)) => (MOVDreg x) -(MOVWreg x:(MOVWloadidx4 _ _ _)) => (MOVDreg x) +(MOVWreg x:(MOVHloadidx2 _ _ _)) => (MOVDreg x) +(MOVWreg x:(MOVHUloadidx2 _ _ _)) => (MOVDreg x) +(MOVWreg x:(MOVWloadidx4 _ _ _)) => (MOVDreg x) (MOVWUreg x:(MOVHUloadidx2 _ _ _)) => (MOVDreg x) (MOVWUreg x:(MOVWUloadidx4 _ _ _)) => (MOVDreg x) // fold double extensions -(MOVBreg x:(MOVBreg _)) => (MOVDreg x) +(MOVBreg x:(MOVBreg _)) => (MOVDreg x) (MOVBUreg x:(MOVBUreg _)) => (MOVDreg x) -(MOVHreg x:(MOVBreg _)) => (MOVDreg x) -(MOVHreg x:(MOVBUreg _)) => (MOVDreg x) -(MOVHreg x:(MOVHreg _)) => (MOVDreg x) +(MOVHreg x:(MOVBreg _)) => (MOVDreg x) +(MOVHreg x:(MOVBUreg _)) => (MOVDreg x) +(MOVHreg x:(MOVHreg _)) => (MOVDreg x) (MOVHUreg x:(MOVBUreg _)) => (MOVDreg x) (MOVHUreg x:(MOVHUreg _)) => (MOVDreg x) -(MOVWreg x:(MOVBreg _)) => (MOVDreg x) -(MOVWreg x:(MOVBUreg _)) => (MOVDreg x) -(MOVWreg x:(MOVHreg _)) => (MOVDreg x) -(MOVWreg x:(MOVWreg _)) => (MOVDreg x) +(MOVWreg x:(MOVBreg _)) => (MOVDreg x) +(MOVWreg x:(MOVBUreg _)) => (MOVDreg x) +(MOVWreg x:(MOVHreg _)) => (MOVDreg x) +(MOVWreg x:(MOVWreg _)) => (MOVDreg x) (MOVWUreg x:(MOVBUreg _)) => (MOVDreg x) (MOVWUreg x:(MOVHUreg _)) => (MOVDreg x) (MOVWUreg x:(MOVWUreg _)) => (MOVDreg x) // don't extend before store -(MOVBstore [off] {sym} ptr (MOVBreg x) mem) => (MOVBstore [off] {sym} ptr x mem) +(MOVBstore [off] {sym} ptr (MOVBreg x) mem) => (MOVBstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr (MOVBUreg x) mem) => (MOVBstore [off] {sym} ptr x mem) -(MOVBstore [off] {sym} ptr (MOVHreg x) mem) => (MOVBstore [off] {sym} ptr x mem) +(MOVBstore [off] {sym} ptr (MOVHreg x) mem) => (MOVBstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr (MOVHUreg x) mem) => (MOVBstore [off] {sym} ptr x mem) -(MOVBstore [off] {sym} ptr (MOVWreg x) mem) => (MOVBstore [off] {sym} ptr x mem) +(MOVBstore [off] {sym} ptr (MOVWreg x) mem) => (MOVBstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVBstore [off] {sym} ptr x mem) -(MOVHstore [off] {sym} ptr (MOVHreg x) mem) => (MOVHstore [off] {sym} ptr x mem) +(MOVHstore [off] {sym} ptr (MOVHreg x) mem) => (MOVHstore [off] {sym} ptr x mem) (MOVHstore [off] {sym} ptr (MOVHUreg 
x) mem) => (MOVHstore [off] {sym} ptr x mem) -(MOVHstore [off] {sym} ptr (MOVWreg x) mem) => (MOVHstore [off] {sym} ptr x mem) +(MOVHstore [off] {sym} ptr (MOVWreg x) mem) => (MOVHstore [off] {sym} ptr x mem) (MOVHstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVHstore [off] {sym} ptr x mem) -(MOVWstore [off] {sym} ptr (MOVWreg x) mem) => (MOVWstore [off] {sym} ptr x mem) +(MOVWstore [off] {sym} ptr (MOVWreg x) mem) => (MOVWstore [off] {sym} ptr x mem) (MOVWstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVWstore [off] {sym} ptr x mem) -(MOVBstoreidx ptr idx (MOVBreg x) mem) => (MOVBstoreidx ptr idx x mem) -(MOVBstoreidx ptr idx (MOVBUreg x) mem) => (MOVBstoreidx ptr idx x mem) -(MOVBstoreidx ptr idx (MOVHreg x) mem) => (MOVBstoreidx ptr idx x mem) -(MOVBstoreidx ptr idx (MOVHUreg x) mem) => (MOVBstoreidx ptr idx x mem) -(MOVBstoreidx ptr idx (MOVWreg x) mem) => (MOVBstoreidx ptr idx x mem) -(MOVBstoreidx ptr idx (MOVWUreg x) mem) => (MOVBstoreidx ptr idx x mem) -(MOVHstoreidx ptr idx (MOVHreg x) mem) => (MOVHstoreidx ptr idx x mem) -(MOVHstoreidx ptr idx (MOVHUreg x) mem) => (MOVHstoreidx ptr idx x mem) -(MOVHstoreidx ptr idx (MOVWreg x) mem) => (MOVHstoreidx ptr idx x mem) -(MOVHstoreidx ptr idx (MOVWUreg x) mem) => (MOVHstoreidx ptr idx x mem) -(MOVWstoreidx ptr idx (MOVWreg x) mem) => (MOVWstoreidx ptr idx x mem) -(MOVWstoreidx ptr idx (MOVWUreg x) mem) => (MOVWstoreidx ptr idx x mem) -(MOVHstoreidx2 ptr idx (MOVHreg x) mem) => (MOVHstoreidx2 ptr idx x mem) +(MOVBstoreidx ptr idx (MOVBreg x) mem) => (MOVBstoreidx ptr idx x mem) +(MOVBstoreidx ptr idx (MOVBUreg x) mem) => (MOVBstoreidx ptr idx x mem) +(MOVBstoreidx ptr idx (MOVHreg x) mem) => (MOVBstoreidx ptr idx x mem) +(MOVBstoreidx ptr idx (MOVHUreg x) mem) => (MOVBstoreidx ptr idx x mem) +(MOVBstoreidx ptr idx (MOVWreg x) mem) => (MOVBstoreidx ptr idx x mem) +(MOVBstoreidx ptr idx (MOVWUreg x) mem) => (MOVBstoreidx ptr idx x mem) +(MOVHstoreidx ptr idx (MOVHreg x) mem) => (MOVHstoreidx ptr idx x mem) +(MOVHstoreidx ptr idx (MOVHUreg x) mem) => (MOVHstoreidx ptr idx x mem) +(MOVHstoreidx ptr idx (MOVWreg x) mem) => (MOVHstoreidx ptr idx x mem) +(MOVHstoreidx ptr idx (MOVWUreg x) mem) => (MOVHstoreidx ptr idx x mem) +(MOVWstoreidx ptr idx (MOVWreg x) mem) => (MOVWstoreidx ptr idx x mem) +(MOVWstoreidx ptr idx (MOVWUreg x) mem) => (MOVWstoreidx ptr idx x mem) +(MOVHstoreidx2 ptr idx (MOVHreg x) mem) => (MOVHstoreidx2 ptr idx x mem) (MOVHstoreidx2 ptr idx (MOVHUreg x) mem) => (MOVHstoreidx2 ptr idx x mem) -(MOVHstoreidx2 ptr idx (MOVWreg x) mem) => (MOVHstoreidx2 ptr idx x mem) +(MOVHstoreidx2 ptr idx (MOVWreg x) mem) => (MOVHstoreidx2 ptr idx x mem) (MOVHstoreidx2 ptr idx (MOVWUreg x) mem) => (MOVHstoreidx2 ptr idx x mem) -(MOVWstoreidx4 ptr idx (MOVWreg x) mem) => (MOVWstoreidx4 ptr idx x mem) +(MOVWstoreidx4 ptr idx (MOVWreg x) mem) => (MOVWstoreidx4 ptr idx x mem) (MOVWstoreidx4 ptr idx (MOVWUreg x) mem) => (MOVWstoreidx4 ptr idx x mem) // if a register move has only 1 use, just use the same register without emitting instruction @@ -1288,18 +1143,18 @@ (MOVDnop (MOVDconst [c])) => (MOVDconst [c]) // fold constant into arithmetic ops -(ADD x (MOVDconst [c])) && !t.IsPtr() => (ADDconst [c] x) -(SUB x (MOVDconst [c])) => (SUBconst [c] x) -(AND x (MOVDconst [c])) => (ANDconst [c] x) -(OR x (MOVDconst [c])) => (ORconst [c] x) -(XOR x (MOVDconst [c])) => (XORconst [c] x) -(TST x (MOVDconst [c])) => (TSTconst [c] x) +(ADD x (MOVDconst [c])) && !t.IsPtr() => (ADDconst [c] x) +(SUB x (MOVDconst [c])) => (SUBconst [c] x)
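// For illustration (not part of the original diff): the constant-folding rules in this hunk move a constant operand into the instruction's immediate field. Assuming the constant is encodable as an arm64 immediate (the function name below is hypothetical), code like
//
//	func maskLow(x uint64) uint64 {
//		return x & 0xff // folds to a single ANDconst; 0xff is never materialized in a register
//	}
//
// should compile to one AND-with-immediate instruction.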
+(AND x (MOVDconst [c])) => (ANDconst [c] x) +(OR x (MOVDconst [c])) => (ORconst [c] x) +(XOR x (MOVDconst [c])) => (XORconst [c] x) +(TST x (MOVDconst [c])) => (TSTconst [c] x) (TSTW x (MOVDconst [c])) => (TSTWconst [int32(c)] x) -(CMN x (MOVDconst [c])) => (CMNconst [c] x) +(CMN x (MOVDconst [c])) => (CMNconst [c] x) (CMNW x (MOVDconst [c])) => (CMNWconst [int32(c)] x) -(BIC x (MOVDconst [c])) => (ANDconst [^c] x) -(EON x (MOVDconst [c])) => (XORconst [^c] x) -(ORN x (MOVDconst [c])) => (ORconst [^c] x) +(BIC x (MOVDconst [c])) => (ANDconst [^c] x) +(EON x (MOVDconst [c])) => (XORconst [^c] x) +(ORN x (MOVDconst [c])) => (ORconst [^c] x) (SLL x (MOVDconst [c])) => (SLLconst x [c&63]) (SRL x (MOVDconst [c])) => (SRLconst x [c&63]) @@ -1308,12 +1163,12 @@ (SRL x (ANDconst [63] y)) => (SRL x y) (SRA x (ANDconst [63] y)) => (SRA x y) -(CMP x (MOVDconst [c])) => (CMPconst [c] x) -(CMP (MOVDconst [c]) x) => (InvertFlags (CMPconst [c] x)) +(CMP x (MOVDconst [c])) => (CMPconst [c] x) +(CMP (MOVDconst [c]) x) => (InvertFlags (CMPconst [c] x)) (CMPW x (MOVDconst [c])) => (CMPWconst [int32(c)] x) (CMPW (MOVDconst [c]) x) => (InvertFlags (CMPWconst [int32(c)] x)) -(ROR x (MOVDconst [c])) => (RORconst x [c&63]) +(ROR x (MOVDconst [c])) => (RORconst x [c&63]) (RORW x (MOVDconst [c])) => (RORWconst x [c&31]) (ADDSflags x (MOVDconst [c])) => (ADDSconstflags [c] x) @@ -1324,10 +1179,10 @@ ((CMP|CMPW) x y) && canonLessThan(x,y) => (InvertFlags ((CMP|CMPW) y x)) // mul-neg => mneg -(NEG (MUL x y)) => (MNEG x y) -(NEG (MULW x y)) => (MNEGW x y) -(MUL (NEG x) y) => (MNEG x y) -(MULW (NEG x) y) => (MNEGW x y) +(NEG (MUL x y)) => (MNEG x y) +(NEG (MULW x y)) => (MNEGW x y) +(MUL (NEG x) y) => (MNEG x y) +(MULW (NEG x) y) => (MNEGW x y) // madd/msub (ADD a l:(MUL x y)) && l.Uses==1 && clobber(l) => (MADD a x y) @@ -1483,12 +1338,12 @@ (MSUBW a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c) => (SUBshiftLL a (ADDshiftLL x x [3]) [log64(c/9)]) // div by constant -(UDIV x (MOVDconst [1])) => x -(UDIV x (MOVDconst [c])) && isPowerOfTwo64(c) => (SRLconst [log64(c)] x) +(UDIV x (MOVDconst [1])) => x +(UDIV x (MOVDconst [c])) && isPowerOfTwo64(c) => (SRLconst [log64(c)] x) (UDIVW x (MOVDconst [c])) && uint32(c)==1 => x (UDIVW x (MOVDconst [c])) && isPowerOfTwo64(c) && is32Bit(c) => (SRLconst [log64(c)] x) -(UMOD _ (MOVDconst [1])) => (MOVDconst [0]) -(UMOD x (MOVDconst [c])) && isPowerOfTwo64(c) => (ANDconst [c-1] x) +(UMOD _ (MOVDconst [1])) => (MOVDconst [0]) +(UMOD x (MOVDconst [c])) && isPowerOfTwo64(c) => (ANDconst [c-1] x) (UMODW _ (MOVDconst [c])) && uint32(c)==1 => (MOVDconst [0]) (UMODW x (MOVDconst [c])) && isPowerOfTwo64(c) && is32Bit(c) => (ANDconst [c-1] x) @@ -1592,12 +1447,12 @@ (CMNWconst (MOVDconst [x]) [y]) => (FlagConstant [addFlags32(int32(x),y)]) // other known comparisons -(CMPconst (MOVBUreg _) [c]) && 0xff < c => (FlagConstant [subFlags64(0,1)]) -(CMPconst (MOVHUreg _) [c]) && 0xffff < c => (FlagConstant [subFlags64(0,1)]) -(CMPconst (MOVWUreg _) [c]) && 0xffffffff < c => (FlagConstant [subFlags64(0,1)]) -(CMPconst (ANDconst _ [m]) [n]) && 0 <= m && m < n => (FlagConstant [subFlags64(0,1)]) -(CMPconst (SRLconst _ [c]) [n]) && 0 <= n && 0 < c && c <= 63 && (1<<uint64(64-c)) <= uint64(n) => (FlagConstant [subFlags64(0,1)]) -(CMPWconst (MOVBUreg _) [c]) && 0xff < c => (FlagConstant [subFlags64(0,1)]) +(CMPconst (MOVBUreg _) [c]) && 0xff < c => (FlagConstant [subFlags64(0,1)]) +(CMPconst (MOVHUreg _) [c]) && 0xffff < c => (FlagConstant [subFlags64(0,1)])
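// For illustration (not part of the original diff): the known-comparison rules fire when one operand's range is bounded by a zero extension. A MOVBUreg value is always in [0, 255], so comparing it against a larger constant has a statically known outcome and folds to a FlagConstant. Assuming a current arm64 toolchain (the function name below is hypothetical),
//
//	func alwaysBelow(b byte) bool {
//		return uint64(b) < 300 // result is statically known; no CMP should remain
//	}
//
// is expected to reduce to a constant true.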
+(CMPconst (MOVWUreg _) [c]) && 0xffffffff < c => (FlagConstant [subFlags64(0,1)]) +(CMPconst (ANDconst _ [m]) [n]) && 0 <= m && m < n => (FlagConstant [subFlags64(0,1)]) +(CMPconst (SRLconst _ [c]) [n]) && 0 <= n && 0 < c && c <= 63 && (1<<uint64(64-c)) <= uint64(n) => (FlagConstant [subFlags64(0,1)]) +(CMPWconst (MOVBUreg _) [c]) && 0xff < c => (FlagConstant [subFlags64(0,1)]) (CMPWconst (MOVHUreg _) [c]) && 0xffff < c => (FlagConstant [subFlags64(0,1)]) // absorb flag constants into branches @@ -1643,26 +1498,26 @@ (GEnoov (FlagConstant [fc]) yes no) && fc.geNoov() => (First yes no) (GEnoov (FlagConstant [fc]) yes no) && !fc.geNoov() => (First no yes) -(Z (MOVDconst [0]) yes no) => (First yes no) -(Z (MOVDconst [c]) yes no) && c != 0 => (First no yes) -(NZ (MOVDconst [0]) yes no) => (First no yes) -(NZ (MOVDconst [c]) yes no) && c != 0 => (First yes no) -(ZW (MOVDconst [c]) yes no) && int32(c) == 0 => (First yes no) -(ZW (MOVDconst [c]) yes no) && int32(c) != 0 => (First no yes) +(Z (MOVDconst [0]) yes no) => (First yes no) +(Z (MOVDconst [c]) yes no) && c != 0 => (First no yes) +(NZ (MOVDconst [0]) yes no) => (First no yes) +(NZ (MOVDconst [c]) yes no) && c != 0 => (First yes no) +(ZW (MOVDconst [c]) yes no) && int32(c) == 0 => (First yes no) +(ZW (MOVDconst [c]) yes no) && int32(c) != 0 => (First no yes) (NZW (MOVDconst [c]) yes no) && int32(c) == 0 => (First no yes) (NZW (MOVDconst [c]) yes no) && int32(c) != 0 => (First yes no) // absorb InvertFlags into branches -(LT (InvertFlags cmp) yes no) => (GT cmp yes no) -(GT (InvertFlags cmp) yes no) => (LT cmp yes no) -(LE (InvertFlags cmp) yes no) => (GE cmp yes no) -(GE (InvertFlags cmp) yes no) => (LE cmp yes no) +(LT (InvertFlags cmp) yes no) => (GT cmp yes no) +(GT (InvertFlags cmp) yes no) => (LT cmp yes no) +(LE (InvertFlags cmp) yes no) => (GE cmp yes no) +(GE (InvertFlags cmp) yes no) => (LE cmp yes no) (ULT (InvertFlags cmp) yes no) => (UGT cmp yes no) (UGT (InvertFlags cmp) yes no) => (ULT cmp yes no) (ULE (InvertFlags cmp) yes no) => (UGE cmp yes no) (UGE (InvertFlags cmp) yes no) => (ULE cmp yes no) -(EQ (InvertFlags cmp) yes no) => (EQ cmp yes no) -(NE (InvertFlags cmp) yes no) => (NE cmp yes no) +(EQ (InvertFlags cmp) yes no) => (EQ cmp yes no) +(NE (InvertFlags cmp) yes no) => (NE cmp yes no) (FLT (InvertFlags cmp) yes no) => (FGT cmp yes no) (FGT (InvertFlags cmp) yes no) => (FLT cmp yes no) (FLE (InvertFlags cmp) yes no) => (FGE cmp yes no) @@ -1673,39 +1528,39 @@ (GTnoov (InvertFlags cmp) yes no) => (LTnoov cmp yes no) // absorb InvertFlags into conditional instructions -(CSEL [cc] x y (InvertFlags cmp)) => (CSEL [arm64Invert(cc)] x y cmp) -(CSEL0 [cc] x (InvertFlags cmp)) => (CSEL0 [arm64Invert(cc)] x cmp) -(CSETM [cc] (InvertFlags cmp)) => (CSETM [arm64Invert(cc)] cmp) +(CSEL [cc] x y (InvertFlags cmp)) => (CSEL [arm64Invert(cc)] x y cmp) +(CSEL0 [cc] x (InvertFlags cmp)) => (CSEL0 [arm64Invert(cc)] x cmp) +(CSETM [cc] (InvertFlags cmp)) => (CSETM [arm64Invert(cc)] cmp) (CSINC [cc] x y (InvertFlags cmp)) => (CSINC [arm64Invert(cc)] x y cmp) (CSINV [cc] x y (InvertFlags cmp)) => (CSINV [arm64Invert(cc)] x y cmp) (CSNEG [cc] x y (InvertFlags cmp)) => (CSNEG [arm64Invert(cc)] x y cmp) // absorb flag constants into boolean values -(Equal (FlagConstant [fc])) => (MOVDconst [b2i(fc.eq())]) -(NotEqual (FlagConstant [fc])) => (MOVDconst [b2i(fc.ne())]) -(LessThan (FlagConstant [fc])) => (MOVDconst [b2i(fc.lt())]) -(LessThanU (FlagConstant [fc])) => (MOVDconst [b2i(fc.ult())]) -(LessEqual (FlagConstant [fc])) => (MOVDconst [b2i(fc.le())]) -(LessEqualU (FlagConstant [fc])) => (MOVDconst [b2i(fc.ule())])
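// For illustration (not part of the original diff): once a comparison folds to a FlagConstant (for instance via the known-comparison rules above), every boolean consumer collapses to a 0/1 constant through b2i, and InvertFlags consumers flip their condition instead of redoing the compare. Assuming a current arm64 toolchain (the function name below is hypothetical),
//
//	func mirrored(x, y int64) (bool, bool) {
//		return x < y, y > x // one CMP read under mirrored conditions, not two compares
//	}
//
// should share a single comparison after CSE and InvertFlags absorption.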
-(GreaterThan (FlagConstant [fc])) => (MOVDconst [b2i(fc.gt())]) -(GreaterThanU (FlagConstant [fc])) => (MOVDconst [b2i(fc.ugt())]) -(GreaterEqual (FlagConstant [fc])) => (MOVDconst [b2i(fc.ge())]) +(Equal (FlagConstant [fc])) => (MOVDconst [b2i(fc.eq())]) +(NotEqual (FlagConstant [fc])) => (MOVDconst [b2i(fc.ne())]) +(LessThan (FlagConstant [fc])) => (MOVDconst [b2i(fc.lt())]) +(LessThanU (FlagConstant [fc])) => (MOVDconst [b2i(fc.ult())]) +(LessEqual (FlagConstant [fc])) => (MOVDconst [b2i(fc.le())]) +(LessEqualU (FlagConstant [fc])) => (MOVDconst [b2i(fc.ule())]) +(GreaterThan (FlagConstant [fc])) => (MOVDconst [b2i(fc.gt())]) +(GreaterThanU (FlagConstant [fc])) => (MOVDconst [b2i(fc.ugt())]) +(GreaterEqual (FlagConstant [fc])) => (MOVDconst [b2i(fc.ge())]) (GreaterEqualU (FlagConstant [fc])) => (MOVDconst [b2i(fc.uge())]) // absorb InvertFlags into boolean values -(Equal (InvertFlags x)) => (Equal x) -(NotEqual (InvertFlags x)) => (NotEqual x) -(LessThan (InvertFlags x)) => (GreaterThan x) -(LessThanU (InvertFlags x)) => (GreaterThanU x) -(GreaterThan (InvertFlags x)) => (LessThan x) -(GreaterThanU (InvertFlags x)) => (LessThanU x) -(LessEqual (InvertFlags x)) => (GreaterEqual x) -(LessEqualU (InvertFlags x)) => (GreaterEqualU x) -(GreaterEqual (InvertFlags x)) => (LessEqual x) +(Equal (InvertFlags x)) => (Equal x) +(NotEqual (InvertFlags x)) => (NotEqual x) +(LessThan (InvertFlags x)) => (GreaterThan x) +(LessThanU (InvertFlags x)) => (GreaterThanU x) +(GreaterThan (InvertFlags x)) => (LessThan x) +(GreaterThanU (InvertFlags x)) => (LessThanU x) +(LessEqual (InvertFlags x)) => (GreaterEqual x) +(LessEqualU (InvertFlags x)) => (GreaterEqualU x) +(GreaterEqual (InvertFlags x)) => (LessEqual x) (GreaterEqualU (InvertFlags x)) => (LessEqualU x) -(LessThanF (InvertFlags x)) => (GreaterThanF x) -(LessEqualF (InvertFlags x)) => (GreaterEqualF x) -(GreaterThanF (InvertFlags x)) => (LessThanF x) +(LessThanF (InvertFlags x)) => (GreaterThanF x) +(LessEqualF (InvertFlags x)) => (GreaterEqualF x) +(GreaterThanF (InvertFlags x)) => (LessThanF x) (GreaterEqualF (InvertFlags x)) => (LessEqualF x) // Boolean-generating instructions (NOTE: NOT all boolean Values) always @@ -1713,28 +1568,26 @@ (MOVBUreg x:((Equal|NotEqual|LessThan|LessThanU|LessThanF|LessEqual|LessEqualU|LessEqualF|GreaterThan|GreaterThanU|GreaterThanF|GreaterEqual|GreaterEqualU|GreaterEqualF) _)) => (MOVDreg x) // omit unsign extension - (MOVWUreg x) && zeroUpper32Bits(x, 3) => x // omit sign extension - (MOVWreg (ANDconst x [c])) && uint64(c) & uint64(0xffffffff80000000) == 0 => (ANDconst x [c]) (MOVHreg (ANDconst x [c])) && uint64(c) & uint64(0xffffffffffff8000) == 0 => (ANDconst x [c]) (MOVBreg (ANDconst x [c])) && uint64(c) & uint64(0xffffffffffffff80) == 0 => (ANDconst x [c]) // absorb flag constants into conditional instructions -(CSEL [cc] x _ flag) && ccARM64Eval(cc, flag) > 0 => x -(CSEL [cc] _ y flag) && ccARM64Eval(cc, flag) < 0 => y -(CSEL0 [cc] x flag) && ccARM64Eval(cc, flag) > 0 => x -(CSEL0 [cc] _ flag) && ccARM64Eval(cc, flag) < 0 => (MOVDconst [0]) +(CSEL [cc] x _ flag) && ccARM64Eval(cc, flag) > 0 => x +(CSEL [cc] _ y flag) && ccARM64Eval(cc, flag) < 0 => y +(CSEL0 [cc] x flag) && ccARM64Eval(cc, flag) > 0 => x +(CSEL0 [cc] _ flag) && ccARM64Eval(cc, flag) < 0 => (MOVDconst [0]) (CSNEG [cc] x _ flag) && ccARM64Eval(cc, flag) > 0 => x (CSNEG [cc] _ y flag) && ccARM64Eval(cc, flag) < 0 => (NEG y) (CSINV [cc] x _ flag) && ccARM64Eval(cc, flag) > 0 => x (CSINV [cc] _ y flag) && ccARM64Eval(cc, flag) < 0 => (Not y)
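// For illustration (not part of the original diff): judging by these rules, ccARM64Eval reports whether condition cc is statically true (> 0) or false (< 0) for an already-known flag value, letting a conditional select collapse to one of its inputs. Combined with the known-comparison folding above, a select such as (names below are hypothetical)
//
//	func pick(b byte, x, y int64) int64 {
//		if uint64(b) < 300 { // always true; folds to a flag constant
//			return x
//		}
//		return y
//	}
//
// should keep only the x path, with no CSEL or branch left behind.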
(CSINC [cc] x _ flag) && ccARM64Eval(cc, flag) > 0 => x (CSINC [cc] _ y flag) && ccARM64Eval(cc, flag) < 0 => (ADDconst [1] y) -(CSETM [cc] flag) && ccARM64Eval(cc, flag) > 0 => (MOVDconst [-1]) -(CSETM [cc] flag) && ccARM64Eval(cc, flag) < 0 => (MOVDconst [0]) +(CSETM [cc] flag) && ccARM64Eval(cc, flag) > 0 => (MOVDconst [-1]) +(CSETM [cc] flag) && ccARM64Eval(cc, flag) < 0 => (MOVDconst [0]) // absorb flags back into boolean CSEL (CSEL [cc] x y (CMPWconst [0] boolval)) && cc == OpARM64NotEqual && flagArg(boolval) != nil => @@ -1960,11 +1813,11 @@ // int64(x << lc) (MOVWreg (SLLconst [lc] x)) && lc < 32 => (SBFIZ [armBFAuxInt(lc, 32-lc)] x) (MOVHreg (SLLconst [lc] x)) && lc < 16 => (SBFIZ [armBFAuxInt(lc, 16-lc)] x) -(MOVBreg (SLLconst [lc] x)) && lc < 8 => (SBFIZ [armBFAuxInt(lc, 8-lc)] x) +(MOVBreg (SLLconst [lc] x)) && lc < 8 => (SBFIZ [armBFAuxInt(lc, 8-lc)] x) // int64(x) << lc (SLLconst [lc] (MOVWreg x)) => (SBFIZ [armBFAuxInt(lc, min(32, 64-lc))] x) (SLLconst [lc] (MOVHreg x)) => (SBFIZ [armBFAuxInt(lc, min(16, 64-lc))] x) -(SLLconst [lc] (MOVBreg x)) => (SBFIZ [armBFAuxInt(lc, min(8, 64-lc))] x) +(SLLconst [lc] (MOVBreg x)) => (SBFIZ [armBFAuxInt(lc, min(8, 64-lc))] x) // sbfx // (x << lc) >> rc @@ -1972,7 +1825,7 @@ // int64(x) >> rc (SRAconst [rc] (MOVWreg x)) && rc < 32 => (SBFX [armBFAuxInt(rc, 32-rc)] x) (SRAconst [rc] (MOVHreg x)) && rc < 16 => (SBFX [armBFAuxInt(rc, 16-rc)] x) -(SRAconst [rc] (MOVBreg x)) && rc < 8 => (SBFX [armBFAuxInt(rc, 8-rc)] x) +(SRAconst [rc] (MOVBreg x)) && rc < 8 => (SBFX [armBFAuxInt(rc, 8-rc)] x) // merge sbfx and sign-extension into sbfx (MOVWreg (SBFX [bfc] x)) && bfc.getARM64BFwidth() <= 32 => (SBFX [bfc] x) (MOVHreg (SBFX [bfc] x)) && bfc.getARM64BFwidth() <= 16 => (SBFX [bfc] x) @@ -1991,11 +1844,11 @@ // uint64(x) << lc (SLLconst [lc] (MOVWUreg x)) => (UBFIZ [armBFAuxInt(lc, min(32, 64-lc))] x) (SLLconst [lc] (MOVHUreg x)) => (UBFIZ [armBFAuxInt(lc, min(16, 64-lc))] x) -(SLLconst [lc] (MOVBUreg x)) => (UBFIZ [armBFAuxInt(lc, min(8, 64-lc))] x) +(SLLconst [lc] (MOVBUreg x)) => (UBFIZ [armBFAuxInt(lc, min(8, 64-lc))] x) // uint64(x << lc) (MOVWUreg (SLLconst [lc] x)) && lc < 32 => (UBFIZ [armBFAuxInt(lc, 32-lc)] x) (MOVHUreg (SLLconst [lc] x)) && lc < 16 => (UBFIZ [armBFAuxInt(lc, 16-lc)] x) -(MOVBUreg (SLLconst [lc] x)) && lc < 8 => (UBFIZ [armBFAuxInt(lc, 8-lc)] x) +(MOVBUreg (SLLconst [lc] x)) && lc < 8 => (UBFIZ [armBFAuxInt(lc, 8-lc)] x) // merge ANDconst into ubfiz // (x & ac) << sc @@ -2011,11 +1864,11 @@ // uint64(x) >> rc (SRLconst [rc] (MOVWUreg x)) && rc < 32 => (UBFX [armBFAuxInt(rc, 32-rc)] x) (SRLconst [rc] (MOVHUreg x)) && rc < 16 => (UBFX [armBFAuxInt(rc, 16-rc)] x) -(SRLconst [rc] (MOVBUreg x)) && rc < 8 => (UBFX [armBFAuxInt(rc, 8-rc)] x) +(SRLconst [rc] (MOVBUreg x)) && rc < 8 => (UBFX [armBFAuxInt(rc, 8-rc)] x) // uint64(x >> rc) (MOVWUreg (SRLconst [rc] x)) && rc < 32 => (UBFX [armBFAuxInt(rc, 32)] x) (MOVHUreg (SRLconst [rc] x)) && rc < 16 => (UBFX [armBFAuxInt(rc, 16)] x) -(MOVBUreg (SRLconst [rc] x)) && rc < 8 => (UBFX [armBFAuxInt(rc, 8)] x) +(MOVBUreg (SRLconst [rc] x)) && rc < 8 => (UBFX [armBFAuxInt(rc, 8)] x) // merge ANDconst into ubfx // (x >> sc) & ac (ANDconst [ac] (SRLconst [sc] x)) && isARM64BFMask(sc, ac, 0) @@ -2820,7 +2673,7 @@ && clobber(x) => (MOVDstoreidx ptr1 (SLLconst [2] idx1) w0 mem) (MOVBstore [i] {s} ptr w - x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) + x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w) x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] w) x3:(MOVBstore [i-4] {s}
ptr (SRLconst [32] w) @@ -2837,7 +2690,7 @@ && clobber(x0, x1, x2, x3, x4, x5, x6) => (MOVDstore [i-7] {s} ptr (REV w) mem) (MOVBstore [7] {s} p w - x0:(MOVBstore [6] {s} p (SRLconst [8] w) + x0:(MOVBstore [6] {s} p (SRLconst [8] w) x1:(MOVBstore [5] {s} p (SRLconst [16] w) x2:(MOVBstore [4] {s} p (SRLconst [24] w) x3:(MOVBstore [3] {s} p (SRLconst [32] w) @@ -2857,9 +2710,9 @@ && clobber(x0, x1, x2, x3, x4, x5, x6) => (MOVDstoreidx ptr0 idx0 (REV w) mem) (MOVBstore [i] {s} ptr w - x0:(MOVBstore [i-1] {s} ptr (UBFX [armBFAuxInt(8, 24)] w) + x0:(MOVBstore [i-1] {s} ptr (UBFX [armBFAuxInt(8, 24)] w) x1:(MOVBstore [i-2] {s} ptr (UBFX [armBFAuxInt(16, 16)] w) - x2:(MOVBstore [i-3] {s} ptr (UBFX [armBFAuxInt(24, 8)] w) mem)))) + x2:(MOVBstore [i-3] {s} ptr (UBFX [armBFAuxInt(24, 8)] w) mem)))) && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 @@ -2878,7 +2731,7 @@ && clobber(x0, x1, x2) => (MOVWstoreidx ptr0 idx0 (REVW w) mem) (MOVBstoreidx ptr (ADDconst [3] idx) w - x0:(MOVBstoreidx ptr (ADDconst [2] idx) (UBFX [armBFAuxInt(8, 24)] w) + x0:(MOVBstoreidx ptr (ADDconst [2] idx) (UBFX [armBFAuxInt(8, 24)] w) x1:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [armBFAuxInt(16, 16)] w) x2:(MOVBstoreidx ptr idx (UBFX [armBFAuxInt(24, 8)] w) mem)))) && x0.Uses == 1 @@ -2887,16 +2740,16 @@ && clobber(x0, x1, x2) => (MOVWstoreidx ptr idx (REVW w) mem) (MOVBstoreidx ptr idx w - x0:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [armBFAuxInt(8, 24)] w) + x0:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [armBFAuxInt(8, 24)] w) x1:(MOVBstoreidx ptr (ADDconst [2] idx) (UBFX [armBFAuxInt(16, 16)] w) - x2:(MOVBstoreidx ptr (ADDconst [3] idx) (UBFX [armBFAuxInt(24, 8)] w) mem)))) + x2:(MOVBstoreidx ptr (ADDconst [3] idx) (UBFX [armBFAuxInt(24, 8)] w) mem)))) && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0, x1, x2) => (MOVWstoreidx ptr idx w mem) (MOVBstore [i] {s} ptr w - x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) + x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] (MOVDreg w)) x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] (MOVDreg w)) mem)))) && x0.Uses == 1 @@ -2917,7 +2770,7 @@ && clobber(x0, x1, x2) => (MOVWstoreidx ptr0 idx0 (REVW w) mem) (MOVBstore [i] {s} ptr w - x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) + x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w) x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] w) mem)))) && x0.Uses == 1 @@ -2987,25 +2840,25 @@ => (MOVHstoreidx ptr0 idx0 (REV16W w) mem) // FP simplification -(FNEGS (FMULS x y)) => (FNMULS x y) -(FNEGD (FMULD x y)) => (FNMULD x y) -(FMULS (FNEGS x) y) => (FNMULS x y) -(FMULD (FNEGD x) y) => (FNMULD x y) -(FNEGS (FNMULS x y)) => (FMULS x y) -(FNEGD (FNMULD x y)) => (FMULD x y) -(FNMULS (FNEGS x) y) => (FMULS x y) -(FNMULD (FNEGD x) y) => (FMULD x y) +(FNEGS (FMULS x y)) => (FNMULS x y) +(FNEGD (FMULD x y)) => (FNMULD x y) +(FMULS (FNEGS x) y) => (FNMULS x y) +(FMULD (FNEGD x) y) => (FNMULD x y) +(FNEGS (FNMULS x y)) => (FMULS x y) +(FNEGD (FNMULD x y)) => (FMULD x y) +(FNMULS (FNEGS x) y) => (FMULS x y) +(FNMULD (FNEGD x) y) => (FMULD x y) -(FADDS a (FMULS x y)) && a.Block.Func.useFMA(v) => (FMADDS a x y) -(FADDD a (FMULD x y)) && a.Block.Func.useFMA(v) => (FMADDD a x y) -(FSUBS a (FMULS x y)) && a.Block.Func.useFMA(v) => (FMSUBS a x y) -(FSUBD a (FMULD x y)) && a.Block.Func.useFMA(v) => (FMSUBD a x y) -(FSUBS (FMULS x y) a) && a.Block.Func.useFMA(v) => (FNMSUBS a x y) -(FSUBD (FMULD x y) a) && a.Block.Func.useFMA(v) => (FNMSUBD a x y)
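// For illustration (not part of the original diff): the fusion rules here are gated by useFMA because fusing changes results slightly: a fused multiply-add rounds once where a separate multiply and add round twice. Assuming fusion is permitted for the function (names below are hypothetical),
//
//	func madd(a, x, y float64) float64 {
//		return a + x*y // expected to fuse into a single FMADDD
//	}
//
// should compile to one fused multiply-add instead of FMULD followed by FADDD.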
-(FADDS a (FNMULS x y)) && a.Block.Func.useFMA(v) => (FMSUBS a x y) -(FADDD a (FNMULD x y)) && a.Block.Func.useFMA(v) => (FMSUBD a x y) -(FSUBS a (FNMULS x y)) && a.Block.Func.useFMA(v) => (FMADDS a x y) -(FSUBD a (FNMULD x y)) && a.Block.Func.useFMA(v) => (FMADDD a x y) +(FADDS a (FMULS x y)) && a.Block.Func.useFMA(v) => (FMADDS a x y) +(FADDD a (FMULD x y)) && a.Block.Func.useFMA(v) => (FMADDD a x y) +(FSUBS a (FMULS x y)) && a.Block.Func.useFMA(v) => (FMSUBS a x y) +(FSUBD a (FMULD x y)) && a.Block.Func.useFMA(v) => (FMSUBD a x y) +(FSUBS (FMULS x y) a) && a.Block.Func.useFMA(v) => (FNMSUBS a x y) +(FSUBD (FMULD x y) a) && a.Block.Func.useFMA(v) => (FNMSUBD a x y) +(FADDS a (FNMULS x y)) && a.Block.Func.useFMA(v) => (FMSUBS a x y) +(FADDD a (FNMULD x y)) && a.Block.Func.useFMA(v) => (FMSUBD a x y) +(FSUBS a (FNMULS x y)) && a.Block.Func.useFMA(v) => (FMADDS a x y) +(FSUBD a (FNMULD x y)) && a.Block.Func.useFMA(v) => (FMADDD a x y) (FSUBS (FNMULS x y) a) && a.Block.Func.useFMA(v) => (FNMADDS a x y) (FSUBD (FNMULD x y) a) && a.Block.Func.useFMA(v) => (FNMADDD a x y) diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go index 41d9513160..b8c3c2c318 100644 --- a/src/cmd/compile/internal/ssa/rewriteARM64.go +++ b/src/cmd/compile/internal/ssa/rewriteARM64.go @@ -4219,7 +4219,7 @@ func rewriteValueARM64_OpARM64Equal(v *Value) bool { return true } // match: (Equal (CMPconst [0] z:(MADD a x y))) - // cond: z.Uses==1 + // cond: z.Uses == 1 // result: (Equal (CMN a (MUL x y))) for { if v_0.Op != OpARM64CMPconst || auxIntToInt64(v_0.AuxInt) != 0 { @@ -4244,7 +4244,7 @@ func rewriteValueARM64_OpARM64Equal(v *Value) bool { return true } // match: (Equal (CMPconst [0] z:(MSUB a x y))) - // cond: z.Uses==1 + // cond: z.Uses == 1 // result: (Equal (CMP a (MUL x y))) for { if v_0.Op != OpARM64CMPconst || auxIntToInt64(v_0.AuxInt) != 0 { @@ -4269,7 +4269,7 @@ func rewriteValueARM64_OpARM64Equal(v *Value) bool { return true } // match: (Equal (CMPWconst [0] z:(MADDW a x y))) - // cond: z.Uses==1 + // cond: z.Uses == 1 // result: (Equal (CMNW a (MULW x y))) for { if v_0.Op != OpARM64CMPWconst || auxIntToInt32(v_0.AuxInt) != 0 { @@ -4294,7 +4294,7 @@ func rewriteValueARM64_OpARM64Equal(v *Value) bool { return true } // match: (Equal (CMPWconst [0] z:(MSUBW a x y))) - // cond: z.Uses==1 + // cond: z.Uses == 1 // result: (Equal (CMPW a (MULW x y))) for { if v_0.Op != OpARM64CMPWconst || auxIntToInt32(v_0.AuxInt) != 0 { @@ -17021,7 +17021,7 @@ func rewriteValueARM64_OpARM64NotEqual(v *Value) bool { return true } // match: (NotEqual (CMPconst [0] z:(MADD a x y))) - // cond: z.Uses==1 + // cond: z.Uses == 1 // result: (NotEqual (CMN a (MUL x y))) for { if v_0.Op != OpARM64CMPconst || auxIntToInt64(v_0.AuxInt) != 0 { @@ -17046,7 +17046,7 @@ func rewriteValueARM64_OpARM64NotEqual(v *Value) bool { return true } // match: (NotEqual (CMPconst [0] z:(MSUB a x y))) - // cond: z.Uses==1 + // cond: z.Uses == 1 // result: (NotEqual (CMP a (MUL x y))) for { if v_0.Op != OpARM64CMPconst || auxIntToInt64(v_0.AuxInt) != 0 { @@ -17071,7 +17071,7 @@ func rewriteValueARM64_OpARM64NotEqual(v *Value) bool { return true } // match: (NotEqual (CMPWconst [0] z:(MADDW a x y))) - // cond: z.Uses==1 + // cond: z.Uses == 1 // result: (NotEqual (CMNW a (MULW x y))) for { if v_0.Op != OpARM64CMPWconst || auxIntToInt32(v_0.AuxInt) != 0 { @@ -17096,7 +17096,7 @@ func rewriteValueARM64_OpARM64NotEqual(v *Value) bool { return true } // match: (NotEqual (CMPWconst [0] z:(MSUBW a x y))) - // cond: z.Uses==1 + //
cond: z.Uses == 1 // result: (NotEqual (CMPW a (MULW x y))) for { if v_0.Op != OpARM64CMPWconst || auxIntToInt32(v_0.AuxInt) != 0 {
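// For illustration (not part of the original diff): each rule is compiled into a matcher in rewriteARM64.go that carries the rule as a match/cond/result comment triple; the comment-only churn above (z.Uses==1 becoming z.Uses == 1) appears to come from regenerating the file after the .rules source was reformatted. At the Go level, the rule shown here fires on code such as (names below are hypothetical)
//
//	func sumsToZero(a, x, y int64) bool {
//		return a+x*y == 0 // MADD feeding a compare with 0 becomes CMN a, (MUL x y)
//	}
//
// provided the multiply-add result has no other use (z.Uses == 1), so the flag-setting CMN replaces the separate compare.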