cmd/compile: add more generic rewrite rules to reassociate (op (op y C) x|C)

With this patch, opt pass can expose more obvious constant-folding opportunites. Example: func test(i int) int {return (i+8)-(i+4)} The previous version: MOVD "".i(FP), R0 ADD $8, R0, R1 ADD $4, R0, R0 SUB R0, R1, R0 MOVD R0, "".~r1+8(FP) RET (R30) The optimized version: MOVD $4, R0 MOVD R0, "".~r1+8(FP) RET (R30) This patch removes some existing reassociation rules, such as "x+(z-C)", because the current generic rewrite rules will canonicalize "x-const" to "x+(-const)", making "x+(z-C)" equal to "x+(z+(-C))". This patch also adds test cases. Change-Id: I857108ba0b5fcc18a879eeab38e2551bc4277797 Reviewed-on: https://go-review.googlesource.com/c/go/+/237137 Run-TryBot: Keith Randall <khr@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2020-05-28 18:11:52 +08:00 · 2020-05-28 18:11:52 +08:00 · d556c251a1
parent 4e4d5df0b0
commit d556c251a1
3 changed files with 537 additions and 348 deletions
--- a/src/cmd/compile/internal/ssa/gen/generic.rules
+++ b/src/cmd/compile/internal/ssa/gen/generic.rules
@ -1807,6 +1807,8 @@
 // invariant that pointers must stay within the pointed-to object,
 // we can't pull part of a pointer computation above the AddPtr.
 // See issue 37881.
+// Note: we don't need to handle any (x-C) cases because we already rewrite
+// (x-C) to (x+(-C)).

 // x + (C + z) -> C + (x + z)
 (Add64 (Add64 i:(Const64 <t>) z) x) && (z.Op != OpConst64 && x.Op != OpConst64) => (Add64 i (Add64 <t> z x))
@ -1820,23 +1822,29 @@
 (Add16 (Sub16 i:(Const16 <t>) z) x) && (z.Op != OpConst16 && x.Op != OpConst16) => (Add16 i (Sub16 <t> x z))
 (Add8  (Sub8  i:(Const8  <t>) z) x) && (z.Op != OpConst8  && x.Op != OpConst8)  => (Add8  i (Sub8  <t> x z))

-// x + (z - C) -> (x + z) - C
-(Add64 (Sub64 z i:(Const64 <t>)) x) && (z.Op != OpConst64 && x.Op != OpConst64) => (Sub64 (Add64 <t> x z) i)
-(Add32 (Sub32 z i:(Const32 <t>)) x) && (z.Op != OpConst32 && x.Op != OpConst32) => (Sub32 (Add32 <t> x z) i)
-(Add16 (Sub16 z i:(Const16 <t>)) x) && (z.Op != OpConst16 && x.Op != OpConst16) => (Sub16 (Add16 <t> x z) i)
-(Add8  (Sub8  z i:(Const8  <t>)) x) && (z.Op != OpConst8  && x.Op != OpConst8)  => (Sub8  (Add8  <t> x z) i)
-
 // x - (C - z) -> x + (z - C) -> (x + z) - C
 (Sub64 x (Sub64 i:(Const64 <t>) z)) && (z.Op != OpConst64 && x.Op != OpConst64) => (Sub64 (Add64 <t> x z) i)
 (Sub32 x (Sub32 i:(Const32 <t>) z)) && (z.Op != OpConst32 && x.Op != OpConst32) => (Sub32 (Add32 <t> x z) i)
 (Sub16 x (Sub16 i:(Const16 <t>) z)) && (z.Op != OpConst16 && x.Op != OpConst16) => (Sub16 (Add16 <t> x z) i)
 (Sub8  x (Sub8  i:(Const8  <t>) z)) && (z.Op != OpConst8  && x.Op != OpConst8)  => (Sub8  (Add8  <t> x z) i)

-// x - (z - C) -> x + (C - z) -> (x - z) + C
-(Sub64 x (Sub64 z i:(Const64 <t>))) && (z.Op != OpConst64 && x.Op != OpConst64) => (Add64 i (Sub64 <t> x z))
-(Sub32 x (Sub32 z i:(Const32 <t>))) && (z.Op != OpConst32 && x.Op != OpConst32) => (Add32 i (Sub32 <t> x z))
-(Sub16 x (Sub16 z i:(Const16 <t>))) && (z.Op != OpConst16 && x.Op != OpConst16) => (Add16 i (Sub16 <t> x z))
-(Sub8  x (Sub8  z i:(Const8  <t>))) && (z.Op != OpConst8  && x.Op != OpConst8)  => (Add8  i (Sub8  <t> x z))
+// x - (z + C) -> x + (-z - C) -> (x - z) - C
+(Sub64 x (Add64 z i:(Const64 <t>))) && (z.Op != OpConst64 && x.Op != OpConst64) => (Sub64 (Sub64 <t> x z) i)
+(Sub32 x (Add32 z i:(Const32 <t>))) && (z.Op != OpConst32 && x.Op != OpConst32) => (Sub32 (Sub32 <t> x z) i)
+(Sub16 x (Add16 z i:(Const16 <t>))) && (z.Op != OpConst16 && x.Op != OpConst16) => (Sub16 (Sub16 <t> x z) i)
+(Sub8  x (Add8  z i:(Const8  <t>))) && (z.Op != OpConst8  && x.Op != OpConst8)  => (Sub8 (Sub8  <t> x z) i)
+
+// (C - z) - x -> C - (z + x)
+(Sub64 (Sub64 i:(Const64 <t>) z) x) && (z.Op != OpConst64 && x.Op != OpConst64) => (Sub64 i (Add64 <t> z x))
+(Sub32 (Sub32 i:(Const32 <t>) z) x) && (z.Op != OpConst32 && x.Op != OpConst32) => (Sub32 i (Add32 <t> z x))
+(Sub16 (Sub16 i:(Const16 <t>) z) x) && (z.Op != OpConst16 && x.Op != OpConst16) => (Sub16 i (Add16 <t> z x))
+(Sub8  (Sub8  i:(Const8  <t>) z) x) && (z.Op != OpConst8  && x.Op != OpConst8)  => (Sub8  i (Add8  <t> z x))
+
+// (z + C) -x -> C + (z - x)
+(Sub64 (Add64 z i:(Const64 <t>)) x) && (z.Op != OpConst64 && x.Op != OpConst64) => (Add64 i (Sub64 <t> z x))
+(Sub32 (Add32 z i:(Const32 <t>)) x) && (z.Op != OpConst32 && x.Op != OpConst32) => (Add32 i (Sub32 <t> z x))
+(Sub16 (Add16 z i:(Const16 <t>)) x) && (z.Op != OpConst16 && x.Op != OpConst16) => (Add16 i (Sub16 <t> z x))
+(Sub8  (Add8  z i:(Const8  <t>)) x) && (z.Op != OpConst8  && x.Op != OpConst8)  => (Add8  i (Sub8  <t> z x))

 // x & (C & z) -> C & (x & z)
 (And64 (And64 i:(Const64 <t>) z) x) && (z.Op != OpConst64 && x.Op != OpConst64) => (And64 i (And64 <t> z x))
@ -1856,6 +1864,12 @@
 (Xor16 (Xor16 i:(Const16 <t>) z) x) && (z.Op != OpConst16 && x.Op != OpConst16) => (Xor16 i (Xor16 <t> z x))
 (Xor8  (Xor8  i:(Const8  <t>) z) x) && (z.Op != OpConst8  && x.Op != OpConst8)  => (Xor8  i (Xor8  <t> z x))

+// x * (D * z) = D * (x * z)
+(Mul64 (Mul64 i:(Const64 <t>) z) x) && (z.Op != OpConst64 && x.Op != OpConst64) => (Mul64 i (Mul64 <t> x z))
+(Mul32 (Mul32 i:(Const32 <t>) z) x) && (z.Op != OpConst32 && x.Op != OpConst32) => (Mul32 i (Mul32 <t> x z))
+(Mul16 (Mul16 i:(Const16 <t>) z) x) && (z.Op != OpConst16 && x.Op != OpConst16) => (Mul16 i (Mul16 <t> x z))
+(Mul8  (Mul8  i:(Const8  <t>) z) x) && (z.Op != OpConst8  && x.Op != OpConst8)  => (Mul8  i (Mul8  <t> x z))
+
 // C + (D + x) -> (C + D) + x
 (Add64 (Const64 <t> [c]) (Add64 (Const64 <t> [d]) x)) => (Add64 (Const64 <t> [c+d]) x)
 (Add32 (Const32 <t> [c]) (Add32 (Const32 <t> [d]) x)) => (Add32 (Const32 <t> [c+d]) x)
@ -1868,24 +1882,18 @@
 (Add16 (Const16 <t> [c]) (Sub16 (Const16 <t> [d]) x)) => (Sub16 (Const16 <t> [c+d]) x)
 (Add8  (Const8  <t> [c]) (Sub8  (Const8  <t> [d]) x)) => (Sub8  (Const8  <t> [c+d]) x)

-// C + (x - D) -> (C - D) + x
-(Add64 (Const64 <t> [c]) (Sub64 x (Const64 <t> [d]))) => (Add64 (Const64 <t> [c-d]) x)
-(Add32 (Const32 <t> [c]) (Sub32 x (Const32 <t> [d]))) => (Add32 (Const32 <t> [c-d]) x)
-(Add16 (Const16 <t> [c]) (Sub16 x (Const16 <t> [d]))) => (Add16 (Const16 <t> [c-d]) x)
-(Add8  (Const8  <t> [c]) (Sub8  x (Const8  <t> [d]))) => (Add8  (Const8  <t> [c-d]) x)
-
-// C - (x - D) -> (C + D) - x
-(Sub64 (Const64 <t> [c]) (Sub64 x (Const64 <t> [d]))) => (Sub64 (Const64 <t> [c+d]) x)
-(Sub32 (Const32 <t> [c]) (Sub32 x (Const32 <t> [d]))) => (Sub32 (Const32 <t> [c+d]) x)
-(Sub16 (Const16 <t> [c]) (Sub16 x (Const16 <t> [d]))) => (Sub16 (Const16 <t> [c+d]) x)
-(Sub8  (Const8  <t> [c]) (Sub8  x (Const8  <t> [d]))) => (Sub8  (Const8  <t> [c+d]) x)
-
 // C - (D - x) -> (C - D) + x
 (Sub64 (Const64 <t> [c]) (Sub64 (Const64 <t> [d]) x)) => (Add64 (Const64 <t> [c-d]) x)
 (Sub32 (Const32 <t> [c]) (Sub32 (Const32 <t> [d]) x)) => (Add32 (Const32 <t> [c-d]) x)
 (Sub16 (Const16 <t> [c]) (Sub16 (Const16 <t> [d]) x)) => (Add16 (Const16 <t> [c-d]) x)
 (Sub8  (Const8  <t> [c]) (Sub8  (Const8  <t> [d]) x)) => (Add8  (Const8  <t> [c-d]) x)

+// C - (D + x) -> (C - D) - x
+(Sub64 (Const64 <t> [c]) (Add64 (Const64 <t> [d]) x)) => (Sub64 (Const64 <t> [c-d]) x)
+(Sub32 (Const32 <t> [c]) (Add32 (Const32 <t> [d]) x)) => (Sub32 (Const32 <t> [c-d]) x)
+(Sub16 (Const16 <t> [c]) (Add16 (Const16 <t> [d]) x)) => (Sub16 (Const16 <t> [c-d]) x)
+(Sub8  (Const8  <t> [c]) (Add8  (Const8  <t> [d]) x)) => (Sub8  (Const8  <t> [c-d]) x)
+
 // C & (D & x) -> (C & D) & x
 (And64 (Const64 <t> [c]) (And64 (Const64 <t> [d]) x)) => (And64 (Const64 <t> [c&d]) x)
 (And32 (Const32 <t> [c]) (And32 (Const32 <t> [d]) x)) => (And32 (Const32 <t> [c&d]) x)
--- a/src/cmd/compile/internal/ssa/rewritegeneric.go
+++ b/src/cmd/compile/internal/ssa/rewritegeneric.go
--- a/test/codegen/arithmetic.go
+++ b/test/codegen/arithmetic.go
@ -462,7 +462,6 @@ func addSpecial(a, b, c uint32) (uint32, uint32, uint32) {
 	return a, b, c
 }

-
 // Divide -> shift rules usually require fixup for negative inputs.
 // If the input is non-negative, make sure the fixup is eliminated.
 func divInt(v int64) int64 {
@ -472,3 +471,33 @@ func divInt(v int64) int64 {
 	// amd64:-`.*SARQ.*63,`, -".*SHRQ", ".*SARQ.*[$]9,"
 	return v / 512
 }
+
+// The reassociate rules "x - (z + C) -> (x - z) - C" and
+// "(z + C) -x -> C + (z - x)" can optimize the following cases.
+func constantFold1(i0, j0, i1, j1, i2, j2, i3, j3 int) (int, int, int, int) {
+	// arm64:"SUB","ADD\t[$]2"
+	r0 := (i0 + 3) - (j0 + 1)
+	// arm64:"SUB","SUB\t[$]4"
+	r1 := (i1 - 3) - (j1 + 1)
+	// arm64:"SUB","ADD\t[$]4"
+	r2 := (i2 + 3) - (j2 - 1)
+	// arm64:"SUB","SUB\t[$]2"
+	r3 := (i3 - 3) - (j3 - 1)
+	return r0, r1, r2, r3
+}
+
+// The reassociate rules "x - (z + C) -> (x - z) - C" and
+// "(C - z) - x -> C - (z + x)" can optimize the following cases.
+func constantFold2(i0, j0, i1, j1 int) (int, int) {
+	// arm64:"ADD","MOVD\t[$]2","SUB"
+	r0 := (3 - i0) - (j0 + 1)
+	// arm64:"ADD","MOVD\t[$]4","SUB"
+	r1 := (3 - i1) - (j1 - 1)
+	return r0, r1
+}
+
+func constantFold3(i, j int) int {
+	// arm64: "MOVD\t[$]30","MUL",-"ADD",-"LSL"
+	r := (5 * i) * (6 * j)
+	return r
+}