cmd/compile/internal/ssa: optimize ARM64 code with TST

For signed comparisons, the following four optimization rules hold:

(CMPconst [0] z:(AND x y)) && z.Uses == 1 => (TST x y)
(CMPWconst [0] z:(AND x y)) && z.Uses == 1 => (TSTW x y)
(CMPconst [0] x:(ANDconst [c] y)) && x.Uses == 1 => (TSTconst [c] y)
(CMPWconst [0] x:(ANDconst [c] y)) && x.Uses == 1 => (TSTWconst [int32(c)] y)

But currently they only apply to jump instructions, not to conditional
instructions within a block, such as cset, csel, etc. This CL extends
the above rules into blocks so that conditional instructions can also be
optimized.

name                       old time/op  new time/op  delta
DivisiblePow2constI64-160  1.04ns ± 0%  0.86ns ± 0%  -17.30%  (p=0.008 n=5+5)
DivisiblePow2constI32-160  1.04ns ± 0%  0.87ns ± 0%  -16.16%  (p=0.016 n=4+5)
DivisiblePow2constI16-160  1.04ns ± 0%  0.87ns ± 0%  -16.03%  (p=0.008 n=5+5)
DivisiblePow2constI8-160   1.04ns ± 0%  0.86ns ± 0%  -17.15%  (p=0.008 n=5+5)

Change-Id: I6bc34bff30862210e8dd001e0340b8fe502fe3de
Reviewed-on: https://go-review.googlesource.com/c/go/+/420434
Reviewed-by: Cherry Mui <cherryyz@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Run-TryBot: Eric Fang <eric.fang@arm.com>
This commit is contained in:
eric fang 2022-07-29 04:05:03 +00:00 committed by Eric Fang
parent 8dc7710fae
commit efe5929dbd
3 changed files with 773 additions and 198 deletions

View File

@ -610,13 +610,6 @@
(TBNZ [0] (GreaterThanF cc) yes no) => (FGT cc yes no)
(TBNZ [0] (GreaterEqualF cc) yes no) => (FGE cc yes no)
(EQ (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (EQ (TSTWconst [int32(c)] y) yes no)
(NE (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (NE (TSTWconst [int32(c)] y) yes no)
(LT (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (LT (TSTWconst [int32(c)] y) yes no)
(LE (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (LE (TSTWconst [int32(c)] y) yes no)
(GT (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (GT (TSTWconst [int32(c)] y) yes no)
(GE (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (GE (TSTWconst [int32(c)] y) yes no)
(EQ (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (EQ (TST x y) yes no)
(NE (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (NE (TST x y) yes no)
(LT (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (LT (TST x y) yes no)
@ -624,6 +617,13 @@
(GT (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (GT (TST x y) yes no)
(GE (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (GE (TST x y) yes no)
(EQ (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (EQ (TSTconst [c] y) yes no)
(NE (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (NE (TSTconst [c] y) yes no)
(LT (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (LT (TSTconst [c] y) yes no)
(LE (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (LE (TSTconst [c] y) yes no)
(GT (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (GT (TSTconst [c] y) yes no)
(GE (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (GE (TSTconst [c] y) yes no)
(EQ (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (EQ (TSTW x y) yes no)
(NE (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (NE (TSTW x y) yes no)
(LT (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (LT (TSTW x y) yes no)
@ -631,12 +631,41 @@
(GT (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (GT (TSTW x y) yes no)
(GE (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (GE (TSTW x y) yes no)
(EQ (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (EQ (TSTconst [c] y) yes no)
(NE (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (NE (TSTconst [c] y) yes no)
(LT (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (LT (TSTconst [c] y) yes no)
(LE (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (LE (TSTconst [c] y) yes no)
(GT (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (GT (TSTconst [c] y) yes no)
(GE (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (GE (TSTconst [c] y) yes no)
(EQ (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (EQ (TSTWconst [int32(c)] y) yes no)
(NE (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (NE (TSTWconst [int32(c)] y) yes no)
(LT (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (LT (TSTWconst [int32(c)] y) yes no)
(LE (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (LE (TSTWconst [int32(c)] y) yes no)
(GT (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (GT (TSTWconst [int32(c)] y) yes no)
(GE (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (GE (TSTWconst [int32(c)] y) yes no)
// For conditional instructions such as CSET, CSEL.
(Equal (CMPconst [0] z:(AND x y))) && z.Uses == 1 => (Equal (TST x y))
(NotEqual (CMPconst [0] z:(AND x y))) && z.Uses == 1 => (NotEqual (TST x y))
(LessThan (CMPconst [0] z:(AND x y))) && z.Uses == 1 => (LessThan (TST x y))
(LessEqual (CMPconst [0] z:(AND x y))) && z.Uses == 1 => (LessEqual (TST x y))
(GreaterThan (CMPconst [0] z:(AND x y))) && z.Uses == 1 => (GreaterThan (TST x y))
(GreaterEqual (CMPconst [0] z:(AND x y))) && z.Uses == 1 => (GreaterEqual (TST x y))
(Equal (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (Equal (TSTWconst [int32(c)] y))
(NotEqual (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (NotEqual (TSTWconst [int32(c)] y))
(LessThan (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (LessThan (TSTWconst [int32(c)] y))
(LessEqual (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (LessEqual (TSTWconst [int32(c)] y))
(GreaterThan (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (GreaterThan (TSTWconst [int32(c)] y))
(GreaterEqual (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (GreaterEqual (TSTWconst [int32(c)] y))
(Equal (CMPWconst [0] z:(AND x y))) && z.Uses == 1 => (Equal (TSTW x y))
(NotEqual (CMPWconst [0] z:(AND x y))) && z.Uses == 1 => (NotEqual (TSTW x y))
(LessThan (CMPWconst [0] z:(AND x y))) && z.Uses == 1 => (LessThan (TSTW x y))
(LessEqual (CMPWconst [0] z:(AND x y))) && z.Uses == 1 => (LessEqual (TSTW x y))
(GreaterThan (CMPWconst [0] z:(AND x y))) && z.Uses == 1 => (GreaterThan (TSTW x y))
(GreaterEqual (CMPWconst [0] z:(AND x y))) && z.Uses == 1 => (GreaterEqual (TSTW x y))
(Equal (CMPconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (Equal (TSTconst [c] y))
(NotEqual (CMPconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (NotEqual (TSTconst [c] y))
(LessThan (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (LessThan (TSTconst [c] y))
(LessEqual (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (LessEqual (TSTconst [c] y))
(GreaterThan (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (GreaterThan (TSTconst [c] y))
(GreaterEqual (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (GreaterEqual (TSTconst [c] y))
(EQ (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (EQ (CMNconst [c] y) yes no)
(NE (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (NE (CMNconst [c] y) yes no)

File diff suppressed because it is too large Load Diff

View File

@ -176,7 +176,7 @@ func MergeMuls2(n int) int {
// amd64:"IMUL3Q\t[$]23","(ADDQ\t[$]29)|(LEAQ\t29)"
// 386:"IMUL3L\t[$]23","ADDL\t[$]29"
// ppc64le/power9:"MADDLD",-"MULLD\t[$]23",-"ADD\t[$]29"
// ppc64le/power8:"MULLD\t[$]23","ADD\t[$]29"
// ppc64le/power8:"MULLD\t[$]23","ADD\t[$]29"
return 5*n + 7*(n+1) + 11*(n+2) // 23n + 29
}
@ -282,7 +282,7 @@ func Pow2DivisibleSigned(n1, n2 int) (bool, bool) {
// 386:"TESTL\t[$]63",-"DIVL",-"SHRL"
// amd64:"TESTQ\t[$]63",-"DIVQ",-"SHRQ"
// arm:"AND\t[$]63",-".*udiv",-"SRA"
// arm64:"AND\t[$]63",-"UDIV",-"ASR"
// arm64:"TST\t[$]63",-"UDIV",-"ASR",-"AND"
// ppc64:"ANDCC\t[$]63",-"SRAD"
// ppc64le:"ANDCC\t[$]63",-"SRAD"
a := n1%64 == 0 // signed divisible
@ -290,7 +290,7 @@ func Pow2DivisibleSigned(n1, n2 int) (bool, bool) {
// 386:"TESTL\t[$]63",-"DIVL",-"SHRL"
// amd64:"TESTQ\t[$]63",-"DIVQ",-"SHRQ"
// arm:"AND\t[$]63",-".*udiv",-"SRA"
// arm64:"AND\t[$]63",-"UDIV",-"ASR"
// arm64:"TST\t[$]63",-"UDIV",-"ASR",-"AND"
// ppc64:"ANDCC\t[$]63",-"SRAD"
// ppc64le:"ANDCC\t[$]63",-"SRAD"
b := n2%64 != 0 // signed indivisible
@ -572,16 +572,16 @@ func divInt(v int64) int64 {
// "(z + C) -x -> C + (z - x)" can optimize the following cases.
func constantFold1(i0, j0, i1, j1, i2, j2, i3, j3 int) (int, int, int, int) {
// arm64:"SUB","ADD\t[$]2"
// ppc64:"SUB","ADD\t[$]2"
// ppc64le:"SUB","ADD\t[$]2"
// ppc64:"SUB","ADD\t[$]2"
// ppc64le:"SUB","ADD\t[$]2"
r0 := (i0 + 3) - (j0 + 1)
// arm64:"SUB","SUB\t[$]4"
// ppc64:"SUB","ADD\t[$]-4"
// ppc64le:"SUB","ADD\t[$]-4"
// ppc64:"SUB","ADD\t[$]-4"
// ppc64le:"SUB","ADD\t[$]-4"
r1 := (i1 - 3) - (j1 + 1)
// arm64:"SUB","ADD\t[$]4"
// ppc64:"SUB","ADD\t[$]4"
// ppc64le:"SUB","ADD\t[$]4"
// ppc64:"SUB","ADD\t[$]4"
// ppc64le:"SUB","ADD\t[$]4"
r2 := (i2 + 3) - (j2 - 1)
// arm64:"SUB","SUB\t[$]2"
// ppc64:"SUB","ADD\t[$]-2"
@ -606,8 +606,8 @@ func constantFold2(i0, j0, i1, j1 int) (int, int) {
func constantFold3(i, j int) int {
// arm64: "MOVD\t[$]30","MUL",-"ADD",-"LSL"
// ppc64:"MULLD\t[$]30","MULLD"
// ppc64le:"MULLD\t[$]30","MULLD"
// ppc64:"MULLD\t[$]30","MULLD"
// ppc64le:"MULLD\t[$]30","MULLD"
r := (5 * i) * (6 * j)
return r
}