mirror of https://github.com/golang/go.git
cmd/compile: optimize ARM with MULS
MULS was introduced in ARMv7 and is the multiply-subtract counterpart of MULA. This patch duplicates all MULA-related SSA rules for MULS. Below are the benchmark results compared against the original Go compiler. There is no improvement overall, but a big improvement in special cases. 1. A specific test case ran 18.62% faster. (https://github.com/benshi001/ugo1/blob/master/mulsub_test.go) name old time/op new time/op delta MulSub-4 270µs ± 0% 219µs ± 0% -18.62% (p=0.000 n=35+40) 2. Total size of all .a files in pkg/ shrank by 0.002%. 3. The compilecmp benchmark showed no decline. name old time/op new time/op delta Template 2.37s ± 3% 2.36s ± 1% ~ (p=0.233 n=19+18) Unicode 1.32s ± 2% 1.34s ± 5% +1.32% (p=0.011 n=20+18) GoTypes 7.88s ± 1% 7.87s ± 1% ~ (p=0.758 n=20+20) Compiler 37.5s ± 1% 37.6s ± 1% ~ (p=0.194 n=20+19) SSA 83.7s ± 2% 83.5s ± 2% ~ (p=0.569 n=20+19) Flate 1.46s ± 3% 1.45s ± 1% ~ (p=0.619 n=20+17) GoParser 1.87s ± 2% 1.85s ± 1% -0.58% (p=0.048 n=20+18) Reflect 5.10s ± 2% 5.11s ± 2% ~ (p=0.365 n=19+20) Tar 1.78s ± 2% 1.78s ± 2% ~ (p=0.531 n=19+20) XML 2.62s ± 1% 2.61s ± 2% ~ (p=0.057 n=17+19) [Geo mean] 4.68s 4.67s -0.07% name old user-time/op new user-time/op delta Template 2.80s ± 1% 2.79s ± 2% ~ (p=0.686 n=17+20) Unicode 1.61s ± 4% 1.63s ± 6% ~ (p=0.222 n=20+20) GoTypes 9.59s ± 1% 9.60s ± 1% ~ (p=0.482 n=17+20) Compiler 46.1s ± 1% 46.2s ± 1% ~ (p=0.373 n=20+18) SSA 108s ± 1% 108s ± 2% ~ (p=0.784 n=20+20) Flate 1.68s ± 3% 1.69s ± 3% ~ (p=0.335 n=20+19) GoParser 2.20s ± 4% 2.19s ± 2% ~ (p=0.844 n=20+18) Reflect 5.97s ± 3% 6.01s ± 2% ~ (p=0.184 n=20+20) Tar 2.11s ± 2% 2.11s ± 4% ~ (p=0.961 n=19+20) XML 3.07s ± 1% 3.07s ± 3% ~ (p=0.786 n=16+19) [Geo mean] 5.61s 5.62s +0.19% name old text-bytes new text-bytes delta HelloSize 586kB ± 0% 586kB ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 5.46kB ± 0% 5.46kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 72.9kB ± 0% 72.9kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 
1.03MB ± 0% 1.03MB ± 0% ~ (all equal) 4. The go1 benchmark showed no decline in total. name old time/op new time/op delta BinaryTree17-4 41.7s ± 1% 41.7s ± 1% ~ (p=0.966 n=40+40) Fannkuch11-4 23.6s ± 0% 23.6s ± 1% -0.23% (p=0.000 n=40+40) FmtFprintfEmpty-4 844ns ± 1% 834ns ± 1% -1.23% (p=0.000 n=40+40) FmtFprintfString-4 1.39µs ± 1% 1.40µs ± 1% +0.71% (p=0.000 n=40+40) FmtFprintfInt-4 1.44µs ± 1% 1.45µs ± 1% +0.70% (p=0.000 n=40+40) FmtFprintfIntInt-4 2.10µs ± 1% 2.10µs ± 1% +0.30% (p=0.000 n=40+40) FmtFprintfPrefixedInt-4 2.49µs ± 0% 2.50µs ± 1% +0.66% (p=0.000 n=32+40) FmtFprintfFloat-4 4.42µs ± 1% 4.46µs ± 2% +0.94% (p=0.000 n=40+40) FmtManyArgs-4 8.31µs ± 1% 8.22µs ± 1% -1.09% (p=0.000 n=40+40) GobDecode-4 105ms ± 1% 102ms ± 1% -2.30% (p=0.000 n=39+39) GobEncode-4 90.2ms ± 1% 88.7ms ± 1% -1.66% (p=0.000 n=40+39) Gzip-4 4.17s ± 1% 4.16s ± 1% ~ (p=0.785 n=40+40) Gunzip-4 608ms ± 1% 608ms ± 1% ~ (p=0.481 n=40+40) HTTPClientServer-4 697µs ± 2% 684µs ± 3% -1.89% (p=0.000 n=37+40) JSONEncode-4 255ms ± 1% 256ms ± 1% +0.35% (p=0.000 n=40+40) JSONDecode-4 920ms ± 1% 926ms ± 1% +0.64% (p=0.000 n=40+39) Mandelbrot200-4 49.3ms ± 1% 49.3ms ± 0% +0.07% (p=0.005 n=40+40) GoParse-4 46.8ms ± 2% 46.7ms ± 1% ~ (p=1.000 n=40+40) RegexpMatchEasy0_32-4 1.27µs ± 0% 1.27µs ± 1% ~ (p=0.057 n=40+40) RegexpMatchEasy0_1K-4 7.97µs ± 7% 7.92µs ± 5% ~ (p=0.094 n=40+40) RegexpMatchEasy1_32-4 1.28µs ± 1% 1.28µs ± 1% ~ (p=0.406 n=40+40) RegexpMatchEasy1_1K-4 10.5µs ± 4% 10.5µs ± 3% ~ (p=0.855 n=40+40) RegexpMatchMedium_32-4 2.04µs ± 0% 2.04µs ± 1% -0.22% (p=0.000 n=39+40) RegexpMatchMedium_1K-4 541µs ± 0% 540µs ± 1% -0.25% (p=0.000 n=40+38) RegexpMatchHard_32-4 29.3µs ± 1% 29.3µs ± 0% ~ (p=0.149 n=40+40) RegexpMatchHard_1K-4 878µs ± 1% 880µs ± 0% +0.14% (p=0.005 n=36+35) Revcomp-4 81.8ms ± 2% 81.4ms ± 2% -0.43% (p=0.015 n=38+39) Template-4 1.05s ± 1% 1.05s ± 1% ~ (p=0.302 n=40+35) TimeParse-4 7.18µs ± 1% 7.26µs ± 1% +1.05% (p=0.000 n=40+36) TimeFormat-4 13.1µs ± 1% 13.1µs ± 1% ~ (p=0.698 
n=37+40) [Geo mean] 733µs 732µs -0.16% name old speed new speed delta GobDecode-4 7.34MB/s ± 1% 7.51MB/s ± 1% +2.36% (p=0.000 n=39+39) GobEncode-4 8.51MB/s ± 1% 8.65MB/s ± 1% +1.69% (p=0.000 n=40+39) Gzip-4 4.66MB/s ± 1% 4.66MB/s ± 1% ~ (p=0.783 n=40+40) Gunzip-4 31.9MB/s ± 1% 31.9MB/s ± 1% ~ (p=0.466 n=40+40) JSONEncode-4 7.61MB/s ± 1% 7.58MB/s ± 1% -0.35% (p=0.001 n=40+40) JSONDecode-4 2.11MB/s ± 1% 2.10MB/s ± 1% -0.52% (p=0.000 n=38+39) GoParse-4 1.24MB/s ± 2% 1.24MB/s ± 1% ~ (p=0.556 n=40+39) RegexpMatchEasy0_32-4 25.1MB/s ± 0% 25.1MB/s ± 1% ~ (p=0.064 n=40+40) RegexpMatchEasy0_1K-4 129MB/s ± 8% 129MB/s ± 5% ~ (p=0.094 n=40+40) RegexpMatchEasy1_32-4 25.0MB/s ± 1% 25.1MB/s ± 1% ~ (p=0.331 n=40+40) RegexpMatchEasy1_1K-4 97.7MB/s ± 4% 97.8MB/s ± 3% ~ (p=0.851 n=40+40) RegexpMatchMedium_32-4 490kB/s ± 0% 490kB/s ± 0% ~ (all equal) RegexpMatchMedium_1K-4 1.89MB/s ± 0% 1.90MB/s ± 1% +0.12% (p=0.031 n=40+40) RegexpMatchHard_32-4 1.09MB/s ± 1% 1.09MB/s ± 1% ~ (p=0.597 n=40+40) RegexpMatchHard_1K-4 1.16MB/s ± 1% 1.16MB/s ± 1% ~ (p=0.565 n=40+35) Revcomp-4 31.1MB/s ± 2% 31.2MB/s ± 2% +0.44% (p=0.018 n=38+39) Template-4 1.85MB/s ± 1% 1.85MB/s ± 1% ~ (p=0.873 n=40+40) [Geo mean] 6.66MB/s 6.67MB/s +0.26% Change-Id: Icc972d8a78ea06c32c3aa15733ff0537c82c2dc7 Reviewed-on: https://go-review.googlesource.com/58950 Reviewed-by: Cherry Zhang <cherryyz@google.com> Run-TryBot: Cherry Zhang <cherryyz@google.com>
This commit is contained in:
parent
dcef97e088
commit
64607dbd26
|
|
@ -402,7 +402,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
|
|||
p.To.Type = obj.TYPE_REGREG
|
||||
p.To.Reg = v.Reg0() // high 32-bit
|
||||
p.To.Offset = int64(v.Reg1()) // low 32-bit
|
||||
case ssa.OpARMMULA:
|
||||
case ssa.OpARMMULA, ssa.OpARMMULS:
|
||||
p := s.Prog(v.Op.Asm())
|
||||
p.From.Type = obj.TYPE_REG
|
||||
p.From.Reg = v.Args[0].Reg()
|
||||
|
|
|
|||
|
|
@ -602,6 +602,28 @@
|
|||
(MULA (MOVWconst [c]) x a) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (ADD (SLLconst <x.Type> [log2(c/7)] (RSBshiftLL <x.Type> x x [3])) a)
|
||||
(MULA (MOVWconst [c]) x a) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (ADD (SLLconst <x.Type> [log2(c/9)] (ADDshiftLL <x.Type> x x [3])) a)
|
||||
|
||||
(MULS x (MOVWconst [c]) a) && int32(c) == -1 -> (ADD a x)
|
||||
(MULS _ (MOVWconst [0]) a) -> a
|
||||
(MULS x (MOVWconst [1]) a) -> (RSB x a)
|
||||
(MULS x (MOVWconst [c]) a) && isPowerOfTwo(c) -> (RSB (SLLconst <x.Type> [log2(c)] x) a)
|
||||
(MULS x (MOVWconst [c]) a) && isPowerOfTwo(c-1) && int32(c) >= 3 -> (RSB (ADDshiftLL <x.Type> x x [log2(c-1)]) a)
|
||||
(MULS x (MOVWconst [c]) a) && isPowerOfTwo(c+1) && int32(c) >= 7 -> (RSB (RSBshiftLL <x.Type> x x [log2(c+1)]) a)
|
||||
(MULS x (MOVWconst [c]) a) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) -> (RSB (SLLconst <x.Type> [log2(c/3)] (ADDshiftLL <x.Type> x x [1])) a)
|
||||
(MULS x (MOVWconst [c]) a) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) -> (RSB (SLLconst <x.Type> [log2(c/5)] (ADDshiftLL <x.Type> x x [2])) a)
|
||||
(MULS x (MOVWconst [c]) a) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (RSB (SLLconst <x.Type> [log2(c/7)] (RSBshiftLL <x.Type> x x [3])) a)
|
||||
(MULS x (MOVWconst [c]) a) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (RSB (SLLconst <x.Type> [log2(c/9)] (ADDshiftLL <x.Type> x x [3])) a)
|
||||
|
||||
(MULS (MOVWconst [c]) x a) && int32(c) == -1 -> (ADD a x)
|
||||
(MULS (MOVWconst [0]) _ a) -> a
|
||||
(MULS (MOVWconst [1]) x a) -> (RSB x a)
|
||||
(MULS (MOVWconst [c]) x a) && isPowerOfTwo(c) -> (RSB (SLLconst <x.Type> [log2(c)] x) a)
|
||||
(MULS (MOVWconst [c]) x a) && isPowerOfTwo(c-1) && int32(c) >= 3 -> (RSB (ADDshiftLL <x.Type> x x [log2(c-1)]) a)
|
||||
(MULS (MOVWconst [c]) x a) && isPowerOfTwo(c+1) && int32(c) >= 7 -> (RSB (RSBshiftLL <x.Type> x x [log2(c+1)]) a)
|
||||
(MULS (MOVWconst [c]) x a) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) -> (RSB (SLLconst <x.Type> [log2(c/3)] (ADDshiftLL <x.Type> x x [1])) a)
|
||||
(MULS (MOVWconst [c]) x a) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) -> (RSB (SLLconst <x.Type> [log2(c/5)] (ADDshiftLL <x.Type> x x [2])) a)
|
||||
(MULS (MOVWconst [c]) x a) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (RSB (SLLconst <x.Type> [log2(c/7)] (RSBshiftLL <x.Type> x x [3])) a)
|
||||
(MULS (MOVWconst [c]) x a) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (RSB (SLLconst <x.Type> [log2(c/9)] (ADDshiftLL <x.Type> x x [3])) a)
|
||||
|
||||
// div by constant
|
||||
(Select0 (CALLudiv x (MOVWconst [1]))) -> x
|
||||
(Select1 (CALLudiv _ (MOVWconst [1]))) -> (MOVWconst [0])
|
||||
|
|
@ -1215,6 +1237,8 @@
|
|||
(BIC x x) -> (MOVWconst [0])
|
||||
|
||||
(ADD (MUL x y) a) -> (MULA x y a)
|
||||
(SUB a (MUL x y)) && objabi.GOARM == 7 -> (MULS x y a)
|
||||
(RSB (MUL x y) a) && objabi.GOARM == 7 -> (MULS x y a)
|
||||
|
||||
(AND x (MVN y)) -> (BIC x y)
|
||||
|
||||
|
|
|
|||
|
|
@ -168,6 +168,7 @@ func init() {
|
|||
|
||||
{name: "MULLU", argLength: 2, reg: gp22, asm: "MULLU", commutative: true}, // arg0 * arg1, high 32 bits in out0, low 32 bits in out1
|
||||
{name: "MULA", argLength: 3, reg: gp31, asm: "MULA"}, // arg0 * arg1 + arg2
|
||||
{name: "MULS", argLength: 3, reg: gp31, asm: "MULS"}, // arg2 - arg0 * arg1
|
||||
|
||||
{name: "ADDF", argLength: 2, reg: fp21, asm: "ADDF", commutative: true}, // arg0 + arg1
|
||||
{name: "ADDD", argLength: 2, reg: fp21, asm: "ADDD", commutative: true}, // arg0 + arg1
|
||||
|
|
|
|||
|
|
@ -694,6 +694,7 @@ const (
|
|||
OpARMRSCconst
|
||||
OpARMMULLU
|
||||
OpARMMULA
|
||||
OpARMMULS
|
||||
OpARMADDF
|
||||
OpARMADDD
|
||||
OpARMSUBF
|
||||
|
|
@ -8456,6 +8457,21 @@ var opcodeTable = [...]opInfo{
|
|||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "MULS",
|
||||
argLen: 3,
|
||||
asm: arm.AMULS,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
|
||||
{1, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
|
||||
{2, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "ADDF",
|
||||
argLen: 2,
|
||||
|
|
|
|||
|
|
@ -201,6 +201,8 @@ func rewriteValueARM(v *Value) bool {
|
|||
return rewriteValueARM_OpARMMUL_0(v) || rewriteValueARM_OpARMMUL_10(v) || rewriteValueARM_OpARMMUL_20(v)
|
||||
case OpARMMULA:
|
||||
return rewriteValueARM_OpARMMULA_0(v) || rewriteValueARM_OpARMMULA_10(v) || rewriteValueARM_OpARMMULA_20(v)
|
||||
case OpARMMULS:
|
||||
return rewriteValueARM_OpARMMULS_0(v) || rewriteValueARM_OpARMMULS_10(v)
|
||||
case OpARMMVN:
|
||||
return rewriteValueARM_OpARMMVN_0(v)
|
||||
case OpARMMVNshiftLL:
|
||||
|
|
@ -9571,6 +9573,488 @@ func rewriteValueARM_OpARMMULA_20(v *Value) bool {
|
|||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueARM_OpARMMULS_0(v *Value) bool {
|
||||
b := v.Block
|
||||
_ = b
|
||||
// match: (MULS x (MOVWconst [c]) a)
|
||||
// cond: int32(c) == -1
|
||||
// result: (ADD a x)
|
||||
for {
|
||||
_ = v.Args[2]
|
||||
x := v.Args[0]
|
||||
v_1 := v.Args[1]
|
||||
if v_1.Op != OpARMMOVWconst {
|
||||
break
|
||||
}
|
||||
c := v_1.AuxInt
|
||||
a := v.Args[2]
|
||||
if !(int32(c) == -1) {
|
||||
break
|
||||
}
|
||||
v.reset(OpARMADD)
|
||||
v.AddArg(a)
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
// match: (MULS _ (MOVWconst [0]) a)
|
||||
// cond:
|
||||
// result: a
|
||||
for {
|
||||
_ = v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
if v_1.Op != OpARMMOVWconst {
|
||||
break
|
||||
}
|
||||
if v_1.AuxInt != 0 {
|
||||
break
|
||||
}
|
||||
a := v.Args[2]
|
||||
v.reset(OpCopy)
|
||||
v.Type = a.Type
|
||||
v.AddArg(a)
|
||||
return true
|
||||
}
|
||||
// match: (MULS x (MOVWconst [1]) a)
|
||||
// cond:
|
||||
// result: (RSB x a)
|
||||
for {
|
||||
_ = v.Args[2]
|
||||
x := v.Args[0]
|
||||
v_1 := v.Args[1]
|
||||
if v_1.Op != OpARMMOVWconst {
|
||||
break
|
||||
}
|
||||
if v_1.AuxInt != 1 {
|
||||
break
|
||||
}
|
||||
a := v.Args[2]
|
||||
v.reset(OpARMRSB)
|
||||
v.AddArg(x)
|
||||
v.AddArg(a)
|
||||
return true
|
||||
}
|
||||
// match: (MULS x (MOVWconst [c]) a)
|
||||
// cond: isPowerOfTwo(c)
|
||||
// result: (RSB (SLLconst <x.Type> [log2(c)] x) a)
|
||||
for {
|
||||
_ = v.Args[2]
|
||||
x := v.Args[0]
|
||||
v_1 := v.Args[1]
|
||||
if v_1.Op != OpARMMOVWconst {
|
||||
break
|
||||
}
|
||||
c := v_1.AuxInt
|
||||
a := v.Args[2]
|
||||
if !(isPowerOfTwo(c)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpARMRSB)
|
||||
v0 := b.NewValue0(v.Pos, OpARMSLLconst, x.Type)
|
||||
v0.AuxInt = log2(c)
|
||||
v0.AddArg(x)
|
||||
v.AddArg(v0)
|
||||
v.AddArg(a)
|
||||
return true
|
||||
}
|
||||
// match: (MULS x (MOVWconst [c]) a)
|
||||
// cond: isPowerOfTwo(c-1) && int32(c) >= 3
|
||||
// result: (RSB (ADDshiftLL <x.Type> x x [log2(c-1)]) a)
|
||||
for {
|
||||
_ = v.Args[2]
|
||||
x := v.Args[0]
|
||||
v_1 := v.Args[1]
|
||||
if v_1.Op != OpARMMOVWconst {
|
||||
break
|
||||
}
|
||||
c := v_1.AuxInt
|
||||
a := v.Args[2]
|
||||
if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
|
||||
break
|
||||
}
|
||||
v.reset(OpARMRSB)
|
||||
v0 := b.NewValue0(v.Pos, OpARMADDshiftLL, x.Type)
|
||||
v0.AuxInt = log2(c - 1)
|
||||
v0.AddArg(x)
|
||||
v0.AddArg(x)
|
||||
v.AddArg(v0)
|
||||
v.AddArg(a)
|
||||
return true
|
||||
}
|
||||
// match: (MULS x (MOVWconst [c]) a)
|
||||
// cond: isPowerOfTwo(c+1) && int32(c) >= 7
|
||||
// result: (RSB (RSBshiftLL <x.Type> x x [log2(c+1)]) a)
|
||||
for {
|
||||
_ = v.Args[2]
|
||||
x := v.Args[0]
|
||||
v_1 := v.Args[1]
|
||||
if v_1.Op != OpARMMOVWconst {
|
||||
break
|
||||
}
|
||||
c := v_1.AuxInt
|
||||
a := v.Args[2]
|
||||
if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
|
||||
break
|
||||
}
|
||||
v.reset(OpARMRSB)
|
||||
v0 := b.NewValue0(v.Pos, OpARMRSBshiftLL, x.Type)
|
||||
v0.AuxInt = log2(c + 1)
|
||||
v0.AddArg(x)
|
||||
v0.AddArg(x)
|
||||
v.AddArg(v0)
|
||||
v.AddArg(a)
|
||||
return true
|
||||
}
|
||||
// match: (MULS x (MOVWconst [c]) a)
|
||||
// cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
|
||||
// result: (RSB (SLLconst <x.Type> [log2(c/3)] (ADDshiftLL <x.Type> x x [1])) a)
|
||||
for {
|
||||
_ = v.Args[2]
|
||||
x := v.Args[0]
|
||||
v_1 := v.Args[1]
|
||||
if v_1.Op != OpARMMOVWconst {
|
||||
break
|
||||
}
|
||||
c := v_1.AuxInt
|
||||
a := v.Args[2]
|
||||
if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpARMRSB)
|
||||
v0 := b.NewValue0(v.Pos, OpARMSLLconst, x.Type)
|
||||
v0.AuxInt = log2(c / 3)
|
||||
v1 := b.NewValue0(v.Pos, OpARMADDshiftLL, x.Type)
|
||||
v1.AuxInt = 1
|
||||
v1.AddArg(x)
|
||||
v1.AddArg(x)
|
||||
v0.AddArg(v1)
|
||||
v.AddArg(v0)
|
||||
v.AddArg(a)
|
||||
return true
|
||||
}
|
||||
// match: (MULS x (MOVWconst [c]) a)
|
||||
// cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
|
||||
// result: (RSB (SLLconst <x.Type> [log2(c/5)] (ADDshiftLL <x.Type> x x [2])) a)
|
||||
for {
|
||||
_ = v.Args[2]
|
||||
x := v.Args[0]
|
||||
v_1 := v.Args[1]
|
||||
if v_1.Op != OpARMMOVWconst {
|
||||
break
|
||||
}
|
||||
c := v_1.AuxInt
|
||||
a := v.Args[2]
|
||||
if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpARMRSB)
|
||||
v0 := b.NewValue0(v.Pos, OpARMSLLconst, x.Type)
|
||||
v0.AuxInt = log2(c / 5)
|
||||
v1 := b.NewValue0(v.Pos, OpARMADDshiftLL, x.Type)
|
||||
v1.AuxInt = 2
|
||||
v1.AddArg(x)
|
||||
v1.AddArg(x)
|
||||
v0.AddArg(v1)
|
||||
v.AddArg(v0)
|
||||
v.AddArg(a)
|
||||
return true
|
||||
}
|
||||
// match: (MULS x (MOVWconst [c]) a)
|
||||
// cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
|
||||
// result: (RSB (SLLconst <x.Type> [log2(c/7)] (RSBshiftLL <x.Type> x x [3])) a)
|
||||
for {
|
||||
_ = v.Args[2]
|
||||
x := v.Args[0]
|
||||
v_1 := v.Args[1]
|
||||
if v_1.Op != OpARMMOVWconst {
|
||||
break
|
||||
}
|
||||
c := v_1.AuxInt
|
||||
a := v.Args[2]
|
||||
if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpARMRSB)
|
||||
v0 := b.NewValue0(v.Pos, OpARMSLLconst, x.Type)
|
||||
v0.AuxInt = log2(c / 7)
|
||||
v1 := b.NewValue0(v.Pos, OpARMRSBshiftLL, x.Type)
|
||||
v1.AuxInt = 3
|
||||
v1.AddArg(x)
|
||||
v1.AddArg(x)
|
||||
v0.AddArg(v1)
|
||||
v.AddArg(v0)
|
||||
v.AddArg(a)
|
||||
return true
|
||||
}
|
||||
// match: (MULS x (MOVWconst [c]) a)
|
||||
// cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
|
||||
// result: (RSB (SLLconst <x.Type> [log2(c/9)] (ADDshiftLL <x.Type> x x [3])) a)
|
||||
for {
|
||||
_ = v.Args[2]
|
||||
x := v.Args[0]
|
||||
v_1 := v.Args[1]
|
||||
if v_1.Op != OpARMMOVWconst {
|
||||
break
|
||||
}
|
||||
c := v_1.AuxInt
|
||||
a := v.Args[2]
|
||||
if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpARMRSB)
|
||||
v0 := b.NewValue0(v.Pos, OpARMSLLconst, x.Type)
|
||||
v0.AuxInt = log2(c / 9)
|
||||
v1 := b.NewValue0(v.Pos, OpARMADDshiftLL, x.Type)
|
||||
v1.AuxInt = 3
|
||||
v1.AddArg(x)
|
||||
v1.AddArg(x)
|
||||
v0.AddArg(v1)
|
||||
v.AddArg(v0)
|
||||
v.AddArg(a)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueARM_OpARMMULS_10(v *Value) bool {
|
||||
b := v.Block
|
||||
_ = b
|
||||
// match: (MULS (MOVWconst [c]) x a)
|
||||
// cond: int32(c) == -1
|
||||
// result: (ADD a x)
|
||||
for {
|
||||
_ = v.Args[2]
|
||||
v_0 := v.Args[0]
|
||||
if v_0.Op != OpARMMOVWconst {
|
||||
break
|
||||
}
|
||||
c := v_0.AuxInt
|
||||
x := v.Args[1]
|
||||
a := v.Args[2]
|
||||
if !(int32(c) == -1) {
|
||||
break
|
||||
}
|
||||
v.reset(OpARMADD)
|
||||
v.AddArg(a)
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
// match: (MULS (MOVWconst [0]) _ a)
|
||||
// cond:
|
||||
// result: a
|
||||
for {
|
||||
_ = v.Args[2]
|
||||
v_0 := v.Args[0]
|
||||
if v_0.Op != OpARMMOVWconst {
|
||||
break
|
||||
}
|
||||
if v_0.AuxInt != 0 {
|
||||
break
|
||||
}
|
||||
a := v.Args[2]
|
||||
v.reset(OpCopy)
|
||||
v.Type = a.Type
|
||||
v.AddArg(a)
|
||||
return true
|
||||
}
|
||||
// match: (MULS (MOVWconst [1]) x a)
|
||||
// cond:
|
||||
// result: (RSB x a)
|
||||
for {
|
||||
_ = v.Args[2]
|
||||
v_0 := v.Args[0]
|
||||
if v_0.Op != OpARMMOVWconst {
|
||||
break
|
||||
}
|
||||
if v_0.AuxInt != 1 {
|
||||
break
|
||||
}
|
||||
x := v.Args[1]
|
||||
a := v.Args[2]
|
||||
v.reset(OpARMRSB)
|
||||
v.AddArg(x)
|
||||
v.AddArg(a)
|
||||
return true
|
||||
}
|
||||
// match: (MULS (MOVWconst [c]) x a)
|
||||
// cond: isPowerOfTwo(c)
|
||||
// result: (RSB (SLLconst <x.Type> [log2(c)] x) a)
|
||||
for {
|
||||
_ = v.Args[2]
|
||||
v_0 := v.Args[0]
|
||||
if v_0.Op != OpARMMOVWconst {
|
||||
break
|
||||
}
|
||||
c := v_0.AuxInt
|
||||
x := v.Args[1]
|
||||
a := v.Args[2]
|
||||
if !(isPowerOfTwo(c)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpARMRSB)
|
||||
v0 := b.NewValue0(v.Pos, OpARMSLLconst, x.Type)
|
||||
v0.AuxInt = log2(c)
|
||||
v0.AddArg(x)
|
||||
v.AddArg(v0)
|
||||
v.AddArg(a)
|
||||
return true
|
||||
}
|
||||
// match: (MULS (MOVWconst [c]) x a)
|
||||
// cond: isPowerOfTwo(c-1) && int32(c) >= 3
|
||||
// result: (RSB (ADDshiftLL <x.Type> x x [log2(c-1)]) a)
|
||||
for {
|
||||
_ = v.Args[2]
|
||||
v_0 := v.Args[0]
|
||||
if v_0.Op != OpARMMOVWconst {
|
||||
break
|
||||
}
|
||||
c := v_0.AuxInt
|
||||
x := v.Args[1]
|
||||
a := v.Args[2]
|
||||
if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
|
||||
break
|
||||
}
|
||||
v.reset(OpARMRSB)
|
||||
v0 := b.NewValue0(v.Pos, OpARMADDshiftLL, x.Type)
|
||||
v0.AuxInt = log2(c - 1)
|
||||
v0.AddArg(x)
|
||||
v0.AddArg(x)
|
||||
v.AddArg(v0)
|
||||
v.AddArg(a)
|
||||
return true
|
||||
}
|
||||
// match: (MULS (MOVWconst [c]) x a)
|
||||
// cond: isPowerOfTwo(c+1) && int32(c) >= 7
|
||||
// result: (RSB (RSBshiftLL <x.Type> x x [log2(c+1)]) a)
|
||||
for {
|
||||
_ = v.Args[2]
|
||||
v_0 := v.Args[0]
|
||||
if v_0.Op != OpARMMOVWconst {
|
||||
break
|
||||
}
|
||||
c := v_0.AuxInt
|
||||
x := v.Args[1]
|
||||
a := v.Args[2]
|
||||
if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
|
||||
break
|
||||
}
|
||||
v.reset(OpARMRSB)
|
||||
v0 := b.NewValue0(v.Pos, OpARMRSBshiftLL, x.Type)
|
||||
v0.AuxInt = log2(c + 1)
|
||||
v0.AddArg(x)
|
||||
v0.AddArg(x)
|
||||
v.AddArg(v0)
|
||||
v.AddArg(a)
|
||||
return true
|
||||
}
|
||||
// match: (MULS (MOVWconst [c]) x a)
|
||||
// cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
|
||||
// result: (RSB (SLLconst <x.Type> [log2(c/3)] (ADDshiftLL <x.Type> x x [1])) a)
|
||||
for {
|
||||
_ = v.Args[2]
|
||||
v_0 := v.Args[0]
|
||||
if v_0.Op != OpARMMOVWconst {
|
||||
break
|
||||
}
|
||||
c := v_0.AuxInt
|
||||
x := v.Args[1]
|
||||
a := v.Args[2]
|
||||
if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpARMRSB)
|
||||
v0 := b.NewValue0(v.Pos, OpARMSLLconst, x.Type)
|
||||
v0.AuxInt = log2(c / 3)
|
||||
v1 := b.NewValue0(v.Pos, OpARMADDshiftLL, x.Type)
|
||||
v1.AuxInt = 1
|
||||
v1.AddArg(x)
|
||||
v1.AddArg(x)
|
||||
v0.AddArg(v1)
|
||||
v.AddArg(v0)
|
||||
v.AddArg(a)
|
||||
return true
|
||||
}
|
||||
// match: (MULS (MOVWconst [c]) x a)
|
||||
// cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
|
||||
// result: (RSB (SLLconst <x.Type> [log2(c/5)] (ADDshiftLL <x.Type> x x [2])) a)
|
||||
for {
|
||||
_ = v.Args[2]
|
||||
v_0 := v.Args[0]
|
||||
if v_0.Op != OpARMMOVWconst {
|
||||
break
|
||||
}
|
||||
c := v_0.AuxInt
|
||||
x := v.Args[1]
|
||||
a := v.Args[2]
|
||||
if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpARMRSB)
|
||||
v0 := b.NewValue0(v.Pos, OpARMSLLconst, x.Type)
|
||||
v0.AuxInt = log2(c / 5)
|
||||
v1 := b.NewValue0(v.Pos, OpARMADDshiftLL, x.Type)
|
||||
v1.AuxInt = 2
|
||||
v1.AddArg(x)
|
||||
v1.AddArg(x)
|
||||
v0.AddArg(v1)
|
||||
v.AddArg(v0)
|
||||
v.AddArg(a)
|
||||
return true
|
||||
}
|
||||
// match: (MULS (MOVWconst [c]) x a)
|
||||
// cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
|
||||
// result: (RSB (SLLconst <x.Type> [log2(c/7)] (RSBshiftLL <x.Type> x x [3])) a)
|
||||
for {
|
||||
_ = v.Args[2]
|
||||
v_0 := v.Args[0]
|
||||
if v_0.Op != OpARMMOVWconst {
|
||||
break
|
||||
}
|
||||
c := v_0.AuxInt
|
||||
x := v.Args[1]
|
||||
a := v.Args[2]
|
||||
if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpARMRSB)
|
||||
v0 := b.NewValue0(v.Pos, OpARMSLLconst, x.Type)
|
||||
v0.AuxInt = log2(c / 7)
|
||||
v1 := b.NewValue0(v.Pos, OpARMRSBshiftLL, x.Type)
|
||||
v1.AuxInt = 3
|
||||
v1.AddArg(x)
|
||||
v1.AddArg(x)
|
||||
v0.AddArg(v1)
|
||||
v.AddArg(v0)
|
||||
v.AddArg(a)
|
||||
return true
|
||||
}
|
||||
// match: (MULS (MOVWconst [c]) x a)
|
||||
// cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
|
||||
// result: (RSB (SLLconst <x.Type> [log2(c/9)] (ADDshiftLL <x.Type> x x [3])) a)
|
||||
for {
|
||||
_ = v.Args[2]
|
||||
v_0 := v.Args[0]
|
||||
if v_0.Op != OpARMMOVWconst {
|
||||
break
|
||||
}
|
||||
c := v_0.AuxInt
|
||||
x := v.Args[1]
|
||||
a := v.Args[2]
|
||||
if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpARMRSB)
|
||||
v0 := b.NewValue0(v.Pos, OpARMSLLconst, x.Type)
|
||||
v0.AuxInt = log2(c / 9)
|
||||
v1 := b.NewValue0(v.Pos, OpARMADDshiftLL, x.Type)
|
||||
v1.AuxInt = 3
|
||||
v1.AddArg(x)
|
||||
v1.AddArg(x)
|
||||
v0.AddArg(v1)
|
||||
v.AddArg(v0)
|
||||
v.AddArg(a)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueARM_OpARMMVN_0(v *Value) bool {
|
||||
// match: (MVN (MOVWconst [c]))
|
||||
// cond:
|
||||
|
|
@ -10835,6 +11319,28 @@ func rewriteValueARM_OpARMRSB_10(v *Value) bool {
|
|||
v.AuxInt = 0
|
||||
return true
|
||||
}
|
||||
// match: (RSB (MUL x y) a)
|
||||
// cond: objabi.GOARM == 7
|
||||
// result: (MULS x y a)
|
||||
for {
|
||||
_ = v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
if v_0.Op != OpARMMUL {
|
||||
break
|
||||
}
|
||||
_ = v_0.Args[1]
|
||||
x := v_0.Args[0]
|
||||
y := v_0.Args[1]
|
||||
a := v.Args[1]
|
||||
if !(objabi.GOARM == 7) {
|
||||
break
|
||||
}
|
||||
v.reset(OpARMMULS)
|
||||
v.AddArg(x)
|
||||
v.AddArg(y)
|
||||
v.AddArg(a)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueARM_OpARMRSBSshiftLL_0(v *Value) bool {
|
||||
|
|
@ -12865,6 +13371,28 @@ func rewriteValueARM_OpARMSUB_10(v *Value) bool {
|
|||
v.AuxInt = 0
|
||||
return true
|
||||
}
|
||||
// match: (SUB a (MUL x y))
|
||||
// cond: objabi.GOARM == 7
|
||||
// result: (MULS x y a)
|
||||
for {
|
||||
_ = v.Args[1]
|
||||
a := v.Args[0]
|
||||
v_1 := v.Args[1]
|
||||
if v_1.Op != OpARMMUL {
|
||||
break
|
||||
}
|
||||
_ = v_1.Args[1]
|
||||
x := v_1.Args[0]
|
||||
y := v_1.Args[1]
|
||||
if !(objabi.GOARM == 7) {
|
||||
break
|
||||
}
|
||||
v.reset(OpARMMULS)
|
||||
v.AddArg(x)
|
||||
v.AddArg(y)
|
||||
v.AddArg(a)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueARM_OpARMSUBS_0(v *Value) bool {
|
||||
|
|
|
|||
Loading…
Reference in New Issue