cmd/compile: inline constant sized memclrNoHeapPointers calls on PPC64

Update the function isInlinableMemclr for ppc64 and ppc64le
to enable inlining for the constant sized cases < 512.

Larger cases can use dcbz which performs better but requires
alignment checking so it is best to continue using memclrNoHeapPointers
for those cases.

Results on p10:

MemclrKnownSize1         2.07ns ± 0%     0.57ns ± 0%   -72.59%
MemclrKnownSize2         2.56ns ± 5%     0.57ns ± 0%   -77.82%
MemclrKnownSize4         5.15ns ± 0%     0.57ns ± 0%   -89.00%
MemclrKnownSize8         2.23ns ± 0%     0.57ns ± 0%   -74.57%
MemclrKnownSize16        2.23ns ± 0%     0.50ns ± 0%   -77.74%
MemclrKnownSize32        2.28ns ± 0%     0.56ns ± 0%   -75.28%
MemclrKnownSize64        2.49ns ± 0%     0.72ns ± 0%   -70.95%
MemclrKnownSize112       2.97ns ± 2%     1.14ns ± 0%   -61.72%
MemclrKnownSize128       4.64ns ± 6%     2.45ns ± 1%   -47.17%
MemclrKnownSize192       5.45ns ± 5%     2.79ns ± 0%   -48.87%
MemclrKnownSize248       4.51ns ± 0%     2.83ns ± 0%   -37.12%
MemclrKnownSize256       6.34ns ± 1%     3.58ns ± 0%   -43.53%
MemclrKnownSize512       3.64ns ± 0%     3.64ns ± 0%    -0.03%
MemclrKnownSize1024      4.73ns ± 0%     4.73ns ± 0%    +0.01%
MemclrKnownSize4096      17.1ns ± 0%     17.1ns ± 0%    +0.07%
MemclrKnownSize512KiB    2.12µs ± 0%     2.12µs ± 0%      ~     (all equal)

Change-Id: If1abf5749f4802c64523a41fe0058bd144d0ea46
Reviewed-on: https://go-review.googlesource.com/c/go/+/464340
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
Reviewed-by: Jakub Ciolek <jakub@ciolek.dev>
Reviewed-by: Archana Ravindar <aravind5@in.ibm.com>
Reviewed-by: Ian Lance Taylor <iant@google.com>
Reviewed-by: Carlos Eduardo Seo <carlos.seo@linaro.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Than McIntosh <thanm@google.com>
This commit is contained in:
Lynn Boger 2023-01-31 14:27:30 -06:00
parent 612c00bf4d
commit ebe49f98c8
4 changed files with 25 additions and 11 deletions

View File

@ -2070,7 +2070,7 @@
// Turn known-size calls to memclrNoHeapPointers into a Zero.
// Note that we are using types.Types[types.TUINT8] instead of sptr.Type.Elem() - see issue 55122 and CL 431496 for more details.
(SelectN [0] call:(StaticCall {sym} sptr (Const(64|32) [c]) mem))
&& isInlinableMemclr(config)
&& isInlinableMemclr(config, int64(c))
&& isSameCall(sym, "runtime.memclrNoHeapPointers")
&& call.Uses == 1
&& clobber(call)

View File

@ -1365,10 +1365,16 @@ func zeroUpper56Bits(x *Value, depth int) bool {
return false
}
func isInlinableMemclr(c *Config) bool {
func isInlinableMemclr(c *Config, sz int64) bool {
// TODO: expand this check to allow other architectures
// see CL 454255 and issue 56997
return c.arch == "amd64" || c.arch == "arm64"
switch c.arch {
case "amd64", "arm64":
return true
case "ppc64le", "ppc64":
return sz < 512
}
return false
}
// isInlinableMemmove reports whether the given arch performs a Move of the given size

View File

@ -26411,7 +26411,7 @@ func rewriteValuegeneric_OpSelectN(v *Value) bool {
return true
}
// match: (SelectN [0] call:(StaticCall {sym} sptr (Const64 [c]) mem))
// cond: isInlinableMemclr(config) && isSameCall(sym, "runtime.memclrNoHeapPointers") && call.Uses == 1 && clobber(call)
// cond: isInlinableMemclr(config, int64(c)) && isSameCall(sym, "runtime.memclrNoHeapPointers") && call.Uses == 1 && clobber(call)
// result: (Zero {types.Types[types.TUINT8]} [int64(c)] sptr mem)
for {
if auxIntToInt64(v.AuxInt) != 0 {
@ -26429,7 +26429,7 @@ func rewriteValuegeneric_OpSelectN(v *Value) bool {
break
}
c := auxIntToInt64(call_1.AuxInt)
if !(isInlinableMemclr(config) && isSameCall(sym, "runtime.memclrNoHeapPointers") && call.Uses == 1 && clobber(call)) {
if !(isInlinableMemclr(config, int64(c)) && isSameCall(sym, "runtime.memclrNoHeapPointers") && call.Uses == 1 && clobber(call)) {
break
}
v.reset(OpZero)
@ -26439,7 +26439,7 @@ func rewriteValuegeneric_OpSelectN(v *Value) bool {
return true
}
// match: (SelectN [0] call:(StaticCall {sym} sptr (Const32 [c]) mem))
// cond: isInlinableMemclr(config) && isSameCall(sym, "runtime.memclrNoHeapPointers") && call.Uses == 1 && clobber(call)
// cond: isInlinableMemclr(config, int64(c)) && isSameCall(sym, "runtime.memclrNoHeapPointers") && call.Uses == 1 && clobber(call)
// result: (Zero {types.Types[types.TUINT8]} [int64(c)] sptr mem)
for {
if auxIntToInt64(v.AuxInt) != 0 {
@ -26457,7 +26457,7 @@ func rewriteValuegeneric_OpSelectN(v *Value) bool {
break
}
c := auxIntToInt32(call_1.AuxInt)
if !(isInlinableMemclr(config) && isSameCall(sym, "runtime.memclrNoHeapPointers") && call.Uses == 1 && clobber(call)) {
if !(isInlinableMemclr(config, int64(c)) && isSameCall(sym, "runtime.memclrNoHeapPointers") && call.Uses == 1 && clobber(call)) {
break
}
v.reset(OpZero)

View File

@ -47,7 +47,7 @@ func SliceExtensionConst(s []int) []int {
// amd64:-`.*runtime\.makeslice`
// amd64:-`.*runtime\.panicmakeslicelen`
// amd64:"MOVUPS\tX15"
// ppc64x:`.*runtime\.memclrNoHeapPointers`
// ppc64x:-`.*runtime\.memclrNoHeapPointers`
// ppc64x:-`.*runtime\.makeslice`
// ppc64x:-`.*runtime\.panicmakeslicelen`
return append(s, make([]int, 1<<2)...)
@ -58,7 +58,7 @@ func SliceExtensionConstInt64(s []int) []int {
// amd64:-`.*runtime\.makeslice`
// amd64:-`.*runtime\.panicmakeslicelen`
// amd64:"MOVUPS\tX15"
// ppc64x:`.*runtime\.memclrNoHeapPointers`
// ppc64x:-`.*runtime\.memclrNoHeapPointers`
// ppc64x:-`.*runtime\.makeslice`
// ppc64x:-`.*runtime\.panicmakeslicelen`
return append(s, make([]int, int64(1<<2))...)
@ -69,7 +69,7 @@ func SliceExtensionConstUint64(s []int) []int {
// amd64:-`.*runtime\.makeslice`
// amd64:-`.*runtime\.panicmakeslicelen`
// amd64:"MOVUPS\tX15"
// ppc64x:`.*runtime\.memclrNoHeapPointers`
// ppc64x:-`.*runtime\.memclrNoHeapPointers`
// ppc64x:-`.*runtime\.makeslice`
// ppc64x:-`.*runtime\.panicmakeslicelen`
return append(s, make([]int, uint64(1<<2))...)
@ -80,12 +80,20 @@ func SliceExtensionConstUint(s []int) []int {
// amd64:-`.*runtime\.makeslice`
// amd64:-`.*runtime\.panicmakeslicelen`
// amd64:"MOVUPS\tX15"
// ppc64x:`.*runtime\.memclrNoHeapPointers`
// ppc64x:-`.*runtime\.memclrNoHeapPointers`
// ppc64x:-`.*runtime\.makeslice`
// ppc64x:-`.*runtime\.panicmakeslicelen`
return append(s, make([]int, uint(1<<2))...)
}
// On ppc64x continue to use memclrNoHeapPointers
// for sizes >= 512.
func SliceExtensionConst512(s []int) []int {
// amd64:-`.*runtime\.memclrNoHeapPointers`
// ppc64x:`.*runtime\.memclrNoHeapPointers`
return append(s, make([]int, 1<<9)...)
}
func SliceExtensionPointer(s []*int, l int) []*int {
// amd64:`.*runtime\.memclrHasPointers`
// amd64:-`.*runtime\.makeslice`