cmd/compile: on AMD64, prefer XOR/AND for (x & 1) == 0 check

It's shorter to encode. Additionally, XOR and AND generally
have higher throughput than BT/SET*.

compilecmp:

runtime
runtime.(*sweepClass).split 58 -> 56  (-3.45%)
runtime.sweepClass.split 14 -> 11  (-21.43%)

runtime [cmd/compile]
runtime.(*sweepClass).split 58 -> 56  (-3.45%)
runtime.sweepClass.split 14 -> 11  (-21.43%)

strconv
strconv.ryuFtoaShortest changed

strconv [cmd/compile]
strconv.ryuFtoaShortest changed

math/big
math/big.(*Int).MulRange 255 -> 252  (-1.18%)

testing/quick
testing/quick.sizedValue changed

internal/fuzz
internal/fuzz.(*pcgRand).bool 69 -> 70  (+1.45%)

cmd/internal/obj/x86
cmd/internal/obj/x86.(*AsmBuf).asmevex changed

math/big [cmd/compile]
math/big.(*Int).MulRange 255 -> 252  (-1.18%)

cmd/internal/obj/x86 [cmd/compile]
cmd/internal/obj/x86.(*AsmBuf).asmevex changed

net/http
net/http.(*http2stream).isPushed 11 -> 10  (-9.09%)

cmd/vendor/github.com/google/pprof/internal/binutils
cmd/vendor/github.com/google/pprof/internal/binutils.(*file).computeBase changed

Change-Id: I9cb2987eb263c85ee4e93d6f8455c91a55273173
Reviewed-on: https://go-review.googlesource.com/c/go/+/640975
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Auto-Submit: Keith Randall <khr@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
Jakub Ciolek 2025-01-07 07:00:24 +01:00 committed by Gopher Robot
parent f7dbbf2519
commit e57769d5ad
3 changed files with 36 additions and 0 deletions

View File

@ -629,6 +629,8 @@
// x & 1 != 0 -> x & 1
(SETNE (TEST(B|W)const [1] x)) => (AND(L|L)const [1] x)
(SETB (BT(L|Q)const [0] x)) => (AND(L|Q)const [1] x)
// x & 1 == 0 -> (x & 1) ^ 1
(SETAE (BT(L|Q)const [0] x)) => (XORLconst [1] (ANDLconst <typ.Bool> [1] x))
// Recognize bit tests: a&(1<<b) != 0 for b suitably bounded
// Note that BTx instructions use the carry bit, so we need to convert tests for zero flag

View File

@ -16503,6 +16503,8 @@ func rewriteValueAMD64_OpAMD64SETA(v *Value) bool {
}
func rewriteValueAMD64_OpAMD64SETAE(v *Value) bool {
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (SETAE (TESTQ x x))
// result: (ConstBool [true])
for {
@ -16559,6 +16561,36 @@ func rewriteValueAMD64_OpAMD64SETAE(v *Value) bool {
v.AuxInt = boolToAuxInt(true)
return true
}
// match: (SETAE (BTLconst [0] x))
// result: (XORLconst [1] (ANDLconst <typ.Bool> [1] x))
for {
if v_0.Op != OpAMD64BTLconst || auxIntToInt8(v_0.AuxInt) != 0 {
break
}
x := v_0.Args[0]
v.reset(OpAMD64XORLconst)
v.AuxInt = int32ToAuxInt(1)
v0 := b.NewValue0(v.Pos, OpAMD64ANDLconst, typ.Bool)
v0.AuxInt = int32ToAuxInt(1)
v0.AddArg(x)
v.AddArg(v0)
return true
}
// match: (SETAE (BTQconst [0] x))
// result: (XORLconst [1] (ANDLconst <typ.Bool> [1] x))
for {
if v_0.Op != OpAMD64BTQconst || auxIntToInt8(v_0.AuxInt) != 0 {
break
}
x := v_0.Args[0]
v.reset(OpAMD64XORLconst)
v.AuxInt = int32ToAuxInt(1)
v0 := b.NewValue0(v.Pos, OpAMD64ANDLconst, typ.Bool)
v0.AuxInt = int32ToAuxInt(1)
v0.AddArg(x)
v.AddArg(v0)
return true
}
// match: (SETAE (InvertFlags x))
// result: (SETBE x)
for {

View File

@ -47,6 +47,7 @@ func convertNeqBool32(x uint32) bool {
func convertEqBool32(x uint32) bool {
// ppc64x:"RLDICL",-"CMPW","XOR",-"ISEL"
// amd64:"ANDL","XORL",-"BTL",-"SETCC"
return x&1 == 0
}
@ -57,6 +58,7 @@ func convertNeqBool64(x uint64) bool {
func convertEqBool64(x uint64) bool {
// ppc64x:"RLDICL","XOR",-"CMP",-"ISEL"
// amd64:"ANDL","XORL",-"BTL",-"SETCC"
return x&1 == 0
}