cmd/compile: intrinsify Add64 on loong64

This is a follow up of CL 420094  on loong64.

Reduce go toolchain size slightly on linux/loong64.

compilecmp HEAD~1 -> HEAD
HEAD~1 (8a32354219): internal/trace: use strings.Builder
HEAD (1767784ac3): cmd/compile: intrinsify Add64 on loong64
platform: linux/loong64

file      before    after     Δ       %
addr2line 3882616   3882536   -80     -0.002%
api       5528866   5528450   -416    -0.008%
asm       5133780   5133796   +16     +0.000%
cgo       4668787   4668491   -296    -0.006%
compile   25163409  25164729  +1320   +0.005%
cover     4658055   4658007   -48     -0.001%
dist      3437783   3437727   -56     -0.002%
doc       3883069   3883205   +136    +0.004%
fix       3383254   3383070   -184    -0.005%
link      6747559   6747023   -536    -0.008%
nm        3793923   3793939   +16     +0.000%
objdump   4256628   4256812   +184    +0.004%
pack      2356328   2356144   -184    -0.008%
pprof     14233370  14131910  -101460 -0.713%
test2json 2638668   2638476   -192    -0.007%
trace     13392065  13360781  -31284  -0.234%
vet       7456388   7455588   -800    -0.011%
total     132498256 132364392 -133864 -0.101%

file                                    before    after     Δ       %
compile/internal/ssa.a                  35644590  35649482  +4892   +0.014%
compile/internal/ssagen.a               4101250   4099858   -1392   -0.034%
internal/edwards25519/field.a           226064    201718    -24346  -10.770%
internal/nistec/fiat.a                  1689922   1212254   -477668 -28.266%
tls.a                                   3256798   3256800   +2      +0.000%
big.a                                   1718552   1708518   -10034  -0.584%
bits.a                                  107786    106762    -1024   -0.950%
cmplx.a                                 169434    168214    -1220   -0.720%
math.a                                  581302    578762    -2540   -0.437%
netip.a                                 556096    555922    -174    -0.031%
net.a                                   3286526   3286528   +2      +0.000%
runtime.a                               8644786   8644510   -276    -0.003%
strconv.a                               519098    518374    -724    -0.139%
golang.org/x/crypto/internal/poly1305.a 115398    109546    -5852   -5.071%
total                                   260913122 260392768 -520354 -0.199%

Change-Id: I75b2bb7761fa5a0d0d032d4ebe3582d092ea77be
Reviewed-on: https://go-review.googlesource.com/c/go/+/428556
Reviewed-by: Carlos Amedee <carlos@golang.org>
Run-TryBot: Wayne Zuo <wdvxdr@golangcn.org>
Reviewed-by: David Chase <drchase@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
This commit is contained in:
Wayne Zuo 2022-09-06 22:12:16 +08:00 committed by David Chase
parent c4674e0134
commit 97760ed651
4 changed files with 81 additions and 6 deletions

View File

@ -38,6 +38,10 @@
(Mod8 x y) => (Select0 (DIVV (SignExt8to64 x) (SignExt8to64 y)))
(Mod8u x y) => (Select0 (DIVVU (ZeroExt8to64 x) (ZeroExt8to64 y)))
(Select0 <t> (Add64carry x y c)) => (ADDV (ADDV <t> x y) c)
(Select1 <t> (Add64carry x y c)) =>
(OR (SGTU <t> x s:(ADDV <t> x y)) (SGTU <t> s (ADDV <t> s c)))
// (x + y) / 2 with x>=y => (x - y) / 2 + y
(Avg64u <t> x y) => (ADDV (SRLVconst <t> (SUBV <t> x y) [1]) y)
@ -675,3 +679,7 @@
(GTZ (MOVVconst [c]) yes no) && c <= 0 => (First no yes)
(GEZ (MOVVconst [c]) yes no) && c >= 0 => (First yes no)
(GEZ (MOVVconst [c]) yes no) && c < 0 => (First no yes)
// SGT/SGTU with known outcomes.
(SGT x x) => (MOVVconst [0])
(SGTU x x) => (MOVVconst [0])

View File

@ -3367,6 +3367,17 @@ func rewriteValueLOONG64_OpLOONG64SGT(v *Value) bool {
v.AddArg(x)
return true
}
// match: (SGT x x)
// result: (MOVVconst [0])
for {
x := v_0
if x != v_1 {
break
}
v.reset(OpLOONG64MOVVconst)
v.AuxInt = int64ToAuxInt(0)
return true
}
return false
}
func rewriteValueLOONG64_OpLOONG64SGTU(v *Value) bool {
@ -3389,6 +3400,17 @@ func rewriteValueLOONG64_OpLOONG64SGTU(v *Value) bool {
v.AddArg(x)
return true
}
// match: (SGTU x x)
// result: (MOVVconst [0])
for {
x := v_0
if x != v_1 {
break
}
v.reset(OpLOONG64MOVVconst)
v.AuxInt = int64ToAuxInt(0)
return true
}
return false
}
func rewriteValueLOONG64_OpLOONG64SGTUconst(v *Value) bool {
@ -6806,6 +6828,22 @@ func rewriteValueLOONG64_OpSelect0(v *Value) bool {
v.AddArg(v0)
return true
}
// match: (Select0 <t> (Add64carry x y c))
// result: (ADDV (ADDV <t> x y) c)
for {
t := v.Type
if v_0.Op != OpAdd64carry {
break
}
c := v_0.Args[2]
x := v_0.Args[0]
y := v_0.Args[1]
v.reset(OpLOONG64ADDV)
v0 := b.NewValue0(v.Pos, OpLOONG64ADDV, t)
v0.AddArg2(x, y)
v.AddArg2(v0, c)
return true
}
// match: (Select0 (DIVVU _ (MOVVconst [1])))
// result: (MOVVconst [0])
for {
@ -6918,6 +6956,28 @@ func rewriteValueLOONG64_OpSelect1(v *Value) bool {
v.AddArg2(v0, v2)
return true
}
// match: (Select1 <t> (Add64carry x y c))
// result: (OR (SGTU <t> x s:(ADDV <t> x y)) (SGTU <t> s (ADDV <t> s c)))
for {
t := v.Type
if v_0.Op != OpAdd64carry {
break
}
c := v_0.Args[2]
x := v_0.Args[0]
y := v_0.Args[1]
v.reset(OpLOONG64OR)
v0 := b.NewValue0(v.Pos, OpLOONG64SGTU, t)
s := b.NewValue0(v.Pos, OpLOONG64ADDV, t)
s.AddArg2(x, y)
v0.AddArg2(x, s)
v2 := b.NewValue0(v.Pos, OpLOONG64SGTU, t)
v3 := b.NewValue0(v.Pos, OpLOONG64ADDV, t)
v3.AddArg2(s, c)
v2.AddArg2(s, v3)
v.AddArg2(v0, v2)
return true
}
// match: (Select1 (MULVU x (MOVVconst [-1])))
// result: (NEGV x)
for {

View File

@ -1692,12 +1692,12 @@ func (s *state) stmt(n ir.Node) {
// Currently doesn't really work because (*p)[:len(*p)] appears here as:
// tmp = len(*p)
// (*p)[:tmp]
//if j != nil && (j.Op == OLEN && SameSafeExpr(j.Left, n.Left)) {
// if j != nil && (j.Op == OLEN && SameSafeExpr(j.Left, n.Left)) {
// j = nil
//}
//if k != nil && (k.Op == OCAP && SameSafeExpr(k.Left, n.Left)) {
// }
// if k != nil && (k.Op == OCAP && SameSafeExpr(k.Left, n.Left)) {
// k = nil
//}
// }
if i == nil {
skip |= skipPtr
if j == nil {
@ -4730,8 +4730,8 @@ func InitTables() {
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return s.newValue3(ssa.OpAdd64carry, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1], args[2])
},
sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X, sys.RISCV64)
alias("math/bits", "Add", "math/bits", "Add64", sys.ArchAMD64, sys.ArchARM64, sys.ArchPPC64, sys.ArchPPC64LE, sys.ArchS390X, sys.ArchRISCV64)
sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X, sys.RISCV64, sys.Loong64)
alias("math/bits", "Add", "math/bits", "Add64", p8...)
addF("math/bits", "Sub64",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return s.newValue3(ssa.OpSub64borrow, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1], args[2])

View File

@ -451,6 +451,7 @@ func Add(x, y, ci uint) (r, co uint) {
func AddC(x, ci uint) (r, co uint) {
// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
// amd64:"NEGL","ADCQ","SBBQ","NEGQ"
// loong64: "ADDV", "SGTU"
// ppc64: "ADDC", "ADDE", "ADDZE"
// ppc64le: "ADDC", "ADDE", "ADDZE"
// s390x:"ADDE","ADDC\t[$]-1,"
@ -461,6 +462,7 @@ func AddC(x, ci uint) (r, co uint) {
func AddZ(x, y uint) (r, co uint) {
// arm64:"ADDS","ADC",-"ADCS",-"ADD\t",-"CMP"
// amd64:"ADDQ","SBBQ","NEGQ",-"NEGL",-"ADCQ"
// loong64: "ADDV", "SGTU"
// ppc64: "ADDC", -"ADDE", "ADDZE"
// ppc64le: "ADDC", -"ADDE", "ADDZE"
// s390x:"ADDC",-"ADDC\t[$]-1,"
@ -471,6 +473,7 @@ func AddZ(x, y uint) (r, co uint) {
func AddR(x, y, ci uint) uint {
// arm64:"ADDS","ADCS",-"ADD\t",-"CMP"
// amd64:"NEGL","ADCQ",-"SBBQ",-"NEGQ"
// loong64: "ADDV", -"SGTU"
// ppc64: "ADDC", "ADDE", -"ADDZE"
// ppc64le: "ADDC", "ADDE", -"ADDZE"
// s390x:"ADDE","ADDC\t[$]-1,"
@ -492,6 +495,7 @@ func AddM(p, q, r *[3]uint) {
func Add64(x, y, ci uint64) (r, co uint64) {
// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
// amd64:"NEGL","ADCQ","SBBQ","NEGQ"
// loong64: "ADDV", "SGTU"
// ppc64: "ADDC", "ADDE", "ADDZE"
// ppc64le: "ADDC", "ADDE", "ADDZE"
// s390x:"ADDE","ADDC\t[$]-1,"
@ -502,6 +506,7 @@ func Add64(x, y, ci uint64) (r, co uint64) {
func Add64C(x, ci uint64) (r, co uint64) {
// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
// amd64:"NEGL","ADCQ","SBBQ","NEGQ"
// loong64: "ADDV", "SGTU"
// ppc64: "ADDC", "ADDE", "ADDZE"
// ppc64le: "ADDC", "ADDE", "ADDZE"
// s390x:"ADDE","ADDC\t[$]-1,"
@ -512,6 +517,7 @@ func Add64C(x, ci uint64) (r, co uint64) {
func Add64Z(x, y uint64) (r, co uint64) {
// arm64:"ADDS","ADC",-"ADCS",-"ADD\t",-"CMP"
// amd64:"ADDQ","SBBQ","NEGQ",-"NEGL",-"ADCQ"
// loong64: "ADDV", "SGTU"
// ppc64: "ADDC", -"ADDE", "ADDZE"
// ppc64le: "ADDC", -"ADDE", "ADDZE"
// s390x:"ADDC",-"ADDC\t[$]-1,"
@ -522,6 +528,7 @@ func Add64Z(x, y uint64) (r, co uint64) {
func Add64R(x, y, ci uint64) uint64 {
// arm64:"ADDS","ADCS",-"ADD\t",-"CMP"
// amd64:"NEGL","ADCQ",-"SBBQ",-"NEGQ"
// loong64: "ADDV", -"SGTU"
// ppc64: "ADDC", "ADDE", -"ADDZE"
// ppc64le: "ADDC", "ADDE", -"ADDZE"
// s390x:"ADDE","ADDC\t[$]-1,"