mirror of https://github.com/golang/go.git
cmd/compile: intrinsify math/bits.Add64 for ppc64x
This change creates an intrinsic for Add64 for ppc64x and adds a testcase for it. name old time/op new time/op delta Add64-160 1.90ns ±40% 2.29ns ± 0% ~ (p=0.119 n=5+5) Add64multiple-160 6.69ns ± 2% 2.45ns ± 4% -63.47% (p=0.016 n=4+5) Change-Id: I9abe6fb023fdf62eea3c9b46a1820f60bb0a7f97 Reviewed-on: https://go-review.googlesource.com/c/go/+/173758 Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com> Run-TryBot: Carlos Eduardo Seo <cseo@linux.vnet.ibm.com>
This commit is contained in:
parent
6b692300a1
commit
50ad09418e
|
|
@ -3573,8 +3573,8 @@ func init() {
|
||||||
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||||
return s.newValue3(ssa.OpAdd64carry, types.NewTuple(types.Types[TUINT64], types.Types[TUINT64]), args[0], args[1], args[2])
|
return s.newValue3(ssa.OpAdd64carry, types.NewTuple(types.Types[TUINT64], types.Types[TUINT64]), args[0], args[1], args[2])
|
||||||
},
|
},
|
||||||
sys.AMD64, sys.ARM64)
|
sys.AMD64, sys.ARM64, sys.PPC64)
|
||||||
alias("math/bits", "Add", "math/bits", "Add64", sys.ArchAMD64, sys.ArchARM64)
|
alias("math/bits", "Add", "math/bits", "Add64", sys.ArchAMD64, sys.ArchARM64, sys.ArchPPC64)
|
||||||
addF("math/bits", "Sub64",
|
addF("math/bits", "Sub64",
|
||||||
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||||
return s.newValue3(ssa.OpSub64borrow, types.NewTuple(types.Types[TUINT64], types.Types[TUINT64]), args[0], args[1], args[2])
|
return s.newValue3(ssa.OpSub64borrow, types.NewTuple(types.Types[TUINT64], types.Types[TUINT64]), args[0], args[1], args[2])
|
||||||
|
|
|
||||||
|
|
@ -172,6 +172,31 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
|
||||||
p1.To.Type = obj.TYPE_REG
|
p1.To.Type = obj.TYPE_REG
|
||||||
p1.To.Reg = v.Reg1()
|
p1.To.Reg = v.Reg1()
|
||||||
|
|
||||||
|
case ssa.OpPPC64LoweredAdd64Carry:
|
||||||
|
// ADDC Rarg2, -1, Rtmp
|
||||||
|
// ADDE Rarg1, Rarg0, Reg0
|
||||||
|
// ADDZE Rzero, Reg1
|
||||||
|
r0 := v.Args[0].Reg()
|
||||||
|
r1 := v.Args[1].Reg()
|
||||||
|
r2 := v.Args[2].Reg()
|
||||||
|
p := s.Prog(ppc64.AADDC)
|
||||||
|
p.From.Type = obj.TYPE_CONST
|
||||||
|
p.From.Offset = -1
|
||||||
|
p.Reg = r2
|
||||||
|
p.To.Type = obj.TYPE_REG
|
||||||
|
p.To.Reg = ppc64.REGTMP
|
||||||
|
p1 := s.Prog(ppc64.AADDE)
|
||||||
|
p1.From.Type = obj.TYPE_REG
|
||||||
|
p1.From.Reg = r1
|
||||||
|
p1.Reg = r0
|
||||||
|
p1.To.Type = obj.TYPE_REG
|
||||||
|
p1.To.Reg = v.Reg0()
|
||||||
|
p2 := s.Prog(ppc64.AADDZE)
|
||||||
|
p2.From.Type = obj.TYPE_REG
|
||||||
|
p2.From.Reg = ppc64.REGZERO
|
||||||
|
p2.To.Type = obj.TYPE_REG
|
||||||
|
p2.To.Reg = v.Reg1()
|
||||||
|
|
||||||
case ssa.OpPPC64LoweredAtomicAnd8,
|
case ssa.OpPPC64LoweredAtomicAnd8,
|
||||||
ssa.OpPPC64LoweredAtomicOr8:
|
ssa.OpPPC64LoweredAtomicOr8:
|
||||||
// LWSYNC
|
// LWSYNC
|
||||||
|
|
@ -620,7 +645,10 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
|
||||||
p.To.Type = obj.TYPE_REG
|
p.To.Type = obj.TYPE_REG
|
||||||
p.To.Reg = ppc64.REGTMP // Ignored; this is for the carry effect.
|
p.To.Reg = ppc64.REGTMP // Ignored; this is for the carry effect.
|
||||||
|
|
||||||
case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL, ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW, ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS, ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
|
case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
|
||||||
|
ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
|
||||||
|
ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
|
||||||
|
ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
|
||||||
r := v.Reg()
|
r := v.Reg()
|
||||||
p := s.Prog(v.Op.Asm())
|
p := s.Prog(v.Op.Asm())
|
||||||
p.To.Type = obj.TYPE_REG
|
p.To.Type = obj.TYPE_REG
|
||||||
|
|
|
||||||
|
|
@ -23,6 +23,7 @@
|
||||||
// (x + y) / 2 with x>=y -> (x - y) / 2 + y
|
// (x + y) / 2 with x>=y -> (x - y) / 2 + y
|
||||||
(Avg64u <t> x y) -> (ADD (SRDconst <t> (SUB <t> x y) [1]) y)
|
(Avg64u <t> x y) -> (ADD (SRDconst <t> (SUB <t> x y) [1]) y)
|
||||||
|
|
||||||
|
(Add64carry x y c) -> (LoweredAdd64Carry x y c)
|
||||||
(Mul64 x y) -> (MULLD x y)
|
(Mul64 x y) -> (MULLD x y)
|
||||||
(Mul(32|16|8) x y) -> (MULLW x y)
|
(Mul(32|16|8) x y) -> (MULLW x y)
|
||||||
(Mul64uhilo x y) -> (LoweredMuluhilo x y)
|
(Mul64uhilo x y) -> (LoweredMuluhilo x y)
|
||||||
|
|
|
||||||
|
|
@ -136,6 +136,7 @@ func init() {
|
||||||
gp11 = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}
|
gp11 = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}
|
||||||
gp21 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}}
|
gp21 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}}
|
||||||
gp22 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp, gp}}
|
gp22 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp, gp}}
|
||||||
|
gp32 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp, gp}}
|
||||||
gp1cr = regInfo{inputs: []regMask{gp | sp | sb}}
|
gp1cr = regInfo{inputs: []regMask{gp | sp | sb}}
|
||||||
gp2cr = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}}
|
gp2cr = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}}
|
||||||
crgp = regInfo{inputs: nil, outputs: []regMask{gp}}
|
crgp = regInfo{inputs: nil, outputs: []regMask{gp}}
|
||||||
|
|
@ -199,6 +200,7 @@ func init() {
|
||||||
{name: "ROTL", argLength: 2, reg: gp21, asm: "ROTL"}, // arg0 rotate left by arg1 mod 64
|
{name: "ROTL", argLength: 2, reg: gp21, asm: "ROTL"}, // arg0 rotate left by arg1 mod 64
|
||||||
{name: "ROTLW", argLength: 2, reg: gp21, asm: "ROTLW"}, // uint32(arg0) rotate left by arg1 mod 32
|
{name: "ROTLW", argLength: 2, reg: gp21, asm: "ROTLW"}, // uint32(arg0) rotate left by arg1 mod 32
|
||||||
|
|
||||||
|
{name: "LoweredAdd64Carry", argLength: 3, reg: gp32, resultNotInArgs: true}, // arg0 + arg1 + carry, returns (sum, carry)
|
||||||
{name: "ADDconstForCarry", argLength: 1, reg: regInfo{inputs: []regMask{gp | sp | sb}, clobbers: tmp}, aux: "Int16", asm: "ADDC", typ: "Flags"}, // _, carry := arg0 + aux
|
{name: "ADDconstForCarry", argLength: 1, reg: regInfo{inputs: []regMask{gp | sp | sb}, clobbers: tmp}, aux: "Int16", asm: "ADDC", typ: "Flags"}, // _, carry := arg0 + aux
|
||||||
{name: "MaskIfNotCarry", argLength: 1, reg: crgp, asm: "ADDME", typ: "Int64"}, // carry - 1 (if carry then 0 else -1)
|
{name: "MaskIfNotCarry", argLength: 1, reg: crgp, asm: "ADDME", typ: "Int64"}, // carry - 1 (if carry then 0 else -1)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1686,6 +1686,7 @@ const (
|
||||||
OpPPC64SLW
|
OpPPC64SLW
|
||||||
OpPPC64ROTL
|
OpPPC64ROTL
|
||||||
OpPPC64ROTLW
|
OpPPC64ROTLW
|
||||||
|
OpPPC64LoweredAdd64Carry
|
||||||
OpPPC64ADDconstForCarry
|
OpPPC64ADDconstForCarry
|
||||||
OpPPC64MaskIfNotCarry
|
OpPPC64MaskIfNotCarry
|
||||||
OpPPC64SRADconst
|
OpPPC64SRADconst
|
||||||
|
|
@ -22443,6 +22444,22 @@ var opcodeTable = [...]opInfo{
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "LoweredAdd64Carry",
|
||||||
|
argLen: 3,
|
||||||
|
resultNotInArgs: true,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
|
||||||
|
{1, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
|
||||||
|
{2, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
|
||||||
|
{1, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "ADDconstForCarry",
|
name: "ADDconstForCarry",
|
||||||
auxType: auxInt16,
|
auxType: auxInt16,
|
||||||
|
|
|
||||||
|
|
@ -29,6 +29,8 @@ func rewriteValuePPC64(v *Value) bool {
|
||||||
return rewriteValuePPC64_OpAdd64_0(v)
|
return rewriteValuePPC64_OpAdd64_0(v)
|
||||||
case OpAdd64F:
|
case OpAdd64F:
|
||||||
return rewriteValuePPC64_OpAdd64F_0(v)
|
return rewriteValuePPC64_OpAdd64F_0(v)
|
||||||
|
case OpAdd64carry:
|
||||||
|
return rewriteValuePPC64_OpAdd64carry_0(v)
|
||||||
case OpAdd8:
|
case OpAdd8:
|
||||||
return rewriteValuePPC64_OpAdd8_0(v)
|
return rewriteValuePPC64_OpAdd8_0(v)
|
||||||
case OpAddPtr:
|
case OpAddPtr:
|
||||||
|
|
@ -786,6 +788,21 @@ func rewriteValuePPC64_OpAdd64F_0(v *Value) bool {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
func rewriteValuePPC64_OpAdd64carry_0(v *Value) bool {
|
||||||
|
// match: (Add64carry x y c)
|
||||||
|
// cond:
|
||||||
|
// result: (LoweredAdd64Carry x y c)
|
||||||
|
for {
|
||||||
|
c := v.Args[2]
|
||||||
|
x := v.Args[0]
|
||||||
|
y := v.Args[1]
|
||||||
|
v.reset(OpPPC64LoweredAdd64Carry)
|
||||||
|
v.AddArg(x)
|
||||||
|
v.AddArg(y)
|
||||||
|
v.AddArg(c)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
func rewriteValuePPC64_OpAdd8_0(v *Value) bool {
|
func rewriteValuePPC64_OpAdd8_0(v *Value) bool {
|
||||||
// match: (Add8 x y)
|
// match: (Add8 x y)
|
||||||
// cond:
|
// cond:
|
||||||
|
|
|
||||||
|
|
@ -410,24 +410,32 @@ func AddM(p, q, r *[3]uint) {
|
||||||
func Add64(x, y, ci uint64) (r, co uint64) {
|
func Add64(x, y, ci uint64) (r, co uint64) {
|
||||||
// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
|
// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
|
||||||
// amd64:"NEGL","ADCQ","SBBQ","NEGQ"
|
// amd64:"NEGL","ADCQ","SBBQ","NEGQ"
|
||||||
|
// ppc64: "ADDC", "ADDE", "ADDZE"
|
||||||
|
// ppc64le: "ADDC", "ADDE", "ADDZE"
|
||||||
return bits.Add64(x, y, ci)
|
return bits.Add64(x, y, ci)
|
||||||
}
|
}
|
||||||
|
|
||||||
func Add64C(x, ci uint64) (r, co uint64) {
|
func Add64C(x, ci uint64) (r, co uint64) {
|
||||||
// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
|
// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
|
||||||
// amd64:"NEGL","ADCQ","SBBQ","NEGQ"
|
// amd64:"NEGL","ADCQ","SBBQ","NEGQ"
|
||||||
|
// ppc64: "ADDC", "ADDE", "ADDZE"
|
||||||
|
// ppc64le: "ADDC", "ADDE", "ADDZE"
|
||||||
return bits.Add64(x, 7, ci)
|
return bits.Add64(x, 7, ci)
|
||||||
}
|
}
|
||||||
|
|
||||||
func Add64Z(x, y uint64) (r, co uint64) {
|
func Add64Z(x, y uint64) (r, co uint64) {
|
||||||
// arm64:"ADDS","ADC",-"ADCS",-"ADD\t",-"CMP"
|
// arm64:"ADDS","ADC",-"ADCS",-"ADD\t",-"CMP"
|
||||||
// amd64:"ADDQ","SBBQ","NEGQ",-"NEGL",-"ADCQ"
|
// amd64:"ADDQ","SBBQ","NEGQ",-"NEGL",-"ADCQ"
|
||||||
|
// ppc64: "ADDC", "ADDE", "ADDZE"
|
||||||
|
// ppc64le: "ADDC", "ADDE", "ADDZE"
|
||||||
return bits.Add64(x, y, 0)
|
return bits.Add64(x, y, 0)
|
||||||
}
|
}
|
||||||
|
|
||||||
func Add64R(x, y, ci uint64) uint64 {
|
func Add64R(x, y, ci uint64) uint64 {
|
||||||
// arm64:"ADDS","ADCS",-"ADD\t",-"CMP"
|
// arm64:"ADDS","ADCS",-"ADD\t",-"CMP"
|
||||||
// amd64:"NEGL","ADCQ",-"SBBQ",-"NEGQ"
|
// amd64:"NEGL","ADCQ",-"SBBQ",-"NEGQ"
|
||||||
|
// ppc64: "ADDC", "ADDE", "ADDZE"
|
||||||
|
// ppc64le: "ADDC", "ADDE", "ADDZE"
|
||||||
r, _ := bits.Add64(x, y, ci)
|
r, _ := bits.Add64(x, y, ci)
|
||||||
return r
|
return r
|
||||||
}
|
}
|
||||||
|
|
@ -436,6 +444,8 @@ func Add64M(p, q, r *[3]uint64) {
|
||||||
r[0], c = bits.Add64(p[0], q[0], c)
|
r[0], c = bits.Add64(p[0], q[0], c)
|
||||||
// arm64:"ADCS",-"ADD\t",-"CMP"
|
// arm64:"ADCS",-"ADD\t",-"CMP"
|
||||||
// amd64:"ADCQ",-"NEGL",-"SBBQ",-"NEGQ"
|
// amd64:"ADCQ",-"NEGL",-"SBBQ",-"NEGQ"
|
||||||
|
// ppc64: "ADDC", "ADDE", "ADDZE"
|
||||||
|
// ppc64le: "ADDC", "ADDE", "ADDZE"
|
||||||
r[1], c = bits.Add64(p[1], q[1], c)
|
r[1], c = bits.Add64(p[1], q[1], c)
|
||||||
r[2], c = bits.Add64(p[2], q[2], c)
|
r[2], c = bits.Add64(p[2], q[2], c)
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue