mirror of https://github.com/golang/go.git
cmd/compile,cmd/internal/obj/ppc64: use mulli where possible
This adds support to allow the use of mulli when one of the multiply operands is a constant that fits in 16 bits. This especially helps in the case where this instruction appears in a loop since the load of the constant is not being moved out of the loop. Some improvements seen in compress/flate on power9: Decode/Digits/Huffman/1e4 259µs ± 0% 261µs ± 0% +0.57% (p=1.000 n=1+1) Decode/Digits/Huffman/1e5 2.43ms ± 0% 2.45ms ± 0% +0.79% (p=1.000 n=1+1) Decode/Digits/Huffman/1e6 23.9ms ± 0% 24.2ms ± 0% +0.86% (p=1.000 n=1+1) Decode/Digits/Speed/1e4 278µs ± 0% 279µs ± 0% +0.34% (p=1.000 n=1+1) Decode/Digits/Speed/1e5 2.80ms ± 0% 2.81ms ± 0% +0.29% (p=1.000 n=1+1) Decode/Digits/Speed/1e6 28.0ms ± 0% 28.1ms ± 0% +0.28% (p=1.000 n=1+1) Decode/Digits/Default/1e4 278µs ± 0% 278µs ± 0% +0.28% (p=1.000 n=1+1) Decode/Digits/Default/1e5 2.68ms ± 0% 2.69ms ± 0% +0.19% (p=1.000 n=1+1) Decode/Digits/Default/1e6 26.6ms ± 0% 26.6ms ± 0% +0.21% (p=1.000 n=1+1) Decode/Digits/Compression/1e4 278µs ± 0% 278µs ± 0% +0.00% (p=1.000 n=1+1) Decode/Digits/Compression/1e5 2.68ms ± 0% 2.69ms ± 0% +0.21% (p=1.000 n=1+1) Decode/Digits/Compression/1e6 26.6ms ± 0% 26.6ms ± 0% +0.07% (p=1.000 n=1+1) Decode/Newton/Huffman/1e4 322µs ± 0% 312µs ± 0% -2.84% (p=1.000 n=1+1) Decode/Newton/Huffman/1e5 3.11ms ± 0% 2.91ms ± 0% -6.41% (p=1.000 n=1+1) Decode/Newton/Huffman/1e6 31.4ms ± 0% 29.3ms ± 0% -6.85% (p=1.000 n=1+1) Decode/Newton/Speed/1e4 282µs ± 0% 269µs ± 0% -4.69% (p=1.000 n=1+1) Decode/Newton/Speed/1e5 2.29ms ± 0% 2.20ms ± 0% -4.13% (p=1.000 n=1+1) Decode/Newton/Speed/1e6 22.7ms ± 0% 21.3ms ± 0% -6.06% (p=1.000 n=1+1) Decode/Newton/Default/1e4 254µs ± 0% 237µs ± 0% -6.60% (p=1.000 n=1+1) Decode/Newton/Default/1e5 1.86ms ± 0% 1.75ms ± 0% -5.99% (p=1.000 n=1+1) Decode/Newton/Default/1e6 18.1ms ± 0% 17.4ms ± 0% -4.10% (p=1.000 n=1+1) Decode/Newton/Compression/1e4 254µs ± 0% 244µs ± 0% -3.91% (p=1.000 n=1+1) Decode/Newton/Compression/1e5 1.85ms ± 0% 1.79ms ± 0% -3.10% (p=1.000 n=1+1) Decode/Newton/Compression/1e6 18.0ms ± 0% 17.3ms ± 0% -3.88% (p=1.000 n=1+1) Change-Id: I840320fab1c4bf64c76b001c2651ab79f23df4eb Reviewed-on: https://go-review.googlesource.com/c/go/+/259444 Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Paul Murphy <murp@ibm.com> Reviewed-by: Carlos Eduardo Seo <carlos.seo@gmail.com> Trust: Lynn Boger <laboger@linux.vnet.ibm.com>
This commit is contained in:
parent
1fb149fd64
commit
bdab5df40f
|
|
@ -204,12 +204,16 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
|
|||
|
||||
MULLW R3, R4 // 7c8419d6
|
||||
MULLW R3, R4, R5 // 7ca419d6
|
||||
MULLW $10, R3 // 1c63000a
|
||||
MULLW $10000000, R3 // 641f009863ff96807c7f19d6
|
||||
MULLWCC R3, R4, R5 // 7ca419d7
|
||||
MULHW R3, R4, R5 // 7ca41896
|
||||
|
||||
MULHWU R3, R4, R5 // 7ca41816
|
||||
MULLD R3, R4 // 7c8419d2
|
||||
MULLD R4, R4, R5 // 7ca421d2
|
||||
MULLD $20, R4 // 1c840014
|
||||
MULLD $200000000, R4 // 641f0beb63ffc2007c9f21d2
|
||||
MULLDCC R3, R4, R5 // 7ca419d3
|
||||
MULHD R3, R4, R5 // 7ca41892
|
||||
MULHDCC R3, R4, R5 // 7ca41893
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ package gc
|
|||
import "testing"
|
||||
|
||||
var globl int64
|
||||
var globl32 int32
|
||||
|
||||
func BenchmarkLoadAdd(b *testing.B) {
|
||||
x := make([]int64, 1024)
|
||||
|
|
@ -42,6 +43,17 @@ func BenchmarkModify(b *testing.B) {
|
|||
}
|
||||
}
|
||||
|
||||
func BenchmarkMullImm(b *testing.B) {
|
||||
x := make([]int32, 1024)
|
||||
for i := 0; i < b.N; i++ {
|
||||
var s int32
|
||||
for i := range x {
|
||||
s += x[i] * 100
|
||||
}
|
||||
globl32 = s
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkConstModify(b *testing.B) {
|
||||
a := make([]int64, 1024)
|
||||
for i := 0; i < b.N; i++ {
|
||||
|
|
|
|||
|
|
@ -677,7 +677,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
|
|||
p.From.Reg = v.Args[0].Reg()
|
||||
|
||||
case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
|
||||
ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst:
|
||||
ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst,
|
||||
ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst, ssa.OpPPC64MULLWconst, ssa.OpPPC64MULLDconst:
|
||||
p := s.Prog(v.Op.Asm())
|
||||
p.Reg = v.Args[0].Reg()
|
||||
p.From.Type = obj.TYPE_CONST
|
||||
|
|
|
|||
|
|
@ -821,6 +821,8 @@
|
|||
|
||||
(ADDconst [c] (MOVDaddr [d] {sym} x)) && is32Bit(c+int64(d)) => (MOVDaddr [int32(c+int64(d))] {sym} x)
|
||||
|
||||
(MULL(W|D) x (MOVDconst [c])) && is16Bit(c) => (MULL(W|D)const [int32(c)] x)
|
||||
|
||||
// Subtract from (with carry, but ignored) constant.
|
||||
// Note, these clobber the carry bit.
|
||||
(SUB (MOVDconst [c]) x) && is32Bit(c) => (SUBFCconst [c] x)
|
||||
|
|
|
|||
|
|
@ -181,6 +181,8 @@ func init() {
|
|||
|
||||
{name: "MULLD", argLength: 2, reg: gp21, asm: "MULLD", typ: "Int64", commutative: true}, // arg0*arg1 (signed 64-bit)
|
||||
{name: "MULLW", argLength: 2, reg: gp21, asm: "MULLW", typ: "Int32", commutative: true}, // arg0*arg1 (signed 32-bit)
|
||||
{name: "MULLDconst", argLength: 1, reg: gp11, asm: "MULLD", aux: "Int32", typ: "Int64"}, // arg0*auxInt (signed 64-bit)
|
||||
{name: "MULLWconst", argLength: 1, reg: gp11, asm: "MULLW", aux: "Int32", typ: "Int64"}, // arg0*auxInt (signed 64-bit)
|
||||
{name: "MADDLD", argLength: 3, reg: gp31, asm: "MADDLD", typ: "Int64"}, // (arg0*arg1)+arg2 (signed 64-bit)
|
||||
|
||||
{name: "MULHD", argLength: 2, reg: gp21, asm: "MULHD", commutative: true}, // (arg0 * arg1) >> 64, signed
|
||||
|
|
|
|||
|
|
@ -1832,6 +1832,8 @@ const (
|
|||
OpPPC64FSUBS
|
||||
OpPPC64MULLD
|
||||
OpPPC64MULLW
|
||||
OpPPC64MULLDconst
|
||||
OpPPC64MULLWconst
|
||||
OpPPC64MADDLD
|
||||
OpPPC64MULHD
|
||||
OpPPC64MULHW
|
||||
|
|
@ -24377,6 +24379,34 @@ var opcodeTable = [...]opInfo{
|
|||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "MULLDconst",
|
||||
auxType: auxInt32,
|
||||
argLen: 1,
|
||||
asm: ppc64.AMULLD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "MULLWconst",
|
||||
auxType: auxInt32,
|
||||
argLen: 1,
|
||||
asm: ppc64.AMULLW,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "MADDLD",
|
||||
argLen: 3,
|
||||
|
|
|
|||
|
|
@ -568,6 +568,10 @@ func rewriteValuePPC64(v *Value) bool {
|
|||
return rewriteValuePPC64_OpPPC64MOVWstorezero(v)
|
||||
case OpPPC64MTVSRD:
|
||||
return rewriteValuePPC64_OpPPC64MTVSRD(v)
|
||||
case OpPPC64MULLD:
|
||||
return rewriteValuePPC64_OpPPC64MULLD(v)
|
||||
case OpPPC64MULLW:
|
||||
return rewriteValuePPC64_OpPPC64MULLW(v)
|
||||
case OpPPC64NEG:
|
||||
return rewriteValuePPC64_OpPPC64NEG(v)
|
||||
case OpPPC64NOR:
|
||||
|
|
@ -11003,6 +11007,56 @@ func rewriteValuePPC64_OpPPC64MTVSRD(v *Value) bool {
|
|||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValuePPC64_OpPPC64MULLD(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (MULLD x (MOVDconst [c]))
|
||||
// cond: is16Bit(c)
|
||||
// result: (MULLDconst [int32(c)] x)
|
||||
for {
|
||||
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
||||
x := v_0
|
||||
if v_1.Op != OpPPC64MOVDconst {
|
||||
continue
|
||||
}
|
||||
c := auxIntToInt64(v_1.AuxInt)
|
||||
if !(is16Bit(c)) {
|
||||
continue
|
||||
}
|
||||
v.reset(OpPPC64MULLDconst)
|
||||
v.AuxInt = int32ToAuxInt(int32(c))
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
break
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValuePPC64_OpPPC64MULLW(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (MULLW x (MOVDconst [c]))
|
||||
// cond: is16Bit(c)
|
||||
// result: (MULLWconst [int32(c)] x)
|
||||
for {
|
||||
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
||||
x := v_0
|
||||
if v_1.Op != OpPPC64MOVDconst {
|
||||
continue
|
||||
}
|
||||
c := auxIntToInt64(v_1.AuxInt)
|
||||
if !(is16Bit(c)) {
|
||||
continue
|
||||
}
|
||||
v.reset(OpPPC64MULLWconst)
|
||||
v.AuxInt = int32ToAuxInt(int32(c))
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
break
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValuePPC64_OpPPC64NEG(v *Value) bool {
|
||||
v_0 := v.Args[0]
|
||||
// match: (NEG (ADDconst [c] x))
|
||||
|
|
|
|||
|
|
@ -1279,6 +1279,9 @@ func buildop(ctxt *obj.Link) {
|
|||
case AREMD:
|
||||
opset(AREMDU, r0)
|
||||
|
||||
case AMULLW:
|
||||
opset(AMULLD, r0)
|
||||
|
||||
case ADIVW: /* op Rb[,Ra],Rd */
|
||||
opset(AMULHW, r0)
|
||||
|
||||
|
|
@ -1312,7 +1315,6 @@ func buildop(ctxt *obj.Link) {
|
|||
opset(AMULHDCC, r0)
|
||||
opset(AMULHDU, r0)
|
||||
opset(AMULHDUCC, r0)
|
||||
opset(AMULLD, r0)
|
||||
opset(AMULLDCC, r0)
|
||||
opset(AMULLDVCC, r0)
|
||||
opset(AMULLDV, r0)
|
||||
|
|
@ -1996,7 +1998,6 @@ func buildop(ctxt *obj.Link) {
|
|||
AMOVB, /* macro: move byte with sign extension */
|
||||
AMOVBU, /* macro: move byte with sign extension & update */
|
||||
AMOVFL,
|
||||
AMULLW,
|
||||
/* op $s[,r2],r3; op r1[,r2],r3; no cc/v */
|
||||
ASUBC, /* op r1,$s,r3; op r1[,r2],r3 */
|
||||
ASTSW,
|
||||
|
|
@ -4990,8 +4991,8 @@ func (c *ctxt9) opirr(a obj.As) uint32 {
|
|||
case ADARN:
|
||||
return OPVCC(31, 755, 0, 0) /* darn - v3.00 */
|
||||
|
||||
case AMULLW:
|
||||
return OPVCC(7, 0, 0, 0)
|
||||
case AMULLW, AMULLD:
|
||||
return OPVCC(7, 0, 0, 0) /* mulli works with MULLW or MULLD */
|
||||
|
||||
case AOR:
|
||||
return OPVCC(24, 0, 0, 0)
|
||||
|
|
|
|||
Loading…
Reference in New Issue