mirror of https://github.com/golang/go.git
cmd/compile: optimize math.Float32bits and math.Float32frombits on mipsx
This CL use MFC1/MTC1 instructions to move data between GPR and FPR instead of stores and loads to move float/int values.
goos: linux
goarch: mipsle
pkg: math
│ oldmathf │ newmathf │
│ sec/op │ sec/op vs base │
Acos-4 282.7n ± 0% 282.1n ± 0% -0.18% (p=0.010 n=8)
Acosh-4 450.8n ± 0% 450.9n ± 0% ~ (p=0.699 n=8)
Asin-4 272.6n ± 0% 272.1n ± 0% ~ (p=0.050 n=8)
Asinh-4 476.8n ± 0% 475.1n ± 0% -0.35% (p=0.018 n=8)
Atan-4 208.1n ± 0% 207.7n ± 0% -0.17% (p=0.009 n=8)
Atanh-4 448.8n ± 0% 448.7n ± 0% -0.03% (p=0.014 n=8)
Atan2-4 310.2n ± 0% 310.1n ± 0% ~ (p=0.133 n=8)
Cbrt-4 357.9n ± 0% 358.4n ± 0% +0.11% (p=0.014 n=8)
Ceil-4 203.8n ± 0% 204.7n ± 0% +0.42% (p=0.008 n=8)
Compare-4 21.12n ± 0% 22.09n ± 0% +4.59% (p=0.000 n=8)
Compare32-4 19.105n ± 0% 6.022n ± 0% -68.48% (p=0.000 n=8)
Copysign-4 33.17n ± 0% 33.15n ± 0% ~ (p=0.795 n=8)
Cos-4 385.2n ± 0% 384.8n ± 1% ~ (p=0.112 n=8)
Cosh-4 546.0n ± 0% 545.0n ± 0% -0.17% (p=0.012 n=8)
Erf-4 192.4n ± 0% 195.4n ± 1% +1.59% (p=0.000 n=8)
Erfc-4 187.8n ± 0% 192.7n ± 0% +2.64% (p=0.000 n=8)
Erfinv-4 221.8n ± 1% 219.8n ± 0% -0.88% (p=0.000 n=8)
Erfcinv-4 224.1n ± 1% 219.9n ± 0% -1.87% (p=0.000 n=8)
Exp-4 434.7n ± 0% 435.0n ± 0% ~ (p=0.339 n=8)
ExpGo-4 433.7n ± 0% 434.2n ± 0% +0.13% (p=0.005 n=8)
Expm1-4 243.0n ± 0% 242.9n ± 0% ~ (p=0.103 n=8)
Exp2-4 426.6n ± 0% 426.6n ± 0% ~ (p=0.822 n=8)
Exp2Go-4 425.6n ± 0% 425.5n ± 0% ~ (p=0.377 n=8)
Abs-4 8.033n ± 0% 8.029n ± 0% ~ (p=0.065 n=8)
Dim-4 18.07n ± 0% 18.07n ± 0% ~ (p=0.051 n=8)
Floor-4 151.6n ± 0% 151.6n ± 0% ~ (p=0.450 n=8)
Max-4 100.9n ± 8% 103.2n ± 2% ~ (p=0.099 n=8)
Min-4 116.4n ± 0% 116.4n ± 0% ~ (p=0.467 n=8)
Mod-4 959.6n ± 1% 950.9n ± 0% -0.91% (p=0.006 n=8)
Frexp-4 147.6n ± 0% 147.5n ± 0% -0.07% (p=0.026 n=8)
Gamma-4 482.7n ± 0% 478.2n ± 2% -0.92% (p=0.000 n=8)
Hypot-4 139.8n ± 1% 127.1n ± 8% -9.12% (p=0.000 n=8)
HypotGo-4 137.2n ± 7% 117.5n ± 2% -14.39% (p=0.001 n=8)
Ilogb-4 109.5n ± 0% 108.4n ± 1% -1.05% (p=0.001 n=8)
J0-4 1.304µ ± 0% 1.304µ ± 0% ~ (p=0.853 n=8)
J1-4 1.349µ ± 0% 1.331µ ± 0% -1.33% (p=0.000 n=8)
Jn-4 2.774µ ± 0% 2.750µ ± 0% -0.87% (p=0.000 n=8)
Ldexp-4 151.6n ± 0% 151.5n ± 0% ~ (p=0.695 n=8)
Lgamma-4 226.9n ± 0% 233.9n ± 0% +3.09% (p=0.000 n=8)
Log-4 407.6n ± 0% 407.4n ± 0% ~ (p=0.340 n=8)
Logb-4 121.5n ± 0% 121.5n ± 0% -0.08% (p=0.042 n=8)
Log1p-4 315.5n ± 0% 315.6n ± 0% ~ (p=0.930 n=8)
Log10-4 417.8n ± 0% 417.5n ± 0% ~ (p=0.053 n=8)
Log2-4 208.8n ± 0% 208.8n ± 0% ~ (p=0.582 n=8)
Modf-4 126.5n ± 0% 126.4n ± 0% ~ (p=0.128 n=8)
Nextafter32-4 112.45n ± 0% 82.27n ± 0% -26.84% (p=0.000 n=8)
Nextafter64-4 141.5n ± 0% 141.5n ± 0% ~ (p=0.569 n=8)
PowInt-4 754.0n ± 1% 754.6n ± 0% ~ (p=0.279 n=8)
PowFrac-4 1.608µ ± 1% 1.596µ ± 1% ~ (p=0.661 n=8)
Pow10Pos-4 18.07n ± 0% 18.07n ± 0% ~ (p=0.413 n=8)
Pow10Neg-4 17.08n ± 0% 18.07n ± 0% +5.80% (p=0.000 n=8)
Round-4 68.30n ± 0% 69.29n ± 0% +1.45% (p=0.000 n=8)
RoundToEven-4 78.33n ± 0% 78.34n ± 0% ~ (p=0.975 n=8)
Remainder-4 740.6n ± 1% 736.7n ± 0% ~ (p=0.098 n=8)
Signbit-4 18.08n ± 0% 18.07n ± 0% ~ (p=0.546 n=8)
Sin-4 389.4n ± 0% 389.5n ± 0% ~ (p=0.451 n=8)
Sincos-4 415.6n ± 0% 415.6n ± 0% ~ (p=0.450 n=8)
Sinh-4 607.0n ± 0% 590.8n ± 1% -2.68% (p=0.000 n=8)
SqrtIndirect-4 8.034n ± 0% 8.030n ± 0% ~ (p=0.487 n=8)
SqrtLatency-4 8.031n ± 0% 8.034n ± 0% ~ (p=0.152 n=8)
SqrtIndirectLatency-4 8.032n ± 0% 8.032n ± 0% ~ (p=0.818 n=8)
SqrtGoLatency-4 895.8n ± 0% 895.3n ± 0% ~ (p=0.553 n=8)
SqrtPrime-4 5.405µ ± 0% 5.379µ ± 0% -0.48% (p=0.000 n=8)
Tan-4 405.6n ± 0% 405.7n ± 0% ~ (p=0.980 n=8)
Tanh-4 545.1n ± 0% 545.1n ± 0% ~ (p=0.806 n=8)
Trunc-4 146.5n ± 0% 146.6n ± 0% ~ (p=0.380 n=8)
Y0-4 1.308µ ± 0% 1.306µ ± 0% ~ (p=0.071 n=8)
Y1-4 1.311µ ± 0% 1.315µ ± 0% +0.31% (p=0.000 n=8)
Yn-4 2.737µ ± 0% 2.745µ ± 0% +0.27% (p=0.000 n=8)
Float64bits-4 14.56n ± 0% 14.56n ± 0% ~ (p=0.689 n=8)
Float64frombits-4 19.08n ± 0% 19.08n ± 0% ~ (p=0.580 n=8)
Float32bits-4 13.050n ± 0% 5.019n ± 0% -61.54% (p=0.000 n=8)
Float32frombits-4 13.060n ± 0% 4.016n ± 0% -69.25% (p=0.000 n=8)
FMA-4 608.5n ± 0% 586.1n ± 0% -3.67% (p=0.000 n=8)
geomean 185.5n 176.2n -5.02%
Change-Id: Ibf91092ffe70104e6c5ec03bc76d51259818b9b3
Reviewed-on: https://go-review.googlesource.com/c/go/+/494535
Run-TryBot: Cherry Mui <cherryyz@google.com>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Heschi Kreinick <heschi@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
parent
02d234e34d
commit
d9fd19a7f5
|
|
@ -361,6 +361,8 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
|||
ssa.OpMIPSTRUNCDW,
|
||||
ssa.OpMIPSMOVFD,
|
||||
ssa.OpMIPSMOVDF,
|
||||
ssa.OpMIPSMOVWfpgp,
|
||||
ssa.OpMIPSMOVWgpfp,
|
||||
ssa.OpMIPSNEGF,
|
||||
ssa.OpMIPSNEGD,
|
||||
ssa.OpMIPSABSD,
|
||||
|
|
|
|||
|
|
@ -233,6 +233,15 @@
|
|||
(Store {t} ptr val mem) && t.Size() == 4 && t.IsFloat() => (MOVFstore ptr val mem)
|
||||
(Store {t} ptr val mem) && t.Size() == 8 && t.IsFloat() => (MOVDstore ptr val mem)
|
||||
|
||||
// float <=> int register moves, with no conversion.
|
||||
// These come up when compiling math.{Float32bits, Float32frombits}.
|
||||
(MOVWload [off] {sym} ptr (MOVFstore [off] {sym} ptr val _)) => (MOVWfpgp val)
|
||||
(MOVFload [off] {sym} ptr (MOVWstore [off] {sym} ptr val _)) => (MOVWgpfp val)
|
||||
|
||||
// Similarly for stores, if we see a store after FPR <=> GPR move, then redirect store to use the other register set.
|
||||
(MOVWstore [off] {sym} ptr (MOVWfpgp val) mem) => (MOVFstore [off] {sym} ptr val mem)
|
||||
(MOVFstore [off] {sym} ptr (MOVWgpfp val) mem) => (MOVWstore [off] {sym} ptr val mem)
|
||||
|
||||
// zero instructions
|
||||
(Zero [0] _ mem) => mem
|
||||
(Zero [1] ptr mem) => (MOVBstore ptr (MOVWconst [0]) mem)
|
||||
|
|
|
|||
|
|
@ -139,6 +139,8 @@ func init() {
|
|||
gpxchg = regInfo{inputs: []regMask{gpspsbg, gpg}, outputs: []regMask{gp}}
|
||||
gpcas = regInfo{inputs: []regMask{gpspsbg, gpg, gpg}, outputs: []regMask{gp}}
|
||||
gpstore0 = regInfo{inputs: []regMask{gpspsbg}}
|
||||
fpgp = regInfo{inputs: []regMask{fp}, outputs: []regMask{gp}}
|
||||
gpfp = regInfo{inputs: []regMask{gp}, outputs: []regMask{fp}}
|
||||
fp01 = regInfo{inputs: nil, outputs: []regMask{fp}}
|
||||
fp11 = regInfo{inputs: []regMask{fp}, outputs: []regMask{fp}}
|
||||
fp21 = regInfo{inputs: []regMask{fp, fp}, outputs: []regMask{fp}}
|
||||
|
|
@ -233,6 +235,10 @@ func init() {
|
|||
{name: "MOVHstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes of zero to arg0 + auxInt + aux. arg1=mem.
|
||||
{name: "MOVWstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVW", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of zero to arg0 + auxInt + aux. arg1=mem.
|
||||
|
||||
// moves (no conversion)
|
||||
{name: "MOVWfpgp", argLength: 1, reg: fpgp, asm: "MOVW"}, // move float32 to int32 (no conversion)
|
||||
{name: "MOVWgpfp", argLength: 1, reg: gpfp, asm: "MOVW"}, // move int32 to float32 (no conversion)
|
||||
|
||||
// conversions
|
||||
{name: "MOVBreg", argLength: 1, reg: gp11, asm: "MOVB"}, // move from arg0, sign-extended from byte
|
||||
{name: "MOVBUreg", argLength: 1, reg: gp11, asm: "MOVBU"}, // move from arg0, unsign-extended from byte
|
||||
|
|
|
|||
|
|
@ -1913,6 +1913,8 @@ const (
|
|||
OpMIPSMOVBstorezero
|
||||
OpMIPSMOVHstorezero
|
||||
OpMIPSMOVWstorezero
|
||||
OpMIPSMOVWfpgp
|
||||
OpMIPSMOVWgpfp
|
||||
OpMIPSMOVBreg
|
||||
OpMIPSMOVBUreg
|
||||
OpMIPSMOVHreg
|
||||
|
|
@ -25618,6 +25620,32 @@ var opcodeTable = [...]opInfo{
|
|||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "MOVWfpgp",
|
||||
argLen: 1,
|
||||
asm: mips.AMOVW,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 35183835217920}, // F0 F2 F4 F6 F8 F10 F12 F14 F16 F18 F20 F22 F24 F26 F28 F30
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 335544318}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R22 R24 R25 R28 R31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "MOVWgpfp",
|
||||
argLen: 1,
|
||||
asm: mips.AMOVW,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 335544318}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R22 R24 R25 R28 R31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 35183835217920}, // F0 F2 F4 F6 F8 F10 F12 F14 F16 F18 F20 F22 F24 F26 F28 F30
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "MOVBreg",
|
||||
argLen: 1,
|
||||
|
|
|
|||
|
|
@ -2974,6 +2974,23 @@ func rewriteValueMIPS_OpMIPSMOVDstore(v *Value) bool {
|
|||
func rewriteValueMIPS_OpMIPSMOVFload(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (MOVFload [off] {sym} ptr (MOVWstore [off] {sym} ptr val _))
|
||||
// result: (MOVWgpfp val)
|
||||
for {
|
||||
off := auxIntToInt32(v.AuxInt)
|
||||
sym := auxToSym(v.Aux)
|
||||
ptr := v_0
|
||||
if v_1.Op != OpMIPSMOVWstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym {
|
||||
break
|
||||
}
|
||||
val := v_1.Args[1]
|
||||
if ptr != v_1.Args[0] {
|
||||
break
|
||||
}
|
||||
v.reset(OpMIPSMOVWgpfp)
|
||||
v.AddArg(val)
|
||||
return true
|
||||
}
|
||||
// match: (MOVFload [off1] {sym} x:(ADDconst [off2] ptr) mem)
|
||||
// cond: (is16Bit(int64(off1+off2)) || x.Uses == 1)
|
||||
// result: (MOVFload [off1+off2] {sym} ptr mem)
|
||||
|
|
@ -3044,6 +3061,23 @@ func rewriteValueMIPS_OpMIPSMOVFstore(v *Value) bool {
|
|||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (MOVFstore [off] {sym} ptr (MOVWgpfp val) mem)
|
||||
// result: (MOVWstore [off] {sym} ptr val mem)
|
||||
for {
|
||||
off := auxIntToInt32(v.AuxInt)
|
||||
sym := auxToSym(v.Aux)
|
||||
ptr := v_0
|
||||
if v_1.Op != OpMIPSMOVWgpfp {
|
||||
break
|
||||
}
|
||||
val := v_1.Args[0]
|
||||
mem := v_2
|
||||
v.reset(OpMIPSMOVWstore)
|
||||
v.AuxInt = int32ToAuxInt(off)
|
||||
v.Aux = symToAux(sym)
|
||||
v.AddArg3(ptr, val, mem)
|
||||
return true
|
||||
}
|
||||
// match: (MOVFstore [off1] {sym} x:(ADDconst [off2] ptr) val mem)
|
||||
// cond: (is16Bit(int64(off1+off2)) || x.Uses == 1)
|
||||
// result: (MOVFstore [off1+off2] {sym} ptr val mem)
|
||||
|
|
@ -3623,6 +3657,23 @@ func rewriteValueMIPS_OpMIPSMOVHstorezero(v *Value) bool {
|
|||
func rewriteValueMIPS_OpMIPSMOVWload(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (MOVWload [off] {sym} ptr (MOVFstore [off] {sym} ptr val _))
|
||||
// result: (MOVWfpgp val)
|
||||
for {
|
||||
off := auxIntToInt32(v.AuxInt)
|
||||
sym := auxToSym(v.Aux)
|
||||
ptr := v_0
|
||||
if v_1.Op != OpMIPSMOVFstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym {
|
||||
break
|
||||
}
|
||||
val := v_1.Args[1]
|
||||
if ptr != v_1.Args[0] {
|
||||
break
|
||||
}
|
||||
v.reset(OpMIPSMOVWfpgp)
|
||||
v.AddArg(val)
|
||||
return true
|
||||
}
|
||||
// match: (MOVWload [off1] {sym} x:(ADDconst [off2] ptr) mem)
|
||||
// cond: (is16Bit(int64(off1+off2)) || x.Uses == 1)
|
||||
// result: (MOVWload [off1+off2] {sym} ptr mem)
|
||||
|
|
@ -3735,6 +3786,23 @@ func rewriteValueMIPS_OpMIPSMOVWstore(v *Value) bool {
|
|||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (MOVWstore [off] {sym} ptr (MOVWfpgp val) mem)
|
||||
// result: (MOVFstore [off] {sym} ptr val mem)
|
||||
for {
|
||||
off := auxIntToInt32(v.AuxInt)
|
||||
sym := auxToSym(v.Aux)
|
||||
ptr := v_0
|
||||
if v_1.Op != OpMIPSMOVWfpgp {
|
||||
break
|
||||
}
|
||||
val := v_1.Args[0]
|
||||
mem := v_2
|
||||
v.reset(OpMIPSMOVFstore)
|
||||
v.AuxInt = int32ToAuxInt(off)
|
||||
v.Aux = symToAux(sym)
|
||||
v.AddArg3(ptr, val, mem)
|
||||
return true
|
||||
}
|
||||
// match: (MOVWstore [off1] {sym} x:(ADDconst [off2] ptr) val mem)
|
||||
// cond: (is16Bit(int64(off1+off2)) || x.Uses == 1)
|
||||
// result: (MOVWstore [off1+off2] {sym} ptr val mem)
|
||||
|
|
|
|||
Loading…
Reference in New Issue