mirror of https://github.com/golang/go.git
math: optimize math.Abs on mipsx
This commit optimized math.Abs function implementation on mipsx.
Tested on loongson 3A2000.
goos: linux
goarch: mipsle
pkg: math
│ oldmath │ newmath │
│ sec/op │ sec/op vs base │
Acos-4 282.6n ± 0% 282.3n ± 0% ~ (p=0.140 n=7)
Acosh-4 506.1n ± 0% 451.8n ± 0% -10.73% (p=0.001 n=7)
Asin-4 272.3n ± 0% 272.2n ± 0% ~ (p=0.808 n=7)
Asinh-4 529.7n ± 0% 475.3n ± 0% -10.27% (p=0.001 n=7)
Atan-4 208.2n ± 0% 207.9n ± 0% ~ (p=0.134 n=7)
Atanh-4 503.4n ± 1% 449.7n ± 0% -10.67% (p=0.001 n=7)
Atan2-4 310.5n ± 0% 310.5n ± 0% ~ (p=0.928 n=7)
Cbrt-4 359.3n ± 0% 358.8n ± 0% ~ (p=0.121 n=7)
Ceil-4 203.9n ± 0% 204.0n ± 0% ~ (p=0.600 n=7)
Compare-4 23.11n ± 0% 23.11n ± 0% ~ (p=0.702 n=7)
Compare32-4 19.09n ± 0% 19.12n ± 0% ~ (p=0.070 n=7)
Copysign-4 33.20n ± 0% 34.02n ± 0% +2.47% (p=0.001 n=7)
Cos-4 422.5n ± 0% 385.4n ± 1% -8.78% (p=0.001 n=7)
Cosh-4 628.0n ± 0% 545.5n ± 0% -13.14% (p=0.001 n=7)
Erf-4 193.7n ± 2% 192.7n ± 1% ~ (p=0.430 n=7)
Erfc-4 192.8n ± 1% 193.0n ± 0% ~ (p=0.245 n=7)
Erfinv-4 220.7n ± 1% 221.5n ± 2% ~ (p=0.272 n=7)
Erfcinv-4 221.3n ± 1% 220.4n ± 2% ~ (p=0.738 n=7)
Exp-4 471.4n ± 0% 435.1n ± 0% -7.70% (p=0.001 n=7)
ExpGo-4 470.6n ± 0% 434.0n ± 0% -7.78% (p=0.001 n=7)
Expm1-4 243.1n ± 0% 243.4n ± 0% ~ (p=0.417 n=7)
Exp2-4 463.1n ± 0% 427.0n ± 0% -7.80% (p=0.001 n=7)
Exp2Go-4 462.4n ± 0% 426.2n ± 5% -7.83% (p=0.001 n=7)
Abs-4 37.000n ± 0% 8.039n ± 9% -78.27% (p=0.001 n=7)
Dim-4 18.09n ± 0% 18.11n ± 0% ~ (p=0.094 n=7)
Floor-4 151.9n ± 0% 151.8n ± 0% ~ (p=0.190 n=7)
Max-4 116.7n ± 1% 116.7n ± 1% ~ (p=0.842 n=7)
Min-4 116.6n ± 1% 116.6n ± 0% ~ (p=0.464 n=7)
Mod-4 1244.0n ± 0% 980.9n ± 0% -21.15% (p=0.001 n=7)
Frexp-4 199.0n ± 0% 146.7n ± 0% -26.28% (p=0.001 n=7)
Gamma-4 516.4n ± 0% 479.3n ± 1% -7.18% (p=0.001 n=7)
Hypot-4 169.8n ± 0% 117.8n ± 2% -30.62% (p=0.001 n=7)
HypotGo-4 170.8n ± 0% 117.5n ± 0% -31.21% (p=0.001 n=7)
Ilogb-4 160.8n ± 0% 109.5n ± 0% -31.90% (p=0.001 n=7)
J0-4 1.359µ ± 0% 1.305µ ± 0% -3.97% (p=0.001 n=7)
J1-4 1.386µ ± 0% 1.334µ ± 0% -3.75% (p=0.001 n=7)
Jn-4 2.864µ ± 0% 2.758µ ± 0% -3.70% (p=0.001 n=7)
Ldexp-4 202.9n ± 0% 151.7n ± 0% -25.23% (p=0.001 n=7)
Lgamma-4 234.0n ± 0% 234.3n ± 0% ~ (p=0.199 n=7)
Log-4 444.1n ± 0% 407.9n ± 0% -8.15% (p=0.001 n=7)
Logb-4 157.8n ± 0% 121.6n ± 0% -22.94% (p=0.001 n=7)
Log1p-4 354.8n ± 0% 315.4n ± 0% -11.10% (p=0.001 n=7)
Log10-4 453.9n ± 0% 417.9n ± 0% -7.93% (p=0.001 n=7)
Log2-4 245.3n ± 0% 209.1n ± 0% -14.76% (p=0.001 n=7)
Modf-4 126.6n ± 0% 126.6n ± 0% ~ (p=0.126 n=7)
Nextafter32-4 112.5n ± 0% 112.5n ± 0% ~ (p=0.853 n=7)
Nextafter64-4 141.7n ± 0% 141.6n ± 0% ~ (p=0.331 n=7)
PowInt-4 878.8n ± 1% 758.3n ± 1% -13.71% (p=0.001 n=7)
PowFrac-4 1.809µ ± 0% 1.615µ ± 0% -10.72% (p=0.001 n=7)
Pow10Pos-4 18.10n ± 0% 18.12n ± 0% ~ (p=0.464 n=7)
Pow10Neg-4 17.09n ± 0% 17.09n ± 0% ~ (p=0.263 n=7)
Round-4 68.36n ± 0% 68.33n ± 0% ~ (p=0.325 n=7)
RoundToEven-4 78.40n ± 0% 78.40n ± 0% ~ (p=0.934 n=7)
Remainder-4 894.0n ± 1% 753.4n ± 1% -15.73% (p=0.001 n=7)
Signbit-4 18.09n ± 0% 18.09n ± 0% ~ (p=0.761 n=7)
Sin-4 389.8n ± 1% 389.8n ± 0% ~ (p=0.995 n=7)
Sincos-4 416.0n ± 0% 415.9n ± 0% ~ (p=0.361 n=7)
Sinh-4 634.6n ± 4% 585.6n ± 1% -7.72% (p=0.001 n=7)
SqrtIndirect-4 8.035n ± 0% 8.036n ± 0% ~ (p=0.523 n=7)
SqrtLatency-4 8.039n ± 0% 8.037n ± 0% ~ (p=0.218 n=7)
SqrtIndirectLatency-4 8.040n ± 0% 8.040n ± 0% ~ (p=0.652 n=7)
SqrtGoLatency-4 895.7n ± 0% 896.6n ± 0% +0.10% (p=0.004 n=7)
SqrtPrime-4 5.406µ ± 0% 5.407µ ± 0% ~ (p=0.592 n=7)
Tan-4 406.1n ± 0% 405.8n ± 1% ~ (p=0.435 n=7)
Tanh-4 627.6n ± 0% 545.5n ± 0% -13.08% (p=0.001 n=7)
Trunc-4 146.7n ± 1% 146.7n ± 0% ~ (p=0.755 n=7)
Y0-4 1.359µ ± 0% 1.310µ ± 0% -3.61% (p=0.001 n=7)
Y1-4 1.351µ ± 0% 1.301µ ± 0% -3.70% (p=0.001 n=7)
Yn-4 2.829µ ± 0% 2.729µ ± 0% -3.53% (p=0.001 n=7)
Float64bits-4 14.08n ± 0% 14.07n ± 0% ~ (p=0.069 n=7)
Float64frombits-4 19.09n ± 0% 19.10n ± 0% ~ (p=0.755 n=7)
Float32bits-4 13.06n ± 0% 13.07n ± 1% ~ (p=0.586 n=7)
Float32frombits-4 13.06n ± 0% 13.06n ± 0% ~ (p=0.853 n=7)
FMA-4 606.9n ± 0% 606.8n ± 0% ~ (p=0.393 n=7)
geomean 201.1n 185.4n -7.81%
Change-Id: I6d41a97ad3789ed5731588588859ac0b8b13b664
Reviewed-on: https://go-review.googlesource.com/c/go/+/484675
Reviewed-by: Rong Zhang <rongrong@oss.cipunited.com>
Reviewed-by: Bryan Mills <bcmills@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Run-TryBot: Than McIntosh <thanm@google.com>
This commit is contained in:
parent
9fa81a8827
commit
5cad8d41ca
|
|
@ -363,6 +363,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
|||
ssa.OpMIPSMOVDF,
|
||||
ssa.OpMIPSNEGF,
|
||||
ssa.OpMIPSNEGD,
|
||||
ssa.OpMIPSABSD,
|
||||
ssa.OpMIPSSQRTF,
|
||||
ssa.OpMIPSSQRTD,
|
||||
ssa.OpMIPSCLZ:
|
||||
|
|
|
|||
|
|
@ -37,6 +37,9 @@
|
|||
(Mod8 x y) => (Select0 (DIV (SignExt8to32 x) (SignExt8to32 y)))
|
||||
(Mod8u x y) => (Select0 (DIVU (ZeroExt8to32 x) (ZeroExt8to32 y)))
|
||||
|
||||
// math package intrinsics
|
||||
(Abs ...) => (ABSD ...)
|
||||
|
||||
// (x + y) / 2 with x>=y becomes (x - y) / 2 + y
|
||||
(Avg32u <t> x y) => (ADD (SRLconst <t> (SUB <t> x y) [1]) y)
|
||||
|
||||
|
|
|
|||
|
|
@ -179,6 +179,7 @@ func init() {
|
|||
{name: "NEG", argLength: 1, reg: gp11}, // -arg0
|
||||
{name: "NEGF", argLength: 1, reg: fp11, asm: "NEGF"}, // -arg0, float32
|
||||
{name: "NEGD", argLength: 1, reg: fp11, asm: "NEGD"}, // -arg0, float64
|
||||
{name: "ABSD", argLength: 1, reg: fp11, asm: "ABSD"}, // abs(arg0), float64
|
||||
{name: "SQRTD", argLength: 1, reg: fp11, asm: "SQRTD"}, // sqrt(arg0), float64
|
||||
{name: "SQRTF", argLength: 1, reg: fp11, asm: "SQRTF"}, // sqrt(arg0), float32
|
||||
|
||||
|
|
|
|||
|
|
@ -1872,6 +1872,7 @@ const (
|
|||
OpMIPSNEG
|
||||
OpMIPSNEGF
|
||||
OpMIPSNEGD
|
||||
OpMIPSABSD
|
||||
OpMIPSSQRTD
|
||||
OpMIPSSQRTF
|
||||
OpMIPSSLL
|
||||
|
|
@ -25054,6 +25055,19 @@ var opcodeTable = [...]opInfo{
|
|||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "ABSD",
|
||||
argLen: 1,
|
||||
asm: mips.AABSD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 35183835217920}, // F0 F2 F4 F6 F8 F10 F12 F14 F16 F18 F20 F22 F24 F26 F28 F30
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 35183835217920}, // F0 F2 F4 F6 F8 F10 F12 F14 F16 F18 F20 F22 F24 F26 F28 F30
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "SQRTD",
|
||||
argLen: 1,
|
||||
|
|
|
|||
|
|
@ -6,6 +6,9 @@ import "cmd/compile/internal/types"
|
|||
|
||||
func rewriteValueMIPS(v *Value) bool {
|
||||
switch v.Op {
|
||||
case OpAbs:
|
||||
v.Op = OpMIPSABSD
|
||||
return true
|
||||
case OpAdd16:
|
||||
v.Op = OpMIPSADD
|
||||
return true
|
||||
|
|
|
|||
|
|
@ -4369,7 +4369,7 @@ func InitTables() {
|
|||
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||
return s.newValue1(ssa.OpAbs, types.Types[types.TFLOAT64], args[0])
|
||||
},
|
||||
sys.ARM64, sys.ARM, sys.PPC64, sys.RISCV64, sys.Wasm, sys.MIPS64)
|
||||
sys.ARM64, sys.ARM, sys.PPC64, sys.RISCV64, sys.Wasm, sys.MIPS, sys.MIPS64)
|
||||
addF("math", "Copysign",
|
||||
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||
return s.newValue2(ssa.OpCopysign, types.Types[types.TFLOAT64], args[0], args[1])
|
||||
|
|
|
|||
|
|
@ -82,6 +82,7 @@ func abs(x, y float64) {
|
|||
// wasm:"F64Abs"
|
||||
// arm/6:"ABSD\t"
|
||||
// mips64/hardfloat:"ABSD\t"
|
||||
// mips/hardfloat:"ABSD\t"
|
||||
sink64[0] = math.Abs(x)
|
||||
|
||||
// amd64:"BTRQ\t[$]63","PXOR" (TODO: this should be BTSQ)
|
||||
|
|
|
|||
Loading…
Reference in New Issue