mirror of https://github.com/golang/go.git
cmd/compile: intrinsify math/bits.Div on amd64
Note that the intrinsic implementation panics separately for overflow and divide by zero, which matches the behavior of the pure go implementation. There is a modest performance improvement after intrinsic implementation. name old time/op new time/op delta Div-4 53.0ns ± 1% 47.0ns ± 0% -11.28% (p=0.008 n=5+5) Div32-4 18.4ns ± 0% 18.5ns ± 1% ~ (p=0.444 n=5+5) Div64-4 53.3ns ± 0% 47.5ns ± 4% -10.77% (p=0.008 n=5+5) Updates #28273 Change-Id: Ic1688ecc0964acace2e91bf44ef16f5fb6b6bc82 Reviewed-on: https://go-review.googlesource.com/c/144378 Run-TryBot: Keith Randall <khr@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
parent
ead5d1e316
commit
319787a528
|
|
@ -300,6 +300,7 @@ var (
|
||||||
panicdottypeI,
|
panicdottypeI,
|
||||||
panicindex,
|
panicindex,
|
||||||
panicnildottype,
|
panicnildottype,
|
||||||
|
panicoverflow,
|
||||||
panicslice,
|
panicslice,
|
||||||
raceread,
|
raceread,
|
||||||
racereadrange,
|
racereadrange,
|
||||||
|
|
|
||||||
|
|
@ -82,6 +82,7 @@ func initssaconfig() {
|
||||||
panicdottypeI = sysfunc("panicdottypeI")
|
panicdottypeI = sysfunc("panicdottypeI")
|
||||||
panicindex = sysfunc("panicindex")
|
panicindex = sysfunc("panicindex")
|
||||||
panicnildottype = sysfunc("panicnildottype")
|
panicnildottype = sysfunc("panicnildottype")
|
||||||
|
panicoverflow = sysfunc("panicoverflow")
|
||||||
panicslice = sysfunc("panicslice")
|
panicslice = sysfunc("panicslice")
|
||||||
raceread = sysfunc("raceread")
|
raceread = sysfunc("raceread")
|
||||||
racereadrange = sysfunc("racereadrange")
|
racereadrange = sysfunc("racereadrange")
|
||||||
|
|
@ -3487,20 +3488,29 @@ func init() {
|
||||||
},
|
},
|
||||||
sys.AMD64, sys.ARM64, sys.PPC64)
|
sys.AMD64, sys.ARM64, sys.PPC64)
|
||||||
alias("math/bits", "Mul", "math/bits", "Mul64", sys.ArchAMD64, sys.ArchARM64, sys.ArchPPC64)
|
alias("math/bits", "Mul", "math/bits", "Mul64", sys.ArchAMD64, sys.ArchARM64, sys.ArchPPC64)
|
||||||
|
|
||||||
addF("math/bits", "Add64",
|
addF("math/bits", "Add64",
|
||||||
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||||
return s.newValue3(ssa.OpAdd64carry, types.NewTuple(types.Types[TUINT64], types.Types[TUINT64]), args[0], args[1], args[2])
|
return s.newValue3(ssa.OpAdd64carry, types.NewTuple(types.Types[TUINT64], types.Types[TUINT64]), args[0], args[1], args[2])
|
||||||
},
|
},
|
||||||
sys.AMD64)
|
sys.AMD64)
|
||||||
alias("math/bits", "Add", "math/bits", "Add64", sys.ArchAMD64)
|
alias("math/bits", "Add", "math/bits", "Add64", sys.ArchAMD64)
|
||||||
|
|
||||||
addF("math/bits", "Sub64",
|
addF("math/bits", "Sub64",
|
||||||
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||||
return s.newValue3(ssa.OpSub64borrow, types.NewTuple(types.Types[TUINT64], types.Types[TUINT64]), args[0], args[1], args[2])
|
return s.newValue3(ssa.OpSub64borrow, types.NewTuple(types.Types[TUINT64], types.Types[TUINT64]), args[0], args[1], args[2])
|
||||||
},
|
},
|
||||||
sys.AMD64)
|
sys.AMD64)
|
||||||
alias("math/bits", "Sub", "math/bits", "Sub64", sys.ArchAMD64)
|
alias("math/bits", "Sub", "math/bits", "Sub64", sys.ArchAMD64)
|
||||||
|
addF("math/bits", "Div64",
|
||||||
|
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||||
|
// check for divide-by-zero/overflow and panic with appropriate message
|
||||||
|
cmpZero := s.newValue2(s.ssaOp(ONE, types.Types[TUINT64]), types.Types[TBOOL], args[2], s.zeroVal(types.Types[TUINT64]))
|
||||||
|
s.check(cmpZero, panicdivide)
|
||||||
|
cmpOverflow := s.newValue2(s.ssaOp(OLT, types.Types[TUINT64]), types.Types[TBOOL], args[0], args[2])
|
||||||
|
s.check(cmpOverflow, panicoverflow)
|
||||||
|
return s.newValue3(ssa.OpDiv128u, types.NewTuple(types.Types[TUINT64], types.Types[TUINT64]), args[0], args[1], args[2])
|
||||||
|
},
|
||||||
|
sys.AMD64)
|
||||||
|
alias("math/bits", "Div", "math/bits", "Div64", sys.ArchAMD64)
|
||||||
|
|
||||||
/******** sync/atomic ********/
|
/******** sync/atomic ********/
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -465,3 +465,17 @@ func Mul64(x, y uint64) (hi, lo uint64) {
|
||||||
// ppc64le:"MULHDU","MULLD"
|
// ppc64le:"MULHDU","MULLD"
|
||||||
return bits.Mul64(x, y)
|
return bits.Mul64(x, y)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// --------------- //
|
||||||
|
// bits.Div* //
|
||||||
|
// --------------- //
|
||||||
|
|
||||||
|
func Div(hi, lo, x uint) (q, r uint) {
|
||||||
|
// amd64:"DIVQ"
|
||||||
|
return bits.Div(hi, lo, x)
|
||||||
|
}
|
||||||
|
|
||||||
|
func Div64(hi, lo, x uint64) (q, r uint64) {
|
||||||
|
// amd64:"DIVQ"
|
||||||
|
return bits.Div64(hi, lo, x)
|
||||||
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue