mirror of https://github.com/golang/go.git
cmd/compile: optimize 386's math.bits.TrailingZeros16
This CL optimizes math.bits.TrailingZeros16 on 386 with a pair of BSFL and ORL instrcutions. The case TrailingZeros16-4 of the benchmark test in math/bits shows big improvement. name old time/op new time/op delta TrailingZeros16-4 1.55ns ± 1% 0.87ns ± 1% -43.87% (p=0.000 n=50+49) Change-Id: Ia899975b0e46f45dcd20223b713ed632bc32740b Reviewed-on: https://go-review.googlesource.com/c/go/+/189277 Run-TryBot: Ben Shi <powerman1st@163.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
parent
e1e4c499f0
commit
22355d6cd2
|
|
@ -3370,7 +3370,7 @@ func init() {
|
|||
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||
return s.newValue1(ssa.OpCtz16, types.Types[TINT], args[0])
|
||||
},
|
||||
sys.AMD64, sys.ARM, sys.ARM64, sys.Wasm)
|
||||
sys.AMD64, sys.ARM, sys.I386, sys.ARM64, sys.Wasm)
|
||||
addF("math/bits", "TrailingZeros16",
|
||||
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||
x := s.newValue1(ssa.OpZeroExt16to64, types.Types[TUINT64], args[0])
|
||||
|
|
|
|||
|
|
@ -66,6 +66,8 @@
|
|||
|
||||
(Sqrt x) -> (SQRTSD x)
|
||||
|
||||
(Ctz16 x) -> (BSFL (ORLconst <typ.UInt32> [0x10000] x))
|
||||
|
||||
// Lowering extension
|
||||
(SignExt8to16 x) -> (MOVBLSX x)
|
||||
(SignExt8to32 x) -> (MOVBLSX x)
|
||||
|
|
|
|||
|
|
@ -360,6 +360,8 @@ func rewriteValue386(v *Value) bool {
|
|||
return rewriteValue386_OpConstBool_0(v)
|
||||
case OpConstNil:
|
||||
return rewriteValue386_OpConstNil_0(v)
|
||||
case OpCtz16:
|
||||
return rewriteValue386_OpCtz16_0(v)
|
||||
case OpCvt32Fto32:
|
||||
return rewriteValue386_OpCvt32Fto32_0(v)
|
||||
case OpCvt32Fto64F:
|
||||
|
|
@ -20783,6 +20785,22 @@ func rewriteValue386_OpConstNil_0(v *Value) bool {
|
|||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValue386_OpCtz16_0(v *Value) bool {
|
||||
b := v.Block
|
||||
typ := &b.Func.Config.Types
|
||||
// match: (Ctz16 x)
|
||||
// cond:
|
||||
// result: (BSFL (ORLconst <typ.UInt32> [0x10000] x))
|
||||
for {
|
||||
x := v.Args[0]
|
||||
v.reset(Op386BSFL)
|
||||
v0 := b.NewValue0(v.Pos, Op386ORLconst, typ.UInt32)
|
||||
v0.AuxInt = 0x10000
|
||||
v0.AddArg(x)
|
||||
v.AddArg(v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValue386_OpCvt32Fto32_0(v *Value) bool {
|
||||
// match: (Cvt32Fto32 x)
|
||||
// cond:
|
||||
|
|
|
|||
|
|
@ -296,6 +296,7 @@ func TrailingZeros32(n uint32) int {
|
|||
|
||||
func TrailingZeros16(n uint16) int {
|
||||
// amd64:"BSFL","BTSL\\t\\$16"
|
||||
// 386:"BSFL\t"
|
||||
// arm:"ORR\t\\$65536","CLZ",-"MOVHU\tR"
|
||||
// arm64:"ORR\t\\$65536","RBITW","CLZW",-"MOVHU\tR",-"RBIT\t",-"CLZ\t"
|
||||
// s390x:"FLOGR","OR\t\\$65536"
|
||||
|
|
|
|||
Loading…
Reference in New Issue