diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go index e03988dac2..3f4355c387 100644 --- a/src/cmd/compile/internal/gc/ssa.go +++ b/src/cmd/compile/internal/gc/ssa.go @@ -3290,7 +3290,7 @@ func init() { func(s *state, n *Node, args []*ssa.Value) *ssa.Value { return s.newValue1(ssa.OpCtz16, types.Types[TINT], args[0]) }, - sys.AMD64) + sys.AMD64, sys.ARM64) addF("math/bits", "TrailingZeros16", func(s *state, n *Node, args []*ssa.Value) *ssa.Value { x := s.newValue1(ssa.OpZeroExt16to64, types.Types[TUINT64], args[0]) @@ -3298,7 +3298,7 @@ func init() { y := s.newValue2(ssa.OpOr64, types.Types[TUINT64], x, c) return s.newValue1(ssa.OpCtz64, types.Types[TINT], y) }, - sys.ARM64, sys.S390X, sys.PPC64) + sys.S390X, sys.PPC64) addF("math/bits", "TrailingZeros8", func(s *state, n *Node, args []*ssa.Value) *ssa.Value { x := s.newValue1(ssa.OpZeroExt8to32, types.Types[TUINT32], args[0]) @@ -3311,7 +3311,7 @@ func init() { func(s *state, n *Node, args []*ssa.Value) *ssa.Value { return s.newValue1(ssa.OpCtz8, types.Types[TINT], args[0]) }, - sys.AMD64) + sys.AMD64, sys.ARM64) addF("math/bits", "TrailingZeros8", func(s *state, n *Node, args []*ssa.Value) *ssa.Value { x := s.newValue1(ssa.OpZeroExt8to64, types.Types[TUINT64], args[0]) @@ -3319,7 +3319,7 @@ func init() { y := s.newValue2(ssa.OpOr64, types.Types[TUINT64], x, c) return s.newValue1(ssa.OpCtz64, types.Types[TINT], y) }, - sys.ARM64, sys.S390X) + sys.S390X) alias("math/bits", "ReverseBytes64", "runtime/internal/sys", "Bswap64", all...) alias("math/bits", "ReverseBytes32", "runtime/internal/sys", "Bswap32", all...) // ReverseBytes inlines correctly, no need to intrinsify it. diff --git a/src/cmd/compile/internal/ssa/gen/ARM64.rules b/src/cmd/compile/internal/ssa/gen/ARM64.rules index ca123d7375..6e0420983a 100644 --- a/src/cmd/compile/internal/ssa/gen/ARM64.rules +++ b/src/cmd/compile/internal/ssa/gen/ARM64.rules @@ -97,9 +97,13 @@ (Ctz64NonZero x) -> (Ctz64 x) (Ctz32NonZero x) -> (Ctz32 x) +(Ctz16NonZero x) -> (Ctz32 x) +(Ctz8NonZero x) -> (Ctz32 x) (Ctz64 x) -> (CLZ (RBIT x)) (Ctz32 x) -> (CLZW (RBITW x)) +(Ctz16 x) -> (CLZW (RBITW (ORconst [0x10000] x))) +(Ctz8 x) -> (CLZW (RBITW (ORconst [0x100] x))) (PopCount64 x) -> (FMOVDfpgp (VUADDLV (VCNT (FMOVDgpfp x)))) (PopCount32 x) -> (FMOVDfpgp (VUADDLV (VCNT (FMOVDgpfp (ZeroExt32to64 x))))) diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go index 25246ce5e5..24f392a43e 100644 --- a/src/cmd/compile/internal/ssa/rewriteARM64.go +++ b/src/cmd/compile/internal/ssa/rewriteARM64.go @@ -473,6 +473,10 @@ func rewriteValueARM64(v *Value) bool { return rewriteValueARM64_OpConstBool_0(v) case OpConstNil: return rewriteValueARM64_OpConstNil_0(v) + case OpCtz16: + return rewriteValueARM64_OpCtz16_0(v) + case OpCtz16NonZero: + return rewriteValueARM64_OpCtz16NonZero_0(v) case OpCtz32: return rewriteValueARM64_OpCtz32_0(v) case OpCtz32NonZero: @@ -481,6 +485,10 @@ func rewriteValueARM64(v *Value) bool { return rewriteValueARM64_OpCtz64_0(v) case OpCtz64NonZero: return rewriteValueARM64_OpCtz64NonZero_0(v) + case OpCtz8: + return rewriteValueARM64_OpCtz8_0(v) + case OpCtz8NonZero: + return rewriteValueARM64_OpCtz8NonZero_0(v) case OpCvt32Fto32: return rewriteValueARM64_OpCvt32Fto32_0(v) case OpCvt32Fto32U: @@ -33182,6 +33190,39 @@ func rewriteValueARM64_OpConstNil_0(v *Value) bool { return true } } +func rewriteValueARM64_OpCtz16_0(v *Value) bool { + b := v.Block + _ = b + typ := &b.Func.Config.Types + _ = typ + // match: (Ctz16 x) + // cond: + // result: (CLZW (RBITW (ORconst [0x10000] x))) + for { + t := v.Type + x := v.Args[0] + v.reset(OpARM64CLZW) + v.Type = t + v0 := b.NewValue0(v.Pos, OpARM64RBITW, typ.UInt32) + v1 := b.NewValue0(v.Pos, OpARM64ORconst, typ.UInt32) + v1.AuxInt = 0x10000 + v1.AddArg(x) + v0.AddArg(v1) + v.AddArg(v0) + return true + } +} +func rewriteValueARM64_OpCtz16NonZero_0(v *Value) bool { + // match: (Ctz16NonZero x) + // cond: + // result: (Ctz32 x) + for { + x := v.Args[0] + v.reset(OpCtz32) + v.AddArg(x) + return true + } +} func rewriteValueARM64_OpCtz32_0(v *Value) bool { b := v.Block _ = b @@ -33236,6 +33277,39 @@ func rewriteValueARM64_OpCtz64NonZero_0(v *Value) bool { return true } } +func rewriteValueARM64_OpCtz8_0(v *Value) bool { + b := v.Block + _ = b + typ := &b.Func.Config.Types + _ = typ + // match: (Ctz8 x) + // cond: + // result: (CLZW (RBITW (ORconst [0x100] x))) + for { + t := v.Type + x := v.Args[0] + v.reset(OpARM64CLZW) + v.Type = t + v0 := b.NewValue0(v.Pos, OpARM64RBITW, typ.UInt32) + v1 := b.NewValue0(v.Pos, OpARM64ORconst, typ.UInt32) + v1.AuxInt = 0x100 + v1.AddArg(x) + v0.AddArg(v1) + v.AddArg(v0) + return true + } +} +func rewriteValueARM64_OpCtz8NonZero_0(v *Value) bool { + // match: (Ctz8NonZero x) + // cond: + // result: (Ctz32 x) + for { + x := v.Args[0] + v.reset(OpCtz32) + v.AddArg(x) + return true + } +} func rewriteValueARM64_OpCvt32Fto32_0(v *Value) bool { // match: (Cvt32Fto32 x) // cond: diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go index 09939bb6be..c77b66c3f7 100644 --- a/test/codegen/mathbits.go +++ b/test/codegen/mathbits.go @@ -242,6 +242,7 @@ func RotateLeftVariable32(n uint32, m int) uint32 { func TrailingZeros(n uint) int { // amd64:"BSFQ","MOVL\t\\$64","CMOVQEQ" + // arm64:"RBIT","CLZ" // s390x:"FLOGR" // ppc64:"ANDN","POPCNTD" // ppc64le:"ANDN","POPCNTD" @@ -250,6 +251,7 @@ func TrailingZeros(n uint) int { func TrailingZeros64(n uint64) int { // amd64:"BSFQ","MOVL\t\\$64","CMOVQEQ" + // arm64:"RBIT","CLZ" // s390x:"FLOGR" // ppc64:"ANDN","POPCNTD" // ppc64le:"ANDN","POPCNTD" @@ -258,6 +260,7 @@ func TrailingZeros64(n uint64) int { func TrailingZeros32(n uint32) int { // amd64:"BTSQ\\t\\$32","BSFQ" + // arm64:"RBITW","CLZW" // s390x:"FLOGR","MOVWZ" // ppc64:"ANDN","POPCNTW" // ppc64le:"ANDN","POPCNTW" @@ -266,6 +269,7 @@ func TrailingZeros32(n uint32) int { func TrailingZeros16(n uint16) int { // amd64:"BSFL","BTSL\\t\\$16" + // arm64:"ORR\t\\$65536","RBITW","CLZW",-"MOVHU\tR",-"RBIT\t",-"CLZ\t" // s390x:"FLOGR","OR\t\\$65536" // ppc64:"POPCNTD","OR\\t\\$65536" // ppc64le:"POPCNTD","OR\\t\\$65536" @@ -274,6 +278,7 @@ func TrailingZeros16(n uint16) int { func TrailingZeros8(n uint8) int { // amd64:"BSFL","BTSL\\t\\$8" + // arm64:"ORR\t\\$256","RBITW","CLZW",-"MOVBU\tR",-"RBIT\t",-"CLZ\t" // s390x:"FLOGR","OR\t\\$256" return bits.TrailingZeros8(n) } @@ -314,6 +319,7 @@ func IterateBits16(n uint16) int { i := 0 for n != 0 { // amd64:"BSFL",-"BTSL" + // arm64:"RBITW","CLZW",-"ORR" i += bits.TrailingZeros16(n) n &= n - 1 } @@ -324,6 +330,7 @@ func IterateBits8(n uint8) int { i := 0 for n != 0 { // amd64:"BSFL",-"BTSL" + // arm64:"RBITW","CLZW",-"ORR" i += bits.TrailingZeros8(n) n &= n - 1 }