cmd/compile: simplify intrinsification of BitLen16 and BitLen8

Decompose BitLen16 and BitLen8 within the SSA rules for architectures that
support BitLen32 or BitLen64, rather than having a custom intrinsic.

Change-Id: Ie4188ce69d1021e63cec27a8e7418efb0714812b
Reviewed-on: https://go-review.googlesource.com/c/go/+/651817
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Michael Pratt <mpratt@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Joel Sing <joel@sing.id.au>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
This commit is contained in:
Joel Sing 2025-02-23 00:02:27 +11:00
parent 969a0da362
commit 1b1c6b838e
15 changed files with 284 additions and 34 deletions

View File

@ -80,6 +80,7 @@
// bit length
(BitLen32 <t> x) => (RSBconst [32] (CLZ <t> x))
(BitLen(16|8) x) => (BitLen32 (ZeroExt(16|8)to32 x))
// byte swap for ARMv5
// let (a, b, c, d) be the bytes of x from high to low

View File

@ -103,6 +103,7 @@
(BitLen64 x) => (SUB (MOVDconst [64]) (CLZ <typ.Int> x))
(BitLen32 x) => (SUB (MOVDconst [32]) (CLZW <typ.Int> x))
(BitLen(16|8) x) => (BitLen64 (ZeroExt(16|8)to64 x))
(Bswap64 ...) => (REV ...)
(Bswap32 ...) => (REVW ...)

View File

@ -147,6 +147,7 @@
(BitLen64 <t> x) => (NEGV <t> (SUBVconst <t> [64] (CLZV <t> x)))
(BitLen32 <t> x) => (NEGV <t> (SUBVconst <t> [32] (CLZW <t> x)))
(BitLen(16|8) x) => (BitLen64 (ZeroExt(16|8)to64 x))
(Bswap(16|32|64) ...) => (REVB(2H|2W|V) ...)
(BitRev8 ...) => (BITREV4B ...)
(BitRev16 <t> x) => (REVB2H (BITREV4B <t> x))

View File

@ -135,6 +135,7 @@
// bit length
(BitLen32 <t> x) => (SUB (MOVWconst [32]) (CLZ <t> x))
(BitLen(16|8) x) => (BitLen32 (ZeroExt(16|8)to32 x))
// boolean ops -- booleans are represented with 0=false, 1=true
(AndB ...) => (AND ...)

View File

@ -267,6 +267,7 @@
(BitLen64 x) => (SUBFCconst [64] (CNTLZD <typ.Int> x))
(BitLen32 x) => (SUBFCconst [32] (CNTLZW <typ.Int> x))
(BitLen(16|8) x) => (BitLen64 (ZeroExt(16|8)to64 x))
(PopCount64 ...) => (POPCNTD ...)
(PopCount(32|16|8) x) => (POPCNT(W|W|B) (MOV(W|H|B)Zreg x))

View File

@ -89,6 +89,7 @@
(Ctz32 <t> x) => (SUB (MOVDconst [64]) (FLOGR (MOVWZreg (ANDW <t> (SUBWconst <t> [1] x) (NOTW <t> x)))))
(BitLen64 x) => (SUB (MOVDconst [64]) (FLOGR x))
(BitLen(32|16|8) x) => (BitLen64 (ZeroExt(32|16|8)to64 x))
// POPCNT treats the input register as a vector of 8 bytes, producing
// a population count for each individual byte. For inputs larger than

View File

@ -329,6 +329,7 @@
(Ctz(64|32|16|8)NonZero ...) => (I64Ctz ...)
(BitLen64 x) => (I64Sub (I64Const [64]) (I64Clz x))
(BitLen(32|16|8) x) => (BitLen64 (ZeroExt(32|16|8)to64 x))
(PopCount64 ...) => (I64Popcnt ...)
(PopCount32 x) => (I64Popcnt (ZeroExt32to64 x))

View File

@ -466,8 +466,12 @@ func rewriteValueARM(v *Value) bool {
return true
case OpAvg32u:
return rewriteValueARM_OpAvg32u(v)
case OpBitLen16:
return rewriteValueARM_OpBitLen16(v)
case OpBitLen32:
return rewriteValueARM_OpBitLen32(v)
case OpBitLen8:
return rewriteValueARM_OpBitLen8(v)
case OpBswap32:
return rewriteValueARM_OpBswap32(v)
case OpClosureCall:
@ -13070,6 +13074,21 @@ func rewriteValueARM_OpAvg32u(v *Value) bool {
return true
}
}
// rewriteValueARM_OpBitLen16 applies the ARM lowering rule
//
//	(BitLen16 x) => (BitLen32 (ZeroExt16to32 x))
//
// The rule carries no condition, so the rewrite is unconditional and the
// function always reports true.
func rewriteValueARM_OpBitLen16(v *Value) bool {
	// Read everything needed from v up front, before v.reset is called.
	operand := v.Args[0]
	blk := v.Block
	cfgTypes := &blk.Func.Config.Types

	v.reset(OpBitLen32)
	// The zero-extended operand becomes the only argument of the new BitLen32.
	widened := blk.NewValue0(v.Pos, OpZeroExt16to32, cfgTypes.UInt32)
	widened.AddArg(operand)
	v.AddArg(widened)
	return true
}
func rewriteValueARM_OpBitLen32(v *Value) bool {
v_0 := v.Args[0]
b := v.Block
@ -13086,6 +13105,21 @@ func rewriteValueARM_OpBitLen32(v *Value) bool {
return true
}
}
// rewriteValueARM_OpBitLen8 applies the ARM lowering rule
//
//	(BitLen8 x) => (BitLen32 (ZeroExt8to32 x))
//
// The rule carries no condition, so the rewrite is unconditional and the
// function always reports true.
func rewriteValueARM_OpBitLen8(v *Value) bool {
	// Read everything needed from v up front, before v.reset is called.
	operand := v.Args[0]
	blk := v.Block
	cfgTypes := &blk.Func.Config.Types

	v.reset(OpBitLen32)
	// The zero-extended operand becomes the only argument of the new BitLen32.
	widened := blk.NewValue0(v.Pos, OpZeroExt8to32, cfgTypes.UInt32)
	widened.AddArg(operand)
	v.AddArg(widened)
	return true
}
func rewriteValueARM_OpBswap32(v *Value) bool {
v_0 := v.Args[0]
b := v.Block

View File

@ -563,10 +563,14 @@ func rewriteValueARM64(v *Value) bool {
return true
case OpAvg64u:
return rewriteValueARM64_OpAvg64u(v)
case OpBitLen16:
return rewriteValueARM64_OpBitLen16(v)
case OpBitLen32:
return rewriteValueARM64_OpBitLen32(v)
case OpBitLen64:
return rewriteValueARM64_OpBitLen64(v)
case OpBitLen8:
return rewriteValueARM64_OpBitLen8(v)
case OpBitRev16:
return rewriteValueARM64_OpBitRev16(v)
case OpBitRev32:
@ -18350,6 +18354,21 @@ func rewriteValueARM64_OpAvg64u(v *Value) bool {
return true
}
}
// rewriteValueARM64_OpBitLen16 applies the ARM64 lowering rule
//
//	(BitLen16 x) => (BitLen64 (ZeroExt16to64 x))
//
// The rule carries no condition, so the rewrite is unconditional and the
// function always reports true.
func rewriteValueARM64_OpBitLen16(v *Value) bool {
	// Read everything needed from v up front, before v.reset is called.
	operand := v.Args[0]
	blk := v.Block
	cfgTypes := &blk.Func.Config.Types

	v.reset(OpBitLen64)
	// The zero-extended operand becomes the only argument of the new BitLen64.
	widened := blk.NewValue0(v.Pos, OpZeroExt16to64, cfgTypes.UInt64)
	widened.AddArg(operand)
	v.AddArg(widened)
	return true
}
func rewriteValueARM64_OpBitLen32(v *Value) bool {
v_0 := v.Args[0]
b := v.Block
@ -18384,6 +18403,21 @@ func rewriteValueARM64_OpBitLen64(v *Value) bool {
return true
}
}
// rewriteValueARM64_OpBitLen8 applies the ARM64 lowering rule
//
//	(BitLen8 x) => (BitLen64 (ZeroExt8to64 x))
//
// The rule carries no condition, so the rewrite is unconditional and the
// function always reports true.
func rewriteValueARM64_OpBitLen8(v *Value) bool {
	// Read everything needed from v up front, before v.reset is called.
	operand := v.Args[0]
	blk := v.Block
	cfgTypes := &blk.Func.Config.Types

	v.reset(OpBitLen64)
	// The zero-extended operand becomes the only argument of the new BitLen64.
	widened := blk.NewValue0(v.Pos, OpZeroExt8to64, cfgTypes.UInt64)
	widened.AddArg(operand)
	v.AddArg(widened)
	return true
}
func rewriteValueARM64_OpBitRev16(v *Value) bool {
v_0 := v.Args[0]
b := v.Block

View File

@ -129,10 +129,14 @@ func rewriteValueLOONG64(v *Value) bool {
return true
case OpAvg64u:
return rewriteValueLOONG64_OpAvg64u(v)
case OpBitLen16:
return rewriteValueLOONG64_OpBitLen16(v)
case OpBitLen32:
return rewriteValueLOONG64_OpBitLen32(v)
case OpBitLen64:
return rewriteValueLOONG64_OpBitLen64(v)
case OpBitLen8:
return rewriteValueLOONG64_OpBitLen8(v)
case OpBitRev16:
return rewriteValueLOONG64_OpBitRev16(v)
case OpBitRev32:
@ -995,6 +999,21 @@ func rewriteValueLOONG64_OpAvg64u(v *Value) bool {
return true
}
}
// rewriteValueLOONG64_OpBitLen16 applies the LOONG64 lowering rule
//
//	(BitLen16 x) => (BitLen64 (ZeroExt16to64 x))
//
// The rule carries no condition, so the rewrite is unconditional and the
// function always reports true.
func rewriteValueLOONG64_OpBitLen16(v *Value) bool {
	// Read everything needed from v up front, before v.reset is called.
	operand := v.Args[0]
	blk := v.Block
	cfgTypes := &blk.Func.Config.Types

	v.reset(OpBitLen64)
	// The zero-extended operand becomes the only argument of the new BitLen64.
	widened := blk.NewValue0(v.Pos, OpZeroExt16to64, cfgTypes.UInt64)
	widened.AddArg(operand)
	v.AddArg(widened)
	return true
}
func rewriteValueLOONG64_OpBitLen32(v *Value) bool {
v_0 := v.Args[0]
b := v.Block
@ -1033,6 +1052,21 @@ func rewriteValueLOONG64_OpBitLen64(v *Value) bool {
return true
}
}
// rewriteValueLOONG64_OpBitLen8 applies the LOONG64 lowering rule
//
//	(BitLen8 x) => (BitLen64 (ZeroExt8to64 x))
//
// The rule carries no condition, so the rewrite is unconditional and the
// function always reports true.
func rewriteValueLOONG64_OpBitLen8(v *Value) bool {
	// Read everything needed from v up front, before v.reset is called.
	operand := v.Args[0]
	blk := v.Block
	cfgTypes := &blk.Func.Config.Types

	v.reset(OpBitLen64)
	// The zero-extended operand becomes the only argument of the new BitLen64.
	widened := blk.NewValue0(v.Pos, OpZeroExt8to64, cfgTypes.UInt64)
	widened.AddArg(operand)
	v.AddArg(widened)
	return true
}
func rewriteValueLOONG64_OpBitRev16(v *Value) bool {
v_0 := v.Args[0]
b := v.Block

View File

@ -82,8 +82,12 @@ func rewriteValueMIPS(v *Value) bool {
return true
case OpAvg32u:
return rewriteValueMIPS_OpAvg32u(v)
case OpBitLen16:
return rewriteValueMIPS_OpBitLen16(v)
case OpBitLen32:
return rewriteValueMIPS_OpBitLen32(v)
case OpBitLen8:
return rewriteValueMIPS_OpBitLen8(v)
case OpClosureCall:
v.Op = OpMIPSCALLclosure
return true
@ -792,6 +796,21 @@ func rewriteValueMIPS_OpAvg32u(v *Value) bool {
return true
}
}
// rewriteValueMIPS_OpBitLen16 applies the MIPS lowering rule
//
//	(BitLen16 x) => (BitLen32 (ZeroExt16to32 x))
//
// The rule carries no condition, so the rewrite is unconditional and the
// function always reports true.
func rewriteValueMIPS_OpBitLen16(v *Value) bool {
	// Read everything needed from v up front, before v.reset is called.
	operand := v.Args[0]
	blk := v.Block
	cfgTypes := &blk.Func.Config.Types

	v.reset(OpBitLen32)
	// The zero-extended operand becomes the only argument of the new BitLen32.
	widened := blk.NewValue0(v.Pos, OpZeroExt16to32, cfgTypes.UInt32)
	widened.AddArg(operand)
	v.AddArg(widened)
	return true
}
func rewriteValueMIPS_OpBitLen32(v *Value) bool {
v_0 := v.Args[0]
b := v.Block
@ -810,6 +829,21 @@ func rewriteValueMIPS_OpBitLen32(v *Value) bool {
return true
}
}
// rewriteValueMIPS_OpBitLen8 applies the MIPS lowering rule
//
//	(BitLen8 x) => (BitLen32 (ZeroExt8to32 x))
//
// The rule carries no condition, so the rewrite is unconditional and the
// function always reports true.
func rewriteValueMIPS_OpBitLen8(v *Value) bool {
	// Read everything needed from v up front, before v.reset is called.
	operand := v.Args[0]
	blk := v.Block
	cfgTypes := &blk.Func.Config.Types

	v.reset(OpBitLen32)
	// The zero-extended operand becomes the only argument of the new BitLen32.
	widened := blk.NewValue0(v.Pos, OpZeroExt8to32, cfgTypes.UInt32)
	widened.AddArg(operand)
	v.AddArg(widened)
	return true
}
func rewriteValueMIPS_OpCom16(v *Value) bool {
v_0 := v.Args[0]
// match: (Com16 x)

View File

@ -106,10 +106,14 @@ func rewriteValuePPC64(v *Value) bool {
return rewriteValuePPC64_OpAtomicStoreRel64(v)
case OpAvg64u:
return rewriteValuePPC64_OpAvg64u(v)
case OpBitLen16:
return rewriteValuePPC64_OpBitLen16(v)
case OpBitLen32:
return rewriteValuePPC64_OpBitLen32(v)
case OpBitLen64:
return rewriteValuePPC64_OpBitLen64(v)
case OpBitLen8:
return rewriteValuePPC64_OpBitLen8(v)
case OpBswap16:
return rewriteValuePPC64_OpBswap16(v)
case OpBswap32:
@ -1123,6 +1127,21 @@ func rewriteValuePPC64_OpAvg64u(v *Value) bool {
return true
}
}
// rewriteValuePPC64_OpBitLen16 applies the PPC64 lowering rule
//
//	(BitLen16 x) => (BitLen64 (ZeroExt16to64 x))
//
// The rule carries no condition, so the rewrite is unconditional and the
// function always reports true.
func rewriteValuePPC64_OpBitLen16(v *Value) bool {
	// Read everything needed from v up front, before v.reset is called.
	operand := v.Args[0]
	blk := v.Block
	cfgTypes := &blk.Func.Config.Types

	v.reset(OpBitLen64)
	// The zero-extended operand becomes the only argument of the new BitLen64.
	widened := blk.NewValue0(v.Pos, OpZeroExt16to64, cfgTypes.UInt64)
	widened.AddArg(operand)
	v.AddArg(widened)
	return true
}
func rewriteValuePPC64_OpBitLen32(v *Value) bool {
v_0 := v.Args[0]
b := v.Block
@ -1155,6 +1174,21 @@ func rewriteValuePPC64_OpBitLen64(v *Value) bool {
return true
}
}
// rewriteValuePPC64_OpBitLen8 applies the PPC64 lowering rule
//
//	(BitLen8 x) => (BitLen64 (ZeroExt8to64 x))
//
// The rule carries no condition, so the rewrite is unconditional and the
// function always reports true.
func rewriteValuePPC64_OpBitLen8(v *Value) bool {
	// Read everything needed from v up front, before v.reset is called.
	operand := v.Args[0]
	blk := v.Block
	cfgTypes := &blk.Func.Config.Types

	v.reset(OpBitLen64)
	// The zero-extended operand becomes the only argument of the new BitLen64.
	widened := blk.NewValue0(v.Pos, OpZeroExt8to64, cfgTypes.UInt64)
	widened.AddArg(operand)
	v.AddArg(widened)
	return true
}
func rewriteValuePPC64_OpBswap16(v *Value) bool {
v_0 := v.Args[0]
b := v.Block

View File

@ -88,8 +88,14 @@ func rewriteValueS390X(v *Value) bool {
return rewriteValueS390X_OpAtomicStoreRel32(v)
case OpAvg64u:
return rewriteValueS390X_OpAvg64u(v)
case OpBitLen16:
return rewriteValueS390X_OpBitLen16(v)
case OpBitLen32:
return rewriteValueS390X_OpBitLen32(v)
case OpBitLen64:
return rewriteValueS390X_OpBitLen64(v)
case OpBitLen8:
return rewriteValueS390X_OpBitLen8(v)
case OpBswap16:
return rewriteValueS390X_OpBswap16(v)
case OpBswap32:
@ -1261,6 +1267,36 @@ func rewriteValueS390X_OpAvg64u(v *Value) bool {
return true
}
}
// rewriteValueS390X_OpBitLen16 applies the S390X lowering rule
//
//	(BitLen16 x) => (BitLen64 (ZeroExt16to64 x))
//
// The rule carries no condition, so the rewrite is unconditional and the
// function always reports true.
func rewriteValueS390X_OpBitLen16(v *Value) bool {
	// Read everything needed from v up front, before v.reset is called.
	operand := v.Args[0]
	blk := v.Block
	cfgTypes := &blk.Func.Config.Types

	v.reset(OpBitLen64)
	// The zero-extended operand becomes the only argument of the new BitLen64.
	widened := blk.NewValue0(v.Pos, OpZeroExt16to64, cfgTypes.UInt64)
	widened.AddArg(operand)
	v.AddArg(widened)
	return true
}
// rewriteValueS390X_OpBitLen32 applies the S390X lowering rule
//
//	(BitLen32 x) => (BitLen64 (ZeroExt32to64 x))
//
// The rule carries no condition, so the rewrite is unconditional and the
// function always reports true.
func rewriteValueS390X_OpBitLen32(v *Value) bool {
	// Read everything needed from v up front, before v.reset is called.
	operand := v.Args[0]
	blk := v.Block
	cfgTypes := &blk.Func.Config.Types

	v.reset(OpBitLen64)
	// The zero-extended operand becomes the only argument of the new BitLen64.
	widened := blk.NewValue0(v.Pos, OpZeroExt32to64, cfgTypes.UInt64)
	widened.AddArg(operand)
	v.AddArg(widened)
	return true
}
func rewriteValueS390X_OpBitLen64(v *Value) bool {
v_0 := v.Args[0]
b := v.Block
@ -1278,6 +1314,21 @@ func rewriteValueS390X_OpBitLen64(v *Value) bool {
return true
}
}
// rewriteValueS390X_OpBitLen8 applies the S390X lowering rule
//
//	(BitLen8 x) => (BitLen64 (ZeroExt8to64 x))
//
// The rule carries no condition, so the rewrite is unconditional and the
// function always reports true.
func rewriteValueS390X_OpBitLen8(v *Value) bool {
	// Read everything needed from v up front, before v.reset is called.
	operand := v.Args[0]
	blk := v.Block
	cfgTypes := &blk.Func.Config.Types

	v.reset(OpBitLen64)
	// The zero-extended operand becomes the only argument of the new BitLen64.
	widened := blk.NewValue0(v.Pos, OpZeroExt8to64, cfgTypes.UInt64)
	widened.AddArg(operand)
	v.AddArg(widened)
	return true
}
func rewriteValueS390X_OpBswap16(v *Value) bool {
v_0 := v.Args[0]
b := v.Block

View File

@ -49,8 +49,14 @@ func rewriteValueWasm(v *Value) bool {
case OpAndB:
v.Op = OpWasmI64And
return true
case OpBitLen16:
return rewriteValueWasm_OpBitLen16(v)
case OpBitLen32:
return rewriteValueWasm_OpBitLen32(v)
case OpBitLen64:
return rewriteValueWasm_OpBitLen64(v)
case OpBitLen8:
return rewriteValueWasm_OpBitLen8(v)
case OpCeil:
v.Op = OpWasmF64Ceil
return true
@ -679,6 +685,36 @@ func rewriteValueWasm_OpAddr(v *Value) bool {
return true
}
}
// rewriteValueWasm_OpBitLen16 applies the Wasm lowering rule
//
//	(BitLen16 x) => (BitLen64 (ZeroExt16to64 x))
//
// The rule carries no condition, so the rewrite is unconditional and the
// function always reports true.
func rewriteValueWasm_OpBitLen16(v *Value) bool {
	// Read everything needed from v up front, before v.reset is called.
	operand := v.Args[0]
	blk := v.Block
	cfgTypes := &blk.Func.Config.Types

	v.reset(OpBitLen64)
	// The zero-extended operand becomes the only argument of the new BitLen64.
	widened := blk.NewValue0(v.Pos, OpZeroExt16to64, cfgTypes.UInt64)
	widened.AddArg(operand)
	v.AddArg(widened)
	return true
}
// rewriteValueWasm_OpBitLen32 applies the Wasm lowering rule
//
//	(BitLen32 x) => (BitLen64 (ZeroExt32to64 x))
//
// The rule carries no condition, so the rewrite is unconditional and the
// function always reports true.
func rewriteValueWasm_OpBitLen32(v *Value) bool {
	// Read everything needed from v up front, before v.reset is called.
	operand := v.Args[0]
	blk := v.Block
	cfgTypes := &blk.Func.Config.Types

	v.reset(OpBitLen64)
	// The zero-extended operand becomes the only argument of the new BitLen64.
	widened := blk.NewValue0(v.Pos, OpZeroExt32to64, cfgTypes.UInt64)
	widened.AddArg(operand)
	v.AddArg(widened)
	return true
}
func rewriteValueWasm_OpBitLen64(v *Value) bool {
v_0 := v.Args[0]
b := v.Block
@ -696,6 +732,21 @@ func rewriteValueWasm_OpBitLen64(v *Value) bool {
return true
}
}
// rewriteValueWasm_OpBitLen8 applies the Wasm lowering rule
//
//	(BitLen8 x) => (BitLen64 (ZeroExt8to64 x))
//
// The rule carries no condition, so the rewrite is unconditional and the
// function always reports true.
func rewriteValueWasm_OpBitLen8(v *Value) bool {
	// Read everything needed from v up front, before v.reset is called.
	operand := v.Args[0]
	blk := v.Block
	cfgTypes := &blk.Func.Config.Types

	v.reset(OpBitLen64)
	// The zero-extended operand becomes the only argument of the new BitLen64.
	widened := blk.NewValue0(v.Pos, OpZeroExt8to64, cfgTypes.UInt64)
	widened.AddArg(operand)
	v.AddArg(widened)
	return true
}
func rewriteValueWasm_OpCom16(v *Value) bool {
v_0 := v.Args[0]
b := v.Block

View File

@ -963,51 +963,22 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], args[0])
},
sys.AMD64, sys.ARM64, sys.ARM, sys.Loong64, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm)
sys.AMD64, sys.ARM, sys.ARM64, sys.Loong64, sys.MIPS, sys.PPC64, sys.S390X, sys.Wasm)
addF("math/bits", "Len32",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpBitLen32, types.Types[types.TINT], args[0])
},
sys.AMD64, sys.ARM64, sys.Loong64, sys.PPC64)
addF("math/bits", "Len32",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
if s.config.PtrSize == 4 {
return s.newValue1(ssa.OpBitLen32, types.Types[types.TINT], args[0])
}
x := s.newValue1(ssa.OpZeroExt32to64, types.Types[types.TUINT64], args[0])
return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], x)
},
sys.ARM, sys.S390X, sys.MIPS, sys.Wasm)
addF("math/bits", "Len16",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
if s.config.PtrSize == 4 {
x := s.newValue1(ssa.OpZeroExt16to32, types.Types[types.TUINT32], args[0])
return s.newValue1(ssa.OpBitLen32, types.Types[types.TINT], x)
}
x := s.newValue1(ssa.OpZeroExt16to64, types.Types[types.TUINT64], args[0])
return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], x)
},
sys.ARM64, sys.ARM, sys.Loong64, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm)
sys.AMD64, sys.ARM, sys.ARM64, sys.Loong64, sys.MIPS, sys.PPC64, sys.S390X, sys.Wasm)
addF("math/bits", "Len16",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpBitLen16, types.Types[types.TINT], args[0])
},
sys.AMD64)
addF("math/bits", "Len8",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
if s.config.PtrSize == 4 {
x := s.newValue1(ssa.OpZeroExt8to32, types.Types[types.TUINT32], args[0])
return s.newValue1(ssa.OpBitLen32, types.Types[types.TINT], x)
}
x := s.newValue1(ssa.OpZeroExt8to64, types.Types[types.TUINT64], args[0])
return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], x)
},
sys.ARM64, sys.ARM, sys.Loong64, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm)
sys.AMD64, sys.ARM, sys.ARM64, sys.Loong64, sys.MIPS, sys.PPC64, sys.S390X, sys.Wasm)
addF("math/bits", "Len8",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpBitLen8, types.Types[types.TINT], args[0])
},
sys.AMD64)
sys.AMD64, sys.ARM, sys.ARM64, sys.Loong64, sys.MIPS, sys.PPC64, sys.S390X, sys.Wasm)
addF("math/bits", "Len",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
if s.config.PtrSize == 4 {
@ -1015,7 +986,7 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
}
return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], args[0])
},
sys.AMD64, sys.ARM64, sys.ARM, sys.Loong64, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm)
sys.AMD64, sys.ARM, sys.ARM64, sys.Loong64, sys.MIPS, sys.PPC64, sys.S390X, sys.Wasm)
// LeadingZeros is handled because it trivially calls Len.
addF("math/bits", "Reverse64",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {