mirror of https://github.com/golang/go.git
cmd/compile: intrinsics for math/bits.TrailingZerosX
Implement math/bits.TrailingZerosX using intrinsics.
Generally reorganize the intrinsic spec a bit.
The intrinsics data structure is now built at init time.
This will make doing the other functions in math/bits easier.
Update sys.CtzX to return int instead of uint{64,32} so it
matches math/bits.TrailingZerosX.
Improve the intrinsics a bit for amd64. We don't need the CMOV
for <64 bit versions.
Update #18616
Change-Id: Ic1c5339c943f961d830ae56f12674d7b29d4ff39
Reviewed-on: https://go-review.googlesource.com/38155
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Robert Griesemer <gri@golang.org>
commit d5dc490519 (parent 16200c7333)
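To make the effect concrete, here is a small usage example (mine, not part of the commit) of the calls this CL intrinsifies; after it, each call below compiles to a BSF-based sequence on amd64 (FLOGR on s390x) instead of a call into math/bits:

    package main

    import (
        "fmt"
        "math/bits"
    )

    func main() {
        fmt.Println(bits.TrailingZeros64(96)) // 5
        fmt.Println(bits.TrailingZeros32(96)) // 5
        // Sub-word sizes widen the argument and OR in a guard bit, so a
        // zero input yields the full width without a conditional move.
        fmt.Println(bits.TrailingZeros16(0)) // 16
        fmt.Println(bits.TrailingZeros8(0))  // 8
    }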
@@ -162,7 +162,7 @@ var allAsmTests = []*asmTests{
 	{
 		arch:    "amd64",
 		os:      "linux",
-		imports: []string{"encoding/binary"},
+		imports: []string{"encoding/binary", "math/bits"},
 		tests:   linuxAMD64Tests,
 	},
 	{

@@ -174,7 +174,7 @@ var allAsmTests = []*asmTests{
 	{
 		arch:    "s390x",
 		os:      "linux",
-		imports: []string{"encoding/binary"},
+		imports: []string{"encoding/binary", "math/bits"},
 		tests:   linuxS390XTests,
 	},
 	{

@@ -543,6 +543,39 @@ var linuxAMD64Tests = []*asmTest{
 		`,
 		[]string{"\tBTQ\t\\$60"},
 	},
+	// Intrinsic tests for math/bits
+	{
+		`
+		func f41(a uint64) int {
+			return bits.TrailingZeros64(a)
+		}
+		`,
+		[]string{"\tBSFQ\t", "\tMOVQ\t\\$64,", "\tCMOVQEQ\t"},
+	},
+	{
+		`
+		func f42(a uint32) int {
+			return bits.TrailingZeros32(a)
+		}
+		`,
+		[]string{"\tBSFQ\t", "\tORQ\t[^$]", "\tMOVQ\t\\$4294967296,"},
+	},
+	{
+		`
+		func f43(a uint16) int {
+			return bits.TrailingZeros16(a)
+		}
+		`,
+		[]string{"\tBSFQ\t", "\tORQ\t\\$65536,"},
+	},
+	{
+		`
+		func f44(a uint8) int {
+			return bits.TrailingZeros8(a)
+		}
+		`,
+		[]string{"\tBSFQ\t", "\tORQ\t\\$256,"},
+	},
 }
 
 var linux386Tests = []*asmTest{

@@ -710,6 +743,39 @@ var linuxS390XTests = []*asmTest{
 		`,
 		[]string{"\tFMSUBS\t"},
 	},
+	// Intrinsic tests for math/bits
+	{
+		`
+		func f18(a uint64) int {
+			return bits.TrailingZeros64(a)
+		}
+		`,
+		[]string{"\tFLOGR\t"},
+	},
+	{
+		`
+		func f19(a uint32) int {
+			return bits.TrailingZeros32(a)
+		}
+		`,
+		[]string{"\tFLOGR\t", "\tMOVWZ\t"},
+	},
+	{
+		`
+		func f20(a uint16) int {
+			return bits.TrailingZeros16(a)
+		}
+		`,
+		[]string{"\tFLOGR\t", "\tOR\t\\$65536,"},
+	},
+	{
+		`
+		func f21(a uint8) int {
+			return bits.TrailingZeros8(a)
+		}
+		`,
+		[]string{"\tFLOGR\t", "\tOR\t\\$256,"},
+	},
 }
 
 var linuxARMTests = []*asmTest{

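The ORQ $65536 / OR $256 patterns the new tests expect come from the sub-word lowering. A minimal sketch of that trick (my illustration, not from the commit; bits.TrailingZeros32 stands in for the single BSFQ/FLOGR instruction):

    package main

    import (
        "fmt"
        "math/bits"
    )

    // trailingZeros16 mimics what the compiler emits for f43/f20: widen to
    // 32 bits and OR in 1<<16. The OR'd bit makes the input provably
    // nonzero and caps the count at 16 for a zero argument.
    func trailingZeros16(x uint16) int {
        return bits.TrailingZeros32(uint32(x) | 1<<16)
    }

    func main() {
        fmt.Println(trailingZeros16(8)) // 3
        fmt.Println(trailingZeros16(0)) // 16
    }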
@@ -2455,270 +2455,334 @@ const (
 	callGo
 )
 
-// TODO: make this a field of a configuration object instead of a global.
-var intrinsics *intrinsicInfo
-
-type intrinsicInfo struct {
-	std      map[intrinsicKey]intrinsicBuilder
-	intSized map[sizedIntrinsicKey]intrinsicBuilder
-	ptrSized map[sizedIntrinsicKey]intrinsicBuilder
-}
+var intrinsics map[intrinsicKey]intrinsicBuilder
 
 // An intrinsicBuilder converts a call node n into an ssa value that
 // implements that call as an intrinsic. args is a list of arguments to the func.
 type intrinsicBuilder func(s *state, n *Node, args []*ssa.Value) *ssa.Value
 
 type intrinsicKey struct {
-	pkg string
-	fn  string
-}
-
-type sizedIntrinsicKey struct {
+	arch *sys.Arch
 	pkg  string
 	fn   string
-	size int
 }
 
-// disableForInstrumenting returns nil when instrumenting, fn otherwise
-func disableForInstrumenting(fn intrinsicBuilder) intrinsicBuilder {
-	if instrumenting {
-		return nil
-	}
-	return fn
-}
-
-// enableOnArch returns fn on given archs, nil otherwise
-func enableOnArch(fn intrinsicBuilder, archs ...sys.ArchFamily) intrinsicBuilder {
-	if Thearch.LinkArch.InFamily(archs...) {
-		return fn
-	}
-	return nil
-}
-
-func intrinsicInit() {
-	i := &intrinsicInfo{}
-	intrinsics = i
-
-	// initial set of intrinsics.
-	i.std = map[intrinsicKey]intrinsicBuilder{
-		/******** runtime ********/
-		intrinsicKey{"runtime", "slicebytetostringtmp"}: disableForInstrumenting(func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
-			// Compiler frontend optimizations emit OARRAYBYTESTRTMP nodes
-			// for the backend instead of slicebytetostringtmp calls
-			// when not instrumenting.
-			slice := args[0]
-			ptr := s.newValue1(ssa.OpSlicePtr, ptrto(Types[TUINT8]), slice)
-			len := s.newValue1(ssa.OpSliceLen, Types[TINT], slice)
-			return s.newValue2(ssa.OpStringMake, n.Type, ptr, len)
-		}),
-		intrinsicKey{"runtime", "KeepAlive"}: func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
-			data := s.newValue1(ssa.OpIData, ptrto(Types[TUINT8]), args[0])
-			s.vars[&memVar] = s.newValue2(ssa.OpKeepAlive, ssa.TypeMem, data, s.mem())
-			return nil
-		},
+func init() {
+	intrinsics = map[intrinsicKey]intrinsicBuilder{}
+
+	var all []*sys.Arch
+	var i4 []*sys.Arch
+	var i8 []*sys.Arch
+	var p4 []*sys.Arch
+	var p8 []*sys.Arch
+	for _, a := range sys.Archs {
+		all = append(all, a)
+		if a.IntSize == 4 {
+			i4 = append(i4, a)
+		} else {
+			i8 = append(i8, a)
+		}
+		if a.PtrSize == 4 {
+			p4 = append(p4, a)
+		} else {
+			p8 = append(p8, a)
+		}
+	}
+
+	// add adds the intrinsic b for pkg.fn for the given list of architectures.
+	add := func(pkg, fn string, b intrinsicBuilder, archs ...*sys.Arch) {
+		for _, a := range archs {
+			intrinsics[intrinsicKey{a, pkg, fn}] = b
+		}
+	}
+	// addF does the same as add but operates on architecture families.
+	addF := func(pkg, fn string, b intrinsicBuilder, archFamilies ...sys.ArchFamily) {
+		m := 0
+		for _, f := range archFamilies {
+			if f >= 32 {
+				panic("too many architecture families")
+			}
+			m |= 1 << uint(f)
+		}
+		for _, a := range all {
+			if m>>uint(a.Family)&1 != 0 {
+				intrinsics[intrinsicKey{a, pkg, fn}] = b
+			}
+		}
+	}
+	// alias defines pkg.fn = pkg2.fn2 for all architectures in archs for which pkg2.fn2 exists.
+	alias := func(pkg, fn, pkg2, fn2 string, archs ...*sys.Arch) {
+		for _, a := range archs {
+			if b, ok := intrinsics[intrinsicKey{a, pkg2, fn2}]; ok {
+				intrinsics[intrinsicKey{a, pkg, fn}] = b
+			}
+		}
+	}
+
+	/******** runtime ********/
+	if !instrumenting {
+		add("runtime", "slicebytetostringtmp",
+			func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+				// Compiler frontend optimizations emit OARRAYBYTESTRTMP nodes
+				// for the backend instead of slicebytetostringtmp calls
+				// when not instrumenting.
+				slice := args[0]
+				ptr := s.newValue1(ssa.OpSlicePtr, ptrto(Types[TUINT8]), slice)
+				len := s.newValue1(ssa.OpSliceLen, Types[TINT], slice)
+				return s.newValue2(ssa.OpStringMake, n.Type, ptr, len)
+			},
+			all...)
+	}
+	add("runtime", "KeepAlive",
+		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+			data := s.newValue1(ssa.OpIData, ptrto(Types[TUINT8]), args[0])
+			s.vars[&memVar] = s.newValue2(ssa.OpKeepAlive, ssa.TypeMem, data, s.mem())
+			return nil
+		},
+		all...)
 
-		/******** runtime/internal/sys ********/
-		intrinsicKey{"runtime/internal/sys", "Ctz32"}: enableOnArch(func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
-			return s.newValue1(ssa.OpCtz32, Types[TUINT32], args[0])
-		}, sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS),
-		intrinsicKey{"runtime/internal/sys", "Ctz64"}: enableOnArch(func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
-			return s.newValue1(ssa.OpCtz64, Types[TUINT64], args[0])
-		}, sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS),
-		intrinsicKey{"runtime/internal/sys", "Bswap32"}: enableOnArch(func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
-			return s.newValue1(ssa.OpBswap32, Types[TUINT32], args[0])
-		}, sys.AMD64, sys.ARM64, sys.ARM, sys.S390X),
-		intrinsicKey{"runtime/internal/sys", "Bswap64"}: enableOnArch(func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
-			return s.newValue1(ssa.OpBswap64, Types[TUINT64], args[0])
-		}, sys.AMD64, sys.ARM64, sys.ARM, sys.S390X),
+	/******** runtime/internal/sys ********/
+	addF("runtime/internal/sys", "Ctz32",
+		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+			return s.newValue1(ssa.OpCtz32, Types[TINT], args[0])
+		},
+		sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS)
+	addF("runtime/internal/sys", "Ctz64",
+		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+			return s.newValue1(ssa.OpCtz64, Types[TINT], args[0])
+		},
+		sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS)
+	addF("runtime/internal/sys", "Bswap32",
+		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+			return s.newValue1(ssa.OpBswap32, Types[TUINT32], args[0])
+		},
+		sys.AMD64, sys.ARM64, sys.ARM, sys.S390X)
+	addF("runtime/internal/sys", "Bswap64",
+		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+			return s.newValue1(ssa.OpBswap64, Types[TUINT64], args[0])
+		},
+		sys.AMD64, sys.ARM64, sys.ARM, sys.S390X)
 
-		/******** runtime/internal/atomic ********/
-		intrinsicKey{"runtime/internal/atomic", "Load"}: enableOnArch(func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
-			v := s.newValue2(ssa.OpAtomicLoad32, ssa.MakeTuple(Types[TUINT32], ssa.TypeMem), args[0], s.mem())
-			s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v)
-			return s.newValue1(ssa.OpSelect0, Types[TUINT32], v)
-		}, sys.AMD64, sys.ARM64, sys.S390X, sys.MIPS, sys.PPC64),
-		intrinsicKey{"runtime/internal/atomic", "Load64"}: enableOnArch(func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
-			v := s.newValue2(ssa.OpAtomicLoad64, ssa.MakeTuple(Types[TUINT64], ssa.TypeMem), args[0], s.mem())
-			s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v)
-			return s.newValue1(ssa.OpSelect0, Types[TUINT64], v)
-		}, sys.AMD64, sys.ARM64, sys.S390X, sys.PPC64),
-		intrinsicKey{"runtime/internal/atomic", "Loadp"}: enableOnArch(func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
-			v := s.newValue2(ssa.OpAtomicLoadPtr, ssa.MakeTuple(ptrto(Types[TUINT8]), ssa.TypeMem), args[0], s.mem())
-			s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v)
-			return s.newValue1(ssa.OpSelect0, ptrto(Types[TUINT8]), v)
-		}, sys.AMD64, sys.ARM64, sys.S390X, sys.MIPS, sys.PPC64),
+	/******** runtime/internal/atomic ********/
+	addF("runtime/internal/atomic", "Load",
+		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+			v := s.newValue2(ssa.OpAtomicLoad32, ssa.MakeTuple(Types[TUINT32], ssa.TypeMem), args[0], s.mem())
+			s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v)
+			return s.newValue1(ssa.OpSelect0, Types[TUINT32], v)
+		},
+		sys.AMD64, sys.ARM64, sys.S390X, sys.MIPS, sys.PPC64)
+	addF("runtime/internal/atomic", "Load64",
+		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+			v := s.newValue2(ssa.OpAtomicLoad64, ssa.MakeTuple(Types[TUINT64], ssa.TypeMem), args[0], s.mem())
+			s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v)
+			return s.newValue1(ssa.OpSelect0, Types[TUINT64], v)
+		},
+		sys.AMD64, sys.ARM64, sys.S390X, sys.PPC64)
+	addF("runtime/internal/atomic", "Loadp",
+		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+			v := s.newValue2(ssa.OpAtomicLoadPtr, ssa.MakeTuple(ptrto(Types[TUINT8]), ssa.TypeMem), args[0], s.mem())
+			s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v)
+			return s.newValue1(ssa.OpSelect0, ptrto(Types[TUINT8]), v)
+		},
+		sys.AMD64, sys.ARM64, sys.S390X, sys.MIPS, sys.PPC64)
 
-		intrinsicKey{"runtime/internal/atomic", "Store"}: enableOnArch(func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
-			s.vars[&memVar] = s.newValue3(ssa.OpAtomicStore32, ssa.TypeMem, args[0], args[1], s.mem())
-			return nil
-		}, sys.AMD64, sys.ARM64, sys.S390X, sys.MIPS, sys.PPC64),
-		intrinsicKey{"runtime/internal/atomic", "Store64"}: enableOnArch(func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
-			s.vars[&memVar] = s.newValue3(ssa.OpAtomicStore64, ssa.TypeMem, args[0], args[1], s.mem())
-			return nil
-		}, sys.AMD64, sys.ARM64, sys.S390X, sys.PPC64),
-		intrinsicKey{"runtime/internal/atomic", "StorepNoWB"}: enableOnArch(func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
-			s.vars[&memVar] = s.newValue3(ssa.OpAtomicStorePtrNoWB, ssa.TypeMem, args[0], args[1], s.mem())
-			return nil
-		}, sys.AMD64, sys.ARM64, sys.S390X, sys.MIPS),
+	addF("runtime/internal/atomic", "Store",
+		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+			s.vars[&memVar] = s.newValue3(ssa.OpAtomicStore32, ssa.TypeMem, args[0], args[1], s.mem())
+			return nil
+		},
+		sys.AMD64, sys.ARM64, sys.S390X, sys.MIPS, sys.PPC64)
+	addF("runtime/internal/atomic", "Store64",
+		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+			s.vars[&memVar] = s.newValue3(ssa.OpAtomicStore64, ssa.TypeMem, args[0], args[1], s.mem())
+			return nil
+		},
+		sys.AMD64, sys.ARM64, sys.S390X, sys.PPC64)
+	addF("runtime/internal/atomic", "StorepNoWB",
+		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+			s.vars[&memVar] = s.newValue3(ssa.OpAtomicStorePtrNoWB, ssa.TypeMem, args[0], args[1], s.mem())
+			return nil
+		},
+		sys.AMD64, sys.ARM64, sys.S390X, sys.MIPS)
 
-		intrinsicKey{"runtime/internal/atomic", "Xchg"}: enableOnArch(func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
-			v := s.newValue3(ssa.OpAtomicExchange32, ssa.MakeTuple(Types[TUINT32], ssa.TypeMem), args[0], args[1], s.mem())
-			s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v)
-			return s.newValue1(ssa.OpSelect0, Types[TUINT32], v)
-		}, sys.AMD64, sys.ARM64, sys.S390X, sys.MIPS, sys.PPC64),
-		intrinsicKey{"runtime/internal/atomic", "Xchg64"}: enableOnArch(func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
-			v := s.newValue3(ssa.OpAtomicExchange64, ssa.MakeTuple(Types[TUINT64], ssa.TypeMem), args[0], args[1], s.mem())
-			s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v)
-			return s.newValue1(ssa.OpSelect0, Types[TUINT64], v)
-		}, sys.AMD64, sys.ARM64, sys.S390X, sys.PPC64),
+	addF("runtime/internal/atomic", "Xchg",
+		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+			v := s.newValue3(ssa.OpAtomicExchange32, ssa.MakeTuple(Types[TUINT32], ssa.TypeMem), args[0], args[1], s.mem())
+			s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v)
+			return s.newValue1(ssa.OpSelect0, Types[TUINT32], v)
+		},
+		sys.AMD64, sys.ARM64, sys.S390X, sys.MIPS, sys.PPC64)
+	addF("runtime/internal/atomic", "Xchg64",
+		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+			v := s.newValue3(ssa.OpAtomicExchange64, ssa.MakeTuple(Types[TUINT64], ssa.TypeMem), args[0], args[1], s.mem())
+			s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v)
+			return s.newValue1(ssa.OpSelect0, Types[TUINT64], v)
+		},
+		sys.AMD64, sys.ARM64, sys.S390X, sys.PPC64)
 
-		intrinsicKey{"runtime/internal/atomic", "Xadd"}: enableOnArch(func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
-			v := s.newValue3(ssa.OpAtomicAdd32, ssa.MakeTuple(Types[TUINT32], ssa.TypeMem), args[0], args[1], s.mem())
-			s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v)
-			return s.newValue1(ssa.OpSelect0, Types[TUINT32], v)
-		}, sys.AMD64, sys.ARM64, sys.S390X, sys.MIPS, sys.PPC64),
-		intrinsicKey{"runtime/internal/atomic", "Xadd64"}: enableOnArch(func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
-			v := s.newValue3(ssa.OpAtomicAdd64, ssa.MakeTuple(Types[TUINT64], ssa.TypeMem), args[0], args[1], s.mem())
-			s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v)
-			return s.newValue1(ssa.OpSelect0, Types[TUINT64], v)
-		}, sys.AMD64, sys.ARM64, sys.S390X, sys.PPC64),
+	addF("runtime/internal/atomic", "Xadd",
+		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+			v := s.newValue3(ssa.OpAtomicAdd32, ssa.MakeTuple(Types[TUINT32], ssa.TypeMem), args[0], args[1], s.mem())
+			s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v)
+			return s.newValue1(ssa.OpSelect0, Types[TUINT32], v)
+		},
+		sys.AMD64, sys.ARM64, sys.S390X, sys.MIPS, sys.PPC64)
+	addF("runtime/internal/atomic", "Xadd64",
+		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+			v := s.newValue3(ssa.OpAtomicAdd64, ssa.MakeTuple(Types[TUINT64], ssa.TypeMem), args[0], args[1], s.mem())
+			s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v)
+			return s.newValue1(ssa.OpSelect0, Types[TUINT64], v)
+		},
+		sys.AMD64, sys.ARM64, sys.S390X, sys.PPC64)
 
-		intrinsicKey{"runtime/internal/atomic", "Cas"}: enableOnArch(func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
-			v := s.newValue4(ssa.OpAtomicCompareAndSwap32, ssa.MakeTuple(Types[TBOOL], ssa.TypeMem), args[0], args[1], args[2], s.mem())
-			s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v)
-			return s.newValue1(ssa.OpSelect0, Types[TBOOL], v)
-		}, sys.AMD64, sys.ARM64, sys.S390X, sys.MIPS, sys.PPC64),
-		intrinsicKey{"runtime/internal/atomic", "Cas64"}: enableOnArch(func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
-			v := s.newValue4(ssa.OpAtomicCompareAndSwap64, ssa.MakeTuple(Types[TBOOL], ssa.TypeMem), args[0], args[1], args[2], s.mem())
-			s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v)
-			return s.newValue1(ssa.OpSelect0, Types[TBOOL], v)
-		}, sys.AMD64, sys.ARM64, sys.S390X, sys.PPC64),
+	addF("runtime/internal/atomic", "Cas",
+		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+			v := s.newValue4(ssa.OpAtomicCompareAndSwap32, ssa.MakeTuple(Types[TBOOL], ssa.TypeMem), args[0], args[1], args[2], s.mem())
+			s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v)
+			return s.newValue1(ssa.OpSelect0, Types[TBOOL], v)
+		},
+		sys.AMD64, sys.ARM64, sys.S390X, sys.MIPS, sys.PPC64)
+	addF("runtime/internal/atomic", "Cas64",
+		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+			v := s.newValue4(ssa.OpAtomicCompareAndSwap64, ssa.MakeTuple(Types[TBOOL], ssa.TypeMem), args[0], args[1], args[2], s.mem())
+			s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, v)
+			return s.newValue1(ssa.OpSelect0, Types[TBOOL], v)
+		},
+		sys.AMD64, sys.ARM64, sys.S390X, sys.PPC64)
 
-		intrinsicKey{"runtime/internal/atomic", "And8"}: enableOnArch(func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
-			s.vars[&memVar] = s.newValue3(ssa.OpAtomicAnd8, ssa.TypeMem, args[0], args[1], s.mem())
-			return nil
-		}, sys.AMD64, sys.ARM64, sys.MIPS, sys.PPC64),
-		intrinsicKey{"runtime/internal/atomic", "Or8"}: enableOnArch(func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
-			s.vars[&memVar] = s.newValue3(ssa.OpAtomicOr8, ssa.TypeMem, args[0], args[1], s.mem())
-			return nil
-		}, sys.AMD64, sys.ARM64, sys.MIPS, sys.PPC64),
+	addF("runtime/internal/atomic", "And8",
+		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+			s.vars[&memVar] = s.newValue3(ssa.OpAtomicAnd8, ssa.TypeMem, args[0], args[1], s.mem())
+			return nil
+		},
+		sys.AMD64, sys.ARM64, sys.MIPS, sys.PPC64)
+	addF("runtime/internal/atomic", "Or8",
+		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+			s.vars[&memVar] = s.newValue3(ssa.OpAtomicOr8, ssa.TypeMem, args[0], args[1], s.mem())
+			return nil
+		},
+		sys.AMD64, sys.ARM64, sys.MIPS, sys.PPC64)
 
-		/******** math ********/
-		intrinsicKey{"math", "Sqrt"}: enableOnArch(func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
-			return s.newValue1(ssa.OpSqrt, Types[TFLOAT64], args[0])
-		}, sys.AMD64, sys.ARM, sys.ARM64, sys.MIPS, sys.PPC64, sys.S390X),
-	}
-
-	// aliases internal to runtime/internal/atomic
-	i.std[intrinsicKey{"runtime/internal/atomic", "Loadint64"}] =
-		i.std[intrinsicKey{"runtime/internal/atomic", "Load64"}]
-	i.std[intrinsicKey{"runtime/internal/atomic", "Xaddint64"}] =
-		i.std[intrinsicKey{"runtime/internal/atomic", "Xadd64"}]
-
-	// intrinsics which vary depending on the size of int/ptr.
-	i.intSized = map[sizedIntrinsicKey]intrinsicBuilder{
-		sizedIntrinsicKey{"runtime/internal/atomic", "Loaduint", 4}: i.std[intrinsicKey{"runtime/internal/atomic", "Load"}],
-		sizedIntrinsicKey{"runtime/internal/atomic", "Loaduint", 8}: i.std[intrinsicKey{"runtime/internal/atomic", "Load64"}],
-	}
-	i.ptrSized = map[sizedIntrinsicKey]intrinsicBuilder{
-		sizedIntrinsicKey{"runtime/internal/atomic", "Loaduintptr", 4}:  i.std[intrinsicKey{"runtime/internal/atomic", "Load"}],
-		sizedIntrinsicKey{"runtime/internal/atomic", "Loaduintptr", 8}:  i.std[intrinsicKey{"runtime/internal/atomic", "Load64"}],
-		sizedIntrinsicKey{"runtime/internal/atomic", "Storeuintptr", 4}: i.std[intrinsicKey{"runtime/internal/atomic", "Store"}],
-		sizedIntrinsicKey{"runtime/internal/atomic", "Storeuintptr", 8}: i.std[intrinsicKey{"runtime/internal/atomic", "Store64"}],
-		sizedIntrinsicKey{"runtime/internal/atomic", "Xchguintptr", 4}:  i.std[intrinsicKey{"runtime/internal/atomic", "Xchg"}],
-		sizedIntrinsicKey{"runtime/internal/atomic", "Xchguintptr", 8}:  i.std[intrinsicKey{"runtime/internal/atomic", "Xchg64"}],
-		sizedIntrinsicKey{"runtime/internal/atomic", "Xadduintptr", 4}:  i.std[intrinsicKey{"runtime/internal/atomic", "Xadd"}],
-		sizedIntrinsicKey{"runtime/internal/atomic", "Xadduintptr", 8}:  i.std[intrinsicKey{"runtime/internal/atomic", "Xadd64"}],
-		sizedIntrinsicKey{"runtime/internal/atomic", "Casuintptr", 4}:   i.std[intrinsicKey{"runtime/internal/atomic", "Cas"}],
-		sizedIntrinsicKey{"runtime/internal/atomic", "Casuintptr", 8}:   i.std[intrinsicKey{"runtime/internal/atomic", "Cas64"}],
-		sizedIntrinsicKey{"runtime/internal/atomic", "Casp1", 4}:        i.std[intrinsicKey{"runtime/internal/atomic", "Cas"}],
-		sizedIntrinsicKey{"runtime/internal/atomic", "Casp1", 8}:        i.std[intrinsicKey{"runtime/internal/atomic", "Cas64"}],
-	}
+	alias("runtime/internal/atomic", "Loadint64", "runtime/internal/atomic", "Load64", all...)
+	alias("runtime/internal/atomic", "Xaddint64", "runtime/internal/atomic", "Xadd64", all...)
+	alias("runtime/internal/atomic", "Loaduint", "runtime/internal/atomic", "Load", i4...)
+	alias("runtime/internal/atomic", "Loaduint", "runtime/internal/atomic", "Load64", i8...)
+	alias("runtime/internal/atomic", "Loaduintptr", "runtime/internal/atomic", "Load", p4...)
+	alias("runtime/internal/atomic", "Loaduintptr", "runtime/internal/atomic", "Load64", p8...)
+	alias("runtime/internal/atomic", "Storeuintptr", "runtime/internal/atomic", "Store", p4...)
+	alias("runtime/internal/atomic", "Storeuintptr", "runtime/internal/atomic", "Store64", p8...)
+	alias("runtime/internal/atomic", "Xchguintptr", "runtime/internal/atomic", "Xchg", p4...)
+	alias("runtime/internal/atomic", "Xchguintptr", "runtime/internal/atomic", "Xchg64", p8...)
+	alias("runtime/internal/atomic", "Xadduintptr", "runtime/internal/atomic", "Xadd", p4...)
+	alias("runtime/internal/atomic", "Xadduintptr", "runtime/internal/atomic", "Xadd64", p8...)
+	alias("runtime/internal/atomic", "Casuintptr", "runtime/internal/atomic", "Cas", p4...)
+	alias("runtime/internal/atomic", "Casuintptr", "runtime/internal/atomic", "Cas64", p8...)
+	alias("runtime/internal/atomic", "Casp1", "runtime/internal/atomic", "Cas", p4...)
+	alias("runtime/internal/atomic", "Casp1", "runtime/internal/atomic", "Cas64", p8...)
+
+	/******** math ********/
+	addF("math", "Sqrt",
+		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+			return s.newValue1(ssa.OpSqrt, Types[TFLOAT64], args[0])
+		},
+		sys.AMD64, sys.ARM, sys.ARM64, sys.MIPS, sys.PPC64, sys.S390X)
+
+	/******** math/bits ********/
+	addF("math/bits", "TrailingZeros64",
+		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+			return s.newValue1(ssa.OpCtz64, Types[TINT], args[0])
+		},
+		sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS)
+	addF("math/bits", "TrailingZeros32",
+		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+			return s.newValue1(ssa.OpCtz32, Types[TINT], args[0])
+		},
+		sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS)
+	addF("math/bits", "TrailingZeros16",
+		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+			x := s.newValue1(ssa.OpZeroExt16to32, Types[TUINT32], args[0])
+			c := s.constInt32(Types[TUINT32], 1<<16)
+			y := s.newValue2(ssa.OpOr32, Types[TUINT32], x, c)
+			return s.newValue1(ssa.OpCtz32, Types[TINT], y)
+		},
+		sys.ARM, sys.MIPS)
+	addF("math/bits", "TrailingZeros16",
+		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+			x := s.newValue1(ssa.OpZeroExt16to64, Types[TUINT64], args[0])
+			c := s.constInt64(Types[TUINT64], 1<<16)
+			y := s.newValue2(ssa.OpOr64, Types[TUINT64], x, c)
+			return s.newValue1(ssa.OpCtz64, Types[TINT], y)
+		},
+		sys.AMD64, sys.ARM64, sys.S390X)
+	addF("math/bits", "TrailingZeros8",
+		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+			x := s.newValue1(ssa.OpZeroExt8to32, Types[TUINT32], args[0])
+			c := s.constInt32(Types[TUINT32], 1<<8)
+			y := s.newValue2(ssa.OpOr32, Types[TUINT32], x, c)
+			return s.newValue1(ssa.OpCtz32, Types[TINT], y)
+		},
+		sys.ARM, sys.MIPS)
+	addF("math/bits", "TrailingZeros8",
+		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+			x := s.newValue1(ssa.OpZeroExt8to64, Types[TUINT64], args[0])
+			c := s.constInt64(Types[TUINT64], 1<<8)
+			y := s.newValue2(ssa.OpOr64, Types[TUINT64], x, c)
+			return s.newValue1(ssa.OpCtz64, Types[TINT], y)
+		},
+		sys.AMD64, sys.ARM64, sys.S390X)
 
 	/******** sync/atomic ********/
-	if flag_race {
-		// The race detector needs to be able to intercept these calls.
-		// We can't intrinsify them.
-		return
-	}
-	// these are all aliases to runtime/internal/atomic implementations.
-	i.std[intrinsicKey{"sync/atomic", "LoadInt32"}] =
-		i.std[intrinsicKey{"runtime/internal/atomic", "Load"}]
-	i.std[intrinsicKey{"sync/atomic", "LoadInt64"}] =
-		i.std[intrinsicKey{"runtime/internal/atomic", "Load64"}]
-	i.std[intrinsicKey{"sync/atomic", "LoadPointer"}] =
-		i.std[intrinsicKey{"runtime/internal/atomic", "Loadp"}]
-	i.std[intrinsicKey{"sync/atomic", "LoadUint32"}] =
-		i.std[intrinsicKey{"runtime/internal/atomic", "Load"}]
-	i.std[intrinsicKey{"sync/atomic", "LoadUint64"}] =
-		i.std[intrinsicKey{"runtime/internal/atomic", "Load64"}]
-	i.ptrSized[sizedIntrinsicKey{"sync/atomic", "LoadUintptr", 4}] =
-		i.std[intrinsicKey{"runtime/internal/atomic", "Load"}]
-	i.ptrSized[sizedIntrinsicKey{"sync/atomic", "LoadUintptr", 8}] =
-		i.std[intrinsicKey{"runtime/internal/atomic", "Load64"}]
-
-	i.std[intrinsicKey{"sync/atomic", "StoreInt32"}] =
-		i.std[intrinsicKey{"runtime/internal/atomic", "Store"}]
-	i.std[intrinsicKey{"sync/atomic", "StoreInt64"}] =
-		i.std[intrinsicKey{"runtime/internal/atomic", "Store64"}]
+	// Note: these are disabled by flag_race in findIntrinsic below.
+	alias("sync/atomic", "LoadInt32", "runtime/internal/atomic", "Load", all...)
+	alias("sync/atomic", "LoadInt64", "runtime/internal/atomic", "Load64", all...)
+	alias("sync/atomic", "LoadPointer", "runtime/internal/atomic", "Loadp", all...)
+	alias("sync/atomic", "LoadUint32", "runtime/internal/atomic", "Load", all...)
+	alias("sync/atomic", "LoadUint64", "runtime/internal/atomic", "Load64", all...)
+	alias("sync/atomic", "LoadUintptr", "runtime/internal/atomic", "Load", p4...)
+	alias("sync/atomic", "LoadUintptr", "runtime/internal/atomic", "Load64", p8...)
+
+	alias("sync/atomic", "StoreInt32", "runtime/internal/atomic", "Store", all...)
+	alias("sync/atomic", "StoreInt64", "runtime/internal/atomic", "Store64", all...)
 	// Note: not StorePointer, that needs a write barrier. Same below for {CompareAnd}Swap.
-	i.std[intrinsicKey{"sync/atomic", "StoreUint32"}] =
-		i.std[intrinsicKey{"runtime/internal/atomic", "Store"}]
-	i.std[intrinsicKey{"sync/atomic", "StoreUint64"}] =
-		i.std[intrinsicKey{"runtime/internal/atomic", "Store64"}]
-	i.ptrSized[sizedIntrinsicKey{"sync/atomic", "StoreUintptr", 4}] =
-		i.std[intrinsicKey{"runtime/internal/atomic", "Store"}]
-	i.ptrSized[sizedIntrinsicKey{"sync/atomic", "StoreUintptr", 8}] =
-		i.std[intrinsicKey{"runtime/internal/atomic", "Store64"}]
-
-	i.std[intrinsicKey{"sync/atomic", "SwapInt32"}] =
-		i.std[intrinsicKey{"runtime/internal/atomic", "Xchg"}]
-	i.std[intrinsicKey{"sync/atomic", "SwapInt64"}] =
-		i.std[intrinsicKey{"runtime/internal/atomic", "Xchg64"}]
-	i.std[intrinsicKey{"sync/atomic", "SwapUint32"}] =
-		i.std[intrinsicKey{"runtime/internal/atomic", "Xchg"}]
-	i.std[intrinsicKey{"sync/atomic", "SwapUint64"}] =
-		i.std[intrinsicKey{"runtime/internal/atomic", "Xchg64"}]
-	i.ptrSized[sizedIntrinsicKey{"sync/atomic", "SwapUintptr", 4}] =
-		i.std[intrinsicKey{"runtime/internal/atomic", "Xchg"}]
-	i.ptrSized[sizedIntrinsicKey{"sync/atomic", "SwapUintptr", 8}] =
-		i.std[intrinsicKey{"runtime/internal/atomic", "Xchg64"}]
-
-	i.std[intrinsicKey{"sync/atomic", "CompareAndSwapInt32"}] =
-		i.std[intrinsicKey{"runtime/internal/atomic", "Cas"}]
-	i.std[intrinsicKey{"sync/atomic", "CompareAndSwapInt64"}] =
-		i.std[intrinsicKey{"runtime/internal/atomic", "Cas64"}]
-	i.std[intrinsicKey{"sync/atomic", "CompareAndSwapUint32"}] =
-		i.std[intrinsicKey{"runtime/internal/atomic", "Cas"}]
-	i.std[intrinsicKey{"sync/atomic", "CompareAndSwapUint64"}] =
-		i.std[intrinsicKey{"runtime/internal/atomic", "Cas64"}]
-	i.ptrSized[sizedIntrinsicKey{"sync/atomic", "CompareAndSwapUintptr", 4}] =
-		i.std[intrinsicKey{"runtime/internal/atomic", "Cas"}]
-	i.ptrSized[sizedIntrinsicKey{"sync/atomic", "CompareAndSwapUintptr", 8}] =
-		i.std[intrinsicKey{"runtime/internal/atomic", "Cas64"}]
-
-	i.std[intrinsicKey{"sync/atomic", "AddInt32"}] =
-		i.std[intrinsicKey{"runtime/internal/atomic", "Xadd"}]
-	i.std[intrinsicKey{"sync/atomic", "AddInt64"}] =
-		i.std[intrinsicKey{"runtime/internal/atomic", "Xadd64"}]
-	i.std[intrinsicKey{"sync/atomic", "AddUint32"}] =
-		i.std[intrinsicKey{"runtime/internal/atomic", "Xadd"}]
-	i.std[intrinsicKey{"sync/atomic", "AddUint64"}] =
-		i.std[intrinsicKey{"runtime/internal/atomic", "Xadd64"}]
-	i.ptrSized[sizedIntrinsicKey{"sync/atomic", "AddUintptr", 4}] =
-		i.std[intrinsicKey{"runtime/internal/atomic", "Xadd"}]
-	i.ptrSized[sizedIntrinsicKey{"sync/atomic", "AddUintptr", 8}] =
-		i.std[intrinsicKey{"runtime/internal/atomic", "Xadd64"}]
+	alias("sync/atomic", "StoreUint32", "runtime/internal/atomic", "Store", all...)
+	alias("sync/atomic", "StoreUint64", "runtime/internal/atomic", "Store64", all...)
+	alias("sync/atomic", "StoreUintptr", "runtime/internal/atomic", "Store", p4...)
+	alias("sync/atomic", "StoreUintptr", "runtime/internal/atomic", "Store64", p8...)
+
+	alias("sync/atomic", "SwapInt32", "runtime/internal/atomic", "Xchg", all...)
+	alias("sync/atomic", "SwapInt64", "runtime/internal/atomic", "Xchg64", all...)
+	alias("sync/atomic", "SwapUint32", "runtime/internal/atomic", "Xchg", all...)
+	alias("sync/atomic", "SwapUint64", "runtime/internal/atomic", "Xchg64", all...)
+	alias("sync/atomic", "SwapUintptr", "runtime/internal/atomic", "Xchg", p4...)
+	alias("sync/atomic", "SwapUintptr", "runtime/internal/atomic", "Xchg64", p8...)
+
+	alias("sync/atomic", "CompareAndSwapInt32", "runtime/internal/atomic", "Cas", all...)
+	alias("sync/atomic", "CompareAndSwapInt64", "runtime/internal/atomic", "Cas64", all...)
+	alias("sync/atomic", "CompareAndSwapUint32", "runtime/internal/atomic", "Cas", all...)
+	alias("sync/atomic", "CompareAndSwapUint64", "runtime/internal/atomic", "Cas64", all...)
+	alias("sync/atomic", "CompareAndSwapUintptr", "runtime/internal/atomic", "Cas", p4...)
+	alias("sync/atomic", "CompareAndSwapUintptr", "runtime/internal/atomic", "Cas64", p8...)
+
+	alias("sync/atomic", "AddInt32", "runtime/internal/atomic", "Xadd", all...)
+	alias("sync/atomic", "AddInt64", "runtime/internal/atomic", "Xadd64", all...)
+	alias("sync/atomic", "AddUint32", "runtime/internal/atomic", "Xadd", all...)
+	alias("sync/atomic", "AddUint64", "runtime/internal/atomic", "Xadd64", all...)
+	alias("sync/atomic", "AddUintptr", "runtime/internal/atomic", "Xadd", p4...)
+	alias("sync/atomic", "AddUintptr", "runtime/internal/atomic", "Xadd64", p8...)
 
 	/******** math/big ********/
-	i.intSized[sizedIntrinsicKey{"math/big", "mulWW", 8}] =
-		enableOnArch(func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
-			return s.newValue2(ssa.OpMul64uhilo, ssa.MakeTuple(Types[TUINT64], Types[TUINT64]), args[0], args[1])
-		}, sys.AMD64)
-	i.intSized[sizedIntrinsicKey{"math/big", "divWW", 8}] =
-		enableOnArch(func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
-			return s.newValue3(ssa.OpDiv128u, ssa.MakeTuple(Types[TUINT64], Types[TUINT64]), args[0], args[1], args[2])
-		}, sys.AMD64)
+	add("math/big", "mulWW",
+		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+			return s.newValue2(ssa.OpMul64uhilo, ssa.MakeTuple(Types[TUINT64], Types[TUINT64]), args[0], args[1])
+		},
+		sys.ArchAMD64)
+	add("math/big", "divWW",
+		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+			return s.newValue3(ssa.OpDiv128u, ssa.MakeTuple(Types[TUINT64], Types[TUINT64]), args[0], args[1], args[2])
+		},
+		sys.ArchAMD64)
 }
 
 // findIntrinsic returns a function which builds the SSA equivalent of the

@@ -2730,23 +2794,17 @@ func findIntrinsic(sym *Sym) intrinsicBuilder {
 	if sym == nil || sym.Pkg == nil {
 		return nil
 	}
-	if intrinsics == nil {
-		intrinsicInit()
-	}
 	pkg := sym.Pkg.Path
 	if sym.Pkg == localpkg {
 		pkg = myimportpath
 	}
+	if flag_race && pkg == "sync/atomic" {
+		// The race detector needs to be able to intercept these calls.
+		// We can't intrinsify them.
+		return nil
+	}
 	fn := sym.Name
-	f := intrinsics.std[intrinsicKey{pkg, fn}]
-	if f != nil {
-		return f
-	}
-	f = intrinsics.intSized[sizedIntrinsicKey{pkg, fn, Widthint}]
-	if f != nil {
-		return f
-	}
-	return intrinsics.ptrSized[sizedIntrinsicKey{pkg, fn, Widthptr}]
+	return intrinsics[intrinsicKey{Thearch.LinkArch.Arch, pkg, fn}]
 }
 
 func isIntrinsicCall(n *Node) bool {

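A self-contained sketch (mine, not compiler source; names simplified) of the registration scheme the two hunks above introduce: the map key carries the architecture, so findIntrinsic becomes a single lookup, and aliases are just extra keys pointing at an already-registered builder:

    package main

    import "fmt"

    type intrinsicKey struct{ arch, pkg, fn string }

    var intrinsics = map[intrinsicKey]string{}

    func add(pkg, fn, builder string, archs ...string) {
        for _, a := range archs {
            intrinsics[intrinsicKey{a, pkg, fn}] = builder
        }
    }

    func alias(pkg, fn, pkg2, fn2 string, archs ...string) {
        for _, a := range archs {
            if b, ok := intrinsics[intrinsicKey{a, pkg2, fn2}]; ok {
                intrinsics[intrinsicKey{a, pkg, fn}] = b
            }
        }
    }

    func main() {
        add("runtime/internal/atomic", "Xadd64", "OpAtomicAdd64", "amd64", "arm64")
        alias("sync/atomic", "AddInt64", "runtime/internal/atomic", "Xadd64", "amd64", "arm64")
        // the single lookup findIntrinsic now performs:
        fmt.Println(intrinsics[intrinsicKey{"amd64", "sync/atomic", "AddInt64"}]) // OpAtomicAdd64
    }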
@@ -98,7 +98,7 @@
 
 // Lowering other arithmetic
 (Ctz64 <t> x) -> (CMOVQEQ (Select0 <t> (BSFQ x)) (MOVQconst <t> [64]) (Select1 <TypeFlags> (BSFQ x)))
-(Ctz32 <t> x) -> (CMOVLEQ (Select0 <t> (BSFL x)) (MOVLconst <t> [32]) (Select1 <TypeFlags> (BSFL x)))
+(Ctz32 x) -> (Select0 (BSFQ (ORQ <config.Frontend().TypeUInt64()> (MOVQconst [1<<32]) x)))
 
 (Bswap64 x) -> (BSWAPQ x)
 (Bswap32 x) -> (BSWAPL x)

@@ -2083,3 +2083,9 @@
 	(CMPXCHGQlock [off1+off2] {sym} ptr old new_ mem)
 (CMPXCHGLlock [off1] {sym} (ADDQconst [off2] ptr) old new_ mem) && is32Bit(off1+off2) ->
 	(CMPXCHGLlock [off1+off2] {sym} ptr old new_ mem)
+
+// We don't need the conditional move if we know the arg of BSF is not zero.
+(CMOVQEQ x _ (Select1 (BSFQ (ORQconst [c] _)))) && c != 0 -> x
+// Extension is unnecessary for trailing zeros.
+(BSFQ (ORQconst <t> [1<<8] (MOVBQZX x))) -> (BSFQ (ORQconst <t> [1<<8] x))
+(BSFQ (ORQconst <t> [1<<16] (MOVWQZX x))) -> (BSFQ (ORQconst <t> [1<<16] x))

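Both new rules rest on the same observation; a quick numeric check (my sketch, not compiler code):

    package main

    import (
        "fmt"
        "math/bits"
    )

    func main() {
        // CMOVQEQ elimination: with a nonzero constant OR'd in, the BSF
        // operand can never be zero, so the equal-to-zero arm is dead.
        // Extension elimination: once bit 8 is set, the count depends only
        // on bits 0-7, so zero-extending the byte first changes nothing.
        for _, g := range []uint64{0, 1, 0xdeadbeefcafe00, 1 << 63} {
            withExt := bits.TrailingZeros64(uint64(uint8(g)) | 1<<8)
            withoutExt := bits.TrailingZeros64(g | 1<<8)
            fmt.Println(withExt == withoutExt, (g|1<<8) != 0) // true true
        }
    }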
@@ -108,13 +108,11 @@
 		(Com32 <config.fe.TypeUInt32()> (Int64Lo x)))
 
 (Ctz64 x) ->
-	(Int64Make
-		(Const32 <config.fe.TypeUInt32()> [0])
-		(Add32 <config.fe.TypeUInt32()>
-			(Ctz32 <config.fe.TypeUInt32()> (Int64Lo x))
-			(And32 <config.fe.TypeUInt32()>
-				(Com32 <config.fe.TypeUInt32()> (Zeromask (Int64Lo x)))
-				(Ctz32 <config.fe.TypeUInt32()> (Int64Hi x)))))
+	(Add32 <config.fe.TypeUInt32()>
+		(Ctz32 <config.fe.TypeUInt32()> (Int64Lo x))
+		(And32 <config.fe.TypeUInt32()>
+			(Com32 <config.fe.TypeUInt32()> (Zeromask (Int64Lo x)))
+			(Ctz32 <config.fe.TypeUInt32()> (Int64Hi x))))
 
 (Bswap64 x) ->
 	(Int64Make

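In Go terms, the new 32-bit lowering of Ctz64 computes the following (my sketch; bits.TrailingZeros32, like the updated Ctz32, returns 32 for a zero word, which is what lets the Int64Make/Const32 wrapper disappear):

    package main

    import (
        "fmt"
        "math/bits"
    )

    // ctz64via32 mirrors Add32(Ctz32(lo), And32(Com32(Zeromask(lo)), Ctz32(hi))).
    func ctz64via32(x uint64) int {
        lo := uint32(x)
        hi := uint32(x >> 32)
        mask := 0
        if lo == 0 { // Com32(Zeromask(lo)): all ones exactly when lo == 0
            mask = -1
        }
        return bits.TrailingZeros32(lo) + mask&bits.TrailingZeros32(hi)
    }

    func main() {
        fmt.Println(ctz64via32(1 << 40)) // 40
        fmt.Println(ctz64via32(6))       // 1
        fmt.Println(ctz64via32(0))       // 64
    }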
@@ -236,7 +236,7 @@ var genericOps = []opData{
 	{name: "Com32", argLength: 1},
 	{name: "Com64", argLength: 1},
 
 	{name: "Ctz32", argLength: 1}, // Count trailing (low order) zeroes (returns 0-32)
 	{name: "Ctz64", argLength: 1}, // Count trailing zeroes (returns 0-64)
 
 	{name: "Bswap32", argLength: 1}, // Swap bytes

@@ -28,8 +28,12 @@ func rewriteValueAMD64(v *Value, config *Config) bool {
 		return rewriteValueAMD64_OpAMD64ANDQ(v, config)
 	case OpAMD64ANDQconst:
 		return rewriteValueAMD64_OpAMD64ANDQconst(v, config)
+	case OpAMD64BSFQ:
+		return rewriteValueAMD64_OpAMD64BSFQ(v, config)
 	case OpAMD64BTQconst:
 		return rewriteValueAMD64_OpAMD64BTQconst(v, config)
+	case OpAMD64CMOVQEQ:
+		return rewriteValueAMD64_OpAMD64CMOVQEQ(v, config)
 	case OpAMD64CMPB:
 		return rewriteValueAMD64_OpAMD64CMPB(v, config)
 	case OpAMD64CMPBconst:

@@ -2158,6 +2162,59 @@ func rewriteValueAMD64_OpAMD64ANDQconst(v *Value, config *Config) bool {
 	}
 	return false
 }
+func rewriteValueAMD64_OpAMD64BSFQ(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (BSFQ (ORQconst <t> [1<<8] (MOVBQZX x)))
+	// cond:
+	// result: (BSFQ (ORQconst <t> [1<<8] x))
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpAMD64ORQconst {
+			break
+		}
+		t := v_0.Type
+		if v_0.AuxInt != 1<<8 {
+			break
+		}
+		v_0_0 := v_0.Args[0]
+		if v_0_0.Op != OpAMD64MOVBQZX {
+			break
+		}
+		x := v_0_0.Args[0]
+		v.reset(OpAMD64BSFQ)
+		v0 := b.NewValue0(v.Pos, OpAMD64ORQconst, t)
+		v0.AuxInt = 1 << 8
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (BSFQ (ORQconst <t> [1<<16] (MOVWQZX x)))
+	// cond:
+	// result: (BSFQ (ORQconst <t> [1<<16] x))
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpAMD64ORQconst {
+			break
+		}
+		t := v_0.Type
+		if v_0.AuxInt != 1<<16 {
+			break
+		}
+		v_0_0 := v_0.Args[0]
+		if v_0_0.Op != OpAMD64MOVWQZX {
+			break
+		}
+		x := v_0_0.Args[0]
+		v.reset(OpAMD64BSFQ)
+		v0 := b.NewValue0(v.Pos, OpAMD64ORQconst, t)
+		v0.AuxInt = 1 << 16
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+	return false
+}
 func rewriteValueAMD64_OpAMD64BTQconst(v *Value, config *Config) bool {
 	b := v.Block
 	_ = b

@@ -2177,6 +2234,37 @@ func rewriteValueAMD64_OpAMD64BTQconst(v *Value, config *Config) bool {
 	}
 	return false
 }
+func rewriteValueAMD64_OpAMD64CMOVQEQ(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (CMOVQEQ x _ (Select1 (BSFQ (ORQconst [c] _))))
+	// cond: c != 0
+	// result: x
+	for {
+		x := v.Args[0]
+		v_2 := v.Args[2]
+		if v_2.Op != OpSelect1 {
+			break
+		}
+		v_2_0 := v_2.Args[0]
+		if v_2_0.Op != OpAMD64BSFQ {
+			break
+		}
+		v_2_0_0 := v_2_0.Args[0]
+		if v_2_0_0.Op != OpAMD64ORQconst {
+			break
+		}
+		c := v_2_0_0.AuxInt
+		if !(c != 0) {
+			break
+		}
+		v.reset(OpCopy)
+		v.Type = x.Type
+		v.AddArg(x)
+		return true
+	}
+	return false
+}
 func rewriteValueAMD64_OpAMD64CMPB(v *Value, config *Config) bool {
 	b := v.Block
 	_ = b

@@ -17902,26 +17990,20 @@ func rewriteValueAMD64_OpConvert(v *Value, config *Config) bool {
 func rewriteValueAMD64_OpCtz32(v *Value, config *Config) bool {
 	b := v.Block
 	_ = b
-	// match: (Ctz32 <t> x)
+	// match: (Ctz32 x)
 	// cond:
-	// result: (CMOVLEQ (Select0 <t> (BSFL x)) (MOVLconst <t> [32]) (Select1 <TypeFlags> (BSFL x)))
+	// result: (Select0 (BSFQ (ORQ <config.Frontend().TypeUInt64()> (MOVQconst [1<<32]) x)))
 	for {
-		t := v.Type
 		x := v.Args[0]
-		v.reset(OpAMD64CMOVLEQ)
-		v0 := b.NewValue0(v.Pos, OpSelect0, t)
-		v1 := b.NewValue0(v.Pos, OpAMD64BSFL, MakeTuple(config.fe.TypeUInt32(), TypeFlags))
+		v.reset(OpSelect0)
+		v0 := b.NewValue0(v.Pos, OpAMD64BSFQ, MakeTuple(config.fe.TypeUInt64(), TypeFlags))
+		v1 := b.NewValue0(v.Pos, OpAMD64ORQ, config.Frontend().TypeUInt64())
+		v2 := b.NewValue0(v.Pos, OpAMD64MOVQconst, config.fe.TypeUInt64())
+		v2.AuxInt = 1 << 32
+		v1.AddArg(v2)
 		v1.AddArg(x)
 		v0.AddArg(v1)
 		v.AddArg(v0)
-		v2 := b.NewValue0(v.Pos, OpAMD64MOVLconst, t)
-		v2.AuxInt = 32
-		v.AddArg(v2)
-		v3 := b.NewValue0(v.Pos, OpSelect1, TypeFlags)
-		v4 := b.NewValue0(v.Pos, OpAMD64BSFL, MakeTuple(config.fe.TypeUInt32(), TypeFlags))
-		v4.AddArg(x)
-		v3.AddArg(v4)
-		v.AddArg(v3)
 		return true
 	}
 }

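The generated rewrite above encodes the amd64 trick from the commit message: one 64-bit BSF with a guard bit replaces the old BSFL/MOVL/CMOVL sequence. A numeric stand-in (mine, not compiler code):

    package main

    import (
        "fmt"
        "math/bits"
    )

    // ctz32ViaBSFQ: BSFQ on x|1<<32 never sees zero, so no CMOV is needed,
    // and a zero x finds bit 32, exactly the value Ctz32 must return.
    func ctz32ViaBSFQ(x uint32) int {
        return bits.TrailingZeros64(uint64(x) | 1<<32)
    }

    func main() {
        fmt.Println(ctz32ViaBSFQ(0))    // 32
        fmt.Println(ctz32ViaBSFQ(1))    // 0
        fmt.Println(ctz32ViaBSFQ(0xa0)) // 5
    }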
@@ -368,34 +368,30 @@ func rewriteValuedec64_OpCtz64(v *Value, config *Config) bool {
 	_ = b
 	// match: (Ctz64 x)
 	// cond:
-	// result: (Int64Make (Const32 <config.fe.TypeUInt32()> [0]) (Add32 <config.fe.TypeUInt32()> (Ctz32 <config.fe.TypeUInt32()> (Int64Lo x)) (And32 <config.fe.TypeUInt32()> (Com32 <config.fe.TypeUInt32()> (Zeromask (Int64Lo x))) (Ctz32 <config.fe.TypeUInt32()> (Int64Hi x)))))
+	// result: (Add32 <config.fe.TypeUInt32()> (Ctz32 <config.fe.TypeUInt32()> (Int64Lo x)) (And32 <config.fe.TypeUInt32()> (Com32 <config.fe.TypeUInt32()> (Zeromask (Int64Lo x))) (Ctz32 <config.fe.TypeUInt32()> (Int64Hi x))))
 	for {
 		x := v.Args[0]
-		v.reset(OpInt64Make)
-		v0 := b.NewValue0(v.Pos, OpConst32, config.fe.TypeUInt32())
-		v0.AuxInt = 0
-		v.AddArg(v0)
-		v1 := b.NewValue0(v.Pos, OpAdd32, config.fe.TypeUInt32())
-		v2 := b.NewValue0(v.Pos, OpCtz32, config.fe.TypeUInt32())
-		v3 := b.NewValue0(v.Pos, OpInt64Lo, config.fe.TypeUInt32())
-		v3.AddArg(x)
-		v2.AddArg(v3)
-		v1.AddArg(v2)
-		v4 := b.NewValue0(v.Pos, OpAnd32, config.fe.TypeUInt32())
-		v5 := b.NewValue0(v.Pos, OpCom32, config.fe.TypeUInt32())
-		v6 := b.NewValue0(v.Pos, OpZeromask, config.fe.TypeUInt32())
-		v7 := b.NewValue0(v.Pos, OpInt64Lo, config.fe.TypeUInt32())
-		v7.AddArg(x)
-		v6.AddArg(v7)
-		v5.AddArg(v6)
-		v4.AddArg(v5)
-		v8 := b.NewValue0(v.Pos, OpCtz32, config.fe.TypeUInt32())
-		v9 := b.NewValue0(v.Pos, OpInt64Hi, config.fe.TypeUInt32())
-		v9.AddArg(x)
-		v8.AddArg(v9)
-		v4.AddArg(v8)
-		v1.AddArg(v4)
-		v.AddArg(v1)
+		v.reset(OpAdd32)
+		v.Type = config.fe.TypeUInt32()
+		v0 := b.NewValue0(v.Pos, OpCtz32, config.fe.TypeUInt32())
+		v1 := b.NewValue0(v.Pos, OpInt64Lo, config.fe.TypeUInt32())
+		v1.AddArg(x)
+		v0.AddArg(v1)
+		v.AddArg(v0)
+		v2 := b.NewValue0(v.Pos, OpAnd32, config.fe.TypeUInt32())
+		v3 := b.NewValue0(v.Pos, OpCom32, config.fe.TypeUInt32())
+		v4 := b.NewValue0(v.Pos, OpZeromask, config.fe.TypeUInt32())
+		v5 := b.NewValue0(v.Pos, OpInt64Lo, config.fe.TypeUInt32())
+		v5.AddArg(x)
+		v4.AddArg(v5)
+		v3.AddArg(v4)
+		v2.AddArg(v3)
+		v6 := b.NewValue0(v.Pos, OpCtz32, config.fe.TypeUInt32())
+		v7 := b.NewValue0(v.Pos, OpInt64Hi, config.fe.TypeUInt32())
+		v7.AddArg(x)
+		v6.AddArg(v7)
+		v2.AddArg(v6)
+		v.AddArg(v2)
 		return true
 	}
 }

@@ -32,22 +32,22 @@ var deBruijnIdx32 = [32]byte{
 
 // Ctz64 counts trailing (low-order) zeroes,
 // and if all are zero, then 64.
-func Ctz64(x uint64) uint64 {
+func Ctz64(x uint64) int {
 	x &= -x                   // isolate low-order bit
 	y := x * deBruijn64 >> 58 // extract part of deBruijn sequence
-	y = uint64(deBruijnIdx64[y]) // convert to bit index
-	z := (x - 1) >> 57 & 64      // adjustment if zero
-	return y + z
+	i := int(deBruijnIdx64[y])   // convert to bit index
+	z := int((x - 1) >> 57 & 64) // adjustment if zero
+	return i + z
 }
 
 // Ctz32 counts trailing (low-order) zeroes,
 // and if all are zero, then 32.
-func Ctz32(x uint32) uint32 {
+func Ctz32(x uint32) int {
 	x &= -x                   // isolate low-order bit
 	y := x * deBruijn32 >> 27 // extract part of deBruijn sequence
-	y = uint32(deBruijnIdx32[y]) // convert to bit index
-	z := (x - 1) >> 26 & 32      // adjustment if zero
-	return y + z
+	i := int(deBruijnIdx32[y])   // convert to bit index
+	z := int((x - 1) >> 26 & 32) // adjustment if zero
+	return i + z
 }
 
 // Bswap64 returns its input with byte order reversed

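For readers unfamiliar with the deBruijn fallback Ctz64/Ctz32 use, here is the same technique at 8 bits, small enough to verify by hand (my sketch, with constants chosen analogously for uint8):

    package main

    import "fmt"

    const deBruijn8 = 0x17 // 00010111: every 3-bit cyclic window is distinct

    var deBruijnIdx8 = [8]byte{0, 1, 2, 4, 7, 3, 6, 5}

    func ctz8(x uint8) int {
        x &= -x                    // isolate low-order bit
        y := x * deBruijn8 >> 5    // extract part of deBruijn sequence
        i := int(deBruijnIdx8[y])  // convert to bit index
        z := int((x - 1) >> 4 & 8) // adjustment if zero
        return i + z
    }

    func main() {
        for i := 0; i < 8; i++ {
            fmt.Println(ctz8(1 << uint(i))) // prints 0 through 7
        }
        fmt.Println(ctz8(0)) // 8
    }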
@@ -4,14 +4,12 @@
 
 #include "textflag.h"
 
-TEXT runtime∕internal∕sys·Ctz64(SB), NOSPLIT, $0-16
-	MOVL	$0, ret_hi+12(FP)
-
+TEXT runtime∕internal∕sys·Ctz64(SB), NOSPLIT, $0-12
 	// Try low 32 bits.
 	MOVL	x_lo+0(FP), AX
 	BSFL	AX, AX
 	JZ	tryhigh
-	MOVL	AX, ret_lo+8(FP)
+	MOVL	AX, ret+8(FP)
 	RET
 
 tryhigh:

@@ -20,12 +18,12 @@ tryhigh:
 	BSFL	AX, AX
 	JZ	none
 	ADDL	$32, AX
-	MOVL	AX, ret_lo+8(FP)
+	MOVL	AX, ret+8(FP)
 	RET
 
 none:
 	// No bits are set.
-	MOVL	$64, ret_lo+8(FP)
+	MOVL	$64, ret+8(FP)
 	RET
 
 TEXT runtime∕internal∕sys·Ctz32(SB), NOSPLIT, $0-8

@@ -6,7 +6,7 @@
 
 package sys
 
-func Ctz64(x uint64) uint64
-func Ctz32(x uint32) uint32
+func Ctz64(x uint64) int
+func Ctz32(x uint32) int
 func Bswap64(x uint64) uint64
 func Bswap32(x uint32) uint32

@@ -6,17 +6,17 @@ import (
 )
 
 func TestCtz64(t *testing.T) {
-	for i := uint(0); i <= 64; i++ {
-		x := uint64(5) << i
-		if got := sys.Ctz64(x); got != uint64(i) {
+	for i := 0; i <= 64; i++ {
+		x := uint64(5) << uint(i)
+		if got := sys.Ctz64(x); got != i {
 			t.Errorf("Ctz64(%d)=%d, want %d", x, got, i)
 		}
 	}
 }
 func TestCtz32(t *testing.T) {
-	for i := uint(0); i <= 32; i++ {
-		x := uint32(5) << i
-		if got := sys.Ctz32(x); got != uint32(i) {
+	for i := 0; i <= 32; i++ {
+		x := uint32(5) << uint(i)
+		if got := sys.Ctz32(x); got != i {
 			t.Errorf("Ctz32(%d)=%d, want %d", x, got, i)
 		}
 	}

@@ -491,7 +491,7 @@ func nextFreeFast(s *mspan) gclinkptr {
 		if freeidx%64 == 0 && freeidx != s.nelems {
 			return 0
 		}
-		s.allocCache >>= (theBit + 1)
+		s.allocCache >>= uint(theBit + 1)
 		s.freeindex = freeidx
 		v := gclinkptr(result*s.elemsize + s.base())
 		s.allocCount++

@@ -248,7 +248,7 @@ func (s *mspan) nextFreeIndex() uintptr {
 		return snelems
 	}
 
-	s.allocCache >>= (bitIndex + 1)
+	s.allocCache >>= uint(bitIndex + 1)
 	sfreeindex = result + 1
 
 	if sfreeindex%64 == 0 && sfreeindex != snelems {

@@ -22,7 +22,7 @@ func logf(f string, args ...interface{}) {
 	}
 }
 
-func test(i, x uint64) {
+func test(i int, x uint64) {
 	t := T.Ctz64(x) // ERROR "intrinsic substitution for Ctz64"
 	if i != t {
 		logf("Ctz64(0x%x) expected %d but got %d\n", x, i, t)

@@ -36,12 +36,12 @@ func test(i, x uint64) {
 	if i <= 32 {
 		x32 := uint32(x)
 		t32 := T.Ctz32(x32) // ERROR "intrinsic substitution for Ctz32"
-		if uint32(i) != t32 {
+		if i != t32 {
 			logf("Ctz32(0x%x) expected %d but got %d\n", x32, i, t32)
 		}
 		x32 = -x32
 		t32 = T.Ctz32(x32) // ERROR "intrinsic substitution for Ctz32"
-		if uint32(i) != t32 {
+		if i != t32 {
 			logf("Ctz32(0x%x) expected %d but got %d\n", x32, i, t32)
 		}
 	}

@@ -83,10 +83,10 @@ func main() {
 		logf("ctz64(0) != 64")
 	}
 
-	for i := uint64(0); i <= 64; i++ {
+	for i := 0; i <= 64; i++ {
 		for j := uint64(1); j <= 255; j += 2 {
 			for k := uint64(1); k <= 65537; k += 128 {
-				x := (j * k) << i
+				x := (j * k) << uint(i)
 				test(i, x)
 			}
 		}