runtime/internal/atomic: add 386/amd64 And/Or operators

This CL adds the atomic primitives for the And/Or operators
on 386 and amd64. It also includes benchmarks for the ops.

Note that the race variants for those operators are not yet
implemented since we depend on an upstream llvm tsan patch as
well as rebuilding the race runtime with x/build/cmd/racebuild.
This will come as a separate patch at a later time once the
infrastructure and upstream patches supporting it are ready.

See llvm/llvm-project#65695 for the llvm tsan patch.

For [reserved]
This commit is contained in:
Mauri de Souza Meneguzzo 2023-09-13 17:17:24 -03:00
parent e73e25b624
commit df800be192
5 changed files with 262 additions and 1 deletions

View File

@ -76,6 +76,24 @@ func And(ptr *uint32, val uint32)
//go:noescape
func Or(ptr *uint32, val uint32)

// And32 atomically performs *ptr &= val and returns the value *ptr
// held before the AND. Implemented in assembly.
//go:noescape
func And32(ptr *uint32, val uint32) uint32

// Or32 atomically performs *ptr |= val and returns the value *ptr
// held before the OR. Implemented in assembly.
//go:noescape
func Or32(ptr *uint32, val uint32) uint32

// And64 atomically performs *ptr &= val and returns the value *ptr
// held before the AND. Implemented in assembly.
//go:noescape
func And64(ptr *uint64, val uint64) uint64

// Or64 atomically performs *ptr |= val and returns the value *ptr
// held before the OR. Implemented in assembly.
//go:noescape
func Or64(ptr *uint64, val uint64) uint64

// Anduintptr atomically performs *ptr &= val and returns the value
// *ptr held before the AND. Implemented in assembly.
//go:noescape
func Anduintptr(ptr *uintptr, val uintptr) uintptr

// Oruintptr atomically performs *ptr |= val and returns the value
// *ptr held before the OR. Implemented in assembly.
//go:noescape
func Oruintptr(ptr *uintptr, val uintptr) uintptr
// NOTE: Do not add atomicxor8 (XOR is not idempotent).
//go:noescape

View File

@ -283,3 +283,84 @@ TEXT ·And(SB), NOSPLIT, $0-8
LOCK
ANDL BX, (AX)
RET
// func And32(addr *uint32, v uint32) old uint32
//
// Atomically *addr &= v; returns the value *addr held before the AND.
// 386 has no fetch-and-AND instruction, so this is a LOCK CMPXCHG
// retry loop.
TEXT ·And32(SB), NOSPLIT, $0-12
MOVL ptr+0(FP), BX
MOVL val+4(FP), CX
casloop:
// DX = v & old (AX holds the freshly loaded old value).
MOVL CX, DX
MOVL (BX), AX
ANDL AX, DX
LOCK
// If *addr still equals AX, store DX; on failure AX is reloaded
// with the current value and we retry.
CMPXCHGL DX, (BX)
JNZ casloop
MOVL AX, ret+8(FP)
RET
// func Or32(addr *uint32, v uint32) old uint32
//
// Atomically *addr |= v; returns the value *addr held before the OR.
// Same LOCK CMPXCHG retry-loop shape as And32.
TEXT ·Or32(SB), NOSPLIT, $0-12
MOVL ptr+0(FP), BX
MOVL val+4(FP), CX
casloop:
// DX = v | old (AX holds the freshly loaded old value).
MOVL CX, DX
MOVL (BX), AX
ORL AX, DX
LOCK
// If *addr still equals AX, store DX; on failure AX is reloaded
// with the current value and we retry.
CMPXCHGL DX, (BX)
JNZ casloop
MOVL AX, ret+8(FP)
RET
// func And64(addr *uint64, v uint64) old uint64
//
// Atomically *addr &= v on a 32-bit machine; returns the old 64-bit
// value. Uses CMPXCHG8B, which compares DX:AX with the memory operand
// and, if equal, stores CX:BX; on failure it loads the current value
// into DX:AX, so the loop body only recomputes the masked value.
TEXT ·And64(SB), NOSPLIT, $0-20
MOVL ptr+0(FP), BP
// DI:SI = v
MOVL val_lo+4(FP), SI
MOVL val_hi+8(FP), DI
// DX:AX = *addr
MOVL 0(BP), AX
MOVL 4(BP), DX
casloop:
// CX:BX = DX:AX (*addr) & DI:SI (mask)
MOVL AX, BX
MOVL DX, CX
ANDL SI, BX
ANDL DI, CX
LOCK
CMPXCHG8B 0(BP)
JNZ casloop
// DX:AX holds the pre-AND value that won the exchange.
MOVL AX, ret_lo+12(FP)
MOVL DX, ret_hi+16(FP)
RET
// func Or64(addr *uint64, v uint64) old uint64
//
// Atomically *addr |= v on a 32-bit machine; returns the old 64-bit
// value. Same CMPXCHG8B retry-loop shape as And64: on failure the
// instruction reloads DX:AX with the current value, so only the
// OR is redone per iteration.
TEXT ·Or64(SB), NOSPLIT, $0-20
MOVL ptr+0(FP), BP
// DI:SI = v
MOVL val_lo+4(FP), SI
MOVL val_hi+8(FP), DI
// DX:AX = *addr
MOVL 0(BP), AX
MOVL 4(BP), DX
casloop:
// CX:BX = DX:AX (*addr) | DI:SI (mask)
MOVL AX, BX
MOVL DX, CX
ORL SI, BX
ORL DI, CX
LOCK
CMPXCHG8B 0(BP)
JNZ casloop
// DX:AX holds the pre-OR value that won the exchange.
MOVL AX, ret_lo+12(FP)
MOVL DX, ret_hi+16(FP)
RET
// func Anduintptr(addr *uintptr, v uintptr) old uintptr
//
// uintptr is 32 bits on 386, so the frame layout matches And32 exactly
// and we can tail-jump to it.
TEXT ·Anduintptr(SB), NOSPLIT, $0-12
JMP ·And32(SB)
// func Oruintptr(addr *uintptr, v uintptr) old uintptr
//
// uintptr is 32 bits on 386, so the frame layout matches Or32 exactly
// and we can tail-jump to it.
TEXT ·Oruintptr(SB), NOSPLIT, $0-12
JMP ·Or32(SB)

View File

@ -84,6 +84,24 @@ func And(ptr *uint32, val uint32)
//go:noescape
func Or(ptr *uint32, val uint32)

// And32 atomically performs *ptr &= val and returns the value *ptr
// held before the AND. Implemented in assembly.
//go:noescape
func And32(ptr *uint32, val uint32) uint32

// Or32 atomically performs *ptr |= val and returns the value *ptr
// held before the OR. Implemented in assembly.
//go:noescape
func Or32(ptr *uint32, val uint32) uint32

// And64 atomically performs *ptr &= val and returns the value *ptr
// held before the AND. Implemented in assembly.
//go:noescape
func And64(ptr *uint64, val uint64) uint64

// Or64 atomically performs *ptr |= val and returns the value *ptr
// held before the OR. Implemented in assembly.
//go:noescape
func Or64(ptr *uint64, val uint64) uint64

// Anduintptr atomically performs *ptr &= val and returns the value
// *ptr held before the AND. Implemented in assembly.
//go:noescape
func Anduintptr(ptr *uintptr, val uintptr) uintptr

// Oruintptr atomically performs *ptr |= val and returns the value
// *ptr held before the OR. Implemented in assembly.
//go:noescape
func Oruintptr(ptr *uintptr, val uintptr) uintptr
// NOTE: Do not add atomicxor8 (XOR is not idempotent).
//go:noescape

View File

@ -223,3 +223,67 @@ TEXT ·And(SB), NOSPLIT, $0-12
LOCK
ANDL BX, (AX)
RET
// func Or32(addr *uint32, v uint32) old uint32
//
// Atomically *addr |= v; returns the value *addr held before the OR,
// via a LOCK CMPXCHG retry loop.
TEXT ·Or32(SB), NOSPLIT, $0-20
MOVQ ptr+0(FP), BX
MOVL val+8(FP), CX
casloop:
// DX = v | old (AX holds the freshly loaded old value).
MOVL CX, DX
MOVL (BX), AX
ORL AX, DX
LOCK
// If *addr still equals AX, store DX; on failure AX is reloaded
// with the current value and we retry.
CMPXCHGL DX, (BX)
JNZ casloop
MOVL AX, ret+16(FP)
RET
// func And32(addr *uint32, v uint32) old uint32
//
// Atomically *addr &= v; returns the value *addr held before the AND,
// via a LOCK CMPXCHG retry loop.
TEXT ·And32(SB), NOSPLIT, $0-20
MOVQ ptr+0(FP), BX
MOVL val+8(FP), CX
casloop:
// DX = v & old (AX holds the freshly loaded old value).
MOVL CX, DX
MOVL (BX), AX
ANDL AX, DX
LOCK
// If *addr still equals AX, store DX; on failure AX is reloaded
// with the current value and we retry.
CMPXCHGL DX, (BX)
JNZ casloop
MOVL AX, ret+16(FP)
RET
// func Or64(addr *uint64, v uint64) old uint64
//
// Atomically *addr |= v; returns the value *addr held before the OR.
// 64-bit twin of Or32: same LOCK CMPXCHG retry loop with quad-word ops.
TEXT ·Or64(SB), NOSPLIT, $0-24
MOVQ ptr+0(FP), BX
MOVQ val+8(FP), CX
casloop:
// DX = v | old (AX holds the freshly loaded old value).
MOVQ CX, DX
MOVQ (BX), AX
ORQ AX, DX
LOCK
// If *addr still equals AX, store DX; on failure AX is reloaded
// with the current value and we retry.
CMPXCHGQ DX, (BX)
JNZ casloop
MOVQ AX, ret+16(FP)
RET
// func And64(addr *uint64, v uint64) old uint64
//
// Atomically *addr &= v; returns the value *addr held before the AND.
// 64-bit twin of And32: same LOCK CMPXCHG retry loop with quad-word ops.
TEXT ·And64(SB), NOSPLIT, $0-24
MOVQ ptr+0(FP), BX
MOVQ val+8(FP), CX
casloop:
// DX = v & old (AX holds the freshly loaded old value).
MOVQ CX, DX
MOVQ (BX), AX
ANDQ AX, DX
LOCK
// If *addr still equals AX, store DX; on failure AX is reloaded
// with the current value and we retry.
CMPXCHGQ DX, (BX)
JNZ casloop
MOVQ AX, ret+16(FP)
RET
// func Anduintptr(addr *uintptr, v uintptr) old uintptr
//
// uintptr is 64 bits on amd64, so the frame layout matches And64
// exactly and we can tail-jump to it.
TEXT ·Anduintptr(SB), NOSPLIT, $0-24
JMP ·And64(SB)
// func Oruintptr(addr *uintptr, v uintptr) old uintptr
//
// uintptr is 64 bits on amd64, so the frame layout matches Or64
// exactly and we can tail-jump to it.
TEXT ·Oruintptr(SB), NOSPLIT, $0-24
JMP ·Or64(SB)

View File

@ -1,4 +1,4 @@
//go:build ppc64 || ppc64le || riscv64 || wasm
//go:build 386 || amd64 || ppc64 || ppc64le || riscv64 || wasm
//
// Copyright 2023 The Go Authors. All rights reserved.
@ -167,3 +167,83 @@ func TestOr64(t *testing.T) {
}
}
}
// BenchmarkAnd32 measures the single-goroutine cost of atomic.And32.
func BenchmarkAnd32(b *testing.B) {
	var buf [128]uint32 // give buf its own cache line
	sink = &buf
	for n := 0; n < b.N; n++ {
		atomic.And32(&buf[63], uint32(n))
	}
}
// BenchmarkAnd32Parallel measures atomic.And32 with multiple
// goroutines operating on the same word.
func BenchmarkAnd32Parallel(b *testing.B) {
	var buf [128]uint32 // give buf its own cache line
	sink = &buf
	b.RunParallel(func(pb *testing.PB) {
		var n uint32
		for pb.Next() {
			atomic.And32(&buf[63], n)
			n++
		}
	})
}
// BenchmarkAnd64 measures the single-goroutine cost of atomic.And64.
func BenchmarkAnd64(b *testing.B) {
	var buf [128]uint64 // give buf its own cache line
	sink = &buf
	for n := 0; n < b.N; n++ {
		atomic.And64(&buf[63], uint64(n))
	}
}
// BenchmarkAnd64Parallel measures atomic.And64 with multiple
// goroutines operating on the same word.
func BenchmarkAnd64Parallel(b *testing.B) {
	var buf [128]uint64 // give buf its own cache line
	sink = &buf
	b.RunParallel(func(pb *testing.PB) {
		var n uint64
		for pb.Next() {
			atomic.And64(&buf[63], n)
			n++
		}
	})
}
// BenchmarkOr32 measures the single-goroutine cost of atomic.Or32.
func BenchmarkOr32(b *testing.B) {
	var buf [128]uint32 // give buf its own cache line
	sink = &buf
	for n := 0; n < b.N; n++ {
		atomic.Or32(&buf[63], uint32(n))
	}
}
// BenchmarkOr32Parallel measures atomic.Or32 with multiple
// goroutines operating on the same word.
func BenchmarkOr32Parallel(b *testing.B) {
	var buf [128]uint32 // give buf its own cache line
	sink = &buf
	b.RunParallel(func(pb *testing.PB) {
		var n uint32
		for pb.Next() {
			atomic.Or32(&buf[63], n)
			n++
		}
	})
}
// BenchmarkOr64 measures the single-goroutine cost of atomic.Or64.
func BenchmarkOr64(b *testing.B) {
	var buf [128]uint64 // give buf its own cache line
	sink = &buf
	for n := 0; n < b.N; n++ {
		atomic.Or64(&buf[63], uint64(n))
	}
}
// BenchmarkOr64Parallel measures atomic.Or64 with multiple
// goroutines operating on the same word.
func BenchmarkOr64Parallel(b *testing.B) {
	var buf [128]uint64 // give buf its own cache line
	sink = &buf
	b.RunParallel(func(pb *testing.PB) {
		var n uint64
		for pb.Next() {
			atomic.Or64(&buf[63], n)
			n++
		}
	})
}