mirror of https://github.com/golang/go.git
internal/runtime/atomic: add arm native implementations of And8/Or8
With LDREXB/STREXB now available for the arm assembler we can implement these operations natively. The instructions are armv6k+ but for simplicity I only use them on armv7.
Benchmark results for a raspberry Pi 3 model B+:
goos: linux
goarch: arm
pkg: internal/runtime/atomic
cpu: ARMv7 Processor rev 4 (v7l)
│ old.txt │ new.txt │
│ sec/op │ sec/op vs base │
And8-4 127.65n ± 0% 68.74n ± 0% -46.15% (p=0.000 n=10)
Change-Id: Ic87f307c35f7d7f56010980302f253056f6d54dc
GitHub-Last-Rev: a7351802fd
GitHub-Pull-Request: golang/go#70002
Cq-Include-Trybots: luci.golang.try:gotip-linux-arm
Reviewed-on: https://go-review.googlesource.com/c/go/+/622075
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
parent
bbdc65bb38
commit
c5d424bfca
|
|
@ -159,8 +159,11 @@ func goStore64(addr *uint64, v uint64) {
|
||||||
addrLock(addr).unlock()
|
addrLock(addr).unlock()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
func Or8(addr *uint8, v uint8)
|
||||||
|
|
||||||
//go:nosplit
|
//go:nosplit
|
||||||
func Or8(addr *uint8, v uint8) {
|
func goOr8(addr *uint8, v uint8) {
|
||||||
// Align down to 4 bytes and use 32-bit CAS.
|
// Align down to 4 bytes and use 32-bit CAS.
|
||||||
uaddr := uintptr(unsafe.Pointer(addr))
|
uaddr := uintptr(unsafe.Pointer(addr))
|
||||||
addr32 := (*uint32)(unsafe.Pointer(uaddr &^ 3))
|
addr32 := (*uint32)(unsafe.Pointer(uaddr &^ 3))
|
||||||
|
|
@ -173,8 +176,11 @@ func Or8(addr *uint8, v uint8) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
func And8(addr *uint8, v uint8)
|
||||||
|
|
||||||
//go:nosplit
|
//go:nosplit
|
||||||
func And8(addr *uint8, v uint8) {
|
func goAnd8(addr *uint8, v uint8) {
|
||||||
// Align down to 4 bytes and use 32-bit CAS.
|
// Align down to 4 bytes and use 32-bit CAS.
|
||||||
uaddr := uintptr(unsafe.Pointer(addr))
|
uaddr := uintptr(unsafe.Pointer(addr))
|
||||||
addr32 := (*uint32)(unsafe.Pointer(uaddr &^ 3))
|
addr32 := (*uint32)(unsafe.Pointer(uaddr &^ 3))
|
||||||
|
|
|
||||||
|
|
@ -228,6 +228,42 @@ store64loop:
|
||||||
DMB MB_ISH
|
DMB MB_ISH
|
||||||
RET
|
RET
|
||||||
|
|
||||||
|
TEXT armAnd8<>(SB),NOSPLIT,$0-5
|
||||||
|
// addr is already in R1
|
||||||
|
MOVB v+4(FP), R2
|
||||||
|
|
||||||
|
and8loop:
|
||||||
|
LDREXB (R1), R6
|
||||||
|
|
||||||
|
DMB MB_ISHST
|
||||||
|
|
||||||
|
AND R2, R6
|
||||||
|
STREXB R6, (R1), R0
|
||||||
|
CMP $0, R0
|
||||||
|
BNE and8loop
|
||||||
|
|
||||||
|
DMB MB_ISH
|
||||||
|
|
||||||
|
RET
|
||||||
|
|
||||||
|
TEXT armOr8<>(SB),NOSPLIT,$0-5
|
||||||
|
// addr is already in R1
|
||||||
|
MOVB v+4(FP), R2
|
||||||
|
|
||||||
|
or8loop:
|
||||||
|
LDREXB (R1), R6
|
||||||
|
|
||||||
|
DMB MB_ISHST
|
||||||
|
|
||||||
|
ORR R2, R6
|
||||||
|
STREXB R6, (R1), R0
|
||||||
|
CMP $0, R0
|
||||||
|
BNE or8loop
|
||||||
|
|
||||||
|
DMB MB_ISH
|
||||||
|
|
||||||
|
RET
|
||||||
|
|
||||||
// The following functions all panic if their address argument isn't
|
// The following functions all panic if their address argument isn't
|
||||||
// 8-byte aligned. Since we're calling back into Go code to do this,
|
// 8-byte aligned. Since we're calling back into Go code to do this,
|
||||||
// we have to cooperate with stack unwinding. In the normal case, the
|
// we have to cooperate with stack unwinding. In the normal case, the
|
||||||
|
|
@ -310,3 +346,31 @@ TEXT ·Store64(SB),NOSPLIT,$-4-12
|
||||||
JMP ·goStore64(SB)
|
JMP ·goStore64(SB)
|
||||||
#endif
|
#endif
|
||||||
JMP armStore64<>(SB)
|
JMP armStore64<>(SB)
|
||||||
|
|
||||||
|
TEXT ·And8(SB),NOSPLIT,$-4-5
|
||||||
|
NO_LOCAL_POINTERS
|
||||||
|
MOVW addr+0(FP), R1
|
||||||
|
|
||||||
|
// Uses STREXB/LDREXB that is armv6k or later.
|
||||||
|
// For simplicity we only enable this on armv7.
|
||||||
|
#ifndef GOARM_7
|
||||||
|
MOVB internal∕cpu·ARM+const_offsetARMHasV7Atomics(SB), R11
|
||||||
|
CMP $1, R11
|
||||||
|
BEQ 2(PC)
|
||||||
|
JMP ·goAnd8(SB)
|
||||||
|
#endif
|
||||||
|
JMP armAnd8<>(SB)
|
||||||
|
|
||||||
|
TEXT ·Or8(SB),NOSPLIT,$-4-5
|
||||||
|
NO_LOCAL_POINTERS
|
||||||
|
MOVW addr+0(FP), R1
|
||||||
|
|
||||||
|
// Uses STREXB/LDREXB that is armv6k or later.
|
||||||
|
// For simplicity we only enable this on armv7.
|
||||||
|
#ifndef GOARM_7
|
||||||
|
MOVB internal∕cpu·ARM+const_offsetARMHasV7Atomics(SB), R11
|
||||||
|
CMP $1, R11
|
||||||
|
BEQ 2(PC)
|
||||||
|
JMP ·goOr8(SB)
|
||||||
|
#endif
|
||||||
|
JMP armOr8<>(SB)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue