runtime: remove the restriction that write barrier ptrs come in pairs

Future CLs will remove the invariant that pointers are always added
to the write barrier buffer in pairs; this CL prepares the buffer
code for that.

The behavior of the assembly code changes a bit: instead of writing
the pointers unconditionally and then checking for overflow, it now
checks for overflow first and then writes the pointers.

Also change the write barrier flush function so that it no longer
takes the src/dst pointers as arguments.
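
In Go terms (see the mwbbuf.go and mbitmap.go hunks below),
wbBuf.putFast, which always recorded a src/dst pair and made the
caller flush on overflow, is replaced by get1/get2, which reserve
exactly the space needed and flush internally. A sketch of the
caller pattern before and after:

	// Before: record the pair, flush afterward if the buffer filled up.
	if !buf.putFast(*dstx, *srcx) {
		wbBufFlush()
	}

	// After: reserve space first (get2 flushes if necessary), then
	// store exactly as many pointers as this barrier needs.
	p := buf.get2()
	p[0] = *dstx
	p[1] = *srcx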

Change-Id: I2ef708038367b7b82ea67cbaf505a1d5904c775c
Reviewed-on: https://go-review.googlesource.com/c/go/+/447779
Run-TryBot: Keith Randall <khr@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
TryBot-Bypass: Keith Randall <khr@golang.org>
Keith Randall 2022-10-25 17:58:07 -07:00
parent 209df389c2
commit d3daeb5267
15 changed files with 186 additions and 177 deletions

View File

@@ -87,6 +87,8 @@ func TestIntendedInlining(t *testing.T) {
"(*mspan).markBitsForIndex",
"(*muintptr).set",
"(*puintptr).set",
"(*wbBuf).get1",
"(*wbBuf).get2",
},
"runtime/internal/sys": {},
"runtime/internal/math": {

View File

@@ -1377,6 +1377,7 @@ TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$28
// faster than having the caller spill these.
MOVL CX, 20(SP)
MOVL BX, 24(SP)
retry:
// TODO: Consider passing g.m.p in as an argument so they can be shared
// across a sequence of write barriers.
get_tls(BX)
@@ -1386,15 +1387,15 @@ TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$28
MOVL (p_wbBuf+wbBuf_next)(BX), CX
// Increment wbBuf.next position.
LEAL 8(CX), CX
MOVL CX, (p_wbBuf+wbBuf_next)(BX)
// Is the buffer full?
CMPL CX, (p_wbBuf+wbBuf_end)(BX)
JA flush
// Commit to the larger buffer.
MOVL CX, (p_wbBuf+wbBuf_next)(BX)
// Record the write.
MOVL AX, -8(CX) // Record value
MOVL (DI), BX // TODO: This turns bad writes into bad reads.
MOVL BX, -4(CX) // Record *slot
// Is the buffer full? (flags set in CMPL above)
JEQ flush
ret:
MOVL 20(SP), CX
MOVL 24(SP), BX
// Do the write.
@@ -1404,8 +1405,8 @@ ret:
flush:
// Save all general purpose registers since these could be
// clobbered by wbBufFlush and were not saved by the caller.
MOVL DI, 0(SP) // Also first argument to wbBufFlush
MOVL AX, 4(SP) // Also second argument to wbBufFlush
MOVL DI, 0(SP)
MOVL AX, 4(SP)
// BX already saved
// CX already saved
MOVL DX, 8(SP)
@@ -1413,7 +1414,6 @@ flush:
MOVL SI, 16(SP)
// DI already saved
// This takes arguments DI and AX
CALL runtime·wbBufFlush(SB)
MOVL 0(SP), DI
@@ -1421,7 +1421,7 @@ flush:
MOVL 8(SP), DX
MOVL 12(SP), BP
MOVL 16(SP), SI
JMP ret
JMP retry
// Note: these functions use a special calling convention to save generated code space.
// Arguments are passed in registers, but the space for those arguments are allocated
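
The same check-overflow-first, flush-and-retry shape repeats for every
architecture that follows. As a rough, runnable Go model of the new
fast path (all names here are illustrative stand-ins; the real wbBuf
uses raw next/end addresses rather than an index):

	package main

	// wbBuf is a toy stand-in for the runtime's write barrier buffer.
	type wbBuf struct {
		next int
		buf  [8]uintptr // tiny, so the flush path is exercised
	}

	// flush models wbBufFlush: it no longer takes the dst/src pointers
	// as arguments; it hands the records to the GC and resets next.
	func (b *wbBuf) flush() {
		b.next = 0
	}

	func writeBarrier(b *wbBuf, slot *uintptr, value uintptr) {
		for { // retry:
			next := b.next + 2 // increment wbBuf.next position
			if next <= len(b.buf) { // is the buffer full?
				b.next = next         // commit to the larger buffer
				b.buf[next-2] = value // record value
				b.buf[next-1] = *slot // record *slot
				break
			}
			b.flush() // full: flush, then retry
		}
		*slot = value // do the write
	}

	func main() {
		var b wbBuf
		var x uintptr
		for i := uintptr(1); i <= 20; i++ {
			writeBarrier(&b, &x, i) // overflows the tiny buffer repeatedly
		}
	}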

View File

@@ -1634,15 +1634,20 @@ TEXT runtime·gcWriteBarrier<ABIInternal>(SB),NOSPLIT,$112
// faster than having the caller spill these.
MOVQ R12, 96(SP)
MOVQ R13, 104(SP)
retry:
// TODO: Consider passing g.m.p in as an argument so they can be shared
// across a sequence of write barriers.
MOVQ g_m(R14), R13
MOVQ m_p(R13), R13
// Get current buffer write position.
MOVQ (p_wbBuf+wbBuf_next)(R13), R12
// Increment wbBuf.next position.
LEAQ 16(R12), R12
MOVQ R12, (p_wbBuf+wbBuf_next)(R13)
// Is the buffer full?
CMPQ R12, (p_wbBuf+wbBuf_end)(R13)
JA flush
// Commit to the larger buffer.
MOVQ R12, (p_wbBuf+wbBuf_next)(R13)
// Record the write.
MOVQ AX, -16(R12) // Record value
// Note: This turns bad pointer writes into bad
@@ -1653,9 +1658,6 @@ TEXT runtime·gcWriteBarrier<ABIInternal>(SB),NOSPLIT,$112
// combine the read and the write.
MOVQ (DI), R13
MOVQ R13, -8(R12) // Record *slot
// Is the buffer full? (flags set in CMPQ above)
JEQ flush
ret:
MOVQ 96(SP), R12
MOVQ 104(SP), R13
// Do the write.
@@ -1675,8 +1677,8 @@ flush:
//
// TODO: We could strike a different balance; e.g., saving X0
// and not saving GP registers that are less likely to be used.
MOVQ DI, 0(SP) // Also first argument to wbBufFlush
MOVQ AX, 8(SP) // Also second argument to wbBufFlush
MOVQ DI, 0(SP)
MOVQ AX, 8(SP)
MOVQ BX, 16(SP)
MOVQ CX, 24(SP)
MOVQ DX, 32(SP)
@@ -1692,7 +1694,6 @@ flush:
// R14 is g
MOVQ R15, 88(SP)
// This takes arguments DI and AX
CALL runtime·wbBufFlush(SB)
MOVQ 0(SP), DI
@@ -1707,7 +1708,7 @@ flush:
MOVQ 72(SP), R10
MOVQ 80(SP), R11
MOVQ 88(SP), R15
JMP ret
JMP retry
// gcWriteBarrierCX is gcWriteBarrier, but with args in DI and CX.
// Defined as ABIInternal since it does not use the stable Go ABI.

View File

@@ -882,21 +882,22 @@ TEXT ·checkASM(SB),NOSPLIT,$0-1
TEXT runtime·gcWriteBarrier(SB),NOSPLIT|NOFRAME,$0
// Save the registers clobbered by the fast path.
MOVM.DB.W [R0,R1], (R13)
retry:
MOVW g_m(g), R0
MOVW m_p(R0), R0
MOVW (p_wbBuf+wbBuf_next)(R0), R1
MOVW (p_wbBuf+wbBuf_end)(R0), R11
// Increment wbBuf.next position.
ADD $8, R1
// Is the buffer full?
CMP R11, R1
BHI flush
// Commit to the larger buffer.
MOVW R1, (p_wbBuf+wbBuf_next)(R0)
MOVW (p_wbBuf+wbBuf_end)(R0), R0
CMP R1, R0
// Record the write.
MOVW R3, -8(R1) // Record value
MOVW (R2), R0 // TODO: This turns bad writes into bad reads.
MOVW R0, -4(R1) // Record *slot
// Is the buffer full? (flags set in CMP above)
B.EQ flush
ret:
MOVM.IA.W (R13), [R0,R1]
// Do the write.
MOVW R3, (R2)
@@ -911,20 +912,16 @@ flush:
// R11 is linker temp, so no need to save.
// R13 is stack pointer.
// R15 is PC.
//
// This also sets up R2 and R3 as the arguments to wbBufFlush.
MOVM.DB.W [R2-R9,R12], (R13)
// Save R14 (LR) because the fast path above doesn't save it,
// but needs it to RET. This is after the MOVM so it appears below
// the arguments in the stack frame.
// but needs it to RET.
MOVM.DB.W [R14], (R13)
// This takes arguments R2 and R3.
CALL runtime·wbBufFlush(SB)
MOVM.IA.W (R13), [R14]
MOVM.IA.W (R13), [R2-R9,R12]
JMP ret
JMP retry
// Note: these functions use a special calling convention to save generated code space.
// Arguments are passed in registers, but the space for those arguments are allocated

View File

@@ -1194,7 +1194,7 @@ TEXT ·checkASM(SB),NOSPLIT,$0-1
// - R2 is the destination of the write
// - R3 is the value being written at R2
// It clobbers condition codes.
// It does not clobber any general-purpose registers,
// It does not clobber any general-purpose registers except R27,
// but may clobber others (e.g., floating point registers)
// The act of CALLing gcWriteBarrier will clobber R30 (LR).
//
@@ -1203,21 +1203,22 @@ TEXT ·checkASM(SB),NOSPLIT,$0-1
TEXT runtime·gcWriteBarrier<ABIInternal>(SB),NOSPLIT,$200
// Save the registers clobbered by the fast path.
STP (R0, R1), 184(RSP)
retry:
MOVD g_m(g), R0
MOVD m_p(R0), R0
MOVD (p_wbBuf+wbBuf_next)(R0), R1
MOVD (p_wbBuf+wbBuf_next)(R0), R1
MOVD (p_wbBuf+wbBuf_end)(R0), R27
// Increment wbBuf.next position.
ADD $16, R1
// Is the buffer full?
CMP R27, R1
BHI flush
// Commit to the larger buffer.
MOVD R1, (p_wbBuf+wbBuf_next)(R0)
MOVD (p_wbBuf+wbBuf_end)(R0), R0
CMP R1, R0
// Record the write.
MOVD R3, -16(R1) // Record value
MOVD (R2), R0 // TODO: This turns bad writes into bad reads.
MOVD R0, -8(R1) // Record *slot
// Is the buffer full? (flags set in CMP above)
BEQ flush
ret:
LDP 184(RSP), (R0, R1)
// Do the write.
MOVD R3, (R2)
@@ -1227,7 +1228,7 @@ flush:
// Save all general purpose registers since these could be
// clobbered by wbBufFlush and were not saved by the caller.
// R0 and R1 already saved
STP (R2, R3), 1*8(RSP) // Also first and second arguments to wbBufFlush
STP (R2, R3), 1*8(RSP)
STP (R4, R5), 3*8(RSP)
STP (R6, R7), 5*8(RSP)
STP (R8, R9), 7*8(RSP)
@@ -1246,7 +1247,6 @@ flush:
// R30 is LR, which was saved by the prologue.
// R31 is SP.
// This takes arguments R2 and R3.
CALL runtime·wbBufFlush(SB)
LDP 1*8(RSP), (R2, R3)
LDP 3*8(RSP), (R4, R5)
@ -1259,7 +1259,7 @@ flush:
LDP 17*8(RSP), (R21, R22)
LDP 19*8(RSP), (R23, R24)
LDP 21*8(RSP), (R25, R26)
JMP ret
JMP retry
DATA debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"
GLOBL debugCallFrameTooLarge<>(SB), RODATA, $20 // Size duplicated below

View File

@@ -628,21 +628,21 @@ TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$216
// Save the registers clobbered by the fast path.
MOVV R19, 208(R3)
MOVV R13, 216(R3)
retry:
MOVV g_m(g), R19
MOVV m_p(R19), R19
MOVV (p_wbBuf+wbBuf_next)(R19), R13
MOVV (p_wbBuf+wbBuf_end)(R19), R30 // R30 is linker temp register
// Increment wbBuf.next position.
ADDV $16, R13
// Is the buffer full?
BLTU R30, R13, flush
// Commit to the larger buffer.
MOVV R13, (p_wbBuf+wbBuf_next)(R19)
MOVV (p_wbBuf+wbBuf_end)(R19), R19
MOVV R19, R30 // R30 is linker temp register
// Record the write.
MOVV R28, -16(R13) // Record value
MOVV (R27), R19 // TODO: This turns bad writes into bad reads.
MOVV R19, -8(R13) // Record *slot
// Is the buffer full?
BEQ R13, R30, flush
ret:
MOVV 208(R3), R19
MOVV 216(R3), R13
// Do the write.
@@ -652,8 +652,8 @@ ret:
flush:
// Save all general purpose registers since these could be
// clobbered by wbBufFlush and were not saved by the caller.
MOVV R27, 8(R3) // Also first argument to wbBufFlush
MOVV R28, 16(R3) // Also second argument to wbBufFlush
MOVV R27, 8(R3)
MOVV R28, 16(R3)
// R1 is LR, which was saved by the prologue.
MOVV R2, 24(R3)
// R3 is SP.
@@ -686,8 +686,6 @@ flush:
// R30 is tmp register.
MOVV R31, 200(R3)
// This takes arguments R27 and R28.
CALL runtime·wbBufFlush(SB)
MOVV 8(R3), R27
@@ -715,7 +713,7 @@ flush:
MOVV 184(R3), R26
MOVV 192(R3), R29
MOVV 200(R3), R31
JMP ret
JMP retry
// Note: these functions use a special calling convention to save generated code space.
// Arguments are passed in registers, but the space for those arguments are allocated

View File

@@ -644,21 +644,22 @@ TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$192
// Save the registers clobbered by the fast path.
MOVV R1, 184(R29)
MOVV R2, 192(R29)
retry:
MOVV g_m(g), R1
MOVV m_p(R1), R1
MOVV (p_wbBuf+wbBuf_next)(R1), R2
MOVV (p_wbBuf+wbBuf_end)(R1), R23 // R23 is linker temp register
// Increment wbBuf.next position.
ADDV $16, R2
// Is the buffer full?
SGTU R2, R23, R23
BNE R23, flush
// Commit to the larger buffer.
MOVV R2, (p_wbBuf+wbBuf_next)(R1)
MOVV (p_wbBuf+wbBuf_end)(R1), R1
MOVV R1, R23 // R23 is linker temp register
// Record the write.
MOVV R21, -16(R2) // Record value
MOVV (R20), R1 // TODO: This turns bad writes into bad reads.
MOVV R1, -8(R2) // Record *slot
// Is the buffer full?
BEQ R2, R23, flush
ret:
MOVV 184(R29), R1
MOVV 192(R29), R2
// Do the write.
@@ -668,8 +669,8 @@ ret:
flush:
// Save all general purpose registers since these could be
// clobbered by wbBufFlush and were not saved by the caller.
MOVV R20, 8(R29) // Also first argument to wbBufFlush
MOVV R21, 16(R29) // Also second argument to wbBufFlush
MOVV R20, 8(R29)
MOVV R21, 16(R29)
// R1 already saved
// R2 already saved
MOVV R3, 24(R29)
@@ -702,7 +703,6 @@ flush:
// R30 is g.
// R31 is LR, which was saved by the prologue.
// This takes arguments R20 and R21.
CALL runtime·wbBufFlush(SB)
MOVV 8(R29), R20
@@ -727,7 +727,7 @@ flush:
MOVV 160(R29), R22
MOVV 168(R29), R24
MOVV 176(R29), R25
JMP ret
JMP retry
// Note: these functions use a special calling convention to save generated code space.
// Arguments are passed in registers, but the space for those arguments are allocated

View File

@@ -637,21 +637,22 @@ TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$104
// Save the registers clobbered by the fast path.
MOVW R1, 100(R29)
MOVW R2, 104(R29)
retry:
MOVW g_m(g), R1
MOVW m_p(R1), R1
MOVW (p_wbBuf+wbBuf_next)(R1), R2
MOVW (p_wbBuf+wbBuf_end)(R1), R23 // R23 is linker temp register
// Increment wbBuf.next position.
ADD $8, R2
// Is the buffer full?
SGTU R2, R23, R23
BNE R23, flush
// Commit to the larger buffer.
MOVW R2, (p_wbBuf+wbBuf_next)(R1)
MOVW (p_wbBuf+wbBuf_end)(R1), R1
MOVW R1, R23 // R23 is linker temp register
// Record the write.
MOVW R21, -8(R2) // Record value
MOVW (R20), R1 // TODO: This turns bad writes into bad reads.
MOVW R1, -4(R2) // Record *slot
// Is the buffer full?
BEQ R2, R23, flush
ret:
MOVW 100(R29), R1
MOVW 104(R29), R2
// Do the write.
@@ -661,8 +662,8 @@ ret:
flush:
// Save all general purpose registers since these could be
// clobbered by wbBufFlush and were not saved by the caller.
MOVW R20, 4(R29) // Also first argument to wbBufFlush
MOVW R21, 8(R29) // Also second argument to wbBufFlush
MOVW R20, 4(R29)
MOVW R21, 8(R29)
// R1 already saved
// R2 already saved
MOVW R3, 12(R29)
@@ -696,7 +697,6 @@ flush:
// R30 is g.
// R31 is LR, which was saved by the prologue.
// This takes arguments R20 and R21.
CALL runtime·wbBufFlush(SB)
MOVW 4(R29), R20
@@ -723,7 +723,7 @@ flush:
MOVW 88(R29), R24
MOVW 92(R29), R25
MOVW 96(R29), R28
JMP ret
JMP retry
// Note: these functions use a special calling convention to save generated code space.
// Arguments are passed in registers, but the space for those arguments are allocated

View File

@@ -938,22 +938,23 @@ TEXT ·checkASM(SB),NOSPLIT,$0-1
// but may clobber any other register, *including* R31.
TEXT runtime·gcWriteBarrier<ABIInternal>(SB),NOSPLIT,$112
// The standard prologue clobbers R31.
// We use R18 and R19 as scratch registers.
// We use R18, R19, and R31 as scratch registers.
retry:
MOVD g_m(g), R18
MOVD m_p(R18), R18
MOVD (p_wbBuf+wbBuf_next)(R18), R19
MOVD (p_wbBuf+wbBuf_end)(R18), R31
// Increment wbBuf.next position.
ADD $16, R19
// Is the buffer full?
CMPU R31, R19
BLT flush
// Commit to the larger buffer.
MOVD R19, (p_wbBuf+wbBuf_next)(R18)
MOVD (p_wbBuf+wbBuf_end)(R18), R18
CMP R18, R19
// Record the write.
MOVD R21, -16(R19) // Record value
MOVD (R20), R18 // TODO: This turns bad writes into bad reads.
MOVD R18, -8(R19) // Record *slot
// Is the buffer full? (flags set in CMP above)
BEQ flush
ret:
// Do the write.
MOVD R21, (R20)
RET
@@ -961,8 +962,8 @@ ret:
flush:
// Save registers R0 through R15 since these were not saved by the caller.
// We don't save all registers on ppc64 because it takes too much space.
MOVD R20, (FIXED_FRAME+0)(R1) // Also first argument to wbBufFlush
MOVD R21, (FIXED_FRAME+8)(R1) // Also second argument to wbBufFlush
MOVD R20, (FIXED_FRAME+0)(R1)
MOVD R21, (FIXED_FRAME+8)(R1)
// R0 is always 0, so no need to spill.
// R1 is SP.
// R2 is SB.
@@ -981,7 +982,6 @@ flush:
MOVD R16, (FIXED_FRAME+96)(R1)
MOVD R17, (FIXED_FRAME+104)(R1)
// This takes arguments R20 and R21.
CALL runtime·wbBufFlush(SB)
MOVD (FIXED_FRAME+0)(R1), R20
@@ -998,7 +998,7 @@ flush:
MOVD (FIXED_FRAME+88)(R1), R15
MOVD (FIXED_FRAME+96)(R1), R16
MOVD (FIXED_FRAME+104)(R1), R17
JMP ret
JMP retry
// Note: these functions use a special calling convention to save generated code space.
// Arguments are passed in registers, but the space for those arguments are allocated

View File

@@ -714,10 +714,10 @@ TEXT ·unspillArgs(SB),NOSPLIT,$0-0
// gcWriteBarrier performs a heap pointer write and informs the GC.
//
// gcWriteBarrier does NOT follow the Go ABI. It takes two arguments:
// - T0 is the destination of the write
// - T1 is the value being written at T0.
// It clobbers R30 (the linker temp register - REG_TMP).
// gcWriteBarrier does NOT follow the Go ABI. It accepts the
// number of bytes of buffer needed in X24, and returns a pointer
// to the buffer space in X24.
// It clobbers X31 aka T6 (the linker temp register - REG_TMP).
// The act of CALLing gcWriteBarrier will clobber RA (LR).
// It does not clobber any other general-purpose registers,
// but may clobber others (e.g., floating point registers).
@@ -725,21 +725,21 @@ TEXT runtime·gcWriteBarrier<ABIInternal>(SB),NOSPLIT,$208
// Save the registers clobbered by the fast path.
MOV A0, 24*8(X2)
MOV A1, 25*8(X2)
retry:
MOV g_m(g), A0
MOV m_p(A0), A0
MOV (p_wbBuf+wbBuf_next)(A0), A1
MOV (p_wbBuf+wbBuf_end)(A0), T6 // T6 is linker temp register (REG_TMP)
// Increment wbBuf.next position.
ADD $16, A1
// Is the buffer full?
BLTU T6, A1, flush
// Commit to the larger buffer.
MOV A1, (p_wbBuf+wbBuf_next)(A0)
MOV (p_wbBuf+wbBuf_end)(A0), A0
MOV A0, T6 // T6 is linker temp register (REG_TMP)
// Record the write.
MOV T1, -16(A1) // Record value
MOV (T0), A0 // TODO: This turns bad writes into bad reads.
MOV A0, -8(A1) // Record *slot
// Is the buffer full?
BEQ A1, T6, flush
ret:
MOV 24*8(X2), A0
MOV 25*8(X2), A1
// Do the write.
@@ -749,15 +749,13 @@ ret:
flush:
// Save all general purpose registers since these could be
// clobbered by wbBufFlush and were not saved by the caller.
MOV T0, 1*8(X2) // Also first argument to wbBufFlush
MOV T1, 2*8(X2) // Also second argument to wbBufFlush
MOV T0, 1*8(X2)
MOV T1, 2*8(X2)
// X0 is zero register
// X1 is LR, saved by prologue
// X2 is SP
// X3 is GP
// X4 is TP
// X5 is first arg to wbBufFlush (T0)
// X6 is second arg to wbBufFlush (T1)
MOV X7, 3*8(X2)
MOV X8, 4*8(X2)
MOV X9, 5*8(X2)
@@ -784,7 +782,6 @@ flush:
MOV X30, 23*8(X2)
// X31 is tmp register.
// This takes arguments T0 and T1.
CALL runtime·wbBufFlush(SB)
MOV 1*8(X2), T0
@@ -811,7 +808,7 @@ flush:
MOV 22*8(X2), X29
MOV 23*8(X2), X30
JMP ret
JMP retry
// Note: these functions use a special calling convention to save generated code space.
// Arguments are passed in registers (ssa/gen/RISCV64Ops.go), but the space for those

View File

@@ -790,20 +790,21 @@ TEXT ·checkASM(SB),NOSPLIT,$0-1
TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$96
// Save the registers clobbered by the fast path.
MOVD R4, 96(R15)
retry:
MOVD g_m(g), R1
MOVD m_p(R1), R1
// Increment wbBuf.next position.
MOVD $16, R4
ADD (p_wbBuf+wbBuf_next)(R1), R4
// Is the buffer full?
MOVD (p_wbBuf+wbBuf_end)(R1), R10
CMPUBGT R4, R10, flush
// Commit to the larger buffer.
MOVD R4, (p_wbBuf+wbBuf_next)(R1)
MOVD (p_wbBuf+wbBuf_end)(R1), R1
// Record the write.
MOVD R3, -16(R4) // Record value
MOVD (R2), R10 // TODO: This turns bad writes into bad reads.
MOVD R10, -8(R4) // Record *slot
// Is the buffer full?
CMPBEQ R4, R1, flush
ret:
MOVD 96(R15), R4
// Do the write.
MOVD R3, (R2)
@@ -812,7 +813,7 @@ ret:
flush:
// Save all general purpose registers since these could be
// clobbered by wbBufFlush and were not saved by the caller.
STMG R2, R3, 8(R15) // set R2 and R3 as arguments for wbBufFlush
STMG R2, R3, 8(R15)
MOVD R0, 24(R15)
// R1 already saved.
// R4 already saved.
@@ -821,13 +822,12 @@ flush:
// R14 is LR.
// R15 is SP.
// This takes arguments R2 and R3.
CALL runtime·wbBufFlush(SB)
LMG 8(R15), R2, R3 // restore R2 - R3
MOVD 24(R15), R0 // restore R0
LMG 32(R15), R5, R12 // restore R5 - R12
JMP ret
JMP retry
// Note: these functions use a special calling convention to save generated code space.
// Arguments are passed in registers, but the space for those arguments are allocated

View File

@@ -410,36 +410,52 @@ TEXT runtime·cgocallback(SB), NOSPLIT, $0-24
// R0: the destination of the write (i64)
// R1: the value being written (i64)
TEXT runtime·gcWriteBarrier(SB), NOSPLIT, $16
// R3 = g.m
MOVD g_m(g), R3
// R4 = p
MOVD m_p(R3), R4
// R5 = wbBuf.next
MOVD p_wbBuf+wbBuf_next(R4), R5
Loop
// R3 = g.m
MOVD g_m(g), R3
// R4 = p
MOVD m_p(R3), R4
// R5 = wbBuf.next
MOVD p_wbBuf+wbBuf_next(R4), R5
// Record value
MOVD R1, 0(R5)
// Record *slot
MOVD (R0), 8(R5)
// Increment wbBuf.next
Get R5
I64Const $16
I64Add
Set R5
// Increment wbBuf.next
Get R5
I64Const $16
I64Add
Set R5
MOVD R5, p_wbBuf+wbBuf_next(R4)
// Is the buffer full?
Get R5
I64Load (p_wbBuf+wbBuf_end)(R4)
I64LeU
If
// Commit to the larger buffer.
MOVD R5, p_wbBuf+wbBuf_next(R4)
// Back up to write position (wasm stores can't use negative offsets)
Get R5
I64Const $16
I64Sub
Set R5
// Record value
MOVD R1, 0(R5)
// Record *slot
MOVD (R0), 8(R5)
// Do the write
MOVD R1, (R0)
RET
End
Get R5
I64Load (p_wbBuf+wbBuf_end)(R4)
I64Eq
If
// Flush
MOVD R0, 0(SP)
MOVD R1, 8(SP)
CALLNORESUME runtime·wbBufFlush(SB)
MOVD 0(SP), R0
MOVD 8(SP), R1
// Retry
Br $0
End
// Do the write
MOVD R1, (R0)
RET

View File

@@ -21,9 +21,9 @@ import (
//go:nosplit
func atomicwb(ptr *unsafe.Pointer, new unsafe.Pointer) {
slot := (*uintptr)(unsafe.Pointer(ptr))
if !getg().m.p.ptr().wbBuf.putFast(*slot, uintptr(new)) {
wbBufFlush()
}
buf := getg().m.p.ptr().wbBuf.get2()
buf[0] = *slot
buf[1] = uintptr(new)
}
// atomicstorep performs *ptr = new atomically and invokes a write barrier.

View File

@@ -573,9 +573,8 @@ func bulkBarrierPreWrite(dst, src, size uintptr) {
break
}
dstx := (*uintptr)(unsafe.Pointer(addr))
if !buf.putFast(*dstx, 0) {
wbBufFlush()
}
p := buf.get1()
p[0] = *dstx
}
} else {
for {
@@ -585,9 +584,9 @@ func bulkBarrierPreWrite(dst, src, size uintptr) {
}
dstx := (*uintptr)(unsafe.Pointer(addr))
srcx := (*uintptr)(unsafe.Pointer(src + (addr - dst)))
if !buf.putFast(*dstx, *srcx) {
wbBufFlush()
}
p := buf.get2()
p[0] = *dstx
p[1] = *srcx
}
}
}
@@ -617,9 +616,8 @@ func bulkBarrierPreWriteSrcOnly(dst, src, size uintptr) {
break
}
srcx := (*uintptr)(unsafe.Pointer(addr - dst + src))
if !buf.putFast(0, *srcx) {
wbBufFlush()
}
p := buf.get1()
p[0] = *srcx
}
}
@@ -650,14 +648,13 @@ func bulkBarrierBitmap(dst, src, size, maskOffset uintptr, bits *uint8) {
if *bits&mask != 0 {
dstx := (*uintptr)(unsafe.Pointer(dst + i))
if src == 0 {
if !buf.putFast(*dstx, 0) {
wbBufFlush()
}
p := buf.get1()
p[0] = *dstx
} else {
srcx := (*uintptr)(unsafe.Pointer(src + i))
if !buf.putFast(*dstx, *srcx) {
wbBufFlush()
}
p := buf.get2()
p[0] = *dstx
p[1] = *srcx
}
}
mask <<= 1
@@ -709,9 +706,9 @@ func typeBitsBulkBarrier(typ *_type, dst, src, size uintptr) {
if bits&1 != 0 {
dstx := (*uintptr)(unsafe.Pointer(dst + i))
srcx := (*uintptr)(unsafe.Pointer(src + i))
if !buf.putFast(*dstx, *srcx) {
wbBufFlush()
}
p := buf.get2()
p[0] = *dstx
p[1] = *srcx
}
}
}

View File

@@ -53,15 +53,13 @@ type wbBuf struct {
// be updated without write barriers.
end uintptr
// buf stores a series of pointers to execute write barriers
// on. This must be a multiple of wbBufEntryPointers because
// the write barrier only checks for overflow once per entry.
buf [wbBufEntryPointers * wbBufEntries]uintptr
// buf stores a series of pointers to execute write barriers on.
buf [wbBufEntries]uintptr
}
const (
// wbBufEntries is the number of write barriers between
// flushes of the write barrier buffer.
// wbBufEntries is the maximum number of pointers that can be
// stored in the write barrier buffer.
//
// This trades latency for throughput amortization. Higher
// values amortize flushing overhead more, but increase the
@@ -69,11 +67,11 @@ const (
// footprint of the buffer.
//
// TODO: What is the latency cost of this? Tune this value.
wbBufEntries = 256
wbBufEntries = 512
// wbBufEntryPointers is the number of pointers added to the
// buffer by each write barrier.
wbBufEntryPointers = 2
// Maximum number of entries that we need to ask from the
// buffer in a single call.
wbMaxEntriesPerCall = 2
)
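Note that the buffer's memory footprint is unchanged by this hunk: the
old declaration was wbBufEntryPointers * wbBufEntries = 2 * 256 = 512
uintptrs, and the new one is wbBufEntries = 512 uintptrs. Only the
unit of accounting changes, from fixed pairs to individual pointers.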
// reset empties b by resetting its next and end pointers.
@@ -81,16 +79,15 @@ func (b *wbBuf) reset() {
start := uintptr(unsafe.Pointer(&b.buf[0]))
b.next = start
if testSmallBuf {
// For testing, allow two barriers in the buffer. If
// we only did one, then barriers of non-heap pointers
// would be no-ops. This lets us combine a buffered
// barrier with a flush at a later time.
b.end = uintptr(unsafe.Pointer(&b.buf[2*wbBufEntryPointers]))
// For testing, make the buffer smaller but more than
// 1 write barrier's worth, so it tests both the
// immediate flush and delayed flush cases.
b.end = uintptr(unsafe.Pointer(&b.buf[wbMaxEntriesPerCall+1]))
} else {
b.end = start + uintptr(len(b.buf))*unsafe.Sizeof(b.buf[0])
}
if (b.end-b.next)%(wbBufEntryPointers*unsafe.Sizeof(b.buf[0])) != 0 {
if (b.end-b.next)%unsafe.Sizeof(b.buf[0]) != 0 {
throw("bad write barrier buffer bounds")
}
}
@@ -109,13 +106,12 @@ func (b *wbBuf) empty() bool {
return b.next == uintptr(unsafe.Pointer(&b.buf[0]))
}
// putFast adds old and new to the write barrier buffer and returns
// false if a flush is necessary. Callers should use this as:
// getX returns space in the write barrier buffer to store X pointers.
// getX will flush the buffer if necessary. Callers should use this as:
//
// buf := &getg().m.p.ptr().wbBuf
// if !buf.putFast(old, new) {
// wbBufFlush()
// }
// p := buf.get2()
// p[0], p[1] = old, new
// ... actual memory write ...
//
// The caller must ensure there are no preemption points during the
@@ -125,19 +121,31 @@ func (b *wbBuf) empty() bool {
// could allow a GC phase change, which could result in missed write
// barriers.
//
// putFast must be nowritebarrierrec because write barriers here would
// getX must be nowritebarrierrec because write barriers here would
// corrupt the write barrier buffer. It (and everything it calls, if
// it called anything) has to be nosplit to avoid scheduling on to a
// different P and a different buffer.
//
//go:nowritebarrierrec
//go:nosplit
func (b *wbBuf) putFast(old, new uintptr) bool {
func (b *wbBuf) get1() *[1]uintptr {
if b.next+goarch.PtrSize > b.end {
wbBufFlush()
}
p := (*[1]uintptr)(unsafe.Pointer(b.next))
b.next += goarch.PtrSize
return p
}
//go:nowritebarrierrec
//go:nosplit
func (b *wbBuf) get2() *[2]uintptr {
if b.next+2*goarch.PtrSize > b.end {
wbBufFlush()
}
p := (*[2]uintptr)(unsafe.Pointer(b.next))
p[0] = old
p[1] = new
b.next += 2 * goarch.PtrSize
return b.next != b.end
return p
}
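Combined with reset above, the new API shape can be modeled as a small
self-contained toy (illustrative names only; flushing is modeled as a
reset, and the uintptr round-trip works here because Go's GC does not
move objects). Note the ordering at the call site: the slot contents
are written only after getN returns, since getN may flush and move
next back to the start of the buffer:

	package main

	import (
		"fmt"
		"unsafe"
	)

	const ptrSize = unsafe.Sizeof(uintptr(0))

	// toyBuf mirrors the wbBuf layout: next and end are addresses
	// into data. Illustrative only; not the runtime's code.
	type toyBuf struct {
		next uintptr
		end  uintptr
		data [16]uintptr
	}

	func (b *toyBuf) reset() {
		b.next = uintptr(unsafe.Pointer(&b.data[0]))
		b.end = b.next + uintptr(len(b.data))*ptrSize
	}

	// get1 reserves space for one pointer, flushing (here: resetting)
	// first if it does not fit, and returns a slot into the buffer.
	func (b *toyBuf) get1() *[1]uintptr {
		if b.next+ptrSize > b.end {
			b.reset() // stands in for wbBufFlush
		}
		p := (*[1]uintptr)(unsafe.Pointer(b.next))
		b.next += ptrSize
		return p
	}

	func main() {
		var b toyBuf
		b.reset()
		p := b.get1() // reserve one slot; may flush first
		p[0] = 0x1234 // record a single pointer value
		fmt.Printf("%#x\n", b.data[0]) // 0x1234
	}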
// wbBufFlush flushes the current P's write barrier buffer to the GC
@@ -159,13 +167,6 @@ func wbBufFlush() {
// Note: Every possible return from this function must reset
// the buffer's next pointer to prevent buffer overflow.
// This *must not* modify its arguments because this
// function's argument slots do double duty in gcWriteBarrier
// as register spill slots. Currently, not modifying the
// arguments is sufficient to keep the spill slots unmodified
// (which seems unlikely to change since it costs little and
// helps with debugging).
if getg().m.dying > 0 {
// We're going down. Not much point in write barriers
// and this way we can allow write barriers in the
@@ -175,7 +176,7 @@ func wbBufFlush() {
}
// Switch to the system stack so we don't have to worry about
// the untyped stack slots or safe points.
// safe points.
systemstack(func() {
wbBufFlush1(getg().m.p.ptr())
})