From 21d82e6ac80fc2aea1eac9c8eec9afdd79cb5bdd Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Tue, 1 Nov 2022 16:46:43 -0700 Subject: cmd/compile: batch write barrier calls Have the write barrier call return a pointer to a buffer into which the generated code records pointers that need write barrier treatment. Change-Id: I7871764298e0aa1513de417010c8d46b296b199e Reviewed-on: https://go-review.googlesource.com/c/go/+/447781 Reviewed-by: Keith Randall Run-TryBot: Keith Randall TryBot-Bypass: Keith Randall Reviewed-by: Cherry Mui --- src/runtime/asm_386.s | 60 ++++++++++++++++++----- src/runtime/asm_amd64.s | 120 ++++++++++++++++++---------------------------- src/runtime/asm_arm.s | 44 +++++++++++++---- src/runtime/asm_arm64.s | 51 ++++++++++++++------ src/runtime/asm_loong64.s | 44 +++++++++++++---- src/runtime/asm_mips64x.s | 44 +++++++++++++---- src/runtime/asm_mipsx.s | 44 +++++++++++++---- src/runtime/asm_ppc64x.s | 43 +++++++++++++---- src/runtime/asm_riscv64.s | 38 ++++++++++++--- src/runtime/asm_s390x.s | 44 +++++++++++++---- src/runtime/asm_wasm.s | 64 +++++++++++++++++-------- src/runtime/mwbbuf.go | 2 +- src/runtime/stubs.go | 9 +++- 13 files changed, 418 insertions(+), 189 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/asm_386.s b/src/runtime/asm_386.s index 8865f5502e..f07fc6bdb4 100644 --- a/src/runtime/asm_386.s +++ b/src/runtime/asm_386.s @@ -1365,14 +1365,25 @@ TEXT runtime·float64touint32(SB),NOSPLIT,$12-12 MOVL AX, ret+8(FP) RET -// gcWriteBarrier performs a heap pointer write and informs the GC. +// gcWriteBarrier informs the GC about heap pointer writes. // -// gcWriteBarrier does NOT follow the Go ABI. It takes two arguments: -// - DI is the destination of the write -// - AX is the value being written at DI +// gcWriteBarrier returns space in a write barrier buffer which +// should be filled in by the caller. +// gcWriteBarrier does NOT follow the Go ABI. 
It accepts the +// number of bytes of buffer needed in DI, and returns a pointer +// to the buffer space in DI. // It clobbers FLAGS. It does not clobber any general-purpose registers, // but may clobber others (e.g., SSE registers). -TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$28 +// Typical use would be, when doing *(CX+88) = AX +// CMPL $0, runtime.writeBarrier(SB) +// JEQ dowrite +// CALL runtime.gcWriteBarrier2(SB) +// MOVL AX, (DI) +// MOVL 88(CX), DX +// MOVL DX, 4(DI) +// dowrite: +// MOVL AX, 88(CX) +TEXT gcWriteBarrier<>(SB),NOSPLIT,$28 // Save the registers clobbered by the fast path. This is slightly // faster than having the caller spill these. MOVL CX, 20(SP) @@ -1384,18 +1395,18 @@ retry: MOVL g(BX), BX MOVL g_m(BX), BX MOVL m_p(BX), BX - MOVL (p_wbBuf+wbBuf_next)(BX), CX - // Increment wbBuf.next position. - LEAL 8(CX), CX + // Get current buffer write position. + MOVL (p_wbBuf+wbBuf_next)(BX), CX // original next position + ADDL DI, CX // new next position // Is the buffer full? CMPL CX, (p_wbBuf+wbBuf_end)(BX) JA flush // Commit to the larger buffer. MOVL CX, (p_wbBuf+wbBuf_next)(BX) - // Record the write. - MOVL AX, -8(CX) // Record value - MOVL (DI), BX // TODO: This turns bad writes into bad reads. - MOVL BX, -4(CX) // Record *slot + // Make return value (the original next position) + SUBL DI, CX + MOVL CX, DI + // Restore registers. 
MOVL 20(SP), CX MOVL 24(SP), BX RET @@ -1421,6 +1432,31 @@ flush: MOVL 16(SP), SI JMP retry +TEXT runtime·gcWriteBarrier1(SB),NOSPLIT,$0 + MOVL $4, DI + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier2(SB),NOSPLIT,$0 + MOVL $8, DI + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier3(SB),NOSPLIT,$0 + MOVL $12, DI + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier4(SB),NOSPLIT,$0 + MOVL $16, DI + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier5(SB),NOSPLIT,$0 + MOVL $20, DI + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier6(SB),NOSPLIT,$0 + MOVL $24, DI + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier7(SB),NOSPLIT,$0 + MOVL $28, DI + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier8(SB),NOSPLIT,$0 + MOVL $32, DI + JMP gcWriteBarrier<>(SB) + // Note: these functions use a special calling convention to save generated code space. // Arguments are passed in registers, but the space for those arguments are allocated // in the caller's stack frame. These stubs write the args into that stack space and diff --git a/src/runtime/asm_amd64.s b/src/runtime/asm_amd64.s index 69a363320d..8051b269d6 100644 --- a/src/runtime/asm_amd64.s +++ b/src/runtime/asm_amd64.s @@ -1621,15 +1621,25 @@ TEXT ·sigpanic0(SB),NOSPLIT,$0-0 #endif JMP ·sigpanic(SB) -// gcWriteBarrier performs a heap pointer write and informs the GC. +// gcWriteBarrier informs the GC about heap pointer writes. // -// gcWriteBarrier does NOT follow the Go ABI. It takes two arguments: -// - DI is the destination of the write -// - AX is the value being written at DI +// gcWriteBarrier returns space in a write barrier buffer which +// should be filled in by the caller. +// gcWriteBarrier does NOT follow the Go ABI. It accepts the +// number of bytes of buffer needed in R11, and returns a pointer +// to the buffer space in R11. // It clobbers FLAGS. It does not clobber any general-purpose registers, // but may clobber others (e.g., SSE registers). 
-// Defined as ABIInternal since it does not use the stack-based Go ABI. -TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$112 +// Typical use would be, when doing *(CX+88) = AX +// CMPL $0, runtime.writeBarrier(SB) +// JEQ dowrite +// CALL runtime.gcWriteBarrier2(SB) +// MOVQ AX, (R11) +// MOVQ 88(CX), DX +// MOVQ DX, 8(R11) +// dowrite: +// MOVQ AX, 88(CX) +TEXT gcWriteBarrier<>(SB),NOSPLIT,$112 // Save the registers clobbered by the fast path. This is slightly // faster than having the caller spill these. MOVQ R12, 96(SP) @@ -1640,24 +1650,17 @@ retry: MOVQ g_m(R14), R13 MOVQ m_p(R13), R13 // Get current buffer write position. - MOVQ (p_wbBuf+wbBuf_next)(R13), R12 - // Increment wbBuf.next position. - LEAQ 16(R12), R12 + MOVQ (p_wbBuf+wbBuf_next)(R13), R12 // original next position + ADDQ R11, R12 // new next position // Is the buffer full? CMPQ R12, (p_wbBuf+wbBuf_end)(R13) JA flush // Commit to the larger buffer. MOVQ R12, (p_wbBuf+wbBuf_next)(R13) - // Record the write. - MOVQ AX, -16(R12) // Record value - // Note: This turns bad pointer writes into bad - // pointer reads, which could be confusing. We could avoid - // reading from obviously bad pointers, which would - // take care of the vast majority of these. We could - // patch this up in the signal handler, or use XCHG to - // combine the read and the write. - MOVQ (DI), R13 - MOVQ R13, -8(R12) // Record *slot + // Make return value (the original next position) + SUBQ R11, R12 + MOVQ R12, R11 + // Restore registers. MOVQ 96(SP), R12 MOVQ 104(SP), R13 RET @@ -1708,61 +1711,30 @@ flush: MOVQ 88(SP), R15 JMP retry -// gcWriteBarrierCX is gcWriteBarrier, but with args in DI and CX. -// Defined as ABIInternal since it does not use the stable Go ABI. -TEXT runtime·gcWriteBarrierCX(SB),NOSPLIT|NOFRAME,$0 - XCHGQ CX, AX - CALL runtime·gcWriteBarrier(SB) - XCHGQ CX, AX - RET - -// gcWriteBarrierDX is gcWriteBarrier, but with args in DI and DX. -// Defined as ABIInternal since it does not use the stable Go ABI. 
-TEXT runtime·gcWriteBarrierDX(SB),NOSPLIT|NOFRAME,$0 - XCHGQ DX, AX - CALL runtime·gcWriteBarrier(SB) - XCHGQ DX, AX - RET - -// gcWriteBarrierBX is gcWriteBarrier, but with args in DI and BX. -// Defined as ABIInternal since it does not use the stable Go ABI. -TEXT runtime·gcWriteBarrierBX(SB),NOSPLIT|NOFRAME,$0 - XCHGQ BX, AX - CALL runtime·gcWriteBarrier(SB) - XCHGQ BX, AX - RET - -// gcWriteBarrierBP is gcWriteBarrier, but with args in DI and BP. -// Defined as ABIInternal since it does not use the stable Go ABI. -TEXT runtime·gcWriteBarrierBP(SB),NOSPLIT|NOFRAME,$0 - XCHGQ BP, AX - CALL runtime·gcWriteBarrier(SB) - XCHGQ BP, AX - RET - -// gcWriteBarrierSI is gcWriteBarrier, but with args in DI and SI. -// Defined as ABIInternal since it does not use the stable Go ABI. -TEXT runtime·gcWriteBarrierSI(SB),NOSPLIT|NOFRAME,$0 - XCHGQ SI, AX - CALL runtime·gcWriteBarrier(SB) - XCHGQ SI, AX - RET - -// gcWriteBarrierR8 is gcWriteBarrier, but with args in DI and R8. -// Defined as ABIInternal since it does not use the stable Go ABI. -TEXT runtime·gcWriteBarrierR8(SB),NOSPLIT|NOFRAME,$0 - XCHGQ R8, AX - CALL runtime·gcWriteBarrier(SB) - XCHGQ R8, AX - RET - -// gcWriteBarrierR9 is gcWriteBarrier, but with args in DI and R9. -// Defined as ABIInternal since it does not use the stable Go ABI. 
-TEXT runtime·gcWriteBarrierR9(SB),NOSPLIT|NOFRAME,$0 - XCHGQ R9, AX - CALL runtime·gcWriteBarrier(SB) - XCHGQ R9, AX - RET +TEXT runtime·gcWriteBarrier1(SB),NOSPLIT|NOFRAME,$0 + MOVL $8, R11 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier2(SB),NOSPLIT|NOFRAME,$0 + MOVL $16, R11 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier3(SB),NOSPLIT|NOFRAME,$0 + MOVL $24, R11 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier4(SB),NOSPLIT|NOFRAME,$0 + MOVL $32, R11 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier5(SB),NOSPLIT|NOFRAME,$0 + MOVL $40, R11 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier6(SB),NOSPLIT|NOFRAME,$0 + MOVL $48, R11 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier7(SB),NOSPLIT|NOFRAME,$0 + MOVL $56, R11 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier8(SB),NOSPLIT|NOFRAME,$0 + MOVL $64, R11 + JMP gcWriteBarrier<>(SB) DATA debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large" GLOBL debugCallFrameTooLarge<>(SB), RODATA, $20 // Size duplicated below diff --git a/src/runtime/asm_arm.s b/src/runtime/asm_arm.s index 3cabe748cd..569165ed19 100644 --- a/src/runtime/asm_arm.s +++ b/src/runtime/asm_arm.s @@ -870,16 +870,16 @@ TEXT ·checkASM(SB),NOSPLIT,$0-1 MOVB R3, ret+0(FP) RET -// gcWriteBarrier performs a heap pointer write and informs the GC. +// gcWriteBarrier informs the GC about heap pointer writes. // -// gcWriteBarrier does NOT follow the Go ABI. It takes two arguments: -// - R2 is the destination of the write -// - R3 is the value being written at R2 +// gcWriteBarrier does NOT follow the Go ABI. It accepts the +// number of bytes of buffer needed in R8, and returns a pointer +// to the buffer space in R8. // It clobbers condition codes. // It does not clobber any other general-purpose registers, // but may clobber others (e.g., floating point registers). // The act of CALLing gcWriteBarrier will clobber R14 (LR). 
-TEXT runtime·gcWriteBarrier(SB),NOSPLIT|NOFRAME,$0 +TEXT gcWriteBarrier<>(SB),NOSPLIT|NOFRAME,$0 // Save the registers clobbered by the fast path. MOVM.DB.W [R0,R1], (R13) retry: @@ -888,16 +888,15 @@ retry: MOVW (p_wbBuf+wbBuf_next)(R0), R1 MOVW (p_wbBuf+wbBuf_end)(R0), R11 // Increment wbBuf.next position. - ADD $8, R1 + ADD R8, R1 // Is the buffer full? CMP R11, R1 BHI flush // Commit to the larger buffer. MOVW R1, (p_wbBuf+wbBuf_next)(R0) - // Record the write. - MOVW R3, -8(R1) // Record value - MOVW (R2), R0 // TODO: This turns bad writes into bad reads. - MOVW R0, -4(R1) // Record *slot + // Make return value (the original next position) + SUB R8, R1, R8 + // Restore registers. MOVM.IA.W (R13), [R0,R1] RET @@ -921,6 +920,31 @@ flush: MOVM.IA.W (R13), [R2-R9,R12] JMP retry +TEXT runtime·gcWriteBarrier1(SB),NOSPLIT,$0 + MOVW $4, R8 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier2(SB),NOSPLIT,$0 + MOVW $8, R8 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier3(SB),NOSPLIT,$0 + MOVW $12, R8 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier4(SB),NOSPLIT,$0 + MOVW $16, R8 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier5(SB),NOSPLIT,$0 + MOVW $20, R8 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier6(SB),NOSPLIT,$0 + MOVW $24, R8 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier7(SB),NOSPLIT,$0 + MOVW $28, R8 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier8(SB),NOSPLIT,$0 + MOVW $32, R8 + JMP gcWriteBarrier<>(SB) + // Note: these functions use a special calling convention to save generated code space. // Arguments are passed in registers, but the space for those arguments are allocated // in the caller's stack frame. 
These stubs write the args into that stack space and diff --git a/src/runtime/asm_arm64.s b/src/runtime/asm_arm64.s index e8399712de..d0dd73cc00 100644 --- a/src/runtime/asm_arm64.s +++ b/src/runtime/asm_arm64.s @@ -1188,37 +1188,33 @@ TEXT ·checkASM(SB),NOSPLIT,$0-1 MOVB R3, ret+0(FP) RET -// gcWriteBarrier performs a heap pointer write and informs the GC. +// gcWriteBarrier informs the GC about heap pointer writes. // -// gcWriteBarrier does NOT follow the Go ABI. It takes two arguments: -// - R2 is the destination of the write -// - R3 is the value being written at R2 +// gcWriteBarrier does NOT follow the Go ABI. It accepts the +// number of bytes of buffer needed in R25, and returns a pointer +// to the buffer space in R25. // It clobbers condition codes. // It does not clobber any general-purpose registers except R27, // but may clobber others (e.g., floating point registers) // The act of CALLing gcWriteBarrier will clobber R30 (LR). -// -// Defined as ABIInternal since the compiler generates ABIInternal -// calls to it directly and it does not use the stack-based Go ABI. -TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$200 +TEXT gcWriteBarrier<>(SB),NOSPLIT,$200 // Save the registers clobbered by the fast path. STP (R0, R1), 184(RSP) retry: MOVD g_m(g), R0 MOVD m_p(R0), R0 - MOVD (p_wbBuf+wbBuf_next)(R0), R1 - MOVD (p_wbBuf+wbBuf_end)(R0), R27 + MOVD (p_wbBuf+wbBuf_next)(R0), R1 + MOVD (p_wbBuf+wbBuf_end)(R0), R27 // Increment wbBuf.next position. - ADD $16, R1 + ADD R25, R1 // Is the buffer full? CMP R27, R1 BHI flush // Commit to the larger buffer. MOVD R1, (p_wbBuf+wbBuf_next)(R0) - // Record the write. - MOVD R3, -16(R1) // Record value - MOVD (R2), R0 // TODO: This turns bad writes into bad reads. - MOVD R0, -8(R1) // Record *slot + // Make return value (the original next position) + SUB R25, R1, R25 + // Restore registers. 
LDP 184(RSP), (R0, R1) RET @@ -1259,6 +1255,31 @@ flush: LDP 21*8(RSP), (R25, R26) JMP retry +TEXT runtime·gcWriteBarrier1(SB),NOSPLIT,$0 + MOVD $8, R25 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier2(SB),NOSPLIT,$0 + MOVD $16, R25 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier3(SB),NOSPLIT,$0 + MOVD $24, R25 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier4(SB),NOSPLIT,$0 + MOVD $32, R25 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier5(SB),NOSPLIT,$0 + MOVD $40, R25 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier6(SB),NOSPLIT,$0 + MOVD $48, R25 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier7(SB),NOSPLIT,$0 + MOVD $56, R25 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier8(SB),NOSPLIT,$0 + MOVD $64, R25 + JMP gcWriteBarrier<>(SB) + DATA debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large" GLOBL debugCallFrameTooLarge<>(SB), RODATA, $20 // Size duplicated below diff --git a/src/runtime/asm_loong64.s b/src/runtime/asm_loong64.s index dfa3497b69..58e6286385 100644 --- a/src/runtime/asm_loong64.s +++ b/src/runtime/asm_loong64.s @@ -615,16 +615,16 @@ TEXT ·checkASM(SB),NOSPLIT,$0-1 MOVB R19, ret+0(FP) RET -// gcWriteBarrier performs a heap pointer write and informs the GC. +// gcWriteBarrier informs the GC about heap pointer writes. // -// gcWriteBarrier does NOT follow the Go ABI. It takes two arguments: -// - R27 is the destination of the write -// - R28 is the value being written at R27. +// gcWriteBarrier does NOT follow the Go ABI. It accepts the +// number of bytes of buffer needed in R29, and returns a pointer +// to the buffer space in R29. // It clobbers R30 (the linker temp register). // The act of CALLing gcWriteBarrier will clobber R1 (LR). // It does not clobber any other general-purpose registers, // but may clobber others (e.g., floating point registers). -TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$216 +TEXT gcWriteBarrier<>(SB),NOSPLIT,$216 // Save the registers clobbered by the fast path. 
MOVV R19, 208(R3) MOVV R13, 216(R3) @@ -634,15 +634,14 @@ retry: MOVV (p_wbBuf+wbBuf_next)(R19), R13 MOVV (p_wbBuf+wbBuf_end)(R19), R30 // R30 is linker temp register // Increment wbBuf.next position. - ADDV $16, R13 + ADDV R29, R13 // Is the buffer full? BLTU R30, R13, flush // Commit to the larger buffer. MOVV R13, (p_wbBuf+wbBuf_next)(R19) - // Record the write. - MOVV R28, -16(R13) // Record value - MOVV (R27), R19 // TODO: This turns bad writes into bad reads. - MOVV R19, -8(R13) // Record *slot + // Make return value (the original next position) + SUBV R29, R13, R29 + // Restore registers. MOVV 208(R3), R19 MOVV 216(R3), R13 RET @@ -713,6 +712,31 @@ flush: MOVV 200(R3), R31 JMP retry +TEXT runtime·gcWriteBarrier1(SB),NOSPLIT,$0 + MOVV $8, R29 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier2(SB),NOSPLIT,$0 + MOVV $16, R29 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier3(SB),NOSPLIT,$0 + MOVV $24, R29 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier4(SB),NOSPLIT,$0 + MOVV $32, R29 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier5(SB),NOSPLIT,$0 + MOVV $40, R29 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier6(SB),NOSPLIT,$0 + MOVV $48, R29 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier7(SB),NOSPLIT,$0 + MOVV $56, R29 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier8(SB),NOSPLIT,$0 + MOVV $64, R29 + JMP gcWriteBarrier<>(SB) + // Note: these functions use a special calling convention to save generated code space. // Arguments are passed in registers, but the space for those arguments are allocated // in the caller's stack frame. These stubs write the args into that stack space and diff --git a/src/runtime/asm_mips64x.s b/src/runtime/asm_mips64x.s index c6677d0014..53944ec892 100644 --- a/src/runtime/asm_mips64x.s +++ b/src/runtime/asm_mips64x.s @@ -631,16 +631,16 @@ TEXT ·checkASM(SB),NOSPLIT,$0-1 MOVB R1, ret+0(FP) RET -// gcWriteBarrier performs a heap pointer write and informs the GC. 
+// gcWriteBarrier informs the GC about heap pointer writes. // -// gcWriteBarrier does NOT follow the Go ABI. It takes two arguments: -// - R20 is the destination of the write -// - R21 is the value being written at R20. +// gcWriteBarrier does NOT follow the Go ABI. It accepts the +// number of bytes of buffer needed in R25, and returns a pointer +// to the buffer space in R25. // It clobbers R23 (the linker temp register). // The act of CALLing gcWriteBarrier will clobber R31 (LR). // It does not clobber any other general-purpose registers, // but may clobber others (e.g., floating point registers). -TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$192 +TEXT gcWriteBarrier<>(SB),NOSPLIT,$192 // Save the registers clobbered by the fast path. MOVV R1, 184(R29) MOVV R2, 192(R29) @@ -650,16 +650,15 @@ retry: MOVV (p_wbBuf+wbBuf_next)(R1), R2 MOVV (p_wbBuf+wbBuf_end)(R1), R23 // R23 is linker temp register // Increment wbBuf.next position. - ADDV $16, R2 + ADDV R25, R2 // Is the buffer full? SGTU R2, R23, R23 BNE R23, flush // Commit to the larger buffer. MOVV R2, (p_wbBuf+wbBuf_next)(R1) - // Record the write. - MOVV R21, -16(R2) // Record value - MOVV (R20), R1 // TODO: This turns bad writes into bad reads. - MOVV R1, -8(R2) // Record *slot + // Make return value (the original next position) + SUBV R25, R2, R25 + // Restore registers. 
MOVV 184(R29), R1 MOVV 192(R29), R2 RET @@ -727,6 +726,31 @@ flush: MOVV 176(R29), R25 JMP retry +TEXT runtime·gcWriteBarrier1(SB),NOSPLIT,$0 + MOVV $8, R25 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier2(SB),NOSPLIT,$0 + MOVV $16, R25 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier3(SB),NOSPLIT,$0 + MOVV $24, R25 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier4(SB),NOSPLIT,$0 + MOVV $32, R25 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier5(SB),NOSPLIT,$0 + MOVV $40, R25 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier6(SB),NOSPLIT,$0 + MOVV $48, R25 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier7(SB),NOSPLIT,$0 + MOVV $56, R25 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier8(SB),NOSPLIT,$0 + MOVV $64, R25 + JMP gcWriteBarrier<>(SB) + // Note: these functions use a special calling convention to save generated code space. // Arguments are passed in registers, but the space for those arguments are allocated // in the caller's stack frame. These stubs write the args into that stack space and diff --git a/src/runtime/asm_mipsx.s b/src/runtime/asm_mipsx.s index a43177ec13..33d37b2d02 100644 --- a/src/runtime/asm_mipsx.s +++ b/src/runtime/asm_mipsx.s @@ -624,16 +624,16 @@ TEXT ·checkASM(SB),NOSPLIT,$0-1 MOVB R1, ret+0(FP) RET -// gcWriteBarrier performs a heap pointer write and informs the GC. +// gcWriteBarrier informs the GC about heap pointer writes. // -// gcWriteBarrier does NOT follow the Go ABI. It takes two arguments: -// - R20 is the destination of the write -// - R21 is the value being written at R20. +// gcWriteBarrier does NOT follow the Go ABI. It accepts the +// number of bytes of buffer needed in R25, and returns a pointer +// to the buffer space in R25. // It clobbers R23 (the linker temp register). // The act of CALLing gcWriteBarrier will clobber R31 (LR). // It does not clobber any other general-purpose registers, // but may clobber others (e.g., floating point registers). 
-TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$104 +TEXT gcWriteBarrier<>(SB),NOSPLIT,$104 // Save the registers clobbered by the fast path. MOVW R1, 100(R29) MOVW R2, 104(R29) @@ -643,16 +643,15 @@ retry: MOVW (p_wbBuf+wbBuf_next)(R1), R2 MOVW (p_wbBuf+wbBuf_end)(R1), R23 // R23 is linker temp register // Increment wbBuf.next position. - ADD $8, R2 + ADD R25, R2 // Is the buffer full? SGTU R2, R23, R23 BNE R23, flush // Commit to the larger buffer. MOVW R2, (p_wbBuf+wbBuf_next)(R1) - // Record the write. - MOVW R21, -8(R2) // Record value - MOVW (R20), R1 // TODO: This turns bad writes into bad reads. - MOVW R1, -4(R2) // Record *slot + // Make return value (the original next position) + SUB R25, R2, R25 + // Restore registers. MOVW 100(R29), R1 MOVW 104(R29), R2 RET @@ -723,6 +722,31 @@ flush: MOVW 96(R29), R28 JMP retry +TEXT runtime·gcWriteBarrier1(SB),NOSPLIT,$0 + MOVW $4, R25 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier2(SB),NOSPLIT,$0 + MOVW $8, R25 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier3(SB),NOSPLIT,$0 + MOVW $12, R25 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier4(SB),NOSPLIT,$0 + MOVW $16, R25 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier5(SB),NOSPLIT,$0 + MOVW $20, R25 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier6(SB),NOSPLIT,$0 + MOVW $24, R25 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier7(SB),NOSPLIT,$0 + MOVW $28, R25 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier8(SB),NOSPLIT,$0 + MOVW $32, R25 + JMP gcWriteBarrier<>(SB) + // Note: these functions use a special calling convention to save generated code space. // Arguments are passed in registers, but the space for those arguments are allocated // in the caller's stack frame. 
These stubs write the args into that stack space and diff --git a/src/runtime/asm_ppc64x.s b/src/runtime/asm_ppc64x.s index 0f6421f6f5..37472c0f79 100644 --- a/src/runtime/asm_ppc64x.s +++ b/src/runtime/asm_ppc64x.s @@ -928,15 +928,15 @@ TEXT ·checkASM(SB),NOSPLIT,$0-1 MOVB R3, ret+0(FP) RET -// gcWriteBarrier performs a heap pointer write and informs the GC. +// gcWriteBarrier informs the GC about heap pointer writes. // -// gcWriteBarrier does NOT follow the Go ABI. It takes two arguments: -// - R20 is the destination of the write -// - R21 is the value being written at R20. +// gcWriteBarrier does NOT follow the Go ABI. It accepts the +// number of bytes of buffer needed in R29, and returns a pointer +// to the buffer space in R29. // It clobbers condition codes. // It does not clobber R0 through R17 (except special registers), // but may clobber any other register, *including* R31. -TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$112 +TEXT gcWriteBarrier<>(SB),NOSPLIT,$112 // The standard prologue clobbers R31. // We use R18, R19, and R31 as scratch registers. retry: @@ -945,16 +945,14 @@ retry: MOVD (p_wbBuf+wbBuf_next)(R18), R19 MOVD (p_wbBuf+wbBuf_end)(R18), R31 // Increment wbBuf.next position. - ADD $16, R19 + ADD R29, R19 // Is the buffer full? CMPU R31, R19 BLT flush // Commit to the larger buffer. MOVD R19, (p_wbBuf+wbBuf_next)(R18) - // Record the write. - MOVD R21, -16(R19) // Record value - MOVD (R20), R18 // TODO: This turns bad writes into bad reads. 
- MOVD R18, -8(R19) // Record *slot + // Make return value (the original next position) + SUB R29, R19, R29 RET flush: @@ -998,6 +996,31 @@ flush: MOVD (FIXED_FRAME+104)(R1), R17 JMP retry +TEXT runtime·gcWriteBarrier1(SB),NOSPLIT,$0 + MOVD $8, R29 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier2(SB),NOSPLIT,$0 + MOVD $16, R29 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier3(SB),NOSPLIT,$0 + MOVD $24, R29 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier4(SB),NOSPLIT,$0 + MOVD $32, R29 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier5(SB),NOSPLIT,$0 + MOVD $40, R29 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier6(SB),NOSPLIT,$0 + MOVD $48, R29 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier7(SB),NOSPLIT,$0 + MOVD $56, R29 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier8(SB),NOSPLIT,$0 + MOVD $64, R29 + JMP gcWriteBarrier<>(SB) + // Note: these functions use a special calling convention to save generated code space. // Arguments are passed in registers, but the space for those arguments are allocated // in the caller's stack frame. These stubs write the args into that stack space and diff --git a/src/runtime/asm_riscv64.s b/src/runtime/asm_riscv64.s index 4fd9c427e3..7626f69684 100644 --- a/src/runtime/asm_riscv64.s +++ b/src/runtime/asm_riscv64.s @@ -712,7 +712,7 @@ TEXT ·unspillArgs(SB),NOSPLIT,$0-0 MOVD (31*8)(X25), F23 RET -// gcWriteBarrier performs a heap pointer write and informs the GC. +// gcWriteBarrier informs the GC about heap pointer writes. // // gcWriteBarrier does NOT follow the Go ABI. It accepts the // number of bytes of buffer needed in X24, and returns a pointer @@ -721,7 +721,7 @@ TEXT ·unspillArgs(SB),NOSPLIT,$0-0 // The act of CALLing gcWriteBarrier will clobber RA (LR). // It does not clobber any other general-purpose registers, // but may clobber others (e.g., floating point registers). 
-TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$208 +TEXT gcWriteBarrier<>(SB),NOSPLIT,$208 // Save the registers clobbered by the fast path. MOV A0, 24*8(X2) MOV A1, 25*8(X2) @@ -731,15 +731,14 @@ retry: MOV (p_wbBuf+wbBuf_next)(A0), A1 MOV (p_wbBuf+wbBuf_end)(A0), T6 // T6 is linker temp register (REG_TMP) // Increment wbBuf.next position. - ADD $16, A1 + ADD X24, A1 // Is the buffer full? BLTU T6, A1, flush // Commit to the larger buffer. MOV A1, (p_wbBuf+wbBuf_next)(A0) - // Record the write. - MOV T1, -16(A1) // Record value - MOV (T0), A0 // TODO: This turns bad writes into bad reads. - MOV A0, -8(A1) // Record *slot + // Make the return value (the original next position) + SUB X24, A1, X24 + // Restore registers. MOV 24*8(X2), A0 MOV 25*8(X2), A1 RET @@ -808,6 +807,31 @@ flush: JMP retry +TEXT runtime·gcWriteBarrier1(SB),NOSPLIT,$0 + MOV $8, X24 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier2(SB),NOSPLIT,$0 + MOV $16, X24 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier3(SB),NOSPLIT,$0 + MOV $24, X24 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier4(SB),NOSPLIT,$0 + MOV $32, X24 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier5(SB),NOSPLIT,$0 + MOV $40, X24 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier6(SB),NOSPLIT,$0 + MOV $48, X24 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier7(SB),NOSPLIT,$0 + MOV $56, X24 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier8(SB),NOSPLIT,$0 + MOV $64, X24 + JMP gcWriteBarrier<>(SB) + // Note: these functions use a special calling convention to save generated code space. // Arguments are passed in registers (ssa/gen/RISCV64Ops.go), but the space for those // arguments are allocated in the caller's stack frame. 
diff --git a/src/runtime/asm_s390x.s b/src/runtime/asm_s390x.s index 094e25c40f..e8fa10dee6 100644 --- a/src/runtime/asm_s390x.s +++ b/src/runtime/asm_s390x.s @@ -779,32 +779,31 @@ TEXT ·checkASM(SB),NOSPLIT,$0-1 MOVB $1, ret+0(FP) RET -// gcWriteBarrier performs a heap pointer write and informs the GC. +// gcWriteBarrier informs the GC about heap pointer writes. // -// gcWriteBarrier does NOT follow the Go ABI. It takes two arguments: -// - R2 is the destination of the write -// - R3 is the value being written at R2. +// gcWriteBarrier does NOT follow the Go ABI. It accepts the +// number of bytes of buffer needed in R9, and returns a pointer +// to the buffer space in R9. // It clobbers R10 (the temp register) and R1 (used by PLT stub). // It does not clobber any other general-purpose registers, // but may clobber others (e.g., floating point registers). -TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$96 +TEXT gcWriteBarrier<>(SB),NOSPLIT,$96 // Save the registers clobbered by the fast path. MOVD R4, 96(R15) retry: MOVD g_m(g), R1 MOVD m_p(R1), R1 // Increment wbBuf.next position. - MOVD $16, R4 + MOVD R9, R4 ADD (p_wbBuf+wbBuf_next)(R1), R4 // Is the buffer full? MOVD (p_wbBuf+wbBuf_end)(R1), R10 CMPUBGT R4, R10, flush // Commit to the larger buffer. MOVD R4, (p_wbBuf+wbBuf_next)(R1) - // Record the write. - MOVD R3, -16(R4) // Record value - MOVD (R2), R10 // TODO: This turns bad writes into bad reads. - MOVD R10, -8(R4) // Record *slot + // Make return value (the original next position) + SUB R9, R4, R9 + // Restore registers. 
MOVD 96(R15), R4 RET @@ -827,6 +826,31 @@ flush: LMG 32(R15), R5, R12 // restore R5 - R12 JMP retry +TEXT runtime·gcWriteBarrier1(SB),NOSPLIT,$0 + MOVD $8, R9 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier2(SB),NOSPLIT,$0 + MOVD $16, R9 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier3(SB),NOSPLIT,$0 + MOVD $24, R9 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier4(SB),NOSPLIT,$0 + MOVD $32, R9 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier5(SB),NOSPLIT,$0 + MOVD $40, R9 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier6(SB),NOSPLIT,$0 + MOVD $48, R9 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier7(SB),NOSPLIT,$0 + MOVD $56, R9 + JMP gcWriteBarrier<>(SB) +TEXT runtime·gcWriteBarrier8(SB),NOSPLIT,$0 + MOVD $64, R9 + JMP gcWriteBarrier<>(SB) + // Note: these functions use a special calling convention to save generated code space. // Arguments are passed in registers, but the space for those arguments are allocated // in the caller's stack frame. These stubs write the args into that stack space and diff --git a/src/runtime/asm_wasm.s b/src/runtime/asm_wasm.s index e108bb4362..330c10cc0c 100644 --- a/src/runtime/asm_wasm.s +++ b/src/runtime/asm_wasm.s @@ -404,12 +404,15 @@ TEXT runtime·goexit(SB), NOSPLIT|TOPFRAME, $0-0 TEXT runtime·cgocallback(SB), NOSPLIT, $0-24 UNDEF -// gcWriteBarrier performs a heap pointer write and informs the GC. +// gcWriteBarrier informs the GC about heap pointer writes. // -// gcWriteBarrier does NOT follow the Go ABI. It has two WebAssembly parameters: -// R0: the destination of the write (i64) -// R1: the value being written (i64) -TEXT runtime·gcWriteBarrier(SB), NOSPLIT, $16 +// gcWriteBarrier does NOT follow the Go ABI. 
It accepts the +// number of bytes of buffer needed as a wasm argument +// (put on the TOS by the caller, lives in local R0 in this body) +// and returns a pointer to the buffer space as a wasm result +// (left on the TOS in this body, appears on the wasm stack +// in the caller). +TEXT gcWriteBarrier<>(SB), NOSPLIT, $0 Loop // R3 = g.m MOVD g_m(g), R3 @@ -420,7 +423,7 @@ TEXT runtime·gcWriteBarrier(SB), NOSPLIT, $16 // Increment wbBuf.next Get R5 - I64Const $16 + Get R0 I64Add Set R5 @@ -432,27 +435,50 @@ TEXT runtime·gcWriteBarrier(SB), NOSPLIT, $16 // Commit to the larger buffer. MOVD R5, p_wbBuf+wbBuf_next(R4) - // Back up to write position (wasm stores can't use negative offsets) + // Make return value (the original next position) Get R5 - I64Const $16 + Get R0 I64Sub - Set R5 - // Record value - MOVD R1, 0(R5) - // Record *slot - MOVD (R0), 8(R5) - - RET + Return End // Flush - MOVD R0, 0(SP) - MOVD R1, 8(SP) CALLNORESUME runtime·wbBufFlush(SB) - MOVD 0(SP), R0 - MOVD 8(SP), R1 // Retry Br $0 End + +TEXT runtime·gcWriteBarrier1(SB),NOSPLIT,$0 + I64Const $8 + Call gcWriteBarrier<>(SB) + Return +TEXT runtime·gcWriteBarrier2(SB),NOSPLIT,$0 + I64Const $16 + Call gcWriteBarrier<>(SB) + Return +TEXT runtime·gcWriteBarrier3(SB),NOSPLIT,$0 + I64Const $24 + Call gcWriteBarrier<>(SB) + Return +TEXT runtime·gcWriteBarrier4(SB),NOSPLIT,$0 + I64Const $32 + Call gcWriteBarrier<>(SB) + Return +TEXT runtime·gcWriteBarrier5(SB),NOSPLIT,$0 + I64Const $40 + Call gcWriteBarrier<>(SB) + Return +TEXT runtime·gcWriteBarrier6(SB),NOSPLIT,$0 + I64Const $48 + Call gcWriteBarrier<>(SB) + Return +TEXT runtime·gcWriteBarrier7(SB),NOSPLIT,$0 + I64Const $56 + Call gcWriteBarrier<>(SB) + Return +TEXT runtime·gcWriteBarrier8(SB),NOSPLIT,$0 + I64Const $64 + Call gcWriteBarrier<>(SB) + Return diff --git a/src/runtime/mwbbuf.go b/src/runtime/mwbbuf.go index 4236cfb838..7419bd291d 100644 --- a/src/runtime/mwbbuf.go +++ b/src/runtime/mwbbuf.go @@ -71,7 +71,7 @@ const ( // Maximum number of 
entries that we need to ask from the // buffer in a single call. - wbMaxEntriesPerCall = 2 + wbMaxEntriesPerCall = 8 ) // reset empties b by resetting its next and end pointers. diff --git a/src/runtime/stubs.go b/src/runtime/stubs.go index 42c2612e68..5fe3506d5e 100644 --- a/src/runtime/stubs.go +++ b/src/runtime/stubs.go @@ -445,7 +445,14 @@ func bool2int(x bool) int { func abort() // Called from compiled code; declared for vet; do NOT call from Go. -func gcWriteBarrier() +func gcWriteBarrier1() +func gcWriteBarrier2() +func gcWriteBarrier3() +func gcWriteBarrier4() +func gcWriteBarrier5() +func gcWriteBarrier6() +func gcWriteBarrier7() +func gcWriteBarrier8() func duffzero() func duffcopy() -- cgit v1.3