diff --git a/src/runtime/memclr_ppc64x.s b/src/runtime/memclr_ppc64x.s
index 442faa25f2..f7375dbee6 100644
--- a/src/runtime/memclr_ppc64x.s
+++ b/src/runtime/memclr_ppc64x.s
@@ -7,25 +7,56 @@
 #include "textflag.h"
 
 // void runtime·memclr(void*, uintptr)
-TEXT runtime·memclr(SB),NOSPLIT|NOFRAME,$0-16
-	MOVD ptr+0(FP), R3
-	MOVD n+8(FP), R4
-	SRADCC $3, R4, R6 // R6 is the number of words to zero
-	BEQ bytes
+TEXT runtime·memclr(SB), NOSPLIT|NOFRAME, $0-16
+	MOVD ptr+0(FP), R3 // R3: address of the next byte to clear
+	MOVD n+8(FP), R4   // R4: total byte count n
 
-	SUB $8, R3
-	MOVD R6, CTR
-	MOVDU R0, 8(R3)
-	BC 25, 0, -1(PC) // bdnz+ $-4
-	ADD $8, R3
+	// Split n (R4) into whole doublewords (R6) and leftover tail bytes (R5).
+check:
+	ANDCC $7, R4, R5  // R5 = n & 7: tail bytes left after the doublewords
+	SRAD  $3, R4, R6  // R6 = n >> 3: number of doublewords to clear
+	CMP   R6, $0, CR1 // CR1[EQ] set if there are no doublewords
 
-bytes:
-	ANDCC $7, R4, R7 // R7 is the number of bytes to zero
-	BEQ done
-	SUB $1, R3
-	MOVD R7, CTR
-	MOVBU R0, 1(R3)
-	BC 25, 0, -1(PC) // bdnz+ $-4
+	BC     12, 6, nozerolarge // branch if CR1[EQ]: no doublewords, only tail bytes
+	MOVD   R6, CTR            // CTR = doubleword count, consumed by zero8
+	SRADCC $2, R6, R7         // R7 = R6 >> 2: number of 32-byte chunks; sets CR0
+	BNE    zero32setup        // R7 != 0: clear the 32-byte chunks first
 
-done:
+	// Clear CTR doublewords, one 8-byte store per iteration.
+
+zero8:
+	MOVD R0, 0(R3)    // store a doubleword from R0 (assumed to hold zero - Go ppc64 convention)
+	ADD  $8, R3       // advance the pointer past it
+	BC   16, 0, zero8 // dec ctr, br zero8 if ctr not 0
+	BR   nozerolarge  // doublewords done; handle the tail bytes
+
+	// Prepare to clear 32 bytes at a time.
+
+zero32setup:
+	DCBTST (R3)    // dcbtst: cache hint that the block at R3 is about to be stored to
+	MOVD   R7, CTR // CTR = number of 32-byte chunks
+
+zero32:
+	MOVD    R0, 0(R3)       // clear 4 doublewords (32 bytes) per iteration
+	MOVD    R0, 8(R3)
+	MOVD    R0, 16(R3)
+	MOVD    R0, 24(R3)
+	ADD     $32, R3         // advance the pointer past the chunk
+	BC      16, 0, zero32   // dec ctr, br zero32 if ctr not 0
+	RLDCLCC $61, R4, $3, R6 // R6 = (n >> 3) & 3: doublewords left after the chunks; sets CR0
+	BEQ     nozerolarge     // none left: go handle the tail bytes
+	MOVD    R6, CTR         // set up the CTR for the leftover doublewords
+	BR      zero8
+
+nozerolarge:
+	CMP R5, $0   // any tail bytes left? (CR0 was overwritten after the ANDCC above)
+	BC  4, 1, LR // ble lr: return if there are none
+
+zerotail:
+	MOVD R5, CTR // CTR = number of tail bytes
+
+zerotailloop:
+	MOVB R0, 0(R3)           // clear a single byte
+	ADD  $1, R3              // advance the pointer by one byte
+	BC   16, 0, zerotailloop // dec ctr, br zerotailloop if ctr not 0
 	RET