diff --git a/src/internal/bytealg/equal_ppc64x.s b/src/internal/bytealg/equal_ppc64x.s
index 9c9cf77588..34d2a2574b 100644
--- a/src/internal/bytealg/equal_ppc64x.s
+++ b/src/internal/bytealg/equal_ppc64x.s
@@ -7,17 +7,15 @@
 #include "go_asm.h"
 #include "textflag.h"
 
-TEXT ·Equal(SB),NOSPLIT,$0-49
+TEXT ·Equal(SB),NOSPLIT|NOFRAME,$0-49
 	MOVD	a_len+8(FP), R4
 	MOVD	b_len+32(FP), R5
 	CMP	R5, R4		// unequal lengths are not equal
 	BNE	noteq
 	MOVD	a_base+0(FP), R3
 	MOVD	b_base+24(FP), R4
-	BL	memeqbody<>(SB)
-
-	MOVBZ	R9,ret+48(FP)
-	RET
+	MOVD	$ret+48(FP), R10
+	BR	memeqbody<>(SB)
 
 noteq:
 	MOVBZ	$0,ret+48(FP)
@@ -28,7 +26,7 @@ equal:
 	MOVBZ	R3,ret+48(FP)
 	RET
 
-TEXT bytes·Equal(SB),NOSPLIT,$0-49
+TEXT bytes·Equal(SB),NOSPLIT|NOFRAME,$0-49
 	FUNCDATA $0, ·Equal·args_stackmap(SB)
 	MOVD	a_len+8(FP), R4
 	MOVD	b_len+32(FP), R5
@@ -36,10 +34,8 @@ TEXT bytes·Equal(SB),NOSPLIT,$0-49
 	BNE	noteq
 	MOVD	a_base+0(FP), R3
 	MOVD	b_base+24(FP), R4
-	BL	memeqbody<>(SB)
-
-	MOVBZ	R9,ret+48(FP)
-	RET
+	MOVD	$ret+48(FP), R10
+	BR	memeqbody<>(SB)
 
 noteq:
 	MOVBZ	$0,ret+48(FP)
@@ -51,25 +47,23 @@ equal:
 	RET
 
 // memequal(a, b unsafe.Pointer, size uintptr) bool
-TEXT runtime·memequal(SB),NOSPLIT,$0-25
+TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
 	MOVD	a+0(FP), R3
 	MOVD	b+8(FP), R4
 	MOVD	size+16(FP), R5
+	MOVD	$ret+24(FP), R10
 
-	BL	memeqbody<>(SB)
-	MOVB	R9, ret+24(FP)
-	RET
+	BR	memeqbody<>(SB)
 
 // memequal_varlen(a, b unsafe.Pointer) bool
-TEXT runtime·memequal_varlen(SB),NOSPLIT,$40-17
+TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-17
 	MOVD	a+0(FP), R3
 	MOVD	b+8(FP), R4
 	CMP	R3, R4
 	BEQ	eq
 	MOVD	8(R11), R5	// compiler stores size at offset 8 in the closure
-	BL	memeqbody<>(SB)
-	MOVB	R9, ret+16(FP)
-	RET
+	MOVD	$ret+16(FP), R10
+	BR	memeqbody<>(SB)
 eq:
 	MOVD	$1, R3
 	MOVB	R3, ret+16(FP)
@@ -79,7 +73,7 @@ eq:
 // R3 = s1
 // R4 = s2
 // R5 = len
-// R9 = return value
+// R10 = addr of return value (byte)
 TEXT memeqbody<>(SB),NOSPLIT|NOFRAME,$0-0
 	MOVD	R5,CTR
 	CMP	R5,$8		// only optimize >=8
@@ -92,26 +86,19 @@ TEXT memeqbody<>(SB),NOSPLIT|NOFRAME,$0-0
 setup32a:			// 8 byte aligned, >= 32 bytes
 	SRADCC	$5,R5,R6	// number of 32 byte chunks to compare
 	MOVD	R6,CTR
+	MOVD	$16,R14		// index for VSX loads and stores
 loop32a:
-	MOVD	0(R3),R6	// doublewords to compare
-	MOVD	0(R4),R7
-	MOVD	8(R3),R8	//
-	MOVD	8(R4),R9
-	CMP	R6,R7		// bytes batch?
-	BNE	noteq
-	MOVD	16(R3),R6
-	MOVD	16(R4),R7
-	CMP	R8,R9		// bytes match?
-	MOVD	24(R3),R8
-	MOVD	24(R4),R9
-	BNE	noteq
-	CMP	R6,R7		// bytes match?
-	BNE	noteq
+	LXVD2X	(R3+R0), VS32	// VS32 = V0
+	LXVD2X	(R4+R0), VS33	// VS33 = V1
+	VCMPEQUBCC	V0, V1, V2	// compare, setting CR6
+	BGE	CR6, noteq
+	LXVD2X	(R3+R14), VS32
+	LXVD2X	(R4+R14), VS33
+	VCMPEQUBCC	V0, V1, V2
+	BGE	CR6, noteq
 	ADD	$32,R3		// bump up to next 32
 	ADD	$32,R4
-	CMP	R8,R9		// bytes match?
-	BC	8,2,loop32a	// br ctr and cr
-	BNE	noteq
+	BC	16, 0, loop32a	// br ctr and cr
 	ANDCC	$24,R5,R6	// Any 8 byte chunks?
 	BEQ	leftover	// and result is 0
 setup8a:
@@ -145,9 +132,10 @@ simple:
 	BC	16,0,simple
 	BNE	noteq
 	BR	equal
 noteq:
-	MOVD	$0, R9
+	MOVB	$0, (R10)
 	RET
 equal:
-	MOVD	$1, R9
+	MOVD	$1, R3
+	MOVB	R3, (R10)
 	RET
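
For context only, here is a rough pure-Go sketch of the control flow memeqbody implements after this patch. It is an illustrative sketch, not the runtime's code path: equalSketch is a made-up name, and the real routine is assembly that writes its byte result through the address passed in R10 rather than returning a bool. The stages mirror the labels visible above: reject unequal lengths, compare 32-byte chunks (two 16-byte LXVD2X loads plus a VCMPEQUBCC per half in the assembly), then 8-byte doublewords, then the remaining bytes.

package main

import (
	"encoding/binary"
	"fmt"
)

// equalSketch mirrors memeqbody's structure: 32-byte chunks, then
// 8-byte doublewords, then a byte-at-a-time tail (labels loop32a,
// setup8a, and leftover/simple in the assembly).
func equalSketch(a, b []byte) bool {
	if len(a) != len(b) {
		return false // unequal lengths are not equal
	}
	i := 0
	for ; i+32 <= len(a); i += 32 { // loop32a
		for j := 0; j < 32; j += 8 {
			if binary.LittleEndian.Uint64(a[i+j:]) != binary.LittleEndian.Uint64(b[i+j:]) {
				return false
			}
		}
	}
	for ; i+8 <= len(a); i += 8 { // setup8a: remaining doublewords
		if binary.LittleEndian.Uint64(a[i:]) != binary.LittleEndian.Uint64(b[i:]) {
			return false
		}
	}
	for ; i < len(a); i++ { // leftover/simple: byte-at-a-time tail
		if a[i] != b[i] {
			return false
		}
	}
	return true
}

func main() {
	a := []byte("The quick brown fox jumps over the lazy dog")
	b := []byte("The quick brown fox jumps over the lazy dog")
	fmt.Println(equalSketch(a, b)) // true
	b[40] ^= 1
	fmt.Println(equalSketch(a, b)) // false
}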