mirror of https://github.com/golang/go.git
crypto/md5: fix md5block asm to work on big endian ppc64
This handles a TODO in the md5block_ppc64le.s file to make use of byte reverse loads so the function works for big endian as well as little endian. File name is now md5block_ppc64x.s. name old time/op new time/op delta Hash8Bytes 537ns ± 0% 299ns ± 0% -44.32% (p=0.000 n=8+1) Hash1K 4.74µs ± 0% 3.24µs ± 0% -31.64% (p=0.250 n=7+1) Hash8K 34.4µs ± 0% 23.6µs ± 0% -31.56% (p=0.222 n=8+1) Hash8BytesUnaligned 537ns ± 0% 298ns ± 0% -44.51% (p=0.000 n=8+1) Hash1KUnaligned 4.74µs ± 0% 3.24µs ± 0% -31.48% (p=0.222 n=8+1) Hash8KUnaligned 34.4µs ± 0% 23.6µs ± 0% -31.39% (p=0.222 n=8+1) name old speed new speed delta Hash8Bytes 14.9MB/s ± 0% 26.8MB/s ± 0% +79.76% (p=0.222 n=8+1) Hash1K 216MB/s ± 0% 316MB/s ± 0% +46.29% (p=0.250 n=7+1) Hash8K 238MB/s ± 0% 348MB/s ± 0% +46.11% (p=0.222 n=8+1) Hash8BytesUnaligned 14.9MB/s ± 0% 26.8MB/s ± 0% +79.76% (p=0.222 n=8+1) Hash1KUnaligned 216MB/s ± 0% 316MB/s ± 0% +45.95% (p=0.222 n=8+1) Hash8KUnaligned 238MB/s ± 0% 347MB/s ± 0% +45.75% (p=0.222 n=8+1) Change-Id: I2e226bf7e69e0acd49db1af42e4fd8b87b155606 Reviewed-on: https://go-review.googlesource.com/c/144599 Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Carlos Eduardo Seo <cseo@linux.vnet.ibm.com> Reviewed-by: Michael Munday <mike.munday@ibm.com>
This commit is contained in:
parent
4d913b332c
commit
fcd1ec0c82
|
|
@ -2,7 +2,7 @@
|
|||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build amd64 amd64p32 386 arm ppc64le s390x arm64
|
||||
// +build amd64 amd64p32 386 arm ppc64le ppc64 s390x arm64
|
||||
|
||||
package md5
|
||||
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build !amd64,!amd64p32,!386,!arm,!ppc64le,!s390x,!arm64
|
||||
// +build !amd64,!amd64p32,!386,!arm,!ppc64le,!ppc64,!s390x,!arm64
|
||||
|
||||
package md5
|
||||
|
||||
|
|
|
|||
|
|
@ -10,14 +10,23 @@
|
|||
// Licence: I hereby disclaim the copyright on this code and place it
|
||||
// in the public domain.
|
||||
|
||||
// +build ppc64 ppc64le
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// TODO: Could be updated for ppc64 big endian
|
||||
// by using the correct byte reverse instruction.
|
||||
// Changes required in the Go assembler to make
|
||||
// that instruction work.
|
||||
|
||||
#define MOVE_LITTLE_ENDIAN MOVWZ
|
||||
// ENDIAN_MOVE generates the appropriate
|
||||
// 4 byte load for big or little endian.
|
||||
// The 4 bytes at ptr+off is loaded into dst.
|
||||
// The idx reg is only needed for big endian
|
||||
// and is clobbered when used.
|
||||
#ifdef GOARCH_ppc64le
|
||||
#define ENDIAN_MOVE(off, ptr, dst, idx) \
|
||||
MOVWZ off(ptr),dst
|
||||
#else
|
||||
#define ENDIAN_MOVE(off, ptr, dst, idx) \
|
||||
MOVD $off,idx; \
|
||||
MOVWBR (idx)(ptr), dst
|
||||
#endif
|
||||
|
||||
TEXT ·block(SB),NOSPLIT,$0-32
|
||||
MOVD dig+0(FP), R10
|
||||
|
|
@ -40,7 +49,7 @@ loop:
|
|||
MOVWZ R4, R16
|
||||
MOVWZ R5, R17
|
||||
|
||||
MOVE_LITTLE_ENDIAN 0(R6), R8
|
||||
ENDIAN_MOVE(0,R6,R8,R21)
|
||||
MOVWZ R5, R9
|
||||
|
||||
#define ROUND1(a, b, c, d, index, const, shift) \
|
||||
|
|
@ -49,7 +58,7 @@ loop:
|
|||
ADD R8, a; \
|
||||
AND b, R9; \
|
||||
XOR d, R9; \
|
||||
MOVE_LITTLE_ENDIAN (index*4)(R6), R8; \
|
||||
ENDIAN_MOVE(index*4,R6,R8,R21); \
|
||||
ADD R9, a; \
|
||||
RLWMI $shift, a, $0xffffffff, a; \
|
||||
MOVWZ c, R9; \
|
||||
|
|
@ -73,7 +82,7 @@ loop:
|
|||
ROUND1(R4,R5,R22,R3,15,0xa679438e,17);
|
||||
ROUND1(R3,R4,R5,R22, 0,0x49b40821,22);
|
||||
|
||||
MOVE_LITTLE_ENDIAN (1*4)(R6), R8
|
||||
ENDIAN_MOVE(1*4,R6,R8,R21)
|
||||
MOVWZ R5, R9
|
||||
MOVWZ R5, R10
|
||||
|
||||
|
|
@ -83,7 +92,7 @@ loop:
|
|||
ADD R8, a; \
|
||||
AND b, R10; \
|
||||
AND c, R9; \
|
||||
MOVE_LITTLE_ENDIAN (index*4)(R6), R8; \
|
||||
ENDIAN_MOVE(index*4,R6,R8,R21); \
|
||||
OR R9, R10; \
|
||||
MOVWZ c, R9; \
|
||||
ADD R10, a; \
|
||||
|
|
@ -109,13 +118,13 @@ loop:
|
|||
ROUND2(R4,R5,R22,R3,12,0x676f02d9,14);
|
||||
ROUND2(R3,R4,R5,R22, 0,0x8d2a4c8a,20);
|
||||
|
||||
MOVE_LITTLE_ENDIAN (5*4)(R6), R8
|
||||
ENDIAN_MOVE(5*4,R6,R8,R21)
|
||||
MOVWZ R4, R9
|
||||
|
||||
#define ROUND3(a, b, c, d, index, const, shift) \
|
||||
ADD $const, a; \
|
||||
ADD R8, a; \
|
||||
MOVE_LITTLE_ENDIAN (index*4)(R6), R8; \
|
||||
ENDIAN_MOVE(index*4,R6,R8,R21); \
|
||||
XOR d, R9; \
|
||||
XOR b, R9; \
|
||||
ADD R9, a; \
|
||||
|
|
@ -141,7 +150,7 @@ loop:
|
|||
ROUND3(R4,R5,R22,R3, 2,0x1fa27cf8,16);
|
||||
ROUND3(R3,R4,R5,R22, 0,0xc4ac5665,23);
|
||||
|
||||
MOVE_LITTLE_ENDIAN (0*4)(R6), R8
|
||||
ENDIAN_MOVE(0,R6,R8,R21)
|
||||
MOVWZ $0xffffffff, R9
|
||||
XOR R5, R9
|
||||
|
||||
|
|
@ -151,7 +160,7 @@ loop:
|
|||
OR b, R9; \
|
||||
XOR c, R9; \
|
||||
ADD R9, a; \
|
||||
MOVE_LITTLE_ENDIAN (index*4)(R6), R8; \
|
||||
ENDIAN_MOVE(index*4,R6,R8,R21); \
|
||||
MOVWZ $0xffffffff, R9; \
|
||||
RLWMI $shift, a, $0xffffffff, a; \
|
||||
XOR c, R9; \
|
||||
Loading…
Reference in New Issue