mirror of https://github.com/golang/go.git
crypto/subtle: implement xorBytes in hardware on loong64
goos: linux
goarch: loong64
pkg: crypto/subtle
cpu: Loongson-3A6000 @ 2500.00MHz
│ bench.old │ bench.new │
│ sec/op │ sec/op vs base │
XORBytes/8Bytes 11.250n ± 0% 6.403n ± 0% -43.08% (p=0.000 n=20)
XORBytes/128Bytes 24.61n ± 0% 12.21n ± 0% -50.39% (p=0.000 n=20)
XORBytes/2048Bytes 216.7n ± 0% 108.3n ± 0% -50.02% (p=0.000 n=20)
XORBytes/32768Bytes 3.657µ ± 0% 1.683µ ± 0% -53.98% (p=0.000 n=20)
geomean 121.7n 61.44n -49.52%
│ bench.old │ bench.new │
│ B/s │ B/s vs base │
XORBytes/8Bytes 678.1Mi ± 0% 1191.5Mi ± 0% +75.72% (p=0.000 n=20)
XORBytes/128Bytes 4.844Gi ± 0% 9.766Gi ± 0% +101.63% (p=0.000 n=20)
XORBytes/2048Bytes 8.801Gi ± 0% 17.619Gi ± 0% +100.18% (p=0.000 n=20)
XORBytes/32768Bytes 8.346Gi ± 0% 18.137Gi ± 0% +117.32% (p=0.000 n=20)
geomean 3.918Gi 7.763Gi +98.14%
goos: linux
goarch: loong64
pkg: crypto/subtle
cpu: Loongson-3A5000 @ 2500.00MHz
│ bench.old │ bench.new │
│ sec/op │ sec/op vs base │
XORBytes/8Bytes 16.420n ± 0% 8.806n ± 0% -46.37% (p=0.000 n=20)
XORBytes/128Bytes 35.84n ± 0% 16.42n ± 0% -54.19% (p=0.000 n=20)
XORBytes/2048Bytes 332.0n ± 0% 160.5n ± 0% -51.64% (p=0.000 n=20)
XORBytes/32768Bytes 4.944µ ± 0% 2.474µ ± 0% -49.96% (p=0.000 n=20)
geomean 176.3n 87.05n -50.62%
│ bench.old │ bench.new │
│ B/s │ B/s vs base │
XORBytes/8Bytes 464.7Mi ± 0% 866.4Mi ± 0% +86.45% (p=0.000 n=20)
XORBytes/128Bytes 3.326Gi ± 0% 7.261Gi ± 0% +118.31% (p=0.000 n=20)
XORBytes/2048Bytes 5.745Gi ± 0% 11.880Gi ± 0% +106.80% (p=0.000 n=20)
XORBytes/32768Bytes 6.172Gi ± 0% 12.334Gi ± 0% +99.83% (p=0.000 n=20)
geomean 2.705Gi 5.477Gi +102.52%
Change-Id: Id404f9023a57025f78b6922659cfa8870881d646
Reviewed-on: https://go-review.googlesource.com/c/go/+/590175
Reviewed-by: abner chenc <chenguoqi@loongson.cn>
Reviewed-by: Roland Shoemaker <roland@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: Tim King <taking@google.com>
Reviewed-by: Tim King <taking@google.com>
This commit is contained in:
parent
1dfb33e861
commit
69827b5c8d
|
|
@ -2,7 +2,7 @@
|
|||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build (!amd64 && !arm64 && !ppc64 && !ppc64le) || purego
|
||||
//go:build (!amd64 && !arm64 && !loong64 && !ppc64 && !ppc64le) || purego
|
||||
|
||||
package subtle
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,10 @@
|
|||
// Copyright 2024 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build !purego
|
||||
|
||||
package subtle
|
||||
|
||||
// xorBytes XORs the first n bytes of a and b and stores the result in dst.
// It has no Go body; on loong64 the implementation is the assembly routine
// ·xorBytes.
// NOTE(review): the assembly has no n == 0 path (it would fall into the
// 64-byte loop), so callers presumably must pass n > 0 — confirm.
//
//go:noescape
|
||||
func xorBytes(dst, a, b *byte, n int)
|
||||
|
|
@ -0,0 +1,166 @@
|
|||
// Copyright 2024 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build !purego
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// func xorBytes(dst, a, b *byte, n int)
//
// xorBytes stores a[i] ^ b[i] into dst[i] for n bytes. It processes 64-byte
// chunks in a loop and then finishes the remainder with at most one chunk
// each of 32, 16, 8, 4, 2 and 1 bytes.
//
// Register use:
//   R4  = dst pointer (advanced as bytes are written)
//   R5  = a pointer   (advanced as bytes are read)
//   R6  = b pointer   (advanced as bytes are read)
//   R7  = number of bytes still to process
//   R9  = chunk-size threshold; starts at 64 and is halved (SRLV $1) before
//         each smaller chunk size is tested
//   R10-R17 = scratch for loaded words and XOR results
//
// NOTE(review): when n == 0 none of the branches in "tail" is taken and
// control falls through into loop64; callers presumably guarantee n > 0 —
// confirm.
|
||||
TEXT ·xorBytes(SB), NOSPLIT, $0
|
||||
// Load the four arguments from the frame.
MOVV dst+0(FP), R4
|
||||
MOVV a+8(FP), R5
|
||||
MOVV b+16(FP), R6
|
||||
MOVV n+24(FP), R7
|
||||
|
||||
// R9 = 64: go straight to the main loop when at least one full chunk exists.
MOVV $64, R9
|
||||
BGEU R7, R9, loop64 // n >= 64
|
||||
// Fewer than 64 bytes in total: halve R9 step by step and jump to the
// largest chunk size that fits. Entering xor_N from here leaves R9 == N,
// the same value the xor_N_check fall-through path produces, so the later
// *_check labels can keep halving R9 consistently.
tail:
|
||||
SRLV $1, R9
|
||||
BGEU R7, R9, xor_32 // n >= 32 && n < 64
|
||||
SRLV $1, R9
|
||||
BGEU R7, R9, xor_16 // n >= 16 && n < 32
|
||||
SRLV $1, R9
|
||||
BGEU R7, R9, xor_8 // n >= 8 && n < 16
|
||||
SRLV $1, R9
|
||||
BGEU R7, R9, xor_4 // n >= 4 && n < 8
|
||||
SRLV $1, R9
|
||||
BGEU R7, R9, xor_2 // n >= 2 && n < 4
|
||||
SRLV $1, R9
|
||||
BGEU R7, R9, xor_1 // n = 1
|
||||
|
||||
// Main loop: XOR 64 bytes per iteration, as two groups of four 8-byte words.
loop64:
|
||||
// First 32 bytes: load four words from a and four from b.
MOVV (R5), R10
|
||||
MOVV 8(R5), R11
|
||||
MOVV 16(R5), R12
|
||||
MOVV 24(R5), R13
|
||||
MOVV (R6), R14
|
||||
MOVV 8(R6), R15
|
||||
MOVV 16(R6), R16
|
||||
MOVV 24(R6), R17
|
||||
// XOR the a words into the b words (results in R14-R17) and store to dst.
XOR R10, R14
|
||||
XOR R11, R15
|
||||
XOR R12, R16
|
||||
XOR R13, R17
|
||||
MOVV R14, (R4)
|
||||
MOVV R15, 8(R4)
|
||||
MOVV R16, 16(R4)
|
||||
MOVV R17, 24(R4)
|
||||
// Second 32 bytes (offsets 32..56), reusing the same scratch registers.
MOVV 32(R5), R10
|
||||
MOVV 40(R5), R11
|
||||
MOVV 48(R5), R12
|
||||
MOVV 56(R5), R13
|
||||
MOVV 32(R6), R14
|
||||
MOVV 40(R6), R15
|
||||
MOVV 48(R6), R16
|
||||
MOVV 56(R6), R17
|
||||
XOR R10, R14
|
||||
XOR R11, R15
|
||||
XOR R12, R16
|
||||
XOR R13, R17
|
||||
MOVV R14, 32(R4)
|
||||
MOVV R15, 40(R4)
|
||||
MOVV R16, 48(R4)
|
||||
MOVV R17, 56(R4)
|
||||
// Advance all three pointers and shrink the remaining count by one chunk.
ADDV $64, R5
|
||||
ADDV $64, R6
|
||||
ADDV $64, R4
|
||||
SUBV $64, R7
|
||||
// 64 in R9
|
||||
// Repeat while a full 64-byte chunk remains; finish if nothing is left.
BGEU R7, R9, loop64
|
||||
BEQ R7, R0, end
|
||||
|
||||
// Remainder after the loop: R9 goes 64 -> 32; skip if fewer than 32 remain.
xor_32_check:
|
||||
SRLV $1, R9
|
||||
BLT R7, R9, xor_16_check
|
||||
// XOR one 32-byte chunk (four 8-byte words).
xor_32:
|
||||
MOVV (R5), R10
|
||||
MOVV 8(R5), R11
|
||||
MOVV 16(R5), R12
|
||||
MOVV 24(R5), R13
|
||||
MOVV (R6), R14
|
||||
MOVV 8(R6), R15
|
||||
MOVV 16(R6), R16
|
||||
MOVV 24(R6), R17
|
||||
XOR R10, R14
|
||||
XOR R11, R15
|
||||
XOR R12, R16
|
||||
XOR R13, R17
|
||||
MOVV R14, (R4)
|
||||
MOVV R15, 8(R4)
|
||||
MOVV R16, 16(R4)
|
||||
MOVV R17, 24(R4)
|
||||
ADDV $32, R5
|
||||
ADDV $32, R6
|
||||
ADDV $32, R4
|
||||
SUBV $32, R7
|
||||
BEQ R7, R0, end
|
||||
|
||||
// R9 goes 32 -> 16; skip if fewer than 16 bytes remain.
xor_16_check:
|
||||
SRLV $1, R9
|
||||
BLT R7, R9, xor_8_check
|
||||
// XOR one 16-byte chunk (two 8-byte words).
xor_16:
|
||||
MOVV (R5), R10
|
||||
MOVV 8(R5), R11
|
||||
MOVV (R6), R12
|
||||
MOVV 8(R6), R13
|
||||
XOR R10, R12
|
||||
XOR R11, R13
|
||||
MOVV R12, (R4)
|
||||
MOVV R13, 8(R4)
|
||||
ADDV $16, R5
|
||||
ADDV $16, R6
|
||||
ADDV $16, R4
|
||||
SUBV $16, R7
|
||||
BEQ R7, R0, end
|
||||
|
||||
// R9 goes 16 -> 8; skip if fewer than 8 bytes remain.
xor_8_check:
|
||||
SRLV $1, R9
|
||||
BLT R7, R9, xor_4_check
|
||||
// XOR one 8-byte word.
xor_8:
|
||||
MOVV (R5), R10
|
||||
MOVV (R6), R11
|
||||
XOR R10, R11
|
||||
MOVV R11, (R4)
|
||||
ADDV $8, R5
|
||||
ADDV $8, R6
|
||||
ADDV $8, R4
|
||||
SUBV $8, R7
|
||||
BEQ R7, R0, end
|
||||
|
||||
// R9 goes 8 -> 4; skip if fewer than 4 bytes remain.
xor_4_check:
|
||||
SRLV $1, R9
|
||||
BLT R7, R9, xor_2_check
|
||||
// XOR one 4-byte word (MOVW).
xor_4:
|
||||
MOVW (R5), R10
|
||||
MOVW (R6), R11
|
||||
XOR R10, R11
|
||||
MOVW R11, (R4)
|
||||
ADDV $4, R5
|
||||
ADDV $4, R6
|
||||
ADDV $4, R4
|
||||
SUBV $4, R7
|
||||
BEQ R7, R0, end
|
||||
|
||||
// R9 goes 4 -> 2; with fewer than 2 bytes left go straight to the last byte.
xor_2_check:
|
||||
SRLV $1, R9
|
||||
BLT R7, R9, xor_1
|
||||
// XOR one 2-byte halfword (MOVH).
xor_2:
|
||||
MOVH (R5), R10
|
||||
MOVH (R6), R11
|
||||
XOR R10, R11
|
||||
MOVH R11, (R4)
|
||||
ADDV $2, R5
|
||||
ADDV $2, R6
|
||||
ADDV $2, R4
|
||||
SUBV $2, R7
|
||||
BEQ R7, R0, end
|
||||
|
||||
// XOR the final single byte; pointers and count are not updated because
// nothing follows.
xor_1:
|
||||
MOVB (R5), R10
|
||||
MOVB (R6), R11
|
||||
XOR R10, R11
|
||||
MOVB R11, (R4)
|
||||
|
||||
end:
|
||||
RET
|
||||
Loading…
Reference in New Issue