mirror of https://github.com/golang/go.git
crypto/aes: speed up using AES-NI on amd64
This CL requires CL 5970055. benchmark old ns/op new ns/op delta BenchmarkEncrypt 161 23 -85.71% BenchmarkDecrypt 158 24 -84.24% BenchmarkExpand 526 62 -88.21% benchmark old MB/s new MB/s speedup BenchmarkEncrypt 99.32 696.19 7.01x BenchmarkDecrypt 100.93 641.56 6.36x R=golang-dev, bradfitz, dave, rsc CC=golang-dev https://golang.org/cl/6549055
This commit is contained in:
parent
e039c405c8
commit
948db4e091
|
|
@ -221,7 +221,10 @@ L:
|
|||
if tt.dec != nil {
|
||||
dec = make([]uint32, len(tt.dec))
|
||||
}
|
||||
expandKey(tt.key, enc, dec)
|
||||
// This test could only test Go version of expandKey because asm
|
||||
// version might use different memory layout for expanded keys
|
||||
// This is OK because we don't expose expanded keys to the outside
|
||||
expandKeyGo(tt.key, enc, dec)
|
||||
for j, v := range enc {
|
||||
if v != tt.enc[j] {
|
||||
t.Errorf("key %d: enc[%d] = %#x, want %#x", i, j, v, tt.enc[j])
|
||||
|
|
|
|||
|
|
@ -0,0 +1,287 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// func hasAsm() bool
|
||||
// returns whether AES-NI is supported
|
||||
TEXT ·hasAsm(SB),7,$0
|
||||
XORQ AX, AX
|
||||
INCL AX
|
||||
CPUID
|
||||
SHRQ $25, CX
|
||||
ANDQ $1, CX
|
||||
MOVB CX, ret+0(FP)
|
||||
RET
|
||||
|
||||
// func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
|
||||
TEXT ·encryptBlockAsm(SB),7,$0
|
||||
MOVQ nr+0(FP), CX
|
||||
MOVQ xk+8(FP), AX
|
||||
MOVQ dst+16(FP), DX
|
||||
MOVQ src+24(FP), BX
|
||||
MOVUPS 0(AX), X1
|
||||
MOVUPS 0(BX), X0
|
||||
ADDQ $16, AX
|
||||
PXOR X1, X0
|
||||
SUBQ $12, CX
|
||||
JE Lenc196
|
||||
JB Lenc128
|
||||
Lenc256:
|
||||
MOVUPS 0(AX), X1
|
||||
AESENC X1, X0
|
||||
MOVUPS 16(AX), X1
|
||||
AESENC X1, X0
|
||||
ADDQ $32, AX
|
||||
Lenc196:
|
||||
MOVUPS 0(AX), X1
|
||||
AESENC X1, X0
|
||||
MOVUPS 16(AX), X1
|
||||
AESENC X1, X0
|
||||
ADDQ $32, AX
|
||||
Lenc128:
|
||||
MOVUPS 0(AX), X1
|
||||
AESENC X1, X0
|
||||
MOVUPS 16(AX), X1
|
||||
AESENC X1, X0
|
||||
MOVUPS 32(AX), X1
|
||||
AESENC X1, X0
|
||||
MOVUPS 48(AX), X1
|
||||
AESENC X1, X0
|
||||
MOVUPS 64(AX), X1
|
||||
AESENC X1, X0
|
||||
MOVUPS 80(AX), X1
|
||||
AESENC X1, X0
|
||||
MOVUPS 96(AX), X1
|
||||
AESENC X1, X0
|
||||
MOVUPS 112(AX), X1
|
||||
AESENC X1, X0
|
||||
MOVUPS 128(AX), X1
|
||||
AESENC X1, X0
|
||||
MOVUPS 144(AX), X1
|
||||
AESENCLAST X1, X0
|
||||
MOVUPS X0, 0(DX)
|
||||
RET
|
||||
|
||||
// func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
|
||||
TEXT ·decryptBlockAsm(SB),7,$0
|
||||
MOVQ nr+0(FP), CX
|
||||
MOVQ xk+8(FP), AX
|
||||
MOVQ dst+16(FP), DX
|
||||
MOVQ src+24(FP), BX
|
||||
MOVUPS 0(AX), X1
|
||||
MOVUPS 0(BX), X0
|
||||
ADDQ $16, AX
|
||||
PXOR X1, X0
|
||||
SUBQ $12, CX
|
||||
JE Ldec196
|
||||
JB Ldec128
|
||||
Ldec256:
|
||||
MOVUPS 0(AX), X1
|
||||
AESDEC X1, X0
|
||||
MOVUPS 16(AX), X1
|
||||
AESDEC X1, X0
|
||||
ADDQ $32, AX
|
||||
Ldec196:
|
||||
MOVUPS 0(AX), X1
|
||||
AESDEC X1, X0
|
||||
MOVUPS 16(AX), X1
|
||||
AESDEC X1, X0
|
||||
ADDQ $32, AX
|
||||
Ldec128:
|
||||
MOVUPS 0(AX), X1
|
||||
AESDEC X1, X0
|
||||
MOVUPS 16(AX), X1
|
||||
AESDEC X1, X0
|
||||
MOVUPS 32(AX), X1
|
||||
AESDEC X1, X0
|
||||
MOVUPS 48(AX), X1
|
||||
AESDEC X1, X0
|
||||
MOVUPS 64(AX), X1
|
||||
AESDEC X1, X0
|
||||
MOVUPS 80(AX), X1
|
||||
AESDEC X1, X0
|
||||
MOVUPS 96(AX), X1
|
||||
AESDEC X1, X0
|
||||
MOVUPS 112(AX), X1
|
||||
AESDEC X1, X0
|
||||
MOVUPS 128(AX), X1
|
||||
AESDEC X1, X0
|
||||
MOVUPS 144(AX), X1
|
||||
AESDECLAST X1, X0
|
||||
MOVUPS X0, 0(DX)
|
||||
RET
|
||||
|
||||
// func expandKeyAsm(nr int, key *byte, enc, dec *uint32) {
|
||||
// Note that round keys are stored in uint128 format, not uint32
|
||||
TEXT ·expandKeyAsm(SB),7,$0
|
||||
MOVQ nr+0(FP), CX
|
||||
MOVQ key+8(FP), AX
|
||||
MOVQ enc+16(FP), BX
|
||||
MOVQ dec+24(FP), DX
|
||||
MOVUPS (AX), X0
|
||||
// enc
|
||||
MOVUPS X0, (BX)
|
||||
ADDQ $16, BX
|
||||
PXOR X4, X4 // _expand_key_* expect X4 to be zero
|
||||
CMPL CX, $12
|
||||
JE Lexp_enc196
|
||||
JB Lexp_enc128
|
||||
Lexp_enc256:
|
||||
MOVUPS 16(AX), X2
|
||||
MOVUPS X2, (BX)
|
||||
ADDQ $16, BX
|
||||
AESKEYGENASSIST $0x01, X2, X1
|
||||
CALL _expand_key_256a<>(SB)
|
||||
AESKEYGENASSIST $0x01, X0, X1
|
||||
CALL _expand_key_256b<>(SB)
|
||||
AESKEYGENASSIST $0x02, X2, X1
|
||||
CALL _expand_key_256a<>(SB)
|
||||
AESKEYGENASSIST $0x02, X0, X1
|
||||
CALL _expand_key_256b<>(SB)
|
||||
AESKEYGENASSIST $0x04, X2, X1
|
||||
CALL _expand_key_256a<>(SB)
|
||||
AESKEYGENASSIST $0x04, X0, X1
|
||||
CALL _expand_key_256b<>(SB)
|
||||
AESKEYGENASSIST $0x08, X2, X1
|
||||
CALL _expand_key_256a<>(SB)
|
||||
AESKEYGENASSIST $0x08, X0, X1
|
||||
CALL _expand_key_256b<>(SB)
|
||||
AESKEYGENASSIST $0x10, X2, X1
|
||||
CALL _expand_key_256a<>(SB)
|
||||
AESKEYGENASSIST $0x10, X0, X1
|
||||
CALL _expand_key_256b<>(SB)
|
||||
AESKEYGENASSIST $0x20, X2, X1
|
||||
CALL _expand_key_256a<>(SB)
|
||||
AESKEYGENASSIST $0x20, X0, X1
|
||||
CALL _expand_key_256b<>(SB)
|
||||
AESKEYGENASSIST $0x40, X2, X1
|
||||
CALL _expand_key_256a<>(SB)
|
||||
JMP Lexp_dec
|
||||
Lexp_enc196:
|
||||
MOVQ 16(AX), X2
|
||||
AESKEYGENASSIST $0x01, X2, X1
|
||||
CALL _expand_key_192a<>(SB)
|
||||
AESKEYGENASSIST $0x02, X2, X1
|
||||
CALL _expand_key_192b<>(SB)
|
||||
AESKEYGENASSIST $0x04, X2, X1
|
||||
CALL _expand_key_192a<>(SB)
|
||||
AESKEYGENASSIST $0x08, X2, X1
|
||||
CALL _expand_key_192b<>(SB)
|
||||
AESKEYGENASSIST $0x10, X2, X1
|
||||
CALL _expand_key_192a<>(SB)
|
||||
AESKEYGENASSIST $0x20, X2, X1
|
||||
CALL _expand_key_192b<>(SB)
|
||||
AESKEYGENASSIST $0x40, X2, X1
|
||||
CALL _expand_key_192a<>(SB)
|
||||
AESKEYGENASSIST $0x80, X2, X1
|
||||
CALL _expand_key_192b<>(SB)
|
||||
JMP Lexp_dec
|
||||
Lexp_enc128:
|
||||
AESKEYGENASSIST $0x01, X0, X1
|
||||
CALL _expand_key_128<>(SB)
|
||||
AESKEYGENASSIST $0x02, X0, X1
|
||||
CALL _expand_key_128<>(SB)
|
||||
AESKEYGENASSIST $0x04, X0, X1
|
||||
CALL _expand_key_128<>(SB)
|
||||
AESKEYGENASSIST $0x08, X0, X1
|
||||
CALL _expand_key_128<>(SB)
|
||||
AESKEYGENASSIST $0x10, X0, X1
|
||||
CALL _expand_key_128<>(SB)
|
||||
AESKEYGENASSIST $0x20, X0, X1
|
||||
CALL _expand_key_128<>(SB)
|
||||
AESKEYGENASSIST $0x40, X0, X1
|
||||
CALL _expand_key_128<>(SB)
|
||||
AESKEYGENASSIST $0x80, X0, X1
|
||||
CALL _expand_key_128<>(SB)
|
||||
AESKEYGENASSIST $0x1b, X0, X1
|
||||
CALL _expand_key_128<>(SB)
|
||||
AESKEYGENASSIST $0x36, X0, X1
|
||||
CALL _expand_key_128<>(SB)
|
||||
Lexp_dec:
|
||||
// dec
|
||||
SUBQ $16, BX
|
||||
MOVUPS (BX), X1
|
||||
MOVUPS X1, (DX)
|
||||
DECQ CX
|
||||
Lexp_dec_loop:
|
||||
MOVUPS -16(BX), X1
|
||||
AESIMC X1, X0
|
||||
MOVUPS X0, 16(DX)
|
||||
SUBQ $16, BX
|
||||
ADDQ $16, DX
|
||||
DECQ CX
|
||||
JNZ Lexp_dec_loop
|
||||
MOVUPS -16(BX), X0
|
||||
MOVUPS X0, 16(DX)
|
||||
RET
|
||||
|
||||
#define PSHUFD_X0_X0_ BYTE $0x66; BYTE $0x0f; BYTE $0x70; BYTE $0xc0
|
||||
#define PSHUFD_X1_X1_ BYTE $0x66; BYTE $0x0f; BYTE $0x70; BYTE $0xc9
|
||||
TEXT _expand_key_128<>(SB),7,$0
|
||||
PSHUFD $0xff, X1, X1
|
||||
SHUFPS $0x10, X0, X4
|
||||
PXOR X4, X0
|
||||
SHUFPS $0x8c, X0, X4
|
||||
PXOR X4, X0
|
||||
PXOR X1, X0
|
||||
MOVUPS X0, (BX)
|
||||
ADDQ $16, BX
|
||||
RET
|
||||
|
||||
#define PSLLDQ_X5_ BYTE $0x66; BYTE $0x0f; BYTE $0x73; BYTE $0xfd
|
||||
#define PSHUFD_X0_X3_ BYTE $0x66; BYTE $0x0f; BYTE $0x70; BYTE $0xd8
|
||||
TEXT _expand_key_192a<>(SB),7,$0
|
||||
PSHUFD $0x55, X1, X1
|
||||
SHUFPS $0x10, X0, X4
|
||||
PXOR X4, X0
|
||||
SHUFPS $0x8c, X0, X4
|
||||
PXOR X4, X0
|
||||
PXOR X1, X0
|
||||
|
||||
MOVAPS X2, X5
|
||||
MOVAPS X2, X6
|
||||
PSLLDQ_X5_; BYTE $0x4
|
||||
PSHUFD $0xff, X0, X3
|
||||
PXOR X3, X2
|
||||
PXOR X5, X2
|
||||
|
||||
MOVAPS X0, X1
|
||||
SHUFPS $0x44, X0, X6
|
||||
MOVUPS X6, (BX)
|
||||
SHUFPS $0x4e, X2, X1
|
||||
MOVUPS X1, 16(BX)
|
||||
ADDQ $32, BX
|
||||
RET
|
||||
|
||||
TEXT _expand_key_192b<>(SB),7,$0
|
||||
PSHUFD $0x55, X1, X1
|
||||
SHUFPS $0x10, X0, X4
|
||||
PXOR X4, X0
|
||||
SHUFPS $0x8c, X0, X4
|
||||
PXOR X4, X0
|
||||
PXOR X1, X0
|
||||
|
||||
MOVAPS X2, X5
|
||||
PSLLDQ_X5_; BYTE $0x4
|
||||
PSHUFD $0xff, X0, X3
|
||||
PXOR X3, X2
|
||||
PXOR X5, X2
|
||||
|
||||
MOVUPS X0, (BX)
|
||||
ADDQ $16, BX
|
||||
RET
|
||||
|
||||
TEXT _expand_key_256a<>(SB),7,$0
|
||||
JMP _expand_key_128<>(SB)
|
||||
|
||||
TEXT _expand_key_256b<>(SB),7,$0
|
||||
PSHUFD $0xaa, X1, X1
|
||||
SHUFPS $0x10, X2, X4
|
||||
PXOR X4, X2
|
||||
SHUFPS $0x8c, X2, X4
|
||||
PXOR X4, X2
|
||||
PXOR X1, X2
|
||||
|
||||
MOVUPS X2, (BX)
|
||||
ADDQ $16, BX
|
||||
RET
|
||||
|
|
@ -37,7 +37,7 @@
|
|||
package aes
|
||||
|
||||
// Encrypt one block from src into dst, using the expanded key xk.
|
||||
func encryptBlock(xk []uint32, dst, src []byte) {
|
||||
func encryptBlockGo(xk []uint32, dst, src []byte) {
|
||||
var s0, s1, s2, s3, t0, t1, t2, t3 uint32
|
||||
|
||||
s0 = uint32(src[0])<<24 | uint32(src[1])<<16 | uint32(src[2])<<8 | uint32(src[3])
|
||||
|
|
@ -82,7 +82,7 @@ func encryptBlock(xk []uint32, dst, src []byte) {
|
|||
}
|
||||
|
||||
// Decrypt one block from src into dst, using the expanded key xk.
|
||||
func decryptBlock(xk []uint32, dst, src []byte) {
|
||||
func decryptBlockGo(xk []uint32, dst, src []byte) {
|
||||
var s0, s1, s2, s3, t0, t1, t2, t3 uint32
|
||||
|
||||
s0 = uint32(src[0])<<24 | uint32(src[1])<<16 | uint32(src[2])<<8 | uint32(src[3])
|
||||
|
|
@ -139,7 +139,7 @@ func rotw(w uint32) uint32 { return w<<8 | w>>24 }
|
|||
|
||||
// Key expansion algorithm. See FIPS-197, Figure 11.
|
||||
// Their rcon[i] is our powx[i-1] << 24.
|
||||
func expandKey(key []byte, enc, dec []uint32) {
|
||||
func expandKeyGo(key []byte, enc, dec []uint32) {
|
||||
// Encryption key setup.
|
||||
var i int
|
||||
nk := len(key) / 4
|
||||
|
|
|
|||
|
|
@ -45,6 +45,10 @@ func NewCipher(key []byte) (cipher.Block, error) {
|
|||
|
||||
func (c *aesCipher) BlockSize() int { return BlockSize }
|
||||
|
||||
func (c *aesCipher) Encrypt(dst, src []byte) { encryptBlock(c.enc, dst, src) }
|
||||
func (c *aesCipher) Encrypt(dst, src []byte) {
|
||||
encryptBlock(c.enc, dst, src)
|
||||
}
|
||||
|
||||
func (c *aesCipher) Decrypt(dst, src []byte) { decryptBlock(c.dec, dst, src) }
|
||||
func (c *aesCipher) Decrypt(dst, src []byte) {
|
||||
decryptBlock(c.dec, dst, src)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,46 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build amd64
|
||||
|
||||
package aes
|
||||
|
||||
// defined in asm_$GOARCH.s
|
||||
func hasAsm() bool
|
||||
func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
|
||||
func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
|
||||
func expandKeyAsm(nr int, key *byte, enc *uint32, dec *uint32)
|
||||
|
||||
var useAsm = hasAsm()
|
||||
|
||||
func encryptBlock(xk []uint32, dst, src []byte) {
|
||||
if useAsm {
|
||||
encryptBlockAsm(len(xk)/4-1, &xk[0], &dst[0], &src[0])
|
||||
} else {
|
||||
encryptBlockGo(xk, dst, src)
|
||||
}
|
||||
}
|
||||
func decryptBlock(xk []uint32, dst, src []byte) {
|
||||
if useAsm {
|
||||
decryptBlockAsm(len(xk)/4-1, &xk[0], &dst[0], &src[0])
|
||||
} else {
|
||||
decryptBlockGo(xk, dst, src)
|
||||
}
|
||||
}
|
||||
func expandKey(key []byte, enc, dec []uint32) {
|
||||
if useAsm {
|
||||
rounds := 10
|
||||
switch len(key) {
|
||||
case 128 / 8:
|
||||
rounds = 10
|
||||
case 192 / 8:
|
||||
rounds = 12
|
||||
case 256 / 8:
|
||||
rounds = 14
|
||||
}
|
||||
expandKeyAsm(rounds, &key[0], &enc[0], &dec[0])
|
||||
} else {
|
||||
expandKeyGo(key, enc, dec)
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,19 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build !amd64
|
||||
|
||||
package aes
|
||||
|
||||
func encryptBlock(xk []uint32, dst, src []byte) {
|
||||
encryptBlockGo(xk, dst, src)
|
||||
}
|
||||
|
||||
func decryptBlock(xk []uint32, dst, src []byte) {
|
||||
decryptBlockGo(xk, dst, src)
|
||||
}
|
||||
|
||||
func expandKey(key []byte, enc, dec []uint32) {
|
||||
expandKeyGo(key, enc, dec)
|
||||
}
|
||||
Loading…
Reference in New Issue