mirror of https://github.com/golang/go.git
crypto/aes: Avo port of asm_amd64.s
This implementation utilizes the same registers found in the reference
implementation, aiming to produce a minimal semantic diff between the
Avo-generated output and the original hand-written assembly.
To verify the Avo implementation, the reference and Avo-generated
assembly files are fed to `go tool asm`, capturing the debug output into
corresponding temp files. The debug output contains supplementary
metadata (line numbers, instruction offsets, and source file references)
that must be removed in order to obtain a semantic diff of the two
files. This is accomplished via a small utility script written in awk.
The reference assembly file does not specify a frame size for some of
the defined assembly functions. Avo automatically infers the frame size
when generating TEXT directives, leading to a diff on those lines.
Commands used to verify Avo output:
GOROOT=$(go env GOROOT)
ASM_PATH="src/crypto/aes/asm_amd64.s"
REFERENCE="54fe0fd43fcf8609666c16ae6d15ed92873b1564"
go tool asm -o /dev/null -I "$GOROOT"/src/runtime -debug \
<(git cat-file -p "$REFERENCE:$ASM_PATH") \
> /tmp/reference.s
go tool asm -o /dev/null -I "$GOROOT"/src/runtime -debug \
"$ASM_PATH" \
> /tmp/avo.s
normalize(){
awk '{
$1=$2=$3="";
print substr($0,4)
}'
}
diff <(normalize < /tmp/reference.s) <(normalize < /tmp/avo.s)
1c1
< TEXT <unlinkable>.encryptBlockAsm(SB), NOSPLIT, $0
---
> TEXT <unlinkable>.encryptBlockAsm(SB), NOSPLIT, $0-32
45c45
< TEXT <unlinkable>.decryptBlockAsm(SB), NOSPLIT, $0
---
> TEXT <unlinkable>.decryptBlockAsm(SB), NOSPLIT, $0-32
89c89
< TEXT <unlinkable>.expandKeyAsm(SB), NOSPLIT, $0
---
> TEXT <unlinkable>.expandKeyAsm(SB), NOSPLIT, $0-32
Change-Id: If647584df4137146d355f91ac0f6a8285d07c932
Reviewed-on: https://go-review.googlesource.com/c/go/+/600375
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Reviewed-by: Roland Shoemaker <roland@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Filippo Valsorda <filippo@golang.org>
This commit is contained in:
parent
6ee8c07c3c
commit
d61b73c1d1
|
|
@ -356,6 +356,7 @@ var excluded = map[string]bool{
|
|||
|
||||
// go.dev/issue/46027: some imports are missing for this submodule.
|
||||
"crypto/aes/_asm/gcm": true,
|
||||
"crypto/aes/_asm/standard": true,
|
||||
"crypto/internal/bigmod/_asm": true,
|
||||
"crypto/internal/edwards25519/field/_asm": true,
|
||||
"crypto/md5/_asm": true,
|
||||
|
|
|
|||
|
|
@ -0,0 +1,385 @@
|
|||
// Copyright 2024 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
. "github.com/mmcloughlin/avo/build"
|
||||
"github.com/mmcloughlin/avo/ir"
|
||||
. "github.com/mmcloughlin/avo/operand"
|
||||
. "github.com/mmcloughlin/avo/reg"
|
||||
)
|
||||
|
||||
//go:generate go run . -out ../../asm_amd64.s -pkg aes
|
||||
|
||||
func main() {
|
||||
Package("crypto/aes")
|
||||
ConstraintExpr("!purego")
|
||||
encryptBlockAsm()
|
||||
decryptBlockAsm()
|
||||
expandKeyAsm()
|
||||
_expand_key_128()
|
||||
_expand_key_192a()
|
||||
_expand_key_192b()
|
||||
_expand_key_256a()
|
||||
_expand_key_256b()
|
||||
Generate()
|
||||
|
||||
var internalFunctions []string = []string{
|
||||
"·_expand_key_128<>",
|
||||
"·_expand_key_192a<>",
|
||||
"·_expand_key_192b<>",
|
||||
"·_expand_key_256a<>",
|
||||
"·_expand_key_256b<>",
|
||||
}
|
||||
removePeskyUnicodeDot(internalFunctions, "../../asm_amd64.s")
|
||||
}
|
||||
|
||||
func encryptBlockAsm() {
|
||||
Implement("encryptBlockAsm")
|
||||
Attributes(NOSPLIT)
|
||||
AllocLocal(0)
|
||||
|
||||
Load(Param("nr"), RCX)
|
||||
Load(Param("xk"), RAX)
|
||||
Load(Param("dst"), RDX)
|
||||
Load(Param("src"), RBX)
|
||||
MOVUPS(Mem{Base: AX}.Offset(0), X1)
|
||||
MOVUPS(Mem{Base: BX}.Offset(0), X0)
|
||||
ADDQ(Imm(16), RAX)
|
||||
PXOR(X1, X0)
|
||||
SUBQ(Imm(12), RCX)
|
||||
JE(LabelRef("Lenc192"))
|
||||
JB(LabelRef("Lenc128"))
|
||||
|
||||
Label("Lenc256")
|
||||
MOVUPS(Mem{Base: AX}.Offset(0), X1)
|
||||
AESENC(X1, X0)
|
||||
MOVUPS(Mem{Base: AX}.Offset(16), X1)
|
||||
AESENC(X1, X0)
|
||||
ADDQ(Imm(32), RAX)
|
||||
|
||||
Label("Lenc192")
|
||||
MOVUPS(Mem{Base: AX}.Offset(0), X1)
|
||||
AESENC(X1, X0)
|
||||
MOVUPS(Mem{Base: AX}.Offset(16), X1)
|
||||
AESENC(X1, X0)
|
||||
ADDQ(Imm(32), RAX)
|
||||
|
||||
Label("Lenc128")
|
||||
MOVUPS(Mem{Base: AX}.Offset(0), X1)
|
||||
AESENC(X1, X0)
|
||||
MOVUPS(Mem{Base: AX}.Offset(16), X1)
|
||||
AESENC(X1, X0)
|
||||
MOVUPS(Mem{Base: AX}.Offset(32), X1)
|
||||
AESENC(X1, X0)
|
||||
MOVUPS(Mem{Base: AX}.Offset(48), X1)
|
||||
AESENC(X1, X0)
|
||||
MOVUPS(Mem{Base: AX}.Offset(64), X1)
|
||||
AESENC(X1, X0)
|
||||
MOVUPS(Mem{Base: AX}.Offset(80), X1)
|
||||
AESENC(X1, X0)
|
||||
MOVUPS(Mem{Base: AX}.Offset(96), X1)
|
||||
AESENC(X1, X0)
|
||||
MOVUPS(Mem{Base: AX}.Offset(112), X1)
|
||||
AESENC(X1, X0)
|
||||
MOVUPS(Mem{Base: AX}.Offset(128), X1)
|
||||
AESENC(X1, X0)
|
||||
MOVUPS(Mem{Base: AX}.Offset(144), X1)
|
||||
AESENCLAST(X1, X0)
|
||||
MOVUPS(X0, Mem{Base: DX}.Offset(0))
|
||||
RET()
|
||||
}
|
||||
|
||||
func decryptBlockAsm() {
|
||||
Implement("decryptBlockAsm")
|
||||
Attributes(NOSPLIT)
|
||||
AllocLocal(0)
|
||||
|
||||
Load(Param("nr"), RCX)
|
||||
Load(Param("xk"), RAX)
|
||||
Load(Param("dst"), RDX)
|
||||
Load(Param("src"), RBX)
|
||||
|
||||
MOVUPS(Mem{Base: AX}.Offset(0), X1)
|
||||
MOVUPS(Mem{Base: BX}.Offset(0), X0)
|
||||
ADDQ(Imm(16), RAX)
|
||||
PXOR(X1, X0)
|
||||
SUBQ(Imm(12), RCX)
|
||||
JE(LabelRef("Ldec192"))
|
||||
JB(LabelRef("Ldec128"))
|
||||
|
||||
Label("Ldec256")
|
||||
MOVUPS(Mem{Base: AX}.Offset(0), X1)
|
||||
AESDEC(X1, X0)
|
||||
MOVUPS(Mem{Base: AX}.Offset(16), X1)
|
||||
AESDEC(X1, X0)
|
||||
ADDQ(Imm(32), RAX)
|
||||
|
||||
Label("Ldec192")
|
||||
MOVUPS(Mem{Base: AX}.Offset(0), X1)
|
||||
AESDEC(X1, X0)
|
||||
MOVUPS(Mem{Base: AX}.Offset(16), X1)
|
||||
AESDEC(X1, X0)
|
||||
ADDQ(Imm(32), RAX)
|
||||
|
||||
Label("Ldec128")
|
||||
MOVUPS(Mem{Base: AX}.Offset(0), X1)
|
||||
AESDEC(X1, X0)
|
||||
MOVUPS(Mem{Base: AX}.Offset(16), X1)
|
||||
AESDEC(X1, X0)
|
||||
MOVUPS(Mem{Base: AX}.Offset(32), X1)
|
||||
AESDEC(X1, X0)
|
||||
MOVUPS(Mem{Base: AX}.Offset(48), X1)
|
||||
AESDEC(X1, X0)
|
||||
MOVUPS(Mem{Base: AX}.Offset(64), X1)
|
||||
AESDEC(X1, X0)
|
||||
MOVUPS(Mem{Base: AX}.Offset(80), X1)
|
||||
AESDEC(X1, X0)
|
||||
MOVUPS(Mem{Base: AX}.Offset(96), X1)
|
||||
AESDEC(X1, X0)
|
||||
MOVUPS(Mem{Base: AX}.Offset(112), X1)
|
||||
AESDEC(X1, X0)
|
||||
MOVUPS(Mem{Base: AX}.Offset(128), X1)
|
||||
AESDEC(X1, X0)
|
||||
MOVUPS(Mem{Base: AX}.Offset(144), X1)
|
||||
AESDECLAST(X1, X0)
|
||||
MOVUPS(X0, Mem{Base: DX}.Offset(0))
|
||||
RET()
|
||||
}
|
||||
|
||||
// Note that round keys are stored in uint128 format, not uint32
|
||||
func expandKeyAsm() {
|
||||
Implement("expandKeyAsm")
|
||||
Attributes(NOSPLIT)
|
||||
AllocLocal(0)
|
||||
|
||||
Load(Param("nr"), RCX)
|
||||
Load(Param("key"), RAX)
|
||||
Load(Param("enc"), RBX)
|
||||
Load(Param("dec"), RDX)
|
||||
|
||||
MOVUPS(Mem{Base: AX}, X0)
|
||||
Comment("enc")
|
||||
MOVUPS(X0, Mem{Base: BX})
|
||||
ADDQ(Imm(16), RBX)
|
||||
PXOR(X4, X4) // _expand_key_* expect X4 to be zero
|
||||
CMPL(ECX, Imm(12))
|
||||
JE(LabelRef("Lexp_enc192"))
|
||||
JB(LabelRef("Lexp_enc128"))
|
||||
|
||||
Lexp_enc256()
|
||||
Lexp_enc192()
|
||||
Lexp_enc128()
|
||||
Lexp_dec()
|
||||
Lexp_dec_loop()
|
||||
}
|
||||
|
||||
func Lexp_enc256() {
|
||||
Label("Lexp_enc256")
|
||||
MOVUPS(Mem{Base: AX}.Offset(16), X2)
|
||||
MOVUPS(X2, Mem{Base: BX})
|
||||
ADDQ(Imm(16), RBX)
|
||||
|
||||
var rcon uint64 = 1
|
||||
for i := 0; i < 6; i++ {
|
||||
AESKEYGENASSIST(Imm(rcon), X2, X1)
|
||||
CALL(LabelRef("_expand_key_256a<>(SB)"))
|
||||
AESKEYGENASSIST(Imm(rcon), X0, X1)
|
||||
CALL(LabelRef("_expand_key_256b<>(SB)"))
|
||||
rcon <<= 1
|
||||
}
|
||||
AESKEYGENASSIST(Imm(0x40), X2, X1)
|
||||
CALL(LabelRef("_expand_key_256a<>(SB)"))
|
||||
JMP(LabelRef("Lexp_dec"))
|
||||
}
|
||||
|
||||
func Lexp_enc192() {
|
||||
Label("Lexp_enc192")
|
||||
MOVQ(Mem{Base: AX}.Offset(16), X2)
|
||||
|
||||
var rcon uint64 = 1
|
||||
for i := 0; i < 8; i++ {
|
||||
AESKEYGENASSIST(Imm(rcon), X2, X1)
|
||||
if i%2 == 0 {
|
||||
CALL(LabelRef("_expand_key_192a<>(SB)"))
|
||||
} else {
|
||||
CALL(LabelRef("_expand_key_192b<>(SB)"))
|
||||
}
|
||||
rcon <<= 1
|
||||
}
|
||||
JMP(LabelRef("Lexp_dec"))
|
||||
}
|
||||
|
||||
func Lexp_enc128() {
|
||||
Label("Lexp_enc128")
|
||||
var rcon uint64 = 1
|
||||
for i := 0; i < 8; i++ {
|
||||
AESKEYGENASSIST(Imm(rcon), X0, X1)
|
||||
CALL(LabelRef("_expand_key_128<>(SB)"))
|
||||
rcon <<= 1
|
||||
}
|
||||
AESKEYGENASSIST(Imm(0x1b), X0, X1)
|
||||
CALL(LabelRef("_expand_key_128<>(SB)"))
|
||||
AESKEYGENASSIST(Imm(0x36), X0, X1)
|
||||
CALL(LabelRef("_expand_key_128<>(SB)"))
|
||||
}
|
||||
|
||||
func Lexp_dec() {
|
||||
Label("Lexp_dec")
|
||||
Comment("dec")
|
||||
SUBQ(Imm(16), RBX)
|
||||
MOVUPS(Mem{Base: BX}, X1)
|
||||
MOVUPS(X1, Mem{Base: DX})
|
||||
DECQ(RCX)
|
||||
}
|
||||
|
||||
func Lexp_dec_loop() {
|
||||
Label("Lexp_dec_loop")
|
||||
MOVUPS(Mem{Base: BX}.Offset(-16), X1)
|
||||
AESIMC(X1, X0)
|
||||
MOVUPS(X0, Mem{Base: DX}.Offset(16))
|
||||
SUBQ(Imm(16), RBX)
|
||||
ADDQ(Imm(16), RDX)
|
||||
DECQ(RCX)
|
||||
JNZ(LabelRef("Lexp_dec_loop"))
|
||||
MOVUPS(Mem{Base: BX}.Offset(-16), X0)
|
||||
MOVUPS(X0, Mem{Base: DX}.Offset(16))
|
||||
RET()
|
||||
}
|
||||
|
||||
func _expand_key_128() {
|
||||
Function("_expand_key_128<>")
|
||||
Attributes(NOSPLIT)
|
||||
AllocLocal(0)
|
||||
|
||||
PSHUFD(Imm(0xff), X1, X1)
|
||||
SHUFPS(Imm(0x10), X0, X4)
|
||||
PXOR(X4, X0)
|
||||
SHUFPS(Imm(0x8c), X0, X4)
|
||||
PXOR(X4, X0)
|
||||
PXOR(X1, X0)
|
||||
MOVUPS(X0, Mem{Base: BX})
|
||||
ADDQ(Imm(16), RBX)
|
||||
RET()
|
||||
}
|
||||
|
||||
func _expand_key_192a() {
|
||||
Function("_expand_key_192a<>")
|
||||
Attributes(NOSPLIT)
|
||||
AllocLocal(0)
|
||||
|
||||
PSHUFD(Imm(0x55), X1, X1)
|
||||
SHUFPS(Imm(0x10), X0, X4)
|
||||
PXOR(X4, X0)
|
||||
SHUFPS(Imm(0x8c), X0, X4)
|
||||
PXOR(X4, X0)
|
||||
PXOR(X1, X0)
|
||||
|
||||
MOVAPS(X2, X5)
|
||||
MOVAPS(X2, X6)
|
||||
PSLLDQ(Imm(0x4), X5)
|
||||
PSHUFD(Imm(0xff), X0, X3)
|
||||
PXOR(X3, X2)
|
||||
PXOR(X5, X2)
|
||||
|
||||
MOVAPS(X0, X1)
|
||||
SHUFPS(Imm(0x44), X0, X6)
|
||||
MOVUPS(X6, Mem{Base: BX})
|
||||
SHUFPS(Imm(0x4e), X2, X1)
|
||||
MOVUPS(X1, Mem{Base: BX}.Offset(16))
|
||||
ADDQ(Imm(32), RBX)
|
||||
RET()
|
||||
}
|
||||
|
||||
func _expand_key_192b() {
|
||||
Function("_expand_key_192b<>")
|
||||
Attributes(NOSPLIT)
|
||||
AllocLocal(0)
|
||||
|
||||
PSHUFD(Imm(0x55), X1, X1)
|
||||
SHUFPS(Imm(0x10), X0, X4)
|
||||
PXOR(X4, X0)
|
||||
SHUFPS(Imm(0x8c), X0, X4)
|
||||
PXOR(X4, X0)
|
||||
PXOR(X1, X0)
|
||||
|
||||
MOVAPS(X2, X5)
|
||||
PSLLDQ(Imm(0x4), X5)
|
||||
PSHUFD(Imm(0xff), X0, X3)
|
||||
PXOR(X3, X2)
|
||||
PXOR(X5, X2)
|
||||
|
||||
MOVUPS(X0, Mem{Base: BX})
|
||||
ADDQ(Imm(16), RBX)
|
||||
RET()
|
||||
}
|
||||
|
||||
func _expand_key_256a() {
|
||||
Function("_expand_key_256a<>")
|
||||
Attributes(NOSPLIT)
|
||||
AllocLocal(0)
|
||||
|
||||
// Hack to get Avo to emit:
|
||||
// JMP _expand_key_128<>(SB)
|
||||
Instruction(&ir.Instruction{
|
||||
Opcode: "JMP",
|
||||
Operands: []Op{
|
||||
LabelRef("_expand_key_128<>(SB)"),
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
func _expand_key_256b() {
|
||||
Function("_expand_key_256b<>")
|
||||
Attributes(NOSPLIT)
|
||||
AllocLocal(0)
|
||||
|
||||
PSHUFD(Imm(0xaa), X1, X1)
|
||||
SHUFPS(Imm(0x10), X2, X4)
|
||||
PXOR(X4, X2)
|
||||
SHUFPS(Imm(0x8c), X2, X4)
|
||||
PXOR(X4, X2)
|
||||
PXOR(X1, X2)
|
||||
|
||||
MOVUPS(X2, Mem{Base: BX})
|
||||
ADDQ(Imm(16), RBX)
|
||||
RET()
|
||||
}
|
||||
|
||||
const ThatPeskyUnicodeDot = "\u00b7"
|
||||
|
||||
// removePeskyUnicodeDot strips the dot from the relevant TEXT directives such that they
|
||||
// can exist as internal assembly functions
|
||||
//
|
||||
// Avo v0.6.0 does not support the generation of internal assembly functions. Go's unicode
|
||||
// dot tells the compiler to link a TEXT symbol to a function in the current Go package
|
||||
// (or another package if specified). Avo unconditionally prepends the unicode dot to all
|
||||
// TEXT symbols, making it impossible to emit an internal function without this hack.
|
||||
//
|
||||
// There is a pending PR to add internal functions to Avo:
|
||||
// https://github.com/mmcloughlin/avo/pull/443
|
||||
//
|
||||
// If merged it should allow the usage of InternalFunction("NAME") for the specified functions
|
||||
func removePeskyUnicodeDot(internalFunctions []string, target string) {
|
||||
bytes, err := os.ReadFile(target)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
content := string(bytes)
|
||||
|
||||
for _, from := range internalFunctions {
|
||||
to := strings.ReplaceAll(from, ThatPeskyUnicodeDot, "")
|
||||
content = strings.ReplaceAll(content, from, to)
|
||||
}
|
||||
|
||||
err = os.WriteFile(target, []byte(content), 0644)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
module std/crypto/aes/_asm/standard
|
||||
|
||||
go 1.24
|
||||
|
||||
require github.com/mmcloughlin/avo v0.6.0
|
||||
|
||||
require (
|
||||
golang.org/x/mod v0.20.0 // indirect
|
||||
golang.org/x/sync v0.8.0 // indirect
|
||||
golang.org/x/tools v0.24.0 // indirect
|
||||
)
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
github.com/mmcloughlin/avo v0.6.0 h1:QH6FU8SKoTLaVs80GA8TJuLNkUYl4VokHKlPhVDg4YY=
|
||||
github.com/mmcloughlin/avo v0.6.0/go.mod h1:8CoAGaCSYXtCPR+8y18Y9aB/kxb8JSS6FRI7mSkvD+8=
|
||||
golang.org/x/mod v0.20.0 h1:utOm6MM3R3dnawAiJgn0y+xvuYRsm1RKM/4giyfDgV0=
|
||||
golang.org/x/mod v0.20.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
|
||||
golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ=
|
||||
golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
||||
golang.org/x/tools v0.24.0 h1:J1shsA93PJUEVaUSaay7UXAyE8aimq3GW0pjlolpa24=
|
||||
golang.org/x/tools v0.24.0/go.mod h1:YhNqVBIfWHdzvTLs0d8LCuMhkKUgSUKldakyV7W/WDQ=
|
||||
|
|
@ -1,276 +1,286 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
// Code generated by command: go run asm_amd64.go -out ../../asm_amd64.s -pkg aes. DO NOT EDIT.
|
||||
|
||||
//go:build !purego
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
|
||||
TEXT ·encryptBlockAsm(SB),NOSPLIT,$0
|
||||
MOVQ nr+0(FP), CX
|
||||
MOVQ xk+8(FP), AX
|
||||
MOVQ dst+16(FP), DX
|
||||
MOVQ src+24(FP), BX
|
||||
MOVUPS 0(AX), X1
|
||||
MOVUPS 0(BX), X0
|
||||
ADDQ $16, AX
|
||||
PXOR X1, X0
|
||||
SUBQ $12, CX
|
||||
JE Lenc192
|
||||
JB Lenc128
|
||||
Lenc256:
|
||||
MOVUPS 0(AX), X1
|
||||
// func encryptBlockAsm(nr int, xk *uint32, dst *byte, src *byte)
|
||||
// Requires: AES, SSE, SSE2
|
||||
TEXT ·encryptBlockAsm(SB), NOSPLIT, $0-32
|
||||
MOVQ nr+0(FP), CX
|
||||
MOVQ xk+8(FP), AX
|
||||
MOVQ dst+16(FP), DX
|
||||
MOVQ src+24(FP), BX
|
||||
MOVUPS (AX), X1
|
||||
MOVUPS (BX), X0
|
||||
ADDQ $0x10, AX
|
||||
PXOR X1, X0
|
||||
SUBQ $0x0c, CX
|
||||
JE Lenc192
|
||||
JB Lenc128
|
||||
MOVUPS (AX), X1
|
||||
AESENC X1, X0
|
||||
MOVUPS 16(AX), X1
|
||||
AESENC X1, X0
|
||||
ADDQ $32, AX
|
||||
ADDQ $0x20, AX
|
||||
|
||||
Lenc192:
|
||||
MOVUPS 0(AX), X1
|
||||
MOVUPS (AX), X1
|
||||
AESENC X1, X0
|
||||
MOVUPS 16(AX), X1
|
||||
AESENC X1, X0
|
||||
ADDQ $32, AX
|
||||
ADDQ $0x20, AX
|
||||
|
||||
Lenc128:
|
||||
MOVUPS 0(AX), X1
|
||||
AESENC X1, X0
|
||||
MOVUPS 16(AX), X1
|
||||
AESENC X1, X0
|
||||
MOVUPS 32(AX), X1
|
||||
AESENC X1, X0
|
||||
MOVUPS 48(AX), X1
|
||||
AESENC X1, X0
|
||||
MOVUPS 64(AX), X1
|
||||
AESENC X1, X0
|
||||
MOVUPS 80(AX), X1
|
||||
AESENC X1, X0
|
||||
MOVUPS 96(AX), X1
|
||||
AESENC X1, X0
|
||||
MOVUPS 112(AX), X1
|
||||
AESENC X1, X0
|
||||
MOVUPS 128(AX), X1
|
||||
AESENC X1, X0
|
||||
MOVUPS 144(AX), X1
|
||||
MOVUPS (AX), X1
|
||||
AESENC X1, X0
|
||||
MOVUPS 16(AX), X1
|
||||
AESENC X1, X0
|
||||
MOVUPS 32(AX), X1
|
||||
AESENC X1, X0
|
||||
MOVUPS 48(AX), X1
|
||||
AESENC X1, X0
|
||||
MOVUPS 64(AX), X1
|
||||
AESENC X1, X0
|
||||
MOVUPS 80(AX), X1
|
||||
AESENC X1, X0
|
||||
MOVUPS 96(AX), X1
|
||||
AESENC X1, X0
|
||||
MOVUPS 112(AX), X1
|
||||
AESENC X1, X0
|
||||
MOVUPS 128(AX), X1
|
||||
AESENC X1, X0
|
||||
MOVUPS 144(AX), X1
|
||||
AESENCLAST X1, X0
|
||||
MOVUPS X0, 0(DX)
|
||||
MOVUPS X0, (DX)
|
||||
RET
|
||||
|
||||
// func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
|
||||
TEXT ·decryptBlockAsm(SB),NOSPLIT,$0
|
||||
MOVQ nr+0(FP), CX
|
||||
MOVQ xk+8(FP), AX
|
||||
MOVQ dst+16(FP), DX
|
||||
MOVQ src+24(FP), BX
|
||||
MOVUPS 0(AX), X1
|
||||
MOVUPS 0(BX), X0
|
||||
ADDQ $16, AX
|
||||
PXOR X1, X0
|
||||
SUBQ $12, CX
|
||||
JE Ldec192
|
||||
JB Ldec128
|
||||
Ldec256:
|
||||
MOVUPS 0(AX), X1
|
||||
// func decryptBlockAsm(nr int, xk *uint32, dst *byte, src *byte)
|
||||
// Requires: AES, SSE, SSE2
|
||||
TEXT ·decryptBlockAsm(SB), NOSPLIT, $0-32
|
||||
MOVQ nr+0(FP), CX
|
||||
MOVQ xk+8(FP), AX
|
||||
MOVQ dst+16(FP), DX
|
||||
MOVQ src+24(FP), BX
|
||||
MOVUPS (AX), X1
|
||||
MOVUPS (BX), X0
|
||||
ADDQ $0x10, AX
|
||||
PXOR X1, X0
|
||||
SUBQ $0x0c, CX
|
||||
JE Ldec192
|
||||
JB Ldec128
|
||||
MOVUPS (AX), X1
|
||||
AESDEC X1, X0
|
||||
MOVUPS 16(AX), X1
|
||||
AESDEC X1, X0
|
||||
ADDQ $32, AX
|
||||
ADDQ $0x20, AX
|
||||
|
||||
Ldec192:
|
||||
MOVUPS 0(AX), X1
|
||||
MOVUPS (AX), X1
|
||||
AESDEC X1, X0
|
||||
MOVUPS 16(AX), X1
|
||||
AESDEC X1, X0
|
||||
ADDQ $32, AX
|
||||
ADDQ $0x20, AX
|
||||
|
||||
Ldec128:
|
||||
MOVUPS 0(AX), X1
|
||||
AESDEC X1, X0
|
||||
MOVUPS 16(AX), X1
|
||||
AESDEC X1, X0
|
||||
MOVUPS 32(AX), X1
|
||||
AESDEC X1, X0
|
||||
MOVUPS 48(AX), X1
|
||||
AESDEC X1, X0
|
||||
MOVUPS 64(AX), X1
|
||||
AESDEC X1, X0
|
||||
MOVUPS 80(AX), X1
|
||||
AESDEC X1, X0
|
||||
MOVUPS 96(AX), X1
|
||||
AESDEC X1, X0
|
||||
MOVUPS 112(AX), X1
|
||||
AESDEC X1, X0
|
||||
MOVUPS 128(AX), X1
|
||||
AESDEC X1, X0
|
||||
MOVUPS 144(AX), X1
|
||||
MOVUPS (AX), X1
|
||||
AESDEC X1, X0
|
||||
MOVUPS 16(AX), X1
|
||||
AESDEC X1, X0
|
||||
MOVUPS 32(AX), X1
|
||||
AESDEC X1, X0
|
||||
MOVUPS 48(AX), X1
|
||||
AESDEC X1, X0
|
||||
MOVUPS 64(AX), X1
|
||||
AESDEC X1, X0
|
||||
MOVUPS 80(AX), X1
|
||||
AESDEC X1, X0
|
||||
MOVUPS 96(AX), X1
|
||||
AESDEC X1, X0
|
||||
MOVUPS 112(AX), X1
|
||||
AESDEC X1, X0
|
||||
MOVUPS 128(AX), X1
|
||||
AESDEC X1, X0
|
||||
MOVUPS 144(AX), X1
|
||||
AESDECLAST X1, X0
|
||||
MOVUPS X0, 0(DX)
|
||||
MOVUPS X0, (DX)
|
||||
RET
|
||||
|
||||
// func expandKeyAsm(nr int, key *byte, enc, dec *uint32) {
|
||||
// Note that round keys are stored in uint128 format, not uint32
|
||||
TEXT ·expandKeyAsm(SB),NOSPLIT,$0
|
||||
MOVQ nr+0(FP), CX
|
||||
MOVQ key+8(FP), AX
|
||||
MOVQ enc+16(FP), BX
|
||||
MOVQ dec+24(FP), DX
|
||||
// func expandKeyAsm(nr int, key *byte, enc *uint32, dec *uint32)
|
||||
// Requires: AES, SSE, SSE2
|
||||
TEXT ·expandKeyAsm(SB), NOSPLIT, $0-32
|
||||
MOVQ nr+0(FP), CX
|
||||
MOVQ key+8(FP), AX
|
||||
MOVQ enc+16(FP), BX
|
||||
MOVQ dec+24(FP), DX
|
||||
MOVUPS (AX), X0
|
||||
|
||||
// enc
|
||||
MOVUPS X0, (BX)
|
||||
ADDQ $16, BX
|
||||
PXOR X4, X4 // _expand_key_* expect X4 to be zero
|
||||
CMPL CX, $12
|
||||
JE Lexp_enc192
|
||||
JB Lexp_enc128
|
||||
Lexp_enc256:
|
||||
MOVUPS 16(AX), X2
|
||||
MOVUPS X2, (BX)
|
||||
ADDQ $16, BX
|
||||
MOVUPS X0, (BX)
|
||||
ADDQ $0x10, BX
|
||||
PXOR X4, X4
|
||||
CMPL CX, $0x0c
|
||||
JE Lexp_enc192
|
||||
JB Lexp_enc128
|
||||
MOVUPS 16(AX), X2
|
||||
MOVUPS X2, (BX)
|
||||
ADDQ $0x10, BX
|
||||
AESKEYGENASSIST $0x01, X2, X1
|
||||
CALL _expand_key_256a<>(SB)
|
||||
CALL _expand_key_256a<>(SB)
|
||||
AESKEYGENASSIST $0x01, X0, X1
|
||||
CALL _expand_key_256b<>(SB)
|
||||
CALL _expand_key_256b<>(SB)
|
||||
AESKEYGENASSIST $0x02, X2, X1
|
||||
CALL _expand_key_256a<>(SB)
|
||||
CALL _expand_key_256a<>(SB)
|
||||
AESKEYGENASSIST $0x02, X0, X1
|
||||
CALL _expand_key_256b<>(SB)
|
||||
CALL _expand_key_256b<>(SB)
|
||||
AESKEYGENASSIST $0x04, X2, X1
|
||||
CALL _expand_key_256a<>(SB)
|
||||
CALL _expand_key_256a<>(SB)
|
||||
AESKEYGENASSIST $0x04, X0, X1
|
||||
CALL _expand_key_256b<>(SB)
|
||||
CALL _expand_key_256b<>(SB)
|
||||
AESKEYGENASSIST $0x08, X2, X1
|
||||
CALL _expand_key_256a<>(SB)
|
||||
CALL _expand_key_256a<>(SB)
|
||||
AESKEYGENASSIST $0x08, X0, X1
|
||||
CALL _expand_key_256b<>(SB)
|
||||
CALL _expand_key_256b<>(SB)
|
||||
AESKEYGENASSIST $0x10, X2, X1
|
||||
CALL _expand_key_256a<>(SB)
|
||||
CALL _expand_key_256a<>(SB)
|
||||
AESKEYGENASSIST $0x10, X0, X1
|
||||
CALL _expand_key_256b<>(SB)
|
||||
CALL _expand_key_256b<>(SB)
|
||||
AESKEYGENASSIST $0x20, X2, X1
|
||||
CALL _expand_key_256a<>(SB)
|
||||
CALL _expand_key_256a<>(SB)
|
||||
AESKEYGENASSIST $0x20, X0, X1
|
||||
CALL _expand_key_256b<>(SB)
|
||||
CALL _expand_key_256b<>(SB)
|
||||
AESKEYGENASSIST $0x40, X2, X1
|
||||
CALL _expand_key_256a<>(SB)
|
||||
JMP Lexp_dec
|
||||
CALL _expand_key_256a<>(SB)
|
||||
JMP Lexp_dec
|
||||
|
||||
Lexp_enc192:
|
||||
MOVQ 16(AX), X2
|
||||
MOVQ 16(AX), X2
|
||||
AESKEYGENASSIST $0x01, X2, X1
|
||||
CALL _expand_key_192a<>(SB)
|
||||
CALL _expand_key_192a<>(SB)
|
||||
AESKEYGENASSIST $0x02, X2, X1
|
||||
CALL _expand_key_192b<>(SB)
|
||||
CALL _expand_key_192b<>(SB)
|
||||
AESKEYGENASSIST $0x04, X2, X1
|
||||
CALL _expand_key_192a<>(SB)
|
||||
CALL _expand_key_192a<>(SB)
|
||||
AESKEYGENASSIST $0x08, X2, X1
|
||||
CALL _expand_key_192b<>(SB)
|
||||
CALL _expand_key_192b<>(SB)
|
||||
AESKEYGENASSIST $0x10, X2, X1
|
||||
CALL _expand_key_192a<>(SB)
|
||||
CALL _expand_key_192a<>(SB)
|
||||
AESKEYGENASSIST $0x20, X2, X1
|
||||
CALL _expand_key_192b<>(SB)
|
||||
CALL _expand_key_192b<>(SB)
|
||||
AESKEYGENASSIST $0x40, X2, X1
|
||||
CALL _expand_key_192a<>(SB)
|
||||
CALL _expand_key_192a<>(SB)
|
||||
AESKEYGENASSIST $0x80, X2, X1
|
||||
CALL _expand_key_192b<>(SB)
|
||||
JMP Lexp_dec
|
||||
CALL _expand_key_192b<>(SB)
|
||||
JMP Lexp_dec
|
||||
|
||||
Lexp_enc128:
|
||||
AESKEYGENASSIST $0x01, X0, X1
|
||||
CALL _expand_key_128<>(SB)
|
||||
CALL _expand_key_128<>(SB)
|
||||
AESKEYGENASSIST $0x02, X0, X1
|
||||
CALL _expand_key_128<>(SB)
|
||||
CALL _expand_key_128<>(SB)
|
||||
AESKEYGENASSIST $0x04, X0, X1
|
||||
CALL _expand_key_128<>(SB)
|
||||
CALL _expand_key_128<>(SB)
|
||||
AESKEYGENASSIST $0x08, X0, X1
|
||||
CALL _expand_key_128<>(SB)
|
||||
CALL _expand_key_128<>(SB)
|
||||
AESKEYGENASSIST $0x10, X0, X1
|
||||
CALL _expand_key_128<>(SB)
|
||||
CALL _expand_key_128<>(SB)
|
||||
AESKEYGENASSIST $0x20, X0, X1
|
||||
CALL _expand_key_128<>(SB)
|
||||
CALL _expand_key_128<>(SB)
|
||||
AESKEYGENASSIST $0x40, X0, X1
|
||||
CALL _expand_key_128<>(SB)
|
||||
CALL _expand_key_128<>(SB)
|
||||
AESKEYGENASSIST $0x80, X0, X1
|
||||
CALL _expand_key_128<>(SB)
|
||||
CALL _expand_key_128<>(SB)
|
||||
AESKEYGENASSIST $0x1b, X0, X1
|
||||
CALL _expand_key_128<>(SB)
|
||||
CALL _expand_key_128<>(SB)
|
||||
AESKEYGENASSIST $0x36, X0, X1
|
||||
CALL _expand_key_128<>(SB)
|
||||
CALL _expand_key_128<>(SB)
|
||||
|
||||
Lexp_dec:
|
||||
// dec
|
||||
SUBQ $16, BX
|
||||
SUBQ $0x10, BX
|
||||
MOVUPS (BX), X1
|
||||
MOVUPS X1, (DX)
|
||||
DECQ CX
|
||||
DECQ CX
|
||||
|
||||
Lexp_dec_loop:
|
||||
MOVUPS -16(BX), X1
|
||||
AESIMC X1, X0
|
||||
MOVUPS X0, 16(DX)
|
||||
SUBQ $16, BX
|
||||
ADDQ $16, DX
|
||||
DECQ CX
|
||||
JNZ Lexp_dec_loop
|
||||
SUBQ $0x10, BX
|
||||
ADDQ $0x10, DX
|
||||
DECQ CX
|
||||
JNZ Lexp_dec_loop
|
||||
MOVUPS -16(BX), X0
|
||||
MOVUPS X0, 16(DX)
|
||||
RET
|
||||
|
||||
TEXT _expand_key_128<>(SB),NOSPLIT,$0
|
||||
// func _expand_key_128<>()
|
||||
// Requires: SSE, SSE2
|
||||
TEXT _expand_key_128<>(SB), NOSPLIT, $0
|
||||
PSHUFD $0xff, X1, X1
|
||||
SHUFPS $0x10, X0, X4
|
||||
PXOR X4, X0
|
||||
PXOR X4, X0
|
||||
SHUFPS $0x8c, X0, X4
|
||||
PXOR X4, X0
|
||||
PXOR X1, X0
|
||||
PXOR X4, X0
|
||||
PXOR X1, X0
|
||||
MOVUPS X0, (BX)
|
||||
ADDQ $16, BX
|
||||
ADDQ $0x10, BX
|
||||
RET
|
||||
|
||||
TEXT _expand_key_192a<>(SB),NOSPLIT,$0
|
||||
// func _expand_key_192a<>()
|
||||
// Requires: SSE, SSE2
|
||||
TEXT _expand_key_192a<>(SB), NOSPLIT, $0
|
||||
PSHUFD $0x55, X1, X1
|
||||
SHUFPS $0x10, X0, X4
|
||||
PXOR X4, X0
|
||||
PXOR X4, X0
|
||||
SHUFPS $0x8c, X0, X4
|
||||
PXOR X4, X0
|
||||
PXOR X1, X0
|
||||
|
||||
PXOR X4, X0
|
||||
PXOR X1, X0
|
||||
MOVAPS X2, X5
|
||||
MOVAPS X2, X6
|
||||
PSLLDQ $0x4, X5
|
||||
PSLLDQ $0x04, X5
|
||||
PSHUFD $0xff, X0, X3
|
||||
PXOR X3, X2
|
||||
PXOR X5, X2
|
||||
|
||||
PXOR X3, X2
|
||||
PXOR X5, X2
|
||||
MOVAPS X0, X1
|
||||
SHUFPS $0x44, X0, X6
|
||||
MOVUPS X6, (BX)
|
||||
SHUFPS $0x4e, X2, X1
|
||||
MOVUPS X1, 16(BX)
|
||||
ADDQ $32, BX
|
||||
ADDQ $0x20, BX
|
||||
RET
|
||||
|
||||
TEXT _expand_key_192b<>(SB),NOSPLIT,$0
|
||||
// func _expand_key_192b<>()
|
||||
// Requires: SSE, SSE2
|
||||
TEXT _expand_key_192b<>(SB), NOSPLIT, $0
|
||||
PSHUFD $0x55, X1, X1
|
||||
SHUFPS $0x10, X0, X4
|
||||
PXOR X4, X0
|
||||
PXOR X4, X0
|
||||
SHUFPS $0x8c, X0, X4
|
||||
PXOR X4, X0
|
||||
PXOR X1, X0
|
||||
|
||||
PXOR X4, X0
|
||||
PXOR X1, X0
|
||||
MOVAPS X2, X5
|
||||
PSLLDQ $0x4, X5
|
||||
PSLLDQ $0x04, X5
|
||||
PSHUFD $0xff, X0, X3
|
||||
PXOR X3, X2
|
||||
PXOR X5, X2
|
||||
|
||||
PXOR X3, X2
|
||||
PXOR X5, X2
|
||||
MOVUPS X0, (BX)
|
||||
ADDQ $16, BX
|
||||
ADDQ $0x10, BX
|
||||
RET
|
||||
|
||||
TEXT _expand_key_256a<>(SB),NOSPLIT,$0
|
||||
// func _expand_key_256a<>()
|
||||
TEXT _expand_key_256a<>(SB), NOSPLIT, $0
|
||||
JMP _expand_key_128<>(SB)
|
||||
|
||||
TEXT _expand_key_256b<>(SB),NOSPLIT,$0
|
||||
// func _expand_key_256b<>()
|
||||
// Requires: SSE, SSE2
|
||||
TEXT _expand_key_256b<>(SB), NOSPLIT, $0
|
||||
PSHUFD $0xaa, X1, X1
|
||||
SHUFPS $0x10, X2, X4
|
||||
PXOR X4, X2
|
||||
PXOR X4, X2
|
||||
SHUFPS $0x8c, X2, X4
|
||||
PXOR X4, X2
|
||||
PXOR X1, X2
|
||||
|
||||
PXOR X4, X2
|
||||
PXOR X1, X2
|
||||
MOVUPS X2, (BX)
|
||||
ADDQ $16, BX
|
||||
ADDQ $0x10, BX
|
||||
RET
|
||||
|
|
|
|||
|
|
@ -358,6 +358,7 @@ var excluded = map[string]bool{
|
|||
|
||||
// See go.dev/issue/46027: some imports are missing for this submodule.
|
||||
"crypto/aes/_asm/gcm": true,
|
||||
"crypto/aes/_asm/standard": true,
|
||||
"crypto/internal/bigmod/_asm": true,
|
||||
"crypto/internal/edwards25519/field/_asm": true,
|
||||
"crypto/md5/_asm": true,
|
||||
|
|
|
|||
Loading…
Reference in New Issue