crypto/aes: Avo port of asm_amd64.s

This implementation utilizes the same registers found in the reference
implementation, aiming to produce a minimal semantic diff between the
Avo-generated output and the original hand-written assembly.

To verify the Avo implementation, the reference and Avo-generated
assembly files are fed to `go tool asm`, capturing the debug output into
corresponding temp files. The debug output contains supplementary
metadata (line numbers, instruction offsets, and source file references)
that must be removed in order to obtain a semantic diff of the two
files. This is accomplished via a small utility script written in awk.

The reference assembly file does not specify a frame size for some of
the defined assembly functions. Avo automatically infers the frame size
when generating TEXT directives, leading to a diff on those lines.

Commands used to verify Avo output:

GOROOT=$(go env GOROOT)
ASM_PATH="src/crypto/aes/asm_amd64.s"
REFERENCE="54fe0fd43fcf8609666c16ae6d15ed92873b1564"

go tool asm -o /dev/null -I "$GOROOT"/src/runtime -debug \
  <(git cat-file -p "$REFERENCE:$ASM_PATH") \
  > /tmp/reference.s

go tool asm -o /dev/null -I "$GOROOT"/src/runtime -debug \
  "$ASM_PATH" \
  > /tmp/avo.s

normalize(){
  awk '{
    $1=$2=$3="";
    print substr($0,4)
  }'
}

diff <(normalize < /tmp/reference.s) <(normalize < /tmp/avo.s)

1c1
< TEXT <unlinkable>.encryptBlockAsm(SB), NOSPLIT, $0
---
> TEXT <unlinkable>.encryptBlockAsm(SB), NOSPLIT, $0-32
45c45
< TEXT <unlinkable>.decryptBlockAsm(SB), NOSPLIT, $0
---
> TEXT <unlinkable>.decryptBlockAsm(SB), NOSPLIT, $0-32
89c89
< TEXT <unlinkable>.expandKeyAsm(SB), NOSPLIT, $0
---
> TEXT <unlinkable>.expandKeyAsm(SB), NOSPLIT, $0-32

Change-Id: If647584df4137146d355f91ac0f6a8285d07c932
Reviewed-on: https://go-review.googlesource.com/c/go/+/600375
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Reviewed-by: Roland Shoemaker <roland@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Filippo Valsorda <filippo@golang.org>
This commit is contained in:
Garrett Bodley 2024-07-22 23:39:44 -04:00 committed by Roland Shoemaker
parent 6ee8c07c3c
commit d61b73c1d1
6 changed files with 584 additions and 168 deletions

View File

@ -356,6 +356,7 @@ var excluded = map[string]bool{
// go.dev/issue/46027: some imports are missing for this submodule.
"crypto/aes/_asm/gcm": true,
"crypto/aes/_asm/standard": true,
"crypto/internal/bigmod/_asm": true,
"crypto/internal/edwards25519/field/_asm": true,
"crypto/md5/_asm": true,

View File

@ -0,0 +1,385 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
import (
"os"
"strings"
. "github.com/mmcloughlin/avo/build"
"github.com/mmcloughlin/avo/ir"
. "github.com/mmcloughlin/avo/operand"
. "github.com/mmcloughlin/avo/reg"
)
//go:generate go run . -out ../../asm_amd64.s -pkg aes
func main() {
Package("crypto/aes")
ConstraintExpr("!purego")
encryptBlockAsm()
decryptBlockAsm()
expandKeyAsm()
_expand_key_128()
_expand_key_192a()
_expand_key_192b()
_expand_key_256a()
_expand_key_256b()
Generate()
var internalFunctions []string = []string{
"·_expand_key_128<>",
"·_expand_key_192a<>",
"·_expand_key_192b<>",
"·_expand_key_256a<>",
"·_expand_key_256b<>",
}
removePeskyUnicodeDot(internalFunctions, "../../asm_amd64.s")
}
func encryptBlockAsm() {
Implement("encryptBlockAsm")
Attributes(NOSPLIT)
AllocLocal(0)
Load(Param("nr"), RCX)
Load(Param("xk"), RAX)
Load(Param("dst"), RDX)
Load(Param("src"), RBX)
MOVUPS(Mem{Base: AX}.Offset(0), X1)
MOVUPS(Mem{Base: BX}.Offset(0), X0)
ADDQ(Imm(16), RAX)
PXOR(X1, X0)
SUBQ(Imm(12), RCX)
JE(LabelRef("Lenc192"))
JB(LabelRef("Lenc128"))
Label("Lenc256")
MOVUPS(Mem{Base: AX}.Offset(0), X1)
AESENC(X1, X0)
MOVUPS(Mem{Base: AX}.Offset(16), X1)
AESENC(X1, X0)
ADDQ(Imm(32), RAX)
Label("Lenc192")
MOVUPS(Mem{Base: AX}.Offset(0), X1)
AESENC(X1, X0)
MOVUPS(Mem{Base: AX}.Offset(16), X1)
AESENC(X1, X0)
ADDQ(Imm(32), RAX)
Label("Lenc128")
MOVUPS(Mem{Base: AX}.Offset(0), X1)
AESENC(X1, X0)
MOVUPS(Mem{Base: AX}.Offset(16), X1)
AESENC(X1, X0)
MOVUPS(Mem{Base: AX}.Offset(32), X1)
AESENC(X1, X0)
MOVUPS(Mem{Base: AX}.Offset(48), X1)
AESENC(X1, X0)
MOVUPS(Mem{Base: AX}.Offset(64), X1)
AESENC(X1, X0)
MOVUPS(Mem{Base: AX}.Offset(80), X1)
AESENC(X1, X0)
MOVUPS(Mem{Base: AX}.Offset(96), X1)
AESENC(X1, X0)
MOVUPS(Mem{Base: AX}.Offset(112), X1)
AESENC(X1, X0)
MOVUPS(Mem{Base: AX}.Offset(128), X1)
AESENC(X1, X0)
MOVUPS(Mem{Base: AX}.Offset(144), X1)
AESENCLAST(X1, X0)
MOVUPS(X0, Mem{Base: DX}.Offset(0))
RET()
}
func decryptBlockAsm() {
Implement("decryptBlockAsm")
Attributes(NOSPLIT)
AllocLocal(0)
Load(Param("nr"), RCX)
Load(Param("xk"), RAX)
Load(Param("dst"), RDX)
Load(Param("src"), RBX)
MOVUPS(Mem{Base: AX}.Offset(0), X1)
MOVUPS(Mem{Base: BX}.Offset(0), X0)
ADDQ(Imm(16), RAX)
PXOR(X1, X0)
SUBQ(Imm(12), RCX)
JE(LabelRef("Ldec192"))
JB(LabelRef("Ldec128"))
Label("Ldec256")
MOVUPS(Mem{Base: AX}.Offset(0), X1)
AESDEC(X1, X0)
MOVUPS(Mem{Base: AX}.Offset(16), X1)
AESDEC(X1, X0)
ADDQ(Imm(32), RAX)
Label("Ldec192")
MOVUPS(Mem{Base: AX}.Offset(0), X1)
AESDEC(X1, X0)
MOVUPS(Mem{Base: AX}.Offset(16), X1)
AESDEC(X1, X0)
ADDQ(Imm(32), RAX)
Label("Ldec128")
MOVUPS(Mem{Base: AX}.Offset(0), X1)
AESDEC(X1, X0)
MOVUPS(Mem{Base: AX}.Offset(16), X1)
AESDEC(X1, X0)
MOVUPS(Mem{Base: AX}.Offset(32), X1)
AESDEC(X1, X0)
MOVUPS(Mem{Base: AX}.Offset(48), X1)
AESDEC(X1, X0)
MOVUPS(Mem{Base: AX}.Offset(64), X1)
AESDEC(X1, X0)
MOVUPS(Mem{Base: AX}.Offset(80), X1)
AESDEC(X1, X0)
MOVUPS(Mem{Base: AX}.Offset(96), X1)
AESDEC(X1, X0)
MOVUPS(Mem{Base: AX}.Offset(112), X1)
AESDEC(X1, X0)
MOVUPS(Mem{Base: AX}.Offset(128), X1)
AESDEC(X1, X0)
MOVUPS(Mem{Base: AX}.Offset(144), X1)
AESDECLAST(X1, X0)
MOVUPS(X0, Mem{Base: DX}.Offset(0))
RET()
}
// Note that round keys are stored in uint128 format, not uint32
func expandKeyAsm() {
Implement("expandKeyAsm")
Attributes(NOSPLIT)
AllocLocal(0)
Load(Param("nr"), RCX)
Load(Param("key"), RAX)
Load(Param("enc"), RBX)
Load(Param("dec"), RDX)
MOVUPS(Mem{Base: AX}, X0)
Comment("enc")
MOVUPS(X0, Mem{Base: BX})
ADDQ(Imm(16), RBX)
PXOR(X4, X4) // _expand_key_* expect X4 to be zero
CMPL(ECX, Imm(12))
JE(LabelRef("Lexp_enc192"))
JB(LabelRef("Lexp_enc128"))
Lexp_enc256()
Lexp_enc192()
Lexp_enc128()
Lexp_dec()
Lexp_dec_loop()
}
func Lexp_enc256() {
Label("Lexp_enc256")
MOVUPS(Mem{Base: AX}.Offset(16), X2)
MOVUPS(X2, Mem{Base: BX})
ADDQ(Imm(16), RBX)
var rcon uint64 = 1
for i := 0; i < 6; i++ {
AESKEYGENASSIST(Imm(rcon), X2, X1)
CALL(LabelRef("_expand_key_256a<>(SB)"))
AESKEYGENASSIST(Imm(rcon), X0, X1)
CALL(LabelRef("_expand_key_256b<>(SB)"))
rcon <<= 1
}
AESKEYGENASSIST(Imm(0x40), X2, X1)
CALL(LabelRef("_expand_key_256a<>(SB)"))
JMP(LabelRef("Lexp_dec"))
}
func Lexp_enc192() {
Label("Lexp_enc192")
MOVQ(Mem{Base: AX}.Offset(16), X2)
var rcon uint64 = 1
for i := 0; i < 8; i++ {
AESKEYGENASSIST(Imm(rcon), X2, X1)
if i%2 == 0 {
CALL(LabelRef("_expand_key_192a<>(SB)"))
} else {
CALL(LabelRef("_expand_key_192b<>(SB)"))
}
rcon <<= 1
}
JMP(LabelRef("Lexp_dec"))
}
func Lexp_enc128() {
Label("Lexp_enc128")
var rcon uint64 = 1
for i := 0; i < 8; i++ {
AESKEYGENASSIST(Imm(rcon), X0, X1)
CALL(LabelRef("_expand_key_128<>(SB)"))
rcon <<= 1
}
AESKEYGENASSIST(Imm(0x1b), X0, X1)
CALL(LabelRef("_expand_key_128<>(SB)"))
AESKEYGENASSIST(Imm(0x36), X0, X1)
CALL(LabelRef("_expand_key_128<>(SB)"))
}
func Lexp_dec() {
Label("Lexp_dec")
Comment("dec")
SUBQ(Imm(16), RBX)
MOVUPS(Mem{Base: BX}, X1)
MOVUPS(X1, Mem{Base: DX})
DECQ(RCX)
}
func Lexp_dec_loop() {
Label("Lexp_dec_loop")
MOVUPS(Mem{Base: BX}.Offset(-16), X1)
AESIMC(X1, X0)
MOVUPS(X0, Mem{Base: DX}.Offset(16))
SUBQ(Imm(16), RBX)
ADDQ(Imm(16), RDX)
DECQ(RCX)
JNZ(LabelRef("Lexp_dec_loop"))
MOVUPS(Mem{Base: BX}.Offset(-16), X0)
MOVUPS(X0, Mem{Base: DX}.Offset(16))
RET()
}
func _expand_key_128() {
Function("_expand_key_128<>")
Attributes(NOSPLIT)
AllocLocal(0)
PSHUFD(Imm(0xff), X1, X1)
SHUFPS(Imm(0x10), X0, X4)
PXOR(X4, X0)
SHUFPS(Imm(0x8c), X0, X4)
PXOR(X4, X0)
PXOR(X1, X0)
MOVUPS(X0, Mem{Base: BX})
ADDQ(Imm(16), RBX)
RET()
}
func _expand_key_192a() {
Function("_expand_key_192a<>")
Attributes(NOSPLIT)
AllocLocal(0)
PSHUFD(Imm(0x55), X1, X1)
SHUFPS(Imm(0x10), X0, X4)
PXOR(X4, X0)
SHUFPS(Imm(0x8c), X0, X4)
PXOR(X4, X0)
PXOR(X1, X0)
MOVAPS(X2, X5)
MOVAPS(X2, X6)
PSLLDQ(Imm(0x4), X5)
PSHUFD(Imm(0xff), X0, X3)
PXOR(X3, X2)
PXOR(X5, X2)
MOVAPS(X0, X1)
SHUFPS(Imm(0x44), X0, X6)
MOVUPS(X6, Mem{Base: BX})
SHUFPS(Imm(0x4e), X2, X1)
MOVUPS(X1, Mem{Base: BX}.Offset(16))
ADDQ(Imm(32), RBX)
RET()
}
func _expand_key_192b() {
Function("_expand_key_192b<>")
Attributes(NOSPLIT)
AllocLocal(0)
PSHUFD(Imm(0x55), X1, X1)
SHUFPS(Imm(0x10), X0, X4)
PXOR(X4, X0)
SHUFPS(Imm(0x8c), X0, X4)
PXOR(X4, X0)
PXOR(X1, X0)
MOVAPS(X2, X5)
PSLLDQ(Imm(0x4), X5)
PSHUFD(Imm(0xff), X0, X3)
PXOR(X3, X2)
PXOR(X5, X2)
MOVUPS(X0, Mem{Base: BX})
ADDQ(Imm(16), RBX)
RET()
}
func _expand_key_256a() {
Function("_expand_key_256a<>")
Attributes(NOSPLIT)
AllocLocal(0)
// Hack to get Avo to emit:
// JMP _expand_key_128<>(SB)
Instruction(&ir.Instruction{
Opcode: "JMP",
Operands: []Op{
LabelRef("_expand_key_128<>(SB)"),
},
})
}
func _expand_key_256b() {
Function("_expand_key_256b<>")
Attributes(NOSPLIT)
AllocLocal(0)
PSHUFD(Imm(0xaa), X1, X1)
SHUFPS(Imm(0x10), X2, X4)
PXOR(X4, X2)
SHUFPS(Imm(0x8c), X2, X4)
PXOR(X4, X2)
PXOR(X1, X2)
MOVUPS(X2, Mem{Base: BX})
ADDQ(Imm(16), RBX)
RET()
}
const ThatPeskyUnicodeDot = "\u00b7"
// removePeskyUnicodeDot strips the dot from the relevant TEXT directives such that they
// can exist as internal assembly functions
//
// Avo v0.6.0 does not support the generation of internal assembly functions. Go's unicode
// dot tells the compiler to link a TEXT symbol to a function in the current Go package
// (or another package if specified). Avo unconditionally prepends the unicode dot to all
// TEXT symbols, making it impossible to emit an internal function without this hack.
//
// There is a pending PR to add internal functions to Avo:
// https://github.com/mmcloughlin/avo/pull/443
//
// If merged it should allow the usage of InternalFunction("NAME") for the specified functions
func removePeskyUnicodeDot(internalFunctions []string, target string) {
bytes, err := os.ReadFile(target)
if err != nil {
panic(err)
}
content := string(bytes)
for _, from := range internalFunctions {
to := strings.ReplaceAll(from, ThatPeskyUnicodeDot, "")
content = strings.ReplaceAll(content, from, to)
}
err = os.WriteFile(target, []byte(content), 0644)
if err != nil {
panic(err)
}
}

View File

@ -0,0 +1,11 @@
module std/crypto/aes/_asm/standard
go 1.24
require github.com/mmcloughlin/avo v0.6.0
require (
golang.org/x/mod v0.20.0 // indirect
golang.org/x/sync v0.8.0 // indirect
golang.org/x/tools v0.24.0 // indirect
)

View File

@ -0,0 +1,8 @@
github.com/mmcloughlin/avo v0.6.0 h1:QH6FU8SKoTLaVs80GA8TJuLNkUYl4VokHKlPhVDg4YY=
github.com/mmcloughlin/avo v0.6.0/go.mod h1:8CoAGaCSYXtCPR+8y18Y9aB/kxb8JSS6FRI7mSkvD+8=
golang.org/x/mod v0.20.0 h1:utOm6MM3R3dnawAiJgn0y+xvuYRsm1RKM/4giyfDgV0=
golang.org/x/mod v0.20.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ=
golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/tools v0.24.0 h1:J1shsA93PJUEVaUSaay7UXAyE8aimq3GW0pjlolpa24=
golang.org/x/tools v0.24.0/go.mod h1:YhNqVBIfWHdzvTLs0d8LCuMhkKUgSUKldakyV7W/WDQ=

View File

@ -1,276 +1,286 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Code generated by command: go run asm_amd64.go -out ../../asm_amd64.s -pkg aes. DO NOT EDIT.
//go:build !purego
#include "textflag.h"
// func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
TEXT ·encryptBlockAsm(SB),NOSPLIT,$0
MOVQ nr+0(FP), CX
MOVQ xk+8(FP), AX
MOVQ dst+16(FP), DX
MOVQ src+24(FP), BX
MOVUPS 0(AX), X1
MOVUPS 0(BX), X0
ADDQ $16, AX
PXOR X1, X0
SUBQ $12, CX
JE Lenc192
JB Lenc128
Lenc256:
MOVUPS 0(AX), X1
// func encryptBlockAsm(nr int, xk *uint32, dst *byte, src *byte)
// Requires: AES, SSE, SSE2
TEXT ·encryptBlockAsm(SB), NOSPLIT, $0-32
MOVQ nr+0(FP), CX
MOVQ xk+8(FP), AX
MOVQ dst+16(FP), DX
MOVQ src+24(FP), BX
MOVUPS (AX), X1
MOVUPS (BX), X0
ADDQ $0x10, AX
PXOR X1, X0
SUBQ $0x0c, CX
JE Lenc192
JB Lenc128
MOVUPS (AX), X1
AESENC X1, X0
MOVUPS 16(AX), X1
AESENC X1, X0
ADDQ $32, AX
ADDQ $0x20, AX
Lenc192:
MOVUPS 0(AX), X1
MOVUPS (AX), X1
AESENC X1, X0
MOVUPS 16(AX), X1
AESENC X1, X0
ADDQ $32, AX
ADDQ $0x20, AX
Lenc128:
MOVUPS 0(AX), X1
AESENC X1, X0
MOVUPS 16(AX), X1
AESENC X1, X0
MOVUPS 32(AX), X1
AESENC X1, X0
MOVUPS 48(AX), X1
AESENC X1, X0
MOVUPS 64(AX), X1
AESENC X1, X0
MOVUPS 80(AX), X1
AESENC X1, X0
MOVUPS 96(AX), X1
AESENC X1, X0
MOVUPS 112(AX), X1
AESENC X1, X0
MOVUPS 128(AX), X1
AESENC X1, X0
MOVUPS 144(AX), X1
MOVUPS (AX), X1
AESENC X1, X0
MOVUPS 16(AX), X1
AESENC X1, X0
MOVUPS 32(AX), X1
AESENC X1, X0
MOVUPS 48(AX), X1
AESENC X1, X0
MOVUPS 64(AX), X1
AESENC X1, X0
MOVUPS 80(AX), X1
AESENC X1, X0
MOVUPS 96(AX), X1
AESENC X1, X0
MOVUPS 112(AX), X1
AESENC X1, X0
MOVUPS 128(AX), X1
AESENC X1, X0
MOVUPS 144(AX), X1
AESENCLAST X1, X0
MOVUPS X0, 0(DX)
MOVUPS X0, (DX)
RET
// func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
TEXT ·decryptBlockAsm(SB),NOSPLIT,$0
MOVQ nr+0(FP), CX
MOVQ xk+8(FP), AX
MOVQ dst+16(FP), DX
MOVQ src+24(FP), BX
MOVUPS 0(AX), X1
MOVUPS 0(BX), X0
ADDQ $16, AX
PXOR X1, X0
SUBQ $12, CX
JE Ldec192
JB Ldec128
Ldec256:
MOVUPS 0(AX), X1
// func decryptBlockAsm(nr int, xk *uint32, dst *byte, src *byte)
// Requires: AES, SSE, SSE2
TEXT ·decryptBlockAsm(SB), NOSPLIT, $0-32
MOVQ nr+0(FP), CX
MOVQ xk+8(FP), AX
MOVQ dst+16(FP), DX
MOVQ src+24(FP), BX
MOVUPS (AX), X1
MOVUPS (BX), X0
ADDQ $0x10, AX
PXOR X1, X0
SUBQ $0x0c, CX
JE Ldec192
JB Ldec128
MOVUPS (AX), X1
AESDEC X1, X0
MOVUPS 16(AX), X1
AESDEC X1, X0
ADDQ $32, AX
ADDQ $0x20, AX
Ldec192:
MOVUPS 0(AX), X1
MOVUPS (AX), X1
AESDEC X1, X0
MOVUPS 16(AX), X1
AESDEC X1, X0
ADDQ $32, AX
ADDQ $0x20, AX
Ldec128:
MOVUPS 0(AX), X1
AESDEC X1, X0
MOVUPS 16(AX), X1
AESDEC X1, X0
MOVUPS 32(AX), X1
AESDEC X1, X0
MOVUPS 48(AX), X1
AESDEC X1, X0
MOVUPS 64(AX), X1
AESDEC X1, X0
MOVUPS 80(AX), X1
AESDEC X1, X0
MOVUPS 96(AX), X1
AESDEC X1, X0
MOVUPS 112(AX), X1
AESDEC X1, X0
MOVUPS 128(AX), X1
AESDEC X1, X0
MOVUPS 144(AX), X1
MOVUPS (AX), X1
AESDEC X1, X0
MOVUPS 16(AX), X1
AESDEC X1, X0
MOVUPS 32(AX), X1
AESDEC X1, X0
MOVUPS 48(AX), X1
AESDEC X1, X0
MOVUPS 64(AX), X1
AESDEC X1, X0
MOVUPS 80(AX), X1
AESDEC X1, X0
MOVUPS 96(AX), X1
AESDEC X1, X0
MOVUPS 112(AX), X1
AESDEC X1, X0
MOVUPS 128(AX), X1
AESDEC X1, X0
MOVUPS 144(AX), X1
AESDECLAST X1, X0
MOVUPS X0, 0(DX)
MOVUPS X0, (DX)
RET
// func expandKeyAsm(nr int, key *byte, enc, dec *uint32) {
// Note that round keys are stored in uint128 format, not uint32
TEXT ·expandKeyAsm(SB),NOSPLIT,$0
MOVQ nr+0(FP), CX
MOVQ key+8(FP), AX
MOVQ enc+16(FP), BX
MOVQ dec+24(FP), DX
// func expandKeyAsm(nr int, key *byte, enc *uint32, dec *uint32)
// Requires: AES, SSE, SSE2
TEXT ·expandKeyAsm(SB), NOSPLIT, $0-32
MOVQ nr+0(FP), CX
MOVQ key+8(FP), AX
MOVQ enc+16(FP), BX
MOVQ dec+24(FP), DX
MOVUPS (AX), X0
// enc
MOVUPS X0, (BX)
ADDQ $16, BX
PXOR X4, X4 // _expand_key_* expect X4 to be zero
CMPL CX, $12
JE Lexp_enc192
JB Lexp_enc128
Lexp_enc256:
MOVUPS 16(AX), X2
MOVUPS X2, (BX)
ADDQ $16, BX
MOVUPS X0, (BX)
ADDQ $0x10, BX
PXOR X4, X4
CMPL CX, $0x0c
JE Lexp_enc192
JB Lexp_enc128
MOVUPS 16(AX), X2
MOVUPS X2, (BX)
ADDQ $0x10, BX
AESKEYGENASSIST $0x01, X2, X1
CALL _expand_key_256a<>(SB)
CALL _expand_key_256a<>(SB)
AESKEYGENASSIST $0x01, X0, X1
CALL _expand_key_256b<>(SB)
CALL _expand_key_256b<>(SB)
AESKEYGENASSIST $0x02, X2, X1
CALL _expand_key_256a<>(SB)
CALL _expand_key_256a<>(SB)
AESKEYGENASSIST $0x02, X0, X1
CALL _expand_key_256b<>(SB)
CALL _expand_key_256b<>(SB)
AESKEYGENASSIST $0x04, X2, X1
CALL _expand_key_256a<>(SB)
CALL _expand_key_256a<>(SB)
AESKEYGENASSIST $0x04, X0, X1
CALL _expand_key_256b<>(SB)
CALL _expand_key_256b<>(SB)
AESKEYGENASSIST $0x08, X2, X1
CALL _expand_key_256a<>(SB)
CALL _expand_key_256a<>(SB)
AESKEYGENASSIST $0x08, X0, X1
CALL _expand_key_256b<>(SB)
CALL _expand_key_256b<>(SB)
AESKEYGENASSIST $0x10, X2, X1
CALL _expand_key_256a<>(SB)
CALL _expand_key_256a<>(SB)
AESKEYGENASSIST $0x10, X0, X1
CALL _expand_key_256b<>(SB)
CALL _expand_key_256b<>(SB)
AESKEYGENASSIST $0x20, X2, X1
CALL _expand_key_256a<>(SB)
CALL _expand_key_256a<>(SB)
AESKEYGENASSIST $0x20, X0, X1
CALL _expand_key_256b<>(SB)
CALL _expand_key_256b<>(SB)
AESKEYGENASSIST $0x40, X2, X1
CALL _expand_key_256a<>(SB)
JMP Lexp_dec
CALL _expand_key_256a<>(SB)
JMP Lexp_dec
Lexp_enc192:
MOVQ 16(AX), X2
MOVQ 16(AX), X2
AESKEYGENASSIST $0x01, X2, X1
CALL _expand_key_192a<>(SB)
CALL _expand_key_192a<>(SB)
AESKEYGENASSIST $0x02, X2, X1
CALL _expand_key_192b<>(SB)
CALL _expand_key_192b<>(SB)
AESKEYGENASSIST $0x04, X2, X1
CALL _expand_key_192a<>(SB)
CALL _expand_key_192a<>(SB)
AESKEYGENASSIST $0x08, X2, X1
CALL _expand_key_192b<>(SB)
CALL _expand_key_192b<>(SB)
AESKEYGENASSIST $0x10, X2, X1
CALL _expand_key_192a<>(SB)
CALL _expand_key_192a<>(SB)
AESKEYGENASSIST $0x20, X2, X1
CALL _expand_key_192b<>(SB)
CALL _expand_key_192b<>(SB)
AESKEYGENASSIST $0x40, X2, X1
CALL _expand_key_192a<>(SB)
CALL _expand_key_192a<>(SB)
AESKEYGENASSIST $0x80, X2, X1
CALL _expand_key_192b<>(SB)
JMP Lexp_dec
CALL _expand_key_192b<>(SB)
JMP Lexp_dec
Lexp_enc128:
AESKEYGENASSIST $0x01, X0, X1
CALL _expand_key_128<>(SB)
CALL _expand_key_128<>(SB)
AESKEYGENASSIST $0x02, X0, X1
CALL _expand_key_128<>(SB)
CALL _expand_key_128<>(SB)
AESKEYGENASSIST $0x04, X0, X1
CALL _expand_key_128<>(SB)
CALL _expand_key_128<>(SB)
AESKEYGENASSIST $0x08, X0, X1
CALL _expand_key_128<>(SB)
CALL _expand_key_128<>(SB)
AESKEYGENASSIST $0x10, X0, X1
CALL _expand_key_128<>(SB)
CALL _expand_key_128<>(SB)
AESKEYGENASSIST $0x20, X0, X1
CALL _expand_key_128<>(SB)
CALL _expand_key_128<>(SB)
AESKEYGENASSIST $0x40, X0, X1
CALL _expand_key_128<>(SB)
CALL _expand_key_128<>(SB)
AESKEYGENASSIST $0x80, X0, X1
CALL _expand_key_128<>(SB)
CALL _expand_key_128<>(SB)
AESKEYGENASSIST $0x1b, X0, X1
CALL _expand_key_128<>(SB)
CALL _expand_key_128<>(SB)
AESKEYGENASSIST $0x36, X0, X1
CALL _expand_key_128<>(SB)
CALL _expand_key_128<>(SB)
Lexp_dec:
// dec
SUBQ $16, BX
SUBQ $0x10, BX
MOVUPS (BX), X1
MOVUPS X1, (DX)
DECQ CX
DECQ CX
Lexp_dec_loop:
MOVUPS -16(BX), X1
AESIMC X1, X0
MOVUPS X0, 16(DX)
SUBQ $16, BX
ADDQ $16, DX
DECQ CX
JNZ Lexp_dec_loop
SUBQ $0x10, BX
ADDQ $0x10, DX
DECQ CX
JNZ Lexp_dec_loop
MOVUPS -16(BX), X0
MOVUPS X0, 16(DX)
RET
TEXT _expand_key_128<>(SB),NOSPLIT,$0
// func _expand_key_128<>()
// Requires: SSE, SSE2
TEXT _expand_key_128<>(SB), NOSPLIT, $0
PSHUFD $0xff, X1, X1
SHUFPS $0x10, X0, X4
PXOR X4, X0
PXOR X4, X0
SHUFPS $0x8c, X0, X4
PXOR X4, X0
PXOR X1, X0
PXOR X4, X0
PXOR X1, X0
MOVUPS X0, (BX)
ADDQ $16, BX
ADDQ $0x10, BX
RET
TEXT _expand_key_192a<>(SB),NOSPLIT,$0
// func _expand_key_192a<>()
// Requires: SSE, SSE2
TEXT _expand_key_192a<>(SB), NOSPLIT, $0
PSHUFD $0x55, X1, X1
SHUFPS $0x10, X0, X4
PXOR X4, X0
PXOR X4, X0
SHUFPS $0x8c, X0, X4
PXOR X4, X0
PXOR X1, X0
PXOR X4, X0
PXOR X1, X0
MOVAPS X2, X5
MOVAPS X2, X6
PSLLDQ $0x4, X5
PSLLDQ $0x04, X5
PSHUFD $0xff, X0, X3
PXOR X3, X2
PXOR X5, X2
PXOR X3, X2
PXOR X5, X2
MOVAPS X0, X1
SHUFPS $0x44, X0, X6
MOVUPS X6, (BX)
SHUFPS $0x4e, X2, X1
MOVUPS X1, 16(BX)
ADDQ $32, BX
ADDQ $0x20, BX
RET
TEXT _expand_key_192b<>(SB),NOSPLIT,$0
// func _expand_key_192b<>()
// Requires: SSE, SSE2
TEXT _expand_key_192b<>(SB), NOSPLIT, $0
PSHUFD $0x55, X1, X1
SHUFPS $0x10, X0, X4
PXOR X4, X0
PXOR X4, X0
SHUFPS $0x8c, X0, X4
PXOR X4, X0
PXOR X1, X0
PXOR X4, X0
PXOR X1, X0
MOVAPS X2, X5
PSLLDQ $0x4, X5
PSLLDQ $0x04, X5
PSHUFD $0xff, X0, X3
PXOR X3, X2
PXOR X5, X2
PXOR X3, X2
PXOR X5, X2
MOVUPS X0, (BX)
ADDQ $16, BX
ADDQ $0x10, BX
RET
TEXT _expand_key_256a<>(SB),NOSPLIT,$0
// func _expand_key_256a<>()
TEXT _expand_key_256a<>(SB), NOSPLIT, $0
JMP _expand_key_128<>(SB)
TEXT _expand_key_256b<>(SB),NOSPLIT,$0
// func _expand_key_256b<>()
// Requires: SSE, SSE2
TEXT _expand_key_256b<>(SB), NOSPLIT, $0
PSHUFD $0xaa, X1, X1
SHUFPS $0x10, X2, X4
PXOR X4, X2
PXOR X4, X2
SHUFPS $0x8c, X2, X4
PXOR X4, X2
PXOR X1, X2
PXOR X4, X2
PXOR X1, X2
MOVUPS X2, (BX)
ADDQ $16, BX
ADDQ $0x10, BX
RET

View File

@ -358,6 +358,7 @@ var excluded = map[string]bool{
// See go.dev/issue/46027: some imports are missing for this submodule.
"crypto/aes/_asm/gcm": true,
"crypto/aes/_asm/standard": true,
"crypto/internal/bigmod/_asm": true,
"crypto/internal/edwards25519/field/_asm": true,
"crypto/md5/_asm": true,