cmd/asm: Add SHA3 hardware instructions for ARM64

Armv8.2-SHA introduced four SHA3-related instructions

EOR3 <Vd>.16B, <Vn>.16B, <Vm>.16B, <Va>.16B
RAX1 <Vd>.2D, <Vn>.2D, <Vm>.2D
XAR <Vd>.2D, <Vn>.2D, <Vm>.2D, #<imm6>
BCAX <Vd>.16B, <Vn>.16B, <Vm>.16B, <Va>.16B

We convert them into Go asm style as:

VEOR3 <Va>.B16, <Vm>.B16, <Vn>.B16, <Vd>.B16
VRAX1 <Vm>.D2, <Vn>.D2, <Vd>.D2
VXAR $imm6, <Vm>.D2, <Vn>.D2, <Vd>.D2
VBCAX <Va>.B16, <Vm>.B16, <Vn>.B16, <Vd>.B16

Armv8 Reference Manual:
* EOR3 (Three-way Exclusive OR) on C7.2.42
* RAX1 (Rotate and Exclusive OR) on C7.2.217
* XAR (Exclusive OR and Rotate) on C7.2.401
* BCAX (Bit Clear and Exclusive OR) on C7.2.12

Change-Id: I9a5d1b5ad508ed8fd5289d535906c54d9a63ca5a
Reviewed-on: https://go-review.googlesource.com/c/go/+/180757
Run-TryBot: Meng Zhuo <mzh@golangcn.org>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Trust: Emmanuel Odeke <emm.odeke@gmail.com>
This commit is contained in:
Meng Zhuo 2020-06-13 00:06:49 +08:00 committed by Cherry Zhang
parent d317ba5d44
commit 3036b76df0
4 changed files with 83 additions and 2 deletions

View File

@ -81,6 +81,8 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
SHA512H2 V4.D2, V3, V2 // 628464ce
SHA512SU0 V9.D2, V8.D2 // 2881c0ce
SHA512SU1 V7.D2, V6.D2, V5.D2 // c58867ce
VRAX1 V26.D2, V29.D2, V30.D2 // be8f7ace
VXAR $63, V27.D2, V21.D2, V26.D2 // bafe9bce
VADDV V0.S4, V0 // 00b8b14e
VMOVI $82, V0.B16 // 40e6024f
VUADDLV V6.B16, V6 // c638306e
@ -139,6 +141,8 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
VTBL V14.B16, [V3.B16, V4.B16, V5.B16], V17.B16 // 71400e4e
VTBL V13.B16, [V29.B16, V30.B16, V31.B16, V0.B16], V28.B16 // bc630d4e
VTBL V3.B8, [V27.B16], V8.B8 // 6803030e
VEOR3 V2.B16, V7.B16, V12.B16, V25.B16 // 990907ce
VBCAX V1.B16, V2.B16, V26.B16, V31.B16 // 5f0722ce
VZIP1 V16.H8, V3.H8, V19.H8 // 7338504e
VZIP2 V22.D2, V25.D2, V21.D2 // 357bd64e
VZIP1 V6.D2, V9.D2, V11.D2 // 2b39c64e

View File

@ -958,9 +958,11 @@ const (
AVADDP
AVAND
AVBIF
AVBCAX
AVCMEQ
AVCNT
AVEOR
AVEOR3
AVMOV
AVLD1
AVLD2
@ -989,6 +991,7 @@ const (
AVPMULL2
AVEXT
AVRBIT
AVRAX1
AVUSHR
AVUSHLL
AVUSHLL2
@ -1001,6 +1004,7 @@ const (
AVBSL
AVBIT
AVTBL
AVXAR
AVZIP1
AVZIP2
AVCMTST

View File

@ -464,9 +464,11 @@ var Anames = []string{
"VADDP",
"VAND",
"VBIF",
"VBCAX",
"VCMEQ",
"VCNT",
"VEOR",
"VEOR3",
"VMOV",
"VLD1",
"VLD2",
@ -495,6 +497,7 @@ var Anames = []string{
"VPMULL2",
"VEXT",
"VRBIT",
"VRAX1",
"VUSHR",
"VUSHLL",
"VUSHLL2",
@ -507,6 +510,7 @@ var Anames = []string{
"VBSL",
"VBIT",
"VTBL",
"VXAR",
"VZIP1",
"VZIP2",
"VCMTST",

View File

@ -843,6 +843,8 @@ var optab = []Optab{
{ASHA256H, C_ARNG, C_VREG, C_NONE, C_VREG, 1, 4, 0, 0, 0},
{AVREV32, C_ARNG, C_NONE, C_NONE, C_ARNG, 83, 4, 0, 0, 0},
{AVPMULL, C_ARNG, C_ARNG, C_NONE, C_ARNG, 93, 4, 0, 0, 0},
{AVEOR3, C_ARNG, C_ARNG, C_ARNG, C_ARNG, 103, 4, 0, 0, 0},
{AVXAR, C_VCON, C_ARNG, C_ARNG, C_ARNG, 104, 4, 0, 0, 0},
{obj.AUNDEF, C_NONE, C_NONE, C_NONE, C_NONE, 90, 4, 0, 0, 0},
{obj.APCDATA, C_VCON, C_NONE, C_NONE, C_VCON, 0, 0, 0, 0, 0},
@ -2769,6 +2771,7 @@ func buildop(ctxt *obj.Link) {
case AVADD:
oprangeset(AVSUB, t)
oprangeset(AVRAX1, t)
case AAESD:
oprangeset(AAESE, t)
@ -2827,6 +2830,9 @@ func buildop(ctxt *obj.Link) {
oprangeset(AVLD4, t)
oprangeset(AVLD4R, t)
case AVEOR3:
oprangeset(AVBCAX, t)
case ASHA1H,
AVCNT,
AVMOV,
@ -2839,7 +2845,8 @@ func buildop(ctxt *obj.Link) {
AVDUP,
AVMOVI,
APRFM,
AVEXT:
AVEXT,
AVXAR:
break
case obj.ANOP,
@ -4205,7 +4212,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
rel.Add = 0
rel.Type = objabi.R_ARM64_GOTPCREL
case 72: /* vaddp/vand/vcmeq/vorr/vadd/veor/vfmla/vfmls/vbit/vbsl/vcmtst/vsub/vbif/vuzip1/vuzip2 Vm.<T>, Vn.<T>, Vd.<T> */
case 72: /* vaddp/vand/vcmeq/vorr/vadd/veor/vfmla/vfmls/vbit/vbsl/vcmtst/vsub/vbif/vuzip1/vuzip2/vrax1 Vm.<T>, Vn.<T>, Vd.<T> */
af := int((p.From.Reg >> 5) & 15)
af3 := int((p.Reg >> 5) & 15)
at := int((p.To.Reg >> 5) & 15)
@ -4269,6 +4276,12 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
} else {
size = 0
}
case AVRAX1:
if af != ARNG_2D {
c.ctxt.Diag("invalid arrangement: %v", p)
}
size = 0
Q = 0
}
o1 |= (uint32(Q&1) << 30) | (uint32(size&3) << 22) | (uint32(rf&31) << 16) | (uint32(r&31) << 5) | uint32(rt&31)
@ -5186,6 +5199,51 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
c.ctxt.Diag("shift amount out of range: %v\n", p)
}
o1 |= uint32(immh)<<19 | uint32(shift)<<16 | uint32(rf&31)<<5 | uint32(p.To.Reg&31)
case 103: /* VEOR3/VBCAX Va.B16, Vm.B16, Vn.B16, Vd.B16 */
ta := (p.From.Reg >> 5) & 15
tm := (p.Reg >> 5) & 15
td := (p.To.Reg >> 5) & 15
tn := ((p.GetFrom3().Reg) >> 5) & 15
if ta != tm || ta != tn || ta != td || ta != ARNG_16B {
c.ctxt.Diag("invalid arrangement: %v", p)
break
}
o1 = c.oprrr(p, p.As)
ra := int(p.From.Reg)
rm := int(p.Reg)
rn := int(p.GetFrom3().Reg)
rd := int(p.To.Reg)
o1 |= uint32(rm&31)<<16 | uint32(ra&31)<<10 | uint32(rn&31)<<5 | uint32(rd)&31
case 104: /* vxar $imm4, Vm.<T>, Vn.<T>, Vd.<T> */
af := ((p.GetFrom3().Reg) >> 5) & 15
at := (p.To.Reg >> 5) & 15
a := (p.Reg >> 5) & 15
index := int(p.From.Offset)
if af != a || af != at {
c.ctxt.Diag("invalid arrangement: %v", p)
break
}
if af != ARNG_2D {
c.ctxt.Diag("invalid arrangement, should be D2: %v", p)
break
}
if index < 0 || index > 63 {
c.ctxt.Diag("illegal offset: %v", p)
}
o1 = c.opirr(p, p.As)
rf := (p.GetFrom3().Reg) & 31
rt := (p.To.Reg) & 31
r := (p.Reg) & 31
o1 |= (uint32(r&31) << 16) | (uint32(index&63) << 10) | (uint32(rf&31) << 5) | uint32(rt&31)
}
out[0] = o1
out[1] = o2
@ -5761,6 +5819,9 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 {
case AVAND:
return 7<<25 | 1<<21 | 7<<10
case AVBCAX:
return 0xCE<<24 | 1<<21
case AVCMEQ:
return 1<<29 | 0x71<<21 | 0x23<<10
@ -5776,12 +5837,18 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 {
case AVEOR:
return 1<<29 | 0x71<<21 | 7<<10
case AVEOR3:
return 0xCE << 24
case AVORR:
return 7<<25 | 5<<21 | 7<<10
case AVREV16:
return 3<<26 | 2<<24 | 1<<21 | 3<<11
case AVRAX1:
return 0xCE<<24 | 3<<21 | 1<<15 | 3<<10
case AVREV32:
return 11<<26 | 2<<24 | 1<<21 | 1<<11
@ -6039,6 +6106,8 @@ func (c *ctxt7) opirr(p *obj.Prog, a obj.As) uint32 {
case AVUSHLL2, AVUXTL2:
return 3<<29 | 15<<24 | 0x29<<10
case AVXAR:
return 0xCE<<24 | 1<<23
}
c.ctxt.Diag("%v: bad irr %v", p, a)