mirror of https://github.com/golang/go.git
cmd/asm: Add SHA3 hardware instructions for ARM64
Armv8.2-SHA introduced four SHA3-related instructions:

    EOR3 <Vd>.16B, <Vn>.16B, <Vm>.16B, <Va>.16B
    RAX1 <Vd>.2D, <Vn>.2D, <Vm>.2D
    XAR <Vd>.2D, <Vn>.2D, <Vm>.2D, #<imm6>
    BCAX <Vd>.16B, <Vn>.16B, <Vm>.16B, <Va>.16B

We convert them into Go asm style as:

    VEOR3 <Va>.B16, <Vm>.B16, <Vn>.B16, <Vd>.B16
    VRAX1 <Vm>.D2, <Vn>.D2, <Vd>.D2
    VXAR $imm6, <Vm>.D2, <Vn>.D2, <Vd>.D2
    VBCAX <Va>.B16, <Vm>.B16, <Vn>.B16, <Vd>.B16

Armv8 Reference Manual:
* EOR3 (Three-way Exclusive OR) on C7.2.42
* RAX1 (Rotate and Exclusive OR) on C7.2.217
* XAR (Exclusive OR and Rotate) on C7.2.401
* BCAX (Bit Clear and Exclusive OR) on C7.2.12

Change-Id: I9a5d1b5ad508ed8fd5289d535906c54d9a63ca5a
Reviewed-on: https://go-review.googlesource.com/c/go/+/180757
Run-TryBot: Meng Zhuo <mzh@golangcn.org>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Trust: Emmanuel Odeke <emm.odeke@gmail.com>
This commit is contained in:
parent
d317ba5d44
commit
3036b76df0
|
|
@ -81,6 +81,8 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
|
|||
SHA512H2 V4.D2, V3, V2 // 628464ce
|
||||
SHA512SU0 V9.D2, V8.D2 // 2881c0ce
|
||||
SHA512SU1 V7.D2, V6.D2, V5.D2 // c58867ce
|
||||
VRAX1 V26.D2, V29.D2, V30.D2 // be8f7ace
|
||||
VXAR $63, V27.D2, V21.D2, V26.D2 // bafe9bce
|
||||
VADDV V0.S4, V0 // 00b8b14e
|
||||
VMOVI $82, V0.B16 // 40e6024f
|
||||
VUADDLV V6.B16, V6 // c638306e
|
||||
|
|
@ -139,6 +141,8 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
|
|||
VTBL V14.B16, [V3.B16, V4.B16, V5.B16], V17.B16 // 71400e4e
|
||||
VTBL V13.B16, [V29.B16, V30.B16, V31.B16, V0.B16], V28.B16 // bc630d4e
|
||||
VTBL V3.B8, [V27.B16], V8.B8 // 6803030e
|
||||
VEOR3 V2.B16, V7.B16, V12.B16, V25.B16 // 990907ce
|
||||
VBCAX V1.B16, V2.B16, V26.B16, V31.B16 // 5f0722ce
|
||||
VZIP1 V16.H8, V3.H8, V19.H8 // 7338504e
|
||||
VZIP2 V22.D2, V25.D2, V21.D2 // 357bd64e
|
||||
VZIP1 V6.D2, V9.D2, V11.D2 // 2b39c64e
|
||||
|
|
|
|||
|
|
@ -958,9 +958,11 @@ const (
|
|||
AVADDP
|
||||
AVAND
|
||||
AVBIF
|
||||
AVBCAX
|
||||
AVCMEQ
|
||||
AVCNT
|
||||
AVEOR
|
||||
AVEOR3
|
||||
AVMOV
|
||||
AVLD1
|
||||
AVLD2
|
||||
|
|
@ -989,6 +991,7 @@ const (
|
|||
AVPMULL2
|
||||
AVEXT
|
||||
AVRBIT
|
||||
AVRAX1
|
||||
AVUSHR
|
||||
AVUSHLL
|
||||
AVUSHLL2
|
||||
|
|
@ -1001,6 +1004,7 @@ const (
|
|||
AVBSL
|
||||
AVBIT
|
||||
AVTBL
|
||||
AVXAR
|
||||
AVZIP1
|
||||
AVZIP2
|
||||
AVCMTST
|
||||
|
|
|
|||
|
|
@ -464,9 +464,11 @@ var Anames = []string{
|
|||
"VADDP",
|
||||
"VAND",
|
||||
"VBIF",
|
||||
"VBCAX",
|
||||
"VCMEQ",
|
||||
"VCNT",
|
||||
"VEOR",
|
||||
"VEOR3",
|
||||
"VMOV",
|
||||
"VLD1",
|
||||
"VLD2",
|
||||
|
|
@ -495,6 +497,7 @@ var Anames = []string{
|
|||
"VPMULL2",
|
||||
"VEXT",
|
||||
"VRBIT",
|
||||
"VRAX1",
|
||||
"VUSHR",
|
||||
"VUSHLL",
|
||||
"VUSHLL2",
|
||||
|
|
@ -507,6 +510,7 @@ var Anames = []string{
|
|||
"VBSL",
|
||||
"VBIT",
|
||||
"VTBL",
|
||||
"VXAR",
|
||||
"VZIP1",
|
||||
"VZIP2",
|
||||
"VCMTST",
|
||||
|
|
|
|||
|
|
@ -843,6 +843,8 @@ var optab = []Optab{
|
|||
{ASHA256H, C_ARNG, C_VREG, C_NONE, C_VREG, 1, 4, 0, 0, 0},
|
||||
{AVREV32, C_ARNG, C_NONE, C_NONE, C_ARNG, 83, 4, 0, 0, 0},
|
||||
{AVPMULL, C_ARNG, C_ARNG, C_NONE, C_ARNG, 93, 4, 0, 0, 0},
|
||||
{AVEOR3, C_ARNG, C_ARNG, C_ARNG, C_ARNG, 103, 4, 0, 0, 0},
|
||||
{AVXAR, C_VCON, C_ARNG, C_ARNG, C_ARNG, 104, 4, 0, 0, 0},
|
||||
|
||||
{obj.AUNDEF, C_NONE, C_NONE, C_NONE, C_NONE, 90, 4, 0, 0, 0},
|
||||
{obj.APCDATA, C_VCON, C_NONE, C_NONE, C_VCON, 0, 0, 0, 0, 0},
|
||||
|
|
@ -2769,6 +2771,7 @@ func buildop(ctxt *obj.Link) {
|
|||
|
||||
case AVADD:
|
||||
oprangeset(AVSUB, t)
|
||||
oprangeset(AVRAX1, t)
|
||||
|
||||
case AAESD:
|
||||
oprangeset(AAESE, t)
|
||||
|
|
@ -2827,6 +2830,9 @@ func buildop(ctxt *obj.Link) {
|
|||
oprangeset(AVLD4, t)
|
||||
oprangeset(AVLD4R, t)
|
||||
|
||||
case AVEOR3:
|
||||
oprangeset(AVBCAX, t)
|
||||
|
||||
case ASHA1H,
|
||||
AVCNT,
|
||||
AVMOV,
|
||||
|
|
@ -2839,7 +2845,8 @@ func buildop(ctxt *obj.Link) {
|
|||
AVDUP,
|
||||
AVMOVI,
|
||||
APRFM,
|
||||
AVEXT:
|
||||
AVEXT,
|
||||
AVXAR:
|
||||
break
|
||||
|
||||
case obj.ANOP,
|
||||
|
|
@ -4205,7 +4212,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
|
|||
rel.Add = 0
|
||||
rel.Type = objabi.R_ARM64_GOTPCREL
|
||||
|
||||
case 72: /* vaddp/vand/vcmeq/vorr/vadd/veor/vfmla/vfmls/vbit/vbsl/vcmtst/vsub/vbif/vuzip1/vuzip2 Vm.<T>, Vn.<T>, Vd.<T> */
|
||||
case 72: /* vaddp/vand/vcmeq/vorr/vadd/veor/vfmla/vfmls/vbit/vbsl/vcmtst/vsub/vbif/vuzip1/vuzip2/vrax1 Vm.<T>, Vn.<T>, Vd.<T> */
|
||||
af := int((p.From.Reg >> 5) & 15)
|
||||
af3 := int((p.Reg >> 5) & 15)
|
||||
at := int((p.To.Reg >> 5) & 15)
|
||||
|
|
@ -4269,6 +4276,12 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
|
|||
} else {
|
||||
size = 0
|
||||
}
|
||||
case AVRAX1:
|
||||
if af != ARNG_2D {
|
||||
c.ctxt.Diag("invalid arrangement: %v", p)
|
||||
}
|
||||
size = 0
|
||||
Q = 0
|
||||
}
|
||||
|
||||
o1 |= (uint32(Q&1) << 30) | (uint32(size&3) << 22) | (uint32(rf&31) << 16) | (uint32(r&31) << 5) | uint32(rt&31)
|
||||
|
|
@ -5186,6 +5199,51 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
|
|||
c.ctxt.Diag("shift amount out of range: %v\n", p)
|
||||
}
|
||||
o1 |= uint32(immh)<<19 | uint32(shift)<<16 | uint32(rf&31)<<5 | uint32(p.To.Reg&31)
|
||||
case 103: /* VEOR3/VBCAX Va.B16, Vm.B16, Vn.B16, Vd.B16 */
|
||||
ta := (p.From.Reg >> 5) & 15
|
||||
tm := (p.Reg >> 5) & 15
|
||||
td := (p.To.Reg >> 5) & 15
|
||||
tn := ((p.GetFrom3().Reg) >> 5) & 15
|
||||
|
||||
if ta != tm || ta != tn || ta != td || ta != ARNG_16B {
|
||||
c.ctxt.Diag("invalid arrangement: %v", p)
|
||||
break
|
||||
}
|
||||
|
||||
o1 = c.oprrr(p, p.As)
|
||||
ra := int(p.From.Reg)
|
||||
rm := int(p.Reg)
|
||||
rn := int(p.GetFrom3().Reg)
|
||||
rd := int(p.To.Reg)
|
||||
o1 |= uint32(rm&31)<<16 | uint32(ra&31)<<10 | uint32(rn&31)<<5 | uint32(rd)&31
|
||||
|
||||
case 104: /* vxar $imm6, Vm.<T>, Vn.<T>, Vd.<T> */
|
||||
af := ((p.GetFrom3().Reg) >> 5) & 15
|
||||
at := (p.To.Reg >> 5) & 15
|
||||
a := (p.Reg >> 5) & 15
|
||||
index := int(p.From.Offset)
|
||||
|
||||
if af != a || af != at {
|
||||
c.ctxt.Diag("invalid arrangement: %v", p)
|
||||
break
|
||||
}
|
||||
|
||||
if af != ARNG_2D {
|
||||
c.ctxt.Diag("invalid arrangement, should be D2: %v", p)
|
||||
break
|
||||
}
|
||||
|
||||
if index < 0 || index > 63 {
|
||||
c.ctxt.Diag("illegal offset: %v", p)
|
||||
}
|
||||
|
||||
o1 = c.opirr(p, p.As)
|
||||
rf := (p.GetFrom3().Reg) & 31
|
||||
rt := (p.To.Reg) & 31
|
||||
r := (p.Reg) & 31
|
||||
|
||||
o1 |= (uint32(r&31) << 16) | (uint32(index&63) << 10) | (uint32(rf&31) << 5) | uint32(rt&31)
|
||||
|
||||
}
|
||||
out[0] = o1
|
||||
out[1] = o2
|
||||
|
|
@ -5761,6 +5819,9 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 {
|
|||
case AVAND:
|
||||
return 7<<25 | 1<<21 | 7<<10
|
||||
|
||||
case AVBCAX:
|
||||
return 0xCE<<24 | 1<<21
|
||||
|
||||
case AVCMEQ:
|
||||
return 1<<29 | 0x71<<21 | 0x23<<10
|
||||
|
||||
|
|
@ -5776,12 +5837,18 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 {
|
|||
case AVEOR:
|
||||
return 1<<29 | 0x71<<21 | 7<<10
|
||||
|
||||
case AVEOR3:
|
||||
return 0xCE << 24
|
||||
|
||||
case AVORR:
|
||||
return 7<<25 | 5<<21 | 7<<10
|
||||
|
||||
case AVREV16:
|
||||
return 3<<26 | 2<<24 | 1<<21 | 3<<11
|
||||
|
||||
case AVRAX1:
|
||||
return 0xCE<<24 | 3<<21 | 1<<15 | 3<<10
|
||||
|
||||
case AVREV32:
|
||||
return 11<<26 | 2<<24 | 1<<21 | 1<<11
|
||||
|
||||
|
|
@ -6039,6 +6106,8 @@ func (c *ctxt7) opirr(p *obj.Prog, a obj.As) uint32 {
|
|||
|
||||
case AVUSHLL2, AVUXTL2:
|
||||
return 3<<29 | 15<<24 | 0x29<<10
|
||||
case AVXAR:
|
||||
return 0xCE<<24 | 1<<23
|
||||
}
|
||||
|
||||
c.ctxt.Diag("%v: bad irr %v", p, a)
|
||||
|
|
|
|||
Loading…
Reference in New Issue