mirror of https://github.com/golang/go.git
cmd/asm: add more SIMD instructions on arm64
This CL adds USHLL, USHLL2, UZP1, UZP2, and BIF instructions requested by #40725. And since UXTL* are aliases of USHLL*, this CL also merges them into one case. Updates #40725 Change-Id: I404a4fdaf953319f72eea548175bec1097a2a816 Reviewed-on: https://go-review.googlesource.com/c/go/+/253659 Reviewed-by: Cherry Zhang <cherryyz@google.com> Run-TryBot: Cherry Zhang <cherryyz@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
This commit is contained in:
parent
9b2df72b63
commit
d7ab277eed
|
|
@ -156,6 +156,26 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
|
|||
VCMTST V2.B8, V29.B8, V2.B8 // a28f220e
|
||||
VCMTST V2.D2, V23.D2, V3.D2 // e38ee24e
|
||||
VSUB V2.B8, V30.B8, V30.B8 // de87222e
|
||||
VUZP1 V0.B8, V30.B8, V1.B8 // c11b000e
|
||||
VUZP1 V1.B16, V29.B16, V2.B16 // a21b014e
|
||||
VUZP1 V2.H4, V28.H4, V3.H4 // 831b420e
|
||||
VUZP1 V3.H8, V27.H8, V4.H8 // 641b434e
|
||||
VUZP1 V28.S2, V2.S2, V5.S2 // 45189c0e
|
||||
VUZP1 V29.S4, V1.S4, V6.S4 // 26189d4e
|
||||
VUZP1 V30.D2, V0.D2, V7.D2 // 0718de4e
|
||||
VUZP2 V0.D2, V30.D2, V1.D2 // c15bc04e
|
||||
VUZP2 V30.D2, V0.D2, V29.D2 // 1d58de4e
|
||||
VUSHLL $0, V30.B8, V30.H8 // dea7082f
|
||||
VUSHLL $0, V30.H4, V29.S4 // dda7102f
|
||||
VUSHLL $0, V29.S2, V2.D2 // a2a7202f
|
||||
VUSHLL2 $0, V30.B16, V2.H8 // c2a7086f
|
||||
VUSHLL2 $0, V30.H8, V30.S4 // dea7106f
|
||||
VUSHLL2 $0, V29.S4, V2.D2 // a2a7206f
|
||||
VUSHLL $7, V30.B8, V30.H8 // dea70f2f
|
||||
VUSHLL $15, V30.H4, V29.S4 // dda71f2f
|
||||
VUSHLL2 $31, V30.S4, V2.D2 // c2a73f6f
|
||||
VBIF V0.B8, V30.B8, V1.B8 // c11fe02e
|
||||
VBIF V30.B16, V0.B16, V2.B16 // 021cfe6e
|
||||
MOVD (R2)(R6.SXTW), R4 // 44c866f8
|
||||
MOVD (R3)(R6), R5 // MOVD (R3)(R6*1), R5 // 656866f8
|
||||
MOVD (R2)(R6), R4 // MOVD (R2)(R6*1), R4 // 446866f8
|
||||
|
|
|
|||
|
|
@ -345,4 +345,12 @@ TEXT errors(SB),$0
|
|||
VUXTL V30.D2, V30.H8 // ERROR "operand mismatch"
|
||||
VUXTL2 V20.B8, V21.H8 // ERROR "operand mismatch"
|
||||
VUXTL V3.D2, V4.B8 // ERROR "operand mismatch"
|
||||
VUZP1 V0.B8, V30.B8, V1.B16 // ERROR "operand mismatch"
|
||||
VUZP2 V0.Q1, V30.Q1, V1.Q1 // ERROR "invalid arrangement"
|
||||
VUSHLL $0, V30.D2, V30.H8 // ERROR "operand mismatch"
|
||||
VUSHLL2 $0, V20.B8, V21.H8 // ERROR "operand mismatch"
|
||||
VUSHLL $8, V30.B8, V30.H8 // ERROR "shift amount out of range"
|
||||
VUSHLL2 $32, V30.S4, V2.D2 // ERROR "shift amount out of range"
|
||||
VBIF V0.B8, V1.B8, V2.B16 // ERROR "operand mismatch"
|
||||
VBIF V0.D2, V1.D2, V2.D2 // ERROR "invalid arrangement"
|
||||
RET
|
||||
|
|
|
|||
|
|
@ -954,6 +954,7 @@ const (
|
|||
AVADD
|
||||
AVADDP
|
||||
AVAND
|
||||
AVBIF
|
||||
AVCMEQ
|
||||
AVCNT
|
||||
AVEOR
|
||||
|
|
@ -986,6 +987,12 @@ const (
|
|||
AVEXT
|
||||
AVRBIT
|
||||
AVUSHR
|
||||
AVUSHLL
|
||||
AVUSHLL2
|
||||
AVUXTL
|
||||
AVUXTL2
|
||||
AVUZP1
|
||||
AVUZP2
|
||||
AVSHL
|
||||
AVSRI
|
||||
AVBSL
|
||||
|
|
@ -994,8 +1001,6 @@ const (
|
|||
AVZIP1
|
||||
AVZIP2
|
||||
AVCMTST
|
||||
AVUXTL
|
||||
AVUXTL2
|
||||
ALAST
|
||||
AB = obj.AJMP
|
||||
ABL = obj.ACALL
|
||||
|
|
|
|||
|
|
@ -461,6 +461,7 @@ var Anames = []string{
|
|||
"VADD",
|
||||
"VADDP",
|
||||
"VAND",
|
||||
"VBIF",
|
||||
"VCMEQ",
|
||||
"VCNT",
|
||||
"VEOR",
|
||||
|
|
@ -493,6 +494,12 @@ var Anames = []string{
|
|||
"VEXT",
|
||||
"VRBIT",
|
||||
"VUSHR",
|
||||
"VUSHLL",
|
||||
"VUSHLL2",
|
||||
"VUXTL",
|
||||
"VUXTL2",
|
||||
"VUZP1",
|
||||
"VUZP2",
|
||||
"VSHL",
|
||||
"VSRI",
|
||||
"VBSL",
|
||||
|
|
@ -501,7 +508,5 @@ var Anames = []string{
|
|||
"VZIP1",
|
||||
"VZIP2",
|
||||
"VCMTST",
|
||||
"VUXTL",
|
||||
"VUXTL2",
|
||||
"LAST",
|
||||
}
|
||||
|
|
|
|||
|
|
@ -480,6 +480,7 @@ var optab = []Optab{
|
|||
{AVTBL, C_ARNG, C_NONE, C_LIST, C_ARNG, 100, 4, 0, 0, 0},
|
||||
{AVUSHR, C_VCON, C_ARNG, C_NONE, C_ARNG, 95, 4, 0, 0, 0},
|
||||
{AVZIP1, C_ARNG, C_ARNG, C_NONE, C_ARNG, 72, 4, 0, 0, 0},
|
||||
{AVUSHLL, C_VCON, C_ARNG, C_NONE, C_ARNG, 102, 4, 0, 0, 0},
|
||||
{AVUXTL, C_ARNG, C_NONE, C_NONE, C_ARNG, 102, 4, 0, 0, 0},
|
||||
|
||||
/* conditional operations */
|
||||
|
|
@ -2751,6 +2752,9 @@ func buildop(ctxt *obj.Link) {
|
|||
oprangeset(AVBSL, t)
|
||||
oprangeset(AVBIT, t)
|
||||
oprangeset(AVCMTST, t)
|
||||
oprangeset(AVUZP1, t)
|
||||
oprangeset(AVUZP2, t)
|
||||
oprangeset(AVBIF, t)
|
||||
|
||||
case AVADD:
|
||||
oprangeset(AVSUB, t)
|
||||
|
|
@ -2801,6 +2805,9 @@ func buildop(ctxt *obj.Link) {
|
|||
case AVUXTL:
|
||||
oprangeset(AVUXTL2, t)
|
||||
|
||||
case AVUSHLL:
|
||||
oprangeset(AVUSHLL2, t)
|
||||
|
||||
case AVLD1R:
|
||||
oprangeset(AVLD2, t)
|
||||
oprangeset(AVLD2R, t)
|
||||
|
|
@ -4177,7 +4184,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
|
|||
rel.Add = 0
|
||||
rel.Type = objabi.R_ARM64_GOTPCREL
|
||||
|
||||
case 72: /* vaddp/vand/vcmeq/vorr/vadd/veor/vfmla/vfmls/vbit/vbsl/vcmtst/vsub Vm.<T>, Vn.<T>, Vd.<T> */
|
||||
case 72: /* vaddp/vand/vcmeq/vorr/vadd/veor/vfmla/vfmls/vbit/vbsl/vcmtst/vsub/vbif/vuzip1/vuzip2 Vm.<T>, Vn.<T>, Vd.<T> */
|
||||
af := int((p.From.Reg >> 5) & 15)
|
||||
af3 := int((p.Reg >> 5) & 15)
|
||||
at := int((p.To.Reg >> 5) & 15)
|
||||
|
|
@ -4219,7 +4226,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
|
|||
}
|
||||
|
||||
switch p.As {
|
||||
case AVORR, AVAND, AVEOR, AVBIT, AVBSL:
|
||||
case AVORR, AVAND, AVEOR, AVBIT, AVBSL, AVBIF:
|
||||
if af != ARNG_16B && af != ARNG_8B {
|
||||
c.ctxt.Diag("invalid arrangement: %v", p)
|
||||
}
|
||||
|
|
@ -4233,7 +4240,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
|
|||
size = 0
|
||||
case AVBSL:
|
||||
size = 1
|
||||
case AVORR, AVBIT:
|
||||
case AVORR, AVBIT, AVBIF:
|
||||
size = 2
|
||||
case AVFMLA, AVFMLS:
|
||||
if af == ARNG_2D {
|
||||
|
|
@ -5120,56 +5127,44 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
|
|||
case 101: // FOMVQ/FMOVD $vcon, Vd -> load from constant pool.
|
||||
o1 = c.omovlit(p.As, p, &p.From, int(p.To.Reg))
|
||||
|
||||
case 102: // VUXTL{2} Vn.<Tb>, Vd.<Ta>
|
||||
af := int((p.From.Reg >> 5) & 15)
|
||||
at := int((p.To.Reg >> 5) & 15)
|
||||
var Q, immh uint32
|
||||
switch at {
|
||||
case ARNG_8H:
|
||||
if af == ARNG_8B {
|
||||
immh = 1
|
||||
Q = 0
|
||||
} else if af == ARNG_16B {
|
||||
immh = 1
|
||||
Q = 1
|
||||
} else {
|
||||
c.ctxt.Diag("operand mismatch: %v\n", p)
|
||||
}
|
||||
case ARNG_4S:
|
||||
if af == ARNG_4H {
|
||||
immh = 2
|
||||
Q = 0
|
||||
} else if af == ARNG_8H {
|
||||
immh = 2
|
||||
Q = 1
|
||||
} else {
|
||||
c.ctxt.Diag("operand mismatch: %v\n", p)
|
||||
}
|
||||
case ARNG_2D:
|
||||
if af == ARNG_2S {
|
||||
immh = 4
|
||||
Q = 0
|
||||
} else if af == ARNG_4S {
|
||||
immh = 4
|
||||
Q = 1
|
||||
} else {
|
||||
c.ctxt.Diag("operand mismatch: %v\n", p)
|
||||
}
|
||||
case 102: /* vushll, vushll2, vuxtl, vuxtl2 */
|
||||
o1 = c.opirr(p, p.As)
|
||||
rf := p.Reg
|
||||
af := uint8((p.Reg >> 5) & 15)
|
||||
at := uint8((p.To.Reg >> 5) & 15)
|
||||
shift := int(p.From.Offset)
|
||||
if p.As == AVUXTL || p.As == AVUXTL2 {
|
||||
rf = p.From.Reg
|
||||
af = uint8((p.From.Reg >> 5) & 15)
|
||||
shift = 0
|
||||
}
|
||||
|
||||
pack := func(q, x, y uint8) uint32 {
|
||||
return uint32(q)<<16 | uint32(x)<<8 | uint32(y)
|
||||
}
|
||||
|
||||
var Q uint8 = uint8(o1>>30) & 1
|
||||
var immh, width uint8
|
||||
switch pack(Q, af, at) {
|
||||
case pack(0, ARNG_8B, ARNG_8H):
|
||||
immh, width = 1, 8
|
||||
case pack(1, ARNG_16B, ARNG_8H):
|
||||
immh, width = 1, 8
|
||||
case pack(0, ARNG_4H, ARNG_4S):
|
||||
immh, width = 2, 16
|
||||
case pack(1, ARNG_8H, ARNG_4S):
|
||||
immh, width = 2, 16
|
||||
case pack(0, ARNG_2S, ARNG_2D):
|
||||
immh, width = 4, 32
|
||||
case pack(1, ARNG_4S, ARNG_2D):
|
||||
immh, width = 4, 32
|
||||
default:
|
||||
c.ctxt.Diag("operand mismatch: %v\n", p)
|
||||
}
|
||||
|
||||
if p.As == AVUXTL && Q == 1 {
|
||||
c.ctxt.Diag("operand mismatch: %v\n", p)
|
||||
if !(0 <= shift && shift <= int(width-1)) {
|
||||
c.ctxt.Diag("shift amount out of range: %v\n", p)
|
||||
}
|
||||
if p.As == AVUXTL2 && Q == 0 {
|
||||
c.ctxt.Diag("operand mismatch: %v\n", p)
|
||||
}
|
||||
|
||||
o1 = c.oprrr(p, p.As)
|
||||
rf := int((p.From.Reg) & 31)
|
||||
rt := int((p.To.Reg) & 31)
|
||||
o1 |= Q<<30 | immh<<19 | uint32((rf&31)<<5) | uint32(rt&31)
|
||||
o1 |= uint32(immh)<<19 | uint32(shift)<<16 | uint32(rf&31)<<5 | uint32(p.To.Reg&31)
|
||||
}
|
||||
out[0] = o1
|
||||
out[1] = o2
|
||||
|
|
@ -5802,6 +5797,9 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 {
|
|||
case AVLD2R, AVLD4R:
|
||||
return 0xD<<24 | 3<<21
|
||||
|
||||
case AVBIF:
|
||||
return 1<<29 | 7<<25 | 7<<21 | 7<<10
|
||||
|
||||
case AVBIT:
|
||||
return 1<<29 | 0x75<<21 | 7<<10
|
||||
|
||||
|
|
@ -5811,8 +5809,11 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 {
|
|||
case AVCMTST:
|
||||
return 0xE<<24 | 1<<21 | 0x23<<10
|
||||
|
||||
case AVUXTL, AVUXTL2:
|
||||
return 0x5e<<23 | 0x29<<10
|
||||
case AVUZP1:
|
||||
return 7<<25 | 3<<11
|
||||
|
||||
case AVUZP2:
|
||||
return 7<<25 | 1<<14 | 3<<11
|
||||
}
|
||||
|
||||
c.ctxt.Diag("%v: bad rrr %d %v", p, a, a)
|
||||
|
|
@ -6011,6 +6012,12 @@ func (c *ctxt7) opirr(p *obj.Prog, a obj.As) uint32 {
|
|||
|
||||
case AVSRI:
|
||||
return 0x5E<<23 | 17<<10
|
||||
|
||||
case AVUSHLL, AVUXTL:
|
||||
return 1<<29 | 15<<24 | 0x29<<10
|
||||
|
||||
case AVUSHLL2, AVUXTL2:
|
||||
return 3<<29 | 15<<24 | 0x29<<10
|
||||
}
|
||||
|
||||
c.ctxt.Diag("%v: bad irr %v", p, a)
|
||||
|
|
|
|||
Loading…
Reference in New Issue