mirror of https://github.com/golang/go.git
cmd/internal/obj/ppc64: add support for DQ-form instructions
POWER9 (ISA 3.0) introduced a new instruction format (DQ-form) for load/store quadword, which addresses memory with an immediate displacement instead of an index register. This change adds support for this new instruction encoding and adds the new load/store quadword instructions (lxv/stxv) to the assembler. This change also adds the missing XX1-form loads/stores (halfword and byte) included in ISA 3.0.

Change-Id: Ibcdf53c342d7a352d64a9403c2fe7b25be9c3b24
Reviewed-on: https://go-review.googlesource.com/c/go/+/200399
Run-TryBot: Carlos Eduardo Seo <cseo@linux.vnet.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
This commit is contained in:
parent
d2c039fb21
commit
6b67f7d65d
|
|
@ -1011,20 +1011,34 @@ label1:
|
||||||
// <MNEMONIC> (RB)(RA*1),XT produces
|
// <MNEMONIC> (RB)(RA*1),XT produces
|
||||||
// <mnemonic> XT,RA,RB
|
// <mnemonic> XT,RA,RB
|
||||||
LXVD2X (R1)(R2*1), VS0
|
LXVD2X (R1)(R2*1), VS0
|
||||||
LXVDSX (R1)(R2*1), VS0
|
|
||||||
LXVW4X (R1)(R2*1), VS0
|
LXVW4X (R1)(R2*1), VS0
|
||||||
|
LXVH8X (R1)(R2*1), VS0
|
||||||
|
LXVB16X (R1)(R2*1), VS0
|
||||||
|
LXVDSX (R1)(R2*1), VS0
|
||||||
LXSDX (R1)(R2*1), VS0
|
LXSDX (R1)(R2*1), VS0
|
||||||
LXSIWAX (R1)(R2*1), VS0
|
LXSIWAX (R1)(R2*1), VS0
|
||||||
LXSIWZX (R1)(R2*1), VS0
|
LXSIWZX (R1)(R2*1), VS0
|
||||||
|
|
||||||
|
// VSX load, DQ-form
|
||||||
|
// <MNEMONIC> DQ(RA), XT produces
|
||||||
|
// <mnemonic> XT, DQ(RA)
|
||||||
|
LXV 32752(R1), VS0
|
||||||
|
|
||||||
// VSX store, XX1-form
|
// VSX store, XX1-form
|
||||||
// <MNEMONIC> XS,(RB)(RA*1) produces
|
// <MNEMONIC> XS,(RB)(RA*1) produces
|
||||||
// <mnemonic> XS,RA,RB
|
// <mnemonic> XS,RA,RB
|
||||||
STXVD2X VS63, (R1)(R2*1)
|
STXVD2X VS63, (R1)(R2*1)
|
||||||
STXVW4X VS63, (R1)(R2*1)
|
STXVW4X VS63, (R1)(R2*1)
|
||||||
|
STXVH8X VS63, (R1)(R2*1)
|
||||||
|
STXVB16X VS63, (R1)(R2*1)
|
||||||
STXSDX VS63, (R1)(R2*1)
|
STXSDX VS63, (R1)(R2*1)
|
||||||
STXSIWX VS63, (R1)(R2*1)
|
STXSIWX VS63, (R1)(R2*1)
|
||||||
|
|
||||||
|
// VSX store, DQ-form
|
||||||
|
// <MNEMONIC> XS, DQ(RA) produces
|
||||||
|
// <mnemonic> XS, DQ(RA)
|
||||||
|
STXV VS63, -32752(R1)
|
||||||
|
|
||||||
// VSX move from VSR, XX1-form
|
// VSX move from VSR, XX1-form
|
||||||
// <MNEMONIC> XS,RA produces
|
// <MNEMONIC> XS,RA produces
|
||||||
// <mnemonic> RA,XS
|
// <mnemonic> RA,XS
|
||||||
|
|
|
||||||
|
|
@ -935,11 +935,15 @@ const (
|
||||||
/* VSX */
|
/* VSX */
|
||||||
ALXV
|
ALXV
|
||||||
ALXVD2X
|
ALXVD2X
|
||||||
ALXVDSX
|
|
||||||
ALXVW4X
|
ALXVW4X
|
||||||
|
ALXVH8X
|
||||||
|
ALXVB16X
|
||||||
|
ALXVDSX
|
||||||
ASTXV
|
ASTXV
|
||||||
ASTXVD2X
|
ASTXVD2X
|
||||||
ASTXVW4X
|
ASTXVW4X
|
||||||
|
ASTXVH8X
|
||||||
|
ASTXVB16X
|
||||||
ALXS
|
ALXS
|
||||||
ALXSDX
|
ALXSDX
|
||||||
ASTXS
|
ASTXS
|
||||||
|
|
|
||||||
|
|
@ -523,11 +523,15 @@ var Anames = []string{
|
||||||
"VMRGOW",
|
"VMRGOW",
|
||||||
"LXV",
|
"LXV",
|
||||||
"LXVD2X",
|
"LXVD2X",
|
||||||
"LXVDSX",
|
|
||||||
"LXVW4X",
|
"LXVW4X",
|
||||||
|
"LXVH8X",
|
||||||
|
"LXVB16X",
|
||||||
|
"LXVDSX",
|
||||||
"STXV",
|
"STXV",
|
||||||
"STXVD2X",
|
"STXVD2X",
|
||||||
"STXVW4X",
|
"STXVW4X",
|
||||||
|
"STXVH8X",
|
||||||
|
"STXVB16X",
|
||||||
"LXS",
|
"LXS",
|
||||||
"LXSDX",
|
"LXSDX",
|
||||||
"STXS",
|
"STXS",
|
||||||
|
|
|
||||||
|
|
@ -476,10 +476,12 @@ var optab = []Optab{
|
||||||
{AVSHASIGMA, C_ANDCON, C_VREG, C_ANDCON, C_VREG, 82, 4, 0}, /* vector SHA sigma, vx-form */
|
{AVSHASIGMA, C_ANDCON, C_VREG, C_ANDCON, C_VREG, 82, 4, 0}, /* vector SHA sigma, vx-form */
|
||||||
|
|
||||||
/* VSX vector load */
|
/* VSX vector load */
|
||||||
{ALXV, C_SOREG, C_NONE, C_NONE, C_VSREG, 87, 4, 0}, /* vsx vector load, xx1-form */
|
{ALXVD2X, C_SOREG, C_NONE, C_NONE, C_VSREG, 87, 4, 0}, /* vsx vector load, xx1-form */
|
||||||
|
{ALXV, C_SOREG, C_NONE, C_NONE, C_VSREG, 96, 4, 0}, /* vsx vector load, dq-form */
|
||||||
|
|
||||||
/* VSX vector store */
|
/* VSX vector store */
|
||||||
{ASTXV, C_VSREG, C_NONE, C_NONE, C_SOREG, 86, 4, 0}, /* vsx vector store, xx1-form */
|
{ASTXVD2X, C_VSREG, C_NONE, C_NONE, C_SOREG, 86, 4, 0}, /* vsx vector store, xx1-form */
|
||||||
|
{ASTXV, C_VSREG, C_NONE, C_NONE, C_SOREG, 97, 4, 0}, /* vsx vector store, dq-form */
|
||||||
|
|
||||||
/* VSX scalar load */
|
/* VSX scalar load */
|
||||||
{ALXS, C_SOREG, C_NONE, C_NONE, C_VSREG, 87, 4, 0}, /* vsx scalar load, xx1-form */
|
{ALXS, C_SOREG, C_NONE, C_NONE, C_VSREG, 87, 4, 0}, /* vsx scalar load, xx1-form */
|
||||||
|
|
@ -1542,14 +1544,22 @@ func buildop(ctxt *obj.Link) {
|
||||||
opset(AVSHASIGMAW, r0)
|
opset(AVSHASIGMAW, r0)
|
||||||
opset(AVSHASIGMAD, r0)
|
opset(AVSHASIGMAD, r0)
|
||||||
|
|
||||||
case ALXV: /* lxvd2x, lxvdsx, lxvw4x */
|
case ALXVD2X: /* lxvd2x, lxvdsx, lxvw4x, lxvh8x, lxvb16x */
|
||||||
opset(ALXVD2X, r0)
|
|
||||||
opset(ALXVDSX, r0)
|
opset(ALXVDSX, r0)
|
||||||
opset(ALXVW4X, r0)
|
opset(ALXVW4X, r0)
|
||||||
|
opset(ALXVH8X, r0)
|
||||||
|
opset(ALXVB16X, r0)
|
||||||
|
|
||||||
case ASTXV: /* stxvd2x, stxvdsx, stxvw4x */
|
case ALXV: /* lxv */
|
||||||
opset(ASTXVD2X, r0)
|
opset(ALXV, r0)
|
||||||
|
|
||||||
|
case ASTXVD2X: /* stxvd2x, stxvdsx, stxvw4x, stxvh8x, stxvb16x */
|
||||||
opset(ASTXVW4X, r0)
|
opset(ASTXVW4X, r0)
|
||||||
|
opset(ASTXVH8X, r0)
|
||||||
|
opset(ASTXVB16X, r0)
|
||||||
|
|
||||||
|
case ASTXV: /* stxv */
|
||||||
|
opset(ASTXV, r0)
|
||||||
|
|
||||||
case ALXS: /* lxsdx */
|
case ALXS: /* lxsdx */
|
||||||
opset(ALXSDX, r0)
|
opset(ALXSDX, r0)
|
||||||
|
|
@ -1981,6 +1991,10 @@ func OPVXX4(o uint32, xo uint32, oe uint32) uint32 {
|
||||||
return o<<26 | xo<<4 | oe<<11
|
return o<<26 | xo<<4 | oe<<11
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// OPDQ builds the fixed opcode bits of a DQ-form instruction word.
// o is placed at bit 26 and up, xo is OR'd in unshifted at the low end of the
// word, and oe is shifted left by 4. The visible users (lxv in opload, stxv in
// opstore) pass o = 61, xo = 1 or 5, and oe = 0; the operand fields are merged
// in afterwards by AOP_DQ.
func OPDQ(o uint32, xo uint32, oe uint32) uint32 {
|
||||||
|
return o<<26 | xo | oe<<4
|
||||||
|
}
|
||||||
|
|
||||||
func OPVX(o uint32, xo uint32, oe uint32, rc uint32) uint32 {
|
func OPVX(o uint32, xo uint32, oe uint32, rc uint32) uint32 {
|
||||||
return o<<26 | xo | oe<<11 | rc&1
|
return o<<26 | xo | oe<<11 | rc&1
|
||||||
}
|
}
|
||||||
|
|
@ -2080,6 +2094,21 @@ func AOP_XX4(op uint32, d uint32, a uint32, b uint32, c uint32) uint32 {
|
||||||
return op | (xt&31)<<21 | (xa&31)<<16 | (xb&31)<<11 | (xc&31)<<6 | (xc&32)>>2 | (xa&32)>>3 | (xb&32)>>4 | (xt&32)>>5
|
return op | (xt&31)<<21 | (xa&31)<<16 | (xb&31)<<11 | (xc&31)<<6 | (xc&32)>>2 | (xa&32)>>3 | (xb&32)>>4 | (xt&32)>>5
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* DQ-form, VSR register, register + offset operands. */
/* AOP_DQ ORs the operand fields into the opcode word 'op': 'd' is the VSX register (a REG_VS0-based */
/* register number), 'a' is the base register RA (only its low 5 bits are used) and 'b' is the byte */
/* offset from RA. It returns the complete 32-bit instruction word. */
|
||||||
|
func AOP_DQ(op uint32, d uint32, a uint32, b uint32) uint32 {
|
||||||
|
/* For the DQ-form encodings, we need the VSX register number to be exactly */
|
||||||
|
/* between 0-63, so we can properly set the SX bit. */
|
||||||
|
r := d - REG_VS0
|
||||||
|
/* The EA for this instruction form is (RA) + DQ << 4, where DQ is a 12-bit signed integer. */
|
||||||
|
/* In order to match the output of the GNU objdump (and make the usage in Go asm easier), the */
|
||||||
|
/* instruction is called using the sign extended value (i.e. a valid offset would be -32752 or 32752, */
|
||||||
|
/* not -2047 or 2047), so 'b' needs to be adjusted to the expected 12-bit DQ value. Bear in mind that */
|
||||||
|
/* the low four bits (0 to 3) of the byte offset 'b' (named 'dq' in the asmout callers, which check */
|
||||||
|
/* this) need to be zero, otherwise this will generate an illegal instruction. */
|
||||||
|
/* If in doubt how this instruction form is encoded, refer to ISA 3.0b, pages 492 and 507. */
|
||||||
|
/* Drop the four always-zero low bits; the mask below keeps the low 12 bits of the result. */
dq := b >> 4
|
||||||
|
/* Field placement: r&31 at bit 21, a&31 at bit 16, the 12-bit dq at bit 4, */
/* and bit 5 of r (registers 32-63, the SX bit) moved down to bit 3. */
return op | (r&31)<<21 | (a&31)<<16 | (dq&4095)<<4 | (r&32)>>2
|
||||||
|
}
|
||||||
|
|
||||||
/* Z23-form, 3-register operands + CY field */
|
/* Z23-form, 3-register operands + CY field */
|
||||||
func AOP_Z23I(op uint32, d uint32, a uint32, b uint32, c uint32) uint32 {
|
func AOP_Z23I(op uint32, d uint32, a uint32, b uint32, c uint32) uint32 {
|
||||||
return op | (d&31)<<21 | (a&31)<<16 | (b&31)<<11 | (c&3)<<7
|
return op | (d&31)<<21 | (a&31)<<16 | (b&31)<<11 | (c&3)<<7
|
||||||
|
|
@ -3686,6 +3715,24 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
|
||||||
rel.Siz = 8
|
rel.Siz = 8
|
||||||
rel.Sym = p.From.Sym
|
rel.Sym = p.From.Sym
|
||||||
rel.Type = objabi.R_ADDRPOWER_TOCREL_DS
|
rel.Type = objabi.R_ADDRPOWER_TOCREL_DS
|
||||||
|
|
||||||
|
case 96: /* VSX load, DQ-form */
|
||||||
|
/* reg imm reg */
|
||||||
|
/* operand order: (RA)(DQ), XT */
|
||||||
|
dq := int16(c.regoff(&p.From))
|
||||||
|
if (dq & 15) != 0 {
|
||||||
|
c.ctxt.Diag("invalid offset for DQ form load/store %v", dq)
|
||||||
|
}
|
||||||
|
o1 = AOP_DQ(c.opload(p.As), uint32(p.To.Reg), uint32(p.From.Reg), uint32(dq))
|
||||||
|
|
||||||
|
case 97: /* VSX store, DQ-form */
|
||||||
|
/* reg imm reg */
|
||||||
|
/* operand order: XT, (RA)(DQ) */
|
||||||
|
dq := int16(c.regoff(&p.To))
|
||||||
|
if (dq & 15) != 0 {
|
||||||
|
c.ctxt.Diag("invalid offset for DQ form load/store %v", dq)
|
||||||
|
}
|
||||||
|
o1 = AOP_DQ(c.opstore(p.As), uint32(p.From.Reg), uint32(p.To.Reg), uint32(dq))
|
||||||
}
|
}
|
||||||
|
|
||||||
out[0] = o1
|
out[0] = o1
|
||||||
|
|
@ -4888,6 +4935,8 @@ func (c *ctxt9) opload(a obj.As) uint32 {
|
||||||
return OPVCC(33, 0, 0, 0) /* lwzu */
|
return OPVCC(33, 0, 0, 0) /* lwzu */
|
||||||
case AMOVW:
|
case AMOVW:
|
||||||
return OPVCC(58, 0, 0, 0) | 1<<1 /* lwa */
|
return OPVCC(58, 0, 0, 0) | 1<<1 /* lwa */
|
||||||
|
case ALXV:
|
||||||
|
return OPDQ(61, 1, 0) /* lxv - ISA v3.00 */
|
||||||
|
|
||||||
/* no AMOVWU */
|
/* no AMOVWU */
|
||||||
case AMOVB, AMOVBZ:
|
case AMOVB, AMOVBZ:
|
||||||
|
|
@ -5007,14 +5056,16 @@ func (c *ctxt9) oploadx(a obj.As) uint32 {
|
||||||
/* ISA 2.06 enables these for POWER7. */
|
/* ISA 2.06 enables these for POWER7. */
|
||||||
case ALXVD2X:
|
case ALXVD2X:
|
||||||
return OPVXX1(31, 844, 0) /* lxvd2x - v2.06 */
|
return OPVXX1(31, 844, 0) /* lxvd2x - v2.06 */
|
||||||
case ALXVDSX:
|
|
||||||
return OPVXX1(31, 332, 0) /* lxvdsx - v2.06 */
|
|
||||||
case ALXVW4X:
|
case ALXVW4X:
|
||||||
return OPVXX1(31, 780, 0) /* lxvw4x - v2.06 */
|
return OPVXX1(31, 780, 0) /* lxvw4x - v2.06 */
|
||||||
|
case ALXVH8X:
|
||||||
|
return OPVXX1(31, 812, 0) /* lxvh8x - v3.00 */
|
||||||
|
case ALXVB16X:
|
||||||
|
return OPVXX1(31, 876, 0) /* lxvb16x - v3.00 */
|
||||||
|
case ALXVDSX:
|
||||||
|
return OPVXX1(31, 332, 0) /* lxvdsx - v2.06 */
|
||||||
case ALXSDX:
|
case ALXSDX:
|
||||||
return OPVXX1(31, 588, 0) /* lxsdx - v2.06 */
|
return OPVXX1(31, 588, 0) /* lxsdx - v2.06 */
|
||||||
|
|
||||||
case ALXSIWAX:
|
case ALXSIWAX:
|
||||||
return OPVXX1(31, 76, 0) /* lxsiwax - v2.07 */
|
return OPVXX1(31, 76, 0) /* lxsiwax - v2.07 */
|
||||||
case ALXSIWZX:
|
case ALXSIWZX:
|
||||||
|
|
@ -5065,6 +5116,8 @@ func (c *ctxt9) opstore(a obj.As) uint32 {
|
||||||
return OPVCC(62, 0, 0, 0) /* std */
|
return OPVCC(62, 0, 0, 0) /* std */
|
||||||
case AMOVDU:
|
case AMOVDU:
|
||||||
return OPVCC(62, 0, 0, 1) /* stdu */
|
return OPVCC(62, 0, 0, 1) /* stdu */
|
||||||
|
case ASTXV:
|
||||||
|
return OPDQ(61, 5, 0) /* stxv */
|
||||||
}
|
}
|
||||||
|
|
||||||
c.ctxt.Diag("unknown store opcode %v", a)
|
c.ctxt.Diag("unknown store opcode %v", a)
|
||||||
|
|
@ -5145,12 +5198,17 @@ func (c *ctxt9) opstorex(a obj.As) uint32 {
|
||||||
return OPVXX1(31, 972, 0) /* stxvd2x - v2.06 */
|
return OPVXX1(31, 972, 0) /* stxvd2x - v2.06 */
|
||||||
case ASTXVW4X:
|
case ASTXVW4X:
|
||||||
return OPVXX1(31, 908, 0) /* stxvw4x - v2.06 */
|
return OPVXX1(31, 908, 0) /* stxvw4x - v2.06 */
|
||||||
|
case ASTXVH8X:
|
||||||
|
return OPVXX1(31, 940, 0) /* stxvh8x - v3.00 */
|
||||||
|
case ASTXVB16X:
|
||||||
|
return OPVXX1(31, 1004, 0) /* stxvb16x - v3.00 */
|
||||||
|
|
||||||
case ASTXSDX:
|
case ASTXSDX:
|
||||||
return OPVXX1(31, 716, 0) /* stxsdx - v2.06 */
|
return OPVXX1(31, 716, 0) /* stxsdx - v2.06 */
|
||||||
|
|
||||||
case ASTXSIWX:
|
case ASTXSIWX:
|
||||||
return OPVXX1(31, 140, 0) /* stxsiwx - v2.07 */
|
return OPVXX1(31, 140, 0) /* stxsiwx - v2.07 */
|
||||||
|
|
||||||
/* End of vector scalar instructions */
|
/* End of vector scalar instructions */
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue