cmd/asm,cmd/compile: generate less instructions for most 32 bit constant adds on ppc64x

For GOPPC64 < 10 targets, most large 32 bit constants (those
exceeding int16 capacity) can be added using two instructions
instead of 3.

This cannot be done for values greater than 0x7FFF7FFF, so this
must be done during asm preprocessing as the optab matching
rules cannot differentiate this special case.

Likewise, constants 0x8000 <= x < 0x10000 are not converted. The
assembler currently generates 2 instructions sequences for these
constants.

Change-Id: I1ccc839c6c28fc32f15d286b2e52e2d22a2a06d4
Reviewed-on: https://go-review.googlesource.com/c/go/+/568116
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Run-TryBot: Paul Murphy <murp@ibm.com>
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
This commit is contained in:
Paul E. Murphy 2024-02-16 13:29:16 -06:00 committed by Paul Murphy
parent a46ecdca36
commit 6e5398bad1
3 changed files with 67 additions and 7 deletions

View File

@ -192,8 +192,10 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
// this is OK since r0 == $0, but the latter is preferred.
ADD $0, R6, R5 // 7ca60214
ADD $1234567, R5 // 641f001263ffd6877cbf2a14 or 0600001238a5d687
ADD $1234567, R5, R6 // 641f001263ffd6877cdf2a14 or 0600001238c5d687
//TODO: the assembler rewrites these into ADDIS $19, R5, Rx and ADD $-10617, Rx, Rx, but the test only sees the first ADDIS
ADD $1234567, R5 // 3ca50013 or 0600001238a5d687
ADD $1234567, R5, R6 // 3cc50013 or 0600001238c5d687
ADDEX R3, R5, $3, R6 // 7cc32f54
ADDEX R3, $3, R5, R6 // 7cc32f54
ADDIS $8, R3 // 3c630008

View File

@ -35,6 +35,7 @@ import (
"cmd/internal/src"
"cmd/internal/sys"
"internal/abi"
"internal/buildcfg"
"log"
"math/bits"
)
@ -203,17 +204,48 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
}
case ASUB:
if p.From.Type == obj.TYPE_CONST {
p.From.Offset = -p.From.Offset
p.As = AADD
if p.From.Type != obj.TYPE_CONST {
break
}
// Rewrite SUB $const,... into ADD $-const,...
p.From.Offset = -p.From.Offset
p.As = AADD
// This is now an ADD opcode, try simplifying it below.
fallthrough
// Rewrite ADD/OR/XOR/ANDCC $const,... forms into ADDIS/ORIS/XORIS/ANDISCC
case AADD:
// AADD can encode signed 34b values, ensure it is a valid signed 32b integer too.
if p.From.Type == obj.TYPE_CONST && p.From.Offset&0xFFFF == 0 && int64(int32(p.From.Offset)) == p.From.Offset && p.From.Offset != 0 {
// Don't rewrite if this is not adding a constant value, or is not an int32
if p.From.Type != obj.TYPE_CONST || p.From.Offset == 0 || int64(int32(p.From.Offset)) != p.From.Offset {
break
}
if p.From.Offset&0xFFFF == 0 {
// The constant can be added using ADDIS
p.As = AADDIS
p.From.Offset >>= 16
} else if buildcfg.GOPPC64 >= 10 {
// Let the assembler generate paddi for large constants.
break
} else if (p.From.Offset < -0x8000 && int64(int32(p.From.Offset)) == p.From.Offset) || (p.From.Offset > 0xFFFF && p.From.Offset < 0x7FFF8000) {
// For a constant x, 0xFFFF (UINT16_MAX) < x < 0x7FFF8000 or -0x80000000 (INT32_MIN) <= x < -0x8000 (INT16_MIN)
// This is not done for 0x7FFF < x < 0x10000; the assembler will generate a slightly faster instruction sequence.
//
// The constant x can be rewritten as ADDIS + ADD as follows:
// ADDIS $x>>16 + (x>>15)&1, rX, rY
// ADD $int64(int16(x)), rY, rY
// The range is slightly asymmetric as 0x7FFF8000 and above overflow the sign bit, whereas for
// negative values, this would happen with constant values between -1 and -32768 which can
// assemble into a single addi.
is := p.From.Offset>>16 + (p.From.Offset>>15)&1
i := int64(int16(p.From.Offset))
p.As = AADDIS
p.From.Offset = is
q := obj.Appendp(p, c.newprog)
q.As = AADD
q.From.SetConst(i)
q.Reg = p.To.Reg
q.To = p.To
p = q
}
case AOR:
if p.From.Type == obj.TYPE_CONST && uint64(p.From.Offset)&0xFFFFFFFF0000FFFF == 0 && p.From.Offset != 0 {

View File

@ -23,6 +23,32 @@ func AddLargeConst(a uint64, out []uint64) {
// ppc64x/power9:"MOVD\t[$]-1", "SLD\t[$]33" "ADD\tR[0-9]*"
// ppc64x/power8:"MOVD\t[$]-1", "SLD\t[$]33" "ADD\tR[0-9]*"
out[1] = a + 0xFFFFFFFE00000000
// ppc64x/power10:"ADD\t[$]1234567,"
// ppc64x/power9:"ADDIS\t[$]19,", "ADD\t[$]-10617,"
// ppc64x/power8:"ADDIS\t[$]19,", "ADD\t[$]-10617,"
out[2] = a + 1234567
// ppc64x/power10:"ADD\t[$]-1234567,"
// ppc64x/power9:"ADDIS\t[$]-19,", "ADD\t[$]10617,"
// ppc64x/power8:"ADDIS\t[$]-19,", "ADD\t[$]10617,"
out[3] = a - 1234567
// ppc64x/power10:"ADD\t[$]2147450879,"
// ppc64x/power9:"ADDIS\t[$]32767,", "ADD\t[$]32767,"
// ppc64x/power8:"ADDIS\t[$]32767,", "ADD\t[$]32767,"
out[4] = a + 0x7FFF7FFF
// ppc64x/power10:"ADD\t[$]-2147483647,"
// ppc64x/power9:"ADDIS\t[$]-32768,", "ADD\t[$]1,"
// ppc64x/power8:"ADDIS\t[$]-32768,", "ADD\t[$]1,"
out[5] = a - 2147483647
// ppc64x:"ADDIS\t[$]-32768,", ^"ADD\t"
out[6] = a - 2147483648
// ppc64x:"ADD\t[$]2147450880,", ^"ADDIS\t"
out[7] = a + 0x7FFF8000
// ppc64x:"ADD\t[$]-32768,", ^"ADDIS\t"
out[8] = a - 32768
// ppc64x/power10:"ADD\t[$]-32769,"
// ppc64x/power9:"ADDIS\t[$]-1,", "ADD\t[$]32767,"
// ppc64x/power8:"ADDIS\t[$]-1,", "ADD\t[$]32767,"
out[9] = a - 32769
}
// ----------------- //