cmd/compile: use jump table on ARM64

Following CL 357330, use jump tables on ARM64.

name                         old time/op  new time/op  delta
Switch8Predictable-4         3.41ns ± 0%  3.21ns ± 0%     ~     (p=0.079 n=4+5)
Switch8Unpredictable-4       12.0ns ± 0%   9.5ns ± 0%  -21.17%  (p=0.000 n=5+4)
Switch32Predictable-4        3.06ns ± 0%  2.82ns ± 0%   -7.78%  (p=0.008 n=5+5)
Switch32Unpredictable-4      13.3ns ± 0%   9.5ns ± 0%  -28.87%  (p=0.016 n=4+5)
SwitchStringPredictable-4    3.71ns ± 0%  3.21ns ± 0%  -13.43%  (p=0.000 n=5+4)
SwitchStringUnpredictable-4  14.8ns ± 0%  15.1ns ± 0%   +2.37%  (p=0.008 n=5+5)

Change-Id: Ia0b85df7ca9273cf70c05eb957225c6e61822fa6
Reviewed-on: https://go-review.googlesource.com/c/go/+/403979
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Cherry Mui <cherryyz@google.com>
Reviewed-by: David Chase <drchase@google.com>
This commit is contained in:
Cherry Mui 2022-04-25 17:18:19 -04:00
parent ba8310cf29
commit 540f8c2b50
8 changed files with 87 additions and 35 deletions

View File

@ -99,21 +99,22 @@ func genshift(s *ssagen.State, v *ssa.Value, as obj.As, r0, r1, r int16, typ int
return p
}
// generate the memory operand for the indexed load/store instructions
func genIndexedOperand(v *ssa.Value) obj.Addr {
// generate the memory operand for the indexed load/store instructions.
// base and idx are registers.
func genIndexedOperand(op ssa.Op, base, idx int16) obj.Addr {
// Reg: base register, Index: (shifted) index register
mop := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
switch v.Op {
mop := obj.Addr{Type: obj.TYPE_MEM, Reg: base}
switch op {
case ssa.OpARM64MOVDloadidx8, ssa.OpARM64MOVDstoreidx8, ssa.OpARM64MOVDstorezeroidx8,
ssa.OpARM64FMOVDloadidx8, ssa.OpARM64FMOVDstoreidx8:
mop.Index = arm64.REG_LSL | 3<<5 | v.Args[1].Reg()&31
mop.Index = arm64.REG_LSL | 3<<5 | idx&31
case ssa.OpARM64MOVWloadidx4, ssa.OpARM64MOVWUloadidx4, ssa.OpARM64MOVWstoreidx4, ssa.OpARM64MOVWstorezeroidx4,
ssa.OpARM64FMOVSloadidx4, ssa.OpARM64FMOVSstoreidx4:
mop.Index = arm64.REG_LSL | 2<<5 | v.Args[1].Reg()&31
mop.Index = arm64.REG_LSL | 2<<5 | idx&31
case ssa.OpARM64MOVHloadidx2, ssa.OpARM64MOVHUloadidx2, ssa.OpARM64MOVHstoreidx2, ssa.OpARM64MOVHstorezeroidx2:
mop.Index = arm64.REG_LSL | 1<<5 | v.Args[1].Reg()&31
mop.Index = arm64.REG_LSL | 1<<5 | idx&31
default: // not shifted
mop.Index = v.Args[1].Reg()
mop.Index = idx
}
return mop
}
@ -465,7 +466,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
ssa.OpARM64FMOVDloadidx8,
ssa.OpARM64FMOVSloadidx4:
p := s.Prog(v.Op.Asm())
p.From = genIndexedOperand(v)
p.From = genIndexedOperand(v.Op, v.Args[0].Reg(), v.Args[1].Reg())
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
case ssa.OpARM64LDAR,
@ -504,7 +505,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
ssa.OpARM64MOVDstoreidx8,
ssa.OpARM64FMOVDstoreidx8:
p := s.Prog(v.Op.Asm())
p.To = genIndexedOperand(v)
p.To = genIndexedOperand(v.Op, v.Args[0].Reg(), v.Args[1].Reg())
p.From.Type = obj.TYPE_REG
p.From.Reg = v.Args[2].Reg()
case ssa.OpARM64STP:
@ -533,7 +534,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
ssa.OpARM64MOVWstorezeroidx4,
ssa.OpARM64MOVDstorezeroidx8:
p := s.Prog(v.Op.Asm())
p.To = genIndexedOperand(v)
p.To = genIndexedOperand(v.Op, v.Args[0].Reg(), v.Args[1].Reg())
p.From.Type = obj.TYPE_REG
p.From.Reg = arm64.REGZERO
case ssa.OpARM64MOVQstorezero:
@ -1325,6 +1326,20 @@ func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
s.CombJump(b, next, &leJumps)
case ssa.BlockARM64GTnoov:
s.CombJump(b, next, &gtJumps)
case ssa.BlockARM64JUMPTABLE:
// MOVD (TABLE)(IDX<<3), Rtmp
// JMP (Rtmp)
p := s.Prog(arm64.AMOVD)
p.From = genIndexedOperand(ssa.OpARM64MOVDloadidx8, b.Controls[1].Reg(), b.Controls[0].Reg())
p.To.Type = obj.TYPE_REG
p.To.Reg = arm64.REGTMP
p = s.Prog(obj.AJMP)
p.To.Type = obj.TYPE_MEM
p.To.Reg = arm64.REGTMP
// Save jump tables for later resolution of the target blocks.
s.JumpTables = append(s.JumpTables, b)
default:
b.Fatalf("branch not implemented: %s", b.LongString())
}

View File

@ -534,6 +534,8 @@
(If cond yes no) => (TBNZ [0] cond yes no)
(JumpTable idx) => (JUMPTABLE {makeJumpTableSym(b)} idx (MOVDaddr <typ.Uintptr> {makeJumpTableSym(b)} (SB)))
// atomic intrinsics
// Note: these ops do not accept offset.
(AtomicLoad8 ...) => (LDARB ...)

View File

@ -773,6 +773,12 @@ func init() {
{name: "LEnoov", controls: 1}, // 'LE' but without honoring overflow
{name: "GTnoov", controls: 1}, // 'GT' but without honoring overflow
{name: "GEnoov", controls: 1}, // 'GE' but without honoring overflow
// JUMPTABLE implements jump tables.
// Aux is the symbol (an *obj.LSym) for the jump table.
// control[0] is the index into the jump table.
// control[1] is the address of the jump table (the address of the symbol stored in Aux).
{name: "JUMPTABLE", controls: 2, aux: "Sym"},
}
archs = append(archs, arch{

View File

@ -92,6 +92,7 @@ const (
BlockARM64LEnoov
BlockARM64GTnoov
BlockARM64GEnoov
BlockARM64JUMPTABLE
BlockLOONG64EQ
BlockLOONG64NE
@ -217,30 +218,31 @@ var blockString = [...]string{
BlockARMGTnoov: "GTnoov",
BlockARMGEnoov: "GEnoov",
BlockARM64EQ: "EQ",
BlockARM64NE: "NE",
BlockARM64LT: "LT",
BlockARM64LE: "LE",
BlockARM64GT: "GT",
BlockARM64GE: "GE",
BlockARM64ULT: "ULT",
BlockARM64ULE: "ULE",
BlockARM64UGT: "UGT",
BlockARM64UGE: "UGE",
BlockARM64Z: "Z",
BlockARM64NZ: "NZ",
BlockARM64ZW: "ZW",
BlockARM64NZW: "NZW",
BlockARM64TBZ: "TBZ",
BlockARM64TBNZ: "TBNZ",
BlockARM64FLT: "FLT",
BlockARM64FLE: "FLE",
BlockARM64FGT: "FGT",
BlockARM64FGE: "FGE",
BlockARM64LTnoov: "LTnoov",
BlockARM64LEnoov: "LEnoov",
BlockARM64GTnoov: "GTnoov",
BlockARM64GEnoov: "GEnoov",
BlockARM64EQ: "EQ",
BlockARM64NE: "NE",
BlockARM64LT: "LT",
BlockARM64LE: "LE",
BlockARM64GT: "GT",
BlockARM64GE: "GE",
BlockARM64ULT: "ULT",
BlockARM64ULE: "ULE",
BlockARM64UGT: "UGT",
BlockARM64UGE: "UGE",
BlockARM64Z: "Z",
BlockARM64NZ: "NZ",
BlockARM64ZW: "ZW",
BlockARM64NZW: "NZW",
BlockARM64TBZ: "TBZ",
BlockARM64TBNZ: "TBNZ",
BlockARM64FLT: "FLT",
BlockARM64FLE: "FLE",
BlockARM64FGT: "FGT",
BlockARM64FGE: "FGE",
BlockARM64LTnoov: "LTnoov",
BlockARM64LEnoov: "LEnoov",
BlockARM64GTnoov: "GTnoov",
BlockARM64GEnoov: "GEnoov",
BlockARM64JUMPTABLE: "JUMPTABLE",
BlockLOONG64EQ: "EQ",
BlockLOONG64NE: "NE",

View File

@ -27491,6 +27491,7 @@ func rewriteValueARM64_OpZero(v *Value) bool {
return false
}
func rewriteBlockARM64(b *Block) bool {
typ := &b.Func.Config.Types
switch b.Kind {
case BlockARM64EQ:
// match: (EQ (CMPWconst [0] x:(ANDconst [c] y)) yes no)
@ -28845,6 +28846,19 @@ func rewriteBlockARM64(b *Block) bool {
b.AuxInt = int64ToAuxInt(0)
return true
}
case BlockJumpTable:
// match: (JumpTable idx)
// result: (JUMPTABLE {makeJumpTableSym(b)} idx (MOVDaddr <typ.Uintptr> {makeJumpTableSym(b)} (SB)))
for {
idx := b.Controls[0]
v0 := b.NewValue0(b.Pos, OpARM64MOVDaddr, typ.Uintptr)
v0.Aux = symToAux(makeJumpTableSym(b))
v1 := b.NewValue0(b.Pos, OpSB, typ.Uintptr)
v0.AddArg(v1)
b.resetWithControl2(BlockARM64JUMPTABLE, idx, v0)
b.Aux = symToAux(makeJumpTableSym(b))
return true
}
case BlockARM64LE:
// match: (LE (CMPWconst [0] x:(ANDconst [c] y)) yes no)
// cond: x.Uses == 1

View File

@ -1222,6 +1222,16 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
// so instruction sequences that use REGTMP are unsafe to
// preempt asynchronously.
obj.MarkUnsafePoints(c.ctxt, c.cursym.Func().Text, c.newprog, c.isUnsafePoint, c.isRestartable)
// Now that we know byte offsets, we can generate jump table entries.
for _, jt := range cursym.Func().JumpTables {
for i, p := range jt.Targets {
// The ith jumptable entry points to the p.Pc'th
// byte in the function symbol s.
// TODO: try using relative PCs.
jt.Sym.WriteAddr(ctxt, int64(i)*8, 8, cursym, p.Pc)
}
}
}
// isUnsafePoint returns whether p is an unsafe point.

View File

@ -130,6 +130,7 @@ var ArchARM64 = &Arch{
MinLC: 4,
Alignment: 1,
CanMergeLoads: true,
CanJumpTable: true,
HasLR: true,
FixedFrameSize: 8, // LR
}

View File

@ -24,6 +24,7 @@ func f(x string) int {
// use jump tables for 8+ int cases
func square(x int) int {
// amd64:`JMP\s\(.*\)\(.*\)$`
// arm64:`MOVD\s\(R.*\)\(R.*<<3\)`,`JMP\s\(R.*\)$`
switch x {
case 1:
return 1
@ -49,6 +50,7 @@ func square(x int) int {
// use jump tables for 8+ string lengths
func length(x string) int {
// amd64:`JMP\s\(.*\)\(.*\)$`
// arm64:`MOVD\s\(R.*\)\(R.*<<3\)`,`JMP\s\(R.*\)$`
switch x {
case "a":
return 1