mirror of https://github.com/golang/go.git
cmd/compile: use jump table on ARM64
Following CL 357330, use jump tables on ARM64. name old time/op new time/op delta Switch8Predictable-4 3.41ns ± 0% 3.21ns ± 0% ~ (p=0.079 n=4+5) Switch8Unpredictable-4 12.0ns ± 0% 9.5ns ± 0% -21.17% (p=0.000 n=5+4) Switch32Predictable-4 3.06ns ± 0% 2.82ns ± 0% -7.78% (p=0.008 n=5+5) Switch32Unpredictable-4 13.3ns ± 0% 9.5ns ± 0% -28.87% (p=0.016 n=4+5) SwitchStringPredictable-4 3.71ns ± 0% 3.21ns ± 0% -13.43% (p=0.000 n=5+4) SwitchStringUnpredictable-4 14.8ns ± 0% 15.1ns ± 0% +2.37% (p=0.008 n=5+5) Change-Id: Ia0b85df7ca9273cf70c05eb957225c6e61822fa6 Reviewed-on: https://go-review.googlesource.com/c/go/+/403979 TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Cherry Mui <cherryyz@google.com> Reviewed-by: David Chase <drchase@google.com>
This commit is contained in:
parent
ba8310cf29
commit
540f8c2b50
|
|
@ -99,21 +99,22 @@ func genshift(s *ssagen.State, v *ssa.Value, as obj.As, r0, r1, r int16, typ int
|
|||
return p
|
||||
}
|
||||
|
||||
// generate the memory operand for the indexed load/store instructions
|
||||
func genIndexedOperand(v *ssa.Value) obj.Addr {
|
||||
// generate the memory operand for the indexed load/store instructions.
|
||||
// base and idx are registers.
|
||||
func genIndexedOperand(op ssa.Op, base, idx int16) obj.Addr {
|
||||
// Reg: base register, Index: (shifted) index register
|
||||
mop := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
|
||||
switch v.Op {
|
||||
mop := obj.Addr{Type: obj.TYPE_MEM, Reg: base}
|
||||
switch op {
|
||||
case ssa.OpARM64MOVDloadidx8, ssa.OpARM64MOVDstoreidx8, ssa.OpARM64MOVDstorezeroidx8,
|
||||
ssa.OpARM64FMOVDloadidx8, ssa.OpARM64FMOVDstoreidx8:
|
||||
mop.Index = arm64.REG_LSL | 3<<5 | v.Args[1].Reg()&31
|
||||
mop.Index = arm64.REG_LSL | 3<<5 | idx&31
|
||||
case ssa.OpARM64MOVWloadidx4, ssa.OpARM64MOVWUloadidx4, ssa.OpARM64MOVWstoreidx4, ssa.OpARM64MOVWstorezeroidx4,
|
||||
ssa.OpARM64FMOVSloadidx4, ssa.OpARM64FMOVSstoreidx4:
|
||||
mop.Index = arm64.REG_LSL | 2<<5 | v.Args[1].Reg()&31
|
||||
mop.Index = arm64.REG_LSL | 2<<5 | idx&31
|
||||
case ssa.OpARM64MOVHloadidx2, ssa.OpARM64MOVHUloadidx2, ssa.OpARM64MOVHstoreidx2, ssa.OpARM64MOVHstorezeroidx2:
|
||||
mop.Index = arm64.REG_LSL | 1<<5 | v.Args[1].Reg()&31
|
||||
mop.Index = arm64.REG_LSL | 1<<5 | idx&31
|
||||
default: // not shifted
|
||||
mop.Index = v.Args[1].Reg()
|
||||
mop.Index = idx
|
||||
}
|
||||
return mop
|
||||
}
|
||||
|
|
@ -465,7 +466,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
|||
ssa.OpARM64FMOVDloadidx8,
|
||||
ssa.OpARM64FMOVSloadidx4:
|
||||
p := s.Prog(v.Op.Asm())
|
||||
p.From = genIndexedOperand(v)
|
||||
p.From = genIndexedOperand(v.Op, v.Args[0].Reg(), v.Args[1].Reg())
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = v.Reg()
|
||||
case ssa.OpARM64LDAR,
|
||||
|
|
@ -504,7 +505,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
|||
ssa.OpARM64MOVDstoreidx8,
|
||||
ssa.OpARM64FMOVDstoreidx8:
|
||||
p := s.Prog(v.Op.Asm())
|
||||
p.To = genIndexedOperand(v)
|
||||
p.To = genIndexedOperand(v.Op, v.Args[0].Reg(), v.Args[1].Reg())
|
||||
p.From.Type = obj.TYPE_REG
|
||||
p.From.Reg = v.Args[2].Reg()
|
||||
case ssa.OpARM64STP:
|
||||
|
|
@ -533,7 +534,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
|||
ssa.OpARM64MOVWstorezeroidx4,
|
||||
ssa.OpARM64MOVDstorezeroidx8:
|
||||
p := s.Prog(v.Op.Asm())
|
||||
p.To = genIndexedOperand(v)
|
||||
p.To = genIndexedOperand(v.Op, v.Args[0].Reg(), v.Args[1].Reg())
|
||||
p.From.Type = obj.TYPE_REG
|
||||
p.From.Reg = arm64.REGZERO
|
||||
case ssa.OpARM64MOVQstorezero:
|
||||
|
|
@ -1325,6 +1326,20 @@ func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
|
|||
s.CombJump(b, next, &leJumps)
|
||||
case ssa.BlockARM64GTnoov:
|
||||
s.CombJump(b, next, >Jumps)
|
||||
|
||||
case ssa.BlockARM64JUMPTABLE:
|
||||
// MOVD (TABLE)(IDX<<3), Rtmp
|
||||
// JMP (Rtmp)
|
||||
p := s.Prog(arm64.AMOVD)
|
||||
p.From = genIndexedOperand(ssa.OpARM64MOVDloadidx8, b.Controls[1].Reg(), b.Controls[0].Reg())
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = arm64.REGTMP
|
||||
p = s.Prog(obj.AJMP)
|
||||
p.To.Type = obj.TYPE_MEM
|
||||
p.To.Reg = arm64.REGTMP
|
||||
// Save jump tables for later resolution of the target blocks.
|
||||
s.JumpTables = append(s.JumpTables, b)
|
||||
|
||||
default:
|
||||
b.Fatalf("branch not implemented: %s", b.LongString())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -534,6 +534,8 @@
|
|||
|
||||
(If cond yes no) => (TBNZ [0] cond yes no)
|
||||
|
||||
(JumpTable idx) => (JUMPTABLE {makeJumpTableSym(b)} idx (MOVDaddr <typ.Uintptr> {makeJumpTableSym(b)} (SB)))
|
||||
|
||||
// atomic intrinsics
|
||||
// Note: these ops do not accept offset.
|
||||
(AtomicLoad8 ...) => (LDARB ...)
|
||||
|
|
|
|||
|
|
@ -773,6 +773,12 @@ func init() {
|
|||
{name: "LEnoov", controls: 1}, // 'LE' but without honoring overflow
|
||||
{name: "GTnoov", controls: 1}, // 'GT' but without honoring overflow
|
||||
{name: "GEnoov", controls: 1}, // 'GE' but without honoring overflow
|
||||
|
||||
// JUMPTABLE implements jump tables.
|
||||
// Aux is the symbol (an *obj.LSym) for the jump table.
|
||||
// control[0] is the index into the jump table.
|
||||
// control[1] is the address of the jump table (the address of the symbol stored in Aux).
|
||||
{name: "JUMPTABLE", controls: 2, aux: "Sym"},
|
||||
}
|
||||
|
||||
archs = append(archs, arch{
|
||||
|
|
|
|||
|
|
@ -92,6 +92,7 @@ const (
|
|||
BlockARM64LEnoov
|
||||
BlockARM64GTnoov
|
||||
BlockARM64GEnoov
|
||||
BlockARM64JUMPTABLE
|
||||
|
||||
BlockLOONG64EQ
|
||||
BlockLOONG64NE
|
||||
|
|
@ -217,30 +218,31 @@ var blockString = [...]string{
|
|||
BlockARMGTnoov: "GTnoov",
|
||||
BlockARMGEnoov: "GEnoov",
|
||||
|
||||
BlockARM64EQ: "EQ",
|
||||
BlockARM64NE: "NE",
|
||||
BlockARM64LT: "LT",
|
||||
BlockARM64LE: "LE",
|
||||
BlockARM64GT: "GT",
|
||||
BlockARM64GE: "GE",
|
||||
BlockARM64ULT: "ULT",
|
||||
BlockARM64ULE: "ULE",
|
||||
BlockARM64UGT: "UGT",
|
||||
BlockARM64UGE: "UGE",
|
||||
BlockARM64Z: "Z",
|
||||
BlockARM64NZ: "NZ",
|
||||
BlockARM64ZW: "ZW",
|
||||
BlockARM64NZW: "NZW",
|
||||
BlockARM64TBZ: "TBZ",
|
||||
BlockARM64TBNZ: "TBNZ",
|
||||
BlockARM64FLT: "FLT",
|
||||
BlockARM64FLE: "FLE",
|
||||
BlockARM64FGT: "FGT",
|
||||
BlockARM64FGE: "FGE",
|
||||
BlockARM64LTnoov: "LTnoov",
|
||||
BlockARM64LEnoov: "LEnoov",
|
||||
BlockARM64GTnoov: "GTnoov",
|
||||
BlockARM64GEnoov: "GEnoov",
|
||||
BlockARM64EQ: "EQ",
|
||||
BlockARM64NE: "NE",
|
||||
BlockARM64LT: "LT",
|
||||
BlockARM64LE: "LE",
|
||||
BlockARM64GT: "GT",
|
||||
BlockARM64GE: "GE",
|
||||
BlockARM64ULT: "ULT",
|
||||
BlockARM64ULE: "ULE",
|
||||
BlockARM64UGT: "UGT",
|
||||
BlockARM64UGE: "UGE",
|
||||
BlockARM64Z: "Z",
|
||||
BlockARM64NZ: "NZ",
|
||||
BlockARM64ZW: "ZW",
|
||||
BlockARM64NZW: "NZW",
|
||||
BlockARM64TBZ: "TBZ",
|
||||
BlockARM64TBNZ: "TBNZ",
|
||||
BlockARM64FLT: "FLT",
|
||||
BlockARM64FLE: "FLE",
|
||||
BlockARM64FGT: "FGT",
|
||||
BlockARM64FGE: "FGE",
|
||||
BlockARM64LTnoov: "LTnoov",
|
||||
BlockARM64LEnoov: "LEnoov",
|
||||
BlockARM64GTnoov: "GTnoov",
|
||||
BlockARM64GEnoov: "GEnoov",
|
||||
BlockARM64JUMPTABLE: "JUMPTABLE",
|
||||
|
||||
BlockLOONG64EQ: "EQ",
|
||||
BlockLOONG64NE: "NE",
|
||||
|
|
|
|||
|
|
@ -27491,6 +27491,7 @@ func rewriteValueARM64_OpZero(v *Value) bool {
|
|||
return false
|
||||
}
|
||||
func rewriteBlockARM64(b *Block) bool {
|
||||
typ := &b.Func.Config.Types
|
||||
switch b.Kind {
|
||||
case BlockARM64EQ:
|
||||
// match: (EQ (CMPWconst [0] x:(ANDconst [c] y)) yes no)
|
||||
|
|
@ -28845,6 +28846,19 @@ func rewriteBlockARM64(b *Block) bool {
|
|||
b.AuxInt = int64ToAuxInt(0)
|
||||
return true
|
||||
}
|
||||
case BlockJumpTable:
|
||||
// match: (JumpTable idx)
|
||||
// result: (JUMPTABLE {makeJumpTableSym(b)} idx (MOVDaddr <typ.Uintptr> {makeJumpTableSym(b)} (SB)))
|
||||
for {
|
||||
idx := b.Controls[0]
|
||||
v0 := b.NewValue0(b.Pos, OpARM64MOVDaddr, typ.Uintptr)
|
||||
v0.Aux = symToAux(makeJumpTableSym(b))
|
||||
v1 := b.NewValue0(b.Pos, OpSB, typ.Uintptr)
|
||||
v0.AddArg(v1)
|
||||
b.resetWithControl2(BlockARM64JUMPTABLE, idx, v0)
|
||||
b.Aux = symToAux(makeJumpTableSym(b))
|
||||
return true
|
||||
}
|
||||
case BlockARM64LE:
|
||||
// match: (LE (CMPWconst [0] x:(ANDconst [c] y)) yes no)
|
||||
// cond: x.Uses == 1
|
||||
|
|
|
|||
|
|
@ -1222,6 +1222,16 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
|
|||
// so instruction sequences that use REGTMP are unsafe to
|
||||
// preempt asynchronously.
|
||||
obj.MarkUnsafePoints(c.ctxt, c.cursym.Func().Text, c.newprog, c.isUnsafePoint, c.isRestartable)
|
||||
|
||||
// Now that we know byte offsets, we can generate jump table entries.
|
||||
for _, jt := range cursym.Func().JumpTables {
|
||||
for i, p := range jt.Targets {
|
||||
// The ith jumptable entry points to the p.Pc'th
|
||||
// byte in the function symbol s.
|
||||
// TODO: try using relative PCs.
|
||||
jt.Sym.WriteAddr(ctxt, int64(i)*8, 8, cursym, p.Pc)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// isUnsafePoint returns whether p is an unsafe point.
|
||||
|
|
|
|||
|
|
@ -130,6 +130,7 @@ var ArchARM64 = &Arch{
|
|||
MinLC: 4,
|
||||
Alignment: 1,
|
||||
CanMergeLoads: true,
|
||||
CanJumpTable: true,
|
||||
HasLR: true,
|
||||
FixedFrameSize: 8, // LR
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@ func f(x string) int {
|
|||
// use jump tables for 8+ int cases
|
||||
func square(x int) int {
|
||||
// amd64:`JMP\s\(.*\)\(.*\)$`
|
||||
// arm64:`MOVD\s\(R.*\)\(R.*<<3\)`,`JMP\s\(R.*\)$`
|
||||
switch x {
|
||||
case 1:
|
||||
return 1
|
||||
|
|
@ -49,6 +50,7 @@ func square(x int) int {
|
|||
// use jump tables for 8+ string lengths
|
||||
func length(x string) int {
|
||||
// amd64:`JMP\s\(.*\)\(.*\)$`
|
||||
// arm64:`MOVD\s\(R.*\)\(R.*<<3\)`,`JMP\s\(R.*\)$`
|
||||
switch x {
|
||||
case "a":
|
||||
return 1
|
||||
|
|
|
|||
Loading…
Reference in New Issue