mirror of https://github.com/golang/go.git
cmd/compile: use jump table on ARM64
Following CL 357330, use jump tables on ARM64.

name                         old time/op  new time/op  delta
Switch8Predictable-4         3.41ns ± 0%  3.21ns ± 0%     ~     (p=0.079 n=4+5)
Switch8Unpredictable-4       12.0ns ± 0%   9.5ns ± 0%  -21.17%  (p=0.000 n=5+4)
Switch32Predictable-4        3.06ns ± 0%  2.82ns ± 0%   -7.78%  (p=0.008 n=5+5)
Switch32Unpredictable-4      13.3ns ± 0%   9.5ns ± 0%  -28.87%  (p=0.016 n=4+5)
SwitchStringPredictable-4    3.71ns ± 0%  3.21ns ± 0%  -13.43%  (p=0.000 n=5+4)
SwitchStringUnpredictable-4  14.8ns ± 0%  15.1ns ± 0%   +2.37%  (p=0.008 n=5+5)

Change-Id: Ia0b85df7ca9273cf70c05eb957225c6e61822fa6
Reviewed-on: https://go-review.googlesource.com/c/go/+/403979
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Cherry Mui <cherryyz@google.com>
Reviewed-by: David Chase <drchase@google.com>
This commit is contained in:
parent
ba8310cf29
commit
540f8c2b50
|
|
@ -99,21 +99,22 @@ func genshift(s *ssagen.State, v *ssa.Value, as obj.As, r0, r1, r int16, typ int
|
||||||
return p
|
return p
|
||||||
}
|
}
|
||||||
|
|
||||||
// generate the memory operand for the indexed load/store instructions
|
// generate the memory operand for the indexed load/store instructions.
|
||||||
func genIndexedOperand(v *ssa.Value) obj.Addr {
|
// base and idx are registers.
|
||||||
|
func genIndexedOperand(op ssa.Op, base, idx int16) obj.Addr {
|
||||||
// Reg: base register, Index: (shifted) index register
|
// Reg: base register, Index: (shifted) index register
|
||||||
mop := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
|
mop := obj.Addr{Type: obj.TYPE_MEM, Reg: base}
|
||||||
switch v.Op {
|
switch op {
|
||||||
case ssa.OpARM64MOVDloadidx8, ssa.OpARM64MOVDstoreidx8, ssa.OpARM64MOVDstorezeroidx8,
|
case ssa.OpARM64MOVDloadidx8, ssa.OpARM64MOVDstoreidx8, ssa.OpARM64MOVDstorezeroidx8,
|
||||||
ssa.OpARM64FMOVDloadidx8, ssa.OpARM64FMOVDstoreidx8:
|
ssa.OpARM64FMOVDloadidx8, ssa.OpARM64FMOVDstoreidx8:
|
||||||
mop.Index = arm64.REG_LSL | 3<<5 | v.Args[1].Reg()&31
|
mop.Index = arm64.REG_LSL | 3<<5 | idx&31
|
||||||
case ssa.OpARM64MOVWloadidx4, ssa.OpARM64MOVWUloadidx4, ssa.OpARM64MOVWstoreidx4, ssa.OpARM64MOVWstorezeroidx4,
|
case ssa.OpARM64MOVWloadidx4, ssa.OpARM64MOVWUloadidx4, ssa.OpARM64MOVWstoreidx4, ssa.OpARM64MOVWstorezeroidx4,
|
||||||
ssa.OpARM64FMOVSloadidx4, ssa.OpARM64FMOVSstoreidx4:
|
ssa.OpARM64FMOVSloadidx4, ssa.OpARM64FMOVSstoreidx4:
|
||||||
mop.Index = arm64.REG_LSL | 2<<5 | v.Args[1].Reg()&31
|
mop.Index = arm64.REG_LSL | 2<<5 | idx&31
|
||||||
case ssa.OpARM64MOVHloadidx2, ssa.OpARM64MOVHUloadidx2, ssa.OpARM64MOVHstoreidx2, ssa.OpARM64MOVHstorezeroidx2:
|
case ssa.OpARM64MOVHloadidx2, ssa.OpARM64MOVHUloadidx2, ssa.OpARM64MOVHstoreidx2, ssa.OpARM64MOVHstorezeroidx2:
|
||||||
mop.Index = arm64.REG_LSL | 1<<5 | v.Args[1].Reg()&31
|
mop.Index = arm64.REG_LSL | 1<<5 | idx&31
|
||||||
default: // not shifted
|
default: // not shifted
|
||||||
mop.Index = v.Args[1].Reg()
|
mop.Index = idx
|
||||||
}
|
}
|
||||||
return mop
|
return mop
|
||||||
}
|
}
|
||||||
|
|
@ -465,7 +466,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
||||||
ssa.OpARM64FMOVDloadidx8,
|
ssa.OpARM64FMOVDloadidx8,
|
||||||
ssa.OpARM64FMOVSloadidx4:
|
ssa.OpARM64FMOVSloadidx4:
|
||||||
p := s.Prog(v.Op.Asm())
|
p := s.Prog(v.Op.Asm())
|
||||||
p.From = genIndexedOperand(v)
|
p.From = genIndexedOperand(v.Op, v.Args[0].Reg(), v.Args[1].Reg())
|
||||||
p.To.Type = obj.TYPE_REG
|
p.To.Type = obj.TYPE_REG
|
||||||
p.To.Reg = v.Reg()
|
p.To.Reg = v.Reg()
|
||||||
case ssa.OpARM64LDAR,
|
case ssa.OpARM64LDAR,
|
||||||
|
|
@ -504,7 +505,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
||||||
ssa.OpARM64MOVDstoreidx8,
|
ssa.OpARM64MOVDstoreidx8,
|
||||||
ssa.OpARM64FMOVDstoreidx8:
|
ssa.OpARM64FMOVDstoreidx8:
|
||||||
p := s.Prog(v.Op.Asm())
|
p := s.Prog(v.Op.Asm())
|
||||||
p.To = genIndexedOperand(v)
|
p.To = genIndexedOperand(v.Op, v.Args[0].Reg(), v.Args[1].Reg())
|
||||||
p.From.Type = obj.TYPE_REG
|
p.From.Type = obj.TYPE_REG
|
||||||
p.From.Reg = v.Args[2].Reg()
|
p.From.Reg = v.Args[2].Reg()
|
||||||
case ssa.OpARM64STP:
|
case ssa.OpARM64STP:
|
||||||
|
|
@ -533,7 +534,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
||||||
ssa.OpARM64MOVWstorezeroidx4,
|
ssa.OpARM64MOVWstorezeroidx4,
|
||||||
ssa.OpARM64MOVDstorezeroidx8:
|
ssa.OpARM64MOVDstorezeroidx8:
|
||||||
p := s.Prog(v.Op.Asm())
|
p := s.Prog(v.Op.Asm())
|
||||||
p.To = genIndexedOperand(v)
|
p.To = genIndexedOperand(v.Op, v.Args[0].Reg(), v.Args[1].Reg())
|
||||||
p.From.Type = obj.TYPE_REG
|
p.From.Type = obj.TYPE_REG
|
||||||
p.From.Reg = arm64.REGZERO
|
p.From.Reg = arm64.REGZERO
|
||||||
case ssa.OpARM64MOVQstorezero:
|
case ssa.OpARM64MOVQstorezero:
|
||||||
|
|
@ -1325,6 +1326,20 @@ func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
|
||||||
s.CombJump(b, next, &leJumps)
|
s.CombJump(b, next, &leJumps)
|
||||||
case ssa.BlockARM64GTnoov:
|
case ssa.BlockARM64GTnoov:
|
||||||
s.CombJump(b, next, &gtJumps)
|
s.CombJump(b, next, &gtJumps)
|
||||||
|
|
||||||
|
case ssa.BlockARM64JUMPTABLE:
|
||||||
|
// MOVD (TABLE)(IDX<<3), Rtmp
|
||||||
|
// JMP (Rtmp)
|
||||||
|
p := s.Prog(arm64.AMOVD)
|
||||||
|
p.From = genIndexedOperand(ssa.OpARM64MOVDloadidx8, b.Controls[1].Reg(), b.Controls[0].Reg())
|
||||||
|
p.To.Type = obj.TYPE_REG
|
||||||
|
p.To.Reg = arm64.REGTMP
|
||||||
|
p = s.Prog(obj.AJMP)
|
||||||
|
p.To.Type = obj.TYPE_MEM
|
||||||
|
p.To.Reg = arm64.REGTMP
|
||||||
|
// Save jump tables for later resolution of the target blocks.
|
||||||
|
s.JumpTables = append(s.JumpTables, b)
|
||||||
|
|
||||||
default:
|
default:
|
||||||
b.Fatalf("branch not implemented: %s", b.LongString())
|
b.Fatalf("branch not implemented: %s", b.LongString())
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -534,6 +534,8 @@
|
||||||
|
|
||||||
(If cond yes no) => (TBNZ [0] cond yes no)
|
(If cond yes no) => (TBNZ [0] cond yes no)
|
||||||
|
|
||||||
|
(JumpTable idx) => (JUMPTABLE {makeJumpTableSym(b)} idx (MOVDaddr <typ.Uintptr> {makeJumpTableSym(b)} (SB)))
|
||||||
|
|
||||||
// atomic intrinsics
|
// atomic intrinsics
|
||||||
// Note: these ops do not accept offset.
|
// Note: these ops do not accept offset.
|
||||||
(AtomicLoad8 ...) => (LDARB ...)
|
(AtomicLoad8 ...) => (LDARB ...)
|
||||||
|
|
|
||||||
|
|
@ -773,6 +773,12 @@ func init() {
|
||||||
{name: "LEnoov", controls: 1}, // 'LE' but without honoring overflow
|
{name: "LEnoov", controls: 1}, // 'LE' but without honoring overflow
|
||||||
{name: "GTnoov", controls: 1}, // 'GT' but without honoring overflow
|
{name: "GTnoov", controls: 1}, // 'GT' but without honoring overflow
|
||||||
{name: "GEnoov", controls: 1}, // 'GE' but without honoring overflow
|
{name: "GEnoov", controls: 1}, // 'GE' but without honoring overflow
|
||||||
|
|
||||||
|
// JUMPTABLE implements jump tables.
|
||||||
|
// Aux is the symbol (an *obj.LSym) for the jump table.
|
||||||
|
// control[0] is the index into the jump table.
|
||||||
|
// control[1] is the address of the jump table (the address of the symbol stored in Aux).
|
||||||
|
{name: "JUMPTABLE", controls: 2, aux: "Sym"},
|
||||||
}
|
}
|
||||||
|
|
||||||
archs = append(archs, arch{
|
archs = append(archs, arch{
|
||||||
|
|
|
||||||
|
|
@ -92,6 +92,7 @@ const (
|
||||||
BlockARM64LEnoov
|
BlockARM64LEnoov
|
||||||
BlockARM64GTnoov
|
BlockARM64GTnoov
|
||||||
BlockARM64GEnoov
|
BlockARM64GEnoov
|
||||||
|
BlockARM64JUMPTABLE
|
||||||
|
|
||||||
BlockLOONG64EQ
|
BlockLOONG64EQ
|
||||||
BlockLOONG64NE
|
BlockLOONG64NE
|
||||||
|
|
@ -217,30 +218,31 @@ var blockString = [...]string{
|
||||||
BlockARMGTnoov: "GTnoov",
|
BlockARMGTnoov: "GTnoov",
|
||||||
BlockARMGEnoov: "GEnoov",
|
BlockARMGEnoov: "GEnoov",
|
||||||
|
|
||||||
BlockARM64EQ: "EQ",
|
BlockARM64EQ: "EQ",
|
||||||
BlockARM64NE: "NE",
|
BlockARM64NE: "NE",
|
||||||
BlockARM64LT: "LT",
|
BlockARM64LT: "LT",
|
||||||
BlockARM64LE: "LE",
|
BlockARM64LE: "LE",
|
||||||
BlockARM64GT: "GT",
|
BlockARM64GT: "GT",
|
||||||
BlockARM64GE: "GE",
|
BlockARM64GE: "GE",
|
||||||
BlockARM64ULT: "ULT",
|
BlockARM64ULT: "ULT",
|
||||||
BlockARM64ULE: "ULE",
|
BlockARM64ULE: "ULE",
|
||||||
BlockARM64UGT: "UGT",
|
BlockARM64UGT: "UGT",
|
||||||
BlockARM64UGE: "UGE",
|
BlockARM64UGE: "UGE",
|
||||||
BlockARM64Z: "Z",
|
BlockARM64Z: "Z",
|
||||||
BlockARM64NZ: "NZ",
|
BlockARM64NZ: "NZ",
|
||||||
BlockARM64ZW: "ZW",
|
BlockARM64ZW: "ZW",
|
||||||
BlockARM64NZW: "NZW",
|
BlockARM64NZW: "NZW",
|
||||||
BlockARM64TBZ: "TBZ",
|
BlockARM64TBZ: "TBZ",
|
||||||
BlockARM64TBNZ: "TBNZ",
|
BlockARM64TBNZ: "TBNZ",
|
||||||
BlockARM64FLT: "FLT",
|
BlockARM64FLT: "FLT",
|
||||||
BlockARM64FLE: "FLE",
|
BlockARM64FLE: "FLE",
|
||||||
BlockARM64FGT: "FGT",
|
BlockARM64FGT: "FGT",
|
||||||
BlockARM64FGE: "FGE",
|
BlockARM64FGE: "FGE",
|
||||||
BlockARM64LTnoov: "LTnoov",
|
BlockARM64LTnoov: "LTnoov",
|
||||||
BlockARM64LEnoov: "LEnoov",
|
BlockARM64LEnoov: "LEnoov",
|
||||||
BlockARM64GTnoov: "GTnoov",
|
BlockARM64GTnoov: "GTnoov",
|
||||||
BlockARM64GEnoov: "GEnoov",
|
BlockARM64GEnoov: "GEnoov",
|
||||||
|
BlockARM64JUMPTABLE: "JUMPTABLE",
|
||||||
|
|
||||||
BlockLOONG64EQ: "EQ",
|
BlockLOONG64EQ: "EQ",
|
||||||
BlockLOONG64NE: "NE",
|
BlockLOONG64NE: "NE",
|
||||||
|
|
|
||||||
|
|
@ -27491,6 +27491,7 @@ func rewriteValueARM64_OpZero(v *Value) bool {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
func rewriteBlockARM64(b *Block) bool {
|
func rewriteBlockARM64(b *Block) bool {
|
||||||
|
typ := &b.Func.Config.Types
|
||||||
switch b.Kind {
|
switch b.Kind {
|
||||||
case BlockARM64EQ:
|
case BlockARM64EQ:
|
||||||
// match: (EQ (CMPWconst [0] x:(ANDconst [c] y)) yes no)
|
// match: (EQ (CMPWconst [0] x:(ANDconst [c] y)) yes no)
|
||||||
|
|
@ -28845,6 +28846,19 @@ func rewriteBlockARM64(b *Block) bool {
|
||||||
b.AuxInt = int64ToAuxInt(0)
|
b.AuxInt = int64ToAuxInt(0)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
case BlockJumpTable:
|
||||||
|
// match: (JumpTable idx)
|
||||||
|
// result: (JUMPTABLE {makeJumpTableSym(b)} idx (MOVDaddr <typ.Uintptr> {makeJumpTableSym(b)} (SB)))
|
||||||
|
for {
|
||||||
|
idx := b.Controls[0]
|
||||||
|
v0 := b.NewValue0(b.Pos, OpARM64MOVDaddr, typ.Uintptr)
|
||||||
|
v0.Aux = symToAux(makeJumpTableSym(b))
|
||||||
|
v1 := b.NewValue0(b.Pos, OpSB, typ.Uintptr)
|
||||||
|
v0.AddArg(v1)
|
||||||
|
b.resetWithControl2(BlockARM64JUMPTABLE, idx, v0)
|
||||||
|
b.Aux = symToAux(makeJumpTableSym(b))
|
||||||
|
return true
|
||||||
|
}
|
||||||
case BlockARM64LE:
|
case BlockARM64LE:
|
||||||
// match: (LE (CMPWconst [0] x:(ANDconst [c] y)) yes no)
|
// match: (LE (CMPWconst [0] x:(ANDconst [c] y)) yes no)
|
||||||
// cond: x.Uses == 1
|
// cond: x.Uses == 1
|
||||||
|
|
|
||||||
|
|
@ -1222,6 +1222,16 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
|
||||||
// so instruction sequences that use REGTMP are unsafe to
|
// so instruction sequences that use REGTMP are unsafe to
|
||||||
// preempt asynchronously.
|
// preempt asynchronously.
|
||||||
obj.MarkUnsafePoints(c.ctxt, c.cursym.Func().Text, c.newprog, c.isUnsafePoint, c.isRestartable)
|
obj.MarkUnsafePoints(c.ctxt, c.cursym.Func().Text, c.newprog, c.isUnsafePoint, c.isRestartable)
|
||||||
|
|
||||||
|
// Now that we know byte offsets, we can generate jump table entries.
|
||||||
|
for _, jt := range cursym.Func().JumpTables {
|
||||||
|
for i, p := range jt.Targets {
|
||||||
|
// The ith jumptable entry points to the p.Pc'th
|
||||||
|
// byte in the function symbol s.
|
||||||
|
// TODO: try using relative PCs.
|
||||||
|
jt.Sym.WriteAddr(ctxt, int64(i)*8, 8, cursym, p.Pc)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// isUnsafePoint returns whether p is an unsafe point.
|
// isUnsafePoint returns whether p is an unsafe point.
|
||||||
|
|
|
||||||
|
|
@ -130,6 +130,7 @@ var ArchARM64 = &Arch{
|
||||||
MinLC: 4,
|
MinLC: 4,
|
||||||
Alignment: 1,
|
Alignment: 1,
|
||||||
CanMergeLoads: true,
|
CanMergeLoads: true,
|
||||||
|
CanJumpTable: true,
|
||||||
HasLR: true,
|
HasLR: true,
|
||||||
FixedFrameSize: 8, // LR
|
FixedFrameSize: 8, // LR
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -24,6 +24,7 @@ func f(x string) int {
|
||||||
// use jump tables for 8+ int cases
|
// use jump tables for 8+ int cases
|
||||||
func square(x int) int {
|
func square(x int) int {
|
||||||
// amd64:`JMP\s\(.*\)\(.*\)$`
|
// amd64:`JMP\s\(.*\)\(.*\)$`
|
||||||
|
// arm64:`MOVD\s\(R.*\)\(R.*<<3\)`,`JMP\s\(R.*\)$`
|
||||||
switch x {
|
switch x {
|
||||||
case 1:
|
case 1:
|
||||||
return 1
|
return 1
|
||||||
|
|
@ -49,6 +50,7 @@ func square(x int) int {
|
||||||
// use jump tables for 8+ string lengths
|
// use jump tables for 8+ string lengths
|
||||||
func length(x string) int {
|
func length(x string) int {
|
||||||
// amd64:`JMP\s\(.*\)\(.*\)$`
|
// amd64:`JMP\s\(.*\)\(.*\)$`
|
||||||
|
// arm64:`MOVD\s\(R.*\)\(R.*<<3\)`,`JMP\s\(R.*\)$`
|
||||||
switch x {
|
switch x {
|
||||||
case "a":
|
case "a":
|
||||||
return 1
|
return 1
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue