mirror of https://github.com/golang/go.git
cmd/internal/obj/arm64: optimize function prologue/epilogue with STP/LDP
In function prologue and epilogue, we save and restore FP and LR
registers, and adjust RSP. The current instruction sequence is as
follow.
For frame size <= 240B,
prologue:
MOVD.W R30, -offset(RSP)
MOVD R29, -8(RSP)
epilogue:
MOVD -8(RSP), R29
MOVD.P offset(RSP), R30
For frame size > 240B,
prologue:
SUB $offset, RSP, R27
MOVD R30, (R27)
MOVD R27, RSP
MOVD R29, -8(RSP)
epilogue:
MOVD -8(RSP), R29
MOVD (RSP), R30
ADD $offset, RSP
Each sequence uses two load or store instructions, actually we can load
or store two registers with one LDP or STP instruction. This CL changes
the sequences as follow.
For frame size <= 496B,
prologue:
STP (R29, R30), -(offset+8)(RSP)
SUB $offset, RSP, RSP
epilogue:
LDP -8(RSP), (R29, R30)
ADD $offset, RSP, RSP
For frame size > 496B,
prologue:
SUB $offset, RSP, R20
STP (R29, R30), -8(R20)
MOVD R20, RSP
epilogue:
LDP -8(RSP), (R29, R30)
ADD $offset, RSP, RSP
Change-Id: Ia58af85fc81cce9b7c393dc38df43bffb203baad
Reviewed-on: https://go-review.googlesource.com/c/go/+/379075
Reviewed-by: Cherry Mui <cherryyz@google.com>
Trust: Eric Fang <eric.fang@arm.com>
Run-TryBot: Eric Fang <eric.fang@arm.com>
This commit is contained in:
parent
1045faa38c
commit
c6d9b38dd8
|
|
@ -622,92 +622,124 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
|
||||||
var prologueEnd *obj.Prog
|
var prologueEnd *obj.Prog
|
||||||
|
|
||||||
aoffset := c.autosize
|
aoffset := c.autosize
|
||||||
if aoffset > 0xF0 {
|
if aoffset > 0x1f0 {
|
||||||
aoffset = 0xF0
|
// LDP offset variant range is -512 to 504, SP should be 16-byte aligned,
|
||||||
|
// so the maximum aoffset value is 496.
|
||||||
|
aoffset = 0x1f0
|
||||||
}
|
}
|
||||||
|
|
||||||
// Frame is non-empty. Make sure to save link register, even if
|
// Frame is non-empty. Make sure to save link register, even if
|
||||||
// it is a leaf function, so that traceback works.
|
// it is a leaf function, so that traceback works.
|
||||||
q = p
|
q = p
|
||||||
if c.autosize > aoffset {
|
if c.autosize > aoffset {
|
||||||
// Frame size is too large for a MOVD.W instruction.
|
// Frame size is too large for a STP instruction. Store the frame pointer
|
||||||
// Store link register before decrementing SP, so if a signal comes
|
// register and link register before decrementing SP, so if a signal comes
|
||||||
// during the execution of the function prologue, the traceback
|
// during the execution of the function prologue, the traceback code will
|
||||||
// code will not see a half-updated stack frame.
|
// not see a half-updated stack frame.
|
||||||
// This sequence is not async preemptible, as if we open a frame
|
|
||||||
// at the current SP, it will clobber the saved LR.
|
|
||||||
q = c.ctxt.StartUnsafePoint(q, c.newprog)
|
|
||||||
|
|
||||||
q = obj.Appendp(q, c.newprog)
|
|
||||||
q.Pos = p.Pos
|
|
||||||
q.As = ASUB
|
|
||||||
q.From.Type = obj.TYPE_CONST
|
|
||||||
q.From.Offset = int64(c.autosize)
|
|
||||||
q.Reg = REGSP
|
|
||||||
q.To.Type = obj.TYPE_REG
|
|
||||||
q.To.Reg = REGTMP
|
|
||||||
|
|
||||||
prologueEnd = q
|
|
||||||
|
|
||||||
q = obj.Appendp(q, c.newprog)
|
|
||||||
q.Pos = p.Pos
|
|
||||||
q.As = AMOVD
|
|
||||||
q.From.Type = obj.TYPE_REG
|
|
||||||
q.From.Reg = REGLINK
|
|
||||||
q.To.Type = obj.TYPE_MEM
|
|
||||||
q.To.Reg = REGTMP
|
|
||||||
|
|
||||||
|
// SUB $autosize, RSP, R20
|
||||||
q1 = obj.Appendp(q, c.newprog)
|
q1 = obj.Appendp(q, c.newprog)
|
||||||
q1.Pos = p.Pos
|
q1.Pos = p.Pos
|
||||||
|
q1.As = ASUB
|
||||||
|
q1.From.Type = obj.TYPE_CONST
|
||||||
|
q1.From.Offset = int64(c.autosize)
|
||||||
|
q1.Reg = REGSP
|
||||||
|
q1.To.Type = obj.TYPE_REG
|
||||||
|
q1.To.Reg = REG_R20
|
||||||
|
|
||||||
|
prologueEnd = q1
|
||||||
|
|
||||||
|
// STP (R29, R30), -8(R20)
|
||||||
|
q1 = obj.Appendp(q1, c.newprog)
|
||||||
|
q1.Pos = p.Pos
|
||||||
|
q1.As = ASTP
|
||||||
|
q1.From.Type = obj.TYPE_REGREG
|
||||||
|
q1.From.Reg = REGFP
|
||||||
|
q1.From.Offset = REGLINK
|
||||||
|
q1.To.Type = obj.TYPE_MEM
|
||||||
|
q1.To.Reg = REG_R20
|
||||||
|
q1.To.Offset = -8
|
||||||
|
|
||||||
|
// This is not async preemptible, as if we open a frame
|
||||||
|
// at the current SP, it will clobber the saved LR.
|
||||||
|
q1 = c.ctxt.StartUnsafePoint(q1, c.newprog)
|
||||||
|
|
||||||
|
// MOVD R20, RSP
|
||||||
|
q1 = obj.Appendp(q1, c.newprog)
|
||||||
|
q1.Pos = p.Pos
|
||||||
q1.As = AMOVD
|
q1.As = AMOVD
|
||||||
q1.From.Type = obj.TYPE_REG
|
q1.From.Type = obj.TYPE_REG
|
||||||
q1.From.Reg = REGTMP
|
q1.From.Reg = REG_R20
|
||||||
q1.To.Type = obj.TYPE_REG
|
q1.To.Type = obj.TYPE_REG
|
||||||
q1.To.Reg = REGSP
|
q1.To.Reg = REGSP
|
||||||
q1.Spadj = c.autosize
|
q1.Spadj = c.autosize
|
||||||
|
|
||||||
|
q1 = c.ctxt.EndUnsafePoint(q1, c.newprog, -1)
|
||||||
|
|
||||||
if buildcfg.GOOS == "ios" {
|
if buildcfg.GOOS == "ios" {
|
||||||
// iOS does not support SA_ONSTACK. We will run the signal handler
|
// iOS does not support SA_ONSTACK. We will run the signal handler
|
||||||
// on the G stack. If we write below SP, it may be clobbered by
|
// on the G stack. If we write below SP, it may be clobbered by
|
||||||
// the signal handler. So we save LR after decrementing SP.
|
// the signal handler. So we save FP and LR after decrementing SP.
|
||||||
|
// STP (R29, R30), -8(RSP)
|
||||||
q1 = obj.Appendp(q1, c.newprog)
|
q1 = obj.Appendp(q1, c.newprog)
|
||||||
q1.Pos = p.Pos
|
q1.Pos = p.Pos
|
||||||
q1.As = AMOVD
|
q1.As = ASTP
|
||||||
q1.From.Type = obj.TYPE_REG
|
q1.From.Type = obj.TYPE_REGREG
|
||||||
q1.From.Reg = REGLINK
|
q1.From.Reg = REGFP
|
||||||
|
q1.From.Offset = REGLINK
|
||||||
q1.To.Type = obj.TYPE_MEM
|
q1.To.Type = obj.TYPE_MEM
|
||||||
q1.To.Reg = REGSP
|
q1.To.Reg = REGSP
|
||||||
|
q1.To.Offset = -8
|
||||||
}
|
}
|
||||||
|
|
||||||
q1 = c.ctxt.EndUnsafePoint(q1, c.newprog, -1)
|
|
||||||
} else {
|
} else {
|
||||||
// small frame, update SP and save LR in a single MOVD.W instruction
|
// small frame, save FP and LR with one STP instruction, then update SP.
|
||||||
|
// Store first, so if a signal comes during the execution of the function
|
||||||
|
// prologue, the traceback code will not see a half-updated stack frame.
|
||||||
|
// STP (R29, R30), -aoffset-8(RSP)
|
||||||
q1 = obj.Appendp(q, c.newprog)
|
q1 = obj.Appendp(q, c.newprog)
|
||||||
q1.As = AMOVD
|
q1.As = ASTP
|
||||||
q1.Pos = p.Pos
|
q1.Pos = p.Pos
|
||||||
q1.From.Type = obj.TYPE_REG
|
q1.From.Type = obj.TYPE_REGREG
|
||||||
q1.From.Reg = REGLINK
|
q1.From.Reg = REGFP
|
||||||
|
q1.From.Offset = REGLINK
|
||||||
q1.To.Type = obj.TYPE_MEM
|
q1.To.Type = obj.TYPE_MEM
|
||||||
q1.Scond = C_XPRE
|
q1.To.Offset = int64(-aoffset - 8)
|
||||||
q1.To.Offset = int64(-aoffset)
|
q1.To.Reg = REGSP
|
||||||
|
|
||||||
|
prologueEnd = q1
|
||||||
|
|
||||||
|
q1 = c.ctxt.StartUnsafePoint(q1, c.newprog)
|
||||||
|
// This instruction is not async preemptible, see the above comment.
|
||||||
|
// SUB $aoffset, RSP, RSP
|
||||||
|
q1 = obj.Appendp(q1, c.newprog)
|
||||||
|
q1.Pos = p.Pos
|
||||||
|
q1.As = ASUB
|
||||||
|
q1.From.Type = obj.TYPE_CONST
|
||||||
|
q1.From.Offset = int64(aoffset)
|
||||||
|
q1.Reg = REGSP
|
||||||
|
q1.To.Type = obj.TYPE_REG
|
||||||
q1.To.Reg = REGSP
|
q1.To.Reg = REGSP
|
||||||
q1.Spadj = aoffset
|
q1.Spadj = aoffset
|
||||||
|
|
||||||
prologueEnd = q1
|
q1 = c.ctxt.EndUnsafePoint(q1, c.newprog, -1)
|
||||||
|
|
||||||
|
if buildcfg.GOOS == "ios" {
|
||||||
|
// See the above comment.
|
||||||
|
// STP (R29, R30), -8(RSP)
|
||||||
|
q1 = obj.Appendp(q1, c.newprog)
|
||||||
|
q1.As = ASTP
|
||||||
|
q1.Pos = p.Pos
|
||||||
|
q1.From.Type = obj.TYPE_REGREG
|
||||||
|
q1.From.Reg = REGFP
|
||||||
|
q1.From.Offset = REGLINK
|
||||||
|
q1.To.Type = obj.TYPE_MEM
|
||||||
|
q1.To.Offset = int64(-8)
|
||||||
|
q1.To.Reg = REGSP
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
prologueEnd.Pos = prologueEnd.Pos.WithXlogue(src.PosPrologueEnd)
|
prologueEnd.Pos = prologueEnd.Pos.WithXlogue(src.PosPrologueEnd)
|
||||||
|
|
||||||
// Frame pointer.
|
|
||||||
q1 = obj.Appendp(q1, c.newprog)
|
|
||||||
q1.Pos = p.Pos
|
|
||||||
q1.As = AMOVD
|
|
||||||
q1.From.Type = obj.TYPE_REG
|
|
||||||
q1.From.Reg = REGFP
|
|
||||||
q1.To.Type = obj.TYPE_MEM
|
|
||||||
q1.To.Reg = REGSP
|
|
||||||
q1.To.Offset = -8
|
|
||||||
|
|
||||||
q1 = obj.Appendp(q1, c.newprog)
|
q1 = obj.Appendp(q1, c.newprog)
|
||||||
q1.Pos = p.Pos
|
q1.Pos = p.Pos
|
||||||
q1.As = ASUB
|
q1.As = ASUB
|
||||||
|
|
@ -850,48 +882,28 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
|
||||||
p.To.Reg = REGFP
|
p.To.Reg = REGFP
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
/* want write-back pre-indexed SP+autosize -> SP, loading REGLINK*/
|
|
||||||
|
|
||||||
// Frame pointer.
|
|
||||||
p.As = AMOVD
|
|
||||||
p.From.Type = obj.TYPE_MEM
|
|
||||||
p.From.Reg = REGSP
|
|
||||||
p.From.Offset = -8
|
|
||||||
p.To.Type = obj.TYPE_REG
|
|
||||||
p.To.Reg = REGFP
|
|
||||||
p = obj.Appendp(p, c.newprog)
|
|
||||||
|
|
||||||
aoffset := c.autosize
|
aoffset := c.autosize
|
||||||
|
// LDP -8(RSP), (R29, R30)
|
||||||
|
p.As = ALDP
|
||||||
|
p.From.Type = obj.TYPE_MEM
|
||||||
|
p.From.Offset = -8
|
||||||
|
p.From.Reg = REGSP
|
||||||
|
p.To.Type = obj.TYPE_REGREG
|
||||||
|
p.To.Reg = REGFP
|
||||||
|
p.To.Offset = REGLINK
|
||||||
|
|
||||||
if aoffset <= 0xF0 {
|
// ADD $aoffset, RSP, RSP
|
||||||
p.As = AMOVD
|
q = newprog()
|
||||||
p.From.Type = obj.TYPE_MEM
|
q.As = AADD
|
||||||
p.Scond = C_XPOST
|
q.From.Type = obj.TYPE_CONST
|
||||||
p.From.Offset = int64(aoffset)
|
q.From.Offset = int64(aoffset)
|
||||||
p.From.Reg = REGSP
|
q.To.Type = obj.TYPE_REG
|
||||||
p.To.Type = obj.TYPE_REG
|
q.To.Reg = REGSP
|
||||||
p.To.Reg = REGLINK
|
q.Spadj = -aoffset
|
||||||
p.Spadj = -aoffset
|
q.Pos = p.Pos
|
||||||
} else {
|
q.Link = p.Link
|
||||||
p.As = AMOVD
|
p.Link = q
|
||||||
p.From.Type = obj.TYPE_MEM
|
p = q
|
||||||
p.From.Offset = 0
|
|
||||||
p.From.Reg = REGSP
|
|
||||||
p.To.Type = obj.TYPE_REG
|
|
||||||
p.To.Reg = REGLINK
|
|
||||||
|
|
||||||
q = newprog()
|
|
||||||
q.As = AADD
|
|
||||||
q.From.Type = obj.TYPE_CONST
|
|
||||||
q.From.Offset = int64(aoffset)
|
|
||||||
q.To.Type = obj.TYPE_REG
|
|
||||||
q.To.Reg = REGSP
|
|
||||||
q.Link = p.Link
|
|
||||||
q.Spadj = int32(-q.From.Offset)
|
|
||||||
q.Pos = p.Pos
|
|
||||||
p.Link = q
|
|
||||||
p = q
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// If enabled, this code emits 'MOV PC, R27' before every 'MOV LR, PC',
|
// If enabled, this code emits 'MOV PC, R27' before every 'MOV LR, PC',
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue