cmd/link, runtime: convert FUNCDATA relocations to offsets

Every function has a number of associated funcdata entries, each of which refers to another symbol.
Prior to this change, a funcdata pointer was stored as a relocation.

This change stores each funcdata reference instead as a 32-bit offset relative to go.func.* or go.funcrel.*.
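
For illustration, the stored 32-bit value can be sketched as follows (a simplified sketch of the encoding scheme, not code from this CL; the function name is made up):

	// encodeFuncdataOff packs a funcdata reference into the 32-bit word
	// stored in runtime.functab. off is the funcdata symbol's offset within
	// its carrier symbol; inFuncrel reports whether that carrier is
	// go.funcrel.* rather than go.func.*. The bottom bit selects the base.
	func encodeFuncdataOff(off uint32, inFuncrel bool) uint32 {
		rel := off << 1
		if inFuncrel {
			rel |= 1
		}
		return rel
	}

A missing funcdata entry is written as ^uint32(0), which the runtime treats as nil.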

This reduces the number of relocations on darwin/arm64 by about 40%.
It also shrinks externally linked binaries. On darwin/arm64:

size      before    after     Δ        %
addr2line 3788498   3699730   -88768   -2.343%
api       5100018   4951074   -148944  -2.920%
asm       4855234   4744274   -110960  -2.285%
buildid   2500162   2419986   -80176   -3.207%
cgo       4338258   4218306   -119952  -2.765%
compile   22764418  22132226  -632192  -2.777%
cover     4583186   4432770   -150416  -3.282%
dist      3200962   3094626   -106336  -3.322%
doc       3680402   3583602   -96800   -2.630%
fix       3114914   3023922   -90992   -2.921%
link      6308578   6154786   -153792  -2.438%
nm        3754338   3665826   -88512   -2.358%
objdump   4124738   4015234   -109504  -2.655%
pack      2232626   2155010   -77616   -3.476%
pprof     13497474  13044066  -453408  -3.359%
test2json 2483810   2402146   -81664   -3.288%
trace     10108898  9748802   -360096  -3.562%
vet       6884322   6681314   -203008  -2.949%
total     107320836 104167700 -3153136 -2.938%

relocs    before  after   Δ       %
addr2line 33357   25563   -7794   -23.365%
api       31589   18409   -13180  -41.723%
asm       27825   18904   -8921   -32.061%
buildid   15603   9513    -6090   -39.031%
cgo       27809   17103   -10706  -38.498%
compile   114769  64829   -49940  -43.513%
cover     32932   19462   -13470  -40.902%
dist      18797   10796   -8001   -42.565%
doc       22891   13503   -9388   -41.012%
fix       19700   11465   -8235   -41.802%
link      37324   23198   -14126  -37.847%
nm        33226   25480   -7746   -23.313%
objdump   35237   26610   -8627   -24.483%
pack      13535   7951    -5584   -41.256%
pprof     97986   63961   -34025  -34.724%
test2json 15113   8735    -6378   -42.202%
trace     66786   39636   -27150  -40.652%
vet       43328   25971   -17357  -40.060%
total     687806  431088  -256718 -37.324%

It should also incrementally speed up binary launching
and may reduce linker memory use.

This is another step towards removing relocations so
that pages that were previously dirtied by the loader may remain clean,
which will offer memory savings useful in constrained environments like iOS.

Removing the relocations in .stkobj symbols will allow further simplification:
there will then be no references into go.funcrel.*,
so we will no longer need the bottom bit to distinguish offset bases.
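
For reference, the runtime decodes these values roughly as sketched below (condensed from the runtime/symtab.go change in this CL; assumes unsafe is imported, and the helper name is made up):

	// decodeFuncdata maps a stored 32-bit funcdata value back to a pointer,
	// given the module's go.func.* and go.funcrel.* base addresses.
	func decodeFuncdata(off uint32, gofunc, gofuncrel uintptr) unsafe.Pointer {
		if off == ^uint32(0) {
			return nil // all-ones sentinel: no funcdata
		}
		base := gofunc
		if off&1 != 0 {
			base = gofuncrel
		}
		return unsafe.Pointer(base + uintptr(off>>1))
	}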

Change-Id: I83d34c1701d6f3f515b9905941477d522441019d
Reviewed-on: https://go-review.googlesource.com/c/go/+/352110
Trust: Josh Bleecher Snyder <josharian@gmail.com>
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Author: Josh Bleecher Snyder
Date:   2021-10-01 16:35:43 -07:00
Commit: 017ffcd10d (parent cfd74f7727)

3 changed files with 58 additions and 52 deletions

src/cmd/link/internal/ld/pcln.go

@@ -552,11 +552,8 @@ type pclnSetUint func(*loader.SymbolBuilder, *sys.Arch, int64, uint64) int64
 //
 // Because of timing in the linker, generating this table takes two passes.
 // The first pass is executed early in the link, and it creates any needed
-// relocations to lay out the data. The pieces that need relocations are:
-// 1) the PC->func table.
-// 2) The funcdata.
-// (1) is handled in writePCToFunc. (2) is handled in writeFuncdata.
-//
+// relocations to lay out the data. The piece that needs relocations is
+// the PC->func table, handled in writePCToFunc.
 // After relocations, once we know where to write things in the output buffer,
 // we execute the second pass, which is actually writing the data.
 func (state *pclntab) generateFunctab(ctxt *Link, funcs []loader.Sym, inlSyms map[loader.Sym]loader.Sym, cuOffsets []uint32, nameOffsets map[loader.Sym]uint32) {
@@ -592,7 +589,6 @@ func (state *pclntab) generateFunctab(ctxt *Link, funcs []loader.Sym, inlSyms ma
 		// Write the data.
 		writePCToFunc(ctxt, sb, funcs, startLocations, setAddr, (*loader.SymbolBuilder).SetUint)
 		writeFuncs(ctxt, sb, funcs, inlSyms, startLocations, cuOffsets, nameOffsets)
-		state.writeFuncData(ctxt, sb, funcs, inlSyms, startLocations, setAddr, (*loader.SymbolBuilder).SetUint)
 	}
 	state.pclntab = state.addGeneratedSym(ctxt, "runtime.functab", size, writePcln)
@@ -616,11 +612,6 @@ func (state *pclntab) generateFunctab(ctxt *Link, funcs []loader.Sym, inlSyms ma
 	}
 	setUintNOP := func(*loader.SymbolBuilder, *sys.Arch, int64, uint64) int64 { return 0 }
 	writePCToFunc(ctxt, sb, funcs, startLocations, setAddr, setUintNOP)
-	if !useSymValue {
-		// Generate relocations for funcdata when externally linking.
-		state.writeFuncData(ctxt, sb, funcs, inlSyms, startLocations, setAddr, setUintNOP)
-		sb.SortRelocs()
-	}
 }
 
 // funcData returns the funcdata and offsets for the FuncInfo.
@@ -675,7 +666,7 @@ func (state pclntab) calculateFunctabSize(ctxt *Link, funcs []loader.Sym) (int64
 			if numFuncData > 0 { // Func data is aligned.
 				size = Rnd(size, int64(ctxt.Arch.PtrSize))
 			}
-			size += int64(numFuncData * ctxt.Arch.PtrSize)
+			size += int64(numFuncData * 4)
 		}
 	}
@@ -715,49 +706,12 @@ func writePCToFunc(ctxt *Link, sb *loader.SymbolBuilder, funcs []loader.Sym, sta
 	setAddr(sb, ctxt.Arch, int64(funcIndex)*2*int64(ctxt.Arch.PtrSize), prevFunc, ldr.SymSize(prevFunc))
 }
 
-// writeFuncData writes the funcdata tables.
-//
-// This function executes a callback for each funcdata needed in
-// runtime.functab. It should be called once for internally linked static
-// binaries, or twice (once to generate the needed relocations) for other
-// build modes.
-//
-// Note the output of this function is interwoven with writeFuncs, but this is
-// a separate function, because it's needed in different passes in
-// generateFunctab.
-func (state *pclntab) writeFuncData(ctxt *Link, sb *loader.SymbolBuilder, funcs []loader.Sym, inlSyms map[loader.Sym]loader.Sym, startLocations []uint32, setAddr pclnSetAddr, setUint pclnSetUint) {
-	ldr := ctxt.loader
-	funcdata := []loader.Sym{}
-	for i, s := range funcs {
-		fi := ldr.FuncInfo(s)
-		if !fi.Valid() {
-			continue
-		}
-		fi.Preload()
-		// funcdata, must be pointer-aligned and we're only int32-aligned.
-		// Missing funcdata will be 0 (nil pointer).
-		funcdata = funcData(ldr, s, fi, inlSyms[s], funcdata)
-		if len(funcdata) > 0 {
-			off := int64(startLocations[i] + funcSize + numPCData(ldr, s, fi)*4)
-			off = Rnd(off, int64(ctxt.Arch.PtrSize))
-			for j := range funcdata {
-				dataoff := off + int64(ctxt.Arch.PtrSize*j)
-				if funcdata[j] == 0 {
-					setUint(sb, ctxt.Arch, dataoff, 0)
-					continue
-				}
-				// TODO: Does this need deduping?
-				setAddr(sb, ctxt.Arch, dataoff, funcdata[j], 0)
-			}
-		}
-	}
-}
 
 // writeFuncs writes the func structures and pcdata to runtime.functab.
 func writeFuncs(ctxt *Link, sb *loader.SymbolBuilder, funcs []loader.Sym, inlSyms map[loader.Sym]loader.Sym, startLocations, cuOffsets []uint32, nameOffsets map[loader.Sym]uint32) {
 	ldr := ctxt.loader
 	deferReturnSym := ldr.Lookup("runtime.deferreturn", abiInternalVer)
+	gofunc := ldr.Lookup("go.func.*", 0)
+	gofuncrel := ldr.Lookup("go.funcrel.*", 0)
 	textStart := ldr.SymValue(ldr.Lookup("runtime.text", 0))
 	funcdata := []loader.Sym{}
 	var pcsp, pcfile, pcline, pcinline loader.Sym
@@ -844,6 +798,40 @@ func writeFuncs(ctxt *Link, sb *loader.SymbolBuilder, funcs []loader.Sym, inlSym
 				sb.SetUint32(ctxt.Arch, int64(off+objabi.PCDATA_InlTreeIndex*4), uint32(ldr.SymValue(pcinline)))
 			}
 		}
+
+		// Write funcdata refs as offsets from go.func.* and go.funcrel.*.
+		funcdata = funcData(ldr, s, fi, inlSyms[s], funcdata)
+		// funcdata must be pointer-aligned and we're only int32-aligned.
+		// Missing funcdata will be ^0. See runtime/symtab.go:funcdata.
+		off = uint32(startLocations[i] + funcSize + numPCData(ldr, s, fi)*4)
+		off = uint32(Rnd(int64(off), int64(ctxt.Arch.PtrSize)))
+		for j := range funcdata {
+			dataoff := off + uint32(4*j)
+			fdsym := funcdata[j]
+			if fdsym == 0 {
+				sb.SetUint32(ctxt.Arch, int64(dataoff), ^uint32(0)) // ^0 is a sentinel for "no value"
+				continue
+			}
+			outer := ldr.OuterSym(fdsym)
+			if outer == 0 {
+				panic(fmt.Sprintf("no carrier sym for symbol %s (funcdata %s#%d)", ldr.SymName(fdsym), ldr.SymName(s), j))
+			}
+			rel := uint32(ldr.SymValue(fdsym) - ldr.SymValue(outer))
+			// Record gofunc vs gofuncrel in bottom bit. See runtime/symtab.go:funcdata.
+			// TODO: The only symbols that are in gofuncrel are .stkobj symbols.
+			// Remove those relocations, and simplify this.
+			rel <<= 1
+			switch outer {
+			case gofunc:
+			case gofuncrel:
+				rel |= 1
+			default:
+				panic(fmt.Sprintf("expected symbol %s (funcdata %s#%d) to be placed in go.func.* or go.funcrel.*, got %s (%d)",
+					ldr.SymName(fdsym), ldr.SymName(s), j, ldr.SymName(outer), outer))
+			}
+			sb.SetUint32(ctxt.Arch, int64(dataoff), rel)
+		}
 	}
 }

src/cmd/link/internal/ld/symtab.go

@@ -676,6 +676,12 @@ func (ctxt *Link) symtab(pcln *pclntab) []sym.SymKind {
 	moduledata.AddAddr(ctxt.Arch, ldr.Lookup("runtime.gcbss", 0))
 	moduledata.AddAddr(ctxt.Arch, ldr.Lookup("runtime.types", 0))
 	moduledata.AddAddr(ctxt.Arch, ldr.Lookup("runtime.etypes", 0))
+	moduledata.AddAddr(ctxt.Arch, ldr.Lookup("go.func.*", 0))
+	if gofuncrel := ldr.Lookup("go.funcrel.*", 0); gofuncrel != 0 {
+		moduledata.AddAddr(ctxt.Arch, gofuncrel)
+	} else {
+		moduledata.AddUint(ctxt.Arch, 0)
+	}
 
 	if ctxt.IsAIX() && ctxt.IsExternal() {
 		// Add R_XCOFFREF relocation to prevent ld's garbage collection of

src/runtime/symtab.go

@@ -427,6 +427,7 @@ type moduledata struct {
 	noptrbss, enoptrbss uintptr
 	end, gcdata, gcbss uintptr
 	types, etypes uintptr
+	gofunc, gofuncrel uintptr // go.func.*, go.funcrel.*
 
 	textsectmap []textsect
 	typelinks []int32 // offsets from types
@@ -1073,6 +1074,8 @@ func pcdatavalue2(f funcInfo, table uint32, targetpc uintptr) (int32, uintptr) {
 	return pcvalue(f, pcdatastart(f, table), targetpc, nil, true)
 }
 
+// funcdata returns a pointer to the ith funcdata for f.
+// funcdata should be kept in sync with cmd/link:writeFuncs.
 func funcdata(f funcInfo, i uint8) unsafe.Pointer {
 	if i < 0 || i >= f.nfuncdata {
 		return nil
@@ -1084,7 +1087,16 @@ func funcdata(f funcInfo, i uint8) unsafe.Pointer {
 		}
 		p = add(p, 4)
 	}
-	return *(*unsafe.Pointer)(add(p, uintptr(i)*goarch.PtrSize))
+	p = add(p, uintptr(i)*4)
+	off := *(*uint32)(p)
+	if off == ^uint32(0) {
+		return nil
+	}
+	base := f.datap.gofunc
+	if off&1 != 0 {
+		base = f.datap.gofuncrel
+	}
+	return unsafe.Pointer(base + uintptr(off>>1))
 }
 
 // step advances to the next pc, value pair in the encoded table.