cmd/compile: walk the progs to generate debug_lines

Rather than use the pcln tables, walk progs while generating
debug_lines.  This code slightly increases the number of is_stmt toggles
in the debug information due to a previous bug in how the pcline walking
worked. (Previous versions of the line walking code wouldn't return the
old_value, instead returning 0. This behavior might lose is_stmt toggles
in the line table.)

We suspected there would be a speedup with this change, but benchmarking
hasn't shown this to be true (but has been noisy enough to not really
show any large differences either way). These benchmarks are comparing
non-prog walking code with this prog-walking code:

name                      old time/op       new time/op       delta
Template                        321ms ± 4%        316ms ± 3%     ~     (p=0.165 n=10+10)
Unicode                         146ms ± 5%        142ms ± 4%     ~     (p=0.063 n=10+10)
GoTypes                         1.06s ± 2%        1.07s ± 2%     ~     (p=0.280 n=10+10)
Compiler                        4.07s ± 1%        4.06s ± 1%     ~     (p=0.549 n=10+9)
SSA                             12.6s ± 2%        12.7s ± 2%   +1.27%  (p=0.019 n=10+10)
Flate                           201ms ± 7%        202ms ± 4%     ~     (p=0.436 n=10+10)
GoParser                        248ms ± 4%        250ms ± 2%     ~     (p=0.356 n=9+10)
Reflect                         679ms ± 5%        678ms ± 4%     ~     (p=0.971 n=10+10)
Tar                             281ms ± 2%        283ms ± 3%     ~     (p=0.222 n=9+9)
XML                             381ms ± 3%        384ms ± 5%     ~     (p=0.393 n=10+10)
LinkCompiler                    1.08s ± 2%        1.10s ± 2%   +1.89%  (p=0.009 n=10+10)
ExternalLinkCompiler            2.23s ± 4%        2.23s ± 1%     ~     (p=1.000 n=10+8)
LinkWithoutDebugCompiler        654ms ± 4%        673ms ± 4%   +2.94%  (p=0.019 n=10+10)
StdCmd                          13.6s ± 2%        13.9s ± 1%   +2.00%  (p=0.000 n=10+10)

name                      old user-time/op  new user-time/op  delta
Template                        582ms ±11%        575ms ±14%     ~     (p=0.631 n=10+10)
Unicode                         431ms ±24%        390ms ±38%     ~     (p=0.315 n=10+10)
GoTypes                         2.47s ±11%        2.51s ± 4%     ~     (p=0.280 n=10+10)
Compiler                        9.09s ± 3%        9.04s ± 5%     ~     (p=0.684 n=10+10)
SSA                             25.8s ± 4%        26.0s ± 3%     ~     (p=0.529 n=10+10)
Flate                           318ms ±14%        322ms ±13%     ~     (p=0.912 n=10+10)
GoParser                        386ms ± 6%        386ms ± 5%     ~     (p=0.888 n=9+8)
Reflect                         1.42s ±20%        1.32s ±24%     ~     (p=0.393 n=10+10)
Tar                             476ms ±19%        471ms ±25%     ~     (p=1.000 n=10+10)
XML                             681ms ±25%        745ms ±21%     ~     (p=0.143 n=10+10)
LinkCompiler                    1.75s ±13%        1.86s ±12%     ~     (p=0.075 n=10+10)
ExternalLinkCompiler            2.98s ±18%        3.41s ±13%  +14.48%  (p=0.003 n=10+10)
LinkWithoutDebugCompiler        1.05s ±12%        1.08s ±16%     ~     (p=0.739 n=10+10)

name                      old alloc/op      new alloc/op      delta
Template                       36.4MB ± 0%       36.4MB ± 0%   -0.11%  (p=0.000 n=10+10)
Unicode                        28.6MB ± 0%       28.5MB ± 0%   -0.06%  (p=0.029 n=10+10)
GoTypes                         121MB ± 0%        121MB ± 0%   -0.09%  (p=0.000 n=9+9)
Compiler                        548MB ± 0%        547MB ± 0%   -0.10%  (p=0.000 n=10+10)
SSA                            1.87GB ± 0%       1.87GB ± 0%   -0.13%  (p=0.000 n=10+10)
Flate                          23.0MB ± 0%       22.9MB ± 0%   -0.09%  (p=0.000 n=9+10)
GoParser                       27.9MB ± 0%       27.8MB ± 0%   -0.12%  (p=0.000 n=10+10)
Reflect                        77.7MB ± 0%       77.6MB ± 0%   -0.12%  (p=0.000 n=8+10)
Tar                            34.5MB ± 0%       34.5MB ± 0%   -0.07%  (p=0.003 n=10+10)
XML                            44.4MB ± 0%       44.4MB ± 0%   -0.07%  (p=0.000 n=10+10)
LinkCompiler                    236MB ± 0%        240MB ± 0%   +1.72%  (p=0.000 n=10+10)
ExternalLinkCompiler            246MB ± 0%        254MB ± 0%   +3.02%  (p=0.000 n=10+10)
LinkWithoutDebugCompiler        159MB ± 0%        164MB ± 0%   +3.35%  (p=0.000 n=10+10)

name                      old allocs/op     new allocs/op     delta
Template                         372k ± 0%         371k ± 0%   -0.23%  (p=0.000 n=10+10)
Unicode                          340k ± 0%         340k ± 0%   -0.05%  (p=0.000 n=10+10)
GoTypes                         1.33M ± 0%        1.32M ± 0%   -0.20%  (p=0.000 n=9+10)
Compiler                        5.37M ± 0%        5.36M ± 0%   -0.17%  (p=0.000 n=10+10)
SSA                             17.9M ± 0%        17.9M ± 0%   -0.15%  (p=0.000 n=10+10)
Flate                            234k ± 0%         233k ± 0%   -0.24%  (p=0.000 n=9+10)
GoParser                         309k ± 0%         309k ± 0%   -0.21%  (p=0.000 n=10+10)
Reflect                          969k ± 0%         966k ± 0%   -0.30%  (p=0.000 n=9+10)
Tar                              348k ± 0%         347k ± 0%   -0.22%  (p=0.000 n=10+9)
XML                              426k ± 0%         425k ± 0%   -0.15%  (p=0.000 n=9+10)
LinkCompiler                     638k ± 0%         637k ± 0%   -0.07%  (p=0.000 n=10+10)
ExternalLinkCompiler            1.69M ± 0%        1.69M ± 0%   -0.05%  (p=0.000 n=10+10)
LinkWithoutDebugCompiler         222k ± 0%         221k ± 0%   -0.03%  (p=0.007 n=10+9)

name                      old object-bytes  new object-bytes  delta
Template                        559kB ± 0%        560kB ± 0%   +0.23%  (p=0.000 n=10+10)
Unicode                         216kB ± 0%        216kB ± 0%   +0.01%  (p=0.000 n=10+10)
GoTypes                        2.03MB ± 0%       2.04MB ± 0%   +0.31%  (p=0.000 n=10+10)
Compiler                       8.07MB ± 0%       8.10MB ± 0%   +0.35%  (p=0.000 n=10+10)
SSA                            27.1MB ± 0%       27.3MB ± 0%   +0.72%  (p=0.000 n=10+10)
Flate                           343kB ± 0%        344kB ± 0%   +0.22%  (p=0.000 n=10+10)
GoParser                        441kB ± 0%        442kB ± 0%   +0.34%  (p=0.000 n=10+10)
Reflect                        1.36MB ± 0%       1.36MB ± 0%   +0.23%  (p=0.000 n=10+10)
Tar                             487kB ± 0%        488kB ± 0%   +0.21%  (p=0.000 n=10+10)
XML                             632kB ± 0%        634kB ± 0%   +0.35%  (p=0.000 n=10+10)

name                      old export-bytes  new export-bytes  delta
Template                       18.5kB ± 0%       18.5kB ± 0%     ~     (all equal)
Unicode                        7.92kB ± 0%       7.92kB ± 0%     ~     (all equal)
GoTypes                        35.0kB ± 0%       35.0kB ± 0%     ~     (all equal)
Compiler                        109kB ± 0%        109kB ± 0%   +0.00%  (p=0.000 n=10+10)
SSA                             137kB ± 0%        137kB ± 0%   +0.00%  (p=0.000 n=10+10)
Flate                          4.89kB ± 0%       4.89kB ± 0%     ~     (all equal)
GoParser                       8.49kB ± 0%       8.49kB ± 0%     ~     (all equal)
Reflect                        11.4kB ± 0%       11.4kB ± 0%     ~     (all equal)
Tar                            10.5kB ± 0%       10.5kB ± 0%     ~     (all equal)
XML                            16.7kB ± 0%       16.7kB ± 0%     ~     (all equal)

name                      old text-bytes    new text-bytes    delta
HelloSize                       760kB ± 0%        760kB ± 0%     ~     (all equal)
CmdGoSize                      10.8MB ± 0%       10.8MB ± 0%     ~     (all equal)

name                      old data-bytes    new data-bytes    delta
HelloSize                      10.7kB ± 0%       10.7kB ± 0%     ~     (all equal)
CmdGoSize                       312kB ± 0%        312kB ± 0%     ~     (all equal)

name                      old bss-bytes     new bss-bytes     delta
HelloSize                       122kB ± 0%        122kB ± 0%     ~     (all equal)
CmdGoSize                       146kB ± 0%        146kB ± 0%     ~     (all equal)

name                      old exe-bytes     new exe-bytes     delta
HelloSize                      1.13MB ± 0%       1.13MB ± 0%     ~     (all equal)
CmdGoSize                      15.0MB ± 0%       15.1MB ± 0%   +0.22%  (p=0.000 n=10+10)

Change-Id: If6e0982cd1398062a88e6c0c7513e141f9503531
Reviewed-on: https://go-review.googlesource.com/c/go/+/196661
Run-TryBot: Jeremy Faller <jeremy@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
This commit is contained in:
Jeremy Faller 2019-09-20 14:02:24 -04:00
parent 64785bf96c
commit 9e6a84f2b1
2 changed files with 38 additions and 110 deletions

View File

@ -8,6 +8,7 @@ package obj
import (
"cmd/internal/dwarf"
"cmd/internal/src"
"fmt"
)
@ -31,95 +32,51 @@ const (
func (ctxt *Link) generateDebugLinesSymbol(s, lines *LSym) {
dctxt := dwCtxt{ctxt}
// The Pcfile table is used to generate the debug_lines section, and the file
// indices for that data could differ from the files we write out for the
// debug_lines section. Here we generate a LUT between those two indices.
fileNums := make(map[int32]int64)
for i, filename := range s.Func.Pcln.File {
if symbolIndex := ctxt.PosTable.FileIndex(filename); symbolIndex >= 0 {
fileNums[int32(i)] = int64(symbolIndex) + 1
} else {
panic(fmt.Sprintf("First time we've seen filename: %q", filename))
}
}
// Set up the debug_lines state machine.
// NB: This state machine is reset to this state when we've finished
// generating the line table. See below.
// TODO: Once delve can support multiple DW_LNS_end_statements, we don't have
// to do this.
is_stmt := uint8(1)
stmt := true
line := int64(1)
pc := s.Func.Text.Pc
line := 1
file := 1
name := ""
prologue, wrotePrologue := false, false
// The linker will insert the DW_LNE_set_address once determined; therefore,
// it's omitted here.
// Walk the progs, generating the DWARF table.
for p := s.Func.Text; p != nil; p = p.Link {
prologue = prologue || (p.Pos.Xlogue() == src.PosPrologueEnd)
// If we're not at a real instruction, keep looping!
if p.Pos.Line() == 0 || (p.Link != nil && p.Link.Pc == pc) {
continue
}
newStmt := p.Pos.IsStmt() != src.PosNotStmt
newName, newLine := linkgetlineFromPos(ctxt, p.Pos)
// Generate the actual line information.
// We use the pcline and pcfile to generate this section, and it's suboptimal.
// Likely better would be to generate this dirrectly from the progs and not
// parse those tables.
// TODO: Generate from the progs if it's faster.
pcfile := NewPCIter(uint32(ctxt.Arch.Arch.MinLC))
pcline := NewPCIter(uint32(ctxt.Arch.Arch.MinLC))
pcstmt := NewPCIter(uint32(ctxt.Arch.Arch.MinLC))
pcfile.Init(s.Func.Pcln.Pcfile.P)
pcline.Init(s.Func.Pcln.Pcline.P)
var pctostmtData Pcdata
funcpctab(ctxt, &pctostmtData, s, "pctostmt", pctostmt, nil)
pcstmt.Init(pctostmtData.P)
var thispc uint32
for !pcfile.Done && !pcline.Done {
// Only changed if it advanced
if int32(file) != pcfile.Value {
// Output debug info.
wrote := false
if name != newName {
newFile := ctxt.PosTable.FileIndex(newName) + 1 // 1 indexing for the table.
dctxt.AddUint8(lines, dwarf.DW_LNS_set_file)
dwarf.Uleb128put(dctxt, lines, fileNums[pcfile.Value])
file = int(pcfile.Value)
dwarf.Uleb128put(dctxt, lines, int64(newFile))
name = newName
wrote = true
}
if prologue && !wrotePrologue {
dctxt.AddUint8(lines, uint8(dwarf.DW_LNS_set_prologue_end))
wrotePrologue = true
wrote = true
}
if stmt != newStmt {
dctxt.AddUint8(lines, uint8(dwarf.DW_LNS_negate_stmt))
stmt = newStmt
wrote = true
}
// Only changed if it advanced
if is_stmt != uint8(pcstmt.Value) {
new_stmt := uint8(pcstmt.Value)
switch new_stmt &^ 1 {
case PrologueEnd:
dctxt.AddUint8(lines, uint8(dwarf.DW_LNS_set_prologue_end))
case EpilogueBegin:
// TODO if there is a use for this, add it.
// Don't forget to increase OPCODE_BASE by 1 and add entry for standard_opcode_lengths[11]
panic("unsupported EpilogueBegin")
}
new_stmt &= 1
if is_stmt != new_stmt {
is_stmt = new_stmt
dctxt.AddUint8(lines, uint8(dwarf.DW_LNS_negate_stmt))
}
}
// putpcldelta makes a row in the DWARF matrix, always, even if line is unchanged.
putpclcdelta(ctxt, dctxt, lines, uint64(s.Func.Text.Pc+int64(thispc)-pc), int64(pcline.Value)-int64(line))
pc = s.Func.Text.Pc + int64(thispc)
line = int(pcline.Value)
// Take the minimum step forward for the three iterators
thispc = pcfile.NextPC
if pcline.NextPC < thispc {
thispc = pcline.NextPC
}
if !pcstmt.Done && pcstmt.NextPC < thispc {
thispc = pcstmt.NextPC
}
if pcfile.NextPC == thispc {
pcfile.Next()
}
if !pcstmt.Done && pcstmt.NextPC == thispc {
pcstmt.Next()
}
if pcline.NextPC == thispc {
pcline.Next()
if line != int64(newLine) || wrote {
pcdelta := (p.Pc - pc) / int64(ctxt.Arch.MinLC)
putpclcdelta(ctxt, dctxt, lines, uint64(pcdelta), int64(newLine)-line)
line, pc = int64(newLine), p.Pc
}
}
@ -129,16 +86,16 @@ func (ctxt *Link) generateDebugLinesSymbol(s, lines *LSym) {
// file = 1
// line = 1
// column = 0
// is_stmt = set in header, we assume true
// stmt = set in header, we assume true
// basic_block = false
// Careful readers of the DWARF specification will note that we don't reset
// the address of the state machine -- but this will happen at the beginning
// of the NEXT block of opcodes. (See the SetAddress call above.)
// of the NEXT block of opcodes.
dctxt.AddUint8(lines, dwarf.DW_LNS_set_file)
dwarf.Uleb128put(dctxt, lines, 1)
dctxt.AddUint8(lines, dwarf.DW_LNS_advance_line)
dwarf.Sleb128put(dctxt, lines, int64(1-line))
if is_stmt != 1 {
if !stmt {
dctxt.AddUint8(lines, dwarf.DW_LNS_negate_stmt)
}
dctxt.AddUint8(lines, dwarf.DW_LNS_copy)

View File

@ -5,7 +5,6 @@
package obj
import (
"cmd/internal/src"
"encoding/binary"
"log"
)
@ -249,34 +248,6 @@ func pctospadj(ctxt *Link, sym *LSym, oldval int32, p *Prog, phase int32, arg in
return oldval + p.Spadj
}
// pctostmt returns either,
// if phase==0, then whether the current instruction is a step-target (Dwarf is_stmt)
// bit-or'd with whether the current statement is a prologue end or epilogue begin
// else (phase == 1), zero.
//
func pctostmt(ctxt *Link, sym *LSym, oldval int32, p *Prog, phase int32, arg interface{}) int32 {
if phase == 1 {
return 0 // Ignored; also different from initial value of -1, if that ever matters.
}
s := p.Pos.IsStmt()
l := p.Pos.Xlogue()
var is_stmt int32
// PrologueEnd, at least, is passed to the next instruction
switch l {
case src.PosPrologueEnd:
is_stmt = PrologueEnd
case src.PosEpilogueBegin:
is_stmt = EpilogueBegin
}
if s != src.PosNotStmt {
is_stmt |= 1 // either PosDefaultStmt from asm, or PosIsStmt from go
}
return is_stmt
}
// pctopcdata computes the pcdata value in effect at p.
// A PCDATA instruction sets the value in effect at future
// non-PCDATA instructions.