diff --git a/src/cmd/compile/internal/gc/bexport.go b/src/cmd/compile/internal/gc/bexport.go index e5fa3c39a6..eee71291be 100644 --- a/src/cmd/compile/internal/gc/bexport.go +++ b/src/cmd/compile/internal/gc/bexport.go @@ -36,25 +36,21 @@ If the field is a pointer to another object, that object is serialized, recursively. Otherwise the field is written. Non-pointer fields are all encoded as integer or string values. -Only packages and types may be referred to more than once. When getting -to a package or type that was not serialized before, an integer _index_ +Some objects (packages, types) may be referred to more than once. When +reaching an object that was not serialized before, an integer _index_ is assigned to it, starting at 0. In this case, the encoding starts with an integer _tag_ < 0. The tag value indicates the kind of object -(package or type) that follows and that this is the first time that we -see this object. If the package or tag was already serialized, the encoding -starts with the respective package or type index >= 0. An importer can -trivially determine if a package or type needs to be read in for the first -time (tag < 0) and entered into the respective package or type table, or -if the package or type was seen already (index >= 0), in which case the -index is used to look up the object in a table. +that follows and that this is the first time that we see this object. +If the object was already serialized, the encoding is simply the object +index >= 0. An importer can trivially determine if an object needs to +be read in for the first time (tag < 0) and entered into the respective +object table, or if the object was seen already (index >= 0), in which +case the index is used to look up the object in a table. Before exporting or importing, the type tables are populated with the predeclared types (int, string, error, unsafe.Pointer, etc.). This way they are automatically encoded with a known and fixed type index. -TODO(gri) We may consider using the same sharing for other items -that are written out, such as strings, or possibly symbols (*Sym). - Encoding format: The export data starts with a single byte indicating the encoding format @@ -73,11 +69,17 @@ the previously imported type pointer so that we have exactly one version (i.e., one pointer) for each named type (and read but discard the current type encoding). Unnamed types simply encode their respective fields. -In the encoding, some lists start with the list length (incl. strings). -Some lists are terminated with an end marker (usually for lists where -we may not know the length a priori). +In the encoding, some lists start with the list length. Some lists are +terminated with an end marker (usually for lists where we may not know +the length a priori). -All integer values use variable-length encoding for compact representation. +Integers use variable-length encoding for compact representation. + +Strings are canonicalized similar to objects that may occur multiple times: +If the string was exported already, it is represented by its index only. +Otherwise, the export data starts with the negative string length (negative, +so we can distinguish from string index), followed by the string bytes. +The empty string is mapped to index 0. The exporter and importer are completely symmetric in implementation: For each encoding routine there is a matching and symmetric decoding routine. @@ -125,9 +127,15 @@ const exportInlined = true // default: true type exporter struct { out *bufio.Writer - pkgIndex map[*Pkg]int // pkg -> pkg index in order of appearance - typIndex map[*Type]int // type -> type index in order of appearance - funcList []*Func // in order of appearance + // object -> index maps, indexed in order of serialization + strIndex map[string]int + pkgIndex map[*Pkg]int + typIndex map[*Type]int + funcList []*Func + + // position encoding + prevFile string + prevLine int // debugging support written int // bytes written @@ -139,6 +147,7 @@ type exporter struct { func export(out *bufio.Writer, trace bool) int { p := exporter{ out: out, + strIndex: map[string]int{"": 0}, // empty string is mapped to 0 pkgIndex: make(map[*Pkg]int), typIndex: make(map[*Type]int), trace: trace, @@ -149,7 +158,7 @@ func export(out *bufio.Writer, trace bool) int { if debugFormat { format = 'd' } - p.byte(format) + p.rawByte(format) // --- generic export data --- @@ -419,6 +428,7 @@ func (p *exporter) obj(sym *Sym) { } p.tag(constTag) + p.pos(n) // TODO(gri) In inlined functions, constants are used directly // so they should never occur as re-exported objects. We may // not need the qualified name here. See also comment above. @@ -447,6 +457,7 @@ func (p *exporter) obj(sym *Sym) { if n.Type.Etype == TFUNC && n.Class == PFUNC { // function p.tag(funcTag) + p.pos(n) p.qualifiedName(sym) sig := sym.Def.Type @@ -471,6 +482,7 @@ func (p *exporter) obj(sym *Sym) { } else { // variable p.tag(varTag) + p.pos(n) p.qualifiedName(sym) p.typ(sym.Def.Type) } @@ -480,6 +492,26 @@ func (p *exporter) obj(sym *Sym) { } } +func (p *exporter) pos(n *Node) { + var file string + var line int + if n != nil { + file, line = Ctxt.LineHist.FileLine(int(n.Lineno)) + } + + if file == p.prevFile && line != p.prevLine { + // common case: write delta-encoded line number + p.int(line - p.prevLine) // != 0 + } else { + // uncommon case: filename changed, or line didn't change + p.int(0) + p.string(file) + p.int(line) + p.prevFile = file + } + p.prevLine = line +} + func isInlineable(n *Node) bool { if exportInlined && n != nil && n.Func != nil && len(n.Func.Inl.Slice()) != 0 { // when lazily typechecking inlined bodies, some re-exported ones may not have been typechecked yet. @@ -525,13 +557,17 @@ func (p *exporter) typ(t *Type) { if t.Orig == t { Fatalf("exporter: predeclared type missing from type map?") } - // TODO(gri) The assertion below seems incorrect (crashes during all.bash). - // we expect the respective definition to point to us + + // TODO(gri) The assertion below is incorrect (crashes during all.bash), + // likely because of symbol shadowing (we expect the respective definition + // to point to us). Determine the correct Def so we get correct position + // info. // if tsym.Def.Type != t { // Fatalf("exporter: type definition doesn't point to us?") // } p.tag(namedTag) + p.pos(tsym.Def) // TODO(gri) this may not be the correct node - fix and add tests p.qualifiedName(tsym) // write underlying type @@ -564,6 +600,7 @@ func (p *exporter) typ(t *Type) { Fatalf("invalid symbol name: %s (%v)", m.Sym.Name, m.Sym) } + p.pos(m.Sym.Def) p.fieldSym(m.Sym, false) sig := m.Type @@ -668,8 +705,12 @@ func (p *exporter) fieldList(t *Type) { } func (p *exporter) field(f *Field) { + p.pos(f.Sym.Def) p.fieldName(f.Sym, f) p.typ(f.Type) + // TODO(gri) Do we care that a non-present tag cannot be distinguished + // from a present but empty ta string? (reflect doesn't seem to make + // a difference). Investigate. p.note(f.Note) } @@ -697,6 +738,7 @@ func (p *exporter) methodList(t *Type) { } func (p *exporter) method(m *Field) { + p.pos(m.Sym.Def) p.fieldName(m.Sym, m) p.paramList(m.Type.Params(), false) p.paramList(m.Type.Results(), false) @@ -793,9 +835,6 @@ func (p *exporter) param(q *Field, n int, numbered bool) { // TODO(gri) This is compiler-specific (escape info). // Move into compiler-specific section eventually? // (Not having escape info causes tests to fail, e.g. runtime GCInfoTest) - // - // TODO(gri) The q.Note is much more verbose that necessary and - // adds significantly to export data size. FIX THIS. p.note(q.Note) } @@ -1497,9 +1536,17 @@ func (p *exporter) string(s string) { if p.trace { p.tracef("%q ", s) } - p.rawInt64(int64(len(s))) + // if we saw the string before, write its index (>= 0) + // (the empty string is mapped to 0) + if i, ok := p.strIndex[s]; ok { + p.rawInt64(int64(i)) + return + } + // otherwise, remember string and write its negative length and bytes + p.strIndex[s] = len(p.strIndex) + p.rawInt64(-int64(len(s))) for i := 0; i < len(s); i++ { - p.byte(s[i]) + p.rawByte(s[i]) } } @@ -1507,7 +1554,7 @@ func (p *exporter) string(s string) { // it easy for a reader to detect if it is "out of sync". Used only // if debugFormat is set. func (p *exporter) marker(m byte) { - p.byte(m) + p.rawByte(m) // Uncomment this for help tracking down the location // of an incorrect marker when running in debugFormat. // if p.trace { @@ -1521,12 +1568,12 @@ func (p *exporter) rawInt64(x int64) { var tmp [binary.MaxVarintLen64]byte n := binary.PutVarint(tmp[:], x) for i := 0; i < n; i++ { - p.byte(tmp[i]) + p.rawByte(tmp[i]) } } -// byte is the bottleneck interface to write to p.out. -// byte escapes b as follows (any encoding does that +// rawByte is the bottleneck interface to write to p.out. +// rawByte escapes b as follows (any encoding does that // hides '$'): // // '$' => '|' 'S' @@ -1534,7 +1581,8 @@ func (p *exporter) rawInt64(x int64) { // // Necessary so other tools can find the end of the // export data by searching for "$$". -func (p *exporter) byte(b byte) { +// rawByte should only be used by low-level encoders. +func (p *exporter) rawByte(b byte) { switch b { case '$': // write '$' as '|' 'S' diff --git a/src/cmd/compile/internal/gc/bimport.go b/src/cmd/compile/internal/gc/bimport.go index 223cc443aa..6654345ead 100644 --- a/src/cmd/compile/internal/gc/bimport.go +++ b/src/cmd/compile/internal/gc/bimport.go @@ -20,13 +20,18 @@ import ( // changes to bimport.go and bexport.go. type importer struct { - in *bufio.Reader - buf []byte // for reading strings - bufarray [64]byte // initial underlying array for buf, large enough to avoid allocation when compiling std lib + in *bufio.Reader + buf []byte // reused for reading strings - pkgList []*Pkg // in order of appearance - typList []*Type // in order of appearance - funcList []*Node // in order of appearance; nil entry means already declared + // object lists, in order of deserialization + strList []string + pkgList []*Pkg + typList []*Type + funcList []*Node // nil entry means already declared + + // position encoding + prevFile string + prevLine int // debugging support debugFormat bool @@ -35,11 +40,13 @@ type importer struct { // Import populates importpkg from the serialized package data. func Import(in *bufio.Reader) { - p := importer{in: in} - p.buf = p.bufarray[:] + p := importer{ + in: in, + strList: []string{""}, // empty string is mapped to 0 + } // read low-level encoding format - switch format := p.byte(); format { + switch format := p.rawByte(); format { case 'c': // compact format - nothing to do case 'd': @@ -221,6 +228,7 @@ func idealType(typ *Type) *Type { func (p *importer) obj(tag int) { switch tag { case constTag: + p.pos() sym := p.qualifiedName() typ := p.typ() val := p.value(typ) @@ -230,11 +238,13 @@ func (p *importer) obj(tag int) { p.typ() case varTag: + p.pos() sym := p.qualifiedName() typ := p.typ() importvar(sym, typ) case funcTag: + p.pos() sym := p.qualifiedName() params := p.paramList() result := p.paramList() @@ -268,6 +278,22 @@ func (p *importer) obj(tag int) { } } +func (p *importer) pos() { + file := p.prevFile + line := p.prevLine + + if delta := p.int(); delta != 0 { + line += delta + } else { + file = p.string() + line = p.int() + p.prevFile = file + } + p.prevLine = line + + // TODO(gri) register new position +} + func (p *importer) newtyp(etype EType) *Type { t := typ(etype) p.typList = append(p.typList, t) @@ -286,6 +312,7 @@ func (p *importer) typ() *Type { switch i { case namedTag: // parser.go:hidden_importsym + p.pos() tsym := p.qualifiedName() // parser.go:hidden_pkgtype @@ -311,6 +338,7 @@ func (p *importer) typ() *Type { for i := p.int(); i > 0; i-- { // parser.go:hidden_fndcl + p.pos() sym := p.fieldSym() recv := p.paramList() // TODO(gri) do we need a full param list for the receiver? @@ -409,20 +437,19 @@ func (p *importer) qualifiedName() *Sym { } // parser.go:hidden_structdcl_list -func (p *importer) fieldList() []*Node { - i := p.int() - if i == 0 { - return nil +func (p *importer) fieldList() (fields []*Node) { + if n := p.int(); n > 0 { + fields = make([]*Node, n) + for i := range fields { + fields[i] = p.field() + } } - n := make([]*Node, i) - for i := range n { - n[i] = p.field() - } - return n + return } // parser.go:hidden_structdcl func (p *importer) field() *Node { + p.pos() sym := p.fieldName() typ := p.typ() note := p.note() @@ -456,20 +483,19 @@ func (p *importer) note() (v Val) { } // parser.go:hidden_interfacedcl_list -func (p *importer) methodList() []*Node { - i := p.int() - if i == 0 { - return nil +func (p *importer) methodList() (methods []*Node) { + if n := p.int(); n > 0 { + methods = make([]*Node, n) + for i := range methods { + methods[i] = p.method() + } } - n := make([]*Node, i) - for i := range n { - n[i] = p.method() - } - return n + return } // parser.go:hidden_interfacedcl func (p *importer) method() *Node { + p.pos() sym := p.fieldName() params := p.paramList() result := p.paramList() @@ -1056,29 +1082,31 @@ func (p *importer) int64() int64 { } func (p *importer) string() string { - if p.debugFormat { + if debugFormat { p.marker('s') } - - // TODO(gri) should we intern strings here? - - if n := int(p.rawInt64()); n > 0 { - if cap(p.buf) < n { - p.buf = make([]byte, n) - } else { - p.buf = p.buf[:n] - } - for i := range p.buf { - p.buf[i] = p.byte() - } - return string(p.buf) + // if the string was seen before, i is its index (>= 0) + // (the empty string is at index 0) + i := p.rawInt64() + if i >= 0 { + return p.strList[i] } - - return "" + // otherwise, i is the negative string length (< 0) + if n := int(-i); n <= cap(p.buf) { + p.buf = p.buf[:n] + } else { + p.buf = make([]byte, n) + } + for i := range p.buf { + p.buf[i] = p.rawByte() + } + s := string(p.buf) + p.strList = append(p.strList, s) + return s } func (p *importer) marker(want byte) { - if got := p.byte(); got != want { + if got := p.rawByte(); got != want { Fatalf("importer: incorrect marker: got %c; want %c (pos = %d)", got, want, p.read) } @@ -1099,12 +1127,13 @@ func (p *importer) rawInt64() int64 { // needed for binary.ReadVarint in rawInt64 func (p *importer) ReadByte() (byte, error) { - return p.byte(), nil + return p.rawByte(), nil } -// byte is the bottleneck interface for reading from p.in. +// rawByte is the bottleneck interface for reading from p.in. // It unescapes '|' 'S' to '$' and '|' '|' to '|'. -func (p *importer) byte() byte { +// rawByte should only be used by low-level decoders. +func (p *importer) rawByte() byte { c, err := p.in.ReadByte() p.read++ if err != nil { diff --git a/src/go/internal/gcimporter/bimport.go b/src/go/internal/gcimporter/bimport.go index 7a7bc871f4..d75e533e97 100644 --- a/src/go/internal/gcimporter/bimport.go +++ b/src/go/internal/gcimporter/bimport.go @@ -19,13 +19,18 @@ type importer struct { imports map[string]*types.Package data []byte path string + buf []byte // for reading strings - buf []byte // for reading strings - bufarray [64]byte // initial underlying array for buf, large enough to avoid allocation when compiling std lib + // object lists + strList []string // in order of appearance + pkgList []*types.Package // in order of appearance + typList []types.Type // in order of appearance - pkgList []*types.Package - typList []types.Type + // position encoding + prevFile string + prevLine int + // debugging support debugFormat bool read int // bytes read } @@ -39,11 +44,11 @@ func BImportData(imports map[string]*types.Package, data []byte, path string) (i imports: imports, data: data, path: path, + strList: []string{""}, // empty string is mapped to 0 } - p.buf = p.bufarray[:] // read low-level encoding format - switch format := p.byte(); format { + switch format := p.rawByte(); format { case 'c': // compact format - nothing to do case 'd': @@ -160,6 +165,7 @@ func (p *importer) declare(obj types.Object) { func (p *importer) obj(tag int) { switch tag { case constTag: + p.pos() pkg, name := p.qualifiedName() typ := p.typ(nil) val := p.value() @@ -169,11 +175,13 @@ func (p *importer) obj(tag int) { _ = p.typ(nil) case varTag: + p.pos() pkg, name := p.qualifiedName() typ := p.typ(nil) p.declare(types.NewVar(token.NoPos, pkg, name, typ)) case funcTag: + p.pos() pkg, name := p.qualifiedName() params, isddd := p.paramList() result, _ := p.paramList() @@ -185,6 +193,22 @@ func (p *importer) obj(tag int) { } } +func (p *importer) pos() { + file := p.prevFile + line := p.prevLine + + if delta := p.int(); delta != 0 { + line += delta + } else { + file = p.string() + line = p.int() + p.prevFile = file + } + p.prevLine = line + + // TODO(gri) register new position +} + func (p *importer) qualifiedName() (pkg *types.Package, name string) { name = p.string() pkg = p.pkg() @@ -220,6 +244,7 @@ func (p *importer) typ(parent *types.Package) types.Type { switch i { case namedTag: // read type object + p.pos() parent, name := p.qualifiedName() scope := parent.Scope() obj := scope.Lookup(name) @@ -252,6 +277,7 @@ func (p *importer) typ(parent *types.Package) types.Type { // read associated methods for i := p.int(); i > 0; i-- { // TODO(gri) replace this with something closer to fieldName + p.pos() name := p.string() if !exported(name) { p.pkg() @@ -293,14 +319,7 @@ func (p *importer) typ(parent *types.Package) types.Type { t := new(types.Struct) p.record(t) - n := p.int() - fields := make([]*types.Var, n) - tags := make([]string, n) - for i := range fields { - fields[i] = p.field(parent) - tags[i] = p.string() - } - *t = *types.NewStruct(fields, tags) + *t = *types.NewStruct(p.fieldList(parent)) return t case pointerTag: @@ -332,17 +351,7 @@ func (p *importer) typ(parent *types.Package) types.Type { panic("unexpected embedded interface") } - // read methods - methods := make([]*types.Func, p.int()) - for i := range methods { - pkg, name := p.fieldName(parent) - params, isddd := p.paramList() - result, _ := p.paramList() - sig := types.NewSignature(nil, params, result, isddd) - methods[i] = types.NewFunc(token.NoPos, pkg, name, sig) - } - - t := types.NewInterface(methods, nil) + t := types.NewInterface(p.methodList(parent), nil) p.typList[n] = t return t @@ -380,7 +389,20 @@ func (p *importer) typ(parent *types.Package) types.Type { } } +func (p *importer) fieldList(parent *types.Package) (fields []*types.Var, tags []string) { + if n := p.int(); n > 0 { + fields = make([]*types.Var, n) + tags = make([]string, n) + for i := range fields { + fields[i] = p.field(parent) + tags[i] = p.string() + } + } + return +} + func (p *importer) field(parent *types.Package) *types.Var { + p.pos() pkg, name := p.fieldName(parent) typ := p.typ(parent) @@ -402,6 +424,25 @@ func (p *importer) field(parent *types.Package) *types.Var { return types.NewField(token.NoPos, pkg, name, typ, anonymous) } +func (p *importer) methodList(parent *types.Package) (methods []*types.Func) { + if n := p.int(); n > 0 { + methods = make([]*types.Func, n) + for i := range methods { + methods[i] = p.method(parent) + } + } + return +} + +func (p *importer) method(parent *types.Package) *types.Func { + p.pos() + pkg, name := p.fieldName(parent) + params, isddd := p.paramList() + result, _ := p.paramList() + sig := types.NewSignature(nil, params, result, isddd) + return types.NewFunc(token.NoPos, pkg, name, sig) +} + func (p *importer) fieldName(parent *types.Package) (*types.Package, string) { pkg := parent if pkg == nil { @@ -567,24 +608,28 @@ func (p *importer) string() string { if p.debugFormat { p.marker('s') } - - if n := int(p.rawInt64()); n > 0 { - if cap(p.buf) < n { - p.buf = make([]byte, n) - } else { - p.buf = p.buf[:n] - } - for i := 0; i < n; i++ { - p.buf[i] = p.byte() - } - return string(p.buf) + // if the string was seen before, i is its index (>= 0) + // (the empty string is at index 0) + i := p.rawInt64() + if i >= 0 { + return p.strList[i] } - - return "" + // otherwise, i is the negative string length (< 0) + if n := int(-i); n <= cap(p.buf) { + p.buf = p.buf[:n] + } else { + p.buf = make([]byte, n) + } + for i := range p.buf { + p.buf[i] = p.rawByte() + } + s := string(p.buf) + p.strList = append(p.strList, s) + return s } func (p *importer) marker(want byte) { - if got := p.byte(); got != want { + if got := p.rawByte(); got != want { panic(fmt.Sprintf("incorrect marker: got %c; want %c (pos = %d)", got, want, p.read)) } @@ -605,12 +650,13 @@ func (p *importer) rawInt64() int64 { // needed for binary.ReadVarint in rawInt64 func (p *importer) ReadByte() (byte, error) { - return p.byte(), nil + return p.rawByte(), nil } // byte is the bottleneck interface for reading p.data. // It unescapes '|' 'S' to '$' and '|' '|' to '|'. -func (p *importer) byte() byte { +// rawByte should only be used by low-level decoders. +func (p *importer) rawByte() byte { b := p.data[0] r := 1 if b == '|' {