From 22d149443a6474462b18eb49cbc26bf9b21b87f2 Mon Sep 17 00:00:00 2001 From: David Chase Date: Thu, 16 Jun 2022 16:30:48 -0400 Subject: [PATCH] internal/gcimporter: add support for reading unified IR export data This does not include writing export data in the unified IR format. Most of this change is an import of code from the Go implementation, with minor tweaks and gaskets added. This does not (yet) address the registry issue mentioned in golang/go#52163. Updates golang/go#52163. Change-Id: I98030e6c9ff35c6ff678b8a7ce9b653b18e65e17 Reviewed-on: https://go-review.googlesource.com/c/tools/+/412821 TryBot-Result: Gopher Robot Run-TryBot: David Chase Reviewed-by: Matthew Dempsky gopls-CI: kokoro --- go/gcexportdata/gcexportdata.go | 28 +- go/internal/gcimporter/gcimporter.go | 28 +- go/internal/gcimporter/gcimporter_test.go | 39 +- go/internal/gcimporter/unified_no.go | 10 + go/internal/gcimporter/unified_yes.go | 10 + go/internal/gcimporter/ureader_no.go | 19 + go/internal/gcimporter/ureader_yes.go | 612 ++++++++++++++++++++++ go/internal/pkgbits/codes.go | 77 +++ go/internal/pkgbits/decoder.go | 433 +++++++++++++++ go/internal/pkgbits/doc.go | 32 ++ go/internal/pkgbits/encoder.go | 379 ++++++++++++++ go/internal/pkgbits/flags.go | 9 + go/internal/pkgbits/frames_go1.go | 21 + go/internal/pkgbits/frames_go17.go | 28 + go/internal/pkgbits/reloc.go | 42 ++ go/internal/pkgbits/support.go | 17 + go/internal/pkgbits/sync.go | 113 ++++ go/internal/pkgbits/syncmarker_string.go | 89 ++++ 18 files changed, 1968 insertions(+), 18 deletions(-) create mode 100644 go/internal/gcimporter/unified_no.go create mode 100644 go/internal/gcimporter/unified_yes.go create mode 100644 go/internal/gcimporter/ureader_no.go create mode 100644 go/internal/gcimporter/ureader_yes.go create mode 100644 go/internal/pkgbits/codes.go create mode 100644 go/internal/pkgbits/decoder.go create mode 100644 go/internal/pkgbits/doc.go create mode 100644 go/internal/pkgbits/encoder.go create mode 100644 go/internal/pkgbits/flags.go create mode 100644 go/internal/pkgbits/frames_go1.go create mode 100644 go/internal/pkgbits/frames_go17.go create mode 100644 go/internal/pkgbits/reloc.go create mode 100644 go/internal/pkgbits/support.go create mode 100644 go/internal/pkgbits/sync.go create mode 100644 go/internal/pkgbits/syncmarker_string.go diff --git a/go/gcexportdata/gcexportdata.go b/go/gcexportdata/gcexportdata.go index ddc276cfbc..2ed25a7502 100644 --- a/go/gcexportdata/gcexportdata.go +++ b/go/gcexportdata/gcexportdata.go @@ -116,13 +116,29 @@ func Read(in io.Reader, fset *token.FileSet, imports map[string]*types.Package, // The indexed export format starts with an 'i'; the older // binary export format starts with a 'c', 'd', or 'v' // (from "version"). Select appropriate importer. - if len(data) > 0 && data[0] == 'i' { - _, pkg, err := gcimporter.IImportData(fset, imports, data[1:], path) - return pkg, err - } + if len(data) > 0 { + switch data[0] { + case 'i': + _, pkg, err := gcimporter.IImportData(fset, imports, data[1:], path) + return pkg, err - _, pkg, err := gcimporter.BImportData(fset, imports, data, path) - return pkg, err + case 'v', 'c', 'd': + _, pkg, err := gcimporter.BImportData(fset, imports, data, path) + return pkg, err + + case 'u': + _, pkg, err := gcimporter.UImportData(fset, imports, data[1:], path) + return pkg, err + + default: + l := len(data) + if l > 10 { + l = 10 + } + return nil, fmt.Errorf("unexpected export data with prefix %q for path %s", string(data[:l]), path) + } + } + return nil, fmt.Errorf("empty export data for %s", path) } // Write writes encoded type information for the specified package to out. diff --git a/go/internal/gcimporter/gcimporter.go b/go/internal/gcimporter/gcimporter.go index 493bfa03b0..e96c39600d 100644 --- a/go/internal/gcimporter/gcimporter.go +++ b/go/internal/gcimporter/gcimporter.go @@ -181,8 +181,9 @@ func Import(packages map[string]*types.Package, path, srcDir string, lookup func defer rc.Close() var hdr string + var size int64 buf := bufio.NewReader(rc) - if hdr, _, err = FindExportData(buf); err != nil { + if hdr, size, err = FindExportData(buf); err != nil { return } @@ -210,10 +211,27 @@ func Import(packages map[string]*types.Package, path, srcDir string, lookup func // The indexed export format starts with an 'i'; the older // binary export format starts with a 'c', 'd', or 'v' // (from "version"). Select appropriate importer. - if len(data) > 0 && data[0] == 'i' { - _, pkg, err = IImportData(fset, packages, data[1:], id) - } else { - _, pkg, err = BImportData(fset, packages, data, id) + if len(data) > 0 { + switch data[0] { + case 'i': + _, pkg, err := IImportData(fset, packages, data[1:], id) + return pkg, err + + case 'v', 'c', 'd': + _, pkg, err := BImportData(fset, packages, data, id) + return pkg, err + + case 'u': + _, pkg, err := UImportData(fset, packages, data[1:size], id) + return pkg, err + + default: + l := len(data) + if l > 10 { + l = 10 + } + return nil, fmt.Errorf("unexpected export data with prefix %q for path %s", string(data[:l]), id) + } } default: diff --git a/go/internal/gcimporter/gcimporter_test.go b/go/internal/gcimporter/gcimporter_test.go index 4e992af76b..66c09269d8 100644 --- a/go/internal/gcimporter/gcimporter_test.go +++ b/go/internal/gcimporter/gcimporter_test.go @@ -45,6 +45,10 @@ func needsCompiler(t *testing.T, compiler string) { // compile runs the compiler on filename, with dirname as the working directory, // and writes the output file to outdirname. func compile(t *testing.T, dirname, filename, outdirname string) string { + return compilePkg(t, dirname, filename, outdirname, "p") +} + +func compilePkg(t *testing.T, dirname, filename, outdirname, pkg string) string { testenv.NeedsGoBuild(t) // filename must end with ".go" @@ -53,12 +57,12 @@ func compile(t *testing.T, dirname, filename, outdirname string) string { } basename := filepath.Base(filename) outname := filepath.Join(outdirname, basename[:len(basename)-2]+"o") - cmd := exec.Command("go", "tool", "compile", "-p=p", "-o", outname, filename) + cmd := exec.Command("go", "tool", "compile", "-p="+pkg, "-o", outname, filename) cmd.Dir = dirname out, err := cmd.CombinedOutput() if err != nil { t.Logf("%s", out) - t.Fatalf("go tool compile %s failed: %s", filename, err) + t.Fatalf("(cd %v && %v) failed: %s", cmd.Dir, cmd, err) } return outname } @@ -140,7 +144,11 @@ func TestImportTestdata(t *testing.T) { // For now, we just test the presence of a few packages // that we know are there for sure. got := fmt.Sprint(pkg.Imports()) - for _, want := range []string{"go/ast", "go/token"} { + wants := []string{"go/ast", "go/token"} + if unifiedIR { + wants = []string{"go/ast"} + } + for _, want := range wants { if !strings.Contains(got, want) { t.Errorf(`Package("exports").Imports() = %s, does not contain %s`, got, want) } @@ -364,6 +372,14 @@ func verifyInterfaceMethodRecvs(t *testing.T, named *types.Named, level int) { return // not an interface } + // The unified IR importer always sets interface method receiver + // parameters to point to the Interface type, rather than the Named. + // See #49906. + var want types.Type = named + if unifiedIR { + want = iface + } + // check explicitly declared methods for i := 0; i < iface.NumExplicitMethods(); i++ { m := iface.ExplicitMethod(i) @@ -372,8 +388,8 @@ func verifyInterfaceMethodRecvs(t *testing.T, named *types.Named, level int) { t.Errorf("%s: missing receiver type", m) continue } - if recv.Type() != named { - t.Errorf("%s: got recv type %s; want %s", m, recv.Type(), named) + if recv.Type() != want { + t.Errorf("%s: got recv type %s; want %s", m, recv.Type(), want) } } @@ -451,7 +467,7 @@ func TestIssue13566(t *testing.T) { if err != nil { t.Fatal(err) } - compile(t, "testdata", "a.go", testoutdir) + compilePkg(t, "testdata", "a.go", testoutdir, apkg(testoutdir)) compile(t, testoutdir, bpath, testoutdir) // import must succeed (test for issue at hand) @@ -611,13 +627,22 @@ func TestIssue51836(t *testing.T) { if err != nil { t.Fatal(err) } - compile(t, dir, "a.go", testoutdir) + compilePkg(t, dir, "a.go", testoutdir, apkg(testoutdir)) compile(t, testoutdir, bpath, testoutdir) // import must succeed (test for issue at hand) _ = importPkg(t, "./testdata/aa", tmpdir) } +// apkg returns the package "a" prefixed by (as a package) testoutdir +func apkg(testoutdir string) string { + apkg := testoutdir + "/a" + if os.PathSeparator != '/' { + apkg = strings.ReplaceAll(apkg, string(os.PathSeparator), "/") + } + return apkg +} + func importPkg(t *testing.T, path, srcDir string) *types.Package { pkg, err := Import(make(map[string]*types.Package), path, srcDir, nil) if err != nil { diff --git a/go/internal/gcimporter/unified_no.go b/go/internal/gcimporter/unified_no.go new file mode 100644 index 0000000000..286bf44548 --- /dev/null +++ b/go/internal/gcimporter/unified_no.go @@ -0,0 +1,10 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !(go1.18 && goexperiment.unified) +// +build !go1.18 !goexperiment.unified + +package gcimporter + +const unifiedIR = false diff --git a/go/internal/gcimporter/unified_yes.go b/go/internal/gcimporter/unified_yes.go new file mode 100644 index 0000000000..b5d69ffbe6 --- /dev/null +++ b/go/internal/gcimporter/unified_yes.go @@ -0,0 +1,10 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build go1.18 && goexperiment.unified +// +build go1.18,goexperiment.unified + +package gcimporter + +const unifiedIR = true diff --git a/go/internal/gcimporter/ureader_no.go b/go/internal/gcimporter/ureader_no.go new file mode 100644 index 0000000000..8eb20729c2 --- /dev/null +++ b/go/internal/gcimporter/ureader_no.go @@ -0,0 +1,19 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !go1.18 +// +build !go1.18 + +package gcimporter + +import ( + "fmt" + "go/token" + "go/types" +) + +func UImportData(fset *token.FileSet, imports map[string]*types.Package, data []byte, path string) (_ int, pkg *types.Package, err error) { + err = fmt.Errorf("go/tools compiled with a Go version earlier than 1.18 cannot read unified IR export data") + return +} diff --git a/go/internal/gcimporter/ureader_yes.go b/go/internal/gcimporter/ureader_yes.go new file mode 100644 index 0000000000..3c1a437543 --- /dev/null +++ b/go/internal/gcimporter/ureader_yes.go @@ -0,0 +1,612 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Derived from go/internal/gcimporter/ureader.go + +//go:build go1.18 +// +build go1.18 + +package gcimporter + +import ( + "go/token" + "go/types" + "strings" + + "golang.org/x/tools/go/internal/pkgbits" +) + +// A pkgReader holds the shared state for reading a unified IR package +// description. +type pkgReader struct { + pkgbits.PkgDecoder + + fake fakeFileSet + + ctxt *types.Context + imports map[string]*types.Package // previously imported packages, indexed by path + + // lazily initialized arrays corresponding to the unified IR + // PosBase, Pkg, and Type sections, respectively. + posBases []string // position bases (i.e., file names) + pkgs []*types.Package + typs []types.Type + + // laterFns holds functions that need to be invoked at the end of + // import reading. + laterFns []func() +} + +// later adds a function to be invoked at the end of import reading. +func (pr *pkgReader) later(fn func()) { + pr.laterFns = append(pr.laterFns, fn) +} + +// See cmd/compile/internal/noder.derivedInfo. +type derivedInfo struct { + idx pkgbits.Index + needed bool +} + +// See cmd/compile/internal/noder.typeInfo. +type typeInfo struct { + idx pkgbits.Index + derived bool +} + +func UImportData(fset *token.FileSet, imports map[string]*types.Package, data []byte, path string) (_ int, pkg *types.Package, err error) { + s := string(data) + s = s[:strings.LastIndex(s, "\n$$\n")] + input := pkgbits.NewPkgDecoder(path, s) + pkg = readUnifiedPackage(fset, nil, imports, input) + return +} + +// readUnifiedPackage reads a package description from the given +// unified IR export data decoder. +func readUnifiedPackage(fset *token.FileSet, ctxt *types.Context, imports map[string]*types.Package, input pkgbits.PkgDecoder) *types.Package { + pr := pkgReader{ + PkgDecoder: input, + + fake: fakeFileSet{ + fset: fset, + files: make(map[string]*fileInfo), + }, + + ctxt: ctxt, + imports: imports, + + posBases: make([]string, input.NumElems(pkgbits.RelocPosBase)), + pkgs: make([]*types.Package, input.NumElems(pkgbits.RelocPkg)), + typs: make([]types.Type, input.NumElems(pkgbits.RelocType)), + } + defer pr.fake.setLines() + + r := pr.newReader(pkgbits.RelocMeta, pkgbits.PublicRootIdx, pkgbits.SyncPublic) + pkg := r.pkg() + r.Bool() // has init + + for i, n := 0, r.Len(); i < n; i++ { + // As if r.obj(), but avoiding the Scope.Lookup call, + // to avoid eager loading of imports. + r.Sync(pkgbits.SyncObject) + assert(!r.Bool()) + r.p.objIdx(r.Reloc(pkgbits.RelocObj)) + assert(r.Len() == 0) + } + + r.Sync(pkgbits.SyncEOF) + + for _, fn := range pr.laterFns { + fn() + } + + pkg.MarkComplete() + return pkg +} + +// A reader holds the state for reading a single unified IR element +// within a package. +type reader struct { + pkgbits.Decoder + + p *pkgReader + + dict *readerDict +} + +// A readerDict holds the state for type parameters that parameterize +// the current unified IR element. +type readerDict struct { + // bounds is a slice of typeInfos corresponding to the underlying + // bounds of the element's type parameters. + bounds []typeInfo + + // tparams is a slice of the constructed TypeParams for the element. + tparams []*types.TypeParam + + // devived is a slice of types derived from tparams, which may be + // instantiated while reading the current element. + derived []derivedInfo + derivedTypes []types.Type // lazily instantiated from derived +} + +func (pr *pkgReader) newReader(k pkgbits.RelocKind, idx pkgbits.Index, marker pkgbits.SyncMarker) *reader { + return &reader{ + Decoder: pr.NewDecoder(k, idx, marker), + p: pr, + } +} + +// @@@ Positions + +func (r *reader) pos() token.Pos { + r.Sync(pkgbits.SyncPos) + if !r.Bool() { + return token.NoPos + } + + // TODO(mdempsky): Delta encoding. + posBase := r.posBase() + line := r.Uint() + col := r.Uint() + return r.p.fake.pos(posBase, int(line), int(col)) +} + +func (r *reader) posBase() string { + return r.p.posBaseIdx(r.Reloc(pkgbits.RelocPosBase)) +} + +func (pr *pkgReader) posBaseIdx(idx pkgbits.Index) string { + if b := pr.posBases[idx]; b != "" { + return b + } + + r := pr.newReader(pkgbits.RelocPosBase, idx, pkgbits.SyncPosBase) + + // Within types2, position bases have a lot more details (e.g., + // keeping track of where //line directives appeared exactly). + // + // For go/types, we just track the file name. + + filename := r.String() + + if r.Bool() { // file base + // Was: "b = token.NewTrimmedFileBase(filename, true)" + } else { // line base + pos := r.pos() + line := r.Uint() + col := r.Uint() + + // Was: "b = token.NewLineBase(pos, filename, true, line, col)" + _, _, _ = pos, line, col + } + + b := filename + pr.posBases[idx] = b + return b +} + +// @@@ Packages + +func (r *reader) pkg() *types.Package { + r.Sync(pkgbits.SyncPkg) + return r.p.pkgIdx(r.Reloc(pkgbits.RelocPkg)) +} + +func (pr *pkgReader) pkgIdx(idx pkgbits.Index) *types.Package { + // TODO(mdempsky): Consider using some non-nil pointer to indicate + // the universe scope, so we don't need to keep re-reading it. + if pkg := pr.pkgs[idx]; pkg != nil { + return pkg + } + + pkg := pr.newReader(pkgbits.RelocPkg, idx, pkgbits.SyncPkgDef).doPkg() + pr.pkgs[idx] = pkg + return pkg +} + +func (r *reader) doPkg() *types.Package { + path := r.String() + switch path { + case "": + path = r.p.PkgPath() + case "builtin": + return nil // universe + case "unsafe": + return types.Unsafe + } + + if pkg := r.p.imports[path]; pkg != nil { + return pkg + } + + name := r.String() + + pkg := types.NewPackage(path, name) + r.p.imports[path] = pkg + + imports := make([]*types.Package, r.Len()) + for i := range imports { + imports[i] = r.pkg() + } + pkg.SetImports(imports) + + return pkg +} + +// @@@ Types + +func (r *reader) typ() types.Type { + return r.p.typIdx(r.typInfo(), r.dict) +} + +func (r *reader) typInfo() typeInfo { + r.Sync(pkgbits.SyncType) + if r.Bool() { + return typeInfo{idx: pkgbits.Index(r.Len()), derived: true} + } + return typeInfo{idx: r.Reloc(pkgbits.RelocType), derived: false} +} + +func (pr *pkgReader) typIdx(info typeInfo, dict *readerDict) types.Type { + idx := info.idx + var where *types.Type + if info.derived { + where = &dict.derivedTypes[idx] + idx = dict.derived[idx].idx + } else { + where = &pr.typs[idx] + } + + if typ := *where; typ != nil { + return typ + } + + r := pr.newReader(pkgbits.RelocType, idx, pkgbits.SyncTypeIdx) + r.dict = dict + + typ := r.doTyp() + assert(typ != nil) + + // See comment in pkgReader.typIdx explaining how this happens. + if prev := *where; prev != nil { + return prev + } + + *where = typ + return typ +} + +func (r *reader) doTyp() (res types.Type) { + switch tag := pkgbits.CodeType(r.Code(pkgbits.SyncType)); tag { + default: + errorf("unhandled type tag: %v", tag) + panic("unreachable") + + case pkgbits.TypeBasic: + return types.Typ[r.Len()] + + case pkgbits.TypeNamed: + obj, targs := r.obj() + name := obj.(*types.TypeName) + if len(targs) != 0 { + t, _ := types.Instantiate(r.p.ctxt, name.Type(), targs, false) + return t + } + return name.Type() + + case pkgbits.TypeTypeParam: + return r.dict.tparams[r.Len()] + + case pkgbits.TypeArray: + len := int64(r.Uint64()) + return types.NewArray(r.typ(), len) + case pkgbits.TypeChan: + dir := types.ChanDir(r.Len()) + return types.NewChan(dir, r.typ()) + case pkgbits.TypeMap: + return types.NewMap(r.typ(), r.typ()) + case pkgbits.TypePointer: + return types.NewPointer(r.typ()) + case pkgbits.TypeSignature: + return r.signature(nil, nil, nil) + case pkgbits.TypeSlice: + return types.NewSlice(r.typ()) + case pkgbits.TypeStruct: + return r.structType() + case pkgbits.TypeInterface: + return r.interfaceType() + case pkgbits.TypeUnion: + return r.unionType() + } +} + +func (r *reader) structType() *types.Struct { + fields := make([]*types.Var, r.Len()) + var tags []string + for i := range fields { + pos := r.pos() + pkg, name := r.selector() + ftyp := r.typ() + tag := r.String() + embedded := r.Bool() + + fields[i] = types.NewField(pos, pkg, name, ftyp, embedded) + if tag != "" { + for len(tags) < i { + tags = append(tags, "") + } + tags = append(tags, tag) + } + } + return types.NewStruct(fields, tags) +} + +func (r *reader) unionType() *types.Union { + terms := make([]*types.Term, r.Len()) + for i := range terms { + terms[i] = types.NewTerm(r.Bool(), r.typ()) + } + return types.NewUnion(terms) +} + +func (r *reader) interfaceType() *types.Interface { + methods := make([]*types.Func, r.Len()) + embeddeds := make([]types.Type, r.Len()) + implicit := len(methods) == 0 && len(embeddeds) == 1 && r.Bool() + + for i := range methods { + pos := r.pos() + pkg, name := r.selector() + mtyp := r.signature(nil, nil, nil) + methods[i] = types.NewFunc(pos, pkg, name, mtyp) + } + + for i := range embeddeds { + embeddeds[i] = r.typ() + } + + iface := types.NewInterfaceType(methods, embeddeds) + if implicit { + iface.MarkImplicit() + } + return iface +} + +func (r *reader) signature(recv *types.Var, rtparams, tparams []*types.TypeParam) *types.Signature { + r.Sync(pkgbits.SyncSignature) + + params := r.params() + results := r.params() + variadic := r.Bool() + + return types.NewSignatureType(recv, rtparams, tparams, params, results, variadic) +} + +func (r *reader) params() *types.Tuple { + r.Sync(pkgbits.SyncParams) + + params := make([]*types.Var, r.Len()) + for i := range params { + params[i] = r.param() + } + + return types.NewTuple(params...) +} + +func (r *reader) param() *types.Var { + r.Sync(pkgbits.SyncParam) + + pos := r.pos() + pkg, name := r.localIdent() + typ := r.typ() + + return types.NewParam(pos, pkg, name, typ) +} + +// @@@ Objects + +func (r *reader) obj() (types.Object, []types.Type) { + r.Sync(pkgbits.SyncObject) + + assert(!r.Bool()) + + pkg, name := r.p.objIdx(r.Reloc(pkgbits.RelocObj)) + obj := pkgScope(pkg).Lookup(name) + + targs := make([]types.Type, r.Len()) + for i := range targs { + targs[i] = r.typ() + } + + return obj, targs +} + +func (pr *pkgReader) objIdx(idx pkgbits.Index) (*types.Package, string) { + rname := pr.newReader(pkgbits.RelocName, idx, pkgbits.SyncObject1) + + objPkg, objName := rname.qualifiedIdent() + assert(objName != "") + + tag := pkgbits.CodeObj(rname.Code(pkgbits.SyncCodeObj)) + + if tag == pkgbits.ObjStub { + assert(objPkg == nil || objPkg == types.Unsafe) + return objPkg, objName + } + + if objPkg.Scope().Lookup(objName) == nil { + dict := pr.objDictIdx(idx) + + r := pr.newReader(pkgbits.RelocObj, idx, pkgbits.SyncObject1) + r.dict = dict + + declare := func(obj types.Object) { + objPkg.Scope().Insert(obj) + } + + switch tag { + default: + panic("weird") + + case pkgbits.ObjAlias: + pos := r.pos() + typ := r.typ() + declare(types.NewTypeName(pos, objPkg, objName, typ)) + + case pkgbits.ObjConst: + pos := r.pos() + typ := r.typ() + val := r.Value() + declare(types.NewConst(pos, objPkg, objName, typ, val)) + + case pkgbits.ObjFunc: + pos := r.pos() + tparams := r.typeParamNames() + sig := r.signature(nil, nil, tparams) + declare(types.NewFunc(pos, objPkg, objName, sig)) + + case pkgbits.ObjType: + pos := r.pos() + + obj := types.NewTypeName(pos, objPkg, objName, nil) + named := types.NewNamed(obj, nil, nil) + declare(obj) + + named.SetTypeParams(r.typeParamNames()) + + // TODO(mdempsky): Rewrite receiver types to underlying is an + // Interface? The go/types importer does this (I think because + // unit tests expected that), but cmd/compile doesn't care + // about it, so maybe we can avoid worrying about that here. + rhs := r.typ() + r.p.later(func() { + underlying := rhs.Underlying() + named.SetUnderlying(underlying) + }) + + for i, n := 0, r.Len(); i < n; i++ { + named.AddMethod(r.method()) + } + + case pkgbits.ObjVar: + pos := r.pos() + typ := r.typ() + declare(types.NewVar(pos, objPkg, objName, typ)) + } + } + + return objPkg, objName +} + +func (pr *pkgReader) objDictIdx(idx pkgbits.Index) *readerDict { + r := pr.newReader(pkgbits.RelocObjDict, idx, pkgbits.SyncObject1) + + var dict readerDict + + if implicits := r.Len(); implicits != 0 { + errorf("unexpected object with %v implicit type parameter(s)", implicits) + } + + dict.bounds = make([]typeInfo, r.Len()) + for i := range dict.bounds { + dict.bounds[i] = r.typInfo() + } + + dict.derived = make([]derivedInfo, r.Len()) + dict.derivedTypes = make([]types.Type, len(dict.derived)) + for i := range dict.derived { + dict.derived[i] = derivedInfo{r.Reloc(pkgbits.RelocType), r.Bool()} + } + + // function references follow, but reader doesn't need those + + return &dict +} + +func (r *reader) typeParamNames() []*types.TypeParam { + r.Sync(pkgbits.SyncTypeParamNames) + + // Note: This code assumes it only processes objects without + // implement type parameters. This is currently fine, because + // reader is only used to read in exported declarations, which are + // always package scoped. + + if len(r.dict.bounds) == 0 { + return nil + } + + // Careful: Type parameter lists may have cycles. To allow for this, + // we construct the type parameter list in two passes: first we + // create all the TypeNames and TypeParams, then we construct and + // set the bound type. + + r.dict.tparams = make([]*types.TypeParam, len(r.dict.bounds)) + for i := range r.dict.bounds { + pos := r.pos() + pkg, name := r.localIdent() + + tname := types.NewTypeName(pos, pkg, name, nil) + r.dict.tparams[i] = types.NewTypeParam(tname, nil) + } + + typs := make([]types.Type, len(r.dict.bounds)) + for i, bound := range r.dict.bounds { + typs[i] = r.p.typIdx(bound, r.dict) + } + + // TODO(mdempsky): This is subtle, elaborate further. + // + // We have to save tparams outside of the closure, because + // typeParamNames() can be called multiple times with the same + // dictionary instance. + // + // Also, this needs to happen later to make sure SetUnderlying has + // been called. + // + // TODO(mdempsky): Is it safe to have a single "later" slice or do + // we need to have multiple passes? See comments on CL 386002 and + // go.dev/issue/52104. + tparams := r.dict.tparams + r.p.later(func() { + for i, typ := range typs { + tparams[i].SetConstraint(typ) + } + }) + + return r.dict.tparams +} + +func (r *reader) method() *types.Func { + r.Sync(pkgbits.SyncMethod) + pos := r.pos() + pkg, name := r.selector() + + rparams := r.typeParamNames() + sig := r.signature(r.param(), rparams, nil) + + _ = r.pos() // TODO(mdempsky): Remove; this is a hacker for linker.go. + return types.NewFunc(pos, pkg, name, sig) +} + +func (r *reader) qualifiedIdent() (*types.Package, string) { return r.ident(pkgbits.SyncSym) } +func (r *reader) localIdent() (*types.Package, string) { return r.ident(pkgbits.SyncLocalIdent) } +func (r *reader) selector() (*types.Package, string) { return r.ident(pkgbits.SyncSelector) } + +func (r *reader) ident(marker pkgbits.SyncMarker) (*types.Package, string) { + r.Sync(marker) + return r.pkg(), r.String() +} + +// pkgScope returns pkg.Scope(). +// If pkg is nil, it returns types.Universe instead. +// +// TODO(mdempsky): Remove after x/tools can depend on Go 1.19. +func pkgScope(pkg *types.Package) *types.Scope { + if pkg != nil { + return pkg.Scope() + } + return types.Universe +} diff --git a/go/internal/pkgbits/codes.go b/go/internal/pkgbits/codes.go new file mode 100644 index 0000000000..f0cabde96e --- /dev/null +++ b/go/internal/pkgbits/codes.go @@ -0,0 +1,77 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pkgbits + +// A Code is an enum value that can be encoded into bitstreams. +// +// Code types are preferable for enum types, because they allow +// Decoder to detect desyncs. +type Code interface { + // Marker returns the SyncMarker for the Code's dynamic type. + Marker() SyncMarker + + // Value returns the Code's ordinal value. + Value() int +} + +// A CodeVal distinguishes among go/constant.Value encodings. +type CodeVal int + +func (c CodeVal) Marker() SyncMarker { return SyncVal } +func (c CodeVal) Value() int { return int(c) } + +// Note: These values are public and cannot be changed without +// updating the go/types importers. + +const ( + ValBool CodeVal = iota + ValString + ValInt64 + ValBigInt + ValBigRat + ValBigFloat +) + +// A CodeType distinguishes among go/types.Type encodings. +type CodeType int + +func (c CodeType) Marker() SyncMarker { return SyncType } +func (c CodeType) Value() int { return int(c) } + +// Note: These values are public and cannot be changed without +// updating the go/types importers. + +const ( + TypeBasic CodeType = iota + TypeNamed + TypePointer + TypeSlice + TypeArray + TypeChan + TypeMap + TypeSignature + TypeStruct + TypeInterface + TypeUnion + TypeTypeParam +) + +// A CodeObj distinguishes among go/types.Object encodings. +type CodeObj int + +func (c CodeObj) Marker() SyncMarker { return SyncCodeObj } +func (c CodeObj) Value() int { return int(c) } + +// Note: These values are public and cannot be changed without +// updating the go/types importers. + +const ( + ObjAlias CodeObj = iota + ObjConst + ObjType + ObjFunc + ObjVar + ObjStub +) diff --git a/go/internal/pkgbits/decoder.go b/go/internal/pkgbits/decoder.go new file mode 100644 index 0000000000..2bc793668e --- /dev/null +++ b/go/internal/pkgbits/decoder.go @@ -0,0 +1,433 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pkgbits + +import ( + "encoding/binary" + "fmt" + "go/constant" + "go/token" + "math/big" + "os" + "runtime" + "strings" +) + +// A PkgDecoder provides methods for decoding a package's Unified IR +// export data. +type PkgDecoder struct { + // version is the file format version. + version uint32 + + // sync indicates whether the file uses sync markers. + sync bool + + // pkgPath is the package path for the package to be decoded. + // + // TODO(mdempsky): Remove; unneeded since CL 391014. + pkgPath string + + // elemData is the full data payload of the encoded package. + // Elements are densely and contiguously packed together. + // + // The last 8 bytes of elemData are the package fingerprint. + elemData string + + // elemEnds stores the byte-offset end positions of element + // bitstreams within elemData. + // + // For example, element I's bitstream data starts at elemEnds[I-1] + // (or 0, if I==0) and ends at elemEnds[I]. + // + // Note: elemEnds is indexed by absolute indices, not + // section-relative indices. + elemEnds []uint32 + + // elemEndsEnds stores the index-offset end positions of relocation + // sections within elemEnds. + // + // For example, section K's end positions start at elemEndsEnds[K-1] + // (or 0, if K==0) and end at elemEndsEnds[K]. + elemEndsEnds [numRelocs]uint32 +} + +// PkgPath returns the package path for the package +// +// TODO(mdempsky): Remove; unneeded since CL 391014. +func (pr *PkgDecoder) PkgPath() string { return pr.pkgPath } + +// SyncMarkers reports whether pr uses sync markers. +func (pr *PkgDecoder) SyncMarkers() bool { return pr.sync } + +// NewPkgDecoder returns a PkgDecoder initialized to read the Unified +// IR export data from input. pkgPath is the package path for the +// compilation unit that produced the export data. +// +// TODO(mdempsky): Remove pkgPath parameter; unneeded since CL 391014. +func NewPkgDecoder(pkgPath, input string) PkgDecoder { + pr := PkgDecoder{ + pkgPath: pkgPath, + } + + // TODO(mdempsky): Implement direct indexing of input string to + // avoid copying the position information. + + r := strings.NewReader(input) + + assert(binary.Read(r, binary.LittleEndian, &pr.version) == nil) + + switch pr.version { + default: + panic(fmt.Errorf("unsupported version: %v", pr.version)) + case 0: + // no flags + case 1: + var flags uint32 + assert(binary.Read(r, binary.LittleEndian, &flags) == nil) + pr.sync = flags&flagSyncMarkers != 0 + } + + assert(binary.Read(r, binary.LittleEndian, pr.elemEndsEnds[:]) == nil) + + pr.elemEnds = make([]uint32, pr.elemEndsEnds[len(pr.elemEndsEnds)-1]) + assert(binary.Read(r, binary.LittleEndian, pr.elemEnds[:]) == nil) + + pos, err := r.Seek(0, os.SEEK_CUR) + assert(err == nil) + + pr.elemData = input[pos:] + assert(len(pr.elemData)-8 == int(pr.elemEnds[len(pr.elemEnds)-1])) + + return pr +} + +// NumElems returns the number of elements in section k. +func (pr *PkgDecoder) NumElems(k RelocKind) int { + count := int(pr.elemEndsEnds[k]) + if k > 0 { + count -= int(pr.elemEndsEnds[k-1]) + } + return count +} + +// TotalElems returns the total number of elements across all sections. +func (pr *PkgDecoder) TotalElems() int { + return len(pr.elemEnds) +} + +// Fingerprint returns the package fingerprint. +func (pr *PkgDecoder) Fingerprint() [8]byte { + var fp [8]byte + copy(fp[:], pr.elemData[len(pr.elemData)-8:]) + return fp +} + +// AbsIdx returns the absolute index for the given (section, index) +// pair. +func (pr *PkgDecoder) AbsIdx(k RelocKind, idx Index) int { + absIdx := int(idx) + if k > 0 { + absIdx += int(pr.elemEndsEnds[k-1]) + } + if absIdx >= int(pr.elemEndsEnds[k]) { + errorf("%v:%v is out of bounds; %v", k, idx, pr.elemEndsEnds) + } + return absIdx +} + +// DataIdx returns the raw element bitstream for the given (section, +// index) pair. +func (pr *PkgDecoder) DataIdx(k RelocKind, idx Index) string { + absIdx := pr.AbsIdx(k, idx) + + var start uint32 + if absIdx > 0 { + start = pr.elemEnds[absIdx-1] + } + end := pr.elemEnds[absIdx] + + return pr.elemData[start:end] +} + +// StringIdx returns the string value for the given string index. +func (pr *PkgDecoder) StringIdx(idx Index) string { + return pr.DataIdx(RelocString, idx) +} + +// NewDecoder returns a Decoder for the given (section, index) pair, +// and decodes the given SyncMarker from the element bitstream. +func (pr *PkgDecoder) NewDecoder(k RelocKind, idx Index, marker SyncMarker) Decoder { + r := pr.NewDecoderRaw(k, idx) + r.Sync(marker) + return r +} + +// NewDecoderRaw returns a Decoder for the given (section, index) pair. +// +// Most callers should use NewDecoder instead. +func (pr *PkgDecoder) NewDecoderRaw(k RelocKind, idx Index) Decoder { + r := Decoder{ + common: pr, + k: k, + Idx: idx, + } + + // TODO(mdempsky) r.data.Reset(...) after #44505 is resolved. + r.Data = *strings.NewReader(pr.DataIdx(k, idx)) + + r.Sync(SyncRelocs) + r.Relocs = make([]RelocEnt, r.Len()) + for i := range r.Relocs { + r.Sync(SyncReloc) + r.Relocs[i] = RelocEnt{RelocKind(r.Len()), Index(r.Len())} + } + + return r +} + +// A Decoder provides methods for decoding an individual element's +// bitstream data. +type Decoder struct { + common *PkgDecoder + + Relocs []RelocEnt + Data strings.Reader + + k RelocKind + Idx Index +} + +func (r *Decoder) checkErr(err error) { + if err != nil { + errorf("unexpected decoding error: %w", err) + } +} + +func (r *Decoder) rawUvarint() uint64 { + x, err := binary.ReadUvarint(&r.Data) + r.checkErr(err) + return x +} + +func (r *Decoder) rawVarint() int64 { + ux := r.rawUvarint() + + // Zig-zag decode. + x := int64(ux >> 1) + if ux&1 != 0 { + x = ^x + } + return x +} + +func (r *Decoder) rawReloc(k RelocKind, idx int) Index { + e := r.Relocs[idx] + assert(e.Kind == k) + return e.Idx +} + +// Sync decodes a sync marker from the element bitstream and asserts +// that it matches the expected marker. +// +// If r.common.sync is false, then Sync is a no-op. +func (r *Decoder) Sync(mWant SyncMarker) { + if !r.common.sync { + return + } + + pos, _ := r.Data.Seek(0, os.SEEK_CUR) // TODO(mdempsky): io.SeekCurrent after #44505 is resolved + mHave := SyncMarker(r.rawUvarint()) + writerPCs := make([]int, r.rawUvarint()) + for i := range writerPCs { + writerPCs[i] = int(r.rawUvarint()) + } + + if mHave == mWant { + return + } + + // There's some tension here between printing: + // + // (1) full file paths that tools can recognize (e.g., so emacs + // hyperlinks the "file:line" text for easy navigation), or + // + // (2) short file paths that are easier for humans to read (e.g., by + // omitting redundant or irrelevant details, so it's easier to + // focus on the useful bits that remain). + // + // The current formatting favors the former, as it seems more + // helpful in practice. But perhaps the formatting could be improved + // to better address both concerns. For example, use relative file + // paths if they would be shorter, or rewrite file paths to contain + // "$GOROOT" (like objabi.AbsFile does) if tools can be taught how + // to reliably expand that again. + + fmt.Printf("export data desync: package %q, section %v, index %v, offset %v\n", r.common.pkgPath, r.k, r.Idx, pos) + + fmt.Printf("\nfound %v, written at:\n", mHave) + if len(writerPCs) == 0 { + fmt.Printf("\t[stack trace unavailable; recompile package %q with -d=syncframes]\n", r.common.pkgPath) + } + for _, pc := range writerPCs { + fmt.Printf("\t%s\n", r.common.StringIdx(r.rawReloc(RelocString, pc))) + } + + fmt.Printf("\nexpected %v, reading at:\n", mWant) + var readerPCs [32]uintptr // TODO(mdempsky): Dynamically size? + n := runtime.Callers(2, readerPCs[:]) + for _, pc := range fmtFrames(readerPCs[:n]...) { + fmt.Printf("\t%s\n", pc) + } + + // We already printed a stack trace for the reader, so now we can + // simply exit. Printing a second one with panic or base.Fatalf + // would just be noise. + os.Exit(1) +} + +// Bool decodes and returns a bool value from the element bitstream. +func (r *Decoder) Bool() bool { + r.Sync(SyncBool) + x, err := r.Data.ReadByte() + r.checkErr(err) + assert(x < 2) + return x != 0 +} + +// Int64 decodes and returns an int64 value from the element bitstream. +func (r *Decoder) Int64() int64 { + r.Sync(SyncInt64) + return r.rawVarint() +} + +// Int64 decodes and returns a uint64 value from the element bitstream. +func (r *Decoder) Uint64() uint64 { + r.Sync(SyncUint64) + return r.rawUvarint() +} + +// Len decodes and returns a non-negative int value from the element bitstream. +func (r *Decoder) Len() int { x := r.Uint64(); v := int(x); assert(uint64(v) == x); return v } + +// Int decodes and returns an int value from the element bitstream. +func (r *Decoder) Int() int { x := r.Int64(); v := int(x); assert(int64(v) == x); return v } + +// Uint decodes and returns a uint value from the element bitstream. +func (r *Decoder) Uint() uint { x := r.Uint64(); v := uint(x); assert(uint64(v) == x); return v } + +// Code decodes a Code value from the element bitstream and returns +// its ordinal value. It's the caller's responsibility to convert the +// result to an appropriate Code type. +// +// TODO(mdempsky): Ideally this method would have signature "Code[T +// Code] T" instead, but we don't allow generic methods and the +// compiler can't depend on generics yet anyway. +func (r *Decoder) Code(mark SyncMarker) int { + r.Sync(mark) + return r.Len() +} + +// Reloc decodes a relocation of expected section k from the element +// bitstream and returns an index to the referenced element. +func (r *Decoder) Reloc(k RelocKind) Index { + r.Sync(SyncUseReloc) + return r.rawReloc(k, r.Len()) +} + +// String decodes and returns a string value from the element +// bitstream. +func (r *Decoder) String() string { + r.Sync(SyncString) + return r.common.StringIdx(r.Reloc(RelocString)) +} + +// Strings decodes and returns a variable-length slice of strings from +// the element bitstream. +func (r *Decoder) Strings() []string { + res := make([]string, r.Len()) + for i := range res { + res[i] = r.String() + } + return res +} + +// Value decodes and returns a constant.Value from the element +// bitstream. +func (r *Decoder) Value() constant.Value { + r.Sync(SyncValue) + isComplex := r.Bool() + val := r.scalar() + if isComplex { + val = constant.BinaryOp(val, token.ADD, constant.MakeImag(r.scalar())) + } + return val +} + +func (r *Decoder) scalar() constant.Value { + switch tag := CodeVal(r.Code(SyncVal)); tag { + default: + panic(fmt.Errorf("unexpected scalar tag: %v", tag)) + + case ValBool: + return constant.MakeBool(r.Bool()) + case ValString: + return constant.MakeString(r.String()) + case ValInt64: + return constant.MakeInt64(r.Int64()) + case ValBigInt: + return constant.Make(r.bigInt()) + case ValBigRat: + num := r.bigInt() + denom := r.bigInt() + return constant.Make(new(big.Rat).SetFrac(num, denom)) + case ValBigFloat: + return constant.Make(r.bigFloat()) + } +} + +func (r *Decoder) bigInt() *big.Int { + v := new(big.Int).SetBytes([]byte(r.String())) + if r.Bool() { + v.Neg(v) + } + return v +} + +func (r *Decoder) bigFloat() *big.Float { + v := new(big.Float).SetPrec(512) + assert(v.UnmarshalText([]byte(r.String())) == nil) + return v +} + +// @@@ Helpers + +// TODO(mdempsky): These should probably be removed. I think they're a +// smell that the export data format is not yet quite right. + +// PeekPkgPath returns the package path for the specified package +// index. +func (pr *PkgDecoder) PeekPkgPath(idx Index) string { + r := pr.NewDecoder(RelocPkg, idx, SyncPkgDef) + path := r.String() + if path == "" { + path = pr.pkgPath + } + return path +} + +// PeekObj returns the package path, object name, and CodeObj for the +// specified object index. +func (pr *PkgDecoder) PeekObj(idx Index) (string, string, CodeObj) { + r := pr.NewDecoder(RelocName, idx, SyncObject1) + r.Sync(SyncSym) + r.Sync(SyncPkg) + path := pr.PeekPkgPath(r.Reloc(RelocPkg)) + name := r.String() + assert(name != "") + + tag := CodeObj(r.Code(SyncCodeObj)) + + return path, name, tag +} diff --git a/go/internal/pkgbits/doc.go b/go/internal/pkgbits/doc.go new file mode 100644 index 0000000000..c8a2796b5e --- /dev/null +++ b/go/internal/pkgbits/doc.go @@ -0,0 +1,32 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package pkgbits implements low-level coding abstractions for +// Unified IR's export data format. +// +// At a low-level, a package is a collection of bitstream elements. +// Each element has a "kind" and a dense, non-negative index. +// Elements can be randomly accessed given their kind and index. +// +// Individual elements are sequences of variable-length values (e.g., +// integers, booleans, strings, go/constant values, cross-references +// to other elements). Package pkgbits provides APIs for encoding and +// decoding these low-level values, but the details of mapping +// higher-level Go constructs into elements is left to higher-level +// abstractions. +// +// Elements may cross-reference each other with "relocations." For +// example, an element representing a pointer type has a relocation +// referring to the element type. +// +// Go constructs may be composed as a constellation of multiple +// elements. For example, a declared function may have one element to +// describe the object (e.g., its name, type, position), and a +// separate element to describe its function body. This allows readers +// some flexibility in efficiently seeking or re-reading data (e.g., +// inlining requires re-reading the function body for each inlined +// call, without needing to re-read the object-level details). +// +// This is a copy of internal/pkgbits in the Go implementation. +package pkgbits diff --git a/go/internal/pkgbits/encoder.go b/go/internal/pkgbits/encoder.go new file mode 100644 index 0000000000..c50c838caa --- /dev/null +++ b/go/internal/pkgbits/encoder.go @@ -0,0 +1,379 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pkgbits + +import ( + "bytes" + "crypto/md5" + "encoding/binary" + "go/constant" + "io" + "math/big" + "runtime" +) + +// currentVersion is the current version number. +// +// - v0: initial prototype +// +// - v1: adds the flags uint32 word +const currentVersion uint32 = 1 + +// A PkgEncoder provides methods for encoding a package's Unified IR +// export data. +type PkgEncoder struct { + // elems holds the bitstream for previously encoded elements. + elems [numRelocs][]string + + // stringsIdx maps previously encoded strings to their index within + // the RelocString section, to allow deduplication. That is, + // elems[RelocString][stringsIdx[s]] == s (if present). + stringsIdx map[string]Index + + // syncFrames is the number of frames to write at each sync + // marker. A negative value means sync markers are omitted. + syncFrames int +} + +// SyncMarkers reports whether pw uses sync markers. +func (pw *PkgEncoder) SyncMarkers() bool { return pw.syncFrames >= 0 } + +// NewPkgEncoder returns an initialized PkgEncoder. +// +// syncFrames is the number of caller frames that should be serialized +// at Sync points. Serializing additional frames results in larger +// export data files, but can help diagnosing desync errors in +// higher-level Unified IR reader/writer code. If syncFrames is +// negative, then sync markers are omitted entirely. +func NewPkgEncoder(syncFrames int) PkgEncoder { + return PkgEncoder{ + stringsIdx: make(map[string]Index), + syncFrames: syncFrames, + } +} + +// DumpTo writes the package's encoded data to out0 and returns the +// package fingerprint. +func (pw *PkgEncoder) DumpTo(out0 io.Writer) (fingerprint [8]byte) { + h := md5.New() + out := io.MultiWriter(out0, h) + + writeUint32 := func(x uint32) { + assert(binary.Write(out, binary.LittleEndian, x) == nil) + } + + writeUint32(currentVersion) + + var flags uint32 + if pw.SyncMarkers() { + flags |= flagSyncMarkers + } + writeUint32(flags) + + // Write elemEndsEnds. + var sum uint32 + for _, elems := range &pw.elems { + sum += uint32(len(elems)) + writeUint32(sum) + } + + // Write elemEnds. + sum = 0 + for _, elems := range &pw.elems { + for _, elem := range elems { + sum += uint32(len(elem)) + writeUint32(sum) + } + } + + // Write elemData. + for _, elems := range &pw.elems { + for _, elem := range elems { + _, err := io.WriteString(out, elem) + assert(err == nil) + } + } + + // Write fingerprint. + copy(fingerprint[:], h.Sum(nil)) + _, err := out0.Write(fingerprint[:]) + assert(err == nil) + + return +} + +// StringIdx adds a string value to the strings section, if not +// already present, and returns its index. +func (pw *PkgEncoder) StringIdx(s string) Index { + if idx, ok := pw.stringsIdx[s]; ok { + assert(pw.elems[RelocString][idx] == s) + return idx + } + + idx := Index(len(pw.elems[RelocString])) + pw.elems[RelocString] = append(pw.elems[RelocString], s) + pw.stringsIdx[s] = idx + return idx +} + +// NewEncoder returns an Encoder for a new element within the given +// section, and encodes the given SyncMarker as the start of the +// element bitstream. +func (pw *PkgEncoder) NewEncoder(k RelocKind, marker SyncMarker) Encoder { + e := pw.NewEncoderRaw(k) + e.Sync(marker) + return e +} + +// NewEncoderRaw returns an Encoder for a new element within the given +// section. +// +// Most callers should use NewEncoder instead. +func (pw *PkgEncoder) NewEncoderRaw(k RelocKind) Encoder { + idx := Index(len(pw.elems[k])) + pw.elems[k] = append(pw.elems[k], "") // placeholder + + return Encoder{ + p: pw, + k: k, + Idx: idx, + } +} + +// An Encoder provides methods for encoding an individual element's +// bitstream data. +type Encoder struct { + p *PkgEncoder + + Relocs []RelocEnt + Data bytes.Buffer // accumulated element bitstream data + + encodingRelocHeader bool + + k RelocKind + Idx Index // index within relocation section +} + +// Flush finalizes the element's bitstream and returns its Index. +func (w *Encoder) Flush() Index { + var sb bytes.Buffer // TODO(mdempsky): strings.Builder after #44505 is resolved + + // Backup the data so we write the relocations at the front. + var tmp bytes.Buffer + io.Copy(&tmp, &w.Data) + + // TODO(mdempsky): Consider writing these out separately so they're + // easier to strip, along with function bodies, so that we can prune + // down to just the data that's relevant to go/types. + if w.encodingRelocHeader { + panic("encodingRelocHeader already true; recursive flush?") + } + w.encodingRelocHeader = true + w.Sync(SyncRelocs) + w.Len(len(w.Relocs)) + for _, rEnt := range w.Relocs { + w.Sync(SyncReloc) + w.Len(int(rEnt.Kind)) + w.Len(int(rEnt.Idx)) + } + + io.Copy(&sb, &w.Data) + io.Copy(&sb, &tmp) + w.p.elems[w.k][w.Idx] = sb.String() + + return w.Idx +} + +func (w *Encoder) checkErr(err error) { + if err != nil { + errorf("unexpected encoding error: %v", err) + } +} + +func (w *Encoder) rawUvarint(x uint64) { + var buf [binary.MaxVarintLen64]byte + n := binary.PutUvarint(buf[:], x) + _, err := w.Data.Write(buf[:n]) + w.checkErr(err) +} + +func (w *Encoder) rawVarint(x int64) { + // Zig-zag encode. + ux := uint64(x) << 1 + if x < 0 { + ux = ^ux + } + + w.rawUvarint(ux) +} + +func (w *Encoder) rawReloc(r RelocKind, idx Index) int { + // TODO(mdempsky): Use map for lookup; this takes quadratic time. + for i, rEnt := range w.Relocs { + if rEnt.Kind == r && rEnt.Idx == idx { + return i + } + } + + i := len(w.Relocs) + w.Relocs = append(w.Relocs, RelocEnt{r, idx}) + return i +} + +func (w *Encoder) Sync(m SyncMarker) { + if !w.p.SyncMarkers() { + return + } + + // Writing out stack frame string references requires working + // relocations, but writing out the relocations themselves involves + // sync markers. To prevent infinite recursion, we simply trim the + // stack frame for sync markers within the relocation header. + var frames []string + if !w.encodingRelocHeader && w.p.syncFrames > 0 { + pcs := make([]uintptr, w.p.syncFrames) + n := runtime.Callers(2, pcs) + frames = fmtFrames(pcs[:n]...) + } + + // TODO(mdempsky): Save space by writing out stack frames as a + // linked list so we can share common stack frames. + w.rawUvarint(uint64(m)) + w.rawUvarint(uint64(len(frames))) + for _, frame := range frames { + w.rawUvarint(uint64(w.rawReloc(RelocString, w.p.StringIdx(frame)))) + } +} + +// Bool encodes and writes a bool value into the element bitstream, +// and then returns the bool value. +// +// For simple, 2-alternative encodings, the idiomatic way to call Bool +// is something like: +// +// if w.Bool(x != 0) { +// // alternative #1 +// } else { +// // alternative #2 +// } +// +// For multi-alternative encodings, use Code instead. +func (w *Encoder) Bool(b bool) bool { + w.Sync(SyncBool) + var x byte + if b { + x = 1 + } + err := w.Data.WriteByte(x) + w.checkErr(err) + return b +} + +// Int64 encodes and writes an int64 value into the element bitstream. +func (w *Encoder) Int64(x int64) { + w.Sync(SyncInt64) + w.rawVarint(x) +} + +// Uint64 encodes and writes a uint64 value into the element bitstream. +func (w *Encoder) Uint64(x uint64) { + w.Sync(SyncUint64) + w.rawUvarint(x) +} + +// Len encodes and writes a non-negative int value into the element bitstream. +func (w *Encoder) Len(x int) { assert(x >= 0); w.Uint64(uint64(x)) } + +// Int encodes and writes an int value into the element bitstream. +func (w *Encoder) Int(x int) { w.Int64(int64(x)) } + +// Len encodes and writes a uint value into the element bitstream. +func (w *Encoder) Uint(x uint) { w.Uint64(uint64(x)) } + +// Reloc encodes and writes a relocation for the given (section, +// index) pair into the element bitstream. +// +// Note: Only the index is formally written into the element +// bitstream, so bitstream decoders must know from context which +// section an encoded relocation refers to. +func (w *Encoder) Reloc(r RelocKind, idx Index) { + w.Sync(SyncUseReloc) + w.Len(w.rawReloc(r, idx)) +} + +// Code encodes and writes a Code value into the element bitstream. +func (w *Encoder) Code(c Code) { + w.Sync(c.Marker()) + w.Len(c.Value()) +} + +// String encodes and writes a string value into the element +// bitstream. +// +// Internally, strings are deduplicated by adding them to the strings +// section (if not already present), and then writing a relocation +// into the element bitstream. +func (w *Encoder) String(s string) { + w.Sync(SyncString) + w.Reloc(RelocString, w.p.StringIdx(s)) +} + +// Strings encodes and writes a variable-length slice of strings into +// the element bitstream. +func (w *Encoder) Strings(ss []string) { + w.Len(len(ss)) + for _, s := range ss { + w.String(s) + } +} + +// Value encodes and writes a constant.Value into the element +// bitstream. +func (w *Encoder) Value(val constant.Value) { + w.Sync(SyncValue) + if w.Bool(val.Kind() == constant.Complex) { + w.scalar(constant.Real(val)) + w.scalar(constant.Imag(val)) + } else { + w.scalar(val) + } +} + +func (w *Encoder) scalar(val constant.Value) { + switch v := constant.Val(val).(type) { + default: + errorf("unhandled %v (%v)", val, val.Kind()) + case bool: + w.Code(ValBool) + w.Bool(v) + case string: + w.Code(ValString) + w.String(v) + case int64: + w.Code(ValInt64) + w.Int64(v) + case *big.Int: + w.Code(ValBigInt) + w.bigInt(v) + case *big.Rat: + w.Code(ValBigRat) + w.bigInt(v.Num()) + w.bigInt(v.Denom()) + case *big.Float: + w.Code(ValBigFloat) + w.bigFloat(v) + } +} + +func (w *Encoder) bigInt(v *big.Int) { + b := v.Bytes() + w.String(string(b)) // TODO: More efficient encoding. + w.Bool(v.Sign() < 0) +} + +func (w *Encoder) bigFloat(v *big.Float) { + b := v.Append(nil, 'p', -1) + w.String(string(b)) // TODO: More efficient encoding. +} diff --git a/go/internal/pkgbits/flags.go b/go/internal/pkgbits/flags.go new file mode 100644 index 0000000000..654222745f --- /dev/null +++ b/go/internal/pkgbits/flags.go @@ -0,0 +1,9 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pkgbits + +const ( + flagSyncMarkers = 1 << iota // file format contains sync markers +) diff --git a/go/internal/pkgbits/frames_go1.go b/go/internal/pkgbits/frames_go1.go new file mode 100644 index 0000000000..5294f6a63e --- /dev/null +++ b/go/internal/pkgbits/frames_go1.go @@ -0,0 +1,21 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !go1.7 +// +build !go1.7 + +// TODO(mdempsky): Remove after #44505 is resolved + +package pkgbits + +import "runtime" + +func walkFrames(pcs []uintptr, visit frameVisitor) { + for _, pc := range pcs { + fn := runtime.FuncForPC(pc) + file, line := fn.FileLine(pc) + + visit(file, line, fn.Name(), pc-fn.Entry()) + } +} diff --git a/go/internal/pkgbits/frames_go17.go b/go/internal/pkgbits/frames_go17.go new file mode 100644 index 0000000000..2324ae7adf --- /dev/null +++ b/go/internal/pkgbits/frames_go17.go @@ -0,0 +1,28 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build go1.7 +// +build go1.7 + +package pkgbits + +import "runtime" + +// walkFrames calls visit for each call frame represented by pcs. +// +// pcs should be a slice of PCs, as returned by runtime.Callers. +func walkFrames(pcs []uintptr, visit frameVisitor) { + if len(pcs) == 0 { + return + } + + frames := runtime.CallersFrames(pcs) + for { + frame, more := frames.Next() + visit(frame.File, frame.Line, frame.Function, frame.PC-frame.Entry) + if !more { + return + } + } +} diff --git a/go/internal/pkgbits/reloc.go b/go/internal/pkgbits/reloc.go new file mode 100644 index 0000000000..7a8f04ab3f --- /dev/null +++ b/go/internal/pkgbits/reloc.go @@ -0,0 +1,42 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pkgbits + +// A RelocKind indicates a particular section within a unified IR export. +type RelocKind int + +// An Index represents a bitstream element index within a particular +// section. +type Index int + +// A relocEnt (relocation entry) is an entry in an element's local +// reference table. +// +// TODO(mdempsky): Rename this too. +type RelocEnt struct { + Kind RelocKind + Idx Index +} + +// Reserved indices within the meta relocation section. +const ( + PublicRootIdx Index = 0 + PrivateRootIdx Index = 1 +) + +const ( + RelocString RelocKind = iota + RelocMeta + RelocPosBase + RelocPkg + RelocName + RelocType + RelocObj + RelocObjExt + RelocObjDict + RelocBody + + numRelocs = iota +) diff --git a/go/internal/pkgbits/support.go b/go/internal/pkgbits/support.go new file mode 100644 index 0000000000..ad26d3b28c --- /dev/null +++ b/go/internal/pkgbits/support.go @@ -0,0 +1,17 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pkgbits + +import "fmt" + +func assert(b bool) { + if !b { + panic("assertion failed") + } +} + +func errorf(format string, args ...interface{}) { + panic(fmt.Errorf(format, args...)) +} diff --git a/go/internal/pkgbits/sync.go b/go/internal/pkgbits/sync.go new file mode 100644 index 0000000000..5bd51ef717 --- /dev/null +++ b/go/internal/pkgbits/sync.go @@ -0,0 +1,113 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pkgbits + +import ( + "fmt" + "strings" +) + +// fmtFrames formats a backtrace for reporting reader/writer desyncs. +func fmtFrames(pcs ...uintptr) []string { + res := make([]string, 0, len(pcs)) + walkFrames(pcs, func(file string, line int, name string, offset uintptr) { + // Trim package from function name. It's just redundant noise. + name = strings.TrimPrefix(name, "cmd/compile/internal/noder.") + + res = append(res, fmt.Sprintf("%s:%v: %s +0x%v", file, line, name, offset)) + }) + return res +} + +type frameVisitor func(file string, line int, name string, offset uintptr) + +// SyncMarker is an enum type that represents markers that may be +// written to export data to ensure the reader and writer stay +// synchronized. +type SyncMarker int + +//go:generate stringer -type=SyncMarker -trimprefix=Sync + +const ( + _ SyncMarker = iota + + // Public markers (known to go/types importers). + + // Low-level coding markers. + SyncEOF + SyncBool + SyncInt64 + SyncUint64 + SyncString + SyncValue + SyncVal + SyncRelocs + SyncReloc + SyncUseReloc + + // Higher-level object and type markers. + SyncPublic + SyncPos + SyncPosBase + SyncObject + SyncObject1 + SyncPkg + SyncPkgDef + SyncMethod + SyncType + SyncTypeIdx + SyncTypeParamNames + SyncSignature + SyncParams + SyncParam + SyncCodeObj + SyncSym + SyncLocalIdent + SyncSelector + + // Private markers (only known to cmd/compile). + SyncPrivate + + SyncFuncExt + SyncVarExt + SyncTypeExt + SyncPragma + + SyncExprList + SyncExprs + SyncExpr + SyncExprType + SyncAssign + SyncOp + SyncFuncLit + SyncCompLit + + SyncDecl + SyncFuncBody + SyncOpenScope + SyncCloseScope + SyncCloseAnotherScope + SyncDeclNames + SyncDeclName + + SyncStmts + SyncBlockStmt + SyncIfStmt + SyncForStmt + SyncSwitchStmt + SyncRangeStmt + SyncCaseClause + SyncCommClause + SyncSelectStmt + SyncDecls + SyncLabeledStmt + SyncUseObjLocal + SyncAddLocal + SyncLinkname + SyncStmt1 + SyncStmtsEnd + SyncLabel + SyncOptLabel +) diff --git a/go/internal/pkgbits/syncmarker_string.go b/go/internal/pkgbits/syncmarker_string.go new file mode 100644 index 0000000000..4a5b0ca5f2 --- /dev/null +++ b/go/internal/pkgbits/syncmarker_string.go @@ -0,0 +1,89 @@ +// Code generated by "stringer -type=SyncMarker -trimprefix=Sync"; DO NOT EDIT. + +package pkgbits + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[SyncEOF-1] + _ = x[SyncBool-2] + _ = x[SyncInt64-3] + _ = x[SyncUint64-4] + _ = x[SyncString-5] + _ = x[SyncValue-6] + _ = x[SyncVal-7] + _ = x[SyncRelocs-8] + _ = x[SyncReloc-9] + _ = x[SyncUseReloc-10] + _ = x[SyncPublic-11] + _ = x[SyncPos-12] + _ = x[SyncPosBase-13] + _ = x[SyncObject-14] + _ = x[SyncObject1-15] + _ = x[SyncPkg-16] + _ = x[SyncPkgDef-17] + _ = x[SyncMethod-18] + _ = x[SyncType-19] + _ = x[SyncTypeIdx-20] + _ = x[SyncTypeParamNames-21] + _ = x[SyncSignature-22] + _ = x[SyncParams-23] + _ = x[SyncParam-24] + _ = x[SyncCodeObj-25] + _ = x[SyncSym-26] + _ = x[SyncLocalIdent-27] + _ = x[SyncSelector-28] + _ = x[SyncPrivate-29] + _ = x[SyncFuncExt-30] + _ = x[SyncVarExt-31] + _ = x[SyncTypeExt-32] + _ = x[SyncPragma-33] + _ = x[SyncExprList-34] + _ = x[SyncExprs-35] + _ = x[SyncExpr-36] + _ = x[SyncExprType-37] + _ = x[SyncAssign-38] + _ = x[SyncOp-39] + _ = x[SyncFuncLit-40] + _ = x[SyncCompLit-41] + _ = x[SyncDecl-42] + _ = x[SyncFuncBody-43] + _ = x[SyncOpenScope-44] + _ = x[SyncCloseScope-45] + _ = x[SyncCloseAnotherScope-46] + _ = x[SyncDeclNames-47] + _ = x[SyncDeclName-48] + _ = x[SyncStmts-49] + _ = x[SyncBlockStmt-50] + _ = x[SyncIfStmt-51] + _ = x[SyncForStmt-52] + _ = x[SyncSwitchStmt-53] + _ = x[SyncRangeStmt-54] + _ = x[SyncCaseClause-55] + _ = x[SyncCommClause-56] + _ = x[SyncSelectStmt-57] + _ = x[SyncDecls-58] + _ = x[SyncLabeledStmt-59] + _ = x[SyncUseObjLocal-60] + _ = x[SyncAddLocal-61] + _ = x[SyncLinkname-62] + _ = x[SyncStmt1-63] + _ = x[SyncStmtsEnd-64] + _ = x[SyncLabel-65] + _ = x[SyncOptLabel-66] +} + +const _SyncMarker_name = "EOFBoolInt64Uint64StringValueValRelocsRelocUseRelocPublicPosPosBaseObjectObject1PkgPkgDefMethodTypeTypeIdxTypeParamNamesSignatureParamsParamCodeObjSymLocalIdentSelectorPrivateFuncExtVarExtTypeExtPragmaExprListExprsExprExprTypeAssignOpFuncLitCompLitDeclFuncBodyOpenScopeCloseScopeCloseAnotherScopeDeclNamesDeclNameStmtsBlockStmtIfStmtForStmtSwitchStmtRangeStmtCaseClauseCommClauseSelectStmtDeclsLabeledStmtUseObjLocalAddLocalLinknameStmt1StmtsEndLabelOptLabel" + +var _SyncMarker_index = [...]uint16{0, 3, 7, 12, 18, 24, 29, 32, 38, 43, 51, 57, 60, 67, 73, 80, 83, 89, 95, 99, 106, 120, 129, 135, 140, 147, 150, 160, 168, 175, 182, 188, 195, 201, 209, 214, 218, 226, 232, 234, 241, 248, 252, 260, 269, 279, 296, 305, 313, 318, 327, 333, 340, 350, 359, 369, 379, 389, 394, 405, 416, 424, 432, 437, 445, 450, 458} + +func (i SyncMarker) String() string { + i -= 1 + if i < 0 || i >= SyncMarker(len(_SyncMarker_index)-1) { + return "SyncMarker(" + strconv.FormatInt(int64(i+1), 10) + ")" + } + return _SyncMarker_name[_SyncMarker_index[i]:_SyncMarker_index[i+1]] +}