From 9c2a5567e347d6d57de667256c748abf757d1ee4 Mon Sep 17 00:00:00 2001 From: Robert Findley Date: Thu, 7 Jul 2022 18:41:33 -0400 Subject: [PATCH] internal/lsp/cache: fail addPackageHandle if metadata is stale If metadata is refreshed during the execution of buildPackageHandle, we should not store the resulting package handle in the snapshot, as it breaks the invariant that computed packages match the currently loaded metadata. This strictness revealed another bug: because of our fine-grained locking in snapshot.load, it is possible that we set valid metadata multiple times, leading to unnecessary invalidation and potential further races to package handles. Fix this by building all metadata when processing the go/packages result, and only filtering updates after grabbing the lock. For golang/go#53733 Change-Id: Ib63ae4fbd97d0d25d45fe04f9bcd835996db41da Reviewed-on: https://go-review.googlesource.com/c/tools/+/416224 Run-TryBot: Robert Findley TryBot-Result: Gopher Robot Reviewed-by: Alan Donovan gopls-CI: kokoro --- internal/lsp/cache/check.go | 4 +-- internal/lsp/cache/load.go | 60 ++++++++++++++++++++-------------- internal/lsp/cache/snapshot.go | 22 +++++++++++-- 3 files changed, 55 insertions(+), 31 deletions(-) diff --git a/internal/lsp/cache/check.go b/internal/lsp/cache/check.go index aae6de0eea..c8b314a072 100644 --- a/internal/lsp/cache/check.go +++ b/internal/lsp/cache/check.go @@ -209,9 +209,7 @@ func (s *snapshot) buildPackageHandle(ctx context.Context, id PackageID, mode so // been cached, addPackage will return the cached value. This is fine, // since the original package handle above will have no references and be // garbage collected. - ph = s.addPackageHandle(ph, release) - - return ph, nil + return s.addPackageHandle(ph, release) } func (s *snapshot) workspaceParseMode(id PackageID) source.ParseMode { diff --git a/internal/lsp/cache/load.go b/internal/lsp/cache/load.go index 08c88ab8e7..8937f93403 100644 --- a/internal/lsp/cache/load.go +++ b/internal/lsp/cache/load.go @@ -47,6 +47,8 @@ func (s *snapshot) load(ctx context.Context, allowNetwork bool, scopes ...interf if errors.Is(err, context.Canceled) { return } + // TODO(rfindley): merge these metadata updates with the updates below, to + // avoid updating the graph twice. s.clearShouldLoad(scopes...) }() @@ -154,7 +156,7 @@ func (s *snapshot) load(ctx context.Context, allowNetwork bool, scopes ...interf } moduleErrs := make(map[string][]packages.Error) // module path -> errors - updates := make(map[PackageID]*KnownMetadata) + newMetadata := make(map[PackageID]*KnownMetadata) for _, pkg := range pkgs { // The Go command returns synthetic list results for module queries that // encountered module errors. @@ -196,31 +198,48 @@ func (s *snapshot) load(ctx context.Context, allowNetwork bool, scopes ...interf } // Skip filtered packages. They may be added anyway if they're // dependencies of non-filtered packages. + // + // TODO(rfindley): why exclude metadata arbitrarily here? It should be safe + // to capture all metadata. if s.view.allFilesExcluded(pkg) { continue } - // TODO: once metadata is immutable, we shouldn't have to lock here. - s.mu.Lock() - err := computeMetadataUpdates(ctx, s.meta, PackagePath(pkg.PkgPath), pkg, cfg, query, updates, nil) - s.mu.Unlock() - if err != nil { + if err := buildMetadata(ctx, PackagePath(pkg.PkgPath), pkg, cfg, query, newMetadata, nil); err != nil { return err } } - var loadedIDs []PackageID - for id := range updates { - loadedIDs = append(loadedIDs, id) + s.mu.Lock() + + // Only update metadata where we don't already have valid metadata. + // + // We want to preserve an invariant that s.packages.Get(id).m.Metadata + // matches s.meta.metadata[id].Metadata. By avoiding overwriting valid + // metadata, we minimize the amount of invalidation required to preserve this + // invariant. + // + // TODO(rfindley): perform a sanity check that metadata matches here. If not, + // we have an invalidation bug elsewhere. + updates := make(map[PackageID]*KnownMetadata) + var updatedIDs []PackageID + for _, m := range newMetadata { + if existing := s.meta.metadata[m.ID]; existing == nil || !existing.Valid { + updates[m.ID] = m + updatedIDs = append(updatedIDs, m.ID) + } } event.Log(ctx, fmt.Sprintf("%s: updating metadata for %d packages", eventName, len(updates))) - s.mu.Lock() + // Invalidate the reverse transitive closure of packages that have changed. + // + // Note that the original metadata is being invalidated here, so we use the + // original metadata graph to compute the reverse closure. + invalidatedPackages := s.meta.reverseTransitiveClosure(true, updatedIDs...) - // invalidate the reverse transitive closure of packages that have changed. - invalidatedPackages := s.meta.reverseTransitiveClosure(true, loadedIDs...) s.meta = s.meta.Clone(updates) s.resetIsActivePackageLocked() + // Invalidate any packages we may have associated with this metadata. // // TODO(rfindley): this should not be necessary, as we should have already @@ -431,10 +450,10 @@ func makeWorkspaceDir(ctx context.Context, workspace *workspace, fs source.FileS return tmpdir, nil } -// computeMetadataUpdates populates the updates map with metadata updates to +// buildMetadata populates the updates map with metadata updates to // apply, based on the given pkg. It recurs through pkg.Imports to ensure that // metadata exists for all dependencies. -func computeMetadataUpdates(ctx context.Context, g *metadataGraph, pkgPath PackagePath, pkg *packages.Package, cfg *packages.Config, query []string, updates map[PackageID]*KnownMetadata, path []PackageID) error { +func buildMetadata(ctx context.Context, pkgPath PackagePath, pkg *packages.Package, cfg *packages.Config, query []string, updates map[PackageID]*KnownMetadata, path []PackageID) error { id := PackageID(pkg.ID) if source.IsCommandLineArguments(pkg.ID) { suffix := ":" + strings.Join(query, ",") @@ -442,21 +461,12 @@ func computeMetadataUpdates(ctx context.Context, g *metadataGraph, pkgPath Packa pkgPath = PackagePath(string(pkgPath) + suffix) } - // If we have valid metadata for this package, don't update. This minimizes - // the amount of subsequent invalidation. - // - // TODO(rfindley): perform a sanity check that metadata matches here. If not, - // we have an invalidation bug elsewhere. - if existing := g.metadata[id]; existing != nil && existing.Valid { - return nil - } - if _, ok := updates[id]; ok { // If we've already seen this dependency, there may be an import cycle, or // we may have reached the same package transitively via distinct paths. // Check the path to confirm. - // TODO(rfindley): this doesn't look right. Any single piece of new + // TODO(rfindley): this doesn't look sufficient. Any single piece of new // metadata could theoretically introduce import cycles in the metadata // graph. What's the point of this limited check here (and is it even // possible to get an import cycle in data from go/packages)? Consider @@ -535,7 +545,7 @@ func computeMetadataUpdates(ctx context.Context, g *metadataGraph, pkgPath Packa m.MissingDeps[importPkgPath] = struct{}{} continue } - if err := computeMetadataUpdates(ctx, g, importPkgPath, importPkg, cfg, query, updates, append(path, id)); err != nil { + if err := buildMetadata(ctx, importPkgPath, importPkg, cfg, query, updates, append(path, id)); err != nil { event.Error(ctx, "error in dependency", err) } } diff --git a/internal/lsp/cache/snapshot.go b/internal/lsp/cache/snapshot.go index b962435b62..5623f24c55 100644 --- a/internal/lsp/cache/snapshot.go +++ b/internal/lsp/cache/snapshot.go @@ -91,6 +91,11 @@ type snapshot struct { // packages maps a packageKey to a *packageHandle. // It may be invalidated when a file's content changes. + // + // Invariants to preserve: + // - packages.Get(id).m.Metadata == meta.metadata[id].Metadata for all ids + // - if a package is in packages, then all of its dependencies should also + // be in packages, unless there is a missing import packages packagesMap // isActivePackageCache maps package ID to the cached value if it is active or not. @@ -712,18 +717,29 @@ func (s *snapshot) getImportedBy(id PackageID) []PackageID { return s.meta.importedBy[id] } -func (s *snapshot) addPackageHandle(ph *packageHandle, release func()) *packageHandle { +// addPackageHandle stores ph in the snapshot, or returns a pre-existing handle +// for the given package key, if it exists. +// +// An error is returned if the metadata used to build ph is no longer relevant. +func (s *snapshot) addPackageHandle(ph *packageHandle, release func()) (*packageHandle, error) { s.mu.Lock() defer s.mu.Unlock() + if s.meta.metadata[ph.m.ID].Metadata != ph.m.Metadata { + return nil, fmt.Errorf("stale metadata for %s", ph.m.ID) + } + // If the package handle has already been cached, // return the cached handle instead of overriding it. if result, ok := s.packages.Get(ph.packageKey()); ok { release() - return result + if result.m.Metadata != ph.m.Metadata { + return nil, bug.Errorf("existing package handle does not match for %s", ph.m.ID) + } + return result, nil } s.packages.Set(ph.packageKey(), ph, release) - return ph + return ph, nil } func (s *snapshot) workspacePackageIDs() (ids []PackageID) {