From 324b35332a02709111b444669e3fa0309b327559 Mon Sep 17 00:00:00 2001 From: Suzy Mueller Date: Fri, 2 Aug 2019 14:05:22 -0400 Subject: [PATCH] internal/imports: save information about the module cache The module cache can only be added to, so any information discovered about directories that are within a module in the module cache will not change. Store the information we have discovered about the module cache. Updates #32750 Change-Id: I56c88f03f6a364221198fb032b139208497cd0e5 Reviewed-on: https://go-review.googlesource.com/c/tools/+/188762 Reviewed-by: Heschi Kreinick --- internal/imports/mod.go | 145 +++++++++++++++++++++++----------- internal/imports/mod_cache.go | 99 +++++++++++++++++++++++ internal/imports/mod_test.go | 44 +++++++++++ 3 files changed, 241 insertions(+), 47 deletions(-) create mode 100644 internal/imports/mod_cache.go diff --git a/internal/imports/mod.go b/internal/imports/mod.go index 4f088b746c..bddfba4130 100644 --- a/internal/imports/mod.go +++ b/internal/imports/mod.go @@ -4,6 +4,7 @@ import ( "bytes" "context" "encoding/json" + "fmt" "io/ioutil" "os" "path" @@ -29,7 +30,12 @@ type ModuleResolver struct { ModsByModPath []*ModuleJSON // All modules, ordered by # of path components in module Path... ModsByDir []*ModuleJSON // ...or Dir. - ModCachePkgs map[string]*pkg // Packages in the mod cache, keyed by absolute directory. + // ModCachePkgs contains the canonicalized importPath and directory of packages + // in the module cache. Keyed by absolute directory. + ModCachePkgs map[string]*pkg + + // moduleCacheInfo stores information about the module cache. + moduleCacheInfo *moduleCacheInfo } type ModuleJSON struct { @@ -91,6 +97,11 @@ func (r *ModuleResolver) init() error { }) r.ModCachePkgs = make(map[string]*pkg) + if r.moduleCacheInfo == nil { + r.moduleCacheInfo = &moduleCacheInfo{ + modCacheDirInfo: make(map[string]*directoryPackageInfo), + } + } r.Initialized = true return nil @@ -246,36 +257,27 @@ func (r *ModuleResolver) scan(_ references) ([]*pkg, error) { return } - subdir := "" - if dir != root.Path { - subdir = dir[len(root.Path)+len("/"):] - } - importPath := filepath.ToSlash(subdir) - if strings.HasPrefix(importPath, "vendor/") { - // Ignore vendor dirs. If -mod=vendor is on, then things - // should mostly just work, but when it's not vendor/ - // is a mess. There's no easy way to tell if it's on. - // We can still find things in the mod cache and - // map them into /vendor when -mod=vendor is on. - return - } - switch root.Type { - case gopathwalk.RootCurrentModule: - importPath = path.Join(r.Main.Path, filepath.ToSlash(subdir)) - case gopathwalk.RootModuleCache: - matches := modCacheRegexp.FindStringSubmatch(subdir) - modPath, err := module.DecodePath(filepath.ToSlash(matches[1])) + info, ok := r.moduleCacheInfo.Load(dir) + if !ok { + var err error + info, err = r.scanDirForPackage(root, dir) if err != nil { - if r.env.Debug { - r.env.Logf("decoding module cache path %q: %v", subdir, err) - } return } - importPath = path.Join(modPath, filepath.ToSlash(matches[3])) - case gopathwalk.RootGOROOT: - importPath = subdir + if root.Type == gopathwalk.RootModuleCache { + r.moduleCacheInfo.Store(dir, info) + } } + if info.status < directoryScanned || + (info.status == directoryScanned && info.err != nil) { + return + } + + // The rest of this function canonicalizes the packages using the results + // of initializing the resolver from 'go list -m'. + importPath := info.nonCanonicalImportPath + // Check if the directory is underneath a module that's in scope. if mod := r.findModuleByDir(dir); mod != nil { // It is. If dir is the target of a replace directive, @@ -286,26 +288,11 @@ func (r *ModuleResolver) scan(_ references) ([]*pkg, error) { dirInMod := dir[len(mod.Dir)+len("/"):] importPath = path.Join(mod.Path, filepath.ToSlash(dirInMod)) } - } else { - // The package is in an unknown module. Check that it's - // not obviously impossible to import. - var modFile string - switch root.Type { - case gopathwalk.RootModuleCache: - matches := modCacheRegexp.FindStringSubmatch(subdir) - modFile = filepath.Join(matches[1], "@", matches[2], "go.mod") - default: - modFile = findModFile(dir) - } - - modBytes, err := ioutil.ReadFile(modFile) - if err == nil && !strings.HasPrefix(importPath, modulePath(modBytes)) { - // The module's declared path does not match - // its expected path. It probably needs a - // replace directive we don't have. - return - } + } else if info.needsReplace { + // This package needed a replace target we don't have. + return } + // We may have discovered a package that has a different version // in scope already. Canonicalize to that one if possible. if _, canonicalDir := r.findPackage(importPath); canonicalDir != "" { @@ -317,9 +304,11 @@ func (r *ModuleResolver) scan(_ references) ([]*pkg, error) { dir: dir, } - switch root.Type { - case gopathwalk.RootModuleCache: + if root.Type == gopathwalk.RootModuleCache { // Save the results of processing this directory. + // This needs to be invalidated when the results of + // 'go list -m' would change, as the directory and + // importPath in this map depend on those results. r.ModCachePkgs[absDir] = res } @@ -335,6 +324,68 @@ func (r *ModuleResolver) loadExports(ctx context.Context, expectPackage string, return loadExportsFromFiles(ctx, r.env, expectPackage, pkg.dir) } +func (r *ModuleResolver) scanDirForPackage(root gopathwalk.Root, dir string) (directoryPackageInfo, error) { + subdir := "" + if dir != root.Path { + subdir = dir[len(root.Path)+len("/"):] + } + importPath := filepath.ToSlash(subdir) + if strings.HasPrefix(importPath, "vendor/") { + // Ignore vendor dirs. If -mod=vendor is on, then things + // should mostly just work, but when it's not vendor/ + // is a mess. There's no easy way to tell if it's on. + // We can still find things in the mod cache and + // map them into /vendor when -mod=vendor is on. + return directoryPackageInfo{}, fmt.Errorf("vendor directory") + } + switch root.Type { + case gopathwalk.RootCurrentModule: + importPath = path.Join(r.Main.Path, filepath.ToSlash(subdir)) + case gopathwalk.RootModuleCache: + matches := modCacheRegexp.FindStringSubmatch(subdir) + modPath, err := module.DecodePath(filepath.ToSlash(matches[1])) + if err != nil { + if r.env.Debug { + r.env.Logf("decoding module cache path %q: %v", subdir, err) + } + return directoryPackageInfo{ + status: directoryScanned, + err: fmt.Errorf("decoding module cache path %q: %v", subdir, err), + }, nil + } + importPath = path.Join(modPath, filepath.ToSlash(matches[3])) + case gopathwalk.RootGOROOT: + importPath = subdir + } + + // Check that this package is not obviously impossible to import. + var modFile string + switch root.Type { + case gopathwalk.RootModuleCache: + matches := modCacheRegexp.FindStringSubmatch(subdir) + index := strings.Index(dir, matches[1]+"@"+matches[2]) + modFile = filepath.Join(dir[:index], matches[1]+"@"+matches[2], "go.mod") + default: + modFile = findModFile(dir) + } + + var needsReplace bool + modBytes, err := ioutil.ReadFile(modFile) + if err == nil && !strings.HasPrefix(importPath, modulePath(modBytes)) { + // The module's declared path does not match + // its expected path. It probably needs a + // replace directive we don't have. + needsReplace = true + } + + return directoryPackageInfo{ + status: directoryScanned, + dir: dir, + nonCanonicalImportPath: importPath, + needsReplace: needsReplace, + }, nil +} + // modCacheRegexp splits a path in a module cache into module, module version, and package. var modCacheRegexp = regexp.MustCompile(`(.*)@([^/\\]*)(.*)`) diff --git a/internal/imports/mod_cache.go b/internal/imports/mod_cache.go new file mode 100644 index 0000000000..884706a25f --- /dev/null +++ b/internal/imports/mod_cache.go @@ -0,0 +1,99 @@ +package imports + +import ( + "sync" +) + +// ModuleResolver implements Resolver for modules using the go command as little +// as feasible. +// +// To find packages to import, the resolver needs to know about all of the +// the packages that could be imported. This includes packages that are +// already in modules that are in (1) the current module, (2) replace targets, +// and (3) packages in the module cache. Packages in (1) and (2) may change over +// time, as the client may edit the current module and locally replaced modules. +// The module cache (which includes all of the packages in (3)) can only +// ever be added to. +// +// The resolver can thus save state about packages in the module cache +// and guarantee that this will not change over time. To obtain information +// about new modules added to the module cache, the module cache should be +// rescanned. +// +// It is OK to serve information about modules that have been deleted, +// as they do still exist. +// TODO(suzmue): can we share information with the caller about +// what module needs to be downloaded to import this package? + +type directoryPackageStatus int + +const ( + _ directoryPackageStatus = iota + directoryScanned +) + +type directoryPackageInfo struct { + // status indicates the extent to which this struct has been filled in. + status directoryPackageStatus + // err is non-nil when there was an error trying to reach status. + err error + + // Set when status > directoryScanned. + + // dir is the absolute directory of this package. + dir string + // nonCanonicalImportPath is the expected import path for this package. + // This may not be an import path that can be used to import this package. + nonCanonicalImportPath string + // needsReplace is true if the nonCanonicalImportPath does not match the + // the modules declared path, making it impossible to import without a + // replace directive. + needsReplace bool +} + +// moduleCacheInfo is a concurrency safe map for storing information about +// the directories in the module cache. +// +// The information in this cache is built incrementally. Entries are initialized in scan. +// No new keys should be added in any other functions, as all directories containing +// packages are identified in scan. +// +// Other functions, including loadExports and findPackage, may update entries in this cache +// as they discover new things about the directory. +// +// We do not need to protect the data in the cache for multiple writes, because it only stores +// module cache directories, which do not change. If two competing stores take place, there will be +// one store that wins. Although this could result in a loss of information it will not be incorrect +// and may just result in recomputing the same result later. +// +// TODO(suzmue): consider other concurrency strategies and data structures (RWLocks, sync.Map, etc) +type moduleCacheInfo struct { + mu sync.Mutex + // modCacheDirInfo stores information about packages in + // module cache directories. Keyed by absolute directory. + modCacheDirInfo map[string]*directoryPackageInfo +} + +// Store stores the package info for dir. +func (d *moduleCacheInfo) Store(dir string, info directoryPackageInfo) { + d.mu.Lock() + defer d.mu.Unlock() + d.modCacheDirInfo[dir] = &directoryPackageInfo{ + status: info.status, + err: info.err, + dir: info.dir, + nonCanonicalImportPath: info.nonCanonicalImportPath, + needsReplace: info.needsReplace, + } +} + +// Load returns a copy of the directoryPackageInfo for absolute directory dir. +func (d *moduleCacheInfo) Load(dir string) (directoryPackageInfo, bool) { + d.mu.Lock() + defer d.mu.Unlock() + info, ok := d.modCacheDirInfo[dir] + if !ok { + return directoryPackageInfo{}, false + } + return *info, true +} diff --git a/internal/imports/mod_test.go b/internal/imports/mod_test.go index a836307680..07c030cd47 100644 --- a/internal/imports/mod_test.go +++ b/internal/imports/mod_test.go @@ -137,6 +137,50 @@ import _ "example.com" } +// Tests that scanning the module cache > 1 after changing a package in module cache to make it unimportable +// is able to find the same module. +func TestModCacheEditModFile(t *testing.T) { + mt := setup(t, ` +-- go.mod -- +module x + +require rsc.io/quote v1.5.2 +-- x.go -- +package x +import _ "rsc.io/quote" +`, "") + defer mt.cleanup() + found := mt.assertScanFinds("rsc.io/quote", "quote") + + // Update the go.mod file of example.com so that it changes its module path (not allowed). + if err := os.Chmod(filepath.Join(found.dir, "go.mod"), 0644); err != nil { + t.Fatal(err) + } + if err := ioutil.WriteFile(filepath.Join(found.dir, "go.mod"), []byte("module bad.com\n"), 0644); err != nil { + t.Fatal(err) + } + + // Test that with its cache of module packages it still finds the package. + mt.assertScanFinds("rsc.io/quote", "quote") + + // Rewrite the main package so that rsc.io/quote is not in scope. + if err := ioutil.WriteFile(filepath.Join(mt.env.WorkingDir, "go.mod"), []byte("module x\n"), 0644); err != nil { + t.Fatal(err) + } + if err := ioutil.WriteFile(filepath.Join(mt.env.WorkingDir, "x.go"), []byte("package x\n"), 0644); err != nil { + t.Fatal(err) + } + + // Uninitialize the go.mod dependent cached information and make sure it still finds the package. + mt.resolver.Initialized = false + mt.resolver.Main = nil + mt.resolver.ModsByModPath = nil + mt.resolver.ModsByDir = nil + mt.resolver.ModCachePkgs = nil + mt.assertScanFinds("rsc.io/quote", "quote") + +} + // Tests that -mod=vendor sort of works. Adapted from mod_getmode_vendor.txt. func TestModeGetmodeVendor(t *testing.T) { mt := setup(t, `