internal/imports: save information about the module cache

The module cache can only be added to, so any information discovered
about directories that are within a module in the module cache will
not change. Store the information we have discovered about the module
cache.

Updates #32750

Change-Id: I56c88f03f6a364221198fb032b139208497cd0e5
Reviewed-on: https://go-review.googlesource.com/c/tools/+/188762
Reviewed-by: Heschi Kreinick <heschi@google.com>
This commit is contained in:
Suzy Mueller 2019-08-02 14:05:22 -04:00
parent f0be937dca
commit 324b35332a
3 changed files with 241 additions and 47 deletions

View File

@ -4,6 +4,7 @@ import (
"bytes"
"context"
"encoding/json"
"fmt"
"io/ioutil"
"os"
"path"
@ -29,7 +30,12 @@ type ModuleResolver struct {
ModsByModPath []*ModuleJSON // All modules, ordered by # of path components in module Path...
ModsByDir []*ModuleJSON // ...or Dir.
ModCachePkgs map[string]*pkg // Packages in the mod cache, keyed by absolute directory.
// ModCachePkgs contains the canonicalized importPath and directory of packages
// in the module cache. Keyed by absolute directory.
ModCachePkgs map[string]*pkg
// moduleCacheInfo stores information about the module cache.
moduleCacheInfo *moduleCacheInfo
}
type ModuleJSON struct {
@ -91,6 +97,11 @@ func (r *ModuleResolver) init() error {
})
r.ModCachePkgs = make(map[string]*pkg)
if r.moduleCacheInfo == nil {
r.moduleCacheInfo = &moduleCacheInfo{
modCacheDirInfo: make(map[string]*directoryPackageInfo),
}
}
r.Initialized = true
return nil
@ -246,36 +257,27 @@ func (r *ModuleResolver) scan(_ references) ([]*pkg, error) {
return
}
subdir := ""
if dir != root.Path {
subdir = dir[len(root.Path)+len("/"):]
}
importPath := filepath.ToSlash(subdir)
if strings.HasPrefix(importPath, "vendor/") {
// Ignore vendor dirs. If -mod=vendor is on, then things
// should mostly just work, but when it's not vendor/
// is a mess. There's no easy way to tell if it's on.
// We can still find things in the mod cache and
// map them into /vendor when -mod=vendor is on.
return
}
switch root.Type {
case gopathwalk.RootCurrentModule:
importPath = path.Join(r.Main.Path, filepath.ToSlash(subdir))
case gopathwalk.RootModuleCache:
matches := modCacheRegexp.FindStringSubmatch(subdir)
modPath, err := module.DecodePath(filepath.ToSlash(matches[1]))
info, ok := r.moduleCacheInfo.Load(dir)
if !ok {
var err error
info, err = r.scanDirForPackage(root, dir)
if err != nil {
if r.env.Debug {
r.env.Logf("decoding module cache path %q: %v", subdir, err)
}
return
}
importPath = path.Join(modPath, filepath.ToSlash(matches[3]))
case gopathwalk.RootGOROOT:
importPath = subdir
if root.Type == gopathwalk.RootModuleCache {
r.moduleCacheInfo.Store(dir, info)
}
}
if info.status < directoryScanned ||
(info.status == directoryScanned && info.err != nil) {
return
}
// The rest of this function canonicalizes the packages using the results
// of initializing the resolver from 'go list -m'.
importPath := info.nonCanonicalImportPath
// Check if the directory is underneath a module that's in scope.
if mod := r.findModuleByDir(dir); mod != nil {
// It is. If dir is the target of a replace directive,
@ -286,26 +288,11 @@ func (r *ModuleResolver) scan(_ references) ([]*pkg, error) {
dirInMod := dir[len(mod.Dir)+len("/"):]
importPath = path.Join(mod.Path, filepath.ToSlash(dirInMod))
}
} else {
// The package is in an unknown module. Check that it's
// not obviously impossible to import.
var modFile string
switch root.Type {
case gopathwalk.RootModuleCache:
matches := modCacheRegexp.FindStringSubmatch(subdir)
modFile = filepath.Join(matches[1], "@", matches[2], "go.mod")
default:
modFile = findModFile(dir)
}
modBytes, err := ioutil.ReadFile(modFile)
if err == nil && !strings.HasPrefix(importPath, modulePath(modBytes)) {
// The module's declared path does not match
// its expected path. It probably needs a
// replace directive we don't have.
return
}
} else if info.needsReplace {
// This package needed a replace target we don't have.
return
}
// We may have discovered a package that has a different version
// in scope already. Canonicalize to that one if possible.
if _, canonicalDir := r.findPackage(importPath); canonicalDir != "" {
@ -317,9 +304,11 @@ func (r *ModuleResolver) scan(_ references) ([]*pkg, error) {
dir: dir,
}
switch root.Type {
case gopathwalk.RootModuleCache:
if root.Type == gopathwalk.RootModuleCache {
// Save the results of processing this directory.
// This needs to be invalidated when the results of
// 'go list -m' would change, as the directory and
// importPath in this map depend on those results.
r.ModCachePkgs[absDir] = res
}
@ -335,6 +324,68 @@ func (r *ModuleResolver) loadExports(ctx context.Context, expectPackage string,
return loadExportsFromFiles(ctx, r.env, expectPackage, pkg.dir)
}
func (r *ModuleResolver) scanDirForPackage(root gopathwalk.Root, dir string) (directoryPackageInfo, error) {
subdir := ""
if dir != root.Path {
subdir = dir[len(root.Path)+len("/"):]
}
importPath := filepath.ToSlash(subdir)
if strings.HasPrefix(importPath, "vendor/") {
// Ignore vendor dirs. If -mod=vendor is on, then things
// should mostly just work, but when it's not vendor/
// is a mess. There's no easy way to tell if it's on.
// We can still find things in the mod cache and
// map them into /vendor when -mod=vendor is on.
return directoryPackageInfo{}, fmt.Errorf("vendor directory")
}
switch root.Type {
case gopathwalk.RootCurrentModule:
importPath = path.Join(r.Main.Path, filepath.ToSlash(subdir))
case gopathwalk.RootModuleCache:
matches := modCacheRegexp.FindStringSubmatch(subdir)
modPath, err := module.DecodePath(filepath.ToSlash(matches[1]))
if err != nil {
if r.env.Debug {
r.env.Logf("decoding module cache path %q: %v", subdir, err)
}
return directoryPackageInfo{
status: directoryScanned,
err: fmt.Errorf("decoding module cache path %q: %v", subdir, err),
}, nil
}
importPath = path.Join(modPath, filepath.ToSlash(matches[3]))
case gopathwalk.RootGOROOT:
importPath = subdir
}
// Check that this package is not obviously impossible to import.
var modFile string
switch root.Type {
case gopathwalk.RootModuleCache:
matches := modCacheRegexp.FindStringSubmatch(subdir)
index := strings.Index(dir, matches[1]+"@"+matches[2])
modFile = filepath.Join(dir[:index], matches[1]+"@"+matches[2], "go.mod")
default:
modFile = findModFile(dir)
}
var needsReplace bool
modBytes, err := ioutil.ReadFile(modFile)
if err == nil && !strings.HasPrefix(importPath, modulePath(modBytes)) {
// The module's declared path does not match
// its expected path. It probably needs a
// replace directive we don't have.
needsReplace = true
}
return directoryPackageInfo{
status: directoryScanned,
dir: dir,
nonCanonicalImportPath: importPath,
needsReplace: needsReplace,
}, nil
}
// modCacheRegexp splits a path in a module cache into module, module version, and package.
var modCacheRegexp = regexp.MustCompile(`(.*)@([^/\\]*)(.*)`)

View File

@ -0,0 +1,99 @@
package imports
import (
"sync"
)
// ModuleResolver implements Resolver for modules using the go command as little
// as feasible.
//
// To find packages to import, the resolver needs to know about all of the
// the packages that could be imported. This includes packages that are
// already in modules that are in (1) the current module, (2) replace targets,
// and (3) packages in the module cache. Packages in (1) and (2) may change over
// time, as the client may edit the current module and locally replaced modules.
// The module cache (which includes all of the packages in (3)) can only
// ever be added to.
//
// The resolver can thus save state about packages in the module cache
// and guarantee that this will not change over time. To obtain information
// about new modules added to the module cache, the module cache should be
// rescanned.
//
// It is OK to serve information about modules that have been deleted,
// as they do still exist.
// TODO(suzmue): can we share information with the caller about
// what module needs to be downloaded to import this package?
type directoryPackageStatus int
const (
_ directoryPackageStatus = iota
directoryScanned
)
type directoryPackageInfo struct {
// status indicates the extent to which this struct has been filled in.
status directoryPackageStatus
// err is non-nil when there was an error trying to reach status.
err error
// Set when status > directoryScanned.
// dir is the absolute directory of this package.
dir string
// nonCanonicalImportPath is the expected import path for this package.
// This may not be an import path that can be used to import this package.
nonCanonicalImportPath string
// needsReplace is true if the nonCanonicalImportPath does not match the
// the modules declared path, making it impossible to import without a
// replace directive.
needsReplace bool
}
// moduleCacheInfo is a concurrency safe map for storing information about
// the directories in the module cache.
//
// The information in this cache is built incrementally. Entries are initialized in scan.
// No new keys should be added in any other functions, as all directories containing
// packages are identified in scan.
//
// Other functions, including loadExports and findPackage, may update entries in this cache
// as they discover new things about the directory.
//
// We do not need to protect the data in the cache for multiple writes, because it only stores
// module cache directories, which do not change. If two competing stores take place, there will be
// one store that wins. Although this could result in a loss of information it will not be incorrect
// and may just result in recomputing the same result later.
//
// TODO(suzmue): consider other concurrency strategies and data structures (RWLocks, sync.Map, etc)
type moduleCacheInfo struct {
mu sync.Mutex
// modCacheDirInfo stores information about packages in
// module cache directories. Keyed by absolute directory.
modCacheDirInfo map[string]*directoryPackageInfo
}
// Store stores the package info for dir.
func (d *moduleCacheInfo) Store(dir string, info directoryPackageInfo) {
d.mu.Lock()
defer d.mu.Unlock()
d.modCacheDirInfo[dir] = &directoryPackageInfo{
status: info.status,
err: info.err,
dir: info.dir,
nonCanonicalImportPath: info.nonCanonicalImportPath,
needsReplace: info.needsReplace,
}
}
// Load returns a copy of the directoryPackageInfo for absolute directory dir.
func (d *moduleCacheInfo) Load(dir string) (directoryPackageInfo, bool) {
d.mu.Lock()
defer d.mu.Unlock()
info, ok := d.modCacheDirInfo[dir]
if !ok {
return directoryPackageInfo{}, false
}
return *info, true
}

View File

@ -137,6 +137,50 @@ import _ "example.com"
}
// Tests that scanning the module cache > 1 after changing a package in module cache to make it unimportable
// is able to find the same module.
func TestModCacheEditModFile(t *testing.T) {
mt := setup(t, `
-- go.mod --
module x
require rsc.io/quote v1.5.2
-- x.go --
package x
import _ "rsc.io/quote"
`, "")
defer mt.cleanup()
found := mt.assertScanFinds("rsc.io/quote", "quote")
// Update the go.mod file of example.com so that it changes its module path (not allowed).
if err := os.Chmod(filepath.Join(found.dir, "go.mod"), 0644); err != nil {
t.Fatal(err)
}
if err := ioutil.WriteFile(filepath.Join(found.dir, "go.mod"), []byte("module bad.com\n"), 0644); err != nil {
t.Fatal(err)
}
// Test that with its cache of module packages it still finds the package.
mt.assertScanFinds("rsc.io/quote", "quote")
// Rewrite the main package so that rsc.io/quote is not in scope.
if err := ioutil.WriteFile(filepath.Join(mt.env.WorkingDir, "go.mod"), []byte("module x\n"), 0644); err != nil {
t.Fatal(err)
}
if err := ioutil.WriteFile(filepath.Join(mt.env.WorkingDir, "x.go"), []byte("package x\n"), 0644); err != nil {
t.Fatal(err)
}
// Uninitialize the go.mod dependent cached information and make sure it still finds the package.
mt.resolver.Initialized = false
mt.resolver.Main = nil
mt.resolver.ModsByModPath = nil
mt.resolver.ModsByDir = nil
mt.resolver.ModCachePkgs = nil
mt.assertScanFinds("rsc.io/quote", "quote")
}
// Tests that -mod=vendor sort of works. Adapted from mod_getmode_vendor.txt.
func TestModeGetmodeVendor(t *testing.T) {
mt := setup(t, `