From a41fc9869a5ab7b215c33cb7549d3900adc27c5b Mon Sep 17 00:00:00 2001 From: Alan Donovan Date: Thu, 9 Jun 2022 18:38:26 -0400 Subject: [PATCH] internal/lsp/cache: use [256]byte Hash instead of hex digit string I had hoped to see a reduction in total allocation, but it does not appear to be significant according to the included crude benchmark. Nonetheless this is a slight code clarity improvement. Change-Id: I94a503b377dd1146eb371ff11222a351cb5a43b7 Reviewed-on: https://go-review.googlesource.com/c/tools/+/411655 TryBot-Result: Gopher Robot Run-TryBot: Robert Findley gopls-CI: kokoro Reviewed-by: Robert Findley --- gopls/internal/regtest/bench/bench_test.go | 30 +++++++++++++++++++++ internal/lsp/cache/analysis.go | 4 +-- internal/lsp/cache/cache.go | 19 ++----------- internal/lsp/cache/check.go | 31 +++++++++++----------- internal/lsp/cache/imports.go | 6 ++--- internal/lsp/cache/mod.go | 8 +++--- internal/lsp/cache/mod_tidy.go | 17 +++++------- internal/lsp/cache/session.go | 4 +-- internal/lsp/cache/snapshot.go | 24 +++-------------- internal/lsp/cache/symbols.go | 2 +- internal/lsp/cache/view.go | 2 +- internal/lsp/debug/serve.go | 3 ++- internal/lsp/source/view.go | 29 +++++++++++++++++--- 13 files changed, 98 insertions(+), 81 deletions(-) diff --git a/gopls/internal/regtest/bench/bench_test.go b/gopls/internal/regtest/bench/bench_test.go index 5e4eb5fc23..22f157f471 100644 --- a/gopls/internal/regtest/bench/bench_test.go +++ b/gopls/internal/regtest/bench/bench_test.go @@ -8,6 +8,7 @@ import ( "flag" "fmt" "os" + "runtime" "runtime/pprof" "testing" @@ -66,6 +67,7 @@ func TestBenchmarkIWL(t *testing.T) { results := testing.Benchmark(func(b *testing.B) { for i := 0; i < b.N; i++ { WithOptions(opts...).Run(t, "", func(t *testing.T, env *Env) {}) + } }) @@ -192,3 +194,31 @@ func TestBenchmarkDidChange(t *testing.T) { printBenchmarkResults(result) }) } + +// TestPrintMemStats measures the memory usage of loading a project. +// It uses the same -didchange_dir flag as above. +// Always run it in isolation since it measures global heap usage. +// +// Kubernetes example: +// $ go test -run=TestPrintMemStats -didchange_dir=$HOME/w/kubernetes +// TotalAlloc: 5766 MB +// HeapAlloc: 1984 MB +// +// Both figures exhibit variance of less than 1%. +func TestPrintMemStats(t *testing.T) { + if *benchDir == "" { + t.Skip("-didchange_dir is not set") + } + + // Load the program... + opts := benchmarkOptions(*benchDir) + WithOptions(opts...).Run(t, "", func(_ *testing.T, env *Env) { + // ...and print the memory usage. + runtime.GC() + runtime.GC() + var mem runtime.MemStats + runtime.ReadMemStats(&mem) + t.Logf("TotalAlloc:\t%d MB", mem.TotalAlloc/1e6) + t.Logf("HeapAlloc:\t%d MB", mem.HeapAlloc/1e6) + }) +} diff --git a/internal/lsp/cache/analysis.go b/internal/lsp/cache/analysis.go index e882fb46f0..9f7a19c5c6 100644 --- a/internal/lsp/cache/analysis.go +++ b/internal/lsp/cache/analysis.go @@ -54,7 +54,7 @@ func (s *snapshot) Analyze(ctx context.Context, id string, analyzers []*source.A return results, nil } -type actionHandleKey string +type actionHandleKey source.Hash // An action represents one unit of analysis work: the application of // one analysis to one package. Actions form a DAG, both within a @@ -170,7 +170,7 @@ func (act *actionHandle) analyze(ctx context.Context, snapshot *snapshot) ([]*so } func buildActionKey(a *analysis.Analyzer, ph *packageHandle) actionHandleKey { - return actionHandleKey(hashContents([]byte(fmt.Sprintf("%p %s", a, string(ph.key))))) + return actionHandleKey(source.Hashf("%p%s", a, ph.key[:])) } func (act *actionHandle) String() string { diff --git a/internal/lsp/cache/cache.go b/internal/lsp/cache/cache.go index f5796dfefa..2a8a169d51 100644 --- a/internal/lsp/cache/cache.go +++ b/internal/lsp/cache/cache.go @@ -6,7 +6,6 @@ package cache import ( "context" - "crypto/sha256" "fmt" "go/ast" "go/token" @@ -55,7 +54,7 @@ type fileHandle struct { modTime time.Time uri span.URI bytes []byte - hash string + hash source.Hash err error // size is the file length as reported by Stat, for the purpose of @@ -139,7 +138,7 @@ func readFile(ctx context.Context, uri span.URI, fi os.FileInfo) (*fileHandle, e size: fi.Size(), uri: uri, bytes: data, - hash: hashContents(data), + hash: source.HashOf(data), }, nil } @@ -168,10 +167,6 @@ func (h *fileHandle) URI() span.URI { return h.uri } -func (h *fileHandle) Hash() string { - return h.hash -} - func (h *fileHandle) FileIdentity() source.FileIdentity { return source.FileIdentity{ URI: h.uri, @@ -183,16 +178,6 @@ func (h *fileHandle) Read() ([]byte, error) { return h.bytes, h.err } -// hashContents returns a string of hex digits denoting the hash of contents. -// -// TODO(adonovan): opt: use [32]byte array as a value more widely and convert -// to hex digits on demand (rare). The array is larger when it appears as a -// struct field (32B vs 16B) but smaller overall (string data is 64B), has -// better locality, and is more efficiently hashed by runtime maps. -func hashContents(contents []byte) string { - return fmt.Sprintf("%64x", sha256.Sum256(contents)) -} - var cacheIndex, sessionIndex, viewIndex int64 func (c *Cache) ID() string { return c.id } diff --git a/internal/lsp/cache/check.go b/internal/lsp/cache/check.go index b8a3655a9d..f09fc298a9 100644 --- a/internal/lsp/cache/check.go +++ b/internal/lsp/cache/check.go @@ -32,7 +32,7 @@ import ( "golang.org/x/tools/internal/typesinternal" ) -type packageHandleKey string +type packageHandleKey source.Hash type packageHandle struct { handle *memoize.Handle @@ -187,7 +187,7 @@ func (s *snapshot) buildKey(ctx context.Context, id PackageID, mode source.Parse } // One bad dependency should not prevent us from checking the entire package. // Add a special key to mark a bad dependency. - depKeys = append(depKeys, packageHandleKey(fmt.Sprintf("%s import not found", depID))) + depKeys = append(depKeys, packageHandleKey(source.Hashf("%s import not found", depID))) continue } deps[depHandle.m.PkgPath] = depHandle @@ -215,6 +215,8 @@ func (s *snapshot) workspaceParseMode(id PackageID) source.ParseMode { } func checkPackageKey(id PackageID, pghs []*parseGoHandle, m *KnownMetadata, deps []packageHandleKey, mode source.ParseMode, experimentalKey bool) packageHandleKey { + // TODO(adonovan): opt: no need to materalize the bytes; hash them directly. + // Also, use field separators to avoid spurious collisions. b := bytes.NewBuffer(nil) b.WriteString(string(id)) if m.Module != nil { @@ -225,38 +227,37 @@ func checkPackageKey(id PackageID, pghs []*parseGoHandle, m *KnownMetadata, deps // files, and deps). It should not otherwise affect the inputs to the type // checker, so this experiment omits it. This should increase cache hits on // the daemon as cfg contains the environment and working directory. - b.WriteString(hashConfig(m.Config)) + hc := hashConfig(m.Config) + b.Write(hc[:]) } b.WriteByte(byte(mode)) for _, dep := range deps { - b.WriteString(string(dep)) + b.Write(dep[:]) } for _, cgf := range pghs { b.WriteString(cgf.file.FileIdentity().String()) } - return packageHandleKey(hashContents(b.Bytes())) + return packageHandleKey(source.HashOf(b.Bytes())) } // hashEnv returns a hash of the snapshot's configuration. -func hashEnv(s *snapshot) string { +func hashEnv(s *snapshot) source.Hash { s.view.optionsMu.Lock() env := s.view.options.EnvSlice() s.view.optionsMu.Unlock() - b := &bytes.Buffer{} - for _, e := range env { - b.WriteString(e) - } - return hashContents(b.Bytes()) + return source.Hashf("%s", env) } // hashConfig returns the hash for the *packages.Config. -func hashConfig(config *packages.Config) string { - b := bytes.NewBuffer(nil) +func hashConfig(config *packages.Config) source.Hash { + // TODO(adonovan): opt: don't materialize the bytes; hash them directly. + // Also, use sound field separators to avoid collisions. + var b bytes.Buffer // Dir, Mode, Env, BuildFlags are the parts of the config that can change. b.WriteString(config.Dir) - b.WriteString(string(rune(config.Mode))) + b.WriteRune(rune(config.Mode)) for _, e := range config.Env { b.WriteString(e) @@ -264,7 +265,7 @@ func hashConfig(config *packages.Config) string { for _, f := range config.BuildFlags { b.WriteString(f) } - return hashContents(b.Bytes()) + return source.HashOf(b.Bytes()) } func (ph *packageHandle) Check(ctx context.Context, s source.Snapshot) (source.Package, error) { diff --git a/internal/lsp/cache/imports.go b/internal/lsp/cache/imports.go index 01a2468ef3..f333f700dd 100644 --- a/internal/lsp/cache/imports.go +++ b/internal/lsp/cache/imports.go @@ -27,7 +27,7 @@ type importsState struct { cleanupProcessEnv func() cacheRefreshDuration time.Duration cacheRefreshTimer *time.Timer - cachedModFileHash string + cachedModFileHash source.Hash cachedBuildFlags []string } @@ -38,7 +38,7 @@ func (s *importsState) runProcessEnvFunc(ctx context.Context, snapshot *snapshot // Find the hash of the active mod file, if any. Using the unsaved content // is slightly wasteful, since we'll drop caches a little too often, but // the mod file shouldn't be changing while people are autocompleting. - var modFileHash string + var modFileHash source.Hash // If we are using 'legacyWorkspace' mode, we can just read the modfile from // the snapshot. Otherwise, we need to get the synthetic workspace mod file. // @@ -61,7 +61,7 @@ func (s *importsState) runProcessEnvFunc(ctx context.Context, snapshot *snapshot if err != nil { return err } - modFileHash = hashContents(modBytes) + modFileHash = source.HashOf(modBytes) } // view.goEnv is immutable -- changes make a new view. Options can change. diff --git a/internal/lsp/cache/mod.go b/internal/lsp/cache/mod.go index 5ac199bd96..c076f424dc 100644 --- a/internal/lsp/cache/mod.go +++ b/internal/lsp/cache/mod.go @@ -201,9 +201,11 @@ func sumFilename(modURI span.URI) string { // modKey is uniquely identifies cached data for `go mod why` or dependencies // to upgrade. type modKey struct { - sessionID, env, view string - mod source.FileIdentity - verb modAction + sessionID string + env source.Hash + view string + mod source.FileIdentity + verb modAction } type modAction int diff --git a/internal/lsp/cache/mod_tidy.go b/internal/lsp/cache/mod_tidy.go index aa525e7413..bd2ff0c5f8 100644 --- a/internal/lsp/cache/mod_tidy.go +++ b/internal/lsp/cache/mod_tidy.go @@ -29,10 +29,10 @@ import ( type modTidyKey struct { sessionID string - env string + env source.Hash gomod source.FileIdentity - imports string - unsavedOverlays string + imports source.Hash + unsavedOverlays source.Hash view string } @@ -81,10 +81,6 @@ func (s *snapshot) ModTidy(ctx context.Context, pm *source.ParsedModule) (*sourc if err != nil { return nil, err } - importHash, err := s.hashImports(ctx, workspacePkgs) - if err != nil { - return nil, err - } s.mu.Lock() overlayHash := hashUnsavedOverlays(s.files) @@ -93,7 +89,7 @@ func (s *snapshot) ModTidy(ctx context.Context, pm *source.ParsedModule) (*sourc key := modTidyKey{ sessionID: s.view.session.id, view: s.view.folder.Filename(), - imports: importHash, + imports: s.hashImports(ctx, workspacePkgs), unsavedOverlays: overlayHash, gomod: fh.FileIdentity(), env: hashEnv(s), @@ -152,7 +148,7 @@ func (s *snapshot) ModTidy(ctx context.Context, pm *source.ParsedModule) (*sourc return mth.tidy(ctx, s) } -func (s *snapshot) hashImports(ctx context.Context, wsPackages []*packageHandle) (string, error) { +func (s *snapshot) hashImports(ctx context.Context, wsPackages []*packageHandle) source.Hash { seen := map[string]struct{}{} var imports []string for _, ph := range wsPackages { @@ -164,8 +160,7 @@ func (s *snapshot) hashImports(ctx context.Context, wsPackages []*packageHandle) } } sort.Strings(imports) - hashed := strings.Join(imports, ",") - return hashContents([]byte(hashed)), nil + return source.Hashf("%s", imports) } // modTidyDiagnostics computes the differences between the original and tidied diff --git a/internal/lsp/cache/session.go b/internal/lsp/cache/session.go index 9da5c1e69f..cbb5874062 100644 --- a/internal/lsp/cache/session.go +++ b/internal/lsp/cache/session.go @@ -44,7 +44,7 @@ type overlay struct { session *Session uri span.URI text []byte - hash string + hash source.Hash version int32 kind source.FileKind @@ -637,7 +637,7 @@ func (s *Session) updateOverlays(ctx context.Context, changes []source.FileModif if c.OnDisk || c.Action == source.Save { version = o.version } - hash := hashContents(text) + hash := source.HashOf(text) var sameContentOnDisk bool switch c.Action { case source.Delete: diff --git a/internal/lsp/cache/snapshot.go b/internal/lsp/cache/snapshot.go index 0d3c869cd2..6edd1dbe65 100644 --- a/internal/lsp/cache/snapshot.go +++ b/internal/lsp/cache/snapshot.go @@ -466,7 +466,7 @@ func (s *snapshot) buildOverlay() map[string][]byte { return overlays } -func hashUnsavedOverlays(files map[span.URI]source.VersionedFileHandle) string { +func hashUnsavedOverlays(files map[span.URI]source.VersionedFileHandle) source.Hash { var unsaved []string for uri, fh := range files { if overlay, ok := fh.(*overlay); ok && !overlay.saved { @@ -474,7 +474,7 @@ func hashUnsavedOverlays(files map[span.URI]source.VersionedFileHandle) string { } } sort.Strings(unsaved) - return hashContents([]byte(strings.Join(unsaved, ""))) + return source.Hashf("%s", unsaved) } func (s *snapshot) PackagesForFile(ctx context.Context, uri span.URI, mode source.TypecheckMode, includeTestVariants bool) ([]source.Package, error) { @@ -2652,25 +2652,7 @@ func (m *goFileMap) forEachConcurrent(f func(parseKey, *parseGoHandle)) { // -- internal-- // hash returns 8 bits from the key's file digest. -func (m *goFileMap) hash(k parseKey) int { - h := k.file.Hash - if h == "" { - // Sadly the Hash isn't always a hash because cache.GetFile may - // successfully return a *fileHandle containing an error and no hash. - // Lump the duds together for now. - // TODO(adonovan): fix the underlying bug. - return 0 - } - return unhex(h[0])<<4 | unhex(h[1]) -} - -// unhex returns the value of a valid hex digit. -func unhex(b byte) int { - if '0' <= b && b <= '9' { - return int(b - '0') - } - return int(b) & ^0x20 - 'A' + 0xA // [a-fA-F] -} +func (*goFileMap) hash(k parseKey) byte { return k.file.Hash[0] } // unshare makes k's stripe exclusive, allocating a copy if needed, and returns it. func (m *goFileMap) unshare(k parseKey) map[parseKey]*parseGoHandle { diff --git a/internal/lsp/cache/symbols.go b/internal/lsp/cache/symbols.go index db68912015..bf5e00b164 100644 --- a/internal/lsp/cache/symbols.go +++ b/internal/lsp/cache/symbols.go @@ -33,7 +33,7 @@ type symbolData struct { err error } -type symbolHandleKey string +type symbolHandleKey source.Hash func (s *snapshot) buildSymbolHandle(ctx context.Context, fh source.FileHandle) *symbolHandle { if h := s.getSymbolHandle(fh.URI()); h != nil { diff --git a/internal/lsp/cache/view.go b/internal/lsp/cache/view.go index b0390a3fbd..0ed9883451 100644 --- a/internal/lsp/cache/view.go +++ b/internal/lsp/cache/view.go @@ -165,7 +165,7 @@ func (v *View) ID() string { return v.id } // given go.mod file. It is the caller's responsibility to clean up the files // when they are done using them. func tempModFile(modFh source.FileHandle, gosum []byte) (tmpURI span.URI, cleanup func(), err error) { - filenameHash := hashContents([]byte(modFh.URI().Filename())) + filenameHash := source.Hashf("%s", modFh.URI().Filename()) tmpMod, err := ioutil.TempFile("", fmt.Sprintf("go.%s.*.mod", filenameHash)) if err != nil { return "", nil, err diff --git a/internal/lsp/debug/serve.go b/internal/lsp/debug/serve.go index 0bdee92c5e..d343a6d65a 100644 --- a/internal/lsp/debug/serve.go +++ b/internal/lsp/debug/serve.go @@ -320,7 +320,8 @@ func (i *Instance) getFile(r *http.Request) interface{} { return nil } for _, o := range s.Overlays() { - if o.FileIdentity().Hash == identifier { + // TODO(adonovan): understand and document this comparison. + if o.FileIdentity().Hash.String() == identifier { return o } } diff --git a/internal/lsp/source/view.go b/internal/lsp/source/view.go index 5b908bc721..7960b0c036 100644 --- a/internal/lsp/source/view.go +++ b/internal/lsp/source/view.go @@ -7,6 +7,7 @@ package source import ( "bytes" "context" + "crypto/sha256" "errors" "fmt" "go/ast" @@ -528,12 +529,32 @@ type FileHandle interface { Saved() bool } +// A Hash is a cryptographic digest of the contents of a file. +// (Although at 32B it is larger than a 16B string header, it is smaller +// and has better locality than the string header + 64B of hex digits.) +type Hash [sha256.Size]byte + +// HashOf returns the hash of some data. +func HashOf(data []byte) Hash { + return Hash(sha256.Sum256(data)) +} + +// Hashf returns the hash of a printf-formatted string. +func Hashf(format string, args ...interface{}) Hash { + // Although this looks alloc-heavy, it is faster than using + // Fprintf on sha256.New() because the allocations don't escape. + return HashOf([]byte(fmt.Sprintf(format, args...))) +} + +// String returns the digest as a string of hex digits. +func (h Hash) String() string { + return fmt.Sprintf("%64x", [sha256.Size]byte(h)) +} + // FileIdentity uniquely identifies a file at a version from a FileSystem. type FileIdentity struct { - URI span.URI - - // Hash is a string of hex digits denoting the cryptographic digest of the file's content. - Hash string + URI span.URI + Hash Hash // digest of file contents } func (id FileIdentity) String() string {