internal/lsp/cache: use [256]byte Hash instead of hex digit string

I had hoped to see a reduction in total allocation, but it does not
appear to be significant according to the included crude benchmark.
Nonetheless this is a slight code clarity improvement.

Change-Id: I94a503b377dd1146eb371ff11222a351cb5a43b7
Reviewed-on: https://go-review.googlesource.com/c/tools/+/411655
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Robert Findley <rfindley@google.com>
gopls-CI: kokoro <noreply+kokoro@google.com>
Reviewed-by: Robert Findley <rfindley@google.com>
This commit is contained in:
Alan Donovan 2022-06-09 18:38:26 -04:00
parent c41ddceaa4
commit a41fc9869a
13 changed files with 98 additions and 81 deletions

View File

@ -8,6 +8,7 @@ import (
"flag"
"fmt"
"os"
"runtime"
"runtime/pprof"
"testing"
@ -66,6 +67,7 @@ func TestBenchmarkIWL(t *testing.T) {
results := testing.Benchmark(func(b *testing.B) {
for i := 0; i < b.N; i++ {
WithOptions(opts...).Run(t, "", func(t *testing.T, env *Env) {})
}
})
@ -192,3 +194,31 @@ func TestBenchmarkDidChange(t *testing.T) {
printBenchmarkResults(result)
})
}
// TestPrintMemStats measures the memory usage of loading a project.
// It uses the same -didchange_dir flag as above.
// Always run it in isolation since it measures global heap usage.
//
// Kubernetes example:
// $ go test -run=TestPrintMemStats -didchange_dir=$HOME/w/kubernetes
// TotalAlloc: 5766 MB
// HeapAlloc: 1984 MB
//
// Both figures exhibit variance of less than 1%.
func TestPrintMemStats(t *testing.T) {
if *benchDir == "" {
t.Skip("-didchange_dir is not set")
}
// Load the program...
opts := benchmarkOptions(*benchDir)
WithOptions(opts...).Run(t, "", func(_ *testing.T, env *Env) {
// ...and print the memory usage.
runtime.GC()
runtime.GC()
var mem runtime.MemStats
runtime.ReadMemStats(&mem)
t.Logf("TotalAlloc:\t%d MB", mem.TotalAlloc/1e6)
t.Logf("HeapAlloc:\t%d MB", mem.HeapAlloc/1e6)
})
}

View File

@ -54,7 +54,7 @@ func (s *snapshot) Analyze(ctx context.Context, id string, analyzers []*source.A
return results, nil
}
type actionHandleKey string
type actionHandleKey source.Hash
// An action represents one unit of analysis work: the application of
// one analysis to one package. Actions form a DAG, both within a
@ -170,7 +170,7 @@ func (act *actionHandle) analyze(ctx context.Context, snapshot *snapshot) ([]*so
}
func buildActionKey(a *analysis.Analyzer, ph *packageHandle) actionHandleKey {
return actionHandleKey(hashContents([]byte(fmt.Sprintf("%p %s", a, string(ph.key)))))
return actionHandleKey(source.Hashf("%p%s", a, ph.key[:]))
}
func (act *actionHandle) String() string {

View File

@ -6,7 +6,6 @@ package cache
import (
"context"
"crypto/sha256"
"fmt"
"go/ast"
"go/token"
@ -55,7 +54,7 @@ type fileHandle struct {
modTime time.Time
uri span.URI
bytes []byte
hash string
hash source.Hash
err error
// size is the file length as reported by Stat, for the purpose of
@ -139,7 +138,7 @@ func readFile(ctx context.Context, uri span.URI, fi os.FileInfo) (*fileHandle, e
size: fi.Size(),
uri: uri,
bytes: data,
hash: hashContents(data),
hash: source.HashOf(data),
}, nil
}
@ -168,10 +167,6 @@ func (h *fileHandle) URI() span.URI {
return h.uri
}
func (h *fileHandle) Hash() string {
return h.hash
}
func (h *fileHandle) FileIdentity() source.FileIdentity {
return source.FileIdentity{
URI: h.uri,
@ -183,16 +178,6 @@ func (h *fileHandle) Read() ([]byte, error) {
return h.bytes, h.err
}
// hashContents returns a string of hex digits denoting the hash of contents.
//
// TODO(adonovan): opt: use [32]byte array as a value more widely and convert
// to hex digits on demand (rare). The array is larger when it appears as a
// struct field (32B vs 16B) but smaller overall (string data is 64B), has
// better locality, and is more efficiently hashed by runtime maps.
func hashContents(contents []byte) string {
return fmt.Sprintf("%64x", sha256.Sum256(contents))
}
var cacheIndex, sessionIndex, viewIndex int64
func (c *Cache) ID() string { return c.id }

View File

@ -32,7 +32,7 @@ import (
"golang.org/x/tools/internal/typesinternal"
)
type packageHandleKey string
type packageHandleKey source.Hash
type packageHandle struct {
handle *memoize.Handle
@ -187,7 +187,7 @@ func (s *snapshot) buildKey(ctx context.Context, id PackageID, mode source.Parse
}
// One bad dependency should not prevent us from checking the entire package.
// Add a special key to mark a bad dependency.
depKeys = append(depKeys, packageHandleKey(fmt.Sprintf("%s import not found", depID)))
depKeys = append(depKeys, packageHandleKey(source.Hashf("%s import not found", depID)))
continue
}
deps[depHandle.m.PkgPath] = depHandle
@ -215,6 +215,8 @@ func (s *snapshot) workspaceParseMode(id PackageID) source.ParseMode {
}
func checkPackageKey(id PackageID, pghs []*parseGoHandle, m *KnownMetadata, deps []packageHandleKey, mode source.ParseMode, experimentalKey bool) packageHandleKey {
// TODO(adonovan): opt: no need to materalize the bytes; hash them directly.
// Also, use field separators to avoid spurious collisions.
b := bytes.NewBuffer(nil)
b.WriteString(string(id))
if m.Module != nil {
@ -225,38 +227,37 @@ func checkPackageKey(id PackageID, pghs []*parseGoHandle, m *KnownMetadata, deps
// files, and deps). It should not otherwise affect the inputs to the type
// checker, so this experiment omits it. This should increase cache hits on
// the daemon as cfg contains the environment and working directory.
b.WriteString(hashConfig(m.Config))
hc := hashConfig(m.Config)
b.Write(hc[:])
}
b.WriteByte(byte(mode))
for _, dep := range deps {
b.WriteString(string(dep))
b.Write(dep[:])
}
for _, cgf := range pghs {
b.WriteString(cgf.file.FileIdentity().String())
}
return packageHandleKey(hashContents(b.Bytes()))
return packageHandleKey(source.HashOf(b.Bytes()))
}
// hashEnv returns a hash of the snapshot's configuration.
func hashEnv(s *snapshot) string {
func hashEnv(s *snapshot) source.Hash {
s.view.optionsMu.Lock()
env := s.view.options.EnvSlice()
s.view.optionsMu.Unlock()
b := &bytes.Buffer{}
for _, e := range env {
b.WriteString(e)
}
return hashContents(b.Bytes())
return source.Hashf("%s", env)
}
// hashConfig returns the hash for the *packages.Config.
func hashConfig(config *packages.Config) string {
b := bytes.NewBuffer(nil)
func hashConfig(config *packages.Config) source.Hash {
// TODO(adonovan): opt: don't materialize the bytes; hash them directly.
// Also, use sound field separators to avoid collisions.
var b bytes.Buffer
// Dir, Mode, Env, BuildFlags are the parts of the config that can change.
b.WriteString(config.Dir)
b.WriteString(string(rune(config.Mode)))
b.WriteRune(rune(config.Mode))
for _, e := range config.Env {
b.WriteString(e)
@ -264,7 +265,7 @@ func hashConfig(config *packages.Config) string {
for _, f := range config.BuildFlags {
b.WriteString(f)
}
return hashContents(b.Bytes())
return source.HashOf(b.Bytes())
}
func (ph *packageHandle) Check(ctx context.Context, s source.Snapshot) (source.Package, error) {

View File

@ -27,7 +27,7 @@ type importsState struct {
cleanupProcessEnv func()
cacheRefreshDuration time.Duration
cacheRefreshTimer *time.Timer
cachedModFileHash string
cachedModFileHash source.Hash
cachedBuildFlags []string
}
@ -38,7 +38,7 @@ func (s *importsState) runProcessEnvFunc(ctx context.Context, snapshot *snapshot
// Find the hash of the active mod file, if any. Using the unsaved content
// is slightly wasteful, since we'll drop caches a little too often, but
// the mod file shouldn't be changing while people are autocompleting.
var modFileHash string
var modFileHash source.Hash
// If we are using 'legacyWorkspace' mode, we can just read the modfile from
// the snapshot. Otherwise, we need to get the synthetic workspace mod file.
//
@ -61,7 +61,7 @@ func (s *importsState) runProcessEnvFunc(ctx context.Context, snapshot *snapshot
if err != nil {
return err
}
modFileHash = hashContents(modBytes)
modFileHash = source.HashOf(modBytes)
}
// view.goEnv is immutable -- changes make a new view. Options can change.

View File

@ -201,9 +201,11 @@ func sumFilename(modURI span.URI) string {
// modKey is uniquely identifies cached data for `go mod why` or dependencies
// to upgrade.
type modKey struct {
sessionID, env, view string
mod source.FileIdentity
verb modAction
sessionID string
env source.Hash
view string
mod source.FileIdentity
verb modAction
}
type modAction int

View File

@ -29,10 +29,10 @@ import (
type modTidyKey struct {
sessionID string
env string
env source.Hash
gomod source.FileIdentity
imports string
unsavedOverlays string
imports source.Hash
unsavedOverlays source.Hash
view string
}
@ -81,10 +81,6 @@ func (s *snapshot) ModTidy(ctx context.Context, pm *source.ParsedModule) (*sourc
if err != nil {
return nil, err
}
importHash, err := s.hashImports(ctx, workspacePkgs)
if err != nil {
return nil, err
}
s.mu.Lock()
overlayHash := hashUnsavedOverlays(s.files)
@ -93,7 +89,7 @@ func (s *snapshot) ModTidy(ctx context.Context, pm *source.ParsedModule) (*sourc
key := modTidyKey{
sessionID: s.view.session.id,
view: s.view.folder.Filename(),
imports: importHash,
imports: s.hashImports(ctx, workspacePkgs),
unsavedOverlays: overlayHash,
gomod: fh.FileIdentity(),
env: hashEnv(s),
@ -152,7 +148,7 @@ func (s *snapshot) ModTidy(ctx context.Context, pm *source.ParsedModule) (*sourc
return mth.tidy(ctx, s)
}
func (s *snapshot) hashImports(ctx context.Context, wsPackages []*packageHandle) (string, error) {
func (s *snapshot) hashImports(ctx context.Context, wsPackages []*packageHandle) source.Hash {
seen := map[string]struct{}{}
var imports []string
for _, ph := range wsPackages {
@ -164,8 +160,7 @@ func (s *snapshot) hashImports(ctx context.Context, wsPackages []*packageHandle)
}
}
sort.Strings(imports)
hashed := strings.Join(imports, ",")
return hashContents([]byte(hashed)), nil
return source.Hashf("%s", imports)
}
// modTidyDiagnostics computes the differences between the original and tidied

View File

@ -44,7 +44,7 @@ type overlay struct {
session *Session
uri span.URI
text []byte
hash string
hash source.Hash
version int32
kind source.FileKind
@ -637,7 +637,7 @@ func (s *Session) updateOverlays(ctx context.Context, changes []source.FileModif
if c.OnDisk || c.Action == source.Save {
version = o.version
}
hash := hashContents(text)
hash := source.HashOf(text)
var sameContentOnDisk bool
switch c.Action {
case source.Delete:

View File

@ -466,7 +466,7 @@ func (s *snapshot) buildOverlay() map[string][]byte {
return overlays
}
func hashUnsavedOverlays(files map[span.URI]source.VersionedFileHandle) string {
func hashUnsavedOverlays(files map[span.URI]source.VersionedFileHandle) source.Hash {
var unsaved []string
for uri, fh := range files {
if overlay, ok := fh.(*overlay); ok && !overlay.saved {
@ -474,7 +474,7 @@ func hashUnsavedOverlays(files map[span.URI]source.VersionedFileHandle) string {
}
}
sort.Strings(unsaved)
return hashContents([]byte(strings.Join(unsaved, "")))
return source.Hashf("%s", unsaved)
}
func (s *snapshot) PackagesForFile(ctx context.Context, uri span.URI, mode source.TypecheckMode, includeTestVariants bool) ([]source.Package, error) {
@ -2652,25 +2652,7 @@ func (m *goFileMap) forEachConcurrent(f func(parseKey, *parseGoHandle)) {
// -- internal--
// hash returns 8 bits from the key's file digest.
func (m *goFileMap) hash(k parseKey) int {
h := k.file.Hash
if h == "" {
// Sadly the Hash isn't always a hash because cache.GetFile may
// successfully return a *fileHandle containing an error and no hash.
// Lump the duds together for now.
// TODO(adonovan): fix the underlying bug.
return 0
}
return unhex(h[0])<<4 | unhex(h[1])
}
// unhex returns the value of a valid hex digit.
func unhex(b byte) int {
if '0' <= b && b <= '9' {
return int(b - '0')
}
return int(b) & ^0x20 - 'A' + 0xA // [a-fA-F]
}
func (*goFileMap) hash(k parseKey) byte { return k.file.Hash[0] }
// unshare makes k's stripe exclusive, allocating a copy if needed, and returns it.
func (m *goFileMap) unshare(k parseKey) map[parseKey]*parseGoHandle {

View File

@ -33,7 +33,7 @@ type symbolData struct {
err error
}
type symbolHandleKey string
type symbolHandleKey source.Hash
func (s *snapshot) buildSymbolHandle(ctx context.Context, fh source.FileHandle) *symbolHandle {
if h := s.getSymbolHandle(fh.URI()); h != nil {

View File

@ -165,7 +165,7 @@ func (v *View) ID() string { return v.id }
// given go.mod file. It is the caller's responsibility to clean up the files
// when they are done using them.
func tempModFile(modFh source.FileHandle, gosum []byte) (tmpURI span.URI, cleanup func(), err error) {
filenameHash := hashContents([]byte(modFh.URI().Filename()))
filenameHash := source.Hashf("%s", modFh.URI().Filename())
tmpMod, err := ioutil.TempFile("", fmt.Sprintf("go.%s.*.mod", filenameHash))
if err != nil {
return "", nil, err

View File

@ -320,7 +320,8 @@ func (i *Instance) getFile(r *http.Request) interface{} {
return nil
}
for _, o := range s.Overlays() {
if o.FileIdentity().Hash == identifier {
// TODO(adonovan): understand and document this comparison.
if o.FileIdentity().Hash.String() == identifier {
return o
}
}

View File

@ -7,6 +7,7 @@ package source
import (
"bytes"
"context"
"crypto/sha256"
"errors"
"fmt"
"go/ast"
@ -528,12 +529,32 @@ type FileHandle interface {
Saved() bool
}
// A Hash is a cryptographic digest of the contents of a file.
// (Although at 32B it is larger than a 16B string header, it is smaller
// and has better locality than the string header + 64B of hex digits.)
type Hash [sha256.Size]byte
// HashOf returns the hash of some data.
func HashOf(data []byte) Hash {
return Hash(sha256.Sum256(data))
}
// Hashf returns the hash of a printf-formatted string.
func Hashf(format string, args ...interface{}) Hash {
// Although this looks alloc-heavy, it is faster than using
// Fprintf on sha256.New() because the allocations don't escape.
return HashOf([]byte(fmt.Sprintf(format, args...)))
}
// String returns the digest as a string of hex digits.
func (h Hash) String() string {
return fmt.Sprintf("%64x", [sha256.Size]byte(h))
}
// FileIdentity uniquely identifies a file at a version from a FileSystem.
type FileIdentity struct {
URI span.URI
// Hash is a string of hex digits denoting the cryptographic digest of the file's content.
Hash string
URI span.URI
Hash Hash // digest of file contents
}
func (id FileIdentity) String() string {