mirror of https://github.com/golang/go.git
internal/lsp/cache: use [256]byte Hash instead of hex digit string
I had hoped to see a reduction in total allocation, but it does not appear to be significant according to the included crude benchmark. Nonetheless this is a slight code clarity improvement. Change-Id: I94a503b377dd1146eb371ff11222a351cb5a43b7 Reviewed-on: https://go-review.googlesource.com/c/tools/+/411655 TryBot-Result: Gopher Robot <gobot@golang.org> Run-TryBot: Robert Findley <rfindley@google.com> gopls-CI: kokoro <noreply+kokoro@google.com> Reviewed-by: Robert Findley <rfindley@google.com>
This commit is contained in:
parent
c41ddceaa4
commit
a41fc9869a
|
|
@ -8,6 +8,7 @@ import (
|
|||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
"runtime"
|
||||
"runtime/pprof"
|
||||
"testing"
|
||||
|
||||
|
|
@ -66,6 +67,7 @@ func TestBenchmarkIWL(t *testing.T) {
|
|||
results := testing.Benchmark(func(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
WithOptions(opts...).Run(t, "", func(t *testing.T, env *Env) {})
|
||||
|
||||
}
|
||||
})
|
||||
|
||||
|
|
@ -192,3 +194,31 @@ func TestBenchmarkDidChange(t *testing.T) {
|
|||
printBenchmarkResults(result)
|
||||
})
|
||||
}
|
||||
|
||||
// TestPrintMemStats measures the memory usage of loading a project.
|
||||
// It uses the same -didchange_dir flag as above.
|
||||
// Always run it in isolation since it measures global heap usage.
|
||||
//
|
||||
// Kubernetes example:
|
||||
// $ go test -run=TestPrintMemStats -didchange_dir=$HOME/w/kubernetes
|
||||
// TotalAlloc: 5766 MB
|
||||
// HeapAlloc: 1984 MB
|
||||
//
|
||||
// Both figures exhibit variance of less than 1%.
|
||||
func TestPrintMemStats(t *testing.T) {
|
||||
if *benchDir == "" {
|
||||
t.Skip("-didchange_dir is not set")
|
||||
}
|
||||
|
||||
// Load the program...
|
||||
opts := benchmarkOptions(*benchDir)
|
||||
WithOptions(opts...).Run(t, "", func(_ *testing.T, env *Env) {
|
||||
// ...and print the memory usage.
|
||||
runtime.GC()
|
||||
runtime.GC()
|
||||
var mem runtime.MemStats
|
||||
runtime.ReadMemStats(&mem)
|
||||
t.Logf("TotalAlloc:\t%d MB", mem.TotalAlloc/1e6)
|
||||
t.Logf("HeapAlloc:\t%d MB", mem.HeapAlloc/1e6)
|
||||
})
|
||||
}
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ func (s *snapshot) Analyze(ctx context.Context, id string, analyzers []*source.A
|
|||
return results, nil
|
||||
}
|
||||
|
||||
type actionHandleKey string
|
||||
type actionHandleKey source.Hash
|
||||
|
||||
// An action represents one unit of analysis work: the application of
|
||||
// one analysis to one package. Actions form a DAG, both within a
|
||||
|
|
@ -170,7 +170,7 @@ func (act *actionHandle) analyze(ctx context.Context, snapshot *snapshot) ([]*so
|
|||
}
|
||||
|
||||
func buildActionKey(a *analysis.Analyzer, ph *packageHandle) actionHandleKey {
|
||||
return actionHandleKey(hashContents([]byte(fmt.Sprintf("%p %s", a, string(ph.key)))))
|
||||
return actionHandleKey(source.Hashf("%p%s", a, ph.key[:]))
|
||||
}
|
||||
|
||||
func (act *actionHandle) String() string {
|
||||
|
|
|
|||
|
|
@ -6,7 +6,6 @@ package cache
|
|||
|
||||
import (
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"fmt"
|
||||
"go/ast"
|
||||
"go/token"
|
||||
|
|
@ -55,7 +54,7 @@ type fileHandle struct {
|
|||
modTime time.Time
|
||||
uri span.URI
|
||||
bytes []byte
|
||||
hash string
|
||||
hash source.Hash
|
||||
err error
|
||||
|
||||
// size is the file length as reported by Stat, for the purpose of
|
||||
|
|
@ -139,7 +138,7 @@ func readFile(ctx context.Context, uri span.URI, fi os.FileInfo) (*fileHandle, e
|
|||
size: fi.Size(),
|
||||
uri: uri,
|
||||
bytes: data,
|
||||
hash: hashContents(data),
|
||||
hash: source.HashOf(data),
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
|
@ -168,10 +167,6 @@ func (h *fileHandle) URI() span.URI {
|
|||
return h.uri
|
||||
}
|
||||
|
||||
func (h *fileHandle) Hash() string {
|
||||
return h.hash
|
||||
}
|
||||
|
||||
func (h *fileHandle) FileIdentity() source.FileIdentity {
|
||||
return source.FileIdentity{
|
||||
URI: h.uri,
|
||||
|
|
@ -183,16 +178,6 @@ func (h *fileHandle) Read() ([]byte, error) {
|
|||
return h.bytes, h.err
|
||||
}
|
||||
|
||||
// hashContents returns a string of hex digits denoting the hash of contents.
|
||||
//
|
||||
// TODO(adonovan): opt: use [32]byte array as a value more widely and convert
|
||||
// to hex digits on demand (rare). The array is larger when it appears as a
|
||||
// struct field (32B vs 16B) but smaller overall (string data is 64B), has
|
||||
// better locality, and is more efficiently hashed by runtime maps.
|
||||
func hashContents(contents []byte) string {
|
||||
return fmt.Sprintf("%64x", sha256.Sum256(contents))
|
||||
}
|
||||
|
||||
var cacheIndex, sessionIndex, viewIndex int64
|
||||
|
||||
func (c *Cache) ID() string { return c.id }
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ import (
|
|||
"golang.org/x/tools/internal/typesinternal"
|
||||
)
|
||||
|
||||
type packageHandleKey string
|
||||
type packageHandleKey source.Hash
|
||||
|
||||
type packageHandle struct {
|
||||
handle *memoize.Handle
|
||||
|
|
@ -187,7 +187,7 @@ func (s *snapshot) buildKey(ctx context.Context, id PackageID, mode source.Parse
|
|||
}
|
||||
// One bad dependency should not prevent us from checking the entire package.
|
||||
// Add a special key to mark a bad dependency.
|
||||
depKeys = append(depKeys, packageHandleKey(fmt.Sprintf("%s import not found", depID)))
|
||||
depKeys = append(depKeys, packageHandleKey(source.Hashf("%s import not found", depID)))
|
||||
continue
|
||||
}
|
||||
deps[depHandle.m.PkgPath] = depHandle
|
||||
|
|
@ -215,6 +215,8 @@ func (s *snapshot) workspaceParseMode(id PackageID) source.ParseMode {
|
|||
}
|
||||
|
||||
func checkPackageKey(id PackageID, pghs []*parseGoHandle, m *KnownMetadata, deps []packageHandleKey, mode source.ParseMode, experimentalKey bool) packageHandleKey {
|
||||
// TODO(adonovan): opt: no need to materalize the bytes; hash them directly.
|
||||
// Also, use field separators to avoid spurious collisions.
|
||||
b := bytes.NewBuffer(nil)
|
||||
b.WriteString(string(id))
|
||||
if m.Module != nil {
|
||||
|
|
@ -225,38 +227,37 @@ func checkPackageKey(id PackageID, pghs []*parseGoHandle, m *KnownMetadata, deps
|
|||
// files, and deps). It should not otherwise affect the inputs to the type
|
||||
// checker, so this experiment omits it. This should increase cache hits on
|
||||
// the daemon as cfg contains the environment and working directory.
|
||||
b.WriteString(hashConfig(m.Config))
|
||||
hc := hashConfig(m.Config)
|
||||
b.Write(hc[:])
|
||||
}
|
||||
b.WriteByte(byte(mode))
|
||||
for _, dep := range deps {
|
||||
b.WriteString(string(dep))
|
||||
b.Write(dep[:])
|
||||
}
|
||||
for _, cgf := range pghs {
|
||||
b.WriteString(cgf.file.FileIdentity().String())
|
||||
}
|
||||
return packageHandleKey(hashContents(b.Bytes()))
|
||||
return packageHandleKey(source.HashOf(b.Bytes()))
|
||||
}
|
||||
|
||||
// hashEnv returns a hash of the snapshot's configuration.
|
||||
func hashEnv(s *snapshot) string {
|
||||
func hashEnv(s *snapshot) source.Hash {
|
||||
s.view.optionsMu.Lock()
|
||||
env := s.view.options.EnvSlice()
|
||||
s.view.optionsMu.Unlock()
|
||||
|
||||
b := &bytes.Buffer{}
|
||||
for _, e := range env {
|
||||
b.WriteString(e)
|
||||
}
|
||||
return hashContents(b.Bytes())
|
||||
return source.Hashf("%s", env)
|
||||
}
|
||||
|
||||
// hashConfig returns the hash for the *packages.Config.
|
||||
func hashConfig(config *packages.Config) string {
|
||||
b := bytes.NewBuffer(nil)
|
||||
func hashConfig(config *packages.Config) source.Hash {
|
||||
// TODO(adonovan): opt: don't materialize the bytes; hash them directly.
|
||||
// Also, use sound field separators to avoid collisions.
|
||||
var b bytes.Buffer
|
||||
|
||||
// Dir, Mode, Env, BuildFlags are the parts of the config that can change.
|
||||
b.WriteString(config.Dir)
|
||||
b.WriteString(string(rune(config.Mode)))
|
||||
b.WriteRune(rune(config.Mode))
|
||||
|
||||
for _, e := range config.Env {
|
||||
b.WriteString(e)
|
||||
|
|
@ -264,7 +265,7 @@ func hashConfig(config *packages.Config) string {
|
|||
for _, f := range config.BuildFlags {
|
||||
b.WriteString(f)
|
||||
}
|
||||
return hashContents(b.Bytes())
|
||||
return source.HashOf(b.Bytes())
|
||||
}
|
||||
|
||||
func (ph *packageHandle) Check(ctx context.Context, s source.Snapshot) (source.Package, error) {
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ type importsState struct {
|
|||
cleanupProcessEnv func()
|
||||
cacheRefreshDuration time.Duration
|
||||
cacheRefreshTimer *time.Timer
|
||||
cachedModFileHash string
|
||||
cachedModFileHash source.Hash
|
||||
cachedBuildFlags []string
|
||||
}
|
||||
|
||||
|
|
@ -38,7 +38,7 @@ func (s *importsState) runProcessEnvFunc(ctx context.Context, snapshot *snapshot
|
|||
// Find the hash of the active mod file, if any. Using the unsaved content
|
||||
// is slightly wasteful, since we'll drop caches a little too often, but
|
||||
// the mod file shouldn't be changing while people are autocompleting.
|
||||
var modFileHash string
|
||||
var modFileHash source.Hash
|
||||
// If we are using 'legacyWorkspace' mode, we can just read the modfile from
|
||||
// the snapshot. Otherwise, we need to get the synthetic workspace mod file.
|
||||
//
|
||||
|
|
@ -61,7 +61,7 @@ func (s *importsState) runProcessEnvFunc(ctx context.Context, snapshot *snapshot
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
modFileHash = hashContents(modBytes)
|
||||
modFileHash = source.HashOf(modBytes)
|
||||
}
|
||||
|
||||
// view.goEnv is immutable -- changes make a new view. Options can change.
|
||||
|
|
|
|||
|
|
@ -201,9 +201,11 @@ func sumFilename(modURI span.URI) string {
|
|||
// modKey is uniquely identifies cached data for `go mod why` or dependencies
|
||||
// to upgrade.
|
||||
type modKey struct {
|
||||
sessionID, env, view string
|
||||
mod source.FileIdentity
|
||||
verb modAction
|
||||
sessionID string
|
||||
env source.Hash
|
||||
view string
|
||||
mod source.FileIdentity
|
||||
verb modAction
|
||||
}
|
||||
|
||||
type modAction int
|
||||
|
|
|
|||
|
|
@ -29,10 +29,10 @@ import (
|
|||
|
||||
type modTidyKey struct {
|
||||
sessionID string
|
||||
env string
|
||||
env source.Hash
|
||||
gomod source.FileIdentity
|
||||
imports string
|
||||
unsavedOverlays string
|
||||
imports source.Hash
|
||||
unsavedOverlays source.Hash
|
||||
view string
|
||||
}
|
||||
|
||||
|
|
@ -81,10 +81,6 @@ func (s *snapshot) ModTidy(ctx context.Context, pm *source.ParsedModule) (*sourc
|
|||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
importHash, err := s.hashImports(ctx, workspacePkgs)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
s.mu.Lock()
|
||||
overlayHash := hashUnsavedOverlays(s.files)
|
||||
|
|
@ -93,7 +89,7 @@ func (s *snapshot) ModTidy(ctx context.Context, pm *source.ParsedModule) (*sourc
|
|||
key := modTidyKey{
|
||||
sessionID: s.view.session.id,
|
||||
view: s.view.folder.Filename(),
|
||||
imports: importHash,
|
||||
imports: s.hashImports(ctx, workspacePkgs),
|
||||
unsavedOverlays: overlayHash,
|
||||
gomod: fh.FileIdentity(),
|
||||
env: hashEnv(s),
|
||||
|
|
@ -152,7 +148,7 @@ func (s *snapshot) ModTidy(ctx context.Context, pm *source.ParsedModule) (*sourc
|
|||
return mth.tidy(ctx, s)
|
||||
}
|
||||
|
||||
func (s *snapshot) hashImports(ctx context.Context, wsPackages []*packageHandle) (string, error) {
|
||||
func (s *snapshot) hashImports(ctx context.Context, wsPackages []*packageHandle) source.Hash {
|
||||
seen := map[string]struct{}{}
|
||||
var imports []string
|
||||
for _, ph := range wsPackages {
|
||||
|
|
@ -164,8 +160,7 @@ func (s *snapshot) hashImports(ctx context.Context, wsPackages []*packageHandle)
|
|||
}
|
||||
}
|
||||
sort.Strings(imports)
|
||||
hashed := strings.Join(imports, ",")
|
||||
return hashContents([]byte(hashed)), nil
|
||||
return source.Hashf("%s", imports)
|
||||
}
|
||||
|
||||
// modTidyDiagnostics computes the differences between the original and tidied
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ type overlay struct {
|
|||
session *Session
|
||||
uri span.URI
|
||||
text []byte
|
||||
hash string
|
||||
hash source.Hash
|
||||
version int32
|
||||
kind source.FileKind
|
||||
|
||||
|
|
@ -637,7 +637,7 @@ func (s *Session) updateOverlays(ctx context.Context, changes []source.FileModif
|
|||
if c.OnDisk || c.Action == source.Save {
|
||||
version = o.version
|
||||
}
|
||||
hash := hashContents(text)
|
||||
hash := source.HashOf(text)
|
||||
var sameContentOnDisk bool
|
||||
switch c.Action {
|
||||
case source.Delete:
|
||||
|
|
|
|||
|
|
@ -466,7 +466,7 @@ func (s *snapshot) buildOverlay() map[string][]byte {
|
|||
return overlays
|
||||
}
|
||||
|
||||
func hashUnsavedOverlays(files map[span.URI]source.VersionedFileHandle) string {
|
||||
func hashUnsavedOverlays(files map[span.URI]source.VersionedFileHandle) source.Hash {
|
||||
var unsaved []string
|
||||
for uri, fh := range files {
|
||||
if overlay, ok := fh.(*overlay); ok && !overlay.saved {
|
||||
|
|
@ -474,7 +474,7 @@ func hashUnsavedOverlays(files map[span.URI]source.VersionedFileHandle) string {
|
|||
}
|
||||
}
|
||||
sort.Strings(unsaved)
|
||||
return hashContents([]byte(strings.Join(unsaved, "")))
|
||||
return source.Hashf("%s", unsaved)
|
||||
}
|
||||
|
||||
func (s *snapshot) PackagesForFile(ctx context.Context, uri span.URI, mode source.TypecheckMode, includeTestVariants bool) ([]source.Package, error) {
|
||||
|
|
@ -2652,25 +2652,7 @@ func (m *goFileMap) forEachConcurrent(f func(parseKey, *parseGoHandle)) {
|
|||
// -- internal--
|
||||
|
||||
// hash returns 8 bits from the key's file digest.
|
||||
func (m *goFileMap) hash(k parseKey) int {
|
||||
h := k.file.Hash
|
||||
if h == "" {
|
||||
// Sadly the Hash isn't always a hash because cache.GetFile may
|
||||
// successfully return a *fileHandle containing an error and no hash.
|
||||
// Lump the duds together for now.
|
||||
// TODO(adonovan): fix the underlying bug.
|
||||
return 0
|
||||
}
|
||||
return unhex(h[0])<<4 | unhex(h[1])
|
||||
}
|
||||
|
||||
// unhex returns the value of a valid hex digit.
|
||||
func unhex(b byte) int {
|
||||
if '0' <= b && b <= '9' {
|
||||
return int(b - '0')
|
||||
}
|
||||
return int(b) & ^0x20 - 'A' + 0xA // [a-fA-F]
|
||||
}
|
||||
func (*goFileMap) hash(k parseKey) byte { return k.file.Hash[0] }
|
||||
|
||||
// unshare makes k's stripe exclusive, allocating a copy if needed, and returns it.
|
||||
func (m *goFileMap) unshare(k parseKey) map[parseKey]*parseGoHandle {
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ type symbolData struct {
|
|||
err error
|
||||
}
|
||||
|
||||
type symbolHandleKey string
|
||||
type symbolHandleKey source.Hash
|
||||
|
||||
func (s *snapshot) buildSymbolHandle(ctx context.Context, fh source.FileHandle) *symbolHandle {
|
||||
if h := s.getSymbolHandle(fh.URI()); h != nil {
|
||||
|
|
|
|||
|
|
@ -165,7 +165,7 @@ func (v *View) ID() string { return v.id }
|
|||
// given go.mod file. It is the caller's responsibility to clean up the files
|
||||
// when they are done using them.
|
||||
func tempModFile(modFh source.FileHandle, gosum []byte) (tmpURI span.URI, cleanup func(), err error) {
|
||||
filenameHash := hashContents([]byte(modFh.URI().Filename()))
|
||||
filenameHash := source.Hashf("%s", modFh.URI().Filename())
|
||||
tmpMod, err := ioutil.TempFile("", fmt.Sprintf("go.%s.*.mod", filenameHash))
|
||||
if err != nil {
|
||||
return "", nil, err
|
||||
|
|
|
|||
|
|
@ -320,7 +320,8 @@ func (i *Instance) getFile(r *http.Request) interface{} {
|
|||
return nil
|
||||
}
|
||||
for _, o := range s.Overlays() {
|
||||
if o.FileIdentity().Hash == identifier {
|
||||
// TODO(adonovan): understand and document this comparison.
|
||||
if o.FileIdentity().Hash.String() == identifier {
|
||||
return o
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ package source
|
|||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"errors"
|
||||
"fmt"
|
||||
"go/ast"
|
||||
|
|
@ -528,12 +529,32 @@ type FileHandle interface {
|
|||
Saved() bool
|
||||
}
|
||||
|
||||
// A Hash is a cryptographic digest of the contents of a file.
|
||||
// (Although at 32B it is larger than a 16B string header, it is smaller
|
||||
// and has better locality than the string header + 64B of hex digits.)
|
||||
type Hash [sha256.Size]byte
|
||||
|
||||
// HashOf returns the hash of some data.
|
||||
func HashOf(data []byte) Hash {
|
||||
return Hash(sha256.Sum256(data))
|
||||
}
|
||||
|
||||
// Hashf returns the hash of a printf-formatted string.
|
||||
func Hashf(format string, args ...interface{}) Hash {
|
||||
// Although this looks alloc-heavy, it is faster than using
|
||||
// Fprintf on sha256.New() because the allocations don't escape.
|
||||
return HashOf([]byte(fmt.Sprintf(format, args...)))
|
||||
}
|
||||
|
||||
// String returns the digest as a string of hex digits.
|
||||
func (h Hash) String() string {
|
||||
return fmt.Sprintf("%64x", [sha256.Size]byte(h))
|
||||
}
|
||||
|
||||
// FileIdentity uniquely identifies a file at a version from a FileSystem.
|
||||
type FileIdentity struct {
|
||||
URI span.URI
|
||||
|
||||
// Hash is a string of hex digits denoting the cryptographic digest of the file's content.
|
||||
Hash string
|
||||
URI span.URI
|
||||
Hash Hash // digest of file contents
|
||||
}
|
||||
|
||||
func (id FileIdentity) String() string {
|
||||
|
|
|
|||
Loading…
Reference in New Issue