From 9651276d64be221532f972dbbbd8e6186c784fde Mon Sep 17 00:00:00 2001 From: Alan Donovan Date: Fri, 10 Jun 2022 13:34:52 -0400 Subject: [PATCH] internal/lsp/cache: optimize Snapshot.clone This change replaces the single large map used for snapshot.goFiles by a map of 256 stripes, each of which becomes immutable once shared. This optimizes the common case in which the copy is nearly identical to the original. We still need to visit each map entry to see whether it needs to be deleted (which is rare) and to inherit the handle in the usual case. This is now done concurrently. Also, share the (logically immutable) []PackageIDs slices across old and new snapshots. This was worth 5% of CPU and 1/3 of allocations (all small). Benchmark on darwin/arm64 shows a 29% reduction for DidChange. $ go test -v ./gopls/internal/regtest/bench -run=TestBenchmarkDidChange -didchange_dir=$HOME/w/kubernetes -didchange_file=pkg/util/hash/hash.go Before: BenchmarkStatistics 100 22955469 ns/op 11308095 B/op 47412 allocs/op BenchmarkStatistics 100 23454630 ns/op 11226742 B/op 46882 allocs/op BenchmarkStatistics 100 23618532 ns/op 11258619 B/op 47068 allocs/op After goFilesMap: BenchmarkStatistics 100 16643972 ns/op 8770787 B/op 46238 allocs/op BenchmarkStatistics 100 17805864 ns/op 8862926 B/op 46762 allocs/op BenchmarkStatistics 100 18618255 ns/op 9308864 B/op 49776 allocs/op After goFilesMap and ids sharing: BenchmarkStatistics 100 16703623 ns/op 8772626 B/op 33812 allocs/op BenchmarkStatistics 100 16927378 ns/op 8529491 B/op 32328 allocs/op BenchmarkStatistics 100 16632762 ns/op 8557533 B/op 32497 allocs/op Also: - Add comments documenting findings of profiling. - preallocate slice for knownSubdirs. - remove unwanted loop over slice in Generation.Inherit Updates golang/go#45686 Change-Id: Id953699191b8404cf36ba3a7ab9cd78b1d19c0a2 Reviewed-on: https://go-review.googlesource.com/c/tools/+/410176 TryBot-Result: Gopher Robot Reviewed-by: Robert Findley gopls-CI: kokoro Run-TryBot: Alan Donovan --- internal/lsp/cache/cache.go | 8 +- internal/lsp/cache/session.go | 2 +- internal/lsp/cache/snapshot.go | 184 +++++++++++++++++++++++++++---- internal/lsp/source/view.go | 2 +- internal/memoize/memoize.go | 23 ++-- internal/memoize/memoize_test.go | 3 +- internal/span/uri.go | 8 ++ 7 files changed, 192 insertions(+), 38 deletions(-) diff --git a/internal/lsp/cache/cache.go b/internal/lsp/cache/cache.go index ac670b573e..f5796dfefa 100644 --- a/internal/lsp/cache/cache.go +++ b/internal/lsp/cache/cache.go @@ -183,8 +183,14 @@ func (h *fileHandle) Read() ([]byte, error) { return h.bytes, h.err } +// hashContents returns a string of hex digits denoting the hash of contents. +// +// TODO(adonovan): opt: use [32]byte array as a value more widely and convert +// to hex digits on demand (rare). The array is larger when it appears as a +// struct field (32B vs 16B) but smaller overall (string data is 64B), has +// better locality, and is more efficiently hashed by runtime maps. 
func hashContents(contents []byte) string { - return fmt.Sprintf("%x", sha256.Sum256(contents)) + return fmt.Sprintf("%64x", sha256.Sum256(contents)) } var cacheIndex, sessionIndex, viewIndex int64 diff --git a/internal/lsp/cache/session.go b/internal/lsp/cache/session.go index e018cb33bd..9da5c1e69f 100644 --- a/internal/lsp/cache/session.go +++ b/internal/lsp/cache/session.go @@ -234,7 +234,7 @@ func (s *Session) createView(ctx context.Context, name string, folder span.URI, packages: make(map[packageKey]*packageHandle), meta: NewMetadataGraph(), files: make(map[span.URI]source.VersionedFileHandle), - goFiles: make(map[parseKey]*parseGoHandle), + goFiles: newGoFileMap(), symbols: make(map[span.URI]*symbolHandle), actions: make(map[actionKey]*actionHandle), workspacePackages: make(map[PackageID]PackagePath), diff --git a/internal/lsp/cache/snapshot.go b/internal/lsp/cache/snapshot.go index a219935aa6..0d3c869cd2 100644 --- a/internal/lsp/cache/snapshot.go +++ b/internal/lsp/cache/snapshot.go @@ -76,7 +76,7 @@ type snapshot struct { files map[span.URI]source.VersionedFileHandle // goFiles maps a parseKey to its parseGoHandle. - goFiles map[parseKey]*parseGoHandle + goFiles *goFileMap // TODO(rfindley): consider merging this with files to reduce burden on clone. symbols map[span.URI]*symbolHandle @@ -663,16 +663,17 @@ func (s *snapshot) transitiveReverseDependencies(id PackageID, ids map[PackageID func (s *snapshot) getGoFile(key parseKey) *parseGoHandle { s.mu.Lock() defer s.mu.Unlock() - return s.goFiles[key] + return s.goFiles.get(key) } func (s *snapshot) addGoFile(key parseKey, pgh *parseGoHandle) *parseGoHandle { s.mu.Lock() defer s.mu.Unlock() - if existing, ok := s.goFiles[key]; ok { - return existing + + if prev := s.goFiles.get(key); prev != nil { + return prev } - s.goFiles[key] = pgh + s.goFiles.set(key, pgh) return pgh } @@ -811,6 +812,8 @@ func (s *snapshot) fileWatchingGlobPatterns(ctx context.Context) map[string]stru patterns := map[string]struct{}{ fmt.Sprintf("**/*.{%s}", extensions): {}, } + + // Add a pattern for each Go module in the workspace that is not within the view. dirs := s.workspace.dirs(ctx, s) for _, dir := range dirs { dirName := dir.Filename() @@ -830,14 +833,19 @@ func (s *snapshot) fileWatchingGlobPatterns(ctx context.Context) map[string]stru // contain Go code (golang/go#42348). To handle this, explicitly watch all // of the directories in the workspace. We find them by adding the // directories of every file in the snapshot's workspace directories. - var dirNames []string - for _, uri := range s.getKnownSubdirs(dirs) { - dirNames = append(dirNames, uri.Filename()) - } - sort.Strings(dirNames) - if len(dirNames) > 0 { + // There may be thousands. + knownSubdirs := s.getKnownSubdirs(dirs) + if n := len(knownSubdirs); n > 0 { + dirNames := make([]string, 0, n) + for _, uri := range knownSubdirs { + dirNames = append(dirNames, uri.Filename()) + } + sort.Strings(dirNames) + // The double allocation of Sprintf(Join()) accounts for 8% + // of DidChange, but specializing doesn't appear to help. 
:( patterns[fmt.Sprintf("{%s}", strings.Join(dirNames, ","))] = struct{}{} } + return patterns } @@ -874,7 +882,7 @@ func (s *snapshot) getKnownSubdirs(wsDirs []span.URI) []span.URI { } s.unprocessedSubdirChanges = nil - var result []span.URI + result := make([]span.URI, 0, len(s.knownSubdirs)) for uri := range s.knownSubdirs { result = append(result, uri) } @@ -1719,7 +1727,7 @@ func (s *snapshot) clone(ctx, bgCtx context.Context, changes map[span.URI]*fileC packages: make(map[packageKey]*packageHandle, len(s.packages)), actions: make(map[actionKey]*actionHandle, len(s.actions)), files: make(map[span.URI]source.VersionedFileHandle, len(s.files)), - goFiles: make(map[parseKey]*parseGoHandle, len(s.goFiles)), + goFiles: s.goFiles.clone(), symbols: make(map[span.URI]*symbolHandle, len(s.symbols)), workspacePackages: make(map[PackageID]PackagePath, len(s.workspacePackages)), unloadableFiles: make(map[span.URI]struct{}, len(s.unloadableFiles)), @@ -1764,12 +1772,27 @@ func (s *snapshot) clone(ctx, bgCtx context.Context, changes map[span.URI]*fileC result.parseWorkHandles[k] = v } - for k, v := range s.goFiles { - if _, ok := changes[k.file.URI]; ok { - continue + // Copy the handles of all Go source files. + // There may be tens of thousands of files, + // but changes are typically few, so we + // use a striped map optimized for this case + // and visit its stripes in parallel. + var ( + toDeleteMu sync.Mutex + toDelete []parseKey + ) + s.goFiles.forEachConcurrent(func(k parseKey, v *parseGoHandle) { + if changes[k.file.URI] == nil { + // no change (common case) + newGen.Inherit(v.handle) + } else { + toDeleteMu.Lock() + toDelete = append(toDelete, k) + toDeleteMu.Unlock() } - newGen.Inherit(v.handle) - result.goFiles[k] = v + }) + for _, k := range toDelete { + result.goFiles.delete(k) } // Copy all of the go.mod-related handles. They may be invalidated later, @@ -1975,21 +1998,34 @@ func (s *snapshot) clone(ctx, bgCtx context.Context, changes map[span.URI]*fileC deleteInvalidMetadata := forceReloadMetadata || workspaceModeChanged idsInSnapshot := map[PackageID]bool{} // track all known IDs for uri, ids := range s.meta.ids { - var resultIDs []PackageID - for _, id := range ids { + // Optimization: ids slices are typically numerous, short (<3), + // and rarely modified by this loop, so don't allocate copies + // until necessary. + var resultIDs []PackageID // nil implies equal to ids[:i:i] + for i, id := range ids { if skipID[id] || deleteInvalidMetadata && idsToInvalidate[id] { + resultIDs = ids[:i:i] // unshare continue } // The ID is not reachable from any workspace package, so it should // be deleted. if !reachableID[id] { + resultIDs = ids[:i:i] // unshare continue } idsInSnapshot[id] = true - resultIDs = append(resultIDs, id) + if resultIDs != nil { + resultIDs = append(resultIDs, id) + } + } + if resultIDs == nil { + resultIDs = ids } result.meta.ids[uri] = resultIDs } + // TODO(adonovan): opt: represent PackageID as an index into a process-global + // dup-free list of all package names ever seen, then use a bitmap instead of + // a hash table for "PackageSet" (e.g. idsInSnapshot). // Copy the package metadata. We only need to invalidate packages directly // containing the affected file, and only if it changed in a relevant way. @@ -2259,7 +2295,7 @@ func metadataChanges(ctx context.Context, lockedSnapshot *snapshot, oldFH, newFH // lockedSnapshot must be locked. 
 func peekOrParse(ctx context.Context, lockedSnapshot *snapshot, fh source.FileHandle, mode source.ParseMode) (*source.ParsedGoFile, error) {
 	key := parseKey{file: fh.FileIdentity(), mode: mode}
-	if pgh := lockedSnapshot.goFiles[key]; pgh != nil {
+	if pgh := lockedSnapshot.goFiles.get(key); pgh != nil {
 		cached := pgh.handle.Cached(lockedSnapshot.generation)
 		if cached != nil {
 			cached := cached.(*parseGoData)
@@ -2547,3 +2583,107 @@ func readGoSum(dst map[module.Version][]string, file string, data []byte) error
 	}
 	return nil
 }
+
+// -- goFileMap --
+
+// A goFileMap is conceptually a map[parseKey]*parseGoHandle,
+// optimized for cloning all or nearly all entries.
+type goFileMap struct {
+	// The map is represented as a map of 256 stripes, one per
+	// distinct value of the top 8 bits of key.file.Hash.
+	// Each stripe has an associated boolean indicating whether it
+	// is shared, and thus immutable, and thus must be copied before any update.
+	// (The bits could be packed but it hasn't been worth it yet.)
+	stripes   [256]map[parseKey]*parseGoHandle
+	exclusive [256]bool // exclusive[i] means stripe[i] is not shared and may be safely mutated
+}
+
+// newGoFileMap returns a new empty goFileMap.
+func newGoFileMap() *goFileMap {
+	return new(goFileMap) // all stripes are shared (non-exclusive) nil maps
+}
+
+// clone returns a copy of m.
+// For concurrency, it counts as an update to m.
+func (m *goFileMap) clone() *goFileMap {
+	m.exclusive = [256]bool{} // original and copy are now nonexclusive
+	copy := *m
+	return &copy
+}
+
+// get returns the value for key k.
+func (m *goFileMap) get(k parseKey) *parseGoHandle {
+	return m.stripes[m.hash(k)][k]
+}
+
+// set updates the value for key k to v.
+func (m *goFileMap) set(k parseKey, v *parseGoHandle) {
+	m.unshare(k)[k] = v
+}
+
+// delete deletes the value for key k, if any.
+func (m *goFileMap) delete(k parseKey) {
+	// TODO(adonovan): opt?: skip unshare if k isn't present.
+	delete(m.unshare(k), k)
+}
+
+// forEachConcurrent calls f for each entry in the map.
+// Calls may be concurrent.
+// f must not modify m.
+func (m *goFileMap) forEachConcurrent(f func(parseKey, *parseGoHandle)) {
+	// Visit stripes in parallel chunks.
+	const p = 16 // concurrency level
+	var wg sync.WaitGroup
+	wg.Add(p)
+	for i := 0; i < p; i++ {
+		chunk := m.stripes[i*p : (i+1)*p]
+		go func() {
+			for _, stripe := range chunk {
+				for k, v := range stripe {
+					f(k, v)
+				}
+			}
+			wg.Done()
+		}()
+	}
+	wg.Wait()
+}
+
+// -- internal--
+
+// hash returns 8 bits from the key's file digest.
+func (m *goFileMap) hash(k parseKey) int {
+	h := k.file.Hash
+	if h == "" {
+		// Sadly the Hash isn't always a hash because cache.GetFile may
+		// successfully return a *fileHandle containing an error and no hash.
+		// Lump the duds together for now.
+		// TODO(adonovan): fix the underlying bug.
+		return 0
+	}
+	return unhex(h[0])<<4 | unhex(h[1])
+}
+
+// unhex returns the value of a valid hex digit.
+func unhex(b byte) int {
+	if '0' <= b && b <= '9' {
+		return int(b - '0')
+	}
+	return int(b) & ^0x20 - 'A' + 0xA // [a-fA-F]
+}
+
+// unshare makes k's stripe exclusive, allocating a copy if needed, and returns it.
+func (m *goFileMap) unshare(k parseKey) map[parseKey]*parseGoHandle {
+	i := m.hash(k)
+	if !m.exclusive[i] {
+		m.exclusive[i] = true
+
+		// Copy the map.
+ copy := make(map[parseKey]*parseGoHandle, len(m.stripes[i])) + for k, v := range m.stripes[i] { + copy[k] = v + } + m.stripes[i] = copy + } + return m.stripes[i] +} diff --git a/internal/lsp/source/view.go b/internal/lsp/source/view.go index 94037f33fe..5b908bc721 100644 --- a/internal/lsp/source/view.go +++ b/internal/lsp/source/view.go @@ -532,7 +532,7 @@ type FileHandle interface { type FileIdentity struct { URI span.URI - // Identifier represents a unique identifier for the file's content. + // Hash is a string of hex digits denoting the cryptographic digest of the file's content. Hash string } diff --git a/internal/memoize/memoize.go b/internal/memoize/memoize.go index 89f79c68b7..dec2fff683 100644 --- a/internal/memoize/memoize.go +++ b/internal/memoize/memoize.go @@ -234,19 +234,18 @@ func (s *Store) DebugOnlyIterate(f func(k, v interface{})) { } } -func (g *Generation) Inherit(hs ...*Handle) { - for _, h := range hs { - if atomic.LoadUint32(&g.destroyed) != 0 { - panic("inherit on generation " + g.name + " destroyed by " + g.destroyedBy) - } - - h.mu.Lock() - if h.state == stateDestroyed { - panic(fmt.Sprintf("inheriting destroyed handle %#v (type %T) into generation %v", h.key, h.key, g.name)) - } - h.generations[g] = struct{}{} - h.mu.Unlock() +// Inherit makes h valid in generation g. It is concurrency-safe. +func (g *Generation) Inherit(h *Handle) { + if atomic.LoadUint32(&g.destroyed) != 0 { + panic("inherit on generation " + g.name + " destroyed by " + g.destroyedBy) } + + h.mu.Lock() + if h.state == stateDestroyed { + panic(fmt.Sprintf("inheriting destroyed handle %#v (type %T) into generation %v", h.key, h.key, g.name)) + } + h.generations[g] = struct{}{} + h.mu.Unlock() } // Cached returns the value associated with a handle. diff --git a/internal/memoize/memoize_test.go b/internal/memoize/memoize_test.go index f05966b461..ee0fd23ea1 100644 --- a/internal/memoize/memoize_test.go +++ b/internal/memoize/memoize_test.go @@ -87,7 +87,8 @@ func TestCleanup(t *testing.T) { expectGet(t, h1, g1, &v1) expectGet(t, h2, g1, &v2) g2 := s.Generation("g2") - g2.Inherit(h1, h2) + g2.Inherit(h1) + g2.Inherit(h2) g1.Destroy("TestCleanup") expectGet(t, h1, g2, &v1) diff --git a/internal/span/uri.go b/internal/span/uri.go index a9777ff859..f2b39ca424 100644 --- a/internal/span/uri.go +++ b/internal/span/uri.go @@ -35,6 +35,10 @@ func (uri URI) Filename() string { } func filename(uri URI) (string, error) { + // This function is frequently called and its cost is + // dominated by the allocation of a net.URL. + // TODO(adonovan): opt: replace by a bespoke parseFileURI + // function that doesn't allocate. if uri == "" { return "", nil } @@ -80,6 +84,10 @@ func URIFromURI(s string) URI { return URI(u.String()) } +// CompareURI performs a three-valued comparison of two URIs. +// Lexically unequal URIs may compare equal if they are "file:" URIs +// that share the same base name (ignoring case) and denote the same +// file device/inode, according to stat(2). func CompareURI(a, b URI) int { if equalURI(a, b) { return 0
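
The heart of this change is the copy-on-write striping in goFileMap. Below is a stand-alone sketch of the same idea at toy scale; it is not part of the patch, and cowMap, stripeOf, and the string/int entries are illustrative stand-ins for goFileMap, its hash method, and parseKey/*parseGoHandle. clone copies only the two fixed-size arrays; a stripe's backing map is duplicated lazily, the first time either the original or the clone writes a key that lands in it.

package main

import "fmt"

const nstripes = 4 // the real goFileMap uses 256

type cowMap struct {
	stripes   [nstripes]map[string]int
	exclusive [nstripes]bool // exclusive[i]: stripes[i] is unshared and safe to mutate
}

// stripeOf picks a stripe from the key (the patch uses the top 8 bits
// of the file content hash instead).
func stripeOf(k string) int {
	if k == "" {
		return 0
	}
	return int(k[0]) % nstripes
}

func (m *cowMap) get(k string) int { return m.stripes[stripeOf(k)][k] }

// set duplicates the stripe on first write after a clone (copy-on-write).
func (m *cowMap) set(k string, v int) {
	i := stripeOf(k)
	if !m.exclusive[i] {
		copied := make(map[string]int, len(m.stripes[i]))
		for k2, v2 := range m.stripes[i] {
			copied[k2] = v2
		}
		m.stripes[i] = copied
		m.exclusive[i] = true
	}
	m.stripes[i][k] = v
}

// clone marks every stripe as shared in both maps and copies only the headers.
func (m *cowMap) clone() *cowMap {
	m.exclusive = [nstripes]bool{}
	c := *m
	return &c
}

func main() {
	m1 := new(cowMap)
	m1.set("a.go", 1)
	m2 := m1.clone()
	m2.set("a.go", 2) // copies exactly one stripe; m1 is unaffected
	fmt.Println(m1.get("a.go"), m2.get("a.go")) // prints: 1 2
}

The write through m2 touches a single stripe and leaves m1's view intact, which is why cloning a snapshot holding tens of thousands of parse handles stays cheap when only a few files changed.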
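
The []PackageID sharing in the clone loop leans on Go's full slice expression ids[:i:i], which clips capacity as well as length. A minimal sketch (toy data, not from the patch) of why the capacity clip matters: without it, a later append would grow the shared backing array in place and corrupt the slice still referenced by the old snapshot.

package main

import "fmt"

func main() {
	ids := []string{"p1", "p2", "p3"}

	bad := ids[:1]         // len 1, cap 3: still aliases ids' backing array
	bad = append(bad, "x") // grows in place, overwriting ids[1]
	fmt.Println(ids)       // prints: [p1 x p3]

	ids = []string{"p1", "p2", "p3"}
	good := ids[:1:1]        // len 1, cap 1: cannot grow in place
	good = append(good, "x") // allocates a fresh array
	fmt.Println(ids, good)   // prints: [p1 p2 p3] [p1 x]
}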
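
For reference, the stripe index computed by goFileMap.hash is just the first byte of the hex-encoded content digest. A small worked sketch; unhex follows the patch, while stripe and hexDigest are illustrative names:

package main

import "fmt"

// unhex returns the value of a valid hex digit.
func unhex(b byte) int {
	if '0' <= b && b <= '9' {
		return int(b - '0')
	}
	return int(b)&^0x20 - 'A' + 0xA // [a-fA-F]
}

// stripe decodes the first two hex digits of a digest into 0..255.
func stripe(hexDigest string) int {
	if len(hexDigest) < 2 {
		return 0 // lump invalid digests together, as goFileMap.hash does for ""
	}
	return unhex(hexDigest[0])<<4 | unhex(hexDigest[1])
}

func main() {
	fmt.Println(stripe("d4ab8e")) // prints: 212 (0xd4)
	fmt.Println(stripe("00ff"))   // prints: 0
}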