diff --git a/internal/lsp/lsppos/lsppos.go b/internal/lsp/lsppos/lsppos.go index 0e7433042e..35f6f13485 100644 --- a/internal/lsp/lsppos/lsppos.go +++ b/internal/lsp/lsppos/lsppos.go @@ -2,7 +2,10 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// Package lsppos provides utilities for working with LSP positions. +// Package lsppos provides utilities for working with LSP positions. Much of +// this functionality is duplicated from the internal/span package, but this +// package is simpler and more accurate with respect to newline terminated +// content. // // See https://microsoft.github.io/language-server-protocol/specification#textDocuments // for a description of LSP positions. Notably: @@ -14,25 +17,30 @@ package lsppos import ( + "errors" "sort" "unicode/utf8" + + "golang.org/x/tools/internal/lsp/protocol" ) +// Mapper maps utf-8 byte offsets to LSP positions for a single file. type Mapper struct { nonASCII bool - src []byte + content []byte - // Start-of-line positions. If src is newline-terminated, the final entry will be empty. + // Start-of-line positions. If src is newline-terminated, the final entry + // will be len(content). lines []int } -func NewMapper(src []byte) *Mapper { - m := &Mapper{src: src} - if len(src) == 0 { - return m +// NewMapper creates a new Mapper for the given content. +func NewMapper(content []byte) *Mapper { + m := &Mapper{ + content: content, + lines: []int{0}, } - m.lines = []int{0} - for offset, b := range src { + for offset, b := range content { if b == '\n' { m.lines = append(m.lines, offset+1) } @@ -43,8 +51,11 @@ func NewMapper(src []byte) *Mapper { return m } -func (m *Mapper) Position(offset int) (line, char int) { - if offset < 0 || offset > len(m.src) { +// LineColUTF16 returns the 0-based UTF-16 line and character index for the +// given offset. It returns -1, -1 if offset is out of bounds for the file +// being mapped. +func (m *Mapper) LineColUTF16(offset int) (line, char int) { + if offset < 0 || offset > len(m.content) { return -1, -1 } nextLine := sort.Search(len(m.lines), func(i int) bool { @@ -57,27 +68,59 @@ func (m *Mapper) Position(offset int) (line, char int) { start := m.lines[line] var charOffset int if m.nonASCII { - charOffset = UTF16len(m.src[start:offset]) + charOffset = UTF16len(m.content[start:offset]) } else { charOffset = offset - start } var eol int if line == len(m.lines)-1 { - eol = len(m.src) + eol = len(m.content) } else { eol = m.lines[line+1] - 1 } // Adjustment for line-endings: \r|\n is the same as |\r\n. - if offset == eol && offset > 0 && m.src[offset-1] == '\r' { + if offset == eol && offset > 0 && m.content[offset-1] == '\r' { charOffset-- } return line, charOffset } +// Position returns the protocol position corresponding to the given offset. It +// returns false if offset is out of bounds for the file being mapped. +func (m *Mapper) Position(offset int) (protocol.Position, bool) { + l, c := m.LineColUTF16(offset) + if l < 0 { + return protocol.Position{}, false + } + return protocol.Position{ + Line: uint32(l), + Character: uint32(c), + }, true +} + +// Range returns the protocol range corresponding to the given start and end +// offsets. +func (m *Mapper) Range(start, end int) (protocol.Range, error) { + startPos, ok := m.Position(start) + if !ok { + return protocol.Range{}, errors.New("invalid start position") + } + endPos, ok := m.Position(end) + if !ok { + return protocol.Range{}, errors.New("invalid end position") + } + + return protocol.Range{Start: startPos, End: endPos}, nil +} + +// UTF16Len returns the UTF-16 length of the UTF-8 encoded content, were it to +// be re-encoded as UTF-16. func UTF16len(buf []byte) int { + // This function copies buf, but microbenchmarks showed it to be faster than + // using utf8.DecodeRune due to inlining and avoiding bounds checks. cnt := 0 for _, r := range string(buf) { cnt++ diff --git a/internal/lsp/lsppos/lsppos_test.go b/internal/lsp/lsppos/lsppos_test.go new file mode 100644 index 0000000000..8353f92768 --- /dev/null +++ b/internal/lsp/lsppos/lsppos_test.go @@ -0,0 +1,107 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lsppos_test + +import ( + "fmt" + "strings" + "testing" + + . "golang.org/x/tools/internal/lsp/lsppos" + "golang.org/x/tools/internal/lsp/protocol" +) + +type testCase struct { + content string // input text + substrOrOffset interface{} // explicit integer offset, or a substring + wantLine, wantChar int // expected LSP position information +} + +// offset returns the test case byte offset +func (c testCase) offset() int { + switch x := c.substrOrOffset.(type) { + case int: + return x + case string: + i := strings.Index(c.content, x) + if i < 0 { + panic(fmt.Sprintf("%q does not contain substring %q", c.content, x)) + } + return i + } + panic("substrOrIndex must be an integer or string") +} + +var tests = []testCase{ + {"a𐐀b", "a", 0, 0}, + {"a𐐀b", "𐐀", 0, 1}, + {"a𐐀b", "b", 0, 3}, + {"a𐐀b\n", "\n", 0, 4}, + {"a𐐀b\r\n", "\n", 0, 4}, // \r|\n is not a valid position, so we move back to the end of the first line. + {"a𐐀b\r\nx", "x", 1, 0}, + {"a𐐀b\r\nx\ny", "y", 2, 0}, + + // Testing EOL and EOF positions + {"", 0, 0, 0}, // 0th position of an empty buffer is (0, 0) + {"abc", "c", 0, 2}, + {"abc", 3, 0, 3}, + {"abc\n", "\n", 0, 3}, + {"abc\n", 4, 1, 0}, // position after a newline is on the next line +} + +func TestLineChar(t *testing.T) { + for _, test := range tests { + m := NewMapper([]byte(test.content)) + offset := test.offset() + gotLine, gotChar := m.LineColUTF16(offset) + if gotLine != test.wantLine || gotChar != test.wantChar { + t.Errorf("LineChar(%d) = (%d,%d), want (%d,%d)", offset, gotLine, gotChar, test.wantLine, test.wantChar) + } + } +} + +func TestInvalidOffset(t *testing.T) { + content := []byte("a𐐀b\r\nx\ny") + m := NewMapper(content) + for _, offset := range []int{-1, 100} { + gotLine, gotChar := m.LineColUTF16(offset) + if gotLine != -1 { + t.Errorf("LineChar(%d) = (%d,%d), want (-1,-1)", offset, gotLine, gotChar) + } + } +} + +func TestPosition(t *testing.T) { + for _, test := range tests { + m := NewMapper([]byte(test.content)) + offset := test.offset() + got, ok := m.Position(offset) + if !ok { + t.Error("invalid position for", test.substrOrOffset) + continue + } + want := protocol.Position{Line: uint32(test.wantLine), Character: uint32(test.wantChar)} + if got != want { + t.Errorf("Position(%d) = %v, want %v", offset, got, want) + } + } +} + +func TestRange(t *testing.T) { + for _, test := range tests { + m := NewMapper([]byte(test.content)) + offset := test.offset() + got, err := m.Range(0, offset) + if err != nil { + t.Fatal(err) + } + want := protocol.Range{ + End: protocol.Position{Line: uint32(test.wantLine), Character: uint32(test.wantChar)}, + } + if got != want { + t.Errorf("Range(%d) = %v, want %v", offset, got, want) + } + } +} diff --git a/internal/lsp/lsppos/token.go b/internal/lsp/lsppos/token.go new file mode 100644 index 0000000000..0a3caed948 --- /dev/null +++ b/internal/lsp/lsppos/token.go @@ -0,0 +1,59 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lsppos + +import ( + "errors" + "go/token" + + "golang.org/x/tools/internal/lsp/protocol" +) + +// TokenMapper maps token.Pos to LSP positions for a single file. +type TokenMapper struct { + // file is used for computing offsets. + file *token.File + + // For now, just delegate to a Mapper for position calculation. As an + // optimization we could avoid building the mapper and just use the file, but + // then have to correctly adjust for newline-terminated files. It is easier + // to just delegate unless performance becomes a concern. + mapper *Mapper +} + +// NewMapper creates a new TokenMapper for the given content, using the +// provided file to compute offsets. +func NewTokenMapper(content []byte, file *token.File) *TokenMapper { + return &TokenMapper{ + file: file, + mapper: NewMapper(content), + } +} + +// Position returns the protocol position corresponding to the given pos. It +// returns false if pos is out of bounds for the file being mapped. +func (m *TokenMapper) Position(pos token.Pos) (protocol.Position, bool) { + if int(pos) < m.file.Base() || int(pos) > m.file.Base()+m.file.Size() { + return protocol.Position{}, false + } + offset := m.file.Offset(pos) // usage of token.File.Offset is temporarily exempted + return m.mapper.Position(offset) +} + +// Range returns the protocol range corresponding to the given start and end +// positions. It returns an error if start or end is out of bounds for the file +// being mapped. +func (m *TokenMapper) Range(start, end token.Pos) (protocol.Range, error) { + startPos, ok := m.Position(start) + if !ok { + return protocol.Range{}, errors.New("invalid start position") + } + endPos, ok := m.Position(end) + if !ok { + return protocol.Range{}, errors.New("invalid end position") + } + + return protocol.Range{Start: startPos, End: endPos}, nil +} diff --git a/internal/lsp/lsppos/token_test.go b/internal/lsp/lsppos/token_test.go new file mode 100644 index 0000000000..c12d15026c --- /dev/null +++ b/internal/lsp/lsppos/token_test.go @@ -0,0 +1,57 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lsppos_test + +import ( + "go/token" + "testing" + + . "golang.org/x/tools/internal/lsp/lsppos" + "golang.org/x/tools/internal/lsp/protocol" +) + +func makeTokenMapper(content []byte) (*TokenMapper, *token.File) { + file := token.NewFileSet().AddFile("p.go", -1, len(content)) + file.SetLinesForContent(content) + return NewTokenMapper(content, file), file +} + +func TestInvalidPosition(t *testing.T) { + content := []byte("a𐐀b\r\nx\ny") + m, _ := makeTokenMapper(content) + + for _, pos := range []token.Pos{-1, 100} { + posn, ok := m.Position(pos) + if ok { + t.Errorf("Position(%d) = %v, want error", pos, posn) + } + } +} + +func TestTokenPosition(t *testing.T) { + for _, test := range tests { + m, f := makeTokenMapper([]byte(test.content)) + pos := token.Pos(f.Base() + test.offset()) + got, ok := m.Position(pos) + if !ok { + t.Error("invalid position for", test.substrOrOffset) + continue + } + want := protocol.Position{Line: uint32(test.wantLine), Character: uint32(test.wantChar)} + if got != want { + t.Errorf("Position(%d) = %v, want %v", pos, got, want) + } + gotRange, err := m.Range(token.Pos(f.Base()), pos) + if err != nil { + t.Fatal(err) + } + wantRange := protocol.Range{ + End: want, + } + if gotRange != wantRange { + t.Errorf("Range(%d) = %v, want %v", pos, got, want) + } + } +} diff --git a/internal/lsp/source/format.go b/internal/lsp/source/format.go index 79da0b3adb..5460ec332c 100644 --- a/internal/lsp/source/format.go +++ b/internal/lsp/source/format.go @@ -329,21 +329,18 @@ func ProtocolEditsFromSource(src []byte, edits []diff.TextEdit, converter span.C if err != nil { return nil, fmt.Errorf("computing offsets: %v", err) } - startLine, startChar := m.Position(spn.Start().Offset()) - endLine, endChar := m.Position(spn.End().Offset()) - if startLine < 0 || endLine < 0 { - return nil, fmt.Errorf("out of bound span: %v", spn) + rng, err := m.Range(spn.Start().Offset(), spn.End().Offset()) + if err != nil { + return nil, err } - pstart := protocol.Position{Line: uint32(startLine), Character: uint32(startChar)} - pend := protocol.Position{Line: uint32(endLine), Character: uint32(endChar)} - if pstart == pend && edit.NewText == "" { + if rng.Start == rng.End && edit.NewText == "" { // Degenerate case, which may result from a diff tool wanting to delete // '\r' in line endings. Filter it out. continue } result = append(result, protocol.TextEdit{ - Range: protocol.Range{Start: pstart, End: pend}, + Range: rng, NewText: edit.NewText, }) } diff --git a/internal/lsp/source/offset_test.go b/internal/lsp/source/offset_test.go index 10076773a5..71c7e72482 100644 --- a/internal/lsp/source/offset_test.go +++ b/internal/lsp/source/offset_test.go @@ -25,43 +25,39 @@ func TestTokenOffset(t *testing.T) { if err != nil { t.Fatal(err) } - var tokPkg *packages.Package + var tokenPkg, sourcePkg *packages.Package for _, pkg := range pkgs { - if pkg.PkgPath == "go/token" { - tokPkg = pkg - break + switch pkg.PkgPath { + case "go/token": + tokenPkg = pkg + case "golang.org/x/tools/internal/lsp/source": + sourcePkg = pkg } } - typname, ok := tokPkg.Types.Scope().Lookup("File").(*types.TypeName) - if !ok { - t.Fatal("expected go/token.File typename, got none") + + if tokenPkg == nil { + t.Fatal("missing package go/token") } - named, ok := typname.Type().(*types.Named) - if !ok { - t.Fatalf("expected named type, got %T", typname.Type) - } - var offset *types.Func - for i := 0; i < named.NumMethods(); i++ { - meth := named.Method(i) - if meth.Name() == "Offset" { - offset = meth - break - } + if sourcePkg == nil { + t.Fatal("missing package golang.org/x/tools/internal/lsp/source") } + + fileObj := tokenPkg.Types.Scope().Lookup("File") + tokenOffset, _, _ := types.LookupFieldOrMethod(fileObj.Type(), true, fileObj.Pkg(), "Offset") + + sourceOffset := sourcePkg.Types.Scope().Lookup("Offset").(*types.Func) + for _, pkg := range pkgs { + if pkg.PkgPath == "go/token" { // Allow usage from within go/token itself. + continue + } + if pkg.PkgPath == "golang.org/x/tools/internal/lsp/lsppos" { + continue // temporary exemption, to be refactored away + } for ident, obj := range pkg.TypesInfo.Uses { - if ident.Name != "Offset" { + if obj != tokenOffset { continue } - if pkg.PkgPath == "go/token" { - continue - } - if !types.Identical(offset.Type(), obj.Type()) { - continue - } - // The only permitted use is in golang.org/x/tools/internal/lsp/source.Offset, - // so check the enclosing function. - sourceOffset := pkg.Types.Scope().Lookup("Offset").(*types.Func) if sourceOffset.Pos() <= ident.Pos() && ident.Pos() <= sourceOffset.Scope().End() { continue // accepted usage }