gopls/internal/lsp/protocol: simplify OffsetRange, Position

These functions are now both expressed in terms of OffsetPosition,
which converts a file offset to a protocol (UTF-16) position.
No span.Point intermediaries are allocated.

Also, note some TODOs for further simplification.

Change-Id: I7e95c1b3ab16e4acfe4e461ee0aaa260efb6eec5
Reviewed-on: https://go-review.googlesource.com/c/tools/+/439276
TryBot-Result: Gopher Robot <gobot@golang.org>
gopls-CI: kokoro <noreply+kokoro@google.com>
Run-TryBot: Alan Donovan <adonovan@google.com>
Reviewed-by: Robert Findley <rfindley@google.com>
This commit is contained in:
Alan Donovan 2022-10-05 17:45:44 -04:00
parent dc3cf95c8a
commit ede3ab263c
1 changed files with 71 additions and 22 deletions

View File

@ -7,8 +7,10 @@
package protocol
import (
"bytes"
"fmt"
"go/token"
"unicode/utf8"
"golang.org/x/tools/internal/span"
)
@ -19,6 +21,16 @@ type ColumnMapper struct {
URI span.URI
TokFile *token.File
Content []byte
// File content is only really needed for UTF-16 column
// computation, which could be be achieved more compactly.
// For example, one could record only the lines for which
// UTF-16 columns differ from the UTF-8 ones, or only the
// indices of the non-ASCII characters.
//
// TODO(adonovan): consider not retaining the entire file
// content, or at least not exposing the fact that we
// currently retain it.
}
// NewColumnMapper creates a new column mapper for the given uri and content.
@ -72,44 +84,81 @@ func (m *ColumnMapper) Range(s span.Span) (Range, error) {
// OffsetRange returns a Range for the byte-offset interval Content[start:end],
func (m *ColumnMapper) OffsetRange(start, end int) (Range, error) {
// TODO(adonovan): this can surely be simplified by expressing
// it terms of more primitive operations.
// We use span.ToPosition for its "line+1 at EOF" workaround.
startLine, startCol, err := span.ToPosition(m.TokFile, start)
startPosition, err := m.OffsetPosition(start)
if err != nil {
return Range{}, fmt.Errorf("start line/col: %v", err)
}
startPoint := span.NewPoint(startLine, startCol, start)
startPosition, err := m.Position(startPoint)
if err != nil {
return Range{}, fmt.Errorf("start position: %v", err)
return Range{}, fmt.Errorf("start: %v", err)
}
endLine, endCol, err := span.ToPosition(m.TokFile, end)
endPosition, err := m.OffsetPosition(end)
if err != nil {
return Range{}, fmt.Errorf("end line/col: %v", err)
}
endPoint := span.NewPoint(endLine, endCol, end)
endPosition, err := m.Position(endPoint)
if err != nil {
return Range{}, fmt.Errorf("end position: %v", err)
return Range{}, fmt.Errorf("end: %v", err)
}
return Range{Start: startPosition, End: endPosition}, nil
}
// Position returns the protocol position for the specified point,
// which must have a byte offset.
func (m *ColumnMapper) Position(p span.Point) (Position, error) {
chr, err := span.ToUTF16Column(p, m.Content)
if !p.HasOffset() {
return Position{}, fmt.Errorf("point is missing offset")
}
return m.OffsetPosition(p.Offset())
}
// OffsetPosition returns the protocol position of the specified
// offset within m.Content.
func (m *ColumnMapper) OffsetPosition(offset int) (Position, error) {
// We use span.ToPosition for its "line+1 at EOF" workaround.
// TODO(adonovan): ToPosition honors //line directives. It probably shouldn't.
line, _, err := span.ToPosition(m.TokFile, offset)
if err != nil {
return Position{}, err
return Position{}, fmt.Errorf("OffsetPosition: %v", err)
}
// If that workaround executed, skip the usual column computation.
char := 0
if offset != m.TokFile.Size() {
char = m.utf16Column(offset)
}
return Position{
Line: uint32(p.Line() - 1),
Character: uint32(chr - 1),
Line: uint32(line - 1),
Character: uint32(char),
}, nil
}
// utf16Column returns the zero-based column index of the
// specified file offset, measured in UTF-16 codes.
// Precondition: 0 <= offset <= len(m.Content).
func (m *ColumnMapper) utf16Column(offset int) int {
s := m.Content[:offset]
if i := bytes.LastIndex(s, []byte("\n")); i >= 0 {
s = s[i+1:]
}
// s is the prefix of the line before offset.
return utf16len(s)
}
// utf16len returns the number of codes in the UTF-16 transcoding of s.
func utf16len(s []byte) int {
var n int
for len(s) > 0 {
n++
// Fast path for ASCII.
if s[0] < 0x80 {
s = s[1:]
continue
}
r, size := utf8.DecodeRune(s)
if r >= 0x10000 {
n++ // surrogate pair
}
s = s[size:]
}
return n
}
func (m *ColumnMapper) Span(l Location) (span.Span, error) {
return m.RangeSpan(l.Range)
}