gopls/internal/lsp/protocol: simplify OffsetRange, Position

These functions are now both expressed in terms of OffsetPosition, which converts a file offset to a protocol (UTF-16) position. No span.Point intermediaries are allocated. Also, note some TODOs for further simplification. Change-Id: I7e95c1b3ab16e4acfe4e461ee0aaa260efb6eec5 Reviewed-on: https://go-review.googlesource.com/c/tools/+/439276 TryBot-Result: Gopher Robot <gobot@golang.org> gopls-CI: kokoro <noreply+kokoro@google.com> Run-TryBot: Alan Donovan <adonovan@google.com> Reviewed-by: Robert Findley <rfindley@google.com>
2022-10-05 17:45:44 -04:00 · 2022-10-05 17:45:44 -04:00 · ede3ab263c
parent dc3cf95c8a
commit ede3ab263c
1 changed files with 71 additions and 22 deletions
--- a/gopls/internal/lsp/protocol/span.go
+++ b/gopls/internal/lsp/protocol/span.go
@ -7,8 +7,10 @@
 package protocol

 import (
+	"bytes"
 	"fmt"
 	"go/token"
+	"unicode/utf8"

 	"golang.org/x/tools/internal/span"
 )
@ -19,6 +21,16 @@ type ColumnMapper struct {
 	URI     span.URI
 	TokFile *token.File
 	Content []byte
+
+	// File content is only really needed for UTF-16 column
+	// computation, which could be achieved more compactly.
+	// For example, one could record only the lines for which
+	// UTF-16 columns differ from the UTF-8 ones, or only the
+	// indices of the non-ASCII characters.
+	//
+	// TODO(adonovan): consider not retaining the entire file
+	// content, or at least not exposing the fact that we
+	// currently retain it.
 }

 // NewColumnMapper creates a new column mapper for the given uri and content.
@ -72,44 +84,81 @@ func (m *ColumnMapper) Range(s span.Span) (Range, error) {

 // OffsetRange returns a Range for the byte-offset interval Content[start:end],
 func (m *ColumnMapper) OffsetRange(start, end int) (Range, error) {
-	// TODO(adonovan): this can surely be simplified by expressing
-	// it terms of more primitive operations.
-
-	// We use span.ToPosition for its "line+1 at EOF" workaround.
-	startLine, startCol, err := span.ToPosition(m.TokFile, start)
+	startPosition, err := m.OffsetPosition(start)
 	if err != nil {
-		return Range{}, fmt.Errorf("start line/col: %v", err)
-	}
-	startPoint := span.NewPoint(startLine, startCol, start)
-	startPosition, err := m.Position(startPoint)
-	if err != nil {
-		return Range{}, fmt.Errorf("start position: %v", err)
+		return Range{}, fmt.Errorf("start: %v", err)
 	}

-	endLine, endCol, err := span.ToPosition(m.TokFile, end)
+	endPosition, err := m.OffsetPosition(end)
 	if err != nil {
-		return Range{}, fmt.Errorf("end line/col: %v", err)
-	}
-	endPoint := span.NewPoint(endLine, endCol, end)
-	endPosition, err := m.Position(endPoint)
-	if err != nil {
-		return Range{}, fmt.Errorf("end position: %v", err)
+		return Range{}, fmt.Errorf("end: %v", err)
 	}

 	return Range{Start: startPosition, End: endPosition}, nil
 }

+// Position returns the protocol position for the specified point,
+// which must have a byte offset.
 func (m *ColumnMapper) Position(p span.Point) (Position, error) {
-	chr, err := span.ToUTF16Column(p, m.Content)
+	if !p.HasOffset() {
+		return Position{}, fmt.Errorf("point is missing offset")
+	}
+	return m.OffsetPosition(p.Offset())
+}
+
+// OffsetPosition returns the protocol position of the specified
+// offset within m.Content.
+func (m *ColumnMapper) OffsetPosition(offset int) (Position, error) {
+	// We use span.ToPosition for its "line+1 at EOF" workaround.
+	// TODO(adonovan): ToPosition honors //line directives. It probably shouldn't.
+	line, _, err := span.ToPosition(m.TokFile, offset)
 	if err != nil {
-		return Position{}, err
+		return Position{}, fmt.Errorf("OffsetPosition: %v", err)
+	}
+	// If that workaround executed, skip the usual column computation.
+	char := 0
+	if offset != m.TokFile.Size() {
+		char = m.utf16Column(offset)
 	}
 	return Position{
-		Line:      uint32(p.Line() - 1),
-		Character: uint32(chr - 1),
+		Line:      uint32(line - 1),
+		Character: uint32(char),
 	}, nil
 }

+// utf16Column returns the zero-based column index of the
+// specified file offset, measured in UTF-16 code units.
+// Precondition: 0 <= offset <= len(m.Content).
+func (m *ColumnMapper) utf16Column(offset int) int {
+	s := m.Content[:offset]
+	if i := bytes.LastIndex(s, []byte("\n")); i >= 0 {
+		s = s[i+1:]
+	}
+	// s is the prefix of the line before offset.
+	return utf16len(s)
+}
+
+// utf16len returns the number of code units in the UTF-16 transcoding of s.
+func utf16len(s []byte) int {
+	var n int
+	for len(s) > 0 {
+		n++
+
+		// Fast path for ASCII.
+		if s[0] < 0x80 {
+			s = s[1:]
+			continue
+		}
+
+		r, size := utf8.DecodeRune(s)
+		if r >= 0x10000 {
+			n++ // surrogate pair
+		}
+		s = s[size:]
+	}
+	return n
+}
+
 func (m *ColumnMapper) Span(l Location) (span.Span, error) {
 	return m.RangeSpan(l.Range)
 }