diff --git a/gopls/internal/lsp/protocol/span.go b/gopls/internal/lsp/protocol/span.go index f24a28e112..cbf57a9ec5 100644 --- a/gopls/internal/lsp/protocol/span.go +++ b/gopls/internal/lsp/protocol/span.go @@ -7,8 +7,10 @@ package protocol import ( + "bytes" "fmt" "go/token" + "unicode/utf8" "golang.org/x/tools/internal/span" ) @@ -19,6 +21,16 @@ type ColumnMapper struct { URI span.URI TokFile *token.File Content []byte + + // File content is only really needed for UTF-16 column + // computation, which could be be achieved more compactly. + // For example, one could record only the lines for which + // UTF-16 columns differ from the UTF-8 ones, or only the + // indices of the non-ASCII characters. + // + // TODO(adonovan): consider not retaining the entire file + // content, or at least not exposing the fact that we + // currently retain it. } // NewColumnMapper creates a new column mapper for the given uri and content. @@ -72,44 +84,81 @@ func (m *ColumnMapper) Range(s span.Span) (Range, error) { // OffsetRange returns a Range for the byte-offset interval Content[start:end], func (m *ColumnMapper) OffsetRange(start, end int) (Range, error) { - // TODO(adonovan): this can surely be simplified by expressing - // it terms of more primitive operations. - - // We use span.ToPosition for its "line+1 at EOF" workaround. - startLine, startCol, err := span.ToPosition(m.TokFile, start) + startPosition, err := m.OffsetPosition(start) if err != nil { - return Range{}, fmt.Errorf("start line/col: %v", err) - } - startPoint := span.NewPoint(startLine, startCol, start) - startPosition, err := m.Position(startPoint) - if err != nil { - return Range{}, fmt.Errorf("start position: %v", err) + return Range{}, fmt.Errorf("start: %v", err) } - endLine, endCol, err := span.ToPosition(m.TokFile, end) + endPosition, err := m.OffsetPosition(end) if err != nil { - return Range{}, fmt.Errorf("end line/col: %v", err) - } - endPoint := span.NewPoint(endLine, endCol, end) - endPosition, err := m.Position(endPoint) - if err != nil { - return Range{}, fmt.Errorf("end position: %v", err) + return Range{}, fmt.Errorf("end: %v", err) } return Range{Start: startPosition, End: endPosition}, nil } +// Position returns the protocol position for the specified point, +// which must have a byte offset. func (m *ColumnMapper) Position(p span.Point) (Position, error) { - chr, err := span.ToUTF16Column(p, m.Content) + if !p.HasOffset() { + return Position{}, fmt.Errorf("point is missing offset") + } + return m.OffsetPosition(p.Offset()) +} + +// OffsetPosition returns the protocol position of the specified +// offset within m.Content. +func (m *ColumnMapper) OffsetPosition(offset int) (Position, error) { + // We use span.ToPosition for its "line+1 at EOF" workaround. + // TODO(adonovan): ToPosition honors //line directives. It probably shouldn't. + line, _, err := span.ToPosition(m.TokFile, offset) if err != nil { - return Position{}, err + return Position{}, fmt.Errorf("OffsetPosition: %v", err) + } + // If that workaround executed, skip the usual column computation. + char := 0 + if offset != m.TokFile.Size() { + char = m.utf16Column(offset) } return Position{ - Line: uint32(p.Line() - 1), - Character: uint32(chr - 1), + Line: uint32(line - 1), + Character: uint32(char), }, nil } +// utf16Column returns the zero-based column index of the +// specified file offset, measured in UTF-16 codes. +// Precondition: 0 <= offset <= len(m.Content). +func (m *ColumnMapper) utf16Column(offset int) int { + s := m.Content[:offset] + if i := bytes.LastIndex(s, []byte("\n")); i >= 0 { + s = s[i+1:] + } + // s is the prefix of the line before offset. + return utf16len(s) +} + +// utf16len returns the number of codes in the UTF-16 transcoding of s. +func utf16len(s []byte) int { + var n int + for len(s) > 0 { + n++ + + // Fast path for ASCII. + if s[0] < 0x80 { + s = s[1:] + continue + } + + r, size := utf8.DecodeRune(s) + if r >= 0x10000 { + n++ // surrogate pair + } + s = s[size:] + } + return n +} + func (m *ColumnMapper) Span(l Location) (span.Span, error) { return m.RangeSpan(l.Range) }