encoding/csv: add Reader.InputOffset method

Fixes #43401.

Change-Id: I2498e77e41d845130d95012bc8623bfb29c0dda1
Reviewed-on: https://go-review.googlesource.com/c/go/+/405675
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Ian Lance Taylor <iant@golang.org>
Run-TryBot: Ian Lance Taylor <iant@google.com>
Auto-Submit: Ian Lance Taylor <iant@google.com>
Reviewed-by: Ian Lance Taylor <iant@google.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
This commit is contained in:
hopehook 2022-05-11 22:42:00 +08:00 committed by Gopher Robot
parent cb458c05a8
commit 3474cd4eee
3 changed files with 27 additions and 7 deletions

1
api/next/43401.txt Normal file
View File

@ -0,0 +1 @@
pkg encoding/csv, method (*Reader) InputOffset() int64 #43401

View File

@ -149,6 +149,9 @@ type Reader struct {
// numLine is the current line being read in the CSV file.
numLine int
// offset is the input stream byte offset of the current reader position.
offset int64
// rawBuffer is a line buffer only used by the readLine method.
rawBuffer []byte
@ -210,6 +213,13 @@ func (r *Reader) FieldPos(field int) (line, column int) {
return p.line, p.col
}
// InputOffset returns the input stream byte offset of the current reader
// position. The offset gives the location of the end of the most recently
// read row and the beginning of the next row.
func (r *Reader) InputOffset() int64 {
return r.offset
}
// pos holds the position of a field in the current line.
type position struct {
line, col int
@ -247,14 +257,16 @@ func (r *Reader) readLine() ([]byte, error) {
}
line = r.rawBuffer
}
if len(line) > 0 && err == io.EOF {
readSize := len(line)
if readSize > 0 && err == io.EOF {
err = nil
// For backwards compatibility, drop trailing \r before EOF.
if line[len(line)-1] == '\r' {
line = line[:len(line)-1]
if line[readSize-1] == '\r' {
line = line[:readSize-1]
}
}
r.numLine++
r.offset += int64(readSize)
// Normalize \r\n to \n on all input lines.
if n := len(line); n >= 2 && line[n-2] == '\r' && line[n-1] == '\n' {
line[n-2] = '\n'

View File

@ -404,7 +404,7 @@ field"`,
}}
func TestRead(t *testing.T) {
newReader := func(tt readTest) (*Reader, [][][2]int, map[int][2]int) {
newReader := func(tt readTest) (*Reader, [][][2]int, map[int][2]int, string) {
positions, errPositions, input := makePositions(tt.Input)
r := NewReader(strings.NewReader(input))
@ -420,12 +420,12 @@ func TestRead(t *testing.T) {
r.LazyQuotes = tt.LazyQuotes
r.TrimLeadingSpace = tt.TrimLeadingSpace
r.ReuseRecord = tt.ReuseRecord
return r, positions, errPositions
return r, positions, errPositions, input
}
for _, tt := range readTests {
t.Run(tt.Name, func(t *testing.T) {
r, positions, errPositions := newReader(tt)
r, positions, errPositions, input := newReader(tt)
out, err := r.ReadAll()
if wantErr := firstError(tt.Errors, positions, errPositions); wantErr != nil {
if !reflect.DeepEqual(err, wantErr) {
@ -443,8 +443,15 @@ func TestRead(t *testing.T) {
}
}
// Check input offset after call ReadAll()
inputByteSize := len(input)
inputOffset := r.InputOffset()
if err == nil && int64(inputByteSize) != inputOffset {
t.Errorf("wrong input offset after call ReadAll():\ngot: %d\nwant: %d\ninput: %s", inputOffset, inputByteSize, input)
}
// Check field and error positions.
r, _, _ = newReader(tt)
r, _, _, _ = newReader(tt)
for recNum := 0; ; recNum++ {
rec, err := r.Read()
var wantErr error