diff --git a/src/encoding/csv/reader.go b/src/encoding/csv/reader.go index 3c08b9f9d1..e646740b4f 100644 --- a/src/encoding/csv/reader.go +++ b/src/encoding/csv/reader.go @@ -62,23 +62,30 @@ import ( ) // A ParseError is returned for parsing errors. -// The first line is 1. The first column is 0. +// Line numbers are 1-indexed and columns are 0-indexed. type ParseError struct { - Line int // Line where the error occurred - Column int // Column (rune index) where the error occurred - Err error // The actual error + RecordLine int // Line where the record starts + Line int // Line where the error occurred + Column int // Column (rune index) where the error occurred + Err error // The actual error } func (e *ParseError) Error() string { - return fmt.Sprintf("line %d, column %d: %s", e.Line, e.Column, e.Err) + if e.Err == ErrFieldCount { + return fmt.Sprintf("record on line %d: %v", e.Line, e.Err) + } + if e.RecordLine != e.Line { + return fmt.Sprintf("record on line %d; parse error on line %d, column %d: %v", e.RecordLine, e.Line, e.Column, e.Err) + } + return fmt.Sprintf("parse error on line %d, column %d: %v", e.Line, e.Column, e.Err) } // These are the errors that can be returned in ParseError.Error var ( - ErrTrailingComma = errors.New("extra delimiter at end of line") // no longer used + ErrTrailingComma = errors.New("extra delimiter at end of line") // Deprecated: No longer used. ErrBareQuote = errors.New("bare \" in non-quoted-field") - ErrQuote = errors.New("extraneous \" in field") - ErrFieldCount = errors.New("wrong number of fields in line") + ErrQuote = errors.New("extraneous or missing \" in field") + ErrFieldCount = errors.New("wrong number of fields") ) // A Reader reads records from a CSV-encoded file. @@ -86,17 +93,17 @@ var ( // As returned by NewReader, a Reader expects input conforming to RFC 4180. // The exported fields can be changed to customize the details before the // first call to Read or ReadAll. -// -// type Reader struct { // Comma is the field delimiter. // It is set to comma (',') by NewReader. Comma rune + // Comment, if not 0, is the comment character. Lines beginning with the // Comment character without preceding whitespace are ignored. // With leading whitespace the Comment character becomes part of the // field, even if TrimLeadingSpace is true. Comment rune + // FieldsPerRecord is the number of expected fields per record. // If FieldsPerRecord is positive, Read requires each record to // have the given number of fields. If FieldsPerRecord is 0, Read sets it to @@ -104,18 +111,22 @@ type Reader struct { // have the same field count. If FieldsPerRecord is negative, no check is // made and records may have a variable number of fields. FieldsPerRecord int + // If LazyQuotes is true, a quote may appear in an unquoted field and a // non-doubled quote may appear in a quoted field. - LazyQuotes bool - TrailingComma bool // ignored; here for backwards compatibility + LazyQuotes bool + // If TrimLeadingSpace is true, leading white space in a field is ignored. // This is done even if the field delimiter, Comma, is white space. TrimLeadingSpace bool + // ReuseRecord controls whether calls to Read may return a slice sharing // the backing array of the previous call's returned slice for performance. // By default, each call to Read returns newly allocated memory owned by the caller. ReuseRecord bool + TrailingComma bool // Deprecated: No longer used. + r *bufio.Reader // numLine is the current line being read in the CSV file. @@ -266,7 +277,7 @@ parseField: if !r.LazyQuotes { if j := bytes.IndexByte(field, '"'); j >= 0 { col := utf8.RuneCount(fullLine[:len(fullLine)-len(line[j:])]) - err = &ParseError{Line: r.numLine, Column: col, Err: ErrBareQuote} + err = &ParseError{RecordLine: recLine, Line: r.numLine, Column: col, Err: ErrBareQuote} break parseField } } @@ -306,7 +317,7 @@ parseField: default: // `"*` squence (invalid non-escaped quote). col := utf8.RuneCount(fullLine[:len(fullLine)-len(line)-quoteLen]) - err = &ParseError{Line: r.numLine, Column: col, Err: ErrQuote} + err = &ParseError{RecordLine: recLine, Line: r.numLine, Column: col, Err: ErrQuote} break parseField } } else if len(line) > 0 { @@ -324,7 +335,7 @@ parseField: // Abrupt end of file (EOF or error). if !r.LazyQuotes && errRead == nil { col := utf8.RuneCount(fullLine) - err = &ParseError{Line: r.numLine, Column: col, Err: ErrQuote} + err = &ParseError{RecordLine: recLine, Line: r.numLine, Column: col, Err: ErrQuote} break parseField } r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer)) @@ -354,7 +365,7 @@ parseField: // Check or update the expected fields per record. if r.FieldsPerRecord > 0 { if len(dst) != r.FieldsPerRecord && err == nil { - err = &ParseError{Line: recLine, Err: ErrFieldCount} + err = &ParseError{RecordLine: recLine, Line: recLine, Err: ErrFieldCount} } } else if r.FieldsPerRecord == 0 { r.FieldsPerRecord = len(dst) diff --git a/src/encoding/csv/reader_test.go b/src/encoding/csv/reader_test.go index 781847cefa..ed7d89dfe0 100644 --- a/src/encoding/csv/reader_test.go +++ b/src/encoding/csv/reader_test.go @@ -11,42 +11,35 @@ import ( "testing" ) -var readTests = []struct { - Name string - Input string - Output [][]string - UseFieldsPerRecord bool // false (default) means FieldsPerRecord is -1 +func TestRead(t *testing.T) { + tests := []struct { + Name string + Input string + Output [][]string + Error error - // These fields are copied into the Reader - Comma rune - Comment rune - FieldsPerRecord int - LazyQuotes bool - TrailingComma bool - TrimLeadingSpace bool - ReuseRecord bool - - Error error - Line int // Expected error line if != 0 -}{ - { + // These fields are copied into the Reader + Comma rune + Comment rune + UseFieldsPerRecord bool // false (default) means FieldsPerRecord is -1 + FieldsPerRecord int + LazyQuotes bool + TrimLeadingSpace bool + ReuseRecord bool + }{{ Name: "Simple", Input: "a,b,c\n", Output: [][]string{{"a", "b", "c"}}, - }, - { + }, { Name: "CRLF", Input: "a,b\r\nc,d\r\n", Output: [][]string{{"a", "b"}, {"c", "d"}}, - }, - { + }, { Name: "BareCR", Input: "a,b\rc,d\r\n", Output: [][]string{{"a", "b\rc", "d"}}, - }, - { - Name: "RFC4180test", - UseFieldsPerRecord: true, + }, { + Name: "RFC4180test", Input: `#field1,field2,field3 "aaa","bb b","ccc" @@ -59,163 +52,139 @@ zzz,yyy,xxx {"a,a", `b"bb`, "ccc"}, {"zzz", "yyy", "xxx"}, }, - }, - { + UseFieldsPerRecord: true, + FieldsPerRecord: 0, + }, { Name: "NoEOLTest", Input: "a,b,c", Output: [][]string{{"a", "b", "c"}}, - }, - { + }, { Name: "Semicolon", - Comma: ';', Input: "a;b;c\n", Output: [][]string{{"a", "b", "c"}}, - }, - { + Comma: ';', + }, { Name: "MultiLine", Input: `"two line","one line","three line field"`, Output: [][]string{{"two\nline", "one line", "three\nline\nfield"}}, - }, - { + }, { Name: "BlankLine", Input: "a,b,c\n\nd,e,f\n\n", Output: [][]string{ {"a", "b", "c"}, {"d", "e", "f"}, }, - }, - { - Name: "BlankLineFieldCount", - Input: "a,b,c\n\nd,e,f\n\n", - UseFieldsPerRecord: true, + }, { + Name: "BlankLineFieldCount", + Input: "a,b,c\n\nd,e,f\n\n", Output: [][]string{ {"a", "b", "c"}, {"d", "e", "f"}, }, - }, - { + UseFieldsPerRecord: true, + FieldsPerRecord: 0, + }, { Name: "TrimSpace", Input: " a, b, c\n", - TrimLeadingSpace: true, Output: [][]string{{"a", "b", "c"}}, - }, - { + TrimLeadingSpace: true, + }, { Name: "LeadingSpace", Input: " a, b, c\n", Output: [][]string{{" a", " b", " c"}}, - }, - { + }, { Name: "Comment", - Comment: '#', Input: "#1,2,3\na,b,c\n#comment", Output: [][]string{{"a", "b", "c"}}, - }, - { + Comment: '#', + }, { Name: "NoComment", Input: "#1,2,3\na,b,c", Output: [][]string{{"#1", "2", "3"}, {"a", "b", "c"}}, - }, - { + }, { Name: "LazyQuotes", - LazyQuotes: true, Input: `a "word","1"2",a","b`, Output: [][]string{{`a "word"`, `1"2`, `a"`, `b`}}, - }, - { - Name: "BareQuotes", LazyQuotes: true, + }, { + Name: "BareQuotes", Input: `a "word","1"2",a"`, Output: [][]string{{`a "word"`, `1"2`, `a"`}}, - }, - { - Name: "BareDoubleQuotes", LazyQuotes: true, + }, { + Name: "BareDoubleQuotes", Input: `a""b,c`, Output: [][]string{{`a""b`, `c`}}, - }, - { + LazyQuotes: true, + }, { Name: "BadDoubleQuotes", Input: `a""b,c`, - Error: &ParseError{Line: 1, Column: 1, Err: ErrBareQuote}, - }, - { + Error: &ParseError{RecordLine: 1, Line: 1, Column: 1, Err: ErrBareQuote}, + }, { Name: "TrimQuote", Input: ` "a"," b",c`, - TrimLeadingSpace: true, Output: [][]string{{"a", " b", "c"}}, - }, - { + TrimLeadingSpace: true, + }, { Name: "BadBareQuote", Input: `a "word","b"`, - Error: &ParseError{Line: 1, Column: 2, Err: ErrBareQuote}, - }, - { + Error: &ParseError{RecordLine: 1, Line: 1, Column: 2, Err: ErrBareQuote}, + }, { Name: "BadTrailingQuote", Input: `"a word",b"`, - Error: &ParseError{Line: 1, Column: 10, Err: ErrBareQuote}, - }, - { + Error: &ParseError{RecordLine: 1, Line: 1, Column: 10, Err: ErrBareQuote}, + }, { Name: "ExtraneousQuote", Input: `"a "word","b"`, - Error: &ParseError{Line: 1, Column: 3, Err: ErrQuote}, - }, - { + Error: &ParseError{RecordLine: 1, Line: 1, Column: 3, Err: ErrQuote}, + }, { Name: "BadFieldCount", - UseFieldsPerRecord: true, Input: "a,b,c\nd,e", - Error: &ParseError{Line: 2, Err: ErrFieldCount}, - }, - { + Error: &ParseError{RecordLine: 2, Line: 2, Err: ErrFieldCount}, + UseFieldsPerRecord: true, + FieldsPerRecord: 0, + }, { Name: "BadFieldCount1", + Input: `a,b,c`, + Error: &ParseError{RecordLine: 1, Line: 1, Err: ErrFieldCount}, UseFieldsPerRecord: true, FieldsPerRecord: 2, - Input: `a,b,c`, - Error: &ParseError{Line: 1, Err: ErrFieldCount}, - }, - { + }, { Name: "FieldCount", Input: "a,b,c\nd,e", Output: [][]string{{"a", "b", "c"}, {"d", "e"}}, - }, - { + }, { Name: "TrailingCommaEOF", Input: "a,b,c,", Output: [][]string{{"a", "b", "c", ""}}, - }, - { + }, { Name: "TrailingCommaEOL", Input: "a,b,c,\n", Output: [][]string{{"a", "b", "c", ""}}, - }, - { + }, { Name: "TrailingCommaSpaceEOF", - TrimLeadingSpace: true, Input: "a,b,c, ", Output: [][]string{{"a", "b", "c", ""}}, - }, - { - Name: "TrailingCommaSpaceEOL", TrimLeadingSpace: true, + }, { + Name: "TrailingCommaSpaceEOL", Input: "a,b,c, \n", Output: [][]string{{"a", "b", "c", ""}}, - }, - { - Name: "TrailingCommaLine3", TrimLeadingSpace: true, + }, { + Name: "TrailingCommaLine3", Input: "a,b,c\nd,e,f\ng,hi,", Output: [][]string{{"a", "b", "c"}, {"d", "e", "f"}, {"g", "hi", ""}}, - }, - { + TrimLeadingSpace: true, + }, { Name: "NotTrailingComma3", Input: "a,b,c, \n", Output: [][]string{{"a", "b", "c", " "}}, - }, - { - Name: "CommaFieldTest", - TrailingComma: true, + }, { + Name: "CommaFieldTest", Input: `x,y,z,w x,y,z, x,y,, @@ -239,166 +208,136 @@ x,,, {"x", "", "", ""}, {"", "", "", ""}, }, - }, - { - Name: "TrailingCommaIneffective1", - TrailingComma: true, - TrimLeadingSpace: true, - Input: "a,b,\nc,d,e", + }, { + Name: "TrailingCommaIneffective1", + Input: "a,b,\nc,d,e", Output: [][]string{ {"a", "b", ""}, {"c", "d", "e"}, }, - }, - { - Name: "TrailingCommaIneffective2", - TrailingComma: false, TrimLeadingSpace: true, - Input: "a,b,\nc,d,e", - Output: [][]string{ - {"a", "b", ""}, - {"c", "d", "e"}, - }, - }, - { - Name: "ReadAllReuseRecord", - ReuseRecord: true, - Input: "a,b\nc,d", + }, { + Name: "ReadAllReuseRecord", + Input: "a,b\nc,d", Output: [][]string{ {"a", "b"}, {"c", "d"}, }, - }, - { // issue 19019 - Name: "RecordLine1", + ReuseRecord: true, + }, { + Name: "RecordLine1", // Issue 19019 Input: "a,\"b\nc\"d,e", - Error: &ParseError{Line: 2, Column: 1, Err: ErrQuote}, - }, - { + Error: &ParseError{RecordLine: 1, Line: 2, Column: 1, Err: ErrQuote}, + }, { Name: "RecordLine2", Input: "a,b\n\"d\n\n,e", - Error: &ParseError{Line: 5, Column: 0, Err: ErrQuote}, - }, - { // issue 21201 - Name: "CRLFInQuotedField", + Error: &ParseError{RecordLine: 2, Line: 5, Column: 0, Err: ErrQuote}, + }, { + Name: "CRLFInQuotedField", // Issue 21201 Input: "\"Hello\r\nHi\"", Output: [][]string{ {"Hello\r\nHi"}, }, - }, - { // issue 19410 - Name: "BinaryBlobField", + }, { + Name: "BinaryBlobField", // Issue 19410 Input: "x09\x41\xb4\x1c,aktau", Output: [][]string{{"x09A\xb4\x1c", "aktau"}}, - }, - { + }, { Name: "TrailingCR", Input: "field1,field2\r", Output: [][]string{{"field1", "field2\r"}}, - }, - { + }, { Name: "NonASCIICommaAndComment", + Input: "a£b,c£ \td,e\n€ comment\n", + Output: [][]string{{"a", "b,c", "d,e"}}, TrimLeadingSpace: true, Comma: '£', Comment: '€', - Input: "a£b,c£ \td,e\n€ comment\n", - Output: [][]string{{"a", "b,c", "d,e"}}, - }, - { + }, { Name: "NonASCIICommaAndCommentWithQuotes", - Comma: '€', - Comment: 'λ', Input: "a€\" b,\"€ c\nλ comment\n", Output: [][]string{{"a", " b,", " c"}}, - }, - { + Comma: '€', + Comment: 'λ', + }, { + // λ and θ start with the same byte. + // This tests that the parser doesn't confuse such characters. Name: "NonASCIICommaConfusion", + Input: "\"abθcd\"λefθgh", + Output: [][]string{{"abθcd", "efθgh"}}, Comma: 'λ', Comment: '€', - // λ and θ start with the same byte. This test is intended to ensure the parser doesn't - // confuse such characters. - Input: "\"abθcd\"λefθgh", - Output: [][]string{{"abθcd", "efθgh"}}, - }, - { + }, { Name: "NonASCIICommentConfusion", - Comment: 'θ', Input: "λ\nλ\nθ\nλ\n", Output: [][]string{{"λ"}, {"λ"}, {"λ"}}, - }, - { + Comment: 'θ', + }, { Name: "QuotedFieldMultipleLF", Input: "\"\n\n\n\n\"", Output: [][]string{{"\n\n\n\n"}}, - }, - { + }, { Name: "MultipleCRLF", Input: "\r\n\r\n\r\n\r\n", - }, - { + }, { // The implementation may read each line in several chunks if it doesn't fit entirely // in the read buffer, so we should test the code to handle that condition. Name: "HugeLines", - Comment: '#', Input: strings.Repeat("#ignore\n", 10000) + strings.Repeat("@", 5000) + "," + strings.Repeat("*", 5000), Output: [][]string{{strings.Repeat("@", 5000), strings.Repeat("*", 5000)}}, - }, - { + Comment: '#', + }, { Name: "QuoteWithTrailingCRLF", Input: "\"foo\"bar\"\r\n", - Error: &ParseError{Line: 1, Column: 4, Err: ErrQuote}, - }, - { + Error: &ParseError{RecordLine: 1, Line: 1, Column: 4, Err: ErrQuote}, + }, { Name: "LazyQuoteWithTrailingCRLF", Input: "\"foo\"bar\"\r\n", - LazyQuotes: true, Output: [][]string{{`foo"bar`}}, - }, - { + LazyQuotes: true, + }, { Name: "DoubleQuoteWithTrailingCRLF", Input: "\"foo\"\"bar\"\r\n", Output: [][]string{{`foo"bar`}}, - }, - { + }, { Name: "EvenQuotes", Input: `""""""""`, Output: [][]string{{`"""`}}, - }, - { + }, { Name: "OddQuotes", Input: `"""""""`, - Error: &ParseError{Line: 1, Column: 7, Err: ErrQuote}, - }, - { + Error: &ParseError{RecordLine: 1, Line: 1, Column: 7, Err: ErrQuote}, + }, { Name: "LazyOddQuotes", Input: `"""""""`, - LazyQuotes: true, Output: [][]string{{`"""`}}, - }, -} + LazyQuotes: true, + }} -func TestRead(t *testing.T) { - for _, tt := range readTests { - r := NewReader(strings.NewReader(tt.Input)) - r.Comment = tt.Comment - if tt.UseFieldsPerRecord { - r.FieldsPerRecord = tt.FieldsPerRecord - } else { - r.FieldsPerRecord = -1 - } - r.LazyQuotes = tt.LazyQuotes - r.TrailingComma = tt.TrailingComma - r.TrimLeadingSpace = tt.TrimLeadingSpace - r.ReuseRecord = tt.ReuseRecord - if tt.Comma != 0 { - r.Comma = tt.Comma - } - out, err := r.ReadAll() - if !reflect.DeepEqual(err, tt.Error) { - t.Errorf("%s: ReadAll() error:\ngot %v\nwant %v", tt.Name, err, tt.Error) - } else if !reflect.DeepEqual(out, tt.Output) { - t.Errorf("%s: ReadAll() output:\ngot %q\nwant %q", tt.Name, out, tt.Output) - } + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + r := NewReader(strings.NewReader(tt.Input)) + + if tt.Comma != 0 { + r.Comma = tt.Comma + } + r.Comment = tt.Comment + if tt.UseFieldsPerRecord { + r.FieldsPerRecord = tt.FieldsPerRecord + } else { + r.FieldsPerRecord = -1 + } + r.LazyQuotes = tt.LazyQuotes + r.TrimLeadingSpace = tt.TrimLeadingSpace + r.ReuseRecord = tt.ReuseRecord + + out, err := r.ReadAll() + if !reflect.DeepEqual(err, tt.Error) { + t.Errorf("ReadAll() error:\ngot %v\nwant %v", err, tt.Error) + } else if !reflect.DeepEqual(out, tt.Output) { + t.Errorf("ReadAll() output:\ngot %q\nwant %q", out, tt.Output) + } + }) } }