bufio: handle excessive white space in ScanWords

LGTM=r
R=golang-codereviews, bradfitz, r
CC=golang-codereviews
https://golang.org/cl/109020043
This commit is contained in:
Matthew Dempsky 2014-06-16 12:59:10 -07:00 committed by Rob Pike
parent 311e28636a
commit 54bc760ad7
2 changed files with 17 additions and 5 deletions

View File

@ -326,9 +326,6 @@ func ScanWords(data []byte, atEOF bool) (advance int, token []byte, err error) {
break
}
}
if atEOF && len(data) == 0 {
return 0, nil, nil
}
// Scan until space, marking end of word.
for width, i := 0, start; i < len(data); i += width {
var r rune
@ -342,5 +339,5 @@ func ScanWords(data []byte, atEOF bool) (advance int, token []byte, err error) {
return len(data), data[start:], nil
}
// Request more data.
return 0, nil, nil
return start, nil, nil
}

View File

@ -15,6 +15,8 @@ import (
"unicode/utf8"
)
const smallMaxTokenSize = 256 // Much smaller for more efficient testing.
// Test white space table matches the Unicode definition.
func TestSpace(t *testing.T) {
for r := rune(0); r <= utf8.MaxRune; r++ {
@ -172,7 +174,6 @@ func genLine(buf *bytes.Buffer, lineNum, n int, addNewline bool) {
// Test the line splitter, including some carriage returns but no long lines.
func TestScanLongLines(t *testing.T) {
const smallMaxTokenSize = 256 // Much smaller for more efficient testing.
// Build a buffer of lots of line lengths up to but not exceeding smallMaxTokenSize.
tmp := new(bytes.Buffer)
buf := new(bytes.Buffer)
@ -404,3 +405,17 @@ func TestBadReader(t *testing.T) {
t.Errorf("unexpected error: %v", err)
}
}
// TestScanWordsExcessiveWhiteSpace checks that a scanner splitting with
// ScanWords returns the word that follows a run of leading spaces four times
// longer than the limit passed to MaxTokenSize.
func TestScanWordsExcessiveWhiteSpace(t *testing.T) {
	const word = "ipsum"
	// The input is nothing but spaces followed by a single word.
	s := strings.Repeat(" ", 4*smallMaxTokenSize) + word
	scanner := NewScanner(strings.NewReader(s))
	scanner.MaxTokenSize(smallMaxTokenSize)
	scanner.Split(ScanWords)
	if !scanner.Scan() {
		// Fatalf, not Fatal: the message contains a formatting verb.
		t.Fatalf("scan failed: %v", scanner.Err())
	}
	if token := scanner.Text(); token != word {
		t.Fatalf("unexpected token: %v", token)
	}
}