diff options
| author | Matthew Dempsky <mdempsky@google.com> | 2014-06-16 12:59:10 -0700 |
|---|---|---|
| committer | Rob Pike <r@golang.org> | 2014-06-16 12:59:10 -0700 |
| commit | 54bc760ad72578f5e948baaff0348ea0609f6395 (patch) | |
| tree | a40d891fb1297ed5d74daa1311f181cf6ee377a4 /src/pkg | |
| parent | 311e28636ab1b41a10510a46fe1c8728e8713057 (diff) | |
| download | go-54bc760ad72578f5e948baaff0348ea0609f6395.tar.xz | |
bufio: handle excessive white space in ScanWords
LGTM=r
R=golang-codereviews, bradfitz, r
CC=golang-codereviews
https://golang.org/cl/109020043
Diffstat (limited to 'src/pkg')
| -rw-r--r-- | src/pkg/bufio/scan.go | 5 | ||||
| -rw-r--r-- | src/pkg/bufio/scan_test.go | 17 |
2 files changed, 17 insertions, 5 deletions
diff --git a/src/pkg/bufio/scan.go b/src/pkg/bufio/scan.go index 715ce071e3..97ae109095 100644 --- a/src/pkg/bufio/scan.go +++ b/src/pkg/bufio/scan.go @@ -326,9 +326,6 @@ func ScanWords(data []byte, atEOF bool) (advance int, token []byte, err error) { break } } - if atEOF && len(data) == 0 { - return 0, nil, nil - } // Scan until space, marking end of word. for width, i := 0, start; i < len(data); i += width { var r rune @@ -342,5 +339,5 @@ func ScanWords(data []byte, atEOF bool) (advance int, token []byte, err error) { return len(data), data[start:], nil } // Request more data. - return 0, nil, nil + return start, nil, nil } diff --git a/src/pkg/bufio/scan_test.go b/src/pkg/bufio/scan_test.go index 0db7cad204..ce49ece93a 100644 --- a/src/pkg/bufio/scan_test.go +++ b/src/pkg/bufio/scan_test.go @@ -15,6 +15,8 @@ import ( "unicode/utf8" ) +const smallMaxTokenSize = 256 // Much smaller for more efficient testing. + // Test white space table matches the Unicode definition. func TestSpace(t *testing.T) { for r := rune(0); r <= utf8.MaxRune; r++ { @@ -172,7 +174,6 @@ func genLine(buf *bytes.Buffer, lineNum, n int, addNewline bool) { // Test the line splitter, including some carriage returns but no long lines. func TestScanLongLines(t *testing.T) { - const smallMaxTokenSize = 256 // Much smaller for more efficient testing. // Build a buffer of lots of line lengths up to but not exceeding smallMaxTokenSize. tmp := new(bytes.Buffer) buf := new(bytes.Buffer) @@ -404,3 +405,17 @@ func TestBadReader(t *testing.T) { t.Errorf("unexpected error: %v", err) } } + +func TestScanWordsExcessiveWhiteSpace(t *testing.T) { + const word = "ipsum" + s := strings.Repeat(" ", 4*smallMaxTokenSize) + word + scanner := NewScanner(strings.NewReader(s)) + scanner.MaxTokenSize(smallMaxTokenSize) + scanner.Split(ScanWords) + if !scanner.Scan() { + t.Fatal("scan failed: %v", scanner.Err()) + } + if token := scanner.Text(); token != word { + t.Fatal("unexpected token: %v", token) + } +} |
