aboutsummaryrefslogtreecommitdiff
path: root/src/pkg
diff options
context:
space:
mode:
authorMatthew Dempsky <mdempsky@google.com>2014-06-16 12:59:10 -0700
committerRob Pike <r@golang.org>2014-06-16 12:59:10 -0700
commit54bc760ad72578f5e948baaff0348ea0609f6395 (patch)
treea40d891fb1297ed5d74daa1311f181cf6ee377a4 /src/pkg
parent311e28636ab1b41a10510a46fe1c8728e8713057 (diff)
downloadgo-54bc760ad72578f5e948baaff0348ea0609f6395.tar.xz
bufio: handle excessive white space in ScanWords
LGTM=r R=golang-codereviews, bradfitz, r CC=golang-codereviews https://golang.org/cl/109020043
Diffstat (limited to 'src/pkg')
-rw-r--r--src/pkg/bufio/scan.go5
-rw-r--r--src/pkg/bufio/scan_test.go17
2 files changed, 17 insertions, 5 deletions
diff --git a/src/pkg/bufio/scan.go b/src/pkg/bufio/scan.go
index 715ce071e3..97ae109095 100644
--- a/src/pkg/bufio/scan.go
+++ b/src/pkg/bufio/scan.go
@@ -326,9 +326,6 @@ func ScanWords(data []byte, atEOF bool) (advance int, token []byte, err error) {
break
}
}
- if atEOF && len(data) == 0 {
- return 0, nil, nil
- }
// Scan until space, marking end of word.
for width, i := 0, start; i < len(data); i += width {
var r rune
@@ -342,5 +339,5 @@ func ScanWords(data []byte, atEOF bool) (advance int, token []byte, err error) {
return len(data), data[start:], nil
}
// Request more data.
- return 0, nil, nil
+ return start, nil, nil
}
diff --git a/src/pkg/bufio/scan_test.go b/src/pkg/bufio/scan_test.go
index 0db7cad204..ce49ece93a 100644
--- a/src/pkg/bufio/scan_test.go
+++ b/src/pkg/bufio/scan_test.go
@@ -15,6 +15,8 @@ import (
"unicode/utf8"
)
+const smallMaxTokenSize = 256 // Much smaller for more efficient testing.
+
// Test white space table matches the Unicode definition.
func TestSpace(t *testing.T) {
for r := rune(0); r <= utf8.MaxRune; r++ {
@@ -172,7 +174,6 @@ func genLine(buf *bytes.Buffer, lineNum, n int, addNewline bool) {
// Test the line splitter, including some carriage returns but no long lines.
func TestScanLongLines(t *testing.T) {
- const smallMaxTokenSize = 256 // Much smaller for more efficient testing.
// Build a buffer of lots of line lengths up to but not exceeding smallMaxTokenSize.
tmp := new(bytes.Buffer)
buf := new(bytes.Buffer)
@@ -404,3 +405,17 @@ func TestBadReader(t *testing.T) {
t.Errorf("unexpected error: %v", err)
}
}
+
+func TestScanWordsExcessiveWhiteSpace(t *testing.T) {
+ const word = "ipsum"
+ s := strings.Repeat(" ", 4*smallMaxTokenSize) + word
+ scanner := NewScanner(strings.NewReader(s))
+ scanner.MaxTokenSize(smallMaxTokenSize)
+ scanner.Split(ScanWords)
+ if !scanner.Scan() {
+ t.Fatal("scan failed: %v", scanner.Err())
+ }
+ if token := scanner.Text(); token != word {
+ t.Fatal("unexpected token: %v", token)
+ }
+}