bufio: handle excessive white space in ScanWords

LGTM=r R=golang-codereviews, bradfitz, r CC=golang-codereviews https://golang.org/cl/109020043
author: Matthew Dempsky <mdempsky@google.com> 2014-06-16 12:59:10 -0700
committer: Rob Pike <r@golang.org> 2014-06-16 12:59:10 -0700
commit: 54bc760ad72578f5e948baaff0348ea0609f6395 (patch)
tree: a40d891fb1297ed5d74daa1311f181cf6ee377a4 /src/pkg
parent: 311e28636ab1b41a10510a46fe1c8728e8713057 (diff)
download: go-54bc760ad72578f5e948baaff0348ea0609f6395.tar.xz
2 files changed, 17 insertions, 5 deletions
diff --git a/src/pkg/bufio/scan.go b/src/pkg/bufio/scan.go
index 715ce071e3..97ae109095 100644
--- a/src/pkg/bufio/scan.go
+++ b/src/pkg/bufio/scan.go
@@ -326,9 +326,6 @@ func ScanWords(data []byte, atEOF bool) (advance int, token []byte, err error) {
 			break
 		}
 	}
-	if atEOF && len(data) == 0 {
-		return 0, nil, nil
-	}
 	// Scan until space, marking end of word.
 	for width, i := 0, start; i < len(data); i += width {
 		var r rune
@@ -342,5 +339,5 @@ func ScanWords(data []byte, atEOF bool) (advance int, token []byte, err error) {
 		return len(data), data[start:], nil
 	}
 	// Request more data.
-	return 0, nil, nil
+	return start, nil, nil
 }
diff --git a/src/pkg/bufio/scan_test.go b/src/pkg/bufio/scan_test.go
index 0db7cad204..ce49ece93a 100644
--- a/src/pkg/bufio/scan_test.go
+++ b/src/pkg/bufio/scan_test.go
@@ -15,6 +15,8 @@ import (
 	"unicode/utf8"
 )
 
+const smallMaxTokenSize = 256 // Much smaller for more efficient testing.
+
 // Test white space table matches the Unicode definition.
 func TestSpace(t *testing.T) {
 	for r := rune(0); r <= utf8.MaxRune; r++ {
@@ -172,7 +174,6 @@ func genLine(buf *bytes.Buffer, lineNum, n int, addNewline bool) {
 
 // Test the line splitter, including some carriage returns but no long lines.
 func TestScanLongLines(t *testing.T) {
-	const smallMaxTokenSize = 256 // Much smaller for more efficient testing.
 	// Build a buffer of lots of line lengths up to but not exceeding smallMaxTokenSize.
 	tmp := new(bytes.Buffer)
 	buf := new(bytes.Buffer)
@@ -404,3 +405,17 @@ func TestBadReader(t *testing.T) {
 		t.Errorf("unexpected error: %v", err)
 	}
 }
+
+func TestScanWordsExcessiveWhiteSpace(t *testing.T) {
+	const word = "ipsum"
+	s := strings.Repeat(" ", 4*smallMaxTokenSize) + word
+	scanner := NewScanner(strings.NewReader(s))
+	scanner.MaxTokenSize(smallMaxTokenSize)
+	scanner.Split(ScanWords)
+	if !scanner.Scan() {
+		t.Fatal("scan failed: %v", scanner.Err())
+	}
+	if token := scanner.Text(); token != word {
+		t.Fatal("unexpected token: %v", token)
+	}
+}
author	Matthew Dempsky <mdempsky@google.com>	2014-06-16 12:59:10 -0700
committer	Rob Pike <r@golang.org>	2014-06-16 12:59:10 -0700
commit	54bc760ad72578f5e948baaff0348ea0609f6395 (patch)
tree	a40d891fb1297ed5d74daa1311f181cf6ee377a4 /src/pkg
parent	311e28636ab1b41a10510a46fe1c8728e8713057 (diff)
download	go-54bc760ad72578f5e948baaff0348ea0609f6395.tar.xz