encoding/csv: port the go-fuzz function to native fuzzing

Beyond the required file move and refactor to use the testing package, a number of changes were made to get the fuzzing working properly. First, add more logs to see what is going on. Second, some option combinations set Comma to the null character, which simply never worked at all. I suspect the author meant to leave the comma character as the default instead. This was spotted thanks to the added logging. Third, the round-trip DeepEqual check did not work at all when any comments were involved, as the writer does not support them. Fourth and last, massage the first and second parsed records before comparing them with DeepEqual, as the nature of Reader and Writer causes empty quoted records and CRLF sequences to change. With all the changes above, the fuzzing function appears to work normally on my laptop now. I fuzzed for a solid five minutes and could no longer encounter any errors or panics. Change-Id: Ie27f65f66099bdaa076343cee18b480803d2e4d3 Reviewed-on: https://go-review.googlesource.com/c/go/+/576375 Reviewed-by: Dmitri Shuralyov <dmitshur@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Joseph Tsai <joetsai@digital-static.net> Reviewed-by: Ian Lance Taylor <iant@google.com>
author: Daniel Martí <mvdan@mvdan.cc> 2024-04-04 13:05:13 +0900
committer: Daniel Martí <mvdan@mvdan.cc> 2024-04-05 01:26:13 +0000
commit: 2e064cf14441460290fd25d9d61f02a9d0bae671 (patch)
tree: 8f6794df88befa875af42cff631443eed3121ede /src/encoding
parent: fd999fda5941f215ef082c6ef70e44e648db5485 (diff)
download: go-2e064cf14441460290fd25d9d61f02a9d0bae671.tar.xz
2 files changed, 96 insertions, 70 deletions
diff --git a/src/encoding/csv/fuzz.go b/src/encoding/csv/fuzz.go
deleted file mode 100644
index 5f5cdfcbf8..0000000000
--- a/src/encoding/csv/fuzz.go
+++ /dev/null
@@ -1,70 +0,0 @@
-// Copyright 2019 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build gofuzz
-
-package csv
-
-import (
-	"bytes"
-	"fmt"
-	"reflect"
-)
-
-func Fuzz(data []byte) int {
-	score := 0
-	buf := new(bytes.Buffer)
-
-	for _, tt := range []Reader{
-		{},
-		{Comma: ';'},
-		{Comma: '\t'},
-		{LazyQuotes: true},
-		{TrimLeadingSpace: true},
-		{Comment: '#'},
-		{Comment: ';'},
-	} {
-		r := NewReader(bytes.NewReader(data))
-		r.Comma = tt.Comma
-		r.Comment = tt.Comment
-		r.LazyQuotes = tt.LazyQuotes
-		r.TrimLeadingSpace = tt.TrimLeadingSpace
-
-		records, err := r.ReadAll()
-		if err != nil {
-			continue
-		}
-		score = 1
-
-		buf.Reset()
-		w := NewWriter(buf)
-		w.Comma = tt.Comma
-		err = w.WriteAll(records)
-		if err != nil {
-			fmt.Printf("writer  = %#v\n", w)
-			fmt.Printf("records = %v\n", records)
-			panic(err)
-		}
-
-		r = NewReader(buf)
-		r.Comma = tt.Comma
-		r.Comment = tt.Comment
-		r.LazyQuotes = tt.LazyQuotes
-		r.TrimLeadingSpace = tt.TrimLeadingSpace
-		result, err := r.ReadAll()
-		if err != nil {
-			fmt.Printf("reader  = %#v\n", r)
-			fmt.Printf("records = %v\n", records)
-			panic(err)
-		}
-
-		if !reflect.DeepEqual(records, result) {
-			fmt.Println("records = \n", records)
-			fmt.Println("result  = \n", records)
-			panic("not equal")
-		}
-	}
-
-	return score
-}
diff --git a/src/encoding/csv/fuzz_test.go b/src/encoding/csv/fuzz_test.go
new file mode 100644
index 0000000000..6342fa416d
--- /dev/null
+++ b/src/encoding/csv/fuzz_test.go
@@ -0,0 +1,96 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package csv
+
+import (
+	"bytes"
+	"reflect"
+	"slices"
+	"strings"
+	"testing"
+)
+
+func FuzzRoundtrip(f *testing.F) {
+	f.Fuzz(func(t *testing.T, in []byte) {
+		buf := new(bytes.Buffer)
+
+		t.Logf("input = %q", in)
+		for _, tt := range []Reader{
+			{Comma: ','},
+			{Comma: ';'},
+			{Comma: '\t'},
+			{Comma: ',', LazyQuotes: true},
+			{Comma: ',', TrimLeadingSpace: true},
+			{Comma: ',', Comment: '#'},
+			{Comma: ',', Comment: ';'},
+		} {
+			t.Logf("With options:")
+			t.Logf("  Comma            = %q", tt.Comma)
+			t.Logf("  LazyQuotes       = %t", tt.LazyQuotes)
+			t.Logf("  TrimLeadingSpace = %t", tt.TrimLeadingSpace)
+			t.Logf("  Comment          = %q", tt.Comment)
+			r := NewReader(bytes.NewReader(in))
+			r.Comma = tt.Comma
+			r.Comment = tt.Comment
+			r.LazyQuotes = tt.LazyQuotes
+			r.TrimLeadingSpace = tt.TrimLeadingSpace
+
+			records, err := r.ReadAll()
+			if err != nil {
+				continue
+			}
+			t.Logf("first records = %#v", records)
+
+			buf.Reset()
+			w := NewWriter(buf)
+			w.Comma = tt.Comma
+			err = w.WriteAll(records)
+			if err != nil {
+				t.Logf("writer  = %#v\n", w)
+				t.Logf("records = %v\n", records)
+				t.Fatal(err)
+			}
+			if tt.Comment != 0 {
+				// Writer doesn't support comments, so it can turn the quoted record "#"
+				// into a non-quoted comment line, failing the roundtrip check below.
+				continue
+			}
+			t.Logf("second input = %q", buf.Bytes())
+
+			r = NewReader(buf)
+			r.Comma = tt.Comma
+			r.Comment = tt.Comment
+			r.LazyQuotes = tt.LazyQuotes
+			r.TrimLeadingSpace = tt.TrimLeadingSpace
+			result, err := r.ReadAll()
+			if err != nil {
+				t.Logf("reader  = %#v\n", r)
+				t.Logf("records = %v\n", records)
+				t.Fatal(err)
+			}
+
+			// The reader turns \r\n into \n.
+			for _, record := range records {
+				for i, s := range record {
+					record[i] = strings.ReplaceAll(s, "\r\n", "\n")
+				}
+			}
+			// Note that the reader parses the quoted record "" as an empty string,
+			// and the writer turns that into an empty line, which the reader skips over.
+			// Filter those out to avoid false positives.
+			records = slices.DeleteFunc(records, func(record []string) bool {
+				return len(record) == 1 && record[0] == ""
+			})
+			// The reader uses nil when returning no records at all.
+			if len(records) == 0 {
+				records = nil
+			}
+
+			if !reflect.DeepEqual(records, result) {
+				t.Fatalf("first read got %#v, second got %#v", records, result)
+			}
+		}
+	})
+}
author	Daniel Martí <mvdan@mvdan.cc>	2024-04-04 13:05:13 +0900
committer	Daniel Martí <mvdan@mvdan.cc>	2024-04-05 01:26:13 +0000
commit	2e064cf14441460290fd25d9d61f02a9d0bae671 (patch)
tree	8f6794df88befa875af42cff631443eed3121ede /src/encoding
parent	fd999fda5941f215ef082c6ef70e44e648db5485 (diff)
download	go-2e064cf14441460290fd25d9d61f02a9d0bae671.tar.xz