aboutsummaryrefslogtreecommitdiff
path: root/src/encoding
diff options
context:
space:
mode:
authorJoe Tsai <joetsai@digital-static.net>2018-03-07 14:14:19 -0800
committerJoe Tsai <thebrokentoaster@gmail.com>2018-03-08 03:26:22 +0000
commit0add9a4dcfb4cbc0d99cd168752bd1bd641ce8e2 (patch)
treeff6bfb36c2885867e0d44bd62dcb488767a2571e /src/encoding
parent88466e93a4d4b7503dcbf880be2cd8ad7702d632 (diff)
downloadgo-0add9a4dcfb4cbc0d99cd168752bd1bd641ce8e2.tar.xz
encoding/csv: avoid mangling invalid UTF-8 in Writer
In the situation where a quoted field is necessary, avoid processing each UTF-8 rune one-by-one, which causes mangling of invalid sequences into utf8.RuneError, causing a loss of information. Instead, search only for the escaped characters, handle those specially and copy everything else in between verbatim. This symmetrically matches the behavior of Reader. Fixes #24298 Change-Id: I9276f64891084ce8487678f663fad711b4095dbb Reviewed-on: https://go-review.googlesource.com/99297 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
Diffstat (limited to 'src/encoding')
-rw-r--r--src/encoding/csv/writer.go51
-rw-r--r--src/encoding/csv/writer_test.go2
2 files changed, 34 insertions, 19 deletions
diff --git a/src/encoding/csv/writer.go b/src/encoding/csv/writer.go
index ef3594e523..31c4f9c22d 100644
--- a/src/encoding/csv/writer.go
+++ b/src/encoding/csv/writer.go
@@ -57,33 +57,46 @@ func (w *Writer) Write(record []string) error {
}
continue
}
+
if err := w.w.WriteByte('"'); err != nil {
return err
}
+ for len(field) > 0 {
+ // Search for special characters.
+ i := strings.IndexAny(field, "\"\r\n")
+ if i < 0 {
+ i = len(field)
+ }
+
+ // Copy verbatim everything before the special character.
+ if _, err := w.w.WriteString(field[:i]); err != nil {
+ return err
+ }
+ field = field[i:]
- for _, r1 := range field {
- var err error
- switch r1 {
- case '"':
- _, err = w.w.WriteString(`""`)
- case '\r':
- if !w.UseCRLF {
- err = w.w.WriteByte('\r')
+ // Encode the special character.
+ if len(field) > 0 {
+ var err error
+ switch field[0] {
+ case '"':
+ _, err = w.w.WriteString(`""`)
+ case '\r':
+ if !w.UseCRLF {
+ err = w.w.WriteByte('\r')
+ }
+ case '\n':
+ if w.UseCRLF {
+ _, err = w.w.WriteString("\r\n")
+ } else {
+ err = w.w.WriteByte('\n')
+ }
}
- case '\n':
- if w.UseCRLF {
- _, err = w.w.WriteString("\r\n")
- } else {
- err = w.w.WriteByte('\n')
+ field = field[1:]
+ if err != nil {
+ return err
}
- default:
- _, err = w.w.WriteRune(r1)
- }
- if err != nil {
- return err
}
}
-
if err := w.w.WriteByte('"'); err != nil {
return err
}
diff --git a/src/encoding/csv/writer_test.go b/src/encoding/csv/writer_test.go
index 8ddca0abe0..99bc84e998 100644
--- a/src/encoding/csv/writer_test.go
+++ b/src/encoding/csv/writer_test.go
@@ -39,6 +39,8 @@ var writeTests = []struct {
{Input: [][]string{{"a", "a", ""}}, Output: "a,a,\n"},
{Input: [][]string{{"a", "a", "a"}}, Output: "a,a,a\n"},
{Input: [][]string{{`\.`}}, Output: "\"\\.\"\n"},
+ {Input: [][]string{{"x09\x41\xb4\x1c", "aktau"}}, Output: "x09\x41\xb4\x1c,aktau\n"},
+ {Input: [][]string{{",x09\x41\xb4\x1c", "aktau"}}, Output: "\",x09\x41\xb4\x1c\",aktau\n"},
}
func TestWrite(t *testing.T) {