diff options
| author | Joe Tsai <joetsai@digital-static.net> | 2018-03-07 14:14:19 -0800 |
|---|---|---|
| committer | Joe Tsai <thebrokentoaster@gmail.com> | 2018-03-08 03:26:22 +0000 |
| commit | 0add9a4dcfb4cbc0d99cd168752bd1bd641ce8e2 (patch) | |
| tree | ff6bfb36c2885867e0d44bd62dcb488767a2571e /src/encoding | |
| parent | 88466e93a4d4b7503dcbf880be2cd8ad7702d632 (diff) | |
| download | go-0add9a4dcfb4cbc0d99cd168752bd1bd641ce8e2.tar.xz | |
encoding/csv: avoid mangling invalid UTF-8 in Writer
In the situation where a quoted field is necessary, avoid processing
each UTF-8 rune one-by-one, which causes mangling of invalid sequences
into utf8.RuneError, causing a loss of information.
Instead, search only for the escaped characters, handle those specially
and copy everything else in between verbatim.
This symmetrically matches the behavior of Reader.
Fixes #24298
Change-Id: I9276f64891084ce8487678f663fad711b4095dbb
Reviewed-on: https://go-review.googlesource.com/99297
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
Diffstat (limited to 'src/encoding')
| -rw-r--r-- | src/encoding/csv/writer.go | 51 | ||||
| -rw-r--r-- | src/encoding/csv/writer_test.go | 2 |
2 files changed, 34 insertions, 19 deletions
diff --git a/src/encoding/csv/writer.go b/src/encoding/csv/writer.go index ef3594e523..31c4f9c22d 100644 --- a/src/encoding/csv/writer.go +++ b/src/encoding/csv/writer.go @@ -57,33 +57,46 @@ func (w *Writer) Write(record []string) error { } continue } + if err := w.w.WriteByte('"'); err != nil { return err } + for len(field) > 0 { + // Search for special characters. + i := strings.IndexAny(field, "\"\r\n") + if i < 0 { + i = len(field) + } + + // Copy verbatim everything before the special character. + if _, err := w.w.WriteString(field[:i]); err != nil { + return err + } + field = field[i:] - for _, r1 := range field { - var err error - switch r1 { - case '"': - _, err = w.w.WriteString(`""`) - case '\r': - if !w.UseCRLF { - err = w.w.WriteByte('\r') + // Encode the special character. + if len(field) > 0 { + var err error + switch field[0] { + case '"': + _, err = w.w.WriteString(`""`) + case '\r': + if !w.UseCRLF { + err = w.w.WriteByte('\r') + } + case '\n': + if w.UseCRLF { + _, err = w.w.WriteString("\r\n") + } else { + err = w.w.WriteByte('\n') + } } - case '\n': - if w.UseCRLF { - _, err = w.w.WriteString("\r\n") - } else { - err = w.w.WriteByte('\n') + field = field[1:] + if err != nil { + return err } - default: - _, err = w.w.WriteRune(r1) - } - if err != nil { - return err } } - if err := w.w.WriteByte('"'); err != nil { return err } diff --git a/src/encoding/csv/writer_test.go b/src/encoding/csv/writer_test.go index 8ddca0abe0..99bc84e998 100644 --- a/src/encoding/csv/writer_test.go +++ b/src/encoding/csv/writer_test.go @@ -39,6 +39,8 @@ var writeTests = []struct { {Input: [][]string{{"a", "a", ""}}, Output: "a,a,\n"}, {Input: [][]string{{"a", "a", "a"}}, Output: "a,a,a\n"}, {Input: [][]string{{`\.`}}, Output: "\"\\.\"\n"}, + {Input: [][]string{{"x09\x41\xb4\x1c", "aktau"}}, Output: "x09\x41\xb4\x1c,aktau\n"}, + {Input: [][]string{{",x09\x41\xb4\x1c", "aktau"}}, Output: "\",x09\x41\xb4\x1c\",aktau\n"}, } func TestWrite(t *testing.T) { |
