From ebdc0b8d68e04ad383088c8b3ab963de4a9b5c5d Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Wed, 26 Sep 2018 20:19:11 +0000 Subject: bytes, strings: add ReplaceAll Credit to Harald Nordgren for the proposal in https://golang.org/cl/137456 and #27864. Fixes #27864 Change-Id: I80546683b0623124fe4627a71af88add2f6c1c27 Reviewed-on: https://go-review.googlesource.com/137855 Reviewed-by: Ian Lance Taylor --- src/strings/strings_test.go | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/strings/strings_test.go') diff --git a/src/strings/strings_test.go b/src/strings/strings_test.go index 20bc484f39..bb6a5b931b 100644 --- a/src/strings/strings_test.go +++ b/src/strings/strings_test.go @@ -1243,6 +1243,12 @@ func TestReplace(t *testing.T) { if s := Replace(tt.in, tt.old, tt.new, tt.n); s != tt.out { t.Errorf("Replace(%q, %q, %q, %d) = %q, want %q", tt.in, tt.old, tt.new, tt.n, s, tt.out) } + if tt.n == -1 { + s := ReplaceAll(tt.in, tt.old, tt.new) + if s != tt.out { + t.Errorf("ReplaceAll(%q, %q, %q) = %q, want %q", tt.in, tt.old, tt.new, s, tt.out) + } + } } } -- cgit v1.3-5-g9baa From f74de24fbd94d021b047afe0dc62eddeb65ca384 Mon Sep 17 00:00:00 2001 From: Martin Möhrmann Date: Sun, 26 Aug 2018 14:22:39 +0200 Subject: strings: correctly handle invalid utf8 sequences in Map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When an invalid UTF-8 byte sequence is decoded in a range loop over a string a utf8.RuneError rune is returned. This is not distinguishable from decoding the valid '\uFFFD' sequence representing utf8.RuneError from a string without further checks within the range loop. The previous Map code did not do any extra checks and would thereby not map invalid UTF-8 byte sequences correctly when those were mapping to utf8.RuneError. 
Fix this by adding the extra checks necessary to distinguish the decoding of invalid UTF-8 byte sequences from decoding the sequence for utf8.RuneError when the mapping of a rune is utf8.RuneError. This fix does not result in a measurable performance regression: name old time/op new time/op delta ByteByteMap 1.05µs ± 3% 1.03µs ± 3% ~ (p=0.118 n=10+10) Map/identity/ASCII 169ns ± 2% 170ns ± 1% ~ (p=0.501 n=9+10) Map/identity/Greek 298ns ± 1% 303ns ± 4% ~ (p=0.338 n=10+10) Map/change/ASCII 323ns ± 3% 325ns ± 4% ~ (p=0.679 n=8+10) Map/change/Greek 628ns ± 5% 635ns ± 1% ~ (p=0.460 n=10+9) MapNoChanges 120ns ± 4% 119ns ± 1% ~ (p=0.496 n=10+9) Fixes #26305 Change-Id: I70e99fa244983c5040756fa4549ac1e8cb6022c3 Reviewed-on: https://go-review.googlesource.com/c/131495 Reviewed-by: Brad Fitzpatrick --- src/strings/strings.go | 24 ++++++++++++------------ src/strings/strings_test.go | 4 ++-- 2 files changed, 14 insertions(+), 14 deletions(-) (limited to 'src/strings/strings_test.go') diff --git a/src/strings/strings.go b/src/strings/strings.go index 00200e4e24..ecc8c97d9e 100644 --- a/src/strings/strings.go +++ b/src/strings/strings.go @@ -463,27 +463,27 @@ func Map(mapping func(rune) rune, s string) string { for i, c := range s { r := mapping(c) - if r == c { + if r == c && c != utf8.RuneError { continue } + var width int + if c == utf8.RuneError { + c, width = utf8.DecodeRuneInString(s[i:]) + if width != 1 && r == c { + continue + } + } else { + width = utf8.RuneLen(c) + } + b.Grow(len(s) + utf8.UTFMax) b.WriteString(s[:i]) if r >= 0 { b.WriteRune(r) } - if c == utf8.RuneError { - // RuneError is the result of either decoding - // an invalid sequence or '\uFFFD'. Determine - // the correct number of bytes we need to advance. 
- _, w := utf8.DecodeRuneInString(s[i:]) - i += w - } else { - i += utf8.RuneLen(c) - } - - s = s[i:] + s = s[i+width:] break } diff --git a/src/strings/strings_test.go b/src/strings/strings_test.go index bb6a5b931b..eee2dd55df 100644 --- a/src/strings/strings_test.go +++ b/src/strings/strings_test.go @@ -646,10 +646,10 @@ func TestMap(t *testing.T) { if unicode.Is(unicode.Latin, r) { return r } - return '?' + return utf8.RuneError } m = Map(replaceNotLatin, "Hello\255World") - expect = "Hello?World" + expect = "Hello\uFFFDWorld" if m != expect { t.Errorf("replace invalid sequence: expected %q got %q", expect, m) } -- cgit v1.3-5-g9baa