aboutsummaryrefslogtreecommitdiff
path: root/src/pkg/strings
diff options
context:
space:
mode:
authorBrad Fitzpatrick <bradfitz@golang.org>2011-10-03 15:19:04 -0700
committerBrad Fitzpatrick <bradfitz@golang.org>2011-10-03 15:19:04 -0700
commitbba7396fbd4c3245dbedd3cc2a1fb25137331ebb (patch)
tree042d2580b79df727ae9f01e6de5cda1e093f1b9e /src/pkg/strings
parente419535f2ae5c8aef1f64cdb207049c8712ffb48 (diff)
downloadgo-bba7396fbd4c3245dbedd3cc2a1fb25137331ebb.tar.xz
strings: implement a faster byte->string Replacer
This implements a replacer for when all old strings are single bytes, but new values are not. BenchmarkHTMLEscapeNew 1000000 1090 ns/op BenchmarkHTMLEscapeOld 1000000 2049 ns/op R=rsc CC=golang-dev https://golang.org/cl/5176043
Diffstat (limited to 'src/pkg/strings')
-rw-r--r--src/pkg/strings/replace.go110
-rw-r--r--src/pkg/strings/replace_test.go32
2 files changed, 139 insertions, 3 deletions
diff --git a/src/pkg/strings/replace.go b/src/pkg/strings/replace.go
index 8eab12d3c8..64a7f208b9 100644
--- a/src/pkg/strings/replace.go
+++ b/src/pkg/strings/replace.go
@@ -36,8 +36,12 @@ func NewReplacer(oldnew ...string) *Replacer {
panic("strings.NewReplacer: odd argument count")
}
- var bb byteReplacer
- var gen genericReplacer
+ // Possible implementations.
+ var (
+ bb byteReplacer
+ bs byteStringReplacer
+ gen genericReplacer
+ )
allOldBytes, allNewBytes := true, true
for len(oldnew) > 0 {
@@ -49,7 +53,17 @@ func NewReplacer(oldnew ...string) *Replacer {
if len(new) != 1 {
allNewBytes = false
}
+
+ // generic
gen.p = append(gen.p, pair{old, new})
+
+ // byte -> string
+ if allOldBytes {
+ bs.old.set(old[0])
+ bs.new[old[0]] = []byte(new)
+ }
+
+ // byte -> byte
if allOldBytes && allNewBytes {
bb.old.set(old[0])
bb.new[old[0]] = new[0]
@@ -59,6 +73,9 @@ func NewReplacer(oldnew ...string) *Replacer {
if allOldBytes && allNewBytes {
return &Replacer{r: &bb}
}
+ if allOldBytes {
+ return &Replacer{r: &bs}
+ }
return &Replacer{r: &gen}
}
@@ -176,6 +193,7 @@ func (r *byteReplacer) Replace(s string) string {
}
func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err os.Error) {
+ // TODO(bradfitz): use io.WriteString with slices of s, avoiding allocation.
bufsize := 32 << 10
if len(s) < bufsize {
bufsize = len(s)
@@ -199,6 +217,94 @@ func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err os.Error)
return n, nil
}
+// byteStringReplacer is the implementation that's used when all the
+// "old" values are single ASCII bytes but the "new" values vary in
+// size.
+type byteStringReplacer struct {
+ // old has a bit set for each old byte that should be replaced.
+ old byteBitmap
+
+ // replacement string, indexed by old byte. only valid if
+ // corresponding old bit is set.
+ new [256][]byte
+}
+
+func (r *byteStringReplacer) Replace(s string) string {
+ newSize := 0
+ anyChanges := false
+ for i := 0; i < len(s); i++ {
+ b := s[i]
+ if r.old[b>>5]&uint32(1<<(b&31)) != 0 {
+ anyChanges = true
+ newSize += len(r.new[b])
+ } else {
+ newSize++
+ }
+ }
+ if !anyChanges {
+ return s
+ }
+ buf := make([]byte, newSize)
+ bi := buf
+ for i := 0; i < len(s); i++ {
+ b := s[i]
+ if r.old[b>>5]&uint32(1<<(b&31)) != 0 {
+ n := copy(bi[:], r.new[b])
+ bi = bi[n:]
+ } else {
+ bi[0] = b
+ bi = bi[1:]
+ }
+ }
+ return string(buf)
+}
+
+// WriteString maintains one buffer that's at most 32KB. The bytes in
+// s are enumerated and the buffer is filled. If it reaches its
+// capacity or a byte has a replacement, the buffer is flushed to w.
+func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err os.Error) {
+ // TODO(bradfitz): use io.WriteString with slices of s instead.
+ bufsize := 32 << 10
+ if len(s) < bufsize {
+ bufsize = len(s)
+ }
+ buf := make([]byte, bufsize)
+ bi := buf[:0]
+
+ for i := 0; i < len(s); i++ {
+ b := s[i]
+ var new []byte
+ if r.old[b>>5]&uint32(1<<(b&31)) != 0 {
+ new = r.new[b]
+ } else {
+ bi = append(bi, b)
+ }
+ if len(bi) == cap(bi) || (len(bi) > 0 && len(new) > 0) {
+ nw, err := w.Write(bi)
+ n += nw
+ if err != nil {
+ return n, err
+ }
+ bi = buf[:0]
+ }
+ if len(new) > 0 {
+ nw, err := w.Write(new)
+ n += nw
+ if err != nil {
+ return n, err
+ }
+ }
+ }
+ if len(bi) > 0 {
+ nw, err := w.Write(bi)
+ n += nw
+ if err != nil {
+ return n, err
+ }
+ }
+ return n, nil
+}
+
// strings is too low-level to import io/ioutil
var discard io.Writer = devNull(0)
diff --git a/src/pkg/strings/replace_test.go b/src/pkg/strings/replace_test.go
index 20e734f6cd..e337856c64 100644
--- a/src/pkg/strings/replace_test.go
+++ b/src/pkg/strings/replace_test.go
@@ -41,14 +41,21 @@ var capitalLetters = NewReplacer("a", "A", "b", "B")
var blankToXReplacer = NewReplacer("", "X", "o", "O")
var ReplacerTests = []ReplacerTest{
+ // byte->string
{htmlEscaper, "No changes", "No changes"},
{htmlEscaper, "I <3 escaping & stuff", "I &lt;3 escaping &amp; stuff"},
{htmlEscaper, "&&&", "&amp;&amp;&amp;"},
+
+ // generic
{replacer, "fooaaabar", "foo3[aaa]b1[a]r"},
{replacer, "long, longerst, longer", "short, most long, medium"},
{replacer, "XiX", "YiY"},
+
+ // byte->byte
{capitalLetters, "brad", "BrAd"},
{capitalLetters, Repeat("a", (32<<10)+123), Repeat("A", (32<<10)+123)},
+
+ // hitting "" special case
{blankToXReplacer, "oo", "XOXOX"},
}
@@ -84,7 +91,9 @@ type pickAlgorithmTest struct {
var pickAlgorithmTests = []pickAlgorithmTest{
{capitalLetters, "*strings.byteReplacer"},
- {NewReplacer("a", "A", "b", "Bb"), "*strings.genericReplacer"},
+ {NewReplacer("12", "123"), "*strings.genericReplacer"},
+ {NewReplacer("1", "12"), "*strings.byteStringReplacer"},
+ {htmlEscaper, "*strings.byteStringReplacer"},
}
func TestPickAlgorithm(t *testing.T) {
@@ -118,6 +127,27 @@ func BenchmarkByteByteMatch(b *testing.B) {
}
}
+func BenchmarkByteStringMatch(b *testing.B) {
+ str := "<" + Repeat("a", 99) + Repeat("b", 99) + ">"
+ for i := 0; i < b.N; i++ {
+ htmlEscaper.Replace(str)
+ }
+}
+
+func BenchmarkHTMLEscapeNew(b *testing.B) {
+ str := "I <3 to escape HTML & other text too."
+ for i := 0; i < b.N; i++ {
+ htmlEscaper.Replace(str)
+ }
+}
+
+func BenchmarkHTMLEscapeOld(b *testing.B) {
+ str := "I <3 to escape HTML & other text too."
+ for i := 0; i < b.N; i++ {
+ oldhtmlEscape(str)
+ }
+}
+
// BenchmarkByteByteReplaces compares byteByteImpl against multiple Replaces.
func BenchmarkByteByteReplaces(b *testing.B) {
str := Repeat("a", 100) + Repeat("b", 100)