From e2f413402527505144beea443078649380e0c545 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Sun, 19 Feb 2023 17:37:02 -0800 Subject: encoding/json: unify encodeState.string and encodeState.stringBytes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is part of the effort to reduce direct reliance on bytes.Buffer so that we can use a buffer with better pooling characteristics. Unify these two methods as a single version that uses generics to reduce duplicated logic. Unfortunately, we lack a generic version of utf8.DecodeRune (see #56948), so we cast []byte to string. The []byte variant is slightly slower for multi-byte unicode since casting results in a stack-allocated copy operation. Fortunately, this code path is used only for TextMarshalers. We can also delete TestStringBytes, which exists to ensure that the two duplicate implementations remain in sync. Performance: name old time/op new time/op delta CodeEncoder 399µs ± 2% 409µs ± 2% +2.59% (p=0.000 n=9+9) CodeEncoderError 450µs ± 1% 451µs ± 2% ~ (p=0.684 n=10+10) CodeMarshal 553µs ± 2% 562µs ± 3% ~ (p=0.075 n=10+10) CodeMarshalError 733µs ± 3% 737µs ± 2% ~ (p=0.400 n=9+10) EncodeMarshaler 24.9ns ±12% 24.1ns ±13% ~ (p=0.190 n=10+10) EncoderEncode 12.3ns ± 3% 14.7ns ±20% ~ (p=0.315 n=8+10) name old speed new speed delta CodeEncoder 4.87GB/s ± 2% 4.74GB/s ± 2% -2.53% (p=0.000 n=9+9) CodeEncoderError 4.31GB/s ± 1% 4.30GB/s ± 2% ~ (p=0.684 n=10+10) CodeMarshal 3.51GB/s ± 2% 3.46GB/s ± 3% ~ (p=0.075 n=10+10) CodeMarshalError 2.65GB/s ± 3% 2.63GB/s ± 2% ~ (p=0.400 n=9+10) name old alloc/op new alloc/op delta CodeEncoder 327B ±347% 447B ±232% +36.93% (p=0.034 n=9+10) CodeEncoderError 142B ± 1% 143B ± 0% ~ (p=1.000 n=8+7) CodeMarshal 1.96MB ± 2% 1.96MB ± 2% ~ (p=0.468 n=10+10) CodeMarshalError 2.04MB ± 3% 2.03MB ± 1% ~ (p=0.971 n=10+10) EncodeMarshaler 4.00B ± 0% 4.00B ± 0% ~ (all equal) EncoderEncode 0.00B 0.00B ~ (all equal) name old allocs/op new allocs/op delta 
CodeEncoder 0.00 0.00 ~ (all equal) CodeEncoderError 4.00 ± 0% 4.00 ± 0% ~ (all equal) CodeMarshal 1.00 ± 0% 1.00 ± 0% ~ (all equal) CodeMarshalError 6.00 ± 0% 6.00 ± 0% ~ (all equal) EncodeMarshaler 1.00 ± 0% 1.00 ± 0% ~ (all equal) EncoderEncode 0.00 0.00 ~ (all equal) There is a very slight performance degradation for CodeEncoder due to an increase in allocation sizes. However, the number of allocations did not change. This is likely due to remote effects of the growth rate differences between bytes.Buffer and the builtin append function. We shouldn't overly rely on the growth rate of bytes.Buffer anyway, since it is subject to possible change in #51462. As the benchtime increases, the alloc/op goes down, indicating that the amortized memory cost is fixed. Updates #27735 Change-Id: Ie35e480e292fe082d7986e0a4d81212c1d4202b3 Reviewed-on: https://go-review.googlesource.com/c/go/+/469556 Run-TryBot: Joseph Tsai Reviewed-by: Bryan Mills Reviewed-by: Ian Lance Taylor TryBot-Result: Gopher Robot Reviewed-by: Daniel Martí Auto-Submit: Joseph Tsai --- src/encoding/json/encode_test.go | 49 ---------------------------------------- 1 file changed, 49 deletions(-) (limited to 'src/encoding/json/encode_test.go') diff --git a/src/encoding/json/encode_test.go b/src/encoding/json/encode_test.go index c1b9ed2676..d027972d8a 100644 --- a/src/encoding/json/encode_test.go +++ b/src/encoding/json/encode_test.go @@ -15,7 +15,6 @@ import ( "runtime/debug" "strconv" "testing" - "unicode" ) type Optionals struct { @@ -701,54 +700,6 @@ func TestDuplicatedFieldDisappears(t *testing.T) { } } -func TestStringBytes(t *testing.T) { - t.Parallel() - // Test that encodeState.stringBytes and encodeState.string use the same encoding. 
- var r []rune - for i := '\u0000'; i <= unicode.MaxRune; i++ { - if testing.Short() && i > 1000 { - i = unicode.MaxRune - } - r = append(r, i) - } - s := string(r) + "\xff\xff\xffhello" // some invalid UTF-8 too - - for _, escapeHTML := range []bool{true, false} { - es := &encodeState{} - es.string(s, escapeHTML) - - esBytes := &encodeState{} - esBytes.stringBytes([]byte(s), escapeHTML) - - enc := es.Buffer.String() - encBytes := esBytes.Buffer.String() - if enc != encBytes { - i := 0 - for i < len(enc) && i < len(encBytes) && enc[i] == encBytes[i] { - i++ - } - enc = enc[i:] - encBytes = encBytes[i:] - i = 0 - for i < len(enc) && i < len(encBytes) && enc[len(enc)-i-1] == encBytes[len(encBytes)-i-1] { - i++ - } - enc = enc[:len(enc)-i] - encBytes = encBytes[:len(encBytes)-i] - - if len(enc) > 20 { - enc = enc[:20] + "..." - } - if len(encBytes) > 20 { - encBytes = encBytes[:20] + "..." - } - - t.Errorf("with escapeHTML=%t, encodings differ at %#q vs %#q", - escapeHTML, enc, encBytes) - } - } -} - func TestIssue10281(t *testing.T) { type Foo struct { N Number -- cgit v1.3