aboutsummaryrefslogtreecommitdiff
path: root/src/encoding/json/fold_test.go
diff options
context:
space:
mode:
author: Joe Tsai <joetsai@digital-static.net> 2023-02-20 11:26:10 -0800
committer: Gopher Robot <gobot@golang.org> 2023-02-27 17:37:27 +0000
commit: b9b8cecbfc72168ca03ad586cc2ed52b0e8db409 (patch)
tree: 3c40d1289a66411daf422f15c27640dff524610d /src/encoding/json/fold_test.go
parent: 2de406bb9e26df19a31b5f6111bb221b60964d48 (diff)
download: go-b9b8cecbfc72168ca03ad586cc2ed52b0e8db409.tar.xz
encoding/json: simplify folded name logic
The folded name logic (despite all attempts to optimize it) was fundamentally an O(n) operation where every field in a struct needed to be linearly scanned in order to find a match. This made unmarshaling of unknown fields always O(n). Instead of optimizing the comparison for each field, make it such that we can look up a name in O(1).

We accomplish this by maintaining a map keyed by pre-folded names, which we can pre-calculate when processing the struct type. Using a stack-allocated buffer, we can fold the input name and look up its presence in the map.

Also, instead of mapping from names to indexes, map directly to a pointer to the field information. The memory cost of this is the same and avoids an extra slice index.

The new logic is both simpler and faster.

Performance:

    name                 old time/op    new time/op    delta
    CodeDecoder          2.47ms ± 4%    2.42ms ± 2%    -1.83%  (p=0.022 n=10+9)
    UnicodeDecoder       259ns ± 2%     248ns ± 1%     -4.32%  (p=0.000 n=10+10)
    DecoderStream        150ns ± 1%     149ns ± 1%     ~       (p=0.516 n=10+10)
    CodeUnmarshal        3.13ms ± 2%    3.09ms ± 2%    -1.37%  (p=0.022 n=10+9)
    CodeUnmarshalReuse   2.50ms ± 1%    2.45ms ± 1%    -1.96%  (p=0.001 n=8+9)
    UnmarshalString      67.1ns ± 5%    64.5ns ± 5%    -3.90%  (p=0.005 n=10+10)
    UnmarshalFloat64     60.1ns ± 4%    58.4ns ± 2%    -2.89%  (p=0.002 n=10+8)
    UnmarshalInt64       51.0ns ± 4%    49.2ns ± 1%    -3.53%  (p=0.001 n=10+8)
    Issue10335           80.7ns ± 2%    79.2ns ± 1%    -1.82%  (p=0.016 n=10+8)
    Issue34127           28.6ns ± 3%    28.8ns ± 3%    ~       (p=0.388 n=9+10)
    Unmapped             177ns ± 2%     177ns ± 2%     ~       (p=0.956 n=10+10)

Change-Id: I478b2b958f5a63a69c9a991a39cd5ffb43244a2a
Reviewed-on: https://go-review.googlesource.com/c/go/+/471196
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Run-TryBot: Joseph Tsai <joetsai@digital-static.net>
Auto-Submit: Joseph Tsai <joetsai@digital-static.net>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Johan Brandhorst-Satzkorn <johan.brandhorst@gmail.com>
Reviewed-by: Than McIntosh <thanm@google.com>
Reviewed-by: Daniel Martí <mvdan@mvdan.cc>
Diffstat (limited to 'src/encoding/json/fold_test.go')
-rw-r--r-- src/encoding/json/fold_test.go 132
1 files changed, 36 insertions, 96 deletions
diff --git a/src/encoding/json/fold_test.go b/src/encoding/json/fold_test.go
index 4daa3590f5..9d6fd0559d 100644
--- a/src/encoding/json/fold_test.go
+++ b/src/encoding/json/fold_test.go
@@ -6,105 +6,45 @@ package json
import (
"bytes"
- "strings"
"testing"
- "unicode/utf8"
)
-var foldTests = []struct {
- fn func(s, t []byte) bool
- s, t string
- want bool
-}{
- {equalFoldRight, "", "", true},
- {equalFoldRight, "a", "a", true},
- {equalFoldRight, "", "a", false},
- {equalFoldRight, "a", "", false},
- {equalFoldRight, "a", "A", true},
- {equalFoldRight, "AB", "ab", true},
- {equalFoldRight, "AB", "ac", false},
- {equalFoldRight, "sbkKc", "ſbKKc", true},
- {equalFoldRight, "SbKkc", "ſbKKc", true},
- {equalFoldRight, "SbKkc", "ſbKK", false},
- {equalFoldRight, "e", "é", false},
- {equalFoldRight, "s", "S", true},
-
- {simpleLetterEqualFold, "", "", true},
- {simpleLetterEqualFold, "abc", "abc", true},
- {simpleLetterEqualFold, "abc", "ABC", true},
- {simpleLetterEqualFold, "abc", "ABCD", false},
- {simpleLetterEqualFold, "abc", "xxx", false},
-
- {asciiEqualFold, "a_B", "A_b", true},
- {asciiEqualFold, "aa@", "aa`", false}, // verify 0x40 and 0x60 aren't case-equivalent
-}
-
-func TestFold(t *testing.T) {
- for i, tt := range foldTests {
- if got := tt.fn([]byte(tt.s), []byte(tt.t)); got != tt.want {
- t.Errorf("%d. %q, %q = %v; want %v", i, tt.s, tt.t, got, tt.want)
- }
- truth := strings.EqualFold(tt.s, tt.t)
- if truth != tt.want {
- t.Errorf("strings.EqualFold doesn't agree with case %d", i)
- }
+func FuzzEqualFold(f *testing.F) {
+ for _, ss := range [][2]string{
+ {"", ""},
+ {"123abc", "123ABC"},
+ {"αβδ", "ΑΒΔ"},
+ {"abc", "xyz"},
+ {"abc", "XYZ"},
+ {"1", "2"},
+ {"hello, world!", "hello, world!"},
+ {"hello, world!", "Hello, World!"},
+ {"hello, world!", "HELLO, WORLD!"},
+ {"hello, world!", "jello, world!"},
+ {"γειά, κόσμε!", "γειά, κόσμε!"},
+ {"γειά, κόσμε!", "Γειά, Κόσμε!"},
+ {"γειά, κόσμε!", "ΓΕΙΆ, ΚΌΣΜΕ!"},
+ {"γειά, κόσμε!", "ΛΕΙΆ, ΚΌΣΜΕ!"},
+ {"AESKey", "aesKey"},
+ {"AESKEY", "aes_key"},
+ {"aes_key", "AES_KEY"},
+ {"AES_KEY", "aes-key"},
+ {"aes-key", "AES-KEY"},
+ {"AES-KEY", "aesKey"},
+ {"aesKey", "AesKey"},
+ {"AesKey", "AESKey"},
+ {"AESKey", "aeskey"},
+ {"DESKey", "aeskey"},
+ {"AES Key", "aeskey"},
+ } {
+ f.Add([]byte(ss[0]), []byte(ss[1]))
}
-}
-
-func TestFoldAgainstUnicode(t *testing.T) {
- var buf1, buf2 []byte
- var runes []rune
- for i := 0x20; i <= 0x7f; i++ {
- runes = append(runes, rune(i))
- }
- runes = append(runes, kelvin, smallLongEss)
-
- funcs := []struct {
- name string
- fold func(s, t []byte) bool
- letter bool // must be ASCII letter
- simple bool // must be simple ASCII letter (not 'S' or 'K')
- }{
- {
- name: "equalFoldRight",
- fold: equalFoldRight,
- },
- {
- name: "asciiEqualFold",
- fold: asciiEqualFold,
- simple: true,
- },
- {
- name: "simpleLetterEqualFold",
- fold: simpleLetterEqualFold,
- simple: true,
- letter: true,
- },
- }
-
- for _, ff := range funcs {
- for _, r := range runes {
- if r >= utf8.RuneSelf {
- continue
- }
- if ff.letter && !isASCIILetter(byte(r)) {
- continue
- }
- if ff.simple && (r == 's' || r == 'S' || r == 'k' || r == 'K') {
- continue
- }
- for _, r2 := range runes {
- buf1 = append(utf8.AppendRune(append(buf1[:0], 'x'), r), 'x')
- buf2 = append(utf8.AppendRune(append(buf2[:0], 'x'), r2), 'x')
- want := bytes.EqualFold(buf1, buf2)
- if got := ff.fold(buf1, buf2); got != want {
- t.Errorf("%s(%q, %q) = %v; want %v", ff.name, buf1, buf2, got, want)
- }
- }
+ equalFold := func(x, y []byte) bool { return string(foldName(x)) == string(foldName(y)) }
+ f.Fuzz(func(t *testing.T, x, y []byte) {
+ got := equalFold(x, y)
+ want := bytes.EqualFold(x, y)
+ if got != want {
+ t.Errorf("equalFold(%q, %q) = %v, want %v", x, y, got, want)
}
- }
-}
-
-func isASCIILetter(b byte) bool {
- return ('A' <= b && b <= 'Z') || ('a' <= b && b <= 'z')
+ })
}