From b9b8cecbfc72168ca03ad586cc2ed52b0e8db409 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Mon, 20 Feb 2023 11:26:10 -0800 Subject: encoding/json: simplify folded name logic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The folded name logic (despite all attempts to optimize it) was fundamentally an O(n) operation where every field in a struct needed to be linearly scanned in order to find a match. This made unmarshaling of unknown fields always O(n). Instead of optimizing the comparison for each field, make it such that we can look up a name in O(1). We accomplish this by maintaining a map keyed by pre-folded names, which we can pre-calculate when processing the struct type. Using a stack-allocated buffer, we can fold the input name and look up its presence in the map. Also, instead of mapping from names to indexes, map directly to a pointer to the field information. The memory cost of this is the same and avoids an extra slice index. The new logic is both simpler and faster. 
Performance: name old time/op new time/op delta CodeDecoder 2.47ms ± 4% 2.42ms ± 2% -1.83% (p=0.022 n=10+9) UnicodeDecoder 259ns ± 2% 248ns ± 1% -4.32% (p=0.000 n=10+10) DecoderStream 150ns ± 1% 149ns ± 1% ~ (p=0.516 n=10+10) CodeUnmarshal 3.13ms ± 2% 3.09ms ± 2% -1.37% (p=0.022 n=10+9) CodeUnmarshalReuse 2.50ms ± 1% 2.45ms ± 1% -1.96% (p=0.001 n=8+9) UnmarshalString 67.1ns ± 5% 64.5ns ± 5% -3.90% (p=0.005 n=10+10) UnmarshalFloat64 60.1ns ± 4% 58.4ns ± 2% -2.89% (p=0.002 n=10+8) UnmarshalInt64 51.0ns ± 4% 49.2ns ± 1% -3.53% (p=0.001 n=10+8) Issue10335 80.7ns ± 2% 79.2ns ± 1% -1.82% (p=0.016 n=10+8) Issue34127 28.6ns ± 3% 28.8ns ± 3% ~ (p=0.388 n=9+10) Unmapped 177ns ± 2% 177ns ± 2% ~ (p=0.956 n=10+10) Change-Id: I478b2b958f5a63a69c9a991a39cd5ffb43244a2a Reviewed-on: https://go-review.googlesource.com/c/go/+/471196 Reviewed-by: Dmitri Shuralyov Run-TryBot: Joseph Tsai Auto-Submit: Joseph Tsai TryBot-Result: Gopher Robot Reviewed-by: Johan Brandhorst-Satzkorn Reviewed-by: Than McIntosh Reviewed-by: Daniel Martí --- src/encoding/json/encode.go | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'src/encoding/json/encode.go') diff --git a/src/encoding/json/encode.go b/src/encoding/json/encode.go index de639aa008..f3c824d13e 100644 --- a/src/encoding/json/encode.go +++ b/src/encoding/json/encode.go @@ -672,8 +672,9 @@ type structEncoder struct { } type structFields struct { - list []field - nameIndex map[string]int + list []field + byExactName map[string]*field + byFoldedName map[string]*field } func (se structEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) { @@ -1033,8 +1034,7 @@ func appendString[Bytes []byte | string](dst []byte, src Bytes, escapeHTML bool) // A field represents a single field found in a struct. 
type field struct { name string - nameBytes []byte // []byte(name) - equalFold func(s, t []byte) bool // bytes.EqualFold or equivalent + nameBytes []byte // []byte(name) nameNonEsc string // `"` + name + `":` nameEscHTML string // `"` + HTMLEscape(name) + `":` @@ -1161,7 +1161,6 @@ func typeFields(t reflect.Type) structFields { quoted: quoted, } field.nameBytes = []byte(field.name) - field.equalFold = foldFunc(field.nameBytes) // Build nameEscHTML and nameNonEsc ahead of time. nameEscBuf = appendHTMLEscape(nameEscBuf[:0], field.nameBytes) @@ -1240,11 +1239,16 @@ func typeFields(t reflect.Type) structFields { f := &fields[i] f.encoder = typeEncoder(typeByIndex(t, f.index)) } - nameIndex := make(map[string]int, len(fields)) + exactNameIndex := make(map[string]*field, len(fields)) + foldedNameIndex := make(map[string]*field, len(fields)) for i, field := range fields { - nameIndex[field.name] = i + exactNameIndex[field.name] = &fields[i] + // For historical reasons, first folded match takes precedence. + if _, ok := foldedNameIndex[string(foldName(field.nameBytes))]; !ok { + foldedNameIndex[string(foldName(field.nameBytes))] = &fields[i] + } } - return structFields{fields, nameIndex} + return structFields{fields, exactNameIndex, foldedNameIndex} } // dominantField looks through the fields, all of which are known to -- cgit v1.3