aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/compile/internal/ssa/_gen/generic.rules
diff options
context:
space:
mode:
authorYoulin Feng <fengyoulin@live.com>2025-09-04 09:08:14 +0800
committerGopher Robot <gobot@golang.org>2025-09-09 12:10:07 -0700
commita5fa5ea51cd8fd9bcb8230d2accf9d55826f76b3 (patch)
treef1966aae45ec96a4c378aef6b1ebfbe357a6ffab /src/cmd/compile/internal/ssa/_gen/generic.rules
parent4c63d798cb947a3cdd5a5b68f254a73d83eb288f (diff)
downloadgo-a5fa5ea51cd8fd9bcb8230d2accf9d55826f76b3.tar.xz
cmd/compile/internal/ssa: expand runtime.memequal for length {3,5,6,7}
This CL slightly speeds up strings.HasPrefix when testing constant
prefixes of length {3,5,6,7}.

goos: linux
goarch: amd64
cpu: Intel(R) Core(TM) i7-8650U CPU @ 1.90GHz
                 │     old      │                 new                 │
                 │    sec/op    │    sec/op     vs base               │
StringPrefix3-8    11.125n ± 2%    8.539n ± 1%  -23.25% (p=0.000 n=20)
StringPrefix5-8    11.170n ± 2%    8.700n ± 1%  -22.11% (p=0.000 n=20)
StringPrefix6-8    11.190n ± 2%    8.655n ± 1%  -22.65% (p=0.000 n=20)
StringPrefix7-8    11.095n ± 1%    8.878n ± 1%  -19.98% (p=0.000 n=20)

Change-Id: I510a80d59cf78680b57d68780d35d212d24030e2
Reviewed-on: https://go-review.googlesource.com/c/go/+/700816
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Mark Freeman <markfreeman@google.com>
Auto-Submit: Keith Randall <khr@golang.org>
Diffstat (limited to 'src/cmd/compile/internal/ssa/_gen/generic.rules')
-rw-r--r--src/cmd/compile/internal/ssa/_gen/generic.rules114
1 files changed, 113 insertions, 1 deletions
diff --git a/src/cmd/compile/internal/ssa/_gen/generic.rules b/src/cmd/compile/internal/ssa/_gen/generic.rules
index c0806c2243..58872ca85a 100644
--- a/src/cmd/compile/internal/ssa/_gen/generic.rules
+++ b/src/cmd/compile/internal/ssa/_gen/generic.rules
@@ -2084,7 +2084,7 @@
(NilCheck ptr:(NilCheck _ _) _ ) => ptr
// for late-expanded calls, recognize memequal applied to a single constant byte
-// Support is limited by 1, 2, 4, 8 byte sizes
+// Support is limited by [1-8] byte sizes
(StaticLECall {callAux} sptr (Addr {scon} (SB)) (Const64 [1]) mem)
&& isSameCall(callAux, "runtime.memequal")
&& symIsRO(scon)
@@ -2131,6 +2131,118 @@
&& canLoadUnaligned(config) && config.PtrSize == 8
=> (MakeResult (Eq64 (Load <typ.Int64> sptr mem) (Const64 <typ.Int64> [int64(read64(scon,0,config.ctxt.Arch.ByteOrder))])) mem)
+(StaticLECall {callAux} sptr (Addr {scon} (SB)) (Const64 [3]) mem)
+ && isSameCall(callAux, "runtime.memequal")
+ && symIsRO(scon)
+ && canLoadUnaligned(config) =>
+ (MakeResult
+ (Eq32
+ (Or32 <typ.Int32>
+ (ZeroExt16to32 <typ.Int32> (Load <typ.Int16> sptr mem))
+ (Lsh32x32 <typ.Int32>
+ (ZeroExt8to32 <typ.Int32> (Load <typ.Int8> (OffPtr <typ.BytePtr> [2] sptr) mem))
+ (Const32 <typ.Int32> [16])))
+ (Const32 <typ.Int32> [int32(uint32(read16(scon,0,config.ctxt.Arch.ByteOrder))|(uint32(read8(scon,2))<<16))]))
+ mem)
+
+(StaticLECall {callAux} (Addr {scon} (SB)) sptr (Const64 [3]) mem)
+ && isSameCall(callAux, "runtime.memequal")
+ && symIsRO(scon)
+ && canLoadUnaligned(config) =>
+ (MakeResult
+ (Eq32
+ (Or32 <typ.Int32>
+ (ZeroExt16to32 <typ.Int32> (Load <typ.Int16> sptr mem))
+ (Lsh32x32 <typ.Int32>
+ (ZeroExt8to32 <typ.Int32> (Load <typ.Int8> (OffPtr <typ.BytePtr> [2] sptr) mem))
+ (Const32 <typ.Int32> [16])))
+ (Const32 <typ.Int32> [int32(uint32(read16(scon,0,config.ctxt.Arch.ByteOrder))|(uint32(read8(scon,2))<<16))]))
+ mem)
+
+(StaticLECall {callAux} sptr (Addr {scon} (SB)) (Const64 [5]) mem)
+ && isSameCall(callAux, "runtime.memequal")
+ && symIsRO(scon)
+ && canLoadUnaligned(config) && config.PtrSize == 8 =>
+ (MakeResult
+ (Eq64
+ (Or64 <typ.Int64>
+ (ZeroExt32to64 <typ.Int64> (Load <typ.Int32> sptr mem))
+ (Lsh64x64 <typ.Int64>
+ (ZeroExt8to64 <typ.Int64> (Load <typ.Int8> (OffPtr <typ.BytePtr> [4] sptr) mem))
+ (Const64 <typ.Int64> [32])))
+ (Const64 <typ.Int64> [int64(uint64(read32(scon,0,config.ctxt.Arch.ByteOrder))|(uint64(read8(scon,4))<<32))]))
+ mem)
+
+(StaticLECall {callAux} (Addr {scon} (SB)) sptr (Const64 [5]) mem)
+ && isSameCall(callAux, "runtime.memequal")
+ && symIsRO(scon)
+ && canLoadUnaligned(config) && config.PtrSize == 8 =>
+ (MakeResult
+ (Eq64
+ (Or64 <typ.Int64>
+ (ZeroExt32to64 <typ.Int64> (Load <typ.Int32> sptr mem))
+ (Lsh64x64 <typ.Int64>
+ (ZeroExt8to64 <typ.Int64> (Load <typ.Int8> (OffPtr <typ.BytePtr> [4] sptr) mem))
+ (Const64 <typ.Int64> [32])))
+ (Const64 <typ.Int64> [int64(uint64(read32(scon,0,config.ctxt.Arch.ByteOrder))|(uint64(read8(scon,4))<<32))]))
+ mem)
+
+(StaticLECall {callAux} sptr (Addr {scon} (SB)) (Const64 [6]) mem)
+ && isSameCall(callAux, "runtime.memequal")
+ && symIsRO(scon)
+ && canLoadUnaligned(config) && config.PtrSize == 8 =>
+ (MakeResult
+ (Eq64
+ (Or64 <typ.Int64>
+ (ZeroExt32to64 <typ.Int64> (Load <typ.Int32> sptr mem))
+ (Lsh64x64 <typ.Int64>
+ (ZeroExt16to64 <typ.Int64> (Load <typ.Int16> (OffPtr <typ.BytePtr> [4] sptr) mem))
+ (Const64 <typ.Int64> [32])))
+ (Const64 <typ.Int64> [int64(uint64(read32(scon,0,config.ctxt.Arch.ByteOrder))|(uint64(read16(scon,4,config.ctxt.Arch.ByteOrder))<<32))]))
+ mem)
+
+(StaticLECall {callAux} (Addr {scon} (SB)) sptr (Const64 [6]) mem)
+ && isSameCall(callAux, "runtime.memequal")
+ && symIsRO(scon)
+ && canLoadUnaligned(config) && config.PtrSize == 8 =>
+ (MakeResult
+ (Eq64
+ (Or64 <typ.Int64>
+ (ZeroExt32to64 <typ.Int64> (Load <typ.Int32> sptr mem))
+ (Lsh64x64 <typ.Int64>
+ (ZeroExt16to64 <typ.Int64> (Load <typ.Int16> (OffPtr <typ.BytePtr> [4] sptr) mem))
+ (Const64 <typ.Int64> [32])))
+ (Const64 <typ.Int64> [int64(uint64(read32(scon,0,config.ctxt.Arch.ByteOrder))|(uint64(read16(scon,4,config.ctxt.Arch.ByteOrder))<<32))]))
+ mem)
+
+(StaticLECall {callAux} sptr (Addr {scon} (SB)) (Const64 [7]) mem)
+ && isSameCall(callAux, "runtime.memequal")
+ && symIsRO(scon)
+ && canLoadUnaligned(config) && config.PtrSize == 8 =>
+ (MakeResult
+ (Eq64
+ (Or64 <typ.Int64>
+ (ZeroExt32to64 <typ.Int64> (Load <typ.Int32> sptr mem))
+ (Lsh64x64 <typ.Int64>
+ (ZeroExt32to64 <typ.Int64> (Load <typ.Int32> (OffPtr <typ.BytePtr> [3] sptr) mem))
+ (Const64 <typ.Int64> [32])))
+ (Const64 <typ.Int64> [int64(uint64(read32(scon,0,config.ctxt.Arch.ByteOrder))|(uint64(read32(scon,3,config.ctxt.Arch.ByteOrder))<<32))]))
+ mem)
+
+(StaticLECall {callAux} (Addr {scon} (SB)) sptr (Const64 [7]) mem)
+ && isSameCall(callAux, "runtime.memequal")
+ && symIsRO(scon)
+ && canLoadUnaligned(config) && config.PtrSize == 8 =>
+ (MakeResult
+ (Eq64
+ (Or64 <typ.Int64>
+ (ZeroExt32to64 <typ.Int64> (Load <typ.Int32> sptr mem))
+ (Lsh64x64 <typ.Int64>
+ (ZeroExt32to64 <typ.Int64> (Load <typ.Int32> (OffPtr <typ.BytePtr> [3] sptr) mem))
+ (Const64 <typ.Int64> [32])))
+ (Const64 <typ.Int64> [int64(uint64(read32(scon,0,config.ctxt.Arch.ByteOrder))|(uint64(read32(scon,3,config.ctxt.Arch.ByteOrder))<<32))]))
+ mem)
+
(StaticLECall {callAux} _ _ (Const64 [0]) mem)
&& isSameCall(callAux, "runtime.memequal")
=> (MakeResult (ConstBool <typ.Bool> [true]) mem)