author     Ruslan Andreev <kels9009@gmail.com>       2021-06-16 16:25:57 +0000
committer  Cherry Mui <cherryyz@google.com>          2021-10-06 13:47:50 +0000
commit     810b08b8ec28ea00bce4c008f7c1b48bc9f3e134 (patch)
tree       e06230d828b77c201f7642acca57fd30a98ed3c0 /src
parent     ce72766a02a4be127a26e95fbd62c4b4bb906e91 (diff)
download   go-810b08b8ec28ea00bce4c008f7c1b48bc9f3e134.tar.xz
cmd/compile: inline memequal(x, const, sz) for small sizes
This CL adds late-expanded inlining of memequal(x, const, sz) for sizes
of 2, 4, and 8 bytes, using the same method as CL 248404. The
optimization fires about 100 times when building the Go compiler (1675
memequal occurrences reduced to 1574, about -6%). Unit tests were also
added to the codegen/comparisons.go file.

Updates #37275

Change-Id: Ia52808d573cb706d1da8166c5746ede26f46c5da
Reviewed-on: https://go-review.googlesource.com/c/go/+/328291
Reviewed-by: Cherry Mui <cherryyz@google.com>
Run-TryBot: Cherry Mui <cherryyz@google.com>
Trust: David Chase <drchase@google.com>
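For context, a minimal codegen-test-style sketch of the pattern this CL
targets; the function name and the amd64 check are illustrative
assumptions, not copied from the CL's comparisons.go tests:

package codegen

// A string compared against a short constant lowers to a length check
// plus a runtime.memequal call; with this CL the call is expanded into
// a single load-and-compare, so memequal should no longer appear in
// the generated assembly.
func equalToConst4(s string) bool {
	// amd64:-".*memequal"
	return s == "abcd"
}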
Diffstat (limited to 'src')
-rw-r--r--  src/cmd/compile/internal/ssa/gen/generic.rules  |  20
-rw-r--r--  src/cmd/compile/internal/ssa/rewrite.go         |   5
-rw-r--r--  src/cmd/compile/internal/ssa/rewritegeneric.go  | 100
3 files changed, 124 insertions(+), 1 deletion(-)
diff --git a/src/cmd/compile/internal/ssa/gen/generic.rules b/src/cmd/compile/internal/ssa/gen/generic.rules
index 40db1a6ee8..6dbe9b47d0 100644
--- a/src/cmd/compile/internal/ssa/gen/generic.rules
+++ b/src/cmd/compile/internal/ssa/gen/generic.rules
@@ -2011,12 +2011,30 @@
=> (Invalid)
// for late-expanded calls, recognize memequal applied to a single constant byte
-// TODO figure out breakeven number of bytes for this optimization.
+// Support is limited to 1, 2, 4, and 8 byte sizes.
(StaticLECall {callAux} sptr (Addr {scon} (SB)) (Const64 [1]) mem)
&& isSameCall(callAux, "runtime.memequal")
&& symIsRO(scon)
=> (MakeResult (Eq8 (Load <typ.Int8> sptr mem) (Const8 <typ.Int8> [int8(read8(scon,0))])) mem)
+(StaticLECall {callAux} sptr (Addr {scon} (SB)) (Const64 [2]) mem)
+ && isSameCall(callAux, "runtime.memequal")
+ && symIsRO(scon)
+ && canLoadUnaligned(config)
+ => (MakeResult (Eq16 (Load <typ.Int16> sptr mem) (Const16 <typ.Int16> [int16(read16(scon,0,config.ctxt.Arch.ByteOrder))])) mem)
+
+(StaticLECall {callAux} sptr (Addr {scon} (SB)) (Const64 [4]) mem)
+ && isSameCall(callAux, "runtime.memequal")
+ && symIsRO(scon)
+ && canLoadUnaligned(config)
+ => (MakeResult (Eq32 (Load <typ.Int32> sptr mem) (Const32 <typ.Int32> [int32(read32(scon,0,config.ctxt.Arch.ByteOrder))])) mem)
+
+(StaticLECall {callAux} sptr (Addr {scon} (SB)) (Const64 [8]) mem)
+ && isSameCall(callAux, "runtime.memequal")
+ && symIsRO(scon)
+ && canLoadUnaligned(config) && config.PtrSize == 8
+ => (MakeResult (Eq64 (Load <typ.Int64> sptr mem) (Const64 <typ.Int64> [int64(read64(scon,0,config.ctxt.Arch.ByteOrder))])) mem)
+
// Evaluate constant address comparisons.
(EqPtr x x) => (ConstBool [true])
(NeqPtr x x) => (ConstBool [false])
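To make the byte-order handling concrete, here is a small runnable
model, written for this page rather than taken from the compiler, of
what the [4]-byte rule emits: one (possibly unaligned) load through
sptr, compared against the constant symbol's bytes decoded the way
read32(scon, 0, config.ctxt.Arch.ByteOrder) decodes them. For the model
to be faithful, order must match the host's byte order.

package main

import (
	"encoding/binary"
	"fmt"
	"unsafe"
)

// memequal4Model mirrors the [4]-byte rule: instead of calling
// runtime.memequal, perform one 4-byte load through p and compare it
// with the read-only constant's bytes decoded in the target byte order
// (the Const32 value folded in at compile time).
func memequal4Model(p unsafe.Pointer, constBytes []byte, order binary.ByteOrder) bool {
	x := *(*uint32)(p)            // the single load emitted by the rule
	c := order.Uint32(constBytes) // what read32 computes from the symbol
	return x == c
}

func main() {
	data := []byte("abcd")
	fmt.Println(memequal4Model(unsafe.Pointer(&data[0]), []byte("abcd"), binary.LittleEndian)) // true on little-endian hosts
	fmt.Println(memequal4Model(unsafe.Pointer(&data[0]), []byte("abce"), binary.LittleEndian)) // false
}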
diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go
index 79f9efaebf..2fe0ca64c8 100644
--- a/src/cmd/compile/internal/ssa/rewrite.go
+++ b/src/cmd/compile/internal/ssa/rewrite.go
@@ -415,6 +415,11 @@ func isSameCall(sym interface{}, name string) bool {
return fn != nil && fn.String() == name
}
+// canLoadUnaligned reports whether the architecture supports unaligned load operations.
+func canLoadUnaligned(c *Config) bool {
+ return c.ctxt.Arch.Alignment == 1
+}
+
// nlz returns the number of leading zeros.
func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
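This guard matters because the rewrites above replace a byte-wise
runtime call with one wide load at an arbitrary, possibly misaligned
address, which can fault on strict-alignment targets. Below is a
minimal runnable model of how the guards combine per size, using
hypothetical stand-in types rather than the real *Config:

package main

import "fmt"

// arch is a hypothetical stand-in for the Config fields the rules consult.
type arch struct {
	Alignment int // 1 means loads and stores need no particular alignment
	PtrSize   int // pointer size in bytes, 4 or 8
}

// okToInline models the rule conditions: the 2- and 4-byte rewrites
// need only unaligned loads, while the 8-byte rewrite also requires a
// 64-bit target (config.PtrSize == 8), presumably because a single
// 64-bit load is not a native operation on 32-bit targets.
func okToInline(a arch, size int64) bool {
	switch size {
	case 1:
		return true // a single-byte load is always aligned
	case 2, 4:
		return a.Alignment == 1
	case 8:
		return a.Alignment == 1 && a.PtrSize == 8
	}
	return false
}

func main() {
	amd64 := arch{Alignment: 1, PtrSize: 8}
	fmt.Println(okToInline(amd64, 8)) // true
}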
diff --git a/src/cmd/compile/internal/ssa/rewritegeneric.go b/src/cmd/compile/internal/ssa/rewritegeneric.go
index a6757e0d10..fbf227562a 100644
--- a/src/cmd/compile/internal/ssa/rewritegeneric.go
+++ b/src/cmd/compile/internal/ssa/rewritegeneric.go
@@ -21746,6 +21746,7 @@ func rewriteValuegeneric_OpSqrt(v *Value) bool {
}
func rewriteValuegeneric_OpStaticLECall(v *Value) bool {
b := v.Block
+ config := b.Func.Config
typ := &b.Func.Config.Types
// match: (StaticLECall {callAux} sptr (Addr {scon} (SB)) (Const64 [1]) mem)
// cond: isSameCall(callAux, "runtime.memequal") && symIsRO(scon)
@@ -21780,6 +21781,105 @@ func rewriteValuegeneric_OpStaticLECall(v *Value) bool {
v.AddArg2(v0, mem)
return true
}
+ // match: (StaticLECall {callAux} sptr (Addr {scon} (SB)) (Const64 [2]) mem)
+ // cond: isSameCall(callAux, "runtime.memequal") && symIsRO(scon) && canLoadUnaligned(config)
+ // result: (MakeResult (Eq16 (Load <typ.Int16> sptr mem) (Const16 <typ.Int16> [int16(read16(scon,0,config.ctxt.Arch.ByteOrder))])) mem)
+ for {
+ if len(v.Args) != 4 {
+ break
+ }
+ callAux := auxToCall(v.Aux)
+ mem := v.Args[3]
+ sptr := v.Args[0]
+ v_1 := v.Args[1]
+ if v_1.Op != OpAddr {
+ break
+ }
+ scon := auxToSym(v_1.Aux)
+ v_1_0 := v_1.Args[0]
+ if v_1_0.Op != OpSB {
+ break
+ }
+ v_2 := v.Args[2]
+ if v_2.Op != OpConst64 || auxIntToInt64(v_2.AuxInt) != 2 || !(isSameCall(callAux, "runtime.memequal") && symIsRO(scon) && canLoadUnaligned(config)) {
+ break
+ }
+ v.reset(OpMakeResult)
+ v0 := b.NewValue0(v.Pos, OpEq16, typ.Bool)
+ v1 := b.NewValue0(v.Pos, OpLoad, typ.Int16)
+ v1.AddArg2(sptr, mem)
+ v2 := b.NewValue0(v.Pos, OpConst16, typ.Int16)
+ v2.AuxInt = int16ToAuxInt(int16(read16(scon, 0, config.ctxt.Arch.ByteOrder)))
+ v0.AddArg2(v1, v2)
+ v.AddArg2(v0, mem)
+ return true
+ }
+ // match: (StaticLECall {callAux} sptr (Addr {scon} (SB)) (Const64 [4]) mem)
+ // cond: isSameCall(callAux, "runtime.memequal") && symIsRO(scon) && canLoadUnaligned(config)
+ // result: (MakeResult (Eq32 (Load <typ.Int32> sptr mem) (Const32 <typ.Int32> [int32(read32(scon,0,config.ctxt.Arch.ByteOrder))])) mem)
+ for {
+ if len(v.Args) != 4 {
+ break
+ }
+ callAux := auxToCall(v.Aux)
+ mem := v.Args[3]
+ sptr := v.Args[0]
+ v_1 := v.Args[1]
+ if v_1.Op != OpAddr {
+ break
+ }
+ scon := auxToSym(v_1.Aux)
+ v_1_0 := v_1.Args[0]
+ if v_1_0.Op != OpSB {
+ break
+ }
+ v_2 := v.Args[2]
+ if v_2.Op != OpConst64 || auxIntToInt64(v_2.AuxInt) != 4 || !(isSameCall(callAux, "runtime.memequal") && symIsRO(scon) && canLoadUnaligned(config)) {
+ break
+ }
+ v.reset(OpMakeResult)
+ v0 := b.NewValue0(v.Pos, OpEq32, typ.Bool)
+ v1 := b.NewValue0(v.Pos, OpLoad, typ.Int32)
+ v1.AddArg2(sptr, mem)
+ v2 := b.NewValue0(v.Pos, OpConst32, typ.Int32)
+ v2.AuxInt = int32ToAuxInt(int32(read32(scon, 0, config.ctxt.Arch.ByteOrder)))
+ v0.AddArg2(v1, v2)
+ v.AddArg2(v0, mem)
+ return true
+ }
+ // match: (StaticLECall {callAux} sptr (Addr {scon} (SB)) (Const64 [8]) mem)
+ // cond: isSameCall(callAux, "runtime.memequal") && symIsRO(scon) && canLoadUnaligned(config) && config.PtrSize == 8
+ // result: (MakeResult (Eq64 (Load <typ.Int64> sptr mem) (Const64 <typ.Int64> [int64(read64(scon,0,config.ctxt.Arch.ByteOrder))])) mem)
+ for {
+ if len(v.Args) != 4 {
+ break
+ }
+ callAux := auxToCall(v.Aux)
+ mem := v.Args[3]
+ sptr := v.Args[0]
+ v_1 := v.Args[1]
+ if v_1.Op != OpAddr {
+ break
+ }
+ scon := auxToSym(v_1.Aux)
+ v_1_0 := v_1.Args[0]
+ if v_1_0.Op != OpSB {
+ break
+ }
+ v_2 := v.Args[2]
+ if v_2.Op != OpConst64 || auxIntToInt64(v_2.AuxInt) != 8 || !(isSameCall(callAux, "runtime.memequal") && symIsRO(scon) && canLoadUnaligned(config) && config.PtrSize == 8) {
+ break
+ }
+ v.reset(OpMakeResult)
+ v0 := b.NewValue0(v.Pos, OpEq64, typ.Bool)
+ v1 := b.NewValue0(v.Pos, OpLoad, typ.Int64)
+ v1.AddArg2(sptr, mem)
+ v2 := b.NewValue0(v.Pos, OpConst64, typ.Int64)
+ v2.AuxInt = int64ToAuxInt(int64(read64(scon, 0, config.ctxt.Arch.ByteOrder)))
+ v0.AddArg2(v1, v2)
+ v.AddArg2(v0, mem)
+ return true
+ }
return false
}
func rewriteValuegeneric_OpStore(v *Value) bool {