[dev.simd] all: merge master (ca37d24) into dev.simd

Conflicts:

- src/cmd/compile/internal/typecheck/builtin.go

Merge List:

+ 2025-11-20 ca37d24e0b net/http: drop unused "broken" field from persistConn
+ 2025-11-20 4b740af56a cmd/internal/obj/x86: handle global reference in From3 in dynlink mode
+ 2025-11-20 790384c6c2 spec: adjust rule for type parameter on RHS of alias declaration
+ 2025-11-20 a49b0302d0 net/http: correctly close fake net.Conns
+ 2025-11-20 32f5aadd2f cmd/compile: stack allocate backing stores during append
+ 2025-11-20 a18aff8057 runtime: select GC mark workers during start-the-world
+ 2025-11-20 829779f4fe runtime: split findRunnableGCWorker in two
+ 2025-11-20 ab59569099 go/version: use "custom" as an example of a version suffix
+ 2025-11-19 c4bb9653ba cmd/compile: Implement LoweredZeroLoop with LSX Instruction on loong64
+ 2025-11-19 7f2ae21fb4 cmd/internal/obj/loong64: add MULW.D.W[U] instructions
+ 2025-11-19 a2946f2385 crypto: add Encapsulator and Decapsulator interfaces
+ 2025-11-19 6b83bd7146 crypto/ecdh: add KeyExchanger interface
+ 2025-11-19 4fef9f8b55 go/types, types2: fix object path for grouped declaration statements
+ 2025-11-19 33529db142 spec: escape double-ampersands
+ 2025-11-19 dc42565a20 cmd/compile: fix control flow for unsigned divisions proof relations
+ 2025-11-19 e64023dcbf cmd/compile: cleanup useless if statement in prove
+ 2025-11-19 2239520d1c test: go fmt prove.go tests
+ 2025-11-19 489d3dafb7 math: switch s390x math.Pow to generic implementation
+ 2025-11-18 8c41a482f9 runtime: add dlog.hexdump
+ 2025-11-18 e912618bd2 runtime: add hexdumper
+ 2025-11-18 2cf9d4b62f Revert "net/http: do not discard body content when closing it within request handlers"
+ 2025-11-18 4d0658bb08 cmd/compile: prefer fixed registers for values
+ 2025-11-18 ba634ca5c7 cmd/compile: fold boolean NOT into branches
+ 2025-11-18 8806d53c10 cmd/link: align sections, not symbols after DWARF compress
+ 2025-11-18 c93766007d runtime: do not print recovered when double panic with the same value
+ 2025-11-18 9859b43643 cmd/asm,cmd/compile,cmd/internal/obj/riscv: use compressed instructions on riscv64
+ 2025-11-17 b9ef0633f6 cmd/internal/sys,internal/goarch,runtime: enable the use of compressed instructions on riscv64
+ 2025-11-17 a087dea869 debug/elf: sync new loong64 relocation types up to LoongArch ELF psABI v20250521
+ 2025-11-17 e1a12c781f cmd/compile: use 32x32->64 multiplies on arm64
+ 2025-11-17 6caab99026 runtime: relax TestMemoryLimit on darwin a bit more
+ 2025-11-17 eda2e8c683 runtime: clear frame pointer at thread entry points
+ 2025-11-17 6919858338 runtime: rename findrunnable references to findRunnable
+ 2025-11-17 8e734ec954 go/ast: fix BasicLit.End position for raw strings containing \r
+ 2025-11-17 592775ec7d crypto/mlkem: avoid a few unnecessary inverse NTT calls
+ 2025-11-17 590cf18daf crypto/mlkem/mlkemtest: add derandomized Encapsulate768/1024
+ 2025-11-17 c12c337099 cmd/compile: teach prove about subtract idioms
+ 2025-11-17 bc15963813 cmd/compile: clean up prove pass
+ 2025-11-17 1297fae708 go/token: add (*File).End method
+ 2025-11-17 65c09eafdf runtime: hoist invariant code out of heapBitsSmallForAddrInline
+ 2025-11-17 594129b80c internal/runtime/maps: update doc for table.Clear
+ 2025-11-15 c58d075e9a crypto/rsa: deprecate PKCS#1 v1.5 encryption
+ 2025-11-14 d55ecea9e5 runtime: usleep before stealing runnext only if not in syscall
+ 2025-11-14 410ef44f00 cmd: update x/tools to 59ff18c
+ 2025-11-14 50128a2154 runtime: support runtime.freegc in size-specialized mallocs for noscan objects
+ 2025-11-14 c3708350a4 cmd/go: tests: rename git-min-vers->git-sha256
+ 2025-11-14 aea881230d std: fix printf("%q", int) mistakes
+ 2025-11-14 120f1874ef runtime: add more precise test of assist credit handling for runtime.freegc
+ 2025-11-14 fecfcaa4f6 runtime: add runtime.freegc to reduce GC work
+ 2025-11-14 5a347b775e runtime: set GOEXPERIMENT=runtimefreegc to disabled by default
+ 2025-11-14 1a03d0db3f runtime: skip tests for GOEXPERIMENT=arenas that do not handle clobberfree=1
+ 2025-11-14 cb0d9980f5 net/http: do not discard body content when closing it within request handlers
+ 2025-11-14 03ed43988f cmd/compile: allow multi-field structs to be stored directly in interfaces
+ 2025-11-14 1bb1f2bf0c runtime: put AddCleanup cleanup arguments in their own allocation
+ 2025-11-14 9fd2e44439 runtime: add AddCleanup benchmark
+ 2025-11-14 80c91eedbb runtime: ensure weak handles end up in their own allocation
+ 2025-11-14 7a8d0b5d53 runtime: add debug mode to extend _Grunning-without-P windows
+ 2025-11-14 710abf74da internal/runtime/cgobench: add Go function call benchmark for comparison
+ 2025-11-14 b24aec598b doc, cmd/internal/obj/riscv: document the riscv64 assembler
+ 2025-11-14 a0e738c657 cmd/compile/internal: remove incorrect riscv64 SLTI rule
+ 2025-11-14 2cdcc4150b cmd/compile: fold negation into multiplication
+ 2025-11-14 b57962b7c7 bytes: fix panic in bytes.Buffer.Peek
+ 2025-11-14 0a569528ea cmd/compile: optimize comparisons with single bit difference
+ 2025-11-14 1e5e6663e9 cmd/compile: remove unnecessary casts and types from riscv64 rules
+ 2025-11-14 ddd8558e61 go/types, types2: swap object.color for Checker.objPathIdx
+ 2025-11-14 9daaab305c cmd/link/internal/ld: make runtime.buildVersion with experiments valid
+ 2025-11-13 d50a571ddf test: fix tests to work with sizespecializedmalloc turned off
+ 2025-11-13 704f841eab cmd/trace: annotation proc start/stop with thread and proc always
+ 2025-11-13 17a02b9106 net/http: remove unused isLitOrSingle and isNotToken
+ 2025-11-13 ff61991aed cmd/go: fix flaky TestScript/mod_get_direct
+ 2025-11-13 129d0cb543 net/http/cgi: accept INCLUDED as protocol for server side includes
+ 2025-11-13 77c5130100 go/types: minor simplification
+ 2025-11-13 7601cd3880 go/types: generate cycles.go
+ 2025-11-13 7a372affd9 go/types, types2: rename definedType to declaredType and clarify docs

Change-Id: Ibaa9bdb982364892f80e511c1bb12661fcd5fb86
author: Cherry Mui <cherryyz@google.com> 2025-11-20 14:40:43 -0500
committer: Cherry Mui <cherryyz@google.com> 2025-11-20 14:40:43 -0500
commit: e3d4645693bc030b9ff9b867f1d374a1d72ef2fe (patch)
tree: 5d9c6783b4b1901e072ed253acc6ecdd909b23bc /src/cmd/compile/internal/ssa
parent: 95b4ad525fc8d70c881960ab9f75f31548023bed (diff)
parent: ca37d24e0b9369b8086959df5bc230b38bf98636 (diff)
download: go-e3d4645693bc030b9ff9b867f1d374a1d72ef2fe.tar.xz
19 files changed, 964 insertions, 253 deletions
diff --git a/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go
index 1e9eb0146e..e77f55ab5e 100644
--- a/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go
+++ b/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go
@@ -156,6 +156,7 @@ func init() {
 		gp11sb         = regInfo{inputs: []regMask{gpspsbg}, outputs: gponly}
 		gp21           = regInfo{inputs: []regMask{gp, gp}, outputs: gponly}
 		gp21sp         = regInfo{inputs: []regMask{gpsp, gp}, outputs: gponly}
+		gp21sp2        = regInfo{inputs: []regMask{gp, gpsp}, outputs: gponly}
 		gp21sb         = regInfo{inputs: []regMask{gpspsbg, gpsp}, outputs: gponly}
 		gp21shift      = regInfo{inputs: []regMask{gp, cx}, outputs: []regMask{gp}}
 		gp31shift      = regInfo{inputs: []regMask{gp, gp, cx}, outputs: []regMask{gp}}
@@ -361,7 +362,7 @@ func init() {
 		{name: "ADDQconstmodify", argLength: 2, reg: gpstoreconst, asm: "ADDQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"},
 		{name: "ADDLconstmodify", argLength: 2, reg: gpstoreconst, asm: "ADDL", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"},
 
-		{name: "SUBQ", argLength: 2, reg: gp21, asm: "SUBQ", resultInArg0: true, clobberFlags: true},
+		{name: "SUBQ", argLength: 2, reg: gp21sp2, asm: "SUBQ", resultInArg0: true, clobberFlags: true},
 		{name: "SUBL", argLength: 2, reg: gp21, asm: "SUBL", resultInArg0: true, clobberFlags: true},
 		{name: "SUBQconst", argLength: 1, reg: gp11, asm: "SUBQ", aux: "Int32", resultInArg0: true, clobberFlags: true},
 		{name: "SUBLconst", argLength: 1, reg: gp11, asm: "SUBL", aux: "Int32", resultInArg0: true, clobberFlags: true},
diff --git a/src/cmd/compile/internal/ssa/_gen/ARM64.rules b/src/cmd/compile/internal/ssa/_gen/ARM64.rules
index f54a692725..53bb35d289 100644
--- a/src/cmd/compile/internal/ssa/_gen/ARM64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/ARM64.rules
@@ -573,6 +573,8 @@
 (TBNZ [0] (GreaterThanF  cc) yes no) => (FGT cc yes no)
 (TBNZ [0] (GreaterEqualF cc) yes no) => (FGE cc yes no)
 
+(TB(Z|NZ) [0] (XORconst [1] x) yes no) => (TB(NZ|Z) [0] x yes no)
+
 ((EQ|NE|LT|LE|GT|GE) (CMPconst  [0] z:(AND        x y)) yes no) && z.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (TST                x y) yes no)
 ((EQ|NE|LT|LE|GT|GE) (CMPconst  [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (TSTconst         [c] y) yes no)
 ((EQ|NE|LT|LE|GT|GE) (CMPWconst [0] z:(AND        x y)) yes no) && z.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (TSTW               x y) yes no)
@@ -1814,3 +1816,7 @@
 
 (Select0 (Mul64uover x y)) => (MUL x y)
 (Select1 (Mul64uover x y)) => (NotEqual (CMPconst (UMULH <typ.UInt64> x y) [0]))
+
+// 32 mul 32 -> 64
+(MUL r:(MOVWUreg x) s:(MOVWUreg y)) && r.Uses == 1 && s.Uses == 1 => (UMULL x y)
+(MUL r:(MOVWreg  x) s:(MOVWreg  y)) && r.Uses == 1 && s.Uses == 1 =>  (MULL x y)
diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules
index 9691296043..2beba0b1c5 100644
--- a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules
@@ -743,9 +743,6 @@
 
 (MULV  x (MOVVconst [c])) && canMulStrengthReduce(config, c) => {mulStrengthReduce(v, x, c)}
 
-(MULV (NEGV x) (MOVVconst [c])) => (MULV x (MOVVconst [-c]))
-(MULV (NEGV x) (NEGV y)) => (MULV x y)
-
 (ADDV x0 x1:(SLLVconst [c] y)) && x1.Uses == 1 && c > 0 && c <= 4 => (ADDshiftLLV x0 y [c])
 
 // fold constant in ADDshift op
diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go
index 7e8b8bf497..81d3a3665b 100644
--- a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go
+++ b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go
@@ -388,6 +388,7 @@ func init() {
 			argLength: 2,
 			reg: regInfo{
 				inputs:       []regMask{gp},
+				clobbers:     buildReg("F31"),
 				clobbersArg0: true,
 			},
 			faultOnNilArg0: true,
diff --git a/src/cmd/compile/internal/ssa/_gen/RISCV64.rules b/src/cmd/compile/internal/ssa/_gen/RISCV64.rules
index 646948f2df..13a8cab3b5 100644
--- a/src/cmd/compile/internal/ssa/_gen/RISCV64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/RISCV64.rules
@@ -689,36 +689,36 @@
 (MOVDnop (MOVDconst [c])) => (MOVDconst [c])
 
 // Avoid unnecessary zero and sign extension when right shifting.
-(SRAI <t> [x] (MOVWreg  y)) && x >= 0 && x <= 31 => (SRAIW <t> [int64(x)] y)
-(SRLI <t> [x] (MOVWUreg y)) && x >= 0 && x <= 31 => (SRLIW <t> [int64(x)] y)
+(SRAI [x] (MOVWreg  y)) && x >= 0 && x <= 31 => (SRAIW [x] y)
+(SRLI [x] (MOVWUreg y)) && x >= 0 && x <= 31 => (SRLIW [x] y)
 
 // Replace right shifts that exceed size of signed type.
 (SRAI <t> [x] (MOVBreg y)) && x >=  8 => (SRAI  [63] (SLLI <t> [56] y))
 (SRAI <t> [x] (MOVHreg y)) && x >= 16 => (SRAI  [63] (SLLI <t> [48] y))
-(SRAI <t> [x] (MOVWreg y)) && x >= 32 => (SRAIW [31] y)
+(SRAI     [x] (MOVWreg y)) && x >= 32 => (SRAIW [31] y)
 
 // Eliminate right shifts that exceed size of unsigned type.
-(SRLI <t> [x] (MOVBUreg y)) && x >=  8 => (MOVDconst <t> [0])
-(SRLI <t> [x] (MOVHUreg y)) && x >= 16 => (MOVDconst <t> [0])
-(SRLI <t> [x] (MOVWUreg y)) && x >= 32 => (MOVDconst <t> [0])
+(SRLI [x] (MOVBUreg y)) && x >=  8 => (MOVDconst [0])
+(SRLI [x] (MOVHUreg y)) && x >= 16 => (MOVDconst [0])
+(SRLI [x] (MOVWUreg y)) && x >= 32 => (MOVDconst [0])
 
 // Fold constant into immediate instructions where possible.
 (ADD (MOVDconst <t> [val]) x) && is32Bit(val) && !t.IsPtr() => (ADDI [val] x)
 (AND (MOVDconst [val]) x) && is32Bit(val) => (ANDI [val] x)
 (OR  (MOVDconst [val]) x) && is32Bit(val) => (ORI  [val] x)
 (XOR (MOVDconst [val]) x) && is32Bit(val) => (XORI [val] x)
-(ROL  x (MOVDconst [val])) => (RORI  [int64(int8(-val)&63)] x)
-(ROLW x (MOVDconst [val])) => (RORIW [int64(int8(-val)&31)] x)
-(ROR  x (MOVDconst [val])) => (RORI  [int64(val&63)] x)
-(RORW x (MOVDconst [val])) => (RORIW [int64(val&31)] x)
-(SLL  x (MOVDconst [val])) => (SLLI [int64(val&63)] x)
-(SRL  x (MOVDconst [val])) => (SRLI [int64(val&63)] x)
-(SLLW x (MOVDconst [val])) => (SLLIW [int64(val&31)] x)
-(SRLW x (MOVDconst [val])) => (SRLIW [int64(val&31)] x)
-(SRA  x (MOVDconst [val])) => (SRAI [int64(val&63)] x)
-(SRAW x (MOVDconst [val])) => (SRAIW [int64(val&31)] x)
-(SLT  x (MOVDconst [val])) && val >= -2048 && val <= 2047 => (SLTI  [val] x)
-(SLTU x (MOVDconst [val])) && val >= -2048 && val <= 2047 => (SLTIU [val] x)
+(ROL  x (MOVDconst [val])) => (RORI  [-val&63] x)
+(ROLW x (MOVDconst [val])) => (RORIW [-val&31] x)
+(ROR  x (MOVDconst [val])) => (RORI  [val&63] x)
+(RORW x (MOVDconst [val])) => (RORIW [val&31] x)
+(SLL  x (MOVDconst [val])) => (SLLI  [val&63] x)
+(SLLW x (MOVDconst [val])) => (SLLIW [val&31] x)
+(SRL  x (MOVDconst [val])) => (SRLI  [val&63] x)
+(SRLW x (MOVDconst [val])) => (SRLIW [val&31] x)
+(SRA  x (MOVDconst [val])) => (SRAI  [val&63] x)
+(SRAW x (MOVDconst [val])) => (SRAIW [val&31] x)
+(SLT  x (MOVDconst [val])) && is12Bit(val) => (SLTI  [val] x)
+(SLTU x (MOVDconst [val])) && is12Bit(val) => (SLTIU [val] x)
 
 // Replace negated left rotation with right rotation.
 (ROL  x (NEG y)) => (ROR  x y)
@@ -782,7 +782,7 @@
 (SRAI [x] (MOVDconst [y])) => (MOVDconst [int64(y) >> uint32(x)])
 
 // Combine doubling via addition with shift.
-(SLLI <t> [c] (ADD x x)) && c < t.Size() * 8 - 1 => (SLLI <t> [c+1] x)
+(SLLI <t> [c] (ADD x x)) && c < t.Size() * 8 - 1 => (SLLI [c+1] x)
 (SLLI <t> [c] (ADD x x)) && c >= t.Size() * 8 - 1 => (MOVDconst [0])
 
 // SLTI/SLTIU with constants.
@@ -792,7 +792,6 @@
 // SLTI/SLTIU with known outcomes.
 (SLTI  [x] (ANDI [y] _)) && y >= 0 && int64(y) < int64(x) => (MOVDconst [1])
 (SLTIU [x] (ANDI [y] _)) && y >= 0 && uint64(y) < uint64(x) => (MOVDconst [1])
-(SLTI  [x] (ORI  [y] _)) && y >= 0 && int64(y) >= int64(x) => (MOVDconst [0])
 (SLTIU [x] (ORI  [y] _)) && y >= 0 && uint64(y) >= uint64(x) => (MOVDconst [0])
 
 // SLT/SLTU with known outcomes.
diff --git a/src/cmd/compile/internal/ssa/_gen/dec.rules b/src/cmd/compile/internal/ssa/_gen/dec.rules
index 9f6dc36975..fce0026211 100644
--- a/src/cmd/compile/internal/ssa/_gen/dec.rules
+++ b/src/cmd/compile/internal/ssa/_gen/dec.rules
@@ -97,8 +97,10 @@
 // Helpers for expand calls
 // Some of these are copied from generic.rules
 
-(IMake _typ (StructMake val)) => (IMake _typ val)
-(StructSelect [0] (IData x)) => (IData x)
+(IMake _typ (StructMake ___)) => imakeOfStructMake(v)
+(StructSelect (IData x)) && v.Type.Size() > 0 => (IData x)
+(StructSelect (IData x)) && v.Type.Size() == 0 && v.Type.IsStruct() => (StructMake)
+(StructSelect (IData x)) && v.Type.Size() == 0 && v.Type.IsArray() => (ArrayMake0)
 
 (StructSelect [i] x:(StructMake ___)) => x.Args[i]
 
@@ -109,7 +111,7 @@
 // More annoying case: (ArraySelect[0] (StructSelect[0] isAPtr))
 // There, result of the StructSelect is an Array (not a pointer) and
 // the pre-rewrite input to the ArraySelect is a struct, not a pointer.
-(StructSelect [0] x) && x.Type.IsPtrShaped()  => x
+(StructSelect x) && x.Type.IsPtrShaped()  => x
 (ArraySelect [0] x) && x.Type.IsPtrShaped()  => x
 
 // These, too.  Bits is bits.
@@ -119,6 +121,7 @@
 
 (Store _ (StructMake ___) _) => rewriteStructStore(v)
 
+(IMake _typ (ArrayMake1 val)) => (IMake _typ val)
 (ArraySelect (ArrayMake1 x)) => x
 (ArraySelect [0] (IData x)) => (IData x)
 
diff --git a/src/cmd/compile/internal/ssa/_gen/generic.rules b/src/cmd/compile/internal/ssa/_gen/generic.rules
index ccdf0bf50d..6a213cd03a 100644
--- a/src/cmd/compile/internal/ssa/_gen/generic.rules
+++ b/src/cmd/compile/internal/ssa/_gen/generic.rules
@@ -195,6 +195,11 @@
 // Convert x * -1 to -x.
 (Mul(8|16|32|64)  (Const(8|16|32|64)  [-1]) x) => (Neg(8|16|32|64)  x)
 
+// Convert -x * c to x * -c
+(Mul(8|16|32|64) (Const(8|16|32|64) <t> [c]) (Neg(8|16|32|64) x)) => (Mul(8|16|32|64) x (Const(8|16|32|64) <t> [-c]))
+
+(Mul(8|16|32|64) (Neg(8|16|32|64) x) (Neg(8|16|32|64) y)) => (Mul(8|16|32|64) x y)
+
 // DeMorgan's Laws
 (And(8|16|32|64) <t> (Com(8|16|32|64) x) (Com(8|16|32|64) y)) => (Com(8|16|32|64) (Or(8|16|32|64) <t> x y))
 (Or(8|16|32|64) <t> (Com(8|16|32|64) x) (Com(8|16|32|64) y)) => (Com(8|16|32|64) (And(8|16|32|64) <t> x y))
@@ -337,6 +342,12 @@
 (OrB ((Less|Leq)16U (Const16 [c]) x) (Leq16U x (Const16 [d]))) && uint16(c) >= uint16(d+1) && uint16(d+1) > uint16(d) => ((Less|Leq)16U (Const16 <x.Type> [c-d-1]) (Sub16 <x.Type> x (Const16 <x.Type> [d+1])))
 (OrB ((Less|Leq)8U  (Const8  [c]) x) (Leq8U  x (Const8  [d]))) && uint8(c)  >= uint8(d+1)  && uint8(d+1)  > uint8(d)  => ((Less|Leq)8U  (Const8  <x.Type> [c-d-1]) (Sub8  <x.Type> x (Const8  <x.Type> [d+1])))
 
+// single bit difference: ( x != c && x != d ) -> ( x|(c^d) != c )
+(AndB (Neq(64|32|16|8) x cv:(Const(64|32|16|8) [c])) (Neq(64|32|16|8) x (Const(64|32|16|8) [d]))) && c|d == c && oneBit(c^d) => (Neq(64|32|16|8) (Or(64|32|16|8) <x.Type> x (Const(64|32|16|8) <x.Type> [c^d])) cv)
+
+// single bit difference: ( x == c || x == d ) -> ( x|(c^d) == c )
+(OrB (Eq(64|32|16|8) x cv:(Const(64|32|16|8) [c])) (Eq(64|32|16|8) x (Const(64|32|16|8) [d]))) && c|d == c && oneBit(c^d) => (Eq(64|32|16|8) (Or(64|32|16|8) <x.Type> x (Const(64|32|16|8) <x.Type> [c^d])) cv)
+
 // NaN check: ( x != x || x (>|>=|<|<=) c ) -> ( !(c (>=|>|<=|<) x) )
 (OrB (Neq64F x x) ((Less|Leq)64F x y:(Const64F [c]))) => (Not ((Leq|Less)64F y x))
 (OrB (Neq64F x x) ((Less|Leq)64F y:(Const64F [c]) x)) => (Not ((Leq|Less)64F x y))
@@ -933,8 +944,10 @@
   @x.Block (Load <v.Type> (OffPtr <v.Type.PtrTo()> [t.FieldOff(int(i))] ptr) mem)
 
 // Putting struct{*byte} and similar into direct interfaces.
-(IMake _typ (StructMake val)) => (IMake _typ val)
-(StructSelect [0] (IData x)) => (IData x)
+(IMake _typ (StructMake ___)) => imakeOfStructMake(v)
+(StructSelect (IData x)) && v.Type.Size() > 0 => (IData x)
+(StructSelect (IData x)) && v.Type.Size() == 0 && v.Type.IsStruct() => (StructMake)
+(StructSelect (IData x)) && v.Type.Size() == 0 && v.Type.IsArray() => (ArrayMake0)
 
 // un-SSAable values use mem->mem copies
 (Store {t} dst (Load src mem) mem) && !CanSSA(t) =>
@@ -2222,4 +2235,4 @@
 (Neq(64|32|16) (SignExt8to(64|32|16) (CvtBoolToUint8 x)) (Const(64|32|16) [0])) => x
 (Neq(64|32|16) (SignExt8to(64|32|16) (CvtBoolToUint8 x)) (Const(64|32|16) [1])) => (Not x)
 (Eq(64|32|16)  (SignExt8to(64|32|16) (CvtBoolToUint8 x)) (Const(64|32|16) [1])) => x
-(Eq(64|32|16)  (SignExt8to(64|32|16) (CvtBoolToUint8 x)) (Const(64|32|16) [0])) => (Not x)
-\ No newline at end of file
+(Eq(64|32|16)  (SignExt8to(64|32|16) (CvtBoolToUint8 x)) (Const(64|32|16) [0])) => (Not x)
diff --git a/src/cmd/compile/internal/ssa/expand_calls.go b/src/cmd/compile/internal/ssa/expand_calls.go
index c1726b2797..1a2985d5af 100644
--- a/src/cmd/compile/internal/ssa/expand_calls.go
+++ b/src/cmd/compile/internal/ssa/expand_calls.go
@@ -426,7 +426,14 @@ func (x *expandState) decomposeAsNecessary(pos src.XPos, b *Block, a, m0 *Value,
 		if a.Op == OpIMake {
 			data := a.Args[1]
 			for data.Op == OpStructMake || data.Op == OpArrayMake1 {
-				data = data.Args[0]
+				// A struct make might have a few zero-sized fields.
+				// Use the pointer-y one we know is there.
+				for _, a := range data.Args {
+					if a.Type.Size() > 0 {
+						data = a
+						break
+					}
+				}
 			}
 			return x.decomposeAsNecessary(pos, b, data, mem, rc.next(data.Type))
 		}
diff --git a/src/cmd/compile/internal/ssa/fuse.go b/src/cmd/compile/internal/ssa/fuse.go
index 0cee91b532..e95064c1df 100644
--- a/src/cmd/compile/internal/ssa/fuse.go
+++ b/src/cmd/compile/internal/ssa/fuse.go
@@ -10,7 +10,9 @@ import (
 )
 
 // fuseEarly runs fuse(f, fuseTypePlain|fuseTypeIntInRange|fuseTypeNanCheck).
-func fuseEarly(f *Func) { fuse(f, fuseTypePlain|fuseTypeIntInRange|fuseTypeNanCheck) }
+func fuseEarly(f *Func) {
+	fuse(f, fuseTypePlain|fuseTypeIntInRange|fuseTypeSingleBitDifference|fuseTypeNanCheck)
+}
 
 // fuseLate runs fuse(f, fuseTypePlain|fuseTypeIf|fuseTypeBranchRedirect).
 func fuseLate(f *Func) { fuse(f, fuseTypePlain|fuseTypeIf|fuseTypeBranchRedirect) }
@@ -21,6 +23,7 @@ const (
 	fuseTypePlain fuseType = 1 << iota
 	fuseTypeIf
 	fuseTypeIntInRange
+	fuseTypeSingleBitDifference
 	fuseTypeNanCheck
 	fuseTypeBranchRedirect
 	fuseTypeShortCircuit
@@ -41,6 +44,9 @@ func fuse(f *Func, typ fuseType) {
 			if typ&fuseTypeIntInRange != 0 {
 				changed = fuseIntInRange(b) || changed
 			}
+			if typ&fuseTypeSingleBitDifference != 0 {
+				changed = fuseSingleBitDifference(b) || changed
+			}
 			if typ&fuseTypeNanCheck != 0 {
 				changed = fuseNanCheck(b) || changed
 			}
diff --git a/src/cmd/compile/internal/ssa/fuse_comparisons.go b/src/cmd/compile/internal/ssa/fuse_comparisons.go
index b6eb8fcb90..898c034485 100644
--- a/src/cmd/compile/internal/ssa/fuse_comparisons.go
+++ b/src/cmd/compile/internal/ssa/fuse_comparisons.go
@@ -19,6 +19,14 @@ func fuseNanCheck(b *Block) bool {
 	return fuseComparisons(b, canOptNanCheck)
 }
 
+// fuseSingleBitDifference replaces the short-circuit operators between equality checks with
+// constants that only differ by a single bit. For example, it would convert
+// `if x == 4 || x == 6 { ... }` into `if (x == 4) | (x == 6) { ... }`. Rewrite rules can
+// then optimize these using a bitwise operation, in this case generating `if x|2 == 6 { ... }`.
+func fuseSingleBitDifference(b *Block) bool {
+	return fuseComparisons(b, canOptSingleBitDifference)
+}
+
 // fuseComparisons looks for control graphs that match this pattern:
 //
 //	p - predecessor
@@ -229,3 +237,40 @@ func canOptNanCheck(x, y *Value, op Op) bool {
 	}
 	return false
 }
+
+// canOptSingleBitDifference returns true if x op y matches either:
+//
+//	v == c || v == d
+//	v != c && v != d
+//
+// Where c and d are constant values that differ by a single bit.
+func canOptSingleBitDifference(x, y *Value, op Op) bool {
+	if x.Op != y.Op {
+		return false
+	}
+	switch x.Op {
+	case OpEq64, OpEq32, OpEq16, OpEq8:
+		if op != OpOrB {
+			return false
+		}
+	case OpNeq64, OpNeq32, OpNeq16, OpNeq8:
+		if op != OpAndB {
+			return false
+		}
+	default:
+		return false
+	}
+
+	xi := getConstIntArgIndex(x)
+	if xi < 0 {
+		return false
+	}
+	yi := getConstIntArgIndex(y)
+	if yi < 0 {
+		return false
+	}
+	if x.Args[xi^1] != y.Args[yi^1] {
+		return false
+	}
+	return oneBit(x.Args[xi].AuxInt ^ y.Args[yi].AuxInt)
+}
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 9c5d79fa56..ea5491362f 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -11481,7 +11481,7 @@ var opcodeTable = [...]opInfo{
 		reg: regInfo{
 			inputs: []inputInfo{
 				{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
-				{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+				{1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
 			},
 			outputs: []outputInfo{
 				{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
@@ -68770,6 +68770,7 @@ var opcodeTable = [...]opInfo{
 			inputs: []inputInfo{
 				{0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
 			},
+			clobbers:     2305843009213693952, // F31
 			clobbersArg0: true,
 		},
 	},
diff --git a/src/cmd/compile/internal/ssa/prove.go b/src/cmd/compile/internal/ssa/prove.go
index 4919d6ad37..d4e7ed14b1 100644
--- a/src/cmd/compile/internal/ssa/prove.go
+++ b/src/cmd/compile/internal/ssa/prove.go
@@ -466,57 +466,56 @@ func (ft *factsTable) initLimitForNewValue(v *Value) {
 
 // signedMin records the fact that we know v is at least
 // min in the signed domain.
-func (ft *factsTable) signedMin(v *Value, min int64) bool {
-	return ft.newLimit(v, limit{min: min, max: math.MaxInt64, umin: 0, umax: math.MaxUint64})
+func (ft *factsTable) signedMin(v *Value, min int64) {
+	ft.newLimit(v, limit{min: min, max: math.MaxInt64, umin: 0, umax: math.MaxUint64})
 }
 
 // signedMax records the fact that we know v is at most
 // max in the signed domain.
-func (ft *factsTable) signedMax(v *Value, max int64) bool {
-	return ft.newLimit(v, limit{min: math.MinInt64, max: max, umin: 0, umax: math.MaxUint64})
+func (ft *factsTable) signedMax(v *Value, max int64) {
+	ft.newLimit(v, limit{min: math.MinInt64, max: max, umin: 0, umax: math.MaxUint64})
 }
-func (ft *factsTable) signedMinMax(v *Value, min, max int64) bool {
-	return ft.newLimit(v, limit{min: min, max: max, umin: 0, umax: math.MaxUint64})
+func (ft *factsTable) signedMinMax(v *Value, min, max int64) {
+	ft.newLimit(v, limit{min: min, max: max, umin: 0, umax: math.MaxUint64})
 }
 
 // setNonNegative records the fact that v is known to be non-negative.
-func (ft *factsTable) setNonNegative(v *Value) bool {
-	return ft.signedMin(v, 0)
+func (ft *factsTable) setNonNegative(v *Value) {
+	ft.signedMin(v, 0)
 }
 
 // unsignedMin records the fact that we know v is at least
 // min in the unsigned domain.
-func (ft *factsTable) unsignedMin(v *Value, min uint64) bool {
-	return ft.newLimit(v, limit{min: math.MinInt64, max: math.MaxInt64, umin: min, umax: math.MaxUint64})
+func (ft *factsTable) unsignedMin(v *Value, min uint64) {
+	ft.newLimit(v, limit{min: math.MinInt64, max: math.MaxInt64, umin: min, umax: math.MaxUint64})
 }
 
 // unsignedMax records the fact that we know v is at most
 // max in the unsigned domain.
-func (ft *factsTable) unsignedMax(v *Value, max uint64) bool {
-	return ft.newLimit(v, limit{min: math.MinInt64, max: math.MaxInt64, umin: 0, umax: max})
+func (ft *factsTable) unsignedMax(v *Value, max uint64) {
+	ft.newLimit(v, limit{min: math.MinInt64, max: math.MaxInt64, umin: 0, umax: max})
 }
-func (ft *factsTable) unsignedMinMax(v *Value, min, max uint64) bool {
-	return ft.newLimit(v, limit{min: math.MinInt64, max: math.MaxInt64, umin: min, umax: max})
+func (ft *factsTable) unsignedMinMax(v *Value, min, max uint64) {
+	ft.newLimit(v, limit{min: math.MinInt64, max: math.MaxInt64, umin: min, umax: max})
 }
 
-func (ft *factsTable) booleanFalse(v *Value) bool {
-	return ft.newLimit(v, limit{min: 0, max: 0, umin: 0, umax: 0})
+func (ft *factsTable) booleanFalse(v *Value) {
+	ft.newLimit(v, limit{min: 0, max: 0, umin: 0, umax: 0})
 }
-func (ft *factsTable) booleanTrue(v *Value) bool {
-	return ft.newLimit(v, limit{min: 1, max: 1, umin: 1, umax: 1})
+func (ft *factsTable) booleanTrue(v *Value) {
+	ft.newLimit(v, limit{min: 1, max: 1, umin: 1, umax: 1})
 }
-func (ft *factsTable) pointerNil(v *Value) bool {
-	return ft.newLimit(v, limit{min: 0, max: 0, umin: 0, umax: 0})
+func (ft *factsTable) pointerNil(v *Value) {
+	ft.newLimit(v, limit{min: 0, max: 0, umin: 0, umax: 0})
 }
-func (ft *factsTable) pointerNonNil(v *Value) bool {
+func (ft *factsTable) pointerNonNil(v *Value) {
 	l := noLimit
 	l.umin = 1
-	return ft.newLimit(v, l)
+	ft.newLimit(v, l)
 }
 
 // newLimit adds new limiting information for v.
-// Returns true if the new limit added any new information.
-func (ft *factsTable) newLimit(v *Value, newLim limit) bool {
+func (ft *factsTable) newLimit(v *Value, newLim limit) {
 	oldLim := ft.limits[v.ID]
 
 	// Merge old and new information.
@@ -531,13 +530,12 @@ func (ft *factsTable) newLimit(v *Value, newLim limit) bool {
 	}
 
 	if lim == oldLim {
-		return false // nothing new to record
+		return // nothing new to record
 	}
 
 	if lim.unsat() {
-		r := !ft.unsat
 		ft.unsat = true
-		return r
+		return
 	}
 
 	// Check for recursion. This normally happens because in unsatisfiable
@@ -548,7 +546,7 @@ func (ft *factsTable) newLimit(v *Value, newLim limit) bool {
 	// the posets will not notice.
 	if ft.recurseCheck[v.ID] {
 		// This should only happen for unsatisfiable cases. TODO: check
-		return false
+		return
 	}
 	ft.recurseCheck[v.ID] = true
 	defer func() {
@@ -713,8 +711,6 @@ func (ft *factsTable) newLimit(v *Value, newLim limit) bool {
 			}
 		}
 	}
-
-	return true
 }
 
 func (ft *factsTable) addOrdering(v, w *Value, d domain, r relation) {
@@ -1825,7 +1821,7 @@ func initLimit(v *Value) limit {
 	return lim
 }
 
-// flowLimit updates the known limits of v in ft. Returns true if anything changed.
+// flowLimit updates the known limits of v in ft.
 // flowLimit can use the ranges of input arguments.
 //
 // Note: this calculation only happens at the point the value is defined. We do not reevaluate
@@ -1838,10 +1834,10 @@ func initLimit(v *Value) limit {
 // block. We could recompute the range of v once we enter the block so
 // we know that it is 0 <= v <= 8, but we don't have a mechanism to do
 // that right now.
-func (ft *factsTable) flowLimit(v *Value) bool {
+func (ft *factsTable) flowLimit(v *Value) {
 	if !v.Type.IsInteger() {
 		// TODO: boolean?
-		return false
+		return
 	}
 
 	// Additional limits based on opcode and argument.
@@ -1851,36 +1847,36 @@ func (ft *factsTable) flowLimit(v *Value) bool {
 	// extensions
 	case OpZeroExt8to64, OpZeroExt8to32, OpZeroExt8to16, OpZeroExt16to64, OpZeroExt16to32, OpZeroExt32to64:
 		a := ft.limits[v.Args[0].ID]
-		return ft.unsignedMinMax(v, a.umin, a.umax)
+		ft.unsignedMinMax(v, a.umin, a.umax)
 	case OpSignExt8to64, OpSignExt8to32, OpSignExt8to16, OpSignExt16to64, OpSignExt16to32, OpSignExt32to64:
 		a := ft.limits[v.Args[0].ID]
-		return ft.signedMinMax(v, a.min, a.max)
+		ft.signedMinMax(v, a.min, a.max)
 	case OpTrunc64to8, OpTrunc64to16, OpTrunc64to32, OpTrunc32to8, OpTrunc32to16, OpTrunc16to8:
 		a := ft.limits[v.Args[0].ID]
 		if a.umax <= 1<<(uint64(v.Type.Size())*8)-1 {
-			return ft.unsignedMinMax(v, a.umin, a.umax)
+			ft.unsignedMinMax(v, a.umin, a.umax)
 		}
 
 	// math/bits
 	case OpCtz64:
 		a := ft.limits[v.Args[0].ID]
 		if a.nonzero() {
-			return ft.unsignedMax(v, uint64(bits.Len64(a.umax)-1))
+			ft.unsignedMax(v, uint64(bits.Len64(a.umax)-1))
 		}
 	case OpCtz32:
 		a := ft.limits[v.Args[0].ID]
 		if a.nonzero() {
-			return ft.unsignedMax(v, uint64(bits.Len32(uint32(a.umax))-1))
+			ft.unsignedMax(v, uint64(bits.Len32(uint32(a.umax))-1))
 		}
 	case OpCtz16:
 		a := ft.limits[v.Args[0].ID]
 		if a.nonzero() {
-			return ft.unsignedMax(v, uint64(bits.Len16(uint16(a.umax))-1))
+			ft.unsignedMax(v, uint64(bits.Len16(uint16(a.umax))-1))
 		}
 	case OpCtz8:
 		a := ft.limits[v.Args[0].ID]
 		if a.nonzero() {
-			return ft.unsignedMax(v, uint64(bits.Len8(uint8(a.umax))-1))
+			ft.unsignedMax(v, uint64(bits.Len8(uint8(a.umax))-1))
 		}
 
 	case OpPopCount64, OpPopCount32, OpPopCount16, OpPopCount8:
@@ -1889,26 +1885,26 @@ func (ft *factsTable) flowLimit(v *Value) bool {
 		sharedLeadingMask := ^(uint64(1)<<changingBitsCount - 1)
 		fixedBits := a.umax & sharedLeadingMask
 		min := uint64(bits.OnesCount64(fixedBits))
-		return ft.unsignedMinMax(v, min, min+changingBitsCount)
+		ft.unsignedMinMax(v, min, min+changingBitsCount)
 
 	case OpBitLen64:
 		a := ft.limits[v.Args[0].ID]
-		return ft.unsignedMinMax(v,
+		ft.unsignedMinMax(v,
 			uint64(bits.Len64(a.umin)),
 			uint64(bits.Len64(a.umax)))
 	case OpBitLen32:
 		a := ft.limits[v.Args[0].ID]
-		return ft.unsignedMinMax(v,
+		ft.unsignedMinMax(v,
 			uint64(bits.Len32(uint32(a.umin))),
 			uint64(bits.Len32(uint32(a.umax))))
 	case OpBitLen16:
 		a := ft.limits[v.Args[0].ID]
-		return ft.unsignedMinMax(v,
+		ft.unsignedMinMax(v,
 			uint64(bits.Len16(uint16(a.umin))),
 			uint64(bits.Len16(uint16(a.umax))))
 	case OpBitLen8:
 		a := ft.limits[v.Args[0].ID]
-		return ft.unsignedMinMax(v,
+		ft.unsignedMinMax(v,
 			uint64(bits.Len8(uint8(a.umin))),
 			uint64(bits.Len8(uint8(a.umax))))
 
@@ -1921,43 +1917,43 @@ func (ft *factsTable) flowLimit(v *Value) bool {
 		// AND can only make the value smaller.
 		a := ft.limits[v.Args[0].ID]
 		b := ft.limits[v.Args[1].ID]
-		return ft.unsignedMax(v, min(a.umax, b.umax))
+		ft.unsignedMax(v, min(a.umax, b.umax))
 	case OpOr64, OpOr32, OpOr16, OpOr8:
 		// OR can only make the value bigger and can't flip bits proved to be zero in both inputs.
 		a := ft.limits[v.Args[0].ID]
 		b := ft.limits[v.Args[1].ID]
-		return ft.unsignedMinMax(v,
+		ft.unsignedMinMax(v,
 			max(a.umin, b.umin),
 			1<<bits.Len64(a.umax|b.umax)-1)
 	case OpXor64, OpXor32, OpXor16, OpXor8:
 		// XOR can't flip bits that are proved to be zero in both inputs.
 		a := ft.limits[v.Args[0].ID]
 		b := ft.limits[v.Args[1].ID]
-		return ft.unsignedMax(v, 1<<bits.Len64(a.umax|b.umax)-1)
+		ft.unsignedMax(v, 1<<bits.Len64(a.umax|b.umax)-1)
 	case OpCom64, OpCom32, OpCom16, OpCom8:
 		a := ft.limits[v.Args[0].ID]
-		return ft.newLimit(v, a.com(uint(v.Type.Size())*8))
+		ft.newLimit(v, a.com(uint(v.Type.Size())*8))
 
 	// Arithmetic.
 	case OpAdd64, OpAdd32, OpAdd16, OpAdd8:
 		a := ft.limits[v.Args[0].ID]
 		b := ft.limits[v.Args[1].ID]
-		return ft.newLimit(v, a.add(b, uint(v.Type.Size())*8))
+		ft.newLimit(v, a.add(b, uint(v.Type.Size())*8))
 	case OpSub64, OpSub32, OpSub16, OpSub8:
 		a := ft.limits[v.Args[0].ID]
 		b := ft.limits[v.Args[1].ID]
-		sub := ft.newLimit(v, a.sub(b, uint(v.Type.Size())*8))
-		mod := ft.detectMod(v)
-		inferred := ft.detectSliceLenRelation(v)
-		return sub || mod || inferred
+		ft.newLimit(v, a.sub(b, uint(v.Type.Size())*8))
+		ft.detectMod(v)
+		ft.detectSliceLenRelation(v)
+		ft.detectSubRelations(v)
 	case OpNeg64, OpNeg32, OpNeg16, OpNeg8:
 		a := ft.limits[v.Args[0].ID]
 		bitsize := uint(v.Type.Size()) * 8
-		return ft.newLimit(v, a.com(bitsize).add(limit{min: 1, max: 1, umin: 1, umax: 1}, bitsize))
+		ft.newLimit(v, a.com(bitsize).add(limit{min: 1, max: 1, umin: 1, umax: 1}, bitsize))
 	case OpMul64, OpMul32, OpMul16, OpMul8:
 		a := ft.limits[v.Args[0].ID]
 		b := ft.limits[v.Args[1].ID]
-		return ft.newLimit(v, a.mul(b, uint(v.Type.Size())*8))
+		ft.newLimit(v, a.mul(b, uint(v.Type.Size())*8))
 	case OpLsh64x64, OpLsh64x32, OpLsh64x16, OpLsh64x8,
 		OpLsh32x64, OpLsh32x32, OpLsh32x16, OpLsh32x8,
 		OpLsh16x64, OpLsh16x32, OpLsh16x16, OpLsh16x8,
@@ -1965,7 +1961,7 @@ func (ft *factsTable) flowLimit(v *Value) bool {
 		a := ft.limits[v.Args[0].ID]
 		b := ft.limits[v.Args[1].ID]
 		bitsize := uint(v.Type.Size()) * 8
-		return ft.newLimit(v, a.mul(b.exp2(bitsize), bitsize))
+		ft.newLimit(v, a.mul(b.exp2(bitsize), bitsize))
 	case OpRsh64x64, OpRsh64x32, OpRsh64x16, OpRsh64x8,
 		OpRsh32x64, OpRsh32x32, OpRsh32x16, OpRsh32x8,
 		OpRsh16x64, OpRsh16x32, OpRsh16x16, OpRsh16x8,
@@ -1979,7 +1975,7 @@ func (ft *factsTable) flowLimit(v *Value) bool {
 			// Easier to compute min and max of both than to write sign logic.
 			vmin := min(a.min>>b.min, a.min>>b.max)
 			vmax := max(a.max>>b.min, a.max>>b.max)
-			return ft.signedMinMax(v, vmin, vmax)
+			ft.signedMinMax(v, vmin, vmax)
 		}
 	case OpRsh64Ux64, OpRsh64Ux32, OpRsh64Ux16, OpRsh64Ux8,
 		OpRsh32Ux64, OpRsh32Ux32, OpRsh32Ux16, OpRsh32Ux8,
@@ -1988,7 +1984,7 @@ func (ft *factsTable) flowLimit(v *Value) bool {
 		a := ft.limits[v.Args[0].ID]
 		b := ft.limits[v.Args[1].ID]
 		if b.min >= 0 {
-			return ft.unsignedMinMax(v, a.umin>>b.max, a.umax>>b.min)
+			ft.unsignedMinMax(v, a.umin>>b.max, a.umax>>b.min)
 		}
 	case OpDiv64, OpDiv32, OpDiv16, OpDiv8:
 		a := ft.limits[v.Args[0].ID]
@@ -2008,11 +2004,11 @@ func (ft *factsTable) flowLimit(v *Value) bool {
 		if b.umin > 0 {
 			lim = lim.unsignedMax(a.umax / b.umin)
 		}
-		return ft.newLimit(v, lim)
+		ft.newLimit(v, lim)
 	case OpMod64, OpMod32, OpMod16, OpMod8:
-		return ft.modLimit(true, v, v.Args[0], v.Args[1])
+		ft.modLimit(true, v, v.Args[0], v.Args[1])
 	case OpMod64u, OpMod32u, OpMod16u, OpMod8u:
-		return ft.modLimit(false, v, v.Args[0], v.Args[1])
+		ft.modLimit(false, v, v.Args[0], v.Args[1])
 
 	case OpPhi:
 		// Compute the union of all the input phis.
@@ -2032,9 +2028,8 @@ func (ft *factsTable) flowLimit(v *Value) bool {
 			l.umin = min(l.umin, l2.umin)
 			l.umax = max(l.umax, l2.umax)
 		}
-		return ft.newLimit(v, l)
+		ft.newLimit(v, l)
 	}
-	return false
 }
 
 // detectSliceLenRelation matches the pattern where
@@ -2047,13 +2042,13 @@ func (ft *factsTable) flowLimit(v *Value) bool {
 //
 // Note that "index" is not useed for indexing in this pattern, but
 // in the motivating example (chunked slice iteration) it is.
-func (ft *factsTable) detectSliceLenRelation(v *Value) (inferred bool) {
+func (ft *factsTable) detectSliceLenRelation(v *Value) {
 	if v.Op != OpSub64 {
-		return false
+		return
 	}
 
 	if !(v.Args[0].Op == OpSliceLen || v.Args[0].Op == OpSliceCap) {
-		return false
+		return
 	}
 
 	slice := v.Args[0].Args[0]
@@ -2093,13 +2088,54 @@ func (ft *factsTable) detectSliceLenRelation(v *Value) (inferred bool) {
 		if K < 0 { // We hate thinking about overflow
 			continue
 		}
-		inferred = inferred || ft.signedMin(v, K)
+		ft.signedMin(v, K)
+	}
+}
+
+// v must be Sub{64,32,16,8}.
+func (ft *factsTable) detectSubRelations(v *Value) {
+	// v = x-y
+	x := v.Args[0]
+	y := v.Args[1]
+	if x == y {
+		ft.signedMinMax(v, 0, 0)
+		return
+	}
+	xLim := ft.limits[x.ID]
+	yLim := ft.limits[y.ID]
+
+	// Check if we might wrap around. If so, give up.
+	width := uint(v.Type.Size()) * 8
+	if _, ok := safeSub(xLim.min, yLim.max, width); !ok {
+		return // x-y might underflow
+	}
+	if _, ok := safeSub(xLim.max, yLim.min, width); !ok {
+		return // x-y might overflow
+	}
+
+	// Subtracting a positive number only makes
+	// things smaller.
+	if yLim.min >= 0 {
+		ft.update(v.Block, v, x, signed, lt|eq)
+		// TODO: is this worth it?
+		//if yLim.min > 0 {
+		//	ft.update(v.Block, v, x, signed, lt)
+		//}
+	}
+
+	// Subtracting a number from a bigger one
+	// can't go below 0.
+	if ft.orderS.OrderedOrEqual(y, x) {
+		ft.setNonNegative(v)
+		// TODO: is this worth it?
+		//if ft.orderS.Ordered(y, x) {
+		//	ft.signedMin(v, 1)
+		//}
 	}
-	return inferred
 }
 
 // x%d has been rewritten to x - (x/d)*d.
-func (ft *factsTable) detectMod(v *Value) bool {
+func (ft *factsTable) detectMod(v *Value) {
 	var opDiv, opDivU, opMul, opConst Op
 	switch v.Op {
 	case OpSub64:
@@ -2126,36 +2162,37 @@ func (ft *factsTable) detectMod(v *Value) bool {
 
 	mul := v.Args[1]
 	if mul.Op != opMul {
-		return false
+		return
 	}
 	div, con := mul.Args[0], mul.Args[1]
 	if div.Op == opConst {
 		div, con = con, div
 	}
 	if con.Op != opConst || (div.Op != opDiv && div.Op != opDivU) || div.Args[0] != v.Args[0] || div.Args[1].Op != opConst || div.Args[1].AuxInt != con.AuxInt {
-		return false
+		return
 	}
-	return ft.modLimit(div.Op == opDiv, v, v.Args[0], con)
+	ft.modLimit(div.Op == opDiv, v, v.Args[0], con)
 }
 
 // modLimit sets v with facts derived from v = p % q.
-func (ft *factsTable) modLimit(signed bool, v, p, q *Value) bool {
+func (ft *factsTable) modLimit(signed bool, v, p, q *Value) {
 	a := ft.limits[p.ID]
 	b := ft.limits[q.ID]
 	if signed {
 		if a.min < 0 && b.min > 0 {
-			return ft.signedMinMax(v, -(b.max - 1), b.max-1)
+			ft.signedMinMax(v, -(b.max - 1), b.max-1)
+			return
 		}
 		if !(a.nonnegative() && b.nonnegative()) {
 			// TODO: we could handle signed limits but I didn't bother.
-			return false
+			return
 		}
 		if a.min >= 0 && b.min > 0 {
 			ft.setNonNegative(v)
 		}
 	}
 	// Underflow in the arithmetic below is ok, it gives to MaxUint64 which does nothing to the limit.
-	return ft.unsignedMax(v, min(a.umax, b.umax-1))
+	ft.unsignedMax(v, min(a.umax, b.umax-1))
 }
 
 // getBranch returns the range restrictions added by p
@@ -2466,15 +2503,13 @@ func addLocalFacts(ft *factsTable, b *Block) {
 				xl := ft.limits[x.ID]
 				y := add.Args[1]
 				yl := ft.limits[y.ID]
-				if unsignedAddOverflows(xl.umax, yl.umax, add.Type) {
-					continue
-				}
-
-				if xl.umax < uminDivisor {
-					ft.update(b, v, y, unsigned, lt|eq)
-				}
-				if yl.umax < uminDivisor {
-					ft.update(b, v, x, unsigned, lt|eq)
+				if !unsignedAddOverflows(xl.umax, yl.umax, add.Type) {
+					if xl.umax < uminDivisor {
+						ft.update(b, v, y, unsigned, lt|eq)
+					}
+					if yl.umax < uminDivisor {
+						ft.update(b, v, x, unsigned, lt|eq)
+					}
 				}
 			}
 			ft.update(b, v, v.Args[0], unsigned, lt|eq)
@@ -2993,16 +3028,14 @@ func (ft *factsTable) topoSortValuesInBlock(b *Block) {
 	want := f.NumValues()
 
 	scores := ft.reusedTopoSortScoresTable
-	if len(scores) < want {
-		if want <= cap(scores) {
-			scores = scores[:want]
-		} else {
-			if cap(scores) > 0 {
-				f.Cache.freeUintSlice(scores)
-			}
-			scores = f.Cache.allocUintSlice(want)
-			ft.reusedTopoSortScoresTable = scores
+	if want <= cap(scores) {
+		scores = scores[:want]
+	} else {
+		if cap(scores) > 0 {
+			f.Cache.freeUintSlice(scores)
 		}
+		scores = f.Cache.allocUintSlice(want)
+		ft.reusedTopoSortScoresTable = scores
 	}
 
 	for _, v := range b.Values {
diff --git a/src/cmd/compile/internal/ssa/regalloc.go b/src/cmd/compile/internal/ssa/regalloc.go
index 4d022555b7..11dd53bfc7 100644
--- a/src/cmd/compile/internal/ssa/regalloc.go
+++ b/src/cmd/compile/internal/ssa/regalloc.go
@@ -596,17 +596,18 @@ func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos
 	var c *Value
 	if vi.regs != 0 {
 		// Copy from a register that v is already in.
-		r2 := pickReg(vi.regs)
 		var current *Value
-		if !s.allocatable.contains(r2) {
-			current = v // v is in a fixed register
+		if vi.regs&^s.allocatable != 0 {
+			// v is in a fixed register, prefer that
+			current = v
 		} else {
+			r2 := pickReg(vi.regs)
 			if s.regs[r2].v != v {
 				panic("bad register state")
 			}
 			current = s.regs[r2].c
+			s.usedSinceBlockStart |= regMask(1) << r2
 		}
-		s.usedSinceBlockStart |= regMask(1) << r2
 		c = s.curBlock.NewValue1(pos, OpCopy, v.Type, current)
 	} else if v.rematerializeable() {
 		// Rematerialize instead of loading from the spill location.
diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go
index 07308973b1..af2568ae89 100644
--- a/src/cmd/compile/internal/ssa/rewrite.go
+++ b/src/cmd/compile/internal/ssa/rewrite.go
@@ -2772,3 +2772,17 @@ func panicBoundsCCToAux(p PanicBoundsCC) Aux {
 func isDictArgSym(sym Sym) bool {
 	return sym.(*ir.Name).Sym().Name == typecheck.LocalDictName
 }
+
+// When v is (IMake typ (StructMake ...)), convert to
+// (IMake typ arg) where arg is the pointer-y argument to
+// the StructMake (there must be exactly one).
+func imakeOfStructMake(v *Value) *Value {
+	var arg *Value
+	for _, a := range v.Args[1].Args {
+		if a.Type.Size() > 0 {
+			arg = a
+			break
+		}
+	}
+	return v.Block.NewValue2(v.Pos, OpIMake, v.Type, v.Args[0], arg)
+}
diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go
index 6af1558833..b3f790dbda 100644
--- a/src/cmd/compile/internal/ssa/rewriteARM64.go
+++ b/src/cmd/compile/internal/ssa/rewriteARM64.go
@@ -12556,6 +12556,54 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
 		}
 		break
 	}
+	// match: (MUL r:(MOVWUreg x) s:(MOVWUreg y))
+	// cond: r.Uses == 1 && s.Uses == 1
+	// result: (UMULL x y)
+	for {
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			r := v_0
+			if r.Op != OpARM64MOVWUreg {
+				continue
+			}
+			x := r.Args[0]
+			s := v_1
+			if s.Op != OpARM64MOVWUreg {
+				continue
+			}
+			y := s.Args[0]
+			if !(r.Uses == 1 && s.Uses == 1) {
+				continue
+			}
+			v.reset(OpARM64UMULL)
+			v.AddArg2(x, y)
+			return true
+		}
+		break
+	}
+	// match: (MUL r:(MOVWreg x) s:(MOVWreg y))
+	// cond: r.Uses == 1 && s.Uses == 1
+	// result: (MULL x y)
+	for {
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			r := v_0
+			if r.Op != OpARM64MOVWreg {
+				continue
+			}
+			x := r.Args[0]
+			s := v_1
+			if s.Op != OpARM64MOVWreg {
+				continue
+			}
+			y := s.Args[0]
+			if !(r.Uses == 1 && s.Uses == 1) {
+				continue
+			}
+			v.reset(OpARM64MULL)
+			v.AddArg2(x, y)
+			return true
+		}
+		break
+	}
 	return false
 }
 func rewriteValueARM64_OpARM64MULW(v *Value) bool {
@@ -25273,6 +25321,37 @@ func rewriteBlockARM64(b *Block) bool {
 			b.resetWithControl(BlockARM64FGE, cc)
 			return true
 		}
+		// match: (TBNZ [0] (XORconst [1] x) yes no)
+		// result: (TBZ [0] x yes no)
+		for b.Controls[0].Op == OpARM64XORconst {
+			v_0 := b.Controls[0]
+			if auxIntToInt64(v_0.AuxInt) != 1 {
+				break
+			}
+			x := v_0.Args[0]
+			if auxIntToInt64(b.AuxInt) != 0 {
+				break
+			}
+			b.resetWithControl(BlockARM64TBZ, x)
+			b.AuxInt = int64ToAuxInt(0)
+			return true
+		}
+	case BlockARM64TBZ:
+		// match: (TBZ [0] (XORconst [1] x) yes no)
+		// result: (TBNZ [0] x yes no)
+		for b.Controls[0].Op == OpARM64XORconst {
+			v_0 := b.Controls[0]
+			if auxIntToInt64(v_0.AuxInt) != 1 {
+				break
+			}
+			x := v_0.Args[0]
+			if auxIntToInt64(b.AuxInt) != 0 {
+				break
+			}
+			b.resetWithControl(BlockARM64TBNZ, x)
+			b.AuxInt = int64ToAuxInt(0)
+			return true
+		}
 	case BlockARM64UGE:
 		// match: (UGE (FlagConstant [fc]) yes no)
 		// cond: fc.uge()
diff --git a/src/cmd/compile/internal/ssa/rewriteLOONG64.go b/src/cmd/compile/internal/ssa/rewriteLOONG64.go
index 4262d4e0fb..bf2dd114a9 100644
--- a/src/cmd/compile/internal/ssa/rewriteLOONG64.go
+++ b/src/cmd/compile/internal/ssa/rewriteLOONG64.go
@@ -5866,7 +5866,6 @@ func rewriteValueLOONG64_OpLOONG64MULV(v *Value) bool {
 	v_0 := v.Args[0]
 	b := v.Block
 	config := b.Func.Config
-	typ := &b.Func.Config.Types
 	// match: (MULV _ (MOVVconst [0]))
 	// result: (MOVVconst [0])
 	for {
@@ -5911,44 +5910,6 @@ func rewriteValueLOONG64_OpLOONG64MULV(v *Value) bool {
 		}
 		break
 	}
-	// match: (MULV (NEGV x) (MOVVconst [c]))
-	// result: (MULV x (MOVVconst [-c]))
-	for {
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			if v_0.Op != OpLOONG64NEGV {
-				continue
-			}
-			x := v_0.Args[0]
-			if v_1.Op != OpLOONG64MOVVconst {
-				continue
-			}
-			c := auxIntToInt64(v_1.AuxInt)
-			v.reset(OpLOONG64MULV)
-			v0 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64)
-			v0.AuxInt = int64ToAuxInt(-c)
-			v.AddArg2(x, v0)
-			return true
-		}
-		break
-	}
-	// match: (MULV (NEGV x) (NEGV y))
-	// result: (MULV x y)
-	for {
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			if v_0.Op != OpLOONG64NEGV {
-				continue
-			}
-			x := v_0.Args[0]
-			if v_1.Op != OpLOONG64NEGV {
-				continue
-			}
-			y := v_1.Args[0]
-			v.reset(OpLOONG64MULV)
-			v.AddArg2(x, y)
-			return true
-		}
-		break
-	}
 	// match: (MULV (MOVVconst [c]) (MOVVconst [d]))
 	// result: (MOVVconst [c*d])
 	for {
diff --git a/src/cmd/compile/internal/ssa/rewriteRISCV64.go b/src/cmd/compile/internal/ssa/rewriteRISCV64.go
index 191c7b3d48..284d88967b 100644
--- a/src/cmd/compile/internal/ssa/rewriteRISCV64.go
+++ b/src/cmd/compile/internal/ssa/rewriteRISCV64.go
@@ -7027,7 +7027,7 @@ func rewriteValueRISCV64_OpRISCV64ROL(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (ROL x (MOVDconst [val]))
-	// result: (RORI [int64(int8(-val)&63)] x)
+	// result: (RORI [-val&63] x)
 	for {
 		x := v_0
 		if v_1.Op != OpRISCV64MOVDconst {
@@ -7035,7 +7035,7 @@ func rewriteValueRISCV64_OpRISCV64ROL(v *Value) bool {
 		}
 		val := auxIntToInt64(v_1.AuxInt)
 		v.reset(OpRISCV64RORI)
-		v.AuxInt = int64ToAuxInt(int64(int8(-val) & 63))
+		v.AuxInt = int64ToAuxInt(-val & 63)
 		v.AddArg(x)
 		return true
 	}
@@ -7057,7 +7057,7 @@ func rewriteValueRISCV64_OpRISCV64ROLW(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (ROLW x (MOVDconst [val]))
-	// result: (RORIW [int64(int8(-val)&31)] x)
+	// result: (RORIW [-val&31] x)
 	for {
 		x := v_0
 		if v_1.Op != OpRISCV64MOVDconst {
@@ -7065,7 +7065,7 @@ func rewriteValueRISCV64_OpRISCV64ROLW(v *Value) bool {
 		}
 		val := auxIntToInt64(v_1.AuxInt)
 		v.reset(OpRISCV64RORIW)
-		v.AuxInt = int64ToAuxInt(int64(int8(-val) & 31))
+		v.AuxInt = int64ToAuxInt(-val & 31)
 		v.AddArg(x)
 		return true
 	}
@@ -7087,7 +7087,7 @@ func rewriteValueRISCV64_OpRISCV64ROR(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (ROR x (MOVDconst [val]))
-	// result: (RORI [int64(val&63)] x)
+	// result: (RORI [val&63] x)
 	for {
 		x := v_0
 		if v_1.Op != OpRISCV64MOVDconst {
@@ -7095,7 +7095,7 @@ func rewriteValueRISCV64_OpRISCV64ROR(v *Value) bool {
 		}
 		val := auxIntToInt64(v_1.AuxInt)
 		v.reset(OpRISCV64RORI)
-		v.AuxInt = int64ToAuxInt(int64(val & 63))
+		v.AuxInt = int64ToAuxInt(val & 63)
 		v.AddArg(x)
 		return true
 	}
@@ -7105,7 +7105,7 @@ func rewriteValueRISCV64_OpRISCV64RORW(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (RORW x (MOVDconst [val]))
-	// result: (RORIW [int64(val&31)] x)
+	// result: (RORIW [val&31] x)
 	for {
 		x := v_0
 		if v_1.Op != OpRISCV64MOVDconst {
@@ -7113,7 +7113,7 @@ func rewriteValueRISCV64_OpRISCV64RORW(v *Value) bool {
 		}
 		val := auxIntToInt64(v_1.AuxInt)
 		v.reset(OpRISCV64RORIW)
-		v.AuxInt = int64ToAuxInt(int64(val & 31))
+		v.AuxInt = int64ToAuxInt(val & 31)
 		v.AddArg(x)
 		return true
 	}
@@ -7212,7 +7212,7 @@ func rewriteValueRISCV64_OpRISCV64SLL(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (SLL x (MOVDconst [val]))
-	// result: (SLLI [int64(val&63)] x)
+	// result: (SLLI [val&63] x)
 	for {
 		x := v_0
 		if v_1.Op != OpRISCV64MOVDconst {
@@ -7220,7 +7220,7 @@ func rewriteValueRISCV64_OpRISCV64SLL(v *Value) bool {
 		}
 		val := auxIntToInt64(v_1.AuxInt)
 		v.reset(OpRISCV64SLLI)
-		v.AuxInt = int64ToAuxInt(int64(val & 63))
+		v.AuxInt = int64ToAuxInt(val & 63)
 		v.AddArg(x)
 		return true
 	}
@@ -7246,7 +7246,7 @@ func rewriteValueRISCV64_OpRISCV64SLLI(v *Value) bool {
 	}
 	// match: (SLLI <t> [c] (ADD x x))
 	// cond: c < t.Size() * 8 - 1
-	// result: (SLLI <t> [c+1] x)
+	// result: (SLLI [c+1] x)
 	for {
 		t := v.Type
 		c := auxIntToInt64(v.AuxInt)
@@ -7258,7 +7258,6 @@ func rewriteValueRISCV64_OpRISCV64SLLI(v *Value) bool {
 			break
 		}
 		v.reset(OpRISCV64SLLI)
-		v.Type = t
 		v.AuxInt = int64ToAuxInt(c + 1)
 		v.AddArg(x)
 		return true
@@ -7286,7 +7285,7 @@ func rewriteValueRISCV64_OpRISCV64SLLW(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (SLLW x (MOVDconst [val]))
-	// result: (SLLIW [int64(val&31)] x)
+	// result: (SLLIW [val&31] x)
 	for {
 		x := v_0
 		if v_1.Op != OpRISCV64MOVDconst {
@@ -7294,7 +7293,7 @@ func rewriteValueRISCV64_OpRISCV64SLLW(v *Value) bool {
 		}
 		val := auxIntToInt64(v_1.AuxInt)
 		v.reset(OpRISCV64SLLIW)
-		v.AuxInt = int64ToAuxInt(int64(val & 31))
+		v.AuxInt = int64ToAuxInt(val & 31)
 		v.AddArg(x)
 		return true
 	}
@@ -7304,7 +7303,7 @@ func rewriteValueRISCV64_OpRISCV64SLT(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (SLT x (MOVDconst [val]))
-	// cond: val >= -2048 && val <= 2047
+	// cond: is12Bit(val)
 	// result: (SLTI [val] x)
 	for {
 		x := v_0
@@ -7312,7 +7311,7 @@ func rewriteValueRISCV64_OpRISCV64SLT(v *Value) bool {
 			break
 		}
 		val := auxIntToInt64(v_1.AuxInt)
-		if !(val >= -2048 && val <= 2047) {
+		if !(is12Bit(val)) {
 			break
 		}
 		v.reset(OpRISCV64SLTI)
@@ -7363,22 +7362,6 @@ func rewriteValueRISCV64_OpRISCV64SLTI(v *Value) bool {
 		v.AuxInt = int64ToAuxInt(1)
 		return true
 	}
-	// match: (SLTI [x] (ORI [y] _))
-	// cond: y >= 0 && int64(y) >= int64(x)
-	// result: (MOVDconst [0])
-	for {
-		x := auxIntToInt64(v.AuxInt)
-		if v_0.Op != OpRISCV64ORI {
-			break
-		}
-		y := auxIntToInt64(v_0.AuxInt)
-		if !(y >= 0 && int64(y) >= int64(x)) {
-			break
-		}
-		v.reset(OpRISCV64MOVDconst)
-		v.AuxInt = int64ToAuxInt(0)
-		return true
-	}
 	return false
 }
 func rewriteValueRISCV64_OpRISCV64SLTIU(v *Value) bool {
@@ -7433,7 +7416,7 @@ func rewriteValueRISCV64_OpRISCV64SLTU(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (SLTU x (MOVDconst [val]))
-	// cond: val >= -2048 && val <= 2047
+	// cond: is12Bit(val)
 	// result: (SLTIU [val] x)
 	for {
 		x := v_0
@@ -7441,7 +7424,7 @@ func rewriteValueRISCV64_OpRISCV64SLTU(v *Value) bool {
 			break
 		}
 		val := auxIntToInt64(v_1.AuxInt)
-		if !(val >= -2048 && val <= 2047) {
+		if !(is12Bit(val)) {
 			break
 		}
 		v.reset(OpRISCV64SLTIU)
@@ -7555,7 +7538,7 @@ func rewriteValueRISCV64_OpRISCV64SRA(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (SRA x (MOVDconst [val]))
-	// result: (SRAI [int64(val&63)] x)
+	// result: (SRAI [val&63] x)
 	for {
 		x := v_0
 		if v_1.Op != OpRISCV64MOVDconst {
@@ -7563,7 +7546,7 @@ func rewriteValueRISCV64_OpRISCV64SRA(v *Value) bool {
 		}
 		val := auxIntToInt64(v_1.AuxInt)
 		v.reset(OpRISCV64SRAI)
-		v.AuxInt = int64ToAuxInt(int64(val & 63))
+		v.AuxInt = int64ToAuxInt(val & 63)
 		v.AddArg(x)
 		return true
 	}
@@ -7572,11 +7555,10 @@ func rewriteValueRISCV64_OpRISCV64SRA(v *Value) bool {
 func rewriteValueRISCV64_OpRISCV64SRAI(v *Value) bool {
 	v_0 := v.Args[0]
 	b := v.Block
-	// match: (SRAI <t> [x] (MOVWreg y))
+	// match: (SRAI [x] (MOVWreg y))
 	// cond: x >= 0 && x <= 31
-	// result: (SRAIW <t> [int64(x)] y)
+	// result: (SRAIW [x] y)
 	for {
-		t := v.Type
 		x := auxIntToInt64(v.AuxInt)
 		if v_0.Op != OpRISCV64MOVWreg {
 			break
@@ -7586,8 +7568,7 @@ func rewriteValueRISCV64_OpRISCV64SRAI(v *Value) bool {
 			break
 		}
 		v.reset(OpRISCV64SRAIW)
-		v.Type = t
-		v.AuxInt = int64ToAuxInt(int64(x))
+		v.AuxInt = int64ToAuxInt(x)
 		v.AddArg(y)
 		return true
 	}
@@ -7633,7 +7614,7 @@ func rewriteValueRISCV64_OpRISCV64SRAI(v *Value) bool {
 		v.AddArg(v0)
 		return true
 	}
-	// match: (SRAI <t> [x] (MOVWreg y))
+	// match: (SRAI [x] (MOVWreg y))
 	// cond: x >= 32
 	// result: (SRAIW [31] y)
 	for {
@@ -7668,7 +7649,7 @@ func rewriteValueRISCV64_OpRISCV64SRAW(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (SRAW x (MOVDconst [val]))
-	// result: (SRAIW [int64(val&31)] x)
+	// result: (SRAIW [val&31] x)
 	for {
 		x := v_0
 		if v_1.Op != OpRISCV64MOVDconst {
@@ -7676,7 +7657,7 @@ func rewriteValueRISCV64_OpRISCV64SRAW(v *Value) bool {
 		}
 		val := auxIntToInt64(v_1.AuxInt)
 		v.reset(OpRISCV64SRAIW)
-		v.AuxInt = int64ToAuxInt(int64(val & 31))
+		v.AuxInt = int64ToAuxInt(val & 31)
 		v.AddArg(x)
 		return true
 	}
@@ -7686,7 +7667,7 @@ func rewriteValueRISCV64_OpRISCV64SRL(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (SRL x (MOVDconst [val]))
-	// result: (SRLI [int64(val&63)] x)
+	// result: (SRLI [val&63] x)
 	for {
 		x := v_0
 		if v_1.Op != OpRISCV64MOVDconst {
@@ -7694,7 +7675,7 @@ func rewriteValueRISCV64_OpRISCV64SRL(v *Value) bool {
 		}
 		val := auxIntToInt64(v_1.AuxInt)
 		v.reset(OpRISCV64SRLI)
-		v.AuxInt = int64ToAuxInt(int64(val & 63))
+		v.AuxInt = int64ToAuxInt(val & 63)
 		v.AddArg(x)
 		return true
 	}
@@ -7702,11 +7683,10 @@ func rewriteValueRISCV64_OpRISCV64SRL(v *Value) bool {
 }
 func rewriteValueRISCV64_OpRISCV64SRLI(v *Value) bool {
 	v_0 := v.Args[0]
-	// match: (SRLI <t> [x] (MOVWUreg y))
+	// match: (SRLI [x] (MOVWUreg y))
 	// cond: x >= 0 && x <= 31
-	// result: (SRLIW <t> [int64(x)] y)
+	// result: (SRLIW [x] y)
 	for {
-		t := v.Type
 		x := auxIntToInt64(v.AuxInt)
 		if v_0.Op != OpRISCV64MOVWUreg {
 			break
@@ -7716,16 +7696,14 @@ func rewriteValueRISCV64_OpRISCV64SRLI(v *Value) bool {
 			break
 		}
 		v.reset(OpRISCV64SRLIW)
-		v.Type = t
-		v.AuxInt = int64ToAuxInt(int64(x))
+		v.AuxInt = int64ToAuxInt(x)
 		v.AddArg(y)
 		return true
 	}
-	// match: (SRLI <t> [x] (MOVBUreg y))
+	// match: (SRLI [x] (MOVBUreg y))
 	// cond: x >= 8
-	// result: (MOVDconst <t> [0])
+	// result: (MOVDconst [0])
 	for {
-		t := v.Type
 		x := auxIntToInt64(v.AuxInt)
 		if v_0.Op != OpRISCV64MOVBUreg {
 			break
@@ -7734,15 +7712,13 @@ func rewriteValueRISCV64_OpRISCV64SRLI(v *Value) bool {
 			break
 		}
 		v.reset(OpRISCV64MOVDconst)
-		v.Type = t
 		v.AuxInt = int64ToAuxInt(0)
 		return true
 	}
-	// match: (SRLI <t> [x] (MOVHUreg y))
+	// match: (SRLI [x] (MOVHUreg y))
 	// cond: x >= 16
-	// result: (MOVDconst <t> [0])
+	// result: (MOVDconst [0])
 	for {
-		t := v.Type
 		x := auxIntToInt64(v.AuxInt)
 		if v_0.Op != OpRISCV64MOVHUreg {
 			break
@@ -7751,15 +7727,13 @@ func rewriteValueRISCV64_OpRISCV64SRLI(v *Value) bool {
 			break
 		}
 		v.reset(OpRISCV64MOVDconst)
-		v.Type = t
 		v.AuxInt = int64ToAuxInt(0)
 		return true
 	}
-	// match: (SRLI <t> [x] (MOVWUreg y))
+	// match: (SRLI [x] (MOVWUreg y))
 	// cond: x >= 32
-	// result: (MOVDconst <t> [0])
+	// result: (MOVDconst [0])
 	for {
-		t := v.Type
 		x := auxIntToInt64(v.AuxInt)
 		if v_0.Op != OpRISCV64MOVWUreg {
 			break
@@ -7768,7 +7742,6 @@ func rewriteValueRISCV64_OpRISCV64SRLI(v *Value) bool {
 			break
 		}
 		v.reset(OpRISCV64MOVDconst)
-		v.Type = t
 		v.AuxInt = int64ToAuxInt(0)
 		return true
 	}
@@ -7790,7 +7763,7 @@ func rewriteValueRISCV64_OpRISCV64SRLW(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (SRLW x (MOVDconst [val]))
-	// result: (SRLIW [int64(val&31)] x)
+	// result: (SRLIW [val&31] x)
 	for {
 		x := v_0
 		if v_1.Op != OpRISCV64MOVDconst {
@@ -7798,7 +7771,7 @@ func rewriteValueRISCV64_OpRISCV64SRLW(v *Value) bool {
 		}
 		val := auxIntToInt64(v_1.AuxInt)
 		v.reset(OpRISCV64SRLIW)
-		v.AuxInt = int64ToAuxInt(int64(val & 31))
+		v.AuxInt = int64ToAuxInt(val & 31)
 		v.AddArg(x)
 		return true
 	}
diff --git a/src/cmd/compile/internal/ssa/rewritedec.go b/src/cmd/compile/internal/ssa/rewritedec.go
index 16d0269210..c45034ead0 100644
--- a/src/cmd/compile/internal/ssa/rewritedec.go
+++ b/src/cmd/compile/internal/ssa/rewritedec.go
@@ -279,11 +279,20 @@ func rewriteValuedec_OpIData(v *Value) bool {
 func rewriteValuedec_OpIMake(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (IMake _typ (StructMake val))
+	// match: (IMake _typ (StructMake ___))
+	// result: imakeOfStructMake(v)
+	for {
+		if v_1.Op != OpStructMake {
+			break
+		}
+		v.copyOf(imakeOfStructMake(v))
+		return true
+	}
+	// match: (IMake _typ (ArrayMake1 val))
 	// result: (IMake _typ val)
 	for {
 		_typ := v_0
-		if v_1.Op != OpStructMake || len(v_1.Args) != 1 {
+		if v_1.Op != OpArrayMake1 {
 			break
 		}
 		val := v_1.Args[0]
@@ -839,17 +848,47 @@ func rewriteValuedec_OpStructMake(v *Value) bool {
 func rewriteValuedec_OpStructSelect(v *Value) bool {
 	v_0 := v.Args[0]
 	b := v.Block
-	// match: (StructSelect [0] (IData x))
+	// match: (StructSelect (IData x))
+	// cond: v.Type.Size() > 0
 	// result: (IData x)
 	for {
-		if auxIntToInt64(v.AuxInt) != 0 || v_0.Op != OpIData {
+		if v_0.Op != OpIData {
 			break
 		}
 		x := v_0.Args[0]
+		if !(v.Type.Size() > 0) {
+			break
+		}
 		v.reset(OpIData)
 		v.AddArg(x)
 		return true
 	}
+	// match: (StructSelect (IData x))
+	// cond: v.Type.Size() == 0 && v.Type.IsStruct()
+	// result: (StructMake)
+	for {
+		if v_0.Op != OpIData {
+			break
+		}
+		if !(v.Type.Size() == 0 && v.Type.IsStruct()) {
+			break
+		}
+		v.reset(OpStructMake)
+		return true
+	}
+	// match: (StructSelect (IData x))
+	// cond: v.Type.Size() == 0 && v.Type.IsArray()
+	// result: (ArrayMake0)
+	for {
+		if v_0.Op != OpIData {
+			break
+		}
+		if !(v.Type.Size() == 0 && v.Type.IsArray()) {
+			break
+		}
+		v.reset(OpArrayMake0)
+		return true
+	}
 	// match: (StructSelect [i] x:(StructMake ___))
 	// result: x.Args[i]
 	for {
@@ -861,13 +900,10 @@ func rewriteValuedec_OpStructSelect(v *Value) bool {
 		v.copyOf(x.Args[i])
 		return true
 	}
-	// match: (StructSelect [0] x)
+	// match: (StructSelect x)
 	// cond: x.Type.IsPtrShaped()
 	// result: x
 	for {
-		if auxIntToInt64(v.AuxInt) != 0 {
-			break
-		}
 		x := v_0
 		if !(x.Type.IsPtrShaped()) {
 			break
diff --git a/src/cmd/compile/internal/ssa/rewritegeneric.go b/src/cmd/compile/internal/ssa/rewritegeneric.go
index 5b5494f43a..5c183fc2a6 100644
--- a/src/cmd/compile/internal/ssa/rewritegeneric.go
+++ b/src/cmd/compile/internal/ssa/rewritegeneric.go
@@ -5332,6 +5332,182 @@ func rewriteValuegeneric_OpAndB(v *Value) bool {
 		}
 		break
 	}
+	// match: (AndB (Neq64 x cv:(Const64 [c])) (Neq64 x (Const64 [d])))
+	// cond: c|d == c && oneBit(c^d)
+	// result: (Neq64 (Or64 <x.Type> x (Const64 <x.Type> [c^d])) cv)
+	for {
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			if v_0.Op != OpNeq64 {
+				continue
+			}
+			_ = v_0.Args[1]
+			v_0_0 := v_0.Args[0]
+			v_0_1 := v_0.Args[1]
+			for _i1 := 0; _i1 <= 1; _i1, v_0_0, v_0_1 = _i1+1, v_0_1, v_0_0 {
+				x := v_0_0
+				cv := v_0_1
+				if cv.Op != OpConst64 {
+					continue
+				}
+				c := auxIntToInt64(cv.AuxInt)
+				if v_1.Op != OpNeq64 {
+					continue
+				}
+				_ = v_1.Args[1]
+				v_1_0 := v_1.Args[0]
+				v_1_1 := v_1.Args[1]
+				for _i2 := 0; _i2 <= 1; _i2, v_1_0, v_1_1 = _i2+1, v_1_1, v_1_0 {
+					if x != v_1_0 || v_1_1.Op != OpConst64 {
+						continue
+					}
+					d := auxIntToInt64(v_1_1.AuxInt)
+					if !(c|d == c && oneBit(c^d)) {
+						continue
+					}
+					v.reset(OpNeq64)
+					v0 := b.NewValue0(v.Pos, OpOr64, x.Type)
+					v1 := b.NewValue0(v.Pos, OpConst64, x.Type)
+					v1.AuxInt = int64ToAuxInt(c ^ d)
+					v0.AddArg2(x, v1)
+					v.AddArg2(v0, cv)
+					return true
+				}
+			}
+		}
+		break
+	}
+	// match: (AndB (Neq32 x cv:(Const32 [c])) (Neq32 x (Const32 [d])))
+	// cond: c|d == c && oneBit(c^d)
+	// result: (Neq32 (Or32 <x.Type> x (Const32 <x.Type> [c^d])) cv)
+	for {
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			if v_0.Op != OpNeq32 {
+				continue
+			}
+			_ = v_0.Args[1]
+			v_0_0 := v_0.Args[0]
+			v_0_1 := v_0.Args[1]
+			for _i1 := 0; _i1 <= 1; _i1, v_0_0, v_0_1 = _i1+1, v_0_1, v_0_0 {
+				x := v_0_0
+				cv := v_0_1
+				if cv.Op != OpConst32 {
+					continue
+				}
+				c := auxIntToInt32(cv.AuxInt)
+				if v_1.Op != OpNeq32 {
+					continue
+				}
+				_ = v_1.Args[1]
+				v_1_0 := v_1.Args[0]
+				v_1_1 := v_1.Args[1]
+				for _i2 := 0; _i2 <= 1; _i2, v_1_0, v_1_1 = _i2+1, v_1_1, v_1_0 {
+					if x != v_1_0 || v_1_1.Op != OpConst32 {
+						continue
+					}
+					d := auxIntToInt32(v_1_1.AuxInt)
+					if !(c|d == c && oneBit(c^d)) {
+						continue
+					}
+					v.reset(OpNeq32)
+					v0 := b.NewValue0(v.Pos, OpOr32, x.Type)
+					v1 := b.NewValue0(v.Pos, OpConst32, x.Type)
+					v1.AuxInt = int32ToAuxInt(c ^ d)
+					v0.AddArg2(x, v1)
+					v.AddArg2(v0, cv)
+					return true
+				}
+			}
+		}
+		break
+	}
+	// match: (AndB (Neq16 x cv:(Const16 [c])) (Neq16 x (Const16 [d])))
+	// cond: c|d == c && oneBit(c^d)
+	// result: (Neq16 (Or16 <x.Type> x (Const16 <x.Type> [c^d])) cv)
+	for {
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			if v_0.Op != OpNeq16 {
+				continue
+			}
+			_ = v_0.Args[1]
+			v_0_0 := v_0.Args[0]
+			v_0_1 := v_0.Args[1]
+			for _i1 := 0; _i1 <= 1; _i1, v_0_0, v_0_1 = _i1+1, v_0_1, v_0_0 {
+				x := v_0_0
+				cv := v_0_1
+				if cv.Op != OpConst16 {
+					continue
+				}
+				c := auxIntToInt16(cv.AuxInt)
+				if v_1.Op != OpNeq16 {
+					continue
+				}
+				_ = v_1.Args[1]
+				v_1_0 := v_1.Args[0]
+				v_1_1 := v_1.Args[1]
+				for _i2 := 0; _i2 <= 1; _i2, v_1_0, v_1_1 = _i2+1, v_1_1, v_1_0 {
+					if x != v_1_0 || v_1_1.Op != OpConst16 {
+						continue
+					}
+					d := auxIntToInt16(v_1_1.AuxInt)
+					if !(c|d == c && oneBit(c^d)) {
+						continue
+					}
+					v.reset(OpNeq16)
+					v0 := b.NewValue0(v.Pos, OpOr16, x.Type)
+					v1 := b.NewValue0(v.Pos, OpConst16, x.Type)
+					v1.AuxInt = int16ToAuxInt(c ^ d)
+					v0.AddArg2(x, v1)
+					v.AddArg2(v0, cv)
+					return true
+				}
+			}
+		}
+		break
+	}
+	// match: (AndB (Neq8 x cv:(Const8 [c])) (Neq8 x (Const8 [d])))
+	// cond: c|d == c && oneBit(c^d)
+	// result: (Neq8 (Or8 <x.Type> x (Const8 <x.Type> [c^d])) cv)
+	for {
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			if v_0.Op != OpNeq8 {
+				continue
+			}
+			_ = v_0.Args[1]
+			v_0_0 := v_0.Args[0]
+			v_0_1 := v_0.Args[1]
+			for _i1 := 0; _i1 <= 1; _i1, v_0_0, v_0_1 = _i1+1, v_0_1, v_0_0 {
+				x := v_0_0
+				cv := v_0_1
+				if cv.Op != OpConst8 {
+					continue
+				}
+				c := auxIntToInt8(cv.AuxInt)
+				if v_1.Op != OpNeq8 {
+					continue
+				}
+				_ = v_1.Args[1]
+				v_1_0 := v_1.Args[0]
+				v_1_1 := v_1.Args[1]
+				for _i2 := 0; _i2 <= 1; _i2, v_1_0, v_1_1 = _i2+1, v_1_1, v_1_0 {
+					if x != v_1_0 || v_1_1.Op != OpConst8 {
+						continue
+					}
+					d := auxIntToInt8(v_1_1.AuxInt)
+					if !(c|d == c && oneBit(c^d)) {
+						continue
+					}
+					v.reset(OpNeq8)
+					v0 := b.NewValue0(v.Pos, OpOr8, x.Type)
+					v1 := b.NewValue0(v.Pos, OpConst8, x.Type)
+					v1.AuxInt = int8ToAuxInt(c ^ d)
+					v0.AddArg2(x, v1)
+					v.AddArg2(v0, cv)
+					return true
+				}
+			}
+		}
+		break
+	}
 	return false
 }
 func rewriteValuegeneric_OpArraySelect(v *Value) bool {
@@ -8809,16 +8985,13 @@ func rewriteValuegeneric_OpFloor(v *Value) bool {
 func rewriteValuegeneric_OpIMake(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (IMake _typ (StructMake val))
-	// result: (IMake _typ val)
+	// match: (IMake _typ (StructMake ___))
+	// result: imakeOfStructMake(v)
 	for {
-		_typ := v_0
-		if v_1.Op != OpStructMake || len(v_1.Args) != 1 {
+		if v_1.Op != OpStructMake {
 			break
 		}
-		val := v_1.Args[0]
-		v.reset(OpIMake)
-		v.AddArg2(_typ, val)
+		v.copyOf(imakeOfStructMake(v))
 		return true
 	}
 	// match: (IMake _typ (ArrayMake1 val))
@@ -16610,6 +16783,45 @@ func rewriteValuegeneric_OpMul16(v *Value) bool {
 		}
 		break
 	}
+	// match: (Mul16 (Const16 <t> [c]) (Neg16 x))
+	// result: (Mul16 x (Const16 <t> [-c]))
+	for {
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			if v_0.Op != OpConst16 {
+				continue
+			}
+			t := v_0.Type
+			c := auxIntToInt16(v_0.AuxInt)
+			if v_1.Op != OpNeg16 {
+				continue
+			}
+			x := v_1.Args[0]
+			v.reset(OpMul16)
+			v0 := b.NewValue0(v.Pos, OpConst16, t)
+			v0.AuxInt = int16ToAuxInt(-c)
+			v.AddArg2(x, v0)
+			return true
+		}
+		break
+	}
+	// match: (Mul16 (Neg16 x) (Neg16 y))
+	// result: (Mul16 x y)
+	for {
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			if v_0.Op != OpNeg16 {
+				continue
+			}
+			x := v_0.Args[0]
+			if v_1.Op != OpNeg16 {
+				continue
+			}
+			y := v_1.Args[0]
+			v.reset(OpMul16)
+			v.AddArg2(x, y)
+			return true
+		}
+		break
+	}
 	// match: (Mul16 (Const16 <t> [c]) (Add16 <t> (Const16 <t> [d]) x))
 	// cond: !isPowerOfTwo(c)
 	// result: (Add16 (Const16 <t> [c*d]) (Mul16 <t> (Const16 <t> [c]) x))
@@ -16821,6 +17033,45 @@ func rewriteValuegeneric_OpMul32(v *Value) bool {
 		}
 		break
 	}
+	// match: (Mul32 (Const32 <t> [c]) (Neg32 x))
+	// result: (Mul32 x (Const32 <t> [-c]))
+	for {
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			if v_0.Op != OpConst32 {
+				continue
+			}
+			t := v_0.Type
+			c := auxIntToInt32(v_0.AuxInt)
+			if v_1.Op != OpNeg32 {
+				continue
+			}
+			x := v_1.Args[0]
+			v.reset(OpMul32)
+			v0 := b.NewValue0(v.Pos, OpConst32, t)
+			v0.AuxInt = int32ToAuxInt(-c)
+			v.AddArg2(x, v0)
+			return true
+		}
+		break
+	}
+	// match: (Mul32 (Neg32 x) (Neg32 y))
+	// result: (Mul32 x y)
+	for {
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			if v_0.Op != OpNeg32 {
+				continue
+			}
+			x := v_0.Args[0]
+			if v_1.Op != OpNeg32 {
+				continue
+			}
+			y := v_1.Args[0]
+			v.reset(OpMul32)
+			v.AddArg2(x, y)
+			return true
+		}
+		break
+	}
 	// match: (Mul32 (Const32 <t> [c]) (Add32 <t> (Const32 <t> [d]) x))
 	// cond: !isPowerOfTwo(c)
 	// result: (Add32 (Const32 <t> [c*d]) (Mul32 <t> (Const32 <t> [c]) x))
@@ -17193,6 +17444,45 @@ func rewriteValuegeneric_OpMul64(v *Value) bool {
 		}
 		break
 	}
+	// match: (Mul64 (Const64 <t> [c]) (Neg64 x))
+	// result: (Mul64 x (Const64 <t> [-c]))
+	for {
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			if v_0.Op != OpConst64 {
+				continue
+			}
+			t := v_0.Type
+			c := auxIntToInt64(v_0.AuxInt)
+			if v_1.Op != OpNeg64 {
+				continue
+			}
+			x := v_1.Args[0]
+			v.reset(OpMul64)
+			v0 := b.NewValue0(v.Pos, OpConst64, t)
+			v0.AuxInt = int64ToAuxInt(-c)
+			v.AddArg2(x, v0)
+			return true
+		}
+		break
+	}
+	// match: (Mul64 (Neg64 x) (Neg64 y))
+	// result: (Mul64 x y)
+	for {
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			if v_0.Op != OpNeg64 {
+				continue
+			}
+			x := v_0.Args[0]
+			if v_1.Op != OpNeg64 {
+				continue
+			}
+			y := v_1.Args[0]
+			v.reset(OpMul64)
+			v.AddArg2(x, y)
+			return true
+		}
+		break
+	}
 	// match: (Mul64 (Const64 <t> [c]) (Add64 <t> (Const64 <t> [d]) x))
 	// cond: !isPowerOfTwo(c)
 	// result: (Add64 (Const64 <t> [c*d]) (Mul64 <t> (Const64 <t> [c]) x))
@@ -17565,6 +17855,45 @@ func rewriteValuegeneric_OpMul8(v *Value) bool {
 		}
 		break
 	}
+	// match: (Mul8 (Const8 <t> [c]) (Neg8 x))
+	// result: (Mul8 x (Const8 <t> [-c]))
+	for {
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			if v_0.Op != OpConst8 {
+				continue
+			}
+			t := v_0.Type
+			c := auxIntToInt8(v_0.AuxInt)
+			if v_1.Op != OpNeg8 {
+				continue
+			}
+			x := v_1.Args[0]
+			v.reset(OpMul8)
+			v0 := b.NewValue0(v.Pos, OpConst8, t)
+			v0.AuxInt = int8ToAuxInt(-c)
+			v.AddArg2(x, v0)
+			return true
+		}
+		break
+	}
+	// match: (Mul8 (Neg8 x) (Neg8 y))
+	// result: (Mul8 x y)
+	for {
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			if v_0.Op != OpNeg8 {
+				continue
+			}
+			x := v_0.Args[0]
+			if v_1.Op != OpNeg8 {
+				continue
+			}
+			y := v_1.Args[0]
+			v.reset(OpMul8)
+			v.AddArg2(x, y)
+			return true
+		}
+		break
+	}
 	// match: (Mul8 (Const8 <t> [c]) (Add8 <t> (Const8 <t> [d]) x))
 	// cond: !isPowerOfTwo(c)
 	// result: (Add8 (Const8 <t> [c*d]) (Mul8 <t> (Const8 <t> [c]) x))
@@ -23242,6 +23571,182 @@ func rewriteValuegeneric_OpOrB(v *Value) bool {
 		}
 		break
 	}
+	// match: (OrB (Eq64 x cv:(Const64 [c])) (Eq64 x (Const64 [d])))
+	// cond: c|d == c && oneBit(c^d)
+	// result: (Eq64 (Or64 <x.Type> x (Const64 <x.Type> [c^d])) cv)
+	for {
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			if v_0.Op != OpEq64 {
+				continue
+			}
+			_ = v_0.Args[1]
+			v_0_0 := v_0.Args[0]
+			v_0_1 := v_0.Args[1]
+			for _i1 := 0; _i1 <= 1; _i1, v_0_0, v_0_1 = _i1+1, v_0_1, v_0_0 {
+				x := v_0_0
+				cv := v_0_1
+				if cv.Op != OpConst64 {
+					continue
+				}
+				c := auxIntToInt64(cv.AuxInt)
+				if v_1.Op != OpEq64 {
+					continue
+				}
+				_ = v_1.Args[1]
+				v_1_0 := v_1.Args[0]
+				v_1_1 := v_1.Args[1]
+				for _i2 := 0; _i2 <= 1; _i2, v_1_0, v_1_1 = _i2+1, v_1_1, v_1_0 {
+					if x != v_1_0 || v_1_1.Op != OpConst64 {
+						continue
+					}
+					d := auxIntToInt64(v_1_1.AuxInt)
+					if !(c|d == c && oneBit(c^d)) {
+						continue
+					}
+					v.reset(OpEq64)
+					v0 := b.NewValue0(v.Pos, OpOr64, x.Type)
+					v1 := b.NewValue0(v.Pos, OpConst64, x.Type)
+					v1.AuxInt = int64ToAuxInt(c ^ d)
+					v0.AddArg2(x, v1)
+					v.AddArg2(v0, cv)
+					return true
+				}
+			}
+		}
+		break
+	}
+	// match: (OrB (Eq32 x cv:(Const32 [c])) (Eq32 x (Const32 [d])))
+	// cond: c|d == c && oneBit(c^d)
+	// result: (Eq32 (Or32 <x.Type> x (Const32 <x.Type> [c^d])) cv)
+	for {
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			if v_0.Op != OpEq32 {
+				continue
+			}
+			_ = v_0.Args[1]
+			v_0_0 := v_0.Args[0]
+			v_0_1 := v_0.Args[1]
+			for _i1 := 0; _i1 <= 1; _i1, v_0_0, v_0_1 = _i1+1, v_0_1, v_0_0 {
+				x := v_0_0
+				cv := v_0_1
+				if cv.Op != OpConst32 {
+					continue
+				}
+				c := auxIntToInt32(cv.AuxInt)
+				if v_1.Op != OpEq32 {
+					continue
+				}
+				_ = v_1.Args[1]
+				v_1_0 := v_1.Args[0]
+				v_1_1 := v_1.Args[1]
+				for _i2 := 0; _i2 <= 1; _i2, v_1_0, v_1_1 = _i2+1, v_1_1, v_1_0 {
+					if x != v_1_0 || v_1_1.Op != OpConst32 {
+						continue
+					}
+					d := auxIntToInt32(v_1_1.AuxInt)
+					if !(c|d == c && oneBit(c^d)) {
+						continue
+					}
+					v.reset(OpEq32)
+					v0 := b.NewValue0(v.Pos, OpOr32, x.Type)
+					v1 := b.NewValue0(v.Pos, OpConst32, x.Type)
+					v1.AuxInt = int32ToAuxInt(c ^ d)
+					v0.AddArg2(x, v1)
+					v.AddArg2(v0, cv)
+					return true
+				}
+			}
+		}
+		break
+	}
+	// match: (OrB (Eq16 x cv:(Const16 [c])) (Eq16 x (Const16 [d])))
+	// cond: c|d == c && oneBit(c^d)
+	// result: (Eq16 (Or16 <x.Type> x (Const16 <x.Type> [c^d])) cv)
+	for {
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			if v_0.Op != OpEq16 {
+				continue
+			}
+			_ = v_0.Args[1]
+			v_0_0 := v_0.Args[0]
+			v_0_1 := v_0.Args[1]
+			for _i1 := 0; _i1 <= 1; _i1, v_0_0, v_0_1 = _i1+1, v_0_1, v_0_0 {
+				x := v_0_0
+				cv := v_0_1
+				if cv.Op != OpConst16 {
+					continue
+				}
+				c := auxIntToInt16(cv.AuxInt)
+				if v_1.Op != OpEq16 {
+					continue
+				}
+				_ = v_1.Args[1]
+				v_1_0 := v_1.Args[0]
+				v_1_1 := v_1.Args[1]
+				for _i2 := 0; _i2 <= 1; _i2, v_1_0, v_1_1 = _i2+1, v_1_1, v_1_0 {
+					if x != v_1_0 || v_1_1.Op != OpConst16 {
+						continue
+					}
+					d := auxIntToInt16(v_1_1.AuxInt)
+					if !(c|d == c && oneBit(c^d)) {
+						continue
+					}
+					v.reset(OpEq16)
+					v0 := b.NewValue0(v.Pos, OpOr16, x.Type)
+					v1 := b.NewValue0(v.Pos, OpConst16, x.Type)
+					v1.AuxInt = int16ToAuxInt(c ^ d)
+					v0.AddArg2(x, v1)
+					v.AddArg2(v0, cv)
+					return true
+				}
+			}
+		}
+		break
+	}
+	// match: (OrB (Eq8 x cv:(Const8 [c])) (Eq8 x (Const8 [d])))
+	// cond: c|d == c && oneBit(c^d)
+	// result: (Eq8 (Or8 <x.Type> x (Const8 <x.Type> [c^d])) cv)
+	for {
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			if v_0.Op != OpEq8 {
+				continue
+			}
+			_ = v_0.Args[1]
+			v_0_0 := v_0.Args[0]
+			v_0_1 := v_0.Args[1]
+			for _i1 := 0; _i1 <= 1; _i1, v_0_0, v_0_1 = _i1+1, v_0_1, v_0_0 {
+				x := v_0_0
+				cv := v_0_1
+				if cv.Op != OpConst8 {
+					continue
+				}
+				c := auxIntToInt8(cv.AuxInt)
+				if v_1.Op != OpEq8 {
+					continue
+				}
+				_ = v_1.Args[1]
+				v_1_0 := v_1.Args[0]
+				v_1_1 := v_1.Args[1]
+				for _i2 := 0; _i2 <= 1; _i2, v_1_0, v_1_1 = _i2+1, v_1_1, v_1_0 {
+					if x != v_1_0 || v_1_1.Op != OpConst8 {
+						continue
+					}
+					d := auxIntToInt8(v_1_1.AuxInt)
+					if !(c|d == c && oneBit(c^d)) {
+						continue
+					}
+					v.reset(OpEq8)
+					v0 := b.NewValue0(v.Pos, OpOr8, x.Type)
+					v1 := b.NewValue0(v.Pos, OpConst8, x.Type)
+					v1.AuxInt = int8ToAuxInt(c ^ d)
+					v0.AddArg2(x, v1)
+					v.AddArg2(v0, cv)
+					return true
+				}
+			}
+		}
+		break
+	}
 	// match: (OrB (Neq64F x x) (Less64F x y:(Const64F [c])))
 	// result: (Not (Leq64F y x))
 	for {
@@ -31601,17 +32106,47 @@ func rewriteValuegeneric_OpStructSelect(v *Value) bool {
 		v0.AddArg2(v1, mem)
 		return true
 	}
-	// match: (StructSelect [0] (IData x))
+	// match: (StructSelect (IData x))
+	// cond: v.Type.Size() > 0
 	// result: (IData x)
 	for {
-		if auxIntToInt64(v.AuxInt) != 0 || v_0.Op != OpIData {
+		if v_0.Op != OpIData {
 			break
 		}
 		x := v_0.Args[0]
+		if !(v.Type.Size() > 0) {
+			break
+		}
 		v.reset(OpIData)
 		v.AddArg(x)
 		return true
 	}
+	// match: (StructSelect (IData x))
+	// cond: v.Type.Size() == 0 && v.Type.IsStruct()
+	// result: (StructMake)
+	for {
+		if v_0.Op != OpIData {
+			break
+		}
+		if !(v.Type.Size() == 0 && v.Type.IsStruct()) {
+			break
+		}
+		v.reset(OpStructMake)
+		return true
+	}
+	// match: (StructSelect (IData x))
+	// cond: v.Type.Size() == 0 && v.Type.IsArray()
+	// result: (ArrayMake0)
+	for {
+		if v_0.Op != OpIData {
+			break
+		}
+		if !(v.Type.Size() == 0 && v.Type.IsArray()) {
+			break
+		}
+		v.reset(OpArrayMake0)
+		return true
+	}
 	return false
 }
 func rewriteValuegeneric_OpSub16(v *Value) bool {
author	Cherry Mui <cherryyz@google.com>	2025-11-20 14:40:43 -0500
committer	Cherry Mui <cherryyz@google.com>	2025-11-20 14:40:43 -0500
commit	e3d4645693bc030b9ff9b867f1d374a1d72ef2fe (patch)
tree	5d9c6783b4b1901e072ed253acc6ecdd909b23bc /src/cmd/compile/internal/ssa
parent	95b4ad525fc8d70c881960ab9f75f31548023bed (diff)
parent	ca37d24e0b9369b8086959df5bc230b38bf98636 (diff)
download	go-e3d4645693bc030b9ff9b867f1d374a1d72ef2fe.tar.xz