aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/internal/obj
diff options
context:
space:
mode:
authorKeith Randall <khr@golang.org>2025-11-04 15:15:16 -0800
committerGopher Robot <gobot@golang.org>2025-11-10 09:34:13 -0800
commit3b3d6b9e5d9898810ee13e739f3ad759ab104fdb (patch)
tree167486b89c8c153d560836065874f4d54829169d /src/cmd/internal/obj
parent5f4b5f1a196774e45bc50de0729973119eb7bf07 (diff)
downloadgo-3b3d6b9e5d9898810ee13e739f3ad759ab104fdb.tar.xz
cmd/internal/obj/arm64: shorten constant integer loads
Large integer constants can take up to 4 instructions to encode. We can encode some large constants with a single instruction, namely those which are bit patterns (repetitions of certain runs of 0s and 1s). Often the constants we want to encode are *close* to those bit patterns, but don't exactly match. For those, we can use 2 instructions, one to load the close-by bit pattern and one to fix up any mismatches. The constants we use to strength reduce divides often fit this pattern. For unsigned divides by 1 through 15, this CL applies to the constant for N=3,5,6,10,12,15. Triggers 17 times in hello world. Change-Id: I623abf32961fb3e74d0a163f6822f0647cd94499 Reviewed-on: https://go-review.googlesource.com/c/go/+/717900 Auto-Submit: Keith Randall <khr@golang.org> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Keith Randall <khr@google.com> Reviewed-by: Cherry Mui <cherryyz@google.com>
Diffstat (limited to 'src/cmd/internal/obj')
-rw-r--r--src/cmd/internal/obj/arm64/asm7.go39
-rw-r--r--src/cmd/internal/obj/arm64/asm_arm64_test.go13
-rw-r--r--src/cmd/internal/obj/arm64/asm_arm64_test.s8
3 files changed, 59 insertions, 1 deletions
diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go
index 7e7f028bfb..ccf8eda495 100644
--- a/src/cmd/internal/obj/arm64/asm7.go
+++ b/src/cmd/internal/obj/arm64/asm7.go
@@ -38,6 +38,7 @@ import (
"fmt"
"log"
"math"
+ "math/bits"
"slices"
"strings"
)
@@ -1976,7 +1977,18 @@ func (c *ctxt7) con64class(a *obj.Addr) int {
return C_MOVCON
} else if zeroCount == 2 || negCount == 2 {
return C_MOVCON2
- } else if zeroCount == 1 || negCount == 1 {
+ }
+ // See omovlconst for description of this loop.
+ for i := 0; i < 4; i++ {
+ mask := uint64(0xffff) << (i * 16)
+ for period := 2; period <= 32; period *= 2 {
+ x := uint64(a.Offset)&^mask | bits.RotateLeft64(uint64(a.Offset), max(period, 16))&mask
+ if isbitcon(x) {
+ return C_MOVCON2
+ }
+ }
+ }
+ if zeroCount == 1 || negCount == 1 {
return C_MOVCON3
} else {
return C_VCON
@@ -7555,6 +7567,31 @@ func (c *ctxt7) omovlconst(as obj.As, p *obj.Prog, a *obj.Addr, rt int, os []uin
}
}
return 2
+ }
+
+ // Look for a two instruction pair, a bit pattern encodeable
+ // as a bitcon immediate plus a fixup MOVK instruction.
+ // Constants like this often occur from strength reduction of divides.
+ for i = 0; i < 4; i++ {
+ mask := uint64(0xffff) << (i * 16)
+ for period := 2; period <= 32; period *= 2 { // TODO: handle period==64 somehow?
+ // Copy in bits from outside of the masked region
+ x := uint64(d)&^mask | bits.RotateLeft64(uint64(d), max(period, 16))&mask
+ if isbitcon(x) {
+ // ORR $c1, ZR, rt
+ os[0] = c.opirr(p, AORR)
+ os[0] |= bitconEncode(x, 64) | uint32(REGZERO&31)<<5 | uint32(rt&31)
+ // MOVK $c2<<(i*16), rt
+ os[1] = c.opirr(p, AMOVK)
+ os[1] |= MOVCONST(d, i, rt)
+ return 2
+ }
+ }
+ }
+ // TODO: other fixups, like ADD or SUB?
+ // TODO: 3-instruction variant, instead of the full MOVD+3*MOVK version below?
+
+ switch {
case zeroCount == 1:
// one MOVZ and two MOVKs
diff --git a/src/cmd/internal/obj/arm64/asm_arm64_test.go b/src/cmd/internal/obj/arm64/asm_arm64_test.go
index 83d137a084..b83db60b40 100644
--- a/src/cmd/internal/obj/arm64/asm_arm64_test.go
+++ b/src/cmd/internal/obj/arm64/asm_arm64_test.go
@@ -38,3 +38,16 @@ func TestMOVK(t *testing.T) {
t.Errorf("Got %x want %x\n", x, want)
}
}
+
+func testCombined() (a uint64, b uint64)
+func TestCombined(t *testing.T) {
+ got1, got2 := testCombined()
+ want1 := uint64(0xaaaaaaaaaaaaaaab)
+ want2 := uint64(0x0ff019940ff00ff0)
+ if got1 != want1 {
+ t.Errorf("First result, got %x want %x", got1, want1)
+ }
+ if got2 != want2 {
+ t.Errorf("First result, got %x want %x", got2, want2)
+ }
+}
diff --git a/src/cmd/internal/obj/arm64/asm_arm64_test.s b/src/cmd/internal/obj/arm64/asm_arm64_test.s
index e3fda57775..65d80d1380 100644
--- a/src/cmd/internal/obj/arm64/asm_arm64_test.s
+++ b/src/cmd/internal/obj/arm64/asm_arm64_test.s
@@ -37,3 +37,11 @@ TEXT ·testmovk(SB), NOSPLIT, $0-8
MOVK $(40000<<48), R0
MOVD R0, ret+0(FP)
RET
+
// testCombined returns two large constants: (uint64, uint64).
// Assembly helper for TestCombined; the constants are chosen to
// exercise the assembler's large-constant encodings (0xaaaaaaaaaaaaaaab
// is close to the bitcon pattern 0xaaaa...aa, so it is presumably meant
// to hit the new ORR(bitcon)+MOVK two-instruction path — confirm
// against con64class/omovlconst).
TEXT ·testCombined(SB), NOSPLIT, $0-16
	MOVD	$0xaaaaaaaaaaaaaaab, R0
	MOVD	$0x0ff019940ff00ff0, R1
	// Store both values into the return slots (a at 0(FP), b at 8(FP)).
	MOVD	R0, a+0(FP)
	MOVD	R1, b+8(FP)
	RET