author    Joel Sing <joel@sing.id.au>  2023-05-03 21:56:18 +1000
committer Joel Sing <joel@sing.id.au>  2023-07-31 16:38:47 +0000
commit    208fc13245add90c33cf48571e1419caf80a637c (patch)
tree      9190814b6d6382bf6c632ab26d99386b31351f2d /src/cmd/internal/obj
parent    3313b39bae9ee6b830fecaaaaa004de646f82e50 (diff)
cmd/internal/obj/arm64: avoid unnecessary literal pool usage for moves
In a number of load and store cases, the use of the literal pool can be
entirely avoided by simply adding or subtracting the offset from the
register. This uses the same number of instructions, while avoiding a
load from memory, along with the need for the value to be in the
literal pool. Overall this reduces the size of binaries slightly and
should have lower overhead.

Updates #59615

Change-Id: I9cb6a403dc71e34a46af913f5db87dbf52f8688c
Reviewed-on: https://go-review.googlesource.com/c/go/+/512539
Reviewed-by: David Chase <drchase@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Run-TryBot: Joel Sing <joel@sing.id.au>
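To make the transformation concrete (the registers and offset here are
chosen for illustration, using the notation of the comments in the diff
below): a store such as movT R1, -280(R2) previously required the
literal pool, since -280 fits neither the 9 bit signed nor the 12 bit
unsigned immediate encoding, and a negative offset cannot be split into
hi+lo.

	Before (literal pool):       After (this change):
		mov $-280, Rtmp              sub $280, R2, Rtmp
		str R1, (R2+Rtmp)            str R1, (Rtmp)

Both sequences are two instructions, but the second avoids the pool
entry and the load from memory.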
Diffstat (limited to 'src/cmd/internal/obj')
-rw-r--r--  src/cmd/internal/obj/arm64/asm7.go | 46
1 file changed, 42 insertions(+), 4 deletions(-)
diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go
index 72e6aadce4..05cf62773e 100644
--- a/src/cmd/internal/obj/arm64/asm7.go
+++ b/src/cmd/internal/obj/arm64/asm7.go
@@ -1959,6 +1959,10 @@ func (c *ctxt7) loadStoreClass(p *obj.Prog, lsc int, v int64) int {
 	}
 
 	needsPool := true
+	if v >= -4095 && v <= 4095 {
+		needsPool = false
+	}
+
 	switch p.As {
 	case AMOVB, AMOVBU:
 		if cmp(C_UAUTO4K, lsc) || cmp(C_UOREG4K, lsc) {
@@ -4015,10 +4019,13 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
 		o1 |= uint32(p.From.Reg&31)<<5 | uint32(p.To.Reg&31)
 
 	case 30: /* movT R,L(R) -> strT */
-		// if offset L can be split into hi+lo, and both fit into instructions, do
+		// If offset L fits in a 12 bit unsigned immediate:
+		//	add $L, R, Rtmp or sub $L, R, Rtmp
+		//	str R, (Rtmp)
+		// Otherwise, if offset L can be split into hi+lo, and both fit into instructions:
 		//	add $hi, R, Rtmp
 		//	str R, lo(Rtmp)
-		// otherwise, use constant pool
+		// Otherwise, use constant pool:
 		//	mov $L, Rtmp (from constant pool)
 		//	str R, (R+Rtmp)
 		s := movesize(o.as)
@@ -4032,6 +4039,20 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
 		}
 
 		v := c.regoff(&p.To)
+		if v >= -256 && v <= 256 {
+			c.ctxt.Diag("%v: bad type for offset %d (should be 9 bit signed immediate store)", p, v)
+		}
+		if v >= 0 && v <= 4095 && v&((1<<int32(s))-1) == 0 {
+			c.ctxt.Diag("%v: bad type for offset %d (should be 12 bit unsigned immediate store)", p, v)
+		}
+
+		// Handle smaller unaligned and negative offsets via addition or subtraction.
+		if v >= -4095 && v <= 4095 {
+			o1 = c.oaddi12(p, v, REGTMP, int16(r))
+			o2 = c.olsr12u(p, c.opstr(p, p.As), 0, REGTMP, p.From.Reg)
+			break
+		}
+
 		hi, lo, err := splitImm24uScaled(v, s)
 		if err != nil {
 			goto storeusepool
@@ -4054,10 +4075,13 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
 		o2 = c.olsxrr(p, int32(c.opstrr(p, p.As, false)), int(p.From.Reg), int(r), REGTMP)
 
 	case 31: /* movT L(R), R -> ldrT */
-		// if offset L can be split into hi+lo, and both fit into instructions, do
+		// If offset L fits in a 12 bit unsigned immediate:
+		//	add $L, R, Rtmp or sub $L, R, Rtmp
+		//	ldr (Rtmp), R
+		// Otherwise, if offset L can be split into hi+lo, and both fit into instructions:
 		//	add $hi, R, Rtmp
 		//	ldr lo(Rtmp), R
-		// otherwise, use constant pool
+		// Otherwise, use constant pool:
 		//	mov $L, Rtmp (from constant pool)
 		//	ldr (R+Rtmp), R
 		s := movesize(o.as)
@@ -4071,6 +4095,20 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
 		}
 
 		v := c.regoff(&p.From)
+		if v >= -256 && v <= 256 {
+			c.ctxt.Diag("%v: bad type for offset %d (should be 9 bit signed immediate load)", p, v)
+		}
+		if v >= 0 && v <= 4095 && v&((1<<int32(s))-1) == 0 {
+			c.ctxt.Diag("%v: bad type for offset %d (should be 12 bit unsigned immediate load)", p, v)
+		}
+
+		// Handle smaller unaligned and negative offsets via addition or subtraction.
+		if v >= -4095 && v <= 4095 {
+			o1 = c.oaddi12(p, v, REGTMP, int16(r))
+			o2 = c.olsr12u(p, c.opldr(p, p.As), 0, REGTMP, p.To.Reg)
+			break
+		}
+
 		hi, lo, err := splitImm24uScaled(v, s)
 		if err != nil {
 			goto loadusepool
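Both the store and load paths fall back to splitImm24uScaled before
resorting to the pool. The following is a rough sketch of what that
split has to achieve; the function name and call shape are taken from
the diff, while the types and body are illustrative rather than the
actual implementation. hi must be a multiple of 4096 that fits a
12 bit ADD immediate shifted left by 12, and lo must fit the scaled
12 bit unsigned load/store immediate for an access of 1<<s bytes:

	package main

	import "fmt"

	// splitImm24uScaled, sketched: split v into hi + lo, where hi is a
	// multiple of 4096 encodable as an ADD immediate (12 bits, LSL #12)
	// and lo is a multiple of the access size 1<<shift that fits the
	// scaled 12 bit unsigned immediate of a load or store.
	func splitImm24uScaled(v int32, shift int) (int32, int32, error) {
		if v < 0 || v&(1<<shift-1) != 0 {
			return 0, 0, fmt.Errorf("offset %d cannot be split", v)
		}
		lo := v & (0xfff << shift) // low part, encoded in the str/ldr itself
		hi := v - lo               // high part, applied by the add
		if hi > 0xfff<<12 {
			// Push the excess back into lo, keeping hi encodable.
			hi = 0xfff << 12
			lo = v - hi
		}
		if hi&^(0xfff<<12) != 0 || lo>>shift > 0xfff {
			return 0, 0, fmt.Errorf("offset %d cannot be split", v)
		}
		return hi, lo, nil
	}

	func main() {
		// An 8 byte store (shift 3) at offset 0x123458 splits into
		// add $0x120000 followed by a store at offset 0x3458(Rtmp).
		hi, lo, err := splitImm24uScaled(0x123458, 3)
		fmt.Printf("hi=%#x lo=%#x err=%v\n", hi, lo, err) // hi=0x120000 lo=0x3458 err=<nil>
	}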