| author | Lynn Boger <laboger@linux.vnet.ibm.com> | 2023-04-17 10:02:48 -0500 |
|---|---|---|
| committer | Lynn Boger <laboger@linux.vnet.ibm.com> | 2023-04-21 16:47:45 +0000 |
| commit | e23322e2ccd19b5802a823d20a089540afef79ce (patch) | |
| tree | df27fc09f5ee19f00d2b1319ddfcc22b1d22d846 /src/math | |
| parent | de788efeac46f18bd3372666cfc2c698de69d8b6 (diff) | |
| download | go-e23322e2ccd19b5802a823d20a089540afef79ce.tar.xz | |
cmd/internal/obj/ppc64: modify PCALIGN to ensure alignment
The initial purpose of PCALIGN was to mark code where alignment
would benefit performance, while avoiding cases where too many
NOPs would be added. On Power10, it is now necessary to enforce a
specific alignment in some cases, so the behavior of PCALIGN needs
to change slightly: code is now aligned to the value specified on
the PCALIGN directive regardless of the number of NOPs added, which
is more intuitive and consistent with the Power assembler's
alignment directives.
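The padding an alignment directive implies can be sketched in Go. This is an illustrative helper, not the Go assembler's actual implementation; the function name and values are invented:

```go
package main

import "fmt"

// padToAlign returns how many padding (NOP) bytes are needed so that
// the next instruction starts on a multiple of align, where align is
// a power of two. Under the new PCALIGN behavior, this padding is
// emitted unconditionally, however large it is.
func padToAlign(pc, align int64) int64 {
	return -pc & (align - 1)
}

func main() {
	fmt.Println(padToAlign(0x14, 16)) // 12 bytes to reach 0x20
	fmt.Println(padToAlign(0x20, 16)) // already aligned: 0
	fmt.Println(padToAlign(0x44, 64)) // 60 bytes to reach 0x80
}
```

The old behavior capped how much padding would be emitted; the new behavior always pads to the requested boundary.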
This also adds 64 as a possible alignment value.
The existing PCALIGN values were adjusted to match the new
behavior.
A test case was updated, and performance testing verified that
this change does not adversely affect performance.
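In Go ppc64 assembly, the directive is used like this. A minimal sketch: the function, its registers, and the loop body are invented for illustration, only the PCALIGN directive itself reflects this change:

```
// func count(x, n int64) int64
TEXT ·count(SB), NOSPLIT, $0-24
	MOVD	n+8(FP), R5
	MOVD	R0, R3
	// With this change, the loop below starts on a 64-byte
	// boundary no matter how many NOPs that requires; $64 is
	// newly accepted as an alignment value.
	PCALIGN	$64
loop:
	ADD	$1, R3
	SUB	$1, R5
	CMP	R5, $0
	BNE	loop
	MOVD	R3, ret+16(FP)
	RET
```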
Change-Id: Iad1cf5ff112e5bfc0514f0805be90e24095e932b
Reviewed-on: https://go-review.googlesource.com/c/go/+/485056
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
Reviewed-by: Archana Ravindar <aravind5@in.ibm.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
Reviewed-by: Paul Murphy <murp@ibm.com>
Reviewed-by: Bryan Mills <bcmills@google.com>
Diffstat (limited to 'src/math')
| -rw-r--r-- | src/math/big/arith_ppc64x.s | 14 |
1 file changed, 7 insertions(+), 7 deletions(-)
```diff
diff --git a/src/math/big/arith_ppc64x.s b/src/math/big/arith_ppc64x.s
index 5fdbf40a24..0613f5c3ad 100644
--- a/src/math/big/arith_ppc64x.s
+++ b/src/math/big/arith_ppc64x.s
@@ -45,7 +45,7 @@ TEXT ·addVV(SB), NOSPLIT, $0
 	// gain significant performance as z_len increases (up to
 	// 1.45x).
-	PCALIGN $32
+	PCALIGN $16
 loop:
 	MOVD  8(R8), R11      // R11 = x[i]
 	MOVD  16(R8), R12     // R12 = x[i+1]
@@ -134,7 +134,7 @@ TEXT ·subVV(SB), NOSPLIT, $0
 	// gain significant performance as z_len increases (up to
 	// 1.45x).
-	PCALIGN $32
+	PCALIGN $16
 loop:
 	MOVD  8(R8), R11      // R11 = x[i]
 	MOVD  16(R8), R12     // R12 = x[i+1]
@@ -216,7 +216,7 @@ TEXT ·addVW(SB), NOSPLIT, $0
 	CMP   R0, R9
 	MOVD  R9, CTR         // Set up the loop counter
 	BEQ   tail            // If R9 = 0, we can't use the loop
-	PCALIGN $32
+	PCALIGN $16
 loop:
 	MOVD  8(R8), R20      // R20 = x[i]
@@ -294,7 +294,7 @@ TEXT ·subVW(SB), NOSPLIT, $0
 	// we don't need to capture CA every iteration because we've already
 	// done that above.
-	PCALIGN $32
+	PCALIGN $16
 loop:
 	MOVD  8(R8), R20
 	MOVD  16(R8), R21
@@ -365,7 +365,7 @@ TEXT ·shlVU(SB), NOSPLIT, $0
 	CMP   R5, R0          // iterate from i=len(z)-1 to 0
 	BEQ   loopexit        // Already at end?
 	MOVD  0(R15), R10     // x[i]
-	PCALIGN $32
+	PCALIGN $16
 shloop:
 	SLD   R9, R10, R10    // x[i]<<s
 	MOVDU -8(R15), R14
@@ -528,7 +528,7 @@ TEXT ·mulAddVWW(SB), NOSPLIT, $0
 	CMP   R0, R14
 	MOVD  R14, CTR        // Set up the loop counter
 	BEQ   tail            // If R9 = 0, we can't use the loop
-	PCALIGN $32
+	PCALIGN $16
 loop:
 	MOVD  8(R8), R20      // R20 = x[i]
@@ -611,7 +611,7 @@ TEXT ·addMulVVW(SB), NOSPLIT, $0
 	MOVD  R0, R4          // R4 = c = 0
 	MOVD  R22, CTR        // Initialize loop counter
 	BEQ   done
-	PCALIGN $32
+	PCALIGN $16
 loop:
 	MOVD  (R8)(R3), R20   // Load x[i]
```
