about summary refs log tree commit diff
path: root/src/internal/bytealg
diff options
context:
space:
mode:
author    Lynn Boger <laboger@linux.vnet.ibm.com>  2023-04-17 10:02:48 -0500
committer Lynn Boger <laboger@linux.vnet.ibm.com>  2023-04-21 16:47:45 +0000
commit    e23322e2ccd19b5802a823d20a089540afef79ce (patch)
tree      df27fc09f5ee19f00d2b1319ddfcc22b1d22d846 /src/internal/bytealg
parent    de788efeac46f18bd3372666cfc2c698de69d8b6 (diff)
download  go-e23322e2ccd19b5802a823d20a089540afef79ce.tar.xz
cmd/internal/obj/ppc64: modify PCALIGN to ensure alignment
The initial purpose of PCALIGN was to identify code where alignment would be beneficial for performance, while avoiding cases where too many NOPs were added. On p10, it is now necessary to enforce a certain alignment in some cases, so the behavior of PCALIGN needs to be slightly different. Code will now be aligned to the value specified on the PCALIGN instruction regardless of the number of NOPs added, which is more intuitive and consistent with POWER assembler alignment directives. This also adds 64 as a possible alignment value. The existing values used with PCALIGN were modified according to the new behavior. A test case was updated, and performance testing was done to verify that this does not adversely affect performance. Change-Id: Iad1cf5ff112e5bfc0514f0805be90e24095e932b Reviewed-on: https://go-review.googlesource.com/c/go/+/485056 TryBot-Result: Gopher Robot <gobot@golang.org> Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com> Reviewed-by: Archana Ravindar <aravind5@in.ibm.com> Reviewed-by: Michael Pratt <mpratt@google.com> Reviewed-by: Paul Murphy <murp@ibm.com> Reviewed-by: Bryan Mills <bcmills@google.com>
Diffstat (limited to 'src/internal/bytealg')
-rw-r--r--  src/internal/bytealg/compare_ppc64x.s  2
-rw-r--r--  src/internal/bytealg/equal_ppc64x.s    2
-rw-r--r--  src/internal/bytealg/index_ppc64x.s    4
3 files changed, 4 insertions, 4 deletions
diff --git a/src/internal/bytealg/compare_ppc64x.s b/src/internal/bytealg/compare_ppc64x.s
index f3f8b4abd1..63c33ee635 100644
--- a/src/internal/bytealg/compare_ppc64x.s
+++ b/src/internal/bytealg/compare_ppc64x.s
@@ -118,7 +118,7 @@ cmp64: // >= 64B
MOVD $32,R11 // set offsets to load into vector
MOVD $48,R12 // set offsets to load into vector
- PCALIGN $32
+ PCALIGN $16
cmp64_loop:
LXVD2X (R5)(R0),V3 // load bytes of A at offset 0 into vector
LXVD2X (R6)(R0),V4 // load bytes of B at offset 0 into vector
diff --git a/src/internal/bytealg/equal_ppc64x.s b/src/internal/bytealg/equal_ppc64x.s
index 649bd96be4..07dce80d3e 100644
--- a/src/internal/bytealg/equal_ppc64x.s
+++ b/src/internal/bytealg/equal_ppc64x.s
@@ -61,7 +61,7 @@ setup64:
MOVD $48, R16
ANDCC $0x3F, R5, R5 // len%64==0?
- PCALIGN $32
+ PCALIGN $16
loop64:
LXVD2X (R8+R0), V0
LXVD2X (R4+R0), V1
diff --git a/src/internal/bytealg/index_ppc64x.s b/src/internal/bytealg/index_ppc64x.s
index e98f96b715..80a1f853d3 100644
--- a/src/internal/bytealg/index_ppc64x.s
+++ b/src/internal/bytealg/index_ppc64x.s
@@ -674,7 +674,7 @@ index2to16:
#else
MOVD $3, R17 // Number of bytes beyond 16
#endif
- PCALIGN $32
+ PCALIGN $16
index2to16loop:
@@ -776,7 +776,7 @@ short:
MTVSRD R10, V8 // Set up shift
VSLDOI $8, V8, V8, V8
VSLO V1, V8, V1 // Shift by start byte
- PCALIGN $32
+ PCALIGN $16
index2to16next:
VAND V1, SEPMASK, V2 // Just compare size of sep
VCMPEQUBCC V0, V2, V3 // Compare sep and partial string