about | summary | refs | log | tree | commit | diff
path: root/src/internal/bytealg
diff options
context:
space:
mode:
Diffstat (limited to 'src/internal/bytealg')
-rw-r--r-- src/internal/bytealg/compare_amd64.s 7
-rw-r--r-- src/internal/bytealg/count_amd64.s 7
-rw-r--r-- src/internal/bytealg/equal_amd64.s 3
-rw-r--r-- src/internal/bytealg/index_amd64.s 2
-rw-r--r-- src/internal/bytealg/indexbyte_amd64.s 2
5 files changed, 21 insertions, 0 deletions
diff --git a/src/internal/bytealg/compare_amd64.s b/src/internal/bytealg/compare_amd64.s
index 4ccaca5e87..fdd015f560 100644
--- a/src/internal/bytealg/compare_amd64.s
+++ b/src/internal/bytealg/compare_amd64.s
@@ -3,6 +3,7 @@
// license that can be found in the LICENSE file.
#include "go_asm.h"
+#include "asm_amd64.h"
#include "textflag.h"
TEXT ·Compare<ABIInternal>(SB),NOSPLIT,$0-56
@@ -44,9 +45,13 @@ TEXT cmpbody<>(SB),NOSPLIT,$0-0
CMPQ R8, $63
JBE loop
+#ifndef hasAVX2
CMPB internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1
JEQ big_loop_avx2
JMP big_loop
+#else
+ JMP big_loop_avx2
+#endif
loop:
CMPQ R8, $16
JBE _0through16
@@ -155,6 +160,7 @@ allsame:
RET
// this works for >= 64 bytes of data.
+#ifndef hasAVX2
big_loop:
MOVOU (SI), X0
MOVOU (DI), X1
@@ -190,6 +196,7 @@ big_loop:
CMPQ R8, $64
JBE loop
JMP big_loop
+#endif
// Compare 64-bytes per loop iteration.
// Loop is unrolled and uses AVX2.
diff --git a/src/internal/bytealg/count_amd64.s b/src/internal/bytealg/count_amd64.s
index fa864c4c76..efb17f84b7 100644
--- a/src/internal/bytealg/count_amd64.s
+++ b/src/internal/bytealg/count_amd64.s
@@ -3,12 +3,15 @@
// license that can be found in the LICENSE file.
#include "go_asm.h"
+#include "asm_amd64.h"
#include "textflag.h"
TEXT ·Count(SB),NOSPLIT,$0-40
+#ifndef hasPOPCNT
CMPB internal∕cpu·X86+const_offsetX86HasPOPCNT(SB), $1
JEQ 2(PC)
JMP ·countGeneric(SB)
+#endif
MOVQ b_base+0(FP), SI
MOVQ b_len+8(FP), BX
MOVB c+24(FP), AL
@@ -16,9 +19,11 @@ TEXT ·Count(SB),NOSPLIT,$0-40
JMP countbody<>(SB)
TEXT ·CountString(SB),NOSPLIT,$0-32
+#ifndef hasPOPCNT
CMPB internal∕cpu·X86+const_offsetX86HasPOPCNT(SB), $1
JEQ 2(PC)
JMP ·countGenericString(SB)
+#endif
MOVQ s_base+0(FP), SI
MOVQ s_len+8(FP), BX
MOVB c+16(FP), AL
@@ -151,8 +156,10 @@ endofpage:
RET
avx2:
+#ifndef hasAVX2
CMPB internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1
JNE sse
+#endif
MOVD AX, X0
LEAQ -32(SI)(BX*1), R11
VPBROADCASTB X0, Y1
diff --git a/src/internal/bytealg/equal_amd64.s b/src/internal/bytealg/equal_amd64.s
index dd46e2e0fd..d178a33779 100644
--- a/src/internal/bytealg/equal_amd64.s
+++ b/src/internal/bytealg/equal_amd64.s
@@ -3,6 +3,7 @@
// license that can be found in the LICENSE file.
#include "go_asm.h"
+#include "asm_amd64.h"
#include "textflag.h"
// memequal(a, b unsafe.Pointer, size uintptr) bool
@@ -46,6 +47,7 @@ TEXT memeqbody<>(SB),NOSPLIT,$0-0
JB small
CMPQ BX, $64
JB bigloop
+#ifndef hasAVX2
CMPB internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1
JE hugeloop_avx2
@@ -76,6 +78,7 @@ hugeloop:
JEQ hugeloop
XORQ AX, AX // return 0
RET
+#endif
// 64 bytes at a time using ymm registers
hugeloop_avx2:
diff --git a/src/internal/bytealg/index_amd64.s b/src/internal/bytealg/index_amd64.s
index 6193b57239..04314917b8 100644
--- a/src/internal/bytealg/index_amd64.s
+++ b/src/internal/bytealg/index_amd64.s
@@ -233,8 +233,10 @@ success_avx2:
VZEROUPPER
JMP success
sse42:
+#ifndef hasSSE42
CMPB internal∕cpu·X86+const_offsetX86HasSSE42(SB), $1
JNE no_sse42
+#endif
CMPQ AX, $12
// PCMPESTRI is slower than normal compare,
// so using it makes sense only if we advance 4+ bytes per compare
diff --git a/src/internal/bytealg/indexbyte_amd64.s b/src/internal/bytealg/indexbyte_amd64.s
index f78093c539..1ca70e39e2 100644
--- a/src/internal/bytealg/indexbyte_amd64.s
+++ b/src/internal/bytealg/indexbyte_amd64.s
@@ -115,8 +115,10 @@ endofpage:
RET
avx2:
+#ifndef hasAVX2
CMPB internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1
JNE sse
+#endif
MOVD AX, X0
LEAQ -32(SI)(BX*1), R11
VPBROADCASTB X0, Y1