diff options
Diffstat (limited to 'src/internal/bytealg')
| -rw-r--r-- | src/internal/bytealg/compare_amd64.s | 7 | ||||
| -rw-r--r-- | src/internal/bytealg/count_amd64.s | 7 | ||||
| -rw-r--r-- | src/internal/bytealg/equal_amd64.s | 3 | ||||
| -rw-r--r-- | src/internal/bytealg/index_amd64.s | 2 | ||||
| -rw-r--r-- | src/internal/bytealg/indexbyte_amd64.s | 2 |
5 files changed, 21 insertions, 0 deletions
diff --git a/src/internal/bytealg/compare_amd64.s b/src/internal/bytealg/compare_amd64.s index 4ccaca5e87..fdd015f560 100644 --- a/src/internal/bytealg/compare_amd64.s +++ b/src/internal/bytealg/compare_amd64.s @@ -3,6 +3,7 @@ // license that can be found in the LICENSE file. #include "go_asm.h" +#include "asm_amd64.h" #include "textflag.h" TEXT ·Compare<ABIInternal>(SB),NOSPLIT,$0-56 @@ -44,9 +45,13 @@ TEXT cmpbody<>(SB),NOSPLIT,$0-0 CMPQ R8, $63 JBE loop +#ifndef hasAVX2 CMPB internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1 JEQ big_loop_avx2 JMP big_loop +#else + JMP big_loop_avx2 +#endif loop: CMPQ R8, $16 JBE _0through16 @@ -155,6 +160,7 @@ allsame: RET // this works for >= 64 bytes of data. +#ifndef hasAVX2 big_loop: MOVOU (SI), X0 MOVOU (DI), X1 @@ -190,6 +196,7 @@ big_loop: CMPQ R8, $64 JBE loop JMP big_loop +#endif // Compare 64-bytes per loop iteration. // Loop is unrolled and uses AVX2. diff --git a/src/internal/bytealg/count_amd64.s b/src/internal/bytealg/count_amd64.s index fa864c4c76..efb17f84b7 100644 --- a/src/internal/bytealg/count_amd64.s +++ b/src/internal/bytealg/count_amd64.s @@ -3,12 +3,15 @@ // license that can be found in the LICENSE file. #include "go_asm.h" +#include "asm_amd64.h" #include "textflag.h" TEXT ·Count(SB),NOSPLIT,$0-40 +#ifndef hasPOPCNT CMPB internal∕cpu·X86+const_offsetX86HasPOPCNT(SB), $1 JEQ 2(PC) JMP ·countGeneric(SB) +#endif MOVQ b_base+0(FP), SI MOVQ b_len+8(FP), BX MOVB c+24(FP), AL @@ -16,9 +19,11 @@ TEXT ·Count(SB),NOSPLIT,$0-40 JMP countbody<>(SB) TEXT ·CountString(SB),NOSPLIT,$0-32 +#ifndef hasPOPCNT CMPB internal∕cpu·X86+const_offsetX86HasPOPCNT(SB), $1 JEQ 2(PC) JMP ·countGenericString(SB) +#endif MOVQ s_base+0(FP), SI MOVQ s_len+8(FP), BX MOVB c+16(FP), AL @@ -151,8 +156,10 @@ endofpage: RET avx2: +#ifndef hasAVX2 CMPB internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1 JNE sse +#endif MOVD AX, X0 LEAQ -32(SI)(BX*1), R11 VPBROADCASTB X0, Y1 diff --git a/src/internal/bytealg/equal_amd64.s b/src/internal/bytealg/equal_amd64.s index dd46e2e0fd..d178a33779 100644 --- a/src/internal/bytealg/equal_amd64.s +++ b/src/internal/bytealg/equal_amd64.s @@ -3,6 +3,7 @@ // license that can be found in the LICENSE file. #include "go_asm.h" +#include "asm_amd64.h" #include "textflag.h" // memequal(a, b unsafe.Pointer, size uintptr) bool @@ -46,6 +47,7 @@ TEXT memeqbody<>(SB),NOSPLIT,$0-0 JB small CMPQ BX, $64 JB bigloop +#ifndef hasAVX2 CMPB internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1 JE hugeloop_avx2 @@ -76,6 +78,7 @@ hugeloop: JEQ hugeloop XORQ AX, AX // return 0 RET +#endif // 64 bytes at a time using ymm registers hugeloop_avx2: diff --git a/src/internal/bytealg/index_amd64.s b/src/internal/bytealg/index_amd64.s index 6193b57239..04314917b8 100644 --- a/src/internal/bytealg/index_amd64.s +++ b/src/internal/bytealg/index_amd64.s @@ -233,8 +233,10 @@ success_avx2: VZEROUPPER JMP success sse42: +#ifndef hasSSE42 CMPB internal∕cpu·X86+const_offsetX86HasSSE42(SB), $1 JNE no_sse42 +#endif CMPQ AX, $12 // PCMPESTRI is slower than normal compare, // so using it makes sense only if we advance 4+ bytes per compare diff --git a/src/internal/bytealg/indexbyte_amd64.s b/src/internal/bytealg/indexbyte_amd64.s index f78093c539..1ca70e39e2 100644 --- a/src/internal/bytealg/indexbyte_amd64.s +++ b/src/internal/bytealg/indexbyte_amd64.s @@ -115,8 +115,10 @@ endofpage: RET avx2: +#ifndef hasAVX2 CMPB internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1 JNE sse +#endif MOVD AX, X0 LEAQ -32(SI)(BX*1), R11 VPBROADCASTB X0, Y1 |
