diff options
| author | Keith Randall <khr@golang.org> | 2016-03-29 21:25:33 -0700 |
|---|---|---|
| committer | Keith Randall <khr@golang.org> | 2016-03-31 02:54:10 +0000 |
| commit | 4b209dbf0bf3e5fd4cffda1e11f11bf45ddf212d (patch) | |
| tree | a7af96647c02144a3e82b576eb72d1704bf2f6ff /src/runtime/asm_amd64.s | |
| parent | 1a9373bc570cf408cecdfab5d531e8041f354a54 (diff) | |
| download | go-4b209dbf0bf3e5fd4cffda1e11f11bf45ddf212d.tar.xz | |
runtime: don't use REP;MOVSB if CPUID doesn't say it is fast
Only use REP;MOVSB if:
1) The CPUID flag says it is fast, and
2) The pointers are unaligned
Otherwise, use REP;MOVSQ.
Update #14630
Change-Id: I946b28b87880c08e5eed1ce2945016466c89db66
Reviewed-on: https://go-review.googlesource.com/21300
Reviewed-by: Nigel Tao <nigeltao@golang.org>
Diffstat (limited to 'src/runtime/asm_amd64.s')
| -rw-r--r-- | src/runtime/asm_amd64.s | 21 |
1 file changed, 14 insertions, 7 deletions
```diff
diff --git a/src/runtime/asm_amd64.s b/src/runtime/asm_amd64.s
index b4df1d80d7..83db4d3e81 100644
--- a/src/runtime/asm_amd64.s
+++ b/src/runtime/asm_amd64.s
@@ -28,6 +28,7 @@ TEXT runtime·rt0_go(SB),NOSPLIT,$0
 	// find out information about the processor we're on
 	MOVQ	$0, AX
 	CPUID
+	MOVQ	AX, SI
 	CMPQ	AX, $0
 	JE	nocpuinfo
 
@@ -42,15 +43,25 @@ TEXT runtime·rt0_go(SB),NOSPLIT,$0
 	JNE	notintel
 	MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
 notintel:
-	// Do nothing.
 
+	// Load EAX=1 cpuid flags
 	MOVQ	$1, AX
 	CPUID
 	MOVL	CX, runtime·cpuid_ecx(SB)
 	MOVL	DX, runtime·cpuid_edx(SB)
+
+	// Load EAX=7/ECX=0 cpuid flags
+	CMPQ	SI, $7
+	JLT	no7
+	MOVL	$7, AX
+	MOVL	$0, CX
+	CPUID
+	MOVL	BX, runtime·cpuid_ebx7(SB)
+no7:
 	// Detect AVX and AVX2 as per 14.7.1 Detection of AVX2 chapter of [1]
 	// [1] 64-ia-32-architectures-software-developer-manual-325462.pdf
 	// http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-manual-325462.pdf
+	MOVL	runtime·cpuid_ecx(SB), CX
 	ANDL	$0x18000000, CX // check for OSXSAVE and AVX bits
 	CMPL	CX, $0x18000000
 	JNE	noavx
@@ -61,12 +72,8 @@ notintel:
 	CMPL	AX, $6 // Check for OS support of YMM registers
 	JNE	noavx
 	MOVB	$1, runtime·support_avx(SB)
-	MOVL	$7, AX
-	MOVL	$0, CX
-	CPUID
-	ANDL	$0x20, BX // check for AVX2 bit
-	CMPL	BX, $0x20
-	JNE	noavx2
+	TESTL	$(1<<5), runtime·cpuid_ebx7(SB) // check for AVX2 bit
+	JEQ	noavx2
 	MOVB	$1, runtime·support_avx2(SB)
 	JMP	nocpuinfo
 noavx:
```
