about | summary | refs | log | tree | commit | diff
path: root/src/runtime/asm_amd64.s
diff options
context:
space:
mode:
author: Keith Randall <khr@golang.org> 2016-03-29 21:25:33 -0700
committer: Keith Randall <khr@golang.org> 2016-03-31 02:54:10 +0000
commit: 4b209dbf0bf3e5fd4cffda1e11f11bf45ddf212d (patch)
tree: a7af96647c02144a3e82b576eb72d1704bf2f6ff /src/runtime/asm_amd64.s
parent: 1a9373bc570cf408cecdfab5d531e8041f354a54 (diff)
download: go-4b209dbf0bf3e5fd4cffda1e11f11bf45ddf212d.tar.xz
runtime: don't use REP;MOVSB if CPUID doesn't say it is fast
Only use REP;MOVSB if:
  1) The CPUID flag says it is fast, and
  2) The pointers are unaligned
Otherwise, use REP;MOVSQ.

Update #14630

Change-Id: I946b28b87880c08e5eed1ce2945016466c89db66
Reviewed-on: https://go-review.googlesource.com/21300
Reviewed-by: Nigel Tao <nigeltao@golang.org>
Diffstat (limited to 'src/runtime/asm_amd64.s')
-rw-r--r-- src/runtime/asm_amd64.s 21
1 file changed, 14 insertions, 7 deletions
diff --git a/src/runtime/asm_amd64.s b/src/runtime/asm_amd64.s
index b4df1d80d7..83db4d3e81 100644
--- a/src/runtime/asm_amd64.s
+++ b/src/runtime/asm_amd64.s
@@ -28,6 +28,7 @@ TEXT runtime·rt0_go(SB),NOSPLIT,$0
// find out information about the processor we're on
MOVQ $0, AX
CPUID
+ MOVQ AX, SI
CMPQ AX, $0
JE nocpuinfo
@@ -42,15 +43,25 @@ TEXT runtime·rt0_go(SB),NOSPLIT,$0
JNE notintel
MOVB $1, runtime·lfenceBeforeRdtsc(SB)
notintel:
- // Do nothing.
+ // Load EAX=1 cpuid flags
MOVQ $1, AX
CPUID
MOVL CX, runtime·cpuid_ecx(SB)
MOVL DX, runtime·cpuid_edx(SB)
+
+ // Load EAX=7/ECX=0 cpuid flags
+ CMPQ SI, $7
+ JLT no7
+ MOVL $7, AX
+ MOVL $0, CX
+ CPUID
+ MOVL BX, runtime·cpuid_ebx7(SB)
+no7:
// Detect AVX and AVX2 as per 14.7.1 Detection of AVX2 chapter of [1]
// [1] 64-ia-32-architectures-software-developer-manual-325462.pdf
// http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-manual-325462.pdf
+ MOVL runtime·cpuid_ecx(SB), CX
ANDL $0x18000000, CX // check for OSXSAVE and AVX bits
CMPL CX, $0x18000000
JNE noavx
@@ -61,12 +72,8 @@ notintel:
CMPL AX, $6 // Check for OS support of YMM registers
JNE noavx
MOVB $1, runtime·support_avx(SB)
- MOVL $7, AX
- MOVL $0, CX
- CPUID
- ANDL $0x20, BX // check for AVX2 bit
- CMPL BX, $0x20
- JNE noavx2
+ TESTL $(1<<5), runtime·cpuid_ebx7(SB) // check for AVX2 bit
+ JEQ noavx2
MOVB $1, runtime·support_avx2(SB)
JMP nocpuinfo
noavx: