diff options
| author | Russ Cox <rsc@golang.org> | 2014-10-28 21:50:16 -0400 |
|---|---|---|
| committer | Russ Cox <rsc@golang.org> | 2014-10-28 21:50:16 -0400 |
| commit | b55791e2008703b33883e48e72a347ba07a65486 (patch) | |
| tree | 2dc3ed44dc71a1aa6af2d9fbe331464408104d79 /src/runtime | |
| parent | 87b4149b22feb9d8d52da1730b455e39436a3a8d (diff) | |
| download | go-b55791e2008703b33883e48e72a347ba07a65486.tar.xz | |
[dev.power64] cmd/5a, cmd/6a, cmd/8a, cmd/9a: make labels function-scoped

I removed support for jumping between functions years ago,
as part of doing the instruction layout for each function separately.
Given that, it makes sense to treat labels as function-scoped.
This lets each function have its own 'loop' label, for example.
Makes the assembly much cleaner and removes the last
reason anyone would reach for the 123(PC) form instead.

Note that this is on the dev.power64 branch, but it changes all
the assemblers. The change will ship in Go 1.5 (perhaps after
being ported into the new assembler).

Came up as part of CL 167730043.

LGTM=r
R=r
CC=austin, dave, golang-codereviews, minux
https://golang.org/cl/159670043
Diffstat (limited to 'src/runtime')
| -rw-r--r-- | src/runtime/asm_386.s | 58 | ||||
| -rw-r--r-- | src/runtime/asm_amd64.s | 78 | ||||
| -rw-r--r-- | src/runtime/asm_amd64p32.s | 58 | ||||
| -rw-r--r-- | src/runtime/asm_arm.s | 24 | ||||
| -rw-r--r-- | src/runtime/asm_power64x.s | 40 | ||||
| -rw-r--r-- | src/runtime/memclr_386.s | 46 | ||||
| -rw-r--r-- | src/runtime/memclr_amd64.s | 44 | ||||
| -rw-r--r-- | src/runtime/memclr_plan9_386.s | 24 | ||||
| -rw-r--r-- | src/runtime/race_amd64.s | 18 | ||||
| -rw-r--r-- | src/runtime/sys_darwin_386.s | 4 | ||||
| -rw-r--r-- | src/runtime/sys_darwin_amd64.s | 4 | ||||
| -rw-r--r-- | src/runtime/sys_dragonfly_386.s | 4 | ||||
| -rw-r--r-- | src/runtime/sys_freebsd_386.s | 4 | ||||
| -rw-r--r-- | src/runtime/sys_linux_amd64.s | 8 | ||||
| -rw-r--r-- | src/runtime/sys_linux_arm.s | 8 | ||||
| -rw-r--r-- | src/runtime/sys_nacl_386.s | 4 | ||||
| -rw-r--r-- | src/runtime/sys_nacl_amd64p32.s | 1 | ||||
| -rw-r--r-- | src/runtime/sys_nacl_arm.s | 1 | ||||
| -rw-r--r-- | src/runtime/sys_openbsd_386.s | 4 | ||||
| -rw-r--r-- | src/runtime/sys_solaris_amd64.s | 12 | ||||
| -rw-r--r-- | src/runtime/sys_windows_386.s | 12 | ||||
| -rw-r--r-- | src/runtime/sys_windows_amd64.s | 12 |
22 files changed, 233 insertions, 235 deletions
diff --git a/src/runtime/asm_386.s b/src/runtime/asm_386.s index 20d3c47c94..d0b3969bda 100644 --- a/src/runtime/asm_386.s +++ b/src/runtime/asm_386.s @@ -486,11 +486,11 @@ TEXT runtime·cas64(SB), NOSPLIT, $0-21 MOVL new_hi+16(FP), CX LOCK CMPXCHG8B 0(BP) - JNZ cas64_fail + JNZ fail MOVL $1, AX MOVB AX, ret+20(FP) RET -cas64_fail: +fail: MOVL $0, AX MOVB AX, ret+20(FP) RET @@ -1342,29 +1342,29 @@ TEXT strings·IndexByte(SB),NOSPLIT,$0 // AX = 1/0/-1 TEXT runtime·cmpbody(SB),NOSPLIT,$0-0 CMPL SI, DI - JEQ cmp_allsame + JEQ allsame CMPL BX, DX MOVL DX, BP CMOVLLT BX, BP // BP = min(alen, blen) CMPL BP, $4 - JB cmp_small + JB small TESTL $0x4000000, runtime·cpuid_edx(SB) // check for sse2 - JE cmp_mediumloop -cmp_largeloop: + JE mediumloop +largeloop: CMPL BP, $16 - JB cmp_mediumloop + JB mediumloop MOVOU (SI), X0 MOVOU (DI), X1 PCMPEQB X0, X1 PMOVMSKB X1, AX XORL $0xffff, AX // convert EQ to NE - JNE cmp_diff16 // branch if at least one byte is not equal + JNE diff16 // branch if at least one byte is not equal ADDL $16, SI ADDL $16, DI SUBL $16, BP - JMP cmp_largeloop + JMP largeloop -cmp_diff16: +diff16: BSFL AX, BX // index of first byte that differs XORL AX, AX MOVB (SI)(BX*1), CX @@ -1373,25 +1373,25 @@ cmp_diff16: LEAL -1(AX*2), AX // convert 1/0 to +1/-1 RET -cmp_mediumloop: +mediumloop: CMPL BP, $4 - JBE cmp_0through4 + JBE _0through4 MOVL (SI), AX MOVL (DI), CX CMPL AX, CX - JNE cmp_diff4 + JNE diff4 ADDL $4, SI ADDL $4, DI SUBL $4, BP - JMP cmp_mediumloop + JMP mediumloop -cmp_0through4: +_0through4: MOVL -4(SI)(BP*1), AX MOVL -4(DI)(BP*1), CX CMPL AX, CX - JEQ cmp_allsame + JEQ allsame -cmp_diff4: +diff4: BSWAPL AX // reverse order of bytes BSWAPL CX XORL AX, CX // find bit differences @@ -1402,37 +1402,37 @@ cmp_diff4: RET // 0-3 bytes in common -cmp_small: +small: LEAL (BP*8), CX NEGL CX - JEQ cmp_allsame + JEQ allsame // load si CMPB SI, $0xfc - JA cmp_si_high + JA si_high MOVL (SI), SI - JMP cmp_si_finish -cmp_si_high: + JMP si_finish +si_high: MOVL 
-4(SI)(BP*1), SI SHRL CX, SI -cmp_si_finish: +si_finish: SHLL CX, SI // same for di CMPB DI, $0xfc - JA cmp_di_high + JA di_high MOVL (DI), DI - JMP cmp_di_finish -cmp_di_high: + JMP di_finish +di_high: MOVL -4(DI)(BP*1), DI SHRL CX, DI -cmp_di_finish: +di_finish: SHLL CX, DI BSWAPL SI // reverse order of bytes BSWAPL DI XORL SI, DI // find bit differences - JEQ cmp_allsame + JEQ allsame BSRL DI, CX // index of highest bit difference SHRL CX, SI // move a's bit to bottom ANDL $1, SI // mask bit @@ -1441,7 +1441,7 @@ cmp_di_finish: // all the bytes in common are the same, so we just need // to compare the lengths. -cmp_allsame: +allsame: XORL AX, AX XORL CX, CX CMPL BX, DX diff --git a/src/runtime/asm_amd64.s b/src/runtime/asm_amd64.s index 709834180e..7a0fdfa73a 100644 --- a/src/runtime/asm_amd64.s +++ b/src/runtime/asm_amd64.s @@ -461,11 +461,11 @@ TEXT runtime·cas64(SB), NOSPLIT, $0-25 MOVQ new+16(FP), CX LOCK CMPXCHGQ CX, 0(BX) - JNZ cas64_fail + JNZ fail MOVL $1, AX MOVB AX, ret+24(FP) RET -cas64_fail: +fail: MOVL $0, AX MOVB AX, ret+24(FP) RET @@ -876,24 +876,24 @@ TEXT runtime·aeshashbody(SB),NOSPLIT,$0-32 MOVO runtime·aeskeysched+0(SB), X2 MOVO runtime·aeskeysched+16(SB), X3 CMPQ CX, $16 - JB aessmall -aesloop: + JB small +loop: CMPQ CX, $16 - JBE aesloopend + JBE loopend MOVOU (AX), X1 AESENC X2, X0 AESENC X1, X0 SUBQ $16, CX ADDQ $16, AX - JMP aesloop + JMP loop // 1-16 bytes remaining -aesloopend: +loopend: // This load may overlap with the previous load above. // We'll hash some bytes twice, but that's ok. 
MOVOU -16(AX)(CX*1), X1 JMP partial // 0-15 bytes -aessmall: +small: TESTQ CX, CX JE finalize // 0 bytes @@ -1036,18 +1036,18 @@ TEXT runtime·eqstring(SB),NOSPLIT,$0-33 MOVQ s1len+8(FP), AX MOVQ s2len+24(FP), BX CMPQ AX, BX - JNE different + JNE noteq MOVQ s1str+0(FP), SI MOVQ s2str+16(FP), DI CMPQ SI, DI - JEQ same + JEQ eq CALL runtime·memeqbody(SB) MOVB AX, v+32(FP) RET -same: +eq: MOVB $1, v+32(FP) RET -different: +noteq: MOVB $0, v+32(FP) RET @@ -1170,29 +1170,29 @@ TEXT runtime·cmpbytes(SB),NOSPLIT,$0-56 // AX = 1/0/-1 TEXT runtime·cmpbody(SB),NOSPLIT,$0-0 CMPQ SI, DI - JEQ cmp_allsame + JEQ allsame CMPQ BX, DX MOVQ DX, BP CMOVQLT BX, BP // BP = min(alen, blen) = # of bytes to compare CMPQ BP, $8 - JB cmp_small + JB small -cmp_loop: +loop: CMPQ BP, $16 - JBE cmp_0through16 + JBE _0through16 MOVOU (SI), X0 MOVOU (DI), X1 PCMPEQB X0, X1 PMOVMSKB X1, AX XORQ $0xffff, AX // convert EQ to NE - JNE cmp_diff16 // branch if at least one byte is not equal + JNE diff16 // branch if at least one byte is not equal ADDQ $16, SI ADDQ $16, DI SUBQ $16, BP - JMP cmp_loop + JMP loop // AX = bit mask of differences -cmp_diff16: +diff16: BSFQ AX, BX // index of first byte that differs XORQ AX, AX MOVB (SI)(BX*1), CX @@ -1202,21 +1202,21 @@ cmp_diff16: RET // 0 through 16 bytes left, alen>=8, blen>=8 -cmp_0through16: +_0through16: CMPQ BP, $8 - JBE cmp_0through8 + JBE _0through8 MOVQ (SI), AX MOVQ (DI), CX CMPQ AX, CX - JNE cmp_diff8 -cmp_0through8: + JNE diff8 +_0through8: MOVQ -8(SI)(BP*1), AX MOVQ -8(DI)(BP*1), CX CMPQ AX, CX - JEQ cmp_allsame + JEQ allsame // AX and CX contain parts of a and b that differ. 
-cmp_diff8: +diff8: BSWAPQ AX // reverse order of bytes BSWAPQ CX XORQ AX, CX @@ -1227,44 +1227,44 @@ cmp_diff8: RET // 0-7 bytes in common -cmp_small: +small: LEAQ (BP*8), CX // bytes left -> bits left NEGQ CX // - bits lift (== 64 - bits left mod 64) - JEQ cmp_allsame + JEQ allsame // load bytes of a into high bytes of AX CMPB SI, $0xf8 - JA cmp_si_high + JA si_high MOVQ (SI), SI - JMP cmp_si_finish -cmp_si_high: + JMP si_finish +si_high: MOVQ -8(SI)(BP*1), SI SHRQ CX, SI -cmp_si_finish: +si_finish: SHLQ CX, SI // load bytes of b in to high bytes of BX CMPB DI, $0xf8 - JA cmp_di_high + JA di_high MOVQ (DI), DI - JMP cmp_di_finish -cmp_di_high: + JMP di_finish +di_high: MOVQ -8(DI)(BP*1), DI SHRQ CX, DI -cmp_di_finish: +di_finish: SHLQ CX, DI BSWAPQ SI // reverse order of bytes BSWAPQ DI XORQ SI, DI // find bit differences - JEQ cmp_allsame + JEQ allsame BSRQ DI, CX // index of highest bit difference SHRQ CX, SI // move a's bit to bottom ANDQ $1, SI // mask bit LEAQ -1(SI*2), AX // 1/0 => +1/-1 RET -cmp_allsame: +allsame: XORQ AX, AX XORQ CX, CX CMPQ BX, DX @@ -1299,7 +1299,7 @@ TEXT runtime·indexbytebody(SB),NOSPLIT,$0 MOVQ SI, DI CMPQ BX, $16 - JLT indexbyte_small + JLT small // round up to first 16-byte boundary TESTQ $15, SI @@ -1357,7 +1357,7 @@ failure: RET // handle for lengths < 16 -indexbyte_small: +small: MOVQ BX, CX REPN; SCASB JZ success diff --git a/src/runtime/asm_amd64p32.s b/src/runtime/asm_amd64p32.s index 28875bc55a..de3ef3a237 100644 --- a/src/runtime/asm_amd64p32.s +++ b/src/runtime/asm_amd64p32.s @@ -444,11 +444,11 @@ TEXT runtime·cas64(SB), NOSPLIT, $0-25 MOVQ new+16(FP), CX LOCK CMPXCHGQ CX, 0(BX) - JNZ cas64_fail + JNZ fail MOVL $1, AX MOVB AX, ret+24(FP) RET -cas64_fail: +fail: MOVL $0, AX MOVB AX, ret+24(FP) RET @@ -834,29 +834,29 @@ TEXT runtime·cmpbytes(SB),NOSPLIT,$0-28 // AX = 1/0/-1 TEXT runtime·cmpbody(SB),NOSPLIT,$0-0 CMPQ SI, DI - JEQ cmp_allsame + JEQ allsame CMPQ BX, DX MOVQ DX, R8 CMOVQLT BX, R8 // R8 = min(alen, blen) = # of 
bytes to compare CMPQ R8, $8 - JB cmp_small + JB small -cmp_loop: +loop: CMPQ R8, $16 - JBE cmp_0through16 + JBE _0through16 MOVOU (SI), X0 MOVOU (DI), X1 PCMPEQB X0, X1 PMOVMSKB X1, AX XORQ $0xffff, AX // convert EQ to NE - JNE cmp_diff16 // branch if at least one byte is not equal + JNE diff16 // branch if at least one byte is not equal ADDQ $16, SI ADDQ $16, DI SUBQ $16, R8 - JMP cmp_loop + JMP loop // AX = bit mask of differences -cmp_diff16: +diff16: BSFQ AX, BX // index of first byte that differs XORQ AX, AX ADDQ BX, SI @@ -868,23 +868,23 @@ cmp_diff16: RET // 0 through 16 bytes left, alen>=8, blen>=8 -cmp_0through16: +_0through16: CMPQ R8, $8 - JBE cmp_0through8 + JBE _0through8 MOVQ (SI), AX MOVQ (DI), CX CMPQ AX, CX - JNE cmp_diff8 -cmp_0through8: + JNE diff8 +_0through8: ADDQ R8, SI ADDQ R8, DI MOVQ -8(SI), AX MOVQ -8(DI), CX CMPQ AX, CX - JEQ cmp_allsame + JEQ allsame // AX and CX contain parts of a and b that differ. -cmp_diff8: +diff8: BSWAPQ AX // reverse order of bytes BSWAPQ CX XORQ AX, CX @@ -895,46 +895,46 @@ cmp_diff8: RET // 0-7 bytes in common -cmp_small: +small: LEAQ (R8*8), CX // bytes left -> bits left NEGQ CX // - bits lift (== 64 - bits left mod 64) - JEQ cmp_allsame + JEQ allsame // load bytes of a into high bytes of AX CMPB SI, $0xf8 - JA cmp_si_high + JA si_high MOVQ (SI), SI - JMP cmp_si_finish -cmp_si_high: + JMP si_finish +si_high: ADDQ R8, SI MOVQ -8(SI), SI SHRQ CX, SI -cmp_si_finish: +si_finish: SHLQ CX, SI // load bytes of b in to high bytes of BX CMPB DI, $0xf8 - JA cmp_di_high + JA di_high MOVQ (DI), DI - JMP cmp_di_finish -cmp_di_high: + JMP di_finish +di_high: ADDQ R8, DI MOVQ -8(DI), DI SHRQ CX, DI -cmp_di_finish: +di_finish: SHLQ CX, DI BSWAPQ SI // reverse order of bytes BSWAPQ DI XORQ SI, DI // find bit differences - JEQ cmp_allsame + JEQ allsame BSRQ DI, CX // index of highest bit difference SHRQ CX, SI // move a's bit to bottom ANDQ $1, SI // mask bit LEAQ -1(SI*2), AX // 1/0 => +1/-1 RET -cmp_allsame: +allsame: XORQ 
AX, AX XORQ CX, CX CMPQ BX, DX @@ -969,7 +969,7 @@ TEXT runtime·indexbytebody(SB),NOSPLIT,$0 MOVL SI, DI CMPL BX, $16 - JLT indexbyte_small + JLT small // round up to first 16-byte boundary TESTL $15, SI @@ -1027,7 +1027,7 @@ failure: RET // handle for lengths < 16 -indexbyte_small: +small: MOVL BX, CX REPN; SCASB JZ success diff --git a/src/runtime/asm_arm.s b/src/runtime/asm_arm.s index 621d13187a..8942b11acb 100644 --- a/src/runtime/asm_arm.s +++ b/src/runtime/asm_arm.s @@ -492,7 +492,7 @@ TEXT asmcgocall<>(SB),NOSPLIT,$0-0 MOVW g_m(g), R8 MOVW m_g0(R8), R3 CMP R3, g - BEQ asmcgocall_g0 + BEQ g0 BL gosave<>(SB) MOVW R0, R5 MOVW R3, R0 @@ -501,7 +501,7 @@ TEXT asmcgocall<>(SB),NOSPLIT,$0-0 MOVW (g_sched+gobuf_sp)(g), R13 // Now on a scheduling stack (a pthread-created stack). -asmcgocall_g0: +g0: SUB $24, R13 BIC $0x7, R13 // alignment for gcc ABI MOVW R4, 20(R13) // save old g @@ -736,13 +736,13 @@ TEXT runtime·memeq(SB),NOSPLIT,$-4-13 ADD R1, R3, R6 MOVW $1, R0 MOVB R0, ret+12(FP) -_next2: +loop: CMP R1, R6 RET.EQ MOVBU.P 1(R1), R4 MOVBU.P 1(R2), R5 CMP R4, R5 - BEQ _next2 + BEQ loop MOVW $0, R0 MOVB R0, ret+12(FP) @@ -765,13 +765,13 @@ TEXT runtime·eqstring(SB),NOSPLIT,$-4-17 CMP R2, R3 RET.EQ ADD R2, R0, R6 -_eqnext: +loop: CMP R2, R6 RET.EQ MOVBU.P 1(R2), R4 MOVBU.P 1(R3), R5 CMP R4, R5 - BEQ _eqnext + BEQ loop MOVB R7, v+16(FP) RET @@ -786,26 +786,26 @@ TEXT bytes·Equal(SB),NOSPLIT,$0 MOVW b_len+16(FP), R3 CMP R1, R3 // unequal lengths are not equal - B.NE _notequal + B.NE notequal MOVW a+0(FP), R0 MOVW b+12(FP), R2 ADD R0, R1 // end -_byteseq_next: +loop: CMP R0, R1 - B.EQ _equal // reached the end + B.EQ equal // reached the end MOVBU.P 1(R0), R4 MOVBU.P 1(R2), R5 CMP R4, R5 - B.EQ _byteseq_next + B.EQ loop -_notequal: +notequal: MOVW $0, R0 MOVBU R0, ret+24(FP) RET -_equal: +equal: MOVW $1, R0 MOVBU R0, ret+24(FP) RET diff --git a/src/runtime/asm_power64x.s b/src/runtime/asm_power64x.s index 21220e5cb8..f77658032e 100644 --- a/src/runtime/asm_power64x.s 
+++ b/src/runtime/asm_power64x.s @@ -699,7 +699,7 @@ TEXT runtime·memeq(SB),NOSPLIT,$-8-25 SUB $1, R3 SUB $1, R4 ADD R3, R5, R8 -_next: +loop: CMP R3, R8 BNE 4(PC) MOVD $1, R3 @@ -708,7 +708,7 @@ _next: MOVBZU 1(R3), R6 MOVBZU 1(R4), R7 CMP R6, R7 - BEQ _next + BEQ loop MOVB R0, ret+24(FP) RETURN @@ -720,14 +720,14 @@ TEXT runtime·eqstring(SB),NOSPLIT,$0-33 MOVD s1len+8(FP), R4 MOVD s2len+24(FP), R5 CMP R4, R5 - BNE str_noteq + BNE noteq MOVD s1str+0(FP), R3 MOVD s2str+16(FP), R4 SUB $1, R3 SUB $1, R4 ADD R3, R5, R8 -eq_next: +loop: CMP R3, R8 BNE 4(PC) MOVD $1, R3 @@ -736,8 +736,8 @@ eq_next: MOVBZU 1(R3), R6 MOVBZU 1(R4), R7 CMP R6, R7 - BEQ eq_next -str_noteq: + BEQ loop +noteq: MOVB R0, ret+32(FP) RETURN @@ -747,7 +747,7 @@ TEXT bytes·Equal(SB),NOSPLIT,$0-49 MOVD b_len+32(FP), R4 CMP R3, R4 // unequal lengths are not equal - BNE _notequal + BNE noteq MOVD a+0(FP), R5 MOVD b+24(FP), R6 @@ -755,19 +755,19 @@ TEXT bytes·Equal(SB),NOSPLIT,$0-49 SUB $1, R6 ADD R5, R3 // end-1 -_byteseq_next: +loop: CMP R5, R3 - BEQ _equal // reached the end + BEQ equal // reached the end MOVBZU 1(R5), R4 MOVBZU 1(R6), R7 CMP R4, R7 - BEQ _byteseq_next + BEQ loop -_notequal: +noteq: MOVBZ R0, ret+48(FP) RETURN -_equal: +equal: MOVD $1, R3 MOVBZ R3, ret+48(FP) RETURN @@ -780,18 +780,18 @@ TEXT bytes·IndexByte(SB),NOSPLIT,$0-40 SUB $1, R3 ADD R3, R4 // end-1 -_index_loop: +loop: CMP R3, R4 - BEQ _index_notfound + BEQ notfound MOVBZU 1(R3), R7 CMP R7, R5 - BNE _index_loop + BNE loop SUB R6, R3 // remove base MOVD R3, ret+32(FP) RETURN -_index_notfound: +notfound: MOVD $-1, R3 MOVD R3, ret+32(FP) RETURN @@ -804,18 +804,18 @@ TEXT strings·IndexByte(SB),NOSPLIT,$0 SUB $1, R3 ADD R3, R4 // end-1 -_index2_loop: +loop: CMP R3, R4 - BEQ _index2_notfound + BEQ notfound MOVBZU 1(R3), R7 CMP R7, R5 - BNE _index2_loop + BNE loop SUB R6, R3 // remove base MOVD R3, ret+24(FP) RETURN -_index2_notfound: +notfound: MOVD $-1, R3 MOVD R3, ret+24(FP) RETURN diff --git a/src/runtime/memclr_386.s 
b/src/runtime/memclr_386.s index 1520aea2e0..3f20b69c82 100644 --- a/src/runtime/memclr_386.s +++ b/src/runtime/memclr_386.s @@ -15,31 +15,31 @@ TEXT runtime·memclr(SB), NOSPLIT, $0-8 XORL AX, AX // MOVOU seems always faster than REP STOSL. -clr_tail: +tail: TESTL BX, BX - JEQ clr_0 + JEQ _0 CMPL BX, $2 - JBE clr_1or2 + JBE _1or2 CMPL BX, $4 - JBE clr_3or4 + JBE _3or4 CMPL BX, $8 - JBE clr_5through8 + JBE _5through8 CMPL BX, $16 - JBE clr_9through16 + JBE _9through16 TESTL $0x4000000, runtime·cpuid_edx(SB) // check for sse2 JEQ nosse2 PXOR X0, X0 CMPL BX, $32 - JBE clr_17through32 + JBE _17through32 CMPL BX, $64 - JBE clr_33through64 + JBE _33through64 CMPL BX, $128 - JBE clr_65through128 + JBE _65through128 CMPL BX, $256 - JBE clr_129through256 + JBE _129through256 // TODO: use branch table and BSR to make this just a single dispatch -clr_loop: +loop: MOVOU X0, 0(DI) MOVOU X0, 16(DI) MOVOU X0, 32(DI) @@ -59,40 +59,40 @@ clr_loop: SUBL $256, BX ADDL $256, DI CMPL BX, $256 - JAE clr_loop - JMP clr_tail + JAE loop + JMP tail -clr_1or2: +_1or2: MOVB AX, (DI) MOVB AX, -1(DI)(BX*1) RET -clr_0: +_0: RET -clr_3or4: +_3or4: MOVW AX, (DI) MOVW AX, -2(DI)(BX*1) RET -clr_5through8: +_5through8: MOVL AX, (DI) MOVL AX, -4(DI)(BX*1) RET -clr_9through16: +_9through16: MOVL AX, (DI) MOVL AX, 4(DI) MOVL AX, -8(DI)(BX*1) MOVL AX, -4(DI)(BX*1) RET -clr_17through32: +_17through32: MOVOU X0, (DI) MOVOU X0, -16(DI)(BX*1) RET -clr_33through64: +_33through64: MOVOU X0, (DI) MOVOU X0, 16(DI) MOVOU X0, -32(DI)(BX*1) MOVOU X0, -16(DI)(BX*1) RET -clr_65through128: +_65through128: MOVOU X0, (DI) MOVOU X0, 16(DI) MOVOU X0, 32(DI) @@ -102,7 +102,7 @@ clr_65through128: MOVOU X0, -32(DI)(BX*1) MOVOU X0, -16(DI)(BX*1) RET -clr_129through256: +_129through256: MOVOU X0, (DI) MOVOU X0, 16(DI) MOVOU X0, 32(DI) @@ -126,5 +126,5 @@ nosse2: REP STOSL ANDL $3, BX - JNE clr_tail + JNE tail RET diff --git a/src/runtime/memclr_amd64.s b/src/runtime/memclr_amd64.s index 94a2c7f236..ec24f1db23 100644 --- 
a/src/runtime/memclr_amd64.s +++ b/src/runtime/memclr_amd64.s @@ -15,30 +15,30 @@ TEXT runtime·memclr(SB), NOSPLIT, $0-16 XORQ AX, AX // MOVOU seems always faster than REP STOSQ. -clr_tail: +tail: TESTQ BX, BX - JEQ clr_0 + JEQ _0 CMPQ BX, $2 - JBE clr_1or2 + JBE _1or2 CMPQ BX, $4 - JBE clr_3or4 + JBE _3or4 CMPQ BX, $8 - JBE clr_5through8 + JBE _5through8 CMPQ BX, $16 - JBE clr_9through16 + JBE _9through16 PXOR X0, X0 CMPQ BX, $32 - JBE clr_17through32 + JBE _17through32 CMPQ BX, $64 - JBE clr_33through64 + JBE _33through64 CMPQ BX, $128 - JBE clr_65through128 + JBE _65through128 CMPQ BX, $256 - JBE clr_129through256 + JBE _129through256 // TODO: use branch table and BSR to make this just a single dispatch // TODO: for really big clears, use MOVNTDQ. -clr_loop: +loop: MOVOU X0, 0(DI) MOVOU X0, 16(DI) MOVOU X0, 32(DI) @@ -58,38 +58,38 @@ clr_loop: SUBQ $256, BX ADDQ $256, DI CMPQ BX, $256 - JAE clr_loop - JMP clr_tail + JAE loop + JMP tail -clr_1or2: +_1or2: MOVB AX, (DI) MOVB AX, -1(DI)(BX*1) RET -clr_0: +_0: RET -clr_3or4: +_3or4: MOVW AX, (DI) MOVW AX, -2(DI)(BX*1) RET -clr_5through8: +_5through8: MOVL AX, (DI) MOVL AX, -4(DI)(BX*1) RET -clr_9through16: +_9through16: MOVQ AX, (DI) MOVQ AX, -8(DI)(BX*1) RET -clr_17through32: +_17through32: MOVOU X0, (DI) MOVOU X0, -16(DI)(BX*1) RET -clr_33through64: +_33through64: MOVOU X0, (DI) MOVOU X0, 16(DI) MOVOU X0, -32(DI)(BX*1) MOVOU X0, -16(DI)(BX*1) RET -clr_65through128: +_65through128: MOVOU X0, (DI) MOVOU X0, 16(DI) MOVOU X0, 32(DI) @@ -99,7 +99,7 @@ clr_65through128: MOVOU X0, -32(DI)(BX*1) MOVOU X0, -16(DI)(BX*1) RET -clr_129through256: +_129through256: MOVOU X0, (DI) MOVOU X0, 16(DI) MOVOU X0, 32(DI) diff --git a/src/runtime/memclr_plan9_386.s b/src/runtime/memclr_plan9_386.s index b4b671f773..50f327b4ed 100644 --- a/src/runtime/memclr_plan9_386.s +++ b/src/runtime/memclr_plan9_386.s @@ -10,40 +10,40 @@ TEXT runtime·memclr(SB), NOSPLIT, $0-8 MOVL n+4(FP), BX XORL AX, AX -clr_tail: +tail: TESTL BX, BX - JEQ clr_0 + 
JEQ _0 CMPL BX, $2 - JBE clr_1or2 + JBE _1or2 CMPL BX, $4 - JBE clr_3or4 + JBE _3or4 CMPL BX, $8 - JBE clr_5through8 + JBE _5through8 CMPL BX, $16 - JBE clr_9through16 + JBE _9through16 MOVL BX, CX SHRL $2, CX REP STOSL ANDL $3, BX - JNE clr_tail + JNE tail RET -clr_1or2: +_1or2: MOVB AX, (DI) MOVB AX, -1(DI)(BX*1) RET -clr_0: +_0: RET -clr_3or4: +_3or4: MOVW AX, (DI) MOVW AX, -2(DI)(BX*1) RET -clr_5through8: +_5through8: MOVL AX, (DI) MOVL AX, -4(DI)(BX*1) RET -clr_9through16: +_9through16: MOVL AX, (DI) MOVL AX, 4(DI) MOVL AX, -8(DI)(BX*1) diff --git a/src/runtime/race_amd64.s b/src/runtime/race_amd64.s index bdea28c7c0..15b18ff8f8 100644 --- a/src/runtime/race_amd64.s +++ b/src/runtime/race_amd64.s @@ -140,20 +140,20 @@ TEXT racecalladdr<>(SB), NOSPLIT, $0-0 MOVQ g_racectx(R14), RARG0 // goroutine context // Check that addr is within [arenastart, arenaend) or within [noptrdata, enoptrbss). CMPQ RARG1, runtime·racearenastart(SB) - JB racecalladdr_data + JB data CMPQ RARG1, runtime·racearenaend(SB) - JB racecalladdr_call -racecalladdr_data: + JB call +data: MOVQ $runtime·noptrdata(SB), R13 CMPQ RARG1, R13 - JB racecalladdr_ret + JB ret MOVQ $runtime·enoptrbss(SB), R13 CMPQ RARG1, R13 - JAE racecalladdr_ret -racecalladdr_call: + JAE ret +call: MOVQ AX, AX // w/o this 6a miscompiles this function JMP racecall<>(SB) -racecalladdr_ret: +ret: RET // func runtime·racefuncenter(pc uintptr) @@ -335,9 +335,9 @@ TEXT racecall<>(SB), NOSPLIT, $0-0 MOVQ SP, R12 // callee-saved, preserved across the CALL MOVQ m_g0(R13), R10 CMPQ R10, R14 - JE racecall_cont // already on g0 + JE call // already on g0 MOVQ (g_sched+gobuf_sp)(R10), SP -racecall_cont: +call: ANDQ $~15, SP // alignment for gcc ABI CALL AX MOVQ R12, SP diff --git a/src/runtime/sys_darwin_386.s b/src/runtime/sys_darwin_386.s index a961c71a83..3bf8b1d411 100644 --- a/src/runtime/sys_darwin_386.s +++ b/src/runtime/sys_darwin_386.s @@ -248,7 +248,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$40 MOVL BX, 0(SP) MOVL 
$runtime·badsignal(SB), AX CALL AX - JMP sigtramp_ret + JMP ret // save g MOVL DI, 20(SP) @@ -275,7 +275,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$40 MOVL 20(SP), DI MOVL DI, g(CX) -sigtramp_ret: +ret: // call sigreturn MOVL context+16(FP), CX MOVL style+4(FP), BX diff --git a/src/runtime/sys_darwin_amd64.s b/src/runtime/sys_darwin_amd64.s index bd397d72a7..8a8928e066 100644 --- a/src/runtime/sys_darwin_amd64.s +++ b/src/runtime/sys_darwin_amd64.s @@ -211,7 +211,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$64 MOVL DX, 0(SP) MOVQ $runtime·badsignal(SB), AX CALL AX - JMP sigtramp_ret + JMP ret // save g MOVQ R10, 48(SP) @@ -233,7 +233,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$64 MOVQ 48(SP), R10 MOVQ R10, g(BX) -sigtramp_ret: +ret: // call sigreturn MOVL $(0x2000000+184), AX // sigreturn(ucontext, infostyle) MOVQ 32(SP), DI // saved ucontext diff --git a/src/runtime/sys_dragonfly_386.s b/src/runtime/sys_dragonfly_386.s index 161eaec19d..71ece9ecbf 100644 --- a/src/runtime/sys_dragonfly_386.s +++ b/src/runtime/sys_dragonfly_386.s @@ -217,7 +217,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$44 MOVL BX, 0(SP) MOVL $runtime·badsignal(SB), AX CALL AX - JMP sigtramp_ret + JMP ret // save g MOVL DI, 20(SP) @@ -243,7 +243,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$44 MOVL 20(SP), BX MOVL BX, g(CX) -sigtramp_ret: +ret: // call sigreturn MOVL context+8(FP), AX MOVL $0, 0(SP) // syscall gap diff --git a/src/runtime/sys_freebsd_386.s b/src/runtime/sys_freebsd_386.s index 2c40fc433b..66d03c27da 100644 --- a/src/runtime/sys_freebsd_386.s +++ b/src/runtime/sys_freebsd_386.s @@ -197,7 +197,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$44 MOVL BX, 0(SP) MOVL $runtime·badsignal(SB), AX CALL AX - JMP sigtramp_ret + JMP ret // save g MOVL DI, 20(SP) @@ -223,7 +223,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$44 MOVL 20(SP), BX MOVL BX, g(CX) -sigtramp_ret: +ret: // call sigreturn MOVL context+8(FP), AX MOVL $0, 0(SP) // syscall gap diff --git a/src/runtime/sys_linux_amd64.s b/src/runtime/sys_linux_amd64.s index 
33b91e872f..d8d86ffadf 100644 --- a/src/runtime/sys_linux_amd64.s +++ b/src/runtime/sys_linux_amd64.s @@ -115,7 +115,7 @@ TEXT time·now(SB),NOSPLIT,$16 // That leaves 104 for the gettime code to use. Hope that's enough! MOVQ runtime·__vdso_clock_gettime_sym(SB), AX CMPQ AX, $0 - JEQ fallback_gtod + JEQ fallback MOVL $0, DI // CLOCK_REALTIME LEAQ 0(SP), SI CALL AX @@ -124,7 +124,7 @@ TEXT time·now(SB),NOSPLIT,$16 MOVQ AX, sec+0(FP) MOVL DX, nsec+8(FP) RET -fallback_gtod: +fallback: LEAQ 0(SP), DI MOVQ $0, SI MOVQ runtime·__vdso_gettimeofday_sym(SB), AX @@ -141,7 +141,7 @@ TEXT runtime·nanotime(SB),NOSPLIT,$16 // See comment above in time.now. MOVQ runtime·__vdso_clock_gettime_sym(SB), AX CMPQ AX, $0 - JEQ fallback_gtod_nt + JEQ fallback MOVL $1, DI // CLOCK_MONOTONIC LEAQ 0(SP), SI CALL AX @@ -153,7 +153,7 @@ TEXT runtime·nanotime(SB),NOSPLIT,$16 ADDQ DX, AX MOVQ AX, ret+0(FP) RET -fallback_gtod_nt: +fallback: LEAQ 0(SP), DI MOVQ $0, SI MOVQ runtime·__vdso_gettimeofday_sym(SB), AX diff --git a/src/runtime/sys_linux_arm.s b/src/runtime/sys_linux_arm.s index bd285f3998..033a036427 100644 --- a/src/runtime/sys_linux_arm.s +++ b/src/runtime/sys_linux_arm.s @@ -373,20 +373,20 @@ TEXT cas<>(SB),NOSPLIT,$0 TEXT runtime·cas(SB),NOSPLIT,$0 MOVW ptr+0(FP), R2 MOVW old+4(FP), R0 -casagain: +loop: MOVW new+8(FP), R1 BL cas<>(SB) - BCC cascheck + BCC check MOVW $1, R0 MOVB R0, ret+12(FP) RET -cascheck: +check: // Kernel lies; double-check. 
MOVW ptr+0(FP), R2 MOVW old+4(FP), R0 MOVW 0(R2), R3 CMP R0, R3 - BEQ casagain + BEQ loop MOVW $0, R0 MOVB R0, ret+12(FP) RET diff --git a/src/runtime/sys_nacl_386.s b/src/runtime/sys_nacl_386.s index 47985f31f5..16cd721d9b 100644 --- a/src/runtime/sys_nacl_386.s +++ b/src/runtime/sys_nacl_386.s @@ -293,7 +293,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$0 MOVL $0, 0(SP) MOVL $runtime·badsignal(SB), AX CALL AX - JMP sigtramp_ret + JMP ret // save g MOVL DI, 20(SP) @@ -317,7 +317,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$0 MOVL 20(SP), BX MOVL BX, g(CX) -sigtramp_ret: +ret: // Enable exceptions again. NACL_SYSCALL(SYS_exception_clear_flag) diff --git a/src/runtime/sys_nacl_amd64p32.s b/src/runtime/sys_nacl_amd64p32.s index c30c2a8933..06a0dc5dd7 100644 --- a/src/runtime/sys_nacl_amd64p32.s +++ b/src/runtime/sys_nacl_amd64p32.s @@ -338,7 +338,6 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$80 MOVL 20(SP), BX MOVL BX, g(CX) -sigtramp_ret: // Enable exceptions again. NACL_SYSCALL(SYS_exception_clear_flag) diff --git a/src/runtime/sys_nacl_arm.s b/src/runtime/sys_nacl_arm.s index d354ab4837..432deadf44 100644 --- a/src/runtime/sys_nacl_arm.s +++ b/src/runtime/sys_nacl_arm.s @@ -269,7 +269,6 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$80 // restore g MOVW 20(R13), g -sigtramp_ret: // Enable exceptions again. 
NACL_SYSCALL(SYS_exception_clear_flag) diff --git a/src/runtime/sys_openbsd_386.s b/src/runtime/sys_openbsd_386.s index 5cda7768ae..b1ae5eceec 100644 --- a/src/runtime/sys_openbsd_386.s +++ b/src/runtime/sys_openbsd_386.s @@ -186,7 +186,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$44 MOVL BX, 0(SP) MOVL $runtime·badsignal(SB), AX CALL AX - JMP sigtramp_ret + JMP ret // save g MOVL DI, 20(SP) @@ -212,7 +212,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$44 MOVL 20(SP), BX MOVL BX, g(CX) -sigtramp_ret: +ret: // call sigreturn MOVL context+8(FP), AX MOVL $0, 0(SP) // syscall gap diff --git a/src/runtime/sys_solaris_amd64.s b/src/runtime/sys_solaris_amd64.s index 0ebdab6ee2..3981893b01 100644 --- a/src/runtime/sys_solaris_amd64.s +++ b/src/runtime/sys_solaris_amd64.s @@ -287,24 +287,24 @@ TEXT runtime·usleep1(SB),NOSPLIT,$0 // Execute call on m->g0. get_tls(R15) CMPQ R15, $0 - JE usleep1_noswitch + JE noswitch MOVQ g(R15), R13 CMPQ R13, $0 - JE usleep1_noswitch + JE noswitch MOVQ g_m(R13), R13 CMPQ R13, $0 - JE usleep1_noswitch + JE noswitch // TODO(aram): do something about the cpu profiler here. MOVQ m_g0(R13), R14 CMPQ g(R15), R14 - JNE usleep1_switch + JNE switch // executing on m->g0 already CALL AX RET -usleep1_switch: +switch: // Switch to m->g0 stack and back. MOVQ (g_sched+gobuf_sp)(R14), R14 MOVQ SP, -8(R14) @@ -313,7 +313,7 @@ usleep1_switch: MOVQ 0(SP), SP RET -usleep1_noswitch: +noswitch: // Not a Go-managed thread. Do not switch stack. 
CALL AX RET diff --git a/src/runtime/sys_windows_386.s b/src/runtime/sys_windows_386.s index 932fe9dd24..13fb5bdc93 100644 --- a/src/runtime/sys_windows_386.s +++ b/src/runtime/sys_windows_386.s @@ -106,7 +106,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$0-0 MOVL g_m(DX), BX MOVL m_g0(BX), BX CMPL DX, BX - JEQ sigtramp_g0 + JEQ g0 // switch to the g0 stack get_tls(BP) @@ -123,7 +123,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$0-0 MOVL SP, 36(DI) MOVL DI, SP -sigtramp_g0: +g0: MOVL 0(CX), BX // ExceptionRecord* MOVL 4(CX), CX // Context* MOVL BX, 0(SP) @@ -383,12 +383,12 @@ TEXT runtime·usleep1(SB),NOSPLIT,$0 MOVL m_g0(BP), SI CMPL g(CX), SI - JNE usleep1_switch + JNE switch // executing on m->g0 already CALL AX - JMP usleep1_ret + JMP ret -usleep1_switch: +switch: // Switch to m->g0 stack and back. MOVL (g_sched+gobuf_sp)(SI), SI MOVL SP, -4(SI) @@ -396,7 +396,7 @@ usleep1_switch: CALL AX MOVL 0(SP), SP -usleep1_ret: +ret: get_tls(CX) MOVL g(CX), BP MOVL g_m(BP), BP diff --git a/src/runtime/sys_windows_amd64.s b/src/runtime/sys_windows_amd64.s index e6190ce684..8b95f6d6c1 100644 --- a/src/runtime/sys_windows_amd64.s +++ b/src/runtime/sys_windows_amd64.s @@ -138,7 +138,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$0-0 MOVQ g_m(DX), BX MOVQ m_g0(BX), BX CMPQ DX, BX - JEQ sigtramp_g0 + JEQ g0 // switch to g0 stack get_tls(BP) @@ -157,7 +157,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$0-0 MOVQ SP, 104(DI) MOVQ DI, SP -sigtramp_g0: +g0: MOVQ 0(CX), BX // ExceptionRecord* MOVQ 8(CX), CX // Context* MOVQ BX, 0(SP) @@ -407,12 +407,12 @@ TEXT runtime·usleep1(SB),NOSPLIT,$0 MOVQ m_g0(R13), R14 CMPQ g(R15), R14 - JNE usleep1_switch + JNE switch // executing on m->g0 already CALL AX - JMP usleep1_ret + JMP ret -usleep1_switch: +switch: // Switch to m->g0 stack and back. MOVQ (g_sched+gobuf_sp)(R14), R14 MOVQ SP, -8(R14) @@ -420,7 +420,7 @@ usleep1_switch: CALL AX MOVQ 0(SP), SP -usleep1_ret: +ret: MOVQ $0, m_libcallsp(R13) RET |
