diff options
| author | Keith Randall <khr@golang.org> | 2018-03-03 14:24:54 -0800 |
|---|---|---|
| committer | Keith Randall <khr@golang.org> | 2018-03-04 17:49:39 +0000 |
| commit | f6332bb84ad87e958290ae23b29a2b13a41ee2a2 (patch) | |
| tree | f09ef9174bee3ae86920a113318f4a322a1a98ad /src/internal/bytealg | |
| parent | 45964e4f9c950863adcaeb62fbe49f3fa913f27d (diff) | |
| download | go-f6332bb84ad87e958290ae23b29a2b13a41ee2a2.tar.xz | |
internal/bytealg: move compare functions to bytealg
Move bytes.Compare and runtime·cmpstring to bytealg.
Update #19792
Change-Id: I139e6d7c59686bef7a3017e3dec99eba5fd10447
Reviewed-on: https://go-review.googlesource.com/98515
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Diffstat (limited to 'src/internal/bytealg')
| -rw-r--r-- | src/internal/bytealg/compare_386.s | 151 | ||||
| -rw-r--r-- | src/internal/bytealg/compare_amd64.s | 236 | ||||
| -rw-r--r-- | src/internal/bytealg/compare_amd64p32.s | 151 | ||||
| -rw-r--r-- | src/internal/bytealg/compare_arm.s | 67 | ||||
| -rw-r--r-- | src/internal/bytealg/compare_arm64.s | 66 | ||||
| -rw-r--r-- | src/internal/bytealg/compare_generic.go | 88 | ||||
| -rw-r--r-- | src/internal/bytealg/compare_mipsx.s | 104 | ||||
| -rw-r--r-- | src/internal/bytealg/compare_native.go | 10 | ||||
| -rw-r--r-- | src/internal/bytealg/compare_ppc64x.s | 307 | ||||
| -rw-r--r-- | src/internal/bytealg/compare_s390x.s | 75 |
10 files changed, 1255 insertions, 0 deletions
diff --git a/src/internal/bytealg/compare_386.s b/src/internal/bytealg/compare_386.s new file mode 100644 index 0000000000..f2a7fcce24 --- /dev/null +++ b/src/internal/bytealg/compare_386.s @@ -0,0 +1,151 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "go_asm.h" +#include "textflag.h" + +TEXT ·Compare(SB),NOSPLIT,$0-28 + MOVL a_base+0(FP), SI + MOVL a_len+4(FP), BX + MOVL b_base+12(FP), DI + MOVL b_len+16(FP), DX + LEAL ret+24(FP), AX + JMP cmpbody<>(SB) + +TEXT bytes·Compare(SB),NOSPLIT,$0-28 + MOVL a_base+0(FP), SI + MOVL a_len+4(FP), BX + MOVL b_base+12(FP), DI + MOVL b_len+16(FP), DX + LEAL ret+24(FP), AX + JMP cmpbody<>(SB) + +TEXT runtime·cmpstring(SB),NOSPLIT,$0-20 + MOVL a_base+0(FP), SI + MOVL a_len+4(FP), BX + MOVL b_base+8(FP), DI + MOVL b_len+12(FP), DX + LEAL ret+16(FP), AX + JMP cmpbody<>(SB) + +// input: +// SI = a +// DI = b +// BX = alen +// DX = blen +// AX = address of return word (set to 1/0/-1) +TEXT cmpbody<>(SB),NOSPLIT,$0-0 + MOVL DX, BP + SUBL BX, DX // DX = blen-alen + JLE 2(PC) + MOVL BX, BP // BP = min(alen, blen) + CMPL SI, DI + JEQ allsame + CMPL BP, $4 + JB small + CMPB runtime·support_sse2(SB), $1 + JNE mediumloop +largeloop: + CMPL BP, $16 + JB mediumloop + MOVOU (SI), X0 + MOVOU (DI), X1 + PCMPEQB X0, X1 + PMOVMSKB X1, BX + XORL $0xffff, BX // convert EQ to NE + JNE diff16 // branch if at least one byte is not equal + ADDL $16, SI + ADDL $16, DI + SUBL $16, BP + JMP largeloop + +diff16: + BSFL BX, BX // index of first byte that differs + XORL DX, DX + MOVB (SI)(BX*1), CX + CMPB CX, (DI)(BX*1) + SETHI DX + LEAL -1(DX*2), DX // convert 1/0 to +1/-1 + MOVL DX, (AX) + RET + +mediumloop: + CMPL BP, $4 + JBE _0through4 + MOVL (SI), BX + MOVL (DI), CX + CMPL BX, CX + JNE diff4 + ADDL $4, SI + ADDL $4, DI + SUBL $4, BP + JMP mediumloop + +_0through4: + MOVL -4(SI)(BP*1), BX + MOVL -4(DI)(BP*1), CX + CMPL BX, CX + JEQ allsame + +diff4: + BSWAPL BX // reverse order of bytes + BSWAPL CX + XORL BX, CX // find bit differences + BSRL CX, CX // index of highest bit difference + SHRL CX, BX // move a's bit to bottom + ANDL $1, BX // mask bit + LEAL -1(BX*2), BX // 1/0 => +1/-1 + MOVL BX, (AX) + RET + + // 0-3 bytes in common +small: + LEAL (BP*8), CX + NEGL CX + JEQ allsame + + // load si + CMPB SI, $0xfc + JA si_high + MOVL (SI), SI + JMP si_finish +si_high: + MOVL -4(SI)(BP*1), SI + SHRL CX, SI +si_finish: + SHLL CX, SI + + // same for di + CMPB DI, $0xfc + JA di_high + MOVL (DI), DI + JMP di_finish +di_high: + MOVL -4(DI)(BP*1), DI + SHRL CX, DI +di_finish: + SHLL CX, DI + + BSWAPL SI // reverse order of bytes + BSWAPL DI + XORL SI, DI // find bit differences + JEQ allsame + BSRL DI, CX // index of highest bit difference + SHRL CX, SI // move a's bit to bottom + ANDL $1, SI // mask bit + LEAL -1(SI*2), BX // 1/0 => +1/-1 + MOVL BX, (AX) + RET + + // all the bytes in common are the same, so we just need + // to compare the lengths. +allsame: + XORL BX, BX + XORL CX, CX + TESTL DX, DX + SETLT BX // 1 if alen > blen + SETEQ CX // 1 if alen == blen + LEAL -1(CX)(BX*2), BX // 1,0,-1 result + MOVL BX, (AX) + RET diff --git a/src/internal/bytealg/compare_amd64.s b/src/internal/bytealg/compare_amd64.s new file mode 100644 index 0000000000..7d950286e3 --- /dev/null +++ b/src/internal/bytealg/compare_amd64.s @@ -0,0 +1,236 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "go_asm.h" +#include "textflag.h" + +TEXT ·Compare(SB),NOSPLIT,$0-56 + MOVQ a_base+0(FP), SI + MOVQ a_len+8(FP), BX + MOVQ b_base+24(FP), DI + MOVQ b_len+32(FP), DX + LEAQ ret+48(FP), R9 + JMP cmpbody<>(SB) + +TEXT bytes·Compare(SB),NOSPLIT,$0-56 + MOVQ a_base+0(FP), SI + MOVQ a_len+8(FP), BX + MOVQ b_base+24(FP), DI + MOVQ b_len+32(FP), DX + LEAQ ret+48(FP), R9 + JMP cmpbody<>(SB) + +TEXT runtime·cmpstring(SB),NOSPLIT,$0-40 + MOVQ a_base+0(FP), SI + MOVQ a_len+8(FP), BX + MOVQ b_base+16(FP), DI + MOVQ b_len+24(FP), DX + LEAQ ret+32(FP), R9 + JMP cmpbody<>(SB) + +// input: +// SI = a +// DI = b +// BX = alen +// DX = blen +// R9 = address of output word (stores -1/0/1 here) +TEXT cmpbody<>(SB),NOSPLIT,$0-0 + CMPQ SI, DI + JEQ allsame + CMPQ BX, DX + MOVQ DX, R8 + CMOVQLT BX, R8 // R8 = min(alen, blen) = # of bytes to compare + CMPQ R8, $8 + JB small + + CMPQ R8, $63 + JBE loop + CMPB internal∕cpu·X86+const_x86_HasAVX2(SB), $1 + JEQ big_loop_avx2 + JMP big_loop +loop: + CMPQ R8, $16 + JBE _0through16 + MOVOU (SI), X0 + MOVOU (DI), X1 + PCMPEQB X0, X1 + PMOVMSKB X1, AX + XORQ $0xffff, AX // convert EQ to NE + JNE diff16 // branch if at least one byte is not equal + ADDQ $16, SI + ADDQ $16, DI + SUBQ $16, R8 + JMP loop + +diff64: + ADDQ $48, SI + ADDQ $48, DI + JMP diff16 +diff48: + ADDQ $32, SI + ADDQ $32, DI + JMP diff16 +diff32: + ADDQ $16, SI + ADDQ $16, DI + // AX = bit mask of differences +diff16: + BSFQ AX, BX // index of first byte that differs + XORQ AX, AX + MOVB (SI)(BX*1), CX + CMPB CX, (DI)(BX*1) + SETHI AX + LEAQ -1(AX*2), AX // convert 1/0 to +1/-1 + MOVQ AX, (R9) + RET + + // 0 through 16 bytes left, alen>=8, blen>=8 +_0through16: + CMPQ R8, $8 + JBE _0through8 + MOVQ (SI), AX + MOVQ (DI), CX + CMPQ AX, CX + JNE diff8 +_0through8: + MOVQ -8(SI)(R8*1), AX + MOVQ -8(DI)(R8*1), CX + CMPQ AX, CX + JEQ allsame + + // AX and CX contain parts of a and b that differ. +diff8: + BSWAPQ AX // reverse order of bytes + BSWAPQ CX + XORQ AX, CX + BSRQ CX, CX // index of highest bit difference + SHRQ CX, AX // move a's bit to bottom + ANDQ $1, AX // mask bit + LEAQ -1(AX*2), AX // 1/0 => +1/-1 + MOVQ AX, (R9) + RET + + // 0-7 bytes in common +small: + LEAQ (R8*8), CX // bytes left -> bits left + NEGQ CX // - bits lift (== 64 - bits left mod 64) + JEQ allsame + + // load bytes of a into high bytes of AX + CMPB SI, $0xf8 + JA si_high + MOVQ (SI), SI + JMP si_finish +si_high: + MOVQ -8(SI)(R8*1), SI + SHRQ CX, SI +si_finish: + SHLQ CX, SI + + // load bytes of b in to high bytes of BX + CMPB DI, $0xf8 + JA di_high + MOVQ (DI), DI + JMP di_finish +di_high: + MOVQ -8(DI)(R8*1), DI + SHRQ CX, DI +di_finish: + SHLQ CX, DI + + BSWAPQ SI // reverse order of bytes + BSWAPQ DI + XORQ SI, DI // find bit differences + JEQ allsame + BSRQ DI, CX // index of highest bit difference + SHRQ CX, SI // move a's bit to bottom + ANDQ $1, SI // mask bit + LEAQ -1(SI*2), AX // 1/0 => +1/-1 + MOVQ AX, (R9) + RET + +allsame: + XORQ AX, AX + XORQ CX, CX + CMPQ BX, DX + SETGT AX // 1 if alen > blen + SETEQ CX // 1 if alen == blen + LEAQ -1(CX)(AX*2), AX // 1,0,-1 result + MOVQ AX, (R9) + RET + + // this works for >= 64 bytes of data. +big_loop: + MOVOU (SI), X0 + MOVOU (DI), X1 + PCMPEQB X0, X1 + PMOVMSKB X1, AX + XORQ $0xffff, AX + JNE diff16 + + MOVOU 16(SI), X0 + MOVOU 16(DI), X1 + PCMPEQB X0, X1 + PMOVMSKB X1, AX + XORQ $0xffff, AX + JNE diff32 + + MOVOU 32(SI), X0 + MOVOU 32(DI), X1 + PCMPEQB X0, X1 + PMOVMSKB X1, AX + XORQ $0xffff, AX + JNE diff48 + + MOVOU 48(SI), X0 + MOVOU 48(DI), X1 + PCMPEQB X0, X1 + PMOVMSKB X1, AX + XORQ $0xffff, AX + JNE diff64 + + ADDQ $64, SI + ADDQ $64, DI + SUBQ $64, R8 + CMPQ R8, $64 + JBE loop + JMP big_loop + + // Compare 64-bytes per loop iteration. + // Loop is unrolled and uses AVX2. +big_loop_avx2: + VMOVDQU (SI), Y2 + VMOVDQU (DI), Y3 + VMOVDQU 32(SI), Y4 + VMOVDQU 32(DI), Y5 + VPCMPEQB Y2, Y3, Y0 + VPMOVMSKB Y0, AX + XORL $0xffffffff, AX + JNE diff32_avx2 + VPCMPEQB Y4, Y5, Y6 + VPMOVMSKB Y6, AX + XORL $0xffffffff, AX + JNE diff64_avx2 + + ADDQ $64, SI + ADDQ $64, DI + SUBQ $64, R8 + CMPQ R8, $64 + JB big_loop_avx2_exit + JMP big_loop_avx2 + + // Avoid AVX->SSE transition penalty and search first 32 bytes of 64 byte chunk. +diff32_avx2: + VZEROUPPER + JMP diff16 + + // Same as diff32_avx2, but for last 32 bytes. +diff64_avx2: + VZEROUPPER + JMP diff48 + + // For <64 bytes remainder jump to normal loop. +big_loop_avx2_exit: + VZEROUPPER + JMP loop diff --git a/src/internal/bytealg/compare_amd64p32.s b/src/internal/bytealg/compare_amd64p32.s new file mode 100644 index 0000000000..0f23147338 --- /dev/null +++ b/src/internal/bytealg/compare_amd64p32.s @@ -0,0 +1,151 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "go_asm.h" +#include "textflag.h" + +TEXT ·Compare(SB),NOSPLIT,$0-28 + MOVL a_base+0(FP), SI + MOVL a_len+4(FP), BX + MOVL b_base+12(FP), DI + MOVL b_len+16(FP), DX + CALL cmpbody<>(SB) + MOVL AX, ret+24(FP) + RET + +TEXT bytes·Compare(SB),NOSPLIT,$0-28 + MOVL a_base+0(FP), SI + MOVL a_len+4(FP), BX + MOVL b_base+12(FP), DI + MOVL b_len+16(FP), DX + CALL cmpbody<>(SB) + MOVL AX, ret+24(FP) + RET + +TEXT runtime·cmpstring(SB),NOSPLIT,$0-20 + MOVL a_base+0(FP), SI + MOVL a_len+4(FP), BX + MOVL b_base+8(FP), DI + MOVL b_len+12(FP), DX + CALL cmpbody<>(SB) + MOVL AX, ret+16(FP) + RET + +// input: +// SI = a +// DI = b +// BX = alen +// DX = blen +// output: +// AX = 1/0/-1 +TEXT cmpbody<>(SB),NOSPLIT,$0-0 + CMPQ SI, DI + JEQ allsame + CMPQ BX, DX + MOVQ DX, R8 + CMOVQLT BX, R8 // R8 = min(alen, blen) = # of bytes to compare + CMPQ R8, $8 + JB small + +loop: + CMPQ R8, $16 + JBE _0through16 + MOVOU (SI), X0 + MOVOU (DI), X1 + PCMPEQB X0, X1 + PMOVMSKB X1, AX + XORQ $0xffff, AX // convert EQ to NE + JNE diff16 // branch if at least one byte is not equal + ADDQ $16, SI + ADDQ $16, DI + SUBQ $16, R8 + JMP loop + + // AX = bit mask of differences +diff16: + BSFQ AX, BX // index of first byte that differs + XORQ AX, AX + ADDQ BX, SI + MOVB (SI), CX + ADDQ BX, DI + CMPB CX, (DI) + SETHI AX + LEAQ -1(AX*2), AX // convert 1/0 to +1/-1 + RET + + // 0 through 16 bytes left, alen>=8, blen>=8 +_0through16: + CMPQ R8, $8 + JBE _0through8 + MOVQ (SI), AX + MOVQ (DI), CX + CMPQ AX, CX + JNE diff8 +_0through8: + ADDQ R8, SI + ADDQ R8, DI + MOVQ -8(SI), AX + MOVQ -8(DI), CX + CMPQ AX, CX + JEQ allsame + + // AX and CX contain parts of a and b that differ. +diff8: + BSWAPQ AX // reverse order of bytes + BSWAPQ CX + XORQ AX, CX + BSRQ CX, CX // index of highest bit difference + SHRQ CX, AX // move a's bit to bottom + ANDQ $1, AX // mask bit + LEAQ -1(AX*2), AX // 1/0 => +1/-1 + RET + + // 0-7 bytes in common +small: + LEAQ (R8*8), CX // bytes left -> bits left + NEGQ CX // - bits lift (== 64 - bits left mod 64) + JEQ allsame + + // load bytes of a into high bytes of AX + CMPB SI, $0xf8 + JA si_high + MOVQ (SI), SI + JMP si_finish +si_high: + ADDQ R8, SI + MOVQ -8(SI), SI + SHRQ CX, SI +si_finish: + SHLQ CX, SI + + // load bytes of b in to high bytes of BX + CMPB DI, $0xf8 + JA di_high + MOVQ (DI), DI + JMP di_finish +di_high: + ADDQ R8, DI + MOVQ -8(DI), DI + SHRQ CX, DI +di_finish: + SHLQ CX, DI + + BSWAPQ SI // reverse order of bytes + BSWAPQ DI + XORQ SI, DI // find bit differences + JEQ allsame + BSRQ DI, CX // index of highest bit difference + SHRQ CX, SI // move a's bit to bottom + ANDQ $1, SI // mask bit + LEAQ -1(SI*2), AX // 1/0 => +1/-1 + RET + +allsame: + XORQ AX, AX + XORQ CX, CX + CMPQ BX, DX + SETGT AX // 1 if alen > blen + SETEQ CX // 1 if alen == blen + LEAQ -1(CX)(AX*2), AX // 1,0,-1 result + RET diff --git a/src/internal/bytealg/compare_arm.s b/src/internal/bytealg/compare_arm.s new file mode 100644 index 0000000000..72cae3309c --- /dev/null +++ b/src/internal/bytealg/compare_arm.s @@ -0,0 +1,67 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "go_asm.h" +#include "textflag.h" + +TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-28 + MOVW a_base+0(FP), R2 + MOVW a_len+4(FP), R0 + MOVW b_base+12(FP), R3 + MOVW b_len+16(FP), R1 + ADD $28, R13, R7 + B cmpbody<>(SB) + +TEXT bytes·Compare(SB),NOSPLIT|NOFRAME,$0-28 + MOVW a_base+0(FP), R2 + MOVW a_len+4(FP), R0 + MOVW b_base+12(FP), R3 + MOVW b_len+16(FP), R1 + ADD $28, R13, R7 + B cmpbody<>(SB) + +TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-20 + MOVW a_base+0(FP), R2 + MOVW a_len+4(FP), R0 + MOVW b_base+8(FP), R3 + MOVW b_len+12(FP), R1 + ADD $20, R13, R7 + B cmpbody<>(SB) + +// On entry: +// R0 is the length of a +// R1 is the length of b +// R2 points to the start of a +// R3 points to the start of b +// R7 points to return value (-1/0/1 will be written here) +// +// On exit: +// R4, R5, and R6 are clobbered +TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0-0 + CMP R2, R3 + BEQ samebytes + CMP R0, R1 + MOVW R0, R6 + MOVW.LT R1, R6 // R6 is min(R0, R1) + + ADD R2, R6 // R2 is current byte in a, R6 is last byte in a to compare +loop: + CMP R2, R6 + BEQ samebytes // all compared bytes were the same; compare lengths + MOVBU.P 1(R2), R4 + MOVBU.P 1(R3), R5 + CMP R4, R5 + BEQ loop + // bytes differed + MOVW.LT $1, R0 + MOVW.GT $-1, R0 + MOVW R0, (R7) + RET +samebytes: + CMP R0, R1 + MOVW.LT $1, R0 + MOVW.GT $-1, R0 + MOVW.EQ $0, R0 + MOVW R0, (R7) + RET diff --git a/src/internal/bytealg/compare_arm64.s b/src/internal/bytealg/compare_arm64.s new file mode 100644 index 0000000000..9b6354715a --- /dev/null +++ b/src/internal/bytealg/compare_arm64.s @@ -0,0 +1,66 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "go_asm.h" +#include "textflag.h" + +TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-56 + MOVD a_base+0(FP), R2 + MOVD a_len+8(FP), R0 + MOVD b_base+24(FP), R3 + MOVD b_len+32(FP), R1 + ADD $56, RSP, R7 + B cmpbody<>(SB) + +TEXT bytes·Compare(SB),NOSPLIT|NOFRAME,$0-56 + MOVD a_base+0(FP), R2 + MOVD a_len+8(FP), R0 + MOVD b_base+24(FP), R3 + MOVD b_len+32(FP), R1 + ADD $56, RSP, R7 + B cmpbody<>(SB) + +TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40 + MOVD a_base+0(FP), R2 + MOVD a_len+8(FP), R0 + MOVD b_base+16(FP), R3 + MOVD b_len+24(FP), R1 + ADD $40, RSP, R7 + B cmpbody<>(SB) + +// On entry: +// R0 is the length of a +// R1 is the length of b +// R2 points to the start of a +// R3 points to the start of b +// R7 points to return value (-1/0/1 will be written here) +// +// On exit: +// R4, R5, and R6 are clobbered +TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0-0 + CMP R2, R3 + BEQ samebytes // same starting pointers; compare lengths + CMP R0, R1 + CSEL LT, R1, R0, R6 // R6 is min(R0, R1) + + ADD R2, R6 // R2 is current byte in a, R6 is last byte in a to compare +loop: + CMP R2, R6 + BEQ samebytes // all compared bytes were the same; compare lengths + MOVBU.P 1(R2), R4 + MOVBU.P 1(R3), R5 + CMP R4, R5 + BEQ loop + // bytes differed + MOVD $1, R4 + CSNEG LT, R4, R4, R4 + MOVD R4, (R7) + RET +samebytes: + MOVD $1, R4 + CMP R0, R1 + CSNEG LT, R4, R4, R4 + CSEL EQ, ZR, R4, R4 + MOVD R4, (R7) + RET diff --git a/src/internal/bytealg/compare_generic.go b/src/internal/bytealg/compare_generic.go new file mode 100644 index 0000000000..69610517b2 --- /dev/null +++ b/src/internal/bytealg/compare_generic.go @@ -0,0 +1,88 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !386,!amd64,!amd64p32,!s390x,!arm,!arm64,!ppc64,!ppc64le,!mips,!mipsle + +package bytealg + +import _ "unsafe" // for go:linkname + +func Compare(a, b []byte) int { + l := len(a) + if len(b) < l { + l = len(b) + } + if l == 0 || &a[0] == &b[0] { + goto samebytes + } + for i := 0; i < l; i++ { + c1, c2 := a[i], b[i] + if c1 < c2 { + return -1 + } + if c1 > c2 { + return +1 + } + } +samebytes: + if len(a) < len(b) { + return -1 + } + if len(a) > len(b) { + return +1 + } + return 0 +} + +//go:linkname bytes_Compare bytes.Compare +func bytes_Compare(a, b []byte) int { + l := len(a) + if len(b) < l { + l = len(b) + } + if l == 0 || &a[0] == &b[0] { + goto samebytes + } + for i := 0; i < l; i++ { + c1, c2 := a[i], b[i] + if c1 < c2 { + return -1 + } + if c1 > c2 { + return +1 + } + } +samebytes: + if len(a) < len(b) { + return -1 + } + if len(a) > len(b) { + return +1 + } + return 0 +} + +//go:linkname runtime_cmpstring runtime.cmpstring +func runtime_cmpstring(a, b string) int { + l := len(a) + if len(b) < l { + l = len(b) + } + for i := 0; i < l; i++ { + c1, c2 := a[i], b[i] + if c1 < c2 { + return -1 + } + if c1 > c2 { + return +1 + } + } + if len(a) < len(b) { + return -1 + } + if len(a) > len(b) { + return +1 + } + return 0 +} diff --git a/src/internal/bytealg/compare_mipsx.s b/src/internal/bytealg/compare_mipsx.s new file mode 100644 index 0000000000..b8e225ea85 --- /dev/null +++ b/src/internal/bytealg/compare_mipsx.s @@ -0,0 +1,104 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build mips mipsle + +#include "go_asm.h" +#include "textflag.h" + +TEXT ·Compare(SB),NOSPLIT,$0-28 + MOVW a_base+0(FP), R3 + MOVW b_base+12(FP), R4 + MOVW a_len+4(FP), R1 + MOVW b_len+16(FP), R2 + BEQ R3, R4, samebytes + SGTU R1, R2, R7 + MOVW R1, R8 + CMOVN R7, R2, R8 // R8 is min(R1, R2) + + ADDU R3, R8 // R3 is current byte in a, R8 is last byte in a to compare +loop: + BEQ R3, R8, samebytes + + MOVBU (R3), R6 + ADDU $1, R3 + MOVBU (R4), R7 + ADDU $1, R4 + BEQ R6, R7 , loop + + SGTU R6, R7, R8 + MOVW $-1, R6 + CMOVZ R8, R6, R8 + JMP cmp_ret +samebytes: + SGTU R1, R2, R6 + SGTU R2, R1, R7 + SUBU R7, R6, R8 +cmp_ret: + MOVW R8, ret+24(FP) + RET + +TEXT bytes·Compare(SB),NOSPLIT,$0-28 + MOVW a_base+0(FP), R3 + MOVW b_base+12(FP), R4 + MOVW a_len+4(FP), R1 + MOVW b_len+16(FP), R2 + BEQ R3, R4, samebytes + SGTU R1, R2, R7 + MOVW R1, R8 + CMOVN R7, R2, R8 // R8 is min(R1, R2) + + ADDU R3, R8 // R3 is current byte in a, R8 is last byte in a to compare +loop: + BEQ R3, R8, samebytes + + MOVBU (R3), R6 + ADDU $1, R3 + MOVBU (R4), R7 + ADDU $1, R4 + BEQ R6, R7 , loop + + SGTU R6, R7, R8 + MOVW $-1, R6 + CMOVZ R8, R6, R8 + JMP cmp_ret +samebytes: + SGTU R1, R2, R6 + SGTU R2, R1, R7 + SUBU R7, R6, R8 +cmp_ret: + MOVW R8, ret+24(FP) + RET + +TEXT runtime·cmpstring(SB),NOSPLIT,$0-20 + MOVW a_base+0(FP), R3 + MOVW a_len+4(FP), R1 + MOVW b_base+8(FP), R4 + MOVW b_len+12(FP), R2 + BEQ R3, R4, samebytes + SGTU R1, R2, R7 + MOVW R1, R8 + CMOVN R7, R2, R8 // R8 is min(R1, R2) + + ADDU R3, R8 // R3 is current byte in a, R8 is last byte in a to compare +loop: + BEQ R3, R8, samebytes // all compared bytes were the same; compare lengths + + MOVBU (R3), R6 + ADDU $1, R3 + MOVBU (R4), R7 + ADDU $1, R4 + BEQ R6, R7 , loop + // bytes differed + SGTU R6, R7, R8 + MOVW $-1, R6 + CMOVZ R8, R6, R8 + JMP cmp_ret +samebytes: + SGTU R1, R2, R6 + SGTU R2, R1, R7 + SUBU R7, R6, R8 +cmp_ret: + MOVW R8, ret+16(FP) + RET diff --git a/src/internal/bytealg/compare_native.go b/src/internal/bytealg/compare_native.go new file mode 100644 index 0000000000..8ffade55be --- /dev/null +++ b/src/internal/bytealg/compare_native.go @@ -0,0 +1,10 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build 386 amd64 amd64p32 s390x arm arm64 ppc64 ppc64le mips mipsle + +package bytealg + +//go:noescape +func Compare(a, b []byte) int diff --git a/src/internal/bytealg/compare_ppc64x.s b/src/internal/bytealg/compare_ppc64x.s new file mode 100644 index 0000000000..9b13c9a14d --- /dev/null +++ b/src/internal/bytealg/compare_ppc64x.s @@ -0,0 +1,307 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ppc64 ppc64le + +#include "go_asm.h" +#include "textflag.h" + +TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-56 + MOVD a_base+0(FP), R5 + MOVD b_base+24(FP), R6 + MOVD a_len+8(FP), R3 + CMP R5,R6,CR7 + MOVD b_len+32(FP), R4 + MOVD $ret+48(FP), R7 + CMP R3,R4,CR6 + BEQ CR7,equal + +#ifdef GOARCH_ppc64le + BR cmpbodyLE<>(SB) +#else + BR cmpbodyBE<>(SB) +#endif + +equal: + BEQ CR6,done + MOVD $1, R8 + BGT CR6,greater + NEG R8 + +greater: + MOVD R8, (R7) + RET + +done: + MOVD $0, (R7) + RET + +TEXT bytes·Compare(SB),NOSPLIT|NOFRAME,$0-56 + MOVD a_base+0(FP), R5 + MOVD b_base+24(FP), R6 + MOVD a_len+8(FP), R3 + CMP R5,R6,CR7 + MOVD b_len+32(FP), R4 + MOVD $ret+48(FP), R7 + CMP R3,R4,CR6 + BEQ CR7,equal + +#ifdef GOARCH_ppc64le + BR cmpbodyLE<>(SB) +#else + BR cmpbodyBE<>(SB) +#endif + +equal: + BEQ CR6,done + MOVD $1, R8 + BGT CR6,greater + NEG R8 + +greater: + MOVD R8, (R7) + RET + +done: + MOVD $0, (R7) + RET + +TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40 + MOVD a_base+0(FP), R5 + MOVD b_base+16(FP), R6 + MOVD a_len+8(FP), R3 + CMP R5,R6,CR7 + MOVD b_len+24(FP), R4 + MOVD $ret+32(FP), R7 + CMP R3,R4,CR6 + BEQ CR7,equal + +#ifdef GOARCH_ppc64le + BR cmpbodyLE<>(SB) +#else + BR cmpbodyBE<>(SB) +#endif + +equal: + BEQ CR6,done + MOVD $1, R8 + BGT CR6,greater + NEG R8 + +greater: + MOVD R8, (R7) + RET + +done: + MOVD $0, (R7) + RET + +// Do an efficient memcmp for ppc64le +// R3 = a len +// R4 = b len +// R5 = a addr +// R6 = b addr +// R7 = addr of return value +TEXT cmpbodyLE<>(SB),NOSPLIT|NOFRAME,$0-0 + MOVD R3,R8 // set up length + CMP R3,R4,CR2 // unequal? + BC 12,8,setuplen // BLT CR2 + MOVD R4,R8 // use R4 for comparison len +setuplen: + MOVD R8,CTR // set up loop counter + CMP R8,$8 // only optimize >=8 + BLT simplecheck + DCBT (R5) // cache hint + DCBT (R6) + CMP R8,$32 // optimize >= 32 + MOVD R8,R9 + BLT setup8a // 8 byte moves only +setup32a: + SRADCC $5,R8,R9 // number of 32 byte chunks + MOVD R9,CTR + + // Special processing for 32 bytes or longer. + // Loading this way is faster and correct as long as the + // doublewords being compared are equal. Once they + // are found unequal, reload them in proper byte order + // to determine greater or less than. +loop32a: + MOVD 0(R5),R9 // doublewords to compare + MOVD 0(R6),R10 // get 4 doublewords + MOVD 8(R5),R14 + MOVD 8(R6),R15 + CMPU R9,R10 // bytes equal? + MOVD $0,R16 // set up for cmpne + BNE cmpne // further compare for LT or GT + MOVD 16(R5),R9 // get next pair of doublewords + MOVD 16(R6),R10 + CMPU R14,R15 // bytes match? + MOVD $8,R16 // set up for cmpne + BNE cmpne // further compare for LT or GT + MOVD 24(R5),R14 // get next pair of doublewords + MOVD 24(R6),R15 + CMPU R9,R10 // bytes match? + MOVD $16,R16 // set up for cmpne + BNE cmpne // further compare for LT or GT + MOVD $-8,R16 // for cmpne, R5,R6 already inc by 32 + ADD $32,R5 // bump up to next 32 + ADD $32,R6 + CMPU R14,R15 // bytes match? + BC 8,2,loop32a // br ctr and cr + BNE cmpne + ANDCC $24,R8,R9 // Any 8 byte chunks? + BEQ leftover // and result is 0 +setup8a: + SRADCC $3,R9,R9 // get the 8 byte count + BEQ leftover // shifted value is 0 + MOVD R9,CTR // loop count for doublewords +loop8: + MOVDBR (R5+R0),R9 // doublewords to compare + MOVDBR (R6+R0),R10 // LE compare order + ADD $8,R5 + ADD $8,R6 + CMPU R9,R10 // match? + BC 8,2,loop8 // bt ctr <> 0 && cr + BGT greater + BLT less +leftover: + ANDCC $7,R8,R9 // check for leftover bytes + MOVD R9,CTR // save the ctr + BNE simple // leftover bytes + BC 12,10,equal // test CR2 for length comparison + BC 12,8,less + BR greater +simplecheck: + CMP R8,$0 // remaining compare length 0 + BNE simple // do simple compare + BC 12,10,equal // test CR2 for length comparison + BC 12,8,less // 1st len < 2nd len, result less + BR greater // 1st len > 2nd len must be greater +simple: + MOVBZ 0(R5), R9 // get byte from 1st operand + ADD $1,R5 + MOVBZ 0(R6), R10 // get byte from 2nd operand + ADD $1,R6 + CMPU R9, R10 + BC 8,2,simple // bc ctr <> 0 && cr + BGT greater // 1st > 2nd + BLT less // 1st < 2nd + BC 12,10,equal // test CR2 for length comparison + BC 12,9,greater // 2nd len > 1st len + BR less // must be less +cmpne: // only here is not equal + MOVDBR (R5+R16),R8 // reload in reverse order + MOVDBR (R6+R16),R9 + CMPU R8,R9 // compare correct endianness + BGT greater // here only if NE +less: + MOVD $-1,R3 + MOVD R3,(R7) // return value if A < B + RET +equal: + MOVD $0,(R7) // return value if A == B + RET +greater: + MOVD $1,R3 + MOVD R3,(R7) // return value if A > B + RET + +// Do an efficient memcmp for ppc64 (BE) +// R3 = a len +// R4 = b len +// R5 = a addr +// R6 = b addr +// R7 = addr of return value +TEXT cmpbodyBE<>(SB),NOSPLIT|NOFRAME,$0-0 + MOVD R3,R8 // set up length + CMP R3,R4,CR2 // unequal? + BC 12,8,setuplen // BLT CR2 + MOVD R4,R8 // use R4 for comparison len +setuplen: + MOVD R8,CTR // set up loop counter + CMP R8,$8 // only optimize >=8 + BLT simplecheck + DCBT (R5) // cache hint + DCBT (R6) + CMP R8,$32 // optimize >= 32 + MOVD R8,R9 + BLT setup8a // 8 byte moves only + +setup32a: + SRADCC $5,R8,R9 // number of 32 byte chunks + MOVD R9,CTR +loop32a: + MOVD 0(R5),R9 // doublewords to compare + MOVD 0(R6),R10 // get 4 doublewords + MOVD 8(R5),R14 + MOVD 8(R6),R15 + CMPU R9,R10 // bytes equal? + BLT less // found to be less + BGT greater // found to be greater + MOVD 16(R5),R9 // get next pair of doublewords + MOVD 16(R6),R10 + CMPU R14,R15 // bytes match? + BLT less // found less + BGT greater // found greater + MOVD 24(R5),R14 // get next pair of doublewords + MOVD 24(R6),R15 + CMPU R9,R10 // bytes match? + BLT less // found to be less + BGT greater // found to be greater + ADD $32,R5 // bump up to next 32 + ADD $32,R6 + CMPU R14,R15 // bytes match? + BC 8,2,loop32a // br ctr and cr + BLT less // with BE, byte ordering is + BGT greater // good for compare + ANDCC $24,R8,R9 // Any 8 byte chunks? + BEQ leftover // and result is 0 +setup8a: + SRADCC $3,R9,R9 // get the 8 byte count + BEQ leftover // shifted value is 0 + MOVD R9,CTR // loop count for doublewords +loop8: + MOVD (R5),R9 + MOVD (R6),R10 + ADD $8,R5 + ADD $8,R6 + CMPU R9,R10 // match? + BC 8,2,loop8 // bt ctr <> 0 && cr + BGT greater + BLT less +leftover: + ANDCC $7,R8,R9 // check for leftover bytes + MOVD R9,CTR // save the ctr + BNE simple // leftover bytes + BC 12,10,equal // test CR2 for length comparison + BC 12,8,less + BR greater +simplecheck: + CMP R8,$0 // remaining compare length 0 + BNE simple // do simple compare + BC 12,10,equal // test CR2 for length comparison + BC 12,8,less // 1st len < 2nd len, result less + BR greater // same len, must be equal +simple: + MOVBZ 0(R5),R9 // get byte from 1st operand + ADD $1,R5 + MOVBZ 0(R6),R10 // get byte from 2nd operand + ADD $1,R6 + CMPU R9,R10 + BC 8,2,simple // bc ctr <> 0 && cr + BGT greater // 1st > 2nd + BLT less // 1st < 2nd + BC 12,10,equal // test CR2 for length comparison + BC 12,9,greater // 2nd len > 1st len +less: + MOVD $-1,R3 + MOVD R3,(R7) // return value if A < B + RET +equal: + MOVD $0,(R7) // return value if A == B + RET +greater: + MOVD $1,R3 + MOVD R3,(R7) // return value if A > B + RET diff --git a/src/internal/bytealg/compare_s390x.s b/src/internal/bytealg/compare_s390x.s new file mode 100644 index 0000000000..7f27b08c0e --- /dev/null +++ b/src/internal/bytealg/compare_s390x.s @@ -0,0 +1,75 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "go_asm.h" +#include "textflag.h" + +TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-56 + MOVD a_base+0(FP), R3 + MOVD a_len+8(FP), R4 + MOVD b_base+24(FP), R5 + MOVD b_len+32(FP), R6 + LA ret+48(FP), R7 + BR cmpbody<>(SB) + +TEXT bytes·Compare(SB),NOSPLIT|NOFRAME,$0-56 + MOVD a_base+0(FP), R3 + MOVD a_len+8(FP), R4 + MOVD b_base+24(FP), R5 + MOVD b_len+32(FP), R6 + LA ret+48(FP), R7 + BR cmpbody<>(SB) + +TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40 + MOVD a_base+0(FP), R3 + MOVD a_len+8(FP), R4 + MOVD b_base+16(FP), R5 + MOVD b_len+24(FP), R6 + LA ret+32(FP), R7 + BR cmpbody<>(SB) + +// input: +// R3 = a +// R4 = alen +// R5 = b +// R6 = blen +// R7 = address of output word (stores -1/0/1 here) +TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0-0 + CMPBEQ R3, R5, cmplengths + MOVD R4, R8 + CMPBLE R4, R6, amin + MOVD R6, R8 +amin: + CMPBEQ R8, $0, cmplengths + CMP R8, $256 + BLE tail +loop: + CLC $256, 0(R3), 0(R5) + BGT gt + BLT lt + SUB $256, R8 + CMP R8, $256 + BGT loop +tail: + SUB $1, R8 + EXRL $cmpbodyclc<>(SB), R8 + BGT gt + BLT lt +cmplengths: + CMP R4, R6 + BEQ eq + BLT lt +gt: + MOVD $1, 0(R7) + RET +lt: + MOVD $-1, 0(R7) + RET +eq: + MOVD $0, 0(R7) + RET + +TEXT cmpbodyclc<>(SB),NOSPLIT|NOFRAME,$0-0 + CLC $1, 0(R3), 0(R5) + RET |
