aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/asm/internal
diff options
context:
space:
mode:
authorfanzha02 <fannie.zhang@arm.com>2018-06-22 12:08:47 +0000
committerCherry Zhang <cherryyz@google.com>2018-10-22 14:23:23 +0000
commit86ce1cb060446bcee2454080c7f55e754dbf7f82 (patch)
tree055b4cdc60959e5ada4ebc4fbc0889b0689f1fff /src/cmd/asm/internal
parent1e8ecefcd50fe9e9ae86f7c3a7a4f918c5e8e0c4 (diff)
downloadgo-86ce1cb060446bcee2454080c7f55e754dbf7f82.tar.xz
cmd/internal/obj/arm64: reclassify 32-bit/64-bit constants
Current assembler saves constants in Offset which type is int64, causing 32-bit constants have a incorrect class. This CL reclassifies constants when opcodes are 32-bit variant, like MOVW, ANDW and ADDW, etc. Besides, this CL encodes some constants of ADDCON class as MOVs instructions. This CL changes the assembler behavior as follows. 1. go assembler ADDW $MOVCON, Rn, Rd previous version: MOVD $MOVCON, Rtmp; ADDW Rtmp, Rn, Rd current version: MOVW $MOVCON, Rtmp; ADDW Rtmp, Rn, Rd 2. go assembly MOVW $0xaaaaffff, R1 previous version: treats $0xaaaaffff as VCON, encodes it as MOVW 0x994, R1 (loads it from pool). current version: treats $0xaaaaffff as MOVCON, and encodes it into MOVW instructions. 3. go assembly MOVD $0x210000, R1 previous version: treats $0x210000 as ADDCON, loads it from pool current version: treats $0x210000 as MOVCON, and encodes it into MOVD instructions. Add the test cases. 1. Binary size before/after. binary size change pkg/linux_arm64 -1.534KB pkg/tool/linux_arm64 -0.718KB go -0.32KB gofmt no change 2. go1 benchmark result. name old time/op new time/op delta BinaryTree17-8 6.26s ± 1% 6.28s ± 1% ~ (p=0.105 n=10+10) Fannkuch11-8 5.40s ± 0% 5.39s ± 0% -0.29% (p=0.028 n=9+10) FmtFprintfEmpty-8 94.5ns ± 0% 95.0ns ± 0% +0.51% (p=0.000 n=10+9) FmtFprintfString-8 163ns ± 1% 159ns ± 1% -2.06% (p=0.000 n=10+9) FmtFprintfInt-8 200ns ± 1% 196ns ± 1% -1.99% (p=0.000 n=9+10) FmtFprintfIntInt-8 292ns ± 3% 284ns ± 1% -2.87% (p=0.001 n=10+9) FmtFprintfPrefixedInt-8 422ns ± 1% 420ns ± 1% -0.59% (p=0.015 n=10+10) FmtFprintfFloat-8 458ns ± 0% 463ns ± 1% +1.19% (p=0.000 n=9+10) FmtManyArgs-8 1.37µs ± 1% 1.35µs ± 1% -1.85% (p=0.000 n=10+10) GobDecode-8 15.5ms ± 1% 15.3ms ± 1% -1.82% (p=0.000 n=10+10) GobEncode-8 11.7ms ± 5% 11.7ms ± 2% ~ (p=0.549 n=10+9) Gzip-8 622ms ± 0% 624ms ± 0% +0.23% (p=0.000 n=10+9) Gunzip-8 73.6ms ± 0% 73.8ms ± 1% ~ (p=0.077 n=9+9) HTTPClientServer-8 115µs ± 1% 115µs ± 1% ~ (p=0.796 n=10+10) JSONEncode-8 31.1ms ± 2% 28.7ms ± 1% -7.98% (p=0.000 n=10+9) JSONDecode-8 145ms ± 0% 145ms ± 1% ~ (p=0.447 n=9+10) Mandelbrot200-8 9.67ms ± 0% 9.60ms ± 0% -0.76% (p=0.000 n=9+9) GoParse-8 7.56ms ± 1% 7.58ms ± 0% +0.21% (p=0.035 n=10+9) RegexpMatchEasy0_32-8 208ns ±10% 222ns ± 0% ~ (p=0.531 n=10+6) RegexpMatchEasy0_1K-8 699ns ± 4% 694ns ± 4% ~ (p=0.868 n=10+10) RegexpMatchEasy1_32-8 186ns ± 8% 190ns ±12% ~ (p=0.955 n=10+10) RegexpMatchEasy1_1K-8 1.13µs ± 1% 1.05µs ± 2% -6.64% (p=0.000 n=10+10) RegexpMatchMedium_32-8 316ns ± 7% 288ns ± 1% -8.68% (p=0.000 n=10+7) RegexpMatchMedium_1K-8 90.2µs ± 0% 85.5µs ± 2% -5.19% (p=0.000 n=10+10) RegexpMatchHard_32-8 5.53µs ± 0% 3.90µs ± 0% -29.52% (p=0.000 n=10+10) RegexpMatchHard_1K-8 119µs ± 0% 124µs ± 0% +4.29% (p=0.000 n=9+10) Revcomp-8 1.07s ± 0% 1.07s ± 0% ~ (p=0.094 n=9+9) Template-8 162ms ± 1% 160ms ± 2% ~ (p=0.089 n=10+10) TimeParse-8 756ns ± 2% 763ns ± 1% ~ (p=0.158 n=10+10) TimeFormat-8 758ns ± 1% 746ns ± 1% -1.52% (p=0.000 n=10+10) name old speed new speed delta GobDecode-8 49.4MB/s ± 1% 50.3MB/s ± 1% +1.84% (p=0.000 n=10+10) GobEncode-8 65.6MB/s ± 5% 65.4MB/s ± 2% ~ (p=0.549 n=10+9) Gzip-8 31.2MB/s ± 0% 31.1MB/s ± 0% -0.24% (p=0.000 n=9+9) Gunzip-8 264MB/s ± 0% 263MB/s ± 1% ~ (p=0.073 n=9+9) JSONEncode-8 62.3MB/s ± 2% 67.7MB/s ± 1% +8.67% (p=0.000 n=10+9) JSONDecode-8 13.4MB/s ± 0% 13.4MB/s ± 1% ~ (p=0.508 n=9+10) GoParse-8 7.66MB/s ± 1% 7.64MB/s ± 0% -0.23% (p=0.049 n=10+9) RegexpMatchEasy0_32-8 154MB/s ± 9% 143MB/s ± 3% ~ (p=0.303 n=10+7) RegexpMatchEasy0_1K-8 1.46GB/s ± 4% 1.47GB/s ± 4% ~ (p=0.912 n=10+10) RegexpMatchEasy1_32-8 172MB/s ± 9% 170MB/s ±12% ~ (p=0.971 n=10+10) RegexpMatchEasy1_1K-8 908MB/s ± 1% 972MB/s ± 2% +7.12% (p=0.000 n=10+10) RegexpMatchMedium_32-8 3.17MB/s ± 7% 3.46MB/s ± 1% +9.14% (p=0.000 n=10+7) RegexpMatchMedium_1K-8 11.3MB/s ± 0% 12.0MB/s ± 2% +5.51% (p=0.000 n=10+10) RegexpMatchHard_32-8 5.78MB/s ± 0% 8.21MB/s ± 0% +41.93% (p=0.000 n=9+10) RegexpMatchHard_1K-8 8.62MB/s ± 0% 8.27MB/s ± 0% -4.11% (p=0.000 n=9+10) Revcomp-8 237MB/s ± 0% 237MB/s ± 0% ~ (p=0.081 n=9+9) Template-8 12.0MB/s ± 1% 12.1MB/s ± 2% ~ (p=0.072 n=10+10) Change-Id: I080801f520366b42d5f9699954bd33106976a81b Reviewed-on: https://go-review.googlesource.com/c/120661 Run-TryBot: Ben Shi <powerman1st@163.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Cherry Zhang <cherryyz@google.com>
Diffstat (limited to 'src/cmd/asm/internal')
-rw-r--r--src/cmd/asm/internal/asm/testdata/arm64.s30
1 files changed, 29 insertions, 1 deletions
diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s
index 079c620f9b..12c7adbd04 100644
--- a/src/cmd/asm/internal/asm/testdata/arm64.s
+++ b/src/cmd/asm/internal/asm/testdata/arm64.s
@@ -191,6 +191,11 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
FMOVD F4, (R2)(R6) // FMOVD F4, (R2)(R6*1) // 446826fc
FMOVD F4, (R2)(R6<<3) // 447826fc
+ CMPW $40960, R0 // 1f284071
+ CMPW $27745, R2 // 3b8c8d525f001b6b
+ CMNW $0x3fffffc0, R2 // CMNW $1073741760, R2 // fb5f1a325f001b2b
+ CMPW $0xffff0, R1 // CMPW $1048560, R1 // fb3f1c323f001b6b
+ ADD $0x3fffffffc000, R5 // ADD $70368744161280, R5 // fb7f72b2a5001b8b
// LTYPE1 imsr ',' spreg ','
// {
// outcode($1, &$2, $4, &nullgen);
@@ -226,6 +231,16 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
ANDS $0x22220000, R3, R4 // ANDS $572653568, R3, R4 // 5b44a4d264001bea
BICS $0x22220000, R3, R4 // BICS $572653568, R3, R4 // 5b44a4d264003bea
+ EOR $0xe03fffffffffffff, R20, R22 // EOR $-2287828610704211969, R20, R22 // 96e243d2
+ TSTW $0x600000006, R1 // TSTW $25769803782, R1 // 3f041f72
+ ANDS $0xffff, R2 // ANDS $65535, R2 // 423c40f2
+ AND $0x7fffffff, R3 // AND $2147483647, R3 // 63784092
+ ANDS $0x0ffffffff80000000, R2 // ANDS $-2147483648, R2 // 428061f2
+ AND $0xfffff, R2 // AND $1048575, R2 // 424c4092
+ ANDW $0xf00fffff, R1 // ANDW $4027580415, R1 // 215c0412
+ ANDSW $0xff00ffff, R1 // ANDSW $4278255615, R1 // 215c0872
+ TSTW $0xff00ff, R1 // TSTW $16711935, R1 // 3f9c0072
+
AND $8, R0, RSP // 1f007d92
ORR $8, R0, RSP // 1f007db2
EOR $8, R0, RSP // 1f007dd2
@@ -233,6 +248,19 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
ORN $8, R0, RSP // 1ff87cb2
EON $8, R0, RSP // 1ff87cd2
+ MOVD $0x3fffffffc000, R0 // MOVD $70368744161280, R0 // e07f72b2
+ MOVW $0xaaaa0000, R1 // MOVW $2863267840, R1 // 4155b552
+ MOVW $0xaaaaffff, R1 // MOVW $2863333375, R1 // a1aaaa12
+ MOVW $0xaaaa, R1 // MOVW $43690, R1 // 41559552
+ MOVW $0xffffaaaa, R1 // MOVW $4294945450, R1 // a1aa8a12
+ MOVW $0xffff0000, R1 // MOVW $4294901760, R1 // e1ffbf52
+ MOVD $0xffff00000000000, R1 // MOVD $1152903912420802560, R1 // e13f54b2
+ MOVD $0x11110000, R1 // MOVD $286326784, R1 // 2122a2d2
+ MOVD $0, R1 // 010080d2
+ MOVD $-1, R1 // 01008092
+ MOVD $0x210000, R0 // MOVD $2162688, R0 // 2004a0d2
+ MOVD $0xffffffffffffaaaa, R1 // MOVD $-21846, R1 // a1aa8a92
+
//
// CLS
//
@@ -428,7 +456,7 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
CMP R22.SXTX, RSP // ffe336eb
CMP $0x22220000, RSP // CMP $572653568, RSP // 5b44a4d2ff633beb
- CMPW $0x22220000, RSP // CMPW $572653568, RSP // 5b44a4d2ff633b6b
+ CMPW $0x22220000, RSP // CMPW $572653568, RSP // 5b44a452ff633b6b
// TST
TST $15, R2 // 5f0c40f2