about | summary | refs | log | tree | commit | diff
path: root/test/codegen
diff options
context:
space:
mode:
author Keith Randall <khr@golang.org> 2023-08-01 14:32:56 -0700
committer Keith Randall <khr@google.com> 2023-08-04 16:40:24 +0000
commit 611706b17136beb602711f7bfebd15622f73f58f (patch)
tree 47a67335a6e838036b57134620e775a6b3ac6ee8 /test/codegen
parent 51cb12e83b87a100d1df0d80f5176417e08872ea (diff)
download go-611706b17136beb602711f7bfebd15622f73f58f.tar.xz
cmd/compile: don't use BTS when OR works, add direct memory BTS operations
Stop using BTSconst and friends when ORLconst can be used instead. OR can be issued by more function units than BTS can, so it could lead to better IPC. OR might take a few more bytes to encode, but not a lot more.

Still use BTSconst for cases where the constant otherwise wouldn't fit and would require a separate movabs instruction to materialize the constant. This happens when setting bits 31-63 of 64-bit targets.

Add BTS-to-memory operations so we don't need to load/bts/store.

Fixes #61694

Change-Id: I00379608df8fb0167cb01466e97d11dec7c1596c
Reviewed-on: https://go-review.googlesource.com/c/go/+/515755
Reviewed-by: Keith Randall <khr@google.com>
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Diffstat (limited to 'test/codegen')
-rw-r--r-- test/codegen/bits.go 12
-rw-r--r-- test/codegen/mathbits.go 4
-rw-r--r-- test/codegen/memops.go 29
3 files changed, 37 insertions, 8 deletions
diff --git a/test/codegen/bits.go b/test/codegen/bits.go
index 018f5b909e..88d5ebe9cf 100644
--- a/test/codegen/bits.go
+++ b/test/codegen/bits.go
@@ -220,10 +220,10 @@ func biton32(a, b uint32) (n uint32) {
// amd64:"BTSL"
n += b | (1 << (a & 31))
- // amd64:"BTSL\t[$]31"
+ // amd64:"ORL\t[$]-2147483648"
n += a | (1 << 31)
- // amd64:"BTSL\t[$]28"
+ // amd64:"ORL\t[$]268435456"
n += a | (1 << 28)
// amd64:"ORL\t[$]1"
@@ -236,10 +236,10 @@ func bitoff32(a, b uint32) (n uint32) {
// amd64:"BTRL"
n += b &^ (1 << (a & 31))
- // amd64:"BTRL\t[$]31"
+ // amd64:"ANDL\t[$]2147483647"
n += a &^ (1 << 31)
- // amd64:"BTRL\t[$]28"
+ // amd64:"ANDL\t[$]-268435457"
n += a &^ (1 << 28)
// amd64:"ANDL\t[$]-2"
@@ -252,10 +252,10 @@ func bitcompl32(a, b uint32) (n uint32) {
// amd64:"BTCL"
n += b ^ (1 << (a & 31))
- // amd64:"BTCL\t[$]31"
+ // amd64:"XORL\t[$]-2147483648"
n += a ^ (1 << 31)
- // amd64:"BTCL\t[$]28"
+ // amd64:"XORL\t[$]268435456"
n += a ^ (1 << 28)
// amd64:"XORL\t[$]1"
diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go
index 797aa23b67..d80bfaeec0 100644
--- a/test/codegen/mathbits.go
+++ b/test/codegen/mathbits.go
@@ -335,7 +335,7 @@ func TrailingZeros32(n uint32) int {
}
func TrailingZeros16(n uint16) int {
- // amd64:"BSFL","BTSL\\t\\$16"
+ // amd64:"BSFL","ORL\\t\\$65536"
// 386:"BSFL\t"
// arm:"ORR\t\\$65536","CLZ",-"MOVHU\tR"
// arm64:"ORR\t\\$65536","RBITW","CLZW",-"MOVHU\tR",-"RBIT\t",-"CLZ\t"
@@ -347,7 +347,7 @@ func TrailingZeros16(n uint16) int {
}
func TrailingZeros8(n uint8) int {
- // amd64:"BSFL","BTSL\\t\\$8"
+ // amd64:"BSFL","ORL\\t\\$256"
// 386:"BSFL"
// arm:"ORR\t\\$256","CLZ",-"MOVBU\tR"
// arm64:"ORR\t\\$256","RBITW","CLZW",-"MOVBU\tR",-"RBIT\t",-"CLZ\t"
diff --git a/test/codegen/memops.go b/test/codegen/memops.go
index f6cf9450a1..e5e89c2acc 100644
--- a/test/codegen/memops.go
+++ b/test/codegen/memops.go
@@ -372,3 +372,32 @@ func storeTest(a []bool, v int, i int) {
// amd64: `BTL\t\$1,`,`SETCS\t3\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
a[3+i] = v&2 != 0
}
+
+func bitOps(p *[12]uint64) {
+ // amd64: `ORQ\t\$8, \(AX\)`
+ p[0] |= 8
+ // amd64: `ORQ\t\$1073741824, 8\(AX\)`
+ p[1] |= 1 << 30
+ // amd64: `BTSQ\t\$31, 16\(AX\)`
+ p[2] |= 1 << 31
+ // amd64: `BTSQ\t\$63, 24\(AX\)`
+ p[3] |= 1 << 63
+
+ // amd64: `ANDQ\t\$-9, 32\(AX\)`
+ p[4] &^= 8
+ // amd64: `ANDQ\t\$-1073741825, 40\(AX\)`
+ p[5] &^= 1 << 30
+ // amd64: `BTRQ\t\$31, 48\(AX\)`
+ p[6] &^= 1 << 31
+ // amd64: `BTRQ\t\$63, 56\(AX\)`
+ p[7] &^= 1 << 63
+
+ // amd64: `XORQ\t\$8, 64\(AX\)`
+ p[8] ^= 8
+ // amd64: `XORQ\t\$1073741824, 72\(AX\)`
+ p[9] ^= 1 << 30
+ // amd64: `BTCQ\t\$31, 80\(AX\)`
+ p[10] ^= 1 << 31
+ // amd64: `BTCQ\t\$63, 88\(AX\)`
+ p[11] ^= 1 << 63
+}