aboutsummaryrefslogtreecommitdiff
path: root/test/codegen
diff options
context:
space:
mode:
authorRuss Cox <rsc@golang.org>2025-10-27 19:41:39 -0400
committerGopher Robot <gobot@golang.org>2025-10-30 08:04:20 -0700
commit1e5bb416d887b1cf2bd9b6a3b9f05222d44c3ffc (patch)
tree7b932789cf14f150fa27a07124dfb23a1251720a /test/codegen
parent38317c44e71478220f842c7efd8078215825af92 (diff)
downloadgo-1e5bb416d887b1cf2bd9b6a3b9f05222d44c3ffc.tar.xz
cmd/compile: implement bits.Mul64 on 32-bit systems
This CL implements Mul64uhilo, Hmul64, Hmul64u, and Avg64u on 32-bit systems, with the effect that constant division of both int64s and uint64s can now be emitted directly in all cases, and also that bits.Mul64 can be intrinsified on 32-bit systems. Previously, constant divisions of uint64s by values 0 ≤ c ≤ 0xFFFF were implemented as uint32 divisions by c and some fixup. After expanding those smaller constant divisions, the code for i/999 required: (386) 7 mul, 10 add, 2 sub, 3 rotate, 3 shift (104 bytes) (arm) 7 mul, 9 add, 3 sub, 2 shift (104 bytes) (mips) 7 mul, 10 add, 5 sub, 6 shift, 3 sgtu (176 bytes) For that much code, we might as well use a full 64x64->128 multiply that can be used for all divisors, not just small ones. Having done that, the same i/999 now generates: (386) 4 mul, 9 add, 2 sub, 2 or, 6 shift (112 bytes) (arm) 4 mul, 8 add, 2 sub, 2 or, 3 shift (92 bytes) (mips) 4 mul, 11 add, 3 sub, 6 shift, 8 sgtu, 4 or (196 bytes) The size increase on 386 is due to a few extra register spills. The size increase on mips is due to add-with-carry being hard. The new approach is more general, letting us delete the old special case and guarantee that all int64 and uint64 divisions by constants are generated directly on 32-bit systems. This especially speeds up code making heavy use of bits.Mul64 with a constant argument, which happens in strconv and various crypto packages. A few examples are benchmarked below. 
pkg: cmd/compile/internal/test benchmark \ host local linux-amd64 s7 linux-386 s7:GOARCH=386 vs base vs base vs base vs base vs base DivconstI64 ~ ~ ~ -49.66% -21.02% ModconstI64 ~ ~ ~ -13.45% +14.52% DivisiblePow2constI64 ~ ~ ~ +0.97% -1.32% DivisibleconstI64 ~ ~ ~ -20.01% -48.28% DivisibleWDivconstI64 ~ ~ -1.76% -38.59% -42.74% DivconstU64/3 ~ ~ ~ -13.82% -4.09% DivconstU64/5 ~ ~ ~ -14.10% -3.54% DivconstU64/37 -2.07% -4.45% ~ -19.60% -9.55% DivconstU64/1234567 ~ ~ ~ -61.55% -56.93% ModconstU64 ~ ~ ~ -6.25% ~ DivisibleconstU64 ~ ~ ~ -2.78% -7.82% DivisibleWDivconstU64 ~ ~ ~ +4.23% +2.56% pkg: math/bits benchmark \ host s7 linux-amd64 linux-386 s7:GOARCH=386 vs base vs base vs base vs base Add ~ ~ ~ ~ Add32 +1.59% ~ ~ ~ Add64 ~ ~ ~ ~ Add64multiple ~ ~ ~ ~ Sub ~ ~ ~ ~ Sub32 ~ ~ ~ ~ Sub64 ~ ~ -9.20% ~ Sub64multiple ~ ~ ~ ~ Mul ~ ~ ~ ~ Mul32 ~ ~ ~ ~ Mul64 ~ ~ -41.58% -53.21% Div ~ ~ ~ ~ Div32 ~ ~ ~ ~ Div64 ~ ~ ~ ~ pkg: strconv benchmark \ host s7 linux-amd64 linux-386 s7:GOARCH=386 vs base vs base vs base vs base ParseInt/Pos/7bit ~ ~ -11.08% -6.75% ParseInt/Pos/26bit ~ ~ -13.65% -11.02% ParseInt/Pos/31bit ~ ~ -14.65% -9.71% ParseInt/Pos/56bit -1.80% ~ -17.97% -10.78% ParseInt/Pos/63bit ~ ~ -13.85% -9.63% ParseInt/Neg/7bit ~ ~ -12.14% -7.26% ParseInt/Neg/26bit ~ ~ -14.18% -9.81% ParseInt/Neg/31bit ~ ~ -14.51% -9.02% ParseInt/Neg/56bit ~ ~ -15.79% -9.79% ParseInt/Neg/63bit ~ ~ -15.68% -11.07% AppendFloat/Decimal ~ ~ -7.25% -12.26% AppendFloat/Float ~ ~ -15.96% -19.45% AppendFloat/Exp ~ ~ -13.96% -17.76% AppendFloat/NegExp ~ ~ -14.89% -20.27% AppendFloat/LongExp ~ ~ -12.68% -17.97% AppendFloat/Big ~ ~ -11.10% -16.64% AppendFloat/BinaryExp ~ ~ ~ ~ AppendFloat/32Integer ~ ~ -10.05% -10.91% AppendFloat/32ExactFraction ~ ~ -8.93% -13.00% AppendFloat/32Point ~ ~ -10.36% -14.89% AppendFloat/32Exp ~ ~ -9.88% -13.54% AppendFloat/32NegExp ~ ~ -10.16% -14.26% AppendFloat/32Shortest ~ ~ -11.39% -14.96% AppendFloat/32Fixed8Hard ~ ~ ~ -2.31% AppendFloat/32Fixed9Hard ~ ~ ~ -7.01% 
AppendFloat/64Fixed1 ~ ~ -2.83% -8.23% AppendFloat/64Fixed2 ~ ~ ~ -7.94% AppendFloat/64Fixed3 ~ ~ -4.07% -7.22% AppendFloat/64Fixed4 ~ ~ -7.24% -7.62% AppendFloat/64Fixed12 ~ ~ -6.57% -4.82% AppendFloat/64Fixed16 ~ ~ -4.00% -5.81% AppendFloat/64Fixed12Hard -2.22% ~ -4.07% -6.35% AppendFloat/64Fixed17Hard -2.12% ~ ~ -3.79% AppendFloat/64Fixed18Hard -1.89% ~ +2.48% ~ AppendFloat/Slowpath64 -1.85% ~ -14.49% -18.21% AppendFloat/SlowpathDenormal64 ~ ~ -13.08% -19.41% pkg: crypto/internal/fips140/nistec/fiat benchmark \ host s7 linux-amd64 linux-386 s7:GOARCH=386 vs base vs base vs base vs base Mul/P224 ~ ~ -29.95% -39.60% Mul/P384 ~ ~ -37.11% -63.33% Mul/P521 ~ ~ -26.62% -12.42% Square/P224 +1.46% ~ -40.62% -49.18% Square/P384 ~ ~ -45.51% -69.68% Square/P521 +90.37% ~ -25.26% -11.23% (The +90% is a separate problem and not real; that much variation can be seen on that system by running the same binary from two different files.) pkg: crypto/internal/fips140/edwards25519 benchmark \ host s7 linux-amd64 linux-386 s7:GOARCH=386 vs base vs base vs base vs base EncodingDecoding ~ ~ -34.67% -35.75% ScalarBaseMult ~ ~ -31.25% -30.29% ScalarMult ~ ~ -33.45% -32.54% VarTimeDoubleScalarBaseMult ~ ~ -33.78% -33.68% Change-Id: Id3c91d42cd01def6731b755e99f8f40c6ad1bb65 Reviewed-on: https://go-review.googlesource.com/c/go/+/716061 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Auto-Submit: Russ Cox <rsc@golang.org> Reviewed-by: Keith Randall <khr@golang.org> Reviewed-by: Keith Randall <khr@google.com>
Diffstat (limited to 'test/codegen')
-rw-r--r--test/codegen/divmod.go75
1 files changed, 65 insertions, 10 deletions
diff --git a/test/codegen/divmod.go b/test/codegen/divmod.go
index 3a78180817..98d0852398 100644
--- a/test/codegen/divmod.go
+++ b/test/codegen/divmod.go
@@ -279,7 +279,10 @@ func div3_uint32(i uint32) uint32 {
}
func div3_uint64(i uint64) uint64 {
- // 386 "CALL"
+ // 386: "MOVL [$]-1431655766"
+ // 386: "MULL"
+ // 386: "SHRL [$]1"
+ // 386: -".*CALL"
// arm64: "MOVD [$]-6148914691236517205,"
// arm64: "UMULH"
// arm64: "LSR [$]1,"
@@ -308,7 +311,10 @@ func div14_uint32(i uint32) uint32 {
}
func div14_uint64(i uint64) uint64 {
- // 386 "CALL"
+ // 386: "MOVL [$]-1840700270,"
+ // 386: "MULL"
+ // 386: "SHRL [$]2,"
+ // 386: -".*CALL"
// arm64: "MOVD [$]-7905747460161236406,"
// arm64: "UMULH"
// arm64: "LSR [$]2,"
@@ -343,7 +349,10 @@ func div7_uint32(i uint32) uint32 {
}
func div7_uint64(i uint64) uint64 {
- // 386 "CALL"
+ // 386: "MOVL [$]-1840700269,"
+ // 386: "MULL"
+ // 386: "SHRL [$]2,"
+ // 386: -".*CALL"
// arm64: "MOVD [$]2635249153387078803,"
// arm64: "UMULH"
// arm64: "SUB",
@@ -353,7 +362,11 @@ func div7_uint64(i uint64) uint64 {
}
func div12345_uint64(i uint64) uint64 {
- // 386 "CALL"
+ // 386: "MOVL [$]-1444876402,"
+ // 386: "MOVL [$]835683390,"
+ // 386: "MULL"
+ // 386: "SHRL [$]13,"
+ // 386: "SHLL [$]19,"
// arm64: "MOVD [$]-6205696892516465602,"
// arm64: "UMULH"
// arm64: "LSR [$]13,"
@@ -869,7 +882,12 @@ func ndivis6_int32(i int32) bool {
}
func divis6_int64(i int64) bool {
- // 386 "CALL"
+ // 386: "IMUL3L [$]-1431655766,"
+ // 386: "IMUL3L [$]-1431655765,"
+ // 386: "ADCL [$]715827882,"
+ // 386: "CMPL .*, [$]715827882"
+ // 386: "CMPL .*, [$]-1431655766"
+ // 386: "SETLS"
// arm64: "MOVD [$]-6148914691236517205,"
// arm64: "MUL "
// arm64: "MOVD [$]3074457345618258602,"
@@ -880,7 +898,12 @@ func divis6_int64(i int64) bool {
}
func ndivis6_int64(i int64) bool {
- // 386 "CALL"
+ // 386: "IMUL3L [$]-1431655766,"
+ // 386: "IMUL3L [$]-1431655765,"
+ // 386: "ADCL [$]715827882,"
+ // 386: "CMPL .*, [$]715827882"
+ // 386: "CMPL .*, [$]-1431655766"
+ // 386: "SETHI"
// arm64: "MOVD [$]-6148914691236517205,"
// arm64: "MUL "
// arm64: "MOVD [$]3074457345618258602,"
@@ -973,7 +996,14 @@ func div_ndivis6_uint32(i uint32) (uint32, bool) {
}
func div_divis6_uint64(i uint64) (uint64, bool) {
- // 386 "CALL"
+ // 386: "MOVL [$]-1431655766,"
+ // 386: "MOVL [$]-1431655765,"
+ // 386: "MULL"
+ // 386: "SHRL [$]2,"
+ // 386: "SHLL [$]30,"
+ // 386: "SETEQ"
+ // 386: -".*CALL"
+ // 386: -"RO[RL]"
// arm64: "MOVD [$]-6148914691236517205,"
// arm64: "UMULH"
// arm64: "LSR [$]2,"
@@ -983,7 +1013,14 @@ func div_divis6_uint64(i uint64) (uint64, bool) {
}
func div_ndivis6_uint64(i uint64) (uint64, bool) {
- // 386 "CALL"
+ // 386: "MOVL [$]-1431655766,"
+ // 386: "MOVL [$]-1431655765,"
+ // 386: "MULL"
+ // 386: "SHRL [$]2,"
+ // 386: "SHLL [$]30,"
+ // 386: "SETNE"
+ // 386: -".*CALL"
+ // 386: -"RO[RL]"
// arm64: "MOVD [$]-6148914691236517205,"
// arm64: "UMULH"
// arm64: "LSR [$]2,"
@@ -1091,7 +1128,16 @@ func div_ndivis6_int32(i int32) (int32, bool) {
}
func div_divis6_int64(i int64) (int64, bool) {
- // 386 "CALL"
+ // 386: "ANDL [$]-1431655766,"
+ // 386: "ANDL [$]-1431655765,"
+ // 386: "MOVL [$]-1431655766,"
+ // 386: "MOVL [$]-1431655765,"
+ // 386: "SUBL" "SBBL"
+ // 386: "MULL"
+ // 386: "SETEQ"
+ // 386: -"SET(LS|HI)"
+ // 386: -".*CALL"
+ // 386: -"RO[RL]"
// arm64: "MOVD [$]-6148914691236517205,"
// arm64: "SMULH"
// arm64: "ADD"
@@ -1103,7 +1149,16 @@ func div_divis6_int64(i int64) (int64, bool) {
}
func div_ndivis6_int64(i int64) (int64, bool) {
- // 386 "CALL"
+ // 386: "ANDL [$]-1431655766,"
+ // 386: "ANDL [$]-1431655765,"
+ // 386: "MOVL [$]-1431655766,"
+ // 386: "MOVL [$]-1431655765,"
+ // 386: "SUBL" "SBBL"
+ // 386: "MULL"
+ // 386: "SETNE"
+ // 386: -"SET(LS|HI)"
+ // 386: -".*CALL"
+ // 386: -"RO[RL]"
// arm64: "MOVD [$]-6148914691236517205,"
// arm64: "SMULH"
// arm64: "ADD"