diff options
| author | Jorropo <jorropo.pgm@gmail.com> | 2025-10-26 22:19:30 +0100 |
|---|---|---|
| committer | Jorropo <jorropo.pgm@gmail.com> | 2025-10-27 23:29:19 -0700 |
| commit | 2c91c33e88c68a5f6cc2f10296698faa305f6267 (patch) | |
| tree | 8bd51c8b338febd568b705ca5209481f9f55ea69 /src/crypto | |
| parent | 73d7635fae502f63a3774e1265f739bff8778113 (diff) | |
| download | go-2c91c33e88c68a5f6cc2f10296698faa305f6267.tar.xz | |
crypto/subtle,cmd/compile: add intrinsics for ConstantTimeSelect and *Eq
Targeting crypto/subtle rather than
crypto/internal/fips140/subtle after discussion with Filippo.
goos: linux
goarch: amd64
pkg: crypto/subtle
cpu: AMD Ryzen 5 3600 6-Core Processor
│ /tmp/old.logs │ /tmp/new.logs │
│ sec/op │ sec/op vs base │
ConstantTimeSelect-12 0.5246n ± 1% 0.5217n ± 2% ~ (p=0.118 n=10)
ConstantTimeByteEq-12 1.0415n ± 1% 0.5202n ± 2% -50.05% (p=0.000 n=10)
ConstantTimeEq-12 0.7813n ± 2% 0.7819n ± 0% ~ (p=0.897 n=10)
ConstantTimeLessOrEq-12 1.0415n ± 3% 0.7813n ± 1% -24.98% (p=0.000 n=10)
geomean 0.8166n 0.6381n -21.86%
The last three will become 1 latency cycle (0.25ns) faster once #76066 is fixed.
The Select being that fast with the old code is really impressive.
I am pretty sure this happens because my CPU has BMI1&2 support and
a fusing unit able to translate non-BMI code into BMI code.
This benchmark doesn't capture the CACHE gains from the shorter assembly.
It currently compiles as:
v17 = TESTQ <flags> v31 v31 // v != 0
v20 = CMOVQNE <int> v32 v33 v17 (y[int])
It is possible to remove the `TESTQ` by fusing it at compile time with the
compare in a pattern like this:
subtle.ConstantTimeSelect(subtle.ConstantTimeLessOrEq(left, right), right, left)
This saves 2 latency cycles (1 once #76066 is fixed).
Updates #76056
Change-Id: I61a1df99e97a1506f75dae13db529f43846d8f1e
Reviewed-on: https://go-review.googlesource.com/c/go/+/715045
Reviewed-by: Keith Randall <khr@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Keith Randall <khr@google.com>
Diffstat (limited to 'src/crypto')
| -rw-r--r-- | src/crypto/subtle/constant_time.go | 32 | ||||
| -rw-r--r-- | src/crypto/subtle/constant_time_test.go | 11 |
2 files changed, 38 insertions, 5 deletions
diff --git a/src/crypto/subtle/constant_time.go b/src/crypto/subtle/constant_time.go index 22c1c64a0d..8eeff3b629 100644 --- a/src/crypto/subtle/constant_time.go +++ b/src/crypto/subtle/constant_time.go @@ -13,34 +13,56 @@ import "crypto/internal/fips140/subtle" // is independent of the contents. If the lengths of x and y do not match it // returns 0 immediately. func ConstantTimeCompare(x, y []byte) int { - return subtle.ConstantTimeCompare(x, y) + if len(x) != len(y) { + return 0 + } + + var v byte + + for i := 0; i < len(x); i++ { + v |= x[i] ^ y[i] + } + + return ConstantTimeByteEq(v, 0) } // ConstantTimeSelect returns x if v == 1 and y if v == 0. // Its behavior is undefined if v takes any other value. func ConstantTimeSelect(v, x, y int) int { - return subtle.ConstantTimeSelect(v, x, y) + // This is intrinsicified on arches with CMOV. + // It implements the following superset behavior: + // ConstantTimeSelect returns x if v != 0 and y if v == 0. + // Do the same here to avoid non portable UB. + v = int(constantTimeBoolToUint8(v != 0)) + return ^(v-1)&x | (v-1)&y } // ConstantTimeByteEq returns 1 if x == y and 0 otherwise. func ConstantTimeByteEq(x, y uint8) int { - return subtle.ConstantTimeByteEq(x, y) + return int(constantTimeBoolToUint8(x == y)) } // ConstantTimeEq returns 1 if x == y and 0 otherwise. func ConstantTimeEq(x, y int32) int { - return subtle.ConstantTimeEq(x, y) + return int(constantTimeBoolToUint8(x == y)) } // ConstantTimeCopy copies the contents of y into x (a slice of equal length) // if v == 1. If v == 0, x is left unchanged. Its behavior is undefined if v // takes any other value. func ConstantTimeCopy(v int, x, y []byte) { + // Forward this one since it gains nothing from compiler intrinsics. subtle.ConstantTimeCopy(v, x, y) } // ConstantTimeLessOrEq returns 1 if x <= y and 0 otherwise. // Its behavior is undefined if x or y are negative or > 2**31 - 1. 
func ConstantTimeLessOrEq(x, y int) int { - return subtle.ConstantTimeLessOrEq(x, y) + return int(constantTimeBoolToUint8(x <= y)) +} + +// constantTimeBoolToUint8 is a compiler intrinsic. +// It returns 1 for true and 0 for false. +func constantTimeBoolToUint8(b bool) uint8 { + panic("unreachable; must be intrinsicified") } diff --git a/src/crypto/subtle/constant_time_test.go b/src/crypto/subtle/constant_time_test.go index c2ccd28ad7..9db1140134 100644 --- a/src/crypto/subtle/constant_time_test.go +++ b/src/crypto/subtle/constant_time_test.go @@ -128,6 +128,17 @@ func TestConstantTimeLessOrEq(t *testing.T) { var benchmarkGlobal uint8 +func BenchmarkConstantTimeSelect(b *testing.B) { + x := int(benchmarkGlobal) + var y, z int + + for range b.N { + y, z, x = ConstantTimeSelect(x, y, z), y, z + } + + benchmarkGlobal = uint8(x) +} + func BenchmarkConstantTimeByteEq(b *testing.B) { var x, y uint8 |
