aboutsummaryrefslogtreecommitdiff
path: root/src/crypto
diff options
context:
space:
mode:
author Jorropo <jorropo.pgm@gmail.com> 2025-10-26 22:19:30 +0100
committer Jorropo <jorropo.pgm@gmail.com> 2025-10-27 23:29:19 -0700
commit 2c91c33e88c68a5f6cc2f10296698faa305f6267 (patch)
tree 8bd51c8b338febd568b705ca5209481f9f55ea69 /src/crypto
parent 73d7635fae502f63a3774e1265f739bff8778113 (diff)
download go-2c91c33e88c68a5f6cc2f10296698faa305f6267.tar.xz
crypto/subtle,cmd/compile: add intrinsics for ConstantTimeSelect and *Eq
Targeting crypto/subtle rather than crypto/internal/fips140/subtle after discussion with Filippo.

goos: linux
goarch: amd64
pkg: crypto/subtle
cpu: AMD Ryzen 5 3600 6-Core Processor
                        │ /tmp/old.logs │            /tmp/new.logs            │
                        │    sec/op     │   sec/op     vs base                │
ConstantTimeSelect-12      0.5246n ± 1%   0.5217n ± 2%        ~ (p=0.118 n=10)
ConstantTimeByteEq-12      1.0415n ± 1%   0.5202n ± 2%  -50.05% (p=0.000 n=10)
ConstantTimeEq-12          0.7813n ± 2%   0.7819n ± 0%        ~ (p=0.897 n=10)
ConstantTimeLessOrEq-12    1.0415n ± 3%   0.7813n ± 1%  -24.98% (p=0.000 n=10)
geomean                    0.8166n        0.6381n       -21.86%

The last three will become 1 lat-cycle (0.25ns) faster once #76066 is fixed.

The Select being that fast with the old code is really impressive. I am pretty sure this happens because my CPU has BMI1&2 support and a fusing unit able to translate non-BMI code into BMI code. This benchmark doesn't capture the CACHE gains from the shorter assembly.

It currently compiles as:

    v17 = TESTQ <flags> v31 v31 // v != 0
    v20 = CMOVQNE <int> v32 v33 v17 (y[int])

It is possible to remove the `TESTQ` by fusing it at compile time with the compare in a pattern like this:

    subtle.ConstantTimeSelect(subtle.ConstantTimeLessOrEq(left, right), right, left)

Saving 2 latency-cycles (1 with #76066 fixed).

Updates #76056

Change-Id: I61a1df99e97a1506f75dae13db529f43846d8f1e
Reviewed-on: https://go-review.googlesource.com/c/go/+/715045
Reviewed-by: Keith Randall <khr@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Keith Randall <khr@google.com>
Diffstat (limited to 'src/crypto')
-rw-r--r-- src/crypto/subtle/constant_time.go 32
-rw-r--r-- src/crypto/subtle/constant_time_test.go 11
2 files changed, 38 insertions, 5 deletions
diff --git a/src/crypto/subtle/constant_time.go b/src/crypto/subtle/constant_time.go
index 22c1c64a0d..8eeff3b629 100644
--- a/src/crypto/subtle/constant_time.go
+++ b/src/crypto/subtle/constant_time.go
@@ -13,34 +13,56 @@ import "crypto/internal/fips140/subtle"
// is independent of the contents. If the lengths of x and y do not match it
// returns 0 immediately.
func ConstantTimeCompare(x, y []byte) int {
- return subtle.ConstantTimeCompare(x, y)
+ if len(x) != len(y) {
+ return 0
+ }
+
+ var v byte
+
+ for i := 0; i < len(x); i++ {
+ v |= x[i] ^ y[i]
+ }
+
+ return ConstantTimeByteEq(v, 0)
}
// ConstantTimeSelect returns x if v == 1 and y if v == 0.
// Its behavior is undefined if v takes any other value.
func ConstantTimeSelect(v, x, y int) int {
- return subtle.ConstantTimeSelect(v, x, y)
+ // This is intrinsicified on arches with CMOV.
+ // It implements the following superset behavior:
+ // ConstantTimeSelect returns x if v != 0 and y if v == 0.
+ // Do the same here so that out-of-contract values of v behave identically on every architecture.
+ v = int(constantTimeBoolToUint8(v != 0))
+ return ^(v-1)&x | (v-1)&y
}
// ConstantTimeByteEq returns 1 if x == y and 0 otherwise.
func ConstantTimeByteEq(x, y uint8) int {
- return subtle.ConstantTimeByteEq(x, y)
+ return int(constantTimeBoolToUint8(x == y))
}
// ConstantTimeEq returns 1 if x == y and 0 otherwise.
func ConstantTimeEq(x, y int32) int {
- return subtle.ConstantTimeEq(x, y)
+ return int(constantTimeBoolToUint8(x == y))
}
// ConstantTimeCopy copies the contents of y into x (a slice of equal length)
// if v == 1. If v == 0, x is left unchanged. Its behavior is undefined if v
// takes any other value.
func ConstantTimeCopy(v int, x, y []byte) {
+ // Forward this one since it gains nothing from compiler intrinsics.
subtle.ConstantTimeCopy(v, x, y)
}
// ConstantTimeLessOrEq returns 1 if x <= y and 0 otherwise.
// Its behavior is undefined if x or y are negative or > 2**31 - 1.
func ConstantTimeLessOrEq(x, y int) int {
- return subtle.ConstantTimeLessOrEq(x, y)
+ return int(constantTimeBoolToUint8(x <= y))
+}
+
+// constantTimeBoolToUint8 is a compiler intrinsic.
+// It returns 1 for true and 0 for false.
+func constantTimeBoolToUint8(b bool) uint8 {
+ panic("unreachable; must be intrinsicified")
}
diff --git a/src/crypto/subtle/constant_time_test.go b/src/crypto/subtle/constant_time_test.go
index c2ccd28ad7..9db1140134 100644
--- a/src/crypto/subtle/constant_time_test.go
+++ b/src/crypto/subtle/constant_time_test.go
@@ -128,6 +128,17 @@ func TestConstantTimeLessOrEq(t *testing.T) {
var benchmarkGlobal uint8
+func BenchmarkConstantTimeSelect(b *testing.B) {
+ x := int(benchmarkGlobal)
+ var y, z int
+
+ for range b.N {
+ y, z, x = ConstantTimeSelect(x, y, z), y, z
+ }
+
+ benchmarkGlobal = uint8(x)
+}
+
func BenchmarkConstantTimeByteEq(b *testing.B) {
var x, y uint8