diff options
| author | Jorropo <jorropo.pgm@gmail.com> | 2025-10-26 22:19:30 +0100 |
|---|---|---|
| committer | Jorropo <jorropo.pgm@gmail.com> | 2025-10-27 23:29:19 -0700 |
| commit | 2c91c33e88c68a5f6cc2f10296698faa305f6267 (patch) | |
| tree | 8bd51c8b338febd568b705ca5209481f9f55ea69 /src/cmd/compile/internal | |
| parent | 73d7635fae502f63a3774e1265f739bff8778113 (diff) | |
| download | go-2c91c33e88c68a5f6cc2f10296698faa305f6267.tar.xz | |
crypto/subtle,cmd/compile: add intrinsics for ConstantTimeSelect and *Eq
Targeting crypto/subtle rather than
crypto/internal/fips140/subtle after discussion with Filippo.
goos: linux
goarch: amd64
pkg: crypto/subtle
cpu: AMD Ryzen 5 3600 6-Core Processor
│ /tmp/old.logs │ /tmp/new.logs │
│ sec/op │ sec/op vs base │
ConstantTimeSelect-12 0.5246n ± 1% 0.5217n ± 2% ~ (p=0.118 n=10)
ConstantTimeByteEq-12 1.0415n ± 1% 0.5202n ± 2% -50.05% (p=0.000 n=10)
ConstantTimeEq-12 0.7813n ± 2% 0.7819n ± 0% ~ (p=0.897 n=10)
ConstantTimeLessOrEq-12 1.0415n ± 3% 0.7813n ± 1% -24.98% (p=0.000 n=10)
geomean 0.8166n 0.6381n -21.86%
The last three will become 1 latency-cycle (0.25ns) faster once #76066 is fixed.
The Select being that fast with the old code is really impressive.
I am pretty sure this happens because my CPU has BMI1 and BMI2 support and
a fusing unit able to translate non-BMI code into BMI code.
This benchmark doesn't capture the CACHE gains from the shorter assembly.
ConstantTimeSelect currently compiles as:
v17 = TESTQ <flags> v31 v31 // v != 0
v20 = CMOVQNE <int> v32 v33 v17 (y[int])
It is possible to remove the `TESTQ` by fusing it at compile time with the
compare in a pattern like this:
subtle.ConstantTimeSelect(subtle.ConstantTimeLessOrEq(left, right), right, left)
Saving 2 latency-cycles (1 with #76066 fixed).
Updates #76056
Change-Id: I61a1df99e97a1506f75dae13db529f43846d8f1e
Reviewed-on: https://go-review.googlesource.com/c/go/+/715045
Reviewed-by: Keith Randall <khr@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Keith Randall <khr@google.com>
Diffstat (limited to 'src/cmd/compile/internal')
| -rw-r--r-- | src/cmd/compile/internal/ssagen/intrinsics.go | 30 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssagen/intrinsics_test.go | 20 |
2 files changed, 50 insertions, 0 deletions
diff --git a/src/cmd/compile/internal/ssagen/intrinsics.go b/src/cmd/compile/internal/ssagen/intrinsics.go index e14db7b0a0..06887c934e 100644 --- a/src/cmd/compile/internal/ssagen/intrinsics.go +++ b/src/cmd/compile/internal/ssagen/intrinsics.go @@ -1602,6 +1602,36 @@ func initIntrinsics(cfg *intrinsicBuildConfig) { return s.newValue1(ssa.OpZeroExt8to64, types.Types[types.TUINT64], out) }, sys.AMD64) + + /******** crypto/subtle ********/ + // We implement a superset of the ConstantTimeSelect promise: + // ConstantTimeSelect returns x if v != 0 and y if v == 0. + add("crypto/subtle", "ConstantTimeSelect", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + v, x, y := args[0], args[1], args[2] + + var checkOp ssa.Op + var zero *ssa.Value + switch s.config.PtrSize { + case 8: + checkOp = ssa.OpNeq64 + zero = s.constInt64(types.Types[types.TINT], 0) + case 4: + checkOp = ssa.OpNeq32 + zero = s.constInt32(types.Types[types.TINT], 0) + default: + panic("unreachable") + } + check := s.newValue2(checkOp, types.Types[types.TBOOL], zero, v) + + return s.newValue3(ssa.OpCondSelect, types.Types[types.TINT], x, y, check) + }, + sys.ArchAMD64, sys.ArchARM64, sys.ArchLoong64, sys.ArchPPC64, sys.ArchPPC64LE, sys.ArchWasm) // all with CMOV support. + add("crypto/subtle", "constantTimeBoolToUint8", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue1(ssa.OpCvtBoolToUint8, types.Types[types.TUINT8], args[0]) + }, + all...) 
} // findIntrinsic returns a function which builds the SSA equivalent of the diff --git a/src/cmd/compile/internal/ssagen/intrinsics_test.go b/src/cmd/compile/internal/ssagen/intrinsics_test.go index 0623c5f209..5a4e577fb6 100644 --- a/src/cmd/compile/internal/ssagen/intrinsics_test.go +++ b/src/cmd/compile/internal/ssagen/intrinsics_test.go @@ -41,6 +41,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ {"386", "math/bits", "TrailingZeros8"}: struct{}{}, {"386", "runtime", "KeepAlive"}: struct{}{}, {"386", "runtime", "slicebytetostringtmp"}: struct{}{}, + {"386", "crypto/subtle", "constantTimeBoolToUint8"}: struct{}{}, {"amd64", "internal/runtime/atomic", "And"}: struct{}{}, {"amd64", "internal/runtime/atomic", "And32"}: struct{}{}, {"amd64", "internal/runtime/atomic", "And64"}: struct{}{}, @@ -187,6 +188,8 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ {"amd64", "sync/atomic", "SwapUint32"}: struct{}{}, {"amd64", "sync/atomic", "SwapUint64"}: struct{}{}, {"amd64", "sync/atomic", "SwapUintptr"}: struct{}{}, + {"amd64", "crypto/subtle", "ConstantTimeSelect"}: struct{}{}, + {"amd64", "crypto/subtle", "constantTimeBoolToUint8"}: struct{}{}, {"arm", "internal/runtime/sys", "Bswap32"}: struct{}{}, {"arm", "internal/runtime/sys", "Bswap64"}: struct{}{}, {"arm", "internal/runtime/sys", "GetCallerPC"}: struct{}{}, @@ -214,6 +217,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ {"arm", "math/bits", "TrailingZeros8"}: struct{}{}, {"arm", "runtime", "KeepAlive"}: struct{}{}, {"arm", "runtime", "slicebytetostringtmp"}: struct{}{}, + {"arm", "crypto/subtle", "constantTimeBoolToUint8"}: struct{}{}, {"arm64", "internal/runtime/atomic", "And"}: struct{}{}, {"arm64", "internal/runtime/atomic", "And32"}: struct{}{}, {"arm64", "internal/runtime/atomic", "And64"}: struct{}{}, @@ -358,6 +362,8 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ {"arm64", "sync/atomic", "SwapUint32"}: struct{}{}, {"arm64", "sync/atomic", "SwapUint64"}: struct{}{}, {"arm64", 
"sync/atomic", "SwapUintptr"}: struct{}{}, + {"arm64", "crypto/subtle", "ConstantTimeSelect"}: struct{}{}, + {"arm64", "crypto/subtle", "constantTimeBoolToUint8"}: struct{}{}, {"loong64", "internal/runtime/atomic", "And"}: struct{}{}, {"loong64", "internal/runtime/atomic", "And32"}: struct{}{}, {"loong64", "internal/runtime/atomic", "And64"}: struct{}{}, @@ -504,6 +510,8 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ {"loong64", "sync/atomic", "SwapUint32"}: struct{}{}, {"loong64", "sync/atomic", "SwapUint64"}: struct{}{}, {"loong64", "sync/atomic", "SwapUintptr"}: struct{}{}, + {"loong64", "crypto/subtle", "ConstantTimeSelect"}: struct{}{}, + {"loong64", "crypto/subtle", "constantTimeBoolToUint8"}: struct{}{}, {"mips", "internal/runtime/atomic", "And"}: struct{}{}, {"mips", "internal/runtime/atomic", "And8"}: struct{}{}, {"mips", "internal/runtime/atomic", "Cas"}: struct{}{}, @@ -574,6 +582,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ {"mips", "sync/atomic", "SwapInt32"}: struct{}{}, {"mips", "sync/atomic", "SwapUint32"}: struct{}{}, {"mips", "sync/atomic", "SwapUintptr"}: struct{}{}, + {"mips", "crypto/subtle", "constantTimeBoolToUint8"}: struct{}{}, {"mips64", "internal/runtime/atomic", "And"}: struct{}{}, {"mips64", "internal/runtime/atomic", "And8"}: struct{}{}, {"mips64", "internal/runtime/atomic", "Cas"}: struct{}{}, @@ -662,6 +671,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ {"mips64", "sync/atomic", "SwapUint32"}: struct{}{}, {"mips64", "sync/atomic", "SwapUint64"}: struct{}{}, {"mips64", "sync/atomic", "SwapUintptr"}: struct{}{}, + {"mips64", "crypto/subtle", "constantTimeBoolToUint8"}: struct{}{}, {"mips64le", "internal/runtime/atomic", "And"}: struct{}{}, {"mips64le", "internal/runtime/atomic", "And8"}: struct{}{}, {"mips64le", "internal/runtime/atomic", "Cas"}: struct{}{}, @@ -750,6 +760,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ {"mips64le", "sync/atomic", "SwapUint32"}: struct{}{}, {"mips64le", 
"sync/atomic", "SwapUint64"}: struct{}{}, {"mips64le", "sync/atomic", "SwapUintptr"}: struct{}{}, + {"mips64le", "crypto/subtle", "constantTimeBoolToUint8"}: struct{}{}, {"mipsle", "internal/runtime/atomic", "And"}: struct{}{}, {"mipsle", "internal/runtime/atomic", "And8"}: struct{}{}, {"mipsle", "internal/runtime/atomic", "Cas"}: struct{}{}, @@ -820,6 +831,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ {"mipsle", "sync/atomic", "SwapInt32"}: struct{}{}, {"mipsle", "sync/atomic", "SwapUint32"}: struct{}{}, {"mipsle", "sync/atomic", "SwapUintptr"}: struct{}{}, + {"mipsle", "crypto/subtle", "constantTimeBoolToUint8"}: struct{}{}, {"ppc64", "internal/runtime/atomic", "And"}: struct{}{}, {"ppc64", "internal/runtime/atomic", "And8"}: struct{}{}, {"ppc64", "internal/runtime/atomic", "Cas"}: struct{}{}, @@ -944,6 +956,8 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ {"ppc64", "sync/atomic", "SwapUint32"}: struct{}{}, {"ppc64", "sync/atomic", "SwapUint64"}: struct{}{}, {"ppc64", "sync/atomic", "SwapUintptr"}: struct{}{}, + {"ppc64", "crypto/subtle", "ConstantTimeSelect"}: struct{}{}, + {"ppc64", "crypto/subtle", "constantTimeBoolToUint8"}: struct{}{}, {"ppc64le", "internal/runtime/atomic", "And"}: struct{}{}, {"ppc64le", "internal/runtime/atomic", "And8"}: struct{}{}, {"ppc64le", "internal/runtime/atomic", "Cas"}: struct{}{}, @@ -1068,6 +1082,8 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ {"ppc64le", "sync/atomic", "SwapUint32"}: struct{}{}, {"ppc64le", "sync/atomic", "SwapUint64"}: struct{}{}, {"ppc64le", "sync/atomic", "SwapUintptr"}: struct{}{}, + {"ppc64le", "crypto/subtle", "ConstantTimeSelect"}: struct{}{}, + {"ppc64le", "crypto/subtle", "constantTimeBoolToUint8"}: struct{}{}, {"riscv64", "internal/runtime/atomic", "And"}: struct{}{}, {"riscv64", "internal/runtime/atomic", "And8"}: struct{}{}, {"riscv64", "internal/runtime/atomic", "Cas"}: struct{}{}, @@ -1188,6 +1204,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ {"riscv64", 
"sync/atomic", "SwapUint32"}: struct{}{}, {"riscv64", "sync/atomic", "SwapUint64"}: struct{}{}, {"riscv64", "sync/atomic", "SwapUintptr"}: struct{}{}, + {"riscv64", "crypto/subtle", "constantTimeBoolToUint8"}: struct{}{}, {"s390x", "internal/runtime/atomic", "And"}: struct{}{}, {"s390x", "internal/runtime/atomic", "And8"}: struct{}{}, {"s390x", "internal/runtime/atomic", "Cas"}: struct{}{}, @@ -1306,6 +1323,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ {"s390x", "sync/atomic", "SwapUint32"}: struct{}{}, {"s390x", "sync/atomic", "SwapUint64"}: struct{}{}, {"s390x", "sync/atomic", "SwapUintptr"}: struct{}{}, + {"s390x", "crypto/subtle", "constantTimeBoolToUint8"}: struct{}{}, {"wasm", "internal/runtime/sys", "GetCallerPC"}: struct{}{}, {"wasm", "internal/runtime/sys", "GetCallerSP"}: struct{}{}, {"wasm", "internal/runtime/sys", "GetClosurePtr"}: struct{}{}, @@ -1341,6 +1359,8 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ {"wasm", "math/bits", "TrailingZeros8"}: struct{}{}, {"wasm", "runtime", "KeepAlive"}: struct{}{}, {"wasm", "runtime", "slicebytetostringtmp"}: struct{}{}, + {"wasm", "crypto/subtle", "ConstantTimeSelect"}: struct{}{}, + {"wasm", "crypto/subtle", "constantTimeBoolToUint8"}: struct{}{}, } func TestIntrinsics(t *testing.T) { |
