aboutsummaryrefslogtreecommitdiff
path: root/src/crypto
diff options
context:
space:
mode:
authorCherry Mui <cherryyz@google.com>2024-11-14 18:36:20 -0500
committerCherry Mui <cherryyz@google.com>2024-11-15 17:18:17 +0000
commit5c534ef546d55a2680d2bc2e88ea329e6a8b068c (patch)
tree5b7e695e8ea89e1d01a41af5e2e417f055686248 /src/crypto
parente30ce3c498b623f5a492a8e77c32077c1ecf3a1f (diff)
downloadgo-5c534ef546d55a2680d2bc2e88ea329e6a8b068c.tar.xz
crypto/internal/bigmod: apply wasm-specific implementation for only sized addMulVVW
Restore generic addMulVVW for wasm (and therefore for all architectures). Apply wasm-specific implementation for only the explicitly sized functions (addMulVVW1024 etc.). Also, for the sized functions, use unsafe pointer calculations directly, without converting them back to slices. (This is what the assembly code does on other architectures.) This results in a bit more speedup for crypto/rsa benchmarks on Wasm: pkg: crypto/rsa │ old.txt │ new.txt │ │ sec/op │ sec/op vs base │ DecryptPKCS1v15/2048 4.906m ± 0% 4.221m ± 1% -13.96% (p=0.000 n=25) DecryptPKCS1v15/3072 15.18m ± 0% 13.57m ± 0% -10.64% (p=0.000 n=25) DecryptPKCS1v15/4096 35.49m ± 0% 32.64m ± 1% -8.04% (p=0.000 n=25) EncryptPKCS1v15/2048 177.1µ ± 0% 162.3µ ± 0% -8.35% (p=0.000 n=25) DecryptOAEP/2048 4.900m ± 1% 4.233m ± 0% -13.61% (p=0.000 n=25) EncryptOAEP/2048 181.8µ ± 0% 166.8µ ± 0% -8.24% (p=0.000 n=25) SignPKCS1v15/2048 5.026m ± 1% 4.341m ± 0% -13.63% (p=0.000 n=25) VerifyPKCS1v15/2048 177.2µ ± 0% 161.3µ ± 1% -8.97% (p=0.000 n=25) SignPSS/2048 5.020m ± 0% 4.344m ± 1% -13.47% (p=0.000 n=25) VerifyPSS/2048 182.2µ ± 1% 166.6µ ± 0% -8.52% (p=0.000 n=25) geomean 1.791m 1.598m -10.78% Change-Id: I89775c46a0bbe29380889047ba393c6cfc093ff1 Reviewed-on: https://go-review.googlesource.com/c/go/+/628255 Reviewed-by: Filippo Valsorda <filippo@golang.org> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: David Chase <drchase@google.com>
Diffstat (limited to 'src/crypto')
-rw-r--r--src/crypto/internal/bigmod/nat.go19
-rw-r--r--src/crypto/internal/bigmod/nat_generic.go28
-rw-r--r--src/crypto/internal/bigmod/nat_noasm.go2
-rw-r--r--src/crypto/internal/bigmod/nat_wasm.go33
4 files changed, 45 insertions, 37 deletions
diff --git a/src/crypto/internal/bigmod/nat.go b/src/crypto/internal/bigmod/nat.go
index 71699078e2..5cbae40efe 100644
--- a/src/crypto/internal/bigmod/nat.go
+++ b/src/crypto/internal/bigmod/nat.go
@@ -688,6 +688,25 @@ func (x *Nat) montgomeryMul(a *Nat, b *Nat, m *Modulus) *Nat {
return x
}
+// addMulVVW multiplies the multi-word value x by the single-word value y,
+// adding the result to the multi-word value z and returning the final carry.
+// It can be thought of as one row of a pen-and-paper column multiplication.
+func addMulVVW(z, x []uint, y uint) (carry uint) {
+ _ = x[len(z)-1] // bounds check elimination hint
+ for i := range z {
+ hi, lo := bits.Mul(x[i], y)
+ lo, c := bits.Add(lo, z[i], 0)
+ // We use bits.Add with zero to get an add-with-carry instruction that
+ // absorbs the carry from the previous bits.Add.
+ hi, _ = bits.Add(hi, 0, c)
+ lo, c = bits.Add(lo, carry, 0)
+ hi, _ = bits.Add(hi, 0, c)
+ carry = hi
+ z[i] = lo
+ }
+ return carry
+}
+
// Mul calculates x = x * y mod m.
//
// The length of both operands must be the same as the modulus. Both operands
diff --git a/src/crypto/internal/bigmod/nat_generic.go b/src/crypto/internal/bigmod/nat_generic.go
deleted file mode 100644
index a44d2ec548..0000000000
--- a/src/crypto/internal/bigmod/nat_generic.go
+++ /dev/null
@@ -1,28 +0,0 @@
-// Copyright 2024 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !wasm
-
-package bigmod
-
-import "math/bits"
-
-// addMulVVW multiplies the multi-word value x by the single-word value y,
-// adding the result to the multi-word value z and returning the final carry.
-// It can be thought of as one row of a pen-and-paper column multiplication.
-func addMulVVW(z, x []uint, y uint) (carry uint) {
- _ = x[len(z)-1] // bounds check elimination hint
- for i := range z {
- hi, lo := bits.Mul(x[i], y)
- lo, c := bits.Add(lo, z[i], 0)
- // We use bits.Add with zero to get an add-with-carry instruction that
- // absorbs the carry from the previous bits.Add.
- hi, _ = bits.Add(hi, 0, c)
- lo, c = bits.Add(lo, carry, 0)
- hi, _ = bits.Add(hi, 0, c)
- carry = hi
- z[i] = lo
- }
- return carry
-}
diff --git a/src/crypto/internal/bigmod/nat_noasm.go b/src/crypto/internal/bigmod/nat_noasm.go
index 2501a6fb4c..dbec229f5d 100644
--- a/src/crypto/internal/bigmod/nat_noasm.go
+++ b/src/crypto/internal/bigmod/nat_noasm.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-//go:build purego || !(386 || amd64 || arm || arm64 || loong64 || ppc64 || ppc64le || riscv64 || s390x)
+//go:build purego || !(386 || amd64 || arm || arm64 || loong64 || ppc64 || ppc64le || riscv64 || s390x || wasm)
package bigmod
diff --git a/src/crypto/internal/bigmod/nat_wasm.go b/src/crypto/internal/bigmod/nat_wasm.go
index 81ffdb286f..b4aaff74cf 100644
--- a/src/crypto/internal/bigmod/nat_wasm.go
+++ b/src/crypto/internal/bigmod/nat_wasm.go
@@ -2,25 +2,30 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+//go:build !purego
+
package bigmod
+import "unsafe"
+
// The generic implementation relies on 64x64->128 bit multiplication and
// 64-bit add-with-carry, which are compiler intrinsics on many architectures.
// Wasm doesn't support those. Here we implement it with 32x32->64 bit
// operations, which is more efficient on Wasm.
-// addMulVVW multiplies the multi-word value x by the single-word value y,
-// adding the result to the multi-word value z and returning the final carry.
-// It can be thought of as one row of a pen-and-paper column multiplication.
-func addMulVVW(z, x []uint, y uint) (carry uint) {
+func idx(x *uint, i uintptr) *uint {
+ return (*uint)(unsafe.Pointer(uintptr(unsafe.Pointer(x)) + i*8))
+}
+
+func addMulVVWWasm(z, x *uint, y uint, n uintptr) (carry uint) {
const mask32 = 1<<32 - 1
y0 := y & mask32
y1 := y >> 32
- _ = x[len(z)-1] // bounds check elimination hint
- for i, zi := range z {
- xi := x[i]
+ for i := range n {
+ xi := *idx(x, i)
x0 := xi & mask32
x1 := xi >> 32
+ zi := *idx(z, i)
z0 := zi & mask32
z1 := zi >> 32
c0 := carry & mask32
@@ -38,7 +43,19 @@ func addMulVVW(z, x []uint, y uint) (carry uint) {
h10 := w10 >> 32
carry = x1*y1 + h10 + h01
- z[i] = w10<<32 + l00
+ *idx(z, i) = w10<<32 + l00
}
return carry
}
+
+func addMulVVW1024(z, x *uint, y uint) (c uint) {
+ return addMulVVWWasm(z, x, y, 1024/_W)
+}
+
+func addMulVVW1536(z, x *uint, y uint) (c uint) {
+ return addMulVVWWasm(z, x, y, 1536/_W)
+}
+
+func addMulVVW2048(z, x *uint, y uint) (c uint) {
+ return addMulVVWWasm(z, x, y, 2048/_W)
+}