aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Julian Zhu <julian.oerv@isrc.iscas.ac.cn> 2026-03-12 11:14:13 +0800
committer Julian Zhu <julian.oerv@isrc.iscas.ac.cn> 2026-03-27 21:02:30 -0700
commit e7e45d770c4d46617dd17b0f5c7dd58bd448f47b (patch)
tree 09f7d69d519acbf2a6d1382001be568c940b18d8 /src
parent 880f126233a377ee656612e68710eefe7964f646 (diff)
download go-e7e45d770c4d46617dd17b0f5c7dd58bd448f47b.tar.xz
math: add assembly func archExp and archExp2 for riscv64
goos: linux
goarch: riscv64
pkg: math
            │  math-old   │              math-new               │
            │   sec/op    │   sec/op     vs base                │
Exp-64        41.21n ± 0%   32.03n ± 0%  -22.28% (p=0.000 n=8)
Exp2-64       38.86n ± 1%   28.18n ± 0%  -27.49% (p=0.000 n=8)
Exp2Go-64     40.36n ± 1%   40.51n ± 1%   +0.36% (p=0.049 n=8)
Frexp-64      5.681n ± 1%   5.446n ± 0%   -4.14% (p=0.000 n=8)
Ldexp-64      7.676n ± 1%   7.555n ± 0%   -1.58% (p=0.001 n=8)

Change-Id: Ic122bf9598302f947c6dbf751db591f403c50373
Reviewed-on: https://go-review.googlesource.com/c/go/+/754687
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Carlos Amedee <carlos@golang.org>
Reviewed-by: Meng Zhuo <mengzhuo1203@gmail.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Diffstat (limited to 'src')
-rw-r--r-- src/math/exp2_asm.go 2
-rw-r--r-- src/math/exp2_noasm.go 2
-rw-r--r-- src/math/exp_asm.go 2
-rw-r--r-- src/math/exp_noasm.go 2
-rw-r--r-- src/math/exp_riscv64.s 236
5 files changed, 240 insertions, 4 deletions
diff --git a/src/math/exp2_asm.go b/src/math/exp2_asm.go
index 1e78759374..191dfd26ea 100644
--- a/src/math/exp2_asm.go
+++ b/src/math/exp2_asm.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-//go:build arm64 || loong64
+//go:build arm64 || loong64 || riscv64
package math
diff --git a/src/math/exp2_noasm.go b/src/math/exp2_noasm.go
index 847138b622..95da8148c2 100644
--- a/src/math/exp2_noasm.go
+++ b/src/math/exp2_noasm.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-//go:build !arm64 && !loong64
+//go:build !arm64 && !loong64 && !riscv64
package math
diff --git a/src/math/exp_asm.go b/src/math/exp_asm.go
index 125529fca3..ca4d380d3e 100644
--- a/src/math/exp_asm.go
+++ b/src/math/exp_asm.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-//go:build amd64 || arm64 || loong64 || s390x
+//go:build amd64 || arm64 || loong64 || riscv64 || s390x
package math
diff --git a/src/math/exp_noasm.go b/src/math/exp_noasm.go
index bf5e84b736..5123922f6d 100644
--- a/src/math/exp_noasm.go
+++ b/src/math/exp_noasm.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-//go:build !amd64 && !arm64 && !loong64 && !s390x
+//go:build !amd64 && !arm64 && !loong64 && !riscv64 && !s390x
package math
diff --git a/src/math/exp_riscv64.s b/src/math/exp_riscv64.s
new file mode 100644
index 0000000000..a038b56676
--- /dev/null
+++ b/src/math/exp_riscv64.s
@@ -0,0 +1,236 @@
+// Copyright 2026 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+#define NearZero 0x3e30000000000000 // 2**-28 as float64 bits; NOTE(review): not referenced in this file, the code loads exprodata<>+88 instead
+#define PosInf 0x7ff0000000000000 // float64 bit pattern of +Inf, stored as the overflow result
+#define FracMask 0x000fffffffffffff // low 52 bits of a float64: the fraction field
+#define C1 0x3cb0000000000000 // 2**-52 as float64 bits, the multiplier used on the denormal path of the inlined Ldexp
+
+DATA exprodata<>+0(SB)/8, $0.0 // 0.0, compared against x to pick the rounding direction
+DATA exprodata<>+8(SB)/8, $0.5 // 0.5, rounding bias applied before truncating to k
+DATA exprodata<>+16(SB)/8, $1.0 // 1.0
+DATA exprodata<>+24(SB)/8, $2.0 // 2.0
+DATA exprodata<>+32(SB)/8, $6.93147180369123816490e-01 // Ln2Hi
+DATA exprodata<>+40(SB)/8, $1.90821492927058770002e-10 // Ln2Lo
+DATA exprodata<>+48(SB)/8, $1.44269504088896338700e+00 // Log2e
+DATA exprodata<>+56(SB)/8, $7.09782712893383973096e+02 // Overflow: archExp returns +Inf above this
+DATA exprodata<>+64(SB)/8, $-7.45133219101941108420e+02 // Underflow: archExp returns 0 below this
+DATA exprodata<>+72(SB)/8, $1.0239999999999999e+03 // Overflow2: archExp2 returns +Inf above this
+DATA exprodata<>+80(SB)/8, $-1.0740e+03 // Underflow2: archExp2 returns 0 below this
+DATA exprodata<>+88(SB)/8, $3.7252902984619141e-09 // NearZero (2**-28): archExp returns 1 + x when |x| is below this
+GLOBL exprodata<>+0(SB), NOPTR|RODATA, $96 // 12 entries * 8 bytes
+
+DATA expmultirodata<>+0(SB)/8, $1.66666666666666657415e-01 // P1 (polynomial coefficients for the "compute c" step, evaluated from P5 down to P1)
+DATA expmultirodata<>+8(SB)/8, $-2.77777777770155933842e-03 // P2
+DATA expmultirodata<>+16(SB)/8, $6.61375632143793436117e-05 // P3
+DATA expmultirodata<>+24(SB)/8, $-1.65339022054652515390e-06 // P4
+DATA expmultirodata<>+32(SB)/8, $4.13813679705723846039e-08 // P5
+GLOBL expmultirodata<>+0(SB), NOPTR|RODATA, $40 // 5 entries * 8 bytes
+
+// archExp returns e**x, the base-e exponential of x.
+// This is an assembly implementation of the method used for function Exp in file exp.go.
+//
+// func archExp(x float64) float64
+TEXT ·archExp(SB),$0-16
+ MOVD x+0(FP), F0 // F0 = x
+
+ MOV $exprodata<>+0(SB), X5 // X5 = base address of the constant table
+ MOVD 56(X5), F1 // Overflow
+ MOVD 64(X5), F2 // Underflow
+ MOVD 88(X5), F3 // NearZero
+ MOVD 16(X5), F17 // 1.0
+
+ FEQD F0, F0, X7 // X7 = (x == x), which is 0 only for NaN
+ BEQ X0, X7, isNaN // x = NaN, return NaN
+
+ FLTD F0, F1, X7 // X7 = (Overflow < x)
+ BNE X0, X7, overflow // x > Overflow, return PosInf
+
+ FLTD F2, F0, X7 // X7 = (x < Underflow)
+ BNE X0, X7, underflow // x < Underflow, return 0
+
+ FABSD F0, F5 // F5 = fabs(x)
+ FLTD F3, F5, X7 // X7 = (fabs(x) < NearZero)
+ BNE X0, X7, nearzero // fabs(x) < NearZero, return 1 + x
+
+ // argument reduction, x = k*ln2 + r, |r| <= 0.5*ln2
+ // computed as r = hi - lo for extra precision.
+ MOVD 0(X5), F5 // 0.0
+ MOVD 8(X5), F3 // 0.5
+ MOVD 48(X5), F2 // Log2e
+ FLTD F0, F5, X7 // X7 = (0 < x)
+ BNE X0, X7, add // x > 0
+sub:
+ FMSUBD F0, F2, F3, F3 // Log2e*x - 0.5
+ JMP 2(PC) // skip the FMADDD under add
+add:
+ FMADDD F0, F2, F3, F3 // Log2e*x + 0.5
+
+ FCVTLD.RTZ F3, X16 // float64 -> int64, rounding toward zero: X16 = k
+ FCVTDL X16, F3 // int64 -> float64: F3 = float64(k)
+
+ MOVD 32(X5), F4 // Ln2Hi
+ MOVD 40(X5), F5 // Ln2Lo
+ FNMSUBD F3, F4, F0, F4 // hi = x - k*Ln2Hi
+ FMULD F3, F5, F5 // lo = k*Ln2Lo
+ FSUBD F5, F4, F6 // r = hi - lo
+ FMULD F6, F6, F7 // t = r*r
+
+ // compute c
+ // operand order in this file: FMADDD a, b, c, r computes r = a*b + c
+ // r=(FMA x y z) -> FMADDD x, y, z, r
+ MOV $expmultirodata<>+0(SB), X6 // X6 = base address of the P1..P5 table
+ MOVD 32(X6), F8 // P5
+ MOVD 24(X6), F9 // P4
+ FMADDD F7, F8, F9, F13 // t*P5 + P4
+ MOVD 16(X6), F10 // P3
+ FMADDD F7, F13, F10, F13 // t*(...) + P3
+ MOVD 8(X6), F11 // P2
+ FMADDD F7, F13, F11, F13 // t*(...) + P2
+ MOVD 0(X6), F12 // P1
+ FMADDD F7, F13, F12, F13 // t*(...) + P1
+ FNMSUBD F7, F13, F6, F13 // c = r - t*(P1 + t*(P2 + ...))
+
+ // compute y
+ MOVD 24(X5), F14 // 2.0
+ FSUBD F13, F14, F14 // 2 - c
+ FMULD F6, F13, F15 // r*c
+ FDIVD F14, F15, F15 // r*c / (2 - c)
+ FSUBD F15, F5, F15 // lo - r*c/(2 - c)
+ FSUBD F4, F15, F15 // (lo - r*c/(2 - c)) - hi
+ FSUBD F15, F17, F16 // y = 1 - ((lo - r*c/(2 - c)) - hi)
+
+ // inline Ldexp(y, k), benefit:
+ // 1, no parameter pass overhead.
+ // 2, skip unnecessary checks for Inf/NaN/Zero
+ MOVD F16, X15 // X15 = raw bits of y
+ MOV $FracMask, X20
+ AND X20, X15, X17 // fraction
+ SRL $52, X15, X18 // exponent
+ ADD X16, X18 // exponent += k
+ MOV $1, X21
+ BGE X18, X21, normal // exponent >= 1: keep F17 = 1.0 as the multiplier
+ ADD $52, X18 // denormal
+ MOV $C1, X19
+ MOVD X19, F17 // m = 2**-52 undoes the +52 exponent adjustment
+normal:
+ SLL $52, X18 // move the exponent into its bit position
+ OR X18, X17, X15 // recombine exponent and fraction
+ MOVD X15, F0
+ FMULD F17, F0, F0 // return m * x
+ MOVD F0, ret+8(FP)
+ RET
+nearzero:
+ FADDD F17, F0, F0 // 1 + x, then fall through to the store below
+isNaN:
+ MOVD F0, ret+8(FP) // F0 is the NaN input, or 1 + x when reached from nearzero
+ RET
+underflow:
+ MOV X0, ret+8(FP) // all-zero bits, i.e. +0
+ RET
+overflow:
+ MOV $PosInf, X15
+ MOV X15, ret+8(FP) // +Inf
+ RET
+
+
+// archExp2 returns 2**x, the base-2 exponential of x.
+// This is an assembly implementation of the method used for function Exp2 in file exp.go.
+//
+// func archExp2(x float64) float64
+TEXT ·archExp2(SB),$0-16
+ MOVD x+0(FP), F0 // F0 = x
+
+ MOV $exprodata<>+0(SB), X5 // X5 = base address of the constant table
+ MOVD 72(X5), F1 // Overflow2
+ MOVD 80(X5), F2 // Underflow2
+ MOVD 88(X5), F3 // NearZero; NOTE(review): F3 is overwritten under sub/add before any read, so this load looks unused
+
+ FEQD F0, F0, X7 // X7 = (x == x), which is 0 only for NaN
+ BEQ X0, X7, isNaN // x = NaN, return NaN
+
+ FLTD F0, F1, X7 // X7 = (Overflow2 < x)
+ BNE X0, X7, overflow // x > Overflow2, return PosInf
+
+ FLTD F2, F0, X7 // X7 = (x < Underflow2)
+ BNE X0, X7, underflow // x < Underflow2, return 0
+
+ // argument reduction; x = r*lg(e) + k with |r| <= ln(2)/2
+ // computed as r = hi - lo for extra precision.
+ MOVD 0(X5), F10 // 0.0
+ MOVD 8(X5), F2 // 0.5
+ FLTD F0, F10, X7 // X7 = (0 < x)
+ BNE X0, X7, add // x > 0
+sub:
+ FSUBD F2, F0, F3 // x - 0.5
+ JMP 2(PC) // skip the FADDD under add
+add:
+ FADDD F2, F0, F3 // x + 0.5
+
+ FCVTLD.RTZ F3, X16 // float64 -> int64, rounding toward zero: X16 = k
+ FCVTDL X16, F3 // int64 -> float64: F3 = float64(k)
+
+ MOVD 32(X5), F4 // Ln2Hi
+ MOVD 40(X5), F5 // Ln2Lo
+ FSUBD F3, F0, F3 // t = x - k
+ FMULD F3, F4, F4 // hi = t*Ln2Hi
+ FNMSUBD F5, F3, F10, F5 // lo = -(Ln2Lo*t) + 0.0 (F10 still holds 0.0 here)
+ FSUBD F5, F4, F6 // r = hi - lo
+ FMULD F6, F6, F7 // t = r*r
+
+ // compute c
+ MOV $expmultirodata<>+0(SB), X6 // X6 = base address of the P1..P5 table
+ MOVD 32(X6), F8 // P5
+ MOVD 24(X6), F9 // P4
+ FMADDD F7, F8, F9, F13 // t*P5 + P4
+ MOVD 16(X6), F10 // P3 (F10 no longer holds 0.0)
+ FMADDD F7, F13, F10, F13 // t*(...) + P3
+ MOVD 8(X6), F11 // P2
+ FMADDD F7, F13, F11, F13 // t*(...) + P2
+ MOVD 0(X6), F12 // P1
+ FMADDD F7, F13, F12, F13 // t*(...) + P1
+ FNMSUBD F7, F13, F6, F13 // c = r - t*(P1 + t*(P2 + ...))
+
+ // compute y
+ MOVD 24(X5), F14 // 2.0
+ FSUBD F13, F14, F14 // 2 - c
+ FMULD F6, F13, F15 // r*c
+ FDIVD F14, F15, F15 // r*c / (2 - c)
+
+ MOVD 16(X5), F17 // 1.0
+ FSUBD F15, F5, F15 // lo - r*c/(2 - c)
+ FSUBD F4, F15, F15 // (lo - r*c/(2 - c)) - hi
+ FSUBD F15, F17, F16 // y = 1 - ((lo - r*c/(2 - c)) - hi)
+
+ // inline Ldexp(y, k), benefit:
+ // 1, no parameter pass overhead.
+ // 2, skip unnecessary checks for Inf/NaN/Zero
+ MOVD F16, X15 // X15 = raw bits of y
+ MOV $FracMask, X20
+ SRL $52, X15, X18 // exponent
+ AND X20, X15, X17 // fraction
+ ADD X16, X18 // exponent += k
+ MOV $1, X21
+ BGE X18, X21, normal // exponent >= 1: keep F17 = 1.0 as the multiplier
+
+ ADD $52, X18 // denormal
+ MOV $C1, X19
+ MOVD X19, F17 // m = 2**-52 undoes the +52 exponent adjustment
+normal:
+ SLL $52, X18 // move the exponent into its bit position
+ OR X18, X17, X15 // recombine exponent and fraction
+ MOVD X15, F0
+ FMULD F17, F0, F0 // return m * x
+isNaN:
+ MOVD F0, ret+8(FP) // F0 is the computed result, or the NaN input when branched to directly
+ RET
+underflow:
+ MOV X0, ret+8(FP) // all-zero bits, i.e. +0
+ RET
+overflow:
+ MOV $PosInf, X15
+ MOV X15, ret+8(FP) // +Inf
+ RET