math: add assembly func archExp and archExp2 for riscv64

goos: linux
goarch: riscv64
pkg: math
            │  math-old   │              math-new               │
            │   sec/op    │   sec/op     vs base                │
Exp-64        41.21n ± 0%   32.03n ± 0%  -22.28% (p=0.000 n=8)
Exp2-64       38.86n ± 1%   28.18n ± 0%  -27.49% (p=0.000 n=8)
Exp2Go-64     40.36n ± 1%   40.51n ± 1%   +0.36% (p=0.049 n=8)
Frexp-64      5.681n ± 1%   5.446n ± 0%   -4.14% (p=0.000 n=8)
Ldexp-64      7.676n ± 1%   7.555n ± 0%   -1.58% (p=0.001 n=8)

Change-Id: Ic122bf9598302f947c6dbf751db591f403c50373
Reviewed-on: https://go-review.googlesource.com/c/go/+/754687
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Carlos Amedee <carlos@golang.org>
Reviewed-by: Meng Zhuo <mengzhuo1203@gmail.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
author: Julian Zhu <julian.oerv@isrc.iscas.ac.cn> 2026-03-12 11:14:13 +0800
committer: Julian Zhu <julian.oerv@isrc.iscas.ac.cn> 2026-03-27 21:02:30 -0700
commit: e7e45d770c4d46617dd17b0f5c7dd58bd448f47b (patch)
tree: 09f7d69d519acbf2a6d1382001be568c940b18d8 /src
parent: 880f126233a377ee656612e68710eefe7964f646 (diff)
download: go-e7e45d770c4d46617dd17b0f5c7dd58bd448f47b.tar.xz
5 files changed, 240 insertions, 4 deletions
diff --git a/src/math/exp2_asm.go b/src/math/exp2_asm.go
index 1e78759374..191dfd26ea 100644
--- a/src/math/exp2_asm.go
+++ b/src/math/exp2_asm.go
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-//go:build arm64 || loong64
+//go:build arm64 || loong64 || riscv64
 
 package math
 
diff --git a/src/math/exp2_noasm.go b/src/math/exp2_noasm.go
index 847138b622..95da8148c2 100644
--- a/src/math/exp2_noasm.go
+++ b/src/math/exp2_noasm.go
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-//go:build !arm64 && !loong64
+//go:build !arm64 && !loong64 && !riscv64
 
 package math
 
diff --git a/src/math/exp_asm.go b/src/math/exp_asm.go
index 125529fca3..ca4d380d3e 100644
--- a/src/math/exp_asm.go
+++ b/src/math/exp_asm.go
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-//go:build amd64 || arm64 || loong64 || s390x
+//go:build amd64 || arm64 || loong64 || riscv64 || s390x
 
 package math
 
diff --git a/src/math/exp_noasm.go b/src/math/exp_noasm.go
index bf5e84b736..5123922f6d 100644
--- a/src/math/exp_noasm.go
+++ b/src/math/exp_noasm.go
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-//go:build !amd64 && !arm64 && !loong64 && !s390x
+//go:build !amd64 && !arm64 && !loong64 && !riscv64 && !s390x
 
 package math
 
diff --git a/src/math/exp_riscv64.s b/src/math/exp_riscv64.s
new file mode 100644
index 0000000000..a038b56676
--- /dev/null
+++ b/src/math/exp_riscv64.s
@@ -0,0 +1,236 @@
+// Copyright 2026 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+#define NearZero	0x3e30000000000000	// 2**-28; not referenced below, the code loads exprodata<>+88 instead
+#define PosInf		0x7ff0000000000000	// float64 bit pattern of +Inf, stored by the overflow paths
+#define FracMask	0x000fffffffffffff	// low 52 bits: the float64 fraction field
+#define C1		0x3cb0000000000000	// 2**-52, final scale factor on the denormal path
+
+DATA exprodata<>+0(SB)/8, $0.0	// zero, compared against x for the sign test
+DATA exprodata<>+8(SB)/8, $0.5	// bias added or subtracted before truncating to k
+DATA exprodata<>+16(SB)/8, $1.0	// used for 1 + x and for y = 1 - (...)
+DATA exprodata<>+24(SB)/8, $2.0	// used for 2 - c
+DATA exprodata<>+32(SB)/8, $6.93147180369123816490e-01	// Ln2Hi
+DATA exprodata<>+40(SB)/8, $1.90821492927058770002e-10	// Ln2Lo
+DATA exprodata<>+48(SB)/8, $1.44269504088896338700e+00	// Log2e
+DATA exprodata<>+56(SB)/8, $7.09782712893383973096e+02	// Overflow: archExp returns +Inf above this
+DATA exprodata<>+64(SB)/8, $-7.45133219101941108420e+02	// Underflow: archExp returns 0 below this
+DATA exprodata<>+72(SB)/8, $1.0239999999999999e+03	// Overflow2: archExp2 returns +Inf above this
+DATA exprodata<>+80(SB)/8, $-1.0740e+03			// Underflow2: archExp2 returns 0 below this
+DATA exprodata<>+88(SB)/8, $3.7252902984619141e-09	// NearZero = 2**-28
+GLOBL exprodata<>+0(SB), NOPTR|RODATA, $96	// 12 read-only float64 constants
+
+DATA expmultirodata<>+0(SB)/8, $1.66666666666666657415e-01	// P1
+DATA expmultirodata<>+8(SB)/8, $-2.77777777770155933842e-03	// P2
+DATA expmultirodata<>+16(SB)/8, $6.61375632143793436117e-05	// P3
+DATA expmultirodata<>+24(SB)/8, $-1.65339022054652515390e-06	// P4
+DATA expmultirodata<>+32(SB)/8, $4.13813679705723846039e-08	// P5
+GLOBL expmultirodata<>+0(SB), NOPTR|RODATA, $40	// 5 read-only polynomial coefficients
+
+// archExp returns e**x, the base-e exponential of x.
+// It is an assembly implementation of the method used for function Exp in file exp.go.
+// Special cases: NaN -> x, x > Overflow -> +Inf, x < Underflow -> 0, |x| < NearZero -> 1 + x.
+// func archExp(x float64) float64
+TEXT ·archExp(SB),$0-16
+	MOVD	x+0(FP), F0	// F0 = x
+
+	MOV	$exprodata<>+0(SB), X5	// X5 = base of the constant table
+	MOVD	56(X5), F1	// Overflow
+	MOVD	64(X5), F2	// Underflow
+	MOVD	88(X5), F3	// NearZero
+	MOVD	16(X5), F17	// 1.0
+
+	FEQD	F0, F0, X7	// X7 = (x == x), which is 0 only for NaN
+	BEQ	X0, X7, isNaN		// x = NaN, return NaN
+
+	FLTD	F0, F1, X7	// X7 = (Overflow < x)
+	BNE	X0, X7, overflow	// x > Overflow, return PosInf
+
+	FLTD	F2, F0, X7	// X7 = (x < Underflow)
+	BNE	X0, X7, underflow	// x < Underflow, return 0
+
+	FABSD	F0, F5	// F5 = |x|
+	FLTD	F3, F5, X7	// X7 = (|x| < NearZero)
+	BNE	X0, X7, nearzero	// fabs(x) < NearZero, return 1 + x
+
+	// Argument reduction: x = k*ln2 + r, |r| <= 0.5*ln2,
+	// with r computed as hi - lo for extra precision.
+	MOVD	0(X5), F5	// 0.0
+	MOVD	8(X5), F3	// 0.5
+	MOVD	48(X5), F2	// Log2e
+	FLTD	F0, F5, X7	// X7 = (0 < x)
+	BNE	X0, X7, add		// x > 0
+sub:
+	FMSUBD	F0, F2, F3, F3	// Log2e*x - 0.5
+	JMP	2(PC)	// skip the add case
+add:
+	FMADDD	F0, F2, F3, F3	// Log2e*x + 0.5
+
+	FCVTLD.RTZ	F3, X16	// float64 -> int64, rounding toward zero: X16 = k
+	FCVTDL	X16, F3		// int64 -> float64: F3 = float64(k)
+
+	MOVD	32(X5), F4	// Ln2Hi
+	MOVD	40(X5), F5	// Ln2Lo
+	FNMSUBD	F3, F4, F0, F4	// hi = x - k*Ln2Hi
+	FMULD	F3, F5, F5	// lo = k*Ln2Lo
+	FSUBD	F5, F4, F6	// r = hi - lo
+	FMULD	F6, F6, F7	// t = r*r
+
+	// compute c = r - t*(P1 + t*(P2 + t*(P3 + t*(P4 + t*P5))))
+	// The polynomial is evaluated by Horner's rule using fused multiply-adds:
+	// FMADDD a, b, c, d computes d = a*b + c; FNMSUBD a, b, c, d computes d = c - a*b.
+	MOV	$expmultirodata<>+0(SB), X6	// X6 = base of P1..P5
+	MOVD	32(X6), F8	// P5
+	MOVD	24(X6), F9	// P4
+	FMADDD	F7, F8, F9, F13	// t*P5 + P4
+	MOVD	16(X6), F10	// P3
+	FMADDD	F7, F13, F10, F13	// t*(...) + P3
+	MOVD	8(X6), F11	// P2
+	FMADDD	F7, F13, F11, F13	// t*(...) + P2
+	MOVD	0(X6), F12	// P1
+	FMADDD	F7, F13, F12, F13	// t*(...) + P1
+	FNMSUBD	F7, F13, F6, F13	// c = r - t*(...)
+
+	// compute y = 1 - ((lo - (r*c)/(2-c)) - hi)
+	MOVD	24(X5), F14	// 2.0
+	FSUBD	F13, F14, F14	// 2 - c
+	FMULD	F6, F13, F15	// r*c
+	FDIVD	F14, F15, F15	// (r*c)/(2-c)
+	FSUBD	F15, F5, F15	// lo - (r*c)/(2-c)
+	FSUBD	F4, F15, F15	// (...) - hi
+	FSUBD	F15, F17, F16	// y = 1 - (...)
+
+	// Inline Ldexp(y, k). Benefits:
+	// 1. no parameter-passing overhead;
+	// 2. the checks for Inf/NaN/Zero are skipped.
+	MOVD	F16, X15	// X15 = raw bits of y
+	MOV	$FracMask, X20
+	AND	X20, X15, X17	// fraction
+	SRL	$52, X15, X18	// exponent (bits 63..52 of y)
+	ADD	X16, X18	// exponent += k
+	MOV	$1, X21
+	BGE	X18, X21, normal	// exponent >= 1: F17 stays 1.0
+	ADD	$52, X18		// denormal: raise the exponent by 52 ...
+	MOV	$C1, X19
+	MOVD	X19, F17	// ... and scale by 2**-52 in the final multiply
+normal:
+	SLL	$52, X18	// move the exponent back into its field
+	OR	X18, X17, X15	// exponent | fraction
+	MOVD	X15, F0
+	FMULD	F17, F0, F0		// multiply by 1.0 (normal) or 2**-52 (denormal)
+	MOVD	F0, ret+8(FP)
+	RET
+nearzero:
+	FADDD	F17, F0, F0	// 1 + x
+isNaN:
+	MOVD	F0, ret+8(FP)	// F0 is x (NaN case) or 1 + x (nearzero case)
+	RET
+underflow:
+	MOV	X0, ret+8(FP)	// all-zero bits: +0
+	RET
+overflow:
+	MOV	$PosInf, X15
+	MOV	X15, ret+8(FP)	// +Inf
+	RET
+
+
+// archExp2 returns 2**x, the base-2 exponential of x.
+// It is an assembly implementation of the method used for function Exp2 in file exp.go.
+// Special cases: NaN -> x, x > Overflow2 -> +Inf, x < Underflow2 -> 0.
+// func archExp2(x float64) float64
+TEXT ·archExp2(SB),$0-16
+	MOVD	x+0(FP), F0	// F0 = x
+
+	MOV	$exprodata<>+0(SB), X5	// X5 = base of the constant table
+	MOVD	72(X5), F1	// Overflow2
+	MOVD	80(X5), F2	// Underflow2
+	// There is no near-zero shortcut in this function, so NearZero is not loaded.
+
+	FEQD	F0, F0, X7	// X7 = (x == x), which is 0 only for NaN
+	BEQ	X0, X7, isNaN		// x = NaN, return NaN
+
+	FLTD	F0, F1, X7	// X7 = (Overflow2 < x)
+	BNE	X0, X7, overflow	// x > Overflow, return PosInf
+
+	FLTD	F2, F0, X7	// X7 = (x < Underflow2)
+	BNE	X0, X7, underflow	// x < Underflow, return 0
+
+	// argument reduction; x = r*lg(e) + k with |r| <= ln(2)/2
+	// computed as r = hi - lo for extra precision.
+	MOVD	0(X5), F10	// 0.0
+	MOVD	8(X5), F2	// 0.5
+	FLTD	F0, F10, X7	// X7 = (0 < x)
+	BNE	X0, X7, add	// x > 0
+sub:
+	FSUBD	F2, F0, F3	// x - 0.5
+	JMP	2(PC)	// skip the add case
+add:
+	FADDD	F2, F0, F3	// x + 0.5
+
+	FCVTLD.RTZ	F3, X16	// float64 -> int64, rounding toward zero: X16 = k
+	FCVTDL	X16, F3	// F3 = float64(k)
+
+	MOVD	32(X5), F4	// Ln2Hi
+	MOVD	40(X5), F5	// Ln2Lo
+	FSUBD	F3, F0, F3	// t = x - k
+	FMULD	F3, F4, F4	// hi = t*Ln2Hi
+	FNMSUBD	F5, F3, F10, F5	// lo = 0 - t*Ln2Lo
+	FSUBD	F5, F4, F6	// r = hi - lo
+	FMULD	F6, F6, F7	// t = r*r
+
+	// compute c = r - t*(P1 + t*(P2 + t*(P3 + t*(P4 + t*P5))))
+	MOV	$expmultirodata<>+0(SB), X6	// X6 = base of P1..P5
+	MOVD	32(X6), F8	// P5
+	MOVD	24(X6), F9	// P4
+	FMADDD	F7, F8, F9, F13	// t*P5 + P4
+	MOVD	16(X6), F10	// P3 (F10 no longer needs to hold 0.0)
+	FMADDD	F7, F13, F10, F13	// t*(...) + P3
+	MOVD	8(X6), F11	// P2
+	FMADDD	F7, F13, F11, F13	// t*(...) + P2
+	MOVD	0(X6), F12	// P1
+	FMADDD	F7, F13, F12, F13	// t*(...) + P1
+	FNMSUBD	F7, F13, F6, F13	// c = r - t*(...)
+
+	// compute y = 1 - ((lo - (r*c)/(2-c)) - hi)
+	MOVD	24(X5), F14	// 2.0
+	FSUBD	F13, F14, F14	// 2 - c
+	FMULD	F6, F13, F15	// r*c
+	FDIVD	F14, F15, F15	// (r*c)/(2-c)
+
+	MOVD	16(X5), F17	// 1.0
+	FSUBD	F15, F5, F15	// lo - (r*c)/(2-c)
+	FSUBD	F4, F15, F15	// (...) - hi
+	FSUBD	F15, F17, F16	// y = 1 - (...)
+
+	// Inline Ldexp(y, k). Benefits:
+	// 1. no parameter-passing overhead;
+	// 2. the checks for Inf/NaN/Zero are skipped.
+	MOVD	F16, X15	// X15 = raw bits of y
+	MOV	$FracMask, X20
+	SRL	$52, X15, X18	// exponent (bits 63..52 of y)
+	AND	X20, X15, X17	// fraction
+	ADD	X16, X18	// exponent += k
+	MOV	$1, X21
+	BGE	X18, X21, normal	// exponent >= 1: F17 stays 1.0
+
+	ADD	$52, X18		// denormal: raise the exponent by 52 ...
+	MOV	$C1, X19
+	MOVD	X19, F17	// ... and scale by 2**-52 in the final multiply
+normal:
+	SLL	$52, X18	// move the exponent back into its field
+	OR	X18, X17, X15	// exponent | fraction
+	MOVD	X15, F0
+	FMULD	F17, F0, F0	// multiply by 1.0 (normal) or 2**-52 (denormal)
+isNaN:
+	MOVD	F0, ret+8(FP)	// F0 is the computed result, or x itself in the NaN case
+	RET
+underflow:
+	MOV	X0, ret+8(FP)	// all-zero bits: +0
+	RET
+overflow:
+	MOV	$PosInf, X15
+	MOV	X15, ret+8(FP)	// +Inf
+	RET
author	Julian Zhu <julian.oerv@isrc.iscas.ac.cn>	2026-03-12 11:14:13 +0800
committer	Julian Zhu <julian.oerv@isrc.iscas.ac.cn>	2026-03-27 21:02:30 -0700
commit	e7e45d770c4d46617dd17b0f5c7dd58bd448f47b (patch)
tree	09f7d69d519acbf2a6d1382001be568c940b18d8 /src
parent	880f126233a377ee656612e68710eefe7964f646 (diff)
download	go-e7e45d770c4d46617dd17b0f5c7dd58bd448f47b.tar.xz