diff options
| author | Julian Zhu <julian.oerv@isrc.iscas.ac.cn> | 2026-03-12 11:14:13 +0800 |
|---|---|---|
| committer | Julian Zhu <julian.oerv@isrc.iscas.ac.cn> | 2026-03-27 21:02:30 -0700 |
| commit | e7e45d770c4d46617dd17b0f5c7dd58bd448f47b (patch) | |
| tree | 09f7d69d519acbf2a6d1382001be568c940b18d8 /src | |
| parent | 880f126233a377ee656612e68710eefe7964f646 (diff) | |
| download | go-e7e45d770c4d46617dd17b0f5c7dd58bd448f47b.tar.xz | |
math: add assembly func archExp and archExp2 for riscv64
goos: linux
goarch: riscv64
pkg: math
│ math-old │ math-new │
│ sec/op │ sec/op vs base │
Exp-64 41.21n ± 0% 32.03n ± 0% -22.28% (p=0.000 n=8)
Exp2-64 38.86n ± 1% 28.18n ± 0% -27.49% (p=0.000 n=8)
Exp2Go-64 40.36n ± 1% 40.51n ± 1% +0.36% (p=0.049 n=8)
Frexp-64 5.681n ± 1% 5.446n ± 0% -4.14% (p=0.000 n=8)
Ldexp-64 7.676n ± 1% 7.555n ± 0% -1.58% (p=0.001 n=8)
Change-Id: Ic122bf9598302f947c6dbf751db591f403c50373
Reviewed-on: https://go-review.googlesource.com/c/go/+/754687
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Carlos Amedee <carlos@golang.org>
Reviewed-by: Meng Zhuo <mengzhuo1203@gmail.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Diffstat (limited to 'src')
| -rw-r--r-- | src/math/exp2_asm.go | 2 | ||||
| -rw-r--r-- | src/math/exp2_noasm.go | 2 | ||||
| -rw-r--r-- | src/math/exp_asm.go | 2 | ||||
| -rw-r--r-- | src/math/exp_noasm.go | 2 | ||||
| -rw-r--r-- | src/math/exp_riscv64.s | 236 |
5 files changed, 240 insertions, 4 deletions
diff --git a/src/math/exp2_asm.go b/src/math/exp2_asm.go index 1e78759374..191dfd26ea 100644 --- a/src/math/exp2_asm.go +++ b/src/math/exp2_asm.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build arm64 || loong64 +//go:build arm64 || loong64 || riscv64 package math diff --git a/src/math/exp2_noasm.go b/src/math/exp2_noasm.go index 847138b622..95da8148c2 100644 --- a/src/math/exp2_noasm.go +++ b/src/math/exp2_noasm.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build !arm64 && !loong64 +//go:build !arm64 && !loong64 && !riscv64 package math diff --git a/src/math/exp_asm.go b/src/math/exp_asm.go index 125529fca3..ca4d380d3e 100644 --- a/src/math/exp_asm.go +++ b/src/math/exp_asm.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build amd64 || arm64 || loong64 || s390x +//go:build amd64 || arm64 || loong64 || riscv64 || s390x package math diff --git a/src/math/exp_noasm.go b/src/math/exp_noasm.go index bf5e84b736..5123922f6d 100644 --- a/src/math/exp_noasm.go +++ b/src/math/exp_noasm.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build !amd64 && !arm64 && !loong64 && !s390x +//go:build !amd64 && !arm64 && !loong64 && !riscv64 && !s390x package math diff --git a/src/math/exp_riscv64.s b/src/math/exp_riscv64.s new file mode 100644 index 0000000000..a038b56676 --- /dev/null +++ b/src/math/exp_riscv64.s @@ -0,0 +1,236 @@ +// Copyright 2026 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "textflag.h" + +#define NearZero 0x3e30000000000000 // 2**-28; not referenced by the code below, which loads the value from offset 88 of exprodata +#define PosInf 0x7ff0000000000000 // bit pattern stored as the result on overflow +#define FracMask 0x000fffffffffffff // low 52 bits, used to extract the fraction in the inlined Ldexp +#define C1 0x3cb0000000000000 // 2**-52 + +DATA exprodata<>+0(SB)/8, $0.0 +DATA exprodata<>+8(SB)/8, $0.5 +DATA exprodata<>+16(SB)/8, $1.0 +DATA exprodata<>+24(SB)/8, $2.0 +DATA exprodata<>+32(SB)/8, $6.93147180369123816490e-01 // Ln2Hi +DATA exprodata<>+40(SB)/8, $1.90821492927058770002e-10 // Ln2Lo +DATA exprodata<>+48(SB)/8, $1.44269504088896338700e+00 // Log2e +DATA exprodata<>+56(SB)/8, $7.09782712893383973096e+02 // Overflow: archExp returns PosInf above this +DATA exprodata<>+64(SB)/8, $-7.45133219101941108420e+02 // Underflow: archExp returns 0 below this +DATA exprodata<>+72(SB)/8, $1.0239999999999999e+03 // Overflow2: archExp2 returns PosInf above this +DATA exprodata<>+80(SB)/8, $-1.0740e+03 // Underflow2: archExp2 returns 0 below this +DATA exprodata<>+88(SB)/8, $3.7252902984619141e-09 // NearZero: archExp returns 1+x when fabs(x) is below this +GLOBL exprodata<>+0(SB), NOPTR|RODATA, $96 + +DATA expmultirodata<>+0(SB)/8, $1.66666666666666657415e-01 // P1 +DATA expmultirodata<>+8(SB)/8, $-2.77777777770155933842e-03 // P2 +DATA expmultirodata<>+16(SB)/8, $6.61375632143793436117e-05 // P3 +DATA expmultirodata<>+24(SB)/8, $-1.65339022054652515390e-06 // P4 +DATA expmultirodata<>+32(SB)/8, $4.13813679705723846039e-08 // P5 +GLOBL expmultirodata<>+0(SB), NOPTR|RODATA, $40 + +// Exp returns e**x, the base-e exponential of x. +// This is an assembly implementation of the method used for function Exp in file exp.go. 
+// +// func archExp(x float64) float64 +TEXT ·archExp(SB),$0-16 + MOVD x+0(FP), F0 // F0 = x + + MOV $exprodata<>+0(SB), X5 + MOVD 56(X5), F1 // Overflow + MOVD 64(X5), F2 // Underflow + MOVD 88(X5), F3 // NearZero + MOVD 16(X5), F17 // 1.0 + + FEQD F0, F0, X7 + BEQ X0, X7, isNaN // x = NaN, return NaN + + FLTD F0, F1, X7 + BNE X0, X7, overflow // x > Overflow, return PosInf + + FLTD F2, F0, X7 + BNE X0, X7, underflow // x < Underflow, return 0 + + FABSD F0, F5 + FLTD F3, F5, X7 + BNE X0, X7, nearzero // fabs(x) < NearZero, return 1 + x + + // argument reduction, x = k*ln2 + r, |r| <= 0.5*ln2 + // computed as r = hi - lo for extra precision. + MOVD 0(X5), F5 + MOVD 8(X5), F3 + MOVD 48(X5), F2 + FLTD F0, F5, X7 + BNE X0, X7, add // x > 0 +sub: + FMSUBD F0, F2, F3, F3 // Log2e*x - 0.5 + JMP 2(PC) +add: + FMADDD F0, F2, F3, F3 // Log2e*x + 0.5 + + FCVTLD.RTZ F3, X16 // float64 -> int64, X16 = k + FCVTDL X16, F3 // int64 -> float64 + + MOVD 32(X5), F4 + MOVD 40(X5), F5 + FNMSUBD F3, F4, F0, F4 // hi = x - k*Ln2Hi + FMULD F3, F5, F5 // lo = k*Ln2Lo + FSUBD F5, F4, F6 // r = hi - lo + FMULD F6, F6, F7 // t = r*r + + // compute c = r - t*(P1+t*(P2+t*(P3+t*(P4+t*P5)))), t = r*r (F7) + // FMADDD x, y, z, r computes r = x*y+z + // FNMSUBD x, y, z, r computes r = z-x*y + MOV $expmultirodata<>+0(SB), X6 + MOVD 32(X6), F8 + MOVD 24(X6), F9 + FMADDD F7, F8, F9, F13 + MOVD 16(X6), F10 + FMADDD F7, F13, F10, F13 + MOVD 8(X6), F11 + FMADDD F7, F13, F11, F13 + MOVD 0(X6), F12 + FMADDD F7, F13, F12, F13 + FNMSUBD F7, F13, F6, F13 + + // compute y = 1 - ((lo - (r*c)/(2-c)) - hi) + MOVD 24(X5), F14 + FSUBD F13, F14, F14 + FMULD F6, F13, F15 + FDIVD F14, F15, F15 + FSUBD F15, F5, F15 + FSUBD F4, F15, F15 + FSUBD F15, F17, F16 + + // inline Ldexp(y, k), benefit: + // 1, no parameter pass overhead. 
+ // 2, skip unnecessary checks for Inf/NaN/Zero + MOVD F16, X15 + MOV $FracMask, X20 + AND X20, X15, X17 // fraction + SRL $52, X15, X18 // exponent + ADD X16, X18 // exponent field of y plus k + MOV $1, X21 + BGE X18, X21, normal + ADD $52, X18 // denormal + MOV $C1, X19 + MOVD X19, F17 // m = 2**-52, replaces the 1.0 loaded at entry +normal: + SLL $52, X18 + OR X18, X17, X15 + MOVD X15, F0 + FMULD F17, F0, F0 // return m * x + MOVD F0, ret+8(FP) + RET +nearzero: + FADDD F17, F0, F0 +isNaN: + MOVD F0, ret+8(FP) + RET +underflow: + MOV X0, ret+8(FP) + RET +overflow: + MOV $PosInf, X15 + MOV X15, ret+8(FP) + RET + + +// Exp2 returns 2**x, the base-2 exponential of x. +// This is an assembly implementation of the method used for function Exp2 in file exp.go. +// +// func archExp2(x float64) float64 +TEXT ·archExp2(SB),$0-16 + MOVD x+0(FP), F0 // F0 = x + + MOV $exprodata<>+0(SB), X5 + MOVD 72(X5), F1 // Overflow2 + MOVD 80(X5), F2 // Underflow2 + MOVD 88(X5), F3 // NearZero; NOTE(review): never read here, F3 is overwritten by the reduction below + + FEQD F0, F0, X7 + BEQ X0, X7, isNaN // x = NaN, return NaN + + FLTD F0, F1, X7 + BNE X0, X7, overflow // x > Overflow2, return PosInf + + FLTD F2, F0, X7 + BNE X0, X7, underflow // x < Underflow2, return 0 + + // argument reduction; x = r*lg(e) + k with |r| <= ln(2)/2 + // computed as r = hi - lo for extra precision. 
+ MOVD 0(X5), F10 + MOVD 8(X5), F2 + FLTD F0, F10, X7 + BNE X0, X7, add // x > 0 +sub: + FSUBD F2, F0, F3 // x - 0.5 + JMP 2(PC) +add: + FADDD F2, F0, F3 // x + 0.5 + + FCVTLD.RTZ F3, X16 // float64 -> int64, X16 = k + FCVTDL X16, F3 // int64 -> float64 + + MOVD 32(X5), F4 + MOVD 40(X5), F5 + FSUBD F3, F0, F3 // x - k + FMULD F3, F4, F4 // hi = (x - k)*Ln2Hi + FNMSUBD F5, F3, F10, F5 // lo = 0 - (x - k)*Ln2Lo + FSUBD F5, F4, F6 // r = hi - lo + FMULD F6, F6, F7 // t = r*r + + // compute c = r - t*(P1+t*(P2+t*(P3+t*(P4+t*P5)))), t = r*r (F7) + MOV $expmultirodata<>+0(SB), X6 + MOVD 32(X6), F8 + MOVD 24(X6), F9 + FMADDD F7, F8, F9, F13 + MOVD 16(X6), F10 + FMADDD F7, F13, F10, F13 + MOVD 8(X6), F11 + FMADDD F7, F13, F11, F13 + MOVD 0(X6), F12 + FMADDD F7, F13, F12, F13 + FNMSUBD F7, F13, F6, F13 + + // compute y = 1 - ((lo - (r*c)/(2-c)) - hi) + MOVD 24(X5), F14 + FSUBD F13, F14, F14 + FMULD F6, F13, F15 + FDIVD F14, F15, F15 + + MOVD 16(X5), F17 // 1.0 + FSUBD F15, F5, F15 + FSUBD F4, F15, F15 + FSUBD F15, F17, F16 + + // inline Ldexp(y, k), benefit: + // 1, no parameter pass overhead. + // 2, skip unnecessary checks for Inf/NaN/Zero + MOVD F16, X15 + MOV $FracMask, X20 + SRL $52, X15, X18 // exponent + AND X20, X15, X17 // fraction + ADD X16, X18 // exponent field of y plus k + MOV $1, X21 + BGE X18, X21, normal + + ADD $52, X18 // denormal + MOV $C1, X19 + MOVD X19, F17 // m = 2**-52, replaces the 1.0 loaded above +normal: + SLL $52, X18 + OR X18, X17, X15 + MOVD X15, F0 + FMULD F17, F0, F0 // return m * x +isNaN: + MOVD F0, ret+8(FP) + RET +underflow: + MOV X0, ret+8(FP) + RET +overflow: + MOV $PosInf, X15 + MOV X15, ret+8(FP) + RET |
