Text file
src/math/exp_riscv64.s
1 // Copyright 2026 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "textflag.h"
6
7 #define NearZero 0x3e30000000000000 // float64 bit pattern of 2**-28; archExp loads the NearZero entry of exprodata rather than this macro
8 #define PosInf 0x7ff0000000000000 // float64 bit pattern of +Inf, stored directly as the overflow result
9 #define FracMask 0x000fffffffffffff // low 52 bits of a float64: the fraction field
10 #define C1 0x3cb0000000000000 // float64 bit pattern of 2**-52
11
12 DATA exprodata<>+0(SB)/8, $0.0 // 0.0: compared against x to pick the rounding direction for k
13 DATA exprodata<>+8(SB)/8, $0.5 // 0.5: rounding bias applied before truncating to the integer k
14 DATA exprodata<>+16(SB)/8, $1.0 // 1.0: used for 1 + x and for y = 1 - (...)
15 DATA exprodata<>+24(SB)/8, $2.0 // 2.0: used for the divisor 2 - c
16 DATA exprodata<>+32(SB)/8, $6.93147180369123816490e-01 // Ln2Hi: factor used to form hi in the reduction
17 DATA exprodata<>+40(SB)/8, $1.90821492927058770002e-10 // Ln2Lo: factor used to form lo in the reduction
18 DATA exprodata<>+48(SB)/8, $1.44269504088896338700e+00 // Log2e: archExp multiplies x by this to obtain k
19 DATA exprodata<>+56(SB)/8, $7.09782712893383973096e+02 // Overflow: archExp returns +Inf for x above this
20 DATA exprodata<>+64(SB)/8, $-7.45133219101941108420e+02 // Underflow: archExp returns 0 for x below this
21 DATA exprodata<>+72(SB)/8, $1.0239999999999999e+03 // Overflow2: archExp2 returns +Inf for x above this
22 DATA exprodata<>+80(SB)/8, $-1.0740e+03 // Underflow2: archExp2 returns 0 for x below this
23 DATA exprodata<>+88(SB)/8, $3.7252902984619141e-09 // NearZero (2**-28): archExp returns 1 + x when |x| is below this
24 GLOBL exprodata<>+0(SB), NOPTR|RODATA, $96 // 12 read-only float64 entries (96 bytes), addressed by byte offset
25
26 DATA expmultirodata<>+0(SB)/8, $1.66666666666666657415e-01 // P1: last coefficient folded in by the "compute c" chain
27 DATA expmultirodata<>+8(SB)/8, $-2.77777777770155933842e-03 // P2
28 DATA expmultirodata<>+16(SB)/8, $6.61375632143793436117e-05 // P3
29 DATA expmultirodata<>+24(SB)/8, $-1.65339022054652515390e-06 // P4
30 DATA expmultirodata<>+32(SB)/8, $4.13813679705723846039e-08 // P5: first coefficient used by the "compute c" chain
31 GLOBL expmultirodata<>+0(SB), NOPTR|RODATA, $40 // 5 read-only float64 entries (40 bytes)
32
33 // archExp returns e**x, the base-e exponential of x.
34 // This is an assembly implementation of the method used for function Exp in file exp.go.
35 // Special cases: NaN -> NaN, x > Overflow -> +Inf, x < Underflow -> 0, |x| < NearZero -> 1 + x.
36 // func archExp(x float64) float64
37 TEXT ·archExp(SB),$0-16
38 MOVD x+0(FP), F0 // F0 = x
39
40 MOV $exprodata<>+0(SB), X5 // X5 = base of the constant table
41 MOVD 56(X5), F1 // Overflow
42 MOVD 64(X5), F2 // Underflow
43 MOVD 88(X5), F3 // NearZero
44 MOVD 16(X5), F17 // 1.0, also the default Ldexp scale m
45
46 FEQD F0, F0, X7 // X7 = 0 only when x is NaN
47 BEQ X0, X7, isNaN // x = NaN, return NaN
48
49 FLTD F0, F1, X7 // X7 = Overflow < x
50 BNE X0, X7, overflow // x > Overflow, return PosInf
51
52 FLTD F2, F0, X7 // X7 = x < Underflow
53 BNE X0, X7, underflow // x < Underflow, return 0
54
55 FABSD F0, F5
56 FLTD F3, F5, X7 // X7 = fabs(x) < NearZero
57 BNE X0, X7, nearzero // fabs(x) < NearZero, return 1 + x
58
59 // argument reduction, x = k*ln2 + r, |r| <= 0.5*ln2
60 // computed as r = hi - lo for extra precision.
61 MOVD 0(X5), F5 // 0.0
62 MOVD 8(X5), F3 // 0.5
63 MOVD 48(X5), F2 // Log2e
64 FLTD F0, F5, X7 // X7 = 0 < x
65 BNE X0, X7, add // x > 0
66 sub:
67 FMSUBD F0, F2, F3, F3 // Log2e*x - 0.5
68 JMP 2(PC) // skip the add variant
69 add:
70 FMADDD F0, F2, F3, F3 // Log2e*x + 0.5
71
72 FCVTLD.RTZ F3, X16 // float64 -> int64, X16 = k (truncated toward zero)
73 FCVTDL X16, F3 // int64 -> float64, F3 = float64(k)
74
75 MOVD 32(X5), F4 // Ln2Hi
76 MOVD 40(X5), F5 // Ln2Lo
77 FNMSUBD F3, F4, F0, F4 // hi = x - k*Ln2Hi
78 FMULD F3, F5, F5 // lo = k*Ln2Lo
79 FSUBD F5, F4, F6 // r = hi - lo
80 FMULD F6, F6, F7 // t = r*r
81
82 // compute c = r - t*(P1 + t*(P2 + t*(P3 + t*(P4 + t*P5))))
83 // FMADDD x, y, z, r computes r = x*y + z
84 // FNMSUBD x, y, z, r computes r = z - x*y
85 MOV $expmultirodata<>+0(SB), X6
86 MOVD 32(X6), F8 // P5
87 MOVD 24(X6), F9 // P4
88 FMADDD F7, F8, F9, F13 // P4 + t*P5
89 MOVD 16(X6), F10 // P3
90 FMADDD F7, F13, F10, F13
91 MOVD 8(X6), F11 // P2
92 FMADDD F7, F13, F11, F13
93 MOVD 0(X6), F12 // P1
94 FMADDD F7, F13, F12, F13
95 FNMSUBD F7, F13, F6, F13 // c = r - t*(...)
96
97 // compute y = 1 - ((lo - (r*c)/(2-c)) - hi)
98 MOVD 24(X5), F14 // 2.0
99 FSUBD F13, F14, F14 // 2 - c
100 FMULD F6, F13, F15 // r*c
101 FDIVD F14, F15, F15 // (r*c)/(2-c)
102 FSUBD F15, F5, F15 // lo - (r*c)/(2-c)
103 FSUBD F4, F15, F15 // (lo - (r*c)/(2-c)) - hi
104 FSUBD F15, F17, F16 // y
105
106 // inline Ldexp(y, k), benefit:
107 // 1, no parameter pass overhead.
108 // 2, skip unnecessary checks for Inf/NaN/Zero
109 MOVD F16, X15 // X15 = raw bits of y
110 MOV $FracMask, X20
111 AND X20, X15, X17 // fraction
112 SRL $52, X15, X18 // exponent (assumes y > 0, so the sign bit is clear)
113 ADD X16, X18 // biased exponent of y * 2**k
114 MOV $1, X21
115 BGE X18, X21, normal // exponent >= 1: result is normal, m stays 1.0
116 ADD $53, X18 // denormal: build y * 2**(k+53), then scale by m = 2**-53
117 MOV $0x3ca0000000000000, X19 // 2**-53; with 52 the exponent would reach 0 for k near -1075 and drop the implicit 1
118 MOVD X19, F17
119 normal:
120 SLL $52, X18
121 OR X18, X17, X15 // reassemble exponent | fraction
122 MOVD X15, F0
123 FMULD F17, F0, F0 // return m * x
124 MOVD F0, ret+8(FP)
125 RET
126 nearzero:
127 FADDD F17, F0, F0 // 1 + x, then fall through to the store below
128 isNaN:
129 MOVD F0, ret+8(FP)
130 RET
131 underflow:
132 MOV X0, ret+8(FP) // all-zero bits = +0
133 RET
134 overflow:
135 MOV $PosInf, X15
136 MOV X15, ret+8(FP)
137 RET
138
139
140 // archExp2 returns 2**x, the base-2 exponential of x.
141 // This is an assembly implementation of the method used for function Exp2 in file exp.go.
142 // Special cases: NaN -> NaN, x > Overflow2 -> +Inf, x < Underflow2 -> 0.
143 // func archExp2(x float64) float64
144 TEXT ·archExp2(SB),$0-16
145 MOVD x+0(FP), F0 // F0 = x
146
147 MOV $exprodata<>+0(SB), X5 // X5 = base of the constant table
148 MOVD 72(X5), F1 // Overflow2
149 MOVD 80(X5), F2 // Underflow2
150
151 // special cases (this function has no near-zero shortcut)
152 FEQD F0, F0, X7 // X7 = 0 only when x is NaN
153 BEQ X0, X7, isNaN // x = NaN, return NaN
154
155 FLTD F0, F1, X7 // X7 = Overflow2 < x
156 BNE X0, X7, overflow // x > Overflow2, return PosInf
157
158 FLTD F2, F0, X7 // X7 = x < Underflow2
159 BNE X0, X7, underflow // x < Underflow2, return 0
160
161 // argument reduction; x = r*lg(e) + k with |r| <= ln(2)/2
162 // computed as r = hi - lo for extra precision.
163 MOVD 0(X5), F10 // 0.0
164 MOVD 8(X5), F2 // 0.5
165 FLTD F0, F10, X7 // X7 = 0 < x
166 BNE X0, X7, add // x > 0
167 sub:
168 FSUBD F2, F0, F3 // x - 0.5
169 JMP 2(PC) // skip the add variant
170 add:
171 FADDD F2, F0, F3 // x + 0.5
172
173 FCVTLD.RTZ F3, X16 // float64 -> int64, X16 = k (truncated toward zero)
174 FCVTDL X16, F3 // int64 -> float64, F3 = float64(k)
175
176 MOVD 32(X5), F4 // Ln2Hi
177 MOVD 40(X5), F5 // Ln2Lo
178 FSUBD F3, F0, F3 // t = x - k
179 FMULD F3, F4, F4 // hi = t*Ln2Hi
180 FNMSUBD F5, F3, F10, F5 // lo = 0 - t*Ln2Lo
181 FSUBD F5, F4, F6 // r = hi - lo
182 FMULD F6, F6, F7 // t2 = r*r
183
184 // compute c = r - t2*(P1 + t2*(P2 + t2*(P3 + t2*(P4 + t2*P5))))
185 MOV $expmultirodata<>+0(SB), X6
186 MOVD 32(X6), F8 // P5
187 MOVD 24(X6), F9 // P4
188 FMADDD F7, F8, F9, F13 // P4 + t2*P5
189 MOVD 16(X6), F10 // P3 (F10 no longer needs to hold 0.0)
190 FMADDD F7, F13, F10, F13
191 MOVD 8(X6), F11 // P2
192 FMADDD F7, F13, F11, F13
193 MOVD 0(X6), F12 // P1
194 FMADDD F7, F13, F12, F13
195 FNMSUBD F7, F13, F6, F13 // c = r - t2*(...)
196
197 // compute y = 1 - ((lo - (r*c)/(2-c)) - hi)
198 MOVD 24(X5), F14 // 2.0
199 FSUBD F13, F14, F14 // 2 - c
200 FMULD F6, F13, F15 // r*c
201 FDIVD F14, F15, F15 // (r*c)/(2-c)
202
203 MOVD 16(X5), F17 // 1.0, also the default Ldexp scale m
204 FSUBD F15, F5, F15 // lo - (r*c)/(2-c)
205 FSUBD F4, F15, F15 // (lo - (r*c)/(2-c)) - hi
206 FSUBD F15, F17, F16 // y
207
208 // inline Ldexp(y, k), benefit:
209 // 1, no parameter pass overhead.
210 // 2, skip unnecessary checks for Inf/NaN/Zero
211 MOVD F16, X15 // X15 = raw bits of y
212 MOV $FracMask, X20
213 SRL $52, X15, X18 // exponent (assumes y > 0, so the sign bit is clear)
214 AND X20, X15, X17 // fraction
215 ADD X16, X18 // biased exponent of y * 2**k
216 MOV $1, X21
217 BGE X18, X21, normal // exponent >= 1: result is normal, m stays 1.0
218
219 ADD $52, X18 // denormal: build y * 2**(k+52), then scale by m = 2**-52
220 MOV $C1, X19
221 MOVD X19, F17 // m = 2**-52
222 normal:
223 SLL $52, X18
224 OR X18, X17, X15 // reassemble exponent | fraction
225 MOVD X15, F0
226 FMULD F17, F0, F0 // m * y * 2**(k or k+52); on fall-through F0 is stored below
227 isNaN:
228 MOVD F0, ret+8(FP)
229 RET
230 underflow:
231 MOV X0, ret+8(FP) // all-zero bits = +0
232 RET
233 overflow:
234 MOV $PosInf, X15
235 MOV X15, ret+8(FP)
236 RET
237
View as plain text