// Copyright 2026 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "textflag.h"

// Raw IEEE 754 binary64 bit patterns, used as integer immediates.
// NOTE(review): the NearZero macro is not referenced by the code visible in
// this file (the routines load the value from exprodata<>+88 instead) - confirm
// before removing.
#define NearZero 0x3e30000000000000 // 2**-28
#define PosInf 0x7ff0000000000000 // stored directly as the result on overflow
#define FracMask 0x000fffffffffffff // low 52 bits; isolates the fraction field
#define C1 0x3cb0000000000000 // 2**-52

// exprodata holds the scalar float64 constants shared by archExp and archExp2.
// The routines address entries by byte offset from the table base, so the
// order and offsets below must stay in sync with the loads in the code.
DATA exprodata<>+0(SB)/8, $0.0
DATA exprodata<>+8(SB)/8, $0.5
DATA exprodata<>+16(SB)/8, $1.0
DATA exprodata<>+24(SB)/8, $2.0
DATA exprodata<>+32(SB)/8, $6.93147180369123816490e-01 // Ln2Hi
DATA exprodata<>+40(SB)/8, $1.90821492927058770002e-10 // Ln2Lo
DATA exprodata<>+48(SB)/8, $1.44269504088896338700e+00 // Log2e
DATA exprodata<>+56(SB)/8, $7.09782712893383973096e+02 // Overflow
DATA exprodata<>+64(SB)/8, $-7.45133219101941108420e+02 // Underflow
DATA exprodata<>+72(SB)/8, $1.0239999999999999e+03 // Overflow2
DATA exprodata<>+80(SB)/8, $-1.0740e+03 // Underflow2
DATA exprodata<>+88(SB)/8, $3.7252902984619141e-09 // NearZero
GLOBL exprodata<>+0(SB), NOPTR|RODATA, $96 // 12 entries * 8 bytes

// expmultirodata holds the polynomial coefficients P1..P5 used in the
// "compute c" step of both routines.
DATA expmultirodata<>+0(SB)/8, $1.66666666666666657415e-01 // P1
DATA expmultirodata<>+8(SB)/8, $-2.77777777770155933842e-03 // P2
DATA expmultirodata<>+16(SB)/8, $6.61375632143793436117e-05 // P3
DATA expmultirodata<>+24(SB)/8, $-1.65339022054652515390e-06 // P4
DATA expmultirodata<>+32(SB)/8, $4.13813679705723846039e-08 // P5
GLOBL expmultirodata<>+0(SB), NOPTR|RODATA, $40 // 5 entries * 8 bytes

// Exp returns e**x, the base-e exponential of x.
// This is an assembly implementation of the method used for function Exp in file exp.go.
//
// func archExp(x float64) float64
//
// Early exits handled below: a NaN argument is returned unchanged, x > Overflow
// returns +Inf, x < Underflow returns 0, and |x| < NearZero returns 1 + x.
//
// NOTE(review): the per-instruction formulas in the comments assume the operand
// order implied by the existing comments (the last operand is the destination;
// FSUBD a, b, c gives c = b - a; FMADDD a, b, c, d gives d = a*b + c) - confirm
// against the assembler reference before relying on them.
TEXT ·archExp(SB),$0-16
	MOVD	x+0(FP), F0	// F0 = x

	MOV	$exprodata<>+0(SB), X5	// X5 = base of the scalar constant table
	MOVD	56(X5), F1	// Overflow
	MOVD	64(X5), F2	// Underflow
	MOVD	88(X5), F3	// NearZero
	MOVD	16(X5), F17	// 1.0

	FEQD	F0, F0, X7	// X7 = (x == x); 0 only when x is NaN
	BEQ	X0, X7, isNaN	// x = NaN, return NaN

	FLTD	F0, F1, X7
	BNE	X0, X7, overflow	// x > Overflow, return PosInf

	FLTD	F2, F0, X7
	BNE	X0, X7, underflow	// x < Underflow, return 0

	FABSD	F0, F5
	FLTD	F3, F5, X7
	BNE	X0, X7, nearzero	// fabs(x) < NearZero, return 1 + x

	// argument reduction, x = k*ln2 + r, |r| <= 0.5*ln2
	// computed as r = hi - lo for extra precision.
	MOVD	0(X5), F5	// 0.0
	MOVD	8(X5), F3	// 0.5
	MOVD	48(X5), F2	// Log2e
	FLTD	F0, F5, X7
	BNE	X0, X7, add	// x > 0
sub:
	FMSUBD	F0, F2, F3, F3	// Log2e*x - 0.5
	JMP	2(PC)	// skip the add variant
add:
	FMADDD	F0, F2, F3, F3	// Log2e*x + 0.5

	FCVTLD.RTZ	F3, X16	// float64 -> int64
	FCVTDL	X16, F3	// int64 -> float64

	MOVD	32(X5), F4	// Ln2Hi
	MOVD	40(X5), F5	// Ln2Lo
	FNMSUBD	F3, F4, F0, F4	// hi = x - k*Ln2Hi
	FMULD	F3, F5, F5	// lo = k*Ln2Lo
	FSUBD	F5, F4, F6	// r = hi - lo
	FMULD	F6, F6, F7	// t = r*r

	// compute c
	// r=(FMA x y z) -> FMADDD x, y, z, r
	MOV	$expmultirodata<>+0(SB), X6	// X6 = base of the coefficient table
	MOVD	32(X6), F8	// P5
	MOVD	24(X6), F9	// P4
	FMADDD	F7, F8, F9, F13	// P4 + t*P5
	MOVD	16(X6), F10	// P3
	FMADDD	F7, F13, F10, F13	// P3 + t*(previous)
	MOVD	8(X6), F11	// P2
	FMADDD	F7, F13, F11, F13	// P2 + t*(previous)
	MOVD	0(X6), F12	// P1
	FMADDD	F7, F13, F12, F13	// P1 + t*(previous)
	FNMSUBD	F7, F13, F6, F13	// c = r - t*(previous)

	// compute y
	MOVD	24(X5), F14	// 2.0
	FSUBD	F13, F14, F14	// 2 - c
	FMULD	F6, F13, F15	// r*c
	FDIVD	F14, F15, F15	// r*c / (2 - c)
	FSUBD	F15, F5, F15	// lo - r*c/(2 - c)
	FSUBD	F4, F15, F15	// (lo - r*c/(2 - c)) - hi
	FSUBD	F15, F17, F16	// y = 1 - ((lo - r*c/(2 - c)) - hi)

	// inline Ldexp(y, k), benefit:
	// 1, no parameter pass overhead.
	// 2, skip unnecessary checks for Inf/NaN/Zero
	MOVD	F16, X15	// X15 = y, handled as an integer bit pattern from here on
	MOV	$FracMask, X20
	AND	X20, X15, X17	// fraction
	SRL	$52, X15, X18	// exponent
	ADD	X16, X18	// exponent field += k
	MOV	$1, X21
	BGE	X18, X21, normal	// exponent field >= 1: keep the multiplier in F17 at 1.0
	ADD	$52, X18	// denormal
	MOV	$C1, X19
	MOVD	X19, F17	// multiplier becomes 2**-52 to undo the +52 above
normal:
	SLL	$52, X18	// move the exponent back into its field position
	OR	X18, X17, X15	// recombine exponent and fraction
	MOVD	X15, F0
	FMULD	F17, F0, F0	// return m * x
	MOVD	F0, ret+8(FP)
	RET
nearzero:
	FADDD	F17, F0, F0	// 1 + x, then fall through to store it
isNaN:
	MOVD	F0, ret+8(FP)	// store F0 (the NaN input, or 1 + x from nearzero)
	RET
underflow:
	MOV	X0, ret+8(FP)	// all-zero bits, i.e. 0.0
	RET
overflow:
	MOV	$PosInf, X15
	MOV	X15, ret+8(FP)	// store the PosInf bit pattern
	RET

// Exp2 returns 2**x, the base-2 exponential of x.
// This is an assembly implementation of the method used for function Exp2 in file exp.go.
//
// func archExp2(x float64) float64
//
// Early exits handled below: a NaN argument is returned unchanged,
// x > Overflow2 returns +Inf, and x < Underflow2 returns 0.
//
// NOTE(review): the per-instruction formulas in the comments assume the operand
// order implied by the existing comments (the last operand is the destination;
// FSUBD a, b, c gives c = b - a) - confirm against the assembler reference.
TEXT ·archExp2(SB),$0-16
	MOVD	x+0(FP), F0	// F0 = x

	MOV	$exprodata<>+0(SB), X5	// X5 = base of the scalar constant table
	MOVD	72(X5), F1	// Overflow2
	MOVD	80(X5), F2	// Underflow2
	// NOTE(review): F3 is overwritten by the x -/+ 0.5 step below before it is
	// ever read, so this load looks unnecessary in this routine.
	MOVD	88(X5), F3	// NearZero

	FEQD	F0, F0, X7	// X7 = (x == x); 0 only when x is NaN
	BEQ	X0, X7, isNaN	// x = NaN, return NaN

	FLTD	F0, F1, X7
	BNE	X0, X7, overflow	// x > Overflow, return PosInf

	FLTD	F2, F0, X7
	BNE	X0, X7, underflow	// x < Underflow, return 0

	// argument reduction; x = r*lg(e) + k with |r| <= ln(2)/2
	// computed as r = hi - lo for extra precision.
	MOVD	0(X5), F10	// 0.0
	MOVD	8(X5), F2	// 0.5 (reuses F2; Underflow2 is no longer needed)
	FLTD	F0, F10, X7
	BNE	X0, X7, add	// x > 0
sub:
	FSUBD	F2, F0, F3	// x - 0.5
	JMP	2(PC)	// skip the add variant
add:
	FADDD	F2, F0, F3	// x + 0.5

	FCVTLD.RTZ	F3, X16	// float64 -> int64
	FCVTDL	X16, F3	// int64 -> float64

	MOVD	32(X5), F4	// Ln2Hi
	MOVD	40(X5), F5	// Ln2Lo
	FSUBD	F3, F0, F3	// t = x - k
	FMULD	F3, F4, F4	// hi = t*Ln2Hi
	FNMSUBD	F5, F3, F10, F5	// lo = -(t*Ln2Lo) + 0.0
	FSUBD	F5, F4, F6	// r = hi - lo
	FMULD	F6, F6, F7	// t = r*r

	// compute c
	MOV	$expmultirodata<>+0(SB), X6	// X6 = base of the coefficient table
	MOVD	32(X6), F8	// P5
	MOVD	24(X6), F9	// P4
	FMADDD	F7, F8, F9, F13	// P4 + t*P5
	MOVD	16(X6), F10	// P3 (F10 no longer needs to hold 0.0)
	FMADDD	F7, F13, F10, F13	// P3 + t*(previous)
	MOVD	8(X6), F11	// P2
	FMADDD	F7, F13, F11, F13	// P2 + t*(previous)
	MOVD	0(X6), F12	// P1
	FMADDD	F7, F13, F12, F13	// P1 + t*(previous)
	FNMSUBD	F7, F13, F6, F13	// c = r - t*(previous)

	// compute y
	MOVD	24(X5), F14	// 2.0
	FSUBD	F13, F14, F14	// 2 - c
	FMULD	F6, F13, F15	// r*c
	FDIVD	F14, F15, F15	// r*c / (2 - c)
	MOVD	16(X5), F17	// 1.0
	FSUBD	F15, F5, F15	// lo - r*c/(2 - c)
	FSUBD	F4, F15, F15	// (lo - r*c/(2 - c)) - hi
	FSUBD	F15, F17, F16	// y = 1 - ((lo - r*c/(2 - c)) - hi)

	// inline Ldexp(y, k), benefit:
	// 1, no parameter pass overhead.
	// 2, skip unnecessary checks for Inf/NaN/Zero
	MOVD	F16, X15	// X15 = y, handled as an integer bit pattern from here on
	MOV	$FracMask, X20
	SRL	$52, X15, X18	// exponent
	AND	X20, X15, X17	// fraction
	ADD	X16, X18	// exponent field += k
	MOV	$1, X21
	BGE	X18, X21, normal	// exponent field >= 1: keep the multiplier in F17 at 1.0
	ADD	$52, X18	// denormal
	MOV	$C1, X19
	MOVD	X19, F17	// multiplier becomes 2**-52 to undo the +52 above
normal:
	SLL	$52, X18	// move the exponent back into its field position
	OR	X18, X17, X15	// recombine exponent and fraction
	MOVD	X15, F0
	FMULD	F17, F0, F0	// scale by the multiplier, then fall through to store
isNaN:
	MOVD	F0, ret+8(FP)	// store F0 (the NaN input, or the computed result)
	RET
underflow:
	MOV	X0, ret+8(FP)	// all-zero bits, i.e. 0.0
	RET
overflow:
	MOV	$PosInf, X15
	MOV	X15, ret+8(FP)	// store the PosInf bit pattern
	RET