mirror of
https://github.com/NGSolve/netgen.git
synced 2026-06-16 00:20:34 +08:00
179 lines
3.9 KiB
C++
179 lines
3.9 KiB
C++
#ifndef NETGEN_CORE_SIMD_MATH_HPP
|
|
#define NETGEN_CORE_SIMD_MATH_HPP
|
|
|
|
#include <tuple>
|
|
|
|
#ifndef M_PI
|
|
#define M_PI 3.14159265358979323846
|
|
#endif
|
|
|
|
|
|
namespace ngcore
|
|
{
|
|
|
|
/*
|
|
based on:
|
|
Stephen L. Moshier: Methods and Programs For Mathematical Functions
|
|
https://www.moshier.net/methprog.pdf
|
|
|
|
CEPHES MATHEMATICAL FUNCTION LIBRARY
|
|
https://www.netlib.org/cephes/
|
|
*/
|
|
|
|
static constexpr double sincof[] = {
|
|
1.58962301576546568060E-10,
|
|
-2.50507477628578072866E-8,
|
|
2.75573136213857245213E-6,
|
|
-1.98412698295895385996E-4,
|
|
8.33333333332211858878E-3,
|
|
-1.66666666666666307295E-1,
|
|
};
|
|
|
|
static constexpr double coscof[6] = {
|
|
-1.13585365213876817300E-11,
|
|
2.08757008419747316778E-9,
|
|
-2.75573141792967388112E-7,
|
|
2.48015872888517045348E-5,
|
|
-1.38888888888730564116E-3,
|
|
4.16666666666665929218E-2,
|
|
};
|
|
|
|
|
|
// highly accurate on [-pi/4, pi/4]
|
|
template <int N>
|
|
auto sincos_reduced (SIMD<double,N> x)
|
|
{
|
|
auto x2 = x*x;
|
|
|
|
auto s = ((((( sincof[0]*x2 + sincof[1]) * x2 + sincof[2]) * x2 + sincof[3]) * x2 + sincof[4]) * x2 + sincof[5]);
|
|
s = x + x*x*x * s;
|
|
|
|
auto c = ((((( coscof[0]*x2 + coscof[1]) * x2 + coscof[2]) * x2 + coscof[3]) * x2 + coscof[4]) * x2 + coscof[5]);
|
|
c = 1.0 - 0.5*x2 + x2*x2*c;
|
|
|
|
return std::tuple{ s, c };
|
|
}
|
|
|
|
template <int N>
|
|
auto sincos (SIMD<double,N> x)
|
|
{
|
|
auto y = round((2/M_PI) * x);
|
|
auto q = lround(y);
|
|
|
|
auto [s1,c1] = sincos_reduced(x - y * (M_PI/2));
|
|
|
|
auto s2 = If((q & SIMD<int64_t,N>(1)) == SIMD<int64_t,N>(0), s1, c1);
|
|
auto s = If((q & SIMD<int64_t,N>(2)) == SIMD<int64_t,N>(0), s2, -s2);
|
|
|
|
auto c2 = If((q & SIMD<int64_t,N>(1)) == SIMD<int64_t,N>(0), c1, -s1);
|
|
auto c = If((q & SIMD<int64_t,N>(2)) == SIMD<int64_t,N>(0), c2, -c2);
|
|
|
|
return std::tuple{ s, c };
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template <int N>
|
|
SIMD<double,N> exp_reduced (SIMD<double,N> x)
|
|
{
|
|
static constexpr double P[] = {
|
|
1.26177193074810590878E-4,
|
|
3.02994407707441961300E-2,
|
|
9.99999999999999999910E-1,
|
|
};
|
|
|
|
static constexpr double Q[] = {
|
|
3.00198505138664455042E-6,
|
|
2.52448340349684104192E-3,
|
|
2.27265548208155028766E-1,
|
|
2.00000000000000000009E0,
|
|
};
|
|
|
|
/*
|
|
// from: https://www.netlib.org/cephes/
|
|
rational approximation for exponential
|
|
* of the fractional part:
|
|
* e**x = 1 + 2x P(x**2)/( Q(x**2) - x P(x**2) )
|
|
|
|
xx = x * x;
|
|
px = x * polevl( xx, P, 2 );
|
|
x = px/( polevl( xx, Q, 3 ) - px );
|
|
x = 1.0 + 2.0 * x;
|
|
*/
|
|
|
|
auto xx = x*x;
|
|
auto px = (P[0]*xx + P[1]) * xx + P[2];
|
|
auto qx = ((Q[0]*xx+Q[1])*xx+Q[2])*xx+Q[3];
|
|
return 1.0 + 2.0*x * px / (qx- x * px);
|
|
}
|
|
|
|
|
|
template <int N>
|
|
SIMD<double,N> pow2_int64_to_float64(SIMD<int64_t,N> n)
|
|
{
|
|
// thx to deepseek
|
|
|
|
// Step 1: Clamp the input to valid exponent range [-1022, 1023]
|
|
// (We use saturated operations to handle out-of-range values)
|
|
SIMD<int64_t,N> max_exp(1023);
|
|
SIMD<int64_t,N> min_exp(-1022);
|
|
n = If(n > max_exp, max_exp, n);
|
|
n = If(min_exp > n, min_exp, n);
|
|
|
|
// Step 2: Add exponent bias (1023)
|
|
n = n + SIMD<int64_t,N>(1023);
|
|
|
|
// Step 3: Shift to exponent bit position (bit 52)
|
|
auto shifted_exp = (n << IC<52>());
|
|
|
|
// Step 4: Reinterpret as double
|
|
return Reinterpret<double> (shifted_exp);
|
|
}
|
|
|
|
|
|
template <int N>
|
|
SIMD<double,N> myexp (SIMD<double,N> x)
|
|
{
|
|
constexpr double log2 = 0.693147180559945286; // log(2.0);
|
|
|
|
auto r = round(1/log2 * x);
|
|
auto rI = lround(r);
|
|
r *= log2;
|
|
|
|
SIMD<double,N> pow2 = pow2_int64_to_float64 (rI);
|
|
return exp_reduced(x-r) * pow2;
|
|
|
|
// maybe better:
|
|
// x = ldexp( x, n );
|
|
}
|
|
|
|
/*
|
|
inline auto Test1 (SIMD<double> x)
|
|
{
|
|
return myexp(x);
|
|
}
|
|
|
|
inline auto Test2 (SIMD<double> x)
|
|
{
|
|
return sincos(x);
|
|
}
|
|
|
|
inline auto Test3 (SIMD<double,4> x)
|
|
{
|
|
return myexp(x);
|
|
}
|
|
|
|
inline auto Test4 (SIMD<double,4> x)
|
|
{
|
|
return sincos(x);
|
|
}
|
|
*/
|
|
|
|
}
|
|
|
|
#endif
|