From 4edb303c5d6cc8a7c7e41f5a0bdc7ebfe9aa69b5 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 9 Sep 2024 16:19:08 -0400 Subject: [PATCH 01/22] Add device only impl --- .../quadrature/detail/exp_sinh_detail.hpp | 527 +++++++++++++++++- 1 file changed, 526 insertions(+), 1 deletion(-) diff --git a/include/boost/math/quadrature/detail/exp_sinh_detail.hpp b/include/boost/math/quadrature/detail/exp_sinh_detail.hpp index 2df07b6ec..4ddc28c90 100644 --- a/include/boost/math/quadrature/detail/exp_sinh_detail.hpp +++ b/include/boost/math/quadrature/detail/exp_sinh_detail.hpp @@ -7,6 +7,10 @@ #ifndef BOOST_MATH_QUADRATURE_DETAIL_EXP_SINH_DETAIL_HPP #define BOOST_MATH_QUADRATURE_DETAIL_EXP_SINH_DETAIL_HPP +#include + +#ifndef BOOST_MATH_ENABLE_CUDA + #include #include #include @@ -541,4 +545,525 @@ void exp_sinh_detail::init(const std::integral_constant&) } } } -#endif + +#else // BOOST_MATH_ENABLE_CUDA + +namespace boost { +namespace math { +namespace quadrature { +namespace detail { + +__constant__ float m_abscissas_float[][] = { + { 3.47876573e-23f, 5.62503650e-09f, 9.95706124e-04f, 9.67438487e-02f, 7.43599217e-01f, 4.14293205e+00f, + 1.08086768e+02f, 4.56291316e+05f, 2.70123007e+15f, }, + { 2.41870864e-14f, 1.02534662e-05f, 1.65637566e-02f, 3.11290799e-01f, 1.64691269e+00f, 1.49800773e+01f, + 2.57724301e+03f, 2.24833766e+09f, }, + { 3.24983286e-18f, 2.51095186e-11f, 3.82035773e-07f, 1.33717837e-04f, 4.80260650e-03f, 4.41526928e-02f, + 1.83045938e-01f, 4.91960276e-01f, 1.10322609e+00f, 2.53681744e+00f, 7.39791792e+00f, 3.59560256e+01f, + 4.36061333e+02f, 2.49501460e+04f, 1.89216933e+07f, 1.03348694e+12f, }, + { 1.51941172e-20f, 3.70201714e-16f, 9.67598102e-13f, 4.44773051e-10f, 5.28493928e-08f, 2.19158236e-06f, + 4.00799258e-05f, 3.88011529e-04f, 2.29325538e-03f, 9.25182629e-03f, 2.78117501e-02f, 6.67553298e-02f, + 1.35173168e-01f, 2.41374946e-01f, 3.94194704e-01f, 6.07196731e-01f, 9.06432514e-01f, 1.34481045e+00f, + 2.03268444e+00f, 3.21243032e+00f, 
5.46310949e+00f, 1.03365745e+01f, 2.26486752e+01f, 6.03727778e+01f, + 2.08220266e+02f, 1.00431239e+03f, 7.47843388e+03f, 9.75279951e+04f, 2.61755592e+06f, 1.77776624e+08f, + 3.98255346e+10f, 4.13443763e+13f, 3.07708133e+17f, }, + { 7.99409438e-22f, 2.41624595e-19f, 3.73461321e-17f, 3.19397902e-15f, 1.62042378e-13f, 5.18579386e-12f, + 1.10520072e-10f, 1.64548212e-09f, 1.78534009e-08f, 1.46529196e-07f, 9.40168786e-07f, 4.85507733e-06f, + 2.07038029e-05f, 7.45799409e-05f, 2.31536599e-04f, 6.30580368e-04f, 1.53035449e-03f, 3.35582040e-03f, + 6.73124842e-03f, 1.24856832e-02f, 2.16245309e-02f, 3.52720523e-02f, 5.45995171e-02f, 8.07587788e-02f, + 1.14840025e-01f, 1.57867103e-01f, 2.10837078e-01f, 2.74805391e-01f, 3.51015955e-01f, 4.41077540e-01f, + 5.47194016e-01f, 6.72466825e-01f, 8.21304567e-01f, 1.00000000e+00f, 1.21757511e+00f, 1.48706221e+00f, + 1.82750536e+00f, 2.26717507e+00f, 2.84887335e+00f, 3.63893880e+00f, 4.74299876e+00f, 6.33444194e+00f, + 8.70776542e+00f, 1.23825548e+01f, 1.83151803e+01f, 2.83510579e+01f, 4.62437776e+01f, 8.00917327e+01f, + 1.48560852e+02f, 2.97989725e+02f, 6.53443372e+02f, 1.58584068e+03f, 4.31897162e+03f, 1.34084311e+04f, + 4.83003053e+04f, 2.05969943e+05f, 1.06363880e+06f, 6.82457850e+06f, 5.60117371e+07f, 6.07724622e+08f, + 9.04813016e+09f, 1.92834507e+11f, 6.17122515e+12f, 3.13089095e+14f, 2.67765347e+16f, 4.13865153e+18f, }, + { 1.70893932e-22f, 3.56621447e-21f, 6.19138882e-20f, 9.04299298e-19f, 1.12287188e-17f, 1.19706303e-16f, + 1.10583090e-15f, 8.92931857e-15f, 6.35404710e-14f, 4.01527389e-13f, 2.26955738e-12f, 1.15522811e-11f, + 5.32913181e-11f, 2.24130967e-10f, 8.64254491e-10f, 3.07161058e-09f, 1.01117742e-08f, 3.09775637e-08f, + 8.87004371e-08f, 2.38368096e-07f, 6.03520392e-07f, 1.44488635e-06f, 3.28212299e-06f, 7.09655821e-06f, + 1.46494407e-05f, 2.89537394e-05f, 5.49357161e-05f, 1.00313252e-04f, 1.76700203e-04f, 3.00920507e-04f, + 4.96484845e-04f, 7.95150594e-04f, 1.23845781e-03f, 1.87911525e-03f, 2.78210510e-03f, 
4.02538552e-03f, + 5.70009588e-03f, 7.91020800e-03f, 1.07716137e-02f, 1.44106884e-02f, 1.89624177e-02f, 2.45682104e-02f, + 3.13735515e-02f, 3.95256605e-02f, 4.91713196e-02f, 6.04550279e-02f, 7.35176150e-02f, 8.84954195e-02f, + 1.05520113e-01f, 1.24719213e-01f, 1.46217318e-01f, 1.70138063e-01f, 1.96606781e-01f, 2.25753880e-01f, + 2.57718900e-01f, 2.92655274e-01f, 3.30735809e-01f, 3.72158929e-01f, 4.17155794e-01f, 4.65998399e-01f, + 5.19008863e-01f, 5.76570161e-01f, 6.39138643e-01f, 7.07258781e-01f, 7.81580731e-01f, 8.62881450e-01f, + 9.52090320e-01f, 1.05032052e+00f, 1.15890775e+00f, 1.27945836e+00f, 1.41390963e+00f, 1.56460576e+00f, + 1.73439430e+00f, 1.92674937e+00f, 2.14593012e+00f, 2.39718593e+00f, 2.68702407e+00f, 3.02356133e+00f, + 3.41698950e+00f, 3.88019661e+00f, 4.42960272e+00f, 5.08629455e+00f, 5.87757956e+00f, 6.83913514e+00f, + 8.01801085e+00f, 9.47686632e+00f, 1.13000199e+01f, 1.36021823e+01f, 1.65412214e+01f, 2.03370584e+01f, + 2.53000199e+01f, 3.18739815e+01f, 4.07030054e+01f, 5.27358913e+01f, 6.93929374e+01f, 9.28366010e+01f, + 1.26418926e+02f, 1.75435645e+02f, 2.48423411e+02f, 3.59440052e+02f, 5.32165336e+02f, 8.07455844e+02f, + 1.25762341e+03f, 2.01416017e+03f, 3.32313676e+03f, 5.65930306e+03f, 9.96877263e+03f, 1.82030939e+04f, + 3.45378531e+04f, 6.82619916e+04f, 1.40913380e+05f, 3.04680844e+05f, 6.92095957e+05f, 1.65694484e+06f, + 4.19519229e+06f, 1.12739016e+07f, 3.22814282e+07f, 9.88946136e+07f, 3.25562103e+08f, 1.15706659e+09f, + 4.46167708e+09f, 1.87647826e+10f, 8.65629909e+10f, 4.40614549e+11f, 2.49049013e+12f, 1.57380011e+13f, + 1.11990629e+14f, 9.04297390e+14f, 8.35377903e+15f, 8.90573552e+16f, 1.10582857e+18f, 1.61514650e+19f, }, + { 7.75845008e-23f, 3.71846701e-22f, 1.69833677e-21f, 7.40284853e-21f, 3.08399399e-20f, 1.22962599e-19f, + 4.69855182e-19f, 1.72288020e-18f, 6.07012059e-18f, 2.05742924e-17f, 6.71669437e-17f, 2.11441966e-16f, + 6.42566550e-16f, 1.88715605e-15f, 5.36188198e-15f, 1.47533056e-14f, 3.93507835e-14f, 1.01841667e-13f, + 
2.55981752e-13f, 6.25453236e-13f, 1.48683211e-12f, 3.44173601e-12f, 7.76421789e-12f, 1.70831312e-11f, + 3.66877698e-11f, 7.69632540e-11f, 1.57822184e-10f, 3.16577320e-10f, 6.21604166e-10f, 1.19551931e-09f, + 2.25364361e-09f, 4.16647469e-09f, 7.55905964e-09f, 1.34658870e-08f, 2.35675936e-08f, 4.05458117e-08f, + 6.86052525e-08f, 1.14227960e-07f, 1.87243781e-07f, 3.02323521e-07f, 4.81026747e-07f, 7.54564302e-07f, + 1.16746531e-06f, 1.78236867e-06f, 2.68618781e-06f, 3.99792342e-06f, 5.87841837e-06f, 8.54236163e-06f, + 1.22728487e-05f, 1.74387947e-05f, 2.45154696e-05f, 3.41083807e-05f, 4.69806683e-05f, 6.40841007e-05f, + 8.65936597e-05f, 1.15945600e-04f, 1.53878746e-04f, 2.02478652e-04f, 2.64224143e-04f, 3.42035594e-04f, + 4.39324211e-04f, 5.60041454e-04f, 7.08727668e-04f, 8.90558896e-04f, 1.11139085e-03f, 1.37779898e-03f, + 1.69711358e-03f, 2.07744903e-03f, 2.52772622e-03f, 3.05768742e-03f, 3.67790298e-03f, 4.39976940e-03f, + 5.23549846e-03f, 6.19809738e-03f, 7.30134015e-03f, 8.55973022e-03f, 9.98845520e-03f, 1.16033342e-02f, + 1.34207587e-02f, 1.54576276e-02f, 1.77312787e-02f, 2.02594158e-02f, 2.30600348e-02f, 2.61513493e-02f, + 2.95517158e-02f, 3.32795626e-02f, 3.73533204e-02f, 4.17913590e-02f, 4.66119283e-02f, 5.18331072e-02f, + 5.74727595e-02f, 6.35484986e-02f, 7.00776615e-02f, 7.70772927e-02f, 8.45641386e-02f, 9.25546518e-02f, + 1.01065008e-01f, 1.10111132e-01f, 1.19708739e-01f, 1.29873379e-01f, 1.40620505e-01f, 1.51965539e-01f, + 1.63923958e-01f, 1.76511391e-01f, 1.89743720e-01f, 2.03637197e-01f, 2.18208574e-01f, 2.33475238e-01f, + 2.49455360e-01f, 2.66168055e-01f, 2.83633553e-01f, 3.01873381e-01f, 3.20910560e-01f, 3.40769809e-01f, + 3.61477772e-01f, 3.83063247e-01f, 4.05557445e-01f, 4.28994258e-01f, 4.53410546e-01f, 4.78846448e-01f, + 5.05345717e-01f, 5.32956079e-01f, 5.61729623e-01f, 5.91723220e-01f, 6.22998983e-01f, 6.55624768e-01f, + 6.89674714e-01f, 7.25229845e-01f, 7.62378724e-01f, 8.01218171e-01f, 8.41854062e-01f, 8.84402205e-01f, + 9.28989312e-01f, 
9.75754080e-01f, 1.02484839e+00f, 1.07643865e+00f, 1.13070727e+00f, 1.18785434e+00f, + 1.24809950e+00f, 1.31168403e+00f, 1.37887320e+00f, 1.44995892e+00f, 1.52526270e+00f, 1.60513906e+00f, + 1.68997931e+00f, 1.78021589e+00f, 1.87632722e+00f, 1.97884333e+00f, 2.08835213e+00f, 2.20550671e+00f, + 2.33103353e+00f, 2.46574193e+00f, 2.61053497e+00f, 2.76642183e+00f, 2.93453226e+00f, 3.11613304e+00f, + 3.31264716e+00f, 3.52567596e+00f, 3.75702486e+00f, 4.00873326e+00f, 4.28310945e+00f, 4.58277134e+00f, + 4.91069419e+00f, 5.27026666e+00f, 5.66535674e+00f, 6.10038953e+00f, 6.58043928e+00f, 7.11133842e+00f, + 7.69980735e+00f, 8.35360902e+00f, 9.08173387e+00f, 9.89462150e+00f, 1.08044272e+01f, 1.18253437e+01f, + 1.29739897e+01f, 1.42698826e+01f, 1.57360130e+01f, 1.73995473e+01f, 1.92926887e+01f, 2.14537359e+01f, + 2.39283915e+01f, 2.67713817e+01f, 3.00484719e+01f, 3.38389827e+01f, 3.82389447e+01f, 4.33650689e+01f, + 4.93597649e+01f, 5.63975118e+01f, 6.46929803e+01f, 7.45114359e+01f, 8.61821250e+01f, 1.00115581e+02f, + 1.16826112e+02f, 1.36961158e+02f, 1.61339834e+02f, 1.91003781e+02f, 2.27284639e+02f, 2.71894067e+02f, + 3.27044548e+02f, 3.95612465e+02f, 4.81359585e+02f, 5.89235756e+02f, 7.25795284e+02f, 8.99773468e+02f, + 1.12289036e+03f, 1.41097920e+03f, 1.78558211e+03f, 2.27622329e+03f, 2.92367233e+03f, 3.78466551e+03f, + 4.93879227e+03f, 6.49862329e+03f, 8.62473434e+03f, 1.15481896e+04f, 1.56044945e+04f, 2.12853507e+04f, + 2.93183077e+04f, 4.07905708e+04f, 5.73434125e+04f, 8.14806753e+04f, 1.17063646e+05f, 1.70113785e+05f, + 2.50129854e+05f, 3.72274789e+05f, 5.61051155e+05f, 8.56556497e+05f, 1.32526810e+06f, 2.07888648e+06f, + 3.30771485e+06f, 5.34063130e+06f, 8.75442405e+06f, 1.45761434e+07f, 2.46634599e+07f, 4.24311457e+07f, + 7.42617251e+07f, 1.32291588e+08f, 2.40011058e+08f, 4.43725882e+08f, 8.36456588e+08f, 1.60874083e+09f, + 3.15878598e+09f, 6.33624483e+09f, 1.29932136e+10f, 2.72570398e+10f, 5.85372779e+10f, 1.28795973e+11f, + 2.90551047e+11f, 6.72570892e+11f, 
1.59884056e+12f, 3.90652847e+12f, 9.81916374e+12f, 2.54124546e+13f, + 6.77814197e+13f, 1.86501681e+14f, 5.29897885e+14f, 1.55625904e+15f, 4.72943011e+15f, 1.48882761e+16f, + 4.86043448e+16f, 1.64741373e+17f, 5.80423410e+17f, 2.12831536e+18f, 8.13255421e+18f, }, + { 5.20331508e-23f, 1.15324162e-22f, 2.52466875e-22f, 5.46028730e-22f, 1.16690465e-21f, 2.46458927e-21f, + 5.14543768e-21f, 1.06205431e-20f, 2.16767715e-20f, 4.37564009e-20f, 8.73699691e-20f, 1.72595588e-19f, + 3.37377643e-19f, 6.52669145e-19f, 1.24976973e-18f, 2.36916845e-18f, 4.44691383e-18f, 8.26580373e-18f, + 1.52174118e-17f, 2.77517606e-17f, 5.01415830e-17f, 8.97689232e-17f, 1.59270821e-16f, 2.80084735e-16f, + 4.88253693e-16f, 8.43846463e-16f, 1.44610939e-15f, 2.45762595e-15f, 4.14251017e-15f, 6.92627770e-15f, + 1.14889208e-14f, 1.89084205e-14f, 3.08802476e-14f, 5.00504297e-14f, 8.05169965e-14f, 1.28579121e-13f, + 2.03847833e-13f, 3.20880532e-13f, 5.01568631e-13f, 7.78600100e-13f, 1.20044498e-12f, 1.83848331e-12f, + 2.79712543e-12f, 4.22808302e-12f, 6.35035779e-12f, 9.47805307e-12f, 1.40588174e-11f, 2.07266430e-11f, + 3.03739182e-11f, 4.42491437e-11f, 6.40886341e-11f, 9.22929507e-11f, 1.32161843e-10f, 1.88205259e-10f, + 2.66552657e-10f, 3.75488615e-10f, 5.26149742e-10f, 7.33426418e-10f, 1.01712318e-09f, 1.40344387e-09f, + 1.92688222e-09f, 2.63261606e-09f, 3.57952343e-09f, 4.84396276e-09f, 6.52448685e-09f, 8.74769197e-09f, + 1.16754399e-08f, 1.55137320e-08f, 2.05235608e-08f, 2.70341184e-08f, 3.54587968e-08f, 4.63144836e-08f, + 6.02447248e-08f, 7.80474059e-08f, 1.00707687e-07f, 1.29437018e-07f, 1.65719157e-07f, 2.11364220e-07f, + 2.68571894e-07f, 3.40005066e-07f, 4.28875221e-07f, 5.39041105e-07f, 6.75122241e-07f, 8.42629031e-07f, + 1.04811127e-06f, 1.29932703e-06f, 1.60543396e-06f, 1.97720518e-06f, 2.42727196e-06f, 2.97039558e-06f, + 3.62377065e-06f, 4.40736236e-06f, 5.34428013e-06f, 6.46118994e-06f, 7.78876789e-06f, 9.36219733e-06f, + 1.12217116e-05f, 1.34131848e-05f, 1.59887725e-05f, 1.90076038e-05f, 
2.25365270e-05f, 2.66509096e-05f, + 3.14354940e-05f, 3.69853096e-05f, 4.34066412e-05f, 5.08180543e-05f, 5.93514765e-05f, 6.91533342e-05f, + 8.03857429e-05f, 9.32277499e-05f, 1.07876627e-04f, 1.24549208e-04f, 1.43483273e-04f, 1.64938971e-04f, + 1.89200275e-04f, 2.16576471e-04f, 2.47403671e-04f, 2.82046341e-04f, 3.20898851e-04f, 3.64387021e-04f, + 4.12969671e-04f, 4.67140163e-04f, 5.27427922e-04f, 5.94399942e-04f, 6.68662248e-04f, 7.50861330e-04f, + 8.41685517e-04f, 9.41866302e-04f, 1.05217960e-03f, 1.17344692e-03f, 1.30653650e-03f, 1.45236427e-03f, + 1.61189482e-03f, 1.78614219e-03f, 1.97617055e-03f, 2.18309485e-03f, 2.40808123e-03f, 2.65234740e-03f, + 2.91716284e-03f, 3.20384886e-03f, 3.51377855e-03f, 3.84837661e-03f, 4.20911898e-03f, 4.59753235e-03f, + 5.01519359e-03f, 5.46372894e-03f, 5.94481312e-03f, 6.46016832e-03f, 7.01156301e-03f, 7.60081065e-03f, + 8.22976829e-03f, 8.90033499e-03f, 9.61445021e-03f, 1.03740920e-02f, 1.11812753e-02f, 1.20380497e-02f, + 1.29464978e-02f, 1.39087327e-02f, 1.49268962e-02f, 1.60031562e-02f, 1.71397050e-02f, 1.83387564e-02f, + 1.96025436e-02f, 2.09333170e-02f, 2.23333419e-02f, 2.38048956e-02f, 2.53502659e-02f, 2.69717481e-02f, + 2.86716433e-02f, 3.04522558e-02f, 3.23158911e-02f, 3.42648538e-02f, 3.63014456e-02f, 3.84279634e-02f, + 4.06466974e-02f, 4.29599296e-02f, 4.53699317e-02f, 4.78789641e-02f, 5.04892744e-02f, 5.32030959e-02f, + 5.60226468e-02f, 5.89501290e-02f, 6.19877276e-02f, 6.51376099e-02f, 6.84019251e-02f, 7.17828036e-02f, + 7.52823576e-02f, 7.89026802e-02f, 8.26458461e-02f, 8.65139116e-02f, 9.05089155e-02f, 9.46328794e-02f, + 9.88878087e-02f, 1.03275694e-01f, 1.07798510e-01f, 1.12458223e-01f, 1.17256783e-01f, 1.22196135e-01f, + 1.27278214e-01f, 1.32504950e-01f, 1.37878272e-01f, 1.43400107e-01f, 1.49072382e-01f, 1.54897032e-01f, + 1.60875997e-01f, 1.67011231e-01f, 1.73304700e-01f, 1.79758387e-01f, 1.86374297e-01f, 1.93154462e-01f, + 2.00100939e-01f, 2.07215821e-01f, 2.14501238e-01f, 2.21959362e-01f, 2.29592410e-01f, 
2.37402653e-01f, + 2.45392415e-01f, 2.53564085e-01f, 2.61920117e-01f, 2.70463037e-01f, 2.79195450e-01f, 2.88120044e-01f, + 2.97239599e-01f, 3.06556989e-01f, 3.16075193e-01f, 3.25797297e-01f, 3.35726506e-01f, 3.45866147e-01f, + 3.56219679e-01f, 3.66790698e-01f, 3.77582948e-01f, 3.88600328e-01f, 3.99846898e-01f, 4.11326892e-01f, + 4.23044723e-01f, 4.35004995e-01f, 4.47212512e-01f, 4.59672288e-01f, 4.72389556e-01f, 4.85369781e-01f, + 4.98618671e-01f, 5.12142186e-01f, 5.25946554e-01f, 5.40038281e-01f, 5.54424165e-01f, 5.69111309e-01f, + 5.84107138e-01f, 5.99419409e-01f, 6.15056232e-01f, 6.31026081e-01f, 6.47337815e-01f, 6.64000696e-01f, + 6.81024405e-01f, 6.98419060e-01f, 7.16195243e-01f, 7.34364016e-01f, 7.52936944e-01f, 7.71926120e-01f, + 7.91344191e-01f, 8.11204381e-01f, 8.31520518e-01f, 8.52307069e-01f, 8.73579162e-01f, 8.95352625e-01f, + 9.17644013e-01f, 9.40470650e-01f, 9.63850664e-01f, 9.87803022e-01f, 1.01234758e+00f, 1.03750512e+00f, + 1.06329740e+00f, 1.08974721e+00f, 1.11687839e+00f, 1.14471595e+00f, 1.17328606e+00f, 1.20261614e+00f, + 1.23273496e+00f, 1.26367264e+00f, 1.29546076e+00f, 1.32813247e+00f, 1.36172249e+00f, 1.39626730e+00f, + 1.43180514e+00f, 1.46837616e+00f, 1.50602252e+00f, 1.54478848e+00f, 1.58472055e+00f, 1.62586760e+00f, + 1.66828098e+00f, 1.71201469e+00f, 1.75712551e+00f, 1.80367319e+00f, 1.85172058e+00f, 1.90133388e+00f, + 1.95258276e+00f, 2.00554062e+00f, 2.06028484e+00f, 2.11689693e+00f, 2.17546288e+00f, 2.23607339e+00f, + 2.29882418e+00f, 2.36381627e+00f, 2.43115639e+00f, 2.50095725e+00f, 2.57333803e+00f, 2.64842468e+00f, + 2.72635049e+00f, 2.80725648e+00f, 2.89129193e+00f, 2.97861498e+00f, 3.06939317e+00f, 3.16380413e+00f, + 3.26203621e+00f, 3.36428929e+00f, 3.47077553e+00f, 3.58172026e+00f, 3.69736291e+00f, 3.81795798e+00f, + 3.94377618e+00f, 4.07510558e+00f, 4.21225285e+00f, 4.35554468e+00f, 4.50532923e+00f, 4.66197775e+00f, + 4.82588634e+00f, 4.99747780e+00f, 5.17720373e+00f, 5.36554672e+00f, 5.56302277e+00f, 5.77018396e+00f, + 
5.98762126e+00f, 6.21596768e+00f, 6.45590164e+00f, 6.70815069e+00f, 6.97349551e+00f, 7.25277437e+00f, + 7.54688785e+00f, 7.85680417e+00f, 8.18356491e+00f, 8.52829128e+00f, 8.89219104e+00f, 9.27656603e+00f, + 9.68282047e+00f, 1.01124700e+01f, 1.05671518e+01f, 1.10486353e+01f, 1.15588347e+01f, 1.20998217e+01f, + 1.26738407e+01f, 1.32833247e+01f, 1.39309131e+01f, 1.46194716e+01f, 1.53521138e+01f, 1.61322255e+01f, + 1.69634913e+01f, 1.78499242e+01f, 1.87958987e+01f, 1.98061868e+01f, 2.08859991e+01f, 2.20410294e+01f, + 2.32775056e+01f, 2.46022448e+01f, 2.60227166e+01f, 2.75471124e+01f, 2.91844234e+01f, 3.09445281e+01f, + 3.28382897e+01f, 3.48776660e+01f, 3.70758319e+01f, 3.94473180e+01f, 4.20081658e+01f, 4.47761023e+01f, + 4.77707378e+01f, 5.10137879e+01f, 5.45293247e+01f, 5.83440613e+01f, 6.24876734e+01f, 6.69931639e+01f, + 7.18972765e+01f, 7.72409663e+01f, 8.30699343e+01f, 8.94352364e+01f, 9.63939781e+01f, 1.04010108e+02f, + 1.12355322e+02f, 1.21510104e+02f, 1.31564914e+02f, 1.42621552e+02f, 1.54794728e+02f, 1.68213867e+02f, + 1.83025185e+02f, 1.99394097e+02f, 2.17507985e+02f, 2.37579409e+02f, 2.59849828e+02f, 2.84593917e+02f, + 3.12124587e+02f, 3.42798827e+02f, 3.77024517e+02f, 4.15268384e+02f, 4.58065302e+02f, 5.06029199e+02f, + 5.59865843e+02f, 6.20387872e+02f, 6.88532497e+02f, 7.65382367e+02f, 8.52190227e+02f, 9.50408087e+02f, + 1.06172182e+03f, 1.18809220e+03f, 1.33180384e+03f, 1.49552334e+03f, 1.68236894e+03f, 1.89599367e+03f, + 2.14068513e+03f, 2.42148533e+03f, 2.74433485e+03f, 3.11624675e+03f, 3.54551666e+03f, 4.04197722e+03f, + 4.61730674e+03f, 5.28540457e+03f, 6.06284853e+03f, 6.96945350e+03f, 8.02895513e+03f, 9.26984864e+03f, + 1.07264200e+04f, 1.24400169e+04f, 1.44606187e+04f, 1.68487805e+04f, 1.96780458e+04f, 2.30379493e+04f, + 2.70377620e+04f, 3.18111749e+04f, 3.75221715e+04f, 4.43724093e+04f, 5.26105241e+04f, 6.25438881e+04f, + 7.45535092e+04f, 8.91129656e+04f, 1.06812532e+05f, 1.28390012e+05f, 1.54770253e+05f, 1.87115940e+05f, + 2.26893075e+05f, 
2.75955654e+05f, 3.36655497e+05f, 4.11985149e+05f, 5.05764405e+05f, 6.22884544e+05f, + 7.69629183e+05f, 9.54097173e+05f, 1.18676186e+06f, 1.48121324e+06f, 1.85514609e+06f, 2.33168052e+06f, + 2.94113264e+06f, 3.72339780e+06f, 4.73116974e+06f, 6.03430539e+06f, 7.72576515e+06f, 9.92972861e+06f, + 1.28127257e+07f, 1.65989637e+07f, 2.15915179e+07f, 2.82017465e+07f, 3.69902945e+07f, 4.87244884e+07f, + 6.44590226e+07f, 8.56498776e+07f, 1.14315868e+08f, 1.53268759e+08f, 2.06442545e+08f, 2.79366798e+08f, + 3.79850300e+08f, 5.18973079e+08f, 7.12532948e+08f, 9.83165083e+08f, 1.36346329e+09f, 1.90059962e+09f, + 2.66319659e+09f, 3.75160395e+09f, 5.31334782e+09f, 7.56648043e+09f, 1.08350637e+10f, 1.56033907e+10f, + 2.25993074e+10f, 3.29229832e+10f, 4.82470799e+10f, 7.11297379e+10f, 1.05506900e+11f, 1.57471442e+11f, + 2.36513804e+11f, 3.57509889e+11f, 5.43926613e+11f, 8.33024431e+11f, 1.28435637e+12f, 1.99374510e+12f, + 3.11642465e+12f, 4.90561997e+12f, 7.77731247e+12f, 1.24197380e+13f, 1.99798484e+13f, 3.23831600e+13f, + 5.28864904e+13f, 8.70403770e+13f, 1.44377694e+14f, 2.41399528e+14f, 4.06896744e+14f, 6.91510621e+14f, + 1.18504970e+15f, 2.04811559e+15f, 3.57034809e+15f, 6.27861398e+15f, 1.11397125e+16f, 1.99435267e+16f, + 3.60337498e+16f, 6.57141972e+16f, 1.20980371e+17f, 2.24875057e+17f, 4.22089025e+17f, 8.00147402e+17f, + 1.53216987e+18f, 2.96403754e+18f, 5.79389087e+18f, 1.14455803e+19f, 2.28537992e+19f, }, + }; + +__constant__ float m_weights_float[][] = { + { 1.79979618e-21f, 1.07218106e-07f, 7.05786060e-03f, 2.72310168e-01f, 1.18863515e+00f, 8.77655464e+00f, + 5.33879432e+02f, 5.98892409e+06f, 9.60751551e+16f, }, + { 7.59287827e-13f, 1.18886775e-04f, 7.27332179e-02f, 6.09156795e-01f, 2.71431234e+00f, 4.68800805e+01f, + 2.06437304e+04f, 4.85431236e+10f, }, + { 1.30963564e-16f, 6.14135316e-10f, 5.67743391e-06f, 1.21108690e-03f, 2.67259824e-02f, 1.54234107e-01f, + 4.23412860e-01f, 8.47913037e-01f, 1.73632925e+00f, 4.63203354e+00f, 1.88206826e+01f, 1.40643917e+02f, + 
2.73736946e+03f, 2.55633252e+05f, 3.18438602e+08f, 2.86363931e+13f, }, + { 6.93769555e-19f, 1.31670336e-14f, 2.68107110e-11f, 9.60294960e-09f, 8.89417585e-07f, 2.87650015e-05f, + 4.10649371e-04f, 3.10797444e-03f, 1.43958814e-02f, 4.56980985e-02f, 1.08787148e-01f, 2.08910486e-01f, + 3.43887471e-01f, 5.11338439e-01f, 7.19769211e-01f, 1.00073403e+00f, 1.42660267e+00f, 2.14966467e+00f, + 3.50341221e+00f, 6.28632057e+00f, 1.26369961e+01f, 2.90949180e+01f, 7.91163114e+01f, 2.65103292e+02f, + 1.15872311e+03f, 7.11886439e+03f, 6.77324248e+04f, 1.13081650e+06f, 3.88995005e+07f, 3.38857764e+09f, + 9.74063570e+11f, 1.29789430e+15f, 1.24001927e+19f, }, + { 3.88541434e-20f, 1.03646493e-17f, 1.41388360e-15f, 1.06725054e-13f, 4.77908002e-12f, 1.34999345e-10f, + 2.53970414e-09f, 3.33804787e-08f, 3.19755978e-07f, 2.31724882e-06f, 1.31302324e-05f, 5.98917639e-05f, + 2.25650360e-04f, 7.18397083e-04f, 1.97196929e-03f, 4.75106406e-03f, 1.02072514e-02f, 1.98317011e-02f, + 3.52844239e-02f, 5.81350403e-02f, 8.95955146e-02f, 1.30335749e-01f, 1.80445384e-01f, 2.39557131e-01f, + 3.07102681e-01f, 3.82648608e-01f, 4.66260909e-01f, 5.58867257e-01f, 6.62616429e-01f, 7.81267733e-01f, + 9.20677638e-01f, 1.08949034e+00f, 1.30019425e+00f, 1.57079633e+00f, 1.92752387e+00f, 2.40924883e+00f, + 3.07485695e+00f, 4.01578082e+00f, 5.37784753e+00f, 7.40045071e+00f, 1.04890228e+01f, 1.53538346e+01f, + 2.32861156e+01f, 3.67307348e+01f, 6.05296516e+01f, 1.04761593e+02f, 1.91598840e+02f, 3.72918009e+02f, + 7.78738763e+02f, 1.76101294e+03f, 4.35837629e+03f, 1.19484066e+04f, 3.67841605e+04f, 1.29157756e+05f, + 5.26424122e+05f, 2.54082527e+06f, 1.48545930e+07f, 1.07925566e+08f, 1.00317513e+09f, 1.23283860e+10f, + 2.07922173e+11f, 5.01997049e+12f, 1.82006578e+14f, 1.04617001e+16f, 1.01373023e+18f, 1.77530238e+20f, }, + { 8.56958007e-21f, 1.68000718e-19f, 2.74008750e-18f, 3.75978801e-17f, 4.38589881e-16f, 4.39263787e-15f, + 3.81223973e-14f, 2.89198757e-13f, 1.93338859e-12f, 1.14783389e-11f, 6.09544349e-11f, 
2.91499607e-10f, + 1.26339559e-09f, 4.99234840e-09f, 1.80872790e-08f, 6.03998541e-08f, 1.86829770e-07f, 5.37807971e-07f, + 1.44704121e-06f, 3.65421571e-06f, 8.69454276e-06f, 1.95621880e-05f, 4.17628758e-05f, 8.48713297e-05f, + 1.64680159e-04f, 3.05960283e-04f, 5.45748909e-04f, 9.36950301e-04f, 1.55189915e-03f, 2.48542560e-03f, + 3.85690505e-03f, 5.81079770e-03f, 8.51529070e-03f, 1.21588421e-02f, 1.69446644e-02f, 2.30834400e-02f, + 3.07847946e-02f, 4.02482241e-02f, 5.16542634e-02f, 6.51566792e-02f, 8.08763802e-02f, 9.88975757e-02f, + 1.19266512e-01f, 1.41992893e-01f, 1.67053901e-01f, 1.94400532e-01f, 2.23965873e-01f, 2.55674859e-01f, + 2.89455038e-01f, 3.25247905e-01f, 3.63020457e-01f, 4.02776696e-01f, 4.44568958e-01f, 4.88509042e-01f, + 5.34779290e-01f, 5.83643845e-01f, 6.35460497e-01f, 6.90693630e-01f, 7.49928915e-01f, 8.13890578e-01f, + 8.83462209e-01f, 9.59712352e-01f, 1.04392634e+00f, 1.13764623e+00f, 1.24272128e+00f, 1.36137177e+00f, + 1.49627028e+00f, 1.65064527e+00f, 1.82841374e+00f, 2.03435175e+00f, 2.27431458e+00f, 2.55552245e+00f, + 2.88693336e+00f, 3.27973254e+00f, 3.74797919e+00f, 4.30946679e+00f, 4.98687594e+00f, 5.80933099e+00f, + 6.81451887e+00f, 8.05159726e+00f, 9.58522167e+00f, 1.15011733e+01f, 1.39143002e+01f, 1.69798351e+01f, + 2.09096993e+01f, 2.59962450e+01f, 3.26472377e+01f, 4.14380231e+01f, 5.31903193e+01f, 6.90928164e+01f, + 9.08883744e+01f, 1.21168895e+02f, 1.63847041e+02f, 2.24923217e+02f, 3.13754154e+02f, 4.45189215e+02f, + 6.43236850e+02f, 9.47484116e+02f, 1.42457583e+03f, 2.18920236e+03f, 3.44338342e+03f, 5.55184130e+03f, + 9.19045432e+03f, 1.56468513e+04f, 2.74471462e+04f, 4.97037777e+04f, 9.31107740e+04f, 1.80835335e+05f, + 3.64968793e+05f, 7.67360053e+05f, 1.68525439e+06f, 3.87686515e+06f, 9.37022570e+06f, 2.38705733e+07f, + 6.43128750e+07f, 1.83920179e+08f, 5.60444636e+08f, 1.82722217e+09f, 6.40182180e+09f, 2.42153053e+10f, + 9.93804949e+10f, 4.44863150e+11f, 2.18425069e+12f, 1.18337660e+13f, 7.11948688e+13f, 4.78870731e+14f, + 
3.62710215e+15f, 3.11747341e+16f, 3.06542975e+17f, 3.47854955e+18f, 4.59768243e+19f, 7.14806140e+20f, }, + { 3.95175890e-21f, 1.83575349e-20f, 8.12661397e-20f, 3.43336935e-19f, 1.38634563e-18f, 5.35757029e-18f, + 1.98424944e-17f, 7.05221126e-17f, 2.40827550e-16f, 7.91175869e-16f, 2.50347754e-15f, 7.63871031e-15f, + 2.25003103e-14f, 6.40502166e-14f, 1.76389749e-13f, 4.70424252e-13f, 1.21618334e-12f, 3.05082685e-12f, + 7.43273471e-12f, 1.76028616e-11f, 4.05602375e-11f, 9.10055013e-11f, 1.98994391e-10f, 4.24390078e-10f, + 8.83436580e-10f, 1.79636925e-09f, 3.57059250e-09f, 6.94247187e-09f, 1.32133371e-08f, 2.46332536e-08f, + 4.50110843e-08f, 8.06630537e-08f, 1.41856144e-07f, 2.44958654e-07f, 4.15579069e-07f, 6.93056106e-07f, + 1.13675616e-06f, 1.83473665e-06f, 2.91544023e-06f, 4.56318858e-06f, 7.03833675e-06f, 1.07030190e-05f, + 1.60534529e-05f, 2.37597559e-05f, 3.47141604e-05f, 5.00883685e-05f, 7.14005734e-05f, 1.00592372e-04f, + 1.40115414e-04f, 1.93027181e-04f, 2.63094779e-04f, 3.54905080e-04f, 4.73978972e-04f, 6.26886955e-04f, + 8.21362793e-04f, 1.06641153e-03f, 1.37240787e-03f, 1.75118071e-03f, 2.21607971e-03f, 2.78201983e-03f, + 3.46550010e-03f, 4.28459361e-03f, 5.25890609e-03f, 6.40950150e-03f, 7.75879384e-03f, 9.33040551e-03f, + 1.11489935e-02f, 1.32400455e-02f, 1.56296499e-02f, 1.83442433e-02f, 2.14103400e-02f, 2.48542509e-02f, + 2.87017958e-02f, 3.29780164e-02f, 3.77068968e-02f, 4.29110964e-02f, 4.86117029e-02f, 5.48280093e-02f, + 6.15773214e-02f, 6.88747982e-02f, 7.67333308e-02f, 8.51634602e-02f, 9.41733378e-02f, 1.03768728e-01f, + 1.13953051e-01f, 1.24727473e-01f, 1.36091031e-01f, 1.48040798e-01f, 1.60572082e-01f, 1.73678660e-01f, + 1.87353038e-01f, 2.01586736e-01f, 2.16370598e-01f, 2.31695113e-01f, 2.47550758e-01f, 2.63928342e-01f, + 2.80819365e-01f, 2.98216379e-01f, 3.16113348e-01f, 3.34506011e-01f, 3.53392244e-01f, 3.72772414e-01f, + 3.92649735e-01f, 4.13030618e-01f, 4.33925021e-01f, 4.55346789e-01f, 4.77314001e-01f, 4.99849320e-01f, + 5.22980337e-01f, 
5.46739932e-01f, 5.71166640e-01f, 5.96305036e-01f, 6.22206131e-01f, 6.48927802e-01f, + 6.76535247e-01f, 7.05101473e-01f, 7.34707835e-01f, 7.65444619e-01f, 7.97411688e-01f, 8.30719192e-01f, + 8.65488366e-01f, 9.01852407e-01f, 9.39957463e-01f, 9.79963735e-01f, 1.02204672e+00f, 1.06639858e+00f, + 1.11322974e+00f, 1.16277062e+00f, 1.21527359e+00f, 1.27101525e+00f, 1.33029891e+00f, 1.39345744e+00f, + 1.46085648e+00f, 1.53289803e+00f, 1.61002461e+00f, 1.69272386e+00f, 1.78153384e+00f, 1.87704900e+00f, + 1.97992701e+00f, 2.09089644e+00f, 2.21076567e+00f, 2.34043290e+00f, 2.48089770e+00f, 2.63327413e+00f, + 2.79880590e+00f, 2.97888368e+00f, 3.17506505e+00f, 3.38909744e+00f, 3.62294469e+00f, 3.87881764e+00f, + 4.15920968e+00f, 4.46693789e+00f, 4.80519096e+00f, 5.17758497e+00f, 5.58822853e+00f, 6.04179895e+00f, + 6.54363157e+00f, 7.09982467e+00f, 7.71736306e+00f, 8.40426388e+00f, 9.16974906e+00f, 1.00244499e+01f, + 1.09806502e+01f, 1.20525758e+01f, 1.32567410e+01f, 1.46123627e+01f, 1.61418586e+01f, 1.78714466e+01f, + 1.98318690e+01f, 2.20592694e+01f, 2.45962577e+01f, 2.74932084e+01f, 3.08098460e+01f, 3.46171893e+01f, + 3.89999428e+01f, 4.40594471e+01f, 4.99173320e+01f, 5.67200545e+01f, 6.46445583e+01f, 7.39053537e+01f, + 8.47634121e+01f, 9.75373786e+01f, 1.12617765e+02f, 1.30484989e+02f, 1.51732386e+02f, 1.77095712e+02f, + 2.07491096e+02f, 2.44064119e+02f, 2.88253545e+02f, 3.41874461e+02f, 4.07227291e+02f, 4.87241400e+02f, + 5.85665251e+02f, 7.07319497e+02f, 8.58435639e+02f, 1.04711167e+03f, 1.28392853e+03f, 1.58278901e+03f, + 1.96206607e+03f, 2.44618436e+03f, 3.06781187e+03f, 3.87091688e+03f, 4.91505977e+03f, 6.28145970e+03f, + 8.08162997e+03f, 1.04697579e+04f, 1.36605846e+04f, 1.79554230e+04f, 2.37803156e+04f, 3.17424455e+04f, + 4.27142204e+04f, 5.79596727e+04f, 7.93261335e+04f, 1.09537503e+05f, 1.52647130e+05f, 2.14743829e+05f, + 3.05063335e+05f, 4.37755687e+05f, 6.34724899e+05f, 9.30240305e+05f, 1.37850753e+06f, 2.06623977e+06f, + 3.13377596e+06f, 4.81098405e+06f, 
7.47905793e+06f, 1.17782423e+07f, 1.87980927e+07f, 3.04180655e+07f, + 4.99257437e+07f, 8.31551852e+07f, 1.40614107e+08f, 2.41519712e+08f, 4.21576502e+08f, 7.48209440e+08f, + 1.35089892e+09f, 2.48263348e+09f, 4.64662007e+09f, 8.86235204e+09f, 1.72348930e+10f, 3.41967381e+10f, + 6.92714904e+10f, 1.43352142e+11f, 3.03269524e+11f, 6.56345865e+11f, 1.45422052e+12f, 3.30099910e+12f, + 7.68267630e+12f, 1.83474885e+13f, 4.49980389e+13f, 1.13430702e+14f, 2.94148450e+14f, 7.85402504e+14f, + 2.16127995e+15f, 6.13534293e+15f, 1.79847736e+16f, 5.44944507e+16f, 1.70858922e+17f, 5.54922744e+17f, + 1.86905990e+18f, 6.53599225e+18f, 2.37582887e+19f, 8.98810682e+19f, 3.54341330e+20f, }, + { 2.67108015e-21f, 5.82833463e-21f, 1.25616316e-20f, 2.67469785e-20f, 5.62745845e-20f, 1.17014394e-19f, + 2.40511019e-19f, 4.88739481e-19f, 9.82072303e-19f, 1.95168062e-18f, 3.83661097e-18f, 7.46163208e-18f, + 1.43594942e-17f, 2.73485792e-17f, 5.15573612e-17f, 9.62223075e-17f, 1.77810682e-16f, 3.25389618e-16f, + 5.89765054e-16f, 1.05888451e-15f, 1.88354538e-15f, 3.31989417e-15f, 5.79902273e-15f, 1.00398818e-14f, + 1.72308010e-14f, 2.93186753e-14f, 4.94655967e-14f, 8.27635884e-14f, 1.37343706e-13f, 2.26082511e-13f, + 3.69205736e-13f, 5.98228147e-13f, 9.61866975e-13f, 1.53484658e-12f, 2.43090464e-12f, 3.82185577e-12f, + 5.96531965e-12f, 9.24474797e-12f, 1.42267754e-11f, 2.17427910e-11f, 3.30041201e-11f, 4.97635091e-11f, + 7.45399354e-11f, 1.10929412e-10f, 1.64031748e-10f, 2.41032586e-10f, 3.51991946e-10f, 5.10905560e-10f, + 7.37124150e-10f, 1.05723929e-09f, 1.50757352e-09f, 2.13744796e-09f, 3.01344401e-09f, 4.22492806e-09f, + 5.89117093e-09f, 8.17046854e-09f, 1.12717587e-08f, 1.54693324e-08f, 2.11213594e-08f, 2.86930859e-08f, + 3.87857241e-08f, 5.21722335e-08f, 6.98414017e-08f, 9.30518593e-08f, 1.23397923e-07f, 1.62889442e-07f, + 2.14048123e-07f, 2.80023159e-07f, 3.64729321e-07f, 4.73011070e-07f, 6.10836627e-07f, 7.85526363e-07f, + 1.00602028e-06f, 1.28318979e-06f, 1.63019938e-06f, 2.06292424e-06f, 
2.60043021e-06f, 3.26552286e-06f, + 4.08537275e-06f, 5.09222413e-06f, 6.32419483e-06f, 7.82617466e-06f, 9.65083023e-06f, 1.18597236e-05f, + 1.45245521e-05f, 1.77285168e-05f, 2.15678251e-05f, 2.61533347e-05f, 3.16123436e-05f, 3.80905295e-05f, + 4.57540432e-05f, 5.47917575e-05f, 6.54176707e-05f, 7.78734661e-05f, 9.24312223e-05f, 1.09396271e-04f, + 1.29110197e-04f, 1.51953965e-04f, 1.78351176e-04f, 2.08771424e-04f, 2.43733750e-04f, 2.83810168e-04f, + 3.29629253e-04f, 3.81879756e-04f, 4.41314233e-04f, 5.08752659e-04f, 5.85085996e-04f, 6.71279692e-04f, + 7.68377076e-04f, 8.77502620e-04f, 9.99865030e-04f, 1.13676015e-03f, 1.28957360e-03f, 1.45978322e-03f, + 1.64896113e-03f, 1.85877551e-03f, 2.09099200e-03f, 2.34747474e-03f, 2.63018699e-03f, 2.94119122e-03f, + 3.28264890e-03f, 3.65681963e-03f, 4.06605991e-03f, 4.51282135e-03f, 4.99964828e-03f, 5.52917497e-03f, + 6.10412222e-03f, 6.72729343e-03f, 7.40157020e-03f, 8.12990738e-03f, 8.91532760e-03f, 9.76091537e-03f, + 1.06698107e-02f, 1.16452023e-02f, 1.26903202e-02f, 1.38084285e-02f, 1.50028172e-02f, 1.62767940e-02f, + 1.76336759e-02f, 1.90767806e-02f, 2.06094173e-02f, 2.22348784e-02f, 2.39564300e-02f, 2.57773028e-02f, + 2.77006834e-02f, 2.97297055e-02f, 3.18674406e-02f, 3.41168899e-02f, 3.64809756e-02f, 3.89625331e-02f, + 4.15643030e-02f, 4.42889240e-02f, 4.71389254e-02f, 5.01167213e-02f, 5.32246039e-02f, 5.64647382e-02f, + 5.98391571e-02f, 6.33497571e-02f, 6.69982939e-02f, 7.07863800e-02f, 7.47154815e-02f, 7.87869165e-02f, + 8.30018539e-02f, 8.73613125e-02f, 9.18661613e-02f, 9.65171203e-02f, 1.01314762e-01f, 1.06259513e-01f, + 1.11351656e-01f, 1.16591337e-01f, 1.21978563e-01f, 1.27513213e-01f, 1.33195039e-01f, 1.39023671e-01f, + 1.44998628e-01f, 1.51119321e-01f, 1.57385061e-01f, 1.63795066e-01f, 1.70348473e-01f, 1.77044340e-01f, + 1.83881662e-01f, 1.90859375e-01f, 1.97976367e-01f, 2.05231492e-01f, 2.12623572e-01f, 2.20151415e-01f, + 2.27813822e-01f, 2.35609599e-01f, 2.43537565e-01f, 2.51596569e-01f, 2.59785494e-01f, 
2.68103274e-01f, + 2.76548903e-01f, 2.85121445e-01f, 2.93820047e-01f, 3.02643950e-01f, 3.11592502e-01f, 3.20665165e-01f, + 3.29861530e-01f, 3.39181328e-01f, 3.48624439e-01f, 3.58190905e-01f, 3.67880941e-01f, 3.77694943e-01f, + 3.87633504e-01f, 3.97697421e-01f, 4.07887708e-01f, 4.18205605e-01f, 4.28652591e-01f, 4.39230391e-01f, + 4.49940993e-01f, 4.60786652e-01f, 4.71769905e-01f, 4.82893580e-01f, 4.94160809e-01f, 5.05575036e-01f, + 5.17140031e-01f, 5.28859900e-01f, 5.40739096e-01f, 5.52782432e-01f, 5.64995090e-01f, 5.77382639e-01f, + 5.89951040e-01f, 6.02706666e-01f, 6.15656310e-01f, 6.28807202e-01f, 6.42167019e-01f, 6.55743908e-01f, + 6.69546490e-01f, 6.83583887e-01f, 6.97865729e-01f, 7.12402181e-01f, 7.27203953e-01f, 7.42282322e-01f, + 7.57649155e-01f, 7.73316926e-01f, 7.89298740e-01f, 8.05608358e-01f, 8.22260217e-01f, 8.39269463e-01f, + 8.56651970e-01f, 8.74424378e-01f, 8.92604116e-01f, 9.11209442e-01f, 9.30259469e-01f, 9.49774208e-01f, + 9.69774604e-01f, 9.90282579e-01f, 1.01132107e+00f, 1.03291408e+00f, 1.05508673e+00f, 1.07786529e+00f, + 1.10127728e+00f, 1.12535146e+00f, 1.15011796e+00f, 1.17560829e+00f, 1.20185546e+00f, 1.22889400e+00f, + 1.25676010e+00f, 1.28549162e+00f, 1.31512826e+00f, 1.34571158e+00f, 1.37728514e+00f, 1.40989460e+00f, + 1.44358784e+00f, 1.47841507e+00f, 1.51442894e+00f, 1.55168471e+00f, 1.59024039e+00f, 1.63015687e+00f, + 1.67149810e+00f, 1.71433126e+00f, 1.75872698e+00f, 1.80475947e+00f, 1.85250679e+00f, 1.90205105e+00f, + 1.95347869e+00f, 2.00688065e+00f, 2.06235275e+00f, 2.11999592e+00f, 2.17991652e+00f, 2.24222670e+00f, + 2.30704472e+00f, 2.37449538e+00f, 2.44471039e+00f, 2.51782884e+00f, 2.59399766e+00f, 2.67337209e+00f, + 2.75611628e+00f, 2.84240383e+00f, 2.93241843e+00f, 3.02635449e+00f, 3.12441791e+00f, 3.22682682e+00f, + 3.33381238e+00f, 3.44561973e+00f, 3.56250887e+00f, 3.68475574e+00f, 3.81265333e+00f, 3.94651282e+00f, + 4.08666490e+00f, 4.23346116e+00f, 4.38727553e+00f, 4.54850596e+00f, 4.71757611e+00f, 4.89493722e+00f, + 
5.08107015e+00f, 5.27648761e+00f, 5.48173646e+00f, 5.69740032e+00f, 5.92410235e+00f, 6.16250823e+00f, + 6.41332946e+00f, 6.67732689e+00f, 6.95531455e+00f, 7.24816384e+00f, 7.55680807e+00f, 7.88224735e+00f, + 8.22555401e+00f, 8.58787841e+00f, 8.97045530e+00f, 9.37461076e+00f, 9.80176975e+00f, 1.02534643e+01f, + 1.07313428e+01f, 1.12371793e+01f, 1.17728848e+01f, 1.23405187e+01f, 1.29423019e+01f, 1.35806306e+01f, + 1.42580922e+01f, 1.49774818e+01f, 1.57418213e+01f, 1.65543795e+01f, 1.74186947e+01f, 1.83385994e+01f, + 1.93182476e+01f, 2.03621450e+01f, 2.14751816e+01f, 2.26626686e+01f, 2.39303784e+01f, 2.52845893e+01f, + 2.67321348e+01f, 2.82804577e+01f, 2.99376708e+01f, 3.17126238e+01f, 3.36149769e+01f, 3.56552840e+01f, + 3.78450835e+01f, 4.01970005e+01f, 4.27248599e+01f, 4.54438126e+01f, 4.83704762e+01f, 5.15230921e+01f, + 5.49217006e+01f, 5.85883374e+01f, 6.25472527e+01f, 6.68251567e+01f, 7.14514957e+01f, 7.64587609e+01f, + 8.18828353e+01f, 8.77633847e+01f, 9.41442967e+01f, 1.01074176e+02f, 1.08606902e+02f, 1.16802259e+02f, + 1.25726650e+02f, 1.35453899e+02f, 1.46066166e+02f, 1.57654979e+02f, 1.70322410e+02f, 1.84182406e+02f, + 1.99362306e+02f, 2.16004568e+02f, 2.34268740e+02f, 2.54333703e+02f, 2.76400239e+02f, 3.00693971e+02f, + 3.27468728e+02f, 3.57010397e+02f, 3.89641362e+02f, 4.25725590e+02f, 4.65674502e+02f, 5.09953726e+02f, + 5.59090900e+02f, 6.13684688e+02f, 6.74415211e+02f, 7.42056139e+02f, 8.17488717e+02f, 9.01718069e+02f, + 9.95892168e+02f, 1.10132394e+03f, 1.21951707e+03f, 1.35219615e+03f, 1.50134197e+03f, 1.66923291e+03f, + 1.85849349e+03f, 2.07215152e+03f, 2.31370536e+03f, 2.58720328e+03f, 2.89733724e+03f, 3.24955383e+03f, + 3.65018587e+03f, 4.10660860e+03f, 4.62742547e+03f, 5.22268956e+03f, 5.90416786e+03f, 6.68565726e+03f, + 7.58336313e+03f, 8.61635357e+03f, 9.80710572e+03f, 1.11821637e+04f, 1.27729327e+04f, 1.46166396e+04f, + 1.67574960e+04f, 1.92481112e+04f, 2.21512104e+04f, 2.55417295e+04f, 2.95093735e+04f, 3.41617487e+04f, + 3.96282043e+04f, 
4.60645561e+04f, 5.36589049e+04f, 6.26388223e+04f, 7.32802431e+04f, 8.59184957e+04f, + 1.00962017e+05f, 1.18909442e+05f, 1.40370957e+05f, 1.66095034e+05f, 1.97001996e+05f, 2.34226253e+05f, + 2.79169596e+05f, 3.33568603e+05f, 3.99580125e+05f, 4.79889989e+05f, 5.77851588e+05f, 6.97663062e+05f, + 8.44594440e+05f, 1.02527965e+06f, 1.24809298e+06f, 1.52363581e+06f, 1.86536786e+06f, 2.29042802e+06f, + 2.82070529e+06f, 3.48424008e+06f, 4.31706343e+06f, 5.36561882e+06f, 6.68996113e+06f, 8.36799594e+06f, + 1.05011160e+07f, 1.32217203e+07f, 1.67032788e+07f, 2.11738506e+07f, 2.69343047e+07f, 3.43829654e+07f, + 4.40490690e+07f, 5.66383460e+07f, 7.30953564e+07f, 9.46890531e+07f, 1.23130681e+08f, 1.60736861e+08f, + 2.10656057e+08f, 2.77184338e+08f, 3.66207397e+08f, 4.85821891e+08f, 6.47212479e+08f, 8.65895044e+08f, + 1.16348659e+09f, 1.57023596e+09f, 2.12865840e+09f, 2.89877917e+09f, 3.96573294e+09f, 5.45082863e+09f, + 7.52773593e+09f, 1.04462776e+10f, 1.45675716e+10f, 2.04161928e+10f, 2.87579864e+10f, 4.07167363e+10f, + 5.79499965e+10f, 8.29154750e+10f, 1.19276754e+11f, 1.72524570e+11f, 2.50933409e+11f, 3.67042596e+11f, + 5.39962441e+11f, 7.98985690e+11f, 1.18927611e+12f, 1.78088199e+12f, 2.68310388e+12f, 4.06753710e+12f, + 6.20525592e+12f, 9.52719664e+12f, 1.47228407e+13f, 2.29025392e+13f, 3.58662837e+13f, 5.65517100e+13f, + 8.97859411e+13f, 1.43556057e+14f, 2.31171020e+14f, 3.74966777e+14f, 6.12702071e+14f, 1.00868013e+15f, + 1.67323268e+15f, 2.79711270e+15f, 4.71267150e+15f, 8.00353033e+15f, 1.37027503e+16f, 2.36538022e+16f, + 4.11734705e+16f, 7.22793757e+16f, 1.27982244e+17f, 2.28603237e+17f, 4.11976277e+17f, 7.49169358e+17f, + 1.37488861e+18f, 2.54681529e+18f, 4.76248383e+18f, 8.99167123e+18f, 1.71428840e+19f, 3.30088717e+19f, + 6.42020070e+19f, 1.26155602e+20f, 2.50480806e+20f, 5.02601059e+20f, 1.01935525e+21f, }, + }; + +__device__ exp_sinh_integrate_impl(const F& f, Real* error, Real* L1, const char* function, Real tolerance, boost::math::size_t* levels) +{ + using K = 
decltype(f(static_cast(0))) K; + using boost::math::constants::half; + using boost::math::constants::half_pi; + + // This provided a nice error message for real valued integrals, but it's super awkward for complex-valued integrals: + /*K y_max = f(tools::max_value()); + if(abs(y_max) > tools::epsilon() || !(boost::math::isfinite)(y_max)) + { + K val = abs(y_max); + return static_cast(policies::raise_domain_error(function, "The function you are trying to integrate does not go to zero at infinity, and instead evaluates to %1%", val, Policy())); + }*/ + + //std::cout << std::setprecision(5*std::numeric_limits::digits10); + + // Get the party started with two estimates of the integral: + auto& m_abscissas = m_abscissas_float; + auto& m_weights = m_weights_float; + + Real min_abscissa{ 0 }, max_abscissa{ boost::math::tools::max_value() }; + K I0 = 0; + Real L1_I0 = 0; + for(boost::math::size_t i = 0; i < sizeof(m_abscissas[0])/sizeof(Real); ++i) + { + K y = f(m_abscissas[0][i]); + K I0_last = I0; + I0 += y*m_weights[0][i]; + L1_I0 += abs(y)*m_weights[0][i]; + if ((I0_last == I0) && (abs(I0) != 0)) + { + max_abscissa = m_abscissas[0][i]; + break; + } + } + + //std::cout << "First estimate : " << I0 << std::endl; + K I1 = I0; + Real L1_I1 = L1_I0; + bool have_first_j = false; + boost::math::size_t first_j = 0; + for (boost::math::size_t i = 0; (i < sizeof(m_abscissas[1])/sizeof(Real)) && (m_abscissas[1][i] < max_abscissa); ++i) + { + K y = f(m_abscissas[1][i]); + K I1_last = I1; + I1 += y*m_weights[1][i]; + L1_I1 += abs(y)*m_weights[1][i]; + if (!have_first_j && (I1_last == I1)) + { + // No change to the sum, disregard these values on the LHS: + if ((i < sizeof(m_abscissas[1])/sizeof(Real) - 1) && (m_abscissas[1][i + 1] > max_abscissa)) + { + // The summit is so high, that we found nothing in this row which added to the integral!! 
+ have_first_j = true; + } + else + { + min_abscissa = m_abscissas[1][i]; + first_j = i; + } + } + else + { + have_first_j = true; + } + } + + if (I0 == static_cast(0)) + { + // We failed to find anything, is the integral zero, or have we just not found it yet? + // We'll try one more level, if that still finds nothing then it'll terminate. + min_abscissa = 0; + max_abscissa = boost::math::tools::max_value(); + } + + I1 *= half(); + L1_I1 *= half(); + Real err = abs(I0 - I1); + //std::cout << "Second estimate: " << I1 << " Error estimate at level " << 1 << " = " << err << std::endl; + + boost::math::size_t i = 2; + for(; i < sizeof(m_abscissas)/sizeof(Real); ++i) + { + I0 = I1; + L1_I0 = L1_I1; + + I1 = half()*I0; + L1_I1 = half()*L1_I0; + Real h = static_cast(1)/static_cast(1 << i); + K sum = 0; + Real absum = 0; + + auto abscissas_row = get_abscissa_row(i); + auto weight_row = get_weight_row(i); + + // appoximate location to start looking for lowest meaningful abscissa value + first_j = first_j == 0 ? 0 : 2 * first_j - 1; + + boost::math::size_t j = first_j; + while (abscissas_row[j] < min_abscissa) + { + ++j; + } + + for(; (j < sizeof(m_weights[i])/sizeof(Real)) && (abscissas_row[j] < max_abscissa); ++j) + { + Real x = abscissas_row[j]; + K y = f(x); + sum += y*weight_row[j]; + Real abterm0 = abs(y)*weight_row[j]; + absum += abterm0; + } + + I1 += sum*h; + L1_I1 += absum*h; + err = abs(I0 - I1); + if (!(boost::math::isfinite)(L1_I1)) + { + return static_cast(policies::raise_evaluation_error(function, "The exp_sinh quadrature evaluated your function at a singular point and returned %1%. 
Please ensure your function evaluates to a finite number over its entire domain.", I1, Policy())); + } + if (err <= tolerance*L1_I1) + { + break; + } + } + + if (error) + { + *error = err; + } + + if(L1) + { + *L1 = L1_I1; + } + + if (levels) + { + *levels = i; + } + + return I1; +} + +} // namespace detail +} // namespace quadrature +} // namespace math +} // namespace boost + +#endif // BOOST_MATH_ENABLE_CUDA + +#endif // BOOST_MATH_QUADRATURE_DETAIL_EXP_SINH_DETAIL_HPP From 59b8da189b83222a3e9bc8b359c26078053e7db0 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 9 Sep 2024 16:19:25 -0400 Subject: [PATCH 02/22] Add function for device only impl --- include/boost/math/quadrature/exp_sinh.hpp | 84 +++++++++++++++++++++- 1 file changed, 82 insertions(+), 2 deletions(-) diff --git a/include/boost/math/quadrature/exp_sinh.hpp b/include/boost/math/quadrature/exp_sinh.hpp index f28493737..f9d388b26 100644 --- a/include/boost/math/quadrature/exp_sinh.hpp +++ b/include/boost/math/quadrature/exp_sinh.hpp @@ -15,11 +15,15 @@ #ifndef BOOST_MATH_QUADRATURE_EXP_SINH_HPP #define BOOST_MATH_QUADRATURE_EXP_SINH_HPP +#include +#include + +#ifndef BOOST_MATH_ENABLE_CUDA + #include #include #include #include -#include namespace boost{ namespace math{ namespace quadrature { @@ -98,4 +102,80 @@ auto exp_sinh::integrate(const F& f, Real tolerance, Real* error, }}} -#endif + +#else + +#include +#include +#include +#include +#include + +namespace boost { +namespace math { +namespace quadrature { + +template > +__device__ auto exp_sinh_integrate(const F& f, Real a, Real b, Real tolerance, Real* error, Real* L1, boost::math::size_t* levels) const +{ + BOOST_MATH_STD_USING + + using K = decltype(f(a)); + static_assert(!boost::math::is_integral::value, + "The return type cannot be integral, it must be either a real or complex floating point type."); + using boost::math::constants::half; + using boost::math::quadrature::detail::exp_sinh_detail; + + constexpr auto function = 
"boost::math::quadrature::exp_sinh<%1%>::integrate"; + + // Neither limit may be a NaN: + if((boost::math::isnan)(a) || (boost::math::isnan)(b)) + { + return static_cast(policies::raise_domain_error(function, "NaN supplied as one limit of integration - sorry I don't know what to do", a, Policy())); + } + // Right limit is infinite: + if ((boost::math::isfinite)(a) && (b >= boost::math::tools::max_value())) + { + // If a = 0, don't use an additional level of indirection: + if (a == static_cast(0)) + { + return detail::exp_sinh_integrate_impl(f, error, L1, function, tolerance, levels); + } + const auto u = [&](Real t)->K { return f(t + a); }; + return detail::exp_sinh_integrate_impl(u, error, L1, function, tolerance, levels); + } + + if ((boost::math::isfinite)(b) && a <= -boost::math::tools::max_value()) + { + const auto u = [&](Real t)->K { return f(b-t);}; + return detail::exp_sinh_integrate_impl(u, error, L1, function, tolerance, levels); + } + + // Infinite limits: + if ((a <= -boost::math::tools::max_value()) && (b >= boost::math::tools::max_value())) + { + return static_cast(policies::raise_domain_error(function, "Use sinh_sinh quadrature for integration over the whole real line; exp_sinh is for half infinite integrals.", a, Policy())); + } + // If we get to here then both ends must necessarily be finite: + return static_cast(policies::raise_domain_error(function, "Use tanh_sinh quadrature for integration over finite domains; exp_sinh is for half infinite integrals.", a, Policy())); +} + +template > +__device__ auto exp_sinh_integrate(const F& f, Real tolerance, Real* error, Real* L1, boost::math::size_t* levels) const +{ + BOOST_MATH_STD_USING + constexpr auto function = "boost::math::quadrature::exp_sinh<%1%>::integrate"; + using std::abs; + if (abs(tolerance) > 1) { + return policies::raise_domain_error(function, "The tolerance provided (%1%) is unusually large; did you confuse it with a domain bound?", tolerance, Policy()); + } + return 
detail::exp_sinh_integrate_impl(f, error, L1, function, tolerance, levels); +} + +} // namespace quadrature +} // namespace math +} // namespace boost + +#endif // BOOST_MATH_ENABLE_CUDA + +#endif // BOOST_MATH_QUADRATURE_EXP_SINH_HPP From 0b12cd59e21ea485d2e977d92d38acb79cd78662 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 9 Sep 2024 17:08:49 -0400 Subject: [PATCH 03/22] Fix function signatures --- include/boost/math/quadrature/exp_sinh.hpp | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/include/boost/math/quadrature/exp_sinh.hpp b/include/boost/math/quadrature/exp_sinh.hpp index f9d388b26..94dcb6ec5 100644 --- a/include/boost/math/quadrature/exp_sinh.hpp +++ b/include/boost/math/quadrature/exp_sinh.hpp @@ -116,15 +116,13 @@ namespace math { namespace quadrature { template > -__device__ auto exp_sinh_integrate(const F& f, Real a, Real b, Real tolerance, Real* error, Real* L1, boost::math::size_t* levels) const +__device__ auto exp_sinh_integrate(const F& f, Real a, Real b, Real tolerance, Real* error, Real* L1, boost::math::size_t* levels) { BOOST_MATH_STD_USING using K = decltype(f(a)); static_assert(!boost::math::is_integral::value, "The return type cannot be integral, it must be either a real or complex floating point type."); - using boost::math::constants::half; - using boost::math::quadrature::detail::exp_sinh_detail; constexpr auto function = "boost::math::quadrature::exp_sinh<%1%>::integrate"; @@ -132,23 +130,23 @@ __device__ auto exp_sinh_integrate(const F& f, Real a, Real b, Real tolerance, R if((boost::math::isnan)(a) || (boost::math::isnan)(b)) { return static_cast(policies::raise_domain_error(function, "NaN supplied as one limit of integration - sorry I don't know what to do", a, Policy())); - } + } // Right limit is infinite: if ((boost::math::isfinite)(a) && (b >= boost::math::tools::max_value())) { // If a = 0, don't use an additional level of indirection: if (a == static_cast(0)) { - return 
detail::exp_sinh_integrate_impl(f, error, L1, function, tolerance, levels); + return detail::exp_sinh_integrate_impl(f, tolerance, error, L1, levels); } const auto u = [&](Real t)->K { return f(t + a); }; - return detail::exp_sinh_integrate_impl(u, error, L1, function, tolerance, levels); + return detail::exp_sinh_integrate_impl(u, tolerance, error, L1, levels); } if ((boost::math::isfinite)(b) && a <= -boost::math::tools::max_value()) { const auto u = [&](Real t)->K { return f(b-t);}; - return detail::exp_sinh_integrate_impl(u, error, L1, function, tolerance, levels); + return detail::exp_sinh_integrate_impl(u, tolerance, error, L1, levels); } // Infinite limits: @@ -161,7 +159,7 @@ __device__ auto exp_sinh_integrate(const F& f, Real a, Real b, Real tolerance, R } template > -__device__ auto exp_sinh_integrate(const F& f, Real tolerance, Real* error, Real* L1, boost::math::size_t* levels) const +__device__ auto exp_sinh_integrate(const F& f, Real tolerance, Real* error, Real* L1, boost::math::size_t* levels) { BOOST_MATH_STD_USING constexpr auto function = "boost::math::quadrature::exp_sinh<%1%>::integrate"; @@ -169,7 +167,7 @@ __device__ auto exp_sinh_integrate(const F& f, Real tolerance, Real* error, Real if (abs(tolerance) > 1) { return policies::raise_domain_error(function, "The tolerance provided (%1%) is unusually large; did you confuse it with a domain bound?", tolerance, Policy()); } - return detail::exp_sinh_integrate_impl(f, error, L1, function, tolerance, levels); + return detail::exp_sinh_integrate_impl(f, tolerance, error, L1, levels); } } // namespace quadrature From 43dcc25593069134ff1fbdf6c24ce781491dac22 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 9 Sep 2024 17:08:58 -0400 Subject: [PATCH 04/22] Fix arrays --- .../quadrature/detail/exp_sinh_detail.hpp | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/include/boost/math/quadrature/detail/exp_sinh_detail.hpp 
b/include/boost/math/quadrature/detail/exp_sinh_detail.hpp index 4ddc28c90..59aac5665 100644 --- a/include/boost/math/quadrature/detail/exp_sinh_detail.hpp +++ b/include/boost/math/quadrature/detail/exp_sinh_detail.hpp @@ -548,12 +548,18 @@ void exp_sinh_detail::init(const std::integral_constant&) #else // BOOST_MATH_ENABLE_CUDA +#include +#include +#include +#include +#include + namespace boost { namespace math { namespace quadrature { namespace detail { -__constant__ float m_abscissas_float[][] = { +__constant__ float m_abscissas_float[8][527] = { { 3.47876573e-23f, 5.62503650e-09f, 9.95706124e-04f, 9.67438487e-02f, 7.43599217e-01f, 4.14293205e+00f, 1.08086768e+02f, 4.56291316e+05f, 2.70123007e+15f, }, { 2.41870864e-14f, 1.02534662e-05f, 1.65637566e-02f, 3.11290799e-01f, 1.64691269e+00f, 1.49800773e+01f, @@ -734,7 +740,7 @@ __constant__ float m_abscissas_float[][] = { 1.53216987e+18f, 2.96403754e+18f, 5.79389087e+18f, 1.14455803e+19f, 2.28537992e+19f, }, }; -__constant__ float m_weights_float[][] = { +__constant__ float m_weights_float[8][527] = { { 1.79979618e-21f, 1.07218106e-07f, 7.05786060e-03f, 2.72310168e-01f, 1.18863515e+00f, 8.77655464e+00f, 5.33879432e+02f, 5.98892409e+06f, 9.60751551e+16f, }, { 7.59287827e-13f, 1.18886775e-04f, 7.27332179e-02f, 6.09156795e-01f, 2.71431234e+00f, 4.68800805e+01f, @@ -915,9 +921,10 @@ __constant__ float m_weights_float[][] = { 6.42020070e+19f, 1.26155602e+20f, 2.50480806e+20f, 5.02601059e+20f, 1.01935525e+21f, }, }; -__device__ exp_sinh_integrate_impl(const F& f, Real* error, Real* L1, const char* function, Real tolerance, boost::math::size_t* levels) +template > +__device__ auto exp_sinh_integrate_impl(const F& f, Real tolerance, Real* error, Real* L1, boost::math::size_t* levels) { - using K = decltype(f(static_cast(0))) K; + using K = decltype(f(static_cast(0))); using boost::math::constants::half; using boost::math::constants::half_pi; @@ -1007,8 +1014,8 @@ __device__ exp_sinh_integrate_impl(const F& f, Real* error, 
Real* L1, const char K sum = 0; Real absum = 0; - auto abscissas_row = get_abscissa_row(i); - auto weight_row = get_weight_row(i); + auto& abscissas_row = m_abscissas[i]; + auto& weight_row = m_weights[i]; // appoximate location to start looking for lowest meaningful abscissa value first_j = first_j == 0 ? 0 : 2 * first_j - 1; @@ -1033,7 +1040,7 @@ __device__ exp_sinh_integrate_impl(const F& f, Real* error, Real* L1, const char err = abs(I0 - I1); if (!(boost::math::isfinite)(L1_I1)) { - return static_cast(policies::raise_evaluation_error(function, "The exp_sinh quadrature evaluated your function at a singular point and returned %1%. Please ensure your function evaluates to a finite number over its entire domain.", I1, Policy())); + return static_cast(policies::raise_evaluation_error("exp_sinh_integrate", "The exp_sinh quadrature evaluated your function at a singular point and returned %1%. Please ensure your function evaluates to a finite number over its entire domain.", I1, Policy())); } if (err <= tolerance*L1_I1) { From aee651ecab54199e7b188036e88d0155a8438c80 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 9 Sep 2024 17:09:13 -0400 Subject: [PATCH 05/22] Add basic test for compilation --- test/cuda_jamfile | 3 + test/test_exp_sinh_quad_float.cu | 111 +++++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+) create mode 100644 test/test_exp_sinh_quad_float.cu diff --git a/test/cuda_jamfile b/test/cuda_jamfile index 89fd07009..4cd72ff21 100644 --- a/test/cuda_jamfile +++ b/test/cuda_jamfile @@ -9,6 +9,9 @@ project : requirements [ requires cxx14_decltype_auto cxx14_generic_lambdas cxx14_return_type_deduction cxx14_variable_templates cxx14_constexpr ] ; +# Quad +run test_exp_sinh_quad_float.cu ; + # Distributions run test_arcsine.cpp ; run test_arcsine_cdf_double.cu ; diff --git a/test/test_exp_sinh_quad_float.cu b/test/test_exp_sinh_quad_float.cu new file mode 100644 index 000000000..9d1018b9b --- /dev/null +++ b/test/test_exp_sinh_quad_float.cu 
@@ -0,0 +1,111 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +__host__ __device__ float_type func(float_type x) +{ + BOOST_MATH_STD_USING + return sin(x); +} + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + float_type tol = boost::math::tools::root_epsilon(); + float_type error; + float_type L1; + boost::math::size_t levels; + + if (i < numElements) + { + out[i] = boost::math::quadrature::exp_sinh_integrate(func, tol, &error, &L1, &levels); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = M_PI * (static_cast(i) / numElements); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(output_vector.get(), numElements); + cudaDeviceSynchronize(); + + 
std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + double t = w.elapsed(); + /* + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::expm1(input_vector[i])); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + */ + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} From 1032904bdf7a97fd003a705ac5d68bd687f6b605 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 10 Sep 2024 09:19:40 -0400 Subject: [PATCH 06/22] Allow serial implementation to be run on host under NVCC --- include/boost/math/quadrature/detail/exp_sinh_detail.hpp | 6 ++++-- include/boost/math/quadrature/exp_sinh.hpp | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/include/boost/math/quadrature/detail/exp_sinh_detail.hpp b/include/boost/math/quadrature/detail/exp_sinh_detail.hpp index 59aac5665..82fcca600 100644 --- a/include/boost/math/quadrature/detail/exp_sinh_detail.hpp +++ b/include/boost/math/quadrature/detail/exp_sinh_detail.hpp @@ -9,7 +9,7 @@ #include -#ifndef BOOST_MATH_ENABLE_CUDA +#ifndef BOOST_MATH_HAS_NVRTC #include #include @@ -546,7 +546,9 @@ void exp_sinh_detail::init(const std::integral_constant&) } } -#else // BOOST_MATH_ENABLE_CUDA +#endif // BOOST_MATH_HAS_NVRTC + +#ifdef BOOST_MATH_ENABLE_CUDA // BOOST_MATH_ENABLE_CUDA #include #include diff --git a/include/boost/math/quadrature/exp_sinh.hpp 
b/include/boost/math/quadrature/exp_sinh.hpp index 94dcb6ec5..5f5c486bf 100644 --- a/include/boost/math/quadrature/exp_sinh.hpp +++ b/include/boost/math/quadrature/exp_sinh.hpp @@ -18,7 +18,7 @@ #include #include -#ifndef BOOST_MATH_ENABLE_CUDA +#ifndef BOOST_MATH_HAS_NVRTC #include #include @@ -103,7 +103,9 @@ auto exp_sinh::integrate(const F& f, Real tolerance, Real* error, }}} -#else +#endif // BOOST_MATH_HAS_NVRTC + +#ifdef BOOST_MATH_ENABLE_CUDA #include #include From 7a3905eacdb728fe0200dfd4cd081486af7b4e98 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 10 Sep 2024 09:20:01 -0400 Subject: [PATCH 07/22] Add verification steps --- test/test_exp_sinh_quad_float.cu | 45 ++++++++++++++++++++++++++------ 1 file changed, 37 insertions(+), 8 deletions(-) diff --git a/test/test_exp_sinh_quad_float.cu b/test/test_exp_sinh_quad_float.cu index 9d1018b9b..46cf2f533 100644 --- a/test/test_exp_sinh_quad_float.cu +++ b/test/test_exp_sinh_quad_float.cu @@ -9,6 +9,7 @@ #include #include #include +#include #include #include "cuda_managed_ptr.hpp" #include "stopwatch.hpp" @@ -21,7 +22,7 @@ typedef double float_type; __host__ __device__ float_type func(float_type x) { BOOST_MATH_STD_USING - return sin(x); + return 1/(1+x*x); } /** @@ -87,23 +88,51 @@ int main(void) } // Verify that the result vector is correct - double t = w.elapsed(); - /* std::vector results; results.reserve(numElements); w.reset(); + float_type tol = boost::math::tools::root_epsilon(); + float_type error; + float_type L1; + boost::math::quadrature::exp_sinh integrator; for(int i = 0; i < numElements; ++i) - results.push_back(boost::math::expm1(input_vector[i])); + { + results.push_back(integrator.integrate(func, tol, &error, &L1)); + } + double t = w.elapsed(); // check the results + int non_finite_count = 0; + int failed_count = 0; for(int i = 0; i < numElements; ++i) { - if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + if (!std::isfinite(output_vector[i])) { - std::cerr << 
"Result verification failed at element " << i << "!" << std::endl; - return EXIT_FAILURE; + const auto eps = boost::math::epsilon_difference(output_vector[i], results[i]); + if (eps > 10) + { + std::cerr << "Result verification failed at element " << i << "!\n" + << "Device: " << output_vector[i] + << "\nHost: " << results[i] + << "\nEps: " << eps << "\n"; + failed_count++; + if (failed_count > 100) + { + break; + } + } + } + else + { + ++non_finite_count; } } - */ + + if (failed_count != 0 || non_finite_count == numElements) + { + std::cout << "Test FAILED" << std::endl; + return EXIT_FAILURE; + } + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; std::cout << "Done\n"; From aae6a78c38003370677942fe81e2f07b76fdd506 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 10 Sep 2024 09:56:27 -0400 Subject: [PATCH 08/22] Add arrays of levels coefficient sizes --- .../quadrature/detail/exp_sinh_detail.hpp | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/include/boost/math/quadrature/detail/exp_sinh_detail.hpp b/include/boost/math/quadrature/detail/exp_sinh_detail.hpp index 82fcca600..fe99fc338 100644 --- a/include/boost/math/quadrature/detail/exp_sinh_detail.hpp +++ b/include/boost/math/quadrature/detail/exp_sinh_detail.hpp @@ -923,6 +923,14 @@ __constant__ float m_weights_float[8][527] = { 6.42020070e+19f, 1.26155602e+20f, 2.50480806e+20f, 5.02601059e+20f, 1.01935525e+21f, }, }; + +// Since we have to use C arrays we can't compensate for the fact that each level has +// a different number of coefficients. +// Store the actual sizes in these size arrays so we don't cruise head first into segfaults. 
+__constant__ boost::math::size_t float_coefficients_size[8] = {9, 8, 16, 33, 66, 132, 263, 527}; + +__constant__ boost::math::size_t double_coefficients_size[8] = {13, 12, 25, 49, 98, 196, 393, 786}; + template > __device__ auto exp_sinh_integrate_impl(const F& f, Real tolerance, Real* error, Real* L1, boost::math::size_t* levels) { @@ -943,11 +951,12 @@ __device__ auto exp_sinh_integrate_impl(const F& f, Real tolerance, Real* error, // Get the party started with two estimates of the integral: auto& m_abscissas = m_abscissas_float; auto& m_weights = m_weights_float; + auto& m_size = float_coefficients_size; Real min_abscissa{ 0 }, max_abscissa{ boost::math::tools::max_value() }; K I0 = 0; Real L1_I0 = 0; - for(boost::math::size_t i = 0; i < sizeof(m_abscissas[0])/sizeof(Real); ++i) + for(boost::math::size_t i = 0; i < m_size[0]; ++i) { K y = f(m_abscissas[0][i]); K I0_last = I0; @@ -965,7 +974,7 @@ __device__ auto exp_sinh_integrate_impl(const F& f, Real tolerance, Real* error, Real L1_I1 = L1_I0; bool have_first_j = false; boost::math::size_t first_j = 0; - for (boost::math::size_t i = 0; (i < sizeof(m_abscissas[1])/sizeof(Real)) && (m_abscissas[1][i] < max_abscissa); ++i) + for (boost::math::size_t i = 0; (i < m_size[1]) && (m_abscissas[1][i] < max_abscissa); ++i) { K y = f(m_abscissas[1][i]); K I1_last = I1; @@ -974,7 +983,7 @@ __device__ auto exp_sinh_integrate_impl(const F& f, Real tolerance, Real* error, if (!have_first_j && (I1_last == I1)) { // No change to the sum, disregard these values on the LHS: - if ((i < sizeof(m_abscissas[1])/sizeof(Real) - 1) && (m_abscissas[1][i + 1] > max_abscissa)) + if ((i < m_size[1] - 1) && (m_abscissas[1][i + 1] > max_abscissa)) { // The summit is so high, that we found nothing in this row which added to the integral!! 
have_first_j = true; @@ -1005,7 +1014,7 @@ __device__ auto exp_sinh_integrate_impl(const F& f, Real tolerance, Real* error, //std::cout << "Second estimate: " << I1 << " Error estimate at level " << 1 << " = " << err << std::endl; boost::math::size_t i = 2; - for(; i < sizeof(m_abscissas)/sizeof(Real); ++i) + for(; i < 8U; ++i) // Magic number 8 is the number of precomputed levels { I0 = I1; L1_I0 = L1_I1; @@ -1028,7 +1037,7 @@ __device__ auto exp_sinh_integrate_impl(const F& f, Real tolerance, Real* error, ++j; } - for(; (j < sizeof(m_weights[i])/sizeof(Real)) && (abscissas_row[j] < max_abscissa); ++j) + for(; (j < m_size[i]) && (abscissas_row[j] < max_abscissa); ++j) { Real x = abscissas_row[j]; K y = f(x); From 173aecc15c482366c93d368a79ac2e621e2f03b6 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 10 Sep 2024 10:30:09 -0400 Subject: [PATCH 09/22] Cleanup test set --- test/test_exp_sinh_quad_float.cu | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/test/test_exp_sinh_quad_float.cu b/test/test_exp_sinh_quad_float.cu index 46cf2f533..1472dbcde 100644 --- a/test/test_exp_sinh_quad_float.cu +++ b/test/test_exp_sinh_quad_float.cu @@ -17,7 +17,7 @@ // For the CUDA runtime routines (prefixed with "cuda_") #include -typedef double float_type; +typedef float float_type; __host__ __device__ float_type func(float_type x) { @@ -101,33 +101,26 @@ int main(void) } double t = w.elapsed(); // check the results - int non_finite_count = 0; int failed_count = 0; for(int i = 0; i < numElements; ++i) { - if (!std::isfinite(output_vector[i])) + const auto eps = boost::math::epsilon_difference(output_vector[i], results[i]); + if (eps > 10) { - const auto eps = boost::math::epsilon_difference(output_vector[i], results[i]); - if (eps > 10) - { - std::cerr << "Result verification failed at element " << i << "!\n" - << "Device: " << output_vector[i] - << "\nHost: " << results[i] - << "\nEps: " << eps << "\n"; - failed_count++; - if 
(failed_count > 100) - { - break; - } - } + std::cerr << std::setprecision(std::numeric_limits::digits10) + << "Result verification failed at element " << i << "!\n" + << "Device: " << output_vector[i] + << "\n Host: " << results[i] + << "\n Eps: " << eps << "\n"; + failed_count++; } - else + if (failed_count > 100) { - ++non_finite_count; + break; } } - if (failed_count != 0 || non_finite_count == numElements) + if (failed_count != 0) { std::cout << "Test FAILED" << std::endl; return EXIT_FAILURE; From 3ec8b5622429923a62fca58bfb2b7b9072792f74 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 10 Sep 2024 10:30:51 -0400 Subject: [PATCH 10/22] Add double test set --- test/cuda_jamfile | 1 + test/test_exp_sinh_quad_double.cu | 133 ++++++++++++++++++++++++++++++ 2 files changed, 134 insertions(+) create mode 100644 test/test_exp_sinh_quad_double.cu diff --git a/test/cuda_jamfile b/test/cuda_jamfile index 4cd72ff21..3f3dcaaa0 100644 --- a/test/cuda_jamfile +++ b/test/cuda_jamfile @@ -11,6 +11,7 @@ project : requirements # Quad run test_exp_sinh_quad_float.cu ; +run test_exp_sinh_quad_double.cu ; # Distributions run test_arcsine.cpp ; diff --git a/test/test_exp_sinh_quad_double.cu b/test/test_exp_sinh_quad_double.cu new file mode 100644 index 000000000..59f6d8a12 --- /dev/null +++ b/test/test_exp_sinh_quad_double.cu @@ -0,0 +1,133 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +__host__ __device__ float_type func(float_type x) +{ + BOOST_MATH_STD_USING + return 1/(1+x*x); +} + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + float_type tol = boost::math::tools::root_epsilon(); + float_type error; + float_type L1; + boost::math::size_t levels; + + if (i < numElements) + { + out[i] = boost::math::quadrature::exp_sinh_integrate(func, tol, &error, &L1, &levels); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = M_PI * (static_cast(i) / numElements); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch 
vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + float_type tol = boost::math::tools::root_epsilon(); + float_type error; + float_type L1; + boost::math::quadrature::exp_sinh integrator; + for(int i = 0; i < numElements; ++i) + { + results.push_back(integrator.integrate(func, tol, &error, &L1)); + } + double t = w.elapsed(); + // check the results + int failed_count = 0; + for(int i = 0; i < numElements; ++i) + { + const auto eps = boost::math::epsilon_difference(output_vector[i], results[i]); + if (eps > 10) + { + std::cerr << std::setprecision(std::numeric_limits::digits10) + << "Result verification failed at element " << i << "!\n" + << "Device: " << output_vector[i] + << "\n Host: " << results[i] + << "\n Eps: " << eps << "\n"; + failed_count++; + } + if (failed_count > 100) + { + break; + } + } + + if (failed_count != 0) + { + std::cout << "Test FAILED" << std::endl; + return EXIT_FAILURE; + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} From 91027b16ec51a3e42aa6c204aa19740bba14766c Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 10 Sep 2024 11:22:24 -0400 Subject: [PATCH 11/22] Add structure for the doubles support --- .../quadrature/detail/exp_sinh_detail.hpp | 824 +++++++++++++++++- 1 file changed, 821 insertions(+), 3 deletions(-) diff --git a/include/boost/math/quadrature/detail/exp_sinh_detail.hpp b/include/boost/math/quadrature/detail/exp_sinh_detail.hpp index fe99fc338..5dd74d508 100644 --- a/include/boost/math/quadrature/detail/exp_sinh_detail.hpp +++ b/include/boost/math/quadrature/detail/exp_sinh_detail.hpp @@ -923,6 +923,804 @@ __constant__ float m_weights_float[8][527] = { 6.42020070e+19f, 1.26155602e+20f, 2.50480806e+20f, 5.02601059e+20f, 1.01935525e+21f, }, }; +__constant__ double 
m_abscissas_double[8][786] = { + { 7.241670621354483269e-163, 2.257639733856759198e-60, 1.153241619257215165e-22, 8.747691973876861825e-09, + 1.173446923800022477e-03, 1.032756936219208144e-01, 7.719261204224504866e-01, 4.355544675823585545e+00, + 1.215101039066652656e+02, 6.228845436711506169e+05, 6.278613977336989392e+15, 9.127414935180233465e+42, + 6.091127771174027909e+116, }, + { 4.547459836328942014e-99, 6.678756542928857080e-37, 5.005042973041566360e-14, 1.341318484151208960e-05, + 1.833875636365939263e-02, 3.257972971286326131e-01, 1.712014688483495078e+00, 1.613222549264089627e+01, + 3.116246745274236447e+03, 3.751603952020919663e+09, 1.132259067258797346e+26, 6.799257464097374238e+70, }, + { 5.314690663257815465e-127, 2.579830034615362946e-77, 3.534801062399966878e-47, 6.733941646704537777e-29, + 8.265803726974829043e-18, 4.424914371157762285e-11, 5.390411046738629465e-07, 1.649389713333761449e-04, + 5.463728936866216652e-03, 4.787896410534771955e-02, 1.931544616590306846e-01, 5.121421856617965197e-01, + 1.144715949265016019e+00, 2.648424684387670480e+00, 7.856804169938798917e+00, 3.944731803343517708e+01, + 5.060291993016831194e+02, 3.181117494063683297e+04, 2.820174654949211729e+07, 1.993745099515255184e+12, + 1.943469269499068563e+20, 2.858803732300638372e+33, 1.457292199029008637e+55, 8.943565831706355607e+90, + 9.016198369791554655e+149, }, + { 8.165631636299519857e-144, 3.658949309353149331e-112, 1.635242513882908826e-87, 2.578381184977746454e-68, + 2.305546416275824199e-53, 1.016725540031465162e-41, 1.191823622917539774e-32, 1.379018088205016509e-25, + 4.375640088826073184e-20, 8.438464631330991606e-16, 1.838483310261119782e-12, 7.334264181393092650e-10, + 7.804740587931068021e-08, 2.970395577741681504e-06, 5.081805431666579484e-05, 4.671401627620431498e-04, + 2.652347404231090523e-03, 1.037409202661683856e-02, 3.045225582205323946e-02, 7.178280364982721201e-02, + 1.434001065841990688e-01, 2.535640852949085796e-01, 4.113268917643175920e-01, 
6.310260805648534613e-01, + 9.404706503455087817e-01, 1.396267301972783068e+00, 2.116896928689963277e+00, 3.364289290471596568e+00, + 5.770183960005836987e+00, 1.104863531218761752e+01, 2.460224479439805859e+01, 6.699316387888639988e+01, + 2.375794092475844708e+02, 1.188092202760116066e+03, 9.269848635975416108e+03, 1.283900116155671304e+05, + 3.723397798030112514e+06, 2.793667983952389721e+08, 7.112973790863854188e+10, 8.704037695808749572e+13, + 8.001474015782459984e+17, 9.804091819390540578e+22, 3.342777673392873288e+29, 8.160092668471508447e+37, + 4.798775331663586528e+48, 3.228614320248853938e+62, 1.836986041572136151e+80, 1.153145986877483804e+103, + 2.160972586723647751e+132, }, + { 4.825077401709435655e-153, 3.813781211050297560e-135, 2.377824349780240844e-119, 2.065817295388293122e-105, + 4.132105770181358886e-93, 2.963965169989404311e-82, 1.127296662046635391e-72, 3.210346399945695041e-64, + 9.282992368222161062e-57, 3.565977853916619677e-50, 2.306962519220473637e-44, 3.098751038516535098e-39, + 1.039558064722960891e-34, 1.025256027381235200e-30, 3.432612000569885403e-27, 4.429681881379089961e-24, + 2.464589267395236846e-21, 6.526691446363344923e-19, 8.976892324445928684e-17, 6.926277695183452225e-15, + 3.208805316815751272e-13, 9.478053068835988899e-12, 1.882052586691155400e-10, 2.632616062773909009e-09, + 2.703411837703917665e-08, 2.113642195965330965e-07, 1.299327029813074013e-06, 6.461189935136030673e-06, + 2.665090959570723827e-05, 9.322774986189288194e-05, 2.820463407940068813e-04, 7.508613300035051413e-04, + 1.786142185986551786e-03, 3.848376610765768211e-03, 7.600810651854199771e-03, 1.390873269178271700e-02, + 2.380489559528694982e-02, 3.842796337748997654e-02, 5.895012901671883992e-02, 8.651391160689367948e-02, + 1.221961347398101671e-01, 1.670112314557845555e-01, 2.219593619059930701e-01, 2.881200442770917241e-01, + 3.667906976948184315e-01, 4.596722879563388211e-01, 5.691113093602836208e-01, 6.984190600916228379e-01, + 
8.523070690462583711e-01, 1.037505121571600249e+00, 1.263672635742961915e+00, 1.544788480334120896e+00, + 1.901333876886441433e+00, 2.363816272813317635e+00, 2.978614980117902904e+00, 3.817957977526709364e+00, + 4.997477803461245639e+00, 6.708150685706236545e+00, 9.276566033183386532e+00, 1.328332469239125539e+01, + 1.980618680552458639e+01, 3.094452809319702849e+01, 5.101378787119006225e+01, 8.943523638413590523e+01, + 1.682138665185088325e+02, 3.427988270281270587e+02, 7.653823671943767281e+02, 1.895993667030670343e+03, + 5.285404568827643942e+03, 1.684878049282191210e+04, 6.254388805482299369e+04, 2.759556544455721132e+05, + 1.481213238071008345e+06, 9.929728611179601424e+06, 8.564987764771851841e+07, 9.831650826344826952e+08, + 1.560339073978569502e+10, 3.575098885016726922e+11, 1.241973798101884982e+13, 6.915106205748805839e+14, + 6.571419716645131084e+16, 1.144558033138694099e+19, 3.960915669532823553e+21, 2.984410558028297842e+24, + 5.430494850258846715e+27, 2.683747612498502676e+31, 4.114885708325522701e+35, 2.276004816861421600e+40, + 5.387544917595833246e+45, 6.623575732955432303e+51, 5.266881304835239338e+58, 3.473234812654772210e+66, + 2.517492645985977377e+75, 2.759797646289240629e+85, 6.569603829502412077e+96, 5.116181648220647995e+109, + 2.073901892339407423e+124, 7.406462446666255838e+140, }, + { 7.053618140948655098e-158, 2.343354218558056628e-148, 2.062509087689351439e-139, 5.212388628332260488e-131, + 4.079380320868843387e-123, 1.061481285006738214e-115, 9.816727607793017691e-109, 3.435400719609722581e-102, + 4.825198574681495574e-96, 2.874760995089533358e-90, 7.652499977338879996e-85, 9.556944498127119032e-80, + 5.862241023038227937e-75, 1.843934000129616663e-70, 3.096983980846232911e-66, 2.885057452402340330e-62, + 1.544904681826443837e-58, 4.917572705671511534e-55, 9.602608566391652866e-52, 1.184882375237471009e-48, + 9.499223316355714793e-46, 5.078965858882528461e-43, 1.856080838373584123e-40, 4.744245560917271585e-38, + 
8.667497891102658240e-36, 1.155086178652063612e-33, 1.144541329818836153e-31, 8.585083084065812874e-30, + 4.957702933032408922e-28, 2.239353794616277882e-26, 8.030405447708765492e-25, 2.318459271131684362e-23, + 5.460287296679086677e-22, 1.062054307071706375e-20, 1.725955878033239909e-19, 2.369168446274347137e-18, + 2.775176063916613602e-17, 2.800847352316621903e-16, 2.457625954357892245e-15, 1.890842052364646528e-14, + 1.285791209258834942e-13, 7.786001004707878219e-13, 4.228083024410741194e-12, 2.072664297543567489e-11, + 9.229295073519997559e-11, 3.754886152592311575e-10, 1.403443871774813834e-09, 4.843962757371872495e-09, + 1.551373196623161433e-08, 4.631448362339623514e-08, 1.294370176865168120e-07, 3.400050664017164356e-07, + 8.426290307581447654e-07, 1.977205177561996033e-06, 4.407362363338667830e-06, 9.362197325373404563e-06, + 1.900760383449277992e-05, 3.698530963711860636e-05, 6.915333419235766653e-05, 1.245492076251852927e-04, + 2.165764713808099093e-04, 3.643870211078977292e-04, 5.943999416122372516e-04, 9.418663022314558591e-04, + 1.452364274261880083e-03, 2.183094846035196562e-03, 3.203848855069215278e-03, 4.597532353031862490e-03, + 6.460168315117479792e-03, 8.900334989802041559e-03, 1.203804973137064275e-02, 1.600315622064554965e-02, + 2.093331703849583304e-02, 2.697174812170771748e-02, 3.426485378063329473e-02, 4.295992956149806344e-02, + 5.320309587203163231e-02, 6.513760993479510261e-02, 7.890268021756337834e-02, 9.463287940877026649e-02, + 1.124582226719385153e-01, 1.325049504086213973e-01, 1.548970316076579260e-01, 1.797583869192584860e-01, + 2.072158210677632145e-01, 2.374026527414815016e-01, 2.704630368855767324e-01, 3.065569893452247137e-01, + 3.458661469783558388e-01, 3.886003277325320632e-01, 4.350049951304795319e-01, 4.853697810067132707e-01, + 5.400382807495678589e-01, 5.994194092045578293e-01, 6.640006964388650918e-01, 7.343640159321037167e-01, + 8.112043806284638130e-01, 8.953526245122194172e-01, 9.878030224123093447e-01, 
1.089747207002141516e+00, + 1.202616144679226559e+00, 1.328132465995424226e+00, 1.468376159872979355e+00, 1.625867601500928277e+00, + 1.803673186618691186e+00, 2.005540624723209206e+00, 2.236073393446881709e+00, 2.500957254018255004e+00, + 2.807256477663534857e+00, 3.163804128101147487e+00, 3.581720263742550029e+00, 4.075105576391566303e+00, + 4.661977749936137761e+00, 5.365546718714963091e+00, 6.215967676434536043e+00, 7.252774367330402583e+00, + 8.528291278204291331e+00, 1.011247001122720391e+01, 1.209982167952718578e+01, 1.461947158782994207e+01, + 1.784992423404041042e+01, 2.204102944968352178e+01, 2.754711235628932374e+01, 3.487766600641650640e+01, + 4.477610230214251576e+01, 5.834406132739843834e+01, 7.724096630394042216e+01, 1.040101075374387191e+02, + 1.426215523101601730e+02, 1.993940974645466479e+02, 2.845939167898235356e+02, 4.152683836292551147e+02, + 6.203878718481709769e+02, 9.504080873581791535e+02, 1.495523342124078853e+03, 2.421485328006836634e+03, + 4.041977218227396500e+03, 6.969453497454785202e+03, 1.244001690461442846e+04, 2.303794930506892099e+04, + 4.437240927040385250e+04, 8.911296561746717657e+04, 1.871159398849787994e+05, 4.119851492265743330e+05, + 9.540971729944126398e+05, 2.331680521880789706e+06, 6.034305391011695472e+06, 1.659896369452266448e+07, + 4.872448839341613053e+07, 1.532687586549090392e+08, 5.189730792935011722e+08, 1.900599621040508288e+09, + 7.566480431232731818e+09, 3.292298322781643849e+10, 1.574714421665075635e+11, 8.330244306239795892e+11, + 4.905619969814187571e+12, 3.238316002757222702e+13, 2.413995281454699076e+14, 2.048115587426077343e+15, + 1.994352670766892066e+16, 2.248750566422739144e+17, 2.964037541992353401e+18, 4.613233119968213445e+19, + 8.569680508342001161e+20, 1.921851711942844799e+22, 5.266829246099861758e+23, 1.786779952992288976e+25, + 7.607919705736976491e+26, 4.125721424346450007e+28, 2.894340142292214313e+30, 2.670720269656428272e+32, + 3.299248229135205151e+34, 5.560105583582310103e+36, 
1.304167266599523020e+39, 4.349382146382717353e+41, + 2.109720387774341509e+44, 1.524825352702403324e+47, 1.684941265105084589e+50, 2.925572737558413426e+53, + 8.217834961057481281e+56, 3.852117991896536784e+60, 3.114452310394384063e+64, 4.498555465873245751e+68, + 1.205113215232800796e+73, 6.230864727145221322e+77, 6.487131248948465269e+82, 1.422810109167834249e+88, + 6.897656089181724717e+93, 7.779163462756485195e+99, 2.155213251859555072e+106, 1.554347160152705281e+113, + 3.103875072425192272e+120, 1.832673821557018634e+128, 3.431285951865278376e+136, 2.194542081542393530e+145, }, + { 2.363803632659058081e-160, 1.926835442612677686e-155, 1.109114905180506786e-150, 4.556759282087534164e-146, + 1.350172241067816232e-141, 2.914359263635229435e-137, 4.627545976953585825e-133, 5.456508344460398758e-129, + 4.821828861306345485e-125, 3.221779152402086241e-121, 1.641732102111619421e-117, 6.433569189921227126e-114, + 1.954582672700428961e-110, 4.639912078942456372e-107, 8.671928891742699827e-104, 1.285485264305858782e-100, + 1.522161801460927566e-97, 1.449767844425295085e-94, 1.118122255504445235e-91, 7.028344777398825069e-89, + 3.623454064991238081e-86, 1.541513438874996543e-83, 5.443699502170284982e-81, 1.604913673768949456e-78, + 3.972206240977317536e-76, 8.297975554162539562e-74, 1.470748835855054032e-71, 2.222935801472624670e-69, + 2.879160361851977720e-67, 3.210837413250902178e-65, 3.097303984958235490e-63, 2.595974479763180595e-61, + 1.898656799199089593e-59, 1.216865518398435626e-57, 6.862041810601184397e-56, 3.418134121780773218e-54, + 1.509758535747580387e-52, 5.934924977563731784e-51, 2.083865009061241099e-49, 6.558128104492290092e-48, + 1.856133016606468181e-46, 4.739964621828176249e-45, 1.095600459825324697e-43, 2.299177139060262518e-42, + 4.393663812095906869e-41, 7.667728102142858487e-40, 1.225476279042445010e-38, 1.798526997315960782e-37, + 2.430201154741018716e-36, 3.030993518975438712e-35, 3.497966609954172613e-34, 3.744308272796551045e-33, + 
3.726132797819332658e-32, 3.455018936399215381e-31, 2.991524108706319604e-30, 2.423818520801870809e-29, + 1.841452809687011486e-28, 1.314419760826235421e-27, 8.831901010260867670e-27, 5.596660060604091621e-26, + 3.350745417080507841e-25, 1.898675566025820409e-24, 1.019982287418197376e-23, 5.203315082978366918e-23, + 2.524668746906057148e-22, 1.166904646009344233e-21, 5.145437675264868732e-21, 2.167677145279166596e-20, + 8.736996911006110678e-20, 3.373776431076593266e-19, 1.249769727462160008e-18, 4.446913832647864892e-18, + 1.521741180930875343e-17, 5.014158301377399707e-17, 1.592708205361177316e-16, 4.882536933653862982e-16, + 1.446109387544416586e-15, 4.142510168443201880e-15, 1.148892083132325407e-14, 3.088024760858924214e-14, + 8.051699653634442236e-14, 2.038478329249539199e-13, 5.015686309363884049e-13, 1.200444984849900298e-12, + 2.797125428309156462e-12, 6.350357793399881333e-12, 1.405881744263466936e-11, 3.037391821635123795e-11, + 6.408863411016101449e-11, 1.321618431565916164e-10, 2.665526566207284474e-10, 5.261497418654313068e-10, + 1.017123184766088896e-09, 1.926882221639203388e-09, 3.579523428497157488e-09, 6.524486847652635035e-09, + 1.167543991262942921e-08, 2.052356080018121741e-08, 3.545879678923676129e-08, 6.024472481556065885e-08, + 1.007076869023518125e-07, 1.657191565891799652e-07, 2.685718943404479677e-07, 4.288752213761154116e-07, + 6.751222405372943925e-07, 1.048111270324302884e-06, 1.605433960692314060e-06, 2.427271958412371013e-06, + 3.623770645356477660e-06, 5.344280132492750309e-06, 7.788767891027678939e-06, 1.122171160022519082e-05, + 1.598877254198599908e-05, 2.253652700952153115e-05, 3.143549403208496646e-05, 4.340664122305257288e-05, + 5.935147653125578529e-05, 8.038574285450253209e-05, 1.078766266062957565e-04, 1.434832731669987826e-04, + 1.892002753957224677e-04, 2.474036705329449166e-04, 3.208988510028906069e-04, 4.129696713145546995e-04, + 5.274279220384250390e-04, 6.686622480794640482e-04, 8.416855170641220285e-04, 
1.052179598744440400e-03, + 1.306536501050643762e-03, 1.611894824798787196e-03, 1.976170547826080496e-03, 2.408081229927640721e-03, + 2.917162840577481875e-03, 3.513778549028205519e-03, 4.209118976964403112e-03, 5.015193592567630665e-03, + 5.944813116164644191e-03, 7.011563005746090924e-03, 8.229768289624073049e-03, 9.614450207543986041e-03, + 1.118127530523730813e-02, 1.294649779580742160e-02, 1.492689615029751590e-02, 1.713970500593860526e-02, + 1.960254358145296755e-02, 2.233334186285684056e-02, 2.535026586984720664e-02, 2.867164333232700310e-02, + 3.231589109997912964e-02, 3.630144557680610965e-02, 4.064669741956638109e-02, 4.536993166688766414e-02, + 5.048927437769432941e-02, 5.602264675683979161e-02, 6.198772763597769678e-02, 6.840192506222012774e-02, + 7.528235762939712171e-02, 8.264584606994605986e-02, 9.050891551257121825e-02, 9.888780870447738360e-02, + 1.077985103995250356e-01, 1.172567830270636607e-01, 1.272782136821146663e-01, 1.378782724173011162e-01, + 1.490723817714478840e-01, 1.608759974398061173e-01, 1.733046999768424060e-01, 1.863742974247175786e-01, + 2.001009387790379976e-01, 2.145012382381487190e-01, 2.295924102330349785e-01, 2.453924153016625057e-01, + 2.619201169541956490e-01, 2.791954497739298773e-01, 2.972395991130188526e-01, 3.160751928723792943e-01, + 3.357265060019327741e-01, 3.562196785212496373e-01, 3.775829480426418792e-01, 3.998468979800887046e-01, + 4.230447228497335035e-01, 4.472125123131631074e-01, 4.723895558858634018e-01, 4.986186705332947608e-01, + 5.259465537097384485e-01, 5.544241647649479754e-01, 5.841071380560416511e-01, 6.150562315632864018e-01, + 6.473378153258308278e-01, 6.810244045956889952e-01, 7.161952432654565143e-01, 7.529369438691556459e-01, + 7.913441913000366617e-01, 8.315205183502086596e-01, 8.735791622734589226e-01, 9.176440128265773576e-01, + 9.638506636817484398e-01, 1.012347580753402101e+00, 1.063297402882930381e+00, 1.116878392515788506e+00, + 1.173286056537125469e+00, 1.232734960362603918e+00, 
1.295460761779549539e+00, 1.361722494981910846e+00, + 1.431805139837984876e+00, 1.506022516788234345e+00, 1.584720554029819354e+00, 1.668280980969603645e+00, + 1.757125510515793421e+00, 1.851720582866847453e+00, 1.952582755329533200e+00, 2.060284836698905963e+00, + 2.175462881275503983e+00, 2.298824177179966629e+00, 2.431156386859774759e+00, 2.573338025304717222e+00, + 2.726350494395667363e+00, 2.891291931102408784e+00, 3.069393174263124520e+00, 3.262036211067640944e+00, + 3.470775532153801919e+00, 3.697362905908153155e+00, 3.943776181224350319e+00, 4.212252847439515687e+00, + 4.505329225191826639e+00, 4.825886338442190807e+00, 5.177203733275742875e+00, 5.563022772612923373e+00, + 5.987621259260909859e+00, 6.455901637501497370e+00, 6.973495514195020291e+00, 7.546887847708181032e+00, + 8.183564906772872855e+00, 8.892191039842283431e+00, 9.682820467523296204e+00, 1.056715177903931837e+01, + 1.155883465937652851e+01, 1.267384070151528947e+01, 1.393091310389918289e+01, 1.535211379418177923e+01, + 1.696349128797309510e+01, 1.879589868990482198e+01, 2.088599907466058846e+01, 2.327750557804054323e+01, + 2.602271658731131093e+01, 2.918442338619305962e+01, 3.283828974258811174e+01, 3.707583192189045823e+01, + 4.200816575721451990e+01, 4.777073782243997224e+01, 5.452932468101429049e+01, 6.248767344468634478e+01, + 7.189727649240108469e+01, 8.306993427631743111e+01, 9.639397813652482031e+01, 1.123553215857374919e+02, + 1.315649140340119335e+02, 1.547947284376312334e+02, 1.830251850988715552e+02, 2.175079854175568113e+02, + 2.598498278995140400e+02, 3.121245867818556035e+02, 3.770245173783702458e+02, 4.580653020257635092e+02, + 5.598658426219653689e+02, 6.885324967857802403e+02, 8.521902266884453403e+02, 1.061721815114114004e+03, + 1.331803836529085656e+03, 1.682368940494210217e+03, 2.140685129891926443e+03, 2.744334847491432747e+03, + 3.545516659371773357e+03, 4.617306735234797694e+03, 6.062848530677391758e+03, 8.028955134017154634e+03, + 1.072641999277462936e+04, 
1.446061873485939411e+04, 1.967804579389513789e+04, 2.703776201447279367e+04, + 3.752217148194723312e+04, 5.261052412010591097e+04, 7.455350923854624329e+04, 1.068125318497402759e+05, + 1.547702528541975911e+05, 2.268930751685412563e+05, 3.366554971645478061e+05, 5.057644049026088560e+05, + 7.696291826884134742e+05, 1.186761864945790800e+06, 1.855146094294667715e+06, 2.941132644236832276e+06, + 4.731169740596920355e+06, 7.725765147199987935e+06, 1.281272565991955126e+07, 2.159151785284808339e+07, + 3.699029448836502904e+07, 6.445902263727884020e+07, 1.143158678867853615e+08, 2.064425450996979446e+08, + 3.798502995329785506e+08, 7.125329484929003007e+08, 1.363463294023391629e+09, 2.663196590686555077e+09, + 5.313347815419462975e+09, 1.083506369700027396e+10, 2.259930737910197667e+10, 4.824707991473375387e+10, + 1.055069002818752104e+11, 2.365138040635727209e+11, 5.439266129959972285e+11, 1.284356371641026839e+12, + 3.116424654245920797e+12, 7.777312465656280419e+12, 1.997984843259596733e+13, 5.288649037339853118e+13, + 1.443776937640548342e+14, 4.068967444890414804e+14, 1.185049702391501141e+15, 3.570348091883284324e+15, + 1.113971254034978026e+16, 3.603374982229766184e+16, 1.209803708182151942e+17, 4.220890251904870611e+17, + 1.532169872312865862e+18, 5.793890867821715890e+18, 2.285379920879842924e+19, 9.415714369232187727e+19, + 4.057471211245170887e+20, 1.831405465804324767e+21, 8.671209773404504008e+21, 4.313209261217173994e+22, + 2.257498454242656934e+23, 1.245267136898199709e+24, 7.251536499435180219e+24, 4.465573963364524765e+25, + 2.913233420596266283e+26, 2.017063171206072979e+27, 1.485014353353330393e+28, 1.164811091759882662e+29, + 9.753661264047912784e+29, 8.737124417851167566e+30, 8.390503265508677363e+31, 8.657362701430272680e+32, + 9.619472292454361392e+33, 1.153735498483960294e+35, 1.497284701983562213e+36, 2.107816695320163748e+37, + 3.227106623185610745e+38, 5.387696372515021985e+39, 9.835496017627849225e+40, 1.968904749086105300e+42, + 
4.334704147416758275e+43, 1.052717645113369473e+45, 2.829013521120326147e+46, 8.439656297525588822e+47, + 2.804279894508234869e+49, 1.041383695988523864e+51, 4.337366591019718310e+52, 2.033523569151676725e+54, + 1.077238847489773081e+56, 6.472891251891105455e+57, 4.429404678715878536e+59, 3.466135480828349864e+61, + 3.114928656972704276e+63, 3.228947925415990689e+65, 3.878402486902381042e+67, 5.423187597439531197e+69, + 8.870779393460412583e+71, 1.705832285076755970e+74, 3.876224350373120420e+76, 1.046359534886878004e+79, + 3.373858809560757544e+81, 1.306762499786044015e+84, 6.115300889685679832e+86, 3.478550048884517349e+89, + 2.420073578988056289e+92, 2.072453567501123129e+95, 2.199029867204449277e+98, 2.910868575802139983e+101, + 4.840699137490951163e+104, 1.018669397739170369e+108, 2.733025017438095928e+111, 9.420797277586029837e+114, + 4.205525105722885986e+118, 2.451352708852151939e+122, 1.881577053794165543e+126, 1.918506219134233785e+130, + 2.622069659115564900e+134, 4.848463485415763756e+138, 1.224645005481997780e+143, 4.267387286482591954e+147, + 2.072505613372582377e+152, }, + { 1.323228129684237783e-161, 4.129002973520822791e-159, 1.178655462569548882e-156, 3.082189008893206231e-154, + 7.393542832199414487e-152, 1.629100644355328639e-149, 3.301545529059822941e-147, 6.162031390854241227e-145, + 1.060528194470986309e-142, 1.685225757497235089e-140, 2.475534097582263629e-138, 3.365764749507587192e-136, + 4.240562683924022383e-134, 4.956794227885611715e-132, 5.381716367914161520e-130, 5.433507172294988849e-128, + 5.107031242794315420e-126, 4.473704932098646394e-124, 3.656376947377888629e-122, 2.791170022694259001e-120, + 1.992200238692415032e-118, 1.330894359393789718e-116, 8.330356767359890503e-115, 4.890256639970245146e-113, + 2.695128935451165447e-111, 1.395829605415630844e-109, 6.799997527188085942e-108, 3.119037767379032293e-106, + 1.348260131419216291e-104, 5.497526018943990804e-103, 2.116384670251198533e-101, 7.699148714858061209e-100, + 
2.649065347250598345e-98, 8.628189263549727753e-97, 2.662520943248368922e-95, 7.790698623582886341e-94, + 2.163354866683077281e-92, 5.705576739797220361e-91, 1.430338193028564913e-89, 3.411040781372328747e-88, + 7.744268073516449037e-87, 1.675136564303435813e-85, 3.454795810595704816e-84, 6.798573137099477363e-83, + 1.277474708033782661e-81, 2.293702139426309483e-80, 3.938021700015175030e-79, 6.469593934876300124e-78, + 1.017725266990912471e-76, 1.534019529793324951e-75, 2.216999886838860916e-74, 3.074100747562803362e-73, + 4.092295330837549092e-72, 5.233434175636538471e-71, 6.433506079763357418e-70, 7.607042677901362161e-69, + 8.656714387163425357e-68, 9.486746058685489974e-67, 1.001756724248288397e-65, 1.019853943834854330e-64, + 1.001591106610665630e-63, 9.494277822444263952e-63, 8.691422918891890649e-62, 7.687977047887448276e-61, + 6.574408104196605248e-60, 5.438162502918425191e-59, 4.353340831363003212e-58, 3.374338762181243411e-57, + 2.533770921173042330e-56, 1.844048925248616738e-55, 1.301410812308480184e-54, 8.910466744374470063e-54, + 5.921538384124132331e-53, 3.821356134297705127e-52, 2.395780657353036891e-51, 1.459882187581820236e-50, + 8.650105472076777327e-50, 4.985933550797199316e-49, 2.796911903237435916e-48, 1.527570118993503332e-47, + 8.126314048196993302e-47, 4.212436363948578182e-46, 2.128604050242564662e-45, 1.048938356323431072e-44, + 5.042753142653687842e-44, 2.365999225494165364e-43, 1.083813462091040325e-42, 4.848963367960316169e-42, + 2.119612873737657277e-41, 9.055947139022002648e-41, 3.782987192192666650e-40, 1.545649846917574765e-39, + 6.178909752126026357e-39, 2.417597558625940386e-38, 9.261305999966332746e-38, 3.474712971194656115e-37, + 1.277215890629181345e-36, 4.600938133935473864e-36, 1.624804314773052044e-35, 5.626808103137929972e-35, + 1.911442429947086471e-34, 6.371300415498187125e-34, 2.084444531309441237e-33, 6.695356060065574234e-33, + 2.112038435637792931e-32, 6.544802906551512393e-32, 1.992864937623987114e-31, 
5.964358817764151755e-31, + 1.754973231464949500e-30, 5.078231558861773863e-30, 1.445447866528259475e-29, 4.048099759391660786e-29, + 1.115752878927994221e-28, 3.027334168442338592e-28, 8.087868498106224788e-28, 2.128106544151858936e-27, + 5.516210113930227985e-27, 1.408890921124863906e-26, 3.546520734326774807e-26, 8.800636481096360494e-26, + 2.153319509043984465e-25, 5.196136544731926346e-25, 1.236869058422202190e-24, 2.904891674490918873e-24, + 6.732707317563258763e-24, 1.540253603361391055e-23, 3.478765727687221019e-23, 7.758450079933031976e-23, + 1.708939324269830276e-22, 3.718467010568811152e-22, 7.994094376769029920e-22, 1.698336774318343123e-21, + 3.566214469724002275e-21, 7.402848534866351662e-21, 1.519411719755297549e-20, 3.083993994528608740e-20, + 6.191388817974459809e-20, 1.229625987010589227e-19, 2.416245949308411084e-19, 4.698551818749419706e-19, + 9.042992978848520439e-19, 1.722880198390020817e-18, 3.249832858354112322e-18, 6.070120594586457562e-18, + 1.122871881646098441e-17, 2.057429235664205922e-17, 3.734613207742816399e-17, 6.716694369267842075e-17, + 1.197063025055043952e-16, 2.114419661115663617e-16, 3.702017138231021853e-16, 6.425665498746337860e-16, + 1.105830903726985419e-15, 1.887156051660563224e-15, 3.193979018679125833e-15, 5.361881977473204459e-15, + 8.929318568606692809e-15, 1.475330560958586660e-14, 2.418708636765824964e-14, 3.935078350904051302e-14, + 6.354047096308654479e-14, 1.018416666466509442e-13, 1.620423782999307693e-13, 2.559817517056126166e-13, + 4.015273886294212810e-13, 6.254532358261761291e-13, 9.675981021394182858e-13, 1.486832112534566186e-12, + 2.269557377760486879e-12, 3.441736008766365832e-12, 5.185793859860652413e-12, 7.764217889314004663e-12, + 1.155228105746548036e-11, 1.708313121464262097e-11, 2.510951856086201897e-11, 3.668776978510952341e-11, + 5.329131813941740314e-11, 7.696325397299480856e-11, 1.105200723643722855e-10, 1.578221843796034825e-10, + 2.241309672940976766e-10, 3.165773201144956642e-10, 
4.447730510871610704e-10, 6.216041661455164049e-10, + 8.642544905395987868e-10, 1.195519306516659349e-09, 1.645482121417189823e-09, 2.253643612941620883e-09, + 3.071610576496751310e-09, 4.166474690460445927e-09, 5.625036504185181035e-09, 7.559059638953998396e-09, + 1.011177417876491092e-08, 1.346588701906267454e-08, 1.785340092957703350e-08, 2.356759364235337519e-08, + 3.097756373337616088e-08, 4.054581171302714730e-08, 5.284939280085554173e-08, 6.860525247854168448e-08, + 8.870043714076795346e-08, 1.142279599340281637e-07, 1.465291959965373757e-07, 1.872437814520259903e-07, + 2.383680961705324062e-07, 3.023235208219232784e-07, 3.820357732606947876e-07, 4.810267467496160044e-07, + 6.035203917139166314e-07, 7.545643021775656875e-07, 9.401687861337141280e-07, 1.167465314019272078e-06, + 1.444886349199346242e-06, 1.782368666762205796e-06, 2.191582359683820240e-06, 2.686187812137005286e-06, + 3.282122985909738110e-06, 3.997923415034129149e-06, 4.855077333283880469e-06, 5.878418366687560187e-06, + 7.096558206229387964e-06, 8.542361632206236097e-06, 1.025346618920209381e-05, 1.227284870748632855e-05, + 1.464944073127878202e-05, 1.743879474552002742e-05, 2.070380288967650755e-05, 2.451546960924430874e-05, + 2.895373942298085844e-05, 3.410838067694928604e-05, 4.007992581615393488e-05, 4.698066833232878622e-05, + 5.493571614427227251e-05, 6.408410073746518169e-05, 7.457994093551813828e-05, 8.659365970069775654e-05, + 1.003132518682442285e-04, 1.159456002136906496e-04, 1.337178367385581674e-04, 1.538787455425709779e-04, + 1.767002031351005554e-04, 2.024786515302844608e-04, 2.315365989746650402e-04, 2.642241426787982083e-04, + 3.009205074706080013e-04, 3.420355938637258307e-04, 3.880115286439000550e-04, 4.393242107257947798e-04, + 4.964848447258090522e-04, 5.600414544382562271e-04, 6.305803681962314437e-04, 7.087276679481586600e-04, + 7.951505937892094439e-04, 8.905588956558126794e-04, 9.957061239230124343e-04, 1.111390850739538593e-03, + 1.238457814094548688e-03, 
1.377798976832850428e-03, 1.530354493121150144e-03, 1.697113575214988470e-03, + 1.879115253782404405e-03, 2.077449025503311209e-03, 2.293255382179820056e-03, 2.527726216158548279e-03, + 2.782105097477072741e-03, 3.057687418798497807e-03, 3.355820404885606963e-03, 3.677902984083964409e-03, + 4.025385520026097270e-03, 4.399769402530814407e-03, 4.802606497446985045e-03, 5.235498455973840111e-03, + 5.700095884774212336e-03, 6.198097378977308725e-03, 6.731248420937948614e-03, 7.301340148374219834e-03, + 7.910207996239952125e-03, 8.559730217397303903e-03, 9.251826287833445298e-03, 9.988455202809488913e-03, + 1.077161367093554544e-02, 1.160333421372954856e-02, 1.248568317873621646e-02, 1.342075867475355427e-02, + 1.441068843813546585e-02, 1.545762763950860648e-02, 1.656375664055830135e-02, 1.773127871080136402e-02, + 1.896241771447260382e-02, 2.025941577780677588e-02, 2.162453094709917839e-02, 2.306003484797691421e-02, + 2.456821035631025318e-02, 2.615134929114115217e-02, 2.781175013990572523e-02, 2.955171582608151263e-02, + 3.137355152920124081e-02, 3.327956256694509270e-02, 3.527205234875621605e-02, 3.735332041012234938e-02, + 3.952566053633324126e-02, 4.179135898416228534e-02, 4.415269280953487221e-02, 4.661192830883879903e-02, + 4.917131958110712872e-02, 5.183310721786459418e-02, 5.459951712697841302e-02, 5.747275949639657337e-02, + 6.045502790319455825e-02, 6.354849857288828754e-02, 6.675532979350985865e-02, 7.007766148848641979e-02, + 7.351761495191403887e-02, 7.707729274938041525e-02, 8.075877878706524317e-02, 8.456413855143733669e-02, + 8.849541952147546057e-02, 9.255465175496720496e-02, 9.674384865008904765e-02, 1.010650078831426502e-01, + 1.055201125230189472e-01, 1.101111323226840632e-01, 1.148400251877307103e-01, 1.197087388218165293e-01, + 1.247192125486176994e-01, 1.298733793097628269e-01, 1.351731678380792159e-01, 1.406205050053816316e-01, + 1.462173183439629526e-01, 1.519655387409069424e-01, 1.578671033043359383e-01, 1.639239584007306411e-01, + 
1.701380628625154331e-01, 1.765113913651907042e-01, 1.830459379734134606e-01, 1.897437198555789051e-01, + 1.966067811666385690e-01, 2.036371970991047974e-01, 2.108370781024367852e-01, 2.182085742712797843e-01, + 2.257538799033364379e-01, 2.334752382279873511e-01, 2.413749463071469410e-01, 2.494553601102403241e-01, + 2.577188997656175820e-01, 2.661680549911833443e-01, 2.748053907075124803e-01, 2.836335528372471376e-01, + 2.926552742951268547e-01, 3.018733811735925662e-01, 3.112907991295277084e-01, 3.209105599783561596e-01, + 3.307358085024083972e-01, 3.407698094811951648e-01, 3.510159549519934555e-01, 3.614777717099542274e-01, + 3.721589290577866932e-01, 3.830632468159621812e-01, 3.941947036053136035e-01, 4.055574454148868711e-01, + 4.171557944689308074e-01, 4.289942584079951543e-01, 4.410775398002453309e-01, 4.534105460003012245e-01, + 4.659983993741692944e-01, 4.788464479101668631e-01, 4.919602762371392109e-01, 5.053457170727489659e-01, + 5.190088631261786795e-01, 5.329560794812372669e-01, 5.471940164876055195e-01, 5.617296231898020413e-01, + 5.765701613254061793e-01, 5.917232199261468491e-01, 6.071967305576643327e-01, 6.229989832360855492e-01, + 6.391386430620321596e-01, 6.556247676153161584e-01, 6.724668251563812272e-01, 6.896747136835329047e-01, + 7.072587808981804764e-01, 7.252298451337033758e-01, 7.435992173071710726e-01, 7.623787239570054101e-01, + 7.815807314337971290e-01, 8.012181713158943859e-01, 8.213045671260926392e-01, 8.418540624307963733e-01, + 8.628814504084197628e-01, 8.844022049795737430e-01, 9.064325135977815717e-01, 9.289893118061069464e-01, + 9.520903196722039764e-01, 9.757540802219457353e-01, 1.000000000000000000e+00, 1.024848391894543008e+00, + 1.050320520372784475e+00, 1.076438649284173871e+00, 1.103226092399127978e+00, 1.130707266862927052e+00, + 1.158907749757141229e+00, 1.187854337974646084e+00, 1.217575111629048984e+00, 1.248099501235266386e+00, + 1.279458358915164500e+00, 1.311684033900709062e+00, 1.344810452627081143e+00, 
1.378873203729832710e+00, + 1.413909628283517352e+00, 1.449958915644490754e+00, 1.487062205287898607e+00, 1.525262695058439148e+00, + 1.564605756286502811e+00, 1.605139056255971231e+00, 1.646912688547541313e+00, 1.689979311822189937e+00, + 1.734394297653598793e+00, 1.780215888066332921e+00, 1.827505363488657555e+00, 1.876327221885466881e+00, + 1.926749369898304239e+00, 1.978843326886336694e+00, 2.032684442834914613e+00, 2.088352131177556992e+00, + 2.145930117663470432e+00, 2.205506706496711366e+00, 2.267175065075584681e+00, 2.331033528772661605e+00, + 2.397185927317806037e+00, 2.465741934479827004e+00, 2.536817442887937264e+00, 2.610534965993323711e+00, + 2.687024069345184956e+00, 2.766421833546071979e+00, 2.848873351459948781e+00, 2.934532262474922666e+00, + 3.023561326873131923e+00, 3.116133043635102211e+00, 3.212430315307524598e+00, 3.312647163894682976e+00, + 3.416989502097797957e+00, 3.525675964626843197e+00, 3.638938804749809967e+00, 3.757024861729272487e+00, + 3.880196605330264341e+00, 4.008733264172298986e+00, 4.142932045347867609e+00, 4.283109453446644399e+00, + 4.429602717916437040e+00, 4.582771338567048147e+00, 4.742998759991079249e+00, 4.910694186746867507e+00, + 5.086294552335034437e+00, 5.270266656314831820e+00, 5.463109485364516396e+00, 5.665356735708146927e+00, + 5.877579556128345480e+00, 6.100389532781943879e+00, 6.334441939256981670e+00, 6.580439277782222274e+00, + 6.839135140254664526e+00, 7.111338420820842566e+00, 7.397917915172903763e+00, 7.699807345544508469e+00, + 8.018010854664294474e+00, 8.353609016702406728e+00, 8.707765418592385473e+00, 9.081733871099147484e+00, + 9.476866315716376006e+00, 9.894621501007146275e+00, 1.033657451045679019e+01, 1.080442723340841910e+01, + 1.130001988133777781e+01, 1.182534366375335115e+01, 1.238255475156052427e+01, 1.297398967101161563e+01, + 1.360218228861306245e+01, 1.426988256684760289e+01, 1.498007729260327644e+01, 1.573601300513857081e+01, + 1.654122137866316500e+01, 1.739954734664685784e+01, 
1.831518029132688981e+01, 1.929268866318984532e+01, + 2.033705844217826172e+01, 2.145373590584482942e+01, 2.264867523060898736e+01, 2.392839152177298272e+01, + 2.530001994731418268e+01, 2.677138174118011529e+01, 2.835105794560498805e+01, 3.004847188085487195e+01, + 3.187398146713610639e+01, 3.383898267989664904e+01, 3.595602559959535672e+01, 3.823894472392493310e+01, + 4.070300544879345396e+01, 4.336506889917953679e+01, 4.624377760823269784e+01, 4.935976490967979071e+01, + 5.273589133292714765e+01, 5.639751178186770847e+01, 6.037277784867852275e+01, 6.469298027622754351e+01, + 6.939293735292118365e+01, 7.451143592061966836e+01, 8.009173272176674066e+01, 8.618212503236856949e+01, + 9.283660095406551480e+01, 1.001155814082968890e+02, 1.080867678325352448e+02, 1.168261118752949279e+02, + 1.264189260858047240e+02, 1.369611577708331715e+02, 1.485608519349011866e+02, 1.613398336385932743e+02, + 1.754356453320629017e+02, 1.910037809024609590e+02, 2.082202655019913565e+02, 2.272846389233001078e+02, + 2.484234106336023257e+02, 2.718940668983047258e+02, 2.979897251188232016e+02, 3.270445480633676878e+02, + 3.594400516741229885e+02, 3.956124653087335485e+02, 4.360613334959077953e+02, 4.813595846269808355e+02, + 5.321653357808338203e+02, 5.892357556996862196e+02, 6.534433717775449045e+02, 7.257952842284018994e+02, + 8.074558443729566627e+02, 8.997734679339701200e+02, 1.004312392957944252e+03, 1.122890361185594877e+03, + 1.257623408459775530e+03, 1.410979202907522234e+03, 1.585840680166573460e+03, 1.785582106601447262e+03, + 2.014160171499825914e+03, 2.276223289283167479e+03, 2.577243010007973485e+03, 2.923672325162804598e+03, + 3.323136759290736047e+03, 3.784665511113575050e+03, 4.318971620160236406e+03, 4.938792274850918489e+03, + 5.659303058273368331e+03, 6.498623292476395004e+03, 7.478433875318933386e+03, 8.624734342286166238e+03, + 9.968772633484590145e+03, 1.154818959559393902e+04, 1.340843110702649390e+04, 1.560449453908580443e+04, + 1.820309391023133793e+04, 
2.128535066649680777e+04, 2.495014598048375046e+04, 2.931830770482188047e+04, + 3.453785313845473397e+04, 4.079057084931056631e+04, 4.830030527863206410e+04, 5.734341246586992004e+04, + 6.826199159022146453e+04, 8.148067525594191464e+04, 9.752799507478730867e+04, 1.170636462204808295e+05, + 1.409133795481584143e+05, 1.701137853111825512e+05, 2.059699426710509940e+05, 2.501298539735692463e+05, + 3.046808435555379486e+05, 3.722747886360361411e+05, 4.562913164460176067e+05, 5.610511554921845541e+05, + 6.920959565810343691e+05, 8.565564972181198149e+05, 1.063638800552326000e+06, 1.325268101226286025e+06, + 1.656944841847240121e+06, 2.078886479301160156e+06, 2.617555920130068069e+06, 3.307714852226224955e+06, + 4.195192293202626259e+06, 5.340631300250745566e+06, 6.824578495767020734e+06, 8.754424053248831818e+06, + 1.127390159772263517e+07, 1.457614342739689625e+07, 1.892169326841938100e+07, 2.466345986800667442e+07, + 3.228142821711217588e+07, 4.243114571539869754e+07, 5.601173714434088431e+07, 7.426172509723072112e+07, + 9.889461357830121731e+07, 1.322915875470427182e+08, 1.777766240727455981e+08, 2.400110583389834263e+08, + 3.255621033641982742e+08, 4.437258820593761403e+08, 6.077246218504877165e+08, 8.364565879857375417e+08, + 1.157066594326456169e+09, 1.608740826498742961e+09, 2.248337657948688269e+09, 3.158785978851336228e+09, + 4.461677081363911380e+09, 6.336244831048209270e+09, 9.048130159588677560e+09, 1.299321362309972265e+10, + 1.876478261212947929e+10, 2.725703976712888971e+10, 3.982553459064288940e+10, 5.853727794017415415e+10, + 8.656299089553103385e+10, 1.287959733041898747e+11, 1.928345065430099883e+11, 2.905510467545806044e+11, + 4.406145488098485809e+11, 6.725708918778493152e+11, 1.033486938212196930e+12, 1.598840557086695854e+12, + 2.490490134218272825e+12, 3.906528466724583921e+12, 6.171225147961354244e+12, 9.819163736485109137e+12, + 1.573800106991564475e+13, 2.541245461530031221e+13, 4.134437628407981776e+13, 6.778141973485971528e+13, + 
1.119906286595884492e+14, 1.865016806041768967e+14, 3.130890948724989738e+14, 5.298978847669068280e+14, + 9.042973899804181753e+14, 1.556259036818991439e+15, 2.701230066368200812e+15, 4.729430105054711279e+15, + 8.353779033096586530e+15, 1.488827606293191651e+16, 2.677653466031614956e+16, 4.860434481369499270e+16, + 8.905735519300993312e+16, 1.647413728306871552e+17, 3.077081325673016377e+17, 5.804234101329097680e+17, + 1.105828570628099614e+18, 2.128315358808074026e+18, 4.138651532085235581e+18, 8.132554212123920035e+18, + 1.615146503312570855e+19, 3.242548467260718193e+19, 6.581494581080701321e+19, 1.350831366183090003e+20, + 2.804093832520937396e+20, 5.888113683467563837e+20, 1.250923435312468276e+21, 2.689280279098215635e+21, + 5.851582825664479700e+21, 1.288917231788944660e+22, 2.874582763768997631e+22, 6.492437335109217869e+22, + 1.485286605867082177e+23, 3.442469159113307066e+23, 8.084930196860438207e+23, 1.924506778048094878e+24, + 4.643992662491470729e+24, 1.136281452083591334e+25, 2.819664891060694571e+25, 7.097781559991856367e+25, + 1.812838850127688486e+26, 4.699012851344539124e+26, 1.236419707162832951e+27, 3.303236261210411286e+27, + 8.962558097638891218e+27, 2.470294852986226117e+28, 6.918270960555942883e+28, 1.969189447958411510e+29, + 5.698092609453981289e+29, 1.676626156396922084e+30, 5.017901520171556970e+30, 1.527929892279834489e+31, + 4.734762318366711949e+31, 1.493572546446777040e+32, 4.797441164681908184e+32, 1.569538296400998732e+33, + 5.231651156910242454e+33, 1.777206511525290941e+34, 6.154587299576916134e+34, 2.173469781356604872e+35, + 7.829529896526581616e+35, 2.877935554073076917e+36, 1.079761320923458592e+37, 4.136337730951207042e+37, + 1.618408489711185844e+38, 6.469770640447824771e+38, 2.643413654859316358e+39, 1.104246728308525703e+40, + 4.717842641881260665e+40, 2.062296462389327711e+41, 9.226680005161257219e+41, 4.226544071632731963e+42, + 1.983043729707066518e+43, 9.533448690970155039e+43, 4.697914578740208606e+44, 
2.373923101980436574e+45, + 1.230570211868531753e+46, 6.546344338411695147e+46, 3.575371819335804914e+47, 2.005642453538335506e+48, + 1.156055268028903078e+49, 6.849867807870312958e+49, 4.174004815218951121e+50, 2.616872034052857472e+51, + 1.688750346837297725e+52, 1.122275666009684101e+53, 7.683968740248677071e+53, 5.422849612654278583e+54, + 3.946686701799533415e+55, 2.963543587288132884e+56, 2.297086395798939516e+57, 1.838856414208555761e+58, + 1.521049475711243996e+59, 1.300732291175071112e+60, 1.150559591141716740e+61, 1.053265997373725461e+62, + 9.984114209879020836e+62, 9.805325615938694719e+63, 9.982463564199115995e+64, 1.054102211457911410e+66, + 1.155172684780782463e+67, 1.314571302334116663e+68, 1.554362407685457310e+69, 1.910791206002645077e+70, + 2.443616403890711206e+71, 3.252983822318823232e+72, 4.510600140020139737e+73, 6.518821831001902447e+74, + 9.825834460774267633e+75, 1.545692063622722856e+77, 2.539346088408163253e+78, 4.359763993811836117e+79, + 7.827943627464404744e+80, 1.470896877674301183e+82, 2.894527071420674290e+83, 5.969662541607915492e+84, + 1.291277613981057357e+86, 2.931656535626877923e+87, 6.991353547531463135e+88, 1.752671194525972852e+90, + 4.622450137056020715e+91, 1.283581933169566226e+93, 3.755839001138390788e+94, 1.158991729845978702e+96, + 3.774916315438862678e+97, 1.298844894462381673e+99, 4.725038949943384889e+100, 1.819000031203286740e+102, + 7.416966330876906188e+103, 3.206116996910598204e+105, 1.470588770071975193e+107, 7.164198238238641057e+108, + 3.710397624567077270e+110, 2.044882454279709373e+112, 1.200428778654730225e+114, 7.513744370030172114e+115, + 5.019575746343410636e+117, 3.582726927665698318e+119, 2.734947775877248560e+121, 2.235283764078944248e+123, + 1.958084751118243323e+125, 1.840431913109305657e+127, 1.858143260692831108e+129, 2.017432949655777136e+131, + 2.358177615888101494e+133, 2.971092974178603610e+135, 4.039532321435816302e+137, 5.933923069661132195e+139, + 9.429263693444953240e+141, 
1.622841456932873872e+144, 3.028884476067694180e+146, 6.138356175015339477e+148, + 1.352531557191942648e+151, 3.244447362295582945e+153, }, + }; +__constant__ double m_weights_double[8][786] = { + { 2.703640234162693583e-160, 3.100862940179668765e-58, 5.828334625665462970e-21, 1.628894422402653830e-07, + 8.129907377394029252e-03, 2.851214447180802931e-01, 1.228894002317118650e+00, 9.374610761705565881e+00, + 6.136846875218162167e+02, 8.367995944653844271e+06, 2.286032371256753845e+17, 9.029964022492184559e+44, + 1.637973037681055808e+119, }, + { 1.029757744225565290e-96, 5.564174008086804112e-35, 1.534846576427062716e-12, 1.519539651119905182e-04, + 7.878691652861874032e-02, 6.288072016384128612e-01, 2.842403831496369386e+00, 5.152309209026500589e+01, + 2.554172947873109927e+04, 8.291547503290989754e+10, 6.794911791960761587e+27, 1.108995159102362663e+73, }, + { 1.545310485347377408e-124, 4.549745016271158113e-75, 3.781189989988588481e-45, 4.369440793304363176e-27, + 3.253896178006708087e-16, 1.057239289288944987e-09, 7.826174663495492476e-06, 1.459783224353939263e-03, + 2.972970552567852420e-02, 1.637950661613330541e-01, 4.392303913269138921e-01, 8.744243777287317807e-01, + 1.804759465860974506e+00, 4.894937215283148383e+00, 2.036214502429748943e+01, 1.576549789679037479e+02, + 3.249553828744194733e+03, 3.335686029489862584e+05, 4.858218914917275532e+08, 5.655171002571584464e+13, + 9.084276291356790926e+21, 2.202757570781655071e+35, 1.851176020895552142e+57, 1.873046373612647920e+93, + 3.113183070605141140e+152, }, + { 2.690380169654157101e-141, 9.388760099830475385e-110, 3.267856956418766261e-85, 4.012903562780032075e-66, + 2.794595941054873674e-51, 9.598140333687791635e-40, 8.762766371925782803e-31, 7.896919977115783593e-24, + 1.951680620313826776e-18, 2.931867534349928041e-14, 4.976350908135118762e-11, 1.546933241860617074e-08, + 1.283189791774752963e-06, 3.809052946018782340e-05, 5.087526585392884730e-04, 3.656819625189471368e-03, + 1.627679402690602992e-02, 
5.011672130624018967e-02, 1.165913368715250324e-01, 2.201514148384271336e-01, + 3.581909054968942386e-01, 5.288599003801643436e-01, 7.422823219366348741e-01, 1.032914080772662205e+00, + 1.478415067523268199e+00, 2.242226697017918644e+00, 3.684755742578570582e+00, 6.677326887819023056e+00, + 1.358063058433697357e+01, 3.171262375809110066e+01, 8.776338468947827779e+01, 3.006939713363920293e+02, + 1.352196150715330628e+03, 8.616353573310419356e+03, 8.591849573350877359e+04, 1.523635814554291966e+06, + 5.663834603448267056e+07, 5.450828629396188577e+09, 1.780881993484818221e+12, 2.797112703281894578e+15, + 3.300887168363313931e+19, 5.192538272313512016e+24, 2.273085973059979872e+31, 7.124498195222272142e+39, + 5.379592741425673874e+50, 4.647296508337283075e+64, 3.395147156494395571e+82, 2.736576372417856435e+105, + 6.584825756536212781e+134, }, + { 1.692276285171240629e-150, 1.180420021590838281e-132, 6.494931071412232065e-117, 4.979673804239645358e-103, + 8.790122245397054202e-91, 5.564311726870413043e-80, 1.867634664877268411e-70, 4.693767384843440310e-62, + 1.197772698674604837e-54, 4.060530886983702887e-48, 2.318268710612758367e-42, 2.748088060676949794e-37, + 8.136086869664039226e-33, 7.081491999860360593e-29, 2.092407629019781417e-25, 2.383020547076997517e-22, + 1.170143938604536054e-19, 2.734857915002515580e-17, 3.319894174569245506e-15, 2.260825106530477104e-13, + 9.244747974241858562e-12, 2.410325858091057071e-10, 4.224928060220423782e-09, 5.217223349652829804e-08, + 4.730110697329046717e-07, 3.265522864288710545e-06, 1.772851678458610971e-05, 7.787346612077215804e-05, + 2.838101678971546354e-04, 8.775026198694109646e-04, 2.347474744139291716e-03, 5.529174974874315725e-03, + 1.164520226280038968e-02, 2.223487842904240574e-02, 3.896253311038730452e-02, 6.334975706136386464e-02, + 9.651712033300261848e-02, 1.390236708907266445e-01, 1.908593745910709887e-01, 2.515965688234414960e-01, + 3.206651646562737595e-01, 3.976974208167367099e-01, 4.828935799767836828e-01, 
5.773826389735376677e-01, + 6.835838865575605461e-01, 8.056083579298257627e-01, 9.497742078309479997e-01, 1.125351459431134254e+00, + 1.345711576612114788e+00, 1.630156867495860456e+00, 2.006880650908830857e+00, 2.517828844916874130e+00, + 3.226826819856410846e+00, 4.233461155863004269e+00, 5.697400323487776530e+00, 7.882247346334201378e+00, + 1.123717929435969530e+01, 1.655437952523069781e+01, 2.528458931361129124e+01, 4.019700050163276117e+01, + 6.682515670231120695e+01, 1.168022589948424530e+02, 2.160045684819153702e+02, 4.257255901158116698e+02, + 9.017180693982791021e+02, 2.072151523320542727e+03, 5.222689557952776194e+03, 1.461663959276604441e+04, + 4.606455611513396576e+04, 1.660950339384278845e+05, 6.976630616605097333e+05, 3.484240083705972727e+06, + 2.117385064786894718e+07, 1.607368605379557548e+08, 1.570235957877638143e+09, 2.041619284762317483e+10, + 3.670425964529826371e+11, 9.527196643411724126e+12, 3.749667772735766186e+14, 2.365380223523087981e+16, + 2.546815287226970627e+18, 5.026010591299970789e+20, 1.970775914722195502e+23, 1.682531038342715298e+26, + 3.469062187981719410e+29, 1.942614547946028081e+33, 3.375034694941022784e+37, 2.115298406181711256e+42, + 5.673738540911562268e+47, 7.904099301170483654e+53, 7.121903115084356741e+60, 5.321820777644930491e+68, + 4.370977753639010591e+77, 5.429657931755513797e+87, 1.464602226824232950e+99, 1.292445035662836561e+112, + 5.936633203060705474e+126, 2.402419924621336913e+143, }, + { 2.552410363565288863e-155, 7.965872719315690060e-146, 6.586401422963018216e-137, 1.563673437419490296e-128, + 1.149636272392214573e-120, 2.810189759625314580e-113, 2.441446149780773329e-106, 8.026292508555041710e-100, + 1.059034284623927886e-93, 5.927259046205893861e-88, 1.482220909125121967e-82, 1.738946448501809732e-77, + 1.002047910184021813e-72, 2.960929073720769637e-68, 4.671749731809402860e-64, 4.088398674807775827e-60, + 2.056642628601930023e-56, 6.149878578966749305e-53, 1.128142221531950274e-49, 
1.307702777646013040e-46, + 9.848757125541659318e-44, 4.946847667192787369e-41, 1.698284656321589089e-38, 4.077947349805764486e-36, + 6.998897321243266048e-34, 8.762183229651405846e-32, 8.156281709801700633e-30, 5.747366069381804213e-28, + 3.117951907317865517e-26, 1.323052992594482858e-24, 4.457166057119926322e-23, 1.208896132634708032e-21, + 2.674697849739340358e-20, 4.887394807742436672e-19, 7.461632083041868391e-18, 9.622230748739818989e-17, + 1.058884510032627118e-15, 1.003988180288807180e-14, 8.276358838778374127e-14, 5.982281469656734375e-13, + 3.821855766886203088e-12, 2.174279097299082001e-11, 1.109294120074848583e-10, 5.109055596902086022e-10, + 2.137447956882816268e-09, 8.170468538364022161e-09, 2.869308592926374871e-08, 9.305185930419436742e-08, + 2.800231592227134982e-07, 7.855263634214717091e-07, 2.062924236714395731e-06, 5.092224131071637441e-06, + 1.185972357373608535e-05, 2.615333473470835518e-05, 5.479175746096322166e-05, 1.093962713107868416e-04, + 2.087714243290528595e-04, 3.818797556417767457e-04, 6.712796918790164790e-04, 1.136760145626956604e-03, + 1.858775505765622915e-03, 2.941191222579735746e-03, 4.512821350378020080e-03, 6.727293426938802892e-03, + 9.760915371480980900e-03, 1.380842853102550981e-02, 1.907678055354397196e-02, 2.577730275571060412e-02, + 3.411688991056810143e-02, 4.428892397843486143e-02, 5.646473816310556552e-02, 7.078637998740884103e-02, + 8.736131246718460273e-02, 1.062595125372295046e-01, 1.275132133780278017e-01, 1.511193209351630349e-01, + 1.770443400812491404e-01, 2.052314915777496186e-01, 2.356095985715091716e-01, 2.681032744853198083e-01, + 3.026439500331752405e-01, 3.391813282438962329e-01, 3.776949427111484449e-01, 4.182056049753837852e-01, + 4.607866519948383101e-01, 5.055750360563806155e-01, 5.527824318481410262e-01, 6.027066663808878454e-01, + 6.557439076684384801e-01, 7.124021812071310501e-01, 7.733169258916167748e-01, 8.392694625821144443e-01, + 9.112094418201526544e-01, 9.902825786957198607e-01, 
1.077865293953107863e+00, 1.175608288920191064e+00, + 1.285491624542001346e+00, 1.409894601042286311e+00, 1.551684711657329886e+00, 1.714331263928885829e+00, + 1.902051053858215699e+00, 2.119995922515087770e+00, 2.374495377438728901e+00, 2.673372087884984440e+00, + 3.026354489757871517e+00, 3.445619726158519068e+00, 3.946512819227006419e+00, 4.548505964859933724e+00, + 5.276487613615791435e+00, 6.162508226184798743e+00, 7.248163842886806184e+00, 8.587878410768473380e+00, + 1.025346434903602082e+01, 1.234051869120733230e+01, 1.497748183201988157e+01, 1.833859935862139637e+01, + 2.266266859437541631e+01, 2.828045768298752298e+01, 3.565528397044830339e+01, 4.544381261232990127e+01, + 5.858833744254070379e+01, 7.645876087681923606e+01, 1.010741758687003802e+02, 1.354538987141142977e+02, + 1.841824059064608872e+02, 2.543337025162468240e+02, 3.570103970895535977e+02, 5.099537256432247190e+02, + 7.420561390174965949e+02, 1.101323941193719451e+03, 1.669232910686306616e+03, 2.587203282090385703e+03, + 4.106608602134535014e+03, 6.685657263550896700e+03, 1.118216368762133982e+04, 1.924811115485038079e+04, + 3.416174865734933127e+04, 6.263882227839496242e+04, 1.189094418952240294e+05, 2.342262528110389793e+05, + 4.798899889628646876e+05, 1.025279649144740527e+06, 2.290428015483177407e+06, 5.365618820221241118e+06, + 1.322172034826883742e+07, 3.438296542047893623e+07, 9.468905314460992170e+07, 2.771843378168242512e+08, + 8.658950437199969679e+08, 2.898779165825890846e+09, 1.044627762990198184e+10, 4.071673625087267154e+10, + 1.725245696783106160e+11, 7.989856904303845909e+11, 4.067537100664303783e+12, 2.290253922913114847e+13, + 1.435560574531699914e+14, 1.008680130601194048e+15, 8.003530334765274913e+15, 7.227937568629809266e+16, + 7.491693576707361828e+17, 8.991671234614216799e+18, 1.261556024888540618e+20, 2.090038400033346091e+21, + 4.132773073376509056e+22, 9.865671928781943336e+23, 2.877978132616007671e+25, 1.039303004928044064e+27, + 4.710544722984128252e+28, 
2.719194692980296464e+30, 2.030608169419634520e+32, 1.994536427964099457e+34, + 2.622806931876485852e+36, 4.705142628855489738e+38, 1.174794916996875010e+41, 4.170574236544843559e+43, + 2.153441953645800917e+46, 1.656794933445123415e+49, 1.948830907651317326e+52, 3.601980393005358786e+55, + 1.077033440153993124e+59, 5.374188883861674378e+62, 4.625267105826449467e+66, 7.111646979020385006e+70, + 2.027996051444846521e+75, 1.116168784120367146e+80, 1.237019821283735086e+85, 2.888108172342166477e+90, + 1.490426937972460544e+96, 1.789306677271856318e+102, 5.276973875344766848e+108, 4.051217867886536330e+115, + 8.611617868168979525e+122, 5.412634353380155695e+130, 1.078756609821147465e+139, 7.344353246966125053e+147, }, + { 8.688318611421924613e-158, 6.864317997043424201e-153, 3.829638174036322920e-148, 1.524985558970066863e-143, + 4.379527631402474835e-139, 9.162408388991747001e-135, 1.410086556664696347e-130, 1.611529786006329005e-126, + 1.380269212504431613e-122, 8.938739565456142404e-119, 4.414803004265274778e-115, 1.676831992534574674e-111, + 4.937648515671545377e-108, 1.136068312653058895e-104, 2.057969760853201132e-101, 2.956779836249922681e-98, + 3.393449014375824853e-95, 3.132619285740674842e-92, 2.341677665639346254e-89, 1.426656997926173190e-86, + 7.128825597334931865e-84, 2.939485275517928205e-81, 1.006113300119903410e-78, 2.874969402023240560e-76, + 6.896713338909433222e-74, 1.396405038640012785e-71, 2.398869799873387326e-69, 3.514180228970525006e-67, + 4.411557600438730779e-65, 4.768408435763044172e-63, 4.458287229998440383e-61, 3.621710763086768959e-59, + 2.567373174003034094e-57, 1.594829856885795944e-55, 8.716746897177859412e-54, 4.208424534880021226e-52, + 1.801637343401221381e-50, 6.864432292330768862e-49, 2.336084584516383243e-47, 7.125716658075193173e-46, + 1.954733295862350631e-44, 4.838195020814970471e-43, 1.083903033389729471e-41, 2.204655424309513426e-40, + 4.083431629921110537e-39, 6.907095608064865023e-38, 1.069951518082577963e-36, 
1.521972185061747284e-35, + 1.993254198127980161e-34, 2.409552194902670884e-33, 2.695243589253751811e-32, 2.796309045342585624e-31, + 2.697138787161831243e-30, 2.423968619042656074e-29, 2.034233848004972409e-28, 1.597498662808006882e-27, + 1.176341105034547043e-26, 8.138404856556384931e-26, 5.300199402716282910e-25, 3.255367628680633536e-24, + 1.889060856810273071e-23, 1.037502167741821871e-22, 5.402129194695882094e-22, 2.671080147950250592e-21, + 1.256163163817414397e-20, 5.627458451375099018e-20, 2.405110192151924414e-19, 9.820723025892385774e-19, + 3.836610965933493002e-18, 1.435949417965440387e-17, 5.155736116435221852e-17, 1.778106820243535736e-16, + 5.897650538103448384e-16, 1.883545377386949394e-15, 5.799022727889041128e-15, 1.723080101027408120e-14, + 4.946559668895564981e-14, 1.373437058883951037e-13, 3.692057356296675476e-13, 9.618669754374864080e-13, + 2.430904641718059201e-12, 5.965319652795549281e-12, 1.422677541958913512e-11, 3.300412010407028696e-11, + 7.453993539444124847e-11, 1.640317480539372495e-10, 3.519919455549922227e-10, 7.371241496931924727e-10, + 1.507573517782825692e-09, 3.013444008176544118e-09, 5.891170930525923854e-09, 1.127175867596519203e-08, + 2.112135943063526334e-08, 3.878572405868819131e-08, 6.984140168311147329e-08, 1.233979234102365865e-07, + 2.140481233406505212e-07, 3.647293211756793211e-07, 6.108366265875129839e-07, 1.006020283089617901e-06, + 1.630199379920459998e-06, 2.600430208375972125e-06, 4.085372746054298735e-06, 6.324194831966406940e-06, + 9.650830226718535837e-06, 1.452455211307694488e-05, 2.156782506321975658e-05, 3.161234361554654466e-05, + 4.575404320696170555e-05, 6.541767069965264068e-05, 9.243122234114186712e-05, 1.291101968446571125e-04, + 1.783511762821284409e-04, 2.437337497712608884e-04, 3.296292528289701234e-04, 4.413142327104518440e-04, + 5.850859955683163216e-04, 7.683770763700705263e-04, 9.998650298180469208e-04, 1.289573601590465490e-03, + 1.648961132392222413e-03, 2.090991995585424661e-03, 
2.630186988492201910e-03, 3.282648895332118799e-03, + 4.066059914467245175e-03, 4.999648283080481820e-03, 6.104122218554241819e-03, 7.401570199659662364e-03, + 8.915327597805008451e-03, 1.066981070009509413e-02, 1.269032020049755525e-02, 1.500281723149735994e-02, + 1.763367592672867332e-02, 2.060941730962251417e-02, 2.395642996410886880e-02, 2.770068343772389725e-02, + 3.186744063963193757e-02, 3.648097561865623097e-02, 4.156430303997019336e-02, 4.713892543167989540e-02, + 5.322460385886412684e-02, 5.983915712308283792e-02, 6.699829390463281224e-02, 7.471548149065050122e-02, + 8.300185389391494996e-02, 9.186616129460712899e-02, 1.013147618591979452e-01, 1.113516561340355690e-01, + 1.219785634003157786e-01, 1.331950386328042665e-01, 1.449986280439946752e-01, 1.573850606313672716e-01, + 1.703484726870446791e-01, 1.838816618814874884e-01, 1.979763672973498048e-01, 2.126235716643688402e-01, + 2.278138220265254991e-01, 2.435375651517067386e-01, 2.597854941629632707e-01, 2.765489031191654411e-01, + 2.938200465906351752e-01, 3.115925016510994851e-01, 3.298615301301230823e-01, 3.486244394295739435e-01, + 3.678809406939879716e-01, 3.876335036292959599e-01, 4.078877077798518471e-01, 4.286525905940105684e-01, + 4.499409931290513174e-01, 4.717699047639316286e-01, 4.941608088016098926e-01, 5.171400313514193966e-01, + 5.407390963876342256e-01, 5.649950903858123945e-01, 5.899510404480374918e-01, 6.156563103475134535e-01, + 6.421670194591982411e-01, 6.695464901047961714e-01, 6.978657294374126896e-01, 7.272039526349696447e-01, + 7.576491548751669105e-01, 7.892987403432202489e-01, 8.222602173936578230e-01, 8.566519699682320391e-01, + 8.926041164852169437e-01, 9.302594686857616145e-01, 9.697746043788558519e-01, 1.011321069700320644e+00, + 1.055086728430498711e+00, 1.101277278143300224e+00, 1.150117955536247302e+00, 1.201855456275760449e+00, + 1.256760098152647779e+00, 1.315128260359919236e+00, 1.377285136373095709e+00, 1.443587843343442141e+00, + 1.514428937238563465e+00, 
1.590240390338335337e+00, 1.671498096302065311e+00, 1.758726978084942299e+00, + 1.852506785760205887e+00, 1.953478685110838140e+00, 2.062352754065132708e+00, 2.179916523112736371e+00, + 2.307044718290330681e+00, 2.444710391817196957e+00, 2.593997656772008968e+00, 2.756116279277535182e+00, + 2.932418425642610903e+00, 3.124417914187536020e+00, 3.333812383735923205e+00, 3.562508865047068391e+00, + 3.812653330296280988e+00, 4.086664902155689132e+00, 4.387275531849634155e+00, 4.717576109385405085e+00, + 5.081070154695596855e+00, 5.481736462718817995e+00, 5.924102347216244340e+00, 6.413329458204850426e+00, + 6.955314549766230740e+00, 7.556808065486941215e+00, 8.225554008952760095e+00, 8.970455302965185036e+00, + 9.801769746699598466e+00, 1.073134279679936208e+01, 1.177288477943655549e+01, 1.294230185297226511e+01, + 1.425809217068106541e+01, 1.574182134943112610e+01, 1.741869467329444792e+01, 1.931824763074534781e+01, + 2.147518163232618457e+01, 2.393037838236259586e+01, 2.673213477270754163e+01, 2.993767083537830673e+01, + 3.361497689655818107e+01, 3.784508348524495401e+01, 4.272485990900652026e+01, 4.837047622725585887e+01, + 5.492170063250241752e+01, 6.254725265973777743e+01, 7.145149574983117631e+01, 8.188283528217430591e+01, + 9.414429671899321190e+01, 1.086069017070108772e+02, 1.257266497442910506e+02, 1.460661655727672308e+02, + 1.703224100743601641e+02, 1.993623058409479084e+02, 2.342687403011957198e+02, 2.764002385528330658e+02, + 3.274687277481591846e+02, 3.896413615832930151e+02, 4.656745019682919178e+02, 5.590908996105107215e+02, + 6.744152109571297875e+02, 8.174887172033244140e+02, 9.958921680864290197e+02, 1.219517071629880108e+03, + 1.501341972869855447e+03, 1.858493492282554856e+03, 2.313705362529768409e+03, 2.897337235279879262e+03, + 3.650185874628374320e+03, 4.627425468074182920e+03, 5.904167858279871204e+03, 7.583363128219763259e+03, + 9.807105719965428472e+03, 1.277293273832114230e+04, 1.675749596877978193e+04, 2.215121038263169759e+04, + 
2.950937349291504490e+04, 3.962820433513419525e+04, 5.365890489878942635e+04, 7.328024305737981431e+04, + 1.009620167752942516e+05, 1.403709568321740997e+05, 1.970019955923188504e+05, 2.791695960502382133e+05, + 3.995801250202947693e+05, 5.778515877588312220e+05, 8.445944401474017243e+05, 1.248092975135001687e+06, + 1.865367859966950385e+06, 2.820705292493674480e+06, 4.317063433830483499e+06, 6.689961127164684387e+06, + 1.050111601631327499e+07, 1.670327884792325766e+07, 2.693430470211696200e+07, 4.404906898054894166e+07, + 7.309535640536363311e+07, 1.231306812701882145e+08, 2.106560568719367745e+08, 3.662073971851359192e+08, + 6.472124787519330196e+08, 1.163486593592585616e+09, 2.128658395254150452e+09, 3.965732938755983605e+09, + 7.527735928223242836e+09, 1.456757162128879538e+10, 2.875798636941021041e+10, 5.794999654160054887e+10, + 1.192767536774485257e+11, 2.509334090779650360e+11, 5.399624414800303207e+11, 1.189276111740286910e+12, + 2.683103883355551677e+12, 6.205255919751506427e+12, 1.472284072112162717e+13, 3.586628373992547853e+13, + 8.978594107356889337e+13, 2.311710197091641250e+14, 6.127020712804348908e+14, 1.673232679378485978e+15, + 4.712671499032329365e+15, 1.370275025680988289e+16, 4.117347054027612886e+16, 1.279822436878842710e+17, + 4.119762767831332886e+17, 1.374888606936629814e+18, 4.762483833659790733e+18, 1.714288404980390540e+19, + 6.420200704842635702e+19, 2.504808062315322558e+20, 1.019355251138167687e+21, 4.332952958521756932e+21, + 1.926416464889827426e+22, 8.971059571108856501e+22, 4.382317748928748816e+23, 2.249003059943548727e+24, + 1.214458587662725100e+25, 6.911683912813140938e+25, 4.152578123301633020e+26, 2.638346388179288086e+27, + 1.775811490887700718e+28, 1.268552401544524965e+29, 9.635786341213661742e+29, 7.797939379813000783e+30, + 6.736900087983560033e+31, 6.226288752443836475e+32, 6.169035287163451891e+33, 6.567250104576983172e+34, + 7.528666735185428595e+35, 9.316271421365627344e+36, 1.247410737003664698e+38, 
1.811787648043939987e+39, + 2.861918583157116420e+40, 4.929657099622567574e+41, 9.284951278562156071e+42, 1.917687997037326435e+44, + 4.355948096683946408e+45, 1.091453486585817118e+47, 3.026206402784023251e+48, 9.314478983991942688e+49, + 3.193195693823940775e+51, 1.223447678968662613e+53, 5.257403184148516426e+54, 2.543108925126136766e+56, + 1.389947584026783879e+58, 8.616987336205957549e+59, 6.083777056769299984e+61, 4.911841077800001710e+63, + 4.554259483169784661e+65, 4.870815185962582259e+67, 6.036211886847067841e+69, 8.708377755587698026e+71, + 1.469655296381977267e+74, 2.915822924489215887e+76, 6.836044306573246016e+78, 1.903917300559946782e+81, + 6.333813341980360028e+83, 2.531082268773868753e+86, 1.222077360592898816e+89, 7.172167453276776330e+91, + 5.148160232410244898e+94, 4.548619807672339638e+97, 4.979632843475864923e+100, 6.800802744782331957e+103, + 1.166855497965918386e+107, 2.533457765534279043e+110, 7.012864641215147208e+113, 2.494083354169569414e+117, + 1.148722178881219993e+121, 6.908313932158993510e+124, 5.470912484744367184e+128, 5.755359832684120769e+132, + 8.115681923907451939e+136, 1.548304780334447081e+141, 4.034912159113614601e+145, 1.450632759611715526e+150, + 7.268799665580789770e+154, }, + { 4.901759085947701448e-159, 1.505832423620814399e-156, 4.231872109262999523e-154, 1.089479701785106001e-151, + 2.572922387150651649e-149, 5.581311054334156941e-147, 1.113575900126970040e-144, 2.046165051332286084e-142, + 3.466994885004770636e-140, 5.423795404073501922e-138, 7.843833272402847010e-136, 1.049922957933194415e-133, + 1.302301071957418603e-131, 1.498659737828393008e-129, 1.601906622414286282e-127, 1.592248618401983561e-125, + 1.473375345916436274e-123, 1.270651551394009593e-121, 1.022408263525766209e-119, 7.683762602329562781e-118, + 5.399268127233373186e-116, 3.551074274853494676e-114, 2.188235409519121010e-112, 1.264667515430816934e-110, + 6.861807566737243712e-109, 3.498691686825209963e-107, 1.678016807398375157e-105, 
7.577439431441931490e-104, + 3.224703770159386809e-102, 1.294487090677705963e-100, 4.906133250963454139e-99, 1.757121317988153326e-97, + 5.952042491454320383e-96, 1.908566653286417264e-94, 5.798224459236429212e-93, 1.670293239978334727e-91, + 4.566236673398083038e-90, 1.185617342791547945e-88, 2.926160027801296929e-87, 6.870061134126707137e-86, + 1.535565783500379945e-84, 3.270036736778401257e-83, 6.639558007206580362e-82, 1.286319750967398593e-80, + 2.379566581139022958e-79, 4.206268231398883425e-78, 7.109719237833379433e-77, 1.149915104115372777e-75, + 1.780876201255594220e-74, 2.642703796179329883e-73, 3.760085375941719327e-72, 5.132920951124251993e-71, + 6.727100274601427696e-70, 8.469585621347697498e-69, 1.025032382672232848e-67, 1.193219127557863348e-66, + 1.336816930381306582e-65, 1.442283479679798385e-64, 1.499374555004793991e-63, 1.502797203133501438e-62, + 1.453005969318485303e-61, 1.355980448377862540e-60, 1.222072412212552127e-59, 1.064223180270520159e-58, + 8.959667396075636845e-58, 7.296288808079294105e-57, 5.750255296190181158e-56, 4.388011664829013518e-55, + 3.243852451291832398e-54, 2.324239357665538806e-53, 1.614869776203026446e-52, 1.088524605545274842e-51, + 7.121755574192829045e-51, 4.524647662549067074e-50, 2.792730715818793035e-49, 1.675384879603864227e-48, + 9.773114328777676091e-48, 5.545910766847627082e-47, 3.062809705627873645e-46, 1.646862118038266234e-45, + 8.625108513887155847e-45, 4.401687663868890701e-44, 2.189755778847646746e-43, 1.062345336449265889e-42, + 5.028036663485684049e-42, 2.322524635717249223e-41, 1.047406593898341306e-40, 4.613438388449698168e-40, + 1.985397445118162005e-39, 8.351027367454628343e-39, 3.434440903484543389e-38, 1.381489131877196646e-37, + 5.437051201310225224e-37, 2.094357548080647717e-36, 7.898676618592006902e-36, 2.917536870947471272e-35, + 1.055788886022716597e-34, 3.744333812160330812e-34, 1.301801185251957290e-33, 4.438346216893387768e-33, + 1.484348268951816542e-32, 4.871001129849836971e-32, 
1.568903000742513942e-31, 4.961295315917935235e-31, + 1.540773910027990821e-30, 4.700558022172014910e-30, 1.409115230718949596e-29, 4.151913103955692034e-29, + 1.202737613715427748e-28, 3.426327374934496736e-28, 9.601405359397026012e-28, 2.647278642033773301e-27, + 7.183442220565147103e-27, 1.918850545981494042e-26, 5.046974779455992494e-26, 1.307394799925911700e-25, + 3.336342198236957082e-25, 8.389259581136262194e-25, 2.079051813513548608e-24, 5.079178967243765280e-24, + 1.223501794357837278e-23, 2.906654911057549530e-23, 6.811668606095015470e-23, 1.574985938238025303e-22, + 3.593796788969348326e-22, 8.094185411205212564e-22, 1.799796183237481721e-21, 3.951758901641017285e-21, + 8.569580068050865775e-21, 1.835753486517298696e-20, 3.885414339966022317e-20, 8.126613972895021790e-20, + 1.680007182889503141e-19, 3.433369351563962828e-19, 6.937695550399427499e-19, 1.386345631008981755e-18, + 2.740087497759230881e-18, 5.357570288683386626e-18, 1.036464933022803784e-17, 1.984249442010084992e-17, + 3.759788006060003409e-17, 7.052211261821684795e-17, 1.309635641529546221e-16, 2.408275496109180528e-16, + 4.385898809611711552e-16, 7.911758686849121285e-16, 1.413883597877183873e-15, 2.503477536644680210e-15, + 4.392637866550705827e-15, 7.638710306960574612e-15, 1.316703360377476041e-14, 2.250031027275448919e-14, + 3.812239733412214953e-14, 6.405021660191363479e-14, 1.067250538270319484e-13, 1.763897493784721010e-13, + 2.891987565334547756e-13, 4.704242520369958085e-13, 7.592878273512691990e-13, 1.216183338372525172e-12, + 1.933388593436624879e-12, 3.050826852442290751e-12, 4.779080020017636657e-12, 7.432734713385425098e-12, + 1.147833888125873666e-11, 1.760286160372422754e-11, 2.681071101623953168e-11, 4.056023754295965437e-11, + 6.095443492241537222e-11, 9.100550129616064211e-11, 1.349993452136967652e-10, 1.989943912395156051e-10, + 2.914996073619059788e-10, 4.243900781412219621e-10, 6.141353162671391082e-10, 8.834365795894798511e-10, + 1.263395594025933170e-09, 
1.796369250051716047e-09, 2.539704143326480862e-09, 3.570592498287890499e-09, + 4.992348403150539107e-09, 6.942471870489931483e-09, 9.602949600164561371e-09, 1.321333712761666777e-08, + 1.808727901635346390e-08, 2.463325364767791516e-08, 3.338047870136870496e-08, 4.501108426108505069e-08, + 6.039985413333259594e-08, 8.066305374526097834e-08, 1.072181059018892614e-07, 1.418561443795353991e-07, + 1.868297699836383305e-07, 2.449586539172972009e-07, 3.197559780442760832e-07, 4.155790690867544334e-07, + 5.378079713325544678e-07, 6.930561064776686194e-07, 8.894175852502122454e-07, 1.136756157868726006e-06, + 1.447041212534730898e-06, 1.834736645332833504e-06, 2.317248822354253644e-06, 2.915440225825303911e-06, + 3.654215709863551870e-06, 4.563188576773760151e-06, 5.677433909482232878e-06, 7.038336747307571784e-06, + 8.694542758083067228e-06, 1.070301902702759858e-05, 1.313023243937403750e-05, 1.605345286789073897e-05, + 1.956218797728780449e-05, 2.375975591555218862e-05, 2.876500146954361208e-05, 3.471416041263076209e-05, + 4.176287576185915239e-05, 5.008836848967403773e-05, 5.989176390181730373e-05, 7.140057340280213227e-05, + 8.487132973049760036e-05, 1.005923719620999934e-04, 1.188867746885496973e-04, 1.401154137398069279e-04, + 1.646801587388731249e-04, 1.930271805904271778e-04, 2.256503597954330556e-04, 2.630947792533707128e-04, + 3.059602829980946180e-04, 3.549050801425155303e-04, 4.106493712131842727e-04, 4.739789720708565436e-04, + 5.457489087697051069e-04, 6.268869550379884668e-04, 7.183970825975973673e-04, 8.213627933082928901e-04, + 9.369503011517966364e-04, 1.066411531385725184e-03, 1.211086903819095417e-03, 1.372407867107646339e-03, + 1.551899151252505624e-03, 1.751180706119547318e-03, 1.971969294784470944e-03, 2.216079711850908971e-03, + 2.485425598581779636e-03, 2.782019828718993257e-03, 3.107974441230220176e-03, 3.465500098895993776e-03, + 3.856905054613959619e-03, 4.284593610523639393e-03, 4.751064058515097225e-03, 5.258906094345618421e-03, + 
5.810797701414435799e-03, 6.409501504198915943e-03, 7.057860595396970186e-03, 7.758793844909123446e-03, + 8.515290702888369372e-03, 9.330405513145299523e-03, 1.020725135717912572e-02, 1.114899345297222760e-02, + 1.215884213639836574e-02, 1.324004545661629463e-02, 1.439588142011718850e-02, 1.562964992113485073e-02, + 1.694466439888404584e-02, 1.834424326453982033e-02, 1.983170114298836870e-02, 2.141033997615067889e-02, + 2.308344003609062690e-02, 2.485425089716015368e-02, 2.672598241710042669e-02, 2.870179577730820310e-02, + 3.078479463239356953e-02, 3.297801641870515720e-02, 3.528442387069167064e-02, 3.770689679281728890e-02, + 4.024822413326941635e-02, 4.291109640390936770e-02, 4.569809848884132640e-02, 4.861170288163592155e-02, + 5.165426338866744454e-02, 5.482800933323496446e-02, 5.813504029216542680e-02, 6.157732139347005467e-02, + 6.515667920037330165e-02, 6.887479820368566403e-02, 7.273321794107712090e-02, 7.673333075835566151e-02, + 8.087638022439339824e-02, 8.516346020789830747e-02, 8.959551462082867423e-02, 9.417333782991444898e-02, + 9.889757573450802477e-02, 1.037687275058577967e-01, 1.087871479799008567e-01, 1.139530506928239996e-01, + 1.192665115459606141e-01, 1.247274730840887416e-01, 1.303357493688843496e-01, 1.360910314271734020e-01, + 1.419928932517243620e-01, 1.480407983306351483e-01, 1.542341066798992024e-01, 1.605720823524863565e-01, + 1.670539013962460335e-01, 1.736786602321317742e-01, 1.804453844236544912e-01, 1.873530378080931153e-01, + 1.944005319598201097e-01, 2.015867359561292115e-01, 2.089104864161762672e-01, 2.163705977840528187e-01, + 2.239658728275971045e-01, 2.316951133252986765e-01, 2.395571309145607347e-01, 2.475507580756380088e-01, + 2.556748592267567912e-01, 2.639283419072366399e-01, 2.723101680268593668e-01, 2.808193651612593497e-01, + 2.894550378747292326e-01, 2.982163790535362503e-01, 3.071026812346166036e-01, 3.161133479163487600e-01, + 3.252479048399920142e-01, 3.345060112323053140e-01, 3.438874710018250777e-01, 
3.533922438832718793e-01, + 3.630204565265675291e-01, 3.727724135289699431e-01, 3.826486084108677024e-01, 3.926497345378144818e-01, + 4.027766959934214472e-01, 4.130306184097598756e-01, 4.234128597639539906e-01, 4.339250211516634154e-01, + 4.445689575501645526e-01, 4.553467885857401860e-01, 4.662609093220769612e-01, 4.773140010883521767e-01, + 4.885090423676662636e-01, 4.998493197684479070e-01, 5.113384391034281429e-01, 5.229803366027518117e-01, + 5.347792902897740156e-01, 5.467399315500809553e-01, 5.588672569262846167e-01, 5.711666401731758417e-01, + 5.836438446098876156e-01, 5.963050358078278898e-01, 6.091567946552975691e-01, 6.222061308419237716e-01, + 6.354604968083211637e-01, 6.489278022087558681e-01, 6.626164289370386795e-01, 6.765352467684294227e-01, + 6.906936296730053994e-01, 7.051014728587479919e-01, 7.197692106055475377e-01, 7.347078349544334315e-01, + 7.499289153196209421e-01, 7.654446190944464391e-01, 7.812677333259577661e-01, 7.974116875368567865e-01, + 8.138905777776784362e-01, 8.307191919965581771e-01, 8.479130368187123741e-01, 8.654883658328603475e-01, + 8.834622094872810766e-01, 9.018524067040521621e-01, 9.206776383262963142e-01, 9.399574625199963151e-01, + 9.597123522591707284e-01, 9.799637350309700387e-01, 1.000734034905599933e+00, 1.022046717124952010e+00, + 1.043926335373472893e+00, 1.066398581905185161e+00, 1.089490340711946628e+00, 1.113229743930062164e+00, + 1.137646231695313314e+00, 1.162770615670420260e+00, 1.188635146483979071e+00, 1.215273585336112390e+00, + 1.242721280043529050e+00, 1.271015245815510799e+00, 1.300194251072644711e+00, 1.330298908642019971e+00, + 1.361371772686240192e+00, 1.393457441749111730e+00, 1.426602668328411758e+00, 1.460856475415888358e+00, + 1.496270280476785338e+00, 1.532898027375920169e+00, 1.570796326794896619e+00, 1.610024605725646420e+00, + 1.650645266669431435e+00, 1.692723857217988332e+00, 1.736329250744977731e+00, 1.781533838991654903e+00, + 1.828413737391087381e+00, 1.877049004040720448e+00, 
1.927523873304087635e+00, 1.979927005099477087e+00, + 2.034351751016940433e+00, 2.090896438495766214e+00, 2.149664674393090421e+00, 2.210765669381402212e+00, + 2.274314584729113927e+00, 2.340432903144970240e+00, 2.409248825504827076e+00, 2.480897695429288043e+00, + 2.555522453844001656e+00, 2.633274125832370887e+00, 2.714312342284411608e+00, 2.798805899057066353e+00, + 2.886933356592141886e+00, 2.978883683190077867e+00, 3.074856945413050211e+00, 3.175065049391765683e+00, + 3.279732537139255280e+00, 3.389097442334834102e+00, 3.503412210435275865e+00, 3.622944688401595705e+00, + 3.747979189802462585e+00, 3.878817641573403805e+00, 4.015780819279312670e+00, 4.159209678351536168e+00, + 4.309466789455788368e+00, 4.466937886899736897e+00, 4.632033539816493591e+00, 4.805190956770360727e+00, + 4.986875935432896972e+00, 5.177584970080537688e+00, 5.377847530880629761e+00, 5.588228530273088035e+00, + 5.809330993233640059e+00, 6.041798949837089488e+00, 6.286320570342285919e+00, 6.543631565013652661e+00, + 6.814518873098582608e+00, 7.099824667819718682e+00, 7.400450706942931008e+00, 7.717363061475788814e+00, + 8.051597258371279584e+00, 8.404263876795383951e+00, 8.776554641607500109e+00, 9.169749062247565207e+00, + 9.585221670276993889e+00, 1.002444991444300704e+01, 1.048902277839603856e+01, 1.098065019316492606e+01, + 1.150117332427169985e+01, 1.205257582204547280e+01, 1.263699613338454324e+01, 1.325674098404332380e+01, + 1.391430015262873368e+01, 1.461236267104086712e+01, 1.535383460126837531e+01, 1.614185855545811846e+01, + 1.697983514525758524e+01, 1.787144656784601339e+01, 1.882068256013178484e+01, 1.983186897964764985e+01, + 2.090969930111845450e+01, 2.205926935196095527e+01, 2.328611564861881683e+01, 2.459625773922860138e+01, + 2.599624500732998276e+01, 2.749320844694889238e+01, 2.909491798228195984e+01, 3.080984597641076715e+01, + 3.264723765414180400e+01, 3.461718925554321861e+01, 3.673073484057443067e+01, 3.899994278315456980e+01, + 4.143802312713618427e+01, 
4.405944712930142330e+01, 4.688008048840357439e+01, 4.991733195758662298e+01, + 5.319031926387298369e+01, 5.672005451703465811e+01, 6.052965158594831140e+01, 6.464455825915836491e+01, + 6.909281639443131774e+01, 7.390535370725211687e+01, 7.911631135942343489e+01, 8.476341209659472308e+01, + 9.088837435982152722e+01, 9.753737857533253823e+01, 1.047615927251647361e+02, 1.126177653386554197e+02, + 1.211688952437418817e+02, 1.304849888043593828e+02, 1.406439169773708701e+02, 1.517323863863765989e+02, + 1.638470407739824279e+02, 1.770957117100033620e+02, 1.915988403612775885e+02, 2.074910955409497265e+02, + 2.249232172361061194e+02, 2.440641194630869936e+02, 2.651032917390266964e+02, 2.882535448280364212e+02, + 3.137541538897424513e+02, 3.418744609277612322e+02, 3.729180087461214321e+02, 4.072272907593818790e+02, + 4.451892153103389878e+02, 4.872414000388630927e+02, 5.338794318098249932e+02, 5.856652513400113117e+02, + 6.432368496766822816e+02, 7.073194969336578611e+02, 7.787387632221277236e+02, 8.584356387770406827e+02, + 9.474841163944599543e+02, 1.047111666301969297e+03, 1.158723113719277435e+03, 1.283928525349707755e+03, + 1.424575826189363437e+03, 1.582789006393775706e+03, 1.761012944445459235e+03, 1.962066073573121788e+03, + 2.189202360708354222e+03, 2.446184360349559652e+03, 2.737369460761187093e+03, 3.067811870808767638e+03, + 3.443383419509962754e+03, 3.870916878218207705e+03, 4.358376293464465508e+03, 4.915059769420260559e+03, + 5.551841303216967404e+03, 6.281459704453426129e+03, 7.118864385205665710e+03, 8.081629967627799596e+03, + 9.190454321738597280e+03, 1.046975794051835702e+04, 1.194840663946247320e+04, 1.366058463062104793e+04, + 1.564685131637809273e+04, 1.795542299179967539e+04, 2.064373043744082514e+04, 2.378031563732670807e+04, + 2.744714621995650953e+04, 3.174244552480722739e+04, 3.678416050731336226e+04, 4.271422037773508051e+04, + 4.970377768100323981e+04, 5.795967273138576164e+04, 6.773242484608792593e+04, 7.932613346949942761e+04, + 
9.311077397156915450e+04, 1.095375030536372224e+05, 1.291577556735669526e+05, 1.526471301608741586e+05, + 1.808353350969648289e+05, 2.147438294770164181e+05, 2.556332515573999948e+05, 3.050633345562097502e+05, + 3.649687926665853954e+05, 4.377556866857485380e+05, 5.264241222943208736e+05, 6.347248990108319410e+05, + 7.673600526542426466e+05, 9.302403050337502786e+05, 1.130816502666451845e+06, 1.378507531155523742e+06, + 1.685254393964162275e+06, 2.066239770168639390e+06, 2.540825270229354918e+06, 3.133775962036416630e+06, + 3.876865148275802393e+06, 4.810984054018349430e+06, 5.988924089534678664e+06, 7.479057929608060924e+06, + 9.370225698693408867e+06, 1.177824230977510661e+07, 1.485459301432580619e+07, 1.879809270383398104e+07, + 2.387057334436346400e+07, 3.041806552258603202e+07, 3.889950046843262151e+07, 4.992574374586696017e+07, + 6.431287504495613210e+07, 8.315518519925858136e+07, 1.079255664704117961e+08, 1.406141073390035115e+08, + 1.839201785677305607e+08, 2.415197116904975365e+08, 3.184386015381112281e+08, 4.215765018929686736e+08, + 5.604446356915114550e+08, 7.482094398046911572e+08, 1.003175129668246151e+09, 1.350898918997482870e+09, + 1.827222165053491590e+09, 2.482633480831760933e+09, 3.388577637234919719e+09, 4.646620065299105644e+09, + 6.401821801566297122e+09, 8.862352038053251473e+09, 1.232838602859196811e+10, 1.723489297480180023e+10, + 2.421530528469447376e+10, 3.419673813208063025e+10, 4.854312364622606540e+10, 6.927149043760342676e+10, + 9.938049490186203616e+10, 1.433521424759854145e+11, 2.079221734483088227e+11, 3.032695241820108158e+11, + 4.448631503727710431e+11, 6.563458646477901051e+11, 9.740635696398910980e+11, 1.454220520059656158e+12, + 2.184250688898627320e+12, 3.300999104757560757e+12, 5.019970485022749012e+12, 7.682676299017607834e+12, + 1.183376596003983872e+13, 1.834748853557035315e+13, 2.863639312458363586e+13, 4.499803892715039958e+13, + 7.119486876989154498e+13, 1.134307017980122346e+14, 1.820065782363618395e+14, 
2.941484500615394037e+14, + 4.788707305890930382e+14, 7.854025036928623551e+14, 1.297894304619860251e+15, 2.161279954782425640e+15, + 3.627102147035003834e+15, 6.135342933440950378e+15, 1.046170006362244506e+16, 1.798477357839665686e+16, + 3.117473412332331475e+16, 5.449445073049184222e+16, 9.607515505017978212e+16, 1.708589224452677852e+17, + 3.065429751110228665e+17, 5.549227437451149511e+17, 1.013730232778046314e+18, 1.869059895876405824e+18, + 3.478549552381578424e+18, 6.535992245975463763e+18, 1.240019272261066308e+19, 2.375828866910936629e+19, + 4.597682433604432625e+19, 8.988106816837128428e+19, 1.775302379393632263e+20, 3.543413304390973486e+20, + 7.148061397675525327e+20, 1.457620510577186305e+21, 3.005137124879829797e+21, 6.265024861633250697e+21, + 1.320979941090283816e+22, 2.817487535902146221e+22, 6.079933041429805231e+22, 1.327658853647212083e+23, + 2.934311759183641318e+23, 6.565087216807130026e+23, 1.487212273437937650e+24, 3.411840196076788128e+24, + 7.928189928797018762e+24, 1.866451877029704857e+25, 4.452521859886739549e+25, 1.076545435174977662e+26, + 2.638685681190697586e+26, 6.557908470244186498e+26, 1.652952243735585721e+27, 4.226383395914916199e+27, + 1.096450394268080148e+28, 2.886822082999286080e+28, 7.715480389344015925e+28, 2.093728789309964846e+29, + 5.770275789447655037e+29, 1.615463845391781140e+30, 4.595470055795608691e+30, 1.328629392686523255e+31, + 3.905079681530784219e+31, 1.167134024271997252e+32, 3.548058538654277403e+32, 1.097378059358046160e+33, + 3.454102978064445595e+33, 1.106745393701652323e+34, 3.610899559139069994e+34, 1.199946999283670567e+35, + 4.062687014190878792e+35, 1.401835223893224514e+36, 4.931085527333162173e+36, 1.768812393284919500e+37, + 6.472148293945199961e+37, 2.416453721739211922e+38, 9.208944720398123862e+38, 3.583297028622126676e+39, + 1.424097482596699440e+40, 5.782627833426411524e+40, 2.399862204084363183e+41, 1.018291572042305460e+42, + 4.419105414822034531e+42, 1.962126117680499311e+43, 
8.916742424061253707e+43, 4.148882478294757720e+44, + 1.977256529558276930e+45, 9.655300233875401080e+45, 4.832878898335598922e+46, 2.480575878223098058e+47, + 1.306102809757654706e+48, 7.057565717289569232e+48, 3.915276522229618618e+49, 2.230898980943393318e+50, + 1.306141334496309306e+51, 7.861021286656392627e+51, 4.865583758538451107e+52, 3.098487425915704674e+53, + 2.031037614862563901e+54, 1.370999647608260200e+55, 9.534736274325001528e+55, 6.834959923166415407e+56, + 5.052733546324789020e+57, 3.853810997282159979e+58, 3.034183107853208298e+59, 2.467161926009838899e+60, + 2.072901039813580593e+61, 1.800563980579615383e+62, 1.617764027895344257e+63, 1.504283028250688329e+64, + 1.448393206525427172e+65, 1.444855510980115799e+66, 1.494120428855029243e+67, 1.602566566107015722e+68, + 1.783880504153942988e+69, 2.061999240572760738e+70, 2.476521794698572715e+71, 3.092349914153497358e+72, + 4.016927238305985810e+73, 5.431607545226497387e+74, 7.650086824042822759e+75, 1.123017984114349288e+77, + 1.719382952966052004e+78, 2.747335718690686674e+79, 4.584545010557684123e+80, 7.995082041539250252e+81, + 1.458119909365899044e+83, 2.783001178679600175e+84, 5.562812231966194628e+85, 1.165338768982404578e+87, + 2.560399126432838224e+88, 5.904549641859098192e+89, 1.430278474749838710e+91, 3.642046122956932563e+92, + 9.756698571206402300e+93, 2.751946044275883051e+95, 8.179164793643197279e+96, 2.563704735086825890e+98, + 8.481656496128255880e+99, 2.964260254403981007e+101, 1.095342970031208886e+103, 4.283148547584870628e+104, + 1.773954352944319744e+106, 7.788991081894224760e+107, 3.628931721056821352e+109, 1.795729272516020592e+111, + 9.446685151482835339e+112, 5.288263179614488101e+114, 3.153311236741401362e+116, 2.004807079683827669e+118, + 1.360407192665237716e+120, 9.862825609807810517e+121, 7.647551788591128099e+123, 6.348802224871730088e+125, + 5.649062361980019098e+127, 5.393248003523784781e+129, 5.530897191915703916e+131, 6.099598644640894333e+133, + 
7.242098433491964504e+135, 9.268083053637375570e+137, 1.279942702416040582e+140, 1.909796626960621302e+142, + 3.082540300669885040e+144, 5.388809732384179657e+146, 1.021610251056626535e+149, 2.103005440072790650e+151, + 4.706753990348725570e+153, 1.146834128125248991e+156, }, + }; // Since we have to use C arrays we can't compensate for the fact that each level has // a different number of coefficients. @@ -931,6 +1729,26 @@ __constant__ boost::math::size_t float_coefficients_size[8] = {9, 8, 16, 33, 66, __constant__ boost::math::size_t double_coefficients_size[8] = {13, 12, 25, 49, 98, 196, 393, 786}; +template +struct CoefficientsSelector; + +template<> +struct CoefficientsSelector +{ + __device__ static const float (*abscissas())[527] { return m_abscissas_float; } + __device__ static const float (*weights())[527] { return m_weights_float; } + __device__ static const boost::math::size_t* size() { return float_coefficients_size; } +}; + +template<> +struct CoefficientsSelector +{ + __device__ static const double (*abscissas())[786] { return m_abscissas_double; } + __device__ static const double (*weights())[786] { return m_weights_double; } + __device__ static const boost::math::size_t* size() { return double_coefficients_size; } +}; + + template > __device__ auto exp_sinh_integrate_impl(const F& f, Real tolerance, Real* error, Real* L1, boost::math::size_t* levels) { @@ -949,9 +1767,9 @@ __device__ auto exp_sinh_integrate_impl(const F& f, Real tolerance, Real* error, //std::cout << std::setprecision(5*std::numeric_limits::digits10); // Get the party started with two estimates of the integral: - auto& m_abscissas = m_abscissas_float; - auto& m_weights = m_weights_float; - auto& m_size = float_coefficients_size; + const auto m_abscissas = CoefficientsSelector::abscissas(); + const auto m_weights = CoefficientsSelector::weights(); + const auto m_size = CoefficientsSelector::size(); Real min_abscissa{ 0 }, max_abscissa{ boost::math::tools::max_value() }; K I0 = 0; 
From 32fa3788afce97930fe36ab944b5f1f7307f218b Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 10 Sep 2024 11:39:20 -0400 Subject: [PATCH 12/22] Save space by using pointer to different size arrays rather than 2d --- .../quadrature/detail/exp_sinh_detail.hpp | 105 +++++++++++++----- 1 file changed, 79 insertions(+), 26 deletions(-) diff --git a/include/boost/math/quadrature/detail/exp_sinh_detail.hpp b/include/boost/math/quadrature/detail/exp_sinh_detail.hpp index 5dd74d508..9f836a80d 100644 --- a/include/boost/math/quadrature/detail/exp_sinh_detail.hpp +++ b/include/boost/math/quadrature/detail/exp_sinh_detail.hpp @@ -561,20 +561,31 @@ namespace math { namespace quadrature { namespace detail { -__constant__ float m_abscissas_float[8][527] = { +// In the CUDA case we break these down into a series of fixed size arrays and then make a pointer to the arrays +// We can't use a 2D array because it takes up far too much memory that is primarily wasted space + +__constant__ float m_abscissas_float_1[9] = { 3.47876573e-23f, 5.62503650e-09f, 9.95706124e-04f, 9.67438487e-02f, 7.43599217e-01f, 4.14293205e+00f, - 1.08086768e+02f, 4.56291316e+05f, 2.70123007e+15f, }, + 1.08086768e+02f, 4.56291316e+05f, 2.70123007e+15f, }; + +__constant__ float m_abscissas_float_2[8] = { 2.41870864e-14f, 1.02534662e-05f, 1.65637566e-02f, 3.11290799e-01f, 1.64691269e+00f, 1.49800773e+01f, - 2.57724301e+03f, 2.24833766e+09f, }, + 2.57724301e+03f, 2.24833766e+09f, }; + +__constant__ float m_abscissas_float_3[16] = { 3.24983286e-18f, 2.51095186e-11f, 3.82035773e-07f, 1.33717837e-04f, 4.80260650e-03f, 4.41526928e-02f, 1.83045938e-01f, 4.91960276e-01f, 1.10322609e+00f, 2.53681744e+00f, 7.39791792e+00f, 3.59560256e+01f, - 4.36061333e+02f, 2.49501460e+04f, 1.89216933e+07f, 1.03348694e+12f, }, + 4.36061333e+02f, 2.49501460e+04f, 1.89216933e+07f, 1.03348694e+12f, }; + +__constant__ float m_abscissas_float_4[33] = { 1.51941172e-20f, 3.70201714e-16f, 9.67598102e-13f, 4.44773051e-10f, 5.28493928e-08f, 
2.19158236e-06f, 4.00799258e-05f, 3.88011529e-04f, 2.29325538e-03f, 9.25182629e-03f, 2.78117501e-02f, 6.67553298e-02f, 1.35173168e-01f, 2.41374946e-01f, 3.94194704e-01f, 6.07196731e-01f, 9.06432514e-01f, 1.34481045e+00f, 2.03268444e+00f, 3.21243032e+00f, 5.46310949e+00f, 1.03365745e+01f, 2.26486752e+01f, 6.03727778e+01f, 2.08220266e+02f, 1.00431239e+03f, 7.47843388e+03f, 9.75279951e+04f, 2.61755592e+06f, 1.77776624e+08f, - 3.98255346e+10f, 4.13443763e+13f, 3.07708133e+17f, }, + 3.98255346e+10f, 4.13443763e+13f, 3.07708133e+17f, }; + +__constant__ float m_abscissas_float_5[66] = { 7.99409438e-22f, 2.41624595e-19f, 3.73461321e-17f, 3.19397902e-15f, 1.62042378e-13f, 5.18579386e-12f, 1.10520072e-10f, 1.64548212e-09f, 1.78534009e-08f, 1.46529196e-07f, 9.40168786e-07f, 4.85507733e-06f, 2.07038029e-05f, 7.45799409e-05f, 2.31536599e-04f, 6.30580368e-04f, 1.53035449e-03f, 3.35582040e-03f, @@ -585,7 +596,9 @@ __constant__ float m_abscissas_float[8][527] = { 8.70776542e+00f, 1.23825548e+01f, 1.83151803e+01f, 2.83510579e+01f, 4.62437776e+01f, 8.00917327e+01f, 1.48560852e+02f, 2.97989725e+02f, 6.53443372e+02f, 1.58584068e+03f, 4.31897162e+03f, 1.34084311e+04f, 4.83003053e+04f, 2.05969943e+05f, 1.06363880e+06f, 6.82457850e+06f, 5.60117371e+07f, 6.07724622e+08f, - 9.04813016e+09f, 1.92834507e+11f, 6.17122515e+12f, 3.13089095e+14f, 2.67765347e+16f, 4.13865153e+18f, }, + 9.04813016e+09f, 1.92834507e+11f, 6.17122515e+12f, 3.13089095e+14f, 2.67765347e+16f, 4.13865153e+18f, }; + +__constant__ float m_abscissas_float_6[132] = { 1.70893932e-22f, 3.56621447e-21f, 6.19138882e-20f, 9.04299298e-19f, 1.12287188e-17f, 1.19706303e-16f, 1.10583090e-15f, 8.92931857e-15f, 6.35404710e-14f, 4.01527389e-13f, 2.26955738e-12f, 1.15522811e-11f, 5.32913181e-11f, 2.24130967e-10f, 8.64254491e-10f, 3.07161058e-09f, 1.01117742e-08f, 3.09775637e-08f, @@ -607,7 +620,9 @@ __constant__ float m_abscissas_float[8][527] = { 3.45378531e+04f, 6.82619916e+04f, 1.40913380e+05f, 3.04680844e+05f, 6.92095957e+05f, 
1.65694484e+06f, 4.19519229e+06f, 1.12739016e+07f, 3.22814282e+07f, 9.88946136e+07f, 3.25562103e+08f, 1.15706659e+09f, 4.46167708e+09f, 1.87647826e+10f, 8.65629909e+10f, 4.40614549e+11f, 2.49049013e+12f, 1.57380011e+13f, - 1.11990629e+14f, 9.04297390e+14f, 8.35377903e+15f, 8.90573552e+16f, 1.10582857e+18f, 1.61514650e+19f, }, + 1.11990629e+14f, 9.04297390e+14f, 8.35377903e+15f, 8.90573552e+16f, 1.10582857e+18f, 1.61514650e+19f, }; + +__constant__ float m_abscissas_float_7[263] = { 7.75845008e-23f, 3.71846701e-22f, 1.69833677e-21f, 7.40284853e-21f, 3.08399399e-20f, 1.22962599e-19f, 4.69855182e-19f, 1.72288020e-18f, 6.07012059e-18f, 2.05742924e-17f, 6.71669437e-17f, 2.11441966e-16f, 6.42566550e-16f, 1.88715605e-15f, 5.36188198e-15f, 1.47533056e-14f, 3.93507835e-14f, 1.01841667e-13f, @@ -651,7 +666,9 @@ __constant__ float m_abscissas_float[8][527] = { 3.15878598e+09f, 6.33624483e+09f, 1.29932136e+10f, 2.72570398e+10f, 5.85372779e+10f, 1.28795973e+11f, 2.90551047e+11f, 6.72570892e+11f, 1.59884056e+12f, 3.90652847e+12f, 9.81916374e+12f, 2.54124546e+13f, 6.77814197e+13f, 1.86501681e+14f, 5.29897885e+14f, 1.55625904e+15f, 4.72943011e+15f, 1.48882761e+16f, - 4.86043448e+16f, 1.64741373e+17f, 5.80423410e+17f, 2.12831536e+18f, 8.13255421e+18f, }, + 4.86043448e+16f, 1.64741373e+17f, 5.80423410e+17f, 2.12831536e+18f, 8.13255421e+18f, }; + +__constant__ float m_abscissas_float_8[527] = { 5.20331508e-23f, 1.15324162e-22f, 2.52466875e-22f, 5.46028730e-22f, 1.16690465e-21f, 2.46458927e-21f, 5.14543768e-21f, 1.06205431e-20f, 2.16767715e-20f, 4.37564009e-20f, 8.73699691e-20f, 1.72595588e-19f, 3.37377643e-19f, 6.52669145e-19f, 1.24976973e-18f, 2.36916845e-18f, 4.44691383e-18f, 8.26580373e-18f, @@ -739,23 +756,41 @@ __constant__ float m_abscissas_float[8][527] = { 5.28864904e+13f, 8.70403770e+13f, 1.44377694e+14f, 2.41399528e+14f, 4.06896744e+14f, 6.91510621e+14f, 1.18504970e+15f, 2.04811559e+15f, 3.57034809e+15f, 6.27861398e+15f, 1.11397125e+16f, 1.99435267e+16f, 3.60337498e+16f, 
6.57141972e+16f, 1.20980371e+17f, 2.24875057e+17f, 4.22089025e+17f, 8.00147402e+17f, - 1.53216987e+18f, 2.96403754e+18f, 5.79389087e+18f, 1.14455803e+19f, 2.28537992e+19f, }, - }; + 1.53216987e+18f, 2.96403754e+18f, 5.79389087e+18f, 1.14455803e+19f, 2.28537992e+19f, }; + +__constant__ float* m_abscissas_float[8] = { + m_abscissas_float_1, + m_abscissas_float_2, + m_abscissas_float_3, + m_abscissas_float_4, + m_abscissas_float_5, + m_abscissas_float_6, + m_abscissas_float_7, + m_abscissas_float_8, +}; -__constant__ float m_weights_float[8][527] = { +__constant__ float m_weights_float_1[9] = { 1.79979618e-21f, 1.07218106e-07f, 7.05786060e-03f, 2.72310168e-01f, 1.18863515e+00f, 8.77655464e+00f, - 5.33879432e+02f, 5.98892409e+06f, 9.60751551e+16f, }, + 5.33879432e+02f, 5.98892409e+06f, 9.60751551e+16f, }; + +__constant__ float m_weights_float_2[8] = { 7.59287827e-13f, 1.18886775e-04f, 7.27332179e-02f, 6.09156795e-01f, 2.71431234e+00f, 4.68800805e+01f, - 2.06437304e+04f, 4.85431236e+10f, }, + 2.06437304e+04f, 4.85431236e+10f, }; + +__constant__ float m_weights_float_3[16] = { 1.30963564e-16f, 6.14135316e-10f, 5.67743391e-06f, 1.21108690e-03f, 2.67259824e-02f, 1.54234107e-01f, 4.23412860e-01f, 8.47913037e-01f, 1.73632925e+00f, 4.63203354e+00f, 1.88206826e+01f, 1.40643917e+02f, - 2.73736946e+03f, 2.55633252e+05f, 3.18438602e+08f, 2.86363931e+13f, }, + 2.73736946e+03f, 2.55633252e+05f, 3.18438602e+08f, 2.86363931e+13f, }; + +__constant__ float m_weights_float_4[33] = { 6.93769555e-19f, 1.31670336e-14f, 2.68107110e-11f, 9.60294960e-09f, 8.89417585e-07f, 2.87650015e-05f, 4.10649371e-04f, 3.10797444e-03f, 1.43958814e-02f, 4.56980985e-02f, 1.08787148e-01f, 2.08910486e-01f, 3.43887471e-01f, 5.11338439e-01f, 7.19769211e-01f, 1.00073403e+00f, 1.42660267e+00f, 2.14966467e+00f, 3.50341221e+00f, 6.28632057e+00f, 1.26369961e+01f, 2.90949180e+01f, 7.91163114e+01f, 2.65103292e+02f, 1.15872311e+03f, 7.11886439e+03f, 6.77324248e+04f, 1.13081650e+06f, 3.88995005e+07f, 3.38857764e+09f, - 
9.74063570e+11f, 1.29789430e+15f, 1.24001927e+19f, }, + 9.74063570e+11f, 1.29789430e+15f, 1.24001927e+19f, }; + +__constant__ float m_weights_float_5[66] = { 3.88541434e-20f, 1.03646493e-17f, 1.41388360e-15f, 1.06725054e-13f, 4.77908002e-12f, 1.34999345e-10f, 2.53970414e-09f, 3.33804787e-08f, 3.19755978e-07f, 2.31724882e-06f, 1.31302324e-05f, 5.98917639e-05f, 2.25650360e-04f, 7.18397083e-04f, 1.97196929e-03f, 4.75106406e-03f, 1.02072514e-02f, 1.98317011e-02f, @@ -766,7 +801,9 @@ __constant__ float m_weights_float[8][527] = { 2.32861156e+01f, 3.67307348e+01f, 6.05296516e+01f, 1.04761593e+02f, 1.91598840e+02f, 3.72918009e+02f, 7.78738763e+02f, 1.76101294e+03f, 4.35837629e+03f, 1.19484066e+04f, 3.67841605e+04f, 1.29157756e+05f, 5.26424122e+05f, 2.54082527e+06f, 1.48545930e+07f, 1.07925566e+08f, 1.00317513e+09f, 1.23283860e+10f, - 2.07922173e+11f, 5.01997049e+12f, 1.82006578e+14f, 1.04617001e+16f, 1.01373023e+18f, 1.77530238e+20f, }, + 2.07922173e+11f, 5.01997049e+12f, 1.82006578e+14f, 1.04617001e+16f, 1.01373023e+18f, 1.77530238e+20f, }; + +__constant__ float m_weights_float_6[132] = { 8.56958007e-21f, 1.68000718e-19f, 2.74008750e-18f, 3.75978801e-17f, 4.38589881e-16f, 4.39263787e-15f, 3.81223973e-14f, 2.89198757e-13f, 1.93338859e-12f, 1.14783389e-11f, 6.09544349e-11f, 2.91499607e-10f, 1.26339559e-09f, 4.99234840e-09f, 1.80872790e-08f, 6.03998541e-08f, 1.86829770e-07f, 5.37807971e-07f, @@ -788,7 +825,9 @@ __constant__ float m_weights_float[8][527] = { 3.64968793e+05f, 7.67360053e+05f, 1.68525439e+06f, 3.87686515e+06f, 9.37022570e+06f, 2.38705733e+07f, 6.43128750e+07f, 1.83920179e+08f, 5.60444636e+08f, 1.82722217e+09f, 6.40182180e+09f, 2.42153053e+10f, 9.93804949e+10f, 4.44863150e+11f, 2.18425069e+12f, 1.18337660e+13f, 7.11948688e+13f, 4.78870731e+14f, - 3.62710215e+15f, 3.11747341e+16f, 3.06542975e+17f, 3.47854955e+18f, 4.59768243e+19f, 7.14806140e+20f, }, + 3.62710215e+15f, 3.11747341e+16f, 3.06542975e+17f, 3.47854955e+18f, 4.59768243e+19f, 7.14806140e+20f, }; + 
+__constant__ float m_weights_float_7[263] = { 3.95175890e-21f, 1.83575349e-20f, 8.12661397e-20f, 3.43336935e-19f, 1.38634563e-18f, 5.35757029e-18f, 1.98424944e-17f, 7.05221126e-17f, 2.40827550e-16f, 7.91175869e-16f, 2.50347754e-15f, 7.63871031e-15f, 2.25003103e-14f, 6.40502166e-14f, 1.76389749e-13f, 4.70424252e-13f, 1.21618334e-12f, 3.05082685e-12f, @@ -832,7 +871,9 @@ __constant__ float m_weights_float[8][527] = { 6.92714904e+10f, 1.43352142e+11f, 3.03269524e+11f, 6.56345865e+11f, 1.45422052e+12f, 3.30099910e+12f, 7.68267630e+12f, 1.83474885e+13f, 4.49980389e+13f, 1.13430702e+14f, 2.94148450e+14f, 7.85402504e+14f, 2.16127995e+15f, 6.13534293e+15f, 1.79847736e+16f, 5.44944507e+16f, 1.70858922e+17f, 5.54922744e+17f, - 1.86905990e+18f, 6.53599225e+18f, 2.37582887e+19f, 8.98810682e+19f, 3.54341330e+20f, }, + 1.86905990e+18f, 6.53599225e+18f, 2.37582887e+19f, 8.98810682e+19f, 3.54341330e+20f, }; + +__constant__ float m_weights_float_8[527] = { 2.67108015e-21f, 5.82833463e-21f, 1.25616316e-20f, 2.67469785e-20f, 5.62745845e-20f, 1.17014394e-19f, 2.40511019e-19f, 4.88739481e-19f, 9.82072303e-19f, 1.95168062e-18f, 3.83661097e-18f, 7.46163208e-18f, 1.43594942e-17f, 2.73485792e-17f, 5.15573612e-17f, 9.62223075e-17f, 1.77810682e-16f, 3.25389618e-16f, @@ -920,9 +961,20 @@ __constant__ float m_weights_float[8][527] = { 1.67323268e+15f, 2.79711270e+15f, 4.71267150e+15f, 8.00353033e+15f, 1.37027503e+16f, 2.36538022e+16f, 4.11734705e+16f, 7.22793757e+16f, 1.27982244e+17f, 2.28603237e+17f, 4.11976277e+17f, 7.49169358e+17f, 1.37488861e+18f, 2.54681529e+18f, 4.76248383e+18f, 8.99167123e+18f, 1.71428840e+19f, 3.30088717e+19f, - 6.42020070e+19f, 1.26155602e+20f, 2.50480806e+20f, 5.02601059e+20f, 1.01935525e+21f, }, - }; + 6.42020070e+19f, 1.26155602e+20f, 2.50480806e+20f, 5.02601059e+20f, 1.01935525e+21f, }; + +__constant__ float* m_weights_float[8] = { + m_weights_float_1, + m_weights_float_2, + m_weights_float_3, + m_weights_float_4, + m_weights_float_5, + m_weights_float_6, + 
m_weights_float_7, + m_weights_float_8 +}; +/* __constant__ double m_abscissas_double[8][786] = { { 7.241670621354483269e-163, 2.257639733856759198e-60, 1.153241619257215165e-22, 8.747691973876861825e-09, 1.173446923800022477e-03, 1.032756936219208144e-01, 7.719261204224504866e-01, 4.355544675823585545e+00, @@ -1721,6 +1773,7 @@ __constant__ double m_weights_double[8][786] = { 3.082540300669885040e+144, 5.388809732384179657e+146, 1.021610251056626535e+149, 2.103005440072790650e+151, 4.706753990348725570e+153, 1.146834128125248991e+156, }, }; +*/ // Since we have to use C arrays we can't compensate for the fact that each level has // a different number of coefficients. @@ -1735,17 +1788,17 @@ struct CoefficientsSelector; template<> struct CoefficientsSelector { - __device__ static const float (*abscissas())[527] { return m_abscissas_float; } - __device__ static const float (*weights())[527] { return m_weights_float; } - __device__ static const boost::math::size_t* size() { return float_coefficients_size; } + __device__ static const auto abscissas() { return m_abscissas_float; } + __device__ static const auto weights() { return m_weights_float; } + __device__ static const auto size() { return float_coefficients_size; } }; template<> struct CoefficientsSelector { - __device__ static const double (*abscissas())[786] { return m_abscissas_double; } - __device__ static const double (*weights())[786] { return m_weights_double; } - __device__ static const boost::math::size_t* size() { return double_coefficients_size; } + __device__ static const auto abscissas() { return m_abscissas_double; } + __device__ static const auto weights() { return m_weights_double; } + __device__ static const auto size() { return double_coefficients_size; } }; From bf2ad59ca1ea3ce652bab3d9c3a16009c0e5f45d Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 10 Sep 2024 11:47:51 -0400 Subject: [PATCH 13/22] Separate the double precision weights into their own arrays --- 
.../quadrature/detail/exp_sinh_detail.hpp | 106 +++++++++++++----- 1 file changed, 75 insertions(+), 31 deletions(-) diff --git a/include/boost/math/quadrature/detail/exp_sinh_detail.hpp b/include/boost/math/quadrature/detail/exp_sinh_detail.hpp index 9f836a80d..77f2fbf06 100644 --- a/include/boost/math/quadrature/detail/exp_sinh_detail.hpp +++ b/include/boost/math/quadrature/detail/exp_sinh_detail.hpp @@ -974,22 +974,27 @@ __constant__ float* m_weights_float[8] = { m_weights_float_8 }; -/* -__constant__ double m_abscissas_double[8][786] = { +__constant__ double m_abscissas_double_1[13] = { 7.241670621354483269e-163, 2.257639733856759198e-60, 1.153241619257215165e-22, 8.747691973876861825e-09, 1.173446923800022477e-03, 1.032756936219208144e-01, 7.719261204224504866e-01, 4.355544675823585545e+00, 1.215101039066652656e+02, 6.228845436711506169e+05, 6.278613977336989392e+15, 9.127414935180233465e+42, - 6.091127771174027909e+116, }, + 6.091127771174027909e+116, }; + +__constant__ double m_abscissas_double_2[12] = { 4.547459836328942014e-99, 6.678756542928857080e-37, 5.005042973041566360e-14, 1.341318484151208960e-05, 1.833875636365939263e-02, 3.257972971286326131e-01, 1.712014688483495078e+00, 1.613222549264089627e+01, - 3.116246745274236447e+03, 3.751603952020919663e+09, 1.132259067258797346e+26, 6.799257464097374238e+70, }, + 3.116246745274236447e+03, 3.751603952020919663e+09, 1.132259067258797346e+26, 6.799257464097374238e+70, }; + +__constant__ double m_abscissas_double_3[25] = { 5.314690663257815465e-127, 2.579830034615362946e-77, 3.534801062399966878e-47, 6.733941646704537777e-29, 8.265803726974829043e-18, 4.424914371157762285e-11, 5.390411046738629465e-07, 1.649389713333761449e-04, 5.463728936866216652e-03, 4.787896410534771955e-02, 1.931544616590306846e-01, 5.121421856617965197e-01, 1.144715949265016019e+00, 2.648424684387670480e+00, 7.856804169938798917e+00, 3.944731803343517708e+01, 5.060291993016831194e+02, 3.181117494063683297e+04, 2.820174654949211729e+07, 
1.993745099515255184e+12, 1.943469269499068563e+20, 2.858803732300638372e+33, 1.457292199029008637e+55, 8.943565831706355607e+90, - 9.016198369791554655e+149, }, + 9.016198369791554655e+149, }; + +__constant__ double m_abscissas_double_4[49] = { 8.165631636299519857e-144, 3.658949309353149331e-112, 1.635242513882908826e-87, 2.578381184977746454e-68, 2.305546416275824199e-53, 1.016725540031465162e-41, 1.191823622917539774e-32, 1.379018088205016509e-25, 4.375640088826073184e-20, 8.438464631330991606e-16, 1.838483310261119782e-12, 7.334264181393092650e-10, @@ -1002,7 +1007,9 @@ __constant__ double m_abscissas_double[8][786] = { 3.723397798030112514e+06, 2.793667983952389721e+08, 7.112973790863854188e+10, 8.704037695808749572e+13, 8.001474015782459984e+17, 9.804091819390540578e+22, 3.342777673392873288e+29, 8.160092668471508447e+37, 4.798775331663586528e+48, 3.228614320248853938e+62, 1.836986041572136151e+80, 1.153145986877483804e+103, - 2.160972586723647751e+132, }, + 2.160972586723647751e+132, }; + +__constant__ double m_abscissas_double_5[98] = { 4.825077401709435655e-153, 3.813781211050297560e-135, 2.377824349780240844e-119, 2.065817295388293122e-105, 4.132105770181358886e-93, 2.963965169989404311e-82, 1.127296662046635391e-72, 3.210346399945695041e-64, 9.282992368222161062e-57, 3.565977853916619677e-50, 2.306962519220473637e-44, 3.098751038516535098e-39, @@ -1027,7 +1034,9 @@ __constant__ double m_abscissas_double[8][786] = { 5.430494850258846715e+27, 2.683747612498502676e+31, 4.114885708325522701e+35, 2.276004816861421600e+40, 5.387544917595833246e+45, 6.623575732955432303e+51, 5.266881304835239338e+58, 3.473234812654772210e+66, 2.517492645985977377e+75, 2.759797646289240629e+85, 6.569603829502412077e+96, 5.116181648220647995e+109, - 2.073901892339407423e+124, 7.406462446666255838e+140, }, + 2.073901892339407423e+124, 7.406462446666255838e+140, }; + +__constant__ double m_abscissas_double_6[196] = { 7.053618140948655098e-158, 2.343354218558056628e-148, 
2.062509087689351439e-139, 5.212388628332260488e-131, 4.079380320868843387e-123, 1.061481285006738214e-115, 9.816727607793017691e-109, 3.435400719609722581e-102, 4.825198574681495574e-96, 2.874760995089533358e-90, 7.652499977338879996e-85, 9.556944498127119032e-80, @@ -1076,7 +1085,9 @@ __constant__ double m_abscissas_double[8][786] = { 8.217834961057481281e+56, 3.852117991896536784e+60, 3.114452310394384063e+64, 4.498555465873245751e+68, 1.205113215232800796e+73, 6.230864727145221322e+77, 6.487131248948465269e+82, 1.422810109167834249e+88, 6.897656089181724717e+93, 7.779163462756485195e+99, 2.155213251859555072e+106, 1.554347160152705281e+113, - 3.103875072425192272e+120, 1.832673821557018634e+128, 3.431285951865278376e+136, 2.194542081542393530e+145, }, + 3.103875072425192272e+120, 1.832673821557018634e+128, 3.431285951865278376e+136, 2.194542081542393530e+145, }; + +__constant__ double m_abscissas_double_7[393] = { 2.363803632659058081e-160, 1.926835442612677686e-155, 1.109114905180506786e-150, 4.556759282087534164e-146, 1.350172241067816232e-141, 2.914359263635229435e-137, 4.627545976953585825e-133, 5.456508344460398758e-129, 4.821828861306345485e-125, 3.221779152402086241e-121, 1.641732102111619421e-117, 6.433569189921227126e-114, @@ -1175,7 +1186,9 @@ __constant__ double m_abscissas_double[8][786] = { 4.840699137490951163e+104, 1.018669397739170369e+108, 2.733025017438095928e+111, 9.420797277586029837e+114, 4.205525105722885986e+118, 2.451352708852151939e+122, 1.881577053794165543e+126, 1.918506219134233785e+130, 2.622069659115564900e+134, 4.848463485415763756e+138, 1.224645005481997780e+143, 4.267387286482591954e+147, - 2.072505613372582377e+152, }, + 2.072505613372582377e+152, }; + +__constant__ double m_abscissas_double_8[786] = { 1.323228129684237783e-161, 4.129002973520822791e-159, 1.178655462569548882e-156, 3.082189008893206231e-154, 7.393542832199414487e-152, 1.629100644355328639e-149, 3.301545529059822941e-147, 6.162031390854241227e-145, 
1.060528194470986309e-142, 1.685225757497235089e-140, 2.475534097582263629e-138, 3.365764749507587192e-136, @@ -1372,23 +1385,40 @@ __constant__ double m_abscissas_double[8][786] = { 1.958084751118243323e+125, 1.840431913109305657e+127, 1.858143260692831108e+129, 2.017432949655777136e+131, 2.358177615888101494e+133, 2.971092974178603610e+135, 4.039532321435816302e+137, 5.933923069661132195e+139, 9.429263693444953240e+141, 1.622841456932873872e+144, 3.028884476067694180e+146, 6.138356175015339477e+148, - 1.352531557191942648e+151, 3.244447362295582945e+153, }, - }; -__constant__ double m_weights_double[8][786] = { + 1.352531557191942648e+151, 3.244447362295582945e+153, }; + +__constant__ double* m_abscissas_double[8] = { + m_abscissas_double_1, + m_abscissas_double_2, + m_abscissas_double_3, + m_abscissas_double_4, + m_abscissas_double_5, + m_abscissas_double_6, + m_abscissas_double_7, + m_abscissas_double_8, +}; + +__constant__ double m_weights_double_1[13] = { 2.703640234162693583e-160, 3.100862940179668765e-58, 5.828334625665462970e-21, 1.628894422402653830e-07, 8.129907377394029252e-03, 2.851214447180802931e-01, 1.228894002317118650e+00, 9.374610761705565881e+00, 6.136846875218162167e+02, 8.367995944653844271e+06, 2.286032371256753845e+17, 9.029964022492184559e+44, - 1.637973037681055808e+119, }, + 1.637973037681055808e+119, }; + +__constant__ double m_weights_double_2[12] = { 1.029757744225565290e-96, 5.564174008086804112e-35, 1.534846576427062716e-12, 1.519539651119905182e-04, 7.878691652861874032e-02, 6.288072016384128612e-01, 2.842403831496369386e+00, 5.152309209026500589e+01, - 2.554172947873109927e+04, 8.291547503290989754e+10, 6.794911791960761587e+27, 1.108995159102362663e+73, }, + 2.554172947873109927e+04, 8.291547503290989754e+10, 6.794911791960761587e+27, 1.108995159102362663e+73, }; + +__constant__ double m_weights_double_3[25] = { 1.545310485347377408e-124, 4.549745016271158113e-75, 3.781189989988588481e-45, 4.369440793304363176e-27, 
3.253896178006708087e-16, 1.057239289288944987e-09, 7.826174663495492476e-06, 1.459783224353939263e-03, 2.972970552567852420e-02, 1.637950661613330541e-01, 4.392303913269138921e-01, 8.744243777287317807e-01, 1.804759465860974506e+00, 4.894937215283148383e+00, 2.036214502429748943e+01, 1.576549789679037479e+02, 3.249553828744194733e+03, 3.335686029489862584e+05, 4.858218914917275532e+08, 5.655171002571584464e+13, 9.084276291356790926e+21, 2.202757570781655071e+35, 1.851176020895552142e+57, 1.873046373612647920e+93, - 3.113183070605141140e+152, }, + 3.113183070605141140e+152, }; + +__constant__ double m_weights_double_4[49] = { 2.690380169654157101e-141, 9.388760099830475385e-110, 3.267856956418766261e-85, 4.012903562780032075e-66, 2.794595941054873674e-51, 9.598140333687791635e-40, 8.762766371925782803e-31, 7.896919977115783593e-24, 1.951680620313826776e-18, 2.931867534349928041e-14, 4.976350908135118762e-11, 1.546933241860617074e-08, @@ -1401,7 +1431,9 @@ __constant__ double m_weights_double[8][786] = { 5.663834603448267056e+07, 5.450828629396188577e+09, 1.780881993484818221e+12, 2.797112703281894578e+15, 3.300887168363313931e+19, 5.192538272313512016e+24, 2.273085973059979872e+31, 7.124498195222272142e+39, 5.379592741425673874e+50, 4.647296508337283075e+64, 3.395147156494395571e+82, 2.736576372417856435e+105, - 6.584825756536212781e+134, }, + 6.584825756536212781e+134, }; + +__constant__ double m_weights_double_5[98] = { 1.692276285171240629e-150, 1.180420021590838281e-132, 6.494931071412232065e-117, 4.979673804239645358e-103, 8.790122245397054202e-91, 5.564311726870413043e-80, 1.867634664877268411e-70, 4.693767384843440310e-62, 1.197772698674604837e-54, 4.060530886983702887e-48, 2.318268710612758367e-42, 2.748088060676949794e-37, @@ -1426,7 +1458,9 @@ __constant__ double m_weights_double[8][786] = { 3.469062187981719410e+29, 1.942614547946028081e+33, 3.375034694941022784e+37, 2.115298406181711256e+42, 5.673738540911562268e+47, 7.904099301170483654e+53, 
7.121903115084356741e+60, 5.321820777644930491e+68, 4.370977753639010591e+77, 5.429657931755513797e+87, 1.464602226824232950e+99, 1.292445035662836561e+112, - 5.936633203060705474e+126, 2.402419924621336913e+143, }, + 5.936633203060705474e+126, 2.402419924621336913e+143, }; + +__constant__ double m_weights_double_6[196] = { 2.552410363565288863e-155, 7.965872719315690060e-146, 6.586401422963018216e-137, 1.563673437419490296e-128, 1.149636272392214573e-120, 2.810189759625314580e-113, 2.441446149780773329e-106, 8.026292508555041710e-100, 1.059034284623927886e-93, 5.927259046205893861e-88, 1.482220909125121967e-82, 1.738946448501809732e-77, @@ -1475,7 +1509,9 @@ __constant__ double m_weights_double[8][786] = { 1.077033440153993124e+59, 5.374188883861674378e+62, 4.625267105826449467e+66, 7.111646979020385006e+70, 2.027996051444846521e+75, 1.116168784120367146e+80, 1.237019821283735086e+85, 2.888108172342166477e+90, 1.490426937972460544e+96, 1.789306677271856318e+102, 5.276973875344766848e+108, 4.051217867886536330e+115, - 8.611617868168979525e+122, 5.412634353380155695e+130, 1.078756609821147465e+139, 7.344353246966125053e+147, }, + 8.611617868168979525e+122, 5.412634353380155695e+130, 1.078756609821147465e+139, 7.344353246966125053e+147, }; + +__constant__ double m_weights_double_7[393] = { 8.688318611421924613e-158, 6.864317997043424201e-153, 3.829638174036322920e-148, 1.524985558970066863e-143, 4.379527631402474835e-139, 9.162408388991747001e-135, 1.410086556664696347e-130, 1.611529786006329005e-126, 1.380269212504431613e-122, 8.938739565456142404e-119, 4.414803004265274778e-115, 1.676831992534574674e-111, @@ -1574,7 +1610,9 @@ __constant__ double m_weights_double[8][786] = { 1.166855497965918386e+107, 2.533457765534279043e+110, 7.012864641215147208e+113, 2.494083354169569414e+117, 1.148722178881219993e+121, 6.908313932158993510e+124, 5.470912484744367184e+128, 5.755359832684120769e+132, 8.115681923907451939e+136, 1.548304780334447081e+141, 
4.034912159113614601e+145, 1.450632759611715526e+150, - 7.268799665580789770e+154, }, + 7.268799665580789770e+154, }; + +__constant__ double m_weights_double_8[786] = { 4.901759085947701448e-159, 1.505832423620814399e-156, 4.231872109262999523e-154, 1.089479701785106001e-151, 2.572922387150651649e-149, 5.581311054334156941e-147, 1.113575900126970040e-144, 2.046165051332286084e-142, 3.466994885004770636e-140, 5.423795404073501922e-138, 7.843833272402847010e-136, 1.049922957933194415e-133, @@ -1771,22 +1809,28 @@ __constant__ double m_weights_double[8][786] = { 5.649062361980019098e+127, 5.393248003523784781e+129, 5.530897191915703916e+131, 6.099598644640894333e+133, 7.242098433491964504e+135, 9.268083053637375570e+137, 1.279942702416040582e+140, 1.909796626960621302e+142, 3.082540300669885040e+144, 5.388809732384179657e+146, 1.021610251056626535e+149, 2.103005440072790650e+151, - 4.706753990348725570e+153, 1.146834128125248991e+156, }, - }; -*/ + 4.706753990348725570e+153, 1.146834128125248991e+156, }; + +__constant__ double* m_weights_double[8] = { + m_weights_double_1, + m_weights_double_2, + m_weights_double_3, + m_weights_double_4, + m_weights_double_5, + m_weights_double_6, + m_weights_double_7, + m_weights_double_8 +}; -// Since we have to use C arrays we can't compensate for the fact that each level has -// a different number of coefficients. -// Store the actual sizes in these size arrays so we don't cruise head first into segfaults. 
__constant__ boost::math::size_t float_coefficients_size[8] = {9, 8, 16, 33, 66, 132, 263, 527}; __constant__ boost::math::size_t double_coefficients_size[8] = {13, 12, 25, 49, 98, 196, 393, 786}; template -struct CoefficientsSelector; +struct coefficients_selector; template<> -struct CoefficientsSelector +struct coefficients_selector { __device__ static const auto abscissas() { return m_abscissas_float; } __device__ static const auto weights() { return m_weights_float; } @@ -1794,7 +1838,7 @@ struct CoefficientsSelector }; template<> -struct CoefficientsSelector +struct coefficients_selector { __device__ static const auto abscissas() { return m_abscissas_double; } __device__ static const auto weights() { return m_weights_double; } @@ -1820,9 +1864,9 @@ __device__ auto exp_sinh_integrate_impl(const F& f, Real tolerance, Real* error, //std::cout << std::setprecision(5*std::numeric_limits::digits10); // Get the party started with two estimates of the integral: - const auto m_abscissas = CoefficientsSelector::abscissas(); - const auto m_weights = CoefficientsSelector::weights(); - const auto m_size = CoefficientsSelector::size(); + const auto m_abscissas = coefficients_selector::abscissas(); + const auto m_weights = coefficients_selector::weights(); + const auto m_size = coefficients_selector::size(); Real min_abscissa{ 0 }, max_abscissa{ boost::math::tools::max_value() }; K I0 = 0; From fad8e6f46ba40aba76266e15e4b52be28378751b Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 10 Sep 2024 13:13:18 -0400 Subject: [PATCH 14/22] Remove stray call to std::abs --- include/boost/math/quadrature/exp_sinh.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/include/boost/math/quadrature/exp_sinh.hpp b/include/boost/math/quadrature/exp_sinh.hpp index 5f5c486bf..d3148e0c0 100644 --- a/include/boost/math/quadrature/exp_sinh.hpp +++ b/include/boost/math/quadrature/exp_sinh.hpp @@ -165,7 +165,6 @@ __device__ auto exp_sinh_integrate(const F& f, Real tolerance, Real* error, 
Real { BOOST_MATH_STD_USING constexpr auto function = "boost::math::quadrature::exp_sinh<%1%>::integrate"; - using std::abs; if (abs(tolerance) > 1) { return policies::raise_domain_error(function, "The tolerance provided (%1%) is unusually large; did you confuse it with a domain bound?", tolerance, Policy()); } From 7651cba7936df958ca9968c6187eb55a9a551e6e Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 10 Sep 2024 13:17:11 -0400 Subject: [PATCH 15/22] Add NVRTC testing --- test/nvrtc_jamfile | 4 + test/test_exp_sinh_quad_nvrtc_double.cpp | 206 +++++++++++++++++++++++ test/test_exp_sinh_quad_nvrtc_float.cpp | 206 +++++++++++++++++++++++ 3 files changed, 416 insertions(+) create mode 100644 test/test_exp_sinh_quad_nvrtc_double.cpp create mode 100644 test/test_exp_sinh_quad_nvrtc_float.cpp diff --git a/test/nvrtc_jamfile b/test/nvrtc_jamfile index 0786564d7..3ef320d97 100644 --- a/test/nvrtc_jamfile +++ b/test/nvrtc_jamfile @@ -9,6 +9,10 @@ project : requirements [ requires cxx14_decltype_auto cxx14_generic_lambdas cxx14_return_type_deduction cxx14_variable_templates cxx14_constexpr ] ; +# Quad +run test_exp_sinh_quad_nvrtc_float.cpp ; +run test_exp_sinh_quad_nvrtc_double.cpp ; + # Distributions run test_arcsine_cdf_nvrtc_double.cpp ; run test_arcsine_cdf_nvrtc_float.cpp ; diff --git a/test/test_exp_sinh_quad_nvrtc_double.cpp b/test/test_exp_sinh_quad_nvrtc_double.cpp new file mode 100644 index 000000000..bfd508092 --- /dev/null +++ b/test/test_exp_sinh_quad_nvrtc_double.cpp @@ -0,0 +1,206 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include + +__host__ __device__ float_type func(float_type x) +{ + return 1/(1+x*x); +} + +extern "C" __global__ +void test_expm1_kernel(const float_type*, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + float_type tol = boost::math::tools::root_epsilon(); + float_type error; + float_type L1; + boost::math::size_t levels; + + if (i < numElements) + { + out[i] = boost::math::quadrature::exp_sinh_integrate(func, tol, &error, &L1, &levels); + } +} +)"; + +__host__ __device__ float_type func(float_type x) +{ + return 1/(1+x*x); +} + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + 
res = nvrtcCreateProgram(&prog, cuda_kernel, "test_expm1_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_expm1_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_expm1_kernel"), "Failed to get kernel function"); + + int numElements = 50000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + 
checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + float_type tol = boost::math::tools::root_epsilon(); + float_type error; + float_type L1; + boost::math::quadrature::exp_sinh integrator; + for (int i = 0; i < numElements; ++i) + { + auto res = integrator.integrate(func, tol, &error, &L1); + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." 
<< std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_exp_sinh_quad_nvrtc_float.cpp b/test/test_exp_sinh_quad_nvrtc_float.cpp new file mode 100644 index 000000000..b472e5597 --- /dev/null +++ b/test/test_exp_sinh_quad_nvrtc_float.cpp @@ -0,0 +1,206 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include + +__host__ __device__ float_type func(float_type x) +{ + return 1/(1+x*x); +} + +extern "C" __global__ +void test_expm1_kernel(const float_type*, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + float_type tol = boost::math::tools::root_epsilon(); + float_type error; + float_type L1; + boost::math::size_t levels; + + if (i < numElements) + { + out[i] = boost::math::quadrature::exp_sinh_integrate(func, tol, &error, &L1, &levels); + } +} +)"; + +__host__ __device__ float_type func(float_type x) +{ + return 1/(1+x*x); +} + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void 
checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_expm1_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_expm1_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_expm1_kernel"), "Failed to get kernel function"); + + int numElements = 50000; + float_type *h_in1, *h_in2, *h_out; 
+ float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + float_type tol = boost::math::tools::root_epsilon(); + float_type error; + float_type L1; + boost::math::quadrature::exp_sinh integrator; + for (int i = 0; i < numElements; ++i) + { + auto res = integrator.integrate(func, tol, &error, &L1); + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << 
std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} From 16c85c0db8dd48c6045a42a200c3d132d6459e32 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 10 Sep 2024 13:49:48 -0400 Subject: [PATCH 16/22] Add documentation section --- doc/quadrature/double_exponential.qbk | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/doc/quadrature/double_exponential.qbk b/doc/quadrature/double_exponential.qbk index b4649adbc..2959b94cd 100644 --- a/doc/quadrature/double_exponential.qbk +++ b/doc/quadrature/double_exponential.qbk @@ -1,5 +1,6 @@ [/ Copyright (c) 2017 Nick Thompson +Copyright (c) 2024 Matt Borland Use, modification and distribution are subject to the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -538,6 +539,30 @@ This form integrates just fine over (-log([pi]/2), +[infin]) using either the `t [endsect] [/section:de_caveats Caveats] +[section:gpu_usage GPU Usage] + +`` + #include + + namespace boost{ namespace math{ namespace quadrature { + + template > + __device__ auto exp_sinh_integrate(const F& f, Real a, Real b, Real tolerance, Real* error, Real* L1, boost::math::size_t* levels) + + template > + __device__ auto exp_sinh_integrate(const F& f, Real tolerance, Real* error, Real* L1, boost::math::size_t* levels) + +}}} +`` + +Quadrature is additionally able to run on CUDA (NVCC and NVRTC) platforms. +The major difference is outlined in the above function signatures. +When used on device these are free standing functions instead of using OOP like on the host. 
+The tables of abscissas and weights are stored in constant (read-only) memory on the device instead of being initialized when the class is constructed. +An example use case is computing a stiffness matrix in the finite element method, since that requires integrating many different functions. + +[endsect] [/section:gpu_usage GPU Usage] + [section:de_refes References] * Hidetosi Takahasi and Masatake Mori, ['Double Exponential Formulas for Numerical Integration] Publ. Res. Inst. Math. Sci., 9 (1974), pp. 721-741. From fcaac308610ed6d87d0a2ad5eb10e81ee63a1fe0 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 10 Sep 2024 14:05:21 -0400 Subject: [PATCH 17/22] Add device function signature for sinh_sinh_integrate --- include/boost/math/quadrature/sinh_sinh.hpp | 33 +++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/include/boost/math/quadrature/sinh_sinh.hpp b/include/boost/math/quadrature/sinh_sinh.hpp index ed958eb8d..00a363404 100644 --- a/include/boost/math/quadrature/sinh_sinh.hpp +++ b/include/boost/math/quadrature/sinh_sinh.hpp @@ -1,4 +1,5 @@ // Copyright Nick Thompson, 2017 +// Copyright Matt Borland, 2024 // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. 
// (See accompanying file LICENSE_1_0.txt @@ -15,10 +16,17 @@ #ifndef BOOST_MATH_QUADRATURE_SINH_SINH_HPP #define BOOST_MATH_QUADRATURE_SINH_SINH_HPP +#include +#include +#include +#include +#include + +#ifndef BOOST_MATH_HAS_NVRTC + #include #include #include -#include namespace boost{ namespace math{ namespace quadrature { @@ -40,4 +48,25 @@ class sinh_sinh }; }}} -#endif + +#endif // BOOST_MATH_HAS_NVRTC + +#ifdef BOOST_MATH_ENABLE_CUDA + +namespace boost { +namespace math { +namespace quadrature { + +template > +__device__ auto sinh_sinh_integrate(const F& f, Real tol = boost::math::tools::root_epsilon(), Real* error = nullptr, Real* L1 = nullptr, boost::math::size_t* levels = nullptr) +{ + return detail::sinh_sinh_integrate(f, tol, error, L1, levels); +} + +} // namespace quadrature +} // namespace math +} // namespace boost + +#endif // BOOST_MATH_ENABLE_CUDA + +#endif // BOOST_MATH_QUADRATURE_SINH_SINH_HPP From 3492b098a39d69c0b43998be4f7ad120409a92e5 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 10 Sep 2024 14:34:03 -0400 Subject: [PATCH 18/22] Add float coefficients --- .../quadrature/detail/sinh_sinh_detail.hpp | 267 +++++++++++++++++- 1 file changed, 265 insertions(+), 2 deletions(-) diff --git a/include/boost/math/quadrature/detail/sinh_sinh_detail.hpp b/include/boost/math/quadrature/detail/sinh_sinh_detail.hpp index a9e1ef493..f0ba05ef0 100644 --- a/include/boost/math/quadrature/detail/sinh_sinh_detail.hpp +++ b/include/boost/math/quadrature/detail/sinh_sinh_detail.hpp @@ -1,4 +1,5 @@ // Copyright Nick Thompson, 2017 +// Copyright Matt Borland, 2024 // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. 
// (See accompanying file LICENSE_1_0.txt @@ -7,6 +8,10 @@ #ifndef BOOST_MATH_QUADRATURE_DETAIL_SINH_SINH_DETAIL_HPP #define BOOST_MATH_QUADRATURE_DETAIL_SINH_SINH_DETAIL_HPP +#include + +#ifndef BOOST_MATH_HAS_NVRTC + #include #include #include @@ -15,7 +20,6 @@ #include #include #include -#include #ifdef BOOST_MATH_HAS_THREADS #include @@ -485,4 +489,263 @@ void sinh_sinh_detail::init(const std::integral_constant&) #endif }}}} -#endif + +#endif // BOOST_MATH_HAS_NVRTC + +#ifdef BOOST_MATH_ENABLE_CUDA + +#include +#include +#include +#include +#include + +namespace boost { +namespace math { +namespace quadrature { +namespace detail { + +__constant__ float m_abscissas_float_1[4] = + { 3.08828742e+00f, 1.48993185e+02f, 3.41228925e+06f, 2.06932577e+18f, }; + +__constant__ float m_abscissas_float_2[4] = + { 9.13048763e-01f, 1.41578929e+01f, 6.70421552e+03f, 9.64172533e+10f, }; + +__constant__ float m_abscissas_float_3[8] = + { 4.07297690e-01f, 1.68206671e+00f, 6.15089799e+00f, 4.00396235e+01f, 7.92920025e+02f, 1.02984971e+05f, + 3.03862311e+08f, 1.56544547e+14f, }; + +__constant__ float m_abscissas_float_4[16] = + { 1.98135272e-01f, 6.40155674e-01f, 1.24892870e+00f, 2.26608084e+00f, 4.29646270e+00f, 9.13029039e+00f, + 2.31110765e+01f, 7.42770603e+01f, 3.26720921e+02f, 2.15948569e+03f, 2.41501526e+04f, 5.31819400e+05f, + 2.80058686e+07f, 4.52406508e+09f, 3.08561257e+12f, 1.33882673e+16f, }; + +__constant__ float m_abscissas_float_5[32] = + { 9.83967894e-02f, 3.00605618e-01f, 5.19857979e-01f, 7.70362083e-01f, 1.07131137e+00f, 1.45056976e+00f, + 1.95077855e+00f, 2.64003177e+00f, 3.63137237e+00f, 5.11991533e+00f, 7.45666098e+00f, 1.13022613e+01f, + 1.79641069e+01f, 3.01781070e+01f, 5.40387580e+01f, 1.04107731e+02f, 2.18029520e+02f, 5.02155699e+02f, + 1.28862131e+03f, 3.73921687e+03f, 1.24750730e+04f, 4.87639975e+04f, 2.28145658e+05f, 1.30877796e+06f, + 9.46084663e+06f, 8.88883120e+07f, 1.12416883e+09f, 1.99127673e+10f, 5.16743469e+11f, 2.06721881e+13f, + 1.35061503e+15f, 
1.53854066e+17f, }; + +__constant__ float m_abscissas_float_6[65] = + { 4.91151004e-02f, 1.48013150e-01f, 2.48938814e-01f, 3.53325424e-01f, 4.62733557e-01f, 5.78912068e-01f, + 7.03870253e-01f, 8.39965859e-01f, 9.90015066e-01f, 1.15743257e+00f, 1.34641276e+00f, 1.56216711e+00f, + 1.81123885e+00f, 2.10192442e+00f, 2.44484389e+00f, 2.85372075e+00f, 3.34645891e+00f, 3.94664582e+00f, + 4.68567310e+00f, 5.60576223e+00f, 6.76433234e+00f, 8.24038318e+00f, 1.01439436e+01f, 1.26302471e+01f, + 1.59213040e+01f, 2.03392186e+01f, 2.63584645e+01f, 3.46892633e+01f, 4.64129147e+01f, 6.32055079e+01f, + 8.77149726e+01f, 1.24209693e+02f, 1.79718635e+02f, 2.66081728e+02f, 4.03727303e+02f, 6.28811307e+02f, + 1.00707984e+03f, 1.66156823e+03f, 2.82965144e+03f, 4.98438627e+03f, 9.10154693e+03f, 1.72689266e+04f, + 3.41309958e+04f, 7.04566898e+04f, 1.52340422e+05f, 3.46047978e+05f, 8.28472421e+05f, 2.09759615e+06f, + 5.63695080e+06f, 1.61407141e+07f, 4.94473068e+07f, 1.62781052e+08f, 5.78533297e+08f, 2.23083854e+09f, + 9.38239131e+09f, 4.32814954e+10f, 2.20307274e+11f, 1.24524507e+12f, 7.86900053e+12f, 5.59953143e+13f, + 4.52148695e+14f, 4.17688952e+15f, 4.45286776e+16f, 5.52914285e+17f, 8.07573252e+18f, }; + +__constant__ float m_abscissas_float_7[129] = + { 2.45471558e-02f, 7.37246687e-02f, 1.23152531e-01f, 1.73000138e-01f, 2.23440665e-01f, 2.74652655e-01f, + 3.26821679e-01f, 3.80142101e-01f, 4.34818964e-01f, 4.91070037e-01f, 5.49128046e-01f, 6.09243132e-01f, + 6.71685571e-01f, 7.36748805e-01f, 8.04752842e-01f, 8.76048080e-01f, 9.51019635e-01f, 1.03009224e+00f, + 1.11373586e+00f, 1.20247203e+00f, 1.29688123e+00f, 1.39761124e+00f, 1.50538689e+00f, 1.62102121e+00f, + 1.74542840e+00f, 1.87963895e+00f, 2.02481711e+00f, 2.18228138e+00f, 2.35352849e+00f, 2.54026147e+00f, + 2.74442267e+00f, 2.96823279e+00f, 3.21423687e+00f, 3.48535896e+00f, 3.78496698e+00f, 4.11695014e+00f, + 4.48581137e+00f, 4.89677825e+00f, 5.35593629e+00f, 5.87038976e+00f, 6.44845619e+00f, 7.09990245e+00f, + 7.83623225e+00f, 
8.67103729e+00f, 9.62042778e+00f, 1.07035620e+01f, 1.19433001e+01f, 1.33670142e+01f, + 1.50075962e+01f, 1.69047155e+01f, 1.91063967e+01f, 2.16710044e+01f, 2.46697527e+01f, 2.81898903e+01f, + 3.23387613e+01f, 3.72490076e+01f, 4.30852608e+01f, 5.00527965e+01f, 5.84087761e+01f, 6.84769282e+01f, + 8.06668178e+01f, 9.54992727e+01f, 1.13640120e+02f, 1.35945194e+02f, 1.63520745e+02f, 1.97804969e+02f, + 2.40678754e+02f, 2.94617029e+02f, 3.62896953e+02f, 4.49886178e+02f, 5.61444735e+02f, 7.05489247e+02f, + 8.92790773e+02f, 1.13811142e+03f, 1.46183599e+03f, 1.89233262e+03f, 2.46939604e+03f, 3.24931157e+03f, + 4.31236711e+03f, 5.77409475e+03f, 7.80224724e+03f, 1.06426753e+04f, 1.46591538e+04f, 2.03952854e+04f, + 2.86717062e+04f, 4.07403376e+04f, 5.85318231e+04f, 8.50568927e+04f, 1.25064927e+05f, 1.86137394e+05f, + 2.80525578e+05f, 4.28278249e+05f, 6.62634051e+05f, 1.03944324e+06f, 1.65385743e+06f, 2.67031565e+06f, + 4.37721203e+06f, 7.28807171e+06f, 1.23317299e+07f, 2.12155729e+07f, 3.71308625e+07f, 6.61457938e+07f, + 1.20005529e+08f, 2.21862941e+08f, 4.18228294e+08f, 8.04370413e+08f, 1.57939299e+09f, 3.16812242e+09f, + 6.49660681e+09f, 1.36285199e+10f, 2.92686390e+10f, 6.43979867e+10f, 1.45275523e+11f, 3.36285446e+11f, + 7.99420279e+11f, 1.95326423e+12f, 4.90958187e+12f, 1.27062273e+13f, 3.38907099e+13f, 9.32508403e+13f, + 2.64948942e+14f, 7.78129518e+14f, 2.36471505e+15f, 7.44413803e+15f, 2.43021724e+16f, 8.23706864e+16f, + 2.90211705e+17f, 1.06415768e+18f, 4.06627711e+18f, }; + +__constant__ float m_abscissas_float_8[259] = + { 1.22722792e-02f, 3.68272289e-02f, 6.14133763e-02f, 8.60515971e-02f, 1.10762884e-01f, 1.35568393e-01f, + 1.60489494e-01f, 1.85547813e-01f, 2.10765290e-01f, 2.36164222e-01f, 2.61767321e-01f, 2.87597761e-01f, + 3.13679240e-01f, 3.40036029e-01f, 3.66693040e-01f, 3.93675878e-01f, 4.21010910e-01f, 4.48725333e-01f, + 4.76847237e-01f, 5.05405685e-01f, 5.34430786e-01f, 5.63953775e-01f, 5.94007101e-01f, 6.24624511e-01f, + 6.55841151e-01f, 6.87693662e-01f, 
7.20220285e-01f, 7.53460977e-01f, 7.87457528e-01f, 8.22253686e-01f, + 8.57895297e-01f, 8.94430441e-01f, 9.31909591e-01f, 9.70385775e-01f, 1.00991475e+00f, 1.05055518e+00f, + 1.09236885e+00f, 1.13542087e+00f, 1.17977990e+00f, 1.22551840e+00f, 1.27271289e+00f, 1.32144424e+00f, + 1.37179794e+00f, 1.42386447e+00f, 1.47773961e+00f, 1.53352485e+00f, 1.59132774e+00f, 1.65126241e+00f, + 1.71344993e+00f, 1.77801893e+00f, 1.84510605e+00f, 1.91485658e+00f, 1.98742510e+00f, 2.06297613e+00f, + 2.14168493e+00f, 2.22373826e+00f, 2.30933526e+00f, 2.39868843e+00f, 2.49202464e+00f, 2.58958621e+00f, + 2.69163219e+00f, 2.79843963e+00f, 2.91030501e+00f, 3.02754584e+00f, 3.15050230e+00f, 3.27953915e+00f, + 3.41504770e+00f, 3.55744805e+00f, 3.70719145e+00f, 3.86476298e+00f, 4.03068439e+00f, 4.20551725e+00f, + 4.38986641e+00f, 4.58438376e+00f, 4.78977239e+00f, 5.00679110e+00f, 5.23625945e+00f, 5.47906320e+00f, + 5.73616037e+00f, 6.00858792e+00f, 6.29746901e+00f, 6.60402117e+00f, 6.92956515e+00f, 7.27553483e+00f, + 7.64348809e+00f, 8.03511888e+00f, 8.45227058e+00f, 8.89695079e+00f, 9.37134780e+00f, 9.87784877e+00f, + 1.04190601e+01f, 1.09978298e+01f, 1.16172728e+01f, 1.22807990e+01f, 1.29921443e+01f, 1.37554055e+01f, + 1.45750793e+01f, 1.54561061e+01f, 1.64039187e+01f, 1.74244972e+01f, 1.85244301e+01f, 1.97109839e+01f, + 2.09921804e+01f, 2.23768845e+01f, 2.38749023e+01f, 2.54970927e+01f, 2.72554930e+01f, 2.91634608e+01f, + 3.12358351e+01f, 3.34891185e+01f, 3.59416839e+01f, 3.86140099e+01f, 4.15289481e+01f, 4.47120276e+01f, + 4.81918020e+01f, 5.20002465e+01f, 5.61732106e+01f, 6.07509371e+01f, 6.57786566e+01f, 7.13072704e+01f, + 7.73941341e+01f, 8.41039609e+01f, 9.15098607e+01f, 9.96945411e+01f, 1.08751694e+02f, 1.18787600e+02f, + 1.29922990e+02f, 1.42295202e+02f, 1.56060691e+02f, 1.71397955e+02f, 1.88510933e+02f, 2.07632988e+02f, + 2.29031559e+02f, 2.53013612e+02f, 2.79932028e+02f, 3.10193130e+02f, 3.44265522e+02f, 3.82690530e+02f, + 4.26094527e+02f, 4.75203518e+02f, 5.30860437e+02f, 
5.94045681e+02f, 6.65901543e+02f, 7.47761337e+02f, + 8.41184173e+02f, 9.47996570e+02f, 1.07034233e+03f, 1.21074246e+03f, 1.37216724e+03f, 1.55812321e+03f, + 1.77275819e+03f, 2.02098849e+03f, 2.30865326e+03f, 2.64270219e+03f, 3.03142418e+03f, 3.48472668e+03f, + 4.01447750e+03f, 4.63492426e+03f, 5.36320995e+03f, 6.22000841e+03f, 7.23030933e+03f, 8.42439022e+03f, + 9.83902287e+03f, 1.15189746e+04f, 1.35188810e+04f, 1.59055875e+04f, 1.87610857e+04f, 2.21862046e+04f, + 2.63052621e+04f, 3.12719440e+04f, 3.72767546e+04f, 4.45564828e+04f, 5.34062659e+04f, 6.41950058e+04f, + 7.73851264e+04f, 9.35579699e+04f, 1.13446538e+05f, 1.37977827e+05f, 1.68327749e+05f, 2.05992575e+05f, + 2.52882202e+05f, 3.11442272e+05f, 3.84814591e+05f, 4.77048586e+05f, 5.93380932e+05f, 7.40606619e+05f, + 9.27573047e+05f, 1.16584026e+06f, 1.47056632e+06f, 1.86169890e+06f, 2.36558487e+06f, 3.01715270e+06f, + 3.86288257e+06f, 4.96486431e+06f, 6.40636283e+06f, 8.29948185e+06f, 1.07957589e+07f, 1.41008733e+07f, + 1.84951472e+07f, 2.43622442e+07f, 3.22295113e+07f, 4.28249388e+07f, 5.71579339e+07f, 7.66343793e+07f, + 1.03221273e+08f, 1.39683399e+08f, 1.89925150e+08f, 2.59486540e+08f, 3.56266474e+08f, 4.91582541e+08f, + 6.81731647e+08f, 9.50299811e+08f, 1.33159830e+09f, 1.87580198e+09f, 2.65667391e+09f, 3.78324022e+09f, + 5.41753185e+09f, 7.80169537e+09f, 1.12996537e+10f, 1.64614916e+10f, 2.41235400e+10f, 3.55648690e+10f, + 5.27534501e+10f, 7.87357211e+10f, 1.18256902e+11f, 1.78754944e+11f, 2.71963306e+11f, 4.16512215e+11f, + 6.42178186e+11f, 9.96872550e+11f, 1.55821233e+12f, 2.45280998e+12f, 3.88865623e+12f, 6.20986899e+12f, + 9.98992422e+12f, 1.61915800e+13f, 2.64432452e+13f, 4.35201885e+13f, 7.21888469e+13f, 1.20699764e+14f, + 2.03448372e+14f, 3.45755310e+14f, 5.92524851e+14f, 1.02405779e+15f, 1.78517405e+15f, 3.13930699e+15f, + 5.56985627e+15f, 9.97176335e+15f, 1.80168749e+16f, 3.28570986e+16f, 6.04901854e+16f, 1.12437528e+17f, + 2.11044513e+17f, 4.00073701e+17f, 7.66084936e+17f, 1.48201877e+18f, 
2.89694543e+18f, 5.72279017e+18f, + 1.14268996e+19f, }; + +__constant__ float* m_abscissas_float[8] = { + m_abscissas_float_1, + m_abscissas_float_2, + m_abscissas_float_3, + m_abscissas_float_4, + m_abscissas_float_5, + m_abscissas_float_6, + m_abscissas_float_7, + m_abscissas_float_8, +}; + +__constant__ float m_weights_float_1[4] = + { 7.86824160e+00f, 8.80516388e+02f, 5.39627832e+07f, 8.87651190e+19f, }; + +__constant__ float m_weights_float_2[4] = + { 2.39852428e+00f, 5.24459642e+01f, 6.45788782e+04f, 2.50998524e+12f, }; + +__constant__ float m_weights_float_3[8] = + { 1.74936958e+00f, 3.97965898e+00f, 1.84851460e+01f, 1.86488072e+02f, 5.97420570e+03f, 1.27041264e+06f, + 6.16419301e+09f, 5.23085003e+15f, }; + +__constant__ float m_weights_float_4[16] = + { 1.61385906e+00f, 1.99776729e+00f, 3.02023198e+00f, 5.47764184e+00f, 1.17966092e+01f, 3.03550485e+01f, + 9.58442179e+01f, 3.89387024e+02f, 2.17919325e+03f, 1.83920812e+04f, 2.63212061e+05f, 7.42729651e+06f, + 5.01587565e+08f, 1.03961087e+11f, 9.10032891e+13f, 5.06865116e+17f, }; + +__constant__ float m_weights_float_5[32] = + { 1.58146596e+00f, 1.66914991e+00f, 1.85752319e+00f, 2.17566262e+00f, 2.67590138e+00f, 3.44773868e+00f, + 4.64394654e+00f, 6.53020450e+00f, 9.58228502e+00f, 1.46836141e+01f, 2.35444955e+01f, 3.96352727e+01f, + 7.03763521e+01f, 1.32588012e+02f, 2.66962565e+02f, 5.79374920e+02f, 1.36869193e+03f, 3.55943572e+03f, + 1.03218668e+04f, 3.38662130e+04f, 1.27816626e+05f, 5.65408251e+05f, 2.99446204e+06f, 1.94497502e+07f, + 1.59219301e+08f, 1.69428882e+09f, 2.42715618e+10f, 4.87031785e+11f, 1.43181966e+13f, 6.48947152e+14f, + 4.80375775e+16f, 6.20009636e+18f, }; + +__constant__ float m_weights_float_6[65] = + { 1.57345777e+00f, 1.59489276e+00f, 1.63853652e+00f, 1.70598041e+00f, 1.79972439e+00f, 1.92332285e+00f, + 2.08159737e+00f, 2.28093488e+00f, 2.52969785e+00f, 2.83878478e+00f, 3.22239575e+00f, 3.69908136e+00f, + 4.29318827e+00f, 5.03686536e+00f, 5.97287114e+00f, 7.15853842e+00f, 
8.67142780e+00f, 1.06174736e+01f, + 1.31428500e+01f, 1.64514563e+01f, 2.08309945e+01f, 2.66923599e+01f, 3.46299351e+01f, 4.55151836e+01f, + 6.06440809e+01f, 8.19729692e+01f, 1.12502047e+02f, 1.56909655e+02f, 2.22620435e+02f, 3.21638549e+02f, + 4.73757451e+02f, 7.12299455e+02f, 1.09460965e+03f, 1.72169779e+03f, 2.77592491e+03f, 4.59523007e+03f, + 7.82342759e+03f, 1.37235744e+04f, 2.48518896e+04f, 4.65553875e+04f, 9.04176678e+04f, 1.82484396e+05f, + 3.83680026e+05f, 8.42627197e+05f, 1.93843257e+06f, 4.68511285e+06f, 1.19352867e+07f, 3.21564375e+07f, + 9.19600893e+07f, 2.80222318e+08f, 9.13611083e+08f, 3.20091090e+09f, 1.21076526e+10f, 4.96902475e+10f, + 2.22431575e+11f, 1.09212534e+12f, 5.91688298e+12f, 3.55974344e+13f, 2.39435365e+14f, 1.81355107e+15f, + 1.55873671e+16f, 1.53271488e+17f, 1.73927478e+18f, 2.29884122e+19f, 3.57403070e+20f, }; + +__constant__ float m_weights_float_7[129] = + { 1.57146132e+00f, 1.57679017e+00f, 1.58749564e+00f, 1.60367396e+00f, 1.62547113e+00f, 1.65308501e+00f, + 1.68676814e+00f, 1.72683132e+00f, 1.77364814e+00f, 1.82766042e+00f, 1.88938482e+00f, 1.95942057e+00f, + 2.03845873e+00f, 2.12729290e+00f, 2.22683194e+00f, 2.33811466e+00f, 2.46232715e+00f, 2.60082286e+00f, + 2.75514621e+00f, 2.92706011e+00f, 3.11857817e+00f, 3.33200254e+00f, 3.56996830e+00f, 3.83549565e+00f, + 4.13205150e+00f, 4.46362211e+00f, 4.83479919e+00f, 5.25088196e+00f, 5.71799849e+00f, 6.24325042e+00f, + 6.83488580e+00f, 7.50250620e+00f, 8.25731548e+00f, 9.11241941e+00f, 1.00831875e+01f, 1.11876913e+01f, + 1.24472371e+01f, 1.38870139e+01f, 1.55368872e+01f, 1.74323700e+01f, 1.96158189e+01f, 2.21379089e+01f, + 2.50594593e+01f, 2.84537038e+01f, 3.24091185e+01f, 3.70329629e+01f, 4.24557264e+01f, 4.88367348e+01f, + 5.63712464e+01f, 6.52994709e+01f, 7.59180776e+01f, 8.85949425e+01f, 1.03788130e+02f, 1.22070426e+02f, + 1.44161210e+02f, 1.70968019e+02f, 2.03641059e+02f, 2.43645006e+02f, 2.92854081e+02f, 3.53678602e+02f, + 4.29234308e+02f, 5.23570184e+02f, 6.41976690e+02f, 
7.91405208e+02f, 9.81042209e+02f, 1.22309999e+03f, + 1.53391256e+03f, 1.93546401e+03f, 2.45753455e+03f, 3.14073373e+03f, 4.04081819e+03f, 5.23488160e+03f, + 6.83029446e+03f, 8.97771323e+03f, 1.18901592e+04f, 1.58712239e+04f, 2.13571111e+04f, 2.89798371e+04f, + 3.96630673e+04f, 5.47687519e+04f, 7.63235654e+04f, 1.07371915e+05f, 1.52531667e+05f, 2.18877843e+05f, + 3.17362450e+05f, 4.65120153e+05f, 6.89253766e+05f, 1.03311989e+06f, 1.56688798e+06f, 2.40549203e+06f, + 3.73952896e+06f, 5.88912115e+06f, 9.39904635e+06f, 1.52090328e+07f, 2.49628719e+07f, 4.15775926e+07f, + 7.03070537e+07f, 1.20759856e+08f, 2.10788251e+08f, 3.74104720e+08f, 6.75449459e+08f, 1.24131674e+09f, + 2.32331003e+09f, 4.43117602e+09f, 8.61744649e+09f, 1.70983691e+10f, 3.46357452e+10f, 7.16760712e+10f, + 1.51634762e+11f, 3.28172932e+11f, 7.27110260e+11f, 1.65049955e+12f, 3.84133815e+12f, 9.17374427e+12f, + 2.24990195e+13f, 5.67153509e+13f, 1.47074225e+14f, 3.92701252e+14f, 1.08063998e+15f, 3.06767147e+15f, + 8.99238679e+15f, 2.72472254e+16f, 8.54294612e+16f, 2.77461372e+17f, 9.34529948e+17f, 3.26799612e+18f, + 1.18791443e+19f, 4.49405341e+19f, 1.77170665e+20f, }; + +__constant__ float m_weights_float_8[259] = + { 1.57096255e+00f, 1.57229290e+00f, 1.57495658e+00f, 1.57895955e+00f, 1.58431079e+00f, 1.59102230e+00f, + 1.59910918e+00f, 1.60858966e+00f, 1.61948515e+00f, 1.63182037e+00f, 1.64562338e+00f, 1.66092569e+00f, + 1.67776241e+00f, 1.69617233e+00f, 1.71619809e+00f, 1.73788633e+00f, 1.76128784e+00f, 1.78645779e+00f, + 1.81345587e+00f, 1.84234658e+00f, 1.87319943e+00f, 1.90608922e+00f, 1.94109632e+00f, 1.97830698e+00f, + 2.01781368e+00f, 2.05971547e+00f, 2.10411838e+00f, 2.15113585e+00f, 2.20088916e+00f, 2.25350798e+00f, + 2.30913084e+00f, 2.36790578e+00f, 2.42999091e+00f, 2.49555516e+00f, 2.56477893e+00f, 2.63785496e+00f, + 2.71498915e+00f, 2.79640147e+00f, 2.88232702e+00f, 2.97301705e+00f, 3.06874019e+00f, 3.16978367e+00f, + 3.27645477e+00f, 3.38908227e+00f, 3.50801806e+00f, 3.63363896e+00f, 
3.76634859e+00f, 3.90657947e+00f, + 4.05479525e+00f, 4.21149322e+00f, 4.37720695e+00f, 4.55250922e+00f, 4.73801517e+00f, 4.93438579e+00f, + 5.14233166e+00f, 5.36261713e+00f, 5.59606472e+00f, 5.84356014e+00f, 6.10605759e+00f, 6.38458564e+00f, + 6.68025373e+00f, 6.99425915e+00f, 7.32789480e+00f, 7.68255767e+00f, 8.05975815e+00f, 8.46113023e+00f, + 8.88844279e+00f, 9.34361190e+00f, 9.82871448e+00f, 1.03460033e+01f, 1.08979234e+01f, 1.14871305e+01f, + 1.21165112e+01f, 1.27892047e+01f, 1.35086281e+01f, 1.42785033e+01f, 1.51028871e+01f, 1.59862046e+01f, + 1.69332867e+01f, 1.79494108e+01f, 1.90403465e+01f, 2.02124072e+01f, 2.14725057e+01f, 2.28282181e+01f, + 2.42878539e+01f, 2.58605342e+01f, 2.75562800e+01f, 2.93861096e+01f, 3.13621485e+01f, 3.34977526e+01f, + 3.58076454e+01f, 3.83080730e+01f, 4.10169773e+01f, 4.39541917e+01f, 4.71416602e+01f, 5.06036855e+01f, + 5.43672075e+01f, 5.84621188e+01f, 6.29216205e+01f, 6.77826252e+01f, 7.30862125e+01f, 7.88781469e+01f, + 8.52094636e+01f, 9.21371360e+01f, 9.97248336e+01f, 1.08043785e+02f, 1.17173764e+02f, 1.27204209e+02f, + 1.38235512e+02f, 1.50380485e+02f, 1.63766039e+02f, 1.78535118e+02f, 1.94848913e+02f, 2.12889407e+02f, + 2.32862309e+02f, 2.55000432e+02f, 2.79567594e+02f, 3.06863126e+02f, 3.37227087e+02f, 3.71046310e+02f, + 4.08761417e+02f, 4.50874968e+02f, 4.97960949e+02f, 5.50675821e+02f, 6.09771424e+02f, 6.76110054e+02f, + 7.50682104e+02f, 8.34626760e+02f, 9.29256285e+02f, 1.03608458e+03f, 1.15686082e+03f, 1.29360914e+03f, + 1.44867552e+03f, 1.62478326e+03f, 1.82509876e+03f, 2.05330964e+03f, 2.31371761e+03f, 2.61134924e+03f, + 2.95208799e+03f, 3.34283233e+03f, 3.79168493e+03f, 4.30817984e+03f, 4.90355562e+03f, 5.59108434e+03f, + 6.38646863e+03f, 7.30832183e+03f, 8.37874981e+03f, 9.62405722e+03f, 1.10756067e+04f, 1.27708661e+04f, + 1.47546879e+04f, 1.70808754e+04f, 1.98141031e+04f, 2.30322789e+04f, 2.68294532e+04f, 3.13194118e+04f, + 3.66401221e+04f, 4.29592484e+04f, 5.04810088e+04f, 5.94547213e+04f, 7.01854788e+04f, 
8.30475173e+04f, + 9.85009981e+04f, 1.17113127e+05f, 1.39584798e+05f, 1.66784302e+05f, 1.99790063e+05f, 2.39944995e+05f, + 2.88925794e+05f, 3.48831531e+05f, 4.22297220e+05f, 5.12639825e+05f, 6.24046488e+05f, 7.61817907e+05f, + 9.32683930e+05f, 1.14521401e+06f, 1.41035265e+06f, 1.74212004e+06f, 2.15853172e+06f, 2.68280941e+06f, + 3.34498056e+06f, 4.18399797e+06f, 5.25055801e+06f, 6.61086017e+06f, 8.35163942e+06f, 1.05869253e+07f, + 1.34671524e+07f, 1.71914827e+07f, 2.20245345e+07f, 2.83191730e+07f, 3.65476782e+07f, 4.73445266e+07f, + 6.15653406e+07f, 8.03684303e+07f, 1.05328028e+08f, 1.38592169e+08f, 1.83103699e+08f, 2.42910946e+08f, + 3.23606239e+08f, 4.32947522e+08f, 5.81743297e+08f, 7.85117979e+08f, 1.06432920e+09f, 1.44938958e+09f, + 1.98286647e+09f, 2.72541431e+09f, 3.76386796e+09f, 5.22313881e+09f, 7.28378581e+09f, 1.02080964e+10f, + 1.43789932e+10f, 2.03583681e+10f, 2.89749983e+10f, 4.14577375e+10f, 5.96383768e+10f, 8.62622848e+10f, + 1.25466705e+11f, 1.83521298e+11f, 2.69981221e+11f, 3.99492845e+11f, 5.94638056e+11f, 8.90440997e+11f, + 1.34155194e+12f, 2.03376855e+12f, 3.10262796e+12f, 4.76359832e+12f, 7.36142036e+12f, 1.14512696e+13f, + 1.79331419e+13f, 2.82758550e+13f, 4.48929705e+13f, 7.17780287e+13f, 1.15585510e+14f, 1.87483389e+14f, + 3.06351036e+14f, 5.04340065e+14f, 8.36616340e+14f, 1.39855635e+15f, 2.35633575e+15f, 4.00176517e+15f, + 6.85137513e+15f, 1.18269011e+16f, 2.05867353e+16f, 3.61396878e+16f, 6.39911218e+16f, 1.14301619e+17f, + 2.05988138e+17f, 3.74584679e+17f, 6.87444303e+17f, 1.27340764e+18f, 2.38124192e+18f, 4.49583562e+18f, + 8.57144202e+18f, 1.65044358e+19f, 3.21010035e+19f, 6.30778012e+19f, 1.25240403e+20f, 2.51300530e+20f, + 5.09677626e+20f, }; + +__constant__ float* m_weights_float[8] = { + m_weights_float_1, + m_weights_float_2, + m_weights_float_3, + m_weights_float_4, + m_weights_float_5, + m_weights_float_6, + m_weights_float_7, + m_weights_float_8 +}; + + + +} // Namespace detail +} // Namespace quadrature +} // Namespace math +} 
// Namespace boost + +#endif // BOOST_MATH_ENABLE_CUDA + +#endif // BOOST_MATH_QUADRATURE_DETAIL_SINH_SINH_DETAIL_HPP From 8d28f21c00ea90b6574faa0f6874514db2010a5d Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 10 Sep 2024 15:20:01 -0400 Subject: [PATCH 19/22] Add double coeffs --- .../quadrature/detail/sinh_sinh_detail.hpp | 470 ++++++++++++++++++ 1 file changed, 470 insertions(+) diff --git a/include/boost/math/quadrature/detail/sinh_sinh_detail.hpp b/include/boost/math/quadrature/detail/sinh_sinh_detail.hpp index f0ba05ef0..95393b2f5 100644 --- a/include/boost/math/quadrature/detail/sinh_sinh_detail.hpp +++ b/include/boost/math/quadrature/detail/sinh_sinh_detail.hpp @@ -739,7 +739,477 @@ __constant__ float* m_weights_float[8] = { m_weights_float_8 }; +__constant__ double m_abscissas_double_1[6] = + { 3.088287417976322866e+00, 1.489931846492091580e+02, 3.412289247883437102e+06, 2.069325766042617791e+18, + 2.087002407609475560e+50, 2.019766160717908151e+137, }; + +__constant__ double m_abscissas_double_2[6] = + { 9.130487626376696748e-01, 1.415789294662811592e+01, 6.704215516223276482e+03, 9.641725327150499415e+10, + 2.508950760085778485e+30, 1.447263535710337145e+83, }; + +__constant__ double m_abscissas_double_3[12] = + { 4.072976900657586902e-01, 1.682066707021148743e+00, 6.150897986386729515e+00, 4.003962351929400222e+01, + 7.929200247931026321e+02, 1.029849713330979583e+05, 3.038623109252438574e+08, 1.565445474362494869e+14, + 4.042465098430219104e+23, 1.321706827429658179e+39, 4.991231782099557998e+64, 7.352943850359875966e+106, }; + +__constant__ double m_abscissas_double_4[24] = + { 1.981352722514781726e-01, 6.401556735005260177e-01, 1.248928698253977663e+00, 2.266080840944321232e+00, + 4.296462696702327381e+00, 9.130290387099955696e+00, 2.311107653864279933e+01, 7.427706034324012430e+01, + 3.267209207115258917e+02, 2.159485694311818716e+03, 2.415015262896413060e+04, 5.318194002756929158e+05, + 2.800586857217043323e+07, 4.524065079794338780e+09, 
3.085612573980677122e+12, 1.338826733015807478e+16, + 6.254617176562341381e+20, 6.182098535814164754e+26, 3.077293649788458067e+34, 2.348957289370104303e+44, + 1.148543197899469758e+57, 2.255300070010069868e+73, 1.877919500569195394e+94, 1.367473887938624280e+121, }; + +__constant__ double m_abscissas_double_5[49] = + { 9.839678940067320339e-02, 3.006056176599550351e-01, 5.198579789949384900e-01, 7.703620832988877009e-01, + 1.071311369641311830e+00, 1.450569758088998445e+00, 1.950778549520360334e+00, 2.640031773695551468e+00, + 3.631372373667412273e+00, 5.119915330903350570e+00, 7.456660981404883289e+00, 1.130226126889972624e+01, + 1.796410692472772550e+01, 3.017810704601898222e+01, 5.403875800312370567e+01, 1.041077314477469548e+02, + 2.180295201202628077e+02, 5.021556986259101646e+02, 1.288621310998222420e+03, 3.739216870800548324e+03, + 1.247507297020191232e+04, 4.876399753226692124e+04, 2.281456582219130122e+05, 1.308777960064843017e+06, + 9.460846634209664077e+06, 8.888831203637279622e+07, 1.124168828974344134e+09, 1.991276729532144470e+10, + 5.167434691060984650e+11, 2.067218814203990888e+13, 1.350615033184100406e+15, 1.538540662836508188e+17, + 3.290747290540350661e+19, 1.437291381884498816e+22, 1.409832445530347286e+25, 3.459135480277971441e+28, + 2.398720582340954092e+32, 5.398806604617292960e+36, 4.613340002580628610e+41, 1.787685909667902457e+47, + 3.841984370124338536e+53, 5.752797955708583700e+60, 7.771812038427286551e+68, 1.269673044204081626e+78, + 3.495676773765731568e+88, 2.362519474971692445e+100, 6.002143893273651123e+113, 9.290716303464155539e+128, + 1.514442238033847090e+146, }; + +__constant__ double m_abscissas_double_6[98] = + { 4.911510035029024930e-02, 1.480131496743607333e-01, 2.489388137406836857e-01, 3.533254236926684378e-01, + 4.627335566122353259e-01, 5.789120681640963067e-01, 7.038702533860627799e-01, 8.399658591446505688e-01, + 9.900150664244376147e-01, 1.157432570143699131e+00, 1.346412759185361763e+00, 1.562167113901335551e+00, + 
1.811238852782323380e+00, 2.101924419006550301e+00, 2.444843885584197934e+00, 2.853720746632915024e+00, + 3.346458910955350787e+00, 3.946645821057838387e+00, 4.685673101596678529e+00, 5.605762230908151175e+00, + 6.764332336830574204e+00, 8.240383175379985221e+00, 1.014394356129857730e+01, 1.263024714338892472e+01, + 1.592130395780345258e+01, 2.033921861921857185e+01, 2.635846445760633752e+01, 3.468926333224152409e+01, + 4.641291467019728963e+01, 6.320550793890424203e+01, 8.771497261808906374e+01, 1.242096926240411498e+02, + 1.797186347845127557e+02, 2.660817283327900190e+02, 4.037273029575712841e+02, 6.288113066545908703e+02, + 1.007079837507490594e+03, 1.661568229185114288e+03, 2.829651440786582598e+03, 4.984386266585669139e+03, + 9.101546927647810893e+03, 1.726892655475049727e+04, 3.413099578778601190e+04, 7.045668977053092802e+04, + 1.523404217761279128e+05, 3.460479782897947414e+05, 8.284724209233183002e+05, 2.097596146601193946e+06, + 5.636950798861273236e+06, 1.614071410855607245e+07, 4.944730678915060360e+07, 1.627810516820991356e+08, + 5.785332971632280838e+08, 2.230838540681955690e+09, 9.382391306064739643e+09, 4.328149544776551692e+10, + 2.203072744049242904e+11, 1.245245067109136413e+12, 7.869000534957822375e+12, 5.599531432979422461e+13, + 4.521486949902090877e+14, 4.176889516548293265e+15, 4.452867759650496656e+16, 5.529142853140498068e+17, + 8.075732516562854275e+18, 1.402046916260468698e+20, 2.925791412832239850e+21, 7.426433029335410886e+22, + 2.321996331245735364e+24, 9.064194250638442432e+25, 4.481279048819445609e+27, 2.849046304726990645e+29, + 2.367381159183355975e+31, 2.615825578455121227e+33, 3.914764948263290808e+35, 8.092042448555929219e+37, + 2.358921320940630332e+40, 9.915218648535332591e+42, 6.152851059342658764e+45, 5.780276340144515388e+48, + 8.443751734186488626e+51, 1.973343350899766708e+55, 7.605247378556219980e+58, 4.992057104939510418e+62, + 5.775863423903912316e+66, 1.221808201945355603e+71, 4.912917230387133816e+75, 
3.913971813732202372e+80, + 6.456388069905286787e+85, 2.311225068528010358e+91, 1.887458157719431339e+97, 3.708483165438453094e+103, + 1.855198812283538635e+110, 2.509787873171705318e+117, 9.790423755591216617e+124, 1.179088807944050747e+133, + 4.714631846722476620e+141, 6.762657785959713240e+150, }; + +__constant__ double m_abscissas_double_7[196] = + { 2.454715583629863651e-02, 7.372466873903346224e-02, 1.231525309416766543e-01, 1.730001377719248556e-01, + 2.234406649596860001e-01, 2.746526549718518258e-01, 3.268216792980646669e-01, 3.801421009804789245e-01, + 4.348189637215614948e-01, 4.910700365099428407e-01, 5.491280459480215441e-01, 6.092431324382654397e-01, + 6.716855712021148069e-01, 7.367488049067938643e-01, 8.047528416336950644e-01, 8.760480802482050705e-01, + 9.510196351823332253e-01, 1.030092244532470067e+00, 1.113735859588680765e+00, 1.202472030918058876e+00, + 1.296881226496863751e+00, 1.397611241828373026e+00, 1.505386891360545205e+00, 1.621021205894798030e+00, + 1.745428403369044572e+00, 1.879638952031029331e+00, 2.024817107609328524e+00, 2.182281382147884181e+00, + 2.353528494823881355e+00, 2.540261468229626457e+00, 2.744422672171478111e+00, 2.968232787190606619e+00, + 3.214236869520657666e+00, 3.485358957907730467e+00, 3.784966983117372821e+00, 4.116950138940295100e+00, + 4.485811369388231710e+00, 4.896778246562001812e+00, 5.355936290826725948e+00, 5.870389762600956907e+00, + 6.448456189131117605e+00, 7.099902452679558236e+00, 7.836232253282841261e+00, 8.671037293575230635e+00, + 9.620427777985990363e+00, 1.070356198876799531e+01, 1.194330008139441022e+01, 1.336701421038499647e+01, + 1.500759615914396343e+01, 1.690471548203528376e+01, 1.910639668731689597e+01, 2.167100443216577994e+01, + 2.466975274695099197e+01, 2.818989025157845355e+01, 3.233876132429401745e+01, 3.724900758097245740e+01, + 4.308526084907741997e+01, 5.005279647654703975e+01, 5.840877607253876528e+01, 6.847692821534239862e+01, + 8.066681777060714848e+01, 9.549927270200249260e+01, 
1.136401195769487885e+02, 1.359451944976603209e+02, + 1.635207451879744447e+02, 1.978049687912586950e+02, 2.406787535889776661e+02, 2.946170292930555023e+02, + 3.628969532147125333e+02, 4.498861782715596902e+02, 5.614447353133496106e+02, 7.054892470899271429e+02, + 8.927907732799964116e+02, 1.138111424979478376e+03, 1.461835991563605367e+03, 1.892332623444716186e+03, + 2.469396036186133479e+03, 3.249311569298824731e+03, 4.312367113170283012e+03, 5.774094754500139661e+03, + 7.802247237500851845e+03, 1.064267530975806972e+04, 1.465915383535674990e+04, 2.039528541239754835e+04, + 2.867170622421556265e+04, 4.074033762183453297e+04, 5.853182310596923393e+04, 8.505689265265206640e+04, + 1.250649269847856615e+05, 1.861373943166749766e+05, 2.805255777452010927e+05, 4.282782486084761748e+05, + 6.626340506127657304e+05, 1.039443239650339565e+06, 1.653857426112961316e+06, 2.670315650125279161e+06, + 4.377212026624358795e+06, 7.288071713698413821e+06, 1.233172993400331694e+07, 2.121557285769933699e+07, + 3.713086254861535383e+07, 6.614579377352135534e+07, 1.200055291694917110e+08, 2.218629410296880690e+08, + 4.182282939928687703e+08, 8.043704132493714804e+08, 1.579392989425668114e+09, 3.168122415524104635e+09, + 6.496606811549861323e+09, 1.362851988356444486e+10, 2.926863897008707708e+10, 6.439798665209493735e+10, + 1.452755233772903022e+11, 3.362854459389246576e+11, 7.994202785433479271e+11, 1.953264233362291960e+12, + 4.909581868242554569e+12, 1.270622730765015610e+13, 3.389070986742985764e+13, 9.325084030208844833e+13, + 2.649489423834534140e+14, 7.781295184094957195e+14, 2.364715052527355639e+15, 7.444138031465958255e+15, + 2.430217240684749635e+16, 8.237068641534357762e+16, 2.902117050664548840e+17, 1.064157679404037013e+18, + 4.066277106061960017e+18, 1.621274233630359097e+19, 6.754156830915450013e+19, 2.944056841733781919e+20, + 1.344640139549107817e+21, 6.444586158944723300e+21, 3.246218667554608934e+22, 1.721234579556653533e+23, + 9.622533890240474391e+23, 
5.681407260417956671e+24, 3.548890779995928184e+25, 2.349506425672269562e+26, + 1.651618130605205643e+27, 1.235147426493113059e+28, 9.845947239792057550e+28, 8.383130781984610418e+29, + 7.639649461399172445e+30, 7.467862732233885201e+31, 7.847691482004993660e+32, 8.886032557626454704e+33, + 1.086734890678302436e+35, 1.438967777036538458e+36, 2.068168865475603521e+37, 3.234885320223912385e+38, + 5.521233641542628514e+39, 1.031148231194663855e+41, 2.113272035816365982e+42, 4.766724345485077520e+43, + 1.186961550990218287e+45, 3.273172169205847573e+46, 1.002821226769167753e+48, 3.424933903935156479e+49, + 1.308436017026428736e+51, 5.611378330048420503e+52, 2.711424806327139291e+54, 1.481771793644066442e+56, + 9.194282071042778804e+57, 6.503661455875355562e+59, 5.266329986868627303e+61, 4.902662807969347359e+63, + 5.270511057289557050e+65, 6.572856511670583316e+67, 9.553956030013225387e+69, 1.626491911159411616e+72, + 3.259410915500951223e+74, 7.728460318113614280e+76, 2.179881996905918059e+79, 7.354484388371505915e+81, + 2.984831270803957746e+84, 1.465828267813438962e+87, 8.763355972629864261e+89, 6.417909665847831130e+92, + 5.794958649229893510e+95, 6.494224472311908365e+98, 9.095000156016433698e+101, 1.603058498455299102e+105, + 3.582099119119320529e+108, 1.022441227139854687e+112, 3.756872185015086057e+115, 1.791363463832849159e+119, + 1.117641882039472124e+123, 9.202159565546528285e+126, 1.008716474827888568e+131, 1.485546487089301805e+135, + 2.966961534830566097e+139, 8.114207284664369360e+143, 3.069178087507669739e+148, 1.622223681147791473e+153, }; + +__constant__ double m_abscissas_double_8[391] = + { 1.227227917054637830e-02, 3.682722894492590471e-02, 6.141337626871079991e-02, 8.605159708778207907e-02, + 1.107628840017845446e-01, 1.355683934957785482e-01, 1.604894937454335489e-01, 1.855478131645089496e-01, + 2.107652898670700524e-01, 2.361642222214626268e-01, 2.617673206785495261e-01, 2.875977610631342900e-01, + 3.136792395249035647e-01, 
3.400360293536632770e-01, 3.666930398731810193e-01, 3.936758776386451797e-01, + 4.210109101746846268e-01, 4.487253325041450341e-01, 4.768472367324829462e-01, 5.054056849688209375e-01, + 5.344307858825229079e-01, 5.639537752137267134e-01, 5.940071005777549000e-01, 6.246245109268716053e-01, + 6.558411510586397969e-01, 6.876936615883514922e-01, 7.202202848338683401e-01, 7.534609770949572224e-01, + 7.874575278460963461e-01, 8.222536864020499377e-01, 8.578952966595825808e-01, 8.944304405668593009e-01, + 9.319095910247435485e-01, 9.703857749817920659e-01, 1.009914747547728584e+00, 1.050555178019083150e+00, + 1.092368848786092579e+00, 1.135420868172514300e+00, 1.179779898350424466e+00, 1.225518399571142610e+00, + 1.272712892062026473e+00, 1.321444237057985065e+00, 1.371797938567245953e+00, 1.423864467614384096e+00, + 1.477739610861208115e+00, 1.533524845679288858e+00, 1.591327743938355098e+00, 1.651262406984310076e+00, + 1.713449934511288211e+00, 1.778018930286256858e+00, 1.845106047964720870e+00, 1.914856580544951899e+00, + 1.987425097349017093e+00, 2.062976132795275283e+00, 2.141684931642916785e+00, 2.223738255848994521e+00, + 2.309335258687213796e+00, 2.398688432341103821e+00, 2.492024635808356095e+00, 2.589586210645122756e+00, + 2.691632192846832444e+00, 2.798439630014497291e+00, 2.910305013902562652e+00, 3.027545839497364963e+00, + 3.150502302946919722e+00, 3.279539151967394330e+00, 3.415047703805410611e+00, 3.557448047456550733e+00, + 3.707191448649779817e+00, 3.864762978128342125e+00, 4.030684386016531344e+00, 4.205517247588613835e+00, + 4.389866408585172458e+00, 4.584383761391930748e+00, 4.789772386950687695e+00, 5.006791101261363264e+00, + 5.236259449815274050e+00, 5.479063198337523150e+00, 5.736160373884817415e+00, 6.008587916728619858e+00, + 6.297469010648863048e+00, 6.604021167380929133e+00, 6.929565150124677837e+00, 7.275534831383860972e+00, + 7.643488092123492064e+00, 8.035118882502459288e+00, 8.452270579478188130e+00, 8.896950793641785313e+00, + 
9.371347797016395173e+00, 9.877848765573446033e+00, 1.041906005527762037e+01, 1.099782975900831706e+01, + 1.161727282423952258e+01, 1.228079904848924611e+01, 1.299214431196691048e+01, 1.375540545535625881e+01, + 1.457507926620621316e+01, 1.545610610104852468e+01, 1.640391874338302925e+01, 1.742449718154208970e+01, + 1.852443008688437526e+01, 1.971098388378266494e+01, 2.099218043080961648e+01, 2.237688448013982946e+01, + 2.387490225270073820e+01, 2.549709266380430464e+01, 2.725549296232531555e+01, 2.916346081119624987e+01, + 3.123583514423284962e+01, 3.348911849136805118e+01, 3.594168387985465099e+01, 3.861400990307230737e+01, + 4.152894811329303023e+01, 4.471202755441533396e+01, 4.819180202224910174e+01, 5.200024654361558757e+01, + 5.617321062537384494e+01, 6.075093706918782079e+01, 6.577865661168003966e+01, 7.130727037357721343e+01, + 7.739413413465805794e+01, 8.410396085269633392e+01, 9.150986068496734448e+01, 9.969454113547704016e+01, + 1.087516939426018897e+02, 1.187876000643037532e+02, 1.299229897614516371e+02, 1.422952015056372537e+02, + 1.560606914665002671e+02, 1.713979549326432406e+02, 1.885109325154830073e+02, 2.076329877740125935e+02, + 2.290315594654587370e+02, 2.530136115655676467e+02, 2.799320282398896912e+02, 3.101931299766730890e+02, + 3.442655222107529892e+02, 3.826905303289378387e+02, 4.260945266207607701e+02, 4.752035175892902045e+02, + 5.308604366239058864e+02, 5.940456805372995009e+02, 6.659015428338778262e+02, 7.477613367309153870e+02, + 8.411841730471343023e+02, 9.479965698013741524e+02, 1.070342331375881840e+03, 1.210742457518582660e+03, + 1.372167241552205820e+03, 1.558123212187692722e+03, 1.772758188662716282e+03, 2.020988485411862984e+03, + 2.308653259329163157e+03, 2.642702189813684273e+03, 3.031424182869210212e+03, 3.484726676985756018e+03, + 4.014477504733973505e+03, 4.634924264049394751e+03, 5.363209949773439749e+03, 6.220008412114342803e+03, + 7.230309332853029956e+03, 8.424390216735217783e+03, 9.839022871538541787e+03, 
1.151897463083113988e+04, + 1.351888098874374202e+04, 1.590558745460066947e+04, 1.876108572764816176e+04, 2.218620462393366275e+04, + 2.630526205054915357e+04, 3.127194401941711057e+04, 3.727675461256652923e+04, 4.455648280312273249e+04, + 5.340626592018903930e+04, 6.419500580388918123e+04, 7.738512642386820060e+04, 9.355796993981725963e+04, + 1.134465375820669470e+05, 1.379778272209741713e+05, 1.683277485807887053e+05, 2.059925746120735305e+05, + 2.528822024503158254e+05, 3.114422718347725915e+05, 3.848145913435570736e+05, 4.770485864966822643e+05, + 5.933809324724740854e+05, 7.406066190351666115e+05, 9.275730471470643372e+05, 1.165840260940180415e+06, + 1.470566322118246135e+06, 1.861698899014921971e+06, 2.365584870298354495e+06, 3.017152695505764877e+06, + 3.862882573599929249e+06, 4.964864305589750358e+06, 6.406362829959736606e+06, 8.299481847261302115e+06, + 1.079575892642401854e+07, 1.410087327474604091e+07, 1.849514724418250100e+07, 2.436224419670805500e+07, + 3.222951131863941234e+07, 4.282493882385925337e+07, 5.715793394339267637e+07, 7.663437932745451635e+07, + 1.032212725498489699e+08, 1.396833991976194842e+08, 1.899251497664892740e+08, 2.594865396467505851e+08, + 3.562664742464501497e+08, 4.915825413172413471e+08, 6.817316470116958142e+08, 9.502998105202541438e+08, + 1.331598295343277538e+09, 1.875801976010459831e+09, 2.656673907709731487e+09, 3.783240215616365909e+09, + 5.417531848500136979e+09, 7.801695369892847510e+09, 1.129965368955098833e+10, 1.646149161390821924e+10, + 2.412353995736687694e+10, 3.556486895431927094e+10, 5.275345014093760519e+10, 7.873572108325378177e+10, + 1.182569020317863604e+11, 1.787549442508363461e+11, 2.719633064979986142e+11, 4.165122153119897946e+11, + 6.421781858205134197e+11, 9.968725497576275918e+11, 1.558212327122960399e+12, 2.452809984907093786e+12, + 3.888656232828140210e+12, 6.209868990509424909e+12, 9.989924216297983665e+12, 1.619158001378611351e+13, + 2.644324518669926559e+13, 4.352018847904374786e+13, 
7.218884688202741709e+13, 1.206997640727349538e+14, + 2.034483722445207402e+14, 3.457553102874402920e+14, 5.925248511957505706e+14, 1.024057793713038672e+15, + 1.785174045941642162e+15, 3.139306988668494696e+15, 5.569856270174890128e+15, 9.971763353834460328e+15, + 1.801687491114883092e+16, 3.285709858322565542e+16, 6.049018540910759710e+16, 1.124375283211369572e+17, + 2.110445125952435305e+17, 4.000737007891229992e+17, 7.660849361564329309e+17, 1.482018770996176700e+18, + 2.896945433910857945e+18, 5.722790165693470493e+18, 1.142689960439921462e+19, 2.306616559984106723e+19, + 4.707857184616093863e+19, 9.717346347495342813e+19, 2.028735605622585444e+20, 4.284840254171000581e+20, + 9.157027329021623836e+20, 1.980457834766411777e+21, 4.335604886702252004e+21, 9.609258559714223995e+21, + 2.156604630608586997e+22, 4.902045909695270289e+22, 1.128749227121328467e+23, 2.633414623049930879e+23, + 6.226335684490998543e+23, 1.492205279014148921e+24, 3.625768249717590109e+24, 8.933899764961444882e+24, + 2.232786981682262383e+25, 5.661295336293986732e+25, 1.456616710298133142e+26, 3.803959852868488245e+26, + 1.008531585603036490e+27, 2.715247425129423358e+27, 7.425071766766651967e+27, 2.062860712173225003e+28, + 5.824055458799413312e+28, 1.671388836696436644e+29, 4.876830632023956392e+29, 1.447170071146107156e+30, + 4.368562208925583783e+30, 1.341873806249251338e+31, 4.195251632754338682e+31, 1.335360134828214136e+32, + 4.328681350715136340e+32, 1.429401866150319186e+33, 4.809736146227180696e+33, 1.649624114567602575e+34, + 5.768677492419801469e+34, 2.057442854162761350e+35, 7.486423509917811063e+35, 2.780052791791155051e+36, + 1.053908347660081874e+37, 4.080046334235754223e+37, 1.613553311592805373e+38, 6.520836332997615098e+38, + 2.693848186257510992e+39, 1.138002408430710800e+40, 4.917748008813924613e+40, 2.174691073191358676e+41, + 9.844523745430526502e+41, 4.563707467590116732e+42, 2.167352073708379137e+43, 1.054860193887170754e+44, + 5.263588225566847365e+44, 
2.693772458797916623e+45, 1.414506760560163074e+46, 7.624126763512016620e+46, + 4.219828148762794411e+47, 2.399387665831793264e+48, 1.402139947254117434e+49, 8.424706325525422943e+49, + 5.206918479942619318e+50, 3.311787866477716151e+51, 2.168683295509859155e+52, 1.462786368779206713e+53, + 1.016761784575838363e+54, 7.286460995145043184e+54, 5.386194237448865407e+55, 4.108917480528740640e+56, + 3.236445625945552728e+57, 2.633440652417619669e+58, 2.214702339357939268e+59, 1.926058995948268392e+60, + 1.733067740414174932e+61, 1.614307160124426969e+62, 1.557464328486352138e+63, 1.557226155197192031e+64, + 1.614473962707995344e+65, 1.736617406327386105e+66, 1.939201243451190521e+67, 2.249277732936622876e+68, + 2.711593798719765599e+69, 3.399628732048687119e+70, 4.435389696730206291e+71, 6.025566076164003981e+72, + 8.529161425383779849e+73, 1.258746322992988688e+75, 1.938112175186560210e+76, 3.115432363572610661e+77, + 5.231797674434390018e+78, 9.184930207860680757e+79, 1.686929404780378772e+81, 3.243565624474232635e+82, + 6.533812498930220075e+83, 1.379898823144620314e+85, 3.057650444842839916e+86, 7.114050545839171245e+87, + 1.739275024442258674e+89, 4.471782915853177804e+90, 1.210036789494028144e+92, 3.448828044590862359e+93, + 1.036226783750561565e+95, 3.284801914751206038e+96, 1.099514933602224638e+98, 3.889581731378242597e+99, + 1.455434287901069991e+101, 5.765729934387419019e+102, 2.420349568745475582e+104, 1.077606625929777536e+106, + 5.093346988695851845e+107, 2.558090824110323997e+109, 1.366512508719047964e+111, 7.771735800763526406e+112, + 4.710398638793014918e+114, 3.045563885587013954e+116, 2.102762552861442993e+118, 1.551937536212596136e+120, + 1.225676354426075970e+122, 1.036950946169703711e+124, 9.407885268970827717e+125, 9.163369107785093171e+127, + 9.592531095671168926e+129, 1.080486293361823875e+132, 1.311034829557782450e+134, 1.715642975932639188e+136, + 2.424231742707881878e+138, 3.703231223333127919e+140, 6.123225027409988902e+142, 
1.097271040771196765e+145, + 2.133693643241295977e+147, 4.508099184895777328e+149, 1.036252806686291189e+152, }; + +__constant__ double* m_abscissas_double[8] = { + m_abscissas_double_1, + m_abscissas_double_2, + m_abscissas_double_3, + m_abscissas_double_4, + m_abscissas_double_5, + m_abscissas_double_6, + m_abscissas_double_7, + m_abscissas_double_8, +}; + +__constant__ double m_weights_double_1[6] = + { 7.868241604839621507e+00, 8.805163880733011116e+02, 5.396278323520705668e+07, 8.876511896968161317e+19, + 2.432791879269225553e+52, 6.399713512080202911e+139, }; + +__constant__ double m_weights_double_2[6] = + { 2.398524276302635218e+00, 5.244596423726681022e+01, 6.457887819598201760e+04, 2.509985242511374506e+12, + 1.774029269327138701e+32, 2.781406115983097314e+85, }; + +__constant__ double m_weights_double_3[12] = + { 1.749369583108386852e+00, 3.979658981934607813e+00, 1.848514598574449570e+01, 1.864880718932067988e+02, + 5.974205695263265855e+03, 1.270412635144623341e+06, 6.164193014295984071e+09, 5.230850031811222530e+15, + 2.226260929943369774e+25, 1.199931102042181592e+41, 7.470602144275146214e+66, 1.814465860528410676e+109, }; + +__constant__ double m_weights_double_4[24] = + { 1.613859062188366173e+00, 1.997767291869673262e+00, 3.020231979908834220e+00, 5.477641843859057761e+00, + 1.179660916492671672e+01, 3.035504848518598294e+01, 9.584421793794920860e+01, 3.893870238229992076e+02, + 2.179193250357911344e+03, 1.839208123964132852e+04, 2.632120612599856167e+05, 7.427296507169468210e+06, + 5.015875648341232356e+08, 1.039610867241544113e+11, 9.100328911818091977e+13, 5.068651163890231571e+17, + 3.039966520714902616e+22, 3.857740194672007962e+28, 2.465542763666581087e+36, 2.416439449167799461e+46, + 1.517091553926604149e+59, 3.825043412021411380e+75, 4.089582396821598640e+96, 3.823775894295564050e+123, }; + +__constant__ double m_weights_double_5[49] = + { 1.581465959536694744e+00, 1.669149910438534746e+00, 1.857523188595005770e+00, 
2.175662623626994120e+00, + 2.675901375211020564e+00, 3.447738682498791744e+00, 4.643946540355464126e+00, 6.530204496574248616e+00, + 9.582285015566804961e+00, 1.468361407515440960e+01, 2.354449548740987533e+01, 3.963527273305166705e+01, + 7.037635206267538547e+01, 1.325880124784838868e+02, 2.669625649541569172e+02, 5.793749198508472676e+02, + 1.368691928321303605e+03, 3.559435721533130554e+03, 1.032186677270763318e+04, 3.386621302858741487e+04, + 1.278166259840246830e+05, 5.654082513926693098e+05, 2.994462044781721833e+06, 1.944975023421914947e+07, + 1.592193007690560588e+08, 1.694288818617459913e+09, 2.427156182311303271e+10, 4.870317848199455490e+11, + 1.431819656229181793e+13, 6.489471523099301256e+14, 4.803757752508989106e+16, 6.200096361305331541e+18, + 1.502568562439914899e+21, 7.436061367189688251e+23, 8.264761218677928603e+26, 2.297735027897804345e+30, + 1.805449779569534997e+34, 4.604472360199061931e+38, 4.458371212030626854e+43, 1.957638261114809309e+49, + 4.767368137162500764e+55, 8.088820139476721285e+62, 1.238260897349286357e+71, 2.292272505278842062e+80, + 7.151392373749193549e+90, 5.476714850156044431e+102, 1.576655618370700681e+116, 2.765448595957851958e+131, + 5.108051255283132673e+148, }; + +__constant__ double m_weights_double_6[98] = + { 1.573457773573108386e+00, 1.594892755038663787e+00, 1.638536515530234742e+00, 1.705980408212213620e+00, + 1.799724394608737275e+00, 1.923322854425656307e+00, 2.081597373313268178e+00, 2.280934883790070511e+00, + 2.529697852387704655e+00, 2.838784782552951185e+00, 3.222395745020980612e+00, 3.699081358854235112e+00, + 4.293188274330526800e+00, 5.036865356322330076e+00, 5.972871140910932199e+00, 7.158538424311077564e+00, + 8.671427800892076385e+00, 1.061747360297922326e+01, 1.314285002260235600e+01, 1.645145625668428040e+01, + 2.083099449998189069e+01, 2.669235989791640190e+01, 3.462993514791378189e+01, 4.551518362653662579e+01, + 6.064408087764392116e+01, 8.197296917485846798e+01, 1.125020468081652564e+02, 
1.569096552844714123e+02, + 2.226204347868638276e+02, 3.216385489504077755e+02, 4.737574505945461739e+02, 7.122994548146997637e+02, + 1.094609652686376553e+03, 1.721697789176049576e+03, 2.775924909253835146e+03, 4.595230066268149347e+03, + 7.823427586641573672e+03, 1.372357435269105405e+04, 2.485188961645119553e+04, 4.655538745425972783e+04, + 9.041766782135686884e+04, 1.824843964862728392e+05, 3.836800264094614027e+05, 8.426271970245168026e+05, + 1.938432574158782634e+06, 4.685112849356485528e+06, 1.193528667218607927e+07, 3.215643752247989316e+07, + 9.196008928386600386e+07, 2.802223178457559964e+08, 9.136110825267458886e+08, 3.200910900783148591e+09, + 1.210765264234723689e+10, 4.969024745093101808e+10, 2.224315751863855216e+11, 1.092125344449313660e+12, + 5.916882980019919359e+12, 3.559743438494577249e+13, 2.394353652945465191e+14, 1.813551073517501917e+15, + 1.558736706166165738e+16, 1.532714875555114333e+17, 1.739274776190789212e+18, 2.298841216802216313e+19, + 3.574030698837762664e+20, 6.604899705451419080e+21, 1.467155879591820659e+23, 3.964094964398509381e+24, + 1.319342840595348793e+26, 5.482251971340400742e+27, 2.885137894723827518e+29, 1.952539840765392110e+31, + 1.727051489032222797e+33, 2.031343507095439396e+35, 3.236074146972599980e+37, 7.120487412983497200e+39, + 2.209552707411017265e+42, 9.886282647791384648e+44, 6.530514048788273529e+47, 6.530706672481546528e+50, + 1.015518807431281951e+54, 2.526366773162394510e+57, 1.036450519906790297e+61, 7.241966032627135861e+64, + 8.919402520769714938e+68, 2.008463619152992905e+73, 8.596914764830260020e+77, 7.290599546829495220e+82, + 1.280199563216419112e+88, 4.878349285603201150e+93, 4.240828248064127940e+99, 8.869771764721598720e+105, + 4.723342575741417669e+112, 6.802035963326188581e+119, 2.824531180990009549e+127, 3.621049216745982252e+135, + 1.541270150334942520e+144, 2.353376995174362785e+153, }; + +__constant__ double m_weights_double_7[196] = + { 1.571461316550783294e+00, 1.576790166316938345e+00, 
1.587495640370383316e+00, 1.603673956341370210e+00, + 1.625471125457493943e+00, 1.653085011915939302e+00, 1.686768142525911236e+00, 1.726831323537516202e+00, + 1.773648138667236602e+00, 1.827660421478661448e+00, 1.889384817044018196e+00, 1.959420572855037091e+00, + 2.038458728047908923e+00, 2.127292904083847225e+00, 2.226831940199076941e+00, 2.338114664555130296e+00, + 2.462327148722991304e+00, 2.600822860927085164e+00, 2.755146214814554359e+00, 2.927060108424483555e+00, + 3.118578166240921951e+00, 3.332002540339506630e+00, 3.569968300410740276e+00, 3.835495653996447262e+00, + 4.132051496512934885e+00, 4.463622106699067881e+00, 4.834799191008006557e+00, 5.250881957765679608e+00, + 5.717998490875333124e+00, 6.243250421598568105e+00, 6.834885801226541839e+00, 7.502506202789340802e+00, + 8.257315484493544201e+00, 9.112419405864642634e+00, 1.008318749543997758e+01, 1.118769134993865202e+01, + 1.244723705914106881e+01, 1.388701390605507587e+01, 1.553688715915900190e+01, 1.743237000680942831e+01, + 1.961581894823993424e+01, 2.213790886354273806e+01, 2.505945934677137610e+01, 2.845370377742137561e+01, + 3.240911845969524834e+01, 3.703296289480230161e+01, 4.245572644746267911e+01, 4.883673480337985582e+01, + 5.637124640586975420e+01, 6.529947092752610340e+01, 7.591807755694122837e+01, 8.859494252391663822e+01, + 1.037881295005788124e+02, 1.220704263969226746e+02, 1.441612098131200535e+02, 1.709680191245773511e+02, + 2.036410593843575570e+02, 2.436450058708723643e+02, 2.928540812182076105e+02, 3.536786019152253392e+02, + 4.292343083967296939e+02, 5.235701840488733027e+02, 6.419766898003024575e+02, 7.914052083668759283e+02, + 9.810422089081931637e+02, 1.223099994999740393e+03, 1.533912555427112127e+03, 1.935464013605830339e+03, + 2.457534549912886852e+03, 3.140733731623635519e+03, 4.040818188564651898e+03, 5.234881599712225681e+03, + 6.830294457607329226e+03, 8.977713228649887143e+03, 1.189015920967326839e+04, 1.587122387044346962e+04, + 2.135711106445789331e+04, 
2.897983705189681437e+04, 3.966306726795547950e+04, 5.476875193750000787e+04, + 7.632356539388055680e+04, 1.073719149754976951e+05, 1.525316674555574152e+05, 2.188778434744216586e+05, + 3.173624496019295608e+05, 4.651201525869328462e+05, 6.892537656280580572e+05, 1.033119885120019982e+06, + 1.566887981043252499e+06, 2.405492027026531795e+06, 3.739528964815910340e+06, 5.889121154895580032e+06, + 9.399046351922342030e+06, 1.520903276129653518e+07, 2.496287187293576168e+07, 4.157759259963074840e+07, + 7.030705366950267312e+07, 1.207598558452493366e+08, 2.107882509464846833e+08, 3.741047199023457864e+08, + 6.754494594987415572e+08, 1.241316740415880537e+09, 2.323310032649552862e+09, 4.431176019026625759e+09, + 8.617446487400900130e+09, 1.709836906604031513e+10, 3.463574521880171339e+10, 7.167607123799270726e+10, + 1.516347620910054079e+11, 3.281729323238950526e+11, 7.271102600298280790e+11, 1.650499552378780378e+12, + 3.841338149508803917e+12, 9.173744267785176575e+12, 2.249901946357519979e+13, 5.671535089900611731e+13, + 1.470742250307697019e+14, 3.927012518464311775e+14, 1.080639977391212820e+15, 3.067671466720475189e+15, + 8.992386789198328428e+15, 2.724722536524592111e+16, 8.542946122263389258e+16, 2.774613718725574755e+17, + 9.345299479382029121e+17, 3.267996122987731882e+18, 1.187914433455468315e+19, 4.494053408418564214e+19, + 1.771706652195486743e+20, 7.288102552885931527e+20, 3.132512430816625349e+21, 1.408743767951073110e+22, + 6.638294268236060414e+22, 3.282543608403565013e+23, 1.705920098038394064e+24, 9.332259385148524285e+24, + 5.382727175874888312e+25, 3.278954235122093249e+26, 2.113191697957458099e+27, 1.443411041499643040e+28, + 1.046864394654982423e+29, 8.077319226958905700e+29, 6.643146963432616277e+30, 5.835670121359986260e+31, + 5.486890296790230798e+32, 5.533726968508261614e+33, 5.999734996418352834e+34, 7.009176119466122569e+35, + 8.844061966424597499e+36, 1.208226860869605961e+38, 1.791648514311063338e+39, 2.891313916713205762e+40, + 
5.091457860211527298e+41, 9.810630588402496553e+42, 2.074441239147378860e+44, 4.827650116937700540e+45, + 1.240287939111549029e+47, 3.528782858644784616e+48, 1.115449490471696659e+50, 3.930510643328196314e+51, + 1.549243712957852337e+53, 6.854998238041301002e+54, 3.417479961583207704e+56, 1.926905498641079990e+58, + 1.233580963004919450e+60, 9.002819902898076915e+61, 7.521415141253441645e+63, 7.224277554900578993e+65, + 8.012832830535078610e+67, 1.030999620286380369e+70, 1.546174957076748679e+72, 2.715803772613248694e+74, + 5.615089920571746438e+76, 1.373667859345343337e+79, 3.997541020769625126e+81, 1.391500589339800087e+84, + 5.826693844912022892e+86, 2.952274820929549096e+89, 1.821023061478466282e+92, 1.375973022137941526e+95, + 1.281852367543412945e+98, 1.482130127201990503e+101, 2.141574273792435314e+104, 3.894495540947112380e+107, + 8.978646362580102961e+110, 2.644131589807244050e+114, 1.002403539841913834e+118, 4.931412804903905259e+121, + 3.174401112435865044e+125, 2.696624001761892390e+129, 3.049799322320447166e+133, 4.634041526818687785e+137, + 9.548983134803106512e+141, 2.694404866192089829e+146, 1.051502720036395325e+151, 5.734170640626244955e+155, }; + +__constant__ double m_weights_double_8[391] = + { 1.570962550997832611e+00, 1.572292902367211961e+00, 1.574956581912666755e+00, 1.578959553636163985e+00, + 1.584310789563614305e+00, 1.591022301117035107e+00, 1.599109181186160337e+00, 1.608589657109067468e+00, + 1.619485154826419743e+00, 1.631820374530739318e+00, 1.645623378191125679e+00, 1.660925689395424109e+00, + 1.677762406016463717e+00, 1.696172326277082973e+00, 1.716198088860732467e+00, 1.737886327791014562e+00, + 1.761287842885152410e+00, 1.786457786673686420e+00, 1.813455868772335587e+00, 1.842346578792652542e+00, + 1.873199428986627521e+00, 1.906089217937612619e+00, 1.941096316736779451e+00, 1.978306979221816566e+00, + 2.017813678003844337e+00, 2.059715468170813895e+00, 2.104118380732327493e+00, 2.151135848063375554e+00, + 
2.200889163814591418e+00, 2.253507979986114202e+00, 2.309130844113053375e+00, 2.367905779785113334e+00, + 2.429990914023652954e+00, 2.495555155369085590e+00, 2.564778926893134514e+00, 2.637854958747451684e+00, + 2.714989145296268067e+00, 2.796401472360280536e+00, 2.882327020626578700e+00, 2.973017051860293803e+00, + 3.068740185193628238e+00, 3.169783671473487386e+00, 3.276454774427328601e+00, 3.389082268266156098e+00, + 3.508018062292869136e+00, 3.633638964133530274e+00, 3.766348594369884204e+00, 3.906579466636309289e+00, + 4.054795248667541120e+00, 4.211493221360917802e+00, 4.377206954666462219e+00, 4.552509221059946388e+00, + 4.738015169510782826e+00, 4.934385785253587887e+00, 5.142331663338191074e+00, 5.362617126899976224e+00, + 5.596064724397100194e+00, 5.843560143744373307e+00, 6.106057585381734693e+00, 6.384585640900671436e+00, + 6.680253728973824449e+00, 6.994259146058412709e+00, 7.327894795748901060e+00, 7.682557667824588764e+00, + 8.059758146071137270e+00, 8.461130232962342889e+00, 8.888442789395671080e+00, 9.343611899025485155e+00, + 9.828714479494622022e+00, 1.034600327721380625e+01, 1.089792339849122916e+01, 1.148713054801325790e+01, + 1.211651116619788555e+01, 1.278920468010096321e+01, 1.350862810871281096e+01, 1.427850329305334421e+01, + 1.510288705493181327e+01, 1.598620462612703196e+01, 1.693328673269081128e+01, 1.794941076780000506e+01, + 1.904034654190823159e+01, 2.021240716182964334e+01, 2.147250566192247370e+01, 2.282821809199713505e+01, + 2.428785385941680425e+01, 2.586053422878117785e+01, 2.755628000354674426e+01, 2.938610955221109564e+01, + 3.136214849990951329e+01, 3.349775258749912582e+01, 3.580764540799625468e+01, 3.830807296872530167e+01, + 4.101697730155473447e+01, 4.395419165876113623e+01, 4.714166019494196927e+01, 5.060368545366659226e+01, + 5.436720746019445252e+01, 5.846211877912138439e+01, 6.292162054058128784e+01, 6.778262518512416663e+01, + 7.308621254265223015e+01, 7.887814686488147292e+01, 8.520946359734658334e+01, 
9.213713603387774717e+01, + 9.972483357670754649e+01, 1.080437851679046426e+02, 1.171737636088621692e+02, 1.272042089988687372e+02, + 1.382355124664102373e+02, 1.503804848151483311e+02, 1.637660387526102742e+02, 1.785351181233383403e+02, + 1.948489131607280604e+02, 2.128894073598352670e+02, 2.328623093447990790e+02, 2.550004322843281994e+02, + 2.795675942672445782e+02, 3.068631259124280934e+02, 3.372270867451200874e+02, 3.710463099965576255e+02, + 4.087614170466174911e+02, 4.508749684194593670e+02, 4.979609488959773491e+02, 5.506758209385785877e+02, + 6.097714244663179092e+02, 6.761100535726473685e+02, 7.506821038741422446e+02, 8.346267600518081192e+02, + 9.292562845315541998e+02, 1.036084578498234728e+03, 1.156860819661897657e+03, 1.293609142453808600e+03, + 1.448675521854205144e+03, 1.624783259532197615e+03, 1.825098759915318560e+03, 2.053309635972617554e+03, + 2.313717614494777200e+03, 2.611349236640186999e+03, 2.952087994093624299e+03, 3.342832332560548180e+03, + 3.791684927756595099e+03, 4.308179838716318955e+03, 4.903555624570201673e+03, 5.591084343634811452e+03, + 6.386468625571246341e+03, 7.308321829412979440e+03, 8.378749812799703561e+03, 9.624057218749638059e+03, + 1.107560666191146008e+04, 1.277086605445904388e+04, 1.475468792019489452e+04, 1.708087537417066343e+04, + 1.981410309695485051e+04, 2.303227888204754908e+04, 2.682945317928632535e+04, 3.131941178398428200e+04, + 3.664012209706997997e+04, 4.295924836668690170e+04, 5.048100882639843572e+04, 5.945472133180055290e+04, + 7.018547875172689579e+04, 8.304751726175694003e+04, 9.850099805053575446e+04, 1.171131266261766060e+05, + 1.395847982160589845e+05, 1.667843016393077556e+05, 1.997900626520524686e+05, 2.399449946032992187e+05, + 2.889257939838013232e+05, 3.488315309194304548e+05, 4.222972201496778447e+05, 5.126398246369253619e+05, + 6.240464876221989792e+05, 7.618179073233615941e+05, 9.326839300224119257e+05, 1.145214007774297539e+06, + 1.410352646274233119e+06, 1.742120041875863385e+06, 
2.158531716934287014e+06, 2.682809410126426731e+06, + 3.344980563595418861e+06, 4.183997972337706048e+06, 5.250558008165501752e+06, 6.610860174141680988e+06, + 8.351639423967558693e+06, 1.058692532393929900e+07, 1.346715235106239409e+07, 1.719148271024263021e+07, + 2.202453449027701694e+07, 2.831917301724337797e+07, 3.654767820268344932e+07, 4.734452657230626106e+07, + 6.156534063509513873e+07, 8.036843026897869248e+07, 1.053280284359690289e+08, 1.385921689084126286e+08, + 1.831036985925683524e+08, 2.429109457458640820e+08, 3.236062393759667463e+08, 4.329475218599986663e+08, + 5.817432967962929479e+08, 7.851179789388191786e+08, 1.064329197627075307e+09, 1.449389582912945485e+09, + 1.982866469377991849e+09, 2.725414314698094324e+09, 3.763867964111621444e+09, 5.223138814950990937e+09, + 7.283785810644397704e+09, 1.020809642381158743e+10, 1.437899318470510521e+10, 2.035836812543633578e+10, + 2.897499827080027444e+10, 4.145773751645494878e+10, 5.963837683872426287e+10, 8.626228483915530800e+10, + 1.254667045389825180e+11, 1.835212982264913186e+11, 2.699812207400151604e+11, 3.994928452151922954e+11, + 5.946380558701434550e+11, 8.904409967424091107e+11, 1.341551941677775838e+12, 2.033768550332151892e+12, + 3.102627959875753214e+12, 4.763598321705862063e+12, 7.361420360560813584e+12, 1.145126961456557423e+13, + 1.793314186996273926e+13, 2.827585501285792232e+13, 4.489297053678444669e+13, 7.177802872658499571e+13, + 1.155855098545820625e+14, 1.874833886367883093e+14, 3.063510356402174454e+14, 5.043400653005970242e+14, + 8.366163396892429890e+14, 1.398556351640947289e+15, 2.356335749516164682e+15, 4.001765167382637456e+15, + 6.851375128404941445e+15, 1.182690111761543990e+16, 2.058673527013806443e+16, 3.613968784314904633e+16, + 6.399112184394213551e+16, 1.143016185628376923e+17, 2.059881383915666443e+17, 3.745846788353680914e+17, + 6.874443034683149068e+17, 1.273407643613485314e+18, 2.381241916829895366e+18, 4.495835617307108399e+18, + 8.571442024901952701e+18, 
1.650443584181656965e+19, 3.210100352421317851e+19, 6.307780124442703091e+19, + 1.252404031157661279e+20, 2.513005295649985394e+20, 5.096776255690838436e+20, 1.045019200016673046e+21, + 2.166476479260878466e+21, 4.542138145678395463e+21, 9.632082324449137128e+21, 2.066386536688254528e+22, + 4.485529785554428251e+22, 9.853879573610977508e+22, 2.191158874464374408e+23, 4.932835964390971668e+23, + 1.124501529971774363e+24, 2.596269136156756008e+24, 6.072292938313625501e+24, 1.438989066308003836e+25, + 3.455841956406570469e+25, 8.412655191713576490e+25, 2.076289061650816510e+26, 5.196515024640220322e+26, + 1.319173194089644043e+27, 3.397455895980380794e+27, 8.879057454438503591e+27, 2.355272361492064126e+28, + 6.342762007722624824e+28, 1.734531093990859705e+29, 4.817893170606830871e+29, 1.359597346490148232e+30, + 3.898969689906500392e+30, 1.136542986529989936e+31, 3.368450043991780017e+31, 1.015304084709817260e+32, + 3.113144376221918237e+32, 9.713072739730140403e+32, 3.084517643581725946e+33, 9.972682139820497284e+33, + 3.283625052288491586e+34, 1.101378785390827536e+35, 3.764333367592714297e+35, 1.311403465938242926e+36, + 4.658135710682813672e+36, 1.687517347470511392e+37, 6.237053685018323490e+37, 2.352571314427744869e+38, + 9.058938240219699936e+38, 3.562249097611136071e+39, 1.430959291578558210e+40, 5.873974584984375049e+40, + 2.464828549811283787e+41, 1.057649203090855628e+42, 4.642475639281078035e+42, 2.085287118272421779e+43, + 9.588439985186632177e+43, 4.514982011246092280e+44, 2.177974048341973204e+45, 1.076720976822900458e+46, + 5.457267432929085589e+46, 2.836869270455781134e+47, 1.513103201392011626e+48, 8.283974667225617075e+48, + 4.657239491995971344e+49, 2.689796370712836937e+50, 1.596597846911970388e+51, 9.744154538256586629e+51, + 6.117238394843313065e+52, 3.952049650585241827e+53, 2.628701592074258213e+54, 1.800990196502679393e+55, + 1.271554462563068383e+56, 9.255880104477760711e+56, 6.949737920133919393e+57, 5.385167200769965621e+58, + 
4.308493668102978774e+59, 3.560951557542178371e+60, 3.041888528384649992e+61, 2.687094441930837189e+62, + 2.455920538900000855e+63, 2.323648254168641537e+64, 2.277129741584892331e+65, 2.312633552913224734e+66, + 2.435407592981291129e+67, 2.660910388822465246e+68, 3.018105943423533920e+69, 3.555823489510192503e+70, + 4.354188877793849013e+71, 5.544975795511813315e+72, 7.348276481909886336e+73, 1.013998025722423261e+75, + 1.457911462244607943e+76, 2.185488876819505295e+77, 3.418022153286623008e+78, 5.580843920601835728e+79, + 9.519586502799733908e+80, 1.697573578247197786e+82, 3.166906670990180014e+83, 6.185099106418675430e+84, + 1.265541134386934377e+86, 2.714828965877756899e+87, 6.110386802964494082e+88, 1.444054086171083239e+90, + 3.586083726638388165e+91, 9.365231868063239600e+92, 2.574080116205122449e+94, 7.452134689862302719e+95, + 2.274309903836169819e+97, 7.323011134121164749e+98, 2.489816421737932462e+100, 8.946533386359281588e+101, + 3.400401372391165979e+103, 1.368288186208928217e+105, 5.834277489829591931e+106, 2.638486937672383424e+108, + 1.266728882767139521e+110, 6.462225178314182803e+111, 3.506432320607573604e+113, 2.025608933943268165e+115, + 1.247041677084784707e+117, 8.189865188405279038e+118, 5.743610894406099965e+120, 4.305808934084489763e+122, + 3.454156966079496755e+124, 2.968316601530352737e+126, 2.735456242372183592e+128, 2.706317176690077847e+130, + 2.877679916342060385e+132, 3.292412878268106390e+134, 4.057840961953725969e+136, 5.393783049105737324e+138, + 7.741523901672235406e+140, 1.201209962310668456e+143, 2.017456079556807301e+145, 3.672176623483062526e+147, + 7.253163798058577630e+149, 1.556591535302570570e+152, 3.634399832790394885e+154, }; + +__constant__ double* m_weights_double[8] = { + m_weights_double_1, + m_weights_double_2, + m_weights_double_3, + m_weights_double_4, + m_weights_double_5, + m_weights_double_6, + m_weights_double_7, + m_weights_double_8 +}; +__constant__ boost::math::size_t float_coefficients_size[8] = {4, 4, 8, 
16, 32, 65, 129, 259}; + +__constant__ boost::math::size_t double_coefficients_size[8] = {6, 6, 12, 24, 49, 98, 196, 391}; + +template +struct coefficients_selector; +template<> +struct coefficients_selector +{ + __device__ static const auto abscissas() { return m_abscissas_float; } + __device__ static const auto weights() { return m_weights_float; } + __device__ static const auto size() { return float_coefficients_size; } +}; + +template<> +struct coefficients_selector +{ + __device__ static const auto abscissas() { return m_abscissas_double; } + __device__ static const auto weights() { return m_weights_double; } + __device__ static const auto size() { return double_coefficients_size; } +}; } // Namespace detail } // Namespace quadrature From 2a759dc25d191e66ee27b3517d9ddbc03062a772 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 10 Sep 2024 15:38:24 -0400 Subject: [PATCH 20/22] Add device specific impl --- .../quadrature/detail/sinh_sinh_detail.hpp | 132 ++++++++++++++++++ include/boost/math/quadrature/sinh_sinh.hpp | 2 +- 2 files changed, 133 insertions(+), 1 deletion(-) diff --git a/include/boost/math/quadrature/detail/sinh_sinh_detail.hpp b/include/boost/math/quadrature/detail/sinh_sinh_detail.hpp index 95393b2f5..ad577ef9e 100644 --- a/include/boost/math/quadrature/detail/sinh_sinh_detail.hpp +++ b/include/boost/math/quadrature/detail/sinh_sinh_detail.hpp @@ -496,6 +496,7 @@ void sinh_sinh_detail::init(const std::integral_constant&) #include #include +#include #include #include #include @@ -1211,6 +1212,137 @@ struct coefficients_selector __device__ static const auto size() { return double_coefficients_size; } }; +template > +__device__ auto sinh_sinh_integrate_impl(const F& f, Real tol, Real* error, Real* L1, boost::math::size_t* levels) +{ + BOOST_MATH_STD_USING + using boost::math::constants::half; + using boost::math::constants::half_pi; + using boost::math::size_t; + + constexpr auto function = "boost::math::quadrature::sinh_sinh<%1%>::integrate"; 
+ + using K = decltype(f(static_cast(0))); + static_assert(!boost::math::::is_integral::value, + "The return type cannot be integral, it must be either a real or complex floating point type."); + + K y_max = f(boost::math::tools::max_value()); + + if(abs(y_max) > boost::math::tools::epsilon()) + { + return static_cast(policies::raise_domain_error(function, + "The function you are trying to integrate does not go to zero at infinity, and instead evaluates to %1%", y_max, Policy())); + } + + K y_min = f(-boost::math::tools::max_value()); + + if(abs(y_min) > boost::math::tools::epsilon()) + { + return static_cast(policies::raise_domain_error(function, + "The function you are trying to integrate does not go to zero at -infinity, and instead evaluates to %1%", y_max, Policy())); + } + + // Get the party started with two estimates of the integral: + const auto m_abscissas = coefficients_selector::abscissas(); + const auto m_weights = coefficients_selector::weights(); + const auto m_size = coefficients_selector::size(); + + K I0 = f(0)*half_pi(); + Real L1_I0 = abs(I0); + for(size_t i = 0; i < m_size[0]; ++i) + { + Real x = m_abscissas[0][i]; + K yp = f(x); + K ym = f(-x); + I0 += (yp + ym)*m_weights[0][i]; + L1_I0 += (abs(yp)+abs(ym))*m_weights[0][i]; + } + + K I1 = I0; + Real L1_I1 = L1_I0; + for (size_t i = 0; i < m_size[1]; ++i) + { + Real x= m_abscissas[1][i]; + K yp = f(x); + K ym = f(-x); + I1 += (yp + ym)*m_weights[1][i]; + L1_I1 += (abs(yp) + abs(ym))*m_weights[1][i]; + } + + I1 *= half(); + L1_I1 *= half(); + Real err = abs(I0 - I1); + + size_t i = 2; + for(; i <= 8U; ++i) + { + I0 = I1; + L1_I0 = L1_I1; + + I1 = half()*I0; + L1_I1 = half()*L1_I0; + Real h = static_cast(1) / static_cast(1 << i); + K sum = 0; + Real absum = 0; + + Real abterm1 = 1; + Real eps = boost::math::tools::epsilon()*L1_I1; + + auto abscissa_row = m_abscissas[i]; + auto weight_row = m_weights[i]; + + for(size_t j = 0; j < m_size[i]; ++j) + { + Real x = abscissa_row[j]; + K yp = f(x); + K ym 
= f(-x); + sum += (yp + ym)*weight_row[j]; + Real abterm0 = (abs(yp) + abs(ym))*weight_row[j]; + absum += abterm0; + + // We require two consecutive terms to be < eps in case we hit a zero of f. + if (x > static_cast(100) && abterm0 < eps && abterm1 < eps) + { + break; + } + abterm1 = abterm0; + } + + I1 += sum*h; + L1_I1 += absum*h; + err = abs(I0 - I1); + + if (!(boost::math::isfinite)(L1_I1)) + { + constexpr auto err_msg = "The sinh_sinh quadrature evaluated your function at a singular point, leading to the value %1%.\n" + "sinh_sinh quadrature cannot handle singularities in the domain.\n" + "If you are sure your function has no singularities, please submit a bug against boost.math\n"; + return static_cast(policies::raise_evaluation_error(function, err_msg, I1, Policy())); + } + if (err <= tolerance*L1_I1) + { + break; + } + } + + if (error) + { + *error = err; + } + + if (L1) + { + *L1 = L1_I1; + } + + if (levels) + { + *levels = i; + } + + return I1; +} + } // Namespace detail } // Namespace quadrature } // Namespace math diff --git a/include/boost/math/quadrature/sinh_sinh.hpp b/include/boost/math/quadrature/sinh_sinh.hpp index 00a363404..7aabcb437 100644 --- a/include/boost/math/quadrature/sinh_sinh.hpp +++ b/include/boost/math/quadrature/sinh_sinh.hpp @@ -60,7 +60,7 @@ namespace quadrature { template > __device__ auto sinh_sinh_integrate(const F& f, Real tol = boost::math::tools::root_epsilon(), Real* error = nullptr, Real* L1 = nullptr, boost::math::size_t* levels = nullptr) { - return detail::sinh_sinh_integrate(f, tol, error, L1, levels); + return detail::sinh_sinh_integrate_impl(f, tol, error, L1, levels); } } // namespace quadrature From ddd448aca9d5b500b80f8e370cdfd30b0157d043 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 10 Sep 2024 16:00:55 -0400 Subject: [PATCH 21/22] Add sinh_sinh CUDA testing --- .../quadrature/detail/sinh_sinh_detail.hpp | 4 +- test/cuda_jamfile | 2 + test/test_sinh_sinh_quad_double.cu | 133 ++++++++++++++++++ 
test/test_sinh_sinh_quad_float.cu | 133 ++++++++++++++++++ 4 files changed, 270 insertions(+), 2 deletions(-) create mode 100644 test/test_sinh_sinh_quad_double.cu create mode 100644 test/test_sinh_sinh_quad_float.cu diff --git a/include/boost/math/quadrature/detail/sinh_sinh_detail.hpp b/include/boost/math/quadrature/detail/sinh_sinh_detail.hpp index ad577ef9e..7f7477a6e 100644 --- a/include/boost/math/quadrature/detail/sinh_sinh_detail.hpp +++ b/include/boost/math/quadrature/detail/sinh_sinh_detail.hpp @@ -1213,7 +1213,7 @@ struct coefficients_selector }; template > -__device__ auto sinh_sinh_integrate_impl(const F& f, Real tol, Real* error, Real* L1, boost::math::size_t* levels) +__device__ auto sinh_sinh_integrate_impl(const F& f, Real tolerance, Real* error, Real* L1, boost::math::size_t* levels) { BOOST_MATH_STD_USING using boost::math::constants::half; @@ -1223,7 +1223,7 @@ __device__ auto sinh_sinh_integrate_impl(const F& f, Real tol, Real* error, Real constexpr auto function = "boost::math::quadrature::sinh_sinh<%1%>::integrate"; using K = decltype(f(static_cast(0))); - static_assert(!boost::math::::is_integral::value, + static_assert(!boost::math::is_integral::value, "The return type cannot be integral, it must be either a real or complex floating point type."); K y_max = f(boost::math::tools::max_value()); diff --git a/test/cuda_jamfile b/test/cuda_jamfile index 3f3dcaaa0..0a32ef099 100644 --- a/test/cuda_jamfile +++ b/test/cuda_jamfile @@ -12,6 +12,8 @@ project : requirements # Quad run test_exp_sinh_quad_float.cu ; run test_exp_sinh_quad_double.cu ; +run test_sinh_sinh_quad_float.cu ; +run test_sinh_sinh_quad_double.cu ; # Distributions run test_arcsine.cpp ; diff --git a/test/test_sinh_sinh_quad_double.cu b/test/test_sinh_sinh_quad_double.cu new file mode 100644 index 000000000..bf7490fa4 --- /dev/null +++ b/test/test_sinh_sinh_quad_double.cu @@ -0,0 +1,133 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. 
+// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +__host__ __device__ float_type func(float_type x) +{ + BOOST_MATH_STD_USING + return 1/(1+x*x); +} + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + float_type tol = boost::math::tools::root_epsilon(); + float_type error; + float_type L1; + boost::math::size_t levels; + + if (i < numElements) + { + out[i] = boost::math::quadrature::sinh_sinh_integrate(func, tol, &error, &L1, &levels); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = M_PI * (static_cast(i) / numElements); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + 
+ err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + float_type tol = boost::math::tools::root_epsilon(); + float_type error; + float_type L1; + boost::math::quadrature::sinh_sinh integrator; + for(int i = 0; i < numElements; ++i) + { + results.push_back(integrator.integrate(func, tol, &error, &L1)); + } + double t = w.elapsed(); + // check the results + int failed_count = 0; + for(int i = 0; i < numElements; ++i) + { + const auto eps = boost::math::epsilon_difference(output_vector[i], results[i]); + if (eps > 10) + { + std::cerr << std::setprecision(std::numeric_limits::digits10) + << "Result verification failed at element " << i << "!\n" + << "Device: " << output_vector[i] + << "\n Host: " << results[i] + << "\n Eps: " << eps << "\n"; + failed_count++; + } + if (failed_count > 100) + { + break; + } + } + + if (failed_count != 0) + { + std::cout << "Test FAILED" << std::endl; + return EXIT_FAILURE; + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_sinh_sinh_quad_float.cu b/test/test_sinh_sinh_quad_float.cu new file mode 100644 index 000000000..b84e316af --- /dev/null +++ b/test/test_sinh_sinh_quad_float.cu @@ -0,0 +1,133 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +__host__ __device__ float_type func(float_type x) +{ + BOOST_MATH_STD_USING + return 1/(1+x*x); +} + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + float_type tol = boost::math::tools::root_epsilon(); + float_type error; + float_type L1; + boost::math::size_t levels; + + if (i < numElements) + { + out[i] = boost::math::quadrature::sinh_sinh_integrate(func, tol, &error, &L1, &levels); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = M_PI * (static_cast(i) / numElements); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch 
vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + float_type tol = boost::math::tools::root_epsilon(); + float_type error; + float_type L1; + boost::math::quadrature::sinh_sinh integrator; + for(int i = 0; i < numElements; ++i) + { + results.push_back(integrator.integrate(func, tol, &error, &L1)); + } + double t = w.elapsed(); + // check the results + int failed_count = 0; + for(int i = 0; i < numElements; ++i) + { + const auto eps = boost::math::epsilon_difference(output_vector[i], results[i]); + if (eps > 10) + { + std::cerr << std::setprecision(std::numeric_limits::digits10) + << "Result verification failed at element " << i << "!\n" + << "Device: " << output_vector[i] + << "\n Host: " << results[i] + << "\n Eps: " << eps << "\n"; + failed_count++; + } + if (failed_count > 100) + { + break; + } + } + + if (failed_count != 0) + { + std::cout << "Test FAILED" << std::endl; + return EXIT_FAILURE; + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} From 0946e38678eaff1cb205dbfc81e23599ef4f8be3 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 10 Sep 2024 16:16:32 -0400 Subject: [PATCH 22/22] Add sinh_sinh NVRTC testing --- test/nvrtc_jamfile | 2 + test/test_sinh_sinh_quad_nvrtc_double.cpp | 206 ++++++++++++++++++++++ test/test_sinh_sinh_quad_nvrtc_float.cpp | 206 ++++++++++++++++++++++ 3 files changed, 414 insertions(+) create mode 100644 test/test_sinh_sinh_quad_nvrtc_double.cpp create mode 100644 test/test_sinh_sinh_quad_nvrtc_float.cpp diff --git a/test/nvrtc_jamfile b/test/nvrtc_jamfile index 3ef320d97..46dd0e257 100644 --- a/test/nvrtc_jamfile +++ b/test/nvrtc_jamfile @@ -12,6 +12,8 @@ project : requirements # Quad run test_exp_sinh_quad_nvrtc_float.cpp ; run test_exp_sinh_quad_nvrtc_double.cpp ; +run 
test_sinh_sinh_quad_nvrtc_float.cpp ; +run test_sinh_sinh_quad_nvrtc_double.cpp ; # Distributions run test_arcsine_cdf_nvrtc_double.cpp ; diff --git a/test/test_sinh_sinh_quad_nvrtc_double.cpp b/test/test_sinh_sinh_quad_nvrtc_double.cpp new file mode 100644 index 000000000..5342e9778 --- /dev/null +++ b/test/test_sinh_sinh_quad_nvrtc_double.cpp @@ -0,0 +1,206 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include + +__host__ __device__ float_type func(float_type x) +{ + return 1/(1+x*x); +} + +extern "C" __global__ +void test_sinh_sinh_kernel(const float_type*, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + float_type tol = boost::math::tools::root_epsilon(); + float_type error; + float_type L1; + boost::math::size_t levels; + + if (i < numElements) + { + out[i] = boost::math::quadrature::sinh_sinh_integrate(func, tol, &error, &L1, &levels); + } +} +)"; + +__host__ __device__ float_type func(float_type x) +{ + return 1/(1+x*x); +} + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, 
const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_sinh_sinh_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_sinh_sinh_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_sinh_sinh_kernel"), "Failed to get kernel function"); + + int numElements = 50000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, 
*d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + float_type tol = boost::math::tools::root_epsilon(); + float_type error; + float_type L1; + boost::math::quadrature::sinh_sinh integrator; + for (int i = 0; i < numElements; ++i) + { + auto res = integrator.integrate(func, tol, &error, &L1); + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } 
+ } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_sinh_sinh_quad_nvrtc_float.cpp b/test/test_sinh_sinh_quad_nvrtc_float.cpp new file mode 100644 index 000000000..37a8c1252 --- /dev/null +++ b/test/test_sinh_sinh_quad_nvrtc_float.cpp @@ -0,0 +1,206 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include + +__host__ __device__ float_type func(float_type x) +{ + return 1/(1+x*x); +} + +extern "C" __global__ +void test_sinh_sinh_kernel(const float_type*, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + float_type tol = boost::math::tools::root_epsilon(); + float_type error; + float_type L1; + boost::math::size_t levels; + + if (i < numElements) + { + out[i] = boost::math::quadrature::sinh_sinh_integrate(func, tol, &error, &L1, &levels); + } +} +)"; + +__host__ __device__ float_type func(float_type x) +{ + return 1/(1+x*x); +} + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void 
checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_sinh_sinh_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_sinh_sinh_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + 
checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_sinh_sinh_kernel"), "Failed to get kernel function"); + + int numElements = 50000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + float_type tol = boost::math::tools::root_epsilon(); + float_type error; + float_type L1; + boost::math::quadrature::sinh_sinh integrator; + for (int i = 0; i < numElements; ++i) + { + auto res = integrator.integrate(func, tol, &error, 
&L1); + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +}