Merge pull request #1200 from boostorg/GPU_batch_12

GPU Batch 12
boostorg · Sep 17, 2024 · c3afa49 · c3afa49
2 parents ff72eaa + dbb547b
commit c3afa49
Show file tree

Hide file tree

Showing 73 changed files with 6,441 additions and 336 deletions.
diff --git a/doc/overview/gpu.qbk b/doc/overview/gpu.qbk
@@ -3,8 +3,9 @@
 [h4 GPU Support]
 
 Selected functions, distributions, tools, etc. support running on both host and devices.
-These functions will have the annotation `BOOST_MATH_GPU_ENABLED` next to their individual documentation.
-We test using CUDA (both NVCC and NVRTC) as well as SYCL to provide a wide range of support.
+These functions will have the annotation `BOOST_MATH_GPU_ENABLED` or `BOOST_MATH_CUDA_ENABLED` next to their individual documentation.
+Functions marked with `BOOST_MATH_GPU_ENABLED` are tested using CUDA (both NVCC and NVRTC) as well as SYCL to provide a wide range of support.
+A small number of functions are instead marked with `BOOST_MATH_CUDA_ENABLED`: these support CUDA only, because SYCL's restrictions prevent them from being supported there.
 
 [h4 Policies]
 

diff --git a/doc/sf/ellint_carlson.qbk b/doc/sf/ellint_carlson.qbk
@@ -17,10 +17,10 @@ LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
   namespace boost { namespace math {
 
   template <class T1, class T2, class T3>
-  ``__sf_result`` ellint_rf(T1 x, T2 y, T3 z)
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rf(T1 x, T2 y, T3 z)
 
   template <class T1, class T2, class T3, class ``__Policy``>
-  ``__sf_result`` ellint_rf(T1 x, T2 y, T3 z, const ``__Policy``&)
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rf(T1 x, T2 y, T3 z, const ``__Policy``&)
 
   }} // namespaces
 
@@ -32,10 +32,10 @@ LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
   namespace boost { namespace math {
 
   template <class T1, class T2, class T3>
-  ``__sf_result`` ellint_rd(T1 x, T2 y, T3 z)
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rd(T1 x, T2 y, T3 z)
 
   template <class T1, class T2, class T3, class ``__Policy``>
-  ``__sf_result`` ellint_rd(T1 x, T2 y, T3 z, const ``__Policy``&)
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rd(T1 x, T2 y, T3 z, const ``__Policy``&)
 
   }} // namespaces
 
@@ -47,10 +47,10 @@ LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
   namespace boost { namespace math {
 
   template <class T1, class T2, class T3, class T4>
-  ``__sf_result`` ellint_rj(T1 x, T2 y, T3 z, T4 p)
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rj(T1 x, T2 y, T3 z, T4 p)
 
   template <class T1, class T2, class T3, class T4, class ``__Policy``>
-  ``__sf_result`` ellint_rj(T1 x, T2 y, T3 z, T4 p, const ``__Policy``&)
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rj(T1 x, T2 y, T3 z, T4 p, const ``__Policy``&)
 
   }} // namespaces
 
@@ -62,10 +62,10 @@ LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
   namespace boost { namespace math {
 
   template <class T1, class T2>
-  ``__sf_result`` ellint_rc(T1 x, T2 y)
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rc(T1 x, T2 y)
 
   template <class T1, class T2, class ``__Policy``>
-  ``__sf_result`` ellint_rc(T1 x, T2 y, const ``__Policy``&)
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rc(T1 x, T2 y, const ``__Policy``&)
 
   }} // namespaces
 
@@ -76,10 +76,10 @@ LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
   namespace boost { namespace math {
 
   template <class T1, class T2, class T3>
-  ``__sf_result`` ellint_rg(T1 x, T2 y, T3 z)
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rg(T1 x, T2 y, T3 z)
 
   template <class T1, class T2, class T3, class ``__Policy``>
-  ``__sf_result`` ellint_rg(T1 x, T2 y, T3 z, const ``__Policy``&)
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rg(T1 x, T2 y, T3 z, const ``__Policy``&)
 
   }} // namespaces
 
@@ -98,10 +98,10 @@ when the arguments are of different types: otherwise the return is the same type
 as the arguments.
 
   template <class T1, class T2, class T3>
-  ``__sf_result`` ellint_rf(T1 x, T2 y, T3 z)
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rf(T1 x, T2 y, T3 z)
 
   template <class T1, class T2, class T3, class ``__Policy``>
-  ``__sf_result`` ellint_rf(T1 x, T2 y, T3 z, const ``__Policy``&)
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rf(T1 x, T2 y, T3 z, const ``__Policy``&)
 
 Returns Carlson's Elliptic Integral ['R[sub F]]:
 
@@ -113,10 +113,10 @@ one may be zero.  Otherwise returns the result of __domain_error.
 [optional_policy]
 
   template <class T1, class T2, class T3>
-  ``__sf_result`` ellint_rd(T1 x, T2 y, T3 z)
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rd(T1 x, T2 y, T3 z)
 
   template <class T1, class T2, class T3, class ``__Policy``>
-  ``__sf_result`` ellint_rd(T1 x, T2 y, T3 z, const ``__Policy``&)
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rd(T1 x, T2 y, T3 z, const ``__Policy``&)
 
 Returns Carlson's elliptic integral R[sub D]:
 
@@ -128,10 +128,10 @@ zero, and that z >= 0.  Otherwise returns the result of __domain_error.
 [optional_policy]
 
   template <class T1, class T2, class T3, class T4>
-  ``__sf_result`` ellint_rj(T1 x, T2 y, T3 z, T4 p)
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rj(T1 x, T2 y, T3 z, T4 p)
 
   template <class T1, class T2, class T3, class T4, class ``__Policy``>
-  ``__sf_result`` ellint_rj(T1 x, T2 y, T3 z, T4 p, const ``__Policy``&)
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rj(T1 x, T2 y, T3 z, T4 p, const ``__Policy``&)
 
 Returns Carlson's elliptic integral R[sub J]:
 
@@ -149,10 +149,10 @@ using the relation:
 [equation ellint17]
 
   template <class T1, class T2>
-  ``__sf_result`` ellint_rc(T1 x, T2 y)
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rc(T1 x, T2 y)
 
   template <class T1, class T2, class ``__Policy``>
-  ``__sf_result`` ellint_rc(T1 x, T2 y, const ``__Policy``&)
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rc(T1 x, T2 y, const ``__Policy``&)
 
 Returns Carlson's elliptic integral R[sub C]:
 
@@ -170,10 +170,10 @@ using the relation:
 [equation ellint18]
 
   template <class T1, class T2, class T3>
-  ``__sf_result`` ellint_rg(T1 x, T2 y, T3 z)
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rg(T1 x, T2 y, T3 z)
 
   template <class T1, class T2, class T3, class ``__Policy``>
-  ``__sf_result`` ellint_rg(T1 x, T2 y, T3 z, const ``__Policy``&)
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rg(T1 x, T2 y, T3 z, const ``__Policy``&)
 
 Returns Carlson's elliptic integral ['R[sub G]:]
 

diff --git a/doc/sf/ellint_legendre.qbk b/doc/sf/ellint_legendre.qbk
@@ -17,16 +17,16 @@ LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
   namespace boost { namespace math {
 
   template <class T1, class T2>
-  ``__sf_result`` ellint_1(T1 k, T2 phi);
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_1(T1 k, T2 phi);
 
   template <class T1, class T2, class ``__Policy``>
-  ``__sf_result`` ellint_1(T1 k, T2 phi, const ``__Policy``&);
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_1(T1 k, T2 phi, const ``__Policy``&);
 
   template <class T>
-  ``__sf_result`` ellint_1(T k);
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_1(T k);
 
   template <class T, class ``__Policy``>
-  ``__sf_result`` ellint_1(T k, const ``__Policy``&);
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_1(T k, const ``__Policy``&);
 
   }} // namespaces
 
@@ -42,10 +42,10 @@ when T1 and T2 are different types: when they are the same type then the result
 is the same type as the arguments.
 
   template <class T1, class T2>
-  ``__sf_result`` ellint_1(T1 k, T2 phi);
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_1(T1 k, T2 phi);
 
   template <class T1, class T2, class ``__Policy``>
-  ``__sf_result`` ellint_1(T1 k, T2 phi, const ``__Policy``&);
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_1(T1 k, T2 phi, const ``__Policy``&);
 
 Returns the incomplete elliptic integral of the first kind ['F([phi], k)]:
 
@@ -56,10 +56,10 @@ Requires k[super 2]sin[super 2](phi) < 1, otherwise returns the result of __doma
 [optional_policy]
 
   template <class T>
-  ``__sf_result`` ellint_1(T k);
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_1(T k);
 
   template <class T, class ``__Policy``>
-  ``__sf_result`` ellint_1(T k, const ``__Policy``&);
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_1(T k, const ``__Policy``&);
 
 Returns the complete elliptic integral of the first kind ['K(k)]:
 
@@ -123,16 +123,16 @@ and
   namespace boost { namespace math {
 
   template <class T1, class T2>
-  ``__sf_result`` ellint_2(T1 k, T2 phi);
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_2(T1 k, T2 phi);
 
   template <class T1, class T2, class ``__Policy``>
-  ``__sf_result`` ellint_2(T1 k, T2 phi, const ``__Policy``&);
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_2(T1 k, T2 phi, const ``__Policy``&);
 
   template <class T>
-  ``__sf_result`` ellint_2(T k);
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_2(T k);
 
   template <class T, class ``__Policy``>
-  ``__sf_result`` ellint_2(T k, const ``__Policy``&);
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_2(T k, const ``__Policy``&);
 
   }} // namespaces
 
@@ -148,10 +148,10 @@ when T1 and T2 are different types: when they are the same type then the result
 is the same type as the arguments.
 
   template <class T1, class T2>
-  ``__sf_result`` ellint_2(T1 k, T2 phi);
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_2(T1 k, T2 phi);
 
   template <class T1, class T2, class ``__Policy``>
-  ``__sf_result`` ellint_2(T1 k, T2 phi, const ``__Policy``&);
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_2(T1 k, T2 phi, const ``__Policy``&);
 
 Returns the incomplete elliptic integral of the second kind ['E([phi], k)]:
 
@@ -162,10 +162,10 @@ Requires k[super 2]sin[super 2](phi) < 1, otherwise returns the result of __doma
 [optional_policy]
 
   template <class T>
-  ``__sf_result`` ellint_2(T k);
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_2(T k);
 
   template <class T, class ``__Policy``>
-  ``__sf_result`` ellint_2(T k, const ``__Policy``&);
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_2(T k, const ``__Policy``&);
 
 Returns the complete elliptic integral of the second kind ['E(k)]:
 
@@ -230,16 +230,16 @@ and
   namespace boost { namespace math {
 
   template <class T1, class T2, class T3>
-  ``__sf_result`` ellint_3(T1 k, T2 n, T3 phi);
+  BOOST_MATH_CUDA_ENABLED ``__sf_result`` ellint_3(T1 k, T2 n, T3 phi);
 
   template <class T1, class T2, class T3, class ``__Policy``>
-  ``__sf_result`` ellint_3(T1 k, T2 n, T3 phi, const ``__Policy``&);
+  BOOST_MATH_CUDA_ENABLED ``__sf_result`` ellint_3(T1 k, T2 n, T3 phi, const ``__Policy``&);
 
   template <class T1, class T2>
-  ``__sf_result`` ellint_3(T1 k, T2 n);
+  BOOST_MATH_CUDA_ENABLED ``__sf_result`` ellint_3(T1 k, T2 n);
 
   template <class T1, class T2, class ``__Policy``>
-  ``__sf_result`` ellint_3(T1 k, T2 n, const ``__Policy``&);
+  BOOST_MATH_CUDA_ENABLED ``__sf_result`` ellint_3(T1 k, T2 n, const ``__Policy``&);
 
   }} // namespaces
 
@@ -255,10 +255,10 @@ when the arguments are of different types: when they are the same type then the
 is the same type as the arguments.
 
   template <class T1, class T2, class T3>
-  ``__sf_result`` ellint_3(T1 k, T2 n, T3 phi);
+  BOOST_MATH_CUDA_ENABLED ``__sf_result`` ellint_3(T1 k, T2 n, T3 phi);
 
   template <class T1, class T2, class T3, class ``__Policy``>
-  ``__sf_result`` ellint_3(T1 k, T2 n, T3 phi, const ``__Policy``&);
+  BOOST_MATH_CUDA_ENABLED ``__sf_result`` ellint_3(T1 k, T2 n, T3 phi, const ``__Policy``&);
 
 Returns the incomplete elliptic integral of the third kind ['[Pi](n, [phi], k)]:
 
@@ -271,10 +271,10 @@ would be complex).
 [optional_policy]
 
   template <class T1, class T2>
-  ``__sf_result`` ellint_3(T1 k, T2 n);
+  BOOST_MATH_CUDA_ENABLED ``__sf_result`` ellint_3(T1 k, T2 n);
 
   template <class T1, class T2, class ``__Policy``>
-  ``__sf_result`` ellint_3(T1 k, T2 n, const ``__Policy``&);
+  BOOST_MATH_CUDA_ENABLED ``__sf_result`` ellint_3(T1 k, T2 n, const ``__Policy``&);
 
 Returns the complete elliptic integral of the third kind ['[Pi](n, k)]:
 
@@ -355,16 +355,16 @@ and
   namespace boost { namespace math {
 
   template <class T1, class T2>
-  ``__sf_result`` ellint_d(T1 k, T2 phi);
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_d(T1 k, T2 phi);
 
   template <class T1, class T2, class ``__Policy``>
-  ``__sf_result`` ellint_d(T1 k, T2 phi, const ``__Policy``&);
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_d(T1 k, T2 phi, const ``__Policy``&);
 
   template <class T1>
-  ``__sf_result`` ellint_d(T1 k);
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_d(T1 k);
 
   template <class T1, class ``__Policy``>
-  ``__sf_result`` ellint_d(T1 k, const ``__Policy``&);
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_d(T1 k, const ``__Policy``&);
 
   }} // namespaces
 
@@ -378,10 +378,10 @@ when the arguments are of different types: when they are the same type then the
 is the same type as the arguments.
 
   template <class T1, class T2>
-  ``__sf_result`` ellint_d(T1 k, T2 phi);
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_d(T1 k, T2 phi);
 
   template <class T1, class T2, class ``__Policy``>
-  ``__sf_result`` ellint_3(T1 k, T2 phi, const ``__Policy``&);
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_d(T1 k, T2 phi, const ``__Policy``&);
 
 Returns the incomplete elliptic integral:
 
@@ -394,10 +394,10 @@ would be complex).
 [optional_policy]
 
   template <class T1>
-  ``__sf_result`` ellint_d(T1 k);
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_d(T1 k);
 
   template <class T1, class ``__Policy``>
-  ``__sf_result`` ellint_d(T1 k, const ``__Policy``&);
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_d(T1 k, const ``__Policy``&);
 
 Returns the complete elliptic integral ['D(k) = D([pi]/2, k)]
 
@@ -463,10 +463,10 @@ using the relation:
   namespace boost { namespace math {
 
   template <class T1, class T2>
-  ``__sf_result`` jacobi_zeta(T1 k, T2 phi);
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` jacobi_zeta(T1 k, T2 phi);
 
   template <class T1, class T2, class ``__Policy``>
-  ``__sf_result`` jacobi_zeta(T1 k, T2 phi, const ``__Policy``&);
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` jacobi_zeta(T1 k, T2 phi, const ``__Policy``&);
 
   }} // namespaces
 
@@ -543,10 +543,10 @@ is [@../../example/jacobi_zeta_example.cpp jacobi_zeta_example.cpp].
   namespace boost { namespace math {
 
   template <class T1, class T2>
-  ``__sf_result`` heuman_lambda(T1 k, T2 phi);
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` heuman_lambda(T1 k, T2 phi);
 
   template <class T1, class T2, class ``__Policy``>
-  ``__sf_result`` heuman_lambda(T1 k, T2 phi, const ``__Policy``&);
+  BOOST_MATH_GPU_ENABLED ``__sf_result`` heuman_lambda(T1 k, T2 phi, const ``__Policy``&);
 
   }} // namespaces