Merge pull request #25 from cppalliance/GPU_batch_11.1_test
Gpu batch 11.1 test
Showing 15 changed files with 3,831 additions and 7 deletions.
1,460 changes: 1,459 additions & 1 deletion
include/boost/math/quadrature/detail/exp_sinh_detail.hpp
Large diffs are not rendered by default.
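This header is the detail layer behind boost::math::quadrature::exp_sinh, the integrator the new test below exercises on both host and device. For orientation, a minimal host-side sketch of that public API; the exp(-x) integrand and the printout are illustrative, not part of this diff:

// Minimal host-side usage sketch (illustrative, not from this diff):
// integrate exp(-x) over [0, inf), which is exactly 1.
#include <boost/math/quadrature/exp_sinh.hpp>
#include <boost/math/tools/precision.hpp>
#include <cmath>
#include <iostream>

int main()
{
    boost::math::quadrature::exp_sinh<double> integrator;
    auto f = [](double x) { return std::exp(-x); };
    double error;
    double Q = integrator.integrate(f, boost::math::tools::root_epsilon<double>(), &error);
    std::cout << "Q = " << Q << " (exact: 1), error estimate = " << error << '\n';
}

exp-sinh quadrature targets integrals over a half-infinite range such as [0, inf), which is why the test integrand below needs no truncation of its domain.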
869 changes: 867 additions & 2 deletions
include/boost/math/quadrature/detail/sinh_sinh_detail.hpp
Large diffs are not rendered by default.
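sinh_sinh is the companion integrator for the whole real line (-inf, inf). A minimal host-side sketch of its use, again illustrative rather than taken from the diff:

// Minimal host-side usage sketch (illustrative, not from this diff):
// integrate exp(-x*x) over (-inf, inf), which is exactly sqrt(pi).
#include <boost/math/quadrature/sinh_sinh.hpp>
#include <boost/math/tools/precision.hpp>
#include <cmath>
#include <iostream>

int main()
{
    boost::math::quadrature::sinh_sinh<double> integrator;
    auto f = [](double x) { return std::exp(-x * x); };
    double error;
    double Q = integrator.integrate(f, boost::math::tools::root_epsilon<double>(), &error);
    std::cout << "Q = " << Q << " (exact: sqrt(pi) ~= 1.7724538509)\n";
}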
@@ -0,0 +1,133 @@
// Copyright John Maddock 2016.
// Copyright Matt Borland 2024.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <iostream>
#include <iomanip>
#include <limits>
#include <vector>
#include <boost/math/quadrature/exp_sinh.hpp>
#include <boost/math/special_functions.hpp>
#include <boost/math/tools/precision.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

typedef double float_type;

__host__ __device__ float_type func(float_type x)
{
    BOOST_MATH_STD_USING
    return 1/(1+x*x);
}

/**
 * CUDA kernel device code
 */
__global__ void cuda_test(float_type *out, int numElements)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;
    float_type tol = boost::math::tools::root_epsilon<float_type>();
    float_type error;
    float_type L1;
    boost::math::size_t levels;

    if (i < numElements)
    {
        out[i] = boost::math::quadrature::exp_sinh_integrate(func, tol, &error, &L1, &levels);
    }
}

/**
 * Host main routine
 */
int main(void)
{
    // Error code to check return values for CUDA calls
    cudaError_t err = cudaSuccess;

    // Print the vector length to be used
    int numElements = 50000;
    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;

    // Allocate the managed input vector
    cuda_managed_ptr<float_type> input_vector(numElements);

    // Allocate the managed output vector
    cuda_managed_ptr<float_type> output_vector(numElements);

    // Initialize the input vector
    for (int i = 0; i < numElements; ++i)
    {
        input_vector[i] = M_PI * (static_cast<float_type>(i) / numElements);
    }

    // Launch the CUDA kernel
    int threadsPerBlock = 512;
    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

    watch w;

    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(output_vector.get(), numElements);
    cudaDeviceSynchronize();

    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;

    err = cudaGetLastError();

    if (err != cudaSuccess)
    {
        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
        return EXIT_FAILURE;
    }

    // Compute the same integrals serially on the host for comparison
    std::vector<float_type> results;
    results.reserve(numElements);
    w.reset();
    float_type tol = boost::math::tools::root_epsilon<float_type>();
    float_type error;
    float_type L1;
    boost::math::quadrature::exp_sinh<float_type> integrator;
    for(int i = 0; i < numElements; ++i)
    {
        results.push_back(integrator.integrate(func, tol, &error, &L1));
    }
    double t = w.elapsed();

    // Verify the device results against the host results
    int failed_count = 0;
    for(int i = 0; i < numElements; ++i)
    {
        const auto eps = boost::math::epsilon_difference(output_vector[i], results[i]);
        if (eps > 10)
        {
            std::cerr << std::setprecision(std::numeric_limits<float_type>::digits10)
                      << "Result verification failed at element " << i << "!\n"
                      << "Device: " << output_vector[i]
                      << "\n  Host: " << results[i]
                      << "\n   Eps: " << eps << "\n";
            failed_count++;
        }
        if (failed_count > 100)
        {
            break;
        }
    }

    if (failed_count != 0)
    {
        std::cout << "Test FAILED" << std::endl;
        return EXIT_FAILURE;
    }

    std::cout << "Test PASSED, serial host calculation time: " << t << "s" << std::endl;
    std::cout << "Done\n";

    return 0;
}
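One further check is available beyond the device-versus-host comparison above: the test integrand has a closed form, since the integral of 1/(1+x^2) over [0, inf) is exactly pi/2, so either result set could also be spot-checked against the exact value. A sketch of such a check; the helper name and tolerance are ours, not from the diff:

// Hypothetical spot check against the exact value pi/2 (not in the diff).
#include <boost/math/constants/constants.hpp>
#include <cmath>

bool matches_exact_value(double computed)
{
    const double exact = boost::math::constants::half_pi<double>();   // pi/2
    return std::abs(computed - exact) <= 1e-12 * exact;               // illustrative tolerance
}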