diff --git a/include/boost/math/quadrature/detail/sinh_sinh_detail.hpp b/include/boost/math/quadrature/detail/sinh_sinh_detail.hpp index ad577ef9e..7f7477a6e 100644 --- a/include/boost/math/quadrature/detail/sinh_sinh_detail.hpp +++ b/include/boost/math/quadrature/detail/sinh_sinh_detail.hpp @@ -1213,7 +1213,7 @@ struct coefficients_selector }; template <class F, class Real, class Policy = boost::math::policies::policy<> > -__device__ auto sinh_sinh_integrate_impl(const F& f, Real tol, Real* error, Real* L1, boost::math::size_t* levels) +__device__ auto sinh_sinh_integrate_impl(const F& f, Real tolerance, Real* error, Real* L1, boost::math::size_t* levels) { BOOST_MATH_STD_USING using boost::math::constants::half; @@ -1223,7 +1223,7 @@ __device__ auto sinh_sinh_integrate_impl(const F& f, Real tol, Real* error, Real constexpr auto function = "boost::math::quadrature::sinh_sinh<%1%>::integrate"; using K = decltype(f(static_cast<Real>(0))); - static_assert(!boost::math::::is_integral<K>::value, + static_assert(!boost::math::is_integral<K>::value, "The return type cannot be integral, it must be either a real or complex floating point type."); K y_max = f(boost::math::tools::max_value<Real>()); diff --git a/test/cuda_jamfile b/test/cuda_jamfile index 3f3dcaaa0..0a32ef099 100644 --- a/test/cuda_jamfile +++ b/test/cuda_jamfile @@ -12,6 +12,8 @@ project : requirements # Quad run test_exp_sinh_quad_float.cu ; run test_exp_sinh_quad_double.cu ; +run test_sinh_sinh_quad_float.cu ; +run test_sinh_sinh_quad_double.cu ; # Distributions run test_arcsine.cpp ; diff --git a/test/test_sinh_sinh_quad_double.cu b/test/test_sinh_sinh_quad_double.cu new file mode 100644 index 000000000..bf7490fa4 --- /dev/null +++ b/test/test_sinh_sinh_quad_double.cu @@ -0,0 +1,133 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include <iostream> +#include <iomanip> +#include <limits> +#include <vector> +#include <boost/math/quadrature/sinh_sinh.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include <cuda_runtime.h> + +typedef double float_type; + +__host__ __device__ float_type func(float_type x) +{ + BOOST_MATH_STD_USING + return 1/(1+x*x); +} + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + float_type tol = boost::math::tools::root_epsilon<float_type>(); + float_type error; + float_type L1; + boost::math::size_t levels; + + if (i < numElements) + { + out[i] = boost::math::quadrature::sinh_sinh_integrate(func, tol, &error, &L1, &levels); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr<float_type> input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr<float_type> output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = M_PI * (static_cast<float_type>(i) / numElements); + } + + // Launch the quadrature CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<<blocksPerGrid, threadsPerBlock>>>(output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch
vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector<float_type> results; + results.reserve(numElements); + w.reset(); + float_type tol = boost::math::tools::root_epsilon<float_type>(); + float_type error; + float_type L1; + boost::math::quadrature::sinh_sinh<float_type> integrator; + for(int i = 0; i < numElements; ++i) + { + results.push_back(integrator.integrate(func, tol, &error, &L1)); + } + double t = w.elapsed(); + // check the results + int failed_count = 0; + for(int i = 0; i < numElements; ++i) + { + const auto eps = boost::math::epsilon_difference(output_vector[i], results[i]); + if (eps > 10) + { + std::cerr << std::setprecision(std::numeric_limits<float_type>::digits10) + << "Result verification failed at element " << i << "!\n" + << "Device: " << output_vector[i] + << "\n Host: " << results[i] + << "\n Eps: " << eps << "\n"; + failed_count++; + } + if (failed_count > 100) + { + break; + } + } + + if (failed_count != 0) + { + std::cout << "Test FAILED" << std::endl; + return EXIT_FAILURE; + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_sinh_sinh_quad_float.cu b/test/test_sinh_sinh_quad_float.cu new file mode 100644 index 000000000..b84e316af --- /dev/null +++ b/test/test_sinh_sinh_quad_float.cu @@ -0,0 +1,133 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include <iostream> +#include <iomanip> +#include <limits> +#include <vector> +#include <boost/math/quadrature/sinh_sinh.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include <cuda_runtime.h> + +typedef float float_type; + +__host__ __device__ float_type func(float_type x) +{ + BOOST_MATH_STD_USING + return 1/(1+x*x); +} + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + float_type tol = boost::math::tools::root_epsilon<float_type>(); + float_type error; + float_type L1; + boost::math::size_t levels; + + if (i < numElements) + { + out[i] = boost::math::quadrature::sinh_sinh_integrate(func, tol, &error, &L1, &levels); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr<float_type> input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr<float_type> output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = M_PI * (static_cast<float_type>(i) / numElements); + } + + // Launch the quadrature CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<<blocksPerGrid, threadsPerBlock>>>(output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch
vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector<float_type> results; + results.reserve(numElements); + w.reset(); + float_type tol = boost::math::tools::root_epsilon<float_type>(); + float_type error; + float_type L1; + boost::math::quadrature::sinh_sinh<float_type> integrator; + for(int i = 0; i < numElements; ++i) + { + results.push_back(integrator.integrate(func, tol, &error, &L1)); + } + double t = w.elapsed(); + // check the results + int failed_count = 0; + for(int i = 0; i < numElements; ++i) + { + const auto eps = boost::math::epsilon_difference(output_vector[i], results[i]); + if (eps > 10) + { + std::cerr << std::setprecision(std::numeric_limits<float_type>::digits10) + << "Result verification failed at element " << i << "!\n" + << "Device: " << output_vector[i] + << "\n Host: " << results[i] + << "\n Eps: " << eps << "\n"; + failed_count++; + } + if (failed_count > 100) + { + break; + } + } + + if (failed_count != 0) + { + std::cout << "Test FAILED" << std::endl; + return EXIT_FAILURE; + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +}