Skip to content

Commit

Permalink
Add sinh_sinh CUDA testing
Browse files Browse the repository at this point in the history
  • Loading branch information
mborland committed Sep 10, 2024
1 parent 2a759dc commit ddd448a
Show file tree
Hide file tree
Showing 4 changed files with 270 additions and 2 deletions.
4 changes: 2 additions & 2 deletions include/boost/math/quadrature/detail/sinh_sinh_detail.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1213,7 +1213,7 @@ struct coefficients_selector<double>
};

template <class F, class Real, class Policy = boost::math::policies::policy<> >
__device__ auto sinh_sinh_integrate_impl(const F& f, Real tol, Real* error, Real* L1, boost::math::size_t* levels)
__device__ auto sinh_sinh_integrate_impl(const F& f, Real tolerance, Real* error, Real* L1, boost::math::size_t* levels)
{
BOOST_MATH_STD_USING
using boost::math::constants::half;
Expand All @@ -1223,7 +1223,7 @@ __device__ auto sinh_sinh_integrate_impl(const F& f, Real tol, Real* error, Real
constexpr auto function = "boost::math::quadrature::sinh_sinh<%1%>::integrate";

using K = decltype(f(static_cast<Real>(0)));
static_assert(!boost::math::::is_integral<K>::value,
static_assert(!boost::math::is_integral<K>::value,
"The return type cannot be integral, it must be either a real or complex floating point type.");

K y_max = f(boost::math::tools::max_value<Real>());
Expand Down
2 changes: 2 additions & 0 deletions test/cuda_jamfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ project : requirements
# Quad
run test_exp_sinh_quad_float.cu ;
run test_exp_sinh_quad_double.cu ;
run test_sinh_sinh_quad_float.cu ;
run test_sinh_sinh_quad_double.cu ;

# Distributions
run test_arcsine.cpp ;
Expand Down
133 changes: 133 additions & 0 deletions test/test_sinh_sinh_quad_double.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@

// Copyright John Maddock 2016.
// Copyright Matt Borland 2024.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <iostream>
#include <iomanip>
#include <vector>
#include <boost/math/quadrature/sinh_sinh.hpp>
#include <boost/math/special_functions.hpp>
#include <boost/math/tools/precision.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

typedef double float_type;

/**
 * Integrand under test: the Lorentzian 1/(1 + x^2).
 * Its integral over the whole real line is pi, which makes it a
 * convenient smoke test for the sinh-sinh (doubly-infinite) quadrature.
 */
__host__ __device__ float_type func(float_type x)
{
    BOOST_MATH_STD_USING
    const float_type denominator = 1 + x * x;
    return 1 / denominator;
}

/**
* CUDA Kernel Device code
*
*/
/**
 * CUDA kernel: each thread evaluates the sinh-sinh quadrature of func
 * over the whole real line and writes the result to out[i].
 *
 * Expects a 1-D grid/block layout; threads with a global index at or
 * beyond numElements return immediately without touching memory.
 */
__global__ void cuda_test(float_type *out, int numElements)
{
    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
    if (idx >= numElements)
    {
        return;
    }

    const float_type tolerance = boost::math::tools::root_epsilon<float_type>();
    float_type err_estimate;
    float_type l1_norm;
    boost::math::size_t level_count;

    out[idx] = boost::math::quadrature::sinh_sinh_integrate(func, tolerance, &err_estimate, &l1_norm, &level_count);
}

/**
* Host main routine
*/
/**
 * Host main routine.
 *
 * Launches the sinh_sinh quadrature kernel once per output element, then
 * recomputes the same integral on the host with the sinh_sinh integrator
 * and compares device vs. host results element-wise, tolerating up to
 * 10 units of epsilon difference.
 *
 * Returns 0 on success, EXIT_FAILURE on launch/execution error or
 * verification failure.
 */
int main(void)
{
    // Error code to check return values for CUDA calls
    cudaError_t err = cudaSuccess;

    // Print the vector length to be used
    int numElements = 50000;
    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;

    // Allocate the managed output vector.  Note the kernel takes no
    // per-element input: every thread evaluates the same integral, so no
    // input vector is needed.
    cuda_managed_ptr<float_type> output_vector(numElements);

    // Launch the quadrature CUDA kernel
    int threadsPerBlock = 512;
    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

    watch w;

    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(output_vector.get(), numElements);

    // Catch launch-configuration errors first...
    err = cudaGetLastError();
    if (err != cudaSuccess)
    {
        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
        return EXIT_FAILURE;
    }

    // ...then synchronize and catch asynchronous execution errors.
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess)
    {
        std::cerr << "cuda_test kernel execution failed (error code " << cudaGetErrorString(err) << ")!" << std::endl;
        return EXIT_FAILURE;
    }

    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;

    // Recompute the same integrals on the host as the reference
    std::vector<float_type> results;
    results.reserve(numElements);
    w.reset();
    float_type tol = boost::math::tools::root_epsilon<float_type>();
    float_type error;
    float_type L1;
    boost::math::quadrature::sinh_sinh<float_type> integrator;
    for(int i = 0; i < numElements; ++i)
    {
        results.push_back(integrator.integrate(func, tol, &error, &L1));
    }
    double t = w.elapsed();

    // Compare device results against the host reference; stop reporting
    // after 100 failures to keep the log bounded.
    int failed_count = 0;
    for(int i = 0; i < numElements; ++i)
    {
        const auto eps = boost::math::epsilon_difference(output_vector[i], results[i]);
        if (eps > 10)
        {
            std::cerr << std::setprecision(std::numeric_limits<float_type>::digits10)
                      << "Result verification failed at element " << i << "!\n"
                      << "Device: " << output_vector[i]
                      << "\n Host: " << results[i]
                      << "\n Eps: " << eps << "\n";
            failed_count++;
        }
        if (failed_count > 100)
        {
            break;
        }
    }

    if (failed_count != 0)
    {
        std::cout << "Test FAILED" << std::endl;
        return EXIT_FAILURE;
    }

    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
    std::cout << "Done\n";

    return 0;
}
133 changes: 133 additions & 0 deletions test/test_sinh_sinh_quad_float.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@

// Copyright John Maddock 2016.
// Copyright Matt Borland 2024.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <iostream>
#include <iomanip>
#include <vector>
#include <boost/math/quadrature/sinh_sinh.hpp>
#include <boost/math/special_functions.hpp>
#include <boost/math/tools/precision.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

typedef float float_type;

/**
 * Integrand under test: the Lorentzian 1/(1 + x^2).
 * Its integral over the whole real line is pi, which makes it a
 * convenient smoke test for the sinh-sinh (doubly-infinite) quadrature.
 */
__host__ __device__ float_type func(float_type x)
{
    BOOST_MATH_STD_USING
    const float_type denominator = 1 + x * x;
    return 1 / denominator;
}

/**
* CUDA Kernel Device code
*
*/
/**
 * CUDA kernel: each thread evaluates the sinh-sinh quadrature of func
 * over the whole real line and writes the result to out[i].
 *
 * Expects a 1-D grid/block layout; threads with a global index at or
 * beyond numElements return immediately without touching memory.
 */
__global__ void cuda_test(float_type *out, int numElements)
{
    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
    if (idx >= numElements)
    {
        return;
    }

    const float_type tolerance = boost::math::tools::root_epsilon<float_type>();
    float_type err_estimate;
    float_type l1_norm;
    boost::math::size_t level_count;

    out[idx] = boost::math::quadrature::sinh_sinh_integrate(func, tolerance, &err_estimate, &l1_norm, &level_count);
}

/**
* Host main routine
*/
/**
 * Host main routine.
 *
 * Launches the sinh_sinh quadrature kernel once per output element, then
 * recomputes the same integral on the host with the sinh_sinh integrator
 * and compares device vs. host results element-wise, tolerating up to
 * 10 units of epsilon difference.
 *
 * Returns 0 on success, EXIT_FAILURE on launch/execution error or
 * verification failure.
 */
int main(void)
{
    // Error code to check return values for CUDA calls
    cudaError_t err = cudaSuccess;

    // Print the vector length to be used
    int numElements = 50000;
    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;

    // Allocate the managed output vector.  Note the kernel takes no
    // per-element input: every thread evaluates the same integral, so no
    // input vector is needed.
    cuda_managed_ptr<float_type> output_vector(numElements);

    // Launch the quadrature CUDA kernel
    int threadsPerBlock = 512;
    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

    watch w;

    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(output_vector.get(), numElements);

    // Catch launch-configuration errors first...
    err = cudaGetLastError();
    if (err != cudaSuccess)
    {
        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
        return EXIT_FAILURE;
    }

    // ...then synchronize and catch asynchronous execution errors.
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess)
    {
        std::cerr << "cuda_test kernel execution failed (error code " << cudaGetErrorString(err) << ")!" << std::endl;
        return EXIT_FAILURE;
    }

    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;

    // Recompute the same integrals on the host as the reference
    std::vector<float_type> results;
    results.reserve(numElements);
    w.reset();
    float_type tol = boost::math::tools::root_epsilon<float_type>();
    float_type error;
    float_type L1;
    boost::math::quadrature::sinh_sinh<float_type> integrator;
    for(int i = 0; i < numElements; ++i)
    {
        results.push_back(integrator.integrate(func, tol, &error, &L1));
    }
    double t = w.elapsed();

    // Compare device results against the host reference; stop reporting
    // after 100 failures to keep the log bounded.
    int failed_count = 0;
    for(int i = 0; i < numElements; ++i)
    {
        const auto eps = boost::math::epsilon_difference(output_vector[i], results[i]);
        if (eps > 10)
        {
            std::cerr << std::setprecision(std::numeric_limits<float_type>::digits10)
                      << "Result verification failed at element " << i << "!\n"
                      << "Device: " << output_vector[i]
                      << "\n Host: " << results[i]
                      << "\n Eps: " << eps << "\n";
            failed_count++;
        }
        if (failed_count > 100)
        {
            break;
        }
    }

    if (failed_count != 0)
    {
        std::cout << "Test FAILED" << std::endl;
        return EXIT_FAILURE;
    }

    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
    std::cout << "Done\n";

    return 0;
}

0 comments on commit ddd448a

Please sign in to comment.