From 51cd0638c33ecb7e27aad4dd275d7ef67a03b4f5 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 6 Aug 2024 11:42:44 -0400 Subject: [PATCH] Add additional checks --- test/test_gamma_nvrtc.cpp | 78 ++++++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 33 deletions(-) diff --git a/test/test_gamma_nvrtc.cpp b/test/test_gamma_nvrtc.cpp index 05976df0c..6b1afef7d 100644 --- a/test/test_gamma_nvrtc.cpp +++ b/test/test_gamma_nvrtc.cpp @@ -14,23 +14,16 @@ #include #include #include - -// For the CUDA runtime routines (prefixed with "cuda_") #include #include #include -/** - * CUDA Kernel Device code - * - */ const char* cuda_kernel = R"( #include extern "C" __global__ void test_gamma_kernel(const float *in1, const float*, float *out, int numElements) { int i = blockDim.x * blockIdx.x + threadIdx.x; - assert(i < 5000); if (i < numElements) { out[i] = boost::math::tgamma(in1[i]); @@ -38,6 +31,26 @@ void test_gamma_kernel(const float *in1, const float*, float *out, int numElemen } )"; +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + void checkNVRTCError(nvrtcResult result, const char* msg) { if (result != NVRTC_SUCCESS) @@ -51,17 +64,19 @@ int main() { try { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + nvrtcProgram prog; nvrtcResult res; - // Create NVRTC program - res = nvrtcCreateProgram(&prog, - cuda_kernel, - "test_gamma_kernel.cu", - 0, - nullptr, - nullptr); - + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gamma_kernel.cu", 0, nullptr, nullptr); checkNVRTCError(res, "Failed to create NVRTC program"); nvrtcAddNameExpression(prog, "test_gamma_kernel"); @@ -94,10 +109,9 @@ int main() // Load PTX into CUDA module CUmodule module; CUfunction kernel; - cuModuleLoadDataEx(&module, ptx, 0, 0, 0); - cuModuleGetFunction(&kernel, module, "test_gamma_kernel"); - - // Input parameters + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_gamma_kernel"), "Failed to get kernel function"); + int numElements = 5000; float *h_in1, *h_in2, *h_out; float *d_in1, *d_in2, *d_out; @@ -116,25 +130,21 @@ int main() h_in2[i] = static_cast(dist(rng)); } - // Allocate memory on the device - cudaMalloc(&d_in1, numElements * sizeof(float)); - cudaMalloc(&d_in2, numElements * sizeof(float)); - cudaMalloc(&d_out, numElements * sizeof(float)); + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float)), "Failed to allocate device memory for d_out"); - // Copy input data to device - cudaMemcpy(d_in1, h_in1, numElements * sizeof(float), cudaMemcpyHostToDevice); - cudaMemcpy(d_in2, h_in2, numElements * sizeof(float), cudaMemcpyHostToDevice); + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); - // Launch the kernel int blockSize = 256; int numBlocks = (numElements + blockSize - 1) / blockSize; void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; - cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0); + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); - // Copy result back to host - cudaMemcpy(h_out, d_out, numElements * sizeof(float), cudaMemcpyDeviceToHost); + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); - // Verify results + // Verify Result for (int i = 0; i < numElements; ++i) { auto res = boost::math::tgamma(h_in1[i]); @@ -150,7 +160,6 @@ int main() } } - // Clean up cudaFree(d_in1); cudaFree(d_in2); cudaFree(d_out); @@ -161,11 +170,14 @@ int main() nvrtcDestroyProgram(&prog); delete[] ptx; + cuCtxDestroy(context); + std::cout << "Kernel executed successfully." << std::endl; return 0; } catch(const std::exception& e) { std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; } -} +} \ No newline at end of file