diff --git a/core/src/Cuda/Kokkos_CudaSpace.cpp b/core/src/Cuda/Kokkos_CudaSpace.cpp index dc0a203d65c..82c41fea0ea 100644 --- a/core/src/Cuda/Kokkos_CudaSpace.cpp +++ b/core/src/Cuda/Kokkos_CudaSpace.cpp @@ -193,15 +193,16 @@ void *impl_allocate_common(const int device_id, // The idea is to use host allocator and then adivce to keep it in HBM on // device, but that requires CUDA 12.2 static_assert(CUDART_VERSION >= 12020); - ptr = malloc(arg_alloc_size); - // TODO: what error do we want to throw here if it fails, i.e. ptr == nullptr? - // One would thing cudaMemLocation{device_id, cudaMemLocationTypeDeivce} would - // work but it doesn't. I.e. the order of members doesn't seem to be defined. - cudaMemLocation loc; - loc.id = device_id; - loc.type = cudaMemLocationTypeDevice; - KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMemAdvise_v2( - ptr, arg_alloc_size, cudaMemAdviseSetPreferredLocation, loc)); + if (arg_alloc_size) { // cudaMemAdvise_v2 does not work with nullptr + ptr = malloc(arg_alloc_size); + // TODO: what error do we want to throw here if it fails, i.e. ptr == nullptr? One would thing cudaMemLocation{device_id, cudaMemLocationTypeDeivce} would + // work but it doesn't. I.e. the order of members doesn't seem to be defined. + cudaMemLocation loc; + loc.id = device_id; + loc.type = cudaMemLocationTypeDevice; + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMemAdvise_v2( + ptr, arg_alloc_size, cudaMemAdviseSetPreferredLocation, loc)); + } #elif (defined(KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC) && CUDART_VERSION >= 11020) if (arg_alloc_size >= memory_threshold_g) { error_code = cudaMallocAsync(&ptr, arg_alloc_size, stream);