diff --git a/core/src/Cuda/Kokkos_CudaSpace.cpp b/core/src/Cuda/Kokkos_CudaSpace.cpp index 47227977729..b10600511f6 100644 --- a/core/src/Cuda/Kokkos_CudaSpace.cpp +++ b/core/src/Cuda/Kokkos_CudaSpace.cpp @@ -195,8 +195,8 @@ void *impl_allocate_common(const int device_id, static_assert(CUDART_VERSION >= 12020); if (arg_alloc_size) { // cudaMemAdvise_v2 does not work with nullptr ptr = malloc(arg_alloc_size); - // TODO: what error do we want to throw here if it fails, i.e. ptr == - // nullptr? One would thing cudaMemLocation{device_id, + if(ptr != nullptr) { + // One would think cudaMemLocation{device_id, // cudaMemLocationTypeDeivce} would work but it doesn't. I.e. the order of // members doesn't seem to be defined. cudaMemLocation loc; @@ -204,6 +204,11 @@ void *impl_allocate_common(const int device_id, loc.type = cudaMemLocationTypeDevice; KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMemAdvise_v2( ptr, arg_alloc_size, cudaMemAdviseSetPreferredLocation, loc)); + } else { + // I think this is the most logical error to return unless we + // want a different mechanism for this code path + error_code = cudaErrorMemoryAllocation; + } } #elif (defined(KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC) && CUDART_VERSION >= 11020) if (arg_alloc_size >= memory_threshold_g) { @@ -373,6 +378,8 @@ void CudaSpace::impl_deallocate( KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(m_device)); KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(arg_alloc_ptr)); #elif defined(KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY) + Impl::cuda_device_synchronize( + "Kokkos::Cuda: backend fence before unified memory free"); free(arg_alloc_ptr); #elif (defined(KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC) && CUDART_VERSION >= 11020) if (arg_alloc_size >= memory_threshold_g) {