Skip to content

Commit

Permalink
CUDA Unified Memory: remove EMULATE option
Browse files Browse the repository at this point in the history
Since the non-emulate path now already uses cudaMallocManaged,
unified memory will just work without the EMULATE option anyway.
  • Loading branch information
Christian Robert Trott committed Aug 21, 2024
1 parent 0e2d3a3 commit 62527f6
Show file tree
Hide file tree
Showing 6 changed files with 0 additions and 27 deletions.
1 change: 0 additions & 1 deletion cmake/KokkosCore_config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@
#cmakedefine KOKKOS_ENABLE_CUDA_CONSTEXPR
#cmakedefine KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC
#cmakedefine KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY
#cmakedefine KOKKOS_ENABLE_IMPL_CUDA_EMULATE_UNIFIED_MEMORY
#cmakedefine KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE
#cmakedefine KOKKOS_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS
#cmakedefine KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY
Expand Down
1 change: 0 additions & 1 deletion cmake/kokkos_enable_options.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ KOKKOS_ENABLE_OPTION(CUDA_LAMBDA ${CUDA_LAMBDA_DEFAULT} "Whether to allow lambda
# resolved but we keep the option around a bit longer to be safe.
KOKKOS_ENABLE_OPTION(IMPL_CUDA_MALLOC_ASYNC ON "Whether to enable CudaMallocAsync (requires CUDA Toolkit 11.2)")
KOKKOS_ENABLE_OPTION(IMPL_NVHPC_AS_DEVICE_COMPILER OFF "Whether to allow nvc++ as Cuda device compiler")
KOKKOS_ENABLE_OPTION(IMPL_REF_COUNT_BRANCH_UNLIKELY ON "Whether to use the C++20 `[[unlikely]]` attribute in the view reference counting")
KOKKOS_ENABLE_OPTION(IMPL_CUDA_UNIFIED_MEMORY OFF "Whether to leverage unified memory architectures for CUDA")
KOKKOS_ENABLE_OPTION(IMPL_CUDA_EMULATE_UNIFIED_MEMORY OFF "Whether to emulate unified memory architectures for CUDA in non-Grace Hopper systems")
KOKKOS_ENABLE_OPTION(DEPRECATED_CODE_4 ON "Whether code deprecated in major release 4 is available" )
Expand Down
6 changes: 0 additions & 6 deletions core/src/Cuda/Kokkos_CudaSpace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,12 +177,6 @@ void *impl_allocate_common(const int device_id,
cudaError_t error_code = cudaSuccess;
#ifndef CUDART_VERSION
#error CUDART_VERSION undefined!
#elif defined(KOKKOS_ENABLE_IMPL_CUDA_EMULATE_UNIFIED_MEMORY)
// This is intended to simulate Grace-Hopper-like behavior
error_code = cudaMallocManaged(&ptr, arg_alloc_size, cudaMemAttachGlobal);
if (error_code == cudaSuccess) {
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaDeviceSynchronize());
}
#elif defined(KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY)
// This is intended for Grace-Hopper (and future unified memory architectures)
// The idea is to use host allocator and then advise to keep it in HBM on the
Expand Down
4 changes: 0 additions & 4 deletions core/src/Cuda/Kokkos_Cuda_Instance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -719,10 +719,6 @@ void Cuda::print_configuration(std::ostream &os, bool /*verbose*/) const {
os << " KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY: ";
os << "yes\n";
#endif
#ifdef KOKKOS_ENABLE_IMPL_CUDA_EMULATE_UNIFIED_MEMORY
os << " KOKKOS_ENABLE_IMPL_CUDA_EMULATE_UNIFIED_MEMORY: ";
os << "yes\n";
#endif

os << "\nCuda Runtime Configuration:\n";

Expand Down
6 changes: 0 additions & 6 deletions core/src/Cuda/Kokkos_Cuda_Instance.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -324,13 +324,7 @@ class CudaInternal {
// Thin wrapper around the CUDA runtime allocation call.
//
// Template parameter:
//   setCudaDevice - when true (the default), set_cuda_device() is invoked
//                   before the allocation is attempted.
// Parameters:
//   devPtr - forwarded to the allocator as its output pointer argument.
//   size   - forwarded to the allocator as the requested size.
// Returns:
//   The cudaError_t produced by the allocation call itself (cudaMalloc or
//   cudaMallocManaged, depending on the build configuration below).
template <bool setCudaDevice = true>
cudaError_t cuda_malloc_wrapper(void** devPtr, size_t size) const {
if constexpr (setCudaDevice) set_cuda_device();
// Default build: plain cudaMalloc.
#if !defined(KOKKOS_ENABLE_IMPL_CUDA_EMULATE_UNIFIED_MEMORY)
return cudaMalloc(devPtr, size);
#else
// Emulation build: allocate managed memory with the cudaMemAttachGlobal flag.
// Note that `ptr` holds the allocation's status code, not a pointer.
auto ptr = cudaMallocManaged(devPtr, size, cudaMemAttachGlobal);
// The device-wide synchronize runs regardless of whether the allocation
// succeeded; its own failure is handled by KOKKOS_IMPL_CUDA_SAFE_CALL, while
// the allocation status is passed back to the caller unchanged.
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaDeviceSynchronize());
return ptr;
#endif
}

template <bool setCudaDevice = true>
Expand Down
9 changes: 0 additions & 9 deletions core/src/Kokkos_Macros.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -550,15 +550,6 @@ static constexpr bool kokkos_omp_on_host() { return false; }
#define KOKKOS_ENABLE_CUDA_LDG_INTRINSIC
#endif

#if defined(KOKKOS_ENABLE_IMPL_CUDA_EMULATE_UNIFIED_MEMORY)
#define KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY
#endif

// TODO: enable the following when we are sure it is the right thing to do
// #if defined(KOKKOS_ARCH_ARMV9_GRACE) && defined(KOKKOS_ARCH_HOPPER90)
// #define KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY
// #endif

#define KOKKOS_INVALID_INDEX (~std::size_t(0))

#define KOKKOS_IMPL_CTOR_DEFAULT_ARG KOKKOS_INVALID_INDEX
Expand Down

0 comments on commit 62527f6

Please sign in to comment.