Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Minor cleanup rocblas-onemkl #1

Open
wants to merge 5 commits into
base: rocblas_hip_support
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ set(DOMAINS_LIST "")
if(ENABLE_MKLCPU_BACKEND
OR ENABLE_MKLGPU_BACKEND
OR ENABLE_CUBLAS_BACKEND
OR ENABLE_ROCBLAS_BACKEND
OR ENABLE_NETLIB_BACKEND)
list(APPEND DOMAINS_LIST "blas")
endif()
Expand All @@ -90,7 +91,7 @@ if(CMAKE_CXX_COMPILER OR NOT ONEMKL_SYCL_IMPLEMENTATION STREQUAL "dpc++")
string(REPLACE "\\" "/" CMAKE_CXX_COMPILER ${CMAKE_CXX_COMPILER})
endif()
else()
if(ENABLE_CUBLAS_BACKEND OR ENABLE_CURAND_BACKEND)
if(ENABLE_CUBLAS_BACKEND OR ENABLE_CURAND_BACKEND OR ENABLE_ROCBLAS_BACKEND)
set(CMAKE_CXX_COMPILER "clang++")
elseif(ENABLE_MKLGPU_BACKEND)
set(CMAKE_CXX_COMPILER "dpcpp")
Expand Down Expand Up @@ -151,8 +152,8 @@ if(WIN32 AND ONEMKL_SYCL_IMPLEMENTATION STREQUAL "dpc++")
set(CMAKE_CXX_CREATE_SHARED_LIBRARY "<CMAKE_CXX_COMPILER> -fsycl /nologo <OBJECTS> ${MKL_SYCL_LIB} /link /out:<TARGET> /implib:<TARGET_IMPLIB> /pdb:<TARGET_PDB> /dll /version:<TARGET_VERSION_MAJOR>.<TARGET_VERSION_MINOR> <LINK_FLAGS> <LINK_LIBRARIES>")
endif()

# Temporary disable sycl 2020 deprecations warnings for cuBLAS and cuSOLVER
if(ONEMKL_SYCL_IMPLEMENTATION STREQUAL "dpc++" AND (ENABLE_CUBLAS_BACKEND OR ENABLE_CUSOLVER_BACKEND))
# Temporary disable sycl 2020 deprecations warnings for cuBLAS, rocBLAS and cuSOLVER
if(ONEMKL_SYCL_IMPLEMENTATION STREQUAL "dpc++" AND (ENABLE_CUBLAS_BACKEND OR ENABLE_CUSOLVER_BACKEND OR ENABLE_ROCBLAS_BACKEND))
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSYCL2020_DISABLE_DEPRECATION_WARNINGS")
endif()

Expand Down
8 changes: 7 additions & 1 deletion cmake/FindCompiler.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,13 @@ if(is_dpcpp)
-fsycl-targets=nvptx64-nvidia-cuda -fsycl-unnamed-lambda)
list(APPEND UNIX_INTERFACE_LINK_OPTIONS
-fsycl-targets=nvptx64-nvidia-cuda)
elif(ENABLE_ROCBLAS_BACKEND)
list(APPEND UNIX_INTERFACE_COMPILE_OPTIONS
-fsycl-targets=amdgcn-amd-amdhsa -fsycl-unnamed-lambda)
list(APPEND UNIX_INTERFACE_LINK_OPTIONS
-fsycl-targets=amdgcn-amd-amdhsa)
endif()
if(ENABLE_CURAND_BACKEND OR ENABLE_CUSOLVER_BACKEND)
if(ENABLE_CURAND_BACKEND OR ENABLE_CUSOLVER_BACKEND OR ENABLE_ROCBLAS_BACKEND)
set_target_properties(ONEMKL::SYCL::SYCL PROPERTIES
INTERFACE_COMPILE_OPTIONS "${UNIX_INTERFACE_COMPILE_OPTIONS}"
INTERFACE_LINK_OPTIONS "${UNIX_INTERFACE_LINK_OPTIONS}"
Expand All @@ -57,3 +62,4 @@ if(is_dpcpp)
endif()

endif()

67 changes: 67 additions & 0 deletions cmake/FindrocBLAS.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#==========================================================================
# Copyright (C) Codeplay Software Limited
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# For your convenience, a copy of the License has been included in this
# repository.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#=========================================================================

if(NOT DEFINED HIP_PATH)
if(NOT DEFINED ENV{HIP_PATH})
set(HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to which HIP has been installed")
else()
set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to which HIP has been installed")
endif()
endif()

set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH})
list(APPEND CMAKE_PREFIX_PATH
"${HIP_PATH}/lib/cmake"
"${HIP_PATH}/../lib/cmake"
)

find_package(HIP QUIET)
find_package(rocblas REQUIRED)
get_filename_component(SYCL_BINARY_DIR ${CMAKE_CXX_COMPILER} DIRECTORY)
# the OpenCL include file from hip is opencl 1.1 and it is not compatible with DPC++
# the OpenCL include headers 1.2 onward is required. This is used to bypass NVIDIA OpenCL headers
find_path(OPENCL_INCLUDE_DIR CL/cl.h OpenCL/cl.h
HINTS
${OPENCL_INCLUDE_DIR}
${SYCL_BINARY_DIR}/../include/sycl/
)
# this is work around to avoid duplication half creation in both hip and SYCL
add_compile_definitions(HIP_NO_HALF)

find_package(Threads REQUIRED)

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(rocBLAS
REQUIRED_VARS
HIP_INCLUDE_DIRS
HIP_LIBRARIES
ROCBLAS_INCLUDE_DIR
ROCBLAS_LIBRARIES
OPENCL_INCLUDE_DIR
)
if(NOT TARGET ONEMKL::rocBLAS::rocBLAS)
add_library(ONEMKL::rocBLAS::rocBLAS SHARED IMPORTED)
set_target_properties(ONEMKL::rocBLAS::rocBLAS PROPERTIES
IMPORTED_LOCATION "${HIP_PATH}/../rocblas/lib/librocblas.so"
INTERFACE_INCLUDE_DIRECTORIES "${OPENCL_INCLUDE_DIR};${ROCBLAS_INCLUDE_DIR};${HIP_INCLUDE_DIRS};"
INTERFACE_LINK_LIBRARIES "Threads::Threads;${ROCBLAS_LIBRARIES};"
)

endif()

2 changes: 1 addition & 1 deletion include/oneapi/mkl/detail/get_device_id.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@

#define INTEL_ID 32902
#define NVIDIA_ID 4318
#define AMD_ID 1022
#define AMD_ID 4098

namespace oneapi {
namespace mkl {
Expand Down
1 change: 0 additions & 1 deletion src/blas/backends/cublas/cublas_scope_handle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
*
**************************************************************************/
#include "cublas_scope_handle.hpp"
#include <CL/sycl/detail/common.hpp>

namespace oneapi {
namespace mkl {
Expand Down
19 changes: 12 additions & 7 deletions src/blas/backends/cublas/cublas_scope_handle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,17 @@
#define _CUBLAS_SCOPED_HANDLE_HPP_
#if __has_include(<sycl/sycl.hpp>)
#include <sycl/sycl.hpp>
#include <sycl/backend/cuda.hpp>
#include <sycl/context.hpp>
#include <sycl/detail/pi.hpp>
#include <sycl/detail/common.hpp>
#else
#include <CL/sycl.hpp>
#endif
#include <CL/sycl/backend/cuda.hpp>
#include <CL/sycl/context.hpp>
#include <CL/sycl/detail/pi.hpp>
#include <CL/sycl/detail/common.hpp>
#endif
#include <atomic>
#include <memory>
#include <thread>
Expand Down Expand Up @@ -78,12 +83,12 @@ class CublasScopedContextHandler {

~CublasScopedContextHandler() noexcept(false);
/**
* @brief get_handle: creates the handle by implicitly impose the advice
* given by nvidia for creating a cublas_handle. (e.g. one cuStream per device
* per thread).
* @param queue sycl queue.
* @return cublasHandle_t a handle to construct cublas routines
*/
* @brief get_handle: creates the handle by implicitly impose the advice
* given by nvidia for creating a cublas_handle. (e.g. one cuStream per device
* per thread).
* @param queue sycl queue.
* @return cublasHandle_t a handle to construct cublas routines
*/
cublasHandle_t get_handle(const sycl::queue &queue);
// This is a work-around function for reinterpret_casting the memory. This
// will be fixed when SYCL-2020 has been implemented for Pi backend.
Expand Down
4 changes: 2 additions & 2 deletions src/blas/backends/cublas/cublas_task.hpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#ifndef _MKL_BLAS_CUBLAS_TASK_HPP_
#define _MKL_BLAS_CUBLAS_TASK_HPP_
#include <cublas_v2.h>
#include <cuda.h>
//#include <cuda.h>
#include <complex>
#if __has_include(<sycl/sycl.hpp>)
#include <sycl/sycl.hpp>
Expand All @@ -11,7 +11,7 @@
#include "oneapi/mkl/types.hpp"
#ifndef __HIPSYCL__
#include "cublas_scope_handle.hpp"
#include <CL/sycl/detail/pi.hpp>
#include <sycl/detail/pi.hpp>
#else
#include "cublas_scope_handle_hipsycl.hpp"
namespace sycl {
Expand Down
6 changes: 4 additions & 2 deletions src/blas/backends/rocblas/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,13 @@

set(LIB_NAME onemkl_blas_rocblas)
set(LIB_OBJ ${LIB_NAME}_obj)
find_package(rocblas REQUIRED)
find_package(rocBLAS REQUIRED)
set(SOURCES rocblas_level1.cpp
rocblas_level2.cpp
rocblas_level3.cpp
rocblas_batch.cpp
rocblas_extensions.cpp
$<$<STREQUAL:${ONEMKL_SYCL_IMPLEMENTATION},dpc++>:rocblas_scope_handle.cpp >
$<$<STREQUAL:${ONEMKL_SYCL_IMPLEMENTATION},hipsycl>:rocblas_scope_handle_hipsycl.cpp >
$<$<BOOL:${BUILD_SHARED_LIBS}>: rocblas_wrappers.cpp>)
add_library(${LIB_NAME})
Expand All @@ -38,7 +39,7 @@ target_include_directories(${LIB_OBJ}
${PROJECT_BINARY_DIR}/bin
)
target_compile_options(${LIB_OBJ} PRIVATE ${ONEMKL_BUILD_COPT})
target_link_libraries(${LIB_OBJ} PUBLIC ONEMKL::SYCL::SYCL roc::rocblas)
target_link_libraries(${LIB_OBJ} PUBLIC ONEMKL::SYCL::SYCL ONEMKL::rocBLAS::rocBLAS)
target_compile_features(${LIB_OBJ} PUBLIC cxx_std_17)
set_target_properties(${LIB_OBJ} PROPERTIES
POSITION_INDEPENDENT_CODE ON)
Expand All @@ -62,3 +63,4 @@ install(TARGETS ${LIB_NAME} EXPORT oneMKLTargets
ARCHIVE DESTINATION lib
LIBRARY DESTINATION lib
)

62 changes: 62 additions & 0 deletions src/blas/backends/rocblas/rocblas_handle.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/***************************************************************************
* Copyright (C) Codeplay Software Limited
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* For your convenience, a copy of the License has been included in this
* repository.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
**************************************************************************/
#ifndef RBLAS_HANDLE_HPP
#define RBLAS_HANDLE_HPP
#include <atomic>
#include <unordered_map>
#include "rocblas_helper.hpp"

namespace oneapi {
namespace mkl {
namespace blas {
namespace rocblas {

template <typename T>
struct rblas_handle {
using handle_container_t = std::unordered_map<T, std::atomic<rocblas_handle> *>;
handle_container_t rblas_handle_mapper_{};
~rblas_handle() noexcept(false) {
for (auto &handle_pair : rblas_handle_mapper_) {
rocblas_status err;
if (handle_pair.second != nullptr) {
auto handle = handle_pair.second->exchange(nullptr);
if (handle != nullptr) {
ROCBLAS_ERROR_FUNC(rocblas_destroy_handle, err, handle);
handle = nullptr;
}
else {
// if the handle is nullptr it means the handle was already
// destroyed by the ContextCallback and we're free to delete the
// atomic object.
delete handle_pair.second;
}

handle_pair.second = nullptr;
}
}
rblas_handle_mapper_.clear();
}
};

} // namespace rocblas
} // namespace blas
} // namespace mkl
} // namespace oneapi

#endif // RBLAS_HANDLE_HPP
1 change: 1 addition & 0 deletions src/blas/backends/rocblas/rocblas_helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include <rocblas.h>
#include <complex>
#include "oneapi/mkl/types.hpp"
#include <hip/hip_runtime.h>

namespace oneapi {
namespace mkl {
Expand Down
9 changes: 9 additions & 0 deletions src/blas/backends/rocblas/rocblas_level1.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -954,6 +954,9 @@ sycl::event sdsdot(sycl::queue &queue, int64_t n, float sb, const float *x, int6
});
});
done.wait();
#if(defined(ENABLE_ROCBLAS_BACKEND) && !defined(__HIPSYCL__))
queue.wait();
#endif
result[0] = result[0] + sb;
return done;
}
Expand Down Expand Up @@ -1028,6 +1031,9 @@ inline sycl::event iamax(Func func, sycl::queue &queue, int64_t n, const T *x, c
});
});
done.wait();
#if(defined(ENABLE_ROCBLAS_BACKEND) && !defined(__HIPSYCL__))
queue.wait();
#endif
result[0] = std::max((int64_t)(*int_res_p - 1), int64_t{ 0 });
return done;
}
Expand Down Expand Up @@ -1109,6 +1115,9 @@ inline sycl::event iamin(Func func, sycl::queue &queue, int64_t n, const T *x, c
});
});
done.wait();
#if(defined(ENABLE_ROCBLAS_BACKEND) && !defined(__HIPSYCL__))
queue.wait();
#endif
result[0] = std::max((int64_t)(*int_res_p - 1), int64_t{ 0 });
return done;
}
Expand Down
Loading