Skip to content

Commit

Permalink
[SYCL] Switch to using blocking USM free for OpenCL GPU (#4928)
Browse files Browse the repository at this point in the history
Whenever a kernel is enqueued on GPU, the GPU driver records the state
of all USM pointers that might be used in an indirect fashion. Because
of this, these pointers cannot be freed until the execution of the kernel
is finished.
This change addresses this problem for OpenCL by using a blocking version
of free, while Level Zero already handles this by deferring USM release.
The change is temporarily limited to OpenCL GPU until a bug in OpenCL CPU
runtime is resolved.
  • Loading branch information
sergey-semenov authored Dec 10, 2021
1 parent db7ff53 commit c4a7290
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 4 deletions.
4 changes: 3 additions & 1 deletion sycl/include/CL/sycl/detail/pi.h
Original file line number Diff line number Diff line change
Expand Up @@ -1635,7 +1635,9 @@ __SYCL_EXPORT pi_result piextUSMSharedAlloc(void **result_ptr,
pi_usm_mem_properties *properties,
size_t size, pi_uint32 alignment);

/// Frees allocated USM memory
/// Indicates that the allocated USM memory is no longer needed on the runtime
/// side. The actual freeing of the memory may be done in a blocking or deferred
/// manner, e.g. to avoid issues with indirect memory access from kernels.
///
/// \param context is the pi_context of the allocation
/// \param ptr is the memory to be freed
Expand Down
52 changes: 49 additions & 3 deletions sycl/plugins/opencl/pi_opencl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ CONSTFIX char clHostMemAllocName[] = "clHostMemAllocINTEL";
CONSTFIX char clDeviceMemAllocName[] = "clDeviceMemAllocINTEL";
CONSTFIX char clSharedMemAllocName[] = "clSharedMemAllocINTEL";
CONSTFIX char clMemFreeName[] = "clMemFreeINTEL";
CONSTFIX char clMemBlockingFreeName[] = "clMemBlockingFreeINTEL";
CONSTFIX char clCreateBufferWithPropertiesName[] =
"clCreateBufferWithPropertiesINTEL";
CONSTFIX char clSetKernelArgMemPointerName[] = "clSetKernelArgMemPointerINTEL";
Expand Down Expand Up @@ -969,11 +970,56 @@ pi_result piextUSMSharedAlloc(void **result_ptr, pi_context context,
/// \param context is the pi_context of the allocation
/// \param ptr is the memory to be freed
pi_result piextUSMFree(pi_context context, void *ptr) {
// Use a blocking free to avoid issues with indirect access from kernels that
// might be still running.
clMemBlockingFreeINTEL_fn FuncPtr = nullptr;

// We need to use clMemBlockingFreeINTEL here, however, due to a bug in OpenCL
// CPU runtime this call fails with CL_INVALID_EVENT on CPU devices in certain
// cases. As a temporary workaround, this function replicates caching of
// extension function pointers in getExtFuncFromContext, while choosing
// clMemBlockingFreeINTEL for GPU and clMemFreeINTEL for other device types.
// TODO remove this workaround when the new OpenCL CPU runtime version is
// uplifted in CI.
static_assert(
std::is_same<clMemBlockingFreeINTEL_fn, clMemFreeINTEL_fn>::value);
cl_uint deviceCount;
cl_int ret_err =
clGetContextInfo(cast<cl_context>(context), CL_CONTEXT_NUM_DEVICES,
sizeof(cl_uint), &deviceCount, nullptr);

if (ret_err != CL_SUCCESS || deviceCount < 1) {
return PI_INVALID_CONTEXT;
}

std::vector<cl_device_id> devicesInCtx(deviceCount);
ret_err = clGetContextInfo(cast<cl_context>(context), CL_CONTEXT_DEVICES,
deviceCount * sizeof(cl_device_id),
devicesInCtx.data(), nullptr);

if (ret_err != CL_SUCCESS) {
return PI_INVALID_CONTEXT;
}

bool useBlockingFree = true;
for (const cl_device_id &dev : devicesInCtx) {
cl_device_type devType = CL_DEVICE_TYPE_DEFAULT;
ret_err = clGetDeviceInfo(dev, CL_DEVICE_TYPE, sizeof(cl_device_type),
&devType, nullptr);
if (ret_err != CL_SUCCESS) {
return PI_INVALID_DEVICE;
}
useBlockingFree &= devType == CL_DEVICE_TYPE_GPU;
}

clMemFreeINTEL_fn FuncPtr = nullptr;
pi_result RetVal = PI_INVALID_OPERATION;
RetVal = getExtFuncFromContext<clMemFreeName, clMemFreeINTEL_fn>(context,
&FuncPtr);
if (useBlockingFree)
RetVal =
getExtFuncFromContext<clMemBlockingFreeName, clMemBlockingFreeINTEL_fn>(
context, &FuncPtr);
else
RetVal = getExtFuncFromContext<clMemFreeName, clMemFreeINTEL_fn>(context,
&FuncPtr);

if (FuncPtr) {
RetVal = cast<pi_result>(FuncPtr(cast<cl_context>(context), ptr));
Expand Down

0 comments on commit c4a7290

Please sign in to comment.