Skip to content

Commit

Permalink
[SYCL] Clear event_impl dependencies with graph cleanup (#4793)
Browse files Browse the repository at this point in the history
When event_impl's d-tor is called its dependencies are starting to recursive releasing. It leads to stack overflow.
Clearing event_impl dependencies in graph cleanup helps to eliminate this problem.
getWaitList was moved to event_impl class so the work with dependencies could be wrapped to mutex.
Test : intel/llvm-test-suite#574
Signed-off-by: mdimakov <[email protected]>
  • Loading branch information
maximdimakov authored Nov 19, 2021
1 parent b5b6673 commit 5bb3ab9
Show file tree
Hide file tree
Showing 7 changed files with 51 additions and 37 deletions.
43 changes: 34 additions & 9 deletions sycl/source/detail/event_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -210,18 +210,24 @@ void event_impl::wait(

void event_impl::wait_and_throw(
std::shared_ptr<cl::sycl::detail::event_impl> Self) {
Command *Cmd = static_cast<Command *>(Self->getCommand());
QueueImplPtr submittedQueue = nullptr;
if (Cmd)
submittedQueue = Cmd->getSubmittedQueue();
Scheduler &Sched = Scheduler::getInstance();

QueueImplPtr submittedQueue = nullptr;
{
Scheduler::ReadLockT Lock(Sched.MGraphLock);
Command *Cmd = static_cast<Command *>(Self->getCommand());
if (Cmd)
submittedQueue = Cmd->getSubmittedQueue();
}
wait(Self);

for (auto &EventImpl :
detail::Scheduler::getInstance().getWaitList(std::move(Self))) {
Command *Cmd = (Command *)EventImpl->getCommand();
if (Cmd)
Cmd->getSubmittedQueue()->throw_asynchronous();
{
Scheduler::ReadLockT Lock(Sched.MGraphLock);
for (auto &EventImpl : getWaitList()) {
Command *Cmd = (Command *)EventImpl->getCommand();
if (Cmd)
Cmd->getSubmittedQueue()->throw_asynchronous();
}
}
if (submittedQueue)
submittedQueue->throw_asynchronous();
Expand Down Expand Up @@ -325,6 +331,25 @@ pi_native_handle event_impl::getNative() const {
return Handle;
}

std::vector<EventImplPtr> event_impl::getWaitList() {
std::lock_guard<std::mutex> Lock(MMutex);

std::vector<EventImplPtr> Result;
Result.reserve(MPreparedDepsEvents.size() + MPreparedHostDepsEvents.size());
Result.insert(Result.end(), MPreparedDepsEvents.begin(),
MPreparedDepsEvents.end());
Result.insert(Result.end(), MPreparedHostDepsEvents.begin(),
MPreparedHostDepsEvents.end());

return Result;
}

void event_impl::cleanupDependencyEvents() {
std::lock_guard<std::mutex> Lock(MMutex);
MPreparedDepsEvents.clear();
MPreparedHostDepsEvents.clear();
}

} // namespace detail
} // namespace sycl
} // __SYCL_INLINE_NAMESPACE(cl)
16 changes: 14 additions & 2 deletions sycl/source/detail/event_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ class context_impl;
using ContextImplPtr = std::shared_ptr<cl::sycl::detail::context_impl>;
class queue_impl;
using QueueImplPtr = std::shared_ptr<cl::sycl::detail::queue_impl>;
class event_impl;
using EventImplPtr = std::shared_ptr<cl::sycl::detail::event_impl>;

class event_impl {
public:
Expand Down Expand Up @@ -175,6 +177,14 @@ class event_impl {
return MPreparedHostDepsEvents;
}

/// Returns vector of event_impl that this event_impl depends on.
///
/// @return a vector of "immediate" dependencies for this event_impl.
std::vector<EventImplPtr> getWaitList();

/// Cleans dependencies of this event_impl
void cleanupDependencyEvents();

private:
// When instrumentation is enabled emits trace event for event wait begin and
// returns the telemetry event generated for the wait
Expand All @@ -192,15 +202,17 @@ class event_impl {
void *MCommand = nullptr;

/// Dependency events prepared for waiting by backend.
std::vector<std::shared_ptr<event_impl>> MPreparedDepsEvents;
std::vector<std::shared_ptr<event_impl>> MPreparedHostDepsEvents;
std::vector<EventImplPtr> MPreparedDepsEvents;
std::vector<EventImplPtr> MPreparedHostDepsEvents;

enum HostEventState : int { HES_NotComplete = 0, HES_Complete };

// State of host event. Employed only for host events and event with no
// backend's representation (e.g. alloca). Used values are listed in
// HostEventState enum.
std::atomic<int> MState;

std::mutex MMutex;
};

} // namespace detail
Expand Down
1 change: 1 addition & 0 deletions sycl/source/detail/scheduler/graph_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ static void handleVisitedNodes(std::vector<Command *> &Visited) {
for (Command *Cmd : Visited) {
if (Cmd->MMarks.MToBeDeleted) {
Cmd->getEvent()->setCommand(nullptr);
Cmd->getEvent()->cleanupDependencyEvents();
delete Cmd;
} else
Cmd->MMarks.MVisited = false;
Expand Down
13 changes: 0 additions & 13 deletions sycl/source/detail/scheduler/graph_processor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,6 @@ static Command *getCommand(const EventImplPtr &Event) {
return (Command *)Event->getCommand();
}

std::vector<EventImplPtr>
Scheduler::GraphProcessor::getWaitList(EventImplPtr Event) {
std::vector<EventImplPtr> Result;
const std::vector<EventImplPtr> &PDeps = Event->getPreparedDepsEvents();
const std::vector<EventImplPtr> &PHDeps = Event->getPreparedHostDepsEvents();

Result.reserve(PDeps.size() + PHDeps.size());
Result.insert(Result.end(), PDeps.begin(), PDeps.end());
Result.insert(Result.end(), PHDeps.begin(), PHDeps.end());

return Result;
}

void Scheduler::GraphProcessor::waitForEvent(EventImplPtr Event,
ReadLockT &GraphReadLock,
bool LockTheLock) {
Expand Down
5 changes: 0 additions & 5 deletions sycl/source/detail/scheduler/scheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -206,11 +206,6 @@ Scheduler &Scheduler::getInstance() {
return GlobalHandler::instance().getScheduler();
}

std::vector<EventImplPtr> Scheduler::getWaitList(EventImplPtr Event) {
ReadLockT Lock(MGraphLock);
return GraphProcessor::getWaitList(std::move(Event));
}

void Scheduler::waitForEvent(EventImplPtr Event) {
ReadLockT Lock(MGraphLock);
// It's fine to leave the lock unlocked upon return from waitForEvent as
Expand Down
8 changes: 1 addition & 7 deletions sycl/source/detail/scheduler/scheduler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -427,9 +427,6 @@ class Scheduler {
/// \return an instance of the scheduler object.
static Scheduler &getInstance();

/// \return a vector of "immediate" dependencies for the Event given.
std::vector<EventImplPtr> getWaitList(EventImplPtr Event);

/// Allocate buffers in the pool for a provided stream
///
/// \param Impl to the stream object
Expand Down Expand Up @@ -721,10 +718,6 @@ class Scheduler {
/// \ingroup sycl_graph
class GraphProcessor {
public:
/// \return a list of events that represent immediate dependencies of the
/// command associated with Event passed.
static std::vector<EventImplPtr> getWaitList(EventImplPtr Event);

/// Waits for the command, associated with Event passed, is completed.
/// \param GraphReadLock read-lock which is already acquired for reading
/// \param LockTheLock selects if graph lock should be locked upon return
Expand Down Expand Up @@ -763,6 +756,7 @@ class Scheduler {
friend class Command;
friend class DispatchHostTask;
friend class queue_impl;
friend class event_impl;

/// Stream buffers structure.
///
Expand Down
2 changes: 1 addition & 1 deletion sycl/source/event.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ void event::wait_and_throw(const std::vector<event> &EventList) {
std::vector<event> event::get_wait_list() {
std::vector<event> Result;

for (auto &EventImpl : detail::Scheduler::getInstance().getWaitList(impl))
for (auto &EventImpl : impl->getWaitList())
Result.push_back(detail::createSyclObjFromImpl<event>(EventImpl));

return Result;
Expand Down

0 comments on commit 5bb3ab9

Please sign in to comment.