From 4a5b1d98c2fb9f2477ee3197c4e8a3d72b3472ca Mon Sep 17 00:00:00 2001 From: Benjamin Welton Date: Fri, 25 Oct 2024 13:13:36 -0700 Subject: [PATCH] SDK: counter collection serialization per device (#1157) Migrates profiler_serializer class in QueueController to have an instance per-agent instead of one globally. Other changes in this commit are to allow for maps of the queues associated with each agent to be passed to profiler_serializer when it is turned on/off. Existing test cases cover whether or not the kernels are serialized (multistream app). New test case added to show that this serialization only occurs on a per device level with a kernel launched on one device waiting for a value to be set on the other. --- CHANGELOG.md | 1 + samples/counter_collection/CMakeLists.txt | 22 +++++ samples/counter_collection/client.cpp | 32 ++++++- .../per_dev_serialization.cpp | 71 +++++++++++++++ .../counters/dispatch_handlers.cpp | 20 +++-- .../lib/rocprofiler-sdk/hsa/hsa_barrier.cpp | 4 +- .../lib/rocprofiler-sdk/hsa/hsa_barrier.hpp | 5 +- .../hsa/profile_serializer.cpp | 10 +-- .../hsa/profile_serializer.hpp | 5 +- .../rocprofiler-sdk/hsa/queue_controller.cpp | 87 ++++++++++++++++--- .../rocprofiler-sdk/hsa/queue_controller.hpp | 19 ++-- .../lib/rocprofiler-sdk/tests/hsa_barrier.cpp | 35 ++++++-- .../rocprofiler-sdk/thread_trace/att_core.cpp | 15 ++-- 13 files changed, 275 insertions(+), 51 deletions(-) create mode 100644 samples/counter_collection/per_dev_serialization.cpp diff --git a/CHANGELOG.md b/CHANGELOG.md index b75b8bad..e643057b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -119,6 +119,7 @@ Full documentation for ROCprofiler-SDK is available at [Click Here](source/docs/ - Fix Support for derived counters in reduce operation and bug fix for max in reduce - Check to force tools to initialize context id with zero. - Fix to handle a range of values for select() dimension in expressions parser. +- PMC dispatch based Counter Collection Serialization is now per-device instead of global across all devices. ### Removed diff --git a/samples/counter_collection/CMakeLists.txt b/samples/counter_collection/CMakeLists.txt index d0906ec1..ca077239 100644 --- a/samples/counter_collection/CMakeLists.txt +++ b/samples/counter_collection/CMakeLists.txt @@ -53,6 +53,28 @@ set_tests_properties( "${counter-collection-buffer-env}" FAIL_REGULAR_EXPRESSION "${ROCPROFILER_DEFAULT_FAIL_REGEX}") +set_source_files_properties(per_dev_serialization.cpp PROPERTIES LANGUAGE HIP) +add_executable(counter-collection-buffer-device-serialization) +target_sources(counter-collection-buffer-device-serialization + PRIVATE per_dev_serialization.cpp) +target_link_libraries(counter-collection-buffer-device-serialization + PRIVATE counter-collection-buffer-client Threads::Threads) + +rocprofiler_samples_get_ld_library_path_env(LIBRARY_PATH_ENV) +rocprofiler_samples_get_preload_env(PRELOAD_ENV counter-collection-buffer-client) + +set(counter-collection-buffer-device-serialization-env "${PRELOAD_ENV}" + "${LIBRARY_PATH_ENV}") + +add_test(NAME counter-collection-buffer-device-serialization + COMMAND $) + +set_tests_properties( + counter-collection-buffer-device-serialization + PROPERTIES TIMEOUT 120 LABELS "samples" ENVIRONMENT + "${counter-collection-buffer-device-serialization-env}" + FAIL_REGULAR_EXPRESSION "${ROCPROFILER_DEFAULT_FAIL_REGEX}") + add_library(counter-collection-callback-client SHARED) target_sources(counter-collection-callback-client PRIVATE callback_client.cpp client.hpp) target_link_libraries( diff --git a/samples/counter_collection/client.cpp b/samples/counter_collection/client.cpp index ced8f515..dbec8d5f 100644 --- a/samples/counter_collection/client.cpp +++ b/samples/counter_collection/client.cpp @@ -22,6 +22,7 @@ #include "client.hpp" +#include #include #include #include @@ -74,6 +75,13 @@ get_buffer() return buf; } +std::unordered_map>** +dimension_cache() +{ + static std::unordered_map>* cache; + return &cache; +} + /** * For a given counter, query the dimensions that it has. Typically you will * want to call this function once to get the dimensions and cache them. @@ -81,6 +89,20 @@ get_buffer() std::vector counter_dimensions(rocprofiler_counter_id_t counter) { + if(*dimension_cache() == nullptr) return {}; + + if((*dimension_cache())->count(counter.handle) > 0) + { + return (*dimension_cache())->at(counter.handle); + } + + return {}; +} + +void +fill_dimension_cache(rocprofiler_counter_id_t counter) +{ + assert(*dimension_cache() != nullptr); std::vector dims; rocprofiler_available_dimensions_cb_t cb = [](rocprofiler_counter_id_t, @@ -97,7 +119,7 @@ counter_dimensions(rocprofiler_counter_id_t counter) }; ROCPROFILER_CALL(rocprofiler_iterate_counter_dimensions(counter, cb, &dims), "Could not iterate counter dimensions"); - return dims; + (*dimension_cache())->emplace(counter.handle, dims); } /** @@ -251,6 +273,7 @@ build_profile_for_agent(rocprofiler_agent_id_t agent, { std::clog << "Counter: " << counter.handle << " " << version.name << "\n"; collect_counters.push_back(counter); + fill_dimension_cache(counter); } } @@ -375,6 +398,10 @@ tool_fini(void* user_data) auto* output_stream = static_cast(user_data); *output_stream << std::flush; if(output_stream != &std::cout && output_stream != &std::cerr) delete output_stream; + + auto* tmp_ptr = *dimension_cache(); + *dimension_cache() = nullptr; + delete tmp_ptr; } } // namespace @@ -416,6 +443,9 @@ rocprofiler_configure(uint32_t version, &tool_fini, static_cast(output_stream)}; + *dimension_cache() = + new std::unordered_map>(); + // return pointer to configure data return &cfg; } diff --git a/samples/counter_collection/per_dev_serialization.cpp b/samples/counter_collection/per_dev_serialization.cpp new file mode 100644 index 00000000..fb14018e --- /dev/null +++ b/samples/counter_collection/per_dev_serialization.cpp @@ -0,0 +1,71 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include + +#include "client.hpp" + +#define HIP_CALL(call) \ + do \ + { \ + hipError_t err = call; \ + if(err != hipSuccess) \ + { \ + fprintf(stderr, "%s\n", hipGetErrorString(err)); \ + abort(); \ + } \ + } while(0) + +__global__ void +kernelA(int* wait_on, int value, int* no_opt) +{ + while(*wait_on != value) + { + (*no_opt)++; + }; + (*wait_on)--; +} + +int +main(int, char**) +{ + int ntotdevice = 0; + HIP_CALL(hipGetDeviceCount(&ntotdevice)); + if(ntotdevice < 2) return 0; + + start(); + int* check_value = nullptr; + int* no_opt = nullptr; + HIP_CALL(hipMallocManaged(&check_value, sizeof(*check_value))); + HIP_CALL(hipMallocManaged(&no_opt, sizeof(*no_opt))); + *no_opt = 0; + *check_value = 1; + // Will hang if per-device serialization is not functional + HIP_CALL(hipSetDevice(0)); + hipLaunchKernelGGL(kernelA, dim3(1), dim3(1), 0, 0, check_value, 0, no_opt); + HIP_CALL(hipSetDevice(1)); + hipLaunchKernelGGL(kernelA, dim3(1), dim3(1), 0, 0, check_value, 1, no_opt); + HIP_CALL(hipSetDevice(0)); + HIP_CALL(hipDeviceSynchronize()); + + std::cerr << "Run complete\n"; +} diff --git a/source/lib/rocprofiler-sdk/counters/dispatch_handlers.cpp b/source/lib/rocprofiler-sdk/counters/dispatch_handlers.cpp index 0fdcec91..b92700b3 100644 --- a/source/lib/rocprofiler-sdk/counters/dispatch_handlers.cpp +++ b/source/lib/rocprofiler-sdk/counters/dispatch_handlers.cpp @@ -62,12 +62,14 @@ queue_cb(const context::context* ctx, // and maybe adds barrier packets if the state is transitioning from serialized <-> // unserialized auto maybe_add_serialization = [&](auto& gen_pkt) { - CHECK_NOTNULL(hsa::get_queue_controller())->serializer().rlock([&](const auto& serializer) { - for(auto& s_pkt : serializer.kernel_dispatch(queue)) - { - gen_pkt->before_krn_pkt.push_back(s_pkt.ext_amd_aql_pm4); - } - }); + CHECK_NOTNULL(hsa::get_queue_controller()) + ->serializer(&queue) + .rlock([&](const auto& serializer) { + for(auto& s_pkt : serializer.kernel_dispatch(queue)) + { + gen_pkt->before_krn_pkt.push_back(s_pkt.ext_amd_aql_pm4); + } + }); }; // Packet generated when no instrumentation is performed. May contain serialization @@ -189,9 +191,9 @@ completed_cb(const context::context* ctx, if(!pkt) return; - CHECK_NOTNULL(hsa::get_queue_controller())->serializer().wlock([&](auto& serializer) { - serializer.kernel_completion_signal(session.queue); - }); + CHECK_NOTNULL(hsa::get_queue_controller()) + ->serializer(&session.queue) + .wlock([&](auto& serializer) { serializer.kernel_completion_signal(session.queue); }); // We have no profile config, nothing to output. if(!prof_config) return; diff --git a/source/lib/rocprofiler-sdk/hsa/hsa_barrier.cpp b/source/lib/rocprofiler-sdk/hsa/hsa_barrier.cpp index 3b673102..ca1b701e 100644 --- a/source/lib/rocprofiler-sdk/hsa/hsa_barrier.cpp +++ b/source/lib/rocprofiler-sdk/hsa/hsa_barrier.cpp @@ -49,13 +49,13 @@ hsa_barrier::~hsa_barrier() } void -hsa_barrier::set_barrier(const queue_map_t& q) +hsa_barrier::set_barrier(const queue_map_ptr_t& q) { _core_api.hsa_signal_store_screlease_fn(_barrier_signal, 1); _queue_waiting.wlock([&](auto& queue_waiting) { for(const auto& [_, queue] : q) { - queue->lock_queue([ptr = queue.get(), &queue_waiting]() { + queue->lock_queue([ptr = queue, &queue_waiting]() { if(ptr->active_async_packets() > 0) { queue_waiting[ptr->get_id().handle] = ptr->active_async_packets(); diff --git a/source/lib/rocprofiler-sdk/hsa/hsa_barrier.hpp b/source/lib/rocprofiler-sdk/hsa/hsa_barrier.hpp index 1023240e..a7546e94 100644 --- a/source/lib/rocprofiler-sdk/hsa/hsa_barrier.hpp +++ b/source/lib/rocprofiler-sdk/hsa/hsa_barrier.hpp @@ -46,12 +46,11 @@ class Queue; class hsa_barrier { public: - using queue_map_t = std::unordered_map>; - + using queue_map_ptr_t = std::unordered_map; hsa_barrier(std::function&& finished, CoreApiTable core_api); ~hsa_barrier(); - void set_barrier(const queue_map_t& q); + void set_barrier(const queue_map_ptr_t& q); std::optional enqueue_packet(const Queue* queue); bool register_completion(const Queue* queue); diff --git a/source/lib/rocprofiler-sdk/hsa/profile_serializer.cpp b/source/lib/rocprofiler-sdk/hsa/profile_serializer.cpp index 41110c46..78e1d08d 100644 --- a/source/lib/rocprofiler-sdk/hsa/profile_serializer.cpp +++ b/source/lib/rocprofiler-sdk/hsa/profile_serializer.cpp @@ -36,7 +36,7 @@ profiler_serializer_ready_signal_handler(hsa_signal_value_t /* signal_value */, auto* hsa_queue = static_cast(data); const auto* queue = CHECK_NOTNULL(get_queue_controller())->get_queue(*hsa_queue); CHECK(queue); - CHECK_NOTNULL(get_queue_controller())->serializer().wlock([&](auto& serializer) { + CHECK_NOTNULL(get_queue_controller())->serializer(queue).wlock([&](auto& serializer) { serializer.queue_ready(hsa_queue, *queue); }); return true; @@ -242,7 +242,7 @@ profiler_serializer::destroy_queue(hsa_queue_t* id, const Queue& queue) // Enable the serializer void -profiler_serializer::enable(const queue_map_t& queues) +profiler_serializer::enable(const hsa_barrier::queue_map_ptr_t& queues) { if(_serializer_status == Status::ENABLED) return; @@ -257,14 +257,14 @@ profiler_serializer::enable(const queue_map_t& queues) std::make_unique( [] {}, CHECK_NOTNULL(get_queue_controller())->get_core_table())); _serializer_status = Status::ENABLED; - _barrier.back().barrier->set_barrier(queues); + _barrier.back().barrier->set_barrier(queues); ROCP_INFO << "Profiler serialization enabled"; } // Disable the serializer void -profiler_serializer::disable(const queue_map_t& queues) +profiler_serializer::disable(const hsa_barrier::queue_map_ptr_t& queues) { if(_serializer_status == Status::DISABLED) return; @@ -279,8 +279,8 @@ profiler_serializer::disable(const queue_map_t& queues) std::make_unique( [] {}, CHECK_NOTNULL(get_queue_controller())->get_core_table())); _serializer_status = Status::DISABLED; - _barrier.back().barrier->set_barrier(queues); + _barrier.back().barrier->set_barrier(queues); ROCP_INFO << "Profiler serialization disabled"; } diff --git a/source/lib/rocprofiler-sdk/hsa/profile_serializer.hpp b/source/lib/rocprofiler-sdk/hsa/profile_serializer.hpp index f0bb28c5..68705bac 100644 --- a/source/lib/rocprofiler-sdk/hsa/profile_serializer.hpp +++ b/source/lib/rocprofiler-sdk/hsa/profile_serializer.hpp @@ -68,7 +68,6 @@ class profiler_serializer std::unique_ptr barrier; }; - using queue_map_t = std::unordered_map>; void kernel_completion_signal(const Queue&); // Signal a kernel dispatch is taking place, generates packets needed to be // inserted to support kernel dispatch @@ -76,9 +75,9 @@ class profiler_serializer void queue_ready(hsa_queue_t* hsa_queue, const Queue& queue); // Enable the serializer - void enable(const queue_map_t& queues); + void enable(const hsa_barrier::queue_map_ptr_t& queues); // Disable the serializer - void disable(const queue_map_t& queues); + void disable(const hsa_barrier::queue_map_ptr_t& queues); void destroy_queue(hsa_queue_t* id, const Queue& queue); diff --git a/source/lib/rocprofiler-sdk/hsa/queue_controller.cpp b/source/lib/rocprofiler-sdk/hsa/queue_controller.cpp index 32c79967..94933373 100644 --- a/source/lib/rocprofiler-sdk/hsa/queue_controller.cpp +++ b/source/lib/rocprofiler-sdk/hsa/queue_controller.cpp @@ -28,6 +28,7 @@ #include "lib/rocprofiler-sdk/registration.hpp" #include +#include namespace rocprofiler { @@ -62,8 +63,9 @@ create_queue(hsa_agent_t agent, controller->get_ext_table(), queue); - controller->serializer().wlock( - [&](auto& serializer) { serializer.add_queue(queue, *new_queue); }); + controller->serializer(new_queue.get()).wlock([&](auto& serializer) { + serializer.add_queue(queue, *new_queue); + }); controller->add_queue(*queue, std::move(new_queue)); return HSA_STATUS_SUCCESS; @@ -316,23 +318,88 @@ QueueController::get_queue(const hsa_queue_t& _hsa_queue) const _hsa_queue); } +common::Synchronized& +QueueController::serializer(const Queue* queue) +{ + CHECK(queue); + common::Synchronized* ret = nullptr; + _profiler_serializer.ulock( + [&](const auto& m) { + if(auto ptr = m.find(queue->get_agent().get_rocp_agent()->id); ptr != m.end()) + { + ret = ptr->second.get(); + return true; + } + return false; + }, + [&](auto& m) { + ret = m.emplace(queue->get_agent().get_rocp_agent()->id, + std::make_shared>()) + .first->second.get(); + if(_serialized_enabled.load() == true) + { + ret->wlock([&](auto& serializer) { serializer.enable({}); }); + } + return true; + }); + return *ret; +} + +namespace +{ +std::unordered_map +per_dev_map(const QueueController::queue_map_t& _queues_v) +{ + std::unordered_map dmap; + for(const auto& [k, v] : _queues_v) + { + dmap[v->get_agent().get_rocp_agent()->id][k] = v.get(); + } + return dmap; +} +}; // namespace + void QueueController::disable_serialization() { - _queues.rlock([](const queue_map_t& _queues_v) { - if(get_queue_controller()) - get_queue_controller()->serializer().wlock( - [&](auto& serializer) { serializer.disable(_queues_v); }); + _queues.rlock([&](const queue_map_t& _queues_v) { + _serialized_enabled.store(false); + auto pd_map = per_dev_map(_queues_v); + _profiler_serializer.wlock([&](auto& m) { + for(auto& [k, v] : m) + { + if(auto it = pd_map.find(k); it != pd_map.end()) + { + v->wlock([&](auto& serializer) { serializer.disable(it->second); }); + } + else + { + v->wlock([&](auto& serializer) { serializer.disable({}); }); + } + } + }); }); } void QueueController::enable_serialization() { - _queues.rlock([](const queue_map_t& _queues_v) { - if(get_queue_controller()) - get_queue_controller()->serializer().wlock( - [&](auto& serializer) { serializer.enable(_queues_v); }); + _queues.rlock([&](const queue_map_t& _queues_v) { + _serialized_enabled.store(true); + auto pd_map = per_dev_map(_queues_v); + _profiler_serializer.wlock([&](auto& m) { + for(auto& [k, v] : m) + { + if(auto it = pd_map.find(k); it != pd_map.end()) + { + v->wlock([&](auto& serializer) { serializer.enable(it->second); }); + } + else + { + v->wlock([&](auto& serializer) { serializer.enable({}); }); + } + } + }); }); } diff --git a/source/lib/rocprofiler-sdk/hsa/queue_controller.hpp b/source/lib/rocprofiler-sdk/hsa/queue_controller.hpp index 99196e11..63dd2f3d 100644 --- a/source/lib/rocprofiler-sdk/hsa/queue_controller.hpp +++ b/source/lib/rocprofiler-sdk/hsa/queue_controller.hpp @@ -23,6 +23,7 @@ #pragma once #include +#include #include "lib/rocprofiler-sdk/hsa/profile_serializer.hpp" #include "lib/rocprofiler-sdk/hsa/queue.hpp" @@ -83,7 +84,7 @@ class QueueController void iterate_callbacks(const callback_iterator_cb_t&) const; - common::Synchronized& serializer() { return _profiler_serializer; } + common::Synchronized& serializer(const Queue*); /** * Disable serialization for QueueController, has no effect if counter collection @@ -107,12 +108,16 @@ class QueueController using client_id_map_t = std::unordered_map; using resource_alloc_t = void(const AgentCache&, const CoreApiTable&, const AmdExtTable&); - CoreApiTable _core_table = {}; - AmdExtTable _ext_table = {}; - common::Synchronized _queues = {}; - common::Synchronized _callback_cache = {}; - agent_cache_map_t _supported_agents = {}; - common::Synchronized _profiler_serializer; + CoreApiTable _core_table = {}; + AmdExtTable _ext_table = {}; + common::Synchronized _queues = {}; + common::Synchronized _callback_cache = {}; + agent_cache_map_t _supported_agents = {}; + std::atomic _serialized_enabled = {false}; + common::Synchronized< + std::unordered_map>>> + _profiler_serializer; }; QueueController* diff --git a/source/lib/rocprofiler-sdk/tests/hsa_barrier.cpp b/source/lib/rocprofiler-sdk/tests/hsa_barrier.cpp index 91361e8d..550a5324 100644 --- a/source/lib/rocprofiler-sdk/tests/hsa_barrier.cpp +++ b/source/lib/rocprofiler-sdk/tests/hsa_barrier.cpp @@ -192,8 +192,14 @@ TEST(hsa_barrier, no_block_single) auto queues = create_queue_map(1); // Immediate return of barrier due to no active async packets - hsa::hsa_barrier barrier(finished_func, get_api_table()); - barrier.set_barrier(queues); + hsa::hsa_barrier barrier(finished_func, get_api_table()); + hsa_barrier::queue_map_ptr_t q_map; + for(const auto& [k, v] : queues) + { + q_map[k] = v.get(); + } + + barrier.set_barrier(q_map); executed_handlers = 0; ASSERT_TRUE(barrier.complete()); should_execute_handler = true; @@ -224,7 +230,14 @@ TEST(hsa_barrier, no_block_multi) // Immediate return of barrier due to no active async packets hsa::hsa_barrier barrier(finished_func, get_api_table()); - barrier.set_barrier(queues); + + hsa_barrier::queue_map_ptr_t q_map; + for(const auto& [k, v] : queues) + { + q_map[k] = v.get(); + } + + barrier.set_barrier(q_map); ASSERT_TRUE(barrier.complete()); should_execute_handler = true; executed_handlers = 0; @@ -265,7 +278,13 @@ TEST(hsa_barrier, block_single) should_execute_handler = false; executed_handlers = 0; - barrier.set_barrier(queues); + hsa_barrier::queue_map_ptr_t q_map; + for(const auto& [k, v] : queues) + { + q_map[k] = v.get(); + } + + barrier.set_barrier(q_map); ASSERT_FALSE(barrier.complete()); should_execute_handler = false; @@ -323,7 +342,13 @@ TEST(hsa_barrier, block_multi) should_execute_handler = false; executed_handlers = 0; - barrier.set_barrier(queues); + hsa_barrier::queue_map_ptr_t q_map; + for(const auto& [k, v] : queues) + { + q_map[k] = v.get(); + } + + barrier.set_barrier(q_map); ASSERT_FALSE(barrier.complete()); should_execute_handler = false; diff --git a/source/lib/rocprofiler-sdk/thread_trace/att_core.cpp b/source/lib/rocprofiler-sdk/thread_trace/att_core.cpp index b92d24ee..b1ebbe7e 100644 --- a/source/lib/rocprofiler-sdk/thread_trace/att_core.cpp +++ b/source/lib/rocprofiler-sdk/thread_trace/att_core.cpp @@ -334,10 +334,12 @@ DispatchThreadTracer::pre_kernel_call(const hsa::Queue& queue, // and maybe adds barrier packets if the state is transitioning from serialized <-> // unserialized auto maybe_add_serialization = [&](auto& gen_pkt) { - CHECK_NOTNULL(hsa::get_queue_controller())->serializer().rlock([&](const auto& serializer) { - for(auto& s_pkt : serializer.kernel_dispatch(queue)) - gen_pkt->before_krn_pkt.push_back(s_pkt.ext_amd_aql_pm4); - }); + CHECK_NOTNULL(hsa::get_queue_controller()) + ->serializer(&queue) + .rlock([&](const auto& serializer) { + for(auto& s_pkt : serializer.kernel_dispatch(queue)) + gen_pkt->before_krn_pkt.push_back(s_pkt.ext_amd_aql_pm4); + }); }; auto control_flags = params.dispatch_cb_fn(queue.get_id(), @@ -381,8 +383,9 @@ class SignalSerializerExit auto* controller = hsa::get_queue_controller(); if(!controller) return; - controller->serializer().wlock( - [&](auto& serializer) { serializer.kernel_completion_signal(session.queue); }); + controller->serializer(&session.queue).wlock([&](auto& serializer) { + serializer.kernel_completion_signal(session.queue); + }); } const hsa::Queue::queue_info_session_t& session; };