From a78753d392a76726f5665202747b9f59d9cd2e9c Mon Sep 17 00:00:00 2001 From: Giovanni Lenzi Baraldi Date: Mon, 1 Jul 2024 21:56:41 -0300 Subject: [PATCH] Accumulation metrics support and update counter collection API to aqlprofile_v2 (#915) * Updating to v3 API * General fixes * Extending dimension bits to 54 * Disabling agent profiling tests * Fixed unit test * Adding accumulate metric support for parsing counters (#609) * Adding accumulate metric support for parsing counters * Adding metric flag * Updating tests * source formatting (clang-format v11) (#610) Co-authored-by: Manjunath-Jakaraddi <21177428+Manjunath-Jakaraddi@users.noreply.github.com> * source formatting (clang-format v11) (#614) Co-authored-by: jrmadsen <6001865+jrmadsen@users.noreply.github.com> * Adding evaluate ast test * source formatting (clang-format v11) (#633) Co-authored-by: Manjunath-Jakaraddi <21177428+Manjunath-Jakaraddi@users.noreply.github.com> * Update scanner generated file * Adding flags to events for aqlprofile * Fix Mi200 failing test --------- Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Manjunath-Jakaraddi <21177428+Manjunath-Jakaraddi@users.noreply.github.com> Co-authored-by: jrmadsen <6001865+jrmadsen@users.noreply.github.com> * Revert "Extending dimension bits to 54" This reverts commit 3cd6628452484044a93e129f27974f996a0e4c08. * Removing CU dimension * Fixing merge conflicts * Revert "Disabling agent profiling tests" This reverts commit 7e01518ed8c51fbb0c3b2575e1e0b8f9ddfa8237. 
* Fixing merge conflicts * Fix parser tests * Adding accumulate metric documentation * Update counter_collection_services.md * Update index.md * fix nested expression use * Update source/lib/rocprofiler-sdk/counters/evaluate_ast.cpp Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> * Doc update --------- Co-authored-by: Benjamin Welton Co-authored-by: Manjunath P Jakaraddi Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Manjunath-Jakaraddi <21177428+Manjunath-Jakaraddi@users.noreply.github.com> Co-authored-by: jrmadsen <6001865+jrmadsen@users.noreply.github.com> Co-authored-by: Manjunath-Jakaraddi --- source/docs/_toc.yml.in | 1 + source/docs/counter_collection_services.md | 14 + source/lib/rocprofiler-sdk/agent.cpp | 2 + source/lib/rocprofiler-sdk/aql/helpers.cpp | 10 +- source/lib/rocprofiler-sdk/aql/helpers.hpp | 6 +- .../rocprofiler-sdk/aql/packet_construct.cpp | 136 +++------ .../rocprofiler-sdk/aql/packet_construct.hpp | 42 ++- .../rocprofiler-sdk/aql/tests/aql_test.cpp | 41 ++- .../counters/agent_profiling.cpp | 27 +- source/lib/rocprofiler-sdk/counters/core.cpp | 4 +- .../counters/dispatch_handlers.cpp | 9 +- .../rocprofiler-sdk/counters/evaluate_ast.cpp | 46 ++-- .../rocprofiler-sdk/counters/evaluate_ast.hpp | 3 +- .../rocprofiler-sdk/counters/id_decode.cpp | 4 +- .../rocprofiler-sdk/counters/id_decode.hpp | 2 +- .../lib/rocprofiler-sdk/counters/metrics.cpp | 5 +- .../lib/rocprofiler-sdk/counters/metrics.hpp | 4 + .../counters/parser/parser.cpp | 186 +++++++------ .../rocprofiler-sdk/counters/parser/parser.h | 13 +- .../rocprofiler-sdk/counters/parser/parser.y | 6 + .../counters/parser/raw_ast.hpp | 45 ++- .../counters/parser/scanner.cpp | 119 ++++---- .../rocprofiler-sdk/counters/parser/scanner.l | 1 + .../counters/parser/tests/parser_test.cpp | 258 +++++++++++++----- .../counters/tests/agent_profiling.cpp | 1 + .../rocprofiler-sdk/counters/tests/core.cpp 
| 34 +-- .../counters/tests/evaluate_ast_test.cpp | 68 +++++ .../counters/tests/hsa_tables.cpp | 4 +- source/lib/rocprofiler-sdk/hsa/aql_packet.cpp | 100 +++++-- source/lib/rocprofiler-sdk/hsa/aql_packet.hpp | 54 ++-- .../thread_trace/tests/att_packet_test.cpp | 4 +- 31 files changed, 778 insertions(+), 471 deletions(-) create mode 100644 source/docs/counter_collection_services.md diff --git a/source/docs/_toc.yml.in b/source/docs/_toc.yml.in index b74d252f..6db2aeb1 100644 --- a/source/docs/_toc.yml.in +++ b/source/docs/_toc.yml.in @@ -15,6 +15,7 @@ subtrees: - file: buffered_services - file: pc_sampling - file: intercept_table + - file: counter_collection_services - file: _doxygen/html/index - file: samples - file: rocprofv3 diff --git a/source/docs/counter_collection_services.md b/source/docs/counter_collection_services.md new file mode 100644 index 00000000..a9605cbf --- /dev/null +++ b/source/docs/counter_collection_services.md @@ -0,0 +1,14 @@ +# Derived Metrics + +## Accumulate metric +### Expression + expr=accumulate(<basic_level_counter>, <resolution>) +### Description +- The accumulate metric is used to sum the values of a basic level counter over a specified number of cycles. By setting the resolution parameter, you can control the frequency of the summing operation: + - HIGH_RES: Sums up the basic counter every clock cycle. Captures the value every single cycle for higher accuracy, suitable for fine-grained analysis. + - LOW_RES: Sums up the basic counter every four clock cycles. Reduces the data points and provides less detailed summing, useful for reducing data volume. + - NONE: Does nothing and is equivalent to collecting basic_level_counter. Outputs the value of the basic counter without any summing operation. + +### Usage (derived_counters.xml) + +- MeanOccupancyPerCU: This metric calculates the mean occupancy per compute unit. It uses the accumulate function with HIGH_RES to sum the SQ_LEVEL_WAVES counter at every clock cycle. 
This sum is then divided by GRBM_GUI_ACTIVE and the number of compute units (CU_NUM) to derive the mean occupancy. diff --git a/source/lib/rocprofiler-sdk/agent.cpp b/source/lib/rocprofiler-sdk/agent.cpp index d6c86356..fd4edb14 100644 --- a/source/lib/rocprofiler-sdk/agent.cpp +++ b/source/lib/rocprofiler-sdk/agent.cpp @@ -795,6 +795,8 @@ construct_agent_cache(::HsaApiTable* table) "{}", fmt::join(rocp_hsa_agent_node_ids.begin(), rocp_hsa_agent_node_ids.end(), ", ")); + get_agent_caches().clear(); + get_agent_mapping().clear(); get_agent_mapping().reserve(get_agent_mapping().size() + rocp_agents.size()); auto hsa_agent_node_map = std::unordered_map{}; diff --git a/source/lib/rocprofiler-sdk/aql/helpers.cpp b/source/lib/rocprofiler-sdk/aql/helpers.cpp index 0628f4af..0111732f 100644 --- a/source/lib/rocprofiler-sdk/aql/helpers.cpp +++ b/source/lib/rocprofiler-sdk/aql/helpers.cpp @@ -66,9 +66,9 @@ get_block_counters(rocprofiler_agent_id_t agent, const aqlprofile_pmc_event_t& e rocprofiler_status_t set_dim_id_from_sample(rocprofiler_counter_instance_id_t& id, - hsa_agent_t agent, - hsa_ven_amd_aqlprofile_event_t event, - uint32_t sample_id) + aqlprofile_agent_handle_t agent, + aqlprofile_pmc_event_t event, + size_t sample_id) { auto callback = [](int, int sid, int, int coordinate, const char*, void* userdata) -> hsa_status_t { @@ -82,8 +82,8 @@ set_dim_id_from_sample(rocprofiler_counter_instance_id_t& id, return HSA_STATUS_SUCCESS; }; - if(hsa_ven_amd_aqlprofile_iterate_event_coord( - agent, event, sample_id, callback, static_cast(&id)) != HSA_STATUS_SUCCESS) + if(aqlprofile_iterate_event_coord(agent, event, sample_id, callback, static_cast(&id)) != + HSA_STATUS_SUCCESS) { return ROCPROFILER_STATUS_ERROR_AQL_NO_EVENT_COORD; } diff --git a/source/lib/rocprofiler-sdk/aql/helpers.hpp b/source/lib/rocprofiler-sdk/aql/helpers.hpp index 80247987..802cede6 100644 --- a/source/lib/rocprofiler-sdk/aql/helpers.hpp +++ b/source/lib/rocprofiler-sdk/aql/helpers.hpp @@ -57,9 +57,9 
@@ get_dim_info(rocprofiler_agent_id_t agent, // Set dimension ids into id for sample rocprofiler_status_t set_dim_id_from_sample(rocprofiler_counter_instance_id_t& id, - hsa_agent_t agent, - hsa_ven_amd_aqlprofile_event_t event, - uint32_t sample_id); + aqlprofile_agent_handle_t agent, + aqlprofile_pmc_event_t event, + size_t sample_id); rocprofiler_status_t set_profiler_active_on_queue(const AmdExtTable& api, diff --git a/source/lib/rocprofiler-sdk/aql/packet_construct.cpp b/source/lib/rocprofiler-sdk/aql/packet_construct.cpp index 5319af1f..da2ef57d 100644 --- a/source/lib/rocprofiler-sdk/aql/packet_construct.cpp +++ b/source/lib/rocprofiler-sdk/aql/packet_construct.cpp @@ -66,14 +66,15 @@ CounterPacketConstruct::CounterPacketConstruct(rocprofiler_agent_id_t for(unsigned block_index = 0; block_index < query_info.instance_count; ++block_index) { _metrics.back().instances.push_back( - {static_cast(query_info.id), - block_index, - event_id}); + {.block_index = block_index, + .event_id = event_id, + .flags = aqlprofile_pmc_event_flags_t{x.flags()}, + .block_name = static_cast(query_info.id)}); _metrics.back().events.push_back( {.block_index = block_index, .event_id = event_id, - .flags = aqlprofile_pmc_event_flags_t{0}, + .flags = aqlprofile_pmc_event_flags_t{x.flags()}, .block_name = static_cast(query_info.id)}); bool validate_event_result; @@ -86,114 +87,45 @@ CounterPacketConstruct::CounterPacketConstruct(rocprofiler_agent_id_t &validate_event_result) != HSA_STATUS_SUCCESS); ROCP_FATAL_IF(!validate_event_result) << "Invalid Metric: " << block_index << " " << event_id; - _event_to_metric[std::make_tuple( - static_cast(query_info.id), - block_index, - event_id)] = x; + _event_to_metric[_metrics.back().events.back()] = x; } } _events = get_all_events(); } std::unique_ptr -CounterPacketConstruct::construct_packet(const AmdExtTable& ext) +CounterPacketConstruct::construct_packet(const CoreApiTable& coreapi, const AmdExtTable& ext) { - auto pkt_ptr = 
std::make_unique(ext.hsa_amd_memory_pool_free_fn); - auto& pkt = *pkt_ptr; - if(_events.empty()) - { - ROCP_TRACE << "No events for pkt"; - return pkt_ptr; - } - pkt.empty = false; - - const auto* agent_cache = + const auto* agent = rocprofiler::agent::get_agent_cache(CHECK_NOTNULL(rocprofiler::agent::get_agent(_agent))); - if(!agent_cache) - { - ROCP_FATAL << "No agent cache for agent id: " << _agent.handle; - } - - pkt.profile = hsa_ven_amd_aqlprofile_profile_t{ - agent_cache->get_hsa_agent(), - HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC, // SPM? - _events.data(), - static_cast(_events.size()), - nullptr, - 0u, - hsa_ven_amd_aqlprofile_descriptor_t{.ptr = nullptr, .size = 0}, - hsa_ven_amd_aqlprofile_descriptor_t{.ptr = nullptr, .size = 0}}; - auto& profile = pkt.profile; + if(!agent) ROCP_FATAL << "No agent cache for agent id: " << _agent.handle; hsa_amd_memory_pool_access_t _access = HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED; - ext.hsa_amd_agent_memory_pool_get_info_fn(agent_cache->get_hsa_agent(), - agent_cache->kernarg_pool(), + ext.hsa_amd_agent_memory_pool_get_info_fn(agent->get_hsa_agent(), + agent->kernarg_pool(), HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, static_cast(&_access)); - // Memory is accessable by both the GPU and CPU, unlock the command buffer for - // sharing. - if(_access == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) - { - throw std::runtime_error( - fmt::format("Agent {} does not allow memory pool access for counter collection", - agent_cache->get_hsa_agent().handle)); - } - CHECK_HSA(hsa_ven_amd_aqlprofile_start(&profile, nullptr), "could not generate packet sizes"); + hsa::CounterAQLPacket::CounterMemoryPool pool; - if(profile.command_buffer.size == 0 || profile.output_buffer.size == 0) - { - throw std::runtime_error( - fmt::format("No command or output buffer size set. 
CMD_BUF={} PROFILE_BUF={}", - profile.command_buffer.size, - profile.output_buffer.size)); - } + if(_access == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) pool.bIgnoreKernArg = true; - // Allocate buffers and check the results - auto alloc_and_check = [&](auto& pool, auto** mem_loc, auto size) -> bool { - bool malloced = false; - size_t page_aligned = getPageAligned(size); - if(ext.hsa_amd_memory_pool_allocate_fn( - pool, page_aligned, 0, static_cast(mem_loc)) != HSA_STATUS_SUCCESS) - { - *mem_loc = malloc(page_aligned); - malloced = true; - } - else - { - CHECK(*mem_loc); - hsa_agent_t agent = agent_cache->get_hsa_agent(); - // Memory is accessable by both the GPU and CPU, unlock the command buffer for - // sharing. - LOG_IF(FATAL, - ext.hsa_amd_agents_allow_access_fn(1, &agent, nullptr, *mem_loc) != - HSA_STATUS_SUCCESS) - << "Error: Allowing access to Command Buffer"; - } - return malloced; - }; - - // Build command and output buffers - pkt.command_buf_mallocd = alloc_and_check( - agent_cache->cpu_pool(), &profile.command_buffer.ptr, profile.command_buffer.size); - pkt.output_buffer_malloced = alloc_and_check( - agent_cache->kernarg_pool(), &profile.output_buffer.ptr, profile.output_buffer.size); - memset(profile.output_buffer.ptr, 0x0, profile.output_buffer.size); - - CHECK_HSA(hsa_ven_amd_aqlprofile_start(&profile, &pkt.start), "failed to create start packet"); - CHECK_HSA(hsa_ven_amd_aqlprofile_stop(&profile, &pkt.stop), "failed to create stop packet"); - CHECK_HSA(hsa_ven_amd_aqlprofile_read(&profile, &pkt.read), "failed to create read packet"); - pkt.start.header = HSA_PACKET_TYPE_VENDOR_SPECIFIC << HSA_PACKET_HEADER_TYPE; - pkt.stop.header = HSA_PACKET_TYPE_VENDOR_SPECIFIC << HSA_PACKET_HEADER_TYPE; - pkt.read.header = HSA_PACKET_TYPE_VENDOR_SPECIFIC << HSA_PACKET_HEADER_TYPE; - ROCP_TRACE << fmt::format("Following Packets Generated (output_buffer={}, output_size={}). 
" - "Start Pkt: {}, Read Pkt: {}, Stop Pkt: {}", - profile.output_buffer.ptr, - profile.output_buffer.size, - pkt.start, - pkt.read, - pkt.stop); - return pkt_ptr; + pool.allocate_fn = ext.hsa_amd_memory_pool_allocate_fn; + pool.allow_access_fn = ext.hsa_amd_agents_allow_access_fn; + pool.free_fn = ext.hsa_amd_memory_pool_free_fn; + pool.api_copy_fn = coreapi.hsa_memory_copy_fn; + pool.fill_fn = ext.hsa_amd_memory_fill_fn; + + pool.gpu_agent = agent->get_hsa_agent(); + pool.cpu_pool_ = agent->cpu_pool(); + pool.kernarg_pool_ = agent->kernarg_pool(); + + const auto* aql_agent = rocprofiler::agent::get_aql_agent(agent->get_rocp_agent()->id); + if(aql_agent == nullptr) throw std::runtime_error("Could not get AQL agent!"); + + if(_events.empty()) ROCP_TRACE << "No events for pkt"; + + return std::make_unique(*aql_agent, pool, _events); } ThreadTraceAQLPacketFactory::ThreadTraceAQLPacketFactory(const hsa::AgentCache& agent, @@ -255,10 +187,10 @@ ThreadTraceAQLPacketFactory::construct_unload_marker_packet(uint64_t id) return std::make_unique(tracepool, id, 0, 0, false, true); } -std::vector +std::vector CounterPacketConstruct::get_all_events() const { - std::vector ret; + std::vector ret; for(const auto& metric : _metrics) { ret.insert(ret.end(), metric.instances.begin(), metric.instances.end()); @@ -267,11 +199,9 @@ CounterPacketConstruct::get_all_events() const } const counters::Metric* -CounterPacketConstruct::event_to_metric(const hsa_ven_amd_aqlprofile_event_t& event) const +CounterPacketConstruct::event_to_metric(const aqlprofile_pmc_event_t& event) const { - if(const auto* ptr = rocprofiler::common::get_val( - _event_to_metric, - std::make_tuple(event.block_name, event.block_index, event.counter_id))) + if(const auto* ptr = rocprofiler::common::get_val(_event_to_metric, event)) { return ptr; } diff --git a/source/lib/rocprofiler-sdk/aql/packet_construct.hpp b/source/lib/rocprofiler-sdk/aql/packet_construct.hpp index 78b0f2ad..9a78cd29 100644 --- 
a/source/lib/rocprofiler-sdk/aql/packet_construct.hpp +++ b/source/lib/rocprofiler-sdk/aql/packet_construct.hpp @@ -38,6 +38,24 @@ #include "lib/rocprofiler-sdk/thread_trace/att_core.hpp" #include "rocprofiler-sdk/fwd.h" +inline bool +operator==(aqlprofile_pmc_event_t lhs, aqlprofile_pmc_event_t rhs) +{ + if(lhs.block_name != rhs.block_name) return false; + if(lhs.block_index != rhs.block_index) return false; + if(lhs.event_id != rhs.event_id) return false; + return lhs.flags.raw == rhs.flags.raw; +} + +inline bool +operator<(aqlprofile_pmc_event_t lhs, aqlprofile_pmc_event_t rhs) +{ + if(lhs.block_name != rhs.block_name) return lhs.block_name < rhs.block_name; + if(lhs.block_index != rhs.block_index) return lhs.block_index < rhs.block_index; + if(lhs.event_id != rhs.event_id) return lhs.event_id < rhs.event_id; + return lhs.flags.raw < rhs.flags.raw; +} + namespace rocprofiler { namespace aql @@ -55,11 +73,12 @@ class CounterPacketConstruct public: CounterPacketConstruct(rocprofiler_agent_id_t agent, const std::vector& metrics); - std::unique_ptr construct_packet(const AmdExtTable&); + std::unique_ptr construct_packet(const CoreApiTable&, + const AmdExtTable&); - const counters::Metric* event_to_metric(const hsa_ven_amd_aqlprofile_event_t& event) const; - std::vector get_all_events() const; - const std::vector& get_counter_events(const counters::Metric&) const; + const counters::Metric* event_to_metric(const aqlprofile_pmc_event_t& event) const; + std::vector get_all_events() const; + const std::vector& get_counter_events(const counters::Metric&) const; rocprofiler_agent_id_t agent() const { return _agent; } @@ -73,16 +92,15 @@ class CounterPacketConstruct protected: struct AQLProfileMetric { - counters::Metric metric; - std::vector instances; - std::vector events; + counters::Metric metric; + std::vector instances; + std::vector events; }; - rocprofiler_agent_id_t _agent; - std::vector _metrics; - std::vector _events; - std::map, counters::Metric> - 
_event_to_metric; + rocprofiler_agent_id_t _agent; + std::vector _metrics; + std::vector _events; + std::map _event_to_metric; }; class ThreadTraceAQLPacketFactory diff --git a/source/lib/rocprofiler-sdk/aql/tests/aql_test.cpp b/source/lib/rocprofiler-sdk/aql/tests/aql_test.cpp index ae51423a..242bd02f 100644 --- a/source/lib/rocprofiler-sdk/aql/tests/aql_test.cpp +++ b/source/lib/rocprofiler-sdk/aql/tests/aql_test.cpp @@ -39,6 +39,38 @@ using namespace rocprofiler::counters::test_constants; namespace rocprofiler { +AmdExtTable& +get_ext_table() +{ + static auto _v = []() { + auto val = AmdExtTable{}; + val.hsa_amd_memory_pool_get_info_fn = hsa_amd_memory_pool_get_info; + val.hsa_amd_agent_iterate_memory_pools_fn = hsa_amd_agent_iterate_memory_pools; + val.hsa_amd_memory_pool_allocate_fn = hsa_amd_memory_pool_allocate; + val.hsa_amd_memory_pool_free_fn = hsa_amd_memory_pool_free; + val.hsa_amd_agent_memory_pool_get_info_fn = hsa_amd_agent_memory_pool_get_info; + val.hsa_amd_agents_allow_access_fn = hsa_amd_agents_allow_access; + val.hsa_amd_memory_fill_fn = hsa_amd_memory_fill; + return val; + }(); + return _v; +} + +CoreApiTable& +get_api_table() +{ + static auto _v = []() { + auto val = CoreApiTable{}; + val.hsa_iterate_agents_fn = hsa_iterate_agents; + val.hsa_agent_get_info_fn = hsa_agent_get_info; + val.hsa_queue_create_fn = hsa_queue_create; + val.hsa_queue_destroy_fn = hsa_queue_destroy; + val.hsa_signal_wait_relaxed_fn = hsa_signal_wait_relaxed; + return val; + }(); + return _v; +} + auto findDeviceMetrics(const hsa::AgentCache& agent, const std::unordered_set& metrics) { @@ -122,7 +154,9 @@ TEST(aql_profile, packet_generation_single) { auto metrics = rocprofiler::findDeviceMetrics(agent, {"SQ_WAVES"}); CounterPacketConstruct pkt(agent.get_rocp_agent()->id, metrics); - auto test_pkt = pkt.construct_packet(get_ext_table()); + auto test_pkt = + pkt.construct_packet(rocprofiler::get_api_table(), rocprofiler::get_ext_table()); + EXPECT_TRUE(test_pkt); } @@ 
-141,13 +175,15 @@ TEST(aql_profile, packet_generation_multi) auto metrics = rocprofiler::findDeviceMetrics(agent, {"SQ_WAVES", "TA_FLAT_READ_WAVEFRONTS"}); CounterPacketConstruct pkt(agent.get_rocp_agent()->id, metrics); - auto test_pkt = pkt.construct_packet(get_ext_table()); + auto test_pkt = + pkt.construct_packet(rocprofiler::get_api_table(), rocprofiler::get_ext_table()); EXPECT_TRUE(test_pkt); } hsa_shut_down(); } +/* class TestAqlPacket : public rocprofiler::hsa::CounterAQLPacket { public: @@ -183,3 +219,4 @@ TEST(aql_profile, test_aql_packet) // Why is this valid? TestAqlPacket test_pkt2(false); } +*/ \ No newline at end of file diff --git a/source/lib/rocprofiler-sdk/counters/agent_profiling.cpp b/source/lib/rocprofiler-sdk/counters/agent_profiling.cpp index effac090..7b288ad5 100644 --- a/source/lib/rocprofiler-sdk/counters/agent_profiling.cpp +++ b/source/lib/rocprofiler-sdk/counters/agent_profiling.cpp @@ -100,12 +100,14 @@ construct_aql_pkt(std::shared_ptr& profile) } auto pkts = profile->pkt_generator->construct_packet( + CHECK_NOTNULL(hsa::get_queue_controller())->get_core_table(), CHECK_NOTNULL(hsa::get_queue_controller())->get_ext_table()); - pkts->start.header = header_pkt(HSA_PACKET_TYPE_VENDOR_SPECIFIC); - pkts->start.completion_signal.handle = 0; - pkts->stop.header = header_pkt(HSA_PACKET_TYPE_VENDOR_SPECIFIC); - pkts->read.header = header_pkt(HSA_PACKET_TYPE_VENDOR_SPECIFIC); + pkts->packets.start_packet.header = header_pkt(HSA_PACKET_TYPE_VENDOR_SPECIFIC); + pkts->packets.stop_packet.header = header_pkt(HSA_PACKET_TYPE_VENDOR_SPECIFIC); + pkts->packets.read_packet.header = header_pkt(HSA_PACKET_TYPE_VENDOR_SPECIFIC); + + pkts->packets.start_packet.completion_signal.handle = 0; return pkts; } @@ -303,8 +305,9 @@ read_agent_ctx(const context::context* ctx, agent->get_rocp_agent()->simd_arrays_per_engine); // Submit the read packet to the queue - submitPacket( - callback_data.table, agent->profile_queue(), (void*) &callback_data.packet->read); 
+ submitPacket(callback_data.table, + agent->profile_queue(), + (void*) &callback_data.packet->packets.read_packet); // Submit a barrier packet. This is needed to flush hardware caches. Without this // the read packet may not have the correct data. @@ -452,10 +455,11 @@ start_agent_ctx(const context::context* ctx) continue; } - callback_data.packet->start.completion_signal = callback_data.start_signal; + callback_data.packet->packets.start_packet.completion_signal = callback_data.start_signal; callback_data.table.hsa_signal_store_relaxed_fn(callback_data.start_signal, 1); - submitPacket( - callback_data.table, agent->profile_queue(), (void*) &callback_data.packet->start); + submitPacket(callback_data.table, + agent->profile_queue(), + (void*) &callback_data.packet->packets.start_packet); // Wait for startup to finish before continuing callback_data.table.hsa_signal_wait_relaxed_fn(callback_data.start_signal, @@ -511,8 +515,9 @@ stop_agent_ctx(const context::context* ctx) if(!callback_data.profile->reqired_hw_counters.empty()) { // Remove when AQL is updated to not require stop to be called first - submitPacket( - callback_data.table, agent->profile_queue(), (void*) &callback_data.packet->stop); + submitPacket(callback_data.table, + agent->profile_queue(), + (void*) &callback_data.packet->packets.stop_packet); } // Wait for the stop packet to complete diff --git a/source/lib/rocprofiler-sdk/counters/core.cpp b/source/lib/rocprofiler-sdk/counters/core.cpp index 5055d7e9..fbf06c5d 100644 --- a/source/lib/rocprofiler-sdk/counters/core.cpp +++ b/source/lib/rocprofiler-sdk/counters/core.cpp @@ -129,11 +129,11 @@ counter_callback_info::get_packet(std::unique_ptr& { // If we do not have a packet in the cache, create one. 
ret_pkt = profile->pkt_generator->construct_packet( + CHECK_NOTNULL(hsa::get_queue_controller())->get_core_table(), CHECK_NOTNULL(hsa::get_queue_controller())->get_ext_table()); } - ret_pkt->before_krn_pkt.clear(); - ret_pkt->after_krn_pkt.clear(); + ret_pkt->clear(); packet_return_map.wlock([&](auto& data) { data.emplace(ret_pkt.get(), profile); }); return ROCPROFILER_STATUS_SUCCESS; diff --git a/source/lib/rocprofiler-sdk/counters/dispatch_handlers.cpp b/source/lib/rocprofiler-sdk/counters/dispatch_handlers.cpp index 8e716c8f..8ee7c994 100644 --- a/source/lib/rocprofiler-sdk/counters/dispatch_handlers.cpp +++ b/source/lib/rocprofiler-sdk/counters/dispatch_handlers.cpp @@ -72,7 +72,7 @@ queue_cb(const context::context* ctx, // Packet generated when no instrumentation is performed. May contain serialization // packets/barrier packets (and can be empty). auto no_instrumentation = [&]() { - auto ret_pkt = std::make_unique(nullptr); + auto ret_pkt = std::make_unique(); // If we have a counter collection context but it is not enabled, we still might need // to add barrier packets to transition from serialized -> unserialized execution. This // transition is coordinated by the serializer. 
@@ -147,13 +147,10 @@ queue_cb(const context::context* ctx, return ret_pkt; } - ret_pkt->before_krn_pkt.push_back(ret_pkt->start); - ret_pkt->after_krn_pkt.push_back(ret_pkt->read); - ret_pkt->after_krn_pkt.push_back(ret_pkt->stop); + ret_pkt->populate_before(); + ret_pkt->populate_after(); for(auto& aql_pkt : ret_pkt->after_krn_pkt) - { aql_pkt.completion_signal.handle = 0; - } return ret_pkt; } diff --git a/source/lib/rocprofiler-sdk/counters/evaluate_ast.cpp b/source/lib/rocprofiler-sdk/counters/evaluate_ast.cpp index 66b6082a..cf68cb42 100644 --- a/source/lib/rocprofiler-sdk/counters/evaluate_ast.cpp +++ b/source/lib/rocprofiler-sdk/counters/evaluate_ast.cpp @@ -207,11 +207,15 @@ EvaluateAST::EvaluateAST(rocprofiler_counter_id_t out_id, , _reduce_dimension_set(ast.reduce_dimension_set) , _out_id(out_id) { - if(_type == NodeType::REFERENCE_NODE) + if(_type == NodeType::REFERENCE_NODE || _type == NodeType::ACCUMULATE_NODE) { try { _metric = metrics.at(std::get(ast.value)); + if(_type == NodeType::ACCUMULATE_NODE) + { + _metric.setflags(static_cast(ast.accumulate_op)); + } } catch(std::exception& e) { throw std::runtime_error( @@ -277,6 +281,7 @@ EvaluateAST::set_dimensions() _dimension_types = first.size() > second.size() ? first : second; } break; + case ACCUMULATE_NODE: case REFERENCE_NODE: { _dimension_types = get_dim_types(_metric); @@ -377,6 +382,11 @@ EvaluateAST::validate_raw_ast(const std::unordered_map& met // Dimensionindex values should be within limits for this metric and GPU. 
} break; + case ACCUMULATE_NODE: + { + // Future todo only to be applied on sq metric + } + break; } } catch(std::exception& e) { @@ -466,39 +476,36 @@ EvaluateAST::read_pkt(const aql::CounterPacketConstruct* pkt_gen, hsa::AQLPacket { std::unordered_map>* data; const aql::CounterPacketConstruct* pkt_gen; - hsa_agent_t agent; + aqlprofile_agent_handle_t agent; }; - auto agent = CHECK_NOTNULL(rocprofiler::agent::get_agent_cache( - CHECK_NOTNULL(rocprofiler::agent::get_agent(pkt_gen->agent())))) - ->get_hsa_agent(); + auto aql_agent = *CHECK_NOTNULL(rocprofiler::agent::get_aql_agent(pkt_gen->agent())); + std::unordered_map> ret; if(pkt.empty) return ret; - it_data aql_data{.data = &ret, .pkt_gen = pkt_gen, .agent = agent}; - ; - hsa_status_t status = hsa_ven_amd_aqlprofile_iterate_data( - &pkt.profile, - [](hsa_ven_amd_aqlprofile_info_type_t info_type, - hsa_ven_amd_aqlprofile_info_data_t* info_data, - void* data) { + it_data aql_data{.data = &ret, .pkt_gen = pkt_gen, .agent = aql_agent}; + + hsa_status_t status = aqlprofile_pmc_iterate_data( + pkt.handle, + [](aqlprofile_pmc_event_t event, uint64_t counter_id, uint64_t counter_value, void* data) { CHECK(data); - auto& it = *static_cast(data); - if(info_type != HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA) return HSA_STATUS_SUCCESS; - const auto* metric = it.pkt_gen->event_to_metric(info_data->pmc_data.event); + auto& it = *static_cast(data); + const auto* metric = it.pkt_gen->event_to_metric(event); + if(!metric) return HSA_STATUS_SUCCESS; + auto& vec = it.data->emplace(metric->id(), std::vector{}) .first->second; auto& next_rec = vec.emplace_back(); set_counter_in_rec(next_rec.id, {.handle = metric->id()}); // Actual dimension info needs to be used here in the future - auto aql_status = aql::set_dim_id_from_sample( - next_rec.id, it.agent, info_data->pmc_data.event, info_data->sample_id); + auto aql_status = aql::set_dim_id_from_sample(next_rec.id, it.agent, event, counter_id); CHECK_EQ(aql_status, 
ROCPROFILER_STATUS_SUCCESS) << rocprofiler_get_status_string(aql_status); // set_dim_in_rec(next_rec.id, ROCPROFILER_DIMENSION_NONE, vec.size() - 1); // Note: in the near future we need to use hw_counter here instead - next_rec.counter_value = info_data->pmc_data.result; + next_rec.counter_value = counter_value; return HSA_STATUS_SUCCESS; }, &aql_data); @@ -522,6 +529,7 @@ EvaluateAST::expand_derived(std::unordered_map& asts) _expanded = true; for(auto& child : _children) { + if(child._type == NodeType::ACCUMULATE_NODE) continue; if(auto* ptr = rocprofiler::common::get_val(asts, child.metric().name())) { ptr->expand_derived(asts); @@ -629,6 +637,8 @@ EvaluateAST::evaluate( .dispatch_id = a.dispatch_id, .user_data = {.value = 0}}; }); + case ACCUMULATE_NODE: + // todo update how to read the hybrid metric case REFERENCE_NODE: { auto* result = rocprofiler::common::get_val(results_map, _metric.id()); diff --git a/source/lib/rocprofiler-sdk/counters/evaluate_ast.hpp b/source/lib/rocprofiler-sdk/counters/evaluate_ast.hpp index dcc6e1d6..7f9b2625 100644 --- a/source/lib/rocprofiler-sdk/counters/evaluate_ast.hpp +++ b/source/lib/rocprofiler-sdk/counters/evaluate_ast.hpp @@ -48,8 +48,7 @@ enum DimensionTypes DIMENSION_SHADER_ENGINE = 1 << 2, DIMENSION_AGENT = 1 << 3, DIMENSION_PMC_CHANNEL = 1 << 4, - DIMENSION_CU = 1 << 5, - DIMENSION_LAST = 1 << 6, + DIMENSION_LAST = 1 << 5, }; enum ReduceOperation diff --git a/source/lib/rocprofiler-sdk/counters/id_decode.cpp b/source/lib/rocprofiler-sdk/counters/id_decode.cpp index 2d4e96d2..b9a6457d 100644 --- a/source/lib/rocprofiler-sdk/counters/id_decode.cpp +++ b/source/lib/rocprofiler-sdk/counters/id_decode.cpp @@ -43,7 +43,7 @@ dimension_map() {ROCPROFILER_DIMENSION_SHADER_ENGINE, std::string_view("DIMENSION_SHADER_ENGINE")}, {ROCPROFILER_DIMENSION_AGENT, std::string_view("DIMENSION_AGENT")}, {ROCPROFILER_DIMENSION_SHADER_ARRAY, std::string_view("DIMENSION_SHADER_ARRAY")}, - {ROCPROFILER_DIMENSION_CU, 
std::string_view("DIMENSION_CU")}, + {ROCPROFILER_DIMENSION_WGP, std::string_view("DIMENSION_WGP")}, {ROCPROFILER_DIMENSION_INSTANCE, std::string_view("DIMENSION_INSTANCE")}, }); return *_v; @@ -67,7 +67,7 @@ aqlprofile_id_to_rocprof_instance() {"AID", ROCPROFILER_DIMENSION_AID}, {"SE", ROCPROFILER_DIMENSION_SHADER_ENGINE}, {"SA", ROCPROFILER_DIMENSION_SHADER_ARRAY}, - {"CU", ROCPROFILER_DIMENSION_CU}, + {"WGP", ROCPROFILER_DIMENSION_WGP}, {"INSTANCE", ROCPROFILER_DIMENSION_INSTANCE}, }; diff --git a/source/lib/rocprofiler-sdk/counters/id_decode.hpp b/source/lib/rocprofiler-sdk/counters/id_decode.hpp index c6dbb014..0c779f22 100644 --- a/source/lib/rocprofiler-sdk/counters/id_decode.hpp +++ b/source/lib/rocprofiler-sdk/counters/id_decode.hpp @@ -45,7 +45,7 @@ enum rocprofiler_profile_counter_instance_types ROCPROFILER_DIMENSION_SHADER_ENGINE, ///< SE dimension of result ROCPROFILER_DIMENSION_AGENT, ///< Agent dimension ROCPROFILER_DIMENSION_SHADER_ARRAY, ///< Number of shader arrays - ROCPROFILER_DIMENSION_CU, ///< Number of compute units + ROCPROFILER_DIMENSION_WGP, ///< Number of workgroup processors ROCPROFILER_DIMENSION_INSTANCE, ///< Number of instances ROCPROFILER_DIMENSION_LAST }; diff --git a/source/lib/rocprofiler-sdk/counters/metrics.cpp b/source/lib/rocprofiler-sdk/counters/metrics.cpp index 59dd33cd..f44ede85 100644 --- a/source/lib/rocprofiler-sdk/counters/metrics.cpp +++ b/source/lib/rocprofiler-sdk/counters/metrics.cpp @@ -272,7 +272,7 @@ checkValidMetric(const std::string& agent, const Metric& metric) bool operator<(Metric const& lhs, Metric const& rhs) { - return lhs.id() < rhs.id(); + return std::tie(lhs.id_, lhs.flags_) < std::tie(rhs.id_, rhs.flags_); } bool @@ -286,7 +286,8 @@ operator==(Metric const& lhs, Metric const& rhs) x.expression_, x.special_, x.id_, - x.empty_); + x.empty_, + x.flags_); }; return get_tie(lhs) == get_tie(rhs); } diff --git a/source/lib/rocprofiler-sdk/counters/metrics.hpp 
b/source/lib/rocprofiler-sdk/counters/metrics.hpp index a792b2eb..df58e166 100644 --- a/source/lib/rocprofiler-sdk/counters/metrics.hpp +++ b/source/lib/rocprofiler-sdk/counters/metrics.hpp @@ -64,8 +64,11 @@ class Metric const std::string& expression() const { return expression_; } const std::string& special() const { return special_; } uint64_t id() const { return id_; } + uint32_t flags() const { return flags_; } bool empty() const { return empty_; } + void setflags(uint32_t flags) { this->flags_ = flags; } + friend bool operator<(Metric const& lhs, Metric const& rhs); friend bool operator==(Metric const& lhs, Metric const& rhs); @@ -78,6 +81,7 @@ class Metric std::string special_ = {}; int64_t id_ = -1; bool empty_ = false; + uint32_t flags_ = 0; }; using MetricMap = std::unordered_map>; diff --git a/source/lib/rocprofiler-sdk/counters/parser/parser.cpp b/source/lib/rocprofiler-sdk/counters/parser/parser.cpp index 153330bd..37c53c99 100644 --- a/source/lib/rocprofiler-sdk/counters/parser/parser.cpp +++ b/source/lib/rocprofiler-sdk/counters/parser/parser.cpp @@ -133,13 +133,14 @@ enum yysymbol_kind_t YYSYMBOL_NAME = 20, /* NAME */ YYSYMBOL_REDUCE = 21, /* REDUCE */ YYSYMBOL_SELECT = 22, /* SELECT */ - YYSYMBOL_LOWER_THAN_ELSE = 23, /* LOWER_THAN_ELSE */ - YYSYMBOL_ELSE = 24, /* ELSE */ - YYSYMBOL_YYACCEPT = 25, /* $accept */ - YYSYMBOL_top = 26, /* top */ - YYSYMBOL_exp = 27, /* exp */ - YYSYMBOL_reduce_dim_args = 28, /* reduce_dim_args */ - YYSYMBOL_select_dim_args = 29 /* select_dim_args */ + YYSYMBOL_ACCUMULATE = 23, /* ACCUMULATE */ + YYSYMBOL_LOWER_THAN_ELSE = 24, /* LOWER_THAN_ELSE */ + YYSYMBOL_ELSE = 25, /* ELSE */ + YYSYMBOL_YYACCEPT = 26, /* $accept */ + YYSYMBOL_top = 27, /* top */ + YYSYMBOL_exp = 28, /* exp */ + YYSYMBOL_reduce_dim_args = 29, /* reduce_dim_args */ + YYSYMBOL_select_dim_args = 30 /* select_dim_args */ }; typedef enum yysymbol_kind_t yysymbol_kind_t; @@ -451,21 +452,21 @@ union yyalloc #endif /* !YYCOPY_NEEDED */ /* YYFINAL -- State 
number of the termination state. */ -#define YYFINAL 11 +#define YYFINAL 13 /* YYLAST -- Last index in YYTABLE. */ -#define YYLAST 54 +#define YYLAST 60 /* YYNTOKENS -- Number of terminals. */ -#define YYNTOKENS 25 +#define YYNTOKENS 26 /* YYNNTS -- Number of nonterminals. */ #define YYNNTS 5 /* YYNRULES -- Number of rules. */ -#define YYNRULES 16 +#define YYNRULES 17 /* YYNSTATES -- Number of states. */ -#define YYNSTATES 44 +#define YYNSTATES 50 /* YYMAXUTOK -- Last valid token kind. */ -#define YYMAXUTOK 278 +#define YYMAXUTOK 279 /* YYTRANSLATE(TOKEN-NUM) -- Symbol number corresponding to TOKEN-NUM as returned by yylex, with out-of-bounds checking. */ @@ -476,22 +477,22 @@ union yyalloc /* YYTRANSLATE[TOKEN-NUM] -- Symbol number corresponding to TOKEN-NUM as returned by yylex. */ static const yytype_int8 yytranslate[] = { - 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 15, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 3, 4, - 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16, 17, 18, 19, 20, 21, 22, 23, 24}; + 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 15, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25}; #if YYDEBUG /* YYRLINE[YYN] -- Source line where rule number YYN was defined. */ static const yytype_int8 yyrline[] = - {0, 57, 57, 60, 61, 62, 63, 64, 65, 66, 69, 73, 77, 84, 87, 94, 97}; + {0, 58, 58, 61, 62, 63, 64, 65, 66, 67, 70, 75, 79, 83, 90, 93, 100, 103}; #endif /** Accessing symbol of state STATE. */ @@ -528,6 +529,7 @@ static const char* const yytname[] = {"\"end of file\"", "NAME", "REDUCE", "SELECT", + "ACCUMULATE", "LOWER_THAN_ELSE", "ELSE", "$accept", @@ -544,7 +546,7 @@ yysymbol_name(yysymbol_kind_t yysymbol) } #endif -#define YYPACT_NINF (-3) +#define YYPACT_NINF (-10) #define yypact_value_is_default(Yyn) ((Yyn) == YYPACT_NINF) @@ -554,48 +556,50 @@ yysymbol_name(yysymbol_kind_t yysymbol) /* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing STATE-NUM. */ -static const yytype_int8 yypact[] = {11, 11, -3, -3, 1, 16, 7, 32, 18, 11, 11, -3, 11, 11, 11, - 11, -3, -2, 13, 0, 0, -3, -3, 6, 28, 17, 20, -3, 30, 34, - 31, 24, 27, 36, 33, 35, 37, -3, 24, 38, 20, -3, -3, -3}; +static const yytype_int8 yypact[] = {2, 2, -10, -10, -7, -2, 3, 21, 38, 27, 2, 2, 14, + -10, 2, 2, 2, 2, -10, 0, 23, 18, 13, 13, -10, -10, + 16, 28, 25, -9, 26, 37, -10, 39, 30, 36, -10, 29, 33, + 42, 40, 41, 43, -10, 29, 44, 26, -10, -10, -10}; /* YYDEFACT[STATE-NUM] -- Default reduction number in state STATE-NUM. Performed when YYTABLE does not specify something else to do. 
Zero means the default is an error. */ -static const yytype_int8 yydefact[] = {0, 0, 3, 9, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, - 0, 8, 0, 0, 4, 5, 6, 7, 0, 0, 0, 0, 10, 0, 0, - 0, 0, 0, 0, 13, 0, 15, 12, 0, 0, 0, 14, 11, 16}; +static const yytype_int8 yydefact[] = {0, 0, 3, 9, 0, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 8, 0, 0, 0, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 11, 0, + 0, 0, 10, 0, 0, 0, 14, 0, 16, 13, 0, 0, 0, 15, 12, 17}; /* YYPGOTO[NTERM-NUM]. */ -static const yytype_int8 yypgoto[] = {-3, -3, -1, 14, 9}; +static const yytype_int8 yypgoto[] = {-10, -10, -1, 11, 10}; /* YYDEFGOTO[NTERM-NUM]. */ -static const yytype_int8 yydefgoto[] = {0, 6, 7, 35, 30}; +static const yytype_int8 yydefgoto[] = {0, 7, 8, 41, 35}; /* YYTABLE[YYPACT[STATE-NUM]] -- What to do in state STATE-NUM. If positive, shift that token. If negative, reduce the rule whose number is the opposite. If YYTABLE_NINF, syntax error. */ -static const yytype_int8 yytable[] = {8, 12, 13, 14, 15, 14, 15, 11, 17, 18, 9, 19, 20, 21, - 22, 23, 12, 13, 14, 15, 1, 12, 13, 14, 15, 10, 25, 27, - 16, 2, 24, 3, 4, 5, 28, 12, 13, 14, 15, 26, 29, 31, - 32, 33, 34, 36, 37, 39, 42, 43, 38, 0, 41, 0, 40}; +static const yytype_int8 yytable[] = { + 9, 32, 10, 14, 15, 16, 17, 11, 33, 19, 20, 1, 12, 22, 23, 24, 25, 26, 16, 17, 2, + 13, 3, 4, 5, 6, 14, 15, 16, 17, 14, 15, 16, 17, 21, 28, 29, 18, 38, 30, 27, 14, + 15, 16, 17, 31, 34, 36, 39, 40, 37, 42, 43, 45, 48, 47, 49, 44, 0, 0, 46}; -static const yytype_int8 yycheck[] = {1, 3, 4, 5, 6, 5, 6, 0, 9, 10, 9, 12, 13, 14, - 15, 17, 3, 4, 5, 6, 9, 3, 4, 5, 6, 9, 20, 10, - 10, 18, 17, 20, 21, 22, 17, 3, 4, 5, 6, 11, 20, 11, - 8, 12, 20, 18, 10, 12, 10, 40, 17, -1, 38, -1, 17}; +static const yytype_int8 yycheck[] = {1, 10, 9, 3, 4, 5, 6, 9, 17, 10, 11, 9, 9, 14, 15, 16, + 17, 17, 5, 6, 18, 0, 20, 21, 22, 23, 3, 4, 5, 6, 3, 4, + 5, 6, 20, 17, 20, 10, 8, 11, 17, 3, 4, 5, 6, 20, 20, 10, + 12, 20, 11, 18, 10, 12, 10, 44, 46, 17, -1, -1, 17}; /* YYSTOS[STATE-NUM] -- The symbol kind of the 
accessing symbol of state STATE-NUM. */ -static const yytype_int8 yystos[] = {0, 9, 18, 20, 21, 22, 26, 27, 27, 9, 9, 0, 3, 4, 5, - 6, 10, 27, 27, 27, 27, 27, 27, 17, 17, 20, 11, 10, 17, 20, - 29, 11, 8, 12, 20, 28, 18, 10, 17, 12, 17, 28, 10, 29}; +static const yytype_int8 yystos[] = {0, 9, 18, 20, 21, 22, 23, 27, 28, 28, 9, 9, 9, + 0, 3, 4, 5, 6, 10, 28, 28, 20, 28, 28, 28, 28, + 17, 17, 17, 20, 11, 20, 10, 17, 20, 30, 10, 11, 8, + 12, 20, 29, 18, 10, 17, 12, 17, 29, 10, 30}; /* YYR1[RULE-NUM] -- Symbol kind of the left-hand side of rule RULE-NUM. */ static const yytype_int8 yyr1[] = - {0, 25, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28, 28, 29, 29}; + {0, 26, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 30, 30}; /* YYR2[RULE-NUM] -- Number of symbols on the right-hand side of rule RULE-NUM. */ -static const yytype_int8 yyr2[] = {0, 2, 1, 1, 3, 3, 3, 3, 3, 1, 6, 10, 8, 1, 3, 3, 5}; +static const yytype_int8 yyr2[] = {0, 2, 1, 1, 3, 3, 3, 3, 3, 1, 6, 6, 10, 8, 1, 3, 3, 5}; enum { @@ -1020,133 +1024,143 @@ yyparse(RawAST** result) switch(yyn) { case 2: /* top: exp */ -#line 57 "parser.y" +#line 58 "parser.y" { *result = (yyvsp[0].a); } -#line 1119 "parser.cpp" +#line 1122 "parser.cpp" break; case 3: /* exp: NUMBER */ -#line 60 "parser.y" +#line 61 "parser.y" { (yyval.a) = new RawAST(NUMBER_NODE, (yyvsp[0].d)); } -#line 1125 "parser.cpp" +#line 1128 "parser.cpp" break; case 4: /* exp: exp ADD exp */ -#line 61 "parser.y" +#line 62 "parser.y" { (yyval.a) = new RawAST(ADDITION_NODE, {(yyvsp[-2].a), (yyvsp[0].a)}); } -#line 1131 "parser.cpp" +#line 1134 "parser.cpp" break; case 5: /* exp: exp SUB exp */ -#line 62 "parser.y" +#line 63 "parser.y" { (yyval.a) = new RawAST(SUBTRACTION_NODE, {(yyvsp[-2].a), (yyvsp[0].a)}); } -#line 1137 "parser.cpp" +#line 1140 "parser.cpp" break; case 6: /* exp: exp MUL exp */ -#line 63 "parser.y" +#line 64 "parser.y" { (yyval.a) = new RawAST(MULTIPLY_NODE, {(yyvsp[-2].a), (yyvsp[0].a)}); } -#line 1143 "parser.cpp" +#line 1146 
"parser.cpp" break; case 7: /* exp: exp DIV exp */ -#line 64 "parser.y" +#line 65 "parser.y" { (yyval.a) = new RawAST(DIVIDE_NODE, {(yyvsp[-2].a), (yyvsp[0].a)}); } -#line 1149 "parser.cpp" +#line 1152 "parser.cpp" break; case 8: /* exp: OP exp CP */ -#line 65 "parser.y" +#line 66 "parser.y" { (yyval.a) = (yyvsp[-1].a); } -#line 1155 "parser.cpp" +#line 1158 "parser.cpp" break; case 9: /* exp: NAME */ -#line 66 "parser.y" +#line 67 "parser.y" { (yyval.a) = new RawAST(REFERENCE_NODE, (yyvsp[0].s)); free((yyvsp[0].s)); } -#line 1163 "parser.cpp" +#line 1166 "parser.cpp" break; - case 10: /* exp: REDUCE OP exp CM NAME CP */ -#line 69 "parser.y" + case 10: /* exp: ACCUMULATE OP NAME CM NAME CP */ +#line 70 "parser.y" + { + (yyval.a) = new RawAST(ACCUMULATE_NODE, (yyvsp[-3].s), (yyvsp[-1].s)); + free((yyvsp[-3].s)); + free((yyvsp[-1].s)); + } +#line 1176 "parser.cpp" + break; + + case 11: /* exp: REDUCE OP exp CM NAME CP */ +#line 75 "parser.y" { (yyval.a) = new RawAST(REDUCE_NODE, (yyvsp[-3].a), (yyvsp[-1].s), NULL); free((yyvsp[-1].s)); } -#line 1172 "parser.cpp" +#line 1185 "parser.cpp" break; - case 11: /* exp: REDUCE OP exp CM NAME CM O_SQ reduce_dim_args C_SQ CP */ -#line 73 "parser.y" + case 12: /* exp: REDUCE OP exp CM NAME CM O_SQ reduce_dim_args C_SQ CP */ +#line 79 "parser.y" { (yyval.a) = new RawAST(REDUCE_NODE, (yyvsp[-7].a), (yyvsp[-5].s), (yyvsp[-2].ll)); free((yyvsp[-5].s)); } -#line 1181 "parser.cpp" +#line 1194 "parser.cpp" break; - case 12: /* exp: SELECT OP exp CM O_SQ select_dim_args C_SQ CP */ -#line 77 "parser.y" + case 13: /* exp: SELECT OP exp CM O_SQ select_dim_args C_SQ CP */ +#line 83 "parser.y" { (yyval.a) = new RawAST(SELECT_NODE, (yyvsp[-5].a), (yyvsp[-2].ll)); } -#line 1189 "parser.cpp" +#line 1202 "parser.cpp" break; - case 13: /* reduce_dim_args: NAME */ -#line 84 "parser.y" + case 14: /* reduce_dim_args: NAME */ +#line 90 "parser.y" { (yyval.ll) = new LinkedList((yyvsp[0].s), NULL); free((yyvsp[0].s)); } -#line 1197 "parser.cpp" +#line 
1210 "parser.cpp" break; - case 14: /* reduce_dim_args: NAME CM reduce_dim_args */ -#line 87 "parser.y" + case 15: /* reduce_dim_args: NAME CM reduce_dim_args */ +#line 93 "parser.y" { (yyval.ll) = new LinkedList((yyvsp[-2].s), (yyvsp[0].ll)); free((yyvsp[-2].s)); } -#line 1205 "parser.cpp" +#line 1218 "parser.cpp" break; - case 15: /* select_dim_args: NAME EQUALS NUMBER */ -#line 94 "parser.y" + case 16: /* select_dim_args: NAME EQUALS NUMBER */ +#line 100 "parser.y" { (yyval.ll) = new LinkedList((yyvsp[-2].s), (yyvsp[0].d), NULL); free((yyvsp[-2].s)); } -#line 1213 "parser.cpp" +#line 1226 "parser.cpp" break; - case 16: /* select_dim_args: NAME EQUALS NUMBER CM select_dim_args */ -#line 97 "parser.y" + case 17: /* select_dim_args: NAME EQUALS NUMBER CM select_dim_args */ +#line 103 "parser.y" { (yyval.ll) = new LinkedList((yyvsp[-4].s), (yyvsp[-2].d), (yyvsp[0].ll)); free((yyvsp[-4].s)); } -#line 1221 "parser.cpp" +#line 1234 "parser.cpp" break; -#line 1225 "parser.cpp" +#line 1238 "parser.cpp" default: break; } @@ -1320,4 +1334,4 @@ yyparse(RawAST** result) return yyresult; } -#line 103 "parser.y" +#line 109 "parser.y" diff --git a/source/lib/rocprofiler-sdk/counters/parser/parser.h b/source/lib/rocprofiler-sdk/counters/parser/parser.h index 355f02ff..7c3abb26 100644 --- a/source/lib/rocprofiler-sdk/counters/parser/parser.h +++ b/source/lib/rocprofiler-sdk/counters/parser/parser.h @@ -35,8 +35,8 @@ especially those whose name start with YY_ or yy_. They are private implementation details that can be changed or removed. */ -#ifndef YY_YY_ROCPROFILER_SOURCE_LIB_ROCPROFILER_COUNTERS_PARSER_PARSER_H_INCLUDED -#define YY_YY_ROCPROFILER_SOURCE_LIB_ROCPROFILER_COUNTERS_PARSER_PARSER_H_INCLUDED +#ifndef YY_YY_ROCPROFILER_SOURCE_LIB_ROCPROFILER_SDK_COUNTERS_PARSER_PARSER_H_INCLUDED +#define YY_YY_ROCPROFILER_SOURCE_LIB_ROCPROFILER_SDK_COUNTERS_PARSER_PARSER_H_INCLUDED /* Debug traces. 
*/ #ifndef YYDEBUG # define YYDEBUG 1 @@ -81,8 +81,9 @@ enum yytokentype NAME = 274, /* NAME */ REDUCE = 275, /* REDUCE */ SELECT = 276, /* SELECT */ - LOWER_THAN_ELSE = 277, /* LOWER_THAN_ELSE */ - ELSE = 278 /* ELSE */ + ACCUMULATE = 277, /* ACCUMULATE */ + LOWER_THAN_ELSE = 278, /* LOWER_THAN_ELSE */ + ELSE = 279 /* ELSE */ }; typedef enum yytokentype yytoken_kind_t; #endif @@ -98,7 +99,7 @@ union YYSTYPE int64_t d; char* s; -# line 102 "parser.h" +# line 103 "parser.h" }; typedef union YYSTYPE YYSTYPE; # define YYSTYPE_IS_TRIVIAL 1 @@ -110,4 +111,4 @@ extern YYSTYPE yylval; int yyparse(RawAST** result); -#endif /* !YY_YY_ROCPROFILER_SOURCE_LIB_ROCPROFILER_COUNTERS_PARSER_PARSER_H_INCLUDED */ +#endif /* !YY_YY_ROCPROFILER_SOURCE_LIB_ROCPROFILER_SDK_COUNTERS_PARSER_PARSER_H_INCLUDED */ diff --git a/source/lib/rocprofiler-sdk/counters/parser/parser.y b/source/lib/rocprofiler-sdk/counters/parser/parser.y index f0caca79..3e7cb80a 100644 --- a/source/lib/rocprofiler-sdk/counters/parser/parser.y +++ b/source/lib/rocprofiler-sdk/counters/parser/parser.y @@ -39,6 +39,7 @@ void yyerror(rocprofiler::counters::RawAST**, const char *s) { ROCP_ERROR << s; %token NUMBER RANGE /* set data type for numbers */ %token NAME /* set data type for variables and user-defined functions */ %token REDUCE SELECT /* set data type for special functions */ +%token ACCUMULATE %type exp /* set data type for expressions */ %type NAME %type NUMBER @@ -64,6 +65,11 @@ exp: NUMBER { $$ = new RawAST(NUMBER_NODE, $1); } | NAME { $$ = new RawAST(REFERENCE_NODE, $1); free($1); } + | ACCUMULATE OP NAME CM NAME CP { + $$ = new RawAST(ACCUMULATE_NODE, $3, $5); + free($3); + free($5); + } | REDUCE OP exp CM NAME CP { $$ = new RawAST(REDUCE_NODE, $3, $5, NULL); free($5); diff --git a/source/lib/rocprofiler-sdk/counters/parser/raw_ast.hpp b/source/lib/rocprofiler-sdk/counters/parser/raw_ast.hpp index 84f7ca20..b35d245f 100644 --- a/source/lib/rocprofiler-sdk/counters/parser/raw_ast.hpp +++ 
b/source/lib/rocprofiler-sdk/counters/parser/raw_ast.hpp @@ -54,6 +54,14 @@ enum NodeType SELECT_NODE, SUBTRACTION_NODE, CONSTANT_NODE, + ACCUMULATE_NODE +}; + +enum class ACCUMULATE_OP_TYPE +{ + NONE = 0, + LOW_RESOLUTION, + HIGH_RESOLUTION }; struct LinkedList @@ -75,8 +83,9 @@ struct LinkedList struct RawAST { // Node type - NodeType type{NONE}; // Operation to perform on the counter set - std::string reduce_op{}; + NodeType type{NONE}; // Operation to perform on the counter set + std::string reduce_op{}; + ACCUMULATE_OP_TYPE accumulate_op{ACCUMULATE_OP_TYPE::NONE}; // Stores either the name or digit dependening on whether this // is a name or number @@ -164,6 +173,20 @@ struct RawAST } } + RawAST(NodeType t, const char* v, const char* op) + : type(t) + , value(std::string{CHECK_NOTNULL(v)}) + { + CHECK_NOTNULL(op); + static std::unordered_map map = { + {"NONE", ACCUMULATE_OP_TYPE::NONE}, + {"LOW_RES", ACCUMULATE_OP_TYPE::LOW_RESOLUTION}, + {"HIGH_RES", ACCUMULATE_OP_TYPE::HIGH_RESOLUTION}, + }; + accumulate_op = map.at(static_cast(op)); + CHECK_EQ(t, ACCUMULATE_NODE); + } + // Select operation constructor. Counter is the counter AST // to use for the reduce op, refs is the reference set AST. 
// dimensions contains the mapping for selecting dimensions @@ -227,16 +250,26 @@ struct formatter {rocprofiler::counters::MULTIPLY_NODE, "MULTIPLY_NODE"}, {rocprofiler::counters::NUMBER_NODE, "NUMBER_NODE"}, {rocprofiler::counters::RANGE_NODE, "RANGE_NODE"}, + {rocprofiler::counters::ACCUMULATE_NODE, "ACCUMULATE_NODE"}, {rocprofiler::counters::REDUCE_NODE, "REDUCE_NODE"}, {rocprofiler::counters::REFERENCE_NODE, "REFERENCE_NODE"}, {rocprofiler::counters::SELECT_NODE, "SELECT_NODE"}, {rocprofiler::counters::SUBTRACTION_NODE, "SUBTRACTION_NODE"}, }; - auto out = fmt::format_to(ctx.out(), - "{{\"Type\":\"{}\", \"REDUCE_OP\":\"{}\",", - NodeTypeToString.at(ast.type), - ast.reduce_op); + static std::unordered_map + AccumulateTypeToString = { + {rocprofiler::counters::ACCUMULATE_OP_TYPE::NONE, "NONE"}, + {rocprofiler::counters::ACCUMULATE_OP_TYPE::HIGH_RESOLUTION, "HIGH_RES"}, + {rocprofiler::counters::ACCUMULATE_OP_TYPE::LOW_RESOLUTION, "LOW_RES"}, + }; + + auto out = + fmt::format_to(ctx.out(), + "{{\"Type\":\"{}\", \"REDUCE_OP\":\"{}\", \"ACCUMULATE_OP\":\"{}\",", + NodeTypeToString.at(ast.type), + ast.reduce_op, + AccumulateTypeToString.at(ast.accumulate_op)); if(const auto* string_val = std::get_if(&ast.value)) { diff --git a/source/lib/rocprofiler-sdk/counters/parser/scanner.cpp b/source/lib/rocprofiler-sdk/counters/parser/scanner.cpp index e773ca90..aa161b6d 100644 --- a/source/lib/rocprofiler-sdk/counters/parser/scanner.cpp +++ b/source/lib/rocprofiler-sdk/counters/parser/scanner.cpp @@ -388,8 +388,8 @@ yy_fatal_error(const char* msg); (yy_hold_char) = *yy_cp; \ *yy_cp = '\0'; \ (yy_c_buf_p) = yy_cp; -#define YY_NUM_RULES 22 -#define YY_END_OF_BUFFER 23 +#define YY_NUM_RULES 23 +#define YY_END_OF_BUFFER 24 /* This struct is not used in this scanner, but its presence is necessary. 
*/ struct yy_trans_info @@ -397,19 +397,20 @@ struct yy_trans_info flex_int32_t yy_verify; flex_int32_t yy_nxt; }; -static const flex_int16_t yy_accept[48] = { - 0, 0, 0, 23, 21, 20, 18, 6, 7, 3, 1, 9, 2, 21, 4, 14, 10, 8, 17, 11, 12, 17, 17, 5, - 14, 19, 13, 14, 0, 17, 17, 17, 19, 13, 0, 0, 14, 17, 17, 0, 13, 17, 17, 17, 17, 15, 16, 0}; +static const flex_int16_t yy_accept[58] = { + 0, 0, 0, 24, 22, 21, 19, 6, 7, 3, 1, 9, 2, 22, 4, 14, 10, 8, 18, 11, + 12, 18, 18, 18, 5, 14, 20, 13, 14, 0, 18, 18, 18, 18, 20, 13, 0, 0, 14, 18, + 18, 18, 0, 13, 18, 18, 18, 18, 18, 18, 18, 15, 16, 18, 18, 18, 17, 0}; static const YY_CHAR yy_ec[256] = { 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 4, 5, 6, 7, 8, 9, 10, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 1, 1, 14, 1, 1, 1, 15, 15, 15, 15, 16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 17, 1, 18, 1, 15, 1, 15, 15, 19, 20, + 15, 15, 15, 15, 15, 15, 15, 17, 1, 18, 1, 15, 1, 19, 15, 20, 21, - 21, 15, 15, 15, 15, 15, 15, 22, 15, 15, 15, 15, 15, 23, 24, 25, 26, 15, 15, 15, 15, - 15, 1, 27, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 22, 15, 15, 15, 15, 15, 15, 23, 24, 15, 15, 15, 15, 25, 26, 27, 28, 15, 15, 15, 15, + 15, 1, 29, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -418,40 +419,40 @@ static const YY_CHAR yy_ec[256] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; -static const YY_CHAR yy_meta[28] = {0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, - 1, 3, 3, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 1}; +static const YY_CHAR yy_meta[30] = {0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, + 3, 3, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1}; -static const flex_int16_t yy_base[50] = 
{0, 0, 0, 70, 71, 71, 71, 71, 71, 71, 71, 71, 71, - 57, 57, 18, 71, 71, 0, 71, 71, 46, 45, 71, 17, 0, - 19, 31, 37, 0, 45, 42, 0, 38, 44, 51, 49, 32, 36, - 43, 36, 26, 23, 16, 11, 0, 0, 71, 29, 59}; +static const flex_int16_t yy_base[60] = { + 0, 0, 0, 81, 82, 82, 82, 82, 82, 82, 82, 82, 82, 68, 68, 20, 82, 82, 0, 82, + 82, 58, 55, 54, 82, 19, 0, 21, 28, 39, 0, 55, 53, 50, 0, 33, 45, 60, 59, 42, + 41, 46, 55, 54, 41, 44, 43, 34, 39, 32, 33, 0, 0, 34, 20, 17, 0, 82, 31, 57}; -static const flex_int16_t yy_def[50] = {0, 47, 1, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 48, 47, 47, 48, 48, 47, 47, 49, - 47, 47, 47, 48, 48, 48, 49, 47, 47, 47, 47, 48, 48, - 47, 47, 48, 48, 48, 48, 48, 48, 0, 47, 47}; +static const flex_int16_t yy_def[60] = {0, 57, 1, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 58, 57, 57, 58, 58, 58, 57, 57, 59, 57, 57, 57, + 58, 58, 58, 58, 59, 57, 57, 57, 57, 58, 58, 58, 57, 57, 58, + 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 0, 57, 57}; -static const flex_int16_t yy_nxt[99] = { - 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 18, 19, 20, 18, - 18, 18, 18, 21, 22, 18, 18, 23, 26, 24, 27, 33, 29, 28, 28, 34, 46, 45, 28, 28, - 34, 26, 44, 27, 35, 43, 35, 28, 40, 36, 33, 39, 28, 39, 34, 40, 40, 42, 41, 34, - 32, 36, 32, 36, 38, 37, 31, 30, 25, 24, 47, 3, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47 +static const flex_int16_t yy_nxt[112] = { + 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 18, 19, 20, 21, 18, + 18, 18, 18, 18, 22, 23, 18, 18, 24, 27, 25, 28, 35, 30, 29, 29, 36, 27, 56, 28, 29, + 29, 36, 29, 35, 37, 55, 37, 36, 29, 38, 42, 54, 42, 36, 53, 43, 34, 52, 34, 51, 50, + 49, 48, 47, 43, 43, 46, 45, 44, 38, 38, 41, 40, 39, 33, 32, 31, 26, 25, 57, 3, 57, + 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, -}; + 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57}; -static const flex_int16_t yy_chk[99] = { - 0, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 15, 24, 15, 26, 48, 24, 15, 26, 44, 43, 24, 15, - 26, 27, 42, 27, 28, 41, 28, 27, 40, 28, 33, 34, 27, 34, 33, 39, 34, 38, 37, 33, - 49, 36, 49, 35, 31, 30, 22, 21, 14, 13, 3, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47 +static const flex_int16_t yy_chk[112] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 15, 25, 15, 27, 58, 25, 15, 27, 28, 55, 28, 25, + 15, 27, 28, 35, 29, 54, 29, 35, 28, 29, 36, 53, 36, 35, 50, 36, 59, 49, 59, 48, 47, + 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 33, 32, 31, 23, 22, 21, 14, 13, 3, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, -}; + 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57}; /* Table of booleans, true if rule could match eol. */ -static const flex_int32_t yy_rule_can_match_eol[23] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, +static const flex_int32_t yy_rule_can_match_eol[24] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, }; static yy_state_type yy_last_accepting_state; @@ -476,9 +477,9 @@ char* yytext; #include "raw_ast.hpp" using namespace std; #define YYDEBUG 1 -#line 511 "scanner.cpp" +#line 518 "scanner.cpp" /* float exponent */ -#line 513 "scanner.cpp" +#line 520 "scanner.cpp" #define INITIAL 0 @@ -713,7 +714,7 @@ YY_DECL { #line 15 "scanner.l" -#line 730 "scanner.cpp" +#line 737 "scanner.cpp" while(/*CONSTCOND*/ 1) /* loops until end-of-file is reached */ { @@ -740,11 +741,11 @@ YY_DECL while(yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state) { yy_current_state = (int) yy_def[yy_current_state]; - if(yy_current_state >= 48) yy_c = yy_meta[yy_c]; + if(yy_current_state >= 58) yy_c = yy_meta[yy_c]; } yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c]; ++yy_cp; - } while(yy_base[yy_current_state] != 71); + } 
while(yy_base[yy_current_state] != 82); yy_find_action: yy_act = yy_accept[yy_current_state]; @@ -870,40 +871,46 @@ YY_DECL } YY_BREAK case 17: YY_RULE_SETUP -#line 37 "scanner.l" +#line 36 "scanner.l" + { + return ACCUMULATE; + } + YY_BREAK + case 18: YY_RULE_SETUP +#line 38 "scanner.l" { yylval.s = strdup(yytext); return NAME; } YY_BREAK - case 18: - /* rule 18 can match eol */ + case 19: + /* rule 19 can match eol */ YY_RULE_SETUP -#line 42 "scanner.l" +#line 43 "scanner.l" { return EOL; } - YY_BREAK - case 19: YY_RULE_SETUP -#line 43 "scanner.l" - YY_BREAK case 20: YY_RULE_SETUP #line 44 "scanner.l" - { /* ignore white space */ - } + YY_BREAK case 21: YY_RULE_SETUP #line 45 "scanner.l" - { - throw std::runtime_error(fmt::format("Mystery character {}", *yytext)); + { /* ignore white space */ } YY_BREAK case 22: YY_RULE_SETUP #line 46 "scanner.l" + { + throw std::runtime_error(fmt::format("Mystery character {}", *yytext)); + } + YY_BREAK + case 23: YY_RULE_SETUP +#line 47 "scanner.l" YY_FATAL_ERROR("flex scanner jammed"); YY_BREAK -#line 909 "scanner.cpp" +#line 921 "scanner.cpp" case YY_STATE_EOF(INITIAL): yyterminate(); case YY_END_OF_BUFFER: @@ -1187,7 +1194,7 @@ yy_get_previous_state(void) while(yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state) { yy_current_state = (int) yy_def[yy_current_state]; - if(yy_current_state >= 48) yy_c = yy_meta[yy_c]; + if(yy_current_state >= 58) yy_c = yy_meta[yy_c]; } yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c]; } @@ -1215,10 +1222,10 @@ yy_try_NUL_trans(yy_state_type yy_current_state) while(yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state) { yy_current_state = (int) yy_def[yy_current_state]; - if(yy_current_state >= 48) yy_c = yy_meta[yy_c]; + if(yy_current_state >= 58) yy_c = yy_meta[yy_c]; } yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c]; - yy_is_jam = (yy_current_state == 47); + yy_is_jam = (yy_current_state == 57); return yy_is_jam ? 
0 : yy_current_state; } @@ -1864,4 +1871,4 @@ yyfree(void* ptr) #define YYTABLES_NAME "yytables" -#line 46 "scanner.l" +#line 47 "scanner.l" diff --git a/source/lib/rocprofiler-sdk/counters/parser/scanner.l b/source/lib/rocprofiler-sdk/counters/parser/scanner.l index 96cff467..411eeaa0 100644 --- a/source/lib/rocprofiler-sdk/counters/parser/scanner.l +++ b/source/lib/rocprofiler-sdk/counters/parser/scanner.l @@ -33,6 +33,7 @@ EXP ([Ee][-+]?[0-9]+) "reduce" { return REDUCE; } "select" { return SELECT; } +"accumulate" { return ACCUMULATE; } [a-z_A-Z][a-z_A-Z0-9]* { yylval.s = strdup(yytext); diff --git a/source/lib/rocprofiler-sdk/counters/parser/tests/parser_test.cpp b/source/lib/rocprofiler-sdk/counters/parser/tests/parser_test.cpp index 8b3d5f88..2d863cff 100644 --- a/source/lib/rocprofiler-sdk/counters/parser/tests/parser_test.cpp +++ b/source/lib/rocprofiler-sdk/counters/parser/tests/parser_test.cpp @@ -33,36 +33,48 @@ TEST(parser, base_ops) { std::map expressionToExpected = { {"AB * BA", - "{\"Type\":\"MULTIPLY_NODE\", \"REDUCE_OP\":\"\", " - "\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"Value\":\"AB\", " - "\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], " - "\"Select_Dimension_Set\":[]},{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", " - "\"Value\":\"BA\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], " - "\"Select_Dimension_Set\":[]}], \"Reduce_Dimension_Set\":[], " + "{\"Type\":\"MULTIPLY_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", " + "\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", " + "\"ACCUMULATE_OP\":\"NONE\", " + "\"Value\":\"AB\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], " + "\"Select_Dimension_Set\":[]}," + "{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", " + "\"Value\":\"BA\", " + "\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}], " + "\"Reduce_Dimension_Set\":[], " "\"Select_Dimension_Set\":[]}"}, {"AB + BA", - 
"{\"Type\":\"ADDITION_NODE\", \"REDUCE_OP\":\"\", " - "\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"Value\":\"AB\", " - "\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], " - "\"Select_Dimension_Set\":[]},{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", " - "\"Value\":\"BA\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], " - "\"Select_Dimension_Set\":[]}], \"Reduce_Dimension_Set\":[], " + "{\"Type\":\"ADDITION_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", " + "\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", " + "\"ACCUMULATE_OP\":\"NONE\", " + "\"Value\":\"AB\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], " + "\"Select_Dimension_Set\":[]}," + "{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", " + "\"Value\":\"BA\", " + "\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}], " + "\"Reduce_Dimension_Set\":[], " "\"Select_Dimension_Set\":[]}"}, {"CD - ZX", - "{\"Type\":\"SUBTRACTION_NODE\", \"REDUCE_OP\":\"\", " - "\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"Value\":\"CD\", " - "\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], " - "\"Select_Dimension_Set\":[]},{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", " - "\"Value\":\"ZX\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], " - "\"Select_Dimension_Set\":[]}], \"Reduce_Dimension_Set\":[], " + "{\"Type\":\"SUBTRACTION_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", " + "\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", " + "\"ACCUMULATE_OP\":\"NONE\", " + "\"Value\":\"CD\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], " + "\"Select_Dimension_Set\":[]}," + "{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", " + "\"Value\":\"ZX\", " + "\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}], " + "\"Reduce_Dimension_Set\":[], " "\"Select_Dimension_Set\":[]}"}, {"NM / DB", - "{\"Type\":\"DIVIDE_NODE\", 
\"REDUCE_OP\":\"\", " - "\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"Value\":\"NM\", " - "\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], " - "\"Select_Dimension_Set\":[]},{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", " - "\"Value\":\"DB\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], " - "\"Select_Dimension_Set\":[]}], \"Reduce_Dimension_Set\":[], " + "{\"Type\":\"DIVIDE_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", " + "\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", " + "\"ACCUMULATE_OP\":\"NONE\", " + "\"Value\":\"NM\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], " + "\"Select_Dimension_Set\":[]}," + "{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", " + "\"Value\":\"DB\", " + "\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}], " + "\"Reduce_Dimension_Set\":[], " "\"Select_Dimension_Set\":[]}"}}; for(auto [op, expected] : expressionToExpected) @@ -81,51 +93,70 @@ TEST(parser, order_of_ops) { std::map expressionToExpected = { {"(AB + BA) / CD", - "{\"Type\":\"DIVIDE_NODE\", \"REDUCE_OP\":\"\", " + "{\"Type\":\"DIVIDE_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", " "\"Counter_Set\":[{\"Type\":\"ADDITION_NODE\", \"REDUCE_OP\":\"\", " - "\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"Value\":\"AB\", " - "\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], " - "\"Select_Dimension_Set\":[]},{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", " - "\"Value\":\"BA\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], " - "\"Select_Dimension_Set\":[]}], \"Reduce_Dimension_Set\":[], " - "\"Select_Dimension_Set\":[]},{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", " + "\"ACCUMULATE_OP\":\"NONE\", " + "\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", " + "\"ACCUMULATE_OP\":\"NONE\", " + "\"Value\":\"AB\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], " + "\"Select_Dimension_Set\":[]}," + 
"{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", " + "\"Value\":\"BA\", " + "\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}], " + "\"Reduce_Dimension_Set\":[]," + " \"Select_Dimension_Set\":[]},{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", " + "\"ACCUMULATE_OP\":\"NONE\", " "\"Value\":\"CD\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], " - "\"Select_Dimension_Set\":[]}], \"Reduce_Dimension_Set\":[], " - "\"Select_Dimension_Set\":[]}"}, + "\"Select_Dimension_Set\":[]}], " + "\"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}"}, {"(AB / BA) - BN", - "{\"Type\":\"SUBTRACTION_NODE\", \"REDUCE_OP\":\"\", " + "{\"Type\":\"SUBTRACTION_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", " "\"Counter_Set\":[{\"Type\":\"DIVIDE_NODE\", \"REDUCE_OP\":\"\", " - "\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"Value\":\"AB\", " - "\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], " - "\"Select_Dimension_Set\":[]},{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", " - "\"Value\":\"BA\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], " - "\"Select_Dimension_Set\":[]}], \"Reduce_Dimension_Set\":[], " + "\"ACCUMULATE_OP\":\"NONE\", " + "\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", " + "\"ACCUMULATE_OP\":\"NONE\", " + "\"Value\":\"AB\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], " + "\"Select_Dimension_Set\":[]}," + "{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", " + "\"Value\":\"BA\", " + "\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}], " + "\"Reduce_Dimension_Set\":[], " "\"Select_Dimension_Set\":[]},{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", " + "\"ACCUMULATE_OP\":\"NONE\", " "\"Value\":\"BN\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], " - "\"Select_Dimension_Set\":[]}], \"Reduce_Dimension_Set\":[], " - "\"Select_Dimension_Set\":[]}"}, + "\"Select_Dimension_Set\":[]}], " + 
"\"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}"}, {"AD / (CD - ZX)", - "{\"Type\":\"DIVIDE_NODE\", \"REDUCE_OP\":\"\", " - "\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"Value\":\"AD\", " - "\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], " - "\"Select_Dimension_Set\":[]},{\"Type\":\"SUBTRACTION_NODE\", \"REDUCE_OP\":\"\", " - "\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"Value\":\"CD\", " + "{\"Type\":\"DIVIDE_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", " + "\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", " + "\"ACCUMULATE_OP\":\"NONE\", " + "\"Value\":\"AD\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], " + "\"Select_Dimension_Set\":[]}," + "{\"Type\":\"SUBTRACTION_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", " + "\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", " + "\"ACCUMULATE_OP\":\"NONE\", \"Value\":\"CD\", " "\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], " - "\"Select_Dimension_Set\":[]},{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", " - "\"Value\":\"ZX\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], " - "\"Select_Dimension_Set\":[]}], \"Reduce_Dimension_Set\":[], " + "\"Select_Dimension_Set\":[]},{\"Type\":\"REFERENCE_NODE\", " + "\"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", \"Value\":\"ZX\", \"Counter_Set\":[], " + "\"Reduce_Dimension_Set\":[], " "\"Select_Dimension_Set\":[]}], \"Reduce_Dimension_Set\":[], " - "\"Select_Dimension_Set\":[]}"}, + "\"Select_Dimension_Set\":[]}], " + "\"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}"}, {"MN * (NM / DB)", - "{\"Type\":\"MULTIPLY_NODE\", \"REDUCE_OP\":\"\", " - "\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"Value\":\"MN\", " - "\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], " - "\"Select_Dimension_Set\":[]},{\"Type\":\"DIVIDE_NODE\", \"REDUCE_OP\":\"\", " - "\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"Value\":\"NM\", " 
- "\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], " - "\"Select_Dimension_Set\":[]},{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", " - "\"Value\":\"DB\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], " - "\"Select_Dimension_Set\":[]}], \"Reduce_Dimension_Set\":[], " + "{\"Type\":\"MULTIPLY_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", " + "\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", " + "\"ACCUMULATE_OP\":\"NONE\", " + "\"Value\":\"MN\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], " + "\"Select_Dimension_Set\":[]}," + "{\"Type\":\"DIVIDE_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", " + "\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", " + "\"ACCUMULATE_OP\":\"NONE\", " + "\"Value\":\"NM\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], " + "\"Select_Dimension_Set\":[]}," + "{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", " + "\"Value\":\"DB\", " + "\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}], " + "\"Reduce_Dimension_Set\":[], " "\"Select_Dimension_Set\":[]}], \"Reduce_Dimension_Set\":[], " "\"Select_Dimension_Set\":[]}"}}; @@ -145,29 +176,37 @@ TEST(parser, reduction) { std::vector> expressionToExpected = { {"reduce(AB, SUM, [DIMENSION_XCC,DIMENSION_SHADER_ENGINE])", - "{\"Type\":\"REDUCE_NODE\", \"REDUCE_OP\":\"SUM\", " - "\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"Value\":\"AB\", " + "{\"Type\":\"REDUCE_NODE\", \"REDUCE_OP\":\"SUM\", \"ACCUMULATE_OP\":\"NONE\", " + "\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", " + "\"ACCUMULATE_OP\":\"NONE\", \"Value\":\"AB\", " "\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}], " "\"Reduce_Dimension_Set\":[\"3\",\"1\"], \"Select_Dimension_Set\":[]}"}, {"reduce(AB+CD, SUM, [DIMENSION_XCC,DIMENSION_SHADER_ENGINE])", - "{\"Type\":\"REDUCE_NODE\", \"REDUCE_OP\":\"SUM\", " + "{\"Type\":\"REDUCE_NODE\", 
\"REDUCE_OP\":\"SUM\", \"ACCUMULATE_OP\":\"NONE\", " "\"Counter_Set\":[{\"Type\":\"ADDITION_NODE\", \"REDUCE_OP\":\"\", " - "\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"Value\":\"AB\", " + "\"ACCUMULATE_OP\":\"NONE\", " + "\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", " + "\"ACCUMULATE_OP\":\"NONE\", \"Value\":\"AB\", " "\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], " "\"Select_Dimension_Set\":[]},{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", " + "\"ACCUMULATE_OP\":\"NONE\", " "\"Value\":\"CD\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], " "\"Select_Dimension_Set\":[]}], \"Reduce_Dimension_Set\":[], " "\"Select_Dimension_Set\":[]}], \"Reduce_Dimension_Set\":[\"3\",\"1\"], " "\"Select_Dimension_Set\":[]}"}, {"reduce(AB,DIV, [DIMENSION_XCC,DIMENSION_SHADER_ENGINE])+reduce(DC,SUM, " "[DIMENSION_XCC,DIMENSION_SHADER_ENGINE])", - "{\"Type\":\"ADDITION_NODE\", \"REDUCE_OP\":\"\", " + "{\"Type\":\"ADDITION_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", " "\"Counter_Set\":[{\"Type\":\"REDUCE_NODE\", \"REDUCE_OP\":\"DIV\", " - "\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"Value\":\"AB\", " + "\"ACCUMULATE_OP\":\"NONE\", " + "\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", " + "\"ACCUMULATE_OP\":\"NONE\", \"Value\":\"AB\", " "\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}], " "\"Reduce_Dimension_Set\":[\"3\",\"1\"], " "\"Select_Dimension_Set\":[]},{\"Type\":\"REDUCE_NODE\", \"REDUCE_OP\":\"SUM\", " - "\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"Value\":\"DC\", " + "\"ACCUMULATE_OP\":\"NONE\", " + "\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", " + "\"ACCUMULATE_OP\":\"NONE\", \"Value\":\"DC\", " "\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}], " "\"Reduce_Dimension_Set\":[\"3\",\"1\"], \"Select_Dimension_Set\":[]}], " "\"Reduce_Dimension_Set\":[], 
\"Select_Dimension_Set\":[]}"}}; @@ -188,25 +227,30 @@ TEST(parser, DISABLED_selection) { std::map expressionToExpected = { {"select(AB, [SE=1,XCC=0])+select(DC,[SE=2])", - "{\"Type\":\"ADDITION_NODE\", \"REDUCE_OP\":\"\", " + "{\"Type\":\"ADDITION_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", " "\"Counter_Set\":[{\"Type\":\"SELECT_NODE\", \"REDUCE_OP\":\"\", " - "\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"Value\":\"AB\", " + "\"ACCUMULATE_OP\":\"NONE\", " + "\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", " + "\"ACCUMULATE_OP\":\"NONE\", \"Value\":\"AB\", " "\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}], " "\"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[\"(\"XCC\", 0)\",\"(\"SE\", " - "1)\"]},{\"Type\":\"SELECT_NODE\", \"REDUCE_OP\":\"\", " - "\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"Value\":\"DC\", " + "1)\"]},{\"Type\":\"SELECT_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", " + "\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", " + "\"ACCUMULATE_OP\":\"NONE\", \"Value\":\"DC\", " "\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}], " "\"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[\"(\"SE\", 2)\"]}], " "\"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}"}, {"select(AB, [SE=2,XCC=1,WGP=3])", - "{\"Type\":\"SELECT_NODE\", \"REDUCE_OP\":\"\", " - "\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"Value\":\"AB\", " + "{\"Type\":\"SELECT_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", " + "\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", " + "\"ACCUMULATE_OP\":\"NONE\", \"Value\":\"AB\", " "\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}], " "\"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[\"(\"WGP\", 3)\",\"(\"XCC\", " "1)\",\"(\"SE\", 2)\"]}"}, {"select(AB, [XCC=0])", - 
"{\"Type\":\"SELECT_NODE\", \"REDUCE_OP\":\"\", " - "\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"Value\":\"AB\", " + "{\"Type\":\"SELECT_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", " + "\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", " + "\"ACCUMULATE_OP\":\"NONE\", \"Value\":\"AB\", " "\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}], " "\"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[\"(\"XCC\", 0)\"]}"}}; @@ -216,6 +260,7 @@ TEST(parser, DISABLED_selection) auto* buf = yy_scan_string(op.c_str()); yyparse(&ast); ASSERT_TRUE(ast); + auto exp = fmt::format("{}", *ast); EXPECT_EQ(fmt::format("{}", *ast), expected); yy_delete_buffer(buf); delete ast; @@ -241,6 +286,71 @@ TEST(parser, parse_derived_counters) } } +TEST(parser, parse_accum_counter) +{ + std::map expressionToExpected = { + {"accumulate(SQ_WAVES,NONE)", + "{\"Type\":\"ACCUMULATE_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", \"Value\"" + ":\"SQ_WAVES\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], " + "\"Select_Dimension_Set\":[]}"}, + {"accumulate(SQ_WAVES,HIGH_RES)", + "{\"Type\":\"ACCUMULATE_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"HIGH_RES\", " + "\"Value" + "\":\"SQ_WAVES\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], " + "\"Select_Dimension_Set\":[]}"}, + {"accumulate(SQ_WAVES,LOW_RES)", + "{\"Type\":\"ACCUMULATE_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"LOW_RES\", " + "\"Value\"" + ":\"SQ_WAVES\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], " + "\"Select_Dimension_Set\":[]}"}}; + + for(auto [op, expected] : expressionToExpected) + { + RawAST* ast = nullptr; + auto* buf = yy_scan_string(op.c_str()); + yyparse(&ast); + ASSERT_TRUE(ast); + auto exp = fmt::format("{}", *ast); + EXPECT_EQ(fmt::format("{}", *ast), expected); + yy_delete_buffer(buf); + delete ast; + } +} + +TEST(parser, parse_nested_accum_counter) +{ + std::map expressionToExpected = { + 
{"reduce(accumulate(SQ_LEVEL_WAVES,HIGH_RES),sum)/reduce(GRBM_GUI_ACTIVE,max)/CU_NUM", + "{\"Type\":\"DIVIDE_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", " + "\"Counter_Set\":[{\"Type\":\"DIVIDE_NODE\", \"REDUCE_OP\":\"\", " + "\"ACCUMULATE_OP\":\"NONE\", \"Counter_Set\":[{\"Type\":\"REDUCE_NODE\", " + "\"REDUCE_OP\":\"sum\", \"ACCUMULATE_OP\":\"NONE\", " + "\"Counter_Set\":[{\"Type\":\"ACCUMULATE_NODE\", \"REDUCE_OP\":\"\", " + "\"ACCUMULATE_OP\":\"HIGH_RES\", \"Value\":\"SQ_LEVEL_WAVES\", \"Counter_Set\":[], " + "\"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}], \"Reduce_Dimension_Set\":[], " + "\"Select_Dimension_Set\":[]},{\"Type\":\"REDUCE_NODE\", \"REDUCE_OP\":\"max\", " + "\"ACCUMULATE_OP\":\"NONE\", \"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", " + "\"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", \"Value\":\"GRBM_GUI_ACTIVE\", " + "\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}], " + "\"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}], \"Reduce_Dimension_Set\":[], " + "\"Select_Dimension_Set\":[]},{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", " + "\"ACCUMULATE_OP\":\"NONE\", \"Value\":\"CU_NUM\", \"Counter_Set\":[], " + "\"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}], \"Reduce_Dimension_Set\":[], " + "\"Select_Dimension_Set\":[]}"}}; + + for(auto [op, expected] : expressionToExpected) + { + RawAST* ast = nullptr; + auto* buf = yy_scan_string(op.c_str()); + yyparse(&ast); + ASSERT_TRUE(ast); + auto exp = fmt::format("{}", *ast); + EXPECT_EQ(fmt::format("{}", *ast), expected); + yy_delete_buffer(buf); + delete ast; + } +} + // TEST(parser, parse_complex_counters) // { // std::map expressionToExpected = { diff --git a/source/lib/rocprofiler-sdk/counters/tests/agent_profiling.cpp b/source/lib/rocprofiler-sdk/counters/tests/agent_profiling.cpp index 4fbf308d..a995d8a0 100644 --- a/source/lib/rocprofiler-sdk/counters/tests/agent_profiling.cpp +++ 
b/source/lib/rocprofiler-sdk/counters/tests/agent_profiling.cpp @@ -254,6 +254,7 @@ class agent_profile_test : public ::testing::Test registration::set_init_status(-1); context::push_client(1); test_init(); + // rocprofiler_debugger_block(); counters::agent_profile_hsa_registration(); std::string kernel_name = "null_kernel"; diff --git a/source/lib/rocprofiler-sdk/counters/tests/core.cpp b/source/lib/rocprofiler-sdk/counters/tests/core.cpp index 035dfe7b..7ba514ba 100644 --- a/source/lib/rocprofiler-sdk/counters/tests/core.cpp +++ b/source/lib/rocprofiler-sdk/counters/tests/core.cpp @@ -116,28 +116,14 @@ get_client_ctx() return ctx; } -struct buf_check -{ - size_t expected_size{0}; - bool is_special{false}; - double special_val{0.0}; -}; - void buffered_callback(rocprofiler_context_id_t, rocprofiler_buffer_id_t, rocprofiler_record_header_t** headers, size_t num_headers, - void* user_data, + void* /* user_data */, uint64_t) { - buf_check& expected = *static_cast(user_data); - if(expected.is_special) - { - // Special values are single value constants (from agent_t) - expected.expected_size = 1; - } - std::set seen_data; std::set seen_dims; for(size_t i = 0; i < num_headers; ++i) @@ -456,33 +442,17 @@ TEST(core, check_callbacks) ASSERT_TRUE(ret_pkt) << fmt::format("Expected a packet to be generated for - {}", metric.name()); - /** - * Fake some data for the counter - */ - size_t* fake_data = static_cast(ret_pkt->profile.output_buffer.ptr); - for(size_t i = 0; i < (ret_pkt->profile.output_buffer.size / sizeof(size_t)); i++) - { - fake_data[i] = i + 1; - } - /** * Create the buffer and run test */ rocprofiler_buffer_id_t opt_buff_id = {.handle = 0}; - buf_check check = { - .expected_size = ret_pkt->profile.output_buffer.size / sizeof(size_t), - .is_special = !metric.special().empty(), - .special_val = (metric.special().empty() ? 
0.0 - : double(counters::get_agent_property( - std::string_view(metric.name()), - *agent.get_rocp_agent())))}; ROCPROFILER_CALL(rocprofiler_create_buffer(get_client_ctx(), 500 * sizeof(size_t), 500 * sizeof(size_t), ROCPROFILER_BUFFER_POLICY_LOSSLESS, buffered_callback, - &check, + nullptr, &opt_buff_id), "Could not create buffer"); cb_info->buffer = opt_buff_id; diff --git a/source/lib/rocprofiler-sdk/counters/tests/evaluate_ast_test.cpp b/source/lib/rocprofiler-sdk/counters/tests/evaluate_ast_test.cpp index f1764008..50ffd812 100644 --- a/source/lib/rocprofiler-sdk/counters/tests/evaluate_ast_test.cpp +++ b/source/lib/rocprofiler-sdk/counters/tests/evaluate_ast_test.cpp @@ -526,6 +526,74 @@ TEST(evaluate_ast, evaluate_simple_counters) } } +TEST(evaulate_ast, evaulate_hybrid_counters) +{ + using namespace rocprofiler::counters; + + auto get_base_rec_id = [](uint64_t counter_id) { + rocprofiler_counter_instance_id_t base_id = 0; + set_counter_in_rec(base_id, {.handle = counter_id}); + return base_id; + }; + + std::unordered_map metrics = { + {"VOORHEES", Metric("gfx9", "VOORHEES", "a", "a", "a", "", "", 0)}, + {"KRUEGER", Metric("gfx9", "KRUEGER", "a", "a", "a", "", "", 1)}, + {"MYERS", Metric("gfx9", "MYERS", "a", "a", "a", "", "", 2)}, + {"BATES", Metric("gfx9", "BATES", "a", "a", "a", "accumulate(VOORHEES,NONE)", "", 3)}, + {"KRAMER", Metric("gfx9", "KRAMER", "a", "a", "a", "accumulate(KRUEGER,LOW_RES)", "", 4)}, + {"TORRANCE", + Metric("gfx9", "TORRANCE", "a", "a", "a", "accumulate(MYERS,HIGH_RES)", "", 5)}}; + std::unordered_map> base_counter_data = { + {"VOORHEES", construct_test_data_dim(get_base_rec_id(0), {ROCPROFILER_DIMENSION_NONE}, 8)}, + {"KRUEGER", construct_test_data_dim(get_base_rec_id(1), {ROCPROFILER_DIMENSION_NONE}, 8)}, + {"MYERS", construct_test_data_dim(get_base_rec_id(2), {ROCPROFILER_DIMENSION_NONE}, 8)}, + }; + + std::unordered_map> asts; + for(const auto& [val, metric] : metrics) + { + RawAST* ast = nullptr; + auto buf = 
yy_scan_string(metric.expression().empty() ? metric.name().c_str() + : metric.expression().c_str()); + yyparse(&ast); + ASSERT_TRUE(ast) << metric.expression() << " " << metric.name(); + asts.emplace("gfx9", std::unordered_map{}) + .first->second.emplace(val, + EvaluateAST({.handle = metric.id()}, metrics, *ast, "gfx9")); + yy_delete_buffer(buf); + delete ast; + } + + std::vector< + std::tuple, int64_t, uint32_t>> + derived_counters = { + {"BATES", base_counter_data["VOORHEES"], 1, 0}, + {"KRAMER", base_counter_data["KRUEGER"], 1, 1}, + {"TORRANCE", base_counter_data["MYERS"], 1, 2}, + }; + + std::unordered_map> base_counter_decode; + for(const auto& [name, base_counter_v] : base_counter_data) + { + base_counter_decode[metrics[name].id()] = base_counter_v; + } + + for(auto& [name, expected, eval_count, flag] : derived_counters) + { + LOG(INFO) << name; + auto eval_counters = + rocprofiler::counters::get_required_hardware_counters(asts, "gfx9", metrics[name]); + ASSERT_TRUE(eval_counters); + ASSERT_EQ(eval_counters->size(), eval_count); + ASSERT_EQ(eval_counters->begin()->flags(), flag); + std::vector>> cache; + asts.at("gfx9").at(name).expand_derived(asts.at("gfx9")); + auto ret = asts.at("gfx9").at(name).evaluate(base_counter_decode, cache); + EXPECT_EQ(ret->size(), expected.size()); + } +} + namespace { void diff --git a/source/lib/rocprofiler-sdk/counters/tests/hsa_tables.cpp b/source/lib/rocprofiler-sdk/counters/tests/hsa_tables.cpp index c6ceac30..f29ce439 100644 --- a/source/lib/rocprofiler-sdk/counters/tests/hsa_tables.cpp +++ b/source/lib/rocprofiler-sdk/counters/tests/hsa_tables.cpp @@ -109,7 +109,9 @@ get_ext_table() val.hsa_amd_vmem_get_alloc_properties_from_handle_fn = hsa_amd_vmem_get_alloc_properties_from_handle; val.hsa_amd_agent_set_async_scratch_limit_fn = hsa_amd_agent_set_async_scratch_limit; - val.hsa_amd_queue_get_info_fn = hsa_amd_queue_get_info; +#if HSA_AMD_EXT_API_TABLE_STEP_VERSION >= 0x02 + val.hsa_amd_queue_get_info_fn = 
hsa_amd_queue_get_info; +#endif return val; }(); return _v; diff --git a/source/lib/rocprofiler-sdk/hsa/aql_packet.cpp b/source/lib/rocprofiler-sdk/hsa/aql_packet.cpp index e8a21ab4..97628b12 100644 --- a/source/lib/rocprofiler-sdk/hsa/aql_packet.cpp +++ b/source/lib/rocprofiler-sdk/hsa/aql_packet.cpp @@ -36,33 +36,89 @@ namespace rocprofiler { namespace hsa { -CounterAQLPacket::~CounterAQLPacket() +hsa_status_t +CounterAQLPacket::CounterMemoryPool::Alloc(void** ptr, size_t size, desc_t flags, void* data) { - if(!profile.command_buffer.ptr) - { - // pass, nothing malloced - } - else if(!command_buf_mallocd) - { - CHECK_HSA(free_func(profile.command_buffer.ptr), "freeing memory"); - } - else + if(size == 0) { - ::free(profile.command_buffer.ptr); + if(ptr != nullptr) *ptr = nullptr; + return HSA_STATUS_SUCCESS; } + if(!data) return HSA_STATUS_ERROR; + auto& pool = *reinterpret_cast(data); - if(!profile.output_buffer.ptr) - { - // pass, nothing malloced - } - else if(!output_buffer_malloced) - { - CHECK_HSA(free_func(profile.output_buffer.ptr), "freeing memory"); - } + if(!pool.allocate_fn || !pool.free_fn || !pool.allow_access_fn) return HSA_STATUS_ERROR; + if(!flags.host_access || pool.kernarg_pool_.handle == 0 || !pool.fill_fn) + return HSA_STATUS_ERROR; + + hsa_status_t status; + if(!pool.bIgnoreKernArg && flags.memory_hint == AQLPROFILE_MEMORY_HINT_DEVICE_UNCACHED) + status = pool.allocate_fn(pool.kernarg_pool_, size, 0, ptr); else - { - ::free(profile.output_buffer.ptr); - } + status = pool.allocate_fn(pool.cpu_pool_, size, 0, ptr); + + if(status != HSA_STATUS_SUCCESS) return status; + + status = pool.fill_fn(*ptr, 0u, size / sizeof(uint32_t)); + if(status != HSA_STATUS_SUCCESS) return status; + + status = pool.allow_access_fn(1, &pool.gpu_agent, nullptr, *ptr); + return status; +} + +void +CounterAQLPacket::CounterMemoryPool::Free(void* ptr, void* data) +{ + if(ptr == nullptr) return; + + assert(data); + auto& pool = *reinterpret_cast(data); + 
assert(pool.free_fn); + pool.free_fn(ptr); +} + +hsa_status_t +CounterAQLPacket::CounterMemoryPool::Copy(void* dst, const void* src, size_t size, void* data) +{ + if(size == 0) return HSA_STATUS_SUCCESS; + if(!data) return HSA_STATUS_ERROR; + auto& pool = *reinterpret_cast(data); + + if(!pool.api_copy_fn) return HSA_STATUS_ERROR; + + return pool.api_copy_fn(dst, src, size); +} + +CounterAQLPacket::CounterAQLPacket(aqlprofile_agent_handle_t agent, + CounterAQLPacket::CounterMemoryPool _pool, + const std::vector& events) +: pool(_pool) +{ + if(events.empty()) return; + + packets.start_packet = null_amd_aql_pm4_packet; + packets.stop_packet = null_amd_aql_pm4_packet; + packets.read_packet = null_amd_aql_pm4_packet; + + aqlprofile_pmc_profile_t profile{}; + profile.agent = agent; + profile.events = events.data(); + profile.event_count = static_cast(events.size()); + + hsa_status_t status = aqlprofile_pmc_create_packets(&this->handle, + &this->packets, + profile, + &CounterMemoryPool::Alloc, + &CounterMemoryPool::Free, + &CounterMemoryPool::Copy, + reinterpret_cast(&pool)); + if(status != HSA_STATUS_SUCCESS) ROCP_FATAL << "Could not create PMC packets!"; + + auto header = HSA_PACKET_TYPE_VENDOR_SPECIFIC << HSA_PACKET_HEADER_TYPE; + packets.start_packet.header = header; + packets.stop_packet.header = header; + packets.read_packet.header = header; + empty = false; } hsa_status_t diff --git a/source/lib/rocprofiler-sdk/hsa/aql_packet.hpp b/source/lib/rocprofiler-sdk/hsa/aql_packet.hpp index e0216e2e..97dfe8a8 100644 --- a/source/lib/rocprofiler-sdk/hsa/aql_packet.hpp +++ b/source/lib/rocprofiler-sdk/hsa/aql_packet.hpp @@ -68,21 +68,17 @@ class AQLPacket before_krn_pkt.clear(); after_krn_pkt.clear(); } + bool isEmpty() const { return empty; } virtual void populate_before() = 0; virtual void populate_after() = 0; - aqlprofile_handle_t pkt_handle = {.handle = 0}; + aqlprofile_handle_t GetHandle() const { return handle; } + aqlprofile_handle_t handle = {.handle = 0}; + bool 
empty = {true}; - bool empty = {true}; - hsa_ven_amd_aqlprofile_profile_t profile = {}; - hsa_ext_amd_aql_pm4_packet_t start = null_amd_aql_pm4_packet; - hsa_ext_amd_aql_pm4_packet_t stop = null_amd_aql_pm4_packet; - hsa_ext_amd_aql_pm4_packet_t read = null_amd_aql_pm4_packet; common::container::small_vector before_krn_pkt = {}; common::container::small_vector after_krn_pkt = {}; - - bool isEmpty() const { return empty; } }; class EmptyAQLPacket : public AQLPacket @@ -100,22 +96,46 @@ class CounterAQLPacket : public AQLPacket friend class rocprofiler::aql::CounterPacketConstruct; using memory_pool_free_func_t = decltype(::hsa_amd_memory_pool_free)*; + struct CounterMemoryPool + { + using desc_t = aqlprofile_buffer_desc_flags_t; + + hsa_agent_t gpu_agent; + hsa_amd_memory_pool_t cpu_pool_; + hsa_amd_memory_pool_t kernarg_pool_; + decltype(hsa_amd_memory_pool_allocate)* allocate_fn; + decltype(hsa_amd_agents_allow_access)* allow_access_fn; + decltype(hsa_amd_memory_pool_free)* free_fn; + decltype(hsa_amd_memory_fill)* fill_fn; + decltype(hsa_memory_copy)* api_copy_fn; + bool bIgnoreKernArg; + + static void Free(void* ptr, void* data); + static hsa_status_t Alloc(void** ptr, size_t size, desc_t flags, void* data); + static hsa_status_t Copy(void* dst, const void* src, size_t size, void* data); + }; + public: - CounterAQLPacket(memory_pool_free_func_t func) - : free_func{func} {}; - ~CounterAQLPacket() override; + CounterAQLPacket(aqlprofile_agent_handle_t agent, + CounterMemoryPool pool, + const std::vector& events); + ~CounterAQLPacket() override { aqlprofile_pmc_delete_packets(this->handle); }; - void populate_before() override { before_krn_pkt.push_back(start); }; + void populate_before() override + { + if(!empty) before_krn_pkt.push_back(packets.start_packet); + }; void populate_after() override { - after_krn_pkt.push_back(stop); - after_krn_pkt.push_back(read); + if(empty) return; + after_krn_pkt.push_back(packets.read_packet); + 
after_krn_pkt.push_back(packets.stop_packet); }; + aqlprofile_pmc_aql_packets_t packets{}; + protected: - bool command_buf_mallocd = false; - bool output_buffer_malloced = false; - memory_pool_free_func_t free_func = nullptr; + CounterMemoryPool pool{}; }; struct TraceMemoryPool diff --git a/source/lib/rocprofiler-sdk/thread_trace/tests/att_packet_test.cpp b/source/lib/rocprofiler-sdk/thread_trace/tests/att_packet_test.cpp index e8f814cc..3f760b47 100644 --- a/source/lib/rocprofiler-sdk/thread_trace/tests/att_packet_test.cpp +++ b/source/lib/rocprofiler-sdk/thread_trace/tests/att_packet_test.cpp @@ -95,10 +95,10 @@ TEST(thread_trace, resource_creation) packet->populate_after(); size_t vendor_packet = HSA_PACKET_TYPE_VENDOR_SPECIFIC << HSA_PACKET_HEADER_TYPE; - ASSERT_TRUE(packet->start.header == vendor_packet); - ASSERT_TRUE(packet->stop.header == vendor_packet); ASSERT_TRUE(packet->before_krn_pkt.size() > 0); ASSERT_TRUE(packet->after_krn_pkt.size() > 0); + ASSERT_TRUE(packet->before_krn_pkt.at(0).header == vendor_packet); + ASSERT_TRUE(packet->after_krn_pkt.at(0).header == vendor_packet); } {