diff --git a/samples/pc_sampling/pcs.cpp b/samples/pc_sampling/pcs.cpp index ccffc2e5..ee150516 100644 --- a/samples/pc_sampling/pcs.cpp +++ b/samples/pc_sampling/pcs.cpp @@ -322,9 +322,8 @@ rocprofiler_pc_sampling_callback(rocprofiler_context_id_t /*context_id*/, << "wave_in_group: " << std::setw(2) << static_cast(pc_sample->wave_in_group) << ", " << "chiplet: " << std::setw(2) - << static_cast(pc_sample->hw_id.chiplet) - << ", " - // << "cu_id: " << pc_sample->hw_id << ", " + << static_cast(pc_sample->hw_id.chiplet) << ", " + << "dispatch_id: " << std::setw(7) << pc_sample->dispatch_id << ", " << "correlation: {internal=" << std::setw(7) << pc_sample->correlation_id.internal << ", " << "external=" << std::setw(5) << pc_sample->correlation_id.external.value << "}" diff --git a/source/lib/rocprofiler-sdk/hsa/queue.cpp b/source/lib/rocprofiler-sdk/hsa/queue.cpp index d5bd040c..b776177f 100644 --- a/source/lib/rocprofiler-sdk/hsa/queue.cpp +++ b/source/lib/rocprofiler-sdk/hsa/queue.cpp @@ -395,7 +395,7 @@ WriteInterceptor(const void* packets, if(pc_sampling::is_pc_sample_service_configured(queue.get_agent().get_rocp_agent()->id)) { transformed_packets.emplace_back(pc_sampling::hsa::generate_marker_packet_for_kernel( - corr_id, tracing_data_v.external_correlation_ids)); + corr_id, tracing_data_v.external_correlation_ids, dispatch_id)); } #endif diff --git a/source/lib/rocprofiler-sdk/pc_sampling/hsa_adapter.cpp b/source/lib/rocprofiler-sdk/pc_sampling/hsa_adapter.cpp index 486c020e..6b8f3a04 100644 --- a/source/lib/rocprofiler-sdk/pc_sampling/hsa_adapter.cpp +++ b/source/lib/rocprofiler-sdk/pc_sampling/hsa_adapter.cpp @@ -105,6 +105,7 @@ amd_intercept_marker_handler_callback(const struct amd_aql_intercept_marker_s* p dispatch_pkt.write_index = packet_id; dispatch_pkt.correlation_id = {.internal = internal_correlation, .external = external_correlation}; + dispatch_pkt.dispatch_id = packet->user_data[2]; auto* parser = pcs_session->parser.get(); 
if(parser->shouldFlipRocrBuffer(dispatch_pkt)) @@ -187,7 +188,8 @@ data_ready_callback(void* client_callback_data, rocprofiler::hsa::rocprofiler_packet generate_marker_packet_for_kernel( context::correlation_id* correlation_id, - const tracing::external_correlation_id_map_t& external_correlation_ids) + const tracing::external_correlation_id_map_t& external_correlation_ids, + const rocprofiler_dispatch_id_t dispatch_id) { // This function executes for each kernel dispatched to the agent on which // the PC sampling service is configured. @@ -231,6 +233,9 @@ generate_marker_packet_for_kernel( marker_pkt.user_data[1] = 0; } + // dispatch_id should always be present + marker_pkt.user_data[2] = dispatch_id; + return rocprofiler::hsa::rocprofiler_packet(marker_pkt); } diff --git a/source/lib/rocprofiler-sdk/pc_sampling/hsa_adapter.hpp b/source/lib/rocprofiler-sdk/pc_sampling/hsa_adapter.hpp index 11a84661..bd4d600a 100644 --- a/source/lib/rocprofiler-sdk/pc_sampling/hsa_adapter.hpp +++ b/source/lib/rocprofiler-sdk/pc_sampling/hsa_adapter.hpp @@ -42,7 +42,8 @@ namespace hsa rocprofiler::hsa::rocprofiler_packet generate_marker_packet_for_kernel( context::correlation_id* correlation_id, - const tracing::external_correlation_id_map_t& external_correlation_ids); + const tracing::external_correlation_id_map_t& external_correlation_ids, + const rocprofiler_dispatch_id_t dispatch_id); void pc_sampling_service_start(context::pc_sampling_service* service); diff --git a/source/lib/rocprofiler-sdk/pc_sampling/parser/correlation.hpp b/source/lib/rocprofiler-sdk/pc_sampling/parser/correlation.hpp index 2a351987..5e6fce46 100644 --- a/source/lib/rocprofiler-sdk/pc_sampling/parser/correlation.hpp +++ b/source/lib/rocprofiler-sdk/pc_sampling/parser/correlation.hpp @@ -47,6 +47,12 @@ operator==(device_handle a, device_handle b) namespace Parser { +struct dispatch_correlation_ids_t +{ + rocprofiler_dispatch_id_t dispatch_id; + rocprofiler_correlation_id_t correlation_id; +}; + /** * @brief 
Struct immitating the correlation_id returned by the trap handler in raw PC samples. */ @@ -70,11 +76,11 @@ struct DispatchPkt struct cache_type_t { - trap_correlation_id_t id_in{.raw = ~0ul}; - rocprofiler_correlation_id_t id_out{}; - uint64_t dev_id = ~0ul; - size_t increment = 0; - size_t object_id = 0; + trap_correlation_id_t id_in{.raw = ~0ul}; + dispatch_correlation_ids_t id_out{}; + uint64_t dev_id = ~0ul; + size_t increment = 0; + size_t object_id = 0; }; inline bool @@ -131,7 +137,7 @@ class CorrelationMap { std::unique_lock lk(mut); auto trap_id = trap_correlation_id(pkt.doorbell_id, pkt.write_index, pkt.queue_size); - dispatch_to_correlation[{trap_id, pkt.device}] = pkt.correlation_id; + dispatch_to_correlation[{trap_id, pkt.device}] = {pkt.dispatch_id, pkt.correlation_id}; cache_reset_count.fetch_add(1); } @@ -150,7 +156,7 @@ class CorrelationMap * Given a device dev, doorbell and and wrapped dispatch_id, * @returns the correlation_id set by dispatch_pkt_id_t */ - rocprofiler_correlation_id_t get(device_handle dev, trap_correlation_id_t correlation_in) + dispatch_correlation_ids_t get(device_handle dev, trap_correlation_id_t correlation_in) { #ifndef _PARSER_CORRELATION_DISABLE_CACHE static thread_local cache_type_t cache{}; @@ -195,9 +201,9 @@ class CorrelationMap } private: - std::unordered_map dispatch_to_correlation{}; - std::atomic cache_reset_count{1}; - size_t object_id = 0; + std::unordered_map dispatch_to_correlation{}; + std::atomic cache_reset_count{1}; + size_t object_id = 0; std::mutex mut; }; @@ -238,9 +244,13 @@ add_upcoming_samples(const device_handle device, try { Parser::trap_correlation_id_t trap{.raw = snap->correlation_id}; - pc_sample.correlation_id = corr_map->get(device, trap); + auto dispatch_correlation_ids = corr_map->get(device, trap); + pc_sample.dispatch_id = dispatch_correlation_ids.dispatch_id; + pc_sample.correlation_id = dispatch_correlation_ids.correlation_id; } catch(std::exception& e) { + // TODO: introduce 
ROCPROFILER_DISPATCH_ID_INTERNAL_NONE + pc_sample.dispatch_id = 0; pc_sample.correlation_id = {.internal = ROCPROFILER_CORRELATION_ID_INTERNAL_NONE, .external = rocprofiler_user_data_t{ .value = ROCPROFILER_CORRELATION_ID_INTERNAL_NONE}}; diff --git a/source/lib/rocprofiler-sdk/pc_sampling/parser/rocr.h b/source/lib/rocprofiler-sdk/pc_sampling/parser/rocr.h index b396b406..d3d58d6b 100644 --- a/source/lib/rocprofiler-sdk/pc_sampling/parser/rocr.h +++ b/source/lib/rocprofiler-sdk/pc_sampling/parser/rocr.h @@ -73,7 +73,7 @@ typedef struct uint64_t read_index; /// both internal and external correlation ID. rocprofiler_correlation_id_t correlation_id; - reserved_type _[2]; + rocprofiler_dispatch_id_t dispatch_id; } dispatch_pkt_id_t; typedef struct diff --git a/source/lib/rocprofiler-sdk/pc_sampling/parser/tests/benchmark_test.cpp b/source/lib/rocprofiler-sdk/pc_sampling/parser/tests/benchmark_test.cpp index 78a115ef..accdcd9a 100644 --- a/source/lib/rocprofiler-sdk/pc_sampling/parser/tests/benchmark_test.cpp +++ b/source/lib/rocprofiler-sdk/pc_sampling/parser/tests/benchmark_test.cpp @@ -94,6 +94,7 @@ Benchmark(bool bWarmup) TEST(pcs_parser, benchmark_test) { // Tests for host trap v0 records + std::cout << "Parsing rocprofiler_pc_sampling_record_host_trap_v0_t records!" 
<< std::endl; EXPECT_EQ(Benchmark(true), true); EXPECT_EQ(Benchmark(false), true); EXPECT_EQ(Benchmark(false), true); diff --git a/tests/pc_sampling/pcs.cpp b/tests/pc_sampling/pcs.cpp index e660744e..551852b8 100644 --- a/tests/pc_sampling/pcs.cpp +++ b/tests/pc_sampling/pcs.cpp @@ -351,9 +351,8 @@ rocprofiler_pc_sampling_callback(rocprofiler_context_id_t /*context_id*/, << "wave_in_group: " << std::setw(2) << static_cast(pc_sample->wave_in_group) << ", " << "chiplet: " << std::setw(2) - << static_cast(pc_sample->hw_id.chiplet) - << ", " - // << "cu_id: " << pc_sample->hw_id << ", " + << static_cast(pc_sample->hw_id.chiplet) << ", " + << "dispatch_id: " << std::setw(7) << pc_sample->dispatch_id << ", " << "correlation: {internal=" << std::setw(7) << pc_sample->correlation_id.internal << ", " << "external=" << std::setw(5) << pc_sample->correlation_id.external.value