Skip to content

Commit

Permalink
PC sampling services provides dispatch id (#1209)
Browse files Browse the repository at this point in the history
  • Loading branch information
vlaindic authored Nov 21, 2024
1 parent 6ae441f commit 8d2ce4b
Show file tree
Hide file tree
Showing 8 changed files with 36 additions and 21 deletions.
5 changes: 2 additions & 3 deletions samples/pc_sampling/pcs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -322,9 +322,8 @@ rocprofiler_pc_sampling_callback(rocprofiler_context_id_t /*context_id*/,
<< "wave_in_group: " << std::setw(2)
<< static_cast<unsigned int>(pc_sample->wave_in_group) << ", "
<< "chiplet: " << std::setw(2)
<< static_cast<unsigned int>(pc_sample->hw_id.chiplet)
<< ", "
// << "cu_id: " << pc_sample->hw_id << ", "
<< static_cast<unsigned int>(pc_sample->hw_id.chiplet) << ", "
<< "dispatch_id: " << std::setw(7) << pc_sample->dispatch_id << ","
<< "correlation: {internal=" << std::setw(7)
<< pc_sample->correlation_id.internal << ", "
<< "external=" << std::setw(5) << pc_sample->correlation_id.external.value << "}"
Expand Down
2 changes: 1 addition & 1 deletion source/lib/rocprofiler-sdk/hsa/queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,7 @@ WriteInterceptor(const void* packets,
if(pc_sampling::is_pc_sample_service_configured(queue.get_agent().get_rocp_agent()->id))
{
transformed_packets.emplace_back(pc_sampling::hsa::generate_marker_packet_for_kernel(
corr_id, tracing_data_v.external_correlation_ids));
corr_id, tracing_data_v.external_correlation_ids, dispatch_id));
}
#endif

Expand Down
7 changes: 6 additions & 1 deletion source/lib/rocprofiler-sdk/pc_sampling/hsa_adapter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ amd_intercept_marker_handler_callback(const struct amd_aql_intercept_marker_s* p
dispatch_pkt.write_index = packet_id;
dispatch_pkt.correlation_id = {.internal = internal_correlation,
.external = external_correlation};
dispatch_pkt.dispatch_id = packet->user_data[2];

auto* parser = pcs_session->parser.get();
if(parser->shouldFlipRocrBuffer(dispatch_pkt))
Expand Down Expand Up @@ -187,7 +188,8 @@ data_ready_callback(void* client_callback_data,
rocprofiler::hsa::rocprofiler_packet
generate_marker_packet_for_kernel(
context::correlation_id* correlation_id,
const tracing::external_correlation_id_map_t& external_correlation_ids)
const tracing::external_correlation_id_map_t& external_correlation_ids,
const rocprofiler_dispatch_id_t dispatch_id)
{
// This function executes for each kernel dispatched to the agent on which
// the PC sampling service is configured.
Expand Down Expand Up @@ -231,6 +233,9 @@ generate_marker_packet_for_kernel(
marker_pkt.user_data[1] = 0;
}

// dispatch_id should always be present
marker_pkt.user_data[2] = dispatch_id;

return rocprofiler::hsa::rocprofiler_packet(marker_pkt);
}

Expand Down
3 changes: 2 additions & 1 deletion source/lib/rocprofiler-sdk/pc_sampling/hsa_adapter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ namespace hsa
rocprofiler::hsa::rocprofiler_packet
generate_marker_packet_for_kernel(
context::correlation_id* correlation_id,
const tracing::external_correlation_id_map_t& external_correlation_ids);
const tracing::external_correlation_id_map_t& external_correlation_ids,
const rocprofiler_dispatch_id_t dispatch_id);

void
pc_sampling_service_start(context::pc_sampling_service* service);
Expand Down
32 changes: 21 additions & 11 deletions source/lib/rocprofiler-sdk/pc_sampling/parser/correlation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,12 @@ operator==(device_handle a, device_handle b)

namespace Parser
{
/**
 * @brief Bundles the dispatch id and the correlation id recorded for a single dispatch packet.
 *
 * This is the value type stored in CorrelationMap and returned by CorrelationMap::get.
 * NOTE: the struct is brace-initialized positionally as {dispatch_id, correlation_id},
 * so the member order below must not be changed.
 */
struct dispatch_correlation_ids_t
{
rocprofiler_dispatch_id_t dispatch_id;  ///< dispatch id carried by the dispatch packet
rocprofiler_correlation_id_t correlation_id;  ///< correlation id carried by the dispatch packet
};

/**
* @brief Struct imitating the correlation_id returned by the trap handler in raw PC samples.
*/
Expand All @@ -70,11 +76,11 @@ struct DispatchPkt

struct cache_type_t
{
trap_correlation_id_t id_in{.raw = ~0ul};
rocprofiler_correlation_id_t id_out{};
uint64_t dev_id = ~0ul;
size_t increment = 0;
size_t object_id = 0;
trap_correlation_id_t id_in{.raw = ~0ul};
dispatch_correlation_ids_t id_out{};
uint64_t dev_id = ~0ul;
size_t increment = 0;
size_t object_id = 0;
};

inline bool
Expand Down Expand Up @@ -131,7 +137,7 @@ class CorrelationMap
{
std::unique_lock<std::mutex> lk(mut);
auto trap_id = trap_correlation_id(pkt.doorbell_id, pkt.write_index, pkt.queue_size);
dispatch_to_correlation[{trap_id, pkt.device}] = pkt.correlation_id;
dispatch_to_correlation[{trap_id, pkt.device}] = {pkt.dispatch_id, pkt.correlation_id};
cache_reset_count.fetch_add(1);
}

Expand All @@ -150,7 +156,7 @@ class CorrelationMap
* Given a device dev, doorbell and wrapped dispatch_id,
* @returns the correlation_id set by dispatch_pkt_id_t
*/
rocprofiler_correlation_id_t get(device_handle dev, trap_correlation_id_t correlation_in)
dispatch_correlation_ids_t get(device_handle dev, trap_correlation_id_t correlation_in)
{
#ifndef _PARSER_CORRELATION_DISABLE_CACHE
static thread_local cache_type_t cache{};
Expand Down Expand Up @@ -195,9 +201,9 @@ class CorrelationMap
}

private:
std::unordered_map<DispatchPkt, rocprofiler_correlation_id_t> dispatch_to_correlation{};
std::atomic<size_t> cache_reset_count{1};
size_t object_id = 0;
std::unordered_map<DispatchPkt, dispatch_correlation_ids_t> dispatch_to_correlation{};
std::atomic<size_t> cache_reset_count{1};
size_t object_id = 0;

std::mutex mut;
};
Expand Down Expand Up @@ -238,9 +244,13 @@ add_upcoming_samples(const device_handle device,
try
{
Parser::trap_correlation_id_t trap{.raw = snap->correlation_id};
pc_sample.correlation_id = corr_map->get(device, trap);
auto dispatch_correlation_ids = corr_map->get(device, trap);
pc_sample.dispatch_id = dispatch_correlation_ids.dispatch_id;
pc_sample.correlation_id = dispatch_correlation_ids.correlation_id;
} catch(std::exception& e)
{
// TODO: introduce ROCPROFILER_DISPATCH_ID_INTERNAL_NONE
pc_sample.dispatch_id = 0;
pc_sample.correlation_id = {.internal = ROCPROFILER_CORRELATION_ID_INTERNAL_NONE,
.external = rocprofiler_user_data_t{
.value = ROCPROFILER_CORRELATION_ID_INTERNAL_NONE}};
Expand Down
2 changes: 1 addition & 1 deletion source/lib/rocprofiler-sdk/pc_sampling/parser/rocr.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ typedef struct
uint64_t read_index;
/// both internal and external correlation ID.
rocprofiler_correlation_id_t correlation_id;
reserved_type _[2];
rocprofiler_dispatch_id_t dispatch_id;
} dispatch_pkt_id_t;

typedef struct
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ Benchmark(bool bWarmup)
TEST(pcs_parser, benchmark_test)
{
// Tests for host trap v0 records
std::cout << "Parsing rocprofiler_pc_sampling_record_host_trap_v0_t records!" << std::endl;
EXPECT_EQ(Benchmark<rocprofiler_pc_sampling_record_host_trap_v0_t>(true), true);
EXPECT_EQ(Benchmark<rocprofiler_pc_sampling_record_host_trap_v0_t>(false), true);
EXPECT_EQ(Benchmark<rocprofiler_pc_sampling_record_host_trap_v0_t>(false), true);
Expand Down
5 changes: 2 additions & 3 deletions tests/pc_sampling/pcs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -351,9 +351,8 @@ rocprofiler_pc_sampling_callback(rocprofiler_context_id_t /*context_id*/,
<< "wave_in_group: " << std::setw(2)
<< static_cast<unsigned int>(pc_sample->wave_in_group) << ", "
<< "chiplet: " << std::setw(2)
<< static_cast<unsigned int>(pc_sample->hw_id.chiplet)
<< ", "
// << "cu_id: " << pc_sample->hw_id << ", "
<< static_cast<unsigned int>(pc_sample->hw_id.chiplet) << ", "
<< "dispatch_id: " << std::setw(7) << pc_sample->dispatch_id << ","
<< "correlation: {internal=" << std::setw(7)
<< pc_sample->correlation_id.internal << ", "
<< "external=" << std::setw(5) << pc_sample->correlation_id.external.value
Expand Down

0 comments on commit 8d2ce4b

Please sign in to comment.