Skip to content

Commit

Permalink
Add logical_node_type_id field to rocprofiler_agent_t (#948)
Browse files Browse the repository at this point in the history
* Add logical_node_type_id field to rocprofiler_agent_t

* Patch queue_controller
  • Loading branch information
jrmadsen authored Jun 25, 2024
1 parent 62ec95e commit af2f85c
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 7 deletions.
16 changes: 16 additions & 0 deletions source/include/rocprofiler-sdk/agent.h
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,22 @@ typedef struct rocprofiler_agent_v0_t
///< HSA_AMD_AGENT_INFO_DRIVER_NODE_ID property
int32_t logical_node_id; ///< Logical sequence number. This will always be [0..N) where N is
///< the total number of agents
int32_t logical_node_type_id;
int32_t reserved_padding0; ///< pads logical_node_type_id to 64 bits

/// @var logical_node_type_id
/// @brief Logical sequence number with respect to other agents of same type. This will always
/// be [0..N) where N is the total number of X agents (where X is a ::rocprofiler_agent_type_t
/// value). This field is intended to help with environment variable indexing used to mask GPUs
/// at runtime (i.e. HIP_VISIBLE_DEVICES and ROCR_VISIBLE_DEVICES) which start at zero and only
/// apply to GPUs, e.g., the logical_node_type_id value for the first GPU will be 0, the second
/// GPU will have a value of 1, etc., regardless of how many agents of a different type preceded
/// it (and thus increased the ::node_id or ::logical_node_id).
///
/// Example: on a system with 2 CPUs and 2 GPUs, where the node ids are 0=CPU, 1=GPU, 2=CPU,
/// 3=GPU, the CPU node_ids 0 and 2 would have logical_node_type_id values of 0 and 1,
/// respectively, and the GPU node_ids 1 and 3 would also have logical_node_type_id values of 0
/// and 1, respectively.
} rocprofiler_agent_v0_t;

typedef rocprofiler_agent_v0_t rocprofiler_agent_t;
Expand Down
23 changes: 18 additions & 5 deletions source/lib/rocprofiler-sdk/agent.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,9 @@ read_topology()
auto data = std::vector<unique_agent_t>{};
uint64_t idcount = 0;
uint64_t nodecount = 0;
uint64_t cpucount = 0;
uint64_t gpucount = 0;
uint64_t unkcount = 0;

while(true)
{
Expand Down Expand Up @@ -398,11 +401,12 @@ read_topology()
// we may have been able to open the properties file but if it was empty, we ignore it
if(properties.empty()) continue;

auto agent_info = common::init_public_api_struct(rocprofiler_agent_t{});
agent_info.type = ROCPROFILER_AGENT_TYPE_NONE;
agent_info.logical_node_id = idcount++;
agent_info.node_id = node_id;
agent_info.id.handle = (agent_info.logical_node_id) + get_agent_offset();
auto agent_info = common::init_public_api_struct(rocprofiler_agent_t{});
agent_info.type = ROCPROFILER_AGENT_TYPE_NONE;
agent_info.logical_node_id = idcount++;
agent_info.node_id = node_id;
agent_info.id.handle = (agent_info.logical_node_id) + get_agent_offset();
agent_info.logical_node_type_id = -1;

if(!name_prop.empty())
agent_info.model_name =
Expand All @@ -419,6 +423,15 @@ read_topology()
agent_info.type = ROCPROFILER_AGENT_TYPE_CPU;
else if(agent_info.simd_count > 0)
agent_info.type = ROCPROFILER_AGENT_TYPE_GPU;
else
ROCP_WARNING << "agent " << agent_info.node_id << " is neither a CPU nor a GPU";

if(agent_info.type == ROCPROFILER_AGENT_TYPE_CPU)
agent_info.logical_node_type_id = cpucount++;
else if(agent_info.type == ROCPROFILER_AGENT_TYPE_GPU)
agent_info.logical_node_type_id = gpucount++;
else
agent_info.logical_node_type_id = unkcount++;

read_property(properties, "mem_banks_count", agent_info.mem_banks_count);
read_property(properties, "caches_count", agent_info.caches_count);
Expand Down
4 changes: 3 additions & 1 deletion source/lib/rocprofiler-sdk/hsa/queue_controller.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,9 @@ constexpr rocprofiler_agent_t default_agent =
.product_name = nullptr,
.model_name = nullptr,
.node_id = 0,
.logical_node_id = 0};
.logical_node_id = 0,
.logical_node_type_id = 0,
.reserved_padding0 = 0};
} // namespace

void
Expand Down
4 changes: 3 additions & 1 deletion source/lib/rocprofiler-sdk/tests/agent.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,11 @@ TEST(rocprofiler_lib, agent_abi)
EXPECT_EQ(offsetof(rocprofiler_agent_t, model_name), 272) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, node_id), 280) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, logical_node_id), 284) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, logical_node_type_id), 288) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, reserved_padding0), 292) << msg;
// Add test for offset of new field above this. Do NOT change any existing values!

constexpr auto expected_rocp_agent_size = 288;
constexpr auto expected_rocp_agent_size = 296;
// If a new field is added, increase this value by the size of the new field(s)
EXPECT_EQ(sizeof(rocprofiler_agent_t), expected_rocp_agent_size)
<< "ABI break. If you added a new field, make sure that this is the only new check that "
Expand Down

0 comments on commit af2f85c

Please sign in to comment.