Skip to content

Commit

Permalink
SWDEV-492625 memory free functions (#11)
Browse files Browse the repository at this point in the history
* SWDEV-492625: Track free memory HSA functions to help determine total amount of memory allocated on the system at any one time

* Minor fixes to address comments

* Update allocation size description

* Moved get function back to specialization, minor typo fixes

* Removed memory_operation_type field, removed memory_pool allocation enum, converted starting address to hex string for json format.

* Made conversion to hex_string a function, changed address to use union rocprofiler_address_t type, changed VMEM descriptors

* Removed as_hex from the global namespace

* Formatting

* Removed TRACK_EVENT for memory allocation, now TRACK_COUNTER for memory allocation is being performed

* Check if address was recorded before retrieving allocation size in generate Perfetto

* Formatting

* Update source/lib/output/generatePerfetto.cpp

* Explicitly disable app-abort tests

* Remove excluding app-abort test from workflow CI

- redundant because these tests are now explicitly marked as disabled

---------

Co-authored-by: Madsen, Jonathan <[email protected]>
Co-authored-by: Jonathan R. Madsen <[email protected]>
  • Loading branch information
3 people authored Dec 6, 2024
1 parent 00c46fd commit 79006bb
Show file tree
Hide file tree
Showing 23 changed files with 564 additions and 248 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/continuous_integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ env:
ROCM_PATH: "/opt/rocm"
GPU_TARGETS: "gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102"
PATH: "/usr/bin:$PATH"
navi3_EXCLUDE_TESTS_REGEX: "^(test-page-migration-(execute|validate)|rocprofv3-test-(execute|validate)-app-abort)$"
vega20_EXCLUDE_TESTS_REGEX: "^(test-page-migration-(execute|validate)|rocprofv3-test-(execute|validate)-app-abort)$"
mi200_EXCLUDE_TESTS_REGEX: "^(test-page-migration-(execute|validate)|rocprofv3-test-(execute|validate)-app-abort)$"
mi300_EXCLUDE_TESTS_REGEX: "^(test-page-migration-(execute|validate)|rocprofv3-test-(execute|validate)-app-abort)$"
navi3_EXCLUDE_TESTS_REGEX: "^(test-page-migration-(execute|validate))$"
vega20_EXCLUDE_TESTS_REGEX: "^(test-page-migration-(execute|validate))$"
mi200_EXCLUDE_TESTS_REGEX: "^(test-page-migration-(execute|validate))$"
mi300_EXCLUDE_TESTS_REGEX: "^(test-page-migration-(execute|validate))$"
navi3_EXCLUDE_LABEL_REGEX: "^(pc-sampling|openmp-target)$"
vega20_EXCLUDE_LABEL_REGEX: "^(pc-sampling|openmp-target)$"
mi200_EXCLUDE_LABEL_REGEX: "^(openmp-target)$"
Expand Down
11 changes: 7 additions & 4 deletions source/docs/data/memory_allocation_trace.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
"Kind","Operation","Agent_Id","Allocation_Size","Starting_Address","Correlation_Id","Start_Timestamp","End_Timestamp"
"MEMORY_ALLOCATION","MEMORY_ALLOCATION_ALLOCATE",0,1024,140341497356288,1,65788054621500,65788055678893
"MEMORY_ALLOCATION","MEMORY_ALLOCATION_ALLOCATE",0,1024,140341497348096,1,65788055691832,65788056666844
"MEMORY_ALLOCATION","MEMORY_ALLOCATION_ALLOCATE",0,1024,140341497339904,1,65788056672061,65788057643457
"Kind","Operation","Agent_Id","Allocation_Size","Address","Correlation_Id","Start_Timestamp","End_Timestamp"
"MEMORY_ALLOCATION","MEMORY_ALLOCATION_ALLOCATE",0,1024,0x7fb2d0005000,11,3721742710532634,3721742710584854
"MEMORY_ALLOCATION","MEMORY_ALLOCATION_FREE",0,0,0x7fb2d0005000,12,3721742710596404,3721742710933366
"MEMORY_ALLOCATION","MEMORY_ALLOCATION_ALLOCATE",0,1024,0x7fb2d0005000,13,3721742710941416,3721742710960916
"MEMORY_ALLOCATION","MEMORY_ALLOCATION_FREE",0,0,0x7fb2d0005000,14,3721742710967236,3721742711197647
"MEMORY_ALLOCATION","MEMORY_ALLOCATION_ALLOCATE",0,1024,0x7fb2d0005000,15,3721742711204077,3721742711219717
"MEMORY_ALLOCATION","MEMORY_ALLOCATION_FREE",0,0,0x7fb2d0005000,16,3721742711225857,3721742711466018
2 changes: 1 addition & 1 deletion source/docs/how-to/using-rocprofv3.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1318,5 +1318,5 @@ Properties
- **`thread_id`** *(integer, required)*: Thread ID.
- **`agent_id`** *(object, required)*: Agent ID.
- **`handle`** *(integer, required)*: Handle of the agent.
- **`starting_address`** *(string, required)*: Starting address of allocation.
- **`address`** *(string, required)*: Starting address of allocation.
- **`allocation_size`** *(integer, required)*: Size of allocation.
10 changes: 5 additions & 5 deletions source/docs/rocprofv3-schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -1566,13 +1566,13 @@
"handle"
]
},
"starting_address": {
"type": "integer",
"description": "Starting address of allocation"
"address": {
"type": "string",
"description": "Starting address for allocation or freeing memory."
},
"allocation_size": {
"type": "integer",
"description": "allocation_size"
"description": "Size of memory allocation. The size is not currently tracked for free operations."
}
},
"required": [
Expand All @@ -1584,7 +1584,7 @@
"end_timestamp",
"thread_id",
"agent_id",
"starting_address",
"address",
"allocation_size"
]
}
Expand Down
6 changes: 3 additions & 3 deletions source/include/rocprofiler-sdk/buffer_tracing.h
Original file line number Diff line number Diff line change
Expand Up @@ -217,9 +217,9 @@ typedef struct
rocprofiler_thread_id_t thread_id; ///< id for thread that triggered copy
rocprofiler_timestamp_t start_timestamp; ///< start time in nanoseconds
rocprofiler_timestamp_t end_timestamp; ///< end time in nanoseconds
rocprofiler_agent_id_t agent_id; ///< agent information for memory allocation
uint64_t starting_address; ///< starting address for memory allocation
uint64_t allocation_size; ///< size for memory allocation
rocprofiler_agent_id_t agent_id; ///< agent information for memory allocation
rocprofiler_address_t address; ///< starting address for memory allocation
uint64_t allocation_size; ///< size for memory allocation
/// @var kind
/// @brief ::ROCPROFILER_BUFFER_TRACING_MEMORY_ALLOCATION
/// @var operation
Expand Down
14 changes: 7 additions & 7 deletions source/include/rocprofiler-sdk/callback_tracing.h
Original file line number Diff line number Diff line change
Expand Up @@ -211,16 +211,16 @@ typedef struct
} rocprofiler_callback_tracing_memory_copy_data_t;

/**
* @brief ROCProfiler Memory Copy Allocation Tracer Record.
* @brief ROCProfiler Memory Allocation Tracer Record.
*/
typedef struct
{
uint64_t size; ///< size of this struct
rocprofiler_timestamp_t start_timestamp; ///< start time in nanoseconds
rocprofiler_timestamp_t end_timestamp; ///< end time in nanoseconds
rocprofiler_agent_id_t agent_id; ///< agent id for memory allocation
uint64_t starting_address; ///< starting address for memory allocation
uint64_t allocation_size; ///< size of memory allocation
uint64_t size; ///< size of this struct
rocprofiler_timestamp_t start_timestamp; ///< start time in nanoseconds
rocprofiler_timestamp_t end_timestamp; ///< end time in nanoseconds
rocprofiler_agent_id_t agent_id; ///< agent id for memory allocation
rocprofiler_address_t address; ///< starting address for memory allocation
uint64_t allocation_size; ///< size of memory allocation
} rocprofiler_callback_tracing_memory_allocation_data_t;

/**
Expand Down
2 changes: 1 addition & 1 deletion source/include/rocprofiler-sdk/cxx/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#
#
set(ROCPROFILER_CXX_HEADER_FILES hash.hpp name_info.hpp operators.hpp perfetto.hpp
serialization.hpp)
utility.hpp serialization.hpp)

install(
FILES ${ROCPROFILER_CXX_HEADER_FILES}
Expand Down
7 changes: 5 additions & 2 deletions source/include/rocprofiler-sdk/cxx/serialization.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
#include <rocprofiler-sdk/internal_threading.h>
#include <rocprofiler-sdk/rocprofiler.h>
#include <rocprofiler-sdk/cxx/name_info.hpp>
#include <rocprofiler-sdk/cxx/perfetto.hpp>
#include <rocprofiler-sdk/cxx/utility.hpp>

#include <cereal/archives/binary.hpp>
#include <cereal/archives/json.hpp>
Expand Down Expand Up @@ -64,6 +66,7 @@

#define ROCP_SDK_SAVE_DATA_FIELD(FIELD) ar(make_nvp(#FIELD, data.FIELD))
#define ROCP_SDK_SAVE_DATA_VALUE(NAME, VALUE) ar(make_nvp(NAME, data.VALUE))
#define ROCP_SDK_SAVE_VALUE(NAME, VALUE) ar(make_nvp(NAME, VALUE))
#define ROCP_SDK_SAVE_DATA_CSTR(FIELD) \
ar(make_nvp(#FIELD, std::string{data.FIELD ? data.FIELD : ""}))
#define ROCP_SDK_SAVE_DATA_BITFIELD(NAME, VALUE) \
Expand Down Expand Up @@ -338,7 +341,7 @@ save(ArchiveT& ar, rocprofiler_callback_tracing_memory_allocation_data_t data)
ROCP_SDK_SAVE_DATA_FIELD(start_timestamp);
ROCP_SDK_SAVE_DATA_FIELD(end_timestamp);
ROCP_SDK_SAVE_DATA_FIELD(agent_id);
ROCP_SDK_SAVE_DATA_FIELD(starting_address);
ROCP_SDK_SAVE_VALUE("address", rocprofiler::sdk::utility::as_hex(data.address.value, 16));
ROCP_SDK_SAVE_DATA_FIELD(allocation_size);
}

Expand Down Expand Up @@ -543,7 +546,7 @@ save(ArchiveT& ar, rocprofiler_buffer_tracing_memory_allocation_record_t data)
ROCP_SDK_SAVE_DATA_FIELD(start_timestamp);
ROCP_SDK_SAVE_DATA_FIELD(end_timestamp);
ROCP_SDK_SAVE_DATA_FIELD(agent_id);
ROCP_SDK_SAVE_DATA_FIELD(starting_address);
ROCP_SDK_SAVE_VALUE("address", rocprofiler::sdk::utility::as_hex(data.address.value, 16));
ROCP_SDK_SAVE_DATA_FIELD(allocation_size);
}

Expand Down
54 changes: 54 additions & 0 deletions source/include/rocprofiler-sdk/cxx/utility.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// MIT License
//
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//

#pragma once

#include <cstddef>
#include <cstdint>
#include <iomanip>
#include <sstream>
#include <string>
#include <type_traits>

namespace rocprofiler
{
namespace sdk
{
namespace utility
{
/**
 * @brief Format a value as a "0x"-prefixed hexadecimal string.
 *
 * The "0x" prefix is written before the field width is applied, so @p width
 * counts hex digits only (e.g. `width = 16` yields "0x" followed by at least
 * 16 zero-padded digits).
 *
 * @tparam Tp    Type of the value; must be streamable into a `std::ostream`.
 * @param  val   Value to format.
 * @param  width Minimum number of digits after the prefix, left-padded with
 *               '0'. A width of 0 means no padding.
 * @return The formatted string.
 */
template <typename Tp>
std::string
_as_hex(Tp val, size_t width = 0)
{
    auto ss = std::ostringstream{};

    // std::setw takes an int; cast explicitly so this public header does not
    // trigger narrowing-conversion warnings in strict user builds.
    ss << "0x" << std::hex << std::setw(static_cast<int>(width)) << std::setfill('0');

    // One-byte integral types (char, int8_t, uint8_t, ...) are streamed as
    // characters by operator<<, so widen them to get hex digits instead.
    if constexpr(std::is_integral<Tp>::value && sizeof(Tp) == 1)
        ss << static_cast<unsigned>(static_cast<unsigned char>(val));
    else
        ss << val;

    return ss.str();
}

// Generates a non-template as_hex overload for TYPE which forwards to _as_hex.
// Concrete overloads let arguments implicitly convert to TYPE at the call site.
#define ROCPROFILER_CXX_DEFINE_AS_HEX(TYPE)                                                        \
    inline std::string as_hex(TYPE val, size_t width = 0)                                          \
    {                                                                                              \
        return ::rocprofiler::sdk::utility::_as_hex(val, width);                                   \
    }

ROCPROFILER_CXX_DEFINE_AS_HEX(uint64_t)
#undef ROCPROFILER_CXX_DEFINE_AS_HEX
}  // namespace utility
}  // namespace sdk
}  // namespace rocprofiler
24 changes: 15 additions & 9 deletions source/include/rocprofiler-sdk/fwd.h
Original file line number Diff line number Diff line change
Expand Up @@ -238,10 +238,11 @@ typedef enum // NOLINT(performance-enum-size)
*/
typedef enum // NOLINT(performance-enum-size)
{
ROCPROFILER_MEMORY_ALLOCATION_NONE = 0, ///< Unknown memory allocation function
ROCPROFILER_MEMORY_ALLOCATION_ALLOCATE, ///< Allocate memory function
ROCPROFILER_MEMORY_ALLOCATION_MEMORY_POOL_ALLOCATE, ///< Allocate memory pool
ROCPROFILER_MEMORY_ALLOCATION_VMEM_HANDLE_CREATE, ///< Allocate vmem memory handle
ROCPROFILER_MEMORY_ALLOCATION_NONE = 0, ///< Unknown memory allocation function
ROCPROFILER_MEMORY_ALLOCATION_ALLOCATE, ///< Allocate memory function
ROCPROFILER_MEMORY_ALLOCATION_VMEM_ALLOCATE, ///< Allocate vmem memory handle
ROCPROFILER_MEMORY_ALLOCATION_FREE, ///< Free memory function
ROCPROFILER_MEMORY_ALLOCATION_VMEM_FREE, ///< Release vmem memory handle
ROCPROFILER_MEMORY_ALLOCATION_LAST,
} rocprofiler_memory_allocation_operation_t;

Expand Down Expand Up @@ -458,11 +459,6 @@ typedef enum
*/
typedef uint64_t rocprofiler_timestamp_t;

/**
* @brief ROCProfiler Address.
*/
typedef uint64_t rocprofiler_address_t;

/**
* @brief Thread ID. Value will be equivalent to `syscall(__NR_gettid)`
*/
Expand Down Expand Up @@ -519,6 +515,16 @@ typedef union rocprofiler_user_data_t
void* ptr; ///< usage example: set to address of data allocation
} rocprofiler_user_data_t;

/**
 * @brief Memory address reported by the profiler.
 *
 * Declared as a union so the same storage can be read either as an unsigned
 * 64-bit integer (@ref value) or as an untyped pointer (@ref ptr).
 */
typedef union rocprofiler_address_t
{
uint64_t value; ///< address as an unsigned 64-bit integer (e.g. for formatting or arithmetic)
void* ptr; ///< address as a generic (untyped) pointer
} rocprofiler_address_t;

//--------------------------------------------------------------------------------------//
//
// STRUCTS
Expand Down
16 changes: 13 additions & 3 deletions source/lib/output/generateCSV.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include <rocprofiler-sdk/fwd.h>
#include <rocprofiler-sdk/marker/api_id.h>
#include <rocprofiler-sdk/cxx/operators.hpp>
#include <rocprofiler-sdk/cxx/utility.hpp>

#include <unistd.h>
#include <cstdint>
Expand Down Expand Up @@ -456,23 +457,32 @@ generate_csv(const output_config&
"Operation",
"Agent_Id",
"Allocation_Size",
"Starting_Address",
"Address",
"Correlation_Id",
"Start_Timestamp",
"End_Timestamp"}};
for(auto ditr : data)
{
for(auto record : data.get(ditr))
{
uint64_t agent_info{0};
// Free functions currently do not track agent information. Only set it on allocation
// operations, otherwise set it to 0 currently
if(record.operation == ROCPROFILER_MEMORY_ALLOCATION_ALLOCATE ||
record.operation == ROCPROFILER_MEMORY_ALLOCATION_VMEM_ALLOCATE)
{
agent_info = tool_metadata.get_node_id(record.agent_id);
}
auto api_name = tool_metadata.get_operation_name(record.kind, record.operation);
auto row_ss = std::stringstream{};

rocprofiler::tool::csv::memory_allocation_csv_encoder::write_row(
row_ss,
tool_metadata.get_kind_name(record.kind),
api_name,
tool_metadata.get_node_id(record.agent_id),
agent_info,
record.allocation_size,
record.starting_address,
rocprofiler::sdk::utility::as_hex(record.address.value, 16),
record.correlation_id.internal,
record.start_timestamp,
record.end_timestamp);
Expand Down
28 changes: 21 additions & 7 deletions source/lib/output/generateOTF2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -486,17 +486,25 @@ write_otf2(
{
for(auto& [agent, evt] : itr)
{
const auto* _agent = _get_agent(agent);
auto _type_name = std::string_view{"UNK"};
if(_agent->type == ROCPROFILER_AGENT_TYPE_CPU)
// Free functions do not track agent information. Below handles case where
// null rocprof agent id is passed to generate OTF2
constexpr auto null_rocp_agent_id =
rocprofiler_agent_id_t{.handle = std::numeric_limits<uint64_t>::max()};
const rocprofiler_agent_t* _agent = nullptr;
if(agent != null_rocp_agent_id)
{
_agent = _get_agent(agent);
}
auto _type_name = std::string_view{"UNK"};
if(_agent != nullptr && _agent->type == ROCPROFILER_AGENT_TYPE_CPU)
_type_name = "CPU";
else if(_agent->type == ROCPROFILER_AGENT_TYPE_GPU)
else if(_agent != nullptr && _agent->type == ROCPROFILER_AGENT_TYPE_GPU)
_type_name = "GPU";

evt.name = fmt::format("Thread {}, Memory Allocation at {} {}",
evt.name = fmt::format("Thread {}, Memory Operation at {} {}",
tid,
_type_name,
_agent->logical_node_type_id);
_agent == nullptr ? 0 : _agent->logical_node_type_id);
}
}

Expand Down Expand Up @@ -860,14 +868,20 @@ write_otf2(
for(auto& [agent, evt] : itr)
{
auto _hash = get_hash_id(evt.name);
// Using max numeric limits results in an out-of-bound runtime error for OTF2
// and perfetto for agent ids. Setting handle to 0 for free functions.
constexpr auto null_rocp_agent_id =
rocprofiler_agent_id_t{.handle = std::numeric_limits<uint64_t>::max()};
auto handle = agent.handle;
if(agent == null_rocp_agent_id) handle = 0;

add_write_string(_hash, evt.name);
OTF2_CHECK(OTF2_GlobalDefWriter_WriteLocation(global_def_writer,
evt.id(), // id
_hash,
OTF2_LOCATION_TYPE_ACCELERATOR_STREAM,
2 * evt.event_count, // # events
agent.handle // location group
handle // location group
));
}
}
Expand Down
Loading

0 comments on commit 79006bb

Please sign in to comment.