Skip to content

Commit

Permalink
Adding --collection-period feature in rocprofv3 to match v1/v2 parity (
Browse files Browse the repository at this point in the history
…#9)

* Adding Trace Period feature to rocprofv3

* Adding feature documentation

* Update source/bin/rocprofv3.py

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>

* Fixing format

* Moving to Collection Period and changing the input params

* Format Fixes

* Fixing rebasing issues

* Removing atomic include from the tool

* Adding more options for units, optimizing the code

* Fixing rocprofv3.py

* Fixing time conv & adding time controlled app

* Fixing format

* Changing to shared memory testing methodology

* use of shmem use

* Fix include headers for transpose-time-controlled.cpp

* Format upload-image-to-github.py

* Removing shmem and using only env var to dump timestamps from the tool

* Tool Fixes + Test Config

* Adding Tests

* Fixing Review comments

* Update trace period implementation

* Update trace period tests

* check between start and stop timestamps

* Merge Fix

* Update validate.py

* Improve safety of rocprofiler_stop_context after finalization

* Pass context id to collection_period_cntrl by value

* Adding 20 us error margin

* Ensure log level for collection-period test is not more than warning

---------

Co-authored-by: Ammar ELWazir <[email protected]>
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Jonathan R. Madsen <[email protected]>
  • Loading branch information
4 people authored Dec 6, 2024
1 parent e7d4562 commit a579c70
Show file tree
Hide file tree
Showing 15 changed files with 429 additions and 22 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,9 @@ Full documentation for ROCprofiler-SDK is available at [rocm.docs.amd.com/projec
### Added

- Added support for select() operation in counter expression.
- Added reduce operation for counter expression wrt dimension.
- Added reduce operation for counter expression wrt dimension.
- Added `--collection-period` option to rocprofv3 to restrict data collection to user-specified time windows.
- Added `--collection-period-unit` option to rocprofv3 to select the time unit used by the `--collection-period` option.

### Changed

Expand Down
46 changes: 44 additions & 2 deletions source/bin/rocprofv3.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,23 @@ def add_parser_bool_argument(gparser, *args, **kwargs):
default=None,
type=str,
)
# Time-window filtering: every value is a `start_delay:collection_time:repeat`
# triplet whose two time fields are interpreted in the unit selected by
# `--collection-period-unit`. The help text names only options that exist
# (there is no `-pu` short option) and calls the third field "Repeat" to
# match the metavar.
filter_options.add_argument(
    "-p",
    "--collection-period",
    help="The times are specified in seconds by default, but the unit can be changed using the `--collection-period-unit` option. Start Delay Time is the time to wait before the collection begins, Collection Time is the duration for which data is collected, and Repeat is the number of times the cycle is repeated. A repeat of 0 indicates that the cycle will repeat indefinitely. Users can specify multiple configurations, each defined by a triplet in the format `start_delay:collection_time:repeat`",
    nargs="+",
    default=None,
    type=str,
    metavar=("(START_DELAY_TIME):(COLLECTION_TIME):(REPEAT)"),
)
# Unit applied to the time fields of `--collection-period`.
# With nargs=1 argparse stores a one-element list when the option is given,
# so the default has to be a one-element list as well: the consumer reads
# element [0], and indexing the plain string "sec" would yield "s".
filter_options.add_argument(
    "--collection-period-unit",
    help="To change the unit used in `--collection-period` or `-p`, you can specify the desired unit using the `--collection-period-unit` option. The available units are `hour` for hours, `min` for minutes, `sec` for seconds, `msec` for milliseconds, `usec` for microseconds, and `nsec` for nanoseconds",
    nargs=1,
    default=["sec"],
    type=str,
    choices=("hour", "min", "sec", "msec", "usec", "nsec"),
)

perfetto_options = parser.add_argument_group("Perfetto-specific options")

Expand Down Expand Up @@ -494,7 +511,6 @@ def parse_json(json_file):


def parse_text(text_file):

def process_line(line):
if "pmc:" not in line:
return ""
Expand Down Expand Up @@ -561,7 +577,6 @@ def patch_args(data):


def get_args(cmd_args, inp_args):

def ensure_type(name, var, type_id):
if not isinstance(var, type_id):
raise TypeError(
Expand Down Expand Up @@ -850,6 +865,33 @@ def _write_env_value():
args.list_avail,
overwrite_if_true=True,
)
if args.collection_period:
    # Export the requested collection periods to the tool through
    # ROCPROF_COLLECTION_PERIOD as ';'-separated `delay:duration:repeat`
    # triplets, with both time fields converted to integer nanoseconds.

    # Multipliers converting one unit of the selected time unit to nanoseconds.
    factors = {
        "hour": 60 * 60 * 1e9,
        "min": 60 * 1e9,
        "sec": 1e9,
        "msec": 1e6,
        "usec": 1e3,
        "nsec": 1,
    }

    # argparse stores a one-element list when `--collection-period-unit` is
    # passed (nargs=1), whereas an omitted option may leave a plain string
    # default. Accept both so that "sec" is never indexed down to "s".
    unit = args.collection_period_unit
    if isinstance(unit, (list, tuple)):
        unit = unit[0]
    factor = factors[unit]

    def to_nanosec(val):
        """Convert a time given in the selected unit to whole nanoseconds."""
        nsec = int(float(val) * factor)
        if nsec < 0:
            # the tool parses these fields as unsigned integers
            raise ValueError(f"collection period time must not be negative: {val}")
        return nsec

    def convert_triplet(spec):
        """Return `spec` (`delay:duration:repeat`) with times in nanoseconds."""
        fields = spec.split(":")
        if len(fields) != 3:
            raise ValueError(
                f"invalid collection period '{spec}': expected the format "
                "start_delay:collection_time:repeat"
            )
        delay, duration, repeat = fields
        repeat_count = int(repeat)
        if repeat_count < 0:
            raise ValueError(
                f"collection period repeat must not be negative: {repeat}"
            )
        return ":".join(
            f"{itr}"
            for itr in (to_nanosec(delay), to_nanosec(duration), repeat_count)
        )

    periods = [convert_triplet(itr) for itr in args.collection_period]
    update_env(
        "ROCPROF_COLLECTION_PERIOD",
        ";".join(periods),
        overwrite_if_true=True,
    )

if args.log_level and args.log_level not in ("env"):
for itr in ("ROCPROF", "ROCPROFILER", "ROCTX"):
Expand Down
26 changes: 17 additions & 9 deletions source/docs/how-to/using-rocprofv3.rst
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ Here is the sample of commonly used ``rocprofv3`` command-line options. Some opt
- Kernel Dispatch Counter Collection

* - ``-L`` \| ``--list-avail``
- List metrics for counter collection
- List metrics for counter collection
- List supported PC sampling configurations.

* - ``-E`` \| ``--extra_counters``
Expand Down Expand Up @@ -169,11 +169,19 @@ Here is the sample of commonly used ``rocprofv3`` command-line options. Some opt
* - ``--pc-sampling-unit``
- The unit appropriate to the PC sampling type/method, currently only time unit is supported
- PC Sampling Configurations

* - ``--pc-sampling-interval``
- Frequency at which PC samples are generated
- PC Sampling Configurations

* - ``-p`` \| ``--collection-period (START_DELAY_TIME):(COLLECTION_TIME):(REPEAT) [...]``
- The times are specified in seconds by default, but the unit can be changed using the ``--collection-period-unit`` option. Start Delay Time is the time to wait before the collection begins, Collection Time is the duration for which data is collected, and Repeat is the number of times the cycle is repeated. A repeat of 0 indicates that the cycle will repeat indefinitely. Users can specify multiple configurations, each defined by a triplet in the format ``start_delay:collection_time:repeat``. For example, the command ``-p 10:10:1 5:3:0`` specifies two configurations: the first with a start delay of 10 seconds, a collection time of 10 seconds, and a repeat of 1 (the cycle will repeat once); the second with a start delay of 5 seconds, a collection time of 3 seconds, and a repeat of 0 (the cycle will repeat indefinitely).
- Filtering Options

* - ``--collection-period-unit {hour,min,sec,msec,usec,nsec}``
- To change the unit used in ``--collection-period`` or ``-p``, specify the desired unit using the ``--collection-period-unit`` option. The available units are ``hour`` for hours, ``min`` for minutes, ``sec`` for seconds, ``msec`` for milliseconds, ``usec`` for microseconds, and ``nsec`` for nanoseconds.
- Filtering Options

To see exhaustive list of ``rocprofv3`` options, run:

.. code-block:: bash
Expand Down Expand Up @@ -633,23 +641,23 @@ For the description of the fields in the output file, see :ref:`output-file-fiel
Output single summary of tracing data at the conclusion of the profiling session

.. code-block:: shell
rocprofv3 -S --hip-trace -- <application_path>
.. image:: /data/rocprofv3_summary.png


2.1 Summary per domain
++++++++++++++++++++++

Outputs the summary of each tracing domain at the end of profiling session.
Outputs the summary of each tracing domain at the end of profiling session.

.. code-block:: shell
rocprofv3 -D --hsa-trace --hip-trace -- <application_path>
The above command generates ``hip_trace.csv`` and ``hsa_trace.csv`` files prefixed with the process ID, and prints the summary of each domain to the terminal.

2.2 Summary groups
+++++++++++++++++++

Expand All @@ -666,7 +674,7 @@ To create a summary for ``MEMORY_COPY`` domains, use:
To create a summary for ``MEMORY_COPY`` and ``HIP_API`` domains, use:

.. code-block:: shell
rocprofv3 --summary-groups 'MEMORY_COPY|HIP_API' --sys-trace -- <application_path>
.. image:: /data/rocprofv3_hip_memcpy_summary.png
Expand Down Expand Up @@ -816,7 +824,7 @@ To supply the counters via ``command-line`` options, use:
Extra-counters
++++++++++++++++
Counters with custom definitions can be defined through an extra_counters.yaml
Counters with custom definitions can be defined through an extra_counters.yaml
file using the ``command-line`` option.
To supply the extra counters via ``command-line`` options, use:
Expand Down
12 changes: 12 additions & 0 deletions source/lib/rocprofiler-sdk-tool/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,18 @@ config::config()
if(pc_sampling_method_value == ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP)
pc_sampling_host_trap = true;
pc_sampling_unit_value = pc_sampling_unit_map.at(pc_sampling_unit);

// Collection periods are read from the ROCPROF_COLLECTION_PERIOD environment
// variable: entries are separated by ';' and each entry holds three
// ':'-separated unsigned integers which are stored, in order, as the delay,
// duration and repeat members of a CollectionPeriod.
if(auto _collection_period = get_env("ROCPROF_COLLECTION_PERIOD", "");
!_collection_period.empty())
{
for(const auto& _config : sdk::parse::tokenize(_collection_period, ";"))
{
auto _config_params = sdk::parse::tokenize(_config, ":");
// NOTE(review): there is no validation here -- presumably .at() and
// std::stoull throw for an entry with fewer than three fields or with a
// non-numeric field; confirm the producer always emits well-formed triplets.
collection_periods.emplace(CollectionPeriod{std::stoull(_config_params.at(0)),
std::stoull(_config_params.at(1)),
std::stoull(_config_params.at(2))});
}
}
}

std::string
Expand Down
9 changes: 9 additions & 0 deletions source/lib/rocprofiler-sdk-tool/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,13 @@ struct config : output_config
{
using base_type = output_config;

// One entry of the collection-period feature: wait `delay`, start the context,
// wait `duration`, stop the context, and run that cycle `repeat` times.
struct CollectionPeriod
{
// time to sleep before the context is started; the tool passes this value
// to std::chrono::nanoseconds
uint64_t delay = 0;
// time to sleep between starting and stopping the context; the tool passes
// this value to std::chrono::nanoseconds
uint64_t duration = 0;
// number of times the cycle is executed; the tool runs the cycle exactly
// once when this is 0
uint64_t repeat = 0;
};

config();

~config() = default;
Expand Down Expand Up @@ -110,6 +117,8 @@ struct config : output_config
std::unordered_set<uint32_t> kernel_filter_range = {};
std::set<std::string> counters = {};

std::queue<CollectionPeriod> collection_periods = {};

template <typename ArchiveT>
void save(ArchiveT&) const;

Expand Down
100 changes: 94 additions & 6 deletions source/lib/rocprofiler-sdk-tool/tool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,16 +60,21 @@

#include <fmt/core.h>

#include <sys/mman.h>
#include <unistd.h>
#include <algorithm>
#include <cassert>
#include <chrono>
#include <csignal>
#include <cstring>
#include <fstream>
#include <future>
#include <iomanip>
#include <limits>
#include <mutex>
#include <optional>
#include <shared_mutex>
#include <thread>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
Expand Down Expand Up @@ -265,6 +270,77 @@ flush()
ROCP_INFO << "Buffers flushed";
}

// Drives the collection-period feature: for every configured period it sleeps
// for the delay, starts the context, sleeps for the duration and stops the
// context again. tool_init launches this function on a detached std::thread.
//
// _promise  fulfilled once the configured periods have been copied, so the
//           launching thread can stop waiting
// _ctx      context that is started and stopped for every period (by value)
//
// When the ROCPROF_COLLECTION_PERIOD_TESTING environment variable is enabled,
// every step is additionally written to a "collection_periods" log file as
// "<label>:<begin timestamp>:<end timestamp>".
void
collection_period_cntrl(std::promise<void>&& _promise, rocprofiler_context_id_t _ctx)
{
bool testing_cp = tool::get_env("ROCPROF_COLLECTION_PERIOD_TESTING", false);
auto log_fname = get_output_filename(tool::get_config(), "collection_periods", "log");
auto output_testing_file = std::ofstream{};

// the log file is only opened in testing mode
if(testing_cp)
{
ROCP_INFO << "collection period test logging enabled: " << log_fname;
output_testing_file.open(log_fname);
}

// Runs _func(_args...) for the step named `label`; in testing mode the
// timestamps taken immediately before and after the call are logged.
// The return value of _func is discarded, so a failing
// rocprofiler_start_context / rocprofiler_stop_context is not reported here.
auto log_period = [testing_cp, &output_testing_file](
std::string_view label, auto _func, auto... _args) {
ROCP_INFO << "collection period: " << label;

auto beg = rocprofiler_timestamp_t{};
if(testing_cp)
{
rocprofiler_get_timestamp(&beg);
}

_func(_args...);

if(testing_cp)
{
auto end = rocprofiler_timestamp_t{};
rocprofiler_get_timestamp(&end);
// flushed after every record so the file is up to date after each step
output_testing_file << label << ":" << beg << ":" << end << '\n' << std::flush;
}
};

// Sleeps for _value nanoseconds; a value of zero returns immediately.
auto sleep_for_nsec = [](auto _value) {
if(_value > 0)
{
std::this_thread::yield();
std::this_thread::sleep_for(std::chrono::nanoseconds{_value});
}
};

// work on a local copy of the configured queue; the promise is fulfilled only
// after the copy has been taken
auto periods = tool::get_config().collection_periods;
_promise.set_value(); // allow the launching thread to proceed
while(!periods.empty())
{
auto _period = periods.front();
periods.pop();

// one full delay -> start -> duration -> stop cycle; in testing mode each
// cycle is preceded by a "--" separator line in the log file
auto execute_period = [&]() {
if(testing_cp) output_testing_file << "--" << '\n';

log_period("delay", sleep_for_nsec, _period.delay);
log_period("start", rocprofiler_start_context, _ctx);
log_period("duration", sleep_for_nsec, _period.duration);
log_period("stop", rocprofiler_stop_context, _ctx);
};

// NOTE(review): a repeat of 0 executes the cycle exactly once here, whereas
// the rocprofv3 help text and documentation state that a repeat of 0 repeats
// the cycle indefinitely -- confirm which behavior is intended.
if(_period.repeat == 0)
{
execute_period();
}
else
{
for(size_t i = 0; i < _period.repeat; ++i)
{
execute_period();
}
}
}
}

int
set_kernel_rename_correlation_id(rocprofiler_thread_id_t thr_id,
rocprofiler_context_id_t ctx_id,
Expand Down Expand Up @@ -553,8 +629,8 @@ code_object_tracing_callback(rocprofiler_callback_tracing_record_t record,
// add the kernel to the kernel_targets if
if(success)
{
// if kernel name is provided by user then by default all kernels in the application
// are targeted
// if kernel name is provided by user then by default all kernels in the
// application are targeted
const auto* kernel_info =
CHECK_NOTNULL(tool_metadata)->get_kernel_symbol(sym_data->kernel_id);
auto kernel_filter_include = tool::get_config().kernel_filter_include;
Expand Down Expand Up @@ -711,12 +787,14 @@ get_device_counting_service(rocprofiler_agent_id_t agent_id)

ROCP_FATAL_IF(dev_id_s.empty() ||
dev_id_s.find_first_not_of("0123456789") != std::string::npos)
<< "invalid device qualifier format (':device=N) where N is the GPU id: "
<< "invalid device qualifier format (':device=N) where N is the "
"GPU "
"id: "
<< itr;

auto dev_id_v = std::stol(dev_id_s);
// skip this counter if the counter is for a specific device id (which doesn't
// this agent's device id)
// skip this counter if the counter is for a specific device id (which
// doesn't this agent's device id)
if(dev_id_v != agent_v->gpu_index)
{
--expected_v; // is not expected
Expand Down Expand Up @@ -1257,7 +1335,17 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
}
}

ROCPROFILER_CALL(rocprofiler_start_context(get_client_ctx()), "start context failed");
// Without configured collection periods the context is started right away;
// otherwise starting and stopping the context is delegated to
// collection_period_cntrl running on a detached thread.
if(tool::get_config().collection_periods.empty())
{
ROCPROFILER_CHECK(rocprofiler_start_context(get_client_ctx()));
}
else
{
// the promise/future pair lets this thread wait until the controller thread
// signals that it is running
auto _prom = std::promise<void>{};
auto _fut = _prom.get_future();
std::thread{collection_period_cntrl, std::move(_prom), get_client_ctx()}.detach();
// the result of wait_for is not checked: initialization continues after at
// most one second even if the controller thread has not signalled yet
_fut.wait_for(std::chrono::seconds{1}); // wait for a max of 1 second
}

tool_metadata->process_id = getpid();
rocprofiler_get_timestamp(&(tool_metadata->process_start_ns));
Expand Down
3 changes: 3 additions & 0 deletions source/lib/rocprofiler-sdk/context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ rocprofiler_stop_context(rocprofiler_context_id_t context_id)
!rocprofiler::context::get_registered_context(context_id))
return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND;

// if finalized, context is already stopped
if(rocprofiler::registration::get_fini_status() > 0) return ROCPROFILER_STATUS_SUCCESS;

return rocprofiler::context::stop_context(context_id);
}

Expand Down
8 changes: 6 additions & 2 deletions source/lib/rocprofiler-sdk/context/context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -229,9 +229,13 @@ context*
get_mutable_registered_context(rocprofiler_context_id_t id)
{
if(id.handle < get_contexts_offset()) return nullptr;
if(!get_registered_contexts_impl()) return nullptr;
auto _idx = id.handle - get_contexts_offset();
if(_idx >= get_registered_contexts_impl()->size()) return nullptr;
return &get_registered_contexts_impl()->at(_idx).value();
if(_idx >= get_registered_contexts_impl()->size())
return nullptr;
else if(get_registered_contexts_impl()->at(_idx).has_value())
return &get_registered_contexts_impl()->at(_idx).value();
return nullptr;
}

const context*
Expand Down
1 change: 0 additions & 1 deletion tests/pytest-packages/pytest_utils/otf2_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@ def __init__(self, filename):
self.filename = filename if isinstance(filename, (list, tuple)) else [filename]

def read(self):

def _read_trace(trace_name):
trace = otf2.reader.Reader(trace_name)
# print(f"Read {len(trace.definitions.strings)} string definitions")
Expand Down
1 change: 0 additions & 1 deletion tests/pytest-packages/tests/rocprofv3.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ def test_perfetto_data(
def test_otf2_data(
otf2_data, json_data, categories=("hip", "hsa", "marker", "kernel", "memory_copy")
):

def get_operation_name(kind_id, op_id):
return json_data["rocprofiler-sdk-tool"]["strings"]["buffer_records"][kind_id][
"operations"
Expand Down
1 change: 1 addition & 0 deletions tests/rocprofv3/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,4 @@ add_subdirectory(summary)
add_subdirectory(roctracer-roctx)
add_subdirectory(scratch-memory)
add_subdirectory(pc-sampling)
add_subdirectory(trace-period)
Loading

0 comments on commit a579c70

Please sign in to comment.