Skip to content

Commit

Permalink
LD_PRELOAD librocprofiler-sdk-roctx.so when marker-trace enabled (#1057)
Browse files Browse the repository at this point in the history
* LD_PRELOAD librocprofiler-sdk-roctx.so when marker-trace enabled

- this enables apps to link against old ROCTx (libroctx64.so) but get marker tracing in rocprofv3

* Update CHANGELOG

* Validation test for app linked to old (roctracer) ROCTx library

* Tweak scope of tool_counter_info

- causing "signal-unsafe call inside of a signal" error for ThreadSanitizer on mi200

* Fix handling of missing transpose-roctracer-roctx

* Disable rocprofv3 aborted-app test (ThreadSanitizer)

- ThreadSanitizer + mi200/mi300 + aborted-app results in a signal-unsafe call inside a signal that cannot be specifically suppressed as usual via rocprofv3_error_signal_handler for some unknown reason

* Add UndefinedBehaviorSanitizer job
  • Loading branch information
jrmadsen authored Sep 11, 2024
1 parent df939cb commit 72cbced
Show file tree
Hide file tree
Showing 13 changed files with 266 additions and 24 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/continuous_integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -512,7 +512,7 @@ jobs:
fail-fast: false
matrix:
runner: ['mi200', 'mi300']
sanitizer: ['AddressSanitizer', 'ThreadSanitizer', 'LeakSanitizer']
sanitizer: ['AddressSanitizer', 'ThreadSanitizer', 'LeakSanitizer', 'UndefinedBehaviorSanitizer']
os: ['ubuntu-22.04']
build-type: ['RelWithDebInfo']
exclude:
Expand All @@ -524,6 +524,7 @@ jobs:
- { runner: 'mi200', sanitizer: 'AddressSanitizer' }
# - { runner: 'mi300', sanitizer: 'AddressSanitizer' }
- { runner: 'mi300', sanitizer: 'ThreadSanitizer' }
- { runner: 'mi300', sanitizer: 'UndefinedBehaviorSanitizer' }

if: ${{ contains(github.event_name, 'pull_request') }}
runs-on: ${{ matrix.runner }}-runner-set
Expand Down
7 changes: 5 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,11 @@ Full documentation for ROCprofiler-SDK is available at [Click Here](source/docs/

### Additions

### Fixes

### Changes

- Support `--marker-trace` on application linked against old (roctracer) ROCTx (i.e. `libroctx64.so`)

### Fixes

- Creation of subdirectory when rocprofv3 `--output-file` contains a folder path
- Fix misaligned stores (undefined behavior) for buffer records
15 changes: 11 additions & 4 deletions source/bin/rocprofv3.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,13 @@ def add_parser_bool_argument(gparser, *args, **kwargs):
default=os.environ.get("ROCPROF_PRELOAD", "").split(":"),
nargs="*",
)
# below is available for CI because LD_PRELOADing a library linked to a sanitizer library
# causes issues in apps where HIP is part of shared library.
add_parser_bool_argument(
advanced_options,
"--suppress-marker-preload",
help=argparse.SUPPRESS,
)

if args is None:
args = sys.argv[1:]
Expand Down Expand Up @@ -580,11 +587,11 @@ def _write_env_value():
elif _prepend:
app_env[env_var] = (
"{}{}{}".format(_val, _join_char, _curr_val) if _val else _curr_val
)
).strip(":")
elif _append:
app_env[env_var] = (
"{}{}{}".format(_curr_val, _join_char, _val) if _val else _curr_val
)
).strip(":")
elif _overwrite:
_write_env_value()
else:
Expand Down Expand Up @@ -691,8 +698,8 @@ def _write_env_value():

# if marker tracing was requested, LD_PRELOAD the rocprofiler-sdk-roctx library
# to override the roctx symbols of an app linked to the old roctracer roctx
# if args.marker_trace:
# update_env("LD_PRELOAD", ROCPROF_ROCTX_LIBRARY, append=True)
if args.marker_trace and not args.suppress_marker_preload:
update_env("LD_PRELOAD", ROCPROF_ROCTX_LIBRARY, append=True)

if trace_count == 0:
# if no tracing was enabled but the options below were enabled, raise an error
Expand Down
3 changes: 1 addition & 2 deletions source/lib/rocprofiler-sdk-tool/tool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1565,8 +1565,6 @@ tool_fini(void* /*tool_data*/)
for(auto& itr : *agent_info)
_agents.emplace_back(itr.second);

auto _counters = get_tool_counter_info();

std::sort(_agents.begin(), _agents.end(), node_id_sort);

if(tool::get_config().csv_output)
Expand All @@ -1591,6 +1589,7 @@ tool_fini(void* /*tool_data*/)

if(tool::get_config().json_output)
{
auto _counters = get_tool_counter_info();
rocprofiler::tool::write_json(tool_functions,
getpid(),
contributions,
Expand Down
13 changes: 7 additions & 6 deletions tests/bin/transpose/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -64,21 +64,22 @@ transpose_build_target(transpose "" rocprofiler-sdk-roctx::rocprofiler-sdk-roctx
find_path(
roctracer_roctx_ROOT_DIR
NAMES include/roctracer/roctx.h
HINTS ${hip_DIR}
PATHS ${hip_DIR})
lib/${CMAKE_SHARED_LIBRARY_PREFIX}roctx64${CMAKE_SHARED_LIBRARY_SUFFIX}
HINTS ${hip_DIR} ${ROCM_PATH} ENV ROCM_PATH /opt/rocm
PATHS ${hip_DIR} ${ROCM_PATH} ENV ROCM_PATH /opt/rocm)

find_path(
roctracer_roctx_INCLUDE_DIR
NAMES roctracer/roctx.h
HINTS ${roctracer_roctx_ROOT_DIR} ${hip_DIR}
PATHS ${roctracer_roctx_ROOT_DIR} ${hip_DIR}
HINTS ${roctracer_roctx_ROOT_DIR}
PATHS ${roctracer_roctx_ROOT_DIR}
PATH_SUFFIXES include)

find_library(
roctracer_roctx_LIBRARY
NAMES roctx64
HINTS ${roctracer_roctx_ROOT_DIR} ${hip_DIR}
PATHS ${roctracer_roctx_ROOT_DIR} ${hip_DIR}
HINTS ${roctracer_roctx_ROOT_DIR}
PATHS ${roctracer_roctx_ROOT_DIR}
PATH_SUFFIXES lib lib64)

include(FindPackageHandleStandardArgs)
Expand Down
1 change: 1 addition & 0 deletions tests/rocprofv3/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,4 @@ add_subdirectory(hsa-queue-dependency)
add_subdirectory(kernel-rename)
add_subdirectory(aborted-app)
add_subdirectory(summary)
add_subdirectory(roctracer-roctx)
33 changes: 28 additions & 5 deletions tests/rocprofv3/aborted-app/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,14 @@ rocprofiler_configure_pytest_files(CONFIG pytest.ini COPY validate.py conftest.p
string(REPLACE "LD_PRELOAD=" "ROCPROF_PRELOAD=" PRELOAD_ENV
"${ROCPROFILER_MEMCHECK_PRELOAD_ENV}")

# disable this test for thread sanitizers because of "signal-unsafe call inside signal"
# issues on mi200 and mi300 (works fine on vega20 and navi32)
if(ROCPROFILER_MEMCHECK STREQUAL "ThreadSanitizer")
set(IS_THREAD_SANITIZER ON)
else()
set(IS_THREAD_SANITIZER OFF)
endif()

set(aborted-app-env "${PRELOAD_ENV}" ROCPROF_TESTING_RAISE_SIGNAL=1
ROCPROF_INTERNAL_TEST_SIGNAL_HANDLER_VIA_EXIT=1)

Expand All @@ -29,8 +37,16 @@ add_test(

set_tests_properties(
rocprofv3-test-execute-app-abort
PROPERTIES TIMEOUT 45 LABELS "integration-tests" ENVIRONMENT "${aborted-app-env}"
WILL_FAIL TRUE)
PROPERTIES TIMEOUT
45
LABELS
"integration-tests"
ENVIRONMENT
"${aborted-app-env}"
WILL_FAIL
TRUE
DISABLED
"${IS_THREAD_SANITIZER}")

add_test(
NAME rocprofv3-test-validate-app-abort
Expand All @@ -39,6 +55,13 @@ add_test(

set_tests_properties(
rocprofv3-test-validate-app-abort
PROPERTIES TIMEOUT 45 LABELS "integration-tests" DEPENDS
"rocprofv3-test-execute-app-abort" FAIL_REGULAR_EXPRESSION
"${ROCPROFILER_DEFAULT_FAIL_REGEX}")
PROPERTIES TIMEOUT
45
LABELS
"integration-tests"
DEPENDS
"rocprofv3-test-execute-app-abort"
FAIL_REGULAR_EXPRESSION
"${ROCPROFILER_DEFAULT_FAIL_REGEX}"
DISABLED
"${IS_THREAD_SANITIZER}")
60 changes: 60 additions & 0 deletions tests/rocprofv3/roctracer-roctx/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#
# rocprofv3 tool test: marker tracing with the transpose-roctracer-roctx application
#
cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR)

project(
    rocprofiler-tests-roctracer-roctx-tracing
    LANGUAGES CXX
    VERSION 0.0.0)

find_package(rocprofiler-sdk REQUIRED)

# pass the memcheck preload setting to the test as ROCPROF_PRELOAD instead of LD_PRELOAD
string(REPLACE "LD_PRELOAD=" "ROCPROF_PRELOAD=" PRELOAD_ENV
               "${ROCPROFILER_MEMCHECK_PRELOAD_ENV}")
set(tracing-env "${PRELOAD_ENV}")

rocprofiler_configure_pytest_files(CONFIG pytest.ini roctracer-roctx-input.yml
                                   COPY validate.py conftest.py)

# output files consumed by the validation test and attached to the report on failure
set(_roctx_trace_dir ${CMAKE_CURRENT_BINARY_DIR}/roctracer-roctx-trace)
set(VALIDATION_FILES
    ${_roctx_trace_dir}/out_results.pftrace ${_roctx_trace_dir}/out_results.json
    ${_roctx_trace_dir}/out_results.otf2)

# evaluates to ON (i.e. test disabled) when the transpose-roctracer-roctx target does not
# exist
set(_roctx_app_missing "$<IF:$<TARGET_EXISTS:transpose-roctracer-roctx>,OFF,ON>")

# step 1: run the application under rocprofv3 using the YAML input file
add_test(
    NAME rocprofv3-test-roctracer-roctx-trace-execute
    COMMAND
        $<TARGET_FILE:rocprofiler-sdk::rocprofv3> -i roctracer-roctx-input.yml --
        $<IF:$<TARGET_EXISTS:transpose-roctracer-roctx>,$<TARGET_FILE:transpose-roctracer-roctx>,>
    )

set_tests_properties(
    rocprofv3-test-roctracer-roctx-trace-execute
    PROPERTIES TIMEOUT
               45
               LABELS
               "integration-tests"
               ENVIRONMENT
               "${tracing-env}"
               DISABLED
               "${_roctx_app_missing}")

# step 2: validate the JSON, Perfetto, and OTF2 outputs produced by step 1
add_test(
    NAME rocprofv3-test-roctracer-roctx-trace-validate
    COMMAND
        ${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py --json-input
        ${_roctx_trace_dir}/out_results.json --pftrace-input
        ${_roctx_trace_dir}/out_results.pftrace --otf2-input
        ${_roctx_trace_dir}/out_results.otf2)

set_tests_properties(
    rocprofv3-test-roctracer-roctx-trace-validate
    PROPERTIES TIMEOUT
               45
               LABELS
               "integration-tests"
               DEPENDS
               "rocprofv3-test-roctracer-roctx-trace-execute"
               DISABLED
               "${_roctx_app_missing}"
               FAIL_REGULAR_EXPRESSION
               "AssertionError"
               ATTACHED_FILES_ON_FAIL
               "${VALIDATION_FILES}")
49 changes: 49 additions & 0 deletions tests/rocprofv3/roctracer-roctx/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#!/usr/bin/env python3

import os
import json
import pytest

from rocprofiler_sdk.pytest_utils.dotdict import dotdict
from rocprofiler_sdk.pytest_utils import collapse_dict_list
from rocprofiler_sdk.pytest_utils.perfetto_reader import PerfettoReader
from rocprofiler_sdk.pytest_utils.otf2_reader import OTF2Reader


def pytest_addoption(parser):
    """Register the command-line options that locate the trace files to validate.

    Args:
        parser: pytest option parser; one ``store`` option is added per input file.
    """
    input_options = (
        ("--json-input", "Path to JSON file."),
        ("--pftrace-input", "Path to Perfetto trace file."),
        ("--otf2-input", "Path to OTF2 trace file."),
    )
    for flag, description in input_options:
        parser.addoption(flag, action="store", help=description)


@pytest.fixture
def json_data(request):
    """Load the file given by ``--json-input`` and return it as a collapsed ``dotdict``."""
    json_path = request.config.getoption("--json-input")
    with open(json_path, "r") as stream:
        parsed = json.load(stream)
    return dotdict(collapse_dict_list(parsed))


@pytest.fixture
def pftrace_data(request):
    """Read the file given by ``--pftrace-input`` and return the first element of the reader's result."""
    pftrace_path = request.config.getoption("--pftrace-input")
    reader = PerfettoReader(pftrace_path)
    return reader.read()[0]


@pytest.fixture
def otf2_data(request):
    """Read the file given by ``--otf2-input`` and return the first element of the reader's result.

    Raises:
        FileNotFoundError: if the path passed via ``--otf2-input`` does not exist.
    """
    filename = request.config.getoption("--otf2-input")
    if not os.path.exists(filename):
        # a missing input is "not found"; FileExistsError means the opposite condition
        raise FileNotFoundError(f"{filename} does not exist")
    return OTF2Reader(filename).read()[0]
5 changes: 5 additions & 0 deletions tests/rocprofv3/roctracer-roctx/pytest.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@

[pytest]
addopts = --durations=20 -rA -s -vv
testpaths = validate.py
pythonpath = @ROCPROFILER_SDK_TESTS_BINARY_DIR@/pytest-packages
11 changes: 11 additions & 0 deletions tests/rocprofv3/roctracer-roctx/roctracer-roctx-input.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
jobs:
- hsa_trace: False
hip_trace: False
kernel_trace: True
memory_copy_trace: True
marker_trace: True
output_directory: "@CMAKE_CURRENT_BINARY_DIR@/roctracer-roctx-trace"
output_file: out
output_format: [pftrace, json, otf2]
log_level: env
kernel_rename: False
77 changes: 77 additions & 0 deletions tests/rocprofv3/roctracer-roctx/validate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#!/usr/bin/env python3

import sys
import pytest


def test_marker_api_trace(json_data):
data = json_data["rocprofiler-sdk-tool"]

def get_kind_name(kind_id):
return data["strings"]["buffer_records"][kind_id]["kind"]

def get_region_name(corr_id):
for itr in data["strings"]["marker_api"]:
if itr.key == corr_id:
return itr.value
return None

valid_domain = ("MARKER_CORE_API", "MARKER_CONTROL_API", "MARKER_NAME_API")

buffer_records = data["buffer_records"]
marker_data = buffer_records["marker_api"]
tot_data = {}
thr_data = {}
for marker in marker_data:
assert get_kind_name(marker["kind"]) in valid_domain
assert marker.thread_id >= data["metadata"]["pid"]
assert marker.end_timestamp >= marker.start_timestamp

if marker.thread_id not in thr_data.keys():
thr_data[marker.thread_id] = {}

corr_id = marker.correlation_id.internal
assert corr_id > 0, f"{marker}"
name = get_region_name(corr_id)
if not name.startswith("roctracer/roctx"):
assert "run" in name, f"{marker}"
if name not in thr_data[marker.thread_id].keys():
thr_data[marker.thread_id][name] = 1
else:
thr_data[marker.thread_id][name] += 1

if name not in tot_data.keys():
tot_data[name] = 1
else:
tot_data[name] += 1

assert tot_data["roctracer/roctx v4.1"] == 1
assert tot_data["run"] == 2
assert tot_data["run/iteration"] == 1000
assert tot_data["run/iteration/sync"] == 100
assert tot_data["run/rank-0/thread-0/device-0/begin"] == 1
assert tot_data["run/rank-0/thread-0/device-0/end"] == 1
assert len(tot_data.keys()) >= 8

for tid, titr in thr_data.items():
assert titr["run"] == 1
assert titr["run/iteration"] == 500
assert titr["run/iteration/sync"] == 50
assert len(titr.keys()) >= 5


def test_perfetto_data(pftrace_data, json_data):
    """Run the shared rocprofv3 Perfetto check for the memory-copy and marker categories."""
    import rocprofiler_sdk.tests.rocprofv3 as rocprofv3

    categories = ("memory_copy", "marker")
    rocprofv3.test_perfetto_data(pftrace_data, json_data, categories)


def test_otf2_data(otf2_data, json_data):
    """Run the shared rocprofv3 OTF2 check for the memory-copy and marker categories."""
    import rocprofiler_sdk.tests.rocprofv3 as rocprofv3

    categories = ("memory_copy", "marker")
    rocprofv3.test_otf2_data(otf2_data, json_data, categories)


if __name__ == "__main__":
    # run this file under pytest, stopping at the first failure, and forward any extra
    # command-line arguments (e.g. the --*-input options)
    pytest_args = ["-x", __file__, *sys.argv[1:]]
    sys.exit(pytest.main(pytest_args))
Loading

0 comments on commit 72cbced

Please sign in to comment.