From b6833710dcecb3add16ca9cfb0b2e7b371841092 Mon Sep 17 00:00:00 2001 From: Vladimir Indic <139573562+vlaindic@users.noreply.github.com> Date: Thu, 20 Jun 2024 04:50:46 +0200 Subject: [PATCH] PC Sampling IOCTL version check introduced (#944) --- .github/workflows/continuous_integration.yml | 5 - .../lib/rocprofiler-sdk/details/kfd_ioctl.h | 7 +- .../pc_sampling/ioctl/ioctl_adapter.cpp | 110 +++++++++++++++++- 3 files changed, 110 insertions(+), 12 deletions(-) diff --git a/.github/workflows/continuous_integration.yml b/.github/workflows/continuous_integration.yml index ae7e9383..418ea7a8 100644 --- a/.github/workflows/continuous_integration.yml +++ b/.github/workflows/continuous_integration.yml @@ -74,11 +74,6 @@ jobs: if: ${{ contains(matrix.runner, 'mi200') }} shell: bash run: | - # This workaround is to enable PC Sampling for kfd_ioctl 1.16 on CI machines, please refer to SWDEV-467721 - git config --global --add safe.directory '*' - git config user.name ${{ github.actor }} - git config user.email "${{ github.actor_id }}+${{ github.actor }}@users.noreply.github.com" - git fetch -a; git cherry-pick 5805f7a6c6c7ca47bd602e7b94b9dcda390f0c2e echo 'ROCPROFILER_PC_SAMPLING_BETA_ENABLED=1' >> $GITHUB_ENV - name: Configure, Build, and Test diff --git a/source/lib/rocprofiler-sdk/details/kfd_ioctl.h b/source/lib/rocprofiler-sdk/details/kfd_ioctl.h index 081f6681..e4dda4de 100644 --- a/source/lib/rocprofiler-sdk/details/kfd_ioctl.h +++ b/source/lib/rocprofiler-sdk/details/kfd_ioctl.h @@ -23,8 +23,8 @@ #ifndef KFD_IOCTL_H_INCLUDED #define KFD_IOCTL_H_INCLUDED +#include #include -#include /* * - 1.1 - initial version @@ -42,10 +42,9 @@ * - 1.14 - Update kfd_event_data * - 1.15 - Enable managing mappings in compute VMs with GEM_VA ioctl * - 1.16 - Add contiguous VRAM allocation flag - * - 1.17 - Add PC Sampling ioctl */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 17 +#define KFD_IOCTL_MINOR_VERSION 16 struct kfd_ioctl_get_version_args { @@ -1724,7 
+1723,7 @@ struct kfd_ioctl_pc_sample_args __u32 gpu_id; __u32 trace_id; __u32 flags; /* kfd_ioctl_pcs_query flags */ - __u32 reserved; + __u32 version; }; #define AMDKFD_IOCTL_BASE 'K' diff --git a/source/lib/rocprofiler-sdk/pc_sampling/ioctl/ioctl_adapter.cpp b/source/lib/rocprofiler-sdk/pc_sampling/ioctl/ioctl_adapter.cpp index 580833d6..75fe372e 100644 --- a/source/lib/rocprofiler-sdk/pc_sampling/ioctl/ioctl_adapter.cpp +++ b/source/lib/rocprofiler-sdk/pc_sampling/ioctl/ioctl_adapter.cpp @@ -42,22 +42,63 @@ namespace pc_sampling { namespace ioctl { -// forward declaration +namespace +{ +/** + * @brief Used to determine the version of PC sampling + * IOCTL implementation in the driver. + * + * @todo Remove this once the KFD IOCTL is upstreamed + */ +struct pc_sampling_ioctl_version_t +{ + uint32_t major_version; /// PC sampling IOCTL major version + uint32_t minor_version; /// PC sampling IOCTL minor version +}; + +// forward declarations rocprofiler_ioctl_version_info_t& get_ioctl_version(); -// IOCTL 1.17 is the first one supporting PC sampling. +rocprofiler_status_t +get_pc_sampling_ioctl_version(uint32_t kfd_gpu_id, pc_sampling_ioctl_version_t& pcs_ioctl_version); + +// IOCTL 1.16 is the first one supporting PC sampling. #define CHECK_IOCTL_VERSION \ do \ { \ auto ioctl_version = get_ioctl_version(); \ - if(ioctl_version.major_version < 1 || ioctl_version.minor_version < 17) \ + if(ioctl_version.major_version < 1 || ioctl_version.minor_version < 16) \ { \ LOG(ERROR) << "PC sampling unavailable\n"; \ return ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_KERNEL; \ } \ } while(0) +// PC Sampling IOCTL 0.1 is the initial implementation of PC sampling in KFD. 
+#define CHECK_PC_SAMPLING_IOCTL_VERSION(kfd_gpu_id) \ + do \ + { \ + pc_sampling_ioctl_version_t pcs_ioctl_version = {.major_version = 0, .minor_version = 0}; \ + auto status = get_pc_sampling_ioctl_version(kfd_gpu_id, pcs_ioctl_version); \ + if(status == ROCPROFILER_STATUS_ERROR_NOT_AVAILABLE) \ + { \ + ROCP_ERROR << "PC sampling unavailable\n"; \ + return status; \ + } \ + else if(status != ROCPROFILER_STATUS_SUCCESS) \ + { \ + return status; \ + } \ + else if(pcs_ioctl_version.major_version < 1 && pcs_ioctl_version.minor_version < 1) \ + { \ + ROCP_ERROR << "PC sampling unavailable\n"; \ + return ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_KERNEL; \ + } \ + } while(0) + +#define PC_SAMPLING_IOCTL_BITMASK 0xFFFF + int kfd_open() { @@ -132,6 +173,64 @@ get_ioctl_version() return v; } +/** + * @brief KFD IOCTL PC Sampling API version is provided via + * the `kfd_ioctl_pc_sample_args.version` field by + * @ref ::KFD_IOCTL_PCS_OP_QUERY_CAPABILITIES IOCTL function. + * The latter function requires @p kfd_gpu_id. + * This mechanism is used for internal versioning of the PC sampling + * implementation. + * + * @todo: Remove once KFD IOCTL is upstreamed. + * + * @param[in] kfd_gpu_id - KFD GPU identifier + * @param[out] pcs_ioctl_version - The PC sampling IOCTL version. Invalid if + * the return value is different than ::ROCPROFILER_STATUS_SUCCESS + * @return ::rocprofiler_status_t + */ +rocprofiler_status_t +get_pc_sampling_ioctl_version(uint32_t kfd_gpu_id, pc_sampling_ioctl_version_t& pcs_ioctl_version) +{ + struct kfd_ioctl_pc_sample_args args; + args.op = KFD_IOCTL_PCS_OP_QUERY_CAPABILITIES; + args.gpu_id = kfd_gpu_id; + args.sample_info_ptr = 0; + args.num_sample_info = 0; + args.flags = 0; + args.version = 0; + + auto ret = ioctl(get_kfd_fd(), AMDKFD_IOC_PC_SAMPLE, &args); + + if(ret == -EBUSY) + { + // The ROCProfiler-SDK is used inside the ROCgdb. + // The `KFD_IOCTL_PCS_OP_QUERY_CAPABILITIES` is not executed, + // so the value of the args.version is irrelevant. 
+ // Report that PC sampling cannot be used from within the ROCgdb. + return ROCPROFILER_STATUS_ERROR_NOT_AVAILABLE; + } + else if(ret == -EOPNOTSUPP) + { + // The GPU does not support PC sampling. + return ROCPROFILER_STATUS_ERROR_NOT_AVAILABLE; + } + else if(ret != 0) + { + // An unexpected error occurred, so we cannot be sure if the + // content of the `version` is valid. + return ROCPROFILER_STATUS_ERROR; + } + + // `version` field contains PC Sampling IOCTL version + auto version = args.version; + // Lower 16 bits represent minor version + pcs_ioctl_version.minor_version = version & PC_SAMPLING_IOCTL_BITMASK; + // Upper 16 bits represent major version + pcs_ioctl_version.major_version = (version >> 16) & PC_SAMPLING_IOCTL_BITMASK; + + return ROCPROFILER_STATUS_SUCCESS; +} + /** * @kfd_gpu_id represents the gpu identifier read from the content of the * /sys/class/kfd/kfd/topology/nodes//gpu_id. @@ -231,12 +330,15 @@ convert_ioctl_pcs_config_to_rocp(const rocprofiler_ioctl_pc_sampling_info_t& ioc return ROCPROFILER_STATUS_SUCCESS; } +} // namespace rocprofiler_status_t ioctl_query_pcs_configs(const rocprofiler_agent_t* agent, rocp_pcs_cfgs_vec_t& rocp_configs) { // Assert the IOCTL version CHECK_IOCTL_VERSION; + // Verify the PC Sampling IOCTL version + CHECK_PC_SAMPLING_IOCTL_VERSION(agent->gpu_id); uint32_t kfd_gpu_id = agent->gpu_id; @@ -339,6 +441,8 @@ ioctl_pcs_create(const rocprofiler_agent_t* agent, { // Assert the IOCTL version CHECK_IOCTL_VERSION; + // Verify the PC Sampling IOCTL version + CHECK_PC_SAMPLING_IOCTL_VERSION(agent->gpu_id); rocprofiler_ioctl_pc_sampling_info_t ioctl_cfg; auto ret = create_ioctl_pcs_config_from_rocp(ioctl_cfg, method, unit, interval);