From 5e1643cf81e0d0742cd59116c19aa88bb75c1559 Mon Sep 17 00:00:00 2001 From: "Jonathan R. Madsen" Date: Fri, 25 Oct 2024 14:14:59 -0500 Subject: [PATCH] rocprofv3: stabilize rocprofv3 summary tests (#1161) * Update tests/bin/transpose/transpose.cpp - add hipMemGetInfo call to display the available vs. total memory on the GPU * Update tests/rocprofv3/summary/validate.py - Updated test_summary_display_data after addition of hipMemGetInfo to transpose test exe * Tweak code coverage comment uploading - create unique orphan branch per PR - reduce quality of PNG files (85 -> 70) * Revert some of code coverage comment uploading - remove creation of unique orphan branch per PR * Tweak code coverage comment uploading - create unique orphan branch per PR --- .github/workflows/continuous_integration.yml | 2 +- source/scripts/upload-image-to-github.py | 19 +++++----- tests/bin/transpose/transpose.cpp | 38 +++++++++++++++----- tests/rocprofv3/summary/validate.py | 6 ++-- 4 files changed, 43 insertions(+), 22 deletions(-) diff --git a/.github/workflows/continuous_integration.yml b/.github/workflows/continuous_integration.yml index 670372f0..56998e68 100644 --- a/.github/workflows/continuous_integration.yml +++ b/.github/workflows/continuous_integration.yml @@ -421,7 +421,7 @@ jobs: ls -la for i in "all" "tests" "samples"; do - wkhtmltoimage --enable-local-file-access --quality 85 .codecov/${i}.html .codecov/${i}.png + wkhtmltoimage --enable-local-file-access --quality 70 .codecov/${i}.html .codecov/${i}.png done ls -la .codecov which -a git diff --git a/source/scripts/upload-image-to-github.py b/source/scripts/upload-image-to-github.py index 39c59225..ada96dd4 100755 --- a/source/scripts/upload-image-to-github.py +++ b/source/scripts/upload-image-to-github.py @@ -122,12 +122,16 @@ def run(*args, **kwargs): check=True, ) + _branch = f"images-{args.name}" + _ref_branch = f"refs/{_branch}/image-ref" + run(["pwd"]) - run([git_cmd, "switch", "--orphan", "images"], check=True) + run([git_cmd, "switch", "--orphan", _branch], check=True) run([git_cmd, "commit", "--allow-empty", "-m", "Empty commit"], check=True) - run([git_cmd, "fetch", "origin", "refs/images/image-ref"], check=True) - run([git_cmd, "pull", "--rebase", "origin", "refs/images/image-ref"], check=True) - run([git_cmd, "reset", "--hard", "HEAD^"], check=True) + run([git_cmd, "push", "origin", f"HEAD:{_ref_branch}"], check=False) + run([git_cmd, "fetch", "origin", _ref_branch], check=True) + run([git_cmd, "pull", "--rebase", "origin", _ref_branch], check=True) + run([git_cmd, "reset", "--hard", "HEAD^"], check=False) if not os.path.exists(args.name): os.makedirs(args.name) @@ -142,11 +146,8 @@ def run(*args, **kwargs): run([git_cmd, "add", args.name]) run([git_cmd, "status"]) - run([git_cmd, "commit", "-m", "code coverage files"]) - run( - [git_cmd, "push", "--force", "origin", "HEAD:refs/images/image-ref"], - check=True, - ) + run([git_cmd, "commit", "-m", f"{args.name} code coverage files"]) + run([git_cmd, "push", "--force", "origin", f"HEAD:{_ref_branch}"], check=True) log = run([git_cmd, "log", "-n", "1", "--format=%H"], capture_output=True) hash = log.stdout.decode("utf-8").strip() diff --git a/tests/bin/transpose/transpose.cpp b/tests/bin/transpose/transpose.cpp index addb8bf9..e183d606 100644 --- a/tests/bin/transpose/transpose.cpp +++ b/tests/bin/transpose/transpose.cpp @@ -20,27 +20,28 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. -#include -#include - #if defined(USE_ROCTRACER_ROCTX) # include #else # include #endif +#include + +#if defined(USE_MPI) +# include +#endif + #include #include #include +#include #include #include #include +#include #include -#if defined(USE_MPI) -# include -#endif - #define HIP_API_CALL(CALL) \ { \ hipError_t error_ = (CALL); \ @@ -229,8 +230,27 @@ run(int rank, int tid, int devid, int argc, char** argv) int* in = nullptr; int* out = nullptr; - HIP_API_CALL(hipMallocAsync(&in, size, stream)); - HIP_API_CALL(hipMallocAsync(&out, size, stream)); + // lock during malloc to get more accurate memory info + { + _lk.lock(); + constexpr auto MiB = (1024UL * 1024UL); + size_t free_gpu_mem = 0; + size_t total_gpu_mem = 0; + + HIP_API_CALL(hipMemGetInfo(&free_gpu_mem, &total_gpu_mem)); + free_gpu_mem /= MiB; + total_gpu_mem /= MiB; + + std::cout << "[transpose][" << rank << "][" << tid + << "] Available GPU memory (MiB): " << std::setw(6) << free_gpu_mem << " / " + << std::setw(6) << total_gpu_mem << std::endl; + + HIP_API_CALL(hipMallocAsync(&in, size, stream)); + HIP_API_CALL(hipMallocAsync(&out, size, stream)); + + _lk.unlock(); + } + HIP_API_CALL(hipMemsetAsync(in, 0, size, stream)); HIP_API_CALL(hipMemsetAsync(out, 0, size, stream)); HIP_API_CALL(hipMemcpyAsync(in, inp_matrix, size, hipMemcpyHostToDevice, stream)); diff --git a/tests/rocprofv3/summary/validate.py b/tests/rocprofv3/summary/validate.py index 56bd6268..1c51598b 100644 --- a/tests/rocprofv3/summary/validate.py +++ b/tests/rocprofv3/summary/validate.py @@ -235,15 +235,15 @@ def get_dims(df): hip_and_marker = get_df("HIP_API + MARKER_API") if num_summary_grps > 1 else None total = get_df("SUMMARY") - expected_hip_and_marker_dims = [20, 9] if hip_and_marker is not None else [0, 0] + expected_hip_and_marker_dims = [21, 9] if hip_and_marker is not None else [0, 0] assert get_dims(marker) == [7, 9], f"{marker}" assert get_dims(memcpy) == [2, 9], f"{memcpy}" assert get_dims(dispatch) == [3, 9], f"{dispatch}" assert get_dims(dispatch_and_copy) == [5, 9], f"{dispatch_and_copy}" - assert get_dims(hip) == [13, 9], f"{hip}" + assert get_dims(hip) == [14, 9], f"{hip}" assert get_dims(hip_and_marker) == expected_hip_and_marker_dims, f"{hip_and_marker}" - assert get_dims(total) == [22, 9], f"{total}" + assert get_dims(total) == [23, 9], f"{total}" def test_perfetto_data(pftrace_data, json_data):