From 87c9823703d3cfae4383afd3920aedd3a5328543 Mon Sep 17 00:00:00 2001 From: udaij12 <32673964+udaij12@users.noreply.github.com> Date: Tue, 10 Sep 2024 11:32:54 -0700 Subject: [PATCH] Adding Graviton Regression test CI (#3273) * testing on graviton * testing on graviton * testing on graviton * checking python * rmv python * changing back to python * testing cpu instead * adding torchtext * adding torchtext * testing torchtext * removing two tests * removing pytorch test * adding numpy upgrade * adding numpy upgrade * testing full ci * testing full ci * testing full ci * skipping grpc * adding graviton ci * adding graviton ci * adding ci cpu graviton * adding ci cpu graviton * adding env * skipping a test for now * fixing env variable * removing scripted 3&4 * small changes * fixing lint * fixing lint * fixing lint * removing torchtext --------- Co-authored-by: Ubuntu Co-authored-by: Ankith Gunapal --- .github/workflows/ci_graviton_cpu.yml | 48 ++++++++++++++++++ .../regression_tests_graviton_cpu.yml | 41 ++++++++++++++++ test/pytest/test_gRPC_inference_api.py | 5 ++ test/pytest/test_model_custom_dependencies.py | 5 ++ test/pytest/test_pytorch_profiler.py | 49 ++++++++++++++++--- .../unit_tests/test_object_detector.py | 5 +- 6 files changed, 144 insertions(+), 9 deletions(-) create mode 100644 .github/workflows/ci_graviton_cpu.yml create mode 100644 .github/workflows/regression_tests_graviton_cpu.yml diff --git a/.github/workflows/ci_graviton_cpu.yml b/.github/workflows/ci_graviton_cpu.yml new file mode 100644 index 0000000000..e06072ca01 --- /dev/null +++ b/.github/workflows/ci_graviton_cpu.yml @@ -0,0 +1,48 @@ +name: CI CPU Graviton + +on: + workflow_dispatch: + push: + branches: + - master + pull_request: + branches: + - master + merge_group: + + +concurrency: + group: ci-cpu-${{ github.workflow }}-${{ github.ref == 'refs/heads/master' && github.run_number || github.ref }} + cancel-in-progress: true + +jobs: + ci-cpu: + runs-on: [self-hosted, graviton-test] + 
steps: + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + architecture: arm64 + - name: Setup Java 17 + uses: actions/setup-java@v3 + with: + distribution: 'zulu' + java-version: '17' + - name: Checkout TorchServe + uses: actions/checkout@v3 + with: + submodules: recursive + - name: Install dependencies + run: | + python ts_scripts/install_dependencies.py --environment=dev + - name: Torchserve Sanity + uses: nick-fields/retry@v3 + env: + TS_MAC_ARM64_CPU_ONLY: 'True' + with: + timeout_minutes: 60 + max_attempts: 3 + retry_on: error + command: | + python torchserve_sanity.py diff --git a/.github/workflows/regression_tests_graviton_cpu.yml b/.github/workflows/regression_tests_graviton_cpu.yml new file mode 100644 index 0000000000..39d3a7f600 --- /dev/null +++ b/.github/workflows/regression_tests_graviton_cpu.yml @@ -0,0 +1,41 @@ +name: Run Regression Tests on CPU for Graviton + +on: + push: + branches: + - master + pull_request: + branches: + - master + merge_group: + +concurrency: + group: ci-cpu-${{ github.workflow }}-${{ github.ref == 'refs/heads/master' && github.run_number || github.ref }} + cancel-in-progress: true + +jobs: + regression-cpu: + runs-on: [self-hosted, graviton-test] + steps: + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + architecture: arm64 + - name: Setup Java 17 + uses: actions/setup-java@v3 + with: + distribution: 'zulu' + java-version: '17' + - name: Checkout TorchServe + uses: actions/checkout@v3 + with: + submodules: recursive + - name: Install dependencies + run: | + python ts_scripts/install_dependencies.py --environment=dev + - name: Torchserve Regression Tests + env: + TS_MAC_ARM64_CPU_ONLY: 'True' + run: | + python test/regression_tests.py diff --git a/test/pytest/test_gRPC_inference_api.py b/test/pytest/test_gRPC_inference_api.py index 65a6f717b4..21cc38b8c3 100644 --- a/test/pytest/test_gRPC_inference_api.py +++ b/test/pytest/test_gRPC_inference_api.py 
@@ -1,10 +1,12 @@ import json import os +import platform import threading from ast import literal_eval import inference_pb2 import management_pb2 +import pytest import test_gRPC_utils import test_utils @@ -50,6 +52,9 @@ def __infer(stub, model_name, model_input): return prediction +@pytest.mark.skipif( + platform.machine() == "aarch64", reason="Test skipped on aarch64 architecture" +) def test_inference_apis(): with open(os.path.join(os.path.dirname(__file__), inference_data_json), "rb") as f: test_data = json.loads(f.read()) diff --git a/test/pytest/test_model_custom_dependencies.py b/test/pytest/test_model_custom_dependencies.py index e633373be9..5c2e3b1a29 100644 --- a/test/pytest/test_model_custom_dependencies.py +++ b/test/pytest/test_model_custom_dependencies.py @@ -1,7 +1,9 @@ import os import pathlib +import platform import subprocess +import pytest import requests import test_utils from model_archiver import ModelArchiver, ModelArchiverConfig @@ -140,6 +142,9 @@ def register_model_and_make_inference_request(expect_model_load_failure=False): resp.raise_for_status() +@pytest.mark.skipif( + platform.machine() == "aarch64", reason="Test skipped on aarch64 architecture" +) def test_install_dependencies_to_target_directory_with_requirements(): test_utils.torchserve_cleanup() diff --git a/test/pytest/test_pytorch_profiler.py b/test/pytest/test_pytorch_profiler.py index 877080c364..e2113fb76a 100644 --- a/test/pytest/test_pytorch_profiler.py +++ b/test/pytest/test_pytorch_profiler.py @@ -6,21 +6,28 @@ import json import os import pathlib +import platform import shutil import subprocess +from concurrent import futures import pytest import requests - import test_utils -from concurrent import futures REPO_ROOT = os.path.normpath( os.path.join(os.path.dirname(os.path.abspath(__file__)), "../../") ) -data_file_mnist = os.path.join(REPO_ROOT, "examples", "image_classifier", "mnist", "test_data", "1.png") +data_file_mnist = os.path.join( + REPO_ROOT, "examples", 
"image_classifier", "mnist", "test_data", "1.png" +) data_file_resnet = os.path.join( - REPO_ROOT, "examples", "image_classifier", "resnet_152_batch", "images", "kitten.jpg" + REPO_ROOT, + "examples", + "image_classifier", + "resnet_152_batch", + "images", + "kitten.jpg", ) data_file_resnet_dog = os.path.join( REPO_ROOT, "examples", "image_classifier", "resnet_152_batch", "images", "dog.jpg" @@ -33,6 +40,9 @@ @pytest.fixture +@pytest.mark.skipif( + platform.machine() == "aarch64", reason="Test skipped on aarch64 architecture" +) def set_custom_handler(handler_name): """ This method downloads resnet serialized file, creates mar file and sets up a custom handler @@ -48,7 +58,8 @@ def set_custom_handler(handler_name): serialized_file = os.path.join(test_utils.MODEL_STORE, "resnet152-394f9c45.pth") if not os.path.exists(serialized_file): response = requests.get( - "https://download.pytorch.org/models/resnet152-394f9c45.pth", allow_redirects=True + "https://download.pytorch.org/models/resnet152-394f9c45.pth", + allow_redirects=True, ) assert response.status_code == 200 with open(serialized_file, "wb") as f: @@ -58,10 +69,21 @@ def set_custom_handler(handler_name): cmd = test_utils.model_archiver_command_builder( model_name="resnet-152-batch", version="1.0", - model_file=os.path.join(test_utils.CODEBUILD_WD, "examples", "image_classifier", "resnet_152_batch", "model.py"), + model_file=os.path.join( + test_utils.CODEBUILD_WD, + "examples", + "image_classifier", + "resnet_152_batch", + "model.py", + ), serialized_file=serialized_file, handler=handler_name, - extra_files=os.path.join(test_utils.CODEBUILD_WD, "examples", "image_classifier", "index_to_name.json"), + extra_files=os.path.join( + test_utils.CODEBUILD_WD, + "examples", + "image_classifier", + "index_to_name.json", + ), force=True, ) print(cmd) @@ -94,6 +116,9 @@ def set_custom_handler(handler_name): "handler_name", [os.path.join(profiler_utils, "resnet_custom.py"), "image_classifier"], ) +@pytest.mark.skipif( + 
platform.machine() == "aarch64", reason="Test skipped on aarch64 architecture" +) def test_profiler_default_and_custom_handler(set_custom_handler, handler_name): """ Tests pytorch profiler integration with default and custom handler @@ -112,6 +137,9 @@ def test_profiler_default_and_custom_handler(set_custom_handler, handler_name): "handler_name", [os.path.join(profiler_utils, "resnet_profiler_override.py")], ) +@pytest.mark.skipif( + platform.machine() == "aarch64", reason="Test skipped on aarch64 architecture" +) def test_profiler_arguments_override(set_custom_handler, handler_name): """ Tests pytorch profiler integration when user overrides the profiler arguments @@ -133,6 +161,9 @@ def test_profiler_arguments_override(set_custom_handler, handler_name): "handler_name", [os.path.join(profiler_utils, "resnet_profiler_override.py")], ) +@pytest.mark.skipif( + platform.machine() == "aarch64", reason="Test skipped on aarch64 architecture" +) def test_batch_input(set_custom_handler, handler_name): """ Tests pytorch profiler integration with batch inference @@ -146,7 +177,9 @@ def test_batch_input(set_custom_handler, handler_name): def invoke_batch_input(): data = open(data_file_resnet, "rb") - response = requests.post("{}/predictions/resnet152".format(TF_INFERENCE_API), data) + response = requests.post( + "{}/predictions/resnet152".format(TF_INFERENCE_API), data + ) assert response.status_code == 200 assert "tiger_cat" in json.loads(response.content) diff --git a/ts/torch_handler/unit_tests/test_object_detector.py b/ts/torch_handler/unit_tests/test_object_detector.py index 290643f5ab..f56e27652c 100644 --- a/ts/torch_handler/unit_tests/test_object_detector.py +++ b/ts/torch_handler/unit_tests/test_object_detector.py @@ -5,6 +5,7 @@ Ensures it can load and execute an example model """ +import platform import sys from pathlib import Path @@ -56,7 +57,6 @@ def model_dir(tmp_path_factory, model_name): @pytest.fixture(scope="module") def context(model_dir, model_name): - 
context = MockContext( model_name="mnist", model_dir=model_dir.as_posix(), @@ -73,6 +73,9 @@ def handler(context): return handler +@pytest.mark.skipif( + platform.machine() == "aarch64", reason="Test skipped on aarch64 architecture" +) def test_handle(handler, context, image_bytes): test_data = [{"data": image_bytes}] * 2 results = handler.handle(test_data, context)