From b0afef6fd66de67c295a71caaf2303dbdefcc064 Mon Sep 17 00:00:00 2001
From: RunningLeon
Date: Mon, 27 Nov 2023 10:24:21 +0800
Subject: [PATCH 1/4] add triton server test and workflow yml

---
 .github/scripts/test_triton_server.py |  73 +++++++++
 .github/workflows/test.yml            | 212 ++++++++++++++++++++++++++
 docker/Dockerfile                     |   2 +-
 requirements/test.txt                 |   3 +
 4 files changed, 289 insertions(+), 1 deletion(-)
 create mode 100644 .github/scripts/test_triton_server.py
 create mode 100644 .github/workflows/test.yml

diff --git a/.github/scripts/test_triton_server.py b/.github/scripts/test_triton_server.py
new file mode 100644
index 0000000000..0e94de1579
--- /dev/null
+++ b/.github/scripts/test_triton_server.py
@@ -0,0 +1,73 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import time
+from subprocess import PIPE, Popen
+
+import fire
+
+
+def parse_dialogue(inputs: str):
+    sep = 'double enter to end input >>>'
+    dialogues = inputs.strip()
+    if dialogues.endswith(sep):
+        dialogues = dialogues[:-len(sep)]
+    dialogues = dialogues.strip()
+    dialogues = dialogues.split(sep)
+    dialogues = [d.strip() for d in dialogues]
+    return dialogues[1:]
+
+
+def test(port=33337):
+    cmd = [f'lmdeploy serve triton_client localhost:{port}']
+
+    test_cases = [
+        dict(
+            prompts='Hello! Please answer in English.',
+            keywords=['Hello', 'hi'],
+        ),
+        dict(
+            prompts='您好! 请用中文回答。',
+            keywords=['您好', '你好'],
+        ),
+        dict(
+            prompts='How many days does a week have?',
+            keywords=['seven'],
+        ),
+        dict(
+            prompts='一周有多少天',
+            keywords=['七天', '7天'],
+        ),
+    ]
+
+    sep = '\n\n'
+    end = sep + 'exit\n\n\n'
+    all_pass = True
+    for cases in test_cases:
+        quest = cases['prompts']
+        keywords = cases['keywords']
+        inputs = quest + end
+        print(f'Test Input prompts: {quest}\nKey words: {keywords}')
+        time.sleep(5)
+
+        with Popen(cmd,
+                   stdin=PIPE,
+                   stdout=PIPE,
+                   stderr=PIPE,
+                   shell=True,
+                   text=True,
+                   encoding='utf-8') as proc:
+            out, err = proc.communicate(input=inputs)
+            print(f'Output: {out}')
+            if proc.returncode == 0:
+                out = parse_dialogue(out)[0]
+                success = any([k in out for k in keywords])
+                if not success:
+                    print(f'>>> Failed to output keywords: {out} {keywords}')
+                    all_pass = False
+            else:
+                all_pass = False
+                print(f'Failed to get outputs: {out} {err}')
+    assert all_pass, 'Tests failed!'
+
+
+if __name__ == '__main__':
+    fire.Fire(test)
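The script drives the interactive `lmdeploy serve triton_client` CLI through a pipe: `parse_dialogue` splits the captured stdout on the sentinel the client prints before each turn, drops everything before the first sentinel (the startup banner), and returns one entry per dialogue turn, which `test` then scans for the expected keywords. A minimal sketch of that parsing behaviour, using an invented transcript (the real client's banner and echo format may differ):

```python
# Assumes parse_dialogue from .github/scripts/test_triton_server.py above is
# importable, e.g. by running Python from that directory.
from test_triton_server import parse_dialogue

# Hypothetical client output; only the sentinel placement matters here.
sample = ('chat banner\n'
          'double enter to end input >>>\n'
          'Hello! How can I help you?\n'
          'double enter to end input >>>')

# The trailing sentinel is stripped, the text is split on the sentinel, and
# the leading banner chunk is dropped by the `dialogues[1:]` slice.
print(parse_dialogue(sample))  # -> ['Hello! How can I help you?']
```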
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
new file mode 100644
index 0000000000..a855802189
--- /dev/null
+++ b/.github/workflows/test.yml
@@ -0,0 +1,212 @@
+name: test
+
+on:
+  pull_request:
+    paths:
+      - ".github/scripts/test_triton_server.py"
+      - ".github/workflows/test.yml"
+      - "cmake/**"
+      - "src/**"
+      - "3rdparty/**"
+      - "lmdeploy/**"
+      - "requirements/**"
+      - "requirements.txt"
+      - "CMakeLists.txt"
+      - "setup.py"
+  push:
+    branches:
+      - main
+    paths:
+      - "lmdeploy/version.py"
+    tags:
+      - "v*.*.*"
+
+  workflow_dispatch:
+    inputs:
+      markers:
+        required: false
+        description: 'Tested markers. eg: "-m internlm_chat_7b"'
+        type: string
+        default: ''
+
+env:
+  HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
+  HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai
+
+
+jobs:
+  test_functions:
+    runs-on: [self-hosted, linux-a100]
+    timeout-minutes: 4320 # 72 hours
+    environment: 'prod'
+    env:
+      REPORT_DIR: /nvme/qa_test_models/test-reports
+    container:
+      image: nvcr.io/nvidia/tritonserver:22.12-py3
+      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip"
+      volumes:
+        - /nvme/github-actions/pip-cache:/root/.cache/pip
+        - /nvme/github-actions/packages:/root/packages
+        - /nvme/qa_test_models:/nvme/qa_test_models
+        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
+    steps:
+      - name: Setup systems
+        run: |
+          rm /etc/apt/sources.list.d/cuda*.list
+          apt-get update && apt-get install -y --no-install-recommends rapidjson-dev \
+              libgoogle-glog-dev libgl1 openjdk-8-jre-headless
+          dpkg -i /root/packages/allure_2.24.1-1_all.deb
+          rm -rf /var/lib/apt/lists/*
+      - name: Clone repository
+        uses: actions/checkout@v2
+      - name: Install pytorch
+        run: |
+          python3 -m pip cache dir
+          python3 -m pip install torch==1.13.1+cu117 torchvision==0.14.1+cu117 --extra-index-url https://download.pytorch.org/whl/cu117
+      - name: Build lmdeploy
+        run: |
+          python3 -m pip install cmake
+          python3 -m pip install -r requirements/build.txt
+          # use cached build
+          cp -r ../../build build
+          cd build
+          cmake .. \
+              -DCMAKE_BUILD_TYPE=RelWithDebInfo \
+              -DCMAKE_EXPORT_COMPILE_COMMANDS=1 \
+              -DCMAKE_INSTALL_PREFIX=./install \
+              -DBUILD_PY_FFI=ON \
+              -DBUILD_MULTI_GPU=ON \
+              -DCMAKE_CUDA_FLAGS="-lineinfo" \
+              -DUSE_NVTX=ON \
+              -DSM=80 \
+              -DCMAKE_CUDA_ARCHITECTURES=80 \
+              -DBUILD_TEST=OFF
+          make -j$(nproc) && make install
+      - name: Install lmdeploy from source
+        run: |
+          python3 -m pip install packaging protobuf transformers_stream_generator
+          python3 -m pip install -r requirements.txt -r requirements/test.txt
+          python3 -m pip install .
+        # lmdeploy check_env
+      - name: Test lmdeploy
+        run: |
+          echo "TODO: awaiting PR of adding autotest"
+        # pytest autotest ${{github.event.inputs.markers}} --alluredir=allure-results --clean-alluredir
+      - name: Generate reports
+        if: always()
+        run: |
+          if test -d "allure-results"; then
+            export date_today="$(date +'%Y%m%d-%H%M%S')"
+            export report_dir="$REPORT_DIR/$date_today"
+            echo "Save report to $report_dir"
+            allure generate -c -o $report_dir
+          fi
+      - name: Clear workfile
+        if: always()
+        run: |
+          export workdir=$(pwd)
+          cd ..
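+          # `cd ..` first because the workspace directory itself is deleted and
+          # recreated below, leaving a clean directory for the next run.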
+          rm -rf $workdir
+          mkdir $workdir
+          chmod -R 777 $workdir
+
+  test_triton:
+    runs-on: [self-hosted, linux-a100]
+    timeout-minutes: 4320 # 72 hours
+    environment: 'prod'
+    env:
+      HF_MODEL: /nvme/qa_test_models/internlm-chat-20b
+      WORKDIR: /nvme/qa_test_models/triton_workspace
+      TB_MODEL: internlm-chat-20b-fp16-tp2
+      GRPC_PORT: 33337
+    steps:
+      - name: Clone repository
+        uses: actions/checkout@v2
+      - name: Create test container
+        run: |
+          export CONTAINER_ID=$(docker create \
+            --rm \
+            --gpus='"device=0,1"' \
+            --shm-size 16g \
+            --cap-add=SYS_PTRACE \
+            --cap-add=SYS_ADMIN \
+            --security-opt seccomp=unconfined \
+            --name lmdeploy-ci-triton \
+            --workdir /__w/lmdeploy/lmdeploy \
+            --env PIP_CACHE_DIR=/root/.cache/pip \
+            --env NCCL_LAUNCH_MODE=GROUP \
+            -v $(pwd)/../../:/__w \
+            -v ${HF_MODEL}:/root/workspace/hf_model \
+            -v ${WORKDIR}:/root/workspace/workdir \
+            -v ${HOST_PIP_CACHE_DIR}:/root/.cache/pip \
+            -v ${HOST_LOCALTIME}:/etc/localtime:ro \
+            openmmlab/lmdeploy:latest tail -f /dev/null \
+            )
+          docker start $CONTAINER_ID
+          echo "CONTAINER_ID=$CONTAINER_ID"
+          echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
+      - name: Build lmdeploy from source
+        run: |
+          docker exec $CONTAINER_ID cp -r ../../build build
+          docker exec --workdir /__w/lmdeploy/lmdeploy/build \
+            --env http_proxy=${{secrets.PROXY}} \
+            --env https_proxy=${{secrets.PROXY}} \
+            $CONTAINER_ID cmake .. \
+              -DCMAKE_BUILD_TYPE=RelWithDebInfo \
+              -DCMAKE_EXPORT_COMPILE_COMMANDS=1 \
+              -DCMAKE_INSTALL_PREFIX=./install \
+              -DBUILD_PY_FFI=ON \
+              -DBUILD_MULTI_GPU=ON \
+              -DCMAKE_CUDA_FLAGS="-lineinfo" \
+              -DUSE_NVTX=ON \
+              -DSM=80 \
+              -DCMAKE_CUDA_ARCHITECTURES=80 \
+              -DBUILD_TEST=OFF
+          docker exec --workdir /__w/lmdeploy/lmdeploy/build $CONTAINER_ID make -j$(nproc)
+          docker exec --workdir /__w/lmdeploy/lmdeploy/build $CONTAINER_ID make install
+      - name: Install lmdeploy
+        run: |
+          docker exec \
+            --env http_proxy=${{secrets.PROXY}} \
+            --env https_proxy=${{secrets.PROXY}} \
+            $CONTAINER_ID python3 -m pip install tritonclient[grpc]
+          docker exec \
+            --env http_proxy=${{secrets.PROXY}} \
+            --env https_proxy=${{secrets.PROXY}} \
+            $CONTAINER_ID python3 -m pip install -r requirements/test.txt
+          docker exec $CONTAINER_ID python3 -m pip install .
+          # docker exec $CONTAINER_ID check_env
+      - name: Convert to turbomind model
+        run: |
+          docker exec $CONTAINER_ID \
+            lmdeploy convert \
+            --model-name internlm-chat-20b \
+            --model-path /root/workspace/hf_model \
+            --tp 2 \
+            --dst-path /root/workspace/workdir/${TB_MODEL}
+      - name: Start triton server service
+        run: |
+          docker exec --detach $CONTAINER_ID \
+            tritonserver \
+            --model-repository=/root/workspace/workdir/${TB_MODEL}/model_repository \
+            --allow-http=0 \
+            --allow-grpc=1 \
+            --grpc-port=${GRPC_PORT} \
+            --log-verbose=0 \
+            --allow-metrics=1
+          # wait for triton server to start up
+          sleep 180s
+      - name: Test triton server
+        run: |
+          docker exec \
+            --env no_proxy=localhost,127.0.0.1 \
+            $CONTAINER_ID python3 .github/scripts/test_triton_server.py --port ${GRPC_PORT}
+      - name: Clear workfile
+        if: always()
+        run: |
+          export workdir=$(pwd)
+          docker exec --workdir /__w/lmdeploy $CONTAINER_ID rm -rf lmdeploy
+          mkdir $workdir
+          chmod -R 777 $workdir
+          docker exec --workdir /__w/lmdeploy $CONTAINER_ID rm -rf /root/workspace/workdir/${TB_MODEL}
+          docker stop $CONTAINER_ID

diff --git a/docker/Dockerfile b/docker/Dockerfile
index 1cc53d3888..36c3e24a55 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -1,7 +1,7 @@
 FROM nvcr.io/nvidia/tritonserver:22.12-py3
 
 RUN rm /etc/apt/sources.list.d/cuda*.list && apt-get update && apt-get install -y --no-install-recommends \
-    rapidjson-dev libgoogle-glog-dev gdb \
+    rapidjson-dev libgoogle-glog-dev gdb libgl1 \
     && rm -rf /var/lib/apt/lists/*
 
 RUN python3 -m pip install --no-cache-dir torch==1.13.1+cu117 torchvision==0.14.1+cu117 --extra-index-url https://download.pytorch.org/whl/cu117

diff --git a/requirements/test.txt b/requirements/test.txt
index 7093b61a35..2125b2daaa 100644
--- a/requirements/test.txt
+++ b/requirements/test.txt
@@ -1,2 +1,5 @@
+allure-pytest
 coverage
+pynvml
 pytest
+pyyaml

From f9ec9fc68b007038f88b53e9b354cadb4d2e2642 Mon Sep 17 00:00:00 2001
From: RunningLeon
Date: Mon, 27 Nov 2023 11:50:27 +0800
Subject: [PATCH 2/4] update

---
 .github/workflows/test.yml | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index a855802189..6f052b0d04 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -82,12 +82,15 @@ jobs:
               -DCMAKE_CUDA_ARCHITECTURES=80 \
               -DBUILD_TEST=OFF
           make -j$(nproc) && make install
-      - name: Install lmdeploy from source
+      - name: Install lmdeploy
         run: |
           python3 -m pip install packaging protobuf transformers_stream_generator
           python3 -m pip install -r requirements.txt -r requirements/test.txt
           python3 -m pip install .
- # lmdeploy check_env + - name: Check env + run: | + python3 -m pip list + lmdeploy check_env - name: Test lmdeploy run: | echo "TODO: awaiting PR of adding autotest" @@ -194,7 +197,7 @@ jobs: --grpc-port=${GRPC_PORT} \ --log-verbose=0 \ --allow-metrics=1 - # wait for triton server to start up + # wait for triton server to fully start up sleep 180s - name: Test triton server run: | From 39fef8472c770533a0ac3b3da87e2d64c12b50c3 Mon Sep 17 00:00:00 2001 From: RunningLeon Date: Wed, 29 Nov 2023 08:22:16 +0800 Subject: [PATCH 3/4] revert changes in dockerfile --- docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 36c3e24a55..1cc53d3888 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,7 +1,7 @@ FROM nvcr.io/nvidia/tritonserver:22.12-py3 RUN rm /etc/apt/sources.list.d/cuda*.list && apt-get update && apt-get install -y --no-install-recommends \ - rapidjson-dev libgoogle-glog-dev gdb libgl1 \ + rapidjson-dev libgoogle-glog-dev gdb \ && rm -rf /var/lib/apt/lists/* RUN python3 -m pip install --no-cache-dir torch==1.13.1+cu117 torchvision==0.14.1+cu117 --extra-index-url https://download.pytorch.org/whl/cu117 From e43c9ffb2ab26a3c7188d687749fcd50a92a29ce Mon Sep 17 00:00:00 2001 From: RunningLeon Date: Wed, 29 Nov 2023 12:01:14 +0800 Subject: [PATCH 4/4] update prompts --- .github/scripts/test_triton_server.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/scripts/test_triton_server.py b/.github/scripts/test_triton_server.py index 0e94de1579..a5146b150c 100644 --- a/.github/scripts/test_triton_server.py +++ b/.github/scripts/test_triton_server.py @@ -29,11 +29,11 @@ def test(port=33337): keywords=['您好', '你好'], ), dict( - prompts='How many days does a week have?', - keywords=['seven'], + prompts='How many days does a week have? ', + keywords=['seven', '7'], ), dict( - prompts='一周有多少天', + prompts='一周有多少天?请用中文回答。', keywords=['七天', '7天'], ), ]
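One suggestion on the startup step these patches leave in place: the fixed `sleep 180s` could be replaced by polling the server with the `tritonclient[grpc]` package the workflow already installs. A sketch of that alternative, assuming the workflow's default gRPC port; it is an illustration, not part of the patch series:

```python
import time

import tritonclient.grpc as grpcclient
from tritonclient.utils import InferenceServerException


def wait_for_server(url: str = 'localhost:33337', timeout_s: int = 300) -> bool:
    """Poll the Triton gRPC endpoint until it reports ready or time runs out."""
    client = grpcclient.InferenceServerClient(url=url)
    deadline = time.time() + timeout_s
    while time.time() < deadline:
        try:
            if client.is_server_ready():
                return True
        except InferenceServerException:
            pass  # server is still starting and not accepting requests yet
        time.sleep(5)
    return False


if __name__ == '__main__':
    assert wait_for_server(), 'Triton server did not become ready in time'
```

Failing the step as soon as the deadline passes would surface a broken server immediately, instead of letting the client test time out later with a less specific error.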