-
Notifications
You must be signed in to change notification settings - Fork 448
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add triton server test and workflow yml
- Loading branch information
1 parent
c07f60f
commit 74d77ba
Showing
4 changed files
with
289 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
# Copyright (c) OpenMMLab. All rights reserved. | ||
import time | ||
from subprocess import PIPE, Popen | ||
|
||
import fire | ||
|
||
|
||
def parse_dialogue(inputs: str):
    """Split a raw triton-client transcript into per-turn reply chunks.

    The client prints the input marker between turns; everything before
    the first marker (startup banner text) is discarded.
    """
    marker = 'double enter to end input >>>'
    text = inputs.strip()
    # A trailing marker would otherwise yield an empty final chunk.
    if text.endswith(marker):
        text = text[:-len(marker)].strip()
    chunks = [part.strip() for part in text.split(marker)]
    # chunks[0] precedes any marker, so it is banner text — skip it.
    return chunks[1:]
|
||
|
||
def test(port=33337):
    """Smoke-test a running triton server through the lmdeploy CLI client.

    Each case pipes a prompt into ``lmdeploy serve triton_client`` and
    checks that at least one expected keyword appears in the first reply.

    Args:
        port (int): gRPC port the triton server listens on.

    Raises:
        AssertionError: if any case exits non-zero, produces no parseable
            reply, or misses all of its expected keywords.
    """
    # With shell=True the command should be a single string, not a
    # one-element list (list form is only conventional for shell=False).
    cmd = f'lmdeploy serve triton_client localhost:{port}'

    test_cases = [
        dict(
            prompts='Hello! Please answer in English.',
            keywords=['Hello', 'hi'],
        ),
        dict(
            prompts='您好! 请用中文回答。',
            keywords=['您好', '你好'],
        ),
        dict(
            prompts='How many days does a week have?',
            keywords=['seven'],
        ),
        dict(
            prompts='一周有多少天',
            keywords=['七天', '7天'],
        ),
    ]

    sep = '\n\n'
    # "exit" terminates the interactive client session cleanly.
    end = sep + 'exit\n\n\n'
    all_pass = True
    for case in test_cases:
        quest = case['prompts']
        keywords = case['keywords']
        inputs = quest + end
        print(f'Test Input prompts: {quest}\nKey words: {keywords}')
        # Brief pause between sessions so the server can release resources.
        time.sleep(5)

        with Popen(cmd,
                   stdin=PIPE,
                   stdout=PIPE,
                   stderr=PIPE,
                   shell=True,
                   text=True,
                   encoding='utf-8') as proc:
            out, err = proc.communicate(input=inputs)
            print(f'Output: {out}')
            if proc.returncode != 0:
                all_pass = False
                print(f'Failed to get outputs: {out} {err}')
                continue
            replies = parse_dialogue(out)
            # Guard against an unparseable transcript; the original code
            # raised IndexError here instead of recording a failure.
            if not replies:
                all_pass = False
                print(f'>>> Failed to parse any reply from: {out}')
                continue
            first_reply = replies[0]
            if not any(k in first_reply for k in keywords):
                print(f'>>> Failed to output keywords: '
                      f'{first_reply} {keywords}')
                all_pass = False
    assert all_pass, 'Tests failed!'
|
||
|
||
if __name__ == '__main__':
    # Expose `test` as a CLI, e.g.:
    #   python3 .github/scripts/test_triton_server.py --port 33337
    fire.Fire(test)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,212 @@ | ||
name: test

on:
  pull_request:
    paths:
      - ".github/scripts/test_triton_server.py"
      - ".github/workflows/test.yml"
      - "cmake/**"
      - "src/**"
      - "3rdparty/**"
      - "lmdeploy/**"
      - "requirements/**"
      - "requirements.txt"
      - "CMakeLists.txt"
      - "setup.py"
  push:
    branches:
      - main
    paths:
      - "lmdeploy/version.py"
    tags:
      - "v*.*.*"

  workflow_dispatch:
    inputs:
      markers:
        required: false
        description: 'Tested markers. eg: "-m internlm_chat_7b"'
        type: string
        default: ''

env:
  HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
  HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai

jobs:
  test_functions:
    runs-on: [self-hosted, linux-a100]
    timeout-minutes: 4320 # 72hours
    environment: 'prod'
    env:
      REPORT_DIR: /nvme/qa_test_models/test-reports
    container:
      image: nvcr.io/nvidia/tritonserver:22.12-py3
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Setup systems
        run: |
          rm /etc/apt/sources.list.d/cuda*.list
          apt-get update && apt-get install -y --no-install-recommends rapidjson-dev \
              libgoogle-glog-dev libgl1 openjdk-8-jre-headless
          dpkg -i /root/packages/allure_2.24.1-1_all.deb
          rm -rf /var/lib/apt/lists/*
      - name: Clone repository
        uses: actions/checkout@v2
      - name: Install pytorch
        run: |
          python3 -m pip cache dir
          python3 -m pip install torch==1.13.1+cu117 torchvision==0.14.1+cu117 --extra-index-url https://download.pytorch.org/whl/cu117
      - name: Build lmdeploy
        run: |
          python3 -m pip install cmake
          python3 -m pip install -r requirements/build.txt
          # use cached build
          cp -r ../../build build
          cd build
          cmake .. \
              -DCMAKE_BUILD_TYPE=RelWithDebInfo \
              -DCMAKE_EXPORT_COMPILE_COMMANDS=1 \
              -DCMAKE_INSTALL_PREFIX=./install \
              -DBUILD_PY_FFI=ON \
              -DBUILD_MULTI_GPU=ON \
              -DCMAKE_CUDA_FLAGS="-lineinfo" \
              -DUSE_NVTX=ON \
              -DSM=80 \
              -DCMAKE_CUDA_ARCHITECTURES=80 \
              -DBUILD_TEST=OFF
          make -j$(nproc) && make install
      - name: Install lmdeploy from source
        run: |
          python3 -m pip install packaging protobuf transformers_stream_generator
          python3 -m pip install -r requirements.txt -r requirements/test.txt
          python3 -m pip install .
          # lmdeploy check_env
      - name: Test lmdeploy
        run: |
          # FIX: the echo below was missing its closing quote, which made
          # the shell treat the rest of the script block as string content.
          echo "TODO: awaiting PR of adding autotest"
          # pytest autotest ${{github.event.inputs.markers}} --alluredir=allure-results --clean-alluredir
      - name: Generate reports
        if: always()
        run: |
          # FIX: 'test -D' is not a valid operator; '-d' tests for a directory.
          if test -d "allure-results"; then
            export date_today="$(date +'%Y%m%d-%H%M%S')"
            export report_dir="$REPORT_DIR/$date_today"
            # FIX: $ALLURE_DIR was never defined; report the actual target dir.
            echo "Save report to $report_dir"
            allure generate -c -o $report_dir
          fi
      - name: Clear workfile
        if: always()
        run: |
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir
  test_triton:
    runs-on: [self-hosted, linux-a100]
    timeout-minutes: 4320 # 72hours
    environment: 'prod'
    env:
      HF_MODEL: /nvme/qa_test_models/internlm-chat-20b
      WORKDIR: /nvme/qa_test_models/triton_workspace
      TB_MODEL: internlm-chat-20b-fp16-tp2
      GRPC_PORT: 33337
    steps:
      - name: Clone repository
        uses: actions/checkout@v2
      - name: Create test container
        run: |
          export CONTAINER_ID=$(docker create \
              --rm \
              --gpus='"device=0,1"' \
              --shm-size 16g \
              --cap-add=SYS_PTRACE \
              --cap-add=SYS_ADMIN \
              --security-opt seccomp=unconfined \
              --name lmdeploy-ci-triton \
              --workdir /__w/lmdeploy/lmdeploy \
              --env PIP_CACHE_DIR=/root/.cache/pip \
              --env NCCL_LAUNCH_MODE=GROUP \
              -v $(pwd)/../../:/__w \
              -v ${HF_MODEL}:/root/workspace/hf_model \
              -v ${WORKDIR}:/root/workspace/workdir \
              -v ${HOST_PIP_CACHE_DIR}:/root/.cache/pip \
              -v ${HOST_LOCALTIME}:/etc/localtime:ro \
              openmmlab/lmdeploy:latest tail -f /dev/null \
              )
          docker start $CONTAINER_ID
          echo "CONTAINER_ID=$CONTAINER_ID"
          echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
      - name: Build lmdeploy from source
        run: |
          docker exec $CONTAINER_ID cp -r ../../build build
          docker exec --workdir /__w/lmdeploy/lmdeploy/build \
              --env http_proxy=${{secrets.PROXY}} \
              --env https_proxy=${{secrets.PROXY}} \
              $CONTAINER_ID cmake .. \
              -DCMAKE_BUILD_TYPE=RelWithDebInfo \
              -DCMAKE_EXPORT_COMPILE_COMMANDS=1 \
              -DCMAKE_INSTALL_PREFIX=./install \
              -DBUILD_PY_FFI=ON \
              -DBUILD_MULTI_GPU=ON \
              -DCMAKE_CUDA_FLAGS="-lineinfo" \
              -DUSE_NVTX=ON \
              -DSM=80 \
              -DCMAKE_CUDA_ARCHITECTURES=80 \
              -DBUILD_TEST=OFF
          docker exec --workdir /__w/lmdeploy/lmdeploy/build $CONTAINER_ID make -j$(nproc)
          docker exec --workdir /__w/lmdeploy/lmdeploy/build $CONTAINER_ID make install
      - name: Install lmdeploy
        run: |
          docker exec \
              --env http_proxy=${{secrets.PROXY}} \
              --env https_proxy=${{secrets.PROXY}} \
              $CONTAINER_ID python3 -m pip install tritonclient[grpc]
          docker exec \
              --env http_proxy=${{secrets.PROXY}} \
              --env https_proxy=${{secrets.PROXY}} \
              $CONTAINER_ID python3 -m pip install -r requirements/test.txt
          docker exec $CONTAINER_ID python3 -m pip install .
          # docker exec $CONTAINER_ID check_env
      - name: Convert to turbomind model
        run: |
          docker exec $CONTAINER_ID \
              lmdeploy convert \
              --model-name internlm-chat-20b \
              --model-path /root/workspace/hf_model \
              --tp 2 \
              --dst-path /root/workspace/workdir/${TB_MODEL}
      - name: Start triton server service
        run: |
          docker exec --detach $CONTAINER_ID \
              tritonserver \
              --model-repository=/root/workspace/workdir/${TB_MODEL}/model_repository \
              --allow-http=0 \
              --allow-grpc=1 \
              --grpc-port=${GRPC_PORT} \
              --log-verbose=0 \
              --allow-metrics=1
          # wait for triton server to start up
          sleep 180s
      - name: Test triton server
        run: |
          docker exec \
              --env no_proxy=localhost,127.0.0.1 \
              $CONTAINER_ID python3 .github/scripts/test_triton_server.py --port ${GRPC_PORT}
      - name: Clear workfile
        if: always()
        run: |
          export workdir=$(pwd)
          docker exec --workdir /__w/lmdeploy $CONTAINER_ID rm -rf lmdeploy
          mkdir $workdir
          chmod -R 777 $workdir
          docker exec --workdir /__w/lmdeploy $CONTAINER_ID rm -rf /root/workspace/workdir/${TB_MODEL}
          docker stop $CONTAINER_ID
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,5 @@ | ||
allure-pytest | ||
coverage | ||
pynvml | ||
pytest | ||
pyyaml |