Skip to content

Commit

Permalink
add more model into benchmark and evaluate workflow (#1565)
Browse files Browse the repository at this point in the history
* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

---------

Co-authored-by: zhulin1 <[email protected]>
  • Loading branch information
zhulinJulia24 and zhulin1 authored May 13, 2024
1 parent 16878da commit ca4de27
Show file tree
Hide file tree
Showing 9 changed files with 603 additions and 470 deletions.
3 changes: 2 additions & 1 deletion .github/scripts/action_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def _load_hf_results(test_results: dict, model_name: str):
return out


def evaluate(models: List[str], workspace: str):
def evaluate(models: List[str], datasets: List[str], workspace: str):
"""Evaluate models from lmdeploy using opencompass.
Args:
Expand Down Expand Up @@ -150,6 +150,7 @@ def evaluate(models: List[str], workspace: str):
continue
logging.info(f'Start evaluating {target_model} ...\n{model_cfg}\n\n')
with open(config_path_new, 'a') as f:
f.write(f'\ndatasets = {datasets}\n')
f.write(f'\nmodels = [ {target_model} ]\n')

work_dir = os.path.join(workspace, target_model)
Expand Down
969 changes: 542 additions & 427 deletions .github/scripts/eval_opencompass_config.py

Large diffs are not rendered by default.

13 changes: 11 additions & 2 deletions .github/scripts/set_benchmark_param.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,24 @@ else
echo "MODEL_FORMAT=" >> "$GITHUB_ENV"
fi

if [[ $1 == *"llama"* ]] || [[ $1 == *"Llama"* ]]
if [[ $1 == *"llama2"* ]] || [[ $1 == *"Llama-2"* ]]
then
echo "MAX_ENTRY_COUNT=--cache-max-entry-count 0.95" >> "$GITHUB_ENV"

else
echo "MAX_ENTRY_COUNT=--cache-max-entry-count 0.90" >> "$GITHUB_ENV"
fi

if [[ $1 == *"Llama-2-13b"* ]]
then
echo "BATCHES=128" >> "$GITHUB_ENV"
echo "MAX_BATCH_SIZE=" >> "$GITHUB_ENV"
else
echo "BATCHES=128 256" >> "$GITHUB_ENV"
echo "MAX_BATCH_SIZE=--max-batch-size 256" >> "$GITHUB_ENV"
fi

if [[ $1 == *"internlm2-chat-20b"* ]]
if [[ $1 == *"internlm2-chat-20b"* ]] || [[ $1 == *"Qwen1.5-32B-Chat"* ]]
then
echo "TP_INFO=--tp 2" >> "$GITHUB_ENV"
fi
48 changes: 26 additions & 22 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ on:
required: true
description: 'Set models run benchmark'
type: string
default: "['internlm/internlm2-chat-20b','internlm/internlm2-chat-20b-inner-4bits','meta-llama/Llama-2-7b-chat-hf','meta-llama/Llama-2-7b-chat-hf-inner-4bits']"
default: "['internlm/internlm2-chat-20b','internlm/internlm2-chat-20b-inner-4bits','meta-llama/Llama-2-7b-chat-hf','meta-llama/Llama-2-7b-chat-hf-inner-4bits','meta-llama/Meta-Llama-3-8B-Instruct','Qwen/Qwen1.5-32B-Chat']"

env:
HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
Expand All @@ -61,7 +61,7 @@ env:
REPORT_DIR: /nvme/qa_test_models/benchmark-reports/${{ github.run_id }}
DATASET_FILE: /nvme/qa_test_models/datasets/ShareGPT_V3_unfiltered_cleaned_split.json
TP_INFO: --tp 1
LOOP_NUM: 3
LOOP_NUM: 1
TRITON_PTXAS_PATH: /usr/local/cuda/bin/ptxas


Expand Down Expand Up @@ -115,7 +115,7 @@ jobs:
CUDA_VISIBLE_DEVICES: 6,7
container:
image: nvcr.io/nvidia/tritonserver:22.12-py3
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip"
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
- /nvme/github-actions/packages:/root/packages
Expand Down Expand Up @@ -211,7 +211,7 @@ jobs:
CUDA_VISIBLE_DEVICES: 4,5
container:
image: nvcr.io/nvidia/tritonserver:22.12-py3
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip"
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
- /nvme/github-actions/packages:/root/packages
Expand Down Expand Up @@ -268,8 +268,8 @@ jobs:
run: |
rm -rf ${result_dir}
mkdir ${result_dir}
batches=(128 256)
for batch in "${batches[@]}"
batches=($BATCHES)
for batch in ${batches[@]}
do
for ((i=1; i<=$LOOP_NUM; i++))
do
Expand All @@ -283,8 +283,8 @@ jobs:
run: |
rm -rf ${result_dir}
mkdir ${result_dir}
batches=(128 256)
for batch in "${batches[@]}"
batches=($BATCHES)
for batch in ${batches[@]}
do
for ((i=1; i<=$LOOP_NUM; i++))
do
Expand All @@ -298,8 +298,8 @@ jobs:
run: |
rm -rf ${result_dir}
mkdir ${result_dir}
batches=(128 256)
for batch in "${batches[@]}"
batches=($BATCHES)
for batch in ${batches[@]}
do
for ((i=1; i<=$LOOP_NUM; i++))
do
Expand All @@ -313,8 +313,8 @@ jobs:
run: |
rm -rf ${result_dir}
mkdir ${result_dir}
batches=(128 256)
for batch in "${batches[@]}"
batches=($BATCHES)
for batch in ${batches[@]}
do
for ((i=1; i<=$LOOP_NUM; i++))
do
Expand Down Expand Up @@ -352,7 +352,7 @@ jobs:
CUDA_VISIBLE_DEVICES: 6,7
container:
image: nvcr.io/nvidia/tritonserver:22.12-py3
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip"
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
- /nvme/github-actions/packages:/root/packages
Expand Down Expand Up @@ -405,7 +405,7 @@ jobs:
- name: Start restful api turbomind
if: contains(fromJSON(github.event.inputs.backend), 'turbomind')
run: |
lmdeploy serve api_server $MODEL_PATH $MAX_ENTRY_COUNT $MODEL_FORMAT $TP_INFO --log-level ${{inputs.log_level}} > turbomind_run.log 2>&1 &
lmdeploy serve api_server $MODEL_PATH $MAX_ENTRY_COUNT $MAX_BATCH_SIZE $MODEL_FORMAT $TP_INFO --log-level ${{inputs.log_level}} > turbomind_run.log 2>&1 &
echo "restful_pid=$!" >> "$GITHUB_ENV"
sleep 180s
- name: Run restful benchmark
Expand All @@ -415,8 +415,8 @@ jobs:
run: |
rm -rf ${result_dir}
mkdir ${result_dir}
batches=(128 256)
for batch in "${batches[@]}"
batches=($BATCHES)
for batch in ${batches[@]}
do
for ((i=1; i<=$LOOP_NUM; i++))
do
Expand All @@ -441,7 +441,7 @@ jobs:
run: |
rm -rf ${result_dir}
mkdir ${result_dir}
batches=(128 256)
batches=($BATCHES)
for batch in "${batches[@]}"
do
for ((i=1; i<=$LOOP_NUM; i++))
Expand All @@ -466,7 +466,7 @@ jobs:
run: |
rm -rf ${result_dir}
mkdir ${result_dir}
batches=(128 256)
batches=($BATCHES)
for batch in "${batches[@]}"
do
for ((i=1; i<=$LOOP_NUM; i++))
Expand All @@ -491,7 +491,7 @@ jobs:
run: |
rm -rf ${result_dir}
mkdir ${result_dir}
batches=(128 256)
batches=($BATCHES)
for batch in "${batches[@]}"
do
for ((i=1; i<=$LOOP_NUM; i++))
Expand Down Expand Up @@ -544,7 +544,11 @@ jobs:
repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
ref: ${{github.event.inputs.repo_ref || 'main'}}
- name: Set params
if: (contains( matrix.model, 'internlm2-chat-20b'))
run: |
chmod +x .github/scripts/set_benchmark_param.sh
.github/scripts/set_benchmark_param.sh ${{matrix.model}}
- name: Set params - cuda allocate
if: contains( env.TP_INFO, '--tp 2')
run: |
echo 'DEVICE="device=4,5"' >> "$GITHUB_ENV"
- name: Create test container
Expand All @@ -560,6 +564,7 @@ jobs:
--name "lmdeploy-ci-triton-$GITHUB_RUN_ID-$date_today" \
--workdir /__w/lmdeploy/lmdeploy \
--env NCCL_LAUNCH_MODE=GROUP \
--pull never \
-v $(pwd)/../../:/__w \
-v ${MODEL_PATH}:${MODEL_PATH} \
-v ${WORKDIR}:/root/workspace/workdir \
Expand All @@ -575,7 +580,6 @@ jobs:
- name: Build lmdeploy from source
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
run: |
docker exec $CONTAINER_ID sed -i 's/https:\/\/github.com\/NVIDIA\/cutlass.git/https:\/\/521github.com\/extdomains\/github.com\/NVIDIA\/cutlass.git/g' CMakeLists.txt
docker exec $CONTAINER_ID mkdir build
docker exec --workdir /__w/lmdeploy/lmdeploy/build \
--env http_proxy=${{secrets.PROXY}} \
Expand Down Expand Up @@ -664,7 +668,7 @@ jobs:
run: |
rm -rf ${result_dir}
mkdir ${result_dir} -p
batches=(128 256)
batches=($BATCHES)
for batch in "${batches[@]}"
do
for ((i=1; i<=$LOOP_NUM; i++))
Expand Down
7 changes: 4 additions & 3 deletions .github/workflows/daily_ete_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ on:
type: boolean
default: true
schedule:
- cron: '00 20 * * 1-5'
- cron: '00 20 * * 0-4'

env:
HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
Expand Down Expand Up @@ -106,7 +106,7 @@ jobs:
MODELSCOPE_MODULES_CACHE: /root/modelscope_modules
container:
image: nvcr.io/nvidia/tritonserver:22.12-py3
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip"
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
- /nvme/github-actions/packages:/root/packages
Expand Down Expand Up @@ -293,6 +293,7 @@ jobs:
--name "lmdeploy-ci-triton-$GITHUB_RUN_ID" \
--workdir /__w/lmdeploy/lmdeploy \
--env NCCL_LAUNCH_MODE=GROUP \
--pull never \
-v $(pwd)/../../:/__w \
-v ${HF_MODEL}:/root/workspace/hf_model \
-v ${WORKDIR}:/root/workspace/workdir \
Expand Down Expand Up @@ -431,7 +432,7 @@ jobs:
REPORT_DIR: /nvme/qa_test_models/test-reports
container:
image: nvcr.io/nvidia/tritonserver:22.12-py3
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip"
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
- /nvme/github-actions/packages:/root/packages
Expand Down
14 changes: 10 additions & 4 deletions .github/workflows/evaluate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,27 +17,32 @@ on:
required: true
description: 'Tested TurboMind models list. eg. [internlm_chat_7b,internlm_chat_7b_w8a16]'
type: string
default: '[internlm2_chat_7b,internlm2_chat_20b,internlm2_chat_20b_w4a16,llama2_chat_7b,qwen_chat_7b]'
default: '[tb_internlm2_chat_7b,tb_internlm2_chat_20b,tb_internlm2_chat_20b_w4a16,tb_llama2_chat_7b,tb_qwen1_5_chat_7b,tb_llama_3_8b_instruct,pt_internlm2_chat_7b,pt_internlm2_chat_20b,pt_llama2_chat_7b,pt_qwen1_5_chat_7b,pt_qwen1_5_moe_2_7b_chat,pt_llama_3_8b_instruct,tb_internlm2_chat_7b_kvint4,tb_internlm2_chat_20b_kvint4,tb_qwen1_5_chat_7b_kvint4,tb_llama_3_8b_instruct_kvint4]'
datasets:
required: true
description: 'Tested datasets list. eg. [*mmlu_datasets, *ceval_datasets, *WiC_datasets, *WSC_datasets, *triviaqa_datasets, *gsm8k_datasets, *race_datasets, *crowspairs_datasets]'
type: string
default: '[*mmlu_datasets, *gsm8k_datasets]'
devices:
required: true
description: 'CUDA_VISIBLE_DEVICES.'
type: string
default: '0,1,2,3,4,5,6,7'


jobs:
evaluate:
runs-on: [self-hosted, linux-a100]
timeout-minutes: 4320 # 72hours
environment: 'prod'
container:
image: nvcr.io/nvidia/tritonserver:22.12-py3
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip"
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
- /nvme/github-actions/packages:/root/packages
- /nvme/github-actions/resources:/root/resources
- /nvme/github-actions/evaluation-reports:/root/evaluation-reports
- /nvme/github-actions/opencompass-data:/root/opencompass-data
- /nvme/qa_test_models/evaluation-reports:/root/evaluation-reports
- /nvme/qa_test_models:/root/models
- /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
steps:
Expand Down Expand Up @@ -102,6 +107,7 @@ jobs:
python3 .github/scripts/action_tools.py evaluate \
--models "${{github.event.inputs.models}}" \
--datasets "${{github.event.inputs.datasets}}" \
--workspace /root/evaluation-reports/$TIME_STAMP
- name: Clear workspace
if: always()
Expand Down
3 changes: 1 addition & 2 deletions .github/workflows/pr_ete_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ jobs:
REPORT_DIR: /nvme/qa_test_models/test-reports
container:
image: nvcr.io/nvidia/tritonserver:24.03-py3
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip"
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip --pull never"
volumes:
- /nvme/share_data/github-actions/pip-cache:/root/.cache/pip
- /nvme/share_data/github-actions/packages:/root/packages
Expand All @@ -56,7 +56,6 @@ jobs:
run: |
python3 -m pip install cmake
python3 -m pip install -r requirements/build.txt
sed -i 's/https:\/\/github.com\/NVIDIA\/cutlass.git/https:\/\/521github.com\/extdomains\/github.com\/NVIDIA\/cutlass.git/g' CMakeLists.txt
mkdir build
cd build
cmake .. \
Expand Down
3 changes: 1 addition & 2 deletions .github/workflows/unit-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:
timeout-minutes: 4320 # 72hours
container:
image: nvcr.io/nvidia/tritonserver:22.12-py3
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e CUDA_VISIBLE_DEVICES=2,3"
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e CUDA_VISIBLE_DEVICES=2,3 --pull never"
volumes:
- /nvme/share_data/github-actions/pip-cache:/root/.cache/pip
- /nvme/share_data/github-actions/packages:/root/packages
Expand All @@ -58,7 +58,6 @@ jobs:
run: |
python3 -m pip install cmake
python3 -m pip install -r requirements/build.txt
sed -i 's/https:\/\/github.com\/NVIDIA\/cutlass.git/https:\/\/521github.com\/extdomains\/github.com\/NVIDIA\/cutlass.git/g' CMakeLists.txt
mkdir build
cd build
cmake .. \
Expand Down
Loading

0 comments on commit ca4de27

Please sign in to comment.