diff --git a/.github/scripts/action_tools.py b/.github/scripts/action_tools.py
index 8bc42899a3..49226f02f2 100644
--- a/.github/scripts/action_tools.py
+++ b/.github/scripts/action_tools.py
@@ -163,7 +163,7 @@ def evaluate(models: List[str], datasets: List[str], workspace: str):
         work_dir = os.path.join(workspace, target_model)
         cmd_eval = [
-            f'python3 {opencompass_dir}/run.py {config_path_new} -w {work_dir} --max-num-workers 8'  # noqa: E501
+            f'python3 {opencompass_dir}/run.py {config_path_new} -w {work_dir} --reuse --max-num-workers 8'  # noqa: E501
         ]
         eval_log = os.path.join(workspace, f'eval.{ori_model}.txt')
         ret = run_cmd(cmd_eval, log_path=eval_log, cwd=lmdeploy_dir)
diff --git a/.github/scripts/eval_opencompass_config.py b/.github/scripts/eval_opencompass_config.py
index 13bb2ba001..f2fe134503 100644
--- a/.github/scripts/eval_opencompass_config.py
+++ b/.github/scripts/eval_opencompass_config.py
@@ -1,5 +1,6 @@
 from mmengine.config import read_base
-from opencompass.models import LmdeployPytorchModel, TurboMindModel
+from opencompass.models import (LmdeployPytorchModel, TurboMindModel,
+                                TurboMindModelwithChatTemplate)

 with read_base():
     # choose a list of datasets
@@ -32,10 +33,6 @@
         nq_datasets  # noqa: F401, E501
     from .datasets.race.race_gen_69ee4f import \
         race_datasets  # noqa: F401, E501
-    from .datasets.SuperGLUE_WiC.SuperGLUE_WiC_gen_d06864 import \
-        WiC_datasets  # noqa: F401, E501
-    from .datasets.SuperGLUE_WSC.SuperGLUE_WSC_gen_7902a7 import \
-        WSC_datasets  # noqa: F401, E501
     from .datasets.TheoremQA.TheoremQA_5shot_gen_6f0af8 import \
         TheoremQA_datasets  # noqa: F401, E501
     from .datasets.triviaqa.triviaqa_wiki_1shot_gen_eaf81e import \
@@ -270,6 +267,24 @@
 run_cfg_tp1_template = dict(num_gpus=1, num_procs=1)
 run_cfg_tp2_template = dict(num_gpus=2, num_procs=1)

+engine_config_template_max_bs_128 = dict(session_len=MAX_SESSION_LEN,
+                                         max_batch_size=128)
+engine_config_template_max_bs_128_awq = dict(session_len=MAX_SESSION_LEN,
+                                             max_batch_size=128,
+                                             model_format='awq')
+engine_config_template_max_bs_128_kvint4 = dict(session_len=MAX_SESSION_LEN,
+                                                max_batch_size=128,
+                                                quant_policy=4)
+engine_config_template_max_bs_128_tp2 = dict(session_len=MAX_SESSION_LEN,
+                                             max_batch_size=128,
+                                             tp=2)
+engine_config_template_max_bs_128_awq_tp2 = dict(session_len=MAX_SESSION_LEN,
+                                                 max_batch_size=128,
+                                                 model_format='awq',
+                                                 tp=2)
+engine_config_template_max_bs_128_kvint4_tp2 = dict(
+    session_len=MAX_SESSION_LEN, max_batch_size=128, quant_policy=4, tp=2)
+
 # ===== Configs for internlm/internlm-chat-7b =====
 # config for internlm-chat-7b
 tb_internlm_chat_7b = dict(type=TurboMindModel,
@@ -359,122 +374,106 @@
                             end_str='')

 # ===== Configs for internlm/internlm2-chat-7b =====
-# config for internlm2-chat-7b
-tb_internlm2_chat_7b = dict(type=TurboMindModel,
-                            abbr='internlm2-chat-7b-turbomind',
-                            path='internlm/internlm2-chat-7b',
-                            engine_config=tb_engine_config_template_max_bs_128,
-                            gen_config=gen_config_template,
-                            max_out_len=MAX_NEW_TOKENS,
-                            max_seq_len=MAX_SESSION_LEN,
-                            batch_size=128,
-                            concurrency=128,
-                            meta_template=internlm2_meta_template,
-                            run_cfg=run_cfg_tp1_template,
-                            end_str='<|im_end|>')
+tb_internlm2_chat_7b = dict(
+    type=TurboMindModelwithChatTemplate,
+    abbr='tb_internlm2_chat_7b',
+    path='internlm/internlm2-chat-7b',
+    engine_config=engine_config_template_max_bs_128,
+    gen_config=gen_config_template,
+    max_seq_len=MAX_SESSION_LEN,
+    max_out_len=MAX_NEW_TOKENS,
+    batch_size=128,
+    run_cfg=dict(num_gpus=1),
+)

-# config for internlm2-chat-7b-w4
 tb_internlm2_chat_7b_w4a16 = dict(
-    type=TurboMindModel,
-    abbr='internlm2-chat-7b-4bits-turbomind',
+    type=TurboMindModelwithChatTemplate,
+    abbr='tb_internlm2_chat_7b_w4a16',
     path='internlm/internlm2-chat-7b-inner-4bits',
-    engine_config=tb_awq_engine_config_template_max_bs_128,
+    engine_config=engine_config_template_max_bs_128_awq,
     gen_config=gen_config_template,
-    max_out_len=MAX_NEW_TOKENS,
     max_seq_len=MAX_SESSION_LEN,
+    max_out_len=MAX_NEW_TOKENS,
     batch_size=128,
-    concurrency=128,
-    meta_template=internlm2_meta_template,
-    run_cfg=run_cfg_tp1_template,
-    end_str='<|im_end|>')
+    run_cfg=dict(num_gpus=1),
+)

 tb_internlm2_chat_7b_kvint4 = dict(
-    type=TurboMindModel,
-    abbr='internlm2-chat-7b-turbomind-kvint4',
+    type=TurboMindModelwithChatTemplate,
+    abbr='tb_internlm2_chat_7b_kvint4',
     path='internlm/internlm2-chat-7b',
-    engine_config=tb_kvint4_engine_config_template_max_bs_128,
+    engine_config=engine_config_template_max_bs_128_kvint4,
     gen_config=gen_config_template,
-    max_out_len=MAX_NEW_TOKENS,
     max_seq_len=MAX_SESSION_LEN,
+    max_out_len=MAX_NEW_TOKENS,
     batch_size=128,
-    concurrency=128,
-    meta_template=internlm2_meta_template,
-    run_cfg=run_cfg_tp1_template,
-    end_str='<|im_end|>')
+    run_cfg=dict(num_gpus=1),
+)

 # config for pt internlm-chat-7b
 pt_internlm2_chat_7b = dict(type=LmdeployPytorchModel,
                             abbr='internlm2-chat-7b-pytorch',
                             path='internlm/internlm2-chat-7b',
-                            engine_config=pt_engine_config_template_max_bs_128,
+                            engine_config=pt_engine_config_template_max_bs_64,
                             gen_config=gen_config_template,
                             max_out_len=MAX_NEW_TOKENS,
                             max_seq_len=MAX_SESSION_LEN,
-                            batch_size=128,
-                            concurrency=128,
+                            batch_size=64,
+                            concurrency=64,
                             meta_template=internlm2_meta_template,
                             run_cfg=run_cfg_tp1_template,
                             end_str='<|im_end|>')

 # ===== Configs for internlm/internlm2-chat-20b =====
-# config for internlm2-chat-20b
 tb_internlm2_chat_20b = dict(
-    type=TurboMindModel,
-    abbr='internlm2-chat-20b-turbomind',
+    type=TurboMindModelwithChatTemplate,
+    abbr='tb_internlm2_chat_20b',
     path='internlm/internlm2-chat-20b',
-    engine_config=tb_engine_config_template_max_bs_128_tp2,
+    engine_config=engine_config_template_max_bs_128_tp2,
     gen_config=gen_config_template,
-    max_out_len=MAX_NEW_TOKENS,
     max_seq_len=MAX_SESSION_LEN,
+    max_out_len=MAX_NEW_TOKENS,
     batch_size=128,
-    concurrency=128,
-    meta_template=internlm2_meta_template,
-    run_cfg=run_cfg_tp2_template,
-    end_str='<|im_end|>')
+    run_cfg=dict(num_gpus=2),
+)

-# config for internlm2-chat-20b-w4 model
 tb_internlm2_chat_20b_w4a16 = dict(
-    type=TurboMindModel,
-    abbr='internlm2-chat-20b-4bits-turbomind',
+    type=TurboMindModelwithChatTemplate,
+    abbr='tb_internlm2_chat_20b_w4a16',
     path='internlm/internlm2-chat-20b-inner-4bits',
-    engine_config=tb_awq_engine_config_template_max_bs_128_tp2,
+    engine_config=engine_config_template_max_bs_128_awq_tp2,
     gen_config=gen_config_template,
-    max_out_len=MAX_NEW_TOKENS,
     max_seq_len=MAX_SESSION_LEN,
+    max_out_len=MAX_NEW_TOKENS,
     batch_size=128,
-    concurrency=128,
-    meta_template=internlm2_meta_template,
-    run_cfg=run_cfg_tp2_template,
-    end_str='<|im_end|>')
+    run_cfg=dict(num_gpus=2),
+)

-# config for internlm2-chat-20b-w4 model
 tb_internlm2_chat_20b_kvint4 = dict(
-    type=TurboMindModel,
-    abbr='internlm2-chat-20b-turbomind-kvint4',
-    path='internlm/internlm2-chat-20b-inner-4bits',
-    engine_config=tb_kvint4_engine_config_template_max_bs_128_tp2,
+    type=TurboMindModelwithChatTemplate,
+    abbr='tb_internlm2_chat_20b_kvint4',
+    path='internlm/internlm2-chat-20b',
+    engine_config=engine_config_template_max_bs_128_kvint4_tp2,
     gen_config=gen_config_template,
-    max_out_len=MAX_NEW_TOKENS,
     max_seq_len=MAX_SESSION_LEN,
+    max_out_len=MAX_NEW_TOKENS,
     batch_size=128,
-    concurrency=128,
-    meta_template=internlm2_meta_template,
-    run_cfg=run_cfg_tp2_template,
-    end_str='<|im_end|>')
+    run_cfg=dict(num_gpus=2),
+)

 # config for pt internlm-chat-20b
 pt_internlm2_chat_20b = dict(
     type=LmdeployPytorchModel,
     abbr='internlm2-chat-20b-pytorch',
     path='internlm/internlm2-chat-20b',
-    engine_config=pt_engine_config_template_max_bs_64_prefill,
+    engine_config=pt_engine_config_template_max_bs_64_prefill_tp2,
     gen_config=gen_config_template,
     max_out_len=MAX_NEW_TOKENS,
     max_seq_len=MAX_SESSION_LEN,
     batch_size=64,
     concurrency=64,
     meta_template=internlm2_meta_template,
-    run_cfg=run_cfg_tp1_template,
+    run_cfg=run_cfg_tp2_template,
     end_str='<|im_end|>')

 # ===== Configs for Qwen/Qwen-7B-Chat =====
@@ -624,48 +623,43 @@
                        end_str='')

 # ===== Configs for Qwen/Qwen1.5-7B-Chat =====
-tb_qwen1_5_chat_7b = dict(type=TurboMindModel,
-                          abbr='qwen1.5-7b-chat-turbomind',
-                          path='Qwen/Qwen1.5-7B-Chat',
-                          engine_config=tb_engine_config_template_max_bs_128,
-                          gen_config=gen_config_template,
-                          max_out_len=MAX_NEW_TOKENS,
-                          max_seq_len=MAX_SESSION_LEN,
-                          batch_size=128,
-                          concurrency=128,
-                          meta_template=qwen1_5_meta_template,
-                          run_cfg=run_cfg_tp1_template,
-                          end_str='<|im_end|>')
+tb_qwen1_5_7b_chat = dict(
+    type=TurboMindModelwithChatTemplate,
+    abbr='tb_qwen1_5_7b_chat',
+    path='Qwen/Qwen1.5-7B-Chat',
+    engine_config=engine_config_template_max_bs_128,
+    gen_config=gen_config_template,
+    max_seq_len=MAX_SESSION_LEN,
+    max_out_len=MAX_NEW_TOKENS,
+    batch_size=128,
+    run_cfg=dict(num_gpus=1),
+)

-tb_qwen1_5_chat_7b_w4a16 = dict(
-    type=TurboMindModel,
-    abbr='qwen1.5-7b-chat-4bits-turbomind',
+tb_qwen1_5_7b_chat_w4a16 = dict(
+    type=TurboMindModelwithChatTemplate,
+    abbr='tb_qwen1_5_7b_chat_w4a16',
     path='Qwen/Qwen1.5-7B-Chat-inner-4bits',
-    engine_config=tb_awq_engine_config_template_max_bs_128,
+    engine_config=engine_config_template_max_bs_128_awq,
     gen_config=gen_config_template,
-    max_out_len=MAX_NEW_TOKENS,
     max_seq_len=MAX_SESSION_LEN,
+    max_out_len=MAX_NEW_TOKENS,
     batch_size=128,
-    concurrency=128,
-    meta_template=qwen1_5_meta_template,
-    run_cfg=run_cfg_tp1_template,
-    end_str='<|im_end|>')
+    run_cfg=dict(num_gpus=1),
+)

-tb_qwen1_5_chat_7b_kvint4 = dict(
-    type=TurboMindModel,
-    abbr='qwen1.5-7b-chat-turbomind-kvint4',
+tb_qwen1_5_7b_chat_kvint4 = dict(
+    type=TurboMindModelwithChatTemplate,
+    abbr='tb_qwen1_5_7b_chat_kvint4',
     path='Qwen/Qwen1.5-7B-Chat',
-    engine_config=tb_kvint4_engine_config_template_max_bs_128,
+    engine_config=engine_config_template_max_bs_128_kvint4,
     gen_config=gen_config_template,
-    max_out_len=MAX_NEW_TOKENS,
     max_seq_len=MAX_SESSION_LEN,
+    max_out_len=MAX_NEW_TOKENS,
     batch_size=128,
-    concurrency=128,
-    meta_template=qwen1_5_meta_template,
-    run_cfg=run_cfg_tp1_template,
-    end_str='<|im_end|>')
+    run_cfg=dict(num_gpus=1),
+)

-pt_qwen1_5_chat_7b = dict(type=LmdeployPytorchModel,
+pt_qwen1_5_7b_chat = dict(type=LmdeployPytorchModel,
                           abbr='qwen1.5-7b-chat-pytorch',
                           path='Qwen/Qwen1.5-7B-Chat',
                           engine_config=pt_engine_config_template_max_bs_128,
@@ -692,6 +686,43 @@
                           run_cfg=run_cfg_tp1_template,
                           end_str='<|im_end|>')

+# ===== Configs for Qwen/Qwen2-7B-Instruct =====
+tb_qwen2_7b_instruct = dict(
+    type=TurboMindModelwithChatTemplate,
+    abbr='tb_qwen2_7b_instruct',
+    path='Qwen/Qwen2-7B-Instruct',
+    engine_config=engine_config_template_max_bs_128,
+    gen_config=gen_config_template,
+    max_seq_len=MAX_SESSION_LEN,
+    max_out_len=MAX_NEW_TOKENS,
+    batch_size=128,
+    run_cfg=dict(num_gpus=1),
+)
+
+tb_qwen2_7b_instruct_w4a16 = dict(
+    type=TurboMindModelwithChatTemplate,
+    abbr='tb_qwen2_7b_instruct_w4a16',
+    path='Qwen/Qwen2-7B-Instruct-inner-4bits',
+    engine_config=engine_config_template_max_bs_128_awq,
+    gen_config=gen_config_template,
+    max_seq_len=MAX_SESSION_LEN,
+    max_out_len=MAX_NEW_TOKENS,
+    batch_size=128,
+    run_cfg=dict(num_gpus=1),
+)
+
+tb_qwen2_7b_instruct_kvint4 = dict(
+    type=TurboMindModelwithChatTemplate,
+    abbr='tb_qwen2_7b_instruct_kvint4',
+    path='Qwen/Qwen2-7B-Instruct',
+    engine_config=engine_config_template_max_bs_128_kvint4,
+    gen_config=gen_config_template,
+    max_seq_len=MAX_SESSION_LEN,
+    max_out_len=MAX_NEW_TOKENS,
+    batch_size=128,
+    run_cfg=dict(num_gpus=1),
+)
+
 # ===== Configs for google/gemma-7b-it =====
 pt_gemma_chat_7b = dict(type=LmdeployPytorchModel,
                         abbr='gemma-7b-it-pytorch',
@@ -707,48 +738,44 @@
                         end_str='')

 # ===== Configs for meta-llama/Meta-Llama-3-8B-Instruct =====
-# config for llama-3-8b-instruct turbomind
 tb_llama_3_8b_instruct = dict(
-    type=TurboMindModel,
-    abbr='llama-3-8b-instruct-turbomind',
+    type=TurboMindModelwithChatTemplate,
+    abbr='tb_llama_3_8b_instruct',
     path='meta-llama/Meta-Llama-3-8B-Instruct',
-    engine_config=tb_engine_config_template_max_bs_128,
+    engine_config=engine_config_template_max_bs_128,
     gen_config=gen_config_template,
-    max_out_len=MAX_NEW_TOKENS,
     max_seq_len=MAX_SESSION_LEN,
+    max_out_len=MAX_NEW_TOKENS,
     batch_size=128,
-    concurrency=128,
-    meta_template=llama3_meta_template,
-    run_cfg=run_cfg_tp1_template,
-    end_str='[INST]')
+    run_cfg=dict(num_gpus=1),
+    stop_words=['<|eot_id|>', '<|end_of_text|>'],
+)

 tb_llama_3_8b_instruct_w4a16 = dict(
-    type=TurboMindModel,
-    abbr='llama-3-8b-instruct-4bits-turbomind',
+    type=TurboMindModelwithChatTemplate,
+    abbr='tb_llama_3_8b_instruct_w4a16',
     path='meta-llama/Meta-Llama-3-8B-Instruct-inner-4bits',
-    engine_config=tb_awq_engine_config_template_max_bs_128,
+    engine_config=engine_config_template_max_bs_128_awq,
     gen_config=gen_config_template,
-    max_out_len=MAX_NEW_TOKENS,
     max_seq_len=MAX_SESSION_LEN,
+    max_out_len=MAX_NEW_TOKENS,
     batch_size=128,
-    concurrency=128,
-    meta_template=llama3_meta_template,
-    run_cfg=run_cfg_tp1_template,
-    end_str='[INST]')
+    run_cfg=dict(num_gpus=1),
+    stop_words=['<|eot_id|>', '<|end_of_text|>'],
+)

 tb_llama_3_8b_instruct_kvint4 = dict(
-    type=TurboMindModel,
-    abbr='llama-3-8b-instruct-turbomind-kvint4',
+    type=TurboMindModelwithChatTemplate,
+    abbr='tb_llama_3_8b_instruct_kvint4',
     path='meta-llama/Meta-Llama-3-8B-Instruct',
-    engine_config=tb_kvint4_engine_config_template_max_bs_128,
+    engine_config=engine_config_template_max_bs_128_kvint4,
     gen_config=gen_config_template,
-    max_out_len=MAX_NEW_TOKENS,
     max_seq_len=MAX_SESSION_LEN,
+    max_out_len=MAX_NEW_TOKENS,
     batch_size=128,
-    concurrency=128,
-    meta_template=llama3_meta_template,
-    run_cfg=run_cfg_tp1_template,
-    end_str='[INST]')
+    run_cfg=dict(num_gpus=1),
+    stop_words=['<|eot_id|>', '<|end_of_text|>'],
+)

 # config for llama-3-8b-instruct pytorch
 pt_llama_3_8b_instruct = dict(
diff --git a/.github/workflows/evaluate.yml b/.github/workflows/evaluate.yml
index aa5e358341..56033c5c60 100644
--- a/.github/workflows/evaluate.yml
+++ b/.github/workflows/evaluate.yml
@@ -17,12 +17,17 @@ on:
         required: true
         description: 'Tested TurboMind models list. eg. [internlm_chat_7b,internlm_chat_7b_w8a16]'
         type: string
-        default: '[tb_internlm2_chat_7b,tb_internlm2_chat_20b,tb_internlm2_chat_20b_w4a16,tb_llama2_chat_7b,tb_qwen1_5_chat_7b,tb_llama_3_8b_instruct,pt_internlm2_chat_7b,pt_internlm2_chat_20b,pt_llama2_chat_7b,pt_qwen1_5_chat_7b,pt_qwen1_5_moe_2_7b_chat,pt_llama_3_8b_instruct,tb_internlm2_chat_7b_kvint4,tb_internlm2_chat_20b_kvint4,tb_qwen1_5_chat_7b_kvint4,tb_llama_3_8b_instruct_kvint4]'
+        default: '[tb_internlm2_chat_7b,tb_internlm2_chat_7b_w4a16,tb_internlm2_chat_7b_kvint4,pt_internlm2_chat_7b,tb_internlm2_chat_20b,tb_internlm2_chat_20b_w4a16,tb_internlm2_chat_20b_kvint4,pt_internlm2_chat_20b,tb_llama_3_8b_instruct,tb_llama_3_8b_instruct_w4a16,tb_llama_3_8b_instruct_kvint4,tb_qwen1_5_7b_chat,tb_qwen1_5_7b_chat_kvint4,tb_qwen1_5_7b_chat_w4a16,pt_qwen1_5_7b_chat,pt_qwen1_5_moe_2_7b_chat,tb_qwen2_7b_instruct,tb_qwen2_7b_instruct_w4a16,tb_qwen2_7b_instruct_kvint4]'
       datasets:
         required: true
-        description: 'Tested datasets list. eg. [*bbh_datasets,*ceval_datasets,*cmmlu_datasets,*GaokaoBench_datasets,*gpqa_datasets,*gsm8k_datasets,*hellaswag_datasets,*humaneval_datasets,*ifeval_datasets,*math_datasets,*sanitized_mbpp_datasets,*mmlu_datasets,*nq_datasets,*race_datasets,*WiC_datasets,*WSC_datasets,*TheoremQA_datasets,*triviaqa_datasets,*winogrande_datasets,*crowspairs_datasets]'
+        description: 'Tested datasets list. eg. [*bbh_datasets,*ceval_datasets,*cmmlu_datasets,*GaokaoBench_datasets,*gpqa_datasets,*gsm8k_datasets,*hellaswag_datasets,*humaneval_datasets,*ifeval_datasets,*math_datasets,*sanitized_mbpp_datasets,*mmlu_datasets,*nq_datasets,*race_datasets,*TheoremQA_datasets,*triviaqa_datasets,*winogrande_datasets,*crowspairs_datasets]'
         type: string
         default: '[*mmlu_datasets, *gsm8k_datasets]'
+      local_config:
+        required: true
+        description: 'Whether to use the local eval config'
+        type: boolean
+        default: false
       devices:
         required: true
         description: 'CUDA_VISIBLE_DEVICES.'
@@ -129,6 +134,9 @@ jobs:
         run: |
          ln -s /root/opencompass-data ./data
          python3 .github/scripts/action_tools.py create_model_links /root/models .
+      - name: Use local config
+        if: ${{inputs.local_config}}
+        run: cp /nvme/qa_test_models/offline_pkg/eval_config.py .github/scripts/eval_opencompass_config.py
       - name: Evaluate models
         run: |
          echo ${{github.event.inputs.models}}
@@ -139,7 +147,7 @@
          python3 .github/scripts/action_tools.py evaluate \
            --models "${{github.event.inputs.models}}" \
            --datasets "${{github.event.inputs.datasets}}" \
-           --workspace /root/evaluation-reports/$TIME_STAMP
+           --workspace /root/evaluation-reports/${{ github.run_id }}
       - name: Clear workspace
         if: always()
         run: |