From 0f86f227f9ec42c4f048564baa51c810462a5e35 Mon Sep 17 00:00:00 2001
From: irexyc
Date: Wed, 29 Nov 2023 19:42:38 +0800
Subject: [PATCH] convert model with hf repo_id

---
 lmdeploy/cli/cli.py                    | 10 +++++++---
 lmdeploy/turbomind/deploy/converter.py | 13 ++++++++++++-
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/lmdeploy/cli/cli.py b/lmdeploy/cli/cli.py
index ab15cb46ad..12babfc751 100644
--- a/lmdeploy/cli/cli.py
+++ b/lmdeploy/cli/cli.py
@@ -23,13 +23,15 @@ def convert(self,
                 dst_path: str = './workspace',
                 tp: int = 1,
                 quant_path: str = None,
-                group_size: int = 0):
+                group_size: int = 0,
+                **kwargs):
         """Convert LLMs to lmdeploy format.
 
         Args:
             model_name (str): The name of the to-be-deployed model, such as
                 llama-7b, llama-13b, vicuna-7b and etc.
-            model_path (str): The directory path of the model
+            model_path (str): The directory path of the model or huggingface
+                repo_id like 'internlm/internlm-chat-20b'
             model_format (str): the format of the model, should choose from
                 ['llama', 'hf', 'awq', None]. 'llama' stands for META's llama
                 format, 'hf' means huggingface llama format, and 'awq' means
@@ -43,6 +45,7 @@ def convert(self,
             quant_path (str): Path of the quantized model, which can be None.
             group_size (int): A parameter used in AWQ to quantize fp16 weights
                 to 4 bits.
+            kwargs (dict): other params for convert
         """
         from lmdeploy.turbomind.deploy.converter import main as convert
 
@@ -53,7 +56,8 @@ def convert(self,
                 dst_path=dst_path,
                 tp=tp,
                 quant_path=quant_path,
-                group_size=group_size)
+                group_size=group_size,
+                **kwargs)
 
     def list(self, engine: str = 'turbomind'):
         """List supported model names.
diff --git a/lmdeploy/turbomind/deploy/converter.py b/lmdeploy/turbomind/deploy/converter.py
index 4e6a03203e..5bcab7b537 100644
--- a/lmdeploy/turbomind/deploy/converter.py
+++ b/lmdeploy/turbomind/deploy/converter.py
@@ -6,8 +6,10 @@
 from pathlib import Path
 
 import fire
+from huggingface_hub import snapshot_download
 
 from lmdeploy.model import MODELS
+from lmdeploy.turbomind.utils import create_hf_download_args
 
 from .source_model.base import INPUT_MODELS
 from .target_model.base import OUTPUT_MODELS, TurbomindModelConfig
@@ -143,7 +145,8 @@ def main(model_name: str,
          dst_path: str = 'workspace',
          tp: int = 1,
          quant_path: str = None,
-         group_size: int = 0):
+         group_size: int = 0,
+         **kwargs):
     """deploy llama family models via turbomind.
 
     Args:
@@ -162,6 +165,7 @@ def main(model_name: str,
         quant_path (str): Path of the quantized model, which can be None.
         group_size (int): a parameter used in AWQ to quantize fp16 weights
             to 4 bits
+        kwargs (dict): other params for convert
     """
 
     assert model_name in MODELS.module_dict.keys(), \
@@ -184,6 +188,13 @@ def main(model_name: str,
               f'which is not in supported list {supported_keys}')
         exit(-1)
 
+    if not os.path.exists(model_path):
+        print(f'can\'t find model from local_path {model_path}, '
+              'try to download from huggingface')
+        download_kwargs = create_hf_download_args(**kwargs)
+        model_path = snapshot_download(model_path, **download_kwargs)
+        print(f'load model from {model_path}')
+
     # get tokenizer path
     tokenizer_path = get_tokenizer_path(model_path, tokenizer_path)
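
Usage sketch (not part of the diff): with this change, the converter accepts a
Hugging Face repo_id in place of a local directory and downloads the weights
via snapshot_download when the path does not exist locally. The snippet below
assumes that create_hf_download_args forwards common snapshot_download options
such as 'revision'; that option name is an assumption and is not confirmed by
this patch.

    # Sketch: convert a model given only its Hugging Face repo_id, mirroring
    # the "main as convert" import the patch adds to lmdeploy/cli/cli.py.
    from lmdeploy.turbomind.deploy.converter import main as convert

    convert(
        model_name='internlm-chat-20b',
        # A repo_id rather than a local directory; since this path does not
        # exist on disk, the new branch fetches it with snapshot_download.
        model_path='internlm/internlm-chat-20b',
        dst_path='./workspace',
        tp=1,
        # Extra kwargs are handed to create_hf_download_args; 'revision' is an
        # assumed example of a supported download option.
        revision='main',
    )

The same flow should be reachable from the command line, since the CLI's
convert() now forwards **kwargs to the converter's main().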