Skip to content

Commit

Permalink
remove unused code again
Browse files: browse the repository at this point in the history
  • Loading branch information
MeouSker77 committed Dec 27, 2024
1 parent 34dbdb8 commit cc8ee22
Show file tree
Hide file tree
Showing 10 changed files with 19 additions and 171 deletions.
5 changes: 2 additions & 3 deletions python/llm/example/GPU/LLM-Finetuning/LISA/lisa_finetuning.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,7 @@ def train(
load_in_low_bit="bf16",
optimize_model=True,
torch_dtype=torch.bfloat16,
trust_remote_code=True,
enable_xetla=False
trust_remote_code=True
)

model = model.to("xpu")
Expand Down Expand Up @@ -156,7 +155,7 @@ def train(
callbacks=trainer_callbacks
)
model.config.use_cache = False

trainer.train(resume_from_checkpoint=resume_from_checkpoint)

# model.save_pretrained(output_dir)
Expand Down
3 changes: 1 addition & 2 deletions python/llm/src/ipex_llm/optimize.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,8 +257,7 @@ def optimize_model(model, low_bit='sym_int4', optimize_llm=True, modules_to_not_
optimize_model=optimize_llm,
modules_to_not_convert=modules_to_not_convert,
cpu_embedding=cpu_embedding,
lightweight_bmm=lightweight_bmm,
enable_xetla=kwargs.pop("enable_xetla", False))
lightweight_bmm=lightweight_bmm)
# add save_low_bit to pretrained model dynamically
import types
model._bigdl_config = dict()
Expand Down
17 changes: 1 addition & 16 deletions python/llm/src/ipex_llm/transformers/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ def is_linear_module(module):


def convert_vllm(module, qtype, in_features, out_features, mp_group, cur_qtype,
enable_xetla, optimize_lm_head, enable_scale_search):
optimize_lm_head, enable_scale_search):
from vllm.model_executor.layers.vocab_parallel_embedding import ParallelLMHead
from ipex_llm.transformers.low_bit_linear import LowBitLinear, \
FP16Linear, BF16Linear, vLLMLowBitLinear, vLLMFP16Linear, vLLMBF16Linear
Expand Down Expand Up @@ -261,7 +261,6 @@ def convert_vllm(module, qtype, in_features, out_features, mp_group, cur_qtype,
cur_qtype,
module.bias is not None,
mp_group=mp_group,
enable_xetla=enable_xetla,
optimize_lm_head=optimize_lm_head,
enable_scale_search=enable_scale_search,
)
Expand Down Expand Up @@ -289,7 +288,6 @@ def convert_vllm(module, qtype, in_features, out_features, mp_group, cur_qtype,
cur_qtype,
module.bias is not None,
mp_group=mp_group,
enable_xetla=enable_xetla,
optimize_lm_head=optimize_lm_head,
enable_scale_search=enable_scale_search,
)
Expand Down Expand Up @@ -473,7 +471,6 @@ def _replace_with_low_bit_linear(model, qtype, modules_to_not_convert=None,
prefix_name='',
imatrix_data=None, embedding_qtype=None,
model_config=None, torch_dtype=torch.float32,
enable_xetla=False,
mixed_precision=False,
act_order=False,
enable_scale_search=False,
Expand Down Expand Up @@ -523,7 +520,6 @@ def _replace_with_low_bit_linear(model, qtype, modules_to_not_convert=None,
qtype=qtype,
bias=has_bias,
mp_group=mp_group,
enable_xetla=enable_xetla,
optimize_lm_head=optimize_lm_head,
act_order=act_order,
enable_scale_search=enable_scale_search,
Expand All @@ -544,7 +540,6 @@ def _replace_with_low_bit_linear(model, qtype, modules_to_not_convert=None,
_shape=(out_features, in_features),
convert_shape_only=convert_shape_only,
qtype=qtype,
enable_xetla=enable_xetla,
enable_scale_search=enable_scale_search).to(device)
new_linear._parameters['weight'] = paramsLowBit
if has_bias:
Expand All @@ -562,7 +557,6 @@ def _replace_with_low_bit_linear(model, qtype, modules_to_not_convert=None,
qtype=qtype,
bias=has_bias,
mp_group=mp_group,
enable_xetla=enable_xetla,
optimize_lm_head=False,
act_order=act_order,
enable_scale_search=enable_scale_search,
Expand All @@ -581,7 +575,6 @@ def _replace_with_low_bit_linear(model, qtype, modules_to_not_convert=None,
qtype=cur_qtype,
imatrix=cur_imatrix,
in_features=in_features,
enable_xetla=enable_xetla,
enable_scale_search=enable_scale_search).to(device)
else:
new_linear = vLLMLowBitLinear(
Expand All @@ -590,7 +583,6 @@ def _replace_with_low_bit_linear(model, qtype, modules_to_not_convert=None,
qtype=qtype,
bias=has_bias,
mp_group=mp_group,
enable_xetla=enable_xetla,
optimize_lm_head=False,
act_order=act_order,
enable_scale_search=enable_scale_search,
Expand All @@ -609,7 +601,6 @@ def _replace_with_low_bit_linear(model, qtype, modules_to_not_convert=None,
_shape=(out_features, in_features),
convert_shape_only=convert_shape_only,
qtype=qtype,
enable_xetla=enable_xetla,
enable_scale_search=enable_scale_search).to(device)
new_linear._parameters['weight'] = paramsLowBit
if has_bias:
Expand Down Expand Up @@ -639,7 +630,6 @@ def _replace_with_low_bit_linear(model, qtype, modules_to_not_convert=None,
out_features,
mp_group,
cur_qtype,
enable_xetla,
optimize_lm_head,
enable_scale_search)
else:
Expand All @@ -649,7 +639,6 @@ def _replace_with_low_bit_linear(model, qtype, modules_to_not_convert=None,
cur_qtype,
module.bias is not None,
mp_group=mp_group,
enable_xetla=enable_xetla,
optimize_lm_head=optimize_lm_head,
enable_scale_search=enable_scale_search,
)
Expand All @@ -663,7 +652,6 @@ def _replace_with_low_bit_linear(model, qtype, modules_to_not_convert=None,
qtype=cur_qtype,
imatrix=cur_imatrix,
in_features=in_features,
enable_xetla=enable_xetla,
enable_scale_search=enable_scale_search).to(device)
new_linear._parameters['weight'] = paramsLowBit
if module.bias is not None:
Expand Down Expand Up @@ -762,7 +750,6 @@ def _replace_with_low_bit_linear(model, qtype, modules_to_not_convert=None,
embedding_qtype=embedding_qtype,
model_config=model_config,
torch_dtype=torch_dtype,
enable_xetla=enable_xetla,
mixed_precision=mixed_precision,
act_order=act_order,
enable_scale_search=enable_scale_search,
Expand Down Expand Up @@ -1094,7 +1081,6 @@ def ggml_convert_low_bit(model, qtype, optimize_model=True,
lightweight_bmm=False, torch_dtype="auto",
imatrix_data=None,
embedding_qtype=None,
enable_xetla=False,
mixed_precision=False):
if qtype in ggml_tensor_qtype.values():
index = list(ggml_tensor_qtype.values()).index(qtype)
Expand Down Expand Up @@ -1138,7 +1124,6 @@ def ggml_convert_low_bit(model, qtype, optimize_model=True,
embedding_qtype=embedding_qtype,
model_config=model_config,
torch_dtype=torch_dtype,
enable_xetla=enable_xetla,
mixed_precision=mixed_precision,
act_order=act_order,
enable_scale_search=enable_scale_search,
Expand Down
Loading

0 comments on commit cc8ee22

Please sign in to comment.