# Placeholder record in Alpaca-style format (instruction / input / output).
# NOTE: the duplicate re-assignment of ``data`` and a bare no-op ``prompt``
# expression (notebook residue) were removed; final values are unchanged.
data = {"instruction": "", "input": "", "output": ""}

# Minimal Llama-2-style single-turn prompt with a system block;
# ``{instruction}`` is meant to be filled in later via str.format.
prompt = """<s> [INST] <<SYS>>
You are a helpful, respectful and honest assistant.
<</SYS>>
{instruction} [/INST]"""

# Reference layout of a multi-turn Llama-2 conversation, stored in the
# instruction field as an example (double braces are literal placeholders).
data['instruction'] = """<s>[INST] <<SYS>>
{{ system_prompt }}
<</SYS>>
{{ user_msg_1 }} [/INST] {{ model_answer_1 }} </s><s>[INST] {{ user_msg_2 }} [/INST] {{ model_answer_2 }} </s><s>[INST] {{ user_msg_3 }} [/INST]"""

# Jinja chat template for Mistral/Llama-2 style prompting: optional leading
# system message is folded into the first user turn; roles must alternate.
mistral_template="{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + eos_token }}{% endif %}{% endfor %}"

# Jinja chat template for Llama-3 style prompting: header-id delimited turns,
# ends with an open assistant header so the model continues from there.
llama3_template="{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}"
def chat_format(conversation: list, tokenizer, chat_type: str = "mistral") -> str:
    """Render a conversation into a model-specific chat prompt string.

    A default system message is prepended, then the tokenizer applies the
    Jinja chat template selected by ``chat_type``.

    Args:
        conversation: list of ``{"role": ..., "content": ...}`` message dicts.
        tokenizer: Hugging Face tokenizer exposing ``apply_chat_template``.
        chat_type: template to use, either ``"mistral"`` or ``"llama3"``.

    Returns:
        The formatted prompt text (``tokenize=False``).

    Raises:
        ValueError: if ``chat_type`` is not a supported template name.
          (Previously an unknown value crashed with UnboundLocalError.)
    """
    # Fixed typo "humman" and missing space after the period in the prompt.
    system_prompt = (
        "You are a helpful, respectful and honest assistant. "
        "Help human as much as you can."
    )
    if chat_type == "mistral":
        template = mistral_template
    elif chat_type == "llama3":
        template = llama3_template
    else:
        # Fail fast with a clear message instead of returning an unbound local.
        raise ValueError(f"unsupported chat_type: {chat_type!r}")
    messages = [{"role": "system", "content": system_prompt}] + conversation
    return tokenizer.apply_chat_template(
        messages, chat_template=template, tokenize=False
    )
In my llm_trainer.py the input tokens are not masked with -100, so the loss is computed over the entire sequence. This makes training resemble the pretraining objective: predicting the next token from left to right.
sh run_sfttrainer.sh
Hugging Face link: My Hugging Face Chat Model 7B
4. autotrain llama (autotrain-advanced)
cd autotrain
sh run.sh
or
sh script/llama_sft.sh
Hugging Face link: My Hugging Face Chat Llama3 Model 8B
refer to llama-recipes
import openai
# Dummy key for the legacy module-level attribute; the local server
# presumably does not check authentication — confirm against deployment.
openai.api_key = "EMPTY"
# OpenAI-compatible client pointed at a locally served model endpoint
# (NOTE(review): looks like a vLLM/FastChat-style server — confirm).
client = openai.OpenAI(
    api_key="EMPTY",
    base_url="http://localhost:7777/v1",
)
def chatbot(msgs, model="dewu-chat"):
    """Send a chat completion request to the local OpenAI-compatible server.

    Args:
        msgs: list of ``{"role": ..., "content": ...}`` message dicts.
        model: served model name; defaults to ``"dewu-chat"`` (previously
            hard-coded — now a backward-compatible keyword parameter).

    Returns:
        The assistant reply text of the first choice.
    """
    # Decoding options are passed via ``extra_body`` so non-standard fields
    # (top_k, repetition_penalty) reach the server unchanged.
    call_args = {
        'temperature': 0.7,
        'top_p': 0.9,
        'top_k': 40,
        'max_tokens': 4096,  # output-len
        'presence_penalty': 1.0,
        'frequency_penalty': 0.0,
        "repetition_penalty": 1.0,
        "stop": ["<|im_end|>"],
        # "stop":["</s>"],
        # "stream":False,
        "seed": None,
    }
    # create a chat completion (blocking, non-streaming)
    completion = client.chat.completions.create(
        model=model,
        messages=msgs,
        extra_body=call_args,
        stream=False,
    )
    # return only the generated text of the first choice
    return completion.choices[0].message.content
def chatbot_single(text, system=None):
    """One-shot chat helper: wrap *text* with a system prompt and reply.

    Args:
        text: the user message.
        system: optional system prompt; a generic default is used when
            falsy (None or empty string, matching the original check).

    Returns:
        Assistant reply text from :func:`chatbot`.
    """
    if not system:
        # Fixed grammar in the default prompt: "a assistant" -> "an assistant".
        system = "You are an assistant"
    msgs = [
        {"role": "system", "content": system},
        {"role": "user", "content": text},
    ]
    return chatbot(msgs)