llama-3-8b.yaml
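
# Fine-tuning configuration for meta-llama/Meta-Llama-3-8B-Instruct using LoRA
# adapters and DeepSpeed ZeRO-3 with CPU offload. The S3 paths below point at
# the ViGGO example dataset; replace them with your own JSONL files.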
model_id: meta-llama/Meta-Llama-3-8B-Instruct # <-- change this to the model you want to fine-tune
train_path: s3://llm-guide/data/viggo/train.jsonl # <-- change this to the path to your training data
valid_path: s3://llm-guide/data/viggo/val.jsonl # <-- change this to the path to your validation data. This is optional
context_length: 512 # <-- change this to the context length you want to use
num_devices: 16 # <-- change this to the total number of GPUs that you want to use
num_epochs: 4 # <-- change this to the number of epochs that you want to train for
train_batch_size_per_device: 16
eval_batch_size_per_device: 16
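# NOTE: with 16 devices at a per-device train batch size of 16, the effective
# global batch size is 16 x 16 = 256 sequences per optimizer step (assuming no
# gradient accumulation is configured elsewhere).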
learning_rate: 1e-4
padding: "longest" # This will pad batches to the longest sequence. Use "max_length" when profiling to profile the worst case.
num_checkpoints_to_keep: 1
dataset_size_scaling_factor: 10000
output_dir: /mnt/local_storage
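# DeepSpeed settings. Per its file name, the referenced JSON config enables
# ZeRO stage 3 with optimizer and parameter offload to CPU.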
deepspeed:
  config_path: configs/deepspeed/zero_3_offload_optim+param.json
dataset_size_scaling_factor: 10000 # internal flag. No need to change
flash_attention_2: true
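# Ray resource requests. trainer_resources reserves host memory for the trainer
# process; the small fractional accelerator_type:A10G value is a common Ray
# pattern for scheduling workers onto A10G GPU nodes without consuming a whole
# resource unit.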
trainer_resources:
  memory: 53687091200 # 50 GiB of memory
worker_resources:
  accelerator_type:A10G: 0.001
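# LoRA adapter settings (PEFT-style). Adapters are attached to every attention
# and MLP projection as well as the token embeddings and LM head.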
lora_config:
  r: 8
  lora_alpha: 16
  lora_dropout: 0.05
  target_modules:
    - q_proj
    - v_proj
    - k_proj
    - o_proj
    - gate_proj
    - up_proj
    - down_proj
    - embed_tokens
    - lm_head
  task_type: "CAUSAL_LM"
  modules_to_save: []
  bias: "none"
  fan_in_fan_out: false
  init_lora_weights: true
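
For reference, the lora_config block above mirrors the constructor arguments of a Hugging Face PEFT LoraConfig. The sketch below is illustrative only and assumes PEFT is the adapter implementation consumed by the fine-tuning job; it is not part of the YAML file itself.

# Illustrative sketch only -- assumes the lora_config block is consumed by
# Hugging Face PEFT; the fine-tuning framework may construct this internally.
from peft import LoraConfig

lora_config = LoraConfig(
    r=8,                      # LoRA rank
    lora_alpha=16,            # scaling factor (effective scale = lora_alpha / r = 2.0)
    lora_dropout=0.05,
    target_modules=[
        "q_proj", "v_proj", "k_proj", "o_proj",  # attention projections
        "gate_proj", "up_proj", "down_proj",     # MLP projections
        "embed_tokens", "lm_head",               # embeddings and output head
    ],
    task_type="CAUSAL_LM",
    modules_to_save=[],
    bias="none",
    fan_in_fan_out=False,
    init_lora_weights=True,
)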