# GoLLIE-7B_CodeLLaMA_ablation_masking.yaml
# Training args
model_name_or_path: codellama/CodeLlama-7b-hf
torch_dtype: bfloat16
use_lora: true
quantization: 4
quantization_inference: null
gradient_checkpointing: true
force_auto_device_map: false
use_flash_attention: true
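# Note: use_lora combined with quantization: 4 loads the base model in 4-bit
# precision and trains low-rank adapters on top of it (QLoRA-style
# fine-tuning), while quantization_inference: null leaves inference at the
# default precision.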
# dataset arguments
dataset_dir: /ikerlariak/osainz006/GoLLIE/data/processed_w_examples_abl_masking
train_tasks:
- ace05.eae
- ace05.ee
- ace05.ner
- ace05.rc
- ace05.re
- ace05.ver
- bc5cdr.ner
- conll03.ner
- diann.ner
- ncbidisease.ner
- ontonotes5.ner
- rams.eae
- tacred.sf
- wnut17.ner
validation_tasks:
- ace05.ee
- conll03.ner
test_tasks:
- ace05.eae
- ace05.ee
- ace05.ner
- ace05.rc
- ace05.re
- ace05.ver
- bc5cdr.ner
- conll03.ner
- diann.ner
- ncbidisease.ner
- ontonotes5.ner
- rams.eae
- tacred.sf
- wikievents.eae
- wikievents.ee
- wikievents.ner
- wnut17.ner
- e3c.ner
- broadtwitter.ner
- fabner.ner
- harveyner.ner
- multinerd.ner
- casie.eae
- casie.ee
- mitmovie.ner
- mitrestaurant.ner
- crossner.crossner_ai
- crossner.crossner_music
- crossner.crossner_politics
- crossner.crossner_literature
- crossner.crossner_natural_science
max_examples_per_task_train: 30000
max_examples_per_task_val: 5000
max_examples_per_task_test: null
max_seq_length: 2048
generation_max_length: 2048
ignore_pad_token_for_loss: true
prompt_loss_weight: 0.0
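# prompt_loss_weight 0.0 means prompt tokens are fully masked out of the
# training loss, so only the target annotations contribute; this is
# presumably the knob varied in this masking ablation.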
# checkpoint settings
output_dir: /ikerlariak/osainz006/models/GoLLIE/GoLLIE+-7b_CodeLLaMA_abl_masking
overwrite_output_dir: true
load_best_model_at_end: false
save_strategy: "epoch"
save_steps: 1000
save_total_limit: 999
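# With save_strategy "epoch" the HF Trainer checkpoints once per epoch and
# ignores save_steps; save_total_limit 999 effectively keeps every checkpoint.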
# evaluation
do_train: true
do_eval: true
do_predict: true
evaluation_strategy: "epoch"
eval_steps: 500
eval_delay: 0
predict_with_generate: true
evaluate_all_checkpoints: false
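# Likewise, evaluation_strategy "epoch" makes the Trainer evaluate once per
# epoch, so eval_steps has no effect here.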
# batch size
per_device_train_batch_size: 32
per_device_eval_batch_size: 8
gradient_accumulation_steps: 1
generation_num_beams: 1
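# Effective batch size: 32 (per device) x 1 (accumulation step) = 32 examples
# per GPU per optimizer step; multiply by the number of GPUs for the global
# batch size.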
# optimizer settings
optim: adamw_torch_fused
learning_rate: 0.0003
weight_decay: 0.0
num_train_epochs: 3
lr_scheduler_type: cosine
warmup_ratio: 0.03
adam_epsilon: 1e-7
# lora settings
lora_r: 8
lora_alpha: 16
lora_dropout: 0.05
lora_target_modules:
- all
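# The LoRA update is scaled by lora_alpha / lora_r = 16 / 8 = 2.0; the "all"
# target is assumed to expand to every linear projection in the model rather
# than naming a specific module.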
# reporting
logging_strategy: steps
logging_first_step: true
logging_steps: 25
report_to: wandb
run_name: "GoLLIE+-7b_CodeLLaMA_abl_masking"
disable_tqdm: false
# hub settings
push_to_hub: false
resume_from_checkpoint: false
# performance
bf16: true
fp16: false
torch_compile: false
ddp_find_unused_parameters: false
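# Hypothetical launch sketch (the exact entry point and flags are defined by
# the GoLLIE repo; check its README before use):
#   torchrun --standalone --nproc_per_node=<num_gpus> src/run.py \
#     configs/GoLLIE-7B_CodeLLaMA_ablation_masking.yaml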