From 4d71396b57d28b2a00c951e40e349aef59aaac88 Mon Sep 17 00:00:00 2001
From: timhartill
Date: Wed, 31 Mar 2021 12:42:59 +1300
Subject: [PATCH 1/8] add tjh subdir with gpu_visibility.py

---
 tjh/gpu_visibility.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
 create mode 100644 tjh/gpu_visibility.py

diff --git a/tjh/gpu_visibility.py b/tjh/gpu_visibility.py
new file mode 100644
index 0000000..68b7184
--- /dev/null
+++ b/tjh/gpu_visibility.py
@@ -0,0 +1,18 @@
+# -*- coding: utf-8 -*-
+"""
+Methods of setting GPU visibility
+
+export CUDA_VISIBLE_DEVICES=5,6
+
+or
+
+CUDA_VISIBLE_DEVICES=5,6 python test_script.py
+
+or
+
+import os
+os.environ["CUDA_VISIBLE_DEVICES"] = "5,6"
+
+
+"""
+
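
A quick way to verify the third (in-process) method: the environment variable must be
set before torch first initialises CUDA, so it has to run before anything imports torch.
Minimal sketch (device ids illustrative, not from the patch):

import os
os.environ["CUDA_VISIBLE_DEVICES"] = "5,6"  # must be set before torch touches CUDA

import torch
print(torch.cuda.device_count())      # 2 - only the two visible devices
print(torch.cuda.get_device_name(0))  # cuda:0 now maps to physical GPU 5
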
From e1c4b123219ed5010bc96216f3cc31226912bc29 Mon Sep 17 00:00:00 2001
From: timhartill
Date: Wed, 31 Mar 2021 13:43:13 +1300
Subject: [PATCH 2/8] add basic_tests.py

---
 tjh/basic_tests.py | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 tjh/basic_tests.py

diff --git a/tjh/basic_tests.py b/tjh/basic_tests.py
new file mode 100644
index 0000000..db91b87
--- /dev/null
+++ b/tjh/basic_tests.py
@@ -0,0 +1,26 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Mar 31 13:05:08 2021
+
+@author: thar011
+"""
+
+from transformers import AutoTokenizer, T5ForConditionalGeneration
+
+model_name = "allenai/unifiedqa-t5-small" # you can specify the model size here
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = T5ForConditionalGeneration.from_pretrained(model_name)
+
+def run_model(input_string, **generator_args):
+    input_ids = tokenizer.encode(input_string, return_tensors="pt")
+    res = model.generate(input_ids, **generator_args)
+    return tokenizer.batch_decode(res, skip_special_tokens=True)
+
+
+run_model("which is best conductor? \\n (a) iron (b) feather")
+
+run_model("scott filled a tray with juice and put it in a freezer. the next day, scott opened the freezer. how did the juice most likely change? \\n (a) it condensed. (b) it evaporated. (c) it became a gas. (d) it became a solid.")
+
+run_model("which is best conductor? \\n (a) iron (b) feather (c) wood (d) plastic",
+          temperature=0.9, num_return_sequences=4, num_beams=20)
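
Note on the last call above: in the transformers 4.x generate API, temperature only
applies when sampling is enabled, so as written this call runs beam search (20 beams,
best 4 returned) and temperature is ignored. A sampled variant would look something
like (sketch):

run_model("which is best conductor? \\n (a) iron (b) feather (c) wood (d) plastic",
          do_sample=True, temperature=0.9, num_return_sequences=4)
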
From 0fdfb9ca14074f57518fdcb8fe2ca71df8fd36c0 Mon Sep 17 00:00:00 2001
From: timhartill
Date: Wed, 31 Mar 2021 18:29:14 +1300
Subject: [PATCH 3/8] updating bart.py

---
 bart/bart.py         | 46 ++++++++++++++++++++++++++++++++++--
 bart/unified_data.py |  4 ++--
 tjh/basic_tests.py   | 56 ++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 100 insertions(+), 6 deletions(-)

diff --git a/bart/bart.py b/bart/bart.py
index d38aa48..e320201 100644
--- a/bart/bart.py
+++ b/bart/bart.py
@@ -4,9 +4,31 @@
 from transformers import T5ForConditionalGeneration, BartForConditionalGeneration
 
 class MyBart(BartForConditionalGeneration):
+    """ TJH: adding , past_key_values=None to forward(..) takes us to next keyword error 'head_mask'
+    Original forward below replaced with new forward (and new outputs = self.model(...) below)
     def forward(self, input_ids, attention_mask=None, encoder_outputs=None,
                 decoder_input_ids=None, decoder_attention_mask=None, decoder_cached_states=None,
                 use_cache=False, is_training=False):
+    """
+    def forward(
+        self,
+        input_ids=None,
+        attention_mask=None,
+        decoder_input_ids=None,
+        decoder_attention_mask=None,
+        head_mask=None,
+        decoder_head_mask=None,
+        encoder_outputs=None,
+        past_key_values=None,
+        inputs_embeds=None,
+        decoder_inputs_embeds=None,
+        labels=None,
+        use_cache=None,
+        output_attentions=None,
+        output_hidden_states=None,
+        return_dict=None,
+        is_training=False # TJH added back as UnifiedQA specific
+    ):
 
         if is_training:
             decoder_start_token_id = self.config.decoder_start_token_id
@@ -16,15 +38,35 @@ def forward(self, input_ids, attention_mask=None, encoder_outputs=None,
         else:
             _decoder_input_ids = decoder_input_ids.clone()
 
+        # original unifiedQA definition:
+        #outputs = self.model(
+        #    input_ids,
+        #    attention_mask=attention_mask,
+        #    encoder_outputs=encoder_outputs,
+        #    decoder_input_ids=_decoder_input_ids,
+        #    decoder_attention_mask=decoder_attention_mask,
+        #    decoder_cached_states=decoder_cached_states,
+        #    use_cache=use_cache
+        #)
+
+        # below from modeling_bart.py
         outputs = self.model(
             input_ids,
             attention_mask=attention_mask,
+            decoder_input_ids=decoder_input_ids,
             encoder_outputs=encoder_outputs,
-            decoder_input_ids=_decoder_input_ids,
             decoder_attention_mask=decoder_attention_mask,
-            decoder_cached_states=decoder_cached_states,
+            head_mask=head_mask,
+            decoder_head_mask=decoder_head_mask,
+            past_key_values=past_key_values,
+            inputs_embeds=inputs_embeds,
+            decoder_inputs_embeds=decoder_inputs_embeds,
             use_cache=use_cache,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+            return_dict=return_dict,
         )
+
         lm_logits = F.linear(outputs[0], self.model.shared.weight, bias=self.final_logits_bias)
         if is_training:
             loss_fct = nn.CrossEntropyLoss(reduce=False)
diff --git a/bart/unified_data.py b/bart/unified_data.py
index c5f8a35..b2c2f20 100644
--- a/bart/unified_data.py
+++ b/bart/unified_data.py
@@ -23,8 +23,8 @@ def __init__(self, logger, args, data_path, is_training):
                           "boolq",
                           "race_string",
                           "openbookqa"]
-        self.data_path = data_path
-        self.data_type = data_path.split("/")[-1][:-4]
+        self.data_path = data_path #TJH this should be ../unifiedqa/train.tsv
+        self.data_type = data_path.split("/")[-1][:-4] #TJH strip .tsv from filename appearing after final "/"
         assert self.data_type in ["train", "dev", "test"]
 
         if args.debug:
diff --git a/tjh/basic_tests.py b/tjh/basic_tests.py
index db91b87..3f3d7ea 100644
--- a/tjh/basic_tests.py
+++ b/tjh/basic_tests.py
@@ -4,11 +4,23 @@
 Created on Wed Mar 31 13:05:08 2021
 
 @author: thar011
+
+UnifiedQA initial tests:
+    T5 checkpoints tests
+    followed by BART tests
+
 """
 
+# UnifiedQA T5 checkpoints tests:
+
 from transformers import AutoTokenizer, T5ForConditionalGeneration
 
-model_name = "allenai/unifiedqa-t5-small" # you can specify the model size here
+# inference on models <= 3B works
+#model_name = "allenai/unifiedqa-t5-small" # you can specify the model size here
+#model_name = "allenai/unifiedqa-t5-base" # you can specify the model size here
+model_name = "allenai/unifiedqa-t5-large" # you can specify the model size here
+#model_name = "allenai/unifiedqa-t5-3b" # you can specify the model size here
+model_name = "allenai/unifiedqa-t5-11b" # you can specify the model size here
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = T5ForConditionalGeneration.from_pretrained(model_name)
 
@@ -18,9 +30,49 @@ def run_model(input_string, **generator_args):
     return tokenizer.batch_decode(res, skip_special_tokens=True)
 
 
-run_model("which is best conductor? \\n (a) iron (b) feather")
+run_model("which is best conductor? \\n (a) iron (b) feather") #['iron']
+run_model("which is best conductor? \\n ") # ['no answer>']
+run_model("which is best conductor - iron or feather? \\n ") # ['iron']
+run_model("Name a conductor of electricity? \\n Name any conductor") # ['any conductor']
+run_model("Name a conductor of electricity? \\n ") # ['yes']
+run_model("Name a conductor of electricity: \\n ") # ['yes']
+run_model("What is 53 + 9521? \\n ") # ['no answer>']
+
+
 run_model("scott filled a tray with juice and put it in a freezer. the next day, scott opened the freezer. how did the juice most likely change? \\n (a) it condensed. (b) it evaporated. (c) it became a gas. (d) it became a solid.")
 
 run_model("which is best conductor? \\n (a) iron (b) feather (c) wood (d) plastic",
           temperature=0.9, num_return_sequences=4, num_beams=20)
+
+
+# BART tests (run from unifiedqa-tjh/bart directory):
+
+import torch
+from transformers import BartTokenizer, BartForConditionalGeneration, BartConfig
+from bart import MyBart
+
+base_model = "facebook/bart-large"
+#unifiedqa_path = "unifiedQA-uncased/best-model.pt" # path to the downloaded checkpoint
+unifiedqa_path = "/data/thar011/ckpts/unifiedqa-bart-large-allenai/unifiedQA-uncased/best-model.pt" # path to the downloaded checkpoint
+
+tokenizer = BartTokenizer.from_pretrained(base_model)
+model = MyBart.from_pretrained(base_model, state_dict=torch.load(unifiedqa_path))
+model.eval()
+
+# ERROR: TypeError: forward() got an unexpected keyword argument 'past_key_values'
+x = model.generate_from_string("Which is best conductor? \\n (A) iron (B) feather", tokenizer=tokenizer)
+print(x)
+
+x = model.generate_from_string("What is the sum of 3 and 5? \\n (A) 8 (B) 3 (C) 5 (D) 10", tokenizer=tokenizer)
+print(x)
+
+
+#try basic bart model (no error):
+model = BartForConditionalGeneration.from_pretrained(base_model)
+model.eval()
+run_model("which is best conductor? \\n (a) iron (b) feather") #['whichwhich is best conductor?']
+
+
+
+
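
generate_from_string is not a transformers method - it is a helper already defined on
MyBart in bart.py (its signature appears as diff context in PATCH 5). Its body is not
shown in these patches; presumably it mirrors run_model above, roughly (sketch,
hypothetical body):

def generate_from_string(self, _input, tokenizer=None, **generator_args):
    assert tokenizer is not None
    input_ids = tokenizer.encode(_input, return_tensors="pt")
    res = self.generate(input_ids, **generator_args)  # fails pre-PATCH 5: generate() passes past_key_values into forward()
    return tokenizer.batch_decode(res, skip_special_tokens=True)
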
From 749b6746cb710172e8365925823c0f0e7d51e9f3 Mon Sep 17 00:00:00 2001
From: timhartill
Date: Thu, 1 Apr 2021 18:00:59 +1300
Subject: [PATCH 4/8] updated bart.py and run.py

---
 bart/bart.py | 2 +-
 bart/run.py  | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/bart/bart.py b/bart/bart.py
index e320201..05d1425 100644
--- a/bart/bart.py
+++ b/bart/bart.py
@@ -53,7 +53,7 @@ def forward(
         outputs = self.model(
             input_ids,
             attention_mask=attention_mask,
-            decoder_input_ids=decoder_input_ids,
+            decoder_input_ids=_decoder_input_ids, #TJH added underscore
             encoder_outputs=encoder_outputs,
             decoder_attention_mask=decoder_attention_mask,
             head_mask=head_mask,
diff --git a/bart/run.py b/bart/run.py
index cde10b3..c3bc735 100644
--- a/bart/run.py
+++ b/bart/run.py
@@ -11,7 +11,7 @@ from bart import MyBart
 
 def run(args, logger):
-    tokenizer = BartTokenizer.from_pretrained("bart-large")
+    tokenizer = BartTokenizer.from_pretrained("facebook/bart-large")
 
     if args.is_unifiedqa:
         dev_data = UnifiedQAData(logger, args, args.predict_file, False)
@@ -31,10 +31,10 @@ def run(args, logger):
         train_data.load_dataloader()
 
     if args.checkpoint is not None:
-        model = MyBart.from_pretrained("bart-large",
+        model = MyBart.from_pretrained("facebook/bart-large",
                                        state_dict=torch.load(args.checkpoint))
     else:
-        model = MyBart.from_pretrained("bart-large")
+        model = MyBart.from_pretrained("facebook/bart-large")
     if args.n_gpu>1:
         model = torch.nn.DataParallel(model)
     if args.n_gpu>0:
@@ -53,7 +53,7 @@ def run(args, logger):
 
     if args.do_predict:
         checkpoint = os.path.join(args.output_dir, 'best-model.pt') if args.checkpoint is None else args.checkpoint
-        model = MyBart.from_pretrained("bart-large",
+        model = MyBart.from_pretrained("facebook/bart-large",
                                        state_dict=torch.load(checkpoint))
         logger.info("Loading checkpoint from {}".format(checkpoint))
         if args.n_gpu>0:
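
For reference, a worked example of the right-shift that produces _decoder_input_ids in
the is_training branch (values illustrative):

import torch

decoder_input_ids = torch.tensor([[42, 13, 7, 1]])  # 1 = pad (illustrative ids)
decoder_start_token_id = 2                          # from the model config

_decoder_input_ids = decoder_input_ids.new_zeros(decoder_input_ids.shape)
_decoder_input_ids[..., 1:] = decoder_input_ids[..., :-1].clone()
_decoder_input_ids[..., 0] = decoder_start_token_id
print(_decoder_input_ids)  # tensor([[ 2, 42, 13,  7]]) - shifted right, start token prepended
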
From d7f744092cf2c5d3fa9ead7c613f5301176454f2 Mon Sep 17 00:00:00 2001
From: timhartill
Date: Fri, 2 Apr 2021 22:05:47 +1300
Subject: [PATCH 5/8] working bart.py and run.py

---
 bart/bart.py | 83 ++++++++++++++++++++++++++++++++++++----------------
 bart/run.py  | 20 ++++++++-----
 2 files changed, 69 insertions(+), 34 deletions(-)

diff --git a/bart/bart.py b/bart/bart.py
index 05d1425..a594fc0 100644
--- a/bart/bart.py
+++ b/bart/bart.py
@@ -2,13 +2,34 @@
 import torch.nn.functional as F
 from torch import Tensor, nn
 from transformers import T5ForConditionalGeneration, BartForConditionalGeneration
+from transformers.modeling_outputs import Seq2SeqLMOutput #TJH added
+
+def shift_tokens_right(input_ids: torch.Tensor, pad_token_id: int, decoder_start_token_id: int):
+    """ TJH: from modeling_bart.py NOT currently used
+    Shift input ids one token to the right.
+    """
+    shifted_input_ids = input_ids.new_zeros(input_ids.shape)
+    shifted_input_ids[:, 1:] = input_ids[:, :-1].clone()
+    shifted_input_ids[:, 0] = decoder_start_token_id
+
+    assert pad_token_id is not None, "self.model.config.pad_token_id has to be defined."
+    # replace possible -100 values in labels by `pad_token_id`
+    shifted_input_ids.masked_fill_(shifted_input_ids == -100, pad_token_id)
+
+    return shifted_input_ids
+
+
 
 class MyBart(BartForConditionalGeneration):
     """ TJH: adding , past_key_values=None to forward(..) takes us to next keyword error 'head_mask'
+
     Original forward below replaced with new forward (and new outputs = self.model(...) below)
     def forward(self, input_ids, attention_mask=None, encoder_outputs=None,
                 decoder_input_ids=None, decoder_attention_mask=None, decoder_cached_states=None,
                 use_cache=False, is_training=False):
+
+    New version assumes that for training, decoder inputs are in labels
+    and for generation, decoder inputs are in decoder_input_ids
     """
     def forward(
         self,
@@ -22,38 +43,31 @@ def forward(
         past_key_values=None,
         inputs_embeds=None,
         decoder_inputs_embeds=None,
-        labels=None,
+        labels=None, #TJH In 4.4.2 labels contains what in unifiedqa is called decoder_input_ids
         use_cache=None,
         output_attentions=None,
         output_hidden_states=None,
         return_dict=None,
-        is_training=False # TJH added back as UnifiedQA specific
     ):
+        #TJH: Added for compatibility with 4.4.2
+        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
 
-        if is_training:
-            decoder_start_token_id = self.config.decoder_start_token_id
-            _decoder_input_ids = decoder_input_ids.new_zeros(decoder_input_ids.shape)
-            _decoder_input_ids[..., 1:] = decoder_input_ids[..., :-1].clone()
-            _decoder_input_ids[..., 0] = decoder_start_token_id
-        else:
-            _decoder_input_ids = decoder_input_ids.clone()
-
-        # original unifiedQA definition:
-        #outputs = self.model(
-        #    input_ids,
-        #    attention_mask=attention_mask,
-        #    encoder_outputs=encoder_outputs,
-        #    decoder_input_ids=_decoder_input_ids,
-        #    decoder_attention_mask=decoder_attention_mask,
-        #    decoder_cached_states=decoder_cached_states,
-        #    use_cache=use_cache
-        #)
+        if labels is not None: #TJH added for compatibility with other 4.4.2 seq2seq models
+            if decoder_input_ids is None:
+                #TJH: how it is done in modeling_bart.py. Using the unifiedQA method instead
+#                decoder_input_ids = shift_tokens_right(
+#                    labels, self.config.pad_token_id, self.config.decoder_start_token_id
+#                )
+                decoder_start_token_id = self.config.decoder_start_token_id
+                decoder_input_ids = labels.new_zeros(labels.shape)
+                decoder_input_ids[..., 1:] = labels[..., :-1].clone()
+                decoder_input_ids[..., 0] = decoder_start_token_id
 
-        # below from modeling_bart.py
+        # TJH: below from modeling_bart.py
         outputs = self.model(
             input_ids,
             attention_mask=attention_mask,
-            decoder_input_ids=_decoder_input_ids, #TJH added underscore
+            decoder_input_ids=decoder_input_ids, #TJH: no underscore
             encoder_outputs=encoder_outputs,
             decoder_attention_mask=decoder_attention_mask,
             head_mask=head_mask,
@@ -68,13 +82,30 @@ def forward(
         )
 
         lm_logits = F.linear(outputs[0], self.model.shared.weight, bias=self.final_logits_bias)
-        if is_training:
+
+        loss = None
+        if labels is not None: #TJH added labels is not None instead of is_training
             loss_fct = nn.CrossEntropyLoss(reduce=False)
             losses = loss_fct(lm_logits.view(-1, self.config.vocab_size),
-                              decoder_input_ids.view(-1))
+                              labels.view(-1))
             loss = torch.sum(losses * decoder_attention_mask.float().view(-1))
-            return loss
-        return (lm_logits, ) + outputs[1:]
+
+        if not return_dict: #TJH: from modeling_bart.py
+            output = (lm_logits,) + outputs[1:]
+            return ((loss,) + output) if loss is not None else output
+
+        return Seq2SeqLMOutput( #TJH: from modeling_bart.py
+            loss=loss,
+            logits=lm_logits,
+            past_key_values=outputs.past_key_values,
+            decoder_hidden_states=outputs.decoder_hidden_states,
+            decoder_attentions=outputs.decoder_attentions,
+            cross_attentions=outputs.cross_attentions,
+            encoder_last_hidden_state=outputs.encoder_last_hidden_state,
+            encoder_hidden_states=outputs.encoder_hidden_states,
+            encoder_attentions=outputs.encoder_attentions,
+        )
+
 
     def generate_from_string(self, _input, tokenizer=None, **generator_args):
         assert tokenizer is not None
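
The loss block above keeps UnifiedQA's token-masked loss: nn.CrossEntropyLoss(reduce=False)
is the legacy spelling of reduction='none', and pad positions are zeroed out via
decoder_attention_mask before summing. The same computation in isolation (sketch,
illustrative shapes):

import torch
from torch import nn

batch, seq_len, vocab = 2, 5, 50265  # illustrative sizes (50265 = bart-large vocab)
lm_logits = torch.randn(batch, seq_len, vocab)
labels = torch.randint(0, vocab, (batch, seq_len))
decoder_attention_mask = torch.tensor([[1, 1, 1, 0, 0],
                                       [1, 1, 1, 1, 0]])  # 1 = real token, 0 = pad

loss_fct = nn.CrossEntropyLoss(reduction='none')  # modern equivalent of reduce=False
losses = loss_fct(lm_logits.view(-1, vocab), labels.view(-1))       # one loss per token
loss = torch.sum(losses * decoder_attention_mask.float().view(-1))  # pad positions contribute 0
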
diff --git a/bart/run.py b/bart/run.py
index c3bc735..76b2d84 100644
--- a/bart/run.py
+++ b/bart/run.py
@@ -11,7 +11,7 @@ from bart import MyBart
 
 def run(args, logger):
-    tokenizer = BartTokenizer.from_pretrained("facebook/bart-large")
+    tokenizer = BartTokenizer.from_pretrained("facebook/bart-large") #TJH: bart-large
 
     if args.is_unifiedqa:
         dev_data = UnifiedQAData(logger, args, args.predict_file, False)
@@ -32,9 +32,9 @@ def run(args, logger):
 
     if args.checkpoint is not None:
         model = MyBart.from_pretrained("facebook/bart-large",
-                                       state_dict=torch.load(args.checkpoint))
+                                       state_dict=torch.load(args.checkpoint)) #TJH: bart-large
     else:
-        model = MyBart.from_pretrained("facebook/bart-large")
+        model = MyBart.from_pretrained("facebook/bart-large") #TJH: bart-large
     if args.n_gpu>1:
         model = torch.nn.DataParallel(model)
     if args.n_gpu>0:
@@ -54,7 +54,7 @@ def run(args, logger):
     if args.do_predict:
         checkpoint = os.path.join(args.output_dir, 'best-model.pt') if args.checkpoint is None else args.checkpoint
         model = MyBart.from_pretrained("facebook/bart-large",
-                                       state_dict=torch.load(checkpoint))
+                                       state_dict=torch.load(checkpoint)) #TJH: bart-large
         logger.info("Loading checkpoint from {}".format(checkpoint))
         if args.n_gpu>0:
@@ -86,9 +86,13 @@ def _convert(key):
         for batch in train_data.dataloader:
             global_step += 1
             batch = [b.to(torch.device("cuda")) for b in batch]
-            loss = model(input_ids=batch[0], attention_mask=batch[1],
-                         decoder_input_ids=batch[2], decoder_attention_mask=batch[3],
-                         is_training=True)
+#            TJH: this was the original unifiedqa:
+#            loss = model(input_ids=batch[0], attention_mask=batch[1],
+#                         decoder_input_ids=batch[2], decoder_attention_mask=batch[3],
+#                         is_training=True)
+            outputs = model(input_ids=batch[0], attention_mask=batch[1],
+                            labels=batch[2], decoder_attention_mask=batch[3])
+            loss = outputs["loss"] if isinstance(outputs, dict) else outputs[0] #TJH added
             if args.n_gpu > 1:
                 loss = loss.mean() # mean() to average on multi-gpu.
             if torch.isnan(loss).data:
@@ -155,7 +159,7 @@ def inference(model, dev_data, save_predictions=False):
         outputs = model.generate(input_ids=batch[0],
                                  attention_mask=batch[1],
                                  num_beams=dev_data.args.num_beams,
-                                 min_lnegth=1,
+                                 min_length=1, #TJH: was min_lnegth
                                  max_length=dev_data.args.max_output_length,
                                  early_stopping=True,)
         for input_, output in zip(batch[0], outputs):
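
Why the isinstance test in the training loop works: Seq2SeqLMOutput derives from
OrderedDict (via ModelOutput), so when return_dict resolves to True the outputs object
supports both attribute and dict access, while return_dict=False yields the legacy
tuple. Sketch (assumes model is the patched MyBart and the batch tensors are prepared
as in run.py):

outputs = model(input_ids=input_ids, attention_mask=attention_mask,
                labels=labels, decoder_attention_mask=decoder_attention_mask,
                return_dict=True)
loss = outputs.loss             # attribute access...
loss = outputs["loss"]          # ...or dict access: Seq2SeqLMOutput behaves like a dict

outputs = model(input_ids=input_ids, attention_mask=attention_mask,
                labels=labels, decoder_attention_mask=decoder_attention_mask,
                return_dict=False)
loss = outputs[0]               # legacy tuple: (loss, lm_logits, ...)
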
From ae7797bf67d5835a5ab793f4f183544743f42469 Mon Sep 17 00:00:00 2001
From: timhartill
Date: Fri, 2 Apr 2021 22:07:02 +1300
Subject: [PATCH 6/8] add runtrain.sh

---
 bart/rundropft.sh | 14 ++++++++++++++
 bart/runtrain.sh  | 16 ++++++++++++++++
 2 files changed, 30 insertions(+)
 create mode 100644 bart/rundropft.sh
 create mode 100644 bart/runtrain.sh

diff --git a/bart/rundropft.sh b/bart/rundropft.sh
new file mode 100644
index 0000000..66dc574
--- /dev/null
+++ b/bart/rundropft.sh
@@ -0,0 +1,14 @@
+# training command
+# other options: --do_predict --skip_inference --debug --checkpoint ${unifiedqa_checkpoint}
+# --prefix dev_  --prefix test_  --checkpoint_step
+
+python cli.py --do_train --output_dir /data/thar011/out/unifiedqa_dropft \
+        --checkpoint /data/thar011/ckpts/unifiedqa-bart-large-allenai/unifiedQA-uncased/best-model.pt \
+        --is_unifiedqa \
+        --train_file /data/thar011/data/unifiedqa/train.tsv \
+        --predict_file /data/thar011/data/unifiedqa/dev.tsv \
+        --train_batch_size 64 \
+        --predict_batch_size 64 \
+        --append_another_bos --do_lowercase \
+        --eval_period 10000 --verbose
+
diff --git a/bart/runtrain.sh b/bart/runtrain.sh
new file mode 100644
index 0000000..6fbd38a
--- /dev/null
+++ b/bart/runtrain.sh
@@ -0,0 +1,16 @@
+# training command
+# other options: --do_predict --skip_inference --debug --checkpoint ${unifiedqa_checkpoint}
+# --prefix dev_  --prefix test_
+
+python cli.py --do_train --output_dir /data/thar011/out/unifiedqa \
+        --is_unifiedqa \
+        --checkpoint /data/thar011/ckpts/unifiedqa-bart-large-allenai/unifiedQA-uncased/best-model.pt \
+        --train_file /data/thar011/data/unifiedqa/train.tsv \
+        --predict_file /data/thar011/data/unifiedqa/dev.tsv \
+        --train_batch_size 64 \
+        --predict_batch_size 64 \
+        --append_another_bos --do_lowercase \
+        --eval_period 10000 --verbose \
+        --num_train_epochs 10000 \
+        --gradient_accumulation_steps 1
+
From c1ec42e65cc6f2bd2e66ca086c3c7c12401b73ac Mon Sep 17 00:00:00 2001
From: timhartill
Date: Sat, 3 Apr 2021 00:03:28 +1300
Subject: [PATCH 7/8] add .gitignore + more logging

---
 .gitignore       |  9 +++++++++
 bart/bart.py     |  2 +-
 bart/run.py      | 11 ++++++++++-
 bart/runtrain.sh |  5 +++--
 4 files changed, 23 insertions(+), 4 deletions(-)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ce36dcd
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,9 @@
+# Ignore files
+*.pyc
+extract_features_timbackup.py
+
+# Ignore directories
+features/
+dataset/videos/
+__pycache__
+
diff --git a/bart/bart.py b/bart/bart.py
index a594fc0..28c3b59 100644
--- a/bart/bart.py
+++ b/bart/bart.py
@@ -84,7 +84,7 @@ def forward(
         lm_logits = F.linear(outputs[0], self.model.shared.weight, bias=self.final_logits_bias)
 
         loss = None
-        if labels is not None: #TJH added labels is not None instead of is_training
+        if labels is not None: #TJH labels is not None instead of is_training
             loss_fct = nn.CrossEntropyLoss(reduce=False)
             losses = loss_fct(lm_logits.view(-1, self.config.vocab_size),
                               labels.view(-1))
diff --git a/bart/run.py b/bart/run.py
index 76b2d84..8b6bc4a 100644
--- a/bart/run.py
+++ b/bart/run.py
@@ -32,7 +32,8 @@ def run(args, logger):
 
     if args.checkpoint is not None:
         model = MyBart.from_pretrained("facebook/bart-large",
-                                       state_dict=torch.load(args.checkpoint)) #TJH: bart-large
+                                       state_dict=torch.load(args.checkpoint)) #TJH: bart-large
+        logger.info("Loading checkpoint from {}".format(args.checkpoint)) #TJH Added
     else:
         model = MyBart.from_pretrained("facebook/bart-large") #TJH: bart-large
     if args.n_gpu>1:
@@ -83,7 +84,11 @@ def _convert(key):
 
     logger.info("Starting training!")
     for epoch in range(int(args.num_train_epochs)):
+        if args.verbose:
+            logger.info("Starting Epoch %d" % (epoch)) #TJH added
         for batch in train_data.dataloader:
+            if args.verbose and global_step % 100 == 0:
+                logger.info("Epoch %d  Global Step %d" % (epoch, global_step)) #TJH Added
             global_step += 1
             batch = [b.to(torch.device("cuda")) for b in batch]
 #            TJH: this was the original unifiedqa:
@@ -121,6 +126,8 @@ def _convert(key):
                         torch.save(model_state_dict, os.path.join(args.output_dir,
                                    "best-model-{}.pt".format(str(global_step).zfill(6))))
                 else:
+                    if args.verbose:
+                        logger.info("Step %d Starting inference.." % (global_step)) #TJH Added
                     model.eval()
                     curr_em = inference(model if args.n_gpu==1 else model.module, dev_data)
                     logger.info("Step %d Train loss %.2f %s %.2f%% on epoch=%d" % (
@@ -142,8 +149,10 @@ def _convert(key):
                         stop_training = False
                     else:
                         wait_step += 1
+                        logger.info("No improvement. Number of wait steps: %d of max wait steps: %d" % (wait_step, args.wait_step))
                         if wait_step >= args.wait_step:
                             stop_training = True
+                            logger.info("Early Stopping due to no improvement after %d wait steps!" % (wait_step)) #TJH Added
                             break
             model.train()
         if stop_training:
diff --git a/bart/runtrain.sh b/bart/runtrain.sh
index 6fbd38a..894e2df 100644
--- a/bart/runtrain.sh
+++ b/bart/runtrain.sh
@@ -2,7 +2,7 @@
 # other options: --do_predict --skip_inference --debug --checkpoint ${unifiedqa_checkpoint}
 # --prefix dev_  --prefix test_
 
-python cli.py --do_train --output_dir /data/thar011/out/unifiedqa \
+python cli.py --do_train --output_dir /data/thar011/out/unifiedqa_1gputest \
         --is_unifiedqa \
         --checkpoint /data/thar011/ckpts/unifiedqa-bart-large-allenai/unifiedQA-uncased/best-model.pt \
         --train_file /data/thar011/data/unifiedqa/train.tsv \
@@ -12,5 +12,6 @@ python cli.py --do_train --output_dir /data/thar011/out/unifiedqa \
         --append_another_bos --do_lowercase \
         --eval_period 10000 --verbose \
         --num_train_epochs 10000 \
-        --gradient_accumulation_steps 1
+        --gradient_accumulation_steps 1 \
+        --wait_step 10
 
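
The wait_step logging added above instruments plain early stopping on dev EM. The logic
in isolation (sketch; dev_scores stands in for the EM measured at each --eval_period
evaluation, values illustrative):

dev_scores = [41.2, 43.7, 43.5, 43.6, 43.1]    # EM after each periodic eval
wait_step_limit = 10                           # --wait_step 10 in runtrain.sh

best_accuracy, wait_step, stop_training = -1.0, 0, False
for curr_em in dev_scores:
    if curr_em > best_accuracy:
        best_accuracy, wait_step = curr_em, 0  # improvement: checkpoint saved, counter reset
    else:
        wait_step += 1                         # no improvement this eval
        if wait_step >= wait_step_limit:
            stop_training = True               # give up after wait_step evals without improvement
            break
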
From 7ce580578b5bcd472232784ccbd22a17e79877f2 Mon Sep 17 00:00:00 2001
From: timhartill
Date: Mon, 5 Apr 2021 17:02:00 +1200
Subject: [PATCH 8/8] updated tokenizer params in data.py unified_data.py

---
 bart/data.py         |  6 ++++--
 bart/runpredict.sh   | 18 ++++++++++++++++++
 bart/runtrain.sh     | 10 +++++-----
 bart/unified_data.py | 12 ++++++++----
 4 files changed, 35 insertions(+), 11 deletions(-)
 create mode 100644 bart/runpredict.sh

diff --git a/bart/data.py b/bart/data.py
index dbecf3e..327c7b4 100644
--- a/bart/data.py
+++ b/bart/data.py
@@ -110,10 +110,12 @@ def load_dataset(self, tokenizer, do_return=False):
             questions = [" "+question for question in questions]
             answers = [" " +answer for answer in answers]
         question_input = tokenizer.batch_encode_plus(questions,
-                                                     pad_to_max_length=True,
+                                                     truncation=True, #TJH added
+                                                     padding='max_length', #TJH was pad_to_max_length=True,
                                                      max_length=self.args.max_input_length)
         answer_input = tokenizer.batch_encode_plus(answers,
-                                                   pad_to_max_length=True,
+                                                   truncation=True, #TJH added
+                                                   padding='max_length', #TJH was pad_to_max_length=True,
                                                    max_length=self.args.max_output_length)
         input_ids, attention_mask = question_input["input_ids"], question_input["attention_mask"]
         decoder_input_ids, decoder_attention_mask = answer_input["input_ids"], answer_input["attention_mask"]
diff --git a/bart/runpredict.sh b/bart/runpredict.sh
new file mode 100644
index 0000000..0eaae10
--- /dev/null
+++ b/bart/runpredict.sh
@@ -0,0 +1,18 @@
+#run predictions - picks up best_model from output_dir otherwise can specify --checkpoint
+
+python cli.py --do_predict --output_dir /data/thar011/out/unifiedqa_2gputest_from_uqackpt \
+        --predict_file /data/thar011/data/unifiedqa/drop/dev.tsv \
+        --predict_batch_size 64 \
+        --append_another_bos --do_lowercase \
+        --verbose \
+        --prefix dev_drop_
+
+
+python cli.py --do_predict --output_dir /data/thar011/out/unifiedqa_2gputest_from_uqackpt \
+        --predict_file /data/thar011/data/unifiedqa/ropes/dev.tsv \
+        --predict_batch_size 64 \
+        --append_another_bos --do_lowercase \
+        --verbose \
+        --prefix dev_ropes_
+
+
diff --git a/bart/runtrain.sh b/bart/runtrain.sh
index 894e2df..891fc80 100644
--- a/bart/runtrain.sh
+++ b/bart/runtrain.sh
@@ -1,17 +1,17 @@
 # training command
 # other options: --do_predict --skip_inference --debug --checkpoint ${unifiedqa_checkpoint}
 # --prefix dev_  --prefix test_
+# --checkpoint /data/thar011/ckpts/unifiedqa-bart-large-allenai/unifiedQA-uncased/best-model.pt \
 
-python cli.py --do_train --output_dir /data/thar011/out/unifiedqa_1gputest \
+python cli.py --do_train --output_dir /data/thar011/out/unifiedqa_2gputest_from_bart \
         --is_unifiedqa \
-        --checkpoint /data/thar011/ckpts/unifiedqa-bart-large-allenai/unifiedQA-uncased/best-model.pt \
         --train_file /data/thar011/data/unifiedqa/train.tsv \
         --predict_file /data/thar011/data/unifiedqa/dev.tsv \
-        --train_batch_size 64 \
-        --predict_batch_size 64 \
+        --train_batch_size 32 \
+        --predict_batch_size 32 \
         --append_another_bos --do_lowercase \
         --eval_period 10000 --verbose \
         --num_train_epochs 10000 \
-        --gradient_accumulation_steps 1 \
+        --gradient_accumulation_steps 2 \
         --wait_step 10
 
diff --git a/bart/unified_data.py b/bart/unified_data.py
index b2c2f20..be19d5e 100644
--- a/bart/unified_data.py
+++ b/bart/unified_data.py
@@ -23,7 +23,7 @@ def __init__(self, logger, args, data_path, is_training):
                           "boolq",
                           "race_string",
                           "openbookqa"]
-        self.data_path = data_path #TJH this should be ../unifiedqa/train.tsv
+        self.data_path = data_path #TJH this would be ../unifiedqa/train.tsv
         self.data_type = data_path.split("/")[-1][:-4] #TJH strip .tsv from filename appearing after final "/"
         assert self.data_type in ["train", "dev", "test"]
 
@@ -96,10 +96,14 @@ def load_dataset(self, tokenizer):
             questions = [" "+question for question in questions]
             answers = [" " +answer for answer in answers]
         question_input = self.tokenizer.batch_encode_plus(questions,
-                                                          pad_to_max_length=True,
-                                                          max_length=self.args.max_input_length)
+                                                          truncation=True, #TJH added
+                                                          padding='max_length', #TJH was pad_to_max_length=True,
+                                                          max_length=self.args.max_input_length)
         answer_input = self.tokenizer.batch_encode_plus(answers,
-                                                        pad_to_max_length=True)
+                                                        truncation=True, #TJH added
+                                                        padding='max_length', #TJH was pad_to_max_length=True,
+                                                        max_length=self.args.max_output_length)
+
         input_ids, attention_mask = question_input["input_ids"], question_input["attention_mask"]
         decoder_input_ids, decoder_attention_mask = answer_input["input_ids"], answer_input["attention_mask"]
         print ("Finish tokenizering...")
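
Footnote on the tokenizer change in this last patch: pad_to_max_length=True was
deprecated in transformers 4.x in favour of explicit padding and truncation arguments.
A self-contained before/after sketch (max_length illustrative):

from transformers import BartTokenizer

tokenizer = BartTokenizer.from_pretrained("facebook/bart-large")
texts = ["which is best conductor? \\n (a) iron (b) feather"]

# old (pre-4.x) style, as in the original unifiedqa code:
#enc = tokenizer.batch_encode_plus(texts, pad_to_max_length=True, max_length=32)

# 4.x style, as used in this patch - same ids/mask, with truncation now explicit:
enc = tokenizer.batch_encode_plus(texts, truncation=True, padding='max_length', max_length=32)
print(len(enc["input_ids"][0]))  # 32 - padded out to max_length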