-
Notifications
You must be signed in to change notification settings - Fork 4
/
tacred.py
executable file
·94 lines (75 loc) · 4 KB
/
tacred.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import os
import tensorflow as tf
import constant
from main import read
from sl import pseudo_labeling
# Command-line configuration for the TACRED run.  Every option is registered
# on TensorFlow's flag module and read back through ``flags.FLAGS`` in main().
flags = tf.flags

# Quieten TensorFlow's native (C++) logging.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

# Not referenced anywhere in the visible part of this file.
home = os.path.expanduser("~")

# ---------------------------------------------------------------------------
# Default locations
# ---------------------------------------------------------------------------
_data_root = "./data"
_tacred_dir = os.path.join(_data_root, "tacred")

train_file = os.path.join(_tacred_dir, "train.pkl")
dev_file = os.path.join(_tacred_dir, "dev.pkl")
test_file = os.path.join(_tacred_dir, "test.pkl")
glove_word_file = os.path.join(_data_root, "glove", "glove.840B.300d.txt")
emb_dict = os.path.join(_tacred_dir, "emb_dict.json")
pattern_file = os.path.join(_tacred_dir, "explanations.json")

target_dir = "data"
log_dir = "log/event"
save_dir = "log/model"

# ---------------------------------------------------------------------------
# Run selection
# ---------------------------------------------------------------------------
flags.DEFINE_string("dataset", "tacred", "")
flags.DEFINE_string("mode", "refine", "pretrain / refine / few")
flags.DEFINE_string("gpu", "1", "The GPU to run on")

# ---------------------------------------------------------------------------
# Directories and input files
# ---------------------------------------------------------------------------
flags.DEFINE_string("target_dir", target_dir, "")
flags.DEFINE_string("log_dir", log_dir, "")
flags.DEFINE_string("save_dir", save_dir, "")

flags.DEFINE_string("glove_word_file", glove_word_file, "")
flags.DEFINE_string("pattern_file", pattern_file, "")

flags.DEFINE_string(
    "pretrain_sent_file", "./data/tacred/PT_toks.json", "")
flags.DEFINE_string(
    "pretrain_label_file", "./data/tacred/PT_pattern_mask.npy", "")
flags.DEFINE_string(
    "pretrain_sent_file2", "./data/tacred/TK_tok_exp.json", "")
flags.DEFINE_string(
    "pretrain_label_file2", "./data/tacred/TK_label.npy", "")

flags.DEFINE_string("train_file", train_file, "")
flags.DEFINE_string("dev_file", dev_file, "")
flags.DEFINE_string("test_file", test_file, "")
flags.DEFINE_string("emb_dict", emb_dict, "")

# ---------------------------------------------------------------------------
# Embeddings, sentence length and label space
# ---------------------------------------------------------------------------
flags.DEFINE_integer("glove_word_size", 2200000, "Corpus size for Glove")
flags.DEFINE_integer("glove_dim", 300, "Embedding dimension for Glove")
flags.DEFINE_integer("top_k", 100000, "Finetune top k words in embedding")
flags.DEFINE_integer("length", 110, "Limit length for sentence")
# The default class count follows the label map in the ``constant`` module.
flags.DEFINE_integer(
    "num_class", len(constant.LABEL_TO_ID), "Number of classes")

# ---------------------------------------------------------------------------
# Training schedule
# ---------------------------------------------------------------------------
flags.DEFINE_string("tag", "", "The tag name of event files")
flags.DEFINE_integer("batch_size", 50, "Batch size")
flags.DEFINE_integer("pseudo_size", 100, "Batch size for pseudo labeling")
flags.DEFINE_integer(
    "pretrain_size_together", 100, "Batch size for pretraining module")
flags.DEFINE_integer("num_epoch", 50, "Number of epochs")
flags.DEFINE_integer("period", 10, "period to save batch loss")

# ---------------------------------------------------------------------------
# Optimisation and regularisation
# ---------------------------------------------------------------------------
flags.DEFINE_string(
    "optimizer", "adagrad", "Training method [sgd, adagrad, adam]")
flags.DEFINE_float("init_lr", 0.5, "Initial lr")
flags.DEFINE_float("lr_decay", 0.95, "Decay rate")
flags.DEFINE_float("keep_prob", 0.5, "Keep prob in dropout")
flags.DEFINE_float("word_keep_prob", 0.96, "Keep prob for word")
flags.DEFINE_float("grad_clip", 5.0, "Global Norm gradient clipping rate")

# ---------------------------------------------------------------------------
# Model size and matching options
# ---------------------------------------------------------------------------
flags.DEFINE_integer("hidden", 200, "Hidden size")
flags.DEFINE_integer("att_hidden", 200, "Hidden size for attention")
flags.DEFINE_bool("use_sur", True, "Whether to use word-level matching")
flags.DEFINE_bool("use_cont", False, "Whether to use context-level matching")
flags.DEFINE_string(
    "string_sim", "att", "the method of string matching [mean, att]")

# ---------------------------------------------------------------------------
# Sampling rate and loss weights
# ---------------------------------------------------------------------------
flags.DEFINE_float("percent", 1.0, "Sample rate of unlabled data")
flags.DEFINE_float("alpha", 0.2, "pretrain loss")
flags.DEFINE_float("beta", 0.5, "simloss")
flags.DEFINE_float("gamma", 0.7, "unlabeled loss")

# ---------------------------------------------------------------------------
# Pretraining
# ---------------------------------------------------------------------------
flags.DEFINE_float("pos_weight", 1, "pos_weight of pretrain loss")
flags.DEFINE_float("pretrain_lr", 0.1, "pretrain learning rate")
flags.DEFINE_float("pretrain_threshold", 0.5, "pretrain test threshold")
flags.DEFINE_integer("pretrain_train_size", 40000, "pretrain traning size")
flags.DEFINE_integer("pretrain_test_size", 2000, "pretrain test size")
flags.DEFINE_integer("pretrain_epoch", 10, "pretrain epoch")
# Keep this equal to pretrain_size_together.
flags.DEFINE_integer("pretrain_size", 100, "pretrain size")
flags.DEFINE_float("pretrain_alpha", 0.5, "simloss rate")
def main(_):
    """Run pseudo labeling with the parsed command-line flags.

    Copies the ``gpu`` flag into the ``CUDA_VISIBLE_DEVICES`` environment
    variable, then, with a newly created ``tf.Graph`` installed as the
    default graph, loads the data via ``read`` and passes it to
    ``pseudo_labeling``.

    Args:
        _: Positional argument supplied by the caller; ignored here.
    """
    cfg = flags.FLAGS
    os.environ["CUDA_VISIBLE_DEVICES"] = cfg.gpu

    graph = tf.Graph()
    with graph.as_default():
        # NOTE(review): both the data loading and the pseudo-labeling call
        # are assumed to run inside this graph context -- confirm.
        pseudo_labeling(cfg, read(cfg))
if __name__ == "__main__":
    # Hand control to TensorFlow's app runner, naming the entry point
    # explicitly instead of relying on its lookup of ``__main__.main``.
    tf.app.run(main=main)