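"""Command-line entry point of LibMultiLabel for multi-label and multi-class classification.

Arguments can be given on the command line or collected in a YAML config file
passed via -c/--config; values parsed from the command line override the file.
A minimal, illustrative config (names and values are placeholders; see
example_config/ for real configs) might look like:

    data_name: my_dataset
    model_name: KimCNN
    training_file: data/my_dataset/train.txt
    epochs: 50
    learning_rate: 0.0003
"""
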
import argparse
import logging
import os
from datetime import datetime
from pathlib import Path
import yaml
from libmultilabel.common_utils import AttributeDict, timer
from libmultilabel.logging import add_stream_handler, add_collect_handler


def add_all_arguments(parser):
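    """Register all command-line arguments on the given parser.

    Args:
        parser (argparse.ArgumentParser): Parser to add the arguments to.
    """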
# path / directory
parser.add_argument(
"--result_dir", default="./runs", help="The directory to save checkpoints and logs (default: %(default)s)"
)
# data
parser.add_argument("--data_name", default="unnamed_data", help="Dataset name (default: %(default)s)")
parser.add_argument("--training_file", help="Path to training data (default: %(default)s)")
parser.add_argument("--val_file", help="Path to validation data (default: %(default)s)")
parser.add_argument("--test_file", help="Path to test data (default: %(default)s")
parser.add_argument(
"--val_size",
type=float,
default=0.2,
help="Training-validation split: a ratio in [0, 1] or an integer for the size of the validation set (default: %(default)s).",
)
parser.add_argument(
"--min_vocab_freq",
type=int,
default=1,
help="The minimum frequency needed to include a token in the vocabulary (default: %(default)s)",
)
parser.add_argument(
"--max_seq_length",
type=int,
default=500,
help="The maximum number of tokens of a sample (default: %(default)s)",
)
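    # NOTE: with type=bool, argparse converts any non-empty string (including
    # "False") to True; set boolean options like --shuffle and
    # --add_special_tokens through the YAML config instead.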
parser.add_argument(
"--shuffle",
type=bool,
default=True,
help="Whether to shuffle training data before each epoch (default: %(default)s)",
)
parser.add_argument(
"--merge_train_val",
action="store_true",
help="Whether to merge the training and validation data. (default: %(default)s)",
)
parser.add_argument(
"--include_test_labels",
action="store_true",
help="Whether to include labels in the test dataset. (default: %(default)s)",
)
parser.add_argument(
"--remove_no_label_data",
action="store_true",
help="Whether to remove training and validation instances that have no labels. (default: %(default)s)",
)
parser.add_argument(
"--add_special_tokens",
type=bool,
default=True,
help="Whether to add the special tokens for inputs of the transformer-based language model. (default: %(default)s)",
)
# train
parser.add_argument("--seed", type=int, help="Random seed (default: %(default)s)")
parser.add_argument(
"--epochs", type=int, default=10000, help="The number of epochs to train (default: %(default)s)"
)
parser.add_argument("--batch_size", type=int, default=16, help="Size of training batches (default: %(default)s)")
parser.add_argument(
"--optimizer",
default="adam",
choices=["adam", "adamw", "adamax", "sgd"],
help="Optimizer (default: %(default)s)",
)
parser.add_argument(
"--learning_rate", type=float, default=0.0001, help="Learning rate for optimizer (default: %(default)s)"
)
parser.add_argument("--weight_decay", type=float, default=0, help="Weight decay factor (default: %(default)s)")
parser.add_argument(
"--momentum", type=float, default=0.9, help="Momentum factor for SGD only (default: %(default)s)"
)
parser.add_argument(
"--lr_scheduler",
type=str,
default=None,
help="Name of the learning rate scheduler (default: %(default)s)",
)
parser.add_argument(
"--patience",
type=int,
default=5,
help="The number of epochs to wait for improvement before early stopping (default: %(default)s)",
)
parser.add_argument(
"--early_stopping_metric",
default=None,
help="The metric to monitor for early stopping. Set to `val_metric` if specified as None. (default: %(default)s)",
)
parser.add_argument(
"--normalize_embed",
action="store_true",
help="Whether the embeddings of each word is normalized to a unit vector (default: %(default)s)",
)
# model
parser.add_argument("--model_name", default="unnamed_model", help="Model to be used (default: %(default)s)")
parser.add_argument(
"--init_weight", default="kaiming_uniform", help="Weight initialization to be used (default: %(default)s)"
)
parser.add_argument(
"--loss_function", default="binary_cross_entropy_with_logits", help="Loss function (default: %(default)s)"
)
# eval
parser.add_argument(
"--eval_batch_size", type=int, default=256, help="Size of evaluating batches (default: %(default)s)"
)
parser.add_argument(
"--metric_threshold",
type=float,
default=0.5,
help="The decision value threshold over which a label is predicted as positive (default: %(default)s)",
)
parser.add_argument(
"--monitor_metrics",
nargs="+",
default=["P@1", "P@3", "P@5"],
help="Metrics to monitor for evaluation (default: %(default)s)",
)
parser.add_argument(
"--val_metric", default="P@1", help="The metric to select the best model for testing (default: %(default)s)"
)
# pretrained vocab / embeddings
parser.add_argument("--vocab_file", type=str, help="Path to a file holding vocabuaries (default: %(default)s)")
parser.add_argument(
"--embed_file", type=str, help="Path to a file holding pre-trained embeddings (default: %(default)s)"
)
parser.add_argument("--label_file", type=str, help="Path to a file holding all labels (default: %(default)s)")
# log
parser.add_argument(
"--save_k_predictions",
type=int,
nargs="?",
const=100,
default=0,
help="Save top k predictions on test set. k=%(const)s if not specified. (default: %(default)s)",
)
parser.add_argument(
"--predict_out_path",
default="./predictions.txt",
help="Path to the output file holding label results (default: %(default)s)",
)
# auto-test
parser.add_argument(
"--limit_train_batches",
type=float,
default=1.0,
help="Percentage of train dataset to use for auto-testing (default: %(default)s)",
)
parser.add_argument(
"--limit_val_batches",
type=float,
default=1.0,
help="Percentage of validation dataset to use for auto-testing (default: %(default)s)",
)
parser.add_argument(
"--limit_test_batches",
type=float,
default=1.0,
help="Percentage of test dataset to use for auto-testing (default: %(default)s)",
)
# others
parser.add_argument("--cpu", action="store_true", help="Disable CUDA")
parser.add_argument("--silent", action="store_true", help="Enable silent mode")
parser.add_argument(
"--data_workers", type=int, default=4, help="Use multi-cpu core for data pre-processing (default: %(default)s)"
)
parser.add_argument(
"--embed_cache_dir",
type=str,
help="For parameter search only: path to a directory for storing embeddings for multiple runs. (default: %(default)s)",
)
parser.add_argument(
"--eval", action="store_true", help="Only run evaluation on the test set (default: %(default)s)"
)
parser.add_argument("--checkpoint_path", help="The checkpoint to warm-up with (default: %(default)s)")
# linear options
parser.add_argument("--linear", action="store_true", help="Train linear model")
parser.add_argument(
"--data_format",
type=str,
default="txt",
help="'svm' for SVM format or 'txt' for LibMultiLabel format (default: %(default)s)",
)
parser.add_argument("--liblinear_options", type=str, help="Options passed to liblinear (default: %(default)s)")
parser.add_argument(
"--linear_technique",
type=str,
default="1vsrest",
choices=["1vsrest", "thresholding", "cost_sensitive", "cost_sensitive_micro", "binary_and_multiclass", "tree"],
help="Technique for linear classification (default: %(default)s)",
)
parser.add_argument(
"--save_positive_predictions",
action="store_true",
help="Save all the predictions with decision value larger then 0. If used, the save_k_predictions must be set to 0",
)
# tree options
parser.add_argument("--tree_degree", type=int, default=100, help="Degree of the tree (default: %(default)s)")
parser.add_argument(
"--tree_max_depth", type=int, default=10, help="Maximum depth of the tree (default: %(default)s)"
)
parser.add_argument(
"--beam_width",
type=int,
default=10,
help="The width of the beam search (default: %(default)s)",
)
# AttentionXML
parser.add_argument(
"--cluster_size",
type=int,
default=8,
help="the maximal number of labels inside a cluster (default: %(default)s)",
)
parser.add_argument(
"-h",
"--help",
action="help",
help="If you are trying to specify network config such as dropout or activation or config of the learning rate scheduler, use a yaml file instead. "
"See example configs in example_config",
)


def get_config():
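    """Parse the command-line arguments, merging in defaults from the YAML
    config file if one is given, then derive run-specific paths.

    Returns:
        AttributeDict: Config of the experiment.
    """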
parser = argparse.ArgumentParser(add_help=False, description="multi-label and multi-class classification")
# load params from config file
parser.add_argument("-c", "--config", help="Path to configuration file")
args, _ = parser.parse_known_args()
config = {}
if args.config:
with open(args.config) as fp:
config = yaml.load(fp, Loader=yaml.SafeLoader)
add_all_arguments(parser)
parser.set_defaults(**config)
args = parser.parse_args()
# set one argument with the value of another argument (not supported in argparse)
if args.early_stopping_metric is None:
args.early_stopping_metric = args.val_metric
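    # scheduler_config has no command-line flag; it can only come from the YAML config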
if not hasattr(args, "scheduler_config"):
args.scheduler_config = None
config = AttributeDict(vars(args))
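    # Name the run after the dataset, the config file stem (or the model name), and a timestamp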
config.run_name = "{}_{}_{}".format(
config.data_name,
Path(config.config).stem if config.config else config.model_name,
datetime.now().strftime("%Y%m%d%H%M%S"),
)
config.checkpoint_dir = os.path.join(config.result_dir, config.run_name)
config.log_path = os.path.join(config.checkpoint_dir, "logs.json")
config.predict_out_path = config.predict_out_path or os.path.join(config.checkpoint_dir, "predictions.txt")
return config


def check_config(config):
"""Check if the configuration has invalid arguments.

    Args:
        config (AttributeDict): Config of the experiment from `get_config`.
"""
if config.model_name == "XMLCNN" and config.seed is not None:
raise ValueError(
"nn.AdaptiveMaxPool1d doesn't have a deterministic implementation but seed is"
"specified. Please do not specify seed."
)
if config.eval and config.test_file is None:
raise ValueError("--eval is specified but there is no test data set")
return None


@timer
def main():
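    """Train and/or evaluate a linear or neural model according to the config."""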
# Get config
config = get_config()
check_config(config)
# Set up logger
log_level = logging.WARNING if config.silent else logging.INFO
stream_handler = add_stream_handler(log_level)
collect_handler = add_collect_handler(logging.NOTSET)
logging.info(f"Run name: {config.run_name}")
if config.linear:
from linear_trainer import linear_run
linear_run(config)
else:
from torch_trainer import TorchTrainer
trainer = TorchTrainer(config) # initialize trainer
# train
if not config.eval:
trainer.train()
# test
if "test" in trainer.datasets:
trainer.test()
collected_logs = collect_handler.get_logs()
if collected_logs:
print("\n\n======= Collected log messages =======")
print("\n".join(collected_logs))


if __name__ == "__main__":
main()