# Based on https://opennmt.net/OpenNMT-py/examples/Translation.html
## Where the samples will be written
save_data: run/opennmt_data
## Where the vocab(s) will be written
src_vocab: run/opennmt_data/openmt.vocab
tgt_vocab: run/opennmt_data/openmt.vocab
# Should match the vocab size for SentencePiece
# https://forum.opennmt.net/t/opennmt-py-error-when-training-with-large-amount-of-data/4310/12?u=argosopentech
src_vocab_size: 50000
tgt_vocab_size: 50000
share_vocab: True
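# A minimal sketch (not part of this config) of training the shared
# SentencePiece model with a matching vocab size. The input path here is an
# assumption for illustration; a shared model is typically trained on
# concatenated source and target text:
#   spm_train --input=run/split_data/train.txt \
#             --model_prefix=run/sentencepiece \
#             --vocab_size=50000 --character_coverage=1.0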
# Corpus opts:
data:
    corpus_1:
        path_src: run/split_data/src-train.txt
        path_tgt: run/split_data/tgt-train.txt
        transforms: [sentencepiece, filtertoolong]
    valid:
        path_src: run/split_data/src-val.txt
        path_tgt: run/split_data/tgt-val.txt
        transforms: [sentencepiece, filtertoolong]
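# With the corpora above, the vocab files are generated before training by
# OpenNMT-py's build-vocab step, e.g.:
#   onmt_build_vocab -config config.yml -n_sample -1
# (-n_sample -1 scans the full corpus; a smaller sample also works.)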
### Transform related opts:
#### https://opennmt.net/OpenNMT-py/FAQ.html#how-do-i-use-the-transformer-model
#### Subword
src_subword_model: run/sentencepiece.model
tgt_subword_model: run/sentencepiece.model
src_subword_nbest: 1
src_subword_alpha: 0.0
tgt_subword_nbest: 1
tgt_subword_alpha: 0.0
#### Filter
src_seq_length: 150
tgt_seq_length: 150
# silently ignore empty lines in the data
skip_empty_level: silent
# General opts
save_model: run/openmt.model
save_checkpoint_steps: 1000
valid_steps: 5000
train_steps: 50000
early_stopping: 4
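# OpenNMT-py writes checkpoints as <save_model>_step_<N>.pt, so with the
# settings above e.g. run/openmt.model_step_1000.pt every 1000 steps.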
# Batching
queue_size: 10000
bucket_size: 262144
world_size: 1
gpu_ranks: [0]
batch_type: "tokens"
batch_size: 8192
valid_batch_size: 4096
max_generator_batches: 2
accum_count: [4]
accum_steps: [0]
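# Effective batch size: 8192 tokens per batch * 4 gradient-accumulation
# steps ~= 32,768 tokens per optimizer update (single GPU, world_size 1).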
# Optimization
model_dtype: "fp32"
optim: "adam"
learning_rate: 2
warmup_steps: 8000
decay_method: "noam"
adam_beta2: 0.998
max_grad_norm: 0
label_smoothing: 0.1
param_init: 0
param_init_glorot: true
normalization: "tokens"
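# Sketch of the noam schedule as documented for OpenNMT-py:
#   lr = learning_rate * hidden_size^-0.5 * min(step^-0.5, step * warmup_steps^-1.5)
# With learning_rate 2, hidden_size 512, and warmup_steps 8000 this peaks
# around 2 * 512^-0.5 * 8000^-0.5 ~= 1e-3 at step 8000, then decays.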
# Model
encoder_type: transformer
decoder_type: transformer
position_encoding: true
# OpenNMT-py v3
# position_encoding: false
# max_relative_positions: 20
enc_layers: 6
dec_layers: 6
heads: 8
hidden_size: 512
# rnn_size is the pre-v3 name for hidden_size; both are set for compatibility
rnn_size: 512
word_vec_size: 512
transformer_ff: 2048
dropout_steps: [0]
dropout: [0.1]
attention_dropout: [0.1]
share_decoder_embeddings: true
share_embeddings: true
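# Typical usage (a sketch, not prescribed by this config): launch training
# with this file, then convert a checkpoint for inference with CTranslate2.
# The step-50000 filename assumes training runs to train_steps without
# early stopping; adjust to the checkpoint actually produced.
#   onmt_train -config config.yml
#   ct2-opennmt-py-converter --model_path run/openmt.model_step_50000.pt \
#       --output_dir run/ctranslate2_model --quantization int8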