#!/usr/bin/python
# -*- coding: utf-8 -*-
import os
from preferences import Paths
"""===========================================================
Neural net parameter selector for BabyLemmatizer 2
asahala 2023
https://github.com/asahala
University of Helsinki
Origins of Emesal Project
Centre of Excellence for Ancient Near-Eastern Empires
==========================================================="""
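
# This module generates the YAML training configurations for the tagger
# and lemmatizer models (the option names, e.g. world_size/gpu_ranks and
# save_data/src_vocab, suggest these are OpenNMT-py configs).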

def write_yaml(filename, content):
    """Write a ready-made YAML string to `filename`."""
    with open(filename, 'w') as f:
        f.write(content)

def set_hyper(examples, steps_per_epoch,
              total_steps, start_decay):
    """Return the training hyperparameter block as a YAML string.

    `examples` is accepted but not used here; the learning-rate decay
    interval is derived from `steps_per_epoch`.
    """
    decay_steps = max(int(steps_per_epoch/10), 1)
    return f"""save_checkpoint_steps: {total_steps}
train_steps: {total_steps}
valid_steps: {steps_per_epoch}
dropout: 0.3
optim: adam
learning_rate: 0.0005
learning_rate_decay: 0.9
encoder_type: brnn
batch_size: 64
start_decay_steps: {start_decay}
decay_steps: {decay_steps}"""
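
# For example, a run with steps_per_epoch=1000 (a hypothetical value, not
# taken from this file) would get decay_steps = max(int(1000/10), 1) = 100,
# i.e. the learning rate is multiplied by 0.9 every 100 steps once
# start_decay_steps has been reached.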

def make_lemmatizer_yaml(prefix, hyper):
    """Write the lemmatizer training config to <models>/<prefix>/lemmatizer.yaml."""
    lemmatizer =\
f"""save_data: {Paths.models}/{prefix}/lemmatizer/model
src_vocab: {Paths.models}/{prefix}/lemmatizer/vocab.src
tgt_vocab: {Paths.models}/{prefix}/lemmatizer/vocab.tgt
overwrite: True

# Corpus opts:
data:
    corpus_1:
        path_src: {Paths.models}/{prefix}/lemmatizer/traindata/train.src
        path_tgt: {Paths.models}/{prefix}/lemmatizer/traindata/train.tgt
    valid:
        path_src: {Paths.models}/{prefix}/lemmatizer/traindata/dev.src
        path_tgt: {Paths.models}/{prefix}/lemmatizer/traindata/dev.tgt

# Vocabulary files that were just created
#src_vocab: {Paths.models}/{prefix}/lemmatizer/vocab.src
#tgt_vocab: {Paths.models}/{prefix}/lemmatizer/vocab.tgt

# Train on a single GPU
#world_size: 1
#gpu_ranks: [0]

#####

# Where to save the checkpoints
save_model: {Paths.models}/{prefix}/lemmatizer/model

{hyper}
"""

    write_yaml(os.path.join(Paths.models, prefix, 'lemmatizer.yaml'), lemmatizer)

def make_tagger_yaml(prefix, hyper):
    """Write the tagger training config to <models>/<prefix>/tagger.yaml."""
    tagger =\
f"""save_data: {Paths.models}/{prefix}/tagger/model
src_vocab: {Paths.models}/{prefix}/tagger/vocab.src
tgt_vocab: {Paths.models}/{prefix}/tagger/vocab.tgt
overwrite: True

# Corpus opts:
data:
    corpus_1:
        path_src: {Paths.models}/{prefix}/tagger/traindata/train.src
        path_tgt: {Paths.models}/{prefix}/tagger/traindata/train.tgt
    valid:
        path_src: {Paths.models}/{prefix}/tagger/traindata/dev.src
        path_tgt: {Paths.models}/{prefix}/tagger/traindata/dev.tgt

# Vocabulary files that were just created
#src_vocab: {Paths.models}/{prefix}/tagger/vocab.src
#tgt_vocab: {Paths.models}/{prefix}/tagger/vocab.tgt

# Train on a single GPU
#world_size: 1
#gpu_ranks: [0]

#####

# Where to save the checkpoints
save_model: {Paths.models}/{prefix}/tagger/model

{hyper}
"""

    write_yaml(os.path.join(Paths.models, prefix, 'tagger.yaml'), tagger)
"""
save_checkpoint_steps: 35000
train_steps: 35000
valid_steps: 15000
dropout: 0.3
optim: adam
learning_rate: 0.0005
learning_rate_decay: 0.9
encoder_type: brnn
batch_size: 64
start_decay_steps: 15000
decay_steps: 512
"""