import json
import torch
import torch.nn as nn
from evaluation import hit_score, mrr_score
from finetune_clip_models import CLIP_1, CLIP_2, CLIP_3
from torch.utils.data import DataLoader
from tqdm import tqdm
from typing import Union
from utils import ContrastiveCosineLoss, plot_loss_graph


def get_eval_scores(train_dataloader: DataLoader,
test_dataloader: DataLoader,
choose_model: str,
loss_function: str) -> None:
"""
Runs the training process, plots the loss, hit@1 rate, and MRR per epoch,
then runs the testing process and reports the hit@1 rate and MRR.
Args:
train_dataloader (DataLoader): segments data into batches of 32
samples for train data
test_dataloader (DataLoader): segments data into batches of 32
samples for test data
choose_model (str): specifies model to be used
loss_function (str): specifies loss function to be used
"""
model, epoch_loss, epoch_hit, epoch_mrr = training(train_dataloader,
choose_model,
loss_function)
save_eval_scores(choose_model, epoch_hit, epoch_mrr, epoch_loss,
loss_function)
print("Drawing graphs showing the metrics per epoch...")
plot_loss_graph(epoch_loss, epoch_hit, epoch_mrr)
hit, mrr = testing(model, test_dataloader)
save_eval_scores(choose_model, hit / len(test_dataloader.dataset),
mrr / len(test_dataloader.dataset),
loss_function=loss_function, test=True)
print(f"Hit@1 score for test set: {hit / len(test_dataloader.dataset)}")
print(f"MRR value for the test set: {mrr / len(test_dataloader.dataset)}")


def save_eval_scores(model: str,
hit: Union[list, float],
mrr: Union[list, float],
loss: Union[list, None] = None,
loss_function: Union[str, None] = None,
test: bool = False,
                     CLIP_0: bool = False) -> None:
    """
    Saves the evaluation metrics to a .json file for future reference.

    Args:
        model (str): name of the model used
        hit (Union[list, float]): Hit@1 rates per epoch, or the final test value
        mrr (Union[list, float]): MRR values per epoch, or the final test value
        loss (Union[list, None]): loss values per epoch
        loss_function (Union[str, None]): name of the loss function used
        test (bool): indicates whether the metrics come from testing or training
        CLIP_0 (bool): indicates whether the pretrained CLIP 0 model was used
    """
    # Load any previously saved metrics; start from an empty dict if the file
    # is missing or does not contain valid JSON.
    try:
        with open("./data/metrics.json", "r") as file:
            existing_data = json.load(file)
    except (FileNotFoundError, json.decoder.JSONDecodeError):
        existing_data = {}
    # Fine-tuned models are keyed by model name and loss function; the
    # pretrained CLIP 0 model is keyed by model name only.
    key = f"{model}, {loss_function}" if not CLIP_0 else f"{model}"
    if key not in existing_data:
        existing_data[key] = {}
new_data = {}
if not CLIP_0:
# Used with the test metrics
if test:
new_data["hit"] = hit
new_data["mrr"] = mrr[0]
            existing_data[key]["test metrics"] = new_data
# Used with the training metrics
else:
for i in range(len(loss)):
new_data[f"epoch {i + 1}"] = {}
new_data[f"epoch {i + 1}"]["loss"] = loss[i]
new_data[f"epoch {i + 1}"]["hit"] = hit[i]
new_data[f"epoch {i + 1}"]["mrr"] = mrr[i][0]
            existing_data[key]["train metrics"] = new_data
# Procedure used with the pretrained CLIP model
else:
new_data["hit"] = hit
new_data["mrr"] = mrr[0]
existing_data[f"{model}"] = new_data
with open("./data/metrics.json", "w") as file:
json.dump(existing_data, file, indent=4)
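

# Layout of ./data/metrics.json produced by save_eval_scores, reconstructed
# from the code above (a sketch; the actual keys depend on the chosen model,
# loss function, and number of epochs):
#
# {
#     "<model>, <loss function>": {
#         "train metrics": {
#             "epoch 1": {"loss": ..., "hit": ..., "mrr": ...},
#             ...
#         },
#         "test metrics": {"hit": ..., "mrr": ...}
#     },
#     "<pretrained CLIP 0 model>": {"hit": ..., "mrr": ...}
# }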


def testing(model: nn.Module,
test_dataloader: DataLoader) -> tuple[float, float]:
"""
Runs the testing process on a trained model with the test dataset. Yields
the Hit@1 Rate and MRR.
Args:
model (nn.Module): trained model obtained after training
test_dataloader (DataLoader): segments data into batches of 32 samples
for test data
Return:
tuple[float, float]: Hit@1 Rate and MRR for the testing dataset
"""
    hit = 0
    mrr = 0
    print("Starting testing process for the trained model...")
    # Put the model in evaluation mode and disable gradient tracking.
    model.eval()
    with torch.no_grad():
for batch in tqdm(test_dataloader):
text, images, _, label_idx = batch
text_logit, image_logit = model(text, images)
sim = torch.einsum('ijk,ik->ij', image_logit, text_logit)
hit += hit_score(sim, label_idx)
mrr += mrr_score(sim, label_idx)
return hit, mrr
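
# A minimal shape sketch of the similarity computation above (assumed shapes,
# inferred from the einsum signature; the encoders in finetune_clip_models
# determine the real dimensions):
#   image_logit: (batch, num_candidates, dim)   one embedding per candidate image
#   text_logit:  (batch, dim)                   one embedding per text query
#   sim = torch.einsum('ijk,ik->ij', image_logit, text_logit)
#   sim:         (batch, num_candidates)        dot product of each candidate
#                                               image with its text embedding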


def training(train_dataloader: DataLoader,
choose_model: str = "clip_3",
loss_function: str = "contrastive cosine loss") -> tuple:
"""
Runs the training process given the training data, the chosen model, and
the chosen loss function.
Args:
train_dataloader (DataLoader): segments data into batches of 32 samples
for train data
choose_model (str): specifies the model to be used
loss_function (str): specifies the loss function to be used
Return:
tuple[nn.Module, list, list, list]: trained model and loss, hit@1 rate,
and MRR per epoch
"""
num_epochs = 20
input_size = 512
hidden_size = 512
output_size = 512
print(f"Starting training process for {choose_model} with {loss_function} "
f"as the loss function...")
    if loss_function == "contrastive cosine loss":
        loss_f = ContrastiveCosineLoss()
    elif loss_function == "cross entropy loss":
        loss_f = nn.CrossEntropyLoss()
    else:
        raise ValueError(f"Unknown loss function: {loss_function}")
    if choose_model == "clip_1":
        model = CLIP_1(input_size, output_size)
    elif choose_model == "clip_2":
        model = CLIP_2(input_size, output_size)
    elif choose_model == "clip_3":
        model = CLIP_3(input_size, hidden_size, output_size)
    else:
        raise ValueError(f"Unknown model: {choose_model}")
# AdamW optimizer defined with learning rate of 1e-4.
optimizer = torch.optim.AdamW(model.parameters(), lr=0.0001)
# Lists to store model evaluation metrics per epoch
epoch_loss = []
epoch_mrr = []
epoch_hit = []
for epoch in range(num_epochs):
avg_loss = 0
hit = 0
mrr = 0
model.train()
print(f"Epoch: {epoch + 1}")
for batch in tqdm(train_dataloader):
text, images, target, label_idx = batch
optimizer.zero_grad()
text_logit, image_logit = model(text, images)
            # Einstein summation: for every sample in the batch, take the dot
            # product of each candidate image embedding with the text
            # embedding, giving one similarity score per candidate.
            sim = torch.einsum('ijk,ik->ij', image_logit, text_logit)
loss = loss_f(sim, target)
loss.backward()
optimizer.step()
avg_loss += loss.item() * images.size(0)
            # Detach label_idx and sim from the computation graph and convert
            # them to numpy arrays, since tensors with requires_grad=True
            # cannot be converted directly.
x = label_idx.detach().numpy()
y = sim.detach().numpy()
hit += hit_score(y, x)
mrr += mrr_score(y, x)
epoch_loss.append(avg_loss / len(train_dataloader.dataset))
epoch_hit.append(hit / len(train_dataloader.dataset))
epoch_mrr.append(mrr / len(train_dataloader.dataset))
print("Training Loss:", avg_loss / len(train_dataloader.dataset))
print("Training MRR:", mrr / len(train_dataloader.dataset))
print("Training Hit@1 Rate:", hit / len(train_dataloader.dataset))
return model, epoch_loss, epoch_hit, epoch_mrr
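

# Example usage (a sketch; how the train/test DataLoaders are built is assumed
# here and lives elsewhere in the project, with the batch size of 32 described
# in the docstrings above):
#
#   train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
#   test_dataloader = DataLoader(test_dataset, batch_size=32)
#   get_eval_scores(train_dataloader, test_dataloader,
#                   choose_model="clip_3",
#                   loss_function="contrastive cosine loss")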