-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtest.py
31 lines (24 loc) · 927 Bytes
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import csv
import os

import numpy as np
import pandas as pd
def loadFont(
    glove_path='/home/zhangwenqiao/Project/VideoQA/data/msvd_qa/word_embedding.npy',
    vocab_path='/home/zhangwenqiao/Project/VideoQA/data/msvd_qa/test_word.txt',
    embedding_path=None,
):
    """Build an embedding matrix for a vocabulary and save it as a ``.npy`` file.

    The vector table at ``glove_path`` is parsed as space-separated text with
    no header: column 0 is the word, the remaining columns are its vector.
    The vocabulary at ``vocab_path`` is parsed as a header-less CSV whose
    first column holds the words. Row ``i`` of the result is the vector of
    the ``i``-th vocabulary word; words missing from the table receive the
    column-wise mean of the matrix instead.

    Args:
        glove_path: Path of the space-separated word-vector text file.
            NOTE(review): the default ends in ``.npy`` although the file is
            parsed as text -- confirm it points at the intended file.
        vocab_path: Path of the vocabulary file, one word per row.
        embedding_path: Destination passed to ``np.save``. When ``None``, the
            matrix is written next to the vocabulary file as
            ``<vocab stem>_embedding.npy``.
            NOTE(review): the original referenced an undefined
            ``embedding_path``; this default is an assumption -- confirm the
            intended output location.

    Returns:
        The saved ``float32`` array of shape ``(len(vocab), vector_dim)``.
    """
    if embedding_path is None:
        # Derived from the vocabulary path so the default can never overwrite
        # the input vector table.
        embedding_path = os.path.splitext(vocab_path)[0] + '_embedding.npy'

    # QUOTE_NONE: quote characters are ordinary tokens in the vector table and
    # must not be interpreted as CSV quoting.
    glove = pd.read_csv(
        glove_path, sep=' ', quoting=csv.QUOTE_NONE, header=None)
    glove.set_index(0, inplace=True)

    # Load vocabulary.
    vocab = pd.read_csv(vocab_path, header=None)[0]

    embedding = np.zeros([len(vocab), len(glove.columns)], np.float64)
    not_found = []
    for i, word in enumerate(vocab):
        if word in glove.index:
            embedding[i] = glove.loc[word]
        else:
            not_found.append(i)
    print('Not found:\n', vocab.iloc[not_found])

    # NOTE(review): the mean is taken over every row, including the still-zero
    # rows of missing words, exactly as the original did. Confirm whether the
    # mean of the found rows only was intended.
    embedding_avg = np.mean(embedding, 0)
    embedding[not_found] = embedding_avg

    embedding = embedding.astype(np.float32)
    np.save(embedding_path, embedding)
    return embedding
# Script entry point: build and save the embedding matrix only when this file
# is executed directly, not when it is imported.
if __name__ == "__main__":
    loadFont()