-
Notifications
You must be signed in to change notification settings - Fork 2
/
settings.py
37 lines (32 loc) · 1.92 KB
/
settings.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
from os.path import abspath, dirname, join
import os
# Project-wide settings: filesystem locations and a few numeric constants.
#
# Every path below is anchored at PROJ_DIR (the directory containing this
# file), so the values do not depend on the process's working directory.
PROJ_DIR = join(abspath(dirname(__file__)), '.')
DATA_DIR = join(PROJ_DIR, './data/')
OUTPUT_DIR = join(PROJ_DIR, './output/')

# Name -> publication files and raw publication files under DATA_DIR.
TRAIN_NAME2PUB = join(DATA_DIR, 'assignment_train.json')
VAL_NAME2PUB = join(DATA_DIR, 'await_validation.json')
TEST_NAME2PUB = join(DATA_DIR, 'await_test.json')
VAL_PATH = join(DATA_DIR, 'pubs_validate.json')
TEST_PATH = join(DATA_DIR, 'pubs_test.json')

IDF_THRESHOLD = 50
# NOTE(review): presumably exported as CUDA_VISIBLE_DEVICES by the code that
# consumes this module -- confirm against the callers.
cuda_visible_devices = '1'
local_output_path = join(OUTPUT_DIR, 'local_output.pkl')  # doc_id -> Z_i, np.ndarray

# Global settings
assignments_train_path = join(DATA_DIR, 'assignment_train.json')
assignments_val_path = join(DATA_DIR, 'assignment_validate.json')
pubs_train_path = join(DATA_DIR, 'pubs_train.json')
pubs_validate_path = join(DATA_DIR, 'pubs_validate.json')
pubs_test_path = join(DATA_DIR, 'pubs_test.json')
# Anchored at DATA_DIR like its neighbours; it used to be the
# working-directory-relative literal './data/stop_words.txt'.
stop_words_path = join(DATA_DIR, 'stop_words.txt')
idf_path = join(OUTPUT_DIR, 'idf.pkl')  # word -> idf value, float
global_output_path = join(OUTPUT_DIR, 'global_output.pkl')  # doc_id -> Y_i, np.ndarray
material_path = join(OUTPUT_DIR, 'material.pkl')  # doc_id -> [word1, word2, ...], list
# The next entry and the two at the end of this group (word2vec model,
# triple set) were written as './output/...' literals; they are now built
# from OUTPUT_DIR so they resolve to the same files from any directory.
weighted_embedding_path = join(OUTPUT_DIR, 'weighted_embedding.pkl')  # doc_id -> X_i, np.ndarray
rule_check_file_path = join(OUTPUT_DIR, 'rule_check.json')
pos_pair_path = join(OUTPUT_DIR, 'pos_pair.json')
single_dogs_path = join(OUTPUT_DIR, 'single_dogs.json')
# Alternative pre-trained model, kept for reference:
#word2vect_model_path = join(DATA_DIR, 'GoogleNews-vectors-negative300.bin.gz') # word2vec model. usage: KeyedVectors.load_word2vec_format(...)
word2vect_model_path = join(OUTPUT_DIR, 'word.emb')  # word2vec model. usage: Word2Vec.load(...)
# 'emb' -> anchors; 'emb_pos': positive weighted embedding; 'emb_neg': negative ones
triple_set = join(OUTPUT_DIR, 'triple.pkl')

EMBEDDING_DIM = 100
CPU_COUNT = 10