forked from AadityaNair/LDA-with-Deep-Learning
-
Notifications
You must be signed in to change notification settings - Fork 0
/
lda.py
33 lines (25 loc) · 814 Bytes
/
lda.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import logging
import os
import gensim
from nltk.corpus import reuters
from utils import TRAINING_SET, preprocess_document
# Runtime knobs; each can be overridden through the environment variable of
# the same name and is coerced to int.
NUM_TOPICS = int(os.getenv('NUM_TOPICS', 20))  # handed to the model as num_topics
WORKERS = int(os.getenv('WORKERS', 3))  # handed to the model as workers

# Configure the root logger to show INFO-and-above records with a timestamp.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s : %(levelname)s : %(message)s',
)
# Short alias for gensim's LdaMulticore class; it is called later in this
# script to build the model from the bag-of-words corpus.
lda = gensim.models.ldamulticore.LdaMulticore
# One preprocess_document() result per file id in TRAINING_SET, in the same
# order: the raw Reuters text is fetched and preprocessed document by document.
# (presumably each result is a list of tokens, since it feeds a gensim
# Dictionary -- confirm against utils.preprocess_document)
words_list = [
    preprocess_document(reuters.raw(file_id))
    for file_id in TRAINING_SET
]
# Vocabulary (token <-> integer id mapping) built from every preprocessed
# document.
dictionary = gensim.corpora.Dictionary(words_list)

# Each document converted to its bag-of-words form via the dictionary.
bow_list = [dictionary.doc2bow(document) for document in words_list]
# Destination of the trained model; override with the LDA_MODEL env variable.
_model_path = os.environ.get('LDA_MODEL', './models/lda/trained_lda.txt')

# Make sure the destination directory exists *before* training, so a missing
# directory cannot make save() fail after the (long) 100-pass training run.
# A bare file name has an empty dirname, in which case there is nothing to
# create.
_model_dir = os.path.dirname(_model_path)
if _model_dir:
    os.makedirs(_model_dir, exist_ok=True)

# NOTE(review): LdaMulticore spawns worker processes; on platforms that use
# the "spawn" start method this module-level call may need an
# `if __name__ == "__main__":` guard -- confirm before running there.
ldamodel = lda(
    bow_list,
    num_topics=NUM_TOPICS,
    id2word=dictionary,
    passes=100,
    workers=WORKERS,
)

# Persist the trained model to the resolved path.
ldamodel.save(_model_path)