forked from derekgreene/dynamic-nmf
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdisplay-topics.py
executable file
·55 lines (45 loc) · 2.41 KB
/
display-topics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/usr/bin/env python
"""
Simple tool to display topic modeling results generated by NMF, as stored in one or more PKL files.
Requires the prettytable package:
https://pypi.org/project/prettytable/ (originally hosted at https://code.google.com/p/prettytable/)
"""
import logging as log
from optparse import OptionParser
import unsupervised.nmf, unsupervised.rankings
# --------------------------------------------------------------
def main():
    """
    Command-line entry point: log the top-ranked terms of every topic stored in
    one or more NMF results files.

    Command-line options:
      -t / --top       number of top terms to show per topic (default 10, must be >= 1)
      -l / --long      use the long format display instead of wrapped tables
      -z / --log_file  write the output to this file instead of the default logging stream

    Positional arguments are the paths of the results files to display.

    Exits through ``parser.error`` (status 2) when no results file is given or
    when ``--top`` is not a positive integer.
    """
    parser = OptionParser(usage="usage: %prog [options] results_file1 results_file2 ...")
    parser.add_option("-t", "--top", action="store", type="int", dest="top",
                      help="number of top terms to show", default=10)
    parser.add_option("-l", "--long", action="store_true", dest="long_display",
                      help="long format display")
    parser.add_option("-z", "--log_file", action="store", type="string", dest="output_path",
                      help="log file", default=None)
    (options, args) = parser.parse_args()
    if not args:
        parser.error("Must specify at least one topic modeling results file produced by NMF")
    # Reject a non-positive term count up front, before any results file is loaded;
    # otherwise it would be handed to the formatters unchanged.
    if options.top < 1:
        parser.error("Number of top terms (--top) must be a positive integer")

    # All output goes through the logging module so that -z can redirect it to a file.
    log_settings = {"level": log.INFO, "format": "%(message)s"}
    if options.output_path is not None:
        log_settings["filename"] = options.output_path
    log.basicConfig(**log_settings)

    # number of topics (table columns) shown side by side in the default display
    column_size = 8
    # Load each cached ranking set
    for in_path in args:
        (doc_ids, terms, term_rankings, partition, W, H, labels) = unsupervised.nmf.load_nmf_results(in_path)
        num_topics = len(term_rankings)
        log.info("- Loaded model with %d topics from %s", num_topics, in_path)
        log.info("Top %d terms for %d topics:", options.top, num_topics)
        # never ask the formatters for more terms than term_rankings_size() reports
        top = min(options.top, unsupervised.rankings.term_rankings_size(term_rankings))
        # display line by line?
        if options.long_display:
            log.info(unsupervised.rankings.format_term_rankings_long(term_rankings, labels, top))
            continue
        # wrap columns to improve readability: one table per group of column_size topics
        for start in range(0, num_topics, column_size):
            end = start + column_size
            log.info(unsupervised.rankings.format_term_rankings(
                term_rankings[start:end], labels[start:end], top))
# --------------------------------------------------------------
# Run the tool only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()