-
Notifications
You must be signed in to change notification settings - Fork 1
/
summarizer.py
172 lines (156 loc) · 6.7 KB
/
summarizer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
import utils
from datetime import datetime
import pytz
from textteaser import TextTeaser
def tag_message(msg, tag):
    """Prefix every line of a message's text with the speaker tag.

    Args:
        msg: Message dict; only its 'text' entry is read.
        tag: Label written before each line, followed by ': '.

    Returns:
        The tagged text, terminated by exactly one newline.
    """
    prefix = tag + ': '
    # Strip trailing whitespace *before* tagging so that a message ending in
    # a newline does not produce a dangling, empty "tag:" line.
    body = msg['text'].rstrip()
    return prefix + body.replace('\n', '\n' + prefix) + '\n'
def tag_file(msg, tag):
    """Build one tagged 'FILE:' line per attachment of a message.

    Args:
        msg: Message dict; its 'files' entry is iterated.
        tag: Label written before each line, followed by ': '.

    Returns:
        A string with one "<tag>: FILE: <file>" line per entry in
        msg['files'], each terminated by a newline; '' when the list is empty.
    """
    prefix = tag + ': '
    return ''.join(prefix + 'FILE: {}'.format(f) + '\n' for f in msg['files'])
def _within_time_limits(age_seconds, days_limit, hours_limit, min_limit):
    """Return True when a message of the given age passes the time filters.

    With no limit set every message passes.  Otherwise the message passes as
    soon as it satisfies any one of the limits that are set.  The age is
    floored to whole days/hours/minutes before it is compared.
    """
    limits = (
        (days_limit, 86400),
        (hours_limit, 3600),
        (min_limit, 60),
    )
    active = [(limit, unit) for limit, unit in limits if limit is not None]
    if not active:
        return True
    return any(age_seconds // unit <= limit for limit, unit in active)


def compile_messages(user, room_name, msg_limit=None, days_limit=None, hours_limit=None, min_limit=None):
    """Collect the text messages of a room into one tagged string.

    Args:
        user: Passed through to the utils lookups.
        room_name: Passed through to the utils lookups.
        msg_limit: When truthy, forwarded to utils.get_messages_for_user as
            its third argument; otherwise the call is made without it.
        days_limit: Keep messages at most this many whole days old.
        hours_limit: Keep messages at most this many whole hours old.
        min_limit: Keep messages at most this many whole minutes old.

    Returns:
        The tagged messages joined together, with trailing whitespace removed.

    Raises:
        Exception: If no messages come back or the result has no 'items' key.
        KeyError: If a message's 'personEmail' is missing from the mapping
            returned by utils.get_emails_with_users.
    """
    email_with_users = utils.get_emails_with_users(user, room_name)
    now = pytz.utc.localize(datetime.utcnow())

    if msg_limit:
        messages = utils.get_messages_for_user(user, room_name, msg_limit)
    else:
        messages = utils.get_messages_for_user(user, room_name)
    if not messages or 'items' not in messages:
        raise Exception('Could not get messages')

    tagged = []
    # Items are walked in reverse of the order they were returned in
    # (presumably newest-first from the API, giving a chronological
    # transcript -- confirm against utils.get_messages_for_user).
    for msg in reversed(messages['items']):
        if 'text' not in msg:
            continue
        created = pytz.utc.localize(
            datetime.strptime(msg['created'], '%Y-%m-%dT%H:%M:%S.%fZ'))
        # total_seconds() covers the whole interval; timedelta.seconds would
        # drop the days component and let old messages slip through the
        # minute filter.
        age_seconds = abs((now - created).total_seconds())
        if _within_time_limits(age_seconds, days_limit, hours_limit, min_limit):
            tagged.append(tag_message(msg, email_with_users[msg['personEmail']]))
        # NOTE: attachments (msg['files'], see tag_file) are currently not
        # included in the compiled text.
    return ''.join(tagged).rstrip()
def indent_tagged(text, tags):
    """Merge consecutive lines from the same tag into one indented block.

    Args:
        text: Iterable of lines (strings), or None.
        tags: Iterable of tag strings that may start a line as "<tag>: ...",
            or None.

    Returns:
        None when text is None; text unchanged when tags is None; otherwise
        a single string in which the first line of each run keeps its
        "<tag>: " prefix and following lines of the same tag have the prefix
        replaced by spaces of equal width.
    """
    if text is None:
        return None
    if tags is None:
        return text
    # Materialise once so any iterable (including one-shot iterators) can be
    # scanned again for every line.
    tags = list(tags)
    last_tag, indented = '', ''
    for line in text:
        tag = None
        for t in tags:
            # Require the ':' delimiter so a tag that is merely a prefix of
            # another tag (e.g. 'Bob' vs 'Bobby') is not matched by mistake.
            if line.startswith(t + ':'):
                tag = t
        if tag is None:
            # An untagged line is glued onto the end of the output so far.
            indented = indented.rstrip() + line + '\n'
            continue
        indent = ' ' * (len(tag) + 2)
        # Lines may carry embedded newlines followed by the tag prefix.
        line = line.replace('\n' + tag + ': ', '\n' + indent)
        if last_tag == tag:
            indented += indent + line[len(tag) + 2:] + '\n'
        else:
            indented += line + '\n'
        last_tag = tag
    return indented
"""
Use get_transcript to get a transcript of chat messages
"""
def get_transcript(user, room_name, msg_limit=None, days_limit=None, hours_limit=None, min_limit=None):
    """Return the room's messages as an indented, speaker-tagged transcript.

    The arguments are forwarded unchanged to compile_messages; its output is
    split into lines and handed to indent_tagged together with the values of
    utils.get_emails_with_users(user, room_name) as the tag set.
    """
    compiled = compile_messages(user, room_name, msg_limit, days_limit, hours_limit, min_limit)
    transcript_lines = compiled.splitlines()
    speaker_tags = utils.get_emails_with_users(user, room_name).values()
    return indent_tagged(transcript_lines, speaker_tags)
"""
Use summarize to summarize chat messages in a room with NLP
"""
def summarize(user, room_name, msg_limit=None, days_limit=None, hours_limit=None, min_limit=None, title=None):
    """Summarize the chat messages of a room with TextTeaser.

    Args:
        user: Forwarded to compile_messages and the utils lookup.
        room_name: Forwarded likewise; also used for the default title.
        msg_limit, days_limit, hours_limit, min_limit: Forwarded unchanged
            to compile_messages.
        title: Title given to TextTeaser; defaults to "<room_name> Summary".

    Returns:
        The result of indent_tagged applied to TextTeaser's output, using the
        values of utils.get_emails_with_users(user, room_name) as tags.
    """
    if title is None:
        # Wrap in a 1-tuple so a tuple-valued room_name cannot be mistaken
        # for multiple format arguments.
        title = '%s Summary' % (room_name,)
    text = compile_messages(user, room_name, msg_limit, days_limit, hours_limit, min_limit)
    tt = TextTeaser()
    sentences = tt.summarize(title, text)
    speaker_tags = utils.get_emails_with_users(user, room_name).values()
    return indent_tagged(sentences, speaker_tags)
"""
Test Calls
"""
# print summarize('chris', 'Ping Pong SJ-29', 6)
# print summarize('chris', 'Golden Eagles', 100)
# print get_transcript('chris', 'Codefest Spark Voice Assistant')
'''import utils
from datetime import datetime
import pytz
from textteaser import TextTeaser
def tag_message(msg, tag):
indent = ' ' * (len(msg['personEmail']) + 2)
return tag + ': ' + msg['text'].replace('\n', '\n{}'.format(tag+': ')).rstrip() + '\n'
def tag_file(msg, tag):
text = ''
for f in msg['files']:
file_msg = 'FILE: %s' % f
text += tag + ': ' + 'FILE: {}'.format(f) + '\n'
return text
def compile_messages(user, room_name, msg_limit=None, days_limit=None, hours_limit=None, min_limit=None):
text, email_with_users = '', utils.get_emails_with_users(user, room_name)
now = pytz.utc.localize(datetime.utcnow())
messages = None
if not msg_limit:
messages = utils.get_messages_for_user(user, room_name)
else:
messages = utils.get_messages_for_user(user, room_name, msg_limit)
if not messages or 'items' not in messages:
raise Exception('Could not get messages')
for msg in reversed(messages['items']):
if 'text' in msg:
time = pytz.utc.localize(datetime.strptime(msg['created'], '%Y-%m-%dT%H:%M:%S.%fZ'))
if days_limit == None and hours_limit == None and min_limit == None or \
days_limit != None and abs((now - time).days) <= days_limit or \
hours_limit != None and abs((now - time).total_seconds()) // 3600 <= hours_limit or \
min_limit != None and abs((now - time).seconds) // 60 <= min_limit:\
text += tag_message(msg, email_with_users[msg['personEmail']])
"""
if 'files' in msg:
text += tag_file(msg, email_with_users[msg['personEmail']])
"""
return text.rstrip()
def indent_tagged(text, tags):
if text == None:
None
if tags == None:
return text
last_tag, indented = '', ''
for line in text:
tag = None
for t in tags:
if line[:len(t)] == t:
tag = t
if tag is None:
indented = indented.rstrip() + line + '\n'
continue
indent = ' ' * (len(tag) + 2)
line = line.replace('\n{}'.format(tag+': '), '\n' + indent)
if last_tag == tag:
indented += indent + line[len(tag)+2:] + '\n'
else:
indented += line + '\n'
last_tag = tag
return indented
"""
Use get_transcript to get a transcript of chat messages
"""
def get_transcript(user, room_name, msg_limit=None, days_limit=None, hours_limit=None, min_limit=None):
return indent_tagged(compile_messages(user, room_name, msg_limit, days_limit, hours_limit, min_limit).splitlines(), \
utils.get_emails_with_users(user, room_name).values())
"""
Use summarize to summarize chat messages in a room with NLP
"""
def summarize(user, room_name, msg_limit=None, days_limit=None, hours_limit=None, min_limit=None, title=None):
if title == None:
title = '%s Summary' % room_name
text = compile_messages(user, room_name, msg_limit, days_limit, hours_limit, min_limit)
tt = TextTeaser()
return indent_tagged(tt.summarize(title, text), utils.get_emails_with_users(user, room_name).values())
# print summarize('chris', 'Ping Pong SJ-29', 6)
# print summarize('chris', 'Golden Eagles', 100)
# print get_transcript('chris', 'Codefest Spark Voice Assistant')
'''