-
Notifications
You must be signed in to change notification settings - Fork 1
/
live_archive.py
312 lines (261 loc) · 11.5 KB
/
live_archive.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
import json
import sys
from datetime import datetime
from pathlib import Path
from threading import Lock, get_ident
from shared import *
# Root directory of the archive, taken from the 'live_archive' section of config
# (config is presumably supplied by the star import from shared -- confirm)
archive_path = Path(config['live_archive']['archive_path'])
# Locks keyed by daily log file path; entries are created on demand by add_to_archive
file_locks = {}
def timestamp_to_date_string(ts):
    """Converts a Slack timestamp into an ISO date (used in naming the daily JSON logs)

    Args:
        ts: Seconds since the epoch as a float, an int, or a numeric string
            (e.g. "1600000000.000200").

    Returns:
        The date as a 'YYYY-MM-DD' string, computed in the local timezone.

    Raises:
        TypeError: If 'ts' is not an int, float or string.
        ValueError: If 'ts' is a string that cannot be parsed as a number.
    """
    # bool is a subclass of int but is never a meaningful timestamp, so reject it explicitly
    if isinstance(ts, bool) or not isinstance(ts, (int, float, str)):
        raise TypeError("Expected int, float or string timestamp")
    # float() is a no-op for floats and parses strings/ints, so one code path covers all cases
    return datetime.fromtimestamp(float(ts)).date().isoformat()
def channel_id_to_name(channel_id):
    """Looks up the current name of the channel whose ID is 'channel_id'

    Calls conversations_info through the app client. If the response is not
    flagged as ok, the response is printed to the error stream and a
    ConnectionError is raised.
    """
    response = app.client.conversations_info(channel=channel_id)
    request_succeeded = "ok" in response.data and response["ok"]
    if not request_succeeded:
        print(f"Error message:{response}", file=err_stream, flush=True)
        raise ConnectionError(f"conversations_info(channel={channel_id}) web request failed")
    return response["channel"]["name"]
def log_file_path(channel_id, ts):
    """Builds the Path of the daily JSON log that holds the message 'ts' of channel 'channel_id'

    The path has the form <archive_path>/<channel name>/<ISO date>.json
    """
    # Resolve the channel folder first, then the per-day file name
    channel_directory = archive_path / channel_id_to_name(channel_id)
    file_name = timestamp_to_date_string(ts) + ".json"
    return channel_directory / file_name
@app.event("message")
def archive_message(message):
    """Handles every message event: applies renames, edits and thread replies, then archives
    the message itself unless it is flagged as hidden"""
    print("Message received", flush=True, file=debug_stream)

    # Message events arrive before 'channel_rename' events, so renames are handled here;
    # otherwise the message could not be stored in the correct folder
    subtype = message['subtype'] if "subtype" in message else None
    if subtype == "channel_name":
        rename_channel(message['channel'], message['old_name'], message['name'])
    elif subtype == "message_changed":
        edited = message['message']
        update_message(message['channel'], edited['ts'], edited)

    # A thread reply also has to be recorded on its parent message.
    # Filtering on subtype is not possible because the subtype field is omitted (Slack API bug)
    # See here: https://api.slack.com/events/message/message_replied
    if "thread_ts" in message:
        add_thread_reply(message['channel'], message['thread_ts'],
                         message['user'], message['ts'])

    # Archive when there is no 'hidden' field, or when it is present but falsy
    # (the latter probably never occurs, as a present 'hidden' field is likely always true)
    is_hidden = "hidden" in message and message['hidden']
    if not is_hidden:
        add_to_archive(message)
def update_message(channel_id, ts, updated_message):
    """Merges the fields of 'updated_message' into the archived message 'ts' of channel 'channel_id'"""
    target_path = log_file_path(channel_id, ts)
    with open(target_path, 'r+') as archive_file:
        entries = json.load(archive_file)
        for entry in entries:
            if entry['ts'] != ts:
                continue
            # Merge instead of replacing the whole entry, because updated_message
            # might not contain reactions and other stuff already stored
            entry.update(updated_message)

        # Rewind, overwrite and cut off whatever is left of the old content
        archive_file.seek(0)
        json.dump(entries, archive_file, indent=4)
        archive_file.truncate()
def add_thread_reply(channel_id, thread_ts, reply_user, reply_ts):
    """Adds a thread reply object to an archived message

    Opens the daily log that contains the parent message 'thread_ts', increments
    its 'reply_count', appends {"user", "ts"} to 'replies' and keeps
    'reply_users' / 'reply_users_count' up to date, then rewrites the log file.
    If the parent is not present in the log, an error is printed and the file
    content is written back unchanged.
    """
    with open(log_file_path(channel_id, thread_ts), 'r+') as log_file:
        message_list = json.load(log_file)
        message = next((m for m in message_list if m['ts'] == thread_ts), None)

        if message is not None:
            message['reply_count'] = message.get('reply_count', 0) + 1
            message.setdefault('replies', []).append({"user": reply_user, "ts": reply_ts})

            if "reply_users" in message:
                # Only a user replying for the first time changes the user bookkeeping
                if reply_user not in message['reply_users']:
                    message['reply_users'].append(reply_user)
                    if "reply_users_count" in message:
                        message['reply_users_count'] += 1
                    else:
                        message['reply_users_count'] = len(message['reply_users'])
            else:
                message['reply_users'] = [reply_user]
                message['reply_users_count'] = 1
                # TODO: Cover the weird case of no reply users but an already existing reply_users_count
                # Though not sure how relevant this is, as slack-export-viewer does not seem to care
        else:
            # The trailing space keeps the two string fragments from running together
            print(f"Received reply to thread (channel={channel_id}, ts={reply_ts}) but have not found "
                  f"original thread with ts = {thread_ts}.", file=err_stream, flush=True)

        # Go back to start and write
        log_file.seek(0)
        json.dump(message_list, log_file, indent=4)
        log_file.truncate()
def rename_channel(channel_id, old_name, new_name):
    """Renames channel folders and updates channels.json

    First rewrites the entry with id 'channel_id' in <archive_path>/channels.json
    so that its name is 'new_name' (an error is printed if no such entry exists).
    Then renames the archive folder 'old_name' to 'new_name'. A missing
    'old_name' or a missing folder only produces a warning, so the calling
    message handler can carry on.
    """
    channel_list_path = archive_path / "channels.json"
    with open(channel_list_path, 'r+') as channel_list:
        old_channel_list = json.load(channel_list)
        channel = next((ch for ch in old_channel_list if ch['id'] == channel_id), None)
        if channel is not None:
            channel['name'] = new_name
        else:
            print(f"Renamed channel (id={channel_id}) from {old_name} to {new_name}, "
                  f"but have not found channel with id on channels.json",
                  file=err_stream, flush=True)
        channel_list.seek(0)
        json.dump(old_channel_list, channel_list, indent=4)
        channel_list.truncate()

    print(f"Channel rename event from {old_name} to {new_name}", flush=True,
          file=info_stream)

    if old_name is None:
        print(f"Attempted to rename channel id {channel_id}, but it doesn't exist",
              flush=True, file=warn_stream)
        return

    old_path = archive_path / old_name
    new_path = archive_path / new_name
    if old_path.is_dir():
        old_path.rename(new_path)
    else:
        # No folder exists until something has been archived for the channel, so there is
        # nothing to move; renaming a missing path would raise FileNotFoundError
        print(f"No archive folder found for channel {old_name} (id={channel_id}), nothing to rename",
              flush=True, file=warn_stream)
@app.event("channel_created")
def create_channel(client, payload):
    """Reacts to a channel creation: for an actual channel, joins it and appends
    its detailed info to channels.json"""
    created = payload['channel']
    created_id = created['id']

    # Fetch the detailed channel object
    info_response = client.conversations_info(channel=created_id)
    if not ("ok" in info_response.data and info_response['ok']):
        print(f"Error message: {info_response}", file=err_stream, flush=True)
        raise ConnectionError(f"Could not get channel info with id = {created_id}")

    detailed_channel = info_response["channel"]
    if not detailed_channel['is_channel']:
        return

    # Join the freshly created channel
    join_response = client.conversations_join(channel=created_id)
    if not ("ok" in join_response.data and join_response['ok']):
        print(f"Error message: {join_response}", file=err_stream, flush=True)
        raise ConnectionError(f"Could not join channel with id = {created_id}")

    print(f'Channel {created["name"]} created', flush=True,
          file=info_stream)

    with open(archive_path / "channels.json", 'r+') as channels_file:
        known_channels = json.load(channels_file)
        known_channels.append(detailed_channel)
        # Rewind, overwrite and cut off whatever is left of the old content
        channels_file.seek(0)
        json.dump(known_channels, channels_file, indent=4)
        channels_file.truncate()
@app.event("reaction_added")
def add_reaction(payload):
    """Adds reaction to the archived message

    Only reactions on items of type "message" are handled. The daily log of the
    reacted-to message is loaded, the reaction entry named payload['reaction']
    gets its 'count' incremented and the reacting user appended to 'users'
    (or a new entry is created), and the log is written back. If the message
    is not in the log, an error is printed and the content is left unchanged.
    """
    reaction = payload['reaction']
    reacting_user = payload['user']

    if payload['item']['type'] == "message":
        # Get which message was reacted to
        parent_channel_id = payload['item']['channel']
        parent_ts = payload['item']['ts']

        with open(log_file_path(parent_channel_id, parent_ts), "r+") as log_file:
            message_list = json.load(log_file)
            message = next((m for m in message_list if m['ts'] == parent_ts), None)

            if message is not None:
                reactions = message.setdefault('reactions', [])
                reaction_entry = next((r for r in reactions if r['name'] == reaction), None)
                if reaction_entry is not None:
                    reaction_entry['count'] += 1
                    # Checking just in case something stupid happened.
                    # The key is 'users' (as in newly created entries); reading 'user' raised KeyError
                    if reacting_user not in reaction_entry['users']:
                        reaction_entry['users'].append(reacting_user)
                else:
                    reactions.append({'name': reaction, 'count': 1, 'users': [reacting_user]})
            else:
                print(f"Reaction {reaction} added to message (ts={parent_ts}, channel={parent_channel_id}), "
                      f"but message not found in log. Ignoring...",
                      file=err_stream, flush=True)

            log_file.seek(0)
            json.dump(message_list, log_file, indent=4)
            log_file.truncate()
@app.event("reaction_removed")
def remove_reaction(payload):
    """Removes reaction from an archived message

    Only reactions on items of type "message" are handled. The matching
    reaction entry gets its 'count' decremented and the user dropped from
    'users'; when the count is not above 1 the whole entry is removed, and an
    emptied 'reactions' list is deleted from the message. Every mismatch
    between the event and the log is printed to the error stream and ignored.
    The log file is rewritten in all cases.
    """
    reaction = payload['reaction']
    reacting_user = payload['user']

    if payload['item']['type'] == "message":
        # Get which message the reaction was removed from
        parent_channel_id = payload['item']['channel']
        parent_ts = payload['item']['ts']

        with open(log_file_path(parent_channel_id, parent_ts), "r+") as log_file:
            message_list = json.load(log_file)
            message = next((m for m in message_list if m['ts'] == parent_ts), None)

            if message is None:
                print(f"Reaction {reaction} removed from message (ts={parent_ts}, channel={parent_channel_id}), "
                      f"but message not found. Ignoring...",
                      file=err_stream, flush=True)
            elif "reactions" not in message:
                print(f"Reaction {reaction} removed from message (ts={parent_ts}, channel={parent_channel_id}), "
                      f"but message does not have reactions in log. Ignoring...",
                      file=err_stream, flush=True)
            else:
                # Should not need to check, but just in case
                reaction_entry = next((r for r in message['reactions'] if r['name'] == reaction), None)
                if reaction_entry is None:
                    print(f"Reaction {reaction} removed from message (ts={parent_ts}, channel={parent_channel_id}), "
                          f"but not in message's reaction list. Ignoring...",
                          file=err_stream, flush=True)
                elif reaction_entry['count'] > 1:
                    reaction_entry['count'] -= 1
                    # The logged user list may be out of sync with the event; list.remove
                    # would raise ValueError for an absent user, so check first
                    if reacting_user in reaction_entry['users']:
                        reaction_entry['users'].remove(reacting_user)
                else:
                    message['reactions'].remove(reaction_entry)
                    if not message['reactions']:
                        message.pop('reactions')

            log_file.seek(0)
            json.dump(message_list, log_file, indent=4)
            log_file.truncate()
# Guards the lookup-or-create step on file_locks
_file_locks_guard = Lock()


def _get_file_lock(path):
    """Returns the Lock registered for 'path' in file_locks, creating it on first use"""
    # Without the guard two threads could each create a Lock for the same file
    # and then both enter the critical section
    with _file_locks_guard:
        return file_locks.setdefault(path, Lock())


def add_to_archive(message):
    """Archives a message

    Appends 'message' to the daily JSON log <archive_path>/<channel name>/<ISO date>.json,
    creating the channel folder and the log file when they do not exist yet.
    The read-modify-write of the log is done while holding that file's lock,
    which is released even if reading or writing raises.
    """
    channel_id = message['channel']
    ts = message['ts']

    # Resolve the channel name once and build the log path from it directly;
    # going through log_file_path would issue a second conversations_info request
    channel_name = channel_id_to_name(channel_id)
    directory_path = archive_path / channel_name
    directory_path.mkdir(exist_ok=True, parents=True)
    current_day_path = directory_path / (timestamp_to_date_string(ts) + ".json")

    # Reading an entire day's messages and rewriting is incredibly lazy and inefficient
    # However it will do for now
    # Alternatives are
    # A) manually remove last ] in JSON file with file.seek() and append last message object and add ] again
    # B) manage an internal Archive object which writes to disk periodically (say at the end of each day / every 100 message)
    # B seems to be preferable at the moment to me

    print(f"Attempting to acquire file lock for {current_day_path}, thread id = {get_ident()}",
          file=debug_stream, flush=True)

    # Lock file in case other threads try to write data to it at the same time
    with _get_file_lock(current_day_path):
        print(f"Acquired file lock for {current_day_path}, thread id = {get_ident()}",
              file=debug_stream, flush=True)

        if current_day_path.is_file():
            with open(current_day_path, 'r+') as current_day:
                message_list = json.load(current_day)
                message_list.append(message)
                # Go back to start
                current_day.seek(0)
                json.dump(message_list, current_day, indent=4)
                current_day.truncate()
        else:
            # Create file if it does not exist
            with open(current_day_path, "w") as current_day:
                json.dump([message], current_day, indent=4)

    print(f"Released file lock for {current_day_path}, thread id = {get_ident()}",
          file=debug_stream, flush=True)