Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Archive threads #196

Merged
merged 2 commits into from
Nov 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions modules/archive/archive_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
# Directory name joined with IMAGES in cog.py to build attachment save paths.
ARCHIVE = "archive"
# Sub-directory of ARCHIVE where message attachments are saved.
IMAGES = "images"
# NOTE(review): presumably the file name of a channel's text log; its use is
# not visible in this hunk -- confirm against archive_one_channel.
TEXT_LOG_PATH = "text_log.txt"
# Suffix used for the per-channel thread log directory:
# ARCHIVE/"<channel name>_<THREADS>".
THREADS = "threads"
96 changes: 66 additions & 30 deletions modules/archive/cog.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
import os
import zipfile
import asyncio
import unicodedata
import re
from nextcord.ext import commands
from typing import Tuple, Union
from typing import Tuple, Union, TextIO
from utils import discord_utils, logging_utils, command_predicates
from modules.archive import archive_constants, archive_utils

Expand All @@ -25,6 +27,40 @@ def __init__(self, bot):

archive_utils.reset_archive_dir()

async def archive_message(self, f: TextIO, msg: nextcord.Message):
    """Append one message to the text log and save its attachments.

    The log line has the form
    ``[ <mm-dd-YYYY, HH:MM:SS> ] <author right-justified to 25>: <content>``
    followed by `` <filename>`` for every attachment, then a newline.

    Each attachment is saved under ``ARCHIVE/IMAGES``. If a file with the
    same name already exists, a counter is inserted before the extension
    (``img.png`` -> ``img (1).png``). If saving raises ``OSError`` (e.g. the
    file name is too long), the attachment is saved as
    ``path_too_long_<n><ext>`` in the same directory instead.

    Args:
        f: Open, writable text file the log line is written to.
        msg: The message to archive.

    Raises:
        OSError: If the fallback save fails as well.
    """
    f.write(
        f"[ {msg.created_at.strftime('%m-%d-%Y, %H:%M:%S')} ] "
        f"{msg.author.display_name.rjust(25, ' ')}: "
        f"{msg.clean_content}"
    )
    for attachment in msg.attachments:
        f.write(f" {attachment.filename}")
        images_dir = os.path.join(
            archive_constants.ARCHIVE,
            archive_constants.IMAGES,
        )
        original_path = os.path.join(images_dir, attachment.filename)
        # splitext only splits off the final extension, so names with several
        # dots ("a.b.png") or with no extension at all are handled correctly.
        root, ext = os.path.splitext(original_path)
        # Change duplicate filenames: img.png would become img (1).png
        proposed_path = original_path
        dupe_counter = 1
        while os.path.exists(proposed_path):
            proposed_path = f"{root} ({dupe_counter}){ext}"
            dupe_counter += 1
        # The discord filenames can get too long and throw an OSError
        try:
            await attachment.save(proposed_path)
        except OSError:
            # Keep the fallback inside the images directory and pick a name
            # that is not taken, so several long-named attachments do not
            # overwrite each other.
            fallback_counter = 1
            fallback_path = os.path.join(
                images_dir, f"path_too_long_{fallback_counter}{ext}"
            )
            while os.path.exists(fallback_path):
                fallback_counter += 1
                fallback_path = os.path.join(
                    images_dir, f"path_too_long_{fallback_counter}{ext}"
                )
            await attachment.save(fallback_path)
    # Important: Write the newline after each comment is done
    f.write("\n")

async def archive_one_channel(
self, channel: Union[nextcord.TextChannel, nextcord.Thread]
) -> Tuple[nextcord.File, int, nextcord.File, int]:
Expand All @@ -36,38 +72,38 @@ async def archive_one_channel(
)
with open(text_log_path, "w") as f:
async for msg in channel.history(limit=None, oldest_first=True):
f.write(
f"[ {msg.created_at.strftime('%m-%d-%Y, %H:%M:%S')} ] "
f"{msg.author.display_name.rjust(25, ' ')}: "
f"{msg.clean_content}"
)
for attachment in msg.attachments:
f.write(f" {attachment.filename}")
# change duplicate filenames
# img.png would become img (1).png
original_path = os.path.join(
await self.archive_message(f, msg)
# Threads are attached to normal messages
if msg.flags.has_thread and msg.thread:
f.write(f"[ {msg.thread.id} ] {'THREAD'.rjust(25, ' ')}: ")
f.write(msg.thread.name + "\n")
thread_dir = os.path.join(
archive_constants.ARCHIVE,
archive_constants.IMAGES,
attachment.filename,
f"{channel.name}_{archive_constants.THREADS}",
)
proposed_path = original_path
dupe_counter = 1
while os.path.exists(proposed_path):
proposed_path = (
original_path.split(".")[0]
+ f" ({dupe_counter})."
+ original_path.split(".")[1]
# The thread "name" is by default the original message content. Slugify and truncate if necessary
norm_thread_name = (
unicodedata.normalize(
"NFKD", f"{msg.thread.id}_{msg.thread.name}"
)
dupe_counter += 1
# The discord filenames can get too long and throw an OSError
try:
await attachment.save(proposed_path)
except OSError:
await attachment.save(
f"path_too_long_{dupe_counter}.{original_path.split('.')[1]}"
)
# Important: Write the newline after each comment is done
f.write("\n")
.encode("ascii", "ignore")
.decode("ascii")
)
norm_thread_name = re.sub(r"[^\w\s-]", "", norm_thread_name.lower())
norm_thread_name = re.sub(r"[-\s]+", "-", norm_thread_name).strip(
"-_"
)
norm_thread_name = norm_thread_name[:250]
os.makedirs(thread_dir, exist_ok=True)
thread_log_path = os.path.join(
thread_dir, norm_thread_name + ".txt"
)
with open(thread_log_path, "w") as thread_f:
async for t_msg in msg.thread.history(
limit=None, oldest_first=True
):
await self.archive_message(thread_f, t_msg)

text_file_size = f.tell()

ZIP_FILENAME = os.path.join(
Expand Down