Add the ability to download videos from twitter (#13)

Uses a third-party API, "twitsave.com". Given the state of the official Twitter API, I cannot be bothered to pay $100 a month for just 1.5k posts per month... Thanks to the random devs who created a much more easily scrapeable way to do this! It is currently in a BETA state, but the latest tests show that it seems to work correctly for now, so it is time to test in prod.
kytpbs · Jul 28, 2024 · e508c11 · e508c11
1 parent cd554a9
commit e508c11
Show file tree

Hide file tree

Showing 3 changed files with 108 additions and 0 deletions.
diff --git a/src/Helpers/twitter_helpers.py b/src/Helpers/twitter_helpers.py
@@ -0,0 +1,17 @@
+import re
+from bs4 import BeautifulSoup
+import discord
+
+# Captures the run of digits that directly follows "status/" in a tweet URL.
+TWITTER_ID_REGEX = r"status/(\d+)"
+
+def convert_paths_to_discord_files(paths: list[str]) -> list[discord.File]:
+    """Wrap every path in ``paths`` in a ``discord.File``, keeping the input order."""
+    files: list[discord.File] = []
+    for file_path in paths:
+        files.append(discord.File(file_path))
+    return files
+
+def get_tweet_id(url: str) -> str | None:
+    """Return the digits that follow ``status/`` in ``url``, or None if the pattern is absent."""
+    found = re.search(TWITTER_ID_REGEX, url)
+    if found is None:
+        return None
+    return found.group(1)
+
+def get_filename_from_data(data: BeautifulSoup) -> str:
+    """
+    Build an ``.mp4`` file name from the text found in the parsed page.
+
+    The text is read from the first ``p.m-2`` element inside the first
+    ``div.leading-tight`` element; an IndexError is raised when either is missing.
+    """
+    title_container = data.find_all("div", class_="leading-tight")[0]
+    raw_title = title_container.find_all("p", class_="m-2")[0].text  # Video file name
+    # Collapse each run of non-alphanumeric characters into a single space
+    cleaned_title = re.sub(r"[^a-zA-Z0-9]+", ' ', raw_title).strip()
+    return cleaned_title + ".mp4"
diff --git a/src/commands.py b/src/commands.py
@@ -7,6 +7,8 @@
 
 import src.client as client
 import src.voice_commands as vc_cmds
+from src.twitter import download_tweets_attachments
+from src.Helpers.twitter_helpers import convert_paths_to_discord_files
 from Constants import BOT_ADMIN_SERVER_ID, BOT_OWNER_ID, CYAN, KYTPBS_TAG
 from src import GPT, Youtube
 from src.Helpers.birthday_helpers import get_user_and_date_from_string
@@ -305,6 +307,22 @@ async def ping(interaction: discord.Interaction):
     await interaction.response.send_message(f"Pong: {round(discord_client.latency * 1000)}ms")
 
 
+@tree.command(name="twitter-indir", description="Twitter'dan bir Tweet'i indirir, ve içindeki medyayı gösterir")
+@app_commands.allowed_installs(guilds=True, users=True)
+@app_commands.allowed_contexts(guilds=True, dms=True, private_channels=True)
+async def twitter_download(interaction: discord.Interaction, url: str):
+    """
+    Slash command that downloads the media of the tweet at ``url`` and posts it as a followup.
+
+    Args:
+        interaction (discord.Interaction): The interaction that invoked the command.
+        url (str): URL of the tweet to download.
+
+    Raises:
+        Exception: Anything raised while downloading or wrapping the files is re-raised
+            after the user has been sent a generic error message.
+    """
+    import asyncio  # local import: this block does not rely on the file's import header
+
+    await interaction.response.defer(ephemeral=False)
+    error_message = "Bir şey ters gitti... lütfen tekrar deneyin"
+    # TODO: add better error handling than just catching all exceptions
+    try:
+        # download_tweets_attachments performs blocking `requests` calls; running it in a
+        # worker thread keeps the event loop responsive while the download is in progress.
+        paths = await asyncio.to_thread(download_tweets_attachments, url)
+        attachments = convert_paths_to_discord_files(paths)
+    except Exception:
+        await interaction.followup.send(error_message, ephemeral=True)
+        raise  # re-raise the exception so we can see what went wrong
+    if interaction.channel is None or isinstance(interaction.channel, (discord.ForumChannel, discord.CategoryChannel)):
+        # NOTE(review): returning here leaves the deferred interaction without a followup - confirm this is intended
+        return
+    if not attachments:
+        # The downloader returns an empty list when a request fails; tell the user
+        # instead of trying to send a message with no files.
+        await interaction.followup.send(error_message, ephemeral=True)
+        return
+    await interaction.followup.send(files=attachments)
+
+
 def get_tree_instance():
     """Return the module-level ``tree`` object that this file's commands are registered on."""
     return tree
 

diff --git a/src/twitter.py b/src/twitter.py
@@ -0,0 +1,73 @@
+import logging
+import os
+import bs4
+from dotenv import load_dotenv
+import requests
+
+from src.Helpers.twitter_helpers import get_filename_from_data, get_tweet_id
+
+# Runs at import time; presumably loads variables from a .env file - nothing in this module reads them directly.
+load_dotenv()
+# Prefix of the twitsave.com info page URL; the tweet URL is appended as the `url` query parameter.
+API_URL_START = "https://twitsave.com/info?url="
+
+
+def download_video_from_tweet(url: str, filename: int | str, path: str | None = None) -> str | None:
+    """
+    Downloads the video at the given url and saves it as an .mp4 file,
+    if path is None, the default path is downloads/twitter
+
+    Args:
+        url (str): Direct url of the video file to download.
+        filename (int | str): the file name to save the video as; ".mp4" is appended
+            unless the name already ends with it
+        path (str | None, optional): Directory to save the video in. Defaults to None.
+
+    Returns:
+        str | None : path of the saved file, or None if the request failed
+    """
+    if path is None:
+        path = os.path.join("downloads", "twitter")
+
+    os.makedirs(path, exist_ok=True)
+
+    try:
+        response = requests.get(url, timeout=30)
+        # Treat HTTP error statuses as failures too, so an error page is never saved as a video
+        response.raise_for_status()
+    except requests.exceptions.RequestException as e:
+        logging.error("Error while downloading tweet: %s", str(e))
+        return None
+
+    # Names produced by get_filename_from_data already end in ".mp4"; avoid "name.mp4.mp4"
+    name = str(filename)
+    if not name.lower().endswith(".mp4"):
+        name += ".mp4"
+
+    filepath = os.path.join(path, name)
+    with open(filepath, "wb") as file:
+        file.write(response.content)
+
+    return filepath
+
+
+def download_tweets_attachments(url: str, path: str | None = None) -> list[str]:
+    """
+    Downloads the video of a tweet by scraping its twitsave.com info page.
+
+    Args:
+        url (str): URL of the tweet.
+        path (str | None, optional): Directory to save the video in; passed through to
+            download_video_from_tweet. Defaults to None.
+
+    Returns:
+        list[str]: paths of the downloaded files; empty if a request failed
+
+    Raises:
+        IndexError: if the expected download elements are not found in the page.
+    """
+    from urllib.parse import quote  # local import: keeps this block self-contained
+
+    attachment_list: list[str] = []
+    try:
+        # Percent-encode the tweet URL: its own "?", "&" and "#" would otherwise be
+        # parsed as part of the twitsave query string instead of the `url` value.
+        response = requests.get(API_URL_START + quote(url, safe=""), timeout=30)
+    except requests.exceptions.RequestException as e:
+        logging.error("Error while downloading tweet: %s", str(e))
+        return attachment_list
+    data = bs4.BeautifulSoup(response.text, "html.parser")
+    # TODO: ERROR-HANDLING: Try-Catch for the scraping going wrong
+    download_button = data.find_all("div", class_="origin-top-right")[0]
+    quality_buttons = download_button.find_all("a")
+    highest_quality_url = quality_buttons[0].get("href")  # Highest quality video url
+    tweet_id = get_tweet_id(url)
+    # Prefer the tweet id as the file name; fall back to the name scraped from the page
+    filename = tweet_id if tweet_id is not None else get_filename_from_data(data)
+    # TODO: Handle multiple attachments, currently don't know what happens with multiple attachments
+    attachment = download_video_from_tweet(
+        highest_quality_url, filename=filename, path=path
+    )
+
+    if attachment is None:
+        return attachment_list
+
+    attachment_list.append(attachment)
+
+    return attachment_list