Skip to content

Commit

Permalink
Add the ability to download videos from twitter (#13)
Browse files Browse the repository at this point in the history
Uses a third-party API, "twitsave.com". Given the current state of the official Twitter API, I cannot be bothered to pay $100 a month for just 1.5k posts per month...

Thanks to the random devs who created a much more easily scrapeable way to do this!

Currently it's in a BETA state, but the latest tests show that it seems to work correctly for now, so it is time to test in prod
  • Loading branch information
kytpbs authored Jul 28, 2024
1 parent cd554a9 commit e508c11
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 0 deletions.
17 changes: 17 additions & 0 deletions src/Helpers/twitter_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import re
from bs4 import BeautifulSoup
import discord

# Matches the run of digits that follows "status/" in a tweet URL;
# capture group 1 holds those digits (the tweet ID).
TWITTER_ID_REGEX = r"status/(\d+)"

def convert_paths_to_discord_files(paths: list[str]) -> list[discord.File]:
    """Wrap each file path in a ``discord.File`` so it can be sent as an attachment.

    Args:
        paths: Paths of the files to wrap.

    Returns:
        One ``discord.File`` per entry of ``paths``, in the same order.
    """
    return list(map(discord.File, paths))

def get_tweet_id(url: str) -> str | None:
    """Extract the numeric tweet ID from a tweet URL.

    Args:
        url: Text to search; the ID is the run of digits following ``status/``.

    Returns:
        The ID as a string, or ``None`` when ``url`` contains no
        ``status/<digits>`` part.
    """
    found = re.search(TWITTER_ID_REGEX, url)
    if found is None:
        return None
    return found.group(1)

def get_filename_from_data(data: BeautifulSoup) -> str:
    """Derive an ``.mp4`` file name from the video title in a scraped page.

    Args:
        data: Parsed page; the title is read from the first ``p`` with class
            ``m-2`` inside the first ``div`` with class ``leading-tight``.

    Returns:
        The title with every run of characters other than ASCII letters and
        digits replaced by a single space, stripped of surrounding whitespace,
        with ``.mp4`` appended.

    Raises:
        IndexError: If the expected ``div`` or ``p`` element is not present.
    """
    title_container = data.find_all("div", class_="leading-tight")[0]
    title_paragraph = title_container.find_all("p", class_="m-2")[0]
    raw_title = title_paragraph.text  # Video file name
    # Remove special characters from file name
    cleaned_title = re.sub(r"[^a-zA-Z0-9]+", " ", raw_title).strip()
    return cleaned_title + ".mp4"
18 changes: 18 additions & 0 deletions src/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

import src.client as client
import src.voice_commands as vc_cmds
from src.twitter import download_tweets_attachments
from src.Helpers.twitter_helpers import convert_paths_to_discord_files
from Constants import BOT_ADMIN_SERVER_ID, BOT_OWNER_ID, CYAN, KYTPBS_TAG
from src import GPT, Youtube
from src.Helpers.birthday_helpers import get_user_and_date_from_string
Expand Down Expand Up @@ -305,6 +307,22 @@ async def ping(interaction: discord.Interaction):
await interaction.response.send_message(f"Pong: {round(discord_client.latency * 1000)}ms")


@tree.command(name="twitter-indir", description="Twitter'dan bir Tweet'i indirir, ve içindeki medyayı gösterir")
@app_commands.allowed_installs(guilds=True, users=True)
@app_commands.allowed_contexts(guilds=True, dms=True, private_channels=True)
async def twitter_download(interaction: discord.Interaction, url: str):
    """Slash command: download the media of a tweet and post it as attachments.

    The response is deferred first because the download can take a while.
    On any failure the user gets a generic error message; exceptions are
    re-raised afterwards so they still reach the logs.

    Args:
        interaction: The interaction that invoked the command.
        url: URL of the tweet whose media should be downloaded.
    """
    # Local import so this block does not depend on the module's import list.
    import asyncio

    error_message = "Bir şey ters gitti... lütfen tekrar deneyin"

    await interaction.response.defer(ephemeral=False)
    # TODO: add better error handling than just catching all exceptions
    try:
        # The downloader performs blocking HTTP requests; run it in a worker
        # thread so the bot's event loop stays responsive while it waits.
        paths = await asyncio.to_thread(download_tweets_attachments, url)
        attachments = convert_paths_to_discord_files(paths)
    except Exception:
        await interaction.followup.send(error_message, ephemeral=True)
        raise  # re-raise with the original traceback so we can see what went wrong
    if interaction.channel is None or isinstance(interaction.channel, (discord.ForumChannel, discord.CategoryChannel)):
        return
    if not attachments:
        # The downloader returns an empty list when its request fails; there is
        # nothing to upload, so report the failure instead of sending no files.
        await interaction.followup.send(error_message, ephemeral=True)
        return
    await interaction.followup.send(files=attachments)


def get_tree_instance():
    """Return the module-level ``tree`` object that this module's commands are registered on."""
    return tree

Expand Down
73 changes: 73 additions & 0 deletions src/twitter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import logging
import os
import bs4
from dotenv import load_dotenv
import requests

from src.Helpers.twitter_helpers import get_filename_from_data, get_tweet_id

# Load variables from a .env file into the process environment.
# NOTE(review): nothing in the visible part of this module reads an environment
# variable — confirm this call is still needed here.
load_dotenv()
# Prefix of the twitsave.com lookup URL; the tweet URL is appended as the value
# of the "url" query parameter.
API_URL_START = "https://twitsave.com/info?url="


def download_video_from_tweet(url: str, filename: int | str, path: str | None = None) -> str | None:
    """Download a single video file and save it as an ``.mp4``.

    Args:
        url: Direct URL of the video file to fetch.
        filename: Name to save the video under. ``.mp4`` is appended unless the
            name already ends with it.
        path: Directory to save into; created if missing. Defaults to
            ``downloads/twitter``.

    Returns:
        Path of the written file, or ``None`` if the request failed or the
        server answered with an HTTP error status (the error is logged).
    """
    if path is None:
        path = os.path.join("downloads", "twitter")

    os.makedirs(path, exist_ok=True)

    try:
        response = requests.get(url, timeout=30)
        # Treat 4xx/5xx answers as failures so an error page is never written
        # to disk as if it were a video.
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        logging.error("Error while downloading tweet: %s", str(e))
        return None

    # Names coming from get_filename_from_data already end in ".mp4"; do not
    # append the extension a second time.
    name = str(filename)
    if not name.endswith(".mp4"):
        name += ".mp4"

    filepath = os.path.join(path, name)
    with open(filepath, "wb") as file:
        file.write(response.content)

    return filepath


def download_tweets_attachments(url: str, path: str | None = None) -> list[str]:
    """Download the video attached to a tweet via twitsave.com.

    The twitsave.com info page for the tweet is fetched and scraped for its
    download links; the first link is then downloaded.

    Args:
        url: URL of the tweet.
        path: Directory to save into, passed on to ``download_video_from_tweet``
            (which falls back to its own default when ``None``).

    Returns:
        A list with the path of the downloaded file, or an empty list when the
        info page could not be fetched or the video download failed.

    Raises:
        IndexError: If the info page does not contain the expected download
            links.
    """
    # Local import so this block does not depend on the module's import list.
    from urllib.parse import quote

    attachment_list: list[str] = []
    try:
        # Percent-encode the tweet URL so its own "?" and "&" (e.g. share
        # tracking parameters) stay inside the single "url" query value.
        response = requests.get(API_URL_START + quote(url, safe=""), timeout=30)
        # An HTTP error status is handled like any other failed request.
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        logging.error("Error while downloading tweet: %s", str(e))
        return attachment_list
    data = bs4.BeautifulSoup(response.text, "html.parser")
    # TODO: ERROR-HANDLING: Try-Catch for the scraping going wrong
    download_button = data.find_all("div", class_="origin-top-right")[0]
    quality_buttons = download_button.find_all("a")
    # The first link is taken to be the highest quality video url.
    highest_quality_url = quality_buttons[0].get("href")
    # Prefer the tweet ID as the file name; fall back to the scraped title.
    tweet_id = get_tweet_id(url)
    filename = tweet_id if tweet_id is not None else get_filename_from_data(data)
    # TODO: Handle multiple attachments, currently don't know what happens with multiple attachments
    attachment = download_video_from_tweet(
        highest_quality_url, filename=filename, path=path
    )

    if attachment is None:
        return attachment_list

    attachment_list.append(attachment)

    return attachment_list

0 comments on commit e508c11

Please sign in to comment.