-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add the ability to download videos from twitter (#13)
Uses a third-party API ("twitsave.com"), but at this point, with the state of the normal Twitter API, I cannot be bothered to pay $100 a month just for 1.5k posts per month... Thanks, random devs, for creating a much more easily scrapeable way to do this! Currently it's in a BETA state, but the latest tests show that it seems to work correctly for now, so time to test in prod.
- Loading branch information
Showing
3 changed files
with
108 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
import re | ||
from bs4 import BeautifulSoup | ||
import discord | ||
|
||
# Pattern whose single capture group is the run of digits following "status/" in a URL.
TWITTER_ID_REGEX = r"status/(\d+)" | ||
|
||
def convert_paths_to_discord_files(paths: list[str]) -> list[discord.File]:
    """Build one ``discord.File`` per entry of ``paths``, preserving order.

    Args:
        paths (list[str]): File paths to wrap.

    Returns:
        list[discord.File]: One ``discord.File`` for each given path.
    """
    return list(map(discord.File, paths))
|
||
def get_tweet_id(url: str) -> str | None:
    """Extract the numeric ID that follows ``status/`` in a tweet URL.

    Args:
        url (str): URL to search.

    Returns:
        str | None: The matched digits as a string, or None when the URL
        contains no ``status/<digits>`` segment.
    """
    found = re.search(TWITTER_ID_REGEX, url)
    if found is None:
        return None
    return found.group(1)
|
||
def get_filename_from_data(data: BeautifulSoup) -> str:
    """Derive an ``.mp4`` file name from the scraped page's title text.

    The text is taken from the first ``p.m-2`` element inside the first
    ``div.leading-tight`` element of the page.

    Args:
        data (BeautifulSoup): Parsed page to read the title from.

    Returns:
        str: The title with every run of non-alphanumeric (ASCII) characters
        collapsed to a single space, stripped, and suffixed with ``.mp4``.

    Raises:
        IndexError: If either of the expected elements is missing.
    """
    title_container = data.find_all("div", class_="leading-tight")[0]
    raw_title = title_container.find_all("p", class_="m-2")[0].text  # Video file name
    # Remove special characters from the file name
    cleaned_title = re.sub(r"[^a-zA-Z0-9]+", " ", raw_title).strip()
    return cleaned_title + ".mp4"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
import logging | ||
import os | ||
import bs4 | ||
from dotenv import load_dotenv | ||
import requests | ||
|
||
from src.Helpers.twitter_helpers import get_filename_from_data, get_tweet_id | ||
|
||
# NOTE(review): no environment variable is read in the visible code; confirm this call is still needed here.
load_dotenv() | ||
# Prefix of the twitsave.com info endpoint; the tweet URL is appended directly after "url=".
API_URL_START = "https://twitsave.com/info?url=" | ||
|
||
|
||
def download_video_from_tweet(
    url: str, filename: int | str, path: str | None = None
) -> str | None:
    """Download a single video file and save it to disk as an ``.mp4``.

    Args:
        url (str): Direct URL of the video file to fetch.
        filename (int | str): Name to save the video as. The ``.mp4``
            extension is appended unless the name already ends with it.
        path (str | None, optional): Directory to save the video in.
            Defaults to None, which means ``downloads/twitter``.

    Returns:
        str | None: Path of the saved file, or None if the HTTP request
        failed or returned an error status (the error is logged).
    """
    if path is None:
        path = os.path.join("downloads", "twitter")

    os.makedirs(path, exist_ok=True)

    try:
        response = requests.get(url, timeout=30)
        # Treat 4xx/5xx as failures so an error page is never saved as a video.
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        logging.error("Error while downloading tweet: %s", str(e))
        return None

    # A caller may pass a name that already carries the extension
    # (get_filename_from_data returns one); avoid producing "name.mp4.mp4".
    name = str(filename)
    if not name.lower().endswith(".mp4"):
        name += ".mp4"

    filepath = os.path.join(path, name)
    with open(filepath, "wb") as file:
        file.write(response.content)

    return filepath
|
||
|
||
def download_tweets_attachments(url: str, path: str | None = None) -> list[str]:
    """Download the video attached to a tweet by scraping the twitsave.com info page.

    Args:
        url (str): URL of the tweet.
        path (str | None, optional): Directory to save the video in; passed
            through to ``download_video_from_tweet``. Defaults to None.

    Returns:
        list[str]: Paths of the downloaded files. Empty when the info page
        could not be fetched, the expected download link was not found on
        it, or the video download itself failed (each case is logged).
    """
    # Local import: only this function needs it.
    from urllib.parse import quote

    attachment_list: list[str] = []
    try:
        # Percent-encode the tweet URL so its own "?" / "&" are not parsed
        # as extra query parameters of the info request.
        response = requests.get(API_URL_START + quote(url, safe=""), timeout=30)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        logging.error("Error while downloading tweet: %s", str(e))
        return attachment_list

    data = bs4.BeautifulSoup(response.text, "html.parser")
    try:
        download_button = data.find_all("div", class_="origin-top-right")[0]
        quality_buttons = download_button.find_all("a")
        # The first link is taken to be the highest quality video url.
        highest_quality_url = quality_buttons[0].get("href")
        tweet_id = get_tweet_id(url)
        if tweet_id is not None:
            filename = tweet_id
        else:
            # The scraped name already ends in ".mp4" and the downloader
            # appends the extension itself, so drop it here.
            filename = get_filename_from_data(data).removesuffix(".mp4")
    except IndexError:
        # The page did not contain the expected elements (layout changed,
        # or the tweet has no downloadable video).
        logging.error("Error while scraping tweet page: no download link found for %s", url)
        return attachment_list

    if not highest_quality_url:
        logging.error("Error while scraping tweet page: download link has no href for %s", url)
        return attachment_list

    # TODO: Handle multiple attachments, currently don't know what happens with multiple attachments
    attachment = download_video_from_tweet(
        highest_quality_url, filename=filename, path=path
    )

    if attachment is None:
        return attachment_list

    attachment_list.append(attachment)

    return attachment_list