Skip to content

Commit

Permalink
Make downloading async for background tasks to run (#15)
Browse files Browse the repository at this point in the history
* turn the download interphase to an async method

also change it to a class method instead of a static one, because we may want to use other class methods. (refactor foreshadowing)

* make `YoutubeDownloader` async

* add download methods for usage by subclasses

This will make async downloads a lot easier if the library gives the download links directly or I webscrape them out

* make `InstagramDownloader` async and refactor

Had to use `asyncio.to_thread` due to downloading videos using a library just like `YoutubeDownloader`

* make `TwitterDownload` async and refactor

Used the super classes download methods to download the files.

* get rid of twitter helpers

* use await on download commands due to async change

* turn download tests to async

* make sure the path exists before downloading in `downloader.py`

THE TESTS FINALLY COUGHT A BUG LESSGOOO!!!
  • Loading branch information
kytpbs authored Aug 21, 2024
1 parent c8bd0f5 commit 963fddf
Show file tree
Hide file tree
Showing 11 changed files with 154 additions and 133 deletions.
12 changes: 6 additions & 6 deletions Tests/video_system/instagram/test_instagram.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,26 +27,26 @@ def remove_all_cache(self):
assert os.path.exists(DOWNLOAD_PATH)
shutil.rmtree(DOWNLOAD_PATH, ignore_errors=True)

def test_instagram_single_video_download(self):
videos = InstagramDownloader.download_video_from_link(
async def test_instagram_single_video_download(self):
videos = await InstagramDownloader.download_video_from_link(
TEST_REEL_1, path=DOWNLOAD_PATH
)
should_be_path = os.path.join("Tests", "video_system", "instagram", "should_be_0.mp4")

assert videos, "couldn't download, probably due to graphql error (login wasn't successful)"
self.download_single_video_test(videos, should_be_path)

def test_instagram_download_extra_params(self):
videos = InstagramDownloader.download_video_from_link(
async def test_instagram_download_extra_params(self):
videos = await InstagramDownloader.download_video_from_link(
TEST_REEL_2, path=DOWNLOAD_PATH
)
should_be_path = os.path.join("Tests", "video_system", "instagram", "should_be_0.mp4")

assert videos, "couldn't download, probably due to graphql error (login wasn't successful)"
self.download_single_video_test(videos, should_be_path)

def test_instagram_download_multiple_videos(self):
videos = InstagramDownloader.download_video_from_link(
async def test_instagram_download_multiple_videos(self):
videos = await InstagramDownloader.download_video_from_link(
TEST_REEL_3, path=DOWNLOAD_PATH
)

Expand Down
12 changes: 6 additions & 6 deletions Tests/video_system/twitter/test_twitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,24 +27,24 @@ def remove_all_cache(self):
assert os.path.exists(DOWNLOAD_PATH)
shutil.rmtree(DOWNLOAD_PATH, ignore_errors=True)

def test_twitter_single_video_download(self):
videos = TwitterDownloader.download_video_from_link(
async def test_twitter_single_video_download(self):
videos = await TwitterDownloader.download_video_from_link(
TEST_TWEET_1, path=DOWNLOAD_PATH
)
should_be_path = os.path.join(SHOULD_BE_FILES_PATH, "should_be_0.mp4")

self.download_single_video_test(videos, should_be_path)

def test_twitter_download_extra_params(self):
videos = TwitterDownloader.download_video_from_link(
async def test_twitter_download_extra_params(self):
videos = await TwitterDownloader.download_video_from_link(
TEST_TWEET_2, path=DOWNLOAD_PATH
)
should_be_path = os.path.join(SHOULD_BE_FILES_PATH, "should_be_0.mp4")

self.download_single_video_test(videos, should_be_path)

def test_twitter_download_multiple_videos(self):
videos = TwitterDownloader.download_video_from_link(
async def test_twitter_download_multiple_videos(self):
videos = await TwitterDownloader.download_video_from_link(
TEST_TWEET_3, path=DOWNLOAD_PATH
)

Expand Down
4 changes: 2 additions & 2 deletions Tests/video_system/youtube/test_youtube.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ def remove_cache(self):
assert os.path.exists(DOWNLOAD_PATH)
shutil.rmtree(DOWNLOAD_PATH, ignore_errors=True)

def test_basic_download(self):
async def test_basic_download(self):
try:
videos = YoutubeDownloader.download_video_from_link(TEST_YOUTUBE_1, DOWNLOAD_PATH)
videos = await YoutubeDownloader.download_video_from_link(TEST_YOUTUBE_1, DOWNLOAD_PATH)
except yt_dlp.DownloadError as e:
assert e.msg
import warnings
Expand Down
1 change: 1 addition & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
testpaths =
Tests
pythonpath = .
asyncio_mode = auto
17 changes: 0 additions & 17 deletions src/Helpers/twitter_helpers.py

This file was deleted.

5 changes: 3 additions & 2 deletions src/Youtube.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import asyncio
import functools
import logging
import os
Expand Down Expand Up @@ -95,7 +96,7 @@ def get_last_played_guilded() -> video_data_guild:

class YoutubeDownloader(VideoDownloader):
@staticmethod
def download_video_from_link(url: str, path: str | None = None) -> VIDEO_RETURN_TYPE:
async def download_video_from_link(url: str, path: str | None = None) -> VIDEO_RETURN_TYPE:
if path is None:
path = os.path.join("downloads", "youtube")

Expand All @@ -111,7 +112,7 @@ def download_video_from_link(url: str, path: str | None = None) -> VIDEO_RETURN_
}

with yt_dlp.YoutubeDL(costum_options) as ydl:
ydt = ydl.extract_info(url, download=True)
ydt = await asyncio.to_thread(ydl.extract_info, url, download=True)

if ydt is None:
return []
Expand Down
8 changes: 5 additions & 3 deletions src/download_commands.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import logging
import discord

from src.Helpers.twitter_helpers import convert_paths_to_discord_files
from src.downloading_system import get_downloader

def _convert_paths_to_discord_files(paths: list[str]) -> list[discord.File]:
return [discord.File(path) for path in paths]


async def download_video_command(interaction: discord.Interaction, url: str, is_ephemeral: bool = False):
#TODO: add better error handling then just catching all exceptions
Expand All @@ -16,9 +18,9 @@ async def download_video_command(interaction: discord.Interaction, url: str, is_
await interaction.response.defer(ephemeral=is_ephemeral)

try:
attachments = downloader.download_video_from_link(url)
attachments = await downloader.download_video_from_link(url)
file_paths = [attachment.path for attachment in attachments]
discord_files = convert_paths_to_discord_files(file_paths)
discord_files = _convert_paths_to_discord_files(file_paths)
except Exception as e:
await interaction.followup.send("Bir şey ters gitti... lütfen tekrar deneyin", ephemeral=True)
raise e # re-raise the exception so we can see what went wrong
Expand Down
66 changes: 64 additions & 2 deletions src/downloader.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import logging
from abc import ABC, abstractmethod
import os

import aiohttp

_NONE_STRING = "Doesn't exist"

Expand Down Expand Up @@ -43,9 +46,9 @@ class VideoDownloader(ABC):
INTERPHASE FOR DOWNLOADING CONTENT FROM A WEBSITE
"""

@staticmethod
@classmethod
@abstractmethod
def download_video_from_link(url: str, path: str | None = None) -> VIDEO_RETURN_TYPE:
async def download_video_from_link(cls, url: str, path: str | None = None) -> VIDEO_RETURN_TYPE:
"""
Downloads Videos from a url
if path is None, the default path is downloads/{website_name}
Expand All @@ -58,3 +61,62 @@ def download_video_from_link(url: str, path: str | None = None) -> VIDEO_RETURN_
path,
)
return []

@classmethod
async def _download_link(cls, url: str, download_to: str) -> str | None:
"""
Downloads a file from the given URL and saves it to the specified path.
Warning:
This function will not overwrite existing files. If the file already exists at the specified path, it will not be downloaded again.
Args:
url (str): The URL of the file to download.
download_to (str): The local file path where the downloaded file should be saved.
Returns:
downloaded_path (str): The path to the downloaded file if successful, otherwise None.
"""
# if we already downloaded the file before, return it
if os.path.exists(download_to):
return download_to

os.makedirs(os.path.dirname(download_to), exist_ok=True)

try:
async with aiohttp.ClientSession() as session:
async with session.get(url) as response:
response.raise_for_status()
data = await response.read()
with open(download_to, "wb") as file:
file.write(data)
except aiohttp.ClientError as e:
logging.error("Error while downloading instagram post: %s", str(e))
return None
return download_to

@classmethod
async def _download_links(cls, links: list[str], path: str, video_id: str) -> list[str]:
"""Downloads files from a list of URLs and saves them to the specified path.
Will not overwrite existing files. If a file already exists at the specified path, it will not be downloaded again.
Adds _{index} to the filename to avoid overwriting files with the same name.
Args:
links (list[str]): the list of URLs to download, will add _{index} starting at 1 to the filename for every file
path (str): the local file path where the downloaded files should be saved
video_id (str): the video ID to use in the filename, is the same thing as filename
Returns:
list[str]: the list of paths to the downloaded files
will not put the path in the list if the download failed
"""
downloaded_paths = []
for index, link in enumerate(links, start=1):
download_to = os.path.join(path, f"{video_id}_{index}.mp4")
downloaded_link = await cls._download_link(link, download_to)
if downloaded_link is None:
continue
downloaded_paths.append(downloaded_link)

return downloaded_paths
51 changes: 27 additions & 24 deletions src/instagram.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import asyncio
import logging
import os
import re
Expand Down Expand Up @@ -75,64 +76,66 @@ def _login() -> bool:
logging.error("Instagram login failed!!! FIX CREDENTIALS. Error: %s", e)
return False

def _try_login():
global logged_in # pylint: disable=global-statement # don't really mind having a global login
if not logged_in:
logged_in = _login()

logged_in = _login()

_try_login()

def _get_post_from_url(url: str) -> Post | None:

def _get_shortcode_from_url(url: str) -> str | None:
result = re.match(_SHORTCODE_REGEX, url)
if result is None:
return None
shortcode = result.group(1)
if not isinstance(shortcode, str):
return None
return shortcode

def _get_post_from_url(url: str) -> Post | None:
shortcode = _get_shortcode_from_url(url)
if shortcode is None:
return None
try:
return Post.from_shortcode(downloader.context, shortcode)
except ConnectionException as e: # probably graphql error
logging.exception(e)
return None

def _get_file_name(post: Post, index: int) -> str:
is_multi_vid = post.typename == "GraphSidecar"
to_add = f"_{index}" if is_multi_vid else ""
return f"{post.shortcode}{to_add}.mp4"

class InstagramDownloader(VideoDownloader):
@staticmethod
def download_video_from_link(url: str, path: str | None = None) -> VIDEO_RETURN_TYPE:
@classmethod
async def download_video_from_link(cls, url: str, path: str | None = None) -> VIDEO_RETURN_TYPE:
attachment_list: VIDEO_RETURN_TYPE = []
global logged_in # pylint: disable=global-statement # can't think of a better way rn
logged_in = _login() # retry login if it failed the first time
_try_login() # try to login if not already logged in

if path is None:
path = os.path.join("downloads", "instagram")

os.makedirs(path, exist_ok=True)

path = Path(path) # type: ignore

post = _get_post_from_url(url)
if post is None:
return attachment_list
downloader.filename_pattern = "{shortcode}"

is_video_list = post.get_is_videos()
is_video_list = list(filter(lambda x: x is True, is_video_list))
video_count = len(list(filter(None, post.get_is_videos())))

downloaded: bool = False
if post.typename == "GraphSidecar":
for index, _ in enumerate(is_video_list, start=1):

file_path = os.path.join(path, f"{post.shortcode}_{index}.mp4") # type: ignore # there is a bug in pylance...
file = VideoFile(file_path, post.caption)

if not os.path.exists(file.path) and not downloaded:
downloader.download_post(post, path) # type: ignore # path is literally a Path object it cannot be None...
downloaded = True

attachment_list.append(file)
else:
file_path = os.path.join(path, f"{post.shortcode}.mp4") # type: ignore # there is a bug in pylance...
for index in range(1, video_count + 1): # will run once if not sidecar
file_path = os.path.join(path, _get_file_name(post, index))
file = VideoFile(file_path, post.caption)

if not os.path.exists(file.path) and not downloaded:
downloader.download_post(post, path) # type: ignore # path is literally a Path object it cannot be None...
await asyncio.to_thread(downloader.download_post, post, Path(path))
downloaded = True

attachment_list.append(file)

return attachment_list
Loading

0 comments on commit 963fddf

Please sign in to comment.