Make downloading async for background tasks to run (#15)

* turn the download interphase to an async method also change it to a class method instead of a static one, because we may want to use other class methods. (refactor foreshadowing) * make `YoutubeDownloader` async * add download methods for usage by subclasses This will make async downloads a lot easier if the library gives the download links directly or I webscrape them out * make `InstagramDownloader` async and refactor Had to use `asyncio.to_thread` due to downloading videos using a library just like `YoutubeDownloader` * make `TwitterDownload` async and refactor Used the super classes download methods to download the files. * get rid of twitter helpers * use await on download commands due to async change * turn download tests to async * make sure the path exists before downloading in `downloader.py` THE TESTS FINALLY COUGHT A BUG LESSGOOO!!!
kytpbs · Aug 21, 2024 · 963fddf · 963fddf
1 parent c8bd0f5
commit 963fddf
Show file tree

Hide file tree

Showing 11 changed files with 154 additions and 133 deletions.
diff --git a/Tests/video_system/instagram/test_instagram.py b/Tests/video_system/instagram/test_instagram.py
@@ -27,26 +27,26 @@ def remove_all_cache(self):
         assert os.path.exists(DOWNLOAD_PATH)
         shutil.rmtree(DOWNLOAD_PATH, ignore_errors=True)
 
-    def test_instagram_single_video_download(self):
-        videos = InstagramDownloader.download_video_from_link(
+    async def test_instagram_single_video_download(self):
+        videos = await InstagramDownloader.download_video_from_link(
             TEST_REEL_1, path=DOWNLOAD_PATH
         )
         should_be_path = os.path.join("Tests", "video_system", "instagram", "should_be_0.mp4")
 
         assert videos, "couldn't download, probably due to graphql error (login wasn't successful)"
         self.download_single_video_test(videos, should_be_path)
 
-    def test_instagram_download_extra_params(self):
-        videos = InstagramDownloader.download_video_from_link(
+    async def test_instagram_download_extra_params(self):
+        videos = await InstagramDownloader.download_video_from_link(
             TEST_REEL_2, path=DOWNLOAD_PATH
         )
         should_be_path = os.path.join("Tests", "video_system", "instagram", "should_be_0.mp4")
 
         assert videos, "couldn't download, probably due to graphql error (login wasn't successful)"
         self.download_single_video_test(videos, should_be_path)
 
-    def test_instagram_download_multiple_videos(self):
-        videos = InstagramDownloader.download_video_from_link(
+    async def test_instagram_download_multiple_videos(self):
+        videos = await InstagramDownloader.download_video_from_link(
             TEST_REEL_3, path=DOWNLOAD_PATH
         )
 

diff --git a/Tests/video_system/twitter/test_twitter.py b/Tests/video_system/twitter/test_twitter.py
@@ -27,24 +27,24 @@ def remove_all_cache(self):
         assert os.path.exists(DOWNLOAD_PATH)
         shutil.rmtree(DOWNLOAD_PATH, ignore_errors=True)
 
-    def test_twitter_single_video_download(self):
-        videos = TwitterDownloader.download_video_from_link(
+    async def test_twitter_single_video_download(self):
+        videos = await TwitterDownloader.download_video_from_link(
             TEST_TWEET_1, path=DOWNLOAD_PATH
         )
         should_be_path = os.path.join(SHOULD_BE_FILES_PATH, "should_be_0.mp4")
 
         self.download_single_video_test(videos, should_be_path)
 
-    def test_twitter_download_extra_params(self):
-        videos = TwitterDownloader.download_video_from_link(
+    async def test_twitter_download_extra_params(self):
+        videos = await TwitterDownloader.download_video_from_link(
             TEST_TWEET_2, path=DOWNLOAD_PATH
         )
         should_be_path = os.path.join(SHOULD_BE_FILES_PATH, "should_be_0.mp4")
 
         self.download_single_video_test(videos, should_be_path)
 
-    def test_twitter_download_multiple_videos(self):
-        videos = TwitterDownloader.download_video_from_link(
+    async def test_twitter_download_multiple_videos(self):
+        videos = await TwitterDownloader.download_video_from_link(
             TEST_TWEET_3, path=DOWNLOAD_PATH
         )
 

diff --git a/Tests/video_system/youtube/test_youtube.py b/Tests/video_system/youtube/test_youtube.py
@@ -23,9 +23,9 @@ def remove_cache(self):
         assert os.path.exists(DOWNLOAD_PATH)
         shutil.rmtree(DOWNLOAD_PATH, ignore_errors=True)
 
-    def test_basic_download(self):
+    async def test_basic_download(self):
         try:
-            videos = YoutubeDownloader.download_video_from_link(TEST_YOUTUBE_1, DOWNLOAD_PATH)
+            videos = await YoutubeDownloader.download_video_from_link(TEST_YOUTUBE_1, DOWNLOAD_PATH)
         except yt_dlp.DownloadError as e:
             assert e.msg
             import warnings

diff --git a/pytest.ini b/pytest.ini
@@ -2,3 +2,4 @@
 testpaths =
     Tests
 pythonpath = .
+asyncio_mode = auto
diff --git a/src/Helpers/twitter_helpers.py b/src/Helpers/twitter_helpers.py
diff --git a/src/Youtube.py b/src/Youtube.py
@@ -1,3 +1,4 @@
+import asyncio
 import functools
 import logging
 import os
@@ -95,7 +96,7 @@ def get_last_played_guilded() -> video_data_guild:
 
 class YoutubeDownloader(VideoDownloader):
     @staticmethod
-    def download_video_from_link(url: str, path: str | None = None) -> VIDEO_RETURN_TYPE:
+    async def download_video_from_link(url: str, path: str | None = None) -> VIDEO_RETURN_TYPE:
         if path is None:
             path = os.path.join("downloads", "youtube")
 
@@ -111,7 +112,7 @@ def download_video_from_link(url: str, path: str | None = None) -> VIDEO_RETURN_
         }
 
         with yt_dlp.YoutubeDL(costum_options) as ydl:
-            ydt = ydl.extract_info(url, download=True)
+            ydt = await asyncio.to_thread(ydl.extract_info, url, download=True)
 
         if ydt is None:
             return []

diff --git a/src/download_commands.py b/src/download_commands.py
@@ -1,9 +1,11 @@
 import logging
 import discord
 
-from src.Helpers.twitter_helpers import convert_paths_to_discord_files
 from src.downloading_system import get_downloader
 
+def _convert_paths_to_discord_files(paths: list[str]) -> list[discord.File]:
+    return [discord.File(path) for path in paths]
+
 
 async def download_video_command(interaction: discord.Interaction, url: str, is_ephemeral: bool = False):
     #TODO: add better error handling then just catching all exceptions
@@ -16,9 +18,9 @@ async def download_video_command(interaction: discord.Interaction, url: str, is_
     await interaction.response.defer(ephemeral=is_ephemeral)
 
     try:
-        attachments = downloader.download_video_from_link(url)
+        attachments = await downloader.download_video_from_link(url)
         file_paths = [attachment.path for attachment in attachments]
-        discord_files = convert_paths_to_discord_files(file_paths)
+        discord_files = _convert_paths_to_discord_files(file_paths)
     except Exception as e:
         await interaction.followup.send("Bir şey ters gitti... lütfen tekrar deneyin", ephemeral=True)
         raise e # re-raise the exception so we can see what went wrong

diff --git a/src/downloader.py b/src/downloader.py
@@ -1,5 +1,8 @@
 import logging
 from abc import ABC, abstractmethod
+import os
+
+import aiohttp
 
 _NONE_STRING = "Doesn't exist"
 
@@ -43,9 +46,9 @@ class VideoDownloader(ABC):
     INTERPHASE FOR DOWNLOADING CONTENT FROM A WEBSITE
     """
 
-    @staticmethod
+    @classmethod
     @abstractmethod
-    def download_video_from_link(url: str, path: str | None = None) -> VIDEO_RETURN_TYPE:
+    async def download_video_from_link(cls, url: str, path: str | None = None) -> VIDEO_RETURN_TYPE:
         """
         Downloads Videos from a url
         if path is None, the default path is downloads/{website_name}
@@ -58,3 +61,62 @@ def download_video_from_link(url: str, path: str | None = None) -> VIDEO_RETURN_
             path,
         )
         return []
+
+    @classmethod
+    async def _download_link(cls, url: str, download_to: str) -> str | None:
+        """
+        Downloads a file from the given URL and saves it to the specified path.
+        
+        
+        Warning:
+            This function will not overwrite existing files. If the file already exists at the specified path, it will not be downloaded again.
+
+        Args:
+            url (str): The URL of the file to download.
+            download_to (str): The local file path where the downloaded file should be saved.
+
+        Returns:
+            downloaded_path (str): The path to the downloaded file if successful, otherwise None.
+        """
+        # if we already downloaded the file before, return it
+        if os.path.exists(download_to):
+            return download_to
+
+        os.makedirs(os.path.dirname(download_to), exist_ok=True)
+
+        try:
+            async with aiohttp.ClientSession() as session:
+                async with session.get(url) as response:
+                    response.raise_for_status()
+                    data = await response.read()
+                    with open(download_to, "wb") as file:
+                        file.write(data)
+        except aiohttp.ClientError as e:
+            logging.error("Error while downloading instagram post: %s", str(e))
+            return None
+        return download_to
+
+    @classmethod
+    async def _download_links(cls, links: list[str], path: str, video_id: str) -> list[str]:
+        """Downloads files from a list of URLs and saves them to the specified path.
+        Will not overwrite existing files. If a file already exists at the specified path, it will not be downloaded again.
+        Adds _{index} to the filename to avoid overwriting files with the same name.
+
+        Args:
+            links (list[str]): the list of URLs to download, will add _{index} starting at 1 to the filename for every file
+            path (str): the local file path where the downloaded files should be saved
+            video_id (str): the video ID to use in the filename, is the same thing as filename
+
+        Returns:
+            list[str]: the list of paths to the downloaded files
+            will not put the path in the list if the download failed
+        """
+        downloaded_paths = []
+        for index, link in enumerate(links, start=1):
+            download_to = os.path.join(path, f"{video_id}_{index}.mp4")
+            downloaded_link = await cls._download_link(link, download_to)
+            if downloaded_link is None:
+                continue
+            downloaded_paths.append(downloaded_link)
+
+        return downloaded_paths
diff --git a/src/instagram.py b/src/instagram.py
@@ -1,3 +1,4 @@
+import asyncio
 import logging
 import os
 import re
@@ -75,64 +76,66 @@ def _login() -> bool:
         logging.error("Instagram login failed!!! FIX CREDENTIALS. Error: %s", e)
         return False
 
+def _try_login():
+    global logged_in # pylint: disable=global-statement # don't really mind having a global login
+    if not logged_in:
+        logged_in = _login()
 
-logged_in = _login()
 
+_try_login()
 
-def _get_post_from_url(url: str) -> Post | None:
+
+def _get_shortcode_from_url(url: str) -> str | None:
     result = re.match(_SHORTCODE_REGEX, url)
     if result is None:
         return None
     shortcode = result.group(1)
     if not isinstance(shortcode, str):
         return None
+    return shortcode
+
+def _get_post_from_url(url: str) -> Post | None:
+    shortcode = _get_shortcode_from_url(url)
+    if shortcode is None:
+        return None
     try:
         return Post.from_shortcode(downloader.context, shortcode)
     except ConnectionException as e:  # probably graphql error
         logging.exception(e)
         return None
 
+def _get_file_name(post: Post, index: int) -> str:
+    is_multi_vid = post.typename == "GraphSidecar"
+    to_add = f"_{index}" if is_multi_vid else ""
+    return f"{post.shortcode}{to_add}.mp4"
 
 class InstagramDownloader(VideoDownloader):
-    @staticmethod
-    def download_video_from_link(url: str, path: str | None = None) -> VIDEO_RETURN_TYPE:
+    @classmethod
+    async def download_video_from_link(cls, url: str, path: str | None = None) -> VIDEO_RETURN_TYPE:
         attachment_list: VIDEO_RETURN_TYPE = []
-        global logged_in  # pylint: disable=global-statement # can't think of a better way rn
-        logged_in = _login()  # retry login if it failed the first time
+        _try_login() # try to login if not already logged in
 
         if path is None:
             path = os.path.join("downloads", "instagram")
 
         os.makedirs(path, exist_ok=True)
 
-        path = Path(path)  # type: ignore
-
         post = _get_post_from_url(url)
         if post is None:
             return attachment_list
         downloader.filename_pattern = "{shortcode}"
 
-        is_video_list = post.get_is_videos()
-        is_video_list = list(filter(lambda x: x is True, is_video_list))
+        video_count = len(list(filter(None, post.get_is_videos())))
 
         downloaded: bool = False
-        if post.typename == "GraphSidecar":
-            for index, _ in enumerate(is_video_list, start=1):
-
-                file_path = os.path.join(path, f"{post.shortcode}_{index}.mp4")  # type: ignore # there is a bug in pylance...
-                file = VideoFile(file_path, post.caption)
-
-                if not os.path.exists(file.path) and not downloaded:
-                    downloader.download_post(post, path)  # type: ignore # path is literally a Path object it cannot be None...
-                    downloaded = True
-
-                attachment_list.append(file)
-        else:
-            file_path = os.path.join(path, f"{post.shortcode}.mp4") # type: ignore # there is a bug in pylance...
+        for index in range(1, video_count + 1): # will run once if not sidecar
+            file_path = os.path.join(path, _get_file_name(post, index))
             file = VideoFile(file_path, post.caption)
+
             if not os.path.exists(file.path) and not downloaded:
-                downloader.download_post(post, path) # type: ignore # path is literally a Path object it cannot be None...
+                await asyncio.to_thread(downloader.download_post, post, Path(path))
                 downloaded = True
+
             attachment_list.append(file)
 
         return attachment_list