add Instagram to video-indir command

INFO: tries to log in to an Instagram account using the environment variables `INSTAGRAM_USERNAME` and `INSTAGRAM_PASSWORD`. WARNING: writes to "/jsons/instagram_session.json" when Instagram logs in; BE CAREFUL TO KEEP IT SECRET, LIKE A TOKEN. ALSO_UPDATES: gitignore and dockerignore
kytpbs · Jul 31, 2024 · 8767b6e · 8767b6e
1 parent ad15c9e
commit 8767b6e
Show file tree

Hide file tree

Showing 4 changed files with 120 additions and 0 deletions.
diff --git a/.dockerignore b/.dockerignore
@@ -32,3 +32,9 @@
 **/values.dev.yaml
 LICENSE
 README.md
+
+# Custom ignore
+**/downloads
+**/.github
+birthdays.json
+# Session secret: the bot writes it inside the jsons folder, not at the
+# build-context root, so match it at any depth to keep it out of the image.
+**/instagram_session.json
diff --git a/.gitignore b/.gitignore
@@ -16,6 +16,7 @@ Token.py
 OpenAiKey.py
 spotify_tokens.py
 *.env
+/jsons/instagram_session.json
 # Any log files
 *.log
 # Any kind of cache

diff --git a/src/downloading_system.py b/src/downloading_system.py
@@ -3,6 +3,7 @@
 
 from src.Youtube import YoutubeDownloader
 from src.downloader import VideoDownloader
+from src.instagram import InstagramDownloader
 from src.twitter import TwitterDownloader
 
 _TWITTER_REGEX = re.compile(r"\b(?:https?:\/\/)?(?:www\.)?(?:twitter\.com\/|t\.co\/|x\.com\/)\S*")
@@ -16,6 +17,8 @@ def get_downloader(url: str) -> Type[VideoDownloader] | None:
     """
     if re.match(_TWITTER_REGEX, url):
         return TwitterDownloader
+    if re.match(_INSTAGRAM_REGEX, url):
+        return InstagramDownloader
     if re.match(_YOUTUBE_REGEX, url):
         return YoutubeDownloader
     return None
diff --git a/src/instagram.py b/src/instagram.py
@@ -0,0 +1,110 @@
+import logging
+import os
+import re
+from pathlib import Path
+
+from dotenv import load_dotenv
+from instaloader import ConnectionException, LoginException
+from instaloader.instaloader import Instaloader
+from instaloader.structures import Post
+
+from Constants import JSON_FOLDER
+from src.downloader import VideoFile, VideoDownloader
+from src.Read import json_read, write_json
+
+# Captures the post shortcode (group 1): an 11-character path segment that
+# follows one or two other path segments and is itself followed by a slash.
+_SHORTCODE_REGEX = (
+    r"^https?:\/\/(?:www\.)?instagram\.com\/[^\/]+(?:\/[^\/]+)?\/([^\/]{11})\/.*$"
+)
+
+# Presumably makes values from a local .env file visible to os.getenv - confirm.
+load_dotenv()
+
+# Placeholder so _login() can read the flag on its first call; it is
+# overwritten with the real result once _login() has run.
+logged_in = False
+
+# Shared Instaloader instance used for logging in, post lookups and downloads;
+# configured to fetch videos only (no pictures, metadata files or comments).
+downloader = Instaloader(
+    download_videos=True,
+    download_pictures=False,
+    save_metadata=False,
+    download_comments=False,
+)
+
+
+def _login() -> bool:
+    """
+    Loads the session from the json file. or logs in if it doesn't exist.
+    Safe to call multiple times.
+
+    Returns:
+        bool: True if the session was loaded successfully
+    """
+    if logged_in:
+        return True
+
+    session_path = os.path.join(JSON_FOLDER, "instagram_session.json")
+    username = os.getenv("INSTAGRAM_USERNAME")
+    if username is None:
+        logging.error("INSTAGRAM_USERNAME is not set in the environment variables")
+        return False
+    if os.path.exists(session_path):
+        session_data = json_read(session_path)
+
+        downloader.load_session(username, session_data)
+        return True
+
+    password = os.getenv("INSTAGRAM_PASSWORD")
+    if password is None:
+        logging.error(
+            "INSTAGRAM_PASSWORD was not set in the environment variables"
+            + " and couldn't find instagram_session file in jsons folder"
+        )
+        return False
+    try:
+        downloader.login(username, password)
+        session = downloader.save_session()
+        write_json(session_path, session)
+        return True
+    except LoginException as e:
+        logging.error("Instagram login failed!!! FIX CREDENTIALS. Error: %s", e)
+        return False
+
+
+# Try to log in once at import time; _login() checks this flag to return early on later calls.
+logged_in = _login()
+
+
+def _get_post_from_url(url: str) -> Post | None:
+    result = re.match(_SHORTCODE_REGEX, url)
+    if result is None:
+        return None
+    shortcode = result.group(1)
+    if not isinstance(shortcode, str):
+        return None
+    try:
+        return Post.from_shortcode(downloader.context, shortcode)
+    except ConnectionException as e:  # probably graphql error
+        logging.exception(e)
+        return None
+
+
+class InstagramDownloader(VideoDownloader):
+    @staticmethod
+    def download_video_from_link(url: str, path: str | None = None) -> list[VideoFile]:
+        attachment_list: list[VideoFile] = []
+
+        if path is None:
+            path = os.path.join("downloads", "instagram")
+
+        os.makedirs(path, exist_ok=True)
+
+        path = Path(path)  # type: ignore
+
+        post = _get_post_from_url(url)
+        if post is None:
+            return attachment_list
+        downloader.filename_pattern = "{shortcode}"
+        file_path = os.path.join(path, f"{post.shortcode}.mp4")  # type: ignore # there is a bug in pylance...
+        file = VideoFile(file_path, post.caption)
+
+        if not os.path.exists(file.path):
+            downloader.download_post(post, path)  # type: ignore # path is literally a Path object it cannot be None...
+
+        attachment_list.append(file)
+        return attachment_list