Skip to content

Commit

Permalink
add Instagram to video-indir command
Browse files Browse the repository at this point in the history
INFO: tries to log in to an Instagram account using the environment variables `INSTAGRAM_USERNAME` and `INSTAGRAM_PASSWORD`

WARNING: writes the session to "/jsons/instagram_session.json" when the Instagram login succeeds. BE CAREFUL TO KEEP THAT FILE SECRET, LIKE A TOKEN

ALSO_UPDATES: gitignore and dockerignore
  • Loading branch information
kytpbs committed Jul 31, 2024
1 parent ad15c9e commit 8767b6e
Show file tree
Hide file tree
Showing 4 changed files with 120 additions and 0 deletions.
6 changes: 6 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,9 @@
**/values.dev.yaml
LICENSE
README.md

# Custom ignore
**/downloads
**/.github
birthdays.json
instagram_session.json
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ Token.py
OpenAiKey.py
spotify_tokens.py
*.env
/jsons/instagram_session.json
# Any log files
*.log
# Any kind of cache
Expand Down
3 changes: 3 additions & 0 deletions src/downloading_system.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from src.Youtube import YoutubeDownloader
from src.downloader import VideoDownloader
from src.instagram import InstagramDownloader
from src.twitter import TwitterDownloader

_TWITTER_REGEX = re.compile(r"\b(?:https?:\/\/)?(?:www\.)?(?:twitter\.com\/|t\.co\/|x\.com\/)\S*")
Expand All @@ -16,6 +17,8 @@ def get_downloader(url: str) -> Type[VideoDownloader] | None:
"""
if re.match(_TWITTER_REGEX, url):
return TwitterDownloader
if re.match(_INSTAGRAM_REGEX, url):
return InstagramDownloader
if re.match(_YOUTUBE_REGEX, url):
return YoutubeDownloader
return None
110 changes: 110 additions & 0 deletions src/instagram.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
import logging
import os
import re
from pathlib import Path

from dotenv import load_dotenv
from instaloader import ConnectionException, LoginException
from instaloader.instaloader import Instaloader
from instaloader.structures import Post

from Constants import JSON_FOLDER
from src.downloader import VideoFile, VideoDownloader
from src.Read import json_read, write_json

# Captures the 11-character shortcode from an Instagram post URL. The path may
# have one or two segments before the shortcode and must have a "/" after it.
_SHORTCODE_REGEX = r"^https?:\/\/(?:www\.)?instagram\.com\/[^\/]+(?:\/[^\/]+)?\/([^\/]{11})\/.*$"

# Pull variables from a .env file into the process environment (python-dotenv).
load_dotenv()

# Login flag consulted by _login(); starts out False and is reassigned with the
# result of _login() further down in this module.
logged_in = False

# Single Instaloader instance shared by every function in this module,
# configured to fetch videos only (no pictures, comments or metadata files).
downloader = Instaloader(
    download_pictures=False,
    download_videos=True,
    download_comments=False,
    save_metadata=False,
)


def _login() -> bool:
    """
    Restore the saved Instagram session, or log in with credentials if none is saved.

    The username is read from ``INSTAGRAM_USERNAME``. When
    ``instagram_session.json`` exists in ``JSON_FOLDER`` its contents are loaded
    into the shared ``downloader``. Otherwise the password is read from
    ``INSTAGRAM_PASSWORD``, a login is performed and the resulting session is
    written to that file.
    Safe to call multiple times: returns immediately when the module-level
    ``logged_in`` flag is already true.

    Returns:
        bool: True if a session was loaded or the login succeeded, False when a
        required environment variable is missing or the login attempt failed.
    """
    if logged_in:
        return True

    session_path = os.path.join(JSON_FOLDER, "instagram_session.json")
    username = os.getenv("INSTAGRAM_USERNAME")
    if username is None:
        logging.error("INSTAGRAM_USERNAME is not set in the environment variables")
        return False

    # Prefer the stored session so the password is not needed on every start.
    if os.path.exists(session_path):
        session_data = json_read(session_path)
        downloader.load_session(username, session_data)
        return True

    password = os.getenv("INSTAGRAM_PASSWORD")
    if password is None:
        logging.error(
            "INSTAGRAM_PASSWORD was not set in the environment variables"
            + " and couldn't find instagram_session file in jsons folder"
        )
        return False

    try:
        downloader.login(username, password)
        session = downloader.save_session()
        write_json(session_path, session)
        return True
    except LoginException as e:
        logging.error("Instagram login failed!!! FIX CREDENTIALS. Error: %s", e)
        return False
    except ConnectionException as e:
        # This function runs while the module is being imported; a network
        # problem must not abort the import, so report it and carry on.
        logging.error("Instagram login failed because of a connection error: %s", e)
        return False


# Attempt the Instagram login once, when this module is imported, and keep the
# outcome so later _login() calls can return early.
logged_in = _login()


def _get_post_from_url(url: str) -> Post | None:
    """
    Resolve an Instagram URL to its ``Post`` object.

    Returns:
        Post | None: the post looked up by the shortcode captured from the URL,
        or None when the URL does not match ``_SHORTCODE_REGEX`` or the lookup
        raises a ``ConnectionException`` (which is logged).
    """
    match = re.match(_SHORTCODE_REGEX, url)
    shortcode = match.group(1) if match is not None else None
    if not isinstance(shortcode, str):
        return None

    try:
        post = Post.from_shortcode(downloader.context, shortcode)
    except ConnectionException as error:  # probably graphql error
        logging.exception(error)
        return None
    return post


class InstagramDownloader(VideoDownloader):
    """Video downloader for Instagram posts, backed by the shared ``downloader``."""

    @staticmethod
    def download_video_from_link(url: str, path: str | None = None) -> list[VideoFile]:
        """
        Download the video of the Instagram post at ``url``.

        Args:
            url: Link to the Instagram post.
            path: Directory to store the video in; defaults to
                ``downloads/instagram``. It is created if it does not exist.

        Returns:
            list[VideoFile]: a single entry for ``<shortcode>.mp4`` with the
            post caption, or an empty list when the post could not be resolved
            or no such file exists after the download.
        """
        attachment_list: list[VideoFile] = []

        if path is None:
            path = os.path.join("downloads", "instagram")

        os.makedirs(path, exist_ok=True)

        # Separate name for the Path form so ``path`` keeps its declared type.
        target_dir = Path(path)

        post = _get_post_from_url(url)
        if post is None:
            return attachment_list

        # Name downloaded files after the shortcode so the path is predictable.
        downloader.filename_pattern = "{shortcode}"
        file_path = os.path.join(target_dir, f"{post.shortcode}.mp4")
        file = VideoFile(file_path, post.caption)

        # Skip the download when the file is already present from an earlier call.
        if not os.path.exists(file.path):
            downloader.download_post(post, target_dir)

        # The downloader is configured not to save pictures, so a post without a
        # video leaves no mp4 behind; do not hand back a path that does not exist.
        # NOTE(review): posts with several videos may be saved under other file
        # names and would be dropped here as well - confirm against Instaloader.
        if not os.path.exists(file.path):
            logging.error(
                "Instagram post %s did not produce a video file at %s",
                post.shortcode,
                file.path,
            )
            return attachment_list

        attachment_list.append(file)
        return attachment_list

0 comments on commit 8767b6e

Please sign in to comment.