Skip to content

Commit

Permalink
fix: export - fallbacks to get_media
Browse files Browse the repository at this point in the history
  • Loading branch information
milovate committed Dec 20, 2024
1 parent cc67f67 commit dcea38b
Showing 1 changed file with 37 additions and 21 deletions.
58 changes: 37 additions & 21 deletions daras_ai_v2/gdrive_downloader.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import io

import mimetypes
from furl import furl
import requests

Expand Down Expand Up @@ -73,8 +73,13 @@ def gdrive_download(f: furl, mime_type: str, export_links: dict) -> tuple[bytes,

if f.host != "drive.google.com":
# export google docs to appropriate type
export_mime_type, _ = docs_export_mimetype(f)
if f_url_export := export_links.get(export_mime_type, None):
export_mime_type, _, is_google_workspace_doc = docs_export_mimetype(
f, mime_type
)

if is_google_workspace_doc and (
f_url_export := export_links.get(export_mime_type, None)
):
r = requests.get(f_url_export)
file_bytes = r.content
raise_for_status(r, is_user_url=True)
Expand All @@ -96,33 +101,44 @@ def gdrive_download(f: furl, mime_type: str, export_links: dict) -> tuple[bytes,
return file_bytes, mime_type


def docs_export_mimetype(f: furl) -> tuple[str, str]:
def docs_export_mimetype(f: furl, mime_type) -> tuple[str, str, bool]:
"""
return the mimetype to export google docs - https://developers.google.com/drive/api/guides/ref-export-formats
Args:
f (furl): google docs link
Returns:
tuple[str, str]: (mime_type, extension)
tuple[str, str]: (mime_type, extension, is_google_workspace_supported)
"""
if "document" in f.path.segments:
mime_type = "text/plain"
ext = ".txt"
elif "spreadsheets" in f.path.segments:
mime_type = "text/csv"
ext = ".csv"
elif "presentation" in f.path.segments:
mime_type = (
"application/vnd.openxmlformats-officedocument.presentationml.presentation"
)
ext = ".pptx"
elif "drawings" in f.path.segments:
mime_type = "application/pdf"
ext = ".pdf"

supported_mimetypes = {
"application/vnd.google-apps.spreadsheet",
"application/vnd.google-apps.presentation",
"application/vnd.google-apps.drawing",
"application/vnd.google-apps.document",
}

is_google_workspace_supported = mime_type in supported_mimetypes

if is_google_workspace_supported:
if "document" in f.path.segments:
mime_type = "text/plain"
ext = ".txt"
elif "spreadsheets" in f.path.segments:
mime_type = "text/csv"
ext = ".csv"
elif "presentation" in f.path.segments:
mime_type = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
ext = ".pptx"
elif "drawings" in f.path.segments:
mime_type = "application/pdf"
ext = ".pdf"
else:
raise ValueError(f"Not sure how to export google docs url: {str(f)!r}")
else:
raise ValueError(f"Not sure how to export google docs url: {str(f)!r}")
return mime_type, ext
ext = f".{mimetypes.guess_extension(mime_type)}" or ""
return mime_type, ext, is_google_workspace_supported


def gdrive_metadata(file_id: str) -> dict:
Expand Down

0 comments on commit dcea38b

Please sign in to comment.