Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: cobalt v10 api - parse filename | fix #27 #28

Merged
merged 2 commits into from
Nov 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 5 additions & 8 deletions lib/save_it/bot.ex
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ defmodule SaveIt.Bot do

alias SmallSdk.Telegram
alias SmallSdk.Cobalt
alias SmallSdk.WebDownloader

@bot :save_it_bot

Expand Down Expand Up @@ -232,7 +233,7 @@ defmodule SaveIt.Bot do
nil ->
update_message(chat.id, progress_message.message_id, Enum.slice(@progress, 0..1))

case FileHelper.download_files(download_urls) do
case WebDownloader.download_files(download_urls) do
{:ok, files} ->
update_message(
chat.id,
Expand All @@ -258,8 +259,6 @@ defmodule SaveIt.Bot do
end

downloaded_files ->
Logger.info("👍 File already downloaded, don't need to download again")

update_message(chat.id, progress_message.message_id, Enum.slice(@progress, 0..2))

# TODO: bot_send_media_group(chat.id, downloaded_files)
Expand All @@ -272,7 +271,7 @@ defmodule SaveIt.Bot do
nil ->
update_message(chat.id, progress_message.message_id, Enum.slice(@progress, 0..1))

case FileHelper.download(download_url) do
case WebDownloader.download_file(download_url) do
{:ok, file_name, file_content} ->
update_message(
chat.id,
Expand All @@ -295,16 +294,14 @@ defmodule SaveIt.Bot do
end

downloaded_file ->
Logger.info("👍 File already downloaded, don't need to download again")

update_message(chat.id, progress_message.message_id, Enum.slice(@progress, 0..2))

bot_send_file(chat.id, downloaded_file, {:file, downloaded_file})
delete_messages(chat.id, [message_id, progress_message.message_id])
end

{:error, msg} ->
update_message(chat.id, progress_message.message_id, msg)
{:error, _} ->
update_message(chat.id, progress_message.message_id, "💔 Failed to get download URL.")
end
end
end
Expand Down
65 changes: 0 additions & 65 deletions lib/save_it/file_helper.ex
Original file line number Diff line number Diff line change
@@ -1,70 +1,9 @@
defmodule SaveIt.FileHelper do
require Logger
use Tesla

@files_dir "./data/storage/files"
@urls_dir "./data/storage/urls"

@doc """
Downloads the resource at `url`.

URLs containing `"/api/stream"` are handled by `download_stream/1`; every
other URL is handled by `download_file/1`. The result of the chosen helper is
returned unchanged.
"""
def download(url) do
  if String.contains?(url, "/api/stream") do
    download_stream(url)
  else
    download_file(url)
  end
end

@doc """
Downloads every URL in `urls` with `download/1`.

Returns `{:ok, files}`, where `files` is a list of `{file_name, file_content}`
tuples in the same order as `urls`, or `{:error, reason}` for the first URL
that fails. Downloading stops at the first failure, so no further requests are
made once the batch is known to be incomplete.
"""
def download_files(urls) do
  Logger.info("download_files started, urls: #{inspect(urls)}")

  result =
    Enum.reduce_while(urls, [], fn url, acc ->
      case download(url) do
        {:ok, file_name, file_content} -> {:cont, [{file_name, file_content} | acc]}
        {:error, reason} -> {:halt, {:error, reason}}
      end
    end)

  case result do
    # Propagate the failure as-is instead of wrapping it in an :ok tuple.
    {:error, _reason} = error -> error
    # Entries were prepended while accumulating, so restore the input order.
    files -> {:ok, Enum.reverse(files)}
  end
end

# Fetches `url` with the module's Tesla `get/1`.
#
# On HTTP 200 returns `{:ok, file_name, body}`, where the file name is the
# hashed URL plus a fixed ".mp4" extension. Any other status or a transport
# error returns `{:error, message}`.
defp download_stream(url) do
  Logger.info("download_stream started, url: #{url}")

  response = get(url)

  case response do
    {:error, reason} ->
      {:error, "Reason: #{inspect(reason)}"}

    {:ok, %Tesla.Env{status: 200, body: content}} ->
      {:ok, "#{gen_file_name(url)}.mp4", content}

    {:ok, %Tesla.Env{status: other_status}} ->
      {:error, "Status: #{other_status}"}
  end
end

# Fetches `url` with the module's Tesla `get/1`.
#
# On HTTP 200 returns `{:ok, file_name, body}`, where the file name is the
# hashed URL plus an extension derived from the content-type header. Any other
# status or a transport error returns `{:error, message}`.
defp download_file(url) do
  Logger.info("download_file started, url: #{url}")

  case get(url) do
    {:ok, %Tesla.Env{status: 200, body: body, headers: headers}} ->
      file_name = gen_file_name(url) <> "." <> content_type_extension(headers)
      {:ok, file_name, body}

    {:ok, %Tesla.Env{status: status}} ->
      {:error, "Status: #{status}"}

    {:error, reason} ->
      {:error, "Reason #{inspect(reason)}"}
  end
end

# Derives a file extension from the content-type entry of a `{name, value}`
# header list. Header names are compared case-insensitively and media-type
# parameters (e.g. "; charset=utf-8") are dropped. Falls back to "bin" when the
# header is missing or has no usable subtype, instead of crashing.
defp content_type_extension(headers) do
  content_type =
    Enum.find_value(headers, "", fn {name, value} ->
      if String.downcase(name) == "content-type", do: value
    end)

  subtype =
    content_type
    |> String.split(";")
    |> hd()
    |> String.split("/")
    |> List.last()
    |> String.trim()

  if subtype == "", do: "bin", else: subtype
end

def set_google_drive_folder_id(chat_id, folder_id) do
write_file_to_disk("./data/settings/#{chat_id}", "folder_id.txt", folder_id)
end
Expand Down Expand Up @@ -165,8 +104,4 @@ defmodule SaveIt.FileHelper do
nil
end
end

# Builds a deterministic file name from `url`: the SHA-256 digest of the URL,
# encoded as URL-safe Base64 without padding.
defp gen_file_name(url) do
  digest = :crypto.hash(:sha256, url)
  Base.url_encode64(digest, padding: false)
end
end
26 changes: 10 additions & 16 deletions lib/save_it/google_drive.ex
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
defmodule SaveIt.GoogleDrive do
@moduledoc """
TODO:
TODO:重要:
改善
- [ ] 1. 如果没有配置,直接 skip
- [ ] 移动至 small_sdk

- [ ] list folders
- [ ] select folder and save folder_id
"""
Expand Down Expand Up @@ -117,31 +121,21 @@ defmodule SaveIt.GoogleDrive do
"""
end

defp handle_response(
{:ok,
%Tesla.Env{
status: 200,
body: %{"files" => files}
}}
) do
defp handle_response({:ok, %{status: 200, body: %{"files" => files}}}) do
{:ok, files}
end

defp handle_response({:ok, %Tesla.Env{status: 200, body: body}}) do
Logger.info("Successfully uploaded file to Google Drive")
defp handle_response({:ok, %{status: 200, body: body}}) do
{:ok, body}
end

defp handle_response({:ok, %Tesla.Env{status: status, body: body}}) do
Logger.warning(
"Failed to upload file to Google Drive, status: #{status}, body: #{inspect(body)}"
)

defp handle_response({:ok, %{status: status, body: body}}) do
Logger.warning("Failed at Google Drive, status: #{status}, body: #{inspect(body)}")
{:error, %{status: status, body: body}}
end

defp handle_response({:error, reason}) do
Logger.error("Failed to upload file to Google Drive, reason: #{inspect(reason)}")
Logger.error("Failed at Google Drive, reason: #{inspect(reason)}")
{:error, reason}
end
end
46 changes: 12 additions & 34 deletions lib/small_sdk/cobalt.ex
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,20 @@ defmodule SmallSdk.Cobalt do
req = build_request("/")
res = Req.post(req, json: %{url: url})

body = handle_response(res)

case body do
%{"url" => download_url} ->
case handle_response(res) do
{:ok, %{"url" => download_url}} ->
{:ok, download_url}

%{"status" => "picker", "picker" => picker_items} ->
{:ok, %{"status" => "picker", "picker" => picker_items}} ->
{:ok, url, Enum.map(picker_items, &Map.get(&1, "url"))}

%{"status" => "error", "text" => msg} ->
Logger.warning("response.body is status error, text: #{msg}")
{:error, msg}
{:ok, _} ->
Logger.warning("cobalt response: #{inspect(res)}")
{:error, "Can't get download url using Cobalt API"}

_ ->
Logger.warning("response.body: #{inspect(body)}")
{:error, "inner service error"}
{:error, msg} ->
Logger.error("cobalt error: #{msg}")
{:error, "Can't get download url using Cobalt API"}
end
end

Expand Down Expand Up @@ -53,30 +51,10 @@ defmodule SmallSdk.Cobalt do
def handle_response({:ok, %{status: status, body: body}}) do
case status do
status when status in 200..209 ->
body

400 ->
Logger.warning("Bad Request: #{inspect(body)}")
raise "Bad Request"

401 ->
raise "Unauthorized"

404 ->
nil

409 ->
raise "Conflict"

422 ->
raise "Unprocessable Entity"

503 ->
raise "Service Unavailable"
{:ok, body}

_ ->
Logger.error("Unhandled status code #{status}: #{inspect(body)}")
raise "Unknown error: #{status}"
{:error, "Request failed with status #{status}: #{inspect(body)}"}
end
end

Expand All @@ -91,7 +69,7 @@ defmodule SmallSdk.Cobalt do
body

status ->
Logger.warning("Request failed with status #{status}: #{inspect(body)}")
Logger.error("Request failed with status #{status}: #{inspect(body)}")
raise "Request failed with status #{status}"
end
end
Expand Down
1 change: 1 addition & 0 deletions lib/small_sdk/typesense.ex
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ defmodule SmallSdk.Typesense do
raise "Unprocessable Entity"

503 ->
# TODO: service monitoring, alerting to IM
raise "Service Unavailable"

_ ->
Expand Down
82 changes: 82 additions & 0 deletions lib/small_sdk/web_downloader.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
defmodule SmallSdk.WebDownloader do
require Logger

# FIXME:TODAY return {:ok, file_name, file_content} | {:error, reason}
@doc """
Downloads every URL in `urls` with `download_file/1`.

Returns `{:ok, files}`, where `files` is a list of `{filename, file_content}`
tuples in the same order as `urls`, or `{:error, reason}` for the first URL
that fails. Downloading stops at the first failure, so no further requests are
made once the batch is known to be incomplete.
"""
def download_files(urls) do
  Logger.info("download_files started, urls: #{inspect(urls)}")

  result =
    Enum.reduce_while(urls, [], fn url, acc ->
      case download_file(url) do
        {:ok, filename, file_content} -> {:cont, [{filename, file_content} | acc]}
        {:error, reason} -> {:halt, {:error, reason}}
      end
    end)

  case result do
    # Propagate the failure as-is instead of wrapping it in an :ok tuple.
    {:error, _reason} = error -> error
    # Entries were prepended while accumulating, so restore the input order.
    files -> {:ok, Enum.reverse(files)}
  end
end
Comment on lines +5 to +17
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Ensure download_files/1 returns consistent success and error tuples

Currently, download_files/1 may return {:ok, {:error, reason}} when an error occurs during file downloads, which can be confusing. The function should return {:error, reason} in case of an error to maintain consistent return types.

Apply this diff to adjust the return value:

 def download_files(urls) do
    Logger.info("download_files started, urls: #{inspect(urls)}")

    res =
      urls
      |> Enum.map(&download_file/1)
      |> Enum.reduce_while([], fn
        {:ok, filename, file_content}, acc -> {:cont, [{filename, file_content} | acc]}
        {:error, reason}, _ -> {:halt, {:error, reason}}
      end)

-   {:ok, res}
+   case res do
+     {:error, reason} -> {:error, reason}
+     files -> {:ok, files}
+   end
 end
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
def download_files(urls) do
Logger.info("download_files started, urls: #{inspect(urls)}")
res =
urls
|> Enum.map(&download_file/1)
|> Enum.reduce_while([], fn
{:ok, filename, file_content}, acc -> {:cont, [{filename, file_content} | acc]}
{:error, reason}, _ -> {:halt, {:error, reason}}
end)
{:ok, res}
end
def download_files(urls) do
Logger.info("download_files started, urls: #{inspect(urls)}")
res =
urls
|> Enum.map(&download_file/1)
|> Enum.reduce_while([], fn
{:ok, filename, file_content}, acc -> {:cont, [{filename, file_content} | acc]}
{:error, reason}, _ -> {:halt, {:error, reason}}
end)
case res do
{:error, reason} -> {:error, reason}
files -> {:ok, files}
end
end


# TODO: have to handle Stream data
@doc """
Downloads a single file from `url` with `Req.get/2`.

Returns `{:ok, filename, body}` when the response body is non-empty and the
status is in `200..209`. The filename is taken from the content-disposition
header when `url` contains `"/tunnel"`, and is otherwise derived from the URL
hash and the content-type header.

Returns `{:error, message}` for an empty body, any other status, or a
transport error; details are logged and `message` is a user-facing string.
"""
def download_file(url) do
  Logger.info("download_file started, url: #{url}")

  # Keep the payload as raw bytes: Req's default body decoding would turn JSON
  # or archive responses into Elixir terms, which are not file content.
  case Req.get(url, decode_body: false) do
    {:ok, %{status: status, body: ""}} ->
      Logger.warning("Downloaded an empty file, status: #{status}")
      {:error, "💔 Downloaded an empty file"}

    {:ok, %{status: status, body: body, headers: headers}} when status in 200..209 ->
      filename_source =
        if String.contains?(url, "/tunnel"), do: :content_disposition, else: :content_type

      {:ok, parse_filename(url, filename_source, headers), body}

    {:ok, %{status: status, body: body, headers: _headers}} ->
      Logger.error("download_file failed, status: #{status}, body: #{inspect(body)}")
      {:error, "💔 Failed to download file"}

    {:error, reason} ->
      Logger.error("download_file failed, reason: #{inspect(reason)}")
      {:error, "💔 Failed to download file"}
  end
end
Comment on lines +20 to +51
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Validate URLs to prevent potential security risks

The function download_file/1 accepts URLs and performs HTTP GET requests without validation. This could expose the system to security risks such as Server-Side Request Forgery (SSRF).

Consider adding validation to ensure that URLs are safe and conform to expected patterns, or limit requests to trusted domains.


# Builds a file name from the hashed URL plus an extension taken from the
# "content-type" response header.
#
# `headers` is read with `Map.get/2`, so it must be a map; the header value may
# be a list of strings (first entry used) or a single string. Media-type
# parameters such as "; charset=utf-8" are dropped, and "bin" is used when the
# header is missing or has no usable subtype, instead of crashing.
defp parse_filename(url, :content_type, headers) do
  content_type =
    case Map.get(headers, "content-type") do
      [value | _rest] when is_binary(value) -> value
      value when is_binary(value) -> value
      _missing -> ""
    end

  subtype =
    content_type
    |> String.split(";")
    |> hd()
    |> String.split("/")
    |> List.last()
    |> String.trim()

  ext = if subtype == "", do: "bin", else: subtype

  gen_file_name(url) <> "." <> ext
end
Comment on lines +53 to +62
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Add error handling for missing or malformed 'content-type' headers

The function assumes that the 'content-type' header exists and is properly formatted. If the header is missing or has an unexpected format, it could lead to runtime errors.

Consider adding safeguards to handle such cases safely:

ext =
  headers
  |> Enum.find_value(fn {key, value} -> if key == "content-type", do: value end)
  |> case do
    nil -> "bin"
    content_type ->
      content_type
      |> String.split("/")
      |> List.last()
  end

⚠️ Potential issue

Ensure correct extraction of 'content-type' from headers

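In parse_filename/3 when handling :content_type, the headers are accessed using Map.get/2. That is only valid when headers is a map (Req 0.4 and later return response headers as a map of header name to a list of values); with older Req versions headers is a list of tuples, and Map.get/2 on a list raises BadMapError instead of retrieving the header.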

To fix this issue, consider using List.keyfind/3 or Enum.find/2 to access headers properly.

Apply this diff:

-   headers
-   |> Map.get("content-type")
+   headers
+   |> Enum.find_value(fn {key, value} -> if key == "content-type", do: value end)
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
defp parse_filename(url, :content_type, headers) do
ext =
headers
|> Map.get("content-type")
|> List.first()
|> String.split("/")
|> List.last()
gen_file_name(url) <> "." <> ext
end
defp parse_filename(url, :content_type, headers) do
ext =
headers
|> Enum.find_value(fn {key, value} -> if key == "content-type", do: value end)
|> List.first()
|> String.split("/")
|> List.last()
gen_file_name(url) <> "." <> ext
end


# Extracts the file name from the "content-disposition" response header.
#
# `headers` is read with `Map.get/2`, so it must be a map; the header value may
# be a list of strings (first entry used) or a single string. The first
# ";"-separated parameter containing "filename" is used, with surrounding
# whitespace and double quotes removed.
#
# The header comes from a remote server, so any directory components are
# stripped with `Path.basename/1` to keep the name from escaping the target
# directory. When the header or its filename parameter is missing (or reduces
# to an empty / dot-only name), the hashed URL is used as the file name.
defp parse_filename(url, :content_disposition, headers) do
  disposition =
    case Map.get(headers, "content-disposition") do
      [value | _rest] when is_binary(value) -> value
      value when is_binary(value) -> value
      _missing -> ""
    end

  filename =
    disposition
    |> String.split(";")
    |> Enum.find("", &String.contains?(&1, "filename"))
    |> String.split("=")
    |> List.last()
    |> String.trim()
    |> String.replace("\"", "")
    |> Path.basename()

  if filename in ["", ".", ".."], do: gen_file_name(url), else: filename
end
Comment on lines +64 to +77
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Handle potential errors when parsing 'content-disposition' header

The function assumes that the 'content-disposition' header exists and follows a specific format. If the header is missing or malformed, it could cause runtime errors.

Add checks or default values to handle cases where the filename cannot be extracted:

filename =
  headers
  |> Enum.find_value(fn {key, value} -> if key == "content-disposition", do: value end)
  |> case do
    nil -> gen_file_name(url)
    content_disposition ->
      content_disposition
      |> String.split(";")
      |> Enum.find(fn x -> String.contains?(x, "filename") end)
      |> case do
        nil -> gen_file_name(url)
        filename_part ->
          filename_part
          |> String.split("=")
          |> List.last()
          |> String.trim("\"")
      end
  end

⚠️ Potential issue

Correct the retrieval of 'content-disposition' header

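In parse_filename/3 when handling :content_disposition, headers is accessed with Map.get/2. That is only valid when headers is a map (Req 0.4 and later return response headers as a map of header name to a list of values); with older Req versions headers is a list of tuples, and Map.get/2 on a list raises BadMapError, so this will not work as intended.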

Update the code to properly extract the 'content-disposition' header:

-   headers
-   |> Map.get("content-disposition")
+   headers
+   |> Enum.find_value(fn {key, value} -> if key == "content-disposition", do: value end)
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
defp parse_filename(_url, :content_disposition, headers) do
filename =
headers
|> Map.get("content-disposition")
|> List.first()
|> String.split(";")
|> Enum.find(fn x -> String.contains?(x, "filename") end)
|> String.split("=")
|> List.last()
|> String.trim()
|> String.replace("\"", "")
filename
end
defp parse_filename(_url, :content_disposition, headers) do
filename =
headers
|> Enum.find_value(fn {key, value} -> if key == "content-disposition", do: value end)
|> List.first()
|> String.split(";")
|> Enum.find(fn x -> String.contains?(x, "filename") end)
|> String.split("=")
|> List.last()
|> String.trim()
|> String.replace("\"", "")
filename
end


# Produces a stable file name for `url`: its SHA-256 digest rendered as
# URL-safe Base64 with the padding characters omitted.
defp gen_file_name(url) do
  hashed_url = :crypto.hash(:sha256, url)
  Base.url_encode64(hashed_url, padding: false)
end
end
Loading