Merge pull request #19161 from arash77/add-utf8-encoded-filename
Enhance UTF-8 support for filename handling in downloads
mvdbeek authored Nov 20, 2024
2 parents 21ac331 + dfafd7a commit 1615f12
Showing 5 changed files with 28 additions and 10 deletions.
15 changes: 7 additions & 8 deletions lib/galaxy/datatypes/data.py
@@ -50,6 +50,7 @@
FILENAME_VALID_CHARS,
inflector,
iter_start_of_line,
to_content_disposition,
unicodify,
UNKNOWN,
)
@@ -437,7 +438,7 @@ def _serve_raw(
element_identifier=kwd.get("element_identifier"),
filename_pattern=kwd.get("filename_pattern"),
)
headers["Content-Disposition"] = f'attachment; filename="{filename}"'
headers["Content-Disposition"] = to_content_disposition(filename)
return open(dataset.get_file_name(), mode="rb"), headers

def to_archive(self, dataset: DatasetProtocol, name: str = "") -> Iterable:
@@ -483,7 +484,7 @@ def _serve_file_download(self, headers, data, trans, to_ext, file_size, **kwd):
headers["content-type"] = (
"application/octet-stream" # force octet-stream so Safari doesn't append mime extensions to filename
)
headers["Content-Disposition"] = f'attachment; filename="{filename}"'
headers["Content-Disposition"] = to_content_disposition(filename)
return open(data.get_file_name(), "rb"), headers

def _serve_binary_file_contents_as_text(self, trans, data, headers, file_size, max_peek_size):
@@ -660,16 +661,13 @@ def _download_filename(
element_identifier: Optional[str] = None,
filename_pattern: Optional[str] = None,
) -> str:
def escape(raw_identifier):
return "".join(c in FILENAME_VALID_CHARS and c or "_" for c in raw_identifier)[0:150]

if not to_ext or to_ext == "data":
# If a client requests to_ext with the extension 'data', they are
# deferring to the server, set it based on datatype.
to_ext = dataset.extension

template_values = {
"name": escape(dataset.name),
"name": dataset.name,
"ext": to_ext,
"hid": dataset.hid,
}
@@ -682,8 +680,9 @@ def escape(raw_identifier):

if hdca is not None:
# Use collection context to build up filename.
template_values["element_identifier"] = element_identifier
template_values["hdca_name"] = escape(hdca.name)
if element_identifier is not None:
template_values["element_identifier"] = element_identifier
template_values["hdca_name"] = hdca.name
template_values["hdca_hid"] = hdca.hid

return string.Template(filename_pattern).substitute(**template_values)
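
The net effect of this hunk is that _download_filename no longer escapes the dataset name itself; the raw UTF-8 name flows into the template substitution, and sanitization is deferred to to_content_disposition when the Content-Disposition header is assembled. A minimal sketch of the substitution step, assuming a default pattern of the usual Galaxy${hid}-[${name}].${ext} form (the pattern and values below are illustrative, not taken from this diff):

    import string

    # Hypothetical pattern and values, for illustration only.
    filename_pattern = "Galaxy${hid}-[${name}].${ext}"
    template_values = {"hid": 3, "name": "دیتاست", "ext": "json"}

    # With the local escape() helper removed, the original characters survive here;
    # they are only sanitized later, inside to_content_disposition().
    print(string.Template(filename_pattern).substitute(**template_values))
    # Galaxy3-[دیتاست].json
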
9 changes: 9 additions & 0 deletions lib/galaxy/util/__init__.py
@@ -49,6 +49,7 @@
Union,
)
from urllib.parse import (
quote,
urlencode,
urlparse,
urlsplit,
@@ -2006,3 +2007,11 @@ def lowercase_alphanum_to_hex(lowercase_alphanum: str) -> str:
import numpy as np

return np.base_repr(int(lowercase_alphanum, 36), 16).lower()


def to_content_disposition(target: str) -> str:
filename, ext = os.path.splitext(target)
character_limit = 255 - len(ext)
sanitized_filename = "".join(c in FILENAME_VALID_CHARS and c or "_" for c in filename)[0:character_limit] + ext
utf8_encoded_filename = quote(re.sub(r'[\/\\\?%*:|"<>]', "_", filename), safe="")[0:character_limit] + ext
return f"attachment; filename=\"{sanitized_filename}\"; filename*=UTF-8''{utf8_encoded_filename}"
4 changes: 2 additions & 2 deletions lib/galaxy/util/zipstream.py
@@ -11,6 +11,7 @@

import zipstream

from galaxy.util import to_content_disposition
from .path import safe_walk

CRC32_MIN = 1444
@@ -41,8 +42,7 @@ def response(self) -> Iterator[bytes]:
def get_headers(self) -> Dict[str, str]:
headers = {}
if self.archive_name:
archive_name = self.archive_name.encode("latin-1", "replace").decode("latin-1")
headers["Content-Disposition"] = f'attachment; filename="{archive_name}.zip"'
headers["Content-Disposition"] = to_content_disposition(f"{self.archive_name}.zip")
if self.upstream_mod_zip:
headers["X-Archive-Files"] = "zip"
else:
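
For comparison, a small sketch of why the replaced latin-1 round-trip was lossy for non-ASCII archive names; the replacement behavior shown is plain Python, not part of this diff:

    from galaxy.util import to_content_disposition

    # Old approach: characters that cannot be encoded as latin-1 are replaced,
    # so a Persian archive name degrades to question marks in the header.
    print("دیتاست".encode("latin-1", "replace").decode("latin-1"))  # ??????

    # New approach (as in the hunk above): the shared helper keeps the UTF-8 name
    # in the RFC 5987 filename* parameter alongside an ASCII-sanitized fallback.
    print(to_content_disposition("دیتاست.zip"))
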
2 changes: 2 additions & 0 deletions lib/galaxy_test/api/test_dataset_collections.py
@@ -1,6 +1,7 @@
import zipfile
from io import BytesIO
from typing import List
from urllib.parse import quote

from galaxy.util.unittest_utils import skip_if_github_down
from galaxy_test.base.api_asserts import assert_object_id_error
@@ -189,6 +190,7 @@ def test_download_non_english_characters(self):
hdca_id = self.dataset_populator.fetch(payload, wait=True).json()["outputs"][0]["id"]
create_response = self._download_dataset_collection(history_id=history_id, hdca_id=hdca_id)
self._assert_status_code_is(create_response, 200)
assert quote(name, safe="") in create_response.headers["Content-Disposition"]

@requires_new_user
def test_hda_security(self):
8 changes: 8 additions & 0 deletions lib/galaxy_test/api/test_datasets.py
@@ -6,6 +6,7 @@
Dict,
List,
)
from urllib.parse import quote

from galaxy.model.unittest_utils.store_fixtures import (
deferred_hda_model_store_dict,
@@ -897,3 +898,10 @@ def test_cannot_update_datatype_on_immutable_history(self, history_id):
response = self._put(f"histories/{history_id}/contents/{hda_id}", data={"datatype": "tabular"}, json=True)
self._assert_status_code_is(response, 403)
assert response.json()["err_msg"] == "History is immutable"

def test_download_non_english_characters(self, history_id):
name = "دیتاست"
hda = self.dataset_populator.new_dataset(history_id=history_id, name=name, content="data", wait=True)
response = self._get(f"histories/{history_id}/contents/{hda['id']}/display?to_ext=json")
self._assert_status_code_is(response, 200)
assert quote(name, safe="") in response.headers["Content-Disposition"]
