From 85613c7fa2642d36d0b48a573bd5a0d1143d0585 Mon Sep 17 00:00:00 2001 From: Thomas Stephens Date: Tue, 16 Jun 2020 11:26:10 -0500 Subject: [PATCH 1/5] Set content type of files uploaded to Google Object Storage by guessing the mimetype of the upload URI. Co-authored-by: Tim Jensen --- storage/google_storage.py | 3 ++- tests/test_google_storage.py | 11 ++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/storage/google_storage.py b/storage/google_storage.py index 744b4c4..20e003d 100644 --- a/storage/google_storage.py +++ b/storage/google_storage.py @@ -1,6 +1,7 @@ import base64 import datetime import json +import mimetypes import os import google.cloud.storage @@ -39,7 +40,7 @@ def load_from_filename(self, file_path): def load_from_file(self, in_file): blob = self._get_blob() - blob.upload_from_file(in_file) + blob.upload_from_file(in_file, content_type=mimetypes.guess_type(self._storage_uri)[0]) def delete(self): blob = self._get_blob() diff --git a/tests/test_google_storage.py b/tests/test_google_storage.py index c5e4dd5..418e0f2 100644 --- a/tests/test_google_storage.py +++ b/tests/test_google_storage.py @@ -80,7 +80,16 @@ def test_load_from_file_uploads_blob_from_file_object(self): self.assert_gets_bucket_with_credentials() self.mock_bucket.blob.assert_called_once_with("path/filename") - self.mock_blob.upload_from_file.assert_called_once_with(mock_file) + self.mock_blob.upload_from_file.assert_called_once_with(mock_file, content_type=None) + + def test_load_from_file_guesses_content_type_based_on_filename(self): + mock_file = mock.Mock() + + storage = get_storage("gs://{}@bucketname/path/whatever.html".format(self.credentials)) + + storage.load_from_file(mock_file) + + self.mock_blob.upload_from_file.assert_called_once_with(mock_file, content_type="text/html") def test_delete_deletes_blob(self): storage = get_storage("gs://{}@bucketname/path/filename".format(self.credentials)) From a445a4149e1ced1459b7483c05ec2a22c358f9a4 Mon Sep 17 00:00:00 2001 From: Thomas Stephens Date: Tue, 16 Jun 2020 13:26:55 -0500 Subject: [PATCH 2/5] Set content type of files uploaded to S3 by guessing the mimetype of the local filename, or upload URI if there is no local file, to make the behavior consistent with Google Storage Co-authored-by: Tim Jensen --- storage/s3_storage.py | 23 +++++++++++++++--- tests/helpers.py | 11 +++++---- tests/test_s3_storage.py | 52 +++++++++++++++++++++++++++++++++------- 3 files changed, 71 insertions(+), 15 deletions(-) diff --git a/storage/s3_storage.py b/storage/s3_storage.py index 6274804..d4a6469 100644 --- a/storage/s3_storage.py +++ b/storage/s3_storage.py @@ -1,3 +1,4 @@ +import mimetypes import os import urllib import urlparse @@ -70,13 +71,24 @@ def save_to_directory(self, directory_path): def load_from_filename(self, file_path): client = self._connect() + extra_args = None + content_type = mimetypes.guess_type(file_path)[0] + if content_type is not None: + extra_args = {"ContentType": content_type} + transfer = boto3.s3.transfer.S3Transfer(client) - transfer.upload_file(file_path, self._bucket, self._keyname) + transfer.upload_file(file_path, self._bucket, self._keyname, extra_args=extra_args) def load_from_file(self, in_file): client = self._connect() - client.put_object(Bucket=self._bucket, Key=self._keyname, Body=in_file) + extra_args = {} + + content_type = mimetypes.guess_type(self._storage_uri)[0] + if content_type is not None: + extra_args["ContentType"] = content_type + + client.put_object(Bucket=self._bucket, Key=self._keyname, Body=in_file, **extra_args) def load_from_directory(self, source_directory): client = self._connect() @@ -86,11 +98,16 @@ def load_from_directory(self, source_directory): for file in files: upload_path = os.path.join(relative_path, file) + extra_args = None + content_type = mimetypes.guess_type(file)[0] + if content_type is not None: + extra_args = {"ContentType": content_type} retry.attempt( client.upload_file, os.path.join(root, file), self._bucket, - upload_path) + upload_path, + ExtraArgs=extra_args) def delete(self): client = self._connect() diff --git a/tests/helpers.py b/tests/helpers.py index 24b2c19..b513b86 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -2,7 +2,7 @@ import tempfile -def create_temp_nested_directory_with_files(): +def create_temp_nested_directory_with_files(suffixes=["", "", ""]): # temp_directory/ # temp_input_one # temp_input_two @@ -11,7 +11,8 @@ def create_temp_nested_directory_with_files(): temp_dir = {} temp_dir["temp_directory"] = {"path": tempfile.mkdtemp()} temp_dir["temp_input_one"] = { - "file": tempfile.NamedTemporaryFile(dir=temp_dir["temp_directory"]["path"])} + "file": tempfile.NamedTemporaryFile( + dir=temp_dir["temp_directory"]["path"], suffix=suffixes[0])} temp_dir["temp_input_one"]["path"] = temp_dir["temp_input_one"]["file"].name temp_dir["temp_input_one"]["name"] = os.path.basename(temp_dir["temp_input_one"]["file"].name) @@ -19,7 +20,8 @@ def create_temp_nested_directory_with_files(): temp_dir["temp_input_one"]["file"].flush() temp_dir["temp_input_two"] = { - "file": tempfile.NamedTemporaryFile(dir=temp_dir["temp_directory"]["path"])} + "file": tempfile.NamedTemporaryFile( + dir=temp_dir["temp_directory"]["path"], suffix=suffixes[1])} temp_dir["temp_input_two"]["path"] = temp_dir["temp_input_two"]["file"].name temp_dir["temp_input_two"]["name"] = os.path.basename(temp_dir["temp_input_two"]["file"].name) temp_dir["temp_input_two"]["file"].write("BAR") @@ -31,7 +33,8 @@ def create_temp_nested_directory_with_files(): temp_dir["nested_temp_directory"]["path"]) temp_dir["nested_temp_input"] = { - "file": tempfile.NamedTemporaryFile(dir=temp_dir["nested_temp_directory"]["path"])} + "file": tempfile.NamedTemporaryFile( + dir=temp_dir["nested_temp_directory"]["path"], suffix=suffixes[2])} temp_dir["nested_temp_input"]["path"] = temp_dir["nested_temp_input"]["file"].name temp_dir["nested_temp_input"]["name"] = os.path.basename( temp_dir["nested_temp_input"]["file"].name) diff --git a/tests/test_s3_storage.py b/tests/test_s3_storage.py index 25b4a4f..9b7c07b 100644 --- a/tests/test_s3_storage.py +++ b/tests/test_s3_storage.py @@ -52,6 +52,21 @@ def test_load_from_file(self, mock_session_class): mock_s3.put_object.assert_called_with(Bucket="bucket", Key="some/file", Body=mock_file) + @mock.patch("boto3.session.Session", autospec=True) + def test_load_from_file_guesses_content_type_based_on_filename(self, mock_session_class): + mock_session = mock_session_class.return_value + mock_s3 = mock_session.client.return_value + + mock_file = mock.Mock() + + storage = storagelib.get_storage( + "s3://access_key:access_secret@bucket/some/whatever.jpg") + + storage.load_from_file(mock_file) + + mock_s3.put_object.assert_called_with( + Bucket="bucket", Key="some/whatever.jpg", Body=mock_file, ContentType="image/jpeg") + @mock.patch("boto3.s3.transfer.S3Transfer", autospec=True) @mock.patch("boto3.session.Session", autospec=True) def test_load_from_filename(self, mock_session_class, mock_transfer_class): @@ -73,7 +88,22 @@ def test_load_from_filename(self, mock_session_class, mock_transfer_class): mock_transfer_class.assert_called_with(mock_s3) - mock_transfer.upload_file.assert_called_with("source/file", "bucket", "some/file") + mock_transfer.upload_file.assert_called_with( + "source/file", "bucket", "some/file", extra_args=None) + + @mock.patch("boto3.s3.transfer.S3Transfer", autospec=True) + @mock.patch("boto3.session.Session", autospec=True) + def test_load_from_filename_guess_content_type_based_on_filename( + self, mock_session_class, mock_transfer_class): + mock_transfer = mock_transfer_class.return_value + + storage = storagelib.get_storage( + "s3://access_key:access_secret@bucket/some/file") + + storage.load_from_filename("source/whatever.jpg") + + mock_transfer.upload_file.assert_called_with( + "source/whatever.jpg", "bucket", "some/file", extra_args={"ContentType": "image/jpeg"}) @mock.patch("boto3.session.Session", autospec=True) def test_save_to_file(self, mock_session_class): @@ -375,7 +405,7 @@ def test_load_from_directory(self, mock_session_class, mock_transfer_class): mock_session = mock_session_class.return_value mock_s3_client = mock_session.client.return_value - temp_directory = create_temp_nested_directory_with_files() + temp_directory = create_temp_nested_directory_with_files(suffixes=[".js", ".unknown", ""]) storage = storagelib.get_storage( "s3://access_key:access_secret@bucket/dir?region=US_EAST") @@ -385,15 +415,18 @@ def test_load_from_directory(self, mock_session_class, mock_transfer_class): mock_s3_client.upload_file.assert_has_calls([ mock.call( temp_directory["temp_input_two"]["path"], "bucket", - os.path.join("dir", temp_directory["temp_input_two"]["name"])), + os.path.join("dir", temp_directory["temp_input_two"]["name"]), + ExtraArgs=None), mock.call( temp_directory["temp_input_one"]["path"], "bucket", - os.path.join("dir", temp_directory["temp_input_one"]["name"])), + os.path.join("dir", temp_directory["temp_input_one"]["name"]), + ExtraArgs={"ContentType": "application/javascript"}), mock.call( temp_directory["nested_temp_input"]["path"], "bucket", os.path.join( "dir", temp_directory["nested_temp_directory"]["name"], - temp_directory["nested_temp_input"]["name"])) + temp_directory["nested_temp_input"]["name"]), + ExtraArgs=None) ], any_order=True) @mock.patch("storage.retry.time.sleep", autospec=True) @@ -423,15 +456,18 @@ def test_load_from_directory_retries_failed_file_uploads( mock_s3_client.upload_file.assert_has_calls([ mock.call( temp_directory["temp_input_two"]["path"], "bucket", - os.path.join("dir", temp_directory["temp_input_two"]["name"])), + os.path.join("dir", temp_directory["temp_input_two"]["name"]), + ExtraArgs=None), mock.call( temp_directory["temp_input_one"]["path"], "bucket", - os.path.join("dir", temp_directory["temp_input_one"]["name"])), + os.path.join("dir", temp_directory["temp_input_one"]["name"]), + ExtraArgs=None), mock.call( temp_directory["nested_temp_input"]["path"], "bucket", os.path.join( "dir", temp_directory["nested_temp_directory"]["name"], - temp_directory["nested_temp_input"]["name"])) + temp_directory["nested_temp_input"]["name"]), + ExtraArgs=None) ], any_order=True) self.assertEqual( mock_s3_client.upload_file.call_args_list[1], From eb451b4ab8c462eb0f580eda6699efc98de66854 Mon Sep 17 00:00:00 2001 From: Thomas Stephens Date: Tue, 16 Jun 2020 13:36:04 -0500 Subject: [PATCH 3/5] Fully parse storage URI when reading scheme out of it, for looking up the storage implementation. This prevents the error from having potentially sensitive credentials in it, if the scheme is left off the URI. Co-authored-by: Tim Jensen --- storage/storage.py | 7 +++++-- tests/test_storage.py | 22 +++++++++++++++++++++- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/storage/storage.py b/storage/storage.py index 8fbfa42..53060f1 100644 --- a/storage/storage.py +++ b/storage/storage.py @@ -141,5 +141,8 @@ class InvalidStorageUri(RuntimeError): def get_storage(storage_uri): - storage_type = storage_uri.split("://")[0] - return _STORAGE_TYPES[storage_type](storage_uri) + storage_type = urlparse.urlparse(storage_uri).scheme + try: + return _STORAGE_TYPES[storage_type](storage_uri) + except KeyError: + raise InvalidStorageUri("Invalid storage type '{}'".format(storage_type)) diff --git a/tests/test_storage.py b/tests/test_storage.py index 2a093e7..8d64b8d 100644 --- a/tests/test_storage.py +++ b/tests/test_storage.py @@ -4,7 +4,7 @@ from unittest import TestCase import storage as storagelib -from storage.storage import Storage +from storage.storage import Storage, InvalidStorageUri class TestTimeout(TestCase): @@ -56,6 +56,26 @@ class MyStorageClass(storagelib.storage.Storage): self.assertIsInstance(store_obj, MyStorageClass) +class TestGetStorage(TestCase): + def test_raises_for_unsupported_scheme(self): + with self.assertRaises(InvalidStorageUri) as error: + storagelib.get_storage("unsupported://creds:secret@bucket/path") + + self.assertEqual("Invalid storage type 'unsupported'", str(error.exception)) + + def test_raises_for_missing_scheme(self): + with self.assertRaises(InvalidStorageUri) as error: + storagelib.get_storage("//creds:secret@invalid/storage/uri") + + self.assertEqual("Invalid storage type ''", str(error.exception)) + + def test_raises_for_missing_scheme_and_netloc(self): + with self.assertRaises(InvalidStorageUri) as error: + storagelib.get_storage("invalid/storage/uri") + + self.assertEqual("Invalid storage type ''", str(error.exception)) + + class TestStorage(TestCase): def test_get_sanitized_uri_removes_username_and_password(self): storage = Storage(storage_uri="https://username:password@bucket/path/filename") From 605392b0d946a71c19cfa5be8a905f30bb62296f Mon Sep 17 00:00:00 2001 From: Thomas Stephens Date: Tue, 16 Jun 2020 13:38:51 -0500 Subject: [PATCH 4/5] Add a file with a mimetype to the integration tests input, to get more code branch coverage (although, note that we don't assert that the mimetype is set correct at the storage end). Co-authored-by: Tim Jensen --- integration-tests.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/integration-tests.py b/integration-tests.py index 389adfd..e4971b8 100644 --- a/integration-tests.py +++ b/integration-tests.py @@ -40,6 +40,12 @@ def setUpClass(cls): os.write(handle, contents) os.close(handle) + handle, _ = tempfile.mkstemp( + prefix="source-mimetyped-file", suffix=".png", + dir=tempfile.mkdtemp(prefix="source-contentdir", dir=cls.directory)) + os.write(handle, contents) + os.close(handle) + @classmethod def tearDownClass(cls): shutil.rmtree(cls.directory) From 1029d62d075e11e38a6e5aaf0c2fd3a7fd39dc56 Mon Sep 17 00:00:00 2001 From: Thomas Stephens Date: Tue, 16 Jun 2020 13:40:03 -0500 Subject: [PATCH 5/5] Bump the version number Co-authored-by: Tim Jensen --- README.md | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index a29d186..f3b513d 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ Install via pip: pip install object_storage ``` -The current version is `0.12.7`. +The current version is `0.12.8`. ## Quick Start ## diff --git a/setup.py b/setup.py index b05b9d8..b8aeca4 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ ] setup(name="object_storage", - version="0.12.7", + version="0.12.8", description="Python library for accessing files over various file transfer protocols.", url="https://github.com/ustudio/storage", packages=["storage"],