diff --git a/README.md b/README.md index a29d186..f3b513d 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ Install via pip: pip install object_storage ``` -The current version is `0.12.7`. +The current version is `0.12.8`. ## Quick Start ## diff --git a/integration-tests.py b/integration-tests.py index 389adfd..e4971b8 100644 --- a/integration-tests.py +++ b/integration-tests.py @@ -40,6 +40,12 @@ def setUpClass(cls): os.write(handle, contents) os.close(handle) + handle, _ = tempfile.mkstemp( + prefix="source-mimetyped-file", suffix=".png", + dir=tempfile.mkdtemp(prefix="source-contentdir", dir=cls.directory)) + os.write(handle, contents) + os.close(handle) + @classmethod def tearDownClass(cls): shutil.rmtree(cls.directory) diff --git a/setup.py b/setup.py index b05b9d8..b8aeca4 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ ] setup(name="object_storage", - version="0.12.7", + version="0.12.8", description="Python library for accessing files over various file transfer protocols.", url="https://github.com/ustudio/storage", packages=["storage"], diff --git a/storage/google_storage.py b/storage/google_storage.py index 744b4c4..20e003d 100644 --- a/storage/google_storage.py +++ b/storage/google_storage.py @@ -1,6 +1,7 @@ import base64 import datetime import json +import mimetypes import os import google.cloud.storage @@ -39,7 +40,7 @@ def load_from_filename(self, file_path): def load_from_file(self, in_file): blob = self._get_blob() - blob.upload_from_file(in_file) + blob.upload_from_file(in_file, content_type=mimetypes.guess_type(self._storage_uri)[0]) def delete(self): blob = self._get_blob() diff --git a/storage/s3_storage.py b/storage/s3_storage.py index 6274804..d4a6469 100644 --- a/storage/s3_storage.py +++ b/storage/s3_storage.py @@ -1,3 +1,4 @@ +import mimetypes import os import urllib import urlparse @@ -70,13 +71,24 @@ def save_to_directory(self, directory_path): def load_from_filename(self, file_path): client = self._connect() + extra_args = None + content_type = mimetypes.guess_type(file_path)[0] + if content_type is not None: + extra_args = {"ContentType": content_type} + transfer = boto3.s3.transfer.S3Transfer(client) - transfer.upload_file(file_path, self._bucket, self._keyname) + transfer.upload_file(file_path, self._bucket, self._keyname, extra_args=extra_args) def load_from_file(self, in_file): client = self._connect() - client.put_object(Bucket=self._bucket, Key=self._keyname, Body=in_file) + extra_args = {} + + content_type = mimetypes.guess_type(self._storage_uri)[0] + if content_type is not None: + extra_args["ContentType"] = content_type + + client.put_object(Bucket=self._bucket, Key=self._keyname, Body=in_file, **extra_args) def load_from_directory(self, source_directory): client = self._connect() @@ -86,11 +98,16 @@ def load_from_directory(self, source_directory): for file in files: upload_path = os.path.join(relative_path, file) + extra_args = None + content_type = mimetypes.guess_type(file)[0] + if content_type is not None: + extra_args = {"ContentType": content_type} retry.attempt( client.upload_file, os.path.join(root, file), self._bucket, - upload_path) + upload_path, + ExtraArgs=extra_args) def delete(self): client = self._connect() diff --git a/storage/storage.py b/storage/storage.py index 8fbfa42..53060f1 100644 --- a/storage/storage.py +++ b/storage/storage.py @@ -141,5 +141,8 @@ class InvalidStorageUri(RuntimeError): def get_storage(storage_uri): - storage_type = storage_uri.split("://")[0] - return _STORAGE_TYPES[storage_type](storage_uri) + storage_type = urlparse.urlparse(storage_uri).scheme + try: + return _STORAGE_TYPES[storage_type](storage_uri) + except KeyError: + raise InvalidStorageUri("Invalid storage type '{}'".format(storage_type)) diff --git a/tests/helpers.py b/tests/helpers.py index 24b2c19..b513b86 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -2,7 +2,7 @@ import tempfile -def create_temp_nested_directory_with_files(): +def create_temp_nested_directory_with_files(suffixes=["", "", ""]): # temp_directory/ # temp_input_one # temp_input_two @@ -11,7 +11,8 @@ def create_temp_nested_directory_with_files(): temp_dir = {} temp_dir["temp_directory"] = {"path": tempfile.mkdtemp()} temp_dir["temp_input_one"] = { - "file": tempfile.NamedTemporaryFile(dir=temp_dir["temp_directory"]["path"])} + "file": tempfile.NamedTemporaryFile( + dir=temp_dir["temp_directory"]["path"], suffix=suffixes[0])} temp_dir["temp_input_one"]["path"] = temp_dir["temp_input_one"]["file"].name temp_dir["temp_input_one"]["name"] = os.path.basename(temp_dir["temp_input_one"]["file"].name) @@ -19,7 +20,8 @@ def create_temp_nested_directory_with_files(): temp_dir["temp_input_one"]["file"].flush() temp_dir["temp_input_two"] = { - "file": tempfile.NamedTemporaryFile(dir=temp_dir["temp_directory"]["path"])} + "file": tempfile.NamedTemporaryFile( + dir=temp_dir["temp_directory"]["path"], suffix=suffixes[1])} temp_dir["temp_input_two"]["path"] = temp_dir["temp_input_two"]["file"].name temp_dir["temp_input_two"]["name"] = os.path.basename(temp_dir["temp_input_two"]["file"].name) temp_dir["temp_input_two"]["file"].write("BAR") @@ -31,7 +33,8 @@ def create_temp_nested_directory_with_files(): temp_dir["nested_temp_directory"]["path"]) temp_dir["nested_temp_input"] = { - "file": tempfile.NamedTemporaryFile(dir=temp_dir["nested_temp_directory"]["path"])} + "file": tempfile.NamedTemporaryFile( + dir=temp_dir["nested_temp_directory"]["path"], suffix=suffixes[2])} temp_dir["nested_temp_input"]["path"] = temp_dir["nested_temp_input"]["file"].name temp_dir["nested_temp_input"]["name"] = os.path.basename( temp_dir["nested_temp_input"]["file"].name) diff --git a/tests/test_google_storage.py b/tests/test_google_storage.py index c5e4dd5..418e0f2 100644 --- a/tests/test_google_storage.py +++ b/tests/test_google_storage.py @@ -80,7 +80,16 @@ def test_load_from_file_uploads_blob_from_file_object(self): self.assert_gets_bucket_with_credentials() self.mock_bucket.blob.assert_called_once_with("path/filename") - self.mock_blob.upload_from_file.assert_called_once_with(mock_file) + self.mock_blob.upload_from_file.assert_called_once_with(mock_file, content_type=None) + + def test_load_from_file_guesses_content_type_based_on_filename(self): + mock_file = mock.Mock() + + storage = get_storage("gs://{}@bucketname/path/whatever.html".format(self.credentials)) + + storage.load_from_file(mock_file) + + self.mock_blob.upload_from_file.assert_called_once_with(mock_file, content_type="text/html") def test_delete_deletes_blob(self): storage = get_storage("gs://{}@bucketname/path/filename".format(self.credentials)) diff --git a/tests/test_s3_storage.py b/tests/test_s3_storage.py index 25b4a4f..9b7c07b 100644 --- a/tests/test_s3_storage.py +++ b/tests/test_s3_storage.py @@ -52,6 +52,21 @@ def test_load_from_file(self, mock_session_class): mock_s3.put_object.assert_called_with(Bucket="bucket", Key="some/file", Body=mock_file) + @mock.patch("boto3.session.Session", autospec=True) + def test_load_from_file_guesses_content_type_based_on_filename(self, mock_session_class): + mock_session = mock_session_class.return_value + mock_s3 = mock_session.client.return_value + + mock_file = mock.Mock() + + storage = storagelib.get_storage( + "s3://access_key:access_secret@bucket/some/whatever.jpg") + + storage.load_from_file(mock_file) + + mock_s3.put_object.assert_called_with( + Bucket="bucket", Key="some/whatever.jpg", Body=mock_file, ContentType="image/jpeg") + @mock.patch("boto3.s3.transfer.S3Transfer", autospec=True) @mock.patch("boto3.session.Session", autospec=True) def test_load_from_filename(self, mock_session_class, mock_transfer_class): @@ -73,7 +88,22 @@ def test_load_from_filename(self, mock_session_class, mock_transfer_class): mock_transfer_class.assert_called_with(mock_s3) - mock_transfer.upload_file.assert_called_with("source/file", "bucket", "some/file") + mock_transfer.upload_file.assert_called_with( + "source/file", "bucket", "some/file", extra_args=None) + + @mock.patch("boto3.s3.transfer.S3Transfer", autospec=True) + @mock.patch("boto3.session.Session", autospec=True) + def test_load_from_filename_guess_content_type_based_on_filename( + self, mock_session_class, mock_transfer_class): + mock_transfer = mock_transfer_class.return_value + + storage = storagelib.get_storage( + "s3://access_key:access_secret@bucket/some/file") + + storage.load_from_filename("source/whatever.jpg") + + mock_transfer.upload_file.assert_called_with( + "source/whatever.jpg", "bucket", "some/file", extra_args={"ContentType": "image/jpeg"}) @mock.patch("boto3.session.Session", autospec=True) def test_save_to_file(self, mock_session_class): @@ -375,7 +405,7 @@ def test_load_from_directory(self, mock_session_class, mock_transfer_class): mock_session = mock_session_class.return_value mock_s3_client = mock_session.client.return_value - temp_directory = create_temp_nested_directory_with_files() + temp_directory = create_temp_nested_directory_with_files(suffixes=[".js", ".unknown", ""]) storage = storagelib.get_storage( "s3://access_key:access_secret@bucket/dir?region=US_EAST") @@ -385,15 +415,18 @@ def test_load_from_directory(self, mock_session_class, mock_transfer_class): mock_s3_client.upload_file.assert_has_calls([ mock.call( temp_directory["temp_input_two"]["path"], "bucket", - os.path.join("dir", temp_directory["temp_input_two"]["name"])), + os.path.join("dir", temp_directory["temp_input_two"]["name"]), + ExtraArgs=None), mock.call( temp_directory["temp_input_one"]["path"], "bucket", - os.path.join("dir", temp_directory["temp_input_one"]["name"])), + os.path.join("dir", temp_directory["temp_input_one"]["name"]), + ExtraArgs={"ContentType": "application/javascript"}), mock.call( temp_directory["nested_temp_input"]["path"], "bucket", os.path.join( "dir", temp_directory["nested_temp_directory"]["name"], - temp_directory["nested_temp_input"]["name"])) + temp_directory["nested_temp_input"]["name"]), + ExtraArgs=None) ], any_order=True) @mock.patch("storage.retry.time.sleep", autospec=True) @@ -423,15 +456,18 @@ def test_load_from_directory_retries_failed_file_uploads( mock_s3_client.upload_file.assert_has_calls([ mock.call( temp_directory["temp_input_two"]["path"], "bucket", - os.path.join("dir", temp_directory["temp_input_two"]["name"])), + os.path.join("dir", temp_directory["temp_input_two"]["name"]), + ExtraArgs=None), mock.call( temp_directory["temp_input_one"]["path"], "bucket", - os.path.join("dir", temp_directory["temp_input_one"]["name"])), + os.path.join("dir", temp_directory["temp_input_one"]["name"]), + ExtraArgs=None), mock.call( temp_directory["nested_temp_input"]["path"], "bucket", os.path.join( "dir", temp_directory["nested_temp_directory"]["name"], - temp_directory["nested_temp_input"]["name"])) + temp_directory["nested_temp_input"]["name"]), + ExtraArgs=None) ], any_order=True) self.assertEqual( mock_s3_client.upload_file.call_args_list[1], diff --git a/tests/test_storage.py b/tests/test_storage.py index 2a093e7..8d64b8d 100644 --- a/tests/test_storage.py +++ b/tests/test_storage.py @@ -4,7 +4,7 @@ from unittest import TestCase import storage as storagelib -from storage.storage import Storage +from storage.storage import Storage, InvalidStorageUri class TestTimeout(TestCase): @@ -56,6 +56,26 @@ class MyStorageClass(storagelib.storage.Storage): self.assertIsInstance(store_obj, MyStorageClass) +class TestGetStorage(TestCase): + def test_raises_for_unsupported_scheme(self): + with self.assertRaises(InvalidStorageUri) as error: + storagelib.get_storage("unsupported://creds:secret@bucket/path") + + self.assertEqual("Invalid storage type 'unsupported'", str(error.exception)) + + def test_raises_for_missing_scheme(self): + with self.assertRaises(InvalidStorageUri) as error: + storagelib.get_storage("//creds:secret@invalid/storage/uri") + + self.assertEqual("Invalid storage type ''", str(error.exception)) + + def test_raises_for_missing_scheme_and_netloc(self): + with self.assertRaises(InvalidStorageUri) as error: + storagelib.get_storage("invalid/storage/uri") + + self.assertEqual("Invalid storage type ''", str(error.exception)) + + class TestStorage(TestCase): def test_get_sanitized_uri_removes_username_and_password(self): storage = Storage(storage_uri="https://username:password@bucket/path/filename")