Skip to content

Commit

Permalink
Merge pull request #56 from ustudio/correctly-set-content-type-with-g…
Browse files Browse the repository at this point in the history
…oogle-storage

Correctly Set Content Type with Google Storage and Amazon S3
  • Loading branch information
spiralman authored Jun 17, 2020
2 parents 28349fb + 1ac35ca commit 8c88537
Show file tree
Hide file tree
Showing 16 changed files with 135 additions and 33 deletions.
2 changes: 1 addition & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ jobs:
name: run typechecks
command: |
. ~/venv/bin/activate
MYPYPATH=stubs mypy --strict storage/ tests/
mypy
- store_artifacts:
path: test-reports
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ Install via pip:
pip install object_storage
```

The current version is `0.14.0`.
The current Python 2.7 version is `0.12.7`
The current version is `0.14.1`.
For Python 2.7, use the latest release from the `v0.12` branch.

## Quick Start ##

Expand Down
2 changes: 2 additions & 0 deletions mypy.ini
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,5 @@ warn_redundant_casts = True
warn_unused_ignores = True
warn_return_any = True
implicit_reexport = False
mypy_path = stubs
files = storage/,tests/
3 changes: 3 additions & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[pytest]
filterwarnings =
error
10 changes: 5 additions & 5 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
atomicwrites==1.3.0
attrs==19.1.0
boto3==1.9.213
botocore==1.12.213
boto3==1.14.3
botocore==1.17.3
cachetools==3.1.1
certifi==2019.6.16
chardet==3.0.4
Expand All @@ -28,7 +28,7 @@ os-service-types==1.7.0
packaging==19.1
pbr==5.4.2
pluggy==0.12.0
protobuf==3.9.1
protobuf==3.12.2
py==1.8.0
pyasn1==0.4.6
pyasn1-modules==0.2.6
Expand All @@ -41,11 +41,11 @@ python-swiftclient==3.8.0
pytz==2019.2
requests==2.22.0
rsa==4.0
s3transfer==0.2.1
s3transfer==0.3.3
six==1.12.0
stevedore==1.30.1
typed-ast==1.4.0
typing-extensions==3.7.4
urllib3==1.25.3
urllib3==1.25.9
wcwidth==0.1.7
zipp==0.5.2
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
]

setup(name="object_storage",
version="0.14.0",
version="0.14.1",
description="Python library for accessing files over various file transfer protocols.",
url="https://github.com/ustudio/storage",
packages=["storage"],
Expand Down
5 changes: 4 additions & 1 deletion storage/google_storage.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import base64
import datetime
import json
import mimetypes
import os

import google.cloud.storage.client
Expand Down Expand Up @@ -45,7 +46,9 @@ def load_from_filename(self, file_path: str) -> None:

def load_from_file(self, in_file: BinaryIO) -> None:
blob = self._get_blob()
blob.upload_from_file(in_file)
blob.upload_from_file(
in_file,
content_type=mimetypes.guess_type(self._storage_uri)[0])

def delete(self) -> None:
blob = self._get_blob()
Expand Down
25 changes: 21 additions & 4 deletions storage/s3_storage.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import mimetypes
import os
from urllib.parse import parse_qs, unquote

import boto3.session
import boto3.s3.transfer
from botocore.session import Session

from typing import BinaryIO, Optional
from typing import BinaryIO, Dict, Optional

from storage import retry
from storage.storage import Storage, register_storage_protocol, _LARGE_CHUNK
Expand Down Expand Up @@ -73,13 +74,24 @@ def save_to_directory(self, directory_path: str) -> None:
def load_from_filename(self, file_path: str) -> None:
client = self._connect()

extra_args = None
content_type = mimetypes.guess_type(file_path)[0]
if content_type is not None:
extra_args = {"ContentType": content_type}

transfer = boto3.s3.transfer.S3Transfer(client)
transfer.upload_file(file_path, self._bucket, self._keyname)
transfer.upload_file(file_path, self._bucket, self._keyname, extra_args=extra_args)

def load_from_file(self, in_file: BinaryIO) -> None:
client = self._connect()

client.put_object(Bucket=self._bucket, Key=self._keyname, Body=in_file)
extra_args: Dict[str, str] = {}

content_type = mimetypes.guess_type(self._storage_uri)[0]
if content_type is not None:
extra_args["ContentType"] = content_type

client.put_object(Bucket=self._bucket, Key=self._keyname, Body=in_file, **extra_args)

def load_from_directory(self, source_directory: str) -> None:
client = self._connect()
Expand All @@ -89,8 +101,13 @@ def load_from_directory(self, source_directory: str) -> None:

for filename in files:
upload_path = os.path.join(relative_path, filename)
extra_args = None
content_type = mimetypes.guess_type(filename)[0]
if content_type is not None:
extra_args = {"ContentType": content_type}
retry.attempt(
client.upload_file, os.path.join(root, filename), self._bucket, upload_path)
client.upload_file, os.path.join(root, filename), self._bucket, upload_path,
ExtraArgs=extra_args)

def delete(self) -> None:
client = self._connect()
Expand Down
7 changes: 5 additions & 2 deletions storage/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,11 @@ class InvalidStorageUri(RuntimeError):


def get_storage(storage_uri: str) -> Storage:
storage_type = storage_uri.split("://")[0]
return _STORAGE_TYPES[storage_type](storage_uri)
storage_type = urlparse(storage_uri).scheme
try:
return _STORAGE_TYPES[storage_type](storage_uri)
except KeyError:
raise InvalidStorageUri(f"Invalid storage type '{storage_type}'")


ParsedQuery = Dict[str, List[str]]
Expand Down
3 changes: 2 additions & 1 deletion stubs/botocore/session.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ class Session(object):

def download_file(self, Bucket: str, Key: str, filepath: str) -> None: ...

def put_object(self, Bucket: str, Key: str, Body: BinaryIO) -> None: ...
def put_object(
self, Bucket: str, Key: str, Body: BinaryIO, ContentType: Optional[str]) -> None: ...

def upload_file(self, Filename: str, Bucket: str, Key: str) -> None: ...

Expand Down
2 changes: 1 addition & 1 deletion stubs/google/cloud/storage/blob.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class Blob(object):

def upload_from_filename(self, path: str) -> None: ...

def upload_from_file(self, fp: BinaryIO) -> None: ...
def upload_from_file(self, fp: BinaryIO, content_type: Optional[str]) -> None: ...

def delete(self) -> None: ...

Expand Down
6 changes: 6 additions & 0 deletions test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@ def setUpClass(cls):
os.write(handle, contents)
os.close(handle)

handle, _ = tempfile.mkstemp(
prefix="source-mimetyped-file", suffix=".png",
dir=tempfile.mkdtemp(prefix="source-contentdir", dir=cls.directory))
os.write(handle, contents)
os.close(handle)

@classmethod
def tearDownClass(cls):
shutil.rmtree(cls.directory)
Expand Down
14 changes: 8 additions & 6 deletions tests/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ def __init__(self, parent: Optional[str] = None) -> None:
def name(self) -> str:
return self.directory.name

def add_file(self, contents: bytes) -> NamedIO:
temp = cast(NamedIO, tempfile.NamedTemporaryFile(dir=self.directory.name))
def add_file(self, contents: bytes, suffix: str = "") -> NamedIO:
temp = cast(NamedIO, tempfile.NamedTemporaryFile(dir=self.directory.name, suffix=suffix))
temp.write(contents)
temp.flush()
temp.seek(0)
Expand Down Expand Up @@ -89,19 +89,21 @@ def cleanup_nested_directory(value: NestedDirectoryDict) -> None:
value["temp_directory"]["object"].cleanup()


def create_temp_nested_directory_with_files() -> NestedDirectoryDict:
def create_temp_nested_directory_with_files(
suffixes: List[str] = ["", "", ""]
) -> NestedDirectoryDict:
# temp_directory/
# temp_input_one
# temp_input_two
# nested_temp_directory/
# nested_temp_input

directory = TempDirectory()
new_file_1 = directory.add_file(b"FOO")
new_file_2 = directory.add_file(b"BAR")
new_file_1 = directory.add_file(b"FOO", suffixes[0])
new_file_2 = directory.add_file(b"BAR", suffixes[1])

nested_directory = directory.add_dir()
nested_file = nested_directory.add_file(b"FOOBAR")
nested_file = nested_directory.add_file(b"FOOBAR", suffixes[2])

return {
"temp_directory": {
Expand Down
11 changes: 10 additions & 1 deletion tests/test_google_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,16 @@ def test_load_from_file_uploads_blob_from_file_object(self) -> None:
self.assert_gets_bucket_with_credentials()

self.mock_bucket.blob.assert_called_once_with("path/filename")
self.mock_blob.upload_from_file.assert_called_once_with(mock_file)
self.mock_blob.upload_from_file.assert_called_once_with(mock_file, content_type=None)

def test_load_from_file_guesses_content_type_based_on_filename(self) -> None:
mock_file = mock.Mock()

storage = get_storage("gs://{}@bucketname/path/whatever.html".format(self.credentials))

storage.load_from_file(mock_file)

self.mock_blob.upload_from_file.assert_called_once_with(mock_file, content_type="text/html")

def test_delete_deletes_blob(self) -> None:
storage = get_storage("gs://{}@bucketname/path/filename".format(self.credentials))
Expand Down
52 changes: 44 additions & 8 deletions tests/test_s3_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,22 @@ def test_load_from_file(self, mock_session_class: mock.Mock) -> None:

mock_s3.put_object.assert_called_with(Bucket="bucket", Key="some/file", Body=mock_file)

@mock.patch("boto3.session.Session", autospec=True)
def test_load_from_file_guesses_content_type_based_on_filename(
self, mock_session_class: mock.Mock) -> None:
mock_session = mock_session_class.return_value
mock_s3 = mock_session.client.return_value

mock_file = mock.Mock()

storage = get_storage(
"s3://access_key:access_secret@bucket/some/whatever.jpg")

storage.load_from_file(mock_file)

mock_s3.put_object.assert_called_with(
Bucket="bucket", Key="some/whatever.jpg", Body=mock_file, ContentType="image/jpeg")

@mock.patch("boto3.s3.transfer.S3Transfer", autospec=True)
@mock.patch("boto3.session.Session", autospec=True)
def test_load_from_filename(
Expand All @@ -92,7 +108,21 @@ def test_load_from_filename(

mock_transfer_class.assert_called_with(mock_s3)

mock_transfer.upload_file.assert_called_with("source/file", "bucket", "some/file")
mock_transfer.upload_file.assert_called_with(
"source/file", "bucket", "some/file", extra_args=None)

@mock.patch("boto3.s3.transfer.S3Transfer", autospec=True)
@mock.patch("boto3.session.Session", autospec=True)
def test_load_from_filename_guesses_content_type_based_on_filename(
self, mock_session_class: mock.Mock, mock_transfer_class: mock.Mock) -> None:
mock_transfer = mock_transfer_class.return_value

storage = get_storage("s3://access_key:access_secret@bucket/some/file")

storage.load_from_filename("source/whatever.jpeg")

mock_transfer.upload_file.assert_called_with(
"source/whatever.jpeg", "bucket", "some/file", extra_args={"ContentType": "image/jpeg"})

@mock.patch("boto3.session.Session", autospec=True)
def test_save_to_file(self, mock_session_class: mock.Mock) -> None:
Expand Down Expand Up @@ -399,7 +429,7 @@ def test_load_from_directory(
mock_session = mock_session_class.return_value
mock_s3_client = mock_session.client.return_value

self.temp_directory = create_temp_nested_directory_with_files()
self.temp_directory = create_temp_nested_directory_with_files([".js", ".unknown", ""])

storage = get_storage(
"s3://access_key:access_secret@bucket/dir?region=US_EAST")
Expand All @@ -409,15 +439,18 @@ def test_load_from_directory(
mock_s3_client.upload_file.assert_has_calls([
mock.call(
self.temp_directory["temp_input_two"]["path"], "bucket",
os.path.join("dir", self.temp_directory["temp_input_two"]["name"])),
os.path.join("dir", self.temp_directory["temp_input_two"]["name"]),
ExtraArgs=None),
mock.call(
self.temp_directory["temp_input_one"]["path"], "bucket",
os.path.join("dir", self.temp_directory["temp_input_one"]["name"])),
os.path.join("dir", self.temp_directory["temp_input_one"]["name"]),
ExtraArgs={"ContentType": "application/javascript"}),
mock.call(
self.temp_directory["nested_temp_input"]["path"], "bucket",
os.path.join(
"dir", self.temp_directory["nested_temp_directory"]["name"],
self.temp_directory["nested_temp_input"]["name"]))
self.temp_directory["nested_temp_input"]["name"]),
ExtraArgs=None)
], any_order=True)

@mock.patch("storage.retry.time.sleep", autospec=True)
Expand Down Expand Up @@ -448,15 +481,18 @@ def test_load_from_directory_retries_failed_file_uploads(
mock_s3_client.upload_file.assert_has_calls([
mock.call(
self.temp_directory["temp_input_two"]["path"], "bucket",
os.path.join("dir", self.temp_directory["temp_input_two"]["name"])),
os.path.join("dir", self.temp_directory["temp_input_two"]["name"]),
ExtraArgs=None),
mock.call(
self.temp_directory["temp_input_one"]["path"], "bucket",
os.path.join("dir", self.temp_directory["temp_input_one"]["name"])),
os.path.join("dir", self.temp_directory["temp_input_one"]["name"]),
ExtraArgs=None),
mock.call(
self.temp_directory["nested_temp_input"]["path"], "bucket",
os.path.join(
"dir", self.temp_directory["nested_temp_directory"]["name"],
self.temp_directory["nested_temp_input"]["name"]))
self.temp_directory["nested_temp_input"]["name"]),
ExtraArgs=None)
], any_order=True)
self.assertEqual(
mock_s3_client.upload_file.call_args_list[1],
Expand Down
20 changes: 20 additions & 0 deletions tests/test_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,26 @@ def _validate_parsed_uri(self) -> None:
get_storage(f"{self.scheme}://some/uri/path")


class TestGetStorage(TestCase):
def test_raises_for_unsupported_scheme(self) -> None:
with self.assertRaises(InvalidStorageUri) as error:
get_storage("unsupported://creds:secret@bucket/path")

self.assertEqual("Invalid storage type 'unsupported'", str(error.exception))

def test_raises_for_missing_scheme(self) -> None:
with self.assertRaises(InvalidStorageUri) as error:
get_storage("//creds:secret@invalid/storage/uri")

self.assertEqual("Invalid storage type ''", str(error.exception))

def test_raises_for_missing_scheme_and_netloc(self) -> None:
with self.assertRaises(InvalidStorageUri) as error:
get_storage("invalid/storage/uri")

self.assertEqual("Invalid storage type ''", str(error.exception))


class TestStorage(TestCase):
def test_get_sanitized_uri_removes_username_and_password(self) -> None:
storage = Storage(storage_uri="https://username:password@bucket/path/filename")
Expand Down

0 comments on commit 8c88537

Please sign in to comment.