From ca990a351e8625a2182b7f301a836b412a7f3b14 Mon Sep 17 00:00:00 2001 From: Molly Thurin Date: Wed, 8 Apr 2020 13:31:52 +0900 Subject: [PATCH 1/6] Add santize storage uri methods, one that is overly restrictive, as well as an implementation that includes extra values in the URL. [PLAY-2375] --- storage/url_parser.py | 30 +++++++++++++++++++++++++++ tests/test_url_parser.py | 44 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) create mode 100644 storage/url_parser.py create mode 100644 tests/test_url_parser.py diff --git a/storage/url_parser.py b/storage/url_parser.py new file mode 100644 index 0000000..bac064b --- /dev/null +++ b/storage/url_parser.py @@ -0,0 +1,30 @@ +from urlparse import parse_qsl, ParseResult +from urllib import urlencode + + +def _new_uri(parsed_uri, new_netloc, new_query): + return ParseResult( + parsed_uri.scheme, new_netloc, parsed_uri.path, parsed_uri.params, urlencode(new_query), + parsed_uri.fragment) + + +def remove_user_info(parsed_uri): + new_netloc = parsed_uri.hostname + + if parsed_uri.port is not None: + new_netloc = ":".join((new_netloc, str(parsed_uri.port))) + + new_uri = _new_uri(parsed_uri, new_netloc, dict(parse_qsl(parsed_uri.query))) + + return new_uri.geturl() + + +def sanitize_resource_uri(parsed_uri): + new_netloc = parsed_uri.hostname + + if parsed_uri.port is not None: + new_netloc = ":".join((new_netloc, str(parsed_uri.port))) + + new_uri = _new_uri(parsed_uri, new_netloc, {}) + + return new_uri.geturl() diff --git a/tests/test_url_parser.py b/tests/test_url_parser.py new file mode 100644 index 0000000..cf20c37 --- /dev/null +++ b/tests/test_url_parser.py @@ -0,0 +1,44 @@ +from urlparse import urlparse + +from unittest import TestCase + +from storage import url_parser + + +class TestUrlParser(TestCase): + + def test_remove_user_info_removes_username_and_password(self): + storage_uri = urlparse("https://username:password@bucket/path/filename") + sanitized_uri = url_parser.remove_user_info(storage_uri) + + self.assertEqual("https://bucket/path/filename", sanitized_uri) + + def test_remove_user_info_preserves_parameters(self): + storage_uri = urlparse("https://username:password@bucket/path/filename?other=parameter") + sanitized_uri = url_parser.remove_user_info(storage_uri) + + self.assertEqual("https://bucket/path/filename?other=parameter", sanitized_uri) + + def test_remove_user_info_preserves_port_number(self): + storage_uri = urlparse("ftp://username:password@ftp.foo.com:8080/path/filename") + sanitized_uri = url_parser.remove_user_info(storage_uri) + + self.assertEqual("ftp://ftp.foo.com:8080/path/filename", sanitized_uri) + + def test_sanitize_resource_uri_removes_username_and_password(self): + storage_uri = urlparse("https://username:password@bucket/path/filename") + sanitized_uri = url_parser.sanitize_resource_uri(storage_uri) + + self.assertEqual("https://bucket/path/filename", sanitized_uri) + + def test_sanitize_resource_uri_does_not_preserves_parameters(self): + storage_uri = urlparse("https://username:password@bucket/path/filename?other=parameter") + sanitized_uri = url_parser.sanitize_resource_uri(storage_uri) + + self.assertEqual("https://bucket/path/filename", sanitized_uri) + + def test_sanitize_resource_uri_preserves_port_number(self): + storage_uri = urlparse("ftp://username:password@ftp.foo.com:8080/path/filename") + sanitized_uri = url_parser.sanitize_resource_uri(storage_uri) + + self.assertEqual("ftp://ftp.foo.com:8080/path/filename", sanitized_uri) From 07d81ad8c6de6f8a539e4e4d7116fa56018b4275 Mon Sep 17 00:00:00 2001 From: Molly Thurin Date: Wed, 8 Apr 2020 14:35:24 +0900 Subject: [PATCH 2/6] Add default implementation for santizing the storage uri. [PLAY-2375] --- storage/storage.py | 5 +++++ tests/test_google_storage.py | 6 ++++++ tests/test_storage.py | 25 ++++++++++++++++++++++++- 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/storage/storage.py b/storage/storage.py index 4b9fca1..8fbfa42 100644 --- a/storage/storage.py +++ b/storage/storage.py @@ -2,6 +2,8 @@ import threading import urlparse +from url_parser import sanitize_resource_uri + _STORAGE_TYPES = {} # maintains supported storage protocols _LARGE_CHUNK = 32 * 1024 * 1024 @@ -118,6 +120,9 @@ def get_download_url(self, seconds=60, key=None): raise NotImplementedError( "{0} does not implement 'get_download_url'".format(self._class_name())) + def get_sanitized_uri(self): + return sanitize_resource_uri(self._parsed_storage_uri) + def _generate_download_url_from_base(base, object_name): """Generate a download url by joining the base with the storage object_name. diff --git a/tests/test_google_storage.py b/tests/test_google_storage.py index 34b7419..c5e4dd5 100644 --- a/tests/test_google_storage.py +++ b/tests/test_google_storage.py @@ -126,6 +126,12 @@ def test_get_download_url_does_not_use_key_when_provided(self): expiration=datetime.timedelta(seconds=60), response_disposition="attachment") + def test_get_sanitized_uri_returns_storage_uri_without_username_and_password(self): + storage = get_storage("gs://{}@bucketname/path/filename".format(self.credentials)) + sanitized_uri = storage.get_sanitized_uri() + + self.assertEqual("gs://bucketname/path/filename", sanitized_uri) + def _mock_blob(self, name): blob = mock.Mock() blob.name = name diff --git a/tests/test_storage.py b/tests/test_storage.py index c30e235..2a093e7 100644 --- a/tests/test_storage.py +++ b/tests/test_storage.py @@ -1,8 +1,11 @@ import mock -import storage as storagelib import threading + from unittest import TestCase +import storage as storagelib +from storage.storage import Storage + class TestTimeout(TestCase): @mock.patch("threading.Thread", wraps=threading.Thread) @@ -51,3 +54,23 @@ class MyStorageClass(storagelib.storage.Storage): uri = "{0}://some/uri/path".format(self.scheme) store_obj = storagelib.get_storage(uri) self.assertIsInstance(store_obj, MyStorageClass) + + +class TestStorage(TestCase): + def test_get_sanitized_uri_removes_username_and_password(self): + storage = Storage(storage_uri="https://username:password@bucket/path/filename") + sanitized_uri = storage.get_sanitized_uri() + + self.assertEqual("https://bucket/path/filename", sanitized_uri) + + def test_get_sanitized_uri_does_not_preserves_parameters(self): + storage = Storage(storage_uri="https://username:password@bucket/path/filename?other=param") + sanitized_uri = storage.get_sanitized_uri() + + self.assertEqual("https://bucket/path/filename", sanitized_uri) + + def test_get_sanitized_uri_preserves_port_number(self): + storage = Storage(storage_uri="ftp://username:password@ftp.foo.com:8080/path/filename") + sanitized_uri = storage.get_sanitized_uri() + + self.assertEqual("ftp://ftp.foo.com:8080/path/filename", sanitized_uri) From 543fe012456acd6a7233a55ddc1825af3224fc40 Mon Sep 17 00:00:00 2001 From: Molly Thurin Date: Wed, 8 Apr 2020 15:18:33 +0900 Subject: [PATCH 3/6] Update classes that require implementations that need to include extra values in the base uri. [PLAY-2375] --- storage/ftp_storage.py | 4 ++ storage/local_storage.py | 4 ++ storage/s3_storage.py | 4 ++ storage/swift_storage.py | 18 ++++++++- tests/test_ftp_storage.py | 14 +++++++ tests/test_local_storage.py | 19 ++++++++++ tests/test_s3_storage.py | 9 +++++ tests/test_swift_storage.py | 75 +++++++++++++++++++++++++++++++++++++ 8 files changed, 145 insertions(+), 2 deletions(-) diff --git a/storage/ftp_storage.py b/storage/ftp_storage.py index 15601cd..ba913cf 100644 --- a/storage/ftp_storage.py +++ b/storage/ftp_storage.py @@ -7,6 +7,7 @@ from .storage import Storage, register_storage_protocol, _generate_download_url_from_base, \ DEFAULT_FTP_TIMEOUT, DEFAULT_FTP_KEEPALIVE_ENABLE, DEFAULT_FTP_KEEPCNT, \ DEFAULT_FTP_KEEPIDLE, DEFAULT_FTP_KEEPINTVL +from url_parser import remove_user_info @register_storage_protocol("ftp") @@ -221,6 +222,9 @@ def get_download_url(self, seconds=60, key=None): return _generate_download_url_from_base( self._download_url_base, self._parsed_storage_uri.path.split('/')[-1]) + def get_sanitized_uri(self): + return remove_user_info(self._parsed_storage_uri) + @register_storage_protocol("ftps") class FTPSStorage(FTPStorage): diff --git a/storage/local_storage.py b/storage/local_storage.py index 7f10e31..da7d72d 100644 --- a/storage/local_storage.py +++ b/storage/local_storage.py @@ -4,6 +4,7 @@ import urlparse from .storage import Storage, register_storage_protocol, _generate_download_url_from_base +from url_parser import remove_user_info @register_storage_protocol("file") @@ -86,3 +87,6 @@ def get_download_url(self, seconds=60, key=None): """ return _generate_download_url_from_base( self._download_url_base, self._parsed_storage_uri.path.split('/')[-1]) + + def get_sanitized_uri(self): + return remove_user_info(self._parsed_storage_uri) diff --git a/storage/s3_storage.py b/storage/s3_storage.py index d6a2752..6274804 100644 --- a/storage/s3_storage.py +++ b/storage/s3_storage.py @@ -7,6 +7,7 @@ from . import retry from .storage import Storage, register_storage_protocol, _LARGE_CHUNK +from url_parser import remove_user_info @register_storage_protocol("s3") @@ -114,3 +115,6 @@ def get_download_url(self, seconds=60, key=None): Params={"Bucket": self._bucket, "Key": self._keyname}, ExpiresIn=seconds ) + + def get_sanitized_uri(self): + return remove_user_info(self._parsed_storage_uri) diff --git a/storage/swift_storage.py b/storage/swift_storage.py index d82559f..ef5c021 100644 --- a/storage/swift_storage.py +++ b/storage/swift_storage.py @@ -2,7 +2,9 @@ import mimetypes import os import urllib +from urllib import urlencode import urlparse +from urlparse import parse_qsl, ParseResult import pyrax @@ -191,6 +193,19 @@ def get_download_url(self, seconds=60, key=None): return urlparse.urlunparse(parsed_url) + def get_sanitized_uri(self): + parsed_uri = self._parsed_storage_uri + new_query = dict(parse_qsl(parsed_uri.query)) + + if "download_url_key" in new_query: + del new_query["download_url_key"] + + new_uri = ParseResult( + parsed_uri.scheme, parsed_uri.hostname, parsed_uri.path, parsed_uri.params, + urlencode(new_query), parsed_uri.fragment) + + return new_uri.geturl() + def register_swift_protocol(scheme, auth_endpoint): """Register a Swift based storage protocol under the specified scheme.""" @@ -213,8 +228,7 @@ def __init__(self, *args, **kwargs): return decorate_swift_protocol -@register_swift_protocol(scheme="cloudfiles", - auth_endpoint=None) +@register_swift_protocol(scheme="cloudfiles", auth_endpoint=None) class CloudFilesStorage(SwiftStorage): """Rackspace Cloudfiles storage. diff --git a/tests/test_ftp_storage.py b/tests/test_ftp_storage.py index 648777a..c41f8f5 100644 --- a/tests/test_ftp_storage.py +++ b/tests/test_ftp_storage.py @@ -593,6 +593,20 @@ def test_ftp_get_download_url_returns_none_with_empty_base(self, mock_ftp_class) mock_ftp_class.assert_not_called() + @mock.patch("ftplib.FTP", autospec=True) + def test_ftp_get_sanitized_uri(self, mock_ftp_class): + download_url_base = urllib.quote_plus("http://hostname/path/to/") + + ftpuri = "ftp://user:password@ftp.foo.com/some/dir/file.txt?download_url_base={}".format( + download_url_base) + + storage = storagelib.get_storage(ftpuri) + sanitized_uri = storage.get_sanitized_uri() + + self.assertEqual( + "ftp://ftp.foo.com/some/dir/file.txt?download_url_base={}".format(download_url_base), + sanitized_uri) + class TestFTPSStorage(TestCase): @mock.patch("ftplib.FTP_TLS", autospec=True) diff --git a/tests/test_local_storage.py b/tests/test_local_storage.py index aa18a18..7adb9a1 100644 --- a/tests/test_local_storage.py +++ b/tests/test_local_storage.py @@ -257,3 +257,22 @@ def test_local_storage_get_download_url_returns_none_on_empty_base(self): with self.assertRaises(DownloadUrlBaseUndefinedError): out_storage.get_download_url() + + def test_local_storage_get_sanitized_uri_returns_filepath(self): + temp_input = tempfile.NamedTemporaryFile() + temp_input.write("FOOBAR") + temp_input.flush() + + download_url_base = "http://host:123/path/to/" + download_url_base_encoded = urllib.quote_plus(download_url_base) + + storage_uri = "file://{}?download_url_base={}".format( + temp_input.name, download_url_base_encoded) + out_storage = storagelib.get_storage(storage_uri) + + sanitized_uri = out_storage.get_sanitized_uri() + + self.assertEqual( + "file://{}?download_url_base={}".format( + temp_input.name, download_url_base_encoded), + sanitized_uri) diff --git a/tests/test_s3_storage.py b/tests/test_s3_storage.py index 8c77844..25b4a4f 100644 --- a/tests/test_s3_storage.py +++ b/tests/test_s3_storage.py @@ -642,3 +642,12 @@ def test_get_download_url_calls_boto_generate_presigned_url_custom_expiration( Params={"Bucket": "some_bucket", "Key": "some/file"}, ExpiresIn=1000 ) + + def test_get_sanitized_uri_returns_storage_uri_without_username_and_password(self): + url = "s3://access_key:access_secret@some_bucket/" + key = "some/filename" + + storage = storagelib.get_storage("".join([url, key, "?region=US_EAST"])) + sanitized_uri = storage.get_sanitized_uri() + + self.assertEqual("s3://some_bucket/some/filename?region=US_EAST", sanitized_uri) diff --git a/tests/test_swift_storage.py b/tests/test_swift_storage.py index 1cca316..0eaa49b 100644 --- a/tests/test_swift_storage.py +++ b/tests/test_swift_storage.py @@ -1,6 +1,7 @@ import os from StringIO import StringIO import tempfile +from urllib import urlencode from unittest import TestCase import mock @@ -697,6 +698,43 @@ def test_swift_get_download_url_encodes_object_names_with_spaces(self, mock_crea download_url, "http://cloudfiles.com/path/to/filename%20with%20spaces.txt?param1=12345¶m2=67890") + def test_get_sanitized_uri_returns_storage_uri_without_username_and_password(self): + base_uri = "swift://USER:KEY@CONTAINER/path/to/file.mp4" + query_args = { + "auth_endpoint": "http://identity.server.com:1234/v2/", + "tenant_id": "1234", + "region": "DFW" + } + + swift_uri = "{}?{}".format(base_uri, urlencode(query_args)) + storage_object = storagelib.get_storage(swift_uri) + + sanitized_uri = storage_object.get_sanitized_uri() + + self.assertEqual( + "swift://container/path/to/file.mp4?{}".format(urlencode(query_args)), + sanitized_uri) + + def test_get_sanitized_uri_returns_storage_uri_without_download_url_key(self): + base_uri = "swift://USER:KEY@CONTAINER/path/to/file.mp4" + query_args = { + "auth_endpoint": "http://identity.server.com:1234/v2/", + "tenant_id": "1234", + "region": "DFW" + } + + updated_query_args = query_args.copy() + updated_query_args.update({"download_url_key": "KEY"}) + + swift_uri = "{}?{}".format(base_uri, urlencode(updated_query_args)) + storage_object = storagelib.get_storage(swift_uri) + + sanitized_uri = storage_object.get_sanitized_uri() + + self.assertEqual( + "swift://container/path/to/file.mp4?{}".format(urlencode(query_args)), + sanitized_uri) + class TestRegisterSwiftProtocol(TestCase): @@ -880,3 +918,40 @@ def test_rackspace_handles_incorrectly_filtered_metadata_when_fetching_download_ mock_cloudfiles.get_temp_url.assert_called_once_with( "container", "file.txt", seconds=60, method="GET", key="secret_key_from_server") + + def test_get_sanitized_uri_returns_storage_uri_without_username_and_password(self): + base_uri = "cloudfiles://USER:KEY@CONTAINER/path/to/file.mp4" + query_args = { + "auth_endpoint": "http://identity.server.com:1234/v2/", + "tenant_id": "1234", + "region": "DFW" + } + + cloudfiles_uri = "{}?{}".format(base_uri, urlencode(query_args)) + storage_object = storagelib.get_storage(cloudfiles_uri) + + sanitized_uri = storage_object.get_sanitized_uri() + + self.assertEqual( + "cloudfiles://container/path/to/file.mp4?{}".format(urlencode(query_args)), + sanitized_uri) + + def test_get_sanitized_uri_returns_storage_uri_without_download_url_key(self): + base_uri = "cloudfiles://USER:KEY@CONTAINER/path/to/file.mp4" + query_args = { + "auth_endpoint": "http://identity.server.com:1234/v2/", + "tenant_id": "1234", + "region": "DFW" + } + + updated_query_args = query_args.copy() + updated_query_args.update({"download_url_key": "KEY"}) + + cloudfiles_uri = "{}?{}".format(base_uri, urlencode(updated_query_args)) + storage_object = storagelib.get_storage(cloudfiles_uri) + + sanitized_uri = storage_object.get_sanitized_uri() + + self.assertEqual( + "cloudfiles://container/path/to/file.mp4?{}".format(urlencode(query_args)), + sanitized_uri) From 4ee490bacf70905448e4133199d7d28790d98959 Mon Sep 17 00:00:00 2001 From: Molly Thurin Date: Thu, 9 Apr 2020 09:42:33 +0900 Subject: [PATCH 4/6] Bumping version to v0.12.7. [PLAY-2375] --- README.md | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b495937..aaee76f 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ Install via pip: pip install object_storage ``` -The current version is `0.12.6`. +The current version is `0.12.7`. ## Quick Start ## diff --git a/setup.py b/setup.py index 07adfeb..b05b9d8 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ ] setup(name="object_storage", - version="0.12.6", + version="0.12.7", description="Python library for accessing files over various file transfer protocols.", url="https://github.com/ustudio/storage", packages=["storage"], From fb952118492e7a280ca505b1edfa8072943c1135 Mon Sep 17 00:00:00 2001 From: Molly Thurin Date: Thu, 9 Apr 2020 14:49:22 +0900 Subject: [PATCH 5/6] Document the newly added function, get_sanitized_uri(), as per reviewer's request. [PLAY-2375] --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index aaee76f..7b44d91 100644 --- a/README.md +++ b/README.md @@ -103,6 +103,10 @@ For local file storage, the call will return a URL formed by joining the `downlo `download_url_base` query param was included in the storage URI, `get_download_url` will raise a `DownloadUrlBaseUndefinedError` exception. (*see* [**file**](#file) *below*) +#### `get_sanitized_uri()` #### + +Removes the username/password, as well as all query parameters, form the URL. + ### Supported Protocols ### The following protocols are supported, and can be selected by From 4ff125b10b26a9a81808ef829eb39d2cd6edb028 Mon Sep 17 00:00:00 2001 From: Molly Thurin Date: Thu, 9 Apr 2020 16:06:11 +0900 Subject: [PATCH 6/6] Document the url parser library in the READ.me docs, as per reviewer's request. [PLAY-2375] --- README.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/README.md b/README.md index 7b44d91..a29d186 100644 --- a/README.md +++ b/README.md @@ -349,6 +349,24 @@ retry. Currently, no methods in the storage library mark exceptions as `do_not_retry`. +### url_parser ### + +The `url_parser` module provides a means for client code to sanitize URIs in +such a way that is most appropriate for the way it encodes secret data. + +#### API #### + +##### `sanitize_resource_uri(parsed_uri)` ##### + +Implementation is overly restrictive -- only returning the scheme, hostname, +port and path, no query parameters. + +##### `remove_user_info(parsed_uri)` ##### + +Implementation all credential information before the hostname (if present), and +returns the scheme, hostname, port, path, and query parameters. + + ### Extending ### There are two decorators that can be used when extending the storage library.