Skip to content

Commit

Permalink
Merge pull request #55 from ustudio/play-2375-python2-sanitize-storag…
Browse files Browse the repository at this point in the history
…e-uris

[PLAY-2375] -- Python 2.7 Support: Sanitize Storage URIs
  • Loading branch information
shroudofthurin authored Apr 9, 2020
2 parents 57493c5 + 4ff125b commit c3e95bd
Show file tree
Hide file tree
Showing 15 changed files with 278 additions and 5 deletions.
24 changes: 23 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Install via pip:
pip install object_storage
```

The current version is `0.12.6`.
The current version is `0.12.7`.

## Quick Start ##

Expand Down Expand Up @@ -103,6 +103,10 @@ For local file storage, the call will return a URL formed by joining the `downlo
`download_url_base` query param was included in the storage URI, `get_download_url`
will raise a `DownloadUrlBaseUndefinedError` exception. (*see* [**file**](#file) *below*)

#### `get_sanitized_uri()` ####

Removes the username/password, as well as all query parameters, form the URL.

### Supported Protocols ###

The following protocols are supported, and can be selected by
Expand Down Expand Up @@ -345,6 +349,24 @@ retry.
Currently, no methods in the storage library mark exceptions as
`do_not_retry`.

### url_parser ###

The `url_parser` module provides a means for client code to sanitize URIs in
such a way that is most appropriate for the way it encodes secret data.

#### API ####

##### `sanitize_resource_uri(parsed_uri)` #####

Implementation is overly restrictive -- only returning the scheme, hostname,
port and path, no query parameters.

##### `remove_user_info(parsed_uri)` #####

Implementation all credential information before the hostname (if present), and
returns the scheme, hostname, port, path, and query parameters.


### Extending ###

There are two decorators that can be used when extending the storage library.
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
]

setup(name="object_storage",
version="0.12.6",
version="0.12.7",
description="Python library for accessing files over various file transfer protocols.",
url="https://github.com/ustudio/storage",
packages=["storage"],
Expand Down
4 changes: 4 additions & 0 deletions storage/ftp_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from .storage import Storage, register_storage_protocol, _generate_download_url_from_base, \
DEFAULT_FTP_TIMEOUT, DEFAULT_FTP_KEEPALIVE_ENABLE, DEFAULT_FTP_KEEPCNT, \
DEFAULT_FTP_KEEPIDLE, DEFAULT_FTP_KEEPINTVL
from url_parser import remove_user_info


@register_storage_protocol("ftp")
Expand Down Expand Up @@ -221,6 +222,9 @@ def get_download_url(self, seconds=60, key=None):
return _generate_download_url_from_base(
self._download_url_base, self._parsed_storage_uri.path.split('/')[-1])

def get_sanitized_uri(self):
return remove_user_info(self._parsed_storage_uri)


@register_storage_protocol("ftps")
class FTPSStorage(FTPStorage):
Expand Down
4 changes: 4 additions & 0 deletions storage/local_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import urlparse

from .storage import Storage, register_storage_protocol, _generate_download_url_from_base
from url_parser import remove_user_info


@register_storage_protocol("file")
Expand Down Expand Up @@ -86,3 +87,6 @@ def get_download_url(self, seconds=60, key=None):
"""
return _generate_download_url_from_base(
self._download_url_base, self._parsed_storage_uri.path.split('/')[-1])

def get_sanitized_uri(self):
return remove_user_info(self._parsed_storage_uri)
4 changes: 4 additions & 0 deletions storage/s3_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from . import retry
from .storage import Storage, register_storage_protocol, _LARGE_CHUNK
from url_parser import remove_user_info


@register_storage_protocol("s3")
Expand Down Expand Up @@ -114,3 +115,6 @@ def get_download_url(self, seconds=60, key=None):
Params={"Bucket": self._bucket, "Key": self._keyname},
ExpiresIn=seconds
)

def get_sanitized_uri(self):
return remove_user_info(self._parsed_storage_uri)
5 changes: 5 additions & 0 deletions storage/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import threading
import urlparse

from url_parser import sanitize_resource_uri


_STORAGE_TYPES = {} # maintains supported storage protocols
_LARGE_CHUNK = 32 * 1024 * 1024
Expand Down Expand Up @@ -118,6 +120,9 @@ def get_download_url(self, seconds=60, key=None):
raise NotImplementedError(
"{0} does not implement 'get_download_url'".format(self._class_name()))

def get_sanitized_uri(self):
return sanitize_resource_uri(self._parsed_storage_uri)


def _generate_download_url_from_base(base, object_name):
"""Generate a download url by joining the base with the storage object_name.
Expand Down
18 changes: 16 additions & 2 deletions storage/swift_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
import mimetypes
import os
import urllib
from urllib import urlencode
import urlparse
from urlparse import parse_qsl, ParseResult

import pyrax

Expand Down Expand Up @@ -191,6 +193,19 @@ def get_download_url(self, seconds=60, key=None):

return urlparse.urlunparse(parsed_url)

def get_sanitized_uri(self):
parsed_uri = self._parsed_storage_uri
new_query = dict(parse_qsl(parsed_uri.query))

if "download_url_key" in new_query:
del new_query["download_url_key"]

new_uri = ParseResult(
parsed_uri.scheme, parsed_uri.hostname, parsed_uri.path, parsed_uri.params,
urlencode(new_query), parsed_uri.fragment)

return new_uri.geturl()


def register_swift_protocol(scheme, auth_endpoint):
"""Register a Swift based storage protocol under the specified scheme."""
Expand All @@ -213,8 +228,7 @@ def __init__(self, *args, **kwargs):
return decorate_swift_protocol


@register_swift_protocol(scheme="cloudfiles",
auth_endpoint=None)
@register_swift_protocol(scheme="cloudfiles", auth_endpoint=None)
class CloudFilesStorage(SwiftStorage):
"""Rackspace Cloudfiles storage.
Expand Down
30 changes: 30 additions & 0 deletions storage/url_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from urlparse import parse_qsl, ParseResult
from urllib import urlencode


def _new_uri(parsed_uri, new_netloc, new_query):
return ParseResult(
parsed_uri.scheme, new_netloc, parsed_uri.path, parsed_uri.params, urlencode(new_query),
parsed_uri.fragment)


def remove_user_info(parsed_uri):
new_netloc = parsed_uri.hostname

if parsed_uri.port is not None:
new_netloc = ":".join((new_netloc, str(parsed_uri.port)))

new_uri = _new_uri(parsed_uri, new_netloc, dict(parse_qsl(parsed_uri.query)))

return new_uri.geturl()


def sanitize_resource_uri(parsed_uri):
new_netloc = parsed_uri.hostname

if parsed_uri.port is not None:
new_netloc = ":".join((new_netloc, str(parsed_uri.port)))

new_uri = _new_uri(parsed_uri, new_netloc, {})

return new_uri.geturl()
14 changes: 14 additions & 0 deletions tests/test_ftp_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -593,6 +593,20 @@ def test_ftp_get_download_url_returns_none_with_empty_base(self, mock_ftp_class)

mock_ftp_class.assert_not_called()

@mock.patch("ftplib.FTP", autospec=True)
def test_ftp_get_sanitized_uri(self, mock_ftp_class):
download_url_base = urllib.quote_plus("http://hostname/path/to/")

ftpuri = "ftp://user:[email protected]/some/dir/file.txt?download_url_base={}".format(
download_url_base)

storage = storagelib.get_storage(ftpuri)
sanitized_uri = storage.get_sanitized_uri()

self.assertEqual(
"ftp://ftp.foo.com/some/dir/file.txt?download_url_base={}".format(download_url_base),
sanitized_uri)


class TestFTPSStorage(TestCase):
@mock.patch("ftplib.FTP_TLS", autospec=True)
Expand Down
6 changes: 6 additions & 0 deletions tests/test_google_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,12 @@ def test_get_download_url_does_not_use_key_when_provided(self):
expiration=datetime.timedelta(seconds=60),
response_disposition="attachment")

def test_get_sanitized_uri_returns_storage_uri_without_username_and_password(self):
storage = get_storage("gs://{}@bucketname/path/filename".format(self.credentials))
sanitized_uri = storage.get_sanitized_uri()

self.assertEqual("gs://bucketname/path/filename", sanitized_uri)

def _mock_blob(self, name):
blob = mock.Mock()
blob.name = name
Expand Down
19 changes: 19 additions & 0 deletions tests/test_local_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,3 +257,22 @@ def test_local_storage_get_download_url_returns_none_on_empty_base(self):

with self.assertRaises(DownloadUrlBaseUndefinedError):
out_storage.get_download_url()

def test_local_storage_get_sanitized_uri_returns_filepath(self):
temp_input = tempfile.NamedTemporaryFile()
temp_input.write("FOOBAR")
temp_input.flush()

download_url_base = "http://host:123/path/to/"
download_url_base_encoded = urllib.quote_plus(download_url_base)

storage_uri = "file://{}?download_url_base={}".format(
temp_input.name, download_url_base_encoded)
out_storage = storagelib.get_storage(storage_uri)

sanitized_uri = out_storage.get_sanitized_uri()

self.assertEqual(
"file://{}?download_url_base={}".format(
temp_input.name, download_url_base_encoded),
sanitized_uri)
9 changes: 9 additions & 0 deletions tests/test_s3_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -642,3 +642,12 @@ def test_get_download_url_calls_boto_generate_presigned_url_custom_expiration(
Params={"Bucket": "some_bucket", "Key": "some/file"},
ExpiresIn=1000
)

def test_get_sanitized_uri_returns_storage_uri_without_username_and_password(self):
url = "s3://access_key:access_secret@some_bucket/"
key = "some/filename"

storage = storagelib.get_storage("".join([url, key, "?region=US_EAST"]))
sanitized_uri = storage.get_sanitized_uri()

self.assertEqual("s3://some_bucket/some/filename?region=US_EAST", sanitized_uri)
25 changes: 24 additions & 1 deletion tests/test_storage.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import mock
import storage as storagelib
import threading

from unittest import TestCase

import storage as storagelib
from storage.storage import Storage


class TestTimeout(TestCase):
@mock.patch("threading.Thread", wraps=threading.Thread)
Expand Down Expand Up @@ -51,3 +54,23 @@ class MyStorageClass(storagelib.storage.Storage):
uri = "{0}://some/uri/path".format(self.scheme)
store_obj = storagelib.get_storage(uri)
self.assertIsInstance(store_obj, MyStorageClass)


class TestStorage(TestCase):
def test_get_sanitized_uri_removes_username_and_password(self):
storage = Storage(storage_uri="https://username:password@bucket/path/filename")
sanitized_uri = storage.get_sanitized_uri()

self.assertEqual("https://bucket/path/filename", sanitized_uri)

def test_get_sanitized_uri_does_not_preserves_parameters(self):
storage = Storage(storage_uri="https://username:password@bucket/path/filename?other=param")
sanitized_uri = storage.get_sanitized_uri()

self.assertEqual("https://bucket/path/filename", sanitized_uri)

def test_get_sanitized_uri_preserves_port_number(self):
storage = Storage(storage_uri="ftp://username:[email protected]:8080/path/filename")
sanitized_uri = storage.get_sanitized_uri()

self.assertEqual("ftp://ftp.foo.com:8080/path/filename", sanitized_uri)
75 changes: 75 additions & 0 deletions tests/test_swift_storage.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
from StringIO import StringIO
import tempfile
from urllib import urlencode
from unittest import TestCase

import mock
Expand Down Expand Up @@ -697,6 +698,43 @@ def test_swift_get_download_url_encodes_object_names_with_spaces(self, mock_crea
download_url,
"http://cloudfiles.com/path/to/filename%20with%20spaces.txt?param1=12345&param2=67890")

def test_get_sanitized_uri_returns_storage_uri_without_username_and_password(self):
base_uri = "swift://USER:KEY@CONTAINER/path/to/file.mp4"
query_args = {
"auth_endpoint": "http://identity.server.com:1234/v2/",
"tenant_id": "1234",
"region": "DFW"
}

swift_uri = "{}?{}".format(base_uri, urlencode(query_args))
storage_object = storagelib.get_storage(swift_uri)

sanitized_uri = storage_object.get_sanitized_uri()

self.assertEqual(
"swift://container/path/to/file.mp4?{}".format(urlencode(query_args)),
sanitized_uri)

def test_get_sanitized_uri_returns_storage_uri_without_download_url_key(self):
base_uri = "swift://USER:KEY@CONTAINER/path/to/file.mp4"
query_args = {
"auth_endpoint": "http://identity.server.com:1234/v2/",
"tenant_id": "1234",
"region": "DFW"
}

updated_query_args = query_args.copy()
updated_query_args.update({"download_url_key": "KEY"})

swift_uri = "{}?{}".format(base_uri, urlencode(updated_query_args))
storage_object = storagelib.get_storage(swift_uri)

sanitized_uri = storage_object.get_sanitized_uri()

self.assertEqual(
"swift://container/path/to/file.mp4?{}".format(urlencode(query_args)),
sanitized_uri)


class TestRegisterSwiftProtocol(TestCase):

Expand Down Expand Up @@ -880,3 +918,40 @@ def test_rackspace_handles_incorrectly_filtered_metadata_when_fetching_download_

mock_cloudfiles.get_temp_url.assert_called_once_with(
"container", "file.txt", seconds=60, method="GET", key="secret_key_from_server")

def test_get_sanitized_uri_returns_storage_uri_without_username_and_password(self):
base_uri = "cloudfiles://USER:KEY@CONTAINER/path/to/file.mp4"
query_args = {
"auth_endpoint": "http://identity.server.com:1234/v2/",
"tenant_id": "1234",
"region": "DFW"
}

cloudfiles_uri = "{}?{}".format(base_uri, urlencode(query_args))
storage_object = storagelib.get_storage(cloudfiles_uri)

sanitized_uri = storage_object.get_sanitized_uri()

self.assertEqual(
"cloudfiles://container/path/to/file.mp4?{}".format(urlencode(query_args)),
sanitized_uri)

def test_get_sanitized_uri_returns_storage_uri_without_download_url_key(self):
base_uri = "cloudfiles://USER:KEY@CONTAINER/path/to/file.mp4"
query_args = {
"auth_endpoint": "http://identity.server.com:1234/v2/",
"tenant_id": "1234",
"region": "DFW"
}

updated_query_args = query_args.copy()
updated_query_args.update({"download_url_key": "KEY"})

cloudfiles_uri = "{}?{}".format(base_uri, urlencode(updated_query_args))
storage_object = storagelib.get_storage(cloudfiles_uri)

sanitized_uri = storage_object.get_sanitized_uri()

self.assertEqual(
"cloudfiles://container/path/to/file.mp4?{}".format(urlencode(query_args)),
sanitized_uri)
Loading

0 comments on commit c3e95bd

Please sign in to comment.