From 43f8f17336234ce61658adff3655037471609874 Mon Sep 17 00:00:00 2001 From: carlo cancellieri Date: Fri, 8 Oct 2021 20:38:01 +0200 Subject: [PATCH] introduce Google Cloud Bucket support and private file proxy --- ckanext/cloudstorage/controller.py | 26 +++++++++++++++++++++++--- ckanext/cloudstorage/storage.py | 26 +++++++++++++++++++++++--- 2 files changed, 46 insertions(+), 6 deletions(-) diff --git a/ckanext/cloudstorage/controller.py b/ckanext/cloudstorage/controller.py index 992b0c1..4cc7a7d 100644 --- a/ckanext/cloudstorage/controller.py +++ b/ckanext/cloudstorage/controller.py @@ -5,10 +5,14 @@ from pylons import c from pylons.i18n import _ +from ckan.common import request, response from ckan import logic, model from ckan.lib import base, uploader import ckan.lib.helpers as h +import ckanext.cloudstorage.storage as _storage +storage = _storage.CloudStorage +is_proxy_download=storage.proxy_download.fget(storage) class StorageController(base.BaseController): def resource_download(self, id, resource_id, filename=None): @@ -48,12 +52,28 @@ def resource_download(self, id, resource_id, filename=None): # if the client requests with a Content-Type header (e.g. 
Text preview) # we have to add the header to the signature try: - content_type = getattr(c.pylons.request, "content_type", None) + content_type = getattr(request, "content_type", None) except AttributeError: content_type = None - uploaded_url = upload.get_url_from_filename(resource['id'], filename, - content_type=content_type) + + # If the repository is private you may want to use ckan account to proxy + # protected contents + # ckanext.cloudstorage.proxy_download = [False|True] + # Default: False + if is_proxy_download: + # remote object + obj = upload.get_object(resource['id'],filename) + # metadata + extra = obj.extra + if extra: + # let's leverage on external mimetype if present + response.headers['Content-Type'] = extra.get('content_type',content_type) + # return stream back + return upload.get_object_as_stream(obj) + uploaded_url = upload.get_url_from_filename(resource['id'], filename, + content_type=content_type) + # The uploaded file is missing for some reason, such as the # provider being down. 
if uploaded_url is None: diff --git a/ckanext/cloudstorage/storage.py b/ckanext/cloudstorage/storage.py index 903390b..4e959db 100644 --- a/ckanext/cloudstorage/storage.py +++ b/ckanext/cloudstorage/storage.py @@ -151,7 +151,16 @@ def guess_mimetype(self): return p.toolkit.asbool( config.get('ckanext.cloudstorage.guess_mimetype', False) ) - + + @property + def proxy_download(self): + """ + If the ckan may stream the object (will use service account to download + from private storages) + """ + return p.toolkit.asbool( + config.get('ckanext.cloudstorage.proxy_download', False) + ) class ResourceCloudStorage(CloudStorage): def __init__(self, resource): @@ -244,7 +253,7 @@ def upload(self, id, max_size=10): content_settings=content_settings ) else: - + # TODO: This might not be needed once libcloud is upgraded if isinstance(self.file_upload, SpooledTemporaryFile): self.file_upload.next = self.file_upload.next() @@ -340,7 +349,9 @@ def get_url_from_filename(self, rid, filename, content_type=None): try: return self.driver.get_object_cdn_url(obj) except NotImplementedError: - if 'S3' in self.driver_name: + if 'S3' in self.driver_name \ + or 'GOOGLE_STORAGE' in self.driver_name: + return urlparse.urljoin( 'https://' + self.driver.connection.host, '{container}/{path}'.format( @@ -354,6 +365,15 @@ def get_url_from_filename(self, rid, filename, content_type=None): return obj.extra['url'] raise + def get_object(self, rid, filename): + # Find the key the file *should* be stored at. + path = self.path_from_filename(rid, filename) + # Find the object for the given key. + return self.container.get_object(path) + + def get_object_as_stream(self, obj): + return self.driver.download_object_as_stream(obj) + @property def package(self): return model.Package.get(self.resource['package_id'])