diff --git a/.env b/.env new file mode 100644 index 0000000..0329a0f --- /dev/null +++ b/.env @@ -0,0 +1,44 @@ +# Variables in this file will be substituted into docker-compose.yml +# Save a copy of this file as .env and insert your own values. +# Verify correct substitution with "docker-compose config" +# If variables are newly added or enabled, please delete and rebuild the images to pull in changes: +# docker-compose down +# docker rmi -f docker_ckan docker_db +# docker rmi $(docker images -f dangling=true -q) +# docker-compose build +# docker-compose up -d +# docker-compose restart ckan # give the db service time to initialize the db cluster on first run + +# Image: ckan +CKAN_SITE_ID=default +# +# On AWS, your CKAN_SITE_URL is the output of: +# curl -s http://169.254.169.254/latest/meta-data/public-hostname +# CKAN_SITE_URL=http://ec2-xxx-xxx-xxx-xxx.ap-southeast-2.compute.amazonaws.com +# When running locally, CKAN_SITE_URL must contain the port +CKAN_SITE_URL=http://localhost:5000 +# +# CKAN_PORT must be available on the host: sudo netstat -na +# To apply change: docker-compose down && docker rmi docker_ckan && docker-compose build ckan +CKAN_PORT=5000 +# +# Email settings +CKAN_SMTP_SERVER=smtp.corporateict.domain:25 +CKAN_SMTP_STARTTLS=True +CKAN_SMTP_USER=user +CKAN_SMTP_PASSWORD=pass +CKAN_SMTP_MAIL_FROM=ckan@localhost +# +# Image: db +POSTGRES_PASSWORD=ckan +# +# POSTGRES_PORT must be available on the host: sudo netstat -na | grep 5432 +# To apply change: docker-compose down && docker rmi docker_db docker_ckan && docker-compose build +POSTGRES_PORT=5432 +# +# The datastore database will be created in the db container as docs +# Readwrite user/pass will be ckan:POSTGRES_PASSWORD +# Readonly user/pass will be datastore_ro:DATASTORE_READONLY_PASSWORD +DATASTORE_READONLY_PASSWORD=datastore cloudstorage + +CKAN_SITE_TITLE='Testing subject' diff --git a/.gitignore b/.gitignore index 979398f..7958b9e 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,7 @@ 
syntax: glob *.swp *.swo .DS_Store -ckan.egg-info/* +*.egg-info/ sandbox/* dist @@ -18,3 +18,5 @@ fl_notes.txt *.ini .noseids *~ +.coverage +cover/ diff --git a/README.md b/README.md index f5308ff..d8bc58f 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,20 @@ For most drivers, this is all you need: ckanext.cloudstorage.driver_options = {"key": "", "secret": ""} +## Google Storage + +To use the Google Storage driver the following driver options are required: + + {"key": "", "secret": "", "project": ""} + +**Note on secure URL's with Google Storage** +With Google's lack of folder-level permissions the whole bucket will need to be made private when +using secure urls. This will now affect generic file uploads as well. To still allow +generic files to be public (`ckanext.cloudstorage.use_secure_urls_for_generics` is `False` by default) +we will set the ACL of a newly uploaded object to `public-read` when only +`ckanext.cloudstorage.use_secure_urls` is activated. If you later decide to make the generic files private +you will have to manually update the ACL on the already uploaded objects to make them private. + # Support Most libcloud-based providers should work out of the box, but only those listed @@ -39,6 +53,7 @@ below have been tested: | Azure | YES | YES | YES (if `azure-storage` is installed) | | AWS S3 | YES | YES | YES (if `boto` is installed) | | Rackspace | YES | YES | No | +| Google Storage | YES | YES | YES (if `google-cloud-storage` and `pycrypto` are installed) | # What are "Secure URLs"? @@ -50,8 +65,18 @@ the resource. This means that the normal CKAN-provided access restrictions can apply to resources with no further effort on your part, but still get all the benefits of your CDN/blob storage. + # applies to resources ckanext.cloudstorage.use_secure_urls = 1 + # applies to generic uploads eg. 
group images, logo + ckanext.cloudstorage.use_secure_urls_for_generics = 1 + +The access permissions on the storage container used will have to be set accordingly to reflect +these settings (if using Google Storage, see note on Google Storage use). + +`use_secure_urls_for_generics` is recommended to be off, to allow for caching of assets +such as the logo. + This option also enables multipart uploads, but you need to create database tables first. Run next command from extension folder: `paster cloudstorage initdb -c /etc/ckan/default/production.ini ` @@ -77,8 +102,7 @@ cloudstorage will take care of the rest. Ex: 1. You should disable public listing on the cloud service provider you're using, if supported. -2. Currently, only resources are supported. This means that things like group - and organization images still use CKAN's local file storage. +2. Currently, the migration tool only supports resources. # FAQ diff --git a/ckanext/cloudstorage/controller.py b/ckanext/cloudstorage/controller.py index 73574c6..31a0a72 100644 --- a/ckanext/cloudstorage/controller.py +++ b/ckanext/cloudstorage/controller.py @@ -1,14 +1,18 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- import os.path +import logging from pylons import c from pylons.i18n import _ - +from webob.exc import status_map from ckan import logic, model from ckan.lib import base, uploader +from ckan.common import is_flask_request import ckan.lib.helpers as h +log = logging.getLogger(__name__) + class StorageController(base.BaseController): def resource_download(self, id, resource_id, filename=None): @@ -52,3 +56,29 @@ def resource_download(self, id, resource_id, filename=None): base.abort(404, _('No download is available')) h.redirect_to(uploaded_url) + + def uploaded_file_redirect(self, upload_to, filename): + '''Redirect static file requests to their location on cloudstorage.''' + upload = uploader.get_uploader('notused') + file_path = upload.path_from_filename(filename) + uploaded_url = 
upload.get_url_from_path(file_path) + + if upload.use_secure_urls: + h.redirect_to(uploaded_url) + else: + if is_flask_request(): + raise NotImplementedError("Permanent redirect for flask \ + requests is not implemented yet") + else: + # We are manually performing a redirect for Pylons + # as this is the only way to set the caching headers + # to make a Permanently Moved cachable + # (see https://github.com/Pylons/pylons/blob/master/pylons/controllers/util.py#L218-L229) + exc = status_map[301] + raise exc( + location=uploaded_url.encode('utf-8'), + headers={ + "Cache-Control": "public, max-age=3600", + "Pragma": "none" + } + ) diff --git a/ckanext/cloudstorage/plugin.py b/ckanext/cloudstorage/plugin.py index 5d7a939..29e823e 100644 --- a/ckanext/cloudstorage/plugin.py +++ b/ckanext/cloudstorage/plugin.py @@ -53,9 +53,8 @@ def get_resource_uploader(self, data_dict): return storage.ResourceCloudStorage(data_dict) def get_uploader(self, upload_to, old_filename=None): - # We don't provide misc-file storage (group images for example) - # Returning None here will use the default Uploader. 
- return None + # Custom uploader for generic file uploads + return storage.FileCloudStorage(upload_to, old_filename) def before_map(self, map): sm = SubMapper( @@ -77,6 +76,12 @@ def before_map(self, map): action='resource_download' ) + sm.connect( + 'uploaded_file', + '/uploads/{upload_to}/{filename}', + action='uploaded_file_redirect' + ) + return map # IActions diff --git a/ckanext/cloudstorage/storage.py b/ckanext/cloudstorage/storage.py index 93686ad..e6025e0 100644 --- a/ckanext/cloudstorage/storage.py +++ b/ckanext/cloudstorage/storage.py @@ -5,16 +5,19 @@ import os.path import urlparse from ast import literal_eval -from datetime import datetime, timedelta - +from datetime import timedelta +import datetime from pylons import config from ckan import model from ckan.lib import munge import ckan.plugins as p +import logging from libcloud.storage.types import Provider, ObjectDoesNotExistError from libcloud.storage.providers import get_driver +log = logging.getLogger(__name__) + class CloudStorage(object): def __init__(self): @@ -73,13 +76,7 @@ def container_name(self): @property def use_secure_urls(self): - """ - `True` if ckanext-cloudstroage is configured to generate secure - one-time URLs to resources, `False` otherwise. - """ - return p.toolkit.asbool( - config.get('ckanext.cloudstorage.use_secure_urls', False) - ) + raise NotImplementedError('This property should be specified in subclass') @property def leave_files(self): @@ -131,6 +128,26 @@ def can_use_advanced_aws(self): return False + @property + def can_use_advanced_google_cloud(self): + """ + `True` if the `google-cloud` module is installed and + ckanext-cloudstorage has been configured to use Google Cloud Storage, + otherwise `False`. + """ + # Are we even using google cloud? + if 'GOOGLE_STORAGE' in self.driver_name: + try: + # Yes? is the google-cloud-storage package available? + from google.cloud import storage + # shut the linter up. 
+ assert storage + return True + except ImportError: + pass + + return False + @property def guess_mimetype(self): """ @@ -141,6 +158,177 @@ def guess_mimetype(self): config.get('ckanext.cloudstorage.guess_mimetype', False) ) + def get_object_public_url(self, filename): + """ + Returns the public url of an object. + Raises `NotImplementedError` for drivers yet unsupported, or when + `use_secure_urls` is set to `True`. + + Assumes container is made public. + """ + if self.driver_name == 'GOOGLE_STORAGE': + if self.use_secure_urls: + raise NotImplementedError("Should be pretty easy though!") + return "https://storage.googleapis.com/{0}/{1}" \ + .format(self.container_name, + self.path_from_filename(filename)) + else: + raise NotImplementedError( + "This method hasn't been implemented yet for this driver.") + + def upload_to_path(self, file_path): + """ + Upload to storage bucket + + :param file_path: File path in storage bucket + :param old_file_path: File path of old file in storage bucket. 
+ """ + + if self.can_use_advanced_azure: + from azure.storage import blob as azure_blob + from azure.storage.blob.models import ContentSettings + + blob_service = azure_blob.BlockBlobService( + self.driver_options['key'], + self.driver_options['secret'] + ) + content_settings = None + if self.guess_mimetype: + content_type, _ = mimetypes.guess_type(file_path) + if content_type: + content_settings = ContentSettings( + content_type=content_type + ) + + return blob_service.create_blob_from_stream( + container_name=self.container_name, + blob_name=file_path, + stream=self.file_upload, + content_settings=content_settings + ) + else: + extra = {} + if 'GOOGLE_STORAGE' in self.driver_name: + use_secure_urls = p.toolkit.asbool( + config.get('ckanext.cloudstorage.use_secure_urls', False)) + use_secure_urls_for_generics = p.toolkit.asbool( + config.get('ckanext.cloudstorage.use_secure_urls_for_generics', False)) + set_public_acl = use_secure_urls is True and use_secure_urls_for_generics is False + if set_public_acl: + log.debug('set acl of new object to public-read for GOOGLE_STORAGE') + extra['acl'] = 'public-read' + + self.container.upload_object_via_stream( + self.file_upload, + object_name=file_path, + extra=extra + ) + + def delete_object_from_path(self, file_path): + """ + Delete object from cloudstorage at `file_path` + :param file_path: Path of file to be deleted + """ + try: + self.container.delete_object( + self.container.get_object( + file_path + ) + ) + except ObjectDoesNotExistError: + # It's possible for the object to have already been deleted, or + # for it to not yet exist in a committed state due to an + # outstanding lease. + return + + def get_url_from_path(self, path): + """ + Retrieve a publicly accessible URL for the given path + + .. note:: + + Works for Azure and any libcloud driver that implements + support for get_object_cdn_url (ex: AWS S3, Google Storage). + + :param path: The resource path. + + :returns: Externally accessible URL or None. 
+ """ + # If advanced azure features are enabled, generate a temporary + # shared access link instead of simply redirecting to the file. + if self.use_secure_urls: + if self.can_use_advanced_azure: + from azure.storage import blob as azure_blob + + blob_service = azure_blob.BlockBlobService( + self.driver_options['key'], + self.driver_options['secret'] + ) + + return blob_service.make_blob_url( + container_name=self.container_name, + blob_name=path, + sas_token=blob_service.generate_blob_shared_access_signature( + container_name=self.container_name, + blob_name=path, + expiry=datetime.utcnow() + timedelta(hours=1), + permission=azure_blob.BlobPermissions.READ + ) + ) + elif self.can_use_advanced_aws: + from boto.s3.connection import S3Connection + s3_connection = S3Connection( + self.driver_options['key'], + self.driver_options['secret'] + ) + return s3_connection.generate_url( + expires_in=60 * 60, + method='GET', + bucket=self.container_name, + query_auth=True, + key=path + ) + + elif self.can_use_advanced_google_cloud: + from google.cloud import storage + + client = storage.client.Client.from_service_account_json( + self.driver_options['secret'] + ) + + bucket = client.get_bucket(self.container_name) + blob = bucket.get_blob(path) + return blob.generate_signed_url( + expiration=timedelta(seconds=60*60), + method='GET', + ) + else: + raise Exception('Unable to generate secure url. Is your cloud \ + provider\'s driver installed?') + + # Find the object for the given key. + obj = self.container.get_object(path) + if obj is None: + return + + # Not supported by all providers! + try: + return self.driver.get_object_cdn_url(obj) + except NotImplementedError: + if 'S3' in self.driver_name or 'GOOGLE_STORAGE' in self.driver_name: + return urlparse.urljoin( + 'https://' + self.driver.connection.host, + '{container}/{path}'.format( + container=self.container_name, + path=path + ) + ) + # This extra 'url' property isn't documented anywhere, sadly. 
+ # See azure_blobs.py:_xml_to_object for more. + elif 'url' in obj.extra: + return obj.extra['url'] + raise + class ResourceCloudStorage(CloudStorage): def __init__(self, resource): @@ -186,6 +374,16 @@ def __init__(self, resource): self.old_filename = old_resource.url resource['url_type'] = '' + @property + def use_secure_urls(self): + """ + `True` if ckanext-cloudstroage is configured to generate secure + one-time URLs to resources, `False` otherwise. + """ + return p.toolkit.asbool( + config.get('ckanext.cloudstorage.use_secure_urls', False) + ) + def path_from_filename(self, rid, filename): """ Returns a bucket path for the given resource_id and filename. @@ -206,134 +404,118 @@ def upload(self, id, max_size=10): :param id: The resource_id. :param max_size: Ignored. """ + # If a filename has been provided (a file is being uplaoded) write the + # file to the appropriate key in the container if self.filename: - if self.can_use_advanced_azure: - from azure.storage import blob as azure_blob - from azure.storage.blob.models import ContentSettings + file_path = self.path_from_filename(id, self.filename) + self.upload_to_path(file_path) + if self._clear and self.old_filename and not self.leave_files: + old_file_path = self.path_from_filename(id, self.old_filename) + self.delete_object_from_path(old_file_path) - blob_service = azure_blob.BlockBlobService( - self.driver_options['key'], - self.driver_options['secret'] - ) - content_settings = None - if self.guess_mimetype: - content_type, _ = mimetypes.guess_type(self.filename) - if content_type: - content_settings = ContentSettings( - content_type=content_type - ) - - return blob_service.create_blob_from_stream( - container_name=self.container_name, - blob_name=self.path_from_filename( - id, - self.filename - ), - stream=self.file_upload, - content_settings=content_settings - ) - else: - self.container.upload_object_via_stream( - self.file_upload, - object_name=self.path_from_filename( - id, - self.filename - ) - ) - - 
elif self._clear and self.old_filename and not self.leave_files: - # This is only set when a previously-uploaded file is replace - # by a link. We want to delete the previously-uploaded file. - try: - self.container.delete_object( - self.container.get_object( - self.path_from_filename( - id, - self.old_filename - ) - ) - ) - except ObjectDoesNotExistError: - # It's possible for the object to have already been deleted, or - # for it to not yet exist in a committed state due to an - # outstanding lease. - return - - def get_url_from_filename(self, rid, filename): + def get_url_from_filename(self, id, filename): + """ + Generate public URL from resource id and filename + :param id: The resource ID + :param filename: The resource filename """ - Retrieve a publically accessible URL for the given resource_id - and filename. + path = self.path_from_filename(id, filename) + return self.get_url_from_path(path) - .. note:: + @property + def package(self): + return model.Package.get(self.resource['package_id']) - Works for Azure and any libcloud driver that implements - support for get_object_cdn_url (ex: AWS S3). - :param rid: The resource ID. - :param filename: The resource filename. +class FileCloudStorage(CloudStorage): + """ + Support upload of general files to cloudstorage. + """ + def __init__(self, upload_to, old_filename=None): + super(FileCloudStorage, self).__init__() - :returns: Externally accessible URL or None. + self.filename = None + self.filepath = None + self.old_filename = old_filename + if self.old_filename: + self.old_filepath = self.path_from_filename(old_filename) + + @property + def use_secure_urls(self): + """ + `True` if ckanext-cloudstorage is configured to generate secure + one-time URLs to generic files, `False` otherwise. """ - # Find the key the file *should* be stored at. 
- path = self.path_from_filename(rid, filename) + return p.toolkit.asbool( + config.get('ckanext.cloudstorage.use_secure_urls_for_generics', False) + ) - # If advanced azure features are enabled, generate a temporary - # shared access link instead of simply redirecting to the file. - if self.can_use_advanced_azure and self.use_secure_urls: - from azure.storage import blob as azure_blob + def path_from_filename(self, filename): + """ + Returns a bucket path for the given filename. - blob_service = azure_blob.BlockBlobService( - self.driver_options['key'], - self.driver_options['secret'] - ) + :param: filename: The unmunged filename. + """ + return os.path.join( + 'storage', + 'uploads', + munge.munge_filename(filename) + ) - return blob_service.make_blob_url( - container_name=self.container_name, - blob_name=path, - sas_token=blob_service.generate_blob_shared_access_signature( - container_name=self.container_name, - blob_name=path, - expiry=datetime.utcnow() + timedelta(hours=1), - permission=azure_blob.BlobPermissions.READ - ) - ) - elif self.can_use_advanced_aws and self.use_secure_urls: - from boto.s3.connection import S3Connection - s3_connection = S3Connection( - self.driver_options['key'], - self.driver_options['secret'] - ) - return s3_connection.generate_url( - expires_in=60 * 60, - method='GET', - bucket=self.container_name, - query_auth=True, - key=path - ) + def update_data_dict(self, data_dict, url_field, file_field, clear_field): + """ + Manipulate data from the data_dict. THis needs to be called before it + reaches any validators. + + :param url_field: Name of the field where the upload is going to be + :param file_field: Name of the key where the FieldStorage is kept (i.e. + the field where the file data actually is). 
+ :param clear_field: Name of a boolean field which requests the upload + to be deleted + """ + self.url = data_dict.get(url_field, '') + self._clear = data_dict.pop(clear_field, None) + self.file_field = file_field + self.upload_field_storage = data_dict.pop(file_field, None) + + if hasattr(self.upload_field_storage, 'filename'): + self.filename = self.upload_field_storage.filename + self.filename = str(datetime.datetime.utcnow()) + self.filename + self.filename = munge.munge_filename_legacy(self.filename) + self.filepath = self.path_from_filename(self.filename) + data_dict[url_field] = self.filename + self.file_upload = self.upload_field_storage.file + # keep the file if there has been no change + elif self.old_filename and not self.old_filename.startswith('http'): + if not self._clear: + data_dict[url_field] = self.old_filename + if self._clear and self.url == self.old_filename: + data_dict[url_field] = '' + + def upload(self, max_size=2): + """ + Complete the fileupload, or clear an existing upload. - # Find the object for the given key. - obj = self.container.get_object(path) - if obj is None: - return + This should happen just before a commit but after the data has + been validated and flushed to the db. This is so we do not store + anything unless the request is actually good. + :param max_size: ignored + """ + if self.filename: + file_path = self.path_from_filename(self.filename) + return self.upload_to_path(file_path) + if self._clear and self.old_filename and not self.leave_files: + old_file_path = self.path_from_filename(self.old_filename) + self.delete_object_from_path(old_file_path) - # Not supported by all providers! - try: - return self.driver.get_object_cdn_url(obj) - except NotImplementedError: - if 'S3' in self.driver_name: - return urlparse.urljoin( - 'https://' + self.driver.connection.host, - '{container}/{path}'.format( - container=self.container_name, - path=path - ) - ) - # This extra 'url' property isn't documented anywhere, sadly. 
- # See azure_blobs.py:_xml_to_object for more. - elif 'url' in obj.extra: - return obj.extra['url'] - raise + def get_url_from_filename(self, filename): + """ + Get public url from filename + :param filename: name of file + """ + path = self.path_from_filename(filename) + # We don't want to use secure urls for normal file uploads. + # Doing so would cause assets caching issues such as the logo + # to be reloaded on every page load. + return self.get_url_from_path(path) - @property - def package(self): - return model.Package.get(self.resource['package_id']) diff --git a/ckanext/cloudstorage/tests/data.csv b/ckanext/cloudstorage/tests/data.csv new file mode 100644 index 0000000..0f55c8b --- /dev/null +++ b/ckanext/cloudstorage/tests/data.csv @@ -0,0 +1,2 @@ +date,price +1950-01-01,34.730 diff --git a/ckanext/cloudstorage/tests/test_controller.py b/ckanext/cloudstorage/tests/test_controller.py new file mode 100644 index 0000000..9507ed1 --- /dev/null +++ b/ckanext/cloudstorage/tests/test_controller.py @@ -0,0 +1,154 @@ +import os + +from nose.tools import assert_equal, assert_true, assert_raises +from mock import patch, create_autospec, MagicMock + +import ckan.plugins +import ckan.tests.helpers as helpers +import ckan.tests.factories as factories +from webtest import Upload + +from ckan.common import config +import ckanapi +from libcloud.storage.types import Provider +from libcloud.storage.providers import get_driver + +from ckanext.cloudstorage.controller import StorageController + +from webob.exc import status_map + +google_driver = get_driver(Provider.GOOGLE_STORAGE) + + +class Uploader(Upload): + """Extends webtest's Upload class a bit more so it actually stores file data. 
+ """ + + def __init__(self, *args, **kwargs): + self.file = kwargs.pop('file') + super(Uploader, self).__init__(*args, **kwargs) + + +class TestStorageController(helpers.FunctionalTestBase): + def _upload_resource(self): + factories.Sysadmin(apikey='my-test-key') + + app = self._get_test_app() + demo = ckanapi.TestAppCKAN(app, apikey='my-test-key') + factories.Dataset(name='my-dataset') + + file_path = os.path.join(os.path.dirname(__file__), 'data.csv') + resource = demo.action.resource_create(package_id='my-dataset', + upload=open(file_path), + url='file.txt') + return resource, demo, app + + @patch('ckanext.cloudstorage.storage.get_driver') + @helpers.change_config('ckan.site_url', 'http://mytest.ckan.net') + def test_resource_show_url(self, get_driver): + """The resource_show url is expected for uploaded resource file.""" + mock_driver = MagicMock(spec=google_driver, name='driver') + container = MagicMock(name='container') + mock_driver.get_container.return_value = container + get_driver.return_value = MagicMock(return_value=mock_driver) + + resource, demo, _ = self._upload_resource() + + # does resource_show have the expected resource file url? 
+ resource_show = demo.action.resource_show(id=resource['id']) + + expected_url = 'http://mytest.ckan.net/dataset/{0}/resource/{1}/download/data.csv' \ + .format(resource['package_id'], resource['id']) + + assert_equal(resource_show['url'], expected_url) + + @patch('ckanext.cloudstorage.storage.get_driver') + @helpers.change_config('ckan.site_url', 'http://localhost:5000') + def test_resource_download_s3(self, get_driver): + """A resource uploaded to S3 can be downloaded.""" + mock_driver = MagicMock(spec=google_driver, name='driver') + container = MagicMock(name='container') + mock_driver.get_container.return_value = container + get_driver.return_value = MagicMock(return_value=mock_driver) + + resource, demo, app = self._upload_resource() + resource_show = demo.action.resource_show(id=resource['id']) + resource_file_url = resource_show['url'] + + assert_equal(resource_file_url, u'{2}/dataset/{0}/resource/{1}/download/data.csv' + .format(resource['package_id'], resource['id'], 'http://localhost:5000')) + + @patch('ckanext.cloudstorage.storage.get_driver') + @patch('ckanext.cloudstorage.controller.h') + def test_resource_download_s3_no_filename(self, h, get_driver): + """A resource uploaded can be downloaded when no filename in url.""" + mock_driver = MagicMock(spec=google_driver, name='driver') + container = MagicMock(name='container') + mock_driver.get_container.return_value = container + get_driver.return_value = MagicMock(return_value=mock_driver) + + resource, demo, app = self._upload_resource() + + resource_file_url = '/dataset/{0}/resource/{1}/download' \ + .format(resource['package_id'], resource['id']) + + mock_driver.get_object_cdn_url.return_value = resource_file_url + + file_response = app.get(resource_file_url) + + h.redirect_to.assert_called_with(resource_file_url) + + @patch('ckanext.cloudstorage.storage.get_driver') + @patch('ckanext.cloudstorage.controller.h') + def test_resource_download_url_link(self, h, get_driver): + """A resource with a url 
(not a file) is redirected correctly.""" + mock_driver = MagicMock(spec=google_driver, name='driver') + container = MagicMock(name='container') + mock_driver.get_container.return_value = container + get_driver.return_value = MagicMock(return_value=mock_driver) + mock_driver.get_object_cdn_url.return_value = 'http://example' + + factories.Sysadmin(apikey='my-test-apikey') + + app = self._get_test_app() + demo = ckanapi.TestAppCKAN(app, apikey='my-test-apikey') + dataset = factories.Dataset() + + resource = demo.action.resource_create(package_id=dataset['id'], + url='http://example') + resource_show = demo.action.resource_show(id=resource['id']) + resource_file_url = '/dataset/{0}/resource/{1}/download' \ + .format(resource['package_id'], resource['id']) + assert_equal(resource_show['url'], 'http://example') + + # attempt redirect to linked url + r = app.get(resource_file_url) + h.redirect_to.assert_called_with('http://example') + + +class TestControllerUploadFileRedirect(helpers.FunctionalTestBase): + + @helpers.change_config('ckanext.cloudstorage.use_secure_urls_for_generics', True) + @patch('ckanext.cloudstorage.storage.FileCloudStorage.get_url_from_path') + @patch('ckanext.cloudstorage.storage.get_driver') + @patch('ckanext.cloudstorage.controller.h.redirect_to') + def test_uses_normal_redirect_for_secure_urls(self, redirect_to, get_driver, get_url_from_path): + url = 'http://some.url/path' + get_url_from_path.return_value = url + + StorageController().uploaded_file_redirect('notused', 'file.txt') + redirect_to.assert_called_once_with(url) + + @helpers.change_config('ckanext.cloudstorage.use_secure_urls_for_generics', False) + @patch('ckanext.cloudstorage.storage.FileCloudStorage.get_url_from_path') + @patch('ckanext.cloudstorage.storage.get_driver') + @patch('ckanext.cloudstorage.controller.h.redirect_to') + def test_uses_manual_pylons_redirect_for_unsecure_urls_redirect(self, redirect_to, get_driver, get_url_from_path): + url = 'http://some.url/path' + 
get_url_from_path.return_value = url + + with assert_raises(status_map[301]) as exc: + StorageController().uploaded_file_redirect('notused', 'file.txt') + assert_equal(exc.exception.location, url) + assert_equal(exc.exception.headers['Pragma'], 'none') + diff --git a/ckanext/cloudstorage/tests/test_plugin.py b/ckanext/cloudstorage/tests/test_plugin.py new file mode 100644 index 0000000..7f92ea1 --- /dev/null +++ b/ckanext/cloudstorage/tests/test_plugin.py @@ -0,0 +1,35 @@ +import os +from nose.tools import assert_equal, assert_raises +from mock import patch, MagicMock + +from ckan.tests import helpers, factories +from ckan.lib import helpers as h + +import ckanapi + +from ckanext.cloudstorage.controller import StorageController +class TestPlugin(helpers.FunctionalTestBase): + + @patch('ckanext.cloudstorage.storage.get_driver') + @patch('ckanext.cloudstorage.controller.StorageController', spec=StorageController) + def test_resource_download_calls_ext_method(self, resource_download, get_driver): + """ + Test `ckanext.cloudstorage.controller.StorageController.resource_download` is called for `resource_download` action. + """ + app = self._get_test_app() + demo = ckanapi.TestAppCKAN(app, apikey='my-test-apikey') + factories.Sysadmin(apikey='my-test-apikey') + + factories.Dataset(name='my-dataset') + file_path = os.path.join(os.path.dirname(__file__), 'data.csv') + resource = demo.action.resource_create( + package_id='my-dataset', + upload=open(file_path), + url='file.txt' + ) + + # proves it's calling the right code, right? 
+ with assert_raises(TypeError) as exc: + r = app.get(resource['url']) + assert_equal(exc.exception.message, "'MagicMock' object is not iterable") + resource_download.assert_called_once() diff --git a/ckanext/cloudstorage/tests/test_storage.py b/ckanext/cloudstorage/tests/test_storage.py new file mode 100644 index 0000000..8189fe1 --- /dev/null +++ b/ckanext/cloudstorage/tests/test_storage.py @@ -0,0 +1,306 @@ +import os +from nose.tools import assert_equal, assert_true, assert_raises +from mock import create_autospec, patch, MagicMock +import datetime +import ckanapi +from webtest import Upload + +from ckan.tests import helpers, factories +from ckan.plugins import toolkit +from ckanext.cloudstorage.storage import ( + CloudStorage, ResourceCloudStorage, FileCloudStorage +) + +from pylons import config + +from libcloud.storage.types import Provider +from libcloud.storage.providers import get_driver + +google_driver = get_driver(Provider.GOOGLE_STORAGE) + + +class Uploader(Upload): + """Extends webtest's Upload class a bit more so it actually stores file data. 
+ """ + + def __init__(self, *args, **kwargs): + self.file = kwargs.pop('file') + super(Uploader, self).__init__(*args, **kwargs) + + +class TestCloudStorageBaseClass(helpers.FunctionalTestBase): + + @helpers.change_config('ckanext.cloudstorage.driver', 'GOOGLE_STORAGE') + @helpers.change_config('ckanext.cloudstorage.use_secure_urls', False) + @patch('ckanext.cloudstorage.storage.CloudStorage.can_use_advanced_azure', False) + @patch('ckanext.cloudstorage.storage.CloudStorage.container') + @patch('ckanext.cloudstorage.storage.get_driver') + def test_upload_to_path_does_not_set_acl(self, get_driver, container): + uploader = CloudStorage() + uploader.file_upload = 'file_content' + uploader.upload_to_path('/some/path/file.txt') + + container.upload_object_via_stream.assert_called_once_with( + 'file_content', extra={}, object_name='/some/path/file.txt') + + @helpers.change_config('ckanext.cloudstorage.driver', 'GOOGLE_STORAGE') + @helpers.change_config('ckanext.cloudstorage.use_secure_urls', True) + @helpers.change_config('ckanext.cloudstorage.use_secure_urls_for_generics', False) + @patch('ckanext.cloudstorage.storage.CloudStorage.can_use_advanced_azure', False) + @patch('ckanext.cloudstorage.storage.CloudStorage.container') + @patch('ckanext.cloudstorage.storage.get_driver') + def test_upload_to_path_sets_acl(self, get_driver, container): + uploader = CloudStorage() + uploader.file_upload = 'file_content' + uploader.upload_to_path('/some/path/file.txt') + + container.upload_object_via_stream.assert_called_once_with( + 'file_content', + extra={ + 'acl': 'public-read' + }, + object_name='/some/path/file.txt' + ) + + +class TestResourceUploader(helpers.FunctionalTestBase): + + @patch('ckanext.cloudstorage.storage.get_driver') + def test_resource_upload(self, get_driver): + """Test a basic resource file upload.""" + mock_driver = MagicMock(spec=google_driver, name='driver') + container = MagicMock(name='container') + mock_driver.get_container.return_value = container + 
get_driver.return_value = MagicMock(return_value=mock_driver) + factories.Sysadmin(apikey='my-test-apikey') + + app = self._get_test_app() + demo = ckanapi.TestAppCKAN(app, apikey='my-test-apikey') + factories.Dataset(name='my-dataset') + + file_path = os.path.join(os.path.dirname(__file__), 'data.csv') + resource = demo.action.resource_create( + package_id='my-dataset', + upload=open(file_path), + url='file.txt' + ) + + key = 'resources/{0}/data.csv' \ + .format(resource['id']) + + args, kwargs = container.upload_object_via_stream.call_args + + assert_equal(kwargs['object_name'], key) + + @patch('ckanext.cloudstorage.storage.get_driver') + def test_resource_upload_then_clear(self, get_driver): + """Test that clearing on upload removes the storage key.""" + mock_driver = MagicMock(spec=google_driver, name='driver') + container = MagicMock(name='container') + mock_driver.get_container.return_value = container + get_driver.return_value = MagicMock(return_value=mock_driver) + + sysadmin = factories.Sysadmin(apikey="my-test-key") + + app = self._get_test_app() + demo = ckanapi.TestAppCKAN(app, apikey="my-test-key") + dataset = factories.Dataset(name='my-dataset') + + file_path = os.path.join(os.path.dirname(__file__), 'data.csv') + resource = demo.action.resource_create( + package_id='my-dataset', + upload=open(file_path), + url='file.txt' + ) + + key = 'resources/{0}/data.csv'.format(resource['id']) + + args, kwargs = container.upload_object_via_stream.call_args + assert_equal(kwargs['object_name'], key) + + container.get_object.return_value = 'object' + + url = toolkit.url_for( + controller='package', action='resource_edit', id=dataset['id'], resource_id=resource['id']) + env = {"REMOTE_USER": sysadmin['name'].encode('ascii')} + app.post(url, {'clear_upload': True, 'url': 'http://asdf', 'save': 'save'}, extra_environ=env) + + args, _ = container.get_object.call_args + path = args[0] + assert_equal(path, key) + args, _ = container.delete_object.call_args + 
+        assert_equal(args[0], 'object')
+
+    @patch('ckanext.cloudstorage.storage.get_driver')
+    def test_path_from_filename(self, get_driver):
+        """path_from_filename returns as expected."""
+        dataset = factories.Dataset()
+        resource = factories.Resource(package_id=dataset['id'])
+
+        uploader = ResourceCloudStorage(resource)
+        returned_path = uploader.path_from_filename(resource['id'], 'myfile.txt')
+        assert_equal(returned_path, 'resources/{0}/myfile.txt'.format(resource['id']))
+
+    @patch('ckanext.cloudstorage.storage.get_driver')
+    def test_resource_upload_with_url_and_clear(self, get_driver):
+        """Test that clearing an upload and using a URL does not crash."""
+
+        sysadmin = factories.Sysadmin(apikey='my-test-key')
+
+        app = self._get_test_app()
+        dataset = factories.Dataset(name='my-dataset')
+
+        url = toolkit.url_for(controller='package', action='new_resource', id=dataset['id'])
+        env = {'REMOTE_USER': sysadmin['name'].encode('ascii')}
+
+        app.post(url, {'clear_upload': True, 'id': '',  # empty id from the form
+                       'url': 'http://asdf', 'save': 'save'}, extra_environ=env)
+
+    @helpers.change_config('ckanext.cloudstorage.use_secure_urls', True)
+    @helpers.change_config('ckanext.cloudstorage.use_secure_urls_for_generics', False)
+    @patch('ckanext.cloudstorage.storage.get_driver')
+    def test_resource_storage_reads_correct_use_secure_urls_config_option(self, get_driver):
+        dataset = factories.Dataset(name='my-dataset')
+        resource = factories.Resource(
+            package_id=dataset['id'],
+        )
+        uploader = ResourceCloudStorage(resource)
+        assert_true(uploader.use_secure_urls)
+
+
+class TestFileCloudStorage(helpers.FunctionalTestBase):
+
+    @patch('ckanext.cloudstorage.storage.FileCloudStorage')
+    def test_file_upload_calls_FileCloudStorage(self, FileCloudStorage):
+        sysadmin = factories.Sysadmin(apikey='apikey')
+
+        file_path = os.path.join(os.path.dirname(__file__), 'data.csv')
+        filename = 'image.png'
+
+        img_uploader = Uploader(filename, file=open(file_path))
+
+        with
patch('ckanext.cloudstorage.storage.datetime') as mock_date:
+            mock_date.datetime.utcnow.return_value = datetime.datetime(2001, 1, 29)
+            context = {'user': sysadmin['name']}
+            helpers.call_action('group_create', context=context,
+                                name='group',
+                                image_upload=img_uploader,
+                                image_url=filename,
+                                save='save')
+
+        FileCloudStorage.assert_called_once_with('group', None)
+
+    @patch('ckanext.cloudstorage.storage.get_driver')
+    def test_group_image_upload(self, get_driver):
+        """Test a group image file upload."""
+        mock_driver = MagicMock(spec=google_driver, name='driver')
+        container = MagicMock(name='container')
+        mock_driver.get_container.return_value = container
+        mock_driver.get_object_cdn_url.return_value = 'http://cdn.url'
+        get_driver.return_value = MagicMock(return_value=mock_driver)
+
+        sysadmin = factories.Sysadmin(apikey='my-test-key')
+
+        file_path = os.path.join(os.path.dirname(__file__), 'data.csv')
+        filename = 'image.png'
+
+        img_uploader = Uploader(filename, file=open(file_path))
+
+        with patch('ckanext.cloudstorage.storage.datetime') as mock_date:
+            mock_date.datetime.utcnow.return_value = \
+                datetime.datetime(2001, 1, 29)
+            context = {'user': sysadmin['name']}
+            helpers.call_action('group_create', context=context,
+                                name='my-group',
+                                image_upload=img_uploader,
+                                image_url=filename,
+                                save='save')
+
+        key = "storage/uploads/2001-01-29-000000{0}" \
+            .format(filename)
+
+        group = helpers.call_action('group_show', id='my-group')
+        print('group', group)
+
+        args, kwargs = container.upload_object_via_stream.call_args
+        assert_equal(kwargs['object_name'], unicode(key))
+
+        # app = self._get_test_app()
+        # image_file_url = '/uploads/group/{0}'.format(filename)
+        # r = app.get(image_file_url)
+
+    @patch('ckanext.cloudstorage.storage.get_driver')
+    def test_group_image_upload_then_clear(self, get_driver):
+        """Test that clearing an upload calls delete_object"""
+        mock_driver = MagicMock(spec=google_driver, name='driver')
+        container =
MagicMock(name='container') + mock_driver.get_container.return_value = container + get_driver.return_value = MagicMock(return_value=mock_driver) + + sysadmin = factories.Sysadmin(apikey='my-test-apikey') + + file_path = os.path.join(os.path.dirname(__file__), 'data.csv') + file_name = 'image.png' + + img_uploader = Uploader(file_name, file=open(file_path)) + + with patch('ckanext.cloudstorage.storage.datetime') as mock_date: + mock_date.datetime.utcnow.return_value = \ + datetime.datetime(2001, 1, 29) + context = {'user': sysadmin['name']} + helpers.call_action('group_create', context=context, + name='my-group', + image_upload=img_uploader, + image_url=file_name) + + object_mock = MagicMock(name='object') + container.get_object.return_value = object_mock + + helpers.call_action('group_update', context=context, + id='my-group', name='my-group', + image_url='http://example', clear_upload=True) + + # assert delete object is called + container.delete_object.assert_called_with(object_mock) + + @patch('ckanext.cloudstorage.storage.get_driver') + def test_get_object_public_url(self, get_driver): + """ + Test get_object_public_url returns expected string + """ + uploader = FileCloudStorage('notused') + url = uploader.get_object_public_url('file.png') + assert_equal(url, 'https://storage.googleapis.com/test/storage/uploads/file.png') + + @helpers.change_config('ckanext.cloudstorage.use_secure_urls', False) + @helpers.change_config('ckanext.cloudstorage.use_secure_urls_for_generics', True) + @patch('ckanext.cloudstorage.storage.get_driver') + def test_filestorage_secure_urls_reads_correct_config_option(self, get_driver): + uploader = FileCloudStorage(None) + assert_true(uploader.use_secure_urls) + + @helpers.change_config('ckanext.cloudstorage.use_secure_urls', False) + @helpers.change_config('ckanext.cloudstorage.use_secure_urls_for_generics', True) + @patch('ckanext.cloudstorage.storage.get_driver') + 
@patch('ckanext.cloudstorage.storage.FileCloudStorage.can_use_advanced_azure', False) + @patch('ckanext.cloudstorage.storage.FileCloudStorage.can_use_advanced_aws', False) + @patch('ckanext.cloudstorage.storage.FileCloudStorage.can_use_advanced_google_cloud', False) + def test_path_from_filename_uses_public_url_when_option_is_false(self, get_driver): + sysadmin = factories.Sysadmin(apikey='my-test-apikey') + + file_path = os.path.join(os.path.dirname(__file__), 'data.csv') + file_name = 'image.png' + + img_uploader = Uploader(file_name, file=open(file_path)) + + with patch('ckanext.cloudstorage.storage.datetime') as mock_date: + mock_date.datetime.utcnow.return_value = \ + datetime.datetime(2001, 1, 29) + context = {'user': sysadmin['name']} + helpers.call_action('group_create', context=context, + name='my-group', + image_upload=img_uploader, + image_url=file_name) + + uploader = FileCloudStorage(None) + assert_raises(Exception, uploader.get_url_from_filename, 'image.png') diff --git a/dev-requirements.txt b/dev-requirements.txt new file mode 100644 index 0000000..d6a809f --- /dev/null +++ b/dev-requirements.txt @@ -0,0 +1,3 @@ +ckanapi==4.1 +google-cloud-storage==1.7.0 + diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..904545b --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +pycrypto