From 4046e4d28435e2366f60f72e2b79188289662039 Mon Sep 17 00:00:00 2001 From: Ginger Burns Date: Wed, 30 Oct 2024 17:28:47 +0000 Subject: [PATCH] style: automatic reformat auto reformat with ruff/docformatter/prettier after config changes --- ckanext/versioned_datastore/cli.py | 6 +- ckanext/versioned_datastore/helpers.py | 6 +- ckanext/versioned_datastore/interfaces.py | 41 ++--- .../lib/basic_query/geo.py | 12 +- .../lib/basic_query/search.py | 16 +- .../lib/basic_query/utils.py | 20 +- .../lib/datastore_utils.py | 28 +-- .../lib/downloads/derivatives/__init__.py | 2 +- .../lib/downloads/derivatives/csv.py | 4 +- .../downloads/derivatives/dwc/generator.py | 15 +- .../lib/downloads/derivatives/dwc/utils.py | 3 +- .../lib/downloads/derivatives/json.py | 3 +- .../lib/downloads/derivatives/xlsx.py | 2 +- .../lib/downloads/download.py | 31 ++-- .../lib/downloads/loaders.py | 4 +- .../lib/downloads/notifiers/__init__.py | 5 +- .../lib/downloads/notifiers/base.py | 27 +-- .../lib/downloads/notifiers/email.py | 5 +- .../lib/downloads/notifiers/webhook.py | 2 +- .../lib/downloads/query.py | 3 +- .../lib/downloads/servers/direct.py | 6 +- .../lib/downloads/utils.py | 12 +- .../lib/importing/details.py | 6 +- .../lib/importing/importing.py | 27 +-- .../lib/importing/indexing.py | 14 +- .../lib/importing/ingestion/deletion.py | 16 +- .../lib/importing/ingestion/exceptions.py | 26 +-- .../lib/importing/ingestion/ingesting.py | 50 ++--- .../lib/importing/ingestion/readers.py | 20 +- .../lib/importing/ingestion/records.py | 4 +- .../lib/importing/ingestion/utils.py | 14 +- .../lib/importing/queuing.py | 14 +- .../lib/importing/stats.py | 12 +- .../lib/importing/utils.py | 4 +- .../versioned_datastore/lib/query/fields.py | 36 ++-- .../versioned_datastore/lib/query/schema.py | 21 ++- .../versioned_datastore/lib/query/slugs.py | 52 +++--- .../versioned_datastore/lib/query/utils.py | 38 ++-- .../versioned_datastore/lib/query/v1_0_0.py | 48 ++--- .../logic/actions/basic_search.py | 26 +-- .../versioned_datastore/logic/actions/crud.py | 31 ++-- .../logic/actions/downloads.py | 8 +- .../logic/actions/extras.py | 20 +- .../logic/actions/meta/arg_objects.py | 2 +- .../logic/actions/meta/help.py | 100 +++++----- .../logic/actions/meta/schema.py | 7 +- .../logic/actions/multisearch.py | 157 ++++++++-------- ckanext/versioned_datastore/logic/auth.py | 3 +- .../migration/versioned_datastore/env.py | 25 ++- .../19a61e5b669f_add_new_download_tables.py | 1 + .../526b12c69d55_add_navigational_slugs.py | 1 + ...3f_add_server_args_to_download_request_.py | 4 +- ckanext/versioned_datastore/model/details.py | 13 +- .../versioned_datastore/model/downloads.py | 13 +- ckanext/versioned_datastore/model/slugs.py | 7 +- ckanext/versioned_datastore/model/stats.py | 4 +- ckanext/versioned_datastore/plugin.py | 20 +- .../versioned_datastore/routes/__init__.py | 2 +- ckanext/versioned_datastore/routes/search.py | 2 +- ckanext/versioned_datastore/routes/status.py | 7 +- docs/_scripts/gen_api_pages.py | 1 - tests/conftest.py | 5 +- tests/helpers/patches.py | 2 +- tests/integration/downloads/test_downloads.py | 10 +- .../lib/basic_query/test_basic_query_geo.py | 13 +- .../basic_query/test_basic_query_search.py | 2 +- .../lib/basic_query/test_basic_query_utils.py | 22 +-- .../lib/downloads/test_downloads_notifiers.py | 2 +- .../downloads/test_downloads_runmanager.py | 8 +- .../lib/downloads/test_downloads_utils.py | 6 +- tests/unit/lib/query/test_query_query.py | 3 +- tests/unit/lib/query/test_query_v1_0_0.py | 172 +++++++++--------- tests/unit/lib/test_datastore_utils.py | 4 +- .../logic/actions/test_actions_downloads.py | 2 +- tests/unit/test_helpers.py | 10 +- 75 files changed, 703 insertions(+), 667 deletions(-) diff --git a/ckanext/versioned_datastore/cli.py b/ckanext/versioned_datastore/cli.py index d7169106..0e30d5ff 100644 --- a/ckanext/versioned_datastore/cli.py +++ b/ckanext/versioned_datastore/cli.py @@ -1,11 +1,11 @@ import click - from ckan.plugins import toolkit + from .model.details import datastore_resource_details_table from .model.downloads import ( - datastore_downloads_requests_table, - datastore_downloads_derivative_files_table, datastore_downloads_core_files_table, + datastore_downloads_derivative_files_table, + datastore_downloads_requests_table, ) from .model.slugs import datastore_slugs_table, navigational_slugs_table from .model.stats import import_stats_table diff --git a/ckanext/versioned_datastore/helpers.py b/ckanext/versioned_datastore/helpers.py index 91535955..dce2fef4 100644 --- a/ckanext/versioned_datastore/helpers.py +++ b/ckanext/versioned_datastore/helpers.py @@ -1,7 +1,8 @@ import json -from ckan.plugins import toolkit from datetime import date +from ckan.plugins import toolkit + from .lib.common import ALL_FORMATS from .lib.importing import stats from .lib.query.slugs import create_nav_slug @@ -138,7 +139,8 @@ def latest_item_version(resource_id, record_id=None): :param resource_id: the id of the resource to search in :param record_id: optional; a record id to search for instead - :return: if record id is provided, the latest record version, else the latest resource version + :return: if record id is provided, the latest record version, else the latest + resource version """ action = ( 'datastore_get_record_versions' diff --git a/ckanext/versioned_datastore/interfaces.py b/ckanext/versioned_datastore/interfaces.py index 39b65a01..6cd4ad62 100644 --- a/ckanext/versioned_datastore/interfaces.py +++ b/ckanext/versioned_datastore/interfaces.py @@ -98,16 +98,16 @@ def datastore_modify_fields(self, resource_id, mapping, fields): :param resource_id: the resource id that was searched :param mapping: the mapping for the elasticsearch index containing the - resource's data. This is the raw mapping as a dict, retrieved - straight from elasticsearch's mapping endpoint + resource's data. This is the raw mapping as a dict, retrieved straight from + elasticsearch's mapping endpoint :param fields: the field definitions that have so far been extracted from the - mapping, by default this is all fields + mapping, by default this is all fields :return: the list of field definition dicts """ return fields def datastore_modify_index_doc(self, resource_id, index_doc): - ''' + """ Action allowing the modification of a resource's data during indexing. The index_doc passed is a dict in the form: @@ -134,7 +134,7 @@ def datastore_modify_index_doc(self, resource_id, index_doc): :param resource_id: the id of the resource being indexed :param index_doc: a dict that will be sent to elasticsearch :return: the dict for elasticsearch to index - ''' + """ return index_doc def datastore_is_read_only_resource(self, resource_id): @@ -162,7 +162,7 @@ def datastore_after_indexing(self, request, splitgill_stats, stats_id): :param request: the ResourceIndexRequest object that triggered the indexing task :param splitgill_stats: the statistics about the indexing task from splitgill :param stats_id: the id of the statistics entry in the ImportStats database - table + table """ pass @@ -230,7 +230,7 @@ def datastore_after_convert_basic_query(self, basic_query, multisearch_query): custom filters. :param basic_query: the original basic query, before it was modified by other - plugins + plugins :param multisearch_query: the converted multisearch version of the query :return: the modified multisearch query """ @@ -243,9 +243,8 @@ def get_query_schemas(self): Hook to allow registering custom query schemas. :return: a list of tuples of the format (query schema version, schema object) - where the query schema version is a string of format v#.#.# and the - schema object is an instance of - ckanext.versioned_datastore.lib.query.Schema + where the query schema version is a string of format v#.#.# and the schema + object is an instance of ckanext.versioned_datastore.lib.query.Schema """ return [] @@ -262,7 +261,7 @@ def download_modify_notifier_start_templates( :param text_template: the text template string :param html_template: the html template string :return: a 2-tuple containing the text template string and the html template - string + string """ return text_template, html_template @@ -277,7 +276,7 @@ def download_modify_notifier_end_templates( :param text_template: the text template string :param html_template: the html template string :return: a 2-tuple containing the text template string and the html template - string + string """ return text_template, html_template @@ -292,7 +291,7 @@ def download_modify_notifier_error_templates( :param text_template: the text template string :param html_template: the html template string :return: a 2-tuple containing the text template string and the html template - string + string """ return text_template, html_template @@ -317,9 +316,9 @@ def download_derivative_generators(self, registered_derivatives=None): Extend or modify the list of derivative generators. :param registered_derivatives: a dict of existing derivative generator classes, - returned from previously loaded plugins + returned from previously loaded plugins :return: a dict of loaded derivative generator classes, keyed on the name used - to specify them in download requests + to specify them in download requests """ return registered_derivatives or {} @@ -328,9 +327,9 @@ def download_file_servers(self, registered_servers=None): Extend or modify the list of file servers. :param registered_servers: a dict of existing file server classes, returned from - previously loaded plugins + previously loaded plugins :return: a dict of loaded file server classes, keyed on the name used to specify - them in download requests + them in download requests """ return registered_servers or {} @@ -339,9 +338,9 @@ def download_notifiers(self, registered_notifiers=None): Extend or modify the list of download notifiers. :param registered_notifiers: a dict of existing notifier classes, returned from - previously loaded plugins + previously loaded plugins :return: a dict of loaded notifier classes, keyed on the name used to specify - them in download requests + them in download requests """ return registered_notifiers or {} @@ -350,9 +349,9 @@ def download_data_transformations(self, registered_transformations=None): Extend or modify the list of data transformations. :param registered_transformations: a dict of existing data transformations, - returned from previously loaded plugins + returned from previously loaded plugins :return: a dict of loaded transformations, keyed on the name used to specify - them in download requests + them in download requests """ return registered_transformations or {} diff --git a/ckanext/versioned_datastore/lib/basic_query/geo.py b/ckanext/versioned_datastore/lib/basic_query/geo.py index 6f908ec4..723ccfad 100644 --- a/ckanext/versioned_datastore/lib/basic_query/geo.py +++ b/ckanext/versioned_datastore/lib/basic_query/geo.py @@ -1,4 +1,5 @@ import json + from ckan.plugins import toolkit from elasticsearch_dsl import Q @@ -11,11 +12,12 @@ def add_point_filter(search, distance, coordinates): Adds a point filter query to the search object and returns a new search object. :param search: the current elasticsearch DSL object - :param distance: the radius of the circle centred on the specified location within which records - must lie to be matched. This can specified in any form that elasticsearch - accepts for distances (see their doc, but essentially values like 10km etc). - :param coordinates: the point to centre the radius on, specified as a lon/lat pair in a list - (i.e. [40.2, -20]). + :param distance: the radius of the circle centred on the specified location within + which records must lie to be matched. This can specified in any form that + elasticsearch accepts for distances (see their doc, but essentially values like + 10km etc). + :param coordinates: the point to centre the radius on, specified as a lon/lat pair + in a list (i.e. [40.2, -20]). :return: a search object """ options = { diff --git a/ckanext/versioned_datastore/lib/basic_query/search.py b/ckanext/versioned_datastore/lib/basic_query/search.py index 512942fa..8b6e594a 100644 --- a/ckanext/versioned_datastore/lib/basic_query/search.py +++ b/ckanext/versioned_datastore/lib/basic_query/search.py @@ -3,9 +3,9 @@ from ckan.plugins import PluginImplementations from elasticsearch_dsl import Search -from .geo import add_geo_search -from ..datastore_utils import prefix_field from ...interfaces import IVersionedDatastore +from ..datastore_utils import prefix_field +from .geo import add_geo_search def _find_version(data_dict): @@ -17,8 +17,8 @@ def _find_version(data_dict): recline.js framework used by the NHM on CKAN 2.3 where no additional parameters can be passed other than q, filters etc. - :param data_dict: the data dict, this might be modified if the __version__ key is used (it will - be removed if present) + :param data_dict: the data dict, this might be modified if the __version__ key is + used (it will be removed if present) :return: the version found as an integer, or None if no version was found """ version = data_dict.get('version', None) @@ -49,9 +49,9 @@ def create_search(context, data_dict, original_data_dict): :param context: the context dict :param data_dict: the data dict of parameters - :return: a 3-tuple containing: the original data_dict that was passed into this function, the - data_dict after modification by other plugins and finally the - elasticsearch-dsl Search object + :return: a 3-tuple containing: the original data_dict that was passed into this + function, the data_dict after modification by other plugins and finally the + elasticsearch-dsl Search object """ # allow other extensions implementing our interface to modify the data_dict for plugin in PluginImplementations(IVersionedDatastore): @@ -82,7 +82,7 @@ def build_search_object( facets=None, facet_limits=None, sort=None, - **kwargs + **kwargs, ): """ Given the parameters, creates a new elasticsearch-dsl Search object and returns it. diff --git a/ckanext/versioned_datastore/lib/basic_query/utils.py b/ckanext/versioned_datastore/lib/basic_query/utils.py index d40b0a9b..5578020d 100644 --- a/ckanext/versioned_datastore/lib/basic_query/utils.py +++ b/ckanext/versioned_datastore/lib/basic_query/utils.py @@ -1,16 +1,17 @@ import copy import json + +from ckan.lib.search import SearchIndexError +from ckan.plugins import PluginImplementations from elasticsearch import NotFoundError from elasticsearch_dsl import MultiSearch, Search from splitgill.indexing.utils import DOC_TYPE from splitgill.search import create_version_query -from ckan.lib.search import SearchIndexError -from ckan.plugins import PluginImplementations +from ...interfaces import IVersionedDatastore from .. import common -from ..datastore_utils import prefix_resource, prefix_field +from ..datastore_utils import prefix_field, prefix_resource from ..importing.details import get_all_details -from ...interfaces import IVersionedDatastore def run_search(search, indexes, version=None): @@ -18,11 +19,12 @@ def run_search(search, indexes, version=None): Convenience function to runs a search on the given indexes using the client available in this module. - If the index(es) required for the search are missing then a CKAN SearchIndexError exception is - raised. + If the index(es) required for the search are missing then a CKAN SearchIndexError + exception is raised. :param search: the elasticsearch-dsl search object - :param indexes: either a list of index names to search in or a single index name as a string + :param indexes: either a list of index names to search in or a single index name as + a string :param version: version to filter the search results to, optional :return: the result of running the query """ @@ -37,7 +39,7 @@ def run_search(search, indexes, version=None): def format_facets(aggs): - ''' + """ Formats the facet aggregation result into the format we require. Specifically we expand the buckets out into a dict that looks like this: @@ -61,7 +63,7 @@ def format_facets(aggs): :param aggs: the aggregation dict returned from splitgill/elasticsearch :return: the facet information as a dict - ''' + """ facets = {} for facet, details in aggs.items(): facets[facet] = { diff --git a/ckanext/versioned_datastore/lib/datastore_utils.py b/ckanext/versioned_datastore/lib/datastore_utils.py index d6f9fa8b..43723ffa 100644 --- a/ckanext/versioned_datastore/lib/datastore_utils.py +++ b/ckanext/versioned_datastore/lib/datastore_utils.py @@ -1,10 +1,10 @@ +from cachetools import TTLCache, cached from ckan import model -from ckan.plugins import toolkit, PluginImplementations +from ckan.plugins import PluginImplementations, toolkit from splitgill.indexing.utils import DOC_TYPE -from cachetools import cached, TTLCache -from . import common from ..interfaces import IVersionedDatastore +from . import common def get_latest_version(resource_id): @@ -71,7 +71,7 @@ def get_public_alias_name(resource_id): datastore data. This is just "pub" (retrieved from get_public_alias_prefix above) prepended to the normal prefixed index name, for example: - pubnhm-05ff2255-c38a-40c9-b657-4ccb55ab2feb + pubnhm-05ff2255-c38a-40c9-b657-4ccb55ab2feb :param resource_id: the resource's id :return: the name of the alias @@ -107,10 +107,11 @@ def update_privacy(resource_id, is_private=None): already set correctly on the resource's index in Elasticsearch this does nothing. :param resource_id: the resource's id - :param is_private: whether the package the resource is in is private or not. This is an optional - parameter, if it is left out we look up the resource's package in the - database and find out the private setting that way. - :return: True if modifications were required to update the resource data's privacy, False if not + :param is_private: whether the package the resource is in is private or not. This is + an optional parameter, if it is left out we look up the resource's package in + the database and find out the private setting that way. + :return: True if modifications were required to update the resource data's privacy, + False if not """ if is_private is None: resource = model.Resource.get(resource_id) @@ -128,7 +129,8 @@ def make_private(resource_id): all, or the alias already doesn't exist, nothing happens. :param resource_id: the resource's id - :return: True if modifications were required to make the resource's data private, False if not + :return: True if modifications were required to make the resource's data private, + False if not """ index_name = prefix_resource(resource_id) public_index_name = get_public_alias_name(resource_id) @@ -146,7 +148,8 @@ def make_public(resource_id): the alias already exists, nothing happens. :param resource_id: the resource's id - :return: True if modifications were required to make the resource's data public, False if not + :return: True if modifications were required to make the resource's data public, + False if not """ index_name = prefix_resource(resource_id) public_index_name = get_public_alias_name(resource_id) @@ -190,8 +193,9 @@ def iter_data_fields(mapping): a field at the top level is just ('field', ): {} but a nested one would be ('field', 'sub'): {}). - :param mapping: the mapping dict returned from elasticsearch, this should be the first value in - the dict after the index name, i.e. the result of get_mapping(index)[index] + :param mapping: the mapping dict returned from elasticsearch, this should be the + first value in the dict after the index name, i.e. the result of + get_mapping(index)[index] :return: an iterator which yields fields and their configs """ diff --git a/ckanext/versioned_datastore/lib/downloads/derivatives/__init__.py b/ckanext/versioned_datastore/lib/downloads/derivatives/__init__.py index afd3e1bf..f4d8c1b2 100644 --- a/ckanext/versioned_datastore/lib/downloads/derivatives/__init__.py +++ b/ckanext/versioned_datastore/lib/downloads/derivatives/__init__.py @@ -1,7 +1,7 @@ from .csv import CsvDerivativeGenerator +from .dwc import DwcDerivativeGenerator from .json import JsonDerivativeGenerator from .xlsx import XlsxDerivativeGenerator -from .dwc import DwcDerivativeGenerator derivatives = [ CsvDerivativeGenerator, diff --git a/ckanext/versioned_datastore/lib/downloads/derivatives/csv.py b/ckanext/versioned_datastore/lib/downloads/derivatives/csv.py index c1d761a5..f997f982 100644 --- a/ckanext/versioned_datastore/lib/downloads/derivatives/csv.py +++ b/ckanext/versioned_datastore/lib/downloads/derivatives/csv.py @@ -1,7 +1,7 @@ import csv -from .base import BaseDerivativeGenerator from ..utils import flatten_dict +from .base import BaseDerivativeGenerator class CsvDerivativeGenerator(BaseDerivativeGenerator): @@ -15,7 +15,7 @@ def __init__( query, resource_id=None, delimiter='comma', - **format_args + **format_args, ): super(CsvDerivativeGenerator, self).__init__( output_dir, fields, query, resource_id, delimiter='comma', **format_args diff --git a/ckanext/versioned_datastore/lib/downloads/derivatives/dwc/generator.py b/ckanext/versioned_datastore/lib/downloads/derivatives/dwc/generator.py index 383cebe8..b835cf2a 100644 --- a/ckanext/versioned_datastore/lib/downloads/derivatives/dwc/generator.py +++ b/ckanext/versioned_datastore/lib/downloads/derivatives/dwc/generator.py @@ -6,13 +6,13 @@ from datetime import datetime as dt from uuid import uuid4 +from ckan.plugins import PluginImplementations, plugin_loaded, toolkit from lxml import etree -from ckan.plugins import toolkit, plugin_loaded, PluginImplementations +from .....interfaces import IVersionedDatastoreDownloads +from ..base import BaseDerivativeGenerator from . import urls, utils from .schema import Schema -from ..base import BaseDerivativeGenerator -from .....interfaces import IVersionedDatastoreDownloads class DwcDerivativeGenerator(BaseDerivativeGenerator): @@ -174,7 +174,8 @@ def _extract_record(self, record): fields from core fields and filters out any fields that don't match the schema. :param record: the row of data - :return: core row (dict), extension rows keyed on extension name (dict of lists of dicts) + :return: core row (dict), extension rows keyed on extension name (dict of lists + of dicts) """ core = {} ext = {} @@ -280,9 +281,9 @@ def make_eml(self): """ Create the xml text content of the resource metadata file. - Tries to use some sensible - defaults and get information from other relevant plugins where available, but there's still - the potential for errors or silly data. + Tries to use some sensible defaults and get information from other relevant + plugins where available, but there's still the potential for errors or silly + data. :return: xml string """ # load some useful actions so we don't have to fetch them repeatedly diff --git a/ckanext/versioned_datastore/lib/downloads/derivatives/dwc/utils.py b/ckanext/versioned_datastore/lib/downloads/derivatives/dwc/utils.py index 8bc5dfc4..954e1721 100644 --- a/ckanext/versioned_datastore/lib/downloads/derivatives/dwc/utils.py +++ b/ckanext/versioned_datastore/lib/downloads/derivatives/dwc/utils.py @@ -1,9 +1,8 @@ from io import BytesIO import requests -from lxml import etree - from ckan.plugins import toolkit +from lxml import etree parser = etree.XMLParser(recover=True) diff --git a/ckanext/versioned_datastore/lib/downloads/derivatives/json.py b/ckanext/versioned_datastore/lib/downloads/derivatives/json.py index 120fb115..7667144c 100644 --- a/ckanext/versioned_datastore/lib/downloads/derivatives/json.py +++ b/ckanext/versioned_datastore/lib/downloads/derivatives/json.py @@ -1,7 +1,6 @@ -import csv +import json from .base import BaseDerivativeGenerator -import json class JsonDerivativeGenerator(BaseDerivativeGenerator): diff --git a/ckanext/versioned_datastore/lib/downloads/derivatives/xlsx.py b/ckanext/versioned_datastore/lib/downloads/derivatives/xlsx.py index 84d650df..2047fa8b 100644 --- a/ckanext/versioned_datastore/lib/downloads/derivatives/xlsx.py +++ b/ckanext/versioned_datastore/lib/downloads/derivatives/xlsx.py @@ -1,7 +1,7 @@ from openpyxl import Workbook, load_workbook -from .base import BaseDerivativeGenerator from ..utils import flatten_dict +from .base import BaseDerivativeGenerator class XlsxDerivativeGenerator(BaseDerivativeGenerator): diff --git a/ckanext/versioned_datastore/lib/downloads/download.py b/ckanext/versioned_datastore/lib/downloads/download.py index ea7e319d..1b5ad617 100644 --- a/ckanext/versioned_datastore/lib/downloads/download.py +++ b/ckanext/versioned_datastore/lib/downloads/download.py @@ -1,22 +1,27 @@ -import os.path -from collections import defaultdict - -import fastavro import hashlib import json import os +import os.path import shutil import tempfile import zipfile +from collections import defaultdict from datetime import datetime as dt -from elasticsearch_dsl import Search from functools import partial from glob import iglob + +import fastavro +from ckan.lib import uploader +from ckan.plugins import PluginImplementations, toolkit +from elasticsearch_dsl import Search from splitgill.indexing.utils import get_elasticsearch_client from splitgill.search import create_version_query -from ckan.lib import uploader -from ckan.plugins import toolkit, PluginImplementations +from ...interfaces import IVersionedDatastoreDownloads +from ...logic.actions.meta.arg_objects import DerivativeArgs +from ...model.downloads import CoreFileRecord, DerivativeFileRecord, DownloadRequest +from .. import common +from ..datastore_utils import prefix_resource from .loaders import ( get_derivative_generator, get_file_server, @@ -24,13 +29,7 @@ get_transformation, ) from .query import Query -from .utils import get_schemas, calculate_field_counts, filter_data_fields, get_fields -from .. import common -from ..datastore_utils import prefix_resource -from ...interfaces import IVersionedDatastoreDownloads -from ...logic.actions.meta.arg_objects import DerivativeArgs -from ...model.downloads import CoreFileRecord, DownloadRequest -from ...model.downloads import DerivativeFileRecord +from .utils import calculate_field_counts, filter_data_fields, get_fields, get_schemas class DownloadRunManager: @@ -254,8 +253,8 @@ def generate_core(self): """ Generates and loads core files. - This method will add new resources to existing core records - with identical filters, reducing data duplication. + This method will add new resources to existing core records with identical + filters, reducing data duplication. :return: the core record """ if not os.path.exists(self.core_folder_path): diff --git a/ckanext/versioned_datastore/lib/downloads/loaders.py b/ckanext/versioned_datastore/lib/downloads/loaders.py index b5664f94..74c987ab 100644 --- a/ckanext/versioned_datastore/lib/downloads/loaders.py +++ b/ckanext/versioned_datastore/lib/downloads/loaders.py @@ -1,8 +1,8 @@ from ckan.plugins import PluginImplementations, toolkit + from ckanext.versioned_datastore.interfaces import IVersionedDatastoreDownloads -from . import derivatives, servers, notifiers, transforms -from functools import partial +from . import derivatives, notifiers, servers, transforms def get_derivative_generator(derivative_name, *args, **kwargs): diff --git a/ckanext/versioned_datastore/lib/downloads/notifiers/__init__.py b/ckanext/versioned_datastore/lib/downloads/notifiers/__init__.py index d611b872..4d69ec2e 100644 --- a/ckanext/versioned_datastore/lib/downloads/notifiers/__init__.py +++ b/ckanext/versioned_datastore/lib/downloads/notifiers/__init__.py @@ -1,7 +1,8 @@ +from ckan.plugins import toolkit + from .email import EmailNotifier -from .webhook import WebhookNotifier from .null import NullNotifier -from ckan.plugins import toolkit +from .webhook import WebhookNotifier notifiers = [EmailNotifier, WebhookNotifier, NullNotifier] diff --git a/ckanext/versioned_datastore/lib/downloads/notifiers/base.py b/ckanext/versioned_datastore/lib/downloads/notifiers/base.py index 95c1f1f6..e74e81a3 100644 --- a/ckanext/versioned_datastore/lib/downloads/notifiers/base.py +++ b/ckanext/versioned_datastore/lib/downloads/notifiers/base.py @@ -1,7 +1,8 @@ from abc import ABCMeta, abstractmethod + +from ckan.plugins import PluginImplementations, toolkit from jinja2 import Template -from ckan.plugins import toolkit, PluginImplementations from ....interfaces import IVersionedDatastoreDownloads from ....model.downloads import DownloadRequest @@ -9,37 +10,37 @@ class BaseNotifier(metaclass=ABCMeta): name = 'base' - default_start_text = ''' + default_start_text = """ Your download on {{ site_name }} has started processing. The status of your download can be viewed here: {{ status_page }} - '''.strip() + """.strip() - default_start_html = ''' + default_start_html = """

Your download on {{ site_name }} has started processing.

The status of your download can be viewed here.

- '''.strip() + """.strip() - default_end_text = ''' + default_end_text = """ The link to the resource data you requested on {{ site_url }} is available at {{ download_url }}. - '''.strip() + """.strip() - default_end_html = ''' + default_end_html = """

The link to the resource data you requested on {{ site_name }} is available here.

- '''.strip() + """.strip() - default_error_text = ''' + default_error_text = """ Your download on {{ site_name }} has encountered an error and has stopped processing. More details can be viewed at: {{ status_page }}. Please try again later and contact us at {{ contact_email }} if the problem persists. - '''.strip() + """.strip() - default_error_html = ''' + default_error_html = """

Your download on {{ site_name }} has encountered an error and has stopped processing.

More details can be viewed here.

Please try again later and contact us at {{ contact_email }} if the problem persists.

- '''.strip() + """.strip() def __init__(self, request, **type_args): self._request = request diff --git a/ckanext/versioned_datastore/lib/downloads/notifiers/email.py b/ckanext/versioned_datastore/lib/downloads/notifiers/email.py index 4ebe4057..30fe9a10 100644 --- a/ckanext/versioned_datastore/lib/downloads/notifiers/email.py +++ b/ckanext/versioned_datastore/lib/downloads/notifiers/email.py @@ -2,15 +2,16 @@ from ckan.lib import mailer from ckan.plugins import toolkit + from .base import BaseNotifier -default_html_body = ''' +default_html_body = """ {0} -'''.strip() +""".strip() class EmailNotifier(BaseNotifier): diff --git a/ckanext/versioned_datastore/lib/downloads/notifiers/webhook.py b/ckanext/versioned_datastore/lib/downloads/notifiers/webhook.py index 0391900c..22eef326 100644 --- a/ckanext/versioned_datastore/lib/downloads/notifiers/webhook.py +++ b/ckanext/versioned_datastore/lib/downloads/notifiers/webhook.py @@ -1,6 +1,6 @@ import requests - from ckan.plugins import toolkit + from .base import BaseNotifier diff --git a/ckanext/versioned_datastore/lib/downloads/query.py b/ckanext/versioned_datastore/lib/downloads/query.py index fb5f1987..27cd7ea7 100644 --- a/ckanext/versioned_datastore/lib/downloads/query.py +++ b/ckanext/versioned_datastore/lib/downloads/query.py @@ -1,6 +1,8 @@ import hashlib from ckan.plugins import toolkit + +from ...logic.actions.meta.arg_objects import QueryArgs from ..basic_query.utils import convert_to_multisearch from ..query.schema import ( get_latest_query_version, @@ -9,7 +11,6 @@ validate_query, ) from ..query.utils import get_resources_and_versions -from ...logic.actions.meta.arg_objects import QueryArgs class Query(object): diff --git a/ckanext/versioned_datastore/lib/downloads/servers/direct.py b/ckanext/versioned_datastore/lib/downloads/servers/direct.py index d91a0391..5ed8e86e 100644 --- a/ckanext/versioned_datastore/lib/downloads/servers/direct.py +++ b/ckanext/versioned_datastore/lib/downloads/servers/direct.py @@ -1,7 +1,9 @@ -from .base import BaseFileServer -from ckan.plugins import toolkit import os +from ckan.plugins import toolkit + +from .base import BaseFileServer + class DirectFileServer(BaseFileServer): name = 'direct' diff --git a/ckanext/versioned_datastore/lib/downloads/utils.py b/ckanext/versioned_datastore/lib/downloads/utils.py index cf56ca76..db8eeb71 100644 --- a/ckanext/versioned_datastore/lib/downloads/utils.py +++ b/ckanext/versioned_datastore/lib/downloads/utils.py @@ -1,16 +1,16 @@ -from elasticsearch_dsl import Search, A +from elasticsearch_dsl import A, Search from fastavro import parse_schema from splitgill.search import create_version_query -from .query import Query from .. import common from ..datastore_utils import ( - prefix_resource, - prefix_field, iter_data_fields, + prefix_field, + prefix_resource, unprefix_index, ) from ..query.fields import get_mappings +from .query import Query def get_schemas(query: Query): @@ -104,9 +104,9 @@ def get_fields(field_counts, ignore_empty_fields, resource_id=None): :param field_counts: the dict of resource ids -> fields -> counts :param ignore_empty_fields: whether fields with no values should be included in the - resulting list or not + resulting list or not :param resource_id: the resource id to get the fields for. The default is None which - means that the fields from all resources will be returned + means that the fields from all resources will be returned :return: a list of fields in case-insensitive ascending order """ # TODO: retrieve the sort order for resources from the database and use diff --git a/ckanext/versioned_datastore/lib/importing/details.py b/ckanext/versioned_datastore/lib/importing/details.py index ef2d63c5..c34eeacd 100644 --- a/ckanext/versioned_datastore/lib/importing/details.py +++ b/ckanext/versioned_datastore/lib/importing/details.py @@ -51,8 +51,10 @@ def get_all_details(resource_id, up_to_version=None): beyond it are ignored and not returned in the resulting OrderedDict. :param resource_id: the resource id - :param up_to_version: the maximum version to include in the resulting OrderedDict (inclusive) - :return: None or an OrderedDict of version: DatastoreResourceDetails objects in ascending order + :param up_to_version: the maximum version to include in the resulting OrderedDict + (inclusive) + :return: None or an OrderedDict of version: DatastoreResourceDetails objects in + ascending order """ query = ( model.Session.query(DatastoreResourceDetails) diff --git a/ckanext/versioned_datastore/lib/importing/importing.py b/ckanext/versioned_datastore/lib/importing/importing.py index 32fd3707..b3c1e0fa 100644 --- a/ckanext/versioned_datastore/lib/importing/importing.py +++ b/ckanext/versioned_datastore/lib/importing/importing.py @@ -1,12 +1,12 @@ import logging from datetime import datetime +from .. import common +from ..datastore_utils import get_latest_version from .indexing import ResourceIndexRequest, index_resource from .ingestion import deletion from .ingestion.ingesting import ingest_resource from .utils import check_version_is_valid -from .. import common -from ..datastore_utils import get_latest_version log = logging.getLogger(__name__) @@ -22,12 +22,12 @@ class ResourceImportRequest(object): """ def __init__(self, resource, version, replace, records=None, api_key=None): - ''' + """ :param resource: the resource we're going to import (this must be the resource dict) :param version: the version of the resource to import :param replace: whether to replace the existing data or not :param records: a list of dicts to import, or None if the data is coming from URL or file - ''' + """ self.resource = resource self.version = version self.replace = replace @@ -55,10 +55,11 @@ def import_resource_data(request): elasticsearch. If the data argument is None (note, not falsey or an empty list, actually None) then the resource's url field is used as the source of the data. - This function is blocking so it should be called through the background task queue to avoid - blocking up a CKAN thread. + This function is blocking so it should be called through the background task queue + to avoid blocking up a CKAN thread. - :param request: the ResourceImportRequest object describing the resource import we need to do + :param request: the ResourceImportRequest object describing the resource import we + need to do """ # first, double check that the version is valid if not check_version_is_valid(request.resource_id, request.version): @@ -111,10 +112,10 @@ class ResourceDeletionRequest: """ def __init__(self, resource, version): - ''' + """ :param resource: the resource we're going to delete (this must be the resource dict) :param version: the version of the resource to delete - ''' + """ self.resource = resource self.version = version self.resource_id = resource['id'] @@ -131,11 +132,11 @@ def delete_resource_data(request): Deletes all the resource's data. This involves ingesting a new version where all fields in each record are missing and then indexing this new version. - This function is blocking so it should be called through the background task queue to avoid - blocking up a CKAN thread. + This function is blocking so it should be called through the background task queue + to avoid blocking up a CKAN thread. - :param request: the ResourceDeletionRequest object describing the resource deletion we need to - do + :param request: the ResourceDeletionRequest object describing the resource deletion + we need to do """ # first, double check that the version is valid if not check_version_is_valid(request.resource_id, request.version): diff --git a/ckanext/versioned_datastore/lib/importing/indexing.py b/ckanext/versioned_datastore/lib/importing/indexing.py index 45510a6a..e3067fb5 100644 --- a/ckanext/versioned_datastore/lib/importing/indexing.py +++ b/ckanext/versioned_datastore/lib/importing/indexing.py @@ -5,11 +5,11 @@ from splitgill.indexing.feeders import SimpleIndexFeeder from splitgill.indexing.indexers import Indexer from splitgill.indexing.indexes import Index -from splitgill.indexing.utils import get_versions_and_data, DOC_TYPE +from splitgill.indexing.utils import DOC_TYPE, get_versions_and_data -from . import stats -from .. import common from ...interfaces import IVersionedDatastore +from .. import common +from . import stats log = logging.getLogger(__name__) @@ -23,13 +23,13 @@ class DatastoreIndex(Index): def __init__( self, config, name, version, latitude_field=None, longitude_field=None ): - ''' + """ :param config: the splitgill config object :param name: the resource id, this will be used as the index name :param version: the version being indexed up to :param latitude_field: optional - the name of a field containing latitudinal data :param longitude_field: optional - the name of a field containing longitudinal data - ''' + """ super(DatastoreIndex, self).__init__(config, name, version) self.latitude_field = latitude_field self.longitude_field = longitude_field @@ -131,11 +131,11 @@ class ResourceIndexRequest(object): """ def __init__(self, resource, lower_version, upper_version): - ''' + """ :param resource: the dict for the resource we're going to index :param lower_version: the lower version to index (exclusive) :param upper_version: the upper version to index (inclusive) - ''' + """ self.resource = resource self.lower_version = lower_version self.upper_version = upper_version diff --git a/ckanext/versioned_datastore/lib/importing/ingestion/deletion.py b/ckanext/versioned_datastore/lib/importing/ingestion/deletion.py index 48af595d..d319f4c8 100644 --- a/ckanext/versioned_datastore/lib/importing/ingestion/deletion.py +++ b/ckanext/versioned_datastore/lib/importing/ingestion/deletion.py @@ -1,12 +1,12 @@ import logging from splitgill.ingestion.converters import RecordToMongoConverter -from splitgill.ingestion.feeders import IngestionFeeder, BaseRecord +from splitgill.ingestion.feeders import BaseRecord, IngestionFeeder from splitgill.ingestion.ingesters import Ingester from splitgill.mongo import get_mongo -from .. import stats from ... import common +from .. import stats log = logging.getLogger(__name__) @@ -17,11 +17,11 @@ class DeletionRecord(BaseRecord): """ def __init__(self, version, resource_id, record_id): - ''' + """ :param version: the version of this operation :param resource_id: the resource id of the resource we're deleting from :param record_id: the record to be deleted - ''' + """ super(DeletionRecord, self).__init__(version) self.resource_id = resource_id self.record_id = record_id @@ -62,10 +62,10 @@ class DeletionFeeder(IngestionFeeder): """ def __init__(self, version, resource_id): - ''' + """ :param version: the version of data to be fed :param resource_id: the resource id for which the data applies - ''' + """ super(DeletionFeeder, self).__init__(version) self.resource_id = resource_id @@ -133,12 +133,12 @@ class ReplaceDeletionFeeder(IngestionFeeder): """ def __init__(self, version, resource_id, tracker, original_source): - ''' + """ :param version: the version of data to be fed :param resource_id: the resource id for which the data applies :param tracker: the InclusionTracker object :param original_source: the name of the original resource data source - ''' + """ super(ReplaceDeletionFeeder, self).__init__(version) self.resource_id = resource_id self.tracker = tracker diff --git a/ckanext/versioned_datastore/lib/importing/ingestion/exceptions.py b/ckanext/versioned_datastore/lib/importing/ingestion/exceptions.py index 0ea76e8f..70a167b8 100644 --- a/ckanext/versioned_datastore/lib/importing/ingestion/exceptions.py +++ b/ckanext/versioned_datastore/lib/importing/ingestion/exceptions.py @@ -7,15 +7,15 @@ class IngestionException(Exception): class UnsupportedDataSource(IngestionException): - ''' + """ Should be raised when the data source we are attempting to ingest isn't one we can ingest - i.e. the format isn't one we support. - ''' + """ def __init__(self, res_format): - ''' + """ :param res_format: the resource format - ''' + """ super(UnsupportedDataSource, self).__init__( f'Could not find ingest reader for {res_format if res_format else "n/a"}' ) @@ -29,11 +29,11 @@ class InvalidId(IngestionException): """ def __init__(self, row_number, row, cause=None): - ''' + """ :param row_number: the row number (1-indexed, excluding the header) :param row: the row (this should be a dict :param cause: optional cause exception, for example a ValueError thrown by int(row['_id') - ''' + """ message = f'Row {row_number} had an invalid integer id: "{row["_id"]}"' if cause is not None: message = f'{message} [{cause.__class__.__name__}: {str(cause)}]' @@ -51,9 +51,9 @@ class DuplicateDataSource(IngestionException): """ def __init__(self, file_hash): - ''' + """ :param file_hash: the file hash that clashed - ''' + """ super(DuplicateDataSource, self).__init__( f'This file has been ingested before, ignoring [hash: {file_hash}]' ) @@ -64,9 +64,9 @@ class InvalidCharacterException(IngestionException): """ Thrown when there is an invalid unicode character found in the resource data. - This is detected - by checking if the unicode version of the row contains any category C characters (control - characters basically, see here: http://www.unicode.org/reports/tr44/#General_Category_Values). + This is detected by checking if the unicode version of the row contains any category + C characters (control characters basically, see here: + http://www.unicode.org/reports/tr44/#General_Category_Values). This is treated as an error to avoid putting crap unicode into the jsonl.gz intermediate file and then erroring when attempting to deserialise the json. Typically this error is produced when the user has uploaded a file in a really weird character @@ -74,10 +74,10 @@ class InvalidCharacterException(IngestionException): """ def __init__(self, row_number, row): - ''' + """ :param row_number: the row number (1-indexed, excluding the header) :param row: the row (this should be a dict - ''' + """ message = f'Row {row_number} (excluding header) contained an invalid character' super(InvalidCharacterException, self).__init__(message) self.row_number = row_number diff --git a/ckanext/versioned_datastore/lib/importing/ingestion/ingesting.py b/ckanext/versioned_datastore/lib/importing/ingestion/ingesting.py index 830526e0..53efbb5e 100644 --- a/ckanext/versioned_datastore/lib/importing/ingestion/ingesting.py +++ b/ckanext/versioned_datastore/lib/importing/ingestion/ingesting.py @@ -1,31 +1,31 @@ +import codecs import contextlib import gzip -import logging -import shutil -import tempfile -import zipfile - -import codecs import itertools +import logging import math import os -import simplejson +import shutil +import tempfile import unicodedata +import zipfile from contextlib import suppress from datetime import datetime + +import simplejson from splitgill.ingestion.converters import RecordToMongoConverter from splitgill.ingestion.feeders import IngestionFeeder from splitgill.ingestion.ingesters import Ingester from splitgill.mongo import get_mongo +from ....model.stats import ImportStats +from .. import stats +from ..details import create_details, get_last_file_hash from . import exceptions from .deletion import ReplaceDeletionFeeder -from .readers import get_reader, APIReader +from .readers import APIReader, get_reader from .records import DatastoreRecord -from .utils import download_to_temp_file, compute_hash, InclusionTracker -from .. import stats -from ..details import create_details, get_last_file_hash -from ....model.stats import ImportStats +from .utils import InclusionTracker, compute_hash, download_to_temp_file log = logging.getLogger(__name__) @@ -39,8 +39,8 @@ def ingest_resource(version, config, resource, data, replace, api_key): :param resource: the resource dict :param data: the data to ingest (can be None if not using the API) :param replace: boolean indicating whether to replace the existing data or not - :param api_key: the API key if the resource's CKAN URL is to be used as the source and the - resource is private + :param api_key: the API key if the resource's CKAN URL is to be used as the source + and the resource is private :return: True if the ingest was successful, False if not """ # cache the resource id as we use it a few times @@ -229,14 +229,15 @@ def get_fp_and_reader_for_resource_data(resource, data=None, api_key=None): resource source and a ResourceReader instance for reading the data from the file pointer. - If the data parameter is passed (should be a list of dicts) then no file pointer is yielded - (None is yielded instead). A ResourceReader instance is yielded as normal. + If the data parameter is passed (should be a list of dicts) then no file pointer is + yielded (None is yielded instead). A ResourceReader instance is yielded as normal. :param resource: the resource dict :param data: optional data, if provided must be a list of dicts - :param api_key: the API key of a user who can read the data, if indeed the data needs an API - key to get it. This is needed when the URL is the CKAN resource download URL - of a private resource. Can be None to indicate no API key is required + :param api_key: the API key of a user who can read the data, if indeed the data + needs an API key to get it. This is needed when the URL is the CKAN resource + download URL of a private resource. Can be None to indicate no API key is + required :return: yields a file pointer and a ResourceReader instance """ handled = False @@ -309,12 +310,12 @@ class DatastoreFeeder(IngestionFeeder): """ def __init__(self, config, resource_id, version, data_file_name): - ''' + """ :param config: the splitgill config object :param resource_id: the resource id :param version: the version of the data we're ingesting :param data_file_name: the name of the intermediate data file to read the data from - ''' + """ super(DatastoreFeeder, self).__init__(version) self.config = config self.resource_id = resource_id @@ -327,7 +328,8 @@ def iter_rows(self, skip_header_row): This generator yields dicts and therefore handles reading and deserialising the rows so that you don't have to. - :param skip_header_row: whether to skip the header row or yield it as if it was a normal row + :param skip_header_row: whether to skip the header row or yield it as if it was + a normal row :return: a generator of dicts """ with gzip.open(self.data_file_name, 'rb') as gzip_file: @@ -343,8 +345,8 @@ def get_existing_max_id(self): """ Figure out what the current max id is in this resource's collection. - :return: the highest id in the collection currently (it'll be an int), or 0 if there - aren't any documents in the collection + :return: the highest id in the collection currently (it'll be an int), or 0 if + there aren't any documents in the collection """ with get_mongo(self.config, collection=self.resource_id) as mongo: # sort by id descending to get the highest diff --git a/ckanext/versioned_datastore/lib/importing/ingestion/readers.py b/ckanext/versioned_datastore/lib/importing/ingestion/readers.py index b6d5aef8..cc0e1820 100644 --- a/ckanext/versioned_datastore/lib/importing/ingestion/readers.py +++ b/ckanext/versioned_datastore/lib/importing/ingestion/readers.py @@ -1,16 +1,16 @@ import abc - import codecs import csv import numbers + import openpyxl import xlrd from cchardet import UniversalDetector from openpyxl.cell.read_only import EmptyCell +from ... import common from .exceptions import InvalidId from .utils import ensure_reset -from ... import common def get_reader(resource_format): @@ -40,10 +40,10 @@ class ResourceReader(abc.ABC): """ def __init__(self, compressible): - ''' + """ :param compressible: whether the reader can cope with a gzipped file pointer being passed to the get_fields and get_rows functions. - ''' + """ self.compressible = compressible @abc.abstractmethod @@ -95,15 +95,15 @@ def iter_rows(self, resource_data_fp): class SVReader(ResourceReader): - ''' + """ A *SV reader - handles CSVs and TSVs. - ''' + """ def __init__(self, dialect): - ''' + """ :param dialect: the dialect of the source, this is passed straight to the csv reader constructor function - ''' + """ super(SVReader, self).__init__(True) self.dialect = dialect self.encoding = None @@ -323,9 +323,9 @@ class APIReader(ResourceReader): """ def __init__(self, data): - ''' + """ :param data: the data as a list of dicts - ''' + """ super(APIReader, self).__init__(False) self.data = data diff --git a/ckanext/versioned_datastore/lib/importing/ingestion/records.py b/ckanext/versioned_datastore/lib/importing/ingestion/records.py index 89d165d4..19b43763 100644 --- a/ckanext/versioned_datastore/lib/importing/ingestion/records.py +++ b/ckanext/versioned_datastore/lib/importing/ingestion/records.py @@ -35,12 +35,12 @@ class DatastoreRecord(BaseRecord): """ def __init__(self, version, record_id, data, resource_id): - ''' + """ :param version: the version of this record :param record_id: the record's id :param data: a dict containing the fields and values for the record :param resource_id: the resource id this record belongs to - ''' + """ super(DatastoreRecord, self).__init__(version) self.record_id = record_id self.data = data diff --git a/ckanext/versioned_datastore/lib/importing/ingestion/utils.py b/ckanext/versioned_datastore/lib/importing/ingestion/utils.py index 295f11ff..9550bf9e 100644 --- a/ckanext/versioned_datastore/lib/importing/ingestion/utils.py +++ b/ckanext/versioned_datastore/lib/importing/ingestion/utils.py @@ -2,7 +2,7 @@ import hashlib import sqlite3 import tempfile -from contextlib import contextmanager, closing +from contextlib import closing, contextmanager import requests @@ -32,13 +32,13 @@ def download_to_temp_file(url, headers=None, compress=True, chunk_size=1024): temporary file is then yielded to the caller for use. Once the context collapses the temporary file is removed. - If the compress parameter is passed as True (the default) the data will be downloaded and - written to a file using GZIP and a GzipFile pointer will be returned. + If the compress parameter is passed as True (the default) the data will be + downloaded and written to a file using GZIP and a GzipFile pointer will be returned. :param url: the url to stream the data from :param headers: a dict of headers to pass with the request - :param compress: whether to compress the downloaded data when storing it, if so a GzipFile - pointer will be returned (default: True) + :param compress: whether to compress the downloaded data when storing it, if so a + GzipFile pointer will be returned (default: True) :param chunk_size: the number of bytes to read at a time from the url stream """ headers = headers if headers else {} @@ -92,9 +92,9 @@ class InclusionTracker(object): """ def __init__(self, ingester): - ''' + """ :param ingester: the ingester object - we'll use the update signal to track the ids - ''' + """ self.ingester = ingester self.temporary_file = None self.tracker_db = None diff --git a/ckanext/versioned_datastore/lib/importing/queuing.py b/ckanext/versioned_datastore/lib/importing/queuing.py index 76c94222..de1e9530 100644 --- a/ckanext/versioned_datastore/lib/importing/queuing.py +++ b/ckanext/versioned_datastore/lib/importing/queuing.py @@ -1,12 +1,12 @@ from ckan.plugins import toolkit from .importing import ( - import_resource_data, - ResourceImportRequest, ResourceDeletionRequest, + ResourceImportRequest, delete_resource_data, + import_resource_data, ) -from .indexing import index_resource, ResourceIndexRequest +from .indexing import ResourceIndexRequest, index_resource def queue(task, request): @@ -35,9 +35,11 @@ def queue_import(resource, version, replace, records=None, api_key=None): :param resource: the resource we're going to import (this must be the resource dict) :param version: the version of the resource to import :param replace: whether to replace the existing data or not - :param records: a list of dicts to import, or None if the data is coming from URL or file - :param api_key: the api key of the user who initiated the import, this is required if the - package the resource is in is private and the data in the resource was uploaded + :param records: a list of dicts to import, or None if the data is coming from URL or + file + :param api_key: the api key of the user who initiated the import, this is required + if the package the resource is in is private and the data in the resource was + uploaded :return: the queued job """ resource_import_request = ResourceImportRequest( diff --git a/ckanext/versioned_datastore/lib/importing/stats.py b/ckanext/versioned_datastore/lib/importing/stats.py index 22774129..2ca48572 100644 --- a/ckanext/versioned_datastore/lib/importing/stats.py +++ b/ckanext/versioned_datastore/lib/importing/stats.py @@ -1,7 +1,7 @@ +from datetime import datetime from traceback import format_exception_only from ckan import model -from datetime import datetime from sqlalchemy import desc from ...model.stats import ImportStats @@ -20,8 +20,8 @@ def start_operation(resource_id, import_type, version, start=None): :param resource_id: the id of the resource being worked on :param import_type: the type of import operation being undertaken :param version: the version of the data - :param start: the datetime when this operation was started (optional, if None current time will - be used) + :param start: the datetime when this operation was started (optional, if None + current time will be used) :return: the database id of the saved ImportStats object """ if start is None: @@ -116,9 +116,9 @@ def monitor_indexing(stats_id, indexer, update_frequency=1000): :param stats_id: the database id of the object to update :param indexer: the Indexer object to monitor - :param update_frequency: the frequency with which to update the ImportStats. Setting this too - low will cause the database written to a lot which could cause - performance issues. + :param update_frequency: the frequency with which to update the ImportStats. Setting + this too low will cause the database written to a lot which could cause + performance issues. """ @indexer.index_signal.connect_via(indexer) diff --git a/ckanext/versioned_datastore/lib/importing/utils.py b/ckanext/versioned_datastore/lib/importing/utils.py index 590fcfd1..6983808c 100644 --- a/ckanext/versioned_datastore/lib/importing/utils.py +++ b/ckanext/versioned_datastore/lib/importing/utils.py @@ -7,8 +7,8 @@ def check_version_is_valid(resource_id, version): the ingest version not the indexed version as this is the source of truth about the versions of the resource we know about. - The version must be greater than the latest ingested version or there must not be any ingested - versions available. + The version must be greater than the latest ingested version or there must not be + any ingested versions available. :param resource_id: the resource's id :param version: the version to check diff --git a/ckanext/versioned_datastore/lib/query/fields.py b/ckanext/versioned_datastore/lib/query/fields.py index c4f6891a..c2a592d5 100644 --- a/ckanext/versioned_datastore/lib/query/fields.py +++ b/ckanext/versioned_datastore/lib/query/fields.py @@ -1,19 +1,19 @@ import itertools +import re from collections import Counter, defaultdict -import re from elasticsearch_dsl import MultiSearch, Q from elasticsearch_dsl.query import Bool -from .utils import chunk_iterator from .. import common from ..datastore_utils import ( - prefix_resource, iter_data_fields, - unprefix_index, prefix_field, + prefix_resource, + unprefix_index, ) from ..importing.details import get_all_details +from .utils import chunk_iterator def get_mappings(resource_ids, chunk_size=5): @@ -40,9 +40,9 @@ class Fields(object): """ def __init__(self, skip_ids=True): - ''' + """ :param skip_ids: whether to skip the _id columns or not (at any nesting level) - ''' + """ self.skip_ids = skip_ids # this counts the number of resources a group appears in @@ -57,11 +57,11 @@ def add(self, field_path, resource_id): """ Adds the given field path from the given resource id into the object's stores. - :param field_path: a tuple representing a field's path, for root level fields this should be - a tuple with a single element like ('genus', ) whereas for nested fields - 2 or more elements will be present, like ('associatedMedia', 'category'). - The field path is joined using a '.' to create a full path for each - field. + :param field_path: a tuple representing a field's path, for root level fields + this should be a tuple with a single element like ('genus', ) whereas for + nested fields 2 or more elements will be present, like ('associatedMedia', + 'category'). The field path is joined using a '.' to create a full path for + each field. :param resource_id: the resource id the field belongs to """ if self.skip_ids and field_path[-1] == '_id': @@ -96,9 +96,10 @@ def top_groups(self): Generator which yields the groups with the highest resource representation from highest to lowest. - :return: a generator which yields a 3-tuple on each iteration. The 3-tuple contains the - group name, the resource count and a dict of containing all the field names in the - group and the resources they appear in (field name -> list of resource ids) + :return: a generator which yields a 3-tuple on each iteration. The 3-tuple + contains the group name, the resource count and a dict of containing all the + field names in the group and the resources they appear in (field name -> + list of resource ids) """ # return a sorted list in reverse count order, secondarily sorted by group name ascending # h/t https://stackoverflow.com/a/23033745. We sort by alphabetical secondarily to ensure @@ -128,8 +129,11 @@ def get_searches(self, search): ) # yield the group tuple and an elasticsearch-dsl object for the group's fields - yield (group_name, resource_count, variants), search.index(indexes).filter( - Bool(should=shoulds, minimum_should_match=1) + yield ( + (group_name, resource_count, variants), + search.index(indexes).filter( + Bool(should=shoulds, minimum_should_match=1) + ), ) def is_forced(self, group): diff --git a/ckanext/versioned_datastore/lib/query/schema.py b/ckanext/versioned_datastore/lib/query/schema.py index daa823a1..3c040fb8 100644 --- a/ckanext/versioned_datastore/lib/query/schema.py +++ b/ckanext/versioned_datastore/lib/query/schema.py @@ -1,13 +1,12 @@ +import abc import io +import itertools import json from collections import OrderedDict -import abc -import itertools -import os import six -from jsonschema.validators import validator_for, RefResolver from importlib_resources import files +from jsonschema.validators import RefResolver, validator_for schemas = OrderedDict() schema_base_path = files('ckanext.versioned_datastore.theme').joinpath( @@ -58,7 +57,8 @@ def validate_query(query, version): :param query: the query dict :param version: the query schema version to validate against - :return: True if the validation succeeded, otherwise jsonschema exceptions will be raised + :return: True if the validation succeeded, otherwise jsonschema exceptions will be + raised """ if version not in schemas: raise InvalidQuerySchemaVersionError(version) @@ -74,8 +74,8 @@ def translate_query(query, version, search=None): :param query: the whole query dict :param version: the query schema version to translate using - :param search: an instantiated elasticsearch-dsl object to be built on instead of creating - a fresh object. By default a new search object is created. + :param search: an instantiated elasticsearch-dsl object to be built on instead of + creating a fresh object. By default a new search object is created. :return: an instantiated elasticsearch-dsl object """ if version not in schemas: @@ -117,7 +117,8 @@ def load_core_schema(version): Given a query schema version, loads the schema from the schema_base_path directory. :param version: the version to load - :return: the loaded schema (as a dict) and a jsonschmea validator object for the schema + :return: the loaded schema (as a dict) and a jsonschmea validator object for the + schema """ schema_file = schema_base_path.joinpath(version).joinpath(f'{version}.json') with io.open(schema_file, 'r', encoding='utf-8') as f: @@ -154,8 +155,8 @@ def translate(self, query, search=None): Translates the query into an elasticsearch-dsl search object. :param query: the whole query dict - :param search: an instantiated elasticsearch-dsl object to be built on instead of creating - a fresh object. By default a new search object is created. + :param search: an instantiated elasticsearch-dsl object to be built on instead + of creating a fresh object. By default a new search object is created. :return: an instantiated elasticsearch-dsl object """ pass diff --git a/ckanext/versioned_datastore/lib/query/slugs.py b/ckanext/versioned_datastore/lib/query/slugs.py index 95da357b..00d6ea57 100644 --- a/ckanext/versioned_datastore/lib/query/slugs.py +++ b/ckanext/versioned_datastore/lib/query/slugs.py @@ -1,17 +1,15 @@ import datetime import hashlib -import random import logging +import random from ckan import model -from ckan.plugins import toolkit from sqlalchemy.exc import IntegrityError -from .schema import get_latest_query_version, hash_query -from .schema import validate_query -from .slug_words import list_one, list_two, list_three -from .utils import get_available_datastore_resources, get_resources_and_versions from ...model.slugs import DatastoreSlug, NavigationalSlug +from .schema import get_latest_query_version, hash_query, validate_query +from .slug_words import list_one, list_three, list_two +from .utils import get_resources_and_versions log = logging.getLogger(__name__) @@ -28,7 +26,8 @@ def generate_query_hash( :param query_version: the query version :param version: the data version :param resource_ids: the ids of the resources under search - :param resource_ids_and_versions: the resource ids and specific versions to search at for them + :param resource_ids_and_versions: the resource ids and specific versions to search + at for them :return: a unique id for the query, which is a hash of the query and parameters """ hash_value = hashlib.sha1() @@ -79,9 +78,9 @@ def create_slug( Only valid queries get a slug, otherwise we raise a ValidationError. - Only valid resource ids included in the list will be stored, any invalid ones will be excluded. - If a list of resource ids is provided and none of the requested resource ids are valid, then a - ValidationError is raised. + Only valid resource ids included in the list will be stored, any invalid ones will + be excluded. If a list of resource ids is provided and none of the requested + resource ids are valid, then a ValidationError is raised. :param context: the context dict so that we can check the validity of any resources :param query: the query dict @@ -89,12 +88,12 @@ def create_slug( :param version: the version to search at :param resource_ids: the resources to search (a list) :param resource_ids_and_versions: the resources and versions to search at (a dict) - :param pretty_slug: whether to generate a pretty slug or just use the uuid id of the slug, by - default this is True + :param pretty_slug: whether to generate a pretty slug or just use the uuid id of the + slug, by default this is True :param attempts: how many times to try creating a pretty slug, default: 5 - :return: a 2-tuple containing a boolean indicating whether the slug object returned was newly - created and the DatastoreSlug object itself. If we couldn't create a slug object for - some reason then (False, None) is returned. + :return: a 2-tuple containing a boolean indicating whether the slug object returned + was newly created and the DatastoreSlug object itself. If we couldn't create a + slug object for some reason then (False, None) is returned. """ # only store valid queries! validate_query(query, query_version) @@ -229,24 +228,25 @@ def reserve_slug( probably only be called during this extension's initialisation via the datastore_reserve_slugs interface function. - If a slug already exists in the database with the same reserved pretty slug and the same - query parameters then nothing happens. + If a slug already exists in the database with the same reserved pretty slug and the + same query parameters then nothing happens. - If a slug already exists in the database with the same reserved pretty slug but a different - set of query parameters then a DuplicateSlugException is raised. + If a slug already exists in the database with the same reserved pretty slug but a + different set of query parameters then a DuplicateSlugException is raised. - If a slug already exists in the database with the same query parameters but no reserved - pretty slug then the reserved pretty slug is added to the slug. + If a slug already exists in the database with the same query parameters but no + reserved pretty slug then the reserved pretty slug is added to the slug. :param reserved_pretty_slug: the slug string to reserve :param query: the query dict :param query_version: the query schema version :param version: the version of the data :param resource_ids: the resource ids to search - :param resource_ids_and_versions: the resources ids and specific versions for each to search - :return: a DatastoreSlug object that has either been found (if it already existed), created (if - no slug existed) or updated (if a slug existed for the query parameters, but no - reserved query string was associated with it). + :param resource_ids_and_versions: the resources ids and specific versions for each + to search + :return: a DatastoreSlug object that has either been found (if it already existed), + created (if no slug existed) or updated (if a slug existed for the query + parameters, but no reserved query string was associated with it). """ # default some parameters and then assert they are all the right types. We do this because if # there are problems they're going to be reported back to the developer not the user @@ -314,7 +314,7 @@ def clean_nav_slugs(before=None): Delete old/expired navigational slugs. :param before: a datetime object; slugs created before this time will be removed - (defaults to 2 days ago) + (defaults to 2 days ago) :return: the number of deleted slugs """ if before is None: diff --git a/ckanext/versioned_datastore/lib/query/utils.py b/ckanext/versioned_datastore/lib/query/utils.py index 7475f841..da9af679 100644 --- a/ckanext/versioned_datastore/lib/query/utils.py +++ b/ckanext/versioned_datastore/lib/query/utils.py @@ -1,14 +1,14 @@ from copy import copy from datetime import datetime -from elasticsearch_dsl import Search, MultiSearch -from splitgill.search import create_version_query, create_index_specific_version_filter -from splitgill.utils import to_timestamp - from ckan import model from ckan.plugins import toolkit +from elasticsearch_dsl import MultiSearch, Search +from splitgill.search import create_index_specific_version_filter, create_version_query +from splitgill.utils import to_timestamp + from .. import common -from ..datastore_utils import prefix_resource, get_last_after, trim_index_name +from ..datastore_utils import get_last_after, prefix_resource, trim_index_name def get_available_datastore_resources(context, only=None): @@ -20,8 +20,8 @@ def get_available_datastore_resources(context, only=None): an empty list) then all resource ids available to the user are returned. :param context: the dict ckan context to request auth against - :param only: optional list of resource ids to filter the returned list by. Defaults to None - which indicates all available resources should be returned + :param only: optional list of resource ids to filter the returned list by. Defaults + to None which indicates all available resources should be returned :return: a set of resource ids """ # retrieve all resource ids and associated package ids direct from the database for speed @@ -98,12 +98,13 @@ def determine_resources_to_search( through either the resource_ids or resource_ids_and_versions parameters then only these resource ids will be returned, if indeed they are accessible to the user. - :param context: the context dict allowing us to determine the user and do auth on the resources + :param context: the context dict allowing us to determine the user and do auth on + the resources :param resource_ids: a list of resources to search :param resource_ids_and_versions: a dict of resources and versions to search at - :return: 2-tuple containing a list of resource ids to search and a list of resource ids that - have been skipped because the user doesn't have access to them or they aren't datastore - resources + :return: 2-tuple containing a list of resource ids to search and a list of resource + ids that have been skipped because the user doesn't have access to them or they + aren't datastore resources """ # validate the resource_ids passed in. If the resource_ids_and_versions parameter is in use then # it is taken as the resource_ids source and resource_ids is ignored @@ -132,8 +133,8 @@ def determine_version_filter( :param version: the version to filter on across all resources :param resource_ids: the resource to search - :param resource_ids_and_versions: a dict of resource ids -> versions providing resource specific - versions for search + :param resource_ids_and_versions: a dict of resource ids -> versions providing + resource specific versions for search :return: an elasticsearch-dsl object """ if not resource_ids_and_versions: @@ -237,9 +238,10 @@ def get_resources_and_versions( :param resource_ids: a list of resource ids :param resource_ids_and_versions: a dict of resource id: resource version - :param version: a datestamp used as a default version for resources without a version + :param version: a datestamp used as a default version for resources without a + version :param allow_non_datastore: allow non datastore resources to be included (will be - returned with common.NON_DATASTORE_VERSION) + returned with common.NON_DATASTORE_VERSION) :return: a tuple of resource_ids, resource_ids_and_versions """ @@ -279,9 +281,9 @@ def get_resources_and_versions( version = to_timestamp(datetime.now()) for resource_id in available_resource_ids: if resource_id in non_datastore_resources: - rounded_resource_ids_and_versions[ - resource_id - ] = common.NON_DATASTORE_VERSION + rounded_resource_ids_and_versions[resource_id] = ( + common.NON_DATASTORE_VERSION + ) continue # try to get the target version from the passed resource_ids_and_versions dict, but if # it's not in there, default to the version variable diff --git a/ckanext/versioned_datastore/lib/query/v1_0_0.py b/ckanext/versioned_datastore/lib/query/v1_0_0.py index ba702d65..fe62f11b 100644 --- a/ckanext/versioned_datastore/lib/query/v1_0_0.py +++ b/ckanext/versioned_datastore/lib/query/v1_0_0.py @@ -7,8 +7,8 @@ from elasticsearch_dsl import Search from elasticsearch_dsl.query import Bool, Q -from .schema import Schema, load_core_schema, schema_base_path from ..datastore_utils import prefix_field +from .schema import Schema, load_core_schema, schema_base_path from .utils import convert_small_or_groups, remove_empty_groups @@ -56,8 +56,8 @@ def translate(self, query, search=None): Translates the query into an elasticsearch-dsl search object. :param query: the whole query dict - :param search: an instantiated elasticsearch-dsl object to be built on instead of creating - a fresh object. By default a new search object is created. + :param search: an instantiated elasticsearch-dsl object to be built on instead + of creating a fresh object. By default a new search object is created. :return: an instantiated elasticsearch-dsl object """ search = Search() if search is None else search @@ -129,7 +129,8 @@ def create_and(self, group): here seeing as we can and it makes smaller elasticsearch queries. :param group: the group to build the and from - :return: the first member from the group if there's only one member in the group, or a Bool + :return: the first member from the group if there's only one member in the + group, or a Bool """ members = [self.create_group_or_term(member) for member in group] return members[0] if len(members) == 1 else Bool(filter=members) @@ -144,7 +145,8 @@ def create_or(self, group): here seeing as we can and it makes smaller elasticsearch queries. :param group: the group to build the or from - :return: the first member from the group if there's only one member in the group, or a Bool + :return: the first member from the group if there's only one member in the + group, or a Bool """ return self.build_or([self.create_group_or_term(member) for member in group]) @@ -294,14 +296,15 @@ def create_geo_named_area(self, options): elasticsearch geo_polygon queries, if necessary combined using ands, ors and nots to provide MultiPolygon hole support. - In v1.0.0, Natural Earth Data datasets are used to provide the lists of names and - corresponding geojson areas. The 1:50million scale is used in an attempt to provide a good - level of detail without destroying Elasticsearch with enormous numbers of points. See the - `theme/public/querySchemas/geojson/` directory for source data and readme, and also the - load_geojson function in this class. + In v1.0.0, Natural Earth Data datasets are used to provide the lists of names + and corresponding geojson areas. The 1:50million scale is used in an attempt to + provide a good level of detail without destroying Elasticsearch with enormous + numbers of points. See the `theme/public/querySchemas/geojson/` directory for + source data and readme, and also the load_geojson function in this class. :param options: the options for the geo_named_area query - :return: an elasticsearch-dsl Query object (a single geo_polygon Query or a Bool Query) + :return: an elasticsearch-dsl Query object (a single geo_polygon Query or a Bool + Query) """ category, name = next(iter(options.items())) return self.build_multipolygon_query(self.geojson[category][name]) @@ -315,7 +318,8 @@ def create_geo_custom_area(self, coordinates): holes defined in the Polygon). :param coordinates: a MultiPolygon coordinates list - :return: an elasticsearch-dsl Query object (a single geo_polygon Query or a Bool Query) + :return: an elasticsearch-dsl Query object (a single geo_polygon Query or a Bool + Query) """ return self.build_multipolygon_query(coordinates) @@ -363,8 +367,8 @@ def build_multipolygon_query(coordinates): parameter should match the format required by GeoJSON and therefore be a series of nested lists, see the GeoJSON docs for details. - :param coordinates: the coordinate list, which is basically a list of Polygons. See the - GeoJSON doc for the exact format and meaning + :param coordinates: the coordinate list, which is basically a list of Polygons. + See the GeoJSON doc for the exact format and meaning :return: an elasticsearch-dsl object representing the MultiPolygon """ queries = [] @@ -395,13 +399,15 @@ def load_geojson(filename, name_keys): The geojson file is assumed to be a list of features containing only Polygon or MultiPolygon types. - The name_keys parameter should be a sequence of keys to use to retrieve a name for the - feature from the properties dict. The first key found in the properties dict with a value is - used and therefore the keys listed should be in priority order. The extracted name is passed - to string.capwords to produce a sensible and consistent set of names. + The name_keys parameter should be a sequence of keys to use to retrieve a name + for the feature from the properties dict. The first key found in the properties + dict with a value is used and therefore the keys listed should be in priority + order. The extracted name is passed to string.capwords to produce a sensible and + consistent set of names. :param filename: the name geojson file to load from the given path - :param name_keys: a priority ordered sequence of keys to use for feature name retrieval + :param name_keys: a priority ordered sequence of keys to use for feature name + retrieval :return: a dict of names -> MultiPolygons """ path = schema_base_path.joinpath(v1_0_0Schema.version).joinpath('geojson') @@ -455,10 +461,10 @@ def hash_query(self, query): """ query_hash = hashlib.sha1() if 'search' in query: - query_hash.update(f'search:{query["search"]}'.encode(u'utf-8')) + query_hash.update(f'search:{query["search"]}'.encode('utf-8')) if 'filters' in query: data = f'filters:{self.create_group_or_term(query["filters"])}'.encode( - u'utf-8' + 'utf-8' ) query_hash.update(data) return query_hash.hexdigest() diff --git a/ckanext/versioned_datastore/logic/actions/basic_search.py b/ckanext/versioned_datastore/logic/actions/basic_search.py index d4bd33ed..8c5033b8 100644 --- a/ckanext/versioned_datastore/logic/actions/basic_search.py +++ b/ckanext/versioned_datastore/logic/actions/basic_search.py @@ -1,20 +1,19 @@ from datetime import datetime -from ckan.plugins import toolkit, PluginImplementations +from ckan.plugins import PluginImplementations, toolkit +from ckantools.decorators import action +from elasticsearch import RequestError +from elasticsearch_dsl import A, Search from splitgill.indexing.utils import DOC_TYPE from splitgill.search import create_version_query from splitgill.utils import to_timestamp -from elasticsearch import RequestError -from elasticsearch_dsl import A, Search -from .meta import help, schema -from ckantools.decorators import action from ...interfaces import IVersionedDatastore from ...lib.basic_query.search import create_search -from ...lib.basic_query.utils import run_search, get_fields, format_facets -from ...lib.datastore_utils import prefix_field, prefix_resource, get_last_after +from ...lib.basic_query.utils import format_facets, get_fields, run_search +from ...lib.datastore_utils import get_last_after, prefix_field, prefix_resource from ...lib.query.query_log import log_query - +from .meta import help, schema # these are the keys we're interested in logging from the data dict _query_log_keys = ('q', 'filters') @@ -104,7 +103,8 @@ def datastore_autocomplete(context, data_dict, original_data_dict): :param context: the context dict from the action call :param data_dict: the data_dict from the action call :param original_data_dict: the data_dict before it was validated - :return: a dict containing a list of results and an after value for the next page of results + :return: a dict containing a list of results and an after value for the next page of + results """ # extract the fields specifically needed for setting up the autocomplete query field = data_dict.pop('field') @@ -162,8 +162,8 @@ def datastore_query_extent(context, data_dict, original_data_dict): :param context: the context dict from the action call :param data_dict: the data_dict from the action call :param original_data_dict: the data_dict before it was validated - :return: a dict containing the total number of matches for the query, the total number of - matches with geo data and the bounds of the query + :return: a dict containing the total number of matches for the query, the total + number of matches with geo data and the bounds of the query """ # ensure the search doesn't respond with any hits cause we don't need them and override two # unused params @@ -227,8 +227,8 @@ def datastore_search_raw( :param original_data_dict: the data_dict before it was validated :param search: the elasticsearch query to run :param version: the version of the data to query against - :param raw_result: whether to return the result as a raw elasticsearch result, or format it in - the same way as a normal datastore_search call would + :param raw_result: whether to return the result as a raw elasticsearch result, or + format it in the same way as a normal datastore_search call would :param include_version: whether to include the version in the search or not :return: a dict containing the results of the search """ diff --git a/ckanext/versioned_datastore/logic/actions/crud.py b/ckanext/versioned_datastore/logic/actions/crud.py index f7b94f76..797d81a3 100644 --- a/ckanext/versioned_datastore/logic/actions/crud.py +++ b/ckanext/versioned_datastore/logic/actions/crud.py @@ -1,22 +1,23 @@ -from ckan.plugins import toolkit from datetime import datetime -from splitgill.utils import to_timestamp -from .meta import help, schema +from ckan.plugins import toolkit from ckantools.decorators import action +from splitgill.utils import to_timestamp + from ...lib import common from ...lib.datastore_utils import ( - is_resource_read_only, - is_ingestible, - update_privacy, - ReadOnlyResourceException, InvalidVersionException, + ReadOnlyResourceException, is_datastore_resource, + is_ingestible, + is_resource_read_only, + update_privacy, ) from ...lib.importing import stats from ...lib.importing.indexing import DatastoreIndex -from ...lib.importing.queuing import queue_index, queue_import, queue_deletion +from ...lib.importing.queuing import queue_deletion, queue_import, queue_index from ...lib.importing.utils import check_version_is_valid +from .meta import help, schema @action(schema.datastore_create(), help.datastore_create) @@ -59,8 +60,8 @@ def datastore_upsert(resource_id, replace, context, original_data_dict, version= :param replace: whether to replace the data already in the resource or append to it :param context: the context dict from the action call :param original_data_dict: the data_dict before it was validated - :param version: the version of the new data, can be None (default) but if not must be newer - than the latest version of the resource + :param version: the version of the new data, can be None (default) but if not must + be newer than the latest version of the resource :return: information about the background job that is handling the ingestion """ # this comes through as junk if it's not removed before validating. This happens because the @@ -132,7 +133,8 @@ def datastore_reindex(resource_id, context): :param resource_id: the resource id to reindex :param context: the context dict from the action call - :return: a dict containing info about the background job that is doing the reindexing + :return: a dict containing info about the background job that is doing the + reindexing """ if is_resource_read_only(resource_id): raise toolkit.ValidationError('This resource has been marked as read only') @@ -159,9 +161,10 @@ def datastore_ensure_privacy(context, resource_id=None): setting. :param context: the context dict from the action call - :param resource_id: the id of the resource update. Can be None (the default) which means all - resources are updated - :return: a dict containing the total number of resources checked and the total modified + :param resource_id: the id of the resource update. Can be None (the default) which + means all resources are updated + :return: a dict containing the total number of resources checked and the total + modified """ modified = 0 total = 0 diff --git a/ckanext/versioned_datastore/logic/actions/downloads.py b/ckanext/versioned_datastore/logic/actions/downloads.py index e4772b17..9523b430 100644 --- a/ckanext/versioned_datastore/logic/actions/downloads.py +++ b/ckanext/versioned_datastore/logic/actions/downloads.py @@ -1,11 +1,11 @@ +from ckan.plugins import toolkit from ckantools.decorators import action -from ckan.plugins import toolkit -from .meta import help, schema -from .meta.arg_objects import ServerArgs, NotifierArgs, QueryArgs, DerivativeArgs from ...lib.downloads.download import DownloadRunManager -from ...model.downloads import DownloadRequest from ...lib.downloads.notifiers import validate_notifier_args +from ...model.downloads import DownloadRequest +from .meta import help, schema +from .meta.arg_objects import DerivativeArgs, NotifierArgs, QueryArgs, ServerArgs @action(schema.datastore_queue_download(), help.datastore_queue_download) diff --git a/ckanext/versioned_datastore/logic/actions/extras.py b/ckanext/versioned_datastore/logic/actions/extras.py index e85be95e..e35dd6fc 100644 --- a/ckanext/versioned_datastore/logic/actions/extras.py +++ b/ckanext/versioned_datastore/logic/actions/extras.py @@ -1,20 +1,20 @@ from datetime import datetime from ckan.plugins import toolkit +from ckantools.decorators import action +from elasticsearch_dsl import MultiSearch, Search from splitgill.search import create_version_query -from splitgill.utils import to_timestamp, chunk_iterator -from elasticsearch_dsl import Search, MultiSearch +from splitgill.utils import chunk_iterator, to_timestamp -from .meta import help, schema -from ckantools.decorators import action from ...lib import common from ...lib.basic_query.search import create_search from ...lib.datastore_utils import ( - prefix_resource, - is_datastore_resource, get_public_alias_name, + is_datastore_resource, + prefix_resource, ) from ...lib.query.schema import get_latest_query_version +from .meta import help, schema @action( @@ -76,12 +76,12 @@ def datastore_get_resource_versions( multisearch = MultiSearch(using=common.ES_CLIENT, index=index_name) for details in details_chunk: multisearch = multisearch.add( - Search()[0:0].filter(create_version_query(details["version"])) + Search()[0:0].filter(create_version_query(details['version'])) ) results = multisearch.execute() # update the count details we got from splitgill with the actual record count for detail, result in zip(details_chunk, results): - detail["count"] = result.hits.total + detail['count'] = result.hits.total return counts @@ -97,8 +97,8 @@ def datastore_get_rounded_version(resource_id, version=None): rounding down. :param resource_id: the id of the resource - :param version: the version timestamp. If None (the default) the latest version of the resource - is returned + :param version: the version timestamp. If None (the default) the latest version of + the resource is returned :return: the rounded version timestamp """ index_name = prefix_resource(resource_id) diff --git a/ckanext/versioned_datastore/logic/actions/meta/arg_objects.py b/ckanext/versioned_datastore/logic/actions/meta/arg_objects.py index 97ac0d2e..31b4b1bf 100644 --- a/ckanext/versioned_datastore/logic/actions/meta/arg_objects.py +++ b/ckanext/versioned_datastore/logic/actions/meta/arg_objects.py @@ -1,7 +1,7 @@ +from ckan.plugins import toolkit from ckantools.validators import list_of_strings from ckantools.validators.ivalidators import BaseArgs -from ckan.plugins import toolkit from ckanext.datastore.logic.schema import json_validator # grab all the validator functions upfront diff --git a/ckanext/versioned_datastore/logic/actions/meta/help.py b/ckanext/versioned_datastore/logic/actions/meta/help.py index 8ab1e89f..0997c72b 100644 --- a/ckanext/versioned_datastore/logic/actions/meta/help.py +++ b/ckanext/versioned_datastore/logic/actions/meta/help.py @@ -1,4 +1,4 @@ -datastore_search = ''' +datastore_search = """ This action allows you to search data in a resource. It is designed to function in a similar way to CKAN's core datastore_search but with a few extra bells and whistles, most prominently versioning. This allows the resource to be searched at any moment in it's lifespan and have the @@ -82,9 +82,9 @@ In addition to returning these result dicts, the actual result object is made available through the context dict under the key "versioned_datastore_query_result". This isn't available through the http action API however. -''' +""" -datastore_create = ''' +datastore_create = """ Adds a resource to the versioned datastore. This action doesn't take any data, it simply ensures any setup work is complete for the given resource in the search backend. To add data after creating a resource in the datastore, use the datastore_upsert action. @@ -98,9 +98,9 @@ initialised) and False if not. If False is returned this implies that the resource cannot be ingested into the datastore because the format is not supported :rtype: boolean -''' +""" -datastore_upsert = ''' +datastore_upsert = """ Upserts data into the datastore for the resource. The data can be provided in the data_dict using the key 'records' or, if data is not specified, the URL on the resource is used. @@ -121,9 +121,9 @@ :returns: details about the job that has been submitted to fulfill the upsert request. :rtype: dict -''' +""" -datastore_delete = ''' +datastore_delete = """ Deletes the data in the datastore against the given resource_id. Note that this is achieved by setting all records to be empty in a new version and then indexing that new version. This ensures that the data is not available in the latest version but is in old ones. @@ -133,9 +133,9 @@ :param version: the version to delete the data at, can be missing and if it is it's defaults to the current timestamp :type version: integer -''' +""" -datastore_reindex = ''' +datastore_reindex = """ Triggers a reindex of the given resource's data. This does not reingest the data to mongo, but it does reindex the data in mongo to elasticsearch. The intent of this action is to allow mapping changes (for example) to be picked up. @@ -148,9 +148,9 @@ :returns: a dict containing the details of the reindex as returned from elasticsearch :rtype: dict -''' +""" -datastore_ensure_privacy = ''' +datastore_ensure_privacy = """ Ensure that the privacy settings are correct across all resources in the datastore or for just one resource. @@ -169,9 +169,9 @@ :type ensured: integer :param total: the total number of resources examined :type total: integer -''' +""" -datastore_autocomplete = ''' +datastore_autocomplete = """ Provides autocompletion results against a specific field in a specific resource. **Data dict params:** @@ -202,9 +202,9 @@ :returns: a dict containing the list of values and an after value for the next page's results :rtype: dict -''' +""" -datastore_query_extent = ''' +datastore_query_extent = """ Return the geospatial extent of the results of a given datastore search query. The data_dict parameters are the same as the arguments for `datastore_search`. @@ -219,9 +219,9 @@ :param bounds: the extent of the query's results, this will be missing if no bound can be calculated (for example, if the resource has no geo data) :type bounds: list in the format [[lat min, long min], [lat max, long max]] -''' +""" -datastore_search_raw = ''' +datastore_search_raw = """ This action allows you to search data in a resource using a raw elasticsearch query. This action allows more flexibility over the search both in terms of querying using any of elasticsearch's different DSL queries as well as aspects like turning versioning on and off. @@ -290,9 +290,9 @@ http action API however. If raw_result is True, then the elasticsearch response is returned without modification. -''' +""" -datastore_get_record_versions = ''' +datastore_get_record_versions = """ Given a record id and an resource it appears in, returns the version timestamps available for that record in ascending order. @@ -306,9 +306,9 @@ :returns: a list of versions :rtype: list -''' +""" -datastore_get_resource_versions = ''' +datastore_get_resource_versions = """ Given a resource id, returns the version timestamps available for that resource in ascending order along with the number of records modified in the version and the number of records at that version. @@ -326,9 +326,9 @@ :returns: a list of dicts, each in the form: {"version": #, "changes": #, "count": #} :rtype: list of dicts -''' +""" -datastore_get_rounded_version = ''' +datastore_get_rounded_version = """ Round the requested version of this query down to the nearest actual version of the resource. This is necessary because we work in a system where although you can just query at a timestamp you should round it down to the nearest known version. This guarantees that when @@ -366,9 +366,9 @@ :returns: the rounded version or None if no versions are available for the given resource id :rtype: integer or None -''' +""" -datastore_multisearch = ''' +datastore_multisearch = """ This action allows you to search data in multiple resources. The resources that are searched for the in this action and the version they are searched at are @@ -442,9 +442,9 @@ :param timings: dict of events and how long they took as part of the response processing. This is only included in the response if the timings parameter is True :type timings: dict -''' +""" -datastore_create_slug = ''' +datastore_create_slug = """ Create a query slug based on the provided query parameters. This action returns a slug which can be used to retrieve the query parameters passed (not the @@ -490,9 +490,9 @@ :type slug: string :param is_new: whether the returned slug was newly created or already existed :type is_new: bool -''' +""" -datastore_resolve_slug = ''' +datastore_resolve_slug = """ Given a slug, resolves it and returns the query information associated with it. Params: @@ -517,9 +517,9 @@ :type resource_ids_and_versions: a dict :param created: the date time the slug was originally created :type created: datetime in isoformat -''' +""" -datastore_field_autocomplete = ''' +datastore_field_autocomplete = """ Returns a dictionary of available fields in the datastore which contain the passed text. The fields will be retrieved from resources available to the user. If a list of resource ids is passed as a parameter then the resources from that list that the user has access to will be @@ -556,9 +556,9 @@ :type fields: dict :param count: the number of fields returned :type count: int -''' +""" -datastore_value_autocomplete = ''' +datastore_value_autocomplete = """ Finds values in the given field, from the given resources, which start with the given prefix and returns up to {size} of them in a list. The values are sorted in alphabetical order. @@ -624,18 +624,18 @@ only included in the response if the timings parameter is True :type timings: dict -''' +""" -datastore_queue_download = ''' +datastore_queue_download = """ Queues a task to generate a downloadable zip containing the data produced by the given query. **Results:** :returns: details about the job that has been submitted to fulfill the upsert request. :rtype: dict -''' +""" -datastore_regenerate_download = ''' +datastore_regenerate_download = """ Calls datastore_queue_download to regenerate a previous request. Note that notifier args are still required as these are not stored in the original request, and server args may be specified to override any stored ones. @@ -644,9 +644,9 @@ :returns: details about the job that has been submitted to fulfill the upsert request. :rtype: dict -''' +""" -datastore_guess_fields = ''' +datastore_guess_fields = """ This action allows you to retrieve a set of fields to display by default for a given search across potentially multiple resources. The returned list of groups of fields is ordered by the number of resources the fields in each group appears in under the provided query. Ties are handled by ordering @@ -702,9 +702,9 @@ :param fields: a dict of field names -> list of resource ids representing the fields in the group and the resources they come from :type fields: dict -''' +""" -datastore_hash_query = ''' +datastore_hash_query = """ This action simply hashes the given query and returns the hex digest of it. The hash is produced using sha1 and a custom algorithm - it's not just a hash of the query dict. @@ -717,9 +717,9 @@ Returns: :rtype: string -''' +""" -datastore_is_datastore_resource = ''' +datastore_is_datastore_resource = """ This action checks whether the given resource is in the datastore. Params: @@ -730,17 +730,17 @@ Returns: :rtype: boolean -''' +""" -datastore_get_latest_query_schema_version = ''' +datastore_get_latest_query_schema_version = """ This action simply returns the latest available query schema version. Returns: :rtype: string -''' +""" -datastore_count = ''' +datastore_count = """ Count the number of records available at a specific version across a set of resources. This allows quick counting of total records without any query, if you want to count with a query, use the search actions with a limit of 0. @@ -758,9 +758,9 @@ The result of this action is a dictionary with the following keys: :rtype: an integer count -''' +""" -datastore_edit_slug = ''' +datastore_edit_slug = """ Add or modify the reserved slug for a query. Reserved slugs can only be replaced by sysadmins, but if one has not been added yet for a query, any logged-in user can supply it. @@ -774,7 +774,7 @@ Returns: :rtype: bool -''' +""" datastore_multisearch_counts = """ Count the number of records that match the query in each of the provided resources and diff --git a/ckanext/versioned_datastore/logic/actions/meta/schema.py b/ckanext/versioned_datastore/logic/actions/meta/schema.py index a8bfc753..426b2cbf 100644 --- a/ckanext/versioned_datastore/logic/actions/meta/schema.py +++ b/ckanext/versioned_datastore/logic/actions/meta/schema.py @@ -1,10 +1,11 @@ -import json import re from ckan.plugins import toolkit +from ckantools.validators import list_of_strings, list_validator, object_validator + from ckanext.datastore.logic.schema import json_validator, unicode_or_json_validator -from ckantools.validators import list_validator, list_of_strings, object_validator -from .arg_objects import QueryArgs, DerivativeArgs, ServerArgs, NotifierArgs + +from .arg_objects import DerivativeArgs, NotifierArgs, QueryArgs, ServerArgs # grab all the validator functions upfront boolean_validator = toolkit.get_validator('boolean_validator') diff --git a/ckanext/versioned_datastore/logic/actions/multisearch.py b/ckanext/versioned_datastore/logic/actions/multisearch.py index d128b042..0525d8ab 100644 --- a/ckanext/versioned_datastore/logic/actions/multisearch.py +++ b/ckanext/versioned_datastore/logic/actions/multisearch.py @@ -3,46 +3,46 @@ from typing import Dict import jsonschema +from ckan.plugins import PluginImplementations, plugin_loaded, toolkit from ckantools.decorators import action from ckantools.timer import Timer -from elasticsearch_dsl import MultiSearch, A +from elasticsearch_dsl import A, MultiSearch from splitgill.utils import to_timestamp -from ckan.plugins import toolkit, PluginImplementations, plugin_loaded -from .meta import help, schema from ...interfaces import IVersionedDatastore from ...lib import common from ...lib.basic_query.utils import convert_to_multisearch from ...lib.datastore_utils import ( - prefix_resource, - unprefix_index, iter_data_fields, - trim_index_name, prefix_field, + prefix_resource, + trim_index_name, + unprefix_index, ) from ...lib.query.fields import ( get_all_fields, - select_fields, - get_single_resource_fields, get_mappings, + get_single_resource_fields, + select_fields, ) from ...lib.query.query_log import log_query from ...lib.query.schema import ( - get_latest_query_version, InvalidQuerySchemaVersionError, - validate_query, - translate_query, + get_latest_query_version, hash_query, normalise_query, + translate_query, + validate_query, ) -from ...lib.query.slugs import create_slug, resolve_slug, create_nav_slug +from ...lib.query.slugs import create_nav_slug, create_slug, resolve_slug from ...lib.query.utils import ( - get_available_datastore_resources, + calculate_after, determine_resources_to_search, determine_version_filter, - calculate_after, find_searched_resources, + get_available_datastore_resources, ) +from .meta import help, schema @action( @@ -67,24 +67,24 @@ def datastore_multisearch( :param context: the context dict from the action call :param query: the query dict. If None (default) then an empty query is used - :param query_version: the version of the query schema the query is using. If None (default) then - the latest query schema version is used - :param version: the version to search the data at. If None (default) the current time is used - :param resource_ids: the list of resource to search. If None (default) then all the resources - the user has access to are queried. If a list of resources are passed then - any resources not accessible to the user will be removed before querying - :param resource_ids_and_versions: a dict of resources and versions to search each of them at. - This allows precise searching of each resource at a specific - parameter. If None (default) then the resource_ids parameter - is used together with the version parameter. If this parameter - is provided though, it takes priority over the resource_ids - and version parameters. - :param size: the number of records to return. Defaults to 100 if not provided and must be - between 0 and 1000. - :param after: pagination after value that has come from a previous result. If None (default) - this parameter is ignored. - :param top_resources: whether to include information about the resources with the most results - in them (defaults to False) in the result + :param query_version: the version of the query schema the query is using. If None + (default) then the latest query schema version is used + :param version: the version to search the data at. If None (default) the current + time is used + :param resource_ids: the list of resource to search. If None (default) then all the + resources the user has access to are queried. If a list of resources are passed + then any resources not accessible to the user will be removed before querying + :param resource_ids_and_versions: a dict of resources and versions to search each of + them at. This allows precise searching of each resource at a specific parameter. + If None (default) then the resource_ids parameter is used together with the + version parameter. If this parameter is provided though, it takes priority over + the resource_ids and version parameters. + :param size: the number of records to return. Defaults to 100 if not provided and + must be between 0 and 1000. + :param after: pagination after value that has come from a previous result. If None + (default) this parameter is ignored. + :param top_resources: whether to include information about the resources with the + most results in them (defaults to False) in the result :param timings: whether to include timing information in the result dict :return: a dict of results including the records and total """ @@ -215,24 +215,25 @@ def datastore_create_slug( :param context: the context dict from the action call :param query: the query dict. If None (default) then an empty query is used - :param query_version: the version of the query schema the query is using. If None (default) then - the latest query schema version is used - :param version: the version to search the data at. If None (default) the current time is used - :param resource_ids: the list of resource to search. If None (default) then all the resources - the user has access to are queried. If a list of resources are passed then - any resources not accessible to the user will be removed before querying - :param resource_ids_and_versions: a dict of resources and versions to search each of them at. - This allows precise searching of each resource at a specific - parameter. If None (default) then the resource_ids parameter - is used together with the version parameter. If this parameter - is provided though, it takes priority over the resource_ids - and version parameters. - :param pretty_slug: whether to produce a "pretty" slug or not. If True (the default) a selection - of 2 adjectives and an animal will be used to create the slug, otherwise if - False, a uuid will be used + :param query_version: the version of the query schema the query is using. If None + (default) then the latest query schema version is used + :param version: the version to search the data at. If None (default) the current + time is used + :param resource_ids: the list of resource to search. If None (default) then all the + resources the user has access to are queried. If a list of resources are passed + then any resources not accessible to the user will be removed before querying + :param resource_ids_and_versions: a dict of resources and versions to search each of + them at. This allows precise searching of each resource at a specific parameter. + If None (default) then the resource_ids parameter is used together with the + version parameter. If this parameter is provided though, it takes priority over + the resource_ids and version parameters. + :param pretty_slug: whether to produce a "pretty" slug or not. If True (the default) + a selection of 2 adjectives and an animal will be used to create the slug, + otherwise if False, a uuid will be used :param nav_slug: if this is True, a temporary navigational slug will be produced - instead of a standard slug - :return: a dict containing the slug and whether it was created during this function call or not + instead of a standard slug + :return: a dict containing the slug and whether it was created during this function + call or not """ if query is None: query = {} @@ -306,9 +307,10 @@ def datastore_resolve_slug(slug): # then check if it's a query DOI if plugin_loaded('query_dois'): - from ckanext.query_dois.model import QueryDOI from ckan import model + from ckanext.query_dois.model import QueryDOI + resolved = model.Session.query(QueryDOI).filter(QueryDOI.doi == slug).first() if resolved: if resolved.query_version == 'v0': @@ -342,10 +344,10 @@ def datastore_field_autocomplete(context, text='', resource_ids=None, lowercase= :param context: the context dict from the action call :param text: the text to search with (default is an empty string) - :param resource_ids: a list of resources to find fields from, if None (the default) all resource - fields are searched - :param lowercase: whether to do a lowercase check or not, essentially whether to be case - insensitive. Default: True, be case insensitive. + :param resource_ids: a list of resources to find fields from, if None (the default) + all resource fields are searched + :param lowercase: whether to do a lowercase check or not, essentially whether to be + case insensitive. Default: True, be case insensitive. :return: the fields and the resources they came from as a dict """ # figure out which resources should be searched @@ -397,16 +399,18 @@ def datastore_guess_fields( """ Guesses the fields that are most relevant to show with the given query. - If only one resource is included in the search then the requested number of fields from the - resource at the required version are returned in ingest order if the details are available. + If only one resource is included in the search then the requested number of fields + from the resource at the required version are returned in ingest order if the + details are available. - If multiple resources are queried, the most common fields across the resource under search are - returned. The fields are grouped together in an attempt to match the same field name in - different cases across different resources. The most common {size} groups are returned. + If multiple resources are queried, the most common fields across the resource under + search are returned. The fields are grouped together in an attempt to match the same + field name in different cases across different resources. The most common {size} + groups are returned. - The groups returned are ordered firstly by the number of resources they appear in in descending - order, then if there are ties, the number of records the group finds is used and this again is - ordered in a descending fashion. + The groups returned are ordered firstly by the number of resources they appear in in + descending order, then if there are ties, the number of records the group finds is + used and this again is ordered in a descending fashion. :param context: the context dict from the action call :param query: the query @@ -506,8 +510,8 @@ def datastore_value_autocomplete( :param context: the context dict from the action call :param field: the field to get the values from - :param prefix: the prefix value to search with (this can be missing/blank to just return the - first values) + :param prefix: the prefix value to search with (this can be missing/blank to just + return the first values) :param query: the query :param query_version: the query schema version :param version: the version to search at @@ -655,20 +659,17 @@ def datastore_multisearch_counts( :param context: the context dict from the action call :param query: the query dict. If None (default) then an empty query is used :param query_version: the version of the query schema the query is using. If None - (default) then the latest query schema version is used + (default) then the latest query schema version is used :param version: the version to search the data at. If None (default) the current - time is used + time is used :param resource_ids: the list of resource to search. If None (default) then all the - resources the user has access to are queried. If a list of - resources are passed then any resources not accessible to the - user will be removed before querying + resources the user has access to are queried. If a list of resources are passed + then any resources not accessible to the user will be removed before querying :param resource_ids_and_versions: a dict of resources and versions to search each of - them at. This allows precise searching of each - resource at a specific parameter. If None - (default) then the resource_ids parameter is used - together with the version parameter. If this - parameter is provided though, it takes priority - over the resource_ids and version parameters. + them at. This allows precise searching of each resource at a specific parameter. + If None (default) then the resource_ids parameter is used together with the + version parameter. If this parameter is provided though, it takes priority over + the resource_ids and version parameters. :return: a dict of resource IDs -> count """ # provide some more complex defaults for some parameters if necessary @@ -711,7 +712,7 @@ def datastore_multisearch_counts( # use an aggregation to get the hit count of each resource, set the size to the # number of resources we're querying to ensure we get all counts in one go and don't # have to paginate with a composite agg - search.aggs.bucket("counts", "terms", field="_index", size=len(resource_ids)) + search.aggs.bucket('counts', 'terms', field='_index', size=len(resource_ids)) # create a multisearch for this one query - this ensures there aren't any issues # with the length of the URL as the index list is passed as a part of the body @@ -722,8 +723,8 @@ def datastore_multisearch_counts( # build the response JSON counts = { - trim_index_name(bucket["key"]): bucket["doc_count"] - for bucket in result.aggs.to_dict()["counts"]["buckets"] + trim_index_name(bucket['key']): bucket['doc_count'] + for bucket in result.aggs.to_dict()['counts']['buckets'] } # add resources that didn't have any hits into the counts dict too counts.update( diff --git a/ckanext/versioned_datastore/logic/auth.py b/ckanext/versioned_datastore/logic/auth.py index 2336eba2..9bded730 100644 --- a/ckanext/versioned_datastore/logic/auth.py +++ b/ckanext/versioned_datastore/logic/auth.py @@ -1,4 +1,3 @@ -from ckan.plugins import toolkit from ckantools.decorators import auth @@ -151,4 +150,4 @@ def datastore_custom_download_filename(context, data_dict): @auth(anon=True) def datastore_multisearch_counts(context, data_dict): # allow access to everyone - return {"success": True} + return {'success': True} diff --git a/ckanext/versioned_datastore/migration/versioned_datastore/env.py b/ckanext/versioned_datastore/migration/versioned_datastore/env.py index 2478562e..3ad05f5d 100644 --- a/ckanext/versioned_datastore/migration/versioned_datastore/env.py +++ b/ckanext/versioned_datastore/migration/versioned_datastore/env.py @@ -1,11 +1,12 @@ # -*- coding: utf-8 -*- from __future__ import with_statement -from alembic import context -from sqlalchemy import engine_from_config, pool -from logging.config import fileConfig import os +from logging.config import fileConfig + +from alembic import context +from sqlalchemy import engine_from_config, pool # this is the Alembic Config object, which provides # access to the values within the .ini file in use. @@ -33,21 +34,19 @@ def run_migrations_offline(): """ Run migrations in 'offline' mode. - This configures the context with just a URL - and not an Engine, though an Engine is acceptable - here as well. By skipping the Engine creation - we don't even need a DBAPI to be available. + This configures the context with just a URL and not an Engine, though an Engine is + acceptable here as well. By skipping the Engine creation we don't even need a DBAPI + to be available. - Calls to context.execute() here emit the given string to the - script output. + Calls to context.execute() here emit the given string to the script output. """ - url = config.get_main_option(u"sqlalchemy.url") + url = config.get_main_option('sqlalchemy.url') context.configure( url=url, target_metadata=target_metadata, literal_binds=True, - version_table=u'{}_alembic_version'.format(name), + version_table='{}_alembic_version'.format(name), ) with context.begin_transaction(): @@ -63,7 +62,7 @@ def run_migrations_online(): """ connectable = engine_from_config( config.get_section(config.config_ini_section), - prefix=u'sqlalchemy.', + prefix='sqlalchemy.', poolclass=pool.NullPool, ) @@ -71,7 +70,7 @@ def run_migrations_online(): context.configure( connection=connection, target_metadata=target_metadata, - version_table=u'{}_alembic_version'.format(name), + version_table='{}_alembic_version'.format(name), ) with context.begin_transaction(): diff --git a/ckanext/versioned_datastore/migration/versioned_datastore/versions/19a61e5b669f_add_new_download_tables.py b/ckanext/versioned_datastore/migration/versioned_datastore/versions/19a61e5b669f_add_new_download_tables.py index b7291946..905c30ce 100644 --- a/ckanext/versioned_datastore/migration/versioned_datastore/versions/19a61e5b669f_add_new_download_tables.py +++ b/ckanext/versioned_datastore/migration/versioned_datastore/versions/19a61e5b669f_add_new_download_tables.py @@ -5,6 +5,7 @@ Revises: Create Date: 2023-01-06 10:27:56.739905 """ + import hashlib import json from datetime import datetime as dt diff --git a/ckanext/versioned_datastore/migration/versioned_datastore/versions/526b12c69d55_add_navigational_slugs.py b/ckanext/versioned_datastore/migration/versioned_datastore/versions/526b12c69d55_add_navigational_slugs.py index 46d89da6..fbbe5079 100644 --- a/ckanext/versioned_datastore/migration/versioned_datastore/versions/526b12c69d55_add_navigational_slugs.py +++ b/ckanext/versioned_datastore/migration/versioned_datastore/versions/526b12c69d55_add_navigational_slugs.py @@ -5,6 +5,7 @@ Revises: 19a61e5b669f Create Date: 2023-06-07 16:25:59.090795 """ + from datetime import datetime as dt from uuid import uuid4 diff --git a/ckanext/versioned_datastore/migration/versioned_datastore/versions/d2ca5da0573f_add_server_args_to_download_request_.py b/ckanext/versioned_datastore/migration/versioned_datastore/versions/d2ca5da0573f_add_server_args_to_download_request_.py index 2abe7afd..a454c290 100644 --- a/ckanext/versioned_datastore/migration/versioned_datastore/versions/d2ca5da0573f_add_server_args_to_download_request_.py +++ b/ckanext/versioned_datastore/migration/versioned_datastore/versions/d2ca5da0573f_add_server_args_to_download_request_.py @@ -5,11 +5,11 @@ Revises: 526b12c69d55 Create Date: 2023-06-28 13:28:33.607360 """ -from alembic import op + import sqlalchemy as sa +from alembic import op from sqlalchemy.dialects.postgresql import JSONB - # revision identifiers, used by Alembic. revision = 'd2ca5da0573f' down_revision = '526b12c69d55' diff --git a/ckanext/versioned_datastore/model/details.py b/ckanext/versioned_datastore/model/details.py index caffb1f2..b931d64a 100644 --- a/ckanext/versioned_datastore/model/details.py +++ b/ckanext/versioned_datastore/model/details.py @@ -1,7 +1,7 @@ import json -from ckan.model import meta, DomainObject -from sqlalchemy import Column, Table, BigInteger, UnicodeText +from ckan.model import DomainObject, meta +from sqlalchemy import BigInteger, Column, Table, UnicodeText # this table stores general details about each version of each resource. Currently it only stores # the column names and order. @@ -32,10 +32,11 @@ def get_columns(self, validate=True): """ Retrieve the columns contained in this resource's version. - :param validate: if True (the default) then fullstops are replaced with underscores before - returning the list of columns and any falsey columns (empty strings, Nones) - are removed - :return: a list of column names in the order they were in the original data source + :param validate: if True (the default) then fullstops are replaced with + underscores before returning the list of columns and any falsey columns + (empty strings, Nones) are removed + :return: a list of column names in the order they were in the original data + source """ columns = [] for column in json.loads(self.columns): diff --git a/ckanext/versioned_datastore/model/downloads.py b/ckanext/versioned_datastore/model/downloads.py index 5a088bcc..4843851a 100644 --- a/ckanext/versioned_datastore/model/downloads.py +++ b/ckanext/versioned_datastore/model/downloads.py @@ -1,20 +1,19 @@ from datetime import datetime +from ckan.model import DomainObject, Session, meta +from ckan.model.types import make_uuid from sqlalchemy import ( - Column, - Table, BigInteger, - UnicodeText, + Column, DateTime, ForeignKey, + Table, + UnicodeText, desc, ) from sqlalchemy.dialects.postgresql import JSONB -from sqlalchemy.orm import relationship, backref from sqlalchemy.exc import InvalidRequestError - -from ckan.model import meta, DomainObject, Session -from ckan.model.types import make_uuid +from sqlalchemy.orm import backref, relationship # this one is outside DownloadRequest so we can use it as a default in the table def state_initial = 'initiated' diff --git a/ckanext/versioned_datastore/model/slugs.py b/ckanext/versioned_datastore/model/slugs.py index 6b7552fd..b990969a 100644 --- a/ckanext/versioned_datastore/model/slugs.py +++ b/ckanext/versioned_datastore/model/slugs.py @@ -1,17 +1,18 @@ from datetime import datetime -from ckan.model import meta, DomainObject +from ckan.model import DomainObject, meta from ckan.model.types import make_uuid from sqlalchemy import ( + BigInteger, Column, + DateTime, Table, - BigInteger, UnicodeText, UniqueConstraint, - DateTime, or_, ) from sqlalchemy.dialects.postgresql import JSONB + from ..lib.query.schema import get_latest_query_version # this table stores query slugs diff --git a/ckanext/versioned_datastore/model/stats.py b/ckanext/versioned_datastore/model/stats.py index 546e517c..d3e9cac8 100644 --- a/ckanext/versioned_datastore/model/stats.py +++ b/ckanext/versioned_datastore/model/stats.py @@ -1,6 +1,6 @@ -from ckan.model import meta, DomainObject +from ckan.model import DomainObject, meta from ckan.model.types import JsonDictType -from sqlalchemy import Column, DateTime, Float, Boolean, Table, BigInteger, UnicodeText +from sqlalchemy import BigInteger, Boolean, Column, DateTime, Float, Table, UnicodeText # this table stores general statistics about the ingest and index events that occur on resources. It # is also used to figure out what versions have been ingested and to a certain extent indexed and diff --git a/ckanext/versioned_datastore/plugin.py b/ckanext/versioned_datastore/plugin.py index 98672cbc..288d35e2 100644 --- a/ckanext/versioned_datastore/plugin.py +++ b/ckanext/versioned_datastore/plugin.py @@ -5,30 +5,30 @@ from ckan import model from ckan.model import DomainObjectOperation from ckan.plugins import ( - toolkit, - interfaces, + PluginImplementations, SingletonPlugin, implements, - PluginImplementations, + interfaces, + toolkit, ) +from ckantools.loaders import create_actions, create_auth from splitgill.utils import to_timestamp -from . import routes, helpers, cli -from .interfaces import IVersionedDatastoreQuerySchema, IVersionedDatastore +from . import cli, helpers, routes +from .interfaces import IVersionedDatastore, IVersionedDatastoreQuerySchema from .lib.common import setup from .lib.datastore_utils import ( - is_datastore_resource, - ReadOnlyResourceException, InvalidVersionException, - update_resources_privacy, - get_queue_length, + ReadOnlyResourceException, get_es_health, + get_queue_length, + is_datastore_resource, + update_resources_privacy, ) from .lib.query.schema import register_schema from .lib.query.v1_0_0 import v1_0_0Schema from .logic import auth from .logic.actions import basic_search, crud, downloads, extras, multisearch -from ckantools.loaders import create_actions, create_auth try: from ckanext.status.interfaces import IStatus diff --git a/ckanext/versioned_datastore/routes/__init__.py b/ckanext/versioned_datastore/routes/__init__.py index b17c2a4d..48a897c0 100644 --- a/ckanext/versioned_datastore/routes/__init__.py +++ b/ckanext/versioned_datastore/routes/__init__.py @@ -5,7 +5,7 @@ # Created by the Natural History Museum in London, UK from ckan.plugins import toolkit -from . import datastore, search, downloads, status +from . import datastore, downloads, search, status blueprints = [datastore.blueprint, search.blueprint, status.blueprint] diff --git a/ckanext/versioned_datastore/routes/search.py b/ckanext/versioned_datastore/routes/search.py index 1cf46825..be594dfa 100644 --- a/ckanext/versioned_datastore/routes/search.py +++ b/ckanext/versioned_datastore/routes/search.py @@ -8,7 +8,7 @@ from sqlalchemy import func from ..lib.query import slugs -from ..lib.query.slug_words import list_one, list_two, list_three +from ..lib.query.slug_words import list_one, list_three, list_two from ..model.slugs import DatastoreSlug blueprint = Blueprint(name='search', import_name=__name__) diff --git a/ckanext/versioned_datastore/routes/status.py b/ckanext/versioned_datastore/routes/status.py index 067bb0f0..f2c2d7c7 100644 --- a/ckanext/versioned_datastore/routes/status.py +++ b/ckanext/versioned_datastore/routes/status.py @@ -1,9 +1,10 @@ import os -from datetime import datetime as dt, timedelta +from datetime import datetime as dt +from datetime import timedelta +from ckan.plugins import plugin_loaded, toolkit from flask import Blueprint, jsonify -from ckan.plugins import toolkit, plugin_loaded from ..lib.downloads.loaders import get_file_server from ..lib.downloads.servers import DirectFileServer from ..lib.query.slugs import create_nav_slug @@ -80,8 +81,8 @@ def get_download_details(download_id): query_doi = None doi_url = None if plugin_loaded('query_dois'): - from ckanext.query_dois.lib.doi import find_existing_doi from ckanext.query_dois.helpers import get_landing_page_url + from ckanext.query_dois.lib.doi import find_existing_doi # query-dois only saves resources that return records non_zero_resources = { diff --git a/docs/_scripts/gen_api_pages.py b/docs/_scripts/gen_api_pages.py index d4a5d480..f83bf683 100644 --- a/docs/_scripts/gen_api_pages.py +++ b/docs/_scripts/gen_api_pages.py @@ -1,6 +1,5 @@ # !/usr/bin/env python # encoding: utf-8 - """ Generate the code reference pages and navigation. diff --git a/tests/conftest.py b/tests/conftest.py index 76c56054..12519db5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -11,8 +11,9 @@ from splitgill.mongo import get_mongo from ckanext.versioned_datastore.lib import common -from ckanext.versioned_datastore.model import stats, slugs, details, downloads -from tests.helpers import utils, data as test_data +from ckanext.versioned_datastore.model import details, downloads, slugs, stats +from tests.helpers import data as test_data +from tests.helpers import utils @pytest.fixture(scope='class') diff --git a/tests/helpers/patches.py b/tests/helpers/patches.py index 2c976da9..5d1a557f 100644 --- a/tests/helpers/patches.py +++ b/tests/helpers/patches.py @@ -1,6 +1,6 @@ from collections import defaultdict, namedtuple -from mock import patch, MagicMock +from mock import MagicMock, patch from tests.helpers.utils import sync_enqueue_job diff --git a/tests/integration/downloads/test_downloads.py b/tests/integration/downloads/test_downloads.py index 525fa10c..00d0e6d1 100644 --- a/tests/integration/downloads/test_downloads.py +++ b/tests/integration/downloads/test_downloads.py @@ -1,16 +1,18 @@ import csv import json import os -import pytest import shutil import tempfile import zipfile -from mock import patch, MagicMock -from tests.helpers import patches, data as test_data +import pytest from ckan.plugins import toolkit from ckan.tests import factories +from mock import MagicMock, patch + from ckanext.versioned_datastore.model.downloads import DownloadRequest +from tests.helpers import data as test_data +from tests.helpers import patches scenarios = [ ('csv', {}), @@ -398,7 +400,7 @@ def _shutil_mock(src, dest): class TestDownloadWithQueryDois: @classmethod def setup_class(cls): - from ckanext.query_dois.model import query_doi_table, query_doi_stat_table + from ckanext.query_dois.model import query_doi_stat_table, query_doi_table cls.tables = [query_doi_table, query_doi_stat_table] diff --git a/tests/unit/lib/basic_query/test_basic_query_geo.py b/tests/unit/lib/basic_query/test_basic_query_geo.py index a1620d55..957ae049 100644 --- a/tests/unit/lib/basic_query/test_basic_query_geo.py +++ b/tests/unit/lib/basic_query/test_basic_query_geo.py @@ -2,15 +2,16 @@ import pytest from ckan.plugins import toolkit +from elasticsearch_dsl.query import Bool, GeoPolygon +from mock import MagicMock, call, patch + from ckanext.versioned_datastore.lib.basic_query.geo import ( - add_point_filter, FIELD, + add_geo_search, add_multipolygon_filter, + add_point_filter, add_polygon_filter, - add_geo_search, ) -from elasticsearch_dsl.query import GeoPolygon, Bool -from mock import MagicMock, call, patch class TestAddPointFilter(object): @@ -31,7 +32,7 @@ def test_simple(self): 'lat': coordinates[1], 'lon': coordinates[0], }, - } + }, ) def test_float_conversion(self): @@ -51,7 +52,7 @@ def test_float_conversion(self): 'lat': float(coordinates[1]), 'lon': float(coordinates[0]), }, - } + }, ) diff --git a/tests/unit/lib/basic_query/test_basic_query_search.py b/tests/unit/lib/basic_query/test_basic_query_search.py index cf625da2..76514b88 100644 --- a/tests/unit/lib/basic_query/test_basic_query_search.py +++ b/tests/unit/lib/basic_query/test_basic_query_search.py @@ -6,8 +6,8 @@ from ckanext.versioned_datastore.lib.basic_query.search import ( _find_version, - create_search, build_search_object, + create_search, ) from ckanext.versioned_datastore.lib.datastore_utils import prefix_field diff --git a/tests/unit/lib/basic_query/test_basic_query_utils.py b/tests/unit/lib/basic_query/test_basic_query_utils.py index f6552525..fea0dabb 100644 --- a/tests/unit/lib/basic_query/test_basic_query_utils.py +++ b/tests/unit/lib/basic_query/test_basic_query_utils.py @@ -1,5 +1,5 @@ import pytest -from mock import patch, MagicMock +from mock import MagicMock, patch from splitgill.indexing.utils import DOC_TYPE from ckanext.versioned_datastore.lib.basic_query.utils import format_facets, get_fields @@ -53,18 +53,18 @@ def test_format_facets(self): @pytest.mark.usefixtures('with_versioned_datastore_tables', 'with_plugins') def test_get_fields(self): mock_mapping = { - u"beans-index": { - u"mappings": { + 'beans-index': { + 'mappings': { DOC_TYPE: { - u"properties": { - u"data": { - u"properties": { - u"_id": {'type': 'long'}, - u"field1": { - u"type": u"keyword", + 'properties': { + 'data': { + 'properties': { + '_id': {'type': 'long'}, + 'field1': { + 'type': 'keyword', }, - u"field2": { - u"type": u"date", + 'field2': { + 'type': 'date', }, } } diff --git a/tests/unit/lib/downloads/test_downloads_notifiers.py b/tests/unit/lib/downloads/test_downloads_notifiers.py index 151cd428..ff427de7 100644 --- a/tests/unit/lib/downloads/test_downloads_notifiers.py +++ b/tests/unit/lib/downloads/test_downloads_notifiers.py @@ -1,12 +1,12 @@ import pytest from mock import MagicMock, patch -from tests.helpers import patches from ckanext.versioned_datastore.lib.downloads.notifiers import ( EmailNotifier, NullNotifier, WebhookNotifier, ) +from tests.helpers import patches notifiers = [ (EmailNotifier, {'emails': ['data@nhm.ac.uk']}), diff --git a/tests/unit/lib/downloads/test_downloads_runmanager.py b/tests/unit/lib/downloads/test_downloads_runmanager.py index 293e57ff..105fd904 100644 --- a/tests/unit/lib/downloads/test_downloads_runmanager.py +++ b/tests/unit/lib/downloads/test_downloads_runmanager.py @@ -1,18 +1,18 @@ import pytest from ckan.model import Session -from mock import patch, MagicMock +from mock import patch from ckanext.versioned_datastore.lib.downloads.download import DownloadRunManager from ckanext.versioned_datastore.logic.actions.meta.arg_objects import ( - QueryArgs, DerivativeArgs, - ServerArgs, NotifierArgs, + QueryArgs, + ServerArgs, ) from ckanext.versioned_datastore.model.downloads import ( - DownloadRequest, CoreFileRecord, DerivativeFileRecord, + DownloadRequest, ) from tests.helpers import patches diff --git a/tests/unit/lib/downloads/test_downloads_utils.py b/tests/unit/lib/downloads/test_downloads_utils.py index b15c34c2..39594fff 100644 --- a/tests/unit/lib/downloads/test_downloads_utils.py +++ b/tests/unit/lib/downloads/test_downloads_utils.py @@ -3,7 +3,7 @@ from ckan.tests import factories from mock import MagicMock, patch -from ckanext.versioned_datastore.lib.downloads import utils, query +from ckanext.versioned_datastore.lib.downloads import query, utils from tests.helpers import patches @@ -72,11 +72,11 @@ def test_get_schema(self): } ) with patch( - "ckanext.versioned_datastore.lib.downloads.utils.get_mappings", + 'ckanext.versioned_datastore.lib.downloads.utils.get_mappings', get_mapping_mock, ): parsed_schemas = utils.get_schemas(q) - parsed_schema = parsed_schemas[resource_dict["id"]] + parsed_schema = parsed_schemas[resource_dict['id']] assert isinstance(parsed_schema, dict) assert parsed_schema['type'] == 'record' assert parsed_schema['name'] == 'ResourceRecord' diff --git a/tests/unit/lib/query/test_query_query.py b/tests/unit/lib/query/test_query_query.py index 8e8a4390..d960874e 100644 --- a/tests/unit/lib/query/test_query_query.py +++ b/tests/unit/lib/query/test_query_query.py @@ -5,9 +5,10 @@ import jsonschema import pytest -from ckanext.versioned_datastore.lib.query import schema as schema_lib from mock import MagicMock, patch +from ckanext.versioned_datastore.lib.query import schema as schema_lib + class TestQuery(object): def test_load_core_schema(self): diff --git a/tests/unit/lib/query/test_query_v1_0_0.py b/tests/unit/lib/query/test_query_v1_0_0.py index 4474c2b5..f3ce9a9a 100644 --- a/tests/unit/lib/query/test_query_v1_0_0.py +++ b/tests/unit/lib/query/test_query_v1_0_0.py @@ -2,11 +2,11 @@ # encoding: utf-8 import io import json - -import jsonschema import os +import jsonschema import pytest + from ckanext.versioned_datastore.lib.query.schema import schema_base_path from ckanext.versioned_datastore.lib.query.v1_0_0 import v1_0_0Schema @@ -41,13 +41,13 @@ def test_translate_2(self): def test_translate_3(self): query = { - u"filters": { - u"and": [ - {u"string_equals": {u"fields": [u"genus"], u"value": u"helix"}}, + 'filters': { + 'and': [ + {'string_equals': {'fields': ['genus'], 'value': 'helix'}}, { - u"string_contains": { - u"fields": [u"higherGeography"], - u"value": u"europe", + 'string_contains': { + 'fields': ['higherGeography'], + 'value': 'europe', } }, ] @@ -75,13 +75,13 @@ def test_translate_3(self): def test_translate_4(self): query = { 'search': 'italy', - u"filters": { - u"and": [ - {u"string_equals": {u"fields": [u"genus"], u"value": u"helix"}}, + 'filters': { + 'and': [ + {'string_equals': {'fields': ['genus'], 'value': 'helix'}}, { - u"string_contains": { - u"fields": [u"higherGeography"], - u"value": u"europe", + 'string_contains': { + 'fields': ['higherGeography'], + 'value': 'europe', } }, ] @@ -111,27 +111,27 @@ def test_translate_4(self): def test_translate_5(self): query = { - u"filters": { - u"and": [ - {u"string_equals": {u"fields": [u"genus"], u"value": u"helix"}}, + 'filters': { + 'and': [ + {'string_equals': {'fields': ['genus'], 'value': 'helix'}}, { - u"or": [ + 'or': [ { - u"string_contains": { - u"fields": [u"higherGeography"], - u"value": u"italy", + 'string_contains': { + 'fields': ['higherGeography'], + 'value': 'italy', } }, { - u"string_contains": { - u"fields": [u"higherGeography"], - u"value": u"spain", + 'string_contains': { + 'fields': ['higherGeography'], + 'value': 'spain', } }, { - u"string_contains": { - u"fields": [u"higherGeography"], - u"value": u"portugal", + 'string_contains': { + 'fields': ['higherGeography'], + 'value': 'portugal', } }, ] @@ -183,36 +183,36 @@ def test_translate_5(self): def test_translate_6(self): query = { - u"filters": { - u"and": [ - {u"string_equals": {u"fields": [u"genus"], u"value": u"helix"}}, + 'filters': { + 'and': [ + {'string_equals': {'fields': ['genus'], 'value': 'helix'}}, { - u"number_range": { - u"fields": [u"year"], - u"less_than": 2010, - u"less_than_inclusive": True, - u"greater_than": 2000, - u"greater_than_inclusive": True, + 'number_range': { + 'fields': ['year'], + 'less_than': 2010, + 'less_than_inclusive': True, + 'greater_than': 2000, + 'greater_than_inclusive': True, } }, { - u"or": [ + 'or': [ { - u"string_contains": { - u"fields": [u"higherGeography"], - u"value": u"italy", + 'string_contains': { + 'fields': ['higherGeography'], + 'value': 'italy', } }, { - u"string_contains": { - u"fields": [u"higherGeography"], - u"value": u"spain", + 'string_contains': { + 'fields': ['higherGeography'], + 'value': 'spain', } }, { - u"string_contains": { - u"fields": [u"higherGeography"], - u"value": u"portugal", + 'string_contains': { + 'fields': ['higherGeography'], + 'value': 'portugal', } }, ] @@ -265,15 +265,15 @@ def test_translate_6(self): def test_translate_7(self): query = { - u"filters": { - u"and": [ - {u"string_equals": {u"fields": [u"genus"], u"value": u"helix"}}, + 'filters': { + 'and': [ + {'string_equals': {'fields': ['genus'], 'value': 'helix'}}, { - u"geo_point": { - u"latitude": 51.4712, - u"longitude": -0.9421, - u"radius": 10, - u"radius_unit": u"mi", + 'geo_point': { + 'latitude': 51.4712, + 'longitude': -0.9421, + 'radius': 10, + 'radius_unit': 'mi', } }, ] @@ -297,19 +297,19 @@ def test_translate_7(self): self.compare_query_and_search(query, search_dict) def test_translate_8(self): - query = {u"filters": {u"and": [{u"exists": {u"fields": [u"associatedMedia"]}}]}} + query = {'filters': {'and': [{'exists': {'fields': ['associatedMedia']}}]}} search_dict = {'query': {'exists': {'field': 'data.associatedMedia'}}} self.compare_query_and_search(query, search_dict) def test_translate_9(self): - query = {u"filters": {u"and": [{u"exists": {u"geo_field": True}}]}} + query = {'filters': {'and': [{'exists': {'geo_field': True}}]}} search_dict = {'query': {'exists': {'field': 'meta.geo'}}} self.compare_query_and_search(query, search_dict) def test_translate_10(self): country = 'CuraƧao' multipolygon = v1_0_0Schema().geojson['country'][country] - query = {u"filters": {u"and": [{u"geo_named_area": {u"country": country}}]}} + query = {'filters': {'and': [{'geo_named_area': {'country': country}}]}} search_dict = { 'query': { 'geo_polygon': { @@ -345,10 +345,10 @@ def to_points(points): return [{'lat': point[1], 'lon': point[0]} for point in points] query = { - u"filters": { - u"and": [ + 'filters': { + 'and': [ { - u"geo_custom_area": [ + 'geo_custom_area': [ # just a square [a_square], # a square with a square hole in it @@ -359,30 +359,26 @@ def to_points(points): } } search_dict = { - u"query": { - u"bool": { - u"minimum_should_match": 1, - u"should": [ - { - u"geo_polygon": { - u"meta.geo": {u"points": to_points(a_square)} - } - }, + 'query': { + 'bool': { + 'minimum_should_match': 1, + 'should': [ + {'geo_polygon': {'meta.geo': {'points': to_points(a_square)}}}, { - u"bool": { - u"filter": [ + 'bool': { + 'filter': [ { - u"geo_polygon": { - u"meta.geo": { - u"points": to_points(another_square) + 'geo_polygon': { + 'meta.geo': { + 'points': to_points(another_square) } } } ], - u"must_not": [ + 'must_not': [ { - u"geo_polygon": { - u"meta.geo": {u"points": to_points(a_hole)} + 'geo_polygon': { + 'meta.geo': {'points': to_points(a_hole)} } } ], @@ -396,10 +392,8 @@ def to_points(points): def test_translate_12(self): query = { - u"filters": { - u"not": [ - {u"string_equals": {u"fields": [u"genus"], u"value": u"helix"}} - ] + 'filters': { + 'not': [{'string_equals': {'fields': ['genus'], 'value': 'helix'}}] } } search_dict = { @@ -411,11 +405,9 @@ def test_translate_ignores_additional_properties_in_filters(self): schema = v1_0_0Schema() nope = { - u"filters": { + 'filters': { 'something_else': {}, - u"not": [ - {u"string_equals": {u"fields": [u"genus"], u"value": u"helix"}} - ], + 'not': [{'string_equals': {'fields': ['genus'], 'value': 'helix'}}], } } with pytest.raises(jsonschema.ValidationError): @@ -425,12 +417,12 @@ def test_translate_ignores_additional_properties_in_geo_named_area(self): schema = v1_0_0Schema() nope = { - u"filters": { - u"and": [ + 'filters': { + 'and': [ { - u"geo_named_area": { - u"country": u"Belgium", - u"something_else": False, + 'geo_named_area': { + 'country': 'Belgium', + 'something_else': False, } } ] diff --git a/tests/unit/lib/test_datastore_utils.py b/tests/unit/lib/test_datastore_utils.py index 309ef8a2..e7d05e7a 100644 --- a/tests/unit/lib/test_datastore_utils.py +++ b/tests/unit/lib/test_datastore_utils.py @@ -1,11 +1,11 @@ +from mock import MagicMock, patch from splitgill.indexing.utils import DOC_TYPE -from mock import patch, MagicMock from ckanext.versioned_datastore.lib.common import ALL_FORMATS, DATASTORE_ONLY_RESOURCE from ckanext.versioned_datastore.lib.datastore_utils import ( + is_datastore_only_resource, is_datastore_resource, is_ingestible, - is_datastore_only_resource, iter_data_fields, ) diff --git a/tests/unit/logic/actions/test_actions_downloads.py b/tests/unit/logic/actions/test_actions_downloads.py index f16666cf..d31d9610 100644 --- a/tests/unit/logic/actions/test_actions_downloads.py +++ b/tests/unit/logic/actions/test_actions_downloads.py @@ -3,9 +3,9 @@ from ckanext.versioned_datastore.logic.actions.downloads import datastore_queue_download from ckanext.versioned_datastore.logic.actions.meta.arg_objects import ( - QueryArgs, DerivativeArgs, NotifierArgs, + QueryArgs, ) from tests.helpers import patches diff --git a/tests/unit/test_helpers.py b/tests/unit/test_helpers.py index b3d94ed2..710d2a46 100644 --- a/tests/unit/test_helpers.py +++ b/tests/unit/test_helpers.py @@ -3,12 +3,12 @@ from mock import MagicMock, patch from ckanext.versioned_datastore.helpers import ( - is_duplicate_ingestion, + get_available_formats, get_human_duration, - get_stat_icon, get_stat_activity_class, + get_stat_icon, get_stat_title, - get_available_formats, + is_duplicate_ingestion, ) from ckanext.versioned_datastore.lib.common import ALL_FORMATS from ckanext.versioned_datastore.lib.importing.ingestion.exceptions import ( @@ -16,10 +16,10 @@ UnsupportedDataSource, ) from ckanext.versioned_datastore.lib.importing.stats import ( - INGEST, + ALL_TYPES, INDEX, + INGEST, PREP, - ALL_TYPES, )