diff --git a/.github/workflows/build_on_pull_request.yaml b/.github/workflows/build_on_pull_request.yaml new file mode 100644 index 0000000..f32be5a --- /dev/null +++ b/.github/workflows/build_on_pull_request.yaml @@ -0,0 +1,77 @@ +name: Build Docker images for geolake components and push to the repository + +on: + pull_request: + types: [opened, synchronize] + workflow_dispatch: +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.x" + - name: Install build + run: >- + python3 -m + pip install + build + --user + - name: Build a binary wheel and a source for drivers + run: python3 -m build ./drivers + - name: Set Docker image tag name + run: echo "TAG=$(date +'%Y.%m.%d.%H.%M')" >> $GITHUB_ENV + - name: Login to Scaleway Container Registry + uses: docker/login-action@v2 + with: + username: nologin + password: ${{ secrets.DOCKER_PASSWORD }} + registry: ${{ vars.DOCKER_REGISTRY }} + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + - name: Build and push drivers + uses: docker/build-push-action@v4 + with: + context: ./drivers + file: ./drivers/Dockerfile + push: true + build-args: | + REGISTRY=${{ vars.GEOKUBE_REGISTRY }} + tags: | + ${{ vars.DOCKER_REGISTRY }}/geolake-drivers:${{ env.TAG }} + ${{ vars.DOCKER_REGISTRY }}/geolake-drivers:latest + - name: Build and push datastore component + uses: docker/build-push-action@v4 + with: + context: ./datastore + file: ./datastore/Dockerfile + push: true + build-args: | + REGISTRY=${{ vars.DOCKER_REGISTRY }} + tags: | + ${{ vars.DOCKER_REGISTRY }}/geolake-datastore:${{ env.TAG }} + ${{ vars.DOCKER_REGISTRY }}/geolake-datastore:latest + - name: Build and push api component + uses: docker/build-push-action@v4 + with: + context: ./api + file: ./api/Dockerfile + push: true + build-args: | + REGISTRY=${{ vars.DOCKER_REGISTRY }} + tags: | + ${{ vars.DOCKER_REGISTRY }}/geolake-api:${{ env.TAG }} + ${{ vars.DOCKER_REGISTRY }}/geolake-api:latest + - name: Build and push executor component + uses: docker/build-push-action@v4 + with: + context: ./executor + file: ./executor/Dockerfile + push: true + build-args: | + REGISTRY=${{ vars.DOCKER_REGISTRY }} + tags: | + ${{ vars.DOCKER_REGISTRY }}/geolake-executor:${{ env.TAG }} + ${{ vars.DOCKER_REGISTRY }}/geolake-executor:latest \ No newline at end of file diff --git a/api/Dockerfile b/api/Dockerfile index 1eddc88..c038cb3 100644 --- a/api/Dockerfile +++ b/api/Dockerfile @@ -1,12 +1,9 @@ -FROM rg.nl-ams.scw.cloud/dds-production/geokube:v0.2a5 -WORKDIR /code -COPY ./api/requirements.txt /code/requirements.txt +ARG REGISTRY=rg.fr-par.scw.cloud/geolake +ARG TAG=latest +FROM $REGISTRY/geolake-datastore:$TAG +WORKDIR /app +COPY requirements.txt /code/requirements.txt RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt -COPY ./utils/wait-for-it.sh /code/wait-for-it.sh -COPY ./datastore /code/app/datastore -COPY ./db/dbmanager /code/db/dbmanager -COPY ./geoquery/ /code/geoquery -COPY ./resources /code/app/resources -COPY ./api/app /code/app +COPY app /app EXPOSE 80 -CMD ["uvicorn", "app.main:app", "--proxy-headers", "--host", "0.0.0.0", "--port", "80"] \ No newline at end of file +CMD ["uvicorn", "app.main:app", "--proxy-headers", "--host", "0.0.0.0", "--port", "80"] diff --git a/api/app/utils.py b/api/app/api_utils.py similarity index 61% rename from api/app/utils.py rename to api/app/api_utils.py index f3ab57c..82ea9f6 100644 --- a/api/app/utils.py +++ b/api/app/api_utils.py 
@@ -1,8 +1,7 @@ """Utils module""" -from typing import Literal -def convert_bytes(size_bytes: int, to: Literal["kb", "mb", "gb"]) -> float: +def convert_bytes(size_bytes: int, to: str) -> float: """Converts size in bytes to the other unit - one out of: ["kb", "mb", "gb"] @@ -16,19 +15,24 @@ def convert_bytes(size_bytes: int, to: Literal["kb", "mb", "gb"]) -> float: size : float `size_bytes` converted to the given unit """ + assert to is not None, "Expected unit cannot be `None`" to = to.lower() - if to == "kb": - value = size_bytes / 1024 - elif to == "mb": - value = size_bytes / 1024**2 - elif to == "gb": - value = size_bytes / 1024**3 - else: - raise ValueError(f"unsupported units: {to}") - return value + match to: + case "bytes": + return size_bytes + case "kb": + return size_bytes / 1024 + case "mb": + return size_bytes / 1024**2 + case "gb": + return size_bytes / 1024**3 + case _: + raise ValueError(f"unsupported units: {to}") -def make_bytes_readable_dict(size_bytes: int, units: str = None) -> dict: +def make_bytes_readable_dict( + size_bytes: int, units: str | None = None +) -> dict: """Prepare dictionary representing size (in bytes) in more readable unit to keep value in the range [0,1] - if `units` is `None`. If `units` is not None, converts `size_bytes` to the size expressed by @@ -38,8 +42,8 @@ def make_bytes_readable_dict(size_bytes: int, units: str = None) -> dict: ---------- size_bytes : int Size expressed in bytes - units : optional, str - Units (case insensitive), one of [kB, MB, GB] + units : optional str + Returns ------- result : dict @@ -52,8 +56,8 @@ def make_bytes_readable_dict(size_bytes: int, units: str = None) -> dict: if units is None: units = "bytes" if units != "bytes": - size_bytes = convert_bytes(size_bytes=size_bytes, to=units) - return {"value": size_bytes, "units": units} + converted_size = convert_bytes(size_bytes=size_bytes, to=units) + return {"value": converted_size, "units": units} val = size_bytes if val > 1024: units = "kB" @@ -64,6 +68,6 @@ def make_bytes_readable_dict(size_bytes: int, units: str = None) -> dict: if val > 1024: units = "GB" val /= 1024 - if val > 0.0 and (val := round(val, 2)) == 0.00: + if val > 0.0 and (round(val, 2) == 0.00): val = 0.01 return {"value": round(val, 2), "units": units} diff --git a/api/app/auth/__init__.py b/api/app/auth/__init__.py index a27d427..e69de29 100644 --- a/api/app/auth/__init__.py +++ b/api/app/auth/__init__.py @@ -1,2 +0,0 @@ -from .context import Context -from .manager import assert_not_anonymous diff --git a/api/app/auth/backend.py b/api/app/auth/backend.py new file mode 100644 index 0000000..c172b58 --- /dev/null +++ b/api/app/auth/backend.py @@ -0,0 +1,66 @@ +"""The module contains authentication backend""" +from uuid import UUID + +from starlette.authentication import ( + AuthCredentials, + AuthenticationBackend, + UnauthenticatedUser, +) +from dbmanager.dbmanager import DBManager + +import exceptions as exc +from auth.models import DDSUser +from auth import scopes + + +class DDSAuthenticationBackend(AuthenticationBackend): + """Class managing authentication and authorization""" + + async def authenticate(self, conn): + """Authenticate user based on `User-Token` header""" + if "User-Token" in conn.headers: + return self._manage_user_token_auth(conn.headers["User-Token"]) + return AuthCredentials([scopes.ANONYMOUS]), UnauthenticatedUser() + + def _manage_user_token_auth(self, user_token: str): + try: + user_id, api_key = self.get_authorization_scheme_param(user_token) + except exc.BaseDDSException 
as err: + raise err.wrap_around_http_exception() + user_dto = DBManager().get_user_details(user_id) + eligible_scopes = [scopes.AUTHENTICATED] + self._get_scopes_for_user( + user_dto=user_dto + ) + if user_dto.api_key != api_key: + raise exc.AuthenticationFailed( + user_dto + ).wrap_around_http_exception() + return AuthCredentials(eligible_scopes), DDSUser(username=user_id) + + def _get_scopes_for_user(self, user_dto) -> list[str]: + if user_dto is None: + return [] + eligible_scopes = [] + for role in user_dto.roles: + if "admin" == role.role_name: + eligible_scopes.append(scopes.ADMIN) + continue + # NOTE: Role-specific scopes + # Maybe need some more logic + eligible_scopes.append(role.role_name) + return eligible_scopes + + def get_authorization_scheme_param(self, user_token: str): + """Get `user_id` and `api_key` if authorization scheme is correct.""" + if user_token is None or user_token.strip() == "": + raise exc.EmptyUserTokenError + if ":" not in user_token: + raise exc.ImproperUserTokenError + user_id, api_key, *rest = user_token.split(":") + if len(rest) > 0: + raise exc.ImproperUserTokenError + try: + _ = UUID(user_id, version=4) + except ValueError as err: + raise exc.ImproperUserTokenError from err + return (user_id, api_key) diff --git a/api/app/auth/context.py b/api/app/auth/context.py deleted file mode 100644 index 49290b2..0000000 --- a/api/app/auth/context.py +++ /dev/null @@ -1,179 +0,0 @@ -"""Module with auth utils""" -from uuid import UUID -from typing import Optional - -from fastapi import Request -from db.dbmanager.dbmanager import DBManager - -from ..api_logging import get_dds_logger -from .. import exceptions as exc - -log = get_dds_logger(__name__) - - -class UserCredentials: - """Class containing current user credentials""" - - __slots__ = ("_user_id", "_user_key") - - def __init__( - self, user_id: Optional[str] = None, user_key: Optional[str] = None - ): - self._user_id = user_id - if self._user_id is None: - self._user_key = None - else: - self._user_key = user_key - - @property - def is_public(self) -> bool: - """Determine if the current user is public (anonymous)""" - return self._user_id is None - - @property - def id(self) -> int: - """Get the ID of the current user""" - return self._user_id - - @property - def key(self) -> str: - "Get key of the current user" - return self._user_key - - def __eq__(self, other) -> bool: - if not isinstance(other, UserCredentials): - return False - if self.id == other.id and self.key == other.key: - return True - return False - - def __ne__(self, other): - return self != other - - def __repr__(self): - return ( - f"" - ) - - -class Context: - """The class managing execution context of the single request passing - through the Web component. Its attributes are immutable when set to - non-None values. - - Context contains following attributes: - 1. user: UserCredentials - Credentials of the user within the context - 2. 
rid: UUID- like string - ID of the request passing throught the Web component - - """ - - __slots__ = ("rid", "user") - - rid: str - user: UserCredentials - - def __init__(self, rid: str, user: UserCredentials): - log.debug("creating new context", extra={"rid": rid}) - self.rid = rid - self.user = user - - @property - def is_public(self) -> bool: - """Determine if the context contains an anonymous user""" - return self.user.is_public - - def __delattr__(self, name): - if getattr(self, name, None) is not None: - raise AttributeError("The attribute '{name}' cannot be deleted!") - super().__delattr__(name) - - def __setattr__(self, name, value): - if getattr(self, name, None) is not None: - raise AttributeError( - "The attribute '{name}' cannot modified when not None!" - ) - super().__setattr__(name, value) - - -class ContextCreator: - """Class managing the Context creation""" - - @staticmethod - def new_context( - request: Request, *, rid: str, user_token: Optional[str] = None - ) -> Context: - """Create a brand new `Context` object based on the provided - `request`, `rid`, and `user_token` arguments. - - Parameters - ---------- - request : fastapi.Request - A request for which context is about to be created - rid : str - ID of the DDS Request - user_token : str - Token of a user - - Returns - ------- - context : Context - A new context - - Raises - ------ - ImproperUserTokenError - If user token is not in the right format - AuthenticationFailed - If provided api key does not agree with the one stored in the DB - """ - assert rid is not None, "DDS Request ID cannot be `None`!" - try: - user_credentials = UserCredentials( - *ContextCreator._get_user_id_and_key_from_token(user_token) - ) - except exc.EmptyUserTokenError: - # NOTE: we then consider a user as anonymous - user_credentials = UserCredentials() - if not user_credentials.is_public: - log.debug("context authentication", extra={"rid": rid}) - ContextCreator.authenticate(user_credentials) - context = Context(rid=rid, user=user_credentials) - return context - - @staticmethod - def authenticate(user: UserCredentials): - """Authenticate user. Verify that the provided api agrees with - the one stored in the database. - - Parameters - ---------- - user : UserCredentials - - Raises - ------ - AuthenticationFailed - If user with the given ID is found in the database but stored api key - is different than the provided one. - """ - user_db = DBManager().get_user_details(user.id) - if user_db.api_key != user.key: - raise exc.AuthenticationFailed(user.id) - - @staticmethod - def _get_user_id_and_key_from_token(user_token: str): - if user_token is None or user_token.strip() == "": - raise exc.EmptyUserTokenError - if ":" not in user_token: - raise exc.ImproperUserTokenError - user_id, api_key, *rest = user_token.split(":") - if len(rest) > 0: - raise exc.ImproperUserTokenError - try: - _ = UUID(user_id, version=4) - except ValueError as err: - raise exc.ImproperUserTokenError from err - else: - return (user_id, api_key) diff --git a/api/app/auth/manager.py b/api/app/auth/manager.py index 536ac7b..02bf686 100644 --- a/api/app/auth/manager.py +++ b/api/app/auth/manager.py @@ -1,18 +1,13 @@ """Module with access/authentication functions""" -from inspect import signature -from functools import wraps from typing import Optional -from ..api_logging import get_dds_logger -from ..auth import Context -from ..decorators_factory import assert_parameters_are_defined, bind_arguments -from .. 
import exceptions as exc +from utils.api_logging import get_dds_logger +import exceptions as exc log = get_dds_logger(__name__) def is_role_eligible_for_product( - context: Context, product_role_name: Optional[str] = None, user_roles_names: Optional[list[str]] = None, ): @@ -38,7 +33,6 @@ def is_role_eligible_for_product( "verifying eligibility of the product role '%s' against roles '%s'", product_role_name, user_roles_names, - extra={"rid": context.rid}, ) if product_role_name == "public" or product_role_name is None: return True @@ -53,7 +47,6 @@ def is_role_eligible_for_product( def assert_is_role_eligible( - context: Context, product_role_name: Optional[str] = None, user_roles_names: Optional[list[str]] = None, ): @@ -73,26 +66,7 @@ def assert_is_role_eligible( AuthorizationFailed """ if not is_role_eligible_for_product( - context=context, product_role_name=product_role_name, user_roles_names=user_roles_names, ): - raise exc.AuthorizationFailed(user_id=context.user.id) - - -def assert_not_anonymous(func): - """Decorator for convenient authentication management""" - sig = signature(func) - assert_parameters_are_defined( - sig, required_parameters=[("context", Context)] - ) - - @wraps(func) - def wrapper_sync(*args, **kwargs): - args_dict = bind_arguments(sig, *args, **kwargs) - context = args_dict["context"] - if context.is_public: - raise exc.AuthorizationFailed(user_id=None) - return func(*args, **kwargs) - - return wrapper_sync + raise exc.AuthorizationFailed diff --git a/api/app/auth/models.py b/api/app/auth/models.py new file mode 100644 index 0000000..bff896f --- /dev/null +++ b/api/app/auth/models.py @@ -0,0 +1,38 @@ +"""The module contains models related to the authentication and authorization""" +from starlette.authentication import SimpleUser + + +class DDSUser(SimpleUser): + """Immutable class containing information about the authenticated user""" + + def __init__(self, username: str) -> None: + super().__init__(username=username) + + @property + def id(self): + return self.username + + def __eq__(self, other) -> bool: + if not isinstance(other, DDSUser): + return False + if self.username == other.username: + return True + return False + + def __ne__(self, other): + return not self == other + + def __repr__(self): + return f"<DDSUser(username={self.username})>" + + def __delattr__(self, name): + if getattr(self, name, None) is not None: + raise AttributeError(f"The attribute '{name}' cannot be deleted!") + super().__delattr__(name) + + def __setattr__(self, name, value): + if getattr(self, name, None) is not None: + raise AttributeError( + f"The attribute '{name}' cannot be modified when not None!"
+ ) + super().__setattr__(name, value) diff --git a/api/app/auth/scopes.py b/api/app/auth/scopes.py new file mode 100644 index 0000000..75113e4 --- /dev/null +++ b/api/app/auth/scopes.py @@ -0,0 +1,5 @@ +"""This module contains predefined authorization scopes""" + +ADMIN = "admin" +AUTHENTICATED = "authenticated" +ANONYMOUS = "anonymous" diff --git a/api/app/callbacks/on_startup.py b/api/app/callbacks/on_startup.py index f064a47..ec883d3 100644 --- a/api/app/callbacks/on_startup.py +++ b/api/app/callbacks/on_startup.py @@ -1,7 +1,7 @@ """Module with functions call during API server startup""" -from ..api_logging import get_dds_logger +from utils.api_logging import get_dds_logger -from ..datastore.datastore import Datastore +from datastore.datastore import Datastore log = get_dds_logger(__name__) diff --git a/datastore/__init__.py b/api/app/const/__init__.py similarity index 100% rename from datastore/__init__.py rename to api/app/const/__init__.py diff --git a/api/app/const/tags.py b/api/app/const/tags.py new file mode 100644 index 0000000..58a2213 --- /dev/null +++ b/api/app/const/tags.py @@ -0,0 +1,5 @@ +"""The module with endpoint tags definitions""" + +BASIC = "basic" +DATASET = "dataset" +REQUEST = "request" diff --git a/api/app/const/venv.py b/api/app/const/venv.py new file mode 100644 index 0000000..85c3658 --- /dev/null +++ b/api/app/const/venv.py @@ -0,0 +1,7 @@ +"""This module contains all supported environment variable names""" + +ENDPOINT_PREFIX = "ENDPOINT_PREFIX" +ALLOWED_CORS_ORIGINS_REGEX = "ALLOWED_CORS_ORIGINS_REGEX" +LOGGING_FORMAT = "LOGGING_FORMAT" +LOGGING_LEVEL = "LOGGING_LEVEL" +WEB_COMPONENT_HOST = "WEB_COMPONENT_HOST" diff --git a/api/app/decorators_factory.py b/api/app/decorators_factory.py index 29acd4b..d2e4b39 100644 --- a/api/app/decorators_factory.py +++ b/api/app/decorators_factory.py @@ -22,10 +22,7 @@ def assert_parameters_are_defined( If a required parameter is not defined or is of wrong type """ for param_name, param_type in required_parameters: - if ( - param_name not in sig.parameters - or sig.parameters[param_name].annotation != param_type - ): + if param_name not in sig.parameters: raise TypeError( f"The parameter '{param_name}' annotated with the type" f" '{param_type}' must be defined for the callable decorated" diff --git a/api/app/encoders.py b/api/app/encoders.py index 30a4da5..9566f57 100644 --- a/api/app/encoders.py +++ b/api/app/encoders.py @@ -29,6 +29,7 @@ def make_ndarray_dtypes_valid(o: np.ndarray) -> np.ndarray: return o.astype(np.int64) if np.issubdtype(o.dtype, np.float32): return o.astype(np.float64) + return o def extend_json_encoders(): diff --git a/api/app/endpoint_handlers/__init__.py b/api/app/endpoint_handlers/__init__.py index 1297441..c5a44be 100644 --- a/api/app/endpoint_handlers/__init__.py +++ b/api/app/endpoint_handlers/__init__.py @@ -1,4 +1,3 @@ from . import file as file_handler from . import dataset as dataset_handler from . import request as request_handler -from .
import user as user_handler diff --git a/api/app/endpoint_handlers/dataset.py b/api/app/endpoint_handlers/dataset.py index 4036524..a3f8ca5 100644 --- a/api/app/endpoint_handlers/dataset.py +++ b/api/app/endpoint_handlers/dataset.py @@ -1,30 +1,32 @@ """Modules realizing logic for dataset-related endpoints""" +import os import pika from typing import Optional -from db.dbmanager.dbmanager import DBManager +from dbmanager.dbmanager import DBManager from geoquery.geoquery import GeoQuery +from geoquery.task import TaskList +from datastore.datastore import Datastore, DEFAULT_MAX_REQUEST_SIZE_GB +from datastore import exception as datastore_exception -from ..auth import Context -from ..auth.manager import ( +from utils.metrics import log_execution_time +from utils.api_logging import get_dds_logger +from auth.manager import ( is_role_eligible_for_product, - assert_is_role_eligible, ) -from ..auth import assert_not_anonymous -from ..api_logging import get_dds_logger -from .. import exceptions as exc -from ..utils import make_bytes_readable_dict -from ..metrics import log_execution_time -from ..validation import assert_product_exists -from ..datastore.datastore import Datastore +import exceptions as exc +from api_utils import make_bytes_readable_dict +from validation import assert_product_exists log = get_dds_logger(__name__) -data_store = Datastore(cache_path="/cache") +data_store = Datastore() + +MESSAGE_SEPARATOR = os.environ["MESSAGE_SEPARATOR"] @log_execution_time(log) -def get_datasets(context: Context) -> list[dict]: +def get_datasets(user_roles_names: list[str]) -> list[dict]: """Realize the logic for the endpoint: `GET /datasets` @@ -34,8 +36,8 @@ def get_datasets(context: Context) -> list[dict]: Parameters ---------- - context : Context - Context of the current http request + user_roles_names : list of str + List of user's roles Returns ------- @@ -50,15 +52,12 @@ def get_datasets(context: Context) -> list[dict]: """ log.debug( "getting all eligible products for datasets...", - extra={"rid": context.rid}, ) - user_roles_names = DBManager().get_user_roles_names(context.user.id) datasets = [] for dataset_id in data_store.dataset_list(): log.debug( "getting info and eligible products for `%s`", dataset_id, - extra={"rid": context.rid}, ) dataset_info = data_store.dataset_info(dataset_id=dataset_id) try: @@ -66,7 +65,6 @@ def get_datasets(context: Context) -> list[dict]: prod_name: prod_info for prod_name, prod_info in dataset_info["products"].items() if is_role_eligible_for_product( - context=context, product_role_name=prod_info.get("role"), user_roles_names=user_roles_names, ) @@ -76,7 +74,6 @@ def get_datasets(context: Context) -> list[dict]: "dataset `%s` does not have products defined", dataset_id, exc_info=True, - extra={"rid": context.rid}, ) raise exc.MissingKeyInCatalogEntryError( key="products", dataset=dataset_id @@ -84,11 +81,10 @@ def get_datasets(context: Context) -> list[dict]: else: if len(eligible_prods) == 0: log.debug( - "no eligible products for dataset `%s` for the user" - " `%s`. dataset skipped", + "no eligible products for dataset `%s` for the role `%s`." 
+ " dataset skipped", dataset_id, - context.user.id, - extra={"rid": context.rid}, + user_roles_names, ) else: dataset_info["products"] = eligible_prods @@ -99,7 +95,9 @@ def get_datasets(context: Context) -> list[dict]: @log_execution_time(log) @assert_product_exists def get_product_details( - context: Context, dataset_id: str, product_id: str + user_roles_names: list[str], + dataset_id: str, + product_id: Optional[str] = None, ) -> dict: """Realize the logic for the endpoint: @@ -110,12 +108,12 @@ def get_product_details( Parameters ---------- - context : Context - Context of the current http request + user_roles_names : list of str + List of user's roles dataset_id : str ID of the dataset - product_id : str - ID of the dataset + product_id : optional, str + ID of the product. If `None` the 1st product will be considered Returns ------- @@ -130,23 +128,26 @@ def get_product_details( log.debug( "getting details for eligible products of `%s`", dataset_id, - extra={"rid": context.rid}, ) - user_roles_names = DBManager().get_user_roles_names(context.user.id) - details = data_store.product_details( - dataset_id=dataset_id, product_id=product_id, use_cache=True - ) - assert_is_role_eligible( - context=context, - product_role_name=details["metadata"].get("role"), - user_roles_names=user_roles_names, - ) - return details + try: + if product_id: + return data_store.product_details( + dataset_id=dataset_id, + product_id=product_id, + role=user_roles_names, + use_cache=True, + ) + else: + return data_store.first_eligible_product_details( + dataset_id=dataset_id, role=user_roles_names, use_cache=True + ) + except datastore_exception.UnauthorizedError as err: + raise exc.AuthorizationFailed from err @log_execution_time(log) @assert_product_exists -def get_metadata(context: Context, dataset_id: str, product_id: str): +def get_metadata(dataset_id: str, product_id: str): """Realize the logic for the endpoint: `GET /datasets/{dataset_id}/{product_id}/metadata` @@ -155,8 +156,6 @@ def get_metadata(context: Context, dataset_id: str, product_id: str): Parameters ---------- - context : Context - Context of the current http request dataset_id : str ID of the dataset product_id : str @@ -164,7 +163,6 @@ def get_metadata(context: Context, dataset_id: str, product_id: str): """ log.debug( "getting metadata for '{dataset_id}.{product_id}'", - extra={"rid": context.rid}, ) return data_store.product_metadata(dataset_id, product_id) @@ -172,7 +170,6 @@ def get_metadata(context: Context, dataset_id: str, product_id: str): @log_execution_time(log) @assert_product_exists def estimate( - context: Context, dataset_id: str, product_id: str, query: GeoQuery, @@ -215,10 +212,9 @@ def estimate( @log_execution_time(log) -@assert_not_anonymous @assert_product_exists def query( - context: Context, + user_id: str, dataset_id: str, product_id: str, query: GeoQuery, @@ -231,8 +227,8 @@ def query( Parameters ---------- - context : Context - Context of the current http request + user_id : str + ID of the user executing the query dataset_id : str ID of the dataset product_id : str @@ -247,14 +243,16 @@ def query( Raises ------- - AuthorizationFailed + MaximumAllowedSizeExceededError + if the allowed size is below the estimated one + EmptyDatasetError + if estimated size is zero + """ - log.debug("geoquery: %s", query, extra={"rid": context.rid}) - estimated_size = estimate( - context, dataset_id, product_id, query, "GB" - ).get("value") + log.debug("geoquery: %s", query) + estimated_size = estimate(dataset_id, product_id, query, 
"GB").get("value") allowed_size = data_store.product_metadata(dataset_id, product_id).get( - "maximum_query_size_gb", 10 + "maximum_query_size_gb", DEFAULT_MAX_REQUEST_SIZE_GB ) if estimated_size > allowed_size: raise exc.MaximumAllowedSizeExceededError( @@ -263,20 +261,90 @@ def query( estimated_size_gb=estimated_size, allowed_size_gb=allowed_size, ) + if estimated_size == 0.0: + raise exc.EmptyDatasetError( + dataset_id=dataset_id, product_id=product_id + ) broker_conn = pika.BlockingConnection( - pika.ConnectionParameters(host="broker") + pika.ConnectionParameters( + host=os.getenv("BROKER_SERVICE_HOST", "broker") + ) ) broker_channel = broker_conn.channel() request_id = DBManager().create_request( - user_id=context.user.id, + user_id=user_id, dataset=dataset_id, product=product_id, query=query.original_query_json(), ) # TODO: find a separator; for the moment use "\" - message = f"{request_id}\\{dataset_id}\\{product_id}\\{query.json()}" + message = MESSAGE_SEPARATOR.join( + [str(request_id), "query", dataset_id, product_id, query.json()] + ) + + broker_channel.basic_publish( + exchange="", + routing_key="query_queue", + body=message, + properties=pika.BasicProperties( + delivery_mode=2, # make message persistent + ), + ) + broker_conn.close() + return request_id + + +@log_execution_time(log) +def run_workflow( + user_id: str, + workflow: TaskList, +): + """Realize the logic for the endpoint: + + `POST /datasets/workflow` + + Schedule the workflow and return the ID of the request. + + Parameters + ---------- + user_id : str + ID of the user executing the query + workflow : TaskList + Workflow to perform + + Returns + ------- + request_id : int + ID of the request + + Raises + ------- + MaximumAllowedSizeExceededError + if the allowed size is below the estimated one + EmptyDatasetError + if estimated size is zero + + """ + log.debug("geoquery: %s", workflow) + broker_conn = pika.BlockingConnection( + pika.ConnectionParameters( + host=os.getenv("BROKER_SERVICE_HOST", "broker") + ) + ) + broker_channel = broker_conn.channel() + request_id = DBManager().create_request( + user_id=user_id, + dataset=workflow.dataset_id, + product=workflow.product_id, + query=workflow.json(), + ) + + # TODO: find a separator; for the moment use "\" + message = MESSAGE_SEPARATOR.join( + [str(request_id), "workflow", workflow.json()] + ) broker_channel.basic_publish( exchange="", diff --git a/api/app/endpoint_handlers/file.py b/api/app/endpoint_handlers/file.py index 47d6a19..04cf562 100644 --- a/api/app/endpoint_handlers/file.py +++ b/api/app/endpoint_handlers/file.py @@ -2,18 +2,17 @@ import os from fastapi.responses import FileResponse -from db.dbmanager.dbmanager import DBManager, RequestStatus +from dbmanager.dbmanager import DBManager, RequestStatus -from ..auth import Context -from ..api_logging import get_dds_logger -from ..metrics import log_execution_time -from .. 
import exceptions as exc +from utils.api_logging import get_dds_logger +from utils.metrics import log_execution_time +import exceptions as exc log = get_dds_logger(__name__) @log_execution_time(log) -def download_request_result(context: Context, request_id: int): +def download_request_result(request_id: int): """Realize the logic for the endpoint: `GET /download/{request_id}` @@ -23,8 +22,6 @@ Parameters ---------- - context : Context - Context of the current http request request_id : int ID of the request @@ -43,7 +40,6 @@ log.debug( "preparing downloads for request id: %s", request_id, - extra={"rid": context.rid}, ) ( request_status, @@ -53,7 +49,6 @@ log.debug( "request with id: '%s' does not exist or it is not finished yet!", request_id, - extra={"rid": context.rid}, ) raise exc.RequestNotYetAccomplished(request_id=request_id) download_details = DBManager().get_download_details_for_request( @@ -63,7 +58,6 @@ log.error( "file '%s' does not exists!", download_details.location_path, - extra={"rid": context.rid}, ) raise FileNotFoundError return FileResponse( diff --git a/api/app/endpoint_handlers/request.py b/api/app/endpoint_handlers/request.py index fbcca06..320bceb 100644 --- a/api/app/endpoint_handlers/request.py +++ b/api/app/endpoint_handlers/request.py @@ -1,17 +1,15 @@ """Modules with functions realizing logic for requests-related endpoints""" -from db.dbmanager.dbmanager import DBManager +from dbmanager.dbmanager import DBManager -from ..auth import Context, assert_not_anonymous -from ..api_logging import get_dds_logger -from .. import exceptions as exc -from ..metrics import log_execution_time +from utils.api_logging import get_dds_logger +from utils.metrics import log_execution_time +import exceptions as exc log = get_dds_logger(__name__) @log_execution_time(log) -@assert_not_anonymous -def get_requests(context: Context): +def get_requests(user_id: str): """Realize the logic for the endpoint: `GET /requests` @@ -20,19 +18,19 @@ Parameters ---------- - context : Context - Context of the current http request + user_id : str + ID of the user for whom requests are retrieved Returns ------- requests : list List of all requests done by the user """ - return DBManager().get_requests_for_user_id(user_id=context.user.id) + return DBManager().get_requests_for_user_id(user_id=user_id) @log_execution_time(log) -def get_request_status(context: Context, request_id: int): +def get_request_status(user_id: str, request_id: int): """Realize the logic for the endpoint: `GET /requests/{request_id}/status` @@ -42,8 +40,8 @@ Parameters ---------- - context : Context - Context of the current http request + user_id : str + ID of the user whose request status is to be checked request_id : int ID of the request @@ -52,20 +50,20 @@ status : tuple Tuple of status and fail reason.
""" + # NOTE: maybe verification should be added if user checks only him\her requests try: status, reason = DBManager().get_request_status_and_reason(request_id) except IndexError as err: log.error( "request with id: '%s' was not found!", request_id, - extra={"rid": context.rid}, ) raise exc.RequestNotFound(request_id=request_id) from err return {"status": status.name, "fail_reason": reason} @log_execution_time(log) -def get_request_resulting_size(context: Context, request_id: int): +def get_request_resulting_size(request_id: int): """Realize the logic for the endpoint: `GET /requests/{request_id}/size` @@ -74,8 +72,6 @@ def get_request_resulting_size(context: Context, request_id: int): Parameters ---------- - context : Context - Context of the current http request request_id : int ID of the request @@ -94,13 +90,12 @@ def get_request_resulting_size(context: Context, request_id: int): log.info( "request with id '%s' could not be found", request_id, - extra={"rid": context.rid}, ) raise exc.RequestNotFound(request_id=request_id) @log_execution_time(log) -def get_request_uri(context: Context, request_id: int): +def get_request_uri(request_id: int): """ Realize the logic for the endpoint: @@ -126,7 +121,6 @@ def get_request_uri(context: Context, request_id: int): log.error( "request with id: '%s' was not found!", request_id, - extra={"rid": context.rid}, ) raise exc.RequestNotFound(request_id=request_id) from err if download_details is None: @@ -139,7 +133,6 @@ def get_request_uri(context: Context, request_id: int): " Request status is '%s'", request_id, request_status, - extra={"rid": context.rid}, ) raise exc.RequestStatusNotDone( request_id=request_id, request_status=request_status diff --git a/api/app/endpoint_handlers/user.py b/api/app/endpoint_handlers/user.py deleted file mode 100644 index 60a6c4a..0000000 --- a/api/app/endpoint_handlers/user.py +++ /dev/null @@ -1,46 +0,0 @@ -"""Modules realizing logic for user-related endpoints""" -from typing import Optional - -from pydantic import BaseModel -from db.dbmanager.dbmanager import DBManager - -from ..auth import Context, assert_not_anonymous -from ..api_logging import get_dds_logger -from ..metrics import log_execution_time - -log = get_dds_logger(__name__) - - -class UserDTO(BaseModel): - """DTO class containing information about a user to store in the DB""" - - contact_name: str - user_id: Optional[str] = None - api_key: Optional[str] = None - roles: Optional[list[str]] = None - - -@log_execution_time(log) -@assert_not_anonymous -def add_user(context: Context, user: UserDTO): - """Add a user to the database - - Parameters - ---------- - context : Context - Context of the current http request - user: UserDTO - User to be added - - Returns - ------- - user_id : UUID - ID of the newly created user in the database - """ - # TODO: some admin priviliges check - return DBManager().add_user( - contact_name=user.contact_name, - user_id=user.user_id, - api_key=user.api_key, - roles_names=user.roles, - ) diff --git a/api/app/exceptions.py b/api/app/exceptions.py index a1d54f7..01de71c 100644 --- a/api/app/exceptions.py +++ b/api/app/exceptions.py @@ -125,9 +125,9 @@ class AuthorizationFailed(BaseDDSException): msg: str = "{user} is not authorized for the resource!" 
code: int = 403 - def __init__(self, user_id: Optional[str]): + def __init__(self, user_id: Optional[str] = None): if user_id is None: - self.msg = self.msg.format(user="Anonymous user") + self.msg = self.msg.format(user="User") else: self.msg = self.msg.format(user=f"User '{user_id}'") super().__init__(self.msg) @@ -167,3 +167,16 @@ def __init__(self, dataset_id: str, product_id: str): dataset_id=dataset_id, product_id=product_id ) super().__init__(self.msg) + + +class EmptyDatasetError(BaseDDSException): + """The size of the requested dataset is zero""" + + msg: str = "The resulting dataset '{dataset_id}.{product_id}' is empty" + + def __init__(self, dataset_id, product_id): + self.msg = self.msg.format( + dataset_id=dataset_id, + product_id=product_id, + ) + super().__init__(self.msg) diff --git a/api/app/main.py b/api/app/main.py index f1d1f2e..cc6a71c 100644 --- a/api/app/main.py +++ b/api/app/main.py @@ -1,37 +1,36 @@ """Main module with dekube-dds API endpoints defined""" __version__ = "2.0" import os -from uuid import uuid4 from typing import Optional -from fastapi import FastAPI, Header, HTTPException, Request +from fastapi import FastAPI, HTTPException, Request, status from fastapi.middleware.cors import CORSMiddleware +from starlette.middleware.authentication import AuthenticationMiddleware +from starlette.authentication import requires from aioprometheus import ( Counter, Summary, - Gauge, timer, - inprogress, - count_exceptions, MetricsMiddleware, ) from aioprometheus.asgi.starlette import metrics from geoquery.geoquery import GeoQuery +from geoquery.task import TaskList -from .auth.context import ContextCreator -from .api_logging import get_dds_logger -from . import exceptions as exc -from .endpoint_handlers import ( +from utils.api_logging import get_dds_logger +import exceptions as exc +from endpoint_handlers import ( dataset_handler, file_handler, request_handler, - user_handler, ) -from .endpoint_handlers.user import UserDTO -from .callbacks import all_onstartup_callbacks -from .encoders import extend_json_encoders +from auth.backend import DDSAuthenticationBackend +from callbacks import all_onstartup_callbacks +from encoders import extend_json_encoders +from const import venv, tags +from auth import scopes logger = get_dds_logger(__name__) @@ -50,14 +49,20 @@ "name": "Apache 2.0", "url": "https://www.apache.org/licenses/LICENSE-2.0.html", }, - root_path=os.environ.get("ENDPOINT_PREFIX", "/api"), + root_path=os.environ.get(venv.ENDPOINT_PREFIX, "/api"), on_startup=all_onstartup_callbacks, ) +# ======== Authentication backend ========= # +app.add_middleware( + AuthenticationMiddleware, backend=DDSAuthenticationBackend() +) + # ======== CORS ========= # -if "ALLOWED_CORS_ORIGINS_REGEX" in os.environ: +cors_kwargs: dict[str, str | list[str]] +if venv.ALLOWED_CORS_ORIGINS_REGEX in os.environ: cors_kwargs = { - "allow_origin_regex": os.environ["ALLOWED_CORS_ORIGINS_REGEX"] + "allow_origin_regex": os.environ[venv.ALLOWED_CORS_ORIGINS_REGEX] } else: cors_kwargs = {"allow_origins": ["*"]} @@ -70,6 +75,7 @@ **cors_kwargs, ) + # ======== Prometheus metrics ========= # app.add_middleware(MetricsMiddleware) app.add_route("/metrics", metrics) @@ -80,76 +86,69 @@ app.state.api_http_requests_total = Counter( "api_http_requests_total", "Total number of requests" ) -app.state.api_exceptions_total = Counter( - "api_exceptions_total", "Total number of exception raised" -) -app.state.api_requests_inprogress_total = Gauge( - "api_requests_inprogress_total", "Endpoints being currently in 
progress" -) + # ======== Endpoints definitions ========= # -@app.get("/") +@app.get("/", tags=[tags.BASIC]) async def dds_info(): """Return current version of the DDS API""" return f"DDS API {__version__}" -@app.get("/datasets") +@app.get("/datasets", tags=[tags.DATASET]) @timer( app.state.api_request_duration_seconds, labels={"route": "GET /datasets"} ) -@inprogress( - app.state.api_requests_inprogress_total, labels={"route": "GET /datasets"} -) -@count_exceptions( - app.state.api_exceptions_total, labels={"route": "GET /datasets"} -) -async def get_datasets( - request: Request, - dds_request_id: str = Header(str(uuid4()), convert_underscores=True), - user_token: Optional[str] = Header(None, convert_underscores=True), -): +async def get_datasets(request: Request): """List all products eligible for a user defined by user_token""" app.state.api_http_requests_total.inc({"route": "GET /datasets"}) try: - context = ContextCreator.new_context( - request, rid=dds_request_id, user_token=user_token + return dataset_handler.get_datasets( + user_roles_names=request.auth.scopes ) - return dataset_handler.get_datasets(context) except exc.BaseDDSException as err: raise err.wrap_around_http_exception() from err -@app.get("/datasets/{dataset_id}/{product_id}") +@app.get("/datasets/{dataset_id}", tags=[tags.DATASET]) @timer( app.state.api_request_duration_seconds, - labels={"route": "GET /datasets/{dataset_id}/{product_id}"}, -) -@inprogress( - app.state.api_requests_inprogress_total, - labels={"route": "GET /datasets/{dataset_id}/{product_id}"}, + labels={"route": "GET /datasets/{dataset_id}"}, ) -@count_exceptions( - app.state.api_exceptions_total, +async def get_first_product_details( + request: Request, + dataset_id: str, +): + """Get details for the 1st product of the dataset""" + app.state.api_http_requests_total.inc( + {"route": "GET /datasets/{dataset_id}"} + ) + try: + return dataset_handler.get_product_details( + user_roles_names=request.auth.scopes, + dataset_id=dataset_id, + ) + except exc.BaseDDSException as err: + raise err.wrap_around_http_exception() from err + + +@app.get("/datasets/{dataset_id}/{product_id}", tags=[tags.DATASET]) +@timer( + app.state.api_request_duration_seconds, labels={"route": "GET /datasets/{dataset_id}/{product_id}"}, ) async def get_product_details( request: Request, dataset_id: str, product_id: str, - dds_request_id: str = Header(str(uuid4()), convert_underscores=True), - user_token: Optional[str] = Header(None, convert_underscores=True), ): """Get details for the requested product if user is authorized""" app.state.api_http_requests_total.inc( {"route": "GET /datasets/{dataset_id}/{product_id}"} ) try: - context = ContextCreator.new_context( - request, rid=dds_request_id, user_token=user_token - ) return dataset_handler.get_product_details( - context, + user_roles_names=request.auth.scopes, dataset_id=dataset_id, product_id=product_id, ) @@ -157,7 +156,7 @@ async def get_product_details( raise err.wrap_around_http_exception() from err -@app.get("/datasets/{dataset_id}/{product_id}/metadata") +@app.get("/datasets/{dataset_id}/{product_id}/metadata", tags=[tags.DATASET]) @timer( app.state.api_request_duration_seconds, labels={"route": "GET /datasets/{dataset_id}/{product_id}/metadata"}, @@ -166,44 +165,29 @@ async def get_metadata( request: Request, dataset_id: str, product_id: str, - dds_request_id: str = Header(str(uuid4()), convert_underscores=True), - user_token: Optional[str] = Header(None, convert_underscores=True), ): """Get metadata of the given product""" 
app.state.api_http_requests_total.inc( {"route": "GET /datasets/{dataset_id}/{product_id}/metadata"} ) try: - context = ContextCreator.new_context( - request, rid=dds_request_id, user_token=user_token - ) return dataset_handler.get_metadata( - context, dataset_id=dataset_id, product_id=product_id + dataset_id=dataset_id, product_id=product_id ) except exc.BaseDDSException as err: raise err.wrap_around_http_exception() from err -@app.post("/datasets/{dataset_id}/{product_id}/estimate") +@app.post("/datasets/{dataset_id}/{product_id}/estimate", tags=[tags.DATASET]) @timer( app.state.api_request_duration_seconds, labels={"route": "POST /datasets/{dataset_id}/{product_id}/estimate"}, ) -@inprogress( - app.state.api_requests_inprogress_total, - labels={"route": "POST /datasets/{dataset_id}/{product_id}/estimate"}, -) -@count_exceptions( - app.state.api_exceptions_total, - labels={"route": "POST /datasets/{dataset_id}/{product_id}/estimate"}, -) async def estimate( request: Request, dataset_id: str, product_id: str, query: GeoQuery, - dds_request_id: str = Header(str(uuid4()), convert_underscores=True), - user_token: Optional[str] = Header(None, convert_underscores=True), unit: str = None, ): """Estimate the resulting size of the query""" @@ -211,11 +195,7 @@ async def estimate( {"route": "POST /datasets/{dataset_id}/{product_id}/estimate"} ) try: - context = ContextCreator.new_context( - request, rid=dds_request_id, user_token=user_token - ) return dataset_handler.estimate( - context, dataset_id=dataset_id, product_id=product_id, query=query, @@ -225,37 +205,25 @@ async def estimate( raise err.wrap_around_http_exception() from err -@app.post("/datasets/{dataset_id}/{product_id}/execute") +@app.post("/datasets/{dataset_id}/{product_id}/execute", tags=[tags.DATASET]) @timer( app.state.api_request_duration_seconds, labels={"route": "POST /datasets/{dataset_id}/{product_id}/execute"}, ) -@inprogress( - app.state.api_requests_inprogress_total, - labels={"route": "POST /datasets/{dataset_id}/{product_id}/execute"}, -) -@count_exceptions( - app.state.api_exceptions_total, - labels={"route": "POST /datasets/{dataset_id}/{product_id}/execute"}, -) +@requires([scopes.AUTHENTICATED]) async def query( request: Request, dataset_id: str, product_id: str, query: GeoQuery, - dds_request_id: str = Header(str(uuid4()), convert_underscores=True), - user_token: Optional[str] = Header(None, convert_underscores=True), ): """Schedule the job of data retrieve""" app.state.api_http_requests_total.inc( {"route": "POST /datasets/{dataset_id}/{product_id}/execute"} ) try: - context = ContextCreator.new_context( - request, rid=dds_request_id, user_token=user_token - ) return dataset_handler.query( - context, + user_id=request.user.id, dataset_id=dataset_id, product_id=product_id, query=query, @@ -264,152 +232,126 @@ async def query( raise err.wrap_around_http_exception() from err -@app.get("/requests") +@app.post("/datasets/workflow", tags=[tags.DATASET]) +@timer( + app.state.api_request_duration_seconds, + labels={"route": "POST /datasets/workflow"}, +) +@requires([scopes.AUTHENTICATED]) +async def workflow( + request: Request, + tasks: TaskList, +): + """Schedule the job of workflow processing""" + app.state.api_http_requests_total.inc({"route": "POST /datasets/workflow"}) + try: + return dataset_handler.run_workflow( + user_id=request.user.id, + workflow=tasks, + ) + except exc.BaseDDSException as err: + raise err.wrap_around_http_exception() from err + + +@app.get("/requests", tags=[tags.REQUEST]) @timer( 
app.state.api_request_duration_seconds, labels={"route": "GET /requests"} ) +@requires([scopes.AUTHENTICATED]) async def get_requests( request: Request, - dds_request_id: str = Header(str(uuid4()), convert_underscores=True), - user_token: Optional[str] = Header(None, convert_underscores=True), ): """Get all requests for the user""" app.state.api_http_requests_total.inc({"route": "GET /requests"}) try: - context = ContextCreator.new_context( - request, rid=dds_request_id, user_token=user_token - ) - return request_handler.get_requests(context) + return request_handler.get_requests(request.user.id) except exc.BaseDDSException as err: raise err.wrap_around_http_exception() from err -@app.get("/requests/{request_id}/status") +@app.get("/requests/{request_id}/status", tags=[tags.REQUEST]) @timer( app.state.api_request_duration_seconds, labels={"route": "GET /requests/{request_id}/status"}, ) +@requires([scopes.AUTHENTICATED]) async def get_request_status( request: Request, request_id: int, - dds_request_id: str = Header(str(uuid4()), convert_underscores=True), - user_token: Optional[str] = Header(None, convert_underscores=True), ): """Get status of the request without authentication""" - # NOTE: no auth required for checking status app.state.api_http_requests_total.inc( {"route": "GET /requests/{request_id}/status"} ) try: - context = ContextCreator.new_context( - request, rid=dds_request_id, user_token=user_token - ) return request_handler.get_request_status( - context, request_id=request_id + user_id=request.user.id, request_id=request_id ) except exc.BaseDDSException as err: raise err.wrap_around_http_exception() from err -@app.get("/requests/{request_id}/size") +@app.get("/requests/{request_id}/size", tags=[tags.REQUEST]) @timer( app.state.api_request_duration_seconds, labels={"route": "GET /requests/{request_id}/size"}, ) +@requires([scopes.AUTHENTICATED]) async def get_request_resulting_size( request: Request, request_id: int, - dds_request_id: str = Header(str(uuid4()), convert_underscores=True), - user_token: Optional[str] = Header(None, convert_underscores=True), ): """Get size of the file being the result of the request""" app.state.api_http_requests_total.inc( {"route": "GET /requests/{request_id}/size"} ) try: - context = ContextCreator.new_context( - request, rid=dds_request_id, user_token=user_token - ) return request_handler.get_request_resulting_size( - context, request_id=request_id + request_id=request_id ) except exc.BaseDDSException as err: raise err.wrap_around_http_exception() from err -@app.get("/requests/{request_id}/uri") +@app.get("/requests/{request_id}/uri", tags=[tags.REQUEST]) @timer( app.state.api_request_duration_seconds, labels={"route": "GET /requests/{request_id}/uri"}, ) +@requires([scopes.AUTHENTICATED]) async def get_request_uri( request: Request, request_id: int, - dds_request_id: str = Header(str(uuid4()), convert_underscores=True), - user_token: Optional[str] = Header(None, convert_underscores=True), ): """Get download URI for the request""" app.state.api_http_requests_total.inc( {"route": "GET /requests/{request_id}/uri"} ) try: - context = ContextCreator.new_context( - request, rid=dds_request_id, user_token=user_token - ) - return request_handler.get_request_uri(context, request_id=request_id) + return request_handler.get_request_uri(request_id=request_id) except exc.BaseDDSException as err: raise err.wrap_around_http_exception() from err -@app.get("/download/{request_id}") +@app.get("/download/{request_id}", tags=[tags.REQUEST]) @timer( 
app.state.api_request_duration_seconds, labels={"route": "GET /download/{request_id}"}, ) +# @requires([scopes.AUTHENTICATED]) # TODO: mange download auth in the web component async def download_request_result( request: Request, request_id: int, - dds_request_id: str = Header(str(uuid4()), convert_underscores=True), - user_token: Optional[str] = Header(None, convert_underscores=True), ): """Download result of the request""" app.state.api_http_requests_total.inc( {"route": "GET /download/{request_id}"} ) try: - context = ContextCreator.new_context( - request, rid=dds_request_id, user_token=user_token - ) - return file_handler.download_request_result( - context, request_id=request_id - ) + return file_handler.download_request_result(request_id=request_id) except exc.BaseDDSException as err: raise err.wrap_around_http_exception() from err except FileNotFoundError as err: raise HTTPException( - status_code=404, detail="File was not found!" + status_code=status.HTTP_404_NOT_FOUND, detail="File was not found!" ) from err - - -@app.post("/users/add") -@timer( - app.state.api_request_duration_seconds, - labels={"route": "POST /users/add/"}, -) -async def add_user( - request: Request, - user: UserDTO, - dds_request_id: str = Header(str(uuid4()), convert_underscores=True), - user_token: Optional[str] = Header(None, convert_underscores=True), -): - """Add user to the database""" - app.state.api_http_requests_total.inc({"route": "POST /users/add/"}) - try: - context = ContextCreator.new_context( - request, rid=dds_request_id, user_token=user_token - ) - return user_handler.add_user(context, user) - except exc.BaseDDSException as err: - raise err.wrap_around_http_exception() from err - except Exception as err: - raise HTTPException(status_code=400, detail=str(err)) from err diff --git a/api/app/validation.py b/api/app/validation.py index 13faec4..51bdbc1 100644 --- a/api/app/validation.py +++ b/api/app/validation.py @@ -1,10 +1,10 @@ -from .api_logging import get_dds_logger -from .decorators_factory import assert_parameters_are_defined, bind_arguments +from datastore.datastore import Datastore +from utils.api_logging import get_dds_logger +from decorators_factory import assert_parameters_are_defined, bind_arguments from functools import wraps from inspect import signature -from . 
import exceptions as exc +import exceptions as exc -from .datastore.datastore import Datastore log = get_dds_logger(__name__) @@ -24,7 +24,10 @@ def assert_inner(*args, **kwargs): product_id = args_dict["product_id"] if dataset_id not in Datastore().dataset_list(): raise exc.MissingDatasetError(dataset_id=dataset_id) - elif product_id not in Datastore().product_list(dataset_id): + elif ( + product_id is not None + and product_id not in Datastore().product_list(dataset_id) + ): raise exc.MissingProductError( dataset_id=dataset_id, product_id=product_id ) diff --git a/api/requirements.txt b/api/requirements.txt index 0a12456..97fcaf3 100644 --- a/api/requirements.txt +++ b/api/requirements.txt @@ -1,8 +1,5 @@ fastapi -pydantic uvicorn pika -intake sqlalchemy -pytest -aioprometheus \ No newline at end of file +aioprometheus diff --git a/backup/Dockerfile b/backup/Dockerfile deleted file mode 100644 index 00b6af1..0000000 --- a/backup/Dockerfile +++ /dev/null @@ -1,3 +0,0 @@ -FROM postgres:14.1 -RUN mkdir /scripts -COPY do_backup.sh /scripts/do_backup.sh \ No newline at end of file diff --git a/backup/do_backup.sh b/backup/do_backup.sh deleted file mode 100644 index 004228e..0000000 --- a/backup/do_backup.sh +++ /dev/null @@ -1,9 +0,0 @@ -set -x -# -# backup geokube-dds DB -# -echo "[$(date)] [info] Start geokube-dds DB backup" -TIMESTAMP=$(date +%Y%m%d-%H%M%S) -FILENAME=/snapshots/${POSTGRES_DB}-${TIMESTAMP}.bak -echo "[$(date)] [info] Dumping geokube-dds DB in ${FILENAME}" -PGPASSWORD=${POSTGRES_PASSWORD} pg_dump -U ${POSTGRES_USER} -h ${POSTGRES_HOST} -Fc -f ${FILENAME} ${POSTGRES_DB} \ No newline at end of file diff --git a/datastore/Dockerfile b/datastore/Dockerfile new file mode 100644 index 0000000..78056e8 --- /dev/null +++ b/datastore/Dockerfile @@ -0,0 +1,16 @@ +ARG REGISTRY=rg.fr-par.scw.cloud/geolake +ARG TAG=latest +FROM $REGISTRY/geolake-drivers:$TAG +RUN conda install -c conda-forge --yes --freeze-installed psycopg2 \ + && conda clean -afy +COPY requirements.txt /app/requirements.txt +RUN pip install --no-cache-dir -r /app/requirements.txt +COPY ./datastore /app/datastore +COPY ./workflow /app/workflow +COPY ./dbmanager /app/dbmanager +COPY ./geoquery /app/geoquery +COPY ./utils /app/utils +COPY ./tests /app/tests +RUN cp /opt/conda/lib/python3.10/site-packages/intake_geokube/geoquery.py /app/geoquery/geoquery.py +COPY ./wait-for-it.sh / + diff --git a/db/dbmanager/__init__.py b/datastore/datastore/__init__.py similarity index 100% rename from db/dbmanager/__init__.py rename to datastore/datastore/__init__.py diff --git a/datastore/datastore/const.py b/datastore/datastore/const.py new file mode 100644 index 0000000..22435bc --- /dev/null +++ b/datastore/datastore/const.py @@ -0,0 +1,6 @@ +"""This module contains useful constants definitions grouped into classes""" + + +class BaseRole: + PUBLIC = "public" + ADMIN = "admin" diff --git a/datastore/datastore.py b/datastore/datastore/datastore.py similarity index 58% rename from datastore/datastore.py rename to datastore/datastore/datastore.py index d55f5a7..20a5f27 100644 --- a/datastore/datastore.py +++ b/datastore/datastore/datastore.py @@ -6,6 +6,7 @@ import json import intake +from dask.delayed import Delayed from geoquery.geoquery import GeoQuery @@ -14,6 +15,10 @@ from .singleton import Singleton from .util import log_execution_time +from .const import BaseRole +from .exception import UnauthorizedError + +DEFAULT_MAX_REQUEST_SIZE_GB = 10 class Datastore(metaclass=Singleton): @@ -21,7 +26,7 @@ class 
Datastore(metaclass=Singleton): _LOG = logging.getLogger("geokube.Datastore") - def __init__(self, cache_path: str = "./") -> None: + def __init__(self) -> None: if "CATALOG_PATH" not in os.environ: self._LOG.error( "missing required environment variable: 'CATALOG_PATH'" @@ -29,17 +34,27 @@ def __init__(self, cache_path: str = "./") -> None: raise KeyError( "Missing required environment variable: 'CATALOG_PATH'" ) - cat = intake.open_catalog(os.environ["CATALOG_PATH"]) - # self.catalog = cat(CACHE_DIR=cache_path) - self.catalog = cat + if "CACHE_PATH" not in os.environ: + self._LOG.error( + "'CACHE_PATH' environment variable was not set. catalog will" + " not be opened!" + ) + raise RuntimeError( + "'CACHE_PATH' environment variable was not set. catalog will" + " not be opened!" + ) + self.catalog = intake.open_catalog(os.environ["CATALOG_PATH"]) + self.cache_dir = os.environ["CACHE_PATH"] + self._LOG.info("cache dir set to %s", self.cache_dir) self.cache = None @log_execution_time(_LOG) - def get_cached_product( + def get_cached_product_or_read( self, dataset_id: str, product_id: str ) -> DataCube | Dataset: - """Get product from the cache rather than directly loading from - the catalog. If might be `geokube.DataCube` or `geokube.Dataset`. + """Get product from the cache instead of loading files indicated in + the catalog if `metadata_caching` is set to `True`. + It might return `geokube.DataCube` or `geokube.Dataset`. Parameters ------- @@ -51,7 +66,6 @@ def get_cached_product( Returns ------- kube : DataCube or Dataset - Data stored in the cache (either `geokube.DataCube` or `geokube.Dataset`) """ if self.cache is None: self._load_cache() @@ -65,7 +79,7 @@ def get_cached_product( dataset_id, product_id, ) - self.cache[dataset_id][product_id] = self.catalog[dataset_id][ + return self.catalog(CACHE_DIR=self.cache_dir)[dataset_id][ product_id ].read_chunked() return self.cache[dataset_id][product_id] @@ -83,10 +97,20 @@ def _load_cache(self): ) self.cache[dataset_id] = {} for product_id in self.product_list(dataset_id): + catalog_entry = self.catalog(CACHE_DIR=self.cache_dir)[ + dataset_id + ][product_id] + if not catalog_entry.metadata_caching: + self._LOG.info( + "`metadata_caching` for product %s.%s set to `False`", + dataset_id, + product_id, + ) + continue try: - self.cache[dataset_id][product_id] = self.catalog[ - dataset_id - ][product_id].read_chunked() + self.cache[dataset_id][ + product_id + ] = catalog_entry.read_chunked() except ValueError: self._LOG.error( "failed to load cache for `%s.%s`", @@ -105,7 +129,7 @@ def dataset_list(self) -> list: datasets : list List of datasets present in the catalog """ - datasets = set(self.catalog) + datasets = set(self.catalog(CACHE_DIR=self.cache_dir)) datasets -= { "medsea-rea-e3r1", } @@ -128,7 +152,7 @@ def product_list(self, dataset_id: str): products : list List of products for the dataset """ - return list(self.catalog[dataset_id]) + return list(self.catalog(CACHE_DIR=self.cache_dir)[dataset_id]) @log_execution_time(_LOG) def dataset_info(self, dataset_id: str): @@ -146,15 +170,17 @@ def dataset_info(self, dataset_id: str): Dict of short information about the dataset """ info = {} - entry = self.catalog[dataset_id] + entry = self.catalog(CACHE_DIR=self.cache_dir)[dataset_id] if entry.metadata: info["metadata"] = entry.metadata info["metadata"]["id"] = dataset_id info["products"] = {} - for product_id in self.catalog[dataset_id]: - entry = self.catalog[dataset_id][product_id] - info["products"][product_id] = entry.metadata -
info["products"][product_id]["description"] = entry.description + for product_id in entry: + prod_entry = entry[product_id] + info["products"][product_id] = prod_entry.metadata + info["products"][product_id][ + "description" + ] = prod_entry.description return info @log_execution_time(_LOG) @@ -173,11 +199,70 @@ def product_metadata(self, dataset_id: str, product_id: str): metadata : dict DatasetMetadata of the product """ - return self.catalog[dataset_id][product_id].metadata + return self.catalog(CACHE_DIR=self.cache_dir)[dataset_id][ + product_id + ].metadata + + @log_execution_time(_LOG) + def first_eligible_product_details( + self, + dataset_id: str, + role: str | list[str] | None = None, + use_cache: bool = False, + ): + """Get details for the 1st product of the dataset eligible for the `role`. + If `role` is `None`, the `public` role is considered. + + Parameters + ---------- + dataset_id : str + ID of the dataset + role : optional str or list of str, default=`None` + Role code for which the 1st eligible product of a dataset + should be selected + use_cache : bool, optional, default=False + Data will be loaded from cache if set to `True` or directly + from the catalog otherwise + + Returns + ------- + details : dict + Details of the product + + Raises + ------ + UnauthorizedError + if none of product of the requested dataset is eligible for a role + """ + info = {} + product_ids = self.product_list(dataset_id) + for prod_id in product_ids: + if not self.is_product_valid_for_role( + dataset_id, prod_id, role=role + ): + continue + entry = self.catalog(CACHE_DIR=self.cache_dir)[dataset_id][prod_id] + if entry.metadata: + info["metadata"] = entry.metadata + info["description"] = entry.description + info["id"] = prod_id + info["dataset"] = self.dataset_info(dataset_id=dataset_id) + if use_cache: + info["data"] = self.get_cached_product_or_read( + dataset_id, prod_id + ).to_dict() + else: + info["data"] = entry.read_chunked().to_dict() + return info + raise UnauthorizedError() @log_execution_time(_LOG) def product_details( - self, dataset_id: str, product_id: str, use_cache: bool = False + self, + dataset_id: str, + product_id: str, + role: str | list[str] | None = None, + use_cache: bool = False, ): """Get details for the single product @@ -187,48 +272,54 @@ def product_details( ID of the dataset product_id : str ID of the product + role : optional str or list of str, default=`None` + Role code for which the the product is requested. 
use_cache : bool, optional, default=False Data will be loaded from cache if set to `True` or directly from the catalog otherwise - Returns ------- details : dict Details of the product + + Raises + ------ + UnauthorizedError + if the requested product is not eligible for a role """ info = {} - entry = self.catalog[dataset_id][product_id] + if not self.is_product_valid_for_role( + dataset_id, product_id, role=role + ): + raise UnauthorizedError() + entry = self.catalog(CACHE_DIR=self.cache_dir)[dataset_id][product_id] if entry.metadata: info["metadata"] = entry.metadata info["description"] = entry.description info["id"] = product_id info["dataset"] = self.dataset_info(dataset_id=dataset_id) if use_cache: - info["data"] = self.get_cached_product( + info["data"] = self.get_cached_product_or_read( dataset_id, product_id ).to_dict() else: - info["data"] = ( - self.catalog[dataset_id][product_id].read_chunked().to_dict() - ) + info["data"] = entry.read_chunked().to_dict() return info def product_info( self, dataset_id: str, product_id: str, use_cache: bool = False ): info = {} - entry = self.catalog[dataset_id][product_id] + entry = self.catalog(CACHE_DIR=self.cache_dir)[dataset_id][product_id] if entry.metadata: info["metadata"] = entry.metadata if use_cache: - info["data"] = self.get_cached_product( + info["data"] = self.get_cached_product_or_read( dataset_id, product_id ).to_dict() else: - info["data"] = ( - self.catalog[dataset_id][product_id].read_chunked().to_dict() - ) + info["data"] = entry.read_chunked().to_dict() return info @log_execution_time(_LOG) @@ -247,7 +338,7 @@ def query( ID of the dataset product_id : str ID of the product - query : GeoQuery or dict or str + query : GeoQuery or dict or str or bytes or bytearray Query to be executed for the given product compute : bool, optional, default=False If True, resulting data of DataCube will be computed, otherwise @@ -259,13 +350,15 @@ def query( DataCube processed according to `query` """ self._LOG.debug("query: %s", query) - query = Datastore._maybe_convert_to_geoquery(query) - self._LOG.debug("processing GeoQuery: %s", query) + geoquery: GeoQuery = GeoQuery.parse(query) + self._LOG.debug("processing GeoQuery: %s", geoquery) # NOTE: we always use catalog directly and single product cache self._LOG.debug("loading product...") - kube = self.catalog[dataset_id][product_id].read_chunked() + kube = self.catalog(CACHE_DIR=self.cache_dir)[dataset_id][ + product_id + ].read_chunked() self._LOG.debug("original kube len: %s", len(kube)) - return Datastore._process_query(kube, query, compute) + return Datastore._process_query(kube, geoquery, compute) @log_execution_time(_LOG) def estimate( @@ -291,24 +384,36 @@ def estimate( Number of bytes of the estimated kube """ self._LOG.debug("query: %s", query) - query = Datastore._maybe_convert_to_geoquery(query) - self._LOG.debug("processing GeoQuery: %s", query) + geoquery: GeoQuery = GeoQuery.parse(query) + self._LOG.debug("processing GeoQuery: %s", geoquery) # NOTE: we always use catalog directly and single product cache self._LOG.debug("loading product...") # NOTE: for estimation we use cached products - kube = self.get_cached_product(dataset_id, product_id) + kube = self.get_cached_product_or_read(dataset_id, product_id) self._LOG.debug("original kube len: %s", len(kube)) - return Datastore._process_query(kube, query, False).nbytes + return Datastore._process_query(kube, geoquery, False).nbytes - @staticmethod - def _maybe_convert_to_geoquery(query: GeoQuery | dict | str): - if isinstance(query, 
str): - Datastore._LOG.debug("converting query: str -> dict...") - query = json.loads(query) - if isinstance(query, dict): - Datastore._LOG.debug("converting query: dict -> GeoQuery...") - query = GeoQuery(**query) - return query + @log_execution_time(_LOG) + def is_product_valid_for_role( + self, + dataset_id: str, + product_id: str, + role: str | list[str] | None = None, + ): + entry = self.catalog(CACHE_DIR=self.cache_dir)[dataset_id][product_id] + product_role = BaseRole.PUBLIC + if entry.metadata: + product_role = entry.metadata.get("role", BaseRole.PUBLIC) + if product_role == BaseRole.PUBLIC: + return True + if not role: + # NOTE: it means, we consider the public profile + return False + if BaseRole.ADMIN in role: + return True + if product_role in role: + return True + return False @staticmethod def _process_query(kube, query: GeoQuery, compute: None | bool = False): @@ -316,6 +421,8 @@ def _process_query(kube, query: GeoQuery, compute: None | bool = False): Datastore._LOG.debug("filtering with: %s", query.filters) kube = kube.filter(**query.filters) Datastore._LOG.debug("resulting kube len: %s", len(kube)) + if isinstance(kube, Delayed) and compute: + kube = kube.compute() if query.variable: Datastore._LOG.debug("selecting fields...") kube = kube[query.variable] @@ -346,10 +453,7 @@ def _process_query(kube, query: GeoQuery, compute: None | bool = False): else: method = "nearest" kube = kube.sel(vertical=vertical, method=method) - if compute: - Datastore._LOG.debug("computing...") - kube.compute() - return kube + return kube.compute() if compute else kube @staticmethod def _maybe_convert_dict_slice_to_slice(dict_vals): diff --git a/datastore/datastore/exception.py b/datastore/datastore/exception.py new file mode 100644 index 0000000..d048e83 --- /dev/null +++ b/datastore/datastore/exception.py @@ -0,0 +1,5 @@ +"""Module with exceptions definitions""" + + +class UnauthorizedError(ValueError): + """Role is not authorized""" diff --git a/datastore/singleton.py b/datastore/datastore/singleton.py similarity index 92% rename from datastore/singleton.py rename to datastore/datastore/singleton.py index 7916a74..ff6ef01 100644 --- a/datastore/singleton.py +++ b/datastore/datastore/singleton.py @@ -7,12 +7,13 @@ import os import logging from threading import Lock +from typing import Any, Type class Singleton(type): """Thread-safe implementation of the singleton design pattern metaclass""" - _instances = {} + _instances: dict[Type, Any] = {} _lock: Lock = Lock() def __call__(cls, *args, **kwargs): diff --git a/datastore/util.py b/datastore/datastore/util.py similarity index 100% rename from datastore/util.py rename to datastore/datastore/util.py diff --git a/geoquery/__init__.py b/datastore/dbmanager/__init__.py similarity index 100% rename from geoquery/__init__.py rename to datastore/dbmanager/__init__.py diff --git a/db/dbmanager/dbmanager.py b/datastore/dbmanager/dbmanager.py similarity index 97% rename from db/dbmanager/dbmanager.py rename to datastore/dbmanager/dbmanager.py index b049b97..b11c46c 100644 --- a/db/dbmanager/dbmanager.py +++ b/datastore/dbmanager/dbmanager.py @@ -86,7 +86,7 @@ class User(Base): ) contact_name = Column(String(255)) requests = relationship("Request") - roles = relationship("Role", secondary=association_table) + roles = relationship("Role", secondary=association_table, lazy="selectin") class Worker(Base): @@ -148,7 +148,7 @@ def __init__(self) -> None: "POSTGRES_DB", "POSTGRES_USER", "POSTGRES_PASSWORD", - "POSTGRES_PORT", + "DB_SERVICE_PORT", ]: 
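# Illustrative sketch (not part of the patch) of how the new role gate and the
# GeoQuery-based entry points above are intended to be used. A configured
# deployment is assumed: CATALOG_PATH / CACHE_PATH must point at a real intake
# catalog and a writable cache directory (the paths, dataset/product ids and
# role names below are example values only).
import os

os.environ.setdefault("CATALOG_PATH", "/catalog/catalog.yaml")   # hypothetical
os.environ.setdefault("CACHE_PATH", "/tmp/geolake-cache")        # hypothetical

from datastore.datastore import Datastore

ds = Datastore()  # singleton; fails fast if either variable above is missing
# Products whose metadata has no "role" key default to BaseRole.PUBLIC and are
# visible to the anonymous profile (role=None); "admin" matches any product.
ds.is_product_valid_for_role("era5-single-levels", "reanalysis", role=None)
ds.is_product_valid_for_role("era5-single-levels", "reanalysis", role=["internal"])
# query()/estimate() now go through GeoQuery.parse, so the raw request payload
# can be passed straight through as a JSON string, a dict or a GeoQuery:
ds.estimate(
    dataset_id="era5-single-levels",
    product_id="reanalysis",
    query='{"variable": ["2_metre_dewpoint_temperature"]}',
)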
self._LOG.info( "attempt to load data from environment variable: `%s`", @@ -164,13 +164,14 @@ def __init__(self) -> None: user = os.environ["POSTGRES_USER"] password = os.environ["POSTGRES_PASSWORD"] - host = os.environ["POSTGRES_HOST"] - port = os.environ["POSTGRES_PORT"] + host = os.environ["DB_SERVICE_HOST"] + port = os.environ["DB_SERVICE_PORT"] database = os.environ["POSTGRES_DB"] url = f"postgresql://{user}:{password}@{host}:{port}/{database}" + self._LOG.info("db connection: `%s`", url) self.__engine = create_engine( - url, echo=is_true(os.environ.get("ECHO_DB", False)) + url, echo=is_true(os.environ.get("DB_LOGGING", False)) ) self.__session_maker = sessionmaker(bind=self.__engine) diff --git a/db/dbmanager/singleton.py b/datastore/dbmanager/singleton.py similarity index 100% rename from db/dbmanager/singleton.py rename to datastore/dbmanager/singleton.py diff --git a/geoquery/tests/__init__.py b/datastore/geoquery/__init__.py similarity index 100% rename from geoquery/tests/__init__.py rename to datastore/geoquery/__init__.py diff --git a/geoquery/geoquery.py b/datastore/geoquery/geoquery.py similarity index 76% rename from geoquery/geoquery.py rename to datastore/geoquery/geoquery.py index a0b888b..8446660 100644 --- a/geoquery/geoquery.py +++ b/datastore/geoquery/geoquery.py @@ -1,8 +1,10 @@ import json -from typing import Optional, List, Dict, Union, Mapping, Any +from typing import Optional, List, Dict, Union, Mapping, Any, TypeVar from pydantic import BaseModel, root_validator, validator +TGeoQuery = TypeVar("TGeoQuery") + class GeoQuery(BaseModel, extra="allow"): variable: Optional[Union[str, List[str]]] @@ -51,3 +53,20 @@ def original_query_json(self): # shorter and more elegant res = dict(filter(lambda item: item[1] is not None, res.items())) return json.dumps(res) + + @classmethod + def parse( + cls, load: TGeoQuery | dict | str | bytes | bytearray + ) -> TGeoQuery: + if isinstance(load, cls): + return load + if isinstance(load, (str, bytes, bytearray)): + load = json.loads(load) + if isinstance(load, dict): + load = GeoQuery(**load) + else: + raise TypeError( + f"type of the `load` argument ({type(load).__name__}) is not" + " supported!" 
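# Usage sketch (illustrative, not part of the patch): GeoQuery.parse accepts an
# existing GeoQuery, a dict, or a JSON payload (str/bytes/bytearray) and
# rejects anything else with a TypeError, which is what lets
# Datastore.query()/estimate() hand the request body over without
# pre-processing.
from geoquery.geoquery import GeoQuery

raw = '{"variable": ["2_metre_dewpoint_temperature"], "time": {"year": ["1981"], "month": ["3"]}}'
q1 = GeoQuery.parse(raw)         # JSON string -> GeoQuery
q2 = GeoQuery.parse(q1.dict())   # dict -> GeoQuery
q3 = GeoQuery.parse(q2)          # GeoQuery instances pass through unchanged
assert q1.variable == q2.variable == q3.variable
# GeoQuery.parse(42)             # -> TypeError: type ... is not supported!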
+ ) + return load diff --git a/datastore/geoquery/task.py b/datastore/geoquery/task.py new file mode 100644 index 0000000..4b9a3d8 --- /dev/null +++ b/datastore/geoquery/task.py @@ -0,0 +1,62 @@ +import json +from collections import Counter +from typing import Any, Optional, TypeVar + +from pydantic import BaseModel, Field, validator + +TWorkflow = TypeVar("TWorkflow") + + +class Task(BaseModel): + id: str | int + op: str + use: Optional[list[str | int]] = Field(default_factory=list) + args: Optional[dict[str, Any]] = Field(default_factory=dict) + + @validator("use", pre=True, always=True, each_item=False) + def match_use(cls, v): + if v is None: + return [] + return v + + +class TaskList(BaseModel): + tasks: list[Task] + + @validator("tasks") + def match_unique_ids(cls, items): + for id_value, id_count in Counter([item.id for item in items]).items(): + if id_count != 1: + raise ValueError(f"duplicated key found: `{id_value}`") + return items + + @classmethod + def parse( + cls, + workflow: TWorkflow | dict | list[dict] | str | bytes | bytearray, + ) -> TWorkflow: + if isinstance(workflow, cls): + return workflow + if isinstance(workflow, (str | bytes | bytearray)): + workflow = json.loads(workflow) + if isinstance(workflow, list): + return cls(tasks=workflow) + elif isinstance(workflow, dict): + return cls(**workflow) + else: + raise TypeError( + f"`workflow` argument of type `{type(workflow).__name__}`" + " cannot be safetly parsed to the `Workflow`" + ) + + @property + def dataset_id(self): + for task in self.tasks: + if task.op == "subset": + return task.args.get("dataset_id", "") + + @property + def product_id(self): + for task in self.tasks: + if task.op == "subset": + return task.args.get("product_id", "") diff --git a/web/app/__init__.py b/datastore/geoquery/tests/__init__.py similarity index 100% rename from web/app/__init__.py rename to datastore/geoquery/tests/__init__.py diff --git a/geoquery/tests/test_geoquery.py b/datastore/geoquery/tests/test_geoquery.py similarity index 100% rename from geoquery/tests/test_geoquery.py rename to datastore/geoquery/tests/test_geoquery.py diff --git a/datastore/requirements.txt b/datastore/requirements.txt new file mode 100644 index 0000000..449eb47 --- /dev/null +++ b/datastore/requirements.txt @@ -0,0 +1,2 @@ +networkx +pydantic<2.0.0 \ No newline at end of file diff --git a/web/tests/__init__.py b/datastore/tests/__init__.py similarity index 100% rename from web/tests/__init__.py rename to datastore/tests/__init__.py diff --git a/datastore/tests/workflow/__init__.py b/datastore/tests/workflow/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/datastore/tests/workflow/fixtures.py b/datastore/tests/workflow/fixtures.py new file mode 100644 index 0000000..8ce94ad --- /dev/null +++ b/datastore/tests/workflow/fixtures.py @@ -0,0 +1,122 @@ +import pytest + + +@pytest.fixture +def subset_query() -> str: + yield """ + { + "dataset_id": "era5-single-levels", + "product_id": "reanalysis", + "query": { + "area": { + "north": -85, + "south": -90, + "east": 260, + "west": 240 + }, + "time": { + "hour": [ + "15" + ], + "year": [ + "1981", + "1985", + "2022" + ], + "month": [ + "3", + "6" + ], + "day": [ + "23", + "27" + ] + }, + "variable": [ + "2_metre_dewpoint_temperature", + "surface_net_downward_shortwave_flux" + ] + } + } + """ + + +@pytest.fixture +def resample_query(): + yield """ + { + "freq": "1D", + "operator": "nanmax", + "resample_args": { + "closed": "right" + } + } + """ + + +@pytest.fixture +def workflow_str(): + yield 
""" + [ + { + "id": "subset1", + "op": "subset", + "args": { + "dataset_id": "era5-single-levels", + "product_id": "reanalysis", + "query": { + "area": { + "north": -85, + "south": -90, + "east": 260, + "west": 240 + } + } + } + }, + { + "id": "resample1", + "use": ["subset1"], + "op": "resample", + "args": + { + "freq": "1D", + "operator": "nanmax" + } + } + ] + """ + + +@pytest.fixture +def bad_workflow_str(): + yield """ + [ + { + "id": "subset1", + "op": "subset", + "args": { + "dataset_id": "era5-single-levels", + "product_id": "reanalysis", + "query": { + "area": { + "north": -85, + "south": -90, + "east": 260, + "west": 240 + } + } + } + }, + { + "id": "resample1", + "use": ["subset1", "subset2"], + "op": "resample", + "args": + { + "freq": "1D", + "operator": "nanmax" + } + } + ] + """ diff --git a/datastore/tests/workflow/test_operators.py b/datastore/tests/workflow/test_operators.py new file mode 100644 index 0000000..46cf109 --- /dev/null +++ b/datastore/tests/workflow/test_operators.py @@ -0,0 +1,20 @@ +from workflow import operators as op + +from .fixtures import subset_query, resample_query + + +def test_create_subset_operator_with_str_args(subset_query): + sub_op = op.Operator("subset", subset_query) + assert isinstance(sub_op, op.Subset) + assert isinstance(sub_op.args, op.SubsetArgs) + assert sub_op.args.dataset_id == "era5-single-levels" + assert sub_op.args.product_id == "reanalysis" + + +def test_create_resample_operator_with_str_args(resample_query): + res_op = op.Operator("resample", resample_query) + assert isinstance(res_op, op.Resample) + assert isinstance(res_op.args, op.ResampleArgs) + assert res_op.args.freq == "1D" + assert res_op.args.operator == "nanmax" + assert res_op.args.resample_args == {"closed": "right"} diff --git a/datastore/tests/workflow/test_workflow.py b/datastore/tests/workflow/test_workflow.py new file mode 100644 index 0000000..7036b73 --- /dev/null +++ b/datastore/tests/workflow/test_workflow.py @@ -0,0 +1,23 @@ +import pytest +from workflow.workflow import Workflow + +from .fixtures import workflow_str, bad_workflow_str + + +def test_create_workflow(workflow_str): + comp_graph = Workflow(workflow_str) + assert len(comp_graph) == 2 + task_iter = comp_graph.traverse() + node1, precedint1 = next(task_iter) + assert precedint1 == tuple() + assert node1.operator.name == "subset" + + node2, precedint2 = next(task_iter) + assert len(precedint2) == 1 + assert node2.operator.name == "resample" + assert precedint2[0].operator.name == "subset" + + +def test_fail_when_task_not_defined(bad_workflow_str): + with pytest.raises(ValueError, match=r"task with id*"): + _ = Workflow(bad_workflow_str) diff --git a/datastore/utils/__init__.py b/datastore/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/api/app/api_logging.py b/datastore/utils/api_logging.py similarity index 73% rename from api/app/api_logging.py rename to datastore/utils/api_logging.py index f68804f..58d148d 100644 --- a/api/app/api_logging.py +++ b/datastore/utils/api_logging.py @@ -3,17 +3,6 @@ import logging as default_logging -class UnknownRIDFilter(default_logging.Filter): - """Logging filter which passes default value `rid`. 
- It can be replaced by `defaults` paramter of `logging.Formatter` - in Python 3.10.""" - - def filter(self, record): - if not hasattr(record, "rid"): - record.rid = "N/A" - return True - - def get_dds_logger( name: str, level: Literal["debug", "info", "warning", "error", "critical"] = "info", @@ -39,7 +28,7 @@ def get_dds_logger( log = default_logging.getLogger(name) format_ = os.environ.get( "LOGGING_FORMAT", - "%(asctime)s %(name)s %(levelname)s %(rid)s %(message)s", + "%(asctime)s %(name)s %(levelname)s %(message)s", ) formatter = default_logging.Formatter(format_) logging_level = os.environ.get("LOGGING_LEVEL", level.upper()) @@ -48,5 +37,4 @@ def get_dds_logger( stream_handler.setFormatter(formatter) stream_handler.setLevel(logging_level) log.addHandler(stream_handler) - log.addFilter(UnknownRIDFilter()) return log diff --git a/api/app/metrics.py b/datastore/utils/metrics.py similarity index 100% rename from api/app/metrics.py rename to datastore/utils/metrics.py diff --git a/utils/wait-for-it.sh b/datastore/wait-for-it.sh similarity index 100% rename from utils/wait-for-it.sh rename to datastore/wait-for-it.sh diff --git a/datastore/workflow/__init__.py b/datastore/workflow/__init__.py new file mode 100644 index 0000000..9c75326 --- /dev/null +++ b/datastore/workflow/__init__.py @@ -0,0 +1 @@ +from workflow.workflow import Workflow diff --git a/datastore/workflow/workflow.py b/datastore/workflow/workflow.py new file mode 100644 index 0000000..e609a77 --- /dev/null +++ b/datastore/workflow/workflow.py @@ -0,0 +1,223 @@ +import json +from typing import Generator, Hashable, Callable, Literal, Any +from functools import partial +import logging + +import networkx as nx +from geokube.core.datacube import DataCube +from geoquery.geoquery import GeoQuery +from geoquery.task import TaskList +from datastore.datastore import Datastore + +AggregationFunctionName = ( + Literal["max"] + | Literal["nanmax"] + | Literal["min"] + | Literal["nanmin"] + | Literal["mean"] + | Literal["nanmean"] + | Literal["sum"] + | Literal["nansum"] +) + + +_LOG = logging.getLogger("geokube.workflow") + +TASK_ATTRIBUTE = "task" + + +class _WorkflowTask: + __slots__ = ("id", "dependencies", "operator") + + id: Hashable + dependencies: list[Hashable] | None + operator: Callable[..., DataCube] + + def __init__( + self, + id: Hashable, + operator: Callable[..., DataCube], + dependencies: list[Hashable] | None = None, + ) -> None: + self.operator = operator + self.id = id + if dependencies is None: + dependencies = [] + self.dependencies = dependencies + + def compute(self, kube: DataCube | None) -> DataCube: + return self.operator(kube) + + +class Workflow: + __slots__ = ("graph", "present_nodes_ids", "is_verified") + + graph: nx.DiGraph + present_nodes_ids: set[Hashable] + is_verified: bool + + def __init__(self) -> None: + self.graph = nx.DiGraph() + self.present_nodes_ids = set() + self.is_verified = False + + @classmethod + def from_tasklist(cls, task_list: TaskList) -> "Workflow": + workflow = cls() + for task in task_list.tasks: + match task.op: + case "subset": + workflow.subset(task.id, **task.args) + case "resample": + workflow.resample( + task.id, dependencies=task.use, **task.args + ) + case "average": + workflow.average( + task.id, dependencies=task.use, **task.args + ) + case "to_regular": + workflow.to_regular( + task.id, dependencies=task.use, **task.args + ) + case _: + raise ValueError( + f"task operator: {task.op} is not defined" + ) + return workflow + + def _add_computational_node(self, task: 
_WorkflowTask): + node_id = task.id + assert ( + node_id not in self.present_nodes_ids + ), "worflow task IDs need to be unique!" + self.present_nodes_ids.add(node_id) + self.graph.add_node(node_id, **{TASK_ATTRIBUTE: task}) + for dependend_node in task.dependencies: + self.graph.add_edge(dependend_node, node_id) + self.is_verified = False + + def subset( + self, + id: Hashable, + dataset_id: str, + product_id: str, + query: GeoQuery | dict, + ) -> "Workflow": + def _subset(kube: DataCube | None = None) -> DataCube: + return Datastore().query( + dataset_id=dataset_id, + product_id=product_id, + query=query + if isinstance(query, GeoQuery) + else GeoQuery(**query), + compute=False, + ) + + task = _WorkflowTask(id=id, operator=_subset) + self._add_computational_node(task) + return self + + def resample( + self, + id: Hashable, + freq: str, + agg: Callable[..., DataCube] | AggregationFunctionName, + resample_kwargs: dict[str, Any] | None, + *, + dependencies: list[Hashable], + ) -> "Workflow": + def _resample(kube: DataCube | None = None) -> DataCube: + assert kube is not None, "`kube` cannot be `None` for resampling" + return kube.resample( + operator=agg, + frequency=freq, + **resample_kwargs, + ) + + task = _WorkflowTask( + id=id, operator=_resample, dependencies=dependencies + ) + self._add_computational_node(task) + return self + + def average( + self, id: Hashable, dim: str, *, dependencies: list[Hashable] + ) -> "Workflow": + def _average(kube: DataCube | None = None) -> DataCube: + assert kube is not None, "`kube` cannot be `None` for averaging" + return kube.average(dim=dim) + + task = _WorkflowTask( + id=id, operator=_average, dependencies=dependencies + ) + self._add_computational_node(task) + return self + + def to_regular( + self, id: Hashable, *, dependencies: list[Hashable] + ) -> "Workflow": + def _to_regular(kube: DataCube | None = None) -> DataCube: + assert kube is not None, "`kube` cannot be `None` for `to_regular``" + return kube.to_regular() + task = _WorkflowTask( + id=id, operator=_to_regular, dependencies=dependencies + ) + self._add_computational_node(task) + return self + + def add_task( + self, + id: Hashable, + func: Callable[..., DataCube], + dependencies: list[str] | None = None, + **func_kwargs, + ) -> "Workflow": + task = _WorkflowTask( + id=id, + operator=partial(func, **func_kwargs), + dependencies=dependencies, + ) + self._add_computational_node(task) + return self + + def verify(self) -> "Workflow": + if self.is_verified: + return + assert nx.is_directed_acyclic_graph( + self.graph + ), "the workflow contains cycles!" 
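# Illustrative sketch (not part of the patch): building a two-step workflow
# through the fluent API above. Task args are forwarded verbatim to the builder
# methods, so the keys here follow the resample() signature (freq, agg,
# resample_kwargs). Nothing touches the catalog until compute() is called, so
# constructing and verifying the graph needs no CATALOG_PATH/CACHE_PATH setup.
from geoquery.task import TaskList
from workflow import Workflow

wf = Workflow.from_tasklist(TaskList.parse([
    {"id": "subset1", "op": "subset",
     "args": {"dataset_id": "era5-single-levels", "product_id": "reanalysis",
              "query": {"variable": ["2_metre_dewpoint_temperature"]}}},
    {"id": "resample1", "op": "resample", "use": ["subset1"],
     "args": {"freq": "1D", "agg": "nanmax", "resample_kwargs": {}}},
]))
wf.verify()         # DAG check + every referenced node must carry a task
assert len(wf) == 2
# wf.compute()      # would run subset -> resample against a configured Datastore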
+ for u, v in self.graph.edges: + if TASK_ATTRIBUTE not in self.graph.nodes[u].keys(): + _LOG.error( + "task with id `%s` is not defined for the workflow", u + ) + raise ValueError( + f"task with id `{u}` is not defined for the workflow" + ) + if TASK_ATTRIBUTE not in self.graph.nodes[v].keys(): + _LOG.error( + "task with id `%s` is not defined for the workflow", v + ) + raise ValueError( + f"task with id `{v}` is not defined for the workflow" + ) + self.is_verified = True + + def traverse(self) -> Generator[_WorkflowTask, None, None]: + for node_id in nx.topological_sort(self.graph): + _LOG.debug("computing task for the node: %s", node_id) + yield self.graph.nodes[node_id][TASK_ATTRIBUTE] + + def compute(self) -> DataCube: + self.verify() + result = None + for task in self.traverse(): + result = task.compute(result) + return result + + def __len__(self): + return len(self.graph.nodes) + + def __getitem__(self, idx: Hashable): + return self.graph.nodes[idx] diff --git a/db/Dockerfile b/db/Dockerfile deleted file mode 100644 index 8bcf754..0000000 --- a/db/Dockerfile +++ /dev/null @@ -1,2 +0,0 @@ -FROM postgres:14.1 -ADD ./scripts/init.sql /docker-entrypoint-initdb.d/ \ No newline at end of file diff --git a/db/scripts/1-init.sql b/db/scripts/1-init.sql deleted file mode 100644 index ecd4a30..0000000 --- a/db/scripts/1-init.sql +++ /dev/null @@ -1,80 +0,0 @@ --- CREATE USER dds WITH PASSWORD 'dds'; --- CREATE DATABASE dds; --- GRANT ALL PRIVILEGES ON DATABASE dds TO dds; - --- extension for using UUID column type -CREATE EXTENSION "uuid-ossp"; - -CREATE TABLE IF NOT EXISTS users ( - user_id uuid DEFAULT uuid_generate_v4() PRIMARY KEY, - api_key VARCHAR(255) UNIQUE NOT NULL, - contact_name VARCHAR(255) -); - -CREATE TABLE IF NOT EXISTS roles ( - role_id SERIAL PRIMARY KEY, - role_name VARCHAR (255) UNIQUE NOT NULL -); - -CREATE TABLE IF NOT EXISTS users_roles ( - ur_id SERIAL PRIMARY KEY, - user_id uuid NOT NULL, - role_id SERIAL NOT NULL, - CONSTRAINT fk_user - FOREIGN KEY(user_id) - REFERENCES users(user_id), - CONSTRAINT fk_role - FOREIGN KEY(role_id) - REFERENCES roles(role_id) -); - -CREATE TABLE IF NOT EXISTS workers ( - worker_id SERIAL PRIMARY KEY, - status VARCHAR(255) NOT NULL, - host VARCHAR(255), - dask_scheduler_port INT, - dask_dashboard_address CHAR(10), - created_on TIMESTAMP NOT NULL -); - -CREATE TABLE IF NOT EXISTS requests ( - request_id SERIAL PRIMARY KEY, - status VARCHAR(255) NOT NULL, - priority INT, - user_id uuid NOT NULL, - worker_id INT, - dataset VARCHAR(255), - product VARCHAR(255), - query json, - estimate_size_bytes BIGINT, - created_on TIMESTAMP NOT NULL, - last_update TIMESTAMP, - fail_reason VARCHAR(1000), - CONSTRAINT fk_user - FOREIGN KEY(user_id) - REFERENCES users(user_id), - CONSTRAINT fk_worker - FOREIGN KEY(worker_id) - REFERENCES workers(worker_id) -); - -CREATE TABLE IF NOT EXISTS downloads ( - download_id SERIAL PRIMARY KEY, - download_uri VARCHAR(255), - request_id INT UNIQUE, - storage_id INT, - location_path VARCHAR(255), - size_bytes BIGINT, - created_on TIMESTAMP NOT NULL, - CONSTRAINT fk_req - FOREIGN KEY(request_id) - REFERENCES requests(request_id) -); - -CREATE TABLE IF NOT EXISTS storages ( - storage_id SERIAL PRIMARY KEY, - name VARCHAR(255), - host VARCHAR(20), - protocol VARCHAR(10), - port INT -); \ No newline at end of file diff --git a/db/scripts/2-populate.sql b/db/scripts/2-populate.sql deleted file mode 100644 index c18764d..0000000 --- a/db/scripts/2-populate.sql +++ /dev/null @@ -1,13 +0,0 @@ -INSERT INTO roles VALUES (0, 
'public'); -INSERT INTO roles VALUES (1, 'admin'); -INSERT INTO roles VALUES (2, 'internal'); -INSERT INTO roles VALUES (3, 'cmcc'); -INSERT INTO roles VALUES (4, 'silvanus'); -INSERT INTO roles VALUES (5, 'atlantic-project'); -INSERT INTO roles VALUES (6, 'spei-review'); -INSERT INTO roles VALUES (7, 'climate-projection-review'); -INSERT INTO users VALUES ('d9152e98-9de8-4064-b281-f61f8cecffe9', 'arZFgTatrOJpJ3egHEjRUyTUDt763SX6uAI4m2CVT4I', 'Mario Rossi'); -INSERT INTO users VALUES ('54a0473f-51ea-45a7-a3e9-eb150fc47302', 'hcuxnej74hbhGagdfBua4sd5VVzxchysrg', 'Test user'); -INSERT INTO users_roles VALUES (0, 'd9152e98-9de8-4064-b281-f61f8cecffe9', 2); -INSERT INTO users_roles VALUES (1, '54a0473f-51ea-45a7-a3e9-eb150fc47302', 2); -INSERT INTO users_roles VALUES (2, 'd9152e98-9de8-4064-b281-f61f8cecffe9', 4); \ No newline at end of file diff --git a/drivers/Dockerfile b/drivers/Dockerfile new file mode 100644 index 0000000..1e8927b --- /dev/null +++ b/drivers/Dockerfile @@ -0,0 +1,8 @@ +ARG REGISTRY=rg.fr-par.scw.cloud/geokube +ARG TAG=latest +FROM $REGISTRY/geokube:$TAG +RUN conda install -c conda-forge --yes --freeze-installed intake=0.6.6 +RUN conda clean -afy +COPY dist/intake_geokube-0.1a0-py3-none-any.whl / +RUN pip install /intake_geokube-0.1a0-py3-none-any.whl +RUN rm /intake_geokube-0.1a0-py3-none-any.whl diff --git a/drivers/LICENSE b/drivers/LICENSE new file mode 100644 index 0000000..2b65938 --- /dev/null +++ b/drivers/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/drivers/README.md b/drivers/README.md new file mode 100644 index 0000000..f08349c --- /dev/null +++ b/drivers/README.md @@ -0,0 +1,2 @@ +# intake-geokube +GeoKube plugin for Intake \ No newline at end of file diff --git a/drivers/intake_geokube/__init__.py b/drivers/intake_geokube/__init__.py new file mode 100644 index 0000000..dc60a1d --- /dev/null +++ b/drivers/intake_geokube/__init__.py @@ -0,0 +1,5 @@ +"""Geokube Plugin for Intake.""" + +# This avoids a circilar dependency pitfall by ensuring that the +# driver-discovery code runs first, see: +# https://intake.readthedocs.io/en/latest/making-plugins.html#entrypoints diff --git a/drivers/intake_geokube/_version.py b/drivers/intake_geokube/_version.py new file mode 100644 index 0000000..fced54c --- /dev/null +++ b/drivers/intake_geokube/_version.py @@ -0,0 +1,567 @@ +# -*- coding: utf-8 -*- + +# This file helps to compute a version number in source trees obtained from +# git-archive tarball (such as those provided by githubs download-from-tag +# feature). Distribution tarballs (built by setup.py sdist) and build +# directories (produced by setup.py build) will contain a much shorter file +# that just contains the computed version number. + +# This file is released into the public domain. Generated by +# versioneer-0.18 (https://github.com/warner/python-versioneer) +"""Git implementation of _version.py.""" + +import errno +import os +import re +import subprocess +import sys + + +def get_keywords(): + """Get the keywords needed to look up the version information.""" + # these strings will be replaced by git during git-archive. + # setup.py/versioneer.py will grep for the variable names, so they must + # each be defined on a line of their own. _version.py will just call + # get_keywords(). 
+ git_refnames = "$Format:%d$" + git_full = "$Format:%H$" + git_date = "$Format:%ci$" + keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} + return keywords + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + +def get_config(): + """Create, populate and return the VersioneerConfig() object.""" + # these strings are filled in when 'setup.py versioneer' creates + # _version.py + cfg = VersioneerConfig() + cfg.VCS = "git" + cfg.style = "pep440" + cfg.tag_prefix = "" + cfg.parentdir_prefix = "None" + cfg.versionfile_source = "intake_xarray/_version.py" + cfg.verbose = False + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +LONG_VERSION_PY = {} +HANDLERS = {} + + +def register_vcs_handler(vcs, method): # decorator + """Decorator to mark a method as the handler for a particular VCS.""" + + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + + return decorate + + +def run_command( + commands, args, cwd=None, verbose=False, hide_stderr=False, env=None +): + """Call the given command(s).""" + assert isinstance(commands, list) + p = None + for c in commands: + try: + dispcmd = str([c] + args) + # remember shell=False, so use git.cmd on windows, not just git + p = subprocess.Popen( + [c] + args, + cwd=cwd, + env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr else None), + ) + break + except EnvironmentError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %s" % dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %s" % (commands,)) + return None, None + stdout = p.communicate()[0].strip() + if sys.version_info[0] >= 3: + stdout = stdout.decode() + if p.returncode != 0: + if verbose: + print("unable to run %s (error)" % dispcmd) + print("stdout was %s" % stdout) + return None, p.returncode + return stdout, p.returncode + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for i in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return { + "version": dirname[len(parentdir_prefix) :], + "full-revisionid": None, + "dirty": False, + "error": None, + "date": None, + } + else: + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print( + "Tried directories %s but none started with prefix %s" + % (str(rootdirs), parentdir_prefix) + ) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. 
+ keywords = {} + try: + f = open(versionfile_abs, "r") + for line in f.readlines(): + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + f.close() + except EnvironmentError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if not keywords: + raise NotThisMethod("no keywords at all, weird") + date = keywords.get("date") + if date is not None: + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = set([r.strip() for r in refnames.strip("()").split(",")]) + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = set([r for r in refs if re.search(r"\d", r)]) + if verbose: + print("discarding '%s', no digits" % ",".join(refs - tags)) + if verbose: + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix) :] + if verbose: + print("picking %s" % r) + return { + "version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": None, + "date": date, + } + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return { + "version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": "no suitable tags", + "date": None, + } + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): + """Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. 
+ """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + + out, rc = run_command( + GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True + ) + if rc != 0: + if verbose: + print("Directory %s not under git control" % root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = run_command( + GITS, + [ + "describe", + "--tags", + "--dirty", + "--always", + "--long", + "--match", + "%s*" % tag_prefix, + ], + cwd=root, + ) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[: git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) + if not mo: + # unparseable. Maybe git-describe is misbehaving? + pieces["error"] = ( + "unable to parse git-describe output: '%s'" % describe_out + ) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( + full_tag, + tag_prefix, + ) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix) :] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out, rc = run_command( + GITS, ["rev-list", "HEAD", "--count"], cwd=root + ) + pieces["distance"] = int(count_out) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[ + 0 + ].strip() + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_pre(pieces): + """TAG[.post.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 0.post.devDISTANCE + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += ".post.dev%d" % pieces["distance"] + else: + # exception #1 + rendered = "0.post.dev%d" % pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + return rendered + + +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Eexceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces): + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces): + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return { + "version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None, + } + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%s'" % style) + + return { + "version": rendered, + "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], + "error": None, + "date": pieces.get("date"), + } + + +def get_versions(): + """Get version information or return default if unable to do so.""" + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. + + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords( + get_keywords(), cfg.tag_prefix, verbose + ) + except NotThisMethod: + pass + + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. + for i in cfg.versionfile_source.split("/"): + root = os.path.dirname(root) + except NameError: + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None, + } + + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass + + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", + "date": None, + } diff --git a/drivers/intake_geokube/base.py b/drivers/intake_geokube/base.py new file mode 100644 index 0000000..e3c689b --- /dev/null +++ b/drivers/intake_geokube/base.py @@ -0,0 +1,78 @@ +# from . 
import __version__ +from intake.source.base import DataSource, Schema +from geokube.core.datacube import DataCube +from geokube.core.dataset import Dataset + + +class GeokubeSource(DataSource): + """Common behaviours for plugins in this repo""" + + version = "0.1a0" + container = "geokube" + partition_access = True + + def _get_schema(self): + """Make schema object, which embeds geokube fields metadata""" + + if self._kube is None: + self._open_dataset() + # TODO: Add schema for Geokube Dataset + if isinstance(self._kube, DataCube): + metadata = { + "fields": { + k: { + "dims": list(self._kube[k].dim_names), + # 'axis': list(self._kube[k].dims_axis_names), + "coords": list(self._kube[k].coords.keys()), + } + for k in self._kube.fields.keys() + }, + } + metadata.update(self._kube.properties) + self._schema = Schema( + datashape=None, + dtype=None, + shape=None, + npartitions=None, + extra_metadata=metadata, + ) + # TODO: Add schema for Geokube Dataset + if isinstance(self._kube, Dataset): + self._schema = Schema( + datashape=None, + dtype=None, + shape=None, + npartitions=None, + extra_metadata={}, + ) + + return self._schema + + def read(self): + """Return an in-memory geokube""" + self._load_metadata() + # TODO: Implement load in memory + return self._kube + + def read_chunked(self): + """Return a lazy geokube object""" + self._load_metadata() + return self._kube + + def read_partition(self, i): + """Fetch one chunk of data at tuple index i""" + raise NotImplementedError + + def to_dask(self): + """Return geokube object where variables (fields/coordinates) are dask arrays + """ + return self.read_chunked() + + def to_pyarrow(self): + """Return an in-memory pyarrow object""" + raise NotImplementedError + + def close(self): + """Delete open file from memory""" + self._kube = None + self._schema = None diff --git a/drivers/intake_geokube/geoquery.py b/drivers/intake_geokube/geoquery.py new file mode 100644 index 0000000..544e654 --- /dev/null +++ b/drivers/intake_geokube/geoquery.py @@ -0,0 +1,111 @@ +import json +from typing import Optional, List, Dict, Union, Any, TypeVar + +from pydantic import BaseModel, root_validator, validator + +TGeoQuery = TypeVar("TGeoQuery") + +def _maybe_convert_dict_slice_to_slice(dict_vals): + if "start" in dict_vals or "stop" in dict_vals: + return slice( + dict_vals.get("start"), + dict_vals.get("stop"), + dict_vals.get("step"), + ) + return dict_vals + +class _GeoQueryJSONEncoder(json.JSONEncoder): + + def default(self, obj): + if isinstance(obj, slice): + return { + "start": obj.start, + "stop": obj.stop, + "step": obj.step + } + return json.JSONEncoder.default(self, obj) + + +class GeoQuery(BaseModel): + variable: Optional[Union[str, List[str]]] + # TODO: Check how `time` is to be represented + time: Optional[Union[Dict[str, str | None], Dict[str, List[str]]]] + area: Optional[Dict[str, float]] + location: Optional[Dict[str, Union[float, List[float]]]] + vertical: Optional[Union[float, List[float], Dict[str, float]]] + filters: Optional[Dict] + format: Optional[str] + format_args: Optional[Dict] + + class Config: + extra = "allow" + json_encoders = {slice: lambda s: { + "start": s.start, + "stop": s.stop, + "step": s.step + }} + + # TODO: Check if we are going to allow the vertical coordinates inside both + # `area`/`location` and `vertical` + + @root_validator + def area_locations_mutually_exclusive_validator(cls, query): + if query["area"] is not None and query["location"] is not None: + raise KeyError( + "area and location couldn't be processed 
together, please use" + " one of them" + ) + return query + + @root_validator(pre=True) + def build_filters(cls, values: Dict[str, Any]) -> Dict[str, Any]: + if "filters" in values: + return values + filters = {k: _maybe_convert_dict_slice_to_slice(v) for k, v in values.items() if k not in cls.__fields__} + values = {k: v for k, v in values.items() if k in cls.__fields__} + values["filters"] = filters + return values + + @validator("time", always=True) + def match_time_dict(cls, value): + if isinstance(value, dict): + assert any([k in value for k in ("start", "stop", "year", "month", "day", "hour")]), "Missing dictionary key" + if "start" in value or "stop" in value: + return _maybe_convert_dict_slice_to_slice(value) + return value + + + @validator("vertical", always=True) + def match_vertical_dict(cls, value): + if isinstance(value, dict): + assert "start" in value, "Missing 'start' key" + assert "stop" in value, "Missing 'stop' key" + return _maybe_convert_dict_slice_to_slice(value) + return value + + def original_query_json(self): + """Return the JSON representation of the original query submitted + to the geokube-dds""" + res = super().dict() + res = dict(**res.pop("filters", {}), **res) + # NOTE: skip empty values to make query representation + # shorter and more elegant + res = dict(filter(lambda item: item[1] is not None, res.items())) + return json.dumps(res, cls=_GeoQueryJSONEncoder) + + @classmethod + def parse( + cls, load: TGeoQuery | dict | str | bytes | bytearray + ) -> TGeoQuery: + if isinstance(load, cls): + return load + if isinstance(load, (str, bytes, bytearray)): + load = json.loads(load) + if isinstance(load, dict): + load = GeoQuery(**load) + else: + raise TypeError( + f"type of the `load` argument ({type(load).__name__}) is not" + " supported!" 
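+ # This branch is reached only for payloads that are neither a GeoQuery instance, a dict, nor JSON text that decodes to a dict.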
+ ) + return load diff --git a/drivers/intake_geokube/netcdf.py b/drivers/intake_geokube/netcdf.py new file mode 100644 index 0000000..7247891 --- /dev/null +++ b/drivers/intake_geokube/netcdf.py @@ -0,0 +1,60 @@ +"""geokube driver for intake.""" +import logging +from typing import Mapping, Optional +from .base import GeokubeSource +from geokube import open_dataset, open_datacube + + +class NetCDFSource(GeokubeSource): + name = "geokube_netcdf" + + def __init__( + self, + path: str, + pattern: str = None, + field_id: str = None, + delay_read_cubes: bool = False, + metadata_caching: bool = False, + metadata_cache_path: str = None, + storage_options: dict = None, + xarray_kwargs: dict = None, + metadata=None, + mapping: Optional[Mapping[str, Mapping[str, str]]] = None, + load_files_on_persistance: Optional[bool] = True, + ): + self._kube = None + self.path = path + self.pattern = pattern + self.field_id = field_id + self.delay_read_cubes = delay_read_cubes + self.metadata_caching = metadata_caching + self.metadata_cache_path = metadata_cache_path + self.storage_options = storage_options + self.mapping = mapping + self.xarray_kwargs = {} if xarray_kwargs is None else xarray_kwargs + self.load_files_on_persistance = load_files_on_persistance + # self.xarray_kwargs.update({'engine' : 'netcdf'}) + super(NetCDFSource, self).__init__(metadata=metadata) + + def _open_dataset(self): + if self.pattern is None: + self._kube = open_datacube( + path=self.path, + id_pattern=self.field_id, + metadata_caching=self.metadata_caching, + metadata_cache_path=self.metadata_cache_path, + mapping=self.mapping, + **self.xarray_kwargs + ) + else: + self._kube = open_dataset( + path=self.path, + pattern=self.pattern, + id_pattern=self.field_id, + delay_read_cubes=self.delay_read_cubes, + metadata_caching=self.metadata_caching, + metadata_cache_path=self.metadata_cache_path, + mapping=self.mapping, + **self.xarray_kwargs + ) + return self._kube diff --git a/drivers/intake_geokube/sentinel.py b/drivers/intake_geokube/sentinel.py new file mode 100644 index 0000000..4c6b612 --- /dev/null +++ b/drivers/intake_geokube/sentinel.py @@ -0,0 +1,205 @@ +"""Geokube driver for sentinel data.""" + +from collections import defaultdict +from multiprocessing.util import get_temp_dir +import os +import dask +import zipfile +import glob +from functools import partial +from typing import Generator, Iterable, Mapping, Optional, List + +import numpy as np +import pandas as pd +import xarray as xr +from pyproj import Transformer +from pyproj.crs import CRS, GeographicCRS +from intake.source.utils import reverse_format + +from geokube import open_datacube +from geokube.core.dataset import Dataset + +from .base import GeokubeSource +from .geoquery import GeoQuery + +SENSING_TIME_ATTR: str = "sensing_time" +FILE: str = "files" +DATACUBE: str = "datacube" + + +def get_field_name_from_path(path: str): + res, file = path.split(os.sep)[-2:] + band = file.split("_")[-2] + return f"{res}_{band}" + + +def preprocess_sentinel(dset: xr.Dataset, pattern: str, **kw) -> xr.Dataset: + crs = CRS.from_cf(dset["spatial_ref"].attrs) + transformer = Transformer.from_crs( + crs_from=crs, crs_to=GeographicCRS(), always_xy=True + ) + x_vals, y_vals = dset["x"].to_numpy(), dset["y"].to_numpy() + lon_vals, lat_vals = transformer.transform(*np.meshgrid(x_vals, y_vals)) + source_path = dset.encoding["source"] + sensing_time = os.path.splitext(source_path.split(os.sep)[-6])[0].split( + "_" + )[-1] + time = pd.to_datetime([sensing_time]).to_numpy() + dset = 
dset.assign_coords( + { + "time": time, + "latitude": (("x", "y"), lat_vals), + "longitude": (("x", "y"), lon_vals), + } + ).rename({"band_data": get_field_name_from_path(source_path)}) + return dset + + +def get_zip_files_from_path(path: str) -> Generator: + assert path and isinstance(path, str), "`path` must be a string" + assert path.lower().endswith("zip"), "`path` must point to a ZIP archive" + if "*" in path: + yield from glob.iglob(path) + return + yield path + + +def unzip_data(files: Iterable[str], target: str) -> List[str]: + """Unzip ZIP archive to the `target` directory.""" + target_files = [] + for file in files: + prod_id = os.path.splitext(os.path.basename(file))[0] + target_prod = os.path.join(target, prod_id) + os.makedirs(target_prod, exist_ok=True) + with zipfile.ZipFile(file) as archive: + archive.extractall(path=target_prod) + target_files.append(os.listdir(target_prod)) + return target_files + + +def _prepare_df_from_files(files: Iterable[str], pattern: str) -> pd.DataFrame: + data = [] + for f in files: + attr = reverse_format(pattern, f) + attr[FILE] = f + data.append(attr) + return pd.DataFrame(data) + + +class CMCCSentinelSource(GeokubeSource): + name = "cmcc_sentinel_geokube" + version = "0.0.1" + + def __init__( + self, + path: str, + pattern: str = None, + zippath: str = None, + zippattern: str = None, + metadata=None, + xarray_kwargs: dict = None, + mapping: Optional[Mapping[str, Mapping[str, str]]] = None, + **kwargs, + ): + super().__init__(metadata=metadata, **kwargs) + self._kube = None + self.path = path + self.pattern = pattern + self.zippath = zippath + self.zippattern = zippattern + self.mapping = mapping + self.metadata_caching = False + self.xarray_kwargs = {} if xarray_kwargs is None else xarray_kwargs + self._unzip_dir = get_temp_dir() + self._zipdf = None + self._jp2df = None + assert ( + SENSING_TIME_ATTR in self.pattern + ), f"{SENSING_TIME_ATTR} is missing in the pattern" + self.preprocess = partial( + preprocess_sentinel, + pattern=self.pattern, + ) + if self.geoquery: + self.filters = self.geoquery.filters + else: + self.filters = {} + + def __post_init__(self) -> None: + assert ( + SENSING_TIME_ATTR in self.pattern + ), f"{SENSING_TIME_ATTR} is missing in the pattern" + self.preprocess = partial( + preprocess_sentinel, + pattern=self.pattern, + ) + + def _compute_res_df(self) -> List[str]: + self._zipdf = self._get_files_attr() + self._maybe_select_by_zip_attrs() + _ = unzip_data(self._zipdf[FILE].values, target=self._unzip_dir) + self._create_jp2_df() + self._maybe_select_by_jp2_attrs() + + def _get_files_attr(self) -> pd.DataFrame: + df = _prepare_df_from_files( + get_zip_files_from_path(self.path), self.pattern + ) + assert ( + SENSING_TIME_ATTR in df + ), f"{SENSING_TIME_ATTR} column is missing" + return df.set_index(SENSING_TIME_ATTR).sort_index() + + def _maybe_select_by_zip_attrs(self) -> Optional[pd.DataFrame]: + filters_to_pop = [] + for flt in self.filters: + if flt in self._zipdf.columns: + self._zipdf = self._zipdf.set_index(flt) + if flt == self._zipdf.index.name: + self._zipdf = self._zipdf.loc[self.filters[flt]] + filters_to_pop.append(flt) + for f in filters_to_pop: + self.filters.pop(f) + self._zipdf = self._zipdf.reset_index() + + + def _create_jp2_df(self) -> None: + self._jp2df = _prepare_df_from_files( + glob.iglob(os.path.join(self._unzip_dir, self.zippath)), + os.path.join(self._unzip_dir, self.zippattern), + ) + + def _maybe_select_by_jp2_attrs(self): + filters_to_pop = [] + for key, value in self.filters.items(): + 
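# String filters are matched by equality and iterable filters by membership; matched keys are dropped from self.filters once the loop finishes. +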
if key not in self._jp2df: + continue + if isinstance(value, str): + self._jp2df = self._jp2df[self._jp2df[key] == value] + elif isinstance(value, Iterable): + self._jp2df = self._jp2df[self._jp2df[key].isin(value)] + else: + raise TypeError(f"type `{type(value)}` is not supported!") + filters_to_pop.append(key) + for f in filters_to_pop: + self.filters.pop(f) + + def _open_dataset(self): + self._compute_res_df() + self._jp2df + cubes = [] + for i, row in self._jp2df.iterrows(): + cubes.append( + dask.delayed(open_datacube)( + path=row[FILE], + id_pattern=None, + mapping=self.mapping, + metadata_caching=self.metadata_caching, + **self.xarray_kwargs, + preprocess=self.preprocess, + ) + ) + self._jp2df[DATACUBE] = cubes + self._kube = Dataset(self._jp2df.reset_index(drop=True)) + self.geoquery.filters = self.filters + return self._kube diff --git a/drivers/intake_geokube/wrf.py b/drivers/intake_geokube/wrf.py new file mode 100644 index 0000000..1968e40 --- /dev/null +++ b/drivers/intake_geokube/wrf.py @@ -0,0 +1,170 @@ +"""geokube driver for intake.""" +import logging +from functools import partial +from typing import Any, Mapping, Optional, Union + +import numpy as np +import xarray as xr + +from .base import GeokubeSource +from geokube import open_datacube, open_dataset + + +_DIM_RENAME_MAP = { + "Time": "time", + "south_north": "latitude", + "west_east": "longitude", +} +_COORD_RENAME_MAP = {"XTIME": "time", "XLAT": "latitude", "XLONG": "longitude"} +_COORD_SQUEEZE_NAMES = ("latitude", "longitude") +_PROJECTION = {"grid_mapping_name": "latitude_longitude"} + + +def _cast_to_set(item: Any): + if item is None: + return set() + if isinstance(item, set): + return item + if isinstance(item, str): + return {item} + if isinstance(item, list): + return set(item) + raise TypeError(f"type '{type(item)}' is not supported!") + + +def rename_coords(dset: xr.Dataset, **kwargs) -> xr.Dataset: + """Rename coordinates""" + dset_ = dset.rename_vars(_COORD_RENAME_MAP) + # Removing `Time` dimension from latitude and longitude. + coords = dset_.coords + for name in _COORD_SQUEEZE_NAMES: + coord = dset_[name] + if "Time" in coord.dims: + coords[name] = coord.squeeze(dim="Time", drop=True) + return dset_ + + +def change_dims(dset: xr.Dataset, **kwargs) -> xr.Dataset: + """Changes dimensions to time, latitude, and longitude""" + # Preparing new horizontal coordinates. + lat = (["south_north"], dset["latitude"].to_numpy().mean(axis=1)) + lon = (["west_east"], dset["longitude"].to_numpy().mean(axis=0)) + # Removing old horizontal coordinates. + dset_ = dset.drop_vars(["latitude", "longitude"]) + # Adding new horizontal coordinates and setting their units. + coords = dset_.coords + coords["latitude"] = lat + coords["longitude"] = lon + dset_["latitude"].attrs["units"] = "degree_north" + dset_["longitude"].attrs["units"] = "degree_east" + # Making `time`, `latitude`, and `longitude` new dimensions, instead of + # `Time`, `south_north`, and `west_east`. + dset_ = dset_.swap_dims(_DIM_RENAME_MAP) + return dset_ + + +def add_projection(dset: xr.Dataset, **kwargs) -> xr.Dataset: + """Add projection information to the dataset""" + coords = dset.coords + coords["crs"] = xr.DataArray(data=np.array(1), attrs=_PROJECTION) + for var in dset.data_vars.values(): + enc = var.encoding + enc["grid_mapping"] = "crs" + # TODO: Check if this is needed. This code renames coordinates stored + # in encoding from `'XLONG XLAT XTIME'` to `'longitude latitude time'`. 
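+ # If the old WRF names were kept here, the "coordinates" attribute written to the output could reference variables that no longer exist after the rename.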
+ if coord_names := enc.get("coordinates"): + for old_name, new_name in _COORD_RENAME_MAP.items(): + coord_names = coord_names.replace(old_name, new_name) + enc["coordinates"] = coord_names + return dset + + +def choose_variables( + dset: xr.Dataset, + variables_to_keep: Optional[Union[str, list[str]]] = None, + variables_to_skip: Optional[Union[str, list[str]]] = None, + **kwargs, +) -> xr.Dataset: + """Choose only some variables by keeping or skipping some of them""" + variables_to_keep = _cast_to_set(variables_to_keep) + variables_to_skip = _cast_to_set(variables_to_skip) + selected_variables = set(dset.data_vars.keys()) + if len(variables_to_keep) > 0: + selected_variables = set(dset.data_vars.keys()) & variables_to_keep + selected_variables = selected_variables - variables_to_skip + if len(set(dset.data_vars.keys())) != len(selected_variables): + return dset[selected_variables] + return dset + + +def preprocess_wrf(dset: xr.Dataset, **kwargs) -> xr.Dataset: + """Preprocess WRF dataset""" + dset = rename_coords(dset, **kwargs) + dset = change_dims(dset) + dset = add_projection(dset, **kwargs) + dset = choose_variables(dset, **kwargs) + return dset + + +class CMCCWRFSource(GeokubeSource): + name = "cmcc_wrf_geokube" + + def __init__( + self, + path: str, + pattern: str = None, + field_id: str = None, + delay_read_cubes: bool = False, + metadata_caching: bool = False, + metadata_cache_path: str = None, + storage_options: dict = None, + xarray_kwargs: dict = None, + metadata=None, + mapping: Optional[Mapping[str, Mapping[str, str]]] = None, + load_files_on_persistance: Optional[bool] = True, + variables_to_keep: Optional[Union[str, list[str]]] = None, + variables_to_skip: Optional[Union[str, list[str]]] = None, + ): + self._kube = None + self.path = path + self.pattern = pattern + self.field_id = field_id + self.delay_read_cubes = delay_read_cubes + self.metadata_caching = metadata_caching + self.metadata_cache_path = metadata_cache_path + self.storage_options = storage_options + self.mapping = mapping + self.xarray_kwargs = {} if xarray_kwargs is None else xarray_kwargs + self.load_files_on_persistance = load_files_on_persistance + self.preprocess = partial( + preprocess_wrf, + variables_to_keep=variables_to_keep, + variables_to_skip=variables_to_skip, + ) + # self.xarray_kwargs.update({'engine' : 'netcdf'}) + super(CMCCWRFSource, self).__init__(metadata=metadata) + + def _open_dataset(self): + if self.pattern is None: + self._kube = open_datacube( + path=self.path, + id_pattern=self.field_id, + metadata_caching=self.metadata_caching, + metadata_cache_path=self.metadata_cache_path, + mapping=self.mapping, + **self.xarray_kwargs, + preprocess=self.preprocess, + ) + else: + self._kube = open_dataset( + path=self.path, + pattern=self.pattern, + id_pattern=self.field_id, + delay_read_cubes=self.delay_read_cubes, + metadata_caching=self.metadata_caching, + metadata_cache_path=self.metadata_cache_path, + mapping=self.mapping, + **self.xarray_kwargs, + preprocess=self.preprocess, + ) + return self._kube diff --git a/drivers/setup.py b/drivers/setup.py new file mode 100644 index 0000000..c99c224 --- /dev/null +++ b/drivers/setup.py @@ -0,0 +1,35 @@ +import setuptools + +with open("README.md", "r") as f: + long_description = f.read() + +setuptools.setup( + name="intake-geokube", + version="0.1a0", + author="CMCC Foundation - PPOS Research Group", + author_email="ppos-services@cmcc.it", + description="Geokube driver for Intake.", + long_description=long_description, + 
long_description_content_type="text/markdown", + url="https://github.com/geokube/intake-geokube", + packages=setuptools.find_packages(), + install_requires=["intake", "pytest"], + entry_points={ + "intake.drivers": [ + "geokube_netcdf = intake_geokube.netcdf:NetCDFSource", + "cmcc_wrf_geokube = intake_geokube.wrf:CMCCWRFSource", + ] + }, + classifiers=[ + "Development Status :: 3 - Alpha", + "Environment :: Web Environment", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: Apache Software License", + "Natural Language :: English", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3", + "Topic :: Scientific/Engineering :: Atmospheric Science", + ], + python_requires=">=3.8", + license="Apache License, Version 2.0", +) diff --git a/drivers/tests/__init__.py b/drivers/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/drivers/tests/resources/test_catalog.yaml b/drivers/tests/resources/test_catalog.yaml new file mode 100644 index 0000000..b5b4b1f --- /dev/null +++ b/drivers/tests/resources/test_catalog.yaml @@ -0,0 +1,26 @@ +metadata: + description: >- + Some description + contact: + name: Data Deliver System Support Team + email: dds-support@cmcc.it + webpage: https://www.cmcc.it/research-organization/research-divisions/advanced-scientific-computing-division#1553329820238-2055494b-9aa6 + publication_date: pub_date + update_frequency: update_frequency + related_data: + - name: related_data_name + url: related_data_url +sources: + ensemble-spread: + description: Ensemble Spread + driver: geokube_netcdf + args: + mapping: {"latitude": {"api": "my_lat", "new_feature": "new_val"}} + path: '/data/inputs/E-OBS/spread/*_ens_*.nc' + pattern: '/data/inputs/E-OBS/spread/{var}_ens_spread_{resolution}deg_reg_{version}.nc' + delay_read_cubes: false + metadata_caching: false + xarray_kwargs: + parallel: true + decode_coords: 'all' + chunks: { 'latitude': -1, 'longitude': -1, 'time': 50 } \ No newline at end of file diff --git a/drivers/tests/test_intake_geokube.py b/drivers/tests/test_intake_geokube.py new file mode 100644 index 0000000..f91a9c5 --- /dev/null +++ b/drivers/tests/test_intake_geokube.py @@ -0,0 +1,24 @@ +import os +import pytest +import intake +import yaml + + +@pytest.fixture +def e_obs_catalog_path(): + yield os.path.join("tests", "resources", "test_catalog.yaml") + + +def test_mapping_1(e_obs_catalog_path): + catalog = intake.open_catalog(e_obs_catalog_path) + ds = catalog["ensemble-spread"].read() + for cb in ds.cubes: + for f in cb.values(): + assert "my_lat" in f.domain._coords + xcb = cb.to_xarray() + assert "my_lat" in xcb + assert "latitude" not in xcb + assert "longitude" in xcb + assert "time" in xcb + assert "new_feature" in xcb.my_lat.attrs + assert xcb.my_lat.attrs["new_feature"] == "new_val" diff --git a/executor/Dockerfile b/executor/Dockerfile index 36f8879..3888c93 100644 --- a/executor/Dockerfile +++ b/executor/Dockerfile @@ -1,11 +1,8 @@ -FROM rg.nl-ams.scw.cloud/dds-production/geokube:v0.2a5 -WORKDIR /code -COPY ./executor/requirements.txt /code/requirements.txt -RUN pip install --no-cache-dir -r requirements.txt -COPY ./db/dbmanager /code/app/db/dbmanager -COPY ./utils/wait-for-it.sh /code/wait-for-it.sh -COPY ./datastore /code/app/datastore -COPY ./geoquery /code/app/geoquery -COPY ./resources /code/app/resources -COPY ./executor/app /code/app -CMD [ "python", "./app/main.py" ] \ No newline at end of file +ARG REGISTRY=rg.fr-par.scw.cloud/geolake +ARG TAG=latest +FROM 
$REGISTRY/geolake-datastore:$TAG +WORKDIR /app +COPY requirements.txt /code/requirements.txt +RUN pip install --no-cache-dir -r /code/requirements.txt +COPY app /app +CMD [ "python", "main.py" ] diff --git a/executor/app/main.py b/executor/app/main.py index e1f8a0b..bf3f494 100644 --- a/executor/app/main.py +++ b/executor/app/main.py @@ -1,55 +1,237 @@ -# We have three type of executor: -# - query executor (query) -# - estimate query executor (estimate) -# - catalog info executor (info) -# -# Configuration parameters for the executor: -# type: query, estimate, catalog -# dask cluster base ports (if they are not provided the cluster is not created: (e.g. for estimate and catalog info)) -# channel: channel_queue, channel_type, channel_durable -# catalog path -# store_path (where to store the query results) -# -# An executor will register to the DB and get a worker id -# if dask cluster base ports are provided, a dask cluster is created -# an executor mush have a unique port for the dask scheduler/dashboard - import os -import json import time +import datetime import pika import logging import asyncio -from dask.distributed import Client, LocalCluster, Nanny, Status - import threading, functools +from zipfile import ZipFile +import numpy as np +from dask.distributed import Client, LocalCluster, Nanny, Status +from dask.delayed import Delayed from geokube.core.datacube import DataCube +from geokube.core.dataset import Dataset +from geokube.core.field import Field from datastore.datastore import Datastore -from db.dbmanager.dbmanager import DBManager, RequestStatus +from workflow import Workflow +from geoquery.geoquery import GeoQuery +from dbmanager.dbmanager import DBManager, RequestStatus from meta import LoggableMeta +from messaging import Message, MessageType _BASE_DOWNLOAD_PATH = "/downloads" -def ds_query(ds_id: str, prod_id: str, query, compute, request_id): - res_path = os.path.join(_BASE_DOWNLOAD_PATH, request_id) - os.makedirs(res_path, exist_ok=True) - kube = Datastore().query(ds_id, prod_id, query, compute) - if isinstance(kube, DataCube): - return kube.persist(res_path) +def get_file_name_for_climate_downscaled(kube: DataCube, message: Message): + query: GeoQuery = GeoQuery.parse(message.content) + is_time_range = False + if query.time: + is_time_range = "start" in query.time or "stop" in query.time + var_names = list(kube.fields.keys()) + if len(kube) == 1: + if is_time_range: + FILENAME_TEMPLATE = "{ncvar_name}_VHR-PRO_IT2km_CMCC-CM_{product_id}_CCLM5-0-9_1hr_{start_date}_{end_date}_{request_id}" + ncvar_name = kube.fields[var_names[0]].ncvar + return FILENAME_TEMPLATE.format( + product_id=message.product_id, + request_id=message.request_id, + ncvar_name=ncvar_name, + start_date=np.datetime_as_string( + kube.time.values[0], unit="D" + ), + end_date=np.datetime_as_string(kube.time.values[-1], unit="D"), + ) + else: + FILENAME_TEMPLATE = "{ncvar_name}_VHR-PRO_IT2km_CMCC-CM_{product_id}_CCLM5-0-9_1hr_{request_id}" + ncvar_name = kube.fields[var_names[0]].ncvar + return FILENAME_TEMPLATE.format( + product_id=message.product_id, + request_id=message.request_id, + ncvar_name=ncvar_name, + ) else: - return kube.persist(res_path, zip_if_many=True) + if is_time_range: + FILENAME_TEMPLATE = "VHR-PRO_IT2km_CMCC-CM_{product_id}_CCLM5-0-9_1hr_{start_date}_{end_date}_{request_id}" + return FILENAME_TEMPLATE.format( + product_id=message.product_id, + request_id=message.request_id, + start_date=np.datetime_as_string( + kube.time.values[0], unit="D" + ), + 
end_date=np.datetime_as_string(kube.time.values[-1], unit="D"), + ) + else: + FILENAME_TEMPLATE = ( + "VHR-PRO_IT2km_CMCC-CM_{product_id}_CCLM5-0-9_1hr_{request_id}" + ) + return FILENAME_TEMPLATE.format( + product_id=message.product_id, + request_id=message.request_id, + ) + + +def rcp85_filename_condition(kube: DataCube, message: Message) -> bool: + return ( + message.dataset_id == "climate-projections-rcp85-downscaled-over-italy" + ) + + +def get_history_message(): + return ( + f"Generated by CMCC DDS version 0.9.0 {str(datetime.datetime.now())}" + ) + + +def persist_datacube( + kube: DataCube, + message: Message, + base_path: str | os.PathLike, +) -> str | os.PathLike: + if rcp85_filename_condition(kube, message): + path = get_file_name_for_climate_downscaled(kube, message) + else: + var_names = list(kube.fields.keys()) + if len(kube) == 1: + path = "_".join( + [ + var_names[0], + message.dataset_id, + message.product_id, + message.request_id, + ] + ) + else: + path = "_".join( + [message.dataset_id, message.product_id, message.request_id] + ) + kube._properties["history"] = get_history_message() + if isinstance(message.content, GeoQuery): + format = message.content.format + else: + format = "netcdf" + match format: + case "netcdf": + full_path = os.path.join(base_path, f"{path}.nc") + kube.to_netcdf(full_path) + case "geojson": + full_path = os.path.join(base_path, f"{path}.json") + kube.to_geojson(full_path) + case _: + raise ValueError(f"format `{format}` is not supported") + return full_path + + +def persist_dataset( + dset: Dataset, + message: Message, + base_path: str | os.PathLike, +): + def _get_attr_comb(dataframe_item, attrs): + return "_".join([dataframe_item[attr_name] for attr_name in attrs]) + + def _persist_single_datacube(dataframe_item, base_path, format): + dcube = dataframe_item[dset.DATACUBE_COL] + if isinstance(dcube, Delayed): + dcube = dcube.compute() + if len(dcube) == 0: + return None + for field in dcube.fields.values(): + if 0 in field.shape: + return None + attr_str = _get_attr_comb(dataframe_item, dset._Dataset__attrs) + var_names = list(dcube.fields.keys()) + if len(dcube) == 1: + path = "_".join( + [ + var_names[0], + message.dataset_id, + message.product_id, + attr_str, + message.request_id, + ] + ) + else: + path = "_".join( + [ + message.dataset_id, + message.product_id, + attr_str, + message.request_id, + ] + ) + match format: + case "netcdf": + full_path = os.path.join(base_path, f"{path}.nc") + dcube.to_netcdf(full_path) + case "geojson": + full_path = os.path.join(base_path, f"{path}.json") + dcube.to_geojson(full_path) + return full_path + + if isinstance(message.content, GeoQuery): + format = message.content.format + else: + format = "netcdf" + datacubes_paths = dset.data.apply( + _persist_single_datacube, base_path=base_path, format=format, axis=1 + ) + paths = datacubes_paths[~datacubes_paths.isna()] + if len(paths) == 0: + return None + elif len(paths) == 1: + return paths.iloc[0] + zip_name = "_".join( + [message.dataset_id, message.product_id, message.request_id] + ) + path = os.path.join(base_path, f"{zip_name}.zip") + with ZipFile(path, "w") as archive: + for file in paths: + archive.write(file, arcname=os.path.basename(file)) + for file in paths: + os.remove(file) + return path + + +def process(message: Message, compute: bool): + res_path = os.path.join(_BASE_DOWNLOAD_PATH, message.request_id) + os.makedirs(res_path, exist_ok=True) + match message.type: + case MessageType.QUERY: + kube = Datastore().query( + message.dataset_id, + 
message.product_id, + message.content, + compute, + ) + case MessageType.WORKFLOW: + kube = Workflow.from_tasklist(message.content).compute() + case _: + raise ValueError("unsupported message type") + if isinstance(kube, Field): + kube = DataCube( + fields=[kube], + properties=kube.properties, + encoding=kube.encoding, + ) + match kube: + case DataCube(): + return persist_datacube(kube, message, base_path=res_path) + case Dataset(): + return persist_dataset(kube, message, base_path=res_path) + case _: + raise TypeError( + "expected geokube.DataCube or geokube.Dataset, but passed" + f" {type(kube).__name__}" + ) class Executor(metaclass=LoggableMeta): _LOG = logging.getLogger("geokube.Executor") - def __init__(self, broker, store_path, cache_path): - self._datastore = Datastore(cache_path=cache_path) - self._datastore._load_cache() + def __init__(self, broker, store_path): self._store = store_path broker_conn = pika.BlockingConnection( pika.ConnectionParameters(host=broker, heartbeat=10), @@ -64,11 +246,11 @@ def create_dask_cluster(self, dask_cluster_opts: dict = None): dask_cluster_opts["scheduler_port"] = int( os.getenv("DASK_SCHEDULER_PORT", 8188) ) + dask_cluster_opts["processes"] = True port = int(os.getenv("DASK_DASHBOARD_PORT", 8787)) dask_cluster_opts["dashboard_address"] = f":{port}" - dask_cluster_opts["n_workers"] = int( - os.getenv("DASK_N_WORKERS", 1) - ) + dask_cluster_opts["n_workers"] = None + dask_cluster_opts["memory_limit"] = "auto" self._worker_id = self._db.create_worker( status="enabled", dask_scheduler_port=dask_cluster_opts["scheduler_port"], @@ -83,6 +265,7 @@ def create_dask_cluster(self, dask_cluster_opts: dict = None): n_workers=dask_cluster_opts["n_workers"], scheduler_port=dask_cluster_opts["scheduler_port"], dashboard_address=dask_cluster_opts["dashboard_address"], + memory_limit=dask_cluster_opts["memory_limit"], ) self._LOG.info( "creating Dask Client...", extra={"track_id": self._worker_id} @@ -121,57 +304,44 @@ def ack_message(self, channel, delivery_tag): ) pass - def query(self, connection, channel, delivery_tag, body): - m = body.decode().split("\\") - request_id = m[0] - dataset_id = m[1] - product_id = m[2] - query = m[3] - self._LOG.debug( - "executing query: `%s`", body, extra={"track_id": request_id} - ) - - # TODO: estimation size should be updated, too - self._db.update_request( - request_id=request_id, - worker_id=self._worker_id, - status=RequestStatus.RUNNING, - ) - self._LOG.debug( - "submitting job for request", extra={"track_id": request_id} - ) - future = self._dask_client.submit( - ds_query, - ds_id=dataset_id, - prod_id=product_id, - query=query, - compute=False, - request_id=request_id, - ) + def retry_until_timeout( + self, + future, + message: Message, + retries: int = 30, + sleep_time: int = 10, + ): + assert retries is not None, "`retries` cannot be `None`" + assert sleep_time is not None, "`sleep_time` cannot be `None`" status = fail_reason = location_path = None try: self._LOG.debug( "attempt to get result for the request", - extra={"track_id": request_id}, + extra={"track_id": message.request_id}, ) - for _ in range(int(os.environ.get("RESULT_CHECK_RETRIES", 30))): + for _ in range(retries): if future.done(): self._LOG.debug( "result is done", - extra={"track_id": request_id}, + extra={"track_id": message.request_id}, ) location_path = future.result() status = RequestStatus.DONE + self._LOG.debug( + "result save under: %s", + location_path, + extra={"track_id": message.request_id}, + ) break self._LOG.debug( - "result is not 
ready yet. sleeping 30 sec", - extra={"track_id": request_id}, + f"result is not ready yet. sleeping {sleep_time} sec", + extra={"track_id": message.request_id}, ) - time.sleep(int(os.environ.get("SLEEP_SEC", 30))) + time.sleep(sleep_time) else: self._LOG.info( "processing timout", - extra={"track_id": request_id}, + extra={"track_id": message.request_id}, ) future.cancel() status = RequestStatus.TIMEOUT @@ -182,12 +352,43 @@ def query(self, connection, channel, delivery_tag, body): e, exc_info=True, stack_info=True, - extra={"track_id": request_id}, + extra={"track_id": message.request_id}, ) status = RequestStatus.FAILED - fail_reason = f"{type(e)}: {str(e)}" + fail_reason = f"{type(e).__name__}: {str(e)}" + return (location_path, status, fail_reason) + + def handle_message(self, connection, channel, delivery_tag, body): + message: Message = Message(body) + self._LOG.debug( + "executing query: `%s`", + message.content, + extra={"track_id": message.request_id}, + ) + + # TODO: estimation size should be updated, too self._db.update_request( - request_id=request_id, + request_id=message.request_id, + worker_id=self._worker_id, + status=RequestStatus.RUNNING, + ) + + self._LOG.debug( + "submitting job for workflow request", + extra={"track_id": message.request_id}, + ) + future = self._dask_client.submit( + process, + message=message, + compute=False, + ) + location_path, status, fail_reason = self.retry_until_timeout( + future, + message=message, + retries=int(os.environ.get("RESULT_CHECK_RETRIES")), + ) + self._db.update_request( + request_id=message.request_id, worker_id=self._worker_id, status=status, location_path=location_path, @@ -195,19 +396,22 @@ def query(self, connection, channel, delivery_tag, body): fail_reason=fail_reason, ) self._LOG.debug( - "acknowledging request", extra={"track_id": request_id} + "acknowledging request", extra={"track_id": message.request_id} ) cb = functools.partial(self.ack_message, channel, delivery_tag) connection.add_callback_threadsafe(cb) self.maybe_restart_cluster(status) - self._LOG.debug("request acknowledged", extra={"track_id": request_id}) + self._LOG.debug( + "request acknowledged", extra={"track_id": message.request_id} + ) def on_message(self, channel, method_frame, header_frame, body, args): (connection, threads) = args delivery_tag = method_frame.delivery_tag t = threading.Thread( - target=self.query, args=(connection, channel, delivery_tag, body) + target=self.handle_message, + args=(connection, channel, delivery_tag, body), ) t.start() threads.append(t) @@ -239,14 +443,11 @@ def get_size(self, location_path): if __name__ == "__main__": - broker = os.getenv("BROKER", "broker") + broker = os.getenv("BROKER_SERVICE_HOST", "broker") executor_types = os.getenv("EXECUTOR_TYPES", "query").split(",") store_path = os.getenv("STORE_PATH", ".") - cache_path = os.getenv("CACHE_PATH", ".") - executor = Executor( - broker=broker, store_path=store_path, cache_path=cache_path - ) + executor = Executor(broker=broker, store_path=store_path) print("channel subscribe") for etype in executor_types: if etype == "query": diff --git a/executor/app/messaging.py b/executor/app/messaging.py new file mode 100644 index 0000000..21ce585 --- /dev/null +++ b/executor/app/messaging.py @@ -0,0 +1,45 @@ +import os +import logging +from enum import Enum + +from geoquery.geoquery import GeoQuery +from geoquery.task import TaskList + +MESSAGE_SEPARATOR = os.environ["MESSAGE_SEPARATOR"] + + +class MessageType(Enum): + QUERY = "query" + WORKFLOW = "workflow" + + +class 
Message: + _LOG = logging.getLogger("geokube.Message") + + request_id: int + dataset_id: str = "" + product_id: str = "" + type: MessageType + content: GeoQuery | TaskList + + def __init__(self, load: bytes) -> None: + self.request_id, msg_type, *query = load.decode().split( + MESSAGE_SEPARATOR + ) + match MessageType(msg_type): + case MessageType.QUERY: + self._LOG.debug("processing content of `query` type") + assert len(query) == 3, "improper content for query message" + self.dataset_id, self.product_id, self.content = query + self.content: GeoQuery = GeoQuery.parse(self.content) + self.type = MessageType.QUERY + case MessageType.WORKFLOW: + self._LOG.debug("processing content of `workflow` type") + assert len(query) == 1, "improper content for workflow message" + self.content: TaskList = TaskList.parse(query[0]) + self.dataset_id = self.content.dataset_id + self.product_id = self.content.product_id + self.type = MessageType.WORKFLOW + case _: + self._LOG.error("type `%s` is not supported", msg_type) + raise ValueError(f"type `{msg_type}` is not supported!") diff --git a/executor/app/meta.py b/executor/app/meta.py index 1f09cd8..739ef62 100644 --- a/executor/app/meta.py +++ b/executor/app/meta.py @@ -12,8 +12,8 @@ def __new__(cls, child_cls, bases, namespace): if hasattr(res, "_LOG"): format_ = os.environ.get( "LOGGING_FORMAT", - "%(asctime)s %(name)s %(levelname)s %(lineno)d %(track_id)s" - " %(message)s", + "%(asctime)s %(name)s %(levelname)s %(lineno)d" + " %(track_id)s %(message)s", ) formatter = logging.Formatter(format_) logging_level = os.environ.get("LOGGING_LEVEL", "INFO") diff --git a/executor/requirements.txt b/executor/requirements.txt index 34c41ff..f188e90 100644 --- a/executor/requirements.txt +++ b/executor/requirements.txt @@ -1,8 +1,4 @@ pika==1.2.1 -bokeh>=2.4.2,<3 prometheus_client -dask -distributed -intake -pydantic -sqlalchemy \ No newline at end of file +sqlalchemy +pydantic \ No newline at end of file diff --git a/geokube_packages/Dockerfile b/geokube_packages/Dockerfile deleted file mode 100644 index d7e17de..0000000 --- a/geokube_packages/Dockerfile +++ /dev/null @@ -1,9 +0,0 @@ -FROM continuumio/miniconda3 -RUN conda install -c conda-forge --yes --freeze-installed \ - cartopy xesmf=0.6.3 psycopg2 \ - 'bokeh>=2.4.2,<3' numpy=1.23.5 pandas=1.4.3 netCDF4 scipy xarray=2022.6.0 \ - && conda clean -afy -COPY geokube-0.2a0-py3-none-any.whl / -COPY intake_geokube-0.1a0-py3-none-any.whl / -RUN pip install /geokube-0.2a0-py3-none-any.whl -RUN pip install /intake_geokube-0.1a0-py3-none-any.whl diff --git a/geokube_packages/environment.yaml b/geokube_packages/environment.yaml deleted file mode 100644 index 3163504..0000000 --- a/geokube_packages/environment.yaml +++ /dev/null @@ -1,25 +0,0 @@ -name: t3 -channels: - - conda-forge -dependencies: - - python=3.9 - - numpy=1.20 - - xarray=2022.6.0 - - geopandas=0.9.0 - - netcdf4=1.6.0 - - pandas=1.4.3 - - dask - - distributed - - cartopy - - xesmf - - psycopg2 - - bokeh=2.4.2 - # - scipy - # - plotly - # - cf_units - # - hvplot - # - shapely - # - metpy - # - pyarrow - # - pytest-cov - # - pytest \ No newline at end of file diff --git a/geokube_packages/geokube-0.2a0-py3-none-any.whl b/geokube_packages/geokube-0.2a0-py3-none-any.whl deleted file mode 100644 index 2251f27..0000000 Binary files a/geokube_packages/geokube-0.2a0-py3-none-any.whl and /dev/null differ diff --git a/geokube_packages/geokube-0.2a0-py3-none-any.whl.bkp b/geokube_packages/geokube-0.2a0-py3-none-any.whl.bkp deleted file mode 100644 index e82675c..0000000 
Binary files a/geokube_packages/geokube-0.2a0-py3-none-any.whl.bkp and /dev/null differ diff --git a/geokube_packages/geokube-0.2a0-py3-none-any.whl.bkp2 b/geokube_packages/geokube-0.2a0-py3-none-any.whl.bkp2 deleted file mode 100644 index a328489..0000000 Binary files a/geokube_packages/geokube-0.2a0-py3-none-any.whl.bkp2 and /dev/null differ diff --git a/geokube_packages/intake_geokube-0.1a0-py3-none-any.whl b/geokube_packages/intake_geokube-0.1a0-py3-none-any.whl deleted file mode 100644 index 22914e9..0000000 Binary files a/geokube_packages/intake_geokube-0.1a0-py3-none-any.whl and /dev/null differ diff --git a/resources/catalogs/catalog.yaml b/resources/catalogs/catalog.yaml deleted file mode 100644 index 327029a..0000000 --- a/resources/catalogs/catalog.yaml +++ /dev/null @@ -1,7 +0,0 @@ -sources: - - e-obs: - driver: intake.catalog.local.YAMLFileCatalog - args: - path: '{{ CATALOG_DIR }}/external/e-obs.yaml' - diff --git a/resources/catalogs/external/e-obs.yaml b/resources/catalogs/external/e-obs.yaml deleted file mode 100644 index c1a370e..0000000 --- a/resources/catalogs/external/e-obs.yaml +++ /dev/null @@ -1,28 +0,0 @@ -metadata: - description: >- - E-OBS is a daily gridded land-only observational dataset over Europe. The blended time series from the station network of the European Climate Assessment & Dataset (ECA&D) project form the basis for the E-OBS gridded dataset. All station data are sourced directly from the European National Meteorological and Hydrological Services (NMHSs) or other data holding institutions. For a considerable number of countries the number of stations used is the complete national network and therefore much more dense than the station network that is routinely shared among NMHSs (which is the basis of other gridded datasets). The density of stations gradually increases through collaborations with NMHSs within European research contracts. Initially, in 2008, this gridded dataset was developed to provide validation for the suite of Europe-wide climate model simulations produced as part of the European Union ENSEMBLES project. While E-OBS remains an important dataset for model validation, it is also used more generally for monitoring the climate across Europe, particularly with regard to the assessment of the magnitude and frequency of daily extremes. The position of E-OBS is unique in Europe because of the relatively high spatial horizontal grid spacing, the daily resolution of the dataset, the provision of multiple variables and the length of the dataset. Finally, the station data on which E-OBS is based are available through the ECA&D webpages (where the owner of the data has given permission to do so). In these respects it contrasts with other datasets. The dataset is daily, meaning the observations cover 24 hours per time step. The exact 24-hour period can be different per region. The reason for this is that some data providers measure between midnight to midnight while others might measure from morning to morning. Since E-OBS is an observational dataset, no attempts have been made to adjust time series for this 24-hour offset. It is made sure, where known, that the largest part of the measured 24-hour period corresponds to the day attached to the time step in E-OBS (and ECA&D). 
- contact: dds-support@cmcc.it - label: E-OBS daily gridded meteorological data for Europe from 1950 to present - image: https://diasfiles.cmccos.it/images/e-obs.png - doi: https://doi.org/10.24381/cds.151d3ec6 - license: - name: E-OBS Product License - url: https://www.ecad.eu/documents/ECAD_datapolicy.pdf - publication_date: 2020-08-01 - - -sources: - ensemble: - description: E-OBS Ensemble Dataset - driver: geokube_netcdf - metadata: - role: internal - args: - pattern: '/code/app/resources/netcdfs/e-obs-ensemble-{ensemble}-var_{var}-resolution_{resolution}-version_{version}.0e.nc' - path: '/code/app/resources/netcdfs/e-obs-ensemble-mean-var_*.nc' - field_id: '{__ddsapi_name}' - delay_read_cubes: false - metadata_caching: true - metadata_cache_path: 'e-obs.cache' - - diff --git a/resources/netcdfs/e-obs-ensemble-mean-var_rr-resolution_0.1-version_v20.0e.nc b/resources/netcdfs/e-obs-ensemble-mean-var_rr-resolution_0.1-version_v20.0e.nc deleted file mode 100644 index e741bbc..0000000 Binary files a/resources/netcdfs/e-obs-ensemble-mean-var_rr-resolution_0.1-version_v20.0e.nc and /dev/null differ diff --git a/resources/netcdfs/e-obs-ensemble-mean-var_rr-resolution_0.25-version_v20.0e.nc b/resources/netcdfs/e-obs-ensemble-mean-var_rr-resolution_0.25-version_v20.0e.nc deleted file mode 100644 index e363110..0000000 Binary files a/resources/netcdfs/e-obs-ensemble-mean-var_rr-resolution_0.25-version_v20.0e.nc and /dev/null differ diff --git a/resources/netcdfs/e-obs-ensemble-mean-var_tg-resolution_0.1-version_v20.0e.nc b/resources/netcdfs/e-obs-ensemble-mean-var_tg-resolution_0.1-version_v20.0e.nc deleted file mode 100644 index 80a2810..0000000 Binary files a/resources/netcdfs/e-obs-ensemble-mean-var_tg-resolution_0.1-version_v20.0e.nc and /dev/null differ diff --git a/resources/netcdfs/e-obs-ensemble-mean-var_tg-resolution_0.1-version_v21.0e.nc b/resources/netcdfs/e-obs-ensemble-mean-var_tg-resolution_0.1-version_v21.0e.nc deleted file mode 100644 index 66b7657..0000000 Binary files a/resources/netcdfs/e-obs-ensemble-mean-var_tg-resolution_0.1-version_v21.0e.nc and /dev/null differ diff --git a/resources/netcdfs/e-obs-ensemble-mean-var_tg-resolution_0.25-version_v20.0e.nc b/resources/netcdfs/e-obs-ensemble-mean-var_tg-resolution_0.25-version_v20.0e.nc deleted file mode 100644 index d85ff9c..0000000 Binary files a/resources/netcdfs/e-obs-ensemble-mean-var_tg-resolution_0.25-version_v20.0e.nc and /dev/null differ diff --git a/resources/netcdfs/e-obs-ensemble-mean-var_tg-resolution_0.25-version_v21.0e.nc b/resources/netcdfs/e-obs-ensemble-mean-var_tg-resolution_0.25-version_v21.0e.nc deleted file mode 100644 index 37fd10c..0000000 Binary files a/resources/netcdfs/e-obs-ensemble-mean-var_tg-resolution_0.25-version_v21.0e.nc and /dev/null differ diff --git a/resources/scripts/prepare_catalog_entry.py b/resources/scripts/prepare_catalog_entry.py deleted file mode 100644 index 3d60055..0000000 --- a/resources/scripts/prepare_catalog_entry.py +++ /dev/null @@ -1,323 +0,0 @@ -from typing import Optional -from datetime import datetime - -from pydantic import BaseModel, AnyHttpUrl, FileUrl, validator, root_validator -import yaml - -STEPS = 11 - - -class Contact(BaseModel): - name: Optional[str] - email: Optional[str] - webpage: Optional[str] = None # AnyHttpUrl - - @root_validator(pre=True) - def match_contact(cls, values): - print(f"Step 8/{STEPS}: Defining contact person") - return values - - @validator("name", pre=True, always=True) - def match_name(cls, _, values): - return input(f"Step 8.1: Name of 
the contact person: ") - - @validator("email", pre=True, always=True) - def match_email(cls, _, values): - return input(f"Step 8.2: Email of the contact person: ") - - @validator("webpage", pre=True, always=True) - def match_webpage(cls, _, values): - if url := input(f"Step 8.3: Webpage (optional): ").strip() == "": - return None - return url - - -class License(BaseModel): - name: Optional[str] - webpage: Optional[str] # AnyHttpUrl - - @root_validator(pre=True) - def match_license(cls, values): - print(f"Step 9/{STEPS}: Defining license") - return values - - @validator("name", pre=True, always=True) - def match_name(cls, _, values): - return input(f"Step 9.1: Name of the license: ") - - @validator("webpage", pre=True, always=True) - def match_webpage(cls, _, values): - return input(f"Step 9.2: Webpage of the license: ") - - -class RelatedData(BaseModel): - name: Optional[str] - webpage: Optional[str] # AnyHttpUrl - - @validator("name", pre=True, always=True) - def match_name(cls, _, values): - return input(f"Step 10.1: Name of the related data: ") - - @validator("webpage", pre=True, always=True) - def match_webpage(cls, _, values): - return input(f"Step 10.2: Webpage of the related data: ") - - -class Metadata(BaseModel): - dataset_id: Optional[str] - description: Optional[str] - attribution: Optional[str] - label: Optional[str] - image: Optional[str] # FileUrl - doi: Optional[str] - publication_date: Optional[str] - contact: Optional[Contact] - license: Optional[License] - related_data: Optional[list[RelatedData]] - - @validator("dataset_id", pre=True, always=True) - def match_dataset_id(cls, _, values): - while True: - dataset_id = input( - f"Step 1/{STEPS}: What is the name of the dataset (no" - " whitspaces)? " - ) - for letter in dataset_id: - if letter.isspace(): - print("Dataset id cannot have whitespaces") - break - else: - if dataset_id.strip() != "": - return dataset_id - - @validator("description", pre=True, always=True) - def match_desc(cls, _, values): - return input(f"Step 2/{STEPS}: Dataset description: ") - - @validator("attribution", pre=True, always=True) - def match_attr(cls, _, values): - return input(f"Step 3/{STEPS}: Dataset attribution: ") - - @validator("label", pre=True, always=True) - def match_label(cls, _, values): - while True: - lab = input(f"Step 4/{STEPS}: Dataset label [<100 characters]: ") - if len(lab) >= 100: - print( - f"Label of size {len(lab)} is too long. 
It should be <100" - ) - continue - return lab - - @validator("image", pre=True, always=True) - def match_img(cls, _, values): - return input(f"Step 5/{STEPS}: Dataset image URL: ") - - @validator("doi", pre=True, always=True) - def match_doi(cls, _, values): - if ( - doi := input(f"Step 6/{STEPS}: Dataset DOI (optional): ").strip() - ) == "": - return None - return doi - - @validator("publication_date", pre=True, always=True) - def match_pub_date(cls, _, values): - while True: - pub_date = input( - f"Step 7/{STEPS}: Publication date (YYYY-MM-DD): " - ) - try: - return datetime.strptime(pub_date, "%Y-%m-%d").strftime( - "%Y-%m-%d" - ) - except ValueError as err: - print(err) - - @validator("contact", pre=True, always=True) - def match_contact(cls, _, values): - return Contact() - - @validator("license", pre=True, always=True) - def match_license(cls, _, values): - return License() - - @validator("related_data", pre=True, always=True) - def match_rel_data(cls, _, values): - print(f"Step 10/{STEPS}: Defining related data") - while True: - rel_data_nbr = input( - f"Step 10.0/{STEPS}: How many related data would you like to" - " define? " - ) - rel_data = [] - try: - rel_data_nbr = int(rel_data_nbr) - except ValueError: - print("You should pass a number!") - else: - break - for i in range(1, rel_data_nbr + 1): - breakpoint() - print(f"Related dataset {i}/{rel_data_nbr}") - rel_data.append(RelatedData()) - return rel_data - - -class XarrayKwrgs(BaseModel): - parallel: Optional[bool] = True - decode_coords: Optional[str] = "all" - chunks: Optional[dict] = None - - -class Args(BaseModel): - path: Optional[str] - delay_read_cubes: Optional[bool] = False - field_id: Optional[str] = None - metadata_caching: Optional[bool] = True - metadata_cache_path: Optional[str] = None - chunks: Optional[dict] - mapping: Optional[dict] - xarray_kwargs: Optional[XarrayKwrgs] = None - - @root_validator(pre=True) - def match_root(cls, values): - print( - f"Step 11.6: Defining arguments !! Leave default if you don't" - f" know !! " - ) - return values - - @validator("path", pre=True, always=True) - def match_path(cls, _, values): - while True: - path = input( - f"Step 11.6.1: Path (use glob patterns if required!) " - ) - if path.strip() != "": - return path - - @validator("xarray_kwargs", pre=True, always=True) - def match_xarray_kwargs(cls, _, values): - return - - -class ProdMetadata(BaseModel): - role: Optional[str] - - @validator("role", pre=True, always=True) - def match_role(cls, _, values): - role = ( - input(f"Step 11.3: Product role (optional) [default: public]: ") - .lower() - .strip() - ) - if role == "": - return "public" - return role - - -class Product(BaseModel): - product_id: Optional[str] - description: Optional[str] - metadata: Optional[ProdMetadata] - maximum_query_size_gb: Optional[float] = 10 - driver: Optional[str] = "geokube_netcdf" - args: Optional[Args] - - @validator("product_id", pre=True, always=True) - def match_product_id(cls, _, values): - while True: - dataset_id = input( - f"Step 11.1: What is the name of the product (no whitspaces)? 
" - ) - if dataset_id.strip() != "": - return dataset_id - - @validator("description", pre=True, always=True) - def match_desc(cls, _, values): - return input(f"Step 11.2: Product description: ") - - @validator("metadata", pre=True, always=True) - def match_role(cls, _, values): - print("Step 11.3: Product metadata") - return ProdMetadata() - - @validator("maximum_query_size_gb", pre=True, always=True) - def match_query_limit(cls, _, values): - query_limit = ( - input( - f"Step 11.4: Maximum query size in GB (optional) [default: 10" - f" GB]: " - ) - .lower() - .strip() - ) - if query_limit == "": - return 10 - try: - query_limit = float(query_limit) - except ValueError: - print("Query limit should be a number!") - else: - return query_limit - - @validator("driver", pre=True, always=True) - def match_driver(cls, _, values): - driver = ( - input( - f"Step 11.5: Driver to use. !! Leave default if you don't" - f" know!! [default: geokube_netcdf]: " - ) - .lower() - .strip() - ) - if driver == "": - return "geokube_netcdf" - return driver - - @validator("args", pre=True, always=True) - def match_args(cls, _, values): - return Args() - - -class CatalogEntry(BaseModel): - metadata: Optional[Metadata] - sources: Optional[dict[str, Product]] - - @validator("metadata", pre=True, always=True) - def match_metadata(cls, _, values): - return Metadata() - - @validator("sources", pre=True, always=True) - def match_sources(cls, _, values): - print(f"Step 11/{STEPS}: Defining products") - while True: - prod_nbr = input( - f"Step 11.0: How many products would you like to define? " - ) - try: - prod_nbr = int(prod_nbr) - except ValueError: - print("You should pass a number!") - else: - break - prod_data = {} - for i in range(1, prod_nbr + 1): - print(f"Product {i}/{prod_nbr}") - prod = Product() - prod_data[prod.product_id] = prod - return prod_data - - -if __name__ == "__main__": - print("=== Preparing the new catalog entry .yaml file! ===") - entry = CatalogEntry() - file_name = f"{entry.metadata.dataset_id}.yaml" - with open(file_name, "wt") as file: - yaml.dump(entry.dict(), file) - - print( - f"The catalog entry file '{file_name}' was generated! 
Now send it to:" - " dds-support@cmcc.it" - ) diff --git a/web/Dockerfile b/web/Dockerfile deleted file mode 100644 index 09ae07a..0000000 --- a/web/Dockerfile +++ /dev/null @@ -1,16 +0,0 @@ -FROM rg.nl-ams.scw.cloud/dds-production/geokube:v0.2a5 -WORKDIR /code -COPY ./web/requirements.txt /code/requirements.txt -RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt -COPY ./utils/wait-for-it.sh /code/wait-for-it.sh -COPY ./datastore /code/app/datastore -COPY ./db/dbmanager /code/db/dbmanager -COPY ./geoquery/ /code/geoquery -COPY ./resources /code/app/resources -COPY ./web/app /code/app -COPY ./web/tests /code/tests -# RUN pytest /code/tests -EXPOSE 80 -# CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "80"] -# if behind a proxy use --proxy-headers -CMD ["uvicorn", "app.main:app", "--proxy-headers", "--host", "0.0.0.0", "--port", "80"] \ No newline at end of file diff --git a/web/app/access.py b/web/app/access.py deleted file mode 100644 index 2ff3f89..0000000 --- a/web/app/access.py +++ /dev/null @@ -1,105 +0,0 @@ -"""Module responsible for authnetication and authorization functionalities""" -from __future__ import annotations - -import logging -import requests -import jwt - -from db.dbmanager.dbmanager import DBManager - -from .utils import UserCredentials, log_execution_time -from .meta import LoggableMeta -from .exceptions import AuthenticationFailed, UserAlreadyExistError - - -class AccessManager(metaclass=LoggableMeta): - """The component for managing access to data, authentication, and - authorization of a user""" - - _LOG = logging.getLogger("geokube.AccessManager") - - @classmethod - def _decode_jwt(cls, authorization: str) -> dict: - response = requests.get( - "https://auth01.cmcc.it/realms/DDS", timeout=10 - ) - # NOTE: public key 2nd and 3rd lines cannot be indented - keycloak_public_key = f"""-----BEGIN PUBLIC KEY----- -{response.json()['public_key']} ------END PUBLIC KEY-----""" - if not authorization: - cls._LOG.info( - "`authorization` header is empty! using public profile" - ) - return UserCredentials() - token = authorization.split(" ")[-1] - return jwt.decode(token, keycloak_public_key, audience="account") - - @classmethod - def _infer_roles(cls, user: dict) -> list[str]: - if user["email"].endswith("cmcc.it"): - return ["cmcc"] - return ["public"] - - @classmethod - @log_execution_time(_LOG) - def retrieve_credentials_from_jwt(cls, authorization) -> UserCredentials: - """Get credentials based on JWT token or public profile, - if `authorization` header is not provided. 
- - Parameters - ---------- - authorization : str - Value of a request header with name `Authorization` - - Returns - ------- - user_credentials : UserCredentials - Current user credentials - - Raises - ------- - AuthenticationFailed - If user was not authenticated properly - """ - cls._LOG.debug("getting credentials based on JWT") - user_id = cls._decode_jwt(authorization=authorization)["sub"] - # NOTE: if user with `user_id` is defined with DB, - # we claim authorization was successful - if user_details := DBManager().get_user_details(user_id): - return UserCredentials( - user_id=user_id, user_token=user_details.api_key - ) - cls._LOG.info("no user found for id `%s`", user_id) - raise AuthenticationFailed - - @classmethod - @log_execution_time(_LOG) - def add_user(cls, authorization: str): - """Add user to the database and return generated api key - - Parameters - ---------- - authorization : str - `Authorization` token - - Returns - ------- - user : User - User added to DB - - Raises - ------ - UserAlreadyExistError - Raised if user is already present in the database - """ - user = cls._decode_jwt(authorization=authorization) - if (user_details := DBManager().get_user_details(user["sub"])) is None: - contact_name = " ".join([user["given_name"], user["family_name"]]) - roles = cls._infer_roles(user_details) - return DBManager().add_user( - contact_name=contact_name, - user_id=user["sub"], - roles_names=roles, - ) - raise UserAlreadyExistError diff --git a/web/app/context.py b/web/app/context.py deleted file mode 100644 index b0dddbc..0000000 --- a/web/app/context.py +++ /dev/null @@ -1,99 +0,0 @@ -"""Module contains Context class definition""" -from uuid import UUID, uuid4 - -from fastapi import Request - -from .utils.auth import UserCredentials -from .access import AccessManager -from .exceptions import AuthenticationFailed - - -class Context: - """The class managing execution context of the single request passing - through the Web component. Its attributes are immutable when set to - non-None values. - - Context contains following attributes: - 1. user: UserCredentials - Credentials of the user within the context - 2. rid: UUID - ID of the request passing throught the Web component - - """ - - user: UserCredentials - rid: UUID - - def __init__( - self, - request: Request, - authorization: str = None, - *, - enable_public: bool = False - ): - """Create an instance of the context. - - Parameters - ---------- - request : fastapi.Request - A request object to use - authorization : optional, str, default=`None` - Authorization token in the format `Bearer ...` - enable_public : optional, bool, default=`False` - Flag indicating if public profile is allowed or if authenticated - user is required - - Raises - ------ - AuthenticationFailed - If authorization token was wrong and `enable_public` was set - to `False` - """ - self.user = None - if authorization: - try: - self.define_user(authorization) - except AuthenticationFailed as err: - if enable_public: - self.user = UserCredentials() - else: - raise err - self.rid = uuid4() - - def __delattr__(self, name): - if getattr(self, name, None) is not None: - raise AttributeError("The attribute '{name}' cannot be deleted!") - super().__delattr__(name) - - def __setattr__(self, name, value): - if getattr(self, name, None) is not None: - raise AttributeError( - "The attribute '{name}' cannot modified when not None!" 
- ) - super().__setattr__(name, value) - - def is_user_defined(self): - """Check if user is defined for the context - - Returns - ------- - user_defined : bool - `True` if user is defined for the context, `False` otherwise - """ - return self.user is not None - - def define_user(self, authorization: str): - """Define the user for the context by means of the 'authorization' header. - If 'authorization' is 'None', defines the public profile. - - Parameters - ---------- - authorization : str - Authorization header - - Raises - ------ - AuthenticationFailed - if authorization token was not associated with the user - """ - self.user = AccessManager.retrieve_credentials_from_jwt(authorization) diff --git a/web/app/exceptions.py b/web/app/exceptions.py deleted file mode 100644 index 6b5c8e9..0000000 --- a/web/app/exceptions.py +++ /dev/null @@ -1,13 +0,0 @@ -"""Module with definitions of exceptions for 'web' component""" - - -class AuthenticationFailed(ValueError): - """User authentication failed""" - - -class GeokubeAPIRequestFailed(RuntimeError): - """Error while sending request to geokube-dds API""" - - -class UserAlreadyExistError(KeyError): - """Given user already exists""" diff --git a/web/app/main.py b/web/app/main.py deleted file mode 100644 index ae9c0ff..0000000 --- a/web/app/main.py +++ /dev/null @@ -1,275 +0,0 @@ -"""Endpoints for `web` component""" -__version__ = "2.0" -import os -from typing import Optional - -from fastapi import FastAPI, Header, HTTPException, Request -from fastapi.middleware.cors import CORSMiddleware -from geoquery.geoquery import GeoQuery - -from aioprometheus import ( - Counter, - Summary, - Gauge, - timer, - inprogress, - count_exceptions, - MetricsMiddleware, -) -from aioprometheus.asgi.starlette import metrics - -from .access import AccessManager -from .models import ListOfDatasets, ListOfRequests -from .requester import GeokubeAPIRequester -from .widget import WidgetFactory -from .exceptions import ( - AuthenticationFailed, - GeokubeAPIRequestFailed, -) -from .context import Context -from .utils.numeric import prepare_estimate_size_message - -app = FastAPI( - title="geokube-dds API for Webportal", - description="REST API for DDS Webportal", - version=__version__, - contact={ - "name": "geokube Contributors", - "email": "geokube@googlegroups.com", - }, - license_info={ - "name": "Apache 2.0", - "url": "https://www.apache.org/licenses/LICENSE-2.0.html", - }, - root_path=os.environ.get("ENDPOINT_PREFIX", "/web"), - on_startup=[GeokubeAPIRequester.init], -) - -# ======== CORS ========= # -if "ALLOWED_CORS_ORIGINS_REGEX" in os.environ: - cors_kwargs = { - "allow_origin_regex": os.environ["ALLOWED_CORS_ORIGINS_REGEX"] - } -else: - cors_kwargs = {"allow_origins": ["*"]} - -app.add_middleware( - CORSMiddleware, - allow_credentials=True, - allow_methods=["*"], - allow_headers=["*"], - **cors_kwargs, -) - -# ======== Prometheus metrics ========= # -app.add_middleware(MetricsMiddleware) -app.add_route("/metrics", metrics) - -app.state.web_request_duration_seconds = Summary( - "web_request_duration_seconds", "Request duration" -) -app.state.web_http_requests_total = Counter( - "web_http_requests_total", "Total number of requests" -) -app.state.web_exceptions_total = Counter( - "web_exceptions_total", "Total number of exception raised" -) -app.state.web_requests_inprogress_total = Gauge( - "web_requests_inprogress_total", "Endpoints being currently in progress" -) - -# ======== Endpoints definitions ========= # -@app.get("/") -async def dds_info(): - """Return current 
version of the DDS API for the Webportal""" - return f"DDS Webportal API {__version__}" - - -@app.get("/datasets") -@timer( - app.state.web_request_duration_seconds, labels={"route": "GET /datasets"} -) -@count_exceptions( - app.state.web_exceptions_total, labels={"route": "GET /datasets"} -) -@inprogress( - app.state.web_requests_inprogress_total, labels={"route": "GET /datasets"} -) -async def get_datasets( - request: Request, - authorization: Optional[str] = Header(None, convert_underscores=True), -): - """Get list of eligible datasets for the home page of the Webportal""" - app.state.web_http_requests_total.inc({"type": "GET /datasets"}) - try: - context = Context(request, authorization, enable_public=True) - datasets = GeokubeAPIRequester.get(url="/datasets", context=context) - except GeokubeAPIRequestFailed as err: - raise HTTPException(status_code=400, detail=str(err)) from err - else: - return ListOfDatasets.from_details(datasets) - - -@app.get("/datasets/{dataset_id}/{product_id}") -@timer( - app.state.web_request_duration_seconds, - labels={"route": "GET /datasets/{dataset_id}/{product_id}"}, -) -@count_exceptions( - app.state.web_exceptions_total, - labels={"route": "GET /datasets/{dataset_id}/{product_id}"}, -) -@inprogress( - app.state.web_requests_inprogress_total, - labels={"route": "GET /datasets/{dataset_id}/{product_id}"}, -) -async def get_details_product( - request: Request, - dataset_id: str, - product_id: str, - authorization: Optional[str] = Header(None, convert_underscores=True), -): - """Get details for Webportal""" - app.state.web_http_requests_total.inc( - {"type": "GET /datasets/{dataset_id}/{product_id}"} - ) - try: - context = Context(request, authorization, enable_public=True) - details = GeokubeAPIRequester.get( - url=f"/datasets/{dataset_id}/{product_id}", context=context - ) - except GeokubeAPIRequestFailed as err: - raise HTTPException(status_code=400, detail=str(err)) from err - else: - return WidgetFactory(details).widgets - - -@app.post("/datasets/{dataset_id}/{product_id}/execute") -@timer( - app.state.web_request_duration_seconds, - labels={"route": "POST /datasets/{dataset_id}/{product_id}/execute"}, -) -@count_exceptions( - app.state.web_exceptions_total, - labels={"route": "POST /datasets/{dataset_id}/{product_id}/execute"}, -) -@inprogress( - app.state.web_requests_inprogress_total, - labels={"route": "POST /datasets/{dataset_id}/{product_id}/execute"}, -) -async def execute( - request: Request, - dataset_id: str, - product_id: str, - query: GeoQuery, - authorization: Optional[str] = Header(None, convert_underscores=True), -): - """Schedule the job of data retrieving by using geokube-dds API""" - app.state.web_http_requests_total.inc( - {"route": "POST /datasets/{dataset_id}/{product_id}/execute"} - ) - try: - context = Context(request, authorization, enable_public=False) - response = GeokubeAPIRequester.post( - url=f"/datasets/{dataset_id}/{product_id}/execute", - data=query.json(), - context=context, - ) - except AuthenticationFailed as err: - raise HTTPException( - status_code=401, detail="User could not be authenticated" - ) from err - except GeokubeAPIRequestFailed as err: - raise HTTPException(status_code=400, detail=str(err)) from err - else: - return response - - -@app.post("/datasets/{dataset_id}/{product_id}/estimate") -@timer( - app.state.web_request_duration_seconds, - labels={"route": "POST /datasets/{dataset_id}/{product_id}/estimate"}, -) -@count_exceptions( - app.state.web_exceptions_total, - labels={"route": "POST 
/datasets/{dataset_id}/{product_id}/estimate"}, -) -@inprogress( - app.state.web_requests_inprogress_total, - labels={"route": "POST /datasets/{dataset_id}/{product_id}/estimate"}, -) -async def estimate( - request: Request, - dataset_id: str, - product_id: str, - query: GeoQuery, - authorization: Optional[str] = Header(None, convert_underscores=True), -): - """Estimate the resulting size of the query by using geokube-dds API""" - app.state.web_http_requests_total.inc( - {"route": "POST /datasets/{dataset_id}/{product_id}/estimate"} - ) - try: - context = Context(request, authorization, enable_public=True) - response = GeokubeAPIRequester.post( - url=f"/datasets/{dataset_id}/{product_id}/estimate?unit=GB", - data=query.json(), - context=context, - ) - metadata = GeokubeAPIRequester.get( - url=f"/datasets/{dataset_id}/{product_id}/metadata", - context=context, - ) - except GeokubeAPIRequestFailed as err: - raise HTTPException(status_code=400, detail=str(err)) from err - else: - return prepare_estimate_size_message( - maximum_allowed_size_gb=metadata.get("maximum_query_size_gb", 10), - estimated_size_gb=response.get("value"), - ) - - -# TODO: !!!access should be restricted!!! -@app.get("/get_api_key") -async def get_api_key( - request: Request, - authorization: Optional[str] = Header(None, convert_underscores=True), -): - """Get API key for a user with the given `Authorization` token. - Adds user to DB and generates api key, if user is not found.""" - try: - context = Context(request, authorization, enable_public=False) - except AuthenticationFailed: - AccessManager.add_user(authorization=authorization) - context = Context(request, authorization, enable_public=False) - else: - return {"key": f"{context.user.id}:{context.user.key}"} - - -@app.get("/requests") -@timer( - app.state.web_request_duration_seconds, labels={"route": "GET /requests"} -) -async def get_requests( - request: Request, - authorization: Optional[str] = Header(None, convert_underscores=True), -): - """Get requests for a user the given Authorization token""" - app.state.web_http_requests_total.inc({"type": "GET /requests"}) - try: - context = Context(request, authorization, enable_public=False) - response_json = GeokubeAPIRequester.get( - url="/requests", context=context - ) - except AuthenticationFailed as err: - raise HTTPException( - status_code=401, detail="User could not be authenticated" - ) from err - except GeokubeAPIRequestFailed as err: - raise HTTPException(status_code=400, detail=str(err)) from err - else: - requests = ListOfRequests(data=response_json) - requests.add_requests_url_prefix( - os.environ.get("DOWNLOAD_PREFIX", GeokubeAPIRequester.API_URL) - ) - return requests diff --git a/web/app/meta.py b/web/app/meta.py deleted file mode 100644 index dab4cc8..0000000 --- a/web/app/meta.py +++ /dev/null @@ -1,26 +0,0 @@ -"""Module with `LoggableMeta` metaclass""" -import os -import logging - - -class LoggableMeta(type): - """Metaclass for dealing with logger levels and handlers""" - - def __new__(cls, child_cls, bases, namespace): - # NOTE: method is called while creating a class, not an instance! 
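# An illustrative aside on the NOTE above (the `Example` class is
# hypothetical; AccessManager, GeokubeAPIRequester and WidgetFactory in this
# repository are the real consumers of this pattern): any class that declares
# a `_LOG` attribute gets its logging level, formatter and stream handler
# attached once, while the `class` statement is being executed, e.g.
#
#     import logging
#
#     class Example(metaclass=LoggableMeta):
#         _LOG = logging.getLogger("geokube.Example")
#
#     Example._LOG.info("handlers were attached at class-creation time")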
- res = super().__new__(cls, child_cls, bases, namespace) - if hasattr(res, "_LOG"): - format_ = os.environ.get( - "LOGGING_FORMAT", - "%(asctime)s %(name)s %(levelname)s %(lineno)d %(message)s", - ) - formatter = logging.Formatter(format_) - logging_level = os.environ.get("LOGGING_LEVEL", "INFO") - res._LOG.setLevel(logging_level) - stream_handler = logging.StreamHandler() - stream_handler.setFormatter(formatter) - stream_handler.setLevel(logging_level) - res._LOG.addHandler(stream_handler) - for handler in logging.getLogger("geokube").handlers: - handler.setFormatter(formatter) - return res diff --git a/web/app/models.py b/web/app/models.py deleted file mode 100644 index c6737ad..0000000 --- a/web/app/models.py +++ /dev/null @@ -1,357 +0,0 @@ -"""Module containing utils classes for view data for the Webportal""" -import os -import json -import logging -from enum import Enum -from typing import Any, ClassVar, Optional, Union -from datetime import date, datetime, timedelta -import numpy as np - -from pydantic import ( - BaseModel, - AnyHttpUrl, - HttpUrl, - validator, - root_validator, - Field as PydanticField, -) -from db.dbmanager.dbmanager import RequestStatus - - -class RequestStatusDTO(Enum): - """DTO enum for request statuses""" - - Completed = "DONE" - Running = "RUNNING" - Pending = "PENDING" - Failed = "FAILED" - Timeout = "TIMEOUT" - - -class Contact(BaseModel): - """Contact DTO of dataset metadata""" - - name: str - email: str - webpage: str # some products have wrong webpage, HttpUrl - - -class License(BaseModel): - """License DTO of dataset metadata""" - - name: str - url: Optional[ - str - ] = None # some products have wrong webpage, Optional[HttpUrl] = None - - -class ProdId(BaseModel): - """Product short DTO of dataset metadata""" - - id: str - description: Optional[str] = "" - - @root_validator(pre=True) - def preprocess_load(cls, values): - if "description" not in values or values["description"] is None: - values["description"] = values["id"] - return values - - -class DatasetMetadata(BaseModel): - """Dataset metadata DTO with information about name, - default product, description, etc.""" - - id: str - default: Optional[str] = None - description: Optional[str] = "" - label: Optional[str] = "" - image: Optional[HttpUrl] = "" - attribution: Optional[str] = "" - update_frequency: Optional[str] = "" - doi: Optional[HttpUrl] = None - publication_date: Optional[date] = None - contact: Contact - license: Optional[License] = None - products: list[ProdId] - - @validator("publication_date", pre=True) - def parse_publication_date(cls, value): - if isinstance(value, str): - return datetime.strptime(value, "%Y-%m-%d").date() - return value - - @root_validator(pre=True) - def preprocess_products(cls, values): - prods = [ - {"id": prod_key, "description": prod.get("description")} - for prod_key, prod in values["products"].items() - ] - if values.get("default") is None and len(prods) > 0: - values["metadata"]["default"] = prods[0]["id"] - return dict(products=prods, **values["metadata"]) - - -class ListOfDatasets(BaseModel): - """List of datasets DTO representing output for /datasets request""" - - version: Optional[str] = "v1" - status: Optional[str] = "OK" - data: list[DatasetMetadata] - - @classmethod - def from_details(cls, details): - """Create a list of datasets based on details dict - - Parameters - ---------- - details : dict - A dictionary representing the details. 
It should - be in the form: - { - version: "...", - status: "...", - data: [ - { - id: "...", - default: "...", - ... - } - ] - } - """ - return cls(data=details) - - -class Filter(BaseModel): - """Filter DTO of product metadata""" - - name: Optional[str] = None - user_defined: Optional[bool] = True - label: Optional[str] = None - - @root_validator(pre=True) - def match_label(cls, values): - if "label" not in values or values["label"] is None: - values["label"] = values["name"] - return values - - @validator("user_defined", pre=True) - def maybe_cast_user_defined(cls, value): - if isinstance(value, str): - return value.lower() in ["t", "true", "yes", "y"] - elif isinstance(value, bool): - return value - else: - raise TypeError - - -class Coordinate(BaseModel, arbitrary_types_allowed=True): - """DTO for single coordinate""" - - name: str - axis: str - min: Union[float, datetime] - max: Union[float, datetime] - values: Optional[Any] = None - time_unit: Optional[str] = None - time_step: Optional[float] = None - label: Optional[str] = None - units: Optional[str] = None - - @validator("label", always=True) - def match_label(cls, value, values): - if value is None: - return values["axis"].capitalize() - return value - - -class Domain(BaseModel): - """Domain DTO of the kube. It contains cooridnate - reference system and coordinates""" - - crs: dict[str, Any] - coordinates: dict[str, Coordinate] - - @validator("coordinates", pre=True) - def match_coords(cls, value): - if isinstance(value, dict): - return { - item["axis"].lower(): dict(**item, name=key) - for key, item in value.items() - } - return value - - -class Field(BaseModel): - """Single field DTO of the kube""" - - name: str - description: Optional[str] = None - - @root_validator(pre=True) - def match_description(cls, values): - if "description" not in values or values["description"] is None: - values["description"] = values["name"] - return values - - -class Kube(BaseModel): - """Single Kube DTO - a domain and a list of fields""" - - domain: Domain - fields: list[Field] - - @validator("fields", pre=True) - def parse_field(cls, value): - return [ - {"name": fieild_key, "description": field.get("description")} - for fieild_key, field in value.items() - ] - - -class DatasetRow(BaseModel): - """DTO contatining attributes and associated datacube""" - - attributes: dict[str, str] - datacube: Optional[Kube] - - -class ProductMetadata(BaseModel): - """Product metadata DTO""" - - catalog_dir: str - filters: Optional[dict[str, Filter]] = PydanticField(default_factory=dict) - role: Optional[str] = "public" - - @validator("filters", pre=True) - def match_filters(cls, filters): - if isinstance(filters, dict): - return filters - if isinstance(filters, list): - return {item["name"]: item for item in filters} - raise TypeError - - -class Product(BaseModel): - """Product DTO""" - - _SUPPORTED_FORMATS_LABELS: ClassVar[dict[str, str]] = { - "grib": "GRIB", - "pickle": "PICKLE", - "netcdf": "netCDF", - "geotiff": "geoTIFF", - } - id: str - data: list[Union[DatasetRow, Kube]] - metadata: ProductMetadata - description: Optional[str] = None - dataset: DatasetMetadata - - @validator("data", pre=True) - def match_data_list(cls, value): - if not isinstance(value, list): - return [value] - return value - - @validator("description", always=True) - def match_description(cls, value, values): - if value is None: - return values["id"] - return value - - -class WidgetsCollection(BaseModel): - """DTO including all information required by the Web Portal to render - 
datasets""" - - version: Optional[str] = "v1" - status: Optional[str] = "OK" - id: str - label: str - dataset: DatasetMetadata - widgets: list[dict] - widgets_order: list[str] - - -class Request(BaseModel): - """Single request DTO for Web portal""" - - request_id: str - dataset: str - product: str - request_json: dict - submission_date: datetime - end_date: Optional[datetime] = None - duration: Optional[str] = None - size: Optional[int] = None - url: Optional[str] = None - status: str - - @root_validator(pre=True) - def match_keys(cls, values): - values["request_json"] = json.loads(values.pop("query", "{}")) - values["submission_date"] = values.pop("created_on", None) - values["status"] = RequestStatusDTO( - RequestStatus(values["status"]).name - ).name - if download := values.get("download"): - values["url"] = download.get("download_uri") - values["end_date"] = download.get("created_on") - values["size"] = download.get("size_bytes") - return values - - @validator("duration", pre=True, always=True) - def match_duration(cls, value, values): - if last_update := values.get("end_date"): - # NOTE: we remove microseconds parts (after dot) - # NOTE: we add 1 second to get rid of 00:00:00 - return str( - (last_update - values["submission_date"]) - + timedelta(seconds=1) - ).split(".", maxsplit=1)[0] - return value - - def add_url_prefix(self, prefix): - """Add inplace prefix to the URL in the following way: - resulting url = prefix + base url - - Parameters - ------- - prefix : str - Prefix to add to the URL - """ - if self.url: - self.url = "/".join([prefix, self.url]) - else: - self.url = None - - -class ListOfRequests(BaseModel): - """DTO for list of requests""" - - version: Optional[str] = "v1" - status: Optional[str] = "OK" - data: Optional[list[Request]] - - @validator("data") - def sort_data(cls, value): - if value is not None and len(value) > 0: - return sorted( - value, - key=lambda request: request.submission_date, - reverse=True, - ) - return value - - def add_requests_url_prefix(self, prefix: str): - """Add inplace prefix to URL of each Request in 'data' attribute - by calling Request.add_url_prefix method - - Parameters - ------- - prefix : str - Prefix to add to all Requests URL - """ - for req in self.data: - req.add_url_prefix(prefix) diff --git a/web/app/requester.py b/web/app/requester.py deleted file mode 100644 index 847e695..0000000 --- a/web/app/requester.py +++ /dev/null @@ -1,150 +0,0 @@ -"""Module containing utils for geokube-dds API accessing""" -from __future__ import annotations -import os -import logging -import requests - -from .utils import UserCredentials, log_execution_time -from .meta import LoggableMeta -from .exceptions import GeokubeAPIRequestFailed -from .context import Context - - -class GeokubeAPIRequester(metaclass=LoggableMeta): - """The class handling requests to geokube dds API""" - - _LOG = logging.getLogger("GeokubeAPIRequester") - API_URL: str = None - _IS_INIT: bool = False - - @classmethod - def init(cls): - """Initialize class with API URL""" - cls.API_URL = os.environ.get("API_URL", "https://ddshub.cmcc.it/api") - cls._LOG.info( - "'API_URL' environment variable collected: %s", cls.API_URL - ) - cls._IS_INIT = True - - @staticmethod - def _get_http_header_from_user_credentials( - user_credentials: UserCredentials | None = None, - ): - if user_credentials is not None and user_credentials.id is not None: - return { - "User-Token": ( - f"{user_credentials.id}:{user_credentials.user_token}" - ) - } - return {} - - @classmethod - def 
_prepare_headers(cls, context: Context): - headers = { - "Content-Type": "application/json", - "DDS-Request-Id": str(context.rid), - } - headers.update( - GeokubeAPIRequester._get_http_header_from_user_credentials( - context.user - ) - ) - return headers - - @classmethod - @log_execution_time(_LOG) - def post(cls, url: str, data: str, context: Context): - """ - Send POST request to geokube-dds API - - Parameters - ---------- - url : str - Path to which the query should be send. It is created as - f"{GeokubeAPIRequester.API_URL{url}" - data : str - JSON payload of the request - context : Context - Context of the http request - - Returns - ------- - response : str - Response from geokube-dds API - - Raises - ------- - GeokubeAPIRequestFailed - If request failed due to any reason - """ - assert cls._IS_INIT, "GeokubeAPIRequester was not initialized!" - target_url = f"{cls.API_URL}{url}" - headers = cls._prepare_headers(context) - cls._LOG.debug("sending POST request to %s", target_url) - cls._LOG.debug("payload of the POST request: %s", data) - response = requests.post( - target_url, - data=data, - headers=headers, - timeout=int(os.environ.get("API_TIMEOUT", 20)), - ) - if response.status_code != 200: - raise GeokubeAPIRequestFailed( - response.json().get( - "detail", "Request to geokube-dds API failed!" - ) - ) - if "application/json" in response.headers.get("Content-Type", ""): - return response.json() - return response.text() - - @classmethod - @log_execution_time(_LOG) - def get( - cls, - url: str, - context: Context, - timeout: int = 10, - ): - """ - Send GET request to geokube-dds API - - Parameters - ---------- - url : str - Path to which the query should be send. It is created as - f"{GeokubeAPIRequester.API_URL}{url}" - context : Context - Context of the http request - timeout : int, default=10 - Request timout in seconds - - Returns - ------- - response : str - Response from geokube-dds API - - Raises - ------- - GeokubeAPIRequestFailed - If request failed due to any reason - """ - assert cls._IS_INIT, "GeokubeAPIRequester was not initialized!" - target_url = f"{cls.API_URL}{url}" - headers = cls._prepare_headers(context) - cls._LOG.debug("sending GET request to %s", target_url) - response = requests.get( - target_url, - headers=headers, - timeout=int(os.environ.get("API_TIMEOUT", timeout)), - ) - if response.status_code != 200: - cls._LOG.info( - "request to geokube-dds API failed due to: %s", response.text - ) - raise GeokubeAPIRequestFailed( - response.json().get( - "detail", "Request to geokube-dds API failed!" 
- ) - ) - return response.json() diff --git a/web/app/utils/__init__.py b/web/app/utils/__init__.py deleted file mode 100644 index 7ba404d..0000000 --- a/web/app/utils/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .numeric import maybe_round_value -from .auth import UserCredentials -from .execution import log_execution_time diff --git a/web/app/utils/auth.py b/web/app/utils/auth.py deleted file mode 100644 index a007c1d..0000000 --- a/web/app/utils/auth.py +++ /dev/null @@ -1,60 +0,0 @@ -"""Module with utils related to authentication and authorization""" -from typing import Optional - -from pydantic import BaseModel, UUID4 - - -class UserCredentials(BaseModel): - """Class containing current user credentials, including ID and token""" - - user_id: Optional[UUID4] = None - user_token: Optional[str] = None - - @property - def is_public(self) -> bool: - """Get information if a user uses public profile - - Returns - ------- - public_flag : bool - `True` if user uses public profile, `False` otherwise - """ - return self.user_id is None - - @property - def id(self) -> UUID4: - """Get user ID. - - Returns - ------- - user_id : UUID - User ID - """ - return self.user_id - - @property - def key(self) -> str: - """Get user API token. - - Returns - ------- - user_token : str - User API token - """ - return self.user_token - - def __eq__(self, other): - if not isinstance(other, UserCredentials): - return False - if self.id == other.id and self.key == other.key: - return True - return False - - def __ne__(self, other): - return self != other - - def __repr__(self): - return ( - f"" - ) diff --git a/web/app/utils/execution.py b/web/app/utils/execution.py deleted file mode 100644 index 636479d..0000000 --- a/web/app/utils/execution.py +++ /dev/null @@ -1,45 +0,0 @@ -"""Module with execution utils""" -from inspect import iscoroutinefunction -from functools import wraps -import time -import logging - - -def log_execution_time(logger: logging.Logger): - """Decorator logging execution time of the method or function (both sync and async) - """ - - def inner(func): - @wraps(func) - def wrapper_sync(*args, **kwds): - exec_start_time = time.monotonic() - try: - return func(*args, **kwds) - finally: - # NOTE: maybe logging should be on DEBUG level - logger.info( - "execution of '%s' function from '%s' package took %.4f" - " seconds", - func.__name__, - func.__module__, - time.monotonic() - exec_start_time, - ) - - @wraps(func) - async def wrapper_async(*args, **kwds): - exec_start_time = time.monotonic() - try: - return await func(*args, **kwds) - finally: - # NOTE: maybe logging should be on DEBUG level - logger.info( - "execution of '%s' function from '%s' package took %.4f" - " seconds", - func.__name__, - func.__module__, - time.monotonic() - exec_start_time, - ) - - return wrapper_async if iscoroutinefunction(func) else wrapper_sync - - return inner diff --git a/web/app/utils/numeric.py b/web/app/utils/numeric.py deleted file mode 100644 index 3e286cb..0000000 --- a/web/app/utils/numeric.py +++ /dev/null @@ -1,62 +0,0 @@ -"""Module with utils for number handling""" -from numbers import Number - - -def maybe_round_value(item, decimals=2): - """Round a number to take number decimal digits indicated by 'decimals' argument. 
- It 'item' is not float or int, returns original value - - Parameters - ---------- - item : Any - Item to be rounded (if number) - decimals : int, default=2 - Number of decimal places - - Returns - ------- - item : Any - Rounded value (if number) or original value otherwise - """ - return round(item, decimals) if isinstance(item, Number) else item - - -def prepare_estimate_size_message( - maximum_allowed_size_gb: float, estimated_size_gb: float -): - """Prepare estimate size and maximum allowed size into the message - expected by the Webportal - - Parameters - ---------- - maximum_allowed_size_gb : float - Maximum allowed size in gigabytes - estimated_size_gb : float - Estimated size in gigabytes - Returns - ------- - message : dict - A dicitonary with keys `status` and `message` - """ - status = "OK" - if estimated_size_gb is None: - status = "Error" - msg = "Could not estimate the size for that dataset" - if estimated_size_gb == 0.0: - status = "Error" - msg = "Resulting dataset is empty" - else: - estimated_size_gb = maybe_round_value(estimated_size_gb) - maximum_allowed_size_gb = maybe_round_value(maximum_allowed_size_gb) - if estimated_size_gb > maximum_allowed_size_gb: - status = "Error" - msg = ( - f"Estimated request size ({estimated_size_gb} GB) is more than" - f" maximum allowed size ({maximum_allowed_size_gb} GB). Please" - " review your query" - ) - elif estimated_size_gb < 0.01: - msg = "Estimated request size: <0.01 GB" - else: - msg = f"Estimated request size: {estimated_size_gb} GB" - return {"status": status, "message": msg} diff --git a/web/app/widget.py b/web/app/widget.py deleted file mode 100644 index f2edabd..0000000 --- a/web/app/widget.py +++ /dev/null @@ -1,582 +0,0 @@ -"""Module with tools for widgets management""" -import logging -from collections import defaultdict, OrderedDict -from datetime import datetime, timedelta - -import numpy as np -from pydantic import validate_arguments - -from .meta import LoggableMeta -from .utils import log_execution_time, maybe_round_value -from .models import Product, WidgetsCollection, Kube - - -def min_max_dict(min=np.inf, max=-np.inf): - """Create a default dictionary with 'min' and 'max' keys""" - return {"min": min, "max": max} - - -class Widget: - """Class representing a single Widget in the Webportal""" - - def __init__( - self, - wname, - wlabel, - wrequired, - wparameter, - wtype, - wdetails=None, - whelp=None, - winfo=None, - ): - self.__data = { - "name": str(wname), - "label": str(wlabel), - "required": bool(wrequired), - "parameter": str(wparameter) if wparameter is not None else None, - "type": str(wtype), - "details": wdetails, - "help": whelp, - "info": winfo, - } - - def __getitem__(self, key): - return self.__data[key] - - def to_dict(self): - """Return dictionary representation of a Widget - - Returns - ------- - widget_dict - Dictionary with keys being attributes of a Widget object - """ - return self.__data.copy() - - @classmethod - def from_dict(cls, data): - """Construct Widget object based on the provided dictionary - - Parameters - ---------- - data : dict - Dict representing attributes of a Widget - - Returns - ------- - widget - Widget object - """ - return Widget(**data) - - -class WidgetFactory(metaclass=LoggableMeta): - """Class which prepares widgets for Web portal""" - - _LOG = logging.getLogger("Widget") - _MAIN_COORDS = {"time", "latitude", "longitude"} - _NUMBER_OF_DECIMALS = 2 - - @log_execution_time(_LOG) - @validate_arguments - def __init__(self, product: Product): - 
self._LOG.debug("provided filters: %s", product.metadata.filters) - self._d = product - self._wid = [] - self._wid_order = [] - self._compute_variable_widget() - self._compute_attrs_widgets() - self._compute_temporal_widgets() - self._compute_spatial_widgets() - self._compute_auxiliary_coords_widgets() - self._compute_format_widget() - - def _is_for_skipping(self, name): - self._LOG.debug("checking if '%s' should be skipped", name) - if (flt := self._d.metadata.filters.get(name)) is not None: - self._LOG.debug( - "should '%s' be skipped - %s ", name, not flt.user_defined - ) - return not flt.user_defined - self._LOG.debug("filter for '%s' was not found. retaining", name) - return False - - def _maybe_get_label(self, name, default=None): - self._LOG.debug("checking label for '%s'", name) - if (flt := self._d.metadata.filters.get(name)) is not None: - self._LOG.debug("found label %s for '%s' - ", flt.label, flt.name) - return flt.label - self._LOG.debug("filter for '%s' was not found", name) - return default if default is not None else name - - @property - @log_execution_time(_LOG) - def widgets(self) -> dict: - """Get the entire collection of Web portal widgets in the predefined - order: - 1. variables widget - 2. attributes widgets - 3. temporal selection widgets - 4. spatial selection widgets - 5. format widgets - - Returns - ------- - widgets : WidgetsCollection - The collection of widgets for Web portal - """ - return WidgetsCollection( - id=self._d.id, - label=self._d.description, - dataset=self._d.dataset, - widgets=self._wid, - widgets_order=self._wid_order, - ) - - def _compute_variable_widget(self, sort_keys: bool = True) -> None: - all_fields = {} - for dr in self._d.data: - if dr is None: - # NOTE: it means, datacube is Delayed object - # we don't care about variables - continue - if isinstance(dr, Kube): - fields = dr.fields - else: - if dr.datacube is None: - # NOTE: it means, datacube is Delayed object - # we don't care about variables - continue - fields = dr.datacube.fields - for field in fields: - if field.name in all_fields: - continue - if self._is_for_skipping(field.name): - continue - all_fields[field.name] = { - "value": field.name, - "label": self._maybe_get_label( - field.name, field.description - ), - } - if not all_fields: - return - if sort_keys: - all_fields = OrderedDict(all_fields) - self._wid.append( - Widget( - wname="variable", - wlabel="Variables", - wrequired=True, - wparameter="variable", - wtype="StringList", - wdetails={"values": list(all_fields.values())}, - ).to_dict() - ) - self._wid_order.append("variable") - - def _compute_attrs_widgets(self, sort_keys: bool = True) -> None: - attrs_opts = defaultdict(set) - for dr in self._d.data: - if isinstance(dr, Kube): - # NOTE: there is only one Kube in this case, - # so we can exit function - return - for att_name, att_val in dr.attributes.items(): - attrs_opts[att_name].add(att_val) - for att_name, att_opts in attrs_opts.items(): - if self._is_for_skipping(att_name): - continue - if (flt := self._d.metadata.filters.get(att_name)) is not None: - label = flt.label - else: - label = att_name - att_opts = list(att_opts) - if sort_keys: - att_opts = sorted(att_opts) - values = [{"value": key, "label": key} for key in att_opts] - wid = Widget( - wname=att_name, - wlabel=label, - wrequired=False, - wparameter=att_name, - wtype="StringChoice", - wdetails={"values": values}, - ) - self._wid.append(wid.to_dict()) - self._wid_order.append(att_name) - - def _compute_temporal_widgets(self) -> None: - temporal_coords = 
min_max_dict(min=None, max=None) - min_time_step = np.inf - time_unit = None - for dr in self._d.data: - if dr is None: - # NOTE: it means, datacube is Delayed object - # we don't care about coordinates - continue - if isinstance(dr, Kube): - coords = dr.domain.coordinates - else: - if dr.datacube is None: - # NOTE: it means, datacube is Delayed object - # we don't care about coordinates - continue - coords = dr.datacube.domain.coordinates - if "time" not in coords: - continue - time_coord = coords["time"] - if time_coord.time_step is None: - # NOTE: it means, time axis has just one item - continue - if time_coord.time_step < min_time_step: - min_time_step = time_coord.time_step - time_unit = time_coord.time_unit - - if temporal_coords["max"]: - temporal_coords["max"] = max( - [temporal_coords["max"], time_coord.max] - ) - else: - temporal_coords["max"] = time_coord.max - if temporal_coords["min"]: - temporal_coords["min"] = min( - [temporal_coords["min"], time_coord.min] - ) - else: - temporal_coords["min"] = time_coord.min - - if not (temporal_coords["min"] and temporal_coords["max"]): - return - wid = Widget( - wname="temporal_coverage", - wlabel="Temporal coverage", - wrequired=True, - wparameter=None, - wtype="ExclusiveFrame", - wdetails={"widgets": ["date_list", "date_range"]}, - ) - self._wid.append(wid.to_dict()) - step = ( - timedelta(**{f"{time_unit}s": min_time_step}) - if time_unit in {"day", "hour", "minute"} - else timedelta( - days=min_time_step * (365 if time_unit == "year" else 30) - ) - ) - time_widgets = { - "year": [ - {"label": str(y), "value": str(y)} - for y in range( - temporal_coords["min"].year, - temporal_coords["max"].year + 1, - ) - ] - } - if (time_unit == "month" and min_time_step < 12) or step < timedelta( - days=365 - ): - months = ( - range( - temporal_coords["min"].month, - temporal_coords["max"].month + 1, - ) - if len(time_widgets["year"]) == 1 - else range(1, 13) - ) - time_widgets["month"] = [ - { - "label": datetime.strptime(str(m), "%m").strftime("%B"), - "value": str(m), - } - for m in months - ] - if step < timedelta(days=28): - time_widgets["day"] = [ - {"label": str(d), "value": str(d)} for d in range(1, 32) - ] - if step < timedelta(hours=24): - minute = f"{temporal_coords['min'].minute:02d}" - time_widgets["hour"] = [ - {"label": f"{h:02}", "value": f"{h:02}"} - for h in range(24) - ] - wid = Widget( - wname="date_list", - wlabel="Date", - wrequired=True, - wparameter=None, - wtype="InclusiveFrame", - wdetails={"widgets": list(time_widgets.keys())}, - ) - self._wid.append(wid.to_dict()) - - for freq, time_values in time_widgets.items(): - wid = Widget( - wname=freq, - wlabel=freq.capitalize(), - wrequired=True, - wparameter=f"time:{freq}", - wtype="StringList", - wdetails={"values": time_values}, - ) - self._wid.append(wid.to_dict()) - - t_range = [ - { - "name": "start", - "label": "Start Date", - "range": temporal_coords["min"], - }, - { - "name": "stop", - "label": "End Date", - "range": temporal_coords["max"], - }, - ] - wid = Widget( - wname="date_range", - wlabel="Date range", - wrequired=True, - wparameter="time", - wtype="DateTimeRange", - wdetails={"fields": t_range}, - ) - - self._wid.append(wid.to_dict()) - self._wid_order.append("temporal_coverage") - - def _compute_spatial_widgets(self) -> None: - spatial_coords = defaultdict(min_max_dict) - for dr in self._d.data: - if dr is None: - # NOTE: it means, datacube is Delayed object - # we don't care about coordinates - continue - if isinstance(dr, Kube): - coords = 
dr.domain.coordinates - else: - if dr.datacube is None: - # NOTE: it means, datacube is Delayed object - # we don't care about coordinates - continue - coords = dr.datacube.domain.coordinates - if "latitude" not in coords or "longitude" not in coords: - continue - for coord_name in ["latitude", "longitude"]: - spatial_coords[coord_name]["max"] = max( - [ - spatial_coords[coord_name]["max"], - coords[coord_name].max, - ] - ) - spatial_coords[coord_name]["min"] = min( - [ - spatial_coords[coord_name]["min"], - coords[coord_name].min, - ] - ) - if not spatial_coords: - return - - wid = Widget( - wname="spatial_coverage", - wlabel="Spatial coverage", - wrequired=False, - wparameter=None, - wtype="ExclusiveFrame", - wdetails={"widgets": ["area", "location"]}, - ) - self._wid.append(wid.to_dict()) - self._wid_order.append("spatial_coverage") - - area_fields = [ - { - "name": orient, - "label": orient.capitalize(), - "range": round( - spatial_coords[coord][ext], self._NUMBER_OF_DECIMALS - ), - } - for orient, coord, ext in zip( - ("north", "west", "south", "east"), - ("latitude", "longitude") * 2, - ("max", "min", "min", "max"), - ) - ] - wid = Widget( - wname="area", - wlabel="Area", - wrequired=True, - wparameter="area", - wtype="geoarea", - wdetails={"fields": area_fields}, - ) - self._wid.append(wid.to_dict()) - - loc_fields = [ - { - "name": coord, - "label": coord.capitalize(), - "range": [ - round(spatial_coords[coord][ext], self._NUMBER_OF_DECIMALS) - for ext in ("min", "max") - ], - } - for coord in ("latitude", "longitude") - ] - wid = Widget( - wname="location", - wlabel="Location", - wrequired=True, - wparameter="location", - wtype="geolocation", - wdetails={"fields": loc_fields}, - ) - self._wid.append(wid.to_dict()) - - def _get_aux_coord_names(self, all_coords_names): - return list(set(all_coords_names) - self._MAIN_COORDS) - - def _compute_auxiliary_coords_widgets(self) -> None: - aux_coords = defaultdict(dict) - for dr in self._d.data: - if dr is None: - # NOTE: it means, datacube is Delayed object - # we don't care about coordinates - continue - if isinstance(dr, Kube): - coords = dr.domain.coordinates - else: - if dr.datacube is None: - # NOTE: it means, datacube is Delayed object - # we don't care about coordinates - continue - coords = dr.datacube.domain.coordinates - if ( - len( - aux_kube_coords_names := self._get_aux_coord_names( - coords.keys() - ) - ) - == 0 - ): - continue - for coord_name in aux_kube_coords_names: - if self._is_for_skipping(coord_name): - continue - - # TODO: `vals` might be 2d. what to do? compute uniqe? 
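# An illustrative aside on the TODO above (values are hypothetical):
# `np.unique` both flattens and sorts its input, so 2-D coordinate values
# collapse to their unique 1-D entries before the cast below, e.g.
#
#     import numpy as np
#
#     vals = np.array([[10.0, 20.0], [20.0, 30.0]])
#     np.unique(vals)          # -> array([10., 20., 30.])
#
# Note also that `np.float`, used in the cast below, was deprecated in
# NumPy 1.20 and removed in 1.24; the built-in `float` is the portable
# spelling.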
- vals = None - if coords[coord_name].values is not None: - vals = np.unique(np.array(coords[coord_name].values)) - try: - vals = vals.astype(np.float) - except ValueError: - self._LOG.info( - "skipping coordinate '%s' - non-castable to float" - " (%s)", - coord_name, - vals, - ) - continue - else: - aux_coords[coord_name]["values"] = sorted(vals) - if "min" in aux_coords[coord_name]: - aux_coords[coord_name]["min"] = min( - [ - aux_coords[coord_name]["min"], - coords[coord_name].min, - ] - ) - else: - aux_coords[coord_name]["min"] = coords[coord_name].min - if "max" in aux_coords[coord_name]: - aux_coords[coord_name]["max"] = max( - [ - aux_coords[coord_name]["max"], - coords[coord_name].max, - ] - ) - else: - aux_coords[coord_name]["max"] = coords[coord_name].max - - aux_coords[coord_name]["label"] = self._maybe_get_label( - coord_name, coords[coord_name].label - ) - aux_coords[coord_name]["name"] = coords[coord_name].name - if not aux_coords: - return - for coord_name, coord_value in aux_coords.items(): - wid = Widget( - wname=coord_name, - wlabel=coord_name.capitalize(), - wrequired=True, - wparameter=None, - wtype="ExclusiveFrame", - wdetails={ - "widgets": [f"{coord_name}_list", f"{coord_name}_range"] - }, - ) - self._wid.append(wid.to_dict()) - self._wid_order.append(coord_name) - - if "values" in coord_value: - values = [ - { - "value": val, - "label": ( - f"{maybe_round_value(val, self._NUMBER_OF_DECIMALS):.2f}" - ), - } - for val in coord_value["values"] - ] - - wid = Widget( - wname=f"{coord_name}_list", - wlabel=coord_name.capitalize(), - wrequired=False, - wparameter=coord_name, - wtype="StringList", - wdetails={"values": values}, - ) - self._wid.append(wid.to_dict()) - if "max" in coord_value and "min" in coord_value: - range_ = [ - { - "name": "start", - "label": f"Min {coord_name}", - "range": coord_value["min"], - }, - { - "name": "stop", - "label": f"Max {coord_name}", - "range": coord_value["max"], - }, - ] - wid = Widget( - wname=f"{coord_name}_range", - wlabel=coord_name.capitalize(), - wrequired=False, - wparameter=coord_name, - wtype="NumberRange", - wdetails={"fields": range_}, - ) - self._wid.append(wid.to_dict()) - - def _compute_format_widget(self) -> None: - wid = Widget( - wname="format", - wlabel="Format", - wrequired=True, - wparameter="format", - wtype="FileFormat", - wdetails={ - "values": [ - {"value": "netcdf", "label": "netCDF", "ext": ".nc"} - ] - }, # TODO: more formats - ) - self._wid.append(wid.to_dict()) - self._wid_order.append("format") diff --git a/web/requirements.txt b/web/requirements.txt deleted file mode 100644 index c094f1f..0000000 --- a/web/requirements.txt +++ /dev/null @@ -1,9 +0,0 @@ -fastapi -pydantic -uvicorn -intake -pytest -jinja2 -pyjwt==1.7.1 -sqlalchemy -aioprometheus \ No newline at end of file diff --git a/web/tests/resources/dataset_metadata.yaml b/web/tests/resources/dataset_metadata.yaml deleted file mode 100644 index 6457351..0000000 --- a/web/tests/resources/dataset_metadata.yaml +++ /dev/null @@ -1,108 +0,0 @@ -description: >- - The dataset contains dynamically downscaled ERA5 reanalysis, originally - available at ≈31 km x 31 km horizontal resolution, to 2.2 km x 2.2 km. - Dynamical downscaling has been conducted directly for the project - (foreground) through Regional Climate Model (RCM) COSMO5.0_CLM9 e - INT2LM 2.06. The RCM COSMO CLM is currently developed by the CLM-Community, with - which CMCC collaborates since 2008 (additional info on COSMO CLM). - The temporal resolution of outputs is hourly (like for ERA5). 
- Runs cover the whole Italian territory (and neighbouring areas, according to the necessary computational boundary) so as to provide a very detailed (in terms of space-time resolution) and comprehensive (in terms of meteorological fields) dataset of climatological data for at least the last 30 years (01/1989-10/2020).
- Typical uses of such a dataset are (applied) research and downstream services (e.g. decision support systems).
-
- The temporal coverage of the dataset is from 01/01/1989 00:00 to 31/12/2020 23:00 and the temporal resolution is 1 hour.
- All output variables (reported in the following table) are on single levels except soil water content that is provided for 7 soil levels.
-
-   Variable Name                                       | Units
-   2m temperature                                      | K
-   2m dew point temperature                            | K
-   Total precipitation                                 | Kg/m2
-   U-component of 10m wind                             | m/s
-   V-component of 10m wind                             | m/s
-   2m maximum temperature                              | K
-   2m minimum temperature                              | K
-   mean sea level pressure                             | Pa
-   specific humidity                                   | kg/kg
-   total cloud cover                                   | Dimensionless
-   Surface Evaporation                                 | Kg/m2
-   Averaged surface net downward shortwave radiation   | W/m2
-   Averaged surface net downward longwave radiation    | W/m2
-   Surface snow amount                                 | m
-   Soil (multi levels) water content                   | m
-attribution: >- - The use of the COSMO CLM model is completely free of charge for all research applications. - The use of COSMO-CLM generated data within HIGHLANDER is free for partners (acting as intermediate users) for the project’s purposes; the - use for other purposes (and by further external end-users) requires an appropriate disclaimer, including reference to - COPERNICUS, CINECA, CLM Assembly and CMCC, such as and if additional data post-processing (e.g. fields - elaboration or new formats) is required, this can be agreed on after discussing with Dataset Manager/Owner/Provider - This datasets contains modified Copernicus Climate Change Service information 2021. Neither the European Commission nor ECMWF is responsible for any use that may be made of the Copernicus information or data it contains. (See License to Use Copernicus Products ) -

- Whenever you publish research or applications based on this dataset you should include the following citation: -

- Raffa, M.; Reder, A.; Marras, G.F.; Mancini, M.; Scipione, G.; Santini, M.; Mercogliano, P. VHR-REA_IT Dataset: Very High Resolution Dynamical Downscaling of ERA5 Reanalysis over Italy by COSMO-CLM. Data 2021, 6, 88. https://doi.org/10.3390/data6080088 -contact: - name: Paola Mercogliano - email: paola.mercogliano@cmcc.it - webpage: https://www.cmcc.it/people/mercogliano-paola -label: ERA5 downscaling @2.2 km over Italy -image: https://ddsfiles.s3.fr-par.scw.cloud/images/TOT_PREC_CCLM2km_land.png -doi: https://doi.org/10.25424/cmcc/era5-2km_italy -update_frequency: None -license: - name: Dataset License - url: https://ddsfiles.s3.fr-par.scw.cloud/vhr_era5_dds_license.pdf -publication_date: 2021-08-01 -related_data: -- name: ERA5 hourly data on single levels from 1979 to present - url: https://doi.org/10.24381/cds.adbb2d47 \ No newline at end of file diff --git a/web/tests/resources/list_dataset.json b/web/tests/resources/list_dataset.json deleted file mode 100644 index 419a7f7..0000000 --- a/web/tests/resources/list_dataset.json +++ /dev/null @@ -1,609 +0,0 @@ -{ - "version": "v1", - "status": "OK", - "data": [ - { - "description": "BioClim is a dataset of 35 bioclimatic indicators calculated from historical and future climate simulations. These indicators (e.g. Annual mean temperature, Temperature annual range, Evapotranspiration, Thermicity, Annual and Seasonal precipitation and many others) are valuable for ecological modeling purposes. Besides the historical period (1960-1999) from WATCH reanalyses, the 35 indicators for the future periods are based on time series of climate variables simulated under a combination of 6 Earth System Models (ESMs), 2 Representative Concentration Pathways (RCP 4.5 and 8.5) and 2 time horizons (2040-2079 and 2060-2099), amounting to a total of 23 ensemble members for each indicator, all provided as NetCDF files.", - "label": "Bioclimatic Indicators", - "how_to_cite": "Whenever you publish research or applications based on this dataset you should include the following citation:

Noce, S., Caporaso, L. & Santini, M. A new global dataset of bioclimatic indicators. Sci Data 7, 398 (2020). https://doi.org/10.1038/s41597-020-00726-5", - "contact": { - "name": "Sergio Noce", - "email": "sergio.noce@cmcc.it", - "webpage": "https://www.cmcc.it/people/noce-sergio" - }, - "image": "https://ddsfiles.s3.fr-par.scw.cloud/images/bioclimind.jpg", - "doi": "https://doi.org/10.25424/CMCC/BIOCLIMIND", - "license": { - "name": "Creative Commons Attribution 4.0 International (CC BY 4.0)", - "url": "https://creativecommons.org/licenses/by/4.0" - }, - "publication_date": "2020-12-22", - "keywords": [ - "Bioclimatic indicators", - "Ecological modeling", - "CMIP5", - "Biogeography", - "Species Distribution Modeling" - ], - "related_data": [ - { - "name": "A new global dataset of bioclimatic indicators", - "url": "https://doi.org/10.1038/s41597-020-00726-5" - } - ], - "id": "bioclimind", - "default": "future", - "products": [ - { - "id": "future", - "description": "Future Scenarios (2040-2099)" - }, - { - "id": "historical", - "description": "Historical (1969-1999)" - } - ] - }, - { - "description": "The Black Sea physics analysis and Forecasting System (BSFS) is one of the Production Units of the Black Sea Monitoring and Forecasting Centre (BS-MFC), developed in the frame of the Copernicus Marine Environment and Monitoring Service (CMEMS). The physical core is based on a hydrodynamic model implemented over the whole Black Sea basin. The model horizontal grid resolution is 1/36° in zonal direction, 1/27° in meridional direction (ca. 3 km) and has 31 unevenly spaced vertical levels. The hydrodynamics are supplied by the Nucleus for European Modeling of the Ocean (NEMO, v3.4).The model solutions are corrected by the variational assimilation OceanVar (based on a 3DVAR scheme), originally developed for the Mediterranean Sea and later extended for the global ocean. The observations assimilated in the BSFS includes in-situ profiles, along-track sea level anomalies (SLA) and gridded sea surface temperature (SST) provided by Copernicus Thematic Assembly Centres. BSFS provides every day 3 days of analysis, 1 day simulation and 10 days of forecast fields; once per week, the system runs for 14 days in the past to provide the best initial condition for the forecasting cycle. BSFS catalogue offers daily and hourly means from Jul 2018 - ongoing for the following list of variables: temperature, salinity, sea surface height, currents, mixed layer depth and bottom temperature. For further information see Black Sea Description page. This dataset is delivered using the NEMO model native grid.", - "contact": { - "name": "Ocean Lab", - "email": "ocean-lab@cmcc.it", - "webpage": "http://oceanlab.cmcc.it" - }, - "label": "Black Sea Physics Analysis and Forecasting System", - "image": "https://ddsfiles.s3.fr-par.scw.cloud/images/blacksea_v3.png", - "attribution": "This dataset has been produced by the Euro-Mediterranean Center on Climate Change (CMCC) in the context of the Copernicus Marine Environment and Monitoring Service (CMEMS). 
The activities leading to these results have been contracted by Mercator Ocean International, that implements CMEMS as part of the Copernicus Programme.", - "update_frequency": "weekly", - "doi": null, - "license": { - "name": "Copernicus Marine License", - "url": "https://marine.copernicus.eu/faq/cite-cmems-products-cmems-credit/?idpage=169" - }, - "publication_date": "2020-12-22", - "related_data": [ - { - "name": "Copernicus Marine BlackSea Forecasting Data", - "url": "https://resources.marine.copernicus.eu/?option=com_csw&task=results?option=com_csw&view=details&product_id=BLKSEA_ANALYSIS_FORECAST_PHYS_007_001" - } - ], - "id": "blacksea-analysis", - "default": "daily-analysis", - "products": [ - { - "id": "daily-analysis", - "description": "Daily Analysis" - } - ] - }, - { - "description": "A high-resolution (0.25 degree) historical global gridded dataset of monthly and annual cooling and heating degree-days (1970-2019) based on GLDAS data. Monthly and Annual Cooling/Heating degree-days (CDD/HDD) using daily average temperature (°C), and Cooling degree-days using daily average wet-bulb temperature (Twb), are based on the following threshold (base) temperatures:

  • CDD, CDDwb: 18, 18.3, 22, 23, 24 and 25 (°C)
  • HDD: 10, 15, 15.5, 16, 17 and 18 (°C)
The degree-days are computed using meteorological parameters from the Global Land Data Assimilation System (GLDAS) ver. 2 (at 0.25 degree global gridded resolution). The dataset, referred to as \"DegDays_0p25_1970_2019\", covers 50 years over the period 1970-2019. Units: Degree-Celsius Days. To convert to Degree-Fahrenheit Days, multiply by 1.8, e.g. CDD (°F) = 9/5 * CDD (°C)
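A minimal sketch of how such threshold-based degree-days are conventionally accumulated from daily mean temperature (the function and input names below are illustrative, and the exact processing applied to the GLDAS fields for this dataset may differ in detail):

    def cooling_degree_days(daily_mean_temps_c, base_c=18.0):
        """Sum of positive exceedances of the base temperature, in degree-Celsius days."""
        return sum(max(t - base_c, 0.0) for t in daily_mean_temps_c)

    def heating_degree_days(daily_mean_temps_c, base_c=18.0):
        """Sum of positive deficits below the base temperature, in degree-Celsius days."""
        return sum(max(base_c - t, 0.0) for t in daily_mean_temps_c)

    # converting to degree-Fahrenheit days, as stated above: multiply by 1.8
    cdd_f = 1.8 * cooling_degree_days([20.5, 23.0, 17.0], base_c=18.0)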
Important: The input temperature variables from GLDAS are from two sub-versions (ver. 2.0 for period 1970-2009, and ver 2.1 for period 2010-present day). The degree-days may therefore show a break in time-series at a few location around the years 2010-11, due to the change in the GLDAS versions. Users are therefore advised caution when using the data for trend analysis for instance. Further details on the merging of the two versions can be found here", - "label": "Historical Global Cooling and Heating degree-days (1970-2019)", - "contact": { - "name": "Malcolm Mistry", - "email": "malcolm.mistry@cmcc.it", - "webpage": "https://www.cmcc.it/people/malcom-mistry" - }, - "image": "https://ddsfiles.s3.fr-par.scw.cloud/images/Degree_days_1970_2019.png", - "doi": null, - "how_to_cite": "Whenever you publish research or applications based on this dataset you should include the following citation.

Mistry, MN. Historical global gridded degree‐days: A high‐spatial resolution database of CDD and HDD. Geosci Data J. 2019; 6: 214–221. https://doi.org/10.1002/gdj3.83", - "license": { - "name": "Creative Commons Attribution 4.0 International (CC BY 4.0)", - "url": "https://creativecommons.org/licenses/by/4.0" - }, - "publication_date": "2020-12-22", - "update_frequency": null, - "keywords": [ - "Climate extremes" - ], - "id": "cooling-heating-degreedays", - "default": "annual", - "products": [ - { - "id": "annual", - "description": "Annual" - }, - { - "id": "monthly", - "description": "Monthly" - } - ] - }, - { - "description": "The CMCC Global Ocean Physical Reanalysis System (C-GLORS) is used at CMCC to simulate the state of the ocean in the last decades. It consists of a variational data assimilation system (OceanVar), capable of assimilating all in-situ observations along with altimetry data, and a forecast step performed by the ocean model NEMO coupled with the LIM2 sea-ice model. The version of the Reanalysis presented here is the v7 with global resolution of 0.25° and 75 evenly spaced vertical levels. The v7 is forced with ECMWF Era-Interim on top of the ocean. Further details of the C-GLORS scheme can be found in http://c-glors.cmcc.it/index/index.html. The v7 is also part of the CMEMS product GLOBAL OCEAN ENSEMBLE PHYSICS REANALYSIS (GLOBAL_REANALYSIS_PHY_001_026, GLOBAL_REANALYSIS_PHY_001_031) . The current dataset has been interpolated with Zapata tool onto a regular grid.", - "contact": { - "name": "Andrea Cipollone", - "email": "andrea.cipollone@cmcc.it", - "webpage": "https://www.cmcc.it/people/cipollone-andrea" - }, - "label": "CMCC Global Ocean Physical Reanalysis System (C-GLORS)", - "image": "https://ddsfiles.s3.fr-par.scw.cloud/images/ohc_trend_cglors.png", - "doi": null, - "update_frequency": null, - "license": { - "name": "Copernicus Marine License", - "url": "https://marine.copernicus.eu/user-corner/technical-faq/how-cite-cmems-products-cmems-credit?idpage=169" - }, - "publication_date": "2021-01-25", - "related_data": null, - "how_to_cite": "This dataset has been produced by the Euro-Mediterranean Center on Climate Change (CMCC) in the context of the Copernicus Marine Environment and Monitoring Service (CMEMS). The activities leading to these results have been contracted by Mercator Ocean International, that implements CMEMS as part of the Copernicus Programme.

Whenever you publish research or applications based on this dataset you should include the following citations:

Storto, et al., Clim Dyn 53, 287–312 (2019). https://doi.org/10.1007/s00382-018-4585-5
Storto, A. and Masina, S.: https://doi.org/10.5194/essd-8-679-2016, 2016", - "id": "cglorsv7", - "default": "monthly", - "products": [ - { - "id": "monthly", - "description": "Monthly" - } - ] - }, - { - "description": "This dataset provides Climate Impact Indices related to (extreme) precipitation for the European domain. The dataset provides a historical perspective of changes in extreme precipitation across Europe and follows the definitions for the Climate Impact Indices that are defined by the CCl/CLIVAR/JCOMM Expert Team on Climate Change Detection and Indices ( ETCCDI ). The datasets on which the Climate Impact Indices are based are the E-OBS observational dataset, which provides daily sums of precipitation, and the ERA5 reanalysis dataset, of which the hourly precipitation sums are used. The value of the current dataset is in the aggregation of these dataset in Climate Impact Indices which provide a more direct view on the climatic variability and change in extreme precipitation than its sources. The selected indices highlight extreme precipitation from various angles: by using both fixed thresholds and percentile thresholds, related to the climate specific for that area, assessments of heavy precipitation events can be made and comparisons between stations can be made, even when the stations are from different climatic conditions. Both the length of wet spells as well as ‘hard extremes’, relating to return levels of up to 100-year, are provided. This dataset provides Climate Impact Indices related to (extreme) precipitation for the European domain. ", - "attribution": "This dataset has been produced on behalf of the Copernicus Climate Change Service (C3S) by the Euro-Mediterranean Center for Climate Change (CMCC) and the Royal Netherlands Meteorological Institute (KNMI). The information in this dataset has been generated using Copernicus Climate Change Service information 2021 and it is provided \"as is\" with no guarantee or warranty. The users thereof use the information at their sole risk and liability. CMCC, KNMI, European Commission nor ECMWF are not responsible for any use that may be made of the Copernicus information or data it contains.

Whenever you publish research or applications based in whole or in part on these data, you should include the following citation and acknowledgement according to the licence to use Copernicus Products.

Mercogliano, P, Rianna, G, Reder, A, Raffa, M, Mancini, M, Stojiljkovic, M, de Valk, C, and van der Schrier, G (2021): Extreme precipitation risk indicators for Europe and European cities from 1950 to 2019. Copernicus Climate Change Service (C3S) Climate Data Store (CDS). DOI: https://doi.org/10.24381/cds.3a9c4f89", - "contact": { - "name": "Paola Mercogliano", - "email": "paola.mercogliano@cmcc.it", - "webpage": "https://www.cmcc.it/people/mercogliano-paola" - }, - "label": "Extreme precipitation indicators for Europe from 1950 to 2019", - "image": "https://ddsfiles.s3.fr-par.scw.cloud/images/europe_extreme_precipitation_risk_indicators.png", - "doi": null, - "license": { - "name": "Licence to Use Copernicus Products", - "url": "https://cds.climate.copernicus.eu/api/v2/terms/static/licence-to-use-copernicus-products.pdf" - }, - "update_frequency": "None", - "publication_date": "2021-11-02", - "related_data": [ - { - "name": "ERA5 hourly data on single levels from 1979 to present", - "url": "https://dds.cmcc.it/#/dataset/era5-single-levels" - }, - { - "name": "E-OBS daily gridded meteorological data for Europe from 1950 to present", - "url": "https://dds.cmcc.it/#/dataset/e-obs" - }, - { - "name": "Extreme Precipitation and Flood Risk Indicators for European cities from 1989 to 2018", - "url": "https://dds-dev.cmcc.it/#/dataset/european-cities-flood-risk-indicators" - }, - { - "name": "C3S Extreme precipitation risk indicators for Europe and European cities from 1950 to 2019", - "url": "https://cds.climate.copernicus.eu/cdsapp#!/dataset/sis-european-risk-extreme-precipitation-indicators" - }, - { - "name": "C3S Flood risk indicators for European cities from 1989 to 2018", - "url": "https://cds.climate.copernicus.eu/cdsapp#!/dataset/sis-european-risk-flood-indicators" - } - ], - "id": "europe-extreme-precipitation-risk-indicators", - "default": "yearly", - "products": [ - { - "id": "yearly", - "description": "Yearly Extreme Precipitation Risk Indicators for Europe" - }, - { - "id": "monthly", - "description": "Monthly Extreme Precipitation Risk Indicators for Europe" - }, - { - "id": "daily", - "description": "Daily Extreme Precipitation Risk Indicators for Europe" - }, - { - "id": "30-years", - "description": "30 years (1989-2018) Extreme Precipitation Risk Indicators for Europe" - } - ] - }, - { - "description": "The dataset presents climate impact indicators related to extreme precipitation and indicators to evaluate the spatial distribution of flood risk in terms of hazards and direct damages. It is provided as a high-resolution product focused on 20 European cities that were identified as vulnerable to urban pluvial flooding by experts and practitioners from government agencies and Civil Protection. The dataset combines a high-resolution, probabilistic, description of extreme precipitation, exposure datasets and damage/vulnerability models into a comprehensive pluvial flood risk assessment for cities across Europe for the current climate. It allows city stakeholders to exploit flood risk analysis over the city. The dataset is derived from data available on the Climate Data Store and the Copernicus Land Monitoring Service (CLMS). The former includes ERA5 reanalysis data, dynamically downscaled to 2km x 2km grid with the regional climate model COSMO-CLM and accounting for urban parameterization in order to reach the spatial and temporal resolution suitable for pluvial flood analysis at a city scale. 
This downscaled product is used for deriving hourly precipitation input at prescribed recurrence intervals that, in combination with supporting digital elevation models (DEM) from the CLMS, is used to feed hazard and damage models.", - "attribution": "This dataset has been produced on behalf of the Copernicus Climate Change Service (C3S) by the Euro-Mediterranean Center for Climate Change (CMCC), the Royal Netherlands Meteorological Institute (KNMI) and GECOSistema s.r.l. The information in this dataset has been generated using Copernicus Climate Change Service information 2021 and it is provided \"as is\" with no guarantee or warranty. The users thereof use the information at their sole risk and liability. CMCC, KNMI, GECOSistema, European Commission nor ECMWF are not responsible for any use that may be made of the Copernicus information or data it contains.

Whenever you publish research or applications based in whole or in part on these data, you should include the following citations and acknowledgement according to the licence to use Copernicus Products.

Mercogliano, P, Rianna, G, Reder, A, Raffa, M, Padulano, R, Essenfelder, A, Mazzoli, P, and Bagli, S (2021): Flood risk indicators for European cities from 1989 to 2018. Copernicus Climate Change Service (C3S) Climate Data Store (CDS). DOI: https://doi.org/10.24381/cds.9d3db0eb

Mercogliano, P, Rianna, G, Reder, A, Raffa, M, Mancini, M, Stojiljkovic, M, de Valk, C, and van der Schrier, G (2021): Extreme precipitation risk indicators for Europe and European cities from 1950 to 2019. Copernicus Climate Change Service (C3S) Climate Data Store (CDS). DOI: https://doi.org/10.24381/cds.3a9c4f89", - "contact": { - "name": "Paola Mercogliano", - "email": "paola.mercogliano@cmcc.it", - "webpage": "https://www.cmcc.it/people/mercogliano-paola" - }, - "label": "Extreme Precipitation and Flood Risk Indicators for European cities from 1989 to 2018", - "image": "https://ddsfiles.s3.fr-par.scw.cloud/images/european_cities_flood_risk_indicators.png", - "doi": null, - "license": { - "name": "Licence to Use Copernicus Products", - "url": "https://cds.climate.copernicus.eu/api/v2/terms/static/licence-to-use-copernicus-products.pdf" - }, - "update_frequency": null, - "publication_date": "2021-11-02", - "related_data": [ - { - "name": "ERA5 hourly data on single levels from 1979 to present", - "url": "https://dds.cmcc.it/#/dataset/era5-single-levels" - }, - { - "name": "E-OBS daily gridded meteorological data for Europe from 1950 to present", - "url": "https://dds.cmcc.it/#/dataset/e-obs" - }, - { - "name": "Extreme precipitation indicators for Europe from 1950 to 2019", - "url": "https://dds.cmcc.it/#/dataset/europe-extreme-precipitation-risk-indicators" - }, - { - "name": "C3S Extreme precipitation risk indicators for Europe and European cities from 1950 to 2019", - "url": "https://cds.climate.copernicus.eu/cdsapp#!/dataset/sis-european-risk-extreme-precipitation-indicators" - }, - { - "name": "C3S Flood risk indicators for European cities from 1989 to 2018", - "url": "https://cds.climate.copernicus.eu/cdsapp#!/dataset/sis-european-risk-flood-indicators" - } - ], - "id": "european-cities-flood-risk-indicators", - "default": "flood-risk", - "products": [ - { - "id": "flood-risk", - "description": "Extreme Flood Risk Indicators" - }, - { - "id": "daily-precipitation-risk", - "description": "Daily Extreme Precipitation Risk Indicators" - }, - { - "id": "30-years-precipitation-risk", - "description": "30-year Extreme Precipitation Risk Indicators" - } - ] - }, - { - "description": "This dataset provides rainfall erosivity (R factor), associated indicators, and the potential for soil loss induced by water erosion for Italy. The dataset is derived from integrating rainfall data included in the Climate Data Store (CDS) of the Copernicus Climate Change Service (C3S) with non-climate data to assess soil susceptibility to water erosion according to Revised Universal Soil Loss Equation (RUSLE) approach. The gridded dataset can support the decision-making process of many stakeholders for strategical planning purposes across different sectors addressed by the Copernicus Climate Change Service. The dataset provides:
  • Key information on water erosion dynamics in terms of R factor and potential soil loss for the historical period. The assessment of R factor uses as input gridded observations (E-OBS) and reanalysis data (ERA5, ERA5-Land) for precipitation included in the CDS. The soil loss assessment is obtained by further operating the R factor derived from ERA5-Land with non-climate gridded data representing soil susceptibility to water erosion in accordance with the RUSLE formulation (sketched after this list). These are provided at ≈500m horizontal resolution in this dataset.
  • Key information on water erosion dynamics in terms of R factor and potential soil loss for future periods. The assessment is again based on the RUSLE approach and uses as input precipitation data included in the EURO-CORDEX ensemble climate projections (0.11°) under several Representative Concentration Pathways (RCPs), still available on the CDS. In this case, bias-corrected monthly precipitation data are used and the R factor is provided both at native horizontal resolution and regridded to ≈500m to be operated with other RUSLE factors to derive potential soil loss for the future. Additional R factor proxy variables, based on daily precipitation data, are provided at native horizontal resolution and as anomalies with respect to the historical period, with no bias-correction.
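As a point of reference for the RUSLE formulation mentioned in the first bullet above, a minimal sketch using the standard textbook form A = R · K · LS · C · P follows; the factor values and names are hypothetical and do not reproduce the dataset's actual gridded factors or units.

def rusle_soil_loss(r, k, ls, c, p):
    """Potential soil loss as the product of rainfall erosivity (R) and the
    non-climate susceptibility factors: erodibility (K), slope length/steepness (LS),
    cover management (C) and support practice (P)."""
    return r * k * ls * c * p

# Hypothetical single-grid-cell values; units follow from the factor definitions.
soil_loss = rusle_soil_loss(r=1200.0, k=0.03, ls=1.4, c=0.2, p=1.0)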
", - "attribution": "This dataset was produced on behalf of the Copernicus Climate Change Service (C3S) by the Euro-Mediterranean Center for Climate Change (CMCC). The information in this dataset has been generated using Copernicus Climate Change Service information 2021 and it is provided \"as is\" with no guarantee or warranty. The users thereof use the information at their sole risk and liability. CMCC, European Commission and ECMWF are not responsible for any use that may be made of the Copernicus information or data it contains.", - "contact": { - "name": "Monia Santini", - "email": "monia.santini@cmcc.it", - "webpage": "https://www.cmcc.it/people/santini-m" - }, - "image": "https://ddsfiles.s3.fr-par.scw.cloud/images/soilerosion.png", - "label": "Soil Erosion Indicators for Italy (1981-2080)", - "doi": "https://doi.org/10.24381/cds.66d88ff8", - "update_frequency": null, - "license": { - "name": "License to use Copernicus Products", - "url": "https://cds.climate.copernicus.eu/api/v2/terms/static/licence-to-use-copernicus-products.pdf" - }, - "publication_date": "2021-07-01", - "id": "soil-erosion-over-italy", - "default": "historical", - "products": [ - { - "id": "historical", - "description": "Reference period (1981-2010)" - }, - { - "id": "future", - "description": "Future Scenarios (2021-2080)" - }, - { - "id": "aggregate", - "description": "Aggregate Statistics for Future Scenarios (2021-2080)" - } - ] - }, - { - "description": "The dataset provides a set of physical ocean parameters over the Mediterranean Sea region which describe the evolution of the system under CMIP5 historical (1980-2005) and future RCP4.5 and RCP8.5 scenarios for the 21st century (2006-2100). These estimates were produced with the Mediterranean Sea eddy-resolving configuration of the NEMO v3.4 modelling system (Oddo et al., 2009) and uses a horizontal grid resolution of 1/16° (~6.5km) and 72 unevenly spaced vertical levels (ranging from 3 m at the surface down to 600 m in the deeper layers). 
Boundary conditions come from the atmosphere-ocean global circulation model CMCC-CM (Scoccimarro et al., 2011) and account for 6-hourly atmospheric fields, daily fresh water discharges (rivers and Black Sea exchange), and monthly fields of temperature, salinity, and velocities prescribed at the open lateral boundaries (see details in Lovato et al., 2013).", - "attribution": null, - "contact": { - "name": "Tomas Lovato", - "email": "tomas.lovato@cmcc.it", - "webpage": "https://www.cmcc.it/people/lovato-tomas-2" - }, - "image": "https://ddsfiles.s3.fr-par.scw.cloud/images/medsea-scenarios.jpg", - "label": "Mediterranean Sea marine physical simulations under CMIP5 historical and future scenario projections for the 21st century.", - "doi": null, - "update_frequency": null, - "license": { - "name": "Creative Commons Attribution 4.0 International (CC BY 4.0)", - "url": "https://creativecommons.org/licenses/by/4.0" - }, - "publication_date": "2021-07-13", - "id": "medsea-cmip5-projections-physics", - "default": "historical", - "products": [ - { - "id": "historical", - "description": "Historical period" - }, - { - "id": "RCP45", - "description": "Scenarios RCP 4.5" - }, - { - "id": "RCP85", - "description": "Scenarios RCP 8.5" - } - ] - }, - { - "description": "The dataset provides a set of biogeochemical parameters over the Mediterranean Sea region which describe the evolution of the system under CMIP5 future RCP4.5 and RCP8.5 scenarios and in the control simulation (based on repeating 2005-2014 physical forcing) for the 21st century (2005-2099).These estimates were produced using the offline coupling between the Mediterranean Sea eddy-resolving configuration of the NEMO v3.4 modeling system (Oddo et al., 2009) and the transport-reaction model OGSTM-BFM (Lazzari et al. 2012;2016). The projections have a horizontal grid resolution of 1/16° (~6.5km) and 70 unevenly spaced vertical levels (ranging from 3 m at the surface down to 600 m in the deeper layers).\"", - "attribution": null, - "contact": { - "name": "Stefano Salon", - "email": "ssalon@inogs.it", - "webpage": "https://www.inogs.it/it/users/stefano-salon" - }, - "image": "https://ddsfiles.s3.fr-par.scw.cloud/images/medsea_biogeochemistry.jpg", - "label": "Mediterranean Sea marine biogeochemistry simulations under CMIP5 future scenario projections for the 21st century", - "doi": null, - "update_frequency": null, - "license": { - "name": "Creative Commons Attribution 4.0 International (CC BY 4.0)", - "url": "https://creativecommons.org/licenses/by/4.0" - }, - "publication_date": "2021-12-15", - "id": "medsea-cmip5-projections-biogeochemistry", - "default": "baseline", - "products": [ - { - "id": "baseline", - "description": "Baseline" - }, - { - "id": "RCP45", - "description": "Scenarios RCP 4.5" - }, - { - "id": "RCP85", - "description": "Scenarios RCP 8.5" - } - ] - }, - { - "description": "The dataset contains dynamically downscaled ERA5 reanalysis, originally available at ≈31 km x 31 km horizontal resolution, to 2.2 km x 2.2 km. Dynamical downscaling has been conducted directly for the project (foreground) through Regional Climate Model (RCM) COSMO5.0_CLM9 e INT2LM 2.06. The RCM COSMO CLM is currently developed by the CLM-Community, with which CMCC collaborates since 2008 (additional info on COSMO CLM). The temporal resolution of outputs is hourly (like for ERA5). 
Runs cover the whole Italian territory (and neighbouring areas according to the necessary computation boundary) so as to provide a very detailed (in terms of space-time resolution) and comprehensive (in terms of meteorological fields) dataset of climatological data for at least the last 30 years (01/1989-10/2020). Typical uses of such a dataset are (applied) research and downstream services (e.g. for decision support systems).
The temporal coverage of the dataset is from 01/01/1989 00:00 to 31/12/2020 23:00 and the temporal resolution is 1 hour. All output variables (reported in the following table) are on single levels except soil water content, which is provided for 7 soil levels.
Variable Name                                     | Units
2m temperature                                    | K
2m dew point temperature                          | K
Total precipitation                               | Kg/m2
U-component of 10m wind                           | m/s
V-component of 10m wind                           | m/s
2m maximum temperature                            | K
2m minimum temperature                            | K
Mean sea level pressure                           | Pa
Specific humidity                                 | kg/kg
Total cloud cover                                 | Dimensionless
Surface evaporation                               | Kg/m2
Averaged surface net downward shortwave radiation | W/m2
Averaged surface net downward longwave radiation  | W/m2
Surface snow amount                               | m
Soil (multi levels) water content                 | m
", - "attribution": "The use of the COSMO CLM model is completely free of charge for all research applications. The use of COSMO-CLM generated data within HIGHLANDER is free for partners (acting as intermediate users) for the project’s purposes; the use for other purposes (and by further external end-users) requires an appropriate disclaimer, including reference to COPERNICUS, CINECA, CLM Assembly and CMCC, such as and if additional data post-processing (e.g. fields elaboration or new formats) is required, this can be agreed on after discussing with Dataset Manager/Owner/Provider This datasets contains modified Copernicus Climate Change Service information 2021. Neither the European Commission nor ECMWF is responsible for any use that may be made of the Copernicus information or data it contains. (See License to Use Copernicus Products )

Whenever you publish research or applications based on this dataset you should include the following citation:

Raffa, M.; Reder, A.; Marras, G.F.; Mancini, M.; Scipione, G.; Santini, M.; Mercogliano, P. VHR-REA_IT Dataset: Very High Resolution Dynamical Downscaling of ERA5 Reanalysis over Italy by COSMO-CLM. Data 2021, 6, 88. https://doi.org/10.3390/data6080088", - "contact": { - "name": "Paola Mercogliano", - "email": "paola.mercogliano@cmcc.it", - "webpage": "https://www.cmcc.it/people/mercogliano-paola" - }, - "label": "ERA5 downscaling @2.2 km over Italy", - "image": "https://ddsfiles.s3.fr-par.scw.cloud/images/TOT_PREC_CCLM2km_land.png", - "doi": "https://doi.org/10.25424/cmcc/era5-2km_italy", - "update_frequency": "None", - "license": { - "name": "Dataset License", - "url": "https://ddsfiles.s3.fr-par.scw.cloud/vhr_era5_dds_license.pdf" - }, - "publication_date": "2021-08-01", - "related_data": [ - { - "name": "ERA5 hourly data on single levels from 1979 to present", - "url": "https://doi.org/10.24381/cds.adbb2d47" - } - ], - "id": "era5-downscaled-over-italy", - "default": "VHR-REA_IT_1989_2020_hourly", - "products": [ - { - "id": "VHR-REA_IT_1989_2020_hourly", - "description": "VHR-REA CCLM downscaling ERA5 (0.02 Deg)" - }, - { - "id": "orography", - "description": "VHR-REA CCLM orography" - } - ] - }, - { - "description": "Climate change tests the ability of individuals to perceive physical and mental wellbeing conditions in daily life, especially in the context of large urban settlements. The downscaling at very high resolution of ERA5 reanalysis (see dataset ERA5-DOWNSCALED-OVER-ITALY) allows to reproduce the interactions between atmosphere and surface considering spatially detailed land use distribution and was used to calculate the four indicators presented in this dataset: Wind Chill (°C), Humidex (°C), Discomfort Index (°C), Apparent Temperature (°C). The cooling index, called Wind Chill (WC), expresses the cooling sensation caused by the combined effect of temperature and wind; it is based on the formulation of Osczevski & Bluestein (2005). The discomfort index (DI) is considered one of the best indices to estimate, in a single value, the effect of temperature and humidity on the sensation of heat or cold perceived by the human body; it is derived from Thom and Bosen (1959). The Humidex (H) it is based on a simple empirical relationship that considers the air temperature and vapor pressure, the latter in turn function of temperature and relative humidity, and calculated according to Masterton and Richardson (1979). Finally, the Apparent Temperature (AT) considers all the environmental and body conditions that influence human thermoregulation; Steadman (1984) implemented an empirical formula for AT, under outside shaded environment, that combines air temperature, vapor pressure and wind speed. The temporal coverage of the dataset is from 01/01/1989 00:00 to 31/12/2020 23:00 and the temporal resolution is 1 hour. Daily statistics (minimum, mean, maximum) are also provided.

References
Masterton, J.M., Richardson, F.A. (1979) Humidex, A Method of Quantifying Human Discomfort Due to Excessive Heat and Humidity, CLI 1-79, Environment Canada, Atmospheric Environment Service, Downsview, Ontario, 45 pp.
Osczevski, R., Bluestein, M. (2005) The new wind chill equivalent temperature chart. Bulletin of the American Meteorological Society 86: 1453–1458.
Steadman, R.G. (1984) A universal scale of apparent temperature. J. Climate Appl. Meteor., 23, 1674-1687.
Thom E.C., Bosen J.F. (1959) The discomfort index. Weatherwise, 12: 57-60", - "attribution": "The dataset has been created using the VHR ERA5 dataset downscaled @2.2km over Italy as described in the paper Raffa et al. (2021) https://doi.org/10.3390/data6080088

Whenever you publish research or applications based on this dataset you should include the citation of this dataset doi https://doi.org/10.25424/cmcc/wellbeing-indicators-over-italy", - "contact": { - "name": "Monia Santini", - "email": "monia-santini@cmcc.it", - "webpage": "https://www.cmcc.it/people/santini-m" - }, - "label": "Wellbeing Indicators using ERA5 downscaled @2.2 km over Italy", - "image": "https://ddsfiles.s3.fr-par.scw.cloud/images/healthwellbeing_vhr_rea.png", - "doi": "https://doi.org/10.25424/cmcc/wellbeing-indicators-over-italy", - "update_frequency": "None", - "license": { - "name": "Creative Commons Attribution 4.0 International (CC BY 4.0)", - "url": "https://creativecommons.org/licenses/by/4.0" - }, - "publication_date": "2021-10-17", - "related_data": [ - { - "name": "ERA5 downscaling @2.2 km over Italy", - "url": "https://doi.org/10.25424/cmcc/era5-2km_italy" - } - ], - "id": "wellbeing-indicators-over-italy", - "default": "hourly", - "products": [ - { - "id": "hourly", - "description": "Hourly Wellbeing Indicators" - }, - { - "id": "daily", - "description": "Daily Wellbeing Indicators" - } - ] - }, - { - "description": "Routes of least-CO2 emission, computed by the VISIR-2 model for a ferry, starting from forecast of wave and sea currents by CMEMS. The routes can also be browsed from the web application, where further documentation about their production process is provided.", - "contact": { - "name": "Gianandrea Mannarini", - "email": "gianandrea.mannarini@cmcc.it", - "webpage": "https://www.cmcc.it/people/mannarini-gianandrea" - }, - "label": "GUTTA-VISIR least-CO2 routes", - "image": "https://ddsfiles.s3.fr-par.scw.cloud/images/gutta-visir.png", - "doi": null, - "update_frequency": "Daily", - "publication_date": "2022-03-01", - "related_data": [ - { - "name": "VISIR-2 Model", - "url": "https://www.visir-model.net" - }, - { - "name": "GUTTA-VISIR Web Application", - "url": "https://www.gutta-visir.eu" - } - ], - "id": "gutta-visir", - "default": "routes", - "products": [ - { - "id": "routes", - "description": "Routes" - } - ] - }, - { - "description": "Extreme climate conditions affect the maintenance of soil functions, especially in areas particularly subject to rainfall-induced erosion. The case study on Soil Erosion in HIGHLANDER is based on a consolidated empirical model (RUSLE) to generate assessment (1991-2020) and projections (2021-2050, coming soon) about the rainfall erosivity and potential loss of soil both on forests and agricultural areas at very high spatial resolution (@2km for rainfall erosivity – both on the native rotated grid from the climate model and on regular grid - and 250 m for soil loss – on regular grid), and relying on 12 models to calculate rainfall erosivity (see Table). Such a dataset at very high resolution at national scale will support in identifying areas particularly at risk under changes in climate variability and extreme events, so to formulate strategies to reduce soil erosion through appropriate management of forests and agricultural fields, also in terms of working practices and soil protection measures.
", - "attribution": "The dataset has been created using the VHR ERA5 dataset downscaled @2.2km over Italy as described in the paper Raffa et al. (2021) and the soil susceptibility dataset provided with RUSLE2015 product. Whenever you publish research or applications based on this dataset you should refer to RUSLE2015 datasets for soil susceptibility factors, and include the following citation for rainfall erosivity factor:

Raffa, M.; Reder, A.; Marras, G.F.; Mancini, M.; Scipione, G.; Santini, M.; Mercogliano, P. VHR-REA_IT Dataset: Very High Resolution Dynamical Downscaling of ERA5 Reanalysis over Italy by COSMO-CLM. Data 2021, 6, 88. 10.3390/data6080088.", - "contact": { - "name": "Monia Santini", - "email": "monia.santini@cmcc.it", - "webpage": "https://www.cmcc.it/people/santini-m" - }, - "image": "https://ddsfiles.s3.fr-par.scw.cloud/images/SOIL-EROSION-OVER-ITALY-2km.png", - "label": "Soil Erosion Indicators for Italy (1991-2020) @2.2 km over Italy", - "doi": "https://doi.org/10.25424/cmcc/soil-erosion-over-italy-2km", - "update_frequency": "None", - "license": { - "name": "CC BY 4.0", - "url": "https://creativecommons.org/licenses/by/4.0/" - }, - "publication_date": "2022-04-11", - "related_data": [ - { - "name": "ERA5 hourly data on single levels from 1979 to present", - "url": "https://doi.org/10.24381/cds.adbb2d47" - }, - { - "name": "Downscaling of ERA5 @2.2 km over Italy", - "url": "https://doi.org/10.25424/cmcc/era5-2km_italy" - }, - { - "name": "Soil erosion by water (RUSLE2015)", - "url": "https://esdac.jrc.ec.europa.eu/content/soil-erosion-water-rusle2015" - } - ], - "id": "soil-erosion-over-italy-2km", - "default": "historical", - "products": [ - { - "id": "historical", - "description": "Soil Erosion Indicators - Period 1991-2020" - } - ] - }, - { - "description": "This dataset is related to ERA5 that is the fifth generation ECMWF atmospheric reanalysis of the global climate. Reanalysis combines model data with observation data from satellite and in-situ stations from across the world into a globally complete and consistent dataset using ECMWF's Integrated Forecast System (IFS). This dataset is generated using Copernicus Climate Change Service Information 2020 and is a subset of the Climate Datastore Catalog entry (ERA5 hourly data on single levels from 1979 to present ). The dataset contains 14 variables related to the reanalysis product type: 2 metre dewpoint temperature, mean sea level pressure, snowfall, surface pressure, surface solar radiation downwards, surface thermal radiation downwards, 2 metre temperature, total precipitation, 10 metre U wind component, 10 metre V wind component, total cloud cover, minimum and maximum temperature at 2 metres since previous post-processing, mean wave direction. Data are currently available starting from 1979.", - "attribution": "This dataset has been generated using Copernicus Climate Change Service information 2020. 
Neither the European Commission nor ECMWF is responsible for any use that may be made of the Copernicus information or data it contains.", - "contact": { - "name": "Data Deliver System Support Team", - "email": "dds-support@cmcc.it", - "webpage": "https://www.cmcc.it/research-organization/research-divisions/advanced-scientific-computing-division#1553329820238-2055494b-9aa6" - }, - "label": "ERA5 hourly data on single levels from 1979 to present", - "image": "https://ddsfiles.s3.fr-par.scw.cloud/images/era5.png", - "doi": null, - "update_frequency": "Monthly", - "license": { - "name": "Licence to Use Copernicus Products", - "url": "https://cds.climate.copernicus.eu/api/v2/terms/static/licence-to-use-copernicus-products.pdf" - }, - "publication_date": "2020-12-22", - "related_data": [ - { - "name": "C3S ERA5 Data", - "url": "https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-single-levels" - } - ], - "id": "era5-single-levels", - "default": "reanalysis", - "products": [ - { - "id": "reanalysis", - "description": "Reanalysis" - } - ] - }, - { - "description": "ERA5-Land is a reanalysis dataset providing a consistent view of the evolution of land variables over several decades at an enhanced resolution compared to ERA5. ERA5-Land has been produced by replaying the land component of the ECMWF ERA5 climate reanalysis. Reanalysis combines model data with observations from across the world into a globally complete and consistent dataset using the laws of physics. Reanalysis produces data that goes several decades back in time, providing an accurate description of the climate of the past. This dataset includes 5 variables as available on C3S Climate Data Store.", - "attribution": "This dataset has been generated using Copernicus Climate Change Service information 2022. Neither the European Commission nor ECMWF is responsible for any use that may be made of the Copernicus information or data it contains.", - "contact": { - "name": "Data Deliver System Support Team", - "email": "dds-support@cmcc.it", - "webpage": "https://www.cmcc.it/research-organization/research-divisions/advanced-scientific-computing-division#1553329820238-2055494b-9aa6" - }, - "label": "ERA5-Land hourly data from 1981 to present", - "image": "https://ddsfiles.s3.fr-par.scw.cloud/images/era5-land.png", - "doi": null, - "update_frequency": "Monthly", - "license": { - "name": "Licence to Use Copernicus Products", - "url": "https://cds.climate.copernicus.eu/api/v2/terms/static/licence-to-use-copernicus-products.pdf" - }, - "publication_date": "2022-04-11", - "related_data": [ - { - "name": "C3S ERA5 Land Dataset", - "url": "https://doi.org/10.24381/cds.e2161bac" - } - ], - "id": "era5-land", - "default": "reanalysis", - "products": [ - { - "id": "reanalysis", - "description": "Reanalysis" - } - ] - }, - { - "description": "E-OBS is a daily gridded land-only observational dataset over Europe. The blended time series from the station network of the European Climate Assessment & Dataset (ECA&D) project form the basis for the E-OBS gridded dataset. All station data are sourced directly from the European National Meteorological and Hydrological Services (NMHSs) or other data holding institutions.
E-OBS comes as an ensemble dataset and is available on a 0.1 and 0.25 degree regular grid for the elements daily mean temperature TG, daily minimum temperature TN, daily maximum temperature TX, daily precipitation sum RR, daily averaged sea level pressure PP and daily mean global radiation QQ. They cover the area: 25N-71.5N x 25W-45E. The data files are in NetCDF-4 format. The Global 30 Arc-Second Elevation Data Set (GTOPO30), a global raster Digital Elevation Model (DEM) with a horizontal grid spacing of 30 arc seconds (approximately 1 kilometer) developed by USGS is used for the elevation file as well.
The ensemble dataset is constructed through a conditional simulation procedure. For each of the members of the ensemble a spatially correlated random field is produced using a pre-calculated spatial correlation function. The mean across the members is calculated and is provided as the \"best-guess\" fields. The spread is calculated as the difference between the 5th and 95th percentiles over the ensemble to provide an indication of the 90% uncertainty range. The global radiation dataset has a 10-member ensemble, while the other elements have a 100-member ensemble. For more details see Cornes et al. (2018) and the guidance on how to use ensemble datasets.
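The "best-guess"/spread construction described above is straightforward to reproduce on a stack of ensemble members; a minimal NumPy sketch follows. The (member, y, x) array layout, the random test data and all names are assumptions for illustration only, not part of E-OBS tooling.

import numpy as np

# Hypothetical ensemble: one 2-D field per member, shape (n_members, ny, nx).
rng = np.random.default_rng(seed=0)
ensemble = rng.normal(loc=10.0, scale=2.0, size=(100, 4, 5))

best_guess = ensemble.mean(axis=0)                   # mean across members ("best-guess" field)
p5, p95 = np.percentile(ensemble, [5, 95], axis=0)   # 5th and 95th percentiles per grid cell
spread = p95 - p5                                    # width of the ~90% uncertainty range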
The position of E-OBS is unique in Europe because of the relatively high spatial horizontal grid spacing, the daily resolution of the dataset, the provision of multiple variables and the length of the dataset. The dataset is daily, meaning the observations cover 24 hours per time step. The exact 24-hour period can be different per region. The reason for this is that some data providers measure between midnight to midnight while others might measure from morning to morning. Since E-OBS is an observational dataset, no attempts have been made to adjust time series for this 24-hour offset. It is made sure, where known, that the largest part of the measured 24-hour period corresponds to the day attached to the time step in E-OBS (and ECA&D).", - "contact": { - "name": "Data Deliver System Support Team", - "email": "dds-support@cmcc.it", - "webpage": "https://www.cmcc.it/research-organization/research-divisions/advanced-scientific-computing-division#1553329820238-2055494b-9aa6" - }, - "attribution": "This dataset has been generated using Copernicus Climate Change Service information 2020. Neither the European Commission nor ECMWF is responsible for any use that may be made of the Copernicus information or data it contains.

Whenever you publish research or applications based in whole or in part on these data, you should include the following citation and acknowledgement:

We acknowledge the E-OBS dataset and the data providers in the ECA&D project

Cornes, R., G. van der Schrier, E.J.M. van den Besselaar, and P.D. Jones. 2018: An Ensemble Version of the E-OBS Temperature and Precipitation Datasets, J. Geophys. Res. Atmos., 123. doi:10.1029/2017JD028200", - "label": "E-OBS daily gridded meteorological data for Europe from 1950 to present", - "image": "https://ddsfiles.s3.fr-par.scw.cloud/images/e-obs.png", - "doi": null, - "license": { - "name": "E-OBS Product License", - "url": "https://www.ecad.eu/documents/ECAD_datapolicy.pdf" - }, - "publication_date": "2020-12-22", - "update_frequency": "Every 6 months", - "related_data": [ - { - "name": "C3S E-OBS daily gridded dataset", - "url": "https://doi.org/10.24381/cds.151d3ec6" - }, - { - "name": "Monitoring European climate using surface observations", - "url": "https://surfobs.climate.copernicus.eu/" - } - ], - "id": "e-obs", - "default": "ensemble-mean", - "products": [ - { - "id": "ensemble-mean", - "description": "Ensemble Mean" - }, - { - "id": "ensemble-spread", - "description": "Ensemble Spread" - }, - { - "id": "elevation", - "description": "Elevation" - } - ] - } - ] - } \ No newline at end of file diff --git a/web/tests/resources/product_details.json b/web/tests/resources/product_details.json deleted file mode 100644 index 6f45305..0000000 --- a/web/tests/resources/product_details.json +++ /dev/null @@ -1,448 +0,0 @@ -{ - "metadata": { - "role": "internal", - "catalog_dir": "/catalog/external/" - }, - "data": { - "domain": { - "crs": { - "name": "latitude_longitude", - "semi_major_axis": 6371229.0, - "semi_minor_axis": 6371229.0, - "inverse_flattening": 0.0, - "longitude_of_prime_meridian": 0.0 - }, - "coordinates": { - "latitude": { - "values": [ - 2.0, - 1.75, - 1.5, - 1.25, - 1.0, - 0.75, - 0.5, - 0.25, - 0.0, - -0.25, - -0.5, - -0.75, - -1.0, - -1.25, - -1.5, - -1.75, - -2.0, - -2.25, - -2.5, - -2.75, - -3.0, - -3.25, - -3.5, - -3.75, - -4.0, - -4.25, - -4.5, - -4.75, - -5.0, - -5.25, - -5.5, - -5.75, - -6.0, - -6.25, - -6.5, - -6.75, - -7.0, - -7.25, - -7.5, - -7.75, - -8.0, - -8.25, - -8.5, - -8.75, - -9.0, - -9.25, - -9.5, - -9.75, - -10.0, - -10.25, - -10.5, - -10.75, - -11.0, - -11.25, - -11.5, - -11.75, - -12.0, - -12.25, - -12.5, - -12.75, - -13.0, - -13.25, - -13.5, - -13.75, - -14.0, - -14.25, - -14.5, - -14.75, - -15.0, - -15.25, - -15.5, - -15.75, - -16.0, - -16.25, - -16.5, - -16.75, - -17.0, - -17.25, - -17.5, - -17.75, - -18.0, - -18.25, - -18.5, - -18.75, - -19.0, - -19.25, - -19.5, - -19.75, - -20.0, - -20.25, - -20.5, - -20.75, - -21.0, - -21.25, - -21.5, - -21.75, - -22.0, - -22.25, - -22.5, - -22.75, - -23.0, - -23.25, - -23.5, - -23.75, - -24.0, - -24.25, - -24.5, - -24.75, - -25.0, - -25.25, - -25.5, - -25.75, - -26.0, - -26.25, - -26.5, - -26.75, - -27.0, - -27.25, - -27.5, - -27.75, - -28.0, - -28.25, - -28.5, - -28.75, - -29.0, - -29.25, - -29.5, - -29.75, - -30.0, - -30.25, - -30.5, - -30.75, - -31.0, - -31.25, - -31.5, - -31.75, - -32.0, - -32.25, - -32.5, - -32.75, - -33.0, - -33.25, - -33.5, - -33.75, - -34.0, - -34.25, - -34.5, - -34.75, - -35.0, - -35.25, - -35.5, - -35.75, - -36.0, - -36.25, - -36.5, - -36.75, - -37.0, - -37.25, - -37.5, - -37.75, - -38.0, - -38.25, - -38.5, - -38.75, - -39.0, - -39.25, - -39.5, - -39.75, - -40.0, - -40.25, - -40.5, - -40.75, - -41.0, - -41.25, - -41.5, - -41.75, - -42.0, - -42.25, - -42.5, - -42.75, - -43.0, - -43.25, - -43.5, - -43.75, - -44.0, - -44.25, - -44.5, - -44.75, - -45.0, - -45.25, - -45.5, - -45.75, - -46.0, - -46.25, - -46.5, - -46.75, - -47.0, - -47.25, - -47.5, - -47.75, - -48.0, - -48.25, - 
-48.5, - -48.75, - -49.0, - -49.25, - -49.5, - -49.75, - -50.0, - -50.25, - -50.5, - -50.75, - -51.0, - -51.25, - -51.5, - -51.75, - -52.0, - -52.25, - -52.5, - -52.75, - -53.0, - -53.25, - -53.5, - -53.75, - -54.0, - -54.25, - -54.5, - -54.75, - -55.0, - -55.25, - -55.5, - -55.75, - -56.0, - -56.25, - -56.5, - -56.75, - -57.0, - -57.25, - -57.5, - -57.75, - -58.0, - -58.25, - -58.5, - -58.75, - -59.0, - -59.25, - -59.5, - -59.75, - -60.0, - -60.25, - -60.5, - -60.75, - -61.0, - -61.25, - -61.5, - -61.75, - -62.0, - -62.25, - -62.5, - -62.75, - -63.0, - -63.25, - -63.5, - -63.75, - -64.0, - -64.25, - -64.5, - -64.75, - -65.0, - -65.25, - -65.5, - -65.75, - -66.0, - -66.25, - -66.5, - -66.75, - -67.0, - -67.25, - -67.5, - -67.75, - -68.0, - -68.25, - -68.5, - -68.75, - -69.0, - -69.25, - -69.5, - -69.75, - -70.0, - -70.25, - -70.5, - -70.75, - -71.0, - -71.25 - ], - "units": "degrees_east", - "axis": "LONGITUDE" - }, - "time": { - "values": [ - "1980-01-01T00:00:00.000000000", - "1980-01-01T01:00:00.000000000", - "1980-01-01T02:00:00.000000000", - "1980-01-01T03:00:00.000000000", - "1980-01-01T04:00:00.000000000", - "1980-01-01T05:00:00.000000000", - "1980-01-01T06:00:00.000000000", - "1980-01-01T07:00:00.000000000", - "1980-01-01T08:00:00.000000000", - "1980-01-01T09:00:00.000000000", - "1980-01-01T10:00:00.000000000", - "1980-01-01T11:00:00.000000000", - "1980-01-01T12:00:00.000000000", - "1980-01-01T13:00:00.000000000", - "1980-01-01T14:00:00.000000000", - "1980-01-01T15:00:00.000000000", - "1980-01-01T16:00:00.000000000", - "1980-01-01T17:00:00.000000000", - "1980-01-01T18:00:00.000000000", - "1980-01-01T19:00:00.000000000", - "1980-01-01T20:00:00.000000000", - "1980-01-01T21:00:00.000000000", - "1980-01-01T22:00:00.000000000", - "1980-01-01T23:00:00.000000000", - "1980-01-02T00:00:00.000000000", - "1980-01-02T01:00:00.000000000", - "1980-01-02T02:00:00.000000000", - "1980-01-02T03:00:00.000000000", - "1980-01-02T04:00:00.000000000", - "1980-01-02T05:00:00.000000000", - "1980-01-02T06:00:00.000000000", - "1980-01-02T07:00:00.000000000", - "1980-01-02T08:00:00.000000000", - "1980-01-02T09:00:00.000000000", - "1980-01-02T10:00:00.000000000", - "1980-01-02T11:00:00.000000000", - "1980-01-02T12:00:00.000000000", - "1980-01-02T13:00:00.000000000", - "1980-01-02T14:00:00.000000000", - "1980-01-02T15:00:00.000000000", - "1980-01-02T16:00:00.000000000", - "1980-01-02T17:00:00.000000000", - "1980-01-02T18:00:00.000000000", - "1980-01-02T19:00:00.000000000", - "1980-01-02T20:00:00.000000000", - "1980-01-02T21:00:00.000000000", - "1980-01-02T22:00:00.000000000", - "1980-01-02T23:00:00.000000000", - "1980-01-03T00:00:00.000000000", - "1980-01-03T01:00:00.000000000", - "1980-01-03T02:00:00.000000000", - "1980-01-03T03:00:00.000000000", - "1980-01-03T04:00:00.000000000", - "1980-01-03T05:00:00.000000000", - "1980-01-03T06:00:00.000000000", - "1980-01-03T07:00:00.000000000", - "1980-01-03T08:00:00.000000000", - "1980-01-03T09:00:00.000000000", - "1980-01-03T10:00:00.000000000", - "1980-01-03T11:00:00.000000000", - "1980-01-03T12:00:00.000000000", - "1980-01-03T13:00:00.000000000", - "1980-01-03T14:00:00.000000000", - "1980-01-03T15:00:00.000000000", - "1980-01-03T16:00:00.000000000", - "1980-01-03T17:00:00.000000000", - "1980-01-03T18:00:00.000000000" - ], - "units": "unknown", - "axis": "TIME" - } - } - }, - "fields": { - "2_metre_dewpoint_temperature": { - "units": "K" - }, - "minimum_2m_temperature_since_previous_post_processing": { - "units": "K" - }, - "air_pressure_at_mean_sea_level": { - 
"units": "Pa" - }, - "mean_wave_direction": { - "units": "unknown" - }, - "maximum_2m_temperature_since_previous_post_processing": { - "units": "K" - }, - "lwe_thickness_of_snowfall_amount": { - "units": "unknown" - }, - "surface_upward_latent_heat_flux": { - "units": "J m**-2" - }, - "surface_air_pressure": { - "units": "Pa" - }, - "surface_upward_sensible_heat_flux": { - "units": "J m**-2" - }, - "surface_net_downward_shortwave_flux": { - "units": "J m**-2" - }, - "surface_downwelling_shortwave_flux_in_air": { - "units": "J m**-2" - }, - "surface_net_upward_longwave_flux": { - "units": "J m**-2" - }, - "surface_thermal_radiation_downwards": { - "units": "J m**-2" - }, - "2_metre_temperature": { - "units": "K" - }, - "cloud_area_fraction": { - "units": "unknown" - }, - "total_precipitation": { - "units": "m" - }, - "10_metre_v_wind_component": { - "units": "m s**-1" - }, - "10_metre_u_wind_component": { - "units": "m s**-1" - } - } - } -} \ No newline at end of file diff --git a/web/tests/resources/sample_details.json b/web/tests/resources/sample_details.json deleted file mode 100644 index 5948363..0000000 --- a/web/tests/resources/sample_details.json +++ /dev/null @@ -1,986 +0,0 @@ -[ - { - "metadata": { - "catalog_dir": "/catalog/", - "description": "BioClim is a dataset of 35 bioclimatic indicators calculated from historical and future climate simulations. These indicators (e.g. Annual mean temperature, Temperature annual range, Evapotranspiration, Thermicity, Annual and Seasonal precipitation and many others) are valuable for ecological modeling purposes. Besides the historical period (1960-1999) from WATCH reanalyses, the 35 indicators for the future periods are based on time series of climate variables simulated under a combination of 6 Earth System Models (ESMs), 2 Representative Concentration Pathways (RCP 4.5 and 8.5) and 2 time horizons (2040-2079 and 2060-2099), amounting to a total of 23 ensemble members for each indicator, all provided as NetCDF files.", - "label": "Bioclimatic Indicators", - "how_to_cite": "Whenever you publish research or applications based on this dataset you should include the following citation:

Noce, S., Caporaso, L. & Santini, M. A new global dataset of bioclimatic indicators. Sci Data 7, 398 (2020). https://doi.org/10.1038/s41597-020-00726-5", - "contact": { - "name": "Sergio Noce", - "email": "sergio.noce@cmcc.it", - "webpage": "https://www.cmcc.it/people/noce-sergio" - }, - "image": "https://ddsfiles.s3.fr-par.scw.cloud/images/bioclimind.jpg", - "doi": "https://doi.org/10.25424/CMCC/BIOCLIMIND", - "license": { - "name": "Creative Commons Attribution 4.0 International (CC BY 4.0)", - "url": "https://creativecommons.org/licenses/by/4.0" - }, - "publication_date": "2020-12-22", - "keywords": [ - "Bioclimatic indicators", - "Ecological modeling", - "CMIP5", - "Biogeography", - "Species Distribution Modeling" - ], - "related_data": [ - { - "name": "A new global dataset of bioclimatic indicators", - "url": "https://doi.org/10.1038/s41597-020-00726-5" - } - ], - "id": "bioclimind" - }, - "products": { - "future": { - "role": "internal", - "filters": [ - { - "name": "rcp", - "user_defined": "T" - }, - { - "name": "time_interval", - "user_defined": "T" - }, - { - "name": "CMIP5", - "user_defined": "T" - }, - { - "name": "var" - } - ], - "catalog_dir": "/catalog/cmcc/" - }, - "historical": { - "role": "internal", - "catalog_dir": "/catalog/cmcc/" - } - } - }, - { - "metadata": { - "catalog_dir": "/catalog/", - "description": "The Black Sea physics analysis and Forecasting System (BSFS) is one of the Production Units of the Black Sea Monitoring and Forecasting Centre (BS-MFC), developed in the frame of the Copernicus Marine Environment and Monitoring Service (CMEMS). The physical core is based on a hydrodynamic model implemented over the whole Black Sea basin. The model horizontal grid resolution is 1/36° in zonal direction, 1/27° in meridional direction (ca. 3 km) and has 31 unevenly spaced vertical levels. The hydrodynamics are supplied by the Nucleus for European Modeling of the Ocean (NEMO, v3.4).The model solutions are corrected by the variational assimilation OceanVar (based on a 3DVAR scheme), originally developed for the Mediterranean Sea and later extended for the global ocean. The observations assimilated in the BSFS includes in-situ profiles, along-track sea level anomalies (SLA) and gridded sea surface temperature (SST) provided by Copernicus Thematic Assembly Centres. BSFS provides every day 3 days of analysis, 1 day simulation and 10 days of forecast fields; once per week, the system runs for 14 days in the past to provide the best initial condition for the forecasting cycle. BSFS catalogue offers daily and hourly means from Jul 2018 - ongoing for the following list of variables: temperature, salinity, sea surface height, currents, mixed layer depth and bottom temperature. For further information see Black Sea Description page. This dataset is delivered using the NEMO model native grid.", - "contact": { - "name": "Ocean Lab", - "email": "ocean-lab@cmcc.it", - "webpage": "http://oceanlab.cmcc.it" - }, - "label": "Black Sea Physics Analysis and Forecasting System", - "image": "https://ddsfiles.s3.fr-par.scw.cloud/images/blacksea_v3.png", - "attribution": "This dataset has been produced by the Euro-Mediterranean Center on Climate Change (CMCC) in the context of the Copernicus Marine Environment and Monitoring Service (CMEMS). 
The activities leading to these results have been contracted by Mercator Ocean International, that implements CMEMS as part of the Copernicus Programme.", - "update_frequency": "weekly", - "doi": null, - "license": { - "name": "Copernicus Marine License", - "url": "https://marine.copernicus.eu/faq/cite-cmems-products-cmems-credit/?idpage=169" - }, - "publication_date": "2020-12-22", - "related_data": [ - { - "name": "Copernicus Marine BlackSea Forecasting Data", - "url": "https://resources.marine.copernicus.eu/?option=com_csw&task=results?option=com_csw&view=details&product_id=BLKSEA_ANALYSIS_FORECAST_PHYS_007_001" - } - ], - "id": "blacksea-analysis" - }, - "products": { - "daily-analysis": { - "role": "internal", - "catalog_dir": "/catalog/cmcc/" - } - } - }, - { - "metadata": { - "catalog_dir": "/catalog/", - "description": "A high-resolution (0.25 degree) historical global gridded dataset of monthly and annual cooling and heating degree-days (1970-2019) based on GLDAS data. Monthly and Annual Cooling/Heating degree-days (CDD/HDD) using daily average temperature (°C), and Cooling degree-days using daily average wet-bulb temperature (Twb), are based on the following threshold (base) temperatures:

  • CDD, CDDwb: 18, 18.3, 22, 23, 24 and 25 (°C)
  • HDD: 10, 15, 15.5, 16, 17 and 18 (°C)
The degree-days are computed using meteorological parameters from the Global Land Data Assimilation System (GLDAS) ver. 2 (@ 0.25 degree global gridded resolution). The dataset, referred to as \"DegDays_0p25_1970_2019\", covers 50 years over the period 1970-2019. Units: Degree-Celsius Days. To convert to Degree-Fahrenheit Days, multiply by 1.8, e.g. CDD (°F) = 9/5 * CDD (°C).
Important: The input temperature variables from GLDAS are from two sub-versions (ver. 2.0 for the period 1970-2009, and ver. 2.1 for the period 2010-present day). The degree-days may therefore show a break in the time series at a few locations around the years 2010-11, due to the change in the GLDAS versions. Users are therefore advised to exercise caution when using the data, for instance for trend analysis. Further details on the merging of the two versions can be found here", - "label": "Historical Global Cooling and Heating degree-days (1970-2019)", - "contact": { - "name": "Malcolm Mistry", - "email": "malcolm.mistry@cmcc.it", - "webpage": "https://www.cmcc.it/people/malcom-mistry" - }, - "image": "https://ddsfiles.s3.fr-par.scw.cloud/images/Degree_days_1970_2019.png", - "doi": null, - "how_to_cite": "Whenever you publish research or applications based on this dataset you should include the following citation.

Mistry, MN. Historical global gridded degree‐days: A high‐spatial resolution database of CDD and HDD. Geosci Data J. 2019; 6: 214–221. https://doi.org/10.1002/gdj3.83", - "license": { - "name": "Creative Commons Attribution 4.0 International (CC BY 4.0)", - "url": "https://creativecommons.org/licenses/by/4.0" - }, - "publication_date": "2020-12-22", - "update_frequency": null, - "keywords": [ - "Climate extremes" - ], - "id": "cooling-heating-degreedays" - }, - "products": { - "annual": { - "role": "internal", - "filters": [ - { - "name": "T", - "user_defined": "T" - } - ], - "catalog_dir": "/catalog/cmcc/" - }, - "monthly": { - "role": "internal", - "filters": [ - { - "name": "T", - "user_defined": "T" - } - ], - "catalog_dir": "/catalog/cmcc/" - } - } - }, - { - "metadata": { - "catalog_dir": "/catalog/", - "description": "The CMCC Global Ocean Physical Reanalysis System (C-GLORS) is used at CMCC to simulate the state of the ocean in the last decades. It consists of a variational data assimilation system (OceanVar), capable of assimilating all in-situ observations along with altimetry data, and a forecast step performed by the ocean model NEMO coupled with the LIM2 sea-ice model. The version of the Reanalysis presented here is the v7 with global resolution of 0.25° and 75 evenly spaced vertical levels. The v7 is forced with ECMWF Era-Interim on top of the ocean. Further details of the C-GLORS scheme can be found in http://c-glors.cmcc.it/index/index.html. The v7 is also part of the CMEMS product GLOBAL OCEAN ENSEMBLE PHYSICS REANALYSIS (GLOBAL_REANALYSIS_PHY_001_026, GLOBAL_REANALYSIS_PHY_001_031) . The current dataset has been interpolated with Zapata tool onto a regular grid.", - "contact": { - "name": "Andrea Cipollone", - "email": "andrea.cipollone@cmcc.it", - "webpage": "https://www.cmcc.it/people/cipollone-andrea" - }, - "label": "CMCC Global Ocean Physical Reanalysis System (C-GLORS)", - "image": "https://ddsfiles.s3.fr-par.scw.cloud/images/ohc_trend_cglors.png", - "doi": null, - "update_frequency": null, - "license": { - "name": "Copernicus Marine License", - "url": "https://marine.copernicus.eu/user-corner/technical-faq/how-cite-cmems-products-cmems-credit?idpage=169" - }, - "publication_date": "2021-01-25", - "related_data": null, - "how_to_cite": "This dataset has been produced by the Euro-Mediterranean Center on Climate Change (CMCC) in the context of the Copernicus Marine Environment and Monitoring Service (CMEMS). The activities leading to these results have been contracted by Mercator Ocean International, that implements CMEMS as part of the Copernicus Programme.

Whenever you publish research or applications based on this dataset you should include the following citations:

Storto, et al., Clim Dyn 53, 287–312 (2019). https://doi.org/10.1007/s00382-018-4585-5
Storto, A. and Masina, S.: https://doi.org/10.5194/essd-8-679-2016, 2016", - "id": "cglorsv7" - }, - "products": { - "monthly": { - "role": "internal", - "catalog_dir": "/catalog/cmcc/" - } - } - }, - { - "metadata": { - "catalog_dir": "/catalog/", - "description": "This dataset provides Climate Impact Indices related to (extreme) precipitation for the European domain. The dataset provides a historical perspective of changes in extreme precipitation across Europe and follows the definitions for the Climate Impact Indices that are defined by the CCl/CLIVAR/JCOMM Expert Team on Climate Change Detection and Indices ( ETCCDI ). The datasets on which the Climate Impact Indices are based are the E-OBS observational dataset, which provides daily sums of precipitation, and the ERA5 reanalysis dataset, of which the hourly precipitation sums are used. The value of the current dataset is in the aggregation of these dataset in Climate Impact Indices which provide a more direct view on the climatic variability and change in extreme precipitation than its sources. The selected indices highlight extreme precipitation from various angles: by using both fixed thresholds and percentile thresholds, related to the climate specific for that area, assessments of heavy precipitation events can be made and comparisons between stations can be made, even when the stations are from different climatic conditions. Both the length of wet spells as well as ‘hard extremes’, relating to return levels of up to 100-year, are provided. This dataset provides Climate Impact Indices related to (extreme) precipitation for the European domain. ", - "attribution": "This dataset has been produced on behalf of the Copernicus Climate Change Service (C3S) by the Euro-Mediterranean Center for Climate Change (CMCC) and the Royal Netherlands Meteorological Institute (KNMI). The information in this dataset has been generated using Copernicus Climate Change Service information 2021 and it is provided \"as is\" with no guarantee or warranty. The users thereof use the information at their sole risk and liability. CMCC, KNMI, European Commission nor ECMWF are not responsible for any use that may be made of the Copernicus information or data it contains.

Whenever you publish research or applications based in whole or in part on these data, you should include the following citation and acknowledgement according to the licence to use Copernicus Products.

Mercogliano, P, Rianna, G, Reder, A, Raffa, M, Mancini, M, Stojiljkovic, M, de Valk, C, and van der Schrier, G (2021): Extreme precipitation risk indicators for Europe and European cities from 1950 to 2019. Copernicus Climate Change Service (C3S) Climate Data Store (CDS). DOI: https://doi.org/10.24381/cds.3a9c4f89", - "contact": { - "name": "Paola Mercogliano", - "email": "paola.mercogliano@cmcc.it", - "webpage": "https://www.cmcc.it/people/mercogliano-paola" - }, - "label": "Extreme precipitation indicators for Europe from 1950 to 2019", - "image": "https://ddsfiles.s3.fr-par.scw.cloud/images/europe_extreme_precipitation_risk_indicators.png", - "doi": null, - "license": { - "name": "Licence to Use Copernicus Products", - "url": "https://cds.climate.copernicus.eu/api/v2/terms/static/licence-to-use-copernicus-products.pdf" - }, - "update_frequency": "None", - "publication_date": "2021-11-02", - "related_data": [ - { - "name": "ERA5 hourly data on single levels from 1979 to present", - "url": "https://dds.cmcc.it/#/dataset/era5-single-levels" - }, - { - "name": "E-OBS daily gridded meteorological data for Europe from 1950 to present", - "url": "https://dds.cmcc.it/#/dataset/e-obs" - }, - { - "name": "Extreme Precipitation and Flood Risk Indicators for European cities from 1989 to 2018", - "url": "https://dds-dev.cmcc.it/#/dataset/european-cities-flood-risk-indicators" - }, - { - "name": "C3S Extreme precipitation risk indicators for Europe and European cities from 1950 to 2019", - "url": "https://cds.climate.copernicus.eu/cdsapp#!/dataset/sis-european-risk-extreme-precipitation-indicators" - }, - { - "name": "C3S Flood risk indicators for European cities from 1989 to 2018", - "url": "https://cds.climate.copernicus.eu/cdsapp#!/dataset/sis-european-risk-flood-indicators" - } - ], - "id": "europe-extreme-precipitation-risk-indicators" - }, - "products": { - "yearly": { - "role": "internal", - "filters": [ - { - "name": "source", - "user_defined": "T" - } - ], - "catalog_dir": "/catalog/cmcc/" - }, - "monthly": { - "role": "internal", - "filters": [ - { - "name": "source", - "user_defined": "T" - } - ], - "catalog_dir": "/catalog/cmcc/" - }, - "daily": { - "role": "internal", - "filters": [ - { - "name": "source", - "user_defined": "T" - }, - { - "name": "percentile", - "user_defined": "T" - } - ], - "catalog_dir": "/catalog/cmcc/" - }, - "30-years": { - "role": "internal", - "filters": [ - { - "name": "source", - "user_defined": "T" - }, - { - "name": "statistic", - "user_defined": "T" - } - ], - "catalog_dir": "/catalog/cmcc/" - } - } - }, - { - "metadata": { - "catalog_dir": "/catalog/", - "description": "The dataset presents climate impact indicators related to extreme precipitation and indicators to evaluate the spatial distribution of flood risk in terms of hazards and direct damages. It is provided as a high-resolution product focused on 20 European cities that were identified as vulnerable to urban pluvial flooding by experts and practitioners from government agencies and Civil Protection. The dataset combines a high-resolution, probabilistic, description of extreme precipitation, exposure datasets and damage/vulnerability models into a comprehensive pluvial flood risk assessment for cities across Europe for the current climate. It allows city stakeholders to exploit flood risk analysis over the city. The dataset is derived from data available on the Climate Data Store and the Copernicus Land Monitoring Service (CLMS). 
The former includes ERA5 reanalysis data, dynamically downscaled to a 2 km x 2 km grid with the regional climate model COSMO-CLM and accounting for urban parameterization in order to reach the spatial and temporal resolution suitable for pluvial flood analysis at a city scale. This downscaled product is used for deriving hourly precipitation input at prescribed recurrence intervals that, in combination with supporting digital elevation models (DEM) from the CLMS, is used to feed hazard and damage models.", - "attribution": "This dataset has been produced on behalf of the Copernicus Climate Change Service (C3S) by the Euro-Mediterranean Center for Climate Change (CMCC), the Royal Netherlands Meteorological Institute (KNMI) and GECOSistema s.r.l. The information in this dataset has been generated using Copernicus Climate Change Service information 2021 and it is provided \"as is\" with no guarantee or warranty. The users thereof use the information at their sole risk and liability. Neither CMCC, KNMI, GECOSistema, the European Commission nor ECMWF is responsible for any use that may be made of the Copernicus information or data it contains.

Whenever you publish research or applications based in whole or in part on these data, you should include the following citations and acknowledgement according to the licence to use Copernicus Products.

Mercogliano, P, Rianna, G, Reder, A, Raffa, M, Padulano, R, Essenfelder, A, Mazzoli, P, and Bagli, S (2021): Flood risk indicators for European cities from 1989 to 2018. Copernicus Climate Change Service (C3S) Climate Data Store (CDS). DOI: https://doi.org/10.24381/cds.9d3db0eb

Mercogliano, P, Rianna, G, Reder, A, Raffa, M, Mancini, M, Stojiljkovic, M, de Valk, C, and van der Schrier, G (2021): Extreme precipitation risk indicators for Europe and European cities from 1950 to 2019. Copernicus Climate Change Service (C3S) Climate Data Store (CDS). DOI: https://doi.org/10.24381/cds.3a9c4f89", - "contact": { - "name": "Paola Mercogliano", - "email": "paola.mercogliano@cmcc.it", - "webpage": "https://www.cmcc.it/people/mercogliano-paola" - }, - "label": "Extreme Precipitation and Flood Risk Indicators for European cities from 1989 to 2018", - "image": "https://ddsfiles.s3.fr-par.scw.cloud/images/european_cities_flood_risk_indicators.png", - "doi": null, - "license": { - "name": "Licence to Use Copernicus Products", - "url": "https://cds.climate.copernicus.eu/api/v2/terms/static/licence-to-use-copernicus-products.pdf" - }, - "update_frequency": null, - "publication_date": "2021-11-02", - "related_data": [ - { - "name": "ERA5 hourly data on single levels from 1979 to present", - "url": "https://dds.cmcc.it/#/dataset/era5-single-levels" - }, - { - "name": "E-OBS daily gridded meteorological data for Europe from 1950 to present", - "url": "https://dds.cmcc.it/#/dataset/e-obs" - }, - { - "name": "Extreme precipitation indicators for Europe from 1950 to 2019", - "url": "https://dds.cmcc.it/#/dataset/europe-extreme-precipitation-risk-indicators" - }, - { - "name": "C3S Extreme precipitation risk indicators for Europe and European cities from 1950 to 2019", - "url": "https://cds.climate.copernicus.eu/cdsapp#!/dataset/sis-european-risk-extreme-precipitation-indicators" - }, - { - "name": "C3S Flood risk indicators for European cities from 1989 to 2018", - "url": "https://cds.climate.copernicus.eu/cdsapp#!/dataset/sis-european-risk-flood-indicators" - } - ], - "id": "european-cities-flood-risk-indicators" - }, - "products": { - "daily-precipitation-risk": { - "role": "internal", - "filters": [ - { - "name": "city", - "user_defined": "T" - }, - { - "name": "percentile", - "user_defined": "T" - } - ], - "catalog_dir": "/catalog/cmcc/" - }, - "30-years-precipitation-risk": { - "role": "internal", - "filters": [ - { - "name": "city", - "user_defined": "T" - }, - { - "name": "percentile", - "user_defined": "T" - } - ], - "catalog_dir": "/catalog/cmcc/" - } - } - }, - { - "metadata": { - "catalog_dir": "/catalog/", - "description": "This dataset provides rainfall erosivity (R factor), associated indicators, and the potential for soil loss induced by water erosion for Italy. The dataset is derived from integrating rainfall data included in the Climate Data Store (CDS) of the Copernicus Climate Change Service (C3S) with non-climate data to assess soil susceptibility to water erosion according to Revised Universal Soil Loss Equation (RUSLE) approach. The gridded dataset can support the decision-making process of many stakeholders for strategical planning purposes across different sectors addressed by the Copernicus Climate Change Service. The dataset provides:
  • Key information on water erosion dynamics in terms of the R factor and potential soil loss for the historical period. The assessment of the R factor uses as input gridded observations (E-OBS) and reanalysis data (ERA5, ERA5-Land) for precipitation included in the CDS. The soil loss assessment is obtained by further combining the R factor derived from ERA5-Land with non-climate gridded data representing soil susceptibility to water erosion, in accordance with the RUSLE formulation (see the schematic sketch after this list). These are provided at ≈500 m horizontal resolution in this dataset.
  • Key information on water erosion dynamics in terms of the R factor and potential soil loss for future periods. The assessment is again based on the RUSLE approach and uses as input precipitation data included in the EURO-CORDEX ensemble climate projections (0.11°) under several Representative Concentration Pathways (RCPs), also available on the CDS. In this case, bias-corrected monthly precipitation data are used and the R factor is provided both at native horizontal resolution and regridded to ≈500 m, to be combined with the other RUSLE factors to derive potential soil loss for the future. Additional R factor proxy variables, based on daily precipitation data, are provided at native horizontal resolution and as anomalies with respect to the historical period, with no bias correction.
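Schematic sketch of the RUSLE combination referred to above: the climate-driven rainfall erosivity (R factor) is multiplied with the non-climate soil-susceptibility factors to obtain potential soil loss. The factor symbols are the standard RUSLE ones (K, LS, C, P); the actual gridded variable names used in this dataset are not specified here, so treat this purely as an illustration.

import numpy as np

def potential_soil_loss(R, K, LS, C, P):
    # Standard (R)USLE formulation: A = R * K * LS * C * P
    #   R  - rainfall erosivity (the climate-driven factor provided here)
    #   K  - soil erodibility, LS - slope length/steepness,
    #   C  - cover management, P - support practices (non-climate factors)
    return R * K * LS * C * P

# Illustrative, co-registered grids filled with placeholder values.
shape = (4, 4)
A = potential_soil_loss(
    R=np.full(shape, 1500.0),  # placeholder erosivity values
    K=np.full(shape, 0.03),
    LS=np.full(shape, 1.2),
    C=np.full(shape, 0.2),
    P=np.full(shape, 1.0),
)  # A: potential annual soil loss on the same grid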
", - "attribution": "This dataset was produced on behalf of the Copernicus Climate Change Service (C3S) by the Euro-Mediterranean Center for Climate Change (CMCC). The information in this dataset has been generated using Copernicus Climate Change Service information 2021 and it is provided \"as is\" with no guarantee or warranty. The users thereof use the information at their sole risk and liability. CMCC, European Commission and ECMWF are not responsible for any use that may be made of the Copernicus information or data it contains.", - "contact": { - "name": "Monia Santini", - "email": "monia.santini@cmcc.it", - "webpage": "https://www.cmcc.it/people/santini-m" - }, - "image": "https://ddsfiles.s3.fr-par.scw.cloud/images/soilerosion.png", - "label": "Soil Erosion Indicators for Italy (1981-2080)", - "doi": "https://doi.org/10.24381/cds.66d88ff8", - "update_frequency": null, - "license": { - "name": "License to use Copernicus Products", - "url": "https://cds.climate.copernicus.eu/api/v2/terms/static/licence-to-use-copernicus-products.pdf" - }, - "publication_date": "2021-07-01", - "id": "soil-erosion-over-italy" - }, - "products": { - "historical": { - "role": "internal", - "filters": [ - { - "name": "source", - "user_defined": "T" - }, - { - "name": "resolution", - "user_defined": "T" - } - ], - "catalog_dir": "/catalog/cmcc/" - }, - "future": { - "role": "internal", - "filters": [ - { - "name": "global_model", - "user_defined": "T" - }, - { - "name": "regional_model", - "user_defined": "T" - }, - { - "name": "version", - "user_defined": "T" - }, - { - "name": "experiment", - "user_defined": "T" - }, - { - "name": "period", - "user_defined": "T" - } - ], - "catalog_dir": "/catalog/cmcc/" - }, - "aggregate": { - "role": "internal", - "filters": [ - { - "name": "ensemble_statistic", - "user_defined": "T" - }, - { - "name": "experiment", - "user_defined": "T" - }, - { - "name": "period", - "user_defined": "T" - } - ], - "catalog_dir": "/catalog/cmcc/" - } - } - }, - { - "metadata": { - "catalog_dir": "/catalog/", - "description": "Extreme climate conditions affect the maintenance of soil functions, especially in areas particularly subject to rainfall-induced erosion. The case study on Soil Erosion in HIGHLANDER is based on a consolidated empirical model (RUSLE) to generate assessment (1991-2020) and projections (2021-2050, coming soon) about the rainfall erosivity and potential loss of soil both on forests and agricultural areas at very high spatial resolution (@2km for rainfall erosivity – both on the native rotated grid from the climate model and on regular grid - and 250 m for soil loss – on regular grid), and relying on 12 models to calculate rainfall erosivity (see Table). Such a dataset at very high resolution at national scale will support in identifying areas particularly at risk under changes in climate variability and extreme events, so to formulate strategies to reduce soil erosion through appropriate management of forests and agricultural fields, also in terms of working practices and soil protection measures.
", - "attribution": "The dataset has been created using the VHR ERA5 dataset downscaled @2.2km over Italy as described in the paper Raffa et al. (2021) and the soil susceptibility dataset provided with RUSLE2015 product. Whenever you publish research or applications based on this dataset you should refer to RUSLE2015 datasets for soil susceptibility factors, and include the following citation for rainfall erosivity factor:

Raffa, M.; Reder, A.; Marras, G.F.; Mancini, M.; Scipione, G.; Santini, M.; Mercogliano, P. VHR-REA_IT Dataset: Very High Resolution Dynamical Downscaling of ERA5 Reanalysis over Italy by COSMO-CLM. Data 2021, 6, 88. 10.3390/data6080088.", - "contact": { - "name": "Monia Santini", - "email": "monia.santini@cmcc.it", - "webpage": "https://www.cmcc.it/people/santini-m" - }, - "image": "https://ddsfiles.s3.fr-par.scw.cloud/images/SOIL-EROSION-OVER-ITALY-2km.png", - "label": "Soil Erosion Indicators for Italy (1991-2020) @2.2 km over Italy", - "doi": null, - "update_frequency": "None", - "license": { - "name": "CC BY 4.0", - "url": "https://creativecommons.org/licenses/by/4.0/" - }, - "publication_date": "2022-04-11", - "related_data": [ - { - "name": "ERA5 hourly data on single levels from 1979 to present", - "url": "https://doi.org/10.24381/cds.adbb2d47" - }, - { - "name": "Downscaling of ERA5 @2.2 km over Italy", - "url": "https://doi.org/10.25424/cmcc/era5-2km_italy" - }, - { - "name": "Soil erosion by water (RUSLE2015)", - "url": "https://esdac.jrc.ec.europa.eu/content/soil-erosion-water-rusle2015" - } - ], - "id": "soil-erosion-over-italy-2km" - }, - "products": { - "historical": { - "role": "internal", - "filters": [ - { - "name": "model", - "user_defined": "T" - }, - { - "name": "grid", - "user_defined": "T" - }, - { - "name": "var" - } - ], - "catalog_dir": "/catalog/cmcc/" - } - } - }, - { - "metadata": { - "catalog_dir": "/catalog/", - "description": "The dataset provides a set of physical ocean parameters over the Mediterranean Sea region which describe the evolution of the system under CMIP5 historical (1980-2005) and future RCP4.5 and RCP8.5 scenarios for the 21st century (2006-2100). These estimates were produced with the Mediterranean Sea eddy-resolving configuration of the NEMO v3.4 modelling system (Oddo et al., 2009) and uses a horizontal grid resolution of 1/16° (~6.5km) and 72 unevenly spaced vertical levels (ranging from 3 m at the surface down to 600 m in the deeper layers). 
Boundary conditions come from the atmosphere-ocean global circulation model CMCC-CM (Scoccimarro et al., 2011) and account for 6-hourly atmospheric fields, daily fresh water discharges (rivers and Black Sea exchange), and monthly fields of temperature, salinity, and velocities prescribed at the open lateral boundaries (see details in Lovato et al., 2013).", - "attribution": null, - "contact": { - "name": "Tomas Lovato", - "email": "tomas.lovato@cmcc.it", - "webpage": "https://www.cmcc.it/people/lovato-tomas-2" - }, - "image": "https://ddsfiles.s3.fr-par.scw.cloud/images/medsea-scenarios.jpg", - "label": "Mediterranean Sea marine physical simulations under CMIP5 historical and future scenario projections for the 21st century.", - "doi": null, - "update_frequency": null, - "license": { - "name": "Creative Commons Attribution 4.0 International (CC BY 4.0)", - "url": "https://creativecommons.org/licenses/by/4.0" - }, - "publication_date": "2021-07-13", - "id": "medsea-cmip5-projections-physics" - }, - "products": { - "historical": { - "role": "internal", - "catalog_dir": "/catalog/cmcc/" - }, - "RCP45": { - "role": "internal", - "catalog_dir": "/catalog/cmcc/" - }, - "RCP85": { - "role": "internal", - "catalog_dir": "/catalog/cmcc/" - } - } - }, - { - "metadata": { - "catalog_dir": "/catalog/", - "description": "The dataset provides a set of biogeochemical parameters over the Mediterranean Sea region which describe the evolution of the system under CMIP5 future RCP4.5 and RCP8.5 scenarios and in the control simulation (based on repeating 2005-2014 physical forcing) for the 21st century (2005-2099).These estimates were produced using the offline coupling between the Mediterranean Sea eddy-resolving configuration of the NEMO v3.4 modeling system (Oddo et al., 2009) and the transport-reaction model OGSTM-BFM (Lazzari et al. 2012;2016). The projections have a horizontal grid resolution of 1/16° (~6.5km) and 70 unevenly spaced vertical levels (ranging from 3 m at the surface down to 600 m in the deeper layers).\"", - "attribution": null, - "contact": { - "name": "Stefano Salon", - "email": "ssalon@inogs.it", - "webpage": "https://www.inogs.it/it/users/stefano-salon" - }, - "image": "https://ddsfiles.s3.fr-par.scw.cloud/images/medsea_biogeochemistry.jpg", - "label": "Mediterranean Sea marine biogeochemistry simulations under CMIP5 future scenario projections for the 21st century", - "doi": null, - "update_frequency": null, - "license": { - "name": "Creative Commons Attribution 4.0 International (CC BY 4.0)", - "url": "https://creativecommons.org/licenses/by/4.0" - }, - "publication_date": "2021-12-15", - "id": "medsea-cmip5-projections-biogeochemistry" - }, - "products": { - "baseline": { - "role": "internal", - "catalog_dir": "/catalog/cmcc/" - }, - "RCP45": { - "role": "internal", - "catalog_dir": "/catalog/cmcc/" - }, - "RCP85": { - "role": "internal", - "catalog_dir": "/catalog/cmcc/" - } - } - }, - { - "metadata": { - "catalog_dir": "/catalog/", - "description": "The dataset contains dynamically downscaled ERA5 reanalysis, originally available at ≈31 km x 31 km horizontal resolution, to 2.2 km x 2.2 km. Dynamical downscaling has been conducted directly for the project (foreground) through Regional Climate Model (RCM) COSMO5.0_CLM9 e INT2LM 2.06. The RCM COSMO CLM is currently developed by the CLM-Community, with which CMCC collaborates since 2008 (additional info on COSMO CLM). The temporal resolution of outputs is hourly (like for ERA5). 
Runs cover the whole Italian territory (and neighbouring areas, as required by the computational boundary) so as to provide a very detailed (in terms of space-time resolution) and comprehensive (in terms of meteorological fields) dataset of climatological data for at least the last 30 years (01/1989-10/2020). Typical uses of such a dataset are (applied) research and downstream services (e.g. decision support systems).
The temporal coverage of the dataset is from 01/01/1989 00:00 to 31/12/2020 23:00 and the temporal resolution is 1 hour. All output variables (reported in the following table) are on single levels, except soil water content, which is provided for 7 soil levels.
Variable Name                                      | Units
2m temperature                                     | K
2m dew point temperature                           | K
Total precipitation                                | kg/m2
U-component of 10m wind                            | m/s
V-component of 10m wind                            | m/s
2m maximum temperature                             | K
2m minimum temperature                             | K
Mean sea level pressure                            | Pa
Specific humidity                                  | kg/kg
Total cloud cover                                  | Dimensionless
Surface evaporation                                | kg/m2
Averaged surface net downward shortwave radiation  | W/m2
Averaged surface net downward longwave radiation   | W/m2
Surface snow amount                                | m
Soil (multi levels) water content                  | m
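For orientation when working with these fields: temperatures are given in kelvin, and water fluxes as mass per area, where 1 kg/m2 of liquid water corresponds to a depth of 1 mm. A minimal, illustrative conversion sketch (the function names are ours, not part of the dataset or the DDS API):

def kelvin_to_celsius(t_k: float) -> float:
    # 2m temperature (and min/max/dew point): K -> degrees Celsius
    return t_k - 273.15

def precip_kg_m2_to_mm(p: float) -> float:
    # Total precipitation / surface evaporation: kg/m2 of liquid water
    # is numerically equal to a water depth in mm.
    return p

print(kelvin_to_celsius(288.15))  # 15.0 degC
print(precip_kg_m2_to_mm(2.5))    # 2.5 mm accumulated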
", - "attribution": "The use of the COSMO CLM model is completely free of charge for all research applications. The use of COSMO-CLM generated data within HIGHLANDER is free for partners (acting as intermediate users) for the project’s purposes; the use for other purposes (and by further external end-users) requires an appropriate disclaimer, including reference to COPERNICUS, CINECA, CLM Assembly and CMCC, such as and if additional data post-processing (e.g. fields elaboration or new formats) is required, this can be agreed on after discussing with Dataset Manager/Owner/Provider This datasets contains modified Copernicus Climate Change Service information 2021. Neither the European Commission nor ECMWF is responsible for any use that may be made of the Copernicus information or data it contains. (See License to Use Copernicus Products )

Whenever you publish research or applications based on this dataset you should include the following citation:

Raffa, M.; Reder, A.; Marras, G.F.; Mancini, M.; Scipione, G.; Santini, M.; Mercogliano, P. VHR-REA_IT Dataset: Very High Resolution Dynamical Downscaling of ERA5 Reanalysis over Italy by COSMO-CLM. Data 2021, 6, 88. https://doi.org/10.3390/data6080088", - "contact": { - "name": "Paola Mercogliano", - "email": "paola.mercogliano@cmcc.it", - "webpage": "https://www.cmcc.it/people/mercogliano-paola" - }, - "label": "ERA5 downscaling @2.2 km over Italy", - "image": "https://ddsfiles.s3.fr-par.scw.cloud/images/TOT_PREC_CCLM2km_land.png", - "doi": "https://doi.org/10.25424/cmcc/era5-2km_italy", - "update_frequency": "None", - "license": { - "name": "Dataset License", - "url": "https://ddsfiles.s3.fr-par.scw.cloud/vhr_era5_dds_license.pdf" - }, - "publication_date": "2021-08-01", - "related_data": [ - { - "name": "ERA5 hourly data on single levels from 1979 to present", - "url": "https://doi.org/10.24381/cds.adbb2d47" - } - ], - "id": "era5-downscaled-over-italy" - }, - "products": { - "VHR-REA_IT_1989_2020_hourly": { - "role": "internal", - "catalog_dir": "/catalog/cmcc/" - } - } - }, - { - "metadata": { - "catalog_dir": "/catalog/", - "description": "...", - "contact": { - "name": "Beppe Fogli", - "email": "...", - "webpage": "..." - }, - "label": "Global Ocean NEMO ATLANTECO 1/16", - "image": null, - "attribution": null, - "update_frequency": "None", - "doi": null, - "license": { - "name": "TBD", - "url": "TBD" - }, - "publication_date": "2020-12-22", - "related_data": [ - { - "name": "...", - "url": "..." - } - ], - "id": "nemo-global-atlanteco16" - }, - "products": { - "daily": { - "role": "internal", - "catalog_dir": "/catalog/cmcc/" - } - } - }, - { - "metadata": { - "catalog_dir": "/catalog/", - "description": "This dataset provides a set of physical ocean parameters over the Atlantic Ocean region produced using the CMCC GLOB16 eddy-resolving configuration of the NEMO 3.6 ocean-sea ice modelling system (Iovino et al., 2016), which uses a horizontal grid resolution of 1/16° (~6.5km) and 98 unevenly spaced vertical levels. The model run from 1958 to 2018 following the experimental protocol of the Ocean Model Intercomparison Project phase 2 (OMIP-2) (Griffies et al., 2016), forced by the JRA55-do v1.4.0 surface-atmospheric dataset (Tsujino et al., 2018). 
The dataset provides daily mean fields over the first 50 vertical levels (upper 540 meters) of the Atlantic Ocean, for the period 2009-2018.", - "contact": { - "name": "Pier Giuseppe Fogli", - "email": "piergiuseppe.fogli@cmcc.it", - "webpage": "https://www.cmcc.it/people/fogli-pier-giuseppe" - }, - "label": "Global Ocean NEMO ATLANTECO 1/16", - "image": "https://ddsfiles.s3.fr-par.scw.cloud/images/glob16_atlantic_ocean.png", - "attribution": null, - "update_frequency": "None", - "doi": "https://doi.org/10.25424/cmcc/glob16-atlantic-2021", - "publication_date": "2022-04-19", - "id": "glob16-atlantic-ocean" - }, - "products": { - "daily": { - "coordinates": [ - { - "name": "nav_lat", - "method": "nearest", - "api": "latitude", - "label": "Latitude", - "description": "Latitude" - }, - { - "name": "nav_lon", - "method": "nearest", - "api": "longitude", - "label": "Longitude" - }, - { - "name": "time_counter", - "api": "time", - "label": "Time" - }, - { - "name": [ - "deptht", - "depthv", - "depthu" - ], - "show": "T", - "method": "nearest", - "api": "depth", - "label": "Vertical T,U,V levels", - "sorted": "T" - }, - { - "name": "depthw", - "method": "nearest", - "api": "depthw", - "sorted": "T" - } - ], - "variables": [ - { - "name": "bounds_lon", - "show": "F" - }, - { - "name": "bounds_lat", - "show": "F" - }, - { - "name": "deptht_bounds", - "show": "F" - }, - { - "name": "time_counter_bounds", - "show": "F" - }, - { - "name": "depthu_bounds", - "show": "F" - }, - { - "name": "depthv_bounds", - "show": "F" - } - ], - "fields": [ - { - "name": "var", - "show": "F" - } - ], - "catalog_dir": "/catalog/cmcc/" - } - } - }, - { - "metadata": { - "catalog_dir": "/catalog/", - "description": "This dataset is related to ERA5 that is the fifth generation ECMWF atmospheric reanalysis of the global climate. Reanalysis combines model data with observation data from satellite and in-situ stations from across the world into a globally complete and consistent dataset using ECMWF's Integrated Forecast System (IFS). This dataset is generated using Copernicus Climate Change Service Information 2020 and is a subset of the Climate Datastore Catalog entry (ERA5 hourly data on single levels from 1979 to present ). The dataset contains 14 variables related to the reanalysis product type: 2 metre dewpoint temperature, mean sea level pressure, snowfall, surface pressure, surface solar radiation downwards, surface thermal radiation downwards, 2 metre temperature, total precipitation, 10 metre U wind component, 10 metre V wind component, total cloud cover, minimum and maximum temperature at 2 metres since previous post-processing, mean wave direction. Data are currently available starting from 1979.", - "attribution": "This dataset has been generated using Copernicus Climate Change Service information 2020. 
Neither the European Commission nor ECMWF is responsible for any use that may be made of the Copernicus information or data it contains.", - "contact": { - "name": "Data Deliver System Support Team", - "email": "dds-support@cmcc.it", - "webpage": "https://www.cmcc.it/research-organization/research-divisions/advanced-scientific-computing-division#1553329820238-2055494b-9aa6" - }, - "label": "ERA5 hourly data on single levels from 1979 to present", - "image": "https://ddsfiles.s3.fr-par.scw.cloud/images/era5.png", - "doi": null, - "update_frequency": "Monthly", - "license": { - "name": "Licence to Use Copernicus Products", - "url": "https://cds.climate.copernicus.eu/api/v2/terms/static/licence-to-use-copernicus-products.pdf" - }, - "publication_date": "2020-12-22", - "related_data": [ - { - "name": "C3S ERA5 Data", - "url": "https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-single-levels" - } - ], - "id": "era5-single-levels" - }, - "products": { - "reanalysis": { - "role": "internal", - "catalog_dir": "/catalog/external/" - } - } - }, - { - "metadata": { - "catalog_dir": "/catalog/", - "description": "ERA5-Land is a reanalysis dataset providing a consistent view of the evolution of land variables over several decades at an enhanced resolution compared to ERA5. ERA5-Land has been produced by replaying the land component of the ECMWF ERA5 climate reanalysis. Reanalysis combines model data with observations from across the world into a globally complete and consistent dataset using the laws of physics. Reanalysis produces data that goes several decades back in time, providing an accurate description of the climate of the past. This dataset includes 5 variables as available on C3S Climate Data Store.", - "attribution": "This dataset has been generated using Copernicus Climate Change Service information 2022. Neither the European Commission nor ECMWF is responsible for any use that may be made of the Copernicus information or data it contains.", - "contact": { - "name": "Data Deliver System Support Team", - "email": "dds-support@cmcc.it", - "webpage": "https://www.cmcc.it/research-organization/research-divisions/advanced-scientific-computing-division#1553329820238-2055494b-9aa6" - }, - "label": "ERA5-Land hourly data from 1981 to present", - "image": "https://ddsfiles.s3.fr-par.scw.cloud/images/era5-land.png", - "doi": null, - "update_frequency": "Monthly", - "license": { - "name": "Licence to Use Copernicus Products", - "url": "https://cds.climate.copernicus.eu/api/v2/terms/static/licence-to-use-copernicus-products.pdf" - }, - "publication_date": "2022-04-11", - "related_data": [ - { - "name": "C3S ERA5 Land Dataset", - "url": "https://doi.org/10.24381/cds.e2161bac" - } - ], - "id": "era5-land" - }, - "products": { - "reanalysis": { - "role": "internal", - "catalog_dir": "/catalog/external/" - } - } - }, - { - "metadata": { - "catalog_dir": "/catalog/", - "description": "E-OBS is a daily gridded land-only observational dataset over Europe. The blended time series from the station network of the European Climate Assessment & Dataset (ECA&D) project form the basis for the E-OBS gridded dataset. All station data are sourced directly from the European National Meteorological and Hydrological Services (NMHSs) or other data holding institutions.
E-OBS comes as an ensemble dataset and is available on a 0.1 and 0.25 degree regular grid for the elements daily mean temperature TG, daily minimum temperature TN, daily maximum temperature TX, daily precipitation sum RR, daily averaged sea level pressure PP and daily mean global radiation QQ. They cover the area: 25N-71.5N x 25W-45E. The data files are in NetCDF-4 format. The Global 30 Arc-Second Elevation Data Set (GTOPO30), a global raster Digital Elevation Model (DEM) with a horizontal grid spacing of 30 arc seconds (approximately 1 kilometer) developed by USGS is used for the elevation file as well.
The ensemble dataset is constructed through a conditional simulation procedure. For each of the members of the ensemble a spatially correlated random field is produced using a pre-calculated spatial correlation function. The mean across the members is calculated and provided as the \"best-guess\" fields. The spread is calculated as the difference between the 5th and 95th percentiles over the ensemble, giving a measure of the 90% uncertainty range. The global radiation dataset has a 10-member ensemble, while the other elements have a 100-member ensemble. For more details see Cornes et al. (2018) and the guidance on how to use ensemble datasets.
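For illustration, the ensemble statistics described above can be reproduced in a few lines of numpy; the array shape and values below are placeholders, not taken from E-OBS itself:

import numpy as np

# ensemble: (n_members, n_lat, n_lon); E-OBS uses 100 members
# (10 for global radiation). Random placeholder data stands in here.
ensemble = np.random.default_rng(0).normal(size=(100, 201, 464))

best_guess = ensemble.mean(axis=0)                   # "best-guess" field
p05, p95 = np.percentile(ensemble, [5, 95], axis=0)  # ensemble percentiles
spread = p95 - p05                                   # ~90% uncertainty range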
The position of E-OBS is unique in Europe because of the relatively high spatial horizontal grid spacing, the daily resolution of the dataset, the provision of multiple variables and the length of the dataset. The dataset is daily, meaning the observations cover 24 hours per time step. The exact 24-hour period can be different per region. The reason for this is that some data providers measure between midnight to midnight while others might measure from morning to morning. Since E-OBS is an observational dataset, no attempts have been made to adjust time series for this 24-hour offset. It is made sure, where known, that the largest part of the measured 24-hour period corresponds to the day attached to the time step in E-OBS (and ECA&D).", - "contact": { - "name": "Data Deliver System Support Team", - "email": "dds-support@cmcc.it", - "webpage": "https://www.cmcc.it/research-organization/research-divisions/advanced-scientific-computing-division#1553329820238-2055494b-9aa6" - }, - "attribution": "This dataset has been generated using Copernicus Climate Change Service information 2020. Neither the European Commission nor ECMWF is responsible for any use that may be made of the Copernicus information or data it contains.

Whenever you publish research or applications based in whole or in part on these data, you should include the following citation and acknowledgement:

We acknowledge the E-OBS dataset and the data providers in the ECA&D project

Cornes, R., G. van der Schrier, E.J.M. van den Besselaar, and P.D. Jones. 2018: An Ensemble Version of the E-OBS Temperature and Precipitation Datasets, J. Geophys. Res. Atmos., 123. doi:10.1029/2017JD028200", - "label": "E-OBS daily gridded meteorological data for Europe from 1950 to present", - "image": "https://ddsfiles.s3.fr-par.scw.cloud/images/e-obs.png", - "doi": null, - "license": { - "name": "E-OBS Product License", - "url": "https://www.ecad.eu/documents/ECAD_datapolicy.pdf" - }, - "publication_date": "2020-12-22", - "update_frequency": "Every 6 months", - "related_data": [ - { - "name": "C3S E-OBS daily gridded dataset", - "url": "https://doi.org/10.24381/cds.151d3ec6" - }, - { - "name": "Monitoring European climate using surface observations", - "url": "https://surfobs.climate.copernicus.eu/" - } - ], - "id": "e-obs" - }, - "products": { - "ensemble-mean": { - "role": "internal", - "filters": [ - { - "name": "resolution", - "user_defined": "T" - }, - { - "name": "version", - "user_defined": "T" - }, - { - "name": "var" - } - ], - "catalog_dir": "/catalog/external/" - }, - "ensemble-spread": { - "role": "internal", - "filters": [ - { - "name": "resolution", - "user_defined": "T" - }, - { - "name": "version", - "user_defined": "T" - }, - { - "name": "var" - } - ], - "catalog_dir": "/catalog/external/" - }, - "elevation": { - "role": "internal", - "filters": [ - { - "name": "resolution", - "user_defined": "T" - }, - { - "name": "version", - "user_defined": "T" - }, - { - "name": "var" - } - ], - "catalog_dir": "/catalog/external/" - } - } - }, - { - "metadata": { - "catalog_dir": "/catalog/", - "description": "This dataset ...", - "contact": { - "name": "Data Deliver System Support Team", - "email": "dds-support@cmcc.it", - "webpage": "https://www.cmcc.it/research-organization/research-divisions/advanced-scientific-computing-division#1553329820238-2055494b-9aa6" - }, - "attribution": "This dataset ...", - "label": "CORDEX Adjusted", - "image": null, - "doi": null, - "license": { - "name": "CORDEX License", - "url": null - }, - "publication_date": "2022-06-23", - "update_frequency": "None", - "id": "cordex" - }, - "products": { - "adjusted": { - "query_limit_gb": 10, - "size_weight": 1, - "filters": [ - { - "name": "var", - "user_defined": "T" - }, - { - "name": "global_model", - "user_defined": "T" - }, - { - "name": "RCP", - "user_defined": "T" - }, - { - "name": "experiment", - "user_defined": "T" - }, - { - "name": "regional", - "user_defined": "T" - }, - { - "name": "version", - "user_defined": "T" - }, - { - "name": "freq", - "user_defined": "T" - } - ], - "catalog_dir": "/catalog/external/" - } - } - }, - { - "metadata": { - "catalog_dir": "/catalog/", - "description": "This dataset ...", - "contact": { - "name": "Data Deliver System Support Team", - "email": "dds-support@cmcc.it", - "webpage": "https://www.cmcc.it/research-organization/research-divisions/advanced-scientific-computing-division#1553329820238-2055494b-9aa6" - }, - "attribution": "This dataset ...", - "label": "CMIP6", - "image": null, - "doi": null, - "license": { - "name": "CMIP6 License", - "url": null - }, - "publication_date": "2022-07-13", - "update_frequency": "None", - "id": "cmip6" - }, - "products": { - "scenario": { - "query_limit_gb": 10, - "size_weight": 1, - "filters": [ - { - "name": "activity", - "user_defined": "T" - }, - { - "name": "institution", - "user_defined": "T" - }, - { - "name": "source", - "user_defined": "T" - }, - { - "name": "experiment", - "user_defined": "T" - }, - { - "name": 
"member", - "user_defined": "T" - }, - { - "name": "table", - "user_defined": "T" - }, - { - "name": "var", - "user_defined": "T" - }, - { - "name": "grid_label", - "user_defined": "T" - } - ], - "catalog_dir": "/catalog/cmcc/" - } - } - } -] \ No newline at end of file diff --git a/web/tests/test_converter.py b/web/tests/test_converter.py deleted file mode 100644 index 67e9aee..0000000 --- a/web/tests/test_converter.py +++ /dev/null @@ -1,25 +0,0 @@ -import datetime -import os -import json -import pytest - -from web.app.models import ListOfDatasets - - -class TestConverter: - @pytest.fixture - def resource_dir(self): - return os.path.join( - os.path.dirname(os.path.abspath(__file__)), "resources" - ) - - @pytest.fixture - def details(self, resource_dir): - with open( - os.path.join(resource_dir, "sample_details.json"), "rt" - ) as file: - yield json.load(file) - - def test_parse_details_successfully(self, details): - lod = ListOfDatasets.from_details(details) - assert isinstance(lod, ListOfDatasets) diff --git a/web/tests/test_user_credentials.py b/web/tests/test_user_credentials.py deleted file mode 100644 index 5360f2b..0000000 --- a/web/tests/test_user_credentials.py +++ /dev/null @@ -1,30 +0,0 @@ -import pytest -import uuid - - -from pydantic import ValidationError - -from app.utils import UserCredentials - - -class TestUserCredentials: - def test_use_wrong_user_id_format(self): - with pytest.raises(ValidationError): - UserCredentials(user_id=10, user_token="aaa") - - def test_uuidv4_user_id(self): - id_ = uuid.uuid4() - uc = UserCredentials(user_id=id_, user_token="aaa") - assert uc.id == id_ - - def test_str_uuidv4_user_id(self): - id_ = str(uuid.uuid4()) - uc = UserCredentials(user_id=id_, user_token="aaa") - assert not isinstance(id_, type(uc.id)) - assert str(uc.id) == id_ - - def test_ensure_key_is_not_printed(self): - uc = UserCredentials(user_token="aaa") - repr_val = repr(uc) - assert uc.key == "aaa" - assert "aaa" not in repr_val