diff --git a/tests/mongo/test_mongo.py b/tests/mongo/test_mongo.py index 032bd9ac7..5c3058c9b 100644 --- a/tests/mongo/test_mongo.py +++ b/tests/mongo/test_mongo.py @@ -205,38 +205,6 @@ def test_get_df_live(mongo_connector, mongo_datasource): assert df2.equals(df) -def test_get_df_with_permissions(mongo_connector, mongo_datasource): - datasource = mongo_datasource(collection="test_col", query={"domain": "domain1"}) - df = mongo_connector.get_df(datasource, permissions={"column": "country", "operator": "eq", "value": "France"}) - expected = pd.DataFrame( - { - "country": ["France", "France"], - "language": ["French", "French"], - "value": [20, 20], - "name": ["François", "Marie"], - } - ) - assert datasource.query == [{"$match": {"$and": [{"domain": "domain1"}, {"country": {"$eq": "France"}}]}}] - assert df.shape == (2, 6) - assert set(df.columns) == {"_id", "country", "domain", "language", "value", "name"} - assert df[["country", "language", "value", "name"]].equals(expected) - - datasource = mongo_datasource(collection="test_col", query=[{"$match": {"domain": "domain1"}}]) - df = mongo_connector.get_df(datasource, permissions={"column": "country", "operator": "eq", "value": "France"}) - expected = pd.DataFrame( - { - "country": ["France", "France"], - "language": ["French", "French"], - "value": [20, 20], - "name": ["François", "Marie"], - } - ) - assert datasource.query == [{"$match": {"$and": [{"domain": "domain1"}, {"country": {"$eq": "France"}}]}}] - assert df.shape == (2, 6) - assert set(df.columns) == {"_id", "country", "domain", "language", "value", "name"} - assert df[["country", "language", "value", "name"]].equals(expected) - - def test_get_slice(mongo_connector: MongoConnector, mongo_datasource: Callable[..., MongoDataSource]): datasource = mongo_datasource(collection="test_col", query={"domain": "domain1"}) res = mongo_connector.get_slice(datasource) diff --git a/tests/test_common.py b/tests/test_common.py index 2535a6fab..1d813e8f9 100644 --- 
a/tests/test_common.py +++ b/tests/test_common.py @@ -13,7 +13,6 @@ ConnectorStatus, UndefinedVariableError, adapt_param_type, - apply_query_parameters, convert_to_numeric_paramstyle, convert_to_printf_templating_style, convert_to_qmark_paramstyle, @@ -298,21 +297,6 @@ def test_nosql_apply_parameters_to_query_dot(): assert res1 == res2 == {"facet": "auteur", "sort": "rang", "rows": 50} -def test_render_raw_permission_no_params(): - query = "(indic0 == 0 or indic1 == 1)" - assert apply_query_parameters(query, None) == query - - -def test_render_raw_permission(): - query = ( - "(indic0 == {{my_indic[0]}} or indic1 == {{my_indic[1]}}) and " - 'indic2 == "yo_{{my_indic[2]}}" and indic_list == {{my_indic}}' - ) - params = {"my_indic": ["0", 1, "2"]} - expected = '(indic0 == "0" or indic1 == 1) and ' "indic2 == \"yo_2\" and indic_list == ['0', 1, '2']" - assert apply_query_parameters(query, params) == expected - - def test_bad_variable_in_query(): """Render empty string if a jinja var is not set""" query = {"url": "/stuff/%(thing)s/foo"} diff --git a/tests/test_connector.py b/tests/test_connector.py index 44ce07d07..b3bcbd1b9 100644 --- a/tests/test_connector.py +++ b/tests/test_connector.py @@ -89,19 +89,6 @@ def _retrieve_data(self, datasource): assert dc._format_version(None) -def test_get_df_with_permissions(): - class DataConnector(ToucanConnector, data_source_model=DataSource): - type: str = "MyDB" - - def _retrieve_data(self, datasource): - return pd.DataFrame({"A": [1, 2]}) - - connector = DataConnector(name="my_name") - ds = connector.data_source_model(domain="yo", name="my_name", query="") - df = connector.get_df(ds, permissions={"column": "A", "operator": "eq", "value": 1}) - assert all(df == pd.DataFrame({"A": [1]})) - - def test_get_slice(): class DataConnector(ToucanConnector, data_source_model=DataSource): type: str = "MyDB" diff --git a/toucan_connectors/awsathena/awsathena_connector.py b/toucan_connectors/awsathena/awsathena_connector.py index 
14c9cec5c..f112c96c1 100644 --- a/toucan_connectors/awsathena/awsathena_connector.py +++ b/toucan_connectors/awsathena/awsathena_connector.py @@ -15,9 +15,8 @@ logging.getLogger(__name__).warning(f"Missing dependencies for {__name__}: {exc}") CONNECTOR_OK = False -from toucan_connectors.common import ConnectorStatus, apply_query_parameters, sanitize_query +from toucan_connectors.common import ConnectorStatus, sanitize_query from toucan_connectors.pagination import build_pagination_info -from toucan_connectors.pandas_translator import PandasConditionTranslator from toucan_connectors.toucan_connector import ( DataSlice, DataStats, @@ -171,11 +170,6 @@ def get_slice( df = self._retrieve_data(data_source, offset=offset, limit=limit) df.columns = df.columns.astype(str) - if permissions is not None: - permissions_query = PandasConditionTranslator.translate(permissions) - permissions_query = apply_query_parameters(permissions_query, data_source.parameters or {}) - df = df.query(permissions_query) - return DataSlice( df, stats=DataStats(df_memory_size=df.memory_usage().sum()), diff --git a/toucan_connectors/common.py b/toucan_connectors/common.py index d3d4727c5..9e4254231 100644 --- a/toucan_connectors/common.py +++ b/toucan_connectors/common.py @@ -2,7 +2,6 @@ import asyncio import dataclasses import datetime -import logging import re from contextlib import suppress from copy import deepcopy @@ -12,8 +11,6 @@ from jinja2.nativetypes import NativeEnvironment from pydantic import Field -from toucan_connectors.utils.slugify import slugify - if TYPE_CHECKING: # pragma: no cover import pandas as pd @@ -21,16 +18,8 @@ # Query interpolation RE_PARAM = r"%\(([^(%\()]*)\)s" -RE_JINJA = r"{{([^({{)}]*)}}" RE_SINGLE_VAR_JINJA = r"{{\s*([^\W\d]\w*)\s*}}" # a single identifier, e.g: {{ __foo__ }} -RE_JINJA_ALONE = r"^" + RE_JINJA + "$" - -# Identify jinja params with no quotes around or complex condition -RE_JINJA_ALONE_IN_STRING = [RE_JINJA + r"([ )])", RE_JINJA + r"()$"] - 
-RE_SET_KEEP_TYPE = r"{{__keep_type__\1}}\2" -RE_GET_KEEP_TYPE = r"{{(__keep_type__[^({{)}]*)}}" RE_NAMED_PARAM = r"\'?%\([a-zA-Z0-9_]*\)s\'?" @@ -198,47 +187,6 @@ def nosql_apply_parameters_to_query( return rendered if _is_defined(rendered) else type(query)() -def apply_query_parameters(query: str, parameters: dict) -> str: - """ - Apply parameters to query - - Interpolate the query, which is a Jinja templates, with the provided parameters. - """ - - def _flatten_dict(p, parent_key=""): - new_p = {} - for k, v in deepcopy(p).items(): - new_key = f"{parent_key}_{k}" if parent_key else k - new_p[new_key] = v - if isinstance(v, list): - v = dict(enumerate(v)) - if isinstance(v, dict): - new_p.update(_flatten_dict(v, new_key)) - elif isinstance(v, str): - new_p.update({new_key: f'"{v}"'}) - else: - new_p.update({new_key: v}) - return new_p - - if parameters is None: - return query - - # Flag params to keep type if not complex (no quotes or condition) - - for pattern in RE_JINJA_ALONE_IN_STRING: - query = re.sub(pattern, RE_SET_KEEP_TYPE, query) - p_keep_type = re.findall(RE_GET_KEEP_TYPE, query) - for key in p_keep_type: - query = query.replace(key, slugify(key, separator="_")) - if len(p_keep_type): - # Add a version of parameters flatten + with quotes for string - p_keep_type = _flatten_dict(parameters, parent_key="__keep_type_") - parameters.update(p_keep_type) - - logging.getLogger(__name__).debug(f"Render query: {query} with parameters {parameters}") - return Template(query).render(parameters) - - # jq filtering diff --git a/toucan_connectors/toucan_connector.py b/toucan_connectors/toucan_connector.py index e48d2b507..05c68f51b 100644 --- a/toucan_connectors/toucan_connector.py +++ b/toucan_connectors/toucan_connector.py @@ -15,12 +15,10 @@ from toucan_connectors.common import ( ConnectorStatus, - apply_query_parameters, nosql_apply_parameters_to_query, ) from toucan_connectors.json_wrapper import JsonWrapper from toucan_connectors.pagination import 
PaginationInfo, build_pagination_info -from toucan_connectors.pandas_translator import PandasConditionTranslator from toucan_connectors.utils.datetime import sanitize_df_dates if TYPE_CHECKING: # pragma: no cover @@ -326,16 +324,12 @@ def _retrieve_data(self, data_source: DS): def get_df(self, data_source: DS, permissions: dict | None = None) -> "pd.DataFrame": """ Method to retrieve the data as a pandas dataframe - filtered by permissions + The result is no longer filtered by `permissions`: permissions are now translated + to FilterSteps, and code mode queries are no longer supported. """ res = self._retrieve_data(data_source) res.columns = res.columns.astype(str) res = sanitize_df_dates(res) - - if permissions is not None: - permissions_query = PandasConditionTranslator.translate(permissions) - permissions_query = apply_query_parameters(permissions_query, data_source.parameters or {}) - res = res.query(permissions_query) return res def get_slice(