Skip to content

Commit

Permalink
chore: remove dead code
Browse files Browse the repository at this point in the history
  • Loading branch information
fspot committed Sep 12, 2024
1 parent 94864dc commit 758cd54
Show file tree
Hide file tree
Showing 6 changed files with 3 additions and 128 deletions.
32 changes: 0 additions & 32 deletions tests/mongo/test_mongo.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,38 +205,6 @@ def test_get_df_live(mongo_connector, mongo_datasource):
assert df2.equals(df)


def test_get_df_with_permissions(mongo_connector, mongo_datasource):
    """Permissions are merged into the Mongo query and restrict the returned rows.

    The same expectations are checked for a query given as a plain filter dict
    and for a query given as an aggregation pipeline.
    """
    query_variants = (
        {"domain": "domain1"},
        [{"$match": {"domain": "domain1"}}],
    )
    for raw_query in query_variants:
        datasource = mongo_datasource(collection="test_col", query=raw_query)
        # A fresh permissions dict per call, exactly as two independent calls would pass.
        df = mongo_connector.get_df(
            datasource,
            permissions={"column": "country", "operator": "eq", "value": "France"},
        )
        expected = pd.DataFrame(
            {
                "country": ["France", "France"],
                "language": ["French", "French"],
                "value": [20, 20],
                "name": ["François", "Marie"],
            }
        )
        # The permission condition is AND-ed with the original match condition.
        assert datasource.query == [{"$match": {"$and": [{"domain": "domain1"}, {"country": {"$eq": "France"}}]}}]
        assert df.shape == (2, 6)
        assert set(df.columns) == {"_id", "country", "domain", "language", "value", "name"}
        assert df[["country", "language", "value", "name"]].equals(expected)


def test_get_slice(mongo_connector: MongoConnector, mongo_datasource: Callable[..., MongoDataSource]):
datasource = mongo_datasource(collection="test_col", query={"domain": "domain1"})
res = mongo_connector.get_slice(datasource)
Expand Down
16 changes: 0 additions & 16 deletions tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
ConnectorStatus,
UndefinedVariableError,
adapt_param_type,
apply_query_parameters,
convert_to_numeric_paramstyle,
convert_to_printf_templating_style,
convert_to_qmark_paramstyle,
Expand Down Expand Up @@ -298,21 +297,6 @@ def test_nosql_apply_parameters_to_query_dot():
assert res1 == res2 == {"facet": "auteur", "sort": "rang", "rows": 50}


def test_render_raw_permission_no_params():
    """Rendering with ``None`` as parameters returns the query unchanged."""
    raw_query = "(indic0 == 0 or indic1 == 1)"
    rendered = apply_query_parameters(raw_query, None)
    assert rendered == raw_query


def test_render_raw_permission():
    """Placeholders are rendered from the parameters.

    Per the expected output: a bare string placeholder is rendered quoted, a bare
    int stays unquoted, and a placeholder already inside quotes is inserted as is.
    """
    template = (
        "(indic0 == {{my_indic[0]}} or indic1 == {{my_indic[1]}}) and "
        'indic2 == "yo_{{my_indic[2]}}" and indic_list == {{my_indic}}'
    )
    rendered = apply_query_parameters(template, {"my_indic": ["0", 1, "2"]})
    assert rendered == '(indic0 == "0" or indic1 == 1) and ' "indic2 == \"yo_2\" and indic_list == ['0', 1, '2']"


def test_bad_variable_in_query():
"""Render empty string if a jinja var is not set"""
query = {"url": "/stuff/%(thing)s/foo"}
Expand Down
13 changes: 0 additions & 13 deletions tests/test_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,19 +89,6 @@ def _retrieve_data(self, datasource):
assert dc._format_version(None)


def test_get_df_with_permissions():
    """``get_df`` keeps only the rows matching the given permissions."""

    class DataConnector(ToucanConnector, data_source_model=DataSource):
        type: str = "MyDB"

        def _retrieve_data(self, datasource):
            return pd.DataFrame({"A": [1, 2]})

    connector = DataConnector(name="my_name")
    ds = connector.data_source_model(domain="yo", name="my_name", query="")
    df = connector.get_df(ds, permissions={"column": "A", "operator": "eq", "value": 1})
    # NOTE: `all(df == expected)` iterates over the frame's column labels, so it
    # is truthy for any non-empty label and never looks at the cell values.
    # Check the columns and the remaining values explicitly instead.
    assert list(df.columns) == ["A"]
    assert df["A"].tolist() == [1]


def test_get_slice():
class DataConnector(ToucanConnector, data_source_model=DataSource):
type: str = "MyDB"
Expand Down
8 changes: 1 addition & 7 deletions toucan_connectors/awsathena/awsathena_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,8 @@
logging.getLogger(__name__).warning(f"Missing dependencies for {__name__}: {exc}")
CONNECTOR_OK = False

from toucan_connectors.common import ConnectorStatus, apply_query_parameters, sanitize_query
from toucan_connectors.common import ConnectorStatus, sanitize_query
from toucan_connectors.pagination import build_pagination_info
from toucan_connectors.pandas_translator import PandasConditionTranslator
from toucan_connectors.toucan_connector import (
DataSlice,
DataStats,
Expand Down Expand Up @@ -171,11 +170,6 @@ def get_slice(
df = self._retrieve_data(data_source, offset=offset, limit=limit)
df.columns = df.columns.astype(str)

if permissions is not None:
permissions_query = PandasConditionTranslator.translate(permissions)
permissions_query = apply_query_parameters(permissions_query, data_source.parameters or {})
df = df.query(permissions_query)

return DataSlice(
df,
stats=DataStats(df_memory_size=df.memory_usage().sum()),
Expand Down
52 changes: 0 additions & 52 deletions toucan_connectors/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import asyncio
import dataclasses
import datetime
import logging
import re
from contextlib import suppress
from copy import deepcopy
Expand All @@ -12,25 +11,15 @@
from jinja2.nativetypes import NativeEnvironment
from pydantic import Field

from toucan_connectors.utils.slugify import slugify

if TYPE_CHECKING: # pragma: no cover
import pandas as pd


# Query interpolation

# printf-style named placeholder such as %(name)s; group 1 captures the name,
# which may not contain parentheses or '%'.
RE_PARAM = r"%\(([^(%\()]*)\)s"
# Jinja expression such as {{name}}; group 1 captures the text between the
# braces, which may not contain parentheses or braces.
RE_JINJA = r"{{([^({{)}]*)}}"
RE_SINGLE_VAR_JINJA = r"{{\s*([^\W\d]\w*)\s*}}" # a single identifier, e.g: {{ __foo__ }}

# Same as RE_JINJA, but anchored so the expression must be the whole string.
RE_JINJA_ALONE = r"^" + RE_JINJA + "$"

# Identify jinja params with no quotes around or complex condition:
# a jinja expression followed by a space or ')' (captured as group 2), or
# sitting at the very end of the string. The second pattern has an empty
# group 2 so the same replacement template works for both.
RE_JINJA_ALONE_IN_STRING = [RE_JINJA + r"([ )])", RE_JINJA + r"()$"]

# Replacement template for the patterns above: prefixes the captured expression
# with __keep_type__ and re-emits the trailing character (group 2).
RE_SET_KEEP_TYPE = r"{{__keep_type__\1}}\2"
# Captures the expressions that carry the __keep_type__ prefix.
RE_GET_KEEP_TYPE = r"{{(__keep_type__[^({{)}]*)}}"
# %(name)s placeholder (letters, digits, underscores), optionally wrapped in single quotes.
RE_NAMED_PARAM = r"\'?%\([a-zA-Z0-9_]*\)s\'?"


Expand Down Expand Up @@ -198,47 +187,6 @@ def nosql_apply_parameters_to_query(
return rendered if _is_defined(rendered) else type(query)()


def apply_query_parameters(query: str, parameters: dict) -> str:
    """
    Apply parameters to query

    Interpolate the query, which is a Jinja template, with the provided parameters.

    Placeholders matched by RE_JINJA_ALONE_IN_STRING (followed by a space, a
    closing parenthesis, or the end of the query) are flagged with a
    ``__keep_type__`` prefix and rendered from a flattened variant of the
    parameters in which string values are wrapped in double quotes.

    Args:
        query: the Jinja template to render.
        parameters: values available to the template. ``None`` is accepted and
            means "no interpolation". The mapping is never modified.

    Returns:
        The rendered query, or ``query`` unchanged when ``parameters`` is None.
    """

    def _flatten_dict(p, parent_key=""):
        # Flatten nested dicts/lists into "<parent>_<key>" entries; lists are
        # addressed by index. String leaves are wrapped in double quotes.
        new_p = {}
        for k, v in deepcopy(p).items():
            new_key = f"{parent_key}_{k}" if parent_key else k
            new_p[new_key] = v
            if isinstance(v, list):
                v = dict(enumerate(v))
            if isinstance(v, dict):
                new_p.update(_flatten_dict(v, new_key))
            elif isinstance(v, str):
                new_p[new_key] = f'"{v}"'
        return new_p

    if parameters is None:
        return query

    # Flag params to keep type if not complex (no quotes or condition)
    for pattern in RE_JINJA_ALONE_IN_STRING:
        query = re.sub(pattern, RE_SET_KEEP_TYPE, query)
    flagged_expressions = re.findall(RE_GET_KEEP_TYPE, query)
    for key in flagged_expressions:
        # Turn the flagged expression into a flat identifier that matches the
        # keys produced by _flatten_dict.
        query = query.replace(key, slugify(key, separator="_"))

    render_context = parameters
    if flagged_expressions:
        # Add a version of parameters flatten + with quotes for string.
        # Build a new mapping rather than updating `parameters` in place, so the
        # caller's dict does not end up polluted with __keep_type__ entries.
        render_context = {**parameters, **_flatten_dict(parameters, parent_key="__keep_type_")}

    logging.getLogger(__name__).debug("Render query: %s with parameters %s", query, render_context)
    return Template(query).render(render_context)


# jq filtering


Expand Down
10 changes: 2 additions & 8 deletions toucan_connectors/toucan_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,10 @@

from toucan_connectors.common import (
ConnectorStatus,
apply_query_parameters,
nosql_apply_parameters_to_query,
)
from toucan_connectors.json_wrapper import JsonWrapper
from toucan_connectors.pagination import PaginationInfo, build_pagination_info
from toucan_connectors.pandas_translator import PandasConditionTranslator
from toucan_connectors.utils.datetime import sanitize_df_dates

if TYPE_CHECKING: # pragma: no cover
Expand Down Expand Up @@ -326,16 +324,12 @@ def _retrieve_data(self, data_source: DS):
def get_df(self, data_source: DS, permissions: dict | None = None) -> "pd.DataFrame":
"""
Method to retrieve the data as a pandas dataframe
filtered by permissions
-filtered by permissions- => not anymore, since permissions are translated
to FilterSteps and we don't have code mode queries anymore.
"""
res = self._retrieve_data(data_source)
res.columns = res.columns.astype(str)
res = sanitize_df_dates(res)

if permissions is not None:
permissions_query = PandasConditionTranslator.translate(permissions)
permissions_query = apply_query_parameters(permissions_query, data_source.parameters or {})
res = res.query(permissions_query)
return res

def get_slice(
Expand Down

0 comments on commit 758cd54

Please sign in to comment.