diff --git a/tests/test_common.py b/tests/test_common.py index 251b2de14..a48aa32bf 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -1,5 +1,6 @@ from datetime import date, datetime, timedelta +import jinja2 import numpy as np import pandas as pd import pytest @@ -254,6 +255,21 @@ def test_nosql_apply_parameters_to_query(query, params, expected): assert nosql_apply_parameters_to_query(query, params) == expected +def test_nosql_apply_parameters_to_query_unsafe(): + """ + It should prevent any code execution, by using Jinja's sandboxed environment + """ + with pytest.raises(jinja2.exceptions.SecurityError): + nosql_apply_parameters_to_query( + { + 'test': "{% for x in var.__class__.__base__.__subclasses__() %}{% if 'warning' in x.__name__ %}{{x()._module.__builtins__ ['__import__']('os').popen('ls').read()}}{% endif %}{% endfor %}" + }, + {'var': 'plop'}, + ) + with pytest.raises(jinja2.exceptions.SecurityError): + nosql_apply_parameters_to_query({'test': '{{ var.__class__.mro()[-1] }}'}, {'var': 'plop'}) + + def test_nosql_apply_parameters_to_query_dot(): """It should handle both `x["y"]` and `x.y`""" query1 = {'facet': '{{ facet.value }}', 'sort': '{{ rank[0] }}', 'rows': '{{ bibou[0].value }}'} diff --git a/toucan_connectors/common.py b/toucan_connectors/common.py index de61665c7..d54bedc4d 100644 --- a/toucan_connectors/common.py +++ b/toucan_connectors/common.py @@ -11,11 +11,17 @@ import jq import pandas as pd from aiohttp import ClientSession -from jinja2 import Environment, StrictUndefined, Template, meta +from jinja2 import StrictUndefined, Template, meta from jinja2.nativetypes import NativeEnvironment +from jinja2.sandbox import ImmutableSandboxedEnvironment from pydantic import Field from toucan_data_sdk.utils.helpers import slugify + +class NativeImmutableSandboxedEnvironment(NativeEnvironment, ImmutableSandboxedEnvironment): + ... 
+ + # Query interpolation RE_PARAM = r'%\(([^(%\()]*)\)s' @@ -62,7 +68,7 @@ def is_jinja_alone(s: str) -> bool: def _has_parameters(query: dict | list[dict] | tuple | str) -> bool: - t = Environment().parse(query) + t = ImmutableSandboxedEnvironment().parse(query) return bool(meta.find_undeclared_variables(t) or re.search(RE_PARAM, query)) @@ -129,9 +135,9 @@ def _render_query(query: dict | list[dict] | tuple | str, parameters: dict): clean_p = _prepare_parameters(clean_p) if is_jinja_alone(query): - env = NativeEnvironment() + env = NativeImmutableSandboxedEnvironment() else: - env = Environment() + env = ImmutableSandboxedEnvironment() res = env.from_string(query).render(clean_p) # NativeEnvironment's render() isn't recursive, so we need to diff --git a/toucan_connectors/snowflake/snowflake_connector.py b/toucan_connectors/snowflake/snowflake_connector.py index 475dd1f08..2c21bfbca 100644 --- a/toucan_connectors/snowflake/snowflake_connector.py +++ b/toucan_connectors/snowflake/snowflake_connector.py @@ -11,7 +11,7 @@ import pandas as pd import requests import snowflake -from jinja2 import Template +from jinja2.sandbox import ImmutableSandboxedEnvironment from pydantic import Field, SecretStr, create_model from snowflake.connector import SnowflakeConnection @@ -220,7 +220,7 @@ def get_status(self) -> ConnectorStatus: def get_connection_params(self): params = { - 'user': Template(self.user).render(), + 'user': ImmutableSandboxedEnvironment().from_string(self.user).render(), 'account': self.account, 'authenticator': self.authentication_method, # hard Snowflake params