From 77e20e0d10ffa76b17019939e1140c13d3212fdd Mon Sep 17 00:00:00 2001 From: AnnaXiong Date: Tue, 31 Oct 2023 09:03:04 -0400 Subject: [PATCH] add vizro-ai tests --- vizro-ai/tests/.gitkeep | 0 .../components/test_chart_selection.py | 47 +++++++ .../components/test_code_validation.py | 57 ++++++++ .../components/test_custom_chart_wrap.py | 130 ++++++++++++++++++ .../components/test_dataframe_craft.py | 97 +++++++++++++ .../vizro-ai/components/test_explanation.py | 106 ++++++++++++++ .../vizro-ai/components/test_visual_code.py | 114 +++++++++++++++ .../vizro-ai/utils/test_safeguard_code.py | 125 +++++++++++++++++ .../test_get_action_loop_components.py | 2 +- 9 files changed, 677 insertions(+), 1 deletion(-) create mode 100644 vizro-ai/tests/.gitkeep create mode 100644 vizro-ai/tests/unit/vizro-ai/components/test_chart_selection.py create mode 100644 vizro-ai/tests/unit/vizro-ai/components/test_code_validation.py create mode 100644 vizro-ai/tests/unit/vizro-ai/components/test_custom_chart_wrap.py create mode 100644 vizro-ai/tests/unit/vizro-ai/components/test_dataframe_craft.py create mode 100644 vizro-ai/tests/unit/vizro-ai/components/test_explanation.py create mode 100644 vizro-ai/tests/unit/vizro-ai/components/test_visual_code.py create mode 100644 vizro-ai/tests/unit/vizro-ai/utils/test_safeguard_code.py diff --git a/vizro-ai/tests/.gitkeep b/vizro-ai/tests/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/vizro-ai/tests/unit/vizro-ai/components/test_chart_selection.py b/vizro-ai/tests/unit/vizro-ai/components/test_chart_selection.py new file mode 100644 index 000000000..9f6246055 --- /dev/null +++ b/vizro-ai/tests/unit/vizro-ai/components/test_chart_selection.py @@ -0,0 +1,47 @@ +import pandas as pd +import pytest +from langchain.llms.fake import FakeListLLM +from vizro_ai.components import GetChartSelection + + +@pytest.fixture +def fake_llm(): + # This is to simulate the response of LLM + response = ['{"chart_type": "bar"}'] + return 
FakeListLLM(responses=response)
+
+
+class TestChartSelectionInstantiation:
+    @pytest.fixture(autouse=True)  # pytest hands setup_method the test method, never a fixture
+    def _setup(self, fake_llm):
+        self.get_chart_selection = GetChartSelection(llm=fake_llm)
+
+    def test_instantiation(self, fake_llm):  # request the fixture, not the module-level function
+        assert self.get_chart_selection.llm == fake_llm
+
+    def test_pre_process(self):
+        df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
+        llm_kwargs, partial_vars = self.get_chart_selection._pre_process(df)
+        expected_partial_vars = {"df_schema": "A: int64\nB: int64", "df_head": df.head().to_markdown()}
+        assert partial_vars == expected_partial_vars
+
+    @pytest.mark.parametrize(
+        "load_args, expected_chart_name",
+        [
+            ({"chart_type": "line"}, "line"),
+            ({"chart_type": "bar"}, "bar"),
+            ({"chart_type": ["line", "bar"]}, "line,bar"),
+        ],
+    )
+    def test_post_process(self, load_args, expected_chart_name):
+        chart_names = self.get_chart_selection._post_process(load_args)
+        assert chart_names == expected_chart_name
+
+
+class TestChartSelection:
+    def test_fake_response(self, gapminder, fake_llm):
+        get_chart_selection = GetChartSelection(fake_llm)
+        target_chart = get_chart_selection.run(
+            df=gapminder, chain_input="choose a best chart for describe the composition"
+        )
+        assert target_chart == "bar"
diff --git a/vizro-ai/tests/unit/vizro-ai/components/test_code_validation.py b/vizro-ai/tests/unit/vizro-ai/components/test_code_validation.py
new file mode 100644
index 000000000..44c380f17
--- /dev/null
+++ b/vizro-ai/tests/unit/vizro-ai/components/test_code_validation.py
@@ -0,0 +1,57 @@
+import pytest
+from langchain.llms.fake import FakeListLLM
+from vizro_ai.components import GetDebugger
+
+
+@pytest.fixture
+def fake_llm():
+    """Simulate the LLM with one canned response."""
+    response = ['{{"fixed_code": "{}"}}'.format("print(df[['country', 'continent']])")]
+    return FakeListLLM(responses=response)
+
+
+@pytest.fixture
+def fake_code_snippet():
+    return "print(df['country', 'continent'])"
+
+
+@pytest.fixture
+def 
fake_error_msg():
+    return "KeyError: ('country', 'continent')"
+
+
+class TestCodeValidationInstantiation:
+    @pytest.fixture(autouse=True)  # pytest hands setup_method the test method, never a fixture
+    def _setup(self, fake_llm):
+        self.get_debugger = GetDebugger(llm=fake_llm)
+
+    def test_instantiation(self, fake_llm):  # request the fixture, not the module-level function
+        assert self.get_debugger.llm == fake_llm
+
+    def test_pre_process(self, fake_code_snippet):  # use the fixture value, not the fixture function
+        llm_kwargs, partial_vars = self.get_debugger._pre_process(fake_code_snippet)
+        assert partial_vars == {"code_snippet": fake_code_snippet}
+
+    @pytest.mark.parametrize(
+        "load_args, expected_fixed_code",
+        [
+            (
+                {"fixed_code": "print('unit test for expected fixed code')"},
+                "print('unit test for expected fixed code')",
+            ),
+            (
+                {"fixed_code": "import pandas as pd\n" "\n" "print(df[['country', 'continent']])\n"},
+                "import pandas as pd\n" "\n" "print(df[['country', 'continent']])\n",
+            ),
+        ],
+    )
+    def test_post_process(self, load_args, expected_fixed_code):
+        fixed_code = self.get_debugger._post_process(load_args)
+        assert fixed_code == expected_fixed_code
+
+
+class TestCodeValidation:  # renamed from TestChartSelection: this module exercises GetDebugger
+    def test_fake_response(self, fake_llm, fake_code_snippet, fake_error_msg):
+        get_debugger = GetDebugger(fake_llm)
+        fixed_code = get_debugger.run(chain_input=fake_error_msg, code_snippet=fake_code_snippet)
+        assert fixed_code == "print(df[['country', 'continent']])"
diff --git a/vizro-ai/tests/unit/vizro-ai/components/test_custom_chart_wrap.py b/vizro-ai/tests/unit/vizro-ai/components/test_custom_chart_wrap.py
new file mode 100644
index 000000000..7861204dc
--- /dev/null
+++ b/vizro-ai/tests/unit/vizro-ai/components/test_custom_chart_wrap.py
@@ -0,0 +1,130 @@
+import pytest
+from langchain.llms.fake import FakeListLLM
+from vizro_ai.components import GetCustomChart
+
+
+@pytest.fixture
+def output_visual_component_1():
+    return """import vizro.plotly.express as px
+import pandas as pd
+
+df = df.groupby('continent')['gdpPercap'].sum().reset_index().rename(columns={'gdpPercap': 'total_gdp'})
+fig = px.bar(df, x='continent', 
y='total_gdp', color='continent', title='Composition of GDP in Continents') +fig.add_hline(y=df['total_gdp'].mean(), line_dash='dash', line_color='red', annotation_text='Average GDP') +fig.show()""" + + +@pytest.fixture +def output_custom_chart_LLM_1(): + return """import vizro.plotly.express as px +import pandas as pd + +def custom_chart(data_frame): + df = data_frame.groupby('continent')['gdpPercap'].sum().reset_index().rename(columns={'gdpPercap': 'total_gdp'}) + fig = px.bar(df, x='continent', y='total_gdp', color='continent', title='Composition of GDP in Continents') + fig.add_hline(y=df['total_gdp'].mean(), line_dash='dash', line_color='red', annotation_text='Average GDP') + return fig""" + + +@pytest.fixture +def expected_final_output_1(): + return """from vizro.models.types import capture +import vizro.plotly.express as px +import pandas as pd + +@capture('graph') +def custom_chart(data_frame): + df = data_frame.groupby('continent')['gdpPercap'].sum().reset_index().rename(columns={'gdpPercap': 'total_gdp'}) + fig = px.bar(df, x='continent', y='total_gdp', color='continent', title='Composition of GDP in Continents') + fig.add_hline(y=df['total_gdp'].mean(), line_dash='dash', line_color='red', annotation_text='Average GDP') + return fig + +fig = custom_chart(data_frame=df)""" + + +@pytest.fixture +def output_custom_chart_LLM_2(): + return """ +import vizro.plotly.express as px +import pandas as pd +def custom_chart(data_frame): + df = data_frame.groupby('continent')['gdpPercap'].sum().reset_index().rename(columns={'gdpPercap': 'total_gdp'}) + fig = px.bar(df, x='continent', y='total_gdp', color='continent', title='Composition of GDP in Continents') + fig.add_hline(y=df['total_gdp'].mean(), line_dash='dash', line_color='red', annotation_text='Average GDP') + return fig""" + + +@pytest.fixture +def expected_final_output_2(): + return """from vizro.models.types import capture +import vizro.plotly.express as px +import pandas as pd +@capture('graph') +def 
custom_chart(data_frame):
+    df = data_frame.groupby('continent')['gdpPercap'].sum().reset_index().rename(columns={'gdpPercap': 'total_gdp'})
+    fig = px.bar(df, x='continent', y='total_gdp', color='continent', title='Composition of GDP in Continents')
+    fig.add_hline(y=df['total_gdp'].mean(), line_dash='dash', line_color='red', annotation_text='Average GDP')
+    return fig
+
+fig = custom_chart(data_frame=df)"""
+
+
+@pytest.fixture
+def output_custom_chart_LLM_3():
+    return """import vizro.plotly.express as px
+import pandas as pd
+
+def some_chart_name(data_frame):
+    df = data_frame.groupby('continent')['gdpPercap'].sum().reset_index().rename(columns={'gdpPercap': 'total_gdp'})
+    fig = px.bar(df, x='continent', y='total_gdp', color='continent', title='Composition of GDP in Continents')
+    fig.add_hline(y=df['total_gdp'].mean(), line_dash='dash', line_color='red', annotation_text='Average GDP')
+    return fig"""
+
+
+@pytest.fixture
+def fake_llm(output_custom_chart_LLM_1):
+    """Simulate the LLM with one canned response."""
+    response = ['{{"custom_chart_code": "{}"}}'.format(output_custom_chart_LLM_1)]
+    return FakeListLLM(responses=response)
+
+
+class TestGetCustomChartMethods:
+    @pytest.fixture(autouse=True)  # pytest hands setup_method the test method, never a fixture
+    def _setup(self, fake_llm):
+        self.get_custom_chart = GetCustomChart(llm=fake_llm)
+
+    def test_instantiation(self, fake_llm):  # request the fixture, not the module-level function
+        """Test initialization of GetCustomChart."""
+        assert self.get_custom_chart.llm == fake_llm
+
+    def test_pre_process(self):
+        llm_kwargs, partial_vars = self.get_custom_chart._pre_process()
+        assert partial_vars == {}
+        assert isinstance(llm_kwargs, dict)
+
+    @pytest.mark.parametrize(
+        "input,output",
+        [
+            ("output_custom_chart_LLM_1", "expected_final_output_1"),
+            ("output_custom_chart_LLM_2", "expected_final_output_2"),
+        ],
+    )
+    def test_post_process(self, input, output, request):
+        input = request.getfixturevalue(input)
+        output = request.getfixturevalue(output)
+        loaded_args = {"custom_chart_code": input}
+        
processed_code = self.get_custom_chart._post_process(loaded_args) + assert processed_code == output + + def test_post_process_fail(self, output_custom_chart_LLM_3): + loaded_args = {"custom_chart_code": output_custom_chart_LLM_3} + with pytest.raises(ValueError, match="def custom_chart is not added correctly by the LLM. Try again."): + self.get_custom_chart._post_process(loaded_args) + + +class TestGetCustomChartRun: + def test_fake_run(self, fake_llm, expected_final_output_1): + get_custom_chart = GetCustomChart(fake_llm) + # Note that the chain input is not used in this component as we fake the LLM response + processed_code = get_custom_chart.run(chain_input="XXX") + assert processed_code == expected_final_output_1 diff --git a/vizro-ai/tests/unit/vizro-ai/components/test_dataframe_craft.py b/vizro-ai/tests/unit/vizro-ai/components/test_dataframe_craft.py new file mode 100644 index 000000000..ff605ee1c --- /dev/null +++ b/vizro-ai/tests/unit/vizro-ai/components/test_dataframe_craft.py @@ -0,0 +1,97 @@ +import re + +import pandas as pd +import pytest +from langchain.llms.fake import FakeListLLM +from vizro_ai.components import GetDataFrameCraft + + +def dataframe_code(): + return """ + data_frame = data_frame.groupby('continent')['gdpPercap'].sum().reset_index() + data_frame = data_frame.rename(columns={'gdpPercap': 'total_gdp'}) + data_frame.plot(kind='bar', x='continent', y='total_gdp', color='skyblue', legend=False)""" + + +@pytest.fixture +def fake_llm(): + dataframe_code_before_postprocess = re.sub( + r"[\x00-\x1f]", lambda m: "\\u{:04x}".format(ord(m.group(0))), dataframe_code() + ) + response = ['{{"dataframe_code": "{}"}}'.format(dataframe_code_before_postprocess)] + return FakeListLLM(responses=response) + + +@pytest.fixture +def input_df(): + input_df = pd.DataFrame( + { + "contintent": ["Asia", "Asia", "America", "Europe"], + "country": ["China", "India", "US", "UK"], + "gdpPercap": [102, 110, 300, 200], + } + ) + return input_df + + +class 
TestDataFrameCraftMethods:
+    @pytest.fixture(autouse=True)  # pytest hands setup_method the test method, never a fixture
+    def _setup(self, fake_llm):
+        self.get_dataframe_craft = GetDataFrameCraft(llm=fake_llm)
+
+    def test_instantiation(self, fake_llm):  # request the fixture, not the module-level function
+        assert self.get_dataframe_craft.llm == fake_llm
+
+    def test_pre_process(self, input_df):
+        llm_kwargs_to_use, partial_vars = self.get_dataframe_craft._pre_process(df=input_df)
+        expected_partial_vars = {
+            "df_schema": "contintent: object\ncountry: object\ngdpPercap: int64",
+            "df_head": input_df.head().to_markdown(),
+        }
+        assert partial_vars == expected_partial_vars
+
+    @pytest.mark.parametrize(
+        "code_string, expected_code_string",
+        [
+            (
+                "df = pd.DataFrame({'test1': [1, 2], 'test2': [3, 4]})",
+                "import pandas as pd\ndf = pd.DataFrame({'test1': [1, 2], 'test2': [3, 4]}).reset_index()",
+            ),
+            (
+                "df = pd.DataFrame({'test1': [1, 2], 'test2': [3, 4]}).reset_index()",
+                "import pandas as pd\ndf = pd.DataFrame({'test1': [1, 2], 'test2': [3, 4]}).reset_index()",
+            ),
+            (
+                "data_frame = pd.DataFrame({'test1': [1, 1, 2], 'test2': [3, 4, 5]})\n"
+                "data_frame = data_frame.groupby('test1')['test2'].sum()",
+                "import pandas as pd\ndata_frame = pd.DataFrame({'test1': [1, 1, 2], 'test2': [3, 4, 5]})\n"
+                "df = data_frame.groupby('test1')['test2'].sum().reset_index()",
+            ),
+            (
+                "import pandas as pd\n"
+                "df = pd.DataFrame({'test1': [1, 2], 'test2': [3, 4]}).plot(kind='bar', x='test1', y='test2')",
+                "import pandas as pd\ndf = pd.DataFrame({'test1': [1, 2], 'test2': [3, 4]}).reset_index()",
+            ),
+        ],
+    )
+    def test_post_process(self, code_string, expected_code_string, input_df):
+        load_args = {"dataframe_code": code_string}
+        df_code = self.get_dataframe_craft._post_process(load_args, input_df)
+
+        assert df_code == expected_code_string
+
+
+class TestDataFrameCraftResponse:
+    def test_fake_response(self, input_df, fake_llm):
+        get_dataframe_craft = GetDataFrameCraft(fake_llm)
+        df_code = get_dataframe_craft.run(
+            chain_input="choose a best chart for describe the 
composition of gdp in continent, " + "and horizontal line for avg gdp", + df=input_df, + ) + assert ( + df_code == "import pandas as pd\n " + "data_frame = data_frame.groupby('continent')['gdpPercap'].sum().reset_index()\n " + "data_frame = data_frame.rename(columns={'gdpPercap': 'total_gdp'})\n" + "df = data_frame.reset_index()" + ) diff --git a/vizro-ai/tests/unit/vizro-ai/components/test_explanation.py b/vizro-ai/tests/unit/vizro-ai/components/test_explanation.py new file mode 100644 index 000000000..51a06c1f4 --- /dev/null +++ b/vizro-ai/tests/unit/vizro-ai/components/test_explanation.py @@ -0,0 +1,106 @@ +import pytest +from langchain.llms.fake import FakeListLLM +from vizro_ai.components import GetCodeExplanation + + +@pytest.fixture +def fake_llm(): + # This is to simulate the response of LLM + responses = [ + '{"business_insights": "The chart shows ' + "the composition of GDP in different continents. The horizontal line represents " + 'the average GDP across all continents.", "code_explanation": "This code groups the DataFrame by ' + "the 'continent' column and calculates the sum of the 'gdpPercap' column for each continent. It then creates " + "a bar chart using Plotly Express. " + 'It also adds a horizontal line at the average GDP value. 
Finally, it returns the chart."}' + ] + return FakeListLLM(responses=responses) + + +@pytest.fixture +def code_snippet(): + code_snippet = """ + from vizro.models.types import capture + import vizro.plotly.express as px + import pandas as pd + + @capture('graph') + def custom_chart(data_frame: pd.DataFrame = None): + if data_frame is None: + data_frame = pd.DataFrame() + df = data_frame.groupby('continent')['gdpPercap'].sum().reset_index().rename(columns={'gdpPercap': 'total_gdp'}) + fig = px.bar(df, x='continent', y='total_gdp', color='continent', title='Composition of GDP in Continents') + fig.add_hline(y=df['total_gdp'].mean(), line_dash='dash', line_color='red', annotation_text='Average GDP') + return fig + + fig = custom_chart(data_frame=df) + """ + return code_snippet + + +@pytest.fixture +def expected_business_insights(): + business_insights = ( + "The chart shows the composition of GDP in different continents. " + "The horizontal line represents the average GDP across all continents." + ) + return business_insights + + +@pytest.fixture +def expected_code_explanation(): + code_explanation = ( + "This code groups the DataFrame by the 'continent' column and calculates the sum of " + "the 'gdpPercap' column for each continent. It then creates a bar chart using " + "Plotly Express and Vizro. " + "It also adds a horizontal line at the average GDP value. Finally, it returns the chart." + "\n
**This customized chart can be directly used in a Vizro dashboard.** "
+        "\nClick [custom chart docs]"
+        "(https://vizro.readthedocs.io/en/stable/pages/user_guides/custom_charts/) "
+        "for more information."
+    )
+    return code_explanation
+
+
+@pytest.fixture
+def loaded_response():
+    loaded_response = {
+        "business_insights": "The chart shows the composition of GDP in different continents. "
+        "The horizontal line represents the average GDP across all continents.",
+        "code_explanation": "This code groups the DataFrame by the 'continent' column and calculates the sum of "
+        "the 'gdpPercap' column for each continent. It then creates a bar chart using "
+        "Plotly Express. "
+        "It also adds a horizontal line at the average GDP value. Finally, it returns the chart.",
+    }
+    return loaded_response
+
+
+class TestCodeExplanationInstantiation:
+    @pytest.fixture(autouse=True)  # pytest hands setup_method the test method, never a fixture
+    def _setup(self, fake_llm):
+        self.get_code_explanation = GetCodeExplanation(llm=fake_llm)
+
+    def test_instantiation(self, fake_llm):  # request the fixture, not the module-level function
+        assert self.get_code_explanation.llm == fake_llm
+
+    def test_pre_process(self, code_snippet):
+        llm_kwargs, partial_vars = self.get_code_explanation._pre_process(code_snippet)
+        expected_partial_vars = {"code_snippet": code_snippet}
+        assert partial_vars == expected_partial_vars
+
+    def test_post_process(self, loaded_response, expected_business_insights, expected_code_explanation):
+        business_insights, code_explanation = self.get_code_explanation._post_process(loaded_response)
+        assert business_insights == expected_business_insights
+        assert code_explanation == expected_code_explanation
+
+
+class TestCodeExplanation:  # renamed from TestChartSelection: this module exercises GetCodeExplanation
+    def test_fake_response(self, code_snippet, fake_llm, expected_business_insights, expected_code_explanation):
+        get_code_explanation = GetCodeExplanation(fake_llm)
+        business_insights, code_explanation = get_code_explanation.run(
+            chain_input="choose a best chart for describe the composition of gdp in continent, "
+            "and horizontal line for avg gdp",
+ code_snippet=code_snippet, + ) + assert business_insights == expected_business_insights + assert code_explanation == expected_code_explanation diff --git a/vizro-ai/tests/unit/vizro-ai/components/test_visual_code.py b/vizro-ai/tests/unit/vizro-ai/components/test_visual_code.py new file mode 100644 index 000000000..db43cd4da --- /dev/null +++ b/vizro-ai/tests/unit/vizro-ai/components/test_visual_code.py @@ -0,0 +1,114 @@ +import pytest +from langchain.llms.fake import FakeListLLM +from vizro_ai.components import GetVisualCode + + +@pytest.fixture +def chart_types(): + return "bar" + + +@pytest.fixture +def df_code_1(): + return """import pandas as pd +df = df.groupby('continent')['gdpPercap'].sum().reset_index()""" + + +@pytest.fixture +def output_visual_code_LLM_1(): + return """import plotly.express as px + +fig = px.bar(df, x='continent', y='total_gdpPercap', title='Composition of GDP by Continent') +fig.add_hline(y=df['total_gdpPercap'].mean(), line_dash='dash', line_color='red', annotation_text='Average GDP') +fig.show()""" + + +@pytest.fixture +def expected_final_output_1(): + return """import vizro.plotly.express as px +import pandas as pd +df = df.groupby('continent')['gdpPercap'].sum().reset_index() + +fig = px.bar(df, x='continent', y='total_gdpPercap', title='Composition of GDP by Continent') +fig.add_hline(y=df['total_gdpPercap'].mean(), line_dash='dash', line_color='red', annotation_text='Average GDP') +fig.show()""" + + +@pytest.fixture +def df_code_2(): + return """import pandas as pd +df = df.query('year == 2007').groupby('continent')['pop'].sum().reset_index(name='total_pop')""" + + +@pytest.fixture +def output_visual_code_LLM_2(): + return """import plotly.graph_objects as go + +# Create a bar chart +fig = go.Figure(data=[go.Bar(x=df['continent'], y=df['total_pop'])]) + +# Update the layout +fig.update_layout(title='Increase in Population by Continent', xaxis_title='Continent', yaxis_title='Total Population') + +# Show the chart +fig.show()""" + 
+
+@pytest.fixture
+def expected_final_output_2():
+    return """import plotly.graph_objects as go
+import pandas as pd
+df = df.query('year == 2007').groupby('continent')['pop'].sum().reset_index(name='total_pop')
+
+# Create a bar chart
+fig = go.Figure(data=[go.Bar(x=df['continent'], y=df['total_pop'])])
+
+# Update the layout
+fig.update_layout(title='Increase in Population by Continent', xaxis_title='Continent', yaxis_title='Total Population')
+
+# Show the chart
+fig.show()"""
+
+
+@pytest.fixture
+def fake_llm(output_visual_code_LLM_1):
+    """Simulate the LLM with one canned response."""
+    response = ['{{"visual_code": "{}"}}'.format(output_visual_code_LLM_1)]
+    return FakeListLLM(responses=response)
+
+
+class TestGetVisualCodeInstantiation:
+    @pytest.fixture(autouse=True)  # pytest hands setup_method the test method, never a fixture
+    def _setup(self, fake_llm):
+        self.get_visual_code = GetVisualCode(llm=fake_llm)
+
+    def test_instantiation(self, fake_llm):  # request the fixture, not the module-level function
+        assert self.get_visual_code.llm == fake_llm
+
+    def test_pre_process(self, chart_types, df_code_1):
+        _, partial_vars = self.get_visual_code._pre_process(chart_types=chart_types, df_code=df_code_1)
+        assert partial_vars == {"chart_types": chart_types, "df_code": df_code_1}
+
+    @pytest.mark.parametrize(
+        "input,output,df_code",
+        [
+            ("output_visual_code_LLM_1", "expected_final_output_1", "df_code_1"),
+            ("output_visual_code_LLM_2", "expected_final_output_2", "df_code_2"),
+        ],
+    )
+    def test_post_process(self, input, output, df_code, request):
+        input = request.getfixturevalue(input)
+        output = request.getfixturevalue(output)
+        df_code = request.getfixturevalue(df_code)
+        loaded_args = {"visual_code": input}
+        processed_code = self.get_visual_code._post_process(loaded_args, df_code=df_code)
+        assert processed_code == output
+
+
+class TestGetVisualCodeRun:
+    def test_fake_run(self, fake_llm, output_visual_code_LLM_1, expected_final_output_1, df_code_1, chart_types):
+        get_visual_code = GetVisualCode(fake_llm)
+        processed_code = get_visual_code.run(
+            
chain_input=output_visual_code_LLM_1, df_code=df_code_1, chart_types=chart_types + ) + assert processed_code == expected_final_output_1 diff --git a/vizro-ai/tests/unit/vizro-ai/utils/test_safeguard_code.py b/vizro-ai/tests/unit/vizro-ai/utils/test_safeguard_code.py new file mode 100644 index 000000000..533f03801 --- /dev/null +++ b/vizro-ai/tests/unit/vizro-ai/utils/test_safeguard_code.py @@ -0,0 +1,125 @@ +import re + +import pytest +from vizro_ai.utils import _safeguard_check + + +class TestMaliciousImports: + @pytest.mark.parametrize("package", ["sys", "pickle", "os", "subprocess", "eval", "exec", "compile", "open"]) + def test_malicious_import(self, package): + code = f"import {package}" + + with pytest.raises( + Exception, match=f"Unsafe package {package} is used in generated code and cannot be executed." + ): + _safeguard_check(code) + + @pytest.mark.parametrize("package", ["sys", "pickle", "os", "subprocess", "eval", "exec", "compile", "open"]) + def test_malicious_import_in_nested_function(self, package): + code = f"def get_import():\n x = 1\n def get_inner_import():\n import {package}" + with pytest.raises( + Exception, match=f"Unsafe package {package} is used in generated code and cannot be executed." + ): + _safeguard_check(code) + + +class TestMaliciousFunctions: + @pytest.mark.parametrize( + "code_line, builtin", + [ + ("builtins_names = dir(__builtins__)", "__builtins__"), + ("subclasses = int.__subclasses__()", "__subclasses__"), + ("version = sys.version()", "sys"), + ], + ) + def test_malicious_methods_in_code(self, code_line, builtin): + with pytest.raises( + Exception, + match=re.escape( + f"Unsafe methods {builtin} are used in generated code line: {code_line} and cannot be executed." 
+ ), + ): + _safeguard_check(code_line) + + @pytest.mark.parametrize("builtin", ["eval", "exec", "compile", "open", "__import__"]) + def test_malicious_builtins_usage_in_code(self, builtin): + code = f"import pandas as pd\n{builtin}('print(1)')" + with pytest.raises( + Exception, + match=re.escape( + f"Unsafe builtin functions {builtin} are used in generated code line: {builtin}('print(1)') and cannot " + f"be executed. If you require a builtin package, reach out to the Vizro team." + ), + ): + _safeguard_check(code) + + +class TestUnsafeDataFileHandling: + @pytest.mark.parametrize( + "data_handling", + [ + ".to_csv", + ".to_excel", + ".to_parquet", + ".to_clipboard", + ".read_csv", + ".read_excel", + ".read_parquet", + ".read_clipboard", + ".netcdf", + ], + ) + def test_unsafe_data_import_export_in_code(self, data_handling): + code = f"import pandas\ndf = pd.DataFrame()\ndf{data_handling}('testfile')" + with pytest.raises( + Exception, + match=re.escape( + f"Unsafe loading or saving of data files is used in code: {data_handling} in line df{data_handling}" + f"('testfile')" + ), + ): + _safeguard_check(code) + + @pytest.mark.parametrize( + "data_handling", + [ + ".to_csv", + ".to_excel", + ".to_parquet", + ".to_clipboard", + ".read_csv", + ".read_excel", + ".read_parquet", + ".read_clipboard", + ".netcdf", + ], + ) + def test_unsafe_data_import_export_in_function(self, data_handling): + code = f"import pandas\ndef data_handling():\n df = pd{data_handling}('testfile')" + with pytest.raises( + Exception, + match=re.escape( + f"Unsafe loading or saving of data files is used in code: {data_handling} in line df = " + f"pd{data_handling}('testfile')" + ), + ): + _safeguard_check(code) + + @pytest.mark.parametrize( + "datafile, file_type", + [ + ("test.csv", ".csv"), + ("test.xls", ".xls"), + ("test.zip", ".zip"), + ("test.pkl", ".pkl"), + ("test.txt", ".txt"), + ("test.mat", ".mat"), + ], + ) + def test_unsafe_datafiles(self, datafile, file_type): + code = f"import 
pandas\ndef data_handling():\n x = {datafile}" + with pytest.raises( + Exception, + match=(f"Unsafe loading or saving of data files is used in code: {file_type} in line x = {datafile}"), + ): + _safeguard_check(code) diff --git a/vizro-core/tests/unit/vizro/actions/_action_loop/test_get_action_loop_components.py b/vizro-core/tests/unit/vizro/actions/_action_loop/test_get_action_loop_components.py index 68d23cbfb..dcabb030d 100644 --- a/vizro-core/tests/unit/vizro/actions/_action_loop/test_get_action_loop_components.py +++ b/vizro-core/tests/unit/vizro/actions/_action_loop/test_get_action_loop_components.py @@ -149,7 +149,7 @@ def test_no_components(self): ], indirect=True, ) - def test_all_action_loop_components( # noqa: PLR0913 # pylint: disable=too-many-arguments + def test_all_action_loop_components( # pylint: disable=too-many-arguments self, fundamental_components, gateway_components,