From 565afd6cfde35585e804a27b3c8e949d61709389 Mon Sep 17 00:00:00 2001 From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com> Date: Tue, 22 Aug 2023 16:27:46 +0100 Subject: [PATCH 01/12] Remove CausalForestEstimator --- causal_testing/testing/estimators.py | 98 ---------------------------- 1 file changed, 98 deletions(-) diff --git a/causal_testing/testing/estimators.py b/causal_testing/testing/estimators.py index 4e56562c..58ee3fa3 100644 --- a/causal_testing/testing/estimators.py +++ b/causal_testing/testing/estimators.py @@ -9,7 +9,6 @@ import pandas as pd import statsmodels.api as sm import statsmodels.formula.api as smf -from econml.dml import CausalForestDML from patsy import dmatrix from sklearn.ensemble import GradientBoostingRegressor @@ -504,100 +503,3 @@ def estimate_coefficient(self, bootstrap_size=100): return self.estimate_iv_coefficient(self.df), (ci_low, ci_high) - -class CausalForestEstimator(Estimator): - """A causal random forest estimator is a non-parametric estimator which recursively partitions the covariate space - to learn a low-dimensional representation of treatment effect heterogeneity. This form of estimator is best suited - to the estimation of heterogeneous treatment effects i.e. the estimated effect for every sample rather than the - population average. - """ - - def add_modelling_assumptions(self): - """Add any modelling assumptions to the estimator. - - :return self: Update self.modelling_assumptions - """ - self.modelling_assumptions += "Non-parametric estimator: no restrictions imposed on the data." - - def estimate_ate(self) -> float: - """Estimate the average treatment effect. - - :return ate, confidence_intervals: The average treatment effect and 95% confidence intervals. - """ - # Remove any NA containing rows - reduced_df = self.df.copy() - necessary_cols = [self.treatment] + list(self.adjustment_set) + [self.outcome] - missing_rows = reduced_df[necessary_cols].isnull().any(axis=1) - reduced_df = reduced_df[~missing_rows] - - # Split data into effect modifiers (X), confounders (W), treatments (T), and outcome (Y) - if self.effect_modifiers: - effect_modifier_df = reduced_df[list(self.effect_modifiers)] - else: - effect_modifier_df = reduced_df[list(self.adjustment_set)] - confounders_df = reduced_df[list(self.adjustment_set)] - treatment_df = np.ravel(reduced_df[[self.treatment]]) - outcome_df = np.ravel(reduced_df[[self.outcome]]) - - # Fit the model to the data using a gradient boosting regressor for both the treatment and outcome model - model = CausalForestDML( - model_y=GradientBoostingRegressor(), - model_t=GradientBoostingRegressor(), - ) - model.fit(outcome_df, treatment_df, X=effect_modifier_df, W=confounders_df) - - # Obtain the ATE and 95% confidence intervals - ate = model.ate(effect_modifier_df, T0=self.control_value, T1=self.treatment_value) - ate_interval = model.ate_interval(effect_modifier_df, T0=self.control_value, T1=self.treatment_value) - ci_low, ci_high = ate_interval[0], ate_interval[1] - return ate, [ci_low, ci_high] - - def estimate_cates(self) -> pd.DataFrame: - """Estimate the conditional average treatment effect for each sample in the data as a function of a set of - covariates (X) i.e. effect modifiers. That is, the predicted change in outcome caused by the intervention - (change in treatment from control to treatment value) for every execution of the system-under-test, taking into - account the value of each effect modifier X. As a result, for every unique setting of the set of covariates X, - we expect a different CATE. - - :return results_df: A dataframe containing a conditional average treatment effect, 95% confidence intervals, and - the covariate (effect modifier) values for each sample. - """ - - # Remove any NA containing rows - reduced_df = self.df.copy() - necessary_cols = [self.treatment] + list(self.adjustment_set) + [self.outcome] - missing_rows = reduced_df[necessary_cols].isnull().any(axis=1) - reduced_df = reduced_df[~missing_rows] - - # Split data into effect modifiers (X), confounders (W), treatments (T), and outcome (Y) - if self.effect_modifiers: - effect_modifier_df = reduced_df[list(self.effect_modifiers)] - else: - raise ValueError("CATE requires the user to define a set of effect modifiers.") - - if self.adjustment_set: - confounders_df = reduced_df[list(self.adjustment_set)] - else: - confounders_df = None - treatment_df = reduced_df[[self.treatment]] - outcome_df = reduced_df[[self.outcome]] - - # Fit a model to the data - model = CausalForestDML(model_y=GradientBoostingRegressor(), model_t=GradientBoostingRegressor()) - model.fit(outcome_df, treatment_df, X=effect_modifier_df, W=confounders_df) - - # Obtain CATES and confidence intervals - conditional_ates = model.effect(effect_modifier_df, T0=self.control_value, T1=self.treatment_value).flatten() - [ci_low, ci_high] = model.effect_interval( - effect_modifier_df, T0=self.control_value, T1=self.treatment_value, alpha=self.alpha - ) - - # Merge results into a dataframe (CATE, confidence intervals, and effect modifier values) - results_df = pd.DataFrame(columns=["cate", "ci_low", "ci_high"]) - results_df["cate"] = list(conditional_ates) - results_df["ci_low"] = list(ci_low.flatten()) - results_df["ci_high"] = list(ci_high.flatten()) - effect_modifier_df.reset_index(drop=True, inplace=True) - results_df[list(self.effect_modifiers)] = effect_modifier_df - results_df.sort_values(by=list(self.effect_modifiers), inplace=True) - return results_df, None From 8b1321a56e47fb50160e7b8f625721a8aca80ef2 Mon Sep 17 00:00:00 2001 From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com> Date: Tue, 22 Aug 2023 17:09:12 +0100 Subject: [PATCH 02/12] Remove all references of CausalForestEstimator --- causal_testing/specification/causal_dag.py | 2 +- causal_testing/testing/estimators.py | 2 +- examples/poisson/example_run_causal_tests.py | 3 +- tests/json_front_tests/test_json_class.py | 10 ++- tests/testing_tests/test_causal_test_case.py | 42 +----------- tests/testing_tests/test_causal_test_suite.py | 37 ++++++----- tests/testing_tests/test_estimators.py | 64 ------------------- 7 files changed, 31 insertions(+), 129 deletions(-) diff --git a/causal_testing/specification/causal_dag.py b/causal_testing/specification/causal_dag.py index 849f8e39..3ce9fd83 100644 --- a/causal_testing/specification/causal_dag.py +++ b/causal_testing/specification/causal_dag.py @@ -66,7 +66,7 @@ def list_all_min_sep( # 7. Check that there exists at least one neighbour of the treatment nodes that is not in the outcome node set if treatment_node_set_neighbours.difference(outcome_node_set): # 7.1. If so, sample a random node from the set of treatment nodes' neighbours not in the outcome node set - node = set(sample(treatment_node_set_neighbours.difference(outcome_node_set), 1)) + node = set(sample(sorted(treatment_node_set_neighbours.difference(outcome_node_set)), 1)) # 7.2. Add this node to the treatment node set and recurse (left branch) yield from list_all_min_sep( diff --git a/causal_testing/testing/estimators.py b/causal_testing/testing/estimators.py index 58ee3fa3..3293be6c 100644 --- a/causal_testing/testing/estimators.py +++ b/causal_testing/testing/estimators.py @@ -1,5 +1,5 @@ """This module contains the Estimator abstract class, as well as its concrete extensions: LogisticRegressionEstimator, -LinearRegressionEstimator and CausalForestEstimator""" +LinearRegressionEstimator""" import logging from abc import ABC, abstractmethod from typing import Any diff --git a/examples/poisson/example_run_causal_tests.py b/examples/poisson/example_run_causal_tests.py index bfca6fe8..50ee8458 100644 --- a/examples/poisson/example_run_causal_tests.py +++ b/examples/poisson/example_run_causal_tests.py @@ -3,7 +3,7 @@ import scipy import os -from causal_testing.testing.estimators import LinearRegressionEstimator, CausalForestEstimator +from causal_testing.testing.estimators import LinearRegressionEstimator from causal_testing.testing.causal_test_outcome import ExactValue, Positive, Negative, NoEffect, CausalTestOutcome from causal_testing.testing.causal_test_result import CausalTestResult from causal_testing.json_front.json_class import JsonUtility @@ -127,7 +127,6 @@ def populate_num_shapes_unit(data): estimators = { "WidthHeightEstimator": WidthHeightEstimator, - "CausalForestEstimator": CausalForestEstimator, "LinearRegressionEstimator": LinearRegressionEstimator, } diff --git a/tests/json_front_tests/test_json_class.py b/tests/json_front_tests/test_json_class.py index e9a25da5..e348cc57 100644 --- a/tests/json_front_tests/test_json_class.py +++ b/tests/json_front_tests/test_json_class.py @@ -4,7 +4,7 @@ import scipy import os -from causal_testing.testing.estimators import LinearRegressionEstimator, CausalForestEstimator +from causal_testing.testing.estimators import LinearRegressionEstimator, Estimator from causal_testing.testing.causal_test_outcome import NoEffect, Positive from tests.test_helpers import remove_temp_dir_if_existent from causal_testing.json_front.json_class import JsonUtility, CausalVariables @@ -292,12 +292,16 @@ def test_no_data_provided(self): json_class.setup(self.scenario) def test_estimator_formula_type_check(self): + class ExampleEstimator(Estimator): + def add_modelling_assumptions(self): + pass + example_test = { "tests": [ { "name": "test1", "mutations": {"test_input": "Increase"}, - "estimator": "CausalForestEstimator", + "estimator": "ExampleEstimator", "estimate_type": "ate", "effect_modifiers": [], "expected_effect": {"test_output": "Positive"}, @@ -312,7 +316,7 @@ def test_estimator_formula_type_check(self): "Increase": lambda x: self.json_class.scenario.treatment_variables[x].z3 > self.json_class.scenario.variables[x].z3 } - estimators = {"CausalForestEstimator": CausalForestEstimator} + estimators = {"ExampleEstimator": ExampleEstimator} with self.assertRaises(TypeError): self.json_class.run_json_tests(effects=effects, mutates=mutates, estimators=estimators, f_flag=False) diff --git a/tests/testing_tests/test_causal_test_case.py b/tests/testing_tests/test_causal_test_case.py index 2e2ab52e..51f7ed0b 100644 --- a/tests/testing_tests/test_causal_test_case.py +++ b/tests/testing_tests/test_causal_test_case.py @@ -10,7 +10,7 @@ from causal_testing.data_collection.data_collector import ObservationalDataCollector from causal_testing.testing.causal_test_case import CausalTestCase from causal_testing.testing.causal_test_outcome import ExactValue -from causal_testing.testing.estimators import CausalForestEstimator, LinearRegressionEstimator +from causal_testing.testing.estimators import LinearRegressionEstimator from causal_testing.testing.base_test_case import BaseTestCase @@ -106,19 +106,6 @@ def test_check_minimum_adjustment_set(self): minimal_adjustment_set = self.causal_dag.identification(self.base_test_case) self.assertEqual(minimal_adjustment_set, {"D"}) - def test_execute_test_observational_causal_forest_estimator(self): - """Check that executing the causal test case returns the correct results for the dummy data using a causal - forest estimator.""" - estimation_model = CausalForestEstimator( - "A", - self.treatment_value, - self.control_value, - self.minimal_adjustment_set, - "C", - self.df, - ) - causal_test_result = self.causal_test_case.execute_test(estimation_model, self.data_collector) - self.assertAlmostEqual(causal_test_result.test_value.value, 4, delta=1) def test_invalid_causal_effect(self): """Check that executing the causal test case returns the correct results for dummy data using a linear @@ -229,32 +216,5 @@ def test_execute_test_observational_linear_regression_estimator_squared_term(sel causal_test_result = self.causal_test_case.execute_test(estimation_model, self.data_collector) self.assertAlmostEqual(round(causal_test_result.test_value.value, 1), 4, delta=1) - def test_execute_observational_causal_forest_estimator_cates(self): - """Check that executing the causal test case returns the correct conditional average treatment effects for - dummy data with effect multiplicative effect modification. C ~ (4*(A+2) + D)*M""" - # Add some effect modifier M that has a multiplicative effect on C - self.df["M"] = np.random.randint(1, 5, len(self.df)) - self.df["C"] *= self.df["M"] - estimation_model = CausalForestEstimator( - "A", - self.treatment_value, - self.control_value, - self.minimal_adjustment_set, - "C", - self.df, - effect_modifiers={"M": None}, - ) - self.causal_test_case.estimate_type = "cates" - causal_test_result = self.causal_test_case.execute_test(estimation_model, self.data_collector) - causal_test_result = causal_test_result.test_value.value - # Check that each effect modifier's strata has a greater ATE than the last (ascending order) - causal_test_result_m1 = causal_test_result.loc[causal_test_result["M"] == 1] - causal_test_result_m2 = causal_test_result.loc[causal_test_result["M"] == 2] - causal_test_result_m3 = causal_test_result.loc[causal_test_result["M"] == 3] - causal_test_result_m4 = causal_test_result.loc[causal_test_result["M"] == 4] - self.assertLess(causal_test_result_m1["cate"].mean(), causal_test_result_m2["cate"].mean()) - self.assertLess(causal_test_result_m2["cate"].mean(), causal_test_result_m3["cate"].mean()) - self.assertLess(causal_test_result_m3["cate"].mean(), causal_test_result_m4["cate"].mean()) - def tearDown(self) -> None: remove_temp_dir_if_existent() diff --git a/tests/testing_tests/test_causal_test_suite.py b/tests/testing_tests/test_causal_test_suite.py index b3d0f448..96fd766a 100644 --- a/tests/testing_tests/test_causal_test_suite.py +++ b/tests/testing_tests/test_causal_test_suite.py @@ -7,7 +7,7 @@ from causal_testing.testing.base_test_case import BaseTestCase from causal_testing.specification.variable import Input, Output from causal_testing.testing.causal_test_outcome import ExactValue -from causal_testing.testing.estimators import CausalForestEstimator, LinearRegressionEstimator +from causal_testing.testing.estimators import LinearRegressionEstimator, LogisticRegressionEstimator from causal_testing.specification.causal_specification import CausalSpecification, Scenario from causal_testing.data_collection.data_collector import ObservationalDataCollector from tests.test_helpers import create_temp_dir_if_non_existent, remove_temp_dir_if_existent @@ -100,19 +100,22 @@ def test_execute_test_suite_single_base_test_case(self): causal_test_case_result = causal_test_results[self.base_test_case] self.assertAlmostEqual(causal_test_case_result["LinearRegressionEstimator"][0].test_value.value, 4, delta=1e-10) - def test_execute_test_suite_multiple_estimators(self): - """Check that executing a test suite with multiple estimators returns correct results for the dummy data - for each estimator - """ - estimators = [LinearRegressionEstimator, CausalForestEstimator] - test_suite_2_estimators = CausalTestSuite() - test_list = [CausalTestCase(self.base_test_case, self.expected_causal_effect, 0, 1)] - test_suite_2_estimators.add_test_object( - base_test_case=self.base_test_case, causal_test_case_list=test_list, estimators_classes=estimators - ) - causal_test_results = test_suite_2_estimators.execute_test_suite(self.data_collector, self.causal_specification) - causal_test_case_result = causal_test_results[self.base_test_case] - linear_regression_result = causal_test_case_result["LinearRegressionEstimator"][0] - causal_forrest_result = causal_test_case_result["CausalForestEstimator"][0] - self.assertAlmostEqual(linear_regression_result.test_value.value, 4, delta=1e-1) - self.assertAlmostEqual(causal_forrest_result.test_value.value, 4, delta=1e-1) + # Without CausalForestEstimator we now only have 2 estimators. Unfortunately LogicisticRegressionEstimator does not + # currently work with TestSuite. So for now removed test + + # def test_execute_test_suite_multiple_estimators(self): + # """Check that executing a test suite with multiple estimators returns correct results for the dummy data + # for each estimator + # """ + # estimators = [LinearRegressionEstimator, LogisticRegressionEstimator] + # test_suite_2_estimators = CausalTestSuite() + # test_list = [CausalTestCase(self.base_test_case, self.expected_causal_effect, 0, 1)] + # test_suite_2_estimators.add_test_object( + # base_test_case=self.base_test_case, causal_test_case_list=test_list, estimators_classes=estimators + # ) + # causal_test_results = test_suite_2_estimators.execute_test_suite(self.data_collector, self.causal_specification) + # causal_test_case_result = causal_test_results[self.base_test_case] + # linear_regression_result = causal_test_case_result["LinearRegressionEstimator"][0] + # logistic_regression_estimator = causal_test_case_result["LogisticRegressionEstimator"][0] + # self.assertAlmostEqual(linear_regression_result.test_value.value, 4, delta=1e-1) + # self.assertAlmostEqual(logistic_regression_estimator.test_value.value, 4, delta=1e-1) diff --git a/tests/testing_tests/test_estimators.py b/tests/testing_tests/test_estimators.py index 835a1144..51faad37 100644 --- a/tests/testing_tests/test_estimators.py +++ b/tests/testing_tests/test_estimators.py @@ -4,7 +4,6 @@ import matplotlib.pyplot as plt from causal_testing.testing.estimators import ( LinearRegressionEstimator, - CausalForestEstimator, LogisticRegressionEstimator, InstrumentalVariableEstimator, ) @@ -409,69 +408,6 @@ def test_program_11_2_with_robustness_validation(self): self.assertEqual(round(cv.estimate_robustness(model)["treatments"], 4), 0.7353) -class TestCausalForestEstimator(unittest.TestCase): - """Test the linear regression estimator against the programming exercises in Section 2 of Hernán and Robins [1]. - - Reference: Hernán MA, Robins JM (2020). Causal Inference: What If. Boca Raton: Chapman & Hall/CRC. - Link: https://www.hsph.harvard.edu/miguel-hernan/causal-inference-book/ - """ - - @classmethod - def setUpClass(cls) -> None: - cls.nhefs_df = load_nhefs_df() - cls.chapter_11_df = load_chapter_11_df() - - def test_program_15_ate(self): - """Test whether our causal forest implementation produces the similar ATE to program 15.1 (p. 163, 184).""" - df = self.nhefs_df - covariates = { - "sex", - "race", - "age", - "edu_2", - "edu_3", - "edu_4", - "edu_5", - "exercise_1", - "exercise_2", - "active_1", - "active_2", - "wt71", - "smokeintensity", - "smokeyrs", - } - causal_forest = CausalForestEstimator("qsmk", 1, 0, covariates, "wt82_71", df, {"smokeintensity": 40}) - ate, _ = causal_forest.estimate_ate() - self.assertGreater(round(ate, 1), 2.5) - self.assertLess(round(ate, 1), 4.5) - - def test_program_15_cate(self): - """Test whether our causal forest implementation produces the similar CATE to program 15.1 (p. 163, 184).""" - df = self.nhefs_df - smoking_intensity_5_and_40_df = df.loc[(df["smokeintensity"] == 5) | (df["smokeintensity"] == 40)] - covariates = { - "sex", - "race", - "age", - "edu_2", - "edu_3", - "edu_4", - "edu_5", - "exercise_1", - "exercise_2", - "active_1", - "active_2", - "wt71", - "smokeintensity", - "smokeyrs", - } - causal_forest = CausalForestEstimator( - "qsmk", 1, 0, covariates, "wt82_71", smoking_intensity_5_and_40_df, {"smokeintensity": 40} - ) - cates_df, _ = causal_forest.estimate_cates() - self.assertGreater(cates_df["cate"].mean(), 0) - - class TestLinearRegressionInteraction(unittest.TestCase): """Test linear regression for estimating effects involving interaction.""" From f30ca8f08799acb376fe893258507e28c8fae80e Mon Sep 17 00:00:00 2001 From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com> Date: Wed, 23 Aug 2023 09:53:01 +0100 Subject: [PATCH 03/12] Remove econml from pyproject.toml --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 46fa2c0b..78309aca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,6 @@ license = { text = "MIT" } keywords = ["causal inference", "verification"] dependencies = [ "z3_solver~=4.11.2", # z3_solver does not follow semantic versioning and tying to 4.11 introduces problems - "econml~=0.12", "fitter~=1.4", "lhsmdu~=1.1", "networkx~=2.6", From f34a6d1a171a60fdc7e2cfdffcb7eb2ce7a0215a Mon Sep 17 00:00:00 2001 From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com> Date: Thu, 11 Apr 2024 16:42:13 +0100 Subject: [PATCH 04/12] Remove helpers and demonstrate tempfile --- tests/test_helpers.py | 27 ------------------- tests/testing_tests/test_causal_test_suite.py | 10 ++++--- 2 files changed, 7 insertions(+), 30 deletions(-) delete mode 100644 tests/test_helpers.py diff --git a/tests/test_helpers.py b/tests/test_helpers.py deleted file mode 100644 index 4b77d2be..00000000 --- a/tests/test_helpers.py +++ /dev/null @@ -1,27 +0,0 @@ -"""A library of helper methods for the causal testing framework tests.""" -import sys -from os import mkdir -from os.path import join, dirname, realpath, exists -from shutil import rmtree - - -def create_temp_dir_if_non_existent(): - """Create a temporary directory in the current working directory if one does not exist already. - - Create a temporary directory named temp in the current working directory provided it does not already exist, and - then return the path to the temporary directory (regardless of whether it existed previously or has just been - created). - - :return: Path to the temporary directory. - """ - temp_dir = join(dirname(realpath(sys.argv[0])), "temp/") - if not exists(temp_dir): - mkdir(temp_dir) - return temp_dir - - -def remove_temp_dir_if_existent(): - """Remove a temporary directory from the current working directory if one exists.""" - temp_dir = join(dirname(realpath(sys.argv[0])), "temp/") - if exists(temp_dir): - rmtree(temp_dir) diff --git a/tests/testing_tests/test_causal_test_suite.py b/tests/testing_tests/test_causal_test_suite.py index df0c2eb3..d6af77cb 100644 --- a/tests/testing_tests/test_causal_test_suite.py +++ b/tests/testing_tests/test_causal_test_suite.py @@ -1,6 +1,8 @@ import unittest import os +import tempfile import numpy as np +import shutil import pandas as pd from causal_testing.testing.causal_test_suite import CausalTestSuite from causal_testing.testing.causal_test_case import CausalTestCase @@ -10,7 +12,6 @@ from causal_testing.testing.estimators import LinearRegressionEstimator, LogisticRegressionEstimator from causal_testing.specification.causal_specification import CausalSpecification, Scenario from causal_testing.data_collection.data_collector import ObservationalDataCollector -from tests.test_helpers import create_temp_dir_if_non_existent, remove_temp_dir_if_existent from causal_testing.specification.causal_dag import CausalDAG @@ -29,8 +30,8 @@ def setUp(self) -> None: self.scenario = Scenario({A, C, D}) # 2. Create DAG and dummy data and write to csvs - temp_dir_path = create_temp_dir_if_non_existent() - dag_dot_path = os.path.join(temp_dir_path, "dag.dot") + self.temp_dir_path = tempfile.mkdtemp() + dag_dot_path = os.path.join(self.temp_dir_path, "dag.dot") dag_dot = """digraph G { A -> C; D -> A; D -> C}""" with open(dag_dot_path, "w") as file: file.write(dag_dot) @@ -64,6 +65,9 @@ def setUp(self) -> None: self.data_collector = ObservationalDataCollector(self.scenario, self.df) + def tearDown(self) -> None: + shutil.rmtree(self.temp_dir_path) + def test_adding_test_object(self): "test an object can be added to the test_suite using the add_test_object function" test_suite = CausalTestSuite() From 89dba726e95782fed90db6ccaba4443bd18adc7f Mon Sep 17 00:00:00 2001 From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com> Date: Thu, 11 Apr 2024 16:44:02 +0100 Subject: [PATCH 05/12] Remove CausalForestEstimator test --- tests/testing_tests/test_estimators.py | 27 -------------------------- 1 file changed, 27 deletions(-) diff --git a/tests/testing_tests/test_estimators.py b/tests/testing_tests/test_estimators.py index 76457170..cac19afc 100644 --- a/tests/testing_tests/test_estimators.py +++ b/tests/testing_tests/test_estimators.py @@ -437,33 +437,6 @@ def test_program_11_3_cublic_spline(self): self.assertAlmostEqual(ate_1[0] * 2, ate_2[0]) - def test_program_15_cate(self): - """Test whether our causal forest implementation produces the similar CATE to program 15.1 (p. 163, 184).""" - df = self.nhefs_df - smoking_intensity_5_and_40_df = df.loc[(df["smokeintensity"] == 5) | (df["smokeintensity"] == 40)] - covariates = { - "sex", - "race", - "age", - "edu_2", - "edu_3", - "edu_4", - "edu_5", - "exercise_1", - "exercise_2", - "active_1", - "active_2", - "wt71", - "smokeintensity", - "smokeyrs", - } - causal_forest = CausalForestEstimator( - "qsmk", 1, 0, covariates, "wt82_71", smoking_intensity_5_and_40_df, {"smokeintensity": 40} - ) - cates_df, _ = causal_forest.estimate_cates() - self.assertGreater(cates_df["cate"].mean(), 0) - - class TestLinearRegressionInteraction(unittest.TestCase): """Test linear regression for estimating effects involving interaction.""" From c9f0fc8fa3e32d4f9c56eaec45b928edb57fd254 Mon Sep 17 00:00:00 2001 From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com> Date: Thu, 11 Apr 2024 17:04:58 +0100 Subject: [PATCH 06/12] Remove custom helpers and use tempfile for all tests --- .../test_observational_data_collector.py | 10 +++++----- .../test_abstract_test_case.py | 10 +++++----- tests/json_front_tests/test_json_class.py | 2 -- tests/specification_tests/test_causal_dag.py | 10 ++++++---- .../test_metamorphic_relations.py | 10 ++++++---- .../test_causal_surrogate_assisted.py | 9 +++++---- .../testing_tests/test_causal_test_adequacy.py | 2 -- tests/testing_tests/test_causal_test_case.py | 18 ++++++++---------- 8 files changed, 35 insertions(+), 36 deletions(-) diff --git a/tests/data_collection_tests/test_observational_data_collector.py b/tests/data_collection_tests/test_observational_data_collector.py index 73ebcd07..97163853 100644 --- a/tests/data_collection_tests/test_observational_data_collector.py +++ b/tests/data_collection_tests/test_observational_data_collector.py @@ -1,5 +1,6 @@ import unittest import os +import shutil, tempfile import pandas as pd from causal_testing.data_collection.data_collector import ObservationalDataCollector from causal_testing.specification.causal_specification import Scenario @@ -7,7 +8,6 @@ from scipy.stats import uniform, rv_discrete from enum import Enum import random -from tests.test_helpers import create_temp_dir_if_non_existent, remove_temp_dir_if_existent class TestObservationalDataCollector(unittest.TestCase): @@ -17,9 +17,9 @@ class Color(Enum): GREEN = "GREEN" BLUE = "BLUE" - temp_dir_path = create_temp_dir_if_non_existent() - self.dag_dot_path = os.path.join(temp_dir_path, "dag.dot") - self.observational_df_path = os.path.join(temp_dir_path, "observational_data.csv") + self.temp_dir_path = tempfile.mkdtemp() + self.dag_dot_path = os.path.join(self.temp_dir_path, "dag.dot") + self.observational_df_path = os.path.join(self.temp_dir_path, "observational_data.csv") # Y = 3*X1 + X2*X3 + 10 self.observational_df = pd.DataFrame( {"X1": [1, 2, 3, 4], "X2": [5, 6, 7, 8], "X3": [10, 20, 30, 40], "Y2": ["RED", "GREEN", "BLUE", "BLUE"]} @@ -66,7 +66,7 @@ def populate_m(data): assert all((m == 2 * x1 for x1, m in zip(data["X1"], data["M"]))) def tearDown(self) -> None: - remove_temp_dir_if_existent() + shutil.rmtree(self.temp_dir_path) if __name__ == "__main__": diff --git a/tests/generation_tests/test_abstract_test_case.py b/tests/generation_tests/test_abstract_test_case.py index d12f1aa6..fd40f3de 100644 --- a/tests/generation_tests/test_abstract_test_case.py +++ b/tests/generation_tests/test_abstract_test_case.py @@ -1,5 +1,6 @@ import unittest import os +import shutil, tempfile import pandas as pd import numpy as np from causal_testing.generation.abstract_causal_test_case import AbstractCausalTestCase @@ -7,7 +8,6 @@ from causal_testing.specification.causal_specification import Scenario from causal_testing.specification.variable import Input, Output from scipy.stats import uniform, rv_discrete -from tests.test_helpers import create_temp_dir_if_non_existent, remove_temp_dir_if_existent from causal_testing.testing.causal_test_outcome import Positive from z3 import And from enum import Enum @@ -29,9 +29,9 @@ class TestAbstractTestCase(unittest.TestCase): """ def setUp(self) -> None: - temp_dir_path = create_temp_dir_if_non_existent() - self.dag_dot_path = os.path.join(temp_dir_path, "dag.dot") - self.observational_df_path = os.path.join(temp_dir_path, "observational_data.csv") + self.temp_dir_path = tempfile.mkdtemp() + self.dag_dot_path = os.path.join(self.temp_dir_path, "dag.dot") + self.observational_df_path = os.path.join(self.temp_dir_path, "observational_data.csv") # Y = 3*X1 + X2*X3 + 10 self.observational_df = pd.DataFrame({"X1": [1, 2, 3, 4], "X2": [5, 6, 7, 8], "X3": [10, 20, 30, 40]}) self.observational_df["Y"] = self.observational_df.apply( @@ -192,7 +192,7 @@ def test_feasible_constraints(self): assert len(concrete_tests) < 1000 def tearDown(self) -> None: - remove_temp_dir_if_existent() + shutil.rmtree(self.temp_dir_path) if __name__ == "__main__": diff --git a/tests/json_front_tests/test_json_class.py b/tests/json_front_tests/test_json_class.py index e348cc57..8fa49194 100644 --- a/tests/json_front_tests/test_json_class.py +++ b/tests/json_front_tests/test_json_class.py @@ -6,7 +6,6 @@ from causal_testing.testing.estimators import LinearRegressionEstimator, Estimator from causal_testing.testing.causal_test_outcome import NoEffect, Positive -from tests.test_helpers import remove_temp_dir_if_existent from causal_testing.json_front.json_class import JsonUtility, CausalVariables from causal_testing.specification.variable import Input, Output, Meta from causal_testing.specification.scenario import Scenario @@ -321,7 +320,6 @@ def add_modelling_assumptions(self): self.json_class.run_json_tests(effects=effects, mutates=mutates, estimators=estimators, f_flag=False) def tearDown(self) -> None: - remove_temp_dir_if_existent() if os.path.exists("temp_out.txt"): os.remove("temp_out.txt") diff --git a/tests/specification_tests/test_causal_dag.py b/tests/specification_tests/test_causal_dag.py index f88a56a7..d3f34136 100644 --- a/tests/specification_tests/test_causal_dag.py +++ b/tests/specification_tests/test_causal_dag.py @@ -1,11 +1,13 @@ import unittest import os +import shutil, tempfile import networkx as nx from causal_testing.specification.causal_dag import CausalDAG, close_separator, list_all_min_sep from causal_testing.specification.scenario import Scenario from causal_testing.specification.variable import Input, Output from causal_testing.testing.base_test_case import BaseTestCase -from tests.test_helpers import create_temp_dir_if_non_existent, remove_temp_dir_if_existent + + class TestCausalDAGIssue90(unittest.TestCase): @@ -14,8 +16,8 @@ class TestCausalDAGIssue90(unittest.TestCase): """ def setUp(self) -> None: - temp_dir_path = create_temp_dir_if_non_existent() - self.dag_dot_path = os.path.join(temp_dir_path, "dag.dot") + self.temp_dir_path = tempfile.mkdtemp() + self.dag_dot_path = os.path.join(self.temp_dir_path, "dag.dot") dag_dot = """digraph DAG { rankdir=LR; Z -> X; X -> M; M -> Y; Z -> M; }""" with open(self.dag_dot_path, "w") as f: f.write(dag_dot) @@ -28,7 +30,7 @@ def test_enumerate_minimal_adjustment_sets(self): self.assertEqual([{"Z"}], adjustment_sets) def tearDown(self) -> None: - remove_temp_dir_if_existent() + shutil.rmtree(self.temp_dir_path) class TestIVAssumptions(unittest.TestCase): diff --git a/tests/specification_tests/test_metamorphic_relations.py b/tests/specification_tests/test_metamorphic_relations.py index 3e1e998c..dc35e071 100644 --- a/tests/specification_tests/test_metamorphic_relations.py +++ b/tests/specification_tests/test_metamorphic_relations.py @@ -1,10 +1,9 @@ import unittest import os - +import shutil, tempfile import pandas as pd from itertools import combinations -from tests.test_helpers import create_temp_dir_if_non_existent from causal_testing.specification.causal_dag import CausalDAG from causal_testing.specification.causal_specification import Scenario from causal_testing.specification.metamorphic_relation import ( @@ -69,8 +68,8 @@ def run_system_with_input_configuration(self, input_configuration: dict) -> pd.D class TestMetamorphicRelation(unittest.TestCase): def setUp(self) -> None: - temp_dir_path = create_temp_dir_if_non_existent() - self.dag_dot_path = os.path.join(temp_dir_path, "dag.dot") + self.temp_dir_path = tempfile.mkdtemp() + self.dag_dot_path = os.path.join(self.temp_dir_path, "dag.dot") dag_dot = """digraph DAG { rankdir=LR; X1 -> Z; Z -> M; M -> Y; X2 -> Z; X3 -> M;}""" with open(self.dag_dot_path, "w") as f: f.write(dag_dot) @@ -88,6 +87,9 @@ def setUp(self) -> None: self.scenario, self.default_control_input_config, self.default_treatment_input_config ) + def tearDown(self) -> None: + shutil.rmtree(self.temp_dir_path) + def test_should_cause_metamorphic_relations_correct_spec(self): """Test if the ShouldCause MR passes all metamorphic tests where the DAG perfectly represents the program.""" causal_dag = CausalDAG(self.dag_dot_path) diff --git a/tests/surrogate_tests/test_causal_surrogate_assisted.py b/tests/surrogate_tests/test_causal_surrogate_assisted.py index 43afe0e4..43fd98a7 100644 --- a/tests/surrogate_tests/test_causal_surrogate_assisted.py +++ b/tests/surrogate_tests/test_causal_surrogate_assisted.py @@ -7,8 +7,9 @@ from causal_testing.surrogate.causal_surrogate_assisted import SimulationResult, CausalSurrogateAssistedTestCase, Simulator from causal_testing.surrogate.surrogate_search_algorithms import GeneticSearchAlgorithm from causal_testing.testing.estimators import CubicSplineRegressionEstimator -from tests.test_helpers import create_temp_dir_if_non_existent, remove_temp_dir_if_existent + import os +import shutil, tempfile import pandas as pd import numpy as np @@ -43,8 +44,8 @@ def setUpClass(cls) -> None: cls.class_df = load_class_df() def setUp(self): - temp_dir_path = create_temp_dir_if_non_existent() - self.dag_dot_path = os.path.join(temp_dir_path, "dag.dot") + self.temp_dir_path = tempfile.mkdtemp() + self.dag_dot_path = os.path.join(self.temp_dir_path, "dag.dot") dag_dot = """digraph DAG { rankdir=LR; Z -> X; X -> M [included=1, expected=positive]; M -> Y [included=1, expected=negative]; Z -> M; }""" with open(self.dag_dot_path, "w") as f: f.write(dag_dot) @@ -199,7 +200,7 @@ def test_causal_surrogate_assisted_execution_incorrect_search_config(self): custom_data_aggregator=data_double_aggregator) def tearDown(self) -> None: - remove_temp_dir_if_existent() + shutil.rmtree(self.temp_dir_path) def load_class_df(): """Get the testing data and put into a dataframe.""" diff --git a/tests/testing_tests/test_causal_test_adequacy.py b/tests/testing_tests/test_causal_test_adequacy.py index 0435dd54..1f8d2ffa 100644 --- a/tests/testing_tests/test_causal_test_adequacy.py +++ b/tests/testing_tests/test_causal_test_adequacy.py @@ -10,7 +10,6 @@ from causal_testing.testing.causal_test_suite import CausalTestSuite from causal_testing.testing.causal_test_adequacy import DAGAdequacy from causal_testing.testing.causal_test_outcome import NoEffect, Positive -from tests.test_helpers import remove_temp_dir_if_existent from causal_testing.json_front.json_class import JsonUtility, CausalVariables from causal_testing.specification.variable import Input, Output, Meta from causal_testing.specification.scenario import Scenario @@ -255,6 +254,5 @@ def test_dag_adequacy_independent_other_way(self): ) def tearDown(self) -> None: - remove_temp_dir_if_existent() if os.path.exists("temp_out.txt"): os.remove("temp_out.txt") diff --git a/tests/testing_tests/test_causal_test_case.py b/tests/testing_tests/test_causal_test_case.py index 774a0c8d..4d081a62 100644 --- a/tests/testing_tests/test_causal_test_case.py +++ b/tests/testing_tests/test_causal_test_case.py @@ -1,9 +1,10 @@ import unittest import os +import tempfile +import shutil import pandas as pd import numpy as np -from tests.test_helpers import create_temp_dir_if_non_existent, remove_temp_dir_if_existent from causal_testing.specification.causal_specification import CausalSpecification, Scenario from causal_testing.specification.variable import Input, Output from causal_testing.specification.causal_dag import CausalDAG @@ -44,9 +45,6 @@ def test_str(self): " {Output: C::float}: ExactValue: 4±0.2.", ) - def tearDown(self) -> None: - remove_temp_dir_if_existent() - class TestCausalTestExecution(unittest.TestCase): """Test the causal test execution workflow using observational data. @@ -57,8 +55,8 @@ class TestCausalTestExecution(unittest.TestCase): def setUp(self) -> None: # 1. Create Causal DAG - temp_dir_path = create_temp_dir_if_non_existent() - dag_dot_path = os.path.join(temp_dir_path, "dag.dot") + self.temp_dir_path = tempfile.mkdtemp() + dag_dot_path = os.path.join(self.temp_dir_path, "dag.dot") dag_dot = """digraph G { A -> C; D -> A; D -> C}""" with open(dag_dot_path, "w") as file: file.write(dag_dot) @@ -88,7 +86,7 @@ def setUp(self) -> None: df = pd.DataFrame({"D": list(np.random.normal(60, 10, 1000))}) # D = exogenous df["A"] = [1 if d > 50 else 0 for d in df["D"]] df["C"] = df["D"] + (4 * (df["A"] + 2)) # C = (4*(A+2)) + D - self.observational_data_csv_path = os.path.join(temp_dir_path, "observational_data.csv") + self.observational_data_csv_path = os.path.join(self.temp_dir_path, "observational_data.csv") df.to_csv(self.observational_data_csv_path, index=False) # 5. Create observational data collector @@ -101,6 +99,9 @@ def setUp(self) -> None: self.treatment_value = 1 self.control_value = 0 + def tearDown(self) -> None: + shutil.rmtree(self.temp_dir_path) + def test_check_minimum_adjustment_set(self): """Check that the minimum adjustment set is correctly made""" minimal_adjustment_set = self.causal_dag.identification(self.base_test_case) @@ -215,6 +216,3 @@ def test_execute_test_observational_linear_regression_estimator_squared_term(sel ) causal_test_result = self.causal_test_case.execute_test(estimation_model, self.data_collector) pd.testing.assert_series_equal(causal_test_result.test_value.value, pd.Series(4.0), atol=1) - - def tearDown(self) -> None: - remove_temp_dir_if_existent() From f62011bcf5dc59cc0eab9bf0dd870eaef9d9120d Mon Sep 17 00:00:00 2001 From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com> Date: Thu, 11 Apr 2024 17:05:45 +0100 Subject: [PATCH 07/12] Remove CausalForestDML dependency --- causal_testing/testing/estimators.py | 1 - 1 file changed, 1 deletion(-) diff --git a/causal_testing/testing/estimators.py b/causal_testing/testing/estimators.py index 09b861c0..307a5d28 100644 --- a/causal_testing/testing/estimators.py +++ b/causal_testing/testing/estimators.py @@ -9,7 +9,6 @@ import pandas as pd import statsmodels.api as sm import statsmodels.formula.api as smf -from econml.dml import CausalForestDML from patsy import dmatrix # pylint: disable = no-name-in-module from patsy import ModelDesc from sklearn.ensemble import GradientBoostingRegressor From 355a42da6547f733141159760021af71263a6816 Mon Sep 17 00:00:00 2001 From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com> Date: Tue, 23 Apr 2024 11:51:54 +0100 Subject: [PATCH 08/12] Update DAG tests to use TempFile --- tests/specification_tests/test_causal_dag.py | 50 ++++++++++---------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/tests/specification_tests/test_causal_dag.py b/tests/specification_tests/test_causal_dag.py index d3f34136..c020ae67 100644 --- a/tests/specification_tests/test_causal_dag.py +++ b/tests/specification_tests/test_causal_dag.py @@ -35,8 +35,8 @@ def tearDown(self) -> None: class TestIVAssumptions(unittest.TestCase): def setUp(self) -> None: - temp_dir_path = create_temp_dir_if_non_existent() - self.dag_dot_path = os.path.join(temp_dir_path, "dag.dot") + self.temp_dir_path = tempfile.mkdtemp() + self.dag_dot_path = os.path.join(self.temp_dir_path, "dag.dot") dag_dot = """digraph G { I -> X; X -> Y; U -> X; U -> Y;}""" f = open(self.dag_dot_path, "w") f.write(dag_dot) @@ -63,7 +63,9 @@ def test_common_cause(self): causal_dag.graph.add_edge("U", "I") with self.assertRaises(ValueError): causal_dag.check_iv_assumptions("X", "Y", "I") - + + def tearDown(self) -> None: + shutil.rmtree(self.temp_dir_path) class TestCausalDAG(unittest.TestCase): """ @@ -74,8 +76,8 @@ class TestCausalDAG(unittest.TestCase): """ def setUp(self) -> None: - temp_dir_path = create_temp_dir_if_non_existent() - self.dag_dot_path = os.path.join(temp_dir_path, "dag.dot") + self.temp_dir_path = tempfile.mkdtemp() + self.dag_dot_path = os.path.join(self.temp_dir_path, "dag.dot") dag_dot = """digraph G { A -> B; B -> C; D -> A; D -> C;}""" f = open(self.dag_dot_path, "w") f.write(dag_dot) @@ -107,7 +109,7 @@ def test_to_dot_string(self): self.assertEqual(causal_dag.to_dot_string(), """digraph G {\nA -> B;\nB -> C;\nD -> A;\nD -> C;\n}""") def tearDown(self) -> None: - remove_temp_dir_if_existent() + shutil.rmtree(self.temp_dir_path) class TestCyclicCausalDAG(unittest.TestCase): @@ -116,8 +118,8 @@ class TestCyclicCausalDAG(unittest.TestCase): """ def setUp(self) -> None: - temp_dir_path = create_temp_dir_if_non_existent() - self.dag_dot_path = os.path.join(temp_dir_path, "dag.dot") + self.temp_dir_path = tempfile.mkdtemp() + self.dag_dot_path = os.path.join(self.temp_dir_path, "dag.dot") dag_dot = """digraph G { A -> B; B -> C; D -> A; D -> C; C -> A;}""" f = open(self.dag_dot_path, "w") f.write(dag_dot) @@ -127,7 +129,7 @@ def test_invalid_causal_dag(self): self.assertRaises(nx.HasACycle, CausalDAG, self.dag_dot_path) def tearDown(self) -> None: - remove_temp_dir_if_existent() + shutil.rmtree(self.temp_dir_path) class TestDAGDirectEffectIdentification(unittest.TestCase): @@ -136,8 +138,8 @@ class TestDAGDirectEffectIdentification(unittest.TestCase): """ def setUp(self) -> None: - temp_dir_path = create_temp_dir_if_non_existent() - self.dag_dot_path = os.path.join(temp_dir_path, "dag.dot") + self.temp_dir_path = tempfile.mkdtemp() + self.dag_dot_path = os.path.join(self.temp_dir_path, "dag.dot") dag_dot = """digraph G { X1->X2;X2->V;X2->D1;X2->D2;D1->Y;D1->D2;Y->D3;Z->X2;Z->Y;}""" f = open(self.dag_dot_path, "w") f.write(dag_dot) @@ -152,7 +154,9 @@ def test_direct_effect_adjustment_sets_no_adjustment(self): causal_dag = CausalDAG(self.dag_dot_path) adjustment_sets = causal_dag.direct_effect_adjustment_sets(["X2"], ["D1"]) self.assertEqual(list(adjustment_sets), [set()]) - + + def tearDown(self) -> None: + shutil.rmtree(self.temp_dir_path) class TestDAGIdentification(unittest.TestCase): """ @@ -160,8 +164,8 @@ class TestDAGIdentification(unittest.TestCase): """ def setUp(self) -> None: - temp_dir_path = create_temp_dir_if_non_existent() - self.dag_dot_path = os.path.join(temp_dir_path, "dag.dot") + self.temp_dir_path = tempfile.mkdtemp() + self.dag_dot_path = os.path.join(self.temp_dir_path, "dag.dot") dag_dot = """digraph G { X1->X2;X2->V;X2->D1;X2->D2;D1->Y;D1->D2;Y->D3;Z->X2;Z->Y;}""" f = open(self.dag_dot_path, "w") f.write(dag_dot) @@ -339,8 +343,7 @@ def test_dag_with_non_character_nodes(self): self.assertEqual(adjustment_sets, [{"aa"}, {"la"}, {"va"}]) def tearDown(self) -> None: - remove_temp_dir_if_existent() - + shutil.rmtree(self.temp_dir_path) class TestDependsOnOutputs(unittest.TestCase): """ @@ -352,8 +355,8 @@ def setUp(self) -> None: from causal_testing.specification.variable import Input, Output, Meta from causal_testing.specification.scenario import Scenario - temp_dir_path = create_temp_dir_if_non_existent() - self.dag_dot_path = os.path.join(temp_dir_path, "dag.dot") + self.temp_dir_path = tempfile.mkdtemp() + self.dag_dot_path = os.path.join(self.temp_dir_path, "dag.dot") dag_dot = """digraph G { A -> B; B -> C; D -> A; D -> C}""" f = open(self.dag_dot_path, "w") f.write(dag_dot) @@ -391,7 +394,7 @@ def test_depends_on_outputs_input(self): self.assertFalse(causal_dag.depends_on_outputs("D", self.scenario)) def tearDown(self) -> None: - remove_temp_dir_if_existent() + shutil.rmtree(self.temp_dir_path) class TestUndirectedGraphAlgorithms(unittest.TestCase): @@ -431,9 +434,6 @@ def test_list_all_min_sep(self): min_separators = set(frozenset(min_separator) for min_separator in min_separators) self.assertEqual({frozenset({2, 3}), frozenset({3, 4}), frozenset({4, 5})}, min_separators) - def tearDown(self) -> None: - remove_temp_dir_if_existent() - class TestHiddenVariableDAG(unittest.TestCase): """ @@ -441,8 +441,8 @@ class TestHiddenVariableDAG(unittest.TestCase): """ def setUp(self) -> None: - temp_dir_path = create_temp_dir_if_non_existent() - self.dag_dot_path = os.path.join(temp_dir_path, "dag.dot") + self.temp_dir_path = tempfile.mkdtemp() + self.dag_dot_path = os.path.join(self.temp_dir_path, "dag.dot") dag_dot = """digraph DAG { rankdir=LR; Z -> X; X -> M; M -> Y; Z -> M; }""" with open(self.dag_dot_path, "w") as f: f.write(dag_dot) @@ -463,4 +463,4 @@ def test_hidden_varaible_adjustment_sets(self): self.assertNotEqual(adjustment_sets, adjustment_sets_with_hidden) def tearDown(self) -> None: - remove_temp_dir_if_existent() + shutil.rmtree(self.temp_dir_path) From d81bb5011e252535aa89fabc6e97446e931d474f Mon Sep 17 00:00:00 2001 From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com> Date: Tue, 23 Apr 2024 13:34:10 +0100 Subject: [PATCH 09/12] Dependency Updates --- pyproject.toml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index fb1f26f7..cf8ca872 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,17 +16,17 @@ license = { text = "MIT" } keywords = ["causal inference", "verification"] dependencies = [ "z3_solver~=4.11.2", # z3_solver does not follow semantic versioning and tying to 4.11 introduces problems - "fitter~=1.4", + "fitter~=1.7", "lhsmdu~=1.1", "networkx~=2.6", - "numpy~=1.23", - "pandas~=1.3", - "scikit_learn~=1.1", + "numpy~=1.26", + "pandas~=1.5", + "scikit_learn~=1.4", "scipy~=1.7", - "statsmodels~=0.13", - "tabulate~=0.8", - "pydot~=1.4", - "pygad~=3.2" + "statsmodels~=0.14", + "tabulate~=0.9", + "pydot~=2.0", + "pygad~=3.3" ] dynamic = ["version"] From 8052df56084eed5e86959f52f56117fec07b95a1 Mon Sep 17 00:00:00 2001 From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com> Date: Tue, 23 Apr 2024 13:39:21 +0100 Subject: [PATCH 10/12] black formatting --- .../generation/abstract_causal_test_case.py | 1 + causal_testing/json_front/json_class.py | 6 ++-- .../specification/metamorphic_relation.py | 8 ++++-- causal_testing/specification/scenario.py | 1 + .../surrogate/causal_surrogate_assisted.py | 2 +- .../surrogate/surrogate_search_algorithms.py | 8 ++++-- causal_testing/testing/base_test_case.py | 1 + .../testing/causal_test_adequacy.py | 1 + causal_testing/testing/causal_test_case.py | 1 + causal_testing/testing/causal_test_outcome.py | 28 +++++++++++-------- causal_testing/testing/causal_test_result.py | 9 ++++-- causal_testing/testing/causal_test_suite.py | 1 + causal_testing/testing/estimators.py | 4 +-- causal_testing/utils/validation.py | 1 + 14 files changed, 45 insertions(+), 27 deletions(-) diff --git a/causal_testing/generation/abstract_causal_test_case.py b/causal_testing/generation/abstract_causal_test_case.py index 762fd16d..1bf30f57 100644 --- a/causal_testing/generation/abstract_causal_test_case.py +++ b/causal_testing/generation/abstract_causal_test_case.py @@ -1,4 +1,5 @@ """This module contains the class AbstractCausalTestCase, which generates concrete test cases""" + import itertools import logging from enum import Enum diff --git a/causal_testing/json_front/json_class.py b/causal_testing/json_front/json_class.py index 88386441..cca98a20 100644 --- a/causal_testing/json_front/json_class.py +++ b/causal_testing/json_front/json_class.py @@ -108,9 +108,9 @@ def _create_abstract_test_case(self, test, mutates, effects): self.scenario.variables[variable]: effects[effect] for variable, effect in test["expected_effect"].items() }, - effect_modifiers={self.scenario.variables[v] for v in test["effect_modifiers"]} - if "effect_modifiers" in test - else {}, + effect_modifiers=( + {self.scenario.variables[v] for v in test["effect_modifiers"]} if "effect_modifiers" in test else {} + ), estimate_type=test["estimate_type"], effect=test.get("effect", "total"), ) diff --git a/causal_testing/specification/metamorphic_relation.py b/causal_testing/specification/metamorphic_relation.py index e8b6978f..9d8c8afb 100644 --- a/causal_testing/specification/metamorphic_relation.py +++ b/causal_testing/specification/metamorphic_relation.py @@ -73,9 +73,11 @@ def generate_follow_up(self, n_tests: int, min_val: float, max_val: float, seed: source_follow_up_test_inputs[[follow_up_input]] .rename(columns={follow_up_input: self.treatment_var}) .to_dict(orient="records"), - test_inputs.to_dict(orient="records") - if not test_inputs.empty - else [{}] * len(source_follow_up_test_inputs), + ( + test_inputs.to_dict(orient="records") + if not test_inputs.empty + else [{}] * len(source_follow_up_test_inputs) + ), ) ] diff --git a/causal_testing/specification/scenario.py b/causal_testing/specification/scenario.py index 63753d1e..7e984abd 100644 --- a/causal_testing/specification/scenario.py +++ b/causal_testing/specification/scenario.py @@ -1,4 +1,5 @@ """This module holds the Scenario Class""" + from collections.abc import Iterable, Mapping from tabulate import tabulate diff --git a/causal_testing/surrogate/causal_surrogate_assisted.py b/causal_testing/surrogate/causal_surrogate_assisted.py index 77ef88f7..74f309be 100644 --- a/causal_testing/surrogate/causal_surrogate_assisted.py +++ b/causal_testing/surrogate/causal_surrogate_assisted.py @@ -19,7 +19,7 @@ class SimulationResult: relationship: str -class SearchAlgorithm(ABC): # pylint: disable=too-few-public-methods +class SearchAlgorithm(ABC): # pylint: disable=too-few-public-methods """Class to be inherited with the search algorithm consisting of a search function and the fitness function of the space to be searched""" diff --git a/causal_testing/surrogate/surrogate_search_algorithms.py b/causal_testing/surrogate/surrogate_search_algorithms.py index 94984b6a..75628622 100644 --- a/causal_testing/surrogate/surrogate_search_algorithms.py +++ b/causal_testing/surrogate/surrogate_search_algorithms.py @@ -1,4 +1,5 @@ """Module containing implementation of search algorithm for surrogate search """ + # Fitness functions are required to be iteratively defined, including all variables within. from operator import itemgetter @@ -26,7 +27,7 @@ def __init__(self, delta=0.05, config: dict = None) -> None: # pylint: disable=too-many-locals def search( - self, surrogate_models: list[CubicSplineRegressionEstimator], specification: CausalSpecification + self, surrogate_models: list[CubicSplineRegressionEstimator], specification: CausalSpecification ) -> list: solutions = [] @@ -47,7 +48,8 @@ def fitness_function(ga, solution, idx): # pylint: disable=unused-argument ate = surrogate.estimate_ate_calculated(adjustment_dict) if len(ate) > 1: raise ValueError( - "Multiple ate values provided but currently only single values supported in this method") + "Multiple ate values provided but currently only single values supported in this method" + ) return contradiction_function(ate[0]) gene_types, gene_space = self.create_gene_types(surrogate, specification) @@ -84,7 +86,7 @@ def fitness_function(ga, solution, idx): # pylint: disable=unused-argument @staticmethod def create_gene_types( - surrogate_model: CubicSplineRegressionEstimator, specification: CausalSpecification + surrogate_model: CubicSplineRegressionEstimator, specification: CausalSpecification ) -> tuple[list, list]: """Generate the gene_types and gene_space for a given fitness function and specification :param surrogate_model: Instance of a CubicSplineRegressionEstimator diff --git a/causal_testing/testing/base_test_case.py b/causal_testing/testing/base_test_case.py index 5da698dc..2cc02304 100644 --- a/causal_testing/testing/base_test_case.py +++ b/causal_testing/testing/base_test_case.py @@ -1,4 +1,5 @@ """This module contains the BaseTestCase dataclass, which stores the information required for identification""" + from dataclasses import dataclass from causal_testing.specification.variable import Variable from causal_testing.testing.effect import Effect diff --git a/causal_testing/testing/causal_test_adequacy.py b/causal_testing/testing/causal_test_adequacy.py index dfa43a90..2a9bff93 100644 --- a/causal_testing/testing/causal_test_adequacy.py +++ b/causal_testing/testing/causal_test_adequacy.py @@ -1,6 +1,7 @@ """ This module contains code to measure various aspects of causal test adequacy. """ + from itertools import combinations from copy import deepcopy import pandas as pd diff --git a/causal_testing/testing/causal_test_case.py b/causal_testing/testing/causal_test_case.py index b8a43f3b..da47c126 100644 --- a/causal_testing/testing/causal_test_case.py +++ b/causal_testing/testing/causal_test_case.py @@ -1,4 +1,5 @@ """This module contains the CausalTestCase class, a class that holds the information required for a causal test""" + import logging from typing import Any import numpy as np diff --git a/causal_testing/testing/causal_test_outcome.py b/causal_testing/testing/causal_test_outcome.py index 0c3ae1e4..3846b514 100644 --- a/causal_testing/testing/causal_test_outcome.py +++ b/causal_testing/testing/causal_test_outcome.py @@ -29,10 +29,12 @@ class SomeEffect(CausalTestOutcome): def apply(self, res: CausalTestResult) -> bool: if res.test_value.type == "risk_ratio": return any( - 1 < ci_low < ci_high or ci_low < ci_high < 1 for ci_low, ci_high in zip(res.ci_low(), res.ci_high())) - if res.test_value.type in ('coefficient', 'ate'): + 1 < ci_low < ci_high or ci_low < ci_high < 1 for ci_low, ci_high in zip(res.ci_low(), res.ci_high()) + ) + if res.test_value.type in ("coefficient", "ate"): return any( - 0 < ci_low < ci_high or ci_low < ci_high < 0 for ci_low, ci_high in zip(res.ci_low(), res.ci_high())) + 0 < ci_low < ci_high or ci_low < ci_high < 0 for ci_low, ci_high in zip(res.ci_low(), res.ci_high()) + ) raise ValueError(f"Test Value type {res.test_value.type} is not valid for this TestOutcome") @@ -51,17 +53,19 @@ def __init__(self, atol: float = 1e-10, ctol: float = 0.05): def apply(self, res: CausalTestResult) -> bool: if res.test_value.type == "risk_ratio": - return any(ci_low < 1 < ci_high or np.isclose(value, 1.0, atol=self.atol) for ci_low, ci_high, value in - zip(res.ci_low(), res.ci_high(), res.test_value.value)) - if res.test_value.type in ('coefficient', 'ate'): + return any( + ci_low < 1 < ci_high or np.isclose(value, 1.0, atol=self.atol) + for ci_low, ci_high, value in zip(res.ci_low(), res.ci_high(), res.test_value.value) + ) + if res.test_value.type in ("coefficient", "ate"): value = res.test_value.value if isinstance(res.ci_high(), Iterable) else [res.test_value.value] return ( - sum( - not ((ci_low < 0 < ci_high) or abs(v) < self.atol) - for ci_low, ci_high, v in zip(res.ci_low(), res.ci_high(), value) - ) - / len(value) - < self.ctol + sum( + not ((ci_low < 0 < ci_high) or abs(v) < self.atol) + for ci_low, ci_high, v in zip(res.ci_low(), res.ci_high(), value) + ) + / len(value) + < self.ctol ) raise ValueError(f"Test Value type {res.test_value.type} is not valid for this TestOutcome") diff --git a/causal_testing/testing/causal_test_result.py b/causal_testing/testing/causal_test_result.py index afae6195..65a2085e 100644 --- a/causal_testing/testing/causal_test_result.py +++ b/causal_testing/testing/causal_test_result.py @@ -1,6 +1,7 @@ """This module contains the CausalTestResult class, which is a container for the results of a causal test, and the TestValue dataclass. """ + from typing import Any from dataclasses import dataclass import pandas as pd @@ -86,9 +87,11 @@ def to_dict(self, json=False): "outcome": self.estimator.outcome, "adjustment_set": list(self.adjustment_set) if json else self.adjustment_set, "effect_measure": self.test_value.type, - "effect_estimate": self.test_value.value.to_dict() - if json and hasattr(self.test_value.value, "to_dict") - else self.test_value.value, + "effect_estimate": ( + self.test_value.value.to_dict() + if json and hasattr(self.test_value.value, "to_dict") + else self.test_value.value + ), "ci_low": self.ci_low().to_dict() if json and hasattr(self.ci_low(), "to_dict") else self.ci_low(), "ci_high": self.ci_high().to_dict() if json and hasattr(self.ci_high(), "to_dict") else self.ci_high(), } diff --git a/causal_testing/testing/causal_test_suite.py b/causal_testing/testing/causal_test_suite.py index d9c973a6..47c5ef98 100644 --- a/causal_testing/testing/causal_test_suite.py +++ b/causal_testing/testing/causal_test_suite.py @@ -1,5 +1,6 @@ """This module contains the CausalTestSuite class, for details on using it: https://causal-testing-framework.readthedocs.io/en/latest/test_suite.html""" + import logging from collections import UserDict diff --git a/causal_testing/testing/estimators.py b/causal_testing/testing/estimators.py index 307a5d28..e1d323fd 100644 --- a/causal_testing/testing/estimators.py +++ b/causal_testing/testing/estimators.py @@ -1,5 +1,6 @@ """This module contains the Estimator abstract class, as well as its concrete extensions: LogisticRegressionEstimator, LinearRegressionEstimator""" + import logging from abc import ABC, abstractmethod from typing import Any @@ -351,7 +352,7 @@ def estimate_coefficient(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]: model = self._run_linear_regression() newline = "\n" patsy_md = ModelDesc.from_formula(self.treatment) - if any((self.df.dtypes[factor.name()] == 'object' for factor in patsy_md.rhs_termlist[1].factors)): + if any((self.df.dtypes[factor.name()] == "object" for factor in patsy_md.rhs_termlist[1].factors)): design_info = dmatrix(self.formula.split("~")[1], self.df).design_info treatment = design_info.column_names[design_info.term_name_slices[self.treatment]] else: @@ -590,4 +591,3 @@ def estimate_coefficient(self, bootstrap_size=100) -> tuple[pd.Series, list[pd.S ci_high = pd.Series(bootstraps[bootstrap_size - bound]) return pd.Series(self.estimate_iv_coefficient(self.df)), [ci_low, ci_high] - diff --git a/causal_testing/utils/validation.py b/causal_testing/utils/validation.py index 7a29ac60..63df5dc3 100644 --- a/causal_testing/utils/validation.py +++ b/causal_testing/utils/validation.py @@ -1,4 +1,5 @@ """This module contains the CausalValidator class for performing Quantitive Bias Analysis techniques""" + import math import numpy as np from scipy.stats import t From 2b0ea8180e57308fc35669743c5eaa648facc7b9 Mon Sep 17 00:00:00 2001 From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com> Date: Tue, 23 Apr 2024 14:46:57 +0100 Subject: [PATCH 11/12] Removed unused imports --- causal_testing/testing/estimators.py | 1 - 1 file changed, 1 deletion(-) diff --git a/causal_testing/testing/estimators.py b/causal_testing/testing/estimators.py index e1d323fd..baebb679 100644 --- a/causal_testing/testing/estimators.py +++ b/causal_testing/testing/estimators.py @@ -12,7 +12,6 @@ import statsmodels.formula.api as smf from patsy import dmatrix # pylint: disable = no-name-in-module from patsy import ModelDesc -from sklearn.ensemble import GradientBoostingRegressor from statsmodels.regression.linear_model import RegressionResultsWrapper from statsmodels.tools.sm_exceptions import PerfectSeparationError From 5b95def6b069e1e4139ce029d3457258ce759041 Mon Sep 17 00:00:00 2001 From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com> Date: Tue, 23 Apr 2024 15:19:33 +0100 Subject: [PATCH 12/12] Add multiple python versions to CI tests --- .github/workflows/ci-tests-drafts.yaml | 2 +- .github/workflows/ci-tests.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci-tests-drafts.yaml b/.github/workflows/ci-tests-drafts.yaml index 18b60443..5617027b 100644 --- a/.github/workflows/ci-tests-drafts.yaml +++ b/.github/workflows/ci-tests-drafts.yaml @@ -13,7 +13,7 @@ jobs: strategy: matrix: os: ["ubuntu-latest", "windows-latest", "macos-latest"] - python-version: ["3.9"] + python-version: ["3.9", "3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v2 - name: Set up Python using Miniconda diff --git a/.github/workflows/ci-tests.yaml b/.github/workflows/ci-tests.yaml index df622814..e45e57e1 100644 --- a/.github/workflows/ci-tests.yaml +++ b/.github/workflows/ci-tests.yaml @@ -18,7 +18,7 @@ jobs: strategy: matrix: os: ["ubuntu-latest", "windows-latest", "macos-latest"] - python-version: ["3.9"] + python-version: ["3.9", "3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v2 - name: Set up Python using Miniconda