From 565afd6cfde35585e804a27b3c8e949d61709389 Mon Sep 17 00:00:00 2001
From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com>
Date: Tue, 22 Aug 2023 16:27:46 +0100
Subject: [PATCH 01/12] Remove CausalForestEstimator

---
 causal_testing/testing/estimators.py | 98 ----------------------------
 1 file changed, 98 deletions(-)

diff --git a/causal_testing/testing/estimators.py b/causal_testing/testing/estimators.py
index 4e56562c..58ee3fa3 100644
--- a/causal_testing/testing/estimators.py
+++ b/causal_testing/testing/estimators.py
@@ -9,7 +9,6 @@
 import pandas as pd
 import statsmodels.api as sm
 import statsmodels.formula.api as smf
-from econml.dml import CausalForestDML
 from patsy import dmatrix
 
 from sklearn.ensemble import GradientBoostingRegressor
@@ -504,100 +503,3 @@ def estimate_coefficient(self, bootstrap_size=100):
 
         return self.estimate_iv_coefficient(self.df), (ci_low, ci_high)
 
-
-class CausalForestEstimator(Estimator):
-    """A causal random forest estimator is a non-parametric estimator which recursively partitions the covariate space
-    to learn a low-dimensional representation of treatment effect heterogeneity. This form of estimator is best suited
-    to the estimation of heterogeneous treatment effects i.e. the estimated effect for every sample rather than the
-    population average.
-    """
-
-    def add_modelling_assumptions(self):
-        """Add any modelling assumptions to the estimator.
-
-        :return self: Update self.modelling_assumptions
-        """
-        self.modelling_assumptions += "Non-parametric estimator: no restrictions imposed on the data."
-
-    def estimate_ate(self) -> float:
-        """Estimate the average treatment effect.
-
-        :return ate, confidence_intervals: The average treatment effect and 95% confidence intervals.
-        """
-        # Remove any NA containing rows
-        reduced_df = self.df.copy()
-        necessary_cols = [self.treatment] + list(self.adjustment_set) + [self.outcome]
-        missing_rows = reduced_df[necessary_cols].isnull().any(axis=1)
-        reduced_df = reduced_df[~missing_rows]
-
-        # Split data into effect modifiers (X), confounders (W), treatments (T), and outcome (Y)
-        if self.effect_modifiers:
-            effect_modifier_df = reduced_df[list(self.effect_modifiers)]
-        else:
-            effect_modifier_df = reduced_df[list(self.adjustment_set)]
-        confounders_df = reduced_df[list(self.adjustment_set)]
-        treatment_df = np.ravel(reduced_df[[self.treatment]])
-        outcome_df = np.ravel(reduced_df[[self.outcome]])
-
-        # Fit the model to the data using a gradient boosting regressor for both the treatment and outcome model
-        model = CausalForestDML(
-            model_y=GradientBoostingRegressor(),
-            model_t=GradientBoostingRegressor(),
-        )
-        model.fit(outcome_df, treatment_df, X=effect_modifier_df, W=confounders_df)
-
-        # Obtain the ATE and 95% confidence intervals
-        ate = model.ate(effect_modifier_df, T0=self.control_value, T1=self.treatment_value)
-        ate_interval = model.ate_interval(effect_modifier_df, T0=self.control_value, T1=self.treatment_value)
-        ci_low, ci_high = ate_interval[0], ate_interval[1]
-        return ate, [ci_low, ci_high]
-
-    def estimate_cates(self) -> pd.DataFrame:
-        """Estimate the conditional average treatment effect for each sample in the data as a function of a set of
-        covariates (X) i.e. effect modifiers. That is, the predicted change in outcome caused by the intervention
-        (change in treatment from control to treatment value) for every execution of the system-under-test, taking into
-        account the value of each effect modifier X. As a result, for every unique setting of the set of covariates X,
-        we expect a different CATE.
-
-        :return results_df: A dataframe containing a conditional average treatment effect, 95% confidence intervals, and
-        the covariate (effect modifier) values for each sample.
-        """
-
-        # Remove any NA containing rows
-        reduced_df = self.df.copy()
-        necessary_cols = [self.treatment] + list(self.adjustment_set) + [self.outcome]
-        missing_rows = reduced_df[necessary_cols].isnull().any(axis=1)
-        reduced_df = reduced_df[~missing_rows]
-
-        # Split data into effect modifiers (X), confounders (W), treatments (T), and outcome (Y)
-        if self.effect_modifiers:
-            effect_modifier_df = reduced_df[list(self.effect_modifiers)]
-        else:
-            raise ValueError("CATE requires the user to define a set of effect modifiers.")
-
-        if self.adjustment_set:
-            confounders_df = reduced_df[list(self.adjustment_set)]
-        else:
-            confounders_df = None
-        treatment_df = reduced_df[[self.treatment]]
-        outcome_df = reduced_df[[self.outcome]]
-
-        # Fit a model to the data
-        model = CausalForestDML(model_y=GradientBoostingRegressor(), model_t=GradientBoostingRegressor())
-        model.fit(outcome_df, treatment_df, X=effect_modifier_df, W=confounders_df)
-
-        # Obtain CATES and confidence intervals
-        conditional_ates = model.effect(effect_modifier_df, T0=self.control_value, T1=self.treatment_value).flatten()
-        [ci_low, ci_high] = model.effect_interval(
-            effect_modifier_df, T0=self.control_value, T1=self.treatment_value, alpha=self.alpha
-        )
-
-        # Merge results into a dataframe (CATE, confidence intervals, and effect modifier values)
-        results_df = pd.DataFrame(columns=["cate", "ci_low", "ci_high"])
-        results_df["cate"] = list(conditional_ates)
-        results_df["ci_low"] = list(ci_low.flatten())
-        results_df["ci_high"] = list(ci_high.flatten())
-        effect_modifier_df.reset_index(drop=True, inplace=True)
-        results_df[list(self.effect_modifiers)] = effect_modifier_df
-        results_df.sort_values(by=list(self.effect_modifiers), inplace=True)
-        return results_df, None

From 8b1321a56e47fb50160e7b8f625721a8aca80ef2 Mon Sep 17 00:00:00 2001
From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com>
Date: Tue, 22 Aug 2023 17:09:12 +0100
Subject: [PATCH 02/12] Remove all references to CausalForestEstimator

---
 causal_testing/specification/causal_dag.py    |  2 +-
 causal_testing/testing/estimators.py          |  2 +-
 examples/poisson/example_run_causal_tests.py  |  3 +-
 tests/json_front_tests/test_json_class.py     | 10 ++-
 tests/testing_tests/test_causal_test_case.py  | 42 +-----------
 tests/testing_tests/test_causal_test_suite.py | 37 ++++++-----
 tests/testing_tests/test_estimators.py        | 64 -------------------
 7 files changed, 31 insertions(+), 129 deletions(-)

diff --git a/causal_testing/specification/causal_dag.py b/causal_testing/specification/causal_dag.py
index 849f8e39..3ce9fd83 100644
--- a/causal_testing/specification/causal_dag.py
+++ b/causal_testing/specification/causal_dag.py
@@ -66,7 +66,7 @@ def list_all_min_sep(
         # 7. Check that there exists at least one neighbour of the treatment nodes that is not in the outcome node set
         if treatment_node_set_neighbours.difference(outcome_node_set):
             # 7.1. If so, sample a random node from the set of treatment nodes' neighbours not in the outcome node set
-            node = set(sample(treatment_node_set_neighbours.difference(outcome_node_set), 1))
+            node = set(sample(sorted(treatment_node_set_neighbours.difference(outcome_node_set)), 1))
 
             # 7.2. Add this node to the treatment node set and recurse (left branch)
             yield from list_all_min_sep(
diff --git a/causal_testing/testing/estimators.py b/causal_testing/testing/estimators.py
index 58ee3fa3..3293be6c 100644
--- a/causal_testing/testing/estimators.py
+++ b/causal_testing/testing/estimators.py
@@ -1,5 +1,5 @@
 """This module contains the Estimator abstract class, as well as its concrete extensions: LogisticRegressionEstimator,
-LinearRegressionEstimator and CausalForestEstimator"""
+LinearRegressionEstimator"""
 import logging
 from abc import ABC, abstractmethod
 from typing import Any
diff --git a/examples/poisson/example_run_causal_tests.py b/examples/poisson/example_run_causal_tests.py
index bfca6fe8..50ee8458 100644
--- a/examples/poisson/example_run_causal_tests.py
+++ b/examples/poisson/example_run_causal_tests.py
@@ -3,7 +3,7 @@
 import scipy
 import os
 
-from causal_testing.testing.estimators import LinearRegressionEstimator, CausalForestEstimator
+from causal_testing.testing.estimators import LinearRegressionEstimator
 from causal_testing.testing.causal_test_outcome import ExactValue, Positive, Negative, NoEffect, CausalTestOutcome
 from causal_testing.testing.causal_test_result import CausalTestResult
 from causal_testing.json_front.json_class import JsonUtility
@@ -127,7 +127,6 @@ def populate_num_shapes_unit(data):
 
 estimators = {
     "WidthHeightEstimator": WidthHeightEstimator,
-    "CausalForestEstimator": CausalForestEstimator,
     "LinearRegressionEstimator": LinearRegressionEstimator,
 }
 
diff --git a/tests/json_front_tests/test_json_class.py b/tests/json_front_tests/test_json_class.py
index e9a25da5..e348cc57 100644
--- a/tests/json_front_tests/test_json_class.py
+++ b/tests/json_front_tests/test_json_class.py
@@ -4,7 +4,7 @@
 import scipy
 import os
 
-from causal_testing.testing.estimators import LinearRegressionEstimator, CausalForestEstimator
+from causal_testing.testing.estimators import LinearRegressionEstimator, Estimator
 from causal_testing.testing.causal_test_outcome import NoEffect, Positive
 from tests.test_helpers import remove_temp_dir_if_existent
 from causal_testing.json_front.json_class import JsonUtility, CausalVariables
@@ -292,12 +292,16 @@ def test_no_data_provided(self):
             json_class.setup(self.scenario)
 
     def test_estimator_formula_type_check(self):
+        class ExampleEstimator(Estimator):
+            def add_modelling_assumptions(self):
+                pass
+
         example_test = {
             "tests": [
                 {
                     "name": "test1",
                     "mutations": {"test_input": "Increase"},
-                    "estimator": "CausalForestEstimator",
+                    "estimator": "ExampleEstimator",
                     "estimate_type": "ate",
                     "effect_modifiers": [],
                     "expected_effect": {"test_output": "Positive"},
@@ -312,7 +316,7 @@ def test_estimator_formula_type_check(self):
             "Increase": lambda x: self.json_class.scenario.treatment_variables[x].z3
                                   > self.json_class.scenario.variables[x].z3
         }
-        estimators = {"CausalForestEstimator": CausalForestEstimator}
+        estimators = {"ExampleEstimator": ExampleEstimator}
         with self.assertRaises(TypeError):
             self.json_class.run_json_tests(effects=effects, mutates=mutates, estimators=estimators, f_flag=False)
 
diff --git a/tests/testing_tests/test_causal_test_case.py b/tests/testing_tests/test_causal_test_case.py
index 2e2ab52e..51f7ed0b 100644
--- a/tests/testing_tests/test_causal_test_case.py
+++ b/tests/testing_tests/test_causal_test_case.py
@@ -10,7 +10,7 @@
 from causal_testing.data_collection.data_collector import ObservationalDataCollector
 from causal_testing.testing.causal_test_case import CausalTestCase
 from causal_testing.testing.causal_test_outcome import ExactValue
-from causal_testing.testing.estimators import CausalForestEstimator, LinearRegressionEstimator
+from causal_testing.testing.estimators import LinearRegressionEstimator
 from causal_testing.testing.base_test_case import BaseTestCase
 
 
@@ -106,19 +106,6 @@ def test_check_minimum_adjustment_set(self):
         minimal_adjustment_set = self.causal_dag.identification(self.base_test_case)
         self.assertEqual(minimal_adjustment_set, {"D"})
 
-    def test_execute_test_observational_causal_forest_estimator(self):
-        """Check that executing the causal test case returns the correct results for the dummy data using a causal
-        forest estimator."""
-        estimation_model = CausalForestEstimator(
-            "A",
-            self.treatment_value,
-            self.control_value,
-            self.minimal_adjustment_set,
-            "C",
-            self.df,
-        )
-        causal_test_result = self.causal_test_case.execute_test(estimation_model, self.data_collector)
-        self.assertAlmostEqual(causal_test_result.test_value.value, 4, delta=1)
 
     def test_invalid_causal_effect(self):
         """Check that executing the causal test case returns the correct results for dummy data using a linear
@@ -229,32 +216,5 @@ def test_execute_test_observational_linear_regression_estimator_squared_term(sel
         causal_test_result = self.causal_test_case.execute_test(estimation_model, self.data_collector)
         self.assertAlmostEqual(round(causal_test_result.test_value.value, 1), 4, delta=1)
 
-    def test_execute_observational_causal_forest_estimator_cates(self):
-        """Check that executing the causal test case returns the correct conditional average treatment effects for
-        dummy data with effect multiplicative effect modification. C ~ (4*(A+2) + D)*M"""
-        # Add some effect modifier M that has a multiplicative effect on C
-        self.df["M"] = np.random.randint(1, 5, len(self.df))
-        self.df["C"] *= self.df["M"]
-        estimation_model = CausalForestEstimator(
-            "A",
-            self.treatment_value,
-            self.control_value,
-            self.minimal_adjustment_set,
-            "C",
-            self.df,
-            effect_modifiers={"M": None},
-        )
-        self.causal_test_case.estimate_type = "cates"
-        causal_test_result = self.causal_test_case.execute_test(estimation_model, self.data_collector)
-        causal_test_result = causal_test_result.test_value.value
-        # Check that each effect modifier's strata has a greater ATE than the last (ascending order)
-        causal_test_result_m1 = causal_test_result.loc[causal_test_result["M"] == 1]
-        causal_test_result_m2 = causal_test_result.loc[causal_test_result["M"] == 2]
-        causal_test_result_m3 = causal_test_result.loc[causal_test_result["M"] == 3]
-        causal_test_result_m4 = causal_test_result.loc[causal_test_result["M"] == 4]
-        self.assertLess(causal_test_result_m1["cate"].mean(), causal_test_result_m2["cate"].mean())
-        self.assertLess(causal_test_result_m2["cate"].mean(), causal_test_result_m3["cate"].mean())
-        self.assertLess(causal_test_result_m3["cate"].mean(), causal_test_result_m4["cate"].mean())
-
     def tearDown(self) -> None:
         remove_temp_dir_if_existent()
diff --git a/tests/testing_tests/test_causal_test_suite.py b/tests/testing_tests/test_causal_test_suite.py
index b3d0f448..96fd766a 100644
--- a/tests/testing_tests/test_causal_test_suite.py
+++ b/tests/testing_tests/test_causal_test_suite.py
@@ -7,7 +7,7 @@
 from causal_testing.testing.base_test_case import BaseTestCase
 from causal_testing.specification.variable import Input, Output
 from causal_testing.testing.causal_test_outcome import ExactValue
-from causal_testing.testing.estimators import CausalForestEstimator, LinearRegressionEstimator
+from causal_testing.testing.estimators import LinearRegressionEstimator, LogisticRegressionEstimator
 from causal_testing.specification.causal_specification import CausalSpecification, Scenario
 from causal_testing.data_collection.data_collector import ObservationalDataCollector
 from tests.test_helpers import create_temp_dir_if_non_existent, remove_temp_dir_if_existent
@@ -100,19 +100,22 @@ def test_execute_test_suite_single_base_test_case(self):
         causal_test_case_result = causal_test_results[self.base_test_case]
         self.assertAlmostEqual(causal_test_case_result["LinearRegressionEstimator"][0].test_value.value, 4, delta=1e-10)
 
-    def test_execute_test_suite_multiple_estimators(self):
-        """Check that executing a test suite with multiple estimators returns correct results for the dummy data
-        for each estimator
-        """
-        estimators = [LinearRegressionEstimator, CausalForestEstimator]
-        test_suite_2_estimators = CausalTestSuite()
-        test_list = [CausalTestCase(self.base_test_case, self.expected_causal_effect, 0, 1)]
-        test_suite_2_estimators.add_test_object(
-            base_test_case=self.base_test_case, causal_test_case_list=test_list, estimators_classes=estimators
-        )
-        causal_test_results = test_suite_2_estimators.execute_test_suite(self.data_collector, self.causal_specification)
-        causal_test_case_result = causal_test_results[self.base_test_case]
-        linear_regression_result = causal_test_case_result["LinearRegressionEstimator"][0]
-        causal_forrest_result = causal_test_case_result["CausalForestEstimator"][0]
-        self.assertAlmostEqual(linear_regression_result.test_value.value, 4, delta=1e-1)
-        self.assertAlmostEqual(causal_forrest_result.test_value.value, 4, delta=1e-1)
+    # Without CausalForestEstimator only two estimators remain. Unfortunately, LogisticRegressionEstimator does not
+    # currently work with CausalTestSuite, so this test is disabled for now.
+
+    # def test_execute_test_suite_multiple_estimators(self):
+    #     """Check that executing a test suite with multiple estimators returns correct results for the dummy data
+    #     for each estimator
+    #     """
+    #     estimators = [LinearRegressionEstimator, LogisticRegressionEstimator]
+    #     test_suite_2_estimators = CausalTestSuite()
+    #     test_list = [CausalTestCase(self.base_test_case, self.expected_causal_effect, 0, 1)]
+    #     test_suite_2_estimators.add_test_object(
+    #         base_test_case=self.base_test_case, causal_test_case_list=test_list, estimators_classes=estimators
+    #     )
+    #     causal_test_results = test_suite_2_estimators.execute_test_suite(self.data_collector, self.causal_specification)
+    #     causal_test_case_result = causal_test_results[self.base_test_case]
+    #     linear_regression_result = causal_test_case_result["LinearRegressionEstimator"][0]
+    #     logistic_regression_estimator = causal_test_case_result["LogisticRegressionEstimator"][0]
+    #     self.assertAlmostEqual(linear_regression_result.test_value.value, 4, delta=1e-1)
+    #     self.assertAlmostEqual(logistic_regression_estimator.test_value.value, 4, delta=1e-1)
diff --git a/tests/testing_tests/test_estimators.py b/tests/testing_tests/test_estimators.py
index 835a1144..51faad37 100644
--- a/tests/testing_tests/test_estimators.py
+++ b/tests/testing_tests/test_estimators.py
@@ -4,7 +4,6 @@
 import matplotlib.pyplot as plt
 from causal_testing.testing.estimators import (
     LinearRegressionEstimator,
-    CausalForestEstimator,
     LogisticRegressionEstimator,
     InstrumentalVariableEstimator,
 )
@@ -409,69 +408,6 @@ def test_program_11_2_with_robustness_validation(self):
         self.assertEqual(round(cv.estimate_robustness(model)["treatments"], 4), 0.7353)
 
 
-class TestCausalForestEstimator(unittest.TestCase):
-    """Test the linear regression estimator against the programming exercises in Section 2 of Hernán and Robins [1].
-
-    Reference: Hernán MA, Robins JM (2020). Causal Inference: What If. Boca Raton: Chapman & Hall/CRC.
-    Link: https://www.hsph.harvard.edu/miguel-hernan/causal-inference-book/
-    """
-
-    @classmethod
-    def setUpClass(cls) -> None:
-        cls.nhefs_df = load_nhefs_df()
-        cls.chapter_11_df = load_chapter_11_df()
-
-    def test_program_15_ate(self):
-        """Test whether our causal forest implementation produces the similar ATE to program 15.1 (p. 163, 184)."""
-        df = self.nhefs_df
-        covariates = {
-            "sex",
-            "race",
-            "age",
-            "edu_2",
-            "edu_3",
-            "edu_4",
-            "edu_5",
-            "exercise_1",
-            "exercise_2",
-            "active_1",
-            "active_2",
-            "wt71",
-            "smokeintensity",
-            "smokeyrs",
-        }
-        causal_forest = CausalForestEstimator("qsmk", 1, 0, covariates, "wt82_71", df, {"smokeintensity": 40})
-        ate, _ = causal_forest.estimate_ate()
-        self.assertGreater(round(ate, 1), 2.5)
-        self.assertLess(round(ate, 1), 4.5)
-
-    def test_program_15_cate(self):
-        """Test whether our causal forest implementation produces the similar CATE to program 15.1 (p. 163, 184)."""
-        df = self.nhefs_df
-        smoking_intensity_5_and_40_df = df.loc[(df["smokeintensity"] == 5) | (df["smokeintensity"] == 40)]
-        covariates = {
-            "sex",
-            "race",
-            "age",
-            "edu_2",
-            "edu_3",
-            "edu_4",
-            "edu_5",
-            "exercise_1",
-            "exercise_2",
-            "active_1",
-            "active_2",
-            "wt71",
-            "smokeintensity",
-            "smokeyrs",
-        }
-        causal_forest = CausalForestEstimator(
-            "qsmk", 1, 0, covariates, "wt82_71", smoking_intensity_5_and_40_df, {"smokeintensity": 40}
-        )
-        cates_df, _ = causal_forest.estimate_cates()
-        self.assertGreater(cates_df["cate"].mean(), 0)
-
-
 class TestLinearRegressionInteraction(unittest.TestCase):
     """Test linear regression for estimating effects involving interaction."""
 

From f30ca8f08799acb376fe893258507e28c8fae80e Mon Sep 17 00:00:00 2001
From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com>
Date: Wed, 23 Aug 2023 09:53:01 +0100
Subject: [PATCH 03/12] Remove econml from pyproject.toml

---
 pyproject.toml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 46fa2c0b..78309aca 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,7 +16,6 @@ license = { text = "MIT" }
 keywords = ["causal inference", "verification"]
 dependencies = [
     "z3_solver~=4.11.2", # z3_solver does not follow semantic versioning and tying to 4.11 introduces problems
-    "econml~=0.12",
     "fitter~=1.4",
     "lhsmdu~=1.1",
     "networkx~=2.6",

From f34a6d1a171a60fdc7e2cfdffcb7eb2ce7a0215a Mon Sep 17 00:00:00 2001
From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com>
Date: Thu, 11 Apr 2024 16:42:13 +0100
Subject: [PATCH 04/12] Remove helpers and demonstrate tempfile

---
 tests/test_helpers.py                         | 27 -------------------
 tests/testing_tests/test_causal_test_suite.py | 10 ++++---
 2 files changed, 7 insertions(+), 30 deletions(-)
 delete mode 100644 tests/test_helpers.py

diff --git a/tests/test_helpers.py b/tests/test_helpers.py
deleted file mode 100644
index 4b77d2be..00000000
--- a/tests/test_helpers.py
+++ /dev/null
@@ -1,27 +0,0 @@
-"""A library of helper methods for the causal testing framework tests."""
-import sys
-from os import mkdir
-from os.path import join, dirname, realpath, exists
-from shutil import rmtree
-
-
-def create_temp_dir_if_non_existent():
-    """Create a temporary directory in the current working directory if one does not exist already.
-
-    Create a temporary directory named temp in the current working directory provided it does not already exist, and
-    then return the path to the temporary directory (regardless of whether it existed previously or has just been
-    created).
-
-    :return: Path to the temporary directory.
-    """
-    temp_dir = join(dirname(realpath(sys.argv[0])), "temp/")
-    if not exists(temp_dir):
-        mkdir(temp_dir)
-    return temp_dir
-
-
-def remove_temp_dir_if_existent():
-    """Remove a temporary directory from the current working directory if one exists."""
-    temp_dir = join(dirname(realpath(sys.argv[0])), "temp/")
-    if exists(temp_dir):
-        rmtree(temp_dir)
diff --git a/tests/testing_tests/test_causal_test_suite.py b/tests/testing_tests/test_causal_test_suite.py
index df0c2eb3..d6af77cb 100644
--- a/tests/testing_tests/test_causal_test_suite.py
+++ b/tests/testing_tests/test_causal_test_suite.py
@@ -1,6 +1,8 @@
 import unittest
 import os
+import tempfile
 import numpy as np
+import shutil
 import pandas as pd
 from causal_testing.testing.causal_test_suite import CausalTestSuite
 from causal_testing.testing.causal_test_case import CausalTestCase
@@ -10,7 +12,6 @@
 from causal_testing.testing.estimators import LinearRegressionEstimator, LogisticRegressionEstimator
 from causal_testing.specification.causal_specification import CausalSpecification, Scenario
 from causal_testing.data_collection.data_collector import ObservationalDataCollector
-from tests.test_helpers import create_temp_dir_if_non_existent, remove_temp_dir_if_existent
 from causal_testing.specification.causal_dag import CausalDAG
 
 
@@ -29,8 +30,8 @@ def setUp(self) -> None:
         self.scenario = Scenario({A, C, D})
 
         # 2. Create DAG and dummy data and write to csvs
-        temp_dir_path = create_temp_dir_if_non_existent()
-        dag_dot_path = os.path.join(temp_dir_path, "dag.dot")
+        self.temp_dir_path = tempfile.mkdtemp()
+        dag_dot_path = os.path.join(self.temp_dir_path, "dag.dot")
         dag_dot = """digraph G { A -> C; D -> A; D -> C}"""
         with open(dag_dot_path, "w") as file:
             file.write(dag_dot)
@@ -64,6 +65,9 @@ def setUp(self) -> None:
 
         self.data_collector = ObservationalDataCollector(self.scenario, self.df)
 
+    def tearDown(self) -> None:
+        shutil.rmtree(self.temp_dir_path)
+
     def test_adding_test_object(self):
         "test an object can be added to the test_suite using the add_test_object function"
         test_suite = CausalTestSuite()

From 89dba726e95782fed90db6ccaba4443bd18adc7f Mon Sep 17 00:00:00 2001
From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com>
Date: Thu, 11 Apr 2024 16:44:02 +0100
Subject: [PATCH 05/12] Remove CausalForestEstimator test

---
 tests/testing_tests/test_estimators.py | 27 --------------------------
 1 file changed, 27 deletions(-)

diff --git a/tests/testing_tests/test_estimators.py b/tests/testing_tests/test_estimators.py
index 76457170..cac19afc 100644
--- a/tests/testing_tests/test_estimators.py
+++ b/tests/testing_tests/test_estimators.py
@@ -437,33 +437,6 @@ def test_program_11_3_cublic_spline(self):
         self.assertAlmostEqual(ate_1[0] * 2, ate_2[0])
 
 
-    def test_program_15_cate(self):
-        """Test whether our causal forest implementation produces the similar CATE to program 15.1 (p. 163, 184)."""
-        df = self.nhefs_df
-        smoking_intensity_5_and_40_df = df.loc[(df["smokeintensity"] == 5) | (df["smokeintensity"] == 40)]
-        covariates = {
-            "sex",
-            "race",
-            "age",
-            "edu_2",
-            "edu_3",
-            "edu_4",
-            "edu_5",
-            "exercise_1",
-            "exercise_2",
-            "active_1",
-            "active_2",
-            "wt71",
-            "smokeintensity",
-            "smokeyrs",
-        }
-        causal_forest = CausalForestEstimator(
-            "qsmk", 1, 0, covariates, "wt82_71", smoking_intensity_5_and_40_df, {"smokeintensity": 40}
-        )
-        cates_df, _ = causal_forest.estimate_cates()
-        self.assertGreater(cates_df["cate"].mean(), 0)
-
-
 class TestLinearRegressionInteraction(unittest.TestCase):
     """Test linear regression for estimating effects involving interaction."""
 

From c9f0fc8fa3e32d4f9c56eaec45b928edb57fd254 Mon Sep 17 00:00:00 2001
From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com>
Date: Thu, 11 Apr 2024 17:04:58 +0100
Subject: [PATCH 06/12] Remove custom helpers and use tempfile for all tests

---
 .../test_observational_data_collector.py       | 10 +++++-----
 .../test_abstract_test_case.py                 | 10 +++++-----
 tests/json_front_tests/test_json_class.py      |  2 --
 tests/specification_tests/test_causal_dag.py   | 10 ++++++----
 .../test_metamorphic_relations.py              | 10 ++++++----
 .../test_causal_surrogate_assisted.py          |  9 +++++----
 .../testing_tests/test_causal_test_adequacy.py |  2 --
 tests/testing_tests/test_causal_test_case.py   | 18 ++++++++----------
 8 files changed, 35 insertions(+), 36 deletions(-)

diff --git a/tests/data_collection_tests/test_observational_data_collector.py b/tests/data_collection_tests/test_observational_data_collector.py
index 73ebcd07..97163853 100644
--- a/tests/data_collection_tests/test_observational_data_collector.py
+++ b/tests/data_collection_tests/test_observational_data_collector.py
@@ -1,5 +1,6 @@
 import unittest
 import os
+import shutil, tempfile
 import pandas as pd
 from causal_testing.data_collection.data_collector import ObservationalDataCollector
 from causal_testing.specification.causal_specification import Scenario
@@ -7,7 +8,6 @@
 from scipy.stats import uniform, rv_discrete
 from enum import Enum
 import random
-from tests.test_helpers import create_temp_dir_if_non_existent, remove_temp_dir_if_existent
 
 
 class TestObservationalDataCollector(unittest.TestCase):
@@ -17,9 +17,9 @@ class Color(Enum):
             GREEN = "GREEN"
             BLUE = "BLUE"
 
-        temp_dir_path = create_temp_dir_if_non_existent()
-        self.dag_dot_path = os.path.join(temp_dir_path, "dag.dot")
-        self.observational_df_path = os.path.join(temp_dir_path, "observational_data.csv")
+        self.temp_dir_path = tempfile.mkdtemp()
+        self.dag_dot_path = os.path.join(self.temp_dir_path, "dag.dot")
+        self.observational_df_path = os.path.join(self.temp_dir_path, "observational_data.csv")
         # Y = 3*X1 + X2*X3 + 10
         self.observational_df = pd.DataFrame(
             {"X1": [1, 2, 3, 4], "X2": [5, 6, 7, 8], "X3": [10, 20, 30, 40], "Y2": ["RED", "GREEN", "BLUE", "BLUE"]}
@@ -66,7 +66,7 @@ def populate_m(data):
         assert all((m == 2 * x1 for x1, m in zip(data["X1"], data["M"])))
 
     def tearDown(self) -> None:
-        remove_temp_dir_if_existent()
+        shutil.rmtree(self.temp_dir_path)
 
 
 if __name__ == "__main__":
diff --git a/tests/generation_tests/test_abstract_test_case.py b/tests/generation_tests/test_abstract_test_case.py
index d12f1aa6..fd40f3de 100644
--- a/tests/generation_tests/test_abstract_test_case.py
+++ b/tests/generation_tests/test_abstract_test_case.py
@@ -1,5 +1,6 @@
 import unittest
 import os
+import shutil, tempfile
 import pandas as pd
 import numpy as np
 from causal_testing.generation.abstract_causal_test_case import AbstractCausalTestCase
@@ -7,7 +8,6 @@
 from causal_testing.specification.causal_specification import Scenario
 from causal_testing.specification.variable import Input, Output
 from scipy.stats import uniform, rv_discrete
-from tests.test_helpers import create_temp_dir_if_non_existent, remove_temp_dir_if_existent
 from causal_testing.testing.causal_test_outcome import Positive
 from z3 import And
 from enum import Enum
@@ -29,9 +29,9 @@ class TestAbstractTestCase(unittest.TestCase):
     """
 
     def setUp(self) -> None:
-        temp_dir_path = create_temp_dir_if_non_existent()
-        self.dag_dot_path = os.path.join(temp_dir_path, "dag.dot")
-        self.observational_df_path = os.path.join(temp_dir_path, "observational_data.csv")
+        self.temp_dir_path = tempfile.mkdtemp()
+        self.dag_dot_path = os.path.join(self.temp_dir_path, "dag.dot")
+        self.observational_df_path = os.path.join(self.temp_dir_path, "observational_data.csv")
         # Y = 3*X1 + X2*X3 + 10
         self.observational_df = pd.DataFrame({"X1": [1, 2, 3, 4], "X2": [5, 6, 7, 8], "X3": [10, 20, 30, 40]})
         self.observational_df["Y"] = self.observational_df.apply(
@@ -192,7 +192,7 @@ def test_feasible_constraints(self):
         assert len(concrete_tests) < 1000
 
     def tearDown(self) -> None:
-        remove_temp_dir_if_existent()
+        shutil.rmtree(self.temp_dir_path)
 
 
 if __name__ == "__main__":
diff --git a/tests/json_front_tests/test_json_class.py b/tests/json_front_tests/test_json_class.py
index e348cc57..8fa49194 100644
--- a/tests/json_front_tests/test_json_class.py
+++ b/tests/json_front_tests/test_json_class.py
@@ -6,7 +6,6 @@
 
 from causal_testing.testing.estimators import LinearRegressionEstimator, Estimator
 from causal_testing.testing.causal_test_outcome import NoEffect, Positive
-from tests.test_helpers import remove_temp_dir_if_existent
 from causal_testing.json_front.json_class import JsonUtility, CausalVariables
 from causal_testing.specification.variable import Input, Output, Meta
 from causal_testing.specification.scenario import Scenario
@@ -321,7 +320,6 @@ def add_modelling_assumptions(self):
             self.json_class.run_json_tests(effects=effects, mutates=mutates, estimators=estimators, f_flag=False)
 
     def tearDown(self) -> None:
-        remove_temp_dir_if_existent()
         if os.path.exists("temp_out.txt"):
             os.remove("temp_out.txt")
 
diff --git a/tests/specification_tests/test_causal_dag.py b/tests/specification_tests/test_causal_dag.py
index f88a56a7..d3f34136 100644
--- a/tests/specification_tests/test_causal_dag.py
+++ b/tests/specification_tests/test_causal_dag.py
@@ -1,11 +1,13 @@
 import unittest
 import os
+import shutil, tempfile
 import networkx as nx
 from causal_testing.specification.causal_dag import CausalDAG, close_separator, list_all_min_sep
 from causal_testing.specification.scenario import Scenario
 from causal_testing.specification.variable import Input, Output
 from causal_testing.testing.base_test_case import BaseTestCase
-from tests.test_helpers import create_temp_dir_if_non_existent, remove_temp_dir_if_existent
+
+
 
 
 class TestCausalDAGIssue90(unittest.TestCase):
@@ -14,8 +16,8 @@ class TestCausalDAGIssue90(unittest.TestCase):
     """
 
     def setUp(self) -> None:
-        temp_dir_path = create_temp_dir_if_non_existent()
-        self.dag_dot_path = os.path.join(temp_dir_path, "dag.dot")
+        self.temp_dir_path = tempfile.mkdtemp()
+        self.dag_dot_path = os.path.join(self.temp_dir_path, "dag.dot")
         dag_dot = """digraph DAG { rankdir=LR; Z -> X; X -> M; M -> Y; Z -> M; }"""
         with open(self.dag_dot_path, "w") as f:
             f.write(dag_dot)
@@ -28,7 +30,7 @@ def test_enumerate_minimal_adjustment_sets(self):
         self.assertEqual([{"Z"}], adjustment_sets)
 
     def tearDown(self) -> None:
-        remove_temp_dir_if_existent()
+        shutil.rmtree(self.temp_dir_path)
 
 
 class TestIVAssumptions(unittest.TestCase):
diff --git a/tests/specification_tests/test_metamorphic_relations.py b/tests/specification_tests/test_metamorphic_relations.py
index 3e1e998c..dc35e071 100644
--- a/tests/specification_tests/test_metamorphic_relations.py
+++ b/tests/specification_tests/test_metamorphic_relations.py
@@ -1,10 +1,9 @@
 import unittest
 import os
-
+import shutil, tempfile
 import pandas as pd
 from itertools import combinations
 
-from tests.test_helpers import create_temp_dir_if_non_existent
 from causal_testing.specification.causal_dag import CausalDAG
 from causal_testing.specification.causal_specification import Scenario
 from causal_testing.specification.metamorphic_relation import (
@@ -69,8 +68,8 @@ def run_system_with_input_configuration(self, input_configuration: dict) -> pd.D
 
 class TestMetamorphicRelation(unittest.TestCase):
     def setUp(self) -> None:
-        temp_dir_path = create_temp_dir_if_non_existent()
-        self.dag_dot_path = os.path.join(temp_dir_path, "dag.dot")
+        self.temp_dir_path = tempfile.mkdtemp()
+        self.dag_dot_path = os.path.join(self.temp_dir_path, "dag.dot")
         dag_dot = """digraph DAG { rankdir=LR; X1 -> Z; Z -> M; M -> Y; X2 -> Z; X3 -> M;}"""
         with open(self.dag_dot_path, "w") as f:
             f.write(dag_dot)
@@ -88,6 +87,9 @@ def setUp(self) -> None:
             self.scenario, self.default_control_input_config, self.default_treatment_input_config
         )
 
+    def tearDown(self) -> None:
+        shutil.rmtree(self.temp_dir_path)
+
     def test_should_cause_metamorphic_relations_correct_spec(self):
         """Test if the ShouldCause MR passes all metamorphic tests where the DAG perfectly represents the program."""
         causal_dag = CausalDAG(self.dag_dot_path)
diff --git a/tests/surrogate_tests/test_causal_surrogate_assisted.py b/tests/surrogate_tests/test_causal_surrogate_assisted.py
index 43afe0e4..43fd98a7 100644
--- a/tests/surrogate_tests/test_causal_surrogate_assisted.py
+++ b/tests/surrogate_tests/test_causal_surrogate_assisted.py
@@ -7,8 +7,9 @@
 from causal_testing.surrogate.causal_surrogate_assisted import SimulationResult, CausalSurrogateAssistedTestCase, Simulator
 from causal_testing.surrogate.surrogate_search_algorithms import GeneticSearchAlgorithm
 from causal_testing.testing.estimators import CubicSplineRegressionEstimator
-from tests.test_helpers import create_temp_dir_if_non_existent, remove_temp_dir_if_existent
+
 import os
+import shutil, tempfile
 import pandas as pd
 import numpy as np
 
@@ -43,8 +44,8 @@ def setUpClass(cls) -> None:
         cls.class_df = load_class_df()
 
     def setUp(self):
-        temp_dir_path = create_temp_dir_if_non_existent()
-        self.dag_dot_path = os.path.join(temp_dir_path, "dag.dot")
+        self.temp_dir_path = tempfile.mkdtemp()
+        self.dag_dot_path = os.path.join(self.temp_dir_path, "dag.dot")
         dag_dot = """digraph DAG { rankdir=LR; Z -> X; X -> M [included=1, expected=positive]; M -> Y [included=1, expected=negative]; Z -> M; }"""
         with open(self.dag_dot_path, "w") as f:
             f.write(dag_dot)
@@ -199,7 +200,7 @@ def test_causal_surrogate_assisted_execution_incorrect_search_config(self):
                           custom_data_aggregator=data_double_aggregator)
 
     def tearDown(self) -> None:
-        remove_temp_dir_if_existent()
+        shutil.rmtree(self.temp_dir_path)
 
 def load_class_df():
     """Get the testing data and put into a dataframe."""
diff --git a/tests/testing_tests/test_causal_test_adequacy.py b/tests/testing_tests/test_causal_test_adequacy.py
index 0435dd54..1f8d2ffa 100644
--- a/tests/testing_tests/test_causal_test_adequacy.py
+++ b/tests/testing_tests/test_causal_test_adequacy.py
@@ -10,7 +10,6 @@
 from causal_testing.testing.causal_test_suite import CausalTestSuite
 from causal_testing.testing.causal_test_adequacy import DAGAdequacy
 from causal_testing.testing.causal_test_outcome import NoEffect, Positive
-from tests.test_helpers import remove_temp_dir_if_existent
 from causal_testing.json_front.json_class import JsonUtility, CausalVariables
 from causal_testing.specification.variable import Input, Output, Meta
 from causal_testing.specification.scenario import Scenario
@@ -255,6 +254,5 @@ def test_dag_adequacy_independent_other_way(self):
         )
 
     def tearDown(self) -> None:
-        remove_temp_dir_if_existent()
         if os.path.exists("temp_out.txt"):
             os.remove("temp_out.txt")
diff --git a/tests/testing_tests/test_causal_test_case.py b/tests/testing_tests/test_causal_test_case.py
index 774a0c8d..4d081a62 100644
--- a/tests/testing_tests/test_causal_test_case.py
+++ b/tests/testing_tests/test_causal_test_case.py
@@ -1,9 +1,10 @@
 import unittest
 import os
+import tempfile
+import shutil
 import pandas as pd
 import numpy as np
 
-from tests.test_helpers import create_temp_dir_if_non_existent, remove_temp_dir_if_existent
 from causal_testing.specification.causal_specification import CausalSpecification, Scenario
 from causal_testing.specification.variable import Input, Output
 from causal_testing.specification.causal_dag import CausalDAG
@@ -44,9 +45,6 @@ def test_str(self):
             " {Output: C::float}: ExactValue: 4±0.2.",
         )
 
-    def tearDown(self) -> None:
-        remove_temp_dir_if_existent()
-
 
 class TestCausalTestExecution(unittest.TestCase):
     """Test the causal test execution workflow using observational data.
@@ -57,8 +55,8 @@ class TestCausalTestExecution(unittest.TestCase):
 
     def setUp(self) -> None:
         # 1. Create Causal DAG
-        temp_dir_path = create_temp_dir_if_non_existent()
-        dag_dot_path = os.path.join(temp_dir_path, "dag.dot")
+        self.temp_dir_path = tempfile.mkdtemp()
+        dag_dot_path = os.path.join(self.temp_dir_path, "dag.dot")
         dag_dot = """digraph G { A -> C; D -> A; D -> C}"""
         with open(dag_dot_path, "w") as file:
             file.write(dag_dot)
@@ -88,7 +86,7 @@ def setUp(self) -> None:
         df = pd.DataFrame({"D": list(np.random.normal(60, 10, 1000))})  # D = exogenous
         df["A"] = [1 if d > 50 else 0 for d in df["D"]]
         df["C"] = df["D"] + (4 * (df["A"] + 2))  # C = (4*(A+2)) + D
-        self.observational_data_csv_path = os.path.join(temp_dir_path, "observational_data.csv")
+        self.observational_data_csv_path = os.path.join(self.temp_dir_path, "observational_data.csv")
         df.to_csv(self.observational_data_csv_path, index=False)
 
         # 5. Create observational data collector
@@ -101,6 +99,9 @@ def setUp(self) -> None:
         self.treatment_value = 1
         self.control_value = 0
 
+    def tearDown(self) -> None:
+        shutil.rmtree(self.temp_dir_path)
+
     def test_check_minimum_adjustment_set(self):
         """Check that the minimum adjustment set is correctly made"""
         minimal_adjustment_set = self.causal_dag.identification(self.base_test_case)
@@ -215,6 +216,3 @@ def test_execute_test_observational_linear_regression_estimator_squared_term(sel
         )
         causal_test_result = self.causal_test_case.execute_test(estimation_model, self.data_collector)
         pd.testing.assert_series_equal(causal_test_result.test_value.value, pd.Series(4.0), atol=1)
-
-    def tearDown(self) -> None:
-        remove_temp_dir_if_existent()

From f62011bcf5dc59cc0eab9bf0dd870eaef9d9120d Mon Sep 17 00:00:00 2001
From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com>
Date: Thu, 11 Apr 2024 17:05:45 +0100
Subject: [PATCH 07/12] Remove CausalForestDML dependency

---
 causal_testing/testing/estimators.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/causal_testing/testing/estimators.py b/causal_testing/testing/estimators.py
index 09b861c0..307a5d28 100644
--- a/causal_testing/testing/estimators.py
+++ b/causal_testing/testing/estimators.py
@@ -9,7 +9,6 @@
 import pandas as pd
 import statsmodels.api as sm
 import statsmodels.formula.api as smf
-from econml.dml import CausalForestDML
 from patsy import dmatrix  # pylint: disable = no-name-in-module
 from patsy import ModelDesc
 from sklearn.ensemble import GradientBoostingRegressor

From 355a42da6547f733141159760021af71263a6816 Mon Sep 17 00:00:00 2001
From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com>
Date: Tue, 23 Apr 2024 11:51:54 +0100
Subject: [PATCH 08/12] Update DAG tests to use TempFile

---
 tests/specification_tests/test_causal_dag.py | 50 ++++++++++----------
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/tests/specification_tests/test_causal_dag.py b/tests/specification_tests/test_causal_dag.py
index d3f34136..c020ae67 100644
--- a/tests/specification_tests/test_causal_dag.py
+++ b/tests/specification_tests/test_causal_dag.py
@@ -35,8 +35,8 @@ def tearDown(self) -> None:
 
 class TestIVAssumptions(unittest.TestCase):
     def setUp(self) -> None:
-        temp_dir_path = create_temp_dir_if_non_existent()
-        self.dag_dot_path = os.path.join(temp_dir_path, "dag.dot")
+        self.temp_dir_path = tempfile.mkdtemp()
+        self.dag_dot_path = os.path.join(self.temp_dir_path, "dag.dot")
         dag_dot = """digraph G { I -> X; X -> Y; U -> X; U -> Y;}"""
         f = open(self.dag_dot_path, "w")
         f.write(dag_dot)
@@ -63,7 +63,9 @@ def test_common_cause(self):
         causal_dag.graph.add_edge("U", "I")
         with self.assertRaises(ValueError):
             causal_dag.check_iv_assumptions("X", "Y", "I")
-
+
+    def tearDown(self) -> None:
+        shutil.rmtree(self.temp_dir_path)
 
 class TestCausalDAG(unittest.TestCase):
     """
@@ -74,8 +76,8 @@ class TestCausalDAG(unittest.TestCase):
     """
 
     def setUp(self) -> None:
-        temp_dir_path = create_temp_dir_if_non_existent()
-        self.dag_dot_path = os.path.join(temp_dir_path, "dag.dot")
+        self.temp_dir_path = tempfile.mkdtemp()
+        self.dag_dot_path = os.path.join(self.temp_dir_path, "dag.dot")
         dag_dot = """digraph G { A -> B; B -> C; D -> A; D -> C;}"""
         f = open(self.dag_dot_path, "w")
         f.write(dag_dot)
@@ -107,7 +109,7 @@ def test_to_dot_string(self):
         self.assertEqual(causal_dag.to_dot_string(), """digraph G {\nA -> B;\nB -> C;\nD -> A;\nD -> C;\n}""")
 
     def tearDown(self) -> None:
-        remove_temp_dir_if_existent()
+        shutil.rmtree(self.temp_dir_path)
 
 
 class TestCyclicCausalDAG(unittest.TestCase):
@@ -116,8 +118,8 @@ class TestCyclicCausalDAG(unittest.TestCase):
     """
 
     def setUp(self) -> None:
-        temp_dir_path = create_temp_dir_if_non_existent()
-        self.dag_dot_path = os.path.join(temp_dir_path, "dag.dot")
+        self.temp_dir_path = tempfile.mkdtemp()
+        self.dag_dot_path = os.path.join(self.temp_dir_path, "dag.dot")
         dag_dot = """digraph G { A -> B; B -> C; D -> A; D -> C; C -> A;}"""
         f = open(self.dag_dot_path, "w")
         f.write(dag_dot)
@@ -127,7 +129,7 @@ def test_invalid_causal_dag(self):
         self.assertRaises(nx.HasACycle, CausalDAG, self.dag_dot_path)
 
     def tearDown(self) -> None:
-        remove_temp_dir_if_existent()
+        shutil.rmtree(self.temp_dir_path)
 
 
 class TestDAGDirectEffectIdentification(unittest.TestCase):
@@ -136,8 +138,8 @@ class TestDAGDirectEffectIdentification(unittest.TestCase):
     """
 
     def setUp(self) -> None:
-        temp_dir_path = create_temp_dir_if_non_existent()
-        self.dag_dot_path = os.path.join(temp_dir_path, "dag.dot")
+        self.temp_dir_path = tempfile.mkdtemp()
+        self.dag_dot_path = os.path.join(self.temp_dir_path, "dag.dot")
         dag_dot = """digraph G { X1->X2;X2->V;X2->D1;X2->D2;D1->Y;D1->D2;Y->D3;Z->X2;Z->Y;}"""
         f = open(self.dag_dot_path, "w")
         f.write(dag_dot)
@@ -152,7 +154,9 @@ def test_direct_effect_adjustment_sets_no_adjustment(self):
         causal_dag = CausalDAG(self.dag_dot_path)
         adjustment_sets = causal_dag.direct_effect_adjustment_sets(["X2"], ["D1"])
         self.assertEqual(list(adjustment_sets), [set()])
-
+
+    def tearDown(self) -> None:
+        shutil.rmtree(self.temp_dir_path)
 
 class TestDAGIdentification(unittest.TestCase):
     """
@@ -160,8 +164,8 @@ class TestDAGIdentification(unittest.TestCase):
     """
 
     def setUp(self) -> None:
-        temp_dir_path = create_temp_dir_if_non_existent()
-        self.dag_dot_path = os.path.join(temp_dir_path, "dag.dot")
+        self.temp_dir_path = tempfile.mkdtemp()
+        self.dag_dot_path = os.path.join(self.temp_dir_path, "dag.dot")
         dag_dot = """digraph G { X1->X2;X2->V;X2->D1;X2->D2;D1->Y;D1->D2;Y->D3;Z->X2;Z->Y;}"""
         f = open(self.dag_dot_path, "w")
         f.write(dag_dot)
@@ -339,8 +343,7 @@ def test_dag_with_non_character_nodes(self):
         self.assertEqual(adjustment_sets, [{"aa"}, {"la"}, {"va"}])
 
     def tearDown(self) -> None:
-        remove_temp_dir_if_existent()
-
+        shutil.rmtree(self.temp_dir_path)
 
 class TestDependsOnOutputs(unittest.TestCase):
     """
@@ -352,8 +355,8 @@ def setUp(self) -> None:
         from causal_testing.specification.variable import Input, Output, Meta
         from causal_testing.specification.scenario import Scenario
 
-        temp_dir_path = create_temp_dir_if_non_existent()
-        self.dag_dot_path = os.path.join(temp_dir_path, "dag.dot")
+        self.temp_dir_path = tempfile.mkdtemp()
+        self.dag_dot_path = os.path.join(self.temp_dir_path, "dag.dot")
         dag_dot = """digraph G { A -> B; B -> C; D -> A; D -> C}"""
         f = open(self.dag_dot_path, "w")
         f.write(dag_dot)
@@ -391,7 +394,7 @@ def test_depends_on_outputs_input(self):
         self.assertFalse(causal_dag.depends_on_outputs("D", self.scenario))
 
     def tearDown(self) -> None:
-        remove_temp_dir_if_existent()
+        shutil.rmtree(self.temp_dir_path)
 
 
 class TestUndirectedGraphAlgorithms(unittest.TestCase):
@@ -431,9 +434,6 @@ def test_list_all_min_sep(self):
         min_separators = set(frozenset(min_separator) for min_separator in min_separators)
         self.assertEqual({frozenset({2, 3}), frozenset({3, 4}), frozenset({4, 5})}, min_separators)
 
-    def tearDown(self) -> None:
-        remove_temp_dir_if_existent()
-
 
 class TestHiddenVariableDAG(unittest.TestCase):
     """
@@ -441,8 +441,8 @@ class TestHiddenVariableDAG(unittest.TestCase):
     """
 
     def setUp(self) -> None:
-        temp_dir_path = create_temp_dir_if_non_existent()
-        self.dag_dot_path = os.path.join(temp_dir_path, "dag.dot")
+        self.temp_dir_path = tempfile.mkdtemp()
+        self.dag_dot_path = os.path.join(self.temp_dir_path, "dag.dot")
         dag_dot = """digraph DAG { rankdir=LR; Z -> X; X -> M; M -> Y; Z -> M; }"""
         with open(self.dag_dot_path, "w") as f:
             f.write(dag_dot)
@@ -463,4 +463,4 @@ def test_hidden_varaible_adjustment_sets(self):
         self.assertNotEqual(adjustment_sets, adjustment_sets_with_hidden)
 
     def tearDown(self) -> None:
-        remove_temp_dir_if_existent()
+        shutil.rmtree(self.temp_dir_path)

From d81bb5011e252535aa89fabc6e97446e931d474f Mon Sep 17 00:00:00 2001
From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com>
Date: Tue, 23 Apr 2024 13:34:10 +0100
Subject: [PATCH 09/12] Dependency Updates

---
 pyproject.toml | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index fb1f26f7..cf8ca872 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,17 +16,17 @@ license = { text = "MIT" }
 keywords = ["causal inference", "verification"]
 dependencies = [
     "z3_solver~=4.11.2", # z3_solver does not follow semantic versioning and tying to 4.11 introduces problems
-    "fitter~=1.4",
+    "fitter~=1.7",
     "lhsmdu~=1.1",
     "networkx~=2.6",
-    "numpy~=1.23",
-    "pandas~=1.3",
-    "scikit_learn~=1.1",
+    "numpy~=1.26",
+    "pandas~=1.5",
+    "scikit_learn~=1.4",
     "scipy~=1.7",
-    "statsmodels~=0.13",
-    "tabulate~=0.8",
-    "pydot~=1.4",
-    "pygad~=3.2"
+    "statsmodels~=0.14",
+    "tabulate~=0.9",
+    "pydot~=2.0",
+    "pygad~=3.3"
 ]
 dynamic = ["version"]
 

From 8052df56084eed5e86959f52f56117fec07b95a1 Mon Sep 17 00:00:00 2001
From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com>
Date: Tue, 23 Apr 2024 13:39:21 +0100
Subject: [PATCH 10/12] black formatting

---
 .../generation/abstract_causal_test_case.py   |  1 +
 causal_testing/json_front/json_class.py       |  6 ++--
 .../specification/metamorphic_relation.py     |  8 ++++--
 causal_testing/specification/scenario.py      |  1 +
 .../surrogate/causal_surrogate_assisted.py    |  2 +-
 .../surrogate/surrogate_search_algorithms.py  |  8 ++++--
 causal_testing/testing/base_test_case.py      |  1 +
 .../testing/causal_test_adequacy.py           |  1 +
 causal_testing/testing/causal_test_case.py    |  1 +
 causal_testing/testing/causal_test_outcome.py | 28 +++++++++++--------
 causal_testing/testing/causal_test_result.py  |  9 ++++--
 causal_testing/testing/causal_test_suite.py   |  1 +
 causal_testing/testing/estimators.py          |  4 +--
 causal_testing/utils/validation.py            |  1 +
 14 files changed, 45 insertions(+), 27 deletions(-)

diff --git a/causal_testing/generation/abstract_causal_test_case.py b/causal_testing/generation/abstract_causal_test_case.py
index 762fd16d..1bf30f57 100644
--- a/causal_testing/generation/abstract_causal_test_case.py
+++ b/causal_testing/generation/abstract_causal_test_case.py
@@ -1,4 +1,5 @@
 """This module contains the class AbstractCausalTestCase, which generates concrete test cases"""
+
 import itertools
 import logging
 from enum import Enum
diff --git a/causal_testing/json_front/json_class.py b/causal_testing/json_front/json_class.py
index 88386441..cca98a20 100644
--- a/causal_testing/json_front/json_class.py
+++ b/causal_testing/json_front/json_class.py
@@ -108,9 +108,9 @@ def _create_abstract_test_case(self, test, mutates, effects):
                 self.scenario.variables[variable]: effects[effect]
                 for variable, effect in test["expected_effect"].items()
             },
-            effect_modifiers={self.scenario.variables[v] for v in test["effect_modifiers"]}
-            if "effect_modifiers" in test
-            else {},
+            effect_modifiers=(
+                {self.scenario.variables[v] for v in test["effect_modifiers"]} if "effect_modifiers" in test else {}
+            ),
             estimate_type=test["estimate_type"],
             effect=test.get("effect", "total"),
         )
diff --git a/causal_testing/specification/metamorphic_relation.py b/causal_testing/specification/metamorphic_relation.py
index e8b6978f..9d8c8afb 100644
--- a/causal_testing/specification/metamorphic_relation.py
+++ b/causal_testing/specification/metamorphic_relation.py
@@ -73,9 +73,11 @@ def generate_follow_up(self, n_tests: int, min_val: float, max_val: float, seed:
                 source_follow_up_test_inputs[[follow_up_input]]
                 .rename(columns={follow_up_input: self.treatment_var})
                 .to_dict(orient="records"),
-                test_inputs.to_dict(orient="records")
-                if not test_inputs.empty
-                else [{}] * len(source_follow_up_test_inputs),
+                (
+                    test_inputs.to_dict(orient="records")
+                    if not test_inputs.empty
+                    else [{}] * len(source_follow_up_test_inputs)
+                ),
             )
         ]
 
diff --git a/causal_testing/specification/scenario.py b/causal_testing/specification/scenario.py
index 63753d1e..7e984abd 100644
--- a/causal_testing/specification/scenario.py
+++ b/causal_testing/specification/scenario.py
@@ -1,4 +1,5 @@
 """This module holds the Scenario Class"""
+
 from collections.abc import Iterable, Mapping
 
 from tabulate import tabulate
diff --git a/causal_testing/surrogate/causal_surrogate_assisted.py b/causal_testing/surrogate/causal_surrogate_assisted.py
index 77ef88f7..74f309be 100644
--- a/causal_testing/surrogate/causal_surrogate_assisted.py
+++ b/causal_testing/surrogate/causal_surrogate_assisted.py
@@ -19,7 +19,7 @@ class SimulationResult:
     relationship: str
 
 
-class SearchAlgorithm(ABC): # pylint: disable=too-few-public-methods
+class SearchAlgorithm(ABC):  # pylint: disable=too-few-public-methods
     """Class to be inherited with the search algorithm consisting of a search function and the fitness function of the
     space to be searched"""
 
diff --git a/causal_testing/surrogate/surrogate_search_algorithms.py b/causal_testing/surrogate/surrogate_search_algorithms.py
index 94984b6a..75628622 100644
--- a/causal_testing/surrogate/surrogate_search_algorithms.py
+++ b/causal_testing/surrogate/surrogate_search_algorithms.py
@@ -1,4 +1,5 @@
 """Module containing implementation of search algorithm for surrogate search """
+
 # Fitness functions are required to be iteratively defined, including all variables within.
 
 from operator import itemgetter
@@ -26,7 +27,7 @@ def __init__(self, delta=0.05, config: dict = None) -> None:
 
     # pylint: disable=too-many-locals
     def search(
-            self, surrogate_models: list[CubicSplineRegressionEstimator], specification: CausalSpecification
+        self, surrogate_models: list[CubicSplineRegressionEstimator], specification: CausalSpecification
     ) -> list:
         solutions = []
 
@@ -47,7 +48,8 @@ def fitness_function(ga, solution, idx):  # pylint: disable=unused-argument
                 ate = surrogate.estimate_ate_calculated(adjustment_dict)
                 if len(ate) > 1:
                     raise ValueError(
-                        "Multiple ate values provided but currently only single values supported in this method")
+                        "Multiple ate values provided but currently only single values supported in this method"
+                    )
                 return contradiction_function(ate[0])
 
             gene_types, gene_space = self.create_gene_types(surrogate, specification)
@@ -84,7 +86,7 @@ def fitness_function(ga, solution, idx):  # pylint: disable=unused-argument
 
     @staticmethod
     def create_gene_types(
-            surrogate_model: CubicSplineRegressionEstimator, specification: CausalSpecification
+        surrogate_model: CubicSplineRegressionEstimator, specification: CausalSpecification
     ) -> tuple[list, list]:
         """Generate the gene_types and gene_space for a given fitness function and specification
         :param surrogate_model: Instance of a CubicSplineRegressionEstimator
diff --git a/causal_testing/testing/base_test_case.py b/causal_testing/testing/base_test_case.py
index 5da698dc..2cc02304 100644
--- a/causal_testing/testing/base_test_case.py
+++ b/causal_testing/testing/base_test_case.py
@@ -1,4 +1,5 @@
 """This module contains the BaseTestCase dataclass, which stores the information required for identification"""
+
 from dataclasses import dataclass
 from causal_testing.specification.variable import Variable
 from causal_testing.testing.effect import Effect
diff --git a/causal_testing/testing/causal_test_adequacy.py b/causal_testing/testing/causal_test_adequacy.py
index dfa43a90..2a9bff93 100644
--- a/causal_testing/testing/causal_test_adequacy.py
+++ b/causal_testing/testing/causal_test_adequacy.py
@@ -1,6 +1,7 @@
 """
 This module contains code to measure various aspects of causal test adequacy.
 """
+
 from itertools import combinations
 from copy import deepcopy
 import pandas as pd
diff --git a/causal_testing/testing/causal_test_case.py b/causal_testing/testing/causal_test_case.py
index b8a43f3b..da47c126 100644
--- a/causal_testing/testing/causal_test_case.py
+++ b/causal_testing/testing/causal_test_case.py
@@ -1,4 +1,5 @@
 """This module contains the CausalTestCase class, a class that holds the information required for a causal test"""
+
 import logging
 from typing import Any
 import numpy as np
diff --git a/causal_testing/testing/causal_test_outcome.py b/causal_testing/testing/causal_test_outcome.py
index 0c3ae1e4..3846b514 100644
--- a/causal_testing/testing/causal_test_outcome.py
+++ b/causal_testing/testing/causal_test_outcome.py
@@ -29,10 +29,12 @@ class SomeEffect(CausalTestOutcome):
     def apply(self, res: CausalTestResult) -> bool:
         if res.test_value.type == "risk_ratio":
             return any(
-                1 < ci_low < ci_high or ci_low < ci_high < 1 for ci_low, ci_high in zip(res.ci_low(), res.ci_high()))
-        if res.test_value.type in ('coefficient', 'ate'):
+                1 < ci_low < ci_high or ci_low < ci_high < 1 for ci_low, ci_high in zip(res.ci_low(), res.ci_high())
+            )
+        if res.test_value.type in ("coefficient", "ate"):
             return any(
-                0 < ci_low < ci_high or ci_low < ci_high < 0 for ci_low, ci_high in zip(res.ci_low(), res.ci_high()))
+                0 < ci_low < ci_high or ci_low < ci_high < 0 for ci_low, ci_high in zip(res.ci_low(), res.ci_high())
+            )
 
         raise ValueError(f"Test Value type {res.test_value.type} is not valid for this TestOutcome")
 
@@ -51,17 +53,19 @@ def __init__(self, atol: float = 1e-10, ctol: float = 0.05):
 
     def apply(self, res: CausalTestResult) -> bool:
         if res.test_value.type == "risk_ratio":
-            return any(ci_low < 1 < ci_high or np.isclose(value, 1.0, atol=self.atol) for ci_low, ci_high, value in
-                       zip(res.ci_low(), res.ci_high(), res.test_value.value))
-        if res.test_value.type in ('coefficient', 'ate'):
+            return any(
+                ci_low < 1 < ci_high or np.isclose(value, 1.0, atol=self.atol)
+                for ci_low, ci_high, value in zip(res.ci_low(), res.ci_high(), res.test_value.value)
+            )
+        if res.test_value.type in ("coefficient", "ate"):
             value = res.test_value.value if isinstance(res.ci_high(), Iterable) else [res.test_value.value]
             return (
-                    sum(
-                        not ((ci_low < 0 < ci_high) or abs(v) < self.atol)
-                        for ci_low, ci_high, v in zip(res.ci_low(), res.ci_high(), value)
-                    )
-                    / len(value)
-                    < self.ctol
+                sum(
+                    not ((ci_low < 0 < ci_high) or abs(v) < self.atol)
+                    for ci_low, ci_high, v in zip(res.ci_low(), res.ci_high(), value)
+                )
+                / len(value)
+                < self.ctol
             )
 
         raise ValueError(f"Test Value type {res.test_value.type} is not valid for this TestOutcome")
diff --git a/causal_testing/testing/causal_test_result.py b/causal_testing/testing/causal_test_result.py
index afae6195..65a2085e 100644
--- a/causal_testing/testing/causal_test_result.py
+++ b/causal_testing/testing/causal_test_result.py
@@ -1,6 +1,7 @@
 """This module contains the CausalTestResult class, which is a container for the results of a causal test, and the
 TestValue dataclass.
 """
+
 from typing import Any
 from dataclasses import dataclass
 import pandas as pd
@@ -86,9 +87,11 @@ def to_dict(self, json=False):
             "outcome": self.estimator.outcome,
             "adjustment_set": list(self.adjustment_set) if json else self.adjustment_set,
             "effect_measure": self.test_value.type,
-            "effect_estimate": self.test_value.value.to_dict()
-            if json and hasattr(self.test_value.value, "to_dict")
-            else self.test_value.value,
+            "effect_estimate": (
+                self.test_value.value.to_dict()
+                if json and hasattr(self.test_value.value, "to_dict")
+                else self.test_value.value
+            ),
             "ci_low": self.ci_low().to_dict() if json and hasattr(self.ci_low(), "to_dict") else self.ci_low(),
             "ci_high": self.ci_high().to_dict() if json and hasattr(self.ci_high(), "to_dict") else self.ci_high(),
         }
diff --git a/causal_testing/testing/causal_test_suite.py b/causal_testing/testing/causal_test_suite.py
index d9c973a6..47c5ef98 100644
--- a/causal_testing/testing/causal_test_suite.py
+++ b/causal_testing/testing/causal_test_suite.py
@@ -1,5 +1,6 @@
 """This module contains the CausalTestSuite class, for details on using it:
 https://causal-testing-framework.readthedocs.io/en/latest/test_suite.html"""
+
 import logging
 
 from collections import UserDict
diff --git a/causal_testing/testing/estimators.py b/causal_testing/testing/estimators.py
index 307a5d28..e1d323fd 100644
--- a/causal_testing/testing/estimators.py
+++ b/causal_testing/testing/estimators.py
@@ -1,5 +1,6 @@
 """This module contains the Estimator abstract class, as well as its concrete extensions: LogisticRegressionEstimator,
 LinearRegressionEstimator"""
+
 import logging
 from abc import ABC, abstractmethod
 from typing import Any
@@ -351,7 +352,7 @@ def estimate_coefficient(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
         model = self._run_linear_regression()
         newline = "\n"
         patsy_md = ModelDesc.from_formula(self.treatment)
-        if any((self.df.dtypes[factor.name()] == 'object' for factor in patsy_md.rhs_termlist[1].factors)):
+        if any((self.df.dtypes[factor.name()] == "object" for factor in patsy_md.rhs_termlist[1].factors)):
             design_info = dmatrix(self.formula.split("~")[1], self.df).design_info
             treatment = design_info.column_names[design_info.term_name_slices[self.treatment]]
         else:
@@ -590,4 +591,3 @@ def estimate_coefficient(self, bootstrap_size=100) -> tuple[pd.Series, list[pd.S
         ci_high = pd.Series(bootstraps[bootstrap_size - bound])
 
         return pd.Series(self.estimate_iv_coefficient(self.df)), [ci_low, ci_high]
-
diff --git a/causal_testing/utils/validation.py b/causal_testing/utils/validation.py
index 7a29ac60..63df5dc3 100644
--- a/causal_testing/utils/validation.py
+++ b/causal_testing/utils/validation.py
@@ -1,4 +1,5 @@
 """This module contains the CausalValidator class for performing Quantitive Bias Analysis techniques"""
+
 import math
 import numpy as np
 from scipy.stats import t

From 2b0ea8180e57308fc35669743c5eaa648facc7b9 Mon Sep 17 00:00:00 2001
From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com>
Date: Tue, 23 Apr 2024 14:46:57 +0100
Subject: [PATCH 11/12] Removed unused imports

---
 causal_testing/testing/estimators.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/causal_testing/testing/estimators.py b/causal_testing/testing/estimators.py
index e1d323fd..baebb679 100644
--- a/causal_testing/testing/estimators.py
+++ b/causal_testing/testing/estimators.py
@@ -12,7 +12,6 @@
 import statsmodels.formula.api as smf
 from patsy import dmatrix  # pylint: disable = no-name-in-module
 from patsy import ModelDesc
-from sklearn.ensemble import GradientBoostingRegressor
 from statsmodels.regression.linear_model import RegressionResultsWrapper
 from statsmodels.tools.sm_exceptions import PerfectSeparationError
 

From 5b95def6b069e1e4139ce029d3457258ce759041 Mon Sep 17 00:00:00 2001
From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com>
Date: Tue, 23 Apr 2024 15:19:33 +0100
Subject: [PATCH 12/12] Add multiple python versions to CI tests

---
 .github/workflows/ci-tests-drafts.yaml | 2 +-
 .github/workflows/ci-tests.yaml        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci-tests-drafts.yaml b/.github/workflows/ci-tests-drafts.yaml
index 18b60443..5617027b 100644
--- a/.github/workflows/ci-tests-drafts.yaml
+++ b/.github/workflows/ci-tests-drafts.yaml
@@ -13,7 +13,7 @@ jobs:
     strategy:
       matrix:
         os: ["ubuntu-latest", "windows-latest", "macos-latest"]
-        python-version: ["3.9"]
+        python-version: ["3.9", "3.10", "3.11", "3.12"]
     steps:
       - uses: actions/checkout@v2
       - name: Set up Python using Miniconda
diff --git a/.github/workflows/ci-tests.yaml b/.github/workflows/ci-tests.yaml
index df622814..e45e57e1 100644
--- a/.github/workflows/ci-tests.yaml
+++ b/.github/workflows/ci-tests.yaml
@@ -18,7 +18,7 @@ jobs:
     strategy:
       matrix:
         os: ["ubuntu-latest", "windows-latest", "macos-latest"]
-        python-version: ["3.9"]
+        python-version: ["3.9", "3.10", "3.11", "3.12"]
     steps:
       - uses: actions/checkout@v2
       - name: Set up Python using Miniconda