Merge pull request #280 from CITCOM-project/python-311-compatible
Python 312 compatible
christopher-wild authored Apr 30, 2024
2 parents 61fa736 + d74e853 commit 4a86d9c
Showing 27 changed files with 132 additions and 339 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci-tests-drafts.yaml
@@ -13,7 +13,7 @@ jobs:
strategy:
matrix:
os: ["ubuntu-latest", "windows-latest", "macos-latest"]
- python-version: ["3.9"]
+ python-version: ["3.9", "3.10", "3.11", "3.12"]
steps:
- uses: actions/checkout@v2
- name: Set up Python using Miniconda
2 changes: 1 addition & 1 deletion .github/workflows/ci-tests.yaml
@@ -18,7 +18,7 @@ jobs:
strategy:
matrix:
os: ["ubuntu-latest", "windows-latest", "macos-latest"]
- python-version: ["3.9"]
+ python-version: ["3.9", "3.10", "3.11", "3.12"]
steps:
- uses: actions/checkout@v2
- name: Set up Python using Miniconda
1 change: 1 addition & 0 deletions causal_testing/generation/abstract_causal_test_case.py
@@ -1,4 +1,5 @@
"""This module contains the class AbstractCausalTestCase, which generates concrete test cases"""
+
import itertools
import logging
from enum import Enum
6 changes: 3 additions & 3 deletions causal_testing/json_front/json_class.py
@@ -108,9 +108,9 @@ def _create_abstract_test_case(self, test, mutates, effects):
self.scenario.variables[variable]: effects[effect]
for variable, effect in test["expected_effect"].items()
},
- effect_modifiers={self.scenario.variables[v] for v in test["effect_modifiers"]}
- if "effect_modifiers" in test
- else {},
+ effect_modifiers=(
+     {self.scenario.variables[v] for v in test["effect_modifiers"]} if "effect_modifiers" in test else {}
+ ),
estimate_type=test["estimate_type"],
effect=test.get("effect", "total"),
)
8 changes: 5 additions & 3 deletions causal_testing/specification/metamorphic_relation.py
@@ -73,9 +73,11 @@ def generate_follow_up(self, n_tests: int, min_val: float, max_val: float, seed:
source_follow_up_test_inputs[[follow_up_input]]
.rename(columns={follow_up_input: self.treatment_var})
.to_dict(orient="records"),
- test_inputs.to_dict(orient="records")
- if not test_inputs.empty
- else [{}] * len(source_follow_up_test_inputs),
+ (
+     test_inputs.to_dict(orient="records")
+     if not test_inputs.empty
+     else [{}] * len(source_follow_up_test_inputs)
+ ),
)
]

1 change: 1 addition & 0 deletions causal_testing/specification/scenario.py
@@ -1,4 +1,5 @@
"""This module holds the Scenario Class"""
+
from collections.abc import Iterable, Mapping

from tabulate import tabulate
1 change: 1 addition & 0 deletions causal_testing/surrogate/surrogate_search_algorithms.py
@@ -1,4 +1,5 @@
"""Module containing implementation of search algorithm for surrogate search """
+
# Fitness functions are required to be iteratively defined, including all variables within.

from operator import itemgetter
1 change: 1 addition & 0 deletions causal_testing/testing/base_test_case.py
@@ -1,4 +1,5 @@
"""This module contains the BaseTestCase dataclass, which stores the information required for identification"""
+
from dataclasses import dataclass
from causal_testing.specification.variable import Variable
from causal_testing.testing.effect import Effect
1 change: 1 addition & 0 deletions causal_testing/testing/causal_test_adequacy.py
@@ -1,6 +1,7 @@
"""
This module contains code to measure various aspects of causal test adequacy.
"""
+
from itertools import combinations
from copy import deepcopy
import pandas as pd
1 change: 1 addition & 0 deletions causal_testing/testing/causal_test_case.py
@@ -1,4 +1,5 @@
"""This module contains the CausalTestCase class, a class that holds the information required for a causal test"""
+
import logging
from typing import Any
import numpy as np
9 changes: 6 additions & 3 deletions causal_testing/testing/causal_test_result.py
@@ -1,6 +1,7 @@
"""This module contains the CausalTestResult class, which is a container for the results of a causal test, and the
TestValue dataclass.
"""
+
from typing import Any
from dataclasses import dataclass
import pandas as pd
@@ -86,9 +87,11 @@ def to_dict(self, json=False):
"outcome": self.estimator.outcome,
"adjustment_set": list(self.adjustment_set) if json else self.adjustment_set,
"effect_measure": self.test_value.type,
"effect_estimate": self.test_value.value.to_dict()
if json and hasattr(self.test_value.value, "to_dict")
else self.test_value.value,
"effect_estimate": (
self.test_value.value.to_dict()
if json and hasattr(self.test_value.value, "to_dict")
else self.test_value.value
),
"ci_low": self.ci_low().to_dict() if json and hasattr(self.ci_low(), "to_dict") else self.ci_low(),
"ci_high": self.ci_high().to_dict() if json and hasattr(self.ci_high(), "to_dict") else self.ci_high(),
}
1 change: 1 addition & 0 deletions causal_testing/testing/causal_test_suite.py
@@ -1,5 +1,6 @@
"""This module contains the CausalTestSuite class, for details on using it:
https://causal-testing-framework.readthedocs.io/en/latest/test_suite.html"""
+
import logging

from collections import UserDict
Expand Down
105 changes: 4 additions & 101 deletions causal_testing/testing/estimators.py
@@ -1,5 +1,6 @@
"""This module contains the Estimator abstract class, as well as its concrete extensions: LogisticRegressionEstimator,
- LinearRegressionEstimator and CausalForestEstimator"""
+ LinearRegressionEstimator"""
+
import logging
from abc import ABC, abstractmethod
from typing import Any
@@ -9,10 +10,8 @@
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
- from econml.dml import CausalForestDML
from patsy import dmatrix # pylint: disable = no-name-in-module
from patsy import ModelDesc
- from sklearn.ensemble import GradientBoostingRegressor
from statsmodels.regression.linear_model import RegressionResultsWrapper
from statsmodels.tools.sm_exceptions import PerfectSeparationError

@@ -352,6 +351,7 @@ def estimate_coefficient(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
model = self._run_linear_regression()
newline = "\n"
patsy_md = ModelDesc.from_formula(self.treatment)
+
if any(
(
self.df.dtypes[factor.name()] == "object"
@@ -360,6 +360,7 @@ def estimate_coefficient(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
if factor.name() in self.df.dtypes
)
):
+
design_info = dmatrix(self.formula.split("~")[1], self.df).design_info
treatment = design_info.column_names[design_info.term_name_slices[self.treatment]]
else:
@@ -598,101 +599,3 @@ def estimate_coefficient(self, bootstrap_size=100) -> tuple[pd.Series, list[pd.S
ci_high = pd.Series(bootstraps[bootstrap_size - bound])

return pd.Series(self.estimate_iv_coefficient(self.df)), [ci_low, ci_high]


class CausalForestEstimator(Estimator):
"""A causal random forest estimator is a non-parametric estimator which recursively partitions the covariate space
to learn a low-dimensional representation of treatment effect heterogeneity. This form of estimator is best suited
to the estimation of heterogeneous treatment effects i.e. the estimated effect for every sample rather than the
population average.
"""

def add_modelling_assumptions(self):
"""Add any modelling assumptions to the estimator.
:return self: Update self.modelling_assumptions
"""
self.modelling_assumptions.append("Non-parametric estimator: no restrictions imposed on the data.")

def estimate_ate(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
"""Estimate the average treatment effect.
:return ate, confidence_intervals: The average treatment effect and 95% confidence intervals.
"""
# Remove any NA containing rows
reduced_df = self.df.copy()
necessary_cols = [self.treatment] + list(self.adjustment_set) + [self.outcome]
missing_rows = reduced_df[necessary_cols].isnull().any(axis=1)
reduced_df = reduced_df[~missing_rows]

# Split data into effect modifiers (X), confounders (W), treatments (T), and outcome (Y)
if self.effect_modifiers:
effect_modifier_df = reduced_df[list(self.effect_modifiers)]
else:
effect_modifier_df = reduced_df[list(self.adjustment_set)]
confounders_df = reduced_df[list(self.adjustment_set)]
treatment_df = np.ravel(reduced_df[[self.treatment]])
outcome_df = np.ravel(reduced_df[[self.outcome]])

# Fit the model to the data using a gradient boosting regressor for both the treatment and outcome model
model = CausalForestDML(
model_y=GradientBoostingRegressor(),
model_t=GradientBoostingRegressor(),
)
model.fit(outcome_df, treatment_df, X=effect_modifier_df, W=confounders_df)

# Obtain the ATE and 95% confidence intervals
ate = pd.Series(model.ate(effect_modifier_df, T0=self.control_value, T1=self.treatment_value))
ate_interval = model.ate_interval(effect_modifier_df, T0=self.control_value, T1=self.treatment_value)
ci_low, ci_high = pd.Series(ate_interval[0]), pd.Series(ate_interval[1])
return ate, [ci_low, ci_high]

def estimate_cates(self) -> pd.DataFrame:
"""Estimate the conditional average treatment effect for each sample in the data as a function of a set of
covariates (X) i.e. effect modifiers. That is, the predicted change in outcome caused by the intervention
(change in treatment from control to treatment value) for every execution of the system-under-test, taking into
account the value of each effect modifier X. As a result, for every unique setting of the set of covariates X,
we expect a different CATE.
:return results_df: A dataframe containing a conditional average treatment effect, 95% confidence intervals, and
the covariate (effect modifier) values for each sample.
"""

# Remove any NA containing rows
reduced_df = self.df.copy()
necessary_cols = [self.treatment] + list(self.adjustment_set) + [self.outcome]
missing_rows = reduced_df[necessary_cols].isnull().any(axis=1)
reduced_df = reduced_df[~missing_rows]

# Split data into effect modifiers (X), confounders (W), treatments (T), and outcome (Y)
if self.effect_modifiers:
effect_modifier_df = reduced_df[list(self.effect_modifiers)]
else:
raise ValueError("CATE requires the user to define a set of effect modifiers.")

if self.adjustment_set:
confounders_df = reduced_df[list(self.adjustment_set)]
else:
confounders_df = None
treatment_df = reduced_df[[self.treatment]]
outcome_df = reduced_df[[self.outcome]]

# Fit a model to the data
model = CausalForestDML(model_y=GradientBoostingRegressor(), model_t=GradientBoostingRegressor())
model.fit(outcome_df, treatment_df, X=effect_modifier_df, W=confounders_df)

# Obtain CATES and confidence intervals
conditional_ates = model.effect(effect_modifier_df, T0=self.control_value, T1=self.treatment_value).flatten()
[ci_low, ci_high] = model.effect_interval(
effect_modifier_df, T0=self.control_value, T1=self.treatment_value, alpha=self.alpha
)

# Merge results into a dataframe (CATE, confidence intervals, and effect modifier values)
results_df = pd.DataFrame(columns=["cate", "ci_low", "ci_high"])
results_df["cate"] = list(conditional_ates)
results_df["ci_low"] = list(ci_low.flatten())
results_df["ci_high"] = list(ci_high.flatten())
effect_modifier_df.reset_index(drop=True, inplace=True)
results_df[list(self.effect_modifiers)] = effect_modifier_df
results_df.sort_values(by=list(self.effect_modifiers), inplace=True)
return results_df, None
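With econml gone from the dependencies, the CausalForestEstimator above is removed outright rather than ported to Python 3.12. For anyone who still needs the forest-based ATE it provided, a rough sketch of the equivalent direct call into econml (assuming econml and scikit-learn are installed separately; the data file and column names below are illustrative, not part of the framework):

```python
# Minimal sketch of what the deleted estimate_ate() did, calling econml directly.
# Assumes econml and scikit-learn are installed separately (this commit drops econml
# from the dependency list); the dataframe and column names here are illustrative.
import pandas as pd
from econml.dml import CausalForestDML
from sklearn.ensemble import GradientBoostingRegressor

df = pd.read_csv("observational_data.csv")  # hypothetical data file
treatment, outcome = "X1", "Y"
adjustment_set = ["X2", "X3"]

# Drop rows with missing values in the columns the model needs, as the estimator did
df = df.dropna(subset=[treatment] + adjustment_set + [outcome])

# Gradient boosting for both the treatment and the outcome model, as before
model = CausalForestDML(model_y=GradientBoostingRegressor(), model_t=GradientBoostingRegressor())

# X = effect modifiers (here the adjustment set, the estimator's fallback), W = confounders
model.fit(df[outcome], df[treatment], X=df[adjustment_set], W=df[adjustment_set])

# ATE of moving the treatment from control (0) to treatment (1), with 95% confidence intervals
ate = model.ate(df[adjustment_set], T0=0, T1=1)
ci_low, ci_high = model.ate_interval(df[adjustment_set], T0=0, T1=1)
```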
1 change: 1 addition & 0 deletions causal_testing/utils/validation.py
@@ -1,4 +1,5 @@
"""This module contains the CausalValidator class for performing Quantitive Bias Analysis techniques"""
+
import math
import numpy as np
from scipy.stats import t
3 changes: 1 addition & 2 deletions examples/poisson/example_run_causal_tests.py
@@ -3,7 +3,7 @@
import scipy
import os

- from causal_testing.testing.estimators import LinearRegressionEstimator, CausalForestEstimator
+ from causal_testing.testing.estimators import LinearRegressionEstimator
from causal_testing.testing.causal_test_outcome import ExactValue, Positive, Negative, NoEffect, CausalTestOutcome
from causal_testing.testing.causal_test_result import CausalTestResult
from causal_testing.json_front.json_class import JsonUtility
@@ -127,7 +127,6 @@ def populate_num_shapes_unit(data):

estimators = {
"WidthHeightEstimator": WidthHeightEstimator,
"CausalForestEstimator": CausalForestEstimator,
"LinearRegressionEstimator": LinearRegressionEstimator,
}

17 changes: 8 additions & 9 deletions pyproject.toml
@@ -16,18 +16,17 @@ license = { text = "MIT" }
keywords = ["causal inference", "verification"]
dependencies = [
"z3_solver~=4.11.2", # z3_solver does not follow semantic versioning and tying to 4.11 introduces problems
"econml~=0.12",
"fitter~=1.4",
"fitter~=1.7",
"lhsmdu~=1.1",
"networkx~=2.6",
"numpy~=1.23",
"pandas~=1.3",
"scikit_learn~=1.1",
"numpy~=1.26",
"pandas~=1.5",
"scikit_learn~=1.4",
"scipy~=1.7",
"statsmodels~=0.13",
"tabulate~=0.8",
"pydot~=1.4",
"pygad~=3.2"
"statsmodels~=0.14",
"tabulate~=0.9",
"pydot~=2.0",
"pygad~=3.3"
]
dynamic = ["version"]

10 changes: 5 additions & 5 deletions tests/data_collection_tests/test_observational_data_collector.py
@@ -1,13 +1,13 @@
import unittest
import os
+ import shutil, tempfile
import pandas as pd
from causal_testing.data_collection.data_collector import ObservationalDataCollector
from causal_testing.specification.causal_specification import Scenario
from causal_testing.specification.variable import Input, Output, Meta
from scipy.stats import uniform, rv_discrete
from enum import Enum
import random
- from tests.test_helpers import create_temp_dir_if_non_existent, remove_temp_dir_if_existent


class TestObservationalDataCollector(unittest.TestCase):
@@ -17,9 +17,9 @@ class Color(Enum):
GREEN = "GREEN"
BLUE = "BLUE"

- temp_dir_path = create_temp_dir_if_non_existent()
- self.dag_dot_path = os.path.join(temp_dir_path, "dag.dot")
- self.observational_df_path = os.path.join(temp_dir_path, "observational_data.csv")
+ self.temp_dir_path = tempfile.mkdtemp()
+ self.dag_dot_path = os.path.join(self.temp_dir_path, "dag.dot")
+ self.observational_df_path = os.path.join(self.temp_dir_path, "observational_data.csv")
# Y = 3*X1 + X2*X3 + 10
self.observational_df = pd.DataFrame(
{"X1": [1, 2, 3, 4], "X2": [5, 6, 7, 8], "X3": [10, 20, 30, 40], "Y2": ["RED", "GREEN", "BLUE", "BLUE"]}
@@ -66,7 +66,7 @@ def populate_m(data):
assert all((m == 2 * x1 for x1, m in zip(data["X1"], data["M"])))

def tearDown(self) -> None:
- remove_temp_dir_if_existent()
+ shutil.rmtree(self.temp_dir_path)


if __name__ == "__main__":
10 changes: 5 additions & 5 deletions tests/generation_tests/test_abstract_test_case.py
@@ -1,13 +1,13 @@
import unittest
import os
+ import shutil, tempfile
import pandas as pd
import numpy as np
from causal_testing.generation.abstract_causal_test_case import AbstractCausalTestCase
from causal_testing.generation.enum_gen import EnumGen
from causal_testing.specification.causal_specification import Scenario
from causal_testing.specification.variable import Input, Output
from scipy.stats import uniform, rv_discrete
- from tests.test_helpers import create_temp_dir_if_non_existent, remove_temp_dir_if_existent
from causal_testing.testing.causal_test_outcome import Positive
from z3 import And
from enum import Enum
@@ -29,9 +29,9 @@ class TestAbstractTestCase(unittest.TestCase):
"""

def setUp(self) -> None:
- temp_dir_path = create_temp_dir_if_non_existent()
- self.dag_dot_path = os.path.join(temp_dir_path, "dag.dot")
- self.observational_df_path = os.path.join(temp_dir_path, "observational_data.csv")
+ self.temp_dir_path = tempfile.mkdtemp()
+ self.dag_dot_path = os.path.join(self.temp_dir_path, "dag.dot")
+ self.observational_df_path = os.path.join(self.temp_dir_path, "observational_data.csv")
# Y = 3*X1 + X2*X3 + 10
self.observational_df = pd.DataFrame({"X1": [1, 2, 3, 4], "X2": [5, 6, 7, 8], "X3": [10, 20, 30, 40]})
self.observational_df["Y"] = self.observational_df.apply(
@@ -192,7 +192,7 @@ def test_feasible_constraints(self):
assert len(concrete_tests) < 1000

def tearDown(self) -> None:
- remove_temp_dir_if_existent()
+ shutil.rmtree(self.temp_dir_path)


if __name__ == "__main__":
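Both test modules above swap the shared create_temp_dir_if_non_existent / remove_temp_dir_if_existent helpers for the standard library: each test now gets its own directory from tempfile.mkdtemp() in setUp and removes it with shutil.rmtree in tearDown. A minimal sketch of the pattern (the test class and file names here are hypothetical, not taken from the suite):

```python
# Sketch of the per-test temporary directory pattern used in the updated tests.
import os
import shutil
import tempfile
import unittest


class ExampleTempDirTest(unittest.TestCase):
    def setUp(self) -> None:
        # A fresh directory per test, so runs cannot interfere with one another
        self.temp_dir_path = tempfile.mkdtemp()
        self.dag_dot_path = os.path.join(self.temp_dir_path, "dag.dot")

    def test_writes_into_temp_dir(self):
        with open(self.dag_dot_path, "w") as f:
            f.write("digraph G { X1 -> Y; }")
        self.assertTrue(os.path.exists(self.dag_dot_path))

    def tearDown(self) -> None:
        # Remove the directory and everything written into it
        shutil.rmtree(self.temp_dir_path)
```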