Merge pull request #280 from CITCOM-project/python-311-compatible
Python 312 compatible
christopher-wild authored Apr 30, 2024
2 parents 61fa736 + d74e853 commit 4a86d9c
Showing 27 changed files with 132 additions and 339 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci-tests-drafts.yaml
@@ -13,7 +13,7 @@ jobs:
strategy:
matrix:
os: ["ubuntu-latest", "windows-latest", "macos-latest"]
- python-version: ["3.9"]
+ python-version: ["3.9", "3.10", "3.11", "3.12"]
steps:
- uses: actions/checkout@v2
- name: Set up Python using Miniconda
2 changes: 1 addition & 1 deletion .github/workflows/ci-tests.yaml
@@ -18,7 +18,7 @@ jobs:
strategy:
matrix:
os: ["ubuntu-latest", "windows-latest", "macos-latest"]
- python-version: ["3.9"]
+ python-version: ["3.9", "3.10", "3.11", "3.12"]
steps:
- uses: actions/checkout@v2
- name: Set up Python using Miniconda
1 change: 1 addition & 0 deletions causal_testing/generation/abstract_causal_test_case.py
@@ -1,4 +1,5 @@
"""This module contains the class AbstractCausalTestCase, which generates concrete test cases"""
+
import itertools
import logging
from enum import Enum
6 changes: 3 additions & 3 deletions causal_testing/json_front/json_class.py
@@ -108,9 +108,9 @@ def _create_abstract_test_case(self, test, mutates, effects):
self.scenario.variables[variable]: effects[effect]
for variable, effect in test["expected_effect"].items()
},
- effect_modifiers={self.scenario.variables[v] for v in test["effect_modifiers"]}
- if "effect_modifiers" in test
- else {},
+ effect_modifiers=(
+     {self.scenario.variables[v] for v in test["effect_modifiers"]} if "effect_modifiers" in test else {}
+ ),
estimate_type=test["estimate_type"],
effect=test.get("effect", "total"),
)
8 changes: 5 additions & 3 deletions causal_testing/specification/metamorphic_relation.py
@@ -73,9 +73,11 @@ def generate_follow_up(self, n_tests: int, min_val: float, max_val: float, seed:
source_follow_up_test_inputs[[follow_up_input]]
.rename(columns={follow_up_input: self.treatment_var})
.to_dict(orient="records"),
- test_inputs.to_dict(orient="records")
- if not test_inputs.empty
- else [{}] * len(source_follow_up_test_inputs),
+ (
+     test_inputs.to_dict(orient="records")
+     if not test_inputs.empty
+     else [{}] * len(source_follow_up_test_inputs)
+ ),
)
]

1 change: 1 addition & 0 deletions causal_testing/specification/scenario.py
@@ -1,4 +1,5 @@
"""This module holds the Scenario Class"""
+
from collections.abc import Iterable, Mapping

from tabulate import tabulate
1 change: 1 addition & 0 deletions causal_testing/surrogate/surrogate_search_algorithms.py
@@ -1,4 +1,5 @@
"""Module containing implementation of search algorithm for surrogate search """
+
# Fitness functions are required to be iteratively defined, including all variables within.

from operator import itemgetter
1 change: 1 addition & 0 deletions causal_testing/testing/base_test_case.py
@@ -1,4 +1,5 @@
"""This module contains the BaseTestCase dataclass, which stores the information required for identification"""
+
from dataclasses import dataclass
from causal_testing.specification.variable import Variable
from causal_testing.testing.effect import Effect
1 change: 1 addition & 0 deletions causal_testing/testing/causal_test_adequacy.py
@@ -1,6 +1,7 @@
"""
This module contains code to measure various aspects of causal test adequacy.
"""
+
from itertools import combinations
from copy import deepcopy
import pandas as pd
1 change: 1 addition & 0 deletions causal_testing/testing/causal_test_case.py
@@ -1,4 +1,5 @@
"""This module contains the CausalTestCase class, a class that holds the information required for a causal test"""
+
import logging
from typing import Any
import numpy as np
9 changes: 6 additions & 3 deletions causal_testing/testing/causal_test_result.py
@@ -1,6 +1,7 @@
"""This module contains the CausalTestResult class, which is a container for the results of a causal test, and the
TestValue dataclass.
"""
+
from typing import Any
from dataclasses import dataclass
import pandas as pd
@@ -86,9 +87,11 @@ def to_dict(self, json=False):
"outcome": self.estimator.outcome,
"adjustment_set": list(self.adjustment_set) if json else self.adjustment_set,
"effect_measure": self.test_value.type,
"effect_estimate": self.test_value.value.to_dict()
if json and hasattr(self.test_value.value, "to_dict")
else self.test_value.value,
"effect_estimate": (
self.test_value.value.to_dict()
if json and hasattr(self.test_value.value, "to_dict")
else self.test_value.value
),
"ci_low": self.ci_low().to_dict() if json and hasattr(self.ci_low(), "to_dict") else self.ci_low(),
"ci_high": self.ci_high().to_dict() if json and hasattr(self.ci_high(), "to_dict") else self.ci_high(),
}
1 change: 1 addition & 0 deletions causal_testing/testing/causal_test_suite.py
@@ -1,5 +1,6 @@
"""This module contains the CausalTestSuite class, for details on using it:
https://causal-testing-framework.readthedocs.io/en/latest/test_suite.html"""
+
import logging

from collections import UserDict
Expand Down
105 changes: 4 additions & 101 deletions causal_testing/testing/estimators.py
@@ -1,5 +1,6 @@
"""This module contains the Estimator abstract class, as well as its concrete extensions: LogisticRegressionEstimator,
- LinearRegressionEstimator and CausalForestEstimator"""
+ LinearRegressionEstimator"""
+
import logging
from abc import ABC, abstractmethod
from typing import Any
@@ -9,10 +10,8 @@
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
- from econml.dml import CausalForestDML
from patsy import dmatrix # pylint: disable = no-name-in-module
from patsy import ModelDesc
- from sklearn.ensemble import GradientBoostingRegressor
from statsmodels.regression.linear_model import RegressionResultsWrapper
from statsmodels.tools.sm_exceptions import PerfectSeparationError

@@ -352,6 +351,7 @@ def estimate_coefficient(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
model = self._run_linear_regression()
newline = "\n"
patsy_md = ModelDesc.from_formula(self.treatment)
+
if any(
(
self.df.dtypes[factor.name()] == "object"
@@ -360,6 +360,7 @@ def estimate_coefficient(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
if factor.name() in self.df.dtypes
)
):
+
design_info = dmatrix(self.formula.split("~")[1], self.df).design_info
treatment = design_info.column_names[design_info.term_name_slices[self.treatment]]
else:
@@ -598,101 +599,3 @@ def estimate_coefficient(self, bootstrap_size=100) -> tuple[pd.Series, list[pd.S
ci_high = pd.Series(bootstraps[bootstrap_size - bound])

return pd.Series(self.estimate_iv_coefficient(self.df)), [ci_low, ci_high]


class CausalForestEstimator(Estimator):
"""A causal random forest estimator is a non-parametric estimator which recursively partitions the covariate space
to learn a low-dimensional representation of treatment effect heterogeneity. This form of estimator is best suited
to the estimation of heterogeneous treatment effects i.e. the estimated effect for every sample rather than the
population average.
"""

def add_modelling_assumptions(self):
"""Add any modelling assumptions to the estimator.
:return self: Update self.modelling_assumptions
"""
self.modelling_assumptions.append("Non-parametric estimator: no restrictions imposed on the data.")

def estimate_ate(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
"""Estimate the average treatment effect.
:return ate, confidence_intervals: The average treatment effect and 95% confidence intervals.
"""
# Remove any NA containing rows
reduced_df = self.df.copy()
necessary_cols = [self.treatment] + list(self.adjustment_set) + [self.outcome]
missing_rows = reduced_df[necessary_cols].isnull().any(axis=1)
reduced_df = reduced_df[~missing_rows]

# Split data into effect modifiers (X), confounders (W), treatments (T), and outcome (Y)
if self.effect_modifiers:
effect_modifier_df = reduced_df[list(self.effect_modifiers)]
else:
effect_modifier_df = reduced_df[list(self.adjustment_set)]
confounders_df = reduced_df[list(self.adjustment_set)]
treatment_df = np.ravel(reduced_df[[self.treatment]])
outcome_df = np.ravel(reduced_df[[self.outcome]])

# Fit the model to the data using a gradient boosting regressor for both the treatment and outcome model
model = CausalForestDML(
model_y=GradientBoostingRegressor(),
model_t=GradientBoostingRegressor(),
)
model.fit(outcome_df, treatment_df, X=effect_modifier_df, W=confounders_df)

# Obtain the ATE and 95% confidence intervals
ate = pd.Series(model.ate(effect_modifier_df, T0=self.control_value, T1=self.treatment_value))
ate_interval = model.ate_interval(effect_modifier_df, T0=self.control_value, T1=self.treatment_value)
ci_low, ci_high = pd.Series(ate_interval[0]), pd.Series(ate_interval[1])
return ate, [ci_low, ci_high]

def estimate_cates(self) -> pd.DataFrame:
"""Estimate the conditional average treatment effect for each sample in the data as a function of a set of
covariates (X) i.e. effect modifiers. That is, the predicted change in outcome caused by the intervention
(change in treatment from control to treatment value) for every execution of the system-under-test, taking into
account the value of each effect modifier X. As a result, for every unique setting of the set of covariates X,
we expect a different CATE.
:return results_df: A dataframe containing a conditional average treatment effect, 95% confidence intervals, and
the covariate (effect modifier) values for each sample.
"""

# Remove any NA containing rows
reduced_df = self.df.copy()
necessary_cols = [self.treatment] + list(self.adjustment_set) + [self.outcome]
missing_rows = reduced_df[necessary_cols].isnull().any(axis=1)
reduced_df = reduced_df[~missing_rows]

# Split data into effect modifiers (X), confounders (W), treatments (T), and outcome (Y)
if self.effect_modifiers:
effect_modifier_df = reduced_df[list(self.effect_modifiers)]
else:
raise ValueError("CATE requires the user to define a set of effect modifiers.")

if self.adjustment_set:
confounders_df = reduced_df[list(self.adjustment_set)]
else:
confounders_df = None
treatment_df = reduced_df[[self.treatment]]
outcome_df = reduced_df[[self.outcome]]

# Fit a model to the data
model = CausalForestDML(model_y=GradientBoostingRegressor(), model_t=GradientBoostingRegressor())
model.fit(outcome_df, treatment_df, X=effect_modifier_df, W=confounders_df)

# Obtain CATES and confidence intervals
conditional_ates = model.effect(effect_modifier_df, T0=self.control_value, T1=self.treatment_value).flatten()
[ci_low, ci_high] = model.effect_interval(
effect_modifier_df, T0=self.control_value, T1=self.treatment_value, alpha=self.alpha
)

# Merge results into a dataframe (CATE, confidence intervals, and effect modifier values)
results_df = pd.DataFrame(columns=["cate", "ci_low", "ci_high"])
results_df["cate"] = list(conditional_ates)
results_df["ci_low"] = list(ci_low.flatten())
results_df["ci_high"] = list(ci_high.flatten())
effect_modifier_df.reset_index(drop=True, inplace=True)
results_df[list(self.effect_modifiers)] = effect_modifier_df
results_df.sort_values(by=list(self.effect_modifiers), inplace=True)
return results_df, None
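With econml gone from the dependencies, the CausalForestEstimator above is removed outright rather than ported to Python 3.12. For anyone who still needs the forest-based ATE it provided, a rough sketch of the equivalent direct call into econml (assuming econml and scikit-learn are installed separately; the data file and column names below are illustrative, not part of the framework):

```python
# Minimal sketch of what the deleted estimate_ate() did, calling econml directly.
# Assumes econml and scikit-learn are installed separately (this commit drops econml
# from the dependency list); the dataframe and column names here are illustrative.
import pandas as pd
from econml.dml import CausalForestDML
from sklearn.ensemble import GradientBoostingRegressor

df = pd.read_csv("observational_data.csv")  # hypothetical data file
treatment, outcome = "X1", "Y"
adjustment_set = ["X2", "X3"]

# Drop rows with missing values in the columns the model needs, as the estimator did
df = df.dropna(subset=[treatment] + adjustment_set + [outcome])

# Gradient boosting for both the treatment and the outcome model, as before
model = CausalForestDML(model_y=GradientBoostingRegressor(), model_t=GradientBoostingRegressor())

# X = effect modifiers (here the adjustment set, the estimator's fallback), W = confounders
model.fit(df[outcome], df[treatment], X=df[adjustment_set], W=df[adjustment_set])

# ATE of moving the treatment from control (0) to treatment (1), with 95% confidence intervals
ate = model.ate(df[adjustment_set], T0=0, T1=1)
ci_low, ci_high = model.ate_interval(df[adjustment_set], T0=0, T1=1)
```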
1 change: 1 addition & 0 deletions causal_testing/utils/validation.py
@@ -1,4 +1,5 @@
"""This module contains the CausalValidator class for performing Quantitive Bias Analysis techniques"""
+
import math
import numpy as np
from scipy.stats import t
3 changes: 1 addition & 2 deletions examples/poisson/example_run_causal_tests.py
@@ -3,7 +3,7 @@
import scipy
import os

- from causal_testing.testing.estimators import LinearRegressionEstimator, CausalForestEstimator
+ from causal_testing.testing.estimators import LinearRegressionEstimator
from causal_testing.testing.causal_test_outcome import ExactValue, Positive, Negative, NoEffect, CausalTestOutcome
from causal_testing.testing.causal_test_result import CausalTestResult
from causal_testing.json_front.json_class import JsonUtility
@@ -127,7 +127,6 @@ def populate_num_shapes_unit(data):

estimators = {
"WidthHeightEstimator": WidthHeightEstimator,
"CausalForestEstimator": CausalForestEstimator,
"LinearRegressionEstimator": LinearRegressionEstimator,
}

17 changes: 8 additions & 9 deletions pyproject.toml
@@ -16,18 +16,17 @@ license = { text = "MIT" }
keywords = ["causal inference", "verification"]
dependencies = [
"z3_solver~=4.11.2", # z3_solver does not follow semantic versioning and tying to 4.11 introduces problems
"econml~=0.12",
"fitter~=1.4",
"fitter~=1.7",
"lhsmdu~=1.1",
"networkx~=2.6",
"numpy~=1.23",
"pandas~=1.3",
"scikit_learn~=1.1",
"numpy~=1.26",
"pandas~=1.5",
"scikit_learn~=1.4",
"scipy~=1.7",
"statsmodels~=0.13",
"tabulate~=0.8",
"pydot~=1.4",
"pygad~=3.2"
"statsmodels~=0.14",
"tabulate~=0.9",
"pydot~=2.0",
"pygad~=3.3"
]
dynamic = ["version"]

10 changes: 5 additions & 5 deletions tests/data_collection_tests/test_observational_data_collector.py
@@ -1,13 +1,13 @@
import unittest
import os
+ import shutil, tempfile
import pandas as pd
from causal_testing.data_collection.data_collector import ObservationalDataCollector
from causal_testing.specification.causal_specification import Scenario
from causal_testing.specification.variable import Input, Output, Meta
from scipy.stats import uniform, rv_discrete
from enum import Enum
import random
- from tests.test_helpers import create_temp_dir_if_non_existent, remove_temp_dir_if_existent


class TestObservationalDataCollector(unittest.TestCase):
@@ -17,9 +17,9 @@ class Color(Enum):
GREEN = "GREEN"
BLUE = "BLUE"

- temp_dir_path = create_temp_dir_if_non_existent()
- self.dag_dot_path = os.path.join(temp_dir_path, "dag.dot")
- self.observational_df_path = os.path.join(temp_dir_path, "observational_data.csv")
+ self.temp_dir_path = tempfile.mkdtemp()
+ self.dag_dot_path = os.path.join(self.temp_dir_path, "dag.dot")
+ self.observational_df_path = os.path.join(self.temp_dir_path, "observational_data.csv")
# Y = 3*X1 + X2*X3 + 10
self.observational_df = pd.DataFrame(
{"X1": [1, 2, 3, 4], "X2": [5, 6, 7, 8], "X3": [10, 20, 30, 40], "Y2": ["RED", "GREEN", "BLUE", "BLUE"]}
@@ -66,7 +66,7 @@ def populate_m(data):
assert all((m == 2 * x1 for x1, m in zip(data["X1"], data["M"])))

def tearDown(self) -> None:
- remove_temp_dir_if_existent()
+ shutil.rmtree(self.temp_dir_path)


if __name__ == "__main__":
10 changes: 5 additions & 5 deletions tests/generation_tests/test_abstract_test_case.py
@@ -1,13 +1,13 @@
import unittest
import os
+ import shutil, tempfile
import pandas as pd
import numpy as np
from causal_testing.generation.abstract_causal_test_case import AbstractCausalTestCase
from causal_testing.generation.enum_gen import EnumGen
from causal_testing.specification.causal_specification import Scenario
from causal_testing.specification.variable import Input, Output
from scipy.stats import uniform, rv_discrete
- from tests.test_helpers import create_temp_dir_if_non_existent, remove_temp_dir_if_existent
from causal_testing.testing.causal_test_outcome import Positive
from z3 import And
from enum import Enum
@@ -29,9 +29,9 @@ class TestAbstractTestCase(unittest.TestCase):
"""

def setUp(self) -> None:
- temp_dir_path = create_temp_dir_if_non_existent()
- self.dag_dot_path = os.path.join(temp_dir_path, "dag.dot")
- self.observational_df_path = os.path.join(temp_dir_path, "observational_data.csv")
+ self.temp_dir_path = tempfile.mkdtemp()
+ self.dag_dot_path = os.path.join(self.temp_dir_path, "dag.dot")
+ self.observational_df_path = os.path.join(self.temp_dir_path, "observational_data.csv")
# Y = 3*X1 + X2*X3 + 10
self.observational_df = pd.DataFrame({"X1": [1, 2, 3, 4], "X2": [5, 6, 7, 8], "X3": [10, 20, 30, 40]})
self.observational_df["Y"] = self.observational_df.apply(
@@ -192,7 +192,7 @@ def test_feasible_constraints(self):
assert len(concrete_tests) < 1000

def tearDown(self) -> None:
- remove_temp_dir_if_existent()
+ shutil.rmtree(self.temp_dir_path)


if __name__ == "__main__":
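Both test modules above swap the shared create_temp_dir_if_non_existent / remove_temp_dir_if_existent helpers for the standard library: each test now gets its own directory from tempfile.mkdtemp() in setUp and removes it with shutil.rmtree in tearDown. A minimal sketch of the pattern (the test class and file names here are hypothetical, not taken from the suite):

```python
# Sketch of the per-test temporary directory pattern used in the updated tests.
import os
import shutil
import tempfile
import unittest


class ExampleTempDirTest(unittest.TestCase):
    def setUp(self) -> None:
        # A fresh directory per test, so runs cannot interfere with one another
        self.temp_dir_path = tempfile.mkdtemp()
        self.dag_dot_path = os.path.join(self.temp_dir_path, "dag.dot")

    def test_writes_into_temp_dir(self):
        with open(self.dag_dot_path, "w") as f:
            f.write("digraph G { X1 -> Y; }")
        self.assertTrue(os.path.exists(self.dag_dot_path))

    def tearDown(self) -> None:
        # Remove the directory and everything written into it
        shutil.rmtree(self.temp_dir_path)
```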