Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bump to 0.29.0; bump scipy and pandas dependencies #1621

Merged
merged 4 commits into from
Jun 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ jobs:
fail-fast: true
matrix:
os: ["ubuntu-latest"]
python-version: ["3.9", "3.10", "3.11"]
python-version: ["3.9", "3.10", "3.11", "3.12"]

steps:
- name: Checkout source
Expand Down
8 changes: 7 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
## Changelog

#### 0.28.0 - 2023-01-03
#### 0.29.0 - 2024-06-25
- update dependencies (pandas >= 2.1)
- update dependencies (scipy >= 1.7)



#### 0.28.0 - 2024-01-03
- Fixes bins that are far into the future when using `survival_table_from_events`, see #1587
- Removed `sklearn_adaptor`. It was a terrible hack, and causing more confusion and support debt than I want. This cleans up our API and simplifies the library. ✨ There's no replacement, and I doubt I'll introduce one ✨
- Fix pandas>=2.0 compatibility.
Expand Down
6 changes: 4 additions & 2 deletions lifelines/fitters/coxph_fitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -3223,7 +3223,7 @@ def predict_cumulative_hazard(self, df, times=None, conditional_after=None) -> p

for stratum, stratified_X in df.groupby(self.strata):
log_lambdas_ = anp.array(
[0] + [self.params_[self._strata_labeler(stratum, i)][0] for i in range(2, self.n_breakpoints + 2)]
[0] + [self.params_.loc[self._strata_labeler(stratum, i)].iloc[0] for i in range(2, self.n_breakpoints + 2)]
)
lambdas_ = np.exp(log_lambdas_)

Expand All @@ -3237,7 +3237,9 @@ def predict_cumulative_hazard(self, df, times=None, conditional_after=None) -> p
return cumulative_hazard

else:
log_lambdas_ = np.array([0] + [self.params_[param][0] for param in self._fitted_parameter_names if param != "beta_"])
log_lambdas_ = np.array(
[0] + [self.params_.loc[param].iloc[0] for param in self._fitted_parameter_names if param != "beta_"]
)
lambdas_ = np.exp(log_lambdas_)

Xs = self.regressors.transform_df(df)
Expand Down
2 changes: 1 addition & 1 deletion lifelines/fitters/npmle.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ def reconstruct_survival_function(

# First backfill at events between known observations
# Second fill all events _outside_ known obs with running_sum
return full_dataframe.combine_first(df).bfill().fillna(running_sum).clip(lower=0.0)
return full_dataframe.combine_first(df).astype(float).bfill().fillna(running_sum).clip(lower=0.0)


def npmle_compute_confidence_intervals(left, right, mle_, alpha=0.05, samples=1000):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def _add_penalty(self, params, neg_ll):
coef_penalty = 0
if self.penalizer > 0:
for i in range(params_stacked.shape[1]):
if not self._cols_to_not_penalize[i]:
if not self._cols_to_not_penalize.iloc[i]:
coef_penalty = coef_penalty + (params_stacked[:, i]).var()

return neg_ll + self.penalizer * coef_penalty
Expand Down
4 changes: 2 additions & 2 deletions lifelines/generate_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from scipy import stats
from scipy.optimize import newton
from scipy.integrate import cumtrapz
from scipy.integrate import cumulative_trapezoid

random = np.random

Expand Down Expand Up @@ -308,7 +308,7 @@ def cumulative_integral(fx, x):
fx: (n,d) numpy array, what you want to integral of
x: (n,) numpy array, location to integrate over.
"""
return cumtrapz(fx.T, x, initial=0).T
return cumulative_trapezoid(fx.T, x, initial=0).T


def construct_survival_curves(hazard_rates, timelines):
Expand Down
2 changes: 1 addition & 1 deletion lifelines/tests/test_estimation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2008,7 +2008,7 @@ def test_joblib_serialization(self, rossi, regression_models):
def test_fit_will_accept_object_dtype_as_event_col(self, regression_models_sans_strata_model, rossi):
# issue #638
rossi["arrest"] = rossi["arrest"].astype(object)
rossi["arrest"].iloc[0] = None
rossi.loc[0, "arrest"] = None

assert rossi["arrest"].dtype == object
rossi = rossi.dropna()
Expand Down
2 changes: 1 addition & 1 deletion lifelines/tests/utils/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ def test_survival_table_from_events_at_risk_column():
1.0,
]
df = utils.survival_table_from_events(df["T"], df["E"])
assert list(df["at_risk"][1:]) == expected # skip the first event as that is the birth time, 0.
assert list(df["at_risk"].loc[1:]) == expected # skip the first event as that is the birth time, 0.


def test_survival_table_to_events_casts_to_float():
Expand Down
10 changes: 5 additions & 5 deletions lifelines/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -556,7 +556,7 @@ def _group_event_table_by_intervals(event_table, intervals) -> pd.DataFrame:

intervals = np.arange(0, event_max + bin_width, bin_width)

event_table = event_table.groupby(pd.cut(event_table["event_at"], intervals, include_lowest=True)).agg(
event_table = event_table.groupby(pd.cut(event_table["event_at"], intervals, include_lowest=True), observed=False).agg(
{"removed": ["sum"], "observed": ["sum"], "censored": ["sum"], "at_risk": ["max"]}
)
# convert columns from multiindex
Expand Down Expand Up @@ -648,7 +648,7 @@ def datetimes_to_durations(
the units of time to use. See Pandas 'freq'. Default 'D' for days.
dayfirst: bool, optional (default=False)
see Pandas `to_datetime`
na_values : list, optional
na_values : list[str], optional
list of values to recognize as NA/NaN. Ex: ['', 'NaT']
format:
see Pandas `to_datetime`
Expand Down Expand Up @@ -679,7 +679,7 @@ def datetimes_to_durations(
start_times = pd.Series(start_times).copy()
end_times = pd.Series(end_times).copy()

C = ~(pd.isnull(end_times).values | end_times.isin(na_values or [""]))
C = ~(pd.isnull(end_times).values | end_times.astype(str).isin(na_values or [""]))
end_times[~C] = fill_date_
start_times_ = pd.to_datetime(start_times, dayfirst=dayfirst, format=format)
end_times_ = pd.to_datetime(end_times, dayfirst=dayfirst, errors="coerce", format=format)
Expand Down Expand Up @@ -1464,7 +1464,7 @@ def expand(df, cvs):
cv = cv.sort_values([id_col, duration_col])
cvs = cv.pipe(remove_redundant_rows).pipe(transform_cv_to_long_format).groupby(id_col, sort=True)

long_form_df = long_form_df.groupby(id_col, group_keys=False, sort=True).apply(expand, cvs=cvs)
long_form_df = long_form_df.groupby(id_col, group_keys=False, sort=True)[long_form_df.columns].apply(expand, cvs=cvs)
return long_form_df.reset_index(drop=True)


Expand Down Expand Up @@ -1506,7 +1506,7 @@ def covariates_from_event_matrix(df, id_col) -> pd.DataFrame:
"""
df = df.set_index(id_col)
df = df.fillna(np.inf)
df = df.stack(dropna=False).reset_index()
df = df.stack(future_stack=True).reset_index()
df.columns = [id_col, "event", "duration"]
df["_counter"] = 1
return (
Expand Down
2 changes: 1 addition & 1 deletion lifelines/version.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

__version__ = "0.28.0"
__version__ = "0.29.0"
4 changes: 2 additions & 2 deletions reqs/base-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
numpy>=1.14.0,<2.0
scipy>=1.2.0
pandas>=1.2.0
scipy>=1.7.0
pandas>=2.1
matplotlib>=3.0
autograd>=1.5
autograd-gamma>=0.3
Expand Down
2 changes: 1 addition & 1 deletion reqs/dev-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ black
dill>=0.3.6
statsmodels
flaky
scikit-learn>=0.22.0
Jinja2
joblib

# ex: `py.test` in the docs/ folder. See conftest.py in docs/ first
sybil
Expand Down
Loading