From e637875a397b227e5dc6b0789e4f0e663e58e3e1 Mon Sep 17 00:00:00 2001
From: Kevin Sheppard <kevin.k.sheppard@gmail.com>
Date: Tue, 2 Jan 2024 12:49:41 +0000
Subject: [PATCH] MAINT: Add NumPy 2 Compat

---
 azure-pipelines.yml                       |  1 -
 ci/azure_template_posix.yml               | 10 ++++++++--
 ci/install-posix.sh                       |  6 ++++++
 linearmodels/compat/pandas.py             |  9 +++++++++
 linearmodels/iv/results.py                |  6 +++++-
 linearmodels/panel/data.py                |  7 ++++++-
 linearmodels/panel/utility.py             |  4 +++-
 linearmodels/shared/hypotheses.py         | 12 ++++++------
 linearmodels/tests/panel/_utility.py      |  4 +++-
 linearmodels/tests/shared/test_utility.py |  4 +++-
 pyproject.toml                            |  2 +-
 11 files changed, 50 insertions(+), 15 deletions(-)
 create mode 100644 linearmodels/compat/pandas.py

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index d2105166a2..6b46859021 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -10,7 +10,6 @@ variables:
   VML_NUM_THREADS: 1
   OPENBLAS_NUM_THREADS: 1
   PYTHONHASHSEED: 12345678 # Ensure tests are correctly gathered by xdist
-  SETUPTOOLS_USE_DISTUTILS: "stdlib"
   TEST_INSTALL: false
   MPLBACKEND: agg
   coverage: true
diff --git a/ci/azure_template_posix.yml b/ci/azure_template_posix.yml
index 234448f30b..22231d9667 100644
--- a/ci/azure_template_posix.yml
+++ b/ci/azure_template_posix.yml
@@ -60,10 +60,16 @@ jobs:
       python311_latest:
         python.version: '3.11'
         XXHASH: true
-      python311_copy_on_write:
-        python.version: '3.11'
+      python312_latest:
+        python.version: '3.12'
+        XXHASH: true
+      python312_copy_on_write:
+        python.version: '3.12'
         XXHASH: true
         LM_TEST_COPY_ON_WRITE: 1
+      python312_pre:
+        python.version: '3.12'
+        PIP_PRE: true
     maxParallel: 10
 
   steps:
diff --git a/ci/install-posix.sh b/ci/install-posix.sh
index 06073d6b34..11dc3272a2 100644
--- a/ci/install-posix.sh
+++ b/ci/install-posix.sh
@@ -13,3 +13,9 @@ if [[ -n ${FORMULAIC} ]]; then CMD="$CMD formulaic~=${FORMULAIC}"; fi
 if [[ -n ${XXHASH} ]]; then CMD="$CMD xxhash"; fi
 echo "$CMD"
 eval "$CMD"
+
+if [[ "${PIP_PRE}" == true ]]; then  # opt-in: swap in pre-release builds
+  python -m pip uninstall -y numpy pandas scipy matplotlib statsmodels
+  python -m pip install -i https://pypi.anaconda.org/scientific-python-nightly-wheels/simple numpy pandas scipy matplotlib --upgrade --use-deprecated=legacy-resolver
+  python -m pip install git+https://github.com/statsmodels/statsmodels.git --upgrade --no-build-isolation -v
+fi
diff --git a/linearmodels/compat/pandas.py b/linearmodels/compat/pandas.py
new file mode 100644
index 0000000000..3b424f218d
--- /dev/null
+++ b/linearmodels/compat/pandas.py
@@ -0,0 +1,9 @@
+from packaging.version import parse
+import pandas as pd
+
+PANDAS_VERSION = parse(pd.__version__)  # installed pandas version, parsed once
+PD_GTE_21 = not (PANDAS_VERSION <= parse("2.0.99"))  # also True for 2.1 dev/rc builds
+PD_LT_22 = PANDAS_VERSION <= parse("2.1.99")  # False from 2.2.0 pre-releases onward
+ANNUAL_FREQ = "A-DEC" if PD_LT_22 else "YE-DEC"  # value for date_range(freq=...)
+
+__all__ = ["ANNUAL_FREQ", "PD_GTE_21", "PD_LT_22"]
diff --git a/linearmodels/iv/results.py b/linearmodels/iv/results.py
index 663ecdc9cb..bde9620d23 100644
--- a/linearmodels/iv/results.py
+++ b/linearmodels/iv/results.py
@@ -764,7 +764,11 @@ def diagnostics(self) -> DataFrame:
         ]
         out_df = out_df[cols]
         for col in out_df:
-            out_df[col] = to_numeric(out_df[col], errors="ignore")
+            try:
+                out_df[col] = to_numeric(out_df[col])
+            except (ValueError, TypeError):
+                # Non-numeric column (ValueError/TypeError): leave it as-is
+                pass
 
         return out_df
 
diff --git a/linearmodels/panel/data.py b/linearmodels/panel/data.py
index 64e7d4cf42..153bdff27f 100644
--- a/linearmodels/panel/data.py
+++ b/linearmodels/panel/data.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
 
+from linearmodels.compat.pandas import PD_GTE_21
+
 from collections.abc import Hashable, Sequence
 from itertools import product
 from typing import Literal, Union, cast, overload
@@ -227,7 +229,8 @@ def __init__(
                 if copy:
                     self._frame = self._frame.copy()
             else:
-                self._frame = DataFrame({var_name: x.T.stack(dropna=False)})
+                options = {"future_stack": True} if PD_GTE_21 else {"dropna": False}
+                self._frame = DataFrame({var_name: x.T.stack(**options)})
         elif isinstance(x, np.ndarray):
             if x.ndim not in (2, 3):
                 raise ValueError("2 or 3-d array required for numpy input")
@@ -298,6 +301,8 @@ def drop(self, locs: Series | BoolArray) -> None:
             Boolean array indicating observations to drop with reference to
             the dataframe view of the data
         """
+        if isinstance(locs, Series):
+            locs = np.asarray(locs)
         self._frame = self._frame.loc[~locs.ravel()]
         self._frame = self._minimize_multiindex(self._frame)
         # Reset panel and shape after a drop
diff --git a/linearmodels/panel/utility.py b/linearmodels/panel/utility.py
index 1aa7a5a78f..ec5a1c8054 100644
--- a/linearmodels/panel/utility.py
+++ b/linearmodels/panel/utility.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
 
+from linearmodels.compat.pandas import ANNUAL_FREQ
+
 from collections import defaultdict
 from typing import NamedTuple, TypeVar, cast
 
@@ -613,7 +615,7 @@ def generate_panel_data(
         x.flat[locs] = np.nan
 
     entities = [f"firm{i}" for i in range(n)]
-    time = [dt for dt in date_range("1-1-1900", periods=t, freq="A-DEC")]
+    time = [dt for dt in date_range("1-1-1900", periods=t, freq=ANNUAL_FREQ)]
     var_names = [f"x{i}" for i in range(k)]
     if const:
         var_names[1:] = var_names[:-1]
diff --git a/linearmodels/shared/hypotheses.py b/linearmodels/shared/hypotheses.py
index 18a55cc21f..05b61c59b6 100644
--- a/linearmodels/shared/hypotheses.py
+++ b/linearmodels/shared/hypotheses.py
@@ -117,13 +117,13 @@ class InvalidTestStatistic(WaldTestStatistic):
 
     def __init__(self, reason: str, *, name: str | None = None) -> None:
         self._reason = reason
-        super().__init__(np.NaN, "", df=1, df_denom=1, name=name)
+        super().__init__(np.nan, "", df=1, df_denom=1, name=name)
         self.dist_name = "None"
 
     @property
     def pval(self) -> float:
-        """Always returns np.NaN"""
-        return np.NaN
+        """Always returns np.nan"""
+        return np.nan
 
     @property
     def critical_values(self) -> None:
@@ -158,13 +158,13 @@ def __init__(self, *, reason: str | None = None, name: str | None = None):
         if reason is None:
             self._reason = "Test is not applicable to model specification"
 
-        super().__init__(np.NaN, "", df=1, df_denom=1, name=name)
+        super().__init__(np.nan, "", df=1, df_denom=1, name=name)
         self.dist_name = "None"
 
     @property
     def pval(self) -> float:
-        """Always returns np.NaN"""
-        return np.NaN
+        """Always returns np.nan"""
+        return np.nan
 
     @property
     def critical_values(self) -> None:
diff --git a/linearmodels/tests/panel/_utility.py b/linearmodels/tests/panel/_utility.py
index bbd771d38b..9825f4c967 100644
--- a/linearmodels/tests/panel/_utility.py
+++ b/linearmodels/tests/panel/_utility.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
 
+from linearmodels.compat.pandas import ANNUAL_FREQ
+
 from typing import Literal
 
 import numpy as np
@@ -121,7 +123,7 @@ def generate_data(
         return AttrDict(y=y, x=x, w=w, c=c, vc1=vc1, vc2=vc2)
 
     entities = ["firm" + str(i) for i in range(n)]
-    time = date_range("1-1-1900", periods=t, freq="A-DEC")
+    time = date_range("1-1-1900", periods=t, freq=ANNUAL_FREQ)
     var_names = ["x" + str(i) for i in range(k)]
     # y = DataFrame(y, index=time, columns=entities)
     y_df = panel_to_frame(
diff --git a/linearmodels/tests/shared/test_utility.py b/linearmodels/tests/shared/test_utility.py
index 76f383d396..abe6ae7620 100644
--- a/linearmodels/tests/shared/test_utility.py
+++ b/linearmodels/tests/shared/test_utility.py
@@ -1,3 +1,5 @@
+from linearmodels.compat.pandas import ANNUAL_FREQ
+
 import pickle
 import random
 import string
@@ -224,7 +226,7 @@ def test_panel_to_midf():
             ],
         )
     )
-    times = pd.date_range("1999-12-31", freq="A-DEC", periods=7)
+    times = pd.date_range("1999-12-31", freq=ANNUAL_FREQ, periods=7)
     var_names = [f"x.{i}" for i in range(1, 4)]
     df3 = panel_to_frame(x, var_names, times, entities, True)
     mi = pd.MultiIndex.from_product([times, entities])
diff --git a/pyproject.toml b/pyproject.toml
index 5d0d5e5294..3426fa102c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ requires = [
   "wheel",
   "setuptools_scm[toml]>=7,<8",
   "oldest-supported-numpy",
-  "numpy; python_version>='3.12'",
+  "numpy; python_version>='3.13'",
   "cython>=0.29.34"
 ]
 build-backend = "setuptools.build_meta"