diff --git a/pydeseq2/dds.py b/pydeseq2/dds.py
index 13e0cd2b..573efca1 100644
--- a/pydeseq2/dds.py
+++ b/pydeseq2/dds.py
@@ -1,10 +1,7 @@
 import sys
 import time
 import warnings
-from typing import List
 from typing import Literal
-from typing import Optional
-from typing import Union
 from typing import cast
 
 import anndata as ad  # type: ignore
@@ -202,9 +199,9 @@ class DeseqDataSet(ad.AnnData):
     def __init__(
         self,
         *,
-        adata: Optional[ad.AnnData] = None,
-        counts: Optional[pd.DataFrame] = None,
-        metadata: Optional[pd.DataFrame] = None,
+        adata: ad.AnnData | None = None,
+        counts: pd.DataFrame | None = None,
+        metadata: pd.DataFrame | None = None,
         design: str | pd.DataFrame = "~condition",
         design_factors: str | list[str] | None = None,
         continuous_factors: list[str] | None = None,
@@ -217,8 +214,8 @@ def __init__(
         refit_cooks: bool = True,
         min_replicates: int = 7,
         beta_tol: float = 1e-8,
-        n_cpus: Optional[int] = None,
-        inference: Optional[Inference] = None,
+        n_cpus: int | None = None,
+        inference: Inference | None = None,
         quiet: bool = False,
         low_memory: bool = False,
     ) -> None:
@@ -342,7 +339,7 @@ def variables(self):
     def vst(
         self,
         use_design: bool = False,
-        fit_type: Optional[Literal["parametric", "mean"]] = None,
+        fit_type: Literal["parametric", "mean"] | None = None,
     ) -> None:
         """Fit a variance stabilizing transformation, and apply it to normalized counts.
 
@@ -427,7 +424,7 @@ def vst_fit(
         self.obsm["design_matrix"] = self.obsm["design_matrix_buffer"].copy()
         del self.obsm["design_matrix_buffer"]
 
-    def vst_transform(self, counts: Optional[np.ndarray] = None) -> np.ndarray:
+    def vst_transform(self, counts: np.ndarray | None = None) -> np.ndarray:
         """Apply the variance stabilizing transformation.
 
         Uses the results from the ``vst_fit`` method.
@@ -501,7 +498,7 @@ def vst_transform(self, counts: Optional[np.ndarray] = None) -> np.ndarray:
                 f"Found fit_type '{self.vst_fit_type}'. Expected 'parametric' or 'mean'."
             )
 
-    def deseq2(self, fit_type: Optional[Literal["parametric", "mean"]] = None) -> None:
+    def deseq2(self, fit_type: Literal["parametric", "mean"] | None = None) -> None:
         """Perform dispersion and log fold-change (LFC) estimation.
 
         Wrapper for the first part of the PyDESeq2 pipeline.
@@ -568,10 +565,8 @@ def contrast(self, *args, **kwargs):
 
     def fit_size_factors(
         self,
-        fit_type: Optional[Literal["ratio", "poscounts", "iterative"]] = None,
-        control_genes: Optional[
-            Union[np.ndarray, List[str], List[int], pd.Index]
-        ] = None,
+        fit_type: Literal["ratio", "poscounts", "iterative"] | None = None,
+        control_genes: np.ndarray | list[str] | list[int] | pd.Index | None = None,
     ) -> None:
         """Fit sample-wise deseq2 normalization (size) factors.
 
@@ -599,7 +594,7 @@ def fit_size_factors(
         fit_type : str
             The normalization method to use: "ratio", "poscounts" or "iterative".
             (default: ``"ratio"``).
-        control_genes : ndarray, list, pandas.Index, or None
+        control_genes : ndarray, list, or pandas.Index, optional
             Genes to use as control genes for size factor fitting.
             If None, all genes are used. (default: ``None``).
         """
@@ -1127,7 +1122,7 @@ def _fit_MoM_dispersions(self) -> None:
         )
 
     def plot_dispersions(
-        self, log: bool = True, save_path: Optional[str] = None, **kwargs
+        self, log: bool = True, save_path: str | None = None, **kwargs
     ) -> None:
         """Plot dispersions.
 
@@ -1139,7 +1134,7 @@ def plot_dispersions(
         log : bool
             Whether to log scale x and y axes (``default=True``).
 
-        save_path : str or None
+        save_path : str, optional
             The path where to save the plot.
             If left None, the plot won't be saved (``default=None``).
 
diff --git a/pydeseq2/default_inference.py b/pydeseq2/default_inference.py
index 1a25e52b..9bd67bad 100644
--- a/pydeseq2/default_inference.py
+++ b/pydeseq2/default_inference.py
@@ -1,6 +1,4 @@
 from typing import Literal
-from typing import Optional
-from typing import Tuple
 
 import numpy as np
 import pandas as pd
@@ -41,7 +39,7 @@ def __init__(
         self,
         joblib_verbosity: int = 0,
         batch_size: int = 128,
-        n_cpus: Optional[int] = None,
+        n_cpus: int | None = None,
         backend: str = "loky",
     ):
         self._joblib_verbosity = joblib_verbosity
@@ -94,7 +92,7 @@ def irls(  # noqa: D102
         max_beta: float = 30,
         optimizer: Literal["BFGS", "L-BFGS-B"] = "L-BFGS-B",
         maxiter: int = 250,
-    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
+    ) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
         with parallel_backend(self._backend, inner_max_num_threads=1):
             res = Parallel(
                 n_jobs=self.n_cpus,
@@ -133,11 +131,11 @@ def alpha_mle(  # noqa: D102
         alpha_hat: np.ndarray,
         min_disp: float,
         max_disp: float,
-        prior_disp_var: Optional[float] = None,
+        prior_disp_var: float | None = None,
         cr_reg: bool = True,
         prior_reg: bool = False,
         optimizer: Literal["BFGS", "L-BFGS-B"] = "L-BFGS-B",
-    ) -> Tuple[np.ndarray, np.ndarray]:
+    ) -> tuple[np.ndarray, np.ndarray]:
         with parallel_backend(self._backend, inner_max_num_threads=1):
             res = Parallel(
                 n_jobs=self.n_cpus,
@@ -171,10 +169,10 @@ def wald_test(  # noqa: D102
         ridge_factor: np.ndarray,
         contrast: np.ndarray,
         lfc_null: np.ndarray,
-        alt_hypothesis: Optional[
-            Literal["greaterAbs", "lessAbs", "greater", "less"]
-        ] = None,
+        alt_hypothesis: (
+            Literal["greaterAbs", "lessAbs", "greater", "less"] | None
+        ) = None,
-    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+    ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
         num_genes = mu.shape[1]
         with parallel_backend(self._backend, inner_max_num_threads=1):
             res = Parallel(
@@ -201,7 +199,7 @@ def wald_test(  # noqa: D102
 
     def dispersion_trend_gamma_glm(  # noqa: D102
         self, covariates: pd.Series, targets: pd.Series
-    ) -> Tuple[np.ndarray, np.ndarray, bool]:
+    ) -> tuple[np.ndarray, np.ndarray, bool]:
         covariates_w_intercept = covariates.to_frame()
         covariates_w_intercept.insert(0, "intercept", 1)
         covariates_fit = covariates_w_intercept.values
@@ -241,7 +239,7 @@ def lfc_shrink_nbinom_glm(  # noqa: D102
         prior_scale: float,
         optimizer: str,
         shrink_index: int,
-    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+    ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
         with parallel_backend(self._backend, inner_max_num_threads=1):
             num_genes = counts.shape[1]
             res = Parallel(
diff --git a/pydeseq2/ds.py b/pydeseq2/ds.py
index ebb9111e..ccb9945e 100644
--- a/pydeseq2/ds.py
+++ b/pydeseq2/ds.py
@@ -1,8 +1,6 @@
 import sys
 import time
-from typing import List
 from typing import Literal
-from typing import Optional
 
 # import anndata as ad
 import numpy as np
@@ -57,7 +55,7 @@ class DeseqStats:
     lfc_null : float
         The (log2) log fold change under the null hypothesis. (default: ``0``).
 
-    alt_hypothesis : str or None
+    alt_hypothesis : str, optional
         The alternative hypothesis for computing wald p-values. By default, the normal
         Wald test assesses deviation of the estimated log fold change from the null
         hypothesis, as given by ``lfc_null``.
@@ -81,7 +79,7 @@ class DeseqStats:
     lfc_null : float
         The (log2) log fold change under the null hypothesis.
 
-    alt_hypothesis : str or None
+    alt_hypothesis : str, optional
         The alternative hypothesis for computing wald p-values.
     contrast_vector : ndarray
@@ -132,16 +130,16 @@ class DeseqStats:
     def __init__(
         self,
         dds: DeseqDataSet,
-        contrast: List[str] | np.ndarray,
+        contrast: list[str] | np.ndarray,
         alpha: float = 0.05,
         cooks_filter: bool = True,
         independent_filter: bool = True,
-        prior_LFC_var: Optional[np.ndarray] = None,
+        prior_LFC_var: np.ndarray | None = None,
         lfc_null: float = 0.0,
-        alt_hypothesis: Optional[
-            Literal["greaterAbs", "lessAbs", "greater", "less"]
-        ] = None,
-        inference: Optional[Inference] = None,
+        alt_hypothesis: (
+            Literal["greaterAbs", "lessAbs", "greater", "less"] | None
+        ) = None,
+        inference: Inference | None = None,
         quiet: bool = False,
     ) -> None:
         assert (
@@ -436,7 +434,7 @@ def lfc_shrink(self, coeff: str, adapt: bool = True) -> None:
         if not self.quiet:
             print(self.results_df)
 
-    def plot_MA(self, log: bool = True, save_path: Optional[str] = None, **kwargs):
+    def plot_MA(self, log: bool = True, save_path: str | None = None, **kwargs):
         """
         Create an log ratio (M)-average (A) plot using matplotlib.
 
@@ -449,7 +447,7 @@ def plot_MA(self, log: bool = True, save_path: Optional[str] = None, **kwargs):
         log : bool
             Whether or not to log scale x and y axes (``default=True``).
 
-        save_path : str or None
+        save_path : str, optional
             The path where to save the plot.
             If left None, the plot won't be saved (``default=None``).
 
diff --git a/pydeseq2/grid_search.py b/pydeseq2/grid_search.py
index e1be50ee..a8f1e77b 100644
--- a/pydeseq2/grid_search.py
+++ b/pydeseq2/grid_search.py
@@ -1,5 +1,3 @@
-from typing import Optional
-
 import numpy as np
 from scipy.special import gammaln  # type: ignore
 
@@ -58,7 +56,7 @@ def grid_fit_alpha(
     alpha_hat: float,
     min_disp: float,
     max_disp: float,
-    prior_disp_var: Optional[float] = None,
+    prior_disp_var: float | None = None,
     cr_reg: bool = True,
     prior_reg: bool = False,
     grid_length: int = 100,
@@ -87,7 +85,7 @@ def grid_fit_alpha(
     max_disp : float
        Upper threshold for dispersion parameters.
 
-    prior_disp_var : float
+    prior_disp_var : float, optional
        Prior dispersion variance.
 
    cr_reg : bool
diff --git a/pydeseq2/inference.py b/pydeseq2/inference.py
index 801a86c4..40bff73b 100644
--- a/pydeseq2/inference.py
+++ b/pydeseq2/inference.py
@@ -1,8 +1,6 @@
 from abc import ABC
 from abc import abstractmethod
 from typing import Literal
-from typing import Optional
-from typing import Tuple
 
 import numpy as np
 import pandas as pd
@@ -57,7 +55,7 @@ def irls(
         max_beta: float = 30,
         optimizer: Literal["BFGS", "L-BFGS-B"] = "L-BFGS-B",
         maxiter: int = 250,
-    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
+    ) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
         r"""Fit a NB GLM wit log-link to predict counts from the design matrix.
 
         See equations (1-2) in the DESeq2 paper.
@@ -128,11 +126,11 @@ def alpha_mle(
         alpha_hat: np.ndarray,
         min_disp: float,
         max_disp: float,
-        prior_disp_var: Optional[float] = None,
+        prior_disp_var: float | None = None,
         cr_reg: bool = True,
         prior_reg: bool = False,
         optimizer: Literal["BFGS", "L-BFGS-B"] = "L-BFGS-B",
-    ) -> Tuple[np.ndarray, np.ndarray]:
+    ) -> tuple[np.ndarray, np.ndarray]:
         """Estimate the dispersion parameter of a negative binomial GLM.
 
         Parameters
         ----------
@@ -155,7 +153,7 @@ def alpha_mle(
         max_disp : float
             Upper threshold for dispersion parameters.
 
-        prior_disp_var : float
+        prior_disp_var : float, optional
             Prior dispersion variance.
         cr_reg : bool
@@ -188,10 +186,10 @@ def wald_test(
         ridge_factor: np.ndarray,
         contrast: np.ndarray,
         lfc_null: np.ndarray,
-        alt_hypothesis: Optional[
-            Literal["greaterAbs", "lessAbs", "greater", "less"]
-        ] = None,
+        alt_hypothesis: (
+            Literal["greaterAbs", "lessAbs", "greater", "less"] | None
+        ) = None,
-    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+    ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
         """Run Wald test for differential expression.
 
         Computes Wald statistics, standard error and p-values from
@@ -285,7 +283,7 @@ def fit_moments_dispersions(
     @abstractmethod
     def dispersion_trend_gamma_glm(
         self, covariates: pd.Series, targets: pd.Series
-    ) -> Tuple[np.ndarray, np.ndarray, bool]:
+    ) -> tuple[np.ndarray, np.ndarray, bool]:
         """Fit a gamma glm on gene dispersions.
 
         The intercept should be concatenated in this method
@@ -319,7 +317,7 @@ def lfc_shrink_nbinom_glm(
         prior_scale: float,
         optimizer: str,
         shrink_index: int,
-    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+    ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
         """Fit a negative binomial MAP LFC using an apeGLM prior.
 
         Only the LFC is shrinked, and not the intercept.
diff --git a/pydeseq2/preprocessing.py b/pydeseq2/preprocessing.py
index 7704526a..0c0a5a34 100644
--- a/pydeseq2/preprocessing.py
+++ b/pydeseq2/preprocessing.py
@@ -1,13 +1,10 @@
-from typing import Tuple
-from typing import Union
-
 import numpy as np
 import pandas as pd
 
 
 def deseq2_norm(
-    counts: Union[pd.DataFrame, np.ndarray]
-) -> Tuple[Union[pd.DataFrame, np.ndarray], Union[pd.DataFrame, np.ndarray]]:
+    counts: pd.DataFrame | np.ndarray,
+) -> tuple[pd.DataFrame | np.ndarray, pd.DataFrame | np.ndarray]:
     """Return normalized counts and size_factors.
 
     Uses the median of ratios method.
@@ -31,9 +28,7 @@
     return deseq2_counts, size_factors
 
 
-def deseq2_norm_fit(
-    counts: Union[pd.DataFrame, np.ndarray]
-) -> Tuple[np.ndarray, np.ndarray]:
+def deseq2_norm_fit(counts: pd.DataFrame | np.ndarray) -> tuple[np.ndarray, np.ndarray]:
     """Return ``logmeans`` and ``filtered_genes``, needed in the median of ratios method.
 
     ``Logmeans`` and ``filtered_genes`` can then be used to normalize external datasets.
@@ -62,10 +57,10 @@ def deseq2_norm_fit(
 
 
 def deseq2_norm_transform(
-    counts: Union[pd.DataFrame, np.ndarray],
+    counts: pd.DataFrame | np.ndarray,
     logmeans: np.ndarray,
     filtered_genes: np.ndarray,
-) -> Tuple[Union[pd.DataFrame, np.ndarray], Union[pd.DataFrame, np.ndarray]]:
+) -> tuple[pd.DataFrame | np.ndarray, pd.DataFrame | np.ndarray]:
     """Return normalized counts and size factors from the median of ratios method.
 
     Can be applied on external dataset, using the ``logmeans`` and ``filtered_genes``
diff --git a/pydeseq2/utils.py b/pydeseq2/utils.py
index db09e90b..5b031320 100644
--- a/pydeseq2/utils.py
+++ b/pydeseq2/utils.py
@@ -3,9 +3,6 @@
 from math import floor
 from pathlib import Path
 from typing import Literal
-from typing import Optional
-from typing import Tuple
-from typing import Union
 from typing import cast
 
 import numpy as np
@@ -109,7 +106,7 @@ def load_example_data(
     return df
 
 
-def test_valid_counts(counts: Union[pd.DataFrame, np.ndarray]) -> None:
+def test_valid_counts(counts: pd.DataFrame | np.ndarray) -> None:
     """Test that the count matrix contains valid inputs.
 
     More precisely, test that inputs are non-negative integers.
@@ -138,9 +135,9 @@ def test_valid_counts(counts: Union[pd.DataFrame, np.ndarray]) -> None:
 
 
 def dispersion_trend(
-    normed_mean: Union[float, np.ndarray],
-    coeffs: Union["pd.Series[float]", np.ndarray],
-) -> Union[float, np.ndarray]:
+    normed_mean: float | np.ndarray,
+    coeffs: pd.Series | np.ndarray,
+) -> float | np.ndarray:
     r"""Return dispersion trend from normalized counts.
 
     :math:`a_1/ \mu + a_0`.
@@ -165,8 +162,8 @@ def dispersion_trend(
 
 
 def nb_nll(
-    counts: np.ndarray, mu: np.ndarray, alpha: Union[float, np.ndarray]
-) -> Union[float, np.ndarray]:
+    counts: np.ndarray, mu: np.ndarray, alpha: float | np.ndarray
+) -> float | np.ndarray:
     r"""Neg log-likelihood of a negative binomial of parameters ``mu`` and ``alpha``.
 
     Mathematically, if ``counts`` is a vector of counting entries :math:`y_i`
@@ -285,7 +282,7 @@ def irls_solver(
     max_beta: float = 30,
     optimizer: Literal["BFGS", "L-BFGS-B"] = "L-BFGS-B",
     maxiter: int = 250,
-) -> Tuple[np.ndarray, np.ndarray, np.ndarray, bool]:
+) -> tuple[np.ndarray, np.ndarray, np.ndarray, bool]:
     r"""Fit a NB GLM wit log-link to predict counts from the design matrix.
 
     See equations (1-2) in the DESeq2 paper.
@@ -449,11 +446,11 @@ def fit_alpha_mle(
     alpha_hat: float,
     min_disp: float,
     max_disp: float,
-    prior_disp_var: Optional[float] = None,
+    prior_disp_var: float | None = None,
     cr_reg: bool = True,
     prior_reg: bool = False,
     optimizer: Literal["BFGS", "L-BFGS-B"] = "L-BFGS-B",
-) -> Tuple[float, bool]:
+) -> tuple[float, bool]:
     """Estimate the dispersion parameter of a negative binomial GLM.
 
     Note: it is possible to pass counts, design_matrix and mu arguments in the form of
@@ -568,7 +565,7 @@ def dloss(log_alpha: float) -> float:
     )
 
 
-def trimmed_mean(x: np.ndarray, trim: float = 0.1, **kwargs) -> Union[float, np.ndarray]:
+def trimmed_mean(x: np.ndarray, trim: float = 0.1, **kwargs) -> float | np.ndarray:
     """Return trimmed mean.
 
     Compute the mean after trimming data of its smallest and largest quantiles.
@@ -656,7 +653,7 @@ def trimfn(x: float) -> int:
 
 def trimmed_variance(
     x: np.ndarray, trim: float = 0.125, axis: int = 0
-) -> Union[float, np.ndarray]:
+) -> float | np.ndarray:
     """Return trimmed variance.
 
     Compute the variance after trimming data of its smallest and largest quantiles.
@@ -727,8 +724,8 @@ def wald_test(
     ridge_factor: np.ndarray,
     contrast: np.ndarray,
     lfc_null: float,
-    alt_hypothesis: Optional[Literal["greaterAbs", "lessAbs", "greater", "less"]],
-) -> Tuple[float, float, float]:
+    alt_hypothesis: Literal["greaterAbs", "lessAbs", "greater", "less"] | None,
+) -> tuple[float, float, float]:
     """Run Wald test for differential expression.
 
     Computes Wald statistics, standard error and p-values from
@@ -757,7 +754,7 @@ def wald_test(
     lfc_null : float
         The (log2) log fold change under the null hypothesis.
 
-    alt_hypothesis : str or None
+    alt_hypothesis : str, optional
         The alternative hypothesis for computing wald p-values.
 
     Returns
     -------
@@ -962,14 +959,14 @@ def robust_method_of_moments_disp(
     return alpha
 
 
-def get_num_processes(n_cpus: Optional[int] = None) -> int:
+def get_num_processes(n_cpus: int | None) -> int:
     """Return the number of processes to use for multiprocessing.
 
     Returns the maximum number of available cpus by default.
 
     Parameters
     ----------
-    n_cpus : int or None
+    n_cpus : int, optional
         Desired number of cpus. If ``None``, will return the number of
         available cpus. (default: ``None``).
@@ -998,7 +995,7 @@ def nbinomGLM(
     prior_scale: float,
     optimizer="L-BFGS-B",
     shrink_index: int = 1,
-) -> Tuple[np.ndarray, np.ndarray, bool]:
+) -> tuple[np.ndarray, np.ndarray, bool]:
     """Fit a negative binomial MAP LFC using an apeGLM prior.
 
     Only the LFC is shrinked, and not the intercept.
@@ -1234,7 +1231,7 @@ def make_scatter(
     legend_labels: list,
     x_val: np.array,
     log: bool = True,
-    save_path: Optional[str] = None,
+    save_path: str | None = None,
     **kwargs,
 ) -> None:
     """
@@ -1256,7 +1253,7 @@ def make_scatter(
     log : bool
         Whether or not to log scale features and targets axes (``default=True``).
 
-    save_path : str or None
+    save_path : str, optional
         The path where to save the plot.
         If left None, the plot won't be saved (``default=None``).
 
@@ -1303,9 +1300,9 @@ def make_MA_plot(
     results_df: pd.DataFrame,
     padj_thresh: float = 0.05,
     log: bool = True,
-    save_path: Optional[str] = None,
+    save_path: str | None = None,
     lfc_null: float = 0,
-    alt_hypothesis: Optional[Literal["greaterAbs", "lessAbs", "greater", "less"]] = None,
+    alt_hypothesis: Literal["greaterAbs", "lessAbs", "greater", "less"] | None = None,
     **kwargs,
 ) -> None:
     """
@@ -1326,14 +1323,14 @@ def make_MA_plot(
     log : bool
         Whether or not to log scale features and targets axes (``default=True``).
 
-    save_path : str or None
+    save_path : str, optional
         The path where to save the plot.
         If left None, the plot won't be saved (``default=None``).
 
     lfc_null : float
         The (log2) log fold change under the null hypothesis. (default: ``0``).
 
-    alt_hypothesis : str or None
+    alt_hypothesis : str, optional
         The alternative hypothesis for computing wald p-values. (default: ``None``).
 
     **kwargs