diff --git a/pydeseq2/dds.py b/pydeseq2/dds.py
index 13e0cd2b..573efca1 100644
--- a/pydeseq2/dds.py
+++ b/pydeseq2/dds.py
@@ -1,10 +1,7 @@
 import sys
 import time
 import warnings
-from typing import List
 from typing import Literal
-from typing import Optional
-from typing import Union
 from typing import cast
 
 import anndata as ad  # type: ignore
@@ -202,9 +199,9 @@ class DeseqDataSet(ad.AnnData):
     def __init__(
         self,
         *,
-        adata: Optional[ad.AnnData] = None,
-        counts: Optional[pd.DataFrame] = None,
-        metadata: Optional[pd.DataFrame] = None,
+        adata: ad.AnnData | None = None,
+        counts: pd.DataFrame | None = None,
+        metadata: pd.DataFrame | None = None,
         design: str | pd.DataFrame = "~condition",
         design_factors: str | list[str] | None = None,
         continuous_factors: list[str] | None = None,
@@ -217,8 +214,8 @@ def __init__(
         refit_cooks: bool = True,
         min_replicates: int = 7,
         beta_tol: float = 1e-8,
-        n_cpus: Optional[int] = None,
-        inference: Optional[Inference] = None,
+        n_cpus: int | None = None,
+        inference: Inference | None = None,
         quiet: bool = False,
         low_memory: bool = False,
     ) -> None:
@@ -342,7 +339,7 @@ def variables(self):
     def vst(
         self,
         use_design: bool = False,
-        fit_type: Optional[Literal["parametric", "mean"]] = None,
+        fit_type: Literal["parametric", "mean"] | None = None,
     ) -> None:
         """Fit a variance stabilizing transformation, and apply it to normalized counts.
 
@@ -427,7 +424,7 @@ def vst_fit(
         self.obsm["design_matrix"] = self.obsm["design_matrix_buffer"].copy()
         del self.obsm["design_matrix_buffer"]
 
-    def vst_transform(self, counts: Optional[np.ndarray] = None) -> np.ndarray:
+    def vst_transform(self, counts: np.ndarray | None = None) -> np.ndarray:
         """Apply the variance stabilizing transformation.
 
         Uses the results from the ``vst_fit`` method.
@@ -501,7 +498,7 @@ def vst_transform(self, counts: Optional[np.ndarray] = None) -> np.ndarray:
                 f"Found fit_type '{self.vst_fit_type}'. Expected 'parametric' or 'mean'."
             )
 
-    def deseq2(self, fit_type: Optional[Literal["parametric", "mean"]] = None) -> None:
+    def deseq2(self, fit_type: Literal["parametric", "mean"] | None = None) -> None:
         """Perform dispersion and log fold-change (LFC) estimation.
 
         Wrapper for the first part of the PyDESeq2 pipeline.
@@ -568,10 +565,8 @@ def contrast(self, *args, **kwargs):
 
     def fit_size_factors(
         self,
-        fit_type: Optional[Literal["ratio", "poscounts", "iterative"]] = None,
-        control_genes: Optional[
-            Union[np.ndarray, List[str], List[int], pd.Index]
-        ] = None,
+        fit_type: Literal["ratio", "poscounts", "iterative"] | None = None,
+        control_genes: np.ndarray | list[str] | list[int] | pd.Index | None = None,
     ) -> None:
         """Fit sample-wise deseq2 normalization (size) factors.
 
@@ -599,7 +594,7 @@ def fit_size_factors(
         fit_type : str
             The normalization method to use: "ratio", "poscounts" or "iterative".
             (default: ``"ratio"``).
-        control_genes : ndarray, list, pandas.Index, or None
+        control_genes : ndarray, list, or pandas.Index, optional
             Genes to use as control genes for size factor fitting.
             If None, all genes are used. (default: ``None``).
         """
@@ -1127,7 +1122,7 @@ def _fit_MoM_dispersions(self) -> None:
         )
 
     def plot_dispersions(
-        self, log: bool = True, save_path: Optional[str] = None, **kwargs
+        self, log: bool = True, save_path: str | None = None, **kwargs
     ) -> None:
         """Plot dispersions.
 
@@ -1139,7 +1134,7 @@ def plot_dispersions(
         log : bool
             Whether to log scale x and y axes (``default=True``).
 
-        save_path : str or None
+        save_path : str, optional
             The path where to save the plot.
             If left None, the plot won't be saved (``default=None``).
 
diff --git a/pydeseq2/default_inference.py b/pydeseq2/default_inference.py
index 1a25e52b..9bd67bad 100644
--- a/pydeseq2/default_inference.py
+++ b/pydeseq2/default_inference.py
@@ -1,6 +1,4 @@
 from typing import Literal
-from typing import Optional
-from typing import Tuple
 
 import numpy as np
 import pandas as pd
@@ -41,7 +39,7 @@ def __init__(
         self,
         joblib_verbosity: int = 0,
         batch_size: int = 128,
-        n_cpus: Optional[int] = None,
+        n_cpus: int | None = None,
         backend: str = "loky",
     ):
         self._joblib_verbosity = joblib_verbosity
@@ -94,7 +92,7 @@ def irls(  # noqa: D102
         max_beta: float = 30,
         optimizer: Literal["BFGS", "L-BFGS-B"] = "L-BFGS-B",
         maxiter: int = 250,
-    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
+    ) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
         with parallel_backend(self._backend, inner_max_num_threads=1):
             res = Parallel(
                 n_jobs=self.n_cpus,
@@ -133,11 +131,11 @@ def alpha_mle(  # noqa: D102
         alpha_hat: np.ndarray,
         min_disp: float,
         max_disp: float,
-        prior_disp_var: Optional[float] = None,
+        prior_disp_var: float | None = None,
         cr_reg: bool = True,
         prior_reg: bool = False,
         optimizer: Literal["BFGS", "L-BFGS-B"] = "L-BFGS-B",
-    ) -> Tuple[np.ndarray, np.ndarray]:
+    ) -> tuple[np.ndarray, np.ndarray]:
         with parallel_backend(self._backend, inner_max_num_threads=1):
             res = Parallel(
                 n_jobs=self.n_cpus,
@@ -171,10 +169,10 @@ def wald_test(  # noqa: D102
         ridge_factor: np.ndarray,
         contrast: np.ndarray,
         lfc_null: np.ndarray,
-        alt_hypothesis: Optional[
-            Literal["greaterAbs", "lessAbs", "greater", "less"]
-        ] = None,
+        alt_hypothesis: (
+            Literal["greaterAbs", "lessAbs", "greater", "less"] | None
+        ) = None,
-    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+    ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
         num_genes = mu.shape[1]
         with parallel_backend(self._backend, inner_max_num_threads=1):
             res = Parallel(
@@ -201,7 +199,7 @@ def wald_test(  # noqa: D102
 
     def dispersion_trend_gamma_glm(  # noqa: D102
         self, covariates: pd.Series, targets: pd.Series
-    ) -> Tuple[np.ndarray, np.ndarray, bool]:
+    ) -> tuple[np.ndarray, np.ndarray, bool]:
         covariates_w_intercept = covariates.to_frame()
         covariates_w_intercept.insert(0, "intercept", 1)
         covariates_fit = covariates_w_intercept.values
@@ -241,7 +239,7 @@ def lfc_shrink_nbinom_glm(  # noqa: D102
         prior_scale: float,
         optimizer: str,
         shrink_index: int,
-    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+    ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
         with parallel_backend(self._backend, inner_max_num_threads=1):
             num_genes = counts.shape[1]
             res = Parallel(
diff --git a/pydeseq2/ds.py b/pydeseq2/ds.py
index ebb9111e..ccb9945e 100644
--- a/pydeseq2/ds.py
+++ b/pydeseq2/ds.py
@@ -1,8 +1,6 @@
 import sys
 import time
-from typing import List
 from typing import Literal
-from typing import Optional
 
 # import anndata as ad
 import numpy as np
@@ -57,7 +55,7 @@ class DeseqStats:
     lfc_null : float
         The (log2) log fold change under the null hypothesis. (default: ``0``).
 
-    alt_hypothesis : str or None
+    alt_hypothesis : str, optional
         The alternative hypothesis for computing wald p-values. By default, the normal
         Wald test assesses deviation of the estimated log fold change from the null
         hypothesis, as given by ``lfc_null``.
@@ -81,7 +79,7 @@ class DeseqStats:
     lfc_null : float
         The (log2) log fold change under the null hypothesis.
 
-    alt_hypothesis : str or None
+    alt_hypothesis : str, optional
         The alternative hypothesis for computing wald p-values.
     contrast_vector : ndarray
@@ -132,16 +130,16 @@ class DeseqStats:
     def __init__(
         self,
         dds: DeseqDataSet,
-        contrast: List[str] | np.ndarray,
+        contrast: list[str] | np.ndarray,
         alpha: float = 0.05,
         cooks_filter: bool = True,
         independent_filter: bool = True,
-        prior_LFC_var: Optional[np.ndarray] = None,
+        prior_LFC_var: np.ndarray | None = None,
         lfc_null: float = 0.0,
-        alt_hypothesis: Optional[
-            Literal["greaterAbs", "lessAbs", "greater", "less"]
-        ] = None,
-        inference: Optional[Inference] = None,
+        alt_hypothesis: (
+            Literal["greaterAbs", "lessAbs", "greater", "less"] | None
+        ) = None,
+        inference: Inference | None = None,
         quiet: bool = False,
     ) -> None:
         assert (
@@ -436,7 +434,7 @@ def lfc_shrink(self, coeff: str, adapt: bool = True) -> None:
         if not self.quiet:
             print(self.results_df)
 
-    def plot_MA(self, log: bool = True, save_path: Optional[str] = None, **kwargs):
+    def plot_MA(self, log: bool = True, save_path: str | None = None, **kwargs):
         """
         Create an log ratio (M)-average (A) plot using matplotlib.
 
@@ -449,7 +447,7 @@ def plot_MA(self, log: bool = True, save_path: Optional[str] = None, **kwargs):
         log : bool
             Whether or not to log scale x and y axes (``default=True``).
 
-        save_path : str or None
+        save_path : str, optional
             The path where to save the plot.
             If left None, the plot won't be saved (``default=None``).
 
diff --git a/pydeseq2/grid_search.py b/pydeseq2/grid_search.py
index e1be50ee..a8f1e77b 100644
--- a/pydeseq2/grid_search.py
+++ b/pydeseq2/grid_search.py
@@ -1,5 +1,3 @@
-from typing import Optional
-
 import numpy as np
 from scipy.special import gammaln  # type: ignore
 
@@ -58,7 +56,7 @@ def grid_fit_alpha(
     alpha_hat: float,
     min_disp: float,
     max_disp: float,
-    prior_disp_var: Optional[float] = None,
+    prior_disp_var: float | None = None,
     cr_reg: bool = True,
     prior_reg: bool = False,
     grid_length: int = 100,
@@ -87,7 +85,7 @@ def grid_fit_alpha(
     max_disp : float
        Upper threshold for dispersion parameters.
 
-    prior_disp_var : float
+    prior_disp_var : float, optional
        Prior dispersion variance.
 
    cr_reg : bool
diff --git a/pydeseq2/inference.py b/pydeseq2/inference.py
index 801a86c4..40bff73b 100644
--- a/pydeseq2/inference.py
+++ b/pydeseq2/inference.py
@@ -1,8 +1,6 @@
 from abc import ABC
 from abc import abstractmethod
 from typing import Literal
-from typing import Optional
-from typing import Tuple
 
 import numpy as np
 import pandas as pd
@@ -57,7 +55,7 @@ def irls(
         max_beta: float = 30,
         optimizer: Literal["BFGS", "L-BFGS-B"] = "L-BFGS-B",
         maxiter: int = 250,
-    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
+    ) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
         r"""Fit a NB GLM wit log-link to predict counts from the design matrix.
 
         See equations (1-2) in the DESeq2 paper.
@@ -128,11 +126,11 @@ def alpha_mle(
         alpha_hat: np.ndarray,
         min_disp: float,
         max_disp: float,
-        prior_disp_var: Optional[float] = None,
+        prior_disp_var: float | None = None,
         cr_reg: bool = True,
         prior_reg: bool = False,
         optimizer: Literal["BFGS", "L-BFGS-B"] = "L-BFGS-B",
-    ) -> Tuple[np.ndarray, np.ndarray]:
+    ) -> tuple[np.ndarray, np.ndarray]:
         """Estimate the dispersion parameter of a negative binomial GLM.
 
         Parameters
         ----------
@@ -155,7 +153,7 @@ def alpha_mle(
         max_disp : float
             Upper threshold for dispersion parameters.
 
-        prior_disp_var : float
+        prior_disp_var : float, optional
             Prior dispersion variance.
         cr_reg : bool
@@ -188,10 +186,10 @@ def wald_test(
         ridge_factor: np.ndarray,
         contrast: np.ndarray,
         lfc_null: np.ndarray,
-        alt_hypothesis: Optional[
-            Literal["greaterAbs", "lessAbs", "greater", "less"]
-        ] = None,
+        alt_hypothesis: (
+            Literal["greaterAbs", "lessAbs", "greater", "less"] | None
+        ) = None,
-    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+    ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
         """Run Wald test for differential expression.
 
         Computes Wald statistics, standard error and p-values from
@@ -285,7 +283,7 @@ def fit_moments_dispersions(
     @abstractmethod
     def dispersion_trend_gamma_glm(
         self, covariates: pd.Series, targets: pd.Series
-    ) -> Tuple[np.ndarray, np.ndarray, bool]:
+    ) -> tuple[np.ndarray, np.ndarray, bool]:
         """Fit a gamma glm on gene dispersions.
 
         The intercept should be concatenated in this method
@@ -319,7 +317,7 @@ def lfc_shrink_nbinom_glm(
         prior_scale: float,
         optimizer: str,
         shrink_index: int,
-    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+    ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
         """Fit a negative binomial MAP LFC using an apeGLM prior.
 
         Only the LFC is shrinked, and not the intercept.
diff --git a/pydeseq2/preprocessing.py b/pydeseq2/preprocessing.py
index 7704526a..0c0a5a34 100644
--- a/pydeseq2/preprocessing.py
+++ b/pydeseq2/preprocessing.py
@@ -1,13 +1,10 @@
-from typing import Tuple
-from typing import Union
-
 import numpy as np
 import pandas as pd
 
 
 def deseq2_norm(
-    counts: Union[pd.DataFrame, np.ndarray]
-) -> Tuple[Union[pd.DataFrame, np.ndarray], Union[pd.DataFrame, np.ndarray]]:
+    counts: pd.DataFrame | np.ndarray,
+) -> tuple[pd.DataFrame | np.ndarray, pd.DataFrame | np.ndarray]:
     """Return normalized counts and size_factors.
 
     Uses the median of ratios method.
@@ -31,9 +28,7 @@
     return deseq2_counts, size_factors
 
 
-def deseq2_norm_fit(
-    counts: Union[pd.DataFrame, np.ndarray]
-) -> Tuple[np.ndarray, np.ndarray]:
+def deseq2_norm_fit(counts: pd.DataFrame | np.ndarray) -> tuple[np.ndarray, np.ndarray]:
     """Return ``logmeans`` and ``filtered_genes``, needed in the median of ratios method.
 
     ``Logmeans`` and ``filtered_genes`` can then be used to normalize external datasets.
@@ -62,10 +57,10 @@ def deseq2_norm_fit(
 
 
 def deseq2_norm_transform(
-    counts: Union[pd.DataFrame, np.ndarray],
+    counts: pd.DataFrame | np.ndarray,
     logmeans: np.ndarray,
     filtered_genes: np.ndarray,
-) -> Tuple[Union[pd.DataFrame, np.ndarray], Union[pd.DataFrame, np.ndarray]]:
+) -> tuple[pd.DataFrame | np.ndarray, pd.DataFrame | np.ndarray]:
     """Return normalized counts and size factors from the median of ratios method.
 
     Can be applied on external dataset, using the ``logmeans`` and ``filtered_genes``
diff --git a/pydeseq2/utils.py b/pydeseq2/utils.py
index db09e90b..5b031320 100644
--- a/pydeseq2/utils.py
+++ b/pydeseq2/utils.py
@@ -3,9 +3,6 @@
 from math import floor
 from pathlib import Path
 from typing import Literal
-from typing import Optional
-from typing import Tuple
-from typing import Union
 from typing import cast
 
 import numpy as np
@@ -109,7 +106,7 @@ def load_example_data(
     return df
 
 
-def test_valid_counts(counts: Union[pd.DataFrame, np.ndarray]) -> None:
+def test_valid_counts(counts: pd.DataFrame | np.ndarray) -> None:
     """Test that the count matrix contains valid inputs.
 
     More precisely, test that inputs are non-negative integers.
@@ -138,9 +135,9 @@ def test_valid_counts(counts: Union[pd.DataFrame, np.ndarray]) -> None:
 
 
 def dispersion_trend(
-    normed_mean: Union[float, np.ndarray],
-    coeffs: Union["pd.Series[float]", np.ndarray],
-) -> Union[float, np.ndarray]:
+    normed_mean: float | np.ndarray,
+    coeffs: pd.Series | np.ndarray,
+) -> float | np.ndarray:
     r"""Return dispersion trend from normalized counts.
 
     :math:`a_1/ \mu + a_0`.
@@ -165,8 +162,8 @@ def dispersion_trend(
 
 
 def nb_nll(
-    counts: np.ndarray, mu: np.ndarray, alpha: Union[float, np.ndarray]
-) -> Union[float, np.ndarray]:
+    counts: np.ndarray, mu: np.ndarray, alpha: float | np.ndarray
+) -> float | np.ndarray:
     r"""Neg log-likelihood of a negative binomial of parameters ``mu`` and ``alpha``.
 
     Mathematically, if ``counts`` is a vector of counting entries :math:`y_i`
@@ -285,7 +282,7 @@ def irls_solver(
     max_beta: float = 30,
     optimizer: Literal["BFGS", "L-BFGS-B"] = "L-BFGS-B",
     maxiter: int = 250,
-) -> Tuple[np.ndarray, np.ndarray, np.ndarray, bool]:
+) -> tuple[np.ndarray, np.ndarray, np.ndarray, bool]:
     r"""Fit a NB GLM wit log-link to predict counts from the design matrix.
 
     See equations (1-2) in the DESeq2 paper.
@@ -449,11 +446,11 @@ def fit_alpha_mle(
     alpha_hat: float,
     min_disp: float,
     max_disp: float,
-    prior_disp_var: Optional[float] = None,
+    prior_disp_var: float | None = None,
     cr_reg: bool = True,
     prior_reg: bool = False,
     optimizer: Literal["BFGS", "L-BFGS-B"] = "L-BFGS-B",
-) -> Tuple[float, bool]:
+) -> tuple[float, bool]:
     """Estimate the dispersion parameter of a negative binomial GLM.
 
     Note: it is possible to pass counts, design_matrix and mu arguments in the form of
@@ -568,7 +565,7 @@ def dloss(log_alpha: float) -> float:
     )
 
 
-def trimmed_mean(x: np.ndarray, trim: float = 0.1, **kwargs) -> Union[float, np.ndarray]:
+def trimmed_mean(x: np.ndarray, trim: float = 0.1, **kwargs) -> float | np.ndarray:
     """Return trimmed mean.
 
     Compute the mean after trimming data of its smallest and largest quantiles.
@@ -656,7 +653,7 @@ def trimfn(x: float) -> int:
 
 def trimmed_variance(
     x: np.ndarray, trim: float = 0.125, axis: int = 0
-) -> Union[float, np.ndarray]:
+) -> float | np.ndarray:
     """Return trimmed variance.
 
     Compute the variance after trimming data of its smallest and largest quantiles.
@@ -727,8 +724,8 @@ def wald_test(
     ridge_factor: np.ndarray,
     contrast: np.ndarray,
     lfc_null: float,
-    alt_hypothesis: Optional[Literal["greaterAbs", "lessAbs", "greater", "less"]],
-) -> Tuple[float, float, float]:
+    alt_hypothesis: Literal["greaterAbs", "lessAbs", "greater", "less"] | None,
+) -> tuple[float, float, float]:
     """Run Wald test for differential expression.
 
     Computes Wald statistics, standard error and p-values from
@@ -757,7 +754,7 @@ def wald_test(
     lfc_null : float
         The (log2) log fold change under the null hypothesis.
 
-    alt_hypothesis : str or None
+    alt_hypothesis : str, optional
         The alternative hypothesis for computing wald p-values.
 
     Returns
     -------
@@ -962,14 +959,14 @@ def robust_method_of_moments_disp(
     return alpha
 
 
-def get_num_processes(n_cpus: Optional[int] = None) -> int:
+def get_num_processes(n_cpus: int | None) -> int:
     """Return the number of processes to use for multiprocessing.
 
     Returns the maximum number of available cpus by default.
 
     Parameters
     ----------
-    n_cpus : int or None
+    n_cpus : int, optional
         Desired number of cpus. If ``None``, will return the number of
         available cpus. (default: ``None``).
@@ -998,7 +995,7 @@ def nbinomGLM(
     prior_scale: float,
     optimizer="L-BFGS-B",
     shrink_index: int = 1,
-) -> Tuple[np.ndarray, np.ndarray, bool]:
+) -> tuple[np.ndarray, np.ndarray, bool]:
     """Fit a negative binomial MAP LFC using an apeGLM prior.
 
     Only the LFC is shrinked, and not the intercept.
@@ -1234,7 +1231,7 @@ def make_scatter(
     legend_labels: list,
     x_val: np.array,
     log: bool = True,
-    save_path: Optional[str] = None,
+    save_path: str | None = None,
     **kwargs,
 ) -> None:
     """
@@ -1256,7 +1253,7 @@ def make_scatter(
     log : bool
         Whether or not to log scale features and targets axes (``default=True``).
 
-    save_path : str or None
+    save_path : str, optional
         The path where to save the plot.
         If left None, the plot won't be saved (``default=None``).
 
@@ -1303,9 +1300,9 @@ def make_MA_plot(
     results_df: pd.DataFrame,
     padj_thresh: float = 0.05,
     log: bool = True,
-    save_path: Optional[str] = None,
+    save_path: str | None = None,
     lfc_null: float = 0,
-    alt_hypothesis: Optional[Literal["greaterAbs", "lessAbs", "greater", "less"]] = None,
+    alt_hypothesis: Literal["greaterAbs", "lessAbs", "greater", "less"] | None = None,
     **kwargs,
 ) -> None:
     """
@@ -1326,14 +1323,14 @@ def make_MA_plot(
     log : bool
         Whether or not to log scale features and targets axes (``default=True``).
 
-    save_path : str or None
+    save_path : str, optional
         The path where to save the plot.
         If left None, the plot won't be saved (``default=None``).
 
     lfc_null : float
         The (log2) log fold change under the null hypothesis. (default: ``0``).
 
-    alt_hypothesis : str or None
+    alt_hypothesis : str, optional
         The alternative hypothesis for computing wald p-values. (default: ``None``).
 
     **kwargs