MAINT Factor out inference routines (#191)

* Create inference.py
* Create default_inference.py
* Update dds.py
* Update ds.py
* Update plot_minimal_pydeseq2_pipeline.py
* Update plot_pandas_io_example.py
* Update plot_step_by_step.py
* don't process non-zeros in utils
* Update dds.py
* formatting
* formatting
* ignore typing
* ignore typing
* docstrings
* [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci
* cleanup
* [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci
* noqa
* [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci
* docstrings
* CI remove test_docstrings (duplicate with ruff, which runs pydocstyle) (#196)
* docs: fix docs, incorporate new classes
* refactor: remove pass statements from abstractmethods with docstrings
* style: rename MLE_lfc variable to lowercase mle_lfc

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Boris Muzellec <[email protected]>
Co-authored-by: Boris MUZELLEC <[email protected]>
owkin · Nov 24, 2023 · e64c413 · e64c413
1 parent a7ede89
commit e64c413
Show file tree

Hide file tree

Showing 12 changed files with 761 additions and 222 deletions.
diff --git a/docs/source/api/docstrings/pydeseq2.default_inference.DefaultInference.rst b/docs/source/api/docstrings/pydeseq2.default_inference.DefaultInference.rst
@@ -0,0 +1,19 @@
+pydeseq2.default_inference.DefaultInference
+===========================================
+
+.. currentmodule:: pydeseq2.default_inference
+
+.. autoclass:: DefaultInference
+
+   .. rubric:: Methods
+
+   .. autosummary::
+
+      ~DefaultInference.lin_reg_mu
+      ~DefaultInference.irls
+      ~DefaultInference.alpha_mle
+      ~DefaultInference.wald_test
+      ~DefaultInference.fit_rough_dispersions
+      ~DefaultInference.fit_moments_dispersions
+      ~DefaultInference.dispersion_trend_gamma_glm
+      ~DefaultInference.lfc_shrink_nbinom_glm
diff --git a/docs/source/api/docstrings/pydeseq2.inference.Inference.rst b/docs/source/api/docstrings/pydeseq2.inference.Inference.rst
@@ -0,0 +1,19 @@
+pydeseq2.inference.Inference
+=============================
+
+.. currentmodule:: pydeseq2.inference
+
+.. autoclass:: Inference
+
+   .. rubric:: Methods
+
+   .. autosummary::
+
+      ~Inference.lin_reg_mu
+      ~Inference.irls
+      ~Inference.alpha_mle
+      ~Inference.wald_test
+      ~Inference.fit_rough_dispersions
+      ~Inference.fit_moments_dispersions
+      ~Inference.dispersion_trend_gamma_glm
+      ~Inference.lfc_shrink_nbinom_glm
diff --git a/docs/source/api/index.rst b/docs/source/api/index.rst
@@ -11,6 +11,8 @@ PyDESeq2
 
     ~dds.DeseqDataSet
     ~ds.DeseqStats
+    ~inference.Inference
+    ~default_inference.DefaultInference
     ~utils
     ~grid_search
     ~preprocessing

diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -113,6 +113,9 @@
 
 napoleon_type_aliases = {
     "DeseqDataSet": ":class:`DeseqDataSet <pydeseq2.dds.DeseqDataSet>`",
+    "Inference": ":class:`Inference <pydeseq2.inference.Inference>`",
+    "DefaultInference": ":class:`DefaultInference "
+    "<pydeseq2.default_inference.DefaultInference>`",
 }
 
 # Add any paths that contain templates here, relative to this directory.

diff --git a/examples/plot_minimal_pydeseq2_pipeline.py b/examples/plot_minimal_pydeseq2_pipeline.py
@@ -18,6 +18,7 @@
 import pickle as pkl
 
 from pydeseq2.dds import DeseqDataSet
+from pydeseq2.default_inference import DefaultInference
 from pydeseq2.ds import DeseqStats
 from pydeseq2.utils import load_example_data
 
@@ -129,12 +130,13 @@
 # log-fold change (LFC) parameters from the data, and stores them.
 #
 
+inference = DefaultInference(n_cpus=8)
 dds = DeseqDataSet(
     counts=counts_df,
     metadata=metadata,
     design_factors="condition",
     refit_cooks=True,
-    n_cpus=8,
+    inference=inference,
 )
 
 # %%
@@ -217,7 +219,7 @@
 # should be a *fitted* :class:`DeseqDataSet <pydeseq2.dds.DeseqDataSet>`
 # object.
 
-stat_res = DeseqStats(dds, n_cpus=8)
+stat_res = DeseqStats(dds, inference=inference)
 
 # %%
 # It also has a set of optional keyword arguments (see the :doc:`API documentation
@@ -319,7 +321,7 @@
     metadata=metadata,
     design_factors=["group", "condition"],
     refit_cooks=True,
-    n_cpus=8,
+    inference=inference,
 )
 # %%
 # .. note::
@@ -354,7 +356,7 @@
 # ``contrast=["condition", "B", "A"]``.
 #
 
-stat_res_B_vs_A = DeseqStats(dds, contrast=["condition", "B", "A"], n_cpus=8)
+stat_res_B_vs_A = DeseqStats(dds, contrast=["condition", "B", "A"], inference=inference)
 
 # %%
 # .. note::
@@ -381,7 +383,7 @@
 # :class:`DeseqDataSet <pydeseq2.dds.DeseqDataSet>`
 # with ``contrast=["group", "Y", "X"]``, and run the analysis again.
 
-stat_res_Y_vs_X = DeseqStats(dds, contrast=["group", "Y", "X"], n_cpus=8)
+stat_res_Y_vs_X = DeseqStats(dds, contrast=["group", "Y", "X"], inference=inference)
 stat_res_Y_vs_X.summary()
 
 # %%

diff --git a/examples/plot_pandas_io_example.py b/examples/plot_pandas_io_example.py
@@ -23,6 +23,7 @@
 import pandas as pd
 
 from pydeseq2.dds import DeseqDataSet
+from pydeseq2.default_inference import DefaultInference
 from pydeseq2.ds import DeseqStats
 
 # Replace this with the path to directory where you would like results to be saved
@@ -121,12 +122,13 @@
 # object from the count and metadata data that were just loaded.
 #
 
+inference = DefaultInference(n_cpus=8)
 dds = DeseqDataSet(
     counts=counts_df,
     metadata=metadata,
     design_factors="condition",
     refit_cooks=True,
-    n_cpus=8,
+    inference=inference,
 )
 
 # %%
@@ -195,7 +197,7 @@
 # compute p-values and adjusted p-values for differential expression. This is the role of
 # the :class:`DeseqStats <ds.DeseqStats>` class.
 
-stat_res = DeseqStats(dds, n_cpus=8)
+stat_res = DeseqStats(dds, inference=inference)
 
 # %%
 # PyDESeq2 computes p-values using Wald tests. This can be done using the

diff --git a/examples/plot_step_by_step.py b/examples/plot_step_by_step.py
@@ -22,6 +22,7 @@
 import pickle as pkl
 
 from pydeseq2.dds import DeseqDataSet
+from pydeseq2.default_inference import DefaultInference
 from pydeseq2.ds import DeseqStats
 from pydeseq2.utils import load_example_data
 
@@ -87,13 +88,14 @@
 #   in the case of the provided synthetic data, there won't be any Cooks
 #   outliers.
 
+inference = DefaultInference(n_cpus=8)
 dds = DeseqDataSet(
     counts=counts_df,
     metadata=metadata,
     design_factors="condition",  # compare samples based on the "condition"
     # column ("B" vs "A")
     refit_cooks=True,
-    n_cpus=8,
+    inference=inference,
 )
 
 # %%