From 3c09bbc6edfd1a01a521d2ab048df337572e970f Mon Sep 17 00:00:00 2001 From: bramvandesande Date: Thu, 14 Jun 2018 10:27:12 +0200 Subject: [PATCH] - Changed dependency: arboretum package was renamed to arboreto. - Set version of loompy package to 2.0.2 because setup.py in more recent versions is not compatible with python 3.5. --- README.rst | 14 +++++++------- docs/index.rst | 14 +++++++------- notebooks/pySCENIC - Full pipeline.ipynb | 4 ++-- requirements.dev.txt | 2 +- requirements.txt | 5 ++--- scripts/hpc-grnboost.py | 4 ++-- src/pyscenic/cli/pyscenic.py | 4 ++-- src/pyscenic/prune.py | 1 + 8 files changed, 24 insertions(+), 24 deletions(-) diff --git a/README.rst b/README.rst index a7294f3..543644a 100644 --- a/README.rst +++ b/README.rst @@ -14,13 +14,13 @@ in no time. The latter is achieved via the dask_ framework for distributed compu The pipeline has three steps: -1. First transcription factors (TFs) and their target genes, together defining a regulon, are derived using gene inference methods which solely rely on correlations between expression of genes across cells. The arboretum_ package is used for this step. +1. First transcription factors (TFs) and their target genes, together defining a regulon, are derived using gene inference methods which solely rely on correlations between expression of genes across cells. The arboreto_ package is used for this step. 2. These regulons are refined by pruning targets that do not have an enrichment for a corresponding motif of the TF effectively separating direct from indirect targets based on the presence of cis-regulatory footprints. 3. Finally, the original cells are differentiated and clustered on the activity of these discovered regulons. .. note:: - The most impactfull speed improvement is introduced by the arboretum_ package in step 1. This package provides an alternative to GENIE3 [3]_ called GRNBoost2. This package can be controlled from within pySCENIC. 
+ The most impactful speed improvement is introduced by the arboreto_ package in step 1. This package provides an alternative to GENIE3 [3]_ called GRNBoost2. This package can be controlled from within pySCENIC. .. sidebar:: **Quick Start** @@ -103,8 +103,8 @@ First we import the necessary modules and declare some constants: from dask.diagnostics import ProgressBar - from arboretum.utils import load_tf_names - from arboretum.algo import grnboost2 + from arboreto.utils import load_tf_names + from arboreto.algo import grnboost2 from pyscenic.rnkdb import FeatherRankingDatabase as RankingDatabase from pyscenic.utils import modules_from_adjacencies, load_motifs @@ -172,10 +172,10 @@ Phase I: Inference of co-expression modules In the initial phase of the pySCENIC pipeline the single cell expression profiles are used to infer co-expression modules from. -Run GENIE3 or GRNBoost from arboretum_ to infer co-expression modules +Run GENIE3 or GRNBoost from arboreto_ to infer co-expression modules ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The arboretum package is used for this phase of the pipeline. For this notebook only a sample of 1,000 cells is used +The arboreto package is used for this phase of the pipeline. For this notebook only a sample of 1,000 cells is used for the co-expression module inference is used. .. code-block:: python @@ -321,7 +321,7 @@ References .. _distributed: https://distributed.readthedocs.io/en/latest/ .. _LCB: https://aertslab.org .. _feather: https://github.com/wesm/feather -.. _arboretum: https://arboretum.readthedocs.io +.. _arboreto: https://arboreto.readthedocs.io .. _notebooks: https://github.com/aertslab/pySCENIC/tree/master/notebooks .. _issue: https://github.com/aertslab/pySCENIC/issues/new .. _SCENIC: http://scenic.aertslab.org diff --git a/docs/index.rst b/docs/index.rst index 7282873..4043623 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -14,12 +14,12 @@ in no time. 
The latter is achieved via the dask_ framework for distributed compu The pipeline has three steps: -1. First transcription factors (TFs) and their target genes, together defining a regulon, are derived using gene inference methods which solely rely on correlations between expression of genes across cells. The arboretum_ package is used for this step. +1. First transcription factors (TFs) and their target genes, together defining a regulon, are derived using gene inference methods which solely rely on correlations between expression of genes across cells. The arboreto_ package is used for this step. 2. These regulons are refined by pruning targets that do not have an enrichment for a corresponding motif of the TF effectively separating direct from indirect targets based on the presence of cis-regulatory footprints. 3. Finally, the original cells are differentiated and clustered on the activity of these discovered regulons. .. note:: - The most impactfull speed improvement is introduced by the arboretum_ package in step 1. This package provides an alternative to GENIE3 [3]_ called GRNBoost2. This package can be controlled from within pySCENIC. + The most impactful speed improvement is introduced by the arboreto_ package in step 1. This package provides an alternative to GENIE3 [3]_ called GRNBoost2. This package can be controlled from within pySCENIC. .. 
sidebar:: **Quick Start** @@ -97,8 +97,8 @@ First we import the necessary modules and declare some constants: from dask.diagnostics import ProgressBar - from arboretum.utils import load_tf_names - from arboretum.algo import grnboost2 + from arboreto.utils import load_tf_names + from arboreto.algo import grnboost2 from pyscenic.rnkdb import FeatherRankingDatabase as RankingDatabase from pyscenic.utils import modules_from_adjacencies, load_motifs @@ -166,10 +166,10 @@ Phase I: Inference of co-expression modules In the initial phase of the pySCENIC pipeline the single cell expression profiles are used to infer co-expression modules from. -Run GENIE3 or GRNBoost from arboretum_ to infer co-expression modules +Run GENIE3 or GRNBoost from arboreto_ to infer co-expression modules ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The arboretum package is used for this phase of the pipeline. For this notebook only a sample of 1,000 cells is used +The arboreto package is used for this phase of the pipeline. For this notebook only a sample of 1,000 cells is used for the co-expression module inference is used. .. code-block:: python @@ -315,7 +315,7 @@ References .. _distributed: https://distributed.readthedocs.io/en/latest/ .. _LCB: https://aertslab.org .. _feather: https://github.com/wesm/feather -.. _arboretum: https://arboretum.readthedocs.io +.. _arboreto: https://arboreto.readthedocs.io .. _notebooks: https://github.com/aertslab/pySCENIC/tree/master/notebooks .. _issue: https://github.com/aertslab/pySCENIC/issues/new .. 
_SCENIC: http://scenic.aertslab.org diff --git a/notebooks/pySCENIC - Full pipeline.ipynb b/notebooks/pySCENIC - Full pipeline.ipynb index 01843f4..3f8555a 100644 --- a/notebooks/pySCENIC - Full pipeline.ipynb +++ b/notebooks/pySCENIC - Full pipeline.ipynb @@ -26,8 +26,8 @@ "import os, glob\n", "import pickle\n", "\n", - "from arboretum.utils import load_tf_names\n", - "from arboretum.algo import grnboost2\n", + "from arboreto.utils import load_tf_names\n", + "from arboreto.algo import grnboost2\n", "\n", "from pyscenic.rnkdb import FeatherRankingDatabase as RankingDatabase\n", "from pyscenic.utils import modules_from_adjacencies\n", diff --git a/requirements.dev.txt b/requirements.dev.txt index 8d5fc16..69c0fa1 100644 --- a/requirements.dev.txt +++ b/requirements.dev.txt @@ -1,5 +1,5 @@ cytoolz -arboretum +arboreto multiprocessing_on_dill numba==0.37.0 llvmlite==0.22.0 diff --git a/requirements.txt b/requirements.txt index d574e38..a4b696d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,16 +9,15 @@ pandas>=0.20.1 cloudpickle dask>=0.17.2 distributed>=1.21.6 -arboretum pyarrow==0.8.0 feather-format -arboretum +arboreto boltons setuptools pyyaml tqdm interlap umap-learn -loompy +loompy==2.0.2 networkx matplotlib diff --git a/scripts/hpc-grnboost.py b/scripts/hpc-grnboost.py index c9d55e7..81f98eb 100644 --- a/scripts/hpc-grnboost.py +++ b/scripts/hpc-grnboost.py @@ -4,8 +4,8 @@ import os, sys, glob import datetime from configparser import ConfigParser -from arboretum.algo import grnboost2 -from arboretum.utils import load_tf_names +from arboreto.algo import grnboost2 +from arboreto.utils import load_tf_names from dask.distributed import LocalCluster, Client #from cytoolz import mapcat import logging diff --git a/src/pyscenic/cli/pyscenic.py b/src/pyscenic/cli/pyscenic.py index f9e77f4..1d32841 100644 --- a/src/pyscenic/cli/pyscenic.py +++ b/src/pyscenic/cli/pyscenic.py @@ -5,8 +5,8 @@ import logging from dask.diagnostics import ProgressBar from 
multiprocessing import cpu_count -from arboretum.algo import grnboost2 -from arboretum.utils import load_tf_names +from arboreto.algo import grnboost2 +from arboreto.utils import load_tf_names from pyscenic.utils import load_from_yaml, modules_from_adjacencies from pyscenic.rnkdb import opendb, RankingDatabase diff --git a/src/pyscenic/prune.py b/src/pyscenic/prune.py index d5acfdd..32aaca7 100644 --- a/src/pyscenic/prune.py +++ b/src/pyscenic/prune.py @@ -248,6 +248,7 @@ def wrap(data): #def memoize(db: Type[RankingDatabase]) -> Type[RankingDatabase]: # return MemoryDecorator(db) #delayed_or_future_dbs = list(map(wrap, map(memoize, rnkdbs))) + # See also the related Stack Overflow question: https://stackoverflow.com/questions/50795901/dask-scatter-broadcast-a-list delayed_or_future_dbs = list(map(wrap, rnkdbs)) # 3. The gene signatures: these signatures become large when chunking them, therefore chunking is overruled # when using dask.distributed.