Skip to content

Commit

Permalink
Merge pull request #90 from aertslab/binarizationMP
Browse files: browse the repository at this point in the history
Updated the binarization function to use multiprocessing
  • Loading branch information
bramvds authored Jul 28, 2019
2 parents eab4597 + f1a1846 commit 778356f
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 5 deletions.
7 changes: 5 additions & 2 deletions src/pyscenic/binarization.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from sklearn import mixture

from pyscenic.diptest import diptst
from multiprocessing import Pool


def derive_threshold(auc_mtx: pd.DataFrame, regulon_name: str, method: str = 'hdt') -> float:
Expand Down Expand Up @@ -59,7 +60,7 @@ def isbimodal(data, method):
method='bounded').x[0]


def binarize(auc_mtx: pd.DataFrame, threshold_overides:Optional[Mapping[str,float]]=None) -> (pd.DataFrame, pd.Series):
def binarize(auc_mtx: pd.DataFrame, threshold_overides:Optional[Mapping[str,float]]=None, num_workers=1) -> (pd.DataFrame, pd.Series):
"""
"Binarize" the supplied AUC matrix, i.e. decide if for each cells in the matrix a regulon is active or not based
on the bimodal distribution of the AUC values for that regulon.
Expand All @@ -69,7 +70,9 @@ def binarize(auc_mtx: pd.DataFrame, threshold_overides:Optional[Mapping[str,floa
:return: A "binarized" dataframe and a series containing the AUC threshold used for each regulon.
"""
def derive_thresholds(auc_mtx):
return pd.Series(index=auc_mtx.columns, data=[derive_threshold(auc_mtx, name) for name in tqdm(auc_mtx.columns)])
with Pool( processes=num_workers ) as p:
thrs = p.starmap( derive_threshold, [ (auc_mtx, c) for c in auc_mtx.columns ] )
return pd.Series(index=auc_mtx.columns, data=thrs)
thresholds = derive_thresholds(auc_mtx)
if threshold_overides is not None:
thresholds[list(threshold_overides.keys())] = list(threshold_overides.values())
Expand Down
2 changes: 1 addition & 1 deletion src/pyscenic/cli/pyscenic.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ def aucell_command(args):
if extension == '.loom':
try:
copyfile(args.expression_mtx_fname.name, args.output.name)
append_auc_mtx(args.output.name, auc_mtx, signatures)
append_auc_mtx(args.output.name, auc_mtx, signatures, args.num_workers)
except OSError as e:
LOGGER.error("Expression matrix should be provided in the loom file format.")
sys.exit(1)
Expand Down
4 changes: 2 additions & 2 deletions src/pyscenic/cli/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ def compress_meta(meta):
return base64.b64encode(zlib.compress(json.dumps(meta).encode('ascii'))).decode('ascii')


def append_auc_mtx(fname: str, auc_mtx: pd.DataFrame, regulons: Sequence[Type[GeneSignature]]) -> None:
def append_auc_mtx(fname: str, auc_mtx: pd.DataFrame, regulons: Sequence[Type[GeneSignature]], num_workers=1) -> None:
"""
Append AUC matrix to loom file.
Expand All @@ -238,7 +238,7 @@ def fetch_logo(context):
name2logo = {}

# Binarize matrix for AUC thresholds.
_, auc_thresholds = binarize(auc_mtx)
_, auc_thresholds = binarize(auc_mtx, num_workers=1)
regulon_thresholds = [{"regulon": name,
"defaultThresholdValue":(threshold if isinstance(threshold, float) else threshold[0]),
"defaultThresholdName": "guassian_mixture_split",
Expand Down

0 comments on commit 778356f

Please sign in to comment.