diff --git a/src/pyscenic/binarization.py b/src/pyscenic/binarization.py
index f6529f4..b1fae45 100644
--- a/src/pyscenic/binarization.py
+++ b/src/pyscenic/binarization.py
@@ -12,6 +12,7 @@ from sklearn import mixture
 
 
 from pyscenic.diptest import diptst
+from multiprocessing import Pool
 
 
 def derive_threshold(auc_mtx: pd.DataFrame, regulon_name: str, method: str = 'hdt') -> float:
@@ -59,7 +60,7 @@ def isbimodal(data, method):
                                    method='bounded').x[0]
 
 
-def binarize(auc_mtx: pd.DataFrame, threshold_overides:Optional[Mapping[str,float]]=None) -> (pd.DataFrame, pd.Series):
+def binarize(auc_mtx: pd.DataFrame, threshold_overides:Optional[Mapping[str,float]]=None, num_workers: int = 1) -> (pd.DataFrame, pd.Series):
     """
     "Binarize" the supplied AUC matrix, i.e. decide if for each cells in the matrix a regulon is active or not based
     on the bimodal distribution of the AUC values for that regulon.
@@ -69,7 +70,14 @@ def binarize(auc_mtx: pd.DataFrame, threshold_overides:Optional[Mapping[str,floa
     :return: A "binarized" dataframe and a series containing the AUC threshold used for each regulon.
     """
     def derive_thresholds(auc_mtx):
-        return pd.Series(index=auc_mtx.columns, data=[derive_threshold(auc_mtx, name) for name in tqdm(auc_mtx.columns)])
+        # Keep the original in-process path (with its progress bar) for a single worker;
+        # only pay for process start-up and per-task pickling when parallelism is requested.
+        if num_workers == 1:
+            thrs = [derive_threshold(auc_mtx, name) for name in tqdm(auc_mtx.columns)]
+        else:
+            with Pool(processes=num_workers) as p:
+                thrs = p.starmap(derive_threshold, [(auc_mtx, c) for c in auc_mtx.columns])
+        return pd.Series(index=auc_mtx.columns, data=thrs)
     thresholds = derive_thresholds(auc_mtx)
     if threshold_overides is not None:
         thresholds[list(threshold_overides.keys())] = list(threshold_overides.values())
diff --git a/src/pyscenic/cli/pyscenic.py b/src/pyscenic/cli/pyscenic.py
index d3db386..03cebbb 100644
--- a/src/pyscenic/cli/pyscenic.py
+++ b/src/pyscenic/cli/pyscenic.py
@@ -196,7 +196,7 @@ def aucell_command(args):
     if extension == '.loom':
         try:
             copyfile(args.expression_mtx_fname.name, args.output.name)
-            append_auc_mtx(args.output.name, auc_mtx, signatures)
+            append_auc_mtx(args.output.name, auc_mtx, signatures, args.num_workers)
         except OSError as e:
             LOGGER.error("Expression matrix should be provided in the loom file format.")
             sys.exit(1)
diff --git a/src/pyscenic/cli/utils.py b/src/pyscenic/cli/utils.py
index 1663360..1cc62bc 100644
--- a/src/pyscenic/cli/utils.py
+++ b/src/pyscenic/cli/utils.py
@@ -217,7 +217,7 @@ def compress_meta(meta):
     return base64.b64encode(zlib.compress(json.dumps(meta).encode('ascii'))).decode('ascii')
 
 
-def append_auc_mtx(fname: str, auc_mtx: pd.DataFrame, regulons: Sequence[Type[GeneSignature]]) -> None:
+def append_auc_mtx(fname: str, auc_mtx: pd.DataFrame, regulons: Sequence[Type[GeneSignature]], num_workers: int = 1) -> None:
     """
     Append AUC matrix to loom file.
 
@@ -238,7 +238,7 @@ def fetch_logo(context):
     name2logo = {}
 
     # Binarize matrix for AUC thresholds.
-    _, auc_thresholds = binarize(auc_mtx)
+    _, auc_thresholds = binarize(auc_mtx, num_workers=num_workers)
     regulon_thresholds = [{"regulon": name,
                            "defaultThresholdValue":(threshold if isinstance(threshold, float) else threshold[0]),
                            "defaultThresholdName": "guassian_mixture_split",