Skip to content

Commit

Permalink
Merge pull request #531 from aertslab/v1.0a2
Browse files Browse the repository at this point in the history
V1.0a2
  • Loading branch information
SeppeDeWinter authored Jan 13, 2025
2 parents c72b098 + e63278b commit 86fa25e
Show file tree
Hide file tree
Showing 7 changed files with 40 additions and 28 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@ __pycahe__/
*.py[cod]
scplus_rtd
.python-version
build/*
*.egg*
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ authors = [
]
description = "SCENIC+ is a python package to build gene regulatory networks (GRNs) using combined or separate single-cell gene expression (scRNA-seq) and single-cell chromatin accessibility (scATAC-seq) data."
readme = "README.md"
version = "1.0a1"
requires-python = ">=3.8"
version = "1.0a2"
requires-python = ">=3.8,<=3.11.8"
keywords = ["scATAC", "GRN inference", "eGRN inference"]
license = { file = "LICENCE.txt" }
classifiers = [
Expand Down
8 changes: 6 additions & 2 deletions src/scenicplus/cli/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -817,7 +817,8 @@ def infer_grn(
keep_only_activating: bool,
rho_threshold: float,
min_target_genes: int,
n_cpu: int):
n_cpu: int,
seed: int):
"""
Infer gene regulatory network.
Expand Down Expand Up @@ -869,6 +870,8 @@ def infer_grn(
Minimum number of target genes.
n_cpu : int
Number of parallel processes to run.
seed: int
Random seed to use.
"""
from scenicplus.grn_builder.gsea_approach import build_grn
Expand Down Expand Up @@ -906,7 +909,8 @@ def infer_grn(
min_target_genes=min_target_genes,
n_cpu=n_cpu,
merge_eRegulons=True,
disable_tqdm=False)
disable_tqdm=False,
seed=seed)

log.info("Formatting eGRN as table.")
eRegulon_metadata = _format_egrns(
Expand Down
8 changes: 7 additions & 1 deletion src/scenicplus/cli/scenicplus.py
Original file line number Diff line number Diff line change
Expand Up @@ -880,7 +880,8 @@ def eGRN(arg):
keep_only_activating=arg.keep_only_activating_eRegulons,
rho_threshold=arg.rho_threshold,
min_target_genes=arg.min_target_genes,
n_cpu=arg.n_cpu)
n_cpu=arg.n_cpu,
seed=arg.seed)

parser.set_defaults(func=eGRN)
# Required arguments
Expand Down Expand Up @@ -984,6 +985,11 @@ def eGRN(arg):
action="store", type=int, required=False,
default=1,
help="Number of cores to use. Default is 1.")
parser.add_argument(
"--seed", dest="seed",
action="store", type=int, required=False,
default=666,
help="Seed to use. Default is 666.")

def add_parser_for_aucell(subparser:argparse._SubParsersAction):
parser:argparse.ArgumentParser = subparser.add_parser(
Expand Down
10 changes: 7 additions & 3 deletions src/scenicplus/data_wrangling/adata_cistopic_wrangling.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,10 @@ def process_multiome_data(
GEX_gene_metadata = GEX_anndata.var.copy(deep=True)
GEX_cell_metadata = GEX_anndata.obs.copy(deep=True).loc[
common_cells]
ACC_region_metadata = cisTopic_obj.region_data.copy(deep=True).loc[
imputed_acc_obj.feature_names]
ACC_region_metadata = pd.DataFrame(index = imputed_acc_obj.feature_names)
ACC_region_metadata = ACC_region_metadata.merge(
cisTopic_obj.region_data, left_index = True, right_index = True, how = "left")
ACC_region_metadata = ACC_region_metadata.loc[imputed_acc_obj.feature_names]
ACC_cell_metadata = cisTopic_obj.cell_data.copy(deep=True).loc[
common_cells]

Expand Down Expand Up @@ -331,7 +333,9 @@ def process_non_multiome_data(
# generate cell metadata
metadata_cell = pd.DataFrame(index=meta_cell_names_ACC,
data={key_to_group_by: [x.split(meta_cell_split)[0] for x in meta_cell_names_ACC]})
ACC_region_metadata = cisTopic_obj.region_data.copy(deep=True)
ACC_region_metadata = pd.DataFrame(index = imputed_acc_obj.feature_names)
ACC_region_metadata = ACC_region_metadata.merge(
cisTopic_obj.region_data, left_index = True, right_index = True, how = "left")
ACC_region_metadata_subset = ACC_region_metadata.loc[imputed_acc_obj.feature_names]

mudata = MuData(
Expand Down
18 changes: 4 additions & 14 deletions src/scenicplus/grn_builder/gsea.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,14 @@
from gseapy.algorithm import enrichment_score
from gseapy.algorithm import gsea_compute
from gseapy.plot import GSEAPlot
import numpy as np
import pandas as pd

seed = 666


def run_gsea(ranked_gene_list: pd.Series,
gene_set: list,
n_perm: int = 1000,
ascending: bool = False,
return_res: bool = False,
name: str = 'gene_set'):
name: str = 'gene_set',
seed: int = 555):
"""
Calculates gene set enrichment score (and estimates its significance) and leading edge for the gene set in the ranked gene list using gseapy prerank.
Expand All @@ -39,7 +35,7 @@ def run_gsea(ranked_gene_list: pd.Series,
gmt = {name: list(gene_set)}
gsea_results, ind, rank_ES, gs = gsea_compute(data=ranked_gene_list, n=n_perm, gmt=gmt,
weighted_score_type=1, permutation_type='gene_set', method=None,
pheno_pos='Pos', pheno_neg='Neg', classes=None, ascending=ascending)
pheno_pos='Pos', pheno_neg='Neg', classes=None, ascending=ascending, seed = seed)

# extract enrichment scores
gseale = list(gsea_results)[0]
Expand All @@ -62,13 +58,7 @@ def run_gsea(ranked_gene_list: pd.Series,
pval = gseale[2]
fdr = gseale[3]
LE = list(map(str, ranked_gene_list.iloc[ldg_pos].index))
if return_res:
res = GSEAPlot(ranked_gene_list, name, ind, nes, pval, fdr, RES,
pheno_pos='', pheno_neg='', figsize=(6, 5.5),
cmap='seismic', ofname=None)
return nes, pval, LE, res
else:
return nes, pval, LE
return nes, pval, LE


def run_enrichr(ranked_gene_list: np.array,
Expand Down
18 changes: 12 additions & 6 deletions src/scenicplus/grn_builder/gsea_approach.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,13 @@
logging.basicConfig(level=level, format=format, handlers=handlers)
log = logging.getLogger('GSEA')


def _run_gsea_for_e_module(
e_module:eRegulon,
rnk:pd.Series,
gsea_n_perm:int,
context: frozenset):
context: frozenset,
seed: int):
"""
Helper function to run gsea for single e_module
Expand All @@ -92,11 +94,12 @@ def _run_gsea_for_e_module(
NES, pval, LE_genes = run_gsea(
ranked_gene_list=rnk,
gene_set=gene_set,
n_perm=gsea_n_perm)
n_perm=gsea_n_perm,
seed=seed)
except:
NES = np.nan
pval = np.nan
LE_genes = np.nan
LE_genes = [np.nan]
return eRegulon(
transcription_factor=TF,
cistrome_name=e_module.cistrome_name,
Expand Down Expand Up @@ -134,6 +137,7 @@ def build_grn(
n_cpu=1,
merge_eRegulons=True,
disable_tqdm=False,
seed=555,
**kwargs) -> List[eRegulon]:
log.info('Thresholding region to gene relationships')
# some tfs are missing from tf_to_gene because they are not
Expand Down Expand Up @@ -185,9 +189,10 @@ def build_grn(
e_module=e_module,
rnk=TF_to_ranking_pos[e_module.transcription_factor],
gsea_n_perm=gsea_n_perm,
context=frozenset(['positive tf2g']))
context=frozenset(['positive tf2g']),
seed=seed)
for e_module in tqdm(
e_modules,
e_modules,
total = len(e_modules),
desc="Running for Positive TF to gene")
if e_module.transcription_factor in pos_TFs)
Expand All @@ -198,7 +203,8 @@ def build_grn(
e_module=e_module,
rnk=TF_to_ranking_neg[e_module.transcription_factor],
gsea_n_perm=gsea_n_perm,
context=frozenset(['negative tf2g']))
context=frozenset(['negative tf2g']),
seed=seed)
for e_module in tqdm(
e_modules,
total = len(e_modules),
Expand Down

0 comments on commit 86fa25e

Please sign in to comment.