Skip to content

Commit

Permalink
Added geneset_plot_multi in ov.bulk to visualize the multi result…
Browse files Browse the repository at this point in the history
…s of enrichment. #103
  • Loading branch information
Starlitnightly committed Jul 16, 2024
1 parent f8d8006 commit 6f3f4cb
Show file tree
Hide file tree
Showing 7 changed files with 649 additions and 270 deletions.
83 changes: 83 additions & 0 deletions omicverse/bulk/_Enrichment.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,89 @@ def geneset_enrichment_GSEA(gene_rnk:pd.DataFrame,pathways_dict:dict,
enrich_res['P-value']=enrich_res['fdr']
return enrich_res

def geneset_plot_multi(enr_dict,colors_dict,num:int=5,fontsize=10,
                        fig_title:str='',fig_xlabel:str='Fractions of genes',
                        figsize:tuple=(2,4),cmap:str='YlGnBu',
                        text_knock:int=5,text_maxsize:int=20,ax=None,
                        ):
    """
    Plot the top enriched terms of several enrichment results as one dot plot.

    The first `num` rows of every table in `enr_dict` are stacked, labelled with
    their dictionary key and drawn with PyComplexHeatmap's DotClustermapPlotter,
    split into one row group per key.

    Arguments:
        enr_dict: A dictionary mapping a group name to its enrichment result
            (a DataFrame). The columns 'Term', 'fraction', 'logp' and 'num' are
            read. The input tables are not modified.
        colors_dict: A dictionary mapping each group name to a color.
        num: The number of enriched terms to plot per group. Default is 5.
        fontsize: The fontsize of the plot. Default is 10.
        fig_title: The title of the plot. Nothing is drawn when it is empty. Default is an empty string.
        fig_xlabel: The label of the x-axis. Default is 'Fractions of genes'.
        figsize: The size of the figure created when `ax` is None. Default is (2,4).
        cmap: The colormap to use for the plot. Default is 'YlGnBu'.
        text_knock: Forwarded to `plot_text_set` as `text_knock`. Default is 5.
        text_maxsize: Forwarded to `plot_text_set` as `text_maxsize`. Default is 20.
        ax: A matplotlib.axes.Axes object. A new figure is created when None.

    Returns:
        ax: The matplotlib.axes.Axes object the plot was requested on.
    """
    from PyComplexHeatmap import HeatmapAnnotation,DotClustermapPlotter,anno_label,anno_simple

    # Label each table with its group on a copy, so the caller's DataFrames
    # do not silently gain a 'Type' column.
    enr_all=pd.concat(
        [enr.iloc[:num].assign(Type=key) for key,enr in enr_dict.items()],
        axis=0,
    )
    # Only the text before the first '(' of a term is kept, then it is
    # reformatted by plot_text_set.
    # NOTE(review): relies on `ov` being bound in this module's namespace - confirm.
    enr_all['Term']=[
        ov.utils.plot_text_set(term.split('(')[0],text_knock=text_knock,text_maxsize=text_maxsize)
        for term in enr_all.Term.tolist()
    ]
    enr_all.index=enr_all.Term
    enr_all['Term1']=enr_all.index.tolist()
    del enr_all['Term']

    # Left side: one merged group label plus a color strip per group.
    left_ha = HeatmapAnnotation(
        label=anno_label(enr_all.Type, merge=True,rotation=0,colors=colors_dict,relpos=(1,0.8)),
        Category=anno_simple(enr_all.Type,cmap='Set1',
                             add_text=False,legend=False,colors=colors_dict),
        axis=0,verbose=0,
        label_kws={'rotation':45,'horizontalalignment':'left','visible':False},
    )
    # Right side: the term names, each colored like the group it belongs to.
    term_colors=enr_all.assign(color=enr_all.Type.map(colors_dict)).set_index('Term1').color.to_dict()
    right_ha = HeatmapAnnotation(
        label=anno_label(enr_all.Term1, merge=True,rotation=0,relpos=(0,0.5),
                         arrowprops=dict(visible=True),
                         colors=term_colors,
                         fontsize=fontsize,luminance=0.8,height=2),
        axis=0,verbose=0,
        orientation='right',
    )

    if ax is None:
        fig, ax = plt.subplots(figsize=figsize)
    # The plotter below is not handed an axes object, so it presumably draws on
    # pyplot's current axes; make the requested axes current so that a
    # caller-supplied `ax` is actually used.
    plt.sca(ax)

    DotClustermapPlotter(
        data=enr_all, x='fraction',y='Term1',value='logp',c='logp',s='num',
        cmap=cmap,
        row_cluster=True,
        # Color scale is fixed to p-values between 0.1 and 1e-10.
        vmin=-1*np.log10(0.1),vmax=-1*np.log10(1e-10),
        show_rownames=True,show_colnames=False,row_dendrogram=False,
        col_names_side='top',row_names_side='right',
        xticklabels_kws={'labelrotation': 30, 'labelcolor': 'blue','labelsize':fontsize},
        left_annotation=left_ha,right_annotation=right_ha,
        spines=False,
        row_split=enr_all.Type,
        verbose=1,legend_gap=10,
        xlabel=fig_xlabel,xlabel_side="bottom",
        xlabel_kws=dict(labelpad=8,fontweight='normal',fontsize=fontsize+2),
    )

    if fig_title:
        ax.set_title(fig_title,fontsize=fontsize+2)

    # Post-process the axes the plotter created. A separate loop variable is
    # used so that `ax`, the return value, is not overwritten.
    for sub_ax in ax.figure.axes:
        if sub_ax.get_xlabel() == fig_xlabel:  # the panel carrying the x-axis label
            sub_ax.grid(False)
        if sub_ax.get_ylabel() == 'logp':  # assume the colorbar is labelled with the value column
            sub_ax.tick_params(labelsize=fontsize+2)
            sub_ax.set_ylabel(r'$−Log_{10}(P_{adjusted})$',fontsize=fontsize+2)
            sub_ax.grid(False)
    return ax

def geneset_plot(enrich_res,num:int=10,node_size:list=[5,10,15],
cax_loc:list=[2, 0.55, 0.5, 0.02],cax_fontsize:int=12,
Expand Down
2 changes: 1 addition & 1 deletion omicverse/bulk/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#from Pyomic.bulk.Gene_module import pywgcna

from ._Gene_module import pyWGCNA
from ._Enrichment import pyGSEA,pyGSE,geneset_enrichment,geneset_plot,geneset_enrichment_GSEA
from ._Enrichment import pyGSEA,pyGSE,geneset_enrichment,geneset_plot,geneset_enrichment_GSEA,geneset_plot_multi
from ._network import pyPPI,string_interaction,string_map,generate_G
from ._chm13 import get_chm13_gene,find_chm13_gene
from ._Deseq2 import pyDEG,deseq2_normalize,estimateSizeFactors,estimateDispersions,Matrix_ID_mapping,data_drop_duplicates_index
Expand Down
26 changes: 23 additions & 3 deletions omicverse/pp/_preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -780,11 +780,31 @@ def leiden(adata, **kwargs):
rsc.tl.leiden(adata, **kwargs)


def score_genes_cell_cycle(adata,species='human',s_genes=None, g2m_genes=None):
    """
    Score cell cycle .

    Thin wrapper around `sc.tl.score_genes_cell_cycle` that fills in built-in
    S-phase and G2/M-phase marker lists when the caller does not supply them.

    Arguments:
        adata: Annotated data matrix with rows for cells and columns for genes.
        species: The species of the data. It can be either 'human' or 'mouse'.
            Only consulted for a gene list that is left as None.
        s_genes: The list of genes that are specific to the S phase of the cell cycle.
            When None, the built-in list for `species` is used.
        g2m_genes: The list of genes that are specific to the G2/M phase of the cell cycle.
            When None, the built-in list for `species` is used.

    Raises:
        ValueError: If a gene list is None and `species` has no built-in list.
    """
    default_s_genes={
        'human':['MCM5', 'PCNA', 'TYMS', 'FEN1', 'MCM2', 'MCM4', 'RRM1', 'UNG', 'GINS2', 'MCM6', 'CDCA7', 'DTL', 'PRIM1', 'UHRF1', 'MLF1IP', 'HELLS', 'RFC2', 'RPA2', 'NASP', 'RAD51AP1', 'GMNN', 'WDR76', 'SLBP', 'CCNE2', 'UBR7', 'POLD3', 'MSH2', 'ATAD2', 'RAD51', 'RRM2', 'CDC45', 'CDC6', 'EXO1', 'TIPIN', 'DSCC1', 'BLM', 'CASP8AP2', 'USP1', 'CLSPN', 'POLA1', 'CHAF1B', 'BRIP1', 'E2F8'],
        'mouse':['Cdca7', 'Mcm4', 'Mcm7', 'Rfc2', 'Ung', 'Mcm6', 'Rrm1', 'Slbp', 'Pcna', 'Atad2', 'Tipin', 'Mcm5', 'Uhrf1', 'Polr1b', 'Dtl', 'Prim1', 'Fen1', 'Hells', 'Gmnn', 'Pold3', 'Nasp', 'Chaf1b', 'Gins2', 'Pola1', 'Msh2', 'Casp8ap2', 'Cdc6', 'Ubr7', 'Ccne2', 'Wdr76', 'Tyms', 'Cdc45', 'Clspn', 'Rrm2', 'Dscc1', 'Rad51', 'Usp1', 'Exo1', 'Blm', 'Rad51ap1', 'Cenpu', 'E2f8', 'Mrpl36'],
    }
    default_g2m_genes={
        'human':['HMGB2', 'CDK1', 'NUSAP1', 'UBE2C', 'BIRC5', 'TPX2', 'TOP2A', 'NDC80', 'CKS2', 'NUF2', 'CKS1B', 'MKI67', 'TMPO', 'CENPF', 'TACC3', 'FAM64A', 'SMC4', 'CCNB2', 'CKAP2L', 'CKAP2', 'AURKB', 'BUB1', 'KIF11', 'ANP32E', 'TUBB4B', 'GTSE1', 'KIF20B', 'HJURP', 'CDCA3', 'HN1', 'CDC20', 'TTK', 'CDC25C', 'KIF2C', 'RANGAP1', 'NCAPD2', 'DLGAP5', 'CDCA2', 'CDCA8', 'ECT2', 'KIF23', 'HMMR', 'AURKA', 'PSRC1', 'ANLN', 'LBR', 'CKAP5', 'CENPE', 'CTCF', 'NEK2', 'G2E3', 'GAS2L3', 'CBX5', 'CENPA'],
        'mouse':['Cbx5', 'Aurkb', 'Cks1b', 'Cks2', 'Jpt1', 'Hmgb2', 'Anp32e', 'Lbr', 'Tmpo', 'Top2a', 'Tacc3', 'Tubb4b', 'Ncapd2', 'Rangap1', 'Cdk1', 'Smc4', 'Kif20b', 'Cdca8', 'Ckap2', 'Ndc80', 'Dlgap5', 'Hjurp', 'Ckap5', 'Bub1', 'Ckap2l', 'Ect2', 'Kif11', 'Birc5', 'Cdca2', 'Nuf2', 'Cdca3', 'Nusap1', 'Ttk', 'Aurka', 'Mki67', 'Pimreg', 'Ccnb2', 'Tpx2', 'Hjurp', 'Anln', 'Kif2c', 'Cenpe', 'Gtse1', 'Kif23', 'Cdc20', 'Ube2c', 'Cenpf', 'Cenpa', 'Hmmr', 'Ctcf', 'Psrc1', 'Cdc25c', 'Nek2', 'Gas2l3', 'G2e3'],
    }

    # `is None` rather than `== None`: an array-like gene list would turn the
    # equality test into an element-wise comparison.
    if s_genes is None:
        if species not in default_s_genes:
            # Fail here with a clear message instead of handing None on to scanpy.
            raise ValueError(
                f"No built-in S-phase genes for species {species!r}; "
                "use 'human' or 'mouse', or pass `s_genes` explicitly."
            )
        s_genes=default_s_genes[species]
    if g2m_genes is None:
        if species not in default_g2m_genes:
            raise ValueError(
                f"No built-in G2/M-phase genes for species {species!r}; "
                "use 'human' or 'mouse', or pass `g2m_genes` explicitly."
            )
        g2m_genes=default_g2m_genes[species]
    sc.tl.score_genes_cell_cycle(adata,s_genes=s_genes, g2m_genes=g2m_genes)


Expand Down
8 changes: 4 additions & 4 deletions omicverse/pp/_qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,13 +266,13 @@ def qc_cpu(adata:anndata.AnnData, mode='seurat',
adata.obs['sccomposite_doublet']=0
adata.obs['sccomposite_consistency']=0
if batch_key is None:
from _sccomposite import composite_rna
from ._sccomposite import composite_rna
multiplet_classification, consistency = composite_rna(adata)
adata.obs['sccomposite_doublet']=multiplet_classification
adata.obs['sccomposite_consistency']=consistency
else:
for batch in adata.obs[batch_key].unique():
from _sccomposite import composite_rna
from ._sccomposite import composite_rna
adata_batch=adata[adata.obs[batch_key]==batch]
multiplet_classification, consistency = composite_rna(adata_batch)
adata.obs.loc[adata_batch.obs.index,'sccomposite_doublet']=multiplet_classification
Expand Down Expand Up @@ -381,13 +381,13 @@ def qc_gpu(adata, mode='seurat',
adata.obs['sccomposite_doublet']=0
adata.obs['sccomposite_consistency']=0
if batch_key is None:
from _sccomposite import composite_rna
from ._sccomposite import composite_rna
multiplet_classification, consistency = composite_rna(adata)
adata.obs['sccomposite_doublet']=multiplet_classification
adata.obs['sccomposite_consistency']=consistency
else:
for batch in adata.obs[batch_key].unique():
from _sccomposite import composite_rna
from ._sccomposite import composite_rna
adata_batch=adata[adata.obs[batch_key]==batch]
multiplet_classification, consistency = composite_rna(adata_batch)
adata.obs.loc[adata_batch.obs.index,'sccomposite_doublet']=multiplet_classification
Expand Down
2 changes: 2 additions & 0 deletions omicverse_guide/docs/Release_notes.md
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,7 @@ Support Raw Windows platform
- Fixed an error of `pyTCGA.survival_analysis` when the matrix is sparse. #62, #68, #95
- Added tqdm to visualize the process of `pyTCGA.survial_analysis_all`
- Fixed an error in `data_drop_duplicates_index` when removing duplicate indexes so that only the most highly expressed genes are retained. #45
- Added `geneset_plot_multi` in `ov.bulk` to visualize multiple enrichment results in one plot. #103

### Single Module

Expand All @@ -412,3 +413,4 @@ Support Raw Windows platform
### PP Module
- Fixed an error of `ov.pp.pca` when pcs smaller than 13. #102
- Added `COMPOSITE` to the methods of `ov.pp.qc` to predict doublet cells. #103
- Added a `species` argument to `score_genes_cell_cycle` to calculate the cell cycle phase without manually supplying gene lists.
Loading

0 comments on commit 6f3f4cb

Please sign in to comment.