Skip to content

Commit

Permalink
Merge pull request #54 from ZhimingYe/DotPlot_supports_Data_frame
Browse files Browse the repository at this point in the history
Dot plot supports data frame
  • Loading branch information
ZhimingYe authored Dec 19, 2024
2 parents efb06a3 + 14bc04e commit 032d579
Show file tree
Hide file tree
Showing 10 changed files with 195 additions and 92 deletions.
10 changes: 3 additions & 7 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: EnrichGT
Title: Parse and cluster biological enrichment result to great tables
Version: 0.5.4
Version: 0.5.8
Author: Zhiming Ye
Maintainer: Zhiming Ye <[email protected]>
Description: Parse and cluster biological enrichment result to great
Expand Down Expand Up @@ -39,12 +39,8 @@ Imports:
text2vec,
tibble,
umap,
utils
Suggests:
org.Hs.eg.db,
readr,
testthat (>= 3.0.0),
withr
utils,
xfun
LinkingTo:
Rcpp
Config/testthat/edition: 3
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand

export(convert_annotations_genes)
export(database_CollecTRI_human)
export(database_CollecTRI_mouse)
export(database_GO_ALL)
Expand Down Expand Up @@ -53,4 +54,5 @@ importFrom(text2vec,itoken)
importFrom(text2vec,vocab_vectorizer)
importFrom(umap,umap)
importFrom(utils,stack)
importFrom(xfun,md5)
useDynLib(EnrichGT)
53 changes: 8 additions & 45 deletions R/egt_lib.R
Original file line number Diff line number Diff line change
Expand Up @@ -121,52 +121,15 @@ is_numeric_string <- function(x) {
}
}


#' @importFrom umap umap
#' @importFrom ggplot2 ggplot
#' @importFrom ggplot2 geom_point
#' @importFrom ggplot2 labs
#' @importFrom ggplot2 theme_classic
#' @importFrom ggrepel geom_text_repel
.egtUMAP <- function(x){
mat<-x@document_term_matrix
umap_result <- umap::umap(mat)
umap_df <- data.frame(ID=rownames(umap_result[["layout"]]),
UMAP1 = umap_result$layout[, 1],
UMAP2 = umap_result$layout[, 2])
udf<-x@enriched_result |> left_join(umap_df,by="ID")
fig<-ggplot(udf, aes(x = UMAP1, y = UMAP2, color = Cluster)) +
geom_point(size = 2) +
geom_text_repel(aes(label = Description),
size = 3,
max.overlaps = 20,
box.padding = 0.3,
point.padding = 0.2) +
labs(title = "Enrichment Results",
x = "UMAP1", y = "UMAP2") +
theme_classic()
return(fig)
}

cocol <- function(n,favor=1,returnColor=F) {
if(favor == 3){
colorSpace <- c('#E41A1C','#377EB8','#4DAF4A','#984EA3','#F29403','#F781BF','#BC9DCC','#A65628','#54B0E4','#222F75','#1B9E77','#B2DF8A',
'#E3BE00','#FB9A99','#E7298A','#910241','#00CDD1','#A6CEE3','#CE1261','#5E4FA2','#8CA77B','#00441B','#DEDC00','#DCF0B9','#8DD3C7','#999999')
}else if(favor == 2){
colorSpace<-c ("#7ca7ae","#a3b3c9","#788ab2","#edbacd","#687050","#b8c0a8","#908088","#e1b19e","#7fc4da","#e8dff4","#b7988f","#c59d17","#92a761","#75aa7a","#efdfbb","#fabb6e","#fc8002","#addb88","#369f2d","#fac7b3","#ee4431","#b9181a","#cedfef","#92c2dd","#4995c6","#1663a9","#bab4d5","#614099","#45aab4","#038db2","#f9637c","#fe7966","#fff4de","#81d0bb","#a5b8f3","#feaac2","#66C2A5","#8DA0CB","#E78AC3","#A6D854","#FFD92F","#E5C494","#B3B3B3")
}else if(favor == 1){
colorSpace<-c('#0ca9ce', '#78cfe5', '#c6ecf1', '#ff6f81', '#ff9c8f', '#ffc2c0','#d386bf','#cdb1d2', '#fae6f0', '#eb6fa6', '#ff88b5', '#00b1a5',"#ffa68f","#ffca75","#b8d8c9","#97bc83","#009f93","#448c99","#db888e","#e397a4","#ead0c7", "#8f9898","#bfcfcb")
}
if(!returnColor){
if (n <= length(colorSpace)) {
colors <- colorSpace[1:n]
} else {
colors <- grDevices::colorRampPalette(colorSpace)(n)
}
message_egt<-function(x,Type=0){
if(Type==0){
cli_alert_info(x)
}
else{
colors<-colorSpace
if(Type==1){
cli_alert_danger(x)
}
return(colors)
}

nsimp<-function(){

}
146 changes: 125 additions & 21 deletions R/figures.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
#' Visualize results generated form `EnrichGT()` using simple plot
#' Visualize enrichment results using simple plot
#'
#' @description
#' This plot is the most widely like `enrichplot::dotplot()`used method to visualize enriched terms. It shows the enrichment scores (e.g. p values) and gene ratio or NES as dot size and color / or bar height. Users can specify the number of terms using `ntop` or selected terms to color via the `low.col` and `hi.col`.
#' @param x an EnrichGT object
#' @param x a data frame form enriched result like `egt_enrichment_analysis()` or `egt_gsea_analysis()`, or an re-clustered `EnrichGT` object
#' @param ntop Show top N in each cluster
#' @param low.col the color for the lowest
#' @param hi.col the color for the highest
#' @param max_len_descript the label format length, default as 40.
#' @param P.adj If pass an origin data.frame from original enriched result, you can specify the P-adjust value cut off. If is null, default is 0.05. When passing `EnrichGT_obj`, this filter is previously done by `egt_recluster_analysis`.
#' @param ... Other param
#'
#' @returns a ggplot2 object
Expand Down Expand Up @@ -34,11 +35,57 @@
#' @importFrom methods is
#' @importFrom forcats fct_reorder
#' @author Zhiming Ye
egt_plot_results <- function(x,...){
if(sum(colnames(x@enriched_result)=="absNES")==0){
figure0<-ORA2dp(x,...)
}else{
figure0<-GSEA2dp(x,...)
egt_plot_results <- function(x,ntop=30,...,P.adj=NULL){
if(class(x)[1]=="EnrichGT_obj"){
if(sum(colnames(x@enriched_result)=="absNES")==0){
figure0<-ORA2dp(x,...)
}else{
figure0<-GSEA2dp(x,...)
}
}else if(is.data.frame(x)){
plotingTemp <- new.env()
tryCatch({
assign("PadjVal",P.adj,envir = plotingTemp)
if(is.null(P.adj)){stop()}
},error=function(e){
assign("PadjVal",0.05,envir = plotingTemp)
cli::cli_alert_info("Use Default P-adjust cut-off 0.05. You can pass `P.adj=xxx` arugument to filter. ")
})
if(sum(colnames(x)=="NES")!=0){
InnerDF<-x |> dplyr::filter(p.adjust<(plotingTemp$PadjVal)) |> dplyr::select(ID,Description,NES,`p.adjust`,core_enrichment) # Need Fix
obj<-InnerDF |>
dplyr::mutate(absNES=abs(NES)) |>
dplyr::mutate(Reg=ifelse(NES>0,"red","forestgreen")) |>
dplyr::mutate(Padj = signif(p.adjust, 2),absNES=signif(absNES, 4)) |>
dplyr::select(Description,ID,Reg,absNES,Padj,core_enrichment) |>
dplyr::mutate(core_enrichment=gsub("/",", ",core_enrichment)) |>
dplyr::mutate(Cluster="GSEA Results")
obj <- obj |>
dplyr::group_by(core_enrichment) |>
dplyr::arrange(desc(absNES), .by_group = TRUE) |>
dplyr::slice_head(n = 2) |>
dplyr::ungroup()
obj <- obj |>
dplyr::group_by(Reg) |>
dplyr::arrange(desc(absNES), .by_group = TRUE) |>
dplyr::slice_head(n = round(round(ntop / 2) + 1 + round(ntop/20))) |> # For balance
dplyr::ungroup() # Balance up and down
figure0<-GSEA2dp(obj,ntop=ntop,...)
}else{
InnerDF<-x |> dplyr::filter(p.adjust<(plotingTemp$PadjVal)) |> dplyr::select(ID,Description,GeneRatio,`p.adjust`,geneID,Count) # Need Fix
obj<-InnerDF |>
dplyr::mutate(PCT=sapply(InnerDF$GeneRatio,function(x)eval(parse(text = x)))*100) |>
dplyr::mutate(Padj = signif(p.adjust, 2),PCT=signif(PCT, 2)) |>
dplyr::select(Description,ID,Count,PCT,Padj,geneID) |>
dplyr::mutate(geneID=gsub("/",", ",geneID)) |>
dplyr::mutate(Cluster="ORA Results")
obj <- obj |>
dplyr::group_by(geneID) |>
dplyr::arrange(desc(Count), .by_group = TRUE) |>
dplyr::slice_head(n = 2) |>
dplyr::ungroup()
figure0<-ORA2dp(obj,ntop=ntop,...)
}
}
return(figure0)
}
Expand Down Expand Up @@ -72,53 +119,110 @@ egt_plot_umap <- function(x,...){
return(px)
}

ORA2dp<-function(x,ntop = 7,low.col="#78cfe5",hi.col="#ff6f81",max_len_descript=40,...){
if(is.list(x)){
ORA2dp<-function(x,ntop = 7,low.col="#ff6f81",hi.col="#78cfe5",max_len_descript=40,...){
if(is.list(x) & !is.data.frame(x)){
cli::cli_abort("For a list object, please run plotting for every object inside list, instead of the whole list.")
}
TempPlotingEnv <- new.env()
tryCatch({
if(dim(x@enriched_result)[1]<2 | sum(colnames(x@enriched_result)=="Count")==0){
cli::cli_abort("ERROR! ")
}else{
assign("df0",x@enriched_result,envir = TempPlotingEnv)
}
},error=function(e){
cli::cli_abort("Not EnrichGT object! Please run `EnrichGT()` first.")
assign("df0",x,envir = TempPlotingEnv)
cli::cli_alert_warning("You are drawing origin results, for better result you can re-cluster it by egt_recluster_analysis()")
})
tryCatch({
df <- x@enriched_result |>
df <- TempPlotingEnv$df0 |>
group_by(Cluster) |>
slice_min(order_by = Count, n = ntop, with_ties = FALSE) |>
slice_max(order_by = Count, n = ntop, with_ties = FALSE) |>
ungroup()
},error=function(e){
cli::cli_alert_warning("Subset ERROR! ")
df <-x@enriched_result
df <-TempPlotingEnv$df0
})
df$Description<-shorten_labels_words(df$Description,max_length = max_len_descript)
px<-ggplot(df,aes(x = PCT, y = fct_reorder(Description, PCT), size=Count, color=Padj))+geom_point()+scale_color_continuous(low=low.col, high=hi.col, name = "adjustedP",guide=guide_colorbar(reverse=F))+scale_size(range=c(2, 8))+xlab("Gene Ratio")+ylab("Gene Sets")+facet_grid(Cluster~.,scales="free",space="free_y")+theme_bw()
px<-ggplot(df,aes(x = PCT, y = fct_reorder(Description, PCT), size=Count, color=Padj))+geom_point()+scale_color_continuous(low=low.col, high=hi.col, name = "adjustedP",guide=guide_colorbar(reverse=F))+scale_size(range=c(2, 8))+xlab("Gene Ratio(%)")+ylab("Gene Sets")+facet_grid(Cluster~.,scales="free",space="free_y")+theme_bw()
return(px)
}

GSEA2dp<-function(x,ntop = 7,low.col="#78cfe5",hi.col="#ff6f81",max_len_descript=40,...){
if(is.list(x)){
GSEA2dp<-function(x,ntop = 7,low.col="#ff6f81",hi.col="#78cfe5",max_len_descript=40,...){
if(is.list(x) & !is.data.frame(x)){
cli::cli_abort("For a list object, please run plotting for every object inside list, instead of the whole list.")
}
TempPlotingEnv <- new.env()
tryCatch({
if(dim(x@enriched_result)[1]<2 | sum(colnames(x@enriched_result)=="absNES")==0){
cli::cli_abort("ERROR! ")
}else{
assign("df0",x@enriched_result,envir = TempPlotingEnv)
}
},error=function(e){
cli::cli_abort("Not EnrichGT object! Please run `EnrichGT()` first.")
assign("df0",x,envir = TempPlotingEnv)
cli::cli_alert_warning("You are drawing origin results, for better result you can re-cluster it by egt_recluster_analysis()")
})
tryCatch({
df <- x@enriched_result |>
df <- TempPlotingEnv$df0 |>
group_by(Cluster) |>
slice_min(order_by = absNES, n = ntop, with_ties = FALSE) |>
slice_max(order_by = absNES, n = ntop, with_ties = FALSE) |>
ungroup()
},error=function(e){
cli::cli_alert_warning("Subset ERROR! ")
df <-x@enriched_result
df <-TempPlotingEnv$df0
})
df$NES<-ifelse(df$Reg=="UpReg",df$absNES*(1),df$absNES*(-1))
df$NES<-ifelse((df$Reg=="UpReg"|df$Reg=="red"),df$absNES*(1),df$absNES*(-1)) # For the different input
df$Description<-shorten_labels_words(df$Description,max_length = max_len_descript)
px<-ggplot(df,aes(x = NES, y = fct_reorder(Description, absNES), fill=Padj))+geom_col()+scale_fill_continuous(low=low.col, high=hi.col, name = "adjustedP",guide=guide_colorbar(reverse=F))+scale_size(range=c(2, 8))+xlab("Normalize Enrichment Score(NES)")+ylab("Gene Sets")+facet_grid(Cluster~.,scales="free",space="free_y")+theme_bw()
return(px)
}


#' @importFrom umap umap
#' @importFrom ggplot2 ggplot
#' @importFrom ggplot2 geom_point
#' @importFrom ggplot2 labs
#' @importFrom ggplot2 theme_classic
#' @importFrom ggrepel geom_text_repel
.egtUMAP <- function(x){
mat<-x@document_term_matrix
umap_result <- umap::umap(mat)
umap_df <- data.frame(ID=rownames(umap_result[["layout"]]),
UMAP1 = umap_result$layout[, 1],
UMAP2 = umap_result$layout[, 2])
udf<-x@enriched_result |> left_join(umap_df,by="ID")
fig<-ggplot(udf, aes(x = UMAP1, y = UMAP2, color = Cluster)) +
geom_point(size = 2) +
geom_text_repel(aes(label = Description),
size = 3,
max.overlaps = 20,
box.padding = 0.3,
point.padding = 0.2) +
labs(title = "Enrichment Results",
x = "UMAP1", y = "UMAP2") +
theme_classic()
return(fig)
}

cocol <- function(n,favor=1,returnColor=F) {
if(favor == 3){
colorSpace <- c('#E41A1C','#377EB8','#4DAF4A','#984EA3','#F29403','#F781BF','#BC9DCC','#A65628','#54B0E4','#222F75','#1B9E77','#B2DF8A',
'#E3BE00','#FB9A99','#E7298A','#910241','#00CDD1','#A6CEE3','#CE1261','#5E4FA2','#8CA77B','#00441B','#DEDC00','#DCF0B9','#8DD3C7','#999999')
}else if(favor == 2){
colorSpace<-c ("#7ca7ae","#a3b3c9","#788ab2","#edbacd","#687050","#b8c0a8","#908088","#e1b19e","#7fc4da","#e8dff4","#b7988f","#c59d17","#92a761","#75aa7a","#efdfbb","#fabb6e","#fc8002","#addb88","#369f2d","#fac7b3","#ee4431","#b9181a","#cedfef","#92c2dd","#4995c6","#1663a9","#bab4d5","#614099","#45aab4","#038db2","#f9637c","#fe7966","#fff4de","#81d0bb","#a5b8f3","#feaac2","#66C2A5","#8DA0CB","#E78AC3","#A6D854","#FFD92F","#E5C494","#B3B3B3")
}else if(favor == 1){
colorSpace<-c('#0ca9ce', '#78cfe5', '#c6ecf1', '#ff6f81', '#ff9c8f', '#ffc2c0','#d386bf','#cdb1d2', '#fae6f0', '#eb6fa6', '#ff88b5', '#00b1a5',"#ffa68f","#ffca75","#b8d8c9","#97bc83","#009f93","#448c99","#db888e","#e397a4","#ead0c7", "#8f9898","#bfcfcb")
}
if(!returnColor){
if (n <= length(colorSpace)) {
colors <- colorSpace[1:n]
} else {
colors <- grDevices::colorRampPalette(colorSpace)(n)
}
}
else{
colors<-colorSpace
}
return(colors)
}
24 changes: 24 additions & 0 deletions R/getDB.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,30 @@

db_getter_env<-new.env()

cvgs<-function(genes,from_what,to_what,orgDB){
loadNamespace("AnnotationDbi")
x <- AnnotationDbi::select(orgDB, keys =genes,
keytype = from_what, columns = c(from_what,to_what))
return(x)
}

#' Convert gene annotations from any keys to any keys
#'
#' @param genes gene vector
#' @param from_what input type (like "SYMBOL","ENTREZID","ENSEMBL","GENENAME",...), keys should be supported by AnnotationDbi. Search for the help page of AnnotationDbi for further help.
#' @param to_what output type (like "SYMBOL","ENTREZID","ENSEMBL","GENENAME",...), keys should be supported by AnnotationDbi. Search for the help page of AnnotationDbi for further help. Can be multiple items E.g. `c("ENTREZID","ENSEMBL","GENENAME")`
#' @param orgDB human = org.Hs.eg.db, mouse = org.Mm.eg.db, search BioConductor website for further help
#'
#' @returns a data.frame
#' @export
#'
#' @examples
convert_annotations_genes <- function(genes,from_what,to_what,orgDB){
assign("cvgs",cvgs,envir = db_getter_env)
x <- db_getter_env$cvgs(genes,from_what,to_what,orgDB)
return(x)
}

database_GO <- function(OrgDB,ONTOLOGY,...) {
t1 <- Sys.time()
loadNamespace("dplyr")
Expand Down
13 changes: 0 additions & 13 deletions R/messages.R

This file was deleted.

4 changes: 3 additions & 1 deletion dev/config_attachment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ dir.r: R
dir.v: vignettes
dir.t: tests
extra.suggests: ~
pkg_ignore: "clusterProfiler"
pkg_ignore:
- "clusterProfiler"
- "org.Hs.eg.db"
document: yes
normalize: yes
inside_rmd: no
Expand Down
23 changes: 23 additions & 0 deletions man/convert_annotations_genes.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/database_from_gmt.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 032d579

Please sign in to comment.