Skip to content

Commit

Permalink
Update function remove_bg_exp() using Gaussian distribution percentil…
Browse files Browse the repository at this point in the history
…es, and bump version.
  • Loading branch information
Gene233 committed Apr 11, 2024
1 parent e0df38a commit 0c5dd00
Show file tree
Hide file tree
Showing 8 changed files with 50 additions and 25 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: mastR
Title: Markers Automated Screening Tool in R
Version: 1.3.4
Version: 1.3.5
Authors@R:
c(
person("Jinjin", "Chen", email = "[email protected]", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-7923-5723")),
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,7 @@
# mastR 1.3.4

* Update function `sig_gseaplot()` to allow more custom arguments for `enrichplot::gseaplot2()`.

# mastR 1.3.5

* Update function `remove_bg_exp()` to use Gaussian distribution percentiles, replacing min-max scaling, as the relative expression within each sample.
5 changes: 3 additions & 2 deletions R/AllGenerics.R
Original file line number Diff line number Diff line change
Expand Up @@ -196,8 +196,9 @@ setGeneric(
#' @description Specify signatures against specific tissues or cell lines by
#' removing genes with high expression in the background.
#'
#' @param sig_data expression object, can be matrix or DGEList, as signal data
#' @param bg_data 'CCLE' or expression object as background data
#' @param sig_data log-transformed expression object, can be matrix or DGEList,
#' as signal data
#' @param bg_data 'CCLE' or log-transformed expression object as background data
#' @param markers vector, a vector of gene names, listed the gene symbols to be
#' filtered. Must be gene SYMBOLs
#' @param s_group_col vector or character, to specify the group of signal
Expand Down
21 changes: 12 additions & 9 deletions R/DE_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,18 +76,21 @@ voom_fit_treat <- function(dge,
rownames(design) <- colnames(dge)

## make contrast matrix
if("contrast_mat" %in% names(list(...))) {
if ("contrast_mat" %in% names(list(...))) {
contrast.mat <- list(...)[["contrast_mat"]]
## check contrast.mat validity
stopifnot("contrast.mat must be a matrix!" = is.matrix(contrast.mat),
"contrast.mat levels/rownames don't match design matrix!" =
identical(rownames(contrast.mat), colnames(design)))
}else {
stopifnot(
"contrast.mat must be a matrix!" = is.matrix(contrast.mat),
"contrast.mat levels/rownames don't match design matrix!" =
identical(rownames(contrast.mat), colnames(design))
)
} else {
contrast.mat <- limma::makeContrasts(
contrasts = c(sprintf("%s-%s",
make.names(target_group),
make.names(setdiff(dge$samples$group, make.names(target_group)))
)),
contrasts = c(sprintf(
"%s-%s",
make.names(target_group),
make.names(setdiff(dge$samples$group, make.names(target_group)))
)),
## target_group vs all the rest respectively
## if group = TRUE, it's target_group vs Others
levels = design
Expand Down
10 changes: 6 additions & 4 deletions R/plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -630,10 +630,12 @@ gsea_plot_init <- function(gse, pvalue_table = FALSE, ...) {
message(ms)
p <- ggpubr::as_ggplot(grid::textGrob(ms))
} else {
pars_new <- modifyList(pars, list(x = gse[[n]],
geneSetID = seq_len(nrow(gse[[n]])),
pvalue_table = pvalue_table,
title = n))
pars_new <- modifyList(pars, list(
x = gse[[n]],
geneSetID = seq_len(nrow(gse[[n]])),
pvalue_table = pvalue_table,
title = n
))
p <- do.call(enrichplot::gseaplot2, pars_new)
p <- patchwork::wrap_elements(patchwork::wrap_plots(p, ncol = 1))
# ## add pvalue_table
Expand Down
24 changes: 19 additions & 5 deletions R/remove_bg_exp-methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -471,8 +471,8 @@ setMethod(

#' Remove genes that show high signal in the background expression data from markers.
#'
#' @param sig_mat expression matrix of interested signal data
#' @param bg_mat expression matrix of interested background data
#' @param sig_mat log-transformed expression matrix of the signal data of interest
#' @param bg_mat log-transformed expression matrix of the background data of interest
#' @param markers vector, a vector of gene names, listed the gene symbols to be
#' filtered. Must be gene SYMBOLs.
#' @param snr num, the cutoff of SNR to screen markers which are not or lowly
Expand Down Expand Up @@ -530,9 +530,13 @@ remove_bg_exp_mat <- function(
}
markers <- subset(markers, SYMBOL %in% m_in)

## scale data by column
bg_mat <- scale_0_1(bg_mat)
sig_mat <- scale_0_1(sig_mat)
# ## scale data by column
# bg_mat <- scale_0_1(bg_mat)
# sig_mat <- scale_0_1(sig_mat)

## transform data into Gaussian percentile by column
bg_mat <- percent_norm(bg_mat)
sig_mat <- percent_norm(sig_mat)

## select markers
## bg_mat common genes
Expand Down Expand Up @@ -621,5 +625,15 @@ scale_0_1 <- function(mat) {
return(mat)
}

# helper: transform data into percentile of normal distribution
#
# For every column of `mat`, fit a normal distribution using the column mean
# and the population standard deviation (denominator n) of its non-missing
# values, then replace each value by its cumulative probability under that
# distribution.
#
# mat: numeric matrix; each column is transformed independently.
# Returns a matrix with the same dimensions as `mat`; NA entries stay NA.
percent_norm <- function(mat) {
  mus <- colMeans(mat, na.rm = TRUE)

  # sd() uses the n - 1 denominator; rescale it to the population SD. The
  # count must be the number of non-missing values per column (not nrow(mat)),
  # because mean and sd above are computed with na.rm = TRUE.
  n_obs <- colSums(!is.na(mat))
  # pmax() keeps the factor NaN (instead of warning on sqrt(-Inf)) for columns
  # with no observed values.
  sds <- apply(mat, 2, sd, na.rm = TRUE) * sqrt(pmax(n_obs - 1, 0) / n_obs)

  # Transposing makes consecutive elements run across columns of `mat`, so the
  # per-column `mus`/`sds` vectors recycle onto the matching column; this
  # avoids building full-size mean/sd vectors with rep().
  mat <- t(pnorm(t(mat), mean = mus, sd = sds))
  return(mat)
}


# Register names so that static code checks treat them as known global
# variables. `SYMBOL` is referenced unquoted inside subset() in
# remove_bg_exp_mat(). NOTE(review): `gene_name` and `rna_expression` are
# presumably column names used the same way elsewhere in this file -- confirm.
utils::globalVariables(c("gene_name", "rna_expression", "SYMBOL"))
5 changes: 3 additions & 2 deletions man/remove_bg_exp.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/remove_bg_exp_mat.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 0c5dd00

Please sign in to comment.