Skip to content

Commit

Permalink
try out paired testing, for #23.
Browse files Browse the repository at this point in the history
  • Loading branch information
jakob-wirbel committed Jul 28, 2021
1 parent 93d4429 commit 04b42f3
Show file tree
Hide file tree
Showing 2 changed files with 120 additions and 45 deletions.
158 changes: 116 additions & 42 deletions R/check_associations.r
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#' detect.lim = 1e-06, pr.cutoff = 1e-6, max.show = 50,
#' plot.type = "quantile.box",
#' panels = c("fc","auroc"), prompt = TRUE,
#' feature.type = 'filtered', verbose = 1)
#' feature.type = 'filtered', paired=NULL, verbose = 1)
#'
#'@param siamcat object of class \link{siamcat-class}
#'
Expand Down Expand Up @@ -59,7 +59,10 @@
#'
#' If \code{feature.type} is \code{"normalized"}, the normalized abundances
#' will not be log10-transformed.
#'
#'
#'@param paired character, column name of the meta-variable containing
#' information for a paired test
#'
#' @param verbose integer, control output: \code{0} for no output at all,
#' \code{1} for only information about progress and success, \code{2} for
#' normal level of information and \code{3} for full debug information,
Expand Down Expand Up @@ -117,7 +120,7 @@ check.associations <- function(siamcat, fn.plot=NULL, color.scheme = "RdYlBu",
alpha = 0.05, mult.corr = "fdr", sort.by = "fc", detect.lim = 1e-06,
pr.cutoff = 1e-6, max.show = 50, plot.type = "quantile.box",
panels = c("fc", "auroc"), prompt=TRUE, feature.type='filtered',
verbose = 1) {
paired=NULL, verbose = 1) {

if (verbose > 1)
message("+ starting check.associations")
Expand Down Expand Up @@ -169,10 +172,50 @@ check.associations <- function(siamcat, fn.plot=NULL, color.scheme = "RdYlBu",
}
# check label
label <- label(siamcat)
meta <- meta(siamcat)
if (label$type == 'TEST'){
stop('Can not check assocations for a',
' SIAMCAT object with TEST label! Exiting...')
}
# check paired information
if (!is.null(paired)){
if (!paired %in% colnames(meta)){
stop(paste0("Column with pairing information not present in",
" the metadata. Exiting..."))
}
# check that each entry in "paired" has two samples associated with
# a different label, filter out the rest
groups <- unique(meta[[paired]])
if (verbose > 2) message("+++ Starting with ", length(groups),
" pairings")
groups.red <- groups[vapply(groups, FUN = function(x){
temp <- label$label[rownames(meta[meta[[paired]]==x,])]
if (length(unique(temp))!=2){
return(FALSE)
} else if (length(temp)!=2){
return(FALSE)
}
return(TRUE)
}, FUN.VALUE = logical(1))]
if (length(groups.red) > 5){
if (verbose > 2) {
message("+++ Keeping ", length(groups.red),
" pairings with exactly two samples!")
}
} else {
stop(paste0("Per pairing, exactly 2 samples with different",
" label are needed! Only ", length(groups.red),
" pairing fulfill this requirement."))
}
meta.red <- meta[meta[[paired]] %in% groups.red,]
pairing.info <- meta.red[[paired]]
names(pairing.info) <- rownames(meta.red)
feat <- feat[,names(pairing.info)]
label$label <- label$label[names(pairing.info)]
} else {
pairing.info <- NULL
}

# check fn.plot
if (is.null(fn.plot)) {
message(paste0('### WARNING: Not plotting to a pdf-file.\n',
Expand Down Expand Up @@ -213,6 +256,7 @@ check.associations <- function(siamcat, fn.plot=NULL, color.scheme = "RdYlBu",
alpha = alpha,
probs.fc = probs.fc,
take.log=ifelse(feature.type == 'normalized', FALSE, TRUE),
pairing.info = pairing.info,
verbose = verbose
)
# update siamcat
Expand All @@ -221,14 +265,14 @@ check.associations <- function(siamcat, fn.plot=NULL, color.scheme = "RdYlBu",
assoc.param=list(detect.lim=result.list$detect.lim,
pr.cutoff=pr.cutoff, probs.fc=probs.fc,
mult.corr=mult.corr, alpha=alpha,
feature.type=feature.type))
feature.type=feature.type, paired=!is.null(paired)))
} else {
# if already existing, check parameters
old.params <- assoc_param(siamcat)
new.params <- list(detect.lim=detect.lim,
pr.cutoff=pr.cutoff, probs.fc=probs.fc,
mult.corr=mult.corr, alpha=alpha,
feature.type=feature.type)
feature.type=feature.type, paired=!is.null(paired))
check <- any(all.equal(new.params, old.params) == TRUE)
check <- all(check, nrow(associations(siamcat)) == nrow(feat))
check <- all(check,
Expand All @@ -249,6 +293,7 @@ check.associations <- function(siamcat, fn.plot=NULL, color.scheme = "RdYlBu",
alpha = alpha,
probs.fc = probs.fc,
take.log=ifelse(feature.type == 'normalized', FALSE, TRUE),
pairing.info = pairing.info,
verbose = verbose
)
# update siamcat
Expand All @@ -257,7 +302,7 @@ check.associations <- function(siamcat, fn.plot=NULL, color.scheme = "RdYlBu",
assoc.param=list(detect.lim=result.list$detect.lim,
pr.cutoff=pr.cutoff, probs.fc=probs.fc,
mult.corr=mult.corr, alpha=alpha,
feature.type=feature.type))
feature.type=feature.type, paired=!is.null(paired)))
}
}

Expand All @@ -268,6 +313,7 @@ check.associations <- function(siamcat, fn.plot=NULL, color.scheme = "RdYlBu",
if (is.null(temp)){
return(siamcat)
}

effect.size <- result.list$effect.size[temp$idx, , drop=FALSE]
truncated <- temp$truncated
detect.lim <- result.list$detect.lim
Expand Down Expand Up @@ -1034,7 +1080,6 @@ associations.quantile.box.plot <- function(data.mat, label, take.log=TRUE, col,
message("+ starting associations.quantile.box.plot")
pos.col <- col[2]
neg.col <- col[1]

p.label <- max(label$info)
n.label <- min(label$info)
p.idx <- which(label$label == p.label)
Expand Down Expand Up @@ -1259,7 +1304,7 @@ associations.quantile.rect.sub.plot <-
#' @keywords internal
analyse.binary.marker <- function(feat, label, detect.lim, colors,
pr.cutoff, mult.corr, alpha, max.show, sort.by, probs.fc = seq(.1, .9, .05),
take.log=TRUE, verbose = 1) {
take.log=TRUE, pairing.info=NULL, verbose = 1) {
if (verbose > 1)
message("+ starting analyse.binary.marker")
s.time <- proc.time()[3]
Expand Down Expand Up @@ -1289,39 +1334,70 @@ analyse.binary.marker <- function(feat, label, detect.lim, colors,
positive.label <- max(label$info)
negative.label <- min(label$info)

if (is.null(pairing.info)){
feat <- feat[,names(label$label)]
} else {
feat <- feat[,names(pairing.info)]
}

if (verbose)
pb <- progress_bar$new(total = nrow(feat))

effect.size <- data.frame(t(apply(feat, 1, FUN = function(x) {
# pseudo-fold change as differential quantile area
if (take.log == TRUE){
q.p <- quantile(log10(x[which(label$label == positive.label)] +
detect.lim), probs = probs.fc)
q.n <- quantile(log10(x[which(label$label == negative.label)] +
detect.lim), probs = probs.fc)
if (is.null(pairing.info)){
x.pos <- x[which(label$label==positive.label)]
x.neg <- x[which(label$label==negative.label)]
if (take.log == TRUE){
q.p <- quantile(log10(x.pos + detect.lim), probs = probs.fc)
q.n <- quantile(log10(x.neg + detect.lim), probs = probs.fc)
} else {
q.p <- quantile(x.pos, probs = probs.fc)
q.n <- quantile(x.neg, probs = probs.fc)
}
fc <- sum(q.p - q.n) / length(q.p)

# wilcoxon
p.val <- wilcox.test(x.pos, x.neg, exact = FALSE)$p.value

# AU-ROC
temp <- roc(cases = x.pos, controls=x.neg, ci = TRUE,
direction = '<')
aucs <- c(temp$ci)

# prevalence shift
temp.n <- mean(x.neg >= pr.cutoff)
temp.p <- mean(x.pos >= pr.cutoff)
pr.shift <- c(temp.p - temp.n, temp.n, temp.p)
} else {
q.p <- quantile(x[which(label$label == positive.label)],
probs = probs.fc)
q.n <- quantile(x[which(label$label == negative.label)],
probs = probs.fc)
pairing.info <- sort(pairing.info)
x.pos <- x[names(which(label$label[names(pairing.info)] ==
positive.label))]
x.neg <- x[names(which(label$label[names(pairing.info)] ==
negative.label))]
if (take.log == TRUE){
fc <- mean(log10(x.pos + detect.lim) -
log10(x.neg + detect.lim))
} else {
fc <- mean(x.pos-x.neg)
}
p.val <- wilcox.test(x.pos, x.neg, exact=FALSE,
paired=TRUE)$p.value
# AU-ROC
temp <- roc(cases = x.pos,
controls = x.neg,
ci = TRUE,
direction = '<')
aucs <- c(temp$ci)

# prevalence shift
temp.n <- mean(x.neg >= pr.cutoff)
temp.p <- mean(x.pos >= pr.cutoff)
pr.shift <- c(temp.p - temp.n, temp.n, temp.p)

}
fc <- sum(q.p - q.n) / length(q.p)

# wilcoxon
p.val <- wilcox.test(x[which(label$label == negative.label)],
x[which(label$label == positive.label)], exact = FALSE)$p.value

# AU-ROC
temp <- roc(predictor = x, response = label$label, ci = TRUE,
direction = '<', levels = label$info)
aucs <- c(temp$ci)

# prevalence shift
temp.n <- sum(x[which(label$label == negative.label)] >= pr.cutoff) /
length(which(label$label == negative.label))
temp.p <- sum(x[which(label$label == positive.label)] >= pr.cutoff) /
length(which(label$label == positive.label))
pr.shift <- c(temp.p - temp.n, temp.n, temp.p)


if (verbose)
pb$tick()
return(c('fc' = fc, 'p.val' = p.val, 'auc' = aucs[2],
Expand All @@ -1335,20 +1411,18 @@ analyse.binary.marker <- function(feat, label, detect.lim, colors,
ifelse(effect.size[, 'auc'] >= 0.5, colors[2], colors[1])

### Apply multi-hypothesis testing correction
if (!tolower(mult.corr) %in% c("holm", "hochberg", "hommel", "bonferroni",
if (!mult.corr %in% c("holm", "hochberg", "hommel", "bonferroni",
"BH", "BY", "fdr", "none")) {
stop(
"! Unknown multiple testing correction method:', mult.corr,'
Stopping!\n Must of one of c('none','bonferroni', 'holm','fdr',
'bhy')"
)
stop("Unknown multiple testing correction method: '", mult.corr,
"'. Exiting!\n Must of one of c('holm', 'hochberg', 'hommel', ",
"'bonferroni', 'BH', 'BY', 'fdr', none')")
}
if (mult.corr == 'none') {
warning('WARNING: No multiple hypothesis testing performed.')
effect.size$p.adj <- effect.size$p.val
} else {
effect.size$p.adj <-
p.adjust(effect.size$p.val, method = tolower(mult.corr))
p.adjust(effect.size$p.val, method = mult.corr)
}

if (verbose > 1)
Expand Down
7 changes: 4 additions & 3 deletions R/siamcat_all_class_definitions.R
Original file line number Diff line number Diff line change
Expand Up @@ -88,14 +88,14 @@ check.assoc <- function(object){
}
# check that assoc.param contains all entries
if (!all(names(object$assoc.param) == c('detect.lim', 'pr.cutoff',
'probs.fc', 'mult.corr', 'alpha', 'feature.type'))){
'probs.fc', 'mult.corr', 'alpha', 'feature.type', 'paired'))){
msg <- 'Association testing parameters do not contain all entries!'
errors <- c(errors, msg)
}
# check that all entries are valid and in the expected ranges
if (!all(vapply(object$assoc.param, class,
FUN.VALUE=character(1)) == c('numeric', 'numeric', 'numeric',
'character', 'numeric', 'character'))){
'character', 'numeric', 'character', 'logical'))){
msg<-'Association testing parameters do not contain the expected classes!'
errors <- c(errors, msg)
}
Expand All @@ -117,7 +117,8 @@ check.assoc <- function(object){
}
# mult.corr
if (!object$assoc.param$mult.corr %in%
c('none', 'bonferroni', 'holm', 'fdr', 'bhy')){
c("holm", "hochberg", "hommel", "bonferroni",
"BH", "BY", "fdr", "none")){
msg<-'Multiple testing correction method not valid!'
errors <- c(errors, msg)
}
Expand Down

0 comments on commit 04b42f3

Please sign in to comment.