diff --git a/DESCRIPTION b/DESCRIPTION index 5a13fa9e9..9002911c1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: xcms -Version: 1.53.0 -Date: 2017-04-16 +Version: 2.99.1 +Date: 2017-05-10 Title: LC/MS and GC/MS Data Analysis Author: Colin A. Smith , Ralf Tautenhahn , @@ -67,7 +67,6 @@ Collate: 'functions-OnDiskMSnExp.R' 'functions-ProcessHistory.R' 'functions-XCMSnExp.R' - 'functions-normalization.R' 'functions-xcmsEIC.R' 'functions-xcmsFragments.R' 'functions-xcmsRaw.R' diff --git a/NAMESPACE b/NAMESPACE index f397d240d..0ac1d5175 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -10,7 +10,7 @@ importClassesFrom("Biobase", "AnnotatedDataFrame", "Versioned") importMethodsFrom("Biobase", "classVersion", "classVersion<-", "phenoData", "phenoData<-", "pData", "rowMedians") -importFrom("graphics", "plot", "image", "boxplot") +importFrom("graphics", "plot", "image", "boxplot", "matplot", "rect", "axis") importFrom("mzR", "peaks", "close", "openMSfile", "header") importFrom("lattice", "levelplot", "panel.rect", "panel.levelplot") importFrom("plyr", "rbind.fill") @@ -206,7 +206,10 @@ export( "do_adjustRtime_peakGroups", "processHistoryTypes", "adjustRtimePeakGroups", - "plotAdjustedRtime" + "plotAdjustedRtime", + "plotChromatogram", + "highlightChromPeaks", + "plotChromPeakDensity" ) ## New analysis methods @@ -429,5 +432,6 @@ exportMethods("hasChromPeaks", "productMz", "fillChromPeaks", "as.data.frame", - "dropFilledChromPeaks" + "dropFilledChromPeaks", + "extractMsData" ) diff --git a/R/AllGenerics.R b/R/AllGenerics.R index ff86372d9..46380832b 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -81,7 +81,8 @@ setGeneric("extraPeaks<-", function(object, value) standardGeneric("extraPeaks<-")) setGeneric("extractChromatograms", function(object, ...) standardGeneric("extractChromatograms")) - +setGeneric("extractMsData", function(object, ...) + standardGeneric("extractMsData")) ## F setGeneric("factorDiag", function(object) standardGeneric("factorDiag")) diff --git a/R/DataClasses.R b/R/DataClasses.R index 800886301..9bde64946 100644 --- a/R/DataClasses.R +++ b/R/DataClasses.R @@ -614,10 +614,11 @@ setClass("CentWaveParam", msg <- c(msg, paste0("'roiList' does not provide ", "all required fields!")) } - if (length(object@roiList) > 0 & - length(object@roiList) != length(object@roiScales)) - msg <- c(msg, paste0("'roiScales' has to have the same", - " length than 'roiList'.")) + if (length(object@roiScales) > 0) { + if (length(object@roiList) != length(object@roiScales)) + msg <- c(msg, paste0("'roiScales' has to have the same", + " length than 'roiList'.")) + } if (length(msg)) msg else @@ -1377,7 +1378,9 @@ NULL #' @seealso The \code{\link{do_groupChromPeaks_density}} core #' API function and \code{\link{group.density}} for the old user interface. #' -#' @seealso \code{\link{featureDefinitions}} and +#' @seealso \code{\link{plotChromPeakDensity}} to plot peak densities and +#' evaluate different algorithm settings. +#' \code{\link{featureDefinitions}} and #' \code{\link{featureValues,XCMSnExp-method}} for methods to access the #' features (i.e. the peak grouping results). #' @@ -2243,13 +2246,21 @@ setClass("MsFeatureData", contains = c("environment", "Versioned"), #' @seealso \code{\linkS4class{xcmsSet}} for the old implementation. #' \code{\link[MSnbase]{OnDiskMSnExp}}, \code{\link[MSnbase]{MSnExp}} #' and \code{\link[MSnbase]{pSet}} for a complete list of inherited methods. +#' #' \code{\link{findChromPeaks}} for available peak detection methods #' returning a \code{XCMSnExp} object as a result. +#' #' \code{\link{groupChromPeaks}} for available peak grouping #' methods and \code{\link{featureDefinitions}} for the method to extract #' the feature definitions representing the peak grouping results. #' \code{\link{adjustRtime}} for retention time adjustment methods. #' +#' \code{\link{extractChromatograms}} to extract MS data as +#' \code{\link{Chromatogram}} objects. +#' +#' \code{\link{extractMsData}} for the method to extract MS data as +#' \code{data.frame}s. +#' #' @rdname XCMSnExp-class #' #' @examples @@ -2415,6 +2426,8 @@ setClass("XCMSnExp", #' @seealso \code{\link{extractChromatograms}} for the method to extract #' \code{Chromatogram} objects from \code{\link{XCMSnExp}} or #' \code{\link[MSnbase]{OnDiskMSnExp}} objects. +#' +#' \code{\link{plotChromatogram}} to plot \code{Chromatogram} objects. setClass("Chromatogram", slots = c( rtime = "numeric", @@ -2430,8 +2443,8 @@ setClass("Chromatogram", prototype = prototype( rtime = numeric(), intensity = numeric(), - mz = c(0, 0), - filterMz = c(0, 0), + mz = c(NA_real_, NA_real_), + filterMz = c(NA_real_, NA_real_), precursorMz = c(NA_real_, NA_real_), productMz = c(NA_real_, NA_real_), fromFile = integer(), diff --git a/R/MPI.R b/R/MPI.R index db770866b..1d4a7b2cf 100644 --- a/R/MPI.R +++ b/R/MPI.R @@ -90,12 +90,12 @@ fillPeaksChromPar <- function(arg) { if (length(params$dataCorrection) > 1) { ## Note: dataCorrection (as set in the xcmsSet function) is either ## 1 for all or for none. - if (any(params$dataCorrection) == 1) + if (any(params$dataCorrection == 1)) lcraw <- stitch(lcraw, AutoLockMass(lcraw)) } if (exists("params$polarity") && length(params$polarity) >0) { - if (length(params$polarity) >0) { + if (length(params$polarity) > 0) { ## Retain wanted polarity only lcraws <- split(lcraw, lcraw@polarity, DROP=TRUE) lcraw <- lcraws[[params$polarity]] diff --git a/R/do_adjustRtime-functions.R b/R/do_adjustRtime-functions.R index 619f13b3a..8a8c51327 100644 --- a/R/do_adjustRtime-functions.R +++ b/R/do_adjustRtime-functions.R @@ -106,6 +106,9 @@ do_adjustRtime_peakGroups <- } else rt <- .getPeakGroupsRtMatrix(peaks, peakIndex, nSamples, missingSample, extraPeaks) + ## Fix for issue #175 + if (length(rt) == 0) + stop("No peak groups found in the data for the provided settings") ## ## Check if we have peak groups with almost the same retention time. If yes ## ## select the best matching peaks among these. ## rtmeds <- rowMedians(rt, na.rm = TRUE) @@ -477,5 +480,9 @@ do_adjustRtime_peakGroups_orig <- function(peaks, peakIndex, rtime, ## retention time that is calculated over ALL peaks within the peak ## group, not only to one peak selected for each sample (for multi ## peak per sample assignments). - rt[order(rowMedians(rt, na.rm = TRUE)), , drop = FALSE] + ## Fix for issue #175 + if (is(rt, "matrix")) { + rt <- rt[order(rowMedians(rt, na.rm = TRUE)), , drop = FALSE] + } + rt } diff --git a/R/do_findChromPeaks-functions.R b/R/do_findChromPeaks-functions.R index 3d2153c81..401e50172 100644 --- a/R/do_findChromPeaks-functions.R +++ b/R/do_findChromPeaks-functions.R @@ -466,7 +466,7 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect, mzmean <- do.call(mzCenterFun, list(mz = mz.value, intensity = mz.int)) - + ## Compute dppm only if needed dppm <- NA if (verboseColumns) { @@ -980,8 +980,13 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect, ## @MOD1: Remove mz values for which no intensity was ## measured. Would be better if getEIC returned NA ## if nothing was measured. + mzorig <- mz.value mz.value <- mz.value[mz.int > 0] mz.int <- mz.int[mz.int > 0] + ## Call next to avoid reporting peaks without mz + ## values (issue #165). + if (length(mz.value) == 0) + next ## cat("mz.value: ", paste0(mz.value, collapse = ", "), ## "\n") diff --git a/R/do_groupChromPeaks-functions.R b/R/do_groupChromPeaks-functions.R index d488a534c..c118a2e57 100644 --- a/R/do_groupChromPeaks-functions.R +++ b/R/do_groupChromPeaks-functions.R @@ -114,6 +114,7 @@ do_groupChromPeaks_density <- function(peaks, sampleGroups, densFrom <- rtRange[1] - 3 * bw densTo <- rtRange[2] + 3 * bw + densN <- max(512, 2^(ceiling(log2(diff(rtRange) / (bw / 2))))) endIdx <- 0 num <- 0 gcount <- integer(nSampleGroups) @@ -125,7 +126,8 @@ do_groupChromPeaks_density <- function(peaks, sampleGroups, if (endIdx - startIdx < 0) next curMat <- peaks[startIdx:endIdx, , drop = FALSE] - den <- density(curMat[, "rt"], bw = bw, from = densFrom, to = densTo) + den <- density(curMat[, "rt"], bw = bw, from = densFrom, to = densTo, + n = densN) maxden <- max(den$y) deny <- den$y ## gmat <- matrix(nrow = 5, ncol = 2 + gcount) @@ -240,7 +242,8 @@ do_groupChromPeaks_density_par <- function(peaks, sampleGroups, sampleGroupTbl, minFr, minSmpls, sampleGroupNms, peakOrdr) { den <- density(z[, "rt"], bw = bw, from = rtr[1] - 3 * bw, - to = rtr[2] + 3 * bw) + to = rtr[2] + 3 * bw, + n = max(512, 2^(ceiling(log2(diff(rtr) / (bw / 2)))))) maxden <- max(den$y) deny <- den$y snum <- 0 diff --git a/R/functions-Chromatogram.R b/R/functions-Chromatogram.R index 4ddf44e8e..56b467595 100644 --- a/R/functions-Chromatogram.R +++ b/R/functions-Chromatogram.R @@ -5,30 +5,33 @@ names(.SUPPORTED_AGG_FUN_CHROM) <- "Intensity representing the minimum intensity across the mz range.", "Intensity representing the mean intensity across the mz range.") -##' @title Validation function for Chromatogram objects -##' -##' @description This function can be used instead of the \code{validObject} to -##' check if the chromatogram is valid, without having to call the validity -##' method on all super classes. -##' -##' @param object A \code{Chromatogram} object. -##' -##' @return \code{TRUE} if the \code{object} is valid and the error messages -##' otherwise (i.e. a \code{character}). -##' @author Johannes Rainer -##' @noRd +#' @title Validation function for Chromatogram objects +#' +#' @description This function can be used instead of the \code{validObject} to +#' check if the chromatogram is valid, without having to call the validity +#' method on all super classes. +#' +#' @param object A \code{Chromatogram} object. +#' +#' @return \code{TRUE} if the \code{object} is valid and the error messages +#' otherwise (i.e. a \code{character}). +#' +#' @author Johannes Rainer +#' +#' @noRd validChromatogram <- function(object) { msg <- character() if (length(object@rtime) != length(object@intensity)) msg <- c(msg, "Length of 'rt' and 'intensity' have to match!") - if (is.unsorted(object@mz)) - msg <- c(msg, "'mz' has to be increasingly ordered!") if (is.unsorted(object@rtime)) msg <- c(msg, paste0("'rtime' has to be increasingly ordered!")) if (length(object@mz) > 0 & length(object@mz) != 2) msg <- c(msg, paste0("'mz' is supposed to contain the ", "minimum and maximum mz values for the ", "chromatogram.")) + if (!all(is.na(object@mz))) + if (is.unsorted(object@mz)) + msg <- c(msg, "'mz' has to be increasingly ordered!") if (length(object@filterMz) > 0 & length(object@filterMz) != 2) msg <- c(msg, paste0("'filterMz' is supposed to contain the ", "minimum and maximum mz values of the filter", @@ -54,44 +57,45 @@ validChromatogram <- function(object) { else msg } -##' @description \code{Chromatogram}: create an instance of the -##' \code{Chromatogram} class. -##' -##' @param rtime \code{numeric} with the retention times (length has to be equal -##' to the length of \code{intensity}). -##' -##' @param intensity \code{numeric} with the intensity values (length has to be -##' equal to the length of \code{rtime}). -##' -##' @param mz \code{numeric(2)} representing the mz value range (min, max) -##' on which the chromatogram was created. This is supposed to contain the -##' \emph{real} range of mz values in contrast to the \code{filterMz} below. -##' If not applicable use \code{mzrange = c(0, 0)}. -##' -##' @param filterMz \code{numeric(2)} representing the mz value range (min, -##' max) that was used to filter the original object on mz dimension. If not -##' applicable use \code{filterMz = c(0, 0)}. -##' -##' @param precursorMz \code{numeric(2)} for SRM/MRM transitions. -##' Represents the mz of the precursor ion. See details for more information. -##' -##' @param productMz \code{numeric(2)} for SRM/MRM transitions. -##' Represents the mz of the product. See details for more information. -##' -##' @param fromFile \code{integer(1)} the index of the file within the -##' \code{\link{OnDiskMSnExp}} or \code{\link{XCMSnExp}} from which the -##' chromatogram was extracted. -##' -##' @param aggregationFun \code{character} string specifying the function that -##' was used to aggregate intensity values for the same retention time across the -##' mz range. Supported are \code{"sum"} (total ion chromatogram), \code{"max"} -##' (base peak chromatogram), \code{"min"} and \code{"mean"}. -##' -##' @slot .__classVersion__,rtime,intensity,mz,filterMz,precursorMz,productMz,fromFile,aggregationFun See corresponding parameter above. -##' -##' @rdname Chromatogram-class +#' @description \code{Chromatogram}: create an instance of the +#' \code{Chromatogram} class. +#' +#' @param rtime \code{numeric} with the retention times (length has to be equal +#' to the length of \code{intensity}). +#' +#' @param intensity \code{numeric} with the intensity values (length has to be +#' equal to the length of \code{rtime}). +#' +#' @param mz \code{numeric(2)} representing the mz value range (min, max) +#' on which the chromatogram was created. This is supposed to contain the +#' \emph{real} range of mz values in contrast to the \code{filterMz} below. +#' If not applicable use \code{mzrange = c(0, 0)}. +#' +#' @param filterMz \code{numeric(2)} representing the mz value range (min, +#' max) that was used to filter the original object on mz dimension. If not +#' applicable use \code{filterMz = c(0, 0)}. +#' +#' @param precursorMz \code{numeric(2)} for SRM/MRM transitions. +#' Represents the mz of the precursor ion. See details for more information. +#' +#' @param productMz \code{numeric(2)} for SRM/MRM transitions. +#' Represents the mz of the product. See details for more information. +#' +#' @param fromFile \code{integer(1)} the index of the file within the +#' \code{\link{OnDiskMSnExp}} or \code{\link{XCMSnExp}} from which the +#' chromatogram was extracted. +#' +#' @param aggregationFun \code{character} string specifying the function that +#' was used to aggregate intensity values for the same retention time across +#' the mz range. Supported are \code{"sum"} (total ion chromatogram), +#' \code{"max"} (base peak chromatogram), \code{"min"} and \code{"mean"}. +#' +#' @slot .__classVersion__,rtime,intensity,mz,filterMz,precursorMz,productMz,fromFile,aggregationFun See corresponding parameter above. +#' +#' @rdname Chromatogram-class Chromatogram <- function(rtime = numeric(), intensity = numeric(), - mz = c(0, 0), filterMz = c(0, 0), + mz = c(NA_real_, NA_real_), + filterMz = c(NA_real_, NA_real_), precursorMz = c(NA_real_, NA_real_), productMz = c(NA_real_, NA_real_), fromFile = integer(), @@ -107,3 +111,229 @@ Chromatogram <- function(rtime = numeric(), intensity = numeric(), precursorMz = range(precursorMz), productMz = range(productMz), fromFile = as.integer(fromFile), aggregationFun = aggregationFun)) } + +#' @title Plot Chromatogram objects +#' +#' @description \code{plotChromatogram} creates a chromatogram plot for a +#' single \code{Chromatogram} object or a \code{list} of +#' \code{\link{Chromatogram}} objects (one line for each +#' \code{\link{Chromatogram}}/sample). +#' +#' @details The \code{plotChromatogram} function allows to efficiently plot +#' the chromatograms of several samples into a single plot. +#' +#' @param x For \code{plotChromatogram}: \code{list} of +#' \code{\link{Chromatogram}} objects. Such as extracted from an +#' \code{\link{XCMSnExp}} object by the \code{\link{extractChromatograms}} +#' method. +#' For \code{highlightChromPeaks}: \code{XCMSnExp} object with the detected +#' peaks. +#' +#' @param rt For \code{plotChromatogram}: \code{numeric(2)}, optional parameter +#' to subset each \code{Chromatogram} by retention time prior to plotting. +#' Alternatively, the plot could be subsetted by passing a \code{xlim} +#' parameter. +#' For \code{highlightChromPeaks}: \code{numeric(2)} with the +#' retention time range from which peaks should be extracted and plotted. +#' +#' @param col For \code{plotChromatogram}: color definition for each +#' line/sample. Has to have the same length as samples/elements in \code{x}, +#' otherwise \code{col[1]} is recycled to generate a vector of +#' \code{length(x)}. +#' For \code{highlightChromPeaks}: color to be used to fill the +#' rectangle. +#' +#' @param lty the line type. See \code{\link[graphics]{plot}} for more details. +#' +#' @param type the plotting type. See \code{\link[graphics]{plot}} for more +#' details. +#' For \code{highlightChromPeaks}: \code{character(1)} defining how the peak +#' should be highlighted: \code{type = "rect"} draws a rectangle +#' representing the peak definition, \code{type = "point"} indicates a +#' chromatographic peak with a single point at the position of the peak's +#' \code{"rt"} and \code{"maxo"}. +#' +#' @param xlab \code{character(1)} with the label for the x-axis. +#' +#' @param ylab \code{character(1)} with the label for the y-axis. +#' +#' @param main The title for the plot. For \code{plotChromatogram}: if +#' \code{main = NULL} the mz range of the \code{Chromatogram} object(s) will +#' be used as the title. +#' +#' @param ... additional parameters to the \code{\link{matplot}} or \code{plot} +#' function. +#' +#' @seealso \code{\link{extractChromatograms}} for how to extract a list of +#' \code{\link{Chromatogram}} objects from an \code{\link{XCMSnExp}} +#' objects. +#' +#' @author Johannes Rainer +#' +#' @examples +#' +#' ## Perform a fast peak detection. +#' library(xcms) +#' library(faahKO) +#' faahko_3_files <- c(system.file('cdf/KO/ko15.CDF', package = "faahKO"), +#' system.file('cdf/KO/ko16.CDF', package = "faahKO"), +#' system.file('cdf/KO/ko18.CDF', package = "faahKO")) +#' +#' od <- readMSData2(faahko_3_files) +#' +#' od <- findChromPeaks(od, param = CentWaveParam(snthresh = 20, noise = 10000)) +#' +#' rtr <- c(2600, 2750) +#' mzr <- c(344, 344) +#' chrs <- extractChromatograms(od, rt = rtr, mz = mzr) +#' +#' ## Plot a single chromatogram +#' plotChromatogram(chrs[[1]]) +#' +#' ## Plot all chromatograms at once, using different colors for each. +#' plotChromatogram(chrs, col = c("#FF000080", "#00FF0080", "#0000FF80"), lwd = 2) +#' +#' ## Highlight identified chromatographic peaks. +#' highlightChromPeaks(od, rt = rtr, mz = mzr, +#' col = c("#FF000005", "#00FF0005", "#0000FF05"), +#' border = c("#FF000040", "#00FF0040", "#0000FF40")) +#' +plotChromatogram <- function(x, rt, col = "#00000060", + lty = 1, type = "l", xlab = "retention time", + ylab = "intensity", main = NULL, ...) { + if (!is.list(x) & !is(x, "Chromatogram")) + stop("'x' should be a Chromatogram object or a list of Chromatogram", + " objects.") + if (is(x, "Chromatogram")) + x <- list(x) + isOK <- lapply(x, function(z) { + if (is(z, "Chromatogram")) { + return(TRUE) + } else { + if (is.na(z)) + return(TRUE) + } + FALSE + }) + if (any(!unlist(isOK))) + stop("if 'x' is a list it should only contain Chromatogram objects") + ## Subset the Chromatogram objects if rt provided. + if (!missing(rt)) { + rt <- range(rt) + x <- lapply(x, function(z) { + if (is(z, "Chromatogram")) + filterRt(z, rt = rt) + }) + } + if (length(col) != length(x)) { + col <- rep(col[1], length(x)) + } + ## If main is NULL use the mz range. + if (is.null(main)) { + mzr <- range(lapply(x, mz), na.rm = TRUE, finite = TRUE) + main <- paste0(format(mzr, digits = 7), collapse = " - ") + } + ## Number of measurements we've got per chromatogram. This can be different + ## between samples, from none (if not a single measurement in the rt/mz) + ## to the number of data points that were actually measured. + lens <- unique(lengths(x)) + max_len <- max(lens) + max_len_vec <- rep_len(NA, max_len) + ## Generate the matrix of rt values, columns are samples, rows retention + ## time values. Fill each column with NAs up to the maximum number of values + ## we've got in a sample/file. + rts <- do.call(cbind, lapply(x, function(z) { + cur_len <- length(z) + if (cur_len == 0) + max_len_vec + else { + ## max_len_vec[,] <- NA ## don't need that. get's copied. + max_len_vec[seq_len(cur_len)] <- rtime(z) + max_len_vec + } + })) + ## Same for the intensities. + ints <- do.call(cbind, lapply(x, function(z) { + cur_len <- length(z) + if (length(z) == 0) + max_len_vec + else { + ## max_len_vec[,] <- NA ## don't need that. get's copied. + max_len_vec[seq_len(cur_len)] <- intensity(z) + max_len_vec + } + })) + ## Define the x and y limits + x_lim <- c(0, 1) + y_lim <- c(0, 1) + if (all(is.na(rts))) + if (!missing(rt)) + x_lim <- range(rt) + else + x_lim <- range(rts, na.rm = TRUE, finite = TRUE) + if (!all(is.na(ints))) + y_lim <- range(ints, na.rm = TRUE, finite = TRUE) + ## Identify columns that have only NAs in either intensity or rt - these + ## will not be plotted. + keepCol <- which(apply(ints, MARGIN = 2, function(z) any(!is.na(z))) | + apply(rts, MARGIN = 2, function(z) any(!is.na(z)))) + ## Finally plot the data. + if (length(keepCol)) { + matplot(x = rts[, keepCol, drop = FALSE], + y = ints[, keepCol, drop = FALSE], type = type, lty = lty, + col = col[keepCol], xlab = xlab, ylab = ylab, main = main, + ...) + } else + plot(x = 3, y = 3, pch = NA, xlab = xlab, ylab = ylab, main = main, + xlim = x_lim, ylim = y_lim) +} + + + +#' @description The \code{highlightChromPeaks} function adds chromatographic +#' peak definitions to an existing plot, such as one created by the +#' \code{plotChromatograms} function. +#' +#' @param mz \code{numeric(2)} with the mz range from which the peaks should +#' be extracted and plotted. +#' +#' @param border colors to be used to color the border of the rectangles. Has to +#' be equal to the number of samples in \code{x}. +#' +#' @param lwd \code{numeric(1)} defining the width of the line/border. +#' +#' @rdname plotChromatogram +highlightChromPeaks <- function(x, rt, mz, + border = rep("00000040", length(fileNames(x))), + lwd = 1, col = NA, type = c("rect", "point"), + ...) { + type <- match.arg(type) + if (missing(rt)) + rt <- c(-Inf, Inf) + if (missing(mz)) + mz <- c(-Inf, Inf) + if (!is(x, "XCMSnExp")) + stop("'x' has to be a XCMSnExp object") + if (!hasChromPeaks(x)) + stop("'x' does not contain any detected peaks") + pks <- chromPeaks(x, rt = rt, mz = mz, ppm = 0) + if (length(col) != length(fileNames(x))) + col <- rep(col[1], length(fileNames(x))) + if (length(border) != length(fileNames(x))) + border <- rep(border[1], length(fileNames(x))) + if (length(pks)) { + if (type == "rect") + rect(xleft = pks[, "rtmin"], xright = pks[, "rtmax"], + ybottom = rep(0, nrow(pks)), ytop = pks[, "maxo"], + border = border[pks[, "sample"]], lwd = lwd, + col = col[pks[, "sample"]]) + if (type == "point") { + if (any(is.na(col))) + col <- border + ## Draw a star at the position defined by the "rt" column + points(x = pks[, "rt"], y = pks[, "maxo"], + col = col[pks[, "sample"]], ...) + } + } +} + diff --git a/R/functions-XCMSnExp.R b/R/functions-XCMSnExp.R index e9cf3cab0..e756540bf 100644 --- a/R/functions-XCMSnExp.R +++ b/R/functions-XCMSnExp.R @@ -1,12 +1,14 @@ #' @include DataClasses.R functions-utils.R -##' Takes a XCMSnExp and drops ProcessHistory steps from the @.processHistory -##' slot matching the provided type. -##' -##' @param num which should be dropped? If \code{-1} all matching will be dropped, -##' otherwise just the most recent num. -##' @return The XCMSnExp input object with selected ProcessHistory steps dropped. -##' @noRd +#' @description Takes a XCMSnExp and drops ProcessHistory steps from the +#' @.processHistory slot matching the provided type. +#' +#' @param num which should be dropped? If \code{-1} all matching will be dropped, +#' otherwise just the most recent num. +#' +#' @return The XCMSnExp input object with selected ProcessHistory steps dropped. +#' +#' @noRd dropProcessHistories <- function(x, type, num = -1) { x@.processHistory <- dropProcessHistoriesList(processHistory(x), type = type, num = num) @@ -32,8 +34,9 @@ dropProcessHistoriesList <- function(x, type, num = -1) { return(x) } -##' Convert an XCMSnExp to an xcmsSet. -##' @noRd +#' Convert an XCMSnExp to an xcmsSet. +#' +#' @noRd .XCMSnExp2xcmsSet <- function(from) { if (any(msLevel(from) > 1)) stop("Coercing an XCMSnExp with MS level > 1 is not yet supported!") @@ -117,122 +120,29 @@ dropProcessHistoriesList <- function(x, type, num = -1) { return(xs) } -##' @title Extract spectra subsets -##' -##' Extract subsets of spectra matching the given mz and retention time ranges. -##' @noRd -.spectraSubsets <- function(x, rtrange, mzrange) { - ## o Should allow to provide single ranges, but also matrices of rtrange - ## and/or mzranges. - ## o Perform the data fetching by file. - ## o Return the (mz subsetted) spectra by file and by ranges. - ## o Since data should be processed on a by-file basis representation of the - ## result as a list (files) of list(ranges) of list(spectra) seems to be - ## best. - ## SEE runit.XCMSnExp.R, -} - -## This is somewhat similar to the getEIC, just that it extracts for each -## mz/rt range pair a data.frame with rt, mz, intensity per sample. -## This version works on a single rt/mz range pair at a time. -## CHECK: -## 1) mz range outside. -## 2) rt range outside. -.extractMsData <- function(x, rtrange, mzrange) { - ## Subset the OnDiskMSnExp - fns <- fileNames(x) - subs <- filterMz(filterRt(x, rt = rtrange), mz = mzrange) - if (length(subs) == 0) { - return(NULL) - } - fromF <- base::match(fileNames(subs), fns) - ## Now extract mz-intensity pairs from each spectrum. - ## system.time( - ## suppressWarnings( - ## dfs <- spectrapply(tmp, as.data.frame) - ## ) - ## ) ## 0.73sec - ## system.time( - suppressWarnings( - dfs <- spectrapply(subs, FUN = function(z) { - if (peaksCount(z)) - return(base::data.frame(rt = rep_len(rtime(z), length(z@mz)), - as.data.frame(z))) - else - return(base::data.frame(rt = numeric(), mz = numeric(), - i = integer())) - }) - ) - ## Now I want to rbind the spectrum data frames per file - ff <- fromFile(subs) - L <- split(dfs, f = ff) - L <- lapply(L, do.call, what = rbind) - ## Put them into a vector same length that we have files. - res <- vector(mode = "list", length = length(fns)) - res[fromF] <- L - return(res) -} - -## Same as above, but we're applying a function - or none. -.sliceApply <- function(x, FUN = NULL, rtrange, mzrange) { - fns <- fileNames(x) - if (is(x, "XCMSnExp")) { - ## Now, the filterRt might get heavy for XCMSnExp objects if we're - ## filtering also the chromatographic peaks and features! - msfd <- new("MsFeatureData") - if (hasAdjustedRtime(x)) { - ## just copy over the retention time. - msfd$adjustedRtime <- x@msFeatureData$adjustedRtime - } - lockEnvironment(msfd, bindings = TRUE) - x@msFeatureData <- msfd - ## with an XCMSnExp without chrom. peaks and features filterRt - ## should be faster. - } - subs <- filterMz(filterRt(x, rt = rtrange), mz = mzrange) - if (base::length(subs) == 0) { - return(list()) - } - fromF <- base::match(fileNames(subs), fns) - suppressWarnings( - res <- spectrapply(subs, FUN = FUN) - ) - ## WARN: fromFile reported within the Spectra might not be correct!!! - return(res) -} - -##' @description Extract a chromatogram from an \code{OnDiskMSnExp} or -##' \code{XCMSnExp} subsetting to the provided retention time range -##' (\code{rt}) and using the function \code{aggregationFun} to aggregate -##' intensity values for the same retention time across the mz range -##' (\code{mz}). -##' -##' @param x An \code{OnDiskMSnExp} or \code{XCMSnExp} object. -##' -##' @param rt \code{numeric(2)} providing the lower and upper retention time. It -##' is also possible to submit a \code{numeric(1)} in which case \code{range} is -##' called on it to transform it to a \code{numeric(2)}. -##' -##' @param mz \code{numeric(2)} providing the lower and upper mz value for -##' the mz range. It is also possible to submit a \code{numeric(1)} in which case -##' \code{range} is called on it to transform it to a \code{numeric(2)}. -##' -##' @param aggregationFun The function to be used to aggregate intensity values -##' across the mz range for the same retention time. -##' -##' @param ... Additional arguments to be passed to the object's \code{rtime} -##' call. -##' -##' @return A \code{list} with the \code{Chromatogram} objects. If no data was -##' present for the specified \code{rtrange} and \code{mzrange} the function -##' returns a \code{list} of length \code{0}. -##' -##' @author Johannes Rainer -##' @noRd -.extractChromatogram <- function(x, rt, mz, aggregationFun = "sum", ...) { - if (!any(.SUPPORTED_AGG_FUN_CHROM == aggregationFun)) - stop("'aggregationFun' should be one of ", - paste0("'", .SUPPORTED_AGG_FUN_CHROM, "'", collapse = ", ")) +#' @description Extract a \code{data.frame} of retention time, mz and intensity +#' values from each file/sample in the provided rt-mz range. +#' +#' @note Ideally, \code{x} should be an \code{OnDiskMSnExp} object as subsetting +#' of a \code{XCMSnExp} object is more costly (removing of preprocessing +#' results, restoring data etc). If retention times reported in the +#' featureData are replaced by adjusted retention times, these are set +#' in the Spectrum objects as retention time. +#' +#' @param x An \code{OnDiskMSnExp} object. +#' +#' @param rt \code{numeric(2)} with the retention time range from which the +#' data should be extracted. +#' +#' @param mz \code{numeric(2)} with the mz range. +#' +#' @param return A \code{list} with length equal to the number of files and +#' each element being a \code{data.frame} with the extracted values. +#' +#' @noRd +#' +#' @author Johannes Rainer +.extractMsData <- function(x, rt, mz) { if (!missing(rt)) { rt <- range(rt, na.rm = TRUE) if (length(rt) != 2) @@ -247,203 +157,350 @@ dropProcessHistoriesList <- function(x, type, num = -1) { ## Subset the object based on rt and mz range. subs <- filterMz(filterRt(x, rt = rt), mz = mz) if (length(subs) == 0) { - return(list()) + ## Return a list with empty data.frames + empty_df <- data.frame(rt = numeric(), mz = numeric(), i = integer()) + return(lapply(1:length(fileNames(x)), FUN = function(z){empty_df})) } - ## Now, call spectrapply on the object to return the data we need from each - ## Spectrum: the aggregated intensity values per spectrum and the mz value - ## range. - ## Note: we're returning NA in case we don't have a valid measurement for a - ## retention time within the specified mz suppressWarnings( - res <- spectrapply(subs, FUN = function(z) { + dfs <- spectrapply(subs, FUN = function(z) { if (!z@peaksCount) - return(c(NA_real_, NA_real_, NA_real_)) - ## return(list()) - return(c(range(z@mz), do.call(aggregationFun, list(z@intensity)))) + return(data.frame(rt = numeric(), mz = numeric(), + i = integer())) + data.frame(rt = rep_len(z@rt, length(z@mz)), + mz = z@mz, i = z@intensity) }) ) - ## Do I want to drop the names? - nas <- unlist(lapply(res, function(z) is.na(z[3])), use.names = FALSE) - if (all(nas)) - return(list()) - ## not_empty <- base::which(base::lengths(res) > 0) - ## if (length(not_empty)) { - ## res <- split(res[not_empty], f = fromFile(subs)[not_empty]) - ## rtm <- split(rtime(subs, ...)[not_empty], f = fromFile(subs)[not_empty]) - res <- split(res, f = fromFile(subs)) - rtm <- split(rtime(subs, ...), f = fromFile(subs)) - ## We want to have one Chromatogram per file. - ## Let's use a simple for loop here - no need for an mapply (yet). - resL <- vector("list", length(res)) - for (i in 1:length(res)) { - allVals <- unlist(res[[i]], use.names = FALSE) - idx <- seq(3, length(allVals), by = 3) - mzr <- range(allVals[-idx], na.rm = TRUE, finite = TRUE) - ## Or should we drop the names completely? - ints <- allVals[idx] - names(ints) <- names(rtm[[i]]) - resL[[i]] <- Chromatogram(rtime = rtm[[i]], - intensity = ints, mz = mzr, - filterMz = fmzr, - fromFile = as.integer(names(res)[i]), - aggregationFun = aggregationFun) - } - return(resL) - ## } else { - ## return(list()) - ## } -} + fns <- fileNames(x) + fromF <- base::match(fileNames(subs), fns) -#' Integrates the intensities for chromatograpic peak(s). This is supposed to be -#' called by the fillChromPeaks method. -#' -#' @note Use one of .getPeakInt2 or .getPeakInt3 instead! -#' -#' @param object An \code{XCMSnExp} object representing a single sample. -#' @param peakArea A \code{matrix} with the peak definition, i.e. \code{"rtmin"}, -#' \code{"rtmax"}, \code{"mzmin"} and \code{"mzmax"}. -#' @noRd -.getPeakInt <- function(object, peakArea) { - if (length(fileNames(object)) != 1) - stop("'object' should be an XCMSnExp for a single file!") - res <- numeric(nrow(peakArea)) - for (i in 1:length(res)) { - rtr <- peakArea[i, c("rtmin", "rtmax")] - chr <- extractChromatograms(object, - rt = rtr, - mz = peakArea[i, c("mzmin", "mzmax")])[[1]] - if (length(chr)) - res[i] <- sum(intensity(chr), na.rm = TRUE) * - ((rtr[2] - rtr[1]) / (length(chr) - 1)) - else - res[i] <- NA_real_ - } - return(unname(res)) + ## Now I want to rbind the spectrum data frames per file + L <- split(dfs, f = fromFile(subs)) + L <- lapply(L, do.call, what = rbind) + ## Put them into a vector same length that we have files. + res <- vector(mode = "list", length = length(fns)) + res[fromF] <- L + return(res) } -#' Integrates the intensities for chromatograpic peak(s). This is supposed to be -#' called by the fillChromPeaks method. + +#' @description This function extracts chromatograms efficiently for multiple +#' rt and mz ranges by loading the data per file only once and performing +#' the mz subsetting on the already loaded Spectrum1 classes. #' -#' @note This reads the full data first and does the subsetting later in R. +#' @note Ensure that x is an OnDiskMSnExp and not an e.g. XCMSnExp object. +#' Subsetting etc an XCMSnExp might take longer. #' -#' @param object An \code{XCMSnExp} object representing a single sample. -#' @param peakArea A \code{matrix} with the peak definition, i.e. \code{"rtmin"}, -#' \code{"rtmax"}, \code{"mzmin"} and \code{"mzmax"}. -#' @noRd -.getPeakInt2 <- function(object, peakArea) { - if (length(fileNames(object)) != 1) - stop("'object' should be an XCMSnExp for a single file!") - res <- numeric(nrow(peakArea)) - spctr <- spectra(object, BPPARAM = SerialParam()) - mzs <- lapply(spctr, mz) - valsPerSpect <- lengths(mzs) - ints <- unlist(lapply(spctr, intensity), use.names = FALSE) - rm(spctr) - mzs <- unlist(mzs, use.names = FALSE) - rtim <- rtime(object) - for (i in 1:length(res)) { - rtr <- peakArea[i, c("rtmin", "rtmax")] - mtx <- .rawMat(mz = mzs, int = ints, scantime = rtim, - valsPerSpect = valsPerSpect, rtrange = rtr, - mzrange = peakArea[i, c("mzmin", "mzmax")]) - if (length(mtx)) { - if (!all(is.na(mtx[, 3]))) { - ## How to calculate the area: (1)sum of all intensities / (2)by - ## the number of data points (REAL ones, considering also NAs) - ## and multiplied with the (3)rt width. - ## (1) sum(mtx[, 3], na.rm = TRUE) - ## (2) sum(rtim >= rtr[1] & rtim <= rtr[2]) - 1 ; if we used - ## nrow(mtx) here, which would correspond to the non-NA - ## intensities within the rt range we don't get the same results - ## as e.g. centWave. - ## (3) rtr[2] - rtr[1] - res[i] <- sum(mtx[, 3], na.rm = TRUE) * - ((rtr[2] - rtr[1]) / - (sum(rtim >= rtr[1] & rtim <= rtr[2]) - 1)) - } else { - res[i] <- NA_real_ - } - } else { - res[i] <- NA_real_ - } - } - return(unname(res)) -} - -#' Integrates the intensities for chromatograpic peak(s). This is supposed to be -#' called by the fillChromPeaks method. +#' @param rt \code{matrix} with two columns and number of rows corresponding to +#' the number of ranges to extract. +#' +#' @param mz \code{matrix} with two columns and number of rows corresponding to +#' the number of ranges to extract. nrow of rt and mz have to match. #' -#' @note This reads the full data first and does the subsetting later in R. This -#' function uses the C getEIC function. +#' @param x OnDiskMSnExp object from which to extract the chromatograms. +#' +#' @param return.type either \code{"list"} or \code{"matrix"} to return the +#' result as a list or as a matrix. +#' +#' @param missingValue value to be used as intensity if no signal was measured +#' for a given rt. #' -#' @param object An \code{XCMSnExp} object representing a single sample. -#' @param peakArea A \code{matrix} with the peak definition, i.e. \code{"rtmin"}, -#' \code{"rtmax"}, \code{"mzmin"} and \code{"mzmax"}. +#' @return A \code{list} or \code{matrix} with the \code{Chromatogram} objects. +#' If no data was present for the specified \code{rtrange} and +#' \code{mzrange} the function returns a \code{list} of length \code{0}. +#' The \code{list} is arranged first by ranges and then by files, such that +#' \code{result[[1]]} returns a \code{list} of \code{Chromatogram} objects +#' for the same rt/mz range. +#' For \code{return.type = "matrix"} a \code{matrix} is returned with rows +#' corresponding to ranges and columns to files/samples. \code{result[, 1]} +#' will thus return a \code{list} of \code{Chromatogram} objects for the +#' first sample/file, while \code{result[1, ]} returns a \code{list} of +#' \code{Chromatogram} objects for the same rt/mz range for all files. +#' +#' @author Johannes Rainer #' #' @noRd -.getPeakInt3 <- function(object, peakArea) { - if (length(fileNames(object)) != 1) - stop("'object' should be an XCMSnExp for a single file!") - if (nrow(peakArea) == 0) { - return(numeric()) +.extractMultipleChromatograms <- function(x, rt, mz, aggregationFun = "sum", + BPPARAM = bpparam(), + return.type = c("list", "matrix"), + missingValue = NA_real_) { + return.type <- match.arg(return.type) + missingValue <- as.numeric(missingValue) + if (!any(.SUPPORTED_AGG_FUN_CHROM == aggregationFun)) + stop("'aggregationFun' should be one of ", + paste0("'", .SUPPORTED_AGG_FUN_CHROM, "'", collapse = ", ")) + ## Ensure we're working on MS1 only! + x <- filterMsLevel(x, 1) + if (length(x) == 0) + return(list()) + nranges <- 1 + if (missing(rt)) + rt <- matrix(c(-Inf, Inf), nrow = 1) + if (missing(mz)) + mz <- matrix(c(-Inf, Inf), nrow = 1) + if (!missing(rt)) { + if (ncol(rt) != 2) + stop("'rt' has to be a matrix with two columns") + ## Replicate if nrow rt is 1 to match nrow of mz. + if (nrow(rt) == 1) + rt <- matrix(rep(rt, nrow(mz)), ncol = 2, byrow = TRUE) } - res <- matrix(ncol = 4, nrow = nrow(peakArea)) - res <- numeric(nrow(peakArea)) - spctr <- spectra(object, BPPARAM = SerialParam()) - mzs <- lapply(spctr, mz) - valsPerSpect <- lengths(mzs) - scanindex <- valueCount2ScanIndex(valsPerSpect) ## Index vector for C calls - ints <- unlist(lapply(spctr, intensity), use.names = FALSE) - rm(spctr) - mzs <- unlist(mzs, use.names = FALSE) - rtim <- rtime(object) - for (i in 1:length(res)) { - rtr <- peakArea[i, c("rtmin", "rtmax")] - sr <- c(min(which(rtim >= rtr[1])), max(which(rtim <= rtr[2]))) - eic <- .Call("getEIC", mzs, ints, scanindex, - as.double(peakArea[i, c("mzmin", "mzmax")]), - as.integer(sr), as.integer(length(scanindex)), - PACKAGE = "xcms") - if (length(eic$intensity)) { - ## How to calculate the area: (1)sum of all intensities / (2)by - ## the number of data points (REAL ones, considering also NAs) - ## and multiplied with the (3)rt width. - if (!all(is.na(eic$intensity)) && !all(eic$intensity == 0)) { - res[i] <- sum(eic$intensity, na.rm = TRUE) * - ((rtr[2] - rtr[1]) / (length(eic$intensity) - 1)) - } else { - res[i] <- NA_real_ + if (!missing(mz)) { + if (ncol(mz) != 2) + stop("'mz' has to be a matrix with two coliumns") + if (nrow(mz) == 1) + mz <- matrix(rep(mz, nrow(rt)), ncol = 2, byrow = TRUE) + } + if (nrow(rt) != nrow(mz)) + stop("dimensions of 'rt' and 'mz' have to match") + ## Identify indices of all spectra that are within the rt ranges. + rtimes <- rtime(x) + + ## 1) Subset x keeping all spectra that fall into any of the provided rt + ## ranges. + keep_idx <- unlist(apply(rt, MARGIN = 1, function(z) + which(rtimes >= z[1] & rtimes <= z[2])), use.names = FALSE) + keep_idx <- sort(unique(as.integer(keep_idx))) + if (length(keep_idx) == 0) + return(list()) + subs <- x[keep_idx] + + ## 2) Call the final subsetting on each file separately. + subs_by_file <- splitByFile(subs, f = factor(seq_along(fileNames(subs)))) + suppressWarnings( + res <- bpmapply( + subs_by_file, + seq_along(fileNames(subs)), + FUN = function(cur_sample, cur_file, rtm, mzm, aggFun) { + ## Load all spectra for that file. applies also any proc steps + sps <- spectra(cur_sample) + rts <- rtime(cur_sample) + cur_res <- vector("list", nrow(rtm)) + ## Loop through rt and mz. + for (i in 1:nrow(rtm)) { + ## - Select all spectra within that range and call a + ## function on them that does first filterMz and then + ## aggregate the values per spectrum. + in_rt <- rts >= rtm[i, 1] & rts <= rtm[i, 2] + ## Return an empty Chromatogram if there is no spectrum/scan + ## within the retention time range. + if (!any(in_rt)) { + cur_res[[i]] <- Chromatogram( + filterMz = mzm[i, ], + fromFile = as.integer(cur_file), + aggregationFun = aggFun) + next + } + cur_sps <- lapply( + sps[in_rt], + function(spct, filter_mz, aggFun) { + spct <- filterMz(spct, filter_mz) + ## Now aggregate the values. + if (!spct@peaksCount) + return(c(NA_real_, NA_real_, missingValue)) + return(c(range(spct@mz, na.rm = TRUE, finite = TRUE), + do.call( + aggFun, + list(spct@intensity, na.rm = TRUE)))) + }, filter_mz = mzm[i, ], aggFun = aggFun) + ## Now build the Chromatogram class. + allVals <- unlist(cur_sps, use.names = FALSE) + idx <- seq(3, length(allVals), by = 3) + ## Or should we drop the names completely? + ints <- allVals[idx] + names(ints) <- names(cur_sps) + ## Don't return a Chromatogram object if no values. + if (!all(is.na(ints))) { + cur_res[[i]] <- Chromatogram( + rtime = rts[in_rt], + intensity = ints, + mz = range(allVals[-idx], na.rm = TRUE, + finite = TRUE), + filterMz = mzm[i, ], + fromFile = as.integer(cur_file), + aggregationFun = aggFun) + } else { + ## If no measurement if non-NA, still report the NAs and + ## use the filter mz as mz. + cur_res[[i]] <- Chromatogram( + rtime = rts[in_rt], + intensity = ints, + mz = mzm[i, ], + filterMz = mzm[i, ], + fromFile = as.integer(cur_file), + aggregationFun = aggFun) + } + } + cur_res + }, MoreArgs = list(rtm = rt, mzm = mz, aggFun = aggregationFun), + BPPARAM = BPPARAM, SIMPLIFY = FALSE) + ) + ## Ensure that the lists have the same length than there are samples! + fns <- fileNames(x) + fromF <- base::match(fileNames(subs), fns) + + ## If we've got some files in which we don't have any signal in any range, + ## fill it with empty Chromatograms. This ensures that the result has + ## ALWAYS the same length than there are samples. + if (length(res) != length(fns)) { + res_all_files <- vector(mode = "list", length = length(fns)) + res_all_files[fromF] <- res + empties <- which(lengths(res_all_files) == 0) + ## fill these + for (i in 1:length(empties)) { + empty_list <- vector(mode = "list", length = nrow(rt)) + for(j in 1:nrow(rt)) { + empty_list[j] <- Chromatogram(filterMz = mz[i, ], + fromFile = as.integer(i), + aggregationFun = aggregationFun) } - } else { - res[i] <- NA_real_ + res_all_files[[empties[i]]] <- empty_list + } + res <- res_all_files + } + ## Now I need to re-arrange the result. + if (return.type == "list") { + ## Got [[file]][[range]], but want to have [[range]][[file]] + final_res <- vector("list", nrow(rt)) + for (i in 1:nrow(rt)) { + final_res[[i]] <- lapply(res, FUN = `[[`, i) } + if (nrow(rt) == 1) + final_res <- final_res[[1]] + } + if (return.type == "matrix") { + final_res <- do.call(cbind, res) } - return(unname(res)) + final_res } -#' Integrates the intensities for chromatograpic peak(s). This is supposed to be -#' called by the fillChromPeaks method. +## #' @description Integrates the intensities for chromatograpic peak(s). This is +## #' supposed to be called by the fillChromPeaks method. +## #' +## #' @note This reads the full data first and does the subsetting later in R. +## #' +## #' @param object An \code{XCMSnExp} object representing a single sample. +## #' +## #' @param peakArea A \code{matrix} with the peak definition, i.e. \code{"rtmin"}, +## #' \code{"rtmax"}, \code{"mzmin"} and \code{"mzmax"}. +## #' +## #' @noRd +## .getPeakInt2 <- function(object, peakArea) { +## if (length(fileNames(object)) != 1) +## stop("'object' should be an XCMSnExp for a single file!") +## res <- numeric(nrow(peakArea)) +## spctr <- spectra(object, BPPARAM = SerialParam()) +## mzs <- lapply(spctr, mz) +## valsPerSpect <- lengths(mzs) +## ints <- unlist(lapply(spctr, intensity), use.names = FALSE) +## rm(spctr) +## mzs <- unlist(mzs, use.names = FALSE) +## rtim <- rtime(object) +## for (i in 1:length(res)) { +## rtr <- peakArea[i, c("rtmin", "rtmax")] +## mtx <- .rawMat(mz = mzs, int = ints, scantime = rtim, +## valsPerSpect = valsPerSpect, rtrange = rtr, +## mzrange = peakArea[i, c("mzmin", "mzmax")]) +## if (length(mtx)) { +## if (!all(is.na(mtx[, 3]))) { +## ## How to calculate the area: (1)sum of all intensities / (2)by +## ## the number of data points (REAL ones, considering also NAs) +## ## and multiplied with the (3)rt width. +## ## (1) sum(mtx[, 3], na.rm = TRUE) +## ## (2) sum(rtim >= rtr[1] & rtim <= rtr[2]) - 1 ; if we used +## ## nrow(mtx) here, which would correspond to the non-NA +## ## intensities within the rt range we don't get the same results +## ## as e.g. centWave. +## ## (3) rtr[2] - rtr[1] +## res[i] <- sum(mtx[, 3], na.rm = TRUE) * +## ((rtr[2] - rtr[1]) / +## (sum(rtim >= rtr[1] & rtim <= rtr[2]) - 1)) +## } else { +## res[i] <- NA_real_ +## } +## } else { +## res[i] <- NA_real_ +## } +## } +## return(unname(res)) +## } + +## #' @description Integrates the intensities for chromatograpic peak(s). This is +## #' supposed to be called by the fillChromPeaks method. +## #' +## #' @note This reads the full data first and does the subsetting later in R. This +## #' function uses the C getEIC function. +## #' +## #' @param object An \code{XCMSnExp} object representing a single sample. +## #' +## #' @param peakArea A \code{matrix} with the peak definition, i.e. \code{"rtmin"}, +## #' \code{"rtmax"}, \code{"mzmin"} and \code{"mzmax"}. +## #' +## #' @noRd +## .getPeakInt3 <- function(object, peakArea) { +## if (length(fileNames(object)) != 1) +## stop("'object' should be an XCMSnExp for a single file!") +## if (nrow(peakArea) == 0) { +## return(numeric()) +## } +## res <- matrix(ncol = 4, nrow = nrow(peakArea)) +## res <- numeric(nrow(peakArea)) +## spctr <- spectra(object, BPPARAM = SerialParam()) +## mzs <- lapply(spctr, mz) +## valsPerSpect <- lengths(mzs) +## scanindex <- valueCount2ScanIndex(valsPerSpect) ## Index vector for C calls +## ints <- unlist(lapply(spctr, intensity), use.names = FALSE) +## rm(spctr) +## mzs <- unlist(mzs, use.names = FALSE) +## rtim <- rtime(object) +## for (i in 1:length(res)) { +## rtr <- peakArea[i, c("rtmin", "rtmax")] +## sr <- c(min(which(rtim >= rtr[1])), max(which(rtim <= rtr[2]))) +## eic <- .Call("getEIC", mzs, ints, scanindex, +## as.double(peakArea[i, c("mzmin", "mzmax")]), +## as.integer(sr), as.integer(length(scanindex)), +## PACKAGE = "xcms") +## if (length(eic$intensity)) { +## ## How to calculate the area: (1)sum of all intensities / (2)by +## ## the number of data points (REAL ones, considering also NAs) +## ## and multiplied with the (3)rt width. +## if (!all(is.na(eic$intensity)) && !all(eic$intensity == 0)) { +## res[i] <- sum(eic$intensity, na.rm = TRUE) * +## ((rtr[2] - rtr[1]) / (length(eic$intensity) - 1)) +## } else { +## res[i] <- NA_real_ +## } +## } else { +## res[i] <- NA_real_ +## } +## } +## return(unname(res)) +## } + + +#' @description Integrates the intensities for chromatograpic peak(s). This is +#' supposed to be called by the fillChromPeaks method. #' #' @note This reads the full data first and does the subsetting later in R. #' #' @param object An \code{XCMSnExp} object representing a single sample. #' -#' @param peakArea A \code{matrix} with the peak definition, i.e. \code{"rtmin"}, -#' \code{"rtmax"}, \code{"mzmin"} and \code{"mzmax"}. +#' @param peakArea A \code{matrix} with the peak definition, i.e. +#' \code{"rtmin"}, \code{"rtmax"}, \code{"mzmin"} and \code{"mzmax"}. #' #' @param sample_idx \code{integer(1)} with the index of the sample in the -#' object. +#' object. #' #' @param mzCenterFun Name of the function to be used to calculate the mz value. -#' Defaults to \code{weighted.mean}, i.e. the intensity weighted mean mz. +#' Defaults to \code{weighted.mean}, i.e. the intensity weighted mean mz. #' #' @param cn \code{character} with the names of the result matrix. #' #' @return A \code{matrix} with at least columns \code{"mz"}, \code{"rt"}, -#' \code{"into"} and \code{"maxo"} with the by intensity weighted mean of mz, -#' rt or the maximal intensity in the area, the integrated signal in the area -#' and the maximal signal in the area. +#' \code{"into"} and \code{"maxo"} with the by intensity weighted mean of +#' mz, rt or the maximal intensity in the area, the integrated signal in +#' the area and the maximal signal in the area. +#' #' @noRd .getChromPeakData <- function(object, peakArea, sample_idx, mzCenterFun = "weighted.mean", @@ -932,3 +989,166 @@ plotAdjustedRtime <- function(object, col = "#00000080", lty = 1, type = "l", } } +#' @title Plot chromatographic peak density along the retention time axis +#' +#' @description Plot the density of chromatographic peaks along the retention +#' time axis and indicate which peaks would be grouped into the same feature +#' based using the \emph{peak density} correspondence method. Settings for +#' the \emph{peak density} method can be passed with an +#' \code{\link{PeakDensityParam}} object to parameter \code{param}. +#' +#' @details The \code{plotChromPeakDensity} function allows to evaluate +#' different settings for the \emph{peak density} on an mz slice of +#' interest (e.g. containing chromatographic peaks corresponding to a known +#' metabolite). +#' The plot shows the individual peaks that were detected within the +#' specified \code{mz} slice at their retention time (x-axis) and sample in +#' which they were detected (y-axis). The density function is plotted as a +#' black line. Parameters for the \code{density} function are taken from the +#' \code{param} object. Grey rectangles indicate which chromatographic peaks +#' would be grouped into a feature by the \emph{peak density} correspondence +#' method. Parameters for the algorithm are also taken from \code{param}. +#' See \code{\link{groupChromPeaks-density}} for more information about the +#' algorithm and its supported settings. +#' +#' @param object A \code{\link{XCMSnExp}} object with identified +#' chromatographic peaks. +#' +#' @param mz \code{numeric(2)} defining an mz range for which the peak density +#' should be plotted. +#' +#' @param rt \code{numeric(2)} defining an optional rt range for which the +#' peak density should be plotted. Defaults to the absolute retention time +#' range of \code{object}. +#' +#' @param param \code{\link{PeakDensityParam}} from which parameters for the +#' \emph{peak density} correspondence algorithm can be extracted. +#' +#' @param col Color to be used for the individual samples. Length has to be 1 +#' or equal to the number of samples in \code{object}. +#' +#' @param xlab \code{character(1)} with the label for the x-axis. +#' +#' @param ylab \code{character(1)} with the label for the y-axis. +#' +#' @param xlim \code{numeric(2)} representing the limits for the x-axis. +#' Defaults to the range of the \code{rt} parameter. +#' +#' @param ... Additional parameters to be passed to the \code{plot} function. +#' +#' @return The function is called for its side effect, i.e. to create a plot. +#' +#' @author Johannes Rainer +#' +#' @seealso \code{\link{groupChromPeaks-density}} for details on the +#' \emph{peak density} correspondence method and supported settings. +#' +#' @examples +#' +#' ## Below we perform first a peak detection (using the centWave +#' ## method) on some of the test files from the faahKO package. +#' library(faahKO) +#' library(xcms) +#' fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, +#' full.names = TRUE) +#' +#' ## Reading 2 of the KO samples +#' raw_data <- readMSData2(fls[1:2]) +#' +#' ## Perform the peak detection using the centWave method. +#' res <- findChromPeaks(raw_data, param = CentWaveParam(noise = 1000)) +#' +#' ## Align the samples using obiwarp +#' res <- adjustRtime(res, param = ObiwarpParam()) +#' +#' ## Plot the chromatographic peak density for a specific mz range to evaluate +#' ## different peak density correspondence settings. +#' mzr <- c(305.05, 305.15) +#' +#' plotChromPeakDensity(res, mz = mzr, param = PeakDensityParam(), pch = 16) +#' +#' ## Use a larger bandwidth +#' plotChromPeakDensity(res, mz = mzr, param = PeakDensityParam(bw = 60), +#' pch = 16) +#' ## Neighboring peaks are now fused into one. +#' +#' ## Require the chromatographic peak to be present in all samples of a group +#' plotChromPeakDensity(res, mz = mzr, pch = 16, +#' param = PeakDensityParam(minFraction = 1)) +plotChromPeakDensity <- function(object, mz, rt, param = PeakDensityParam(), + col = "#00000080", xlab = "retention time", + ylab = "sample", xlim = range(rt), ...) { + if (missing(object)) + stop("Required parameter 'object' is missing") + if (!is(object, "XCMSnExp")) + stop("'object' must be an XCMSnExp object") + if (!hasChromPeaks(object)) + stop("No chromatographic peaks present in 'object'") + if (missing(mz)) + mz <- c(-Inf, Inf) + if (missing(rt)) + rt <- range(rtime(object)) + mz <- range(mz) + rt <- range(rt) + ## Get all the data we require. + nsamples <- length(fileNames(object)) + if (length(col) != nsamples) + col <- rep_len(col[1], nsamples) + pks <- chromPeaks(object, mz = mz, rt = rt) + if (nrow(pks)) { + ## Extract parameters from the param object + bw = bw(param) + ## That's Jan Stanstrup's fix (issue #161). + densN <- max(512, 2^(ceiling(log2(diff(rt) / (bw / 2))))) + sample_groups <- sampleGroups(param) + if (length(sample_groups) == 0) + sample_groups <- rep(1, nsamples) + if (length(sample_groups) != nsamples) + stop("If provided, the 'sampleGroups' parameter in the 'param' ", + "class has to have the same length than there are samples ", + "in 'object'") + sample_groups_table <- table(sample_groups) + dens_from <- rt[1] - 3 * bw + dens_to <- rt[2] + 3 * bw + dens <- density(pks[, "rt"], bw = bw, from = dens_from, to = dens_to, + n = densN) + yl <- c(0, max(dens$y)) + ypos <- seq(from = 0, to = yl[2], length.out = nsamples) + ## Plot the peaks as points. + plot(x = pks[, "rt"], y = ypos[pks[, "sample"]], xlim = xlim, + col = col[pks[, "sample"]], xlab = xlab, yaxt = "n", ylab = ylab, + main = paste0(format(mz, digits = 7), collapse = " - "), ...) + axis(side = 2, at = ypos, labels = 1:nsamples) + points(x = dens$x, y = dens$y, type = "l") + ## Estimate what would be combined to a feature + ## Code is taken from do_groupChromPeaks_density + dens_max <- max(dens$y) + dens_y <- dens$y + snum <- 0 + while(dens_y[max_y <- which.max(dens_y)] > dens_max / 20 && + snum < maxFeatures(param)) { + feat_range <- xcms:::descendMin(dens_y, max_y) + dens_y[feat_range[1]:feat_range[2]] <- 0 + feat_idx <- which(pks[, "rt"] >= dens$x[feat_range[1]] & + pks[, "rt"] <= dens$x[feat_range[2]]) + tt <- table(sample_groups[pks[feat_idx, "sample"]]) + if (!any(tt / sample_groups_table[names(tt)] >= + minFraction(param) & tt >= minSamples(param))) + next + rect(xleft = min(pks[feat_idx, "rt"]), ybottom = 0, + xright = max(pks[feat_idx, "rt"]), ytop = yl[2], + border = "#00000040", col = "#00000020") + } + } else { + plot(3, 3, pch = NA, xlim = rt, xlab = xlab, + main = paste0(format(mz, digits = 7), collapse = " - ")) + } +} + +## Plot the chromatographic peaks for a file in a two dimensional plot. +## plotChromPeakImage... +## @description Plots the + +## Find mz ranges with multiple peaks per sample. +## Use the density distribution for that? with a bandwidth = 0.001, check +## density method for that... diff --git a/R/functions-normalization.R b/R/functions-normalization.R deleted file mode 100644 index 5f1c7fb47..000000000 --- a/R/functions-normalization.R +++ /dev/null @@ -1,118 +0,0 @@ -#' @include DataClasses.R functions-utils.R - - -#' @title Fit linear model row-wise to a matrix or data.frame -#' -#' @description Simple function to fit linear models row-wise to the provided -#' data. -#' -#' @details For \code{method = "lmrob"} robust regression is performed using -#' the \code{\link[robustbase]{lmrob}} function with settings -#' \code{settings = "KS2014"} and \code{method = "SMDB"}. -#' The function will perform by default parallel fitting of the models -#' based on the global parallel processing settings. -#' -#' @note Between batch correction in the form of \code{y ~ idx * batch} is -#' currently problematic, because we don't yet check if there are too few -#' values within each batch. -#' -#' @param formula \code{formula} representing the model. -#' -#' @param data \code{data.frame} containing the data to be fitted (e.g. the -#' \code{pData} of an \code{\link{XCMSnExp}} object. -#' -#' @param y \code{matrix} or \code{data.frame} with the response variable. The -#' model is fit to each row of this matrix (which can be e.g. the -#' \code{\link{featureValues}} matrix). -#' -#' @param minVals \code{integer(1)} defining the minimum number of values to be -#' used for the fitting. Model fitting is skipped for rows in \code{y} with -#' less than \code{minVals} non-NA values. -#' -#' @param method \code{character} defining the method/function to be used for -#' model fitting. Allowed values are \code{"lm"} for least squares -#' regression and \code{"lmrob"} for robust regression using the -#' \code{\link[robustbase]{lmrob}} function. -#' -#' @param BPPARAM optional parameter specifying parallel processing settings. -#' -#' @return A \code{list} with the fitted linear models or \code{NULL} for rows -#' with too few data points. -#' -#' @noRd -#' -#' @author Johannes Rainer -fitModel <- function(formula, data, y, minVals = 4, - method = c("lm", "lmrob"), BPPARAM = bpparam()) { - method <- match.arg(method, c("lm", "lmrob")) - if (missing(formula) || !is(formula, "formula")) - stop("'formula' has to be submitted and should be a formula!") - if (missing(data) || !is(data, "data.frame")) - stop("'data' has to be a 'data.frame'!") - if (missing(y)) - stop("'y' is missing with no default.") - if (ncol(y) != nrow(data)) - stop("ncol(y) has to match nrow(data)!") - ## Check that 'data' contains the variables we're looking for. - vars <- all.vars(formula) - if (vars[1] != "y") - stop("'formula' should start with 'y ~'") - if (!all(vars[-1] %in% colnames(data))) - stop("All of the variables from 'formula' have to be present in 'data'") - ## data shouldn't contain a column y. - if (any(colnames(data) == "y")) - stop("'data' should not contain a column named 'y'") - ## Done with checking. - force(y) - force(data) - force(formula) - force(minVals) - force(method) - force(BPPARAM) - ## Subset data to contain only explanatory variables - data <- data[, vars[-1], drop = FALSE] - if (is.null(rownames(y))) - rownames(y) <- 1:nrow(y) - y <- split.data.frame(y, f = rownames(y)) - ## Determine fetures we skip because of too few data points. - do_em <- which(unlist(lapply(y, function(z) sum(!is.na(z)) >= minVals))) - res <- vector("list", length(y)) - names(res) <- names(y) - sttngs <- list() - if (method == "lmrob") { - ## Force use of the KS2014 settings in lmrob and increase the - ## scale-finding iterations to avoid some of the warnings. - ## sttngs <- robustbase::lmrob.control("KS2014") - ## sttngs$maxit.scale <- 10000 - ## sttngs$k.max <- 10000 - ## sttngs$refine.tol <- 1e-7 - } - if (length(do_em)) { - ## fit the model - res[do_em] <- bplapply(y[do_em], FUN = function(z, formula., data., - minVals., lmeth, - sttngs) { - ## TODO: need to check what happens if we're also performing between - ## batch correction and we have too few samples per batch! - ## ## Removing all missing values - could eventually skip that. - ## z <- as.numeric(z) - ## not_na <- !is.na(z) - ## data. <- droplevels(data.frame(y = z[not_na], - ## data.[not_na, , drop = FALSE])) - data. <- data.frame(y = as.numeric(z), data.) - if (lmeth == "lm") - return(lm(formula., data = data., model = FALSE)) - if (lmeth == "lmrob") { - ## set.seed(123) - ## return(robustbase::lmrob(formula., data = data., model = FALSE, - ## setting = sttngs)) - } - if (lmeth == "rlm") - stop("Not yet implemented") - ## return(MASS::rlm(formula., data = data.)) - }, formula. = formula, data. = data, minVals. = minVals, lmeth = method, - sttngs = sttngs, BPPARAM = BPPARAM) - } - res -} - diff --git a/R/functions-utils.R b/R/functions-utils.R index 3f26387cc..5915f5207 100644 --- a/R/functions-utils.R +++ b/R/functions-utils.R @@ -4,16 +4,21 @@ ############################################################ ## valueCount2ScanIndex ## -## @description Simple helper function that converts the number of values -## per scan/spectrum to an integer vector that can be passed to the base -## xcms functions/downstream C functions. -## -## @title Create index vector for internal C calls -## @param valCount Numeric vector representing the number of values per -## spectrum. -## @return An integer vector with the index (0-based) in the mz or intensity -## vectors indicating the start of a spectrum. -## @author Johannes Rainer +#' @title Create index vector for internal C calls +#' +#' @description Simple helper function that converts the number of values +#' per scan/spectrum to an integer vector that can be passed to the base +#' xcms functions/downstream C functions. +#' +#' @param valCount Numeric vector representing the number of values per +#' spectrum. +#' +#' @return An integer vector with the index (0-based) in the mz or intensity +#' vectors indicating the start of a spectrum. +#' +#' @author Johannes Rainer +#' +#' @noRd valueCount2ScanIndex <- function(valCount){ ## Convert into 0 based. valCount <- cumsum(valCount) @@ -27,24 +32,27 @@ valueCount2ScanIndex <- function(valCount){ ## code instead of the new implementations. ## This sets options. ## -##' @title Enable usage of old xcms code -##' -##' @description This function allows to enable the usage of old, partially -##' deprecated code from xcms by setting a corresponding global option. See -##' details for functions affected. -##' -##' @note Usage of old code is strongly dicouraged. This function is thought -##' to be used mainly in the transition phase from xcms to xcms version 3. -##' @details The functions/methods that will be affected by this are: -##' \itemize{ -##' \item \code{\link{do_findChromPeaks_matchedFilter}} -##' } -##' @param x logical(1) to specify whether or not original -##' old code should be used in corresponding functions. If not provided the -##' function simply returns the value of the global option. -##' @return logical(1) indicating whether old code is being -##' used. -##' @author Johannes Rainer +#' @title Enable usage of old xcms code +#' +#' @description This function allows to enable the usage of old, partially +#' deprecated code from xcms by setting a corresponding global option. See +#' details for functions affected. +#' +#' @note Usage of old code is strongly dicouraged. This function is thought +#' to be used mainly in the transition phase from xcms to xcms version 3. +#' +#' @details The functions/methods that will be affected by this are: +#' \itemize{ +#' \item \code{\link{do_findChromPeaks_matchedFilter}} +#' } +#' +#' @param x logical(1) to specify whether or not original +#' old code should be used in corresponding functions. If not provided the +#' function simply returns the value of the global option. +#' +#' @return logical(1) indicating whether old code is being used. +#' +#' @author Johannes Rainer useOriginalCode <- function(x) { if (missing(x)) { res <- options()$BioC$xcms$useOriginalCode @@ -69,13 +77,19 @@ useOriginalCode <- function(x) { ## matchedFilter = ".matchedFilter_orig" ## ) -##' @title Copy the content from an environment to another one -##' This function copies the content of an environment into another one. -##' @param env environment from which to copy. -##' @param inheritLocks logical(1) whether the locking status should be copied -##' too -##' @return an env. -##' @noRd +#' @title Copy the content from an environment to another one +#' +#' @description This function copies the content of an environment into another +#' one. +#' +#' @param env environment from which to copy. +#' +#' @param inheritLocks logical(1) whether the locking status should be copied +#' too. +#' +#' @return an env. +#' +#' @noRd .copy_env <- function(env, inheritLocks = FALSE) { new_e <- new.env(parent = emptyenv()) eNames <- ls(env, all.names = TRUE) @@ -101,48 +115,48 @@ useOriginalCode <- function(x) { ############################################################ ## .createProfileMatrix -##' @title Create the profile matrix -##' -##' @description This function creates a \emph{profile} matrix, i.e. a rt times -##' m/z matrix of aggregated intensity values with values aggregated within -##' bins along the m/z dimension. -##' -##' @details This is somewhat the successor function for the deprecated -##' \code{profBin} methods (\code{profBinM}, \code{profBinLinM}, -##' \code{profBinLinBaseM} and \code{profIntLin}). -##' -##' @param mz Numeric representing the m/z values across all scans/spectra. -##' -##' @param int Numeric representing the intensity values across all -##' scans/spectra. -##' -##' @param valsPerSpect Numeric representing the number of measurements for each -##' scan/spectrum. -##' -##' @param method A character string specifying the profile matrix generation -##' method. Allowed are \code{"bin"}, \code{"binlin"}, -##' \code{"binlinbase"} and \code{"intlin"}. -##' -##' @param step Numeric specifying the size of the m/z bins. -##' -##' @param baselevel Numeric specifying the base value. -##' -##' @param basespace Numeric. -##' -##' @param mzrange. numeric(2) optionally specifying the mz value range -##' for binning. This is to adopt the old profStepPad<- method used for -##' obiwarp retention time correction that did the binning from -##' whole-number limits. -##' -##' @param returnBreaks logical(1): hack to return the breaks of the bins. -##' Setting this to TRUE causes the function to return a \code{list} with -##' elements \code{"$profMat"} and \code{"breaks"}. -##' -##' @param baseValue numeric(1) defining the value to be returned if no signal -##' was found in the corresponding bin. Defaults to 0 for backward -##' compatibility. -##' -##' @noRd +#' @title Create the profile matrix +#' +#' @description This function creates a \emph{profile} matrix, i.e. a rt times +#' m/z matrix of aggregated intensity values with values aggregated within +#' bins along the m/z dimension. +#' +#' @details This is somewhat the successor function for the deprecated +#' \code{profBin} methods (\code{profBinM}, \code{profBinLinM}, +#' \code{profBinLinBaseM} and \code{profIntLin}). +#' +#' @param mz Numeric representing the m/z values across all scans/spectra. +#' +#' @param int Numeric representing the intensity values across all +#' scans/spectra. +#' +#' @param valsPerSpect Numeric representing the number of measurements for each +#' scan/spectrum. +#' +#' @param method A character string specifying the profile matrix generation +#' method. Allowed are \code{"bin"}, \code{"binlin"}, +#' \code{"binlinbase"} and \code{"intlin"}. +#' +#' @param step Numeric specifying the size of the m/z bins. +#' +#' @param baselevel Numeric specifying the base value. +#' +#' @param basespace Numeric. +#' +#' @param mzrange. numeric(2) optionally specifying the mz value range +#' for binning. This is to adopt the old profStepPad<- method used for +#' obiwarp retention time correction that did the binning from +#' whole-number limits. +#' +#' @param returnBreaks logical(1): hack to return the breaks of the bins. +#' Setting this to TRUE causes the function to return a \code{list} with +#' elements \code{"$profMat"} and \code{"breaks"}. +#' +#' @param baseValue numeric(1) defining the value to be returned if no signal +#' was found in the corresponding bin. Defaults to 0 for backward +#' compatibility. +#' +#' @noRd .createProfileMatrix <- function(mz, int, valsPerSpect, method, step = 0.1, baselevel = NULL, basespace = NULL, @@ -236,3 +250,77 @@ useOriginalCode <- function(x) { .featureIDs <- function(x) { sprintf(paste0("FT%0", ceiling(log10(x + 1L)), "d"), 1:x) } + +#' @description Expands stretches of TRUE values in \code{x} by one on both +#' sides. +#' +#' @note The return value for a \code{NA} is always \code{FALSE}. +#' +#' @param x \code{logical} vector. +#' +#' @author Johannes Rainer +#' +#' @noRd +.grow_trues <- function(x) { + previous <- NA + x_new <- rep_len(FALSE, length(x)) + for (i in 1:length(x)) { + if (is.na(x[i])) { + previous <- NA + next + } + ## If current element is TRUE + if (x[i]) { + x_new[i] <- TRUE + ## if last element was FALSE, set last element to TRUE + if (!is.na(previous) && !previous) + x_new[i - 1] <- TRUE + } else { + ## if previous element was TRUE, set current to TRUE. + if (!is.na(previous) && previous) + x_new[i] <- TRUE + } + previous <- x[i] + } + x_new +} + +#' @title Weighted mean around maximum +#' +#' @describe Calculate a weighted mean of the values around the value with the +#' largest weight. \code{x} could e.g. be mz values and \code{w} the +#' corresponding intensity values. +#' +#' @param x \code{numeric} vector from which the weighted mean should be +#' calculated. +#' +#' @param w \code{numeric} of same length than \code{x} with the weights. +#' +#' @param i \code{integer(1)} defining the number of data points left and right +#' of the index with the largest weight that should be considered for the +#' weighted mean calculation. +#' +#' @return The weighted mean value. +#' +#' @author Johannes Rainer +#' +#' @noRd +#' +#' @examples +#' +#' mz <- c(124.0796, 124.0812, 124.0828, 124.0843, 124.0859, 124.0875, +#' 124.0890, 124.0906, 124.0922, 124.0938, 124.0953, 124.0969) +#' ints <- c(10193.8, 28438.0, 56987.6, 85107.6, 102531.6, 104262.6, +#' 89528.8, 61741.2, 33485.8, 14146.6, 5192.2, 1630.2) +#' +#' plot(mz, ints) +#' +#' ## What would be found by the max: +#' abline(v = mz[which.max(ints)], col = "grey") +#' ## What does the weighted mean around apex return: +#' abline(v = weightedMeanAroundApex(mz, ints, i = 2), col = "blue") +weightedMeanAroundApex <- function(x, w = rep(1, length(x)), i = 1) { + max_idx <- which.max(w) + seq_idx <- max(1, max_idx - i):min(length(x), max_idx + i) + weighted.mean(x[seq_idx], w[seq_idx]) +} diff --git a/R/functions-xcmsRaw.R b/R/functions-xcmsRaw.R index 3f7f7d11a..b2476294e 100644 --- a/R/functions-xcmsRaw.R +++ b/R/functions-xcmsRaw.R @@ -47,7 +47,7 @@ xcmsRaw <- function(filename, profstep = 1, profmethod = "bin", if (min(scanrange) < 1 | max(scanrange) > length(object@scantime)) { scanrange[1] <- max(1, scanrange[1]) scanrange[2] <- min(length(object@scantime), scanrange[2]) - message("Provided scanrange was adjusted to ", scanrange) + message("Provided scanrange was adjusted to ", scanrange[1]," - ", scanrange[2]) } if (!is.null(rawdata$acquisitionNum)) { ## defined only for mzData and mzXML diff --git a/R/methods-Chromatogram.R b/R/methods-Chromatogram.R index c816c495c..61dae643d 100644 --- a/R/methods-Chromatogram.R +++ b/R/methods-Chromatogram.R @@ -1,4 +1,4 @@ -#' @include DataClasses.R functions-Chromatogram.R +#' @include DataClasses.R functions-Chromatogram.R functions-utils.R setMethod("initialize", "Chromatogram", function(.Object, ...) { classVersion(.Object)["Chromatogram"] <- "0.0.1" @@ -6,7 +6,7 @@ setMethod("initialize", "Chromatogram", function(.Object, ...) { }) -##' @rdname Chromatogram-class +#' @rdname Chromatogram-class setMethod("show", "Chromatogram", function(object) { cat("Object of class: ", class(object), "\n", sep = "") if (length(object@aggregationFun)) @@ -15,80 +15,84 @@ setMethod("show", "Chromatogram", function(object) { cat("length of object: ", length(object@rtime), "\n", sep = "") cat("from file: ", object@fromFile, "\n", sep = "") cat("mz range: [", object@mz[1], ", ", object@mz[2], "]\n", sep = "") - rtr <- range(object@rtime) - cat("rt range: [", rtr[1], ", ", rtr[2], "]\n", sep = "") + if (length(object@rtime) > 0) { + rtr <- range(object@rtime) + cat("rt range: [", rtr[1], ", ", rtr[2], "]\n", sep = "") + } }) ## Methods: ## rtime -##' @description \code{rtime} returns the retention times for the rentention time -##' - intensity pairs stored in the chromatogram. -##' -##' @param object A \code{Chromatogram} object. -##' -##' @rdname Chromatogram-class +#' @description \code{rtime} returns the retention times for the rentention time +#' - intensity pairs stored in the chromatogram. +#' +#' @param object A \code{Chromatogram} object. +#' +#' @rdname Chromatogram-class setMethod("rtime", "Chromatogram", function(object) { return(object@rtime) }) ## intensity -##' @description \code{intensity} returns the intensity for the rentention time -##' - intensity pairs stored in the chromatogram. -##' -##' @rdname Chromatogram-class +#' @description \code{intensity} returns the intensity for the rentention time +#' - intensity pairs stored in the chromatogram. +#' +#' @rdname Chromatogram-class setMethod("intensity", "Chromatogram", function(object) { return(object@intensity) }) ## mz -##' @description \code{mz} get the mz (range) of the chromatogram. The -##' function returns a \code{numeric(2)} with the lower and upper mz value. -##' -##' @param filter For \code{mz}: whether the mz range used to filter the -##' original object should be returned (\code{filter = TRUE}), or the mz range -##' calculated on the real data (\code{filter = FALSE}). -##' -##' @rdname Chromatogram-class +#' @description \code{mz} get the mz (range) of the chromatogram. The +#' function returns a \code{numeric(2)} with the lower and upper mz value. +#' +#' @param filter For \code{mz}: whether the mz range used to filter the +#' original object should be returned (\code{filter = TRUE}), or the mz +#' range calculated on the real data (\code{filter = FALSE}). +#' +#' @rdname Chromatogram-class setMethod("mz", "Chromatogram", function(object, filter = FALSE) { if (filter) return(object@filterMz) return(object@mz) }) -## ##' @rdname Chromatogram-class +## #' @rdname Chromatogram-class ## setReplaceMethod("mz", "CentWaveParam", function(object, value) { ## object@mzrange <- value ## if (validObject(object)) ## return(object) ## }) -##' @description \code{precursorMz} get the mz of the precursor ion. The -##' function returns a \code{numeric(2)} with the lower and upper mz value. -##' -##' @rdname Chromatogram-class +#' @description \code{precursorMz} get the mz of the precursor ion. The +#' function returns a \code{numeric(2)} with the lower and upper mz value. +#' +#' @rdname Chromatogram-class setMethod("precursorMz", "Chromatogram", function(object) { return(object@precursorMz) }) -##' @aliases productMz -##' @description \code{productMz} get the mz of the product chromatogram/ion. The -##' function returns a \code{numeric(2)} with the lower and upper mz value. -##' -##' @rdname Chromatogram-class +#' @aliases productMz +#' +#' @description \code{productMz} get the mz of the product chromatogram/ion. The +#' function returns a \code{numeric(2)} with the lower and upper mz value. +#' +#' @rdname Chromatogram-class setMethod("productMz", "Chromatogram", function(object) { return(object@productMz) }) ## aggregationFun -##' @aliases aggregationFun -##' @description \code{aggregationFun,aggregationFun<-} get or set the -##' aggregation function. -##' -##' @rdname Chromatogram-class +#' @aliases aggregationFun +#' +#' @description \code{aggregationFun,aggregationFun<-} get or set the +#' aggregation function. +#' +#' @rdname Chromatogram-class setMethod("aggregationFun", "Chromatogram", function(object) { return(object@aggregationFun) }) -## ##' @rdname Chromatogram-class +## #' @rdname Chromatogram-class ## setReplaceMethod("aggregationFun", "CentWaveParam", function(object, value) { ## object@aggregationFun <- value ## if (validObject(object)) @@ -96,62 +100,62 @@ setMethod("aggregationFun", "Chromatogram", function(object) { ## }) ## fromFile -##' @description \code{fromFile} returns the value from the \code{fromFile} slot. -##' -##' @rdname Chromatogram-class +#' @description \code{fromFile} returns the value from the \code{fromFile} slot. +#' +#' @rdname Chromatogram-class setMethod("fromFile", "Chromatogram", function(object) { return(object@fromFile) }) ## length -##' @description \code{length} returns the length (number of retention time - -##' intensity pairs) of the chromatogram. -##' -##' @param x For \code{as.data.frame} and \code{length}: a \code{Chromatogram} -##' object. -##' -##' @rdname Chromatogram-class +#' @description \code{length} returns the length (number of retention time - +#' intensity pairs) of the chromatogram. +#' +#' @param x For \code{as.data.frame} and \code{length}: a \code{Chromatogram} +#' object. +#' +#' @rdname Chromatogram-class setMethod("length", "Chromatogram", function(x) { return(length(x@rtime)) }) ## as.data.frame -##' @description \code{as.data.frame} returns the \code{rtime} and -##' \code{intensity} values from the object as \code{data.frame}. -##' -##' @rdname Chromatogram-class +#' @description \code{as.data.frame} returns the \code{rtime} and +#' \code{intensity} values from the object as \code{data.frame}. +#' +#' @rdname Chromatogram-class setMethod("as.data.frame", "Chromatogram", function(x) { return(data.frame(rtime = x@rtime, intensity = x@intensity)) }) -##' @description \code{filterRt}: filters the chromatogram based on the provided -##' retention time range. -##' -##' @param rt For \code{filterRt}: \code{numeric(2)} defining the lower and -##' upper retention time for the filtering. -##' -##' @rdname Chromatogram-class -##' -##' @examples -##' -##' ## Create a simple Chromatogram object based on random values. -##' chr <- Chromatogram(intensity = abs(rnorm(1000, mean = 2000, sd = 200)), -##' rtime = sort(abs(rnorm(1000, mean = 10, sd = 5)))) -##' chr -##' -##' ## Get the intensities -##' head(intensity(chr)) -##' -##' ## Get the retention time -##' head(rtime(chr)) -##' -##' ## What is the retention time range of the object? -##' range(rtime(chr)) -##' -##' ## Filter the chromatogram to keep only values between 4 and 10 seconds -##' chr2 <- filterRt(chr, rt = c(4, 10)) -##' -##' range(rtime(chr2)) +#' @description \code{filterRt}: filters the chromatogram based on the provided +#' retention time range. +#' +#' @param rt For \code{filterRt}: \code{numeric(2)} defining the lower and +#' upper retention time for the filtering. +#' +#' @rdname Chromatogram-class +#' +#' @examples +#' +#' ## Create a simple Chromatogram object based on random values. +#' chr <- Chromatogram(intensity = abs(rnorm(1000, mean = 2000, sd = 200)), +#' rtime = sort(abs(rnorm(1000, mean = 10, sd = 5)))) +#' chr +#' +#' ## Get the intensities +#' head(intensity(chr)) +#' +#' ## Get the retention time +#' head(rtime(chr)) +#' +#' ## What is the retention time range of the object? +#' range(rtime(chr)) +#' +#' ## Filter the chromatogram to keep only values between 4 and 10 seconds +#' chr2 <- filterRt(chr, rt = c(4, 10)) +#' +#' range(rtime(chr2)) setMethod("filterRt", "Chromatogram", function(object, rt) { if (missing(rt)) return(object) @@ -168,3 +172,42 @@ setMethod("filterRt", "Chromatogram", function(object, rt) { if (validObject(object)) object }) + +#' @description \code{clean}: \emph{cleans} a \code{Chromatogram} class by +#' removing all \code{0} and \code{NA} intensity signals (along with the +#' associates retention times). By default (if \code{all = FALSE}) \code{0} +#' values that are directly adjacent to peaks are kept too. \code{NA} +#' values are always removed. +#' +#' @param all For \code{clean}: \code{logical(1)} whether all \code{0} intensity +#' value pairs should be removed (defaults to \code{FALSE}). +#' +#' @return For \code{clean}: a \emph{cleaned} \code{Chromatogram} object. +#' +#' @rdname Chromatogram-class +#' +#' @examples +#' +#' ## Create a simple Chromatogram object +#' +#' chr <- Chromatogram(rtime = 1:12, +#' intensity = c(0, 0, 20, 0, 0, 0, 123, 124343, 3432, 0, 0, 0)) +#' +#' ## Remove 0-intensity values keeping those adjacent to peaks +#' chr <- clean(chr) +#' intensity(chr) +#' +#' ## Remove all 0-intensity values +#' chr <- clean(chr, all = TRUE) +#' intensity(chr) +setMethod("clean", signature = signature("Chromatogram"), + function(object, all = FALSE) { + if (all) + keep <- which(object@intensity > 0) + else + keep <- which(.grow_trues(object@intensity > 0)) + object@intensity <- object@intensity[keep] + object@rtime <- object@rtime[keep] + if (validObject(object)) + object + }) diff --git a/R/methods-OnDiskMSnExp.R b/R/methods-OnDiskMSnExp.R index 1d33b9f3a..ff0a8cc87 100644 --- a/R/methods-OnDiskMSnExp.R +++ b/R/methods-OnDiskMSnExp.R @@ -821,7 +821,24 @@ setMethod("adjustRtime", #' @rdname extractChromatograms-method setMethod("extractChromatograms", signature(object = "OnDiskMSnExp"), - function(object, rt, mz, aggregationFun = "sum") { - return(.extractChromatogram(x = object, rt = rt, mz = mz, - aggregationFun = aggregationFun)) + function(object, rt, mz, aggregationFun = "sum", missing = NA_real_) { + if (!missing(rt)) { + if (is.null(ncol(rt))) + rt <- matrix(range(rt), ncol = 2, nrow = 1) + } + if (!missing(mz)) { + if (is.null(ncol(mz))) + mz <- matrix(range(mz), ncol = 2, nrow = 1) + } + ## return(.extractChromatogram(x = object, rt = rt, mz = mz, + ## aggregationFun = aggregationFun)) + .extractMultipleChromatograms(object, rt = rt, mz = mz, + aggregationFun = aggregationFun, + missingValue = missing) + }) + +#' @rdname extractMsData-method +setMethod("extractMsData", signature(object = "OnDiskMSnExp"), + function(object, rt, mz) { + .extractMsData(object, rt = rt, mz = mz) }) diff --git a/R/methods-XCMSnExp.R b/R/methods-XCMSnExp.R index c371f2043..16149a3be 100644 --- a/R/methods-XCMSnExp.R +++ b/R/methods-XCMSnExp.R @@ -247,7 +247,7 @@ setReplaceMethod("featureDefinitions", "XCMSnExp", function(object, value) { #' @rdname XCMSnExp-class setMethod("chromPeaks", "XCMSnExp", function(object, bySample = FALSE, rt = numeric(), mz = numeric(), - ppm = 10, type = "any") { + ppm = 0, type = "any") { pks <- chromPeaks(object@msFeatureData) type <- match.arg(type, c("any", "within")) ## Select peaks within rt range. @@ -263,8 +263,10 @@ setMethod("chromPeaks", "XCMSnExp", function(object, bySample = FALSE, if (length(mz) && length(pks)) { mz <- range(mz) ## Increase mz by ppm. - mz[1] <- mz[1] - mz[1] * ppm / 1e6 - mz[2] <- mz[2] + mz[2] * ppm / 1e6 + if (is.finite(mz[1])) + mz[1] <- mz[1] - mz[1] * ppm / 1e6 + if (is.finite(mz[2])) + mz[2] <- mz[2] + mz[2] * ppm / 1e6 if (type == "within") keep <- which(pks[, "mzmin"] >= mz[1] & pks[, "mzmax"] <= mz[2]) else @@ -1826,16 +1828,25 @@ setMethod("featureValues", #' \code{\link{XCMSnExp}} objects. #' #' @details Arguments \code{rt} and \code{mz} allow to specify the MS -#' data slice from which the chromatogram should be extracted. The -#' parameter \code{aggregationSum} allows to specify the function to be +#' data slice from which the chromatogram should be extracted. +#' The parameter \code{aggregationSum} allows to specify the function to be #' used to aggregate the intensities across the mz range for the same #' retention time. Setting \code{aggregationFun = "sum"} would e.g. allow #' to calculate the \emph{total ion chromatogram} (TIC), #' \code{aggregationFun = "max"} the \emph{base peak chromatogram} (BPC). +#' The length of the extracted \code{Chromatogram} object, i.e. the number +#' of available data points, corresponds to the number of scans/spectra +#' measured in the specified retention time range. If in a specific scan +#' (for a give retention time) no signal was measured in the specified mz +#' range, a \code{NA_real_} is reported as intensity for the retention time +#' (see Notes for more information). This can be changed using the +#' \code{missing} parameter. #' #' @note \code{Chromatogram} objects extracted with \code{extractChromatogram} -#' contain \code{NA_real_} values if, for a given retention time, no valid -#' measurement was available for the provided mz range. +#' contain \code{NA_real_} values if, for a given retention time, no +#' signal was measured in the specified mz range. If no spectrum/scan is +#' present in the defined retention time window a \code{Chromatogram} object +#' of length 0 is returned. #' #' For \code{\link{XCMSnExp}} objects, if adjusted retention times are #' available, the \code{extractChromatograms} method will by default report @@ -1847,17 +1858,17 @@ setMethod("featureValues", #' \code{\link{XCMSnExp}} object from which the chromatograms should be #' extracted. #' -#' @param rt \code{numeric(2)} defining the lower and upper boundary for the -#' retention time range. If not specified, the full retention time range of -#' the original data will be used. It is also possible to submit a -#' \code{numeric(1)} in which case \code{range} is called on it to -#' transform it to a \code{numeric(2)}. +#' @param rt \code{numeric(2)} or two-column \code{matrix} defining the lower +#' and upper boundary for the retention time range(s). If not specified, +#' the full retention time range of the original data will be used. +#' It is also possible to submit a \code{numeric(1)} in which case +#' \code{range} is called on it to transform it to a \code{numeric(2)}. #' -#' @param mz \code{numeric(2)} defining the lower and upper mz value for the -#' MS data slice. If not specified, the chromatograms will be calculated on -#' the full mz range. It is also possible to submit a \code{numeric(1)} in -#' which case \code{range} is called on it to transform it to a -#' \code{numeric(2)}. +#' @param mz \code{numeric(2)} or two-column \code{matrix} defining the lower +#' and upper mz value for the MS data slice(s). If not specified, the +#' chromatograms will be calculated on the full mz range. +#' It is also possible to submit a \code{numeric(1)} in which case +#' \code{range} is called on it to transform it to a \code{numeric(2)}. #' #' @param adjustedRtime For \code{extractChromatograms,XCMSnExp}: whether the #' adjusted (\code{adjustedRtime = TRUE}) or raw retention times @@ -1869,6 +1880,32 @@ setMethod("featureValues", #' aggregate intensity values across the mz value range for the same #' retention time. Allowed values are \code{"sum"}, \code{"max"}, #' \code{"mean"} and \code{"min"}. +#' +#' @param missing \code{numeric(1)} allowing to specify the intensity value to +#' be used if for a given retention time no signal was measured within the +#' mz range of the corresponding scan. Defaults to \code{NA_real_} (see also +#' Details and Notes sections below). Use \code{missing = 0} to resemble the +#' behaviour of the \code{getEIC} from the \code{old} user interface. +#' +#' @return If a single \code{rt} and \code{mz} range was specified, +#' \code{extractChromatograms} returns a \code{list} of +#' \code{\link{Chromatogram}} classes each element being the chromatogram +#' for one of the samples for the specified range. +#' If multiple \code{rt} and \code{mz} ranges were provided (i.e. by passing +#' a multi-row \code{matrix} to parameters \code{rt} or \code{mz}), the +#' function returns a \code{list} of \code{list}s. The outer list +#' representing results for the various ranges, the inner the result across +#' files. In other words, \code{result[[1]]} returns a \code{list} with +#' \code{Chromatogram} classes length equal to the number of files, each +#' element representing the \code{Chromatogram} for the first rt/mz range +#' for one file. +#' An empty \code{list} is returned if no MS1 data is present in +#' \code{object} or if not a single spectrum is available for any of the +#' provided retention time ranges in \code{rt}. An empty \code{Chromatogram} +#' object is returned at the correponding position in the result \code{list} +#' if for the specific file no scan/spectrum was measured in the provided +#' rt window. In all other cases, a \code{Chromatogram} with length equal +#' to the number of scans/spectra in the provided rt range is returned. #' #' @author Johannes Rainer #' @@ -1876,6 +1913,12 @@ setMethod("featureValues", #' \code{\link{Chromatogram}} for the object representing chromatographic #' data. #' +#' \code{\link{plotChromatogram}} to plot a \code{Chromatogram} or +#' \code{list} of such objects. +#' +#' \code{\link{extractMsData}} for a method to extract the MS data as +#' \code{data.frame}. +#' #' @export #' #' @rdname extractChromatograms-method @@ -1899,13 +1942,37 @@ setMethod("featureValues", #' for(i in c(1, 3)) { #' points(rtime(chrs[[i]]), intensity(chrs[[i]]), type = "l", col = "00000080") #' } +#' +#' ## Plot the chromatogram using plotChromatogram +#' plotChromatogram(chrs) +#' +#' ## Extract chromatograms for multiple ranges. +#' mzr <- matrix(c(335, 335, 344, 344), ncol = 2, byrow = TRUE) +#' rtr <- matrix(c(2700, 2900, 2600, 2750), ncol = 2, byrow = TRUE) +#' chrs <- extractChromatograms(od, mz = mzr, rt = rtr) +#' +#' ## Plot the extracted chromatograms +#' par(mfrow = c(1, 2)) +#' plotChromatogram(chrs[[1]]) +#' plotChromatogram(chrs[[2]]) setMethod("extractChromatograms", signature(object = "XCMSnExp"), function(object, rt, mz, adjustedRtime = hasAdjustedRtime(object), - aggregationFun = "sum") { - return(.extractChromatogram(x = object, rt = rt, mz = mz, - aggregationFun = aggregationFun, - adjusted = adjustedRtime)) + aggregationFun = "sum", missing = NA_real_) { + ## Coerce to OnDiskMSnExp. + if (adjustedRtime) + adj_rt <- rtime(object, adjusted = TRUE) + object <- as(object, "OnDiskMSnExp") + if (adjustedRtime) { + ## Replace the original rtime with adjusted ones... + object@featureData$retentionTime <- adj_rt + } + extractChromatograms(object, rt = rt, mz = mz, + aggregationFun = aggregationFun, + missing = missing) + ## .extractChromatogram(x = object, rt = rt, mz = mz, + ## aggregationFun = aggregationFun, + ## adjusted = adjustedRtime) }) #' @rdname XCMSnExp-class @@ -2281,3 +2348,65 @@ setMethod("dropFilledChromPeaks", "XCMSnExp", function(object) { return(object) }) +#' @aliases extractMsData +#' +#' @title Extract a \code{data.frame} containing MS data +#' +#' @description Extract a \code{data.frame} of retention time, mz and intensity +#' values from each file/sample in the provided rt-mz range (or for the full +#' data range if \code{rt} and \code{mz} are not defined). +#' +#' @param object A \code{XCMSnExp} or \code{OnDiskMSnExp} object. +#' +#' @param rt \code{numeric(2)} with the retention time range from which the +#' data should be extracted. +#' +#' @param mz \code{numeric(2)} with the mz range. +#' +#' @param adjustedRtime (for \code{extractMsData,XCMSnExp}): \code{logical(1)} +#' specifying if adjusted or raw retention times should be reported. +#' Defaults to adjusted retention times, if these are present in +#' \code{object}. +#' +#' @return A \code{list} of length equal to the number of samples/files in +#' \code{object}. Each element being a \code{data.frame} with columns +#' \code{"rt"}, \code{"mz"} and \code{"i"} with the retention time, mz and +#' intensity tuples of a file. If no data is available for the mz-rt range +#' in a file a \code{data.frame} with 0 rows is returned for that file. +#' +#' @seealso \code{\link{XCMSnExp}} for the data object. +#' +#' @rdname extractMsData-method +#' +#' @author Johannes Rainer +#' +#' @examples +#' ## Read some files from the test data package. +#' library(faahKO) +#' library(xcms) +#' fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, +#' full.names = TRUE) +#' raw_data <- readMSData2(fls[1:2]) +#' +#' ## Read the full MS data for a defined mz-rt region. +#' res <- extractMsData(raw_data, mz = c(300, 320), rt = c(2700, 2900)) +#' +#' ## We've got one data.frame per file +#' length(res) +#' +#' ## With number of rows: +#' nrow(res[[1]]) +#' +#' head(res[[1]]) +setMethod("extractMsData", "XCMSnExp", + function(object, rt, mz, adjustedRtime = hasAdjustedRtime(object)){ + ## Now, this method takes the adjusted rts, casts the object to + ## an OnDiskMSnExp, eventually replaces the rtime in the + ## featureData with the adjusted retention times (depending on + ## adjustedRtime and calls the method for OnDiskMSnExp. + if (adjustedRtime & hasAdjustedRtime(object)) { + fData(object)$retentionTime <- rtime(object, adjusted = TRUE) + } + object <- as(object, "OnDiskMSnExp") + extractMsData(object, rt = rt, mz = mz) + }) diff --git a/R/methods-xcmsRaw.R b/R/methods-xcmsRaw.R index 2280bc34d..c852b995d 100755 --- a/R/methods-xcmsRaw.R +++ b/R/methods-xcmsRaw.R @@ -2302,17 +2302,35 @@ setMethod("stitch.xml", "xcmsRaw", function(object, lockMass) { ob@tic<-object@tic ob@profparam<-list() - arr<-array(dim=c(2,max(diff(ob@scanindex)), length(ob@scanindex))) + ## Array [x, y, z] with + ## - x: mz and intensity + ## - y: spectrum (1: max measurements within one of the spectra) + ## - z: scans (1: number of spectra) + arr <- array(dim = c(2, max(diff(ob@scanindex)), length(ob@scanindex))) if(lockMass[1] == 1){ lockMass<-lockMass[3:length(lockMass)] } + + ## Remove the last lock mass if it is too close by the end + if ((lockMass[length(lockMass)] + 2) > length(ob@scanindex)) + lockMass <- lockMass[1:(length(lockMass) - 1)] + + ## If the number of lockMass values is not even splitting them into a + ## two-column matrix is not OK (causes also the first lockMass spectrum to + ## be overwritten twice. That's to get rid of the warning in issue #173. + if (length(lockMass) %% 2) + lockMass <- c(lockMass, -99) lockMass<-matrix(lockMass, ncol=2, byrow=TRUE) - if((lockMass[nrow(lockMass),2]+2) > length(ob@scanindex)){ - lockMass<-lockMass[1:(nrow(lockMass)-1),] - } + ## if((lockMass[nrow(lockMass),2]+2) > length(ob@scanindex)){ + ## lockMass<-lockMass[1:(nrow(lockMass)-1),] + ## } + ## We're looping from 1 to length - 1, thus we have to fill in the last + ## scan later. for(i in 1:(length(ob@scanindex)-1)){ - if(any(i == lockMass[,1])){ + if(any(i == lockMass[, 1])){ + ## Place mz and intensity values from the previous scan into the + ## array and fill the rest with NA. arr[1,,i] <-c(object@env$mz[(object@scanindex[(i-1)]+1):object@scanindex[i]], rep(NA, (max(diff(object@scanindex))- length((object@scanindex[(i-1)]+1):object@scanindex[i])) )) @@ -2321,7 +2339,8 @@ setMethod("stitch.xml", "xcmsRaw", function(object, lockMass) { rep(NA, (max(diff(object@scanindex)) - length((object@scanindex[(i-1)]+1):object@scanindex[i])) )) - } else if(any(i == lockMass[,2])){ + } else if(any(i == lockMass[, 2])){ + ## Place mz and intensity values from the next scan into the array. arr[1,,i] <-c(object@env$mz[(object@scanindex[i+1]+1):object@scanindex[(i+2)]], rep(NA, (max(diff(object@scanindex)) - length((object@scanindex[i+1]+1):object@scanindex[(i+2)])) )) @@ -2331,6 +2350,7 @@ setMethod("stitch.xml", "xcmsRaw", function(object, lockMass) { length((object@scanindex[i+1]+1):object@scanindex[(i+2)])) )) } else{ + ## Just fill with the actual values. arr[1,,i] <-c(object@env$mz[(object@scanindex[i]+1):object@scanindex[i+1]], rep(NA, (max(diff(object@scanindex))- length((object@scanindex[i]+1):object@scanindex[i+1])) )) @@ -2352,6 +2372,12 @@ setMethod("stitch.xml", "xcmsRaw", function(object, lockMass) { ob@scanindex[i]<-as.integer(length(na.omit(arr[1,,(i-1)]))+ob@scanindex[(i-1)]) } } + ## Fix for #173: fill also values for the last scan. + last_i <- length(ob@scanindex) + fetch_idx <- (object@scanindex[last_i] + 1):length(object@env$mz) + put_idx <- 1:length(fetch_idx) + arr[1, put_idx, length(ob@scanindex)] <- object@env$mz[fetch_idx] + arr[2, put_idx, length(ob@scanindex)] <- object@env$intensity[fetch_idx] NAidx<-is.na(arr[1,,]) ob@env$mz<-as.numeric(arr[1,,][!NAidx]) diff --git a/R/methods-xcmsSet.R b/R/methods-xcmsSet.R index 9d69e920e..5082d47a4 100644 --- a/R/methods-xcmsSet.R +++ b/R/methods-xcmsSet.R @@ -453,7 +453,13 @@ setMethod("retcor", "xcmsSet", function(object, method=getOption("BioC")$xcms$re ...) { ## Backward compatibility for old "methods" if (method == "linear" || method == "loess") { - return(invisible(do.call(retcor.peakgroups, alist(object, smooth=method, ...)))) + args <- list(...) + if (any(names(args) == "smooth")) + warning("Provided argument 'smooth' will be replaced with the ", + "value of 'method', i.e. with ", method) + args$smooth <- method + ## Overwriting eventually provided smooth parameter. + return(invisible(do.call(retcor.peakgroups, c(list(object), args)))) } method <- match.arg(method, getOption("BioC")$xcms$retcor.methods) @@ -731,8 +737,10 @@ setMethod("retcor.peakgroups", "xcmsSet", function(object, missing = 1, extra = screen(2) par(mar = c(5.1, 4.1, 0, 2), yaxt = "n") - allden <- density(peakmat[,"rt"], bw = diff(rtrange)/200, from = rtrange[1], to = rtrange[2])[c("x","y")] - corden <- density(rt, bw = diff(rtrange)/200, from = rtrange[1], to = rtrange[2], na.rm = TRUE)[c("x","y")] + allden <- density(peakmat[,"rt"], bw = diff(rtrange)/200, + from = rtrange[1], to = rtrange[2])[c("x","y")] + corden <- density(rt, bw = diff(rtrange)/200, from = rtrange[1], + to = rtrange[2], na.rm = TRUE)[c("x","y")] allden$y <- allden$y / sum(allden$y) corden$y <- corden$y / sum(corden$y) maxden <- max(allden$y, corden$y) @@ -1069,7 +1077,8 @@ setMethod("plotrt", "xcmsSet", function(object, col = NULL, ty = NULL, leg = TRU screen(2) par(mar = c(5.1, 4.1, 0, 2), yaxt = "n") - allden <- density(object@peaks[,"rt"], bw = diff(rtrange)/200, from = rtrange[1], to = rtrange[2])[c("x","y")] + allden <- density(object@peaks[,"rt"], bw = diff(rtrange)/200, + from = rtrange[1], to = rtrange[2])[c("x","y")] plot(allden, xlim = rtrange, type = "l", main = "", xlab = "Retention Time", ylab = "Peak Density") abline(h = 0, col = "grey") close.screen(all.screens = TRUE) diff --git a/inst/NEWS b/inst/NEWS index 498b193d6..0380de3a0 100644 --- a/inst/NEWS +++ b/inst/NEWS @@ -1,3 +1,46 @@ +CHANGES IN VERSION 2.99.1 +------------------------- + +NEW FEATURES: +- extractMsData to extract raw MS data as a data.frame (issue #120). + +BUG FIXES: +- issue #175: an error is now thrown if no peak group was identified for peak + group retention time correction. +- issue #178: scanrange was collapsed when the adjusted range was reported + (pull request by Jan Stanstrup). +- issue #180: error when both parameters method and smooth are provided in the + retcor method. + + +CHANGES IN VERSION 2.99.0 +------------------------- + +NEW FEATURES: +- plotChromatogram and highlightChromPeaks functions. +- plotChromPeakDensity function. +- clean method for Chromatogram classes. + +USER VISIBLE CHANGES: +- Change default for ppm parameter in chromPeaks method to 0. +- extractChromatograms supports extraction of multiple rt and mz ranges. +- New parameter missing for extractChromatograms allowing to specify the + intensity value to be used for rts for which no signal is available within + the mz range. +- extractChromatograms returns Chromatograms of length equal to the number of + scans within the specified rt range, even if no signals are measured + (intensity values are NA). + + +CHANGES IN VERSION 1.53.1 +-------------------------- + +BUG FIXES: +- Increase parameter n for the density call in the peak density correspondence + method. This enables to separate neighboring peaks using small n (issue #161). + Thanks to Jan Stanstrup. + + CHANGES IN VERSION 1.51.11 -------------------------- diff --git a/inst/unitTests/runit.Chromatogram.R b/inst/unitTests/runit.Chromatogram.R index c01a16265..53861f2d1 100644 --- a/inst/unitTests/runit.Chromatogram.R +++ b/inst/unitTests/runit.Chromatogram.R @@ -52,11 +52,11 @@ test_Chromatogram_class <- function() { ch <- xcms:::Chromatogram(mz = c(1, 3)) checkEquals(ch@mz, c(1, 3)) checkEquals(mz(ch), c(1, 3)) - checkEquals(mz(ch, filter = TRUE), c(0, 0)) + checkEquals(mz(ch, filter = TRUE), c(NA_real_, NA_real_)) ch <- xcms:::Chromatogram(filterMz = c(1, 3)) checkEquals(ch@filterMz, c(1, 3)) checkEquals(mz(ch, filter = TRUE), c(1, 3)) - checkEquals(mz(ch, filter = FALSE), c(0, 0)) + checkEquals(mz(ch, filter = FALSE), c(NA_real_, NA_real_)) ch <- xcms:::Chromatogram(precursorMz = 123) checkEquals(ch@precursorMz, c(123, 123)) checkEquals(precursorMz(ch), c(123, 123)) @@ -90,6 +90,7 @@ test_extractChromatograms <- function() { ## OnDiskMSnExp ## TIC chrs <- extractChromatograms(filterFile(od_x, file = 2)) + plotChromatogram(chrs) spctr <- spectra(filterFile(od_x, file = 2)) ints <- unlist(lapply(spctr, function(z) return(sum(intensity(z))))) @@ -114,6 +115,10 @@ test_extractChromatograms <- function() { aggregationFun = "max") ints <- unlist(lapply(spctr, function(z) return(max(intensity(z))))) + chrs_2 <- xcms:::.extractMultipleChromatograms(filterFile(xod_x, file = 2), + aggregationFun = "max") + checkEquals(intensity(chrs[[1]]), intensity(chrs_2[[1]])) + checkEquals(intensity(chrs[[1]]), ints) checkEquals(rtime(chrs[[1]]), unlist(lapply(spctr, rtime))) ## with adjusted retention times. @@ -126,7 +131,15 @@ test_extractChromatograms <- function() { aggregationFun = "max") checkEquals(intensity(chrs[[1]]), ints) checkEquals(rtime(chrs[[1]]), rtime(xod_xgr, bySample = TRUE)[[2]]) - + ## Subset to certain mz range in all files. + chrs_adj <- extractChromatograms(xod_xgr, mz = c(300, 330)) + chrs_raw <- extractChromatograms(xod_x, mz = c(300, 330)) + checkTrue(sum(rtime(chrs_adj[[1]]) != rtime(chrs_raw[[1]])) > + length(chrs_raw[[1]]) / 2) + checkEquals(rtime(chrs_adj[[1]]), rtime(xod_xgr, bySample = TRUE)[[1]]) + checkEquals(rtime(chrs_adj[[2]]), rtime(xod_xgr, bySample = TRUE)[[2]]) + checkEquals(rtime(chrs_adj[[3]]), rtime(xod_xgr, bySample = TRUE)[[3]]) + ## Now subsetting for mz: tmp <- filterFile(od_x, file = 2) chrs <- extractChromatograms(tmp, mz = c(300, 400)) @@ -219,8 +232,85 @@ test_extractChromatograms <- function() { ## What if we're completely off? chrs <- extractChromatograms(od_x, rt = c(5000, 5500)) checkTrue(length(chrs) == 0) + ## Now rt is within range, but mz is completely off. We expect Chromatograms + ## with same length than there are spectra in the rt range, but all NA + ## values. chrs <- extractChromatograms(od_x, rt = c(2600, 2700), mz = 12000) - checkTrue(length(chrs) == 0) + rts <- split(rtime(od_x), f = fromFile(od_x)) + rts <- lapply(rts, function(z) z[z >= 2600 & z <= 2700]) + checkEquals(lengths(chrs), lengths(chrs)) + ## All have to be NA. + checkTrue(all(unlist(lapply(chrs, function(z) is.na(intensity(z)))))) + + ## Multiple ranges. + rtr <- matrix(c(2700, 2900, 2600, 2800), ncol = 2, byrow = TRUE) + mzr <- matrix(c(355, 355, 344, 344), ncol = 2, byrow = TRUE) + chrs <- extractChromatograms(od_x, rt = rtr, mz = mzr) + + checkTrue(all(rtime(chrs[[1]][[1]]) >= 2700 & rtime(chrs[[1]][[1]]) <= 2900)) + checkTrue(all(rtime(chrs[[1]][[2]]) >= 2700 & rtime(chrs[[1]][[2]]) <= 2900)) + checkTrue(all(rtime(chrs[[1]][[3]]) >= 2700 & rtime(chrs[[1]][[3]]) <= 2900)) + checkTrue(all(rtime(chrs[[2]][[1]]) >= 2600 & rtime(chrs[[2]][[1]]) <= 2800)) + checkTrue(all(rtime(chrs[[2]][[2]]) >= 2600 & rtime(chrs[[2]][[2]]) <= 2800)) + checkTrue(all(rtime(chrs[[2]][[3]]) >= 2600 & rtime(chrs[[2]][[3]]) <= 2800)) + spctr <- spectra(filterMz(filterRt(od_x, rt = rtr[1, ]), + mz = mzr[1, ])) + ints <- split(unlist(lapply(spctr, function(z) { + if (z@peaksCount) + return(sum(intensity(z))) + else return(NA) + })), f = unlist(lapply(spctr, fromFile))) + checkEquals(ints[[1]], intensity(chrs[[1]][[1]])) + checkEquals(ints[[2]], intensity(chrs[[1]][[2]])) + checkEquals(ints[[3]], intensity(chrs[[1]][[3]])) + spctr <- spectra(filterMz(filterRt(od_x, rt = rtr[2, ]), + mz = mzr[2, ])) + ints <- split(unlist(lapply(spctr, function(z) { + if (z@peaksCount) + return(sum(intensity(z))) + else return(NA) + })), f = unlist(lapply(spctr, fromFile))) + checkEquals(ints[[1]], intensity(chrs[[2]][[1]])) + checkEquals(ints[[2]], intensity(chrs[[2]][[2]])) + checkEquals(ints[[3]], intensity(chrs[[2]][[3]])) + + ## Multiple ranges with complete off ranges. + rtr <- matrix(c(2700, 2900, 5000, 5500, 2600, 2800), ncol = 2, byrow = TRUE) + mzr <- matrix(c(355, 355, 500, 500, 344, 344), ncol = 2, byrow = TRUE) + chrs <- extractChromatograms(od_x, rt = rtr, mz = mzr) + checkTrue(length(chrs) == 3) + checkTrue(all(lengths(chrs[[2]]) == 0)) + + rtr <- matrix(c(2700, 2900, 2700, 2900, 2600, 2800), ncol = 2, byrow = TRUE) + mzr <- matrix(c(355, 355, 100000, 100000, 344, 344), ncol = 2, byrow = TRUE) + chrs <- extractChromatograms(od_x, rt = rtr, mz = mzr) + checkTrue(length(chrs) == 3) + ## All values in the 2nd Chromosome object have to be NA. + checkTrue(all(unlist(lapply(chrs[[2]], function(z) is.na(intensity(z)))))) +} + +test_clean_chromatogram <- function() { + chr <- Chromatogram( + rtime = 1:12, + intensity = c(0, 0, 20, 0, 0, 0, 123, 124343, 3432, 0, 0, 0)) + chr_clnd <- clean(chr) + checkEquals(rtime(chr_clnd), c(2, 3, 4, 6, 7, 8, 9,10)) + + chr_clnd <- clean(chr, all = TRUE) + checkTrue(length(chr_clnd) == 4) + checkEquals(rtime(chr_clnd), c(3, 7, 8, 9)) + + ## With NA + chr <- Chromatogram( + rtime = 1:12, + intensity = c(0, NA, 20, 0, 0, 0, 123, 124343, 3432, 0, 0, 0)) + chr_clnd <- clean(chr) + checkEquals(rtime(chr_clnd), c(3, 4, 6, 7, 8, 9, 10)) + chr <- Chromatogram( + rtime = 1:12, + intensity = c(NA, NA, 20, NA, NA, NA, 123, 124343, 3432, NA, NA, NA)) + chr_clnd <- clean(chr) + checkEquals(rtime(chr_clnd), c(3, 7, 8, 9)) } dontrun_test_with_MRM <- function() { diff --git a/inst/unitTests/runit.XCMSnExp.R b/inst/unitTests/runit.XCMSnExp.R index e9e1fe1cb..2c09bb369 100644 --- a/inst/unitTests/runit.XCMSnExp.R +++ b/inst/unitTests/runit.XCMSnExp.R @@ -1088,8 +1088,8 @@ test_signal_integration <- function() { ## cat(" ", chromPeaks(tmp)[i, "into"], " - ", pkI, "\n") checkEquals(unname(pkI), unname(chromPeaks(tmp)[i, "into"])) } - pkI2 <- xcms:::.getPeakInt2(tmp, chromPeaks(tmp)[idxs, , drop = FALSE]) - checkEquals(unname(pkI2), unname(chromPeaks(tmp)[idxs, "into"])) + ## pkI2 <- xcms:::.getPeakInt2(tmp, chromPeaks(tmp)[idxs, , drop = FALSE]) + ## checkEquals(unname(pkI2), unname(chromPeaks(tmp)[idxs, "into"])) ## Now for matchedfilter. tmp <- findChromPeaks(filterFile(od_x, 2), param = MatchedFilterParam()) @@ -1111,8 +1111,8 @@ test_signal_integration <- function() { ## cat(" ", chromPeaks(tmp)[i, "into"], " - ", pkI, "\n") checkEquals(unname(pkI), unname(chromPeaks(tmp)[i, "into"])) } - pkI2 <- xcms:::.getPeakInt2(tmp, chromPeaks(tmp)[idxs, , drop = FALSE]) - checkEquals(unname(pkI2), unname(chromPeaks(tmp)[idxs, "into"])) + ## pkI2 <- xcms:::.getPeakInt2(tmp, chromPeaks(tmp)[idxs, , drop = FALSE]) + ## checkEquals(unname(pkI2), unname(chromPeaks(tmp)[idxs, "into"])) ## ## matchedFilter with wide mz bins. ## ## For matchedFilter I will have to do this on the profile matrix! @@ -1162,6 +1162,69 @@ test_adjustRtimePeakGroups <- function() { checkTrue(max(isNa) == 1) } +test_extractMsData <- function() { + ## All the data + ## all <- extractMsData(od_x) + ## checkEquals(length(all), length(fileNames(od_x))) + ## rts <- split(rtime(od_x), f = fromFile(od_x)) + ## checkEquals(lengths(rts), unlist(lapply(all, nrow))) + ## On an OnDiskMSnExp with only mz + mzr <- c(300, 302) + res <- extractMsData(od_x, mz = mzr) + checkEquals(length(res), length(fileNames(od_x))) + checkTrue(all(res[[1]][, "mz"] >= mzr[1] & res[[1]][, "mz"] <= mzr[2])) + checkTrue(all(res[[2]][, "mz"] >= mzr[1] & res[[2]][, "mz"] <= mzr[2])) + checkTrue(all(res[[3]][, "mz"] >= mzr[1] & res[[3]][, "mz"] <= mzr[2])) + ## On an OnDiskMSnExp with only rt + rtr <- c(2500, 2800) + res <- extractMsData(od_x, rt = rtr) + checkTrue(all(res[[1]][, "rt"] >= rtr[1] & res[[1]][, "rt"] <= rtr[2])) + checkTrue(all(res[[2]][, "rt"] >= rtr[1] & res[[2]][, "rt"] <= rtr[2])) + checkTrue(all(res[[3]][, "rt"] >= rtr[1] & res[[3]][, "rt"] <= rtr[2])) + ## LLLLL TODO Continue here, and then add example to the extractMsData + ## help page. + ## On an OnDiskMSnExp with mz and rt + res <- extractMsData(od_x, rt = rtr, mz = mzr) + checkTrue(all(res[[1]][, "rt"] >= rtr[1] & res[[1]][, "rt"] <= rtr[2])) + checkTrue(all(res[[2]][, "rt"] >= rtr[1] & res[[2]][, "rt"] <= rtr[2])) + checkTrue(all(res[[3]][, "rt"] >= rtr[1] & res[[3]][, "rt"] <= rtr[2])) + checkTrue(all(res[[1]][, "mz"] >= mzr[1] & res[[1]][, "mz"] <= mzr[2])) + checkTrue(all(res[[2]][, "mz"] >= mzr[1] & res[[2]][, "mz"] <= mzr[2])) + checkTrue(all(res[[3]][, "mz"] >= mzr[1] & res[[3]][, "mz"] <= mzr[2])) + + ## XCMSnExp, xod_xgr + ## with adjusted retention times + res <- extractMsData(xod_xgr, rt = rtr, mz = mzr) + checkTrue(all(res[[1]][, "rt"] >= rtr[1] & res[[1]][, "rt"] <= rtr[2])) + checkTrue(all(res[[2]][, "rt"] >= rtr[1] & res[[2]][, "rt"] <= rtr[2])) + checkTrue(all(res[[3]][, "rt"] >= rtr[1] & res[[3]][, "rt"] <= rtr[2])) + checkTrue(all(res[[1]][, "mz"] >= mzr[1] & res[[1]][, "mz"] <= mzr[2])) + checkTrue(all(res[[2]][, "mz"] >= mzr[1] & res[[2]][, "mz"] <= mzr[2])) + checkTrue(all(res[[3]][, "mz"] >= mzr[1] & res[[3]][, "mz"] <= mzr[2])) + ## without adjusted retention times + res_2 <- extractMsData(xod_xgr, adjustedRtime = FALSE, rt = rtr, mz = mzr) + checkTrue(all(res_2[[1]][, "rt"] >= rtr[1] & res_2[[1]][, "rt"] <= rtr[2])) + checkTrue(all(res_2[[2]][, "rt"] >= rtr[1] & res_2[[2]][, "rt"] <= rtr[2])) + checkTrue(all(res_2[[3]][, "rt"] >= rtr[1] & res_2[[3]][, "rt"] <= rtr[2])) + checkTrue(all(res_2[[1]][, "mz"] >= mzr[1] & res_2[[1]][, "mz"] <= mzr[2])) + checkTrue(all(res_2[[2]][, "mz"] >= mzr[1] & res_2[[2]][, "mz"] <= mzr[2])) + checkTrue(all(res_2[[3]][, "mz"] >= mzr[1] & res_2[[3]][, "mz"] <= mzr[2])) + checkTrue(nrow(res[[1]]) != nrow(res_2[[1]])) + checkTrue(nrow(res[[2]]) != nrow(res_2[[2]])) + checkTrue(nrow(res[[3]]) != nrow(res_2[[3]])) + + ## rt and mzr out of range. + res <- extractMsData(od_x, rt = c(6000, 6300), mz = c(0, 3)) + checkEquals(length(res), 3) + checkTrue(all(unlist(lapply(res, FUN = nrow)) == 0)) + res <- extractMsData(od_x, rt = c(6000, 6300)) + checkEquals(length(res), 3) + checkTrue(all(unlist(lapply(res, FUN = nrow)) == 0)) + res <- extractMsData(od_x, mz = c(0, 3)) + checkEquals(length(res), 3) + checkTrue(all(unlist(lapply(res, FUN = nrow)) == 0)) +} + ############################################################ ## Test getEIC alternatives. dontrun_getEIC_alternatives <- function() { @@ -1175,44 +1238,44 @@ dontrun_getEIC_alternatives <- function() { cwp <- CentWaveParam(noise = 10000, snthresh = 40) od_x <- findChromPeaks(od, param = cwp) - ## with this one we get 3 spectras back, one in each file. - rtr <- c(2787, 2788) - res <- filterRt(od_x, rt = rtr) - - ## ----------- - ## That's to test .extractChromatogram - mzr <- c(279, 279) - chrs <- extractChromatograms(od_x, mzrange = mzr) - ## input parameter - x <- od_x - rm(rtrange) - rm(mzrange) - mzrange <- mzr - aggregationFun <- "sum" - ## function call - ## ----------- + ## ## with this one we get 3 spectras back, one in each file. + ## rtr <- c(2787, 2788) + ## res <- filterRt(od_x, rt = rtr) + + ## ## ----------- + ## ## That's to test .extractChromatogram + ## mzr <- c(279, 279) + ## chrs <- extractChromatograms(od_x, mzrange = mzr) + ## ## input parameter + ## x <- od_x + ## rm(rtrange) + ## rm(mzrange) + ## mzrange <- mzr + ## aggregationFun <- "sum" + ## ## function call + ## ## ----------- - od_xg <- groupChromPeaks(od_x, param = PeakDensityParam()) - od_xgr <- adjustRtime(od_xg, param = PeakGroupsParam(span = 0.4)) - - rtr <- as.matrix(featureDefinitions(od_xg)[1:5, c("rtmin", "rtmax")]) - mzr <- as.matrix(featureDefinitions(od_xg)[1:5, c("mzmin", "mzmax")]) - - system.time( - res1 <- xcms:::.extractMsData(od, rtrange = rtr[1, ], mzrange = mzr[1, ]) - ) - system.time( - res2 <- xcms:::.extractMsData(od_xgr, rtrange = rtr[1, ], mzrange = mzr[1, ]) - ) - system.time( - res1 <- xcms:::.sliceApply(od, rtrange = rtr[1, ], mzrange = mzr[1, ]) - ) - system.time( - res1 <- xcms:::.sliceApply(od_xgr, rtrange = rtr[1, ], mzrange = mzr[1, ]) - ) + ## od_xg <- groupChromPeaks(od_x, param = PeakDensityParam()) + ## od_xgr <- adjustRtime(od_xg, param = PeakGroupsParam(span = 0.4)) + + ## rtr <- as.matrix(featureDefinitions(od_xg)[1:5, c("rtmin", "rtmax")]) + ## mzr <- as.matrix(featureDefinitions(od_xg)[1:5, c("mzmin", "mzmax")]) + + ## system.time( + ## res1 <- xcms:::.extractMsData(od, rtrange = rtr[1, ], mzrange = mzr[1, ]) + ## ) + ## system.time( + ## res2 <- xcms:::.extractMsData(od_xgr, rtrange = rtr[1, ], mzrange = mzr[1, ]) + ## ) + ## system.time( + ## res1 <- xcms:::.sliceApply(od, rtrange = rtr[1, ], mzrange = mzr[1, ]) + ## ) + ## system.time( + ## res1 <- xcms:::.sliceApply(od_xgr, rtrange = rtr[1, ], mzrange = mzr[1, ]) + ## ) - library(profvis) - profvis(res <- xcms:::.extractMsData(od, rtrange = rtr[1, ], mzrange = mzr[1, ])) + ## library(profvis) + ## profvis(res <- xcms:::.extractMsData(od, rtrange = rtr[1, ], mzrange = mzr[1, ])) ## Compare with getEIC @@ -1227,11 +1290,38 @@ dontrun_getEIC_alternatives <- function() { rtr <- groups(xs_2)[1:5, c("rtmin", "rtmax")] mzr <- groups(xs_2)[1:5, c("mzmin", "mzmax")] + ## + + register(SerialParam()) + od <- as(od_x, "OnDiskMSnExp") + ## Get all of em. + chrs <- xcms:::.extractMultipleChromatograms(od, rt = rtr, mz = mzr) + for (i in 1:nrow(rtr)) { + chrs1 <- extractChromatograms(od_x, rt = rtr[i, ], mz = mzr[i, ]) + checkEquals(unname(chrs1), unname(chrs[[i]])) + } + + library(microbenchmark) + microbenchmark(xcms:::.extractChromatogram(od, rt = rtr[1, ], mz = mzr[1, ]), + xcms:::.extractMultipleChromatograms(od, rt = rtr[1, , drop = FALSE], + mz = mzr[1, , drop = FALSE]), + times = 10) + + library(profvis) + profvis(xcms:::.extractMultipleChromatograms(od, rt = rtr[1, , drop = FALSE], + mz = mzr[1, , drop = FALSE])) + ## Extract the EIC: system.time( eic <- getEIC(xs_2, rtrange = rtr, mzrange = mzr, rt = "raw") - ) ## 3.7sec + ) ## 5.5 sec + system.time( + eic2 <- xcms:::.extractMultipleChromatograms(od, rt = rtr, mz = mzr) + ) ## 0.13 sec + + + ## Now try to do the same using MSnbase stuff. system.time( res <- xcms:::.extractMsData(od, rtrange = rtr[1, ], mzrange = mzr[1, ]) @@ -1269,9 +1359,21 @@ dontrun_getEIC_alternatives <- function() { ############################################################ ## Alternative: do it by file. + ## IF it's an XCMSnExp: coerce to OnDiskMSnExp by replacing the rtime with + ## the adjusted rtime. ## 1) Subset the od selecting all spectra that fall into the rt ranges. - ## 2) Work on that subsetted od: load into memory. - ## 3) loop over the rtrange and mzrange. + ## keep_logical <- have_rt >= rt[1] & have_rt <= rt[2] + ## tmp <- as(object, "OnDiskMSnExp")[base::which(keep_logical)] + ## 2) Call a spectrapply, passing the matrix of rts and mzs. + ## (Load the spectra (without any filtering now).) + ## 3) spectrapply function loops over the rtrange and mzrange: + ## - select all spectra that are within the range. + ## - lapply on those, apply filterMz with the current mz range. + ## - return a list of Chromatogram classes. + ## 4) We get a list of list of Chromatogram objects. [[files]][[ranges]]. + ## Rearrange the lists: [[ranges]][[files]]. + ## Could also put that into a DataFrame... [ranges, files] + ## For a single one: @@ -1281,6 +1383,7 @@ dontrun_getEIC_alternatives <- function() { dfs <- spectrapply(tmp, as.data.frame) ) + ## mz outside: mzrange <- c(600, 601) tmp <- filterMz(filterRt(od, rt = rtrange), mz = mzrange) diff --git a/inst/unitTests/runit.do_findChromPeaks_centWave.R b/inst/unitTests/runit.do_findChromPeaks_centWave.R index ec08e68de..41724742f 100644 --- a/inst/unitTests/runit.do_findChromPeaks_centWave.R +++ b/inst/unitTests/runit.do_findChromPeaks_centWave.R @@ -21,15 +21,15 @@ test_findChromPeaks_centWave_peakIntensity <- function() { raw <- readMSData2(fl) options(originalCentWave = TRUE) tmp <- findChromPeaks(raw, param = CentWaveParam(peakwidth = c(2, 10))) - ## Use the getPeakInt2 which uses the rawMat function. - pkI2 <- xcms:::.getPeakInt2(tmp, chromPeaks(tmp)) - ## Use the getPeakInt3 which uses the getEIC C function. - pkI3 <- xcms:::.getPeakInt3(tmp, chromPeaks(tmp)) - ## These fail for the original centWave code. - checkTrue(sum(pkI2 != chromPeaks(tmp)[, "into"]) > length(pkI2) / 2) - ## checkEquals(unname(pkI2), unname(chromPeaks(tmp)[, "into"])) - ## checkEquals(unname(pkI3), unname(chromPeaks(tmp)[, "into"])) - checkEquals(pkI2, pkI3) + ## ## Use the getPeakInt2 which uses the rawMat function. + ## pkI2 <- xcms:::.getPeakInt2(tmp, chromPeaks(tmp)) + ## ## Use the getPeakInt3 which uses the getEIC C function. + ## pkI3 <- xcms:::.getPeakInt3(tmp, chromPeaks(tmp)) + ## ## These fail for the original centWave code. + ## checkTrue(sum(pkI2 != chromPeaks(tmp)[, "into"]) > length(pkI2) / 2) + ## ## checkEquals(unname(pkI2), unname(chromPeaks(tmp)[, "into"])) + ## ## checkEquals(unname(pkI3), unname(chromPeaks(tmp)[, "into"])) + ## checkEquals(pkI2, pkI3) ## Try with new implementation. options(originalCentWave = FALSE) tmp2 <- findChromPeaks(raw, param = CentWaveParam(peakwidth = c(2, 10))) @@ -51,25 +51,25 @@ test_findChromPeaks_centWave_peakIntensity <- function() { plot(cp2[, "rtmin"], chromPeaks(tmp)[, "rtmin"]) ## Very similar plot(cp2[, "rtmax"], chromPeaks(tmp)[, "rtmax"]) ## Very similar ## Use the getPeakInt3 which uses the getEIC C function. - pkI2_2 <- xcms:::.getPeakInt2(tmp2, chromPeaks(tmp2)) - pkI3_2 <- xcms:::.getPeakInt3(tmp2, chromPeaks(tmp2)) - ## These fail for the original centWave code. - checkEquals(unname(pkI2_2), unname(chromPeaks(tmp2)[, "into"])) - checkEquals(unname(pkI3_2), unname(chromPeaks(tmp2)[, "into"])) - checkEquals(pkI2_2, pkI3_2) + ## pkI2_2 <- xcms:::.getPeakInt2(tmp2, chromPeaks(tmp2)) + ## pkI3_2 <- xcms:::.getPeakInt3(tmp2, chromPeaks(tmp2)) + ## ## These fail for the original centWave code. + ## checkEquals(unname(pkI2_2), unname(chromPeaks(tmp2)[, "into"])) + ## checkEquals(unname(pkI3_2), unname(chromPeaks(tmp2)[, "into"])) + ## checkEquals(pkI2_2, pkI3_2) ## The same for one of the test files; this works even with the original ## centWave code options(originalCentWave = TRUE) tmp <- filterFile(xod_xgrg, file = 3) - ## Use the getPeakInt2 which uses the rawMat function. - pkI2 <- xcms:::.getPeakInt2(tmp, chromPeaks(tmp)) - ## Use the getPeakInt3 which uses the getEIC C function. - pkI3 <- xcms:::.getPeakInt3(tmp, chromPeaks(tmp)) - checkEquals(pkI2, pkI3) - checkEquals(unname(pkI2), unname(chromPeaks(tmp)[, "into"])) - checkEquals(unname(pkI3), unname(chromPeaks(tmp)[, "into"])) + ## ## Use the getPeakInt2 which uses the rawMat function. + ## pkI2 <- xcms:::.getPeakInt2(tmp, chromPeaks(tmp)) + ## ## Use the getPeakInt3 which uses the getEIC C function. + ## pkI3 <- xcms:::.getPeakInt3(tmp, chromPeaks(tmp)) + ## checkEquals(pkI2, pkI3) + ## checkEquals(unname(pkI2), unname(chromPeaks(tmp)[, "into"])) + ## checkEquals(unname(pkI3), unname(chromPeaks(tmp)[, "into"])) ## New modified centWave. options(originalCentWave = FALSE) tmp2 <- findChromPeaks(filterFile(faahko_od, file = 3), @@ -77,12 +77,12 @@ test_findChromPeaks_centWave_peakIntensity <- function() { ## Even the identified peaks are identical! checkEquals(chromPeaks(tmp), chromPeaks(tmp2)) ## Use the getPeakInt2 which uses the rawMat function. - pkI2 <- xcms:::.getPeakInt2(tmp2, chromPeaks(tmp2)) - ## Use the getPeakInt3 which uses the getEIC C function. - pkI3 <- xcms:::.getPeakInt3(tmp2, chromPeaks(tmp2)) - checkEquals(pkI2, pkI3) - checkEquals(unname(pkI2), unname(chromPeaks(tmp2)[, "into"])) - checkEquals(unname(pkI3), unname(chromPeaks(tmp2)[, "into"])) + ## pkI2 <- xcms:::.getPeakInt2(tmp2, chromPeaks(tmp2)) + ## ## Use the getPeakInt3 which uses the getEIC C function. + ## pkI3 <- xcms:::.getPeakInt3(tmp2, chromPeaks(tmp2)) + ## checkEquals(pkI2, pkI3) + ## checkEquals(unname(pkI2), unname(chromPeaks(tmp2)[, "into"])) + ## checkEquals(unname(pkI3), unname(chromPeaks(tmp2)[, "into"])) options(originalCentWave = TRUE) } diff --git a/inst/unitTests/runit.fillChromPeaks.R b/inst/unitTests/runit.fillChromPeaks.R index 0a9527d28..97e7f3023 100644 --- a/inst/unitTests/runit.fillChromPeaks.R +++ b/inst/unitTests/runit.fillChromPeaks.R @@ -73,7 +73,7 @@ test_fillChromPeaks <- function() { ## Get the intensities for the first one. pkArea <- apply(tmp, median, MARGIN = 2) chr <- extractChromatograms(res, rt = pkArea[1:2], mz = pkArea[3:4]) - checkTrue(length(chr) == 0) + checkTrue(all(unlist(lapply(chr, function(z) is.na(intensity(z)))))) ## Get also the spectra: spctr <- spectra(filterRt(filterFile(xod_xg, file = 1), rt = pkArea[1:2])) mzs <- unlist(lapply(spctr, mz)) diff --git a/inst/unitTests/runit.functions-XCMSnExp.R b/inst/unitTests/runit.functions-XCMSnExp.R new file mode 100644 index 000000000..a152a9de8 --- /dev/null +++ b/inst/unitTests/runit.functions-XCMSnExp.R @@ -0,0 +1,12 @@ +## Unit tests for functions in functions-XCMSnExp.R + +test_plotChromPeakDensity <- function() { + mzr <- c(305.05, 305.15) + plotChromPeakDensity(xod_x, mz = mzr) + + ## Use the full range. + plotChromPeakDensity(xod_x) + + plotChromPeakDensity(xod_x, mz = c(0, 1)) + plotChromPeakDensity(xod_x, mz = c(300, 310), pch = 16, xlim = c(2500, 4000)) +} diff --git a/inst/unitTests/runit.functions-utils.R b/inst/unitTests/runit.functions-utils.R new file mode 100644 index 000000000..9f4184e99 --- /dev/null +++ b/inst/unitTests/runit.functions-utils.R @@ -0,0 +1,53 @@ +library(xcms) +library(RUnit) +## Test the .grow_trues +test_grow_trues <- function() { + ## Compare performance with MSnbase:::utils.clean + Test <- c(1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, + 1, 0) + Expect <- c(TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, + FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, + TRUE, TRUE, TRUE, TRUE) + res_2 <- xcms:::.grow_trues(Test > 0) + checkEquals(res_2, Expect) + + Test <- c(0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0) + Expect <- c(FALSE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, + TRUE, FALSE) + res_2 <- xcms:::.grow_trues(Test > 0) + checkEquals(res_2, Expect) + + Test <- c(0, 1, NA, 0, 0, 1) + Expect <- c(TRUE, TRUE, FALSE, FALSE, TRUE, TRUE) + res_2 <- xcms:::.grow_trues(Test > 0) + checkEquals(res_2, Expect) + + Test <- c(0, NA, 1, 0, 0, 1, 0, 0) + Expect <- c(FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE) + res_2 <- xcms:::.grow_trues(Test > 0) + checkEquals(res_2, Expect) + + Test <- c(0, 1, 0, 0, NA, 0, 1) + Expect <- c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE) + res_2 <- xcms:::.grow_trues(Test > 0) + checkEquals(res_2, Expect) + + Test <- c(NA, 1, NA, NA, NA, NA, 1) + Expect <- c(FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE) + res_2 <- xcms:::.grow_trues(Test > 0) + checkEquals(res_2, Expect) +} + +benchmark_grow_trues <- function() { + set.seed(123) + Test <- rnorm(n = 30000) + Test[Test < 0] <- 0 + Test2 <- Test > 0 + res_1 <- MSnbase:::utils.clean(Test) + res_2 <- .clean(Test2) + + Test <- c(0, 0, 1, 1, 0, 0, 0, 1, 0, 0) + Expect <- c(FALSE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, FALSE) + res_1 <- MSnbase:::utils.clean(Test) + +} diff --git a/man/Chromatogram-class.Rd b/man/Chromatogram-class.Rd index e3a8db9dd..51ce6a657 100644 --- a/man/Chromatogram-class.Rd +++ b/man/Chromatogram-class.Rd @@ -18,11 +18,12 @@ \alias{length,Chromatogram-method} \alias{as.data.frame,Chromatogram-method} \alias{filterRt,Chromatogram-method} +\alias{clean,Chromatogram-method} \title{Representation of chromatographic MS data} \usage{ -Chromatogram(rtime = numeric(), intensity = numeric(), mz = c(0, 0), - filterMz = c(0, 0), precursorMz = c(NA_real_, NA_real_), - productMz = c(NA_real_, NA_real_), fromFile = integer(), +Chromatogram(rtime = numeric(), intensity = numeric(), mz = c(NA_real_, + NA_real_), filterMz = c(NA_real_, NA_real_), precursorMz = c(NA_real_, + NA_real_), productMz = c(NA_real_, NA_real_), fromFile = integer(), aggregationFun = character()) \S4method{show}{Chromatogram}(object) @@ -46,6 +47,8 @@ Chromatogram(rtime = numeric(), intensity = numeric(), mz = c(0, 0), \S4method{as.data.frame}{Chromatogram}(x) \S4method{filterRt}{Chromatogram}(object, rt) + +\S4method{clean}{Chromatogram}(object, all = FALSE) } \arguments{ \item{rtime}{\code{numeric} with the retention times (length has to be equal @@ -74,21 +77,27 @@ Represents the mz of the product. See details for more information.} chromatogram was extracted.} \item{aggregationFun}{\code{character} string specifying the function that -was used to aggregate intensity values for the same retention time across the -mz range. Supported are \code{"sum"} (total ion chromatogram), \code{"max"} -(base peak chromatogram), \code{"min"} and \code{"mean"}.} +was used to aggregate intensity values for the same retention time across +the mz range. Supported are \code{"sum"} (total ion chromatogram), +\code{"max"} (base peak chromatogram), \code{"min"} and \code{"mean"}.} \item{object}{A \code{Chromatogram} object.} \item{filter}{For \code{mz}: whether the mz range used to filter the -original object should be returned (\code{filter = TRUE}), or the mz range -calculated on the real data (\code{filter = FALSE}).} +original object should be returned (\code{filter = TRUE}), or the mz +range calculated on the real data (\code{filter = FALSE}).} \item{x}{For \code{as.data.frame} and \code{length}: a \code{Chromatogram} object.} \item{rt}{For \code{filterRt}: \code{numeric(2)} defining the lower and upper retention time for the filtering.} + +\item{all}{For \code{clean}: \code{logical(1)} whether all \code{0} intensity +value pairs should be removed (defaults to \code{FALSE}).} +} +\value{ +For \code{clean}: a \emph{cleaned} \code{Chromatogram} object. } \description{ The \code{Chromatogram} class is designed to store @@ -100,36 +109,42 @@ The \code{Chromatogram} class is designed to store \code{\link{extractChromatograms}}). \code{Chromatogram}: create an instance of the -\code{Chromatogram} class. + \code{Chromatogram} class. \code{rtime} returns the retention times for the rentention time -- intensity pairs stored in the chromatogram. + - intensity pairs stored in the chromatogram. \code{intensity} returns the intensity for the rentention time -- intensity pairs stored in the chromatogram. + - intensity pairs stored in the chromatogram. \code{mz} get the mz (range) of the chromatogram. The -function returns a \code{numeric(2)} with the lower and upper mz value. + function returns a \code{numeric(2)} with the lower and upper mz value. \code{precursorMz} get the mz of the precursor ion. The -function returns a \code{numeric(2)} with the lower and upper mz value. + function returns a \code{numeric(2)} with the lower and upper mz value. \code{productMz} get the mz of the product chromatogram/ion. The -function returns a \code{numeric(2)} with the lower and upper mz value. + function returns a \code{numeric(2)} with the lower and upper mz value. \code{aggregationFun,aggregationFun<-} get or set the -aggregation function. + aggregation function. \code{fromFile} returns the value from the \code{fromFile} slot. \code{length} returns the length (number of retention time - -intensity pairs) of the chromatogram. + intensity pairs) of the chromatogram. \code{as.data.frame} returns the \code{rtime} and -\code{intensity} values from the object as \code{data.frame}. + \code{intensity} values from the object as \code{data.frame}. \code{filterRt}: filters the chromatogram based on the provided -retention time range. + retention time range. + +\code{clean}: \emph{cleans} a \code{Chromatogram} class by + removing all \code{0} and \code{NA} intensity signals (along with the + associates retention times). By default (if \code{all = FALSE}) \code{0} + values that are directly adjacent to peaks are kept too. \code{NA} + values are always removed. } \details{ The \code{mz}, \code{filterMz}, \code{precursorMz} and @@ -170,11 +185,26 @@ range(rtime(chr)) chr2 <- filterRt(chr, rt = c(4, 10)) range(rtime(chr2)) + +## Create a simple Chromatogram object + +chr <- Chromatogram(rtime = 1:12, + intensity = c(0, 0, 20, 0, 0, 0, 123, 124343, 3432, 0, 0, 0)) + +## Remove 0-intensity values keeping those adjacent to peaks +chr <- clean(chr) +intensity(chr) + +## Remove all 0-intensity values +chr <- clean(chr, all = TRUE) +intensity(chr) } \seealso{ \code{\link{extractChromatograms}} for the method to extract \code{Chromatogram} objects from \code{\link{XCMSnExp}} or \code{\link[MSnbase]{OnDiskMSnExp}} objects. + + \code{\link{plotChromatogram}} to plot \code{Chromatogram} objects. } \author{ Johannes Rainer diff --git a/man/XCMSnExp-class.Rd b/man/XCMSnExp-class.Rd index 7cdff0bfb..2f5caf7aa 100644 --- a/man/XCMSnExp-class.Rd +++ b/man/XCMSnExp-class.Rd @@ -109,7 +109,7 @@ processHistoryTypes() \S4method{featureDefinitions}{XCMSnExp}(object) <- value \S4method{chromPeaks}{XCMSnExp}(object, bySample = FALSE, rt = numeric(), - mz = numeric(), ppm = 10, type = "any") + mz = numeric(), ppm = 0, type = "any") \S4method{chromPeaks}{XCMSnExp}(object) <- value @@ -545,13 +545,21 @@ head(peaks(xs)) \code{\linkS4class{xcmsSet}} for the old implementation. \code{\link[MSnbase]{OnDiskMSnExp}}, \code{\link[MSnbase]{MSnExp}} and \code{\link[MSnbase]{pSet}} for a complete list of inherited methods. + \code{\link{findChromPeaks}} for available peak detection methods returning a \code{XCMSnExp} object as a result. + \code{\link{groupChromPeaks}} for available peak grouping methods and \code{\link{featureDefinitions}} for the method to extract the feature definitions representing the peak grouping results. \code{\link{adjustRtime}} for retention time adjustment methods. + \code{\link{extractChromatograms}} to extract MS data as + \code{\link{Chromatogram}} objects. + + \code{\link{extractMsData}} for the method to extract MS data as + \code{data.frame}s. + \code{\link{fillChromPeaks}} for the method to fill-in eventually missing chromatographic peaks for a feature in some samples. } diff --git a/man/extractChromatograms-method.Rd b/man/extractChromatograms-method.Rd index e7cc1f047..a95568246 100644 --- a/man/extractChromatograms-method.Rd +++ b/man/extractChromatograms-method.Rd @@ -8,39 +8,67 @@ \title{Extracting chromatograms} \usage{ \S4method{extractChromatograms}{OnDiskMSnExp}(object, rt, mz, - aggregationFun = "sum") + aggregationFun = "sum", missing = NA_real_) \S4method{extractChromatograms}{XCMSnExp}(object, rt, mz, - adjustedRtime = hasAdjustedRtime(object), aggregationFun = "sum") + adjustedRtime = hasAdjustedRtime(object), aggregationFun = "sum", + missing = NA_real_) } \arguments{ \item{object}{Either a \code{\link[MSnbase]{OnDiskMSnExp}} or \code{\link{XCMSnExp}} object from which the chromatograms should be extracted.} -\item{rt}{\code{numeric(2)} defining the lower and upper boundary for the -retention time range. If not specified, the full retention time range of -the original data will be used. It is also possible to submit a -\code{numeric(1)} in which case \code{range} is called on it to -transform it to a \code{numeric(2)}.} +\item{rt}{\code{numeric(2)} or two-column \code{matrix} defining the lower +and upper boundary for the retention time range(s). If not specified, +the full retention time range of the original data will be used. +It is also possible to submit a \code{numeric(1)} in which case +\code{range} is called on it to transform it to a \code{numeric(2)}.} -\item{mz}{\code{numeric(2)} defining the lower and upper mz value for the -MS data slice. If not specified, the chromatograms will be calculated on -the full mz range. It is also possible to submit a \code{numeric(1)} in -which case \code{range} is called on it to transform it to a -\code{numeric(2)}.} +\item{mz}{\code{numeric(2)} or two-column \code{matrix} defining the lower +and upper mz value for the MS data slice(s). If not specified, the +chromatograms will be calculated on the full mz range. +It is also possible to submit a \code{numeric(1)} in which case +\code{range} is called on it to transform it to a \code{numeric(2)}.} \item{aggregationFun}{\code{character} specifying the function to be used to aggregate intensity values across the mz value range for the same retention time. Allowed values are \code{"sum"}, \code{"max"}, \code{"mean"} and \code{"min"}.} +\item{missing}{\code{numeric(1)} allowing to specify the intensity value to +be used if for a given retention time no signal was measured within the +mz range of the corresponding scan. Defaults to \code{NA_real_} (see also +Details and Notes sections below). Use \code{missing = 0} to resemble the +behaviour of the \code{getEIC} from the \code{old} user interface.} + \item{adjustedRtime}{For \code{extractChromatograms,XCMSnExp}: whether the adjusted (\code{adjustedRtime = TRUE}) or raw retention times (\code{adjustedRtime = FALSE}) should be used for filtering and returned in the resulting \code{\link{Chromatogram}} object. Adjusted retention times are used by default if available.} } +\value{ +If a single \code{rt} and \code{mz} range was specified, + \code{extractChromatograms} returns a \code{list} of + \code{\link{Chromatogram}} classes each element being the chromatogram + for one of the samples for the specified range. + If multiple \code{rt} and \code{mz} ranges were provided (i.e. by passing + a multi-row \code{matrix} to parameters \code{rt} or \code{mz}), the + function returns a \code{list} of \code{list}s. The outer list + representing results for the various ranges, the inner the result across + files. In other words, \code{result[[1]]} returns a \code{list} with + \code{Chromatogram} classes length equal to the number of files, each + element representing the \code{Chromatogram} for the first rt/mz range + for one file. + An empty \code{list} is returned if no MS1 data is present in + \code{object} or if not a single spectrum is available for any of the + provided retention time ranges in \code{rt}. An empty \code{Chromatogram} + object is returned at the correponding position in the result \code{list} + if for the specific file no scan/spectrum was measured in the provided + rt window. In all other cases, a \code{Chromatogram} with length equal + to the number of scans/spectra in the provided rt range is returned. +} \description{ \code{extractChromatograms}: the method allows to extract chromatograms from \code{\link[MSnbase]{OnDiskMSnExp}} and @@ -48,17 +76,26 @@ times are used by default if available.} } \details{ Arguments \code{rt} and \code{mz} allow to specify the MS - data slice from which the chromatogram should be extracted. The - parameter \code{aggregationSum} allows to specify the function to be + data slice from which the chromatogram should be extracted. + The parameter \code{aggregationSum} allows to specify the function to be used to aggregate the intensities across the mz range for the same retention time. Setting \code{aggregationFun = "sum"} would e.g. allow to calculate the \emph{total ion chromatogram} (TIC), \code{aggregationFun = "max"} the \emph{base peak chromatogram} (BPC). + The length of the extracted \code{Chromatogram} object, i.e. the number + of available data points, corresponds to the number of scans/spectra + measured in the specified retention time range. If in a specific scan + (for a give retention time) no signal was measured in the specified mz + range, a \code{NA_real_} is reported as intensity for the retention time + (see Notes for more information). This can be changed using the + \code{missing} parameter. } \note{ \code{Chromatogram} objects extracted with \code{extractChromatogram} - contain \code{NA_real_} values if, for a given retention time, no valid - measurement was available for the provided mz range. + contain \code{NA_real_} values if, for a given retention time, no + signal was measured in the specified mz range. If no spectrum/scan is + present in the defined retention time window a \code{Chromatogram} object + of length 0 is returned. For \code{\link{XCMSnExp}} objects, if adjusted retention times are available, the \code{extractChromatograms} method will by default report @@ -85,11 +122,30 @@ plot(rtime(chrs[[2]]), intensity(chrs[[2]]), type = "l", xlab = "rtime", for(i in c(1, 3)) { points(rtime(chrs[[i]]), intensity(chrs[[i]]), type = "l", col = "00000080") } + +## Plot the chromatogram using plotChromatogram +plotChromatogram(chrs) + +## Extract chromatograms for multiple ranges. +mzr <- matrix(c(335, 335, 344, 344), ncol = 2, byrow = TRUE) +rtr <- matrix(c(2700, 2900, 2600, 2750), ncol = 2, byrow = TRUE) +chrs <- extractChromatograms(od, mz = mzr, rt = rtr) + +## Plot the extracted chromatograms +par(mfrow = c(1, 2)) +plotChromatogram(chrs[[1]]) +plotChromatogram(chrs[[2]]) } \seealso{ \code{\link{XCMSnExp}} for the data object. \code{\link{Chromatogram}} for the object representing chromatographic data. + + \code{\link{plotChromatogram}} to plot a \code{Chromatogram} or + \code{list} of such objects. + + \code{\link{extractMsData}} for a method to extract the MS data as + \code{data.frame}. } \author{ Johannes Rainer diff --git a/man/extractMsData-method.Rd b/man/extractMsData-method.Rd new file mode 100644 index 000000000..ac527ff96 --- /dev/null +++ b/man/extractMsData-method.Rd @@ -0,0 +1,64 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/methods-OnDiskMSnExp.R, R/methods-XCMSnExp.R +\docType{methods} +\name{extractMsData,OnDiskMSnExp-method} +\alias{extractMsData,OnDiskMSnExp-method} +\alias{extractMsData,XCMSnExp-method} +\alias{extractMsData} +\title{Extract a \code{data.frame} containing MS data} +\usage{ +\S4method{extractMsData}{OnDiskMSnExp}(object, rt, mz) + +\S4method{extractMsData}{XCMSnExp}(object, rt, mz, + adjustedRtime = hasAdjustedRtime(object)) +} +\arguments{ +\item{object}{A \code{XCMSnExp} or \code{OnDiskMSnExp} object.} + +\item{rt}{\code{numeric(2)} with the retention time range from which the +data should be extracted.} + +\item{mz}{\code{numeric(2)} with the mz range.} + +\item{adjustedRtime}{(for \code{extractMsData,XCMSnExp}): \code{logical(1)} +specifying if adjusted or raw retention times should be reported. +Defaults to adjusted retention times, if these are present in +\code{object}.} +} +\value{ +A \code{list} of length equal to the number of samples/files in + \code{object}. Each element being a \code{data.frame} with columns + \code{"rt"}, \code{"mz"} and \code{"i"} with the retention time, mz and + intensity tuples of a file. If no data is available for the mz-rt range + in a file a \code{data.frame} with 0 rows is returned for that file. +} +\description{ +Extract a \code{data.frame} of retention time, mz and intensity + values from each file/sample in the provided rt-mz range (or for the full + data range if \code{rt} and \code{mz} are not defined). +} +\examples{ +## Read some files from the test data package. +library(faahKO) +library(xcms) +fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, + full.names = TRUE) +raw_data <- readMSData2(fls[1:2]) + +## Read the full MS data for a defined mz-rt region. +res <- extractMsData(raw_data, mz = c(300, 320), rt = c(2700, 2900)) + +## We've got one data.frame per file +length(res) + +## With number of rows: +nrow(res[[1]]) + +head(res[[1]]) +} +\seealso{ +\code{\link{XCMSnExp}} for the data object. +} +\author{ +Johannes Rainer +} diff --git a/man/groupChromPeaks-density.Rd b/man/groupChromPeaks-density.Rd index bf13d6532..1c8b2bd42 100644 --- a/man/groupChromPeaks-density.Rd +++ b/man/groupChromPeaks-density.Rd @@ -214,7 +214,9 @@ Profiling Using Nonlinear Peak Alignment, Matching, and Identification" The \code{\link{do_groupChromPeaks_density}} core API function and \code{\link{group.density}} for the old user interface. -\code{\link{featureDefinitions}} and +\code{\link{plotChromPeakDensity}} to plot peak densities and + evaluate different algorithm settings. + \code{\link{featureDefinitions}} and \code{\link{featureValues,XCMSnExp-method}} for methods to access the features (i.e. the peak grouping results). diff --git a/man/plotChromPeakDensity.Rd b/man/plotChromPeakDensity.Rd new file mode 100644 index 000000000..9fe333079 --- /dev/null +++ b/man/plotChromPeakDensity.Rd @@ -0,0 +1,101 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/functions-XCMSnExp.R +\name{plotChromPeakDensity} +\alias{plotChromPeakDensity} +\title{Plot chromatographic peak density along the retention time axis} +\usage{ +plotChromPeakDensity(object, mz, rt, param = PeakDensityParam(), + col = "#00000080", xlab = "retention time", ylab = "sample", + xlim = range(rt), ...) +} +\arguments{ +\item{object}{A \code{\link{XCMSnExp}} object with identified +chromatographic peaks.} + +\item{mz}{\code{numeric(2)} defining an mz range for which the peak density +should be plotted.} + +\item{rt}{\code{numeric(2)} defining an optional rt range for which the +peak density should be plotted. Defaults to the absolute retention time +range of \code{object}.} + +\item{param}{\code{\link{PeakDensityParam}} from which parameters for the +\emph{peak density} correspondence algorithm can be extracted.} + +\item{col}{Color to be used for the individual samples. Length has to be 1 +or equal to the number of samples in \code{object}.} + +\item{xlab}{\code{character(1)} with the label for the x-axis.} + +\item{ylab}{\code{character(1)} with the label for the y-axis.} + +\item{xlim}{\code{numeric(2)} representing the limits for the x-axis. +Defaults to the range of the \code{rt} parameter.} + +\item{...}{Additional parameters to be passed to the \code{plot} function.} +} +\value{ +The function is called for its side effect, i.e. to create a plot. +} +\description{ +Plot the density of chromatographic peaks along the retention + time axis and indicate which peaks would be grouped into the same feature + based using the \emph{peak density} correspondence method. Settings for + the \emph{peak density} method can be passed with an + \code{\link{PeakDensityParam}} object to parameter \code{param}. +} +\details{ +The \code{plotChromPeakDensity} function allows to evaluate + different settings for the \emph{peak density} on an mz slice of + interest (e.g. containing chromatographic peaks corresponding to a known + metabolite). + The plot shows the individual peaks that were detected within the + specified \code{mz} slice at their retention time (x-axis) and sample in + which they were detected (y-axis). The density function is plotted as a + black line. Parameters for the \code{density} function are taken from the + \code{param} object. Grey rectangles indicate which chromatographic peaks + would be grouped into a feature by the \emph{peak density} correspondence + method. Parameters for the algorithm are also taken from \code{param}. + See \code{\link{groupChromPeaks-density}} for more information about the + algorithm and its supported settings. +} +\examples{ + +## Below we perform first a peak detection (using the centWave +## method) on some of the test files from the faahKO package. +library(faahKO) +library(xcms) +fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, + full.names = TRUE) + +## Reading 2 of the KO samples +raw_data <- readMSData2(fls[1:2]) + +## Perform the peak detection using the centWave method. +res <- findChromPeaks(raw_data, param = CentWaveParam(noise = 1000)) + +## Align the samples using obiwarp +res <- adjustRtime(res, param = ObiwarpParam()) + +## Plot the chromatographic peak density for a specific mz range to evaluate +## different peak density correspondence settings. +mzr <- c(305.05, 305.15) + +plotChromPeakDensity(res, mz = mzr, param = PeakDensityParam(), pch = 16) + +## Use a larger bandwidth +plotChromPeakDensity(res, mz = mzr, param = PeakDensityParam(bw = 60), + pch = 16) +## Neighboring peaks are now fused into one. + +## Require the chromatographic peak to be present in all samples of a group +plotChromPeakDensity(res, mz = mzr, pch = 16, + param = PeakDensityParam(minFraction = 1)) +} +\seealso{ +\code{\link{groupChromPeaks-density}} for details on the + \emph{peak density} correspondence method and supported settings. +} +\author{ +Johannes Rainer +} diff --git a/man/plotChromatogram.Rd b/man/plotChromatogram.Rd new file mode 100644 index 000000000..03ff29dba --- /dev/null +++ b/man/plotChromatogram.Rd @@ -0,0 +1,115 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/functions-Chromatogram.R +\name{plotChromatogram} +\alias{plotChromatogram} +\alias{highlightChromPeaks} +\title{Plot Chromatogram objects} +\usage{ +plotChromatogram(x, rt, col = "#00000060", lty = 1, type = "l", + xlab = "retention time", ylab = "intensity", main = NULL, ...) + +highlightChromPeaks(x, rt, mz, border = rep("00000040", length(fileNames(x))), + lwd = 1, col = NA, type = c("rect", "point"), ...) +} +\arguments{ +\item{x}{For \code{plotChromatogram}: \code{list} of +\code{\link{Chromatogram}} objects. Such as extracted from an +\code{\link{XCMSnExp}} object by the \code{\link{extractChromatograms}} +method. +For \code{highlightChromPeaks}: \code{XCMSnExp} object with the detected +peaks.} + +\item{rt}{For \code{plotChromatogram}: \code{numeric(2)}, optional parameter +to subset each \code{Chromatogram} by retention time prior to plotting. +Alternatively, the plot could be subsetted by passing a \code{xlim} +parameter. +For \code{highlightChromPeaks}: \code{numeric(2)} with the +retention time range from which peaks should be extracted and plotted.} + +\item{col}{For \code{plotChromatogram}: color definition for each +line/sample. Has to have the same length as samples/elements in \code{x}, +otherwise \code{col[1]} is recycled to generate a vector of +\code{length(x)}. +For \code{highlightChromPeaks}: color to be used to fill the +rectangle.} + +\item{lty}{the line type. See \code{\link[graphics]{plot}} for more details.} + +\item{type}{the plotting type. See \code{\link[graphics]{plot}} for more +details. +For \code{highlightChromPeaks}: \code{character(1)} defining how the peak +should be highlighted: \code{type = "rect"} draws a rectangle +representing the peak definition, \code{type = "point"} indicates a +chromatographic peak with a single point at the position of the peak's +\code{"rt"} and \code{"maxo"}.} + +\item{xlab}{\code{character(1)} with the label for the x-axis.} + +\item{ylab}{\code{character(1)} with the label for the y-axis.} + +\item{main}{The title for the plot. For \code{plotChromatogram}: if +\code{main = NULL} the mz range of the \code{Chromatogram} object(s) will +be used as the title.} + +\item{...}{additional parameters to the \code{\link{matplot}} or \code{plot} +function.} + +\item{mz}{\code{numeric(2)} with the mz range from which the peaks should +be extracted and plotted.} + +\item{border}{colors to be used to color the border of the rectangles. Has to +be equal to the number of samples in \code{x}.} + +\item{lwd}{\code{numeric(1)} defining the width of the line/border.} +} +\description{ +\code{plotChromatogram} creates a chromatogram plot for a + single \code{Chromatogram} object or a \code{list} of + \code{\link{Chromatogram}} objects (one line for each + \code{\link{Chromatogram}}/sample). + +The \code{highlightChromPeaks} function adds chromatographic + peak definitions to an existing plot, such as one created by the + \code{plotChromatograms} function. +} +\details{ +The \code{plotChromatogram} function allows to efficiently plot + the chromatograms of several samples into a single plot. +} +\examples{ + +## Perform a fast peak detection. +library(xcms) +library(faahKO) +faahko_3_files <- c(system.file('cdf/KO/ko15.CDF', package = "faahKO"), + system.file('cdf/KO/ko16.CDF', package = "faahKO"), + system.file('cdf/KO/ko18.CDF', package = "faahKO")) + +od <- readMSData2(faahko_3_files) + +od <- findChromPeaks(od, param = CentWaveParam(snthresh = 20, noise = 10000)) + +rtr <- c(2600, 2750) +mzr <- c(344, 344) +chrs <- extractChromatograms(od, rt = rtr, mz = mzr) + +## Plot a single chromatogram +plotChromatogram(chrs[[1]]) + +## Plot all chromatograms at once, using different colors for each. +plotChromatogram(chrs, col = c("#FF000080", "#00FF0080", "#0000FF80"), lwd = 2) + +## Highlight identified chromatographic peaks. +highlightChromPeaks(od, rt = rtr, mz = mzr, + col = c("#FF000005", "#00FF0005", "#0000FF05"), + border = c("#FF000040", "#00FF0040", "#0000FF40")) + +} +\seealso{ +\code{\link{extractChromatograms}} for how to extract a list of + \code{\link{Chromatogram}} objects from an \code{\link{XCMSnExp}} + objects. +} +\author{ +Johannes Rainer +} diff --git a/man/useOriginalCode.Rd b/man/useOriginalCode.Rd index d432f8b74..fb223073f 100644 --- a/man/useOriginalCode.Rd +++ b/man/useOriginalCode.Rd @@ -12,23 +12,22 @@ old code should be used in corresponding functions. If not provided the function simply returns the value of the global option.} } \value{ -logical(1) indicating whether old code is being -used. +logical(1) indicating whether old code is being used. } \description{ This function allows to enable the usage of old, partially -deprecated code from xcms by setting a corresponding global option. See -details for functions affected. + deprecated code from xcms by setting a corresponding global option. See + details for functions affected. } \details{ The functions/methods that will be affected by this are: -\itemize{ -\item \code{\link{do_findChromPeaks_matchedFilter}} -} + \itemize{ + \item \code{\link{do_findChromPeaks_matchedFilter}} + } } \note{ Usage of old code is strongly dicouraged. This function is thought -to be used mainly in the transition phase from xcms to xcms version 3. + to be used mainly in the transition phase from xcms to xcms version 3. } \author{ Johannes Rainer diff --git a/tests/doRUnit.R b/tests/doRUnit.R index 700e4bcd4..729feb8af 100644 --- a/tests/doRUnit.R +++ b/tests/doRUnit.R @@ -42,6 +42,9 @@ if(require("RUnit", quietly=TRUE)) { snthresh = 40)) faahko_xs <- xcmsSet(faahko_3_files, profparam = list(step = 0), method = "centWave", noise = 10000, snthresh = 40) + ## faahko_xod <- findChromPeaks(faahko_od, param = CentWaveParam(noise = 5000)) + ## faahko_xs <- xcmsSet(faahko_3_files, profparam = list(step = 0), + ## method = "centWave", noise = 5000) ## Doing also the retention time correction etc od_x <- faahko_od xod_x <- faahko_xod diff --git a/vignettes/new_functionality.Rmd b/vignettes/new_functionality.Rmd index 0383cc15c..e658787f0 100644 --- a/vignettes/new_functionality.Rmd +++ b/vignettes/new_functionality.Rmd @@ -133,11 +133,22 @@ Alternatively we can use the `extractChromatograms` method that extracts chromatograms from the object. In the example below we extract the *base peak chromatogram* (BPC) by setting `aggregationFun` to `"max"` and not specifying an `rt` or `mz` range to extract only a data subset. In contrast to the `tic` and `bpi` -methods, this function reads the data from the raw files. +methods, this function reads the data from the raw files. It takes thus more +time to create the plot, but it is based on the actual raw data that is used for +the later analysis - the `tic` and `bpi` methods access only the information that is +stored in the raw data files by the MS detector during the data acquisition. ```{r faahKO-bpi, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4} ## Get the base peak chromatograms. This reads data from the files. bpis <- extractChromatograms(raw_data, aggregationFun = "max") +## Plot the list of Chromatogram objects. +plotChromatogram(bpis, col = paste0(sample_colors[pData(raw_data)$sample_group], 80)) +``` + +While the `plotChromatogram` function if very convenient (and fast), it would also +not be too difficult to create the plot manually: + +```{r faahKO-bbpi-manual, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4} plot(3, 3, pch = NA, xlim = range(unlist(lapply(bpis, rtime))), ylim = range(unlist(lapply(bpis, intensity))), main = "BPC", xlab = "rtime", ylab = "intensity") @@ -148,7 +159,7 @@ for (i in 1:length(bpis)) { ``` Note that we could restrict the analysis to a certain retention time range by -sub-setting `raw_data` with the `filterRt` method. +first sub-setting `raw_data` with the `filterRt` method. In addition we can plot the distribution of the total ion counts per file. In contrast to sub-setting the object we split the numeric vector returned by the @@ -174,7 +185,7 @@ Next we perform the chromatographic peak detection using the *centWave* algorith parameters, but the settings should be adjusted to each experiment individually based on e.g. the expected width of the chromatographic peaks etc. -```{r faahKO-centWave} +```{r faahKO-centWave, message = FALSE, warning = FALSE} ## Defining the settings for the centWave peak detection. cwp <- CentWaveParam(snthresh = 20, noise = 1000) xod <- findChromPeaks(raw_data, param = cwp) @@ -199,8 +210,14 @@ for the updated user interface. Next we extract the chromatogram for the rt-mz region corresponding to one detected chromatographic peak increasing the region in rt dimension by +/- 60 -seconds. In addition we extract also all chromatographic peaks in that region by -passing the same `mz` and `rt` parameters to the `chromPeaks` method. +seconds. In addition we extract also the full chromatogram for the specified mz +range (i.e. the full rt range) and identify all chromatographic peaks in that +region by passing the same `mz` and `rt` parameters to the `chromPeaks` method. + +If two-column matrices are passed to the `extractChromatograms` method with +parameters `rt` and `mz`, the function returns a `list`, each element being a `list` of +`Chromatogram` objects representing the chromatogram for the respective +ranges. ```{r faahKO-chromPeaks-extractChroms, warning = FALSE} rtr <- chromPeaks(xod)[68, c("rtmin", "rtmax")] @@ -209,32 +226,30 @@ rtr[1] <- rtr[1] - 60 rtr[2] <- rtr[2] + 60 mzr <- chromPeaks(xod)[68, c("mzmin", "mzmax")] +## Add an rt range that would extract the full chromatogram +rtr <- rbind(c(-Inf, Inf), rtr) +mzr <- rbind(mzr, mzr) + chrs <- extractChromatograms(xod, rt = rtr, mz = mzr) ## In addition we get all peaks detected in the same region pks <- chromPeaks(xod, rt = rtr, mz = mzr) +pks ``` Next we plot the extracted chromatogram for the data and highlight in addition the identified peaks. -```{r faahKO-extracted-chrom-with-peaks, message = FALSE, fig.cap = "Extracted ion chromatogram for one of the identified peaks. Each line represents the signal measured in one sample. The rectangles indicate the margins of the identified chromatographic peak in the respective sample.", fig.align = "center", fig.width = 8, fig.height = 8} -## Define the limits on x- and y-dimension -xl <- range(lapply(chrs, rtime), na.rm = TRUE) -yl <- range(lapply(chrs, intensity), na.rm = TRUE) -plot(3, 3, pch = NA, main = paste(format(mzr, digits = 6), collapse = "-"), - xlab = "rt", ylab = "intensity", xlim = xl, ylim = yl) -## Plot the chromatogram per sample -for (i in 1:length(chrs)) { - points(rtime(chrs[[i]]), intensity(chrs[[i]]), type = "l", - col = sample_colors[pData(xod)$sample_group[i]]) -} -## Highlight the identified chromatographic peaks. -for (i in 1:nrow(pks)) { - rect(xleft = pks[i, "rtmin"], xright = pks[i, "rtmax"], ybottom = 0, - ytop = pks[i, "maxo"], - border = paste0(sample_colors[pData(xod)$sample_group][pks[i, "sample"]], 60)) -} +```{r faahKO-extracted-chrom-with-peaks, message = FALSE, fig.cap = "Extracted ion chromatogram for one of the identified peaks. Left: full retention time range, right: rt range of the peak. Each line represents the signal measured in one sample. The rectangles indicate the margins of the identified chromatographic peak in the respective sample.", fig.align = "center", fig.width = 12, fig.height = 6} +## Plot the full rt range: +plotChromatogram(chrs[[1]], + col = paste0(sample_colors[pData(xod)$sample_group], 80)) +## And now for the peak range. +plotChromatogram(chrs[[2]], + col = paste0(sample_colors[pData(xod)$sample_group], 80)) +## Highlight also the identified chromatographic peaks. +highlightChromPeaks(xod, rt = rtr[2, ], mzr[2, ], + border = paste0(sample_colors[pData(xod)$sample_group], 40)) ``` Note that the `extractChromatograms` does return an `NA` value if in a certain scan @@ -266,13 +281,8 @@ times per sample using the `plotAdjustedRtime` function. bpis <- extractChromatograms(xod, aggregationFun = "max") par(mfrow = c(2, 1), mar = c(4.5, 4.2, 1, 0.5)) -plot(3, 3, pch = NA, xlim = range(unlist(lapply(bpis, rtime))), - ylim = range(unlist(lapply(bpis, intensity))), main = "BPC", - xlab = "rtime", ylab = "intensity") -for (i in 1:length(bpis)) { - points(rtime(bpis[[i]]), intensity(bpis[[i]]), type = "l", - col = paste0(sample_colors[pData(xod)$sample_group[i]], 80)) -} +plotChromatogram(bpis, + col = paste0(sample_colors[pData(xod)$sample_group[i]], 80)) ## Plot also the difference of adjusted to raw retention time. plotAdjustedRtime(xod, col = paste0(sample_colors[pData(xod)$sample_group], 80)) ``` @@ -299,10 +309,11 @@ The 3rd sample was used as *center* sample against which all other samples were aligned to, hence its adjusted retention times are identical to the raw retention times. -We are again plotting the extracted ion chromatogram for the selected peaks from -above to evaluate the impact of the alignment. +Below we plot the extracted ion chromatogram for the selected peak from the +example above before and after retention time correction to evaluate the impact +of the alignment. -```{r faahKO-extracted-chrom-with-peaks-aligned, echo = FALSE, message = FALSE, fig.cap = "Extracted ion chromatogram for one of the identified peaks after alignment.", fig.align = "center", fig.width = 8, fig.height = 8} +```{r faahKO-extracted-chrom-with-peaks-aligned, echo = FALSE, message = FALSE, fig.cap = "Extracted ion chromatogram for one of the identified peaks before and after alignment.", fig.align = "center", fig.width = 8, fig.height = 8} rtr <- chromPeaks(xod)[68, c("rtmin", "rtmax")] ## Increase the range: rtr[1] <- rtr[1] - 60 @@ -310,26 +321,15 @@ rtr[2] <- rtr[2] + 60 mzr <- chromPeaks(xod)[68, c("mzmin", "mzmax")] chrs <- extractChromatograms(xod, rt = rtr, mz = mzr) - -## In addition we get all peaks detected in the same region -pks <- chromPeaks(xod, rt = rtr, mz = mzr) - -## Define the limits on x- and y-dimension -xl <- range(lapply(chrs, rtime), na.rm = TRUE) -yl <- range(lapply(chrs, intensity), na.rm = TRUE) -plot(3, 3, pch = NA, main = paste(format(mzr, digits = 6), collapse = "-"), - xlab = "rt", ylab = "intensity", xlim = xl, ylim = yl) -## Plot the chromatogram per sample -for (i in 1:length(chrs)) { - points(rtime(chrs[[i]]), intensity(chrs[[i]]), type = "l", - col = sample_colors[pData(xod)$sample_group[i]]) -} -## Highlight the identified chromatographic peaks. -for (i in 1:nrow(pks)) { - rect(xleft = pks[i, "rtmin"], xright = pks[i, "rtmax"], ybottom = 0, - ytop = pks[i, "maxo"], - border = paste0(sample_colors[pData(xod)$sample_group][pks[i, "sample"]], 60)) -} +chrs_raw <- extractChromatograms(raw_data, rt = rtr, mz = mzr) + +par(mfrow = c(2, 1)) +plotChromatogram(chrs_raw, + col = paste0(sample_colors[pData(xod)$sample_group], 80)) +plotChromatogram(chrs, + col = paste0(sample_colors[pData(xod)$sample_group], 80)) +highlightChromPeaks(xod, rt = rtr, mzr, + border = paste0(sample_colors[pData(xod)$sample_group], 40)) ``` After alignment, the peaks are nicely overlapping. diff --git a/vignettes/new_functionality.org b/vignettes/new_functionality.org index e9df02811..8411bc4c3 100644 --- a/vignettes/new_functionality.org +++ b/vignettes/new_functionality.org @@ -84,10 +84,12 @@ an LS/GC-MS experiment are referred to as /chromatographic peaks/. The respectiv method to identify such peaks is hence called =findChromPeaks= and the identified peaks can be accessed using the =XCMSnExp= =chromPeaks= method. The results from an correspondence analysis which aims to match and group chromatographic peaks -within and between samples are called /features/. The definition of such mz-rt -features (i.e. the result from the =groupChromPeaks= method) can be accessed /via/ -the =featureDefinitions= method of the =XCMSnExp= class. Finally, alignment -(retention time correction) can be performed using the =adjustRtime= method. +within and between samples are called /features/. A feature corresponds to +individual ions with a unique mass-to-charge ratio (mz) and a unique retention +time (rt). The definition of such mz-rt features (i.e. the result from the +=groupChromPeaks= method) can be accessed /via/ the =featureDefinitions= method of +the =XCMSnExp= class. Finally, alignment (retention time correction) can be +performed using the =adjustRtime= method. The settings for any of the new analysis methods are bundled in /parameter/ classes, one class for each method. This encapsulation of the parameters to a @@ -149,12 +151,25 @@ Alternatively we can use the =extractChromatograms= method that extracts chromatograms from the object. In the example below we extract the /base peak chromatogram/ (BPC) by setting =aggregationFun= to ="max"= and not specifying an =rt= or =mz= range to extract only a data subset. In contrast to the =tic= and =bpi= -methods, this function reads the data from the raw files. +methods, this function reads the data from the raw files. It takes thus more +time to create the plot, but it is based on the actual raw data that is used for +the later analysis - the =tic= and =bpi= methods access only the information that is +stored in the raw data files by the MS detector during the data acquisition. #+NAME: faahKO-bpi #+BEGIN_SRC R :ravel message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4 ## Get the base peak chromatograms. This reads data from the files. bpis <- extractChromatograms(raw_data, aggregationFun = "max") + ## Plot the list of Chromatogram objects. + plotChromatogram(bpis, col = paste0(sample_colors[pData(raw_data)$sample_group], 80)) + +#+END_SRC + +While the =plotChromatogram= function if very convenient (and fast), it would also +not be too difficult to create the plot manually: + +#+NAME: faahKO-bbpi-manual +#+BEGIN_SRC R :ravel message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4 plot(3, 3, pch = NA, xlim = range(unlist(lapply(bpis, rtime))), ylim = range(unlist(lapply(bpis, intensity))), main = "BPC", xlab = "rtime", ylab = "intensity") @@ -162,11 +177,11 @@ methods, this function reads the data from the raw files. points(rtime(bpis[[i]]), intensity(bpis[[i]]), type = "l", col = paste0(sample_colors[pData(raw_data)$sample_group[i]], 80)) } - #+END_SRC + Note that we could restrict the analysis to a certain retention time range by -sub-setting =raw_data= with the =filterRt= method. +first sub-setting =raw_data= with the =filterRt= method. In addition we can plot the distribution of the total ion counts per file. In contrast to sub-setting the object we split the numeric vector returned by the @@ -194,7 +209,7 @@ parameters, but the settings should be adjusted to each experiment individually based on e.g. the expected width of the chromatographic peaks etc. #+NAME: faahKO-centWave -#+BEGIN_SRC R :message = FALSE +#+BEGIN_SRC R :ravel message = FALSE, warning = FALSE ## Defining the settings for the centWave peak detection. cwp <- CentWaveParam(snthresh = 20, noise = 1000) xod <- findChromPeaks(raw_data, param = cwp) @@ -220,8 +235,14 @@ for the updated user interface. Next we extract the chromatogram for the rt-mz region corresponding to one detected chromatographic peak increasing the region in rt dimension by +/- 60 -seconds. In addition we extract also all chromatographic peaks in that region by -passing the same =mz= and =rt= parameters to the =chromPeaks= method. +seconds. In addition we extract also the full chromatogram for the specified mz +range (i.e. the full rt range) and identify all chromatographic peaks in that +region by passing the same =mz= and =rt= parameters to the =chromPeaks= method. + +If two-column matrices are passed to the =extractChromatograms= method with +parameters =rt= and =mz=, the function returns a =list=, each element being a =list= of +=Chromatogram= objects representing the chromatogram for the respective +ranges. #+NAME: faahKO-chromPeaks-extractChroms #+BEGIN_SRC R :ravel warning = FALSE @@ -231,34 +252,31 @@ passing the same =mz= and =rt= parameters to the =chromPeaks= method. rtr[2] <- rtr[2] + 60 mzr <- chromPeaks(xod)[68, c("mzmin", "mzmax")] + ## Add an rt range that would extract the full chromatogram + rtr <- rbind(c(-Inf, Inf), rtr) + mzr <- rbind(mzr, mzr) + chrs <- extractChromatograms(xod, rt = rtr, mz = mzr) ## In addition we get all peaks detected in the same region pks <- chromPeaks(xod, rt = rtr, mz = mzr) + pks #+END_SRC Next we plot the extracted chromatogram for the data and highlight in addition the identified peaks. #+NAME: faahKO-extracted-chrom-with-peaks -#+BEGIN_SRC R :ravel message = FALSE, fig.cap = "Extracted ion chromatogram for one of the identified peaks. Each line represents the signal measured in one sample. The rectangles indicate the margins of the identified chromatographic peak in the respective sample.", fig.align = "center", fig.width = 8, fig.height = 8 - ## Define the limits on x- and y-dimension - xl <- range(lapply(chrs, rtime), na.rm = TRUE) - yl <- range(lapply(chrs, intensity), na.rm = TRUE) - plot(3, 3, pch = NA, main = paste(format(mzr, digits = 6), collapse = "-"), - xlab = "rt", ylab = "intensity", xlim = xl, ylim = yl) - ## Plot the chromatogram per sample - for (i in 1:length(chrs)) { - points(rtime(chrs[[i]]), intensity(chrs[[i]]), type = "l", - col = sample_colors[pData(xod)$sample_group[i]]) - } - ## Highlight the identified chromatographic peaks. - for (i in 1:nrow(pks)) { - rect(xleft = pks[i, "rtmin"], xright = pks[i, "rtmax"], ybottom = 0, - ytop = pks[i, "maxo"], - border = paste0(sample_colors[pData(xod)$sample_group][pks[i, "sample"]], 60)) - } - +#+BEGIN_SRC R :ravel message = FALSE, fig.cap = "Extracted ion chromatogram for one of the identified peaks. Left: full retention time range, right: rt range of the peak. Each line represents the signal measured in one sample. The rectangles indicate the margins of the identified chromatographic peak in the respective sample.", fig.align = "center", fig.width = 12, fig.height = 6 + ## Plot the full rt range: + plotChromatogram(chrs[[1]], + col = paste0(sample_colors[pData(xod)$sample_group], 80)) + ## And now for the peak range. + plotChromatogram(chrs[[2]], + col = paste0(sample_colors[pData(xod)$sample_group], 80)) + ## Highlight also the identified chromatographic peaks. + highlightChromPeaks(xod, rt = rtr[2, ], mzr[2, ], + border = paste0(sample_colors[pData(xod)$sample_group], 40)) #+END_SRC Note that the =extractChromatograms= does return an =NA= value if in a certain scan @@ -292,13 +310,8 @@ times per sample using the =plotAdjustedRtime= function. bpis <- extractChromatograms(xod, aggregationFun = "max") par(mfrow = c(2, 1), mar = c(4.5, 4.2, 1, 0.5)) - plot(3, 3, pch = NA, xlim = range(unlist(lapply(bpis, rtime))), - ylim = range(unlist(lapply(bpis, intensity))), main = "BPC", - xlab = "rtime", ylab = "intensity") - for (i in 1:length(bpis)) { - points(rtime(bpis[[i]]), intensity(bpis[[i]]), type = "l", - col = paste0(sample_colors[pData(xod)$sample_group[i]], 80)) - } + plotChromatogram(bpis, + col = paste0(sample_colors[pData(xod)$sample_group[i]], 80)) ## Plot also the difference of adjusted to raw retention time. plotAdjustedRtime(xod, col = paste0(sample_colors[pData(xod)$sample_group], 80)) #+END_SRC @@ -326,11 +339,12 @@ The 3rd sample was used as /center/ sample against which all other samples were aligned to, hence its adjusted retention times are identical to the raw retention times. -We are again plotting the extracted ion chromatogram for the selected peaks from -above to evaluate the impact of the alignment. +Below we plot the extracted ion chromatogram for the selected peak from the +example above before and after retention time correction to evaluate the impact +of the alignment. #+NAME: faahKO-extracted-chrom-with-peaks-aligned -#+BEGIN_SRC R :ravel echo = FALSE, message = FALSE, fig.cap = "Extracted ion chromatogram for one of the identified peaks after alignment.", fig.align = "center", fig.width = 8, fig.height = 8 +#+BEGIN_SRC R :ravel echo = FALSE, message = FALSE, fig.cap = "Extracted ion chromatogram for one of the identified peaks before and after alignment.", fig.align = "center", fig.width = 8, fig.height = 8 rtr <- chromPeaks(xod)[68, c("rtmin", "rtmax")] ## Increase the range: rtr[1] <- rtr[1] - 60 @@ -338,27 +352,15 @@ above to evaluate the impact of the alignment. mzr <- chromPeaks(xod)[68, c("mzmin", "mzmax")] chrs <- extractChromatograms(xod, rt = rtr, mz = mzr) - - ## In addition we get all peaks detected in the same region - pks <- chromPeaks(xod, rt = rtr, mz = mzr) - - ## Define the limits on x- and y-dimension - xl <- range(lapply(chrs, rtime), na.rm = TRUE) - yl <- range(lapply(chrs, intensity), na.rm = TRUE) - plot(3, 3, pch = NA, main = paste(format(mzr, digits = 6), collapse = "-"), - xlab = "rt", ylab = "intensity", xlim = xl, ylim = yl) - ## Plot the chromatogram per sample - for (i in 1:length(chrs)) { - points(rtime(chrs[[i]]), intensity(chrs[[i]]), type = "l", - col = sample_colors[pData(xod)$sample_group[i]]) - } - ## Highlight the identified chromatographic peaks. - for (i in 1:nrow(pks)) { - rect(xleft = pks[i, "rtmin"], xright = pks[i, "rtmax"], ybottom = 0, - ytop = pks[i, "maxo"], - border = paste0(sample_colors[pData(xod)$sample_group][pks[i, "sample"]], 60)) - } - + chrs_raw <- extractChromatograms(raw_data, rt = rtr, mz = mzr) + + par(mfrow = c(2, 1)) + plotChromatogram(chrs_raw, + col = paste0(sample_colors[pData(xod)$sample_group], 80)) + plotChromatogram(chrs, + col = paste0(sample_colors[pData(xod)$sample_group], 80)) + highlightChromPeaks(xod, rt = rtr, mzr, + border = paste0(sample_colors[pData(xod)$sample_group], 40)) #+END_SRC After alignment, the peaks are nicely overlapping. @@ -508,6 +510,10 @@ highlighted in the plot. ** New naming convention +Peaks identified in LC/GC-MS metabolomics are referred to as /chromatographic +peaks/ where possible to avoid any misconceptions with /mass peaks/ identified in +mz dimension. + Methods for data analysis from the original =xcms= code have been renamed to avoid potential confusions: @@ -520,11 +526,12 @@ potential confusions: + *Correspondence*: =groupChromPeaks= instead of =group= to clearly indicate what is being grouped. Group might be a sample group or a peak group, the latter being referred to also by (mz-rt) /feature/. - + + *Alignment*: =adjustRtime= instead of =retcor= for retention time correction. The word /cor/ in /retcor/ might be easily misinterpreted as /correlation/ instead of correction. + ** New data classes *** =OnDiskMSnExp= diff --git a/vignettes/xcmsPreprocess.Rnw b/vignettes/xcmsPreprocess.Rnw-notrun similarity index 99% rename from vignettes/xcmsPreprocess.Rnw rename to vignettes/xcmsPreprocess.Rnw-notrun index 375d25921..ac8d0b6c5 100755 --- a/vignettes/xcmsPreprocess.Rnw +++ b/vignettes/xcmsPreprocess.Rnw-notrun @@ -53,6 +53,9 @@ corresponding to each step are also given.} library(multtest) library(xcms) library(faahKO) +## Disable default parallel processing since on some platforms it might +## cause delays. +register(SerialParam()) @ \section{Raw Data File Preparation}