diff --git a/DESCRIPTION b/DESCRIPTION index c955dc470..adfde7243 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: xcms -Version: 2.99.3 -Date: 2017-06-13 +Version: 2.99.5 +Date: 2017-07-14 Title: LC/MS and GC/MS Data Analysis Author: Colin A. Smith , Ralf Tautenhahn , @@ -14,7 +14,7 @@ Depends: methods, Biobase, BiocParallel (>= 1.8.0), - MSnbase (>= 2.1.10) + MSnbase (>= 2.3.8) Imports: mzR (>= 1.1.6), BiocGenerics, @@ -61,19 +61,18 @@ Collate: 'functions-Params.R' 'do_groupChromPeaks-functions.R' 'fastMatch.R' - 'functions-Chromatogram.R' 'functions-utils.R' 'functions-IO.R' 'functions-OnDiskMSnExp.R' 'functions-ProcessHistory.R' 'functions-XCMSnExp.R' + 'functions-normalization.R' 'functions-xcmsEIC.R' 'functions-xcmsFragments.R' 'functions-xcmsRaw.R' 'functions-xcmsSet.R' 'init.R' 'matchpeaks.R' - 'methods-Chromatogram.R' 'methods-IO.R' 'methods-MsFeatureData.R' 'methods-OnDiskMSnExp.R' diff --git a/NAMESPACE b/NAMESPACE index 0ac1d5175..9cbb81481 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,7 +2,7 @@ useDynLib(xcms) importFrom("utils", "capture.output") import("methods") -importFrom("ProtGenerics", "peaks") +importFrom("ProtGenerics", "peaks", "chromatogram") importFrom("BiocGenerics", "updateObject", "fileName") ## import("Biobase") importFrom("Biobase", "AnnotatedDataFrame") @@ -44,15 +44,17 @@ importFrom("RANN", "nn2") importFrom("MassSpecWavelet", "peakDetectionCWT", "tuneInPeakInfo") ## MSnbase: -importClassesFrom("MSnbase", "MSnExp", "pSet", "OnDiskMSnExp") +importClassesFrom("MSnbase", "MSnExp", "pSet", "OnDiskMSnExp", "Chromatogram", + "Chromatograms") importMethodsFrom("MSnbase", "intensity", "mz", "rtime", "fileNames", "fromFile", "filterFile", "filterMsLevel", "msLevel", "scanIndex", "spectra", "impute", "isCentroided", "polarity", "[", "bin", "clean", "featureNames", "filterAcquisitionNum", "filterMz", "filterRt", "normalize", "pickPeaks", "removePeaks", "removeReporters", "smooth", "trimMz", "splitByFile", "[[", - 
"spectrapply", "peaksCount", "precursorMz") -importFrom("MSnbase", "as.data.frame.Spectrum") + "spectrapply", "peaksCount", "precursorMz", "chromatogram", + "plot", "fData", "fData<-") +importFrom("MSnbase", "as.data.frame.Spectrum", "Chromatogram", "Chromatograms") export( "etg", @@ -202,14 +204,15 @@ export( "do_groupChromPeaks_density", "do_groupPeaks_mzClust", "do_groupChromPeaks_nearest", - "Chromatogram", "do_adjustRtime_peakGroups", "processHistoryTypes", "adjustRtimePeakGroups", "plotAdjustedRtime", "plotChromatogram", "highlightChromPeaks", - "plotChromPeakDensity" + "plotChromPeakDensity", + "plotChromPeaks", + "plotChromPeakImage" ) ## New analysis methods @@ -388,7 +391,6 @@ export("CentWaveParam", "MatchedFilterParam", "MassifquantParam", "MSWParam", ## New Classes exportClasses("XCMSnExp", "MsFeatureData", "ProcessHistory", - "Chromatogram", "XProcessHistory" ) ## New methods for these classes @@ -426,12 +428,9 @@ exportMethods("hasChromPeaks", "rtime", "mz", "intensity", - "aggregationFun", "extractChromatograms", - "precursorMz", - "productMz", "fillChromPeaks", - "as.data.frame", "dropFilledChromPeaks", - "extractMsData" + "extractMsData", + "chromatogram" ) diff --git a/R/AllGenerics.R b/R/AllGenerics.R index 46380832b..72b106a29 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -16,8 +16,6 @@ setGeneric("adjustRtime", function(object, param, ...) setGeneric("adjustedRtime", function(object, ...) standardGeneric("adjustedRtime")) setGeneric("adjustedRtime<-", function(object, value) standardGeneric("adjustedRtime<-")) -setGeneric("aggregationFun", function(object, ...) - standardGeneric("aggregationFun")) setGeneric("ampTh", function(object, ...) standardGeneric("ampTh")) setGeneric("ampTh<-", function(object, value) standardGeneric("ampTh<-")) setGeneric("AutoLockMass", function(object) standardGeneric("AutoLockMass")) @@ -283,7 +281,6 @@ setGeneric("processParam<-", function(object, value) setGeneric("processType", function(object, ...) 
standardGeneric("processType")) setGeneric("processType<-", function(object, value) standardGeneric("processType<-")) setGeneric("processHistory", function(object, ...) standardGeneric("processHistory")) -setGeneric("productMz", function(object, value) standardGeneric("productMz")) setGeneric("profinfo", function(object) standardGeneric("profinfo")) setGeneric("profinfo<-", function(object, value) standardGeneric("profinfo<-")) setGeneric("profMat", function(object, ...) standardGeneric("profMat")) diff --git a/R/DataClasses.R b/R/DataClasses.R index 9bde64946..89359fcfb 100644 --- a/R/DataClasses.R +++ b/R/DataClasses.R @@ -387,6 +387,12 @@ setClass("XProcessHistory", #' @seealso \code{\link{findPeaks}} for the \emph{old} peak detection #' methods. #' +#' \code{\link{plotChromPeaks}} to plot identified chromatographic peaks +#' for one file. +#' +#' \code{\link{highlightChromPeaks}} to highlight identified chromatographic +#' peaks in an extracted ion chromatogram plot. +#' #' @author Johannes Rainer NULL #> NULL @@ -725,8 +731,9 @@ NULL #' #' @examples #' -#' ## Create a MatchedFilterParam object -#' mfp <- MatchedFilterParam(binSize = 0.5) +#' ## Create a MatchedFilterParam object. Note that we use an unnecessarily large +#' ## binSize parameter to reduce the run-time of the example. +#' mfp <- MatchedFilterParam(binSize = 5) #' ## Change snthresh parameter #' snthresh(mfp) <- 15 #' mfp @@ -738,7 +745,7 @@ NULL #' library(MSnbase) #' fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, #' full.names = TRUE) -#' raw_data <- readMSData2(fls) +#' raw_data <- readMSData2(fls[1:2]) #' ## Perform the chromatographic peak detection using the settings defined #' ## above. Note that we are also disabling parallel processing in this #' ## example by registering a "SerialParam" @@ -1657,7 +1664,7 @@ NULL #' p #' #' ############################## -#' ## Chromatographi peak detection and grouping. +#' ## Chromatographic peak detection and grouping. 
#' ## #' ## Below we perform first a chromatographic peak detection (using the #' ## matchedFilter method) on some of the test files from the faahKO package @@ -2255,8 +2262,8 @@ setClass("MsFeatureData", contains = c("environment", "Versioned"), #' the feature definitions representing the peak grouping results. #' \code{\link{adjustRtime}} for retention time adjustment methods. #' -#' \code{\link{extractChromatograms}} to extract MS data as -#' \code{\link{Chromatogram}} objects. +#' \code{\link[MSnbase]{chromatogram}} to extract MS data as +#' \code{\link[MSnbase]{Chromatogram}} objects. #' #' \code{\link{extractMsData}} for the method to extract MS data as #' \code{data.frame}s. @@ -2272,7 +2279,7 @@ setClass("MsFeatureData", contains = c("environment", "Versioned"), #' ## Now we perform a chromatographic peak detection on this data set using the #' ## matched filter method. We are tuning the settings such that it performs #' ## faster. -#' mfp <- MatchedFilterParam(binSize = 4) +#' mfp <- MatchedFilterParam(binSize = 6) #' xod <- findChromPeaks(od, param = mfp) #' #' ## The results from the peak detection are now stored in the XCMSnExp @@ -2302,12 +2309,15 @@ setClass("MsFeatureData", contains = c("environment", "Versioned"), #' ## spectra method which returns Spectrum objects containing all raw data. #' ## Note that all these methods read the information from the original input #' ## files and subsequently apply eventual data processing steps to them. -#' head(mz(xod, bySample = TRUE)) +#' mzs <- mz(xod, bySample = TRUE) +#' length(mzs) +#' lengths(mzs) #' -#' ## Reading all data -#' spctr <- spectra(xod) +#' ## The full data could also be read using the spectra method, which returns +#' ## a list of Spectrum objects containing the mz, intensity and rt values. 
+#' ## spctr <- spectra(xod) #' ## To get all spectra of the first file we can split them by file -#' head(split(spctr, fromFile(xod))[[1]]) +#' ## head(split(spctr, fromFile(xod))[[1]]) #' #' ############ #' ## Filtering @@ -2395,61 +2405,3 @@ setClass("XCMSnExp", } ) -#' @title Representation of chromatographic MS data -#' -#' @description The \code{Chromatogram} class is designed to store -#' chromatographic MS data, i.e. pairs of retention time and intensity -#' values. Instances of the class can be created with the -#' \code{Chromatogram} constructor function but in most cases the dedicated -#' methods for \code{\link{OnDiskMSnExp}} and \code{\link{XCMSnExp}} -#' objects extracting chromatograms should be used instead (i.e. the -#' \code{\link{extractChromatograms}}). -#' -#' @details The \code{mz}, \code{filterMz}, \code{precursorMz} and -#' \code{productMz} are stored as a \code{numeric(2)} representing a range -#' even if the chromatogram was generated for only a single ion (i.e. a -#' single mz value). Using ranges for \code{mz} values allow this class to -#' be used also for e.g. total ion chromatograms or base peak chromatograms. -#' -#' The slots \code{precursorMz} and \code{productMz} allow to represent SRM -#' (single reaction monitoring) and MRM (multiple SRM) chromatograms. As -#' example, a \code{Chromatogram} for a SRM transition 273 -> 153 will have -#' a \code{@precursorMz = c(273, 273)} and a -#' \code{@productMz = c(153, 153)}. -#' -#' @rdname Chromatogram-class -#' -#' @export -#' -#' @author Johannes Rainer -#' -#' @seealso \code{\link{extractChromatograms}} for the method to extract -#' \code{Chromatogram} objects from \code{\link{XCMSnExp}} or -#' \code{\link[MSnbase]{OnDiskMSnExp}} objects. -#' -#' \code{\link{plotChromatogram}} to plot \code{Chromatogram} objects. -setClass("Chromatogram", - slots = c( - rtime = "numeric", - intensity = "numeric", - mz = "numeric", - filterMz = "numeric", - precursorMz = "numeric", ## Or call that Q1mz? 
- productMz = "numeric", ## Or call that Q3mz? - fromFile = "integer", - aggregationFun = "character" - ), - contains = "Versioned", - prototype = prototype( - rtime = numeric(), - intensity = numeric(), - mz = c(NA_real_, NA_real_), - filterMz = c(NA_real_, NA_real_), - precursorMz = c(NA_real_, NA_real_), - productMz = c(NA_real_, NA_real_), - fromFile = integer(), - aggregationFun = character() - ), - validity = function(object) - validChromatogram(object) - ) diff --git a/R/Deprecated.R b/R/Deprecated.R index 0e3a8990c..69ba6bbd9 100644 --- a/R/Deprecated.R +++ b/R/Deprecated.R @@ -332,3 +332,106 @@ xcmsClusterApply <- function(cl, x, fun, msgfun=NULL, ...) { } } + +setMethod("extractChromatograms", + signature(object = "OnDiskMSnExp"), + function(object, rt, mz, aggregationFun = "sum", missing = NA_real_) { + .Deprecated(msg = paste0("Use of 'extractChromatograms' is ", + "deprecated, please use 'chromatogram' ", + "instead.")) + chromatogram(object, rt = rt, mz = mz, + aggregationFun = aggregationFun, missing = missing) + }) + +plotChromatogram <- function(x, rt, col = "#00000060", + lty = 1, type = "l", xlab = "retention time", + ylab = "intensity", main = NULL, ...) { + .Deprecated(msg = paste0("Use of 'plotChromatogram' is deprecated, please ", + "use 'plot' instead.")) + if (!is.list(x) & !is(x, "Chromatogram")) + stop("'x' should be a Chromatogram object or a list of Chromatogram", + " objects.") + if (is(x, "Chromatogram")) + x <- list(x) + isOK <- lapply(x, function(z) { + if (is(z, "Chromatogram")) { + return(TRUE) + } else { + if (is.na(z)) + return(TRUE) + } + FALSE + }) + if (any(!unlist(isOK))) + stop("if 'x' is a list it should only contain Chromatogram objects") + ## Subset the Chromatogram objects if rt provided. 
+ if (!missing(rt)) { + rt <- range(rt) + x <- lapply(x, function(z) { + if (is(z, "Chromatogram")) + filterRt(z, rt = rt) + }) + } + if (length(col) != length(x)) { + col <- rep(col[1], length(x)) + } + ## If main is NULL use the mz range. + if (is.null(main)) { + mzr <- range(lapply(x, mz), na.rm = TRUE, finite = TRUE) + main <- paste0(format(mzr, digits = 7), collapse = " - ") + } + ## Number of measurements we've got per chromatogram. This can be different + ## between samples, from none (if not a single measurement in the rt/mz) + ## to the number of data points that were actually measured. + lens <- unique(lengths(x)) + max_len <- max(lens) + max_len_vec <- rep_len(NA, max_len) + ## Generate the matrix of rt values, columns are samples, rows retention + ## time values. Fill each column with NAs up to the maximum number of values + ## we've got in a sample/file. + rts <- do.call(cbind, lapply(x, function(z) { + cur_len <- length(z) + if (cur_len == 0) + max_len_vec + else { + ## max_len_vec[,] <- NA ## don't need that. get's copied. + max_len_vec[seq_len(cur_len)] <- rtime(z) + max_len_vec + } + })) + ## Same for the intensities. + ints <- do.call(cbind, lapply(x, function(z) { + cur_len <- length(z) + if (length(z) == 0) + max_len_vec + else { + ## max_len_vec[,] <- NA ## don't need that. get's copied. + max_len_vec[seq_len(cur_len)] <- intensity(z) + max_len_vec + } + })) + ## Define the x and y limits + x_lim <- c(0, 1) + y_lim <- c(0, 1) + if (all(is.na(rts))) + if (!missing(rt)) + x_lim <- range(rt) + else + x_lim <- range(rts, na.rm = TRUE, finite = TRUE) + if (!all(is.na(ints))) + y_lim <- range(ints, na.rm = TRUE, finite = TRUE) + ## Identify columns that have only NAs in either intensity or rt - these + ## will not be plotted. + keepCol <- which(apply(ints, MARGIN = 2, function(z) any(!is.na(z))) | + apply(rts, MARGIN = 2, function(z) any(!is.na(z)))) + ## Finally plot the data. 
+ if (length(keepCol)) { + matplot(x = rts[, keepCol, drop = FALSE], + y = ints[, keepCol, drop = FALSE], type = type, lty = lty, + col = col[keepCol], xlab = xlab, ylab = ylab, main = main, + ...) + } else + plot(x = 3, y = 3, pch = NA, xlab = xlab, ylab = ylab, main = main, + xlim = x_lim, ylim = y_lim) +} + diff --git a/R/do_findChromPeaks-functions.R b/R/do_findChromPeaks-functions.R index 9c610e2e2..1f81a0c6c 100644 --- a/R/do_findChromPeaks-functions.R +++ b/R/do_findChromPeaks-functions.R @@ -49,6 +49,12 @@ #' #' @param valsPerSpect Numeric vector with the number of values for each #' spectrum. +#' +#' @param sleep \code{numeric(1)} defining the number of seconds to wait between +#' iterations. Defaults to \code{sleep = 0}. If \code{> 0} a plot is +#' generated visualizing the identified chromatographic peak. Note: this +#' argument is for backward compatibility only and will be removed in +#' future. #' #' @inheritParams findChromPeaks-centWave #' @@ -127,7 +133,8 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect, verboseColumns = FALSE, roiList = list(), firstBaselineCheck = TRUE, - roiScales = NULL) { + roiScales = NULL, + sleep = 0) { if (getOption("originalCentWave", default = TRUE)) { ## message("DEBUG: using original centWave.") .centWave_orig(mz = mz, int = int, scantime = scantime, @@ -137,7 +144,7 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect, mzdiff = mzdiff, fitgauss = fitgauss, noise = noise, verboseColumns = verboseColumns, roiList = roiList, firstBaselineCheck = firstBaselineCheck, - roiScales = roiScales) + roiScales = roiScales, sleep = sleep) } else { ## message("DEBUG: using modified centWave.") .centWave_new(mz = mz, int = int, scantime = scantime, @@ -147,7 +154,7 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect, mzdiff = mzdiff, fitgauss = fitgauss, noise = noise, verboseColumns = verboseColumns, roiList = roiList, firstBaselineCheck = firstBaselineCheck, - 
roiScales = roiScales) + roiScales = roiScales, sleep = sleep) } } ############################################################ @@ -597,41 +604,61 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect, mzCenterFun = mzCenterFun) } - ## if ((sleep >0) && (!is.null(peaks))) { - ## tdp <- scantime[td]; trange <- range(tdp) - ## egauss <- paste(round(peaks[,"egauss"],3),collapse=", ") - ## cdppm <- paste(peaks[,"dppm"],collapse=", ") - ## csn <- paste(peaks[,"sn"],collapse=", ") - ## par(bg = "white") - ## l <- layout(matrix(c(1,2,3),nrow=3,ncol=1,byrow=T),heights=c(.5,.75,2)); - ## par(mar= c(2, 4, 4, 2) + 0.1) - ## plotRaw(object,mzrange=mzrange,rtrange=trange,log=TRUE,title='') - ## title(main=paste(f,': ', round(mzrange[1],4),' - ',round(mzrange[2],4),' m/z , dppm=',cdppm,', EGauss=',egauss ,', S/N =',csn,sep='')) - ## par(mar= c(1, 4, 1, 2) + 0.1) - ## image(y=scales[1:(dim(wCoefs)[2])],z=wCoefs,col=terrain.colors(256),xaxt='n',ylab='CWT coeff.') - ## par(mar= c(4, 4, 1, 2) + 0.1) - ## plot(tdp,d,ylab='Intensity',xlab='Scan Time');lines(tdp,d,lty=2) - ## lines(scantime[otd],od,lty=2,col='blue') ## original mzbox range - ## abline(h=baseline,col='green') - ## bwh <- length(sr[1]:sr[2]) - length(baseline) - ## if (odd(bwh)) {bwh1 <- floor(bwh/2); bwh2 <- bwh1+1} else {bwh1<-bwh2<-bwh/2} - ## if (any(!is.na(peaks[,"scpos"]))) - ## { ## plot centers and width found through wavelet analysis - ## abline(v=scantime[na.omit(peaks[(peaks[,"scpos"] >0),"scpos"])],col='red') - ## } - ## abline(v=na.omit(c(peaks[,"rtmin"],peaks[,"rtmax"])),col='green',lwd=1) - ## if (fitgauss) { - ## tdx <- seq(min(td),max(td),length.out=200) - ## tdxp <- seq(trange[1],trange[2],length.out=200) - ## fitted.peaks <- which(!is.na(peaks[,"mu"])) - ## for (p in fitted.peaks) - ## { ## plot gaussian fits - ## yg<-gauss(tdx,peaks[p,"h"],peaks[p,"mu"],peaks[p,"sigma"]) - ## lines(tdxp,yg,col='blue') - ## } - ## } - ## Sys.sleep(sleep) - ## } + ## BEGIN - plotting/sleep + if 
((sleep >0) && (!is.null(peaks))) { + tdp <- scantime[td]; trange <- range(tdp) + egauss <- paste(round(peaks[,"egauss"],3),collapse=", ") + cdppm <- paste(peaks[,"dppm"],collapse=", ") + csn <- paste(peaks[,"sn"],collapse=", ") + par(bg = "white") + l <- layout(matrix(c(1,2,3),nrow=3,ncol=1,byrow=T),heights=c(.5,.75,2)); + par(mar= c(2, 4, 4, 2) + 0.1) + ## plotRaw(object,mzrange=mzrange,rtrange=trange,log=TRUE,title='') + ## Do plotRaw manually. + raw_mat <- .rawMat(mz = mz, int = int, scantime = scantime, + valsPerSpect = valsPerSpect, mzrange = mzrange, + rtrange = rtrange, log = TRUE) + if (nrow(raw_mat) > 0) { + y <- raw_mat[, "intensity"] + ylim <- range(y) + y <- y / ylim[2] + colorlut <- terrain.colors(16) + col <- colorlut[y * 15 + 1] + plot(raw_mat[, "time"], raw_mat[, "mz"], pch = 20, cex = .5, + main = "", xlab = "Seconds", ylab = "m/z", col = col, + xlim = trange) + } else { + plot(c(NA, NA), main = "", xlab = "Seconds", ylab = "m/z", + xlim = trange, ylim = mzrange) + } + ## done + title(main=paste(f,': ', round(mzrange[1],4),' - ',round(mzrange[2],4),' m/z , dppm=',cdppm,', EGauss=',egauss ,', S/N =',csn,sep='')) + par(mar= c(1, 4, 1, 2) + 0.1) + image(y=scales[1:(dim(wCoefs)[2])],z=wCoefs,col=terrain.colors(256),xaxt='n',ylab='CWT coeff.') + par(mar= c(4, 4, 1, 2) + 0.1) + plot(tdp,d,ylab='Intensity',xlab='Scan Time');lines(tdp,d,lty=2) + lines(scantime[otd],od,lty=2,col='blue') ## original mzbox range + abline(h=baseline,col='green') + bwh <- length(sr[1]:sr[2]) - length(baseline) + if (odd(bwh)) {bwh1 <- floor(bwh/2); bwh2 <- bwh1+1} else {bwh1<-bwh2<-bwh/2} + if (any(!is.na(peaks[,"scpos"]))) + { ## plot centers and width found through wavelet analysis + abline(v=scantime[na.omit(peaks[(peaks[,"scpos"] >0),"scpos"])],col='red') + } + abline(v=na.omit(c(peaks[,"rtmin"],peaks[,"rtmax"])),col='green',lwd=1) + if (fitgauss) { + tdx <- seq(min(td),max(td),length.out=200) + tdxp <- seq(trange[1],trange[2],length.out=200) + fitted.peaks <- 
which(!is.na(peaks[,"mu"])) + for (p in fitted.peaks) + { ## plot gaussian fits + yg<-gauss(tdx,peaks[p,"h"],peaks[p,"mu"],peaks[p,"sigma"]) + lines(tdxp,yg,col='blue') + } + } + Sys.sleep(sleep) + } + ## -- END plotting/sleep if (!is.null(peaks)) { peaklist[[length(peaklist) + 1]] <- peaks @@ -691,6 +718,8 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect, noise = 0, ## noise.local=TRUE, sleep = 0, verboseColumns = FALSE, roiList = list(), firstBaselineCheck = TRUE, roiScales = NULL) { + if (sleep) + warning("Parameter 'sleep' is defunct") ## TODO @jo Ensure in upstream method that data is in centroided mode! ## TODO @jo Ensure the upstream method did eventual sub-setting on scanrange ## Input argument checking. @@ -1514,7 +1543,8 @@ do_findChromPeaks_matchedFilter <- function(mz, snthresh = 10, steps = 2, mzdiff = 0.8 - binSize * steps, - index = FALSE + index = FALSE, + sleep = 0 ){ ## Use original code if (useOriginalCode()) { @@ -1523,12 +1553,13 @@ do_findChromPeaks_matchedFilter <- function(mz, return(.matchedFilter_orig(mz, int, scantime, valsPerSpect, binSize, impute, baseValue, distance, fwhm, sigma, max, snthresh, - steps, mzdiff, index)) + steps, mzdiff, index, sleep = sleep)) } else { return(.matchedFilter_binYonX_no_iter(mz, int, scantime, valsPerSpect, binSize, impute, baseValue, distance, fwhm, sigma, max, - snthresh, steps, mzdiff, index + snthresh, steps, mzdiff, index, + sleep = sleep )) } } @@ -1546,7 +1577,8 @@ do_findChromPeaks_matchedFilter <- function(mz, snthresh = 10, steps = 2, mzdiff = 0.8 - binSize * steps, - index = FALSE + index = FALSE, + sleep = 0 ){ .Deprecated(msg = paste0("Use of the original code with iterative binning", " is discouraged!")) @@ -1665,6 +1697,21 @@ do_findChromPeaks_matchedFilter <- function(mz, intf <- pwid*sum(yfilt[peakrange[1]:peakrange[2]]) maxo <- max(ysums[peakrange[1]:peakrange[2]]) maxf <- yfilt[maxy] + + ## -- begin sleep/plot + if (sleep > 0) { + plot(scantime, yfilt, type = 
"l", + main = paste(mass[i], "-", mass[i+1]), + ylim = c(-gmax/3, gmax)) + points(cbind(scantime, yfilt)[peakrange[1]:peakrange[2],], + type = "l", col = "red") + points(scantime, colSums(ymat), type = "l", col = "blue", + lty = "dashed") + abline(h = snthresh*noise, col = "red") + Sys.sleep(sleep) + } + ## -- end sleep plot + yfilt[peakrange[1]:peakrange[2]] <- 0 num <- num + 1 ## Double the size of the output matrix if it's full @@ -1721,7 +1768,8 @@ do_findChromPeaks_matchedFilter <- function(mz, snthresh = 10, steps = 2, mzdiff = 0.8 - binSize * steps, - index = FALSE + index = FALSE, + sleep = 0 ){ ## Input argument checking. if (missing(mz) | missing(int) | missing(scantime) | missing(valsPerSpect)) @@ -1853,6 +1901,21 @@ do_findChromPeaks_matchedFilter <- function(mz, intf <- pwid*sum(yfilt[peakrange[1]:peakrange[2]]) maxo <- max(ysums[peakrange[1]:peakrange[2]]) maxf <- yfilt[maxy] + + ## begin sleep/plot + if (sleep > 0) { + plot(scantime, yfilt, type = "l", + main = paste(mass[i], "-", mass[i+1]), + ylim=c(-gmax/3, gmax)) + points(cbind(scantime, yfilt)[peakrange[1]:peakrange[2],], + type = "l", col = "red") + points(scantime, colSums(ymat), type = "l", col = "blue", + lty = "dashed") + abline(h = snthresh*noise, col = "red") + Sys.sleep(sleep) + } + ## end sleep/plot + yfilt[peakrange[1]:peakrange[2]] <- 0 num <- num + 1 ResList[[num]] <- c(massmean, mzrange[1], mzrange[2], maxy, diff --git a/R/functions-Chromatogram.R b/R/functions-Chromatogram.R deleted file mode 100644 index 56b467595..000000000 --- a/R/functions-Chromatogram.R +++ /dev/null @@ -1,339 +0,0 @@ -#' @include DataClasses.R -.SUPPORTED_AGG_FUN_CHROM <- c("sum", "max", "min", "mean") -names(.SUPPORTED_AGG_FUN_CHROM) <- - c("Total ion chromatogram (TIC).", "Base peak chromatogram (BPC).", - "Intensity representing the minimum intensity across the mz range.", - "Intensity representing the mean intensity across the mz range.") - -#' @title Validation function for Chromatogram objects -#' -#' 
@description This function can be used instead of the \code{validObject} to -#' check if the chromatogram is valid, without having to call the validity -#' method on all super classes. -#' -#' @param object A \code{Chromatogram} object. -#' -#' @return \code{TRUE} if the \code{object} is valid and the error messages -#' otherwise (i.e. a \code{character}). -#' -#' @author Johannes Rainer -#' -#' @noRd -validChromatogram <- function(object) { - msg <- character() - if (length(object@rtime) != length(object@intensity)) - msg <- c(msg, "Length of 'rt' and 'intensity' have to match!") - if (is.unsorted(object@rtime)) - msg <- c(msg, paste0("'rtime' has to be increasingly ordered!")) - if (length(object@mz) > 0 & length(object@mz) != 2) - msg <- c(msg, paste0("'mz' is supposed to contain the ", - "minimum and maximum mz values for the ", - "chromatogram.")) - if (!all(is.na(object@mz))) - if (is.unsorted(object@mz)) - msg <- c(msg, "'mz' has to be increasingly ordered!") - if (length(object@filterMz) > 0 & length(object@filterMz) != 2) - msg <- c(msg, paste0("'filterMz' is supposed to contain the ", - "minimum and maximum mz values of the filter", - " used to create the chromatogram.")) - if (length(object@precursorMz) > 0 & length(object@precursorMz) != 2) - msg <- c(msg, paste0("'precursorMz' is supposed to be a numeric of", - " length 2.")) - if (length(object@productMz) > 0 & length(object@productMz) != 2) - msg <- c(msg, paste0("'productMz' is supposed to be a numeric of", - " length 2.")) - if (length(object@fromFile) > 1 | any(object@fromFile < 0)) - msg <- c(msg, paste0("'fromFile' is supposed to be a single ", - "positive integer!")) - if (length(object@aggregationFun) > 1) - msg <- c(msg, "Length of 'aggregationFun' has to be 1!") - if (length(object@aggregationFun)) { - if (!object@aggregationFun %in% .SUPPORTED_AGG_FUN_CHROM) - msg <- c(msg, paste0("Invalid value for 'aggregationFun'! 
only ", - paste0("'", .SUPPORTED_AGG_FUN_CHROM,"'", - collapse = ","), " are allowed!")) - } - if (length(msg) == 0) TRUE - else msg -} - -#' @description \code{Chromatogram}: create an instance of the -#' \code{Chromatogram} class. -#' -#' @param rtime \code{numeric} with the retention times (length has to be equal -#' to the length of \code{intensity}). -#' -#' @param intensity \code{numeric} with the intensity values (length has to be -#' equal to the length of \code{rtime}). -#' -#' @param mz \code{numeric(2)} representing the mz value range (min, max) -#' on which the chromatogram was created. This is supposed to contain the -#' \emph{real} range of mz values in contrast to the \code{filterMz} below. -#' If not applicable use \code{mzrange = c(0, 0)}. -#' -#' @param filterMz \code{numeric(2)} representing the mz value range (min, -#' max) that was used to filter the original object on mz dimension. If not -#' applicable use \code{filterMz = c(0, 0)}. -#' -#' @param precursorMz \code{numeric(2)} for SRM/MRM transitions. -#' Represents the mz of the precursor ion. See details for more information. -#' -#' @param productMz \code{numeric(2)} for SRM/MRM transitions. -#' Represents the mz of the product. See details for more information. -#' -#' @param fromFile \code{integer(1)} the index of the file within the -#' \code{\link{OnDiskMSnExp}} or \code{\link{XCMSnExp}} from which the -#' chromatogram was extracted. -#' -#' @param aggregationFun \code{character} string specifying the function that -#' was used to aggregate intensity values for the same retention time across -#' the mz range. Supported are \code{"sum"} (total ion chromatogram), -#' \code{"max"} (base peak chromatogram), \code{"min"} and \code{"mean"}. -#' -#' @slot .__classVersion__,rtime,intensity,mz,filterMz,precursorMz,productMz,fromFile,aggregationFun See corresponding parameter above. 
-#' -#' @rdname Chromatogram-class -Chromatogram <- function(rtime = numeric(), intensity = numeric(), - mz = c(NA_real_, NA_real_), - filterMz = c(NA_real_, NA_real_), - precursorMz = c(NA_real_, NA_real_), - productMz = c(NA_real_, NA_real_), - fromFile = integer(), - aggregationFun = character()) { - ## Check if we have to re-order the data (issue #145). - if (is.unsorted(rtime)) { - idx <- order(rtime) - rtime <- rtime[idx] - intensity <- intensity[idx] - } - return(new("Chromatogram", rtime = rtime, intensity = intensity, - mz = range(mz), filterMz = range(filterMz), - precursorMz = range(precursorMz), productMz = range(productMz), - fromFile = as.integer(fromFile), aggregationFun = aggregationFun)) -} - -#' @title Plot Chromatogram objects -#' -#' @description \code{plotChromatogram} creates a chromatogram plot for a -#' single \code{Chromatogram} object or a \code{list} of -#' \code{\link{Chromatogram}} objects (one line for each -#' \code{\link{Chromatogram}}/sample). -#' -#' @details The \code{plotChromatogram} function allows to efficiently plot -#' the chromatograms of several samples into a single plot. -#' -#' @param x For \code{plotChromatogram}: \code{list} of -#' \code{\link{Chromatogram}} objects. Such as extracted from an -#' \code{\link{XCMSnExp}} object by the \code{\link{extractChromatograms}} -#' method. -#' For \code{highlightChromPeaks}: \code{XCMSnExp} object with the detected -#' peaks. -#' -#' @param rt For \code{plotChromatogram}: \code{numeric(2)}, optional parameter -#' to subset each \code{Chromatogram} by retention time prior to plotting. -#' Alternatively, the plot could be subsetted by passing a \code{xlim} -#' parameter. -#' For \code{highlightChromPeaks}: \code{numeric(2)} with the -#' retention time range from which peaks should be extracted and plotted. -#' -#' @param col For \code{plotChromatogram}: color definition for each -#' line/sample. 
Has to have the same length as samples/elements in \code{x}, -#' otherwise \code{col[1]} is recycled to generate a vector of -#' \code{length(x)}. -#' For \code{highlightChromPeaks}: color to be used to fill the -#' rectangle. -#' -#' @param lty the line type. See \code{\link[graphics]{plot}} for more details. -#' -#' @param type the plotting type. See \code{\link[graphics]{plot}} for more -#' details. -#' For \code{highlightChromPeaks}: \code{character(1)} defining how the peak -#' should be highlighted: \code{type = "rect"} draws a rectangle -#' representing the peak definition, \code{type = "point"} indicates a -#' chromatographic peak with a single point at the position of the peak's -#' \code{"rt"} and \code{"maxo"}. -#' -#' @param xlab \code{character(1)} with the label for the x-axis. -#' -#' @param ylab \code{character(1)} with the label for the y-axis. -#' -#' @param main The title for the plot. For \code{plotChromatogram}: if -#' \code{main = NULL} the mz range of the \code{Chromatogram} object(s) will -#' be used as the title. -#' -#' @param ... additional parameters to the \code{\link{matplot}} or \code{plot} -#' function. -#' -#' @seealso \code{\link{extractChromatograms}} for how to extract a list of -#' \code{\link{Chromatogram}} objects from an \code{\link{XCMSnExp}} -#' objects. -#' -#' @author Johannes Rainer -#' -#' @examples -#' -#' ## Perform a fast peak detection. 
-#' library(xcms) -#' library(faahKO) -#' faahko_3_files <- c(system.file('cdf/KO/ko15.CDF', package = "faahKO"), -#' system.file('cdf/KO/ko16.CDF', package = "faahKO"), -#' system.file('cdf/KO/ko18.CDF', package = "faahKO")) -#' -#' od <- readMSData2(faahko_3_files) -#' -#' od <- findChromPeaks(od, param = CentWaveParam(snthresh = 20, noise = 10000)) -#' -#' rtr <- c(2600, 2750) -#' mzr <- c(344, 344) -#' chrs <- extractChromatograms(od, rt = rtr, mz = mzr) -#' -#' ## Plot a single chromatogram -#' plotChromatogram(chrs[[1]]) -#' -#' ## Plot all chromatograms at once, using different colors for each. -#' plotChromatogram(chrs, col = c("#FF000080", "#00FF0080", "#0000FF80"), lwd = 2) -#' -#' ## Highlight identified chromatographic peaks. -#' highlightChromPeaks(od, rt = rtr, mz = mzr, -#' col = c("#FF000005", "#00FF0005", "#0000FF05"), -#' border = c("#FF000040", "#00FF0040", "#0000FF40")) -#' -plotChromatogram <- function(x, rt, col = "#00000060", - lty = 1, type = "l", xlab = "retention time", - ylab = "intensity", main = NULL, ...) { - if (!is.list(x) & !is(x, "Chromatogram")) - stop("'x' should be a Chromatogram object or a list of Chromatogram", - " objects.") - if (is(x, "Chromatogram")) - x <- list(x) - isOK <- lapply(x, function(z) { - if (is(z, "Chromatogram")) { - return(TRUE) - } else { - if (is.na(z)) - return(TRUE) - } - FALSE - }) - if (any(!unlist(isOK))) - stop("if 'x' is a list it should only contain Chromatogram objects") - ## Subset the Chromatogram objects if rt provided. - if (!missing(rt)) { - rt <- range(rt) - x <- lapply(x, function(z) { - if (is(z, "Chromatogram")) - filterRt(z, rt = rt) - }) - } - if (length(col) != length(x)) { - col <- rep(col[1], length(x)) - } - ## If main is NULL use the mz range. - if (is.null(main)) { - mzr <- range(lapply(x, mz), na.rm = TRUE, finite = TRUE) - main <- paste0(format(mzr, digits = 7), collapse = " - ") - } - ## Number of measurements we've got per chromatogram. 
This can be different - ## between samples, from none (if not a single measurement in the rt/mz) - ## to the number of data points that were actually measured. - lens <- unique(lengths(x)) - max_len <- max(lens) - max_len_vec <- rep_len(NA, max_len) - ## Generate the matrix of rt values, columns are samples, rows retention - ## time values. Fill each column with NAs up to the maximum number of values - ## we've got in a sample/file. - rts <- do.call(cbind, lapply(x, function(z) { - cur_len <- length(z) - if (cur_len == 0) - max_len_vec - else { - ## max_len_vec[,] <- NA ## don't need that. get's copied. - max_len_vec[seq_len(cur_len)] <- rtime(z) - max_len_vec - } - })) - ## Same for the intensities. - ints <- do.call(cbind, lapply(x, function(z) { - cur_len <- length(z) - if (length(z) == 0) - max_len_vec - else { - ## max_len_vec[,] <- NA ## don't need that. get's copied. - max_len_vec[seq_len(cur_len)] <- intensity(z) - max_len_vec - } - })) - ## Define the x and y limits - x_lim <- c(0, 1) - y_lim <- c(0, 1) - if (all(is.na(rts))) - if (!missing(rt)) - x_lim <- range(rt) - else - x_lim <- range(rts, na.rm = TRUE, finite = TRUE) - if (!all(is.na(ints))) - y_lim <- range(ints, na.rm = TRUE, finite = TRUE) - ## Identify columns that have only NAs in either intensity or rt - these - ## will not be plotted. - keepCol <- which(apply(ints, MARGIN = 2, function(z) any(!is.na(z))) | - apply(rts, MARGIN = 2, function(z) any(!is.na(z)))) - ## Finally plot the data. - if (length(keepCol)) { - matplot(x = rts[, keepCol, drop = FALSE], - y = ints[, keepCol, drop = FALSE], type = type, lty = lty, - col = col[keepCol], xlab = xlab, ylab = ylab, main = main, - ...) - } else - plot(x = 3, y = 3, pch = NA, xlab = xlab, ylab = ylab, main = main, - xlim = x_lim, ylim = y_lim) -} - - - -#' @description The \code{highlightChromPeaks} function adds chromatographic -#' peak definitions to an existing plot, such as one created by the -#' \code{plotChromatograms} function. 
-#' -#' @param mz \code{numeric(2)} with the mz range from which the peaks should -#' be extracted and plotted. -#' -#' @param border colors to be used to color the border of the rectangles. Has to -#' be equal to the number of samples in \code{x}. -#' -#' @param lwd \code{numeric(1)} defining the width of the line/border. -#' -#' @rdname plotChromatogram -highlightChromPeaks <- function(x, rt, mz, - border = rep("00000040", length(fileNames(x))), - lwd = 1, col = NA, type = c("rect", "point"), - ...) { - type <- match.arg(type) - if (missing(rt)) - rt <- c(-Inf, Inf) - if (missing(mz)) - mz <- c(-Inf, Inf) - if (!is(x, "XCMSnExp")) - stop("'x' has to be a XCMSnExp object") - if (!hasChromPeaks(x)) - stop("'x' does not contain any detected peaks") - pks <- chromPeaks(x, rt = rt, mz = mz, ppm = 0) - if (length(col) != length(fileNames(x))) - col <- rep(col[1], length(fileNames(x))) - if (length(border) != length(fileNames(x))) - border <- rep(border[1], length(fileNames(x))) - if (length(pks)) { - if (type == "rect") - rect(xleft = pks[, "rtmin"], xright = pks[, "rtmax"], - ybottom = rep(0, nrow(pks)), ytop = pks[, "maxo"], - border = border[pks[, "sample"]], lwd = lwd, - col = col[pks[, "sample"]]) - if (type == "point") { - if (any(is.na(col))) - col <- border - ## Draw a star at the position defined by the "rt" column - points(x = pks[, "rt"], y = pks[, "maxo"], - col = col[pks[, "sample"]], ...) 
- } - } -} - diff --git a/R/functions-IO.R b/R/functions-IO.R index 362a1413b..62db4cafd 100644 --- a/R/functions-IO.R +++ b/R/functions-IO.R @@ -127,7 +127,7 @@ readRawData <- function(x, includeMSn = FALSE, dropEmptyScans = TRUE, } if (is.na(backend)) stop("File type of file ", x, " can not be determined.") - if (isMzMLFile(x)) + if (isMzMLFile(x) | backend == "pwiz") header_cols <- c(header_cols, "polarity") msd <- mzR::openMSfile(x, backend = backend) on.exit(if(!is.null(msd)) mzR::close(msd)) diff --git a/R/functions-Params.R b/R/functions-Params.R index 5cd15d4ba..6ea734c77 100644 --- a/R/functions-Params.R +++ b/R/functions-Params.R @@ -2,13 +2,15 @@ #' @include DataClasses.R ## -##' @description Extract all slot values and put them into a list, names being -##' the slot names. If a slot \code{addParams} exist its content will be -##' appended to the returned list. -##' -##' @param x A Param class. -##' @author Johannes Rainer -##' @noRd +#' @description Extract all slot values and put them into a list, names being +#' the slot names. If a slot \code{addParams} exist its content will be +#' appended to the returned list. +#' +#' @param x A Param class. +#' +#' @author Johannes Rainer +#' +#' @noRd .param2list <- function(x) { ## Get all slot names, skip those matching the provided pattern. sNames <- slotNames(x) @@ -62,22 +64,23 @@ ############################################################ ## GenericParam -#' @return The \code{GenericParam} function returns a \code{GenericParam} object. +#' @return The \code{GenericParam} function returns a \code{GenericParam} +#' object. +#' #' @param fun \code{character} representing the name of the function. +#' #' @param args \code{list} (ideally named) with the arguments to the function. 
+#' #' @rdname GenericParam GenericParam <- function(fun = character(), args = list()) { return(new("GenericParam", fun = fun, args = args)) } -############################################################ -## CentWaveParam - -##' @return The \code{CentWaveParam} function returns a \code{CentWaveParam} -##' class instance with all of the settings specified for chromatographic peak -##' detection by the centWave method. -##' -##' @rdname findChromPeaks-centWave +#' @return The \code{CentWaveParam} function returns a \code{CentWaveParam} +#' class instance with all of the settings specified for chromatographic +#' peak detection by the centWave method. +#' +#' @rdname findChromPeaks-centWave CentWaveParam <- function(ppm = 25, peakwidth = c(20, 50), snthresh = 10, prefilter = c(3, 100), mzCenterFun = "wMean", integrate = 1L, mzdiff = -0.001, fitgauss = FALSE, @@ -91,15 +94,12 @@ CentWaveParam <- function(ppm = 25, peakwidth = c(20, 50), snthresh = 10, firstBaselineCheck = firstBaselineCheck, roiScales = roiScales)) } - -############################################################ -## MatchedFilterParam - -##' @return The \code{MatchedFilterParam} function returns a -##' \code{MatchedFilterParam} class instance with all of the settings specified -##' for chromatographic detection by the \emph{matchedFilter} method. -##' -##' @rdname findChromPeaks-matchedFilter +#' @return The \code{MatchedFilterParam} function returns a +#' \code{MatchedFilterParam} class instance with all of the settings +#' specified for chromatographic detection by the \emph{matchedFilter} +#' method. +#' +#' @rdname findChromPeaks-matchedFilter MatchedFilterParam <- function(binSize = 0.1, impute = "none", baseValue = numeric(), distance = numeric(), fwhm = 30, sigma = fwhm / 2.3548, @@ -111,7 +111,8 @@ MatchedFilterParam <- function(binSize = 0.1, impute = "none", mzdiff = mzdiff, index = index)) } #' Convert the impute method to the old-style method name (e.g. 
for profMat -#' calls) +#' calls) +#' #' @noRd .impute2method <- function(x) { if (impute(x) == "none") @@ -123,14 +124,12 @@ MatchedFilterParam <- function(binSize = 0.1, impute = "none", return("intlin") } -############################################################ -## MassifquantParam - -##' @return The \code{MassifquantParam} function returns a \code{MassifquantParam} -##' class instance with all of the settings specified for chromatographic peak -##' detection by the \emph{massifquant} method. -##' -##' @rdname findChromPeaks-massifquant +#' @return The \code{MassifquantParam} function returns a +#' \code{MassifquantParam} class instance with all of the settings +#' specified for chromatographic peak detection by the \emph{massifquant} +#' method. +#' +#' @rdname findChromPeaks-massifquant MassifquantParam <- function(ppm = 25, peakwidth = c(20, 50), snthresh = 10, prefilter = c(3, 100), mzCenterFun = "wMean", integrate = 1L, mzdiff = -0.001, fitgauss = FALSE, @@ -147,46 +146,44 @@ MassifquantParam <- function(ppm = 25, peakwidth = c(20, 50), snthresh = 10, withWave = withWave)) } -############################################################ -## MSWParam -##' @inheritParams findChromPeaks-centWave -##' -##' @param scales Numeric defining the scales of the continuous wavelet -##' transform (CWT). -##' -##' @param nearbyPeak logical(1) whether to include nearby peaks of -##' major peaks. -##' -##' @param peakScaleRange numeric(1) defining the scale range of the -##' peak (larger than 5 by default). -##' -##' @param ampTh numeric(1) defining the minimum required relative -##' amplitude of the peak (ratio of the maximum of CWT coefficients). -##' -##' @param minNoiseLevel numeric(1) defining the minimum noise level -##' used in computing the SNR. -##' -##' @param ridgeLength numeric(1) defining the minimum highest scale -##' of the peak in 2-D CWT coefficient matrix. 
-##' -##' @param peakThr numeric(1) with the minimum absolute intensity -##' (above baseline) of peaks to be picked. If provided, the smoothing function -##' \code{\link[MassSpecWavelet]{sav.gol}} function is called to estimate the -##' local intensity. -##' -##' @param tuneIn logical(1) whther to tune in the parameter -##' estimation of the detected peaks. -##' -##' @param ... Additional parameters to be passed to the -##' \code{\link[MassSpecWavelet]{identifyMajorPeaks}} and -##' \code{\link[MassSpecWavelet]{sav.gol}} functions from the -##' \code{MassSpecWavelet} package. -##' -##' @return The \code{MSWParam} function returns a \code{MSWParam} -##' class instance with all of the settings specified for peak detection by -##' the \emph{MSW} method. -##' -##' @rdname findPeaks-MSW +#' @inheritParams findChromPeaks-centWave +#' +#' @param scales Numeric defining the scales of the continuous wavelet +#' transform (CWT). +#' +#' @param nearbyPeak logical(1) whether to include nearby peaks of +#' major peaks. +#' +#' @param peakScaleRange numeric(1) defining the scale range of the +#' peak (larger than 5 by default). +#' +#' @param ampTh numeric(1) defining the minimum required relative +#' amplitude of the peak (ratio of the maximum of CWT coefficients). +#' +#' @param minNoiseLevel numeric(1) defining the minimum noise level +#' used in computing the SNR. +#' +#' @param ridgeLength numeric(1) defining the minimum highest scale +#' of the peak in 2-D CWT coefficient matrix. +#' +#' @param peakThr numeric(1) with the minimum absolute intensity +#' (above baseline) of peaks to be picked. If provided, the smoothing +#' function \code{\link[MassSpecWavelet]{sav.gol}} is called to +#' estimate the local intensity. +#' +#' @param tuneIn logical(1) whether to tune in the parameter +#' estimation of the detected peaks. +#' +#' @param ...
Additional parameters to be passed to the +#' \code{\link[MassSpecWavelet]{identifyMajorPeaks}} and +#' \code{\link[MassSpecWavelet]{sav.gol}} functions from the +#' \code{MassSpecWavelet} package. +#' +#' @return The \code{MSWParam} function returns a \code{MSWParam} +#' class instance with all of the settings specified for peak detection by +#' the \emph{MSW} method. +#' +#' @rdname findPeaks-MSW MSWParam <- function(snthresh = 3, verboseColumns = FALSE, scales = c(1, seq(2, 30, 2), seq(32, 64, 4)), nearbyPeak = TRUE, peakScaleRange = 5, @@ -203,15 +200,12 @@ MSWParam <- function(snthresh = 3, verboseColumns = FALSE, peakThr = peakThr, tuneIn = tuneIn, addParams = addParams)) } -############################################################ -## CentWavePredIsoParam - -##' @return The \code{CentWavePredIsoParam} function returns a -##' \code{CentWavePredIsoParam} class instance with all of the settings -##' specified for the two-step centWave-based peak detection considering also -##' isotopes. -##' -##' @rdname findChromPeaks-centWaveWithPredIsoROIs +#' @return The \code{CentWavePredIsoParam} function returns a +#' \code{CentWavePredIsoParam} class instance with all of the settings +#' specified for the two-step centWave-based peak detection considering also +#' isotopes. +#' +#' @rdname findChromPeaks-centWaveWithPredIsoROIs CentWavePredIsoParam <- function(ppm = 25, peakwidth = c(20, 50), snthresh = 10, prefilter = c(3, 100), mzCenterFun = "wMean", integrate = 1L, mzdiff = -0.001, fitgauss = FALSE, @@ -230,15 +224,11 @@ CentWavePredIsoParam <- function(ppm = 25, peakwidth = c(20, 50), snthresh = 10, mzIntervalExtension = mzIntervalExtension, polarity = polarity)) } - -############################################################ -## PeakDensityParam - -##' @return The \code{PeakDensityParam} function returns a -##' \code{PeakDensityParam} class instance with all of the settings -##' specified for chromatographic peak alignment based on peak densities. 
-##' -##' @rdname groupChromPeaks-density +#' @return The \code{PeakDensityParam} function returns a +#' \code{PeakDensityParam} class instance with all of the settings +#' specified for chromatographic peak alignment based on peak densities. +#' +#' @rdname groupChromPeaks-density PeakDensityParam <- function(sampleGroups = numeric(), bw = 30, minFraction = 0.5, minSamples = 1, binSize = 0.25, maxFeatures = 50) { @@ -247,14 +237,11 @@ PeakDensityParam <- function(sampleGroups = numeric(), bw = 30, binSize = binSize, maxFeatures = maxFeatures)) } -############################################################ -## MzClustParam - -##' @return The \code{MzClustParam} function returns a -##' \code{MzClustParam} class instance with all of the settings -##' specified for high resolution single spectra peak alignment. -##' -##' @rdname groupChromPeaks-mzClust +#' @return The \code{MzClustParam} function returns a +#' \code{MzClustParam} class instance with all of the settings +#' specified for high resolution single spectra peak alignment. +#' +#' @rdname groupChromPeaks-mzClust MzClustParam <- function(sampleGroups = numeric(), ppm = 20, absMz = 0, minFraction = 0.5, minSamples = 1) { return(new("MzClustParam", sampleGroups = sampleGroups, ppm = ppm, @@ -262,15 +249,11 @@ MzClustParam <- function(sampleGroups = numeric(), ppm = 20, absMz = 0, minSamples = minSamples)) } - -############################################################ -## NearestPeaksParam - -##' @return The \code{NearestPeaksParam} function returns a -##' \code{NearestPeaksParam} class instance with all of the settings -##' specified for peak alignment based on peak proximity. -##' -##' @rdname groupChromPeaks-nearest +#' @return The \code{NearestPeaksParam} function returns a +#' \code{NearestPeaksParam} class instance with all of the settings +#' specified for peak alignment based on peak proximity. 
+#' +#' @rdname groupChromPeaks-nearest NearestPeaksParam <- function(sampleGroups = numeric(), mzVsRtBalance = 10, absMz = 0.2, absRt = 15, kNN = 10) { return(new("NearestPeaksParam", sampleGroups = sampleGroups, @@ -278,16 +261,12 @@ NearestPeaksParam <- function(sampleGroups = numeric(), mzVsRtBalance = 10, kNN = kNN)) } - -############################################################ -## PeakGroupsParam - -##' @return The \code{PeakGroupsParam} function returns a -##' \code{PeakGroupsParam} class instance with all of the settings -##' specified for retention time adjustment based on \emph{house keeping} -##' features/peak groups. -##' -##' @rdname adjustRtime-peakGroups +#' @return The \code{PeakGroupsParam} function returns a +#' \code{PeakGroupsParam} class instance with all of the settings +#' specified for retention time adjustment based on \emph{house keeping} +#' features/peak groups. +#' +#' @rdname adjustRtime-peakGroups PeakGroupsParam <- function(minFraction = 0.9, extraPeaks = 1, smooth = "loess", span = 0.2, family = "gaussian", @@ -297,15 +276,11 @@ PeakGroupsParam <- function(minFraction = 0.9, extraPeaks = 1, family = family, peakGroupsMatrix = peakGroupsMatrix)) } - -############################################################ -## ObiwarpParam - -##' @return The \code{ObiwarpParam} function returns a -##' \code{ObiwarpParam} class instance with all of the settings -##' specified for obiwarp retention time adjustment and alignment. -##' -##' @rdname adjustRtime-obiwarp +#' @return The \code{ObiwarpParam} function returns a +#' \code{ObiwarpParam} class instance with all of the settings +#' specified for obiwarp retention time adjustment and alignment. 
+#' +#' @rdname adjustRtime-obiwarp ObiwarpParam <- function(binSize = 1, centerSample = integer(), response = 1L, distFun = "cor_opt", gapInit = numeric(), gapExtend = numeric(), factorDiag = 2, factorGap = 1, @@ -318,11 +293,9 @@ ObiwarpParam <- function(binSize = 1, centerSample = integer(), response = 1L, initPenalty = initPenalty)) } -############################################################ -## FillChromPeaksParam - #' @return The \code{FillChromPeaksParam} function returns a -#' \code{FillChromPeaksParam} object. +#' \code{FillChromPeaksParam} object. +#' #' @rdname fillChromPeaks FillChromPeaksParam <- function(expandMz = 0, expandRt = 0, ppm = 0) { return(new("FillChromPeaksParam", expandMz = expandMz, expandRt = expandRt, diff --git a/R/functions-XCMSnExp.R b/R/functions-XCMSnExp.R index b8f81ad90..bb56a23ba 100644 --- a/R/functions-XCMSnExp.R +++ b/R/functions-XCMSnExp.R @@ -183,195 +183,195 @@ dropProcessHistoriesList <- function(x, type, num = -1) { } -#' @description This function extracts chromatograms efficiently for multiple -#' rt and mz ranges by loading the data per file only once and performing -#' the mz subsetting on the already loaded Spectrum1 classes. -#' -#' @note Ensure that x is an OnDiskMSnExp and not an e.g. XCMSnExp object. -#' Subsetting etc an XCMSnExp might take longer. -#' -#' @param rt \code{matrix} with two columns and number of rows corresponding to -#' the number of ranges to extract. -#' -#' @param mz \code{matrix} with two columns and number of rows corresponding to -#' the number of ranges to extract. nrow of rt and mz have to match. -#' -#' @param x OnDiskMSnExp object from which to extract the chromatograms. -#' -#' @param return.type either \code{"list"} or \code{"matrix"} to return the -#' result as a list or as a matrix. -#' -#' @param missingValue value to be used as intensity if no signal was measured -#' for a given rt. -#' -#' @return A \code{list} or \code{matrix} with the \code{Chromatogram} objects. 
-#' If no data was present for the specified \code{rtrange} and -#' \code{mzrange} the function returns a \code{list} of length \code{0}. -#' The \code{list} is arranged first by ranges and then by files, such that -#' \code{result[[1]]} returns a \code{list} of \code{Chromatogram} objects -#' for the same rt/mz range. -#' For \code{return.type = "matrix"} a \code{matrix} is returned with rows -#' corresponding to ranges and columns to files/samples. \code{result[, 1]} -#' will thus return a \code{list} of \code{Chromatogram} objects for the -#' first sample/file, while \code{result[1, ]} returns a \code{list} of -#' \code{Chromatogram} objects for the same rt/mz range for all files. -#' -#' @author Johannes Rainer -#' -#' @noRd -.extractMultipleChromatograms <- function(x, rt, mz, aggregationFun = "sum", - BPPARAM = bpparam(), - return.type = c("list", "matrix"), - missingValue = NA_real_) { - return.type <- match.arg(return.type) - missingValue <- as.numeric(missingValue) - if (!any(.SUPPORTED_AGG_FUN_CHROM == aggregationFun)) - stop("'aggregationFun' should be one of ", - paste0("'", .SUPPORTED_AGG_FUN_CHROM, "'", collapse = ", ")) - ## Ensure we're working on MS1 only! - x <- filterMsLevel(x, 1) - if (length(x) == 0) - return(list()) - nranges <- 1 - if (missing(rt)) - rt <- matrix(c(-Inf, Inf), nrow = 1) - if (missing(mz)) - mz <- matrix(c(-Inf, Inf), nrow = 1) - if (!missing(rt)) { - if (ncol(rt) != 2) - stop("'rt' has to be a matrix with two columns") - ## Replicate if nrow rt is 1 to match nrow of mz. - if (nrow(rt) == 1) - rt <- matrix(rep(rt, nrow(mz)), ncol = 2, byrow = TRUE) - } - if (!missing(mz)) { - if (ncol(mz) != 2) - stop("'mz' has to be a matrix with two coliumns") - if (nrow(mz) == 1) - mz <- matrix(rep(mz, nrow(rt)), ncol = 2, byrow = TRUE) - } - if (nrow(rt) != nrow(mz)) - stop("dimensions of 'rt' and 'mz' have to match") - ## Identify indices of all spectra that are within the rt ranges. 
- rtimes <- rtime(x) +## #' @description This function extracts chromatograms efficiently for multiple +## #' rt and mz ranges by loading the data per file only once and performing +## #' the mz subsetting on the already loaded Spectrum1 classes. +## #' +## #' @note Ensure that x is an OnDiskMSnExp and not an e.g. XCMSnExp object. +## #' Subsetting etc an XCMSnExp might take longer. +## #' +## #' @param rt \code{matrix} with two columns and number of rows corresponding to +## #' the number of ranges to extract. +## #' +## #' @param mz \code{matrix} with two columns and number of rows corresponding to +## #' the number of ranges to extract. nrow of rt and mz have to match. +## #' +## #' @param x OnDiskMSnExp object from which to extract the chromatograms. +## #' +## #' @param return.type either \code{"list"} or \code{"matrix"} to return the +## #' result as a list or as a matrix. +## #' +## #' @param missingValue value to be used as intensity if no signal was measured +## #' for a given rt. +## #' +## #' @return A \code{list} or \code{matrix} with the \code{Chromatogram} objects. +## #' If no data was present for the specified \code{rtrange} and +## #' \code{mzrange} the function returns a \code{list} of length \code{0}. +## #' The \code{list} is arranged first by ranges and then by files, such that +## #' \code{result[[1]]} returns a \code{list} of \code{Chromatogram} objects +## #' for the same rt/mz range. +## #' For \code{return.type = "matrix"} a \code{matrix} is returned with rows +## #' corresponding to ranges and columns to files/samples. \code{result[, 1]} +## #' will thus return a \code{list} of \code{Chromatogram} objects for the +## #' first sample/file, while \code{result[1, ]} returns a \code{list} of +## #' \code{Chromatogram} objects for the same rt/mz range for all files. 
+## #' +## #' @author Johannes Rainer +## #' +## #' @noRd +## .extractMultipleChromatograms <- function(x, rt, mz, aggregationFun = "sum", +## BPPARAM = bpparam(), +## return.type = c("list", "matrix"), +## missingValue = NA_real_) { +## return.type <- match.arg(return.type) +## missingValue <- as.numeric(missingValue) +## if (!any(.SUPPORTED_AGG_FUN_CHROM == aggregationFun)) +## stop("'aggregationFun' should be one of ", +## paste0("'", .SUPPORTED_AGG_FUN_CHROM, "'", collapse = ", ")) +## ## Ensure we're working on MS1 only! +## x <- filterMsLevel(x, 1) +## if (length(x) == 0) +## return(list()) +## nranges <- 1 +## if (missing(rt)) +## rt <- matrix(c(-Inf, Inf), nrow = 1) +## if (missing(mz)) +## mz <- matrix(c(-Inf, Inf), nrow = 1) +## if (!missing(rt)) { +## if (ncol(rt) != 2) +## stop("'rt' has to be a matrix with two columns") +## ## Replicate if nrow rt is 1 to match nrow of mz. +## if (nrow(rt) == 1) +## rt <- matrix(rep(rt, nrow(mz)), ncol = 2, byrow = TRUE) +## } +## if (!missing(mz)) { +## if (ncol(mz) != 2) +## stop("'mz' has to be a matrix with two coliumns") +## if (nrow(mz) == 1) +## mz <- matrix(rep(mz, nrow(rt)), ncol = 2, byrow = TRUE) +## } +## if (nrow(rt) != nrow(mz)) +## stop("dimensions of 'rt' and 'mz' have to match") +## ## Identify indices of all spectra that are within the rt ranges. +## rtimes <- rtime(x) - ## 1) Subset x keeping all spectra that fall into any of the provided rt - ## ranges. - keep_idx <- unlist(apply(rt, MARGIN = 1, function(z) - which(rtimes >= z[1] & rtimes <= z[2])), use.names = FALSE) - keep_idx <- sort(unique(as.integer(keep_idx))) - if (length(keep_idx) == 0) - return(list()) - subs <- x[keep_idx] +## ## 1) Subset x keeping all spectra that fall into any of the provided rt +## ## ranges. 
+## keep_idx <- unlist(apply(rt, MARGIN = 1, function(z) +## which(rtimes >= z[1] & rtimes <= z[2])), use.names = FALSE) +## keep_idx <- sort(unique(as.integer(keep_idx))) +## if (length(keep_idx) == 0) +## return(list()) +## subs <- x[keep_idx] - ## 2) Call the final subsetting on each file separately. - subs_by_file <- splitByFile(subs, f = factor(seq_along(fileNames(subs)))) - suppressWarnings( - res <- bpmapply( - subs_by_file, - seq_along(fileNames(subs)), - FUN = function(cur_sample, cur_file, rtm, mzm, aggFun) { - ## Load all spectra for that file. applies also any proc steps - sps <- spectra(cur_sample) - rts <- rtime(cur_sample) - cur_res <- vector("list", nrow(rtm)) - ## Loop through rt and mz. - for (i in 1:nrow(rtm)) { - ## - Select all spectra within that range and call a - ## function on them that does first filterMz and then - ## aggregate the values per spectrum. - in_rt <- rts >= rtm[i, 1] & rts <= rtm[i, 2] - ## Return an empty Chromatogram if there is no spectrum/scan - ## within the retention time range. - if (!any(in_rt)) { - cur_res[[i]] <- Chromatogram( - filterMz = mzm[i, ], - fromFile = as.integer(cur_file), - aggregationFun = aggFun) - next - } - cur_sps <- lapply( - sps[in_rt], - function(spct, filter_mz, aggFun) { - spct <- filterMz(spct, filter_mz) - ## Now aggregate the values. - if (!spct@peaksCount) - return(c(NA_real_, NA_real_, missingValue)) - return(c(range(spct@mz, na.rm = TRUE, finite = TRUE), - do.call( - aggFun, - list(spct@intensity, na.rm = TRUE)))) - }, filter_mz = mzm[i, ], aggFun = aggFun) - ## Now build the Chromatogram class. - allVals <- unlist(cur_sps, use.names = FALSE) - idx <- seq(3, length(allVals), by = 3) - ## Or should we drop the names completely? - ints <- allVals[idx] - names(ints) <- names(cur_sps) - ## Don't return a Chromatogram object if no values. 
- if (!all(is.na(ints))) { - cur_res[[i]] <- Chromatogram( - rtime = rts[in_rt], - intensity = ints, - mz = range(allVals[-idx], na.rm = TRUE, - finite = TRUE), - filterMz = mzm[i, ], - fromFile = as.integer(cur_file), - aggregationFun = aggFun) - } else { - ## If no measurement if non-NA, still report the NAs and - ## use the filter mz as mz. - cur_res[[i]] <- Chromatogram( - rtime = rts[in_rt], - intensity = ints, - mz = mzm[i, ], - filterMz = mzm[i, ], - fromFile = as.integer(cur_file), - aggregationFun = aggFun) - } - } - cur_res - }, MoreArgs = list(rtm = rt, mzm = mz, aggFun = aggregationFun), - BPPARAM = BPPARAM, SIMPLIFY = FALSE) - ) - ## Ensure that the lists have the same length than there are samples! - fns <- fileNames(x) - fromF <- base::match(fileNames(subs), fns) +## ## 2) Call the final subsetting on each file separately. +## subs_by_file <- splitByFile(subs, f = factor(seq_along(fileNames(subs)))) +## suppressWarnings( +## res <- bpmapply( +## subs_by_file, +## seq_along(fileNames(subs)), +## FUN = function(cur_sample, cur_file, rtm, mzm, aggFun) { +## ## Load all spectra for that file. applies also any proc steps +## sps <- spectra(cur_sample) +## rts <- rtime(cur_sample) +## cur_res <- vector("list", nrow(rtm)) +## ## Loop through rt and mz. +## for (i in 1:nrow(rtm)) { +## ## - Select all spectra within that range and call a +## ## function on them that does first filterMz and then +## ## aggregate the values per spectrum. +## in_rt <- rts >= rtm[i, 1] & rts <= rtm[i, 2] +## ## Return an empty Chromatogram if there is no spectrum/scan +## ## within the retention time range. +## if (!any(in_rt)) { +## cur_res[[i]] <- Chromatogram( +## filterMz = mzm[i, ], +## fromFile = as.integer(cur_file), +## aggregationFun = aggFun) +## next +## } +## cur_sps <- lapply( +## sps[in_rt], +## function(spct, filter_mz, aggFun) { +## spct <- filterMz(spct, filter_mz) +## ## Now aggregate the values. 
+## if (!spct@peaksCount) +## return(c(NA_real_, NA_real_, missingValue)) +## return(c(range(spct@mz, na.rm = TRUE, finite = TRUE), +## do.call( +## aggFun, +## list(spct@intensity, na.rm = TRUE)))) +## }, filter_mz = mzm[i, ], aggFun = aggFun) +## ## Now build the Chromatogram class. +## allVals <- unlist(cur_sps, use.names = FALSE) +## idx <- seq(3, length(allVals), by = 3) +## ## Or should we drop the names completely? +## ints <- allVals[idx] +## names(ints) <- names(cur_sps) +## ## Don't return a Chromatogram object if no values. +## if (!all(is.na(ints))) { +## cur_res[[i]] <- Chromatogram( +## rtime = rts[in_rt], +## intensity = ints, +## mz = range(allVals[-idx], na.rm = TRUE, +## finite = TRUE), +## filterMz = mzm[i, ], +## fromFile = as.integer(cur_file), +## aggregationFun = aggFun) +## } else { +## ## If no measurement if non-NA, still report the NAs and +## ## use the filter mz as mz. +## cur_res[[i]] <- Chromatogram( +## rtime = rts[in_rt], +## intensity = ints, +## mz = mzm[i, ], +## filterMz = mzm[i, ], +## fromFile = as.integer(cur_file), +## aggregationFun = aggFun) +## } +## } +## cur_res +## }, MoreArgs = list(rtm = rt, mzm = mz, aggFun = aggregationFun), +## BPPARAM = BPPARAM, SIMPLIFY = FALSE) +## ) +## ## Ensure that the lists have the same length than there are samples! +## fns <- fileNames(x) +## fromF <- base::match(fileNames(subs), fns) - ## If we've got some files in which we don't have any signal in any range, - ## fill it with empty Chromatograms. This ensures that the result has - ## ALWAYS the same length than there are samples. 
- if (length(res) != length(fns)) { - res_all_files <- vector(mode = "list", length = length(fns)) - res_all_files[fromF] <- res - empties <- which(lengths(res_all_files) == 0) - ## fill these - for (i in 1:length(empties)) { - empty_list <- vector(mode = "list", length = nrow(rt)) - for(j in 1:nrow(rt)) { - empty_list[j] <- Chromatogram(filterMz = mz[i, ], - fromFile = as.integer(i), - aggregationFun = aggregationFun) - } - res_all_files[[empties[i]]] <- empty_list - } - res <- res_all_files - } - ## Now I need to re-arrange the result. - if (return.type == "list") { - ## Got [[file]][[range]], but want to have [[range]][[file]] - final_res <- vector("list", nrow(rt)) - for (i in 1:nrow(rt)) { - final_res[[i]] <- lapply(res, FUN = `[[`, i) - } - if (nrow(rt) == 1) - final_res <- final_res[[1]] - } - if (return.type == "matrix") { - final_res <- do.call(cbind, res) - } - final_res -} +## ## If we've got some files in which we don't have any signal in any range, +## ## fill it with empty Chromatograms. This ensures that the result has +## ## ALWAYS the same length than there are samples. +## if (length(res) != length(fns)) { +## res_all_files <- vector(mode = "list", length = length(fns)) +## res_all_files[fromF] <- res +## empties <- which(lengths(res_all_files) == 0) +## ## fill these +## for (i in 1:length(empties)) { +## empty_list <- vector(mode = "list", length = nrow(rt)) +## for(j in 1:nrow(rt)) { +## empty_list[j] <- Chromatogram(filterMz = mz[i, ], +## fromFile = as.integer(i), +## aggregationFun = aggregationFun) +## } +## res_all_files[[empties[i]]] <- empty_list +## } +## res <- res_all_files +## } +## ## Now I need to re-arrange the result. 
+## if (return.type == "list") { +## ## Got [[file]][[range]], but want to have [[range]][[file]] +## final_res <- vector("list", nrow(rt)) +## for (i in 1:nrow(rt)) { +## final_res[[i]] <- lapply(res, FUN = `[[`, i) +## } +## if (nrow(rt) == 1) +## final_res <- final_res[[1]] +## } +## if (return.type == "matrix") { +## final_res <- do.call(cbind, res) +## } +## final_res +## } ## #' @description Integrates the intensities for chromatograpic peak(s). This is @@ -1057,7 +1057,7 @@ plotAdjustedRtime <- function(object, col = "#00000080", lty = 1, type = "l", #' #' ## Perform the peak detection using the centWave method (settings are tuned #' ## to speed up example execution) -#' res <- findChromPeaks(raw_data, param = CentWaveParam(noise = 3000)) +#' res <- findChromPeaks(raw_data, param = CentWaveParam(noise = 3000, snthresh = 40)) #' #' ## Align the samples using obiwarp #' res <- adjustRtime(res, param = ObiwarpParam()) @@ -1118,7 +1118,8 @@ plotChromPeakDensity <- function(object, mz, rt, param = PeakDensityParam(), ## Plot the peaks as points. plot(x = pks[, "rt"], y = ypos[pks[, "sample"]], xlim = xlim, col = col[pks[, "sample"]], xlab = xlab, yaxt = "n", ylab = ylab, - main = paste0(format(mz, digits = 7), collapse = " - "), ...) + main = paste0(format(mz, digits = 7), collapse = " - "), ylim = yl, + ...) axis(side = 2, at = ypos, labels = 1:nsamples) points(x = dens$x, y = dens$y, type = "l") ## Estimate what would be combined to a feature @@ -1146,9 +1147,272 @@ plotChromPeakDensity <- function(object, mz, rt, param = PeakDensityParam(), } } +#' @title Add definition of chromatographic peaks to an extracted chromatogram +#' plot +#' +#' @description The \code{highlightChromPeaks} function adds chromatographic +#' peak definitions to an existing plot, such as one created by the +#' \code{plot} method on a \code{\link[MSnbase]{Chromatogram}} or +#' \code{\link[MSnbase]{Chromatograms}} object. 
+#' +#' @param x For \code{highlightChromPeaks}: \code{XCMSnExp} object with the +#' detected peaks. +#' +#' @param rt For \code{highlightChromPeaks}: \code{numeric(2)} with the +#' retention time range from which peaks should be extracted and plotted. +#' +#' @param mz \code{numeric(2)} with the mz range from which the peaks should +#' be extracted and plotted. +#' +#' @param border colors to be used to color the border of the rectangles. Has to +#' be equal to the number of samples in \code{x}. +#' +#' @param lwd \code{numeric(1)} defining the width of the line/border. +#' +#' @param col For \code{highlightChromPeaks}: color to be used to fill the +#' rectangle. +#' +#' @param type the plotting type. See \code{\link[graphics]{plot}} for more +#' details. +#' For \code{highlightChromPeaks}: \code{character(1)} defining how the peak +#' should be highlighted: \code{type = "rect"} draws a rectangle +#' representing the peak definition, \code{type = "point"} indicates a +#' chromatographic peak with a single point at the position of the peak's +#' \code{"rt"} and \code{"maxo"}. +#' +#' @param ... additional parameters to the \code{\link{matplot}} or \code{plot} +#' function. +#' +#' @author Johannes Rainer +#' +#' @examples +#' +#' ## Read some files from the faahKO package. +#' library(xcms) +#' library(faahKO) +#' faahko_3_files <- c(system.file('cdf/KO/ko16.CDF', package = "faahKO"), +#' system.file('cdf/KO/ko18.CDF', package = "faahKO")) +#' +#' od <- readMSData2(faahko_3_files) +#' +#' ## Peak detection using the 'matchedFilter' method. Note that we are using a +#' ## larger binSize to reduce the runtime of the example. +#' xod <- findChromPeaks(od, param = MatchedFilterParam(binSize = 0.3, snthresh = 20)) +#' +#' ## Extract the ion chromatogram for one chromatographic peak in the data. +#' chrs <- chromatogram(xod, rt = c(2700, 2900), mz = 335) +#' +#' plot(chrs) +#' +#' ## Extract chromatographic peaks for the mz/rt range (if any). 
+#' chromPeaks(xod, rt = c(2700, 2900), mz = 335) +#' +#' ## Highlight the chromatographic peaks in the area +#' highlightChromPeaks(xod, rt = c(2700, 2900), mz = 335) +highlightChromPeaks <- function(x, rt, mz, + border = rep("#00000040", length(fileNames(x))), + lwd = 1, col = NA, type = c("rect", "point"), + ...) { + type <- match.arg(type) + if (missing(rt)) + rt <- c(-Inf, Inf) + if (missing(mz)) + mz <- c(-Inf, Inf) + if (!is(x, "XCMSnExp")) + stop("'x' has to be a XCMSnExp object") + if (!hasChromPeaks(x)) + stop("'x' does not contain any detected peaks") + pks <- chromPeaks(x, rt = rt, mz = mz, ppm = 0) + if (length(col) != length(fileNames(x))) + col <- rep(col[1], length(fileNames(x))) + if (length(border) != length(fileNames(x))) + border <- rep(border[1], length(fileNames(x))) + if (length(pks)) { + if (type == "rect") + rect(xleft = pks[, "rtmin"], xright = pks[, "rtmax"], + ybottom = rep(0, nrow(pks)), ytop = pks[, "maxo"], + border = border[pks[, "sample"]], lwd = lwd, + col = col[pks[, "sample"]]) + if (type == "point") { + if (any(is.na(col))) + col <- border + ## Draw a point at the peak apex ("rt" and "maxo" columns) + points(x = pks[, "rt"], y = pks[, "maxo"], + col = col[pks[, "sample"]], ...) + } + } +} + + ## Plot the chromatographic peaks for a file in a two dimensional plot. ## plotChromPeakImage... -## @description Plots the +#' @title General visualizations of peak detection results +#' +#' @description \code{plotChromPeaks} plots the identified chromatographic +#' peaks from one file into the plane spanned by the retention time and mz +#' dimension (x-axis representing the retention time and y-axis mz). +#' Each chromatographic peak is plotted as a rectangle representing its +#' width in rt and mz dimension. +#' +#' This plot is supposed to provide some initial overview of the +#' chromatographic peak detection results.
+#' +#' @details The width and line type of the rectangles indicating the detected +#' chromatographic peaks for the \code{plotChromPeaks} function can be +#' specified using the \code{par} function, i.e. with \code{par(lwd = 3)} +#' and \code{par(lty = 2)}, respectively. +#' +#' @param x \code{\link{XCMSnExp}} object. +#' +#' @param file For \code{plotChromPeaks}: \code{numeric(1)} specifying the +#' index of the file within \code{x} for which the plot should be created. +#' Defaults to \code{1}. +#' +#' @param xlim \code{numeric(2)} specifying the x-axis limits (retention time +#' dimension). Defaults to \code{NULL} in which case the full retention +#' time range of the file is used. +#' +#' @param ylim For \code{plotChromPeaks}: \code{numeric(2)} specifying the +#' y-axis limits (mz dimension). Defaults to \code{NULL} in which case the +#' full mz range of the file is used. +#' +#' @param add For \code{plotChromPeaks}: \code{logical(1)} whether the plot +#' should be added or created as a new plot. +#' +#' @param border For \code{plotChromPeaks}: the color for the rectangles' +#' border. +#' +#' @param col For \code{plotChromPeaks}: the color to be used to fill the +#' rectangles. +#' +#' @param xlab \code{character(1)} defining the x-axis label. +#' +#' @param ylab For \code{plotChromPeaks}: \code{character(1)} defining the +#' y-axis label. +#' +#' @param main \code{character(1)} defining the plot title. By default (i.e. +#' \code{main = NULL}) the name of the file will be used as title. +#' +#' @param ... Additional arguments passed to the \code{plot} (for +#' \code{plotChromPeaks}) and \code{image} (for +#' \code{plotChromPeakImage}) functions. Ignored if \code{add = TRUE}. +#' +#' @author Johannes Rainer +#' +#' @seealso \code{\link{highlightChromPeaks}} for the function to highlight +#' detected chromatographic peaks in extracted ion chromatogram plots. +#' +#' @examples +#' +#' ## Perform peak detection on two files from the faahKO package.
+#' library(xcms) +#' library(faahKO) +#' faahko_file <- c(system.file('cdf/KO/ko16.CDF', package = "faahKO"), +#' system.file('cdf/KO/ko18.CDF', package = "faahKO")) +#' +#' od <- readMSData2(faahko_file) +#' +#' ## Peak detection using the 'matchedFilter' method. Note that we are using a +#' ## larger binSize to reduce the runtime of the example. +#' xod <- findChromPeaks(od, param = MatchedFilterParam(binSize = 0.3, snthresh = 20)) +#' +#' ## plotChromPeakImage: plot an image for the identified peaks per file +#' plotChromPeakImage(xod) +#' +#' ## Show all detected chromatographic peaks from the first file +#' plotChromPeaks(xod) +#' +#' ## Plot all detected peaks from the second file and restrict the plot to a +#' ## mz-rt slice +#' plotChromPeaks(xod, file = 2, xlim = c(3500, 3600), ylim = c(400, 600)) +plotChromPeaks <- function(x, file = 1, xlim = NULL, ylim = NULL, + add = FALSE, border = "#00000060", col = NA, + xlab = "retention time", ylab = "mz", + main = NULL, ...) { + if (!is(x, "XCMSnExp")) + stop("'x' is supposed to be an 'XCMSnExp' object, but I got a ", + class(x)) + suppressMessages( + x_file <- filterFile(x, file = file[1], keepAdjustedRtime = TRUE) + ) + if (is.null(xlim)) + xlim <- range(rtime(x_file)) + if (is.null(ylim)) + ylim <- range(mz(x_file)) + if (is.null(main)) + main <- basename(fileNames(x_file)) + ## Get the peaks of the selected file, restricted to the plotted mz/rt + ## limits (might speed up things). + pks <- chromPeaks(x_file, mz = ylim, rt = xlim) + ## Set up empty axes (pch = NA draws no symbol) unless adding to a plot + if (!add) + plot(3, 3, pch = NA, xlim = xlim, ylim = ylim, xlab = xlab, ylab = ylab, + main = main, ...) + if (nrow(pks)) + rect(xleft = pks[, "rtmin"], xright = pks[, "rtmax"], + ybottom = pks[, "mzmin"], ytop = pks[, "mzmax"], col = col, + border = border) +} + +#' @description \code{plotChromPeakImage} plots the number of detected peaks for +#' each sample along the retention time axis as an \emph{image} plot, i.e.
+#' with the number of peaks detected in each bin along the retention time +#' represented with the color of the respective cell. +#' +#' @param binSize For \code{plotChromPeakImage}: \code{numeric(1)} defining the +#' size of the bins along the x-axis (retention time). Defaults to +#' \code{binSize = 30}, peaks within each 30 seconds will thus be counted and +#' plotted. +#' +#' @param log For \code{plotChromPeakImage}: \code{logical(1)} whether the peak +#' counts should be log2 transformed before plotting. +#' +#' @param yaxt For \code{plotChromPeakImage}: \code{character(1)} defining +#' whether y-axis labels should be added. To disable the y-axis use +#' \code{yaxt = "n"}. For any other value of \code{yaxt} the axis will be +#' drawn. See \code{par} help page for more details. +#' +#' @rdname plotChromPeaks +plotChromPeakImage <- function(x, binSize = 30, xlim = NULL, log = FALSE, + xlab = "retention time", yaxt = par("yaxt"), + main = "Chromatographic peak counts", ...) { + if (!is(x, "XCMSnExp")) + stop("'x' is supposed to be an 'XCMSnExp' object, but I got a ", + class(x)) + if (is.null(xlim)) + xlim <- c(floor(min(rtime(x))), ceiling(max(rtime(x)))) + brks <- seq(xlim[1], xlim[2], by = binSize) + if (brks[length(brks)] < xlim[2]) + brks <- c(brks, brks[length(brks)] + binSize) + pks <- chromPeaks(x, rt = xlim) + if (nrow(pks)) { + rts <- split(pks[, "rt"], pks[, "sample"]) + cnts <- lapply(rts, function(z) { + hst <- hist(z, breaks = brks, plot = FALSE) + hst$counts + }) + ## Add 0 vectors for samples in which no peaks were found.
+ n_samples <- length(fileNames(x)) + sample_idxs <- seq_len(n_samples) + sample_idxs <- sample_idxs[!(as.character(sample_idxs) %in% names(rts))] + if (length(sample_idxs)) { + all_cnts <- vector("list", n_samples) + all_cnts[as.numeric(names(cnts))] <- cnts + zeros <- rep(0, (length(brks) - 1)) + all_cnts[sample_idxs] <- list(zeros) + cnts <- all_cnts + } + cnts <- t(do.call(rbind, cnts)) + if (log) + cnts <- log2(cnts) + image(z = cnts, x = brks - (brks[2] - brks[1]) / 2, xaxs = "r", + xlab = xlab, yaxt = "n", main = main, ...) + if (yaxt != "n") + axis(side = 2, at = seq(0, 1, length.out = n_samples), + labels = basename(fileNames(x)), las = 2) + } +} + ## Find mz ranges with multiple peaks per sample. ## Use the density distribution for that? with a bandwidth = 0.001, check diff --git a/R/functions-normalization.R b/R/functions-normalization.R new file mode 100644 index 000000000..e32a04b1b --- /dev/null +++ b/R/functions-normalization.R @@ -0,0 +1,250 @@ +#' @include DataClasses.R functions-utils.R + + +#' @title Fit linear model row-wise to a matrix or data.frame +#' +#' @description Simple function to fit linear models row-wise to the provided +#' data. +#' +#' @details For \code{method = "lmrob"} robust regression is performed using +#' the \code{\link[robustbase]{lmrob}} function with settings +#' \code{setting = "KS2014"} and \code{method = "SMDB"}. +#' The function will perform by default parallel fitting of the models +#' based on the global parallel processing settings. +#' +#' @note Between batch correction in the form of \code{y ~ idx * batch} is +#' currently problematic, because we don't yet check if there are too few +#' values within each batch. +#' +#' @param formula \code{formula} representing the model. +#' +#' @param data \code{data.frame} containing the data to be fitted (e.g. the +#' \code{pData} of an \code{\link{XCMSnExp}} object). +#' +#' @param y \code{matrix} or \code{data.frame} with the response variable.
The +#' model is fit to each row of this matrix (which can be e.g. the +#' \code{\link{featureValues}} matrix). +#' +#' @param minVals \code{integer(1)} defining the minimum number of values to be +#' used for the fitting. Model fitting is skipped for rows in \code{y} with +#' less than \code{minVals} non-NA values. +#' +#' @param method \code{character} defining the method/function to be used for +#' model fitting. Allowed values are \code{"lm"} for least squares +#' regression and \code{"lmrob"} for robust regression using the +#' \code{\link[robustbase]{lmrob}} function. +#' +#' @param BPPARAM optional parameter specifying parallel processing settings. +#' +#' @return A \code{list} with the fitted linear models or \code{NULL} for rows +#' with too few data points. +#' +#' @noRd +#' +#' @author Johannes Rainer +fitModel <- function(formula, data, y, minVals = 4, + method = c("lm", "lmrob"), BPPARAM = bpparam()) { + method <- match.arg(method, c("lm", "lmrob")) + if (missing(formula) || !is(formula, "formula")) + stop("'formula' has to be submitted and should be a formula!") + if (missing(data) || !is(data, "data.frame")) + stop("'data' has to be a 'data.frame'!") + if (missing(y)) + stop("'y' is missing with no default.") + if (ncol(y) != nrow(data)) + stop("ncol(y) has to match nrow(data)!") + ## Check that 'data' contains the variables we're looking for. + vars <- all.vars(formula) + if (vars[1] != "y") + stop("'formula' should start with 'y ~'") + if (!all(vars[-1] %in% colnames(data))) + stop("All of the variables from 'formula' have to be present in 'data'") + ## data shouldn't contain a column y. + if (any(colnames(data) == "y")) + stop("'data' should not contain a column named 'y'") + ## Done with checking.
+ force(y) + force(data) + force(formula) + force(minVals) + force(method) + force(BPPARAM) + ## Subset data to contain only explanatory variables + data <- data[, vars[-1], drop = FALSE] + if (is.null(rownames(y))) + rownames(y) <- seq_len(nrow(y)) + y <- split.data.frame(y, f = rownames(y)) + ## Determine features we skip because of too few data points. + do_em <- which(unlist(lapply(y, function(z) sum(!is.na(z)) >= minVals))) + res <- vector("list", length(y)) + names(res) <- names(y) + sttngs <- list() + if (method == "lmrob") { + ## Force use of the KS2014 settings in lmrob and increase the + ## scale-finding iterations to avoid some of the warnings. + sttngs <- lmrob.control("KS2014") + sttngs$maxit.scale <- 10000 + sttngs$k.max <- 10000 + sttngs$refine.tol <- 1e-7 + } + if (length(do_em)) { + ## fit the model + res[do_em] <- bplapply(y[do_em], FUN = function(z, formula., data., + minVals., lmeth, + sttngs) { + ## TODO: need to check what happens if we're also performing between + ## batch correction and we have too few samples per batch! + ## ## Removing all missing values - could eventually skip that. + ## z <- as.numeric(z) + ## not_na <- !is.na(z) + ## data. <- droplevels(data.frame(y = z[not_na], + ## data.[not_na, , drop = FALSE])) + data. <- data.frame(y = as.numeric(z), data.) + if (lmeth == "lm") + return(lm(formula., data = data., model = FALSE)) + if (lmeth == "lmrob") { + set.seed(123) + return(lmrob(formula., data = data., model = FALSE, + control = sttngs)) + } + if (lmeth == "rlm") + stop("Not yet implemented") + ## return(MASS::rlm(formula., data = data.)) + }, formula. = formula, data. = data, minVals. = minVals, lmeth = method, + sttngs = sttngs, BPPARAM = BPPARAM) + } + res +} + +## Define a simple function that does the adjustment for us.
+#' @title Adjust the injection order-dependent signal drift using linear models +#' +#' @description \code{adjustDriftWithModel} first fits the specified \code{model} to +#' each individual row in \code{y} and subsequently adjusts \code{y} based +#' on these fitted models. This enables a signal drift and batch correction +#' as described in [Wehrens 2016]. +#' +#' @details For some rows/features values can become negative after adjustment. +#' To avoid this, a constant can be added to the adjusted intensities of +#' such features. Parameter \code{shiftNegative} allows to specify how this +#' constant is to be determined. For \code{shiftNegative = "min"}, if one +#' of the adjusted values of a row is \code{< 1}, the absolute row minimum (+1) +#' is added to each intensity. Shifting values for rows that do not only +#' have negative values, but values \code{< 1}, ensures that adjusted values +#' are larger 1 (which might be important if \code{y} is in log2 scale). +#' This shifting is done on a per-feature basis. Alternatively, the +#' \code{globalMin} \emph{globally} shifts the complete matrix by the +#' minimum value (if it is negative). +#' +#' @note Rows with fewer than \code{minVals} data points that can be used +#' for the model fit are returned un-adjusted. +#' +#' @param y \code{numeric} \code{matrix} or \code{data.frame} with the values +#' that should be corrected. +#' +#' @param data \code{data.frame} with additional variables to the model. +#' +#' @param fitOnSubset \code{numeric} or \code{logical} optionally specifying a +#' subset of columns in \code{y} that should be used for the model fitting. +#' Can be e.g. the index of quality control sample columns in \code{y} if +#' the model should be fit exclusively on those while adjusting all columns +#' of \code{y}. +#' +#' @param minVals \code{numeric(1)} defining the minimum number of data points +#' required to perform the model fitting.
+#' +#' @param method \code{character} specifying the model fitting function that +#' should be used (\code{"lm"} or \code{"lmrob"}). +#' +#' @param shiftNegative \code{character} specifying the method to be used to +#' avoid adjusted values to become negative. Allowed values are +#' \code{"none"} (no shift), \code{"min"} (shift intensities of rows with +#' at least one negative value by adding this value +1 to all intensities) +#' and \code{"globalMin"} (shifts the complete +#' matrix by the smallest negative intensity). See details for more +#' information. +#' +#' @return A \code{list} with two elements: \code{"y"} with the adjusted input +#' matrix \code{y} and \code{"fit"} with the fitted models. The latter can +#' be used for quality control purposes or to e.g. identify the most +#' adjusted rows. +#' +#' @author Johannes Rainer +#' +#' @references +#' Wehrens R, Hageman JA, van Eeuwijk F, Kooke R, Flood PJ, Wijnker E, +#' Keurentjes JJ, Lommen A, van Eekelen HD, Hall RD Mumm R and de Vos RC. +#' Improved batch correction in untargeted MS-based metabolomics. +#' \emph{Metabolomics} 2016; 12:88. +#' @noRd +adjustDriftWithModel <- function(y, data = NULL, model = y ~ injection_idx, + fitOnSubset = seq_len(ncol(y)), minVals = 4, + method = "lm", + shiftNegative = c("none", "min","globalMin")) { + shiftNegative <- match.arg(shiftNegative) + ## Input argument checking... + if (is.logical(fitOnSubset)) + fitOnSubset <- which(fitOnSubset) + if (!all(fitOnSubset %in% seq_len(ncol(y)))) + stop("'fitOnSubset' should contain indices between 1 and 'ncol(y)'") + data_fit <- data + if (!is.null(data_fit)) + data_fit <- data_fit[fitOnSubset, , drop = FALSE] + ## First fitting the model. + message("Fitting the model to the features ... ", appendLF = FALSE) + lms <- fitModel(formula = model, data = data_fit, + y = y[, fitOnSubset, drop = FALSE], + minVals = minVals, method = method) + message("OK") + message("Applying models to adjust values ...
", appendLF = FALSE) + if (is.null(rownames(y))) + rownames(y) <- seq_len(nrow(y)) + y_dn <- dimnames(y) + y <- split.data.frame(y, f = rownames(y)) + res <- mapply(y, lms, FUN = function(z, lmod, data.) { + z <- as.numeric(z) + if (length(lmod) == 0) + return(z) + rownames(data.) <- NULL + preds <- predict(lmod, newdata = data.frame(y = z, data.)) + z + mean(z, na.rm = TRUE) - preds + }, MoreArgs = list(data. = data), SIMPLIFY = FALSE) + res <- do.call(rbind, res)[y_dn[[1]], , drop = FALSE] ## split() sorts by row name; restore input order + message("OK") + message("Did not correct ", sum(lengths(lms) == 0), " of the ", length(y), + " rows because of too few data points to fit the model.") + rm(y) + ## Check if we have to shift values... + if (any(res < 1, na.rm = TRUE)) { + if (shiftNegative == "none") { + message("Note: some adjusted values are < 1.") + } + if (shiftNegative == "min") { + ## Shift selected rows by their row min + 1 + ## Include here also < 1 so that values potentially in log scale + ## between 0 and 1 are adjusted as well. + mins <- apply(res, MARGIN = 1, function(z) min(z, na.rm = TRUE)) + idx <- which(mins < 1) + res[idx, ] <- res[idx, ] + abs(mins[idx]) + 1 + message("Shifting ", length(idx), " of the ", nrow(res), " rows ", + "to avoid negative values.") + } + ## if (shiftNegative == "log") { + ## ## Shift selected rows by the difference. + ## mins <- apply(res, MARGIN = 1, function(z) min(z, na.rm = TRUE)) + ## idx <- which(mins < 1) + ## ## res[idx, ] <- res[idx, ] + 1 + ## res[idx, ] <- res[idx, ] + (1 - mins) + ## message("Shifting ", length(idx), " of the ", nrow(res), + ## " rows to avoid values between 0 and 1.") + ## } + if (shiftNegative == "globalMin") { + ## Shifting ALL rows by the smallest value in the matrix.
+ shiftVal <- abs(min(res, na.rm = TRUE)) + 1 + message("Shifting all values by ", shiftVal) + res <- res + shiftVal + } + } + colnames(res) <- y_dn[[2]] + return(list(y = res, fit = lms[y_dn[[1]]])) +} diff --git a/R/functions-utils.R b/R/functions-utils.R index 70fcb0dc3..bf9945412 100644 --- a/R/functions-utils.R +++ b/R/functions-utils.R @@ -251,39 +251,39 @@ useOriginalCode <- function(x) { sprintf(paste0("FT%0", ceiling(log10(x + 1L)), "d"), 1:x) } -#' @description Expands stretches of TRUE values in \code{x} by one on both -#' sides. -#' -#' @note The return value for a \code{NA} is always \code{FALSE}. -#' -#' @param x \code{logical} vector. -#' -#' @author Johannes Rainer -#' -#' @noRd -.grow_trues <- function(x) { - previous <- NA - x_new <- rep_len(FALSE, length(x)) - for (i in 1:length(x)) { - if (is.na(x[i])) { - previous <- NA - next - } - ## If current element is TRUE - if (x[i]) { - x_new[i] <- TRUE - ## if last element was FALSE, set last element to TRUE - if (!is.na(previous) && !previous) - x_new[i - 1] <- TRUE - } else { - ## if previous element was TRUE, set current to TRUE. - if (!is.na(previous) && previous) - x_new[i] <- TRUE - } - previous <- x[i] - } - x_new -} +## #' @description Expands stretches of TRUE values in \code{x} by one on both +## #' sides. +## #' +## #' @note The return value for a \code{NA} is always \code{FALSE}. +## #' +## #' @param x \code{logical} vector. +## #' +## #' @author Johannes Rainer +## #' +## #' @noRd +## .grow_trues <- function(x) { +## previous <- NA +## x_new <- rep_len(FALSE, length(x)) +## for (i in 1:length(x)) { +## if (is.na(x[i])) { +## previous <- NA +## next +## } +## ## If current element is TRUE +## if (x[i]) { +## x_new[i] <- TRUE +## ## if last element was FALSE, set last element to TRUE +## if (!is.na(previous) && !previous) +## x_new[i - 1] <- TRUE +## } else { +## ## if previous element was TRUE, set current to TRUE.
+## if (!is.na(previous) && previous) +## x_new[i] <- TRUE +## } +## previous <- x[i] +## } +## x_new +## } #' @title Weighted mean around maximum #' diff --git a/R/functions-xcmsRaw.R b/R/functions-xcmsRaw.R index b2476294e..8e6e3c79e 100644 --- a/R/functions-xcmsRaw.R +++ b/R/functions-xcmsRaw.R @@ -55,8 +55,8 @@ xcmsRaw <- function(filename, profstep = 1, profmethod = "bin", } if (!is.null(rawdata$polarity)) { object@polarity <- factor(rawdata$polarity, - levels=c(0,1,-1), - labels=c("negative", "positive", "unknown")) + levels = c(0, 1, -1), + labels = c("negative", "positive", "unknown")) } ## ## After the MS1 data, take care of MSn diff --git a/R/methods-Chromatogram.R b/R/methods-Chromatogram.R deleted file mode 100644 index 61dae643d..000000000 --- a/R/methods-Chromatogram.R +++ /dev/null @@ -1,213 +0,0 @@ -#' @include DataClasses.R functions-Chromatogram.R functions-utils.R - -setMethod("initialize", "Chromatogram", function(.Object, ...) { - classVersion(.Object)["Chromatogram"] <- "0.0.1" - callNextMethod(.Object, ...) -}) - - -#' @rdname Chromatogram-class -setMethod("show", "Chromatogram", function(object) { - cat("Object of class: ", class(object), "\n", sep = "") - if (length(object@aggregationFun)) - cat(names(.SUPPORTED_AGG_FUN_CHROM)[.SUPPORTED_AGG_FUN_CHROM == - object@aggregationFun], "\n") - cat("length of object: ", length(object@rtime), "\n", sep = "") - cat("from file: ", object@fromFile, "\n", sep = "") - cat("mz range: [", object@mz[1], ", ", object@mz[2], "]\n", sep = "") - if (length(object@rtime) > 0) { - rtr <- range(object@rtime) - cat("rt range: [", rtr[1], ", ", rtr[2], "]\n", sep = "") - } -}) - -## Methods: - -## rtime -#' @description \code{rtime} returns the retention times for the rentention time -#' - intensity pairs stored in the chromatogram. -#' -#' @param object A \code{Chromatogram} object. 
-#' -#' @rdname Chromatogram-class -setMethod("rtime", "Chromatogram", function(object) { - return(object@rtime) -}) - -## intensity -#' @description \code{intensity} returns the intensity for the rentention time -#' - intensity pairs stored in the chromatogram. -#' -#' @rdname Chromatogram-class -setMethod("intensity", "Chromatogram", function(object) { - return(object@intensity) -}) - -## mz -#' @description \code{mz} get the mz (range) of the chromatogram. The -#' function returns a \code{numeric(2)} with the lower and upper mz value. -#' -#' @param filter For \code{mz}: whether the mz range used to filter the -#' original object should be returned (\code{filter = TRUE}), or the mz -#' range calculated on the real data (\code{filter = FALSE}). -#' -#' @rdname Chromatogram-class -setMethod("mz", "Chromatogram", function(object, filter = FALSE) { - if (filter) - return(object@filterMz) - return(object@mz) -}) -## #' @rdname Chromatogram-class -## setReplaceMethod("mz", "CentWaveParam", function(object, value) { -## object@mzrange <- value -## if (validObject(object)) -## return(object) -## }) - -#' @description \code{precursorMz} get the mz of the precursor ion. The -#' function returns a \code{numeric(2)} with the lower and upper mz value. -#' -#' @rdname Chromatogram-class -setMethod("precursorMz", "Chromatogram", function(object) { - return(object@precursorMz) -}) - -#' @aliases productMz -#' -#' @description \code{productMz} get the mz of the product chromatogram/ion. The -#' function returns a \code{numeric(2)} with the lower and upper mz value. -#' -#' @rdname Chromatogram-class -setMethod("productMz", "Chromatogram", function(object) { - return(object@productMz) -}) - -## aggregationFun -#' @aliases aggregationFun -#' -#' @description \code{aggregationFun,aggregationFun<-} get or set the -#' aggregation function. 
-#' -#' @rdname Chromatogram-class -setMethod("aggregationFun", "Chromatogram", function(object) { - return(object@aggregationFun) -}) -## #' @rdname Chromatogram-class -## setReplaceMethod("aggregationFun", "CentWaveParam", function(object, value) { -## object@aggregationFun <- value -## if (validObject(object)) -## return(object) -## }) - -## fromFile -#' @description \code{fromFile} returns the value from the \code{fromFile} slot. -#' -#' @rdname Chromatogram-class -setMethod("fromFile", "Chromatogram", function(object) { - return(object@fromFile) -}) - -## length -#' @description \code{length} returns the length (number of retention time - -#' intensity pairs) of the chromatogram. -#' -#' @param x For \code{as.data.frame} and \code{length}: a \code{Chromatogram} -#' object. -#' -#' @rdname Chromatogram-class -setMethod("length", "Chromatogram", function(x) { - return(length(x@rtime)) -}) - -## as.data.frame -#' @description \code{as.data.frame} returns the \code{rtime} and -#' \code{intensity} values from the object as \code{data.frame}. -#' -#' @rdname Chromatogram-class -setMethod("as.data.frame", "Chromatogram", function(x) { - return(data.frame(rtime = x@rtime, intensity = x@intensity)) -}) - -#' @description \code{filterRt}: filters the chromatogram based on the provided -#' retention time range. -#' -#' @param rt For \code{filterRt}: \code{numeric(2)} defining the lower and -#' upper retention time for the filtering. -#' -#' @rdname Chromatogram-class -#' -#' @examples -#' -#' ## Create a simple Chromatogram object based on random values. -#' chr <- Chromatogram(intensity = abs(rnorm(1000, mean = 2000, sd = 200)), -#' rtime = sort(abs(rnorm(1000, mean = 10, sd = 5)))) -#' chr -#' -#' ## Get the intensities -#' head(intensity(chr)) -#' -#' ## Get the retention time -#' head(rtime(chr)) -#' -#' ## What is the retention time range of the object? 
-#' range(rtime(chr)) -#' -#' ## Filter the chromatogram to keep only values between 4 and 10 seconds -#' chr2 <- filterRt(chr, rt = c(4, 10)) -#' -#' range(rtime(chr2)) -setMethod("filterRt", "Chromatogram", function(object, rt) { - if (missing(rt)) - return(object) - rt <- range(rt) - ## Use which to be robust against NAs - keep_em <- which(rtime(object) >= rt[1] & rtime(object) <= rt[2]) - if (length(keep_em)) { - object@rtime <- rtime(object)[keep_em] - object@intensity <- intensity(object)[keep_em] - } else { - object@rtime <- numeric() - object@intensity <- numeric() - } - if (validObject(object)) - object -}) - -#' @description \code{clean}: \emph{cleans} a \code{Chromatogram} class by -#' removing all \code{0} and \code{NA} intensity signals (along with the -#' associates retention times). By default (if \code{all = FALSE}) \code{0} -#' values that are directly adjacent to peaks are kept too. \code{NA} -#' values are always removed. -#' -#' @param all For \code{clean}: \code{logical(1)} whether all \code{0} intensity -#' value pairs should be removed (defaults to \code{FALSE}). -#' -#' @return For \code{clean}: a \emph{cleaned} \code{Chromatogram} object. 
-#' -#' @rdname Chromatogram-class -#' -#' @examples -#' -#' ## Create a simple Chromatogram object -#' -#' chr <- Chromatogram(rtime = 1:12, -#' intensity = c(0, 0, 20, 0, 0, 0, 123, 124343, 3432, 0, 0, 0)) -#' -#' ## Remove 0-intensity values keeping those adjacent to peaks -#' chr <- clean(chr) -#' intensity(chr) -#' -#' ## Remove all 0-intensity values -#' chr <- clean(chr, all = TRUE) -#' intensity(chr) -setMethod("clean", signature = signature("Chromatogram"), - function(object, all = FALSE) { - if (all) - keep <- which(object@intensity > 0) - else - keep <- which(.grow_trues(object@intensity > 0)) - object@intensity <- object@intensity[keep] - object@rtime <- object@rtime[keep] - if (validObject(object)) - object - }) diff --git a/R/methods-OnDiskMSnExp.R b/R/methods-OnDiskMSnExp.R index 8d97c7de2..e8be7d9eb 100644 --- a/R/methods-OnDiskMSnExp.R +++ b/R/methods-OnDiskMSnExp.R @@ -6,58 +6,67 @@ ## DataClasses, before the definition of the CentWaveParam class. ## The centWave peak detection method for OnDiskMSnExp: -##' @title Chromatographic peak detection using the centWave method -##' -##' @description The \code{detectChromPeaks,OnDiskMSnExp,CentWaveParam} method -##' performs chromatographic peak detection using the \emph{centWave} algorithm -##' on all samples from an \code{\link[MSnbase]{OnDiskMSnExp}} object. -##' \code{\link[MSnbase]{OnDiskMSnExp}} objects encapsule all experiment specific -##' data and load the spectra data (mz and intensity values) on the fly from the -##' original files applying also all eventual data manipulations. -##' -##' @details Parallel processing (one process per sample) is supported and can -##' be configured either by the \code{BPPARAM} parameter or by globally defining -##' the parallel processing mode using the \code{\link[BiocParallel]{register}} -##' method from the \code{BiocParallel} package. 
-##' -##' @param object For \code{findChromPeaks}: an -##' \code{\link[MSnbase]{OnDiskMSnExp}} object containing the MS- and all other -##' experiment-relevant data. -##' -##' For all other methods: a parameter object. -##' -##' @param param An \code{CentWaveParam} object containing all settings for the -##' centWave algorithm. -##' -##' @param BPPARAM A parameter class specifying if and how parallel processing -##' should be performed. It defaults to \code{\link[BiocParallel]{bpparam}}. -##' See documentation of the \code{BiocParallel} for more details. If parallel -##' processing is enables, peak detection is performed in parallel on several -##' of the input samples. -##' -##' @param return.type Character specifying what type of object the method should -##' return. Can be either \code{"XCMSnExp"} (default), \code{"list"} or -##' \code{"xcmsSet"}. -##' -##' @return For \code{findChromPeaks}: if \code{return.type = "XCMSnExp"} an -##' \code{\link{XCMSnExp}} object with the results of the peak detection. -##' If \code{return.type = "list"} a list of length equal to the number of -##' samples with matrices specifying the identified peaks. -##' If \code{return.type = "xcmsSet"} an \code{\linkS4class{xcmsSet}} object -##' with the results of the peak detection. -##' -##' @seealso \code{\link{XCMSnExp}} for the object containing the results of -##' the peak detection. -##' -##' @rdname findChromPeaks-centWave +#' @title Chromatographic peak detection using the centWave method +#' +#' @description The \code{detectChromPeaks,OnDiskMSnExp,CentWaveParam} method +#' performs chromatographic peak detection using the \emph{centWave} +#' algorithm on all samples from an \code{\link[MSnbase]{OnDiskMSnExp}} +#' object. \code{\link[MSnbase]{OnDiskMSnExp}} objects encapsule all +#' experiment specific data and load the spectra data (mz and intensity +#' values) on the fly from the original files applying also all eventual +#' data manipulations. 
+#' +#' @details Parallel processing (one process per sample) is supported and can +#' be configured either by the \code{BPPARAM} parameter or by globally +#' defining the parallel processing mode using the +#' \code{\link[BiocParallel]{register}} method from the \code{BiocParallel} +#' package. +#' +#' @param object For \code{findChromPeaks}: an +#' \code{\link[MSnbase]{OnDiskMSnExp}} object containing the MS- and all +#' other experiment-relevant data. +#' +#' For all other methods: a parameter object. +#' +#' @param param An \code{CentWaveParam} object containing all settings for the +#' centWave algorithm. +#' +#' @param BPPARAM A parameter class specifying if and how parallel processing +#' should be performed. It defaults to \code{\link[BiocParallel]{bpparam}}. +#' See documentation of the \code{BiocParallel} for more details. If +#' parallel processing is enabled, peak detection is performed in parallel +#' on several of the input samples. +#' +#' @param return.type Character specifying what type of object the method should +#' return. Can be either \code{"XCMSnExp"} (default), \code{"list"} or +#' \code{"xcmsSet"}. +#' +#' @param msLevel \code{integer(1)} defining the MS level on which the peak +#' detection should be performed. Defaults to \code{msLevel = 1}. +#' +#' @return For \code{findChromPeaks}: if \code{return.type = "XCMSnExp"} an +#' \code{\link{XCMSnExp}} object with the results of the peak detection. +#' If \code{return.type = "list"} a list of length equal to the number of +#' samples with matrices specifying the identified peaks. +#' If \code{return.type = "xcmsSet"} an \code{\linkS4class{xcmsSet}} object +#' with the results of the peak detection. +#' +#' @seealso \code{\link{XCMSnExp}} for the object containing the results of +#' the peak detection. 
+#' +#' @rdname findChromPeaks-centWave setMethod("findChromPeaks", signature(object = "OnDiskMSnExp", param = "CentWaveParam"), - function(object, param, BPPARAM = bpparam(), return.type = "XCMSnExp") { + function(object, param, BPPARAM = bpparam(), return.type = "XCMSnExp", + msLevel = 1L) { return.type <- match.arg(return.type, c("XCMSnExp", "list", "xcmsSet")) startDate <- date() ## Restrict to MS1 data. - object <- filterMsLevel(object, msLevel. = 1) + object <- filterMsLevel(object, msLevel. = msLevel) + if (length(object) == 0) + stop("No MS level ", msLevel, " spectra present to perform ", + "peak detection") ## Check if the data is centroided centroided <- isCentroided(object[[1]]) ## issue #181: if there are too few mass peaks the function @@ -119,15 +128,15 @@ setMethod("findChromPeaks", ## ## The centWave peak detection method for MSnExp: -## ##' @title Chromatographic peak detection using the centWave method -## ##' -## ##' @description The \code{findChromPeaks,MSnExp,CentWaveParam} method performs -## ##' peak detection using the \emph{centWave} algorithm on all samples from -## ##' an \code{\link[MSnbase]{MSnExp}} object. These objects contain mz and -## ##' intensity values of all spectra hence no additional data input from the -## ##' original files is required. -## ##' -## ##' @rdname findChromPeaks-centWave +## #' @title Chromatographic peak detection using the centWave method +## #' +## #' @description The \code{findChromPeaks,MSnExp,CentWaveParam} method performs +## #' peak detection using the \emph{centWave} algorithm on all samples from +## #' an \code{\link[MSnbase]{MSnExp}} object. These objects contain mz and +## #' intensity values of all spectra hence no additional data input from the +## #' original files is required. 
+## #' +## #' @rdname findChromPeaks-centWave ## setMethod("findChromPeaks", ## signature(object = "MSnExp", param = "CentWaveParam"), ## function(object, param, BPPARAM = bpparam(), return.type = "list") { @@ -176,50 +185,56 @@ setMethod("findChromPeaks", ## }) ## The matchedFilter peak detection method for OnDiskMSnExp: -##' @title Peak detection in the chromatographic time domain -##' -##' @description The \code{findChromPeaks,OnDiskMSnExp,MatchedFilterParam} -##' method performs peak detection using the \emph{matchedFilter} algorithm -##' on all samples from an \code{\link[MSnbase]{OnDiskMSnExp}} object. -##' \code{\link[MSnbase]{OnDiskMSnExp}} objects encapsule all experiment specific -##' data and load the spectra data (mz and intensity values) on the fly from the -##' original files applying also all eventual data manipulations. -##' -##' @details Parallel processing (one process per sample) is supported and can -##' be configured either by the \code{BPPARAM} parameter or by globally defining -##' the parallel processing mode using the \code{\link[BiocParallel]{register}} -##' method from the \code{BiocParallel} package. -##' -##' @param object For \code{findChromPeaks}: an -##' \code{\link[MSnbase]{OnDiskMSnExp}} object containing the MS- and all other -##' experiment-relevant data. -##' -##' For all other methods: a parameter object. -##' -##' @param param An \code{MatchedFilterParam} object containing all settings for -##' the matchedFilter algorithm. -##' -##' @inheritParams findChromPeaks-centWave -##' -##' @return For \code{findChromPeaks}: if \code{return.type = "XCMSnExp"} an -##' \code{\link{XCMSnExp}} object with the results of the peak detection. -##' If \code{return.type = "list"} a list of length equal to the number of -##' samples with matrices specifying the identified peaks. -##' If \code{return.type = "xcmsSet"} an \code{\linkS4class{xcmsSet}} object -##' with the results of the peak detection. 
-##' -##' @seealso \code{\link{XCMSnExp}} for the object containing the results of -##' the chromatographic peak detection. -##' -##' @rdname findChromPeaks-matchedFilter +#' @title Peak detection in the chromatographic time domain +#' +#' @description The \code{findChromPeaks,OnDiskMSnExp,MatchedFilterParam} +#' method performs peak detection using the \emph{matchedFilter} algorithm +#' on all samples from an \code{\link[MSnbase]{OnDiskMSnExp}} object. +#' \code{\link[MSnbase]{OnDiskMSnExp}} objects encapsule all experiment +#' specific data and load the spectra data (mz and intensity values) on the +#' fly from the original files applying also all eventual data +#' manipulations. +#' +#' @details Parallel processing (one process per sample) is supported and can +#' be configured either by the \code{BPPARAM} parameter or by globally +#' defining the parallel processing mode using the +#' \code{\link[BiocParallel]{register}} method from the \code{BiocParallel} +#' package. +#' +#' @param object For \code{findChromPeaks}: an +#' \code{\link[MSnbase]{OnDiskMSnExp}} object containing the MS- and all +#' other experiment-relevant data. +#' +#' For all other methods: a parameter object. +#' +#' @param param An \code{MatchedFilterParam} object containing all settings for +#' the matchedFilter algorithm. +#' +#' @inheritParams findChromPeaks-centWave +#' +#' @return For \code{findChromPeaks}: if \code{return.type = "XCMSnExp"} an +#' \code{\link{XCMSnExp}} object with the results of the peak detection. +#' If \code{return.type = "list"} a list of length equal to the number of +#' samples with matrices specifying the identified peaks. +#' If \code{return.type = "xcmsSet"} an \code{\linkS4class{xcmsSet}} object +#' with the results of the peak detection. +#' +#' @seealso \code{\link{XCMSnExp}} for the object containing the results of +#' the chromatographic peak detection. 
+#' +#' @rdname findChromPeaks-matchedFilter setMethod("findChromPeaks", signature(object = "OnDiskMSnExp", param = "MatchedFilterParam"), - function(object, param, BPPARAM = bpparam(), return.type = "XCMSnExp") { + function(object, param, BPPARAM = bpparam(), return.type = "XCMSnExp", + msLevel = 1L) { return.type <- match.arg(return.type, c("XCMSnExp", "list", "xcmsSet")) startDate <- date() - ## Restrict to MS1 data. - object <- filterMsLevel(object, msLevel. = 1) + ## Restrict to MS x data. + object <- filterMsLevel(object, msLevel. = msLevel) + if (length(object) == 0) + stop("No MS level ", msLevel, " spectra present to perform ", + "peak detection") ## (1) split the object per file. ## (2) use bplapply to do the peak detection. resList <- bplapply(lapply(1:length(fileNames(object)), @@ -268,15 +283,15 @@ setMethod("findChromPeaks", } }) -## ##' @title Peak detection in the chromatographic time domain -## ##' -## ##' @description The \code{findChromPeaks,MSnExp,MatchedFilterParam} method -## ##' performs peak detection using the \emph{matchedFilter} method on all -## ##' samples from an \code{\link[MSnbase]{MSnExp}} object. These objects contain -## ##' mz and intensity values of all spectra hence no additional -## ##' data input from the original files is required. -## ##' -## ##' @rdname findChromPeaks-matchedFilter +## #' @title Peak detection in the chromatographic time domain +## #' +## #' @description The \code{findChromPeaks,MSnExp,MatchedFilterParam} method +## #' performs peak detection using the \emph{matchedFilter} method on all +## #' samples from an \code{\link[MSnbase]{MSnExp}} object. These objects contain +## #' mz and intensity values of all spectra hence no additional +## #' data input from the original files is required. 
+## #' +## #' @rdname findChromPeaks-matchedFilter ## setMethod("findChromPeaks", ## signature(object = "MSnExp", param = "MatchedFilterParam"), ## function(object, param, BPPARAM = bpparam(), return.type = "list") { @@ -314,50 +329,57 @@ setMethod("findChromPeaks", ## massifquant ## The massifquant peak detection method for OnDiskMSnExp: -##' @title Chromatographic peak detection using the massifquant method -##' -##' @description The \code{findChromPeaks,OnDiskMSnExp,MassifquantParam} -##' method performs chromatographic peak detection using the \emph{massifquant} -##' algorithm on all samples from an \code{\link[MSnbase]{OnDiskMSnExp}} object. -##' \code{\link[MSnbase]{OnDiskMSnExp}} objects encapsule all experiment specific -##' data and load the spectra data (mz and intensity values) on the fly from the -##' original files applying also all eventual data manipulations. -##' -##' @details Parallel processing (one process per sample) is supported and can -##' be configured either by the \code{BPPARAM} parameter or by globally defining -##' the parallel processing mode using the \code{\link[BiocParallel]{register}} -##' method from the \code{BiocParallel} package. -##' -##' @param object For \code{findChromPeaks}: an -##' \code{\link[MSnbase]{OnDiskMSnExp}} object containing the MS- and all other -##' experiment-relevant data. -##' -##' For all other methods: a parameter object. -##' -##' @param param An \code{MassifquantParam} object containing all settings for -##' the massifquant algorithm. -##' -##' @inheritParams findChromPeaks-centWave -##' -##' @return For \code{findChromPeaks}: if \code{return.type = "XCMSnExp"} an -##' \code{\link{XCMSnExp}} object with the results of the peak detection. -##' If \code{return.type = "list"} a list of length equal to the number of -##' samples with matrices specifying the identified peaks. -##' If \code{return.type = "xcmsSet"} an \code{\linkS4class{xcmsSet}} object -##' with the results of the peak detection. 
-##' -##' @seealso \code{\link{XCMSnExp}} for the object containing the results of -##' the peak detection. -##' -##' @rdname findChromPeaks-massifquant +#' @title Chromatographic peak detection using the massifquant method +#' +#' @description The \code{findChromPeaks,OnDiskMSnExp,MassifquantParam} +#' method performs chromatographic peak detection using the +#' \emph{massifquant} algorithm on all samples from an +#' \code{\link[MSnbase]{OnDiskMSnExp}} object. +#' \code{\link[MSnbase]{OnDiskMSnExp}} objects encapsule all experiment +#' specific data and load the spectra data (mz and intensity values) on the +#' fly from the original files applying also all eventual data +#' manipulations. +#' +#' @details Parallel processing (one process per sample) is supported and can +#' be configured either by the \code{BPPARAM} parameter or by globally +#' defining the parallel processing mode using the +#' \code{\link[BiocParallel]{register}} method from the \code{BiocParallel} +#' package. +#' +#' @param object For \code{findChromPeaks}: an +#' \code{\link[MSnbase]{OnDiskMSnExp}} object containing the MS- and all +#' other experiment-relevant data. +#' +#' For all other methods: a parameter object. +#' +#' @param param An \code{MassifquantParam} object containing all settings for +#' the massifquant algorithm. +#' +#' @inheritParams findChromPeaks-centWave +#' +#' @return For \code{findChromPeaks}: if \code{return.type = "XCMSnExp"} an +#' \code{\link{XCMSnExp}} object with the results of the peak detection. +#' If \code{return.type = "list"} a list of length equal to the number of +#' samples with matrices specifying the identified peaks. +#' If \code{return.type = "xcmsSet"} an \code{\linkS4class{xcmsSet}} object +#' with the results of the peak detection. +#' +#' @seealso \code{\link{XCMSnExp}} for the object containing the results of +#' the peak detection. 
+#' +#' @rdname findChromPeaks-massifquant setMethod("findChromPeaks", signature(object = "OnDiskMSnExp", param = "MassifquantParam"), - function(object, param, BPPARAM = bpparam(), return.type = "XCMSnExp") { + function(object, param, BPPARAM = bpparam(), return.type = "XCMSnExp", + msLevel = 1L) { return.type <- match.arg(return.type, c("XCMSnExp", "list", "xcmsSet")) startDate <- date() - ## Restrict to MS1 data. - object <- filterMsLevel(object, msLevel. = 1) + ## Restrict to MS x data. + object <- filterMsLevel(object, msLevel. = msLevel) + if (length(object) == 0) + stop("No MS level ", msLevel, " spectra present to perform ", + "peak detection") ## (1) split the object per file. ## (2) use bplapply to do the peaks detection. resList <- bplapply(lapply(1:length(fileNames(object)), @@ -407,15 +429,15 @@ setMethod("findChromPeaks", }) -## ##' @title Chromatographic peak detection using the massifquant method -## ##' -## ##' @description The \code{findChromPeaks,MSnExp,MassifquantParam} method -## ##' performs chromatographic peak detection using the \emph{massifquant} method -## ##' on all samples from an \code{\link[MSnbase]{MSnExp}} object. These objects -## ##' contain mz and intensity values of all spectra hence no additional -## ##' data input from the original files is required. -## ##' -## ##' @rdname findChromPeaks-massifquant +## #' @title Chromatographic peak detection using the massifquant method +## #' +## #' @description The \code{findChromPeaks,MSnExp,MassifquantParam} method +## #' performs chromatographic peak detection using the \emph{massifquant} method +## #' on all samples from an \code{\link[MSnbase]{MSnExp}} object. These objects +## #' contain mz and intensity values of all spectra hence no additional +## #' data input from the original files is required. 
+## #' +## #' @rdname findChromPeaks-massifquant ## setMethod("findChromPeaks", ## signature(object = "MSnExp", param = "MassifquantParam"), ## function(object, param, BPPARAM = bpparam(), return.type = "list") { @@ -454,51 +476,57 @@ setMethod("findChromPeaks", ## MSW ## The MSW peak detection method for OnDiskMSnExp: -##' @title Single-spectrum non-chromatography MS data peak detection -##' -##' @description The \code{findChromPeaks,OnDiskMSnExp,MSWParam} -##' method performs peak detection in single-spectrum non-chromatography MS -##' data using functionality from the \code{MassSpecWavelet} package on all -##' samples from an \code{\link[MSnbase]{OnDiskMSnExp}} object. -##' \code{\link[MSnbase]{OnDiskMSnExp}} objects encapsule all experiment specific -##' data and load the spectra data (mz and intensity values) on the fly from the -##' original files applying also all eventual data manipulations. -##' -##' @details Parallel processing (one process per sample) is supported and can -##' be configured either by the \code{BPPARAM} parameter or by globally defining -##' the parallel processing mode using the \code{\link[BiocParallel]{register}} -##' method from the \code{BiocParallel} package. -##' -##' @param object For \code{findChromPeaks}: an -##' \code{\link[MSnbase]{OnDiskMSnExp}} object containing the MS- and all other -##' experiment-relevant data. -##' -##' For all other methods: a parameter object. -##' -##' @param param An \code{MSWParam} object containing all settings for -##' the algorithm. -##' -##' @inheritParams findChromPeaks-centWave -##' -##' @return For \code{findChromPeaks}: if \code{return.type = "XCMSnExp"} an -##' \code{\link{XCMSnExp}} object with the results of the peak detection. -##' If \code{return.type = "list"} a list of length equal to the number of -##' samples with matrices specifying the identified peaks. -##' If \code{return.type = "xcmsSet"} an \code{\linkS4class{xcmsSet}} object -##' with the results of the detection. 
-##' -##' @seealso \code{\link{XCMSnExp}} for the object containing the results of -##' the peak detection. -##' -##' @rdname findPeaks-MSW +#' @title Single-spectrum non-chromatography MS data peak detection +#' +#' @description The \code{findChromPeaks,OnDiskMSnExp,MSWParam} +#' method performs peak detection in single-spectrum non-chromatography MS +#' data using functionality from the \code{MassSpecWavelet} package on all +#' samples from an \code{\link[MSnbase]{OnDiskMSnExp}} object. +#' \code{\link[MSnbase]{OnDiskMSnExp}} objects encapsule all experiment +#' specific data and load the spectra data (mz and intensity values) on the +#' fly from the original files applying also all eventual data +#' manipulations. +#' +#' @details Parallel processing (one process per sample) is supported and can +#' be configured either by the \code{BPPARAM} parameter or by globally +#' defining the parallel processing mode using the +#' \code{\link[BiocParallel]{register}} method from the \code{BiocParallel} +#' package. +#' +#' @param object For \code{findChromPeaks}: an +#' \code{\link[MSnbase]{OnDiskMSnExp}} object containing the MS- and all +#' other experiment-relevant data. +#' +#' For all other methods: a parameter object. +#' +#' @param param An \code{MSWParam} object containing all settings for +#' the algorithm. +#' +#' @inheritParams findChromPeaks-centWave +#' +#' @return For \code{findChromPeaks}: if \code{return.type = "XCMSnExp"} an +#' \code{\link{XCMSnExp}} object with the results of the peak detection. +#' If \code{return.type = "list"} a list of length equal to the number of +#' samples with matrices specifying the identified peaks. +#' If \code{return.type = "xcmsSet"} an \code{\linkS4class{xcmsSet}} object +#' with the results of the detection. +#' +#' @seealso \code{\link{XCMSnExp}} for the object containing the results of +#' the peak detection. 
+#' +#' @rdname findPeaks-MSW setMethod("findChromPeaks", signature(object = "OnDiskMSnExp", param = "MSWParam"), - function(object, param, BPPARAM = bpparam(), return.type = "XCMSnExp") { + function(object, param, BPPARAM = bpparam(), return.type = "XCMSnExp", + msLevel = 1L) { return.type <- match.arg(return.type, c("XCMSnExp", "list", "xcmsSet")) startDate <- date() - ## Restrict to MS1 data. - object <- filterMsLevel(object, msLevel. = 1) + ## Restrict to MS x data. + object <- filterMsLevel(object, msLevel. = msLevel) + if (length(object) == 0) + stop("No MS level ", msLevel, " spectra present to perform ", + "peak detection") rts <- split(rtime(object), f = fromFile(object)) if (any(lengths(rts)) > 1) @@ -553,16 +581,16 @@ setMethod("findChromPeaks", } }) -## ##' @title Single-spectrum non-chromatography MS data peak detection -## ##' -## ##' @description The \code{findChromPeaks,MSnExp,MSWParam} method -## ##' performs peak detection in single-spectrum non-chromatography MS -## ##' data using functionality from the \code{MassSpecWavelet} package on all -## ##' samples from an \code{\link[MSnbase]{MSnExp}} object. These objects contain -## ##' mz and intensity values of all spectra hence no additional -## ##' data input from the original files is required. -## ##' -## ##' @rdname findPeaks-MSW +## #' @title Single-spectrum non-chromatography MS data peak detection +## #' +## #' @description The \code{findChromPeaks,MSnExp,MSWParam} method +## #' performs peak detection in single-spectrum non-chromatography MS +## #' data using functionality from the \code{MassSpecWavelet} package on all +## #' samples from an \code{\link[MSnbase]{MSnExp}} object. These objects contain +## #' mz and intensity values of all spectra hence no additional +## #' data input from the original files is required. 
+## #' +## #' @rdname findPeaks-MSW ## setMethod("findChromPeaks", ## signature(object = "MSnExp", param = "MSWParam"), ## function(object, param, BPPARAM = bpparam(), return.type = "list") { @@ -599,43 +627,50 @@ setMethod("findChromPeaks", ## }) ## The centWave with predicted isotope peak detection method for OnDiskMSnExp: -##' @title Two-step centWave peak detection considering also isotopes -##' -##' @description The \code{findChromPeaks,OnDiskMSnExp,CentWavePredIsoParam} method -##' performs a two-step centWave-based chromatographic peak detection on all -##' samples from an \code{\link[MSnbase]{OnDiskMSnExp}} object. -##' \code{\link[MSnbase]{OnDiskMSnExp}} objects encapsule all experiment specific -##' data and load the spectra data (mz and intensity values) on the fly from -##' the original files applying also all eventual data manipulations. -##' -##' @details Parallel processing (one process per sample) is supported and can -##' be configured either by the \code{BPPARAM} parameter or by globally defining -##' the parallel processing mode using the \code{\link[BiocParallel]{register}} -##' method from the \code{BiocParallel} package. -##' -##' @param param An \code{CentWavePredIsoParam} object with the settings for the -##' chromatographic peak detection algorithm. -##' @inheritParams findChromPeaks-centWave -##' -##' @return For \code{findChromPeaks}: if \code{return.type = "XCMSnExp"} an -##' \code{\link{XCMSnExp}} object with the results of the peak detection. -##' If \code{return.type = "list"} a list of length equal to the number of -##' samples with matrices specifying the identified peaks. -##' If \code{return.type = "xcmsSet"} an \code{\linkS4class{xcmsSet}} object -##' with the results of the peak detection. -##' -##' @seealso \code{\link{XCMSnExp}} for the object containing the results of -##' the peak detection. 
-##' -##' @rdname findChromPeaks-centWaveWithPredIsoROIs +#' @title Two-step centWave peak detection considering also isotopes +#' +#' @description The \code{findChromPeaks,OnDiskMSnExp,CentWavePredIsoParam} +#' method performs a two-step centWave-based chromatographic peak detection +#' on all samples from an \code{\link[MSnbase]{OnDiskMSnExp}} object. +#' \code{\link[MSnbase]{OnDiskMSnExp}} objects encapsule all experiment +#' specific data and load the spectra data (mz and intensity values) on the +#' fly from the original files applying also all eventual data +#' manipulations. +#' +#' @details Parallel processing (one process per sample) is supported and can +#' be configured either by the \code{BPPARAM} parameter or by globally +#' defining the parallel processing mode using the +#' \code{\link[BiocParallel]{register}} method from the \code{BiocParallel} +#' package. +#' +#' @param param An \code{CentWavePredIsoParam} object with the settings for the +#' chromatographic peak detection algorithm. +#' +#' @inheritParams findChromPeaks-centWave +#' +#' @return For \code{findChromPeaks}: if \code{return.type = "XCMSnExp"} an +#' \code{\link{XCMSnExp}} object with the results of the peak detection. +#' If \code{return.type = "list"} a list of length equal to the number of +#' samples with matrices specifying the identified peaks. +#' If \code{return.type = "xcmsSet"} an \code{\linkS4class{xcmsSet}} object +#' with the results of the peak detection. +#' +#' @seealso \code{\link{XCMSnExp}} for the object containing the results of +#' the peak detection. 
+#' +#' @rdname findChromPeaks-centWaveWithPredIsoROIs setMethod("findChromPeaks", signature(object = "OnDiskMSnExp", param = "CentWavePredIsoParam"), - function(object, param, BPPARAM = bpparam(), return.type = "XCMSnExp") { + function(object, param, BPPARAM = bpparam(), return.type = "XCMSnExp", + msLevel = 1L) { return.type <- match.arg(return.type, c("XCMSnExp", "list", "xcmsSet")) startDate <- date() - ## Restrict to MS1 data. - object <- filterMsLevel(object, msLevel. = 1) + ## Restrict to MS x data. + object <- filterMsLevel(object, msLevel. = msLevel) + if (length(object) == 0) + stop("No MS level ", msLevel, " spectra present to perform ", + "peak detection") ## Check if the data is centroided centroided <- isCentroided(object[[1]]) ## issue #181: if there are too few mass peaks the function @@ -700,15 +735,15 @@ setMethod("findChromPeaks", ## ## The centWave with predicted isotope peak detection method for MSnExp: -## ##' @title Two-step centWave peak detection considering also isotopes -## ##' -## ##' @description The \code{findChromPeaks,MSnExp,CentWavePredIsoParam} method -## ##' performs a two-step centWave-based peak detection on all samples from -## ##' an \code{\link[MSnbase]{MSnExp}} object. These objects contain mz and -## ##' intensity values of all spectra hence no additional data input from the -## ##' original files is required. -## ##' -## ##' @rdname findChromPeaks-centWaveWithPredIsoROIs +## #' @title Two-step centWave peak detection considering also isotopes +## #' +## #' @description The \code{findChromPeaks,MSnExp,CentWavePredIsoParam} method +## #' performs a two-step centWave-based peak detection on all samples from +## #' an \code{\link[MSnbase]{MSnExp}} object. These objects contain mz and +## #' intensity values of all spectra hence no additional data input from the +## #' original files is required. 
+## #' +## #' @rdname findChromPeaks-centWaveWithPredIsoROIs ## setMethod("findChromPeaks", ## signature(object = "MSnExp", param = "CentWavePredIsoParam"), ## function(object, param, BPPARAM = bpparam(), return.type = "list") { @@ -757,23 +792,23 @@ setMethod("findChromPeaks", ## }) ## profMat method for XCMSnExp/OnDiskMSnExp. -##' @description \code{profMat}: creates a \emph{profile matrix}, which -##' is a n x m matrix, n (rows) representing equally spaced m/z values (bins) and -##' m (columns) the retention time of the corresponding scans. Each cell contains -##' the maximum intensity measured for the specific scan and m/z values. See -##' \code{\link{profMat}} for more details and description of the various binning -##' methods. -##' -##' @param ... Additional parameters. -##' -##' @return For \code{profMat}: a \code{list} with a the profile matrix -##' \code{matrix} (or matrices if \code{fileIndex} was not specified or if -##' \code{length(fileIndex) > 1}). See \code{\link{profile-matrix}} for general -##' help and information about the profile matrix. -##' -##' @inheritParams profMat-xcmsSet -##' -##' @rdname XCMSnExp-class +#' @description \code{profMat}: creates a \emph{profile matrix}, which +#' is an n x m matrix, n (rows) representing equally spaced m/z values (bins) +#' and m (columns) the retention time of the corresponding scans. Each cell +#' contains the maximum intensity measured for the specific scan and m/z +#' values. See \code{\link{profMat}} for more details and description of +#' the various binning methods. +#' +#' @param ... Additional parameters. +#' +#' @return For \code{profMat}: a \code{list} with the profile matrix +#' \code{matrix} (or matrices if \code{fileIndex} was not specified or if +#' \code{length(fileIndex) > 1}). See \code{\link{profile-matrix}} for +#' general help and information about the profile matrix.
+#' +#' @inheritParams profMat-xcmsSet +#' +#' @rdname XCMSnExp-class setMethod("profMat", signature(object = "OnDiskMSnExp"), function(object, method = "bin", step = 0.1, @@ -825,7 +860,7 @@ setMethod("profMat", signature(object = "OnDiskMSnExp"), function(object, return(res) }) -##' @rdname adjustRtime-obiwarp +#' @rdname adjustRtime-obiwarp setMethod("adjustRtime", signature(object = "OnDiskMSnExp", param = "ObiwarpParam"), function(object, param) { @@ -838,25 +873,6 @@ setMethod("adjustRtime", return(res) }) -#' @rdname extractChromatograms-method -setMethod("extractChromatograms", - signature(object = "OnDiskMSnExp"), - function(object, rt, mz, aggregationFun = "sum", missing = NA_real_) { - if (!missing(rt)) { - if (is.null(ncol(rt))) - rt <- matrix(range(rt), ncol = 2, nrow = 1) - } - if (!missing(mz)) { - if (is.null(ncol(mz))) - mz <- matrix(range(mz), ncol = 2, nrow = 1) - } - ## return(.extractChromatogram(x = object, rt = rt, mz = mz, - ## aggregationFun = aggregationFun)) - .extractMultipleChromatograms(object, rt = rt, mz = mz, - aggregationFun = aggregationFun, - missingValue = missing) - }) - #' @rdname extractMsData-method setMethod("extractMsData", signature(object = "OnDiskMSnExp"), function(object, rt, mz) { diff --git a/R/methods-ProcessHistory.R b/R/methods-ProcessHistory.R index 06e51d077..0d18bcdf3 100644 --- a/R/methods-ProcessHistory.R +++ b/R/methods-ProcessHistory.R @@ -11,7 +11,7 @@ setMethod("initialize", "XProcessHistory", function(.Object, ...) { callNextMethod(.Object, ...) 
}) -##' @rdname ProcessHistory-class +#' @rdname ProcessHistory-class setMethod("show", "ProcessHistory", function(object) { cat("Object of class \"", class(object), "\"\n", sep = "") cat(" type:", object@type, "\n") @@ -19,7 +19,7 @@ setMethod("show", "ProcessHistory", function(object) { cat(" info:", object@info, "\n") cat(" fileIndex:", paste0(object@fileIndex, collapse = ","), "\n") }) -##' @rdname ProcessHistory-class +#' @rdname ProcessHistory-class setMethod("show", "XProcessHistory", function(object) { callNextMethod() pcLabel <- "-none-" @@ -28,26 +28,27 @@ setMethod("show", "XProcessHistory", function(object) { cat(" Parameter class:", pcLabel, "\n") }) -##' @aliases processParam -##' -##' @description Get or set the parameter class from an \code{XProcessHistory} -##' object. -##' -##' @param object A \code{ProcessHistory} or \code{XProcessHistory} object. -##' -##' @return For \code{processParam}: a parameter object extending the -##' \code{Param} class. -##' -##' @author Johannes Rainer -##' -##' @rdname ProcessHistory-class +#' @aliases processParam +#' +#' @description Get or set the parameter class from an \code{XProcessHistory} +#' object. +#' +#' @param object A \code{ProcessHistory} or \code{XProcessHistory} object. +#' +#' @return For \code{processParam}: a parameter object extending the +#' \code{Param} class. +#' +#' @author Johannes Rainer +#' +#' @rdname ProcessHistory-class setMethod("processParam", "XProcessHistory", function(object) { return(object@param) }) -##' @aliases processParam<- -##' -##' @param value An object extending the \code{Param} class. -##' @noRd +#' @aliases processParam<- +#' +#' @param value An object extending the \code{Param} class. 
+#' +#' @noRd setReplaceMethod("processParam", "XProcessHistory", function(object, value) { object@param <- value if (validObject(object)) @@ -55,72 +56,76 @@ setReplaceMethod("processParam", "XProcessHistory", function(object, value) { }) ## Methods: -##' @aliases processType -##' -##' @description The \code{processType} method returns a character specifying the -##' processing step \emph{type}. -##' -##' @return The \code{processType} method returns a character string with the -##' processing step type. -##' @rdname ProcessHistory-class +#' @aliases processType +#' +#' @description The \code{processType} method returns a character specifying the +#' processing step \emph{type}. +#' +#' @return The \code{processType} method returns a character string with the +#' processing step type. +#' +#' @rdname ProcessHistory-class setMethod("processType", "ProcessHistory", function(object) { return(object@type) }) -##' @noRd +#' @noRd setReplaceMethod("processType", "ProcessHistory", function(object, value) { object@type <- value if (validObject(object)) return(object) }) -##' @aliases processDate -##' -##' @description The \code{processDate} extracts the start date of the processing -##' step. -##' -##' @return The \code{processDate} method returns a character string with the -##' time stamp of the processing step start. -##' @rdname ProcessHistory-class +#' @aliases processDate +#' +#' @description The \code{processDate} extracts the start date of the processing +#' step. +#' +#' @return The \code{processDate} method returns a character string with the +#' time stamp of the processing step start. 
+#' +#' @rdname ProcessHistory-class setMethod("processDate", "ProcessHistory", function(object) { return(object@date) }) -##' @noRd +#' @noRd setReplaceMethod("processDate", "ProcessHistory", function(object, value) { object@date <- value if (validObject(object)) return(object) }) -##' @aliases processInfo -##' -##' @description The \code{processInfo} extracts optional additional information -##' on the processing step. -##' -##' @return The \code{processInfo} method returns a character string with -##' optional additional informations. -##' @rdname ProcessHistory-class +#' @aliases processInfo +#' +#' @description The \code{processInfo} extracts optional additional information +#' on the processing step. +#' +#' @return The \code{processInfo} method returns a character string with +#' optional additional informations. +#' +#' @rdname ProcessHistory-class setMethod("processInfo", "ProcessHistory", function(object) { return(object@info) }) -##' @noRd +#' @noRd setReplaceMethod("processInfo", "ProcessHistory", function(object, value) { object@info <- value if (validObject(object)) return(object) }) -##' @aliases fileIndex -##' -##' @description The \code{fileIndex} extracts the indices of the files on which -##' the processing step was applied. -##' -##' @return The \code{fileIndex} method returns a integer vector with the index -##' of the files/samples on which the processing step was applied. -##' @rdname ProcessHistory-class +#' @aliases fileIndex +#' +#' @description The \code{fileIndex} extracts the indices of the files on which +#' the processing step was applied. +#' +#' @return The \code{fileIndex} method returns a integer vector with the index +#' of the files/samples on which the processing step was applied. 
+#' +#' @rdname ProcessHistory-class setMethod("fileIndex", "ProcessHistory", function(object) { return(object@fileIndex) }) -##' @noRd +#' @noRd setReplaceMethod("fileIndex", "ProcessHistory", function(object, value) { object@fileIndex <- as.integer(value) if (validObject(object)) diff --git a/R/methods-XCMSnExp.R b/R/methods-XCMSnExp.R index 16149a3be..d07e051d0 100644 --- a/R/methods-XCMSnExp.R +++ b/R/methods-XCMSnExp.R @@ -925,8 +925,10 @@ setMethod("filterAcquisitionNum", "XCMSnExp", function(object, n, file) { #' ## Read the files #' od <- readMSData2(fs) #' -#' ## Perform peak detection on them using default matched filter settings. -#' mfp <- MatchedFilterParam() +#' ## Perform peak detection on them using the matched filter algorithm. Note +#' ## that we use a large value for binSize to reduce the runtime of the +#' ## example code. +#' mfp <- MatchedFilterParam(binSize = 5) #' xod <- findChromPeaks(od, param = mfp) #' #' ## Subset the dataset to the first and third file. @@ -1819,13 +1821,15 @@ setMethod("featureValues", ## }) -#' @aliases extractChromatograms +#' @aliases chromatogram #' #' @title Extracting chromatograms #' -#' @description \code{extractChromatograms}: the method allows to extract +#' @description \code{chromatogram}: the method allows to extract #' chromatograms from \code{\link[MSnbase]{OnDiskMSnExp}} and -#' \code{\link{XCMSnExp}} objects. +#' \code{\link{XCMSnExp}} objects. See also the +#' \code{\link[MSnbase]{chromatogram}} implementation for +#' \code{\link[MSnbase]{OnDiskMSnExp}} in the MSnbase package. #' #' @details Arguments \code{rt} and \code{mz} allow to specify the MS #' data slice from which the chromatogram should be extracted. @@ -1834,22 +1838,23 @@ setMethod("featureValues", #' retention time. Setting \code{aggregationFun = "sum"} would e.g. allow #' to calculate the \emph{total ion chromatogram} (TIC), #' \code{aggregationFun = "max"} the \emph{base peak chromatogram} (BPC). 
-#' The length of the extracted \code{Chromatogram} object, i.e. the number -#' of available data points, corresponds to the number of scans/spectra -#' measured in the specified retention time range. If in a specific scan -#' (for a give retention time) no signal was measured in the specified mz -#' range, a \code{NA_real_} is reported as intensity for the retention time -#' (see Notes for more information). This can be changed using the -#' \code{missing} parameter. -#' -#' @note \code{Chromatogram} objects extracted with \code{extractChromatogram} +#' The length of the extracted \code{\link[MSnbase]{Chromatogram}} object, +#' i.e. the number of available data points, corresponds to the number of +#' scans/spectra measured in the specified retention time range. If in a +#' specific scan (for a give retention time) no signal was measured in the +#' specified mz range, a \code{NA_real_} is reported as intensity for the +#' retention time (see Notes for more information). This can be changed +#' using the \code{missing} parameter. +#' +#' @note \code{\link[MSnbase]{Chromatogram}} objects extracted with +#' \code{chromatogram} #' contain \code{NA_real_} values if, for a given retention time, no #' signal was measured in the specified mz range. If no spectrum/scan is #' present in the defined retention time window a \code{Chromatogram} object #' of length 0 is returned. #' #' For \code{\link{XCMSnExp}} objects, if adjusted retention times are -#' available, the \code{extractChromatograms} method will by default report +#' available, the \code{chromatogram} method will by default report #' and use these (for the subsetting based on the provided parameter #' \code{rt}). This can be overwritten with the parameter #' \code{adjustedRtime}. @@ -1870,11 +1875,11 @@ setMethod("featureValues", #' It is also possible to submit a \code{numeric(1)} in which case #' \code{range} is called on it to transform it to a \code{numeric(2)}. 
#' -#' @param adjustedRtime For \code{extractChromatograms,XCMSnExp}: whether the +#' @param adjustedRtime For \code{chromatogram,XCMSnExp}: whether the #' adjusted (\code{adjustedRtime = TRUE}) or raw retention times #' (\code{adjustedRtime = FALSE}) should be used for filtering and returned -#' in the resulting \code{\link{Chromatogram}} object. Adjusted retention -#' times are used by default if available. +#' in the resulting \code{\link[MSnbase]{Chromatogram}} object. Adjusted +#' retention times are used by default if available. #' #' @param aggregationFun \code{character} specifying the function to be used to #' aggregate intensity values across the mz value range for the same @@ -1887,41 +1892,30 @@ setMethod("featureValues", #' Details and Notes sections below). Use \code{missing = 0} to resemble the #' behaviour of the \code{getEIC} from the \code{old} user interface. #' -#' @return If a single \code{rt} and \code{mz} range was specified, -#' \code{extractChromatograms} returns a \code{list} of -#' \code{\link{Chromatogram}} classes each element being the chromatogram -#' for one of the samples for the specified range. -#' If multiple \code{rt} and \code{mz} ranges were provided (i.e. by passing -#' a multi-row \code{matrix} to parameters \code{rt} or \code{mz}), the -#' function returns a \code{list} of \code{list}s. The outer list -#' representing results for the various ranges, the inner the result across -#' files. In other words, \code{result[[1]]} returns a \code{list} with -#' \code{Chromatogram} classes length equal to the number of files, each -#' element representing the \code{Chromatogram} for the first rt/mz range -#' for one file. -#' An empty \code{list} is returned if no MS1 data is present in -#' \code{object} or if not a single spectrum is available for any of the -#' provided retention time ranges in \code{rt}. 
An empty \code{Chromatogram} -#' object is returned at the correponding position in the result \code{list} -#' if for the specific file no scan/spectrum was measured in the provided -#' rt window. In all other cases, a \code{Chromatogram} with length equal -#' to the number of scans/spectra in the provided rt range is returned. +#' @return \code{chromatogram} returns a \code{\link{Chromatograms}} object with +#' the number of columns corresponding to the number of files in +#' \code{object} and number of rows the number of specified ranges (i.e. +#' number of rows of matrices provided with arguments \code{mz} and/or +#' \code{rt}). #' #' @author Johannes Rainer #' #' @seealso \code{\link{XCMSnExp}} for the data object. -#' \code{\link{Chromatogram}} for the object representing chromatographic -#' data. +#' \code{\link[MSnbase]{Chromatogram}} for the object representing +#' chromatographic data. #' -#' \code{\link{plotChromatogram}} to plot a \code{Chromatogram} or -#' \code{list} of such objects. +#' \code{\link[MSnbase]{Chromatograms}} for the object allowing to arrange +#' multiple \code{Chromatogram} objects. +#' +#' \code{\link[MSnbase]{plot}} to plot a \code{Chromatogram} or +#' \code{Chromatograms} objects. #' #' \code{\link{extractMsData}} for a method to extract the MS data as #' \code{data.frame}. #' #' @export #' -#' @rdname extractChromatograms-method +#' @rdname chromatogram-method #' #' @examples #' ## Read some files from the faahKO package. @@ -1934,28 +1928,37 @@ setMethod("featureValues", #' od <- readMSData2(faahko_3_files) #' #' ## Extract the ion chromatogram for one chromatographic peak in the data. 
-#' chrs <- extractChromatograms(od, rt = c(2700, 2900), mz = 335) +#' chrs <- chromatogram(od, rt = c(2700, 2900), mz = 335) #' -#' ## plot the data -#' plot(rtime(chrs[[2]]), intensity(chrs[[2]]), type = "l", xlab = "rtime", +#' chrs +#' +#' ## Plot the chromatogram +#' plot(rtime(chrs[1, 2]), intensity(chrs[1, 2]), type = "l", xlab = "rtime", #' ylab = "intensity", col = "000080") #' for(i in c(1, 3)) { -#' points(rtime(chrs[[i]]), intensity(chrs[[i]]), type = "l", col = "00000080") +#' points(rtime(chrs[1, i]), intensity(chrs[1, i]), type = "l", +#' col = "00000080") #' } #' -#' ## Plot the chromatogram using plotChromatogram -#' plotChromatogram(chrs) +#' ## Plot the chromatogram using the dedicated plot method. +#' plot(chrs) #' #' ## Extract chromatograms for multiple ranges. #' mzr <- matrix(c(335, 335, 344, 344), ncol = 2, byrow = TRUE) #' rtr <- matrix(c(2700, 2900, 2600, 2750), ncol = 2, byrow = TRUE) -#' chrs <- extractChromatograms(od, mz = mzr, rt = rtr) +#' chrs <- chromatogram(od, mz = mzr, rt = rtr) #' +#' chrs +#' #' ## Plot the extracted chromatograms -#' par(mfrow = c(1, 2)) -#' plotChromatogram(chrs[[1]]) -#' plotChromatogram(chrs[[2]]) -setMethod("extractChromatograms", +#' plot(chrs) +#' +#' ## Get access to all chromatograms for the second mz/rt range +#' chrs[1, ] +#' +#' ## Plot just that one +#' plot(chrs[1, , drop = FALSE]) +setMethod("chromatogram", signature(object = "XCMSnExp"), function(object, rt, mz, adjustedRtime = hasAdjustedRtime(object), aggregationFun = "sum", missing = NA_real_) { @@ -1967,12 +1970,9 @@ setMethod("extractChromatograms", ## Replace the original rtime with adjusted ones... 
object@featureData$retentionTime <- adj_rt } - extractChromatograms(object, rt = rt, mz = mz, - aggregationFun = aggregationFun, - missing = missing) - ## .extractChromatogram(x = object, rt = rt, mz = mz, - ## aggregationFun = aggregationFun, - ## adjusted = adjustedRtime) + chromatogram(object, rt = rt, mz = mz, + aggregationFun = aggregationFun, + missing = missing) }) #' @rdname XCMSnExp-class @@ -2097,7 +2097,7 @@ setMethod("findChromPeaks", #' ## Create a CentWaveParam object. Note that the noise is set to 10000 to #' ## speed up the execution of the example - in a real use case the default #' ## value should be used, or it should be set to a reasonable value. -#' cwp <- CentWaveParam(ppm = 20, noise = 10000, snthresh = 25) +#' cwp <- CentWaveParam(ppm = 20, noise = 10000, snthresh = 40) #' #' res <- findChromPeaks(raw_data, param = cwp) #' diff --git a/R/methods-xcmsRaw.R b/R/methods-xcmsRaw.R index c852b995d..d5819687e 100755 --- a/R/methods-xcmsRaw.R +++ b/R/methods-xcmsRaw.R @@ -309,48 +309,59 @@ setMethod("findPeaks.matchedFilter_orig", "xcmsRaw", ############################################################ ## findPeaks.matchedFilter -##' @title Peak detection in the chromatographic time domain -##' -##' @aliases findPeaks.matchedFilter -##' @description Find peaks in the chromatographic time domain of the -##' profile matrix. For more details see -##' \code{\link{do_findChromPeaks_matchedFilter}}. -##' @param object The \code{\linkS4class{xcmsRaw}} object on which peak detection -##' should be performed. -##' @inheritParams findChromPeaks-matchedFilter -##' @param step numeric(1) specifying the width of the bins/slices in m/z -##' dimension. -##' @param sleep (DEFUNCT). This parameter is no longer functional, as it would cause -##' problems in parallel processing mode. -##' @param scanrange Numeric vector defining the range of scans to which the original -##' \code{object} should be sub-setted before peak detection. -##' @author Colin A. 
Smith -##' @return A matrix, each row representing an intentified chromatographic peak, -##' with columns: -##' \describe{ -##' \item{mz}{Intensity weighted mean of m/z values of the peak across scans.} -##' \item{mzmin}{Minimum m/z of the peak.} -##' \item{mzmax}{Maximum m/z of the peak.} -##' \item{rt}{Retention time of the peak's midpoint.} -##' \item{rtmin}{Minimum retention time of the peak.} -##' \item{rtmax}{Maximum retention time of the peak.} -##' \item{into}{Integrated (original) intensity of the peak.} -##' \item{intf}{Integrated intensity of the filtered peak.} -##' \item{maxo}{Maximum intensity of the peak.} -##' \item{maxf}{Maximum intensity of the filtered peak.} -##' \item{i}{Rank of peak in merged EIC (\code{<= max}).} -##' \item{sn}{Signal to noise ratio of the peak.} -##' } -##' @references -##' Colin A. Smith, Elizabeth J. Want, Grace O'Maille, Ruben Abagyan and -##' Gary Siuzdak. "XCMS: Processing Mass Spectrometry Data for Metabolite -##' Profiling Using Nonlinear Peak Alignment, Matching, and Identification" -##' \emph{Anal. Chem.} 2006, 78:779-787. -##' @family Old peak detection methods -##' @seealso \code{\link{matchedFilter}} for the new user interface. -##' \code{\linkS4class{xcmsRaw}}, -##' \code{\link{do_findChromPeaks_matchedFilter}} for the core function -##' performing the peak detection. +#' @title Peak detection in the chromatographic time domain +#' +#' @aliases findPeaks.matchedFilter +#' +#' @description Find peaks in the chromatographic time domain of the +#' profile matrix. For more details see +#' \code{\link{do_findChromPeaks_matchedFilter}}. +#' +#' @param object The \code{\linkS4class{xcmsRaw}} object on which peak detection +#' should be performed. +#' +#' @inheritParams findChromPeaks-matchedFilter +#' +#' @param step numeric(1) specifying the width of the bins/slices in m/z +#' dimension. +#' +#' @param sleep (DEPRECATED). 
The use of this parameter is highly discouraged, +#' as it could cause problems in parallel processing mode. +#' +#' @param scanrange Numeric vector defining the range of scans to which the +#' original \code{object} should be sub-setted before peak detection. +#' +#' @author Colin A. Smith +#' +#' @return A matrix, each row representing an intentified chromatographic peak, +#' with columns: +#' \describe{ +#' \item{mz}{Intensity weighted mean of m/z values of the peak across +#' scans.} +#' \item{mzmin}{Minimum m/z of the peak.} +#' \item{mzmax}{Maximum m/z of the peak.} +#' \item{rt}{Retention time of the peak's midpoint.} +#' \item{rtmin}{Minimum retention time of the peak.} +#' \item{rtmax}{Maximum retention time of the peak.} +#' \item{into}{Integrated (original) intensity of the peak.} +#' \item{intf}{Integrated intensity of the filtered peak.} +#' \item{maxo}{Maximum intensity of the peak.} +#' \item{maxf}{Maximum intensity of the filtered peak.} +#' \item{i}{Rank of peak in merged EIC (\code{<= max}).} +#' \item{sn}{Signal to noise ratio of the peak.} +#' } +#' +#' @references +#' Colin A. Smith, Elizabeth J. Want, Grace O'Maille, Ruben Abagyan and +#' Gary Siuzdak. "XCMS: Processing Mass Spectrometry Data for Metabolite +#' Profiling Using Nonlinear Peak Alignment, Matching, and Identification" +#' \emph{Anal. Chem.} 2006, 78:779-787. +#' @family Old peak detection methods +#' +#' @seealso \code{\link{matchedFilter}} for the new user interface. +#' \code{\linkS4class{xcmsRaw}}, +#' \code{\link{do_findChromPeaks_matchedFilter}} for the core function +#' performing the peak detection. 
setMethod("findPeaks.matchedFilter", "xcmsRaw", function(object, fwhm = 30, sigma = fwhm/2.3548, max = 5, snthresh = 10, step = 0.1, steps = 2, @@ -447,7 +458,8 @@ setMethod("findPeaks.matchedFilter", "xcmsRaw", snthresh = snthresh, steps = steps, mzdiff = mzdiff, - index = index + index = index, + sleep = sleep ) invisible(new("xcmsPeaks", res)) }) @@ -502,426 +514,11 @@ setMethod("findPeaks.centWave", "xcmsRaw", function(object, ppm=25, verboseColumns = verbose.columns, roiList = ROI.list, firstBaselineCheck = firstBaselineCheck, - roiScales = roiScales + roiScales = roiScales, + sleep = sleep ) invisible(new("xcmsPeaks", res)) }) -## ## The original code wrapped into a function. This should be REMOVED once we -## ## checked that the do_ function yields identical results. -## .findPeaks.centWave_orig <- function(object, ppm=25, -## peakwidth=c(20,50), -## snthresh=10, -## prefilter=c(3,100), -## mzCenterFun="wMean", -## integrate=1, mzdiff=-0.001, -## fitgauss=FALSE, -## scanrange = numeric(), -## noise=0, ## noise.local=TRUE, -## sleep=0, -## verbose.columns=FALSE, -## ROI.list=list(), -## firstBaselineCheck=TRUE, -## roiScales=NULL) { -## if (!isCentroided(object)) -## warning("It looks like this file is in profile mode. centWave can", -## " process only centroid mode data !\n") - -## mzCenterFun <- paste("mzCenter", mzCenterFun, sep=".") -## if (!exists(mzCenterFun, mode="function")) -## stop("Error: >",mzCenterFun,"< not defined ! \n") - -## if (!is.logical(firstBaselineCheck)) -## stop("Error: parameter >firstBaselineCheck< is not a vector ! \n") -## if (length(firstBaselineCheck) != 1) -## stop("Error: parameter >firstBaselineCheck< is not a single logical ! \n") -## if (!is.null(roiScales)){ -## if (!is.vector(roiScales)) -## stop("Error: parameter >roiScales< is not a vector ! \n") -## if(!is.numeric(roiScales)) -## stop("Error: parameter >roiScales< is not a vector of type numeric ! 
\n") -## if(!length(roiScales) == length(ROI.list)) -## stop("Error: length of parameter >roiScales< is not equal to the", -## " length of parameter >ROI.list< ! \n") -## } - -## ## Fix issue #64: -## ## Sub-set the xcmsRaw based on scanrange -## if (length(scanrange) < 2) { -## scanrange <- c(1, length(object@scantime)) -## } else { -## scanrange <- range(scanrange) -## } -## if (min(scanrange) < 1 | max(scanrange) > length(object@scantime)) { -## scanrange[1] <- max(1, scanrange[1]) -## scanrange[2] <- min(length(object@scantime), scanrange[2]) -## message("Provided scanrange was adjusted to ", scanrange) -## } -## object <- object[scanrange[1]:scanrange[2]] -## scanrange <- c(1, length(object@scantime)) - -## ## scanrange.old <- scanrange -## ## if (length(scanrange) < 2) -## ## scanrange <- c(1, length(object@scantime)) -## ## else -## ## scanrange <- range(scanrange) -## ## scanrange[1] <- max(1,scanrange[1]) -## ## scanrange[2] <- min(length(object@scantime),scanrange[2]) -## ## if (!(identical(scanrange.old,scanrange)) && (length(scanrange.old) >0)) -## ## cat("Warning: scanrange was adjusted to ",scanrange,"\n") - -## basenames <- c("mz","mzmin","mzmax","rt","rtmin","rtmax","into","intb","maxo","sn") -## verbosenames <- c("egauss","mu","sigma","h","f", "dppm", "scale","scpos","scmin","scmax","lmin","lmax") - -## ## Peak width: seconds to scales -## scalerange <- round((peakwidth / mean(diff(object@scantime))) / 2) - -## if (length(z <- which(scalerange==0))) -## scalerange <- scalerange[-z] - -## if (length(scalerange) < 1) -## stop("No scales ? 
Please check peak width!\n") - -## if (length(scalerange) > 1) -## scales <- seq(from=scalerange[1], to=scalerange[2], by=2) else -## scales <- scalerange; - -## minPeakWidth <- scales[1]; -## noiserange <- c(minPeakWidth*3, max(scales)*3); -## maxGaussOverlap <- 0.5; -## minPtsAboveBaseLine <- max(4,minPeakWidth-2); -## minCentroids <- minPtsAboveBaseLine ; -## scRangeTol <- maxDescOutlier <- floor(minPeakWidth/2); - -## ## If no ROIs are supplied then search for them. -## if (length(ROI.list) == 0) { -## cat("\n Detecting mass traces at",ppm,"ppm ... \n"); flush.console(); -## ROI.list <- findmzROI(object,scanrange=scanrange,dev=ppm * 1e-6,minCentroids=minCentroids, prefilter=prefilter, noise=noise) -## if (length(ROI.list) == 0) { -## cat("No ROIs found ! \n") - -## if (verbose.columns) { -## nopeaks <- new("xcmsPeaks", matrix(nrow=0, ncol=length(basenames)+length(verbosenames))) -## colnames(nopeaks) <- c(basenames, verbosenames) -## } else { -## nopeaks <- new("xcmsPeaks", matrix(nrow=0, ncol=length(basenames))) -## colnames(nopeaks) <- c(basenames) -## } - -## return(invisible(nopeaks)) -## } -## } - -## peaklist <- list() -## scantime <- object@scantime -## Nscantime <- length(scantime) -## lf <- length(ROI.list) - -## cat('\n Detecting chromatographic peaks ... 
\n % finished: '); lp <- -1; - -## for (f in 1:lf) { - -## ## Show progress -## perc <- round((f/lf) * 100) -## if ((perc %% 10 == 0) && (perc != lp)) -## { -## cat(perc," ",sep=""); -## lp <- perc; -## } -## flush.console() - -## feat <- ROI.list[[f]] -## N <- feat$scmax - feat$scmin + 1 - -## peaks <- peakinfo <- NULL - -## mzrange <- c(feat$mzmin,feat$mzmax) -## sccenter <- feat$scmin[1] + floor(N/2) - 1 -## scrange <- c(feat$scmin,feat$scmax) -## ## scrange + noiserange, used for baseline detection and wavelet analysis -## sr <- c(max(scanrange[1],scrange[1] - max(noiserange)),min(scanrange[2],scrange[2] + max(noiserange))) -## eic <- rawEIC(object,mzrange=mzrange,scanrange=sr) -## d <- eic$intensity -## td <- sr[1]:sr[2] -## scan.range <- c(sr[1],sr[2]) -## ## original mzROI range -## mzROI.EIC <- rawEIC(object,mzrange=mzrange,scanrange=scrange) -## omz <- rawMZ(object,mzrange=mzrange,scanrange=scrange) - -## if (all(omz == 0)){ -## warning("centWave: No peaks found in ROI.\n") -## next -## } - -## od <- mzROI.EIC$intensity -## otd <- mzROI.EIC$scan - -## if (all(od == 0)){ -## warning("centWave: No peaks found in ROI.\n") -## next -## } - -## ## scrange + scRangeTol, used for gauss fitting and continuous data above 1st baseline detection -## ftd <- max(td[1], scrange[1] - scRangeTol) : min(td[length(td)], scrange[2] + scRangeTol) -## fd <- d[match(ftd,td)] - -## ## 1st type of baseline: statistic approach -## if (N >= 10*minPeakWidth) ## in case of very long mass trace use full scan range for baseline detection -## noised <- rawEIC(object,mzrange=mzrange,scanrange=scanrange)$intensity else -## noised <- d; -## ## 90% trimmed mean as first baseline guess -## noise <- estimateChromNoise(noised, trim=0.05, minPts=3*minPeakWidth) - -## ## any continuous data above 1st baseline ? 
-## if (firstBaselineCheck & !continuousPtsAboveThreshold(fd,threshold=noise,num=minPtsAboveBaseLine)) -## next; - -## ## 2nd baseline estimate using not-peak-range -## lnoise <- getLocalNoiseEstimate(d,td,ftd,noiserange,Nscantime, threshold=noise,num=minPtsAboveBaseLine) - -## ## Final baseline & Noise estimate -## baseline <- max(1,min(lnoise[1],noise)) -## sdnoise <- max(1,lnoise[2]) -## sdthr <- sdnoise * snthresh - -## ## is there any data above S/N * threshold ? -## if (!(any(fd - baseline >= sdthr))) -## next; - -## wCoefs <- MSW.cwt(d, scales=scales, wavelet='mexh') -## if (!(!is.null(dim(wCoefs)) && any(wCoefs- baseline >= sdthr))) -## next; - -## if (td[length(td)] == Nscantime) ## workaround, localMax fails otherwise -## wCoefs[nrow(wCoefs),] <- wCoefs[nrow(wCoefs)-1,] * 0.99 -## localMax <- MSW.getLocalMaximumCWT(wCoefs) -## rL <- MSW.getRidge(localMax) -## wpeaks <- sapply(rL, -## function(x) { -## w <- min(1:length(x),ncol(wCoefs)) -## any(wCoefs[x,w]- baseline >= sdthr) -## }) -## if (any(wpeaks)) { -## wpeaksidx <- which(wpeaks) -## ## check each peak in ridgeList -## for (p in 1:length(wpeaksidx)) { -## opp <- rL[[wpeaksidx[p]]] -## pp <- unique(opp) -## if (length(pp) >= 1) { -## dv <- td[pp] %in% ftd -## if (any(dv)) { ## peaks in orig. 
data range -## ## Final S/N check -## if (any(d[pp[dv]]- baseline >= sdthr)) { -## if(!is.null(roiScales)){ -## ## use given scale -## best.scale.nr <- which(scales == roiScales[[f]]) -## if(best.scale.nr > length(opp)) -## best.scale.nr <- length(opp) -## } else { -## ## try to decide which scale describes the peak best -## inti <- numeric(length(opp)) -## irange = rep(ceiling(scales[1]/2),length(opp)) -## for (k in 1:length(opp)) { -## kpos <- opp[k] -## r1 <- ifelse(kpos-irange[k] > 1,kpos-irange[k],1) -## r2 <- ifelse(kpos+irange[k] < length(d),kpos+irange[k],length(d)) -## inti[k] <- sum(d[r1:r2]) -## } -## maxpi <- which.max(inti) -## if (length(maxpi) > 1) { -## m <- wCoefs[opp[maxpi],maxpi] -## bestcol <- which(m == max(m),arr.ind=T)[2] -## best.scale.nr <- maxpi[bestcol] -## } else best.scale.nr <- maxpi -## } - -## best.scale <- scales[best.scale.nr] -## best.scale.pos <- opp[best.scale.nr] - -## pprange <- min(pp):max(pp) -## ## maxint <- max(d[pprange]) -## lwpos <- max(1,best.scale.pos - best.scale) -## rwpos <- min(best.scale.pos + best.scale,length(td)) -## p1 <- match(td[lwpos],otd)[1] -## p2 <- match(td[rwpos],otd); p2 <- p2[length(p2)] -## if (is.na(p1)) p1<-1 -## if (is.na(p2)) p2<-N -## mz.value <- omz[p1:p2] -## mz.int <- od[p1:p2] -## maxint <- max(mz.int) - -## ## re-calculate m/z value for peak range -## mzrange <- range(mz.value) -## mzmean <- do.call(mzCenterFun,list(mz=mz.value,intensity=mz.int)) - -## ## Compute dppm only if needed -## dppm <- NA -## if (verbose.columns) -## if (length(mz.value) >= (minCentroids+1)) -## dppm <- round(min(running(abs(diff(mz.value)) /(mzrange[2] * 1e-6),fun=max,width=minCentroids))) else -## dppm <- round((mzrange[2]-mzrange[1]) / (mzrange[2] * 1e-6)) - -## peaks <- rbind(peaks, -## c(mzmean,mzrange, ## mz -## NA,NA,NA, ## rt, rtmin, rtmax, -## NA, ## intensity (sum) -## NA, ## intensity (-bl) -## maxint, ## max intensity -## round((maxint - baseline) / sdnoise), ## S/N Ratio -## NA, ## Gaussian RMSE -## 
NA,NA,NA, ## Gaussian Parameters -## f, ## ROI Position -## dppm, ## max. difference between the [minCentroids] peaks in ppm -## best.scale, ## Scale -## td[best.scale.pos], td[lwpos], td[rwpos], ## Peak positions guessed from the wavelet's (scan nr) -## NA,NA )) ## Peak limits (scan nr) - -## peakinfo <- rbind(peakinfo,c(best.scale, best.scale.nr, best.scale.pos, lwpos, rwpos)) ## Peak positions guessed from the wavelet's -## } -## } -## } -## } ##for -## } ## if - - -## ## postprocessing -## if (!is.null(peaks)) { -## colnames(peaks) <- c(basenames, verbosenames) - -## colnames(peakinfo) <- c("scale","scaleNr","scpos","scmin","scmax") - -## for (p in 1:dim(peaks)[1]) { -## ## find minima, assign rt and intensity values -## if (integrate == 1) { -## lm <- descendMin(wCoefs[,peakinfo[p,"scaleNr"]], istart= peakinfo[p,"scpos"]) -## gap <- all(d[lm[1]:lm[2]] == 0) ## looks like we got stuck in a gap right in the middle of the peak -## if ((lm[1]==lm[2]) || gap )## fall-back -## lm <- descendMinTol(d, startpos=c(peakinfo[p,"scmin"], peakinfo[p,"scmax"]), maxDescOutlier) -## } else -## lm <- descendMinTol(d,startpos=c(peakinfo[p,"scmin"],peakinfo[p,"scmax"]),maxDescOutlier) - -## ## narrow down peak rt boundaries by skipping zeros -## pd <- d[lm[1]:lm[2]]; np <- length(pd) -## lm.l <- findEqualGreaterUnsorted(pd,1) -## lm.l <- max(1, lm.l - 1) -## lm.r <- findEqualGreaterUnsorted(rev(pd),1) -## lm.r <- max(1, lm.r - 1) -## lm <- lm + c(lm.l - 1, -(lm.r - 1) ) - -## peakrange <- td[lm] -## peaks[p,"rtmin"] <- scantime[peakrange[1]] -## peaks[p,"rtmax"] <- scantime[peakrange[2]] - -## peaks[p,"maxo"] <- max(d[lm[1]:lm[2]]) - -## pwid <- (scantime[peakrange[2]] - scantime[peakrange[1]])/(peakrange[2] - peakrange[1]) -## if (is.na(pwid)) -## pwid <- 1 - -## peaks[p,"into"] <- pwid*sum(d[lm[1]:lm[2]]) - -## db <- d[lm[1]:lm[2]] - baseline -## peaks[p,"intb"] <- pwid*sum(db[db>0]) - -## peaks[p,"lmin"] <- lm[1]; -## peaks[p,"lmax"] <- lm[2]; - -## if (fitgauss) { -## ## 
perform gaussian fits, use wavelets for inital parameters -## md <- max(d[lm[1]:lm[2]]);d1 <- d[lm[1]:lm[2]]/md; ## normalize data for gaussian error calc. -## pgauss <- fitGauss(td[lm[1]:lm[2]],d[lm[1]:lm[2]],pgauss = -## list(mu=peaks[p,"scpos"],sigma=peaks[p,"scmax"]-peaks[p,"scmin"],h=peaks[p,"maxo"])) -## rtime <- peaks[p,"scpos"] -## if (!any(is.na(pgauss)) && all(pgauss > 0)) { -## gtime <- td[match(round(pgauss$mu),td)] -## if (!is.na(gtime)) { -## rtime <- gtime -## peaks[p,"mu"] <- pgauss$mu; peaks[p,"sigma"] <- pgauss$sigma; peaks[p,"h"] <- pgauss$h; -## peaks[p,"egauss"] <- sqrt((1/length(td[lm[1]:lm[2]])) * sum(((d1-gauss(td[lm[1]:lm[2]],pgauss$h/md,pgauss$mu,pgauss$sigma))^2))) -## } -## } -## peaks[p,"rt"] <- scantime[rtime] -## ## avoid fitting side effects -## if (peaks[p,"rt"] < peaks[p,"rtmin"]) -## peaks[p,"rt"] <- scantime[peaks[p,"scpos"]] -## } else -## peaks[p,"rt"] <- scantime[peaks[p,"scpos"]] -## } -## peaks <- joinOverlappingPeaks(td,d,otd,omz,od,scantime,scan.range,peaks,maxGaussOverlap,mzCenterFun=mzCenterFun) -## } - - - -## if ((sleep >0) && (!is.null(peaks))) { -## tdp <- scantime[td]; trange <- range(tdp) -## egauss <- paste(round(peaks[,"egauss"],3),collapse=", ") -## cdppm <- paste(peaks[,"dppm"],collapse=", ") -## csn <- paste(peaks[,"sn"],collapse=", ") -## par(bg = "white") -## l <- layout(matrix(c(1,2,3),nrow=3,ncol=1,byrow=T),heights=c(.5,.75,2)); -## par(mar= c(2, 4, 4, 2) + 0.1) -## plotRaw(object,mzrange=mzrange,rtrange=trange,log=TRUE,title='') -## title(main=paste(f,': ', round(mzrange[1],4),' - ',round(mzrange[2],4),' m/z , dppm=',cdppm,', EGauss=',egauss ,', S/N =',csn,sep='')) -## par(mar= c(1, 4, 1, 2) + 0.1) -## image(y=scales[1:(dim(wCoefs)[2])],z=wCoefs,col=terrain.colors(256),xaxt='n',ylab='CWT coeff.') -## par(mar= c(4, 4, 1, 2) + 0.1) -## plot(tdp,d,ylab='Intensity',xlab='Scan Time');lines(tdp,d,lty=2) -## lines(scantime[otd],od,lty=2,col='blue') ## original mzbox range -## abline(h=baseline,col='green') -## 
bwh <- length(sr[1]:sr[2]) - length(baseline) -## if (odd(bwh)) {bwh1 <- floor(bwh/2); bwh2 <- bwh1+1} else {bwh1<-bwh2<-bwh/2} -## if (any(!is.na(peaks[,"scpos"]))) -## { ## plot centers and width found through wavelet analysis -## abline(v=scantime[na.omit(peaks[(peaks[,"scpos"] >0),"scpos"])],col='red') -## } -## abline(v=na.omit(c(peaks[,"rtmin"],peaks[,"rtmax"])),col='green',lwd=1) -## if (fitgauss) { -## tdx <- seq(min(td),max(td),length.out=200) -## tdxp <- seq(trange[1],trange[2],length.out=200) -## fitted.peaks <- which(!is.na(peaks[,"mu"])) -## for (p in fitted.peaks) -## { ## plot gaussian fits -## yg<-gauss(tdx,peaks[p,"h"],peaks[p,"mu"],peaks[p,"sigma"]) -## lines(tdxp,yg,col='blue') -## } -## } -## Sys.sleep(sleep) -## } - -## if (!is.null(peaks)) { -## peaklist[[length(peaklist)+1]] <- peaks -## } - -## } ## f - -## if (length(peaklist) == 0) { -## cat("\nNo peaks found !\n") - -## if (verbose.columns) { -## nopeaks <- new("xcmsPeaks", matrix(nrow=0, ncol=length(basenames)+length(verbosenames))) -## colnames(nopeaks) <- c(basenames, verbosenames) -## } else { -## nopeaks <- new("xcmsPeaks", matrix(nrow=0, ncol=length(basenames))) -## colnames(nopeaks) <- c(basenames) -## } - -## return(invisible(nopeaks)) -## } - -## p <- do.call(rbind,peaklist) - -## if (!verbose.columns) -## p <- p[,basenames,drop=FALSE] - -## uorder <- order(p[,"into"], decreasing=TRUE) -## pm <- as.matrix(p[,c("mzmin","mzmax","rtmin","rtmax"),drop=FALSE]) -## uindex <- rectUnique(pm,uorder,mzdiff,ydiff = -0.00001) ## allow adjacent peaks -## pr <- p[uindex,,drop=FALSE] -## cat("\n",dim(pr)[1]," Peaks.\n") - -## invisible(new("xcmsPeaks", pr)) -## } - - ############################################################ ## findPeaks.centWaveWithPredictedIsotopeROIs @@ -1032,46 +629,51 @@ setMethod("findPeaks.addPredictedIsotopeFeatures", ############################################################ ## findPeaks.MSW -##' @title Peak detection for single-spectrum non-chromatography MS data 
-##' @aliases findPeaks.MSW -##' -##' @description This method performs peak detection in mass spectrometry -##' direct injection spectrum using a wavelet based algorithm. -##' -##' @details This is a wrapper around the peak picker in Bioconductor's -##' \code{MassSpecWavelet} package calling -##' \code{\link[MassSpecWavelet]{peakDetectionCWT}} and -##' \code{\link[MassSpecWavelet]{tuneInPeakInfo}} functions. -##' -##' @inheritParams findPeaks-MSW -##' @inheritParams findChromPeaks-centWave -##' @param object The \code{\linkS4class{xcmsRaw}} object on which peak -##' detection should be performed. -##' @param verbose.columns Logical whether additional peak meta data columns -##' should be returned. -##' -##' @return -##' A matrix, each row representing an intentified peak, with columns: -##' \describe{ -##' \item{mz}{m/z value of the peak at the centroid position.} -##' \item{mzmin}{Minimum m/z of the peak.} -##' \item{mzmax}{Maximum m/z of the peak.} -##' \item{rt}{Always \code{-1}.} -##' \item{rtmin}{Always \code{-1}.} -##' \item{rtmax}{Always \code{-1}.} -##' \item{into}{Integrated (original) intensity of the peak.} -##' \item{maxo}{Maximum intensity of the peak.} -##' \item{intf}{Always \code{NA}.} -##' \item{maxf}{Maximum MSW-filter response of the peak.} -##' \item{sn}{Signal to noise ratio.} -##' } -##' @seealso \code{\link{MSW}} for the new user interface, -##' \code{\link{do_findPeaks_MSW}} for the downstream analysis -##' function or \code{\link[MassSpecWavelet]{peakDetectionCWT}} from the -##' \code{MassSpecWavelet} for details on the algorithm and additionally supported -##' parameters. -##' -##' @author Joachim Kutzera, Steffen Neumann, Johannes Rainer +#' @title Peak detection for single-spectrum non-chromatography MS data +#' +#' @aliases findPeaks.MSW +#' +#' @description This method performs peak detection in mass spectrometry +#' direct injection spectrum using a wavelet based algorithm. 
+#' +#' @details This is a wrapper around the peak picker in Bioconductor's +#' \code{MassSpecWavelet} package calling +#' \code{\link[MassSpecWavelet]{peakDetectionCWT}} and +#' \code{\link[MassSpecWavelet]{tuneInPeakInfo}} functions. +#' +#' @inheritParams findPeaks-MSW +#' +#' @inheritParams findChromPeaks-centWave +#' +#' @param object The \code{\linkS4class{xcmsRaw}} object on which peak +#' detection should be performed. +#' +#' @param verbose.columns Logical whether additional peak meta data columns +#' should be returned. +#' +#' @return +#' A matrix, each row representing an identified peak, with columns: +#' \describe{ +#' \item{mz}{m/z value of the peak at the centroid position.} +#' \item{mzmin}{Minimum m/z of the peak.} +#' \item{mzmax}{Maximum m/z of the peak.} +#' \item{rt}{Always \code{-1}.} +#' \item{rtmin}{Always \code{-1}.} +#' \item{rtmax}{Always \code{-1}.} +#' \item{into}{Integrated (original) intensity of the peak.} +#' \item{maxo}{Maximum intensity of the peak.} +#' \item{intf}{Always \code{NA}.} +#' \item{maxf}{Maximum MSW-filter response of the peak.} +#' \item{sn}{Signal to noise ratio.} +#' } +#' +#' @seealso \code{\link{MSW}} for the new user interface, +#' \code{\link{do_findPeaks_MSW}} for the downstream analysis +#' function or \code{\link[MassSpecWavelet]{peakDetectionCWT}} from the +#' \code{MassSpecWavelet} for details on the algorithm and additionally +#' supported parameters. +#' +#' @author Joachim Kutzera, Steffen Neumann, Johannes Rainer setMethod("findPeaks.MSW", "xcmsRaw", function(object, snthresh=3, verbose.columns = FALSE, ...) { res <- do_findPeaks_MSW(mz = object@env$mz, @@ -1196,10 +798,10 @@ setMethod("getEIC", "xcmsRaw", function(object, mzrange, rtrange = NULL, ############################################################ ## rawMat -##' @description Extracts a matrix with columns time (retention time), mz and -##' intensity from an xcmsRaw object. 
-##' -##' @noRd +#' @description Extracts a matrix with columns time (retention time), mz and +#' intensity from an xcmsRaw object. +#' +#' @noRd setMethod("rawMat", "xcmsRaw", function(object, mzrange = numeric(), rtrange = numeric(), @@ -1246,37 +848,37 @@ setMethod("rawMat", "xcmsRaw", function(object, intensity = int) } -.rawMat2 <- function(mz, int, scantime, valsPerSpect, mzrange = numeric(), - rtrange = numeric(), scanrange = numeric, - log = FALSE) { - if (length(rtrange) >= 2) { - rtrange <- range(rtrange) - scanrange <- range(which((scantime >= rtrange[1]) & - (scantime <= rtrange[2]))) - } - if (length(scanrange) < 2) - scanrange <- c(1, length(valsPerSpect)) - else scanrange <- range(scanrange) - if (scanrange[1] == 1) - startidx <- 1 - else - startidx <- sum(valsPerSpect[1:(scanrange[1]-1)]) + 1 - endidx <- sum(valsPerSpect[1:scanrange[2]]) - scans <- rep(scanrange[1]:scanrange[2], - valsPerSpect[scanrange[1]:scanrange[2]]) - masses <- mz[startidx:endidx] - massidx <- 1:length(masses) - if (length(mzrange) >= 2) { - mzrange <- range(mzrange) - massidx <- massidx[(masses >= mzrange[1] & (masses <= mzrange[2]))] - } - int <- int[startidx:endidx][massidx] - if (log && (length(int) > 0)) - int <- log(int + max(1 - min(int), 0)) - cbind(time = scantime[scans[massidx]], - mz = masses[massidx], - intensity = int) -} +## .rawMat2 <- function(mz, int, scantime, valsPerSpect, mzrange = numeric(), +## rtrange = numeric(), scanrange = numeric, +## log = FALSE) { +## if (length(rtrange) >= 2) { +## rtrange <- range(rtrange) +## scanrange <- range(which((scantime >= rtrange[1]) & +## (scantime <= rtrange[2]))) +## } +## if (length(scanrange) < 2) +## scanrange <- c(1, length(valsPerSpect)) +## else scanrange <- range(scanrange) +## if (scanrange[1] == 1) +## startidx <- 1 +## else +## startidx <- sum(valsPerSpect[1:(scanrange[1]-1)]) + 1 +## endidx <- sum(valsPerSpect[1:scanrange[2]]) +## scans <- rep(scanrange[1]:scanrange[2], +## 
valsPerSpect[scanrange[1]:scanrange[2]]) +## masses <- mz[startidx:endidx] +## massidx <- 1:length(masses) +## if (length(mzrange) >= 2) { +## mzrange <- range(mzrange) +## massidx <- massidx[(masses >= mzrange[1] & (masses <= mzrange[2]))] +## } +## int <- int[startidx:endidx][massidx] +## if (log && (length(int) > 0)) +## int <- log(int + max(1 - min(int), 0)) +## cbind(time = scantime[scans[massidx]], +## mz = masses[massidx], +## intensity = int) +## } ############################################################ @@ -2517,40 +2119,50 @@ setMethod("stitch.netCDF.new", "xcmsRaw", function(object, lockMass) { ############################################################ ## [ ## Subset by scan. -##' @title Subset an xcmsRaw object by scans -##' @aliases subset-xcmsRaw -##' -##' @description Subset an \code{\linkS4class{xcmsRaw}} object by scans. The -##' returned \code{\linkS4class{xcmsRaw}} object contains values for all scans -##' specified with argument \code{i}. Note that the \code{scanrange} slot of the -##' returned \code{xcmsRaw} will be \code{c(1, length(object@scantime))} and -##' hence not \code{range(i)}. -##' -##' @details Only subsetting by scan index in increasing order or by a logical -##' vector are supported. If not ordered, argument \code{i} is sorted -##' automatically. Indices which are larger than the total number of scans -##' are discarded. -##' @param x The \code{\linkS4class{xcmsRaw}} object that should be sub-setted. -##' @param i Integer or logical vector specifying the scans/spectra to which \code{x} should be sub-setted. -##' @param j Not supported. -##' @param drop Not supported. -##' @return The sub-setted \code{\linkS4class{xcmsRaw}} object. 
-##' @author Johannes Rainer -##' @seealso \code{\link{split.xcmsRaw}} -##' @examples -##' ## Load a test file -##' file <- system.file('cdf/KO/ko15.CDF', package = "faahKO") -##' xraw <- xcmsRaw(file) -##' ## The number of scans/spectra: -##' length(xraw@scantime) -##' -##' ## Subset the object to scans with a scan time from 3500 to 4000. -##' xsub <- xraw[xraw@scantime >= 3500 & xraw@scantime <= 4000] -##' range(xsub@scantime) -##' ## The number of scans: -##' length(xsub@scantime) -##' ## The number of values of the subset: -##' length(xsub@env$mz) +#' @title Subset an xcmsRaw object by scans +#' +#' @aliases subset-xcmsRaw +#' +#' @description Subset an \code{\linkS4class{xcmsRaw}} object by scans. The +#' returned \code{\linkS4class{xcmsRaw}} object contains values for all +#' scans specified with argument \code{i}. Note that the \code{scanrange} +#' slot of the returned \code{xcmsRaw} will be +#' \code{c(1, length(object@scantime))} and hence not \code{range(i)}. +#' +#' @details Only subsetting by scan index in increasing order or by a logical +#' vector are supported. If not ordered, argument \code{i} is sorted +#' automatically. Indices which are larger than the total number of scans +#' are discarded. +#' +#' @param x The \code{\linkS4class{xcmsRaw}} object that should be sub-setted. +#' +#' @param i Integer or logical vector specifying the scans/spectra to which +#' \code{x} should be sub-setted. +#' +#' @param j Not supported. +#' +#' @param drop Not supported. +#' +#' @return The sub-setted \code{\linkS4class{xcmsRaw}} object. +#' +#' @author Johannes Rainer +#' +#' @seealso \code{\link{split.xcmsRaw}} +#' +#' @examples +#' ## Load a test file +#' file <- system.file('cdf/KO/ko15.CDF', package = "faahKO") +#' xraw <- xcmsRaw(file) +#' ## The number of scans/spectra: +#' length(xraw@scantime) +#' +#' ## Subset the object to scans with a scan time from 3500 to 4000. 
+#' xsub <- xraw[xraw@scantime >= 3500 & xraw@scantime <= 4000] +#' range(xsub@scantime) +#' ## The number of scans: +#' length(xsub@scantime) +#' ## The number of values of the subset: +#' length(xsub@env$mz) setMethod("[", signature(x = "xcmsRaw", i = "logicalOrNumeric", j = "missing", @@ -2613,98 +2225,104 @@ setMethod("[", signature(x = "xcmsRaw", return(x) }) -##' @title The profile matrix -##' -##' @aliases profile-matrix profMat profMat,xcmsRaw-method -##' -##' @description The \emph{profile} matrix is an n x m matrix, n (rows) -##' representing equally spaced m/z values (bins) and m (columns) the -##' retention time of the corresponding scans. Each cell contains the maximum -##' intensity measured for the specific scan and m/z values falling within the -##' m/z bin. -##' -##' The \code{profMat} method creates a new profile matrix or returns the -##' profile matrix within the object's \code{@env} slot, if available. Settings -##' for the profile matrix generation, such as \code{step} (the bin size), -##' \code{method} or additional settings are extracted from the respective slots -##' of the \code{\linkS4class{xcmsRaw}} object. Alternatively it is possible to -##' specify all of the settings as additional parameters. -##' -##' @details Profile matrix generation methods: -##' \describe{ -##' \item{bin}{The default profile matrix generation method that does a simple -##' binning, i.e. aggregating of intensity values falling within an m/z bin.} -##' \item{binlin}{Binning followed by linear interpolation to impute missing -##' values. The value for m/z bins without a measured intensity are inferred by -##' a linear interpolation between neighboring bins with a measured intensity.} -##' \item{binlinbase}{Binning followed by a linear interpolation to impute -##' values for empty elements (m/z bins) within a user-definable proximity to -##' non-empty elements while stetting the element's value to the -##' \code{baselevel} otherwise. 
See \code{impute = "linbase"} parameter of -##' \code{\link{imputeLinInterpol}} for more details.} -##' \item{intlin}{Set the elements' values to the integral of the linearly -##' interpolated data from plus to minus half the step size.} -##' } -##' -##' @note From \code{xcms} version 1.51.1 on only the \code{profMat} method -##' should be used to extract the profile matrix instead of the previously -##' default way to access it directly \emph{via} \code{object@env$profile}. -##' -##' @param object The \code{\linkS4class{xcmsRaw}} object. -##' -##' @param method The profile matrix generation method. Allowed are \code{"bin"}, -##' \code{"binlin"}, \code{"binlinbase"} and \code{"intlin"}. See details -##' section for more information. -##' -##' @param step numeric(1) representing the m/z bin size. -##' -##' @param baselevel numeric(1) representing the base value to which -##' empty elements (i.e. m/z bins without a measured intensity) should be set. -##' Only considered if \code{method = "binlinbase"}. See \code{baseValue} -##' parameter of \code{\link{imputeLinInterpol}} for more details. -##' -##' @param basespace numeric(1) representing the m/z length after -##' which the signal will drop to the base level. Linear interpolation will be -##' used between consecutive data points falling within \code{2 * basespace} to -##' each other. Only considered if \code{method = "binlinbase"}. If not -##' specified, it defaults to \code{0.075}. Internally this parameter is -##' translated into the \code{distance} parameter of the -##' \code{\link{imputeLinInterpol}} function by -##' \code{distance = floor(basespace / step)}. See \code{distance} parameter -##' of \code{\link{imputeLinInterpol}} for more details. -##' -##' @param mzrange. Optional numeric(2) manually specifying the mz value range to -##' be used for binnind. If not provided, the whole mz value range is used. 
-##' -##' @seealso \code{\linkS4class{xcmsRaw}}, \code{\link{binYonX}} and -##' \code{\link{imputeLinInterpol}} for the employed binning and -##' missing value imputation methods, respectively. -##' \code{\link{profMat,XCMSnExp-method}} for the method on \code{\link{XCMSnExp}} -##' objects. -##' -##' @return \code{profMat} returns the profile matrix (rows representing scans, -##' columns equally spaced m/z values). -##' -##' @author Johannes Rainer -##' -##' @examples -##' file <- system.file('cdf/KO/ko15.CDF', package = "faahKO") -##' ## Load the data without generating the profile matrix (profstep = 0) -##' xraw <- xcmsRaw(file, profstep = 0) -##' ## Extract the profile matrix -##' profmat <- profMat(xraw, step = 0.3) -##' dim(profmat) -##' ## If not otherwise specified, the settings from the xraw object are used: -##' profinfo(xraw) -##' ## To extract a profile matrix with linear interpolation use -##' profmat <- profMat(xraw, step = 0.3, method = "binlin") -##' ## Alternatively, the profMethod of the xraw objects could be changed -##' profMethod(xraw) <- "binlin" -##' profmat_2 <- profMat(xraw, step = 0.3) -##' all.equal(profmat, profmat_2) -##' -##' @rdname profMat-xcmsSet -##' @name profMat-xcmsSet +#' @title The profile matrix +#' +#' @aliases profile-matrix profMat profMat,xcmsRaw-method +#' +#' @description The \emph{profile} matrix is an n x m matrix, n (rows) +#' representing equally spaced m/z values (bins) and m (columns) the +#' retention time of the corresponding scans. Each cell contains the maximum +#' intensity measured for the specific scan and m/z values falling within +#' the m/z bin. +#' +#' The \code{profMat} method creates a new profile matrix or returns the +#' profile matrix within the object's \code{@env} slot, if available. +#' Settings for the profile matrix generation, such as \code{step} (the bin +#' size), \code{method} or additional settings are extracted from the +#' respective slots of the \code{\linkS4class{xcmsRaw}} object. 
+#' Alternatively it is possible to specify all of the settings as +#' additional parameters. +#' +#' @details Profile matrix generation methods: +#' \describe{ +#' \item{bin}{The default profile matrix generation method that does a +#' simple binning, i.e. aggregating of intensity values falling within an +#' m/z bin.} +#' \item{binlin}{Binning followed by linear interpolation to impute missing +#' values. The values for m/z bins without a measured intensity are inferred +#' by a linear interpolation between neighboring bins with a measured +#' intensity.} +#' \item{binlinbase}{Binning followed by a linear interpolation to impute +#' values for empty elements (m/z bins) within a user-definable proximity to +#' non-empty elements while setting the element's value to the +#' \code{baselevel} otherwise. See \code{impute = "linbase"} parameter of +#' \code{\link{imputeLinInterpol}} for more details.} +#' \item{intlin}{Set the elements' values to the integral of the linearly +#' interpolated data from plus to minus half the step size.} +#' } +#' +#' @note From \code{xcms} version 1.51.1 on only the \code{profMat} method +#' should be used to extract the profile matrix instead of the previously +#' default way to access it directly \emph{via} \code{object@env$profile}. +#' +#' @param object The \code{\linkS4class{xcmsRaw}} object. +#' +#' @param method The profile matrix generation method. Allowed are \code{"bin"}, +#' \code{"binlin"}, \code{"binlinbase"} and \code{"intlin"}. See details +#' section for more information. +#' +#' @param step numeric(1) representing the m/z bin size. +#' +#' @param baselevel numeric(1) representing the base value to which +#' empty elements (i.e. m/z bins without a measured intensity) should be +#' set. Only considered if \code{method = "binlinbase"}. See +#' \code{baseValue} parameter of \code{\link{imputeLinInterpol}} for more +#' details. 
+#' +#' @param basespace numeric(1) representing the m/z length after +#' which the signal will drop to the base level. Linear interpolation will +#' be used between consecutive data points falling within +#' \code{2 * basespace} of each other. Only considered if +#' \code{method = "binlinbase"}. If not specified, it defaults to +#' \code{0.075}. Internally this parameter is translated into the +#' \code{distance} parameter of the \code{\link{imputeLinInterpol}} +#' function by \code{distance = floor(basespace / step)}. See +#' \code{distance} parameter of \code{\link{imputeLinInterpol}} for more +#' details. +#' +#' @param mzrange. Optional numeric(2) manually specifying the mz value range to +#' be used for binning. If not provided, the whole mz value range is used. +#' +#' @seealso \code{\linkS4class{xcmsRaw}}, \code{\link{binYonX}} and +#' \code{\link{imputeLinInterpol}} for the employed binning and +#' missing value imputation methods, respectively. +#' \code{\link{profMat,XCMSnExp-method}} for the method on +#' \code{\link{XCMSnExp}} objects. +#' +#' @return \code{profMat} returns the profile matrix (rows representing equally +#' spaced m/z values, columns the scans). 
+#' +#' @author Johannes Rainer +#' +#' @examples +#' file <- system.file('cdf/KO/ko15.CDF', package = "faahKO") +#' ## Load the data without generating the profile matrix (profstep = 0) +#' xraw <- xcmsRaw(file, profstep = 0) +#' ## Extract the profile matrix +#' profmat <- profMat(xraw, step = 0.3) +#' dim(profmat) +#' ## If not otherwise specified, the settings from the xraw object are used: +#' profinfo(xraw) +#' ## To extract a profile matrix with linear interpolation use +#' profmat <- profMat(xraw, step = 0.3, method = "binlin") +#' ## Alternatively, the profMethod of the xraw objects could be changed +#' profMethod(xraw) <- "binlin" +#' profmat_2 <- profMat(xraw, step = 0.3) +#' all.equal(profmat, profmat_2) +#' +#' @rdname profMat-xcmsSet +#' +#' @name profMat-xcmsSet setMethod("profMat", signature(object = "xcmsRaw"), function(object, method, step, baselevel, diff --git a/R/methods-xcmsSet.R b/R/methods-xcmsSet.R index 116ff3636..284e29b6f 100644 --- a/R/methods-xcmsSet.R +++ b/R/methods-xcmsSet.R @@ -55,13 +55,19 @@ setMethod("show", "xcmsSet", function(object) { #' @description This method updates an \emph{old} \code{\linkS4class{xcmsSet}} -#' object to the latest definition. +#' object to the latest definition. +#' #' @title Update an \code{\linkS4class{xcmsSet}} object +#' #' @param object The \code{\linkS4class{xcmsSet}} object to update. +#' #' @param ... Optional additional arguments. Currently ignored. +#' #' @param verbose Currently ignored. +#' #' @return An updated \code{\linkS4class{xcmsSet}} containing all data from #' the input object. 
+#' #' @author Johannes Rainer setMethod("updateObject", "xcmsSet", function(object, ..., verbose = FALSE) { ## Create a new empty xcmsSet and start filling it with the slot @@ -2064,22 +2070,27 @@ setMethod("specDist", signature(object="xcmsSet"), ############################################################ ## showError -##' @title Extract processing errors -##' @aliases showError -##' -##' @description If peak detection is performed with \code{\link{findPeaks}} -##' setting argument \code{stopOnError = FALSE} eventual errors during the -##' process do not cause to stop the processing but are recorded inside of the -##' resulting \code{\linkS4class{xcmsSet}} object. These errors can be accessed -##' with the \code{showError} method. -##' -##' @param object An \code{\linkS4class{xcmsSet}} object. -##' @param message. Logical indicating whether only the error message, or the -##' error itself should be returned. -##' @param ... Additional arguments. -##' @return A list of error messages (if \code{message. = TRUE}) or errors or an -##' empty list if no errors are present. -##' @author Johannes Rainer +#' @title Extract processing errors +#' +#' @aliases showError +#' +#' @description If peak detection is performed with \code{\link{findPeaks}} +#' setting argument \code{stopOnError = FALSE} any errors during the +#' process do not stop the processing but are recorded inside of +#' the resulting \code{\linkS4class{xcmsSet}} object. These errors can be +#' accessed with the \code{showError} method. +#' +#' @param object An \code{\linkS4class{xcmsSet}} object. +#' +#' @param message. Logical indicating whether only the error message, or the +#' error itself should be returned. +#' +#' @param ... Additional arguments. +#' +#' @return A list of error messages (if \code{message. = TRUE}) or errors or an +#' empty list if no errors are present. +#' +#' @author Johannes Rainer setMethod("showError", signature(object = "xcmsSet"), function(object, message. 
= TRUE, ...) { errs <- .getProcessErrors(object, ...) diff --git a/inst/NEWS b/inst/NEWS index 21f286dc5..fcedb8c84 100644 --- a/inst/NEWS +++ b/inst/NEWS @@ -1,3 +1,28 @@ +CHANGES IN VERSION 2.99.5 +------------------------- + +USER VISIBLE CHANGES: +- Re-enable sleep parameter in findPeaks.centWave and findPeaks.matchedFilter. + + +CHANGES IN VERSION 2.99.4 +------------------------- + +NEW FEATURES: +- Add plotChromPeaks function to plot the definition (rt and mz range) of + detected chromatographic peaks of one file into the mz-rt plane. +- Add plotChromPeakImage function to plot the number of detected peaks along + the retention time axis per file as an image plot. + +USER VISIBLE CHANGES: +- Move Chromatogram class and functionality to the MSnbase package +- Add argument msLevel to the findChromPeaks method to allow (chromatographic) + peak detection also on MS level > 1. + +BUG FIXES: +- Polarity information was not read from mzXML files (issue #192). + + CHANGES IN VERSION 2.99.3 ------------------------- diff --git a/inst/unitTests/runit.Chromatogram.R b/inst/unitTests/runit.Chromatogram.R index 53861f2d1..6ff5b3549 100644 --- a/inst/unitTests/runit.Chromatogram.R +++ b/inst/unitTests/runit.Chromatogram.R @@ -1,258 +1,174 @@ ## Unit tests related to the Chromatogram class. 
-library(xcms) -library(RUnit) - -test_Chromatogram_class <- function() { - ch <- new("Chromatogram") - ch@mz <- 3 - checkException(validObject(ch)) - ch@mz <- c(1, 3) - ch@precursorMz <- 4 - checkException(validObject(ch)) - ch@precursorMz <- c(4, 4) - ch@productMz <- 5 - checkException(validObject(ch)) - ## - int <- rnorm(100, mean = 200, sd = 2) - rt <- rnorm(100, mean = 300, sd = 3) - ## check exceptions: - checkException(xcms:::Chromatogram(intensity = int)) - chr <- Chromatogram() - chr@rtime <- rt - chr@intensity <- int - checkException(validObject(chr)) - ## issue #145: values are ordered based on rtime - chr <- Chromatogram(intensity = int, rtime = rt) - checkEquals(rtime(chr), sort(rt)) - checkEquals(intensity(chr), int[order(rt)]) - rt <- sort(rt) - ch <- xcms:::Chromatogram(intensity = int, rtime = rt) - checkEquals(rtime(ch), rt) - checkEquals(intensity(ch), int) - checkException(xcms:::Chromatogram(aggregationFun = "other")) - ch@aggregationFun <- "max" - checkTrue(validObject(ch)) - checkEquals(aggregationFun(ch), "max") - ch@aggregationFun <- "sum" - checkTrue(validObject(ch)) - checkEquals(aggregationFun(ch), "sum") - ch@aggregationFun <- "mean" - checkTrue(validObject(ch)) - checkEquals(aggregationFun(ch), "mean") - ch@aggregationFun <- "min" - checkTrue(validObject(ch)) - checkEquals(aggregationFun(ch), "min") - ch@fromFile <- 3L - checkTrue(validObject(ch)) - checkEquals(fromFile(ch), 3L) - checkEquals(length(ch), length(rt)) - ## as.data.frame - df <- as.data.frame(ch) - checkEquals(df, data.frame(rtime = rt, intensity = int)) - ch <- xcms:::Chromatogram(mz = c(1, 3)) - checkEquals(ch@mz, c(1, 3)) - checkEquals(mz(ch), c(1, 3)) - checkEquals(mz(ch, filter = TRUE), c(NA_real_, NA_real_)) - ch <- xcms:::Chromatogram(filterMz = c(1, 3)) - checkEquals(ch@filterMz, c(1, 3)) - checkEquals(mz(ch, filter = TRUE), c(1, 3)) - checkEquals(mz(ch, filter = FALSE), c(NA_real_, NA_real_)) - ch <- xcms:::Chromatogram(precursorMz = 123) - 
checkEquals(ch@precursorMz, c(123, 123)) - checkEquals(precursorMz(ch), c(123, 123)) - ch <- xcms:::Chromatogram(productMz = 123) - checkEquals(ch@productMz, c(123, 123)) - checkEquals(productMz(ch), c(123, 123)) +test_deprecated_Chromatogram <- function() { + chrs <- extractChromatograms(filterFile(od_x, file = 2)) + plotChromatogram(chrs) } -test_filterRt_Chromatogram <- function() { - int <- rnorm(100, mean = 200, sd = 2) - rt <- rnorm(100, mean = 300, sd = 3) - chr <- Chromatogram(intensity = int, rtime = sort(rt)) - - chr_2 <- filterRt(chr, rt = c(200, 300)) - checkTrue(all(rtime(chr_2) >= 200)) - checkTrue(all(rtime(chr_2) <= 300)) - ints <- intensity(chr_2) - checkEquals(ints, intensity(chr)[rtime(chr) >= 200 & rtime(chr) <= 300]) - - ## No rt - checkEquals(chr, filterRt(chr)) - - ## Outside range - chr_2 <- filterRt(chr, rt = c(400, 500)) - checkTrue(length(chr_2) == 0) - checkEquals(intensity(chr_2), numeric()) - checkEquals(rtime(chr_2), numeric()) -} test_extractChromatograms <- function() { ## OnDiskMSnExp ## TIC - chrs <- extractChromatograms(filterFile(od_x, file = 2)) - plotChromatogram(chrs) + chrs <- chromatogram(filterFile(od_x, file = 2)) + plot(chrs) spctr <- spectra(filterFile(od_x, file = 2)) ints <- unlist(lapply(spctr, function(z) return(sum(intensity(z))))) - checkEquals(intensity(chrs[[1]]), ints) - checkEquals(rtime(chrs[[1]]), unlist(lapply(spctr, rtime))) + checkEquals(intensity(chrs[1, 1]), ints) + checkEquals(rtime(chrs[1, 1]), unlist(lapply(spctr, rtime))) ## BPC - chrs <- extractChromatograms(filterFile(od_x, file = 2), - aggregationFun = "max") + chrs <- chromatogram(filterFile(od_x, file = 2), + aggregationFun = "max") ints <- unlist(lapply(spctr, function(z) return(max(intensity(z))))) - checkEquals(intensity(chrs[[1]]), ints) - checkEquals(rtime(chrs[[1]]), unlist(lapply(spctr, rtime))) + checkEquals(intensity(chrs[1, 1]), ints) + checkEquals(rtime(chrs[1, 1]), unlist(lapply(spctr, rtime))) ## XCMSnExp xod_x <- faahko_xod - chrs 
<- extractChromatograms(filterFile(xod_x, file = 2)) + chrs <- chromatogram(filterFile(xod_x, file = 2)) ints <- unlist(lapply(spctr, function(z) return(sum(intensity(z))))) - checkEquals(intensity(chrs[[1]]), ints) - checkEquals(rtime(chrs[[1]]), unlist(lapply(spctr, rtime))) + checkEquals(intensity(chrs[1, 1]), ints) + checkEquals(rtime(chrs[1, 1]), unlist(lapply(spctr, rtime))) ## BPC - chrs <- extractChromatograms(filterFile(xod_x, file = 2), - aggregationFun = "max") + chrs <- chromatogram(filterFile(xod_x, file = 2), + aggregationFun = "max") ints <- unlist(lapply(spctr, function(z) - return(max(intensity(z))))) - chrs_2 <- xcms:::.extractMultipleChromatograms(filterFile(xod_x, file = 2), - aggregationFun = "max") - checkEquals(intensity(chrs[[1]]), intensity(chrs_2[[1]])) - - checkEquals(intensity(chrs[[1]]), ints) - checkEquals(rtime(chrs[[1]]), unlist(lapply(spctr, rtime))) + return(max(intensity(z))))) + checkEquals(intensity(chrs[1, 1]), ints) + checkEquals(rtime(chrs[1, 1]), unlist(lapply(spctr, rtime))) ## with adjusted retention times. - chrs <- extractChromatograms(filterFile(xod_xgr, file = 2), - adjustedRtime = FALSE, aggregationFun = "max") - checkEquals(intensity(chrs[[1]]), ints) - checkEquals(rtime(chrs[[1]]), unlist(lapply(spctr, rtime))) - chrs <- extractChromatograms(filterFile(xod_xgr, file = 2, - keepAdjustedRtime = TRUE), - aggregationFun = "max") + chrs <- chromatogram(filterFile(xod_xgr, file = 2), + adjustedRtime = FALSE, aggregationFun = "max") + checkEquals(intensity(chrs[1, 1]), ints) + checkEquals(rtime(chrs[1, 1]), unlist(lapply(spctr, rtime))) + chrs <- chromatogram(filterFile(xod_xgr, file = 2, + keepAdjustedRtime = TRUE), + aggregationFun = "max") checkEquals(intensity(chrs[[1]]), ints) - checkEquals(rtime(chrs[[1]]), rtime(xod_xgr, bySample = TRUE)[[2]]) + checkEquals(rtime(chrs[1, 1]), rtime(xod_xgr, bySample = TRUE, + adjusted = TRUE)[[2]]) ## Subset to certain mz range in all files. 
- chrs_adj <- extractChromatograms(xod_xgr, mz = c(300, 330)) - chrs_raw <- extractChromatograms(xod_x, mz = c(300, 330)) - checkTrue(sum(rtime(chrs_adj[[1]]) != rtime(chrs_raw[[1]])) > - length(chrs_raw[[1]]) / 2) - checkEquals(rtime(chrs_adj[[1]]), rtime(xod_xgr, bySample = TRUE)[[1]]) - checkEquals(rtime(chrs_adj[[2]]), rtime(xod_xgr, bySample = TRUE)[[2]]) - checkEquals(rtime(chrs_adj[[3]]), rtime(xod_xgr, bySample = TRUE)[[3]]) + chrs_adj <- chromatogram(xod_xgr, mz = c(300, 330)) + chrs_raw <- chromatogram(xod_x, mz = c(300, 330)) + checkTrue(sum(rtime(chrs_adj[1, 1]) != rtime(chrs_raw[1, 1])) > + length(chrs_raw[1, 1]) / 2) + checkEquals(rtime(chrs_adj[1, 1]), rtime(xod_xgr, bySample = TRUE)[[1]]) + checkEquals(rtime(chrs_adj[1, 2]), rtime(xod_xgr, bySample = TRUE)[[2]]) + checkEquals(rtime(chrs_adj[1, 3]), rtime(xod_xgr, bySample = TRUE)[[3]]) ## Now subsetting for mz: tmp <- filterFile(od_x, file = 2) - chrs <- extractChromatograms(tmp, mz = c(300, 400)) - checkEquals(mz(chrs[[1]], filter = TRUE), c(300, 400)) + chrs <- chromatogram(tmp, mz = c(300, 400)) + checkEquals(mz(chrs[1, 1], filter = TRUE), c(300, 400)) suppressWarnings(spctr <- spectra(filterMz(tmp, mz = c(300, 400)))) ints <- unlist(lapply(spctr, function(z) return(sum(intensity(z))))) - ints2 <- intensity(chrs[[1]]) + ints2 <- intensity(chrs[1, 1]) ints2[is.na(ints2)] <- 0 checkEquals(ints2, ints) - checkEquals(rtime(chrs[[1]]), unlist(lapply(spctr, rtime))) + checkEquals(rtime(chrs[1, 1]), unlist(lapply(spctr, rtime))) ## with adjusted retention times - chrs <- extractChromatograms(filterFile(xod_xgr, file = 2, - keepAdjustedRtime = TRUE), - mz = c(300, 400)) + chrs <- chromatogram(filterFile(xod_xgr, file = 2, + keepAdjustedRtime = TRUE), + mz = c(300, 400)) ints <- unlist(lapply(spctr, function(z) return(sum(intensity(z))))) - ints2 <- intensity(chrs[[1]]) + ints2 <- intensity(chrs[1, 1]) ints2[is.na(ints2)] <- 0 checkEquals(ints2, ints) - checkEquals(rtime(chrs[[1]]), rtime(xod_xgr, 
bySample = TRUE)[[2]]) + checkEquals(rtime(chrs[1, 1]), rtime(xod_xgr, bySample = TRUE)[[2]]) ## Now subsetting for rt: - chrs <- extractChromatograms(od_x, rt = c(2700, 2900)) - checkTrue(all(rtime(chrs[[1]]) >= 2700 & rtime(chrs[[1]]) <= 2900)) - checkTrue(all(rtime(chrs[[2]]) >= 2700 & rtime(chrs[[2]]) <= 2900)) - checkTrue(all(rtime(chrs[[3]]) >= 2700 & rtime(chrs[[3]]) <= 2900)) + chrs <- chromatogram(od_x, rt = c(2700, 2900)) + checkTrue(all(rtime(chrs[1, 1]) >= 2700 & rtime(chrs[1, 1]) <= 2900)) + checkTrue(all(rtime(chrs[1, 2]) >= 2700 & rtime(chrs[1, 2]) <= 2900)) + checkTrue(all(rtime(chrs[1, 3]) >= 2700 & rtime(chrs[1, 3]) <= 2900)) spctr <- spectra(filterRt(od_x, rt = c(2700, 2900))) ints <- split(unlist(lapply(spctr, function(z) sum(intensity(z)))), f = unlist(lapply(spctr, fromFile))) - checkEquals(ints[[1]], intensity(chrs[[1]])) - checkEquals(ints[[2]], intensity(chrs[[2]])) - checkEquals(ints[[3]], intensity(chrs[[3]])) + checkEquals(ints[[1]], intensity(chrs[1, 1])) + checkEquals(ints[[2]], intensity(chrs[1, 2])) + checkEquals(ints[[3]], intensity(chrs[1, 3])) ## Using adjusted rt: - chrs2 <- extractChromatograms(xod_xgr, rt = c(2700, 2900)) - checkTrue(all(rtime(chrs2[[1]]) >= 2700 & rtime(chrs2[[1]]) <= 2900)) - checkTrue(all(rtime(chrs2[[2]]) >= 2700 & rtime(chrs2[[2]]) <= 2900)) - checkTrue(all(rtime(chrs2[[3]]) >= 2700 & rtime(chrs2[[3]]) <= 2900)) - checkTrue(length(chrs[[1]]) != length(chrs2[[1]])) - checkTrue(length(chrs[[2]]) == length(chrs2[[2]])) - checkTrue(length(chrs[[3]]) != length(chrs2[[3]])) + chrs2 <- chromatogram(xod_xgr, rt = c(2700, 2900)) + checkTrue(all(rtime(chrs2[1, 1]) >= 2700 & rtime(chrs2[1, 1]) <= 2900)) + checkTrue(all(rtime(chrs2[1, 2]) >= 2700 & rtime(chrs2[1, 2]) <= 2900)) + checkTrue(all(rtime(chrs2[1, 3]) >= 2700 & rtime(chrs2[1, 3]) <= 2900)) + checkTrue(length(chrs[1, 1]) != length(chrs2[1, 1])) + checkTrue(length(chrs[1, 2]) == length(chrs2[1, 2])) + checkTrue(length(chrs[1, 3]) != length(chrs2[1, 3])) tmp <- 
filterRt(xod_xgr, rt = c(2700, 2900)) - checkEquals(rtime(chrs2[[1]]), rtime(tmp, bySample = TRUE)[[1]]) - checkEquals(rtime(chrs2[[2]]), rtime(tmp, bySample = TRUE)[[2]]) - checkEquals(rtime(chrs2[[3]]), rtime(tmp, bySample = TRUE)[[3]]) + checkEquals(rtime(chrs2[1, 1]), rtime(tmp, bySample = TRUE)[[1]]) + checkEquals(rtime(chrs2[1, 2]), rtime(tmp, bySample = TRUE)[[2]]) + checkEquals(rtime(chrs2[1, 3]), rtime(tmp, bySample = TRUE)[[3]]) ## Check the values... keepSp <- which(adjustedRtime(xod_xgr) >= 2700 & adjustedRtime(xod_xgr) <= 2900) tmp <- xod_xgr[keepSp] ints <- unlist(lapply(spectra(tmp), function(z) sum(intensity(z)))) intsL <- split(ints, fromFile(tmp)) - checkEquals(intensity(chrs2[[1]]), intsL[[1]]) - checkEquals(intensity(chrs2[[2]]), intsL[[2]]) - checkEquals(intensity(chrs2[[3]]), intsL[[3]]) + checkEquals(intensity(chrs2[1, 1]), intsL[[1]]) + checkEquals(intensity(chrs2[1, 2]), intsL[[2]]) + checkEquals(intensity(chrs2[1, 3]), intsL[[3]]) ## Now subsetting for rt and mz: - chrs <- extractChromatograms(od_x, rt = c(2700, 2900), mz = 335) - checkTrue(all(rtime(chrs[[1]]) >= 2700 & rtime(chrs[[1]]) <= 2900)) - checkTrue(all(rtime(chrs[[2]]) >= 2700 & rtime(chrs[[2]]) <= 2900)) - checkTrue(all(rtime(chrs[[3]]) >= 2700 & rtime(chrs[[3]]) <= 2900)) + chrs <- chromatogram(od_x, rt = c(2700, 2900), mz = 335) + checkTrue(all(rtime(chrs[1, 1]) >= 2700 & rtime(chrs[1, 1]) <= 2900)) + checkTrue(all(rtime(chrs[1, 2]) >= 2700 & rtime(chrs[1, 2]) <= 2900)) + checkTrue(all(rtime(chrs[1, 3]) >= 2700 & rtime(chrs[1, 3]) <= 2900)) spctr <- spectra(filterMz(filterRt(od_x, rt = c(2700, 2900)), mz = 335)) ints <- split(unlist(lapply(spctr, function(z) { if (z@peaksCount) return(sum(intensity(z))) else return(NA) })), f = unlist(lapply(spctr, fromFile))) - checkEquals(ints[[1]], intensity(chrs[[1]])) - checkEquals(ints[[2]], intensity(chrs[[2]])) - checkEquals(ints[[3]], intensity(chrs[[3]])) + checkEquals(ints[[1]], intensity(chrs[1, 1])) + checkEquals(ints[[2]], 
intensity(chrs[1, 2])) + checkEquals(ints[[3]], intensity(chrs[1, 3])) ## Using adjusted rt: LLLL - chrs <- extractChromatograms(xod_xgr, rt = c(2700, 2900), mz = 335) - checkTrue(all(rtime(chrs[[1]]) >= 2700 & rtime(chrs[[1]]) <= 2900)) - checkTrue(all(rtime(chrs[[2]]) >= 2700 & rtime(chrs[[2]]) <= 2900)) - checkTrue(all(rtime(chrs[[3]]) >= 2700 & rtime(chrs[[3]]) <= 2900)) + chrs <- chromatogram(xod_xgr, rt = c(2700, 2900), mz = 335) + checkTrue(all(rtime(chrs[1, 1]) >= 2700 & rtime(chrs[1, 1]) <= 2900)) + checkTrue(all(rtime(chrs[1, 2]) >= 2700 & rtime(chrs[1, 2]) <= 2900)) + checkTrue(all(rtime(chrs[1, 3]) >= 2700 & rtime(chrs[1, 3]) <= 2900)) spctr <- spectra(filterMz(filterRt(xod_xgr, rt = c(2700, 2900)), mz = 335)) ints <- split(unlist(lapply(spctr, function(z) { if (z@peaksCount) return(sum(intensity(z))) else return(NA) })), f = unlist(lapply(spctr, fromFile))) - checkEquals(ints[[1]], intensity(chrs[[1]])) - checkEquals(ints[[2]], intensity(chrs[[2]])) - checkEquals(ints[[3]], intensity(chrs[[3]])) + checkEquals(ints[[1]], intensity(chrs[1, 1])) + checkEquals(ints[[2]], intensity(chrs[1, 2])) + checkEquals(ints[[3]], intensity(chrs[1, 3])) ## Check the rtime. tmp <- filterRt(xod_xgr, rt = c(2700, 2900)) - checkEquals(rtime(chrs[[1]]), rtime(tmp, bySample = TRUE)[[1]]) - checkEquals(rtime(chrs[[2]]), rtime(tmp, bySample = TRUE)[[2]]) - checkEquals(rtime(chrs[[3]]), rtime(tmp, bySample = TRUE)[[3]]) + checkEquals(rtime(chrs[1, 1]), rtime(tmp, bySample = TRUE)[[1]]) + checkEquals(rtime(chrs[1, 2]), rtime(tmp, bySample = TRUE)[[2]]) + checkEquals(rtime(chrs[1, 3]), rtime(tmp, bySample = TRUE)[[3]]) ## What if we're completely off? - chrs <- extractChromatograms(od_x, rt = c(5000, 5500)) - checkTrue(length(chrs) == 0) + chrs <- chromatogram(od_x, rt = c(5000, 5500)) + checkTrue(nrow(chrs) == 0) ## Now rt is within range, but mz is completely off. We expect Chromatograms ## with same length than there are spectra in the rt range, but all NA ## values. 
- chrs <- extractChromatograms(od_x, rt = c(2600, 2700), mz = 12000) + chrs <- chromatogram(od_x, rt = c(2600, 2700), mz = 12000) rts <- split(rtime(od_x), f = fromFile(od_x)) rts <- lapply(rts, function(z) z[z >= 2600 & z <= 2700]) - checkEquals(lengths(chrs), lengths(chrs)) + checkEquals(unname(lengths(chrs[1, , drop = TRUE])), unname(lengths(rts))) ## All have to be NA. - checkTrue(all(unlist(lapply(chrs, function(z) is.na(intensity(z)))))) + checkTrue(all(unlist(lapply(chrs[1, ], function(z) is.na(intensity(z)))))) ## Multiple ranges. rtr <- matrix(c(2700, 2900, 2600, 2800), ncol = 2, byrow = TRUE) mzr <- matrix(c(355, 355, 344, 344), ncol = 2, byrow = TRUE) - chrs <- extractChromatograms(od_x, rt = rtr, mz = mzr) + chrs <- chromatogram(od_x, rt = rtr, mz = mzr) - checkTrue(all(rtime(chrs[[1]][[1]]) >= 2700 & rtime(chrs[[1]][[1]]) <= 2900)) - checkTrue(all(rtime(chrs[[1]][[2]]) >= 2700 & rtime(chrs[[1]][[2]]) <= 2900)) - checkTrue(all(rtime(chrs[[1]][[3]]) >= 2700 & rtime(chrs[[1]][[3]]) <= 2900)) - checkTrue(all(rtime(chrs[[2]][[1]]) >= 2600 & rtime(chrs[[2]][[1]]) <= 2800)) - checkTrue(all(rtime(chrs[[2]][[2]]) >= 2600 & rtime(chrs[[2]][[2]]) <= 2800)) - checkTrue(all(rtime(chrs[[2]][[3]]) >= 2600 & rtime(chrs[[2]][[3]]) <= 2800)) + checkTrue(all(rtime(chrs[1, 1]) >= 2700 & rtime(chrs[1, 1]) <= 2900)) + checkTrue(all(rtime(chrs[1, 2]) >= 2700 & rtime(chrs[1, 2]) <= 2900)) + checkTrue(all(rtime(chrs[1, 3]) >= 2700 & rtime(chrs[1, 3]) <= 2900)) + checkTrue(all(rtime(chrs[2, 1]) >= 2600 & rtime(chrs[2, 1]) <= 2800)) + checkTrue(all(rtime(chrs[2, 2]) >= 2600 & rtime(chrs[2, 2]) <= 2800)) + checkTrue(all(rtime(chrs[2, 3]) >= 2600 & rtime(chrs[2, 3]) <= 2800)) spctr <- spectra(filterMz(filterRt(od_x, rt = rtr[1, ]), mz = mzr[1, ])) ints <- split(unlist(lapply(spctr, function(z) { @@ -260,9 +176,9 @@ test_extractChromatograms <- function() { return(sum(intensity(z))) else return(NA) })), f = unlist(lapply(spctr, fromFile))) - checkEquals(ints[[1]], 
intensity(chrs[[1]][[1]])) - checkEquals(ints[[2]], intensity(chrs[[1]][[2]])) - checkEquals(ints[[3]], intensity(chrs[[1]][[3]])) + checkEquals(ints[[1]], intensity(chrs[1, 1])) + checkEquals(ints[[2]], intensity(chrs[1, 2])) + checkEquals(ints[[3]], intensity(chrs[1, 3])) spctr <- spectra(filterMz(filterRt(od_x, rt = rtr[2, ]), mz = mzr[2, ])) ints <- split(unlist(lapply(spctr, function(z) { @@ -270,81 +186,57 @@ test_extractChromatograms <- function() { return(sum(intensity(z))) else return(NA) })), f = unlist(lapply(spctr, fromFile))) - checkEquals(ints[[1]], intensity(chrs[[2]][[1]])) - checkEquals(ints[[2]], intensity(chrs[[2]][[2]])) - checkEquals(ints[[3]], intensity(chrs[[2]][[3]])) + checkEquals(ints[[1]], intensity(chrs[2, 1])) + checkEquals(ints[[2]], intensity(chrs[2, 2])) + checkEquals(ints[[3]], intensity(chrs[2, 3])) ## Multiple ranges with complete off ranges. rtr <- matrix(c(2700, 2900, 5000, 5500, 2600, 2800), ncol = 2, byrow = TRUE) mzr <- matrix(c(355, 355, 500, 500, 344, 344), ncol = 2, byrow = TRUE) - chrs <- extractChromatograms(od_x, rt = rtr, mz = mzr) - checkTrue(length(chrs) == 3) - checkTrue(all(lengths(chrs[[2]]) == 0)) + chrs <- chromatogram(od_x, rt = rtr, mz = mzr) + checkTrue(nrow(chrs) == 3) + checkTrue(all(lengths(chrs[2, ]) == 0)) rtr <- matrix(c(2700, 2900, 2700, 2900, 2600, 2800), ncol = 2, byrow = TRUE) mzr <- matrix(c(355, 355, 100000, 100000, 344, 344), ncol = 2, byrow = TRUE) - chrs <- extractChromatograms(od_x, rt = rtr, mz = mzr) - checkTrue(length(chrs) == 3) + chrs <- chromatogram(od_x, rt = rtr, mz = mzr) + checkTrue(nrow(chrs) == 3) ## All values in the 2nd Chromosome object have to be NA. 
- checkTrue(all(unlist(lapply(chrs[[2]], function(z) is.na(intensity(z)))))) + checkTrue(all(unlist(lapply(chrs[2, ], function(z) is.na(intensity(z)))))) } -test_clean_chromatogram <- function() { - chr <- Chromatogram( - rtime = 1:12, - intensity = c(0, 0, 20, 0, 0, 0, 123, 124343, 3432, 0, 0, 0)) - chr_clnd <- clean(chr) - checkEquals(rtime(chr_clnd), c(2, 3, 4, 6, 7, 8, 9,10)) +## dontrun_test_with_MRM <- function() { +## ## Test how we could read the data. +## ## chromatogramsInfo +## library(msdata) +## fls <- proteomics(full.names = TRUE) - chr_clnd <- clean(chr, all = TRUE) - checkTrue(length(chr_clnd) == 4) - checkEquals(rtime(chr_clnd), c(3, 7, 8, 9)) +## library(mzR) +## msf <- mzR::openMSfile(fls[2], "pwiz") +## chrs <- chromatograms(msf) +## chrsI <- chromatogram(msf) +## ## The same essentially. +## nChrom(msf) +## length(chrs) +## nrow(chrs[[1]]) +## mzR::close(msf) +## ## +## msf <- mzR::openMSfile(fls[1], "pwiz") +## chrs <- chromatograms(msf) +## chrs <- chromatograms(msf) +## nChrom(msf) +## length(chrs) +## nrow(chrs[[1]]) - ## With NA - chr <- Chromatogram( - rtime = 1:12, - intensity = c(0, NA, 20, 0, 0, 0, 123, 124343, 3432, 0, 0, 0)) - chr_clnd <- clean(chr) - checkEquals(rtime(chr_clnd), c(3, 4, 6, 7, 8, 9, 10)) - chr <- Chromatogram( - rtime = 1:12, - intensity = c(NA, NA, 20, NA, NA, NA, 123, 124343, 3432, NA, NA, NA)) - chr_clnd <- clean(chr) - checkEquals(rtime(chr_clnd), c(3, 7, 8, 9)) -} - -dontrun_test_with_MRM <- function() { - ## Test how we could read the data. - ## chromatogramsInfo - library(msdata) - fls <- proteomics(full.names = TRUE) - - library(mzR) - msf <- mzR::openMSfile(fls[2], "pwiz") - chrs <- chromatograms(msf) - chrsI <- chromatogram(msf) - ## The same essentially. 
- nChrom(msf) - length(chrs) - nrow(chrs[[1]]) - mzR::close(msf) - ## - msf <- mzR::openMSfile(fls[1], "pwiz") - chrs <- chromatograms(msf) - chrs <- chromatograms(msf) - nChrom(msf) - length(chrs) - nrow(chrs[[1]]) +## ## Now, we've got the following info: cvParam +## ## accession="MS:1000235" name="total ion current chromatogram" value="" +## ## Check http://proteowizard.sourceforge.net/dox/namespacepwiz_1_1msdata.html +## ## Potentially interesting: +## ## o ChromatogramIdentity nope, no header info. +## ## OK, have to look for chromatogram with index="1", then within +## ## for cvParam accession="MS:1000827" and its value -> Q1 or precursorMz +## ## then within for cvParam accession="MS:1000827" and its value +## ## -> Q3. - ## Now, we've got the following info: cvParam - ## accession="MS:1000235" name="total ion current chromatogram" value="" - ## Check http://proteowizard.sourceforge.net/dox/namespacepwiz_1_1msdata.html - ## Potentially interesting: - ## o ChromatogramIdentity nope, no header info. - ## OK, have to look for chromatogram with index="1", then within - ## for cvParam accession="MS:1000827" and its value -> Q1 or precursorMz - ## then within for cvParam accession="MS:1000827" and its value - ## -> Q3. - - ## https://sourceforge.net/p/proteowizard/mailman/message/27571266/ -} +## ## https://sourceforge.net/p/proteowizard/mailman/message/27571266/ +## } diff --git a/inst/unitTests/runit.XCMSnExp.R b/inst/unitTests/runit.XCMSnExp.R index ef22e7f43..062ad07e0 100644 --- a/inst/unitTests/runit.XCMSnExp.R +++ b/inst/unitTests/runit.XCMSnExp.R @@ -216,11 +216,16 @@ test_XCMSnExp_class_accessors <- function() { checkEquals(rtime(xod2, adjusted = TRUE), adjustedRtime(xod2)) ## Indirect test that the ordering of the adjusted retention times matches ## ordering of rtime. 
- tmp <- unlist(adjustedRtime(xod2, bySample = TRUE)) - tmp_diff <- tmp - rtime(xod2) - tmp_diff_2 <- adjustedRtime(xod2, bySample = FALSE) - rtime(xod2) - checkTrue(max(tmp_diff) > max(tmp_diff_2)) - checkEquals(names(adjustedRtime(xod2)), names(rtime(xod2))) + ## From MSnbase version >= 2.3.9 values are ordered first by file then by + ## spectrum. + if (grepl("^F", names(rtime(xod2)[1]))) { + rts_by_sample <- adjustedRtime(xod2, bySample = TRUE) + rts <- adjustedRtime(xod2) + checkEquals(unname(rts_by_sample[[2]]), + unname(rts[grep(names(rts), pattern = "F2")])) + checkEquals(unname(unlist(rts_by_sample)), + unname(rts)) + } ## Wrong assignments. checkException(adjustedRtime(xod2) <- xs_2@rt$corrected[1:2]) ## bracket subset @@ -1022,7 +1027,7 @@ test_MsFeatureData_class_accessors <- function() { ## Test extraction of chromatograms. -test_extractChromatograms <- function() { +test_chromatogram <- function() { ## Have: od_x: OnDiskMSNnExp ## xod_x: XCMSnExp, with detected chromPeaks. ## xod_xg: with feature groups. @@ -1034,23 +1039,23 @@ test_extractChromatograms <- function() { ## BPC - CDF don't habe a BPC. 
rtr <- c(2600, 2700) tmp_obj <- filterFile(xod_x, file = c(1, 2)) - res <- xcms:::extractChromatograms(tmp_obj, aggregationFun = "max", rt = rtr) - checkTrue(all(rtime(res[[1]]) >= rtr[1])) - checkTrue(all(rtime(res[[1]]) <= rtr[2])) - checkTrue(all(rtime(res[[2]]) >= rtr[1])) - checkTrue(all(rtime(res[[2]]) <= rtr[2])) + res <- chromatogram(tmp_obj, aggregationFun = "max", rt = rtr) + checkTrue(all(rtime(res[1, 1]) >= rtr[1])) + checkTrue(all(rtime(res[1, 1]) <= rtr[2])) + checkTrue(all(rtime(res[1, 2]) >= rtr[1])) + checkTrue(all(rtime(res[1, 2]) <= rtr[2])) tmp <- filterRt(filterFile(xod_x, file = 2), rt = rtr) - checkEquals(rtime(tmp), rtime(res[[2]])) + checkEquals(rtime(tmp), rtime(res[1, 2])) ints <- spectrapply(tmp, function(z) return(max(intensity(z)))) - checkEquals(unlist(ints), intensity(res[[2]])) + checkEquals(unlist(ints), intensity(res[1, 2])) ## Check names - checkEquals(names(rtime(res[[1]])), names(intensity(res[[1]]))) + checkEquals(names(rtime(res[1, 1])), names(intensity(res[1, 1]))) ## Assure we get the same with an OnDiskMSnExp and grouped XCMSnExp - res_2 <- xcms:::extractChromatograms(filterFile(od_x, file = c(1, 2)), - aggregationFun = "max", rt = rtr) + res_2 <- chromatogram(filterFile(od_x, file = c(1, 2)), + aggregationFun = "max", rt = rtr) checkEquals(res, res_2) - res_3 <- xcms:::extractChromatograms(filterFile(xod_xg, file = c(1, 2)), - aggregationFun = "max", rt = rtr) + res_3 <- chromatogram(filterFile(xod_xg, file = c(1, 2)), + aggregationFun = "max", rt = rtr) checkEquals(res, res_3) ## XCMSnExp: with mzrange and rtrange: @@ -1059,10 +1064,11 @@ test_extractChromatograms <- function() { featureDefinitions(tmp) tmp <- filterRt(xod_xg, rt = rtr) featureDefinitions(tmp) - res_2 <- xcms:::extractChromatograms(xod_xg, rt = rtr, mz = mzr) + res_2 <- chromatogram(xod_xg, rt = rtr, mz = mzr) ## ## XCMSnExp with adjusted rtime + ## SEE runit.Chromatogram.R } test_signal_integration <- function() { @@ -1071,9 +1077,9 @@ 
test_signal_integration <- function() { tmp <- xod_xgrg rtr <- chromPeaks(tmp)[1, c("rtmin", "rtmax")] mzr <- chromPeaks(tmp)[1, c("mzmin", "mzmax")] - chr <- extractChromatograms(tmp, rt = rtr, mz = mzr) - pkInt <- sum(intensity(chr[[1]]) * - ((rtr[2] - rtr[1]) / (length(chr[[1]]) - 1))) + chr <- chromatogram(tmp, rt = rtr, mz = mzr) + pkInt <- sum(intensity(chr[1, 1]) * + ((rtr[2] - rtr[1]) / (length(chr[1, 1]) - 1))) checkEquals(pkInt, unname(chromPeaks(tmp)[1, "into"])) tmp <- filterFile(xod_xgrg, file = 2) @@ -1083,7 +1089,7 @@ test_signal_integration <- function() { for (i in idxs) { rtr <- chromPeaks(tmp)[i, c("rtmin", "rtmax")] mzr <- chromPeaks(tmp)[i, c("mzmin", "mzmax")] - chr <- extractChromatograms(tmp, rt = rtr, mz = mzr)[[1]] + chr <- chromatogram(tmp, rt = rtr, mz = mzr)[1, 1] ints <- intensity(chr) pkI <- sum(ints, na.rm = TRUE) * ((rtr[2] - rtr[1]) / (length(ints) - 1)) ## cat(" ", chromPeaks(tmp)[i, "into"], " - ", pkI, "\n") @@ -1096,9 +1102,9 @@ test_signal_integration <- function() { tmp <- findChromPeaks(filterFile(od_x, 2), param = MatchedFilterParam()) rtr <- chromPeaks(tmp)[1, c("rtmin", "rtmax")] mzr <- chromPeaks(tmp)[1, c("mzmin", "mzmax")] - chr <- extractChromatograms(tmp, rt = rtr, mz = mzr) - pkInt <- sum(intensity(chr[[1]]) * - ((rtr[2] - rtr[1]) / (length(chr[[1]]) - 1))) + chr <- chromatogram(tmp, rt = rtr, mz = mzr) + pkInt <- sum(intensity(chr[1, 1]) * + ((rtr[2] - rtr[1]) / (length(chr[1, 1]) - 1))) chromPeaks(tmp)[1, "into"] checkEquals(pkInt, unname(chromPeaks(tmp)[1, "into"])) idxs <- sample(1:nrow(chromPeaks(tmp)), 5) @@ -1106,7 +1112,7 @@ test_signal_integration <- function() { for (i in idxs) { rtr <- chromPeaks(tmp)[i, c("rtmin", "rtmax")] mzr <- chromPeaks(tmp)[i, c("mzmin", "mzmax")] - chr <- extractChromatograms(tmp, rt = rtr, mz = mzr)[[1]] + chr <- chromatogram(tmp, rt = rtr, mz = mzr)[1, 1] ints <- intensity(chr) pkI <- sum(ints, na.rm = TRUE) * ((rtr[2] - rtr[1]) / (length(ints) - 1)) ## cat(" ", 
chromPeaks(tmp)[i, "into"], " - ", pkI, "\n") diff --git a/inst/unitTests/runit.absentPresent.R b/inst/unitTests/runit.absentPresent.R index 91af0c204..c988300cc 100644 --- a/inst/unitTests/runit.absentPresent.R +++ b/inst/unitTests/runit.absentPresent.R @@ -42,7 +42,8 @@ testPresentAbsentSum <- function() { ## Same as above, with fillPeaks() ## testPresentAbsentSumAfterFillPeaks <- function() { - xsg <- fillPeaks(group(faahko)) + ## xsg <- fillPeaks(group(faahko)) + xsg <- faahko_grouped_filled ## xsg <- faahko_grouped_filled a <- length(which(absent(xsg, class="WT", minfrac=0))) diff --git a/inst/unitTests/runit.do_findChromPeaks_MSW.R b/inst/unitTests/runit.do_findChromPeaks_MSW.R index 0b3e259c9..c5c93fa33 100644 --- a/inst/unitTests/runit.do_findChromPeaks_MSW.R +++ b/inst/unitTests/runit.do_findChromPeaks_MSW.R @@ -27,6 +27,7 @@ test_findChromPeaks_MSW <- function() { sp1 <- od[[1]] res_1 <- do_findPeaks_MSW(mz = mz(sp1), int = intensity(sp1)) mp <- MSWParam() + checkException(findChromPeaks(od1, param = mp, msLevel = 2)) res_2 <- findChromPeaks(od1, param = mp) checkEquals(res_1, chromPeaks(res_2)[, colnames(res_1), drop = FALSE]) ## Changing settings. 
diff --git a/inst/unitTests/runit.do_findChromPeaks_centWave.R b/inst/unitTests/runit.do_findChromPeaks_centWave.R index 41724742f..c55888d1d 100644 --- a/inst/unitTests/runit.do_findChromPeaks_centWave.R +++ b/inst/unitTests/runit.do_findChromPeaks_centWave.R @@ -357,18 +357,18 @@ dontrun_exhaustive_original_new_centWave_comparison <- function() { rtr <- common_pks[i, c("rtmin", "rtmax")] rtr[1] <- rtr[1] - 2 rtr[2] <- rtr[2] + 2 - chr_cmn <- extractChromatograms(raw, rt = rtr, - mz = common_pks[i, c("mzmin", "mzmax")]) + chr_cmn <- chromatogram(raw, rt = rtr, + mz = common_pks[i, c("mzmin", "mzmax")]) rtr <- unique_pks[i, c("rtmin", "rtmax")] rtr[1] <- rtr[1] - 2 rtr[2] <- rtr[2] + 2 - chr_unq <- extractChromatograms(raw, rt = rtr, - mz = unique_pks[i, c("mzmin", "mzmax")]) + chr_unq <- chromatogram(raw, rt = rtr, + mz = unique_pks[i, c("mzmin", "mzmax")]) par(mfrow = c(1, 2)) - plot(rtime(chr_cmn[[1]]), intensity(chr_cmn[[1]]), main = "common peak", + plot(rtime(chr_cmn[[1]]), intensity(chr_cmn[1, 1]), main = "common peak", type = "l") abline(v = common_pks[i, c("rtmin", "rtmax")], col = "grey") - plot(rtime(chr_unq[[1]]), intensity(chr_unq[[1]]), main = "unique peak", + plot(rtime(chr_unq[[1]]), intensity(chr_unq[1, 1]), main = "unique peak", type = "l") abline(v = unique_pks[i, c("rtmin", "rtmax")], col = "grey") } @@ -481,12 +481,13 @@ test_do_findChromPeaks_centWave <- function() { valsPerSpect, snthresh = 200, noise = 4000) + ## Eventually disable the sleep option to improve speed! res2 <- do_findChromPeaks_centWave(mz = mzVals, int = intVals, scantime = xr@scantime, valsPerSpect, snthresh = 500, - noise = 4000) + noise = 4000, sleep = 0.01) checkTrue(nrow(res1) > nrow(res2)) ## Check scanrange on findPeaks.centWave. 
@@ -523,6 +524,7 @@ test_findChromPeaks_centWave <- function() { res <- findChromPeaks(onDisk, param = cwp, return.type = "list") checkEquals(res[[1]], peaks(xs)@.Data) + checkException(findChromPeaks(onDisk, param = cwp, msLevel = 2)) ## ## MSnExp ## inMem <- readMSData(f[1], msLevel. = 1) ## suppressWarnings( diff --git a/inst/unitTests/runit.do_findChromPeaks_centWave_isotopes.R b/inst/unitTests/runit.do_findChromPeaks_centWave_isotopes.R index adf493894..220ca2029 100644 --- a/inst/unitTests/runit.do_findChromPeaks_centWave_isotopes.R +++ b/inst/unitTests/runit.do_findChromPeaks_centWave_isotopes.R @@ -86,6 +86,7 @@ test_findChromPeaks_centWaveWithPredIsoROIs <- function() { res <- findChromPeaks(onDisk, param = cwp, return.type = "list") checkEquals(res[[1]], peaks(xs)@.Data) + checkException(findChromPeaks(onDisk, param = cwp, msLevel = 2)) ## ## MSnExp ## inMem <- readMSData(fs[1], msLevel. = 1) ## res_2 <- findChromPeaks(inMem, param = cwp, return.type = "list") diff --git a/inst/unitTests/runit.do_findChromPeaks_massifquant.R b/inst/unitTests/runit.do_findChromPeaks_massifquant.R index 633c7cd83..9f39d29e8 100644 --- a/inst/unitTests/runit.do_findChromPeaks_massifquant.R +++ b/inst/unitTests/runit.do_findChromPeaks_massifquant.R @@ -60,6 +60,7 @@ test_findChromPeaks_massifquant <- function() { checkEquals(peaks(res_o), peaks(res)) checkEquals(res_o@rt$raw, res@rt$raw, checkNames = FALSE) + checkException(findChromPeaks(onDisk, param = mqp, msLevel = 2)) ## Full data ## onDisk <- readMSData2(mzf) ## res <- findChromPeaks(onDisk, param = mqp) diff --git a/inst/unitTests/runit.do_findChromPeaks_matchedFilter.R b/inst/unitTests/runit.do_findChromPeaks_matchedFilter.R index aaac3ad79..c24148da3 100644 --- a/inst/unitTests/runit.do_findChromPeaks_matchedFilter.R +++ b/inst/unitTests/runit.do_findChromPeaks_matchedFilter.R @@ -55,6 +55,8 @@ test_findChromPeaks_matchedFilter <- function() { res_o <- findChromPeaks(onDisk, param = mfp, return.type = "xcmsSet") 
checkEquals(peaks(res_o), peaks(res)) checkEquals(res_o@rt$raw, res@rt$raw, checkNames = FALSE) + + checkException(findChromPeaks(onDisk, param = mfp, msLevel = 2)) ## inMem ## inMem <- readMSData(mzf, msLevel. = 1) ## res_i <- findChromPeaks(inMem, param = mfp, return.type = "xcmsSet") diff --git a/inst/unitTests/runit.do_groupChromPeaks.R b/inst/unitTests/runit.do_groupChromPeaks.R index a4f8a3470..003958305 100644 --- a/inst/unitTests/runit.do_groupChromPeaks.R +++ b/inst/unitTests/runit.do_groupChromPeaks.R @@ -349,7 +349,8 @@ test_groupPeaks_MzClustParam <- function() { ## test_do_groupChromPeaks_nearest <- function() { - xs <- faahko + ## xs <- faahko + xs <- faahko_xs features <- peaks(xs) sampleGroups <- sampclass(xs) mzVsRtBalance <- 10 diff --git a/inst/unitTests/runit.fillChromPeaks.R b/inst/unitTests/runit.fillChromPeaks.R index 97e7f3023..9ab32d5eb 100644 --- a/inst/unitTests/runit.fillChromPeaks.R +++ b/inst/unitTests/runit.fillChromPeaks.R @@ -16,8 +16,8 @@ test_fillChromPeaks <- function() { for (i in idxs) { cfp <- fp[i, , drop = FALSE] tmp <- filterFile(xod_xg, file = cfp[1, "sample"]) - chr <- extractChromatograms(tmp, rt = cfp[1, c("rtmin", "rtmax")], - mz = cfp[1, c("mzmin", "mzmax")])[[1]] + chr <- chromatogram(tmp, rt = cfp[1, c("rtmin", "rtmax")], + mz = cfp[1, c("mzmin", "mzmax")])[1, 1] into <- sum(intensity(chr), na.rm = TRUE) * (cfp[1, "rtmax"] - cfp[1, "rtmin"]) / (length(chr) - 1) checkEquals(unname(into), unname(cfp[1, "into"])) @@ -29,8 +29,8 @@ test_fillChromPeaks <- function() { rtr <- c(min(pks[, "rtmin"]), max(pks[, "rtmax"])) rtr[1] <- rtr[1] - 10 rtr[2] <- rtr[2] + 10 - chrs <- extractChromatograms(res, rt = rtr, mz = c(min(pks[, "mzmin"]), - max(pks[, "mzmax"]))) + chrs <- chromatogram(res, rt = rtr, mz = c(min(pks[, "mzmin"]), + max(pks[, "mzmax"])))[1, ] plot(3, 3, pch = NA, xlim = range(lapply(chrs, rtime), na.rm = TRUE), ylim = range(lapply(chrs, intensity), na.rm = TRUE), xlab = "rt", ylab = "int") @@ -72,7 +72,7 @@ 
test_fillChromPeaks <- function() { c("rtmin", "rtmax", "mzmin", "mzmax")] ## Get the intensities for the first one. pkArea <- apply(tmp, median, MARGIN = 2) - chr <- extractChromatograms(res, rt = pkArea[1:2], mz = pkArea[3:4]) + chr <- chromatogram(res, rt = pkArea[1:2], mz = pkArea[3:4])[1, ] checkTrue(all(unlist(lapply(chr, function(z) is.na(intensity(z)))))) ## Get also the spectra: spctr <- spectra(filterRt(filterFile(xod_xg, file = 1), rt = pkArea[1:2])) diff --git a/inst/unitTests/runit.fillPeaks.R b/inst/unitTests/runit.fillPeaks.R index 2c14b01bf..a384c10c2 100644 --- a/inst/unitTests/runit.fillPeaks.R +++ b/inst/unitTests/runit.fillPeaks.R @@ -5,7 +5,7 @@ testFilledFlag <- function() { checkEqualsNumeric(nrow(peaks(xsg)) + length(xsgf@filled), nrow(peaks(xsgf))) } -testFillPeaksPar <- function() { +dontrun_testFillPeaksPar <- function() { xsg <- group(faahko) xsgfSerial <- fillPeaks(xsg, method="chrom") @@ -18,28 +18,28 @@ testFillPeaksPar <- function() { test.fillPeaksColumns <- function() { - xsg <- group(faahko) - peaks(xsg) <- cbind(peaks(xsg), anotherColumn=4711) - - oldCnames <- colnames(peaks(xsg)) - xsgf <- fillPeaks(xsg) # parallel disabled: , nSlaves=2) - - newCnames <- colnames(peaks(xsgf)) - checkEquals(oldCnames, newCnames) - - ## Check dims if nothing to do - oldDims <- dim(peaks(xsgf)) - xsgf2 <- fillPeaks(xsgf) # parallel disabled: , nSlaves=2) - newDims <- dim(peaks(xsgf2)) - checkEquals(oldDims, newDims) - - ## Case where only some samples have NA values - xsg <- group(faahko, minfrac=1) - xsgf <- fillPeaks(xsg) # parallel disabled: , nSlaves=2) - sampclass(xsgf) <- c(rep("KO", 5), rep("WT", 7)) - xsgf <- group(xsgf, minfrac=1) - xsgf <- fillPeaks(xsgf) # parallel disabled: , nSlaves=2) - + xsg <- group(faahko) + xsg <- group(faahko_xs) + peaks(xsg) <- cbind(peaks(xsg), anotherColumn=4711) + + oldCnames <- colnames(peaks(xsg)) + xsgf <- fillPeaks(xsg) # parallel disabled: , nSlaves=2) + + newCnames <- colnames(peaks(xsgf)) + 
checkEquals(oldCnames, newCnames) + + ## Check dims if nothing to do + oldDims <- dim(peaks(xsgf)) + xsgf2 <- fillPeaks(xsgf) # parallel disabled: , nSlaves=2) + newDims <- dim(peaks(xsgf2)) + checkEquals(oldDims, newDims) + + ## Case where only some samples have NA values + xsg <- group(faahko_xs, minfrac=1) + xsgf <- fillPeaks(xsg) # parallel disabled: , nSlaves=2) + sampclass(xsgf) <- c(rep("KO", 1), rep("WT", 2)) + xsgf <- group(xsgf, minfrac=1) + xsgf <- fillPeaks(xsgf) # parallel disabled: , nSlaves=2) } test.getPeaks_implementation <- function() { @@ -98,7 +98,7 @@ test.getPeaks_implementation <- function() { } ## Compare the results we get when running the old and new fillPeaks. -test.fillPeaks_old_vs_new <- function() { +dontrun_test.fillPeaks_old_vs_new <- function() { xsg <- group(faahko, minfrac = 1) register(SerialParam()) diff --git a/inst/unitTests/runit.functions-XCMSnExp.R b/inst/unitTests/runit.functions-XCMSnExp.R index a152a9de8..0d86688a6 100644 --- a/inst/unitTests/runit.functions-XCMSnExp.R +++ b/inst/unitTests/runit.functions-XCMSnExp.R @@ -10,3 +10,29 @@ test_plotChromPeakDensity <- function() { plotChromPeakDensity(xod_x, mz = c(0, 1)) plotChromPeakDensity(xod_x, mz = c(300, 310), pch = 16, xlim = c(2500, 4000)) } + +test_plotChromPeaks <- function() { + ## Plot the full range. + plotChromPeaks(xod_x) + + ## mz range + plotChromPeaks(xod_x, ylim = c(453, 455)) + plotChromPeaks(xod_x, ylim = c(453.2, 453.201), xlim = c(2500, 3500)) + + ## mzr <- c(453.2, 453.201) + ## chrs <- chromatogram(xod_x, mz = mzr, rt = c(2500, 3500)) + ## plot(chrs) + ## highlightChromPeaks(xod_x, mz = mzr) +} + +test_plotChromPeakImage <- function() { + xcms:::plotChromPeakImage(xod_x, binSize = 30, log = FALSE) + ## Check that it works if no peaks were found in one sample. 
+ tmp <- xod_x + pks <- chromPeaks(tmp) + pks <- pks[pks[, "sample"] != 1, ] + chromPeaks(tmp) <- pks + plotChromPeakImage(tmp, binSize = 30, log = FALSE) + plotChromPeakImage(tmp, binSize = 20, log = FALSE) + plotChromPeakImage(tmp, binSize = 10, log = FALSE, col = topo.colors(64)) +} diff --git a/inst/unitTests/runit.functions-normalization.R b/inst/unitTests/runit.functions-normalization.R new file mode 100644 index 000000000..ad1abb54c --- /dev/null +++ b/inst/unitTests/runit.functions-normalization.R @@ -0,0 +1,11 @@ +## Unit tests for functions in functions-normalization.R + +test.fitModel <- function() { + vals <- featureValues(xod_xgrg) + dat <- data.frame(injection_idx = 1:length(fileNames(xod_xgrg))) + fits <- xcms:::fitModel(formula = y ~ injection_idx, y = vals, minVals = 3, + data = dat) + ## Check that we've got NULL for features with less thanb 3 values. + nulls <- apply(vals, MARGIN = 1, function(z) any(is.na(z))) + checkEquals(nulls, lengths(fits) == 0) +} diff --git a/inst/unitTests/runit.functions-utils.R b/inst/unitTests/runit.functions-utils.R index 9f4184e99..82ac939f7 100644 --- a/inst/unitTests/runit.functions-utils.R +++ b/inst/unitTests/runit.functions-utils.R @@ -1,53 +1,53 @@ -library(xcms) -library(RUnit) +## library(xcms) +## library(RUnit) ## Test the .grow_trues -test_grow_trues <- function() { - ## Compare performance with MSnbase:::utils.clean - Test <- c(1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, - 1, 0) - Expect <- c(TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, - FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, - TRUE, TRUE, TRUE, TRUE) - res_2 <- xcms:::.grow_trues(Test > 0) - checkEquals(res_2, Expect) +## test_grow_trues <- function() { +## ## Compare performance with MSnbase:::utils.clean +## Test <- c(1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, +## 1, 0) +## Expect <- c(TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, +## FALSE, FALSE, 
FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, +## TRUE, TRUE, TRUE, TRUE) +## res_2 <- xcms:::.grow_trues(Test > 0) +## checkEquals(res_2, Expect) - Test <- c(0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0) - Expect <- c(FALSE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, - TRUE, FALSE) - res_2 <- xcms:::.grow_trues(Test > 0) - checkEquals(res_2, Expect) +## Test <- c(0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0) +## Expect <- c(FALSE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, +## TRUE, FALSE) +## res_2 <- xcms:::.grow_trues(Test > 0) +## checkEquals(res_2, Expect) - Test <- c(0, 1, NA, 0, 0, 1) - Expect <- c(TRUE, TRUE, FALSE, FALSE, TRUE, TRUE) - res_2 <- xcms:::.grow_trues(Test > 0) - checkEquals(res_2, Expect) +## Test <- c(0, 1, NA, 0, 0, 1) +## Expect <- c(TRUE, TRUE, FALSE, FALSE, TRUE, TRUE) +## res_2 <- xcms:::.grow_trues(Test > 0) +## checkEquals(res_2, Expect) - Test <- c(0, NA, 1, 0, 0, 1, 0, 0) - Expect <- c(FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE) - res_2 <- xcms:::.grow_trues(Test > 0) - checkEquals(res_2, Expect) +## Test <- c(0, NA, 1, 0, 0, 1, 0, 0) +## Expect <- c(FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE) +## res_2 <- xcms:::.grow_trues(Test > 0) +## checkEquals(res_2, Expect) - Test <- c(0, 1, 0, 0, NA, 0, 1) - Expect <- c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE) - res_2 <- xcms:::.grow_trues(Test > 0) - checkEquals(res_2, Expect) +## Test <- c(0, 1, 0, 0, NA, 0, 1) +## Expect <- c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE) +## res_2 <- xcms:::.grow_trues(Test > 0) +## checkEquals(res_2, Expect) - Test <- c(NA, 1, NA, NA, NA, NA, 1) - Expect <- c(FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE) - res_2 <- xcms:::.grow_trues(Test > 0) - checkEquals(res_2, Expect) -} +## Test <- c(NA, 1, NA, NA, NA, NA, 1) +## Expect <- c(FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE) +## res_2 <- xcms:::.grow_trues(Test > 0) +## checkEquals(res_2, Expect) +## } -benchmark_grow_trues <- function() { - set.seed(123) - Test <- rnorm(n = 
30000) - Test[Test < 0] <- 0 - Test2 <- Test > 0 - res_1 <- MSnbase:::utils.clean(Test) - res_2 <- .clean(Test2) +## benchmark_grow_trues <- function() { +## set.seed(123) +## Test <- rnorm(n = 30000) +## Test[Test < 0] <- 0 +## Test2 <- Test > 0 +## res_1 <- MSnbase:::utils.clean(Test) +## res_2 <- .clean(Test2) - Test <- c(0, 0, 1, 1, 0, 0, 0, 1, 0, 0) - Expect <- c(FALSE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, FALSE) - res_1 <- MSnbase:::utils.clean(Test) +## Test <- c(0, 0, 1, 1, 0, 0, 0, 1, 0, 0) +## Expect <- c(FALSE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, FALSE) +## res_1 <- MSnbase:::utils.clean(Test) -} +## } diff --git a/inst/unitTests/runit.getEIC.R b/inst/unitTests/runit.getEIC.R index 81fd54d80..d93edd976 100644 --- a/inst/unitTests/runit.getEIC.R +++ b/inst/unitTests/runit.getEIC.R @@ -1,45 +1,3 @@ - -## test.getEICxraw <- function() { -## file <- system.file('cdf/KO/ko15.CDF', package = "faahKO") -## step <- 0.1 -## xraw <- xcmsRaw(file, profstep=step) -## e <- getEIC(xraw, rtrange=cbind(3000,3500), mzrange=cbind(200,201)) -## ## calculate the EIC manually... -## mass <- seq(floor(xraw@mzrange[1]/step)*step, -## ceiling(xraw@mzrange[2]/step)*step, by = step) -## rtIdx <- which(xraw@scantime >= 3000 & xraw@scantime <= 3500) -## mzIdx <- which(mass >= 200 & mass <= 201) -## ecalc <- apply(xraw@env$profile[mzIdx, rtIdx, drop=FALSE], MARGIN=2, max) -## checkEqualsNumeric(ecalc, e@eic[[1]][[1]][, 2]) -## ## extract the /full/ EIC, i.e. the base peak chromatogram (BPC). -## ## here we have to use the "new" method... 
-## BioC <- getOption("BioC") -## BioC$xcms$getEIC.method <- "getEICNew" -## options(BioC=BioC) -## e <- getEIC(xraw, mzrange=matrix(xraw@mzrange, nrow=1), -## rtrange=matrix(range(xraw@scantime), nrow=1)) -## rtIdx <- which(xraw@scantime >= min(xraw@scantime) & -## xraw@scantime <= max(xraw@scantime)) -## mzIdx <- which(mass >= xraw@mzrange[1] & mass <= xraw@mzrange[2]) -## ecalc <- apply(xraw@env$profile[mzIdx, rtIdx, drop=FALSE], MARGIN=2, max) -## checkEqualsNumeric(ecalc, e@eic[[1]][[1]][, 2]) -## ## for two ranges... -## mzrange <- rbind(c(200, 201), c(300, 310), c(300, 402)) -## rtrange <- rbind(c(3000, 3500), c(4000, 4300), c(2600, 3000)) -## e <- getEIC(xraw, mzrange=mzrange, rtrange=rtrange) -## ## manually calculate... -## for(i in 1:nrow(mzrange)){ -## rtIdx <- which(xraw@scantime >= rtrange[i, 1] & xraw@scantime <= rtrange[i, 2]) -## mzIdx <- which(mass >= mzrange[i, 1] & mass <= mzrange[i, 2]) -## ecalc <- apply(xraw@env$profile[mzIdx, rtIdx, drop=FALSE], MARGIN=2, max) -## checkEqualsNumeric(ecalc, e@eic[[1]][[i]][, 2]) -## } -## ## restoring the setting... -## BioC <- getOption("BioC") -## BioC$xcms$getEIC.method <- "getEICOld" -## options(BioC=BioC) -## } - ## Testing the profEIC as well as the getEIC method. test_profEIC <- function() { ## file <- system.file('cdf/KO/ko15.CDF', package = "faahKO") @@ -258,7 +216,7 @@ notrun_test_profEIC_implementation <- function() { ## original retention times differ, e.g. when scanrange is used. Using the ## getXcmsRaw method fixes that, since within that method we ensure that all ## vectors, times etc are aligned (i.e. match in length and ordering). 
-test.issue7 <- function(){ +dontrun_test.issue7 <- function(){ library(xcms) library(faahKO) @@ -341,7 +299,8 @@ test.issue7 <- function(){ } test_getEICxset <- function() { - xset <- fillPeaks(group(faahko)) + ## xset <- fillPeaks(group(faahko)) + xset <- faahko_grouped_filled ## xset <- faahko_grouped_filled e <- getEIC(xset, sampleidx = c(1,2), groupidx = c(1,2), rtrange=200) checkEquals(sampnames(e), c("ko15", "ko16")) @@ -372,7 +331,8 @@ test_getEICxset <- function() { } test.getEICretcor <- function() { - xset <- fillPeaks(group(retcor(group(faahko)))) + ## xset <- fillPeaks(group(retcor(group(faahko)))) + xset <- faahko_grouped_retcor_filled ## xset <- faahko_processed opt.warn <- options("warn")$warn options("warn" = 2) ## turns warning into errors diff --git a/inst/unitTests/runit.getXcmsRaw.R b/inst/unitTests/runit.getXcmsRaw.R index 7d55a65ca..b76f4df1f 100644 --- a/inst/unitTests/runit.getXcmsRaw.R +++ b/inst/unitTests/runit.getXcmsRaw.R @@ -1,9 +1,10 @@ ## just plain function that reads the raw data... test.getXcmsRaw <- function(){ - xsetRaw <- updateObject(faahko) - xset <- fillPeaks(group(retcor(group(xsetRaw)))) - + ## xsetRaw <- updateObject(faahko) + ## xset <- fillPeaks(group(retcor(group(xsetRaw)))) + xset <- faahko_grouped_retcor_filled + ## get the first as raw data file. 
xr <- getXcmsRaw(xset, sampleidx = 1) ## apply the rt correction diff --git a/man/Chromatogram-class.Rd b/man/Chromatogram-class.Rd deleted file mode 100644 index 51ce6a657..000000000 --- a/man/Chromatogram-class.Rd +++ /dev/null @@ -1,211 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/DataClasses.R, R/functions-Chromatogram.R, -% R/methods-Chromatogram.R -\docType{class} -\name{Chromatogram-class} -\alias{Chromatogram-class} -\alias{Chromatogram} -\alias{show,Chromatogram-method} -\alias{rtime,Chromatogram-method} -\alias{intensity,Chromatogram-method} -\alias{mz,Chromatogram-method} -\alias{precursorMz,Chromatogram-method} -\alias{productMz,Chromatogram-method} -\alias{productMz} -\alias{aggregationFun,Chromatogram-method} -\alias{aggregationFun} -\alias{fromFile,Chromatogram-method} -\alias{length,Chromatogram-method} -\alias{as.data.frame,Chromatogram-method} -\alias{filterRt,Chromatogram-method} -\alias{clean,Chromatogram-method} -\title{Representation of chromatographic MS data} -\usage{ -Chromatogram(rtime = numeric(), intensity = numeric(), mz = c(NA_real_, - NA_real_), filterMz = c(NA_real_, NA_real_), precursorMz = c(NA_real_, - NA_real_), productMz = c(NA_real_, NA_real_), fromFile = integer(), - aggregationFun = character()) - -\S4method{show}{Chromatogram}(object) - -\S4method{rtime}{Chromatogram}(object) - -\S4method{intensity}{Chromatogram}(object) - -\S4method{mz}{Chromatogram}(object, filter = FALSE) - -\S4method{precursorMz}{Chromatogram}(object) - -\S4method{productMz}{Chromatogram}(object) - -\S4method{aggregationFun}{Chromatogram}(object) - -\S4method{fromFile}{Chromatogram}(object) - -\S4method{length}{Chromatogram}(x) - -\S4method{as.data.frame}{Chromatogram}(x) - -\S4method{filterRt}{Chromatogram}(object, rt) - -\S4method{clean}{Chromatogram}(object, all = FALSE) -} -\arguments{ -\item{rtime}{\code{numeric} with the retention times (length has to be equal -to the length of \code{intensity}).} - 
-\item{intensity}{\code{numeric} with the intensity values (length has to be -equal to the length of \code{rtime}).} - -\item{mz}{\code{numeric(2)} representing the mz value range (min, max) -on which the chromatogram was created. This is supposed to contain the -\emph{real} range of mz values in contrast to the \code{filterMz} below. -If not applicable use \code{mzrange = c(0, 0)}.} - -\item{filterMz}{\code{numeric(2)} representing the mz value range (min, -max) that was used to filter the original object on mz dimension. If not -applicable use \code{filterMz = c(0, 0)}.} - -\item{precursorMz}{\code{numeric(2)} for SRM/MRM transitions. -Represents the mz of the precursor ion. See details for more information.} - -\item{productMz}{\code{numeric(2)} for SRM/MRM transitions. -Represents the mz of the product. See details for more information.} - -\item{fromFile}{\code{integer(1)} the index of the file within the -\code{\link{OnDiskMSnExp}} or \code{\link{XCMSnExp}} from which the -chromatogram was extracted.} - -\item{aggregationFun}{\code{character} string specifying the function that -was used to aggregate intensity values for the same retention time across -the mz range. Supported are \code{"sum"} (total ion chromatogram), -\code{"max"} (base peak chromatogram), \code{"min"} and \code{"mean"}.} - -\item{object}{A \code{Chromatogram} object.} - -\item{filter}{For \code{mz}: whether the mz range used to filter the -original object should be returned (\code{filter = TRUE}), or the mz -range calculated on the real data (\code{filter = FALSE}).} - -\item{x}{For \code{as.data.frame} and \code{length}: a \code{Chromatogram} -object.} - -\item{rt}{For \code{filterRt}: \code{numeric(2)} defining the lower and -upper retention time for the filtering.} - -\item{all}{For \code{clean}: \code{logical(1)} whether all \code{0} intensity -value pairs should be removed (defaults to \code{FALSE}).} -} -\value{ -For \code{clean}: a \emph{cleaned} \code{Chromatogram} object. 
-} -\description{ -The \code{Chromatogram} class is designed to store - chromatographic MS data, i.e. pairs of retention time and intensity - values. Instances of the class can be created with the - \code{Chromatogram} constructor function but in most cases the dedicated - methods for \code{\link{OnDiskMSnExp}} and \code{\link{XCMSnExp}} - objects extracting chromatograms should be used instead (i.e. the - \code{\link{extractChromatograms}}). - -\code{Chromatogram}: create an instance of the - \code{Chromatogram} class. - -\code{rtime} returns the retention times for the rentention time - - intensity pairs stored in the chromatogram. - -\code{intensity} returns the intensity for the rentention time - - intensity pairs stored in the chromatogram. - -\code{mz} get the mz (range) of the chromatogram. The - function returns a \code{numeric(2)} with the lower and upper mz value. - -\code{precursorMz} get the mz of the precursor ion. The - function returns a \code{numeric(2)} with the lower and upper mz value. - -\code{productMz} get the mz of the product chromatogram/ion. The - function returns a \code{numeric(2)} with the lower and upper mz value. - -\code{aggregationFun,aggregationFun<-} get or set the - aggregation function. - -\code{fromFile} returns the value from the \code{fromFile} slot. - -\code{length} returns the length (number of retention time - - intensity pairs) of the chromatogram. - -\code{as.data.frame} returns the \code{rtime} and - \code{intensity} values from the object as \code{data.frame}. - -\code{filterRt}: filters the chromatogram based on the provided - retention time range. - -\code{clean}: \emph{cleans} a \code{Chromatogram} class by - removing all \code{0} and \code{NA} intensity signals (along with the - associates retention times). By default (if \code{all = FALSE}) \code{0} - values that are directly adjacent to peaks are kept too. \code{NA} - values are always removed. 
-} -\details{ -The \code{mz}, \code{filterMz}, \code{precursorMz} and - \code{productMz} are stored as a \code{numeric(2)} representing a range - even if the chromatogram was generated for only a single ion (i.e. a - single mz value). Using ranges for \code{mz} values allow this class to - be used also for e.g. total ion chromatograms or base peak chromatograms. - - The slots \code{precursorMz} and \code{productMz} allow to represent SRM - (single reaction monitoring) and MRM (multiple SRM) chromatograms. As - example, a \code{Chromatogram} for a SRM transition 273 -> 153 will have - a \code{@precursorMz = c(273, 273)} and a - \code{@productMz = c(153, 153)}. -} -\section{Slots}{ - -\describe{ -\item{\code{.__classVersion__,rtime,intensity,mz,filterMz,precursorMz,productMz,fromFile,aggregationFun}}{See corresponding parameter above.} -}} - -\examples{ - -## Create a simple Chromatogram object based on random values. -chr <- Chromatogram(intensity = abs(rnorm(1000, mean = 2000, sd = 200)), - rtime = sort(abs(rnorm(1000, mean = 10, sd = 5)))) -chr - -## Get the intensities -head(intensity(chr)) - -## Get the retention time -head(rtime(chr)) - -## What is the retention time range of the object? -range(rtime(chr)) - -## Filter the chromatogram to keep only values between 4 and 10 seconds -chr2 <- filterRt(chr, rt = c(4, 10)) - -range(rtime(chr2)) - -## Create a simple Chromatogram object - -chr <- Chromatogram(rtime = 1:12, - intensity = c(0, 0, 20, 0, 0, 0, 123, 124343, 3432, 0, 0, 0)) - -## Remove 0-intensity values keeping those adjacent to peaks -chr <- clean(chr) -intensity(chr) - -## Remove all 0-intensity values -chr <- clean(chr, all = TRUE) -intensity(chr) -} -\seealso{ -\code{\link{extractChromatograms}} for the method to extract - \code{Chromatogram} objects from \code{\link{XCMSnExp}} or - \code{\link[MSnbase]{OnDiskMSnExp}} objects. - - \code{\link{plotChromatogram}} to plot \code{Chromatogram} objects. 
-} -\author{ -Johannes Rainer -} diff --git a/man/GenericParam.Rd b/man/GenericParam.Rd index 284b9e571..80e9e6f6a 100644 --- a/man/GenericParam.Rd +++ b/man/GenericParam.Rd @@ -21,7 +21,8 @@ GenericParam(fun = character(), args = list()) \item{object}{\code{GenericParam} object.} } \value{ -The \code{GenericParam} function returns a \code{GenericParam} object. +The \code{GenericParam} function returns a \code{GenericParam} + object. } \description{ The \code{GenericParam} class allows to store generic parameter diff --git a/man/ProcessHistory-class.Rd b/man/ProcessHistory-class.Rd index 29a55dd37..5f5484173 100644 --- a/man/ProcessHistory-class.Rd +++ b/man/ProcessHistory-class.Rd @@ -39,19 +39,19 @@ } \value{ For \code{processParam}: a parameter object extending the -\code{Param} class. + \code{Param} class. The \code{processType} method returns a character string with the -processing step type. + processing step type. The \code{processDate} method returns a character string with the -time stamp of the processing step start. + time stamp of the processing step start. The \code{processInfo} method returns a character string with -optional additional informations. + optional additional informations. The \code{fileIndex} method returns a integer vector with the index -of the files/samples on which the processing step was applied. + of the files/samples on which the processing step was applied. } \description{ Objects of the type \code{ProcessHistory} allow to keep track @@ -65,19 +65,19 @@ The \code{XProcessHistory} extends the \code{ProcessHistory} by class of the processing step. Get or set the parameter class from an \code{XProcessHistory} -object. + object. The \code{processType} method returns a character specifying the -processing step \emph{type}. + processing step \emph{type}. The \code{processDate} extracts the start date of the processing -step. + step. The \code{processInfo} extracts optional additional information -on the processing step. 
+ on the processing step. The \code{fileIndex} extracts the indices of the files on which -the processing step was applied. + the processing step was applied. } \section{Slots}{ diff --git a/man/XCMSnExp-class.Rd b/man/XCMSnExp-class.Rd index 2f5caf7aa..6e92b408b 100644 --- a/man/XCMSnExp-class.Rd +++ b/man/XCMSnExp-class.Rd @@ -166,19 +166,21 @@ section for more information.} \item{step}{numeric(1) representing the m/z bin size.} \item{baselevel}{numeric(1) representing the base value to which -empty elements (i.e. m/z bins without a measured intensity) should be set. -Only considered if \code{method = "binlinbase"}. See \code{baseValue} -parameter of \code{\link{imputeLinInterpol}} for more details.} +empty elements (i.e. m/z bins without a measured intensity) should be +set. Only considered if \code{method = "binlinbase"}. See +\code{baseValue} parameter of \code{\link{imputeLinInterpol}} for more +details.} \item{basespace}{numeric(1) representing the m/z length after -which the signal will drop to the base level. Linear interpolation will be -used between consecutive data points falling within \code{2 * basespace} to -each other. Only considered if \code{method = "binlinbase"}. If not -specified, it defaults to \code{0.075}. Internally this parameter is -translated into the \code{distance} parameter of the -\code{\link{imputeLinInterpol}} function by -\code{distance = floor(basespace / step)}. See \code{distance} parameter -of \code{\link{imputeLinInterpol}} for more details.} +which the signal will drop to the base level. Linear interpolation will +be used between consecutive data points falling within +\code{2 * basespace} to each other. Only considered if +\code{method = "binlinbase"}. If not specified, it defaults to +\code{0.075}. Internally this parameter is translated into the +\code{distance} parameter of the \code{\link{imputeLinInterpol}} +function by \code{distance = floor(basespace / step)}. 
See +\code{distance} parameter of \code{\link{imputeLinInterpol}} for more +details.} \item{mzrange.}{Optional numeric(2) manually specifying the mz value range to be used for binnind. If not provided, the whole mz value range is used.} @@ -241,9 +243,9 @@ return. Can be either \code{"XCMSnExp"} (default), \code{"list"} or } \value{ For \code{profMat}: a \code{list} with a the profile matrix -\code{matrix} (or matrices if \code{fileIndex} was not specified or if -\code{length(fileIndex) > 1}). See \code{\link{profile-matrix}} for general -help and information about the profile matrix. + \code{matrix} (or matrices if \code{fileIndex} was not specified or if + \code{length(fileIndex) > 1}). See \code{\link{profile-matrix}} for + general help and information about the profile matrix. For \code{adjustedRtime}: if \code{bySample = FALSE} a \code{numeric} vector with the adjusted retention for each spectrum of all files/samples @@ -341,11 +343,11 @@ process histories. These can be passed with argument \code{type} to the \code{processHistory} method to extract specific process step(s). \code{profMat}: creates a \emph{profile matrix}, which -is a n x m matrix, n (rows) representing equally spaced m/z values (bins) and -m (columns) the retention time of the corresponding scans. Each cell contains -the maximum intensity measured for the specific scan and m/z values. See -\code{\link{profMat}} for more details and description of the various binning -methods. + is a n x m matrix, n (rows) representing equally spaced m/z values (bins) + and m (columns) the retention time of the corresponding scans. Each cell + contains the maximum intensity measured for the specific scan and m/z + values. See \code{\link{profMat}} for more details and description of + the various binning methods. \code{hasAdjustedRtime}: whether the object provides adjusted retention times. 
@@ -481,7 +483,7 @@ od <- readMSData2(c(system.file("cdf/KO/ko15.CDF", package = "faahKO"), ## Now we perform a chromatographic peak detection on this data set using the ## matched filter method. We are tuning the settings such that it performs ## faster. -mfp <- MatchedFilterParam(binSize = 4) +mfp <- MatchedFilterParam(binSize = 6) xod <- findChromPeaks(od, param = mfp) ## The results from the peak detection are now stored in the XCMSnExp @@ -511,12 +513,15 @@ head(rtime(xod)) ## spectra method which returns Spectrum objects containing all raw data. ## Note that all these methods read the information from the original input ## files and subsequently apply eventual data processing steps to them. -head(mz(xod, bySample = TRUE)) +mzs <- mz(xod, bySample = TRUE) +length(mzs) +lengths(mzs) -## Reading all data -spctr <- spectra(xod) +## The full data could also be read using the spectra data, which returns +## a list of Spectrum object containing the mz, intensity and rt values. +## spctr <- spectra(xod) ## To get all spectra of the first file we can split them by file -head(split(spctr, fromFile(xod))[[1]]) +## head(split(spctr, fromFile(xod))[[1]]) ############ ## Filtering @@ -554,8 +559,8 @@ head(peaks(xs)) the feature definitions representing the peak grouping results. \code{\link{adjustRtime}} for retention time adjustment methods. - \code{\link{extractChromatograms}} to extract MS data as - \code{\link{Chromatogram}} objects. + \code{\link[MSnbase]{chromatogram}} to extract MS data as + \code{\link[MSnbase]{Chromatogram}} objects. \code{\link{extractMsData}} for the method to extract MS data as \code{data.frame}s. 
diff --git a/man/XCMSnExp-filter-methods.Rd b/man/XCMSnExp-filter-methods.Rd index 7b0615136..63aa427f2 100644 --- a/man/XCMSnExp-filter-methods.Rd +++ b/man/XCMSnExp-filter-methods.Rd @@ -100,8 +100,10 @@ fs <- c(system.file('cdf/KO/ko15.CDF', package = "faahKO"), ## Read the files od <- readMSData2(fs) -## Perform peak detection on them using default matched filter settings. -mfp <- MatchedFilterParam() +## Perform peak detection on them using the matched filter algorithm. Note +## that we use a large value for binSize to reduce the runtime of the +## example code. +mfp <- MatchedFilterParam(binSize = 5) xod <- findChromPeaks(od, param = mfp) ## Subset the dataset to the first and third file. diff --git a/man/adjustRtime-obiwarp.Rd b/man/adjustRtime-obiwarp.Rd index 375b69ad2..2cfb6d484 100644 --- a/man/adjustRtime-obiwarp.Rd +++ b/man/adjustRtime-obiwarp.Rd @@ -155,8 +155,8 @@ the alignment method.} } \value{ The \code{ObiwarpParam} function returns a -\code{ObiwarpParam} class instance with all of the settings -specified for obiwarp retention time adjustment and alignment. + \code{ObiwarpParam} class instance with all of the settings + specified for obiwarp retention time adjustment and alignment. For \code{adjustRtime,XCMSnExp,ObiwarpParam}: a \code{\link{XCMSnExp}} object with the results of the retention time diff --git a/man/adjustRtime-peakGroups.Rd b/man/adjustRtime-peakGroups.Rd index 27492dd68..4a175fb9b 100644 --- a/man/adjustRtime-peakGroups.Rd +++ b/man/adjustRtime-peakGroups.Rd @@ -117,9 +117,9 @@ the retention time correction method..} } \value{ The \code{PeakGroupsParam} function returns a -\code{PeakGroupsParam} class instance with all of the settings -specified for retention time adjustment based on \emph{house keeping} -features/peak groups. + \code{PeakGroupsParam} class instance with all of the settings + specified for retention time adjustment based on \emph{house keeping} + features/peak groups. 
For \code{adjustRtimePeakGroups}: a \code{matrix}, rows being features, columns samples, of retention times. The features are ordered diff --git a/man/extractChromatograms-method.Rd b/man/chromatogram-method.Rd similarity index 53% rename from man/extractChromatograms-method.Rd rename to man/chromatogram-method.Rd index a95568246..32b91fdaa 100644 --- a/man/extractChromatograms-method.Rd +++ b/man/chromatogram-method.Rd @@ -1,16 +1,12 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/methods-OnDiskMSnExp.R, R/methods-XCMSnExp.R +% Please edit documentation in R/methods-XCMSnExp.R \docType{methods} -\name{extractChromatograms,OnDiskMSnExp-method} -\alias{extractChromatograms,OnDiskMSnExp-method} -\alias{extractChromatograms,XCMSnExp-method} -\alias{extractChromatograms} +\name{chromatogram,XCMSnExp-method} +\alias{chromatogram,XCMSnExp-method} +\alias{chromatogram} \title{Extracting chromatograms} \usage{ -\S4method{extractChromatograms}{OnDiskMSnExp}(object, rt, mz, - aggregationFun = "sum", missing = NA_real_) - -\S4method{extractChromatograms}{XCMSnExp}(object, rt, mz, +\S4method{chromatogram}{XCMSnExp}(object, rt, mz, adjustedRtime = hasAdjustedRtime(object), aggregationFun = "sum", missing = NA_real_) } @@ -31,6 +27,12 @@ chromatograms will be calculated on the full mz range. It is also possible to submit a \code{numeric(1)} in which case \code{range} is called on it to transform it to a \code{numeric(2)}.} +\item{adjustedRtime}{For \code{chromatogram,XCMSnExp}: whether the +adjusted (\code{adjustedRtime = TRUE}) or raw retention times +(\code{adjustedRtime = FALSE}) should be used for filtering and returned +in the resulting \code{\link[MSnbase]{Chromatogram}} object. Adjusted +retention times are used by default if available.} + \item{aggregationFun}{\code{character} specifying the function to be used to aggregate intensity values across the mz value range for the same retention time. 
Allowed values are \code{"sum"}, \code{"max"}, @@ -41,38 +43,20 @@ be used if for a given retention time no signal was measured within the mz range of the corresponding scan. Defaults to \code{NA_real_} (see also Details and Notes sections below). Use \code{missing = 0} to resemble the behaviour of the \code{getEIC} from the \code{old} user interface.} - -\item{adjustedRtime}{For \code{extractChromatograms,XCMSnExp}: whether the -adjusted (\code{adjustedRtime = TRUE}) or raw retention times -(\code{adjustedRtime = FALSE}) should be used for filtering and returned -in the resulting \code{\link{Chromatogram}} object. Adjusted retention -times are used by default if available.} } \value{ -If a single \code{rt} and \code{mz} range was specified, - \code{extractChromatograms} returns a \code{list} of - \code{\link{Chromatogram}} classes each element being the chromatogram - for one of the samples for the specified range. - If multiple \code{rt} and \code{mz} ranges were provided (i.e. by passing - a multi-row \code{matrix} to parameters \code{rt} or \code{mz}), the - function returns a \code{list} of \code{list}s. The outer list - representing results for the various ranges, the inner the result across - files. In other words, \code{result[[1]]} returns a \code{list} with - \code{Chromatogram} classes length equal to the number of files, each - element representing the \code{Chromatogram} for the first rt/mz range - for one file. - An empty \code{list} is returned if no MS1 data is present in - \code{object} or if not a single spectrum is available for any of the - provided retention time ranges in \code{rt}. An empty \code{Chromatogram} - object is returned at the correponding position in the result \code{list} - if for the specific file no scan/spectrum was measured in the provided - rt window. In all other cases, a \code{Chromatogram} with length equal - to the number of scans/spectra in the provided rt range is returned. 
+\code{chromatogram} returns a \code{\link{Chromatograms}} object with + the number of columns corresponding to the number of files in + \code{object} and number of rows the number of specified ranges (i.e. + number of rows of matrices provided with arguments \code{mz} and/or + \code{rt}). } \description{ -\code{extractChromatograms}: the method allows to extract +\code{chromatogram}: the method allows to extract chromatograms from \code{\link[MSnbase]{OnDiskMSnExp}} and - \code{\link{XCMSnExp}} objects. + \code{\link{XCMSnExp}} objects. See also the + \code{\link[MSnbase]{chromatogram}} implementation for + \code{\link[MSnbase]{OnDiskMSnExp}} in the MSnbase package. } \details{ Arguments \code{rt} and \code{mz} allow to specify the MS @@ -82,23 +66,24 @@ Arguments \code{rt} and \code{mz} allow to specify the MS retention time. Setting \code{aggregationFun = "sum"} would e.g. allow to calculate the \emph{total ion chromatogram} (TIC), \code{aggregationFun = "max"} the \emph{base peak chromatogram} (BPC). - The length of the extracted \code{Chromatogram} object, i.e. the number - of available data points, corresponds to the number of scans/spectra - measured in the specified retention time range. If in a specific scan - (for a give retention time) no signal was measured in the specified mz - range, a \code{NA_real_} is reported as intensity for the retention time - (see Notes for more information). This can be changed using the - \code{missing} parameter. + The length of the extracted \code{\link[MSnbase]{Chromatogram}} object, + i.e. the number of available data points, corresponds to the number of + scans/spectra measured in the specified retention time range. If in a + specific scan (for a give retention time) no signal was measured in the + specified mz range, a \code{NA_real_} is reported as intensity for the + retention time (see Notes for more information). This can be changed + using the \code{missing} parameter. 
} \note{ -\code{Chromatogram} objects extracted with \code{extractChromatogram} +\code{\link[MSnbase]{Chromatogram}} objects extracted with + \code{chromatogram} contain \code{NA_real_} values if, for a given retention time, no signal was measured in the specified mz range. If no spectrum/scan is present in the defined retention time window a \code{Chromatogram} object of length 0 is returned. For \code{\link{XCMSnExp}} objects, if adjusted retention times are - available, the \code{extractChromatograms} method will by default report + available, the \code{chromatogram} method will by default report and use these (for the subsetting based on the provided parameter \code{rt}). This can be overwritten with the parameter \code{adjustedRtime}. @@ -114,35 +99,47 @@ faahko_3_files <- c(system.file('cdf/KO/ko15.CDF', package = "faahKO"), od <- readMSData2(faahko_3_files) ## Extract the ion chromatogram for one chromatographic peak in the data. -chrs <- extractChromatograms(od, rt = c(2700, 2900), mz = 335) +chrs <- chromatogram(od, rt = c(2700, 2900), mz = 335) + +chrs -## plot the data -plot(rtime(chrs[[2]]), intensity(chrs[[2]]), type = "l", xlab = "rtime", +## Plot the chromatogram +plot(rtime(chrs[1, 2]), intensity(chrs[1, 2]), type = "l", xlab = "rtime", ylab = "intensity", col = "000080") for(i in c(1, 3)) { - points(rtime(chrs[[i]]), intensity(chrs[[i]]), type = "l", col = "00000080") + points(rtime(chrs[1, i]), intensity(chrs[1, i]), type = "l", + col = "00000080") } -## Plot the chromatogram using plotChromatogram -plotChromatogram(chrs) +## Plot the chromatogram using the dedicated plot method. +plot(chrs) ## Extract chromatograms for multiple ranges. 
mzr <- matrix(c(335, 335, 344, 344), ncol = 2, byrow = TRUE) rtr <- matrix(c(2700, 2900, 2600, 2750), ncol = 2, byrow = TRUE) -chrs <- extractChromatograms(od, mz = mzr, rt = rtr) +chrs <- chromatogram(od, mz = mzr, rt = rtr) + +chrs ## Plot the extracted chromatograms -par(mfrow = c(1, 2)) -plotChromatogram(chrs[[1]]) -plotChromatogram(chrs[[2]]) +plot(chrs) + +## Get access to all chromatograms for the second mz/rt range +chrs[1, ] + +## Plot just that one +plot(chrs[1, , drop = FALSE]) } \seealso{ \code{\link{XCMSnExp}} for the data object. - \code{\link{Chromatogram}} for the object representing chromatographic - data. + \code{\link[MSnbase]{Chromatogram}} for the object representing + chromatographic data. + + \code{\link[MSnbase]{Chromatograms}} for the object allowing to arrange + multiple \code{Chromatogram} objects. - \code{\link{plotChromatogram}} to plot a \code{Chromatogram} or - \code{list} of such objects. + \code{\link[MSnbase]{plot}} to plot a \code{Chromatogram} or + \code{Chromatograms} objects. \code{\link{extractMsData}} for a method to extract the MS data as \code{data.frame}. diff --git a/man/chromatographic-peak-detection.Rd b/man/chromatographic-peak-detection.Rd index acb43ac8c..1797d16b4 100644 --- a/man/chromatographic-peak-detection.Rd +++ b/man/chromatographic-peak-detection.Rd @@ -33,6 +33,12 @@ The \code{findChromPeaks} methods perform the chromatographic \code{\link{findPeaks}} for the \emph{old} peak detection methods. + \code{\link{plotChromPeaks}} to plot identified chromatographic peaks + for one file. + + \code{\link{highlightChromPeaks}} to highlight identified chromatographic + peaks in an extracted ion chromatogram plot. 
+ Other peak detection methods: \code{\link{findChromPeaks-centWaveWithPredIsoROIs}}, \code{\link{findChromPeaks-centWave}}, \code{\link{findChromPeaks-massifquant}}, diff --git a/man/do_findChromPeaks_centWave.Rd b/man/do_findChromPeaks_centWave.Rd index e615910e9..fec33eca5 100644 --- a/man/do_findChromPeaks_centWave.Rd +++ b/man/do_findChromPeaks_centWave.Rd @@ -8,7 +8,7 @@ do_findChromPeaks_centWave(mz, int, scantime, valsPerSpect, ppm = 25, peakwidth = c(20, 50), snthresh = 10, prefilter = c(3, 100), mzCenterFun = "wMean", integrate = 1, mzdiff = -0.001, fitgauss = FALSE, noise = 0, verboseColumns = FALSE, roiList = list(), - firstBaselineCheck = TRUE, roiScales = NULL) + firstBaselineCheck = TRUE, roiScales = NULL, sleep = 0) } \arguments{ \item{mz}{Numeric vector with the individual m/z values from all scans/ @@ -83,6 +83,12 @@ data within regions of interest is checked to be above the first baseline.} \item{roiScales}{Optional numeric vector with length equal to \code{roiList} defining the scale for each region of interest in \code{roiList} that should be used for the centWave-wavelets.} + +\item{sleep}{\code{numeric(1)} defining the number of seconds to wait between +iterations. Defaults to \code{sleep = 0}. If \code{> 0} a plot is +generated visualizing the identified chromatographic peak. 
Note: this +argument is for backward compatibility only and will be removed in +future.} } \value{ A matrix, each row representing an identified chromatographic peak, diff --git a/man/do_findChromPeaks_matchedFilter.Rd b/man/do_findChromPeaks_matchedFilter.Rd index 9b24370ac..8b0cdc06d 100644 --- a/man/do_findChromPeaks_matchedFilter.Rd +++ b/man/do_findChromPeaks_matchedFilter.Rd @@ -7,7 +7,7 @@ do_findChromPeaks_matchedFilter(mz, int, scantime, valsPerSpect, binSize = 0.1, impute = "none", baseValue, distance, fwhm = 30, sigma = fwhm/2.3548, max = 5, snthresh = 10, steps = 2, mzdiff = 0.8 - - binSize * steps, index = FALSE) + - binSize * steps, index = FALSE, sleep = 0) } \arguments{ \item{mz}{Numeric vector with the individual m/z values from all scans/ @@ -62,6 +62,12 @@ allow overlap.} \item{index}{\code{logical(1)} specifying whether indicies should be returned instead of values for m/z and retention times.} + +\item{sleep}{\code{numeric(1)} defining the number of seconds to wait between +iterations. Defaults to \code{sleep = 0}. If \code{> 0} a plot is +generated visualizing the identified chromatographic peak. Note: this +argument is for backward compatibility only and will be removed in +future.} } \value{ A matrix, each row representing an identified chromatographic peak, diff --git a/man/fillChromPeaks.Rd b/man/fillChromPeaks.Rd index 6299d1b08..03e7f70fb 100644 --- a/man/fillChromPeaks.Rd +++ b/man/fillChromPeaks.Rd @@ -75,7 +75,7 @@ chromatographic peaks.} } \value{ The \code{FillChromPeaksParam} function returns a -\code{FillChromPeaksParam} object. + \code{FillChromPeaksParam} object. A \code{\link{XCMSnExp}} object with previously missing chromatographic peaks for features filled into its \code{chromPeaks} @@ -156,7 +156,7 @@ raw_data <- readMSData2(fls[1:2]) ## Create a CentWaveParam object. 
Note that the noise is set to 10000 to ## speed up the execution of the example - in a real use case the default ## value should be used, or it should be set to a reasonable value. -cwp <- CentWaveParam(ppm = 20, noise = 10000, snthresh = 25) +cwp <- CentWaveParam(ppm = 20, noise = 10000, snthresh = 40) res <- findChromPeaks(raw_data, param = cwp) diff --git a/man/findChromPeaks-centWave.Rd b/man/findChromPeaks-centWave.Rd index 9a9bacfb1..755695f63 100644 --- a/man/findChromPeaks-centWave.Rd +++ b/man/findChromPeaks-centWave.Rd @@ -68,7 +68,7 @@ CentWaveParam(ppm = 25, peakwidth = c(20, 50), snthresh = 10, roiList = list(), firstBaselineCheck = TRUE, roiScales = numeric()) \S4method{findChromPeaks}{OnDiskMSnExp,CentWaveParam}(object, param, - BPPARAM = bpparam(), return.type = "XCMSnExp") + BPPARAM = bpparam(), return.type = "XCMSnExp", msLevel = 1L) \S4method{show}{CentWaveParam}(object) @@ -187,39 +187,42 @@ defining the scale for each region of interest in \code{roiList} that should be used for the centWave-wavelets.} \item{object}{For \code{findChromPeaks}: an -\code{\link[MSnbase]{OnDiskMSnExp}} object containing the MS- and all other -experiment-relevant data. + \code{\link[MSnbase]{OnDiskMSnExp}} object containing the MS- and all + other experiment-relevant data. -For all other methods: a parameter object.} + For all other methods: a parameter object.} \item{param}{An \code{CentWaveParam} object containing all settings for the centWave algorithm.} \item{BPPARAM}{A parameter class specifying if and how parallel processing should be performed. It defaults to \code{\link[BiocParallel]{bpparam}}. -See documentation of the \code{BiocParallel} for more details. If parallel -processing is enables, peak detection is performed in parallel on several -of the input samples.} +See documentation of the \code{BiocParallel} for more details. 
If +parallel processing is enabled, peak detection is performed in parallel +on several of the input samples.} \item{return.type}{Character specifying what type of object the method should return. Can be either \code{"XCMSnExp"} (default), \code{"list"} or \code{"xcmsSet"}.} +\item{msLevel}{\code{integer(1)} defining the MS level on which the peak +detection should be performed. Defaults to \code{msLevel = 1}.} + \item{value}{The value for the slot.} \item{f}{For \code{integrate}: a \code{CentWaveParam} object.} } \value{ The \code{CentWaveParam} function returns a \code{CentWaveParam} -class instance with all of the settings specified for chromatographic peak -detection by the centWave method. + class instance with all of the settings specified for chromatographic + peak detection by the centWave method. For \code{findChromPeaks}: if \code{return.type = "XCMSnExp"} an -\code{\link{XCMSnExp}} object with the results of the peak detection. -If \code{return.type = "list"} a list of length equal to the number of -samples with matrices specifying the identified peaks. -If \code{return.type = "xcmsSet"} an \code{\linkS4class{xcmsSet}} object -with the results of the peak detection. + \code{\link{XCMSnExp}} object with the results of the peak detection. + If \code{return.type = "list"} a list of length equal to the number of + samples with matrices specifying the identified peaks. + If \code{return.type = "xcmsSet"} an \code{\linkS4class{xcmsSet}} object + with the results of the peak detection. } \description{ The centWave algorithm perform peak density and wavelet based @@ -231,11 +234,12 @@ The \code{CentWaveParam} class allows to specify all settings should be created with the \code{CentWaveParam} constructor. The \code{detectChromPeaks,OnDiskMSnExp,CentWaveParam} method -performs chromatographic peak detection using the \emph{centWave} algorithm -on all samples from an \code{\link[MSnbase]{OnDiskMSnExp}} object. 
-\code{\link[MSnbase]{OnDiskMSnExp}} objects encapsule all experiment specific -data and load the spectra data (mz and intensity values) on the fly from the -original files applying also all eventual data manipulations. + performs chromatographic peak detection using the \emph{centWave} + algorithm on all samples from an \code{\link[MSnbase]{OnDiskMSnExp}} + object. \code{\link[MSnbase]{OnDiskMSnExp}} objects encapsule all + experiment specific data and load the spectra data (mz and intensity + values) on the fly from the original files applying also all eventual + data manipulations. \code{ppm},\code{ppm<-}: getter and setter for the \code{ppm} slot of the object. @@ -288,9 +292,10 @@ The centWave algorithm is most suitable for high resolution \code{param} parameter. Parallel processing (one process per sample) is supported and can -be configured either by the \code{BPPARAM} parameter or by globally defining -the parallel processing mode using the \code{\link[BiocParallel]{register}} -method from the \code{BiocParallel} package. + be configured either by the \code{BPPARAM} parameter or by globally + defining the parallel processing mode using the + \code{\link[BiocParallel]{register}} method from the \code{BiocParallel} + package. } \section{Slots}{ @@ -342,7 +347,7 @@ The \code{\link{do_findChromPeaks_centWave}} core API function and \code{\link{findPeaks.centWave}} for the old user interface. \code{\link{XCMSnExp}} for the object containing the results of -the peak detection. + the peak detection. 
Other peak detection methods: \code{\link{chromatographic-peak-detection}}, \code{\link{findChromPeaks-centWaveWithPredIsoROIs}}, diff --git a/man/findChromPeaks-centWaveWithPredIsoROIs.Rd b/man/findChromPeaks-centWaveWithPredIsoROIs.Rd index a86b35f24..a51a90a1e 100644 --- a/man/findChromPeaks-centWaveWithPredIsoROIs.Rd +++ b/man/findChromPeaks-centWaveWithPredIsoROIs.Rd @@ -38,7 +38,7 @@ CentWavePredIsoParam(ppm = 25, peakwidth = c(20, 50), snthresh = 10, mzIntervalExtension = TRUE, polarity = "unknown") \S4method{findChromPeaks}{OnDiskMSnExp,CentWavePredIsoParam}(object, param, - BPPARAM = bpparam(), return.type = "XCMSnExp") + BPPARAM = bpparam(), return.type = "XCMSnExp", msLevel = 1L) \S4method{show}{CentWavePredIsoParam}(object) @@ -143,38 +143,41 @@ Currently not used, but has to be \code{"positive"}, \code{"negative"} or \code{"unknown"} if provided.} \item{object}{For \code{findChromPeaks}: an -\code{\link[MSnbase]{OnDiskMSnExp}} object containing the MS- and all other -experiment-relevant data. + \code{\link[MSnbase]{OnDiskMSnExp}} object containing the MS- and all + other experiment-relevant data. -For all other methods: a parameter object.} + For all other methods: a parameter object.} \item{param}{An \code{CentWavePredIsoParam} object with the settings for the chromatographic peak detection algorithm.} \item{BPPARAM}{A parameter class specifying if and how parallel processing should be performed. It defaults to \code{\link[BiocParallel]{bpparam}}. -See documentation of the \code{BiocParallel} for more details. If parallel -processing is enables, peak detection is performed in parallel on several -of the input samples.} +See documentation of the \code{BiocParallel} for more details. If +parallel processing is enabled, peak detection is performed in parallel +on several of the input samples.} \item{return.type}{Character specifying what type of object the method should return. 
Can be either \code{"XCMSnExp"} (default), \code{"list"} or \code{"xcmsSet"}.} +\item{msLevel}{\code{integer(1)} defining the MS level on which the peak +detection should be performed. Defaults to \code{msLevel = 1}.} + \item{value}{The value for the slot.} } \value{ The \code{CentWavePredIsoParam} function returns a -\code{CentWavePredIsoParam} class instance with all of the settings -specified for the two-step centWave-based peak detection considering also -isotopes. + \code{CentWavePredIsoParam} class instance with all of the settings + specified for the two-step centWave-based peak detection considering also + isotopes. For \code{findChromPeaks}: if \code{return.type = "XCMSnExp"} an -\code{\link{XCMSnExp}} object with the results of the peak detection. -If \code{return.type = "list"} a list of length equal to the number of -samples with matrices specifying the identified peaks. -If \code{return.type = "xcmsSet"} an \code{\linkS4class{xcmsSet}} object -with the results of the peak detection. + \code{\link{XCMSnExp}} object with the results of the peak detection. + If \code{return.type = "list"} a list of length equal to the number of + samples with matrices specifying the identified peaks. + If \code{return.type = "xcmsSet"} an \code{\linkS4class{xcmsSet}} object + with the results of the peak detection. } \description{ This method performs a two-step centWave-based chromatographic @@ -192,12 +195,13 @@ The \code{CentWavePredIsoParam} class allows to specify all \code{\link{CentWaveParam}} for all methods and arguments this class inherits. -The \code{findChromPeaks,OnDiskMSnExp,CentWavePredIsoParam} method -performs a two-step centWave-based chromatographic peak detection on all -samples from an \code{\link[MSnbase]{OnDiskMSnExp}} object. -\code{\link[MSnbase]{OnDiskMSnExp}} objects encapsule all experiment specific -data and load the spectra data (mz and intensity values) on the fly from -the original files applying also all eventual data manipulations. 
+The \code{findChromPeaks,OnDiskMSnExp,CentWavePredIsoParam} + method performs a two-step centWave-based chromatographic peak detection + on all samples from an \code{\link[MSnbase]{OnDiskMSnExp}} object. + \code{\link[MSnbase]{OnDiskMSnExp}} objects encapsule all experiment + specific data and load the spectra data (mz and intensity values) on the + fly from the original files applying also all eventual data + manipulations. \code{snthreshIsoROIs},\code{snthreshIsoROIs<-}: getter and setter for the \code{snthreshIsoROIs} slot of the object. @@ -218,9 +222,10 @@ the original files applying also all eventual data manipulations. See \code{\link{centWave}} for details on the centWave method. Parallel processing (one process per sample) is supported and can -be configured either by the \code{BPPARAM} parameter or by globally defining -the parallel processing mode using the \code{\link[BiocParallel]{register}} -method from the \code{BiocParallel} package. + be configured either by the \code{BPPARAM} parameter or by globally + defining the parallel processing mode using the + \code{\link[BiocParallel]{register}} method from the \code{BiocParallel} + package. } \section{Slots}{ @@ -255,7 +260,7 @@ The \code{\link{do_findChromPeaks_centWaveWithPredIsoROIs}} core \code{CentWavePredIsoParam} extends. \code{\link{XCMSnExp}} for the object containing the results of -the peak detection. + the peak detection. 
Other peak detection methods: \code{\link{chromatographic-peak-detection}}, \code{\link{findChromPeaks-centWave}}, diff --git a/man/findChromPeaks-massifquant.Rd b/man/findChromPeaks-massifquant.Rd index ee03f4bd2..cf0dc5cf1 100644 --- a/man/findChromPeaks-massifquant.Rd +++ b/man/findChromPeaks-massifquant.Rd @@ -58,7 +58,7 @@ MassifquantParam(ppm = 25, peakwidth = c(20, 50), snthresh = 10, checkBack = 0, withWave = FALSE) \S4method{findChromPeaks}{OnDiskMSnExp,MassifquantParam}(object, param, - BPPARAM = bpparam(), return.type = "XCMSnExp") + BPPARAM = bpparam(), return.type = "XCMSnExp", msLevel = 1L) \S4method{show}{MassifquantParam}(object) @@ -212,39 +212,43 @@ with Massifquant are subsequently filtered with the second step of the centWave algorithm, which includes wavelet estimation.} \item{object}{For \code{findChromPeaks}: an -\code{\link[MSnbase]{OnDiskMSnExp}} object containing the MS- and all other -experiment-relevant data. + \code{\link[MSnbase]{OnDiskMSnExp}} object containing the MS- and all + other experiment-relevant data. -For all other methods: a parameter object.} + For all other methods: a parameter object.} \item{param}{An \code{MassifquantParam} object containing all settings for the massifquant algorithm.} \item{BPPARAM}{A parameter class specifying if and how parallel processing should be performed. It defaults to \code{\link[BiocParallel]{bpparam}}. -See documentation of the \code{BiocParallel} for more details. If parallel -processing is enables, peak detection is performed in parallel on several -of the input samples.} +See documentation of the \code{BiocParallel} for more details. If +parallel processing is enabled, peak detection is performed in parallel +on several of the input samples.} \item{return.type}{Character specifying what type of object the method should return. 
Can be either \code{"XCMSnExp"} (default), \code{"list"} or \code{"xcmsSet"}.} +\item{msLevel}{\code{integer(1)} defining the MS level on which the peak +detection should be performed. Defaults to \code{msLevel = 1}.} + \item{value}{The value for the slot.} \item{f}{For \code{integrate}: a \code{MassifquantParam} object.} } \value{ -The \code{MassifquantParam} function returns a \code{MassifquantParam} -class instance with all of the settings specified for chromatographic peak -detection by the \emph{massifquant} method. +The \code{MassifquantParam} function returns a + \code{MassifquantParam} class instance with all of the settings + specified for chromatographic peak detection by the \emph{massifquant} + method. For \code{findChromPeaks}: if \code{return.type = "XCMSnExp"} an -\code{\link{XCMSnExp}} object with the results of the peak detection. -If \code{return.type = "list"} a list of length equal to the number of -samples with matrices specifying the identified peaks. -If \code{return.type = "xcmsSet"} an \code{\linkS4class{xcmsSet}} object -with the results of the peak detection. + \code{\link{XCMSnExp}} object with the results of the peak detection. + If \code{return.type = "list"} a list of length equal to the number of + samples with matrices specifying the identified peaks. + If \code{return.type = "xcmsSet"} an \code{\linkS4class{xcmsSet}} object + with the results of the peak detection. } \description{ Massifquant is a Kalman filter (KF)-based chromatographic peak @@ -259,11 +263,13 @@ The \code{MassifquantParam} class allows to specify all should be created with the \code{MassifquantParam} constructor. The \code{findChromPeaks,OnDiskMSnExp,MassifquantParam} -method performs chromatographic peak detection using the \emph{massifquant} -algorithm on all samples from an \code{\link[MSnbase]{OnDiskMSnExp}} object. 
-\code{\link[MSnbase]{OnDiskMSnExp}} objects encapsule all experiment specific -data and load the spectra data (mz and intensity values) on the fly from the -original files applying also all eventual data manipulations. + method performs chromatographic peak detection using the + \emph{massifquant} algorithm on all samples from an + \code{\link[MSnbase]{OnDiskMSnExp}} object. + \code{\link[MSnbase]{OnDiskMSnExp}} objects encapsule all experiment + specific data and load the spectra data (mz and intensity values) on the + fly from the original files applying also all eventual data + manipulations. \code{ppm},\code{ppm<-}: getter and setter for the \code{ppm} slot of the object. @@ -329,9 +335,10 @@ This algorithm's performance has been tested rigorously better accuracy. Parallel processing (one process per sample) is supported and can -be configured either by the \code{BPPARAM} parameter or by globally defining -the parallel processing mode using the \code{\link[BiocParallel]{register}} -method from the \code{BiocParallel} package. + be configured either by the \code{BPPARAM} parameter or by globally + defining the parallel processing mode using the + \code{\link[BiocParallel]{register}} method from the \code{BiocParallel} + package. } \section{Slots}{ @@ -380,7 +387,7 @@ The \code{\link{do_findChromPeaks_massifquant}} core API function and \code{\link{findPeaks.massifquant}} for the old user interface. \code{\link{XCMSnExp}} for the object containing the results of -the peak detection. + the peak detection. 
Other peak detection methods: \code{\link{chromatographic-peak-detection}}, \code{\link{findChromPeaks-centWaveWithPredIsoROIs}}, diff --git a/man/findChromPeaks-matchedFilter.Rd b/man/findChromPeaks-matchedFilter.Rd index 9bac5e9ef..409ac859d 100644 --- a/man/findChromPeaks-matchedFilter.Rd +++ b/man/findChromPeaks-matchedFilter.Rd @@ -55,7 +55,7 @@ MatchedFilterParam(binSize = 0.1, impute = "none", baseValue = numeric(), index = FALSE) \S4method{findChromPeaks}{OnDiskMSnExp,MatchedFilterParam}(object, param, - BPPARAM = bpparam(), return.type = "XCMSnExp") + BPPARAM = bpparam(), return.type = "XCMSnExp", msLevel = 1L) \S4method{show}{MatchedFilterParam}(object) @@ -146,39 +146,43 @@ in m/z for peaks with overlapping retention times} returned instead of values for m/z and retention times.} \item{object}{For \code{findChromPeaks}: an -\code{\link[MSnbase]{OnDiskMSnExp}} object containing the MS- and all other -experiment-relevant data. + \code{\link[MSnbase]{OnDiskMSnExp}} object containing the MS- and all + other experiment-relevant data. -For all other methods: a parameter object.} + For all other methods: a parameter object.} \item{param}{An \code{MatchedFilterParam} object containing all settings for the matchedFilter algorithm.} \item{BPPARAM}{A parameter class specifying if and how parallel processing should be performed. It defaults to \code{\link[BiocParallel]{bpparam}}. -See documentation of the \code{BiocParallel} for more details. If parallel -processing is enables, peak detection is performed in parallel on several -of the input samples.} +See documentation of the \code{BiocParallel} for more details. If +parallel processing is enabled, peak detection is performed in parallel +on several of the input samples.} \item{return.type}{Character specifying what type of object the method should return. 
Can be either \code{"XCMSnExp"} (default), \code{"list"} or \code{"xcmsSet"}.} +\item{msLevel}{\code{integer(1)} defining the MS level on which the peak +detection should be performed. Defaults to \code{msLevel = 1}.} + \item{value}{The value for the slot.} \item{x}{For \code{max}: a \code{MatchedFilterParam} object.} } \value{ The \code{MatchedFilterParam} function returns a -\code{MatchedFilterParam} class instance with all of the settings specified -for chromatographic detection by the \emph{matchedFilter} method. + \code{MatchedFilterParam} class instance with all of the settings + specified for chromatographic detection by the \emph{matchedFilter} + method. For \code{findChromPeaks}: if \code{return.type = "XCMSnExp"} an -\code{\link{XCMSnExp}} object with the results of the peak detection. -If \code{return.type = "list"} a list of length equal to the number of -samples with matrices specifying the identified peaks. -If \code{return.type = "xcmsSet"} an \code{\linkS4class{xcmsSet}} object -with the results of the peak detection. + \code{\link{XCMSnExp}} object with the results of the peak detection. + If \code{return.type = "list"} a list of length equal to the number of + samples with matrices specifying the identified peaks. + If \code{return.type = "xcmsSet"} an \code{\linkS4class{xcmsSet}} object + with the results of the peak detection. } \description{ The \emph{matchedFilter} algorithm identifies peaks in the @@ -199,11 +203,12 @@ The \code{MatchedFilterParam} class allows to specify all constructor. The \code{findChromPeaks,OnDiskMSnExp,MatchedFilterParam} -method performs peak detection using the \emph{matchedFilter} algorithm -on all samples from an \code{\link[MSnbase]{OnDiskMSnExp}} object. -\code{\link[MSnbase]{OnDiskMSnExp}} objects encapsule all experiment specific -data and load the spectra data (mz and intensity values) on the fly from the -original files applying also all eventual data manipulations. 
+ method performs peak detection using the \emph{matchedFilter} algorithm + on all samples from an \code{\link[MSnbase]{OnDiskMSnExp}} object. + \code{\link[MSnbase]{OnDiskMSnExp}} objects encapsule all experiment + specific data and load the spectra data (mz and intensity values) on the + fly from the original files applying also all eventual data + manipulations. \code{binSize},\code{binSize<-}: getter and setter for the \code{binSize} slot of the object. @@ -248,9 +253,10 @@ The intensities are binned by the provided m/z values within each \code{\link{binYonX}} and \code{\link{imputeLinInterpol}} methods. Parallel processing (one process per sample) is supported and can -be configured either by the \code{BPPARAM} parameter or by globally defining -the parallel processing mode using the \code{\link[BiocParallel]{register}} -method from the \code{BiocParallel} package. + be configured either by the \code{BPPARAM} parameter or by globally + defining the parallel processing mode using the + \code{\link[BiocParallel]{register}} method from the \code{BiocParallel} + package. } \section{Slots}{ @@ -271,8 +277,9 @@ These methods and classes are part of the updated and modernized } \examples{ -## Create a MatchedFilterParam object -mfp <- MatchedFilterParam(binSize = 0.5) +## Create a MatchedFilterParam object. Note that we use an unnecessarily large +## binSize parameter to reduce the run-time of the example. +mfp <- MatchedFilterParam(binSize = 5) ## Change snthresh parameter snthresh(mfp) <- 15 mfp @@ -284,7 +291,7 @@ library(faahKO) library(MSnbase) fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, full.names = TRUE) -raw_data <- readMSData2(fls) +raw_data <- readMSData2(fls[1:2]) ## Perform the chromatographic peak detection using the settings defined ## above.
Note that we are also disabling parallel processing in this ## example by registering a "SerialParam" @@ -303,7 +310,7 @@ The \code{\link{do_findChromPeaks_matchedFilter}} core API function and \code{\link{findPeaks.matchedFilter}} for the old user interface. \code{\link{XCMSnExp}} for the object containing the results of -the chromatographic peak detection. + the chromatographic peak detection. Other peak detection methods: \code{\link{chromatographic-peak-detection}}, \code{\link{findChromPeaks-centWaveWithPredIsoROIs}}, diff --git a/man/findPeaks-MSW.Rd b/man/findPeaks-MSW.Rd index a992c2dba..0568e2b37 100644 --- a/man/findPeaks-MSW.Rd +++ b/man/findPeaks-MSW.Rd @@ -57,7 +57,7 @@ MSWParam(snthresh = 3, verboseColumns = FALSE, scales = c(1, seq(2, 30, peakThr = NULL, tuneIn = FALSE, ...) \S4method{findChromPeaks}{OnDiskMSnExp,MSWParam}(object, param, - BPPARAM = bpparam(), return.type = "XCMSnExp") + BPPARAM = bpparam(), return.type = "XCMSnExp", msLevel = 1L) \S4method{show}{MSWParam}(object) @@ -130,9 +130,9 @@ used in computing the SNR.} of the peak in 2-D CWT coefficient matrix.} \item{peakThr}{numeric(1) with the minimum absolute intensity -(above baseline) of peaks to be picked. If provided, the smoothing function -\code{\link[MassSpecWavelet]{sav.gol}} function is called to estimate the -local intensity.} +(above baseline) of peaks to be picked. If provided, the smoothing +function \code{\link[MassSpecWavelet]{sav.gol}} function is called to +estimate the local intensity.} \item{tuneIn}{logical(1) whther to tune in the parameter estimation of the detected peaks.} @@ -143,37 +143,40 @@ estimation of the detected peaks.} \code{MassSpecWavelet} package.} \item{object}{For \code{findChromPeaks}: an -\code{\link[MSnbase]{OnDiskMSnExp}} object containing the MS- and all other -experiment-relevant data. + \code{\link[MSnbase]{OnDiskMSnExp}} object containing the MS- and all + other experiment-relevant data. 
-For all other methods: a parameter object.} + For all other methods: a parameter object.} \item{param}{An \code{MSWParam} object containing all settings for the algorithm.} \item{BPPARAM}{A parameter class specifying if and how parallel processing should be performed. It defaults to \code{\link[BiocParallel]{bpparam}}. -See documentation of the \code{BiocParallel} for more details. If parallel -processing is enables, peak detection is performed in parallel on several -of the input samples.} +See documentation of the \code{BiocParallel} for more details. If +parallel processing is enabled, peak detection is performed in parallel +on several of the input samples.} \item{return.type}{Character specifying what type of object the method should return. Can be either \code{"XCMSnExp"} (default), \code{"list"} or \code{"xcmsSet"}.} +\item{msLevel}{\code{integer(1)} defining the MS level on which the peak +detection should be performed. Defaults to \code{msLevel = 1}.} + \item{value}{The value for the slot.} } \value{ The \code{MSWParam} function returns a \code{MSWParam} -class instance with all of the settings specified for peak detection by -the \emph{MSW} method. + class instance with all of the settings specified for peak detection by + the \emph{MSW} method. For \code{findChromPeaks}: if \code{return.type = "XCMSnExp"} an -\code{\link{XCMSnExp}} object with the results of the peak detection. -If \code{return.type = "list"} a list of length equal to the number of -samples with matrices specifying the identified peaks. -If \code{return.type = "xcmsSet"} an \code{\linkS4class{xcmsSet}} object -with the results of the detection. + \code{\link{XCMSnExp}} object with the results of the peak detection. + If \code{return.type = "list"} a list of length equal to the number of + samples with matrices specifying the identified peaks. + If \code{return.type = "xcmsSet"} an \code{\linkS4class{xcmsSet}} object + with the results of the detection. 
} \description{ Perform peak detection in mass spectrometry @@ -184,12 +187,13 @@ The \code{MSWParam} class allows to specify all created with the \code{MSWParam} constructor. The \code{findChromPeaks,OnDiskMSnExp,MSWParam} -method performs peak detection in single-spectrum non-chromatography MS -data using functionality from the \code{MassSpecWavelet} package on all -samples from an \code{\link[MSnbase]{OnDiskMSnExp}} object. -\code{\link[MSnbase]{OnDiskMSnExp}} objects encapsule all experiment specific -data and load the spectra data (mz and intensity values) on the fly from the -original files applying also all eventual data manipulations. + method performs peak detection in single-spectrum non-chromatography MS + data using functionality from the \code{MassSpecWavelet} package on all + samples from an \code{\link[MSnbase]{OnDiskMSnExp}} object. + \code{\link[MSnbase]{OnDiskMSnExp}} objects encapsule all experiment + specific data and load the spectra data (mz and intensity values) on the + fly from the original files applying also all eventual data + manipulations. \code{snthresh},\code{snthresh<-}: getter and setter for the \code{snthresh} slot of the object. @@ -236,9 +240,10 @@ This is a wrapper for the peak picker in Bioconductor's \emph{xcmsDirect} vignette for more information. Parallel processing (one process per sample) is supported and can -be configured either by the \code{BPPARAM} parameter or by globally defining -the parallel processing mode using the \code{\link[BiocParallel]{register}} -method from the \code{BiocParallel} package. + be configured either by the \code{BPPARAM} parameter or by globally + defining the parallel processing mode using the + \code{\link[BiocParallel]{register}} method from the \code{BiocParallel} + package. } \section{Slots}{ @@ -282,7 +287,7 @@ The \code{\link{do_findPeaks_MSW}} core API function and \code{\link{findPeaks.MSW}} for the old user interface. 
\code{\link{XCMSnExp}} for the object containing the results of -the peak detection. + the peak detection. Other peak detection methods: \code{\link{chromatographic-peak-detection}}, \code{\link{findChromPeaks-centWaveWithPredIsoROIs}}, diff --git a/man/findPeaks.MSW-xcmsRaw-method.Rd b/man/findPeaks.MSW-xcmsRaw-method.Rd index f8bf1c692..eec9ff145 100644 --- a/man/findPeaks.MSW-xcmsRaw-method.Rd +++ b/man/findPeaks.MSW-xcmsRaw-method.Rd @@ -25,36 +25,36 @@ should be returned.} } \value{ A matrix, each row representing an intentified peak, with columns: -\describe{ -\item{mz}{m/z value of the peak at the centroid position.} -\item{mzmin}{Minimum m/z of the peak.} -\item{mzmax}{Maximum m/z of the peak.} -\item{rt}{Always \code{-1}.} -\item{rtmin}{Always \code{-1}.} -\item{rtmax}{Always \code{-1}.} -\item{into}{Integrated (original) intensity of the peak.} -\item{maxo}{Maximum intensity of the peak.} -\item{intf}{Always \code{NA}.} -\item{maxf}{Maximum MSW-filter response of the peak.} -\item{sn}{Signal to noise ratio.} -} + \describe{ + \item{mz}{m/z value of the peak at the centroid position.} + \item{mzmin}{Minimum m/z of the peak.} + \item{mzmax}{Maximum m/z of the peak.} + \item{rt}{Always \code{-1}.} + \item{rtmin}{Always \code{-1}.} + \item{rtmax}{Always \code{-1}.} + \item{into}{Integrated (original) intensity of the peak.} + \item{maxo}{Maximum intensity of the peak.} + \item{intf}{Always \code{NA}.} + \item{maxf}{Maximum MSW-filter response of the peak.} + \item{sn}{Signal to noise ratio.} + } } \description{ This method performs peak detection in mass spectrometry -direct injection spectrum using a wavelet based algorithm. + direct injection spectrum using a wavelet based algorithm. } \details{ This is a wrapper around the peak picker in Bioconductor's -\code{MassSpecWavelet} package calling -\code{\link[MassSpecWavelet]{peakDetectionCWT}} and -\code{\link[MassSpecWavelet]{tuneInPeakInfo}} functions. 
+ \code{MassSpecWavelet} package calling + \code{\link[MassSpecWavelet]{peakDetectionCWT}} and + \code{\link[MassSpecWavelet]{tuneInPeakInfo}} functions. } \seealso{ \code{\link{MSW}} for the new user interface, -\code{\link{do_findPeaks_MSW}} for the downstream analysis -function or \code{\link[MassSpecWavelet]{peakDetectionCWT}} from the -\code{MassSpecWavelet} for details on the algorithm and additionally supported -parameters. + \code{\link{do_findPeaks_MSW}} for the downstream analysis + function or \code{\link[MassSpecWavelet]{peakDetectionCWT}} from the + \code{MassSpecWavelet} for details on the algorithm and additionally + supported parameters. } \author{ Joachim Kutzera, Steffen Neumann, Johannes Rainer diff --git a/man/findPeaks.matchedFilter-xcmsRaw-method.Rd b/man/findPeaks.matchedFilter-xcmsRaw-method.Rd index 8d0d228dd..b29a6f3e5 100644 --- a/man/findPeaks.matchedFilter-xcmsRaw-method.Rd +++ b/man/findPeaks.matchedFilter-xcmsRaw-method.Rd @@ -42,46 +42,48 @@ in m/z for peaks with overlapping retention times} \item{index}{\code{logical(1)} specifying whether indicies should be returned instead of values for m/z and retention times.} -\item{sleep}{(DEFUNCT). This parameter is no longer functional, as it would cause -problems in parallel processing mode.} +\item{sleep}{(DEPRECATED). 
The use of this parameter is highly discouraged, +as it could cause problems in parallel processing mode.} -\item{scanrange}{Numeric vector defining the range of scans to which the original -\code{object} should be sub-setted before peak detection.} +\item{scanrange}{Numeric vector defining the range of scans to which the +original \code{object} should be sub-setted before peak detection.} } \value{ A matrix, each row representing an intentified chromatographic peak, -with columns: -\describe{ -\item{mz}{Intensity weighted mean of m/z values of the peak across scans.} -\item{mzmin}{Minimum m/z of the peak.} -\item{mzmax}{Maximum m/z of the peak.} -\item{rt}{Retention time of the peak's midpoint.} -\item{rtmin}{Minimum retention time of the peak.} -\item{rtmax}{Maximum retention time of the peak.} -\item{into}{Integrated (original) intensity of the peak.} -\item{intf}{Integrated intensity of the filtered peak.} -\item{maxo}{Maximum intensity of the peak.} -\item{maxf}{Maximum intensity of the filtered peak.} -\item{i}{Rank of peak in merged EIC (\code{<= max}).} -\item{sn}{Signal to noise ratio of the peak.} -} + with columns: + \describe{ + \item{mz}{Intensity weighted mean of m/z values of the peak across + scans.} + \item{mzmin}{Minimum m/z of the peak.} + \item{mzmax}{Maximum m/z of the peak.} + \item{rt}{Retention time of the peak's midpoint.} + \item{rtmin}{Minimum retention time of the peak.} + \item{rtmax}{Maximum retention time of the peak.} + \item{into}{Integrated (original) intensity of the peak.} + \item{intf}{Integrated intensity of the filtered peak.} + \item{maxo}{Maximum intensity of the peak.} + \item{maxf}{Maximum intensity of the filtered peak.} + \item{i}{Rank of peak in merged EIC (\code{<= max}).} + \item{sn}{Signal to noise ratio of the peak.} + } } \description{ Find peaks in the chromatographic time domain of the -profile matrix. For more details see -\code{\link{do_findChromPeaks_matchedFilter}}. + profile matrix. 
For more details see + \code{\link{do_findChromPeaks_matchedFilter}}. } \references{ Colin A. Smith, Elizabeth J. Want, Grace O'Maille, Ruben Abagyan and -Gary Siuzdak. "XCMS: Processing Mass Spectrometry Data for Metabolite -Profiling Using Nonlinear Peak Alignment, Matching, and Identification" -\emph{Anal. Chem.} 2006, 78:779-787. + Gary Siuzdak. "XCMS: Processing Mass Spectrometry Data for Metabolite + Profiling Using Nonlinear Peak Alignment, Matching, and Identification" + \emph{Anal. Chem.} 2006, 78:779-787. + @family Old peak detection methods } \seealso{ \code{\link{matchedFilter}} for the new user interface. -\code{\linkS4class{xcmsRaw}}, -\code{\link{do_findChromPeaks_matchedFilter}} for the core function -performing the peak detection. + \code{\linkS4class{xcmsRaw}}, + \code{\link{do_findChromPeaks_matchedFilter}} for the core function + performing the peak detection. } \author{ Colin A. Smith diff --git a/man/groupChromPeaks-density.Rd b/man/groupChromPeaks-density.Rd index 1c8b2bd42..1edad87ba 100644 --- a/man/groupChromPeaks-density.Rd +++ b/man/groupChromPeaks-density.Rd @@ -99,8 +99,8 @@ the peak grouping algorithm.} } \value{ The \code{PeakDensityParam} function returns a -\code{PeakDensityParam} class instance with all of the settings -specified for chromatographic peak alignment based on peak densities. + \code{PeakDensityParam} class instance with all of the settings + specified for chromatographic peak alignment based on peak densities. For \code{groupChromPeaks}: a \code{\link{XCMSnExp}} object with the results of the correspondence analysis. 
The definition of the resulting diff --git a/man/groupChromPeaks-mzClust.Rd b/man/groupChromPeaks-mzClust.Rd index 69ae1f38d..f95ae7014 100644 --- a/man/groupChromPeaks-mzClust.Rd +++ b/man/groupChromPeaks-mzClust.Rd @@ -81,8 +81,8 @@ the peak grouping algorithm.} } \value{ The \code{MzClustParam} function returns a -\code{MzClustParam} class instance with all of the settings -specified for high resolution single spectra peak alignment. + \code{MzClustParam} class instance with all of the settings + specified for high resolution single spectra peak alignment. For \code{groupChromPeaks}: a \code{\link{XCMSnExp}} object with the results of the peak grouping step (i.e. the features). These can be diff --git a/man/groupChromPeaks-nearest.Rd b/man/groupChromPeaks-nearest.Rd index 9af5b51ce..a8aeb7a28 100644 --- a/man/groupChromPeaks-nearest.Rd +++ b/man/groupChromPeaks-nearest.Rd @@ -82,8 +82,8 @@ the peak grouping algorithm.} } \value{ The \code{NearestPeaksParam} function returns a -\code{NearestPeaksParam} class instance with all of the settings -specified for peak alignment based on peak proximity. + \code{NearestPeaksParam} class instance with all of the settings + specified for peak alignment based on peak proximity. For \code{groupChromPeaks}: a \code{\link{XCMSnExp}} object with the results of the peak grouping/correspondence step (i.e. the mz-rt @@ -147,7 +147,7 @@ p <- NearestPeaksParam(kNN = 3) p ############################## -## Chromatographi peak detection and grouping. +## Chromatographic peak detection and grouping. 
## ## Below we perform first a chromatographic peak detection (using the ## matchedFilter method) on some of the test files from the faahKO package diff --git a/man/highlightChromPeaks.Rd b/man/highlightChromPeaks.Rd new file mode 100644 index 000000000..35c5e5441 --- /dev/null +++ b/man/highlightChromPeaks.Rd @@ -0,0 +1,73 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/functions-XCMSnExp.R +\name{highlightChromPeaks} +\alias{highlightChromPeaks} +\title{Add definition of chromatographic peaks to an extracted chromatogram + plot} +\usage{ +highlightChromPeaks(x, rt, mz, border = rep("00000040", length(fileNames(x))), + lwd = 1, col = NA, type = c("rect", "point"), ...) +} +\arguments{ +\item{x}{For \code{highlightChromPeaks}: \code{XCMSnExp} object with the +detected peaks.} + +\item{rt}{For \code{highlightChromPeaks}: \code{numeric(2)} with the +retention time range from which peaks should be extracted and plotted.} + +\item{mz}{\code{numeric(2)} with the mz range from which the peaks should +be extracted and plotted.} + +\item{border}{colors to be used to color the border of the rectangles. Has to +be equal to the number of samples in \code{x}.} + +\item{lwd}{\code{numeric(1)} defining the width of the line/border.} + +\item{col}{For \code{highlightChromPeaks}: color to be used to fill the +rectangle.} + +\item{type}{the plotting type. See \code{\link[graphics]{plot}} for more +details. 
+For \code{highlightChromPeaks}: \code{character(1)} defining how the peak +should be highlighted: \code{type = "rect"} draws a rectangle +representing the peak definition, \code{type = "point"} indicates a +chromatographic peak with a single point at the position of the peak's +\code{"rt"} and \code{"maxo"}.} + +\item{...}{additional parameters to the \code{\link{matplot}} or \code{plot} +function.} +} +\description{ +The \code{highlightChromPeaks} function adds chromatographic + peak definitions to an existing plot, such as one created by the + \code{plot} method on a \code{\link[MSnbase]{Chromatogram}} or + \code{\link[MSnbase]{Chromatograms}} object. +} +\examples{ + +## Read some files from the faahKO package. +library(xcms) +library(faahKO) +faahko_3_files <- c(system.file('cdf/KO/ko16.CDF', package = "faahKO"), + system.file('cdf/KO/ko18.CDF', package = "faahKO")) + +od <- readMSData2(faahko_3_files) + +## Peak detection using the 'matchedFilter' method. Note that we are using a +## larger binSize to reduce the runtime of the example. +xod <- findChromPeaks(od, param = MatchedFilterParam(binSize = 0.3, snthresh = 20)) + +## Extract the ion chromatogram for one chromatographic peak in the data. +chrs <- chromatogram(xod, rt = c(2700, 2900), mz = 335) + +plot(chrs) + +## Extract chromatographic peaks for the mz/rt range (if any). +chromPeaks(xod, rt = c(2700, 2900), mz = 335) + +## Highlight the chromatographic peaks in the area +highlightChromPeaks(xod, rt = c(2700, 2900), mz = 335) +} +\author{ +Johannes Rainer +} diff --git a/man/plotChromPeakDensity.Rd b/man/plotChromPeakDensity.Rd index 9fe333079..4909403c2 100644 --- a/man/plotChromPeakDensity.Rd +++ b/man/plotChromPeakDensity.Rd @@ -71,8 +71,9 @@ fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, ## Reading 2 of the KO samples raw_data <- readMSData2(fls[1:2]) -## Perform the peak detection using the centWave method. 
-res <- findChromPeaks(raw_data, param = CentWaveParam(noise = 1000)) +## Perform the peak detection using the centWave method (settings are tuned +## to speed up example execution) +res <- findChromPeaks(raw_data, param = CentWaveParam(noise = 3000, snthresh = 40)) ## Align the samples using obiwarp res <- adjustRtime(res, param = ObiwarpParam()) diff --git a/man/plotChromPeaks.Rd b/man/plotChromPeaks.Rd new file mode 100644 index 000000000..81c1fe1db --- /dev/null +++ b/man/plotChromPeaks.Rd @@ -0,0 +1,116 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/functions-XCMSnExp.R +\name{plotChromPeaks} +\alias{plotChromPeaks} +\alias{plotChromPeakImage} +\title{General visualizations of peak detection results} +\usage{ +plotChromPeaks(x, file = 1, xlim = NULL, ylim = NULL, add = FALSE, + border = "#00000060", col = NA, xlab = "retention time", ylab = "mz", + main = NULL, ...) + +plotChromPeakImage(x, binSize = 30, xlim = NULL, log = FALSE, + xlab = "retention time", yaxt = par("yaxt"), + main = "Chromatographic peak counts", ...) +} +\arguments{ +\item{x}{\code{\link{XCMSnExp}} object.} + +\item{file}{For \code{plotChromPeaks}: \code{numeric(1)} specifying the +index of the file within \code{x} for which the plot should be created. +Defaults to \code{1}.} + +\item{xlim}{\code{numeric(2)} specifying the x-axis limits (retention time +dimension). Defaults to \code{NULL} in which case the full retention +time range of the file is used.} + +\item{ylim}{For \code{plotChromPeaks}: \code{numeric(2)} specifying the +y-axis limits (mz dimension). 
Defaults to \code{NULL} in which case the +full mz range of the file is used.} + +\item{add}{For \code{plotChromPeaks}: \code{logical(1)} whether the plot +should be added or created as a new plot.} + +\item{border}{For \code{plotChromPeaks}: the color for the rectangles' +border.} + +\item{col}{For \code{plotChromPeaks}: the color to be used to fill the +rectangles.} + +\item{xlab}{\code{character(1)} defining the x-axis label.} + +\item{ylab}{For \code{plotChromPeaks}: \code{character(1)} defining the +y-axis label.} + +\item{main}{\code{character(1)} defining the plot title. By default (i.e. +\code{main = NULL}) the name of the file will be used as title.} + +\item{...}{Additional arguments passed to the \code{plot} (for +\code{plotChromPeaks}) and \code{image} (for +\code{plotChromPeakImage}) functions. Ignored if \code{add = TRUE}.} + +\item{binSize}{For \code{plotChromPeakImage}: \code{numeric(1)} defining the +size of the bins along the x-axis (retention time). Defaults to +\code{binSize = 30}, peaks within each 30 seconds will thus be counted and +plotted.} + +\item{log}{For \code{plotChromPeakImage}: \code{logical(1)} whether the peak +counts should be log2 transformed before plotting.} + +\item{yaxt}{For \code{plotChromPeakImage}: \code{character(1)} defining +whether y-axis labels should be added. To disable the y-axis use +\code{yaxt = "n"}. For any other value of \code{yaxt} the axis will be +drawn. See \code{par} help page for more details.} +} +\description{ +\code{plotChromPeaks} plots the identified chromatographic + peaks from one file into the plane spanned by the retention time and mz + dimension (x-axis representing the retention time and y-axis mz). + Each chromatographic peak is plotted as a rectangle representing its + width in rt and mz dimension. + + This plot is supposed to provide some initial overview of the + chromatographic peak detection results.
+ +\code{plotChromPeakImage} plots the number of detected peaks for + each sample along the retention time axis as an \emph{image} plot, i.e. + with the number of peaks detected in each bin along the retention time + represented with the color of the respective cell. +} +\details{ +The width and line type of the rectangles indicating the detected + chromatographic peaks for the \code{plotChromPeaks} function can be + specified using the \code{par} function, i.e. with \code{par(lwd = 3)} + and \code{par(lty = 2)}, respectively. +} +\examples{ + +## Perform peak detection on two files from the faahKO package. +library(xcms) +library(faahKO) +faahko_file <- c(system.file('cdf/KO/ko16.CDF', package = "faahKO"), + system.file('cdf/KO/ko18.CDF', package = "faahKO")) + +od <- readMSData2(faahko_file) + +## Peak detection using the 'matchedFilter' method. Note that we are using a +## larger binSize to reduce the runtime of the example. +xod <- findChromPeaks(od, param = MatchedFilterParam(binSize = 0.3, snthresh = 20)) + +## plotChromPeakImage: plot an image for the identified peaks per file +plotChromPeakImage(xod) + +## Show all detected chromatographic peaks from the first file +plotChromPeaks(xod) + +## Plot all detected peaks from the second file and restrict the plot to a +## mz-rt slice +plotChromPeaks(xod, file = 2, xlim = c(3500, 3600), ylim = c(400, 600)) +} +\seealso{ +\code{\link{highlightChromPeaks}} for the function to highlight + detected chromatographic peaks in extracted ion chromatogram plots. 
+} +\author{ +Johannes Rainer +} diff --git a/man/plotChromatogram.Rd b/man/plotChromatogram.Rd deleted file mode 100644 index 03ff29dba..000000000 --- a/man/plotChromatogram.Rd +++ /dev/null @@ -1,115 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/functions-Chromatogram.R -\name{plotChromatogram} -\alias{plotChromatogram} -\alias{highlightChromPeaks} -\title{Plot Chromatogram objects} -\usage{ -plotChromatogram(x, rt, col = "#00000060", lty = 1, type = "l", - xlab = "retention time", ylab = "intensity", main = NULL, ...) - -highlightChromPeaks(x, rt, mz, border = rep("00000040", length(fileNames(x))), - lwd = 1, col = NA, type = c("rect", "point"), ...) -} -\arguments{ -\item{x}{For \code{plotChromatogram}: \code{list} of -\code{\link{Chromatogram}} objects. Such as extracted from an -\code{\link{XCMSnExp}} object by the \code{\link{extractChromatograms}} -method. -For \code{highlightChromPeaks}: \code{XCMSnExp} object with the detected -peaks.} - -\item{rt}{For \code{plotChromatogram}: \code{numeric(2)}, optional parameter -to subset each \code{Chromatogram} by retention time prior to plotting. -Alternatively, the plot could be subsetted by passing a \code{xlim} -parameter. -For \code{highlightChromPeaks}: \code{numeric(2)} with the -retention time range from which peaks should be extracted and plotted.} - -\item{col}{For \code{plotChromatogram}: color definition for each -line/sample. Has to have the same length as samples/elements in \code{x}, -otherwise \code{col[1]} is recycled to generate a vector of -\code{length(x)}. -For \code{highlightChromPeaks}: color to be used to fill the -rectangle.} - -\item{lty}{the line type. See \code{\link[graphics]{plot}} for more details.} - -\item{type}{the plotting type. See \code{\link[graphics]{plot}} for more -details. 
-For \code{highlightChromPeaks}: \code{character(1)} defining how the peak -should be highlighted: \code{type = "rect"} draws a rectangle -representing the peak definition, \code{type = "point"} indicates a -chromatographic peak with a single point at the position of the peak's -\code{"rt"} and \code{"maxo"}.} - -\item{xlab}{\code{character(1)} with the label for the x-axis.} - -\item{ylab}{\code{character(1)} with the label for the y-axis.} - -\item{main}{The title for the plot. For \code{plotChromatogram}: if -\code{main = NULL} the mz range of the \code{Chromatogram} object(s) will -be used as the title.} - -\item{...}{additional parameters to the \code{\link{matplot}} or \code{plot} -function.} - -\item{mz}{\code{numeric(2)} with the mz range from which the peaks should -be extracted and plotted.} - -\item{border}{colors to be used to color the border of the rectangles. Has to -be equal to the number of samples in \code{x}.} - -\item{lwd}{\code{numeric(1)} defining the width of the line/border.} -} -\description{ -\code{plotChromatogram} creates a chromatogram plot for a - single \code{Chromatogram} object or a \code{list} of - \code{\link{Chromatogram}} objects (one line for each - \code{\link{Chromatogram}}/sample). - -The \code{highlightChromPeaks} function adds chromatographic - peak definitions to an existing plot, such as one created by the - \code{plotChromatograms} function. -} -\details{ -The \code{plotChromatogram} function allows to efficiently plot - the chromatograms of several samples into a single plot. -} -\examples{ - -## Perform a fast peak detection. 
-library(xcms) -library(faahKO) -faahko_3_files <- c(system.file('cdf/KO/ko15.CDF', package = "faahKO"), - system.file('cdf/KO/ko16.CDF', package = "faahKO"), - system.file('cdf/KO/ko18.CDF', package = "faahKO")) - -od <- readMSData2(faahko_3_files) - -od <- findChromPeaks(od, param = CentWaveParam(snthresh = 20, noise = 10000)) - -rtr <- c(2600, 2750) -mzr <- c(344, 344) -chrs <- extractChromatograms(od, rt = rtr, mz = mzr) - -## Plot a single chromatogram -plotChromatogram(chrs[[1]]) - -## Plot all chromatograms at once, using different colors for each. -plotChromatogram(chrs, col = c("#FF000080", "#00FF0080", "#0000FF80"), lwd = 2) - -## Highlight identified chromatographic peaks. -highlightChromPeaks(od, rt = rtr, mz = mzr, - col = c("#FF000005", "#00FF0005", "#0000FF05"), - border = c("#FF000040", "#00FF0040", "#0000FF40")) - -} -\seealso{ -\code{\link{extractChromatograms}} for how to extract a list of - \code{\link{Chromatogram}} objects from an \code{\link{XCMSnExp}} - objects. -} -\author{ -Johannes Rainer -} diff --git a/man/profMat-xcmsSet.Rd b/man/profMat-xcmsSet.Rd index 8b15474d9..c15ca9b10 100644 --- a/man/profMat-xcmsSet.Rd +++ b/man/profMat-xcmsSet.Rd @@ -21,62 +21,67 @@ section for more information.} \item{step}{numeric(1) representing the m/z bin size.} \item{baselevel}{numeric(1) representing the base value to which -empty elements (i.e. m/z bins without a measured intensity) should be set. -Only considered if \code{method = "binlinbase"}. See \code{baseValue} -parameter of \code{\link{imputeLinInterpol}} for more details.} +empty elements (i.e. m/z bins without a measured intensity) should be +set. Only considered if \code{method = "binlinbase"}. See +\code{baseValue} parameter of \code{\link{imputeLinInterpol}} for more +details.} \item{basespace}{numeric(1) representing the m/z length after -which the signal will drop to the base level. 
Linear interpolation will be -used between consecutive data points falling within \code{2 * basespace} to -each other. Only considered if \code{method = "binlinbase"}. If not -specified, it defaults to \code{0.075}. Internally this parameter is -translated into the \code{distance} parameter of the -\code{\link{imputeLinInterpol}} function by -\code{distance = floor(basespace / step)}. See \code{distance} parameter -of \code{\link{imputeLinInterpol}} for more details.} +which the signal will drop to the base level. Linear interpolation will +be used between consecutive data points falling within +\code{2 * basespace} to each other. Only considered if +\code{method = "binlinbase"}. If not specified, it defaults to +\code{0.075}. Internally this parameter is translated into the +\code{distance} parameter of the \code{\link{imputeLinInterpol}} +function by \code{distance = floor(basespace / step)}. See +\code{distance} parameter of \code{\link{imputeLinInterpol}} for more +details.} \item{mzrange.}{Optional numeric(2) manually specifying the mz value range to be used for binnind. If not provided, the whole mz value range is used.} } \value{ \code{profMat} returns the profile matrix (rows representing scans, -columns equally spaced m/z values). + columns equally spaced m/z values). } \description{ The \emph{profile} matrix is an n x m matrix, n (rows) -representing equally spaced m/z values (bins) and m (columns) the -retention time of the corresponding scans. Each cell contains the maximum -intensity measured for the specific scan and m/z values falling within the -m/z bin. + representing equally spaced m/z values (bins) and m (columns) the + retention time of the corresponding scans. Each cell contains the maximum + intensity measured for the specific scan and m/z values falling within + the m/z bin. -The \code{profMat} method creates a new profile matrix or returns the -profile matrix within the object's \code{@env} slot, if available. 
Settings -for the profile matrix generation, such as \code{step} (the bin size), -\code{method} or additional settings are extracted from the respective slots -of the \code{\linkS4class{xcmsRaw}} object. Alternatively it is possible to -specify all of the settings as additional parameters. + The \code{profMat} method creates a new profile matrix or returns the + profile matrix within the object's \code{@env} slot, if available. + Settings for the profile matrix generation, such as \code{step} (the bin + size), \code{method} or additional settings are extracted from the + respective slots of the \code{\linkS4class{xcmsRaw}} object. + Alternatively it is possible to specify all of the settings as + additional parameters. } \details{ Profile matrix generation methods: -\describe{ -\item{bin}{The default profile matrix generation method that does a simple -binning, i.e. aggregating of intensity values falling within an m/z bin.} -\item{binlin}{Binning followed by linear interpolation to impute missing -values. The value for m/z bins without a measured intensity are inferred by -a linear interpolation between neighboring bins with a measured intensity.} -\item{binlinbase}{Binning followed by a linear interpolation to impute -values for empty elements (m/z bins) within a user-definable proximity to -non-empty elements while stetting the element's value to the -\code{baselevel} otherwise. See \code{impute = "linbase"} parameter of -\code{\link{imputeLinInterpol}} for more details.} -\item{intlin}{Set the elements' values to the integral of the linearly -interpolated data from plus to minus half the step size.} -} + \describe{ + \item{bin}{The default profile matrix generation method that does a + simple binning, i.e. aggregating of intensity values falling within an + m/z bin.} + \item{binlin}{Binning followed by linear interpolation to impute missing + values. 
The value for m/z bins without a measured intensity are inferred + by a linear interpolation between neighboring bins with a measured + intensity.} + \item{binlinbase}{Binning followed by a linear interpolation to impute + values for empty elements (m/z bins) within a user-definable proximity to + non-empty elements while setting the element's value to the + \code{baselevel} otherwise. See \code{impute = "linbase"} parameter of + \code{\link{imputeLinInterpol}} for more details.} + \item{intlin}{Set the elements' values to the integral of the linearly + interpolated data from plus to minus half the step size.} + } } \note{ From \code{xcms} version 1.51.1 on only the \code{profMat} method -should be used to extract the profile matrix instead of the previously -default way to access it directly \emph{via} \code{object@env$profile}. + should be used to extract the profile matrix instead of the previously + default way to access it directly \emph{via} \code{object@env$profile}. } \examples{ file <- system.file('cdf/KO/ko15.CDF', package = "faahKO") @@ -97,10 +102,10 @@ all.equal(profmat, profmat_2) } \seealso{ \code{\linkS4class{xcmsRaw}}, \code{\link{binYonX}} and -\code{\link{imputeLinInterpol}} for the employed binning and -missing value imputation methods, respectively. -\code{\link{profMat,XCMSnExp-method}} for the method on \code{\link{XCMSnExp}} -objects. + \code{\link{imputeLinInterpol}} for the employed binning and + missing value imputation methods, respectively. + \code{\link{profMat,XCMSnExp-method}} for the method on + \code{\link{XCMSnExp}} objects. } \author{ Johannes Rainer diff --git a/man/showError-xcmsSet-method.Rd b/man/showError-xcmsSet-method.Rd index 65d9d3f90..d841f8df9 100644 --- a/man/showError-xcmsSet-method.Rd +++ b/man/showError-xcmsSet-method.Rd @@ -18,14 +18,14 @@ error itself should be returned.} } \value{ A list of error messages (if \code{message. = TRUE}) or errors or an -empty list if no errors are present.
+ empty list if no errors are present. } \description{ If peak detection is performed with \code{\link{findPeaks}} -setting argument \code{stopOnError = FALSE} eventual errors during the -process do not cause to stop the processing but are recorded inside of the -resulting \code{\linkS4class{xcmsSet}} object. These errors can be accessed -with the \code{showError} method. + setting argument \code{stopOnError = FALSE} eventual errors during the + process do not cause to stop the processing but are recorded inside of + the resulting \code{\linkS4class{xcmsSet}} object. These errors can be + accessed with the \code{showError} method. } \author{ Johannes Rainer diff --git a/man/sub-xcmsRaw-logicalOrNumeric-missing-missing-method.Rd b/man/sub-xcmsRaw-logicalOrNumeric-missing-missing-method.Rd index 62c25d8fa..702085deb 100644 --- a/man/sub-xcmsRaw-logicalOrNumeric-missing-missing-method.Rd +++ b/man/sub-xcmsRaw-logicalOrNumeric-missing-missing-method.Rd @@ -11,7 +11,8 @@ \arguments{ \item{x}{The \code{\linkS4class{xcmsRaw}} object that should be sub-setted.} -\item{i}{Integer or logical vector specifying the scans/spectra to which \code{x} should be sub-setted.} +\item{i}{Integer or logical vector specifying the scans/spectra to which +\code{x} should be sub-setted.} \item{j}{Not supported.} @@ -22,16 +23,16 @@ The sub-setted \code{\linkS4class{xcmsRaw}} object. } \description{ Subset an \code{\linkS4class{xcmsRaw}} object by scans. The -returned \code{\linkS4class{xcmsRaw}} object contains values for all scans -specified with argument \code{i}. Note that the \code{scanrange} slot of the -returned \code{xcmsRaw} will be \code{c(1, length(object@scantime))} and -hence not \code{range(i)}. + returned \code{\linkS4class{xcmsRaw}} object contains values for all + scans specified with argument \code{i}. Note that the \code{scanrange} + slot of the returned \code{xcmsRaw} will be + \code{c(1, length(object@scantime))} and hence not \code{range(i)}. 
} \details{ Only subsetting by scan index in increasing order or by a logical -vector are supported. If not ordered, argument \code{i} is sorted -automatically. Indices which are larger than the total number of scans -are discarded. + vector are supported. If not ordered, argument \code{i} is sorted + automatically. Indices which are larger than the total number of scans + are discarded. } \examples{ ## Load a test file diff --git a/man/updateObject-xcmsSet-method.Rd b/man/updateObject-xcmsSet-method.Rd index 18eea6020..726025943 100644 --- a/man/updateObject-xcmsSet-method.Rd +++ b/man/updateObject-xcmsSet-method.Rd @@ -20,7 +20,7 @@ the input object. } \description{ This method updates an \emph{old} \code{\linkS4class{xcmsSet}} -object to the latest definition. + object to the latest definition. } \author{ Johannes Rainer diff --git a/man/xcms-deprecated.Rd b/man/xcms-deprecated.Rd index d2203817f..b7328b11b 100644 --- a/man/xcms-deprecated.Rd +++ b/man/xcms-deprecated.Rd @@ -1,5 +1,9 @@ \name{xcms-deprecated} \alias{xcms-deprecated} +\alias{extractChromatograms} +\alias{extractChromatograms,OnDiskMSnExp-method} +\alias{extractChromatograms,XCMSnExp-method} +\alias{plotChromatogram} \title{Deprecated functions in package \sQuote{xcms}} \description{ @@ -18,7 +22,14 @@ \item{\code{profBin}, \code{profBinM}, \code{profBinLin}, \code{profBinLinM}, \code{profBinLinBase}, \code{profBinLinBaseM} have been deprecated and \code{\link{binYonX}}} in combination - with \code{\link{imputeLinInterpol}} should be used instead. + with \code{\link{imputeLinInterpol}} should be used instead. + + \item{\code{extractChromatograms}}: replaced by \code{\link{chromatogram}}. + + \item{\code{plotChromatogram}}: replaced by \code{plot} method for + \code{\link[MSnbase]{Chromatogram}} or + \code{\link[MSnbase]{Chromatograms}} objects. 
+ } } diff --git a/tests/doRUnit.R b/tests/doRUnit.R index a7f0f5c44..5a96aadb1 100644 --- a/tests/doRUnit.R +++ b/tests/doRUnit.R @@ -22,9 +22,13 @@ if(require("RUnit", quietly=TRUE)) { attr(faahko, "filepaths") <- sapply(as.list(basename(attr(faahko, "filepaths"))), function(x) system.file("cdf", if (length(grep("ko",x)) > 0) "KO" else "WT" ,x, package = "faahKO")) - ## Disable parallel processing for the unit tests library(BiocParallel) - register(SerialParam()) + if (.Platform$OS.type == "unix") { + prm <- MulticoreParam() + } else { + prm <- SnowParam() + } + register(bpstart(prm)) ## Create some objects we can re-use in different tests: ## Needed in runit.XCMSnExp.R @@ -42,6 +46,7 @@ if(require("RUnit", quietly=TRUE)) { snthresh = 40)) faahko_xs <- xcmsSet(faahko_3_files, profparam = list(step = 0), method = "centWave", noise = 10000, snthresh = 40) + ## faahko_xod <- findChromPeaks(faahko_od, param = CentWaveParam(noise = 5000)) ## faahko_xs <- xcmsSet(faahko_3_files, profparam = list(step = 0), ## method = "centWave", noise = 5000) @@ -51,6 +56,10 @@ if(require("RUnit", quietly=TRUE)) { xod_xg <- groupChromPeaks(xod_x, param = PeakDensityParam()) xod_xgr <- adjustRtime(xod_xg, param = PeakGroupsParam(span = 0.4)) xod_xgrg <- groupChromPeaks(xod_xgr, param = PeakDensityParam()) + + faahko_grouped_filled <- fillPeaks(group(faahko)) + faahko_grouped_retcor_filled <- fillPeaks(group(retcor(group( + updateObject(faahko))))) ## microtofq library(msdata) diff --git a/vignettes/new_functionality.Rmd b/vignettes/new_functionality.Rmd index e658787f0..c1af153ab 100644 --- a/vignettes/new_functionality.Rmd +++ b/vignettes/new_functionality.Rmd @@ -27,10 +27,15 @@ references: This document describes new functionality and changes to existing functionality in the `xcms` package introduced during the update to version *3*. 
-```{r message = FALSE, warning = FALSE} +```{r message = FALSE, warning = FALSE } library(xcms) library(RColorBrewer) -register(SerialParam()) +## Use socket based parallel processing on Windows systems +if (.Platform$OS.type == "unix") { + register(bpstart(MulticoreParam(2))) +} else { + register(bpstart(SnowParam(2))) +} ``` @@ -69,10 +74,12 @@ an LS/GC-MS experiment are referred to as *chromatographic peaks*. The respectiv method to identify such peaks is hence called `findChromPeaks` and the identified peaks can be accessed using the `XCMSnExp` `chromPeaks` method. The results from an correspondence analysis which aims to match and group chromatographic peaks -within and between samples are called *features*. The definition of such mz-rt -features (i.e. the result from the `groupChromPeaks` method) can be accessed *via* -the `featureDefinitions` method of the `XCMSnExp` class. Finally, alignment -(retention time correction) can be performed using the `adjustRtime` method. +within and between samples are called *features*. A feature corresponds to +individual ions with a unique mass-to-charge ratio (mz) and a unique retention +time (rt). The definition of such mz-rt features (i.e. the result from the +`groupChromPeaks` method) can be accessed *via* the `featureDefinitions` method of +the `XCMSnExp` class. Finally, alignment (retention time correction) can be +performed using the `adjustRtime` method. The settings for any of the new analysis methods are bundled in *parameter* classes, one class for each method. This encapsulation of the parameters to a @@ -88,7 +95,7 @@ object and hence is no longer required in the analysis function. The example below illustrates the new user interface. First we load the raw data files from the `faahKO` package using the `readMSData2` from the `MSnbase` package. 
```{r message = FALSE, warning = FALSE} +```{r message = FALSE, warning = FALSE } ## Reading the raw data using the MSnbase package library(xcms) ## Load 6 of the CDF files from the faahKO @@ -104,7 +111,38 @@ pheno <- data.frame(sample_name = sub(basename(cdf_files), pattern = ".CDF", sample_group = s_groups, stringsAsFactors = FALSE) ## Read the data. -raw_data <- readMSData2(cdf_files, pdata = new("NAnnotatedDataFrame", pheno)) +raw_data <- readMSData2(cdf_files, pdata = new("NAnnotatedDataFrame", pheno)) +``` + +The `OnDiskMSnExp` organizes the MS data by spectrum and provides the methods +`intensity`, `mz` and `rtime` to access the raw data from the files (the measured +intensity values, the corresponding m/z and retention time values). In addition, +the `spectra` method could be used to return all data encapsulated in `Spectrum` +classes. Below we extract the retention time values from the object. + +```{r message = FALSE } +head(rtime(raw_data)) + +class(rtime(raw_data)) +``` + +All data is returned as one-dimensional vectors (a numeric vector for `rtime` and +a `list` of numeric vectors for `mz` and `intensity`, each containing the values from +one spectrum), even if the experiment consists of multiple files/samples. The +`fromFile` returns a numeric vector that provides the mapping of the values to the +originating file. Below we use the `fromFile` indices to organize the `mz` values by +file. + +```{r message = FALSE } +mzs <- mz(raw_data) + +class(mzs) +length(mzs) + +## Split the list by file +mzs_by_file <- split(mzs, f = fromFile(raw_data)) + +length(mzs_by_file) ``` We next plot the total ion chromatogram (TIC) for all files within the @@ -113,49 +151,55 @@ using the `filterFile` method, which, for `OnDiskMSnExp` objects, is an efficien way to subset the data while ensuring that all data, including metadata, stays consistent.
-```{r faahKO-tic, message = FALSE, fig.align = 'center', fig.width = 8, fig.height = 4} +```{r faahKO-tic, message = FALSE, fig.align = 'center', fig.width = 8, fig.height = 4 } library(RColorBrewer) sample_colors <- brewer.pal(3, "Set1")[1:2] names(sample_colors) <- c("KO", "WT") ## Subset the full raw data by file and plot the data. tmp <- filterFile(raw_data, file = 1) plot(x = rtime(tmp), y = tic(tmp), xlab = "retention time", ylab = "TIC", - col = paste0(sample_colors[pData(tmp)$sample_group], 80), type = "l") + col = paste0(sample_colors[tmp$sample_group], 80), type = "l") for (i in 2:length(fileNames(raw_data))) { tmp <- filterFile(raw_data, file = i) points(rtime(tmp), tic(tmp), type = "l", - col = paste0(sample_colors[pData(tmp)$sample_group], 80)) + col = paste0(sample_colors[tmp$sample_group], 80)) } -legend("topleft", col = sample_colors, legend = names(sample_colors), lty = 1) +legend("topleft", col = sample_colors, legend = names(sample_colors), lty = 1) ``` -Alternatively we can use the `extractChromatograms` method that extracts +Alternatively we can use the `chromatogram` method that extracts chromatograms from the object. In the example below we extract the *base peak chromatogram* (BPC) by setting `aggregationFun` to `"max"` and not specifying an `rt` -or `mz` range to extract only a data subset. In contrast to the `tic` and `bpi` +or `mz` range hence extracting the full data. In contrast to the `tic` and `bpi` methods, this function reads the data from the raw files. It takes thus more time to create the plot, but it is based on the actual raw data that is used for the later analysis - the `tic` and `bpi` methods access only the information that is stored in the raw data files by the MS detector during the data acquisition. 
-```{r faahKO-bpi, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4} +The `chromatogram` method returns a `Chromatograms` object which is an extension to +the base `matrix` class allowing to arrange multiple `Chromatogram` objects in a +two-dimensional grid. Columns in the `Chromatograms` object represent samples, +rows m/z x rt ranges. + +```{r faahKO-bpi, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4 } ## Get the base peak chromatograms. This reads data from the files. -bpis <- extractChromatograms(raw_data, aggregationFun = "max") -## Plot the list of Chromatogram objects. -plotChromatogram(bpis, col = paste0(sample_colors[pData(raw_data)$sample_group], 80)) +bpis <- chromatogram(raw_data, aggregationFun = "max") +## Plot all chromatograms. +plot(bpis, col = paste0(sample_colors[raw_data$sample_group], 80)) + ``` -While the `plotChromatogram` function if very convenient (and fast), it would also -not be too difficult to create the plot manually: +While the `plot` method if very convenient (and fast), it would also not be too +difficult to create the plot manually: -```{r faahKO-bbpi-manual, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4} -plot(3, 3, pch = NA, xlim = range(unlist(lapply(bpis, rtime))), - ylim = range(unlist(lapply(bpis, intensity))), main = "BPC", +```{r faahKO-bbpi-manual, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4 } +plot(3, 3, pch = NA, xlim = range(unlist(lapply(bpis[1, ], rtime))), + ylim = range(unlist(lapply(bpis[1, ], intensity))), main = "BPC", xlab = "rtime", ylab = "intensity") -for (i in 1:length(bpis)) { - points(rtime(bpis[[i]]), intensity(bpis[[i]]), type = "l", - col = paste0(sample_colors[pData(raw_data)$sample_group[i]], 80)) -} +for (i in 1:ncol(bpis)) { + points(rtime(bpis[1, i]), intensity(bpis[1, i]), type = "l", + col = paste0(sample_colors[raw_data$sample_group[i]], 80)) +} ``` Note that we could restrict the analysis to a certain 
retention time range by @@ -166,11 +210,11 @@ contrast to sub-setting the object we split the numeric vector returned by the `tic` by file using the `fromFile` method that provides the mapping of the experiment's spectra to the originating files. -```{r faahKO-tic-boxplot, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4} +```{r faahKO-tic-boxplot, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4 } ## Get the total ion current by file tc <- split(tic(raw_data), f = fromFile(raw_data)) -boxplot(tc, col = paste0(sample_colors[pData(raw_data)$sample_group], 80), - ylab = "intensity", main = "Total ion current") +boxplot(tc, col = paste0(sample_colors[raw_data$sample_group], 80), + ylab = "intensity", main = "Total ion current") ``` The `tic` (and for mzML files) the `bpi` methods are very fast, even for large data @@ -185,10 +229,10 @@ Next we perform the chromatographic peak detection using the *centWave* algorith parameters, but the settings should be adjusted to each experiment individually based on e.g. the expected width of the chromatographic peaks etc. -```{r faahKO-centWave, message = FALSE, warning = FALSE} +```{r faahKO-centWave, message = FALSE, warning = FALSE } ## Defining the settings for the centWave peak detection. cwp <- CentWaveParam(snthresh = 20, noise = 1000) -xod <- findChromPeaks(raw_data, param = cwp) +xod <- findChromPeaks(raw_data, param = cwp) ``` The identified peaks can be accessed with the `chromPeaks` parameter which returns @@ -196,11 +240,30 @@ a `matrix`, each line representing an identified peak. Column `"sample"` specifi in which *sample* (i.e. file) of the experiment the peak was detected. Below we plot the signal distribution of the identified peaks per sample. 
-```{r faahKO-peak-intensity-boxplot, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4} +```{r faahKO-peak-intensity-boxplot, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4 } ints <- split(chromPeaks(xod)[, "into"], f = chromPeaks(xod)[, "sample"]) ints <- lapply(ints, log2) -boxplot(ints, varwidth = TRUE, col = sample_colors[pData(xod)$sample_group], - ylab = expression(log[2]~intensity), main = "Peak intensities") +boxplot(ints, varwidth = TRUE, col = sample_colors[xod$sample_group], + ylab = expression(log[2]~intensity), main = "Peak intensities") +``` + +To get a global overview of the peak detection results we can use the +`plotChromPeakImage` function that plots the number of identified peaks for each +sample along the retention time axis as an image plot. This would allow for +example to spot samples in which much fewer peaks were identified. Below we +create the image counting the number of detected peaks within bins of 20 seconds +along the retention time axis. + +```{r faahKO-peak-image, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4, fig.cap = "Chromatographic peak image." } +plotChromPeakImage(xod, binSize = 20) +``` + +The `plotChromPeaks` function can be used to get a global overview of the +identified chromatographic peaks of one file. It highlights the identified peaks +in the full mz/rt plane. + +```{r faahKO-peak-plot, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4, fig.cap = "Chromatographic peaks for one file." } +plotChromPeaks(xod, file = 3) ``` After peak detection it might be advisable to evaluate whether the peak @@ -214,12 +277,12 @@ seconds. In addition we extract also the full chromatogram for the specified mz range (i.e. the full rt range) and identify all chromatographic peaks in that region by passing the same `mz` and `rt` parameters to the `chromPeaks` method. 
-If two-column matrices are passed to the `extractChromatograms` method with -parameters `rt` and `mz`, the function returns a `list`, each element being a `list` of -`Chromatogram` objects representing the chromatogram for the respective -ranges. +If two-column matrices are passed to the `chromatogram` method with parameters `rt` +and `mz`, the function returns a `Chromatograms` object with each column containing +the data from one sample/file and each row the `Chromatogram` objects for the +respective ranges. -```{r faahKO-chromPeaks-extractChroms, warning = FALSE} +```{r faahKO-chromPeaks-extractChroms, warning = FALSE } rtr <- chromPeaks(xod)[68, c("rtmin", "rtmax")] ## Increase the range: rtr[1] <- rtr[1] - 60 @@ -230,32 +293,31 @@ mzr <- chromPeaks(xod)[68, c("mzmin", "mzmax")] rtr <- rbind(c(-Inf, Inf), rtr) mzr <- rbind(mzr, mzr) -chrs <- extractChromatograms(xod, rt = rtr, mz = mzr) +chrs <- chromatogram(xod, rt = rtr, mz = mzr) ## In addition we get all peaks detected in the same region pks <- chromPeaks(xod, rt = rtr, mz = mzr) -pks +pks ``` Next we plot the extracted chromatogram for the data and highlight in addition -the identified peaks. +the identified peaks using the `highlightChromPeaks` function. -```{r faahKO-extracted-chrom-with-peaks, message = FALSE, fig.cap = "Extracted ion chromatogram for one of the identified peaks. Left: full retention time range, right: rt range of the peak. Each line represents the signal measured in one sample. The rectangles indicate the margins of the identified chromatographic peak in the respective sample.", fig.align = "center", fig.width = 12, fig.height = 6} +```{r faahKO-extracted-chrom-with-peaks, message = FALSE, fig.cap = "Extracted ion chromatogram for one of the identified peaks. Left: full retention time range, right: rt range of the peak. Each line represents the signal measured in one sample. 
The rectangles indicate the margins of the identified chromatographic peak in the respective sample.", fig.align = "center", fig.width = 12, fig.height = 6 } ## Plot the full rt range: -plotChromatogram(chrs[[1]], - col = paste0(sample_colors[pData(xod)$sample_group], 80)) +par(mfrow = c(2, 1)) +plot(chrs[1, , drop = FALSE], col = paste0(sample_colors[xod$sample_group], 80)) ## And now for the peak range. -plotChromatogram(chrs[[2]], - col = paste0(sample_colors[pData(xod)$sample_group], 80)) +plot(chrs[2, , drop = FALSE], col = paste0(sample_colors[xod$sample_group], 80)) ## Highlight also the identified chromatographic peaks. highlightChromPeaks(xod, rt = rtr[2, ], mzr[2, ], - border = paste0(sample_colors[pData(xod)$sample_group], 40)) + border = paste0(sample_colors[xod$sample_group], 40)) ``` -Note that the `extractChromatograms` does return an `NA` value if in a certain scan -(i.e. for a specific retention time) no signal was measured in the respective mz -range. This is reflected by the lines not being drawn as continuous lines in the -plot above. +Note that `Chromatogram` objects extracted by the `chromatogram` method contain an +`NA` value if in a certain scan (i.e. for a specific retention time) no signal was +measured in the respective mz range. This is reflected by the lines not being +drawn as continuous lines in the plot above. Next we align the samples using the *obiwarp* method [@Prince:2006jj]. This method does not require, in contrast to other alignment/retention time @@ -263,9 +325,9 @@ correction methods, any identified peaks and could thus also be applied to an `OnDiskMSnExp` object. Note that all retention time adjustment methods do also adjust the retention times reported for the individual peaks in `chromPeaks`. -```{r faahKO-obiwarp, message = FALSE} +```{r faahKO-obiwarp, message = FALSE } ## Doing the obiwarp alignment using the default settings. 
-xod <- adjustRtime(xod, param = ObiwarpParam()) +xod <- adjustRtime(xod, param = ObiwarpParam()) ``` Note that any pre-processing results can be removed at any time using a *drop* @@ -276,15 +338,14 @@ To evaluate the impact of the alignment we can plot again the BPC of each sample. In addition we plot the differences of the adjusted to the raw retention times per sample using the `plotAdjustedRtime` function. -```{r faahKO-bpi-obiwarp, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 8} +```{r faahKO-bpi-obiwarp, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 8 } ## Get the base peak chromatograms. This reads data from the files. -bpis <- extractChromatograms(xod, aggregationFun = "max") +bpis <- chromatogram(xod, aggregationFun = "max") par(mfrow = c(2, 1), mar = c(4.5, 4.2, 1, 0.5)) -plotChromatogram(bpis, - col = paste0(sample_colors[pData(xod)$sample_group[i]], 80)) +plot(bpis, col = paste0(sample_colors[xod$sample_group], 80)) ## Plot also the difference of adjusted to raw retention time. -plotAdjustedRtime(xod, col = paste0(sample_colors[pData(xod)$sample_group], 80)) +plotAdjustedRtime(xod, col = paste0(sample_colors[xod$sample_group], 80)) ``` Too large differences between adjusted and raw retention times could indicate @@ -293,7 +354,7 @@ poorly performing samples or alignment. The distribution of retention time differences could also be used for quality assessment. -```{r faahKO-adjusted-rtime-boxplot, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4} +```{r faahKO-adjusted-rtime-boxplot, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4 } ## Calculate the difference between the adjusted and the raw retention times. 
diffRt <- rtime(xod) - rtime(xod, adjusted = FALSE) @@ -301,8 +362,8 @@ diffRt <- rtime(xod) - rtime(xod, adjusted = FALSE) ## get the values grouped by sample we have to split this vector by file/sample diffRt <- split(diffRt, fromFile(xod)) -boxplot(diffRt, col = sample_colors[pData(xod)$sample_group], - main = "Obiwarp alignment results", ylab = "adjusted - raw rt") +boxplot(diffRt, col = sample_colors[xod$sample_group], + main = "Obiwarp alignment results", ylab = "adjusted - raw rt") ``` The 3rd sample was used as *center* sample against which all other samples were @@ -313,37 +374,35 @@ Below we plot the extracted ion chromatogram for the selected peak from the example above before and after retention time correction to evaluate the impact of the alignment. -```{r faahKO-extracted-chrom-with-peaks-aligned, echo = FALSE, message = FALSE, fig.cap = "Extracted ion chromatogram for one of the identified peaks before and after alignment.", fig.align = "center", fig.width = 8, fig.height = 8} +```{r faahKO-extracted-chrom-with-peaks-aligned, echo = FALSE, message = FALSE, fig.cap = "Extracted ion chromatogram for one of the identified peaks before and after alignment.", fig.align = "center", fig.width = 8, fig.height = 8 } rtr <- chromPeaks(xod)[68, c("rtmin", "rtmax")] ## Increase the range: rtr[1] <- rtr[1] - 60 rtr[2] <- rtr[2] + 60 mzr <- chromPeaks(xod)[68, c("mzmin", "mzmax")] -chrs <- extractChromatograms(xod, rt = rtr, mz = mzr) -chrs_raw <- extractChromatograms(raw_data, rt = rtr, mz = mzr) +chrs <- chromatogram(xod, rt = rtr, mz = mzr) +chrs_raw <- chromatogram(raw_data, rt = rtr, mz = mzr) par(mfrow = c(2, 1)) -plotChromatogram(chrs_raw, - col = paste0(sample_colors[pData(xod)$sample_group], 80)) -plotChromatogram(chrs, - col = paste0(sample_colors[pData(xod)$sample_group], 80)) +plot(chrs_raw, col = paste0(sample_colors[xod$sample_group], 80)) +plot(chrs, col = paste0(sample_colors[xod$sample_group], 80)) highlightChromPeaks(xod, rt = rtr, mzr, - border 
= paste0(sample_colors[pData(xod)$sample_group], 40)) + border = paste0(sample_colors[xod$sample_group], 40)) ``` After alignment, the peaks are nicely overlapping. Next we group identified chromatographic peaks across samples. We use the *peak -density* method [@Smith:2006ic] specifying that a chromatographic peak have -to be present in at least 1/3 of the samples within each group to be combined to +density* method [@Smith:2006ic] specifying that a chromatographic peak has +to be present in at least 2/3 of the samples within each group to be combined to a mz-rt *feature*. -```{r faahKO-groupPeakDensity, message = FALSE} +```{r faahKO-groupPeakDensity, message = FALSE } ## Define the PeakDensityParam -pdp <- PeakDensityParam(sampleGroups = pData(xod)$sample_group, +pdp <- PeakDensityParam(sampleGroups = xod$sample_group, maxFeatures = 300, minFraction = 0.66) -xod <- groupChromPeaks(xod, param = pdp) +xod <- groupChromPeaks(xod, param = pdp) ``` The definitions of the features can be accessed with the `featureDefinitions`, @@ -351,19 +410,47 @@ which lists the mz-rt space specific to a feature. Column `"peakidx"` lists the indices (in the `chromPeaks` matrix) of the individual chromatographic peaks belonging to the feature. -```{r faahKO-featureDefinitions, message = FALSE} -head(featureDefinitions(xod)) +```{r faahKO-featureDefinitions, message = FALSE } +head(featureDefinitions(xod)) +``` + +The `plotChromPeakDensity` method allows to inspect the result of the peak +grouping on e.g. a known compound/peak. 
+ +```{r faahKO-plot-peak-density, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 8 } +## Extract the full chromatogram for the mz of 279 +chrs <- chromatogram(xod, mz = 279) +## Plot the chromatogram objects +par(mfrow = c(2, 1), mar = c(2.5, 4, 1, 1)) +plot(chrs, col = paste0(sample_colors[xod$sample_group], 80)) +## Highlight the chromatographic peaks identified in this mz slice +highlightChromPeaks(xod, mz = 279, + border = paste0(sample_colors[xod$sample_group], 60)) +## Plot the peak density distribution for the given mz slice providing also the +## parameter class used for the peak grouping. +plotChromPeakDensity(xod, mz = 279, pch = 16, param = pdp, + col = paste0(sample_colors[xod$sample_group], 80)) ``` +The upper panel of the plot above shows the extracted chromatogram for an mz +value of 279 with the identified chromatographic peaks indicated with +rectangles. The lower panel shows the location of identified chromatographic +peaks per sample along the retention time axis (points) and the chromatographic +peak density as black solid line. The grey rectangle indicates which peaks have +been grouped and assigned to the corresponding feature. This type of +visualization can be very helpful to tune the parameters of the peak grouping +for example to evaluate whether closely located peaks known to come from +different compounds were successfully separated. + To extract *values* for the features, the `featureValues` method can be used. This method returns a matrix with rows being the features and column the samples. The `value` parameter allows to specify the value that should be returned. Below we extract the `"into"` signal, i.e. the per-peak integrated intensity for each feature. -```{r faahKO-featureValues, message = FALSE} +```{r faahKO-featureValues, message = FALSE } ## Extract the "into" peak integrated signal. 
-head(featureValues(xod, value = "into")) +head(featureValues(xod, value = "into")) ``` After correspondence there will always be features that do not include peaks @@ -375,12 +462,12 @@ integrates in files where a peak was not found the signal from the mz-rt area where it is expected and adds it to the `chromPeaks` matrix. Such *filled-in* peaks have a value of `1` in the `"is_filled"` column of the `chromPeaks` matrix. -```{r faahKO-fillPeaks, message = FALSE} +```{r faahKO-fillPeaks, message = FALSE } ## Fill in peaks with default settings. Settings can be adjusted by passing ## a FillChromPeaksParam object to the method. xod <- fillChromPeaks(xod) -head(featureValues(xod, value = "into")) +head(featureValues(xod, value = "into")) ``` Not for all missing peaks a value could be integrated (because at the respective @@ -392,20 +479,20 @@ Next we inspect the `processHistory` of the analysis. As described earlier, this records all (major) processing steps along with the corresponding parameter classes. -```{r faahKO-processHistory, message = FALSE} +```{r faahKO-processHistory, message = FALSE } ## List the full process history -processHistory(xod) +processHistory(xod) ``` It is also possible to extract specific processing steps by specifying its type. Available types can be listed with the `processHistoryTypes` function. Below we extract the parameter class for the alignment/retention time adjustment step. -```{r faahKO-processHistory-select, message = FALSE} +```{r faahKO-processHistory-select, message = FALSE } ph <- processHistory(xod, type = "Retention time correction") ## Access the parameter -processParam(ph[[1]]) +processParam(ph[[1]]) ``` As described earlier, we can remove specific analysis results at any @@ -413,11 +500,11 @@ stage. Below we remove the results from the alignment. Since the correspondence was performed after that processing step its results will be removed too leaving us only with the results from the peak detection step. 
-```{r faahKO-drop-alignment, message = FALSE} +```{r faahKO-drop-alignment, message = FALSE } ## Remove the alignment results xod <- dropAdjustedRtime(xod) -processHistory(xod) +processHistory(xod) ``` We can now use a different method to perform the alignment. The *peak groups* @@ -426,23 +513,23 @@ present in most samples (so called *well behaved* peaks). This means we have to perform first an initial correspondence analysis to group peaks within and across samples. -```{r faahKO-initial-correspondence, message = FALSE} +```{r faahKO-initial-correspondence, message = FALSE } ## Define the parameter for the correspondence pdparam <- PeakDensityParam(sampleGroups = pData(xod)$sample_group, minFraction = 0.7, maxFeatures = 100) -xod <- groupChromPeaks(xod, param = pdparam) +xod <- groupChromPeaks(xod, param = pdparam) ``` Before performing the alignment we can also inspect which peak groups might be selected for alignment based on the provided `PeakGroupsParam` object. -```{r faahKO-peak-groups-matrix, message = FALSE} +```{r faahKO-peak-groups-matrix, message = FALSE } ## Create the parameter class for the alignment pgparam <- PeakGroupsParam(minFraction = 0.9, span = 0.4) ## Extract the matrix with (raw) retention times for the peak groups that would ## be used for alignment. -adjustRtimePeakGroups(xod, param = pgparam) +adjustRtimePeakGroups(xod, param = pgparam) ``` If we are not happy with these peak groups (e.g. because we don't have a peak @@ -453,22 +540,26 @@ groups, e.g. for internal controls, and add this matrix with the we defined and perform the alignment. This will use the peak groups matrix from above. -```{r faahKO-peak-groups-alignment, message = FALSE} +```{r faahKO-peak-groups-alignment, message = FALSE } ## Perform the alignment using the peak groups method. -xod <- adjustRtime(xod, param = pgparam) +xod <- adjustRtime(xod, param = pgparam) ``` We can now also plot the difference between adjusted and raw retention times. 
If alignment was performed using the *peak groups* method, also these peak groups are highlighted in the plot. -```{r faahKO-peak-groups-alignment-plot, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4} -plotAdjustedRtime(xod, col = sample_colors[pData(xod)$sample_group]) +```{r faahKO-peak-groups-alignment-plot, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4 } +plotAdjustedRtime(xod, col = sample_colors[pData(xod)$sample_group]) ``` ## New naming convention +Peaks identified in LC/GC-MS metabolomics are referred to as *chromatographic +peaks* where possible to avoid any misconceptions with *mass peaks* identified in +mz dimension. + Methods for data analysis from the original `xcms` code have been renamed to avoid potential confusions: @@ -515,14 +606,15 @@ the raw data on which the preprocessing was performed. ### `Chromatogram` -The `Chromatogram` class allows a data representation that is orthogonal to the -`Spectrum` class defined in `MSnbase`. The `Chromatogram` class stores retention time -and intensity duplets and is designed to accommodate most use cases, from total -ion chromatogram, base peak chromatogram to extracted ion chromatogram and -SRM/MRM ion traces. +The `Chromatogram` class (available in the `MSnbase` package since version 2.3.8) +allows a data representation that is orthogonal to the `Spectrum` class (also +defined in `MSnbase`). The `Chromatogram` class stores retention time and intensity +duplets and is designed to accommodate most use cases, from total ion +chromatogram, base peak chromatogram to extracted ion chromatogram and SRM/MRM +ion traces. -`Chromatogram` objects can be extracted from `XCMSnExp` objects using the -`extractChromatograms` method. +`Chromatogram` objects can be extracted from `XCMSnExp` (and `MSnExp` and +`OnDiskMSnExp`) objects using the `chromatogram` method. Note that this class is still considered developmental and might thus undergo some changes in the future. 
@@ -563,7 +655,7 @@ bins have missing values, for which we impute a value using bin, but other aggregation methods are also available (i.e. min, max, mean, sum). -```{r message = FALSE} +```{r message = FALSE } ## Defining the variables: set.seed(123) X <- sort(abs(rnorm(30, mean = 20, sd = 25))) ## 10 @@ -572,7 +664,7 @@ Y <- abs(rnorm(30, mean = 50, sd = 30)) ## Bin the values in Y into 20 bins defined on X res <- binYonX(X, Y, nBins = 22) -res +res ``` As a result we get a `list` with the bin mid-points (`$x`) and the binned `y` values @@ -583,7 +675,7 @@ and the linear imputation approach that was defined in the `profBinLinBase` method. The latter performs linear interpolation only considering a certain neighborhood of missing values otherwise replacing the `NA` with a base value. -```{r binning-imputation-example, message = FALSE, fig.width = 10, fig.height = 7, fig.cap = 'Binning and missing value imputation results. Black points represent the input values, red the results from the binning and blue and green the results from the imputation (with method lin and linbase, respectively).'} +```{r binning-imputation-example, message = FALSE, fig.width = 10, fig.height = 7, fig.cap = 'Binning and missing value imputation results. Black points represent the input values, red the results from the binning and blue and green the results from the imputation (with method lin and linbase, respectively).' } ## Plot the actual data values. 
plot(X, Y, pch = 16, ylim = c(0, max(Y))) ## Visualizing the bins @@ -601,7 +693,7 @@ points(x = res$x, y = res_lin, col = point_colors[2], type = "b") ## Perform the linear imputation "linbase" res_linbase <- imputeLinInterpol(res$y, method = "linbase") -points(x = res$x, y = res_linbase, col = point_colors[3], type = "b", lty = 2) +points(x = res$x, y = res_linbase, col = point_colors[3], type = "b", lty = 2) ``` The difference between the linear interpolation method `lin` and `linbase` is that @@ -685,7 +777,7 @@ method is set to "binlin". The example below illustrates both differences. -```{r } +```{r } ## Define a vector with empty values at the end. X <- 1:11 set.seed(123) @@ -694,12 +786,12 @@ Y[9:11] <- NA nas <- is.na(Y) ## Do interpolation with profBinLin: resX <- xcms:::profBinLin(X[!nas], Y[!nas], 5, xstart = min(X), - xend = max(X)) + xend = max(X)) resX res <- binYonX(X, Y, nBins = 5L, shiftByHalfBinSize = TRUE) resM <- imputeLinInterpol(res$y, method = "lin", - noInterpolAtEnds = TRUE) -resM + noInterpolAtEnds = TRUE) +resM ``` Plotting the results helps to better compare the differences. The black points @@ -709,7 +801,7 @@ from the `profBinLin` method. The bin values for the first and 4th bin are clear wrong. The green colored points and lines represent the results from the `binYonX` and `imputeLinInterpol` functions (showing the correct binning and interpolation). -```{r profBinLin-problems, message = FALSE, fig.align = 'center', fig.width=10, fig.height = 7, fig.cap = "Illustration of the two bugs in profBinLin. The input values are represented by black points, grey vertical lines indicate the bins. The results from binning and interpolation with profBinLin are shown in blue and those from binYonX in combination with imputeLinInterpol in green."} +```{r profBinLin-problems, message = FALSE, fig.align = 'center', fig.width=10, fig.height = 7, fig.cap = "Illustration of the two bugs in profBinLin. 
The input values are represented by black points, grey vertical lines indicate the bins. The results from binning and interpolation with profBinLin are shown in blue and those from binYonX in combination with imputeLinInterpol in green." } plot(x = X, y = Y, pch = 16, ylim = c(0, max(Y, na.rm = TRUE)), xlim = c(0, 12)) ## Plot the breaks @@ -719,6 +811,7 @@ points(x = res$x, y = resX, col = "blue", type = "b") ## Results from imputeLinInterpol points(x = res$x, y = resM, col = "green", type = "b", pch = 4, lty = 2) + ``` Note that by default `imputeLinInterpol` would also interpolate missing values at @@ -827,9 +920,10 @@ detection. In the original `fillPeaks.MSW`, the mz range from which the signal is to be integrated was defined using -```{r eval = FALSE} +```{r eval = FALSE } mzarea <- seq(which.min(abs(mzs - peakArea[i, "mzmin"])), which.min(abs(mzs - peakArea[i, "mzmax"]))) + ``` Depending on the data this could lead to the inclusion of signal in the diff --git a/vignettes/new_functionality.org b/vignettes/new_functionality.org index 8411bc4c3..2898429ea 100644 --- a/vignettes/new_functionality.org +++ b/vignettes/new_functionality.org @@ -42,7 +42,12 @@ in the =xcms= package introduced during the update to version /3/. #+BEGIN_SRC R :ravel message = FALSE, warning = FALSE library(xcms) library(RColorBrewer) - register(SerialParam()) + ## Use socket based parallel processing on Windows systems + if (.Platform$OS.type == "unix") { + register(bpstart(MulticoreParam(2))) + } else { + register(bpstart(SnowParam(2))) + } #+END_SRC ** Modernized user interface @@ -110,7 +115,7 @@ files from the =faahKO= package using the =readMSData2= from the =MSnbase= packa library(xcms) ## Load 6 of the CDF files from the faahKO cdf_files <- dir(system.file("cdf", package = "faahKO"), recursive = TRUE, - full.names = TRUE)[c(1:3, 7:9)] + full.names = TRUE)[c(1:3, 7:9)] ## Define the sample grouping. 
s_groups <- rep("KO", length(cdf_files)) @@ -118,12 +123,44 @@ files from the =faahKO= package using the =readMSData2= from the =MSnbase= packa ## Define a data.frame that will be used as phenodata pheno <- data.frame(sample_name = sub(basename(cdf_files), pattern = ".CDF", replacement = "", fixed = TRUE), - sample_group = s_groups, stringsAsFactors = FALSE) + sample_group = s_groups, stringsAsFactors = FALSE) ## Read the data. raw_data <- readMSData2(cdf_files, pdata = new("NAnnotatedDataFrame", pheno)) #+END_SRC +The =OnDiskMSnExp= organizes the MS data by spectrum and provides the methods +=intensity=, =mz= and =rtime= to access the raw data from the files (the measured +intensity values, the corresponding m/z and retention time values). In addition, +the =spectra= method could be used to return all data encapsulated in =Spectrum= +classes. Below we extract the retention time values from the object. + +#+BEGIN_SRC R :ravel message = FALSE + head(rtime(raw_data)) + + class(rtime(raw_data)) +#+END_SRC + +All data is returned as one-dimensional vectors (a numeric vector for =rtime= and +a =list= of numeric vectors for =mz= and =intensity=, each containing the values from +one spectrum), even if the experiment consists of multiple files/samples. The +=fromFile= returns a numeric vector that provides the mapping of the values to the +originating file. Below we use the =fromFile= indices to organize the =mz= values by +file. + +#+BEGIN_SRC R :ravel message = FALSE + mzs <- mz(raw_data) + + class(mzs) + length(mzs) + + ## Split the list by file + mzs_by_file <- split(mzs, f = fromFile(raw_data)) + + length(mzs_by_file) +#+END_SRC + + We next plot the total ion chromatogram (TIC) for all files within the experiment. Note that we are iteratively sub-setting the full data per file using the =filterFile= method, which, for =OnDiskMSnExp= objects, is an efficient @@ -138,48 +175,52 @@ consistent. ## Subset the full raw data by file and plot the data. 
tmp <- filterFile(raw_data, file = 1) plot(x = rtime(tmp), y = tic(tmp), xlab = "retention time", ylab = "TIC", - col = paste0(sample_colors[pData(tmp)$sample_group], 80), type = "l") + col = paste0(sample_colors[tmp$sample_group], 80), type = "l") for (i in 2:length(fileNames(raw_data))) { tmp <- filterFile(raw_data, file = i) points(rtime(tmp), tic(tmp), type = "l", - col = paste0(sample_colors[pData(tmp)$sample_group], 80)) + col = paste0(sample_colors[tmp$sample_group], 80)) } legend("topleft", col = sample_colors, legend = names(sample_colors), lty = 1) #+END_SRC -Alternatively we can use the =extractChromatograms= method that extracts +Alternatively we can use the =chromatogram= method that extracts chromatograms from the object. In the example below we extract the /base peak chromatogram/ (BPC) by setting =aggregationFun= to ="max"= and not specifying an =rt= -or =mz= range to extract only a data subset. In contrast to the =tic= and =bpi= +or =mz= range hence extracting the full data. In contrast to the =tic= and =bpi= methods, this function reads the data from the raw files. It takes thus more time to create the plot, but it is based on the actual raw data that is used for the later analysis - the =tic= and =bpi= methods access only the information that is stored in the raw data files by the MS detector during the data acquisition. +The =chromatogram= method returns a =Chromatograms= object which is an extension to +the base =matrix= class allowing to arrange multiple =Chromatogram= objects in a +two-dimensional grid. Columns in the =Chromatograms= object represent samples, +rows m/z x rt ranges. + #+NAME: faahKO-bpi #+BEGIN_SRC R :ravel message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4 ## Get the base peak chromatograms. This reads data from the files. - bpis <- extractChromatograms(raw_data, aggregationFun = "max") - ## Plot the list of Chromatogram objects. 
- plotChromatogram(bpis, col = paste0(sample_colors[pData(raw_data)$sample_group], 80)) + bpis <- chromatogram(raw_data, aggregationFun = "max") + ## Plot all chromatograms. + plot(bpis, col = paste0(sample_colors[raw_data$sample_group], 80)) #+END_SRC -While the =plotChromatogram= function if very convenient (and fast), it would also -not be too difficult to create the plot manually: +While the =plot= method is very convenient (and fast), it would also not be too +difficult to create the plot manually: #+NAME: faahKO-bbpi-manual #+BEGIN_SRC R :ravel message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4 - plot(3, 3, pch = NA, xlim = range(unlist(lapply(bpis, rtime))), - ylim = range(unlist(lapply(bpis, intensity))), main = "BPC", + plot(3, 3, pch = NA, xlim = range(unlist(lapply(bpis[1, ], rtime))), + ylim = range(unlist(lapply(bpis[1, ], intensity))), main = "BPC", xlab = "rtime", ylab = "intensity") - for (i in 1:length(bpis)) { - points(rtime(bpis[[i]]), intensity(bpis[[i]]), type = "l", - col = paste0(sample_colors[pData(raw_data)$sample_group[i]], 80)) + for (i in 1:ncol(bpis)) { + points(rtime(bpis[1, i]), intensity(bpis[1, i]), type = "l", + col = paste0(sample_colors[raw_data$sample_group[i]], 80)) } #+END_SRC - Note that we could restrict the analysis to a certain retention time range by first sub-setting =raw_data= with the =filterRt= method. @@ -192,7 +233,7 @@ experiment's spectra to the originating files. #+BEGIN_SRC R :ravel message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4 ## Get the total ion current by file tc <- split(tic(raw_data), f = fromFile(raw_data)) - boxplot(tc, col = paste0(sample_colors[pData(raw_data)$sample_group], 80), + boxplot(tc, col = paste0(sample_colors[raw_data$sample_group], 80), ylab = "intensity", main = "Total ion current") #+END_SRC @@ -224,10 +265,32 @@ plot the signal distribution of the identified peaks per sample.
#+BEGIN_SRC R :ravel message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4 ints <- split(chromPeaks(xod)[, "into"], f = chromPeaks(xod)[, "sample"]) ints <- lapply(ints, log2) - boxplot(ints, varwidth = TRUE, col = sample_colors[pData(xod)$sample_group], + boxplot(ints, varwidth = TRUE, col = sample_colors[xod$sample_group], ylab = expression(log[2]~intensity), main = "Peak intensities") #+END_SRC +To get a global overview of the peak detection results we can use the +=plotChromPeakImage= function that plots the number of identified peaks for each +sample along the retention time axis as an image plot. This would allow for +example to spot samples in which much fewer peaks were identified. Below we +create the image counting the number of detected peaks within bins of 20 seconds +along the retention time axis. + +#+NAME: faahKO-peak-image +#+BEGIN_SRC R :ravel message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4, fig.cap = "Chromatographic peak image." + plotChromPeakImage(xod, binSize = 20) +#+END_SRC + +The =plotChromPeaks= function can be used to get a global overview of the +identified chromatographic peaks of one file. It highlights the identified peaks +in the full mz/rt plane. + +#+NAME: faahKO-peak-plot +#+BEGIN_SRC R :ravel message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4, fig.cap = "Chromatographic peaks for one file." + plotChromPeaks(xod, file = 3) +#+END_SRC + + After peak detection it might be advisable to evaluate whether the peak detection identified e.g. compounds known to be present in the sample. Facilitating access to the raw data has thus been one of the major aims @@ -239,10 +302,10 @@ seconds. In addition we extract also the full chromatogram for the specified mz range (i.e. the full rt range) and identify all chromatographic peaks in that region by passing the same =mz= and =rt= parameters to the =chromPeaks= method. 
-If two-column matrices are passed to the =extractChromatograms= method with -parameters =rt= and =mz=, the function returns a =list=, each element being a =list= of -=Chromatogram= objects representing the chromatogram for the respective -ranges. +If two-column matrices are passed to the =chromatogram= method with parameters =rt= +and =mz=, the function returns a =Chromatograms= object with each column containing +the data from one sample/file and each row the =Chromatogram= objects for the +respective ranges. #+NAME: faahKO-chromPeaks-extractChroms #+BEGIN_SRC R :ravel warning = FALSE @@ -256,7 +319,7 @@ ranges. rtr <- rbind(c(-Inf, Inf), rtr) mzr <- rbind(mzr, mzr) - chrs <- extractChromatograms(xod, rt = rtr, mz = mzr) + chrs <- chromatogram(xod, rt = rtr, mz = mzr) ## In addition we get all peaks detected in the same region pks <- chromPeaks(xod, rt = rtr, mz = mzr) @@ -264,25 +327,24 @@ ranges. #+END_SRC Next we plot the extracted chromatogram for the data and highlight in addition -the identified peaks. +the identified peaks using the =highlightChromPeaks= function. #+NAME: faahKO-extracted-chrom-with-peaks #+BEGIN_SRC R :ravel message = FALSE, fig.cap = "Extracted ion chromatogram for one of the identified peaks. Left: full retention time range, right: rt range of the peak. Each line represents the signal measured in one sample. The rectangles indicate the margins of the identified chromatographic peak in the respective sample.", fig.align = "center", fig.width = 12, fig.height = 6 ## Plot the full rt range: - plotChromatogram(chrs[[1]], - col = paste0(sample_colors[pData(xod)$sample_group], 80)) + par(mfrow = c(2, 1)) + plot(chrs[1, , drop = FALSE], col = paste0(sample_colors[xod$sample_group], 80)) ## And now for the peak range. - plotChromatogram(chrs[[2]], - col = paste0(sample_colors[pData(xod)$sample_group], 80)) + plot(chrs[2, , drop = FALSE], col = paste0(sample_colors[xod$sample_group], 80)) ## Highlight also the identified chromatographic peaks. 
highlightChromPeaks(xod, rt = rtr[2, ], mzr[2, ], - border = paste0(sample_colors[pData(xod)$sample_group], 40)) + border = paste0(sample_colors[xod$sample_group], 40)) #+END_SRC -Note that the =extractChromatograms= does return an =NA= value if in a certain scan -(i.e. for a specific retention time) no signal was measured in the respective mz -range. This is reflected by the lines not being drawn as continuous lines in the -plot above. +Note that =Chromatogram= objects extracted by the =chromatogram= method contain an +=NA= value if in a certain scan (i.e. for a specific retention time) no signal was +measured in the respective mz range. This is reflected by the lines not being +drawn as continuous lines in the plot above. Next we align the samples using the /obiwarp/ method \cite{Prince:2006jj}. This method does not require, in contrast to other alignment/retention time @@ -307,13 +369,12 @@ times per sample using the =plotAdjustedRtime= function. #+NAME: faahKO-bpi-obiwarp #+BEGIN_SRC R :ravel message = FALSE, fig.align = "center", fig.width = 8, fig.height = 8 ## Get the base peak chromatograms. This reads data from the files. - bpis <- extractChromatograms(xod, aggregationFun = "max") + bpis <- chromatogram(xod, aggregationFun = "max") par(mfrow = c(2, 1), mar = c(4.5, 4.2, 1, 0.5)) - plotChromatogram(bpis, - col = paste0(sample_colors[pData(xod)$sample_group[i]], 80)) + plot(bpis, col = paste0(sample_colors[xod$sample_group], 80)) ## Plot also the difference of adjusted to raw retention time. - plotAdjustedRtime(xod, col = paste0(sample_colors[pData(xod)$sample_group], 80)) + plotAdjustedRtime(xod, col = paste0(sample_colors[xod$sample_group], 80)) #+END_SRC Too large differences between adjusted and raw retention times could indicate @@ -331,8 +392,8 @@ assessment. 
## get the values grouped by sample we have to split this vector by file/sample diffRt <- split(diffRt, fromFile(xod)) - boxplot(diffRt, col = sample_colors[pData(xod)$sample_group], - main = "Obiwarp alignment results", ylab = "adjusted - raw rt") + boxplot(diffRt, col = sample_colors[xod$sample_group], + main = "Obiwarp alignment results", ylab = "adjusted - raw rt") #+END_SRC The 3rd sample was used as /center/ sample against which all other samples were @@ -351,30 +412,28 @@ of the alignment. rtr[2] <- rtr[2] + 60 mzr <- chromPeaks(xod)[68, c("mzmin", "mzmax")] - chrs <- extractChromatograms(xod, rt = rtr, mz = mzr) - chrs_raw <- extractChromatograms(raw_data, rt = rtr, mz = mzr) + chrs <- chromatogram(xod, rt = rtr, mz = mzr) + chrs_raw <- chromatogram(raw_data, rt = rtr, mz = mzr) par(mfrow = c(2, 1)) - plotChromatogram(chrs_raw, - col = paste0(sample_colors[pData(xod)$sample_group], 80)) - plotChromatogram(chrs, - col = paste0(sample_colors[pData(xod)$sample_group], 80)) + plot(chrs_raw, col = paste0(sample_colors[xod$sample_group], 80)) + plot(chrs, col = paste0(sample_colors[xod$sample_group], 80)) highlightChromPeaks(xod, rt = rtr, mzr, - border = paste0(sample_colors[pData(xod)$sample_group], 40)) + border = paste0(sample_colors[xod$sample_group], 40)) #+END_SRC After alignment, the peaks are nicely overlapping. Next we group identified chromatographic peaks across samples. We use the /peak -density/ method \cite{Smith:2006ic} specifying that a chromatographic peak have -to be present in at least 1/3 of the samples within each group to be combined to +density/ method \cite{Smith:2006ic} specifying that a chromatographic peak has +to be present in at least 2/3 of the samples within each group to be combined to a mz-rt /feature/. 
#+NAME: faahKO-groupPeakDensity #+BEGIN_SRC R :ravel message = FALSE ## Define the PeakDensityParam - pdp <- PeakDensityParam(sampleGroups = pData(xod)$sample_group, - maxFeatures = 300, minFraction = 0.66) + pdp <- PeakDensityParam(sampleGroups = xod$sample_group, + maxFeatures = 300, minFraction = 0.66) xod <- groupChromPeaks(xod, param = pdp) #+END_SRC @@ -388,6 +447,35 @@ belonging to the feature. head(featureDefinitions(xod)) #+END_SRC +The =plotChromPeakDensity= method allows inspecting the result of the peak +grouping on e.g. a known compound/peak. + +#+NAME: faahKO-plot-peak-density +#+BEGIN_SRC R :ravel message = FALSE, fig.align = "center", fig.width = 8, fig.height = 8 + ## Extract the full chromatogram for the mz of 279 + chrs <- chromatogram(xod, mz = 279) + ## Plot the chromatogram objects + par(mfrow = c(2, 1), mar = c(2.5, 4, 1, 1)) + plot(chrs, col = paste0(sample_colors[xod$sample_group], 80)) + ## Highlight the chromatographic peaks identified in this mz slice + highlightChromPeaks(xod, mz = 279, + border = paste0(sample_colors[xod$sample_group], 60)) + ## Plot the peak density distribution for the given mz slice providing also the + ## parameter class used for the peak grouping. + plotChromPeakDensity(xod, mz = 279, pch = 16, param = pdp, + col = paste0(sample_colors[xod$sample_group], 80)) +#+END_SRC + +The upper panel of the plot above shows the extracted chromatogram for an mz +value of 279 with the identified chromatographic peaks indicated with +rectangles. The lower panel shows the location of identified chromatographic +peaks per sample along the retention time axis (points) and the chromatographic +peak density as a black solid line. The grey rectangle indicates which peaks have +been grouped and assigned to the corresponding feature.
This type of +visualization can be very helpful to tune the parameters of the peak grouping +for example to evaluate whether closely located peaks known to come from +different compounds were successfully separated. + To extract /values/ for the features, the =featureValues= method can be used. This method returns a matrix with rows being the features and column the samples. The =value= parameter allows to specify the value that should be returned. Below we @@ -468,7 +556,7 @@ across samples. #+BEGIN_SRC R :ravel message = FALSE ## Define the parameter for the correspondence pdparam <- PeakDensityParam(sampleGroups = pData(xod)$sample_group, - minFraction = 0.7, maxFeatures = 100) + minFraction = 0.7, maxFeatures = 100) xod <- groupChromPeaks(xod, param = pdparam) #+END_SRC @@ -557,14 +645,15 @@ the raw data on which the preprocessing was performed. *** =Chromatogram= -The =Chromatogram= class allows a data representation that is orthogonal to the -=Spectrum= class defined in =MSnbase=. The =Chromatogram= class stores retention time -and intensity duplets and is designed to accommodate most use cases, from total -ion chromatogram, base peak chromatogram to extracted ion chromatogram and -SRM/MRM ion traces. +The =Chromatogram= class (available in the =MSnbase= package since version 2.3.8) +allows a data representation that is orthogonal to the =Spectrum= class (also +defined in =MSnbase=). The =Chromatogram= class stores retention time and intensity +duplets and is designed to accommodate most use cases, from total ion +chromatogram, base peak chromatogram to extracted ion chromatogram and SRM/MRM +ion traces. -=Chromatogram= objects can be extracted from =XCMSnExp= objects using the -=extractChromatograms= method. +=Chromatogram= objects can be extracted from =XCMSnExp= (and =MSnExp= and +=OnDiskMSnExp=) objects using the =chromatogram= method. Note that this class is still considered developmental and might thus undergo some changes in the future. 
@@ -1293,8 +1382,10 @@ RT correction. analyte over replicate samples \cite{Smith:2014di}. -** TODO Implement the =Chromatogram= class +** DONE Implement the =Chromatogram= class + CLOSED: [2017-07-10 Mon 15:12] + - State "DONE" from "TODO" [2017-07-10 Mon 15:12] Now, to accommodate all possibilities: https://en.wikipedia.org/wiki/Triple_quadrupole_mass_spectrometer Triple Q-TOF measurements: @@ -1319,6 +1410,14 @@ https://en.wikipedia.org/wiki/Mass_chromatogram#Selected-ion_monitoring_chromato http://proteowizard.sourceforge.net/dox/structpwiz_1_1msdata_1_1_chromatogram.html https://sourceforge.net/p/proteowizard/mailman/message/27571266/ +*** Move =Chromatogram= to MSnbase + ++ [X] Add =Chromatogram= to MSnbase. ++ [ ] Remove =Chromatogram= from xcms. ++ [ ] Move functions and methods to MSnbase. ++ [ ] Fix xcms to import all required stuff from MSnbase. + + ** TODO Implement a =findBackgroundIons= method Check on one of our own files. @@ -1359,4 +1458,221 @@ certain mz (bin?) the signal is higher than a threshold in 70% of the spectra, i.e. that the % of values is larger than a percentage. +** DONE Reduce R CMD check time: + CLOSED: [2017-07-10 Mon 15:12] + + - State "DONE" from "TODO" [2017-07-10 Mon 15:12] +- xcms 2.99.3, MSnbase 2.3.4, mzR 2.11.3: 18m34.630s +- xcms 2.99.3, MSnbase 2.3.4, mzR 2.9.12: 20m41.440s + +After tuning xcms: +- xcms 2.99.3, MSnbase 2.3.4, mzR 2.11.3: 14m30.454s + +After enabling parallel processing for the unit tests: +- xcms 2.99.3, MSnbase 2.3.4, mzR 2.11.3: user 21m46.385s + +After enabling parallel processing (registering multicoreparam) for the unit +tests: +- xcms 2.99.3, MSnbase 2.3.4, mzR 2.11.3: user 15m53.039s. + +tests with long runtime: ++ [ ] testPresentAbsentSumAfterFillPeaks: 13.241 ++ [X] test_extractChromatograms (runit.Chromatogram.R): 23.800: Can not reduce + this. ++ [X] test_obiwarp (runit.do_adjustRtime.R): 17.594: Can not reduce this. 
++ [ ] test_findChromPeaks_centWaveWithPredIsoROIs + (runit.do_findChromPeaks_centWave_isotopes.R): 13.623 ++ [X] test_do_groupChromPeaks_nearest (runit.do_groupChromPeaks.R): 25.193: OK. ++ [X] test_fillChromPeaks_matchedFilter (runit.fillChromPeaks.R): 16.843: Can + not reduce. ++ [X] test.fillPeaks_old_vs_new (runit.fillPeaks.R): 37.924: dontrun ++ [X] test.fillPeaksColumns (runit.fillPeaks.R): 33.552: OK. ++ [X] testFillPeaksPar (runit.fillPeaks.R): 24.752: dontrun ++ [X] test_getEICxset (runit.getEIC.R): 27.144: might be faster. ++ [X] test.getEICretcor (runit.getEIC.R): 17.018: nope. ++ [X] test.issue7 (runit.getEIC.R): 66.020: dontrun ++ [X] test.getXcmsRaw (runit.getXcmsRaw.R): 26.558: might be faster. ++ [X] testMultiFactorDiffreport (runit.phenoData.R): 13.067: nothing to do. + + + + +** DONE mzR/MSnbase timings + CLOSED: [2017-06-14 Wed 11:02] + + - State "DONE" from "TODO" [2017-06-14 Wed 11:02] +#+BEGIN_SRC R + library(MSnbase) + library(msdata) + fl <- proteomics(full.names = TRUE)[3] + + + ## MSnbase: 2.3.4 + ## mzR: 2.11.2 + of <- mzR::openMSfile(fl, backend = "pwiz") + system.time(hdr <- header(of)) + ## user system elapsed + ## 0.953 0.036 0.986 + mzR::close(of) + + of <- mzR::openMSfile(fl, backend = "Ramp") + system.time(hdr <- header(of)) + ## user system elapsed + ## 0.449 0.011 0.460 + mzR::close(of) + + system.time(tmp <- readMSData2(fl)) + ## user system elapsed + ## 1.515 0.089 1.596 + + ########################################### + ## MSnbase: 2.3.4 + ## mzR: 2.11.3 + of <- mzR::openMSfile(fl, backend = "pwiz") + system.time(hdr <- header(of)) + ## user system elapsed + ## 0.974 0.039 1.009 + mzR::close(of) + + of <- mzR::openMSfile(fl, backend = "Ramp") + system.time(hdr <- header(of)) + ## user system elapsed + ## 0.422 0.010 0.433 + mzR::close(of) + + system.time(tmp <- readMSData2(fl)) + ## user system elapsed + ## 1.509 0.093 1.594 + + fl <- "/Users/jo/data/2016/2016-11/NoSN/190516_POOL_N_POS_14.mzML" + of <- mzR::openMSfile(fl, 
backend = "pwiz") + system.time(hdr <- header(of)) + ## user system elapsed + ## 0.138 0.042 0.180 + mzR::close(of) + + of <- mzR::openMSfile(fl, backend = "Ramp") + system.time(hdr <- header(of)) + ## user system elapsed + ## 0.067 0.023 0.089 + mzR::close(of) + + system.time(tmp <- readMSData2(fl)) + ## user system elapsed + ## 0.708 0.105 0.814 + + ## tmp: 1720 spectra. + + ############################################ + ## MSnbase: 2.3.4 + ## mzR: 2.11.3, without reading the ion injection time + of <- mzR::openMSfile(fl, backend = "pwiz") + system.time(hdr <- header(of)) + ## user system elapsed + ## 0.969 0.040 1.007 + mzR::close(of) + + of <- mzR::openMSfile(fl, backend = "Ramp") + system.time(hdr <- header(of)) + ## user system elapsed + ## 0.449 0.011 0.460 + mzR::close(of) + + system.time(tmp <- readMSData2(fl)) + ## user system elapsed + ## 1.556 0.089 1.638 + + fl <- "/Users/jo/data/2016/2016-11/NoSN/190516_POOL_N_POS_14.mzML" + of <- mzR::openMSfile(fl, backend = "pwiz") + system.time(hdr <- header(of)) + ## user system elapsed + ## 0.138 0.064 0.214 + mzR::close(of) + + of <- mzR::openMSfile(fl, backend = "Ramp") + system.time(hdr <- header(of)) + ## user system elapsed + ## 0.065 0.022 0.088 + mzR::close(of) + + system.time(tmp <- readMSData2(fl)) + ## user system elapsed + ## 0.709 0.110 0.833 + + ## tmp: 1720 spectra. +#+END_SRC + + +** TODO Re-add plotting functions + +There was some request to re-add the plotting functionality back to =xcms=. +I would however like to create such plots not *during*, but *after* the +analysis. One first example would be the results from the grouping, i.e. the +=group.density= method. + ++ =groupDensity=: loop through the features and create a plot for each one. The + function could be called something like =plotGroupingResult=. + - loop through each feature. + - plot all peaks in the mz range of all peaks in the group (+/- something) and + highlight the peaks belonging to the group.
+ #+BEGIN_SRC R + pks <- chromPeaks(xod) + pks <- pks[pks[, "sample"] == 1, ] + ## plot the rectangular data. + xod_file <- filterFile(xod, file = 1, keepAdjustedRtime = TRUE) + mzr <- range(mz(xod_file)) + rtr <- range(rtime(xod_file)) + plot(3, 3, pch = NA, xlim = rtr, ylim = mzr, xlab = "rentention time", + ylab = "mz", main = basename(fileNames(xod_file))) + rect(xleft = pks[, "rtmin"], xright = pks[, "rtmax"], ybottom = pks[, "mzmin"], + ytop = pks[, "mzmax"], border = "#00000060") + + ## peak density along retention time axis. + dens <- density(pks[, "rt"]) + plot(dens) + hst <- hist(pks[, "rt"], breaks = 64) + plot(hst$mids, hst$counts, type = "S") + plot(hst) + addi <- diff(hst$mids)[1] / 2 + points(hst$mids + addi, hst$counts, type = "S", col = "red") + + ## Plot of all peaks along retention time axis. + hst <- lapply(split(chromPeaks(xod)[, "rt"], + f = chromPeaks(xod)[, "sample"]), + hist, breaks = 64) + max_count <- max(unlist(lapply(hst, function(z) max(z$counts)))) + ## Initialize plot: + plot(3, 3, pch = NA, xlab = "retention time", ylab = "peak count", + xlim = range(rtime(xod)), ylim = c(0, max_count)) + addi <- diff(hst[[1]]$mids)[1] / 2 + lapply(hst, function(z) points(z$mids + addi, z$counts, col = "#00000060", + type = "S")) + #+END_SRC + ++ Plot identified chromatographic peaks. Identified and failed peaks could be + simply plotted manually. One needs to know however where to look. + - =plot,Chromatogram=. + - =highlightChromPeaks=. + - Eventually it might be nice to create a plot from above, plotting the mz vs + rt of one file and highlighting the identified peaks: =plotChromPeaks=. ++ Plot retention time adjustment results. + - =plotAdjustedRtime= should do the trick. ++ Plot grouping results: + - =plot,Chromatogram=. + - =highlightChromPeaks=. + - =plotChromPeakDensity=. + +** TODO Implement the =calibrate= method in the new user interface + +First thing is to understand what the method does. 
+See /methods-xcmsSet.R/ for the =calibrate= method. See /matchpeaks.R/ for the +=matchpeaks= and =estimate= functions. +Input: =xcmsSet= object and list of numeric vectors representing the m/z values of +the calibrants. Apparently, the calibrants have to be close to real peaks, +otherwise they will not be adjusted/matched correctly. +For each sample: +- get the peaks of that sample, i.e. the =@peaks= matrix. +- call the =matchpeaks= function on the peaks matrix and the calibrants (which is + supposed to be a numeric vector of mz values). + * References diff --git a/vignettes/xcmsDirect.Rnw b/vignettes/xcmsDirect.Rnw index 90f3b1a9c..c8779eecd 100644 --- a/vignettes/xcmsDirect.Rnw +++ b/vignettes/xcmsDirect.Rnw @@ -41,6 +41,14 @@ are needed for further processing. <>= library(xcms) library(MassSpecWavelet) + +if (.Platform$OS.type == "unix") { + prm <- MulticoreParam(2) +} else { + prm <- SnowParam(2) +} +register(bpstart(prm)) + @ This documentation uses raw mzdata files from \Rpackage{msdata} as example data diff --git a/vignettes/xcmsMSn.Rnw b/vignettes/xcmsMSn.Rnw index 0d9395e8e..5ad4bd148 100644 --- a/vignettes/xcmsMSn.Rnw +++ b/vignettes/xcmsMSn.Rnw @@ -38,6 +38,13 @@ QTOF, ion trap or orbitrap mass spectrometers. <>= library(xcms) library(msdata) + +if (.Platform$OS.type == "unix") { + prm <- MulticoreParam(2) +} else { + prm <- SnowParam(2) +} +register(bpstart(prm)) @ \section{Raw Data File Preparation}