diff --git a/DESCRIPTION b/DESCRIPTION index 7a5846d70..7eac9758a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: xcms -Version: 1.51.7 -Date: 2017-02-21 +Version: 1.51.9 +Date: 2017-03-08 Title: LC/MS and GC/MS Data Analysis Author: Colin A. Smith , Ralf Tautenhahn , diff --git a/NAMESPACE b/NAMESPACE index bce2ce4ba..0a92b0689 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -46,7 +46,7 @@ importMethodsFrom("MSnbase", "intensity", "mz", "rtime", "fileNames", "fromFile" "clean", "featureNames", "filterAcquisitionNum", "filterMz", "filterRt", "normalize", "pickPeaks", "removePeaks", "removeReporters", "smooth", "trimMz", "splitByFile", "[[", - "spectrapply", "peaksCount") + "spectrapply", "peaksCount", "precursorMz") importFrom("MSnbase", "as.data.frame.Spectrum") export( @@ -197,8 +197,9 @@ export( "do_groupChromPeaks_density", "do_groupPeaks_mzClust", "do_groupChromPeaks_nearest", - ## "Chromatogram", - "do_adjustRtime_peakGroups" + "Chromatogram", + "do_adjustRtime_peakGroups", + "processHistoryTypes" ) ## New analysis methods @@ -220,7 +221,9 @@ exportClasses( "MzClustParam", "NearestPeaksParam", "PeakGroupsParam", - "ObiwarpParam" + "ObiwarpParam", + "GenericParam", + "FillChromPeaksParam" ) ## Param methods exportMethods( @@ -356,17 +359,23 @@ exportMethods( "localAlignment", "localAlignment<-", "initPenalty", - "initPenalty<-" + "initPenalty<-", + ## FillChromPeaksParam + "expandMz", + "expandMz<-", + "expandRt", + "expandRt<-" ) ## Param class functions export("CentWaveParam", "MatchedFilterParam", "MassifquantParam", "MSWParam", "CentWavePredIsoParam", "PeakDensityParam", "MzClustParam", - "NearestPeaksParam", "PeakGroupsParam", "ObiwarpParam") + "NearestPeaksParam", "PeakGroupsParam", "ObiwarpParam", "GenericParam", + "FillChromPeaksParam") ## Param class methods. ## New Classes exportClasses("XCMSnExp", "MsFeatureData", "ProcessHistory", - ## "Chromatogram", + "Chromatogram", "XProcessHistory" ) ## New methods for these classes @@ -377,6 +386,7 @@ exportMethods("hasChromPeaks", "adjustedRtime<-", "featureDefinitions", "featureDefinitions<-", + "featureValues", "chromPeaks", "chromPeaks<-", "processHistory", @@ -404,6 +414,10 @@ exportMethods("hasChromPeaks", "mz", "intensity", "aggregationFun", -## "extractChromatograms", - "as.data.frame" + "extractChromatograms", + "precursorMz", + "productMz", + "fillChromPeaks", + "as.data.frame", + "dropFilledChromPeaks" ) diff --git a/R/AllGenerics.R b/R/AllGenerics.R index 12ae3aa00..06c199ff8 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -33,10 +33,13 @@ setGeneric("bw<-", function(object, value) standardGeneric("bw<-")) ## C setGeneric("calibrate", function(object, ...) standardGeneric("calibrate")) setGeneric("checkBack", function(object, ...) standardGeneric("checkBack")) -setGeneric("checkBack<-", function(object, value) standardGeneric("checkBack<-")) setGeneric("centerSample", function(object) standardGeneric("centerSample")) setGeneric("centerSample<-", function(object, value) standardGeneric("centerSample<-")) +setGeneric("checkBack<-", function(object, value) standardGeneric("checkBack<-")) +setGeneric("chromPeaks", function(object, ...) standardGeneric("chromPeaks")) +setGeneric("chromPeaks<-", function(object, value) + standardGeneric("chromPeaks<-")) setGeneric("collect", function(object, ...) standardGeneric("collect")) setGeneric("consecMissedLimit", function(object, ...) 
standardGeneric("consecMissedLimit")) @@ -49,8 +52,6 @@ setGeneric("criticalValue<-", function(object, value) ## D setGeneric("deepCopy", function(object) standardGeneric("deepCopy")) -setGeneric("findChromPeaks", function(object, param, ...) - standardGeneric("findChromPeaks")) setGeneric("diffreport", function(object, ...) standardGeneric("diffreport")) setGeneric("distance", function(object, ...) standardGeneric("distance")) setGeneric("distance<-", function(object, value) standardGeneric("distance<-")) @@ -58,12 +59,22 @@ setGeneric("distFun", function(object) standardGeneric("distFun")) setGeneric("distFun<-", function(object, value) standardGeneric("distFun<-")) setGeneric("dropAdjustedRtime", function(object, ...) standardGeneric("dropAdjustedRtime")) -setGeneric("dropFeatureDefinitions", function(object, ...) - standardGeneric("dropFeatureDefinitions")) setGeneric("dropChromPeaks", function(object, ...) standardGeneric("dropChromPeaks")) +setGeneric("dropFeatureDefinitions", function(object, ...) + standardGeneric("dropFeatureDefinitions")) +setGeneric("dropFilledChromPeaks", function(object, ...) + standardGeneric("dropFilledChromPeaks")) ## E +setGeneric("expandMz", function(object, ...) + standardGeneric("expandMz")) +setGeneric("expandMz<-", function(object, value) + standardGeneric("expandMz<-")) +setGeneric("expandRt", function(object, ...) + standardGeneric("expandRt")) +setGeneric("expandRt<-", function(object, value) + standardGeneric("expandRt<-")) setGeneric("extraPeaks", function(object, ...) standardGeneric("extraPeaks")) setGeneric("extraPeaks<-", function(object, value) @@ -79,21 +90,25 @@ setGeneric("factorGap", function(object) standardGeneric("factorGap")) setGeneric("factorGap<-", function(object, value) standardGeneric("factorGap<-")) setGeneric("family", function(object, ...) standardGeneric("family")) setGeneric("family<-", function(object, value) standardGeneric("family<-")) -setGeneric("chromPeaks", function(object, ...) standardGeneric("chromPeaks")) -setGeneric("chromPeaks<-", function(object, value) - standardGeneric("chromPeaks<-")) -setGeneric("featureDefinitions", function(object, ...) standardGeneric("featureDefinitions")) +setGeneric("featureDefinitions", function(object, ...) + standardGeneric("featureDefinitions")) setGeneric("featureDefinitions<-", function(object, value) standardGeneric("featureDefinitions<-")) +setGeneric("featureValues", function(object, ...) + standardGeneric("featureValues")) setGeneric("fileIndex", function(object) standardGeneric("fileIndex")) setGeneric("fileIndex<-", function(object, value) standardGeneric("fileIndex<-")) setGeneric("filepaths", function(object) standardGeneric("filepaths")) setGeneric("filepaths<-", function(object, value) standardGeneric("filepaths<-")) +setGeneric("fillChromPeaks", function(object, param, ...) + standardGeneric("fillChromPeaks")) setGeneric("fillPeaks.chrom", function(object, ...) standardGeneric("fillPeaks.chrom")) setGeneric("fillPeaks.MSW", function(object, ...) standardGeneric("fillPeaks.MSW")) setGeneric("fillPeaks", function(object, ...) standardGeneric("fillPeaks")) +setGeneric("findChromPeaks", function(object, param, ...) + standardGeneric("findChromPeaks")) setGeneric("findMZ", function(object, find, ppmE=25, print=TRUE) standardGeneric("findMZ")) setGeneric("findmzROI", function(object, ...) standardGeneric("findmzROI")) @@ -263,6 +278,7 @@ setGeneric("processParam<-", function(object, value) setGeneric("processType", function(object, ...) 
standardGeneric("processType")) setGeneric("processType<-", function(object, value) standardGeneric("processType<-")) setGeneric("processHistory", function(object, ...) standardGeneric("processHistory")) +setGeneric("productMz", function(object, value) standardGeneric("productMz")) setGeneric("profinfo", function(object) standardGeneric("profinfo")) setGeneric("profinfo<-", function(object, value) standardGeneric("profinfo<-")) setGeneric("profMat", function(object, ...) standardGeneric("profMat")) diff --git a/R/DataClasses.R b/R/DataClasses.R index d7d3fa769..80fd95aa5 100644 --- a/R/DataClasses.R +++ b/R/DataClasses.R @@ -166,29 +166,15 @@ setClass("xcmsRaw", representation(env = "environment", ############################################################ ## netCdfSource -setClass("netCdfSource", - ## representation(cdf="integer"), - contains="xcmsFileSource", - ## validity=function(object) { - ## if (!is.null(attr(object@cdf, "errortext"))) { - ## mzR:::netCDFClose(object@cdf) - ## attr(object@cdf, "errortext") - ## } else TRUE - ## } - ) +setClass("netCdfSource", contains="xcmsFileSource") ############################################################ ## rampSource -setClass("rampSource", - ## representation(rampid="integer"), - contains="xcmsFileSource", - ## validity=function(object) { - ## if (object@rampid < 0) { - ## mzR:::rampClose(object@rampid) - ## paste("Could not open mzML/mzXML/mzData file:", object) - ## } else TRUE - ## } - ) +setClass("rampSource", contains="xcmsFileSource") + +############################################################ +## pwizSource +setClass("pwizSource", contains="xcmsFileSource") ############################################################ ## xcmsPeaks @@ -200,11 +186,13 @@ setClass("xcmsPeaks", contains = "matrix") .PROCSTEP.PEAK.DETECTION <- "Peak detection" .PROCSTEP.PEAK.GROUPING <- "Peak grouping" .PROCSTEP.RTIME.CORRECTION <- "Retention time correction" +.PROCSTEP.PEAK.FILLING <- "Missing peak filling" .PROCSTEPS <- c( .PROCSTEP.UNKNOWN, .PROCSTEP.PEAK.DETECTION, .PROCSTEP.PEAK.GROUPING, - .PROCSTEP.RTIME.CORRECTION + .PROCSTEP.RTIME.CORRECTION, + .PROCSTEP.PEAK.FILLING ) ############################################################ @@ -278,6 +266,51 @@ setClass("Param", contains = c("Versioned")) setClassUnion("ParamOrNULL", c("Param", "NULL")) +#' @aliases GenericParam +#' @title Generic parameter class +#' +#' @description The \code{GenericParam} class allows to store generic parameter +#' information such as the name of the function that was/has to be called (slot +#' \code{fun}) and its arguments (slot \code{args}). This object is used to track +#' the process history of the data processings of an \code{\link{XCMSnExp}} +#' object. This is in contrast to e.g. the \code{\link{CentWaveParam}} object +#' that is passed to the actual processing method. +#' +#' @seealso \code{\link{processHistory}} for how to access the process history +#' of an \code{\link{XCMSnExp}} object. +#' +#' @slot fun \code{character} specifying the function name. +#' @slot args \code{list} (ideally named) with the arguments to the +#' function. +#' @slot .__classVersion__ the version of the class. 
+#' +#' @author Johannes Rainer +#' @rdname GenericParam +#' @examples +#' prm <- GenericParam(fun = "mean") +#' +#' prm <- GenericParam(fun = "mean", args = list(na.rm = TRUE)) +setClass("GenericParam", + slots = c(fun = "character", + args = "list"), + contains = "Param", + prototype = prototype( + fun = character(), + args = list() + ), + validity = function(object) { + msg <- character() + if (length(object@args) > 0) + if (!length(object@fun) > 0) + msg <- c(msg, paste0("No function name specified in '@fun'", + " but got '@args'")) + if (length(object@fun) > 1) + msg <- c(msg, paste0("'@fun' has to be of length 1")) + if (length(msg)) msg + else TRUE + } + ) + ##' @aliases XProcessHistory ##' @title Tracking data processing ##' @@ -451,7 +484,7 @@ NULL ##' ## faahKO package. Files are read using the readMSData2 from the MSnbase ##' ## package ##' library(faahKO) -##' library(MSnbase) +##' library(xcms) ##' fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, ##' full.names = TRUE) ##' raw_data <- readMSData2(fls[1:2]) @@ -1233,7 +1266,7 @@ setClass("CentWavePredIsoParam", ##' @family peak grouping methods ##' @seealso \code{\link{group}} for the \emph{old} peak grouping methods. ##' @seealso \code{\link{featureDefinitions}} and -##' \code{\link{groupval,XCMSnExp-method}} for methods to access peak grouping +##' \code{\link{featureValues,XCMSnExp-method}} for methods to access peak grouping ##' results. ##' ##' @author Johannes Rainer @@ -1278,7 +1311,7 @@ NULL ##' @seealso The \code{\link{do_groupChromPeaks_density}} core ##' API function and \code{\link{group.density}} for the old user interface. ##' @seealso \code{\link{featureDefinitions}} and -##' \code{\link{groupval,XCMSnExp-method}} for methods to access the features +##' \code{\link{featureValues,XCMSnExp-method}} for methods to access the features ##' (i.e. the peak grouping results). ##' ##' @name groupChromPeaks-density @@ -1341,9 +1374,9 @@ NULL ##' ## The definition of the features (peak groups): ##' featureDefinitions(res) ##' -##' ## Using the groupval method to extract a matrix with the intensities of +##' ## Using the featureValues method to extract a matrix with the intensities of ##' ## the features per sample. -##' head(groupval(res, value = "into")) +##' head(featureValues(res, value = "into")) ##' ##' ## The process history: ##' processHistory(res) @@ -1410,7 +1443,7 @@ setClass("PeakDensityParam", ##' @seealso The \code{\link{do_groupPeaks_mzClust}} core ##' API function and \code{\link{group.mzClust}} for the old user interface. ##' @seealso \code{\link{featureDefinitions}} and -##' \code{\link{groupval,XCMSnExp-method}} for methods to access peak grouping +##' \code{\link{featureValues,XCMSnExp-method}} for methods to access peak grouping ##' results (i.e. the features). ##' ##' @name groupChromPeaks-mzClust @@ -1525,7 +1558,7 @@ setClass("MzClustParam", ##' @seealso The \code{\link{do_groupChromPeaks_nearest}} core ##' API function and \code{\link{group.nearest}} for the old user interface. ##' @seealso \code{\link{featureDefinitions}} and -##' \code{\link{groupval,XCMSnExp-method}} for methods to access peak grouping +##' \code{\link{featureValues,XCMSnExp-method}} for methods to access peak grouping ##' results (i.e. the features). 
##' ##' @name groupChromPeaks-nearest @@ -1581,9 +1614,9 @@ NULL ##' ## The results from the peak grouping: ##' featureDefinitions(res) ##' -##' ## Using the groupval method to extract a matrix with the intensities of +##' ## Using the featureValues method to extract a matrix with the intensities of ##' ## the features per sample. -##' head(groupval(res, value = "into")) +##' head(featureValues(res, value = "into")) ##' ##' ## The process history: ##' processHistory(res) @@ -1993,7 +2026,36 @@ setClass("ObiwarpParam", else TRUE }) - +#' @description The \code{FillChromPeaksParam} object encapsulates all settings for +#' the signal integration of missing peaks. +#' +#' @slot .__classVersion__,expandMz,expandRt,ppm See corresponding parameter above. \code{.__classVersion__} stores the version of the class. +#' +#' @rdname fillChromPeaks +setClass("FillChromPeaksParam", + slots = c(expandMz = "numeric", + expandRt = "numeric", + ppm = "numeric"), + contains = "Param", + prototype = prototype( + expandMz = 0, + expandRt = 0, + ppm = 0 + ), + validity = function(object) { + msg <- character() + if (length(object@expandMz) > 1 | any(object@expandMz < -1)) + msg <- c(msg, "'expandMz' has to be > -1 and of length 1") + if (length(object@expandRt) > 1 | any(object@expandRt < -1)) + msg <- c(msg, "'expandRt' has to be > -1 and of length 1") + if (length(object@ppm) > 1 | any(object@ppm < 0)) + msg <- c(msg, paste0("'ppm' has to be a positive", + " numeric of length 1!")) + if (length(msg)) + msg + else TRUE + } + ) ##' @aliases MsFeatureData ##' @title Data container storing xcms preprocessing results @@ -2219,22 +2281,35 @@ setClass("XCMSnExp", ##' Instances of the class can be created with the \code{Chromatogram} ##' constructor function but in most cases the dedicated methods for ##' \code{\link{OnDiskMSnExp}} and \code{\link{XCMSnExp}} objects extracting -##' chromatograms should be used instead. -##' -##' @details The \code{mz}, \code{filterMz} are stored as a \code{numeric(2)} -##' representing a range even if the chromatogram represent the chromatogram for -##' a single ion (represented as a single mz value). Representing the \code{mz} -##' as a range allows this class also to be used for a total ion chromatogram -##' or base peak chromatogram. +##' chromatograms should be used instead (i.e. the +##' \code{\link{extractChromatograms}} method). +##' +##' @details The \code{mz}, \code{filterMz}, \code{precursorMz} and \code{productMz} +##' are stored as a \code{numeric(2)} representing a range even if the +##' chromatogram was generated for only a single ion (i.e. a single mz value). +##' Using ranges for \code{mz} values allows this class to also be used for e.g. +##' total ion chromatograms or base peak chromatograms. +##' +##' The slots \code{precursorMz} and \code{productMz} allow representing SRM +##' (single reaction monitoring) and MRM (multiple SRM) chromatograms. As an example, +##' a \code{Chromatogram} for an SRM transition 273 -> 153 will have a +##' \code{@precursorMz = c(273, 273)} and a \code{@productMz = c(153, 153)}. ##' ##' @rdname Chromatogram-class +##' @export ##' @author Johannes Rainer ##' +##' @seealso \code{\link{extractChromatograms}} for the method to extract +##' \code{Chromatogram} objects from \code{\link{XCMSnExp}} or +##' \code{\link[MSnbase]{OnDiskMSnExp}} objects. setClass("Chromatogram", slots = c( rtime = "numeric", intensity = "numeric", mz = "numeric", filterMz = "numeric", + precursorMz = "numeric", ## Or call that Q1mz? + productMz = "numeric", ## Or call that Q3mz?
fromFile = "integer", aggregationFun = "character" ), @@ -2244,6 +2319,8 @@ setClass("Chromatogram", intensity = numeric(), mz = c(0, 0), filterMz = c(0, 0), + precursorMz = c(NA_real_, NA_real_), + productMz = c(NA_real_, NA_real_), fromFile = integer(), aggregationFun = character() ), diff --git a/R/MPI.R b/R/MPI.R index f682023b7..db770866b 100644 --- a/R/MPI.R +++ b/R/MPI.R @@ -1,4 +1,3 @@ - ## ## findPeaks slave function for parallel execution ## @@ -112,15 +111,14 @@ fillPeaksChromPar <- function(arg) { } - # Expanding the peakrange - peakrange[,"mzmax"] <- peakrange[,"mzmax"] + ( (peakrange[,"mzmax"]-peakrange[,"mzmin"])/2 )*(expand.mz-1) - peakrange[,"mzmin"] <- peakrange[,"mzmin"] - ( (peakrange[,"mzmax"]-peakrange[,"mzmin"])/2 )*(expand.mz-1) - peakrange[,"rtmax"] <- peakrange[,"rtmax"] + ( (peakrange[,"rtmax"]-peakrange[,"rtmin"])/2 )*(expand.rt-1) - peakrange[,"rtmin"] <- peakrange[,"rtmin"] - ( (peakrange[,"rtmax"]-peakrange[,"rtmin"])/2 )*(expand.rt-1) - - - - + ## Expanding the peakrange + incrMz <- (peakrange[, "mzmax"] - peakrange[, "mzmin"]) / 2 * (expand.mz - 1) + peakrange[, "mzmax"] <- peakrange[, "mzmax"] + incrMz + peakrange[, "mzmin"] <- peakrange[, "mzmin"] - incrMz + incrRt <- (peakrange[, "rtmax"] - peakrange[, "rtmin"]) / 2 * (expand.rt - 1) + peakrange[, "rtmax"] <- peakrange[, "rtmax"] + incrRt + peakrange[, "rtmin"] <- peakrange[, "rtmin"] - incrRt + naidx <- which(is.na(gvals[,myID])) newpeaks <- getPeaks(lcraw, peakrange[naidx,,drop=FALSE], step = prof$step) diff --git a/R/cwTools.R b/R/cwTools.R index f6cee5722..05209d87b 100644 --- a/R/cwTools.R +++ b/R/cwTools.R @@ -423,6 +423,15 @@ fitGauss <- function(td, d, pgauss = NA) { as.data.frame(t(fit$m$getPars())) } +## ' @param +## ' @param d numeric vector with intensities of centroids within the peak. 
+## ' @param otd +## ' @param omz +## ' @param od +## ' @param scantime +## ' @param scan.range +## ' @param peaks +## ' @noRd joinOverlappingPeaks <- function(td, d, otd, omz, od, scantime, scan.range, peaks, maxGaussOverlap=0.5, mzCenterFun) { diff --git a/R/do_adjustRtime-functions.R b/R/do_adjustRtime-functions.R index 4bb108647..68a93a71a 100644 --- a/R/do_adjustRtime-functions.R +++ b/R/do_adjustRtime-functions.R @@ -117,8 +117,7 @@ do_adjustRtime_peakGroups <- function(peaks, peakIndex, rtime, lo <- suppressWarnings(loess(rtdev ~ rt, pts, span = span, degree = 1, family = family)) - rtdevsmo[[i]] <- xcms:::na.flatfill(predict(lo, - data.frame(rt = rtime[[i]]))) + rtdevsmo[[i]] <- na.flatfill(predict(lo, data.frame(rt = rtime[[i]]))) ## Remove singularities from the loess function rtdevsmo[[i]][abs(rtdevsmo[[i]]) > quantile(abs(rtdevsmo[[i]]), 0.9) * 2] <- NA diff --git a/R/do_findChromPeaks-functions.R b/R/do_findChromPeaks-functions.R index f117b33bd..3d2153c81 100644 --- a/R/do_findChromPeaks-functions.R +++ b/R/do_findChromPeaks-functions.R @@ -117,24 +117,569 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect, roiList = list(), firstBaselineCheck = TRUE, roiScales = NULL) { - .centWave_orig(mz = mz, int = int, scantime = scantime, - valsPerSpect = valsPerSpect, ppm = ppm, peakwidth = peakwidth, - snthresh = snthresh, prefilter = prefilter, - mzCenterFun = mzCenterFun, integrate = integrate, - mzdiff = mzdiff, fitgauss = fitgauss, noise = noise, - verboseColumns = verboseColumns, roiList = roiList, - firstBaselineCheck = firstBaselineCheck, - roiScales = roiScales) + if (getOption("originalCentWave", default = TRUE)) { + ## message("DEBUG: using original centWave.") + .centWave_orig(mz = mz, int = int, scantime = scantime, + valsPerSpect = valsPerSpect, ppm = ppm, peakwidth = peakwidth, + snthresh = snthresh, prefilter = prefilter, + mzCenterFun = mzCenterFun, integrate = integrate, + mzdiff = mzdiff, fitgauss = fitgauss, noise = noise, + verboseColumns = verboseColumns, roiList = roiList, + firstBaselineCheck = firstBaselineCheck, + roiScales = roiScales) + } else { + ## message("DEBUG: using modified centWave.") + .centWave_new(mz = mz, int = int, scantime = scantime, + valsPerSpect = valsPerSpect, ppm = ppm, peakwidth = peakwidth, + snthresh = snthresh, prefilter = prefilter, + mzCenterFun = mzCenterFun, integrate = integrate, + mzdiff = mzdiff, fitgauss = fitgauss, noise = noise, + verboseColumns = verboseColumns, roiList = roiList, + firstBaselineCheck = firstBaselineCheck, + roiScales = roiScales) + } +} +############################################################ +## ORIGINAL code from xcms_1.49.7 +.centWave_orig <- function(mz, int, scantime, valsPerSpect, + ppm = 25, peakwidth = c(20,50), snthresh = 10, + prefilter = c(3,100), mzCenterFun = "wMean", + integrate = 1, mzdiff = -0.001, fitgauss = FALSE, + noise = 0, ## noise.local=TRUE, + sleep = 0, verboseColumns = FALSE, roiList = list(), + firstBaselineCheck = TRUE, roiScales = NULL) { + ## TODO @jo Ensure in upstream method that data is in centroided mode! + ## TODO @jo Ensure the upstream method did eventual sub-setting on scanrange + ## Input argument checking. 
+ if (missing(mz) | missing(int) | missing(scantime) | missing(valsPerSpect)) + stop("Arguments 'mz', 'int', 'scantime' and 'valsPerSpect'", + " are required!") + if (length(mz) != length(int) | length(valsPerSpect) != length(scantime) + | length(mz) != sum(valsPerSpect)) + stop("Lengths of 'mz', 'int' and of 'scantime','valsPerSpect'", + " have to match. Also, 'length(mz)' should be equal to", + " 'sum(valsPerSpect)'.") + scanindex <- valueCount2ScanIndex(valsPerSpect) ## Get index vector for C calls + if (!is.double(mz)) + mz <- as.double(mz) + if (!is.double(int)) + int <- as.double(int) + ## Fix the mzCenterFun + mzCenterFun <- paste("mzCenter", + gsub(mzCenterFun, pattern = "mzCenter.", + replacement = "", fixed = TRUE), sep=".") + if (!exists(mzCenterFun, mode="function")) + stop("Function '", mzCenterFun, "' not defined !") + + if (!is.logical(firstBaselineCheck)) + stop("Parameter 'firstBaselineCheck' should be logical!") + if (length(firstBaselineCheck) != 1) + stop("Parameter 'firstBaselineCheck' should be a single logical !") + if (length(roiScales) > 0) + if (length(roiScales) != length(roiList) | !is.numeric(roiScales)) + stop("If provided, parameter 'roiScales' has to be a numeric with", + " length equal to the length of 'roiList'!") + ## if (!is.null(roiScales)) { + ## if (!is.numeric(roiScales) | length(roiScales) != length(roiList)) + ## stop("Parameter 'roiScales' has to be a numeric of length equal to", + ## " parameter 'roiList'!") + ##} + + basenames <- c("mz", "mzmin", "mzmax", "rt", "rtmin", "rtmax", + "into", "intb", "maxo", "sn") + verbosenames <- c("egauss", "mu", "sigma", "h", "f", "dppm", "scale", + "scpos", "scmin", "scmax", "lmin", "lmax") + + ## Peak width: seconds to scales + scalerange <- round((peakwidth / mean(diff(scantime))) / 2) + + if (length(z <- which(scalerange == 0))) + scalerange <- scalerange[-z] + if (length(scalerange) < 1) { + warning("No scales? Please check peak width!") + if (verboseColumns) { + nopeaks <- matrix(nrow = 0, ncol = length(basenames) + + length(verbosenames)) + colnames(nopeaks) <- c(basenames, verbosenames) + } else { + nopeaks <- matrix(nrow = 0, ncol = length(basenames)) + colnames(nopeaks) <- c(basenames) + } + return(invisible(nopeaks)) + } + + if (length(scalerange) > 1) + scales <- seq(from = scalerange[1], to = scalerange[2], by = 2) + else + scales <- scalerange + + minPeakWidth <- scales[1] + noiserange <- c(minPeakWidth * 3, max(scales) * 3) + maxGaussOverlap <- 0.5 + minPtsAboveBaseLine <- max(4, minPeakWidth - 2) + minCentroids <- minPtsAboveBaseLine + scRangeTol <- maxDescOutlier <- floor(minPeakWidth / 2) + scanrange <- c(1, length(scantime)) + + ## If no ROIs are supplied then search for them. + if (length(roiList) == 0) { + message("Detecting mass traces at ", ppm, " ppm ... ", appendLF = FALSE) + ## flush.console(); + ## We're including the findmzROI code in this function to reduce + ## the need to copy objects etc. + ## We could also sort the data by m/z anyway; wouldn't need that + ## much time. Once we're using classes from MSnbase we can be + ## sure that values are correctly sorted. 
+ withRestarts( + tryCatch({ + tmp <- capture.output( + roiList <- .Call("findmzROI", + mz, int, scanindex, + as.double(c(0.0, 0.0)), + as.integer(scanrange), + as.integer(length(scantime)), + as.double(ppm * 1e-6), + as.integer(minCentroids), + as.integer(prefilter), + as.integer(noise), + PACKAGE ='xcms' ) + ) + }, + error = function(e){ + if (grepl("m/z sort assumption violated !", e$message)) { + invokeRestart("fixSort") + } else { + simpleError(e) + } + }), + fixSort = function() { + ## Force ordering of values within spectrum by mz: + ## o split values into a list -> mz per spectrum, intensity per + ## spectrum. + ## o define the ordering. + ## o re-order the mz and intensity and unlist again. + ## Note: the Rle split is faster than the "conventional" factor split. + splitF <- Rle(1:length(valsPerSpect), valsPerSpect) + mzl <- as.list(S4Vectors::split(mz, f = splitF)) + oidx <- lapply(mzl, order) + mz <<- unlist(mapply(mzl, oidx, FUN = function(y, z) { + return(y[z]) + }, SIMPLIFY = FALSE, USE.NAMES = FALSE), use.names = FALSE) + int <<- unlist(mapply(as.list(split(int, f = splitF)), oidx, + FUN=function(y, z) { + return(y[z]) + }, SIMPLIFY = FALSE, USE.NAMES = FALSE), + use.names = FALSE) + rm(mzl) + rm(splitF) + tmp <- capture.output( + roiList <<- .Call("findmzROI", + mz, int, scanindex, + as.double(c(0.0, 0.0)), + as.integer(scanrange), + as.integer(length(scantime)), + as.double(ppm * 1e-6), + as.integer(minCentroids), + as.integer(prefilter), + as.integer(noise), + PACKAGE ='xcms' ) + ) + } + ) + message("OK") + ## ROI.list <- findmzROI(object,scanrange=scanrange,dev=ppm * 1e-6,minCentroids=minCentroids, prefilter=prefilter, noise=noise) + if (length(roiList) == 0) { + warning("No ROIs found! \n") + if (verboseColumns) { + nopeaks <- matrix(nrow = 0, ncol = length(basenames) + + length(verbosenames)) + colnames(nopeaks) <- c(basenames, verbosenames) + } else { + nopeaks <- matrix(nrow = 0, ncol = length(basenames)) + colnames(nopeaks) <- c(basenames) + } + return(invisible(nopeaks)) + } + } + + ## Second stage: process the ROIs + peaklist <- list() + Nscantime <- length(scantime) + lf <- length(roiList) + + ## cat('\n Detecting chromatographic peaks ... 
\n % finished: ') + ## lp <- -1 + message("Detecting chromatographic peaks in ", length(roiList), + " regions of interest ...", appendLF = FALSE) + + for (f in 1:lf) { + + ## ## Show progress + ## perc <- round((f/lf) * 100) + ## if ((perc %% 10 == 0) && (perc != lp)) + ## { + ## cat(perc," ",sep=""); + ## lp <- perc; + ## } + ## flush.console() + + feat <- roiList[[f]] + N <- feat$scmax - feat$scmin + 1 + peaks <- peakinfo <- NULL + mzrange <- c(feat$mzmin, feat$mzmax) + sccenter <- feat$scmin[1] + floor(N/2) - 1 + scrange <- c(feat$scmin, feat$scmax) + ## scrange + noiserange, used for baseline detection and wavelet analysis + sr <- c(max(scanrange[1], scrange[1] - max(noiserange)), + min(scanrange[2], scrange[2] + max(noiserange))) + eic <- .Call("getEIC", mz, int, scanindex, as.double(mzrange), + as.integer(sr), as.integer(length(scanindex)), + PACKAGE = "xcms") + ## eic <- rawEIC(object,mzrange=mzrange,scanrange=sr) + d <- eic$intensity + td <- sr[1]:sr[2] + scan.range <- c(sr[1], sr[2]) + ## original mzROI range + idxs <- which(eic$scan %in% seq(scrange[1], scrange[2])) + mzROI.EIC <- list(scan=eic$scan[idxs], intensity=eic$intensity[idxs]) + ## mzROI.EIC <- rawEIC(object,mzrange=mzrange,scanrange=scrange) + omz <- .Call("getMZ", mz, int, scanindex, as.double(mzrange), + as.integer(scrange), as.integer(length(scantime)), + PACKAGE = 'xcms') + ## omz <- rawMZ(object,mzrange=mzrange,scanrange=scrange) + if (all(omz == 0)) { + warning("centWave: no peaks found in ROI.") + next + } + od <- mzROI.EIC$intensity + otd <- mzROI.EIC$scan + if (all(od == 0)) { + warning("centWave: no peaks found in ROI.") + next + } + + ## scrange + scRangeTol, used for gauss fitting and continuous + ## data above 1st baseline detection + ftd <- max(td[1], scrange[1] - scRangeTol) : min(td[length(td)], + scrange[2] + scRangeTol) + fd <- d[match(ftd, td)] + + ## 1st type of baseline: statistic approach + if (N >= 10*minPeakWidth) { + ## in case of very long mass trace use full scan range + ## for baseline detection + noised <- .Call("getEIC", mz, int, scanindex, as.double(mzrange), + as.integer(scanrange), as.integer(length(scanindex)), + PACKAGE="xcms")$intensity + ## noised <- rawEIC(object,mzrange=mzrange,scanrange=scanrange)$intensity + } else { + noised <- d + } + ## 90% trimmed mean as first baseline guess + noise <- estimateChromNoise(noised, trim = 0.05, + minPts = 3 * minPeakWidth) + ## any continuous data above 1st baseline ? + if (firstBaselineCheck & + !continuousPtsAboveThreshold(fd, threshold = noise, + num = minPtsAboveBaseLine)) + next + ## 2nd baseline estimate using not-peak-range + lnoise <- getLocalNoiseEstimate(d, td, ftd, noiserange, Nscantime, + threshold = noise, + num = minPtsAboveBaseLine) + ## Final baseline & Noise estimate + baseline <- max(1, min(lnoise[1], noise)) + sdnoise <- max(1, lnoise[2]) + sdthr <- sdnoise * snthresh + ## is there any data above S/N * threshold ? 
+ if (!(any(fd - baseline >= sdthr))) + next + wCoefs <- MSW.cwt(d, scales = scales, wavelet = 'mexh') + if (!(!is.null(dim(wCoefs)) && any(wCoefs- baseline >= sdthr))) + next + if (td[length(td)] == Nscantime) ## workaround, localMax fails otherwise + wCoefs[nrow(wCoefs),] <- wCoefs[nrow(wCoefs) - 1, ] * 0.99 + localMax <- MSW.getLocalMaximumCWT(wCoefs) + rL <- MSW.getRidge(localMax) + wpeaks <- sapply(rL, + function(x) { + w <- min(1:length(x),ncol(wCoefs)) + any(wCoefs[x,w]- baseline >= sdthr) + }) + if (any(wpeaks)) { + wpeaksidx <- which(wpeaks) + ## check each peak in ridgeList + for (p in 1:length(wpeaksidx)) { + opp <- rL[[wpeaksidx[p]]] + pp <- unique(opp) + if (length(pp) >= 1) { + dv <- td[pp] %in% ftd + if (any(dv)) { ## peaks in orig. data range + ## Final S/N check + if (any(d[pp[dv]]- baseline >= sdthr)) { + ## if(!is.null(roiScales)) { + ## allow roiScales to be a numeric of length 0 + if(length(roiScales) > 0) { + ## use given scale + best.scale.nr <- which(scales == roiScales[[f]]) + if(best.scale.nr > length(opp)) + best.scale.nr <- length(opp) + } else { + ## try to decide which scale describes the peak best + inti <- numeric(length(opp)) + irange <- rep(ceiling(scales[1]/2), length(opp)) + for (k in 1:length(opp)) { + kpos <- opp[k] + r1 <- ifelse(kpos - irange[k] > 1, + kpos-irange[k], 1) + r2 <- ifelse(kpos + irange[k] < length(d), + kpos + irange[k], length(d)) + inti[k] <- sum(d[r1:r2]) + } + maxpi <- which.max(inti) + if (length(maxpi) > 1) { + m <- wCoefs[opp[maxpi], maxpi] + bestcol <- which(m == max(m), + arr.ind = TRUE)[2] + best.scale.nr <- maxpi[bestcol] + } else best.scale.nr <- maxpi + } + + best.scale <- scales[best.scale.nr] + best.scale.pos <- opp[best.scale.nr] + + pprange <- min(pp):max(pp) + ## maxint <- max(d[pprange]) + lwpos <- max(1,best.scale.pos - best.scale) + rwpos <- min(best.scale.pos + best.scale, length(td)) + p1 <- match(td[lwpos], otd)[1] + p2 <- match(td[rwpos], otd) + p2 <- p2[length(p2)] + if (is.na(p1)) p1 <- 1 + if (is.na(p2)) p2 <- N + mz.value <- omz[p1:p2] + mz.int <- od[p1:p2] + maxint <- max(mz.int) + + ## re-calculate m/z value for peak range + mzrange <- range(mz.value) + mzmean <- do.call(mzCenterFun, + list(mz = mz.value, + intensity = mz.int)) + + ## Compute dppm only if needed + dppm <- NA + if (verboseColumns) { + if (length(mz.value) >= (minCentroids + 1)) { + dppm <- round(min(running(abs(diff(mz.value)) / + (mzrange[2] * 1e-6), + fun = max, + width = minCentroids))) + } else { + dppm <- round((mzrange[2] - mzrange[1]) / + (mzrange[2] * 1e-6)) + } + } + peaks <- rbind(peaks, + c(mzmean,mzrange, ## mz + NA, NA, NA, ## rt, rtmin, rtmax, + NA, ## intensity (sum) + NA, ## intensity (-bl) + maxint, ## max intensity + round((maxint - baseline) / sdnoise), ## S/N Ratio + NA, ## Gaussian RMSE + NA,NA,NA, ## Gaussian Parameters + f, ## ROI Position + dppm, ## max. 
difference between the [minCentroids] peaks in ppm + best.scale, ## Scale + td[best.scale.pos], + td[lwpos], + td[rwpos], ## Peak positions guessed from the wavelet's (scan nr) + NA, NA)) ## Peak limits (scan nr) + peakinfo <- rbind(peakinfo, + c(best.scale, best.scale.nr, + best.scale.pos, lwpos, rwpos)) + ## Peak positions guessed from the wavelet's + } + } + } + } ##for + } ## if + + ## postprocessing + if (!is.null(peaks)) { + colnames(peaks) <- c(basenames, verbosenames) + colnames(peakinfo) <- c("scale", "scaleNr", "scpos", + "scmin", "scmax") + for (p in 1:dim(peaks)[1]) { + ## find minima, assign rt and intensity values + if (integrate == 1) { + lm <- descendMin(wCoefs[, peakinfo[p,"scaleNr"]], + istart = peakinfo[p,"scpos"]) + gap <- all(d[lm[1]:lm[2]] == 0) ## looks like we got stuck in a gap right in the middle of the peak + if ((lm[1] == lm[2]) || gap )## fall-back + lm <- descendMinTol(d, + startpos = c(peakinfo[p, "scmin"], + peakinfo[p, "scmax"]), + maxDescOutlier) + } else { + lm <- descendMinTol(d, startpos = c(peakinfo[p, "scmin"], + peakinfo[p, "scmax"]), + maxDescOutlier) + } + ## narrow down peak rt boundaries by skipping zeros + pd <- d[lm[1]:lm[2]] + np <- length(pd) + lm.l <- findEqualGreaterUnsorted(pd, 1) + lm.l <- max(1, lm.l - 1) + lm.r <- findEqualGreaterUnsorted(rev(pd), 1) + lm.r <- max(1, lm.r - 1) + lm <- lm + c(lm.l - 1, -(lm.r - 1) ) + + peakrange <- td[lm] + peaks[p, "rtmin"] <- scantime[peakrange[1]] + peaks[p, "rtmax"] <- scantime[peakrange[2]] + peaks[p, "maxo"] <- max(d[lm[1]:lm[2]]) + pwid <- (scantime[peakrange[2]] - scantime[peakrange[1]]) / + (peakrange[2] - peakrange[1]) + if (is.na(pwid)) + pwid <- 1 + peaks[p, "into"] <- pwid * sum(d[lm[1]:lm[2]]) + db <- d[lm[1]:lm[2]] - baseline + peaks[p, "intb"] <- pwid * sum(db[db>0]) + peaks[p, "lmin"] <- lm[1] + peaks[p, "lmax"] <- lm[2] + + if (fitgauss) { + ## perform gaussian fits, use wavelets for inital parameters + md <- max(d[lm[1]:lm[2]]) + d1 <- d[lm[1]:lm[2]] / md ## normalize data for gaussian error calc. 
+ pgauss <- fitGauss(td[lm[1]:lm[2]], d[lm[1]:lm[2]], + pgauss = list(mu = peaks[p, "scpos"], + sigma = peaks[p, "scmax"] - + peaks[p, "scmin"], + h = peaks[p, "maxo"])) + rtime <- peaks[p, "scpos"] + if (!any(is.na(pgauss)) && all(pgauss > 0)) { + gtime <- td[match(round(pgauss$mu), td)] + if (!is.na(gtime)) { + rtime <- gtime + peaks[p, "mu"] <- pgauss$mu + peaks[p, "sigma"] <- pgauss$sigma + peaks[p, "h"] <- pgauss$h + peaks[p,"egauss"] <- sqrt((1 / length(td[lm[1]:lm[2]])) * + sum(((d1-gauss(td[lm[1]:lm[2]], + pgauss$h / md, + pgauss$mu, + pgauss$sigma))^2))) + } + } + peaks[p, "rt"] <- scantime[rtime] + ## avoid fitting side effects + if (peaks[p, "rt"] < peaks[p, "rtmin"]) + peaks[p, "rt"] <- scantime[peaks[p, "scpos"]] + } else + peaks[p, "rt"] <- scantime[peaks[p, "scpos"]] + } + peaks <- joinOverlappingPeaks(td, d, otd, omz, od, scantime, + scan.range, peaks, maxGaussOverlap, + mzCenterFun = mzCenterFun) + } + + ## if ((sleep >0) && (!is.null(peaks))) { + ## tdp <- scantime[td]; trange <- range(tdp) + ## egauss <- paste(round(peaks[,"egauss"],3),collapse=", ") + ## cdppm <- paste(peaks[,"dppm"],collapse=", ") + ## csn <- paste(peaks[,"sn"],collapse=", ") + ## par(bg = "white") + ## l <- layout(matrix(c(1,2,3),nrow=3,ncol=1,byrow=T),heights=c(.5,.75,2)); + ## par(mar= c(2, 4, 4, 2) + 0.1) + ## plotRaw(object,mzrange=mzrange,rtrange=trange,log=TRUE,title='') + ## title(main=paste(f,': ', round(mzrange[1],4),' - ',round(mzrange[2],4),' m/z , dppm=',cdppm,', EGauss=',egauss ,', S/N =',csn,sep='')) + ## par(mar= c(1, 4, 1, 2) + 0.1) + ## image(y=scales[1:(dim(wCoefs)[2])],z=wCoefs,col=terrain.colors(256),xaxt='n',ylab='CWT coeff.') + ## par(mar= c(4, 4, 1, 2) + 0.1) + ## plot(tdp,d,ylab='Intensity',xlab='Scan Time');lines(tdp,d,lty=2) + ## lines(scantime[otd],od,lty=2,col='blue') ## original mzbox range + ## abline(h=baseline,col='green') + ## bwh <- length(sr[1]:sr[2]) - length(baseline) + ## if (odd(bwh)) {bwh1 <- floor(bwh/2); bwh2 <- bwh1+1} else {bwh1<-bwh2<-bwh/2} + ## if (any(!is.na(peaks[,"scpos"]))) + ## { ## plot centers and width found through wavelet analysis + ## abline(v=scantime[na.omit(peaks[(peaks[,"scpos"] >0),"scpos"])],col='red') + ## } + ## abline(v=na.omit(c(peaks[,"rtmin"],peaks[,"rtmax"])),col='green',lwd=1) + ## if (fitgauss) { + ## tdx <- seq(min(td),max(td),length.out=200) + ## tdxp <- seq(trange[1],trange[2],length.out=200) + ## fitted.peaks <- which(!is.na(peaks[,"mu"])) + ## for (p in fitted.peaks) + ## { ## plot gaussian fits + ## yg<-gauss(tdx,peaks[p,"h"],peaks[p,"mu"],peaks[p,"sigma"]) + ## lines(tdxp,yg,col='blue') + ## } + ## } + ## Sys.sleep(sleep) + ## } + + if (!is.null(peaks)) { + peaklist[[length(peaklist) + 1]] <- peaks + } + } ## f + + if (length(peaklist) == 0) { + warning("No peaks found!") + + if (verboseColumns) { + nopeaks <- matrix(nrow = 0, ncol = length(basenames) + + length(verbosenames)) + colnames(nopeaks) <- c(basenames, verbosenames) + } else { + nopeaks <- matrix(nrow = 0, ncol = length(basenames)) + colnames(nopeaks) <- c(basenames) + } + message(" FAIL: none found!") + return(nopeaks) + } + p <- do.call(rbind, peaklist) + if (!verboseColumns) + p <- p[, basenames, drop = FALSE] + + uorder <- order(p[, "into"], decreasing = TRUE) + pm <- as.matrix(p[,c("mzmin", "mzmax", "rtmin", "rtmax"), drop = FALSE]) + uindex <- rectUnique(pm, uorder, mzdiff, ydiff = -0.00001) ## allow adjacent peaks + pr <- p[uindex, , drop = FALSE] + message(" OK: ", nrow(pr), " found.") + + return(pr) } 
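## Editor's sketch (not part of the patch): the do_findChromPeaks_centWave()
## dispatcher defined above selects .centWave_orig or .centWave_new via the
## global option "originalCentWave" (default TRUE). Toggling the option affects
## all subsequent centWave-based peak detection calls.
prev <- options(originalCentWave = FALSE)      ## opt in to the modified centWave
getOption("originalCentWave", default = TRUE)  ## now FALSE
options(prev)                                  ## restore the previous setting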
-############################################################ -## ORIGINAL code from xcms_1.49.7 -.centWave_orig <- function(mz, int, scantime, valsPerSpect, - ppm = 25, peakwidth = c(20,50), snthresh = 10, - prefilter = c(3,100), mzCenterFun = "wMean", - integrate = 1, mzdiff = -0.001, fitgauss = FALSE, - noise = 0, ## noise.local=TRUE, - sleep = 0, verboseColumns = FALSE, roiList = list(), - firstBaselineCheck = TRUE, roiScales = NULL) { +## This version fixes issue #135, i.e. that the peak signal is integrated based +## on the mzrange of the ROI and not of the actually reported peak. +## Issue #136. +## +## What's different to the original version? +## +## 1) The mz range of the peaks is calculated only using mz values with a +## measured intensity. This avoids mz ranges from 0 to max mz of the peak, +## with the mz=0 corresponding actually to scans in which no intensity was +## measured. Search for "@MOD1" to jump to the respective code. +## +## 2) The intensities for the peak are reloaded with the refined mz range during +## the postprocessing. Search for "@MOD2" to jump to the respective code. +## +## What I don't like: +## o Might be better if the getEIC and getMZ C functions returned NA instead of 0 +## if nothing was measured. +## o The joinOverlappingPeaks is still calculated using the variable "d" which +## contains all intensities from the ROI - might actually not be too bad +## though. +.centWave_new <- function(mz, int, scantime, valsPerSpect, + ppm = 25, peakwidth = c(20,50), snthresh = 10, + prefilter = c(3,100), mzCenterFun = "wMean", + integrate = 1, mzdiff = -0.001, fitgauss = FALSE, + noise = 0, ## noise.local=TRUE, + sleep = 0, verboseColumns = FALSE, roiList = list(), + firstBaselineCheck = TRUE, roiScales = NULL) { ## TODO @jo Ensure in upstream method that data is in centroided mode! ## TODO @jo Ensure the upstream method did eventual sub-setting on scanrange ## Input argument checking. @@ -166,11 +711,6 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect, if (length(roiScales) != length(roiList) | !is.numeric(roiScales)) stop("If provided, parameter 'roiScales' has to be a numeric with", " length equal to the length of 'roiList'!") - ## if (!is.null(roiScales)) { - ## if (!is.numeric(roiScales) | length(roiScales) != length(roiList)) - ## stop("Parameter 'roiScales' has to be a numeric of length equal to", - ## " parameter 'roiList'!") - ##} basenames <- c("mz", "mzmin", "mzmax", "rt", "rtmin", "rtmax", "into", "intb", "maxo", "sn") @@ -274,7 +814,6 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect, } ) message("OK") - ## ROI.list <- findmzROI(object,scanrange=scanrange,dev=ppm * 1e-6,minCentroids=minCentroids, prefilter=prefilter, noise=noise) if (length(roiList) == 0) { warning("No ROIs found! 
\n") if (verboseColumns) { @@ -301,19 +840,12 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect, for (f in 1:lf) { - ## ## Show progress - ## perc <- round((f/lf) * 100) - ## if ((perc %% 10 == 0) && (perc != lp)) - ## { - ## cat(perc," ",sep=""); - ## lp <- perc; - ## } - ## flush.console() - + ## cat("\nProcess roi ", f, "\n") feat <- roiList[[f]] N <- feat$scmax - feat$scmin + 1 peaks <- peakinfo <- NULL mzrange <- c(feat$mzmin, feat$mzmax) + mzrange_ROI <- mzrange sccenter <- feat$scmin[1] + floor(N/2) - 1 scrange <- c(feat$scmin, feat$scmax) ## scrange + noiserange, used for baseline detection and wavelet analysis @@ -322,18 +854,15 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect, eic <- .Call("getEIC", mz, int, scanindex, as.double(mzrange), as.integer(sr), as.integer(length(scanindex)), PACKAGE = "xcms") - ## eic <- rawEIC(object,mzrange=mzrange,scanrange=sr) d <- eic$intensity td <- sr[1]:sr[2] scan.range <- c(sr[1], sr[2]) ## original mzROI range idxs <- which(eic$scan %in% seq(scrange[1], scrange[2])) mzROI.EIC <- list(scan=eic$scan[idxs], intensity=eic$intensity[idxs]) - ## mzROI.EIC <- rawEIC(object,mzrange=mzrange,scanrange=scrange) omz <- .Call("getMZ", mz, int, scanindex, as.double(mzrange), as.integer(scrange), as.integer(length(scantime)), PACKAGE = 'xcms') - ## omz <- rawMZ(object,mzrange=mzrange,scanrange=scrange) if (all(omz == 0)) { warning("centWave: no peaks found in ROI.") next @@ -344,7 +873,6 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect, warning("centWave: no peaks found in ROI.") next } - ## scrange + scRangeTol, used for gauss fitting and continuous ## data above 1st baseline detection ftd <- max(td[1], scrange[1] - scRangeTol) : min(td[length(td)], @@ -358,7 +886,6 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect, noised <- .Call("getEIC", mz, int, scanindex, as.double(mzrange), as.integer(scanrange), as.integer(length(scanindex)), PACKAGE="xcms")$intensity - ## noised <- rawEIC(object,mzrange=mzrange,scanrange=scanrange)$intensity } else { noised <- d } @@ -442,14 +969,29 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect, p1 <- match(td[lwpos], otd)[1] p2 <- match(td[rwpos], otd) p2 <- p2[length(p2)] + ## cat("p1: ", p1, " p2: ", p2, "\n") if (is.na(p1)) p1 <- 1 if (is.na(p2)) p2 <- N mz.value <- omz[p1:p2] + ## cat("mz.value: ", paste0(mz.value, collapse = ", "), + ## "\n") mz.int <- od[p1:p2] maxint <- max(mz.int) + ## @MOD1: Remove mz values for which no intensity was + ## measured. Would be better if getEIC returned NA + ## if nothing was measured. + mz.value <- mz.value[mz.int > 0] + mz.int <- mz.int[mz.int > 0] + ## cat("mz.value: ", paste0(mz.value, collapse = ", "), + ## "\n") ## re-calculate m/z value for peak range + ## cat("mzrange refined: [", + ## paste0(mzrange, collapse = ", "), "]") + ## hm, shouldn't we get rid of the mz = 0 here? mzrange <- range(mz.value) + ## cat(" -> [", + ## paste0(mzrange, collapse = ", "), "]\n") mzmean <- do.call(mzCenterFun, list(mz = mz.value, intensity = mz.int)) @@ -499,23 +1041,41 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect, colnames(peakinfo) <- c("scale", "scaleNr", "scpos", "scmin", "scmax") for (p in 1:dim(peaks)[1]) { + ## @MOD2 + ## Fix for issue #135: reload the EIC data if the + ## mzrange differs from that of the ROI, but only if the mz + ## range of the peak is different from the one of the ROI. 
+ mzr <- peaks[p, c("mzmin", "mzmax")] + if (any(mzr != mzrange_ROI)) { + eic <- .Call("getEIC", mz, int, scanindex, + as.double(mzr), as.integer(sr), + as.integer(length(scanindex)), + PACKAGE = "xcms") + current_ints <- eic$intensity + ## Force re-loading also of a potential additional peak in + ## the same ROI. + mzrange_ROI <- c(0, 0) + } else { + current_ints <- d + } ## find minima, assign rt and intensity values if (integrate == 1) { lm <- descendMin(wCoefs[, peakinfo[p,"scaleNr"]], istart = peakinfo[p,"scpos"]) - gap <- all(d[lm[1]:lm[2]] == 0) ## looks like we got stuck in a gap right in the middle of the peak + gap <- all(current_ints[lm[1]:lm[2]] == 0) ## looks like we got stuck in a gap right in the middle of the peak if ((lm[1] == lm[2]) || gap )## fall-back - lm <- descendMinTol(d, + lm <- descendMinTol(current_ints, startpos = c(peakinfo[p, "scmin"], peakinfo[p, "scmax"]), maxDescOutlier) } else { - lm <- descendMinTol(d, startpos = c(peakinfo[p, "scmin"], - peakinfo[p, "scmax"]), + lm <- descendMinTol(current_ints, + startpos = c(peakinfo[p, "scmin"], + peakinfo[p, "scmax"]), maxDescOutlier) } ## narrow down peak rt boundaries by skipping zeros - pd <- d[lm[1]:lm[2]] + pd <- current_ints[lm[1]:lm[2]] np <- length(pd) lm.l <- findEqualGreaterUnsorted(pd, 1) lm.l <- max(1, lm.l - 1) @@ -526,22 +1086,24 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect, peakrange <- td[lm] peaks[p, "rtmin"] <- scantime[peakrange[1]] peaks[p, "rtmax"] <- scantime[peakrange[2]] - peaks[p, "maxo"] <- max(d[lm[1]:lm[2]]) + peaks[p, "maxo"] <- max(current_ints[lm[1]:lm[2]]) pwid <- (scantime[peakrange[2]] - scantime[peakrange[1]]) / (peakrange[2] - peakrange[1]) if (is.na(pwid)) pwid <- 1 - peaks[p, "into"] <- pwid * sum(d[lm[1]:lm[2]]) - db <- d[lm[1]:lm[2]] - baseline + peaks[p, "into"] <- pwid * sum(current_ints[lm[1]:lm[2]]) + db <- current_ints[lm[1]:lm[2]] - baseline peaks[p, "intb"] <- pwid * sum(db[db>0]) peaks[p, "lmin"] <- lm[1] peaks[p, "lmax"] <- lm[2] + ## cat("[", paste0(peaks[p, c("rtmin", "rtmax")], collapse = ", "), + ## "] into ", peaks[p, "into"], "\n") if (fitgauss) { ## perform gaussian fits, use wavelets for inital parameters - md <- max(d[lm[1]:lm[2]]) - d1 <- d[lm[1]:lm[2]] / md ## normalize data for gaussian error calc. - pgauss <- fitGauss(td[lm[1]:lm[2]], d[lm[1]:lm[2]], + md <- max(current_ints[lm[1]:lm[2]]) + d1 <- current_ints[lm[1]:lm[2]] / md ## normalize data for gaussian error calc. 
+ pgauss <- fitGauss(td[lm[1]:lm[2]], current_ints[lm[1]:lm[2]], pgauss = list(mu = peaks[p, "scpos"], sigma = peaks[p, "scmax"] - peaks[p, "scmin"], @@ -568,47 +1130,12 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect, } else peaks[p, "rt"] <- scantime[peaks[p, "scpos"]] } - peaks <- joinOverlappingPeaks(td, d, otd, omz, od, scantime, - scan.range, peaks, maxGaussOverlap, + ## Use d here instead of current_ints + peaks <- joinOverlappingPeaks(td, d, otd, omz, od, + scantime, scan.range, peaks, + maxGaussOverlap, mzCenterFun = mzCenterFun) } - - ## if ((sleep >0) && (!is.null(peaks))) { - ## tdp <- scantime[td]; trange <- range(tdp) - ## egauss <- paste(round(peaks[,"egauss"],3),collapse=", ") - ## cdppm <- paste(peaks[,"dppm"],collapse=", ") - ## csn <- paste(peaks[,"sn"],collapse=", ") - ## par(bg = "white") - ## l <- layout(matrix(c(1,2,3),nrow=3,ncol=1,byrow=T),heights=c(.5,.75,2)); - ## par(mar= c(2, 4, 4, 2) + 0.1) - ## plotRaw(object,mzrange=mzrange,rtrange=trange,log=TRUE,title='') - ## title(main=paste(f,': ', round(mzrange[1],4),' - ',round(mzrange[2],4),' m/z , dppm=',cdppm,', EGauss=',egauss ,', S/N =',csn,sep='')) - ## par(mar= c(1, 4, 1, 2) + 0.1) - ## image(y=scales[1:(dim(wCoefs)[2])],z=wCoefs,col=terrain.colors(256),xaxt='n',ylab='CWT coeff.') - ## par(mar= c(4, 4, 1, 2) + 0.1) - ## plot(tdp,d,ylab='Intensity',xlab='Scan Time');lines(tdp,d,lty=2) - ## lines(scantime[otd],od,lty=2,col='blue') ## original mzbox range - ## abline(h=baseline,col='green') - ## bwh <- length(sr[1]:sr[2]) - length(baseline) - ## if (odd(bwh)) {bwh1 <- floor(bwh/2); bwh2 <- bwh1+1} else {bwh1<-bwh2<-bwh/2} - ## if (any(!is.na(peaks[,"scpos"]))) - ## { ## plot centers and width found through wavelet analysis - ## abline(v=scantime[na.omit(peaks[(peaks[,"scpos"] >0),"scpos"])],col='red') - ## } - ## abline(v=na.omit(c(peaks[,"rtmin"],peaks[,"rtmax"])),col='green',lwd=1) - ## if (fitgauss) { - ## tdx <- seq(min(td),max(td),length.out=200) - ## tdxp <- seq(trange[1],trange[2],length.out=200) - ## fitted.peaks <- which(!is.na(peaks[,"mu"])) - ## for (p in fitted.peaks) - ## { ## plot gaussian fits - ## yg<-gauss(tdx,peaks[p,"h"],peaks[p,"mu"],peaks[p,"sigma"]) - ## lines(tdxp,yg,col='blue') - ## } - ## } - ## Sys.sleep(sleep) - ## } - if (!is.null(peaks)) { peaklist[[length(peaklist) + 1]] <- peaks } @@ -629,10 +1156,12 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect, return(nopeaks) } + ## cat("length peaklist: ", length(peaklist), "\n") p <- do.call(rbind, peaklist) if (!verboseColumns) p <- p[, basenames, drop = FALSE] - + return(p) + uorder <- order(p[, "into"], decreasing = TRUE) pm <- as.matrix(p[,c("mzmin", "mzmax", "rtmin", "rtmax"), drop = FALSE]) uindex <- rectUnique(pm, uorder, mzdiff, ydiff = -0.00001) ## allow adjacent peaks @@ -1134,389 +1663,6 @@ do_findChromPeaks_matchedFilter <- function(mz, return(rmat) } -## ############################################################ -## ## Same as do_findChromPeaks_matchedFilter except: -## ## -## ## o Using the binYtoX and imputeLinInterpol instead of the -## ## profBin* methods. -## ## THIS IS MATTER OF REMOVAL -## .matchedFilter_binYonX_iter <- function(mz, -## int, -## scantime, -## valsPerSpect, -## binSize = 0.1, -## impute = "none", -## baseValue, -## distance, -## fwhm = 30, -## sigma = fwhm/2.3548, -## max = 5, -## snthresh = 10, -## steps = 2, -## mzdiff = 0.8 - binSize * steps, -## index = FALSE -## ){ - -## ## Input argument checking. 
-## if (missing(mz) | missing(int) | missing(scantime) | missing(valsPerSpect)) -## stop("Arguments 'mz', 'int', 'scantime' and 'valsPerSpect'", -## " are required!") -## if (length(mz) != length(int) | length(valsPerSpect) != length(scantime) -## | length(mz) != sum(valsPerSpect)) -## stop("Lengths of 'mz', 'int' and of 'scantime','valsPerSpect'", -## " have to match. Also, 'length(mz)' should be equal to", -## " 'sum(valsPerSpect)'.") -## ## Get the profile/binning function: allowed: bin, lin, linbase and intlin -## impute <- match.arg(impute, c("none", "lin", "linbase", "intlin")) -## if (impute == "intlin") -## stop("Not yet implemented!") -## toIdx <- cumsum(valsPerSpect) -## fromIdx <- c(1L, toIdx[-length(toIdx)] + 1L) - -## ## Create EIC buffer -## mrange <- range(mz) -## mass <- seq(floor(mrange[1]/binSize)*binSize, -## ceiling(mrange[2]/binSize)*binSize, by = binSize) -## bufsize <- min(100, length(mass)) - -## ## Calculate the breaks; we will re-use these in all calls. -## ## Calculate breaks and "correct" binSize; using seq ensures we're closer -## ## to the xcms profBin* results. -## binFromX <- min(mass) -## binToX <- max(mass) -## bin_size <- (binToX - binFromX) / (length(mass) - 1) -## brks <- seq(binFromX - bin_size/2, binToX + bin_size/2, by = bin_size) - -## ## Problem with sequential binning is that we don't want to have the condition -## ## <= last_break in each iteration since this would cause some values being -## ## considered for multiple bins. Thus we add an additional last bin, for which -## ## we however want to get rid of later. -## binRes <- binYonX(mz, int, -## breaks = brks[1:(bufsize+2)], -## fromIdx = fromIdx, -## toIdx = toIdx, -## baseValue = ifelse(impute == "none", yes = 0, no = NA), -## sortedX = TRUE, -## returnIndex = TRUE -## ) -## if (length(toIdx) == 1) -## binRes <- list(binRes) -## ## Remove the last bin value unless bufsize + 2 is equal to the length of brks -## if (length(brks) > (bufsize + 2)) { -## binRes <- lapply(binRes, function(z) { -## len <- length(z$x) -## return(list(x = z$x[-len], y = z$y[-len], index = z$index[-len])) -## }) -## } -## bufMax <- do.call(cbind, lapply(binRes, function(z) return(z$index))) -## bin_size <- binRes[[1]]$x[2] - binRes[[1]]$x[1] -## if (missing(baseValue)) -## baseValue <- numeric() -## if (length(baseValue) == 0) -## baseValue <- min(int, na.rm = TRUE) / 2 -## if (missing(distance)) -## distance <- numeric() -## if (length(distance) == 0) -## distance <- floor(0.075 / bin_size) -## binVals <- lapply(binRes, function(z) { -## return(imputeLinInterpol(z$y, method = impute, -## noInterpolAtEnds = TRUE, -## distance = distance, -## baseValue = baseValue)) -## }) -## buf <- do.call(cbind, binVals) - -## bufidx <- integer(length(mass)) -## idxrange <- c(1, bufsize) -## bufidx[idxrange[1]:idxrange[2]] <- 1:bufsize -## lookahead <- steps-1 -## lookbehind <- 1 - -## N <- nextn(length(scantime)) -## xrange <- range(scantime) -## x <- c(0:(N/2), -(ceiling(N/2-1)):-1)*(xrange[2]-xrange[1])/(length(scantime)-1) - -## filt <- -attr(eval(deriv3(~ 1/(sigma*sqrt(2*pi))*exp(-x^2/(2*sigma^2)), "x")), "hessian") -## filt <- filt/sqrt(sum(filt^2)) -## filt <- fft(filt, inverse = TRUE)/length(filt) - -## cnames <- c("mz", "mzmin", "mzmax", "rt", "rtmin", "rtmax", "into", "intf", -## "maxo", "maxf", "i", "sn") -## rmat <- matrix(nrow = 2048, ncol = length(cnames)) -## num <- 0 - -## for (i in seq(length = length(mass)-steps+1)) { -## ## Update EIC buffer if necessary -## if (bufidx[i+lookahead] == 0) { -## 
bufidx[idxrange[1]:idxrange[2]] <- 0 -## idxrange <- c(max(1, i - lookbehind), min(bufsize+i-1-lookbehind, -## length(mass))) -## bufidx[idxrange[1]:idxrange[2]] <- 1:(diff(idxrange)+1) -## ## Avoid the problem reported above for the sequential buffering: -## ## add an additional bin for which we remove the value afterwards. -## additionalBin <- 0 -## if ((idxrange[2] + 1) < length(brks)) { -## additionalBin <- 1 -## } -## ## Re-fill buffer. -## binRes <- binYonX(mz, int, -## breaks = brks[idxrange[1]:(idxrange[2] + 1 + -## additionalBin)], -## fromIdx = fromIdx, -## toIdx = toIdx, -## baseValue = ifelse(impute == "none", yes = 0, -## no = NA), -## sortedX = TRUE, -## returnIndex = TRUE -## ) -## if (length(toIdx) == 1) -## binRes <- list(binRes) -## if (additionalBin == 1) { -## binRes <- lapply(binRes, function(z) { -## len <- length(z$x) -## return(list(x = z$x[-len], y = z$y[-len], -## index = z$index[-len])) -## }) -## } -## bufMax <- do.call(cbind, lapply(binRes, function(z) return(z$index))) -## binVals <- lapply(binRes, function(z) { -## return(imputeLinInterpol(z$y, method = impute, -## noInterpolAtEnds = TRUE, -## distance = distance, -## baseValue = baseValue)) -## }) -## buf <- do.call(cbind, binVals) -## } -## ymat <- buf[bufidx[i:(i+steps-1)],,drop=FALSE] -## ysums <- colMax(ymat) -## yfilt <- filtfft(ysums, filt) -## gmax <- max(yfilt) -## for (j in seq(length = max)) { -## maxy <- which.max(yfilt) -## noise <- mean(ysums[ysums > 0]) -## ##noise <- mean(yfilt[yfilt >= 0]) -## sn <- yfilt[maxy]/noise -## if (yfilt[maxy] > 0 && yfilt[maxy] > snthresh*noise && ysums[maxy] > 0) { -## peakrange <- descendZero(yfilt, maxy) -## intmat <- ymat[, peakrange[1]:peakrange[2], drop = FALSE] -## mzmat <- matrix(mz[bufMax[bufidx[i:(i+steps-1)], -## peakrange[1]:peakrange[2]]], -## nrow = steps) -## which.intMax <- which.colMax(intmat) -## mzmat <- mzmat[which.intMax] -## if (all(is.na(mzmat))) { -## yfilt[peakrange[1]:peakrange[2]] <- 0 -## next -## } -## mzrange <- range(mzmat, na.rm = TRUE) -## massmean <- weighted.mean(mzmat, intmat[which.intMax], na.rm = TRUE) -## ## This case (the only non-na m/z had intensity 0) was reported -## ## by Gregory Alan Barding "binlin processing" -## if(any(is.na(massmean))) { -## massmean <- mean(mzmat, na.rm = TRUE) -## } - -## pwid <- (scantime[peakrange[2]] - scantime[peakrange[1]]) / -## (peakrange[2] - peakrange[1]) -## into <- pwid*sum(ysums[peakrange[1]:peakrange[2]]) -## intf <- pwid*sum(yfilt[peakrange[1]:peakrange[2]]) -## maxo <- max(ysums[peakrange[1]:peakrange[2]]) -## maxf <- yfilt[maxy] -## yfilt[peakrange[1]:peakrange[2]] <- 0 -## num <- num + 1 -## ## Double the size of the output matrix if it's full -## if (num > nrow(rmat)) { -## nrmat <- matrix(nrow = 2*nrow(rmat), ncol = ncol(rmat)) -## nrmat[seq(length = nrow(rmat)),] = rmat -## rmat <- nrmat -## } -## rmat[num,] <- c(massmean, mzrange[1], mzrange[2], maxy, peakrange, -## into, intf, maxo, maxf, j, sn) -## } else -## break -## } -## } -## colnames(rmat) <- cnames -## rmat <- rmat[seq(length = num),] -## max <- max-1 + max*(steps-1) + max*ceiling(mzdiff/binSize) -## if (index) -## mzdiff <- mzdiff/binSize -## else { -## rmat[,"rt"] <- scantime[rmat[,"rt"]] -## rmat[,"rtmin"] <- scantime[rmat[,"rtmin"]] -## rmat[,"rtmax"] <- scantime[rmat[,"rtmax"]] -## } -## ## Select for each unique mzmin, mzmax, rtmin, rtmax the largest peak and report that. 
-## uorder <- order(rmat[,"into"], decreasing=TRUE) -## uindex <- rectUnique(rmat[,c("mzmin","mzmax","rtmin","rtmax"),drop=FALSE], -## uorder, mzdiff) -## rmat <- rmat[uindex,,drop=FALSE] -## return(rmat) -## } - - -## ############################################################ -## ## The code of this function is basically the same than of the original -## ## findPeaks.matchedFilter method in xcms with the following differences: -## ## o Create the full 'profile matrix' (i.e. the m/z binned matrix) once -## ## instead of repeatedly creating a "buffer" of 100 m/z values. -## ## o Append the identified peaks to a list instead of generating a matrix -## ## with a fixed set of rows which is doubled in its size each time more -## ## peaks are identified than there are rows in the matrix. -## .matchedFilter_no_iter <- function(mz, -## int, -## scantime, -## valsPerSpect, -## binSize = 0.1, -## impute = "none", -## baseValue, -## distance, -## fwhm = 30, -## sigma = fwhm/2.3548, -## max = 5, -## snthresh = 10, -## steps = 2, -## mzdiff = 0.8 - binSize * steps, -## index = FALSE -## ){ -## ## Map arguments to findPeaks.matchedFilter arguments. -## step <- binSize -## profMeths <- c("profBinM", "profBinLinM", "profBinLinBaseM", "profIntLinM") -## names(profMeths) <- c("none", "lin", "linbase", "intlin") -## impute <- match.arg(impute, names(profMeths)) -## profFun <- profMeths[impute] - -## ## Input argument checking. -## if (missing(mz) | missing(int) | missing(scantime) | missing(valsPerSpect)) -## stop("Arguments 'mz', 'int', 'scantime' and 'valsPerSpect'", -## " are required!") -## if (length(mz) != length(int) | length(valsPerSpect) != length(scantime) -## | length(mz) != sum(valsPerSpect)) -## stop("Lengths of 'mz', 'int' and of 'scantime','valsPerSpect'", -## " have to match. Also, 'length(mz)' should be equal to", -## " 'sum(valsPerSpect)'.") -## ## Calculate a the "scanindex" from the number of values per spectrum: -## scanindex <- valueCount2ScanIndex(valsPerSpect) - -## ## Create the full profile matrix. -## mrange <- range(mz) -## mass <- seq(floor(mrange[1]/step)*step, ceiling(mrange[2]/step)*step, by = step) -## ## Calculate the /real/ bin size (as in xcms.c code). -## bin_size <- (max(mass) - min(mass)) / (length(mass) - 1) -## ## bufsize <- min(100, length(mass)) -## bufsize <- length(mass) -## ## Define profparam: -## profp <- list() -## if (missing(baseValue)) -## baseValue <- numeric() -## if (length(baseValue) != 0) -## profp$baselevel <- baseValue -## if (missing(distance)) -## distance <- numeric() -## if (length(distance) != 0) -## profp$basespace <- distance * bin_size -## ## This returns a matrix, ncol equals the number of spectra, nrow the bufsize. -## buf <- do.call(profFun, args = list(mz, int, scanindex, bufsize, mass[1], -## mass[bufsize], TRUE, profp)) - -## ## The full matrix, nrow is the total number of (binned) m/z values. 
-## bufMax <- profMaxIdxM(mz, int, scanindex, bufsize, mass[1], mass[bufsize], -## TRUE, profp) -## ## bufidx <- integer(length(mass)) -## ## idxrange <- c(1, bufsize) -## ## bufidx[idxrange[1]:idxrange[2]] <- 1:bufsize -## bufidx <- 1L:length(mass) -## lookahead <- steps-1 -## lookbehind <- 1 - -## N <- nextn(length(scantime)) -## xrange <- range(scantime) -## x <- c(0:(N/2), -(ceiling(N/2-1)):-1)*(xrange[2]-xrange[1])/(length(scantime)-1) - -## filt <- -attr(eval(deriv3(~ 1/(sigma*sqrt(2*pi))*exp(-x^2/(2*sigma^2)), "x")), "hessian") -## filt <- filt/sqrt(sum(filt^2)) -## filt <- fft(filt, inverse = TRUE)/length(filt) - -## cnames <- c("mz", "mzmin", "mzmax", "rt", "rtmin", "rtmax", "into", "intf", -## "maxo", "maxf", "i", "sn") -## num <- 0 - -## ResList <- list() - -## ## Can not do much here, lapply/apply won't work because of the 'steps' parameter. -## ## That's looping through the masses, i.e. rows of the profile matrix. -## for (i in seq(length = (length(mass)-steps+1))) { - -## ymat <- buf[bufidx[i:(i+steps-1)], , drop = FALSE] -## ysums <- colMax(ymat) -## yfilt <- filtfft(ysums, filt) -## gmax <- max(yfilt) -## for (j in seq(length = max)) { -## maxy <- which.max(yfilt) -## noise <- mean(ysums[ysums > 0]) -## ##noise <- mean(yfilt[yfilt >= 0]) -## sn <- yfilt[maxy]/noise -## if (yfilt[maxy] > 0 && yfilt[maxy] > snthresh*noise && ysums[maxy] > 0) { -## peakrange <- descendZero(yfilt, maxy) -## intmat <- ymat[, peakrange[1]:peakrange[2], drop = FALSE] -## mzmat <- matrix(mz[bufMax[bufidx[i:(i+steps-1)], -## peakrange[1]:peakrange[2]]], -## nrow = steps) -## which.intMax <- which.colMax(intmat) -## mzmat <- mzmat[which.intMax] -## if (all(is.na(mzmat))) { -## yfilt[peakrange[1]:peakrange[2]] <- 0 -## next -## } -## mzrange <- range(mzmat, na.rm = TRUE) -## massmean <- weighted.mean(mzmat, intmat[which.intMax], na.rm = TRUE) -## ## This case (the only non-na m/z had intensity 0) was reported -## ## by Gregory Alan Barding "binlin processing" -## if(any(is.na(massmean))) { -## massmean <- mean(mzmat, na.rm = TRUE) -## } - -## pwid <- (scantime[peakrange[2]] - scantime[peakrange[1]]) / -## (peakrange[2] - peakrange[1]) -## into <- pwid*sum(ysums[peakrange[1]:peakrange[2]]) -## intf <- pwid*sum(yfilt[peakrange[1]:peakrange[2]]) -## maxo <- max(ysums[peakrange[1]:peakrange[2]]) -## maxf <- yfilt[maxy] -## yfilt[peakrange[1]:peakrange[2]] <- 0 -## num <- num + 1 -## ResList[[num]] <- c(massmean, mzrange[1], mzrange[2], maxy, peakrange, -## into, intf, maxo, maxf, j, sn) -## } else -## break -## } -## } -## if (length(ResList) == 0) { -## rmat <- matrix(nrow = 0, ncol = length(cnames)) -## colnames(rmat) <- cnames -## return(rmat) -## } -## rmat <- do.call(rbind, ResList) -## if (is.null(dim(rmat))) { -## rmat <- matrix(rmat, nrow = 1) -## } -## colnames(rmat) <- cnames -## max <- max-1 + max*(steps-1) + max*ceiling(mzdiff/step) -## if (index) -## mzdiff <- mzdiff/step -## else { -## rmat[, "rt"] <- scantime[rmat[, "rt"]] -## rmat[, "rtmin"] <- scantime[rmat[, "rtmin"]] -## rmat[, "rtmax"] <- scantime[rmat[, "rtmax"]] -## } -## ## Select for each unique mzmin, mzmax, rtmin, rtmax the largest peak and report that. 
-## uorder <- order(rmat[, "into"], decreasing = TRUE) -## uindex <- rectUnique(rmat[, c("mzmin", "mzmax", "rtmin", "rtmax"), -## drop = FALSE], -## uorder, mzdiff) -## rmat <- rmat[uindex,,drop = FALSE] -## return(rmat) -## } - ############################################################ ## The code of this function is basically the same than of the original ## findPeaks.matchedFilter method in xcms with the following differences: @@ -2498,7 +2644,7 @@ do_findChromPeaks_addPredIsoROIs <- "valid signal found!") return(peaks.) } - + ## 3) centWave using the identified ROIs. roiL <- split(as.data.frame(newROIs), f = 1:nrow(newROIs)) feats_2 <- do_findChromPeaks_centWave(mz = mz, int = int, @@ -2522,13 +2668,171 @@ do_findChromPeaks_addPredIsoROIs <- "rtmin", "rtmax")])) if (any(any_na)) feats_2 <- feats_2[!any_na, , drop = FALSE] + no_mz_width <- (feats_2[, "mzmax"] - feats_2[, "mzmin"]) == 0 + no_rt_width <- (feats_2[, "rtmax"] - feats_2[, "rtmin"]) == 0 ## remove empty area - no_area <- (feats_2[, "mzmax"] - feats_2[, "mzmin"]) == 0 || - (feats_2[, "rtmax"] - feats_2[, "rtmin"]) == 0 + ## no_area <- (feats_2[, "mzmax"] - feats_2[, "mzmin"]) == 0 || + ## (feats_2[, "rtmax"] - feats_2[, "rtmin"]) == 0 + no_area <- no_mz_width || no_rt_width if (any(no_area)) feats_2 <- feats_2[!no_area, , drop = FALSE] } + ## 4) Check and remove ROIs overlapping with peaks. + if (nrow(feats_2) > 0) { + ## Comparing each ROI with each peak; slightly modified from the original + ## code in which we prevent calling apply followed by two lapply. + removeROIs <- rep(FALSE, nrow(feats_2)) + removeFeats <- rep(FALSE, nrow(peaks.)) + overlapProportionThreshold <- 0.01 + for (i in 1:nrow(feats_2)) { + ## Compare ROI i with all peaks (peaks) and check if its + ## overlapping + ## mz + roiMzCenter <- (feats_2[i, "mzmin"] + feats_2[i, "mzmax"]) / 2 + peakMzCenter <- (peaks.[, "mzmin"] + peaks.[, "mzmax"]) / 2 + roiMzRadius <- (feats_2[i, "mzmax"] - feats_2[i, "mzmin"]) / 2 + peakMzRadius <- (peaks.[, "mzmax"] - peaks.[, "mzmin"]) / 2 + overlappingMz <- abs(peakMzCenter - roiMzCenter) <= + (roiMzRadius + peakMzRadius) + ## rt + roiRtCenter <- (feats_2[i, "rtmin"] + feats_2[i, "rtmax"]) / 2 + peakRtCenter <- (peaks.[, "rtmin"] + peaks.[, "rtmax"]) / 2 + roiRtRadius <- (feats_2[i, "rtmax"] - feats_2[i, "rtmin"]) / 2 + peakRtRadius <- (peaks.[, "rtmax"] - peaks.[, "rtmin"]) / 2 + overlappingRt <- abs(peakRtCenter - roiRtCenter) <= + (roiRtRadius + peakRtRadius) + is_overlapping <- overlappingMz & overlappingRt + ## Now determine whether we remove the ROI or the peak, depending + ## on the raw signal intensity. + if (any(is_overlapping)) { + if (any(peaks.[is_overlapping, "into"] > feats_2[i, "into"])) { + removeROIs[i] <- TRUE + } else { + removeFeats[is_overlapping] <- TRUE + } + } + } + feats_2 <- feats_2[!removeROIs, , drop = FALSE] + peaks. <- peaks.[!removeFeats, , drop = FALSE] + } + if (!verboseColumns) + peaks. <- peaks.[ , c("mz", "mzmin", "mzmax", "rt", "rtmin", + "rtmax", "into", "intb", "maxo", "sn")] + if (nrow(feats_2) == 0) + return(peaks.) + else + return(rbind(peaks., feats_2)) +} + +do_findChromPeaks_addPredIsoROIs_mod <- + function(mz, int, scantime, valsPerSpect, ppm = 25, peakwidth = c(20, 50), + snthresh = 6.25, prefilter = c(3, 100), mzCenterFun = "wMean", + integrate = 1, mzdiff = -0.001, fitgauss = FALSE, noise = 0, + verboseColumns = FALSE, peaks. 
= NULL, + maxCharge = 3, maxIso = 5, mzIntervalExtension = TRUE, + polarity = "unknown") { + ## Input argument checking: most of it will be done in + ## do_findChromPeaks_centWave + polarity <- match.arg(polarity, c("positive", "negative", "unknown")) + + ## These variables might at some point be added as function args. + addNewIsotopeROIs <- TRUE + addNewAdductROIs <- FALSE + ## 2) predict isotope and/or adduct ROIs + f_mod <- peaks. + ## Extend the mzmin and mzmax if needed. + tittle <- peaks.[, "mz"] * (ppm / 2) / 1E6 + expand_mz <- (peaks.[, "mzmax"] - peaks.[, "mzmin"]) < (tittle * 2) + if (any(expand_mz)) { + f_mod[expand_mz, "mzmin"] <- peaks.[expand_mz, "mz"] - + tittle[expand_mz] + f_mod[expand_mz, "mzmax"] <- peaks.[expand_mz, "mz"] + tittle[expand_mz] + } + ## Add predicted ROIs + if (addNewIsotopeROIs) { + iso_ROIs <- do_define_isotopes(peaks. = f_mod, + maxCharge = maxCharge, + maxIso = maxIso, + mzIntervalExtension = mzIntervalExtension) + } else { + iso_ROIs <- matrix(nrow = 0, ncol = 8) + colnames(iso_ROIs) <- c("mz", "mzmin", "mzmax", "scmin", "scmax", + "length", "intensity", "scale") + } + if (addNewAdductROIs) { + add_ROIs <- do_define_adducts(peaks. = f_mod, polarity = polarity) + } else { + add_ROIs <- matrix(nrow = 0, ncol = 8) + colnames(iso_ROIs) <- c("mz", "mzmin", "mzmax", "scmin", "scmax", + "length", "intensity", "scale") + } + newROIs <- rbind(iso_ROIs, add_ROIs) + rm(f_mod) + if (nrow(newROIs) == 0) + return(peaks.) + ## Remove ROIs that are out of mz range: + mz_range <- range(mz) + newROIs <- newROIs[newROIs[, "mzmin"] >= mz_range[1] & + newROIs[, "mzmax"] <= mz_range[2], , drop = FALSE] + ## Remove ROIs with too low signal: + keep_me <- logical(nrow(newROIs)) + scanindex <- as.integer(valueCount2ScanIndex(valsPerSpect)) + for (i in 1:nrow(newROIs)) { + vals <- .Call("getEIC", mz, int, scanindex, + as.double(newROIs[i, c("mzmin", "mzmax")]), + as.integer(newROIs[i, c("scmin", "scmax")]), + as.integer(length(scantime)), PACKAGE ='xcms' ) + keep_me[i] <- sum(vals$intensity, na.rm = TRUE) >= 10 + } + newROIs <- newROIs[keep_me, , drop = FALSE] + + if (nrow(newROIs) == 0) { + warning("No isotope or adduct ROIs for the identified peaks with a ", + "valid signal found!") + return(peaks.) + } + cat("No. of input peaks: ", nrow(peaks.), "\n") + + ## 3) centWave using the identified ROIs. + roiL <- split(as.data.frame(newROIs), f = 1:nrow(newROIs)) + cat("Identified iso ROIs: ", length(roiL), "\n") + feats_2 <- do_findChromPeaks_centWave(mz = mz, int = int, + scantime = scantime, + valsPerSpect = valsPerSpect, + ppm = ppm, peakwidth = peakwidth, + snthresh = snthresh, + prefilter = prefilter, + mzCenterFun = mzCenterFun, + integrate = integrate, + mzdiff = mzdiff, fitgauss = fitgauss, + noise = noise, + verboseColumns = verboseColumns, + roiList = roiL, + firstBaselineCheck = FALSE, + roiScales = newROIs[, "scale"]) + cat("No. of chrom. peaks found in ROIs: ", nrow(feats_2), "\n") + ## Clean up of the results: + if (nrow(feats_2) > 0) { + ## remove NaNs + any_na <- is.na(rowSums(feats_2[, c("mz", "mzmin", "mzmax", "rt", + "rtmin", "rtmax")])) + if (any(any_na)) + feats_2 <- feats_2[!any_na, , drop = FALSE] + no_mz_width <- (feats_2[, "mzmax"] - feats_2[, "mzmin"]) == 0 + no_rt_width <- (feats_2[, "rtmax"] - feats_2[, "rtmin"]) == 0 + cat("No. of peaks with NA values: ", sum(any_na), "\n") + cat("No. of peaks without mz width: ", sum(no_mz_width), "\n") + cat("No. 
of peaks without rt width: ", sum(no_rt_width), "\n") + ## remove empty area + ## no_area <- (feats_2[, "mzmax"] - feats_2[, "mzmin"]) == 0 || + ## (feats_2[, "rtmax"] - feats_2[, "rtmin"]) == 0 + ## no_area <- no_mz_width || no_rt_width + no_area <- no_mz_width + if (any(no_area)) + feats_2 <- feats_2[!no_area, , drop = FALSE] + } + cat("After removing NAs or empty are peaks: ", nrow(feats_2), "\n") ## 4) Check and remove ROIs overlapping with peaks. if (nrow(feats_2) > 0) { ## Comparing each ROI with each peak; slightly modified from the original @@ -2567,9 +2871,13 @@ do_findChromPeaks_addPredIsoROIs <- feats_2 <- feats_2[!removeROIs, , drop = FALSE] peaks. <- peaks.[!removeFeats, , drop = FALSE] } + cat("After removing overlapping peaks: ", nrow(feats_2), "\n") + cat("After removing overlapping peaks (peaks.): ", nrow(peaks.), "\n") if (!verboseColumns) peaks. <- peaks.[ , c("mz", "mzmin", "mzmax", "rt", "rtmin", "rtmax", "into", "intb", "maxo", "sn")] + ## For now just return the new ones. + return(feats_2) if (nrow(feats_2) == 0) return(peaks.) else diff --git a/R/functions-Chromatogram.R b/R/functions-Chromatogram.R index 6f31a6749..35e3c2123 100644 --- a/R/functions-Chromatogram.R +++ b/R/functions-Chromatogram.R @@ -18,34 +18,39 @@ names(.SUPPORTED_AGG_FUN_CHROM) <- ##' @author Johannes Rainer ##' @noRd validChromatogram <- function(object) { - msg <- validMsg(NULL, NULL) + msg <- character() if (length(object@rtime) != length(object@intensity)) - msg <- validMsg(msg, "Length of 'rt' and 'intensity' have to match!") + msg <- c(msg, "Length of 'rt' and 'intensity' have to match!") if (is.unsorted(object@mz)) - msg <- validMsg(msg, "'mz' has to be increasingly ordered!") + msg <- c(msg, "'mz' has to be increasingly ordered!") if (is.unsorted(object@rtime)) - msg <- validMsg(msg, paste0("'rtime' has to be increasingly ordered!")) + msg <- c(msg, paste0("'rtime' has to be increasingly ordered!")) if (length(object@mz) > 0 & length(object@mz) != 2) - msg <- validMsg(msg, paste0("'mz' is supposed to contain the ", - "minimum and maximum mz values for the ", - "chromatogram.")) + msg <- c(msg, paste0("'mz' is supposed to contain the ", + "minimum and maximum mz values for the ", + "chromatogram.")) if (length(object@filterMz) > 0 & length(object@filterMz) != 2) - msg <- validMsg(msg, paste0("'filterMz' is supposed to contain the ", - "minimum and maximum mz values of the filter", - " used to create the chromatogram.")) + msg <- c(msg, paste0("'filterMz' is supposed to contain the ", + "minimum and maximum mz values of the filter", + " used to create the chromatogram.")) + if (length(object@precursorMz) > 0 & length(object@precursorMz) != 2) + msg <- c(msg, paste0("'precursorMz' is supposed to be a numeric of", + " length 2.")) + if (length(object@productMz) > 0 & length(object@productMz) != 2) + msg <- c(msg, paste0("'productMz' is supposed to be a numeric of", + " length 2.")) if (length(object@fromFile) > 1 | any(object@fromFile < 0)) - msg <- validMsg(msg, paste0("'fromFile' is supposed to be a single ", - "positive integer!")) + msg <- c(msg, paste0("'fromFile' is supposed to be a single ", + "positive integer!")) if (length(object@aggregationFun) > 1) - msg <- validMsg(msg, "Length of 'aggregationFun' has to be 1!") + msg <- c(msg, "Length of 'aggregationFun' has to be 1!") if (length(object@aggregationFun)) { if (!object@aggregationFun %in% .SUPPORTED_AGG_FUN_CHROM) - msg <- validMsg(msg, paste0("Invalid value for 'aggregationFun'! 
", - "only ", - paste0("'", .SUPPORTED_AGG_FUN_CHROM,"'", - collapse = ","), " are allowed!")) + msg <- c(msg, paste0("Invalid value for 'aggregationFun'! only ", + paste0("'", .SUPPORTED_AGG_FUN_CHROM,"'", + collapse = ","), " are allowed!")) } - if (is.null(msg)) TRUE + if (length(msg) == 0) TRUE else msg } @@ -66,6 +71,12 @@ validChromatogram <- function(object) { ##' @param filterMz \code{numeric(2)} representing the mz value range (min, ##' max) that was used to filter the original object on mz dimension. If not ##' applicable use \code{filterMz = c(0, 0)}. +##' +##' @param precursorMz \code{numeric(2)} for SRM/MRM transitions. +##' Represents the mz of the precursor ion. See details for more information. +##' +##' @param productMz \code{numeric(2)} for SRM/MRM transitions. +##' Represents the mz of the product. See details for more information. ##' ##' @param fromFile \code{integer(1)} the index of the file within the ##' \code{\link{OnDiskMSnExp}} or \code{\link{XCMSnExp}} from which the @@ -76,14 +87,17 @@ validChromatogram <- function(object) { ##' mz range. Supported are \code{"sum"} (total ion chromatogram), \code{"max"} ##' (base peak chromatogram), \code{"min"} and \code{"mean"}. ##' -##' @slot rtime,intensity,mzrange,filterMzrange,fromFile,aggregationFun See corresponding parameter above. +##' @slot .__classVersion__,rtime,intensity,mz,filterMz,precursorMz,productMz,fromFile,aggregationFun See corresponding parameter above. ##' ##' @rdname Chromatogram-class Chromatogram <- function(rtime = numeric(), intensity = numeric(), mz = c(0, 0), filterMz = c(0, 0), + precursorMz = c(NA_real_, NA_real_), + productMz = c(NA_real_, NA_real_), fromFile = integer(), aggregationFun = character()) { return(new("Chromatogram", rtime = rtime, intensity = intensity, mz = range(mz), filterMz = range(filterMz), + precursorMz = range(precursorMz), productMz = range(productMz), fromFile = as.integer(fromFile), aggregationFun = aggregationFun)) } diff --git a/R/functions-IO.R b/R/functions-IO.R index 031c2c1fb..140f58805 100644 --- a/R/functions-IO.R +++ b/R/functions-IO.R @@ -13,7 +13,7 @@ isCdfFile <- function(x) { res <- sapply(patts, function(z) { grep(z, x, ignore.case = TRUE) }) - return(any(unlist(res))) + any(unlist(res)) } ############################################################ @@ -21,15 +21,30 @@ isCdfFile <- function(x) { ## ## Just guessing whether the file is a mzML file based on its ending. isMzMLFile <- function(x) { - fileEnds <- c("mzxml", "mzml", "mzdata") + fileEnds <- c("mzml") ## check for endings and and ending followed by a . (e.g. mzML.gz) patts <- paste0("\\.", fileEnds, "($|\\.)") res <- sapply(patts, function(z) { grep(z, x, ignore.case = TRUE) }) - return(any(unlist(res))) + any(unlist(res)) } +############################################################ +## isRampFile +## +## Files that have to be read using the Ramp backend. +isRampFile <- function(x) { + fileEnds <- c("mzxml", "mzdata") + ## check for endings and and ending followed by a . (e.g. mzML.gz) + patts <- paste0("\\.", fileEnds, "($|\\.)") + res <- sapply(patts, function(z) { + grep(z, x, ignore.case = TRUE) + }) + any(unlist(res)) +} + + ############################################################ ## readRawData ## @@ -40,8 +55,6 @@ isMzMLFile <- function(x) { ##' @param x The file name. ##' @param includeMSn logical(1) indicating whether MS level > 1 should be loaded ##' too. Only supported for mzML files. -##' @param backendMzML Default backend to be used for mzML files. 
Can b wither -##' \code{"Ramp"} or \code{"pwiz"}. ##' @param dropEmptyScans Scans/spectra without peaks are not returned if ##' \code{dropEmptyScans = TRUE}. If \code{FALSE} all spectra from the input ##' file are returned. This is to be consistent with the code before @@ -49,20 +62,20 @@ isMzMLFile <- function(x) { ##' https://github.com/sneumann/xcms/issues/67). ##' @return A \code{list} with rt, tic, scanindex, mz and intensity. ##' @noRd -readRawData <- function(x, includeMSn = FALSE, backendMzML = "Ramp", - dropEmptyScans = TRUE) { +readRawData <- function(x, includeMSn = FALSE, dropEmptyScans = TRUE) { ## def_backend <- "Ramp" ## Eventually use pwiz... header_cols <- c("retentionTime", "acquisitionNum", "totIonCurrent") - if (isCdfFile(x)) { + backend <- NA + if (isCdfFile(x)) backend <- "netCDF" - } else { - if (isMzMLFile(x)) { - backend <- backendMzML - header_cols <- c(header_cols, "polarity") - } else { - stop("Unknown file type.") - } + if (isRampFile(x)) + backend <- "Ramp" + if (isMzMLFile(x)) { + backend <- "pwiz" + header_cols <- c(header_cols, "polarity") } + if (is.na(backend)) + stop("Unsupported file type.") msd <- mzR::openMSfile(x, backend = backend) on.exit(if(!is.null(msd)) mzR::close(msd)) ## That's due to issue https://github.com/lgatto/MSnbase/issues/151 @@ -139,5 +152,6 @@ readRawData <- function(x, includeMSn = FALSE, backendMzML = "Ramp", } mzR::close(msd) mzR <- NULL + gc() resList } diff --git a/R/functions-MsFeatureData.R b/R/functions-MsFeatureData.R index 6df12c801..969530841 100644 --- a/R/functions-MsFeatureData.R +++ b/R/functions-MsFeatureData.R @@ -81,13 +81,13 @@ validateMsFeatureData <- function(x) { msg <- c(msg, paste0("Some of the indices in column", " 'peakidx' of element ", - "'featureDefinitionss' do not match ", + "'featureDefinitions' do not match ", "rows of the 'chromPeaks' matrix!")) } } } else { - msg <- c(msg, paste0("The 'featureDefinitionss' element has to", + msg <- c(msg, paste0("The 'featureDefinitions' element has to", " be of type 'DataFrame' and not '", class(x$featureDefinitions), "'!")) } diff --git a/R/functions-OnDiskMSnExp.R b/R/functions-OnDiskMSnExp.R index aaeb21c25..10d675451 100644 --- a/R/functions-OnDiskMSnExp.R +++ b/R/functions-OnDiskMSnExp.R @@ -51,15 +51,19 @@ findChromPeaks_Spectrum_list <- function(x, method = "centWave", param, rt) { stop("Spectra are not ordered by retention time!") mzs <- lapply(x, mz) procDat <- date() - return(list(peaks = do.call(method, - args = c(list(mz = unlist(mzs, - use.names = FALSE), - int = unlist(lapply(x, intensity), - use.names = FALSE), - valsPerSpect = lengths(mzs, FALSE), - scantime = rt), - as(param, "list"))), - date = procDat)) + res <- do.call(method, args = c(list(mz = unlist(mzs, + use.names = FALSE), + int = unlist(lapply(x, intensity), + use.names = FALSE), + valsPerSpect = lengths(mzs, FALSE), + scantime = rt), + as(param, "list"))) + ## Ensure that we call the garbage collector to eventually clean unused stuff + rm(mzs) + rm(x) + rm(rt) + gc() + return(list(peaks = res, date = procDat)) } ## That's a special case since we don't expect to have rt available for this. diff --git a/R/functions-Params.R b/R/functions-Params.R index 2f40eebc2..82ddc882e 100644 --- a/R/functions-Params.R +++ b/R/functions-Params.R @@ -35,6 +35,41 @@ } } +## Just get the name of the algorithm for each Parameter class. 
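## Illustrative behaviour, assuming the default parameter constructors
## exported by the package:
##   .param2string(CentWaveParam())    ## "centWave"
##   .param2string(ObiwarpParam())     ## "obiwarp"
## Parameter classes without a dedicated branch (e.g. GenericParam,
## FillChromPeaksParam) fall through to "unknown".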
+.param2string <- function(x) { + if (is(x, "CentWaveParam")) + return("centWave") + if (is(x, "MatchedFilterParam")) + return("matchedFilter") + if (is(x, "MassifquantParam")) + return("massifquant") + if (is(x, "MSWParam")) + return("MSW") + if (is(x, "CentWavePredIsoParam")) + return("centWave with predicted isotope ROIs") + if (is(x, "PeakDensityParam")) + return("chromatographic peak density") + if (is(x, "MzClustParam")) + return("mzClust") + if (is(x, "NearestPeaksParam")) + return("nearest peaks") + if (is(x, "PeakGroupsParam")) + return("peak groups") + if (is(x, "ObiwarpParam")) + return("obiwarp") + return("unknown") +} + +############################################################ +## GenericParam +#' @return The \code{GenericParam} function returns a \code{GenericParam} object. +#' @param fun \code{character} representing the name of the function. +#' @param args \code{list} (ideally named) with the arguments to the function. +#' @rdname GenericParam +GenericParam <- function(fun = character(), args = list()) { + return(new("GenericParam", fun = fun, args = args)) +} + ############################################################ ## CentWaveParam @@ -75,6 +110,18 @@ MatchedFilterParam <- function(binSize = 0.1, impute = "none", sigma = sigma, max = max, snthresh = snthresh, steps = steps, mzdiff = mzdiff, index = index)) } +#' Convert the impute method to the old-style method name (e.g. for profMat +#' calls) +#' @noRd +.impute2method <- function(x) { + if (impute(x) == "none") + return("bin") + if (impute(x) == "lin") + return("binlin") + if (impute(x) == "linbase") + return("binlinbase") + return("intlin") +} ############################################################ ## MassifquantParam @@ -269,3 +316,14 @@ ObiwarpParam <- function(binSize = 1, centerSample = integer(), response = 1L, factorGap = factorGap, localAlignment = localAlignment, initPenalty = initPenalty)) } + +############################################################ +## FillChromPeaksParam + +#' @return The \code{FillChromPeaksParam} function returns a +#' \code{FillChromPeaksParam} object. +#' @rdname fillChromPeaks +FillChromPeaksParam <- function(expandMz = 0, expandRt = 0, ppm = 0) { + return(new("FillChromPeaksParam", expandMz = expandMz, expandRt = expandRt, + ppm = ppm)) +} diff --git a/R/functions-ProcessHistory.R b/R/functions-ProcessHistory.R index e6843be09..00867241b 100644 --- a/R/functions-ProcessHistory.R +++ b/R/functions-ProcessHistory.R @@ -2,6 +2,15 @@ ## Functions for ProcessHistory objects #' @include DataClasses.R +#' @description \code{processHistoryTypes} returns the available \emph{types} of +#' process histories. These can be passed with argument \code{type} to the +#' \code{processHistory} method to extract specific process step(s). +#' +#' @rdname XCMSnExp-class +processHistoryTypes <- function() { + .PROCSTEPS +} + ############################################################ ## Constructor ProcessHistory <- function(type., date., info., error., fileIndex.) { diff --git a/R/functions-XCMSnExp.R b/R/functions-XCMSnExp.R index 6c020d3e7..500e809a3 100644 --- a/R/functions-XCMSnExp.R +++ b/R/functions-XCMSnExp.R @@ -1,4 +1,4 @@ -#' @include DataClasses.R +#' @include DataClasses.R functions-utils.R ##' Takes a XCMSnExp and drops ProcessHistory steps from the @.processHistory ##' slot matching the provided type. @@ -110,6 +110,10 @@ dropProcessHistoriesList <- function(x, type, num = -1) { ## and if this is used at all. ## @filled ... not yet. 
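    ## Peaks added by the new fillChromPeaks() method carry is_filled = 1 in
    ## the chromPeaks matrix; their row indices are copied to the xcmsSet
    ## @filled slot below so that the legacy xcmsSet class can tell filled
    ## from detected peaks. A minimal sketch of the fill workflow (object
    ## names are illustrative, 'raw_data' being an OnDiskMSnExp, default
    ## parameters used throughout):
    ##   xod <- findChromPeaks(raw_data, param = CentWaveParam())
    ##   xod <- groupChromPeaks(xod, param = PeakDensityParam())
    ##   xod <- fillChromPeaks(xod, param = FillChromPeaksParam(expandMz = 0,
    ##                                                          expandRt = 0,
    ##                                                          ppm = 0))
    ##   sum(chromPeaks(xod)[, "is_filled"] == 1)  ## number of filled-in peaks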
+ if (any(chromPeaks(from)[, "is_filled"] == 1)) { + fld <- which(chromPeaks(from)[, "is_filled"] == 1) + xs@filled <- as.integer(fld) + } ## @dataCorrection (numeric) ? in xcmsSet function, if lockMassFreq. ## @progressInfo skip ## @progressCallback skip @@ -256,34 +260,481 @@ dropProcessHistoriesList <- function(x, type, num = -1) { ## Now, call spectrapply on the object to return the data we need from each ## Spectrum: the aggregated intensity values per spectrum and the mz value ## range. - res <- spectrapply(subs, FUN = function(z) { - if (!z@peaksCount) - return(list()) - return(c(range(z@mz), do.call(aggregationFun, list(z@intensity)))) - }) + ## Note: we're returning NA in case we don't have a valid measurement for a + ## retention time within the specified mz + suppressWarnings( + res <- spectrapply(subs, FUN = function(z) { + if (!z@peaksCount) + return(c(NA_real_, NA_real_, NA_real_)) + ## return(list()) + return(c(range(z@mz), do.call(aggregationFun, list(z@intensity)))) + }) + ) ## Do I want to drop the names? - not_empty <- base::which(base::lengths(res) > 0) - if (length(not_empty)) { - res <- split(res[not_empty], f = fromFile(subs)[not_empty]) - rtm <- split(rtime(subs)[not_empty], f = fromFile(subs)[not_empty]) - ## We want to have one Chromatogram per file. - ## Let's use a simple for loop here - no need for an mapply (yet). - resL <- vector("list", length(res)) - for (i in 1:length(res)) { - allVals <- unlist(res[[i]], use.names = FALSE) - idx <- seq(3, length(allVals), by = 3) - mzr <- range(allVals[-idx], na.rm = TRUE, finite = TRUE) - ## Or should we drop the names completely? - ints <- allVals[idx] - names(ints) <- names(rtm[[i]]) - resL[[i]] <- Chromatogram(rtime = rtm[[i]], - intensity = ints, mz = mzr, - filterMz = fmzr, - fromFile = as.integer(names(res)[i]), - aggregationFun = aggregationFun) - } - return(resL) - } else { + nas <- unlist(lapply(res, function(z) is.na(z[3])), use.names = FALSE) + if (all(nas)) return(list()) + ## not_empty <- base::which(base::lengths(res) > 0) + ## if (length(not_empty)) { + ## res <- split(res[not_empty], f = fromFile(subs)[not_empty]) + ## rtm <- split(rtime(subs, ...)[not_empty], f = fromFile(subs)[not_empty]) + res <- split(res, f = fromFile(subs)) + rtm <- split(rtime(subs, ...), f = fromFile(subs)) + ## We want to have one Chromatogram per file. + ## Let's use a simple for loop here - no need for an mapply (yet). + resL <- vector("list", length(res)) + for (i in 1:length(res)) { + allVals <- unlist(res[[i]], use.names = FALSE) + idx <- seq(3, length(allVals), by = 3) + mzr <- range(allVals[-idx], na.rm = TRUE, finite = TRUE) + ## Or should we drop the names completely? + ints <- allVals[idx] + names(ints) <- names(rtm[[i]]) + resL[[i]] <- Chromatogram(rtime = rtm[[i]], + intensity = ints, mz = mzr, + filterMz = fmzr, + fromFile = as.integer(names(res)[i]), + aggregationFun = aggregationFun) + } + return(resL) + ## } else { + ## return(list()) + ## } +} + +#' Integrates the intensities for chromatograpic peak(s). This is supposed to be +#' called by the fillChromPeaks method. +#' +#' @note Use one of .getPeakInt2 or .getPeakInt3 instead! +#' +#' @param object An \code{XCMSnExp} object representing a single sample. +#' @param peakArea A \code{matrix} with the peak definition, i.e. \code{"rtmin"}, +#' \code{"rtmax"}, \code{"mzmin"} and \code{"mzmax"}. 
+#' @noRd +.getPeakInt <- function(object, peakArea) { + if (length(fileNames(object)) != 1) + stop("'object' should be an XCMSnExp for a single file!") + res <- numeric(nrow(peakArea)) + for (i in 1:length(res)) { + rtr <- peakArea[i, c("rtmin", "rtmax")] + chr <- extractChromatograms(object, + rt = rtr, + mz = peakArea[i, c("mzmin", "mzmax")])[[1]] + if (length(chr)) + res[i] <- sum(intensity(chr), na.rm = TRUE) * + ((rtr[2] - rtr[1]) / (length(chr) - 1)) + else + res[i] <- NA_real_ + } + return(unname(res)) +} + +#' Integrates the intensities for chromatograpic peak(s). This is supposed to be +#' called by the fillChromPeaks method. +#' +#' @note This reads the full data first and does the subsetting later in R. +#' +#' @param object An \code{XCMSnExp} object representing a single sample. +#' @param peakArea A \code{matrix} with the peak definition, i.e. \code{"rtmin"}, +#' \code{"rtmax"}, \code{"mzmin"} and \code{"mzmax"}. +#' @noRd +.getPeakInt2 <- function(object, peakArea) { + if (length(fileNames(object)) != 1) + stop("'object' should be an XCMSnExp for a single file!") + res <- numeric(nrow(peakArea)) + spctr <- spectra(object) + mzs <- lapply(spctr, mz) + valsPerSpect <- lengths(mzs) + ints <- unlist(lapply(spctr, intensity), use.names = FALSE) + rm(spctr) + mzs <- unlist(mzs, use.names = FALSE) + rtim <- rtime(object) + for (i in 1:length(res)) { + rtr <- peakArea[i, c("rtmin", "rtmax")] + mtx <- .rawMat(mz = mzs, int = ints, scantime = rtim, + valsPerSpect = valsPerSpect, rtrange = rtr, + mzrange = peakArea[i, c("mzmin", "mzmax")]) + if (length(mtx)) { + if (!all(is.na(mtx[, 3]))) { + ## How to calculate the area: (1)sum of all intensities / (2)by + ## the number of data points (REAL ones, considering also NAs) + ## and multiplied with the (3)rt width. + ## (1) sum(mtx[, 3], na.rm = TRUE) + ## (2) sum(rtim >= rtr[1] & rtim <= rtr[2]) - 1 ; if we used + ## nrow(mtx) here, which would correspond to the non-NA + ## intensities within the rt range we don't get the same results + ## as e.g. centWave. + ## (3) rtr[2] - rtr[1] + res[i] <- sum(mtx[, 3], na.rm = TRUE) * + ((rtr[2] - rtr[1]) / + (sum(rtim >= rtr[1] & rtim <= rtr[2]) - 1)) + } else { + res[i] <- NA_real_ + } + } else { + res[i] <- NA_real_ + } + } + return(unname(res)) +} + +#' Integrates the intensities for chromatograpic peak(s). This is supposed to be +#' called by the fillChromPeaks method. +#' +#' @note This reads the full data first and does the subsetting later in R. This +#' function uses the C getEIC function. +#' +#' @param object An \code{XCMSnExp} object representing a single sample. +#' @param peakArea A \code{matrix} with the peak definition, i.e. \code{"rtmin"}, +#' \code{"rtmax"}, \code{"mzmin"} and \code{"mzmax"}. 
+#' +#' @noRd +.getPeakInt3 <- function(object, peakArea) { + if (length(fileNames(object)) != 1) + stop("'object' should be an XCMSnExp for a single file!") + if (nrow(peakArea) == 0) { + return(numeric()) + } + res <- matrix(ncol = 4, nrow = nrow(peakArea)) + res <- numeric(nrow(peakArea)) + spctr <- spectra(object) + mzs <- lapply(spctr, mz) + valsPerSpect <- lengths(mzs) + scanindex <- valueCount2ScanIndex(valsPerSpect) ## Index vector for C calls + ints <- unlist(lapply(spctr, intensity), use.names = FALSE) + rm(spctr) + mzs <- unlist(mzs, use.names = FALSE) + rtim <- rtime(object) + for (i in 1:length(res)) { + rtr <- peakArea[i, c("rtmin", "rtmax")] + sr <- c(min(which(rtim >= rtr[1])), max(which(rtim <= rtr[2]))) + eic <- .Call("getEIC", mzs, ints, scanindex, + as.double(peakArea[i, c("mzmin", "mzmax")]), + as.integer(sr), as.integer(length(scanindex)), + PACKAGE = "xcms") + if (length(eic$intensity)) { + ## How to calculate the area: (1)sum of all intensities / (2)by + ## the number of data points (REAL ones, considering also NAs) + ## and multiplied with the (3)rt width. + if (!all(is.na(eic$intensity)) && !all(eic$intensity == 0)) { + res[i] <- sum(eic$intensity, na.rm = TRUE) * + ((rtr[2] - rtr[1]) / (length(eic$intensity) - 1)) + } else { + res[i] <- NA_real_ + } + } else { + res[i] <- NA_real_ + } } + return(unname(res)) +} + + +#' Integrates the intensities for chromatograpic peak(s). This is supposed to be +#' called by the fillChromPeaks method. +#' +#' @note This reads the full data first and does the subsetting later in R. +#' +#' @param object An \code{XCMSnExp} object representing a single sample. +#' +#' @param peakArea A \code{matrix} with the peak definition, i.e. \code{"rtmin"}, +#' \code{"rtmax"}, \code{"mzmin"} and \code{"mzmax"}. +#' +#' @param sample_idx \code{integer(1)} with the index of the sample in the +#' object. +#' +#' @param mzCenterFun Name of the function to be used to calculate the mz value. +#' Defaults to \code{weighted.mean}, i.e. the intensity weighted mean mz. +#' +#' @param cn \code{character} with the names of the result matrix. +#' +#' @return A \code{matrix} with at least columns \code{"mz"}, \code{"rt"}, +#' \code{"into"} and \code{"maxo"} with the by intensity weighted mean of mz, +#' rt or the maximal intensity in the area, the integrated signal in the area +#' and the maximal signal in the area. +#' @noRd +.getChromPeakData <- function(object, peakArea, sample_idx, + mzCenterFun = "weighted.mean", + cn = c("mz", "rt", "into", "maxo", "sample")) { + if (length(fileNames(object)) != 1) + stop("'object' should be an XCMSnExp for a single file!") + ncols <- length(cn) + res <- matrix(ncol = ncols, nrow = nrow(peakArea)) + colnames(res) <- cn + res[, "sample"] <- sample_idx + res[, c("mzmin", "mzmax")] <- + peakArea[, c("mzmin", "mzmax")] + ## Load the data + message("Requesting ", nrow(res), " missing peaks from ", + basename(fileNames(object)), " ... ", appendLF = FALSE) + spctr <- spectra(object) + mzs <- lapply(spctr, mz) + valsPerSpect <- lengths(mzs) + ints <- unlist(lapply(spctr, intensity), use.names = FALSE) + rm(spctr) + mzs <- unlist(mzs, use.names = FALSE) + rtim <- rtime(object) + rtim_range <- range(rtim) + for (i in 1:nrow(res)) { + rtr <- peakArea[i, c("rtmin", "rtmax")] + ## Ensure that the rt region is within the rtrange of the data. 
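        ## (Predicted peak areas can extend slightly beyond the first or last
        ## scan, hence the clamping of the rt window below.) The integrated
        ## intensity "into" computed further down follows the same convention
        ## as centWave:
        ##   into = sum(intensity) * (rtmax - rtmin) / (n_scans_in_window - 1)
        ## where n_scans_in_window counts every scan with rtmin <= rt <= rtmax,
        ## including scans without a measurement in the m/z slice. E.g. 5 scans
        ## spanning 8 seconds with a summed intensity of 1000 yield
        ## 1000 * 8 / 4 = 2000.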
+ rtr[1] <- max(rtr[1], rtim_range[1]) + rtr[2] <- min(rtr[2], rtim_range[2]) + mtx <- .rawMat(mz = mzs, int = ints, scantime = rtim, + valsPerSpect = valsPerSpect, rtrange = rtr, + mzrange = peakArea[i, c("mzmin", "mzmax")]) + if (length(mtx)) { + if (!all(is.na(mtx[, 3]))) { + ## How to calculate the area: (1)sum of all intensities / (2)by + ## the number of data points (REAL ones, considering also NAs) + ## and multiplied with the (3)rt width. + ## (1) sum(mtx[, 3], na.rm = TRUE) + ## (2) sum(rtim >= rtr[1] & rtim <= rtr[2]) - 1 ; if we used + ## nrow(mtx) here, which would correspond to the non-NA + ## intensities within the rt range we don't get the same results + ## as e.g. centWave. + ## (3) rtr[2] - rtr[1] + res[i, "into"] <- sum(mtx[, 3], na.rm = TRUE) * + ((rtr[2] - rtr[1]) / + (sum(rtim >= rtr[1] & rtim <= rtr[2]) - 1)) + maxi <- which.max(mtx[, 3]) + res[i, c("rt", "maxo")] <- mtx[maxi[1], c(1, 3)] + res[i, c("rtmin", "rtmax")] <- rtr + ## Calculate the intensity weighted mean mz + meanMz <- do.call(mzCenterFun, list(mtx[, 2], mtx[, 3])) + if (is.na(meanMz)) meanMz <- mtx[maxi[1], 2] + res[i, "mz"] <- meanMz + } else { + res[i, ] <- rep(NA_real_, ncols) + } + } else { + res[i, ] <- rep(NA_real_, ncols) + } + } + message("got ", sum(!is.na(res[, "into"])), ".") + return(res) +} + +#' Same as getChromPeakData, just without retention time. +#' @note The mz and maxo are however estimated differently than for the +#' getChromPeakData: mz is the mz closest to the median mz of the feature and +#' maxo its intensity. +#' @noRd +.getMSWPeakData <- function(object, peakArea, sample_idx, + cn = c("mz", "rt", "into", "maxo", "sample")) { + if (length(fileNames(object)) != 1) + stop("'object' should be an XCMSnExp for a single file!") + ncols <- length(cn) + res <- matrix(ncol = ncols, nrow = nrow(peakArea)) + colnames(res) <- cn + res[, "sample"] <- sample_idx + res[, "rt"] <- -1 + res[, "rtmin"] <- -1 + res[, "rtmax"] <- -1 + res[, c("mzmin", "mzmax")] <- peakArea[, c("mzmin", "mzmax")] + ## Load the data + message("Requesting ", nrow(res), " missing peaks from ", + basename(fileNames(object)), " ... ", appendLF = FALSE) + spctr <- spectra(object) + mzs <- lapply(spctr, mz) + valsPerSpect <- lengths(mzs) + ints <- unlist(lapply(spctr, intensity), use.names = FALSE) + rm(spctr) + mzs <- unlist(mzs, use.names = FALSE) + for (i in 1:nrow(res)) { + mz_area <- which(mzs >= peakArea[i, "mzmin"] & + mzs <= peakArea[i, "mzmax"]) + ## Alternative version from original code: but this can also pick up + ## mzs from outside of the range! See also comments on issue #130 + ## mz_area <- seq(which.min(abs(mzs - peakArea[i, "mzmin"])), + ## which.min(abs(mzs - peakArea[i, "mzmax"]))) + mtx <- cbind(time = -1, mz = mzs[mz_area], intensity = ints[mz_area]) + ## mtx <- xcms:::.rawMat(mz = mzs, int = ints, scantime = rtime(object), + ## valsPerSpect = valsPerSpect, + ## mzrange = peakArea[i, c("mzmin", "mzmax")]) + if (length(mtx)) { + if (!all(is.na(mtx[, 3]))) { + ## How to calculate the area: (1)sum of all intensities + res[i, "into"] <- sum(mtx[, 3], na.rm = TRUE) + ## Get the index of the mz value(s) closest to the mzmed of the + ## feature + mzDiff <- abs(mtx[, 2] - peakArea[i, "mzmed"]) + mz_idx <- which(mzDiff == min(mzDiff)) + ## Now get the one with the highest intensity. + maxi <- mz_idx[which.max(mtx[mz_idx, 3])] + ## Return these. + res[i, c("mz", "maxo")] <- mtx[maxi, 2:3] + ## ## mz should be the weighted mean! 
+ ## res[i, c("mz", "maxo")] <- c(weighted.mean(mtx[, 2], mtx[, 3]), + ## mtx[maxi[1], 3]) + } else { + res[i, ] <- rep(NA_real_, ncols) + } + } else { + res[i, ] <- rep(NA_real_, ncols) + } + } + message("got ", sum(!is.na(res[, "into"])), ".") + return(res) +} +## The same version as above, but the maxo is the maximum signal of the peak, +## and the mz the intensity weighted mean mz. +.getMSWPeakData2 <- function(object, peakArea, sample_idx, + cn = c("mz", "rt", "into", "maxo", "sample")) { + if (length(fileNames(object)) != 1) + stop("'object' should be an XCMSnExp for a single file!") + ncols <- length(cn) + res <- matrix(ncol = ncols, nrow = nrow(peakArea)) + colnames(res) <- cn + res[, "sample"] <- sample_idx + res[, "rt"] <- -1 + res[, "rtmin"] <- -1 + res[, "rtmax"] <- -1 + res[, c("mzmin", "mzmax")] <- peakArea[, c("mzmin", "mzmax")] + ## Load the data + message("Reguesting ", nrow(res), " missing peaks from ", + basename(fileNames(object)), " ... ", appendLF = FALSE) + spctr <- spectra(object) + mzs <- lapply(spctr, mz) + valsPerSpect <- lengths(mzs) + ints <- unlist(lapply(spctr, intensity), use.names = FALSE) + rm(spctr) + mzs <- unlist(mzs, use.names = FALSE) + for (i in 1:nrow(res)) { + mtx <- .rawMat(mz = mzs, int = ints, scantime = rtime(object), + valsPerSpect = valsPerSpect, + mzrange = peakArea[i, c("mzmin", "mzmax")]) + if (length(mtx)) { + if (!all(is.na(mtx[, 3]))) { + ## How to calculate the area: (1)sum of all intensities + res[i, "into"] <- sum(mtx[, 3], na.rm = TRUE) + res[i, c("mz", "maxo")] <- c(weighted.mean(mtx[, 2], mtx[, 3]), + max(mtx[, 3], na.rm = TRUE)) + } else { + res[i, ] <- rep(NA_real_, ncols) + } + } else { + res[i, ] <- rep(NA_real_, ncols) + } + } + message("got ", sum(!is.na(res[, "into"])), ".") + return(res) +} + +## Same as .getChromPeakData but for matchedFilter, i.e. using the profile +## matrix instead of the original signal. +.getChromPeakData_matchedFilter <- function(object, peakArea, sample_idx, + mzCenterFun = "weighted.mean", + param = MatchedFilterParam(), + cn = c("mz", "rt", "into", "maxo", + "sample")) { + if (length(fileNames(object)) != 1) + stop("'object' should be an XCMSnExp for a single file!") + ncols <- length(cn) + res <- matrix(ncol = ncols, nrow = nrow(peakArea)) + colnames(res) <- cn + res[, "sample"] <- sample_idx + res[, c("mzmin", "mzmax")] <- + peakArea[, c("mzmin", "mzmax")] + ## Load the data + message("Requesting ", nrow(res), " missing peaks from ", + basename(fileNames(object)), " ... ", appendLF = FALSE) + spctr <- spectra(object) + mzs <- lapply(spctr, mz) + vps <- lengths(mzs) + ints <- unlist(lapply(spctr, intensity), use.names = FALSE) + rm(spctr) + mzs <- unlist(mzs, use.names = FALSE) + rtim <- rtime(object) + rtim_range <- range(rtim) + ## Now, if we do have "distance" defined it get's tricky: + basespc <- NULL + if (length(distance(param)) > 0) { + mass <- seq(floor(min(mzs) / binSize(param)) * binSize(param), + ceiling(max(mzs) / binSize(param)) * binSize(param), + by = binSize(param)) + bin_size <- (max(mass) - min(mass)) / (length(mass) - 1) + basespc <- distance(param) * bin_size + } + ## Create the profile matrix: + pMat <- .createProfileMatrix(mz = mzs, int = ints, valsPerSpect = vps, + method = .impute2method(param), + step = binSize(param), + baselevel = baseValue(param), + basespace = basespc, + returnBreaks = TRUE, + baseValue = NA) # We want to return NA not 0 + # if nothing was found + brks <- pMat$breaks + pMat <- pMat$profMat ## rows are masses, cols are retention times/scans. 
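    ## With baseValue = NA, bins without any measurement stay NA instead of 0,
    ## so empty regions can be told apart from true zero intensities.
    ## For example, with binSize = 0.1 and measured m/z values between 100.02
    ## and 100.38, the breaks run 99.95, 100.05, ..., 100.45 and the per-bin
    ## masses (the bin midpoints computed below) are 100.0, 100.1, ..., 100.4.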
+ bin_size <- diff(brks[1:2]) + bin_half <- bin_size / 2 + ## Calculate the mean mass per bin using the breaks used for the binning. + mass <- brks[-length(brks)] + bin_half ## midpoint for the breaks + mass_range <- range(mass) + + for (i in 1:nrow(res)) { + rtr <- peakArea[i, c("rtmin", "rtmax")] + mzr <- peakArea[i, c("mzmin", "mzmax")] + ## Ensure that the rt region is within the rtrange of the data. + rtr[1] <- max(rtr[1], rtim_range[1]) + rtr[2] <- min(rtr[2], rtim_range[2]) + mzr[1] <- max(mzr[1], mass_range[1]) + mzr[2] <- min(mzr[2], mass_range[2]) + ## Get the index of rt in rtim that are within the rt range rtr + ## range_rt <- c(min(which(rtim >= rtr[1])), max(which(rtim <= rtr[2]))) + ## range_rt <- c(which.min(abs(rtim - rtr[1])), + ## which.min(abs(rtim - rtr[2]))) + range_rt <- findRange(rtim, rtr, TRUE) + idx_rt <- range_rt[1]:range_rt[2] + ## Get the index of the mz in the data that are within the mz range. + ##range_mz <- c(min(which(brks >= mzr[1])) - 1, max(which(brks <= mzr[2]))) + range_mz <- findRange(mass, c(mzr[1] - bin_half, mzr[2] + bin_half), + TRUE) + idx_mz <- range_mz[1]:range_mz[2] + + if (length(idx_mz) > 0 & length(idx_rt) > 0) { + intMat <- pMat[idx_mz, idx_rt, drop = FALSE] + is_na <- is.na(intMat) + if (all(is_na)) { + res[i, ] <- rep(NA_real_, ncols) + next + } + intMat_0 <- intMat + intMat_0[is.na(intMat)] <- 0 + ## Calculate the mean mz value using a intensity weighted mean. + mz_ints <- rowSums(intMat, na.rm = TRUE) + ## mz_ints <- Biobase::rowMax(intMat) + if (length(mz_ints) != length(idx_mz)) { + ## Take from original code... + warning("weighted.mean: x and weights have to have same length!") + mz_ints <- rep(1, length(idx_mz)) + } + ## mean mz: intensity weighted mean + mean_mz <- weighted.mean(mass[idx_mz], mz_ints, na.rm = TRUE) + if (is.nan(mean_mz) || is.na(mean_mz)) + mean_mz <- mean(mzr, na.rm = TRUE) + res[i, "mz"] <- mean_mz + ## mean rt: position of the maximum intensity (along rt.) + rt_ints <- colMax(intMat_0, na.rm = TRUE) + res[i, c("rt", "rtmin", "rtmax")] <- + c(rtim[idx_rt][which.max(rt_ints)], rtr) + ## maxo + res[i, "maxo"] <- max(rt_ints, na.rm = TRUE) + ## into + rt_width <- diff(rtim[range_rt])/diff(range_rt) + res[i, "into"] <- rt_width * sum(rt_ints, na.rm = TRUE) + } else + res[i, ] <- rep(NA_real_, ncols) + } + message("got ", sum(!is.na(res[, "into"])), ".") + return(res) +} + + +.hasFilledPeaks <- function(object) { + if (hasChromPeaks(object)) + if (any(colnames(chromPeaks(object)) == "is_filled")) + return(any(chromPeaks(object)[, "is_filled"] == 1)) + FALSE } diff --git a/R/functions-utils.R b/R/functions-utils.R index 51efc12a3..66c2d3304 100644 --- a/R/functions-utils.R +++ b/R/functions-utils.R @@ -90,3 +90,127 @@ useOriginalCode <- function(x) { } return(new_e) } + +## #' Simulates the \code{findRange} function. +## #' @noRd +## findRangeR <- function(x, values) { +## start <- min(which(x >= values[1])) +## end <- max(which(x <= values[2])) +## return(c(start, end)) +## } + +############################################################ +## .createProfileMatrix +##' @title Create the profile matrix +##' +##' @description This function creates a \emph{profile} matrix, i.e. a rt times +##' m/z matrix of aggregated intensity values with values aggregated within bins +##' along the m/z dimension. +##' +##' @details This is somewhat the successor function for the deprecated +##' \code{profBin} methods (\code{profBinM}, \code{profBinLinM}, +##' \code{profBinLinBaseM} and \code{profIntLin}). 
+##' +##' @param mz Numeric representing the m/z values across all scans/spectra. +##' @param int Numeric representing the intensity values across all +##' scans/spectra. +##' @param valsPerSpect Numeric representing the number of measurements for each +##' scan/spectrum. +##' @param method A character string specifying the profile matrix generation +##' method. Allowed are \code{"bin"}, \code{"binlin"}, +##' \code{"binlinbase"} and \code{"intlin"}. +##' @param step Numeric specifying the size of the m/z bins. +##' @param baselevel Numeric specifying the base value. +##' @param basespace Numeric. +##' @param mzrange. numeric(2) optionally specifying the mz value range +##' for binning. This is to adopt the old profStepPad<- method used for obiwarp +##' retention time correction that did the binning from whole-number limits. +##' @param returnBreaks logical(1): hack to return the breaks of the bins. +##' Setting this to TRUE causes the function to return a \code{list} with +##' elements \code{"$profMat"} and \code{"breaks"}. +##' @param baseValue numeric(1) defining the value to be returned if no signal +##' was found in the corresponding bin. Defaults to 0 for backward compatibility. +##' @noRd +.createProfileMatrix <- function(mz, int, valsPerSpect, + method, step = 0.1, baselevel = NULL, + basespace = NULL, + mzrange. = NULL, + returnBreaks = FALSE, + baseValue = 0) { + profMeths <- c("bin", "binlin", "binlinbase", "intlin") + names(profMeths) <- c("none", "lin", "linbase", "intlin") + method <- match.arg(method, profMeths) + impute <- names(profMeths)[profMeths == method] + brks <- NULL + + if (length(mzrange.) != 2) { + mrange <- range(mz) + mzrange. <- c(floor(mrange[1] / step) * step, + ceiling(mrange[2] / step) * step) + } + mass <- seq(mzrange.[1], mzrange.[2], by = step) + mlength <- length(mass) + ## Calculate the "real" bin size; old xcms code oddity that that's different + ## from step. + bin_size <- (mass[mlength] - mass[1]) / (mlength - 1) + ## for profIntLinM we have to use the old code. + if (impute == "intlin") { + profFun <- "profIntLinM" + profp <- list() + scanindex <- valueCount2ScanIndex(valsPerSpect) + buf <- do.call(profFun, args = list(mz, int, + scanindex, mlength, + mass[1], mass[mlength], + TRUE)) + } else { + ## Binning the data. + toIdx <- cumsum(valsPerSpect) + fromIdx <- c(1L, toIdx[-length(toIdx)] + 1L) + shiftBy <- TRUE + binFromX <- min(mass) + binToX <- max(mass) + brks <- breaks_on_nBins(fromX = binFromX, toX = binToX, + nBins = mlength, shiftByHalfBinSize = TRUE) + binRes <- binYonX(mz, int, + breaks = brks, + fromIdx = fromIdx, + toIdx = toIdx, + baseValue = ifelse(impute == "none", yes = baseValue, + no = NA), + sortedX = TRUE, + returnIndex = FALSE + ) + if (length(toIdx) == 1) + binRes <- list(binRes) + ## Missing value imputation. + if (impute == "linbase") { + ## need arguments distance and baseValue. 
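            ## If 'basespace' / 'baselevel' are not provided, the defaults
            ## below mimic the old profBinLinBase behaviour: values are
            ## interpolated across floor(0.075 / bin_size) neighbouring bins
            ## (i.e. roughly 0.075 m/z) and empty bins further away are set
            ## to half of the smallest measured intensity.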
+ if (length(basespace) > 0) { + if (!is.numeric(basespace)) + stop("'basespace' has to be numeric!") + distance <- floor(basespace[1] / bin_size) + } else { + distance <- floor(0.075 / bin_size) + } + if (length(baselevel) > 0) { + if (!is.numeric(baselevel)) + stop("'baselevel' has to be numeric!") + baseValue <- baselevel + } else { + baseValue <- min(int, na.rm = TRUE) / 2 + } + } else { + distance <- 0 + baseValue <- 0 + } + binVals <- lapply(binRes, function(z) { + return(imputeLinInterpol(z$y, method = impute, distance = distance, + noInterpolAtEnds = TRUE, + baseValue = baseValue)) + }) + buf <- do.call(cbind, binVals) + } + if (returnBreaks) + buf <- list(profMat = buf, breaks = brks) + buf +} diff --git a/R/functions-xcmsRaw.R b/R/functions-xcmsRaw.R index ce1db55d8..4d97b577b 100644 --- a/R/functions-xcmsRaw.R +++ b/R/functions-xcmsRaw.R @@ -537,115 +537,3 @@ remakeTIC<-function(object){ } return(object) } - -############################################################ -## .createProfileMatrix -##' @title Create the profile matrix -##' -##' @description This function creates a \emph{profile} matrix, i.e. a rt times -##' m/z matrix of aggregated intensity values with values aggregated within bins -##' along the m/z dimension. -##' -##' @details This is somewhat the successor function for the deprecated -##' \code{profBin} methods (\code{profBinM}, \code{profBinLinM}, -##' \code{profBinLinBaseM} and \code{profIntLin}). -##' -##' @param mz Numeric representing the m/z values across all scans/spectra. -##' @param int Numeric representing the intensity values across all -##' scans/spectra. -##' @param valsPerSpect Numeric representing the number of measurements for each -##' scan/spectrum. -##' @param method A character string specifying the profile matrix generation -##' method. Allowed are \code{"bin"}, \code{"binlin"}, -##' \code{"binlinbase"} and \code{"intlin"}. -##' @param step Numeric specifying the size of the m/z bins. -##' @param baselevel Numeric specifying the base value. -##' @param basespace Numeric. -##' @param mzrange. numeric(2) optionally specifying the mz value range -##' for binning. This is to adopt the old profStepPad<- method used for obiwarp -##' retention time correction that did the binning from whole-number limits. -##' @param returnBreaks logical(1): hack to return the breaks of the bins. -##' Setting this to TRUE causes the function to return a \code{list} with -##' elements \code{"$profMat"} and \code{"breaks"}. -##' @noRd -.createProfileMatrix <- function(mz, int, valsPerSpect, - method, step = 0.1, baselevel = NULL, - basespace = NULL, - mzrange. = NULL, - returnBreaks = FALSE) { - profMeths <- c("bin", "binlin", "binlinbase", "intlin") - names(profMeths) <- c("none", "lin", "linbase", "intlin") - method <- match.arg(method, profMeths) - impute <- names(profMeths)[profMeths == method] - brks <- NULL - - if (length(mzrange.) != 2) { - mrange <- range(mz) - mzrange. <- c(floor(mrange[1] / step) * step, - ceiling(mrange[2] / step) * step) - } - mass <- seq(mzrange.[1], mzrange.[2], by = step) - mlength <- length(mass) - ## Calculate the "real" bin size; old xcms code oddity that that's different - ## from step. - bin_size <- (mass[mlength] - mass[1]) / (mlength - 1) - ## for profIntLinM we have to use the old code. 
- if (impute == "intlin") { - profFun <- "profIntLinM" - profp <- list() - scanindex <- valueCount2ScanIndex(valsPerSpect) - buf <- do.call(profFun, args = list(mz, int, - scanindex, mlength, - mass[1], mass[mlength], - TRUE)) - } else { - ## Binning the data. - toIdx <- cumsum(valsPerSpect) - fromIdx <- c(1L, toIdx[-length(toIdx)] + 1L) - shiftBy <- TRUE - binFromX <- min(mass) - binToX <- max(mass) - brks <- breaks_on_nBins(fromX = binFromX, toX = binToX, - nBins = mlength, shiftByHalfBinSize = TRUE) - binRes <- binYonX(mz, int, - breaks = brks, - fromIdx = fromIdx, - toIdx = toIdx, - baseValue = ifelse(impute == "none", yes = 0, no = NA), - sortedX = TRUE, - returnIndex = FALSE - ) - if (length(toIdx) == 1) - binRes <- list(binRes) - ## Missing value imputation. - if (impute == "linbase") { - ## need arguments distance and baseValue. - if (length(basespace) > 0) { - if (!is.numeric(basespace)) - stop("'basespace' has to be numeric!") - distance <- floor(basespace[1] / bin_size) - } else { - distance <- floor(0.075 / bin_size) - } - if (length(baselevel) > 0) { - if (!is.numeric(baselevel)) - stop("'baselevel' has to be numeric!") - baseValue <- baselevel - } else { - baseValue <- min(int, na.rm = TRUE) / 2 - } - } else { - distance <- 0 - baseValue <- 0 - } - binVals <- lapply(binRes, function(z) { - return(imputeLinInterpol(z$y, method = impute, distance = distance, - noInterpolAtEnds = TRUE, - baseValue = baseValue)) - }) - buf <- do.call(cbind, binVals) - } - if (returnBreaks) - buf <- list(profMat = buf, breaks = brks) - buf -} diff --git a/R/functions-xcmsSet.R b/R/functions-xcmsSet.R index f3c130667..551aa048b 100644 --- a/R/functions-xcmsSet.R +++ b/R/functions-xcmsSet.R @@ -41,16 +41,21 @@ xcmsSet <- function(files = NULL, snames = NULL, sclass = NULL, if (is.null(files)) files <- getwd() info <- file.info(files) - listed <- list.files(files[info$isdir], pattern = filepattern, - recursive = TRUE, full.names = TRUE) - files <- c(files[!info$isdir], listed) + if (any(info$isdir)) { + message("Scanning files in directory ", files[info$isdir], " ... ", + appendLF = FALSE) + listed <- list.files(files[info$isdir], pattern = filepattern, + recursive = TRUE, full.names = TRUE) + message("found ", length(listed), " files") + files <- c(files[!info$isdir], listed) + } ## try making paths absolute files_abs <- file.path(getwd(), files) exists <- file.exists(files_abs) files[exists] <- files_abs[exists] if (length(files) == 0 | all(is.na(files))) stop("No NetCDF/mzXML/mzData/mzML files were found.\n") - + if(lockMassFreq==TRUE){ ## remove the 02 files if there here lockMass.files<-grep("02.CDF", files) @@ -237,9 +242,13 @@ c.xcmsSet <- function(...) { rtraw <- c(rtraw, lcsets[[i]]@rt$raw) rtcor <- c(rtcor, lcsets[[i]]@rt$corrected) - sampidx <- seq(along = namelist[[i]]) + nsamp - peaklist[[i]][,"sample"] <- sampidx[peaklist[[i]][,"sample"]] - nsamp <- nsamp + length(namelist[[i]]) + ## Update samples only if we've got any peaks. 
Issue #133 + if (nrow(peaks(lcsets[[i]]))) { + sampidx <- seq(along = namelist[[i]]) + nsamp + peaklist[[i]][,"sample"] <- sampidx[peaklist[[i]][,"sample"]] + ## Don't increment if we don't have any peaks + nsamp <- nsamp + length(namelist[[i]]) + } if (.hasSlot(lcsets[[i]], ".processHistory")) { ph <- .getProcessHistory(lcsets[[i]]) if (length(ph) > 0) { diff --git a/R/init.R b/R/init.R index 79bb1fdcb..990acc935 100644 --- a/R/init.R +++ b/R/init.R @@ -70,7 +70,7 @@ ## getEIC method getEIC.method="getEICOld" - + ## Sort method; see issue #180 for MSnbase ## sortMeth <- "auto" ## if (as.numeric(R.Version()$major) >= 3 & as.numeric(R.Version()$minor) >= 3) diff --git a/R/methods-Chromatogram.R b/R/methods-Chromatogram.R index 8971b1d52..232292111 100644 --- a/R/methods-Chromatogram.R +++ b/R/methods-Chromatogram.R @@ -42,15 +42,15 @@ setMethod("intensity", "Chromatogram", function(object) { }) ## mz -##' @description \code{mz} get or set the mz range of the -##' chromatogram. +##' @description \code{mz} get the mz (range) of the chromatogram. The +##' function returns a \code{numeric(2)} with the lower and upper mz value. ##' ##' @param filter For \code{mz}: whether the mz range used to filter the ##' original object should be returned (\code{filter = TRUE}), or the mz range ##' calculated on the real data (\code{filter = FALSE}). ##' ##' @rdname Chromatogram-class -setMethod("mzrange", "Chromatogram", function(object, filter = FALSE) { +setMethod("mz", "Chromatogram", function(object, filter = FALSE) { if (filter) return(object@filterMz) return(object@mz) @@ -62,6 +62,23 @@ setMethod("mzrange", "Chromatogram", function(object, filter = FALSE) { ## return(object) ## }) +##' @description \code{precursorMz} get the mz of the precursor ion. The +##' function returns a \code{numeric(2)} with the lower and upper mz value. +##' +##' @rdname Chromatogram-class +setMethod("precursorMz", "Chromatogram", function(object) { + return(object@precursorMz) +}) + +##' @aliases productMz +##' @description \code{productMz} get the mz of the product chromatogram/ion. The +##' function returns a \code{numeric(2)} with the lower and upper mz value. +##' +##' @rdname Chromatogram-class +setMethod("productMz", "Chromatogram", function(object) { + return(object@productMz) +}) + ## aggregationFun ##' @aliases aggregationFun ##' @description \code{aggregationFun,aggregationFun<-} get or set the diff --git a/R/methods-OnDiskMSnExp.R b/R/methods-OnDiskMSnExp.R index 896ad202d..64284cd9c 100644 --- a/R/methods-OnDiskMSnExp.R +++ b/R/methods-OnDiskMSnExp.R @@ -20,9 +20,9 @@ ##' the parallel processing mode using the \code{\link[BiocParallel]{register}} ##' method from the \code{BiocParallel} package. ##' -##' @param object For \code{findChromPeaks}: Either an -##' \code{\link[MSnbase]{OnDiskMSnExp}} or a \code{\link[MSnbase]{MSnExp}} -##' object containing the MS- and all other experiment-relevant data. +##' @param object For \code{findChromPeaks}: an +##' \code{\link[MSnbase]{OnDiskMSnExp}} object containing the MS- and all other +##' experiment-relevant data. ##' ##' For all other methods: a parameter object. 
##' @@ -83,7 +83,9 @@ setMethod("findChromPeaks", object@.processHistory <- list(xph) if (hasAdjustedRtime(object) | hasFeatures(object)) object@msFeatureData <- new("MsFeatureData") - chromPeaks(object) <- do.call(rbind, res$peaks) + pks <- do.call(rbind, res$peaks) + if (length(pks) > 0) + chromPeaks(object) <- cbind(pks, is_filled = 0) if (validObject(object)) return(object) } @@ -105,62 +107,62 @@ setMethod("findChromPeaks", }) -## The centWave peak detection method for MSnExp: -##' @title Chromatographic peak detection using the centWave method -##' -##' @description The \code{findChromPeaks,MSnExp,CentWaveParam} method performs -##' peak detection using the \emph{centWave} algorithm on all samples from -##' an \code{\link[MSnbase]{MSnExp}} object. These objects contain mz and -##' intensity values of all spectra hence no additional data input from the -##' original files is required. -##' -##' @rdname findChromPeaks-centWave -setMethod("findChromPeaks", - signature(object = "MSnExp", param = "CentWaveParam"), - function(object, param, BPPARAM = bpparam(), return.type = "list") { - return.type <- match.arg(return.type, c("list", "xcmsSet")) - ## Restrict to MS1 data. - ## Man, that's too slow! We're doing the MS1 restriction below. - ## object <- filterMsLevel(object, msLevel. = 1) - ## (1) split the spectra per file - this means we have a second - ## copy of the data, but there is no way around that as - ## filterFile is pretty slow on MSnExp. - ms1_idx <- which(unname(msLevel(object)) == 1) - if (length(ms1_idx) == 0) - stop("No MS1 spectra available for chromatographic peak", - " detection!") - ## Check if the data is centroided - if (!isCentroided(object[[ms1_idx[1]]])) - warning("Your data appears to be not centroided! CentWave", - " works best on data in centroid mode.") - spect_list <- split(spectra(object)[ms1_idx], - fromFile(object)[ms1_idx]) - ## (2) use bplapply to do the peak detection. - resList <- bplapply(spect_list, function(z) { - findChromPeaks_Spectrum_list(z, - method = "centWave", - param = param) - }, BPPARAM = BPPARAM) - ## (3) collect the results. - res <- .processResultList(resList, - getProcHist = return.type != "list", - fnames = fileNames(object)) - if (return.type == "list") - return(res$peaks) - if (return.type == "xcmsSet") { - xs <- .pSet2xcmsSet(object) - peaks(xs) <- do.call(rbind, res$peaks) - xs@.processHistory <- res$procHist - OK <- .validProcessHistory(xs) - if (!is.logical(OK)) - stop(OK) - if (!any(colnames(pData(object)) == "class")) - message("Note: you might want to set/adjust the", - " 'sampclass' of the returned xcmSet object", - " before proceeding with the analysis.") - return(xs) - } - }) +## ## The centWave peak detection method for MSnExp: +## ##' @title Chromatographic peak detection using the centWave method +## ##' +## ##' @description The \code{findChromPeaks,MSnExp,CentWaveParam} method performs +## ##' peak detection using the \emph{centWave} algorithm on all samples from +## ##' an \code{\link[MSnbase]{MSnExp}} object. These objects contain mz and +## ##' intensity values of all spectra hence no additional data input from the +## ##' original files is required. +## ##' +## ##' @rdname findChromPeaks-centWave +## setMethod("findChromPeaks", +## signature(object = "MSnExp", param = "CentWaveParam"), +## function(object, param, BPPARAM = bpparam(), return.type = "list") { +## return.type <- match.arg(return.type, c("list", "xcmsSet")) +## ## Restrict to MS1 data. +## ## Man, that's too slow! We're doing the MS1 restriction below. 
+## ## object <- filterMsLevel(object, msLevel. = 1) +## ## (1) split the spectra per file - this means we have a second +## ## copy of the data, but there is no way around that as +## ## filterFile is pretty slow on MSnExp. +## ms1_idx <- which(unname(msLevel(object)) == 1) +## if (length(ms1_idx) == 0) +## stop("No MS1 spectra available for chromatographic peak", +## " detection!") +## ## Check if the data is centroided +## if (!isCentroided(object[[ms1_idx[1]]])) +## warning("Your data appears to be not centroided! CentWave", +## " works best on data in centroid mode.") +## spect_list <- split(spectra(object)[ms1_idx], +## fromFile(object)[ms1_idx]) +## ## (2) use bplapply to do the peak detection. +## resList <- bplapply(spect_list, function(z) { +## findChromPeaks_Spectrum_list(z, +## method = "centWave", +## param = param) +## }, BPPARAM = BPPARAM) +## ## (3) collect the results. +## res <- .processResultList(resList, +## getProcHist = return.type != "list", +## fnames = fileNames(object)) +## if (return.type == "list") +## return(res$peaks) +## if (return.type == "xcmsSet") { +## xs <- .pSet2xcmsSet(object) +## peaks(xs) <- do.call(rbind, res$peaks) +## xs@.processHistory <- res$procHist +## OK <- .validProcessHistory(xs) +## if (!is.logical(OK)) +## stop(OK) +## if (!any(colnames(pData(object)) == "class")) +## message("Note: you might want to set/adjust the", +## " 'sampclass' of the returned xcmSet object", +## " before proceeding with the analysis.") +## return(xs) +## } +## }) ## The matchedFilter peak detection method for OnDiskMSnExp: ##' @title Peak detection in the chromatographic time domain @@ -176,10 +178,10 @@ setMethod("findChromPeaks", ##' be configured either by the \code{BPPARAM} parameter or by globally defining ##' the parallel processing mode using the \code{\link[BiocParallel]{register}} ##' method from the \code{BiocParallel} package. - -##' @param object For \code{findChromPeaks}: Either an -##' \code{\link[MSnbase]{OnDiskMSnExp}} or a \code{\link[MSnbase]{MSnExp}} -##' object containing the MS- and all other experiment-relevant data. +##' +##' @param object For \code{findChromPeaks}: an +##' \code{\link[MSnbase]{OnDiskMSnExp}} object containing the MS- and all other +##' experiment-relevant data. ##' ##' For all other methods: a parameter object. ##' @@ -228,7 +230,12 @@ setMethod("findChromPeaks", object@.processHistory <- list(xph) if (hasAdjustedRtime(object) | hasFeatures(object)) object@msFeatureData <- new("MsFeatureData") - chromPeaks(object) <- do.call(rbind, res$peaks) + pks <- do.call(rbind, res$peaks) + if (length(pks) > 0) + chromPeaks(object) <- cbind(pks, is_filled = 0) + ## ## chromPeaks(object) <- do.call(rbind, res$peaks) + ## chromPeaks(object) <- cbind(do.call(rbind, res$peaks), + ## is_filled = 0) if (validObject(object)) return(object) } @@ -249,49 +256,49 @@ setMethod("findChromPeaks", } }) -##' @title Peak detection in the chromatographic time domain -##' -##' @description The \code{findChromPeaks,MSnExp,MatchedFilterParam} method -##' performs peak detection using the \emph{matchedFilter} method on all -##' samples from an \code{\link[MSnbase]{MSnExp}} object. These objects contain -##' mz and intensity values of all spectra hence no additional -##' data input from the original files is required. 
-##' -##' @rdname findChromPeaks-matchedFilter -setMethod("findChromPeaks", - signature(object = "MSnExp", param = "MatchedFilterParam"), - function(object, param, BPPARAM = bpparam(), return.type = "list") { - return.type <- match.arg(return.type, c("list", "xcmsSet")) - ms1_idx <- which(unname(msLevel(object)) == 1) - if (length(ms1_idx) == 0) - stop("No MS1 spectra available for chromatographic peak", - " detection!") - spect_list <- split(spectra(object)[ms1_idx], - fromFile(object)[ms1_idx]) - resList <- bplapply(spect_list, function(z) { - findChromPeaks_Spectrum_list(z, - method = "matchedFilter", - param = param) - }, BPPARAM = BPPARAM) - res <- .processResultList(resList, - getProcHist = return.type != "list", - fnames = fileNames(object)) - if (return.type == "list") - return(res$peaks) - if (return.type == "xcmsSet") { - xs <- .pSet2xcmsSet(object) - peaks(xs) <- do.call(rbind, res$peaks) - xs@.processHistory <- res$procHist - OK <- .validProcessHistory(xs) - if (!is.logical(OK)) - stop(OK) - if (!any(colnames(pData(object)) == "class")) - message("Note: you might want to set/adjust the", - " 'sampclass' of the returned xcmSet object", - " before proceeding with the analysis.") - return(xs) - } - }) +## ##' @title Peak detection in the chromatographic time domain +## ##' +## ##' @description The \code{findChromPeaks,MSnExp,MatchedFilterParam} method +## ##' performs peak detection using the \emph{matchedFilter} method on all +## ##' samples from an \code{\link[MSnbase]{MSnExp}} object. These objects contain +## ##' mz and intensity values of all spectra hence no additional +## ##' data input from the original files is required. +## ##' +## ##' @rdname findChromPeaks-matchedFilter +## setMethod("findChromPeaks", +## signature(object = "MSnExp", param = "MatchedFilterParam"), +## function(object, param, BPPARAM = bpparam(), return.type = "list") { +## return.type <- match.arg(return.type, c("list", "xcmsSet")) +## ms1_idx <- which(unname(msLevel(object)) == 1) +## if (length(ms1_idx) == 0) +## stop("No MS1 spectra available for chromatographic peak", +## " detection!") +## spect_list <- split(spectra(object)[ms1_idx], +## fromFile(object)[ms1_idx]) +## resList <- bplapply(spect_list, function(z) { +## findChromPeaks_Spectrum_list(z, +## method = "matchedFilter", +## param = param) +## }, BPPARAM = BPPARAM) +## res <- .processResultList(resList, +## getProcHist = return.type != "list", +## fnames = fileNames(object)) +## if (return.type == "list") +## return(res$peaks) +## if (return.type == "xcmsSet") { +## xs <- .pSet2xcmsSet(object) +## peaks(xs) <- do.call(rbind, res$peaks) +## xs@.processHistory <- res$procHist +## OK <- .validProcessHistory(xs) +## if (!is.logical(OK)) +## stop(OK) +## if (!any(colnames(pData(object)) == "class")) +## message("Note: you might want to set/adjust the", +## " 'sampclass' of the returned xcmSet object", +## " before proceeding with the analysis.") +## return(xs) +## } +## }) ## massifquant ## The massifquant peak detection method for OnDiskMSnExp: @@ -309,9 +316,9 @@ setMethod("findChromPeaks", ##' the parallel processing mode using the \code{\link[BiocParallel]{register}} ##' method from the \code{BiocParallel} package. ##' -##' @param object For \code{findChromPeaks}: Either an -##' \code{\link[MSnbase]{OnDiskMSnExp}} or a \code{\link[MSnbase]{MSnExp}} -##' object containing the MS- and all other experiment-relevant data. 
+##' @param object For \code{findChromPeaks}: an +##' \code{\link[MSnbase]{OnDiskMSnExp}} object containing the MS- and all other +##' experiment-relevant data. ##' ##' For all other methods: a parameter object. ##' @@ -360,7 +367,12 @@ setMethod("findChromPeaks", object@.processHistory <- list(xph) if (hasAdjustedRtime(object) | hasFeatures(object)) object@msFeatureData <- new("MsFeatureData") - chromPeaks(object) <- do.call(rbind, res$peaks) + pks <- do.call(rbind, res$peaks) + if (length(pks) > 0) + chromPeaks(object) <- cbind(pks, is_filled = 0) + ## ## chromPeaks(object) <- do.call(rbind, res$peaks) + ## chromPeaks(object) <- cbind(do.call(rbind, res$peaks), + ## is_filled = 0) if (validObject(object)) return(object) } @@ -382,49 +394,49 @@ setMethod("findChromPeaks", }) -##' @title Chromatographic peak detection using the massifquant method -##' -##' @description The \code{findChromPeaks,MSnExp,MassifquantParam} method -##' performs chromatographic peak detection using the \emph{massifquant} method -##' on all samples from an \code{\link[MSnbase]{MSnExp}} object. These objects -##' contain mz and intensity values of all spectra hence no additional -##' data input from the original files is required. -##' -##' @rdname findChromPeaks-massifquant -setMethod("findChromPeaks", - signature(object = "MSnExp", param = "MassifquantParam"), - function(object, param, BPPARAM = bpparam(), return.type = "list") { - return.type <- match.arg(return.type, c("list", "xcmsSet")) - ms1_idx <- which(unname(msLevel(object)) == 1) - if (length(ms1_idx) == 0) - stop("No MS1 spectra available for chromatographic peak", - " detection!") - spect_list <- split(spectra(object)[ms1_idx], - fromFile(object)[ms1_idx]) - resList <- bplapply(spect_list, function(z) { - findChromPeaks_Spectrum_list(z, - method = "massifquant", - param = param) - }, BPPARAM = BPPARAM) - res <- .processResultList(resList, - getProcHist = return.type != "list", - fnames = fileNames(object)) - if (return.type == "list") - return(res$peaks) - if (return.type == "xcmsSet") { - xs <- .pSet2xcmsSet(object) - peaks(xs) <- do.call(rbind, res$peaks) - xs@.processHistory <- res$procHist - OK <- .validProcessHistory(xs) - if (!is.logical(OK)) - stop(OK) - if (!any(colnames(pData(object)) == "class")) - message("Note: you might want to set/adjust the", - " 'sampclass' of the returned xcmSet object", - " before proceeding with the analysis.") - return(xs) - } - }) +## ##' @title Chromatographic peak detection using the massifquant method +## ##' +## ##' @description The \code{findChromPeaks,MSnExp,MassifquantParam} method +## ##' performs chromatographic peak detection using the \emph{massifquant} method +## ##' on all samples from an \code{\link[MSnbase]{MSnExp}} object. These objects +## ##' contain mz and intensity values of all spectra hence no additional +## ##' data input from the original files is required. 
+## ##' +## ##' @rdname findChromPeaks-massifquant +## setMethod("findChromPeaks", +## signature(object = "MSnExp", param = "MassifquantParam"), +## function(object, param, BPPARAM = bpparam(), return.type = "list") { +## return.type <- match.arg(return.type, c("list", "xcmsSet")) +## ms1_idx <- which(unname(msLevel(object)) == 1) +## if (length(ms1_idx) == 0) +## stop("No MS1 spectra available for chromatographic peak", +## " detection!") +## spect_list <- split(spectra(object)[ms1_idx], +## fromFile(object)[ms1_idx]) +## resList <- bplapply(spect_list, function(z) { +## findChromPeaks_Spectrum_list(z, +## method = "massifquant", +## param = param) +## }, BPPARAM = BPPARAM) +## res <- .processResultList(resList, +## getProcHist = return.type != "list", +## fnames = fileNames(object)) +## if (return.type == "list") +## return(res$peaks) +## if (return.type == "xcmsSet") { +## xs <- .pSet2xcmsSet(object) +## peaks(xs) <- do.call(rbind, res$peaks) +## xs@.processHistory <- res$procHist +## OK <- .validProcessHistory(xs) +## if (!is.logical(OK)) +## stop(OK) +## if (!any(colnames(pData(object)) == "class")) +## message("Note: you might want to set/adjust the", +## " 'sampclass' of the returned xcmSet object", +## " before proceeding with the analysis.") +## return(xs) +## } +## }) ## MSW @@ -444,9 +456,9 @@ setMethod("findChromPeaks", ##' the parallel processing mode using the \code{\link[BiocParallel]{register}} ##' method from the \code{BiocParallel} package. ##' -##' @param object For \code{findChromPeaks}: Either an -##' \code{\link[MSnbase]{OnDiskMSnExp}} or a \code{\link[MSnbase]{MSnExp}} -##' object containing the MS- and all other experiment-relevant data. +##' @param object For \code{findChromPeaks}: an +##' \code{\link[MSnbase]{OnDiskMSnExp}} object containing the MS- and all other +##' experiment-relevant data. ##' ##' For all other methods: a parameter object. ##' @@ -472,9 +484,14 @@ setMethod("findChromPeaks", return.type <- match.arg(return.type, c("XCMSnExp", "list", "xcmsSet")) startDate <- date() - ## TODO @jo: ensure that we're having single spectra files! ## Restrict to MS1 data. object <- filterMsLevel(object, msLevel. = 1) + + rts <- split(rtime(object), f = fromFile(object)) + if (any(lengths(rts)) > 1) + stop("The MSW method can only be applied to single spectrum,", + " non-chromatogrphic, files (i.e. with a single ", + "retention time).") ## (1) split the object per file. ## (2) use bplapply to do the peak detection. resList <- bplapply(lapply(1:length(fileNames(object)), @@ -496,7 +513,12 @@ setMethod("findChromPeaks", object@.processHistory <- list(xph) if (hasAdjustedRtime(object) | hasFeatures(object)) object@msFeatureData <- new("MsFeatureData") - chromPeaks(object) <- do.call(rbind, res$peaks) + pks <- do.call(rbind, res$peaks) + if (length(pks) > 0) + chromPeaks(object) <- cbind(pks, is_filled = 0) + ## ## chromPeaks(object) <- do.call(rbind, res$peaks) + ## chromPeaks(object) <- cbind(do.call(rbind, res$peaks), + ## is_filled = 0) if (validObject(object)) return(object) } @@ -517,50 +539,50 @@ setMethod("findChromPeaks", } }) -##' @title Single-spectrum non-chromatography MS data peak detection -##' -##' @description The \code{findChromPeaks,MSnExp,MSWParam} method -##' performs peak detection in single-spectrum non-chromatography MS -##' data using functionality from the \code{MassSpecWavelet} package on all -##' samples from an \code{\link[MSnbase]{MSnExp}} object. 
These objects contain -##' mz and intensity values of all spectra hence no additional -##' data input from the original files is required. -##' -##' @rdname findPeaks-MSW -setMethod("findChromPeaks", - signature(object = "MSnExp", param = "MSWParam"), - function(object, param, BPPARAM = bpparam(), return.type = "list") { - return.type <- match.arg(return.type, c("list", "xcmsSet")) - ms1_idx <- which(unname(msLevel(object)) == 1) - if (length(ms1_idx) == 0) - stop("No MS1 spectra available for chromatographic peak", - " detection!") - spect_list <- split(spectra(object)[ms1_idx], - fromFile(object)[ms1_idx]) - resList <- bplapply(spect_list, function(z) { - findPeaks_MSW_Spectrum_list(z, - method = "MSW", - param = param) - }, BPPARAM = BPPARAM) - res <- .processResultList(resList, - getProcHist = return.type != "list", - fnames = fileNames(object)) - if (return.type == "list") - return(res$peaks) - if (return.type == "xcmsSet") { - xs <- .pSet2xcmsSet(object) - peaks(xs) <- do.call(rbind, res$peaks) - xs@.processHistory <- res$procHist - OK <- .validProcessHistory(xs) - if (!is.logical(OK)) - stop(OK) - if (!any(colnames(pData(object)) == "class")) - message("Note: you might want to set/adjust the", - " 'sampclass' of the returned xcmSet object", - " before proceeding with the analysis.") - return(xs) - } - }) +## ##' @title Single-spectrum non-chromatography MS data peak detection +## ##' +## ##' @description The \code{findChromPeaks,MSnExp,MSWParam} method +## ##' performs peak detection in single-spectrum non-chromatography MS +## ##' data using functionality from the \code{MassSpecWavelet} package on all +## ##' samples from an \code{\link[MSnbase]{MSnExp}} object. These objects contain +## ##' mz and intensity values of all spectra hence no additional +## ##' data input from the original files is required. +## ##' +## ##' @rdname findPeaks-MSW +## setMethod("findChromPeaks", +## signature(object = "MSnExp", param = "MSWParam"), +## function(object, param, BPPARAM = bpparam(), return.type = "list") { +## return.type <- match.arg(return.type, c("list", "xcmsSet")) +## ms1_idx <- which(unname(msLevel(object)) == 1) +## if (length(ms1_idx) == 0) +## stop("No MS1 spectra available for chromatographic peak", +## " detection!") +## spect_list <- split(spectra(object)[ms1_idx], +## fromFile(object)[ms1_idx]) +## resList <- bplapply(spect_list, function(z) { +## findPeaks_MSW_Spectrum_list(z, +## method = "MSW", +## param = param) +## }, BPPARAM = BPPARAM) +## res <- .processResultList(resList, +## getProcHist = return.type != "list", +## fnames = fileNames(object)) +## if (return.type == "list") +## return(res$peaks) +## if (return.type == "xcmsSet") { +## xs <- .pSet2xcmsSet(object) +## peaks(xs) <- do.call(rbind, res$peaks) +## xs@.processHistory <- res$procHist +## OK <- .validProcessHistory(xs) +## if (!is.logical(OK)) +## stop(OK) +## if (!any(colnames(pData(object)) == "class")) +## message("Note: you might want to set/adjust the", +## " 'sampclass' of the returned xcmSet object", +## " before proceeding with the analysis.") +## return(xs) +## } +## }) ## The centWave with predicted isotope peak detection method for OnDiskMSnExp: ##' @title Two-step centWave peak detection considering also isotopes @@ -577,8 +599,10 @@ setMethod("findChromPeaks", ##' the parallel processing mode using the \code{\link[BiocParallel]{register}} ##' method from the \code{BiocParallel} package. 
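A hedged sketch of how the OnDiskMSnExp-based peak detection described above could be run with a registered BiocParallel backend; the file vector `fls` and the parameter values are placeholders, not taken from this patch:

library(xcms)
library(BiocParallel)
## Run serially; a MulticoreParam/SnowParam could be registered instead.
register(SerialParam())

## 'fls': hypothetical vector of mzML/CDF file paths read as OnDiskMSnExp.
od <- readMSData2(fls)

## Two-step centWave detection considering predicted isotope ROIs.
prm <- CentWavePredIsoParam(ppm = 25, peakwidth = c(20, 50))
xod <- findChromPeaks(od, param = prm, return.type = "XCMSnExp")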
##' +##' @param param An \code{CentWavePredIsoParam} object with the settings for the +##' chromatographic peak detection algorithm. ##' @inheritParams findChromPeaks-centWave -##' +##' ##' @return For \code{findChromPeaks}: if \code{return.type = "XCMSnExp"} an ##' \code{\link{XCMSnExp}} object with the results of the peak detection. ##' If \code{return.type = "list"} a list of length equal to the number of @@ -623,7 +647,12 @@ setMethod("findChromPeaks", object@.processHistory <- list(xph) if (hasAdjustedRtime(object) | hasFeatures(object)) object@msFeatureData <- new("MsFeatureData") - chromPeaks(object) <- do.call(rbind, res$peaks) + pks <- do.call(rbind, res$peaks) + if (length(pks) > 0) + chromPeaks(object) <- cbind(pks, is_filled = 0) + ## ## chromPeaks(object) <- do.call(rbind, res$peaks) + ## chromPeaks(object) <- cbind(do.call(rbind, res$peaks), + ## is_filled = 0) if (validObject(object)) return(object) } @@ -645,62 +674,62 @@ setMethod("findChromPeaks", }) -## The centWave with predicted isotope peak detection method for MSnExp: -##' @title Two-step centWave peak detection considering also isotopes -##' -##' @description The \code{findChromPeaks,MSnExp,CentWavePredIsoParam} method -##' performs a two-step centWave-based peak detection on all samples from -##' an \code{\link[MSnbase]{MSnExp}} object. These objects contain mz and -##' intensity values of all spectra hence no additional data input from the -##' original files is required. -##' -##' @rdname findChromPeaks-centWaveWithPredIsoROIs -setMethod("findChromPeaks", - signature(object = "MSnExp", param = "CentWavePredIsoParam"), - function(object, param, BPPARAM = bpparam(), return.type = "list") { - return.type <- match.arg(return.type, c("list", "xcmsSet")) - ## Restrict to MS1 data. - ## Man, that's too slow! We're doing the MS1 restriction below. - ## object <- filterMsLevel(object, msLevel. = 1) - ## (1) split the spectra per file - this means we have a second - ## copy of the data, but there is no way around that as - ## filterFile is pretty slow on MSnExp. - ms1_idx <- which(unname(msLevel(object)) == 1) - if (length(ms1_idx) == 0) - stop("No MS1 spectra available for chromatographic peak", - " detection!") - ## Check if the data is centroided - if (!isCentroided(object[[ms1_idx[1]]])) - warning("Your data appears to be not centroided! CentWave", - " works best on data in centroid mode.") - spect_list <- split(spectra(object)[ms1_idx], - fromFile(object)[ms1_idx]) - ## (2) use bplapply to do the peak detection. - resList <- bplapply(spect_list, function(z) { - findChromPeaks_Spectrum_list(z, - method = "centWaveWithPredIsoROIs", - param = param) - }, BPPARAM = BPPARAM) - ## (3) collect the results. 
- res <- .processResultList(resList, - getProcHist = return.type != "list", - fnames = fileNames(object)) - if (return.type == "list") - return(res$peaks) - if (return.type == "xcmsSet") { - xs <- .pSet2xcmsSet(object) - peaks(xs) <- do.call(rbind, res$peaks) - xs@.processHistory <- res$procHist - OK <- .validProcessHistory(xs) - if (!is.logical(OK)) - stop(OK) - if (!any(colnames(pData(object)) == "class")) - message("Note: you might want to set/adjust the", - " 'sampclass' of the returned xcmSet object", - " before proceeding with the analysis.") - return(xs) - } - }) +## ## The centWave with predicted isotope peak detection method for MSnExp: +## ##' @title Two-step centWave peak detection considering also isotopes +## ##' +## ##' @description The \code{findChromPeaks,MSnExp,CentWavePredIsoParam} method +## ##' performs a two-step centWave-based peak detection on all samples from +## ##' an \code{\link[MSnbase]{MSnExp}} object. These objects contain mz and +## ##' intensity values of all spectra hence no additional data input from the +## ##' original files is required. +## ##' +## ##' @rdname findChromPeaks-centWaveWithPredIsoROIs +## setMethod("findChromPeaks", +## signature(object = "MSnExp", param = "CentWavePredIsoParam"), +## function(object, param, BPPARAM = bpparam(), return.type = "list") { +## return.type <- match.arg(return.type, c("list", "xcmsSet")) +## ## Restrict to MS1 data. +## ## Man, that's too slow! We're doing the MS1 restriction below. +## ## object <- filterMsLevel(object, msLevel. = 1) +## ## (1) split the spectra per file - this means we have a second +## ## copy of the data, but there is no way around that as +## ## filterFile is pretty slow on MSnExp. +## ms1_idx <- which(unname(msLevel(object)) == 1) +## if (length(ms1_idx) == 0) +## stop("No MS1 spectra available for chromatographic peak", +## " detection!") +## ## Check if the data is centroided +## if (!isCentroided(object[[ms1_idx[1]]])) +## warning("Your data appears to be not centroided! CentWave", +## " works best on data in centroid mode.") +## spect_list <- split(spectra(object)[ms1_idx], +## fromFile(object)[ms1_idx]) +## ## (2) use bplapply to do the peak detection. +## resList <- bplapply(spect_list, function(z) { +## findChromPeaks_Spectrum_list(z, +## method = "centWaveWithPredIsoROIs", +## param = param) +## }, BPPARAM = BPPARAM) +## ## (3) collect the results. +## res <- .processResultList(resList, +## getProcHist = return.type != "list", +## fnames = fileNames(object)) +## if (return.type == "list") +## return(res$peaks) +## if (return.type == "xcmsSet") { +## xs <- .pSet2xcmsSet(object) +## peaks(xs) <- do.call(rbind, res$peaks) +## xs@.processHistory <- res$procHist +## OK <- .validProcessHistory(xs) +## if (!is.logical(OK)) +## stop(OK) +## if (!any(colnames(pData(object)) == "class")) +## message("Note: you might want to set/adjust the", +## " 'sampclass' of the returned xcmSet object", +## " before proceeding with the analysis.") +## return(xs) +## } +## }) ## profMat method for XCMSnExp/OnDiskMSnExp. 
##' @description \code{profMat}: creates a \emph{profile matrix}, which @@ -784,13 +813,10 @@ setMethod("adjustRtime", return(res) }) - #' @rdname extractChromatograms-method -#' @noRd setMethod("extractChromatograms", signature(object = "OnDiskMSnExp"), function(object, rt, mz, aggregationFun = "sum") { return(.extractChromatogram(x = object, rt = rt, mz = mz, - aggregationFun = aggregationFun, - adjusted = FALSE)) + aggregationFun = aggregationFun)) }) diff --git a/R/methods-Params.R b/R/methods-Params.R index 531edec32..f9dcde7e0 100644 --- a/R/methods-Params.R +++ b/R/methods-Params.R @@ -16,6 +16,27 @@ setMethod("initialize", "Param", function(.Object, ...) { callNextMethod(.Object, ...) }) +############################################################ +## GenericParam +### +setMethod("initialize", "GenericParam", function(.Object, ...) { + classVersion(.Object)["GenericParam"] <- "0.0.1" + callNextMethod(.Object, ...) +}) +#' @param object \code{GenericParam} object. +#' @rdname GenericParam +setMethod("show", "GenericParam", function(object) { + cat("Object of class: ", class(object), "\n") + cat(" fun:", object@fun, "\n") + cat(" arguments:\n") + if (length(object@args) > 0) { + for (i in 1:length(object@args)) { + if (!is.null(names(object@args))) + cat(" ", names(object@args)[i], "= ") + cat(object@args[[i]], "\n") + } + } +}) ############################################################ ## CentWaveParam @@ -1471,3 +1492,61 @@ setReplaceMethod("initPenalty", "ObiwarpParam", function(object, value) { return(object) }) +############################################################ +## FillChromPeaksParam +### +setMethod("initialize", "FillChromPeaksParam", function(.Object, ...) { + classVersion(.Object)["FillChromPeaksParam"] <- "0.0.1" + callNextMethod(.Object, ...) +}) +#' @rdname fillChromPeaks +setMethod("show", "FillChromPeaksParam", function(object) { + cat("Object of class: ", class(object), "\n") + cat("Parameters:\n") + cat(" expandMz:", object@expandMz, "\n") + cat(" expandRt:", object@expandRt, "\n") + cat(" ppm:", object@ppm, "\n") +}) + +#' @aliases expandMz +#' @description \code{expandMz},\code{expandMz<-}: getter and setter +#' for the \code{expandMz} slot of the object. +#' +#' @param value The value for the slot. +#' +#' @rdname fillChromPeaks +setMethod("expandMz", "FillChromPeaksParam", function(object){ + return(object@expandMz)}) +#' @aliases expandMz<- +#' @rdname fillChromPeaks +setReplaceMethod("expandMz", "FillChromPeaksParam", function(object, value) { + object@expandMz <- value + if (validObject(object)) + return(object) +}) + +#' @aliases expandRt +#' @description \code{expandRt},\code{expandRt<-}: getter and setter +#' for the \code{expandRt} slot of the object. +#' @rdname fillChromPeaks +setMethod("expandRt", "FillChromPeaksParam", function(object){ + return(object@expandRt)}) +#' @aliases expandRt<- +#' @rdname fillChromPeaks +setReplaceMethod("expandRt", "FillChromPeaksParam", function(object, value) { + object@expandRt <- value + if (validObject(object)) + return(object) +}) + +#' @description \code{ppm},\code{ppm<-}: getter and setter +#' for the \code{ppm} slot of the object. 
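To make the new FillChromPeaksParam accessors concrete, a short sketch using the getters and setters defined above (the parameter values are arbitrary):

library(xcms)
## Parameter object for fillChromPeaks.
fcp <- FillChromPeaksParam(expandMz = 0.25, expandRt = 0.5, ppm = 10)
fcp                  ## printed via the show method defined above

## Getters and setters for the individual slots.
expandMz(fcp)
expandMz(fcp) <- 0.5
expandRt(fcp)
ppm(fcp)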
+#' @rdname fillChromPeaks +setMethod("ppm", "FillChromPeaksParam", function(object){ + return(object@ppm)}) +#' @rdname fillChromPeaks +setReplaceMethod("ppm", "FillChromPeaksParam", function(object, value) { + object@ppm <- value + if (validObject(object)) + return(object) +}) diff --git a/R/methods-XCMSnExp.R b/R/methods-XCMSnExp.R index f2325c6ba..4a8f8d281 100644 --- a/R/methods-XCMSnExp.R +++ b/R/methods-XCMSnExp.R @@ -17,6 +17,8 @@ setMethod("show", "XCMSnExp", function(object) { cat("- - - xcms preprocessing - - -\n") if (hasChromPeaks(object)) { cat("Chromatographic peak detection:\n") + ph <- processHistory(object, type = .PROCSTEP.PEAK.DETECTION) + cat(" method:", .param2string(ph[[1]]@param), "\n") cat(" ", nrow(chromPeaks(object)), " peaks identified in ", length(fileNames(object)), " samples.\n", sep = "") cat(" On average ", @@ -25,6 +27,8 @@ setMethod("show", "XCMSnExp", function(object) { } if (hasFeatures(object)) { cat("Correspondence:\n") + ph <- processHistory(object, type = .PROCSTEP.PEAK.GROUPING) + cat(" method:", .param2string(ph[[1]]@param), "\n") cat(" ", nrow(featureDefinitions(object)), " features identified.\n", sep = "") cat(" Median mz range of features: ", @@ -35,8 +39,17 @@ setMethod("show", "XCMSnExp", function(object) { format(median(featureDefinitions(object)[, "rtmax"] - featureDefinitions(object)[, "rtmin"]), digits = 5), "\n", sep = "") + if (.hasFilledPeaks(object)) { + totF <- chromPeaks(object)[, "is_filled"] == 1 + fp <- chromPeaks(object)[totF, , drop = FALSE] + cat("", sum(totF), "filled peaks (on average", + mean(table(fp[, "sample"])), "per sample).\n") + } } if (hasAdjustedRtime(object)) { + cat("Alignment/retention time adjustment:\n") + ph <- processHistory(object, type = .PROCSTEP.RTIME.CORRECTION) + cat(" method:", .param2string(ph[[1]]@param), "\n") } }) @@ -188,17 +201,25 @@ setReplaceMethod("featureDefinitions", "XCMSnExp", function(object, value) { ##' \code{\link{findChromPeaks}} method. ##' ##' @return For \code{chromPeaks}: if \code{bySample = FALSE} a \code{matrix} with -##' at least the following columns: \code{"mz"} (mz value for the largest -##' intensity), \code{"mzmin"} (minimal mz value), \code{"mzmax"} (maximal mz -##' value), \code{"rt"} (retention time for the peak apex), \code{"rtmin"} -##' (minimal retention time), \code{"rtmax"} (maximal retention time), -##' \code{"into"} (integrated, original, intensity of the peak) and -##' \code{"sample"} (sample index in which the peak was identified). +##' at least the following columns: +##' \code{"mz"} (intensity-weighted mean of mz values of the peak across scans/ +##' retention times), +##' \code{"mzmin"} (minimal mz value), +##' \code{"mzmax"} (maximal mz value), +##' \code{"rt"} (retention time for the peak apex), +##' \code{"rtmin"} (minimal retention time), +##' \code{"rtmax"} (maximal retention time), +##' \code{"into"} (integrated, original, intensity of the peak), +##' \code{"maxo"} (maximum intentity of the peak), +##' \code{"sample"} (sample index in which the peak was identified) and +##' \code{"is_filled"} defining whether the chromatographic peak was identified +##' by the peak picking algorithm (\code{0}) or was added by the +##' \code{fillChromPeaks} method (\code{1}). ##' Depending on the employed peak detection algorithm and the ##' \code{verboseColumns} parameter of it additional columns might be returned. 
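A hedged sketch of how the chromPeaks matrix documented above can be inspected; `xod` is assumed to be an XCMSnExp object with detected (and possibly filled-in) peaks:

## Peak matrix across all samples; columns as documented above.
pks <- chromPeaks(xod)
head(pks[, c("mz", "mzmin", "mzmax", "rt", "rtmin", "rtmax",
             "into", "maxo", "sample", "is_filled")])

## How many peaks were detected (0) vs filled in by fillChromPeaks (1)?
table(pks[, "is_filled"])

## Peaks grouped by sample: a list with one matrix per file.
pks_by_sample <- chromPeaks(xod, bySample = TRUE)
head(pks_by_sample[[1]])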
##' For \code{bySample = TRUE} the chronatographic peaks are returned as a -##' \code{list} of matrices, each containing the chromatographic peak of a -##' specific sample. For sample in which no feastures were detected a matrix +##' \code{list} of matrices, each containing the chromatographic peaks of a +##' specific sample. For samples in which no peaks were detected a matrix ##' with 0 rows is returned. ##' ##' @rdname XCMSnExp-class @@ -214,7 +235,7 @@ setMethod("chromPeaks", "XCMSnExp", function(object, bySample = FALSE) { res[as.numeric(names(tmp))] <- tmp if (any(lengths(res) == 0)) { emat <- matrix(nrow = 0, ncol = ncol(tmp[[1]])) - colnamers(emat) <- colnames(tmp[[1]]) + colnames(emat) <- colnames(tmp[[1]]) res[lengths(res) == 0] <- emat } return(res) @@ -331,9 +352,12 @@ setMethod("intensity", "XCMSnExp", function(object, bySample = FALSE) { ##' @description \code{spectra}: extracts the ##' \code{\link[MSnbase]{Spectrum}} objects containing all data from -##' \code{object}. These values are extracted from the original data files and -##' eventual processing steps are applied \emph{on the fly}. Setting -##' \code{bySample = TRUE} the spectra are returned grouped by sample/file. +##' \code{object}. The values are extracted from the original data files and +##' eventual processing steps are applied \emph{on the fly}. By setting +##' \code{bySample = TRUE}, the spectra are returned grouped by sample/file. If +##' the \code{XCMSnExp} object contains adjusted retention times, these are +##' returned by default in the \code{Spectrum} objects (can be overwritten +##' by setting \code{adjusted = FALSE}). ##' ##' @return For \code{spectra}: if \code{bySample = FALSE} a \code{list} with ##' \code{\link[MSnbase]{Spectrum}} objects. If \code{bySample = TRUE} the result @@ -342,8 +366,17 @@ setMethod("intensity", "XCMSnExp", function(object, bySample = FALSE) { ##' file. ##' ##' @rdname XCMSnExp-class -setMethod("spectra", "XCMSnExp", function(object, bySample = FALSE) { +setMethod("spectra", "XCMSnExp", function(object, bySample = FALSE, + adjusted = hasAdjustedRtime(object)) { res <- callNextMethod(object = object) + ## replace the rtime of these with the adjusted ones - if present. + if (adjusted & hasAdjustedRtime(object)) { + rts <- adjustedRtime(object) + res <- mapply(FUN = function(a, b) { + a@rt <- b + return(a) + }, a = res, b = rts) + } if (bySample) { tmp <- split(res, fromFile(object)) ## That's to ensure that we're always returning something for all files. @@ -368,10 +401,9 @@ setMethod("spectra", "XCMSnExp", function(object, bySample = FALSE) { ##' \code{\link{ProcessHistory}} objects should be retrieved. ##' ##' @param type For \code{processHistory}: restrict returned -##' \code{\link{ProcessHistory}} objects to analysis steps of a certain type. -##' Supported values are \code{"Unknown"}, \code{"Peak detection"}, -##' \code{"Peak grouping"} and \code{"Retention time correction"}. -##' +##' \code{\link{ProcessHistory}} objects to analysis steps of a certain type. Use +##' the \code{processHistoryTypes} to list all supported values. +##' ##' @return For \code{processHistory}: a \code{list} of ##' \code{\link{ProcessHistory}} objects providing the details of the individual ##' data processing steps that have been performed. 
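The process history accessors mentioned above could be used along these lines (a sketch; `xod` is assumed to be an XCMSnExp object after peak detection):

## List all supported process history types.
processHistoryTypes()

## Restrict to the peak detection step(s) and inspect the parameter object
## stored with it.
ph <- processHistory(xod, type = "Peak detection")
ph[[1]]
ph[[1]]@param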
@@ -435,6 +467,7 @@ setMethod("dropChromPeaks", "XCMSnExp", function(object) { ## Make sure we delete all related process history steps object <- dropProcessHistories(object, type = .PROCSTEP.RTIME.CORRECTION) object <- dropProcessHistories(object, type = .PROCSTEP.PEAK.GROUPING) + object <- dropProcessHistories(object, type = .PROCSTEP.PEAK.FILLING) ## idx_fd <- which(unlist(lapply(processHistory(object), processType)) == ## .PROCSTEP.PEAK.DETECTION) ## if (length(idx_fd) > 0) @@ -462,7 +495,8 @@ setMethod("dropChromPeaks", "XCMSnExp", function(object) { ##' results, if these were performed after the last peak grouping (i.e. which ##' base on the results from the peak grouping that are going to be removed). ##' For \code{XCMSnExp} objects also all related process history steps are -##' removed. +##' removed. Also eventually filled in peaks (by \code{\link{fillChromPeaks}}) +##' will be removed too. ##' ##' @param keepAdjRtime For \code{dropFeatureDefinitions,XCMSnExp}: ##' \code{logical(1)} defining whether eventually present retention time @@ -496,6 +530,13 @@ setMethod("dropFeatureDefinitions", "XCMSnExp", function(object, newFd <- new("MsFeatureData") newFd@.xData <- .copy_env(object@msFeatureData) newFd <- dropFeatureDefinitions(newFd) + if (.hasFilledPeaks(object)) { + ## Remove filled in peaks + chromPeaks(newFd) <- + chromPeaks(newFd)[chromPeaks(newFd)[, "is_filled"] == 0, , + drop = FALSE] + object <- dropProcessHistories(object, type = .PROCSTEP.PEAK.FILLING) + } lockEnvironment(newFd, bindings = TRUE) object@msFeatureData <- newFd ## 2) If retention time correction was performed after the latest peak @@ -762,13 +803,13 @@ setMethod("filterAcquisitionNum", "XCMSnExp", function(object, n, file) { ##' @aliases XCMSnExp-filter ##' @title XCMSnExp filtering and subsetting ##' -##' The methods listed on this page allow to filter and subset +##' @description The methods listed on this page allow to filter and subset ##' \code{\link{XCMSnExp}} objects. Most of them are inherited from the ##' \code{\link[MSnbase]{OnDiskMSnExp}} object and have been adapted for ##' \code{\link{XCMSnExp}} to enable subsetting also on the preprocessing ##' results. ##' -##' @description \code{filterFile}: allows to reduce the +##' \code{filterFile}: allows to reduce the ##' \code{\link{XCMSnExp}} to data from only certain files. Identified ##' chromatographic peaks for these files are retained while all eventually ##' present features (peak grouping information) are dropped. By default also @@ -936,7 +977,7 @@ setMethod("filterFile", "XCMSnExp", function(object, file, setMethod("filterMz", "XCMSnExp", function(object, mz, msLevel., ...) { if (missing(mz)) return(object) - if (!is.numeric(mz) | length(mz) != 2) + if (!is.numeric(mz)) stop("'mz' has to be a numeric vector of length(2)!") mz <- range(mz) ## Subset peaks if present. @@ -971,6 +1012,8 @@ setMethod("filterMz", "XCMSnExp", function(object, mz, msLevel., ...) { ##' @param adjusted For \code{filterRt}: \code{logical} indicating whether the ##' object should be filtered by original (\code{adjusted = FALSE}) or adjusted ##' retention times (\code{adjusted = TRUE}). +##' For \code{spectra}: whether the retention times in the individual +##' \code{Spectrum} objects should be the adjusted or raw retention times. 
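A hedged example of the adjusted-retention-time handling described above; `xod` is assumed to be an XCMSnExp object with alignment results:

## Subset to a retention time window using the adjusted retention times.
xod_sub <- filterRt(xod, rt = c(2700, 2900), adjusted = TRUE)

## Spectra are returned with adjusted retention times by default; the raw
## times can be requested explicitly.
sps_adj <- spectra(xod_sub)
sps_raw <- spectra(xod_sub, adjusted = FALSE)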
##' ##' @rdname XCMSnExp-filter-methods setMethod("filterRt", "XCMSnExp", function(object, rt, msLevel., @@ -1544,16 +1587,20 @@ setMethod("profMat", signature(object = "XCMSnExp"), function(object, }) +##' @aliases featureValues ##' @title Accessing mz-rt feature data values ##' -##' @description \code{groupval,XCMSnExp}: extract a \code{matrix} for feature -##' values with rows representing features and columns samples. Parameter -##' \code{value} allows to define which column from the \code{\link{chromPeaks}} -##' matrix should be returned. Multiple chromatographic peaks from the same -##' sample can be assigned to a feature. Parameter \code{method} allows to -##' specify the method to be used in such cases to chose from which of the peaks -##' the value should be returned. -##' +##' @description \code{featureValues,XCMSnExp} : +##' extract a \code{matrix} for feature values with rows representing features +##' and columns samples. Parameter \code{value} allows to define which column +##' from the \code{\link{chromPeaks}} matrix should be returned. Multiple +##' chromatographic peaks from the same sample can be assigned to a feature. +##' Parameter \code{method} allows to specify the method to be used in such +##' cases to chose from which of the peaks the value should be returned. +##' +##' @note This method is equivalent to the \code{\link{groupval}} for +##' \code{xcmsSet} objects. +##' ##' @param object A \code{\link{XCMSnExp}} object providing the feature ##' definitions. ##' @@ -1575,9 +1622,9 @@ setMethod("profMat", signature(object = "XCMSnExp"), function(object, ##' peak that should be used for the conflict resolution if ##' \code{method = "maxint"}. ##' -##' @return For \code{groupval}: a \code{matrix} with feature values, columns -##' representing samples, rows features. The order of the features -##' matches the order found in the \code{featureDefinitions(object)} +##' @return For \code{featureValues}: a \code{matrix} with +##' feature values, columns representing samples, rows features. The order of +##' the features matches the order found in the \code{featureDefinitions(object)} ##' \code{DataFrame}. An \code{NA} is reported for features without corresponding ##' chromatographic peak in the respective sample(s). ##' @@ -1589,9 +1636,10 @@ setMethod("profMat", signature(object = "XCMSnExp"), function(object, ##' feature definitions. ##' \code{\link{hasFeatures}} to evaluate whether the ##' \code{\link{XCMSnExp}} provides feature definitions. +##' \code{\link{groupval}} for the equivalent method on \code{xcmsSet} objects. ##' ##' @rdname XCMSnExp-peak-grouping-results -setMethod("groupval", +setMethod("featureValues", signature(object = "XCMSnExp"), function(object, method = c("medret", "maxint"), value = "index", intensity = "into") { @@ -1647,7 +1695,17 @@ setMethod("groupval", ## base::round(grps$rtmed), sep = "/") return(vals) }) +## ##' @rdname XCMSnExp-peak-grouping-results +## setMethod("groupval", +## signature(object = "XCMSnExp"), +## function(object, method = c("medret", "maxint"), value = "index", +## intensity = "into") { +## featureValues(object = object, method = method, value = value, +## intensity = intensity) +## }) + +#' @aliases extractChromatograms #' @title Extracting chromatograms #' #' @description \code{extractChromatograms}: the method allows to extract @@ -1655,12 +1713,23 @@ setMethod("groupval", #' \code{\link{XCMSnExp}} objects. 
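A sketch of the featureValues usage described above; `xod` is assumed to be an XCMSnExp object with correspondence results:

## Matrix of integrated peak intensities: one row per feature, one column per
## sample; NA where no chromatographic peak was assigned for a sample.
fv <- featureValues(xod, value = "into")
head(fv)

## If several peaks of one sample map to a feature, keep the most intense one.
fv_max <- featureValues(xod, method = "maxint", value = "into",
                        intensity = "into")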
#' #' @details Arguments \code{rt} and \code{mz} allow to specify the MS -#' data slice from which the chromatogram should be extracted. By specifying the -#' function to be used to aggregate intensity values across the mz range for the -#' same retention time it is possible to extract e.g. a -#' \emph{total ion chromatogram} (TIC, \code{aggregationFun = "sum"}) or a -#' \emph{base peak chromatogram} (BPC, \code{aggregationFun = "max"}). +#' data slice from which the chromatogram should be extracted. The parameter +#' \code{aggregationSum} allows to specify the function to be used to aggregate +#' the intensities across the mz range for the same retention time. Setting +#' \code{aggregationFun = "sum"} would e.g. allow to calculate the \emph{total +#' ion chromatogram} (TIC), \code{aggregationFun = "max"} the \emph{base peak +#' chromatogram} (BPC). +#' +#' @note +#' \code{Chromatogram} objects extracted with \code{extractChromatogram} contain +#' \code{NA_real_} values if, for a given retention time, no valid measurement +#' was available for the provided mz range. #' +#' For \code{\link{XCMSnExp}} objects, if adjusted retention times are +#' available, the \code{extractChromatograms} method will by default report and +#' use these (for the subsetting based on the provided parameter \code{rt}). This +#' can be overwritten with the parameter \code{adjustedRtime}. +#' #' @param object Either a \code{\link[MSnbase]{OnDiskMSnExp}} or #' \code{\link{XCMSnExp}} object from which the chromatograms should be extracted. #' @@ -1691,8 +1760,28 @@ setMethod("groupval", #' @seealso \code{\link{XCMSnExp}} for the data object. #' \code{\link{Chromatogram}} for the object representing chromatographic data. #' -#' @noRd -#' @rdname extractChromatograms-method +#' @export +#' @rdname extractChromatograms-method +#' +#' @examples +#' ## Read some files from the faahKO package. +#' library(xcms) +#' library(faahKO) +#' faahko_3_files <- c(system.file('cdf/KO/ko15.CDF', package = "faahKO"), +#' system.file('cdf/KO/ko16.CDF', package = "faahKO"), +#' system.file('cdf/KO/ko18.CDF', package = "faahKO")) +#' +#' od <- readMSData2(faahko_3_files) +#' +#' ## Extract the ion chromatogram for one chromatographic peak in the data. +#' chrs <- extractChromatograms(od, rt = c(2700, 2900), mz = 335) +#' +#' ## plot the data +#' plot(rtime(chrs[[2]]), intensity(chrs[[2]]), type = "l", xlab = "rtime", +#' ylab = "intensity", col = "000080") +#' for(i in c(1, 3)) { +#' points(rtime(chrs[[i]]), intensity(chrs[[i]]), type = "l", col = "00000080") +#' } setMethod("extractChromatograms", signature(object = "XCMSnExp"), function(object, rt, mz, adjustedRtime = hasAdjustedRtime(object), @@ -1702,4 +1791,368 @@ setMethod("extractChromatograms", adjusted = adjustedRtime)) }) +##' @rdname XCMSnExp-class +##' @param param A \code{\link{CentWaveParam}}, \code{\link{MatchedFilterParam}}, +##' \code{\link{MassifquantParam}}, \code{\link{MSWParam}} or +##' \code{\link{CentWavePredIsoParam}} object with the settings for the +##' chromatographic peak detection algorithm. +##' @inheritParams findChromPeaks-centWave +setMethod("findChromPeaks", + signature(object = "XCMSnExp", param = "ANY"), + function(object, param, BPPARAM = bpparam(), return.type = "XCMSnExp") { + ## Remove all previous results. 
+ if (hasFeatures(object)) + object <- dropFeatureDefinitions(object) + if (hasAdjustedRtime(object)) + object <- dropAdjustedRtime(object) + if (hasChromPeaks(object)) + object <- dropChromPeaks(object) + suppressMessages( + object <- callNextMethod() + ) + object@.processHistory <- list() + return(object) +}) + +## fillChromPeaks: +#' @aliases fillChromPeaks +#' @title Integrate areas of missing peaks +#' +#' @description Integrate signal in the mz-rt area of a feature (chromatographic +#' peak group) for samples in which no chromatographic peak for this feature was +#' identified and add it to the \code{chromPeaks}. Such peaks will have a value +#' of \code{1} in the \code{"is_filled"} column of the \code{\link{chromPeaks}} +#' matrix of the object. +#' +#' @details After correspondence (i.e. grouping of chromatographic peaks across +#' samples) there will always be features (peak groups) that do not include peaks +#' from every sample. The \code{fillChromPeaks} method defines intensity values +#' for such features in the missing samples by integrating the signal in the +#' mz-rt region of the feature. The mz-rt area is defined by the median mz and +#' rt start and end points of the other detected chromatographic peaks for a +#' given feature. +#' +#' Adjusted retention times will be used if available. +#' +#' Based on the peak finding algorithm that was used to identify the +#' (chromatographic) peaks different internal functions are employed to guarantee +#' that the integrated peak signal matches as much as possible the peak signal +#' integration used during the peak detection. For peaks identified with the +#' \code{\link{matchedFilter}} method, signal integration is performed on the +#' \emph{profile matrix} generated with the same settings used also during peak +#' finding (using the same \code{bin} size for example). For direct injection +#' data and peaks identified with the \code{\link{MSW}} algorithm signal is +#' integrated only along the mz dimension. For all other methods the complete +#' (raw) signal within the area defined by \code{"mzmin"}, \code{"mzmax"}, +#' \code{"rtmin"} and \code{"rtmax"} is used. +#' +#' @note The reported \code{"mzmin"}, \code{"mzmax"}, \code{"rtmin"} and +#' \code{"rtmax"} for the filled peaks represents the actual MS area from which +#' the signal was integrated. +#' Note that no peak is filled in if no signal was present in a file/sample in +#' the respective mz-rt area. These samples will still show a \code{NA} in the +#' matrix returned by the \code{\link{featureValues}} method. This is in contrast +#' to the \code{\link{fillPeaks.chrom}} method that returned an \code{"into"} and +#' \code{"maxo"} of \code{0} for such peak areas. Growing the mz-rt area using +#' the \code{expandMz} and \code{expandRt} might help to reduce the number of +#' missing peak signals after filling. +#' +#' @param object \code{XCMSnExp} object with identified and grouped +#' chromatographic peaks. +#' +#' @param param A \code{FillChromPeaksParam} object with all settings. +#' +#' @param expandMz \code{numeric(1)} defining the value by which the mz width of +#' peaks should be expanded. Each peak is expanded in mz direction by +#' \code{expandMz *} their original mz width. A value of \code{0} means no +#' expansion, a value of \code{1} grows each peak by 1 * the mz width of the peak +#' resulting in peakswith twice their original size in mz direction (expansion +#' by half mz width to both sides). 
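To illustrate the mz expansion documented above, a small worked sketch of the described formulas (the numbers are arbitrary, not from this patch):

## Hypothetical feature mz range and fill settings.
mzmin <- 400.00; mzmax <- 400.01
ppm <- 40; expandMz <- 0.5

## ppm-based widening is applied first if the range is narrower than
## mean(mz range) * ppm / 1e6.
mzmean <- mean(c(mzmin, mzmax))
width_ppm <- mzmean * ppm / 1e6
if ((mzmax - mzmin) < width_ppm) {
    mzmin <- mzmean - width_ppm / 2
    mzmax <- mzmean + width_ppm / 2
}

## The range is then grown by expandMz * its (possibly widened) mz width,
## half on each side.
d <- (mzmax - mzmin) * expandMz / 2
c(mzmin - d, mzmax + d)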
+#' +#' @param expandRt \code{numeric(1)}, same as \code{expandRt} but for the +#' retention time width. +#' +#' @param ppm \code{numeric(1)} optionally specifying a \emph{ppm} by which the +#' mz width of the peak region should be expanded. For peaks with an mz width +#' smaller than \code{mean(c(mzmin, mzmax)) * ppm / 1e6}, the \code{mzmin} will +#' be replaced by +#' \code{mean(c(mzmin, mzmax)) - (mean(c(mzmin, mzmax)) * ppm / 2 / 1e6)} +#' and \code{mzmax} by +#' \code{mean(c(mzmin, mzmax)) + (mean(c(mzmin, mzmax)) * ppm / 2 / 1e6)}. This +#' is applied before eventually expanding the mz width using the \code{expandMz} +#' parameter. +#' +#' @param BPPARAM Parallel processing settings. +#' +#' @return A \code{\link{XCMSnExp}} object with previously missing +#' chromatographic peaks for features filled into its \code{chromPeaks} matrix. +#' +#' @rdname fillChromPeaks +#' +#' @author Johannes Rainer +#' @seealso \code{\link{groupChromPeaks}} for methods to perform the +#' correspondence. +#' \code{\link{dropFilledChromPeaks}} for the method to remove filled in peaks. +#' +#' @examples +#' +#' ## Perform the peak detection using centWave on some of the files from the +#' ## faahKO package. Files are read using the readMSData2 from the MSnbase +#' ## package +#' library(faahKO) +#' library(xcms) +#' fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, +#' full.names = TRUE) +#' raw_data <- readMSData2(fls[1:2]) +#' +#' ## Create a CentWaveParam object. Note that the noise is set to 10000 to +#' ## speed up the execution of the example - in a real use case the default +#' ## value should be used, or it should be set to a reasonable value. +#' cwp <- CentWaveParam(ppm = 20, noise = 10000, snthresh = 25) +#' +#' res <- findChromPeaks(raw_data, param = cwp) +#' +#' ## Perform the correspondence. +#' res <- groupChromPeaks(res, param = PeakDensityParam()) +#' +#' ## For how many features do we lack an integrated peak signal? +#' sum(is.na(featureValues(res))) +#' +#' ## Filling missing peak data using default settings. +#' res <- fillChromPeaks(res) +#' +#' ## Get the peaks that have been filled in: +#' fp <- chromPeaks(res)[chromPeaks(res)[, "is_filled"] == 1, ] +#' head(fp) +#' +#' ## Did we get a signal for all missing peaks? +#' sum(is.na(featureValues(res))) +#' +#' ## No. +#' +#' ## Get the process history step along with the parameters used to perform +#' ## The peak filling: +#' ph <- processHistory(res, type = "Missing peak filling")[[1]] +#' ph +#' +#' ## The parameter class: +#' ph@param +#' +#' ## Drop the filled in peaks: +#' res <- dropFilledChromPeaks(res) +#' +#' ## Perform the peak filling with modified settings: allow expansion of the +#' ## mz range by a specified ppm and expanding the mz range by mz width/2 +#' prm <- FillChromPeaksParam(ppm = 40, expandMz = 0.5) +#' res <- fillChromPeaks(res, param = prm) +#' +#' ## Did we get a signal for all missing peaks? +#' sum(is.na(featureValues(res))) +#' +#' ## Still the same missing peaks. +setMethod("fillChromPeaks", + signature(object = "XCMSnExp", param = "FillChromPeaksParam"), + function(object, param, BPPARAM = bpparam()) { + if (!hasFeatures(object)) + stop("'object' does not provide feature definitions! Please ", + "run 'groupChromPeaks' first.") + ## Don't do that if we have already filled peaks? 
+ if (.hasFilledPeaks(object)) + message("Filled peaks already present, adding still missing", + " peaks.") + + startDate <- date() + expandMz <- expandMz(param) + expandRt <- expandRt(param) + ppm <- ppm(param) + ## Define or extend the peak area from which the signal should be + ## extracted. + ## Original code: use the median of the min/max rt and mz per peak. + fdef <- featureDefinitions(object) + aggFunLow <- median + aggFunHigh <- median + ## Note: we ensure in the downstream function that the rt range is + ## within the rt range. For the mz range it doesn't matter. + pkArea <- do.call( + rbind, + lapply( + fdef$peakidx, function(z) { + tmp <- chromPeaks(object)[z, c("rtmin", "rtmax", + "mzmin", "mzmax"), + drop = FALSE] + pa <- c(aggFunLow(tmp[, 1]), aggFunHigh(tmp[, 2]), + aggFunLow(tmp[, 3]), aggFunHigh(tmp[, 4])) + ## Check if we have to apply ppm replacement: + if (ppm != 0) { + mzmean <- mean(pa[3:4]) + tittle <- mzmean * (ppm / 2) / 1E6 + if ((pa[4] - pa[3]) < (tittle * 2)) { + pa[3] <- mzmean - tittle + pa[4] <- mzmean + tittle + } + } + ## Expand it. + if (expandRt != 0) { + diffRt <- (pa[2] - pa[1]) * expandRt / 2 + pa[1] <- pa[1] - diffRt + pa[2] <- pa[2] + diffRt + } + if (expandMz != 0) { + diffMz <- (pa[4] - pa[3]) * expandMz / 2 + pa[3] <- pa[3] - diffMz + pa[4] <- pa[4] + diffMz + } + return(pa) + } + )) + colnames(pkArea) <- c("rtmin", "rtmax", "mzmin", "mzmax") + ## Add mzmed column - needed for MSW peak filling. + pkArea <- cbind(group_idx = 1:nrow(pkArea), pkArea, + mzmed = as.numeric(fdef$mzmed)) + + pkGrpVal <- featureValues(object) + ## Check if there is anything to fill... + if (!any(is.na(rowSums(pkGrpVal)))) { + message("No missing peaks present.") + return(object) + } + ## Split the object by file and define the peaks for which + objectL <- vector("list", length(fileNames(object))) + pkAreaL <- objectL + for (i in 1:length(fileNames(object))) { + suppressMessages( + objectL[[i]] <- filterFile(object, file = i, + keepAdjustedRtime = TRUE) + ) + ## Want to extract intensities only for peaks that were not + ## found in a sample. + pkAreaL[[i]] <- pkArea[is.na(pkGrpVal[, i]), , drop = FALSE] + } + + ## Get to know what algorithm was used for the peak detection. + ## Special cases are MSWParam (no retention time) and + ## MatchedFilterParam (integrate from profile matrix). + ph <- processHistory(object, type = .PROCSTEP.PEAK.DETECTION) + findPeakMethod <- "unknown" + mzCenterFun <- "wMean" + if (length(ph)) { + if (is(ph[[1]], "XProcessHistory")) { + prm <- ph[[1]]@param + findPeakMethod <- .param2string(prm) + ## Check if the param class has a mzCenterFun slot + if (.hasSlot(prm, "mzCenterFun")) + mzCenterFun <- prm@mzCenterFun + } + } + ## Now rename that to the correct function name in xcms. + mzCenterFun <- paste("mzCenter", + gsub(mzCenterFun, pattern = "mzCenter.", + replacement = "", fixed = TRUE), sep=".") + if (findPeakMethod == "MSW") { + rts <- rtime(object, bySample = TRUE) + ## Ensure that we REALLY have direct injection data. + if (any(lengths(rts) > 1)) + stop("The data is supposed to be direct injection data, ", + "but I got files with more than one spectrum/", + "retention time!") + ## That's not working, because integration uses the rt. 
+ res <- bpmapply(FUN = .getMSWPeakData, objectL, + pkAreaL, as.list(1:length(objectL)), + MoreArgs = list( + cn = colnames(chromPeaks(object))), + BPPARAM = BPPARAM, SIMPLIFY = FALSE) + } else if (findPeakMethod == "matchedFilter") { + res <- bpmapply(FUN = .getChromPeakData_matchedFilter, + objectL, pkAreaL, as.list(1:length(objectL)), + MoreArgs = list( + cn = colnames(chromPeaks(object)), + param = prm + )) + } else { + res <- bpmapply(FUN = .getChromPeakData, objectL, + pkAreaL, as.list(1:length(objectL)), + MoreArgs = list( + cn = colnames(chromPeaks(object)), + mzCenterFun = mzCenterFun), + BPPARAM = BPPARAM, SIMPLIFY = FALSE) + } + + res <- do.call(rbind, res) + if (any(colnames(res) == "is_filled")) + res[, "is_filled"] <- 1 + else + res <- cbind(res, is_filled = 1) + ## cbind the group_idx column to track the feature/peak group. + res <- cbind(res, group_idx = do.call(rbind, pkAreaL)[, "group_idx"]) + ## Remove those without a signal + res <- res[!is.na(res[, "into"]), , drop = FALSE] + if (nrow(res) == 0) { + warning("Could not integrate any signal for the missing ", + "peaks! Consider increasing 'expandMz' and 'expandRt'.") + return(object) + } + + ## Get the msFeatureData: + newFd <- new("MsFeatureData") + newFd@.xData <- .copy_env(object@msFeatureData) + incr <- nrow(chromPeaks(object)) + for (i in unique(res[, "group_idx"])) { + fdef$peakidx[[i]] <- c(fdef$peakidx[[i]], + (which(res[, "group_idx"] == i) + incr)) + } + + chromPeaks(newFd) <- rbind(chromPeaks(object), res[, -ncol(res)]) + featureDefinitions(newFd) <- fdef + lockEnvironment(newFd, bindings = TRUE) + object@msFeatureData <- newFd + ## Add a process history step + ph <- XProcessHistory(param = param, + date. = startDate, + type. = .PROCSTEP.PEAK.FILLING, + fileIndex = 1:length(fileNames(object))) + object <- addProcessHistory(object, ph) ## this also validates object. + return(object) + }) + +#' @rdname fillChromPeaks +setMethod( + "fillChromPeaks", + signature(object = "XCMSnExp", param = "missing"), + function(object, + param, + BPPARAM = bpparam()) { + fillChromPeaks(object, param = FillChromPeaksParam(), + BPPARAM = BPPARAM) + }) +##' @aliases dropFilledChromPeaks +##' +##' @description \code{dropFilledChromPeaks}: drops any filled-in chromatographic +##' peaks (filled in by the \code{\link{fillChromPeaks}} method) and all related +##' process history steps. +##' +##' @rdname XCMSnExp-class +##' @seealso \code{\link{fillChromPeaks}} for the method to fill-in eventually +##' missing chromatographic peaks for a feature in some samples. 
+setMethod("dropFilledChromPeaks", "XCMSnExp", function(object) { + if (!.hasFilledPeaks(object)) + return(object) + keep_pks <- which(chromPeaks(object)[, "is_filled"] == 0) + newFd <- new("MsFeatureData") + newFd@.xData <- .copy_env(object@msFeatureData) + ## Update index in featureDefinitions + fd <- featureDefinitions(newFd) + fd <- split(fd, 1:nrow(fd)) + fdL <- lapply(fd, function(z) { + z$peakidx <- list(z$peakidx[[1]][z$peakidx[[1]] %in% keep_pks]) + return(z) + }) + featureDefinitions(newFd) <- do.call(rbind, fdL) + ## Remove peaks + chromPeaks(newFd) <- chromPeaks(newFd)[keep_pks, , drop = FALSE] + ## newFd <- .filterChromPeaks(object@msFeatureData, idx = keep_pks) + object@msFeatureData <- newFd + object <- dropProcessHistories(object, type = .PROCSTEP.PEAK.FILLING) + if (validObject(object)) + return(object) +}) diff --git a/R/methods-xcmsRaw.R b/R/methods-xcmsRaw.R index c73c2c158..5ec1c89c6 100755 --- a/R/methods-xcmsRaw.R +++ b/R/methods-xcmsRaw.R @@ -1,5 +1,5 @@ ## All Methods for xcmsRaw should be here. -#' @include functions-xcmsRaw.R +#' @include functions-xcmsRaw.R functions-utils.R ############################################################ ## show @@ -1160,6 +1160,7 @@ setMethod("getPeaks", "xcmsRaw", function(object, peakrange, step = 0.1) { stime <- object@scantime ### Create EIC buffer + ## This is NOT calculated for the full file. mrange <- range(peakrange[,1:2]) mass <- seq(floor(mrange[1]/step)*step, ceiling(mrange[2]/step)*step, by = step) bufsize <- min(100, length(mass)) @@ -1186,33 +1187,40 @@ setMethod("getPeaks", "xcmsRaw", function(object, peakrange, step = 0.1) { diff(idxrange)+1, mass[idxrange[1]], mass[idxrange[2]], TRUE, object@profparam) } + ## Extract the intensity matrix for the mz-rt range: rows are mz, cols + ## rt values. ymat <- buf[bufidx[imz[1]:imz[2]],iret[1]:iret[2],drop=FALSE] + ## Define the maximum intensity, is one value per mz. ymax <- colMax(ymat) iymax <- which.max(ymax) + ## The width in rt. pwid <- diff(stime[iret])/diff(iret) + ## Calculate sum across rt. For each mz we get one value. rosm <- rowSums(ymat) limz <- length(imz[1]:imz[2]) if (length(rosm) != limz) { ## that happens for some reason warning("weighted.mean : x and w must have the same length \n") rosm <- rep(1, limz) ## fallback to mean } - rmat[i,1] <- weighted.mean(mass[imz[1]:imz[2]], rosm) + ## mean mz: + rmat[i,1] <- weighted.mean(mass[imz[1]:imz[2]], rosm) ## mz; its not the + ## position of the largest intensity! if (is.nan(rmat[i,1]) || is.na(rmat[i,1])) ## R2.11 : weighted.mean() results in NA (not NaN) for zero weights rmat[i,1] <- mean(peakrange[i,1:2]) - rmat[i,2:3] <- peakrange[i,1:2] - rmat[i,4] <- stime[iret[1]:iret[2]][iymax] - rmat[i,5:6] <- peakrange[i,3:4] + rmat[i,2:3] <- peakrange[i,1:2] ## mzmin, mzmax + rmat[i,4] <- stime[iret[1]:iret[2]][iymax] ## rt + rmat[i,5:6] <- peakrange[i,3:4] ## rtmin, rtmax if (peakrange[i,3] < stime[1] || peakrange[i,4] > stime[length(stime)] || is.nan(pwid)) { warning("getPeaks: Peak m/z:",peakrange[i,1],"-",peakrange[i,2], ", RT:",peakrange[i,3],"-",peakrange[i,4], "is out of retention time range for this sample (",object@filepath,"), using zero intensity value.\n") rmat[i,7:8] <- 0 } else { - rmat[i,7] <- pwid*sum(ymax) - rmat[i,8] <- ymax[iymax] + rmat[i,7] <- pwid*sum(ymax) ## into + rmat[i,8] <- ymax[iymax] ## maxo } } invisible(rmat) @@ -1273,6 +1281,8 @@ setMethod("rawMat", "xcmsRaw", function(object, log = log) }) ## @jo TODO LLL replace that with an implementation in C. 
+## Note: this function silently drops retention times for which no intensity-mz +## pair was measured. .rawMat <- function(mz, int, scantime, valsPerSpect, mzrange = numeric(), rtrange = numeric(), scanrange = numeric, log = FALSE) { @@ -1305,6 +1315,38 @@ setMethod("rawMat", "xcmsRaw", function(object, intensity = int) } +.rawMat2 <- function(mz, int, scantime, valsPerSpect, mzrange = numeric(), + rtrange = numeric(), scanrange = numeric, + log = FALSE) { + if (length(rtrange) >= 2) { + rtrange <- range(rtrange) + scanrange <- range(which((scantime >= rtrange[1]) & + (scantime <= rtrange[2]))) + } + if (length(scanrange) < 2) + scanrange <- c(1, length(valsPerSpect)) + else scanrange <- range(scanrange) + if (scanrange[1] == 1) + startidx <- 1 + else + startidx <- sum(valsPerSpect[1:(scanrange[1]-1)]) + 1 + endidx <- sum(valsPerSpect[1:scanrange[2]]) + scans <- rep(scanrange[1]:scanrange[2], + valsPerSpect[scanrange[1]:scanrange[2]]) + masses <- mz[startidx:endidx] + massidx <- 1:length(masses) + if (length(mzrange) >= 2) { + mzrange <- range(mzrange) + massidx <- massidx[(masses >= mzrange[1] & (masses <= mzrange[2]))] + } + int <- int[startidx:endidx][massidx] + if (log && (length(int) > 0)) + int <- log(int + max(1 - min(int), 0)) + cbind(time = scantime[scans[massidx]], + mz = masses[massidx], + intensity = int) +} + ############################################################ ## plotRaw diff --git a/R/methods-xcmsSet.R b/R/methods-xcmsSet.R index 0139ce169..e7d869e17 100644 --- a/R/methods-xcmsSet.R +++ b/R/methods-xcmsSet.R @@ -1251,16 +1251,30 @@ setMethod("fillPeaks.MSW", "xcmsSet", function(object, mrange=c(0,0), sample=NUL for (g in ngs) { nppos <- nppos+1 - mzpos <- which(abs(lcraw@env$mz - groupmat[g,"mzmed"]) == min(abs(lcraw@env$mz - groupmat[g,"mzmed"]))) - mmzpos <- mzpos[which(lcraw@env$intensity[mzpos] == max(lcraw@env$intensity[mzpos]))] + ## Get the index of the mz value(s) closest to the mzmed of the + ## group; could eventually be more than one. + mzpos <- which(abs(lcraw@env$mz - groupmat[g,"mzmed"]) == + min(abs(lcraw@env$mz - groupmat[g,"mzmed"]))) + ## Get the index of the maximum intensity for the above mz index. + mmzpos <- mzpos[which(lcraw@env$intensity[mzpos] == + max(lcraw@env$intensity[mzpos]))] + ## Eventually increase the range around the mzmed. mmzr <- seq((mmzpos-mrange[1]),(mmzpos+mrange[2])) + ## maxo is the maximum signal for mz values that are closest to + ## the mzmed of the feature. maxo <- max(lcraw@env$intensity[mmzr]) ## this is the new one, summing the scale-range - ## calculating scale, adding intensitiesin this scale + ## calculating scale, adding intensities in this scale medMZmin <- median(peakmat[groupindex[[g]],"mzmin"]) medMZmax <- median(peakmat[groupindex[[g]],"mzmax"]) - minMzpos <- min(which(abs(lcraw@env$mz - medMZmin) == min(abs(lcraw@env$mz - medMZmin)))) - maxMzpos <- max(which(abs(lcraw@env$mz - medMZmax) == min(abs(lcraw@env$mz - medMZmax)))) + ## mz values to be considered: from median mzmin of all peaks in + ## the current peak group/feature to median mzmax. + ## Might eventually be easier to just check for $mz >= medMZmin + minMzpos <- min(which(abs(lcraw@env$mz - medMZmin) == + min(abs(lcraw@env$mz - medMZmin)))) + maxMzpos <- max(which(abs(lcraw@env$mz - medMZmax) == + min(abs(lcraw@env$mz - medMZmax)))) + ## into: the sum of intensities in this range. 
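        ## Small worked example (made-up values): for mz = c(99.98, 99.99, 100.00, 100.01)
        ## with intensity = c(5, 50, 400, 80), medMZmin = 99.99 and medMZmax = 100.01
        ## select positions 2 and 4, so the sum below runs over positions 2:4 and
        ## gives into = 50 + 400 + 80 = 530.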
into = sum(lcraw@env$intensity[minMzpos:maxMzpos]) newpeaks[nppos,] <- c(groupmat[g,"mzmed"],medMZmin,medMZmax,-1,-1,-1,into,maxo,i) } diff --git a/R/xcmsSource.R b/R/xcmsSource.R index 232a72215..71d00c058 100644 --- a/R/xcmsSource.R +++ b/R/xcmsSource.R @@ -2,25 +2,15 @@ #' @include AllGenerics.R DataClasses.R setMethod("xcmsSource", "character", function(object) { - ## if (useOriginalCode()) { - ## if (! file.exists(object)) { - ## stop("xcmsSource: file not found: ", object) - ## } else if (mzR:::netCDFIsFile(object)) { - ## new("netCdfSource", object) - ## } else if (mzR:::rampIsFile(object)) { - ## new("rampSource", object) - ## } else { - ## stop("xcmsSource: Could not determine file type for: ", object) - ## } - ## } else { if (!file.exists(object)) { stop("xcmsSource: file not found: ", object) } else if (isCdfFile(object)) { new("netCdfSource", object) } else if (isMzMLFile(object)) { + new("pwizSource", object) + } else if (isRampFile(object)) { new("rampSource", object) } else { stop("xcmsSource: Could not determine file type for: ", object) } - ## } }) diff --git a/inst/NEWS b/inst/NEWS index 790b5b49a..514f046b2 100644 --- a/inst/NEWS +++ b/inst/NEWS @@ -1,3 +1,32 @@ +CHANGES IN VERSION 1.51.9 +------------------------- + +NEW FEATURES: ++ fillChromPeaks, dropFilledChromPeaks methods and FillChromPeaksParam class. ++ featureValues method. + +USER VISIBLE CHANGES: ++ Extended new_functionality vignette. ++ Change default backend for reading mzML files to pwiz. + +BUG FIXES: ++ Issue #135: fix peak signal integration for centWave. ++ Issue #139: problem with expand.mz and expand.rt in fillPeaks.chrom. ++ Issue #137: Error in findChromPeaks if no peaks are found. + + +CHANGES IN VERSION 1.51.8 +------------------------- + +NEW FEATURES: ++ Add Chromatogram class and extractChromatograms method. + +BUG FIXES: ++ Issue #118: failing unit test on Windows build machine. ++ Issue #133: error with c() and xcmsSet without peaks. ++ Issue #134: xcmsSet constructor endless loop. 
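To make the peak-filling API introduced in 1.51.9 above concrete, here is a minimal usage sketch (not part of this changeset; it assumes the faahKO example CDF files and deliberately simple parameter settings):

    library(xcms)
    library(faahKO)
    fls <- dir(system.file("cdf/KO", package = "faahKO"), full.names = TRUE)
    raw <- readMSData2(fls)
    ## Chromatographic peak detection and correspondence.
    xod <- findChromPeaks(raw, param = CentWaveParam(noise = 10000, snthresh = 40))
    xod <- groupChromPeaks(xod, param = PeakDensityParam())
    ## Integrate signal for features that lack a detected peak in some samples;
    ## ppm/expandMz/expandRt can widen the m/z-rt area that is integrated.
    xod_filled <- fillChromPeaks(xod, param = FillChromPeaksParam())
    head(featureValues(xod_filled, value = "into"))
    ## Filled-in peaks (and the corresponding process history step) can be
    ## removed again.
    xod_filled <- dropFilledChromPeaks(xod_filled)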
+ + CHANGES IN VERSION 1.51.7 ------------------------- diff --git a/inst/unitTests/runit.Chromatogram.R b/inst/unitTests/runit.Chromatogram.R index 4c6998d10..f24b58424 100644 --- a/inst/unitTests/runit.Chromatogram.R +++ b/inst/unitTests/runit.Chromatogram.R @@ -6,6 +6,13 @@ test_Chromatogram_class <- function() { ch <- new("Chromatogram") ch@mz <- 3 checkException(validObject(ch)) + ch@mz <- c(1, 3) + ch@precursorMz <- 4 + checkException(validObject(ch)) + ch@precursorMz <- c(4, 4) + ch@productMz <- 5 + checkException(validObject(ch)) + ## int <- rnorm(100, mean = 200, sd = 2) rt <- rnorm(100, mean = 300, sd = 3) ## check exceptions: @@ -37,10 +44,189 @@ test_Chromatogram_class <- function() { checkEquals(df, data.frame(rtime = rt, intensity = int)) ch <- xcms:::Chromatogram(mz = c(1, 3)) checkEquals(ch@mz, c(1, 3)) - checkEquals(mzrange(ch), c(1, 3)) - checkEquals(mzrange(ch, filter = TRUE), c(0, 0)) + checkEquals(mz(ch), c(1, 3)) + checkEquals(mz(ch, filter = TRUE), c(0, 0)) ch <- xcms:::Chromatogram(filterMz = c(1, 3)) checkEquals(ch@filterMz, c(1, 3)) - checkEquals(mzrange(ch, filter = TRUE), c(1, 3)) - checkEquals(mzrange(ch, filter = FALSE), c(0, 0)) + checkEquals(mz(ch, filter = TRUE), c(1, 3)) + checkEquals(mz(ch, filter = FALSE), c(0, 0)) + ch <- xcms:::Chromatogram(precursorMz = 123) + checkEquals(ch@precursorMz, c(123, 123)) + checkEquals(precursorMz(ch), c(123, 123)) + ch <- xcms:::Chromatogram(productMz = 123) + checkEquals(ch@productMz, c(123, 123)) + checkEquals(productMz(ch), c(123, 123)) +} + +test_extractChromatograms <- function() { + ## OnDiskMSnExp + ## TIC + chrs <- extractChromatograms(filterFile(od_x, file = 2)) + spctr <- spectra(filterFile(od_x, file = 2)) + ints <- unlist(lapply(spctr, function(z) + return(sum(intensity(z))))) + checkEquals(intensity(chrs[[1]]), ints) + checkEquals(rtime(chrs[[1]]), unlist(lapply(spctr, rtime))) + ## BPC + chrs <- extractChromatograms(filterFile(od_x, file = 2), + aggregationFun = "max") + ints <- unlist(lapply(spctr, function(z) + return(max(intensity(z))))) + checkEquals(intensity(chrs[[1]]), ints) + checkEquals(rtime(chrs[[1]]), unlist(lapply(spctr, rtime))) + ## XCMSnExp + xod_x <- faahko_xod + chrs <- extractChromatograms(filterFile(xod_x, file = 2)) + ints <- unlist(lapply(spctr, function(z) + return(sum(intensity(z))))) + checkEquals(intensity(chrs[[1]]), ints) + checkEquals(rtime(chrs[[1]]), unlist(lapply(spctr, rtime))) + ## BPC + chrs <- extractChromatograms(filterFile(xod_x, file = 2), + aggregationFun = "max") + ints <- unlist(lapply(spctr, function(z) + return(max(intensity(z))))) + checkEquals(intensity(chrs[[1]]), ints) + checkEquals(rtime(chrs[[1]]), unlist(lapply(spctr, rtime))) + ## with adjusted retention times. 
+ chrs <- extractChromatograms(filterFile(xod_xgr, file = 2), + adjustedRtime = FALSE, aggregationFun = "max") + checkEquals(intensity(chrs[[1]]), ints) + checkEquals(rtime(chrs[[1]]), unlist(lapply(spctr, rtime))) + chrs <- extractChromatograms(filterFile(xod_xgr, file = 2, + keepAdjustedRtime = TRUE), + aggregationFun = "max") + checkEquals(intensity(chrs[[1]]), ints) + checkEquals(rtime(chrs[[1]]), rtime(xod_xgr, bySample = TRUE)[[2]]) + + ## Now subsetting for mz: + tmp <- filterFile(od_x, file = 2) + chrs <- extractChromatograms(tmp, mz = c(300, 400)) + checkEquals(mz(chrs[[1]], filter = TRUE), c(300, 400)) + suppressWarnings(spctr <- spectra(filterMz(tmp, mz = c(300, 400)))) + ints <- unlist(lapply(spctr, function(z) + return(sum(intensity(z))))) + ints2 <- intensity(chrs[[1]]) + ints2[is.na(ints2)] <- 0 + checkEquals(ints2, ints) + checkEquals(rtime(chrs[[1]]), unlist(lapply(spctr, rtime))) + ## with adjusted retention times + chrs <- extractChromatograms(filterFile(xod_xgr, file = 2, + keepAdjustedRtime = TRUE), + mz = c(300, 400)) + ints <- unlist(lapply(spctr, function(z) + return(sum(intensity(z))))) + ints2 <- intensity(chrs[[1]]) + ints2[is.na(ints2)] <- 0 + checkEquals(ints2, ints) + checkEquals(rtime(chrs[[1]]), rtime(xod_xgr, bySample = TRUE)[[2]]) + + ## Now subsetting for rt: + chrs <- extractChromatograms(od_x, rt = c(2700, 2900)) + checkTrue(all(rtime(chrs[[1]]) >= 2700 & rtime(chrs[[1]]) <= 2900)) + checkTrue(all(rtime(chrs[[2]]) >= 2700 & rtime(chrs[[2]]) <= 2900)) + checkTrue(all(rtime(chrs[[3]]) >= 2700 & rtime(chrs[[3]]) <= 2900)) + spctr <- spectra(filterRt(od_x, rt = c(2700, 2900))) + ints <- split(unlist(lapply(spctr, function(z) sum(intensity(z)))), + f = unlist(lapply(spctr, fromFile))) + checkEquals(ints[[1]], intensity(chrs[[1]])) + checkEquals(ints[[2]], intensity(chrs[[2]])) + checkEquals(ints[[3]], intensity(chrs[[3]])) + ## Using adjusted rt: + chrs2 <- extractChromatograms(xod_xgr, rt = c(2700, 2900)) + checkTrue(all(rtime(chrs2[[1]]) >= 2700 & rtime(chrs2[[1]]) <= 2900)) + checkTrue(all(rtime(chrs2[[2]]) >= 2700 & rtime(chrs2[[2]]) <= 2900)) + checkTrue(all(rtime(chrs2[[3]]) >= 2700 & rtime(chrs2[[3]]) <= 2900)) + checkTrue(length(chrs[[1]]) != length(chrs2[[1]])) + checkTrue(length(chrs[[2]]) == length(chrs2[[2]])) + checkTrue(length(chrs[[3]]) != length(chrs2[[3]])) + tmp <- filterRt(xod_xgr, rt = c(2700, 2900)) + checkEquals(rtime(chrs2[[1]]), rtime(tmp, bySample = TRUE)[[1]]) + checkEquals(rtime(chrs2[[2]]), rtime(tmp, bySample = TRUE)[[2]]) + checkEquals(rtime(chrs2[[3]]), rtime(tmp, bySample = TRUE)[[3]]) + ## Check the values... 
+ keepSp <- which(adjustedRtime(xod_xgr) >= 2700 & + adjustedRtime(xod_xgr) <= 2900) + tmp <- xod_xgr[keepSp] + ints <- unlist(lapply(spectra(tmp), function(z) sum(intensity(z)))) + intsL <- split(ints, fromFile(tmp)) + checkEquals(intensity(chrs2[[1]]), intsL[[1]]) + checkEquals(intensity(chrs2[[2]]), intsL[[2]]) + checkEquals(intensity(chrs2[[3]]), intsL[[3]]) + + ## Now subsetting for rt and mz: + chrs <- extractChromatograms(od_x, rt = c(2700, 2900), mz = 335) + checkTrue(all(rtime(chrs[[1]]) >= 2700 & rtime(chrs[[1]]) <= 2900)) + checkTrue(all(rtime(chrs[[2]]) >= 2700 & rtime(chrs[[2]]) <= 2900)) + checkTrue(all(rtime(chrs[[3]]) >= 2700 & rtime(chrs[[3]]) <= 2900)) + spctr <- spectra(filterMz(filterRt(od_x, rt = c(2700, 2900)), mz = 335)) + ints <- split(unlist(lapply(spctr, function(z) { + if (z@peaksCount) + return(sum(intensity(z))) + else return(NA) + })), f = unlist(lapply(spctr, fromFile))) + checkEquals(ints[[1]], intensity(chrs[[1]])) + checkEquals(ints[[2]], intensity(chrs[[2]])) + checkEquals(ints[[3]], intensity(chrs[[3]])) + ## Using adjusted rt: LLLL + chrs <- extractChromatograms(xod_xgr, rt = c(2700, 2900), mz = 335) + checkTrue(all(rtime(chrs[[1]]) >= 2700 & rtime(chrs[[1]]) <= 2900)) + checkTrue(all(rtime(chrs[[2]]) >= 2700 & rtime(chrs[[2]]) <= 2900)) + checkTrue(all(rtime(chrs[[3]]) >= 2700 & rtime(chrs[[3]]) <= 2900)) + spctr <- spectra(filterMz(filterRt(xod_xgr, rt = c(2700, 2900)), mz = 335)) + ints <- split(unlist(lapply(spctr, function(z) { + if (z@peaksCount) + return(sum(intensity(z))) + else return(NA) + })), f = unlist(lapply(spctr, fromFile))) + checkEquals(ints[[1]], intensity(chrs[[1]])) + checkEquals(ints[[2]], intensity(chrs[[2]])) + checkEquals(ints[[3]], intensity(chrs[[3]])) + ## Check the rtime. + tmp <- filterRt(xod_xgr, rt = c(2700, 2900)) + checkEquals(rtime(chrs[[1]]), rtime(tmp, bySample = TRUE)[[1]]) + checkEquals(rtime(chrs[[2]]), rtime(tmp, bySample = TRUE)[[2]]) + checkEquals(rtime(chrs[[3]]), rtime(tmp, bySample = TRUE)[[3]]) + + ## What if we're completely off? + chrs <- extractChromatograms(od_x, rt = c(5000, 5500)) + checkTrue(length(chrs) == 0) + chrs <- extractChromatograms(od_x, rt = c(2600, 2700), mz = 12000) + checkTrue(length(chrs) == 0) +} + +dontrun_test_with_MRM <- function() { + ## Test how we could read the data. + ## chromatogramsInfo + library(msdata) + fls <- proteomics(full.names = TRUE) + + library(mzR) + msf <- mzR::openMSfile(fls[2], "pwiz") + chrs <- chromatograms(msf) + chrsI <- chromatogram(msf) + ## The same essentially. + nChrom(msf) + length(chrs) + nrow(chrs[[1]]) + mzR::close(msf) + ## + msf <- mzR::openMSfile(fls[1], "pwiz") + chrs <- chromatograms(msf) + chrs <- chromatograms(msf) + nChrom(msf) + length(chrs) + nrow(chrs[[1]]) + + ## Now, we've got the following info: cvParam + ## accession="MS:1000235" name="total ion current chromatogram" value="" + ## Check http://proteowizard.sourceforge.net/dox/namespacepwiz_1_1msdata.html + ## Potentially interesting: + ## o ChromatogramIdentity nope, no header info. + ## OK, have to look for chromatogram with index="1", then within + ## for cvParam accession="MS:1000827" and its value -> Q1 or precursorMz + ## then within for cvParam accession="MS:1000827" and its value + ## -> Q3. 
+ + ## https://sourceforge.net/p/proteowizard/mailman/message/27571266/ } diff --git a/inst/unitTests/runit.Param-classes.R b/inst/unitTests/runit.Param-classes.R index daf4f86cd..ea42248ab 100644 --- a/inst/unitTests/runit.Param-classes.R +++ b/inst/unitTests/runit.Param-classes.R @@ -785,3 +785,42 @@ test_ObiwarpParam <- function() { checkException(factorGap(p) <- -1) } +test_GenericParam <- function() { + prm <- GenericParam(fun = "mean") + checkEquals(prm@fun, "mean") + ## Errors + checkException(GenericParam(args = list(na.rm = TRUE))) + checkException(GenericParam(fun = c("a", "b"))) +} + +test_FillChromPeaksParam <- function() { + library(xcms) + library(RUnit) + ## Check getter/setter methods: + p <- new("FillChromPeaksParam", expandMz = 0.8) + checkEquals(expandMz(p), 0.8) + expandMz(p) <- 0.3 + checkEquals(expandMz(p), 0.3) + p <- FillChromPeaksParam(expandMz = 0.7) + checkEquals(expandMz(p), 0.7) + checkException(expandMz(p) <- c(2, 2)) + checkException(expandMz(p) <- -2) + + p <- new("FillChromPeaksParam", expandRt = 0.8) + checkEquals(expandRt(p), 0.8) + expandRt(p) <- 0.3 + checkEquals(expandRt(p), 0.3) + p <- FillChromPeaksParam(expandRt = 0.7) + checkEquals(expandRt(p), 0.7) + checkException(expandRt(p) <- c(2, 2)) + checkException(expandRt(p) <- -2) + + p <- new("FillChromPeaksParam", ppm = 8) + checkEquals(ppm(p), 8) + ppm(p) <- 3 + checkEquals(ppm(p), 3) + p <- FillChromPeaksParam(ppm = 7) + checkEquals(ppm(p), 7) + checkException(ppm(p) <- c(2, 2)) + checkException(ppm(p) <- -2) +} diff --git a/inst/unitTests/runit.XCMSnExp.R b/inst/unitTests/runit.XCMSnExp.R index fa97db474..c3836ddcf 100644 --- a/inst/unitTests/runit.XCMSnExp.R +++ b/inst/unitTests/runit.XCMSnExp.R @@ -1,10 +1,10 @@ ## tests related to the new XCMSnExp object. -od_x <- faahko_od -xod_x <- faahko_xod -xod_xg <- groupChromPeaks(xod_x, param = PeakDensityParam()) -xod_xgr <- adjustRtime(xod_xg, param = PeakGroupsParam(span = 0.4)) -xod_xgrg <- groupChromPeaks(xod_xgr, param = PeakDensityParam()) +## od_x <- faahko_od +## xod_x <- faahko_xod +## xod_xg <- groupChromPeaks(xod_x, param = PeakDensityParam()) +## xod_xgr <- adjustRtime(xod_xg, param = PeakGroupsParam(span = 0.4)) +## xod_xgrg <- groupChromPeaks(xod_xgr, param = PeakDensityParam()) xs <- faahko_xs @@ -104,6 +104,27 @@ test_XCMSnExp_spectra <- function() { res <- spectra(xod) res_2 <- spectra(xod, bySample = TRUE) checkEquals(split(res, fromFile(xod)), res_2) + ## xod_x + tmp <- filterRt(xod_x, rt = c(2700, 2900)) + res <- spectra(tmp) + rts <- unlist(lapply(res, rtime)) + checkEquals(rts, rtime(tmp)) + ## Check with adjusted retention times. 
+ tmp2 <- filterRt(xod_xgr, rt = c(2700, 2900)) + res2 <- spectra(tmp2) + rts2 <- unlist(lapply(res2, rtime)) + checkEquals(rts2, rtime(tmp2)) + ## Now do it on one file: + tmp <- filterFile(xod_x, file = 2) + res <- spectra(tmp) + checkEquals(rtime(tmp), unlist(lapply(res, rtime))) + tmp2 <- filterFile(xod_xgr, file = 2, keepAdjustedRtime = TRUE) + res2 <- spectra(tmp2) + checkEquals(rtime(tmp2), unlist(lapply(res2, rtime))) + checkTrue(sum(unlist(lapply(res2, rtime)) == + unlist(lapply(res, rtime))) < length(rtime(tmp)) / 4) + res3 <- spectra(tmp2, adjusted = FALSE) + checkEquals(res, res3) } test_XCMSnExp_class_accessors <- function() { @@ -171,6 +192,13 @@ test_XCMSnExp_class_accessors <- function() { .checkCreationOfEmptyObject() } +test_XCMSnExp_findChromPeaks <- function() { + ## Call findChromPeaks on an XCMSnExp + tmp <- findChromPeaks(xod_x, param = CentWaveParam(noise = 10000, + snthresh = 40)) + checkEquals(chromPeaks(tmp), chromPeaks(xod_x)) +} + test_XCMSnExp_processHistory <- function() { ph <- xcms:::ProcessHistory(fileIndex. = 2, info. = "For file 2") @@ -441,9 +469,9 @@ test_XCMSnExp_filterFile <- function() { checkTrue(!hasAdjustedRtime(tmp)) checkTrue(!hasFeatures(tmp)) checkTrue(all(chromPeaks(tmp)[, "sample"] == 1)) - checkEquals(chromPeaks(tmp)[, -ncol(chromPeaks(tmp))], + checkEquals(chromPeaks(tmp)[, -(ncol(chromPeaks(tmp)) - 1)], chromPeaks(xod_x)[chromPeaks(xod_x)[, "sample"] == 2, - -ncol(chromPeaks(xod_x))]) + -(ncol(chromPeaks(xod_x)) - 1)]) checkEquals(fileIndex(processHistory(tmp)[[1]]), 1) ## check with other index. tmp <- filterFile(xod_x, file = c(1, 3)) @@ -453,7 +481,8 @@ test_XCMSnExp_filterFile <- function() { checkTrue(all(chromPeaks(tmp)[, "sample"] %in% c(1, 2))) a <- chromPeaks(tmp) b <- chromPeaks(xod_x) - checkEquals(a[, -ncol(a)], b[b[, "sample"] %in% c(1, 3), -ncol(b)]) + checkEquals(a[, -(ncol(a) - 1)], + b[b[, "sample"] %in% c(1, 3), -(ncol(b) - 1)]) checkEquals(fileIndex(processHistory(tmp)[[1]]), c(1, 2)) ## Errors @@ -494,14 +523,16 @@ test_XCMSnExp_filterFile <- function() { checkTrue(!hasAdjustedRtime(res)) checkTrue(!hasFeatures(res)) tmp <- chromPeaks(xod_xg) - checkEquals(chromPeaks(res)[, -ncol(tmp)], tmp[tmp[, "sample"] == 2, -ncol(tmp)]) + checkEquals(chromPeaks(res)[, -(ncol(tmp) - 1)], + tmp[tmp[, "sample"] == 2, -(ncol(tmp) - 1)]) checkEquals(rtime(res), rtime(xod_xg, bySample = TRUE)[[2]]) ## Do filterFile on xod_xgr ## Should remove adjusted rts and revert the original peak rts. 
res <- filterFile(xod_xgr, file = 2) checkTrue(hasChromPeaks(res)) tmp <- chromPeaks(xod_xg) - checkEquals(chromPeaks(res)[, -ncol(tmp)], tmp[tmp[, "sample"] == 2, -ncol(tmp)]) + checkEquals(chromPeaks(res)[, -(ncol(tmp) - 1)], + tmp[tmp[, "sample"] == 2, -(ncol(tmp) - 1)]) checkEquals(rtime(res), rtime(xod_xg, bySample = TRUE)[[2]]) checkTrue(!hasAdjustedRtime(res)) checkTrue(!hasFeatures(res)) @@ -511,7 +542,8 @@ test_XCMSnExp_filterFile <- function() { res <- filterFile(xod_xgr, file = 2, keepAdjustedRtime = TRUE) checkTrue(hasChromPeaks(res)) tmp <- chromPeaks(xod_xgr) - checkEquals(chromPeaks(res)[, -ncol(tmp)], tmp[tmp[, "sample"] == 2, -ncol(tmp)]) + checkEquals(chromPeaks(res)[, -(ncol(tmp) - 1)], + tmp[tmp[, "sample"] == 2, -(ncol(tmp) - 1)]) ## has to be different from the ones in xod_x tmp <- chromPeaks(xod_x) checkTrue(sum(chromPeaks(res)[, "rt"] == tmp[tmp[, "sample"] == 2, "rt"]) < @@ -527,8 +559,8 @@ test_XCMSnExp_filterFile <- function() { res <- filterFile(xod_xgrg, file = c(1, 3)) checkTrue(hasChromPeaks(res)) tmp <- chromPeaks(xod_x) - checkEquals(chromPeaks(res)[, -ncol(tmp)], - tmp[tmp[, "sample"] %in% c(1, 3), -ncol(tmp)]) + checkEquals(chromPeaks(res)[, -(ncol(tmp) - 1)], + tmp[tmp[, "sample"] %in% c(1, 3), -(ncol(tmp) - 1)]) checkEquals(unname(rtime(res, bySample = TRUE)), unname(rtime(xod_xg, bySample = TRUE)[c(1, 3)])) checkTrue(!hasAdjustedRtime(res)) @@ -539,8 +571,8 @@ test_XCMSnExp_filterFile <- function() { res <- filterFile(xod_xgrg, file = c(1, 3), keepAdjustedRtime = TRUE) checkTrue(hasChromPeaks(res)) tmp <- chromPeaks(xod_xgr) - checkEquals(chromPeaks(res)[, -ncol(tmp)], - tmp[tmp[, "sample"] %in% c(1, 3), -ncol(tmp)]) + checkEquals(chromPeaks(res)[, -(ncol(tmp) - 1)], + tmp[tmp[, "sample"] %in% c(1, 3), -(ncol(tmp) - 1)]) ## has to be different from the ones in xod_x tmp <- chromPeaks(xod_x) checkTrue(sum(chromPeaks(res)[, "rt"] == tmp[tmp[, "sample"] %in% c(1, 3), "rt"]) < @@ -992,6 +1024,67 @@ test_extractChromatograms <- function() { ## XCMSnExp with adjusted rtime } +test_signal_integration <- function() { + ## Testing the signal integration of peaks. + ## For centWave + tmp <- xod_xgrg + rtr <- chromPeaks(tmp)[1, c("rtmin", "rtmax")] + mzr <- chromPeaks(tmp)[1, c("mzmin", "mzmax")] + chr <- extractChromatograms(tmp, rt = rtr, mz = mzr) + pkInt <- sum(intensity(chr[[1]]) * + ((rtr[2] - rtr[1]) / (length(chr[[1]]) - 1))) + checkEquals(pkInt, unname(chromPeaks(tmp)[1, "into"])) + + tmp <- filterFile(xod_xgrg, file = 2) + idxs <- sample(1:nrow(chromPeaks(tmp)), 5) + ## Now, for i = 20, for 6 rt I got an NA. Should I remove these measurements? + ## idxs <- 1:nrow(chromPeaks(tmp)) + for (i in idxs) { + rtr <- chromPeaks(tmp)[i, c("rtmin", "rtmax")] + mzr <- chromPeaks(tmp)[i, c("mzmin", "mzmax")] + chr <- extractChromatograms(tmp, rt = rtr, mz = mzr)[[1]] + ints <- intensity(chr) + pkI <- sum(ints, na.rm = TRUE) * ((rtr[2] - rtr[1]) / (length(ints) - 1)) + ## cat(" ", chromPeaks(tmp)[i, "into"], " - ", pkI, "\n") + checkEquals(unname(pkI), unname(chromPeaks(tmp)[i, "into"])) + } + pkI2 <- xcms:::.getPeakInt2(tmp, chromPeaks(tmp)[idxs, , drop = FALSE]) + checkEquals(unname(pkI2), unname(chromPeaks(tmp)[idxs, "into"])) + + ## Now for matchedfilter. 
+ tmp <- findChromPeaks(filterFile(od_x, 2), param = MatchedFilterParam()) + rtr <- chromPeaks(tmp)[1, c("rtmin", "rtmax")] + mzr <- chromPeaks(tmp)[1, c("mzmin", "mzmax")] + chr <- extractChromatograms(tmp, rt = rtr, mz = mzr) + pkInt <- sum(intensity(chr[[1]]) * + ((rtr[2] - rtr[1]) / (length(chr[[1]]) - 1))) + chromPeaks(tmp)[1, "into"] + checkEquals(pkInt, unname(chromPeaks(tmp)[1, "into"])) + idxs <- sample(1:nrow(chromPeaks(tmp)), 5) + ## idxs <- 1:nrow(chromPeaks(tmp)) + for (i in idxs) { + rtr <- chromPeaks(tmp)[i, c("rtmin", "rtmax")] + mzr <- chromPeaks(tmp)[i, c("mzmin", "mzmax")] + chr <- extractChromatograms(tmp, rt = rtr, mz = mzr)[[1]] + ints <- intensity(chr) + pkI <- sum(ints, na.rm = TRUE) * ((rtr[2] - rtr[1]) / (length(ints) - 1)) + ## cat(" ", chromPeaks(tmp)[i, "into"], " - ", pkI, "\n") + checkEquals(unname(pkI), unname(chromPeaks(tmp)[i, "into"])) + } + pkI2 <- xcms:::.getPeakInt2(tmp, chromPeaks(tmp)[idxs, , drop = FALSE]) + checkEquals(unname(pkI2), unname(chromPeaks(tmp)[idxs, "into"])) + + ## ## matchedFilter with wide mz bins. + ## ## For matchedFilter I will have to do this on the profile matrix! + ## tmp <- findChromPeaks(filterFile(od_x, 2), + ## param = MatchedFilterParam(binSize = 2)) + ## idxs <- 1:nrow(chromPeaks(tmp)) + ## pkI2 <- xcms:::.getPeakInt2(tmp, chromPeaks(tmp)[idxs, , drop = FALSE]) + ## checkEquals(unname(pkI2), unname(chromPeaks(tmp)[idxs, "into"])) +} + + + ############################################################ ## Test getEIC alternatives. dontrun_getEIC_alternatives <- function() { diff --git a/inst/unitTests/runit.binning.R b/inst/unitTests/runit.binning.R index 3bdff1fea..051a86d52 100644 --- a/inst/unitTests/runit.binning.R +++ b/inst/unitTests/runit.binning.R @@ -640,14 +640,18 @@ test_breaks <- function() { binSize = 0.2) checkEquals(brks, brksR) ## + ## Ultimate fix for issue #118 brksR <- seq((200 - 0.1), (600), by = 0.2) brks <- breaks_on_binSize((200 - 0.1), (600), binSize = 0.2) - cmn <- 1:min(c(length(brks), length(brksR))) + ## Compare them up to the last value, since in R that will be 600-01, while + ## breaks_on_binSize will ensure that the upper limit (600) is still within + ## the breaks + cmn <- 1:(length(brksR) - 1) checkEquals(brks[cmn], brksR[cmn]) ## Now, that below breaks on a windows build machine (issue #127) ## checkTrue(length(brks) > length(brksR)) ## checkEquals(brks[-length(brks)], brksR) - ## checkEquals(brks[length(brks)], 600) + checkEquals(brks[length(brks)], 600) } ############################################################ diff --git a/inst/unitTests/runit.do_adjustRtime.R b/inst/unitTests/runit.do_adjustRtime.R index d035ad4ec..1f74d1b2d 100644 --- a/inst/unitTests/runit.do_adjustRtime.R +++ b/inst/unitTests/runit.do_adjustRtime.R @@ -9,7 +9,7 @@ test_adjustRtime_PeakGroups <- function() { xsg <- group(xs) xodg <- groupChromPeaks(xod, param = PeakDensityParam(sampleGroups = xs$class)) - checkEquals(peaks(xsg), chromPeaks(xodg)) + checkEquals(peaks(xsg), chromPeaks(xodg)[, colnames(peaks(xsg))]) checkEquals(xsg@groupidx, featureDefinitions(xodg)$peakidx) checkTrue(length(processHistory(xodg, type = xcms:::.PROCSTEP.PEAK.DETECTION)) == 1) @@ -37,7 +37,7 @@ test_adjustRtime_PeakGroups <- function() { checkTrue(sum(chromPeaks(xod)[, "rtmin"] != chromPeaks(xodr)[, "rtmin"]) > 200) checkTrue(sum(chromPeaks(xod)[, "rtmax"] != chromPeaks(xodr)[, "rtmax"]) > 200) ## between xcmsSet and XCMSnExp - checkEquals(chromPeaks(xodr), peaks(xsr)) + checkEquals(chromPeaks(xodr)[, colnames(peaks(xsr))], peaks(xsr)) 
## To compare the adjusted retention time we have to extract it by sample! ## Otherwise the ordering will not be the same, as rtime is ordered by ## retention time, but @rt$raw by sample. @@ -60,7 +60,7 @@ test_adjustRtime_PeakGroups <- function() { xsr <- retcor(xsg, method = "peakgroups", missing = 0, span = 1) xodr <- adjustRtime(xodg, param = PeakGroupsParam(minFraction = 1, span = 1)) - checkEquals(chromPeaks(xodr), peaks(xsr)) + checkEquals(chromPeaks(xodr)[, colnames(peaks(xsr))], peaks(xsr)) checkEquals(unlist(adjustedRtime(xodr, bySample = TRUE), use.names = FALSE), unlist(xsr@rt$corrected, use.names = FALSE)) @@ -69,7 +69,7 @@ test_adjustRtime_PeakGroups <- function() { xodr <- adjustRtime(xodg, param = PeakGroupsParam(minFraction = 1, span = 1, smooth = "linear")) - checkEquals(chromPeaks(xodr), peaks(xsr)) + checkEquals(chromPeaks(xodr)[, colnames(peaks(xsr))], peaks(xsr)) checkEquals(unlist(adjustedRtime(xodr, bySample = TRUE), use.names = FALSE), unlist(xsr@rt$corrected, use.names = FALSE)) @@ -78,7 +78,7 @@ test_adjustRtime_PeakGroups <- function() { xodr <- adjustRtime(xodg, param = PeakGroupsParam(minFraction = 1, span = 1, family = "symmetric")) - checkEquals(chromPeaks(xodr), peaks(xsr)) + checkEquals(chromPeaks(xodr)[, colnames(peaks(xsr))], peaks(xsr)) checkEquals(unlist(adjustedRtime(xodr, bySample = TRUE), use.names = FALSE), unlist(xsr@rt$corrected, use.names = FALSE)) } diff --git a/inst/unitTests/runit.do_findChromPeaks_MSW.R b/inst/unitTests/runit.do_findChromPeaks_MSW.R index 535c05711..0b3e259c9 100644 --- a/inst/unitTests/runit.do_findChromPeaks_MSW.R +++ b/inst/unitTests/runit.do_findChromPeaks_MSW.R @@ -1,15 +1,20 @@ ############################################################ ## do_findPeaks_MSW tests -xraw <- deepCopy(microtofq_xr) +## xraw <- deepCopy(microtofq_xr) test_do_findPeaks_MSW <- function() { - feats1 <- xcms:::do_findPeaks_MSW(xraw@env$intensity, - xraw@env$mz, - snthresh = 100) - feats2 <- xcms:::do_findPeaks_MSW(xraw@env$intensity, - xraw@env$mz, - snthresh = 50) + first_file <- filterFile(fticr, file = 1) + spctr <- spectra(first_file) + checkTrue(length(spctr) == 1) + mzs <- unname(mz(spctr[[1]])) + ints <- unname(intensity(spctr[[1]])) + feats1 <- do_findPeaks_MSW(mz = mzs[10000:20000], + int = ints[10000:20000], + snthresh = 100) + feats2 <- do_findPeaks_MSW(mz = mzs[10000:20000], + int = ints[10000:20000], + snthresh = 50) checkTrue(nrow(feats2) > nrow(feats1)) } @@ -49,6 +54,10 @@ test_findChromPeaks_MSW <- function() { peakThr = 200, forder = 2, dorder = 1) res_4 <- findChromPeaks(od1, param = mp, return.type = "list") checkEquals(res_3, res_4[[1]][, colnames(res_3)]) + + ## Compare old vs new: + checkEquals(chromPeaks(fticr_xod)[, -ncol(chromPeaks(fticr_xod))], + peaks(fticr_xs)) } diff --git a/inst/unitTests/runit.do_findChromPeaks_centWave.R b/inst/unitTests/runit.do_findChromPeaks_centWave.R index 4bf20fb56..ec08e68de 100644 --- a/inst/unitTests/runit.do_findChromPeaks_centWave.R +++ b/inst/unitTests/runit.do_findChromPeaks_centWave.R @@ -13,6 +13,461 @@ fs <- c(system.file('cdf/KO/ko15.CDF', package = "faahKO"), xr <- deepCopy(faahko_xr_1) onDisk <- filterFile(faahko_od, file = 1) +## Ensure that the reported peak integrated signal corresponds to the correct +## data. This is to ensure that issue #135 was fixed correctly. 
+test_findChromPeaks_centWave_peakIntensity <- function() { + ## Reproduce with msdata files: + fl <- system.file("microtofq/MM14.mzML", package = "msdata") + raw <- readMSData2(fl) + options(originalCentWave = TRUE) + tmp <- findChromPeaks(raw, param = CentWaveParam(peakwidth = c(2, 10))) + ## Use the getPeakInt2 which uses the rawMat function. + pkI2 <- xcms:::.getPeakInt2(tmp, chromPeaks(tmp)) + ## Use the getPeakInt3 which uses the getEIC C function. + pkI3 <- xcms:::.getPeakInt3(tmp, chromPeaks(tmp)) + ## These fail for the original centWave code. + checkTrue(sum(pkI2 != chromPeaks(tmp)[, "into"]) > length(pkI2) / 2) + ## checkEquals(unname(pkI2), unname(chromPeaks(tmp)[, "into"])) + ## checkEquals(unname(pkI3), unname(chromPeaks(tmp)[, "into"])) + checkEquals(pkI2, pkI3) + ## Try with new implementation. + options(originalCentWave = FALSE) + tmp2 <- findChromPeaks(raw, param = CentWaveParam(peakwidth = c(2, 10))) + ## Find different number of peaks: + checkTrue(nrow(chromPeaks(tmp2)) != nrow(chromPeaks(tmp))) + ## Are the peaks similar? + id_1 <- paste(chromPeaks(tmp)[, "mz"], chromPeaks(tmp)[, "rt"]) + id_2 <- paste(chromPeaks(tmp2)[, "mz"], chromPeaks(tmp2)[, "rt"]) + ## But all of the ones from the old are ALSO in the new one. + checkTrue(all(id_1 %in% id_2)) + ## Are the peaks the same? + cp2 <- chromPeaks(tmp2)[id_2 %in% id_1, ] + cn <- colnames(cp2) + cn <- cn[!(cn %in% c("intb", "into", "rtmin", "rtmax"))] + checkEquals(cp2[, cn], chromPeaks(tmp)[, cn]) + ## Are the values related? + plot(cp2[, "into"], chromPeaks(tmp)[, "into"]) ## Very similar + plot(cp2[, "intb"], chromPeaks(tmp)[, "intb"]) ## Very similar + plot(cp2[, "rtmin"], chromPeaks(tmp)[, "rtmin"]) ## Very similar + plot(cp2[, "rtmax"], chromPeaks(tmp)[, "rtmax"]) ## Very similar + ## Use the getPeakInt3 which uses the getEIC C function. + pkI2_2 <- xcms:::.getPeakInt2(tmp2, chromPeaks(tmp2)) + pkI3_2 <- xcms:::.getPeakInt3(tmp2, chromPeaks(tmp2)) + ## These fail for the original centWave code. + checkEquals(unname(pkI2_2), unname(chromPeaks(tmp2)[, "into"])) + checkEquals(unname(pkI3_2), unname(chromPeaks(tmp2)[, "into"])) + checkEquals(pkI2_2, pkI3_2) + + + ## The same for one of the test files; this works even with the original + ## centWave code + options(originalCentWave = TRUE) + tmp <- filterFile(xod_xgrg, file = 3) + ## Use the getPeakInt2 which uses the rawMat function. + pkI2 <- xcms:::.getPeakInt2(tmp, chromPeaks(tmp)) + ## Use the getPeakInt3 which uses the getEIC C function. + pkI3 <- xcms:::.getPeakInt3(tmp, chromPeaks(tmp)) + checkEquals(pkI2, pkI3) + checkEquals(unname(pkI2), unname(chromPeaks(tmp)[, "into"])) + checkEquals(unname(pkI3), unname(chromPeaks(tmp)[, "into"])) + ## New modified centWave. + options(originalCentWave = FALSE) + tmp2 <- findChromPeaks(filterFile(faahko_od, file = 3), + CentWaveParam(noise = 10000, snthresh = 40)) + ## Even the identified peaks are identical! + checkEquals(chromPeaks(tmp), chromPeaks(tmp2)) + ## Use the getPeakInt2 which uses the rawMat function. + pkI2 <- xcms:::.getPeakInt2(tmp2, chromPeaks(tmp2)) + ## Use the getPeakInt3 which uses the getEIC C function. + pkI3 <- xcms:::.getPeakInt3(tmp2, chromPeaks(tmp2)) + checkEquals(pkI2, pkI3) + checkEquals(unname(pkI2), unname(chromPeaks(tmp2)[, "into"])) + checkEquals(unname(pkI3), unname(chromPeaks(tmp2)[, "into"])) + options(originalCentWave = TRUE) +} + +## Exhaustive comparison of the original and the modified centWave. 
We don't +## expect everything to be the same, but the results should be ideally comparable +dontrun_exhaustive_original_new_centWave_comparison <- function() { + ## faahKO test files. + fl <- system.file("cdf/ko15.CDF", package = "msdata") + raw <- readMSData2(fl) + ## Default settings + cwp <- CentWaveParam() + options(originalCentWave = TRUE) + orig <- findChromPeaks(raw, param = cwp) + options(originalCentWave = FALSE) + modi <- findChromPeaks(raw, param = cwp) + ## Same number of peaks? + checkTrue(nrow(chromPeaks(orig)) == nrow(chromPeaks(modi))) ## YES + ## Peaks the same? + checkEquals(chromPeaks(orig), chromPeaks(modi)) ## YES + ## modified settings: + cwp <- CentWaveParam(ppm = 40, peakwidth = c(4, 40)) + options(originalCentWave = TRUE) + orig <- findChromPeaks(raw, param = cwp) + options(originalCentWave = FALSE) + modi <- findChromPeaks(raw, param = cwp) + ## Same number of peaks? + checkTrue(nrow(chromPeaks(orig)) == nrow(chromPeaks(modi))) ## YES + ## Peaks the same? + checkEquals(chromPeaks(orig), chromPeaks(modi)) ## YES + ## Another round + cwp <- CentWaveParam(ppm = 10, peakwidth = c(1, 10)) + options(originalCentWave = TRUE) + orig <- findChromPeaks(raw, param = cwp) + options(originalCentWave = FALSE) + modi <- findChromPeaks(raw, param = cwp) + ## Same number of peaks? + checkTrue(nrow(chromPeaks(orig)) == nrow(chromPeaks(modi))) ## YES + ## Peaks the same? + checkEquals(chromPeaks(orig), chromPeaks(modi)) ## YES + + ## msdata test files. + fl <- system.file("microtofq/MM14.mzML", package = "msdata") + raw <- readMSData2(fl) + cwp <- CentWaveParam(ppm = 10, peakwidth = c(1, 10)) + options(originalCentWave = TRUE) + orig <- findChromPeaks(raw, param = cwp) + options(originalCentWave = FALSE) + modi <- findChromPeaks(raw, param = cwp) + ## Same number of peaks? + checkTrue(nrow(chromPeaks(orig)) == nrow(chromPeaks(modi))) ## YES + ## Peaks the same? NO + orig_id <- paste(chromPeaks(orig)[, "rt"], chromPeaks(orig)[, "mz"]) + modi_id <- paste(chromPeaks(modi)[, "rt"], chromPeaks(modi)[, "mz"]) + checkTrue(all(orig_id %in% modi_id)) + ## Check peaks and colnames. + cn <- colnames(chromPeaks(orig)) + cn_diff <- c("into", "intb", "rtmin", "rtmax") + cn <- cn[!(cn %in% cn_diff)] + checkEquals(chromPeaks(orig)[, cn], chromPeaks(modi)[, cn]) ## YES + ## Check those that are different. + for (i in cn_diff) { + plot(chromPeaks(orig)[, i], chromPeaks(modi)[, i]) + checkTrue(cor(chromPeaks(orig)[, i], chromPeaks(modi)[, i]) > 0.99) + } + ## Different settings. + cwp <- CentWaveParam(ppm = 40, peakwidth = c(1, 20)) + options(originalCentWave = TRUE) + orig <- findChromPeaks(raw, param = cwp) + options(originalCentWave = FALSE) + modi <- findChromPeaks(raw, param = cwp) + ## Same number of peaks? NO + checkTrue(nrow(chromPeaks(orig)) < nrow(chromPeaks(modi))) ## YES + ## Peaks the same? NO + orig_id <- paste(chromPeaks(orig)[, "rt"], chromPeaks(orig)[, "mz"]) + modi_id <- paste(chromPeaks(modi)[, "rt"], chromPeaks(modi)[, "mz"]) + ## Are all from orig also in modi? + checkTrue(all(orig_id %in% modi_id)) ## YES + modi_pks <- chromPeaks(modi)[match(orig_id, modi_id), ] + ## Compare the peaks. + cn <- colnames(chromPeaks(orig)) + cn_diff <- c("into", "intb", "rtmin", "rtmax") + cn <- cn[!(cn %in% cn_diff)] + checkEquals(chromPeaks(orig)[, cn], modi_pks[, cn]) ## YES + for (i in cn_diff) { + plot(chromPeaks(orig)[, i], modi_pks[, i]) + checkTrue(cor(chromPeaks(orig)[, i], modi_pks[, i]) > 0.99) + } + ## Different settings. 
+ cwp <- CentWaveParam(ppm = 40, peakwidth = c(1, 10)) + options(originalCentWave = TRUE) + orig <- findChromPeaks(raw, param = cwp) + options(originalCentWave = FALSE) + modi <- findChromPeaks(raw, param = cwp) + ## Same number of peaks? + checkTrue(nrow(chromPeaks(orig)) == nrow(chromPeaks(modi))) ## YES + ## Peaks the same? + orig_id <- paste(chromPeaks(orig)[, "rt"], chromPeaks(orig)[, "mz"]) + modi_id <- paste(chromPeaks(modi)[, "rt"], chromPeaks(modi)[, "mz"]) + ## Are all from orig also in modi? + checkTrue(all(orig_id %in% modi_id)) ## YES + ## Compare the peaks. + cn <- colnames(chromPeaks(orig)) + cn_diff <- c("into", "intb", "rtmin", "rtmax") + cn <- cn[!(cn %in% cn_diff)] + checkEquals(chromPeaks(orig)[, cn], chromPeaks(modi)[, cn]) ## YES + for (i in cn_diff) { + plot(chromPeaks(orig)[, i], chromPeaks(modi)[, i]) + checkTrue(cor(chromPeaks(orig)[, i], chromPeaks(modi)[, i]) > 0.99) + } + + ## Other file + fl <- system.file("microtofq/MM8.mzML", package = "msdata") + raw <- readMSData2(fl) + cwp <- CentWaveParam(ppm = 30, peakwidth = c(1, 10)) + options(originalCentWave = TRUE) + orig <- findChromPeaks(raw, param = cwp) + options(originalCentWave = FALSE) + modi <- findChromPeaks(raw, param = cwp) + ## Same number of peaks? + checkTrue(nrow(chromPeaks(orig)) == nrow(chromPeaks(modi))) ## YES + ## Peaks the same? NO, but all in orig are in modi + orig_id <- paste(chromPeaks(orig)[, "rt"], chromPeaks(orig)[, "mz"]) + modi_id <- paste(chromPeaks(modi)[, "rt"], chromPeaks(modi)[, "mz"]) + checkTrue(all(orig_id %in% modi_id)) + ## Check peaks and colnames. + cn <- colnames(chromPeaks(orig)) + cn_diff <- c("into", "intb", "rtmin", "rtmax") + cn <- cn[!(cn %in% cn_diff)] + checkEquals(chromPeaks(orig)[, cn], chromPeaks(modi)[, cn]) ## YES + ## Check those that are different. + for (i in cn_diff) { + plot(chromPeaks(orig)[, i], chromPeaks(modi)[, i]) + checkTrue(cor(chromPeaks(orig)[, i], chromPeaks(modi)[, i]) > 0.99) + } + ## Different settings. + cwp <- CentWaveParam(ppm = 40, peakwidth = c(3, 30)) + options(originalCentWave = TRUE) + orig <- findChromPeaks(raw, param = cwp) + options(originalCentWave = FALSE) + modi <- findChromPeaks(raw, param = cwp) + ## Same number of peaks? NO + checkTrue(nrow(chromPeaks(orig)) < nrow(chromPeaks(modi))) + ## Peaks the same? NO, but all in orig are in modi + orig_id <- paste(chromPeaks(orig)[, "rt"], chromPeaks(orig)[, "mz"]) + modi_id <- paste(chromPeaks(modi)[, "rt"], chromPeaks(modi)[, "mz"]) + checkTrue(all(orig_id %in% modi_id)) + ## Check peaks and colnames. + cmn <- match(orig_id, modi_id) + cn <- colnames(chromPeaks(orig)) + cn_diff <- c("into", "intb", "rtmin", "rtmax") + cn <- cn[!(cn %in% cn_diff)] + checkEquals(chromPeaks(orig)[, cn], chromPeaks(modi)[cmn, cn]) ## YES + ## Check those that are different. + for (i in cn_diff) { + plot(chromPeaks(orig)[, i], chromPeaks(modi)[cmn, i]) + checkTrue(cor(chromPeaks(orig)[, i], chromPeaks(modi)[cmn, i]) > 0.99) + } + + ## own files. + fl <- "/Users/jo/data/2016/2016-11/NoSN/190516_POOL_N_POS_12.mzML" + raw <- readMSData2(fl) + ## Default settings + cwp <- CentWaveParam() + options(originalCentWave = TRUE) + orig <- findChromPeaks(raw, param = cwp) + options(originalCentWave = FALSE) + modi <- findChromPeaks(raw, param = cwp) + ## Same number of peaks? NO + checkTrue(nrow(chromPeaks(orig)) < nrow(chromPeaks(modi))) + ## Peaks the same? 
NO, but all in orig are in modi + orig_id <- paste(chromPeaks(orig)[, "rt"], chromPeaks(orig)[, "mz"]) + modi_id <- paste(chromPeaks(modi)[, "rt"], chromPeaks(modi)[, "mz"]) + checkTrue(all(orig_id %in% modi_id)) + ## Check peaks and colnames. + cmn <- match(orig_id, modi_id) + cn <- colnames(chromPeaks(orig)) + cn_diff <- c("into", "intb", "rtmin", "rtmax", "maxo") + cn <- cn[!(cn %in% cn_diff)] + checkEquals(chromPeaks(orig)[, cn], chromPeaks(modi)[cmn, cn]) ## YES + ## Check those that are different. + for (i in cn_diff) { + plot(chromPeaks(orig)[, i], chromPeaks(modi)[cmn, i]) + checkTrue(cor(chromPeaks(orig)[, i], chromPeaks(modi)[cmn, i]) > 0.99) + } + ## Different settings + cwp <- CentWaveParam(ppm = 30, peakwidth = c(1, 10)) + options(originalCentWave = TRUE) + orig <- findChromPeaks(raw, param = cwp) + options(originalCentWave = FALSE) + modi <- findChromPeaks(raw, param = cwp) + ## Same number of peaks? NO, more with modified versions + checkTrue(nrow(chromPeaks(orig)) < nrow(chromPeaks(modi))) + ## Peaks the same? NO, but all in orig are in modi + orig_id <- paste(chromPeaks(orig)[, "rt"], chromPeaks(orig)[, "mz"]) + modi_id <- paste(chromPeaks(modi)[, "rt"], chromPeaks(modi)[, "mz"]) + checkTrue(all(orig_id %in% modi_id)) + ## Check peaks and colnames. + cmn <- match(orig_id, modi_id) + cn <- colnames(chromPeaks(orig)) + cn_diff <- c("into", "intb", "rtmin", "rtmax", "maxo") + cn <- cn[!(cn %in% cn_diff)] + checkEquals(chromPeaks(orig)[, cn], chromPeaks(modi)[cmn, cn]) ## YES + ## Check those that are different. + for (i in cn_diff) { + plot(chromPeaks(orig)[, i], chromPeaks(modi)[cmn, i]) + checkTrue(cor(chromPeaks(orig)[, i], chromPeaks(modi)[cmn, i]) > 0.98) + } + ## Different settings + cwp <- CentWaveParam(ppm = 40, peakwidth = c(1, 60)) + options(originalCentWave = TRUE) + orig <- findChromPeaks(raw, param = cwp) + options(originalCentWave = FALSE) + modi <- findChromPeaks(raw, param = cwp) + ## Same number of peaks? NO, more with modified versions + checkTrue(nrow(chromPeaks(orig)) < nrow(chromPeaks(modi))) + ## Peaks the same? NO, but all in orig are in modi + orig_id <- paste(chromPeaks(orig)[, "rt"], chromPeaks(orig)[, "mz"]) + modi_id <- paste(chromPeaks(modi)[, "rt"], chromPeaks(modi)[, "mz"]) + checkTrue(all(orig_id %in% modi_id)) + ## Check peaks and colnames. + cmn <- match(orig_id, modi_id) + cn <- colnames(chromPeaks(orig)) + cn_diff <- c("into", "intb", "rtmin", "rtmax", "maxo") + cn <- cn[!(cn %in% cn_diff)] + checkEquals(chromPeaks(orig)[, cn], chromPeaks(modi)[cmn, cn]) ## YES + ## Check those that are different. + for (i in cn_diff) { + plot(chromPeaks(orig)[, i], chromPeaks(modi)[cmn, i]) + checkTrue(cor(chromPeaks(orig)[, i], chromPeaks(modi)[cmn, i]) > 0.98) + } + + ## Is there something common to the peaks found only by the modified version? + common_pks <- chromPeaks(modi)[cmn, ] + unique_pks <- chromPeaks(modi)[-cmn, ] + cn <- colnames(common_pks) + ## mz + boxplot(list(common = common_pks[, "mz"], unique = unique_pks[, "mz"]), + varwidth = TRUE, main = "mz") + ## OK, average mz of unique peaks is smaller. + + ## mzrange + boxplot(list(common = common_pks[, "mzmax"] - common_pks[, "mzmin"], + unique = unique_pks[, "mzmax"] - unique_pks[, "mzmin"]), + varwidth = TRUE, main = "mz range") + ## Seems to be smaller too. 
+ + ## rt + boxplot(list(common = common_pks[, "rt"], unique = unique_pks[, "rt"]), + varwidth = TRUE, main = "rt") + ## hm, rt larger in unique + + ## rtrange + boxplot(list(common = log2(common_pks[, "rtmax"] - common_pks[, "rtmin"]), + unique = log2(unique_pks[, "rtmax"] - unique_pks[, "rtmin"])), + varwidth = TRUE, main = "rt range") + ## rtrange is same. + + ## into + boxplot(list(common = log2(common_pks[, "into"]), + unique = log2(unique_pks[, "into"])), + varwidth = TRUE, main = "into") + ## same. + + ## sn + boxplot(list(common = log2(common_pks[, "sn"]), + unique = log2(unique_pks[, "sn"])), + varwidth = TRUE, main = "sn") + ## sn is higher in unique + + ## Check chromatogram for some: + chrPlot <- function(i, raw) { + rtr <- common_pks[i, c("rtmin", "rtmax")] + rtr[1] <- rtr[1] - 2 + rtr[2] <- rtr[2] + 2 + chr_cmn <- extractChromatograms(raw, rt = rtr, + mz = common_pks[i, c("mzmin", "mzmax")]) + rtr <- unique_pks[i, c("rtmin", "rtmax")] + rtr[1] <- rtr[1] - 2 + rtr[2] <- rtr[2] + 2 + chr_unq <- extractChromatograms(raw, rt = rtr, + mz = unique_pks[i, c("mzmin", "mzmax")]) + par(mfrow = c(1, 2)) + plot(rtime(chr_cmn[[1]]), intensity(chr_cmn[[1]]), main = "common peak", + type = "l") + abline(v = common_pks[i, c("rtmin", "rtmax")], col = "grey") + plot(rtime(chr_unq[[1]]), intensity(chr_unq[[1]]), main = "unique peak", + type = "l") + abline(v = unique_pks[i, c("rtmin", "rtmax")], col = "grey") + } + chrPlot(1, raw = raw) + + i <- sample(1:nrow(unique_pks), 10) + chrPlot(i[1], raw = raw) + chrPlot(i[2], raw = raw) + chrPlot(i[3], raw = raw) + chrPlot(i[4], raw = raw) + chrPlot(i[5], raw = raw) + chrPlot(i[6], raw = raw) + chrPlot(i[7], raw = raw) + chrPlot(i[8], raw = raw) + chrPlot(i[9], raw = raw) + chrPlot(i[10], raw = raw) + + ## Summary: + ## Same peaks are identified by both methods, but the modified centWave finds + ## eventually more peaks. + ## The into, intb, rtmin and rtmax differ for some peaks found by both + ## methods but values are highly correlated (R > 0.99). +} + + +## Compare what's the difference between the original centWave and the new one +## fixing the peak integration problem (issue #135) +dontrun_compare_orig_new_centWave <- function() { + mzVals <- xr@env$mz + intVals <- xr@env$intensity + ## Define the values per spectrum: + valsPerSpect <- diff(c(xr@scanindex, length(mzVals))) + res_orig <- xcms:::.centWave_orig(mz = mzVals, + int = intVals, + scantime = xr@scantime, + valsPerSpect, + noise = 4000, verboseColumns = TRUE) + res_new <- xcms:::.centWave_new(mz = mzVals, + int = intVals, + scantime = xr@scantime, + valsPerSpect, + noise = 4000, verboseColumns = TRUE) + checkEquals(res_orig, res_new) ## That should always work. + + ## Use the previously defined ones as ROIs - this should simulate the + ## addIsoROIs + ## Slightly increase the mz like in addIsoROIs. + rL <- as.data.frame(res_orig) + ## Extend the mzmin and mzmax if needed. 
+ tittle <- rL[, "mz"] * (25 / 2) / 1E6 + expand_mz <- (rL[, "mzmax"] - rL[, "mzmin"]) < (tittle * 2) + if (any(expand_mz)) { + rL[expand_mz, "mzmin"] <- rL[expand_mz, "mz"] - + tittle[expand_mz] + rL[expand_mz, "mzmax"] <- rL[expand_mz, "mz"] + tittle[expand_mz] + } + rL <- split(rL, f = 1:nrow(rL)) + res_orig2 <- xcms:::.centWave_orig(mz = mzVals, + int = intVals, + scantime = xr@scantime, + valsPerSpect, + noise = 4000, verboseColumns = TRUE, + roiList = rL, + firstBaselineCheck = FALSE) + res_new2 <- xcms:::.centWave_new(mz = mzVals, + int = intVals, + scantime = xr@scantime, + valsPerSpect, + noise = 4000, verboseColumns = TRUE, + roiList = rL, + firstBaselineCheck = FALSE) + ## I get more peaks with the modified version: + checkTrue(nrow(res_orig2) < nrow(res_new2)) + ## Sort them by rt and mz + idx <- order(res_orig2[, "rt"], res_orig2[, "mz"]) + res_orig2 <- res_orig2[idx, ] + idx <- order(res_new2[, "rt"], res_new2[, "mz"]) + res_new2 <- res_new2[idx, ] + ## Are identified peaks similar? + id_1 <- paste(res_orig2[, "rt"], res_orig2[, "mz"]) + id_2 <- paste(res_new2[, "rt"], res_new2[, "mz"]) + ## Are all from res_orig2 in res_new2? + checkTrue(length(id_1) == sum(id_1 %in% id_2)) ## YES + ## Are the values for these the same? + same_new2 <- res_new2[match(id_1, id_2), ] + ## check columns: + cn <- colnames(same_new2) + cn <- cn[!(cn %in% c("mzmin"))] + for (i in cn) { + checkEquals(res_orig2[, i], same_new2[, i]) + } + ## So, everything is identical, EXCEPT mzmin: + idx <- which(same_new2[, "mzmin"] != res_orig2[, "mzmin"]) + res_orig2[idx, "mzmin"] + same_new2[idx, "mzmin"] + ## Are the different ones 0? + checkTrue(all(res_orig2[idx, "mzmin"] == 0)) +} + test_do_findChromPeaks_centWave <- function() { ## xr <- xcmsRaw(fs[1], profstep = 0) ## We expect that changing a parameter has an influence on the result. @@ -88,7 +543,7 @@ test_findChromPeaks_centWave <- function() { checkTrue(hasChromPeaks(res)) checkTrue(!hasAdjustedRtime(res)) checkTrue(!hasFeatures(res)) - checkEquals(peaks(xs)@.Data, chromPeaks(res)) + checkEquals(peaks(xs)@.Data, chromPeaks(res)[, -ncol(chromPeaks(res))]) } dontrun_test_benchmark_centWaves <- function() { diff --git a/inst/unitTests/runit.do_findChromPeaks_centWave_isotopes.R b/inst/unitTests/runit.do_findChromPeaks_centWave_isotopes.R index 56b5a86c3..adf493894 100644 --- a/inst/unitTests/runit.do_findChromPeaks_centWave_isotopes.R +++ b/inst/unitTests/runit.do_findChromPeaks_centWave_isotopes.R @@ -34,6 +34,34 @@ test_do_findChromPeaks_centWaveWithPredIsoROIs <- function() { ## checkEquals(all_f, old_all@.Data) } +## Check the influence of the modified centWave on the isotope centWave +dontrun_test_original_new_centWave_isotopes <- function() { + fl <- system.file("cdf/ko15.CDF", package = "msdata") + raw <- readMSData2(fl) + ## Default settings + cwp <- CentWaveParam(verboseColumns = TRUE) + options(originalCentWave = TRUE) + orig <- findChromPeaks(raw, param = cwp) + + ## Do the isoROIs. + mz <- mz(raw) + vps <- lengths(mz) + options(originalCentWave = TRUE) + iso_orig <- xcms:::do_findChromPeaks_addPredIsoROIs_mod(mz = unlist(mz), + int = unlist(intensity(raw)), + scantime = rtime(raw), + valsPerSpect = vps, + peaks. = chromPeaks(orig)) + options(originalCentWave = FALSE) + iso_modi <- xcms:::do_findChromPeaks_addPredIsoROIs_mod(mz = unlist(mz), + int = unlist(intensity(raw)), + scantime = rtime(raw), + valsPerSpect = vps, + peaks. = chromPeaks(orig)) + + ## LLLL How do peaks look like when they have an mz width of 0? 
+} + ## Evaluate the peak detection method using the centWaveWithPreIsoROIs method ## on OnDiskMSnExp and on MSnExp objects. test_findChromPeaks_centWaveWithPredIsoROIs <- function() { @@ -74,7 +102,7 @@ test_findChromPeaks_centWaveWithPredIsoROIs <- function() { checkTrue(hasChromPeaks(res)) checkTrue(!hasAdjustedRtime(res)) checkTrue(!hasFeatures(res)) - checkEquals(peaks(xs)@.Data, chromPeaks(res)) + checkEquals(peaks(xs)@.Data, chromPeaks(res)[, colnames(peaks(xs)@.Data)]) ## Check on the full data. ## xs <- xcmsSet(fs, profparam = list(profstep = 0), snthresh = snth, diff --git a/inst/unitTests/runit.do_groupChromPeaks.R b/inst/unitTests/runit.do_groupChromPeaks.R index df734c9d8..b405d9732 100644 --- a/inst/unitTests/runit.do_groupChromPeaks.R +++ b/inst/unitTests/runit.do_groupChromPeaks.R @@ -2,7 +2,7 @@ ## to feature grouping. ## General functions/methods -test_groupval_XCMSnExp <- function() { +test_featureValues_XCMSnExp <- function() { od_x <- faahko_xod xs <- faahko_xs @@ -12,11 +12,11 @@ test_groupval_XCMSnExp <- function() { xs <- group(xs, method = "density") checkEquals(unname(groupval(xs, value = "into")), - unname(groupval(od_x, value = "into"))) + unname(featureValues(od_x, value = "into"))) checkEquals(unname(groupval(xs, method = "maxint", value = "into")), - unname(groupval(od_x, method = "maxint", value = "into"))) + unname(featureValues(od_x, method = "maxint", value = "into"))) ## Checking errors - checkException(groupval(od_x, value = "bla")) + checkException(featureValues(od_x, value = "bla")) } @@ -227,27 +227,24 @@ dontrun_groupChromPeaks_density_implementation <- function() { ############################################################ ## mzClust ## -library(msdata) -fticrf <- list.files(system.file("fticr", package = "msdata"), - recursive = TRUE, full.names = TRUE) - -## old -fticr_xs <- xcmsSet(method="MSW", files=fticrf[1:2], scales=c(1,7), - SNR.method='data.mean' , winSize.noise=500, - peakThr=80000, amp.Th=0.005) -## new -fticr_od <- readMSData2(fticrf[1:2], msLevel. = 1) -p <- MSWParam(scales = c(1, 7), peakThr = 80000, ampTh = 0.005, - SNR.method = "data.mean", winSize.noise = 500) -fticr_xod <- findChromPeaks(fticr_od, param = p) +## library(msdata) +## fticrf <- list.files(system.file("fticr", package = "msdata"), +## recursive = TRUE, full.names = TRUE) + +## ## old +## ## new +## fticr_od <- readMSData2(fticrf[1:2], msLevel. 
= 1) +## p <- MSWParam(scales = c(1, 7), peakThr = 80000, ampTh = 0.005, +## SNR.method = "data.mean", winSize.noise = 500) +## fticr_xod <- findChromPeaks(fticr_od, param = p) test_do_groupPeaks_mzClust <- function() { fts <- peaks(fticr_xs) res <- do_groupPeaks_mzClust(peaks = fts, - sampleGroups = sampclass(fticr_xs)) + sampleGroups = sampclass(fticr_xs)) res_2 <- do_groupPeaks_mzClust(peaks = fts, - sampleGroups = sampclass(fticr_xs), - minFraction = 0, absMz = 2) + sampleGroups = sampclass(fticr_xs), + minFraction = 0, absMz = 2) checkTrue(nrow(res$featureDefinitions) > nrow(res_2$featureDefinitions)) res_x <- group(fticr_xs, method = "mzClust") diff --git a/inst/unitTests/runit.fillChromPeaks.R b/inst/unitTests/runit.fillChromPeaks.R new file mode 100644 index 000000000..c20ab5559 --- /dev/null +++ b/inst/unitTests/runit.fillChromPeaks.R @@ -0,0 +1,696 @@ +test_fillChromPeaks <- function() { + ## No adjusted retention times + checkTrue(!xcms:::.hasFilledPeaks(xod_xg)) + res <- fillChromPeaks(xod_xg) + checkTrue(xcms:::.hasFilledPeaks(res)) + ph <- processHistory(res, type = xcms:::.PROCSTEP.PEAK.FILLING) + checkTrue(length(ph) == 1) + checkEquals(ph[[1]]@param, FillChromPeaksParam()) + ## Check if the signal corresponds to what we expect for some peaks. + fp <- chromPeaks(res) + fp <- fp[fp[, "is_filled"] == 1, ] + idxs <- sample(1:nrow(fp), 5) + for (i in idxs) { + cfp <- fp[i, , drop = FALSE] + tmp <- filterFile(xod_xg, file = cfp[1, "sample"]) + chr <- extractChromatograms(tmp, rt = cfp[1, c("rtmin", "rtmax")], + mz = cfp[1, c("mzmin", "mzmax")])[[1]] + into <- sum(intensity(chr), na.rm = TRUE) * + (cfp[1, "rtmax"] - cfp[1, "rtmin"]) / (length(chr) - 1) + checkEquals(unname(into), unname(cfp[1, "into"])) + } + ## Plot the data for some... + if (FALSE) { + pk_idx <- featureValues(res)[1, ] + pks <- chromPeaks(res)[pk_idx, ] + rtr <- c(min(pks[, "rtmin"]), max(pks[, "rtmax"])) + rtr[1] <- rtr[1] - 10 + rtr[2] <- rtr[2] + 10 + chrs <- extractChromatograms(res, rt = rtr, mz = c(min(pks[, "mzmin"]), + max(pks[, "mzmax"]))) + plot(3, 3, pch = NA, xlim = range(lapply(chrs, rtime), na.rm = TRUE), + ylim = range(lapply(chrs, intensity), na.rm = TRUE), xlab = "rt", + ylab = "int") + for (i in 1:length(chrs)) { + points(rtime(chrs[[i]]), intensity(chrs[[i]]), type = "l", + col = ifelse(pks[i, "is_filled"], yes = "red", no = "black")) + abline(v = pks[i, c("rtmin", "rtmax")], + col = ifelse(pks[i, "is_filled"], yes = "red", no = "black")) + } + } + + ## Check if the results are similar that we get with findChromPeaks + for (i in 1:length(fileNames(xod_xg))) { + fnd_pks <- chromPeaks(xod_xg)[chromPeaks(xod_xg)[, "sample"] == i, ] + prm <- processHistory(tmp, type ="Peak detection")[[1]]@param + ## Extract the data for these using the internal function. 
+ fld_pks <- xcms:::.getChromPeakData(filterFile(xod_xg, i), + peakArea = fnd_pks, + sample_idx = i, + cn = colnames(fnd_pks)) + ## rt + checkTrue(cor(fnd_pks[, "rt"], fld_pks[, "rt"]) > 0.99) + ## mz + checkTrue(cor(fnd_pks[, "mz"], fld_pks[, "mz"]) > 0.99) + checkEquals(fnd_pks[, "mz"], fld_pks[, "mz"]) + ## into + checkTrue(cor(fnd_pks[, "into"], fld_pks[, "into"]) > 0.99) + checkEquals(fnd_pks[, "into"], fld_pks[, "into"]) + ## checkEquals(fnd_pks[, "into"], fld_pks[, "into"]) + ## maxo + checkEquals(fnd_pks[, "maxo"], fld_pks[, "maxo"]) + checkEquals(fnd_pks[, "maxo"], fld_pks[, "maxo"]) + } + + ## Check for the NAs if there is really no signal + gv <- featureValues(res) + feat_i <- which(is.na(gv[, 1])) + tmp <- chromPeaks(res)[featureDefinitions(res)$peakidx[[feat_i]], + c("rtmin", "rtmax", "mzmin", "mzmax")] + ## Get the intensities for the first one. + pkArea <- apply(tmp, median, MARGIN = 2) + chr <- extractChromatograms(res, rt = pkArea[1:2], mz = pkArea[3:4]) + checkTrue(length(chr) == 0) + ## Get also the spectra: + spctr <- spectra(filterRt(filterFile(xod_xg, file = 1), rt = pkArea[1:2])) + mzs <- unlist(lapply(spctr, mz)) + ## No spectra for the fiven mz: + checkEquals(sum(mzs >= pkArea[3] & mzs <= pkArea[4]), 0) + + ## Check increasing the expandRt and expandMz to see whether we get rid of + ## the NA. + res_2 <- fillChromPeaks(xod_xg, param = FillChromPeaksParam(expandMz = 1)) + ## Check if the mzrange is now indeed broader for the integrated ones. + fp <- chromPeaks(res) + fp <- fp[fp[, "is_filled"] == 1, ] + fp2 <- chromPeaks(res_2) + fp2 <- fp2[fp2[, "is_filled"] == 1, ] + checkEquals(fp2[, "mzmax"] - fp2[, "mzmin"], + 2 * (fp[, "mzmax"] - fp[, "mzmin"])) + + res_2 <- fillChromPeaks(xod_xg, param = FillChromPeaksParam(expandRt = 1)) + ## Check if the mzrange is now indeed broader for the integrated ones. + fp <- chromPeaks(res) + fp <- fp[fp[, "is_filled"] == 1, ] + fp2 <- chromPeaks(res_2) + fp2 <- fp2[fp2[, "is_filled"] == 1, ] + checkEquals(fp2[, "rtmax"] - fp2[, "rtmin"], + 2 * (fp[, "rtmax"] - fp[, "rtmin"])) + ## Check using ppm + res_2 <- fillChromPeaks(xod_xg, param = FillChromPeaksParam(ppm = 40, + expandMz = 5, + expandRt = 2)) + checkTrue(all(!is.na(rowSums(featureValues(res_2))))) + ## Drop them. + res_rem <- dropFilledChromPeaks(res) + checkTrue(!xcms:::.hasFilledPeaks(res_rem)) + checkEquals(res_rem, xod_xg) + ## Drop feature definitions from res -> also filled peaks should be dropped. + res_rem <- dropFeatureDefinitions(res) + checkTrue(!xcms:::.hasFilledPeaks(res_rem)) + checkTrue(!any(chromPeaks(res_rem)[, "is_filled"] == 1)) + checkEquals(res_rem, xod_x) + + ## With adjusted rtime. + res_2 <- fillChromPeaks(xod_xgrg) + ## Check if the signal corresponds to what we expect for some peaks. + fp <- chromPeaks(res_2) + fp <- fp[fp[, "is_filled"] == 1, ] + ## These have to be different from before! + fp_raw <- chromPeaks(res) + fp_raw <- fp_raw[fp_raw[, "is_filled"] == 1, ] + checkTrue(all(fp_raw[, "rt"] != fp[, "rt"])) + checkTrue(all(fp_raw[, "rtmin"] != fp[, "rtmin"])) + checkTrue(all(fp_raw[, "rtmax"] != fp[, "rtmax"])) + checkEquals(fp_raw[, "mz"], fp[, "mz"]) + checkEquals(fp_raw[, "mzmin"], fp[, "mzmin"]) + checkEquals(fp_raw[, "mzmax"], fp[, "mzmax"]) + ## Values are expected to be different, but still correlated! + checkTrue(all(fp_raw[, "into"] != fp[, "into"])) + checkTrue(cor(fp_raw[, "into"], fp[, "into"]) > 0.99) + ## Check if we can get the same data using the provided range. 
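+    ## Re-integrate the filled-in peaks manually from the raw data of the first
+    ## file: restrict the raw signal to each peak's mz/rt boundaries and compute
+    ## 'into' as the intensity sum scaled by the rt width per spectrum.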
+ ## Use the .rawMat function + first <- filterFile(xod_xgrg, file = 1, keepAdjustedRtime = TRUE) + spctr <- spectra(first) + mzs <- lapply(spctr, mz) + vps <- lengths(mzs) + ints <- unlist(lapply(spctr, intensity), use.names = FALSE) + mzs <- unlist(mzs, use.names = FALSE) + rtim <- rtime(first) + idx <- which(fp[, "sample"] == 1) + for (i in idx) { + mtx <- xcms:::.rawMat(mz = mzs, int = ints, scantime = rtim, + valsPerSpect = vps, + rtrange = fp[i, c("rtmin", "rtmax")], + mzrange = fp[i, c("mzmin", "mzmax")]) + into <- sum(mtx[, 3], na.rm = TRUE) * + ((fp[i, "rtmax"] - fp[i, "rtmin"]) / + (sum(rtim >= fp[i, "rtmin"] & rtim <= fp[i, "rtmax"]) - 1)) + checkEquals(unname(into), unname(fp[i, "into"])) + } + + ## Drop them. + res_rem <- dropFilledChromPeaks(res_2) + checkTrue(!xcms:::.hasFilledPeaks(res_rem)) + checkEquals(res_rem, xod_xgrg) + + ## ## Alternative without rawMat: + ## for (i in idx) { + ## chrs <- extractChromatograms(first, rt = fp[i, c("rtmin", "rtmax")], + ## mz = fp[i, c("mzmin", "mzmax")])[[1]] + ## into <- sum(intensity(chrs), na.rm = TRUE) / (length(chrs) - 1) * + ## (fp[i, "rtmax"] - fp[i, "rtmin"]) + ## checkEquals(unname(into), unname(fp[i, "into"])) + ## } +} + +## fillChromPeaks for MSW peak detection. +test_fillChromPeaks_MSW <- function() { + p <- MzClustParam() + fticr_xodg <- groupChromPeaks(fticr_xod, param = p) + checkException(res <- fillChromPeaks(fticr_xod)) + res <- fillChromPeaks(fticr_xodg) + + ## Got a signal for all of em. + checkTrue(!any(is.na(featureValues(res)))) + ## 1) Compare with what I get for xcmsSet. + tmp_x <- fticr_xs + tmp_x <- group(tmp_x, method = "mzClust") + tmp_x <- fillPeaks(tmp_x, method = "MSW") + ## Compare + checkEquals(unname(groupval(tmp_x)), unname(featureValues(res))) + checkEquals(unname(groupval(tmp_x, value = "maxo")), + unname(featureValues(res, value = "maxo"))) + checkEquals(unname(groupval(tmp_x, value = "into")), + unname(featureValues(res, value = "into"))) + checkEquals(unname(groupval(tmp_x, value = "mz")), + unname(featureValues(res, value = "mz"))) + checkEquals(unname(groupval(tmp_x, value = "mzmin")), + unname(featureValues(res, value = "mzmin"))) + checkEquals(unname(groupval(tmp_x, value = "mzmax")), + unname(featureValues(res, value = "mzmax"))) + ## OK + ## 2) Check if the fillChromPeaks returns same/similar data than the + ## findChromPeaks does: + fdef <- featureDefinitions(fticr_xodg) + pkArea <- do.call( + rbind, + lapply( + fdef$peakidx, function(z) { + tmp <- chromPeaks(fticr_xodg)[z, c("rtmin", "rtmax", + "mzmin", "mzmax"), + drop = FALSE] + pa <- c(median(tmp[, 1]), median(tmp[, 2]), + median(tmp[, 3]), median(tmp[, 4])) + return(pa) + } + )) + colnames(pkArea) <- c("rtmin", "rtmax", "mzmin", "mzmax") + pkArea <- cbind(group_idx = 1:nrow(pkArea), pkArea, + mzmed = fdef$mzmed) + ## Get peak data for all peaks in the first file + allPks <- xcms:::.getMSWPeakData(filterFile(fticr_xodg, file = 1), + peakArea = pkArea, + sample_idx = 1, + cn = colnames(chromPeaks(fticr_xodg))) + curP <- chromPeaks(res)[chromPeaks(res)[, "sample"] == 1, ] + curP <- curP[order(curP[, "mz"]), ] + checkEquals(allPks[, "mz"], curP[, "mz"]) + checkEquals(allPks[, "maxo"], curP[, "maxo"]) + checkTrue(cor(allPks[, "into"], curP[, "into"]) > 0.99) ## Not exactly the + ## same but highly similar. 
+} + +test_fillChromPeaks_matchedFilter <- function() { + tmp <- findChromPeaks(faahko_od, param = MatchedFilterParam()) + tmp <- groupChromPeaks(tmp, param = PeakDensityParam()) + + tmp_filled <- fillChromPeaks(tmp) + checkTrue(sum(is.na(featureValues(tmp_filled))) < + sum(is.na(featureValues(tmp)))) + nas <- is.na(featureValues(tmp)[, 1]) | is.na(featureValues(tmp)[, 2]) + checkTrue(cor(featureValues(tmp, value = "into")[!nas, 1], + featureValues(tmp, value = "into")[!nas, 2]) > 0.97) + ## plot(featureValues(tmp, value = "into")[!nas, 1], + ## featureValues(tmp, value = "into")[!nas, 2]) + checkTrue(cor(featureValues(tmp_filled, value = "into")[, 1], + featureValues(tmp_filled, value = "into")[, 2], + use = "complete.obs") > 0.97) + ## plot(featureValues(tmp_filled, value = "into")[, 1], + ## featureValues(tmp_filled, value = "into")[, 2]) + + ## Check signal generation for already found peaks. + for (i in 1:length(fileNames(tmp))) { + fnd_pks <- chromPeaks(tmp)[chromPeaks(tmp)[, "sample"] == i, ] + prm <- processHistory(tmp, type ="Peak detection")[[1]]@param + ## Extract the data for these using the internal function. + fld_pks <- xcms:::.getChromPeakData_matchedFilter(filterFile(tmp, i), + peakArea = fnd_pks, + sample_idx = i, + param = prm, + cn = colnames(fnd_pks)) + ## rt can not be the same, since for fillChromPeaks it is the rt of the + ## maximum signal and for findChromPeaks it is the rt of the apex of the + ## filtered/fitted peak. + checkTrue(cor(fnd_pks[, "rt"], fld_pks[, "rt"]) > 0.99) + ## mz: also not the same; most likely due to slightly different binning. + diffs <- fnd_pks[, "mz"] - fld_pks[, "mz"] + checkTrue(max(diffs) < 1e-4) + ## into + checkEquals(fnd_pks[, "into"], fld_pks[, "into"]) + ## maxo + checkEquals(fnd_pks[, "maxo"], fld_pks[, "maxo"]) + } + + ## modify fillChromPeaks settings. + tmp_fld_2 <- fillChromPeaks( + tmp, param = FillChromPeaksParam(ppm = 40, expandRt = 1)) + checkTrue(sum(is.na(featureValues(tmp_filled))) < + sum(is.na(featureValues(tmp)))) + checkTrue(sum(is.na(featureValues(tmp_fld_2))) < + sum(is.na(featureValues(tmp_filled)))) + nas <- is.na(featureValues(tmp)[, 1]) | is.na(featureValues(tmp)[, 2]) + checkTrue(cor(featureValues(tmp_fld_2, value = "into")[, 1], + featureValues(tmp_fld_2, value = "into")[, 2], + use = "complete.obs") > 0.97) +} + +dontrun_exhaustive_fillChromPeaks_matchedFilter <- function() { + ## Different step sizes. + prm <- MatchedFilterParam(binSize = 0.6) + tmp <- findChromPeaks(faahko_od, param = prm) + tmp <- groupChromPeaks(tmp, param = PeakDensityParam()) + tmp_fld <- fillChromPeaks(tmp) + checkTrue(sum(is.na(featureValues(tmp_fld))) < + sum(is.na(featureValues(tmp)))) + nas <- is.na(featureValues(tmp)[, 1]) | is.na(featureValues(tmp)[, 2]) + checkTrue(cor(featureValues(tmp, value = "into")[!nas, 1], + featureValues(tmp, value = "into")[!nas, 2]) > 0.97) + checkTrue(cor(featureValues(tmp_fld, value = "into")[, 1], + featureValues(tmp_fld, value = "into")[, 2], + use = "complete.obs") > 0.97) + ## Check signal generation for already found peaks. + for (i in 1:length(fileNames(tmp))) { + fnd_pks <- chromPeaks(tmp)[chromPeaks(tmp)[, "sample"] == i, ] + prm <- processHistory(tmp, type ="Peak detection")[[1]]@param + ## Extract the data for these using the internal function. 
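+        ## For matchedFilter peaks the signal is re-integrated from the profile
+        ## matrix, built with the same binning settings (prm) that were used
+        ## for the peak detection.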
+ fld_pks <- xcms:::.getChromPeakData_matchedFilter(filterFile(tmp, i), + peakArea = fnd_pks, + sample_idx = i, + param = prm, + cn = colnames(fnd_pks)) + ## rt can not be the same, since for fillChromPeaks it is the rt of the + ## maximum signal and for findChromPeaks it is the rt of the apex of the + ## filtered/fitted peak. + checkTrue(cor(fnd_pks[, "rt"], fld_pks[, "rt"]) > 0.99) + ## mz: also not the same; in here we're weighting by sum of intensities + ## per mz and in findChromPeaks by individual intensities. + ## diffs <- fnd_pks[, "mz"] - fld_pks[, "mz"] + ## checkTrue(max(diffs) < 1e-4) + checkTrue(cor(fnd_pks[, "mz"], fld_pks[, "mz"]) > 0.99) + ## into + checkTrue(cor(fnd_pks[, "into"], fld_pks[, "into"]) > 0.99) + ## checkEquals(fnd_pks[, "into"], fld_pks[, "into"]) + ## maxo + checkEquals(fnd_pks[, "maxo"], fld_pks[, "maxo"]) + } + + ## Imputation. + prm <- MatchedFilterParam(binSize = 0.2, impute = "lin") + tmp <- findChromPeaks(faahko_od, param = prm) + tmp <- groupChromPeaks(tmp, param = PeakDensityParam()) + tmp_fld <- fillChromPeaks(tmp) + checkTrue(sum(is.na(featureValues(tmp_fld))) < + sum(is.na(featureValues(tmp)))) + nas <- is.na(featureValues(tmp)[, 1]) | is.na(featureValues(tmp)[, 2]) + checkTrue(cor(featureValues(tmp, value = "into")[!nas, 1], + featureValues(tmp, value = "into")[!nas, 2]) > 0.9) + ## Check signal generation for already found peaks. + for (i in 1:length(fileNames(tmp))) { + fnd_pks <- chromPeaks(tmp)[chromPeaks(tmp)[, "sample"] == i, ] + prm <- processHistory(tmp, type ="Peak detection")[[1]]@param + ## Extract the data for these using the internal function. + fld_pks <- xcms:::.getChromPeakData_matchedFilter(filterFile(tmp, i), + peakArea = fnd_pks, + sample_idx = i, + param = prm, + cn = colnames(fnd_pks)) + ## rt can not be the same, since for fillChromPeaks it is the rt of the + ## maximum signal and for findChromPeaks it is the rt of the apex of the + ## filtered/fitted peak. + checkTrue(cor(fnd_pks[, "rt"], fld_pks[, "rt"]) > 0.99) + ## mz: also not the same; in here we're weighting by sum of intensities + ## per mz and in findChromPeaks by individual intensities. + ## diffs <- fnd_pks[, "mz"] - fld_pks[, "mz"] + ## checkTrue(max(diffs) < 1e-4) + checkTrue(cor(fnd_pks[, "mz"], fld_pks[, "mz"]) > 0.99) + ## into + checkTrue(cor(fnd_pks[, "into"], fld_pks[, "into"]) > 0.99) + ## checkEquals(fnd_pks[, "into"], fld_pks[, "into"]) + ## maxo + checkTrue(cor(fnd_pks[, "maxo"], fld_pks[, "maxo"]) > 0.99) + } + + ## Own files. + fls <- c("/Users/jo/data/2016/2016-11/NoSN/190516_POOL_N_POS_15.mzML", + "/Users/jo/data/2016/2016-11/NoSN/190516_POOL_N_POS_19.mzML", + "/Users/jo/data/2016/2016-11/NoSN/190516_POOL_N_POS_11.mzML") + raw <- readMSData2(fls) + pks <- findChromPeaks(raw, param = MatchedFilterParam(binSize = 0.05)) + pks <- groupChromPeaks(pks, param = PeakDensityParam()) + tmp_fld <- fillChromPeaks(pks) + nas <- is.na(featureValues(pks)[, 1]) | is.na(featureValues(pks)[, 2]) + checkTrue(cor(featureValues(pks, value = "into")[!nas, 1], + featureValues(pks, value = "into")[!nas, 2]) > 0.97) + checkTrue(cor(featureValues(tmp_fld, value = "into")[, 1], + featureValues(tmp_fld, value = "into")[, 2], + use = "complete.obs") > 0.97) + ## Check signal generation for already found peaks. + for (i in 1:length(fileNames(pks))) { + fnd_pks <- chromPeaks(pks)[chromPeaks(pks)[, "sample"] == i, ] + prm <- processHistory(pks, type ="Peak detection")[[1]]@param + ## Extract the data for these using the internal function. 
+ fld_pks <- xcms:::.getChromPeakData_matchedFilter(filterFile(pks, i), + peakArea = fnd_pks, + sample_idx = i, + param = prm, + cn = colnames(fnd_pks)) + ## rt can not be the same, since for fillChromPeaks it is the rt of the + ## maximum signal and for findChromPeaks it is the rt of the apex of the + ## filtered/fitted peak. + checkTrue(cor(fnd_pks[, "rt"], fld_pks[, "rt"]) > 0.99) + ## mz: also not the same; in here we're weighting by sum of intensities + ## per mz and in findChromPeaks by individual intensities. + ## diffs <- fnd_pks[, "mz"] - fld_pks[, "mz"] + ## checkTrue(max(diffs) < 1e-4) + checkTrue(cor(fnd_pks[, "mz"], fld_pks[, "mz"]) > 0.99) + ## into + checkTrue(cor(fnd_pks[, "into"], fld_pks[, "into"]) > 0.99) + checkEquals(fnd_pks[, "into"], fld_pks[, "into"]) + ## maxo + checkTrue(cor(fnd_pks[, "maxo"], fld_pks[, "maxo"]) > 0.99) + checkEquals(fnd_pks[, "maxo"], fld_pks[, "maxo"]) + } +} + +dontrun_exhaustive_fillChromPeaks_MSW <- function() { + library(xcms) + library(RUnit) + library(msdata) + fticrf <- list.files(system.file("fticr", package = "msdata"), + recursive = TRUE, full.names = TRUE) + fticr <- readMSData2(fticrf, msLevel. = 1) + p <- MSWParam(scales = c(1, 7), ampTh = 0.005, + SNR.method = "data.mean", winSize.noise = 500) + fticr <- findChromPeaks(fticr, param = p) + ## Now create the MzClustParam parameter object: we're assuming here that + ## both samples are from the same sample group. + p <- MzClustParam() + fticr <- groupChromPeaks(fticr, param = p) + res <- fillChromPeaks(fticr) + ## Got a signal for all of em. + checkTrue(!any(is.na(featureValues(res)))) + + ## 1) Compare with what I get for xcmsSet. + tmp_x <- xcmsSet(fticrf, method = "MSW", SNR.method = "data.mean", + winSize.noise = 500, scales = c(1, 7), + amp.Th = 0.005) + sampclass(tmp_x) <- rep(1, length(sampnames(tmp_x))) + tmp_x <- group(tmp_x, method = "mzClust") + checkEquals(unname(groupval(tmp_x)), unname(featureValues(fticr))) + checkEquals(unname(groupval(tmp_x, value = "maxo")), + unname(featureValues(fticr, value = "maxo"))) + checkEquals(unname(groupval(tmp_x, value = "into")), + unname(featureValues(fticr, value = "into"))) + checkEquals(unname(groupval(tmp_x, value = "mz")), + unname(featureValues(fticr, value = "mz"))) + checkEquals(unname(groupval(tmp_x, value = "mzmin")), + unname(featureValues(fticr, value = "mzmin"))) + checkEquals(unname(groupval(tmp_x, value = "mzmax")), + unname(featureValues(fticr, value = "mzmax"))) + ## Fill peaks + tmp_x <- fillPeaks(tmp_x, method = "MSW") + checkTrue(!any(is.na(groupval(tmp_x)))) + checkEquals(unname(groupval(tmp_x)), unname(featureValues(res))) + checkEquals(unname(groupval(tmp_x, value = "maxo")), + unname(featureValues(res, value = "maxo"))) + ## This below could be made equal if we used the same approach to define + ## which values to use. See comments on issue #130. 
+ ## checkEquals(unname(groupval(tmp_x, value = "into")), + ## unname(featureValues(res, value = "into"))) + ## plot(groupval(tmp_x, value = "into"), featureValues(res, value = "into")) + checkTrue(cor(as.numeric(groupval(tmp_x, value = "into")), + as.numeric(featureValues(res, value = "into"))) > 0.999) + ## plot(groupval(tmp_x, value = "mz"), featureValues(res, value = "mz")) + checkTrue(cor(as.numeric(groupval(tmp_x, value = "mz")), + as.numeric(featureValues(res, value = "mz"))) > 0.999) + checkEquals(unname(groupval(tmp_x, value = "mzmin")), + unname(featureValues(res, value = "mzmin"))) + checkEquals(unname(groupval(tmp_x, value = "mzmax")), + unname(featureValues(res, value = "mzmax"))) + + ## Check if I could get what MSW gets. + fdef <- featureDefinitions(fticr) + pkArea <- do.call( + rbind, + lapply( + fdef$peakidx, function(z) { + tmp <- chromPeaks(fticr)[z, c("rtmin", "rtmax", + "mzmin", "mzmax"), + drop = FALSE] + pa <- c(median(tmp[, 1]), median(tmp[, 2]), + median(tmp[, 3]), median(tmp[, 4])) + return(pa) + } + )) + colnames(pkArea) <- c("rtmin", "rtmax", "mzmin", "mzmax") + pkArea <- cbind(group_idx = 1:nrow(pkArea), pkArea, + mzmed = fdef$mzmed) + allPks <- xcms:::.getMSWPeakData(filterFile(fticr, file = 1), + peakArea = pkArea, + sample_idx = 1, + cn = colnames(chromPeaks(fticr))) + ## Get all chrom peaks part of a feature + curP <- chromPeaks(res)[unique(unlist(featureDefinitions(res)$peakidx)), ] + curP <- curP[curP[, "sample"] == 1, ] + curP <- curP[order(curP[, "mz"]), ] + ## checkEquals(allPks[, "mz"], curP[, "mz"]) + ## checkEquals(allPks[, "maxo"], curP[, "maxo"]) + + ## ## INVESTIGATE FURTHER! + fld <- curP[, "is_filled"] == 1 + plot(allPks[fld, "mz"], curP[fld, "mz"], main = "filled") + abline(0, 1, col = "grey") ## same + checkEquals(allPks[fld, "mz"], curP[fld, "mz"]) + plot(allPks[!fld, "mz"], curP[!fld, "mz"], main = "not filled") + abline(0, 1, col = "grey") ## highly similar!!! + ## checkEquals(allPks[!fld, "mz"], curP[!fld, "mz"]) ## HIGHLY similar though + ## into: + plot(allPks[fld, "into"], curP[fld, "into"], main = "filled") + checkEquals(allPks[fld, "into"], curP[fld, "into"]) + abline(0, 1, col = "grey") ## same + plot(allPks[!fld, "into"], curP[!fld, "into"], main = "not filled") + abline(0, 1, col = "grey") ## somewhat different!!! + + plot(allPks[fld, "maxo"], curP[fld, "maxo"], main = "filled") + checkEquals(allPks[fld, "maxo"], curP[fld, "maxo"]) + abline(0, 1, col = "grey") ## same + plot(allPks[!fld, "maxo"], curP[!fld, "maxo"], main = "not filled") + abline(0, 1, col = "grey") ## somewhat different!!! 
+} + + +dontrun_exhaustive_fillChromPeaks_test <- function() { + fls <- c("/Users/jo/data/2016/2016-11/NoSN/190516_POOL_N_POS_15.mzML", + "/Users/jo/data/2016/2016-11/NoSN/190516_POOL_N_POS_19.mzML", + "/Users/jo/data/2016/2016-11/NoSN/190516_POOL_N_POS_11.mzML") + raw <- readMSData2(fls) + pks <- findChromPeaks(raw, param = CentWaveParam(peakwidth = c(0.8, 20), + ppm = 40)) + pks_noRt <- groupChromPeaks(pks, param = PeakDensityParam(minFraction = 0.6)) + filled <- fillChromPeaks(pks_noRt) + fp <- chromPeaks(filled) + fp <- fp[fp[, "is_filled"] == 1, ] + idxs <- sample(1:nrow(fp), 20) + for (i in idxs) { + cfp <- fp[i, , drop = FALSE] + tmp <- filterFile(pks_noRt, file = cfp[1, "sample"], + keepAdjustedRtime = TRUE) + chr <- extractChromatograms(tmp, rt = cfp[1, c("rtmin", "rtmax")], + mz = cfp[1, c("mzmin", "mzmax")])[[1]] + ## into <- sum(intensity(chr), na.rm = TRUE) * + ## (cfp[1, "rtmax"] - cfp[1, "rtmin"]) / + ## (sum(rtime(tmp) >= cfp[1, "rtmin"] & rtime(tmp) <= cfp[1, "rtmax"]) - 1) + into <- sum(intensity(chr), na.rm = TRUE) * + (cfp[1, "rtmax"] - cfp[1, "rtmin"]) / (length(chr) - 1) + checkEquals(unname(into), unname(cfp[1, "into"])) + } + ## Check those with an NA. + gv <- featureValues(filled) + with_na <- is.na(rowSums(gv)) + idxs <- sample(which(with_na), 20) + for (i in idxs) { + tmp <- chromPeaks(pks_noRt)[featureDefinitions(pks_noRt)$peakidx[[i]], + c("rtmin", "rtmax", "mzmin", "mzmax")] + ## Get the intensities for the first one. + pkArea <- apply(tmp, median, MARGIN = 2) + smpl <- which(is.na(gv[i, ])) + tmp <- filterFile(pks_noRt, file = smpl, keepAdjustedRtime = TRUE) + chr <- extractChromatograms(tmp, rt = pkArea[1:2], mz = pkArea[3:4]) + checkTrue(length(chr) == 0) + } + still_missing <- is.na(rowSums(featureValues(filled))) + ## Try using ppm: + filled <- fillChromPeaks(pks_noRt, param = FillChromPeaksParam(ppm = 40)) + checkTrue(sum(still_missing) > sum(is.na(rowSums(featureValues(filled))))) + filled <- fillChromPeaks(pks_noRt, param = FillChromPeaksParam(ppm = 40, + expandMz = 2)) + checkTrue(sum(still_missing) > sum(is.na(rowSums(featureValues(filled))))) + ## Check that the mz and rt are all within the mzmin-mzmax and rtmin-rtmax + ## of the features. + fts <- featureDefinitions(filled) + for (i in 1:nrow(fts)) { + pks <- chromPeaks(filled)[fts[i, "peakidx"][[1]], ] + checkTrue(all(pks[, "mz"] >= fts[i, "mzmin"] & + pks[, "mz"] <= fts[i, "mzmax"])) + checkTrue(all(pks[, "rt"] >= fts[i, "rtmin"] & + pks[, "rt"] <= fts[i, "rtmax"])) + } + checkTrue(all(chromPeaks(filled)[, "rt"] >= chromPeaks(filled)[, "rtmin"] & + chromPeaks(filled)[, "rt"] <= chromPeaks(filled)[, "rtmax"])) + to_test <- chromPeaks(filled)[, "mz"] >= chromPeaks(filled)[, "mzmin"] & + chromPeaks(filled)[, "mz"] <= chromPeaks(filled)[, "mzmax"] + chromPeaks(filled)[!to_test, ] + ## checkTrue(all(to_test)) + + ## With adjusted retention times. 
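+    ## Align the retention times with obiwarp, redo the correspondence and
+    ## then repeat the filling checks on the adjusted data.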
+ pks <- adjustRtime(pks, param = ObiwarpParam()) + pks <- groupChromPeaks(pks, param = PeakDensityParam(minFraction = 0.6)) + filled <- fillChromPeaks(pks) + ## + fp <- chromPeaks(filled) + fp <- fp[fp[, "is_filled"] == 1, ] + idxs <- sample(1:nrow(fp), 20) + for (i in idxs) { + cfp <- fp[i, , drop = FALSE] + tmp <- filterFile(pks, file = cfp[1, "sample"], keepAdjustedRtime = TRUE) + chr <- extractChromatograms(tmp, rt = cfp[1, c("rtmin", "rtmax")], + mz = cfp[1, c("mzmin", "mzmax")])[[1]] + ## into <- sum(intensity(chr), na.rm = TRUE) * + ## (cfp[1, "rtmax"] - cfp[1, "rtmin"]) / + ## (sum(rtime(tmp) >= cfp[1, "rtmin"] & rtime(tmp) <= cfp[1, "rtmax"]) - 1) + into <- sum(intensity(chr), na.rm = TRUE) * + (cfp[1, "rtmax"] - cfp[1, "rtmin"]) / (length(chr) - 1) + checkEquals(unname(into), unname(cfp[1, "into"])) + } + ## Check those with an NA. + gv <- featureValues(filled) + with_na <- is.na(rowSums(gv)) + idxs <- sample(which(with_na), 20) + for (i in idxs) { + tmp <- chromPeaks(pks)[featureDefinitions(pks)$peakidx[[i]], + c("rtmin", "rtmax", "mzmin", "mzmax")] + ## Get the intensities for the first one. + pkArea <- apply(tmp, median, MARGIN = 2) + smpl <- which(is.na(gv[i, ])) + tmp <- filterFile(pks, file = smpl, keepAdjustedRtime = TRUE) + chr <- extractChromatograms(tmp, rt = pkArea[1:2], mz = pkArea[3:4]) + checkTrue(length(chr) == 0) + } + + ## what is the mz for those with NA against those without? + boxplot(list(filled = featureDefinitions(filled)[!with_na, "mzmed"], + failed = featureDefinitions(filled)[with_na, "mzmed"]), + varwidth = TRUE) + ## They are about the same, no difference. + + ## what is the intensity for those with NA against those without? + maxo <- rowMeans(featureValues(filled, value = "maxo"), na.rm = TRUE) + boxplot(list(filled = log2(maxo[!with_na]), failed = log2(maxo[with_na])), + varwidth = TRUE) + ## No difference. +} + +dontrun_test_getPeakInt_functions <- function() { + ## Testing whether the .getPeakInt functions are correct and which one is + ## more performant. + tmp <- filterFile(xod_xgrg, file = 3) + pkInt <- xcms:::.getPeakInt(tmp, chromPeaks(tmp)) + pkInt2 <- xcms:::.getPeakInt2(tmp, chromPeaks(tmp)) + pkInt3 <- xcms:::.getPeakInt3(tmp, chromPeaks(tmp)) + checkEquals(pkInt, pkInt2) + checkEquals(pkInt, pkInt3) + checkEquals(pkInt, chromPeaks(tmp)[, "into"]) + checkEquals(pkInt2, chromPeaks(tmp)[, "into"]) + checkEquals(pkInt3, chromPeaks(tmp)[, "into"]) + + library(microbenchmark) + microbenchmark(xcms:::.getPeakInt(tmp, chromPeaks(tmp)[1, , drop = FALSE]), + xcms:::.getPeakInt2(tmp, chromPeaks(tmp)[1, , drop = FALSE]), + xcms:::.getPeakInt3(tmp, chromPeaks(tmp)[1, , drop = FALSE]), + times = 10) + ## 258 ms vs 503 ms vs 499 ms + microbenchmark(xcms:::.getPeakInt(tmp, chromPeaks(tmp)[1:5, ]), + xcms:::.getPeakInt2(tmp, chromPeaks(tmp)[1:5, ]), + xcms:::.getPeakInt3(tmp, chromPeaks(tmp)[1:5, ]), + times = 10) + ## 1269 ms vs 586 ms vs 587 ms + microbenchmark(xcms:::.getPeakInt(tmp, chromPeaks(tmp)[1:10, ]), + xcms:::.getPeakInt2(tmp, chromPeaks(tmp)[1:10, ]), + xcms:::.getPeakInt3(tmp, chromPeaks(tmp)[1:10, ]), + times = 10) + ## 2577 ms vs 594 ms vs 562 ms + microbenchmark(xcms:::.getPeakInt(tmp, chromPeaks(tmp)), + xcms:::.getPeakInt2(tmp, chromPeaks(tmp)), + xcms:::.getPeakInt3(tmp, chromPeaks(tmp)), + times = 10) + ## 16447 ms vs 676 ms vs 556 ms + ## Well. getPeakInt2 and getPeakInt3 are considerably faster! +} + +## This provides some extensive tests. 
+dontrun_getPeakInt_validity <- function() { + ## Do extensive tests on the .getPeakInt3 function to ensure it is really + ## returning what it should. + ## faahKO centWave peaks + tmp <- filterFile(xod_xgrg, file = 2) + pkInt2 <- xcms:::.getPeakInt2(tmp, chromPeaks(tmp)) + checkEquals(pkInt2, chromPeaks(tmp)[, "into"]) + + ## faahKO matchedFilter peaks + tmp <- findChromPeaks(od_x, param = MatchedFilterParam()) + tmp <- filterFile(tmp, file = 1) + pkInt2 <- xcms:::.getPeakInt2(tmp, chromPeaks(tmp)) + checkEquals(pkInt2, chromPeaks(tmp)[, "into"]) + + ## own file centWave peaks + fl <- "/Users/jo/data/2016/2016-11/NoSN/190516_POOL_N_POS_19.mzML" + raw_x <- readMSData2(fl) + ## Default centWave - completely off. + tmp <- findChromPeaks(raw_x, param = CentWaveParam(verboseColumns = TRUE)) + pkInt3 <- xcms:::.getPeakInt3(tmp, chromPeaks(tmp)) + checkEquals(pkInt3, chromPeaks(tmp)[, "into"]) + + ## With OK settings + options(originalCentWave = FALSE) + tmp <- findChromPeaks(raw_x, param = CentWaveParam(peakwidth = c(0.8, 40))) + pkInt3 <- xcms:::.getPeakInt3(tmp, chromPeaks(tmp)) + checkEquals(pkInt3, chromPeaks(tmp)[, "into"]) + + library(microbenchmark) + microbenchmark(xcms:::.getPeakInt3(tmp, chromPeaks(tmp)), + xcms:::.getPeakInt2(tmp, chromPeaks(tmp)), times = 10) + ## + + ## Reproduce with msdata files: + fl <- system.file("microtofq/MM14.mzML", package = "msdata") + raw <- readMSData2(fl) + tmp <- findChromPeaks(raw, param = CentWaveParam(peakwidth = c(1, 20))) + pkInt3 <- xcms:::.getPeakInt3(tmp, chromPeaks(tmp)) + checkEquals(pkInt3, chromPeaks(tmp)[, "into"]) + + ## own file matchedFilter peaks + ## TODO: continue here - but be aware, for MatchedFilterParam we might have + ## to do the stuff on the profile matrix! + tmp <- findChromPeaks(raw_x, param = MatchedFilterParam()) +} diff --git a/inst/unitTests/runit.findPeaksCentWaveWithIsotopeROIs.R b/inst/unitTests/runit.findPeaksCentWaveWithIsotopeROIs.R index b141062c6..9edc00084 100755 --- a/inst/unitTests/runit.findPeaksCentWaveWithIsotopeROIs.R +++ b/inst/unitTests/runit.findPeaksCentWaveWithIsotopeROIs.R @@ -5,7 +5,19 @@ test.addPredictedIsotopeFeatures <- function() { p1 <- findPeaks.centWave(xr, verbose.columns = TRUE, noise = 10000) p2 <- findPeaks.addPredictedIsotopeFeatures( - object = xr, xcmsPeaks = p1, noise = 10000 ) + object = xr, xcmsPeaks = p1, noise = 10000) checkTrue(nrow(p1) < nrow(p2)) + ## Now the same with the new modified centWave: + options(originalCentWave = FALSE) + p1_2 <- findPeaks.centWave(xr, verbose.columns = TRUE, noise = 10000) + nrow(p1) + nrow(p1_2) + checkEquals(p1, p1_2) + + p2_2 <- findPeaks.addPredictedIsotopeFeatures( + object = xr, xcmsPeaks = p1_2, noise = 10000) + + options(originalCentWave = TRUE) + } diff --git a/inst/unitTests/runit.new_read_data.R b/inst/unitTests/runit.new_read_data.R index e47b75530..2f370efa5 100644 --- a/inst/unitTests/runit.new_read_data.R +++ b/inst/unitTests/runit.new_read_data.R @@ -56,7 +56,7 @@ test_evaluate_xcmsSource <- function() { library(msdata) mz_file <- system.file("microtofq/MM8.mzML", package = "msdata") src <- xcms:::xcmsSource(mz_file) - checkTrue(is(src, "rampSource")) + checkTrue(is(src, "pwizSource")) tmp <- loadRaw(src) checkEquals(names(tmp), c("rt", "acquisitionNum", "tic", "scanindex", "mz", "intensity", "polarity")) @@ -81,18 +81,22 @@ test_evaluate_xcmsSource <- function() { rawdata$MSn <- mzR:::rampRawDataMSn(rid) mzR:::rampClose(rid) rm(rid) + ## Ramp does not read polarity! 
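+    ## Copy the polarity over so that the comparison below covers only the
+    ## data both backends provide.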
+ tmp$polarity <- rawdata$polarity checkEquals(rawdata, tmp) ## Next example: msnfile <- system.file("microtofq/MSMSpos20_6.mzML", package = "msdata") src <- xcms:::xcmsSource(msnfile) tmp <- loadRaw(src, includeMSn = TRUE) + ## checkTrue(all(tmp$polarity == 1)) ## OLD code: rid <- mzR:::rampOpen(msnfile) rawdata <- mzR:::rampRawData(rid) rawdata$MSn <- mzR:::rampRawDataMSn(rid) mzR:::rampClose(rid) rm(rid) + rawdata$polarity <- tmp$polarity checkEquals(rawdata, tmp) } diff --git a/inst/unitTests/runit.splitCombine.R b/inst/unitTests/runit.splitCombine.R index 36e346351..bed18f255 100644 --- a/inst/unitTests/runit.splitCombine.R +++ b/inst/unitTests/runit.splitCombine.R @@ -162,6 +162,18 @@ testSplitPhenoData <- function(){ checkEquals(xset@polarity, xsetList[[1]]@polarity) } +## Issue #133 +test_c_empty <- function() { + library(msdata) + suppressWarnings( + xs <- xcmsSet(system.file("microtofq/MM8.mzML", package="msdata"), + method="centWave", ppm=25, peakwidth=c(20, 50)) + ) + xs2 <- xcmsSet(system.file("microtofq/MM14.mzML", package="msdata"), + method="centWave", ppm=25, peakwidth=c(20, 50)) + comb <- c(xs, xs2) + checkTrue(nrow(peaks(comb)) == 0) +} testSubset <- function(){ ## first testing just the plain xset diff --git a/man/Chromatogram-class.Rd b/man/Chromatogram-class.Rd index 789a753d6..ba3d52bc1 100644 --- a/man/Chromatogram-class.Rd +++ b/man/Chromatogram-class.Rd @@ -10,13 +10,18 @@ \alias{fromFile,Chromatogram-method} \alias{intensity,Chromatogram-method} \alias{length,Chromatogram-method} -\alias{mzrange,Chromatogram-method} +\alias{mz,Chromatogram-method} +\alias{precursorMz,Chromatogram-method} +\alias{productMz} +\alias{productMz,Chromatogram-method} \alias{rtime,Chromatogram-method} \alias{show,Chromatogram-method} \title{Representation of chromatographic MS data} \usage{ Chromatogram(rtime = numeric(), intensity = numeric(), mz = c(0, 0), - filterMz = c(0, 0), fromFile = integer(), aggregationFun = character()) + filterMz = c(0, 0), precursorMz = c(NA_real_, NA_real_), + productMz = c(NA_real_, NA_real_), fromFile = integer(), + aggregationFun = character()) \S4method{show}{Chromatogram}(object) @@ -24,7 +29,11 @@ Chromatogram(rtime = numeric(), intensity = numeric(), mz = c(0, 0), \S4method{intensity}{Chromatogram}(object) -\S4method{mzrange}{Chromatogram}(object, filter = FALSE) +\S4method{mz}{Chromatogram}(object, filter = FALSE) + +\S4method{precursorMz}{Chromatogram}(object) + +\S4method{productMz}{Chromatogram}(object) \S4method{aggregationFun}{Chromatogram}(object) @@ -50,6 +59,12 @@ If not applicable use \code{mzrange = c(0, 0)}.} max) that was used to filter the original object on mz dimension. If not applicable use \code{filterMz = c(0, 0)}.} +\item{precursorMz}{\code{numeric(2)} for SRM/MRM transitions. +Represents the mz of the precursor ion. See details for more information.} + +\item{productMz}{\code{numeric(2)} for SRM/MRM transitions. +Represents the mz of the product. See details for more information.} + \item{fromFile}{\code{integer(1)} the index of the file within the \code{\link{OnDiskMSnExp}} or \code{\link{XCMSnExp}} from which the chromatogram was extracted.} @@ -74,7 +89,8 @@ chromatographic MS data, i.e. pairs of retention time and intensity values. Instances of the class can be created with the \code{Chromatogram} constructor function but in most cases the dedicated methods for \code{\link{OnDiskMSnExp}} and \code{\link{XCMSnExp}} objects extracting -chromatograms should be used instead. +chromatograms should be used instead (i.e. 
the +\code{\link{extractChromatograms}}). \code{Chromatogram}: create an instance of the \code{Chromatogram} class. @@ -85,8 +101,14 @@ chromatograms should be used instead. \code{intensity} returns the intensity for the rentention time - intensity pairs stored in the chromatogram. -\code{mz} get or set the mz range of the -chromatogram. +\code{mz} get the mz (range) of the chromatogram. The +function returns a \code{numeric(2)} with the lower and upper mz value. + +\code{precursorMz} get the mz of the precursor ion. The +function returns a \code{numeric(2)} with the lower and upper mz value. + +\code{productMz} get the mz of the product chromatogram/ion. The +function returns a \code{numeric(2)} with the lower and upper mz value. \code{aggregationFun,aggregationFun<-} get or set the aggregation function. @@ -100,18 +122,28 @@ intensity pairs) of the chromatogram. \code{intensity} values from the object as \code{data.frame}. } \details{ -The \code{mz}, \code{filterMz} are stored as a \code{numeric(2)} -representing a range even if the chromatogram represent the chromatogram for -a single ion (represented as a single mz value). Representing the \code{mz} -as a range allows this class also to be used for a total ion chromatogram -or base peak chromatogram. +The \code{mz}, \code{filterMz}, \code{precursorMz} and \code{productMz} +are stored as a \code{numeric(2)} representing a range even if the +chromatogram was generated for only a single ion (i.e. a single mz value). +Using ranges for \code{mz} values allow this class to be used also for e.g. +total ion chromatograms or base peak chromatograms. + +The slots \code{precursorMz} and \code{productMz} allow to represent SRM +(single reaction monitoring) and MRM (multiple SRM) chromatograms. As example, +a \code{Chromatogram} for a SRM transition 273 -> 153 will have a +\code{@precursorMz = c(273, 273)} and a \code{@productMz = c(153, 153)}. } \section{Slots}{ \describe{ -\item{\code{rtime,intensity,mzrange,filterMzrange,fromFile,aggregationFun}}{See corresponding parameter above.} +\item{\code{.__classVersion__,rtime,intensity,mz,filterMz,precursorMz,productMz,fromFile,aggregationFun}}{See corresponding parameter above.} }} \author{ Johannes Rainer } +\seealso{ +\code{\link{extractChromatograms}} for the method to extract +\code{Chromatogram} objects from \code{\link{XCMSnExp}} or +\code{\link[MSnbase]{OnDiskMSnExp}} objects. +} diff --git a/man/GenericParam.Rd b/man/GenericParam.Rd new file mode 100644 index 000000000..ff582a012 --- /dev/null +++ b/man/GenericParam.Rd @@ -0,0 +1,54 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/DataClasses.R, R/functions-Params.R, R/methods-Params.R +\docType{class} +\name{GenericParam-class} +\alias{GenericParam} +\alias{GenericParam-class} +\alias{show,GenericParam-method} +\title{Generic parameter class} +\usage{ +GenericParam(fun = character(), args = list()) + +\S4method{show}{GenericParam}(object) +} +\arguments{ +\item{fun}{\code{character} representing the name of the function.} + +\item{args}{\code{list} (ideally named) with the arguments to the function.} + +\item{object}{\code{GenericParam} object.} +} +\value{ +The \code{GenericParam} function returns a \code{GenericParam} object. +} +\description{ +The \code{GenericParam} class allows to store generic parameter +information such as the name of the function that was/has to be called (slot +\code{fun}) and its arguments (slot \code{args}). 
This object is used to track +the process history of the data processings of an \code{\link{XCMSnExp}} +object. This is in contrast to e.g. the \code{\link{CentWaveParam}} object +that is passed to the actual processing method. +} +\section{Slots}{ + +\describe{ +\item{\code{fun}}{\code{character} specifying the function name.} + +\item{\code{args}}{\code{list} (ideally named) with the arguments to the +function.} + +\item{\code{.__classVersion__}}{the version of the class.} +}} +\examples{ +prm <- GenericParam(fun = "mean") + +prm <- GenericParam(fun = "mean", args = list(na.rm = TRUE)) +} +\author{ +Johannes Rainer +} +\seealso{ +\code{\link{processHistory}} for how to access the process history +of an \code{\link{XCMSnExp}} object. +} + diff --git a/man/XCMSnExp-class.Rd b/man/XCMSnExp-class.Rd index 877fc4644..78f3b5388 100644 --- a/man/XCMSnExp-class.Rd +++ b/man/XCMSnExp-class.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/DataClasses.R, R/methods-MsFeatureData.R, R/methods-OnDiskMSnExp.R, R/methods-XCMSnExp.R +% Please edit documentation in R/DataClasses.R, R/functions-ProcessHistory.R, R/methods-MsFeatureData.R, R/methods-OnDiskMSnExp.R, R/methods-XCMSnExp.R \docType{class} \name{MsFeatureData-class} \alias{MsFeatureData} @@ -27,12 +27,15 @@ \alias{dropFeatureDefinitions} \alias{dropFeatureDefinitions,MsFeatureData-method} \alias{dropFeatureDefinitions,XCMSnExp-method} +\alias{dropFilledChromPeaks} +\alias{dropFilledChromPeaks,XCMSnExp-method} \alias{featureDefinitions} \alias{featureDefinitions,MsFeatureData-method} \alias{featureDefinitions,XCMSnExp-method} \alias{featureDefinitions<-} \alias{featureDefinitions<-,MsFeatureData-method} \alias{featureDefinitions<-,XCMSnExp-method} +\alias{findChromPeaks,XCMSnExp,ANY-method} \alias{hasAdjustedRtime} \alias{hasAdjustedRtime,MsFeatureData-method} \alias{hasAdjustedRtime,XCMSnExp-method} @@ -46,6 +49,7 @@ \alias{mz,XCMSnExp-method} \alias{processHistory} \alias{processHistory,XCMSnExp-method} +\alias{processHistoryTypes} \alias{profMat,OnDiskMSnExp-method} \alias{profMat,XCMSnExp-method} \alias{rtime,XCMSnExp-method} @@ -55,6 +59,8 @@ \alias{spectra,XCMSnExp-method} \title{Data container storing xcms preprocessing results} \usage{ +processHistoryTypes() + \S4method{show}{MsFeatureData}(object) \S4method{hasAdjustedRtime}{MsFeatureData}(object) @@ -111,7 +117,8 @@ \S4method{intensity}{XCMSnExp}(object, bySample = FALSE) -\S4method{spectra}{XCMSnExp}(object, bySample = FALSE) +\S4method{spectra}{XCMSnExp}(object, bySample = FALSE, + adjusted = hasAdjustedRtime(object)) \S4method{processHistory}{XCMSnExp}(object, fileIndex, type) @@ -124,6 +131,11 @@ \S4method{profMat}{XCMSnExp}(object, method = "bin", step = 0.1, baselevel = NULL, basespace = NULL, mzrange. = NULL, fileIndex, ...) + +\S4method{findChromPeaks}{XCMSnExp,ANY}(object, param, BPPARAM = bpparam(), + return.type = "XCMSnExp") + +\S4method{dropFilledChromPeaks}{XCMSnExp}(object) } \arguments{ \item{object}{For \code{adjustedRtime}, \code{featureDefinitions}, @@ -180,9 +192,8 @@ sample.} retention times reported in the files) should be returned.} \item{type}{For \code{processHistory}: restrict returned -\code{\link{ProcessHistory}} objects to analysis steps of a certain type. -Supported values are \code{"Unknown"}, \code{"Peak detection"}, -\code{"Peak grouping"} and \code{"Retention time correction"}.} +\code{\link{ProcessHistory}} objects to analysis steps of a certain type. 
Use +the \code{processHistoryTypes} to list all supported values.} \item{keepAdjRtime}{For \code{dropFeatureDefinitions,XCMSnExp}: \code{logical(1)} defining whether eventually present retention time @@ -195,6 +206,21 @@ history steps to remove. By default \code{dropLastN = -1}, dropping the chromatographic peaks removes all process history steps related to peak grouping. Setting e.g. \code{dropLastN = 1} will only remove the most recent peak grouping related process history step.} + +\item{param}{A \code{\link{CentWaveParam}}, \code{\link{MatchedFilterParam}}, +\code{\link{MassifquantParam}}, \code{\link{MSWParam}} or +\code{\link{CentWavePredIsoParam}} object with the settings for the +chromatographic peak detection algorithm.} + +\item{BPPARAM}{A parameter class specifying if and how parallel processing +should be performed. It defaults to \code{\link[BiocParallel]{bpparam}}. +See documentation of the \code{BiocParallel} package for more details. If parallel +processing is enabled, peak detection is performed in parallel on several +of the input samples.} + +\item{return.type}{Character specifying what type of object the method should +return. Can be either \code{"XCMSnExp"} (default), \code{"list"} or +\code{"xcmsSet"}.} } \value{ For \code{profMat}: a \code{list} with a the profile matrix @@ -220,17 +246,25 @@ method that belong to that feature group. The method returns \code{NULL} if no feature definitions are present. For \code{chromPeaks}: if \code{bySample = FALSE} a \code{matrix} with -at least the following columns: \code{"mz"} (mz value for the largest -intensity), \code{"mzmin"} (minimal mz value), \code{"mzmax"} (maximal mz -value), \code{"rt"} (retention time for the peak apex), \code{"rtmin"} -(minimal retention time), \code{"rtmax"} (maximal retention time), -\code{"into"} (integrated, original, intensity of the peak) and -\code{"sample"} (sample index in which the peak was identified). +at least the following columns: +\code{"mz"} (intensity-weighted mean of mz values of the peak across scans/ +retention times), +\code{"mzmin"} (minimal mz value), +\code{"mzmax"} (maximal mz value), +\code{"rt"} (retention time for the peak apex), +\code{"rtmin"} (minimal retention time), +\code{"rtmax"} (maximal retention time), +\code{"into"} (integrated, original, intensity of the peak), +\code{"maxo"} (maximum intensity of the peak), +\code{"sample"} (sample index in which the peak was identified) and +\code{"is_filled"} defining whether the chromatographic peak was identified +by the peak picking algorithm (\code{0}) or was added by the +\code{fillChromPeaks} method (\code{1}). Depending on the employed peak detection algorithm and the \code{verboseColumns} parameter of it additional columns might be returned. For \code{bySample = TRUE} the chronatographic peaks are returned as a -\code{list} of matrices, each containing the chromatographic peak of a -specific sample. For sample in which no feastures were detected a matrix +\code{list} of matrices, each containing the chromatographic peaks of a +specific sample. For samples in which no peaks were detected a matrix +with 0 rows is returned. For \code{rtime}: if \code{bySample = FALSE} a numeric vector with the @@ -282,6 +316,10 @@ result from the \code{\link{findChromPeaks}} method. \code{XCMSnExp} objects can be coerced into \code{\linkS4class{xcmsSet}} objects using the \code{as} method. +\code{processHistoryTypes} returns the available \emph{types} of +process histories.
These can be passed with argument \code{type} to the +\code{processHistory} method to extract specific process step(s). + \code{profMat}: creates a \emph{profile matrix}, which is a n x m matrix, n (rows) representing equally spaced m/z values (bins) and m (columns) the retention time of the corresponding scans. Each cell contains @@ -344,9 +382,12 @@ a grouping by sample/file. \code{spectra}: extracts the \code{\link[MSnbase]{Spectrum}} objects containing all data from -\code{object}. These values are extracted from the original data files and -eventual processing steps are applied \emph{on the fly}. Setting -\code{bySample = TRUE} the spectra are returned grouped by sample/file. +\code{object}. The values are extracted from the original data files and +eventual processing steps are applied \emph{on the fly}. By setting +\code{bySample = TRUE}, the spectra are returned grouped by sample/file. If +the \code{XCMSnExp} object contains adjusted retention times, these are +returned by default in the \code{Spectrum} objects (can be overwritten +by setting \code{adjusted = FALSE}). \code{processHistory}: returns a \code{list} with \code{\link{ProcessHistory}} objects (or objects inheriting from this base @@ -369,7 +410,8 @@ features and returns the object without that information. Note that for results, if these were performed after the last peak grouping (i.e. which base on the results from the peak grouping that are going to be removed). For \code{XCMSnExp} objects also all related process history steps are -removed. +removed. Also eventually filled in peaks (by \code{\link{fillChromPeaks}}) +will be removed too. \code{dropAdjustedRtime}: drops any retention time adjustment information and returns the object without adjusted retention @@ -380,6 +422,10 @@ raw, ones (after chromatographic peak detection). Note that for if these were performed \emph{after} the retention time adjustment. For \code{XCMSnExp} objects the method drops also any related process history steps. + +\code{dropFilledChromPeaks}: drops any filled-in chromatographic +peaks (filled in by the \code{\link{fillChromPeaks}} method) and all related +process history steps. } \section{Slots}{ @@ -484,5 +530,8 @@ methods and \code{\link{featureDefinitions}} for the method to extract the feature definitions representing the peak grouping results. \code{\link{adjustRtime}} for retention time adjustment methods. + +\code{\link{fillChromPeaks}} for the method to fill-in eventually +missing chromatographic peaks for a feature in some samples. } diff --git a/man/XCMSnExp-filter-methods.Rd b/man/XCMSnExp-filter-methods.Rd index 499cd56ad..1a4758e6e 100644 --- a/man/XCMSnExp-filter-methods.Rd +++ b/man/XCMSnExp-filter-methods.Rd @@ -6,13 +6,7 @@ \alias{filterFile,XCMSnExp-method} \alias{filterMz,XCMSnExp-method} \alias{filterRt,XCMSnExp-method} -\title{XCMSnExp filtering and subsetting - -The methods listed on this page allow to filter and subset -\code{\link{XCMSnExp}} objects. 
Most of them are inherited from the -\code{\link[MSnbase]{OnDiskMSnExp}} object and have been adapted for -\code{\link{XCMSnExp}} to enable subsetting also on the preprocessing -results.} +\title{XCMSnExp filtering and subsetting} \usage{ \S4method{filterFile}{XCMSnExp}(object, file, keepAdjustedRtime = FALSE) @@ -48,12 +42,20 @@ window (lower and upper bound) for the filtering.} \item{adjusted}{For \code{filterRt}: \code{logical} indicating whether the object should be filtered by original (\code{adjusted = FALSE}) or adjusted -retention times (\code{adjusted = TRUE}).} +retention times (\code{adjusted = TRUE}). +For \code{spectra}: whether the retention times in the individual +\code{Spectrum} objects should be the adjusted or raw retention times.} } \value{ All methods return an \code{\link{XCMSnExp}} object. } \description{ +The methods listed on this page allow to filter and subset +\code{\link{XCMSnExp}} objects. Most of them are inherited from the +\code{\link[MSnbase]{OnDiskMSnExp}} object and have been adapted for +\code{\link{XCMSnExp}} to enable subsetting also on the preprocessing +results. + \code{filterFile}: allows to reduce the \code{\link{XCMSnExp}} to data from only certain files. Identified chromatographic peaks for these files are retained while all eventually diff --git a/man/XCMSnExp-peak-grouping-results.Rd b/man/XCMSnExp-peak-grouping-results.Rd index 785fbb6c7..6a49d2dc3 100644 --- a/man/XCMSnExp-peak-grouping-results.Rd +++ b/man/XCMSnExp-peak-grouping-results.Rd @@ -1,11 +1,12 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/methods-XCMSnExp.R \docType{methods} -\name{groupval,XCMSnExp-method} -\alias{groupval,XCMSnExp-method} +\name{featureValues,XCMSnExp-method} +\alias{featureValues} +\alias{featureValues,XCMSnExp-method} \title{Accessing mz-rt feature data values} \usage{ -\S4method{groupval}{XCMSnExp}(object, method = c("medret", "maxint"), +\S4method{featureValues}{XCMSnExp}(object, method = c("medret", "maxint"), value = "index", intensity = "into") } \arguments{ @@ -31,20 +32,24 @@ peak that should be used for the conflict resolution if \code{method = "maxint"}.} } \value{ -For \code{groupval}: a \code{matrix} with feature values, columns -representing samples, rows features. The order of the features -matches the order found in the \code{featureDefinitions(object)} +For \code{featureValues}: a \code{matrix} with +feature values, columns representing samples, rows features. The order of +the features matches the order found in the \code{featureDefinitions(object)} \code{DataFrame}. An \code{NA} is reported for features without corresponding chromatographic peak in the respective sample(s). } \description{ -\code{groupval,XCMSnExp}: extract a \code{matrix} for feature -values with rows representing features and columns samples. Parameter -\code{value} allows to define which column from the \code{\link{chromPeaks}} -matrix should be returned. Multiple chromatographic peaks from the same -sample can be assigned to a feature. Parameter \code{method} allows to -specify the method to be used in such cases to chose from which of the peaks -the value should be returned. +\code{featureValues,XCMSnExp} : +extract a \code{matrix} for feature values with rows representing features +and columns samples. Parameter \code{value} allows to define which column +from the \code{\link{chromPeaks}} matrix should be returned. Multiple +chromatographic peaks from the same sample can be assigned to a feature. 
+Parameter \code{method} allows to specify the method to be used in such +cases to choose from which of the peaks the value should be returned. +} +\note{ +This method is equivalent to the \code{\link{groupval}} method for +\code{xcmsSet} objects. } \author{ Johannes Rainer @@ -55,5 +60,6 @@ Johannes Rainer feature definitions. \code{\link{hasFeatures}} to evaluate whether the \code{\link{XCMSnExp}} provides feature definitions. +\code{\link{groupval}} for the equivalent method on \code{xcmsSet} objects. } diff --git a/man/extractChromatograms-method.Rd b/man/extractChromatograms-method.Rd new file mode 100644 index 000000000..5856f0402 --- /dev/null +++ b/man/extractChromatograms-method.Rd @@ -0,0 +1,93 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/methods-OnDiskMSnExp.R, R/methods-XCMSnExp.R +\docType{methods} +\name{extractChromatograms,OnDiskMSnExp-method} +\alias{extractChromatograms} +\alias{extractChromatograms,OnDiskMSnExp-method} +\alias{extractChromatograms,XCMSnExp-method} +\title{Extracting chromatograms} +\usage{ +\S4method{extractChromatograms}{OnDiskMSnExp}(object, rt, mz, + aggregationFun = "sum") + +\S4method{extractChromatograms}{XCMSnExp}(object, rt, mz, + adjustedRtime = hasAdjustedRtime(object), aggregationFun = "sum") +} +\arguments{ +\item{object}{Either a \code{\link[MSnbase]{OnDiskMSnExp}} or +\code{\link{XCMSnExp}} object from which the chromatograms should be extracted.} + +\item{rt}{\code{numeric(2)} defining the lower and upper boundary for the +retention time range. If not specified, the full retention time range of the +original data will be used. It is also possible to submit a \code{numeric(1)} +in which case \code{range} is called on it to transform it to a +\code{numeric(2)}.} + +\item{mz}{\code{numeric(2)} defining the lower and upper mz value for the +MS data slice. If not specified, the chromatograms will be calculated on the +full mz range. It is also possible to submit a \code{numeric(1)} in which case +\code{range} is called on it to transform it to a \code{numeric(2)}.} + +\item{aggregationFun}{\code{character} specifying the function to be used to +aggregate intensity values across the mz value range for the same retention +time. Allowed values are \code{"sum"}, \code{"max"}, \code{"mean"} and +\code{"min"}.} + +\item{adjustedRtime}{For \code{extractChromatograms,XCMSnExp}: whether the +adjusted (\code{adjustedRtime = TRUE}) or raw retention times +(\code{adjustedRtime = FALSE}) should be used for filtering and returned in +the resulting \code{\link{Chromatogram}} object. Adjusted retention times are +used by default if available.} +} +\description{ +\code{extractChromatograms}: the method allows to extract +chromatograms from \code{\link[MSnbase]{OnDiskMSnExp}} and +\code{\link{XCMSnExp}} objects. +} +\details{ +Arguments \code{rt} and \code{mz} allow to specify the MS +data slice from which the chromatogram should be extracted. The parameter +\code{aggregationFun} allows to specify the function to be used to aggregate +the intensities across the mz range for the same retention time. Setting +\code{aggregationFun = "sum"} would e.g. allow to calculate the \emph{total +ion chromatogram} (TIC), \code{aggregationFun = "max"} the \emph{base peak +chromatogram} (BPC). +} +\note{ +\code{Chromatogram} objects extracted with \code{extractChromatograms} contain +\code{NA_real_} values if, for a given retention time, no valid measurement +was available for the provided mz range.
+ +For \code{\link{XCMSnExp}} objects, if adjusted retention times are +available, the \code{extractChromatograms} method will by default report and +use these (for the subsetting based on the provided parameter \code{rt}). This +can be overwritten with the parameter \code{adjustedRtime}. +} +\examples{ +## Read some files from the faahKO package. +library(xcms) +library(faahKO) +faahko_3_files <- c(system.file('cdf/KO/ko15.CDF', package = "faahKO"), + system.file('cdf/KO/ko16.CDF', package = "faahKO"), + system.file('cdf/KO/ko18.CDF', package = "faahKO")) + +od <- readMSData2(faahko_3_files) + +## Extract the ion chromatogram for one chromatographic peak in the data. +chrs <- extractChromatograms(od, rt = c(2700, 2900), mz = 335) + +## plot the data +plot(rtime(chrs[[2]]), intensity(chrs[[2]]), type = "l", xlab = "rtime", + ylab = "intensity", col = "#000080") +for (i in c(1, 3)) { + points(rtime(chrs[[i]]), intensity(chrs[[i]]), type = "l", col = "#00000080") +} +} +\author{ +Johannes Rainer +} +\seealso{ +\code{\link{XCMSnExp}} for the data object. +\code{\link{Chromatogram}} for the object representing chromatographic data. +} + diff --git a/man/fillChromPeaks.Rd b/man/fillChromPeaks.Rd new file mode 100644 index 000000000..e7a03d4bf --- /dev/null +++ b/man/fillChromPeaks.Rd @@ -0,0 +1,206 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/DataClasses.R, R/functions-Params.R, R/methods-Params.R, R/methods-XCMSnExp.R +\docType{class} +\name{FillChromPeaksParam-class} +\alias{FillChromPeaksParam} +\alias{FillChromPeaksParam-class} +\alias{expandMz} +\alias{expandMz,FillChromPeaksParam-method} +\alias{expandMz<-} +\alias{expandMz<-,FillChromPeaksParam-method} +\alias{expandRt} +\alias{expandRt,FillChromPeaksParam-method} +\alias{expandRt<-} +\alias{expandRt<-,FillChromPeaksParam-method} +\alias{fillChromPeaks} +\alias{fillChromPeaks,XCMSnExp,FillChromPeaksParam-method} +\alias{fillChromPeaks,XCMSnExp,missing-method} +\alias{ppm,FillChromPeaksParam-method} +\alias{ppm<-,FillChromPeaksParam-method} +\alias{show,FillChromPeaksParam-method} +\title{Integrate areas of missing peaks} +\usage{ +FillChromPeaksParam(expandMz = 0, expandRt = 0, ppm = 0) + +\S4method{show}{FillChromPeaksParam}(object) + +\S4method{expandMz}{FillChromPeaksParam}(object) + +\S4method{expandMz}{FillChromPeaksParam}(object) <- value + +\S4method{expandRt}{FillChromPeaksParam}(object) + +\S4method{expandRt}{FillChromPeaksParam}(object) <- value + +\S4method{ppm}{FillChromPeaksParam}(object) + +\S4method{ppm}{FillChromPeaksParam}(object) <- value + +\S4method{fillChromPeaks}{XCMSnExp,FillChromPeaksParam}(object, param, + BPPARAM = bpparam()) + +\S4method{fillChromPeaks}{XCMSnExp,missing}(object, param, + BPPARAM = bpparam()) +} +\arguments{ +\item{expandMz}{\code{numeric(1)} defining the value by which the mz width of +peaks should be expanded. Each peak is expanded in mz direction by +\code{expandMz *} their original mz width. A value of \code{0} means no +expansion, a value of \code{1} grows each peak by 1 * the mz width of the peak +resulting in peaks with twice their original size in mz direction (expansion +by half mz width to both sides).} + +\item{expandRt}{\code{numeric(1)}, same as \code{expandMz} but for the +retention time width.} + +\item{ppm}{\code{numeric(1)} optionally specifying a \emph{ppm} by which the +mz width of the peak region should be expanded. For peaks with an mz width
For peaks with an mz width +smaller than \code{mean(c(mzmin, mzmax)) * ppm / 1e6}, the \code{mzmin} will +be replaced by +\code{mean(c(mzmin, mzmax)) - (mean(c(mzmin, mzmax)) * ppm / 2 / 1e6)} +and \code{mzmax} by +\code{mean(c(mzmin, mzmax)) + (mean(c(mzmin, mzmax)) * ppm / 2 / 1e6)}. This +is applied before eventually expanding the mz width using the \code{expandMz} +parameter.} + +\item{object}{\code{XCMSnExp} object with identified and grouped +chromatographic peaks.} + +\item{value}{The value for the slot.} + +\item{param}{A \code{FillChromPeaksParam} object with all settings.} + +\item{BPPARAM}{Parallel processing settings.} +} +\value{ +The \code{FillChromPeaksParam} function returns a +\code{FillChromPeaksParam} object. + +A \code{\link{XCMSnExp}} object with previously missing +chromatographic peaks for features filled into its \code{chromPeaks} matrix. +} +\description{ +The \code{FillChromPeaksParam} object encapsules all settings for +the signal integration for missing peaks. + +\code{expandMz},\code{expandMz<-}: getter and setter +for the \code{expandMz} slot of the object. + +\code{expandRt},\code{expandRt<-}: getter and setter +for the \code{expandRt} slot of the object. + +\code{ppm},\code{ppm<-}: getter and setter +for the \code{ppm} slot of the object. + +Integrate signal in the mz-rt area of a feature (chromatographic +peak group) for samples in which no chromatographic peak for this feature was +identified and add it to the \code{chromPeaks}. Such peaks will have a value +of \code{1} in the \code{"is_filled"} column of the \code{\link{chromPeaks}} +matrix of the object. +} +\details{ +After correspondence (i.e. grouping of chromatographic peaks across +samples) there will always be features (peak groups) that do not include peaks +from every sample. The \code{fillChromPeaks} method defines intensity values +for such features in the missing samples by integrating the signal in the +mz-rt region of the feature. The mz-rt area is defined by the median mz and +rt start and end points of the other detected chromatographic peaks for a +given feature. + +Adjusted retention times will be used if available. + +Based on the peak finding algorithm that was used to identify the +(chromatographic) peaks different internal functions are employed to guarantee +that the integrated peak signal matches as much as possible the peak signal +integration used during the peak detection. For peaks identified with the +\code{\link{matchedFilter}} method, signal integration is performed on the +\emph{profile matrix} generated with the same settings used also during peak +finding (using the same \code{bin} size for example). For direct injection +data and peaks identified with the \code{\link{MSW}} algorithm signal is +integrated only along the mz dimension. For all other methods the complete +(raw) signal within the area defined by \code{"mzmin"}, \code{"mzmax"}, +\code{"rtmin"} and \code{"rtmax"} is used. +} +\section{Slots}{ + +\describe{ +\item{\code{.__classVersion__,expandMz,expandRt,ppm}}{See corresponding parameter above. \code{.__classVersion__} stores the version of the class.} +}} +\note{ +The reported \code{"mzmin"}, \code{"mzmax"}, \code{"rtmin"} and +\code{"rtmax"} for the filled peaks represents the actual MS area from which +the signal was integrated. +Note that no peak is filled in if no signal was present in a file/sample in +the respective mz-rt area. These samples will still show a \code{NA} in the +matrix returned by the \code{\link{featureValues}} method. 
This is in contrast +to the \code{\link{fillPeaks.chrom}} method that returned an \code{"into"} and +\code{"maxo"} of \code{0} for such peak areas. Growing the mz-rt area using +the \code{expandMz} and \code{expandRt} might help to reduce the number of +missing peak signals after filling. +} +\examples{ + +## Perform the peak detection using centWave on some of the files from the +## faahKO package. Files are read using the readMSData2 from the MSnbase +## package +library(faahKO) +library(xcms) +fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, + full.names = TRUE) +raw_data <- readMSData2(fls[1:2]) + +## Create a CentWaveParam object. Note that the noise is set to 10000 to +## speed up the execution of the example - in a real use case the default +## value should be used, or it should be set to a reasonable value. +cwp <- CentWaveParam(ppm = 20, noise = 10000, snthresh = 25) + +res <- findChromPeaks(raw_data, param = cwp) + +## Perform the correspondence. +res <- groupChromPeaks(res, param = PeakDensityParam()) + +## For how many features do we lack an integrated peak signal? +sum(is.na(featureValues(res))) + +## Filling missing peak data using default settings. +res <- fillChromPeaks(res) + +## Get the peaks that have been filled in: +fp <- chromPeaks(res)[chromPeaks(res)[, "is_filled"] == 1, ] +head(fp) + +## Did we get a signal for all missing peaks? +sum(is.na(featureValues(res))) + +## No. + +## Get the process history step along with the parameters used to perform +## The peak filling: +ph <- processHistory(res, type = "Missing peak filling")[[1]] +ph + +## The parameter class: +ph@param + +## Drop the filled in peaks: +res <- dropFilledChromPeaks(res) + +## Perform the peak filling with modified settings: allow expansion of the +## mz range by a specified ppm and expanding the mz range by mz width/2 +prm <- FillChromPeaksParam(ppm = 40, expandMz = 0.5) +res <- fillChromPeaks(res, param = prm) + +## Did we get a signal for all missing peaks? +sum(is.na(featureValues(res))) + +## Still the same missing peaks. +} +\author{ +Johannes Rainer +} +\seealso{ +\code{\link{groupChromPeaks}} for methods to perform the +correspondence. +\code{\link{dropFilledChromPeaks}} for the method to remove filled in peaks. +} + diff --git a/man/fillPeaks.MSW-methods.Rd b/man/fillPeaks.MSW-methods.Rd index 18521b5d6..2e6d6eca6 100644 --- a/man/fillPeaks.MSW-methods.Rd +++ b/man/fillPeaks.MSW-methods.Rd @@ -27,6 +27,13 @@ end points of the other detected peaks. } +\note{ + In contrast to the \code{\link{fillPeaks.chrom}} method the maximum + intensity reported in column \code{"maxo"} is not the maximum + intensity measured in the expected peak area (defined by columns + \code{"mzmin"} and \code{"mzmax"}), but the largest intensity of mz + value(s) closest to the \code{"mzmed"} of the feature. +} \value{ A \code{xcmsSet} objects with filled in peak groups. 
} diff --git a/man/findChromPeaks-centWave.Rd b/man/findChromPeaks-centWave.Rd index 4c7651779..3c6f76efe 100644 --- a/man/findChromPeaks-centWave.Rd +++ b/man/findChromPeaks-centWave.Rd @@ -5,7 +5,6 @@ \alias{CentWaveParam} \alias{CentWaveParam-class} \alias{centWave} -\alias{findChromPeaks,MSnExp,CentWaveParam-method} \alias{findChromPeaks,OnDiskMSnExp,CentWaveParam-method} \alias{findChromPeaks-centWave} \alias{firstBaselineCheck} @@ -70,9 +69,6 @@ CentWaveParam(ppm = 25, peakwidth = c(20, 50), snthresh = 10, \S4method{findChromPeaks}{OnDiskMSnExp,CentWaveParam}(object, param, BPPARAM = bpparam(), return.type = "XCMSnExp") -\S4method{findChromPeaks}{MSnExp,CentWaveParam}(object, param, - BPPARAM = bpparam(), return.type = "list") - \S4method{show}{CentWaveParam}(object) \S4method{ppm}{CentWaveParam}(object) @@ -185,9 +181,9 @@ data within regions of interest is checked to be above the first baseline.} defining the scale for each region of interest in \code{roiList} that should be used for the centWave-wavelets.} -\item{object}{For \code{findChromPeaks}: Either an -\code{\link[MSnbase]{OnDiskMSnExp}} or a \code{\link[MSnbase]{MSnExp}} -object containing the MS- and all other experiment-relevant data. +\item{object}{For \code{findChromPeaks}: an +\code{\link[MSnbase]{OnDiskMSnExp}} object containing the MS- and all other +experiment-relevant data. For all other methods: a parameter object.} @@ -236,12 +232,6 @@ on all samples from an \code{\link[MSnbase]{OnDiskMSnExp}} object. data and load the spectra data (mz and intensity values) on the fly from the original files applying also all eventual data manipulations. -The \code{findChromPeaks,MSnExp,CentWaveParam} method performs -peak detection using the \emph{centWave} algorithm on all samples from -an \code{\link[MSnbase]{MSnExp}} object. These objects contain mz and -intensity values of all spectra hence no additional data input from the -original files is required. - \code{ppm},\code{ppm<-}: getter and setter for the \code{ppm} slot of the object. @@ -325,7 +315,7 @@ cwp ## faahKO package. 
Files are read using the readMSData2 from the MSnbase ## package library(faahKO) -library(MSnbase) +library(xcms) fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, full.names = TRUE) raw_data <- readMSData2(fls[1:2]) diff --git a/man/findChromPeaks-centWaveWithPredIsoROIs.Rd b/man/findChromPeaks-centWaveWithPredIsoROIs.Rd index 6dcfb64b4..52319c37f 100644 --- a/man/findChromPeaks-centWaveWithPredIsoROIs.Rd +++ b/man/findChromPeaks-centWaveWithPredIsoROIs.Rd @@ -5,7 +5,6 @@ \alias{CentWavePredIsoParam} \alias{CentWavePredIsoParam-class} \alias{centWaveWithPredIsoROIs} -\alias{findChromPeaks,MSnExp,CentWavePredIsoParam-method} \alias{findChromPeaks,OnDiskMSnExp,CentWavePredIsoParam-method} \alias{findChromPeaks-centWaveWithPredIsoROIs} \alias{maxCharge} @@ -40,9 +39,6 @@ CentWavePredIsoParam(ppm = 25, peakwidth = c(20, 50), snthresh = 10, \S4method{findChromPeaks}{OnDiskMSnExp,CentWavePredIsoParam}(object, param, BPPARAM = bpparam(), return.type = "XCMSnExp") -\S4method{findChromPeaks}{MSnExp,CentWavePredIsoParam}(object, param, - BPPARAM = bpparam(), return.type = "list") - \S4method{show}{CentWavePredIsoParam}(object) \S4method{snthreshIsoROIs}{CentWavePredIsoParam}(object) @@ -141,14 +137,14 @@ intensity peaks.} Currently not used, but has to be \code{"positive"}, \code{"negative"} or \code{"unknown"} if provided.} -\item{object}{For \code{findChromPeaks}: Either an -\code{\link[MSnbase]{OnDiskMSnExp}} or a \code{\link[MSnbase]{MSnExp}} -object containing the MS- and all other experiment-relevant data. +\item{object}{For \code{findChromPeaks}: an +\code{\link[MSnbase]{OnDiskMSnExp}} object containing the MS- and all other +experiment-relevant data. For all other methods: a parameter object.} -\item{param}{An \code{CentWaveParam} object containing all settings for the -centWave algorithm.} +\item{param}{An \code{CentWavePredIsoParam} object with the settings for the +chromatographic peak detection algorithm.} \item{BPPARAM}{A parameter class specifying if and how parallel processing should be performed. It defaults to \code{\link[BiocParallel]{bpparam}}. @@ -197,12 +193,6 @@ samples from an \code{\link[MSnbase]{OnDiskMSnExp}} object. data and load the spectra data (mz and intensity values) on the fly from the original files applying also all eventual data manipulations. -The \code{findChromPeaks,MSnExp,CentWavePredIsoParam} method -performs a two-step centWave-based peak detection on all samples from -an \code{\link[MSnbase]{MSnExp}} object. These objects contain mz and -intensity values of all spectra hence no additional data input from the -original files is required. - \code{snthreshIsoROIs},\code{snthreshIsoROIs<-}: getter and setter for the \code{snthreshIsoROIs} slot of the object. 
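A minimal, hedged usage sketch for the two-step centWave detection with predicted isotope ROIs on an `OnDiskMSnExp` (the faahKO example file and all parameter values below are assumptions chosen purely for illustration, they are not taken from the .Rd file):

```r
## Sketch only: peak detection with CentWavePredIsoParam on an OnDiskMSnExp.
## Parameter values are illustrative, not recommendations.
library(xcms)
library(faahKO)
fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE,
           full.names = TRUE)
raw_data <- readMSData2(fls[1])
cwip <- CentWavePredIsoParam(ppm = 25, snthresh = 25, noise = 10000)
res <- findChromPeaks(raw_data, param = cwip)
head(chromPeaks(res))
```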
diff --git a/man/findChromPeaks-massifquant.Rd b/man/findChromPeaks-massifquant.Rd index 2ab109bd5..28acc7bfa 100644 --- a/man/findChromPeaks-massifquant.Rd +++ b/man/findChromPeaks-massifquant.Rd @@ -16,7 +16,6 @@ \alias{criticalValue,MassifquantParam-method} \alias{criticalValue<-} \alias{criticalValue<-,MassifquantParam-method} -\alias{findChromPeaks,MSnExp,MassifquantParam-method} \alias{findChromPeaks,OnDiskMSnExp,MassifquantParam-method} \alias{findChromPeaks-massifquant} \alias{fitgauss,MassifquantParam-method} @@ -60,9 +59,6 @@ MassifquantParam(ppm = 25, peakwidth = c(20, 50), snthresh = 10, \S4method{findChromPeaks}{OnDiskMSnExp,MassifquantParam}(object, param, BPPARAM = bpparam(), return.type = "XCMSnExp") -\S4method{findChromPeaks}{MSnExp,MassifquantParam}(object, param, - BPPARAM = bpparam(), return.type = "list") - \S4method{show}{MassifquantParam}(object) \S4method{ppm}{MassifquantParam}(object) @@ -212,9 +208,9 @@ to being turned off.} with Massifquant are subsequently filtered with the second step of the centWave algorithm, which includes wavelet estimation.} -\item{object}{For \code{findChromPeaks}: Either an -\code{\link[MSnbase]{OnDiskMSnExp}} or a \code{\link[MSnbase]{MSnExp}} -object containing the MS- and all other experiment-relevant data. +\item{object}{For \code{findChromPeaks}: an +\code{\link[MSnbase]{OnDiskMSnExp}} object containing the MS- and all other +experiment-relevant data. For all other methods: a parameter object.} @@ -266,12 +262,6 @@ algorithm on all samples from an \code{\link[MSnbase]{OnDiskMSnExp}} object. data and load the spectra data (mz and intensity values) on the fly from the original files applying also all eventual data manipulations. -The \code{findChromPeaks,MSnExp,MassifquantParam} method -performs chromatographic peak detection using the \emph{massifquant} method -on all samples from an \code{\link[MSnbase]{MSnExp}} object. These objects -contain mz and intensity values of all spectra hence no additional -data input from the original files is required. - \code{ppm},\code{ppm<-}: getter and setter for the \code{ppm} slot of the object. diff --git a/man/findChromPeaks-matchedFilter.Rd b/man/findChromPeaks-matchedFilter.Rd index 1e2c1d925..3b7e5dbd0 100644 --- a/man/findChromPeaks-matchedFilter.Rd +++ b/man/findChromPeaks-matchedFilter.Rd @@ -16,7 +16,6 @@ \alias{distance,MatchedFilterParam-method} \alias{distance<-} \alias{distance<-,MatchedFilterParam-method} -\alias{findChromPeaks,MSnExp,MatchedFilterParam-method} \alias{findChromPeaks,OnDiskMSnExp,MatchedFilterParam-method} \alias{findChromPeaks-matchedFilter} \alias{fwhm} @@ -57,9 +56,6 @@ MatchedFilterParam(binSize = 0.1, impute = "none", baseValue = numeric(), \S4method{findChromPeaks}{OnDiskMSnExp,MatchedFilterParam}(object, param, BPPARAM = bpparam(), return.type = "XCMSnExp") -\S4method{findChromPeaks}{MSnExp,MatchedFilterParam}(object, param, - BPPARAM = bpparam(), return.type = "list") - \S4method{show}{MatchedFilterParam}(object) \S4method{binSize}{MatchedFilterParam}(object) @@ -148,9 +144,9 @@ in m/z for peaks with overlapping retention times} \item{index}{\code{logical(1)} specifying whether indicies should be returned instead of values for m/z and retention times.} -\item{object}{For \code{findChromPeaks}: Either an -\code{\link[MSnbase]{OnDiskMSnExp}} or a \code{\link[MSnbase]{MSnExp}} -object containing the MS- and all other experiment-relevant data. 
+\item{object}{For \code{findChromPeaks}: an +\code{\link[MSnbase]{OnDiskMSnExp}} object containing the MS- and all other +experiment-relevant data. For all other methods: a parameter object.} @@ -207,12 +203,6 @@ on all samples from an \code{\link[MSnbase]{OnDiskMSnExp}} object. data and load the spectra data (mz and intensity values) on the fly from the original files applying also all eventual data manipulations. -The \code{findChromPeaks,MSnExp,MatchedFilterParam} method -performs peak detection using the \emph{matchedFilter} method on all -samples from an \code{\link[MSnbase]{MSnExp}} object. These objects contain -mz and intensity values of all spectra hence no additional -data input from the original files is required. - \code{binSize},\code{binSize<-}: getter and setter for the \code{binSize} slot of the object. diff --git a/man/findPeaks-MSW.Rd b/man/findPeaks-MSW.Rd index d70e520fa..cc67b0f57 100644 --- a/man/findPeaks-MSW.Rd +++ b/man/findPeaks-MSW.Rd @@ -13,7 +13,6 @@ \alias{ampTh,MSWParam-method} \alias{ampTh<-} \alias{ampTh<-,MSWParam-method} -\alias{findChromPeaks,MSnExp,MSWParam-method} \alias{findChromPeaks,OnDiskMSnExp,MSWParam-method} \alias{findPeaks-MSW} \alias{minNoiseLevel} @@ -59,9 +58,6 @@ MSWParam(snthresh = 3, verboseColumns = FALSE, scales = c(1, seq(2, 30, \S4method{findChromPeaks}{OnDiskMSnExp,MSWParam}(object, param, BPPARAM = bpparam(), return.type = "XCMSnExp") -\S4method{findChromPeaks}{MSnExp,MSWParam}(object, param, BPPARAM = bpparam(), - return.type = "list") - \S4method{show}{MSWParam}(object) \S4method{snthresh}{MSWParam}(object) @@ -145,9 +141,9 @@ estimation of the detected peaks.} \code{\link[MassSpecWavelet]{sav.gol}} functions from the \code{MassSpecWavelet} package.} -\item{object}{For \code{findChromPeaks}: Either an -\code{\link[MSnbase]{OnDiskMSnExp}} or a \code{\link[MSnbase]{MSnExp}} -object containing the MS- and all other experiment-relevant data. +\item{object}{For \code{findChromPeaks}: an +\code{\link[MSnbase]{OnDiskMSnExp}} object containing the MS- and all other +experiment-relevant data. For all other methods: a parameter object.} @@ -194,13 +190,6 @@ samples from an \code{\link[MSnbase]{OnDiskMSnExp}} object. data and load the spectra data (mz and intensity values) on the fly from the original files applying also all eventual data manipulations. -The \code{findChromPeaks,MSnExp,MSWParam} method -performs peak detection in single-spectrum non-chromatography MS -data using functionality from the \code{MassSpecWavelet} package on all -samples from an \code{\link[MSnbase]{MSnExp}} object. These objects contain -mz and intensity values of all spectra hence no additional -data input from the original files is required. - \code{snthresh},\code{snthresh<-}: getter and setter for the \code{snthresh} slot of the object. diff --git a/man/groupChromPeaks-density.Rd b/man/groupChromPeaks-density.Rd index 152d4b66d..e3b14785a 100644 --- a/man/groupChromPeaks-density.Rd +++ b/man/groupChromPeaks-density.Rd @@ -192,9 +192,9 @@ res <- groupChromPeaks(res, fdp) ## The definition of the features (peak groups): featureDefinitions(res) -## Using the groupval method to extract a matrix with the intensities of +## Using the featureValues method to extract a matrix with the intensities of ## the features per sample. 
-head(groupval(res, value = "into")) +head(featureValues(res, value = "into")) ## The process history: processHistory(res) @@ -213,7 +213,7 @@ The \code{\link{do_groupChromPeaks_density}} core API function and \code{\link{group.density}} for the old user interface. \code{\link{featureDefinitions}} and -\code{\link{groupval,XCMSnExp-method}} for methods to access the features +\code{\link{featureValues,XCMSnExp-method}} for methods to access the features (i.e. the peak grouping results). \code{\link{XCMSnExp}} for the object containing the results of diff --git a/man/groupChromPeaks-mzClust.Rd b/man/groupChromPeaks-mzClust.Rd index 19b837cdd..3c69e0d8e 100644 --- a/man/groupChromPeaks-mzClust.Rd +++ b/man/groupChromPeaks-mzClust.Rd @@ -164,7 +164,7 @@ The \code{\link{do_groupPeaks_mzClust}} core API function and \code{\link{group.mzClust}} for the old user interface. \code{\link{featureDefinitions}} and -\code{\link{groupval,XCMSnExp-method}} for methods to access peak grouping +\code{\link{featureValues,XCMSnExp-method}} for methods to access peak grouping results (i.e. the features). \code{\link{XCMSnExp}} for the object containing the results of diff --git a/man/groupChromPeaks-nearest.Rd b/man/groupChromPeaks-nearest.Rd index 54b1c3933..c72d2b0a4 100644 --- a/man/groupChromPeaks-nearest.Rd +++ b/man/groupChromPeaks-nearest.Rd @@ -171,9 +171,9 @@ res <- groupChromPeaks(res, param = p) ## The results from the peak grouping: featureDefinitions(res) -## Using the groupval method to extract a matrix with the intensities of +## Using the featureValues method to extract a matrix with the intensities of ## the features per sample. -head(groupval(res, value = "into")) +head(featureValues(res, value = "into")) ## The process history: processHistory(res) @@ -188,7 +188,7 @@ The \code{\link{do_groupChromPeaks_nearest}} core API function and \code{\link{group.nearest}} for the old user interface. \code{\link{featureDefinitions}} and -\code{\link{groupval,XCMSnExp-method}} for methods to access peak grouping +\code{\link{featureValues,XCMSnExp-method}} for methods to access peak grouping results (i.e. the features). \code{\link{XCMSnExp}} for the object containing the results of diff --git a/man/groupChromPeaks.Rd b/man/groupChromPeaks.Rd index dc907d0e5..0b45f1549 100644 --- a/man/groupChromPeaks.Rd +++ b/man/groupChromPeaks.Rd @@ -32,7 +32,7 @@ Johannes Rainer \code{\link{group}} for the \emph{old} peak grouping methods. \code{\link{featureDefinitions}} and -\code{\link{groupval,XCMSnExp-method}} for methods to access peak grouping +\code{\link{featureValues,XCMSnExp-method}} for methods to access peak grouping results. 
Other peak grouping methods: \code{\link{groupChromPeaks-density}}, diff --git a/tests/doRUnit.R b/tests/doRUnit.R index 334dedf45..928f639fc 100644 --- a/tests/doRUnit.R +++ b/tests/doRUnit.R @@ -46,6 +46,12 @@ if(require("RUnit", quietly=TRUE)) { snthresh = 40)) faahko_xs <- xcmsSet(faahko_3_files, profparam = list(step = 0), method = "centWave", noise = 10000, snthresh = 40) + ## Doing also the retention time correction etc + od_x <- faahko_od + xod_x <- faahko_xod + xod_xg <- groupChromPeaks(xod_x, param = PeakDensityParam()) + xod_xgr <- adjustRtime(xod_xg, param = PeakGroupsParam(span = 0.4)) + xod_xgrg <- groupChromPeaks(xod_xgr, param = PeakDensityParam()) ## microtofq library(msdata) @@ -54,6 +60,19 @@ if(require("RUnit", quietly=TRUE)) { microtofq_xr <- xcmsRaw(microtofq_fs[1], profstep = 0) microtofq_od <- readMSData2(microtofq_fs) + ## Direct injection data: + fticrf <- list.files(system.file("fticr", package = "msdata"), + recursive = TRUE, full.names = TRUE) + fticr <- readMSData2(fticrf[1:2], msLevel. = 1) + fticr_xod <- findChromPeaks(fticr, MSWParam(scales = c(1, 7), + peakThr = 80000, ampTh = 0.005, + SNR.method = "data.mean", + winSize.noise = 500)) + fticr_xs <- xcmsSet(method="MSW", files=fticrf[1:2], scales=c(1,7), + SNR.method='data.mean' , winSize.noise=500, + peakThr=80000, amp.Th=0.005) + + ## microtofq_xod <- findChromPeaks(microtofq_od, param = MSWParam()) ## If desired, load the name space to allow testing of private functions ## if (is.element(pkg, loadedNamespaces())) ## attach(loadNamespace(pkg), name=paste("namespace", pkg, sep=":"), pos=3) diff --git a/vignettes/new_functionality.Rmd b/vignettes/new_functionality.Rmd index 2e3d715fd..0bfaf3fc8 100644 --- a/vignettes/new_functionality.Rmd +++ b/vignettes/new_functionality.Rmd @@ -1,5 +1,3 @@ -s#+TITLE: New and modified functionality in xcms - --- title: "New and modified functionality in xcms" author: "Johannes Rainer" @@ -44,27 +42,27 @@ of R functions, the so called core API functions (or `do_` functions). These functions take standard R data structures as input and return standard R data types as result and can hence be easily included in other R packages. -Over and above, the new user interface aims at simplifying and streamlining the -`xcms` workflow while guaranteeing data integrity and performance also for large -scale metabolomics experiments. An important aspect was also to enable an easy -access to the raw data files. +The new user interface aims at simplifying and streamlining the `xcms` workflow +while guaranteeing data integrity and performance also for large scale +metabolomics experiments. Importantly, a simplified access to the original raw +data should be provided throughout the whole metabolomics data analysis workflow. The new interface re-uses objects from the `MSnbase` Bioconductor package, such as the `OnDiskMSnExp` object. This object is specifically designed for large scale MS experiments as it initially reads just the scan header information from the mzML while the mz-intensity value pairs from all or from selected spectra of a file -or are read on demand hence minimizing the memory footprint. Also in contrast to -the old `xcmsRaw` object the `OnDiskMSnExp` contains information from all files of +are read on demand hence minimizing the memory demand. Also, in contrast to +the old `xcmsRaw` object, the `OnDiskMSnExp` contains information from all files of an experiment. 
In addition, all data normalization and adjustment methods implemented in the `MSnbase` package can be directly applied to the MS data without the need to re-implement such methods in `xcms`. Results from `xcms` preprocessings, such as chromatographic peak detection or correspondence are stored into the new `XCMSnExp` object. This object extends the `OnDiskMSnExp` object -and enables thus an easy access of the raw data on which the preprocessing was -performed. +and inherits thus all of its methods including raw data access. -Class and method/function names follow also a new naming convention and avoid -the partially confusing nomenclature of the original `xcms` methods. To +Class and method/function names follow also a new naming convention trying tp +avoid the partially confusing nomenclature of the original `xcms` methods (such as +the `group` method to perform the correspondence of peaks across samples). To distinguish them from mass peaks, the peaks identified by the peak detection in an LS/GC-MS experiment are referred to as *chromatographic peaks*. The respective method to identify such peaks is hence called `findChromPeaks` and the identified @@ -81,16 +79,16 @@ function into a parameter class (such as `CentWaveParam`) avoids busy function calls (with many single parameters) and enables saving, reloading and reusing the settings. In addition, the parameter classes are added, along with other information to the process history of an `XCMSnExp` object thus providing a -detailed documentation of each processing step of an analysis. In addition, -validation of the parameters can be performed within the parameter object and -hence is no longer required in the analysis function. +detailed documentation of each processing step of an analysis, with the +possibility to recall all settings of the performed analyses at any stage. In +addition, validation of the parameters can be performed within the parameter +object and hence is no longer required in the analysis function. The example below illustrates the new user interface. First we load the raw data files from the `faahKO` package using the `readMSData2` from the `MSnbase` package. ```{r message = FALSE, warning = FALSE} ## Reading the raw data using the MSnbase package -library(MSnbase) library(xcms) ## Load 6 of the CDF files from the faahKO cdf_files <- dir(system.file("cdf", package = "faahKO"), recursive = TRUE, @@ -130,17 +128,208 @@ for (i in 2:length(fileNames(raw_data))) { legend("topleft", col = sample_colors, legend = names(sample_colors), lty = 1) ``` -In addition we can plot the distribution of the total ion counts per file. +Alternatively we can use the `extractChromatograms` method that extracts a +chromatogram from the object. In the example below we extract the *base peak +chromatogram* (BPC) by setting `aggregationFun` to `"max"` and not specifying an `rt` +or `mz` range to extract only a data subset. Note that this function reads the +data from the raw files. + +```{r faahKO-bpi, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4} +## Get the base peak chromatograms. This reads data from the files. 
+bpis <- extractChromatograms(raw_data, aggregationFun = "max") +plot(3, 3, pch = NA, xlim = range(unlist(lapply(bpis, rtime))), + ylim = range(unlist(lapply(bpis, intensity))), main = "BPC", + xlab = "rtime", ylab = "intensity") +for (i in 1:length(bpis)) { + points(rtime(bpis[[i]]), intensity(bpis[[i]]), type = "l", + col = paste0(sample_colors[pData(raw_data)$sample_group[i]], 80)) +} +``` + +Note that we could restrict the analysis to a certain retention time range by +sub-setting `raw_data` with the `filterRt` method. + +In addition we can plot the distribution of the total ion counts per file. In +contrast to sub-setting the object we split the numeric vector returned by the +`tic` by file using the `fromFile` method that provides the mapping of the +experiment's spectra to the originating files. + +```{r faahKO-tic-boxplot, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4} +## Get the total ion current by file +tc <- split(tic(raw_data), f = fromFile(raw_data)) +boxplot(tc, col = paste0(sample_colors[pData(raw_data)$sample_group], 80), + ylab = "intensity", main = "Total ion current") +``` The `tic` (and for mzML files) the `bpi` methods are very fast, even for large data sets, as these information are stored in the header of the raw files avoiding -the need to read the raw data from each file. +the need to read the raw data from each file. Also, we could subset the whole +object using the filter functions `filterFile`, `filterRt` or `filterMz` to +e.g. remove problematic samples or restrict the retention time range in which we +want to perform the chromatographic peak detection. + +Next we perform the chromatographic peak detection using the *centWave* algorithm +[@Tautenhahn:2008fx]. In the example below we use most of the standard +parameters, but the settings should be adjusted to each experiment individually +based on e.g. the expected width of the chromatographic peaks etc. + +```{r faahKO-centWave} +## Defining the settings for the centWave peak detection. +cwp <- CentWaveParam(snthresh = 20, noise = 1000) +xod <- findChromPeaks(raw_data, param = cwp) +``` + +The identified peaks can be accessed with the `chromPeaks` parameter which returns +a `matrix`, each line representing an identified peak. Column `"sample"` specifies +in which *sample* (i.e. file) of the experiment the peak was detected. Below we +plot the signal distribution of the identified peaks per sample. -- Do the chromatographic peak detection. +```{r faahKO-peak-intensity-boxplot, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4} +ints <- split(chromPeaks(xod)[, "into"], f = chromPeaks(xod)[, "sample"]) +ints <- lapply(ints, log2) +boxplot(ints, varwidth = TRUE, col = sample_colors[pData(xod)$sample_group], + ylab = expression(log[2]~intensity), main = "Peak intensities") +``` -- Describe the peak detection methods. +Next we align the samples using the *obiwarp* method [@Prince:2006jj]. This +method does, in contrast to other alignment/retention time correction methods, +not require any identified peaks and could thus also be applied to an +`OnDiskMSnExp` object. Note that all retention time adjustment methods do also +adjust the retention times reported for the individual peaks in `chromPeaks`. -- Describe subsetting methods filter etc. +```{r faahKO-obiwarp, message = FALSE} +## Doing the obiwarp alignment using the default settings. 
+xod <- adjustRtime(xod, param = ObiwarpParam())
+```
+
+Note that any pre-processing results can be removed at any time using a *drop*
+method, such as `dropChromPeaks`, `dropFeatureDefinitions` or
+`dropAdjustedRtime`.
+
+To evaluate the impact of the alignment we can plot again the BPC of each
+sample.
+
+```{r faahKO-bpi-obiwarp, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4}
+## Get the base peak chromatograms. This reads data from the files.
+bpis <- extractChromatograms(xod, aggregationFun = "max")
+plot(3, 3, pch = NA, xlim = range(unlist(lapply(bpis, rtime))),
+     ylim = range(unlist(lapply(bpis, intensity))), main = "BPC",
+     xlab = "rtime", ylab = "intensity")
+for (i in 1:length(bpis)) {
+    points(rtime(bpis[[i]]), intensity(bpis[[i]]), type = "l",
+           col = paste0(sample_colors[pData(raw_data)$sample_group[i]], 80))
+}
+```
+
+Below we plot the difference between the adjusted and the raw retention times.
+The deviation from the raw retention times should be within the expected
+variance/shift from the chromatography. Too large differences could indicate
+poorly performing samples.
+
+```{r faahKO-adjusted-rtime-plot, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4}
+## Calculate the difference between the adjusted and the raw retention times.
+diffRt <- rtime(xod) - rtime(xod, adjusted = FALSE)
+
+## By default, rtime and most other accessor methods return a numeric vector. To
+## get the values grouped by sample we have to split this vector by file/sample.
+diffRt <- split(diffRt, fromFile(xod))
+
+## Get the raw retention times grouped by sample.
+rawRt <- rtime(xod, bySample = TRUE, adjusted = FALSE)
+plot(rawRt[[1]], diffRt[[1]], type = "l", ylab = "adjusted - raw rt",
+     xlab = "raw rt", main = "Obiwarp alignment results", ylim = range(diffRt),
+     col = paste0(sample_colors[pData(xod)$sample_group[1]], 80))
+for (i in 2:length(rawRt)) {
+    points(rawRt[[i]], diffRt[[i]], type = "l",
+           col = paste0(sample_colors[pData(xod)$sample_group[i]], 80))
+}
+```
+
+The distribution of retention time differences could also be used for quality
+assessment.
+
+```{r faahKO-adjusted-rtime-boxplot, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4}
+boxplot(diffRt, col = sample_colors[pData(xod)$sample_group],
+        main = "Obiwarp alignment results", ylab = "adjusted - raw rt")
+```
+
+The 3rd sample was used as the *center* sample against which all other samples
+were aligned, hence its adjusted retention times are identical to the raw
+retention times.
+
+Next we group identified chromatographic peaks across samples. We use the *peak
+density* method [@Smith:2006ic], specifying that a chromatographic peak has to
+be present in at least 2/3 (`minFraction = 0.66`) of the samples within each
+group to be combined into an mz-rt *feature*.
+
+```{r faahKO-groupPeakDensity, message = FALSE}
+## Define the PeakDensityParam
+pdp <- PeakDensityParam(sampleGroups = pData(xod)$sample_group,
+                        maxFeatures = 300, minFraction = 0.66)
+xod <- groupChromPeaks(xod, param = pdp)
+```
+
+The definitions of the features can be accessed with the `featureDefinitions`
+method, which lists the mz-rt space specific to a feature. Column `"peakidx"`
+lists the indices (in the `chromPeaks` matrix) of the individual chromatographic
+peaks belonging to the feature.
+
+```{r faahKO-featureDefinitions, message = FALSE}
+head(featureDefinitions(xod))
+```
+
+To extract *values* for the features, the `featureValues` method can be used. 
This +method returns a matrix with rows being the features and column the samples. The +`value` parameter allows to specify the value that should be returned. Below we +extract the `"into"` signal, i.e. the per-peak integrated intensity for each +feature. + +```{r faahKO-featureValues, message = FALSE} +## Extract the "into" peak integrated signal. +head(featureValues(xod, value = "into")) +``` + +After correspondence there will always be features that do not include peaks +from every sample (being it that the peak finding algorithm failed to identify a +peak or that no signal was measured in the respective mz-rt area). For such +features an `NA` is returned by the `featureValues` method. Here, `xcms` allows to +infer values for such missing peaks using the `fillChromPeaks` method. This method +integrates in files where a peak was not found the signal from the mz-rt area +where it is expected and adds it to the `chromPeaks` matrix. Such *filled-in* peaks +have a value of `1` in the `"is_filled"` column of the `chromPeaks` matrix. + +```{r faahKO-fillPeaks, message = FALSE} +## Fill in peaks with default settings. Settings can be adjusted by passing +## a FillChromPeaksParam object to the method. +xod <- fillChromPeaks(xod) + +head(featureValues(xod)) +``` + +Not for all missing peaks a value could be integrated (because at the respective +location no measurements are available). The peak area from which signal is to +be extracted can also be increased modifying the settings by passing a +`FillChromPeaksParam` object. + +At last we can inspect the `processHistory` of the analysis. As described earlier, +this records all (major) processing steps along with the corresponding parameter +classes. + +```{r faahKO-processHistory, message = FALSE} +## List the full process history +processHistory(xod) +``` + +It is also possible to extract specific processing steps by specifying its +type. Available types can be listed with the `processHistoryTypes` function. Below +we extract the parameter class for the alignment/retention time adjustment step. + +```{r faahKO-processHistory-select, message = FALSE} +ph <- processHistory(xod, type = "Retention time correction") + +## Access the parameter +processParam(ph[[1]]) +``` ## New naming convention @@ -297,7 +486,7 @@ objects get copied by R which *could* eventually result in a larger memory deman or performance decrease (while no such was decrease was observed up to now). -## Usability improvements +## Usability improvements in the *old* user interface - `[` subsetting method for `xcmsRaw` objects that enables to subset an `xcmsRaw` object to specific scans/spectra. @@ -417,6 +606,16 @@ generation uses now the `binYonX` method which fixed some problems in the origin binning and linear interpolation methods. Thus results might be slightly different. +Also, the `retcor.obiwarp` method reports (un-rounded) adjusted retention times, +but adjusts the retention time of eventually already identified peaks using +rounded adjusted retention times. The new `adjustRtime` method(s) does adjust +identified peaks using the reported adjusted retention times (not rounded). This +guarantees that e.g. removing retention time adjustment/alignment results from +an object restores the object to its initial state (i.e. the adjusted retention +times of the identified peaks are reverted to the retention times before +alignment). +See issue [#122](https://github.com/sneumann/xcms/issues/122) for more details. 
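A minimal sketch of this reversibility (assuming the `xod` object with adjusted retention times from the workflow above; not evaluated here):

```{r adjustRtime-revert-sketch, eval = FALSE}
## Dropping the alignment results should restore the raw retention times,
## both for the spectra and for the identified chromatographic peaks.
xod_restored <- dropAdjustedRtime(xod)
all.equal(rtime(xod_restored), rtime(xod, adjusted = FALSE))
```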
+ ## `retcor.peaksgroups`: change in the way how *well behaved* peak groups are ordered @@ -455,6 +654,30 @@ These problems have been fixed in version 1.51.1 by first sub-setting the detection. +## `fillPeaks` (`fillChromPeaks`) differences + +In the original `fillPeaks.MSW`, the mz range from which the signal is to be +integrated was defined using + +```{r eval = FALSE} +mzarea <- seq(which.min(abs(mzs - peakArea[i, "mzmin"])), + which.min(abs(mzs - peakArea[i, "mzmax"]))) +``` + +Depending on the data this could lead to the inclusion of signal in the +integration that are just outside of the mz range. In the new `fillChromPeaks` +method signal is integrated only for mz values >= mzmin and <= mzmax thus +ensuring that only signal is used that is truly within the peak area defined by +columns `"mzmin"`, `"mzmax"`, `"rtmin"` and `"rtmax"`. + +Also, the `fillPeaks.chrom` method did return `"into"` and `"maxo"` values of `0` if no +signal was found in the peak area. The new method does not integrate any signal +in such cases and does not fill in that peak. + +See also issue [#130](https://github.com/sneumann/xcms/issues/130) for more +information. + + # Under the hood changes These changes and updates will not have any large impact on the day-to-day use of diff --git a/vignettes/new_functionality.org b/vignettes/new_functionality.org index 4f24a2d22..892015f23 100644 --- a/vignettes/new_functionality.org +++ b/vignettes/new_functionality.org @@ -1,4 +1,4 @@ -s#+TITLE: New and modified functionality in xcms +#+TITLE: New and modified functionality in xcms #+AUTHOR: Johannes Rainer #+EMAIL: johannes.rainer@eurac.edu #+DESCRIPTION: @@ -53,10 +53,10 @@ of R functions, the so called core API functions (or =do_= functions). These functions take standard R data structures as input and return standard R data types as result and can hence be easily included in other R packages. -Over and above, the new user interface aims at simplifying and streamlining the -=xcms= workflow while guaranteeing data integrity and performance also for large -scale metabolomics experiments. An important aspect was also to enable an easy -access to the raw data files. +The new user interface aims at simplifying and streamlining the =xcms= workflow +while guaranteeing data integrity and performance also for large scale +metabolomics experiments. Importantly, a simplified access to the original raw +data should be provided throughout the whole metabolomics data analysis workflow. # All objects in the new user interface ensuring # data integrity /via/ validation methods and class versioning, all methods are @@ -66,18 +66,18 @@ The new interface re-uses objects from the =MSnbase= Bioconductor package, such the =OnDiskMSnExp= object. This object is specifically designed for large scale MS experiments as it initially reads just the scan header information from the mzML while the mz-intensity value pairs from all or from selected spectra of a file -or are read on demand hence minimizing the memory footprint. Also in contrast to -the old =xcmsRaw= object the =OnDiskMSnExp= contains information from all files of +are read on demand hence minimizing the memory demand. Also, in contrast to +the old =xcmsRaw= object, the =OnDiskMSnExp= contains information from all files of an experiment. In addition, all data normalization and adjustment methods implemented in the =MSnbase= package can be directly applied to the MS data without the need to re-implement such methods in =xcms=. 
Results from =xcms= preprocessings, such as chromatographic peak detection or correspondence are stored into the new =XCMSnExp= object. This object extends the =OnDiskMSnExp= object -and enables thus an easy access of the raw data on which the preprocessing was -performed. +and inherits thus all of its methods including raw data access. -Class and method/function names follow also a new naming convention and avoid -the partially confusing nomenclature of the original =xcms= methods. To +Class and method/function names follow also a new naming convention trying tp +avoid the partially confusing nomenclature of the original =xcms= methods (such as +the =group= method to perform the correspondence of peaks across samples). To distinguish them from mass peaks, the peaks identified by the peak detection in an LS/GC-MS experiment are referred to as /chromatographic peaks/. The respective method to identify such peaks is hence called =findChromPeaks= and the identified @@ -94,16 +94,16 @@ function into a parameter class (such as =CentWaveParam=) avoids busy function calls (with many single parameters) and enables saving, reloading and reusing the settings. In addition, the parameter classes are added, along with other information to the process history of an =XCMSnExp= object thus providing a -detailed documentation of each processing step of an analysis. In addition, -validation of the parameters can be performed within the parameter object and -hence is no longer required in the analysis function. +detailed documentation of each processing step of an analysis, with the +possibility to recall all settings of the performed analyses at any stage. In +addition, validation of the parameters can be performed within the parameter +object and hence is no longer required in the analysis function. The example below illustrates the new user interface. First we load the raw data files from the =faahKO= package using the =readMSData2= from the =MSnbase= package. #+BEGIN_SRC R :ravel message = FALSE, warning = FALSE ## Reading the raw data using the MSnbase package - library(MSnbase) library(xcms) ## Load 6 of the CDF files from the faahKO cdf_files <- dir(system.file("cdf", package = "faahKO"), recursive = TRUE, @@ -144,18 +144,225 @@ consistent. legend("topleft", col = sample_colors, legend = names(sample_colors), lty = 1) #+END_SRC -In addition we can plot the distribution of the total ion counts per file. +Alternatively we can use the =extractChromatograms= method that extracts a +chromatogram from the object. In the example below we extract the /base peak +chromatogram/ (BPC) by setting =aggregationFun= to ="max"= and not specifying an =rt= +or =mz= range to extract only a data subset. Note that this function reads the +data from the raw files. + +#+NAME: faahKO-bpi +#+BEGIN_SRC R :ravel message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4 + ## Get the base peak chromatograms. This reads data from the files. + bpis <- extractChromatograms(raw_data, aggregationFun = "max") + plot(3, 3, pch = NA, xlim = range(unlist(lapply(bpis, rtime))), + ylim = range(unlist(lapply(bpis, intensity))), main = "BPC", + xlab = "rtime", ylab = "intensity") + for (i in 1:length(bpis)) { + points(rtime(bpis[[i]]), intensity(bpis[[i]]), type = "l", + col = paste0(sample_colors[pData(raw_data)$sample_group[i]], 80)) + } + +#+END_SRC + +Note that we could restrict the analysis to a certain retention time range by +sub-setting =raw_data= with the =filterRt= method. 
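A minimal sketch of such a subsetting step (the retention time window below is an arbitrary example value, not a recommendation):

#+NAME: faahKO-filterRt-sketch
#+BEGIN_SRC R :ravel eval = FALSE
  ## Restrict the experiment to an (arbitrarily chosen) retention time window
  ## from 2500 to 3500 seconds before any further processing.
  raw_sub <- filterRt(raw_data, rt = c(2500, 3500))
  range(rtime(raw_sub))
#+END_SRC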
+ +In addition we can plot the distribution of the total ion counts per file. In +contrast to sub-setting the object we split the numeric vector returned by the +=tic= by file using the =fromFile= method that provides the mapping of the +experiment's spectra to the originating files. +#+NAME: faahKO-tic-boxplot +#+BEGIN_SRC R :ravel message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4 + ## Get the total ion current by file + tc <- split(tic(raw_data), f = fromFile(raw_data)) + boxplot(tc, col = paste0(sample_colors[pData(raw_data)$sample_group], 80), + ylab = "intensity", main = "Total ion current") +#+END_SRC The =tic= (and for mzML files) the =bpi= methods are very fast, even for large data sets, as these information are stored in the header of the raw files avoiding -the need to read the raw data from each file. +the need to read the raw data from each file. Also, we could subset the whole +object using the filter functions =filterFile=, =filterRt= or =filterMz= to +e.g. remove problematic samples or restrict the retention time range in which we +want to perform the chromatographic peak detection. + +Next we perform the chromatographic peak detection using the /centWave/ algorithm +\cite{Tautenhahn:2008fx}. In the example below we use most of the standard +parameters, but the settings should be adjusted to each experiment individually +based on e.g. the expected width of the chromatographic peaks etc. + +#+NAME: faahKO-centWave +#+BEGIN_SRC R :message = FALSE + ## Defining the settings for the centWave peak detection. + cwp <- CentWaveParam(snthresh = 20, noise = 1000) + xod <- findChromPeaks(raw_data, param = cwp) +#+END_SRC + +The identified peaks can be accessed with the =chromPeaks= parameter which returns +a =matrix=, each line representing an identified peak. Column ="sample"= specifies +in which /sample/ (i.e. file) of the experiment the peak was detected. Below we +plot the signal distribution of the identified peaks per sample. + +#+NAME: faahKO-peak-intensity-boxplot +#+BEGIN_SRC R :ravel message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4 + ints <- split(chromPeaks(xod)[, "into"], f = chromPeaks(xod)[, "sample"]) + ints <- lapply(ints, log2) + boxplot(ints, varwidth = TRUE, col = sample_colors[pData(xod)$sample_group], + ylab = expression(log[2]~intensity), main = "Peak intensities") +#+END_SRC + +Next we align the samples using the /obiwarp/ method \cite{Prince:2006jj}. This +method does, in contrast to other alignment/retention time correction methods, +not require any identified peaks and could thus also be applied to an +=OnDiskMSnExp= object. Note that all retention time adjustment methods do also +adjust the retention times reported for the individual peaks in =chromPeaks=. + +#+NAME: faahKO-obiwarp +#+BEGIN_SRC R :ravel message = FALSE + ## Doing the obiwarp alignment using the default settings. + xod <- adjustRtime(xod, param = ObiwarpParam()) +#+END_SRC + +Note that any pre-processing results can be removed at any time using a /drop/ +method, such as =dropChromPeaks=, =dropFeatureDefinitions= or +=dropAdjustedRtime=. + +To evaluate the impact of the alignment we can plot again the BPC of each +sample. + +#+NAME: faahKO-bpi-obiwarp +#+BEGIN_SRC R :ravel message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4 + ## Get the base peak chromatograms. This reads data from the files. 
+ bpis <- extractChromatograms(xod, aggregationFun = "max") + plot(3, 3, pch = NA, xlim = range(unlist(lapply(bpis, rtime))), + ylim = range(unlist(lapply(bpis, intensity))), main = "BPC", + xlab = "rtime", ylab = "intensity") + for (i in 1:length(bpis)) { + points(rtime(bpis[[i]]), intensity(bpis[[i]]), type = "l", + col = paste0(sample_colors[pData(raw_data)$sample_group[i]], 80)) + } + +#+END_SRC + +Below we draw the difference of the adjusted to the raw retention times. The +deviation from the raw retention times should be within the expected +variance/shift from the chromatography. Too large differences could indicate +poorly performing samples. + +#+NAME: faahKO-adjusted-rtime-plot +#+BEGIN_SRC R :ravel message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4 + ## Calculate the difference between the adjusted and the raw retention times. + diffRt <- rtime(xod) - rtime(xod, adjusted = FALSE) + + ## By default, rtime and most other accessor methods return a numeric vector. To + ## get the values grouped by sample we have to split this vector by file/sample + diffRt <- split(diffRt, fromFile(xod)) + + ## Get the raw retention times grouped by sample + rawRt <- rtime(xod, bySample = TRUE, adjusted = FALSE) + plot(rawRt[[1]], diffRt[[1]], type = "l", ylab = "adjusted - raw rt", + xlab = "raw rt", main = "Obiwarp alignment results", ylim = range(diffRt), + col = paste0(sample_colors[pData(xod)$sample_group[1]], 80)) + for (i in 2:length(rawRt)) { + points(rawRt[[i]], diffRt[[i]], type = "l", + col = paste0(sample_colors[pData(xod)$sample_group[i]], 80)) + } +#+END_SRC + +The distribution of retention time differences could also be used for quality +assessment. + +#+NAME: faahKO-adjusted-rtime-boxplot +#+BEGIN_SRC R :ravel message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4 + boxplot(diffRt, col = sample_colors[pData(xod)$sample_group], + main = "Obiwarp alignment results", ylab = "adjusted - raw rt") +#+END_SRC + +The 3rd sample was used as /center/ sample against which all other samples were +aligned to, hence its adjusted retention times are identical to the raw +retention times. + +Next we group identified chromatographic peaks across samples. We use the /peak +density/ method \cite{Smith:2006ic} specifying that a chromatographic peak have +to be present in at least 1/3 of the samples within each group to be combined to +a mz-rt /feature/. + +#+NAME: faahKO-groupPeakDensity +#+BEGIN_SRC R :ravel message = FALSE + ## Define the PeakDensityParam + pdp <- PeakDensityParam(sampleGroups = pData(xod)$sample_group, + maxFeatures = 300, minFraction = 0.66) + xod <- groupChromPeaks(xod, param = pdp) +#+END_SRC + +The definitions of the features can be accessed with the =featureDefinitions=, +which lists the mz-rt space specific to a feature. Column ="peakidx"= lists the +indices (in the =chromPeaks= matrix) of the individual chromatographic peaks +belonging to the feature. + +#+NAME: faahKO-featureDefinitions +#+BEGIN_SRC R :ravel message = FALSE + head(featureDefinitions(xod)) +#+END_SRC + +To extract /values/ for the features, the =featureValues= method can be used. This +method returns a matrix with rows being the features and column the samples. The +=value= parameter allows to specify the value that should be returned. Below we +extract the ="into"= signal, i.e. the per-peak integrated intensity for each +feature. + +#+NAME: faahKO-featureValues +#+BEGIN_SRC R :ravel message = FALSE + ## Extract the "into" peak integrated signal. 
+ head(featureValues(xod, value = "into")) +#+END_SRC + +After correspondence there will always be features that do not include peaks +from every sample (being it that the peak finding algorithm failed to identify a +peak or that no signal was measured in the respective mz-rt area). For such +features an =NA= is returned by the =featureValues= method. Here, =xcms= allows to +infer values for such missing peaks using the =fillChromPeaks= method. This method +integrates in files where a peak was not found the signal from the mz-rt area +where it is expected and adds it to the =chromPeaks= matrix. Such /filled-in/ peaks +have a value of =1= in the ="is_filled"= column of the =chromPeaks= matrix. -+ Do the chromatographic peak detection. +#+NAME: faahKO-fillPeaks +#+BEGIN_SRC R :ravel message = FALSE + ## Fill in peaks with default settings. Settings can be adjusted by passing + ## a FillChromPeaksParam object to the method. + xod <- fillChromPeaks(xod) + + head(featureValues(xod)) +#+END_SRC + +Not for all missing peaks a value could be integrated (because at the respective +location no measurements are available). The peak area from which signal is to +be extracted can also be increased modifying the settings by passing a +=FillChromPeaksParam= object. + +At last we can inspect the =processHistory= of the analysis. As described earlier, +this records all (major) processing steps along with the corresponding parameter +classes. -+ Describe the peak detection methods. +#+NAME: faahKO-processHistory +#+BEGIN_SRC R :ravel message = FALSE + ## List the full process history + processHistory(xod) +#+END_SRC + +It is also possible to extract specific processing steps by specifying its +type. Available types can be listed with the =processHistoryTypes= function. Below +we extract the parameter class for the alignment/retention time adjustment step. + +#+NAME: faahKO-processHistory-select +#+BEGIN_SRC R :ravel message = FALSE + ph <- processHistory(xod, type = "Retention time correction") + + ## Access the parameter + processParam(ph[[1]]) +#+END_SRC -+ Describe subsetting methods filter etc. ** New naming convention @@ -305,7 +512,7 @@ One possible drawback from the introduction of this new layer is, that more objects get copied by R which /could/ eventually result in a larger memory demand or performance decrease (while no such was decrease was observed up to now). -** Usability improvements +** Usability improvements in the /old/ user interface + =[= subsetting method for =xcmsRaw= objects that enables to subset an =xcmsRaw= object to specific scans/spectra. @@ -421,6 +628,15 @@ generation uses now the =binYonX= method which fixed some problems in the origin binning and linear interpolation methods. Thus results might be slightly different. +Also, the =retcor.obiwarp= method reports (un-rounded) adjusted retention times, +but adjusts the retention time of eventually already identified peaks using +rounded adjusted retention times. The new =adjustRtime= method(s) does adjust +identified peaks using the reported adjusted retention times (not rounded). This +guarantees that e.g. removing retention time adjustment/alignment results from +an object restores the object to its initial state (i.e. the adjusted retention +times of the identified peaks are reverted to the retention times before +alignment). +See issue [[https://github.com/sneumann/xcms/issues/122][#122]] for more details. 
** =retcor.peaksgroups=: change in the way how /well behaved/ peak groups are ordered @@ -457,6 +673,30 @@ These problems have been fixed in version 1.51.1 by first sub-setting the =xcmsRaw= object (using the =[= method) before actually performing the feature detection. +** =fillPeaks= (=fillChromPeaks=) differences + +In the original =fillPeaks.MSW=, the mz range from which the signal is to be +integrated was defined using + +#+BEGIN_SRC R :eval = "never", :ravel eval = FALSE + mzarea <- seq(which.min(abs(mzs - peakArea[i, "mzmin"])), + which.min(abs(mzs - peakArea[i, "mzmax"]))) + +#+END_SRC + +Depending on the data this could lead to the inclusion of signal in the +integration that are just outside of the mz range. In the new =fillChromPeaks= +method signal is integrated only for mz values >= mzmin and <= mzmax thus +ensuring that only signal is used that is truly within the peak area defined by +columns ="mzmin"=, ="mzmax"=, ="rtmin"= and ="rtmax"=. + +Also, the =fillPeaks.chrom= method did return ="into"= and ="maxo"= values of =0= if no +signal was found in the peak area. The new method does not integrate any signal +in such cases and does not fill in that peak. + +See also issue [[https://github.com/sneumann/xcms/issues/130][#130]] for more +information. + ** Problems with iterative binning of small data sub-sets in =findPeaks.matchedFilter= :noexport: The problem described here has been fixed in =xcms= >= 1.51.1. @@ -770,13 +1010,19 @@ Here we list all of the functions and related files that are deprecated. * TODOs :noexport: -** TODO Deprecate binning functions. +** DONE Deprecate binning functions. + CLOSED: [2017-02-23 Thu 07:47] + - State "DONE" from "TODO" [2017-02-23 Thu 07:47] All done except for the retention time correction!!! -** TODO Continue implementing the =do_= functions. -** TODO Define a new object to contain the preprocessing results +** DONE Continue implementing the =do_= functions. + CLOSED: [2017-02-23 Thu 07:47] + - State "DONE" from "TODO" [2017-02-23 Thu 07:47] +** DONE Define a new object to contain the preprocessing results + CLOSED: [2017-02-23 Thu 07:47] + - State "DONE" from "TODO" [2017-02-23 Thu 07:47] This object should replace in the long run the =xcmsSet= object providing the same functionality while in addition add a better integration of the original raw data files. The object should contain: @@ -811,15 +1057,17 @@ to avoid copying etc of the data. Check also =assayDataElement()= in =MSnbase=. + Rename =peaks= to =features=. + Better alternative for =groups=: =alignedFeatures=. -+ =groupval=? =featureMatrix=. ++ =groupval=? =featureValues=. *** Design and implementation: + =features= should be still implemented as =matrix= (for performance issues). + Alignment information could be implemented as =DataFrame= with the indices added to a column =idx=. -** TODO Rename objects, functions and methods +** DONE Rename objects, functions and methods + CLOSED: [2017-02-23 Thu 07:47] + - State "DONE" from "TODO" [2017-02-23 Thu 07:47] + [X] =features=: =chromPeaks=. + [X] =hasDetectedFeatures=: =hasChromPeaks=. + [ ] feature: chromatographic peak. @@ -863,4 +1111,71 @@ RT correction. + [X] Correspondence: (grouping) registration of recurring signals from the same analyte over replicate samples \cite{Smith:2014di}. 
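Referring back to the =fillPeaks= (=fillChromPeaks=) differences section above, a small self-contained illustration of the mz-range behaviour (toy numbers only, not real data):

#+BEGIN_SRC R :ravel eval = FALSE
  ## Toy mz values around a peak area with mzmin = 100.0 and mzmax = 100.5.
  mzs <- c(99.99, 100.10, 100.45, 100.51)
  ## Old behaviour: indices of the mz values *closest* to mzmin/mzmax; this can
  ## include values just outside the peak's mz range (99.99 and 100.51 here).
  mzs[seq(which.min(abs(mzs - 100.0)), which.min(abs(mzs - 100.5)))]
  ## New behaviour: only mz values truly within [mzmin, mzmax] (100.10, 100.45).
  mzs[mzs >= 100.0 & mzs <= 100.5]
#+END_SRC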
+ +** TODO Implement the =Chromatogram= class + +Now, to accommodate all possibilities: +https://en.wikipedia.org/wiki/Triple_quadrupole_mass_spectrometer +Triple Q-TOF measurements: ++ Product Ion Scan + - Q1 fixed + - Q3 scan ++ Precursor Ion Scan + - Q1 scan + - Q3 fixed ++ Neutral Loss Scan + - Q1 scan at mz = m_{product} + - Q3 scan at mz = m_{product} - m_{neutral molecule} ++ Selected Reaction monitoring (SRM, MRM): Q1 is used to select the precursor + ion, Q3 cycles through the product ions. Precursor/product pair is referred to + as a /transition/. + - Q1 fixed at mz = m_{precursor} + - Q3 scan at mz = m_{product} + + +Other resources: +https://en.wikipedia.org/wiki/Mass_chromatogram#Selected-ion_monitoring_chromatogram_.28SIM.29 +http://proteowizard.sourceforge.net/dox/structpwiz_1_1msdata_1_1_chromatogram.html +https://sourceforge.net/p/proteowizard/mailman/message/27571266/ + +** TODO Implement a =findBackgroundIons= method + +Check on one of our own files. + +#+BEGIN_SRC R + library(xcms) + + rd <- readMSData2("/Volumes/Ext64/data/2016/2016-11/NoSN/250516_QC_NORM_3_POS_3.mzML") + + ## Evaluate the mz-rt matrix - can we spot already something there? + sps <- spectra(rd) + dfs <- lapply(sps, as.data.frame) + ## cut the intensities at 5000 + dfs <- lapply(dfs, function(z) { + z[z[, "i"] > 5000, "i"] <- 5000 + return(z) + }) + + library(RColorBrewer) + library(lattice) + colR <- colorRampPalette(brewer.pal(9, "YlOrRd"))(255) + brks <- do.breaks(c(0, 5000), length(colR)) + + mzR <- range(mz(rd)) + rtR <- range(rtime(rd)) + + plot(3, 3, pch = NA, xlim = rtR, ylim = mzR) + for(i in 1:length(dfs)) { + intC <- level.colors(dfs[[i]]$i, at = brks, col.regions = colR) + xs <- rep(rtime(rd)[i], length(intC)) + points(x = xs, y = dfs[[i]]$mz, col = intC, cex = 0.1, pch = 16) + } + ## level.colors(x, at = brks, col.regions = colR) +#+END_SRC + +A simple approach would be to walk along the mz and evaluate whether, for a +certain mz (bin?) the signal is higher than a threshold in 70% of the spectra, +i.e. that the % of values is larger than a percentage. + + * References diff --git a/vignettes/references.bib b/vignettes/references.bib index 4b4f86c1b..e17352a13 100644 --- a/vignettes/references.bib +++ b/vignettes/references.bib @@ -23,3 +23,14 @@ @article{Smith:2006ic month = feb } +@article{Prince:2006jj, +author = {Prince, John T and Marcotte, Edward M}, +title = {{Chromatographic alignment of ESI-LC-MS proteomics data sets by ordered bijective interpolated warping.}}, +journal = {Analytical chemistry}, +year = {2006}, +volume = {78}, +number = {17}, +pages = {6140--6152}, +month = sep +} +
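Relating to the =findBackgroundIons= idea sketched in the org notes above, a possible helper (purely hypothetical, not part of xcms) could flag mz bins whose signal exceeds a threshold in a large fraction of the spectra; `dfs` is assumed to be the list of per-spectrum data.frames (columns `mz` and `i`) built in that note:

```r
## Hypothetical helper, sketch only: flag mz bins in which the intensity is
## above `intThr` in more than `propThr` of the spectra.
flagBackgroundMz <- function(dfs, binSize = 0.01, intThr = 100, propThr = 0.7) {
    allMz <- unlist(lapply(dfs, function(z) z$mz))
    brks <- seq(floor(min(allMz)), ceiling(max(allMz)) + binSize, by = binSize)
    ## For each spectrum: the mz bins containing signal above the threshold.
    hits <- lapply(dfs, function(z) {
        unique(as.integer(cut(z$mz[z$i > intThr], breaks = brks,
                              include.lowest = TRUE)))
    })
    cnt <- table(unlist(hits))
    keep <- cnt / length(dfs) > propThr
    ## Lower mz boundaries of the candidate background bins.
    brks[as.integer(names(cnt)[keep])]
}
```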